# Standard library imports.
import logging
import os

# Third-party HTTP client.
import requests

# Endpoint of the locally hosted LLM server; None when the env var is unset.
LLM_ENDPOINT = os.getenv("LOCAL_LLM_ENDPOINT")
# Model identifier to request; defaults to "llama3" when not configured.
MODEL_NAME = os.getenv("LOCAL_LLM_MODEL", "llama3")
def chat_completion(messages, temperature=0.3, max_tokens=1024):
    """Send a chat-completion request to the local LLM endpoint.

    Parameters
    ----------
    messages : list[dict]
        Chat messages in OpenAI-style format
        (``{"role": ..., "content": ...}``).
    temperature : float
        Sampling temperature forwarded to the model.
    max_tokens : int
        Maximum number of tokens the model may generate.

    Returns
    -------
    str
        The assistant's reply text, or a user-facing warning string when
        the endpoint is not configured, unreachable, or returns an
        unexpected payload.
    """
    # Fail fast with the same user-facing fallback if the endpoint was never
    # configured (os.getenv returns None when LOCAL_LLM_ENDPOINT is unset);
    # previously this surfaced only as an opaque requests error.
    if not LLM_ENDPOINT:
        logging.error("LOCAL_LLM_ENDPOINT is not set; cannot call local LLM.")
        return "⚠️ Local LLM is currently unavailable."

    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(LLM_ENDPOINT, json=payload, timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, ValueError) as e:
        # Narrowed from a bare `except Exception`: RequestException covers
        # connection/timeout/HTTP errors, KeyError/IndexError cover a
        # malformed choices structure, ValueError covers a non-JSON body
        # from resp.json(). Lazy %-args avoid formatting when the logger
        # is disabled; exc_info preserves the traceback.
        logging.error("Local LLM call failed: %s", e, exc_info=True)
        return "⚠️ Local LLM is currently unavailable."