import logging
import os

import requests

# Endpoint and model come from the environment so the client can point at any
# OpenAI-compatible local server (e.g. Ollama, llama.cpp, vLLM).
LLM_ENDPOINT = os.getenv("LOCAL_LLM_ENDPOINT")
MODEL_NAME = os.getenv("LOCAL_LLM_MODEL", "llama3")


def chat_completion(messages, temperature=0.3, max_tokens=1024):
    """Send a chat-completion request to the local LLM and return the reply text."""
    # Fail gracefully on misconfiguration instead of letting requests.post(None)
    # raise a confusing MissingSchema error deeper in the call.
    if not LLM_ENDPOINT:
        logging.error("LOCAL_LLM_ENDPOINT is not set")
        return "⚠️ Local LLM is currently unavailable."

    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    try:
        resp = requests.post(LLM_ENDPOINT, json=payload, timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError) as e:
        # RequestException covers network/HTTP/JSON-decode failures;
        # KeyError/IndexError cover a well-formed JSON body with an
        # unexpected shape. Log the traceback but hand the caller a
        # user-facing fallback message rather than propagating the error.
        logging.error(f"Local LLM call failed: {e}", exc_info=True)
        return "⚠️ Local LLM is currently unavailable."
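
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of calling chat_completion(). It assumes
# LOCAL_LLM_ENDPOINT points at an OpenAI-compatible chat-completions
# route (e.g. http://localhost:11434/v1/chat/completions for Ollama)
# and that the model named by LOCAL_LLM_MODEL is available there.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    reply = chat_completion(
        messages=[
            {"role": "system", "content": "You are a concise assistant."},
            {"role": "user", "content": "Summarize what a vector database does."},
        ],
        temperature=0.2,
    )
    print(reply)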