"""Thin client for an Ollama-compatible chat API.

Configuration comes from environment variables:
    LLM_API_URL     Base URL of the Ollama server.
    LLM_API_KEY     Optional bearer token sent as an Authorization header.
    LLM_MODEL       Default model name used when a call omits `model`.
    LLM_VERIFY_SSL  Set to "1"/"true"/"yes" to enable TLS certificate
                    verification. Default is disabled (matching the previous
                    hard-coded `verify=False` used for self-signed dev certs).
"""

import logging
import os
from typing import Any, Dict, List

import requests
import urllib3

# TLS verification is opt-in; the default (off) preserves the original
# development behavior with self-signed certificates.
VERIFY_SSL = os.getenv("LLM_VERIFY_SSL", "").strip().lower() in ("1", "true", "yes")

# Only silence the insecure-request warning when verification is actually off;
# if the operator enables verification, warnings stay meaningful.
if not VERIFY_SSL:
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

logger = logging.getLogger(__name__)

# LLM configuration from the environment.
LLM_API_URL = os.getenv("LLM_API_URL", "http://api.chat.pathcore.org")
LLM_API_KEY = os.getenv("LLM_API_KEY")  # Optional API key
LLM_MODEL = os.getenv("LLM_MODEL", "llama3")  # Default Ollama model


class LLMError(Exception):
    """Raised when a call to the Ollama API fails.

    Subclasses ``Exception``, so existing callers that catch ``Exception``
    (the type previously raised here) continue to work unchanged.
    """


def chat_completion(
    messages: List[Dict[str, Any]],
    model: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 1000,
    tools: List[Dict[str, Any]] | None = None,
) -> Dict[str, Any]:
    """
    Call Ollama API for chat completion.

    Args:
        messages: List of message dicts with 'role' and 'content'
        model: Model name to use (defaults to LLM_MODEL from env)
        temperature: Sampling temperature (0.0 to 2.0)
        max_tokens: Maximum tokens to generate
        tools: Optional tool/function definitions forwarded to the API

    Returns:
        Dict with 'content' (response text), 'tool_calls' (list, possibly
        empty), and 'raw' (the full decoded JSON response).

    Raises:
        LLMError: If the API call times out, cannot connect, or returns an
            HTTP error (LLMError subclasses Exception).
        ValueError: If the server returns a non-JSON body.
    """
    # Use provided model or fall back to env variable.
    model = model or LLM_MODEL

    # Ollama chat endpoint.
    url = f"{LLM_API_URL}/api/chat"

    # Messages are already OpenAI-style dicts, which Ollama's /api/chat
    # accepts directly.
    logger.info("Calling Ollama API at %s with model: %s", url, model)

    payload: Dict[str, Any] = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {
            "temperature": temperature,
            "num_predict": max_tokens,
        },
    }
    if tools:
        payload["tools"] = tools

    headers = {"Content-Type": "application/json"}
    # Add API key if configured (Ollama doesn't usually need this).
    if LLM_API_KEY:
        headers["Authorization"] = f"Bearer {LLM_API_KEY}"

    try:
        resp = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=120,  # Ollama can be slow on first request (model load)
            verify=VERIFY_SSL,
        )
        # Raise for 4xx/5xx so the HTTPError branch below handles them.
        resp.raise_for_status()

        data = resp.json()
        logger.debug("Ollama API response: %s", data)

        # Ollama returns: {"message": {"role": "assistant", "content": "..."}}
        message = data.get("message", {})
        content = message.get("content", "")
        # `or []` also covers an explicit null value for "tool_calls".
        tool_calls = message.get("tool_calls") or []

        logger.info(
            "Ollama response received (content=%d chars, tool_calls=%d)",
            len(content),
            len(tool_calls),
        )

        return {
            "content": content,
            "tool_calls": tool_calls,
            "raw": data,
        }

    except requests.exceptions.Timeout as e:
        logger.error("Ollama API request timed out after 120 seconds")
        raise LLMError(
            "LLM request timed out. The model might be loading for the first time."
        ) from e
    except requests.exceptions.ConnectionError as e:
        logger.error("Cannot connect to Ollama API: %s", e)
        raise LLMError("Cannot connect to Ollama server. Is it running?") from e
    except requests.exceptions.HTTPError as e:
        logger.error("HTTP error from Ollama API: %s", e)
        # e.response can be None in edge cases; guard before reading it.
        if e.response is not None and e.response.status_code == 404:
            raise LLMError(
                f"Model '{model}' not found. Try: ollama pull {model}"
            ) from e
        raise LLMError(f"Ollama API error: {e}") from e
    except ValueError as e:
        # requests raises a ValueError subclass when the body is not JSON.
        logger.error("Invalid response from Ollama API: %s", e)
        raise
    except Exception as e:
        logger.error("Unexpected error calling Ollama API: %s", e, exc_info=True)
        raise


def list_models() -> List[str]:
    """
    List available Ollama models.

    Returns:
        List of model names; empty list on any failure (best-effort — errors
        are logged, never raised).
    """
    url = f"{LLM_API_URL}/api/tags"
    try:
        resp = requests.get(url, timeout=10, verify=VERIFY_SSL)
        resp.raise_for_status()

        data = resp.json()
        if "models" in data:
            models = [model["name"] for model in data["models"]]
            logger.info("Available models: %s", models)
            return models
        return []
    except Exception as e:
        # Deliberate best-effort: callers treat [] as "nothing available".
        logger.error("Error listing models: %s", e)
        return []


# Public API of this module.
__all__ = ['chat_completion', 'list_models', 'LLMError']