155 lines
4.5 KiB
Python
155 lines
4.5 KiB
Python
import logging
|
|
import requests
|
|
import os
|
|
from typing import List, Dict, Any
|
|
import urllib3
|
|
|
|
# Silence urllib3's InsecureRequestWarning process-wide.
# Only appropriate for development against self-signed certificates!
urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
# LLM settings, read once from the environment at import time.
LLM_API_URL = os.environ.get("LLM_API_URL", "http://api.chat.pathcore.org")
LLM_API_KEY = os.environ.get("LLM_API_KEY")  # Optional API key; None when unset
LLM_MODEL = os.environ.get("LLM_MODEL", "llama3")  # Default Ollama model
|
|
|
|
|
|
def chat_completion(
    messages: List[Dict[str, Any]],
    model: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 1000,
    tools: List[Dict[str, Any]] | None = None,
) -> Dict[str, Any]:
    """
    Call the Ollama chat API (non-streaming) and return the assistant reply.

    Args:
        messages: List of message dicts with 'role' and 'content'. They are
            forwarded to the server unchanged.
        model: Model name to use (defaults to LLM_MODEL from env).
        temperature: Sampling temperature, sent as the 'temperature' option.
            It is passed through without validation.
        max_tokens: Maximum tokens to generate, sent as the 'num_predict'
            option.
        tools: Optional tool definitions. Added to the request under 'tools'
            only when non-empty.

    Returns:
        Dict with three keys:
            'content': the assistant's response text ('' if missing or null)
            'tool_calls': tool calls from the response ([] if missing or null)
            'raw': the full decoded JSON body returned by the server

    Raises:
        Exception: If the request times out, the server cannot be reached,
            or the server answers with an HTTP error status. The original
            requests exception is attached as ``__cause__``.
        ValueError: If the response body is not valid JSON (re-raised as-is).
    """
    # Use provided model or fall back to env variable
    model = model or LLM_MODEL

    # Ollama chat endpoint. Strip a trailing slash from the configured base
    # URL so we never build ".../​/api/chat" with a doubled slash.
    url = f"{LLM_API_URL.rstrip('/')}/api/chat"

    # Ollama can be slow on first request (model load), hence the long timeout.
    timeout_seconds = 120

    logger.info("Calling Ollama API at %s with model: %s", url, model)

    # Messages are sent in the same OpenAI-style shape the caller provided.
    payload: Dict[str, Any] = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {
            "temperature": temperature,
            "num_predict": max_tokens,
        },
    }

    if tools:
        payload["tools"] = tools

    headers = {"Content-Type": "application/json"}

    # Add API key if configured (Ollama doesn't usually need this, but just in case)
    if LLM_API_KEY:
        headers["Authorization"] = f"Bearer {LLM_API_KEY}"

    try:
        # NOTE(security): verify=False disables TLS certificate validation.
        # Kept for backward compatibility with self-signed development
        # servers; do not rely on this against untrusted networks.
        resp = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=timeout_seconds,
            verify=False,
        )

        # Raise exception for HTTP errors
        resp.raise_for_status()

        data = resp.json()
        logger.debug("Ollama API response: %s", data)

        # Ollama returns: {"message": {"role": "assistant", "content": "..."}}
        # Use `or` fallbacks rather than .get() defaults: a key that is
        # present with a JSON null value would otherwise yield None and break
        # the len() calls below.
        message = data.get("message") or {}
        content = message.get("content") or ""
        tool_calls = message.get("tool_calls") or []

        logger.info(
            "Ollama response received (content=%s chars, tool_calls=%s)",
            len(content),
            len(tool_calls),
        )

        return {
            "content": content,
            "tool_calls": tool_calls,
            "raw": data,
        }

    except requests.exceptions.Timeout as e:
        logger.error("Ollama API request timed out after %s seconds", timeout_seconds)
        raise Exception(
            "LLM request timed out. The model might be loading for the first time."
        ) from e

    except requests.exceptions.ConnectionError as e:
        logger.error("Cannot connect to Ollama API: %s", e)
        raise Exception("Cannot connect to Ollama server. Is it running?") from e

    except requests.exceptions.HTTPError as e:
        logger.error("HTTP error from Ollama API: %s", e)
        # e.response is set by raise_for_status(); guard anyway so a missing
        # response can never mask the real error with an AttributeError.
        status_code = e.response.status_code if e.response is not None else None
        if status_code == 404:
            raise Exception(
                f"Model '{model}' not found. Try: ollama pull {model}"
            ) from e
        raise Exception(f"Ollama API error: {e}") from e

    except ValueError as e:
        # Raised by resp.json() when the body is not valid JSON.
        logger.error("Invalid response from Ollama API: %s", e)
        raise

    except Exception as e:
        # Log with traceback, then let the original exception propagate.
        logger.exception("Unexpected error calling Ollama API: %s", e)
        raise
|
|
|
|
|
|
def list_models() -> List[str]:
    """
    List available Ollama models.

    This is a best-effort helper: any failure (connection problem, HTTP error
    status, invalid JSON, or an unexpected payload shape) is logged and an
    empty list is returned instead of raising.

    Returns:
        List of model names taken from the 'name' field of each entry in the
        response's 'models' array. Empty list if the response has no 'models'
        key or if any error occurs.
    """
    # Strip a trailing slash from the configured base URL so we never build
    # ".../​/api/tags" with a doubled slash.
    url = f"{LLM_API_URL.rstrip('/')}/api/tags"

    try:
        # NOTE(security): verify=False disables TLS certificate validation.
        # Kept for backward compatibility with self-signed development servers.
        resp = requests.get(url, timeout=10, verify=False)
        resp.raise_for_status()
        data = resp.json()

        if "models" not in data:
            return []

        models = [entry["name"] for entry in data["models"]]
        logger.info("Available models: %s", models)
        return models

    except Exception as e:
        # Deliberately broad: callers get [] rather than an exception.
        logger.error("Error listing models: %s", e)
        return []
|
|
|
|
|
|
# Public API of this module (what `from <module> import *` exports).
__all__ = [
    "chat_completion",
    "list_models",
]