Files
abot/llm/local_llm_client.py
2026-01-15 17:01:02 -08:00

155 lines
4.5 KiB
Python

import logging
import requests
import os
from typing import List, Dict, Any
import urllib3
# Disable SSL warnings (only for development with self-signed certs!)
# NOTE: this silences InsecureRequestWarning globally for the whole process,
# matching the verify=False usage in the request calls below.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
# Get LLM configuration from environment
# Base URL of the Ollama-compatible server; endpoint paths are appended below.
LLM_API_URL = os.getenv("LLM_API_URL", "http://api.chat.pathcore.org")
LLM_API_KEY = os.getenv("LLM_API_KEY") # Optional API key
LLM_MODEL = os.getenv("LLM_MODEL", "llama3") # Default Ollama model
def chat_completion(
    messages: List[Dict[str, Any]],
    model: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 1000,
    tools: List[Dict[str, Any]] | None = None,
) -> Dict[str, Any]:
    """
    Call Ollama API for chat completion.

    Args:
        messages: List of message dicts with 'role' and 'content'
        model: Model name to use (defaults to LLM_MODEL from env)
        temperature: Sampling temperature (0.0 to 2.0)
        max_tokens: Maximum tokens to generate (mapped to Ollama's
            'num_predict' option)
        tools: Optional tool/function definitions, passed through to the
            API for function calling

    Returns:
        Dict with 'content' (response text), 'tool_calls' (list, possibly
        empty) and 'raw' (the full decoded API response)

    Raises:
        Exception: If the API call fails (timeout, connection, HTTP error,
            or an unparsable response)
    """
    # Single source of truth for the timeout so the error messages below
    # cannot drift out of sync with the actual request setting.
    request_timeout = 120  # seconds; Ollama can be slow on first request (model load)
    # Use provided model or fall back to env variable
    model = model or LLM_MODEL
    # Ollama chat endpoint (OpenAI-style message dicts are accepted as-is)
    url = f"{LLM_API_URL}/api/chat"
    logger.info("Calling Ollama API at %s with model: %s", url, model)
    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {
            "temperature": temperature,
            "num_predict": max_tokens,
        }
    }
    if tools:
        payload["tools"] = tools
    headers = {
        "Content-Type": "application/json"
    }
    # Add API key if configured (Ollama doesn't usually need this, but just in case)
    if LLM_API_KEY:
        headers["Authorization"] = f"Bearer {LLM_API_KEY}"
    try:
        # SECURITY: verify=False disables TLS certificate validation; only
        # acceptable for development against self-signed certs (see module top).
        resp = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=request_timeout,
            verify=False
        )
        # Raise exception for HTTP errors
        resp.raise_for_status()
        data = resp.json()
        logger.debug("Ollama API response: %s", data)
        # Extract the assistant's response from Ollama format
        # Ollama returns: {"message": {"role": "assistant", "content": "..."}}
        message = data.get("message", {})
        content = message.get("content", "")
        tool_calls = message.get("tool_calls", [])
        logger.info(
            "Ollama response received (content=%d chars, tool_calls=%d)",
            len(content), len(tool_calls)
        )
        return {
            "content": content,
            "tool_calls": tool_calls,
            "raw": data
        }
    except requests.exceptions.Timeout as e:
        logger.error("Ollama API request timed out after %d seconds", request_timeout)
        raise Exception("LLM request timed out. The model might be loading for the first time.") from e
    except requests.exceptions.ConnectionError as e:
        logger.error("Cannot connect to Ollama API: %s", e)
        raise Exception("Cannot connect to Ollama server. Is it running?") from e
    except requests.exceptions.HTTPError as e:
        logger.error("HTTP error from Ollama API: %s", e)
        # HTTPError.response can be None; guard so we report the real error
        # instead of an AttributeError from this handler.
        if e.response is not None and e.response.status_code == 404:
            raise Exception(f"Model '{model}' not found. Try: ollama pull {model}") from e
        raise Exception(f"Ollama API error: {e}") from e
    except ValueError as e:
        # resp.json() raises ValueError on a non-JSON body
        logger.error("Invalid response from Ollama API: %s", e)
        raise
    except Exception as e:
        logger.error("Unexpected error calling Ollama API: %s", e, exc_info=True)
        raise
def list_models() -> List[str]:
    """
    List available Ollama models via the /api/tags endpoint.

    Best-effort: any failure (network, HTTP, bad JSON) is logged and an
    empty list is returned instead of raising.

    Returns:
        List of model names (empty on error or when no models are installed)
    """
    url = f"{LLM_API_URL}/api/tags"
    try:
        # SECURITY: verify=False disables TLS validation (dev-only; see module top).
        resp = requests.get(url, timeout=10, verify=False)
        resp.raise_for_status()
        data = resp.json()
        # /api/tags returns {"models": [{"name": ...}, ...]}; a missing key
        # yields [] rather than a KeyError.
        models = [model["name"] for model in data.get("models", [])]
        logger.info("Available models: %s", models)
        return models
    except Exception as e:
        # Deliberate broad catch: callers rely on [] instead of an exception.
        logger.error("Error listing models: %s", e)
        return []
# Make functions available for import
# Explicit public API: only these two names are exported via `from ... import *`;
# the module-level config constants stay internal.
__all__ = ['chat_completion', 'list_models']