import json
import logging
from typing import AsyncGenerator, Dict, List, Optional

import httpx

from app.core.config import settings

logger = logging.getLogger(__name__)


class OllamaClient:
    def __init__(self, base_url: Optional[str] = None, model: Optional[str] = None):
        self.base_url = base_url or settings.OLLAMA_BASE_URL
        self.model = model or settings.DEFAULT_LLM_MODEL
        self.client = httpx.AsyncClient(timeout=60.0)

    async def chat(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 2000,
    ) -> str:
        """Send chat messages to Ollama and return the full response text."""
        try:
            # Prepend the system prompt without mutating the caller's list
            if system_prompt:
                messages = [{"role": "system", "content": system_prompt}] + messages

            payload = {
                "model": self.model,
                "messages": messages,
                "options": {
                    "temperature": temperature,
                    "num_predict": max_tokens,
                },
                "stream": False,
            }

            response = await self.client.post(
                f"{self.base_url}/api/chat",
                json=payload,
            )
            response.raise_for_status()

            result = response.json()
            return result.get("message", {}).get("content", "")

        except httpx.RequestError as e:
            logger.error(f"Request error communicating with Ollama: {e}")
            raise Exception(f"Failed to communicate with local LLM: {e}")
        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error from Ollama: {e}")
            raise Exception(f"LLM service error: {e}")

    async def chat_stream(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
    ) -> AsyncGenerator[str, None]:
        """Stream chat response chunks from Ollama."""
        try:
            if system_prompt:
                messages = [{"role": "system", "content": system_prompt}] + messages

            payload = {
                "model": self.model,
                "messages": messages,
                "options": {
                    "temperature": temperature,
                },
                "stream": True,
            }

            async with self.client.stream(
                "POST",
                f"{self.base_url}/api/chat",
                json=payload,
            ) as response:
                response.raise_for_status()
                # Ollama streams one JSON object per line
                async for line in response.aiter_lines():
                    if line:
                        try:
                            data = json.loads(line)
                            if "message" in data and "content" in data["message"]:
                                yield data["message"]["content"]
                        except json.JSONDecodeError:
                            continue

        except httpx.RequestError as e:
            logger.error(f"Request error streaming from Ollama: {e}")
            raise Exception(f"Failed to stream from local LLM: {e}")

    async def generate_embedding(self, text: str) -> List[float]:
        """Generate embeddings using Ollama (requires an embedding-capable model)."""
        try:
            payload = {
                "model": "nomic-embed-text",  # Use an embedding-specific model
                "prompt": text,
            }

            response = await self.client.post(
                f"{self.base_url}/api/embeddings",
                json=payload,
            )
            response.raise_for_status()

            result = response.json()
            return result.get("embedding", [])

        except httpx.RequestError as e:
            logger.error(f"Request error getting embeddings from Ollama: {e}")
            return []
        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error getting embeddings from Ollama: {e}")
            return []

    async def check_health(self) -> bool:
        """Check whether the Ollama service is available."""
        try:
            response = await self.client.get(f"{self.base_url}/api/tags")
            return response.status_code == 200
        except httpx.RequestError:
            return False

    async def list_models(self) -> List[str]:
        """List the models available in Ollama."""
        try:
            response = await self.client.get(f"{self.base_url}/api/tags")
            response.raise_for_status()

            result = response.json()
            models = result.get("models", [])
            return [model["name"] for model in models]

        except httpx.RequestError as e:
            logger.error(f"Request error listing models from Ollama: {e}")
            return []

    async def close(self):
        """Close the underlying HTTP client."""
        await self.client.aclose()


# Global instance
ollama_client = OllamaClient()
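

# Illustrative usage sketch, not part of the service API: it assumes an Ollama
# server is reachable at settings.OLLAMA_BASE_URL and that the configured
# default model has already been pulled locally. The demo function name is an
# example only.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        if await ollama_client.check_health():
            reply = await ollama_client.chat(
                messages=[{"role": "user", "content": "Say hello in one sentence."}],
                system_prompt="You are a helpful assistant.",
            )
            print(reply)
        else:
            print("Ollama is not reachable.")
        await ollama_client.close()

    asyncio.run(_demo())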