# foxus/backend/app/services/ollama_service.py
"""
Ollama service for local LLM integration
"""
import httpx
import json
from typing import Dict, List, Optional, AsyncGenerator

from app.core.config import settings
from app.models.ai import LanguageType, AICommand


class OllamaService:
"""Service for interacting with Ollama API"""
def __init__(self):
self.base_url = settings.OLLAMA_BASE_URL
self.default_model = settings.DEFAULT_MODEL
self.client = httpx.AsyncClient(timeout=60.0)
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
await self.client.aclose()
async def is_available(self) -> bool:
"""Check if Ollama is running and available"""
try:
response = await self.client.get(f"{self.base_url}/api/tags")
return response.status_code == 200
except Exception:
return False
    async def list_models(self) -> List[Dict]:
        """List available models from Ollama"""
        try:
            response = await self.client.get(f"{self.base_url}/api/tags")
            if response.status_code == 200:
                data = response.json()
                return data.get("models", [])
        except Exception as e:
            print(f"Error listing models: {e}")
        return []
    async def pull_model(self, model_name: str) -> bool:
        """Pull/download a model if not available"""
        try:
            payload = {"name": model_name}
            response = await self.client.post(
                f"{self.base_url}/api/pull",
                json=payload,
                timeout=300.0  # allow up to 5 minutes for a model download
            )
            return response.status_code == 200
        except Exception as e:
            print(f"Error pulling model {model_name}: {e}")
            return False
    async def generate_completion(
        self,
        prompt: str,
        model: Optional[str] = None,
        temperature: float = settings.TEMPERATURE,
        max_tokens: int = settings.MAX_TOKENS,
        stream: bool = False
    ) -> str:
        """Generate text completion from Ollama"""
        model_name = model or self.default_model
        payload = {
            "model": model_name,
            "prompt": prompt,
            "stream": stream,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
                "top_p": settings.TOP_P
            }
        }
        try:
            response = await self.client.post(
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=120.0
            )
            if response.status_code == 200:
                if stream:
                    # The body is fully buffered at this point; Ollama streams
                    # one JSON object per line, so join the chunks here.
                    full_response = ""
                    for line in response.text.splitlines():
                        if line:
                            data = json.loads(line)
                            if "response" in data:
                                full_response += data["response"]
                            if data.get("done", False):
                                break
                    return full_response
                else:
                    # Handle single (non-streaming) response
                    data = response.json()
                    return data.get("response", "")
            else:
                raise Exception(f"Ollama API error: {response.status_code}")
        except Exception as e:
            print(f"Error generating completion: {e}")
            raise
    async def generate_streaming(
        self,
        prompt: str,
        model: Optional[str] = None,
        temperature: float = settings.TEMPERATURE,
        max_tokens: int = settings.MAX_TOKENS
    ) -> AsyncGenerator[str, None]:
        """Generate streaming completion from Ollama"""
        model_name = model or self.default_model
        payload = {
            "model": model_name,
            "prompt": prompt,
            "stream": True,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
                "top_p": settings.TOP_P
            }
        }
        try:
            async with self.client.stream(
                "POST",
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=120.0
            ) as response:
                if response.status_code == 200:
                    async for line in response.aiter_lines():
                        if line:
                            try:
                                data = json.loads(line)
                                if "response" in data:
                                    yield data["response"]
                                if data.get("done", False):
                                    break
                            except json.JSONDecodeError:
                                # Skip malformed or partial lines in the stream
                                continue
                else:
                    raise Exception(f"Ollama API error: {response.status_code}")
        except Exception as e:
            print(f"Error in streaming generation: {e}")
            raise
    def build_prompt(
        self,
        command: AICommand,
        code: str,
        language: Optional[LanguageType] = None,
        context: Optional[str] = None,
        error_message: Optional[str] = None
    ) -> str:
        """Build appropriate prompt based on command and context"""
        lang_name = language.value if language else "code"

        # Prompt bodies are kept flush-left so no indentation leaks into
        # the text sent to the model.
        prompts = {
            AICommand.EXPLAIN: f"""
Explain the following {lang_name} code in clear, concise terms:
```{lang_name}
{code}
```
Please provide:
1. What this code does
2. Key concepts and algorithms used
3. Any potential issues or improvements
Response:""",
            AICommand.REFACTOR: f"""
Refactor the following {lang_name} code to improve readability, performance, and maintainability:
```{lang_name}
{code}
```
Please provide:
1. Refactored code
2. Explanation of changes made
3. Benefits of the refactoring
Refactored code:""",
            AICommand.FIX: f"""
Fix the bugs or issues in the following {lang_name} code:
```{lang_name}
{code}
```
{f"Error message: {error_message}" if error_message else ""}
Please provide:
1. Fixed code
2. Explanation of what was wrong
3. How the fix addresses the issue
Fixed code:""",
            AICommand.COMPLETE: f"""
Complete the following {lang_name} code based on the context:
```{lang_name}
{code}
```
Please provide the most likely completion that follows naturally from the existing code.
Completion:""",
            AICommand.COMMENT: f"""
Add clear, helpful comments to the following {lang_name} code:
```{lang_name}
{code}
```
Please provide the same code with appropriate comments explaining the functionality.
Commented code:""",
            AICommand.TEST: f"""
Generate comprehensive unit tests for the following {lang_name} code:
```{lang_name}
{code}
```
Please provide:
1. Complete test cases covering different scenarios
2. Test setup and teardown if needed
3. Comments explaining what each test validates
Test code:""",
            AICommand.OPTIMIZE: f"""
Optimize the following {lang_name} code for better performance:
```{lang_name}
{code}
```
Please provide:
1. Optimized code
2. Explanation of optimizations made
3. Expected performance improvements
Optimized code:""",
            AICommand.DOCUMENT: f"""
Generate comprehensive documentation for the following {lang_name} code:
```{lang_name}
{code}
```
Please provide:
1. Function/class documentation
2. Parameter descriptions
3. Return value descriptions
4. Usage examples
Documentation:"""
        }

        base_prompt = prompts.get(
            command,
            f"Analyze this {lang_name} code:\n\n```{lang_name}\n{code}\n```\n\nResponse:"
        )
        if context:
            base_prompt = f"Context: {context}\n\n{base_prompt}"
        return base_prompt


# Create singleton instance
ollama_service = OllamaService()
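

# Minimal usage sketch (illustrative, not part of the service API): it assumes
# an Ollama server is reachable at settings.OLLAMA_BASE_URL and that the
# configured default model has already been pulled. Running this module
# directly performs a quick smoke test of the service.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        # Use the async context manager so the HTTP client is closed cleanly.
        async with OllamaService() as service:
            if not await service.is_available():
                print("Ollama is not reachable; start it with `ollama serve`.")
                return
            # _demo and the sample snippet below are hypothetical examples.
            prompt = service.build_prompt(
                AICommand.EXPLAIN,
                code="def add(a, b):\n    return a + b",
            )
            print(await service.generate_completion(prompt))

    asyncio.run(_demo())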