# foxus/backend/app/services/ollama_service.py
"""
Ollama service for local LLM integration
"""
import httpx
import json
from typing import Dict, List, Optional, AsyncGenerator

from app.core.config import settings
from app.models.ai import LanguageType, AICommand


class OllamaService:
"""Service for interacting with Ollama API"""
def __init__(self):
self.base_url = settings.OLLAMA_BASE_URL
self.default_model = settings.DEFAULT_MODEL
self.client = httpx.AsyncClient(timeout=60.0)
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
await self.client.aclose()
async def is_available(self) -> bool:
"""Check if Ollama is running and available"""
try:
response = await self.client.get(f"{self.base_url}/api/tags")
return response.status_code == 200
except Exception:
return False
    async def list_models(self) -> List[Dict]:
        """List available models from Ollama"""
        try:
            response = await self.client.get(f"{self.base_url}/api/tags")
            if response.status_code == 200:
                data = response.json()
                return data.get("models", [])
        except Exception as e:
            print(f"Error listing models: {e}")
        return []
    async def pull_model(self, model_name: str) -> bool:
        """Pull/download a model if not available"""
        try:
            payload = {"name": model_name}
            response = await self.client.post(
                f"{self.base_url}/api/pull",
                json=payload,
                timeout=300.0  # allow up to 5 minutes for a model download
            )
            return response.status_code == 200
        except Exception as e:
            print(f"Error pulling model {model_name}: {e}")
            return False
    async def generate_completion(
        self,
        prompt: str,
        model: Optional[str] = None,
        temperature: float = settings.TEMPERATURE,
        max_tokens: int = settings.MAX_TOKENS,
        stream: bool = False
    ) -> str:
        """Generate text completion from Ollama"""
        model_name = model or self.default_model
        payload = {
            "model": model_name,
            "prompt": prompt,
            "stream": stream,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
                "top_p": settings.TOP_P
            }
        }
        try:
            response = await self.client.post(
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=120.0
            )
            if response.status_code == 200:
                if stream:
                    # The body is fully buffered at this point; Ollama streams
                    # one JSON object per line, so join the chunks here.
                    full_response = ""
                    for line in response.text.splitlines():
                        if line:
                            data = json.loads(line)
                            if "response" in data:
                                full_response += data["response"]
                            if data.get("done", False):
                                break
                    return full_response
                else:
                    # Handle single (non-streaming) response
                    data = response.json()
                    return data.get("response", "")
            else:
                raise Exception(f"Ollama API error: {response.status_code}")
        except Exception as e:
            print(f"Error generating completion: {e}")
            raise
    async def generate_streaming(
        self,
        prompt: str,
        model: Optional[str] = None,
        temperature: float = settings.TEMPERATURE,
        max_tokens: int = settings.MAX_TOKENS
    ) -> AsyncGenerator[str, None]:
        """Generate streaming completion from Ollama"""
        model_name = model or self.default_model
        payload = {
            "model": model_name,
            "prompt": prompt,
            "stream": True,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
                "top_p": settings.TOP_P
            }
        }
        try:
            async with self.client.stream(
                "POST",
                f"{self.base_url}/api/generate",
                json=payload,
                timeout=120.0
            ) as response:
                if response.status_code == 200:
                    async for line in response.aiter_lines():
                        if line:
                            try:
                                data = json.loads(line)
                                if "response" in data:
                                    yield data["response"]
                                if data.get("done", False):
                                    break
                            except json.JSONDecodeError:
                                # Skip malformed or partial lines in the stream
                                continue
                else:
                    raise Exception(f"Ollama API error: {response.status_code}")
        except Exception as e:
            print(f"Error in streaming generation: {e}")
            raise
    def build_prompt(
        self,
        command: AICommand,
        code: str,
        language: Optional[LanguageType] = None,
        context: Optional[str] = None,
        error_message: Optional[str] = None
    ) -> str:
        """Build appropriate prompt based on command and context"""
        lang_name = language.value if language else "code"

        # Prompt bodies are kept flush-left so no indentation leaks into
        # the text sent to the model.
        prompts = {
            AICommand.EXPLAIN: f"""
Explain the following {lang_name} code in clear, concise terms:
```{lang_name}
{code}
```
Please provide:
1. What this code does
2. Key concepts and algorithms used
3. Any potential issues or improvements
Response:""",
            AICommand.REFACTOR: f"""
Refactor the following {lang_name} code to improve readability, performance, and maintainability:
```{lang_name}
{code}
```
Please provide:
1. Refactored code
2. Explanation of changes made
3. Benefits of the refactoring
Refactored code:""",
            AICommand.FIX: f"""
Fix the bugs or issues in the following {lang_name} code:
```{lang_name}
{code}
```
{f"Error message: {error_message}" if error_message else ""}
Please provide:
1. Fixed code
2. Explanation of what was wrong
3. How the fix addresses the issue
Fixed code:""",
            AICommand.COMPLETE: f"""
Complete the following {lang_name} code based on the context:
```{lang_name}
{code}
```
Please provide the most likely completion that follows naturally from the existing code.
Completion:""",
            AICommand.COMMENT: f"""
Add clear, helpful comments to the following {lang_name} code:
```{lang_name}
{code}
```
Please provide the same code with appropriate comments explaining the functionality.
Commented code:""",
            AICommand.TEST: f"""
Generate comprehensive unit tests for the following {lang_name} code:
```{lang_name}
{code}
```
Please provide:
1. Complete test cases covering different scenarios
2. Test setup and teardown if needed
3. Comments explaining what each test validates
Test code:""",
            AICommand.OPTIMIZE: f"""
Optimize the following {lang_name} code for better performance:
```{lang_name}
{code}
```
Please provide:
1. Optimized code
2. Explanation of optimizations made
3. Expected performance improvements
Optimized code:""",
            AICommand.DOCUMENT: f"""
Generate comprehensive documentation for the following {lang_name} code:
```{lang_name}
{code}
```
Please provide:
1. Function/class documentation
2. Parameter descriptions
3. Return value descriptions
4. Usage examples
Documentation:"""
        }

        base_prompt = prompts.get(
            command,
            f"Analyze this {lang_name} code:\n\n```{lang_name}\n{code}\n```\n\nResponse:"
        )
        if context:
            base_prompt = f"Context: {context}\n\n{base_prompt}"
        return base_prompt


# Create singleton instance
ollama_service = OllamaService()
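

# Minimal usage sketch (illustrative, not part of the service API): it assumes
# an Ollama server is reachable at settings.OLLAMA_BASE_URL and that the
# configured default model has already been pulled. Running this module
# directly performs a quick smoke test of the service.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        # Use the async context manager so the HTTP client is closed cleanly.
        async with OllamaService() as service:
            if not await service.is_available():
                print("Ollama is not reachable; start it with `ollama serve`.")
                return
            # _demo and the sample snippet below are hypothetical examples.
            prompt = service.build_prompt(
                AICommand.EXPLAIN,
                code="def add(a, b):\n    return a + b",
            )
            print(await service.generate_completion(prompt))

    asyncio.run(_demo())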