"""Models API routes for managing LLM models."""
|
|
|
|
from typing import List
|
|
from fastapi import APIRouter, HTTPException
|
|
from app.models.ai import ModelInfo, ModelListResponse
|
|
from app.services.ollama_service import ollama_service
|
|
from app.core.config import settings
|
|
|
|
router = APIRouter()
|
|
|
|
@router.get("/list", response_model=ModelListResponse)
async def list_models():
    """List all available models: installed Ollama models plus supported ones.

    Installed models are reported with their on-disk size; supported models
    that are not installed are appended with ``is_available=False``.

    Returns:
        ModelListResponse: per-model metadata plus the configured default.

    Raises:
        HTTPException: 503 if Ollama is unreachable, 500 on unexpected errors.
    """
    try:
        if not await ollama_service.is_available():
            raise HTTPException(
                status_code=503,
                detail="Ollama service is not available. Please ensure Ollama is running."
            )

        # Get models from Ollama
        ollama_models = await ollama_service.list_models()

        model_infos = []
        for model in ollama_models:
            model_name = model.get("name", "unknown")
            model_infos.append(ModelInfo(
                name=model_name,
                size=format_bytes(model.get("size", 0)),
                description=get_model_description(model_name),
                capabilities=get_model_capabilities(model_name),
                is_available=True,
            ))

        # Add supported models that aren't installed (set for O(1) membership).
        installed_model_names = {m.name for m in model_infos}
        for supported_model in settings.SUPPORTED_MODELS:
            if supported_model not in installed_model_names:
                model_infos.append(ModelInfo(
                    name=supported_model,
                    size="Not installed",
                    description=get_model_description(supported_model),
                    capabilities=get_model_capabilities(supported_model),
                    is_available=False,
                ))

        return ModelListResponse(
            models=model_infos,
            default_model=settings.DEFAULT_MODEL,
            current_model=settings.DEFAULT_MODEL,
        )

    except HTTPException:
        # Re-raise deliberate HTTP errors (the 503 above) unchanged; without
        # this, the generic handler below would rewrap them as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
@router.post("/pull/{model_name}")
async def pull_model(model_name: str):
    """Pull/download a model from Ollama.

    Args:
        model_name: Name of the model to pull; must be in SUPPORTED_MODELS.

    Raises:
        HTTPException: 503 if Ollama is unreachable, 400 for unsupported
            models, 500 if the pull fails or an unexpected error occurs.
    """
    try:
        if not await ollama_service.is_available():
            raise HTTPException(
                status_code=503,
                detail="Ollama service is not available"
            )

        if model_name not in settings.SUPPORTED_MODELS:
            raise HTTPException(
                status_code=400,
                detail=f"Model {model_name} is not in the supported models list"
            )

        success = await ollama_service.pull_model(model_name)

        if success:
            return {"message": f"Model {model_name} pulled successfully"}
        raise HTTPException(
            status_code=500,
            detail=f"Failed to pull model {model_name}"
        )

    except HTTPException:
        # Propagate the deliberate 503/400/500 responses above as-is instead
        # of letting the generic handler rewrap them with the wrong status.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
@router.get("/info/{model_name}", response_model=ModelInfo)
async def get_model_info(model_name: str):
    """Get detailed information about a specific model.

    Args:
        model_name: Name of the model to look up.

    Raises:
        HTTPException: 503 if Ollama is unreachable, 404 if the model is
            neither installed nor supported, 500 on unexpected errors.
    """
    try:
        if not await ollama_service.is_available():
            raise HTTPException(
                status_code=503,
                detail="Ollama service is not available"
            )

        # Get all models and find the requested one among them.
        ollama_models = await ollama_service.list_models()
        target_model = next(
            (m for m in ollama_models if m.get("name") == model_name),
            None,
        )

        if not target_model:
            # Supported but not yet pulled locally?
            if model_name in settings.SUPPORTED_MODELS:
                return ModelInfo(
                    name=model_name,
                    size="Not installed",
                    description=get_model_description(model_name),
                    capabilities=get_model_capabilities(model_name),
                    is_available=False,
                )
            raise HTTPException(
                status_code=404,
                detail=f"Model {model_name} not found"
            )

        return ModelInfo(
            name=model_name,
            size=format_bytes(target_model.get("size", 0)),
            description=get_model_description(model_name),
            capabilities=get_model_capabilities(model_name),
            is_available=True,
        )

    except HTTPException:
        # Without this, the 503/404 raised above would be caught below and
        # incorrectly converted into a 500 response.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
@router.get("/supported")
async def get_supported_models():
    """Return the supported model names, the default, and their descriptions."""
    descriptions = {}
    for supported in settings.SUPPORTED_MODELS:
        descriptions[supported] = get_model_description(supported)

    return {
        "supported_models": settings.SUPPORTED_MODELS,
        "default_model": settings.DEFAULT_MODEL,
        "model_descriptions": descriptions,
    }
|
|
|
|
@router.get("/current")
async def get_current_model():
    """Report the configured default model and whether Ollama is reachable."""
    available = await ollama_service.is_available()
    return {
        "current_model": settings.DEFAULT_MODEL,
        "is_available": available,
    }
|
|
|
|
def format_bytes(size_bytes: int) -> str:
    """Format a byte count into a human readable string (e.g. ``"1.5 KB"``).

    Args:
        size_bytes: Size in bytes; non-positive values are reported as "0 B".

    Returns:
        Size rounded to two decimals with a binary-scaled unit suffix.
    """
    import math

    # Guard non-positive input: math.log would raise ValueError for <= 0.
    if size_bytes <= 0:
        return "0 B"

    size_names = ["B", "KB", "MB", "GB", "TB"]
    # Clamp the unit index so petabyte-scale inputs don't raise IndexError;
    # anything beyond TB is expressed in TB.
    i = min(int(math.floor(math.log(size_bytes, 1024))), len(size_names) - 1)
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    return f"{s} {size_names[i]}"
|
|
|
|
def get_model_capabilities(model_name: str) -> List[str]:
    """Return the capability labels for *model_name*.

    Matching is by substring: the first known model family whose key appears
    in the lowercased name wins. Unknown names get a generic fallback list.
    """
    capabilities_map = {
        "codellama": [
            "Code generation",
            "Code completion",
            "Bug fixing",
            "Code explanation",
            "Refactoring",
            "Multi-language support",
        ],
        "deepseek-coder": [
            "Advanced code generation",
            "Code understanding",
            "Bug detection",
            "Code optimization",
            "Documentation generation",
            "Multi-language support",
        ],
        "starcoder": [
            "Code completion",
            "Code generation",
            "Cross-language understanding",
            "Documentation",
            "Code translation",
        ],
        "codegemma": [
            "Code generation",
            "Code explanation",
            "Bug fixing",
            "Refactoring",
            "Test generation",
        ],
    }

    lowered = model_name.lower()
    fallback = [
        "Code assistance",
        "Text generation",
        "Code completion",
    ]
    # First matching family wins (dict preserves insertion order).
    return next(
        (caps for key, caps in capabilities_map.items() if key in lowered),
        fallback,
    )
|
|
|
|
def get_model_description(model_name: str) -> str:
    """Return a human-readable description for *model_name*.

    Falls back to a generic label for models without a curated entry.
    """
    known_descriptions = {
        "codellama:7b-code": "Meta's CodeLlama 7B optimized for code generation and understanding",
        "codellama:13b-code": "Meta's CodeLlama 13B with enhanced code capabilities",
        "deepseek-coder:6.7b": "DeepSeek's code-specialized model with strong programming abilities",
        "starcoder:7b": "BigCode's StarCoder model for code generation and completion",
        "codegemma:7b": "Google's CodeGemma model for code understanding and generation",
    }

    if model_name in known_descriptions:
        return known_descriptions[model_name]
    return f"Code-specialized language model: {model_name}"