From 9110e97fe2487e0c048fd0f1cd0f6ec90e0a978a Mon Sep 17 00:00:00 2001 From: m3mo Date: Sun, 8 Jun 2025 17:50:50 +0200 Subject: [PATCH] Initial commit: aPersona - AI-powered personal assistant with local LLM, RAG system, auto-learning engine, and privacy-first design --- .gitignore | 118 ++++++ QUICK_START.md | 225 +++++++++++ README.md | 126 +++++++ .../ai_core/auto_learning/learning_engine.py | 357 ++++++++++++++++++ .../ai_core/embeddings/embedding_service.py | 197 ++++++++++ .../ai_core/file_processing/file_processor.py | 316 ++++++++++++++++ backend/ai_core/llm/ollama_client.py | 146 +++++++ backend/ai_core/rag/vector_store.py | 241 ++++++++++++ backend/app/api/auth.py | 198 ++++++++++ backend/app/core/config.py | 57 +++ backend/app/core/security.py | 45 +++ backend/app/db/database.py | 22 ++ backend/app/db/models.py | 149 ++++++++ backend/app/main.py | 221 +++++++++++ backend/requirements.txt | 41 ++ data/embeddings_cache/.gitkeep | 0 data/processed/.gitkeep | 0 data/uploads/.gitkeep | 0 data/vectors/.gitkeep | 0 docs/ARCHITECTURE.md | 351 +++++++++++++++++ frontend/index.html | 110 ++++++ frontend/package.json | 57 +++ frontend/postcss.config.js | 1 + frontend/src/App.tsx | 41 ++ frontend/src/components/Layout.tsx | 173 +++++++++ frontend/src/index.css | 129 +++++++ frontend/src/main.tsx | 43 +++ frontend/src/pages/Chat.tsx | 10 + frontend/src/pages/Dashboard.tsx | 219 +++++++++++ frontend/src/pages/Files.tsx | 10 + frontend/src/pages/Login.tsx | 191 ++++++++++ frontend/src/pages/Reminders.tsx | 10 + frontend/src/pages/Settings.tsx | 10 + frontend/src/services/api.ts | 235 ++++++++++++ frontend/src/store/authStore.ts | 42 +++ frontend/tailwind.config.js | 75 ++++ frontend/tsconfig.json | 36 ++ frontend/tsconfig.node.json | 10 + frontend/vite.config.ts | 26 ++ setup.sh | 225 +++++++++++ 40 files changed, 4463 insertions(+) create mode 100644 .gitignore create mode 100644 QUICK_START.md create mode 100644 README.md create mode 100644 backend/ai_core/auto_learning/learning_engine.py create mode 100644 backend/ai_core/embeddings/embedding_service.py create mode 100644 backend/ai_core/file_processing/file_processor.py create mode 100644 backend/ai_core/llm/ollama_client.py create mode 100644 backend/ai_core/rag/vector_store.py create mode 100644 backend/app/api/auth.py create mode 100644 backend/app/core/config.py create mode 100644 backend/app/core/security.py create mode 100644 backend/app/db/database.py create mode 100644 backend/app/db/models.py create mode 100644 backend/app/main.py create mode 100644 backend/requirements.txt create mode 100644 data/embeddings_cache/.gitkeep create mode 100644 data/processed/.gitkeep create mode 100644 data/uploads/.gitkeep create mode 100644 data/vectors/.gitkeep create mode 100644 docs/ARCHITECTURE.md create mode 100644 frontend/index.html create mode 100644 frontend/package.json create mode 100644 frontend/postcss.config.js create mode 100644 frontend/src/App.tsx create mode 100644 frontend/src/components/Layout.tsx create mode 100644 frontend/src/index.css create mode 100644 frontend/src/main.tsx create mode 100644 frontend/src/pages/Chat.tsx create mode 100644 frontend/src/pages/Dashboard.tsx create mode 100644 frontend/src/pages/Files.tsx create mode 100644 frontend/src/pages/Login.tsx create mode 100644 frontend/src/pages/Reminders.tsx create mode 100644 frontend/src/pages/Settings.tsx create mode 100644 frontend/src/services/api.ts create mode 100644 frontend/src/store/authStore.ts create mode 100644 frontend/tailwind.config.js create mode 
100644 frontend/tsconfig.json
 create mode 100644 frontend/tsconfig.node.json
 create mode 100644 frontend/vite.config.ts
 create mode 100755 setup.sh
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..145fcad
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,118 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv/
+.env/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Environment variables
+.env
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+# Node.js
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+
+# Runtime data
+pids/
+*.pid
+*.seed
+*.pid.lock
+
+# Coverage directory used by tools like istanbul
+coverage/
+*.lcov
+
+# Build outputs
+dist/
+build/
+
+# Database
+*.db
+*.sqlite
+*.sqlite3
+
+# Logs
+logs/
+*.log
+
+# Data directories (user uploaded files)
+data/uploads/*
+data/processed/*
+data/vectors/*
+data/embeddings_cache/*
+
+# Keep the directories but ignore contents
+!data/uploads/.gitkeep
+!data/processed/.gitkeep
+!data/vectors/.gitkeep
+!data/embeddings_cache/.gitkeep
+
+# AI model files (if downloaded locally)
+*.gguf
+*.bin
+*.safetensors
+models/
+
+# Temporary files
+*.tmp
+*.temp
+.DS_Store
+Thumbs.db
+
+# Certificates and keys
+*.pem
+*.key
+*.crt
+
+# Editor files
+*~
+*.swp
+*.swo
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
\ No newline at end of file
diff --git a/QUICK_START.md b/QUICK_START.md
new file mode 100644
index 0000000..a2c1377
--- /dev/null
+++ b/QUICK_START.md
@@ -0,0 +1,225 @@
+# aPersona Quick Start Guide
+
+## Prerequisites
+
+Before you begin, ensure you have the following installed:
+
+- **Python 3.11+**: [Download here](https://python.org/downloads/)
+- **Node.js 18+**: [Download here](https://nodejs.org/)
+- **Ollama**: [Install guide](https://ollama.ai/download)
+
+## 🚀 Automated Setup (Recommended)
+
+The easiest way to get started is using our setup script:
+
+```bash
+# Make the setup script executable
+chmod +x setup.sh
+
+# Run the setup script
+./setup.sh
+```
+
+This script will:
+- Check your system requirements
+- Install dependencies for both backend and frontend
+- Set up the AI models
+- Create necessary directories and configuration files
+
+## 🔧 Manual Setup
+
+If you prefer to set up manually:
+
+### 1. Clone and Setup Backend
+
+```bash
+# Navigate to backend directory
+cd backend
+
+# Create virtual environment
+python3 -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Create environment file
+cp .env.example .env  # Edit with your preferences
+```
+
+### 2. Setup Frontend
+
+```bash
+# Navigate to frontend directory
+cd frontend
+
+# Install dependencies
+npm install
+
+# Start the development server
+npm run dev
+```
+
+### 3.
Setup AI Services + +```bash +# Start Ollama service +ollama serve + +# In another terminal, pull required models +ollama pull mistral # Main LLM model +ollama pull nomic-embed-text # Embedding model +``` + +## 🏃‍♂️ Running the Application + +### Start the Backend + +```bash +cd backend +source venv/bin/activate # If not already activated +uvicorn app.main:app --reload +``` + +The backend will be available at: `http://localhost:8000` + +### Start the Frontend + +```bash +cd frontend +npm run dev +``` + +The frontend will be available at: `http://localhost:3000` + +### Start Ollama (if not running) + +```bash +ollama serve +``` + +## 🎯 First Steps + +1. **Open your browser** and go to `http://localhost:3000` + +2. **Create an account** using the registration form + +3. **Upload some documents** to get started: + - PDFs, Word documents, text files, or images + - The system will automatically process and categorize them + +4. **Start chatting** with your AI assistant: + - Ask questions about your uploaded files + - The AI will provide context-aware responses + - Give feedback to help the system learn your preferences + +## 🔍 Verify Everything is Working + +### Check System Health + +Visit: `http://localhost:8000/health` + +You should see: +```json +{ + "status": "healthy", + "services": { + "database": "healthy", + "ollama": "healthy", + "embeddings": "healthy", + "vector_store": "healthy" + } +} +``` + +### Check API Documentation + +Visit: `http://localhost:8000/docs` + +This will show the interactive API documentation. + +## 🐛 Troubleshooting + +### Common Issues + +#### 1. Ollama Not Running +```bash +# Error: Connection refused to Ollama +# Solution: Start Ollama service +ollama serve +``` + +#### 2. Models Not Downloaded +```bash +# Error: Model not found +# Solution: Download required models +ollama pull mistral +ollama pull nomic-embed-text +``` + +#### 3. Port Already in Use +```bash +# Backend port 8000 in use +uvicorn app.main:app --reload --port 8001 + +# Frontend port 3000 in use +npm run dev -- --port 3001 +``` + +#### 4. Python Dependencies Issues +```bash +# Create fresh virtual environment +rm -rf venv +python3 -m venv venv +source venv/bin/activate +pip install --upgrade pip +pip install -r requirements.txt +``` + +#### 5. Node Dependencies Issues +```bash +# Clear cache and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Performance Tips + +1. **First Run**: The first time you upload files and ask questions, it may take longer as models are loading and caches are being built. + +2. **Memory Usage**: The system uses local AI models which require significant RAM. Ensure you have at least 8GB RAM available. + +3. **Storage**: Vector embeddings and model files require disk space. Ensure you have at least 5GB free disk space. + +## 📊 System Requirements + +### Minimum Requirements +- **RAM**: 8GB +- **Storage**: 5GB free space +- **CPU**: Multi-core processor (4+ cores recommended) +- **OS**: Windows 10+, macOS 10.14+, Linux (Ubuntu 18.04+) + +### Recommended Requirements +- **RAM**: 16GB+ +- **Storage**: 10GB+ free space +- **CPU**: 8+ cores +- **GPU**: NVIDIA GPU with CUDA support (optional, for faster processing) + +## 🎉 You're Ready! + +Once everything is running: + +1. **Upload your documents** (PDFs, Word docs, images, etc.) +2. **Ask questions** about your content +3. **Set reminders** and let the AI help organize your life +4. **Watch it learn** and adapt to your preferences over time + +## 🆘 Need Help? 
+ +- Check the [Architecture Documentation](docs/ARCHITECTURE.md) for technical details +- Review the API documentation at `http://localhost:8000/docs` +- Ensure all services are running with the health check endpoint + +## 🔒 Privacy Note + +Remember: **All your data stays local**. aPersona runs entirely on your machine without any cloud dependencies. Your files, conversations, and personal information never leave your device. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..7fdbb0f --- /dev/null +++ b/README.md @@ -0,0 +1,126 @@ +# aPersona - AI-Powered Personal Assistant + +A fully local, offline AI-powered personal assistant that learns from your personal files, preferences, and behavior to act as your intelligent secretary. + +## 🔹 Key Features + +- **100% Local & Offline**: No cloud dependencies, complete data privacy +- **User Authentication**: Secure local user management +- **File Analysis**: Automatic categorization of documents, images, PDFs +- **Semantic Search**: Vector-based search through your personal data +- **Local LLM Integration**: Powered by Ollama with RAG capabilities +- **Auto-Learning**: Adaptive behavior based on user interactions +- **Smart Reminders**: Context-aware suggestions and notifications +- **Personal Context**: Deep understanding of your preferences and habits + +## 🛠 Technology Stack + +### Backend +- **FastAPI**: Modern Python web framework +- **SQLAlchemy**: Database ORM +- **ChromaDB**: Vector database for embeddings +- **SentenceTransformers**: Text embeddings +- **Ollama**: Local LLM runtime + +### Frontend +- **React**: Modern UI framework +- **TailwindCSS**: Utility-first CSS framework +- **Vite**: Fast build tool + +### AI/ML +- **Hugging Face Transformers**: Pre-trained models +- **PyTorch**: ML framework +- **Pillow**: Image processing +- **PyPDF2**: PDF text extraction + +## 📁 Project Structure + +``` +apersona/ +├── backend/ # FastAPI backend +│ ├── app/ +│ │ ├── api/ # API routes +│ │ ├── core/ # Core configuration +│ │ ├── db/ # Database models +│ │ ├── services/ # Business logic +│ │ └── main.py # FastAPI app +│ ├── ai_core/ # AI/ML components +│ │ ├── embeddings/ # Text embeddings +│ │ ├── llm/ # LLM integration +│ │ ├── rag/ # RAG system +│ │ └── auto_learning/ # Adaptive learning +│ └── requirements.txt +├── frontend/ # React frontend +│ ├── src/ +│ │ ├── components/ # React components +│ │ ├── pages/ # Page components +│ │ ├── services/ # API services +│ │ └── utils/ # Utility functions +│ └── package.json +├── data/ # Local data storage +│ ├── uploads/ # User uploaded files +│ ├── processed/ # Processed files +│ └── vectors/ # Vector embeddings +└── docs/ # Documentation +``` + +## 🚀 Quick Start + +### Prerequisites +- Python 3.11+ +- Node.js 18+ +- Ollama installed locally + +### Backend Setup +```bash +cd backend +pip install -r requirements.txt +uvicorn app.main:app --reload +``` + +### Frontend Setup +```bash +cd frontend +npm install +npm run dev +``` + +### AI Setup +```bash +# Install Ollama models +ollama pull mistral +ollama pull nomic-embed-text +``` + +## 🧠 Auto-Learning System + +The auto-learning module continuously adapts to user behavior through: + +- **Interaction Patterns**: Learning from user queries and responses +- **Preference Tracking**: Monitoring file usage and search patterns +- **Context Building**: Understanding user's work and personal contexts +- **Response Optimization**: Improving answer relevance over time +- **Proactive Suggestions**: Anticipating user 
needs based on patterns + +## 🔒 Privacy & Security + +- All data stored locally +- No external API calls +- Encrypted user authentication +- Secure file handling +- Optional data anonymization + +## 📚 Documentation + +- [API Documentation](./docs/api.md) +- [AI Integration Guide](./docs/ai-integration.md) +- [Auto-Learning Architecture](./docs/auto-learning.md) +- [Deployment Guide](./docs/deployment.md) + +## 🤝 Contributing + +This is a personal project focused on privacy and local execution. Feel free to fork and adapt for your needs. + +## 📄 License + +MIT License - See LICENSE file for details \ No newline at end of file diff --git a/backend/ai_core/auto_learning/learning_engine.py b/backend/ai_core/auto_learning/learning_engine.py new file mode 100644 index 0000000..41b23a2 --- /dev/null +++ b/backend/ai_core/auto_learning/learning_engine.py @@ -0,0 +1,357 @@ +from typing import Dict, List, Any, Optional, Tuple +from collections import defaultdict, Counter +import json +import numpy as np +from datetime import datetime, timedelta +import asyncio +from sqlalchemy.orm import Session +from app.db.models import UserInteraction, UserPreference, LearningPattern, User +from app.core.config import settings +import logging + +logger = logging.getLogger(__name__) + + +class LearningEngine: + def __init__(self): + self.user_patterns = defaultdict(dict) + self.feedback_weights = { + -1: -0.2, # Negative feedback + 0: 0.0, # Neutral feedback + 1: 0.1 # Positive feedback + } + + async def analyze_user_interactions(self, db: Session, user_id: int) -> Dict[str, Any]: + """Analyze user interaction patterns to extract learning insights""" + try: + # Get recent interactions (last 30 days) + cutoff_date = datetime.utcnow() - timedelta(days=30) + interactions = db.query(UserInteraction).filter( + UserInteraction.user_id == user_id, + UserInteraction.created_at >= cutoff_date + ).all() + + if not interactions: + return {} + + analysis = { + 'interaction_frequency': self._analyze_frequency_patterns(interactions), + 'topic_preferences': self._analyze_topic_preferences(interactions), + 'response_quality': self._analyze_response_quality(interactions), + 'search_patterns': self._analyze_search_patterns(interactions), + 'time_patterns': self._analyze_time_patterns(interactions) + } + + return analysis + + except Exception as e: + logger.error(f"Failed to analyze user interactions: {e}") + return {} + + def _analyze_frequency_patterns(self, interactions: List[UserInteraction]) -> Dict[str, Any]: + """Analyze how frequently user interacts with the system""" + if not interactions: + return {} + + # Group interactions by day + daily_counts = defaultdict(int) + for interaction in interactions: + day = interaction.created_at.date() + daily_counts[day] += 1 + + # Calculate patterns + counts = list(daily_counts.values()) + return { + 'avg_daily_interactions': np.mean(counts) if counts else 0, + 'max_daily_interactions': max(counts) if counts else 0, + 'active_days': len(daily_counts), + 'total_interactions': len(interactions) + } + + def _analyze_topic_preferences(self, interactions: List[UserInteraction]) -> Dict[str, Any]: + """Analyze what topics user asks about most frequently""" + topic_keywords = [] + successful_topics = [] + + for interaction in interactions: + if interaction.query: + # Extract keywords from query (simple approach) + words = interaction.query.lower().split() + # Filter out common stop words + stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 
'is', 'are', 'was', 'were', 'what', 'how', 'when', 'where', 'why', 'who'} + keywords = [word for word in words if len(word) > 3 and word not in stop_words] + topic_keywords.extend(keywords) + + # Track successful topics (positive feedback) + if interaction.user_feedback and interaction.user_feedback > 0: + successful_topics.extend(keywords) + + # Count frequencies + topic_counts = Counter(topic_keywords) + successful_counts = Counter(successful_topics) + + return { + 'most_common_topics': dict(topic_counts.most_common(10)), + 'successful_topics': dict(successful_counts.most_common(5)), + 'topic_diversity': len(set(topic_keywords)) + } + + def _analyze_response_quality(self, interactions: List[UserInteraction]) -> Dict[str, Any]: + """Analyze response quality based on user feedback""" + feedback_scores = [] + response_times = [] + helpful_count = 0 + total_feedback = 0 + + for interaction in interactions: + if interaction.user_feedback is not None: + feedback_scores.append(interaction.user_feedback) + total_feedback += 1 + + if interaction.was_helpful is not None: + if interaction.was_helpful: + helpful_count += 1 + + if interaction.response_time: + response_times.append(interaction.response_time) + + return { + 'avg_feedback_score': np.mean(feedback_scores) if feedback_scores else 0, + 'feedback_distribution': dict(Counter(feedback_scores)), + 'helpfulness_rate': helpful_count / total_feedback if total_feedback > 0 else 0, + 'avg_response_time': np.mean(response_times) if response_times else 0, + 'total_feedback_count': total_feedback + } + + def _analyze_search_patterns(self, interactions: List[UserInteraction]) -> Dict[str, Any]: + """Analyze search and file usage patterns""" + search_terms = [] + used_files = [] + + for interaction in interactions: + if interaction.search_terms: + search_terms.extend(interaction.search_terms) + if interaction.used_files: + used_files.extend(interaction.used_files) + + return { + 'common_search_terms': dict(Counter(search_terms).most_common(10)), + 'frequently_used_files': dict(Counter(used_files).most_common(10)), + 'search_diversity': len(set(search_terms)) + } + + def _analyze_time_patterns(self, interactions: List[UserInteraction]) -> Dict[str, Any]: + """Analyze when user is most active""" + hours = [] + days_of_week = [] + + for interaction in interactions: + hours.append(interaction.created_at.hour) + days_of_week.append(interaction.created_at.weekday()) + + return { + 'peak_hours': dict(Counter(hours).most_common(5)), + 'active_days_of_week': dict(Counter(days_of_week).most_common()), + 'activity_distribution': { + 'morning': sum(1 for h in hours if 6 <= h < 12), + 'afternoon': sum(1 for h in hours if 12 <= h < 18), + 'evening': sum(1 for h in hours if 18 <= h < 24), + 'night': sum(1 for h in hours if 0 <= h < 6) + } + } + + async def update_user_preferences(self, db: Session, user_id: int, analysis: Dict[str, Any]): + """Update user preferences based on learning analysis""" + try: + # Update or create preferences based on analysis + preferences_to_update = [ + ('response_style', 'preferred_length', self._infer_response_length_preference(analysis)), + ('topics', 'interests', analysis.get('topic_preferences', {}).get('most_common_topics', {})), + ('interaction', 'peak_hours', analysis.get('time_patterns', {}).get('peak_hours', {})), + ('quality', 'feedback_history', analysis.get('response_quality', {})) + ] + + for pref_type, pref_key, pref_value in preferences_to_update: + if pref_value: + # Check if preference exists + existing_pref = 
db.query(UserPreference).filter( + UserPreference.user_id == user_id, + UserPreference.preference_type == pref_type, + UserPreference.preference_key == pref_key + ).first() + + confidence_score = self._calculate_confidence_score(analysis, pref_type) + + if existing_pref: + # Update existing preference + existing_pref.preference_value = pref_value + existing_pref.confidence_score = confidence_score + existing_pref.updated_at = datetime.utcnow() + else: + # Create new preference + new_pref = UserPreference( + user_id=user_id, + preference_type=pref_type, + preference_key=pref_key, + preference_value=pref_value, + confidence_score=confidence_score + ) + db.add(new_pref) + + db.commit() + logger.info(f"Updated preferences for user {user_id}") + + except Exception as e: + logger.error(f"Failed to update user preferences: {e}") + db.rollback() + + def _infer_response_length_preference(self, analysis: Dict[str, Any]) -> str: + """Infer user's preferred response length based on interaction patterns""" + response_quality = analysis.get('response_quality', {}) + avg_feedback = response_quality.get('avg_feedback_score', 0) + + # Simple heuristic: if user gives positive feedback, maintain current style + if avg_feedback > 0.5: + return 'detailed' + elif avg_feedback < -0.2: + return 'concise' + else: + return 'balanced' + + def _calculate_confidence_score(self, analysis: Dict[str, Any], preference_type: str) -> float: + """Calculate confidence score for a preference based on data volume and consistency""" + base_confidence = 0.5 + + # Factor in number of interactions + total_interactions = analysis.get('interaction_frequency', {}).get('total_interactions', 0) + interaction_factor = min(total_interactions / 100, 1.0) * 0.3 + + # Factor in feedback consistency + response_quality = analysis.get('response_quality', {}) + feedback_count = response_quality.get('total_feedback_count', 0) + feedback_factor = min(feedback_count / 20, 1.0) * 0.2 + + return min(base_confidence + interaction_factor + feedback_factor, 1.0) + + async def generate_personalized_prompt(self, db: Session, user_id: int, base_prompt: str) -> str: + """Generate personalized system prompt based on user preferences""" + try: + # Get user preferences + preferences = db.query(UserPreference).filter( + UserPreference.user_id == user_id + ).all() + + # Build personalization context + personalization = [] + + for pref in preferences: + if pref.confidence_score > 0.6: # Only use high-confidence preferences + if pref.preference_type == 'response_style': + if pref.preference_key == 'preferred_length': + if pref.preference_value == 'concise': + personalization.append("Provide concise, direct answers.") + elif pref.preference_value == 'detailed': + personalization.append("Provide detailed, comprehensive explanations.") + + elif pref.preference_type == 'topics': + topics = list(pref.preference_value.keys())[:3] # Top 3 topics + if topics: + personalization.append(f"User frequently asks about: {', '.join(topics)}") + + # Combine base prompt with personalization + if personalization: + personal_context = "\n".join(personalization) + return f"{base_prompt}\n\nPersonalization context:\n{personal_context}" + + return base_prompt + + except Exception as e: + logger.error(f"Failed to generate personalized prompt: {e}") + return base_prompt + + async def suggest_proactive_actions(self, db: Session, user_id: int) -> List[Dict[str, Any]]: + """Suggest proactive actions based on user patterns""" + try: + analysis = await self.analyze_user_interactions(db, 
user_id)
+            suggestions = []
+
+            # Suggest reminders based on time patterns
+            time_patterns = analysis.get('time_patterns', {})
+            peak_hours = time_patterns.get('peak_hours', {})
+            if peak_hours:
+                # Pick the hour with the most interactions, not the numerically largest hour
+                most_active_hour = max(peak_hours, key=peak_hours.get)
+                suggestions.append({
+                    'type': 'reminder_optimization',
+                    'message': f"You're most active at {most_active_hour}:00. Would you like to schedule important reminders around this time?",
+                    'confidence': 0.7
+                })
+
+            # Suggest file organization based on usage patterns
+            search_patterns = analysis.get('search_patterns', {})
+            frequent_files = search_patterns.get('frequently_used_files', {})
+            if frequent_files:
+                suggestions.append({
+                    'type': 'file_organization',
+                    'message': "I noticed you frequently access certain files. Would you like me to create quick access shortcuts?",
+                    'confidence': 0.6
+                })
+
+            # Suggest topic exploration based on interests
+            topic_prefs = analysis.get('topic_preferences', {})
+            common_topics = topic_prefs.get('most_common_topics', {})
+            if common_topics:
+                top_topic = max(common_topics.keys(), key=common_topics.get)
+                suggestions.append({
+                    'type': 'content_discovery',
+                    'message': f"You seem interested in {top_topic}. Would you like me to search for related documents in your files?",
+                    'confidence': 0.5
+                })
+
+            return suggestions
+
+        except Exception as e:
+            logger.error(f"Failed to generate proactive suggestions: {e}")
+            return []
+
+    async def record_learning_pattern(self, db: Session, user_id: int, pattern_type: str, pattern_data: Dict[str, Any]):
+        """Record a new learning pattern for future reference"""
+        try:
+            pattern = LearningPattern(
+                user_id=user_id,
+                pattern_type=pattern_type,
+                pattern_data=pattern_data,
+                confidence_score=0.5,
+                usage_count=1,
+                success_rate=0.0
+            )
+
+            db.add(pattern)
+            db.commit()
+            logger.info(f"Recorded learning pattern for user {user_id}: {pattern_type}")
+
+        except Exception as e:
+            logger.error(f"Failed to record learning pattern: {e}")
+            db.rollback()
+
+    async def update_pattern_success(self, db: Session, pattern_id: int, was_successful: bool):
+        """Update the success rate of a learning pattern"""
+        try:
+            pattern = db.query(LearningPattern).filter(LearningPattern.id == pattern_id).first()
+            if pattern:
+                pattern.usage_count += 1
+                current_success_rate = pattern.success_rate * (pattern.usage_count - 1)
+                if was_successful:
+                    current_success_rate += 1
+                pattern.success_rate = current_success_rate / pattern.usage_count
+                pattern.updated_at = datetime.utcnow()
+
+                db.commit()
+                logger.info(f"Updated pattern {pattern_id} success rate: {pattern.success_rate}")
+
+        except Exception as e:
+            logger.error(f"Failed to update pattern success: {e}")
+            db.rollback()
+
+
+# Global instance
+learning_engine = LearningEngine()
\ No newline at end of file
diff --git a/backend/ai_core/embeddings/embedding_service.py b/backend/ai_core/embeddings/embedding_service.py
new file mode 100644
index 0000000..b4cae03
--- /dev/null
+++ b/backend/ai_core/embeddings/embedding_service.py
@@ -0,0 +1,197 @@
+from sentence_transformers import SentenceTransformer
+import numpy as np
+from typing import List, Union, Dict, Any, Optional
+import hashlib
+import os
+import pickle
+from pathlib import Path
+from app.core.config import settings
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class EmbeddingService:
+    def __init__(self, model_name: Optional[str] = None):
+        self.model_name = model_name or settings.EMBEDDING_MODEL
+        self.model = None
+        self.cache_dir = Path("../data/embeddings_cache")
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+        self._load_model()
+
+    def _load_model(self):
+        """Load the SentenceTransformer model"""
+        try:
+            logger.info(f"Loading embedding model: {self.model_name}")
+            self.model = SentenceTransformer(self.model_name)
+            logger.info("Embedding model loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load embedding model: {e}")
+            raise Exception(f"Could not initialize embedding model: {e}")
+
+    def _get_cache_key(self, text: str) -> str:
+        """Generate cache key for text"""
+        return hashlib.md5(f"{self.model_name}:{text}".encode()).hexdigest()
+
+    def _get_cached_embedding(self, cache_key: str) -> Optional[np.ndarray]:
+        """Get embedding from cache if available"""
+        cache_file = self.cache_dir / f"{cache_key}.pkl"
+        if cache_file.exists():
+            try:
+                with open(cache_file, 'rb') as f:
+                    return pickle.load(f)
+            except Exception as e:
+                logger.warning(f"Failed to load cached embedding: {e}")
+        return None
+
+    def _cache_embedding(self, cache_key: str, embedding: np.ndarray):
+        """Cache embedding for future use"""
+        cache_file = self.cache_dir / f"{cache_key}.pkl"
+        try:
+            with open(cache_file, 'wb') as f:
+                pickle.dump(embedding, f)
+        except Exception as e:
+            logger.warning(f"Failed to cache embedding: {e}")
+
+    def encode_text(self, text: str, use_cache: bool = True) -> np.ndarray:
+        """Generate embedding for a single text"""
+        if not text or not text.strip():
+            return np.zeros(384)  # Default embedding size for all-MiniLM-L6-v2
+
+        cache_key = self._get_cache_key(text)
+
+        # Check cache first
+        if use_cache:
+            cached_embedding = self._get_cached_embedding(cache_key)
+            if cached_embedding is not None:
+                return cached_embedding
+
+        try:
+            # Generate embedding
+            embedding = self.model.encode(text, convert_to_numpy=True)
+
+            # Cache for future use
+            if use_cache:
+                self._cache_embedding(cache_key, embedding)
+
+            return embedding
+
+        except Exception as e:
+            logger.error(f"Failed to generate embedding: {e}")
+            return np.zeros(384)
+
+    def encode_texts(self, texts: List[str], use_cache: bool = True, batch_size: int = 32) -> List[np.ndarray]:
+        """Generate embeddings for multiple texts"""
+        if not texts:
+            return []
+
+        embeddings = []
+        texts_to_encode = []
+        cache_keys = []
+        indices_to_encode = []
+
+        # Check cache for each text; keep cache_keys aligned with texts so
+        # cache_keys[i] always corresponds to texts[i]
+        for i, text in enumerate(texts):
+            if not text or not text.strip():
+                cache_keys.append(None)  # placeholder keeps indices aligned
+                embeddings.append(np.zeros(384))
+                continue
+
+            cache_key = self._get_cache_key(text)
+            cache_keys.append(cache_key)
+
+            if use_cache:
+                cached_embedding = self._get_cached_embedding(cache_key)
+                if cached_embedding is not None:
+                    embeddings.append(cached_embedding)
+                    continue
+
+            # Need to encode this text
+            texts_to_encode.append(text)
+            indices_to_encode.append(i)
+            embeddings.append(None)  # Placeholder
+
+        # Encode texts that weren't cached
+        if texts_to_encode:
+            try:
+                new_embeddings = self.model.encode(
+                    texts_to_encode,
+                    convert_to_numpy=True,
+                    batch_size=batch_size
+                )
+
+                # Cache and place new embeddings
+                for idx, embedding in zip(indices_to_encode, new_embeddings):
+                    embeddings[idx] = embedding
+                    if use_cache:
+                        self._cache_embedding(cache_keys[idx], embedding)
+
+            except Exception as e:
+                logger.error(f"Failed to generate batch embeddings: {e}")
+                # Fill with zeros for failed embeddings
+                for idx in indices_to_encode:
+                    embeddings[idx] = np.zeros(384)
+
+        return embeddings
+
+    def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
+        """Compute cosine similarity between two embeddings"""
+        try:
+            # Normalize embeddings
+            norm1 =
np.linalg.norm(embedding1) + norm2 = np.linalg.norm(embedding2) + + if norm1 == 0 or norm2 == 0: + return 0.0 + + # Cosine similarity + similarity = np.dot(embedding1, embedding2) / (norm1 * norm2) + return float(similarity) + + except Exception as e: + logger.error(f"Failed to compute similarity: {e}") + return 0.0 + + def find_most_similar( + self, + query_embedding: np.ndarray, + candidate_embeddings: List[np.ndarray], + top_k: int = 5 + ) -> List[Dict[str, Any]]: + """Find most similar embeddings to query""" + similarities = [] + + for i, candidate in enumerate(candidate_embeddings): + similarity = self.compute_similarity(query_embedding, candidate) + similarities.append({ + 'index': i, + 'similarity': similarity + }) + + # Sort by similarity (descending) + similarities.sort(key=lambda x: x['similarity'], reverse=True) + + return similarities[:top_k] + + def get_model_info(self) -> Dict[str, Any]: + """Get information about the loaded model""" + if not self.model: + return {} + + return { + 'model_name': self.model_name, + 'max_sequence_length': getattr(self.model, 'max_seq_length', 'unknown'), + 'embedding_dimension': self.model.get_sentence_embedding_dimension(), + } + + def clear_cache(self): + """Clear the embedding cache""" + try: + for cache_file in self.cache_dir.glob("*.pkl"): + cache_file.unlink() + logger.info("Embedding cache cleared") + except Exception as e: + logger.error(f"Failed to clear cache: {e}") + + +# Global instance +embedding_service = EmbeddingService() \ No newline at end of file diff --git a/backend/ai_core/file_processing/file_processor.py b/backend/ai_core/file_processing/file_processor.py new file mode 100644 index 0000000..f97d714 --- /dev/null +++ b/backend/ai_core/file_processing/file_processor.py @@ -0,0 +1,316 @@ +import os +import magic +from pathlib import Path +from typing import Dict, List, Any, Optional, Tuple +import logging +import hashlib +from datetime import datetime + +# File processing imports +import PyPDF2 +from docx import Document +from PIL import Image +import pytesseract + +logger = logging.getLogger(__name__) + + +class FileProcessor: + def __init__(self): + self.supported_types = { + 'application/pdf': self._process_pdf, + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._process_docx, + 'text/plain': self._process_text, + 'text/markdown': self._process_text, + 'image/jpeg': self._process_image, + 'image/png': self._process_image, + 'image/gif': self._process_image, + 'image/bmp': self._process_image, + 'image/tiff': self._process_image, + } + + # Categories for auto-classification + self.categories = { + 'work': ['project', 'meeting', 'report', 'presentation', 'proposal', 'contract', 'invoice'], + 'personal': ['diary', 'journal', 'note', 'reminder', 'todo', 'list'], + 'financial': ['budget', 'expense', 'income', 'tax', 'receipt', 'bank', 'investment'], + 'education': ['course', 'study', 'lecture', 'assignment', 'exam', 'research'], + 'health': ['medical', 'doctor', 'prescription', 'health', 'fitness', 'exercise'], + 'travel': ['itinerary', 'booking', 'ticket', 'hotel', 'flight', 'vacation'], + 'legal': ['contract', 'agreement', 'legal', 'law', 'court', 'document'], + 'technical': ['code', 'programming', 'software', 'api', 'documentation', 'manual'] + } + + async def process_file(self, file_path: str, original_name: str) -> Dict[str, Any]: + """Process a file and extract relevant information""" + try: + # Detect file MIME type + mime_type = magic.from_file(file_path, mime=True) + file_size = 
os.path.getsize(file_path) + + # Extract content based on file type + content_info = { + 'original_name': original_name, + 'file_path': file_path, + 'mime_type': mime_type, + 'file_size': file_size, + 'processed_at': datetime.utcnow().isoformat(), + 'extracted_text': '', + 'content_summary': '', + 'categories': [], + 'metadata': {} + } + + if mime_type in self.supported_types: + processor = self.supported_types[mime_type] + extracted_data = await processor(file_path) + content_info.update(extracted_data) + else: + logger.warning(f"Unsupported file type: {mime_type}") + content_info['error'] = f"Unsupported file type: {mime_type}" + + # Auto-categorize content + if content_info['extracted_text']: + content_info['categories'] = self._categorize_content(content_info['extracted_text']) + content_info['content_summary'] = self._generate_summary(content_info['extracted_text']) + + return content_info + + except Exception as e: + logger.error(f"Failed to process file {file_path}: {e}") + return { + 'original_name': original_name, + 'file_path': file_path, + 'error': str(e), + 'processed_at': datetime.utcnow().isoformat() + } + + async def _process_pdf(self, file_path: str) -> Dict[str, Any]: + """Extract text from PDF files""" + try: + extracted_text = "" + metadata = {} + + with open(file_path, 'rb') as file: + pdf_reader = PyPDF2.PdfReader(file) + + # Extract metadata + if pdf_reader.metadata: + metadata = { + 'title': pdf_reader.metadata.get('/Title', ''), + 'author': pdf_reader.metadata.get('/Author', ''), + 'subject': pdf_reader.metadata.get('/Subject', ''), + 'creator': pdf_reader.metadata.get('/Creator', ''), + 'creation_date': str(pdf_reader.metadata.get('/CreationDate', '')), + } + + metadata['page_count'] = len(pdf_reader.pages) + + # Extract text from all pages + for page_num, page in enumerate(pdf_reader.pages): + try: + text = page.extract_text() + if text: + extracted_text += f"\n--- Page {page_num + 1} ---\n{text}\n" + except Exception as e: + logger.warning(f"Failed to extract text from page {page_num + 1}: {e}") + continue + + return { + 'extracted_text': extracted_text.strip(), + 'metadata': metadata, + 'file_type': 'pdf' + } + + except Exception as e: + logger.error(f"Failed to process PDF {file_path}: {e}") + return {'extracted_text': '', 'error': str(e), 'file_type': 'pdf'} + + async def _process_docx(self, file_path: str) -> Dict[str, Any]: + """Extract text from DOCX files""" + try: + extracted_text = "" + metadata = {} + + doc = Document(file_path) + + # Extract metadata + core_props = doc.core_properties + metadata = { + 'title': core_props.title or '', + 'author': core_props.author or '', + 'subject': core_props.subject or '', + 'created': str(core_props.created) if core_props.created else '', + 'modified': str(core_props.modified) if core_props.modified else '', + 'keywords': core_props.keywords or '', + } + + # Extract text from paragraphs + paragraphs = [] + for paragraph in doc.paragraphs: + if paragraph.text.strip(): + paragraphs.append(paragraph.text.strip()) + + extracted_text = '\n'.join(paragraphs) + metadata['paragraph_count'] = len(paragraphs) + + return { + 'extracted_text': extracted_text, + 'metadata': metadata, + 'file_type': 'docx' + } + + except Exception as e: + logger.error(f"Failed to process DOCX {file_path}: {e}") + return {'extracted_text': '', 'error': str(e), 'file_type': 'docx'} + + async def _process_text(self, file_path: str) -> Dict[str, Any]: + """Process plain text files""" + try: + with open(file_path, 'r', encoding='utf-8', 
errors='ignore') as file: + content = file.read() + + # Basic text file metadata + lines = content.split('\n') + words = content.split() + + metadata = { + 'line_count': len(lines), + 'word_count': len(words), + 'character_count': len(content) + } + + return { + 'extracted_text': content, + 'metadata': metadata, + 'file_type': 'text' + } + + except Exception as e: + logger.error(f"Failed to process text file {file_path}: {e}") + return {'extracted_text': '', 'error': str(e), 'file_type': 'text'} + + async def _process_image(self, file_path: str) -> Dict[str, Any]: + """Extract text from images using OCR""" + try: + extracted_text = "" + metadata = {} + + # Open image and extract metadata + with Image.open(file_path) as img: + metadata = { + 'format': img.format, + 'mode': img.mode, + 'size': img.size, + 'width': img.width, + 'height': img.height + } + + # Extract EXIF data if available + if hasattr(img, '_getexif') and img._getexif(): + exif_data = img._getexif() + if exif_data: + metadata['exif'] = {str(k): str(v) for k, v in exif_data.items()} + + # Perform OCR to extract text + try: + extracted_text = pytesseract.image_to_string(img) + if extracted_text.strip(): + metadata['has_text'] = True + else: + metadata['has_text'] = False + except Exception as ocr_error: + logger.warning(f"OCR failed for {file_path}: {ocr_error}") + metadata['ocr_error'] = str(ocr_error) + + return { + 'extracted_text': extracted_text.strip(), + 'metadata': metadata, + 'file_type': 'image' + } + + except Exception as e: + logger.error(f"Failed to process image {file_path}: {e}") + return {'extracted_text': '', 'error': str(e), 'file_type': 'image'} + + def _categorize_content(self, text: str) -> List[str]: + """Automatically categorize content based on keywords""" + text_lower = text.lower() + detected_categories = [] + + for category, keywords in self.categories.items(): + # Count how many keywords from this category appear in the text + keyword_count = sum(1 for keyword in keywords if keyword in text_lower) + + # If more than 20% of keywords are found, or at least 2 keywords, add category + threshold = max(1, len(keywords) * 0.2) + if keyword_count >= threshold: + detected_categories.append(category) + + # If no categories detected, try to infer from common patterns + if not detected_categories: + if any(word in text_lower for word in ['meeting', 'project', 'deadline', 'task']): + detected_categories.append('work') + elif any(word in text_lower for word in ['personal', 'diary', 'thought', 'feeling']): + detected_categories.append('personal') + else: + detected_categories.append('general') + + return detected_categories + + def _generate_summary(self, text: str, max_length: int = 200) -> str: + """Generate a simple summary of the content""" + if not text or len(text) <= max_length: + return text + + # Simple extractive summary: take first few sentences + sentences = text.split('.') + summary = "" + + for sentence in sentences: + if len(summary + sentence) <= max_length: + summary += sentence + "." + else: + break + + return summary.strip() if summary else text[:max_length] + "..." 
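+    # Illustrative sketch (not wired up in this commit): how a processed
+    # file's text would typically flow into the RAG store using the helpers
+    # in this package, run inside an async context. Assumes the global
+    # `file_processor`, `embedding_service`, and `vector_store` instances;
+    # the intermediate variable names are hypothetical.
+    #
+    #   info = await file_processor.process_file(path, original_name)
+    #   chunks = file_processor.chunk_text(info['extracted_text'])
+    #   vectors = embedding_service.encode_texts(chunks)
+    #   vector_store.add_documents(
+    #       documents=chunks,
+    #       embeddings=[v.tolist() for v in vectors],
+    #       metadatas=[{'file': original_name}] * len(chunks),
+    #   )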
+ + def chunk_text(self, text: str, chunk_size: int = 1000, overlap: int = 100) -> List[str]: + """Split text into overlapping chunks for embedding""" + if not text: + return [] + + words = text.split() + chunks = [] + + for i in range(0, len(words), chunk_size - overlap): + chunk_words = words[i:i + chunk_size] + chunk_text = ' '.join(chunk_words) + if chunk_text.strip(): + chunks.append(chunk_text.strip()) + + return chunks + + def get_file_hash(self, file_path: str) -> str: + """Generate hash of file content for deduplication""" + try: + hash_sha256 = hashlib.sha256() + with open(file_path, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_sha256.update(chunk) + return hash_sha256.hexdigest() + except Exception as e: + logger.error(f"Failed to generate hash for {file_path}: {e}") + return "" + + def is_supported_file(self, file_path: str) -> bool: + """Check if file type is supported""" + try: + mime_type = magic.from_file(file_path, mime=True) + return mime_type in self.supported_types + except Exception: + return False + + +# Global instance +file_processor = FileProcessor() \ No newline at end of file diff --git a/backend/ai_core/llm/ollama_client.py b/backend/ai_core/llm/ollama_client.py new file mode 100644 index 0000000..419f299 --- /dev/null +++ b/backend/ai_core/llm/ollama_client.py @@ -0,0 +1,146 @@ +import httpx +import json +from typing import Dict, List, Any, Optional, AsyncGenerator +from app.core.config import settings +import logging + +logger = logging.getLogger(__name__) + + +class OllamaClient: + def __init__(self, base_url: str = None, model: str = None): + self.base_url = base_url or settings.OLLAMA_BASE_URL + self.model = model or settings.DEFAULT_LLM_MODEL + self.client = httpx.AsyncClient(timeout=60.0) + + async def chat( + self, + messages: List[Dict[str, str]], + system_prompt: Optional[str] = None, + temperature: float = 0.7, + max_tokens: int = 2000 + ) -> str: + """Send chat messages to Ollama and get response""" + try: + # Format messages for Ollama + if system_prompt: + messages.insert(0, {"role": "system", "content": system_prompt}) + + payload = { + "model": self.model, + "messages": messages, + "options": { + "temperature": temperature, + "num_predict": max_tokens + }, + "stream": False + } + + response = await self.client.post( + f"{self.base_url}/api/chat", + json=payload + ) + response.raise_for_status() + + result = response.json() + return result.get("message", {}).get("content", "") + + except httpx.RequestError as e: + logger.error(f"Request error communicating with Ollama: {e}") + raise Exception(f"Failed to communicate with local LLM: {e}") + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error from Ollama: {e}") + raise Exception(f"LLM service error: {e}") + + async def chat_stream( + self, + messages: List[Dict[str, str]], + system_prompt: Optional[str] = None, + temperature: float = 0.7 + ) -> AsyncGenerator[str, None]: + """Stream chat response from Ollama""" + try: + if system_prompt: + messages.insert(0, {"role": "system", "content": system_prompt}) + + payload = { + "model": self.model, + "messages": messages, + "options": { + "temperature": temperature + }, + "stream": True + } + + async with self.client.stream( + "POST", + f"{self.base_url}/api/chat", + json=payload + ) as response: + response.raise_for_status() + async for line in response.aiter_lines(): + if line: + try: + data = json.loads(line) + if "message" in data and "content" in data["message"]: + yield data["message"]["content"] + except 
json.JSONDecodeError: + continue + + except httpx.RequestError as e: + logger.error(f"Request error streaming from Ollama: {e}") + raise Exception(f"Failed to stream from local LLM: {e}") + + async def generate_embedding(self, text: str) -> List[float]: + """Generate embeddings using Ollama (if supported by model)""" + try: + payload = { + "model": "nomic-embed-text", # Use embedding-specific model + "prompt": text + } + + response = await self.client.post( + f"{self.base_url}/api/embeddings", + json=payload + ) + response.raise_for_status() + + result = response.json() + return result.get("embedding", []) + + except httpx.RequestError as e: + logger.error(f"Request error getting embeddings from Ollama: {e}") + return [] + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error getting embeddings from Ollama: {e}") + return [] + + async def check_health(self) -> bool: + """Check if Ollama service is available""" + try: + response = await self.client.get(f"{self.base_url}/api/tags") + return response.status_code == 200 + except: + return False + + async def list_models(self) -> List[str]: + """List available models in Ollama""" + try: + response = await self.client.get(f"{self.base_url}/api/tags") + response.raise_for_status() + + result = response.json() + models = result.get("models", []) + return [model["name"] for model in models] + + except httpx.RequestError as e: + logger.error(f"Request error listing models from Ollama: {e}") + return [] + + async def close(self): + """Close the HTTP client""" + await self.client.aclose() + + +# Global instance +ollama_client = OllamaClient() \ No newline at end of file diff --git a/backend/ai_core/rag/vector_store.py b/backend/ai_core/rag/vector_store.py new file mode 100644 index 0000000..e736c58 --- /dev/null +++ b/backend/ai_core/rag/vector_store.py @@ -0,0 +1,241 @@ +import chromadb +from chromadb.config import Settings +from typing import List, Dict, Any, Optional, Tuple +import uuid +from pathlib import Path +from app.core.config import settings +import logging + +logger = logging.getLogger(__name__) + + +class VectorStore: + def __init__(self, persist_directory: str = None, collection_name: str = None): + self.persist_directory = persist_directory or str(settings.VECTOR_DB_DIR) + self.collection_name = collection_name or settings.VECTOR_COLLECTION_NAME + self.client = None + self.collection = None + self._initialize_client() + + def _initialize_client(self): + """Initialize ChromaDB client and collection""" + try: + # Create persistent client + self.client = chromadb.PersistentClient( + path=self.persist_directory, + settings=Settings(anonymized_telemetry=False) + ) + + # Get or create collection + self.collection = self.client.get_or_create_collection( + name=self.collection_name, + metadata={"hnsw:space": "cosine"} # Use cosine similarity + ) + + logger.info(f"Vector store initialized with collection: {self.collection_name}") + + except Exception as e: + logger.error(f"Failed to initialize vector store: {e}") + raise Exception(f"Could not initialize vector database: {e}") + + def add_documents( + self, + documents: List[str], + embeddings: List[List[float]], + metadatas: List[Dict[str, Any]], + ids: Optional[List[str]] = None + ) -> List[str]: + """Add documents with embeddings to the vector store""" + try: + if not documents or not embeddings: + return [] + + # Generate IDs if not provided + if ids is None: + ids = [str(uuid.uuid4()) for _ in documents] + + # Ensure all lists have the same length + if not (len(documents) == 
len(embeddings) == len(metadatas) == len(ids)): + raise ValueError("Documents, embeddings, metadatas, and ids must have the same length") + + # Add to collection + self.collection.add( + documents=documents, + embeddings=embeddings, + metadatas=metadatas, + ids=ids + ) + + logger.info(f"Added {len(documents)} documents to vector store") + return ids + + except Exception as e: + logger.error(f"Failed to add documents to vector store: {e}") + raise Exception(f"Could not add documents to vector database: {e}") + + def search_similar( + self, + query_embedding: List[float], + n_results: int = 5, + where: Optional[Dict[str, Any]] = None, + include: List[str] = None + ) -> Dict[str, List[Any]]: + """Search for similar documents using embedding""" + try: + if include is None: + include = ["documents", "metadatas", "distances"] + + results = self.collection.query( + query_embeddings=[query_embedding], + n_results=n_results, + where=where, + include=include + ) + + # Flatten results since we only query with one embedding + flattened_results = {} + for key, values in results.items(): + if values and len(values) > 0: + flattened_results[key] = values[0] + else: + flattened_results[key] = [] + + return flattened_results + + except Exception as e: + logger.error(f"Failed to search vector store: {e}") + return {"documents": [], "metadatas": [], "distances": []} + + def search_by_text( + self, + query_text: str, + n_results: int = 5, + where: Optional[Dict[str, Any]] = None + ) -> Dict[str, List[Any]]: + """Search for similar documents using text query""" + try: + results = self.collection.query( + query_texts=[query_text], + n_results=n_results, + where=where, + include=["documents", "metadatas", "distances"] + ) + + # Flatten results + flattened_results = {} + for key, values in results.items(): + if values and len(values) > 0: + flattened_results[key] = values[0] + else: + flattened_results[key] = [] + + return flattened_results + + except Exception as e: + logger.error(f"Failed to search vector store by text: {e}") + return {"documents": [], "metadatas": [], "distances": []} + + def get_documents_by_ids(self, ids: List[str]) -> Dict[str, List[Any]]: + """Retrieve documents by their IDs""" + try: + results = self.collection.get( + ids=ids, + include=["documents", "metadatas"] + ) + return results + + except Exception as e: + logger.error(f"Failed to get documents by IDs: {e}") + return {"documents": [], "metadatas": []} + + def update_document( + self, + document_id: str, + document: Optional[str] = None, + embedding: Optional[List[float]] = None, + metadata: Optional[Dict[str, Any]] = None + ) -> bool: + """Update an existing document""" + try: + update_data = {"ids": [document_id]} + + if document is not None: + update_data["documents"] = [document] + if embedding is not None: + update_data["embeddings"] = [embedding] + if metadata is not None: + update_data["metadatas"] = [metadata] + + self.collection.update(**update_data) + logger.info(f"Updated document: {document_id}") + return True + + except Exception as e: + logger.error(f"Failed to update document {document_id}: {e}") + return False + + def delete_documents(self, ids: List[str]) -> bool: + """Delete documents by their IDs""" + try: + self.collection.delete(ids=ids) + logger.info(f"Deleted {len(ids)} documents from vector store") + return True + + except Exception as e: + logger.error(f"Failed to delete documents: {e}") + return False + + def delete_by_metadata(self, where: Dict[str, Any]) -> bool: + """Delete documents by metadata 
criteria""" + try: + self.collection.delete(where=where) + logger.info(f"Deleted documents matching criteria: {where}") + return True + + except Exception as e: + logger.error(f"Failed to delete documents by metadata: {e}") + return False + + def get_collection_stats(self) -> Dict[str, Any]: + """Get statistics about the collection""" + try: + count = self.collection.count() + return { + "collection_name": self.collection_name, + "total_documents": count, + "persist_directory": self.persist_directory + } + + except Exception as e: + logger.error(f"Failed to get collection stats: {e}") + return {} + + def clear_collection(self) -> bool: + """Clear all documents from the collection""" + try: + # Delete the collection and recreate it + self.client.delete_collection(self.collection_name) + self.collection = self.client.get_or_create_collection( + name=self.collection_name, + metadata={"hnsw:space": "cosine"} + ) + logger.info(f"Cleared collection: {self.collection_name}") + return True + + except Exception as e: + logger.error(f"Failed to clear collection: {e}") + return False + + def create_user_collection(self, user_id: int, collection_name: str = None) -> 'VectorStore': + """Create a user-specific collection""" + if collection_name is None: + collection_name = f"user_{user_id}_documents" + + return VectorStore( + persist_directory=self.persist_directory, + collection_name=collection_name + ) + + +# Global instance +vector_store = VectorStore() \ No newline at end of file diff --git a/backend/app/api/auth.py b/backend/app/api/auth.py new file mode 100644 index 0000000..1f0420b --- /dev/null +++ b/backend/app/api/auth.py @@ -0,0 +1,198 @@ +from fastapi import APIRouter, Depends, HTTPException, status +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from sqlalchemy.orm import Session +from datetime import timedelta +from pydantic import BaseModel, EmailStr +from app.db.database import get_db +from app.db.models import User +from app.core.security import verify_password, get_password_hash, create_access_token, decode_token +from app.core.config import settings +import logging + +logger = logging.getLogger(__name__) + +router = APIRouter() +security = HTTPBearer() + +# Pydantic models +class UserRegistration(BaseModel): + username: str + email: EmailStr + password: str + fullName: str = None + +class UserLogin(BaseModel): + username: str + password: str + +class UserResponse(BaseModel): + id: int + username: str + email: str + fullName: str = None + createdAt: str + + class Config: + from_attributes = True + +class TokenResponse(BaseModel): + access_token: str + token_type: str = "bearer" + user: UserResponse + +# Dependency to get current user +async def get_current_user( + credentials: HTTPAuthorizationCredentials = Depends(security), + db: Session = Depends(get_db) +) -> User: + """Get current authenticated user""" + token = credentials.credentials + username = decode_token(token) + + if username is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication credentials", + headers={"WWW-Authenticate": "Bearer"}, + ) + + user = db.query(User).filter(User.username == username).first() + if user is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="User not found", + headers={"WWW-Authenticate": "Bearer"}, + ) + + return user + +@router.post("/register", response_model=TokenResponse) +async def register_user(user_data: UserRegistration, db: Session = Depends(get_db)): + """Register a new user""" 
+ try: + # Check if username already exists + existing_user = db.query(User).filter(User.username == user_data.username).first() + if existing_user: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Username already registered" + ) + + # Check if email already exists + existing_email = db.query(User).filter(User.email == user_data.email).first() + if existing_email: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Email already registered" + ) + + # Create new user + hashed_password = get_password_hash(user_data.password) + new_user = User( + username=user_data.username, + email=user_data.email, + hashed_password=hashed_password, + full_name=user_data.fullName + ) + + db.add(new_user) + db.commit() + db.refresh(new_user) + + # Create access token + access_token = create_access_token( + subject=new_user.username, + expires_delta=timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + ) + + logger.info(f"New user registered: {new_user.username}") + + return TokenResponse( + access_token=access_token, + user=UserResponse( + id=new_user.id, + username=new_user.username, + email=new_user.email, + fullName=new_user.full_name, + createdAt=new_user.created_at.isoformat() + ) + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Registration error: {e}") + db.rollback() + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Registration failed" + ) + +@router.post("/login", response_model=TokenResponse) +async def login_user(login_data: UserLogin, db: Session = Depends(get_db)): + """Authenticate user and return token""" + try: + # Find user + user = db.query(User).filter(User.username == login_data.username).first() + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid username or password" + ) + + # Verify password + if not verify_password(login_data.password, user.hashed_password): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid username or password" + ) + + # Update last login + from datetime import datetime + user.last_login = datetime.utcnow() + db.commit() + + # Create access token + access_token = create_access_token( + subject=user.username, + expires_delta=timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + ) + + logger.info(f"User logged in: {user.username}") + + return TokenResponse( + access_token=access_token, + user=UserResponse( + id=user.id, + username=user.username, + email=user.email, + fullName=user.full_name, + createdAt=user.created_at.isoformat() + ) + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Login error: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Login failed" + ) + +@router.post("/logout") +async def logout_user(current_user: User = Depends(get_current_user)): + """Logout user (client-side token removal)""" + logger.info(f"User logged out: {current_user.username}") + return {"message": "Successfully logged out"} + +@router.get("/me", response_model=UserResponse) +async def get_current_user_info(current_user: User = Depends(get_current_user)): + """Get current user information""" + return UserResponse( + id=current_user.id, + username=current_user.username, + email=current_user.email, + fullName=current_user.full_name, + createdAt=current_user.created_at.isoformat() + ) \ No newline at end of file diff --git a/backend/app/core/config.py b/backend/app/core/config.py new file mode 100644 index 
0000000..5da5182 --- /dev/null +++ b/backend/app/core/config.py @@ -0,0 +1,57 @@ +from pydantic_settings import BaseSettings +from typing import List +import os +from pathlib import Path + + +class Settings(BaseSettings): + # App Configuration + APP_NAME: str = "aPersona" + APP_VERSION: str = "1.0.0" + DEBUG: bool = True + API_V1_STR: str = "/api/v1" + + # Security + SECRET_KEY: str = "your-secret-key-change-in-production" + ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8 # 8 days + ALGORITHM: str = "HS256" + + # Database + DATABASE_URL: str = "sqlite:///./apersona.db" + + # File Storage + UPLOAD_DIR: Path = Path("../data/uploads") + PROCESSED_DIR: Path = Path("../data/processed") + VECTOR_DB_DIR: Path = Path("../data/vectors") + MAX_FILE_SIZE: int = 100 * 1024 * 1024 # 100MB + + # AI Configuration + OLLAMA_BASE_URL: str = "http://localhost:11434" + DEFAULT_LLM_MODEL: str = "mistral" + EMBEDDING_MODEL: str = "all-MiniLM-L6-v2" + VECTOR_COLLECTION_NAME: str = "apersona_documents" + + # CORS + BACKEND_CORS_ORIGINS: List[str] = [ + "http://localhost:3000", + "http://localhost:5173", + "http://127.0.0.1:3000", + "http://127.0.0.1:5173", + ] + + # Auto-Learning Configuration + LEARNING_UPDATE_INTERVAL: int = 3600 # 1 hour in seconds + MIN_INTERACTIONS_FOR_LEARNING: int = 10 + FEEDBACK_WEIGHT: float = 0.1 + + def __init__(self, **kwargs): + super().__init__(**kwargs) + # Create directories if they don't exist + for directory in [self.UPLOAD_DIR, self.PROCESSED_DIR, self.VECTOR_DB_DIR]: + directory.mkdir(parents=True, exist_ok=True) + + class Config: + env_file = ".env" + + +settings = Settings() \ No newline at end of file diff --git a/backend/app/core/security.py b/backend/app/core/security.py new file mode 100644 index 0000000..972d407 --- /dev/null +++ b/backend/app/core/security.py @@ -0,0 +1,45 @@ +from datetime import datetime, timedelta +from typing import Optional, Union, Any +from jose import jwt +from passlib.context import CryptContext +from app.core.config import settings + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + + +def create_access_token( + subject: Union[str, Any], expires_delta: Optional[timedelta] = None +) -> str: + """Create JWT access token""" + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta( + minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES + ) + + to_encode = {"exp": expire, "sub": str(subject)} + encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM) + return encoded_jwt + + +def verify_password(plain_password: str, hashed_password: str) -> bool: + """Verify a password against its hash""" + return pwd_context.verify(plain_password, hashed_password) + + +def get_password_hash(password: str) -> str: + """Generate password hash""" + return pwd_context.hash(password) + + +def decode_token(token: str) -> Optional[str]: + """Decode JWT token and return subject""" + try: + payload = jwt.decode( + token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM] + ) + token_data = payload.get("sub") + return token_data + except jwt.JWTError: + return None \ No newline at end of file diff --git a/backend/app/db/database.py b/backend/app/db/database.py new file mode 100644 index 0000000..e8a52f3 --- /dev/null +++ b/backend/app/db/database.py @@ -0,0 +1,22 @@ +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from app.core.config import settings + +engine = create_engine( + 
settings.DATABASE_URL, + connect_args={"check_same_thread": False} # Only for SQLite +) + +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +Base = declarative_base() + + +def get_db(): + """Dependency to get database session""" + db = SessionLocal() + try: + yield db + finally: + db.close() \ No newline at end of file diff --git a/backend/app/db/models.py b/backend/app/db/models.py new file mode 100644 index 0000000..1a02aa9 --- /dev/null +++ b/backend/app/db/models.py @@ -0,0 +1,149 @@ +from sqlalchemy import Boolean, Column, Integer, String, DateTime, Text, Float, ForeignKey, JSON +from sqlalchemy.orm import relationship +from sqlalchemy.sql import func +from app.db.database import Base + + +class User(Base): + __tablename__ = "users" + + id = Column(Integer, primary_key=True, index=True) + username = Column(String, unique=True, index=True, nullable=False) + email = Column(String, unique=True, index=True, nullable=False) + hashed_password = Column(String, nullable=False) + full_name = Column(String, nullable=True) + is_active = Column(Boolean, default=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + last_login = Column(DateTime(timezone=True), nullable=True) + + # Relationships + files = relationship("UserFile", back_populates="owner") + interactions = relationship("UserInteraction", back_populates="user") + preferences = relationship("UserPreference", back_populates="user") + reminders = relationship("Reminder", back_populates="user") + + +class UserFile(Base): + __tablename__ = "user_files" + + id = Column(Integer, primary_key=True, index=True) + filename = Column(String, nullable=False) + original_name = Column(String, nullable=False) + file_path = Column(String, nullable=False) + file_type = Column(String, nullable=False) # pdf, txt, docx, image, etc. + file_size = Column(Integer, nullable=False) + mime_type = Column(String, nullable=True) + + # Content analysis + content_summary = Column(Text, nullable=True) + extracted_text = Column(Text, nullable=True) + categories = Column(JSON, nullable=True) # List of auto-detected categories + tags = Column(JSON, nullable=True) # User-defined tags + + # Metadata + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now()) + last_accessed = Column(DateTime(timezone=True), nullable=True) + access_count = Column(Integer, default=0) + + # Relationships + owner_id = Column(Integer, ForeignKey("users.id")) + owner = relationship("User", back_populates="files") + + +class UserInteraction(Base): + __tablename__ = "user_interactions" + + id = Column(Integer, primary_key=True, index=True) + interaction_type = Column(String, nullable=False) # query, file_upload, search, etc. 
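+    # What was asked and what was answered; both nullable because some
+    # interaction types (e.g. file_upload) carry no query text.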
+ query = Column(Text, nullable=True) + response = Column(Text, nullable=True) + context = Column(JSON, nullable=True) # Additional context data + + # Quality metrics + response_time = Column(Float, nullable=True) + user_feedback = Column(Integer, nullable=True) # -1, 0, 1 (negative, neutral, positive) + was_helpful = Column(Boolean, nullable=True) + + # Learning data + used_files = Column(JSON, nullable=True) # List of file IDs used in response + search_terms = Column(JSON, nullable=True) + + created_at = Column(DateTime(timezone=True), server_default=func.now()) + + # Relationships + user_id = Column(Integer, ForeignKey("users.id")) + user = relationship("User", back_populates="interactions") + + +class UserPreference(Base): + __tablename__ = "user_preferences" + + id = Column(Integer, primary_key=True, index=True) + preference_type = Column(String, nullable=False) # response_style, categories, etc. + preference_key = Column(String, nullable=False) + preference_value = Column(JSON, nullable=False) + confidence_score = Column(Float, default=0.5) # How confident we are about this preference + + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now()) + + # Relationships + user_id = Column(Integer, ForeignKey("users.id")) + user = relationship("User", back_populates="preferences") + + +class Reminder(Base): + __tablename__ = "reminders" + + id = Column(Integer, primary_key=True, index=True) + title = Column(String, nullable=False) + description = Column(Text, nullable=True) + reminder_time = Column(DateTime(timezone=True), nullable=False) + is_completed = Column(Boolean, default=False) + is_recurring = Column(Boolean, default=False) + recurrence_pattern = Column(String, nullable=True) # daily, weekly, monthly + + # Context for AI suggestions + context_files = Column(JSON, nullable=True) # Related file IDs + auto_generated = Column(Boolean, default=False) # Was this generated by AI? + priority = Column(Integer, default=1) # 1-5 priority scale + + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now()) + + # Relationships + user_id = Column(Integer, ForeignKey("users.id")) + user = relationship("User", back_populates="reminders") + + +class LearningPattern(Base): + __tablename__ = "learning_patterns" + + id = Column(Integer, primary_key=True, index=True) + pattern_type = Column(String, nullable=False) # time_based, topic_based, etc. 
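+    # Arbitrary JSON describing the detected pattern, e.g. a time_based
+    # pattern might store {"hour_of_day": 9, "weekday": "mon"}. (Illustrative
+    # shape only; the learning engine defines the actual keys.)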
+    pattern_data = Column(JSON, nullable=False)
+    confidence_score = Column(Float, default=0.0)
+    usage_count = Column(Integer, default=0)
+    success_rate = Column(Float, default=0.0)
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
+
+    # Relationships
+    user_id = Column(Integer, ForeignKey("users.id"))
+
+
+class DocumentEmbedding(Base):
+    __tablename__ = "document_embeddings"
+
+    id = Column(Integer, primary_key=True, index=True)
+    file_id = Column(Integer, ForeignKey("user_files.id"))
+    chunk_index = Column(Integer, nullable=False)  # For large documents split into chunks
+    chunk_text = Column(Text, nullable=False)
+    embedding_id = Column(String, nullable=False)  # ID in vector database
+
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    # Relationships
+    file = relationship("UserFile")
\ No newline at end of file
diff --git a/backend/app/main.py b/backend/app/main.py
new file mode 100644
index 0000000..22e92a4
--- /dev/null
+++ b/backend/app/main.py
@@ -0,0 +1,221 @@
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.trustedhost import TrustedHostMiddleware
+from fastapi.responses import JSONResponse
+import time
+import logging
+from contextlib import asynccontextmanager
+
+from app.core.config import settings
+from app.db.database import engine
+from app.db.models import Base
+
+# Import routers
+from app.api.auth import router as auth_router
+# from app.api.files import router as files_router
+# from app.api.chat import router as chat_router
+# from app.api.reminders import router as reminders_router
+# from app.api.search import router as search_router
+
+logger = logging.getLogger(__name__)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifespan context manager for startup and shutdown events"""
+    # Startup
+    logger.info("Starting aPersona backend...")
+
+    # Create database tables
+    Base.metadata.create_all(bind=engine)
+    logger.info("Database tables created")
+
+    # Initialize AI components
+    try:
+        from ai_core.embeddings.embedding_service import embedding_service
+        from ai_core.rag.vector_store import vector_store
+        from ai_core.llm.ollama_client import ollama_client
+
+        # Test Ollama connection
+        is_healthy = await ollama_client.check_health()
+        if is_healthy:
+            logger.info("Ollama connection established")
+        else:
+            logger.warning("Ollama service not available - some features may be limited")
+
+        # Initialize vector store
+        stats = vector_store.get_collection_stats()
+        logger.info(f"Vector store initialized: {stats}")
+
+        # Test embedding service
+        embedding_info = embedding_service.get_model_info()
+        logger.info(f"Embedding service ready: {embedding_info}")
+
+    except Exception as e:
+        logger.error(f"Failed to initialize AI components: {e}")
+
+    yield
+
+    # Shutdown
+    logger.info("Shutting down aPersona backend...")
+    try:
+        await ollama_client.close()
+    except Exception:  # ollama_client may be unbound if startup failed
+        pass
+
+
+# Create FastAPI app
+app = FastAPI(
+    title=settings.APP_NAME,
+    version=settings.APP_VERSION,
+    description="AI-powered personal assistant that works completely offline",
+    lifespan=lifespan
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=settings.BACKEND_CORS_ORIGINS,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Add trusted host middleware for security
+app.add_middleware(
+    TrustedHostMiddleware,
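+    # Rejects requests whose Host header is not listed below (mitigates
+    # Host-header attacks); add your LAN hostname if you reach the backend
+    # from another device.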
allowed_hosts=["localhost", "127.0.0.1", "*.localhost"] +) + + +# Request timing middleware +@app.middleware("http") +async def add_process_time_header(request: Request, call_next): + """Add processing time to response headers""" + start_time = time.time() + response = await call_next(request) + process_time = time.time() - start_time + response.headers["X-Process-Time"] = str(process_time) + return response + + +# Global exception handler +@app.exception_handler(Exception) +async def global_exception_handler(request: Request, exc: Exception): + """Global exception handler for unhandled errors""" + logger.error(f"Unhandled error for {request.url}: {exc}") + return JSONResponse( + status_code=500, + content={ + "detail": "Internal server error", + "error": str(exc) if settings.DEBUG else "An unexpected error occurred" + } + ) + + +# Health check endpoint +@app.get("/health") +async def health_check(): + """Health check endpoint""" + try: + from ai_core.llm.ollama_client import ollama_client + ollama_healthy = await ollama_client.check_health() + + return { + "status": "healthy", + "app_name": settings.APP_NAME, + "version": settings.APP_VERSION, + "services": { + "database": "healthy", + "ollama": "healthy" if ollama_healthy else "unhealthy", + "embeddings": "healthy", + "vector_store": "healthy" + } + } + except Exception as e: + logger.error(f"Health check failed: {e}") + return JSONResponse( + status_code=503, + content={ + "status": "unhealthy", + "error": str(e) + } + ) + + +# Root endpoint +@app.get("/") +async def root(): + """Root endpoint""" + return { + "message": f"Welcome to {settings.APP_NAME}", + "version": settings.APP_VERSION, + "description": "AI-powered personal assistant - fully local and private", + "endpoints": { + "health": "/health", + "docs": "/docs", + "api": settings.API_V1_STR + } + } + + +# System info endpoint +@app.get(f"{settings.API_V1_STR}/system/info") +async def get_system_info(): + """Get system information and capabilities""" + try: + from ai_core.embeddings.embedding_service import embedding_service + from ai_core.rag.vector_store import vector_store + from ai_core.llm.ollama_client import ollama_client + + # Get AI service information + embedding_info = embedding_service.get_model_info() + vector_stats = vector_store.get_collection_stats() + available_models = await ollama_client.list_models() + + return { + "app_info": { + "name": settings.APP_NAME, + "version": settings.APP_VERSION, + "debug": settings.DEBUG + }, + "ai_services": { + "embedding_model": embedding_info, + "vector_store": vector_stats, + "available_llm_models": available_models, + "current_llm_model": settings.DEFAULT_LLM_MODEL + }, + "capabilities": { + "file_processing": [ + "PDF", "DOCX", "TXT", "Images (OCR)", + "Markdown", "PNG", "JPEG", "GIF" + ], + "ai_features": [ + "Semantic search", "Auto-categorization", + "Smart reminders", "Personalized responses", + "Learning from interactions" + ] + } + } + except Exception as e: + logger.error(f"Failed to get system info: {e}") + raise HTTPException(status_code=500, detail="Failed to retrieve system information") + + +# Include API routers +app.include_router(auth_router, prefix=f"{settings.API_V1_STR}/auth", tags=["authentication"]) +# app.include_router(files_router, prefix=f"{settings.API_V1_STR}/files", tags=["files"]) +# app.include_router(chat_router, prefix=f"{settings.API_V1_STR}/chat", tags=["chat"]) +# app.include_router(reminders_router, prefix=f"{settings.API_V1_STR}/reminders", tags=["reminders"]) +# 
app.include_router(search_router, prefix=f"{settings.API_V1_STR}/search", tags=["search"])
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "app.main:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=settings.DEBUG,
+        log_level="info" if not settings.DEBUG else "debug"
+    )
\ No newline at end of file
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..302befc
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,41 @@
+# FastAPI and Web Server
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+python-multipart==0.0.6
+
+# Database and ORM
+sqlalchemy==2.0.23
+alembic==1.12.1
+# sqlite3 ships with the Python standard library; nothing to install
+
+# Authentication and Security
+python-jose[cryptography]==3.3.0
+passlib[bcrypt]==1.7.4
+python-multipart==0.0.6
+
+# AI and ML
+torch==2.1.1
+transformers==4.35.2
+sentence-transformers==2.2.2
+chromadb==0.4.15
+ollama==0.1.8
+huggingface-hub==0.19.4
+
+# File Processing
+PyPDF2==3.0.1
+python-docx==1.1.0
+Pillow==10.1.0
+python-magic==0.4.27
+
+# Utilities
+pydantic==2.5.0
+python-dotenv==1.0.0
+httpx==0.25.2
+aiofiles==23.2.1
+schedule==1.2.0
+
+# Development
+pytest==7.4.3
+pytest-asyncio==0.21.1
+black==23.11.0
+isort==5.12.0
\ No newline at end of file
diff --git a/data/embeddings_cache/.gitkeep b/data/embeddings_cache/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/data/processed/.gitkeep b/data/processed/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/data/uploads/.gitkeep b/data/uploads/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/data/vectors/.gitkeep b/data/vectors/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
new file mode 100644
index 0000000..8357e68
--- /dev/null
+++ b/docs/ARCHITECTURE.md
@@ -0,0 +1,351 @@
+# aPersona System Architecture
+
+## Overview
+
+aPersona is a fully local, AI-powered personal assistant designed to work entirely offline while providing intelligent, context-aware assistance based on your personal files and behavior patterns.
+
+## Core Principles
+
+- **100% Local**: No data leaves your device
+- **Privacy-First**: All processing happens on your machine
+- **Adaptive Learning**: Continuously improves based on your interactions
+- **Context-Aware**: Understands your personal documents and preferences
+
+## System Architecture
+
+### Backend (Python FastAPI)
+
+```
+backend/
+├── app/
+│   ├── api/           # REST API endpoints
+│   ├── core/          # Core configuration and security
+│   ├── db/            # Database models and connections
+│   └── services/      # Business logic services
+├── ai_core/           # AI/ML components
+│   ├── embeddings/    # Text embedding service
+│   ├── llm/           # Local LLM integration (Ollama)
+│   ├── rag/           # Retrieval-Augmented Generation
+│   └── auto_learning/ # Adaptive learning engine
+└── requirements.txt
+```
+
+#### Key Components
+
+1. **FastAPI Application**: RESTful API server
+2. **SQLAlchemy ORM**: Database management with SQLite
+3. **Authentication**: JWT-based user authentication
+4. **File Processing**: Multi-format document processing
+5. **Vector Database**: ChromaDB for semantic search
+6. **Local LLM**: Ollama integration for AI responses
+
+### Frontend (React + TypeScript)
+
+```
+frontend/
+├── src/
+│   ├── components/   # Reusable UI components
+│   ├── pages/        # Page-level components
+│   ├── services/     # API service layer
+│   ├── store/        # State management (Zustand)
+│   └── utils/        # Utility functions
+├── index.html
+└── package.json
+```
+
+#### Key Technologies
+
+1. **React 18**: Modern UI framework
+2. **TypeScript**: Type-safe development
+3. **TailwindCSS**: Utility-first styling
+4. **Vite**: Fast build tool and dev server
+5. **React Query**: Server state management
+6. **Zustand**: Client state management
+
+### AI Core Components
+
+#### 1. Embedding Service (`ai_core/embeddings/`)
+
+- **Purpose**: Convert text to numerical vectors for semantic search
+- **Model**: SentenceTransformers (all-MiniLM-L6-v2)
+- **Features**:
+  - Caching for performance
+  - Batch processing
+  - Similarity computation
+
+#### 2. Vector Store (`ai_core/rag/`)
+
+- **Purpose**: Store and search document embeddings
+- **Technology**: ChromaDB with persistent storage
+- **Capabilities**:
+  - Semantic similarity search
+  - Metadata filtering
+  - User-specific collections
+
+#### 3. LLM Integration (`ai_core/llm/`)
+
+- **Purpose**: Local language model integration
+- **Technology**: Ollama (supports Mistral, LLaMA, etc.)
+- **Features**:
+  - Streaming responses
+  - Context management
+  - Error handling
+
+#### 4. File Processing (`ai_core/file_processing/`)
+
+- **Supported Formats**: PDF, DOCX, TXT, Images (OCR), Markdown
+- **Features**:
+  - Content extraction
+  - Auto-categorization
+  - Metadata extraction
+  - Text chunking for embeddings
+
+## Auto-Learning System
+
+The auto-learning module is the heart of aPersona's intelligence, continuously adapting to user behavior and preferences.
+
+### Learning Components
+
+#### 1. Interaction Analysis
+
+```python
+class LearningEngine:
+    async def analyze_user_interactions(self, user_id: int) -> dict:
+        """Analyze patterns in a user's queries and responses.
+
+        Signals considered:
+        - frequency patterns
+        - topic preferences
+        - response quality metrics
+        - search patterns
+        - time-based usage patterns
+        """
+        ...
+```
+
+#### 2. Preference Learning
+
+The system learns user preferences across multiple dimensions:
+
+- **Response Style**: Concise vs. detailed responses
+- **Topic Interests**: Frequently discussed subjects
+- **Time Patterns**: When the user is most active
+- **File Usage**: Most accessed documents
+
+#### 3. Adaptive Prompting
+
+```python
+# Method of LearningEngine: builds a personalized system prompt
+# from learned preferences.
+async def generate_personalized_prompt(self, user_id: int, base_prompt: str) -> str:
+    """Fold into the base prompt:
+
+    - the user's communication style
+    - preferred response length
+    - topic expertise areas
+    - context preferences
+    """
+    ...
+```
+
+#### 4. Proactive Suggestions
+
+The system generates intelligent suggestions:
+
+- **Reminder Optimization**: Suggests optimal reminder times
+- **File Organization**: Proposes file organization improvements
+- **Content Discovery**: Recommends related documents
+- **Workflow Improvements**: Suggests process optimizations
+
+### Learning Data Flow
+
+```mermaid
+graph TD
+    A[User Interaction] --> B[Store Interaction Data]
+    B --> C[Analyze Patterns]
+    C --> D[Update Preferences]
+    D --> E[Generate Personalized Prompts]
+    E --> F[Improve Responses]
+    F --> G[Collect Feedback]
+    G --> A
+```
+
+### Learning Metrics
+
+1. **Confidence Scores**: How certain the system is about preferences
+2. **Success Rates**: Effectiveness of learned patterns
+3. **Usage Counts**: Frequency of pattern application
+4. **Feedback Integration**: User satisfaction incorporation
+
+## Data Storage
+
+### Database Schema
+
+#### Core Tables
+
+1. **Users**: User accounts and authentication
+2. **UserFiles**: Uploaded files and metadata
+3. **UserInteractions**: All user-AI interactions (see the sketch after this list)
+4. **UserPreferences**: Learned user preferences
+5. **LearningPatterns**: Detected behavioral patterns
+6. **Reminders**: User reminders and notifications
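+
+As a concrete illustration, one chat turn could be persisted roughly like this
+(a sketch, not code from the repository: `record_chat_turn` is a hypothetical
+helper, while `SessionLocal` and `UserInteraction` are the session factory and
+model defined under `backend/app/db/`):
+
+```python
+from app.db.database import SessionLocal
+from app.db.models import UserInteraction
+
+
+def record_chat_turn(user_id: int, query: str, response: str, elapsed: float) -> None:
+    """Persist one chat turn so the learning engine can mine it later."""
+    db = SessionLocal()
+    try:
+        db.add(UserInteraction(
+            user_id=user_id,
+            interaction_type="query",
+            query=query,
+            response=response,
+            response_time=elapsed,  # seconds; user_feedback is filled in once the user rates the answer
+        ))
+        db.commit()
+    finally:
+        db.close()
+```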
+
+#### Vector Storage
+
+- **ChromaDB Collections**: Document embeddings with metadata
+- **User-Specific Collections**: Isolated data per user
+- **Embedding Cache**: Local cache for faster processing
+
+## Security & Privacy
+
+### Data Protection
+
+1. **Local Storage**: All data remains on the user's device
+2. **Encrypted Authentication**: JWT tokens with secure password hashing
+3. **No External APIs**: No cloud dependencies
+4. **User Data Isolation**: Multi-user support with data separation
+
+### File Security
+
+1. **Access Controls**: User-based file access
+2. **Secure Upload**: File validation and sanitization
+3. **Safe Processing**: Sandboxed file processing
+4. **Cleanup**: Temporary file management
+
+## RAG (Retrieval-Augmented Generation) System
+
+### How It Works
+
+1. **Document Ingestion**:
+   - Files are processed and chunked
+   - Text is converted to embeddings
+   - Metadata is extracted and stored
+
+2. **Query Processing**:
+   - User query is embedded
+   - Semantic search finds relevant chunks
+   - Context is assembled for the LLM
+
+3. **Response Generation**:
+   - The LLM receives the query plus relevant context
+   - Personalized prompts are applied
+   - The response is generated and returned
+
+4. **Learning Loop**:
+   - User feedback is collected
+   - Patterns are analyzed
+   - The system adapts for future queries
+
+### Context Assembly
+
+```python
+def assemble_context(user_id, query_embedding, user_preferences, base_prompt):
+    # Find relevant documents
+    relevant_docs = vector_store.search_similar(query_embedding)
+
+    # Apply user preferences
+    context = personalize_context(relevant_docs, user_preferences)
+
+    # Generate personalized prompt
+    system_prompt = generate_personalized_prompt(user_id, base_prompt)
+
+    return context, system_prompt
+```
+
+## Performance Optimizations
+
+### Embedding Cache
+
+- Local caching of text embeddings
+- Significant performance improvement for repeated content
+- Automatic cache management
+
+### Batch Processing
+
+- Process multiple files simultaneously
+- Batch embedding generation
+- Efficient database operations
+
+### Background Tasks
+
+- Asynchronous file processing
+- Background learning analysis
+- Scheduled maintenance tasks
+
+## Deployment Architecture
+
+### Local Development
+
+```bash
+# Backend
+cd backend && python -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+uvicorn app.main:app --reload
+
+# Frontend
+cd frontend && npm install
+npm run dev
+
+# AI Services
+ollama serve
+ollama pull mistral
+ollama pull nomic-embed-text
+```
+
+### Production Deployment
+
+- **Containerization**: Docker support for easy deployment
+- **Service Management**: Systemd service files
+- **Automatic Updates**: Self-updating mechanisms
+- **Backup System**: Automated data backups
+
+## Extending the System
+
+### Adding New File Types
+
+1. Implement a processor in `ai_core/file_processing/`
+2. Add the MIME type mapping
+3. Update file upload validation
+4. Test with sample files
+
+### Adding New Learning Patterns
+
+1. Extend the `LearningEngine` class
+2. Add new pattern types
+3. Implement the analysis logic
+4. Update preference storage
+
+### Custom LLM Integration
+
+1. Implement the LLM client interface
+2. Add configuration options
+3. Update prompt generation
+4. 
Test with target model + +## Monitoring & Analytics + +### System Health + +- AI service availability +- Database performance +- File processing status +- Memory and disk usage + +### User Analytics + +- Interaction frequency +- Learning effectiveness +- Feature usage patterns +- System performance metrics + +## Future Enhancements + +### Planned Features + +1. **Multi-modal Support**: Image understanding and generation +2. **Voice Interface**: Speech-to-text and text-to-speech +3. **Advanced Scheduling**: Calendar integration and smart scheduling +4. **Team Features**: Shared knowledge bases (while maintaining privacy) +5. **Mobile App**: Native mobile applications +6. **Plugin System**: Extensible plugin architecture + +### Research Areas + +1. **Federated Learning**: Improve models without data sharing +2. **Advanced RAG**: More sophisticated retrieval strategies +3. **Multi-agent Systems**: Specialized AI agents for different tasks +4. **Continuous Learning**: Real-time model adaptation + +This architecture ensures aPersona remains a powerful, private, and continuously improving personal AI assistant that truly understands and adapts to each user's unique needs and preferences. \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..4461c22 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + aPersona - AI Personal Assistant + + + + +
+ +
+
+
Loading aPersona...
+
+
+ + + \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..ee207d3 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,56 @@ +{ + "name": "apersona-frontend", + "private": true, + "version": "1.0.0", + "type": "module", + "description": "aPersona AI Assistant Frontend", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", + "preview": "vite preview" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-router-dom": "^6.19.0", + "axios": "^1.6.0", + "zustand": "^4.4.6", + "@tanstack/react-query": "^5.8.4", + "react-hook-form": "^7.47.0", + "@hookform/resolvers": "^3.3.2", + "zod": "^3.22.4", + "date-fns": "^2.30.0", + "lucide-react": "^0.294.0", + "react-dropzone": "^14.2.3", + "react-markdown": "^9.0.1", + "react-syntax-highlighter": "^15.5.0", + "recharts": "^2.8.0", + "sonner": "^1.2.4", + "clsx": "^2.0.0", + "tailwind-merge": "^2.0.0", + "@radix-ui/react-dialog": "^1.0.5", + "@radix-ui/react-dropdown-menu": "^2.0.6", + "@radix-ui/react-tabs": "^1.0.4", + "@radix-ui/react-toast": "^1.1.5", + "@radix-ui/react-tooltip": "^1.0.7", + "@radix-ui/react-avatar": "^1.0.4", + "@radix-ui/react-progress": "^1.0.3" + }, + "devDependencies": { + "@types/react": "^18.2.37", + "@types/react-dom": "^18.2.15", + "@types/react-syntax-highlighter": "^15.5.10", + "@typescript-eslint/eslint-plugin": "^6.10.0", + "@typescript-eslint/parser": "^6.10.0", + "@vitejs/plugin-react": "^4.1.1", + "autoprefixer": "^10.4.16", + "eslint": "^8.53.0", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.4", + "postcss": "^8.4.31", + "tailwindcss": "^3.3.5", + "typescript": "^5.2.2", + "vite": "^4.5.0" + } +} \ No newline at end of file diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} \ No newline at end of file diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx new file mode 100644 index 0000000..bc60a62 --- /dev/null +++ b/frontend/src/App.tsx @@ -0,0 +1,41 @@ +import { Routes, Route } from 'react-router-dom' +import { useQuery } from '@tanstack/react-query' +import Layout from './components/Layout' +import Dashboard from './pages/Dashboard' +import Chat from './pages/Chat' +import Files from './pages/Files' +import Reminders from './pages/Reminders' +import Settings from './pages/Settings' +import Login from './pages/Login' +import { useAuthStore } from './store/authStore' +import { api } from './services/api' + +function App() { + const { user } = useAuthStore() + + // Check system health on app load + const { data: systemInfo } = useQuery({ + queryKey: ['system-info'], + queryFn: api.getSystemInfo, + refetchInterval: 30000, // Check every 30 seconds + }) + + // If user is not authenticated, show login + if (!user) { + return + } + + return ( + + + } /> + } /> + } /> + } /> + } /> + + + ) +} + +export default App \ No newline at end of file diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx new file mode 100644 index 0000000..059f655 --- /dev/null +++ b/frontend/src/components/Layout.tsx @@ -0,0 +1,173 @@ +import React, { useState } from 'react' +import { Link, useLocation } from 'react-router-dom' +import { + Home, + MessageSquare, + Files, + Bell, + Settings, + Menu, + X, + Brain, + 
User, + LogOut +} from 'lucide-react' +import { useAuthStore } from '../store/authStore' +import { SystemInfo } from '../services/api' + +interface LayoutProps { + children: React.ReactNode + systemInfo?: SystemInfo +} + +const navigation = [ + { name: 'Dashboard', href: '/', icon: Home }, + { name: 'Chat', href: '/chat', icon: MessageSquare }, + { name: 'Files', href: '/files', icon: Files }, + { name: 'Reminders', href: '/reminders', icon: Bell }, + { name: 'Settings', href: '/settings', icon: Settings }, +] + +export default function Layout({ children, systemInfo }: LayoutProps) { + const [sidebarOpen, setSidebarOpen] = useState(false) + const location = useLocation() + const { user, logout } = useAuthStore() + + const handleLogout = () => { + logout() + window.location.reload() + } + + return ( +
+ {/* Sidebar */} +
+
+ {/* Logo and close button */} +
+
+ + aPersona +
+ +
+ + {/* Navigation */} + + + {/* System status */} + {systemInfo && ( +
+
System Status
+
+
+ LLM: + + {systemInfo.ai_services.current_llm_model} + +
+
+ Docs: + + {systemInfo.ai_services.vector_store?.total_documents || 0} + +
+
+
+ )} + + {/* User info */} +
+
+
+
+ +
+
+
{user?.username}
+
{user?.email}
+
+
+ +
+
+
+
+ + {/* Main content */} +
+ {/* Header */} +
+ + +
+ {/* Status indicators */} +
+
+
+ AI Online +
+
+
+ Local +
+
+
+ Private +
+
+
+
+ + {/* Page content */} +
+ {children} +
+
+ + {/* Mobile sidebar overlay */} + {sidebarOpen && ( +
setSidebarOpen(false)} + /> + )} +
+ ) +} \ No newline at end of file diff --git a/frontend/src/index.css b/frontend/src/index.css new file mode 100644 index 0000000..5d6d1a8 --- /dev/null +++ b/frontend/src/index.css @@ -0,0 +1,129 @@ +@import 'tailwindcss/base'; +@import 'tailwindcss/components'; +@import 'tailwindcss/utilities'; + +@layer base { + * { + @apply border-border; + } + + body { + @apply bg-background text-foreground; + font-feature-settings: 'rlig' 1, 'calt' 1; + } +} + +@layer components { + /* Custom component styles */ + .chat-message { + @apply p-4 rounded-lg mb-4 max-w-3xl; + } + + .chat-message.user { + @apply bg-primary text-primary-foreground ml-auto; + } + + .chat-message.assistant { + @apply bg-muted text-muted-foreground mr-auto; + } + + .file-upload-area { + @apply border-2 border-dashed border-border rounded-lg p-8 text-center cursor-pointer hover:border-primary transition-colors; + } + + .file-upload-area.dragover { + @apply border-primary bg-primary/10; + } + + .sidebar-item { + @apply flex items-center gap-3 px-3 py-2 rounded-md text-sm font-medium transition-colors hover:bg-accent hover:text-accent-foreground; + } + + .sidebar-item.active { + @apply bg-accent text-accent-foreground; + } + + .card { + @apply rounded-lg border bg-card text-card-foreground shadow-sm; + } + + .input { + @apply flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50; + } + + .button { + @apply inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50; + } + + .button-primary { + @apply button bg-primary text-primary-foreground hover:bg-primary/90 h-10 px-4 py-2; + } + + .button-secondary { + @apply button bg-secondary text-secondary-foreground hover:bg-secondary/80 h-10 px-4 py-2; + } + + .button-outline { + @apply button border border-input bg-background hover:bg-accent hover:text-accent-foreground h-10 px-4 py-2; + } + + .button-ghost { + @apply button hover:bg-accent hover:text-accent-foreground h-10 px-4 py-2; + } +} + +/* Custom scrollbar */ +.custom-scrollbar::-webkit-scrollbar { + width: 6px; +} + +.custom-scrollbar::-webkit-scrollbar-track { + background: hsl(var(--muted)); +} + +.custom-scrollbar::-webkit-scrollbar-thumb { + background: hsl(var(--muted-foreground)); + border-radius: 3px; +} + +.custom-scrollbar::-webkit-scrollbar-thumb:hover { + background: hsl(var(--foreground)); +} + +/* Animation for typing indicator */ +.typing-indicator { + display: inline-flex; + align-items: center; + gap: 2px; +} + +.typing-dot { + width: 6px; + height: 6px; + border-radius: 50%; + background-color: hsl(var(--muted-foreground)); + animation: typing 1.4s ease-in-out infinite; +} + +.typing-dot:nth-child(1) { + animation-delay: 0ms; +} + +.typing-dot:nth-child(2) { + animation-delay: 200ms; +} + +.typing-dot:nth-child(3) { + animation-delay: 400ms; +} + +@keyframes typing { + 0%, 60%, 100% { + transform: translateY(0); + opacity: 0.4; + } + 30% { + transform: translateY(-10px); + opacity: 1; + } +} \ No newline at end of file diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx new file mode 100644 index 
0000000..d8ca70f --- /dev/null +++ b/frontend/src/main.tsx @@ -0,0 +1,43 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import { BrowserRouter } from 'react-router-dom' +import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { Toaster } from 'sonner' +import App from './App.tsx' +import './index.css' + +// Create a query client for React Query +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + staleTime: 1000 * 60 * 5, // 5 minutes + retry: 1, + }, + }, +}) + +// Remove loading screen after React app loads +const removeLoadingScreen = () => { + const loadingElement = document.getElementById('loading') + if (loadingElement) { + loadingElement.remove() + } +} + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + + + + + + , +) + +// Remove loading screen once React has mounted +setTimeout(removeLoadingScreen, 100) \ No newline at end of file diff --git a/frontend/src/pages/Chat.tsx b/frontend/src/pages/Chat.tsx new file mode 100644 index 0000000..84dc46b --- /dev/null +++ b/frontend/src/pages/Chat.tsx @@ -0,0 +1,10 @@ +import React from 'react' + +export default function Chat() { + return ( +
+

Chat

+

Chat with your AI assistant (coming soon)

+
+ ) +} \ No newline at end of file diff --git a/frontend/src/pages/Dashboard.tsx b/frontend/src/pages/Dashboard.tsx new file mode 100644 index 0000000..b7de14a --- /dev/null +++ b/frontend/src/pages/Dashboard.tsx @@ -0,0 +1,219 @@ +import React from 'react' +import { useQuery } from '@tanstack/react-query' +import { + MessageSquare, + Files, + Bell, + Activity, + Upload, + Search, + Plus +} from 'lucide-react' +import { Link } from 'react-router-dom' +import { api } from '../services/api' + +export default function Dashboard() { + const { data: usageStats } = useQuery({ + queryKey: ['usage-stats'], + queryFn: api.getUsageStats, + }) + + const { data: filesData } = useQuery({ + queryKey: ['files', 1, 5], + queryFn: () => api.getFiles(1, 5), + }) + + const { data: reminders } = useQuery({ + queryKey: ['reminders'], + queryFn: api.getReminders, + }) + + const { data: suggestions } = useQuery({ + queryKey: ['suggestions'], + queryFn: api.getProactiveSuggestions, + }) + + const stats = [ + { + name: 'Total Conversations', + value: usageStats?.total_conversations || 0, + icon: MessageSquare, + color: 'text-blue-500', + }, + { + name: 'Files Uploaded', + value: filesData?.total || 0, + icon: Files, + color: 'text-green-500', + }, + { + name: 'Active Reminders', + value: reminders?.filter((r: any) => !r.isCompleted).length || 0, + icon: Bell, + color: 'text-yellow-500', + }, + { + name: 'Learning Score', + value: Math.round((usageStats?.learning_score || 0) * 100), + icon: Activity, + color: 'text-purple-500', + }, + ] + + const quickActions = [ + { + name: 'Start Chat', + description: 'Ask me anything about your files', + href: '/chat', + icon: MessageSquare, + color: 'bg-blue-500', + }, + { + name: 'Upload Files', + description: 'Add documents for analysis', + href: '/files', + icon: Upload, + color: 'bg-green-500', + }, + { + name: 'Search', + description: 'Find content in your files', + href: '/files', + icon: Search, + color: 'bg-purple-500', + }, + { + name: 'Add Reminder', + description: 'Create a new reminder', + href: '/reminders', + icon: Plus, + color: 'bg-orange-500', + }, + ] + + return ( +
+ {/* Header */} +
+

Dashboard

+

+ Welcome back! Here's what's happening with your AI assistant. +

+
+ + {/* Stats Grid */} +
+ {stats.map((stat) => { + const Icon = stat.icon + return ( +
+
+
+

+ {stat.name} +

+

+ {stat.value} +

+
+ +
+
+ ) + })} +
+ + {/* Quick Actions */} +
+
+

Quick Actions

+

+ Get started with these common tasks +

+
+
+
+ {quickActions.map((action) => { + const Icon = action.icon + return ( + +
+ +
+
+

{action.name}

+

+ {action.description} +

+
+ + ) + })} +
+
+
+ + {/* Recent Activity */} +
+ {/* Recent Files */} +
+
+

Recent Files

+
+
+ {filesData?.files?.length > 0 ? ( +
+ {filesData.files.slice(0, 5).map((file: any) => ( +
+ +
+

+ {file.originalName} +

+

+ {new Date(file.createdAt).toLocaleDateString()} +

+
+
+ ))} +
+ ) : ( +

+ No files uploaded yet. Start by uploading some documents. +

+ )} +
+
+ + {/* AI Suggestions */} +
+
+

AI Suggestions

+
+
+ {suggestions?.length > 0 ? ( +
+ {suggestions.slice(0, 3).map((suggestion: any, index: number) => ( +
+

{suggestion.message}

+

+ Confidence: {Math.round(suggestion.confidence * 100)}% +

+
+ ))} +
+ ) : ( +

+ I'll provide personalized suggestions as you use the system more. +

+ )} +
+
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/pages/Files.tsx b/frontend/src/pages/Files.tsx new file mode 100644 index 0000000..6597e23 --- /dev/null +++ b/frontend/src/pages/Files.tsx @@ -0,0 +1,10 @@ +import React from 'react' + +export default function Files() { + return ( +
+

Files

+

Manage your uploaded files (coming soon)

+
+ ) +} \ No newline at end of file diff --git a/frontend/src/pages/Login.tsx b/frontend/src/pages/Login.tsx new file mode 100644 index 0000000..4e1a24e --- /dev/null +++ b/frontend/src/pages/Login.tsx @@ -0,0 +1,191 @@ +import React, { useState } from 'react' +import { Brain, Eye, EyeOff } from 'lucide-react' +import { useAuthStore } from '../store/authStore' +import { api } from '../services/api' +import { toast } from 'sonner' + +export default function Login() { + const [isLogin, setIsLogin] = useState(true) + const [showPassword, setShowPassword] = useState(false) + const [loading, setLoading] = useState(false) + const { setUser, setToken } = useAuthStore() + + const [formData, setFormData] = useState({ + username: '', + email: '', + password: '', + fullName: '', + }) + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault() + setLoading(true) + + try { + if (isLogin) { + const response = await api.login(formData.username, formData.password) + setToken(response.access_token) + setUser(response.user) + toast.success('Welcome back!') + } else { + const response = await api.register({ + username: formData.username, + email: formData.email, + password: formData.password, + fullName: formData.fullName, + }) + setToken(response.access_token) + setUser(response.user) + toast.success('Account created successfully!') + } + } catch (error: any) { + toast.error(error.response?.data?.detail || 'Authentication failed') + } finally { + setLoading(false) + } + } + + const handleInputChange = (e: React.ChangeEvent) => { + setFormData(prev => ({ + ...prev, + [e.target.name]: e.target.value + })) + } + + return ( +
+
+ {/* Logo and title */} +
+
+ +
+

+ {isLogin ? 'Welcome back' : 'Create account'} +

+

+ Your AI assistant that keeps everything local and private +

+
+ + {/* Form */} +
+
+ {!isLogin && ( +
+ + +
+ )} + +
+ + +
+ + {!isLogin && ( +
+ + +
+ )} + +
+ +
+ + +
+
+
+ +
+ +
+ +
+ +
+
+ + {/* Privacy notice */} +
+

🔒 All data is stored locally on your device

+

No cloud services • Complete privacy • Full control

+
+
+
+ ) +} \ No newline at end of file diff --git a/frontend/src/pages/Reminders.tsx b/frontend/src/pages/Reminders.tsx new file mode 100644 index 0000000..fdc48d9 --- /dev/null +++ b/frontend/src/pages/Reminders.tsx @@ -0,0 +1,10 @@ +import React from 'react' + +export default function Reminders() { + return ( +
+

Reminders

+

Manage your reminders and notifications (coming soon)

+
+ ) +} \ No newline at end of file diff --git a/frontend/src/pages/Settings.tsx b/frontend/src/pages/Settings.tsx new file mode 100644 index 0000000..20bb245 --- /dev/null +++ b/frontend/src/pages/Settings.tsx @@ -0,0 +1,10 @@ +import React from 'react' + +export default function Settings() { + return ( +
+

Settings

+

Configure your AI assistant settings (coming soon)

+
+ ) +} \ No newline at end of file diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts new file mode 100644 index 0000000..ff1e277 --- /dev/null +++ b/frontend/src/services/api.ts @@ -0,0 +1,235 @@ +import axios from 'axios' + +const BASE_URL = import.meta.env.VITE_API_URL || 'http://localhost:8000' + +// Create axios instance +const apiClient = axios.create({ + baseURL: BASE_URL, + timeout: 30000, +}) + +// Add auth token to requests +apiClient.interceptors.request.use((config) => { + const token = localStorage.getItem('auth-storage') + if (token) { + try { + const parsed = JSON.parse(token) + if (parsed.state?.token) { + config.headers.Authorization = `Bearer ${parsed.state.token}` + } + } catch (error) { + console.error('Failed to parse auth token:', error) + } + } + return config +}) + +// Handle auth errors +apiClient.interceptors.response.use( + (response) => response, + (error) => { + if (error.response?.status === 401) { + // Clear auth on 401 + localStorage.removeItem('auth-storage') + window.location.reload() + } + return Promise.reject(error) + } +) + +// Types +export interface SystemInfo { + app_info: { + name: string + version: string + debug: boolean + } + ai_services: { + embedding_model: any + vector_store: any + available_llm_models: string[] + current_llm_model: string + } + capabilities: { + file_processing: string[] + ai_features: string[] + } +} + +export interface ChatMessage { + id: string + role: 'user' | 'assistant' + content: string + timestamp: string + metadata?: any +} + +export interface FileInfo { + id: number + filename: string + originalName: string + fileType: string + fileSize: number + contentSummary?: string + categories: string[] + tags: string[] + createdAt: string + lastAccessed?: string +} + +export interface Reminder { + id: number + title: string + description?: string + reminderTime: string + isCompleted: boolean + isRecurring: boolean + priority: number + autoGenerated: boolean +} + +export interface SearchResult { + id: string + content: string + metadata: any + similarity: number + source: string +} + +// API methods +export const api = { + // System endpoints + async getHealth() { + const response = await apiClient.get('/health') + return response.data + }, + + async getSystemInfo(): Promise { + const response = await apiClient.get('/api/v1/system/info') + return response.data + }, + + // Auth endpoints + async login(username: string, password: string) { + const response = await apiClient.post('/api/v1/auth/login', { + username, + password, + }) + return response.data + }, + + async register(userData: { + username: string + email: string + password: string + fullName?: string + }) { + const response = await apiClient.post('/api/v1/auth/register', userData) + return response.data + }, + + async logout() { + await apiClient.post('/api/v1/auth/logout') + }, + + // Chat endpoints + async sendMessage(message: string): Promise<{ response: string; used_files?: string[] }> { + const response = await apiClient.post('/api/v1/chat/message', { + message, + }) + return response.data + }, + + async getChatHistory(): Promise { + const response = await apiClient.get('/api/v1/chat/history') + return response.data + }, + + async provideFeedback(messageId: string, feedback: number) { + await apiClient.post(`/api/v1/chat/feedback/${messageId}`, { + feedback, + }) + }, + + // File endpoints + async uploadFile(file: File, tags?: string[]) { + const formData = new FormData() + formData.append('file', file) + if (tags) { + 
formData.append('tags', JSON.stringify(tags)) + } + + const response = await apiClient.post('/api/v1/files/upload', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }) + return response.data + }, + + async getFiles(page = 1, limit = 20, category?: string): Promise<{ files: FileInfo[]; total: number }> { + const params = new URLSearchParams({ + page: page.toString(), + limit: limit.toString() + }) + if (category) params.append('category', category) + + const response = await apiClient.get(`/api/v1/files?${params}`) + return response.data + }, + + async deleteFile(fileId: number) { + await apiClient.delete(`/api/v1/files/${fileId}`) + }, + + async updateFileTags(fileId: number, tags: string[]) { + const response = await apiClient.patch(`/api/v1/files/${fileId}/tags`, { + tags, + }) + return response.data + }, + + // Search endpoints + async searchFiles(query: string, limit = 10): Promise { + const response = await apiClient.get('/api/v1/search', { + params: { query, limit }, + }) + return response.data + }, + + // Reminders endpoints + async getReminders(): Promise { + const response = await apiClient.get('/api/v1/reminders') + return response.data + }, + + async createReminder(reminder: Omit) { + const response = await apiClient.post('/api/v1/reminders', reminder) + return response.data + }, + + async updateReminder(reminderId: number, updates: Partial) { + const response = await apiClient.patch(`/api/v1/reminders/${reminderId}`, updates) + return response.data + }, + + async deleteReminder(reminderId: number) { + await apiClient.delete(`/api/v1/reminders/${reminderId}`) + }, + + async getProactiveSuggestions() { + const response = await apiClient.get('/api/v1/suggestions') + return response.data + }, + + // Analytics endpoints + async getUsageStats() { + const response = await apiClient.get('/api/v1/analytics/usage') + return response.data + }, + + async getInteractionHistory() { + const response = await apiClient.get('/api/v1/analytics/interactions') + return response.data + }, +} \ No newline at end of file diff --git a/frontend/src/store/authStore.ts b/frontend/src/store/authStore.ts new file mode 100644 index 0000000..ba0b648 --- /dev/null +++ b/frontend/src/store/authStore.ts @@ -0,0 +1,42 @@ +import { create } from 'zustand' +import { persist } from 'zustand/middleware' + +interface User { + id: number + username: string + email: string + fullName?: string + createdAt: string +} + +interface AuthState { + user: User | null + token: string | null + setUser: (user: User) => void + setToken: (token: string) => void + logout: () => void + isAuthenticated: () => boolean +} + +export const useAuthStore = create()( + persist( + (set, get) => ({ + user: null, + token: null, + setUser: (user: User) => set({ user }), + setToken: (token: string) => set({ token }), + logout: () => set({ user: null, token: null }), + isAuthenticated: () => { + const { user, token } = get() + return !!(user && token) + }, + }), + { + name: 'auth-storage', + partialize: (state) => ({ + user: state.user, + token: state.token + }), + } + ) +) \ No newline at end of file diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js new file mode 100644 index 0000000..c88f56a --- /dev/null +++ b/frontend/tailwind.config.js @@ -0,0 +1,75 @@ +/** @type {import('tailwindcss').Config} */ +export default { + content: [ + "./index.html", + "./src/**/*.{js,ts,jsx,tsx}", + ], + theme: { + extend: { + colors: { + border: "hsl(var(--border))", + input: "hsl(var(--input))", + ring: 
"hsl(var(--ring))", + background: "hsl(var(--background))", + foreground: "hsl(var(--foreground))", + primary: { + DEFAULT: "hsl(var(--primary))", + foreground: "hsl(var(--primary-foreground))", + }, + secondary: { + DEFAULT: "hsl(var(--secondary))", + foreground: "hsl(var(--secondary-foreground))", + }, + destructive: { + DEFAULT: "hsl(var(--destructive))", + foreground: "hsl(var(--destructive-foreground))", + }, + muted: { + DEFAULT: "hsl(var(--muted))", + foreground: "hsl(var(--muted-foreground))", + }, + accent: { + DEFAULT: "hsl(var(--accent))", + foreground: "hsl(var(--accent-foreground))", + }, + popover: { + DEFAULT: "hsl(var(--popover))", + foreground: "hsl(var(--popover-foreground))", + }, + card: { + DEFAULT: "hsl(var(--card))", + foreground: "hsl(var(--card-foreground))", + }, + }, + borderRadius: { + lg: "var(--radius)", + md: "calc(var(--radius) - 2px)", + sm: "calc(var(--radius) - 4px)", + }, + fontFamily: { + sans: ["Inter", "system-ui", "sans-serif"], + mono: ["JetBrains Mono", "monospace"], + }, + animation: { + "fade-in": "fadeIn 0.5s ease-in-out", + "slide-up": "slideUp 0.3s ease-out", + "pulse-subtle": "pulseSubtle 2s infinite", + }, + keyframes: { + fadeIn: { + "0%": { opacity: "0" }, + "100%": { opacity: "1" }, + }, + slideUp: { + "0%": { transform: "translateY(10px)", opacity: "0" }, + "100%": { transform: "translateY(0)", opacity: "1" }, + }, + pulseSubtle: { + "0%, 100%": { opacity: "1" }, + "50%": { opacity: "0.8" }, + }, + }, + }, + }, + plugins: [], +} \ No newline at end of file diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json new file mode 100644 index 0000000..461c2df --- /dev/null +++ b/frontend/tsconfig.json @@ -0,0 +1,36 @@ +{ + "compilerOptions": { + "target": "ES2020", + "useDefineForClassFields": true, + "lib": ["ES2020", "DOM", "DOM.Iterable"], + "module": "ESNext", + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "resolveJsonModule": true, + "isolatedModules": true, + "noEmit": true, + "jsx": "react-jsx", + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true, + + /* Path mapping */ + "baseUrl": ".", + "paths": { + "@/*": ["./src/*"] + }, + + /* Additional type checking */ + "allowSyntheticDefaultImports": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src"], + "references": [{ "path": "./tsconfig.node.json" }] +} \ No newline at end of file diff --git a/frontend/tsconfig.node.json b/frontend/tsconfig.node.json new file mode 100644 index 0000000..862dfb2 --- /dev/null +++ b/frontend/tsconfig.node.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "composite": true, + "skipLibCheck": true, + "module": "ESNext", + "moduleResolution": "bundler", + "allowSyntheticDefaultImports": true + }, + "include": ["vite.config.ts"] +} \ No newline at end of file diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts new file mode 100644 index 0000000..d9f4b36 --- /dev/null +++ b/frontend/vite.config.ts @@ -0,0 +1,26 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' +import path from 'path' + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [react()], + resolve: { + alias: { + '@': path.resolve(__dirname, './src'), + }, + }, + server: { + port: 3000, + proxy: { + '/api': { + target: 'http://localhost:8000', + changeOrigin: true, + }, + }, + }, + build: { + outDir: 'dist', + sourcemap: true, + }, +}) \ 
No newline at end of file diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..40d563c --- /dev/null +++ b/setup.sh @@ -0,0 +1,225 @@ +#!/bin/bash + +# aPersona Setup Script +# This script helps you set up the aPersona AI assistant locally + +set -e + +echo "🤖 Welcome to aPersona Setup!" +echo "==========================================" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Function to print colored output +print_status() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if Python 3.11+ is installed +check_python() { + print_status "Checking Python installation..." + if command -v python3 &> /dev/null; then + python_version=$(python3 --version | cut -d' ' -f2) + major_version=$(echo $python_version | cut -d'.' -f1) + minor_version=$(echo $python_version | cut -d'.' -f2) + + if [ "$major_version" -eq 3 ] && [ "$minor_version" -ge 11 ]; then + print_success "Python $python_version found" + else + print_error "Python 3.11+ required. Found Python $python_version" + exit 1 + fi + else + print_error "Python 3 not found. Please install Python 3.11+" + exit 1 + fi +} + +# Check if Node.js 18+ is installed +check_node() { + print_status "Checking Node.js installation..." + if command -v node &> /dev/null; then + node_version=$(node --version | cut -d'v' -f2) + major_version=$(echo $node_version | cut -d'.' -f1) + + if [ "$major_version" -ge 18 ]; then + print_success "Node.js $node_version found" + else + print_error "Node.js 18+ required. Found Node.js $node_version" + exit 1 + fi + else + print_error "Node.js not found. Please install Node.js 18+" + exit 1 + fi +} + +# Check if Ollama is installed +check_ollama() { + print_status "Checking Ollama installation..." + if command -v ollama &> /dev/null; then + print_success "Ollama found" + + # Check if Ollama service is running + if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then + print_success "Ollama service is running" + else + print_warning "Ollama service is not running. Please start it with: ollama serve" + fi + else + print_warning "Ollama not found. Installing Ollama..." + curl -fsSL https://ollama.ai/install.sh | sh + print_success "Ollama installed. Please start it with: ollama serve" + fi +} + +# Setup Python backend +setup_backend() { + print_status "Setting up Python backend..." + + cd backend + + # Create virtual environment if it doesn't exist + if [ ! -d "venv" ]; then + print_status "Creating Python virtual environment..." + python3 -m venv venv + print_success "Virtual environment created" + fi + + # Activate virtual environment + source venv/bin/activate + + # Install requirements + print_status "Installing Python dependencies..." + pip install --upgrade pip + pip install -r requirements.txt + + print_success "Backend dependencies installed" + + cd .. +} + +# Setup React frontend +setup_frontend() { + print_status "Setting up React frontend..." + + cd frontend + + # Install npm dependencies + print_status "Installing Node.js dependencies..." + npm install + + print_success "Frontend dependencies installed" + + cd .. +} + +# Create necessary directories +create_directories() { + print_status "Creating data directories..." 
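+    # mkdir -p is idempotent: it creates missing parents and is a no-op for
+    # directories that already exist, so re-running setup is safe.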
+ + mkdir -p data/uploads + mkdir -p data/processed + mkdir -p data/vectors + mkdir -p data/embeddings_cache + + print_success "Data directories created" +} + +# Install Ollama models +install_models() { + print_status "Installing AI models..." + + if command -v ollama &> /dev/null; then + print_status "Downloading Mistral model (this may take a while)..." + ollama pull mistral + + print_status "Downloading embedding model..." + ollama pull nomic-embed-text + + print_success "AI models installed" + else + print_warning "Ollama not available. Please install models manually after setting up Ollama" + fi +} + +# Create environment file +create_env() { + print_status "Creating environment configuration..." + + if [ ! -f "backend/.env" ]; then + cat > backend/.env << EOF +# aPersona Environment Configuration + +# Security +SECRET_KEY=your-secret-key-change-in-production-$(openssl rand -hex 32) + +# Database +DATABASE_URL=sqlite:///./apersona.db + +# AI Services +OLLAMA_BASE_URL=http://localhost:11434 +DEFAULT_LLM_MODEL=mistral +EMBEDDING_MODEL=all-MiniLM-L6-v2 + +# Development +DEBUG=true +EOF + print_success "Environment file created" + else + print_warning "Environment file already exists" + fi +} + +# Main setup function +main() { + echo "Starting aPersona setup process..." + echo "" + + # System checks + check_python + check_node + check_ollama + + echo "" + + # Setup components + create_directories + create_env + setup_backend + setup_frontend + install_models + + echo "" + echo "==========================================" + print_success "aPersona setup completed successfully!" + echo "" + echo "📋 Next steps:" + echo " 1. Start Ollama service: ollama serve" + echo " 2. Start the backend: cd backend && source venv/bin/activate && uvicorn app.main:app --reload" + echo " 3. Start the frontend: cd frontend && npm run dev" + echo " 4. Open http://localhost:3000 in your browser" + echo "" + echo "💡 For more information, check the README.md file" + echo "🔒 Your data stays completely local and private!" +} + +# Run main function +main \ No newline at end of file