# Source: Hugging Face Space "fokan/train-modle" (status: Running).
# File size: 73,138 bytes.

"""
Multi-Modal Knowledge Distillation Web Application

A FastAPI-based web application for creating new AI models through knowledge distillation
from multiple pre-trained models across different modalities.
"""

import os
import asyncio
import logging
import uuid
from typing import List, Dict, Any, Optional, Union
from pathlib import Path
import json
import shutil
from datetime import datetime

from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks, WebSocket, WebSocketDisconnect, Request
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import uvicorn

from src.model_loader import ModelLoader
from src.distillation import KnowledgeDistillationTrainer
from src.utils import setup_logging, validate_file, cleanup_temp_files, get_system_info

# Import new core components
from src.core.memory_manager import AdvancedMemoryManager
from src.core.chunk_loader import AdvancedChunkLoader
from src.core.cpu_optimizer import CPUOptimizer
from src.core.token_manager import TokenManager

# Import medical components
from src.medical.medical_datasets import MedicalDatasetManager
from src.medical.dicom_handler import DicomHandler
from src.medical.medical_preprocessing import MedicalPreprocessor

# Import database components
from database.database import DatabaseManager
from src.database_manager import DatabaseManager as PlatformDatabaseManager
from src.models_manager import ModelsManager

# Setup logging with error handling.
# `logger` is bound in both branches, so the module-level name always exists
# for the functions defined below.
try:
    setup_logging()
    logger = logging.getLogger(__name__)
except Exception as e:
    # Fallback to basic logging if setup fails
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.warning(f"Failed to setup advanced logging: {e}")

# Custom JSON encoder for handling Path objects and other non-serializable types
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also handles paths, tensor/array-likes and plain objects.

    ``default`` is only consulted for values the stock encoder rejects. The
    conversions are tried from most to least specific:

    1. ``pathlib.Path`` -> ``str``
    2. objects with ``detach`` (tensor-like) -> nested list via
       ``detach().cpu().numpy().tolist()``
    3. objects with ``tolist`` (array-like) -> ``tolist()``
    4. any other object with an instance ``__dict__`` -> that dict

    The generic ``__dict__`` fallback must come last: tensor- and array-like
    objects frequently carry an instance ``__dict__`` too, and checking it
    first would serialize their attributes instead of their data.
    """

    def default(self, obj):
        """Return a JSON-compatible stand-in for ``obj``.

        Raises:
            TypeError: (from the base class) when no conversion applies.
        """
        if isinstance(obj, Path):
            return str(obj)
        if hasattr(obj, 'detach'):  # For PyTorch tensors
            return obj.detach().cpu().numpy().tolist()
        if hasattr(obj, 'tolist'):  # For numpy arrays
            return obj.tolist()
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return super().default(obj)

def safe_json_serialize(data):
    """Safely serialize data to JSON, handling non-serializable objects.

    The value is round-tripped through ``json.dumps``/``json.loads`` with
    ``CustomJSONEncoder`` so the result contains only JSON-native types.

    If the whole payload cannot be encoded, a warning is logged and a
    best-effort result is built instead:

    * for a dict, every value is converted on its own; values that still
      cannot be encoded are replaced by ``str(value)``, and keys that JSON
      cannot represent are replaced by ``str(key)``;
    * for anything else, ``str(data)`` is returned.

    Args:
        data: Arbitrary object to make JSON-safe.

    Returns:
        A structure made only of JSON-native types (or a string fallback).
    """
    try:
        return json.loads(json.dumps(data, cls=CustomJSONEncoder))
    except Exception as e:
        logger.warning(f"Failed to serialize data: {e}")

    if not isinstance(data, dict):
        return str(data)

    safe_data = {}
    for k, v in data.items():
        # JSON object keys must be str/int/float/bool/None; anything else
        # would make the result unserializable again.
        if k is None or isinstance(k, (str, int, float, bool)):
            safe_key = k
        else:
            safe_key = str(k)
        try:
            # Store the converted value, not the original: a value that only
            # encodes thanks to CustomJSONEncoder (e.g. a Path) is not
            # JSON-native by itself.
            safe_data[safe_key] = json.loads(json.dumps(v, cls=CustomJSONEncoder))
        except Exception:
            safe_data[safe_key] = str(v)
    return safe_data

def cleanup_training_session(session_id: str):
    """Clean up training session resources.

    Removes the session's saved model artifact from disk (if any), drops the
    session from ``training_sessions`` and forgets its WebSocket in
    ``active_connections``. Unknown session ids are ignored. The function
    never raises: every failure is logged instead.

    Args:
        session_id: Identifier of the session to tear down.
    """
    try:
        session = training_sessions.get(session_id)
        if session is None:
            return

        # Clean up any temporary files
        model_path = session.get("model_path")
        if model_path:
            target = Path(model_path)
            if target.exists():
                try:
                    # run_training stores the path of the saved weights *file*;
                    # shutil.rmtree only works on directories, so handle both.
                    if target.is_dir():
                        shutil.rmtree(target)
                    else:
                        target.unlink()
                    logger.info(f"Cleaned up model files for session {session_id}")
                except Exception as e:
                    logger.warning(f"Failed to clean up model files: {e}")

        # Remove from active sessions
        training_sessions.pop(session_id, None)

        # Remove WebSocket connection if exists
        active_connections.pop(session_id, None)

        logger.info(f"Cleaned up training session: {session_id}")

    except Exception as e:
        logger.error(f"Error cleaning up session {session_id}: {e}")

def cleanup_old_sessions(max_age_seconds: float = 3600):
    """Clean up old completed or failed sessions.

    A session is removed when its status is terminal (completed, failed or
    cancelled), it has an ``end_time``, and that time lies more than
    ``max_age_seconds`` in the past.

    ``end_time`` is written by ``run_training`` from the event loop clock
    (``loop.time()``), which is monotonic and unrelated to the Unix epoch, so
    the age has to be measured on that same clock. Comparing it with
    ``datetime.now().timestamp()`` makes every finished session look far
    older than the limit and therefore eligible for immediate removal.

    Args:
        max_age_seconds: Minimum age of a finished session before it is
            removed. Defaults to one hour.
    """
    import time

    try:
        try:
            current_time = asyncio.get_running_loop().time()
        except RuntimeError:
            # No running loop (called from plain synchronous code); the
            # standard asyncio loops take their time from time.monotonic().
            current_time = time.monotonic()

        terminal_statuses = ("completed", "failed", "cancelled")
        sessions_to_remove = [
            session_id
            for session_id, session in training_sessions.items()
            if session.get("status", "unknown") in terminal_statuses
            and session.get("end_time")
            and current_time - session["end_time"] > max_age_seconds
        ]

        # Removal happens after the scan so the dict is not mutated while
        # it is being iterated.
        for session_id in sessions_to_remove:
            cleanup_training_session(session_id)
            logger.info(f"Auto-cleaned old session: {session_id}")

    except Exception as e:
        logger.error(f"Error during automatic cleanup: {e}")

# Initialize FastAPI app
# Interactive API docs are served at /docs (Swagger UI) and /redoc.
app = FastAPI(
    title="Multi-Modal Knowledge Distillation",
    description="Create new AI models through knowledge distillation from multiple pre-trained models",
    version="2.1.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# Add CORS middleware
# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive policy - confirm it is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files and templates
# Both directories are resolved relative to the process working directory.
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# Global variables for tracking training sessions
# training_sessions: session_id -> mutable state dict created by start_training
#   (status, progress, current_step, total_steps, config, start/end time, ...).
# active_connections: session_id -> WebSocket that update_training_status
#   pushes "training_update" messages to.
training_sessions: Dict[str, Dict[str, Any]] = {}
active_connections: Dict[str, WebSocket] = {}

# Startup hook: purge stale sessions and log the host environment
@app.on_event("startup")
async def startup_event():
    """Run startup housekeeping.

    Calls ``cleanup_old_sessions`` and logs the result of
    ``get_system_info``. Any exception is logged and swallowed, so a failure
    here does not stop the application from starting.
    """
    try:
        logger.info("Starting Multi-Modal Knowledge Distillation Platform")

        # Drop finished sessions that are past their retention window
        cleanup_old_sessions()

        logger.info("Initializing core components...")

        # Record the host environment to help with later debugging
        logger.info(f"System Info: {get_system_info()}")

        logger.info("Application startup completed successfully")
    except Exception as startup_error:
        logger.error(f"Error during startup: {startup_error}")

# Shutdown hook: tear down every tracked session, then temporary files
@app.on_event("shutdown")
async def shutdown_event():
    """Release session resources and temporary files when the app stops.

    Every session still present in ``training_sessions`` is passed to
    ``cleanup_training_session``; afterwards ``cleanup_temp_files`` runs.
    Errors are logged, never raised.
    """
    try:
        logger.info("Shutting down application...")

        # Iterate over a snapshot: the cleanup removes entries from the dict
        for active_id in tuple(training_sessions):
            cleanup_training_session(active_id)

        cleanup_temp_files()

        logger.info("Application shutdown completed")
    except Exception as shutdown_error:
        logger.error(f"Error during shutdown: {shutdown_error}")

# Pydantic models for API
# Request body of POST /start-training; the same object is handed to
# run_training() as the job configuration.
class TrainingConfig(BaseModel):
    session_id: str = Field(..., description="Unique session identifier")
    # Each entry is either a plain path/repo-id string or a dict; this file
    # reads the dict keys 'path', 'token' and 'trust_remote_code'.
    teacher_models: List[Union[str, Dict[str, Any]]] = Field(..., description="List of teacher model paths/URLs or model configs")
    student_config: Dict[str, Any] = Field(default_factory=dict, description="Student model configuration")
    # 'max_steps' is read from here by start_training (default 1000).
    training_params: Dict[str, Any] = Field(default_factory=dict, description="Training parameters")
    distillation_strategy: str = Field(default="ensemble", description="Distillation strategy")
    # When set, run_training prefers this token over the environment tokens.
    hf_token: Optional[str] = Field(default=None, description="Hugging Face token")
    trust_remote_code: bool = Field(default=False, description="Trust remote code execution")
    # The two fields below only take effect together (see run_training).
    existing_student_model: Optional[str] = Field(default=None, description="Path to existing trained student model for retraining")
    incremental_training: bool = Field(default=False, description="Whether this is incremental training")

# Response model of GET /progress/{session_id}.
class TrainingStatus(BaseModel):
    session_id: str
    status: str
    # Overall completion; run_training reports values between 0.0 and 1.0.
    progress: float
    current_step: int
    total_steps: int
    loss: Optional[float] = None
    # Human-readable remaining time such as "3m 20s" (see update_training_status).
    eta: Optional[str] = None
    message: str = ""

# Descriptive metadata for a single model.
# NOTE(review): no use of this schema is visible in this part of the file;
# units of `size` are presumably bytes - confirm against the caller.
class ModelInfo(BaseModel):
    name: str
    size: int
    format: str
    modality: str
    architecture: Optional[str] = None

# Descriptive metadata for a dataset entry ("database" in this project's terms).
# The *_ar fields presumably hold Arabic translations of name/description -
# TODO confirm against the UI that consumes them.
class DatabaseInfo(BaseModel):
    name: str
    name_ar: Optional[str] = ""
    dataset_id: str
    category: str = "general"
    description: str = ""
    description_ar: Optional[str] = ""
    size: Optional[str] = "Unknown"
    language: Optional[str] = "Unknown"
    modality: str = "text"
    license: Optional[str] = "Unknown"

# Search parameters for dataset lookup: free-text query, result cap (default 20)
# and an optional category filter.
class DatabaseSearchRequest(BaseModel):
    query: str
    limit: int = 20
    category: Optional[str] = None

# Payload carrying the ids of the datasets a client selected.
class DatabaseSelectionRequest(BaseModel):
    database_ids: List[str]

# Search parameters for model lookup: free-text query, result cap (default 20)
# and an optional model-type filter.
class ModelSearchRequest(BaseModel):
    query: str
    limit: int = 20
    model_type: Optional[str] = None

# Payload carrying a client's model choice: any number of teachers and at
# most one student.
class ModelSelectionRequest(BaseModel):
    teacher_models: List[str] = []
    student_model: Optional[str] = None

# Initialize components
# Everything below is constructed once, at import time, and shared by all
# request handlers in this module.
model_loader = ModelLoader()
distillation_trainer = KnowledgeDistillationTrainer()

# Initialize new advanced components
# The memory manager is shared: the chunk loader, CPU optimizer and medical
# dataset manager all receive the same instance.
memory_manager = AdvancedMemoryManager(max_memory_gb=14.0)  # 14GB for 16GB systems
chunk_loader = AdvancedChunkLoader(memory_manager)
cpu_optimizer = CPUOptimizer(memory_manager)
token_manager = TokenManager()

# Initialize database manager
# PlatformDatabaseManager is src.database_manager.DatabaseManager (aliased on import).
platform_db_manager = PlatformDatabaseManager()

# Initialize models manager
models_manager = ModelsManager()
# database_manager is database.database.DatabaseManager - a different class
# from platform_db_manager above.
database_manager = DatabaseManager()

# Initialize medical components
medical_dataset_manager = MedicalDatasetManager(memory_manager)
dicom_handler = DicomHandler(memory_limit_mb=1000.0)
medical_preprocessor = MedicalPreprocessor()

@app.on_event("startup")
async def startup_event():
    """Prepare the working directories and log system information.

    Both steps are best effort: a directory that cannot be created, or a
    failing ``get_system_info`` call, only produces a warning.
    """
    logger.info("Starting Multi-Modal Knowledge Distillation application")

    # Working directories used by the upload and training code
    for dir_name in ("uploads", "models", "temp", "logs"):
        target = Path(dir_name)
        try:
            target.mkdir(exist_ok=True)
            logger.info(f"Created/verified directory: {dir_name}")
        except PermissionError:
            logger.warning(f"Cannot create directory {dir_name}, using temp directory")
        except Exception as mkdir_error:
            logger.warning(f"Error creating directory {dir_name}: {mkdir_error}")

    # Host details are informational only
    try:
        logger.info(f"System info: {get_system_info()}")
    except Exception as info_error:
        logger.warning(f"Could not get system info: {info_error}")

@app.on_event("shutdown")
async def shutdown_event():
    """Log the shutdown and remove temporary files via ``cleanup_temp_files``."""
    # NOTE(review): this is the second function in the file named
    # shutdown_event. Registration happens through the decorator, so presumably
    # the earlier handler (which also tears down training sessions) stays
    # registered too and cleanup_temp_files runs twice - confirm.
    logger.info("Shutting down application")
    cleanup_temp_files()

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Render ``index.html``, the single-page web interface."""
    # NOTE(review): the template receives an empty dict in place of a real
    # Request object; template helpers that need the request (e.g. url_for)
    # would not work with it - confirm index.html does not rely on them.
    template_context = {"request": {}}
    return templates.TemplateResponse("index.html", template_context)

@app.get("/health")
async def health_check():
    """Report service health for Docker and monitoring probes.

    Returns:
        On success a dict with ``status="healthy"`` plus memory, token,
        feature-flag and system sections. If any lookup raises, a dict with
        ``status="unhealthy"`` and the error text is returned instead; the
        endpoint itself never raises.
    """
    # NOTE(review): the version reported here ("2.0.0") differs from the
    # version passed to FastAPI(...) ("2.1.0") - confirm which is current.
    try:
        mem = memory_manager.get_memory_info()
        fallback_token = token_manager.get_token()

        report = {
            "status": "healthy",
            "version": "2.0.0",
            "timestamp": datetime.now().isoformat(),
        }
        report["memory"] = {
            "usage_percent": mem.get("process_memory_percent", 0),
            "available_gb": mem.get("system_memory_available_gb", 0),
            "status": memory_manager.check_memory_status(),
        }
        report["tokens"] = {
            "default_available": bool(fallback_token),
            "total_tokens": len(token_manager.list_tokens()),
        }
        # Static capability flags advertised to clients
        report["features"] = dict.fromkeys(
            (
                "memory_management",
                "chunk_loading",
                "cpu_optimization",
                "medical_datasets",
                "token_management",
            ),
            True,
        )
        report["system_info"] = get_system_info()
        return report
    except Exception as health_error:
        logger.error(f"Health check failed: {health_error}")
        failure = {"status": "unhealthy", "error": str(health_error)}
        failure["timestamp"] = datetime.now().isoformat()
        failure["version"] = "2.0.0"
        return failure

@app.get("/test-token")
async def test_token():
    """Check whether a Hugging Face token from the environment works.

    The first non-empty value among ``HF_TOKEN``, ``HUGGINGFACE_TOKEN`` and
    ``HUGGINGFACE_HUB_TOKEN`` is used. Validation consists of fetching the
    config of ``google/gemma-2b`` with that token.

    Returns:
        A dict with ``token_available``, a ``message`` and, when a token was
        found, ``token_valid``.
    """
    hf_token = None
    for env_name in ('HF_TOKEN', 'HUGGINGFACE_TOKEN', 'HUGGINGFACE_HUB_TOKEN'):
        hf_token = os.getenv(env_name)
        if hf_token:
            break

    if not hf_token:
        return {
            "token_available": False,
            "message": "No HF token found in environment variables"
        }

    try:
        # Imported lazily so the module loads without transformers installed
        from transformers import AutoConfig
        # Only success/failure matters; the returned config is discarded
        AutoConfig.from_pretrained("google/gemma-2b", token=hf_token)
    except Exception as validation_error:
        return {
            "token_available": True,
            "token_valid": False,
            "message": f"Token validation failed: {str(validation_error)}"
        }

    return {
        "token_available": True,
        "token_valid": True,
        "message": "Token is working correctly"
    }

@app.post("/test-model")
async def test_model_loading(request: Dict[str, Any]):
    """Probe whether a model can be inspected by ``model_loader``.

    Request keys read here: ``model_path`` (required), ``access_type``
    (default ``'read'``) and ``token`` (``'auto'`` or empty means "look one
    up").

    Returns:
        ``{"success": True, "model_info": ..., "message": ...}`` on success,
        otherwise ``{"success": False, "error": ...}`` with an optional list
        of user-facing ``suggestions`` derived from the error text.
    """
    try:
        model_path = request.get('model_path')
        if not model_path:
            return {"success": False, "error": "model_path is required"}

        # Resolve a token: explicit value, then token manager, then environment
        access_type = request.get('access_type', 'read')
        hf_token = request.get('token')
        needs_lookup = (not hf_token) or hf_token == 'auto'
        if needs_lookup:
            hf_token = token_manager.get_token_for_task(access_type)
            if not hf_token:
                logger.warning(f"No suitable token found for {access_type} access")
                hf_token = (
                    os.getenv('HF_TOKEN') or
                    os.getenv('HUGGINGFACE_TOKEN') or
                    os.getenv('HUGGINGFACE_HUB_TOKEN')
                )
            else:
                logger.info(f"Using {access_type} token for model testing")

        # NOTE(review): neither the resolved token nor the request's
        # 'trust_remote_code' flag is forwarded to get_model_info - confirm
        # whether that call is expected to use them.
        model_info = await model_loader.get_model_info(model_path)

        return {
            "success": True,
            "model_info": model_info,
            "message": f"Model {model_path} can be loaded"
        }

    except Exception as load_error:
        error_msg = str(load_error)
        lowered = error_msg.lower()

        # Ordered rules: the first rule with a matching keyword wins
        hint_rules = (
            (('trust_remote_code',), ["فعّل 'Trust Remote Code' للنماذج التي تتطلب كود مخصص"]),
            (('gated',), ["النموذج يتطلب إذن وصول خاص - استخدم رمز مخصص"]),
            (('siglip',), ["جرب تفعيل 'Trust Remote Code' لنماذج SigLIP"]),
            (('401', 'authentication'), [
                "تحقق من رمز Hugging Face الخاص بك",
                "تأكد من أن الرمز له صلاحية الوصول لهذا النموذج",
            ]),
            (('404', 'not found'), [
                "تحقق من اسم مستودع النموذج",
                "تأكد من وجود النموذج على Hugging Face",
            ]),
        )
        suggestions = []
        for keywords, hints in hint_rules:
            if any(keyword in lowered for keyword in keywords):
                suggestions = list(hints)
                break

        return {
            "success": False,
            "error": error_msg,
            "suggestions": suggestions
        }

@app.post("/upload", response_model=Dict[str, Any])
async def upload_model(
    background_tasks: BackgroundTasks,
    files: List[UploadFile] = File(...),
    model_names: List[str] = Form(...)
):
    """Upload model files.

    Each file is validated with ``validate_file``, stored under ``uploads/``
    with a random UUID-based name (keeping the original extension) and then
    inspected with ``model_loader.get_model_info``. Files and names are paired
    positionally.

    Args:
        background_tasks: Used to schedule ``cleanup_temp_files`` after the
            response is sent.
        files: Uploaded model files.
        model_names: Display name for each uploaded file.

    Returns:
        ``{"success": True, "models": [...], "message": ...}`` where every
        model entry has ``id``, ``name``, ``filename``, ``path``, ``size`` and
        ``info``.

    Raises:
        HTTPException: 400 when a file fails validation, 500 for any other
            error.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read keeps memory flat for large checkpoints

    try:
        uploaded_models = []
        upload_dir = Path("uploads")
        upload_dir.mkdir(parents=True, exist_ok=True)

        for file, name in zip(files, model_names):
            # Validate file
            validation_result = validate_file(file)
            if not validation_result["valid"]:
                raise HTTPException(status_code=400, detail=validation_result["error"])

            # Generate unique filename (the client-supplied name is never
            # used as a path; only its extension is kept)
            file_id = str(uuid.uuid4())
            file_extension = Path(file.filename or "").suffix
            file_path = upload_dir / f"{file_id}{file_extension}"

            # Stream to disk instead of holding the whole upload in memory
            size = 0
            with open(file_path, "wb") as buffer:
                while True:
                    chunk = await file.read(chunk_size)
                    if not chunk:
                        break
                    buffer.write(chunk)
                    size += len(chunk)

            # Get model info
            model_info = await model_loader.get_model_info(str(file_path))

            uploaded_models.append({
                "id": file_id,
                "name": name,
                "filename": file.filename,
                "path": str(file_path),
                "size": size,
                "info": model_info
            })

            logger.info(f"Uploaded model: {name} ({file.filename})")

        # Schedule cleanup of old files
        background_tasks.add_task(cleanup_temp_files, max_age_hours=24)

        return {
            "success": True,
            "models": uploaded_models,
            "message": f"Successfully uploaded {len(uploaded_models)} model(s)"
        }

    except HTTPException:
        # Keep the intended status code (e.g. 400 for an invalid file)
        # instead of converting it into a 500 below.
        raise
    except Exception as e:
        logger.error(f"Error uploading models: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/start-training", response_model=Dict[str, Any])
async def start_training(
    background_tasks: BackgroundTasks,
    config: TrainingConfig
):
    """Start knowledge distillation training.

    Registers a new entry in ``training_sessions`` and schedules
    ``run_training`` as a background task. A session id can be reused once
    its previous run has finished (failed, completed or cancelled); reusing
    the id of a run that is still in progress is rejected.

    Args:
        background_tasks: Used to schedule the training coroutine.
        config: Validated training request.

    Returns:
        ``{"success": True, "session_id": ..., "message": ...}``.

    Raises:
        HTTPException: 400 when the session is still in progress, 500 for any
            other error.
    """
    # Terminal states: the previous run is over, so the id may be reused.
    restartable_statuses = ("failed", "completed", "cancelled")
    # Every state a live session passes through. Besides the two set here and
    # at the top of run_training, this includes the phase names run_training
    # publishes through update_training_status; without them an actively
    # training session would be mistaken for an unknown one and restarted
    # while the first run is still executing.
    active_statuses = (
        "initializing",
        "running",
        "loading_student",
        "loading_models",
        "initializing_student",
        "training",
        "saving",
    )

    try:
        session_id = config.session_id

        # Handle existing sessions
        if session_id in training_sessions:
            existing_session = training_sessions[session_id]
            existing_status = existing_session.get("status", "unknown")

            if existing_status in restartable_statuses:
                logger.info(f"Restarting session {session_id} (previous status: {existing_status})")
                # Clean up old session
                cleanup_training_session(session_id)
            elif existing_status in active_statuses:
                raise HTTPException(
                    status_code=400,
                    detail=f"Training session already running (status: {existing_status})"
                )
            else:
                # Unknown status, clean up and restart
                logger.warning(f"Unknown session status {existing_status}, cleaning up")
                cleanup_training_session(session_id)

        # Set HF token from environment if available
        hf_token = os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_TOKEN')
        if hf_token:
            os.environ['HF_TOKEN'] = hf_token
            logger.info("Using Hugging Face token from environment")

        # Check for large models and warn (size is guessed from the name only)
        size_indicators = ('27b', '70b', '13b')
        large_models = []
        for model_info in config.teacher_models:
            model_path = model_info if isinstance(model_info, str) else model_info.get('path', '')
            # A dict entry may carry a missing/None path; treat it as empty
            lowered_path = str(model_path or '').lower()
            if any(size_indicator in lowered_path for size_indicator in size_indicators):
                large_models.append(model_path)

        # Initialize training session with safe config serialization
        safe_config = safe_json_serialize(config.dict())
        training_sessions[session_id] = {
            "status": "initializing",
            "progress": 0.0,
            "current_step": 0,
            "total_steps": config.training_params.get("max_steps", 1000),
            "config": safe_config,
            "start_time": None,
            "end_time": None,
            "model_path": None,
            "logs": [],
            "large_models": large_models,
            "message": "Initializing training session..." + (
                f" (Large models detected: {', '.join(large_models)})" if large_models else ""
            )
        }

        # Start training in background
        background_tasks.add_task(run_training, session_id, config)

        logger.info(f"Started training session: {session_id}")

        return {
            "success": True,
            "session_id": session_id,
            "message": "Training started successfully"
        }

    except HTTPException:
        # Preserve deliberate client errors (400) instead of turning them
        # into a generic 500 below.
        raise
    except Exception as e:
        logger.error(f"Error starting training: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

async def run_training(session_id: str, config: TrainingConfig):
    """Run knowledge distillation training in background.

    Phases, each published through ``update_training_status``:

    1. resolve the Hugging Face token (config first, then environment);
    2. optionally load an existing student and merge its original teachers
       with the requested ones (incremental training);
    3. load every teacher model, retrying once with
       ``trust_remote_code=True`` for unrecognized architectures;
    4. create the student, train it, and save it under
       ``models/distilled_model_<session_id>/``.

    Reported progress only moves forward: 0.05 (student load), 0.1-0.2
    (teacher loading), 0.2 (student init), 0.3-0.9 (training), 0.9 (saving),
    1.0 (done).

    The function never raises: any failure is logged and recorded on the
    session with status ``"failed"``.

    Args:
        session_id: Key of the session entry created by ``start_training``.
        config: Validated training request.
    """
    try:
        session = training_sessions[session_id]
        session["status"] = "running"
        session["start_time"] = asyncio.get_event_loop().time()

        # Set HF token for this session - prioritize config token
        config_token = getattr(config, 'hf_token', None)
        env_token = (
            os.getenv('HF_TOKEN') or
            os.getenv('HUGGINGFACE_TOKEN') or
            os.getenv('HUGGINGFACE_HUB_TOKEN')
        )

        hf_token = config_token or env_token

        if hf_token:
            logger.info(f"Using Hugging Face token from {'config' if config_token else 'environment'}")
            # NOTE: os.environ is process-wide, so this token stays visible to
            # every later session in the same process.
            os.environ['HF_TOKEN'] = hf_token
        else:
            logger.warning("No Hugging Face token found - private models may fail")

        # Handle existing student model for incremental training
        existing_student = None
        if config.existing_student_model and config.incremental_training:
            try:
                await update_training_status(session_id, "loading_student", 0.05, "Loading existing student model...")

                # Determine student source and load accordingly.
                # TrainingConfig declares no 'student_source' field, so this
                # falls back to 'local' unless the attribute is set elsewhere.
                student_source = getattr(config, 'student_source', 'local')
                student_path = config.existing_student_model

                if student_source == 'huggingface' or ('/' in student_path and not Path(student_path).exists()):
                    logger.info(f"Loading student model from Hugging Face: {student_path}")
                    existing_student = await model_loader.load_trained_student(student_path)
                elif student_source == 'space':
                    logger.info(f"Loading student model from Hugging Face Space: {student_path}")
                    existing_student = await model_loader.load_trained_student_from_space(student_path)
                else:
                    logger.info(f"Loading student model from local path: {student_path}")
                    existing_student = await model_loader.load_trained_student(student_path)

                logger.info(f"Successfully loaded existing student model: {existing_student.get('type', 'unknown')}")

                # Merge original teachers with new teachers
                original_teachers = existing_student.get('original_teachers', [])
                new_teachers = [
                    model_info if isinstance(model_info, str) else model_info.get('path', '')
                    for model_info in config.teacher_models
                ]

                # Combine teachers (avoid duplicates, keep order)
                all_teachers = original_teachers.copy()
                for teacher in new_teachers:
                    if teacher not in all_teachers:
                        all_teachers.append(teacher)

                logger.info(f"Incremental training: Original teachers: {original_teachers}")
                logger.info(f"Incremental training: New teachers: {new_teachers}")
                logger.info(f"Incremental training: All teachers: {all_teachers}")

                # Update config with all teachers
                config.teacher_models = all_teachers

            except Exception as e:
                logger.error(f"Error loading existing student model: {e}")
                await update_training_status(session_id, "failed", session.get("progress", 0), f"Failed to load existing student: {str(e)}")
                return

        # Load teacher models
        await update_training_status(session_id, "loading_models", 0.1, "Loading teacher models...")
        teacher_models = []
        # training_params wins when it carries the flag; otherwise honor the
        # top-level TrainingConfig.trust_remote_code field (default False).
        trust_remote_code = config.training_params.get('trust_remote_code', config.trust_remote_code)

        total_models = len(config.teacher_models)

        def load_progress(models_done: int) -> float:
            """Map the number of processed teachers onto the 0.1-0.2 band."""
            # The band ends at 0.2, where student initialization starts, so
            # the reported progress never moves backwards.
            return 0.1 + (models_done * 0.1 / total_models)

        for i, model_info in enumerate(config.teacher_models):
            try:
                # Handle both old format (string) and new format (dict)
                if isinstance(model_info, str):
                    model_path = model_info
                    model_token = hf_token
                    model_trust_code = trust_remote_code
                else:
                    model_path = model_info.get('path', model_info)
                    model_token = model_info.get('token') or hf_token
                    model_trust_code = model_info.get('trust_remote_code', trust_remote_code)

                # Update progress
                progress = load_progress(i)
                await update_training_status(
                    session_id,
                    "loading_models",
                    progress,
                    f"Loading model {i+1}/{total_models}: {model_path}..."
                )

                logger.info(f"Loading model {model_path} with trust_remote_code={model_trust_code}")

                # Special handling for known problematic models
                if model_path == 'Wan-AI/Wan2.2-TI2V-5B':
                    logger.info(f"Detected ti2v model {model_path}, forcing trust_remote_code=True")
                    model_trust_code = True
                elif model_path == 'deepseek-ai/DeepSeek-V3.1-Base':
                    logger.warning(f"Skipping {model_path}: Requires GPU with FP8 quantization support")
                    await update_training_status(
                        session_id,
                        "loading_models",
                        progress,
                        f"Skipping {model_path}: Requires GPU with FP8 quantization"
                    )
                    continue

                model = await model_loader.load_model(
                    model_path,
                    token=model_token,
                    trust_remote_code=model_trust_code
                )
                teacher_models.append(model)
                logger.info(f"Successfully loaded model: {model_path}")

                # Update progress after successful load
                progress = load_progress(i + 1)
                await update_training_status(
                    session_id,
                    "loading_models",
                    progress,
                    f"Loaded {i+1}/{total_models} models successfully"
                )

            except Exception as e:
                error_msg = f"Failed to load model {model_path}: {str(e)}"
                logger.error(error_msg)

                # Provide helpful suggestions based on the error
                suggestions = []
                error_str = str(e).lower()

                # Check if we should retry with trust_remote_code=True
                if not model_trust_code and ('ti2v' in error_str or 'does not recognize this architecture' in error_str):
                    try:
                        logger.info(f"Retrying {model_path} with trust_remote_code=True")
                        await update_training_status(
                            session_id,
                            "loading_models",
                            progress,
                            f"Retrying {model_path} with trust_remote_code=True..."
                        )

                        model = await model_loader.load_model(
                            model_path,
                            token=model_token,
                            trust_remote_code=True
                        )
                        teacher_models.append(model)
                        logger.info(f"Successfully loaded model on retry: {model_path}")

                        # Update progress after successful retry
                        progress = load_progress(i + 1)
                        await update_training_status(
                            session_id,
                            "loading_models",
                            progress,
                            f"Loaded {i+1}/{total_models} models successfully (retry)"
                        )
                        continue

                    except Exception as retry_e:
                        logger.error(f"Retry also failed for {model_path}: {str(retry_e)}")
                        error_msg = f"Failed even with trust_remote_code=True: {str(retry_e)}"

                # Hints are keyed on the ORIGINAL error text; first match wins
                if 'trust_remote_code' in error_str:
                    suggestions.append("Try enabling 'Trust Remote Code' option")
                elif 'gated' in error_str or 'access' in error_str:
                    suggestions.append("This model requires access permission and a valid HF token")
                elif 'siglip' in error_str or 'unknown' in error_str:
                    suggestions.append("This model may require special loading. Try enabling 'Trust Remote Code'")
                elif 'connection' in error_str or 'network' in error_str:
                    suggestions.append("Check your internet connection")
                elif 'ti2v' in error_str:
                    suggestions.append("This ti2v model requires trust_remote_code=True")

                if suggestions:
                    error_msg += f". Suggestions: {'; '.join(suggestions)}"

                # One failing teacher aborts the whole session
                await update_training_status(session_id, "failed", session.get("progress", 0), error_msg)
                return

        # Distillation needs at least one teacher; the list can be empty when
        # none were requested or every requested model was skipped above.
        if not teacher_models:
            await update_training_status(
                session_id,
                "failed",
                session.get("progress", 0),
                "No teacher models were loaded; at least one teacher model is required"
            )
            return

        # Initialize student model
        await update_training_status(session_id, "initializing_student", 0.2, "Initializing student model...")
        student_model = await distillation_trainer.create_student_model(
            teacher_models, config.student_config
        )

        # Run distillation training
        await update_training_status(session_id, "training", 0.3, "Starting knowledge distillation...")

        async def progress_callback(step: int, total_steps: int, loss: float, metrics: Dict[str, Any]):
            """Translate trainer steps into the 30%-90% band of overall progress."""
            # Avoid ZeroDivisionError if the trainer reports zero total steps
            fraction = (step / total_steps) if total_steps else 0.0
            progress = 0.3 + fraction * 0.6  # 30% to 90%
            await update_training_status(
                session_id, "training", progress,
                f"Training step {step}/{total_steps}, Loss: {loss:.4f}",
                current_step=step, loss=loss
            )

        trained_model = await distillation_trainer.train(
            student_model, teacher_models, config.training_params, progress_callback
        )

        # Save trained model with metadata
        await update_training_status(session_id, "saving", 0.9, "Saving trained model...")

        # Create model directory with proper structure
        model_dir = Path("models") / f"distilled_model_{session_id}"
        model_dir.mkdir(parents=True, exist_ok=True)

        model_path = model_dir / "pytorch_model.safetensors"

        # Prepare training metadata for saving
        training_metadata = {
            'session_id': session_id,
            'teacher_models': [
                model_info if isinstance(model_info, str) else model_info.get('path', '')
                for model_info in config.teacher_models
            ],
            'strategy': config.distillation_strategy,
            'training_params': config.training_params,
            'incremental_training': config.incremental_training,
            'existing_student_model': config.existing_student_model
        }

        await distillation_trainer.save_model(trained_model, str(model_path), training_metadata)

        # Complete training
        session["status"] = "completed"
        session["progress"] = 1.0
        # Same clock as start_time (event loop time, not wall-clock time)
        session["end_time"] = asyncio.get_event_loop().time()
        session["model_path"] = model_path
        session["training_metadata"] = training_metadata

        await update_training_status(session_id, "completed", 1.0, "Training completed successfully!")

        logger.info(f"Training session {session_id} completed successfully")

    except Exception as e:
        logger.error(f"Training session {session_id} failed: {str(e)}")
        # The session may already be gone (e.g. cleaned up meanwhile)
        session = training_sessions.get(session_id, {})
        session["status"] = "failed"
        session["error"] = str(e)
        await update_training_status(session_id, "failed", session.get("progress", 0), f"Training failed: {str(e)}")

async def update_training_status(
    session_id: str, 
    status: str, 
    progress: float, 
    message: str,
    current_step: int = None,
    loss: float = None
):
    """Record the latest state of a training session and push it to its WebSocket client.

    Unknown session ids are ignored. ``current_step`` and ``loss`` are only
    written when supplied, so omitting them keeps the values already stored.
    An ETA string is derived by linear extrapolation from the elapsed time
    while the session has a ``start_time`` and progress is between 0 and 1.
    """
    if session_id not in training_sessions:
        return

    record = training_sessions[session_id]
    record.update(status=status, progress=progress, message=message)
    if current_step is not None:
        record["current_step"] = current_step
    if loss is not None:
        record["loss"] = loss

    # ETA: assume the remaining work proceeds at the average rate so far
    started_at = record.get("start_time")
    if started_at and progress > 0:
        elapsed = asyncio.get_event_loop().time() - started_at
        if progress < 1.0:
            remaining = (elapsed / progress) * (1.0 - progress)
            record["eta"] = f"{int(remaining // 60)}m {int(remaining % 60)}s"

    # Nothing to broadcast when no client is attached to this session
    if session_id not in active_connections:
        return

    try:
        payload = {
            "type": "training_update",
            "data": safe_json_serialize(record),
        }
        await active_connections[session_id].send_json(payload)
    except Exception as e:
        logger.warning(f"Failed to send WebSocket update: {e}")
        # A failed send is treated as a dead client: forget the connection
        active_connections.pop(session_id, None)

@app.get("/progress/{session_id}", response_model=TrainingStatus)
async def get_training_progress(session_id: str):
    """Return a ``TrainingStatus`` snapshot for the given session.

    Raises:
        HTTPException: 404 when the session id is not registered.
    """
    if session_id not in training_sessions:
        raise HTTPException(status_code=404, detail="Training session not found")

    state = training_sessions[session_id]

    # Mandatory fields are read with plain indexing; optional ones fall back to defaults
    fields = {key: state[key] for key in ("status", "progress", "current_step", "total_steps")}
    fields["loss"] = state.get("loss")
    fields["eta"] = state.get("eta")
    fields["message"] = state.get("message", "")

    return TrainingStatus(session_id=session_id, **fields)

@app.get("/download/{session_id}")
async def download_model(session_id: str):
    """Download the trained model produced by a completed training session.

    If the session's recorded ``model_path`` is a single file it is streamed
    as-is; if it is a directory, its contents are packed into a temporary zip
    archive which is removed again once the response has been sent.

    Raises:
        HTTPException: 404 when the session or the model file is missing,
            400 when training has not completed, 500 on unexpected errors.
    """
    import tempfile
    import zipfile

    try:
        if session_id not in training_sessions:
            raise HTTPException(status_code=404, detail="Training session not found")

        session = training_sessions[session_id]
        if session["status"] != "completed":
            raise HTTPException(status_code=400, detail="Training not completed")

        model_path = session.get("model_path")
        if not model_path:
            # Try to find model in models directory
            models_dir = Path("models")
            possible_paths = [
                models_dir / f"distilled_model_{session_id}",
                models_dir / f"distilled_model_{session_id}.safetensors",
                models_dir / f"model_{session_id}",
                models_dir / f"student_model_{session_id}"
            ]
            model_path = next((str(path) for path in possible_paths if path.exists()), None)

        if not model_path or not Path(model_path).exists():
            raise HTTPException(status_code=404, detail="Model file not found. The model may not have been saved properly.")

        model_dir = Path(model_path)
        if model_dir.is_file():
            # Single file
            return FileResponse(
                model_path,
                media_type="application/octet-stream",
                filename=f"distilled_model_{session_id}.safetensors"
            )

        # Directory with multiple files: reserve a temp file name, then close the
        # handle immediately so it is not leaked while zipfile reopens the path.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.zip') as temp_zip:
            zip_path = temp_zip.name

        try:
            with zipfile.ZipFile(zip_path, 'w') as zipf:
                for file_path in model_dir.rglob('*'):
                    if file_path.is_file():
                        zipf.write(file_path, file_path.relative_to(model_dir))
        except Exception:
            # Do not leave a partial archive behind when zipping fails
            if os.path.exists(zip_path):
                os.unlink(zip_path)
            raise

        # Delete the archive after the response has been streamed to the client
        cleanup = BackgroundTasks()
        cleanup.add_task(os.unlink, zip_path)

        return FileResponse(
            zip_path,
            media_type="application/zip",
            filename=f"distilled_model_{session_id}.zip",
            background=cleanup
        )

    except HTTPException:
        # Preserve the intended 4xx status instead of rewriting it as a 500
        raise
    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        raise HTTPException(status_code=500, detail=f"Download failed: {str(e)}")

@app.post("/upload-to-hf/{session_id}")
async def upload_to_huggingface(
    session_id: str,
    repo_name: str = Form(...),
    description: str = Form(""),
    private: bool = Form(False),
    hf_token: str = Form(...)
):
    """Upload the trained model of a completed session to the Hugging Face Hub.

    The repository is created if needed, every file in the model directory is
    uploaded (a fixed set of essential files first), and finally a generated
    README.md model card is uploaded, replacing any README uploaded from disk.
    Failures on individual model files are logged and skipped.

    Args:
        session_id: Training session whose model should be uploaded.
        repo_name: Target repository in ``username/model-name`` form.
        description: Optional text for the model card.
        private: Whether the repository should be created as private.
        hf_token: Hugging Face token used for all Hub calls.

    Returns:
        A dict with ``success``, ``repo_url``, ``uploaded_files`` and ``message``.

    Raises:
        HTTPException: 404 (unknown session / missing model), 400 (training not
            completed, bad repo name, repo creation failure), 401/403 (token or
            permission problems), 500 (anything unexpected).
    """
    try:
        if session_id not in training_sessions:
            raise HTTPException(status_code=404, detail="Training session not found")

        session = training_sessions[session_id]
        if session["status"] != "completed":
            raise HTTPException(status_code=400, detail="Training not completed")

        model_path = session.get("model_path")
        if not model_path or not Path(model_path).exists():
            raise HTTPException(status_code=404, detail="Model file not found")

        # Import huggingface_hub
        try:
            from huggingface_hub import HfApi, create_repo
        except ImportError:
            raise HTTPException(status_code=500, detail="huggingface_hub not installed")

        # Initialize HF API
        api = HfApi(token=hf_token)

        # Validate repository name format
        if '/' not in repo_name:
            raise HTTPException(status_code=400, detail="Repository name must be in format 'username/model-name'")

        username = repo_name.split('/', 1)[0]

        # Create repository with better error handling
        try:
            repo_url = create_repo(
                repo_id=repo_name,
                token=hf_token,
                private=private,
                exist_ok=True
            )
            logger.info(f"Created/accessed repository: {repo_url}")
        except Exception as e:
            # Map the Hub failure to the closest HTTP status based on the error text
            error_msg = str(e)
            if "403" in error_msg or "Forbidden" in error_msg:
                raise HTTPException(
                    status_code=403,
                    detail=f"Permission denied. Please check: 1) Your token has 'Write' permissions, 2) You own the namespace '{username}', 3) The repository name is correct. Error: {error_msg}"
                )
            elif "401" in error_msg or "Unauthorized" in error_msg:
                raise HTTPException(
                    status_code=401,
                    detail=f"Invalid token. Please check your Hugging Face token. Error: {error_msg}"
                )
            else:
                raise HTTPException(status_code=400, detail=f"Failed to create repository: {error_msg}")

        # Upload model files
        model_path_obj = Path(model_path)
        uploaded_files = []

        # Determine the model directory
        if model_path_obj.is_file():
            model_dir = model_path_obj.parent
        else:
            model_dir = model_path_obj

        # Upload all files in the model directory
        essential_files = [
            'pytorch_model.safetensors', 'config.json', 'model.py',
            'training_history.json', 'README.md'
        ]

        # Upload essential files first
        for file_name in essential_files:
            file_path = model_dir / file_name
            if file_path.exists():
                try:
                    api.upload_file(
                        path_or_fileobj=str(file_path),
                        path_in_repo=file_name,
                        repo_id=repo_name,
                        token=hf_token
                    )
                    uploaded_files.append(file_name)
                    logger.info(f"Uploaded {file_name}")
                except Exception as e:
                    logger.warning(f"Failed to upload {file_name}: {e}")

        # Upload any additional files
        for file_path in model_dir.rglob('*'):
            if not file_path.is_file() or file_path.name in essential_files:
                continue
            # Computed outside the try so the warning below always names the right file
            relative_path = file_path.relative_to(model_dir)
            try:
                api.upload_file(
                    path_or_fileobj=str(file_path),
                    path_in_repo=str(relative_path),
                    repo_id=repo_name,
                    token=hf_token
                )
                uploaded_files.append(str(relative_path))
                logger.info(f"Uploaded additional file: {relative_path}")
            except Exception as e:
                logger.warning(f"Failed to upload {relative_path}: {e}")

        # Create README.md
        config_info = session.get("config", {})
        teacher_models_raw = config_info.get("teacher_models", [])

        # Extract model paths from teacher_models (handle both string and dict formats)
        teacher_models = []
        for model in teacher_models_raw:
            if isinstance(model, str):
                teacher_models.append(model)
            elif isinstance(model, dict):
                teacher_models.append(model.get('path', str(model)))
            else:
                teacher_models.append(str(model))

        readme_content = f"""---
license: apache-2.0
tags:
- knowledge-distillation
- pytorch
- transformers
base_model: {teacher_models[0] if teacher_models else 'unknown'}
---

# {repo_name}

This model was created using knowledge distillation from the following teacher model(s):
{chr(10).join([f"- {model}" for model in teacher_models])}

## Model Description

{description if description else 'A distilled model created using multi-modal knowledge distillation.'}

## Training Details

- **Teacher Models**: {', '.join(teacher_models)}
- **Distillation Strategy**: {config_info.get('distillation_strategy', 'ensemble')}
- **Training Steps**: {config_info.get('training_params', {}).get('max_steps', 'unknown')}
- **Learning Rate**: {config_info.get('training_params', {}).get('learning_rate', 'unknown')}

## Usage

```python
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained("{repo_name}")
tokenizer = AutoTokenizer.from_pretrained("{teacher_models[0] if teacher_models else 'bert-base-uncased'}")
```

## Created with

This model was created using the Multi-Modal Knowledge Distillation platform.
"""

        # Upload README
        api.upload_file(
            path_or_fileobj=readme_content.encode(),
            path_in_repo="README.md",
            repo_id=repo_name,
            token=hf_token
        )
        # An on-disk README.md may already be listed; report it only once
        if "README.md" not in uploaded_files:
            uploaded_files.append("README.md")

        return {
            "success": True,
            "repo_url": f"https://huggingface.co/{repo_name}",
            "uploaded_files": uploaded_files,
            "message": f"Model successfully uploaded to {repo_name}"
        }

    except HTTPException:
        # Keep the specific 4xx/5xx status chosen above instead of a generic 500
        raise
    except Exception as e:
        logger.error(f"Error uploading to Hugging Face: {e}")
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")

@app.post("/validate-repo-name")
async def validate_repo_name(request: Dict[str, Any]):
    """Check that a repo name is well-formed and belongs to the token's account.

    Reads ``repo_name`` and ``hf_token`` from the request body. Always returns
    a dict with a ``valid`` flag; problems are reported through ``error``
    rather than raised.
    """
    def rejected(reason, **extra):
        # Uniform shape for every negative answer
        return {"valid": False, "error": reason, **extra}

    try:
        repo_name = request.get('repo_name', '').strip()
        hf_token = request.get('hf_token', '').strip()

        if not (repo_name and hf_token):
            return rejected("Repository name and token are required")

        if '/' not in repo_name:
            return rejected("Repository name must be in format 'username/model-name'")

        username, model_name = repo_name.split('/', 1)

        # Compare the requested namespace with the account that owns the token
        try:
            from huggingface_hub import HfApi

            token_username = HfApi(token=hf_token).whoami().get('name', '')

            if username == token_username:
                return {
                    "valid": True,
                    "message": f"Repository name '{repo_name}' is valid for your account",
                    "username": token_username
                }

            return rejected(
                f"Username mismatch. Token belongs to '{token_username}' but trying to create repo under '{username}'. Use '{token_username}/{model_name}' instead.",
                suggested_name=f"{token_username}/{model_name}"
            )

        except Exception as e:
            return rejected(f"Token validation failed: {str(e)}")

    except Exception as e:
        return rejected(f"Validation error: {str(e)}")

@app.post("/test-space")
async def test_space(request: Dict[str, Any]):
    """Probe a Hugging Face Space and report which model weight files it contains.

    Reads ``space_name`` (required, ``username/space-name``) and an optional
    ``hf_token`` from the request body. Always returns a dict with a
    ``success`` flag; failures are reported through ``error``.
    """
    def failure(reason):
        return {"success": False, "error": reason}

    try:
        space_name = request.get('space_name', '').strip()
        hf_token = request.get('hf_token', '').strip()

        if not space_name:
            return failure("Space name is required")

        if '/' not in space_name:
            return failure("Space name must be in format 'username/space-name'")

        try:
            from huggingface_hub import HfApi
            api = HfApi(token=hf_token or None)

            # Step 1: the Space must exist and be visible to this token
            try:
                api.space_info(space_name)
                logger.info(f"Found Space: {space_name}")
            except Exception as e:
                return failure(f"Space not found or not accessible: {str(e)}")

            # Step 2: inspect the file listing for model weights
            try:
                files = api.list_repo_files(space_name, repo_type="space")
                weight_suffixes = ('.safetensors', '.bin', '.pt')
                model_files = [name for name in files if name.endswith(weight_suffixes)]
                has_models_dir = any(name.startswith('models/') for name in files)

                return {
                    "success": True,
                    "space_info": {
                        "name": space_name,
                        "model_files": model_files,
                        "models_directory": has_models_dir,
                        "total_files": len(files)
                    },
                    "models": model_files,
                    "message": f"Space {space_name} is accessible"
                }

            except Exception as e:
                # The Space exists but its files could not be listed
                return {
                    "success": True,
                    "space_info": {"name": space_name},
                    "models": [],
                    "message": f"Space {space_name} exists but file listing not available (might be private)"
                }

        except Exception as e:
            return failure(f"Error accessing Hugging Face: {str(e)}")

    except Exception as e:
        logger.error(f"Error testing Space: {e}")
        return failure(f"Test failed: {str(e)}")

@app.get("/trained-students")
async def list_trained_students():
    """Scan the ``models`` directory for trained student models available for retraining.

    A sub-directory counts as a student model when it holds a ``*config.json``
    file whose ``is_student_model`` flag is truthy. Directories that cannot be
    read are logged and skipped.

    Raises:
        HTTPException: 500 when the scan itself fails.
    """
    try:
        root = Path("models")
        trained_students = []

        candidates = root.iterdir() if root.exists() else ()
        for model_dir in candidates:
            if not model_dir.is_dir():
                continue

            try:
                config_files = list(model_dir.glob("*config.json"))
                history_files = list(model_dir.glob("*training_history.json"))

                if not config_files:
                    continue

                with open(config_files[0], 'r') as f:
                    config = json.load(f)

                if not config.get('is_student_model', False):
                    continue

                # Training history is optional; an absent file means an empty history
                history = {}
                if history_files:
                    with open(history_files[0], 'r') as f:
                        history = json.load(f)

                trained_students.append({
                    "id": model_dir.name,
                    "name": model_dir.name,
                    "path": str(model_dir),
                    "type": "trained_student",
                    "created_at": config.get('created_at', 'unknown'),
                    "architecture": config.get('architecture', 'unknown'),
                    "modalities": config.get('modalities', ['text']),
                    "can_be_retrained": config.get('can_be_retrained', True),
                    "original_teachers": history.get('retraining_info', {}).get('original_teachers', []),
                    "training_sessions": len(history.get('training_sessions', [])),
                    "last_training": (
                        history['training_sessions'][-1].get('timestamp', 'unknown')
                        if history.get('training_sessions')
                        else 'unknown'
                    )
                })
            except Exception as e:
                logger.warning(f"Error reading model {model_dir}: {e}")
                continue

        return {"trained_students": trained_students}

    except Exception as e:
        logger.error(f"Error listing trained students: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """Describe every file in the ``uploads`` directory as a ``ModelInfo`` entry.

    Files whose metadata cannot be read are logged and left out of the result.
    """
    models = []

    uploads_dir = Path("uploads")
    if not uploads_dir.exists():
        return models

    for candidate in uploads_dir.iterdir():
        if not candidate.is_file():
            continue
        try:
            details = await model_loader.get_model_info(str(candidate))
            entry = ModelInfo(
                name=candidate.stem,
                size=candidate.stat().st_size,
                format=candidate.suffix[1:],  # extension without the leading dot
                modality=details.get("modality", "unknown"),
                architecture=details.get("architecture")
            )
            models.append(entry)
        except Exception as e:
            logger.warning(f"Error getting info for {candidate}: {e}")

    return models

@app.websocket("/ws/{session_id}")
async def websocket_endpoint(websocket: WebSocket, session_id: str):
    """WebSocket endpoint for real-time training updates.

    Registers the socket as the active connection for ``session_id``, sends
    the current session state once (if the session exists), then keeps the
    connection open until the client disconnects or an error occurs.
    """
    await websocket.accept()
    active_connections[session_id] = websocket

    try:
        # Send current status if session exists. The session dict can hold
        # values that are not JSON-serializable (e.g. a Path in "model_path"),
        # so it goes through the same serializer used for regular updates.
        if session_id in training_sessions:
            await websocket.send_json({
                "type": "training_update",
                "data": safe_json_serialize(training_sessions[session_id])
            })

        # Keep connection alive
        while True:
            await websocket.receive_text()

    except WebSocketDisconnect:
        pass
    except Exception as e:
        logger.error(f"WebSocket error for session {session_id}: {e}")
    finally:
        # Only unregister our own socket: a newer client may have replaced it
        if active_connections.get(session_id) is websocket:
            del active_connections[session_id]

# ==================== NEW ADVANCED ENDPOINTS ====================

# Token Management Endpoints
@app.get("/tokens")
async def token_management_page(request: Request):
    """Render the token management HTML page."""
    context = {"request": request}
    return templates.TemplateResponse("token-management.html", context)

@app.post("/api/tokens")
async def save_token(
    name: str = Form(...),
    token: str = Form(...),
    token_type: str = Form("read"),
    description: str = Form(""),
    is_default: bool = Form(False)
):
    """Save a Hugging Face token through the token manager.

    Returns:
        A success dict with a confirmation message.

    Raises:
        HTTPException: 400 when the token manager reports failure,
            500 when saving raises unexpectedly.
    """
    try:
        success = token_manager.save_token(name, token, token_type, description, is_default)
        if success:
            return {"success": True, "message": f"Token '{name}' saved successfully"}
        else:
            raise HTTPException(status_code=400, detail="Failed to save token")
    except HTTPException:
        # Let the 400 above reach the client instead of being rewritten as a 500
        raise
    except Exception as e:
        logger.error(f"Error saving token: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/api/tokens")
async def list_tokens():
    """Return every token entry known to the token manager.

    Raises:
        HTTPException: 500 when the token manager fails.
    """
    try:
        return {"tokens": token_manager.list_tokens()}
    except Exception as e:
        logger.error(f"Error listing tokens: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.delete("/api/tokens/{token_name}")
async def delete_token(token_name: str):
    """Delete a saved token by name.

    Raises:
        HTTPException: 404 when the token manager reports the token was not
            deleted, 500 when deletion raises unexpectedly.
    """
    try:
        success = token_manager.delete_token(token_name)
        if success:
            return {"success": True, "message": f"Token '{token_name}' deleted"}
        else:
            raise HTTPException(status_code=404, detail="Token not found")
    except HTTPException:
        # Let the 404 above reach the client instead of being rewritten as a 500
        raise
    except Exception as e:
        logger.error(f"Error deleting token: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/tokens/{token_name}/set-default")
async def set_default_token(token_name: str):
    """Mark a saved token as the default one.

    Raises:
        HTTPException: 404 when the token manager reports failure,
            500 when the operation raises unexpectedly.
    """
    try:
        success = token_manager.set_default_token(token_name)
        if success:
            return {"success": True, "message": f"Token '{token_name}' set as default"}
        else:
            raise HTTPException(status_code=404, detail="Token not found")
    except HTTPException:
        # Let the 404 above reach the client instead of being rewritten as a 500
        raise
    except Exception as e:
        logger.error(f"Error setting default token: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/tokens/validate")
async def validate_token(token: str = Form(...)):
    """Pass a Hugging Face token to the token manager and return its verdict unchanged.

    Raises:
        HTTPException: 500 when validation raises.
    """
    try:
        return token_manager.validate_token(token)
    except Exception as e:
        logger.error(f"Error validating token: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/api/tokens/for-task/{task_type}")
async def get_token_for_task(task_type: str):
    """Describe which token would be used for the given task type.

    The token value itself is never returned, only metadata about the saved
    entry it matches. When no saved entry matches the selected token, a
    placeholder entry is reported instead.

    Raises:
        HTTPException: 404 when no token is available for the task,
            500 on unexpected errors.
    """
    try:
        selected = token_manager.get_token_for_task(task_type)
        if not selected:
            raise HTTPException(status_code=404, detail=f"No suitable token found for task: {task_type}")

        # Identify the saved entry whose stored value equals the selected token
        token_info = next(
            (
                entry
                for entry in token_manager.list_tokens()
                if token_manager.get_token(entry['name']) == selected
            ),
            None,
        )

        if not token_info:
            # No saved entry matched: the token is treated as coming from the environment
            token_info = {
                'name': f'{task_type}_token',
                'type': task_type,
                'description': f'رمز من متغيرات البيئة للمهمة: {task_type}',
                'last_used': None,
                'usage_count': 0
            }

        # Static metadata registered for this token type (may be empty)
        type_info = token_manager.token_types.get(token_info['type'], {})

        summary = {
            "token_name": token_info['name'],
            "type": token_info['type'],
            "type_name": type_info.get('name', token_info['type']),
            "description": token_info['description'],
            "security_level": type_info.get('security_level', 'medium'),
            "recommended_for": type_info.get('recommended_for', 'general'),
            "last_used": token_info.get('last_used'),
            "usage_count": token_info.get('usage_count', 0)
        }

        return {
            "success": True,
            "task_type": task_type,
            "token_info": summary
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting token for task {task_type}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Medical Dataset Endpoints
@app.get("/medical-datasets")
async def medical_datasets_page(request: Request):
    """Render the medical datasets management HTML page."""
    context = {"request": request}
    return templates.TemplateResponse("medical-datasets.html", context)

@app.get("/api/medical-datasets")
async def list_medical_datasets():
    """Return the medical datasets the dataset manager reports as supported.

    Raises:
        HTTPException: 500 when the dataset manager fails.
    """
    try:
        return {"datasets": medical_dataset_manager.list_supported_datasets()}
    except Exception as e:
        logger.error(f"Error listing medical datasets: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/medical-datasets/load")
async def load_medical_dataset(
    dataset_name: str = Form(...),
    streaming: bool = Form(True),
    split: str = Form("train")
):
    """Load a medical dataset and return a short summary of it.

    The token registered for the ``medical`` task is used when available;
    otherwise the token manager's default token is tried.

    Raises:
        HTTPException: 500 when loading fails.
    """
    try:
        # Prefer the task-specific token, then fall back to the default one
        hf_token = token_manager.get_token_for_task('medical')
        if not hf_token:
            logger.warning("No suitable token found for medical datasets, trying default")
            hf_token = token_manager.get_token()

        loaded = await medical_dataset_manager.load_dataset(
            dataset_name=dataset_name,
            streaming=streaming,
            split=split,
            token=hf_token
        )

        summary = {
            "name": loaded['config']['name'],
            "size_gb": loaded['config']['size_gb'],
            "num_samples": loaded['config']['num_samples'],
            "streaming": loaded['streaming']
        }
        return {"success": True, "dataset_info": summary}
    except Exception as e:
        logger.error(f"Error loading medical dataset: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Memory and Performance Endpoints
@app.get("/api/system/memory")
async def get_memory_info():
    """Return the memory manager's current memory report unchanged.

    Raises:
        HTTPException: 500 when the memory manager fails.
    """
    try:
        return memory_manager.get_memory_info()
    except Exception as e:
        logger.error(f"Error getting memory info: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/api/system/performance")
async def get_performance_info():
    """Combine memory statistics, memory recommendations and CPU optimizer state.

    Raises:
        HTTPException: 500 when any of the underlying components fails.
    """
    try:
        report = {
            "memory": memory_manager.get_memory_info(),
            "recommendations": memory_manager.get_memory_recommendations(),
        }
        report["cpu_cores"] = cpu_optimizer.cpu_count
        report["optimizations_applied"] = cpu_optimizer.optimizations_applied
        return report
    except Exception as e:
        logger.error(f"Error getting performance info: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/system/cleanup")
async def force_memory_cleanup():
    """Ask the memory manager to run a forced cleanup and confirm completion.

    Raises:
        HTTPException: 500 when the cleanup raises.
    """
    try:
        memory_manager.force_cleanup()
    except Exception as e:
        logger.error(f"Error during memory cleanup: {e}")
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"success": True, "message": "Memory cleanup completed"}

# Google Models Support
@app.get("/api/models/google")
async def list_google_models():
    """Return the hard-coded catalogue of supported Google models.

    Raises:
        HTTPException: 500 when building the catalogue fails.
    """
    try:
        columns = ("name", "description", "type", "size_gb", "modality", "medical_specialized")
        catalogue = (
            (
                "google/medsiglip-448",
                "Medical SigLIP model for medical image-text understanding",
                "vision-language",
                1.1,
                "multimodal",
                True,
            ),
            (
                "google/gemma-3n-E4B-it",
                "Gemma 3 model for instruction following",
                "language",
                8.5,
                "text",
                False,
            ),
        )
        # One dict per catalogue row, keyed by the column names above
        google_models = [dict(zip(columns, row)) for row in catalogue]
        return {"models": google_models}
    except Exception as e:
        logger.error(f"Error listing Google models: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Database Management API Endpoints
@app.get("/api/databases")
async def get_all_databases():
    """Return every configured database together with the current selection.

    Raises:
        HTTPException: 500 when the database manager fails.
    """
    try:
        configured = platform_db_manager.get_all_databases()
        chosen = platform_db_manager.get_selected_databases()
    except Exception as e:
        logger.error(f"Error getting databases: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    try:
        total = len(configured)
    except Exception as e:
        logger.error(f"Error getting databases: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    return {
        "success": True,
        "databases": configured,
        "selected": chosen,
        "total": total
    }

@app.post("/api/databases/search")
async def search_databases(request: DatabaseSearchRequest):
    """Search Hugging Face datasets using the query and limit from the request.

    Raises:
        HTTPException: 500 when the search fails.
    """
    try:
        matches = await platform_db_manager.search_huggingface_datasets(
            query=request.query,
            limit=request.limit
        )
        response = {"success": True, "results": matches}
        response["count"] = len(matches)
        response["query"] = request.query
        return response
    except Exception as e:
        logger.error(f"Error searching databases: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/databases/add")
async def add_database(database_info: DatabaseInfo):
    """Add a new database to the configuration.

    Raises:
        HTTPException: 400 when the database manager reports failure,
            500 when adding raises unexpectedly.
    """
    try:
        success = await platform_db_manager.add_database(database_info.dict())

        if success:
            return {
                "success": True,
                "message": f"Database {database_info.dataset_id} added successfully"
            }
        else:
            raise HTTPException(status_code=400, detail="Failed to add database")

    except HTTPException:
        # Let the 400 above reach the client instead of being rewritten as a 500
        raise
    except Exception as e:
        logger.error(f"Error adding database: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/databases/validate/{dataset_id:path}")
async def validate_database(dataset_id: str):
    """Run the database manager's validation for a dataset id and return its result.

    Raises:
        HTTPException: 500 when validation raises.
    """
    try:
        outcome = await platform_db_manager.validate_dataset(dataset_id)
    except Exception as e:
        logger.error(f"Error validating database: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    return {
        "success": True,
        "validation": outcome,
        "dataset_id": dataset_id
    }

@app.post("/api/databases/select")
async def select_databases(request: DatabaseSelectionRequest):
    """Select each requested database and report the per-database outcome.

    The response lists one result per requested id plus the selection as it
    stands after all ids were processed.

    Raises:
        HTTPException: 500 when selecting raises.
    """
    try:
        outcomes = [
            {
                "database_id": database_id,
                "success": platform_db_manager.select_database(database_id)
            }
            for database_id in request.database_ids
        ]

        return {
            "success": True,
            "results": outcomes,
            "selected": platform_db_manager.get_selected_databases()
        }
    except Exception as e:
        logger.error(f"Error selecting databases: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.delete("/api/databases/{database_id:path}")
async def remove_database(database_id: str):
    """Remove a database from configuration.

    Raises:
        HTTPException: 400 when the database manager reports failure,
            500 when removal raises unexpectedly.
    """
    try:
        success = platform_db_manager.remove_database(database_id)

        if success:
            return {
                "success": True,
                "message": f"Database {database_id} removed successfully"
            }
        else:
            raise HTTPException(status_code=400, detail="Failed to remove database")

    except HTTPException:
        # Let the 400 above reach the client instead of being rewritten as a 500
        raise
    except Exception as e:
        logger.error(f"Error removing database: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# NOTE: this route must be registered BEFORE the catch-all
# "/api/databases/{database_id:path}" GET route below. Routes are matched in
# registration order, and the ":path" converter would otherwise capture
# "category/<name>" as a database id, making this endpoint unreachable.
@app.get("/api/databases/category/{category}")
async def get_databases_by_category(category: str):
    """Get databases filtered by category.

    Args:
        category: Category name passed to the platform database manager.

    Returns:
        A dict containing the matching ``databases``, the requested
        ``category`` and the ``count`` of matches.

    Raises:
        HTTPException: 500 (with the exception text as detail) on any error.
    """
    try:
        databases = platform_db_manager.get_databases_by_category(category)

        return {
            "success": True,
            "databases": databases,
            "category": category,
            "count": len(databases)
        }
    except Exception as e:
        logger.error(f"Error getting databases by category: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/api/databases/{database_id:path}")
async def get_database_info(database_id: str):
    """Get detailed information about a specific database.

    Args:
        database_id: Identifier of the database. Captured with the ``:path``
            converter, so it may contain slashes.

    Returns:
        A dict with ``success`` set to ``True`` and the manager's info under
        ``database``.

    Raises:
        HTTPException: 404 when the manager returns a falsy result,
            500 (with the exception text as detail) on any unexpected error.
    """
    try:
        database_info = platform_db_manager.get_database_info(database_id)

        if not database_info:
            raise HTTPException(status_code=404, detail="Database not found")

        return {
            "success": True,
            "database": database_info
        }

    except HTTPException:
        # Preserve the 404 above; without this it would be caught by the
        # generic handler and reported to the client as a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting database info: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.post("/api/databases/load-selected")
async def load_selected_databases(max_samples: int = 1000):
    """Load data from the currently selected databases.

    ``max_samples`` (default 1000) is forwarded unchanged to the platform
    database manager. The loaded collection is returned together with its
    length. Any exception is logged and reported as an HTTP 500.
    """
    try:
        datasets = await platform_db_manager.load_selected_datasets(max_samples)
        dataset_count = len(datasets)
        return {
            "success": True,
            "loaded_datasets": datasets,
            "total_datasets": dataset_count,
        }
    except Exception as exc:
        logger.error(f"Error loading selected databases: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

# Models Management API Endpoints
@app.get("/api/models")
async def get_all_models():
    """Return every configured model plus the current teacher/student picks.

    The response carries the full model collection, the selected teachers,
    the selected student and the size of the model collection. Any exception
    is logged and reported as an HTTP 500.
    """
    try:
        all_models = models_manager.get_all_models()
        chosen_teachers = models_manager.get_selected_teachers()
        chosen_student = models_manager.get_selected_student()
        response = {
            "success": True,
            "models": all_models,
            "selected_teachers": chosen_teachers,
            "selected_student": chosen_student,
            "total": len(all_models),
        }
        return response
    except Exception as exc:
        logger.error(f"Error getting models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.get("/api/models/teachers")
async def get_teacher_models():
    """Return the teacher models known to the models manager.

    The response lists the teacher models, the currently selected teachers
    and the number of teacher models. Any exception is logged and reported
    as an HTTP 500.
    """
    try:
        teacher_models = models_manager.get_teacher_models()
        chosen = models_manager.get_selected_teachers()
        response = {
            "success": True,
            "teachers": teacher_models,
            "selected": chosen,
            "total": len(teacher_models),
        }
        return response
    except Exception as exc:
        logger.error(f"Error getting teacher models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.get("/api/models/students")
async def get_student_models():
    """Return the student models known to the models manager.

    The response lists the student models, the currently selected student
    and the number of student models. Any exception is logged and reported
    as an HTTP 500.
    """
    try:
        student_models = models_manager.get_student_models()
        chosen = models_manager.get_selected_student()
        response = {
            "success": True,
            "students": student_models,
            "selected": chosen,
            "total": len(student_models),
        }
        return response
    except Exception as exc:
        logger.error(f"Error getting student models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.post("/api/models/search")
async def search_models(request: ModelSearchRequest):
    """Search Hugging Face for models through the models manager.

    The request's ``query``, ``limit`` and ``model_type`` are forwarded as
    keyword arguments. The matches are returned with their count and the
    original query string. Any exception is logged and reported as an
    HTTP 500.
    """
    try:
        search_kwargs = {
            "query": request.query,
            "limit": request.limit,
            "model_type": request.model_type,
        }
        matches = await models_manager.search_huggingface_models(**search_kwargs)
        return {
            "success": True,
            "results": matches,
            "count": len(matches),
            "query": request.query,
        }
    except Exception as exc:
        logger.error(f"Error searching models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.post("/api/models/add")
async def add_model(model_info: Dict[str, Any]):
    """Add a new model to the configuration.

    Args:
        model_info: Request body describing the model. Its ``model_id`` entry
            (if present) is echoed in the confirmation message; the full
            dict is handed to the models manager.

    Returns:
        A dict with ``success`` set to ``True`` and a confirmation
        ``message`` when the manager reports the addition as successful.

    Raises:
        HTTPException: 400 when the manager reports a falsy result,
            500 (with the exception text as detail) on any unexpected error.
    """
    try:
        success = await models_manager.add_model(model_info)

        if not success:
            raise HTTPException(status_code=400, detail="Failed to add model")

        return {
            "success": True,
            "message": f"Model {model_info.get('model_id')} added successfully"
        }

    except HTTPException:
        # Let the deliberate 400 above reach the client unchanged instead of
        # being rewrapped as a 500 by the generic handler below.
        raise
    except Exception as e:
        logger.error(f"Error adding model: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.post("/api/models/validate/{model_id:path}")
async def validate_model(model_id: str):
    """Run the models manager's validation for one model.

    ``model_id`` is captured with the ``:path`` converter, so it may contain
    slashes. The validation result is returned as-is under the
    ``validation`` key. Any exception is logged and reported as an HTTP 500.
    """
    try:
        outcome = await models_manager.validate_model(model_id)
        payload = {
            "success": True,
            "validation": outcome,
            "model_id": model_id,
        }
        return payload
    except Exception as exc:
        logger.error(f"Error validating model: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.post("/api/models/select")
async def select_models(request: ModelSelectionRequest):
    """Apply the requested teacher and student model selections.

    Every id in ``request.teacher_models`` is selected as a teacher, in
    order; then, if ``request.student_model`` is not ``None``, it is selected
    as the student. Per-model outcomes are reported under ``results``
    alongside the manager's resulting selections. Any exception is logged
    and reported as an HTTP 500.
    """
    try:
        # Teachers first, one selection call per id in request order.
        outcomes = [
            {
                "model_id": teacher,
                "type": "teacher",
                "success": models_manager.select_teacher(teacher),
            }
            for teacher in request.teacher_models
        ]

        # The student is optional; skip it when the request leaves it unset.
        student = request.student_model
        if student is not None:
            student_ok = models_manager.select_student(student)
            outcomes.append({
                "model_id": student,
                "type": "student",
                "success": student_ok,
            })

        response = {
            "success": True,
            "results": outcomes,
            "selected_teachers": models_manager.get_selected_teachers(),
            "selected_student": models_manager.get_selected_student(),
        }
        return response
    except Exception as exc:
        logger.error(f"Error selecting models: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

@app.delete("/api/models/{model_id:path}")
async def remove_model(model_id: str):
    """Remove a model from configuration.

    Args:
        model_id: Identifier of the model to remove. Captured with the
            ``:path`` converter, so it may contain slashes.

    Returns:
        A dict with ``success`` set to ``True`` and a confirmation
        ``message`` when the manager reports the removal as successful.

    Raises:
        HTTPException: 400 when the manager reports a falsy result,
            500 (with the exception text as detail) on any unexpected error.
    """
    try:
        success = models_manager.remove_model(model_id)

        if not success:
            raise HTTPException(status_code=400, detail="Failed to remove model")

        return {
            "success": True,
            "message": f"Model {model_id} removed successfully"
        }

    except HTTPException:
        # Let the deliberate 400 above reach the client unchanged instead of
        # being rewrapped as a 500 by the generic handler below.
        raise
    except Exception as e:
        logger.error(f"Error removing model: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/api/models/{model_id:path}")
async def get_model_info(model_id: str):
    """Get detailed information about a specific model.

    Args:
        model_id: Identifier of the model. Captured with the ``:path``
            converter, so it may contain slashes. Because this pattern
            matches any GET path under ``/api/models/``, more specific GET
            routes with that prefix must be registered before this one.

    Returns:
        A dict with ``success`` set to ``True`` and the manager's info under
        ``model``.

    Raises:
        HTTPException: 404 when the manager returns a falsy result,
            500 (with the exception text as detail) on any unexpected error.
    """
    try:
        model_info = models_manager.get_model_info(model_id)

        if not model_info:
            raise HTTPException(status_code=404, detail="Model not found")

        return {
            "success": True,
            "model": model_info
        }

    except HTTPException:
        # Preserve the 404 above; without this it would be caught by the
        # generic handler and reported to the client as a 500.
        raise
    except Exception as e:
        logger.error(f"Error getting model info: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces. The port comes
    # from the PORT environment variable, falling back to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, reload=False, log_level="info")