Spaces:

fokan
/

train-modle

Running

File size: 6,053 Bytes

ab4e093

#!/usr/bin/env python3
"""
Optimized runner for AI Knowledge Distillation Platform
Configured for CPU-only training with memory constraints
"""

import os
import sys
import logging
import asyncio
import uvicorn
from pathlib import Path

# Put the "src" directory that sits next to this file at the front of sys.path
# so that modules inside it are found first by later import statements.
sys.path.insert(0, str(Path(__file__).parent / "src"))

def setup_environment():
    """Set process environment variables for CPU-only execution.

    Writes the following into ``os.environ`` and prints a short summary:

    * ``OMP_NUM_THREADS``, ``MKL_NUM_THREADS``, ``NUMEXPR_NUM_THREADS`` and
      ``OPENBLAS_NUM_THREADS`` -- the detected CPU count, capped at 8.
    * ``PYTORCH_CUDA_ALLOC_CONF`` and ``TOKENIZERS_PARALLELISM``.
    * ``CUDA_VISIBLE_DEVICES`` -- set to the empty string.
    * ``HF_DATASETS_CACHE`` and ``TRANSFORMERS_CACHE`` -- directories under
      ``./cache``.

    Returns:
        None
    """
    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to a single thread instead of letting min() raise TypeError.
    thread_count = str(min(os.cpu_count() or 1, 8))

    # CPU optimization settings: same thread cap for every numeric backend
    for variable in (
        'OMP_NUM_THREADS',
        'MKL_NUM_THREADS',
        'NUMEXPR_NUM_THREADS',
        'OPENBLAS_NUM_THREADS',
    ):
        os.environ[variable] = thread_count

    # Memory optimization
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # Avoid tokenizer warnings

    # Hide every CUDA device from this process (force CPU-only)
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    # Keep Hugging Face dataset/model caches inside the local ./cache tree
    os.environ['HF_DATASETS_CACHE'] = './cache/datasets'
    os.environ['TRANSFORMERS_CACHE'] = './cache/transformers'

    print("✅ Environment optimized for CPU-only training")
    print(f"🔧 CPU threads: {thread_count}")
    print("💾 Memory optimization enabled")

def setup_logging():
    """Configure root logging to write to logs/app.log and to stdout.

    Creates the ``logs`` directory if it is missing, installs a file handler
    and a stdout stream handler at INFO level through ``logging.basicConfig``,
    then sets explicit levels on the ``uvicorn``, ``transformers`` and
    ``datasets`` loggers.
    """
    log_directory = Path("logs")
    log_directory.mkdir(exist_ok=True)

    # Build both sinks up front: a persistent file and the console
    file_sink = logging.FileHandler(log_directory / "app.log")
    console_sink = logging.StreamHandler(sys.stdout)

    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
        handlers=[file_sink, console_sink],
    )

    # Per-library thresholds
    library_levels = (
        ("uvicorn", logging.INFO),
        ("transformers", logging.WARNING),
        ("datasets", logging.WARNING),
    )
    for logger_name, threshold in library_levels:
        logging.getLogger(logger_name).setLevel(threshold)

    print("📝 Logging configured")

def check_system_requirements():
    """Print a memory/CPU/disk summary and report whether RAM is sufficient.

    Uses ``psutil`` to read total and available memory and the free space of
    the current directory's disk, printing advisory messages along the way.

    Returns:
        bool: True when total memory is at least 4 GB, otherwise False.
    """
    import psutil

    bytes_per_gb = 1024**3

    # Memory overview
    ram = psutil.virtual_memory()
    total_ram_gb = ram.total / bytes_per_gb
    available_ram_gb = ram.available / bytes_per_gb

    print(f"\n🖥️  System Information:")
    print(f"   💾 Total Memory: {total_ram_gb:.1f} GB")
    print(f"   🔄 Available Memory: {available_ram_gb:.1f} GB")
    print(f"   🔧 CPU Cores: {os.cpu_count()}")

    # Pick the advisory line that matches the memory tier
    if total_ram_gb >= 16:
        advice = "✅ Sufficient memory for most operations."
    elif total_ram_gb >= 8:
        advice = "ℹ️  Note: 8-16GB RAM detected. Chunked loading will be used for large models."
    else:
        advice = "⚠️  Warning: Less than 8GB RAM detected. Consider using smaller models."
    print(advice)

    # Disk overview for the current working directory
    free_disk_gb = psutil.disk_usage('.').free / bytes_per_gb
    print(f"   💿 Free Disk Space: {free_disk_gb:.1f} GB")

    low_on_disk = free_disk_gb < 10
    if low_on_disk:
        print("⚠️  Warning: Less than 10GB free disk space. Consider cleaning up.")

    # Minimum 4GB required
    meets_minimum = total_ram_gb >= 4
    return meets_minimum

def create_directories():
    """Ensure every working directory the launcher expects is present.

    Each path is created relative to the current working directory; missing
    parents are created and already-existing directories are left alone.
    """
    required_dirs = (
        "cache",
        "cache/datasets",
        "cache/transformers",
        "cache/medical_datasets",
        "database",
        "logs",
        "models",
        "backups",
    )

    for dir_name in required_dirs:
        target = Path(dir_name)
        target.mkdir(parents=True, exist_ok=True)

    print("📁 Directories created")

def check_dependencies():
    """Report whether every required third-party package can be imported.

    Tries to import each package by name; any that raise ``ImportError`` are
    listed in a printed message together with an install hint.

    Returns:
        bool: True when all packages import, False if at least one is missing.
    """
    required = (
        'torch',
        'transformers',
        'fastapi',
        'uvicorn',
        'datasets',
        'safetensors',
        'psutil',
    )

    def _is_missing(module_name):
        # A package counts as missing only when importing it raises ImportError
        try:
            __import__(module_name)
        except ImportError:
            return True
        return False

    missing = [module_name for module_name in required if _is_missing(module_name)]

    if not missing:
        print("✅ All required packages installed")
        return True

    print(f"❌ Missing packages: {', '.join(missing)}")
    print("📦 Install with: pip install -r requirements.txt")
    return False

def main():
    """Prepare the runtime, run the preflight checks and serve the app.

    Runs the environment, logging and directory setup, then exits with
    status 1 if the system-requirement or dependency check fails. Otherwise
    imports ``app`` from the ``app`` module and serves it with uvicorn on
    0.0.0.0:8000. Ctrl+C stops the server with a message; any other
    exception is printed and the process exits with status 1.
    """
    rule = "=" * 60

    print("🚀 Starting AI Knowledge Distillation Platform (Optimized)")
    print(rule)

    # Setup steps, executed in this fixed order
    for prepare in (setup_environment, setup_logging, create_directories):
        prepare()

    # Preflight checks: the first failing one aborts the launch
    preflight = (
        (check_system_requirements, "❌ System requirements not met. Exiting."),
        (check_dependencies, "❌ Dependencies not satisfied. Exiting."),
    )
    for passes, failure_message in preflight:
        if not passes():
            print(failure_message)
            sys.exit(1)

    startup_lines = (
        "\n🎯 Starting server with optimized settings...",
        "🌐 Access the application at: http://localhost:8000",
        "📊 Token management: http://localhost:8000/tokens",
        "🏥 Medical datasets: http://localhost:8000/medical-datasets",
        "\n" + rule,
    )
    for line in startup_lines:
        print(line)

    try:
        # Imported late so that import failures are reported by the handler below
        from app import app as asgi_app

        server_options = {
            "app": asgi_app,
            "host": "0.0.0.0",
            "port": 8000,
            "log_level": "info",
            "access_log": True,
            "workers": 1,  # Single worker for memory efficiency
            "loop": "asyncio",
            "http": "httptools",
            "ws": "websockets",
            "lifespan": "on",
            "reload": False,  # Disable reload for production
        }
        server = uvicorn.Server(uvicorn.Config(**server_options))

        # Blocks until the server shuts down
        asyncio.run(server.serve())

    except KeyboardInterrupt:
        print("\n🛑 Server stopped by user")
    except Exception as exc:
        print(f"❌ Error starting server: {exc}")
        sys.exit(1)

# Start the launcher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()