train-modle / run_optimized.py
fokan's picture
Initial clean commit: Multi-Modal Knowledge Distillation Platform
ab4e093
#!/usr/bin/env python3
"""
Optimized runner for AI Knowledge Distillation Platform
Configured for CPU-only training with memory constraints
"""
import os
import sys
import logging
import asyncio
import uvicorn
from pathlib import Path
# Put the sibling "src" directory at the front of the module search path
sys.path[:0] = [str(Path(__file__).parent / "src")]
def setup_environment():
    """Set process environment variables for CPU-only training.

    Writes the following into ``os.environ`` and prints a short summary:

    * ``OMP_NUM_THREADS``, ``MKL_NUM_THREADS``, ``NUMEXPR_NUM_THREADS`` and
      ``OPENBLAS_NUM_THREADS`` -- the CPU count reported by
      ``os.cpu_count()``, capped at 8.
    * ``PYTORCH_CUDA_ALLOC_CONF`` and ``TOKENIZERS_PARALLELISM``.
    * ``CUDA_VISIBLE_DEVICES`` -- set to the empty string to force CPU-only.
    * ``HF_DATASETS_CACHE`` and ``TRANSFORMERS_CACHE`` -- cache locations
      under ``./cache``.
    """
    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to a single thread instead of letting min(None, 8) raise TypeError.
    thread_count = str(min(os.cpu_count() or 1, 8))

    # CPU optimization settings: every numeric backend gets the same cap
    for variable in (
        'OMP_NUM_THREADS',
        'MKL_NUM_THREADS',
        'NUMEXPR_NUM_THREADS',
        'OPENBLAS_NUM_THREADS',
    ):
        os.environ[variable] = thread_count

    # Memory optimization
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # Avoid tokenizer warnings

    # Disable GPU if available (force CPU-only)
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    # Keep Hugging Face caches inside the project's ./cache directory
    os.environ['HF_DATASETS_CACHE'] = './cache/datasets'
    os.environ['TRANSFORMERS_CACHE'] = './cache/transformers'

    print("βœ… Environment optimized for CPU-only training")
    print(f"πŸ”§ CPU threads: {thread_count}")
    print("πŸ’Ύ Memory optimization enabled")
def setup_logging():
    """Configure logging to ``logs/app.log`` and stdout, then set library levels.

    Creates the ``logs`` directory if needed, calls ``logging.basicConfig``
    at INFO level with a file handler and a stdout handler, and sets the
    levels of the ``uvicorn``, ``transformers`` and ``datasets`` loggers.
    """
    # The file handler opens its file immediately, so the directory must exist
    log_directory = Path("logs")
    log_directory.mkdir(exist_ok=True)

    file_handler = logging.FileHandler(log_directory / "app.log")
    console_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )

    # Per-library log levels
    library_levels = {
        "uvicorn": logging.INFO,
        "transformers": logging.WARNING,
        "datasets": logging.WARNING,
    }
    for logger_name, level in library_levels.items():
        logging.getLogger(logger_name).setLevel(level)

    print("πŸ“ Logging configured")
def check_system_requirements():
    """Print a memory / CPU / disk summary and judge whether the host is usable.

    Memory and disk figures come from ``psutil`` (imported lazily here); the
    disk figure is for the current working directory. Advisory messages are
    printed for the RAM tier and for low free disk space.

    Returns:
        bool: True when total RAM is at least 4 GB, otherwise False. Free
        disk space never affects the result.
    """
    import psutil

    bytes_per_gb = 1024**3
    minimum_ram_gb = 4

    # Memory figures
    vm_stats = psutil.virtual_memory()
    total_ram_gb = vm_stats.total / bytes_per_gb
    available_ram_gb = vm_stats.available / bytes_per_gb

    print("\nπŸ–₯️ System Information:")
    print(f" πŸ’Ύ Total Memory: {total_ram_gb:.1f} GB")
    print(f" πŸ”„ Available Memory: {available_ram_gb:.1f} GB")
    print(f" πŸ”§ CPU Cores: {os.cpu_count()}")

    # Advice by RAM tier, largest tier first
    if total_ram_gb >= 16:
        print("βœ… Sufficient memory for most operations.")
    elif total_ram_gb >= 8:
        print("ℹ️ Note: 8-16GB RAM detected. Chunked loading will be used for large models.")
    else:
        print("⚠️ Warning: Less than 8GB RAM detected. Consider using smaller models.")

    # Free space on the volume holding the working directory
    free_disk_gb = psutil.disk_usage('.').free / bytes_per_gb
    print(f" πŸ’Ώ Free Disk Space: {free_disk_gb:.1f} GB")
    if free_disk_gb < 10:
        print("⚠️ Warning: Less than 10GB free disk space. Consider cleaning up.")

    return total_ram_gb >= minimum_ram_gb
def create_directories():
    """Create the cache, database, logs, models and backups directories.

    Paths are relative to the current working directory. Directories that
    already exist are left alone, and missing parents are created.
    """
    relative_paths = (
        "cache",
        "cache/datasets",
        "cache/transformers",
        "cache/medical_datasets",
        "database",
        "logs",
        "models",
        "backups",
    )
    for target in map(Path, relative_paths):
        target.mkdir(parents=True, exist_ok=True)

    print("πŸ“ Directories created")
def check_dependencies(required_packages=None):
    """Check that every required package can be imported.

    Each package is actually imported (not merely located), so a successful
    check leaves the modules loaded in ``sys.modules``.

    Args:
        required_packages: Optional iterable of importable module names to
            probe. When omitted, the platform's default list is used
            (torch, transformers, fastapi, uvicorn, datasets, safetensors,
            psutil).

    Returns:
        bool: True if every package imported. False if at least one raised
        ``ImportError``, in which case the missing names and an install hint
        are printed.
    """
    # Local import keeps this block self-contained; import_module is the
    # documented programmatic alternative to calling __import__ directly.
    import importlib

    if required_packages is None:
        required_packages = (
            'torch',
            'transformers',
            'fastapi',
            'uvicorn',
            'datasets',
            'safetensors',
            'psutil',
        )

    missing_packages = []
    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            missing_packages.append(package)

    if missing_packages:
        print(f"❌ Missing packages: {', '.join(missing_packages)}")
        print("πŸ“¦ Install with: pip install -r requirements.txt")
        return False

    print("βœ… All required packages installed")
    return True
def main():
    """Prepare the runtime, run pre-flight checks, and serve the app with uvicorn.

    Steps, in order: environment variables, logging, working directories,
    system-requirement check, dependency check, then the uvicorn server on
    0.0.0.0:8000. The process exits with status 1 if a check fails or the
    server raises while starting or running. Ctrl-C prints a message and
    returns normally.
    """
    print("πŸš€ Starting AI Knowledge Distillation Platform (Optimized)")
    print("=" * 60)

    # One-time preparation; order matters (environment before anything else)
    for prepare in (setup_environment, setup_logging, create_directories):
        prepare()

    # Pre-flight checks: the first failing check aborts the launch
    preflight_checks = (
        (check_system_requirements, "❌ System requirements not met. Exiting."),
        (check_dependencies, "❌ Dependencies not satisfied. Exiting."),
    )
    for check_passes, failure_message in preflight_checks:
        if not check_passes():
            print(failure_message)
            sys.exit(1)

    for banner_line in (
        "\n🎯 Starting server with optimized settings...",
        "🌐 Access the application at: http://localhost:8000",
        "πŸ“Š Token management: http://localhost:8000/tokens",
        "πŸ₯ Medical datasets: http://localhost:8000/medical-datasets",
        "\n" + "=" * 60,
    ):
        print(banner_line)

    try:
        # Imported late so the environment variables set above are already
        # in place when the application module loads.
        from app import app as asgi_app

        server_options = dict(
            host="0.0.0.0",
            port=8000,
            log_level="info",
            access_log=True,
            workers=1,  # Single worker for memory efficiency
            loop="asyncio",
            http="httptools",
            ws="websockets",
            lifespan="on",
            reload=False,  # Disable reload for production
        )
        server = uvicorn.Server(uvicorn.Config(app=asgi_app, **server_options))
        asyncio.run(server.serve())
    except KeyboardInterrupt:
        print("\nπŸ›‘ Server stopped by user")
    except Exception as error:
        print(f"❌ Error starting server: {error}")
        sys.exit(1)
# Launch the server only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()