#!/usr/bin/env python3
"""
Optimized runner for AI Knowledge Distillation Platform
Configured for CPU-only training with memory constraints
"""
import os
import sys
import logging
import asyncio
import uvicorn
from pathlib import Path

# Add src directory to Python path
sys.path.insert(0, str(Path(__file__).parent / "src"))

def setup_environment():
    """Set up environment variables for optimal CPU performance"""
    # CPU optimization settings (os.cpu_count() can return None, hence "or 1")
    cpu_threads = str(min(os.cpu_count() or 1, 8))
    os.environ['OMP_NUM_THREADS'] = cpu_threads
    os.environ['MKL_NUM_THREADS'] = cpu_threads
    os.environ['NUMEXPR_NUM_THREADS'] = cpu_threads
    os.environ['OPENBLAS_NUM_THREADS'] = cpu_threads

    # Memory optimization
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # Avoid tokenizer warnings

    # Disable GPU if available (force CPU-only)
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
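    # An empty CUDA_VISIBLE_DEVICES hides every GPU from CUDA-aware libraries, so
    # PyTorch falls back to CPU; the allocator setting above is then unused but harmless.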

    # Point Hugging Face caches at the local project directory
    os.environ['HF_DATASETS_CACHE'] = './cache/datasets'
    os.environ['TRANSFORMERS_CACHE'] = './cache/transformers'
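    # Note: recent transformers releases prefer HF_HOME (or HF_HUB_CACHE) over the legacy
    # TRANSFORMERS_CACHE variable; the latter still works but may log a deprecation warning.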
print("β Environment optimized for CPU-only training") | |
print(f"π§ CPU threads: {os.environ['OMP_NUM_THREADS']}") | |
print(f"πΎ Memory optimization enabled") | |
def setup_logging():
    """Set up logging configuration"""
    # Create logs directory
    logs_dir = Path("logs")
    logs_dir.mkdir(exist_ok=True)

    # Configure logging to both a file and stdout
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(logs_dir / "app.log"),
            logging.StreamHandler(sys.stdout)
        ]
    )
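    # Note: logging.basicConfig() is a no-op if the root logger already has handlers
    # (e.g. when rerun in the same interpreter); pass force=True (Python 3.8+) to override.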

    # Set specific log levels for noisy third-party loggers
    logging.getLogger("uvicorn").setLevel(logging.INFO)
    logging.getLogger("transformers").setLevel(logging.WARNING)
    logging.getLogger("datasets").setLevel(logging.WARNING)

    print("📝 Logging configured")

def check_system_requirements():
    """Check system requirements and provide recommendations"""
    import psutil

    # Check available memory
    memory = psutil.virtual_memory()
    memory_gb = memory.total / (1024**3)

    print("\n🖥️ System Information:")
    print(f"   💾 Total Memory: {memory_gb:.1f} GB")
    print(f"   📊 Available Memory: {memory.available / (1024**3):.1f} GB")
    print(f"   🔧 CPU Cores: {os.cpu_count()}")

    # Recommendations
    if memory_gb < 8:
        print("⚠️ Warning: Less than 8GB RAM detected. Consider using smaller models.")
    elif memory_gb < 16:
        print("ℹ️ Note: 8-16GB RAM detected. Chunked loading will be used for large models.")
    else:
        print("✅ Sufficient memory for most operations.")

    # Check disk space
    disk = psutil.disk_usage('.')
    disk_free_gb = disk.free / (1024**3)
    print(f"   💿 Free Disk Space: {disk_free_gb:.1f} GB")

    if disk_free_gb < 10:
        print("⚠️ Warning: Less than 10GB free disk space. Consider cleaning up.")

    return memory_gb >= 4  # Minimum 4GB required

def create_directories():
    """Create necessary directories"""
    directories = [
        "cache",
        "cache/datasets",
        "cache/transformers",
        "cache/medical_datasets",
        "database",
        "logs",
        "models",
        "backups"
    ]
    for directory in directories:
        Path(directory).mkdir(parents=True, exist_ok=True)

    print("📁 Directories created")

def check_dependencies():
    """Check if required dependencies are installed"""
    required_packages = [
        'torch',
        'transformers',
        'fastapi',
        'uvicorn',
        'datasets',
        'safetensors',
        'psutil'
    ]

    missing_packages = []
    for package in required_packages:
        try:
            __import__(package)
        except ImportError:
            missing_packages.append(package)
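    # Note: __import__ only verifies that each module is importable; it does not
    # check installed versions against requirements.txt.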

    if missing_packages:
        print(f"❌ Missing packages: {', '.join(missing_packages)}")
        print("📦 Install with: pip install -r requirements.txt")
        return False

    print("✅ All required packages installed")
    return True

def main():
    """Main function to run the optimized server"""
    print("🚀 Starting AI Knowledge Distillation Platform (Optimized)")
    print("=" * 60)

    # Setup environment
    setup_environment()
    setup_logging()
    create_directories()

    # Check dependencies first (check_system_requirements needs psutil)
    if not check_dependencies():
        print("❌ Dependencies not satisfied. Exiting.")
        sys.exit(1)

    # Check system requirements
    if not check_system_requirements():
        print("❌ System requirements not met. Exiting.")
        sys.exit(1)
print("\nπ― Starting server with optimized settings...") | |
print("π Access the application at: http://localhost:8000") | |
print("π Token management: http://localhost:8000/tokens") | |
print("π₯ Medical datasets: http://localhost:8000/medical-datasets") | |
print("\n" + "=" * 60) | |

    # Import and start the app
    try:
        from app import app

        # Configure uvicorn for optimal performance
        config = uvicorn.Config(
            app=app,
            host="0.0.0.0",
            port=8000,
            log_level="info",
            access_log=True,
            workers=1,  # Single worker for memory efficiency
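            # Note: the workers option is only honored by uvicorn.run()/the CLI supervisor;
            # calling Server.serve() directly (as below) always runs a single process.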
loop="asyncio", | |
http="httptools", | |
ws="websockets", | |
lifespan="on", | |
reload=False # Disable reload for production | |
) | |
server = uvicorn.Server(config) | |
# Start server | |
asyncio.run(server.serve()) | |
    except KeyboardInterrupt:
        print("\n👋 Server stopped by user")
    except Exception as e:
        print(f"❌ Error starting server: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
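
# Typical usage (assuming this file is saved as run_optimized.py at the project root):
#   python run_optimized.py
# The server then listens on http://localhost:8000, as printed at startup.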