# Repository path: train-modle / config.yaml
# Latest commit ab4e093 (fokan): Initial clean commit: Multi-Modal Knowledge Distillation Platform
# AI Knowledge Distillation Platform Configuration
# تكوين منصة تقطير المعرفة للذكاء الاصطناعي
# System Configuration
# Resource limits (memory, CPU) and on-disk locations used by the platform.
system:
  # Memory management settings
  memory:
    max_memory_gb: 14.0  # Maximum memory usage (leave 2GB for system)
    chunk_size_mb: 500.0  # Chunk size for large model loading
    # Thresholds are written as fractions in [0, 1]; presumably a share of
    # max_memory_gb -- TODO confirm against the code that reads them.
    cleanup_threshold: 0.85  # Memory usage threshold for cleanup
    emergency_threshold: 0.95  # Emergency cleanup threshold

  # CPU optimization settings
  cpu:
    max_threads: 8  # Maximum number of threads
    use_intel_extension: true  # Use Intel Extension for PyTorch if available
    enable_mkl: true  # Enable Intel MKL
    enable_openmp: true  # Enable OpenMP

  # Storage settings
  # All directories are relative paths; presumably resolved against the
  # process working directory -- verify in the loader.
  storage:
    cache_dir: "./cache"
    models_dir: "./models"
    database_dir: "./database"
    logs_dir: "./logs"
    temp_dir: "./temp"
    max_cache_size_gb: 20.0  # Maximum cache size
# Model Loading Configuration
# Defaults for loading model files, chunked loading, accepted file
# extensions, and the size cut-offs that select the loading strategy.
models:
  # Default settings for model loading
  # NOTE(review): key names match Hugging Face `from_pretrained` keyword
  # arguments; presumably forwarded as-is -- confirm in the loader.
  default_settings:
    torch_dtype: "float32"  # Use float32 for CPU
    low_cpu_mem_usage: true
    device_map: "cpu"
    trust_remote_code: false

  # Chunk loading settings
  chunk_loading:
    enabled: true
    max_chunk_size_mb: 500.0
    max_cached_chunks: 3
    auto_cleanup: true

  # Supported model types
  supported_formats:
    - ".pt"
    - ".pth"
    - ".bin"
    - ".safetensors"

  # Model size limits
  # NOTE(review): the comments below leave the 1000-2000 MB range
  # unspecified -- confirm how the consumer treats models in between.
  size_limits:
    small_model_mb: 1000  # Models under 1GB load normally
    large_model_mb: 2000  # Models over 2GB use chunking
# Training Configuration
# Default hyperparameters and memory-saving switches for training runs.
training:
  # Default training parameters
  default_params:
    learning_rate: 0.0001
    batch_size: 4  # Small batch size for memory efficiency
    max_steps: 1000
    # Presumably the distillation softmax temperature and the weighting
    # between loss terms -- TODO confirm which term alpha applies to.
    temperature: 3.0
    alpha: 0.7
    save_steps: 100
    eval_steps: 50

  # Memory optimization during training
  memory_optimization:
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    mixed_precision: false  # Disable for CPU
    dataloader_num_workers: 2
# Medical Datasets Configuration
# Known medical datasets plus DICOM and image preprocessing defaults.
medical:
  # Supported medical datasets
  # Each entry carries a repository id, a streaming flag, and an estimated
  # size. The ids look like Hugging Face Hub dataset ids -- verify.
  datasets:
    roco_v2:
      repo_id: "eltorio/ROCOv2-radiology"
      streaming_supported: true
      estimated_size_gb: 8.5
    ct_rate:
      repo_id: "ibrahimhamamci/CT-RATE"
      streaming_supported: true
      estimated_size_gb: 12.3
    umie_datasets:
      repo_id: "lion-ai/umie_datasets"
      streaming_supported: true
      estimated_size_gb: 15.7

  # DICOM processing settings
  dicom:
    memory_limit_mb: 1000.0
    # NOTE(review): window center/width units are not stated here;
    # presumably Hounsfield units -- confirm.
    default_window_center: 40
    default_window_width: 400
    default_output_size: [512, 512]

  # Medical preprocessing settings
  preprocessing:
    target_size: [512, 512]
    normalize_images: true
    enhance_contrast: true
# Token Management Configuration
# Encryption settings for stored tokens and metadata for each token type.
tokens:
  # Encryption settings
  # NOTE(review): key_file is a relative path to key material; make sure it
  # is excluded from version control.
  encryption:
    key_file: ".token_key"
    algorithm: "Fernet"

  # Token types and their properties
  types:
    read:
      security_level: "medium"
      recommended_for: "development"
    write:
      security_level: "high"
      recommended_for: "production"
    fine_grained:
      security_level: "very_high"
      recommended_for: "enterprise"
# Database Configuration
# SQLite file location, maintenance intervals, and connection options.
database:
  # SQLite settings
  sqlite:
    # Same value as `database_dir` in the storage settings; keep in sync.
    database_dir: "./database"
    backup_interval_hours: 24
    cleanup_days: 30

  # Connection settings
  # NOTE(review): `timeout` unit is not stated (presumably seconds);
  # `check_same_thread` matches the Python sqlite3.connect argument name.
  connection:
    timeout: 30
    check_same_thread: false
# Web Server Configuration
# Listen address, worker settings, CORS policy, and upload limits.
server:
  # FastAPI settings
  host: "0.0.0.0"  # Wildcard address: listens on all IPv4 interfaces
  port: 8000
  workers: 1  # Single worker for memory efficiency
  reload: false

  # CORS settings
  # NOTE(review): origins and headers are fully open ("*"); consider
  # restricting them for production deployments.
  cors:
    allow_origins: ["*"]
    allow_methods: ["GET", "POST", "PUT", "DELETE"]
    allow_headers: ["*"]

  # Upload settings
  uploads:
    max_file_size_mb: 5000  # 5GB max file size
    allowed_extensions: [".pt", ".pth", ".bin", ".safetensors"]
    temp_dir: "./temp"
# Logging Configuration
# Global level and format, file and console outputs, and per-logger levels.
logging:
  # Log levels
  level: "INFO"
  # %-style format string with named fields (asctime, name, levelname,
  # message).
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

  # File logging
  file:
    enabled: true
    filename: "logs/app.log"
    max_size_mb: 100
    backup_count: 5

  # Console logging
  console:
    enabled: true
    level: "INFO"

  # Specific logger levels
  loggers:
    uvicorn: "INFO"
    transformers: "WARNING"
    datasets: "WARNING"
    torch: "WARNING"
# Performance Monitoring
# Periodic system metrics, memory alerts, and performance recommendations.
monitoring:
  # System metrics collection
  system_metrics:
    enabled: true
    interval_seconds: 30
    store_in_database: true

  # Memory monitoring
  # Thresholds are fractions in [0, 1]; the values equal the cleanup and
  # emergency thresholds in the system memory settings.
  memory_monitoring:
    enabled: true
    alert_threshold: 0.85
    emergency_threshold: 0.95

  # Performance recommendations
  recommendations:
    enabled: true
    check_interval_minutes: 5
# Security Configuration
# Token validation caching and checks applied to uploaded files.
security:
  # Token validation
  token_validation:
    enabled: true
    cache_results: true
    cache_duration_minutes: 60

  # File upload security
  file_uploads:
    scan_uploads: true
    # Same limit as the server upload settings (5000 MB); keep in sync.
    max_file_size_mb: 5000
    allowed_mime_types:
      - "application/octet-stream"
      - "application/x-pytorch"
# Feature Flags
# Boolean switches for platform capabilities; experimental ones are grouped
# under `experimental`.
features:
  # Advanced features
  memory_management: true
  chunk_loading: true
  cpu_optimization: true
  medical_datasets: true
  token_management: true

  # Experimental features
  experimental:
    auto_model_optimization: true
    progressive_loading: true
    smart_caching: true
# Environment-specific overrides
# Each environment repeats part of the top-level structure with the values
# to use in that environment. How the overrides are merged onto the base
# settings is not visible here -- TODO confirm in the config loader.
environments:
  development:
    logging:
      level: "DEBUG"
    server:
      reload: true

  # These production values currently equal the base settings.
  production:
    logging:
      level: "INFO"
    server:
      reload: false
    security:
      token_validation:
        enabled: true