File size: 5,630 Bytes
ab4e093
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# AI Knowledge Distillation Platform Configuration
# تكوين منصة تقطير المعرفة للذكاء الاصطناعي

# System Configuration
# Host-level limits: memory budget, CPU threading, and on-disk locations.
system:
  # Memory management settings
  memory:
    # Maximum memory usage (leave 2GB for system).
    # NOTE(review): the 2GB headroom suggests a 16GB host - confirm.
    max_memory_gb: 14.0
    # Chunk size for large model loading.
    chunk_size_mb: 500.0
    # Memory usage threshold for cleanup.
    cleanup_threshold: 0.85
    # Emergency cleanup threshold.
    emergency_threshold: 0.95

  # CPU optimization settings
  cpu:
    # Maximum number of threads.
    max_threads: 8
    # Use Intel Extension for PyTorch if available.
    use_intel_extension: true
    # Enable Intel MKL.
    enable_mkl: true
    # Enable OpenMP.
    enable_openmp: true

  # Storage settings
  # NOTE(review): all paths are relative; presumably resolved against the
  # process working directory - confirm with the loader.
  storage:
    cache_dir: './cache'
    models_dir: './models'
    database_dir: './database'
    logs_dir: './logs'
    temp_dir: './temp'
    # Maximum cache size.
    max_cache_size_gb: 20.0

# Model Loading Configuration
# Defaults applied when loading models, chunked-loading limits, accepted
# file extensions, and the size thresholds that select a loading strategy.
models:
  # Default settings for model loading
  default_settings:
    # Use float32 for CPU.
    torch_dtype: 'float32'
    low_cpu_mem_usage: true
    device_map: 'cpu'
    trust_remote_code: false

  # Chunk loading settings
  chunk_loading:
    enabled: true
    # NOTE(review): same value as system.memory.chunk_size_mb (500.0);
    # confirm which key the loader reads and keep the two in sync.
    max_chunk_size_mb: 500.0
    max_cached_chunks: 3
    auto_cleanup: true

  # Supported model types (listed as file extensions)
  supported_formats:
    - '.pt'
    - '.pth'
    - '.bin'
    - '.safetensors'

  # Model size limits
  size_limits:
    # Models under 1GB load normally.
    small_model_mb: 1000
    # Models over 2GB use chunking.
    # NOTE(review): handling of models between 1000 MB and 2000 MB is not
    # stated here - confirm against the loader.
    large_model_mb: 2000

# Training Configuration
# Default hyperparameters plus memory-saving switches for training runs.
training:
  # Default training parameters
  default_params:
    learning_rate: 0.0001
    # Small batch size for memory efficiency.
    batch_size: 4
    max_steps: 1000
    # NOTE(review): presumably the distillation softmax temperature and the
    # loss-mixing weight, respectively - confirm against the trainer.
    temperature: 3.0
    alpha: 0.7
    save_steps: 100
    eval_steps: 50

  # Memory optimization during training
  memory_optimization:
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    # Disable for CPU.
    mixed_precision: false
    dataloader_num_workers: 2

# Medical Datasets Configuration
# Dataset registry plus DICOM and image preprocessing defaults.
medical:
  # Supported medical datasets
  # Each entry gives a repository id, whether streaming is supported, and an
  # estimated size in GB.
  datasets:
    roco_v2:
      repo_id: 'eltorio/ROCOv2-radiology'
      streaming_supported: true
      estimated_size_gb: 8.5
    ct_rate:
      repo_id: 'ibrahimhamamci/CT-RATE'
      streaming_supported: true
      estimated_size_gb: 12.3
    umie_datasets:
      repo_id: 'lion-ai/umie_datasets'
      streaming_supported: true
      estimated_size_gb: 15.7

  # DICOM processing settings
  dicom:
    memory_limit_mb: 1000.0
    # NOTE(review): window center/width units are not stated here
    # (presumably Hounsfield units) - confirm.
    default_window_center: 40
    default_window_width: 400
    default_output_size: [512, 512]

  # Medical preprocessing settings
  preprocessing:
    target_size: [512, 512]
    normalize_images: true
    enhance_contrast: true

# Token Management Configuration
# How stored tokens are encrypted and how each token type is classified.
tokens:
  # Encryption settings
  encryption:
    # NOTE(review): relative path to the key file; make sure it is excluded
    # from version control.
    key_file: '.token_key'
    algorithm: 'Fernet'

  # Token types and their properties
  types:
    read:
      security_level: 'medium'
      recommended_for: 'development'
    write:
      security_level: 'high'
      recommended_for: 'production'
    fine_grained:
      security_level: 'very_high'
      recommended_for: 'enterprise'

# Database Configuration
# SQLite location and maintenance schedule, plus connection options.
database:
  # SQLite settings
  sqlite:
    # NOTE(review): same value as system.storage.database_dir; confirm which
    # key is authoritative and keep the two in sync.
    database_dir: './database'
    backup_interval_hours: 24
    cleanup_days: 30

  # Connection settings
  connection:
    # NOTE(review): timeout unit is not stated (presumably seconds) - confirm.
    timeout: 30
    check_same_thread: false

# Web Server Configuration
# Bind address, worker count, CORS policy, and upload limits.
server:
  # FastAPI settings
  # NOTE(review): '0.0.0.0' conventionally binds every network interface;
  # together with the wildcard CORS policy below this is permissive - confirm
  # it is intended outside development.
  host: '0.0.0.0'
  port: 8000
  # Single worker for memory efficiency.
  workers: 1
  reload: false

  # CORS settings
  cors:
    allow_origins:
      - '*'
    allow_methods:
      - 'GET'
      - 'POST'
      - 'PUT'
      - 'DELETE'
    allow_headers:
      - '*'

  # Upload settings
  uploads:
    # 5GB max file size.
    # NOTE(review): the same limit also appears under
    # security.file_uploads.max_file_size_mb; keep the two in sync.
    max_file_size_mb: 5000
    allowed_extensions:
      - '.pt'
      - '.pth'
      - '.bin'
      - '.safetensors'
    temp_dir: './temp'

# Logging Configuration
# Global level and format, file/console sinks, and per-logger overrides.
logging:
  # Log levels
  level: 'INFO'
  # The %(name)s placeholders match Python logging's format-string syntax.
  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

  # File logging
  file:
    enabled: true
    # NOTE(review): system.storage.logs_dir is './logs'; confirm this path is
    # meant to land in the same directory.
    filename: 'logs/app.log'
    max_size_mb: 100
    backup_count: 5

  # Console logging
  console:
    enabled: true
    level: 'INFO'

  # Specific logger levels
  loggers:
    uvicorn: 'INFO'
    transformers: 'WARNING'
    datasets: 'WARNING'
    torch: 'WARNING'

# Performance Monitoring
# Periodic system metrics, memory alerts, and performance recommendations.
monitoring:
  # System metrics collection
  system_metrics:
    enabled: true
    interval_seconds: 30
    store_in_database: true

  # Memory monitoring
  # NOTE(review): these thresholds carry the same values as
  # system.memory.cleanup_threshold / emergency_threshold (0.85 / 0.95);
  # confirm whether they are meant to move together.
  memory_monitoring:
    enabled: true
    alert_threshold: 0.85
    emergency_threshold: 0.95

  # Performance recommendations
  recommendations:
    enabled: true
    check_interval_minutes: 5

# Security Configuration
# Token validation caching and upload screening rules.
security:
  # Token validation
  token_validation:
    enabled: true
    cache_results: true
    cache_duration_minutes: 60

  # File upload security
  file_uploads:
    scan_uploads: true
    # NOTE(review): the same limit also appears under
    # server.uploads.max_file_size_mb; keep the two in sync.
    max_file_size_mb: 5000
    allowed_mime_types:
      - 'application/octet-stream'
      - 'application/x-pytorch'

# Feature Flags
# Boolean switches for platform capabilities; every flag is enabled here.
features:
  # Advanced features
  memory_management: true
  chunk_loading: true
  cpu_optimization: true
  medical_datasets: true
  token_management: true

  # Experimental features
  experimental:
    auto_model_optimization: true
    progressive_loading: true
    smart_caching: true

# Environment-specific overrides
# Each environment mirrors the shape of the top-level sections it changes.
# NOTE(review): how these are merged onto the base settings (deep vs. shallow)
# is decided by the loader, not by this file - confirm.
environments:
  # Development: verbose logging and auto-reload enabled.
  development:
    logging:
      level: 'DEBUG'
    server:
      reload: true

  # Production: restates the base values for logging level, reload, and
  # token validation.
  production:
    logging:
      level: 'INFO'
    server:
      reload: false
    security:
      token_validation:
        enabled: true