File size: 16,936 Bytes
cca1fa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
"""
Models Management System for Knowledge Distillation Platform
نظام إدارة النماذج لمنصة تقطير المعرفة
"""

import json
import logging
import os
from pathlib import Path
from typing import Dict, List, Any, Optional
from datetime import datetime
import asyncio

from huggingface_hub import list_models, model_info

logger = logging.getLogger(__name__)

class ModelsManager:
    """
    Comprehensive models management system for the platform
    نظام إدارة النماذج الشامل للمنصة
    """
    
    def __init__(self, storage_path: str = "data/models"):
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)
        
        self.config_file = self.storage_path / "models_config.json"
        self.selected_teachers_file = self.storage_path / "selected_teachers.json"
        self.selected_student_file = self.storage_path / "selected_student.json"
        
        # Load existing configuration
        self.models_config = self._load_config()
        self.selected_teachers = self._load_selected_teachers()
        self.selected_student = self._load_selected_student()
        
        logger.info(f"Models Manager initialized with {len(self.models_config)} configured models")
    
    def _load_config(self) -> Dict[str, Any]:
        """Load models configuration"""
        try:
            if self.config_file.exists():
                with open(self.config_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            else:
                # Initialize with default models
                default_config = self._get_default_models()
                self._save_config(default_config)
                return default_config
        except Exception as e:
            logger.error(f"Error loading models config: {e}")
            return {}
    
    def _save_config(self, config: Dict[str, Any]):
        """Save models configuration"""
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=2, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Error saving models config: {e}")
    
    def _load_selected_teachers(self) -> List[str]:
        """Load selected teacher models list"""
        try:
            if self.selected_teachers_file.exists():
                with open(self.selected_teachers_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            else:
                return []
        except Exception as e:
            logger.error(f"Error loading selected teachers: {e}")
            return []
    
    def _save_selected_teachers(self):
        """Save selected teacher models list"""
        try:
            with open(self.selected_teachers_file, 'w', encoding='utf-8') as f:
                json.dump(self.selected_teachers, f, indent=2, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Error saving selected teachers: {e}")
    
    def _load_selected_student(self) -> Optional[str]:
        """Load selected student model"""
        try:
            if self.selected_student_file.exists():
                with open(self.selected_student_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                    return data.get('student_model')
            else:
                return None
        except Exception as e:
            logger.error(f"Error loading selected student: {e}")
            return None
    
    def _save_selected_student(self):
        """Save selected student model"""
        try:
            with open(self.selected_student_file, 'w', encoding='utf-8') as f:
                json.dump({'student_model': self.selected_student}, f, indent=2, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Error saving selected student: {e}")
    
    def _get_default_models(self) -> Dict[str, Any]:
        """Get default models configuration"""
        return {
            "google/bert-base-uncased": {
                "name": "BERT Base Uncased",
                "name_ar": "بيرت الأساسي",
                "model_id": "google/bert-base-uncased",
                "category": "text",
                "type": "teacher",
                "description": "BERT base model for text understanding",
                "description_ar": "نموذج بيرت الأساسي لفهم النصوص",
                "size": "~440MB",
                "language": "English",
                "modality": "text",
                "architecture": "transformer",
                "license": "Apache 2.0",
                "added_date": datetime.now().isoformat(),
                "status": "available",
                "parameters": "110M"
            },
            "microsoft/DialoGPT-medium": {
                "name": "DialoGPT Medium",
                "name_ar": "ديالو جي بي تي متوسط",
                "model_id": "microsoft/DialoGPT-medium",
                "category": "text",
                "type": "teacher",
                "description": "Conversational AI model",
                "description_ar": "نموذج ذكاء اصطناعي للمحادثة",
                "size": "~1.2GB",
                "language": "English",
                "modality": "text",
                "architecture": "gpt",
                "license": "MIT",
                "added_date": datetime.now().isoformat(),
                "status": "available",
                "parameters": "345M"
            },
            "google/vit-base-patch16-224": {
                "name": "Vision Transformer Base",
                "name_ar": "محول الرؤية الأساسي",
                "model_id": "google/vit-base-patch16-224",
                "category": "vision",
                "type": "teacher",
                "description": "Vision Transformer for image classification",
                "description_ar": "محول الرؤية لتصنيف الصور",
                "size": "~330MB",
                "language": "Universal",
                "modality": "vision",
                "architecture": "transformer",
                "license": "Apache 2.0",
                "added_date": datetime.now().isoformat(),
                "status": "available",
                "parameters": "86M"
            }
        }
    
    async def search_huggingface_models(self, query: str, limit: int = 20, model_type: str = None) -> List[Dict[str, Any]]:
        """Search for models on Hugging Face"""
        try:
            logger.info(f"Searching Hugging Face for models: {query}")
            
            # Search models
            models = list_models(search=query, limit=limit)
            
            results = []
            for model in models:
                try:
                    # Get model info
                    info = model_info(model.modelId)
                    
                    model_data = {
                        "id": model.modelId,
                        "name": model.modelId.split('/')[-1],
                        "author": model.modelId.split('/')[0] if '/' in model.modelId else 'unknown',
                        "description": getattr(info, 'description', 'No description available'),
                        "tags": getattr(info, 'tags', []),
                        "downloads": getattr(info, 'downloads', 0),
                        "likes": getattr(info, 'likes', 0),
                        "created_at": getattr(info, 'created_at', None),
                        "last_modified": getattr(info, 'last_modified', None),
                        "pipeline_tag": getattr(info, 'pipeline_tag', 'unknown'),
                        "library_name": getattr(info, 'library_name', 'unknown')
                    }
                    
                    # Filter by model type if specified
                    if model_type:
                        pipeline_tag = model_data.get('pipeline_tag', '').lower()
                        if model_type == 'text' and pipeline_tag not in ['text-classification', 'text-generation', 'fill-mask', 'question-answering']:
                            continue
                        elif model_type == 'vision' and pipeline_tag not in ['image-classification', 'object-detection', 'image-segmentation']:
                            continue
                        elif model_type == 'audio' and pipeline_tag not in ['automatic-speech-recognition', 'audio-classification']:
                            continue
                    
                    results.append(model_data)
                    
                except Exception as e:
                    logger.warning(f"Error processing model {model.modelId}: {e}")
                    continue
            
            logger.info(f"Found {len(results)} models")
            return results
            
        except Exception as e:
            logger.error(f"Error searching Hugging Face models: {e}")
            return []
    
    async def add_model(self, model_info: Dict[str, Any]) -> bool:
        """Add a new model to the configuration"""
        try:
            model_id = model_info.get('model_id') or model_info.get('id')
            if not model_id:
                raise ValueError("Model ID is required")
            
            # Validate model exists and is accessible
            validation_result = await self.validate_model(model_id)
            if not validation_result['valid']:
                raise ValueError(f"Model validation failed: {validation_result['error']}")
            
            # Prepare model configuration
            config = {
                "name": model_info.get('name', model_id.split('/')[-1]),
                "name_ar": model_info.get('name_ar', ''),
                "model_id": model_id,
                "category": model_info.get('category', 'text'),
                "type": model_info.get('type', 'teacher'),
                "description": model_info.get('description', ''),
                "description_ar": model_info.get('description_ar', ''),
                "size": model_info.get('size', 'Unknown'),
                "language": model_info.get('language', 'Unknown'),
                "modality": model_info.get('modality', 'text'),
                "architecture": model_info.get('architecture', 'unknown'),
                "license": model_info.get('license', 'Unknown'),
                "added_date": datetime.now().isoformat(),
                "status": "available",
                "parameters": model_info.get('parameters', 'Unknown'),
                "validation": validation_result
            }
            
            # Add to configuration
            self.models_config[model_id] = config
            self._save_config(self.models_config)
            
            logger.info(f"Added model: {model_id}")
            return True
            
        except Exception as e:
            logger.error(f"Error adding model: {e}")
            return False
    
    async def validate_model(self, model_id: str) -> Dict[str, Any]:
        """Validate that a model exists and is accessible"""
        try:
            logger.info(f"Validating model: {model_id}")
            
            # Try to get model info
            info = model_info(model_id)
            
            return {
                "valid": True,
                "pipeline_tag": getattr(info, 'pipeline_tag', 'unknown'),
                "library_name": getattr(info, 'library_name', 'unknown'),
                "accessible": True,
                "error": None
            }
            
        except Exception as e:
            logger.warning(f"Model validation failed for {model_id}: {e}")
            return {
                "valid": False,
                "pipeline_tag": None,
                "library_name": None,
                "accessible": False,
                "error": str(e)
            }
    
    def get_all_models(self) -> Dict[str, Any]:
        """Get all configured models"""
        return self.models_config
    
    def get_teacher_models(self) -> Dict[str, Any]:
        """Get all teacher models"""
        return {
            model_id: model_info 
            for model_id, model_info in self.models_config.items() 
            if model_info.get('type') == 'teacher'
        }
    
    def get_student_models(self) -> Dict[str, Any]:
        """Get all student models"""
        return {
            model_id: model_info 
            for model_id, model_info in self.models_config.items() 
            if model_info.get('type') == 'student'
        }
    
    def get_selected_teachers(self) -> List[str]:
        """Get list of selected teacher model IDs"""
        return self.selected_teachers
    
    def get_selected_student(self) -> Optional[str]:
        """Get selected student model ID"""
        return self.selected_student
    
    def select_teacher(self, model_id: str) -> bool:
        """Select a teacher model"""
        try:
            if model_id not in self.models_config:
                raise ValueError(f"Model {model_id} not found in configuration")
            
            model_info = self.models_config[model_id]
            if model_info.get('type') != 'teacher':
                raise ValueError(f"Model {model_id} is not a teacher model")
            
            if model_id not in self.selected_teachers:
                self.selected_teachers.append(model_id)
                self._save_selected_teachers()
                logger.info(f"Selected teacher model: {model_id}")
            
            return True
            
        except Exception as e:
            logger.error(f"Error selecting teacher model: {e}")
            return False
    
    def deselect_teacher(self, model_id: str) -> bool:
        """Deselect a teacher model"""
        try:
            if model_id in self.selected_teachers:
                self.selected_teachers.remove(model_id)
                self._save_selected_teachers()
                logger.info(f"Deselected teacher model: {model_id}")
            
            return True
            
        except Exception as e:
            logger.error(f"Error deselecting teacher model: {e}")
            return False
    
    def select_student(self, model_id: str = None) -> bool:
        """Select a student model (None for training from scratch)"""
        try:
            if model_id and model_id not in self.models_config:
                raise ValueError(f"Model {model_id} not found in configuration")
            
            if model_id:
                model_info = self.models_config[model_id]
                if model_info.get('type') not in ['student', 'teacher']:  # Teachers can be used as base for students
                    raise ValueError(f"Model {model_id} cannot be used as student model")
            
            self.selected_student = model_id
            self._save_selected_student()
            
            if model_id:
                logger.info(f"Selected student model: {model_id}")
            else:
                logger.info("Selected training from scratch (no base student model)")
            
            return True
            
        except Exception as e:
            logger.error(f"Error selecting student model: {e}")
            return False
    
    def remove_model(self, model_id: str) -> bool:
        """Remove a model from configuration"""
        try:
            if model_id in self.models_config:
                del self.models_config[model_id]
                self._save_config(self.models_config)
            
            if model_id in self.selected_teachers:
                self.selected_teachers.remove(model_id)
                self._save_selected_teachers()
            
            if self.selected_student == model_id:
                self.selected_student = None
                self._save_selected_student()
            
            logger.info(f"Removed model: {model_id}")
            return True
            
        except Exception as e:
            logger.error(f"Error removing model: {e}")
            return False
    
    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """Get detailed information about a specific model"""
        return self.models_config.get(model_id)
    
    def get_models_by_category(self, category: str) -> Dict[str, Any]:
        """Get models filtered by category"""
        return {
            model_id: model_info 
            for model_id, model_info in self.models_config.items() 
            if model_info.get('category') == category
        }
    
    def get_models_by_modality(self, modality: str) -> Dict[str, Any]:
        """Get models filtered by modality"""
        return {
            model_id: model_info 
            for model_id, model_info in self.models_config.items() 
            if model_info.get('modality') == modality
        }