Update app.py
app.py
CHANGED
@@ -246,20 +246,97 @@ class MultimodalTrainer:
         try:
             logger.info(f"Loading model: {model_name}")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Multiple loading strategies
+            model_loaded = False
+            error_messages = []
+
+            # Strategy 1: AutoModelForCausalLM with trust_remote_code
+            if model_type == "causal" and not model_loaded:
+                try:
+                    self.current_model = AutoModelForCausalLM.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                        device_map="auto" if torch.cuda.is_available() else None,
+                        trust_remote_code=True
+                    )
+                    model_loaded = True
+                except Exception as e:
+                    error_messages.append(f"AutoModelForCausalLM: {str(e)}")
+
+            # Strategy 2: generic AutoModel
+            if not model_loaded:
+                try:
+                    self.current_model = AutoModel.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                        device_map="auto" if torch.cuda.is_available() else None,
+                        trust_remote_code=True
+                    )
+                    model_loaded = True
+                except Exception as e:
+                    error_messages.append(f"AutoModel: {str(e)}")
+
+            # Strategy 3: automatic detection based on the model name
+            if not model_loaded and any(x in model_name.lower() for x in ['llama', 'mistral', 'qwen', 'phi']):
+                try:
+                    # For LLaMA/Mistral-style models
+                    from transformers import LlamaForCausalLM, MistralForCausalLM
+
+                    if 'llama' in model_name.lower():
+                        self.current_model = LlamaForCausalLM.from_pretrained(
+                            model_name,
+                            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                            device_map="auto" if torch.cuda.is_available() else None,
+                            trust_remote_code=True
+                        )
+                        model_loaded = True
+                    elif 'mistral' in model_name.lower():
+                        self.current_model = MistralForCausalLM.from_pretrained(
+                            model_name,
+                            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                            device_map="auto" if torch.cuda.is_available() else None,
+                            trust_remote_code=True
+                        )
+                        model_loaded = True
+                except Exception as e:
+                    error_messages.append(f"Model-specific loader: {str(e)}")
+
+            # Strategy 4: manual configuration
+            if not model_loaded:
+                try:
+                    # Download the configuration first
+                    from transformers import AutoConfig
+                    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+
+                    # Force model_type if it is missing
+                    if not hasattr(config, 'model_type') or config.model_type is None:
+                        # Detection based on the declared architecture
+                        if hasattr(config, 'architectures') and config.architectures:
+                            arch = config.architectures[0].lower()
+                            if 'llama' in arch:
+                                config.model_type = 'llama'
+                            elif 'mistral' in arch:
+                                config.model_type = 'mistral'
+                            elif 'qwen' in arch:
+                                config.model_type = 'qwen2'
+                            elif 'phi' in arch:
+                                config.model_type = 'phi'
+                            else:
+                                config.model_type = 'llama'  # default fallback
+
+                    self.current_model = AutoModelForCausalLM.from_pretrained(
+                        model_name,
+                        config=config,
+                        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                        device_map="auto" if torch.cuda.is_available() else None,
+                        trust_remote_code=True
+                    )
+                    model_loaded = True
+                except Exception as e:
+                    error_messages.append(f"Manual configuration: {str(e)}")
+
+            if not model_loaded:
+                return "❌ Could not load the model. Errors:\n" + "\n".join(error_messages)

             # Load the tokenizer
             try:
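The 90-line addition above is a fallback chain: each strategy runs only if every earlier one failed, and each failure is recorded in error_messages instead of being raised. Reduced to its skeleton, the pattern looks like the sketch below; load_with_fallbacks and the loader names are illustrative, not part of the app.

    from typing import Callable, List, Optional, Tuple

    def load_with_fallbacks(
        loaders: List[Tuple[str, Callable[[], object]]]
    ) -> Tuple[Optional[object], List[str]]:
        """Try each loader in order; return the first success plus the error log."""
        errors: List[str] = []
        for name, loader in loaders:
            try:
                return loader(), errors  # first success wins
            except Exception as e:
                errors.append(f"{name}: {e}")  # record the failure and fall through
        return None, errors

    # Hypothetical usage mirroring strategies 1 and 2 above:
    # model, errors = load_with_fallbacks([
    #     ("AutoModelForCausalLM", lambda: AutoModelForCausalLM.from_pretrained(name)),
    #     ("AutoModel", lambda: AutoModel.from_pretrained(name)),
    # ])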
@@ -270,6 +347,14 @@ class MultimodalTrainer:
                 self.current_tokenizer.pad_token = self.current_tokenizer.eos_token
             except Exception as e:
                 logger.warning(f"Tokenizer not found: {e}")
+                try:
+                    # Try a generic tokenizer as a fallback
+                    from transformers import LlamaTokenizer
+                    self.current_tokenizer = LlamaTokenizer.from_pretrained(
+                        model_name, trust_remote_code=True
+                    )
+                except Exception:
+                    logger.warning("No tokenizer found")

             # Load the processor
             try:
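The tokenizer fallback is the same chain idea on a smaller scale: AutoTokenizer first, then LlamaTokenizer. To probe what a repo resolves to without touching trainer state, a throwaway check along these lines works (probe_tokenizer is an illustrative name, not part of the app):

    from transformers import AutoTokenizer, LlamaTokenizer

    def probe_tokenizer(model_name: str) -> str:
        """Report which tokenizer class a repo resolves to, if any."""
        last_error: Exception | None = None
        for cls in (AutoTokenizer, LlamaTokenizer):
            try:
                tok = cls.from_pretrained(model_name, trust_remote_code=True)
                return f"{cls.__name__} -> {type(tok).__name__}"
            except Exception as e:
                last_error = e  # keep the most recent failure for the report
        return f"no tokenizer found: {last_error}"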
@@ -279,7 +364,7 @@ class MultimodalTrainer:
             except Exception as e:
                 logger.warning(f"Processor not found: {e}")

-            return f"✅ Model {model_name} loaded successfully!\nType: {type(self.current_model).__name__}"
+            return f"✅ Model {model_name} loaded successfully!\nType: {type(self.current_model).__name__}\nArchitecture: {(getattr(self.current_model.config, 'architectures', None) or ['Unknown'])[0] if hasattr(self.current_model, 'config') else 'Unknown'}"

         except Exception as e:
             error_msg = f"❌ Error while loading: {str(e)}"
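The new success message reads the architecture defensively, because config.architectures can be absent or set to None, in which case a plain getattr(..., ['Unknown'])[0] would crash on None[0]. The guard reduces to a small helper; first_architecture is an illustrative name:

    def first_architecture(config) -> str:
        """Return the first declared architecture, tolerating a missing or None field."""
        archs = getattr(config, "architectures", None) or ["Unknown"]
        return archs[0]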
@@ -354,7 +439,54 @@ class MultimodalTrainer:
         info += f"📈 Examples: {len(self.training_data):,}\n"
         info += f"📝 Columns: {list(self.training_data.column_names)}\n"

-
+    def diagnose_model(self, model_name: str):
+        """Diagnose a model before loading it"""
+        if not model_name.strip():
+            return "❌ Please enter a model name"
+
+        try:
+            from transformers import AutoConfig
+
+            result = f"🔍 MODEL DIAGNOSTIC: {model_name}\n\n"
+
+            # Check that the model is accessible
+            try:
+                config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+                result += "✅ Model accessible\n"
+
+                # Configuration details
+                result += f"📋 Model type: {getattr(config, 'model_type', 'Not set')}\n"
+                result += f"🏗️ Architectures: {getattr(config, 'architectures', ['Unknown'])}\n"
+                result += f"📚 Vocabulary size: {getattr(config, 'vocab_size', 'Unknown')}\n"
+                result += f"🧠 Hidden size: {getattr(config, 'hidden_size', 'Unknown')}\n"
+                result += f"🔢 Number of layers: {getattr(config, 'num_hidden_layers', 'Unknown')}\n"
+
+                # Recommendations
+                if not hasattr(config, 'model_type') or config.model_type is None:
+                    result += "\n⚠️ PROBLEM: model_type is missing\n"
+                    result += "💡 SOLUTION: the loader will try to detect it automatically\n"
+
+                if hasattr(config, 'architectures') and config.architectures:
+                    arch = config.architectures[0].lower()
+                    if 'llama' in arch:
+                        result += "🎯 Detected type: LLaMA\n"
+                    elif 'mistral' in arch:
+                        result += "🎯 Detected type: Mistral\n"
+                    elif 'qwen' in arch:
+                        result += "🎯 Detected type: Qwen\n"
+                    elif 'phi' in arch:
+                        result += "🎯 Detected type: Phi\n"
+
+                result += "\n✅ Loading should be possible with the multi-strategy loader"
+
+            except Exception as e:
+                result += f"❌ Access error: {str(e)}\n"
+                result += "💡 Check that the model exists and is public\n"
+
+            return result
+
+        except Exception as e:
+            return f"❌ Diagnostic error: {str(e)}"

 # Initialization
 trainer = MultimodalTrainer()
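Since diagnose_model only fetches the config JSON, it is cheap to call before committing to a full weight download. A quick smoke test from a REPL might look like this (the model id is just an example of a small public repo):

    trainer = MultimodalTrainer()
    print(trainer.diagnose_model("HuggingFaceTB/SmolLM2-135M"))
    print(trainer.diagnose_model(""))  # -> "❌ Please enter a model name"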
@@ -428,6 +560,7 @@ def create_interface():
                     value="causal"
                 )
                 load_model_btn = gr.Button("🔄 Load model", variant="primary")
+                diagnose_btn = gr.Button("🔍 Diagnose model", variant="secondary")

             with gr.Column():
                 model_status = gr.Textbox(
@@ -449,6 +582,12 @@ def create_interface():
             outputs=model_status
         )

+        diagnose_btn.click(
+            trainer.diagnose_model,
+            inputs=[model_input],
+            outputs=model_status
+        )
+
         info_btn.click(trainer.get_model_info, outputs=model_info)

         with gr.Tab("📊 Data"):
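Both click handlers write into the same model_status textbox, so whichever button was pressed last determines what is shown. A stripped-down reproduction of this wiring, independent of the rest of the app and with placeholder handler bodies, could look like:

    import gradio as gr

    def load_model(name: str) -> str:
        return f"would load: {name}"          # placeholder for trainer.load_model

    def diagnose_model(name: str) -> str:
        return f"would diagnose: {name}"      # placeholder for trainer.diagnose_model

    with gr.Blocks() as demo:
        model_input = gr.Textbox(label="Model name")
        load_btn = gr.Button("🔄 Load model", variant="primary")
        diagnose_btn = gr.Button("🔍 Diagnose model", variant="secondary")
        status = gr.Textbox(label="Status")

        # Both handlers target the same output component, as in the diff above.
        load_btn.click(load_model, inputs=[model_input], outputs=status)
        diagnose_btn.click(diagnose_model, inputs=[model_input], outputs=status)

    # demo.launch()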
|