kvn420 committed
Commit 8137cde · verified · 1 Parent(s): a289b5e

Update app.py

Files changed (1)
  1. app.py +122 -19
app.py CHANGED
@@ -1,42 +1,138 @@
  import gradio as gr
- import torch
- import torch.nn as nn
- from transformers import (
-     AutoTokenizer, AutoModel, AutoProcessor,
-     AutoModelForCausalLM, TrainingArguments, Trainer,
-     DataCollatorForLanguageModeling
- )
- from datasets import Dataset, load_dataset, concatenate_datasets
- import json
  import os
  import requests
+ import json
- from PIL import Image
- import librosa
- import cv2
- import numpy as np
- from pathlib import Path
  import logging
  from typing import Dict, List, Optional, Union
  import time
- from huggingface_hub import HfApi, list_datasets_in_collection
  import tempfile
  import shutil

+ # Conditional imports to avoid errors
+ try:
+     import numpy as np
+     NUMPY_AVAILABLE = True
+ except ImportError:
+     NUMPY_AVAILABLE = False
+     import array
+
+ try:
+     from pathlib import Path
+     PATHLIB_AVAILABLE = True
+ except ImportError:
+     PATHLIB_AVAILABLE = False
+
+ try:
+     from huggingface_hub import HfApi
+     HF_HUB_AVAILABLE = True
+ except ImportError:
+     HF_HUB_AVAILABLE = False
+
+ try:
+     import numpy as np
+     NUMPY_AVAILABLE = True
+ except ImportError:
+     NUMPY_AVAILABLE = False
+
+ try:
+     import torch
+     import torch.nn as nn
+     TORCH_AVAILABLE = True
+ except ImportError:
+     TORCH_AVAILABLE = False
+     torch = None
+
+ try:
+     from transformers import (
+         AutoTokenizer, AutoModel, AutoProcessor,
+         AutoModelForCausalLM, TrainingArguments, Trainer,
+         DataCollatorForLanguageModeling
+     )
+     TRANSFORMERS_AVAILABLE = True
+ except ImportError:
+     TRANSFORMERS_AVAILABLE = False
+
+ try:
+     from datasets import Dataset, load_dataset, concatenate_datasets
+     DATASETS_AVAILABLE = True
+ except ImportError:
+     DATASETS_AVAILABLE = False
+
+ try:
+     from PIL import Image
+     PIL_AVAILABLE = True
+ except ImportError:
+     PIL_AVAILABLE = False
+
+ try:
+     import librosa
+     LIBROSA_AVAILABLE = True
+ except ImportError:
+     LIBROSA_AVAILABLE = False
+
+ try:
+     import cv2
+     CV2_AVAILABLE = True
+ except ImportError:
+     CV2_AVAILABLE = False
+
  # Logging configuration
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

  class MultimodalTrainer:
      def __init__(self):
-         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         # Dependency check
+         self.dependencies_ok = self.check_dependencies()
+
+         if not TORCH_AVAILABLE:
+             self.device = "cpu"
+             logger.warning("PyTorch not available")
+         else:
+             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
          self.current_model = None
          self.current_tokenizer = None
          self.current_processor = None
          self.training_data = []
-         self.hf_api = HfApi()
+
+         if HF_HUB_AVAILABLE:
+             self.hf_api = HfApi()
+         else:
+             self.hf_api = None
+
+     def check_dependencies(self):
+         """Report which dependencies are installed"""
+         deps = {
+             "PyTorch": TORCH_AVAILABLE,
+             "Transformers": TRANSFORMERS_AVAILABLE,
+             "Datasets": DATASETS_AVAILABLE,
+             "PIL": PIL_AVAILABLE,
+             "Librosa": LIBROSA_AVAILABLE,
+             "OpenCV": CV2_AVAILABLE,
+             "NumPy": NUMPY_AVAILABLE,
+             "HuggingFace Hub": HF_HUB_AVAILABLE
+         }
+
+         status = "📦 Dependency status:\n"
+         for name, available in deps.items():
+             status += f"{'✅' if available else '❌'} {name}\n"
+
+         if not TORCH_AVAILABLE:
+             status += "\n⚠️ PyTorch is required for training!"
+         if not TRANSFORMERS_AVAILABLE:
+             status += "\n⚠️ Transformers is required for models!"
+
+         return status

      def load_model(self, model_name: str, model_type: str = "causal"):
          """Load a model from Hugging Face"""
+         if not TRANSFORMERS_AVAILABLE:
+             return "❌ Transformers is not installed!"
+
+         if not TORCH_AVAILABLE:
+             return "❌ PyTorch is not installed!"
+
          try:
              logger.info(f"Loading model: {model_name}")

@@ -79,12 +175,19 @@ class MultimodalTrainer:

      def load_collection_datasets(self, collection_url: str):
          """Load all the datasets in an HF collection"""
+         if not DATASETS_AVAILABLE:
+             return "❌ Datasets is not installed!"
+
          try:
              # Extract the collection ID from the URL
              collection_id = collection_url.split("/")[-1]

-             # List the datasets in the collection
-             collection_items = list_datasets_in_collection(collection_id)
+             # For now, use the basic HF API
+             try:
+                 from huggingface_hub import list_datasets_in_collection
+                 collection_items = list_datasets_in_collection(collection_id)
+             except ImportError:
+                 return "❌ Collection helper not available, add the datasets manually"

              datasets_info = []
              loaded_datasets = []
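
The pattern this commit introduces — wrapping each optional import in try/except ImportError, recording the result in a module-level *_AVAILABLE flag, and gating features on that flag — can be exercised in isolation. A minimal sketch, assuming nothing beyond the standard library plus an optionally installed torch; pick_device and train are illustrative stand-ins for the app.py methods, not part of the commit:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Guarded import: the module still loads when the optional
# dependency is absent; the flag records what is usable.
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    torch = None  # keep the name bound so stray references fail predictably

def pick_device() -> str:
    # Mirrors the __init__ logic: fall back to CPU without torch.
    if not TORCH_AVAILABLE:
        logger.warning("PyTorch not available")
        return "cpu"
    return "cuda" if torch.cuda.is_available() else "cpu"

def train(prompt: str) -> str:
    # Feature gate: return a user-facing message instead of raising,
    # like the "❌ ... not installed!" returns in load_model.
    if not TORCH_AVAILABLE:
        return "❌ PyTorch is not installed!"
    return f"training on {pick_device()}: {prompt}"

if __name__ == "__main__":
    print(train("hello"))

Binding torch = None in the except branch keeps the name defined at module scope, so an unguarded torch.something reference fails with an explicit AttributeError rather than a NameError.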