Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import PyPDF2
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
@@ -7,1507 +8,352 @@ import os
|
|
7 |
import json
|
8 |
import zipfile
|
9 |
import tempfile
|
10 |
-
from typing import Dict, List, Tuple, Union
|
11 |
-
import re
|
12 |
from pathlib import Path
|
13 |
-
import openpyxl
|
14 |
-
from dataclasses import dataclass
|
15 |
-
from enum import Enum
|
16 |
from docx import Document
|
17 |
-
from docx.shared import
|
18 |
-
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
19 |
from reportlab.lib import colors
|
20 |
-
from reportlab.lib.pagesizes import letter
|
21 |
-
from reportlab.platypus import SimpleDocTemplate,
|
22 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
23 |
from reportlab.lib.units import inch
|
24 |
-
from reportlab.pdfbase import pdfmetrics
|
25 |
-
from reportlab.pdfbase.ttfonts import TTFont
|
26 |
-
import matplotlib.pyplot as plt
|
27 |
from datetime import datetime
|
28 |
-
from openai import OpenAI # CAMBIO: Importación de la nueva librería
|
29 |
|
30 |
# Configuración para HuggingFace
|
31 |
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
|
32 |
|
33 |
-
#
|
34 |
client = OpenAI(
|
35 |
base_url="https://api.studio.nebius.com/v1/",
|
36 |
api_key=os.environ.get("NEBIUS_API_KEY")
|
37 |
)
|
38 |
|
39 |
-
# Sistema de traducción
|
40 |
TRANSLATIONS = {
|
41 |
'en': {
|
42 |
-
'title': '🧬
|
43 |
-
'subtitle': '
|
44 |
'upload_files': '📁 Upload fitting results (CSV/Excel)',
|
45 |
-
'select_model': '🤖 AI Model',
|
46 |
'select_language': '🌐 Language',
|
47 |
-
'
|
48 |
-
'detail_level': '📋 Analysis detail level',
|
49 |
'detailed': 'Detailed',
|
50 |
'summarized': 'Summarized',
|
51 |
-
'analyze_button': '🚀 Analyze
|
52 |
-
'export_format': '📄 Export
|
53 |
-
'export_button': '💾 Export
|
54 |
-
'comparative_analysis': '📊
|
55 |
-
'implementation_code': '💻 Implementation Code',
|
56 |
-
'data_format': '📋 Expected
|
57 |
-
'
|
58 |
-
'light': 'Light',
|
59 |
-
'dark': 'Dark',
|
60 |
-
'best_for': 'Best for',
|
61 |
-
'loading': 'Loading...',
|
62 |
-
'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets', # CAMBIO
|
63 |
'error_no_files': 'Please upload fitting result files to analyze',
|
64 |
'report_exported': 'Report exported successfully as',
|
65 |
-
'
|
66 |
-
'
|
67 |
-
'what_analyzes': '🔍 What it specifically analyzes:',
|
68 |
-
'tips': '💡 Tips for better results:',
|
69 |
-
'additional_specs': '📝 Additional specifications for analysis',
|
70 |
-
'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
|
71 |
},
|
72 |
'es': {
|
73 |
-
'title': '🧬 Analizador
|
74 |
-
'subtitle': '
|
75 |
'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
|
76 |
-
'select_model': '🤖 Modelo de IA',
|
77 |
'select_language': '🌐 Idioma',
|
78 |
-
'
|
79 |
-
'detail_level': '📋 Nivel de detalle del análisis',
|
80 |
'detailed': 'Detallado',
|
81 |
'summarized': 'Resumido',
|
82 |
-
'analyze_button': '🚀 Analizar
|
83 |
-
'export_format': '📄 Formato de
|
84 |
-
'export_button': '💾 Exportar
|
85 |
-
'comparative_analysis': '📊 Análisis
|
86 |
-
'implementation_code': '💻 Código de Implementación',
|
87 |
-
'data_format': '📋 Formato de
|
88 |
-
'
|
89 |
-
'light': 'Claro',
|
90 |
-
'dark': 'Oscuro',
|
91 |
-
'best_for': 'Mejor para',
|
92 |
-
'loading': 'Cargando...',
|
93 |
-
'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space', # CAMBIO
|
94 |
'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
|
95 |
'report_exported': 'Reporte exportado exitosamente como',
|
96 |
-
'
|
97 |
-
'
|
98 |
-
'what_analyzes': '🔍 Qué analiza específicamente:',
|
99 |
-
'tips': '💡 Tips para mejores resultados:',
|
100 |
-
'additional_specs': '📝 Especificaciones adicionales para el análisis',
|
101 |
-
'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
|
102 |
},
|
103 |
-
'fr': {
|
104 |
-
'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
|
105 |
-
'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
|
106 |
-
'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
|
107 |
-
'select_model': '🤖 Modèle d\'IA', # CAMBIO
|
108 |
-
'select_language': '🌐 Langue',
|
109 |
-
'select_theme': '🎨 Thème',
|
110 |
-
'detail_level': '📋 Niveau de détail',
|
111 |
-
'detailed': 'Détaillé',
|
112 |
-
'summarized': 'Résumé',
|
113 |
-
'analyze_button': '🚀 Analyser et Comparer',
|
114 |
-
'export_format': '📄 Format d\'export',
|
115 |
-
'export_button': '💾 Exporter le Rapport',
|
116 |
-
'comparative_analysis': '📊 Analyse Comparative',
|
117 |
-
'implementation_code': '💻 Code d\'Implémentation',
|
118 |
-
'data_format': '📋 Format de données attendu',
|
119 |
-
'examples': '📚 Exemples d\'analyse',
|
120 |
-
'light': 'Clair',
|
121 |
-
'dark': 'Sombre',
|
122 |
-
'best_for': 'Meilleur pour',
|
123 |
-
'loading': 'Chargement...',
|
124 |
-
'error_no_api': 'Veuillez configurer NEBIUS_API_KEY', # CAMBIO
|
125 |
-
'error_no_files': 'Veuillez télécharger des fichiers à analyser',
|
126 |
-
'report_exported': 'Rapport exporté avec succès comme',
|
127 |
-
'specialized_in': '🎯 Spécialisé dans:',
|
128 |
-
'metrics_analyzed': '📊 Métriques analysées:',
|
129 |
-
'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
|
130 |
-
'tips': '💡 Conseils pour de meilleurs résultats:',
|
131 |
-
'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
|
132 |
-
'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
|
133 |
-
},
|
134 |
-
'de': {
|
135 |
-
'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
|
136 |
-
'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
|
137 |
-
'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
|
138 |
-
'select_model': '🤖 KI-Modell', # CAMBIO
|
139 |
-
'select_language': '🌐 Sprache',
|
140 |
-
'select_theme': '🎨 Thema',
|
141 |
-
'detail_level': '📋 Detailgrad der Analyse',
|
142 |
-
'detailed': 'Detailliert',
|
143 |
-
'summarized': 'Zusammengefasst',
|
144 |
-
'analyze_button': '🚀 Analysieren und Vergleichen',
|
145 |
-
'export_format': '📄 Exportformat',
|
146 |
-
'export_button': '💾 Bericht Exportieren',
|
147 |
-
'comparative_analysis': '📊 Vergleichende Analyse',
|
148 |
-
'implementation_code': '💻 Implementierungscode',
|
149 |
-
'data_format': '📋 Erwartetes Datenformat',
|
150 |
-
'examples': '📚 Analysebeispiele',
|
151 |
-
'light': 'Hell',
|
152 |
-
'dark': 'Dunkel',
|
153 |
-
'best_for': 'Am besten für',
|
154 |
-
'loading': 'Laden...',
|
155 |
-
'error_no_api': 'Bitte konfigurieren Sie NEBIUS_API_KEY', # CAMBIO
|
156 |
-
'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
|
157 |
-
'report_exported': 'Bericht erfolgreich exportiert als',
|
158 |
-
'specialized_in': '🎯 Spezialisiert auf:',
|
159 |
-
'metrics_analyzed': '📊 Analysierte Metriken:',
|
160 |
-
'what_analyzes': '🔍 Was spezifisch analysiert wird:',
|
161 |
-
'tips': '💡 Tipps für bessere Ergebnisse:',
|
162 |
-
'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
|
163 |
-
'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
|
164 |
-
},
|
165 |
-
'pt': {
|
166 |
-
'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
|
167 |
-
'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
|
168 |
-
'upload_files': '📁 Carregar resultados (CSV/Excel)',
|
169 |
-
'select_model': '🤖 Modelo de IA', # CAMBIO
|
170 |
-
'select_language': '🌐 Idioma',
|
171 |
-
'select_theme': '🎨 Tema',
|
172 |
-
'detail_level': '📋 Nível de detalhe',
|
173 |
-
'detailed': 'Detalhado',
|
174 |
-
'summarized': 'Resumido',
|
175 |
-
'analyze_button': '🚀 Analisar e Comparar',
|
176 |
-
'export_format': '📄 Formato de exportação',
|
177 |
-
'export_button': '💾 Exportar Relatório',
|
178 |
-
'comparative_analysis': '📊 Análise Comparativa',
|
179 |
-
'implementation_code': '💻 Código de Implementação',
|
180 |
-
'data_format': '📋 Formato de dados esperado',
|
181 |
-
'examples': '📚 Exemplos de análise',
|
182 |
-
'light': 'Claro',
|
183 |
-
'dark': 'Escuro',
|
184 |
-
'best_for': 'Melhor para',
|
185 |
-
'loading': 'Carregando...',
|
186 |
-
'error_no_api': 'Por favor configure NEBIUS_API_KEY', # CAMBIO
|
187 |
-
'error_no_files': 'Por favor carregue arquivos para analisar',
|
188 |
-
'report_exported': 'Relatório exportado com sucesso como',
|
189 |
-
'specialized_in': '🎯 Especializado em:',
|
190 |
-
'metrics_analyzed': '📊 Métricas analisadas:',
|
191 |
-
'what_analyzes': '🔍 O que analisa especificamente:',
|
192 |
-
'tips': '💡 Dicas para melhores resultados:',
|
193 |
-
'additional_specs': '📝 Especificações adicionais para a análise',
|
194 |
-
'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
|
195 |
-
}
|
196 |
}
|
197 |
|
198 |
-
#
|
199 |
-
THEMES = {
|
200 |
-
'light': gr.themes.Soft(),
|
201 |
-
'dark': gr.themes.Base(
|
202 |
-
primary_hue="blue",
|
203 |
-
secondary_hue="gray",
|
204 |
-
neutral_hue="gray",
|
205 |
-
font=["Arial", "sans-serif"]
|
206 |
-
).set(
|
207 |
-
body_background_fill="dark",
|
208 |
-
body_background_fill_dark="*neutral_950",
|
209 |
-
button_primary_background_fill="*primary_600",
|
210 |
-
button_primary_background_fill_hover="*primary_500",
|
211 |
-
button_primary_text_color="white",
|
212 |
-
block_background_fill="*neutral_800",
|
213 |
-
block_border_color="*neutral_700",
|
214 |
-
block_label_text_color="*neutral_200",
|
215 |
-
block_title_text_color="*neutral_100",
|
216 |
-
checkbox_background_color="*neutral_700",
|
217 |
-
checkbox_background_color_selected="*primary_600",
|
218 |
-
input_background_fill="*neutral_700",
|
219 |
-
input_border_color="*neutral_600",
|
220 |
-
input_placeholder_color="*neutral_400"
|
221 |
-
)
|
222 |
-
}
|
223 |
-
|
224 |
-
# Enum para tipos de análisis
|
225 |
-
class AnalysisType(Enum):
|
226 |
-
MATHEMATICAL_MODEL = "mathematical_model"
|
227 |
-
DATA_FITTING = "data_fitting"
|
228 |
-
FITTING_RESULTS = "fitting_results"
|
229 |
-
UNKNOWN = "unknown"
|
230 |
-
|
231 |
-
# Estructura modular para modelos
|
232 |
-
@dataclass
|
233 |
-
class MathematicalModel:
|
234 |
-
name: str
|
235 |
-
equation: str
|
236 |
-
parameters: List[str]
|
237 |
-
application: str
|
238 |
-
sources: List[str]
|
239 |
-
category: str
|
240 |
-
biological_meaning: str
|
241 |
-
|
242 |
-
# Sistema de registro de modelos escalable
|
243 |
-
class ModelRegistry:
|
244 |
-
def __init__(self):
|
245 |
-
self.models = {}
|
246 |
-
self._initialize_default_models()
|
247 |
-
|
248 |
-
def register_model(self, model: MathematicalModel):
|
249 |
-
"""Registra un nuevo modelo matemático"""
|
250 |
-
if model.category not in self.models:
|
251 |
-
self.models[model.category] = {}
|
252 |
-
self.models[model.category][model.name] = model
|
253 |
-
|
254 |
-
def get_model(self, category: str, name: str) -> MathematicalModel:
|
255 |
-
"""Obtiene un modelo específico"""
|
256 |
-
return self.models.get(category, {}).get(name)
|
257 |
-
|
258 |
-
def get_all_models(self) -> Dict:
|
259 |
-
"""Retorna todos los modelos registrados"""
|
260 |
-
return self.models
|
261 |
-
|
262 |
-
def _initialize_default_models(self):
|
263 |
-
"""Inicializa los modelos por defecto"""
|
264 |
-
# Modelos de crecimiento
|
265 |
-
self.register_model(MathematicalModel(
|
266 |
-
name="Monod",
|
267 |
-
equation="μ = μmax × (S / (Ks + S))",
|
268 |
-
parameters=["μmax (h⁻¹)", "Ks (g/L)"],
|
269 |
-
application="Crecimiento limitado por sustrato único",
|
270 |
-
sources=["Cambridge", "MIT", "DTU"],
|
271 |
-
category="crecimiento_biomasa",
|
272 |
-
biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante"
|
273 |
-
))
|
274 |
-
|
275 |
-
self.register_model(MathematicalModel(
|
276 |
-
name="Logístico",
|
277 |
-
equation="dX/dt = μmax × X × (1 - X/Xmax)",
|
278 |
-
parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
|
279 |
-
application="Sistemas cerrados batch",
|
280 |
-
sources=["Cranfield", "Swansea", "HAL Theses"],
|
281 |
-
category="crecimiento_biomasa",
|
282 |
-
biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema"
|
283 |
-
))
|
284 |
-
|
285 |
-
self.register_model(MathematicalModel(
|
286 |
-
name="Gompertz",
|
287 |
-
equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
|
288 |
-
parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
|
289 |
-
application="Crecimiento con fase lag pronunciada",
|
290 |
-
sources=["Lund University", "NC State"],
|
291 |
-
category="crecimiento_biomasa",
|
292 |
-
biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario"
|
293 |
-
))
|
294 |
-
|
295 |
-
# Instancia global del registro
|
296 |
-
model_registry = ModelRegistry()
|
297 |
-
|
298 |
-
# CAMBIO: Modelos de Nebius en lugar de Claude
|
299 |
NEBIUS_MODELS = {
|
300 |
"Qwen/Qwen3-14B": {
|
301 |
"name": "Qwen 3 (14B)",
|
302 |
"description": "Modelo potente y versátil de la familia Qwen.",
|
303 |
-
"max_tokens": 4096,
|
304 |
-
"best_for": "Análisis detallados y generación de código complejo."
|
305 |
},
|
306 |
-
# Puedes añadir más modelos de Nebius aquí si están disponibles
|
307 |
}
|
308 |
|
309 |
class FileProcessor:
|
310 |
-
"""Clase para procesar diferentes tipos de archivos"""
|
311 |
-
|
312 |
-
@staticmethod
|
313 |
-
def extract_text_from_pdf(pdf_file) -> str:
|
314 |
-
"""Extrae texto de un archivo PDF"""
|
315 |
-
try:
|
316 |
-
pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
|
317 |
-
text = ""
|
318 |
-
for page in pdf_reader.pages:
|
319 |
-
text += page.extract_text() + "\n"
|
320 |
-
return text
|
321 |
-
except Exception as e:
|
322 |
-
return f"Error reading PDF: {str(e)}"
|
323 |
-
|
324 |
-
@staticmethod
|
325 |
-
def read_csv(csv_file) -> pd.DataFrame:
|
326 |
-
"""Lee archivo CSV"""
|
327 |
-
try:
|
328 |
-
return pd.read_csv(io.BytesIO(csv_file))
|
329 |
-
except Exception as e:
|
330 |
-
return None
|
331 |
-
|
332 |
@staticmethod
|
333 |
-
def
|
334 |
-
|
335 |
-
|
336 |
-
return pd.read_excel(io.BytesIO(excel_file))
|
337 |
-
except Exception as e:
|
338 |
-
return None
|
339 |
|
340 |
@staticmethod
|
341 |
-
def
|
342 |
-
|
343 |
-
|
344 |
-
try:
|
345 |
-
with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
|
346 |
-
for file_name in zip_ref.namelist():
|
347 |
-
if not file_name.startswith('__MACOSX'):
|
348 |
-
file_data = zip_ref.read(file_name)
|
349 |
-
files.append((file_name, file_data))
|
350 |
-
except Exception as e:
|
351 |
-
print(f"Error processing ZIP: {e}")
|
352 |
-
return files
|
353 |
|
354 |
class ReportExporter:
|
355 |
-
"""Clase para exportar reportes a diferentes formatos"""
|
356 |
-
|
357 |
@staticmethod
|
358 |
-
def export_to_docx(content: str, filename: str, language: str = 'en')
|
359 |
-
"""Exporta el contenido a un archivo DOCX"""
|
360 |
doc = Document()
|
361 |
-
|
362 |
-
|
363 |
-
title_style = doc.styles['Title']
|
364 |
-
title_style.font.size = Pt(24)
|
365 |
-
title_style.font.bold = True
|
366 |
-
|
367 |
-
heading_style = doc.styles['Heading 1']
|
368 |
-
heading_style.font.size = Pt(18)
|
369 |
-
heading_style.font.bold = True
|
370 |
-
|
371 |
-
# Título
|
372 |
-
title_text = {
|
373 |
-
'en': 'Comparative Analysis Report - Biotechnological Models',
|
374 |
-
'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
|
375 |
-
'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
|
376 |
-
'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
|
377 |
-
'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
|
378 |
-
}
|
379 |
-
|
380 |
-
doc.add_heading(title_text.get(language, title_text['en']), 0)
|
381 |
-
|
382 |
-
# Fecha
|
383 |
-
date_text = {
|
384 |
-
'en': 'Generated on',
|
385 |
-
'es': 'Generado el',
|
386 |
-
'fr': 'Généré le',
|
387 |
-
'de': 'Erstellt am',
|
388 |
-
'pt': 'Gerado em'
|
389 |
-
}
|
390 |
-
doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
391 |
doc.add_paragraph()
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
for line in lines:
|
398 |
-
line = line.strip()
|
399 |
-
|
400 |
-
if line.startswith('###'):
|
401 |
-
doc.add_heading(line.replace('###', '').strip(), level=2)
|
402 |
-
elif line.startswith('##'):
|
403 |
-
doc.add_heading(line.replace('##', '').strip(), level=1)
|
404 |
-
elif line.startswith('#'):
|
405 |
-
doc.add_heading(line.replace('#', '').strip(), level=0)
|
406 |
-
elif line.startswith('**') and line.endswith('**'):
|
407 |
-
# Texto en negrita
|
408 |
-
p = doc.add_paragraph()
|
409 |
-
run = p.add_run(line.replace('**', ''))
|
410 |
-
run.bold = True
|
411 |
-
elif line.startswith('- ') or line.startswith('* '):
|
412 |
-
# Lista
|
413 |
-
doc.add_paragraph(line[2:], style='List Bullet')
|
414 |
-
elif line.startswith(tuple('0123456789')):
|
415 |
-
# Lista numerada
|
416 |
-
doc.add_paragraph(line, style='List Number')
|
417 |
-
elif line == '---' or line.startswith('==='):
|
418 |
-
# Separador
|
419 |
-
doc.add_paragraph('_' * 50)
|
420 |
-
elif line:
|
421 |
-
# Párrafo normal
|
422 |
-
doc.add_paragraph(line)
|
423 |
-
|
424 |
-
# Guardar documento
|
425 |
doc.save(filename)
|
426 |
return filename
|
427 |
-
|
428 |
@staticmethod
|
429 |
-
def export_to_pdf(content: str, filename: str, language: str = 'en')
|
430 |
-
"""Exporta el contenido a un archivo PDF"""
|
431 |
-
# Crear documento PDF
|
432 |
doc = SimpleDocTemplate(filename, pagesize=letter)
|
433 |
-
story = []
|
434 |
styles = getSampleStyleSheet()
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
'
|
439 |
-
|
440 |
-
|
441 |
-
textColor=colors.HexColor('#1f4788'),
|
442 |
-
spaceAfter=30
|
443 |
-
)
|
444 |
-
|
445 |
-
heading_style = ParagraphStyle(
|
446 |
-
'CustomHeading',
|
447 |
-
parent=styles['Heading1'],
|
448 |
-
fontSize=16,
|
449 |
-
textColor=colors.HexColor('#2e5090'),
|
450 |
-
spaceAfter=12
|
451 |
-
)
|
452 |
-
|
453 |
-
# Título
|
454 |
-
title_text = {
|
455 |
-
'en': 'Comparative Analysis Report - Biotechnological Models',
|
456 |
-
'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
|
457 |
-
'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
|
458 |
-
'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
|
459 |
-
'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
|
460 |
-
}
|
461 |
-
|
462 |
-
story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
|
463 |
-
|
464 |
-
# Fecha
|
465 |
-
date_text = {
|
466 |
-
'en': 'Generated on',
|
467 |
-
'es': 'Generado el',
|
468 |
-
'fr': 'Généré le',
|
469 |
-
'de': 'Erstellt am',
|
470 |
-
'pt': 'Gerado em'
|
471 |
-
}
|
472 |
-
story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
|
473 |
-
story.append(Spacer(1, 0.5*inch))
|
474 |
-
|
475 |
-
# Procesar contenido
|
476 |
-
lines = content.split('\n')
|
477 |
-
|
478 |
-
for line in lines:
|
479 |
-
line = line.strip()
|
480 |
-
|
481 |
-
if not line:
|
482 |
-
story.append(Spacer(1, 0.2*inch))
|
483 |
-
elif line.startswith('###'):
|
484 |
-
story.append(Paragraph(line.replace('###', '').strip(), styles['Heading3']))
|
485 |
-
elif line.startswith('##'):
|
486 |
-
story.append(Paragraph(line.replace('##', '').strip(), styles['Heading2']))
|
487 |
-
elif line.startswith('#'):
|
488 |
-
story.append(Paragraph(line.replace('#', '').strip(), heading_style))
|
489 |
-
elif line.startswith('**') and line.endswith('**'):
|
490 |
-
text = line.replace('**', '')
|
491 |
-
story.append(Paragraph(f"<b>{text}</b>", styles['Normal']))
|
492 |
-
elif line.startswith('- ') or line.startswith('* '):
|
493 |
-
story.append(Paragraph(f"• {line[2:]}", styles['Normal']))
|
494 |
-
elif line == '---' or line.startswith('==='):
|
495 |
-
story.append(Spacer(1, 0.3*inch))
|
496 |
-
story.append(Paragraph("_" * 70, styles['Normal']))
|
497 |
-
story.append(Spacer(1, 0.3*inch))
|
498 |
-
else:
|
499 |
-
# Limpiar caracteres especiales para PDF
|
500 |
-
clean_line = line.replace('📊', '[GRAPH]').replace('🎯', '[TARGET]').replace('🔍', '[SEARCH]').replace('💡', '[TIP]')
|
501 |
-
story.append(Paragraph(clean_line, styles['Normal']))
|
502 |
-
|
503 |
-
# Construir PDF
|
504 |
doc.build(story)
|
505 |
return filename
|
506 |
|
507 |
class AIAnalyzer:
|
508 |
-
"""
|
509 |
-
|
510 |
-
|
|
|
|
|
511 |
self.client = client
|
512 |
-
|
513 |
-
|
514 |
-
def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
|
515 |
-
"""Detecta el tipo de análisis necesario"""
|
516 |
-
if isinstance(content, pd.DataFrame):
|
517 |
-
columns = [col.lower() for col in content.columns]
|
518 |
-
|
519 |
-
fitting_indicators = [
|
520 |
-
'r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic',
|
521 |
-
'parameter', 'param', 'coefficient', 'fit',
|
522 |
-
'model', 'equation', 'goodness', 'chi_square',
|
523 |
-
'p_value', 'confidence', 'standard_error', 'se'
|
524 |
-
]
|
525 |
-
|
526 |
-
has_fitting_results = any(indicator in ' '.join(columns) for indicator in fitting_indicators)
|
527 |
-
|
528 |
-
if has_fitting_results:
|
529 |
-
return AnalysisType.FITTING_RESULTS
|
530 |
-
else:
|
531 |
-
return AnalysisType.DATA_FITTING
|
532 |
-
|
533 |
-
prompt = """
|
534 |
-
Analyze this content and determine if it is:
|
535 |
-
1. A scientific article describing biotechnological mathematical models
|
536 |
-
2. Experimental data for parameter fitting
|
537 |
-
3. Model fitting results (with parameters, R², RMSE, etc.)
|
538 |
-
|
539 |
-
Reply only with: "MODEL", "DATA" or "RESULTS"
|
540 |
"""
|
|
|
|
|
|
|
|
|
541 |
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
|
549 |
-
)
|
550 |
-
|
551 |
-
result = response.choices[0].message.content.strip().upper()
|
552 |
-
if "MODEL" in result:
|
553 |
-
return AnalysisType.MATHEMATICAL_MODEL
|
554 |
-
elif "RESULTS" in result:
|
555 |
-
return AnalysisType.FITTING_RESULTS
|
556 |
-
elif "DATA" in result:
|
557 |
-
return AnalysisType.DATA_FITTING
|
558 |
-
else:
|
559 |
-
return AnalysisType.UNKNOWN
|
560 |
-
|
561 |
-
except Exception as e:
|
562 |
-
print(f"Error in detect_analysis_type: {e}")
|
563 |
-
return AnalysisType.UNKNOWN
|
564 |
-
|
565 |
-
def get_language_prompt_prefix(self, language: str) -> str:
|
566 |
-
"""Obtiene el prefijo del prompt según el idioma"""
|
567 |
-
prefixes = {
|
568 |
-
'en': "Please respond in English. ",
|
569 |
-
'es': "Por favor responde en español. ",
|
570 |
-
'fr': "Veuillez répondre en français. ",
|
571 |
-
'de': "Bitte antworten Sie auf Deutsch. ",
|
572 |
-
'pt': "Por favor responda em português. "
|
573 |
-
}
|
574 |
-
return prefixes.get(language, prefixes['en'])
|
575 |
-
|
576 |
-
def analyze_fitting_results(self, data: pd.DataFrame, nebius_model: str, detail_level: str = "detailed",
|
577 |
-
language: str = "en", additional_specs: str = "") -> Dict:
|
578 |
-
"""Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
|
579 |
-
|
580 |
-
# Preparar resumen completo de los datos
|
581 |
-
data_summary = f"""
|
582 |
-
FITTING RESULTS DATA:
|
583 |
-
|
584 |
-
Data structure:
|
585 |
-
- Columns: {list(data.columns)}
|
586 |
-
- Number of models evaluated: {len(data)}
|
587 |
-
|
588 |
-
Complete data:
|
589 |
{data.to_string()}
|
590 |
-
|
591 |
-
|
592 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
593 |
"""
|
594 |
-
|
595 |
-
# Extraer valores para usar en el código
|
596 |
-
data_dict = data.to_dict('records')
|
597 |
-
|
598 |
-
# Obtener prefijo de idioma
|
599 |
-
lang_prefix = self.get_language_prompt_prefix(language)
|
600 |
-
|
601 |
-
# Agregar especificaciones adicionales del usuario si existen
|
602 |
-
user_specs_section = f"""
|
603 |
-
|
604 |
-
USER ADDITIONAL SPECIFICATIONS:
|
605 |
-
{additional_specs}
|
606 |
-
|
607 |
-
Please ensure to address these specific requirements in your analysis.
|
608 |
-
""" if additional_specs else ""
|
609 |
-
|
610 |
-
# Prompt mejorado con instrucciones específicas para cada nivel
|
611 |
-
if detail_level == "detailed":
|
612 |
-
prompt = f"""
|
613 |
-
{lang_prefix}
|
614 |
-
|
615 |
-
You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
|
616 |
-
|
617 |
-
{user_specs_section}
|
618 |
-
|
619 |
-
DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
|
620 |
-
|
621 |
-
PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
|
622 |
-
|
623 |
-
1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
|
624 |
-
- List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
|
625 |
-
- For EACH experiment, identify:
|
626 |
-
* Experimental conditions
|
627 |
-
* Number of models tested
|
628 |
-
* Variables measured (biomass, substrate, product)
|
629 |
-
|
630 |
-
2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
|
631 |
-
For EACH EXPERIMENT separately:
|
632 |
-
- Identify ALL fitted mathematical models BY NAME
|
633 |
-
- Classify them: biomass growth, substrate consumption, product formation
|
634 |
-
- Show the mathematical equation of each model
|
635 |
-
- List parameter values obtained for that specific experiment
|
636 |
-
|
637 |
-
3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
|
638 |
-
Create a section for EACH EXPERIMENT showing:
|
639 |
-
|
640 |
-
**EXPERIMENT [Name/Condition]:**
|
641 |
-
|
642 |
-
a) **BIOMASS MODELS** (if applicable):
|
643 |
-
- Best model: [Name] with R²=[value], RMSE=[value]
|
644 |
-
- Parameters: μmax=[value], Xmax=[value], etc.
|
645 |
-
- Ranking of all biomass models tested
|
646 |
-
|
647 |
-
b) **SUBSTRATE MODELS** (if applicable):
|
648 |
-
- Best model: [Name] with R²=[value], RMSE=[value]
|
649 |
-
- Parameters: Ks=[value], Yxs=[value], etc.
|
650 |
-
- Ranking of all substrate models tested
|
651 |
-
|
652 |
-
c) **PRODUCT MODELS** (if applicable):
|
653 |
-
- Best model: [Name] with R²=[value], RMSE=[value]
|
654 |
-
- Parameters: α=[value], β=[value], etc.
|
655 |
-
- Ranking of all product models tested
|
656 |
-
|
657 |
-
4. **DETAILED COMPARATIVE TABLES**
|
658 |
-
|
659 |
-
**Table 1: Summary by Experiment and Variable Type**
|
660 |
-
| Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
|
661 |
-
|------------|----------|------------|-------|------|----------------|---------|
|
662 |
-
| Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
|
663 |
-
| Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
|
664 |
-
| Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
|
665 |
-
| Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
|
666 |
-
|
667 |
-
**Table 2: Complete Model Comparison Across All Experiments**
|
668 |
-
| Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
|
669 |
-
|
670 |
-
5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
|
671 |
-
- Compare how parameters change between experiments
|
672 |
-
- Identify trends (e.g., μmax increases with temperature)
|
673 |
-
- Calculate average parameters and variability
|
674 |
-
- Suggest optimal conditions based on parameters
|
675 |
-
|
676 |
-
6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
|
677 |
-
For each experiment, explain:
|
678 |
-
- What the parameter values mean biologically
|
679 |
-
- Whether values are realistic for the conditions
|
680 |
-
- Key differences between experiments
|
681 |
-
- Critical control parameters identified
|
682 |
-
|
683 |
-
7. **OVERALL BEST MODELS DETERMINATION**
|
684 |
-
- **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
|
685 |
-
- **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
|
686 |
-
- **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
|
687 |
-
|
688 |
-
Justify with numerical evidence from multiple experiments.
|
689 |
-
|
690 |
-
8. **CONCLUSIONS AND RECOMMENDATIONS**
|
691 |
-
- Which models are most robust across different conditions
|
692 |
-
- Specific models to use for each experimental condition
|
693 |
-
- Confidence intervals and prediction reliability
|
694 |
-
- Scale-up recommendations with specific values
|
695 |
-
|
696 |
-
Use Markdown format with clear structure. Include ALL numerical values from the data.
|
697 |
-
Create clear sections for EACH EXPERIMENT.
|
698 |
-
"""
|
699 |
-
else: # summarized
|
700 |
-
prompt = f"""
|
701 |
-
{lang_prefix}
|
702 |
-
|
703 |
-
You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
|
704 |
-
|
705 |
-
{user_specs_section}
|
706 |
-
|
707 |
-
DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
|
708 |
-
|
709 |
-
PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
|
710 |
-
|
711 |
-
1. **EXPERIMENTS OVERVIEW**
|
712 |
-
- Total experiments analyzed: [number]
|
713 |
-
- Conditions tested: [list]
|
714 |
-
- Variables measured: biomass/substrate/product
|
715 |
-
|
716 |
-
2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
|
717 |
-
|
718 |
-
📊 **EXPERIMENT 1 [Name/Condition]:**
|
719 |
-
- Biomass: [Model] (R²=[value])
|
720 |
-
- Substrate: [Model] (R²=[value])
|
721 |
-
- Product: [Model] (R²=[value])
|
722 |
-
|
723 |
-
📊 **EXPERIMENT 2 [Name/Condition]:**
|
724 |
-
- Biomass: [Model] (R²=[value])
|
725 |
-
- Substrate: [Model] (R²=[value])
|
726 |
-
- Product: [Model] (R²=[value])
|
727 |
-
|
728 |
-
[Continue for all experiments...]
|
729 |
-
|
730 |
-
3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
|
731 |
-
🏆 **Best Models Overall:**
|
732 |
-
- **Biomass**: [Model] - Best in [X]/[Y] experiments
|
733 |
-
- **Substrate**: [Model] - Average R²=[value]
|
734 |
-
- **Product**: [Model] - Most consistent performance
|
735 |
-
|
736 |
-
4. **QUICK COMPARISON TABLE**
|
737 |
-
| Experiment | Best Biomass | Best Substrate | Best Product | Overall R² |
|
738 |
-
|------------|--------------|----------------|--------------|------------|
|
739 |
-
| Exp1 | [Model] | [Model] | [Model] | [avg] |
|
740 |
-
| Exp2 | [Model] | [Model] | [Model] | [avg] |
|
741 |
-
|
742 |
-
5. **KEY FINDINGS**
|
743 |
-
- Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
|
744 |
-
- Best conditions identified: [specific values]
|
745 |
-
- Most robust models: [list with reasons]
|
746 |
-
|
747 |
-
6. **PRACTICAL RECOMMENDATIONS**
|
748 |
-
- For biomass prediction: Use [Model]
|
749 |
-
- For substrate monitoring: Use [Model]
|
750 |
-
- For product estimation: Use [Model]
|
751 |
-
- Critical parameters: [list with values]
|
752 |
-
|
753 |
-
Keep it concise but include ALL experiments and model names with their key metrics.
|
754 |
-
"""
|
755 |
-
|
756 |
try:
|
757 |
-
# CAMBIO: Llamada a la API y acceso a la respuesta
|
758 |
response = self.client.chat.completions.create(
|
759 |
-
model=
|
760 |
temperature=0.6,
|
761 |
top_p=0.95,
|
762 |
-
max_tokens=4000,
|
763 |
-
messages=[{
|
764 |
-
"role": "user",
|
765 |
-
"content": f"{prompt}\n\n{data_summary}"
|
766 |
-
}]
|
767 |
)
|
768 |
|
769 |
-
|
770 |
-
code_prompt = f"""
|
771 |
-
{lang_prefix}
|
772 |
-
|
773 |
-
Based on the analysis and this actual data:
|
774 |
-
{data.to_string()}
|
775 |
-
|
776 |
-
Generate Python code that:
|
777 |
-
|
778 |
-
1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
|
779 |
-
2. Implements analysis BY EXPERIMENT showing:
|
780 |
-
- Best models for each experiment
|
781 |
-
- Comparison across experiments
|
782 |
-
- Parameter evolution between conditions
|
783 |
-
3. Includes visualization functions that:
|
784 |
-
- Show results PER EXPERIMENT
|
785 |
-
- Compare models across experiments
|
786 |
-
- Display parameter trends
|
787 |
-
4. Shows the best model for biomass, substrate, and product separately
|
788 |
-
|
789 |
-
The code must include:
|
790 |
-
- Data loading with experiment identification
|
791 |
-
- Model comparison by experiment and variable type
|
792 |
-
- Visualization showing results per experiment
|
793 |
-
- Overall best model selection with justification
|
794 |
-
- Functions to predict using the best models for each category
|
795 |
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
|
804 |
-
temperature=0.6,
|
805 |
-
top_p=0.95,
|
806 |
-
max_tokens=3000,
|
807 |
-
messages=[{
|
808 |
-
"role": "user",
|
809 |
-
"content": code_prompt
|
810 |
-
}]
|
811 |
-
)
|
812 |
-
|
813 |
-
return {
|
814 |
-
"tipo": "Comparative Analysis of Mathematical Models",
|
815 |
-
"analisis_completo": response.choices[0].message.content,
|
816 |
-
"codigo_implementacion": code_response.choices[0].message.content,
|
817 |
-
"resumen_datos": {
|
818 |
-
"n_modelos": len(data),
|
819 |
-
"columnas": list(data.columns),
|
820 |
-
"metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
|
821 |
-
for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
|
822 |
-
"mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
|
823 |
-
"mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
|
824 |
-
"datos_completos": data_dict
|
825 |
}
|
826 |
-
|
827 |
-
|
|
|
|
|
|
|
|
|
|
|
828 |
except Exception as e:
|
829 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
830 |
|
831 |
-
def process_files(files, model_name: str, detail_level: str = "detailed",
|
832 |
-
language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
|
833 |
-
"""Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
|
834 |
processor = FileProcessor()
|
835 |
-
analyzer = AIAnalyzer(client
|
836 |
-
results = []
|
837 |
-
all_code = []
|
838 |
|
|
|
|
|
|
|
|
|
839 |
for file in files:
|
840 |
-
if file is None:
|
841 |
-
|
842 |
-
|
843 |
-
file_name = file.name if hasattr(file, 'name') else "archivo"
|
844 |
file_ext = Path(file_name).suffix.lower()
|
845 |
|
846 |
with open(file.name, 'rb') as f:
|
847 |
file_content = f.read()
|
848 |
|
849 |
-
|
850 |
-
|
851 |
-
|
852 |
-
|
853 |
-
|
854 |
-
|
855 |
-
|
856 |
-
|
857 |
-
|
858 |
-
|
859 |
-
|
860 |
-
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
865 |
-
df, model_name, detail_level, language, additional_specs
|
866 |
-
)
|
867 |
-
|
868 |
-
if language == 'es':
|
869 |
-
results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
|
870 |
-
else:
|
871 |
-
results.append("### 🎯 COMPARATIVE ANALYSIS OF MATHEMATICAL MODELS")
|
872 |
-
|
873 |
-
results.append(result.get("analisis_completo", ""))
|
874 |
-
if "codigo_implementacion" in result:
|
875 |
-
all_code.append(result["codigo_implementacion"])
|
876 |
-
|
877 |
-
results.append("\n---\n")
|
878 |
-
|
879 |
-
analysis_text = "\n".join(results)
|
880 |
-
code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
|
881 |
-
|
882 |
-
return analysis_text, code_text
|
883 |
-
|
884 |
-
def generate_implementation_code(analysis_results: str) -> str:
|
885 |
-
"""Genera código de implementación con análisis por experimento"""
|
886 |
-
code = """
|
887 |
-
import numpy as np
|
888 |
-
import pandas as pd
|
889 |
-
import matplotlib.pyplot as plt
|
890 |
-
from scipy.integrate import odeint
|
891 |
-
from scipy.optimize import curve_fit, differential_evolution
|
892 |
-
from sklearn.metrics import r2_score, mean_squared_error
|
893 |
-
import seaborn as sns
|
894 |
-
from typing import Dict, List, Tuple, Optional
|
895 |
-
|
896 |
-
# Visualization configuration
|
897 |
-
plt.style.use('seaborn-v0_8-darkgrid')
|
898 |
-
sns.set_palette("husl")
|
899 |
-
|
900 |
-
class ExperimentalModelAnalyzer:
|
901 |
-
\"\"\"
|
902 |
-
Class for comparative analysis of biotechnological models across multiple experiments.
|
903 |
-
Analyzes biomass, substrate and product models separately for each experimental condition.
|
904 |
-
\"\"\"
|
905 |
-
|
906 |
-
def __init__(self):
|
907 |
-
self.results_df = None
|
908 |
-
self.experiments = {}
|
909 |
-
self.best_models_by_experiment = {}
|
910 |
-
self.overall_best_models = {
|
911 |
-
'biomass': None,
|
912 |
-
'substrate': None,
|
913 |
-
'product': None
|
914 |
-
}
|
915 |
-
|
916 |
-
def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
|
917 |
-
\"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
|
918 |
-
if data_dict:
|
919 |
-
self.results_df = pd.DataFrame(data_dict)
|
920 |
-
elif file_path:
|
921 |
-
if file_path.endswith('.csv'):
|
922 |
-
self.results_df = pd.read_csv(file_path)
|
923 |
-
else:
|
924 |
-
self.results_df = pd.read_excel(file_path)
|
925 |
-
|
926 |
-
print(f"✅ Data loaded: {len(self.results_df)} models")
|
927 |
-
print(f"📊 Available columns: {list(self.results_df.columns)}")
|
928 |
-
|
929 |
-
# Identify experiments
|
930 |
-
if 'Experiment' in self.results_df.columns:
|
931 |
-
self.experiments = self.results_df.groupby('Experiment').groups
|
932 |
-
print(f"🧪 Experiments found: {list(self.experiments.keys())}")
|
933 |
-
|
934 |
-
return self.results_df
|
935 |
-
|
936 |
-
def analyze_by_experiment(self,
|
937 |
-
experiment_col: str = 'Experiment',
|
938 |
-
model_col: str = 'Model',
|
939 |
-
type_col: str = 'Type',
|
940 |
-
r2_col: str = 'R2',
|
941 |
-
rmse_col: str = 'RMSE') -> Dict:
|
942 |
-
\"\"\"
|
943 |
-
Analyze models by experiment and variable type.
|
944 |
-
Identifies best models for biomass, substrate, and product in each experiment.
|
945 |
-
\"\"\"
|
946 |
-
if self.results_df is None:
|
947 |
-
raise ValueError("First load data with load_results()")
|
948 |
-
|
949 |
-
results_by_exp = {}
|
950 |
-
|
951 |
-
# Get unique experiments
|
952 |
-
if experiment_col in self.results_df.columns:
|
953 |
-
experiments = self.results_df[experiment_col].unique()
|
954 |
-
else:
|
955 |
-
experiments = ['All_Data']
|
956 |
-
self.results_df[experiment_col] = 'All_Data'
|
957 |
-
|
958 |
-
print("\\n" + "="*80)
|
959 |
-
print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
|
960 |
-
print("="*80)
|
961 |
-
|
962 |
-
for exp in experiments:
|
963 |
-
print(f"\\n🧪 EXPERIMENT: {exp}")
|
964 |
-
print("-"*50)
|
965 |
-
|
966 |
-
exp_data = self.results_df[self.results_df[experiment_col] == exp]
|
967 |
-
results_by_exp[exp] = {}
|
968 |
-
|
969 |
-
# Analyze by variable type if available
|
970 |
-
if type_col in exp_data.columns:
|
971 |
-
var_types = exp_data[type_col].unique()
|
972 |
-
|
973 |
-
for var_type in var_types:
|
974 |
-
var_data = exp_data[exp_data[type_col] == var_type]
|
975 |
-
|
976 |
-
if not var_data.empty:
|
977 |
-
# Find best model for this variable type
|
978 |
-
best_idx = var_data[r2_col].idxmax()
|
979 |
-
best_model = var_data.loc[best_idx]
|
980 |
-
|
981 |
-
results_by_exp[exp][var_type] = {
|
982 |
-
'best_model': best_model[model_col],
|
983 |
-
'r2': best_model[r2_col],
|
984 |
-
'rmse': best_model[rmse_col],
|
985 |
-
'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
|
986 |
-
}
|
987 |
-
|
988 |
-
print(f"\\n 📈 {var_type.upper()}:")
|
989 |
-
print(f" Best Model: {best_model[model_col]}")
|
990 |
-
print(f" R² = {best_model[r2_col]:.4f}")
|
991 |
-
print(f" RMSE = {best_model[rmse_col]:.4f}")
|
992 |
-
|
993 |
-
# Show all models for this variable
|
994 |
-
print(f"\\n All {var_type} models tested:")
|
995 |
-
for _, row in var_data.iterrows():
|
996 |
-
print(f" - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
|
997 |
-
else:
|
998 |
-
# If no type column, analyze all models together
|
999 |
-
best_idx = exp_data[r2_col].idxmax()
|
1000 |
-
best_model = exp_data.loc[best_idx]
|
1001 |
-
|
1002 |
-
results_by_exp[exp]['all'] = {
|
1003 |
-
'best_model': best_model[model_col],
|
1004 |
-
'r2': best_model[r2_col],
|
1005 |
-
'rmse': best_model[rmse_col],
|
1006 |
-
'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
|
1007 |
-
}
|
1008 |
-
|
1009 |
-
self.best_models_by_experiment = results_by_exp
|
1010 |
-
|
1011 |
-
# Determine overall best models
|
1012 |
-
self._determine_overall_best_models()
|
1013 |
-
|
1014 |
-
return results_by_exp
|
1015 |
-
|
1016 |
-
def _determine_overall_best_models(self):
|
1017 |
-
\"\"\"Determine the best models across all experiments\"\"\"
|
1018 |
-
print("\\n" + "="*80)
|
1019 |
-
print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
|
1020 |
-
print("="*80)
|
1021 |
-
|
1022 |
-
# Aggregate performance by model and type
|
1023 |
-
model_performance = {}
|
1024 |
-
|
1025 |
-
for exp, exp_results in self.best_models_by_experiment.items():
|
1026 |
-
for var_type, var_results in exp_results.items():
|
1027 |
-
if var_type not in model_performance:
|
1028 |
-
model_performance[var_type] = {}
|
1029 |
-
|
1030 |
-
for model_data in var_results['all_models']:
|
1031 |
-
model_name = model_data['Model']
|
1032 |
-
if model_name not in model_performance[var_type]:
|
1033 |
-
model_performance[var_type][model_name] = {
|
1034 |
-
'r2_values': [],
|
1035 |
-
'rmse_values': [],
|
1036 |
-
'experiments': []
|
1037 |
-
}
|
1038 |
-
|
1039 |
-
model_performance[var_type][model_name]['r2_values'].append(model_data['R2'])
|
1040 |
-
model_performance[var_type][model_name]['rmse_values'].append(model_data['RMSE'])
|
1041 |
-
model_performance[var_type][model_name]['experiments'].append(exp)
|
1042 |
-
|
1043 |
-
# Calculate average performance and select best
|
1044 |
-
for var_type, models in model_performance.items():
|
1045 |
-
best_avg_r2 = -1
|
1046 |
-
best_model = None
|
1047 |
-
|
1048 |
-
print(f"\\n📊 {var_type.upper()} MODELS:")
|
1049 |
-
for model_name, perf_data in models.items():
|
1050 |
-
avg_r2 = np.mean(perf_data['r2_values'])
|
1051 |
-
avg_rmse = np.mean(perf_data['rmse_values'])
|
1052 |
-
n_exp = len(perf_data['experiments'])
|
1053 |
-
|
1054 |
-
print(f" {model_name}:")
|
1055 |
-
print(f" Average R² = {avg_r2:.4f}")
|
1056 |
-
print(f" Average RMSE = {avg_rmse:.4f}")
|
1057 |
-
print(f" Tested in {n_exp} experiments")
|
1058 |
-
|
1059 |
-
if avg_r2 > best_avg_r2:
|
1060 |
-
best_avg_r2 = avg_r2
|
1061 |
-
best_model = {
|
1062 |
-
'name': model_name,
|
1063 |
-
'avg_r2': avg_r2,
|
1064 |
-
'avg_rmse': avg_rmse,
|
1065 |
-
'n_experiments': n_exp
|
1066 |
-
}
|
1067 |
-
|
1068 |
-
if var_type.lower() in ['biomass', 'substrate', 'product']:
|
1069 |
-
self.overall_best_models[var_type.lower()] = best_model
|
1070 |
-
print(f"\\n 🏆 BEST {var_type.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")
|
1071 |
-
|
1072 |
-
def create_comparison_visualizations(self):
|
1073 |
-
\"\"\"Create visualizations comparing models across experiments\"\"\"
|
1074 |
-
if not self.best_models_by_experiment:
|
1075 |
-
raise ValueError("First run analyze_by_experiment()")
|
1076 |
-
|
1077 |
-
# Prepare data for visualization
|
1078 |
-
experiments = []
|
1079 |
-
biomass_r2 = []
|
1080 |
-
substrate_r2 = []
|
1081 |
-
product_r2 = []
|
1082 |
-
|
1083 |
-
for exp, results in self.best_models_by_experiment.items():
|
1084 |
-
experiments.append(exp)
|
1085 |
-
biomass_r2.append(results.get('Biomass', {}).get('r2', 0))
|
1086 |
-
substrate_r2.append(results.get('Substrate', {}).get('r2', 0))
|
1087 |
-
product_r2.append(results.get('Product', {}).get('r2', 0))
|
1088 |
-
|
1089 |
-
# Create figure with subplots
|
1090 |
-
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
|
1091 |
-
fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)
|
1092 |
-
|
1093 |
-
# 1. R² comparison by experiment and variable type
|
1094 |
-
ax1 = axes[0, 0]
|
1095 |
-
x = np.arange(len(experiments))
|
1096 |
-
width = 0.25
|
1097 |
-
|
1098 |
-
ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
|
1099 |
-
ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
|
1100 |
-
ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)
|
1101 |
-
|
1102 |
-
ax1.set_xlabel('Experiment')
|
1103 |
-
ax1.set_ylabel('R²')
|
1104 |
-
ax1.set_title('Best Model R² by Experiment and Variable Type')
|
1105 |
-
ax1.set_xticks(x)
|
1106 |
-
ax1.set_xticklabels(experiments, rotation=45, ha='right')
|
1107 |
-
ax1.legend()
|
1108 |
-
ax1.grid(True, alpha=0.3)
|
1109 |
-
|
1110 |
-
# Add value labels
|
1111 |
-
for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
|
1112 |
-
if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
|
1113 |
-
if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
|
1114 |
-
if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)
|
1115 |
-
|
1116 |
-
# 2. Model frequency heatmap
|
1117 |
-
ax2 = axes[0, 1]
|
1118 |
-
# This would show which models appear most frequently as best
|
1119 |
-
# Implementation depends on actual data structure
|
1120 |
-
ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
|
1121 |
-
ha='center', va='center', transform=ax2.transAxes)
|
1122 |
-
ax2.set_title('Most Frequently Selected Models')
|
1123 |
-
|
1124 |
-
# 3. Parameter evolution across experiments
|
1125 |
-
ax3 = axes[1, 0]
|
1126 |
-
ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
|
1127 |
-
ha='center', va='center', transform=ax3.transAxes)
|
1128 |
-
ax3.set_title('Parameter Trends')
|
1129 |
-
|
1130 |
-
# 4. Overall best models summary
|
1131 |
-
ax4 = axes[1, 1]
|
1132 |
-
ax4.axis('off')
|
1133 |
-
|
1134 |
-
summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
|
1135 |
-
for var_type, model_info in self.overall_best_models.items():
|
1136 |
-
if model_info:
|
1137 |
-
summary_text += f"{var_type.upper()}:\\n"
|
1138 |
-
summary_text += f" Model: {model_info['name']}\\n"
|
1139 |
-
summary_text += f" Avg R²: {model_info['avg_r2']:.4f}\\n"
|
1140 |
-
summary_text += f" Tested in: {model_info['n_experiments']} experiments\\n\\n"
|
1141 |
-
|
1142 |
-
ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
|
1143 |
-
fontsize=12, verticalalignment='top', fontfamily='monospace')
|
1144 |
-
ax4.set_title('Overall Best Models Summary')
|
1145 |
-
|
1146 |
-
plt.tight_layout()
|
1147 |
-
plt.show()
|
1148 |
-
|
1149 |
-
def generate_summary_table(self) -> pd.DataFrame:
|
1150 |
-
\"\"\"Generate a summary table of best models by experiment and type\"\"\"
|
1151 |
-
summary_data = []
|
1152 |
-
|
1153 |
-
for exp, results in self.best_models_by_experiment.items():
|
1154 |
-
for var_type, var_results in results.items():
|
1155 |
-
summary_data.append({
|
1156 |
-
'Experiment': exp,
|
1157 |
-
'Variable_Type': var_type,
|
1158 |
-
'Best_Model': var_results['best_model'],
|
1159 |
-
'R2': var_results['r2'],
|
1160 |
-
'RMSE': var_results['rmse']
|
1161 |
-
})
|
1162 |
-
|
1163 |
-
summary_df = pd.DataFrame(summary_data)
|
1164 |
-
|
1165 |
-
print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
|
1166 |
-
print("="*80)
|
1167 |
-
print(summary_df.to_string(index=False))
|
1168 |
-
|
1169 |
-
return summary_df
|
1170 |
-
|
1171 |
-
# Example usage
|
1172 |
-
if __name__ == "__main__":
|
1173 |
-
print("🧬 Experimental Model Comparison System")
|
1174 |
-
print("="*60)
|
1175 |
-
|
1176 |
-
# Example data structure with experiments
|
1177 |
-
example_data = {
|
1178 |
-
'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
|
1179 |
-
'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
|
1180 |
-
'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
|
1181 |
-
'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
|
1182 |
-
'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
|
1183 |
-
'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
|
1184 |
-
'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
|
1185 |
-
'Substrate', 'Substrate', 'Substrate', 'Substrate',
|
1186 |
-
'Product', 'Product', 'Product', 'Product'],
|
1187 |
-
'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
|
1188 |
-
0.9723, 0.9856, 0.9698, 0.9812,
|
1189 |
-
0.9634, 0.9512, 0.9687, 0.9423],
|
1190 |
-
'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
|
1191 |
-
0.0312, 0.0245, 0.0334, 0.0289,
|
1192 |
-
0.0412, 0.0523, 0.0389, 0.0567],
|
1193 |
-
'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
|
1194 |
-
None, None, None, None, None, None, None, None],
|
1195 |
-
'Ks': [None, None, None, None, None, None,
|
1196 |
-
2.1, 1.8, 2.3, 1.9, None, None, None, None]
|
1197 |
-
}
|
1198 |
-
|
1199 |
-
# Create analyzer
|
1200 |
-
analyzer = ExperimentalModelAnalyzer()
|
1201 |
-
|
1202 |
-
# Load data
|
1203 |
-
analyzer.load_results(data_dict=example_data)
|
1204 |
-
|
1205 |
-
# Analyze by experiment
|
1206 |
-
results = analyzer.analyze_by_experiment()
|
1207 |
-
|
1208 |
-
# Create visualizations
|
1209 |
-
analyzer.create_comparison_visualizations()
|
1210 |
-
|
1211 |
-
# Generate summary table
|
1212 |
-
summary = analyzer.generate_summary_table()
|
1213 |
-
|
1214 |
-
print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
|
1215 |
-
"""
|
1216 |
-
|
1217 |
-
return code
|
1218 |
-
|
1219 |
-
# Estado global para almacenar resultados
|
1220 |
-
class AppState:
|
1221 |
-
def __init__(self):
|
1222 |
-
self.current_analysis = ""
|
1223 |
-
self.current_code = ""
|
1224 |
-
self.current_language = "en"
|
1225 |
-
|
1226 |
-
app_state = AppState()
|
1227 |
|
1228 |
-
|
1229 |
-
"""Exporta el reporte al formato seleccionado"""
|
1230 |
-
if not app_state.current_analysis:
|
1231 |
-
error_msg = {
|
1232 |
-
'en': "No analysis available to export",
|
1233 |
-
'es': "No hay análisis disponible para exportar",
|
1234 |
-
'fr': "Aucune analyse disponible pour exporter",
|
1235 |
-
'de': "Keine Analyse zum Exportieren verfügbar",
|
1236 |
-
'pt': "Nenhuma análise disponível para exportar"
|
1237 |
-
}
|
1238 |
-
return error_msg.get(language, error_msg['en']), ""
|
1239 |
-
|
1240 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
1241 |
-
|
1242 |
-
try:
|
1243 |
-
if export_format == "DOCX":
|
1244 |
-
filename = f"biotech_analysis_report_{timestamp}.docx"
|
1245 |
-
ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
|
1246 |
-
else: # PDF
|
1247 |
-
filename = f"biotech_analysis_report_{timestamp}.pdf"
|
1248 |
-
ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)
|
1249 |
-
|
1250 |
-
success_msg = TRANSLATIONS[language]['report_exported']
|
1251 |
-
return f"{success_msg} {filename}", filename
|
1252 |
-
except Exception as e:
|
1253 |
-
return f"Error: {str(e)}", ""
|
1254 |
|
1255 |
-
# Interfaz Gradio
|
1256 |
def create_interface():
|
1257 |
-
|
1258 |
-
|
1259 |
-
|
1260 |
-
|
1261 |
-
def update_interface_language(language):
|
1262 |
-
"""Actualiza el idioma de la interfaz"""
|
1263 |
-
app_state.current_language = language
|
1264 |
t = TRANSLATIONS[language]
|
1265 |
-
|
1266 |
return [
|
1267 |
-
gr.update(value=f"# {t['title']}"),
|
1268 |
-
gr.update(
|
1269 |
-
gr.update(label=t['
|
1270 |
-
gr.update(label=t['
|
1271 |
-
gr.update(label=t['
|
1272 |
-
gr.update(label=t['
|
1273 |
-
gr.update(label=t['
|
1274 |
-
gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
|
1275 |
-
gr.update(value=t['analyze_button']), # analyze_btn
|
1276 |
-
gr.update(label=t['export_format']), # export_format
|
1277 |
-
gr.update(value=t['export_button']), # export_btn
|
1278 |
-
gr.update(label=t['comparative_analysis']), # analysis_output
|
1279 |
-
gr.update(label=t['implementation_code']), # code_output
|
1280 |
-
gr.update(label=t['data_format']) # data_format_accordion
|
1281 |
]
|
1282 |
-
|
1283 |
-
|
1284 |
-
"""
|
1285 |
-
|
1286 |
-
|
1287 |
-
|
1288 |
-
|
1289 |
-
analysis, code = process_files(files, model, detail, language, additional_specs)
|
1290 |
-
app_state.current_analysis = analysis
|
1291 |
-
app_state.current_code = code
|
1292 |
-
return analysis, code
|
1293 |
-
|
1294 |
-
with gr.Blocks(theme=THEMES[current_theme]) as demo:
|
1295 |
-
# Componentes de UI
|
1296 |
-
with gr.Row():
|
1297 |
-
with gr.Column(scale=3):
|
1298 |
-
title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
|
1299 |
-
subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
|
1300 |
-
with gr.Column(scale=1):
|
1301 |
-
with gr.Row():
|
1302 |
-
language_selector = gr.Dropdown(
|
1303 |
-
choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
|
1304 |
-
("Deutsch", "de"), ("Português", "pt")],
|
1305 |
-
value="en",
|
1306 |
-
label=TRANSLATIONS[current_language]['select_language'],
|
1307 |
-
interactive=True
|
1308 |
-
)
|
1309 |
-
theme_selector = gr.Dropdown(
|
1310 |
-
choices=[("Light", "light"), ("Dark", "dark")],
|
1311 |
-
value="light",
|
1312 |
-
label=TRANSLATIONS[current_language]['select_theme'],
|
1313 |
-
interactive=True
|
1314 |
-
)
|
1315 |
|
1316 |
with gr.Row():
|
1317 |
with gr.Column(scale=1):
|
1318 |
-
files_input = gr.File(
|
1319 |
-
label=TRANSLATIONS[current_language]['upload_files'],
|
1320 |
-
file_count="multiple",
|
1321 |
-
file_types=[".csv", ".xlsx", ".xls", ".pdf", ".zip"],
|
1322 |
-
type="filepath"
|
1323 |
-
)
|
1324 |
|
1325 |
-
# CAMBIO: Usar el diccionario de modelos de Nebius
|
1326 |
default_model = "Qwen/Qwen3-14B"
|
1327 |
-
model_selector = gr.Dropdown(
|
1328 |
-
choices=list(NEBIUS_MODELS.keys()),
|
1329 |
-
value=default_model,
|
1330 |
-
label=TRANSLATIONS[current_language]['select_model'],
|
1331 |
-
info=f"{TRANSLATIONS[current_language]['best_for']}: {NEBIUS_MODELS[default_model]['best_for']}"
|
1332 |
-
)
|
1333 |
-
|
1334 |
-
detail_level = gr.Radio(
|
1335 |
-
choices=[
|
1336 |
-
(TRANSLATIONS[current_language]['detailed'], "detailed"),
|
1337 |
-
(TRANSLATIONS[current_language]['summarized'], "summarized")
|
1338 |
-
],
|
1339 |
-
value="detailed",
|
1340 |
-
label=TRANSLATIONS[current_language]['detail_level']
|
1341 |
-
)
|
1342 |
|
1343 |
-
|
1344 |
-
additional_specs = gr.Textbox(
|
1345 |
-
label=TRANSLATIONS[current_language]['additional_specs'],
|
1346 |
-
placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
|
1347 |
-
lines=3,
|
1348 |
-
max_lines=5,
|
1349 |
-
interactive=True
|
1350 |
-
)
|
1351 |
|
1352 |
-
|
1353 |
-
TRANSLATIONS[current_language]['analyze_button'],
|
1354 |
-
variant="primary",
|
1355 |
-
size="lg"
|
1356 |
-
)
|
1357 |
-
|
1358 |
-
gr.Markdown("---")
|
1359 |
|
1360 |
-
|
1361 |
-
choices=["DOCX", "PDF"],
|
1362 |
-
value="PDF",
|
1363 |
-
label=TRANSLATIONS[current_language]['export_format']
|
1364 |
-
)
|
1365 |
|
1366 |
-
|
1367 |
-
TRANSLATIONS[current_language]['export_button'],
|
1368 |
-
variant="secondary"
|
1369 |
-
)
|
1370 |
|
1371 |
-
|
1372 |
-
label="Export Status",
|
1373 |
-
interactive=False,
|
1374 |
-
visible=False
|
1375 |
-
)
|
1376 |
|
1377 |
-
|
1378 |
-
|
1379 |
-
|
1380 |
-
|
1381 |
-
|
1382 |
with gr.Column(scale=2):
|
1383 |
-
analysis_output = gr.Markdown(
|
1384 |
-
|
1385 |
-
|
1386 |
-
|
1387 |
-
|
1388 |
-
|
1389 |
-
|
1390 |
-
|
1391 |
-
|
1392 |
-
|
1393 |
-
|
1394 |
-
|
1395 |
-
|
1396 |
-
open=False
|
1397 |
-
)
|
1398 |
-
|
1399 |
-
with data_format_accordion:
|
1400 |
-
gr.Markdown("""
|
1401 |
-
### Expected CSV/Excel structure:
|
1402 |
|
1403 |
-
|
1404 |
-
|
1405 |
-
|
1406 |
-
| pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
|
1407 |
-
| pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
|
1408 |
-
| pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
|
1409 |
|
1410 |
-
|
1411 |
-
|
1412 |
-
|
1413 |
-
|
1414 |
-
|
1415 |
-
|
1416 |
-
|
1417 |
-
|
1418 |
-
# CAMBIO: Actualizar el modelo en los ejemplos
|
1419 |
-
examples = gr.Examples(
|
1420 |
-
examples=[
|
1421 |
-
[["examples/biomass_models_comparison.csv"], "Qwen/Qwen3-14B", "detailed", ""],
|
1422 |
-
[["examples/substrate_kinetics_results.xlsx"], "Qwen/Qwen3-14B", "summarized", "Focus on temperature effects"]
|
1423 |
-
],
|
1424 |
-
inputs=[files_input, model_selector, detail_level, additional_specs],
|
1425 |
-
label=TRANSLATIONS[current_language]['examples']
|
1426 |
-
)
|
1427 |
-
|
1428 |
-
# Eventos
|
1429 |
-
language_selector.change(
|
1430 |
-
update_interface_language,
|
1431 |
-
inputs=[language_selector],
|
1432 |
-
outputs=[
|
1433 |
-
title_text, subtitle_text, files_input, model_selector,
|
1434 |
-
language_selector, theme_selector, detail_level, additional_specs,
|
1435 |
-
analyze_btn, export_format, export_btn, analysis_output,
|
1436 |
-
code_output, data_format_accordion
|
1437 |
-
]
|
1438 |
-
)
|
1439 |
-
|
1440 |
-
def change_theme(theme_name):
|
1441 |
-
"""Cambia el tema de la interfaz"""
|
1442 |
-
return gr.Info("Theme will be applied on next page load")
|
1443 |
-
|
1444 |
-
theme_selector.change(
|
1445 |
-
change_theme,
|
1446 |
-
inputs=[theme_selector],
|
1447 |
-
outputs=[]
|
1448 |
-
)
|
1449 |
-
|
1450 |
analyze_btn.click(
|
1451 |
-
fn=
|
1452 |
-
inputs=[files_input, model_selector,
|
1453 |
-
outputs=[analysis_output, code_output]
|
1454 |
)
|
1455 |
|
1456 |
-
def handle_export(format, language):
|
1457 |
-
status, file = export_report(format, language)
|
1458 |
-
if file:
|
1459 |
-
return gr.update(value=status, visible=True), gr.update(value=file, visible=True)
|
1460 |
-
else:
|
1461 |
-
return gr.update(value=status, visible=True), gr.update(visible=False)
|
1462 |
-
|
1463 |
export_btn.click(
|
1464 |
-
fn=
|
1465 |
-
inputs=[
|
1466 |
-
outputs=[
|
1467 |
)
|
1468 |
-
|
1469 |
return demo
|
1470 |
|
1471 |
-
# Función principal
|
1472 |
def main():
|
1473 |
-
# CAMBIO: Comprobar la nueva variable de entorno
|
1474 |
if not os.getenv("NEBIUS_API_KEY"):
|
1475 |
-
print("⚠️
|
1476 |
-
return gr.Interface(
|
1477 |
-
fn=lambda x: TRANSLATIONS['en']['error_no_api'],
|
1478 |
-
inputs=gr.Textbox(),
|
1479 |
-
outputs=gr.Textbox(),
|
1480 |
-
title="Configuration Error"
|
1481 |
-
)
|
1482 |
|
1483 |
return create_interface()
|
1484 |
|
1485 |
-
# Para ejecución local
|
1486 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1487 |
demo = main()
|
1488 |
if demo:
|
1489 |
-
|
1490 |
-
if not os.path.exists("examples"):
|
1491 |
-
os.makedirs("examples")
|
1492 |
-
if not os.path.exists("examples/biomass_models_comparison.csv"):
|
1493 |
-
pd.DataFrame({
|
1494 |
-
'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5'],
|
1495 |
-
'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz'],
|
1496 |
-
'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass'],
|
1497 |
-
'R2': [0.98, 0.99, 0.995, 0.97, 0.98, 0.99],
|
1498 |
-
'RMSE': [0.02, 0.01, 0.005, 0.03, 0.02, 0.01]
|
1499 |
-
}).to_csv("examples/biomass_models_comparison.csv", index=False)
|
1500 |
-
if not os.path.exists("examples/substrate_kinetics_results.xlsx"):
|
1501 |
-
pd.DataFrame({
|
1502 |
-
'Experiment': ['Temp_30C', 'Temp_30C', 'Temp_37C', 'Temp_37C'],
|
1503 |
-
'Model': ['First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate'],
|
1504 |
-
'Type': ['Substrate', 'Substrate', 'Substrate', 'Substrate'],
|
1505 |
-
'R2': [0.97, 0.98, 0.96, 0.985],
|
1506 |
-
'RMSE': [0.03, 0.02, 0.04, 0.015]
|
1507 |
-
}).to_excel("examples/substrate_kinetics_results.xlsx", index=False)
|
1508 |
-
|
1509 |
-
demo.launch(
|
1510 |
-
server_name="0.0.0.0",
|
1511 |
-
server_port=7860,
|
1512 |
-
share=False
|
1513 |
-
)
|
|
|
1 |
import gradio as gr
|
2 |
+
from openai import OpenAI
|
3 |
import PyPDF2
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
|
|
8 |
import json
|
9 |
import zipfile
|
10 |
import tempfile
|
11 |
+
from typing import Dict, List, Tuple, Union
|
|
|
12 |
from pathlib import Path
|
|
|
|
|
|
|
13 |
from docx import Document
|
14 |
+
from docx.shared import Pt
|
|
|
15 |
from reportlab.lib import colors
|
16 |
+
from reportlab.lib.pagesizes import letter
|
17 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
18 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
19 |
from reportlab.lib.units import inch
|
|
|
|
|
|
|
20 |
from datetime import datetime
|
|
|
21 |
|
22 |
# Configuración para HuggingFace
|
23 |
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
|
24 |
|
25 |
+
# Inicializar cliente OpenAI para Nebius
|
26 |
client = OpenAI(
|
27 |
base_url="https://api.studio.nebius.com/v1/",
|
28 |
api_key=os.environ.get("NEBIUS_API_KEY")
|
29 |
)
|
30 |
|
31 |
+
# Sistema de traducción
|
32 |
TRANSLATIONS = {
|
33 |
'en': {
|
34 |
+
'title': '🧬 API-Powered Biotechnological Model Analyzer',
|
35 |
+
'subtitle': 'Upload your model fitting results and let the AI perform a complete comparative analysis.',
|
36 |
'upload_files': '📁 Upload fitting results (CSV/Excel)',
|
37 |
+
'select_model': '🤖 AI Model',
|
38 |
'select_language': '🌐 Language',
|
39 |
+
'detail_level': '📋 Analysis Detail Level',
|
|
|
40 |
'detailed': 'Detailed',
|
41 |
'summarized': 'Summarized',
|
42 |
+
'analyze_button': '🚀 Analyze with AI',
|
43 |
+
'export_format': '📄 Export Format',
|
44 |
+
'export_button': '💾 Export Analysis',
|
45 |
+
'comparative_analysis': '📊 AI-Generated Analysis',
|
46 |
+
'implementation_code': '💻 AI-Generated Implementation Code',
|
47 |
+
'data_format': '📋 Expected Data Format',
|
48 |
+
'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets',
|
|
|
|
|
|
|
|
|
|
|
49 |
'error_no_files': 'Please upload fitting result files to analyze',
|
50 |
'report_exported': 'Report exported successfully as',
|
51 |
+
'additional_specs': '📝 Additional Specifications for Analysis',
|
52 |
+
'additional_specs_placeholder': 'e.g., "Focus on the effect of temperature" or "Provide scale-up recommendations"...'
|
|
|
|
|
|
|
|
|
53 |
},
|
54 |
'es': {
|
55 |
+
'title': '🧬 Analizador Biotecnológico Impulsado por API',
|
56 |
+
'subtitle': 'Sube los resultados de ajuste de tus modelos y deja que la IA realice un análisis comparativo completo.',
|
57 |
'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
|
58 |
+
'select_model': '🤖 Modelo de IA',
|
59 |
'select_language': '🌐 Idioma',
|
60 |
+
'detail_level': '📋 Nivel de Detalle del Análisis',
|
|
|
61 |
'detailed': 'Detallado',
|
62 |
'summarized': 'Resumido',
|
63 |
+
'analyze_button': '🚀 Analizar con IA',
|
64 |
+
'export_format': '📄 Formato de Exportación',
|
65 |
+
'export_button': '💾 Exportar Análisis',
|
66 |
+
'comparative_analysis': '📊 Análisis Generado por IA',
|
67 |
+
'implementation_code': '💻 Código de Implementación Generado por IA',
|
68 |
+
'data_format': '📋 Formato de Datos Esperado',
|
69 |
+
'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space',
|
|
|
|
|
|
|
|
|
|
|
70 |
'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
|
71 |
'report_exported': 'Reporte exportado exitosamente como',
|
72 |
+
'additional_specs': '📝 Especificaciones Adicionales para el Análisis',
|
73 |
+
'additional_specs_placeholder': 'Ej: "Enfócate en el efecto de la temperatura" o "Provee recomendaciones de escalado"...'
|
|
|
|
|
|
|
|
|
74 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
}
|
76 |
|
77 |
+
# Modelos de Nebius disponibles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
NEBIUS_MODELS = {
|
79 |
"Qwen/Qwen3-14B": {
|
80 |
"name": "Qwen 3 (14B)",
|
81 |
"description": "Modelo potente y versátil de la familia Qwen.",
|
|
|
|
|
82 |
},
|
|
|
83 |
}
|
84 |
|
85 |
class FileProcessor:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
@staticmethod
|
87 |
+
def read_csv(csv_file: bytes) -> pd.DataFrame:
|
88 |
+
try: return pd.read_csv(io.BytesIO(csv_file))
|
89 |
+
except Exception: return None
|
|
|
|
|
|
|
90 |
|
91 |
@staticmethod
|
92 |
+
def read_excel(excel_file: bytes) -> pd.DataFrame:
|
93 |
+
try: return pd.read_excel(io.BytesIO(excel_file))
|
94 |
+
except Exception: return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
class ReportExporter:
    """Export the Markdown analysis text as a DOCX or PDF report.

    Only three Markdown constructs are interpreted: lines starting with
    ``# ``, ``## `` or ``### `` become headings of level 1, 2 or 3; every
    other line is emitted as a plain paragraph.
    """

    # (prefix, heading level) pairs recognised at the start of a line.
    _HEADINGS = (('### ', 3), ('## ', 2), ('# ', 1))

    @staticmethod
    def _classify(line: str):
        """Return ``(level, text)`` for a line; level 0 means body text."""
        for prefix, level in ReportExporter._HEADINGS:
            if line.startswith(prefix):
                return level, line[len(prefix):]
        return 0, line

    @staticmethod
    def export_to_docx(content: str, filename: str, language: str = 'en'):
        """Write ``content`` to a Word document at ``filename``.

        Args:
            content: Markdown-style report text, one construct per line.
            filename: Destination path of the .docx file.
            language: Key into ``TRANSLATIONS`` used for the title and the
                timestamp label.

        Returns:
            ``filename``, unchanged.
        """
        t = TRANSLATIONS[language]
        doc = Document()
        doc.add_heading(t['title'], 0)
        # The timestamp label reuses the 'report_exported' message, keeping
        # only the part before ' as' when that separator is present.
        stamp_label = t['report_exported'].split(' as')[0]
        doc.add_paragraph(f"{stamp_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        doc.add_paragraph()
        for line in content.split('\n'):
            level, text = ReportExporter._classify(line)
            if level:
                doc.add_heading(text, level=level)
            else:
                doc.add_paragraph(line)
        doc.save(filename)
        return filename

    @staticmethod
    def export_to_pdf(content: str, filename: str, language: str = 'en'):
        """Write ``content`` to a PDF document at ``filename``.

        Args:
            content: Markdown-style report text, one construct per line.
            filename: Destination path of the .pdf file.
            language: Key into ``TRANSLATIONS`` used for the title.

        Returns:
            ``filename``, unchanged.
        """
        # Function-scope import so the block does not depend on a new
        # file-level import.
        from xml.sax.saxutils import escape

        doc = SimpleDocTemplate(filename, pagesize=letter)
        styles = getSampleStyleSheet()
        heading_styles = {1: styles['h1'], 2: styles['h2'], 3: styles['h3']}
        story = [Paragraph(TRANSLATIONS[language]['title'], styles['h1'])]
        for line in content.split('\n'):
            level, text = ReportExporter._classify(line)
            style = heading_styles.get(level, styles['BodyText'])
            # Paragraph interprets its text as XML-like markup, so free-form
            # model output containing '<', '>' or '&' (e.g. "p < 0.05") must
            # be escaped; otherwise it can be misread as tags or make the
            # build fail.
            story.append(Paragraph(escape(text), style))
        doc.build(story)
        return filename
|
123 |
|
124 |
class AIAnalyzer:
    """Obtain the analysis text and implementation code from the chat API.

    The class contains no predefined analysis logic of its own: it builds a
    prompt, sends it through the injected client and parses the reply.
    """

    def __init__(self, client):
        # Client object exposing ``chat.completions.create(...)``.
        self.client = client

    @staticmethod
    def _build_prompt(data: pd.DataFrame, detail_level: str, language: str, additional_specs: str) -> str:
        """Return the single prompt requesting a JSON reply with two keys."""
        # The prompt lines are flush-left on purpose so that no source-code
        # indentation leaks into the text sent to the model.
        return f"""
Act as an expert in biotechnology and data science. Your task is to analyze the provided model fitting results and generate both a textual analysis and a Python implementation script.

The user has provided the following data from a CSV/Excel file:
--- DATA ---
{data.to_string()}
--- END DATA ---

User requirements:
- Language for the analysis: {language}
- Detail level: {detail_level}
- Additional specifications: "{additional_specs if additional_specs else 'None'}"

Based on all the information above, perform the following two tasks:

TASK 1: GENERATE TEXTUAL ANALYSIS
Write a comprehensive comparative analysis in Markdown format.
- If detail_level is 'detailed', provide an in-depth, experiment-by-experiment comparison, parameter analysis, biological interpretation, and robust conclusions.
- If detail_level is 'summarized', provide a concise overview, highlight the best models per experiment, and give clear, practical recommendations.
- The analysis MUST be in {language}.

TASK 2: GENERATE PYTHON CODE
Write a complete, executable Python script that a researcher can use to replicate and visualize this analysis.
- The script should include data loading (embed the provided data directly).
- It must contain functions to compare models and find the best ones.
- It must include plotting functions (using matplotlib or seaborn) to visualize the results, such as comparing R² values across experiments.
- The code should be well-commented.

IMPORTANT: Your final output must be a single, valid JSON object containing two keys: "analysis" and "code".
Example format:
{{
"analysis": "### Comparative Analysis\\n\\nHere is the detailed analysis in Markdown...",
"code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Your Python code here..."
}}

Do not add any text or explanations outside of the JSON object.
"""

    @staticmethod
    def _parse_response(raw_text) -> Dict[str, str]:
        """Extract the ``analysis`` / ``code`` pair from the raw model reply.

        Falls back to returning the raw text as the analysis when no JSON
        object can be decoded. Never raises for malformed input.
        """
        # Function-scope import so the block does not depend on a file-level
        # ``import re``.
        import re

        text = raw_text if isinstance(raw_text, str) else ""
        # Some models wrap their reasoning in <think>...</think> before the
        # answer; braces inside that preamble would derail the brace search
        # below, so the preamble is dropped first.
        cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)

        # Take the outermost {...} span; this also discards Markdown code
        # fences or stray prose around the JSON object.
        start, end = cleaned.find('{'), cleaned.rfind('}')
        parsed = None
        if start != -1 and end > start:
            try:
                # strict=False tolerates literal newlines/tabs inside JSON
                # strings, which generated code and Markdown often contain.
                parsed = json.loads(cleaned[start:end + 1], strict=False)
            except json.JSONDecodeError:
                parsed = None

        if not isinstance(parsed, dict):
            return {
                "analysis": f"API returned a non-JSON response:\n\n{text}",
                "code": "# Could not parse API response to extract code."
            }

        analysis = parsed.get("analysis")
        code = parsed.get("code")
        # Coerce to str so callers can safely join/concatenate the values.
        return {
            "analysis": "API did not return an analysis." if analysis is None else str(analysis),
            "code": "# API did not return code." if code is None else str(code)
        }

    def get_analysis_and_code(self, data: pd.DataFrame, model: str, detail_level: str, language: str, additional_specs: str) -> Dict[str, str]:
        """Make one API call and return both the analysis and the code.

        Args:
            data: Fitting results; embedded in the prompt via ``to_string()``.
            model: Model identifier passed to the API.
            detail_level: ``'detailed'`` or ``'summarized'``.
            language: Language requested for the analysis text.
            additional_specs: Optional free-text user requirements.

        Returns:
            A dict with the keys ``"analysis"`` and ``"code"``. API failures
            and unparseable replies are reported inside these values rather
            than raised.
        """
        prompt = self._build_prompt(data, detail_level, language, additional_specs)

        try:
            response = self.client.chat.completions.create(
                model=model,
                temperature=0.6,
                top_p=0.95,
                max_tokens=4000,  # high limit to allow complete answers
                messages=[{"role": "user", "content": prompt}]
            )
            raw_response_text = response.choices[0].message.content
        except Exception as e:
            # Boundary with the remote service: surface any failure to the
            # user as text instead of crashing the UI callback.
            error_message = f"An error occurred while calling the API: {str(e)}"
            return {
                "analysis": error_message,
                "code": f"# {error_message}"
            }

        return self._parse_response(raw_response_text)
|
211 |
+
|
212 |
+
def process_files(files: List, model: str, detail_level: str, language: str, additional_specs: str) -> Tuple[str, str]:
    """Analyse the first readable CSV/Excel upload with the AI analyzer.

    Only one file is analysed per call: the first entry in ``files`` that has
    a supported extension and parses successfully.

    Args:
        files: Uploaded files, either path strings or objects exposing the
            path through a ``name`` attribute. ``None`` entries are skipped.
        model: Model identifier forwarded to the analyzer.
        detail_level: ``'detailed'`` or ``'summarized'``.
        language: Key into ``TRANSLATIONS`` and language of the analysis.
        additional_specs: Optional free-text user requirements.

    Returns:
        ``(analysis_markdown, code_text)``. When nothing can be analysed the
        first element carries a message and the second is an empty string.
    """
    if not files:
        return TRANSLATIONS[language]['error_no_files'], ""

    # A single path must not be iterated character by character.
    if isinstance(files, (str, os.PathLike)):
        files = [files]

    processor = FileProcessor()
    analyzer = AIAnalyzer(client)

    for file in files:
        if file is None:
            continue

        # The upload component is configured to deliver file paths; older
        # file-like wrappers expose the same path as `.name`. Accept both.
        file_path = Path(getattr(file, "name", file))
        file_ext = file_path.suffix.lower()

        if file_ext == '.csv':
            reader = processor.read_csv
        elif file_ext in ('.xlsx', '.xls'):
            reader = processor.read_excel
        else:
            # Unsupported extension: skip without reading the file.
            continue

        with open(file_path, 'rb') as f:
            file_content = f.read()

        df = reader(file_content)
        if df is None:
            # Unparseable content; try the next upload.
            continue

        # Show the plain file name rather than the temporary upload path.
        display_name = file_path.name
        api_result = analyzer.get_analysis_and_code(df, model, detail_level, language, additional_specs)
        analysis = "\n\n".join([f"# Analysis for: {display_name}", api_result.get("analysis", "")])
        code = f"# Code generated for: {display_name}\n" + api_result.get("code", "")
        # Return right away: one file per run keeps the report unambiguous.
        return analysis, code

    return "No valid CSV/Excel files found to analyze.", ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
|
254 |
+
# --- Gradio interface ---
def create_interface():
    """Build and return the Gradio Blocks application.

    The layout has a narrow column with the inputs and export controls and a
    wide column showing the analysis (Markdown) and the generated code. The
    latest analysis, code and language are kept in a ``gr.State`` dict so the
    export button can reuse them.
    """
    current_language = "es"

    # NOTE(review): this helper is not connected to any event in this
    # function, and it returns more updates than there are matching
    # components here (e.g. 'data_format'). Confirm the intended wiring
    # before attaching it to `language_selector.change`.
    def update_language(language):
        t = TRANSLATIONS[language]
        return [
            gr.update(value=f"# {t['title']}"), gr.update(value=t['subtitle']),
            gr.update(label=t['upload_files']), gr.update(label=t['select_model']),
            gr.update(label=t['select_language']), gr.update(label=t['detail_level']),
            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
            gr.update(value=t['analyze_button']), gr.update(label=t['export_format']),
            gr.update(value=t['export_button']), gr.update(label=t['comparative_analysis']),
            gr.update(label=t['implementation_code']), gr.update(label=t['data_format'])
        ]

    t0 = TRANSLATIONS[current_language]

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        app_state = gr.State({"analysis": "", "code": "", "language": "es"})

        title_text = gr.Markdown(f"# {t0['title']}")
        subtitle_text = gr.Markdown(t0['subtitle'])

        with gr.Row():
            with gr.Column(scale=1):
                files_input = gr.File(
                    label=t0['upload_files'],
                    file_count="multiple",
                    file_types=[".csv", ".xlsx", ".xls"],
                    type="filepath",
                )

                default_model = "Qwen/Qwen3-14B"
                model_selector = gr.Dropdown(
                    choices=list(NEBIUS_MODELS.keys()),
                    value=default_model,
                    label=t0['select_model'],
                )

                detail_level_selector = gr.Radio(
                    choices=[(t0['detailed'], "detailed"), (t0['summarized'], "summarized")],
                    value="detailed",
                    label=t0['detail_level'],
                )

                additional_specs_input = gr.Textbox(
                    label=t0['additional_specs'],
                    placeholder=t0['additional_specs_placeholder'],
                    lines=3,
                )

                language_selector = gr.Dropdown(
                    choices=[("English", "en"), ("Español", "es")],
                    value="es",
                    label=t0['select_language'],
                )

                analyze_btn = gr.Button(t0['analyze_button'], variant="primary")

                gr.Markdown("---")

                export_format_selector = gr.Radio(
                    choices=["DOCX", "PDF"],
                    value="PDF",
                    label=t0['export_format'],
                )
                export_btn = gr.Button(t0['export_button'])
                export_file_output = gr.File(label="Download Report", visible=False)

            with gr.Column(scale=2):
                analysis_output = gr.Markdown(label=t0['comparative_analysis'])
                code_output = gr.Code(label=t0['implementation_code'], language="python", interactive=True)

        def run_analysis(files, model, detail, lang, specs, state):
            """Run the analysis and remember its result for a later export."""
            analysis, code = process_files(files, model, detail, lang, specs)
            state["analysis"] = analysis
            state["code"] = code
            state["language"] = lang
            return analysis, code, state

        def run_export(state, export_format):
            """Write the stored analysis to a DOCX/PDF file and expose it."""
            if not state["analysis"]:
                # Nothing analysed yet: keep the download widget hidden.
                return gr.update(visible=False)

            # Function-scope import so the block does not rely on the
            # file-level import list.
            import tempfile

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            ext = "docx" if export_format == "DOCX" else "pdf"
            # Each export gets its own temp directory: the timestamp only has
            # one-second resolution, so concurrent sessions could otherwise
            # overwrite each other's report, and the working directory stays
            # clean.
            out_dir = tempfile.mkdtemp(prefix="analysis_report_")
            filename = os.path.join(out_dir, f"analysis_report_{timestamp}.{ext}")

            if export_format == "DOCX":
                ReportExporter.export_to_docx(state["analysis"], filename, state["language"])
            else:
                ReportExporter.export_to_pdf(state["analysis"], filename, state["language"])

            return gr.update(value=filename, visible=True)

        analyze_btn.click(
            fn=run_analysis,
            inputs=[files_input, model_selector, detail_level_selector, language_selector, additional_specs_input, app_state],
            outputs=[analysis_output, code_output, app_state]
        )

        export_btn.click(
            fn=run_export,
            inputs=[app_state, export_format_selector],
            outputs=[export_file_output]
        )

    return demo
|
336 |
|
|
|
337 |
def main():
    """Return the app to launch.

    When the ``NEBIUS_API_KEY`` environment variable is set, the full
    interface is built. Otherwise a warning is printed and a minimal
    interface that only reports the configuration error is returned.
    """
    api_key_present = bool(os.getenv("NEBIUS_API_KEY"))
    if api_key_present:
        return create_interface()

    print("⚠️ NEBIUS_API_KEY not found. Please set it as an environment variable.")

    def _missing_key_message():
        # Looked up lazily, at call time, like the original callback.
        return TRANSLATIONS['en']['error_no_api']

    return gr.Interface(fn=_missing_key_message, inputs=[], outputs="text", title="Configuration Error")
|
343 |
|
|
|
344 |
if __name__ == "__main__":
    # Create the example file for Gradio if it does not exist yet.
    example_csv = Path("examples") / "biomass_models_comparison.csv"
    # exist_ok avoids the check-then-create race of testing for the
    # directory first.
    example_csv.parent.mkdir(parents=True, exist_ok=True)
    if not example_csv.exists():
        pd.DataFrame({
            'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
            'Model': ['Monod', 'Logistic', 'Monod', 'Logistic'],
            'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass'],
            'R2': [0.98, 0.99, 0.97, 0.985],
            'RMSE': [0.02, 0.01, 0.03, 0.015]
        }).to_csv("examples/biomass_models_comparison.csv", index=False)

    demo = main()
    if demo:
        # Listen on all interfaces, port 7860.
        demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|