C2MV committed
Commit 1cac78f · verified · 1 Parent(s): 58be36b

Update app.py

Files changed (1)
  1. app.py +233 -1387
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import PyPDF2
3
  import pandas as pd
4
  import numpy as np
@@ -7,1507 +8,352 @@ import os
7
  import json
8
  import zipfile
9
  import tempfile
10
- from typing import Dict, List, Tuple, Union, Optional
11
- import re
12
  from pathlib import Path
13
- import openpyxl
14
- from dataclasses import dataclass
15
- from enum import Enum
16
  from docx import Document
17
- from docx.shared import Inches, Pt, RGBColor
18
- from docx.enum.text import WD_ALIGN_PARAGRAPH
19
  from reportlab.lib import colors
20
- from reportlab.lib.pagesizes import letter, A4
21
- from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
22
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
23
  from reportlab.lib.units import inch
24
- from reportlab.pdfbase import pdfmetrics
25
- from reportlab.pdfbase.ttfonts import TTFont
26
- import matplotlib.pyplot as plt
27
  from datetime import datetime
28
- from openai import OpenAI # CAMBIO: Importación de la nueva librería
29
 
30
  # Configuración para HuggingFace
31
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
32
 
33
- # CAMBIO: Inicializar cliente OpenAI para Nebius
34
  client = OpenAI(
35
  base_url="https://api.studio.nebius.com/v1/",
36
  api_key=os.environ.get("NEBIUS_API_KEY")
37
  )
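Every model request in the app goes through this single client. As a reading aid, a minimal sketch of the call pattern used throughout the file below, assuming the OpenAI v1-style SDK that the import suggests; the model id is the default NEBIUS_MODELS entry, and the temperature, token limit and message text are illustrative:

response = client.chat.completions.create(
    model="Qwen/Qwen3-14B",  # default entry in NEBIUS_MODELS
    temperature=0.6,
    max_tokens=4000,
    messages=[{"role": "user", "content": "Compare these model fitting results..."}],
)
analysis_text = response.choices[0].message.content  # same access pattern as in AIAnalyzer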
38
 
39
- # Sistema de traducción - Actualizado con nuevas entradas
40
  TRANSLATIONS = {
41
  'en': {
42
- 'title': '🧬 Comparative Analyzer of Biotechnological Models',
43
- 'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
44
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
45
- 'select_model': '🤖 AI Model', # CAMBIO
46
  'select_language': '🌐 Language',
47
- 'select_theme': '🎨 Theme',
48
- 'detail_level': '📋 Analysis detail level',
49
  'detailed': 'Detailed',
50
  'summarized': 'Summarized',
51
- 'analyze_button': '🚀 Analyze and Compare Models',
52
- 'export_format': '📄 Export format',
53
- 'export_button': '💾 Export Report',
54
- 'comparative_analysis': '📊 Comparative Analysis',
55
- 'implementation_code': '💻 Implementation Code',
56
- 'data_format': '📋 Expected data format',
57
- 'examples': '📚 Analysis examples',
58
- 'light': 'Light',
59
- 'dark': 'Dark',
60
- 'best_for': 'Best for',
61
- 'loading': 'Loading...',
62
- 'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets', # CAMBIO
63
  'error_no_files': 'Please upload fitting result files to analyze',
64
  'report_exported': 'Report exported successfully as',
65
- 'specialized_in': '🎯 Specialized in:',
66
- 'metrics_analyzed': '📊 Analyzed metrics:',
67
- 'what_analyzes': '🔍 What it specifically analyzes:',
68
- 'tips': '💡 Tips for better results:',
69
- 'additional_specs': '📝 Additional specifications for analysis',
70
- 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
71
  },
72
  'es': {
73
- 'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
74
- 'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
75
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
76
- 'select_model': '🤖 Modelo de IA', # CAMBIO
77
  'select_language': '🌐 Idioma',
78
- 'select_theme': '🎨 Tema',
79
- 'detail_level': '📋 Nivel de detalle del análisis',
80
  'detailed': 'Detallado',
81
  'summarized': 'Resumido',
82
- 'analyze_button': '🚀 Analizar y Comparar Modelos',
83
- 'export_format': '📄 Formato de exportación',
84
- 'export_button': '💾 Exportar Reporte',
85
- 'comparative_analysis': '📊 Análisis Comparativo',
86
- 'implementation_code': '💻 Código de Implementación',
87
- 'data_format': '📋 Formato de datos esperado',
88
- 'examples': '📚 Ejemplos de análisis',
89
- 'light': 'Claro',
90
- 'dark': 'Oscuro',
91
- 'best_for': 'Mejor para',
92
- 'loading': 'Cargando...',
93
- 'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space', # CAMBIO
94
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
95
  'report_exported': 'Reporte exportado exitosamente como',
96
- 'specialized_in': '🎯 Especializado en:',
97
- 'metrics_analyzed': '📊 Métricas analizadas:',
98
- 'what_analyzes': '🔍 Qué analiza específicamente:',
99
- 'tips': '💡 Tips para mejores resultados:',
100
- 'additional_specs': '📝 Especificaciones adicionales para el análisis',
101
- 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
102
  },
103
- 'fr': {
104
- 'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
105
- 'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
106
- 'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
107
- 'select_model': '🤖 Modèle d\'IA', # CAMBIO
108
- 'select_language': '🌐 Langue',
109
- 'select_theme': '🎨 Thème',
110
- 'detail_level': '📋 Niveau de détail',
111
- 'detailed': 'Détaillé',
112
- 'summarized': 'Résumé',
113
- 'analyze_button': '🚀 Analyser et Comparer',
114
- 'export_format': '📄 Format d\'export',
115
- 'export_button': '💾 Exporter le Rapport',
116
- 'comparative_analysis': '📊 Analyse Comparative',
117
- 'implementation_code': '💻 Code d\'Implémentation',
118
- 'data_format': '📋 Format de données attendu',
119
- 'examples': '📚 Exemples d\'analyse',
120
- 'light': 'Clair',
121
- 'dark': 'Sombre',
122
- 'best_for': 'Meilleur pour',
123
- 'loading': 'Chargement...',
124
- 'error_no_api': 'Veuillez configurer NEBIUS_API_KEY', # CAMBIO
125
- 'error_no_files': 'Veuillez télécharger des fichiers à analyser',
126
- 'report_exported': 'Rapport exporté avec succès comme',
127
- 'specialized_in': '🎯 Spécialisé dans:',
128
- 'metrics_analyzed': '📊 Métriques analysées:',
129
- 'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
130
- 'tips': '💡 Conseils pour de meilleurs résultats:',
131
- 'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
132
- 'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
133
- },
134
- 'de': {
135
- 'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
136
- 'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
137
- 'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
138
- 'select_model': '🤖 KI-Modell', # CAMBIO
139
- 'select_language': '🌐 Sprache',
140
- 'select_theme': '🎨 Thema',
141
- 'detail_level': '📋 Detailgrad der Analyse',
142
- 'detailed': 'Detailliert',
143
- 'summarized': 'Zusammengefasst',
144
- 'analyze_button': '🚀 Analysieren und Vergleichen',
145
- 'export_format': '📄 Exportformat',
146
- 'export_button': '💾 Bericht Exportieren',
147
- 'comparative_analysis': '📊 Vergleichende Analyse',
148
- 'implementation_code': '💻 Implementierungscode',
149
- 'data_format': '📋 Erwartetes Datenformat',
150
- 'examples': '📚 Analysebeispiele',
151
- 'light': 'Hell',
152
- 'dark': 'Dunkel',
153
- 'best_for': 'Am besten für',
154
- 'loading': 'Laden...',
155
- 'error_no_api': 'Bitte konfigurieren Sie NEBIUS_API_KEY', # CAMBIO
156
- 'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
157
- 'report_exported': 'Bericht erfolgreich exportiert als',
158
- 'specialized_in': '🎯 Spezialisiert auf:',
159
- 'metrics_analyzed': '📊 Analysierte Metriken:',
160
- 'what_analyzes': '🔍 Was spezifisch analysiert wird:',
161
- 'tips': '💡 Tipps für bessere Ergebnisse:',
162
- 'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
163
- 'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
164
- },
165
- 'pt': {
166
- 'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
167
- 'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
168
- 'upload_files': '📁 Carregar resultados (CSV/Excel)',
169
- 'select_model': '🤖 Modelo de IA', # CAMBIO
170
- 'select_language': '🌐 Idioma',
171
- 'select_theme': '🎨 Tema',
172
- 'detail_level': '📋 Nível de detalhe',
173
- 'detailed': 'Detalhado',
174
- 'summarized': 'Resumido',
175
- 'analyze_button': '🚀 Analisar e Comparar',
176
- 'export_format': '📄 Formato de exportação',
177
- 'export_button': '💾 Exportar Relatório',
178
- 'comparative_analysis': '📊 Análise Comparativa',
179
- 'implementation_code': '💻 Código de Implementação',
180
- 'data_format': '📋 Formato de dados esperado',
181
- 'examples': '📚 Exemplos de análise',
182
- 'light': 'Claro',
183
- 'dark': 'Escuro',
184
- 'best_for': 'Melhor para',
185
- 'loading': 'Carregando...',
186
- 'error_no_api': 'Por favor configure NEBIUS_API_KEY', # CAMBIO
187
- 'error_no_files': 'Por favor carregue arquivos para analisar',
188
- 'report_exported': 'Relatório exportado com sucesso como',
189
- 'specialized_in': '🎯 Especializado em:',
190
- 'metrics_analyzed': '📊 Métricas analisadas:',
191
- 'what_analyzes': '🔍 O que analisa especificamente:',
192
- 'tips': '💡 Dicas para melhores resultados:',
193
- 'additional_specs': '📝 Especificações adicionais para a análise',
194
- 'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
195
- }
196
  }
197
 
198
- # Temas disponibles
199
- THEMES = {
200
- 'light': gr.themes.Soft(),
201
- 'dark': gr.themes.Base(
202
- primary_hue="blue",
203
- secondary_hue="gray",
204
- neutral_hue="gray",
205
- font=["Arial", "sans-serif"]
206
- ).set(
207
- body_background_fill="dark",
208
- body_background_fill_dark="*neutral_950",
209
- button_primary_background_fill="*primary_600",
210
- button_primary_background_fill_hover="*primary_500",
211
- button_primary_text_color="white",
212
- block_background_fill="*neutral_800",
213
- block_border_color="*neutral_700",
214
- block_label_text_color="*neutral_200",
215
- block_title_text_color="*neutral_100",
216
- checkbox_background_color="*neutral_700",
217
- checkbox_background_color_selected="*primary_600",
218
- input_background_fill="*neutral_700",
219
- input_border_color="*neutral_600",
220
- input_placeholder_color="*neutral_400"
221
- )
222
- }
223
-
224
- # Enum para tipos de análisis
225
- class AnalysisType(Enum):
226
- MATHEMATICAL_MODEL = "mathematical_model"
227
- DATA_FITTING = "data_fitting"
228
- FITTING_RESULTS = "fitting_results"
229
- UNKNOWN = "unknown"
230
-
231
- # Estructura modular para modelos
232
- @dataclass
233
- class MathematicalModel:
234
- name: str
235
- equation: str
236
- parameters: List[str]
237
- application: str
238
- sources: List[str]
239
- category: str
240
- biological_meaning: str
241
-
242
- # Sistema de registro de modelos escalable
243
- class ModelRegistry:
244
- def __init__(self):
245
- self.models = {}
246
- self._initialize_default_models()
247
-
248
- def register_model(self, model: MathematicalModel):
249
- """Registra un nuevo modelo matemático"""
250
- if model.category not in self.models:
251
- self.models[model.category] = {}
252
- self.models[model.category][model.name] = model
253
-
254
- def get_model(self, category: str, name: str) -> MathematicalModel:
255
- """Obtiene un modelo específico"""
256
- return self.models.get(category, {}).get(name)
257
-
258
- def get_all_models(self) -> Dict:
259
- """Retorna todos los modelos registrados"""
260
- return self.models
261
-
262
- def _initialize_default_models(self):
263
- """Inicializa los modelos por defecto"""
264
- # Modelos de crecimiento
265
- self.register_model(MathematicalModel(
266
- name="Monod",
267
- equation="μ = μmax × (S / (Ks + S))",
268
- parameters=["μmax (h⁻¹)", "Ks (g/L)"],
269
- application="Crecimiento limitado por sustrato único",
270
- sources=["Cambridge", "MIT", "DTU"],
271
- category="crecimiento_biomasa",
272
- biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante"
273
- ))
274
-
275
- self.register_model(MathematicalModel(
276
- name="Logístico",
277
- equation="dX/dt = μmax × X × (1 - X/Xmax)",
278
- parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
279
- application="Sistemas cerrados batch",
280
- sources=["Cranfield", "Swansea", "HAL Theses"],
281
- category="crecimiento_biomasa",
282
- biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema"
283
- ))
284
-
285
- self.register_model(MathematicalModel(
286
- name="Gompertz",
287
- equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
288
- parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
289
- application="Crecimiento con fase lag pronunciada",
290
- sources=["Lund University", "NC State"],
291
- category="crecimiento_biomasa",
292
- biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario"
293
- ))
294
-
295
- # Instancia global del registro
296
- model_registry = ModelRegistry()
297
-
298
- # CAMBIO: Modelos de Nebius en lugar de Claude
299
  NEBIUS_MODELS = {
300
  "Qwen/Qwen3-14B": {
301
  "name": "Qwen 3 (14B)",
302
  "description": "Modelo potente y versátil de la familia Qwen.",
303
- "max_tokens": 4096,
304
- "best_for": "Análisis detallados y generación de código complejo."
305
  },
306
- # Puedes añadir más modelos de Nebius aquí si están disponibles
307
  }
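The removed comment above notes that more Nebius models can be registered in this dictionary. A hypothetical extra entry would reuse the same keys, since the UI lists the dictionary keys as dropdown choices and shows 'best_for' in the selector info; the id and values below are placeholders, not a verified Nebius model:

NEBIUS_MODELS["example-org/example-model-7b"] = {  # placeholder id, for illustration only
    "name": "Example Model (7B)",
    "description": "Illustrative additional entry.",
    "max_tokens": 4096,
    "best_for": "Quick, lower-cost summaries.",
}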
308
 
309
  class FileProcessor:
310
- """Clase para procesar diferentes tipos de archivos"""
311
-
312
- @staticmethod
313
- def extract_text_from_pdf(pdf_file) -> str:
314
- """Extrae texto de un archivo PDF"""
315
- try:
316
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
317
- text = ""
318
- for page in pdf_reader.pages:
319
- text += page.extract_text() + "\n"
320
- return text
321
- except Exception as e:
322
- return f"Error reading PDF: {str(e)}"
323
-
324
- @staticmethod
325
- def read_csv(csv_file) -> pd.DataFrame:
326
- """Lee archivo CSV"""
327
- try:
328
- return pd.read_csv(io.BytesIO(csv_file))
329
- except Exception as e:
330
- return None
331
-
332
  @staticmethod
333
- def read_excel(excel_file) -> pd.DataFrame:
334
- """Lee archivo Excel"""
335
- try:
336
- return pd.read_excel(io.BytesIO(excel_file))
337
- except Exception as e:
338
- return None
339
 
340
  @staticmethod
341
- def extract_from_zip(zip_file) -> List[Tuple[str, bytes]]:
342
- """Extrae archivos de un ZIP"""
343
- files = []
344
- try:
345
- with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
346
- for file_name in zip_ref.namelist():
347
- if not file_name.startswith('__MACOSX'):
348
- file_data = zip_ref.read(file_name)
349
- files.append((file_name, file_data))
350
- except Exception as e:
351
- print(f"Error processing ZIP: {e}")
352
- return files
353
 
354
  class ReportExporter:
355
- """Clase para exportar reportes a diferentes formatos"""
356
-
357
  @staticmethod
358
- def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
359
- """Exporta el contenido a un archivo DOCX"""
360
  doc = Document()
361
-
362
- # Configurar estilos
363
- title_style = doc.styles['Title']
364
- title_style.font.size = Pt(24)
365
- title_style.font.bold = True
366
-
367
- heading_style = doc.styles['Heading 1']
368
- heading_style.font.size = Pt(18)
369
- heading_style.font.bold = True
370
-
371
- # Título
372
- title_text = {
373
- 'en': 'Comparative Analysis Report - Biotechnological Models',
374
- 'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
375
- 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
376
- 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
377
- 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
378
- }
379
-
380
- doc.add_heading(title_text.get(language, title_text['en']), 0)
381
-
382
- # Fecha
383
- date_text = {
384
- 'en': 'Generated on',
385
- 'es': 'Generado el',
386
- 'fr': 'Généré le',
387
- 'de': 'Erstellt am',
388
- 'pt': 'Gerado em'
389
- }
390
- doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
391
  doc.add_paragraph()
392
-
393
- # Procesar contenido
394
- lines = content.split('\n')
395
- current_paragraph = None
396
-
397
- for line in lines:
398
- line = line.strip()
399
-
400
- if line.startswith('###'):
401
- doc.add_heading(line.replace('###', '').strip(), level=2)
402
- elif line.startswith('##'):
403
- doc.add_heading(line.replace('##', '').strip(), level=1)
404
- elif line.startswith('#'):
405
- doc.add_heading(line.replace('#', '').strip(), level=0)
406
- elif line.startswith('**') and line.endswith('**'):
407
- # Texto en negrita
408
- p = doc.add_paragraph()
409
- run = p.add_run(line.replace('**', ''))
410
- run.bold = True
411
- elif line.startswith('- ') or line.startswith('* '):
412
- # Lista
413
- doc.add_paragraph(line[2:], style='List Bullet')
414
- elif line.startswith(tuple('0123456789')):
415
- # Lista numerada
416
- doc.add_paragraph(line, style='List Number')
417
- elif line == '---' or line.startswith('==='):
418
- # Separador
419
- doc.add_paragraph('_' * 50)
420
- elif line:
421
- # Párrafo normal
422
- doc.add_paragraph(line)
423
-
424
- # Guardar documento
425
  doc.save(filename)
426
  return filename
427
-
428
  @staticmethod
429
- def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
430
- """Exporta el contenido a un archivo PDF"""
431
- # Crear documento PDF
432
  doc = SimpleDocTemplate(filename, pagesize=letter)
433
- story = []
434
  styles = getSampleStyleSheet()
435
-
436
- # Estilos personalizados
437
- title_style = ParagraphStyle(
438
- 'CustomTitle',
439
- parent=styles['Title'],
440
- fontSize=24,
441
- textColor=colors.HexColor('#1f4788'),
442
- spaceAfter=30
443
- )
444
-
445
- heading_style = ParagraphStyle(
446
- 'CustomHeading',
447
- parent=styles['Heading1'],
448
- fontSize=16,
449
- textColor=colors.HexColor('#2e5090'),
450
- spaceAfter=12
451
- )
452
-
453
- # Título
454
- title_text = {
455
- 'en': 'Comparative Analysis Report - Biotechnological Models',
456
- 'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
457
- 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
458
- 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
459
- 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
460
- }
461
-
462
- story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
463
-
464
- # Fecha
465
- date_text = {
466
- 'en': 'Generated on',
467
- 'es': 'Generado el',
468
- 'fr': 'Généré le',
469
- 'de': 'Erstellt am',
470
- 'pt': 'Gerado em'
471
- }
472
- story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
473
- story.append(Spacer(1, 0.5*inch))
474
-
475
- # Procesar contenido
476
- lines = content.split('\n')
477
-
478
- for line in lines:
479
- line = line.strip()
480
-
481
- if not line:
482
- story.append(Spacer(1, 0.2*inch))
483
- elif line.startswith('###'):
484
- story.append(Paragraph(line.replace('###', '').strip(), styles['Heading3']))
485
- elif line.startswith('##'):
486
- story.append(Paragraph(line.replace('##', '').strip(), styles['Heading2']))
487
- elif line.startswith('#'):
488
- story.append(Paragraph(line.replace('#', '').strip(), heading_style))
489
- elif line.startswith('**') and line.endswith('**'):
490
- text = line.replace('**', '')
491
- story.append(Paragraph(f"<b>{text}</b>", styles['Normal']))
492
- elif line.startswith('- ') or line.startswith('* '):
493
- story.append(Paragraph(f"• {line[2:]}", styles['Normal']))
494
- elif line == '---' or line.startswith('==='):
495
- story.append(Spacer(1, 0.3*inch))
496
- story.append(Paragraph("_" * 70, styles['Normal']))
497
- story.append(Spacer(1, 0.3*inch))
498
- else:
499
- # Limpiar caracteres especiales para PDF
500
- clean_line = line.replace('📊', '[GRAPH]').replace('🎯', '[TARGET]').replace('🔍', '[SEARCH]').replace('💡', '[TIP]')
501
- story.append(Paragraph(clean_line, styles['Normal']))
502
-
503
- # Construir PDF
504
  doc.build(story)
505
  return filename
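Both exporters take the Markdown analysis text, a target file name and a language code; export_report further down builds the file name from a timestamp in exactly this way. A minimal usage sketch, where analysis_markdown stands for the generated report text:

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
ReportExporter.export_to_docx(analysis_markdown, f"biotech_analysis_report_{timestamp}.docx", language="en")
ReportExporter.export_to_pdf(analysis_markdown, f"biotech_analysis_report_{timestamp}.pdf", language="es")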
506
 
507
  class AIAnalyzer:
508
- """Clase para análisis con IA"""
509
-
510
- def __init__(self, client, model_registry):
511
  self.client = client
512
- self.model_registry = model_registry
513
-
514
- def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
515
- """Detecta el tipo de análisis necesario"""
516
- if isinstance(content, pd.DataFrame):
517
- columns = [col.lower() for col in content.columns]
518
-
519
- fitting_indicators = [
520
- 'r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic',
521
- 'parameter', 'param', 'coefficient', 'fit',
522
- 'model', 'equation', 'goodness', 'chi_square',
523
- 'p_value', 'confidence', 'standard_error', 'se'
524
- ]
525
-
526
- has_fitting_results = any(indicator in ' '.join(columns) for indicator in fitting_indicators)
527
-
528
- if has_fitting_results:
529
- return AnalysisType.FITTING_RESULTS
530
- else:
531
- return AnalysisType.DATA_FITTING
532
-
533
- prompt = """
534
- Analyze this content and determine if it is:
535
- 1. A scientific article describing biotechnological mathematical models
536
- 2. Experimental data for parameter fitting
537
- 3. Model fitting results (with parameters, R², RMSE, etc.)
538
-
539
- Reply only with: "MODEL", "DATA" or "RESULTS"
540
  """
 
 
 
 
541
 
542
- try:
543
- # CAMBIO: Llamada a la API y acceso a la respuesta
544
- response = self.client.chat.completions.create(
545
- model="Qwen/Qwen3-14B", # Usar un modelo rápido disponible en Nebius
546
- temperature=0.1,
547
- max_tokens=10,
548
- messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
549
- )
550
-
551
- result = response.choices[0].message.content.strip().upper()
552
- if "MODEL" in result:
553
- return AnalysisType.MATHEMATICAL_MODEL
554
- elif "RESULTS" in result:
555
- return AnalysisType.FITTING_RESULTS
556
- elif "DATA" in result:
557
- return AnalysisType.DATA_FITTING
558
- else:
559
- return AnalysisType.UNKNOWN
560
-
561
- except Exception as e:
562
- print(f"Error in detect_analysis_type: {e}")
563
- return AnalysisType.UNKNOWN
564
-
565
- def get_language_prompt_prefix(self, language: str) -> str:
566
- """Obtiene el prefijo del prompt según el idioma"""
567
- prefixes = {
568
- 'en': "Please respond in English. ",
569
- 'es': "Por favor responde en español. ",
570
- 'fr': "Veuillez répondre en français. ",
571
- 'de': "Bitte antworten Sie auf Deutsch. ",
572
- 'pt': "Por favor responda em português. "
573
- }
574
- return prefixes.get(language, prefixes['en'])
575
-
576
- def analyze_fitting_results(self, data: pd.DataFrame, nebius_model: str, detail_level: str = "detailed",
577
- language: str = "en", additional_specs: str = "") -> Dict:
578
- """Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
579
-
580
- # Preparar resumen completo de los datos
581
- data_summary = f"""
582
- FITTING RESULTS DATA:
583
-
584
- Data structure:
585
- - Columns: {list(data.columns)}
586
- - Number of models evaluated: {len(data)}
587
-
588
- Complete data:
589
  {data.to_string()}
590
-
591
- Descriptive statistics:
592
- {data.describe().to_string()}
 
593
  """
594
-
595
- # Extraer valores para usar en el código
596
- data_dict = data.to_dict('records')
597
-
598
- # Obtener prefijo de idioma
599
- lang_prefix = self.get_language_prompt_prefix(language)
600
-
601
- # Agregar especificaciones adicionales del usuario si existen
602
- user_specs_section = f"""
603
-
604
- USER ADDITIONAL SPECIFICATIONS:
605
- {additional_specs}
606
-
607
- Please ensure to address these specific requirements in your analysis.
608
- """ if additional_specs else ""
609
-
610
- # Prompt mejorado con instrucciones específicas para cada nivel
611
- if detail_level == "detailed":
612
- prompt = f"""
613
- {lang_prefix}
614
-
615
- You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
616
-
617
- {user_specs_section}
618
-
619
- DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
620
-
621
- PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
622
-
623
- 1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
624
- - List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
625
- - For EACH experiment, identify:
626
- * Experimental conditions
627
- * Number of models tested
628
- * Variables measured (biomass, substrate, product)
629
-
630
- 2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
631
- For EACH EXPERIMENT separately:
632
- - Identify ALL fitted mathematical models BY NAME
633
- - Classify them: biomass growth, substrate consumption, product formation
634
- - Show the mathematical equation of each model
635
- - List parameter values obtained for that specific experiment
636
-
637
- 3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
638
- Create a section for EACH EXPERIMENT showing:
639
-
640
- **EXPERIMENT [Name/Condition]:**
641
-
642
- a) **BIOMASS MODELS** (if applicable):
643
- - Best model: [Name] with R²=[value], RMSE=[value]
644
- - Parameters: μmax=[value], Xmax=[value], etc.
645
- - Ranking of all biomass models tested
646
-
647
- b) **SUBSTRATE MODELS** (if applicable):
648
- - Best model: [Name] with R²=[value], RMSE=[value]
649
- - Parameters: Ks=[value], Yxs=[value], etc.
650
- - Ranking of all substrate models tested
651
-
652
- c) **PRODUCT MODELS** (if applicable):
653
- - Best model: [Name] with R²=[value], RMSE=[value]
654
- - Parameters: α=[value], β=[value], etc.
655
- - Ranking of all product models tested
656
-
657
- 4. **DETAILED COMPARATIVE TABLES**
658
-
659
- **Table 1: Summary by Experiment and Variable Type**
660
- | Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
661
- |------------|----------|------------|-------|------|----------------|---------|
662
- | Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
663
- | Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
664
- | Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
665
- | Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
666
-
667
- **Table 2: Complete Model Comparison Across All Experiments**
668
- | Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
669
-
670
- 5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
671
- - Compare how parameters change between experiments
672
- - Identify trends (e.g., μmax increases with temperature)
673
- - Calculate average parameters and variability
674
- - Suggest optimal conditions based on parameters
675
-
676
- 6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
677
- For each experiment, explain:
678
- - What the parameter values mean biologically
679
- - Whether values are realistic for the conditions
680
- - Key differences between experiments
681
- - Critical control parameters identified
682
-
683
- 7. **OVERALL BEST MODELS DETERMINATION**
684
- - **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
685
- - **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
686
- - **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
687
-
688
- Justify with numerical evidence from multiple experiments.
689
-
690
- 8. **CONCLUSIONS AND RECOMMENDATIONS**
691
- - Which models are most robust across different conditions
692
- - Specific models to use for each experimental condition
693
- - Confidence intervals and prediction reliability
694
- - Scale-up recommendations with specific values
695
-
696
- Use Markdown format with clear structure. Include ALL numerical values from the data.
697
- Create clear sections for EACH EXPERIMENT.
698
- """
699
- else: # summarized
700
- prompt = f"""
701
- {lang_prefix}
702
-
703
- You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
704
-
705
- {user_specs_section}
706
-
707
- DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
708
-
709
- PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
710
-
711
- 1. **EXPERIMENTS OVERVIEW**
712
- - Total experiments analyzed: [number]
713
- - Conditions tested: [list]
714
- - Variables measured: biomass/substrate/product
715
-
716
- 2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
717
-
718
- 📊 **EXPERIMENT 1 [Name/Condition]:**
719
- - Biomass: [Model] (R²=[value])
720
- - Substrate: [Model] (R²=[value])
721
- - Product: [Model] (R²=[value])
722
-
723
- 📊 **EXPERIMENT 2 [Name/Condition]:**
724
- - Biomass: [Model] (R²=[value])
725
- - Substrate: [Model] (R²=[value])
726
- - Product: [Model] (R²=[value])
727
-
728
- [Continue for all experiments...]
729
-
730
- 3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
731
- 🏆 **Best Models Overall:**
732
- - **Biomass**: [Model] - Best in [X]/[Y] experiments
733
- - **Substrate**: [Model] - Average R²=[value]
734
- - **Product**: [Model] - Most consistent performance
735
-
736
- 4. **QUICK COMPARISON TABLE**
737
- | Experiment | Best Biomass | Best Substrate | Best Product | Overall R² |
738
- |------------|--------------|----------------|--------------|------------|
739
- | Exp1 | [Model] | [Model] | [Model] | [avg] |
740
- | Exp2 | [Model] | [Model] | [Model] | [avg] |
741
-
742
- 5. **KEY FINDINGS**
743
- - Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
744
- - Best conditions identified: [specific values]
745
- - Most robust models: [list with reasons]
746
-
747
- 6. **PRACTICAL RECOMMENDATIONS**
748
- - For biomass prediction: Use [Model]
749
- - For substrate monitoring: Use [Model]
750
- - For product estimation: Use [Model]
751
- - Critical parameters: [list with values]
752
-
753
- Keep it concise but include ALL experiments and model names with their key metrics.
754
- """
755
-
756
  try:
757
- # CAMBIO: Llamada a la API y acceso a la respuesta
758
  response = self.client.chat.completions.create(
759
- model=nebius_model,
760
  temperature=0.6,
761
  top_p=0.95,
762
- max_tokens=4000,
763
- messages=[{
764
- "role": "user",
765
- "content": f"{prompt}\n\n{data_summary}"
766
- }]
767
  )
768
 
769
- # Análisis adicional para generar código con valores numéricos reales
770
- code_prompt = f"""
771
- {lang_prefix}
772
-
773
- Based on the analysis and this actual data:
774
- {data.to_string()}
775
-
776
- Generate Python code that:
777
-
778
- 1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
779
- 2. Implements analysis BY EXPERIMENT showing:
780
- - Best models for each experiment
781
- - Comparison across experiments
782
- - Parameter evolution between conditions
783
- 3. Includes visualization functions that:
784
- - Show results PER EXPERIMENT
785
- - Compare models across experiments
786
- - Display parameter trends
787
- 4. Shows the best model for biomass, substrate, and product separately
788
-
789
- The code must include:
790
- - Data loading with experiment identification
791
- - Model comparison by experiment and variable type
792
- - Visualization showing results per experiment
793
- - Overall best model selection with justification
794
- - Functions to predict using the best models for each category
795
 
796
- Make sure to include comments indicating which model won for each variable type and why.
797
-
798
- Format: Complete, executable Python code with actual data values embedded.
799
- """
800
-
801
- # CAMBIO: Llamada a la API y acceso a la respuesta
802
- code_response = self.client.chat.completions.create(
803
- model=nebius_model,
804
- temperature=0.6,
805
- top_p=0.95,
806
- max_tokens=3000,
807
- messages=[{
808
- "role": "user",
809
- "content": code_prompt
810
- }]
811
- )
812
-
813
- return {
814
- "tipo": "Comparative Analysis of Mathematical Models",
815
- "analisis_completo": response.choices[0].message.content,
816
- "codigo_implementacion": code_response.choices[0].message.content,
817
- "resumen_datos": {
818
- "n_modelos": len(data),
819
- "columnas": list(data.columns),
820
- "metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
821
- for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
822
- "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
823
- "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
824
- "datos_completos": data_dict
825
  }
826
- }
827
-
 
828
  except Exception as e:
829
- return {"error": str(e)}
 
 
830
 
831
- def process_files(files, model_name: str, detail_level: str = "detailed",
832
- language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
833
- """Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
834
  processor = FileProcessor()
835
- analyzer = AIAnalyzer(client, model_registry)
836
- results = []
837
- all_code = []
838
 
 
839
  for file in files:
840
- if file is None:
841
- continue
842
-
843
- file_name = file.name if hasattr(file, 'name') else "archivo"
844
  file_ext = Path(file_name).suffix.lower()
845
 
846
  with open(file.name, 'rb') as f:
847
  file_content = f.read()
848
 
849
- if file_ext in ['.csv', '.xlsx', '.xls']:
850
- if language == 'es':
851
- results.append(f"## 📊 Análisis de Resultados: {file_name}")
852
- else:
853
- results.append(f"## 📊 Results Analysis: {file_name}")
854
-
855
- if file_ext == '.csv':
856
- df = processor.read_csv(file_content)
857
- else:
858
- df = processor.read_excel(file_content)
859
-
860
- if df is not None:
861
- analysis_type = analyzer.detect_analysis_type(df)
862
-
863
- if analysis_type == AnalysisType.FITTING_RESULTS:
864
- result = analyzer.analyze_fitting_results(
865
- df, model_name, detail_level, language, additional_specs
866
- )
867
-
868
- if language == 'es':
869
- results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
870
- else:
871
- results.append("### 🎯 COMPARATIVE ANALYSIS OF MATHEMATICAL MODELS")
872
-
873
- results.append(result.get("analisis_completo", ""))
874
- if "codigo_implementacion" in result:
875
- all_code.append(result["codigo_implementacion"])
876
-
877
- results.append("\n---\n")
878
-
879
- analysis_text = "\n".join(results)
880
- code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
881
-
882
- return analysis_text, code_text
883
-
884
- def generate_implementation_code(analysis_results: str) -> str:
885
- """Genera código de implementación con análisis por experimento"""
886
- code = """
887
- import numpy as np
888
- import pandas as pd
889
- import matplotlib.pyplot as plt
890
- from scipy.integrate import odeint
891
- from scipy.optimize import curve_fit, differential_evolution
892
- from sklearn.metrics import r2_score, mean_squared_error
893
- import seaborn as sns
894
- from typing import Dict, List, Tuple, Optional
895
-
896
- # Visualization configuration
897
- plt.style.use('seaborn-v0_8-darkgrid')
898
- sns.set_palette("husl")
899
-
900
- class ExperimentalModelAnalyzer:
901
- \"\"\"
902
- Class for comparative analysis of biotechnological models across multiple experiments.
903
- Analyzes biomass, substrate and product models separately for each experimental condition.
904
- \"\"\"
905
-
906
- def __init__(self):
907
- self.results_df = None
908
- self.experiments = {}
909
- self.best_models_by_experiment = {}
910
- self.overall_best_models = {
911
- 'biomass': None,
912
- 'substrate': None,
913
- 'product': None
914
- }
915
-
916
- def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
917
- \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
918
- if data_dict:
919
- self.results_df = pd.DataFrame(data_dict)
920
- elif file_path:
921
- if file_path.endswith('.csv'):
922
- self.results_df = pd.read_csv(file_path)
923
- else:
924
- self.results_df = pd.read_excel(file_path)
925
-
926
- print(f"✅ Data loaded: {len(self.results_df)} models")
927
- print(f"📊 Available columns: {list(self.results_df.columns)}")
928
-
929
- # Identify experiments
930
- if 'Experiment' in self.results_df.columns:
931
- self.experiments = self.results_df.groupby('Experiment').groups
932
- print(f"🧪 Experiments found: {list(self.experiments.keys())}")
933
-
934
- return self.results_df
935
-
936
- def analyze_by_experiment(self,
937
- experiment_col: str = 'Experiment',
938
- model_col: str = 'Model',
939
- type_col: str = 'Type',
940
- r2_col: str = 'R2',
941
- rmse_col: str = 'RMSE') -> Dict:
942
- \"\"\"
943
- Analyze models by experiment and variable type.
944
- Identifies best models for biomass, substrate, and product in each experiment.
945
- \"\"\"
946
- if self.results_df is None:
947
- raise ValueError("First load data with load_results()")
948
-
949
- results_by_exp = {}
950
-
951
- # Get unique experiments
952
- if experiment_col in self.results_df.columns:
953
- experiments = self.results_df[experiment_col].unique()
954
- else:
955
- experiments = ['All_Data']
956
- self.results_df[experiment_col] = 'All_Data'
957
-
958
- print("\\n" + "="*80)
959
- print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
960
- print("="*80)
961
-
962
- for exp in experiments:
963
- print(f"\\n🧪 EXPERIMENT: {exp}")
964
- print("-"*50)
965
-
966
- exp_data = self.results_df[self.results_df[experiment_col] == exp]
967
- results_by_exp[exp] = {}
968
-
969
- # Analyze by variable type if available
970
- if type_col in exp_data.columns:
971
- var_types = exp_data[type_col].unique()
972
-
973
- for var_type in var_types:
974
- var_data = exp_data[exp_data[type_col] == var_type]
975
-
976
- if not var_data.empty:
977
- # Find best model for this variable type
978
- best_idx = var_data[r2_col].idxmax()
979
- best_model = var_data.loc[best_idx]
980
-
981
- results_by_exp[exp][var_type] = {
982
- 'best_model': best_model[model_col],
983
- 'r2': best_model[r2_col],
984
- 'rmse': best_model[rmse_col],
985
- 'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
986
- }
987
-
988
- print(f"\\n 📈 {var_type.upper()}:")
989
- print(f" Best Model: {best_model[model_col]}")
990
- print(f" R² = {best_model[r2_col]:.4f}")
991
- print(f" RMSE = {best_model[rmse_col]:.4f}")
992
-
993
- # Show all models for this variable
994
- print(f"\\n All {var_type} models tested:")
995
- for _, row in var_data.iterrows():
996
- print(f" - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
997
- else:
998
- # If no type column, analyze all models together
999
- best_idx = exp_data[r2_col].idxmax()
1000
- best_model = exp_data.loc[best_idx]
1001
-
1002
- results_by_exp[exp]['all'] = {
1003
- 'best_model': best_model[model_col],
1004
- 'r2': best_model[r2_col],
1005
- 'rmse': best_model[rmse_col],
1006
- 'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
1007
- }
1008
-
1009
- self.best_models_by_experiment = results_by_exp
1010
-
1011
- # Determine overall best models
1012
- self._determine_overall_best_models()
1013
-
1014
- return results_by_exp
1015
-
1016
- def _determine_overall_best_models(self):
1017
- \"\"\"Determine the best models across all experiments\"\"\"
1018
- print("\\n" + "="*80)
1019
- print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
1020
- print("="*80)
1021
-
1022
- # Aggregate performance by model and type
1023
- model_performance = {}
1024
-
1025
- for exp, exp_results in self.best_models_by_experiment.items():
1026
- for var_type, var_results in exp_results.items():
1027
- if var_type not in model_performance:
1028
- model_performance[var_type] = {}
1029
-
1030
- for model_data in var_results['all_models']:
1031
- model_name = model_data['Model']
1032
- if model_name not in model_performance[var_type]:
1033
- model_performance[var_type][model_name] = {
1034
- 'r2_values': [],
1035
- 'rmse_values': [],
1036
- 'experiments': []
1037
- }
1038
-
1039
- model_performance[var_type][model_name]['r2_values'].append(model_data['R2'])
1040
- model_performance[var_type][model_name]['rmse_values'].append(model_data['RMSE'])
1041
- model_performance[var_type][model_name]['experiments'].append(exp)
1042
-
1043
- # Calculate average performance and select best
1044
- for var_type, models in model_performance.items():
1045
- best_avg_r2 = -1
1046
- best_model = None
1047
-
1048
- print(f"\\n📊 {var_type.upper()} MODELS:")
1049
- for model_name, perf_data in models.items():
1050
- avg_r2 = np.mean(perf_data['r2_values'])
1051
- avg_rmse = np.mean(perf_data['rmse_values'])
1052
- n_exp = len(perf_data['experiments'])
1053
-
1054
- print(f" {model_name}:")
1055
- print(f" Average R² = {avg_r2:.4f}")
1056
- print(f" Average RMSE = {avg_rmse:.4f}")
1057
- print(f" Tested in {n_exp} experiments")
1058
-
1059
- if avg_r2 > best_avg_r2:
1060
- best_avg_r2 = avg_r2
1061
- best_model = {
1062
- 'name': model_name,
1063
- 'avg_r2': avg_r2,
1064
- 'avg_rmse': avg_rmse,
1065
- 'n_experiments': n_exp
1066
- }
1067
-
1068
- if var_type.lower() in ['biomass', 'substrate', 'product']:
1069
- self.overall_best_models[var_type.lower()] = best_model
1070
- print(f"\\n 🏆 BEST {var_type.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")
1071
-
1072
- def create_comparison_visualizations(self):
1073
- \"\"\"Create visualizations comparing models across experiments\"\"\"
1074
- if not self.best_models_by_experiment:
1075
- raise ValueError("First run analyze_by_experiment()")
1076
-
1077
- # Prepare data for visualization
1078
- experiments = []
1079
- biomass_r2 = []
1080
- substrate_r2 = []
1081
- product_r2 = []
1082
-
1083
- for exp, results in self.best_models_by_experiment.items():
1084
- experiments.append(exp)
1085
- biomass_r2.append(results.get('Biomass', {}).get('r2', 0))
1086
- substrate_r2.append(results.get('Substrate', {}).get('r2', 0))
1087
- product_r2.append(results.get('Product', {}).get('r2', 0))
1088
-
1089
- # Create figure with subplots
1090
- fig, axes = plt.subplots(2, 2, figsize=(15, 12))
1091
- fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)
1092
-
1093
- # 1. R² comparison by experiment and variable type
1094
- ax1 = axes[0, 0]
1095
- x = np.arange(len(experiments))
1096
- width = 0.25
1097
-
1098
- ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
1099
- ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
1100
- ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)
1101
-
1102
- ax1.set_xlabel('Experiment')
1103
- ax1.set_ylabel('R²')
1104
- ax1.set_title('Best Model R² by Experiment and Variable Type')
1105
- ax1.set_xticks(x)
1106
- ax1.set_xticklabels(experiments, rotation=45, ha='right')
1107
- ax1.legend()
1108
- ax1.grid(True, alpha=0.3)
1109
-
1110
- # Add value labels
1111
- for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
1112
- if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
1113
- if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
1114
- if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)
1115
-
1116
- # 2. Model frequency heatmap
1117
- ax2 = axes[0, 1]
1118
- # This would show which models appear most frequently as best
1119
- # Implementation depends on actual data structure
1120
- ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
1121
- ha='center', va='center', transform=ax2.transAxes)
1122
- ax2.set_title('Most Frequently Selected Models')
1123
-
1124
- # 3. Parameter evolution across experiments
1125
- ax3 = axes[1, 0]
1126
- ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
1127
- ha='center', va='center', transform=ax3.transAxes)
1128
- ax3.set_title('Parameter Trends')
1129
-
1130
- # 4. Overall best models summary
1131
- ax4 = axes[1, 1]
1132
- ax4.axis('off')
1133
-
1134
- summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
1135
- for var_type, model_info in self.overall_best_models.items():
1136
- if model_info:
1137
- summary_text += f"{var_type.upper()}:\\n"
1138
- summary_text += f" Model: {model_info['name']}\\n"
1139
- summary_text += f" Avg R²: {model_info['avg_r2']:.4f}\\n"
1140
- summary_text += f" Tested in: {model_info['n_experiments']} experiments\\n\\n"
1141
-
1142
- ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
1143
- fontsize=12, verticalalignment='top', fontfamily='monospace')
1144
- ax4.set_title('Overall Best Models Summary')
1145
-
1146
- plt.tight_layout()
1147
- plt.show()
1148
-
1149
- def generate_summary_table(self) -> pd.DataFrame:
1150
- \"\"\"Generate a summary table of best models by experiment and type\"\"\"
1151
- summary_data = []
1152
-
1153
- for exp, results in self.best_models_by_experiment.items():
1154
- for var_type, var_results in results.items():
1155
- summary_data.append({
1156
- 'Experiment': exp,
1157
- 'Variable_Type': var_type,
1158
- 'Best_Model': var_results['best_model'],
1159
- 'R2': var_results['r2'],
1160
- 'RMSE': var_results['rmse']
1161
- })
1162
-
1163
- summary_df = pd.DataFrame(summary_data)
1164
-
1165
- print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
1166
- print("="*80)
1167
- print(summary_df.to_string(index=False))
1168
-
1169
- return summary_df
1170
-
1171
- # Example usage
1172
- if __name__ == "__main__":
1173
- print("🧬 Experimental Model Comparison System")
1174
- print("="*60)
1175
-
1176
- # Example data structure with experiments
1177
- example_data = {
1178
- 'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
1179
- 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
1180
- 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
1181
- 'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
1182
- 'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
1183
- 'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
1184
- 'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
1185
- 'Substrate', 'Substrate', 'Substrate', 'Substrate',
1186
- 'Product', 'Product', 'Product', 'Product'],
1187
- 'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
1188
- 0.9723, 0.9856, 0.9698, 0.9812,
1189
- 0.9634, 0.9512, 0.9687, 0.9423],
1190
- 'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
1191
- 0.0312, 0.0245, 0.0334, 0.0289,
1192
- 0.0412, 0.0523, 0.0389, 0.0567],
1193
- 'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
1194
- None, None, None, None, None, None, None, None],
1195
- 'Ks': [None, None, None, None, None, None,
1196
- 2.1, 1.8, 2.3, 1.9, None, None, None, None]
1197
- }
1198
-
1199
- # Create analyzer
1200
- analyzer = ExperimentalModelAnalyzer()
1201
-
1202
- # Load data
1203
- analyzer.load_results(data_dict=example_data)
1204
-
1205
- # Analyze by experiment
1206
- results = analyzer.analyze_by_experiment()
1207
-
1208
- # Create visualizations
1209
- analyzer.create_comparison_visualizations()
1210
-
1211
- # Generate summary table
1212
- summary = analyzer.generate_summary_table()
1213
-
1214
- print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
1215
- """
1216
-
1217
- return code
1218
-
1219
- # Estado global para almacenar resultados
1220
- class AppState:
1221
- def __init__(self):
1222
- self.current_analysis = ""
1223
- self.current_code = ""
1224
- self.current_language = "en"
1225
-
1226
- app_state = AppState()
1227
 
1228
- def export_report(export_format: str, language: str) -> Tuple[str, str]:
1229
- """Exporta el reporte al formato seleccionado"""
1230
- if not app_state.current_analysis:
1231
- error_msg = {
1232
- 'en': "No analysis available to export",
1233
- 'es': "No hay análisis disponible para exportar",
1234
- 'fr': "Aucune analyse disponible pour exporter",
1235
- 'de': "Keine Analyse zum Exportieren verfügbar",
1236
- 'pt': "Nenhuma análise disponível para exportar"
1237
- }
1238
- return error_msg.get(language, error_msg['en']), ""
1239
-
1240
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1241
-
1242
- try:
1243
- if export_format == "DOCX":
1244
- filename = f"biotech_analysis_report_{timestamp}.docx"
1245
- ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
1246
- else: # PDF
1247
- filename = f"biotech_analysis_report_{timestamp}.pdf"
1248
- ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)
1249
-
1250
- success_msg = TRANSLATIONS[language]['report_exported']
1251
- return f"{success_msg} {filename}", filename
1252
- except Exception as e:
1253
- return f"Error: {str(e)}", ""
1254
 
1255
- # Interfaz Gradio con soporte multiidioma y temas
1256
  def create_interface():
1257
- # Estado inicial
1258
- current_theme = "light"
1259
- current_language = "en"
1260
-
1261
- def update_interface_language(language):
1262
- """Actualiza el idioma de la interfaz"""
1263
- app_state.current_language = language
1264
  t = TRANSLATIONS[language]
1265
-
1266
  return [
1267
- gr.update(value=f"# {t['title']}"), # title_text
1268
- gr.update(value=t['subtitle']), # subtitle_text
1269
- gr.update(label=t['upload_files']), # files_input
1270
- gr.update(label=t['select_model']), # model_selector
1271
- gr.update(label=t['select_language']), # language_selector
1272
- gr.update(label=t['select_theme']), # theme_selector
1273
- gr.update(label=t['detail_level']), # detail_level
1274
- gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
1275
- gr.update(value=t['analyze_button']), # analyze_btn
1276
- gr.update(label=t['export_format']), # export_format
1277
- gr.update(value=t['export_button']), # export_btn
1278
- gr.update(label=t['comparative_analysis']), # analysis_output
1279
- gr.update(label=t['implementation_code']), # code_output
1280
- gr.update(label=t['data_format']) # data_format_accordion
1281
  ]
1282
-
1283
- def process_and_store(files, model, detail, language, additional_specs):
1284
- """Procesa archivos y almacena resultados"""
1285
- if not files:
1286
- error_msg = TRANSLATIONS[language]['error_no_files']
1287
- return error_msg, ""
1288
-
1289
- analysis, code = process_files(files, model, detail, language, additional_specs)
1290
- app_state.current_analysis = analysis
1291
- app_state.current_code = code
1292
- return analysis, code
1293
-
1294
- with gr.Blocks(theme=THEMES[current_theme]) as demo:
1295
- # Componentes de UI
1296
- with gr.Row():
1297
- with gr.Column(scale=3):
1298
- title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
1299
- subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
1300
- with gr.Column(scale=1):
1301
- with gr.Row():
1302
- language_selector = gr.Dropdown(
1303
- choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
1304
- ("Deutsch", "de"), ("Português", "pt")],
1305
- value="en",
1306
- label=TRANSLATIONS[current_language]['select_language'],
1307
- interactive=True
1308
- )
1309
- theme_selector = gr.Dropdown(
1310
- choices=[("Light", "light"), ("Dark", "dark")],
1311
- value="light",
1312
- label=TRANSLATIONS[current_language]['select_theme'],
1313
- interactive=True
1314
- )
1315
 
1316
  with gr.Row():
1317
  with gr.Column(scale=1):
1318
- files_input = gr.File(
1319
- label=TRANSLATIONS[current_language]['upload_files'],
1320
- file_count="multiple",
1321
- file_types=[".csv", ".xlsx", ".xls", ".pdf", ".zip"],
1322
- type="filepath"
1323
- )
1324
 
1325
- # CAMBIO: Usar el diccionario de modelos de Nebius
1326
  default_model = "Qwen/Qwen3-14B"
1327
- model_selector = gr.Dropdown(
1328
- choices=list(NEBIUS_MODELS.keys()),
1329
- value=default_model,
1330
- label=TRANSLATIONS[current_language]['select_model'],
1331
- info=f"{TRANSLATIONS[current_language]['best_for']}: {NEBIUS_MODELS[default_model]['best_for']}"
1332
- )
1333
-
1334
- detail_level = gr.Radio(
1335
- choices=[
1336
- (TRANSLATIONS[current_language]['detailed'], "detailed"),
1337
- (TRANSLATIONS[current_language]['summarized'], "summarized")
1338
- ],
1339
- value="detailed",
1340
- label=TRANSLATIONS[current_language]['detail_level']
1341
- )
1342
 
1343
- # Nueva entrada para especificaciones adicionales
1344
- additional_specs = gr.Textbox(
1345
- label=TRANSLATIONS[current_language]['additional_specs'],
1346
- placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
1347
- lines=3,
1348
- max_lines=5,
1349
- interactive=True
1350
- )
1351
 
1352
- analyze_btn = gr.Button(
1353
- TRANSLATIONS[current_language]['analyze_button'],
1354
- variant="primary",
1355
- size="lg"
1356
- )
1357
-
1358
- gr.Markdown("---")
1359
 
1360
- export_format = gr.Radio(
1361
- choices=["DOCX", "PDF"],
1362
- value="PDF",
1363
- label=TRANSLATIONS[current_language]['export_format']
1364
- )
1365
 
1366
- export_btn = gr.Button(
1367
- TRANSLATIONS[current_language]['export_button'],
1368
- variant="secondary"
1369
- )
1370
 
1371
- export_status = gr.Textbox(
1372
- label="Export Status",
1373
- interactive=False,
1374
- visible=False
1375
- )
1376
 
1377
- export_file = gr.File(
1378
- label="Download Report",
1379
- visible=False
1380
- )
1381
-
1382
  with gr.Column(scale=2):
1383
- analysis_output = gr.Markdown(
1384
- label=TRANSLATIONS[current_language]['comparative_analysis']
1385
- )
1386
-
1387
- code_output = gr.Code(
1388
- label=TRANSLATIONS[current_language]['implementation_code'],
1389
- language="python",
1390
- interactive=True,
1391
- lines=20
1392
- )
1393
-
1394
- data_format_accordion = gr.Accordion(
1395
- label=TRANSLATIONS[current_language]['data_format'],
1396
- open=False
1397
- )
1398
-
1399
- with data_format_accordion:
1400
- gr.Markdown("""
1401
- ### Expected CSV/Excel structure:
1402
 
1403
- | Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
1404
- |------------|-------|------|-----|------|-----|-----|--------|-------|------------|
1405
- | pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
1406
- | pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
1407
- | pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
1408
- | pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
1409
 
1410
- **Important columns:**
1411
- - **Experiment**: Experimental condition identifier
1412
- - **Model**: Model name
1413
- - **Type**: Variable type (Biomass/Substrate/Product)
1414
- - **R2, RMSE**: Fit quality metrics
1415
- - **Parameters**: Model-specific parameters
1416
- """)
1417
-
1418
- # CAMBIO: Actualizar el modelo en los ejemplos
1419
- examples = gr.Examples(
1420
- examples=[
1421
- [["examples/biomass_models_comparison.csv"], "Qwen/Qwen3-14B", "detailed", ""],
1422
- [["examples/substrate_kinetics_results.xlsx"], "Qwen/Qwen3-14B", "summarized", "Focus on temperature effects"]
1423
- ],
1424
- inputs=[files_input, model_selector, detail_level, additional_specs],
1425
- label=TRANSLATIONS[current_language]['examples']
1426
- )
1427
-
1428
- # Eventos
1429
- language_selector.change(
1430
- update_interface_language,
1431
- inputs=[language_selector],
1432
- outputs=[
1433
- title_text, subtitle_text, files_input, model_selector,
1434
- language_selector, theme_selector, detail_level, additional_specs,
1435
- analyze_btn, export_format, export_btn, analysis_output,
1436
- code_output, data_format_accordion
1437
- ]
1438
- )
1439
-
1440
- def change_theme(theme_name):
1441
- """Cambia el tema de la interfaz"""
1442
- return gr.Info("Theme will be applied on next page load")
1443
-
1444
- theme_selector.change(
1445
- change_theme,
1446
- inputs=[theme_selector],
1447
- outputs=[]
1448
- )
1449
-
1450
  analyze_btn.click(
1451
- fn=process_and_store,
1452
- inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
1453
- outputs=[analysis_output, code_output]
1454
  )
1455
 
1456
- def handle_export(format, language):
1457
- status, file = export_report(format, language)
1458
- if file:
1459
- return gr.update(value=status, visible=True), gr.update(value=file, visible=True)
1460
- else:
1461
- return gr.update(value=status, visible=True), gr.update(visible=False)
1462
-
1463
  export_btn.click(
1464
- fn=handle_export,
1465
- inputs=[export_format, language_selector],
1466
- outputs=[export_status, export_file]
1467
  )
1468
-
1469
  return demo
1470
 
1471
- # Función principal
1472
  def main():
1473
- # CAMBIO: Comprobar la nueva variable de entorno
1474
  if not os.getenv("NEBIUS_API_KEY"):
1475
- print("⚠️ Configure NEBIUS_API_KEY in HuggingFace Space secrets")
1476
- return gr.Interface(
1477
- fn=lambda x: TRANSLATIONS['en']['error_no_api'],
1478
- inputs=gr.Textbox(),
1479
- outputs=gr.Textbox(),
1480
- title="Configuration Error"
1481
- )
1482
 
1483
  return create_interface()
1484
 
1485
- # For local execution
1486
  if __name__ == "__main__":
1487
  demo = main()
1488
  if demo:
1489
- # Create the example files if they do not exist
1490
- if not os.path.exists("examples"):
1491
- os.makedirs("examples")
1492
- if not os.path.exists("examples/biomass_models_comparison.csv"):
1493
- pd.DataFrame({
1494
- 'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5'],
1495
- 'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz'],
1496
- 'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass'],
1497
- 'R2': [0.98, 0.99, 0.995, 0.97, 0.98, 0.99],
1498
- 'RMSE': [0.02, 0.01, 0.005, 0.03, 0.02, 0.01]
1499
- }).to_csv("examples/biomass_models_comparison.csv", index=False)
1500
- if not os.path.exists("examples/substrate_kinetics_results.xlsx"):
1501
- pd.DataFrame({
1502
- 'Experiment': ['Temp_30C', 'Temp_30C', 'Temp_37C', 'Temp_37C'],
1503
- 'Model': ['First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate'],
1504
- 'Type': ['Substrate', 'Substrate', 'Substrate', 'Substrate'],
1505
- 'R2': [0.97, 0.98, 0.96, 0.985],
1506
- 'RMSE': [0.03, 0.02, 0.04, 0.015]
1507
- }).to_excel("examples/substrate_kinetics_results.xlsx", index=False)
1508
-
1509
- demo.launch(
1510
- server_name="0.0.0.0",
1511
- server_port=7860,
1512
- share=False
1513
- )
 
1
  import gradio as gr
2
+ from openai import OpenAI
3
  import PyPDF2
4
  import pandas as pd
5
  import numpy as np
 
8
  import json
9
  import zipfile
10
  import tempfile
11
+ from typing import Dict, List, Tuple, Union
 
12
  from pathlib import Path
 
 
 
13
  from docx import Document
14
+ from docx.shared import Pt
 
15
  from reportlab.lib import colors
16
+ from reportlab.lib.pagesizes import letter
17
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
18
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
19
  from reportlab.lib.units import inch
 
 
 
20
  from datetime import datetime
 
21
 
22
  # Configuración para HuggingFace
23
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
24
 
25
+ # Initialize the OpenAI-compatible client for Nebius
26
  client = OpenAI(
27
  base_url="https://api.studio.nebius.com/v1/",
28
  api_key=os.environ.get("NEBIUS_API_KEY")
29
  )
30
 
31
+ # Translation system
32
  TRANSLATIONS = {
33
  'en': {
34
+ 'title': '🧬 API-Powered Biotechnological Model Analyzer',
35
+ 'subtitle': 'Upload your model fitting results and let the AI perform a complete comparative analysis.',
36
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
37
+ 'select_model': '🤖 AI Model',
38
  'select_language': '🌐 Language',
39
+ 'detail_level': '📋 Analysis Detail Level',
 
40
  'detailed': 'Detailed',
41
  'summarized': 'Summarized',
42
+ 'analyze_button': '🚀 Analyze with AI',
43
+ 'export_format': '📄 Export Format',
44
+ 'export_button': '💾 Export Analysis',
45
+ 'comparative_analysis': '📊 AI-Generated Analysis',
46
+ 'implementation_code': '💻 AI-Generated Implementation Code',
47
+ 'data_format': '📋 Expected Data Format',
48
+ 'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets',
 
 
 
 
 
49
  'error_no_files': 'Please upload fitting result files to analyze',
50
  'report_exported': 'Report exported successfully as',
51
+ 'additional_specs': '📝 Additional Specifications for Analysis',
52
+ 'additional_specs_placeholder': 'e.g., "Focus on the effect of temperature" or "Provide scale-up recommendations"...'
 
 
 
 
53
  },
54
  'es': {
55
+ 'title': '🧬 Analizador Biotecnológico Impulsado por API',
56
+ 'subtitle': 'Sube los resultados de ajuste de tus modelos y deja que la IA realice un análisis comparativo completo.',
57
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
58
+ 'select_model': '🤖 Modelo de IA',
59
  'select_language': '🌐 Idioma',
60
+ 'detail_level': '📋 Nivel de Detalle del Análisis',
 
61
  'detailed': 'Detallado',
62
  'summarized': 'Resumido',
63
+ 'analyze_button': '🚀 Analizar con IA',
64
+ 'export_format': '📄 Formato de Exportación',
65
+ 'export_button': '💾 Exportar Análisis',
66
+ 'comparative_analysis': '📊 Análisis Generado por IA',
67
+ 'implementation_code': '💻 Código de Implementación Generado por IA',
68
+ 'data_format': '📋 Formato de Datos Esperado',
69
+ 'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space',
 
 
 
 
 
70
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
71
  'report_exported': 'Reporte exportado exitosamente como',
72
+ 'additional_specs': '📝 Especificaciones Adicionales para el Análisis',
73
+ 'additional_specs_placeholder': 'Ej: "Enfócate en el efecto de la temperatura" o "Provee recomendaciones de escalado"...'
 
 
 
 
74
  },
75
  }
76
 
77
+ # Available Nebius models
78
  NEBIUS_MODELS = {
79
  "Qwen/Qwen3-14B": {
80
  "name": "Qwen 3 (14B)",
81
  "description": "Modelo potente y versátil de la familia Qwen.",
 
 
82
  },
 
83
  }
84
 
85
  class FileProcessor:
86
  @staticmethod
87
+ def read_csv(csv_file: bytes) -> pd.DataFrame:
88
+ try: return pd.read_csv(io.BytesIO(csv_file))
89
+ except Exception: return None
 
 
 
90
 
91
  @staticmethod
92
+ def read_excel(excel_file: bytes) -> pd.DataFrame:
93
+ try: return pd.read_excel(io.BytesIO(excel_file))
94
+ except Exception: return None
 
 
 
 
 
 
 
 
 
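As a quick illustration of how these byte-oriented helpers are meant to be called (a sketch, not part of the commit; it assumes the example CSV created in the __main__ block further down already exists):

with open("examples/biomass_models_comparison.csv", "rb") as f:
    df = FileProcessor.read_csv(f.read())  # returns a DataFrame, or None if parsing fails
if df is not None:
    print(df.head())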
95
 
96
  class ReportExporter:
 
 
97
  @staticmethod
98
+ def export_to_docx(content: str, filename: str, language: str = 'en'):
 
99
  doc = Document()
100
+ doc.add_heading(TRANSLATIONS[language]['title'], 0)
101
+ doc.add_paragraph(f"{TRANSLATIONS[language]['report_exported'].split(' as')[0]}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
102
  doc.add_paragraph()
103
+ for line in content.split('\n'):
104
+ if line.startswith('### '): doc.add_heading(line[4:], level=3)
105
+ elif line.startswith('## '): doc.add_heading(line[3:], level=2)
106
+ elif line.startswith('# '): doc.add_heading(line[2:], level=1)
107
+ else: doc.add_paragraph(line)
108
  doc.save(filename)
109
  return filename
110
+
111
  @staticmethod
112
+ def export_to_pdf(content: str, filename: str, language: str = 'en'):
 
 
113
  doc = SimpleDocTemplate(filename, pagesize=letter)
 
114
  styles = getSampleStyleSheet()
115
+ story = [Paragraph(TRANSLATIONS[language]['title'], styles['h1'])]
116
+ for line in content.split('\n'):
117
+ if line.startswith('### '): story.append(Paragraph(line[4:], styles['h3']))
118
+ elif line.startswith('## '): story.append(Paragraph(line[3:], styles['h2']))
119
+ elif line.startswith('# '): story.append(Paragraph(line[2:], styles['h1']))
120
+ else: story.append(Paragraph(line, styles['BodyText']))
121
  doc.build(story)
122
  return filename
123
 
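A minimal usage sketch for the two exporters (illustrative only; the filenames are arbitrary and the Markdown string is made up):

sample_markdown = "# Results\n## Best model\nThe Logistic model gave the highest R2 in both experiments."
ReportExporter.export_to_docx(sample_markdown, "sample_report.docx", language="en")
ReportExporter.export_to_pdf(sample_markdown, "sample_report.pdf", language="en")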
124
  class AIAnalyzer:
125
+ """
126
+ Class that talks exclusively to the API to obtain the analysis and the code.
127
+ It contains no predefined analysis logic.
128
+ """
129
+ def __init__(self, client):
130
  self.client = client
131
+
132
+ def get_analysis_and_code(self, data: pd.DataFrame, model: str, detail_level: str, language: str, additional_specs: str) -> Dict[str, str]:
133
  """
134
+ Makes a single API call to obtain both the analysis
135
+ and the implementation code in JSON format.
136
+ """
137
+ lang_instruction = TRANSLATIONS[language]['additional_specs_placeholder']  # reuses a translated string (note: currently unused)
138
 
139
+ # Unified prompt that requests a JSON response with two keys
140
+ prompt = f"""
141
+ Act as an expert in biotechnology and data science. Your task is to analyze the provided model fitting results and generate both a textual analysis and a Python implementation script.
142
+
143
+ The user has provided the following data from a CSV/Excel file:
144
+ --- DATA ---
145
  {data.to_string()}
146
+ --- END DATA ---
147
+
148
+ User requirements:
149
+ - Language for the analysis: {language}
150
+ - Detail level: {detail_level}
151
+ - Additional specifications: "{additional_specs if additional_specs else 'None'}"
152
+
153
+ Based on all the information above, perform the following two tasks:
154
+
155
+ TASK 1: GENERATE TEXTUAL ANALYSIS
156
+ Write a comprehensive comparative analysis in Markdown format.
157
+ - If detail_level is 'detailed', provide an in-depth, experiment-by-experiment comparison, parameter analysis, biological interpretation, and robust conclusions.
158
+ - If detail_level is 'summarized', provide a concise overview, highlight the best models per experiment, and give clear, practical recommendations.
159
+ - The analysis MUST be in {language}.
160
+
161
+ TASK 2: GENERATE PYTHON CODE
162
+ Write a complete, executable Python script that a researcher can use to replicate and visualize this analysis.
163
+ - The script should include data loading (embed the provided data directly).
164
+ - It must contain functions to compare models and find the best ones.
165
+ - It must include plotting functions (using matplotlib or seaborn) to visualize the results, such as comparing R² values across experiments.
166
+ - The code should be well-commented.
167
+
168
+ IMPORTANT: Your final output must be a single, valid JSON object containing two keys: "analysis" and "code".
169
+ Example format:
170
+ {{
171
+ "analysis": "### Comparative Analysis\\n\\nHere is the detailed analysis in Markdown...",
172
+ "code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Your Python code here..."
173
+ }}
174
+
175
+ Do not add any text or explanations outside of the JSON object.
176
  """
177
+
178
  try:
 
179
  response = self.client.chat.completions.create(
180
+ model=model,
181
  temperature=0.6,
182
  top_p=0.95,
183
+ max_tokens=4000, # use a high value so responses are not truncated
184
+ messages=[{"role": "user", "content": prompt}]
 
 
 
185
  )
186
 
187
+ raw_response_text = response.choices[0].message.content
188
 
189
+ # Try to parse the JSON response
190
+ try:
191
+ # Trim the text to the outermost braces so it parses as valid JSON
192
+ json_text = raw_response_text[raw_response_text.find('{'):raw_response_text.rfind('}')+1]
193
+ parsed_json = json.loads(json_text)
194
+ return {
195
+ "analysis": parsed_json.get("analysis", "API did not return an analysis."),
196
+ "code": parsed_json.get("code", "# API did not return code.")
197
  }
198
+ except (json.JSONDecodeError, IndexError):
199
+ # If parsing fails, return the raw text as the analysis
200
+ return {
201
+ "analysis": f"API returned a non-JSON response:\n\n{raw_response_text}",
202
+ "code": "# Could not parse API response to extract code."
203
+ }
204
+
205
  except Exception as e:
206
+ error_message = f"An error occurred while calling the API: {str(e)}"
207
+ return {
208
+ "analysis": error_message,
209
+ "code": f"# {error_message}"
210
+ }
211
+
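The brace slicing used above (find('{') to rfind('}')) works when the model returns exactly one JSON object, but stray braces in surrounding prose or markdown fences can still break it. A possible hardening, shown only as a sketch (extract_json_object is not a name used by this commit):

import json

def extract_json_object(text: str) -> dict:
    """Parse the first JSON object embedded in text, tolerating markdown fences and trailing prose."""
    cleaned = text.replace("```json", "").replace("```", "")
    start = cleaned.find("{")
    if start == -1:
        raise ValueError("no JSON object found in the model response")
    # raw_decode stops after the first complete object and ignores anything that follows it
    obj, _ = json.JSONDecoder().raw_decode(cleaned[start:])
    return obj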
212
+ def process_files(files: List, model: str, detail_level: str, language: str, additional_specs: str) -> Tuple[str, str]:
213
+ """
214
+ Processes the uploaded files, calls the AI analyzer, and returns the results.
215
+ """
216
+ if not files:
217
+ return TRANSLATIONS[language]['error_no_files'], ""
218
 
 
 
 
219
  processor = FileProcessor()
220
+ analyzer = AIAnalyzer(client)
 
 
221
 
222
+ # For simplicity, only the first valid file is processed
223
+ full_analysis = []
224
+ full_code = []
225
+
226
  for file in files:
227
+ if file is None: continue
228
+
229
+ file_name = file.name
 
230
  file_ext = Path(file_name).suffix.lower()
231
 
232
  with open(file.name, 'rb') as f:
233
  file_content = f.read()
234
 
235
+ df = None
236
+ if file_ext == '.csv':
237
+ df = processor.read_csv(file_content)
238
+ elif file_ext in ['.xlsx', '.xls']:
239
+ df = processor.read_excel(file_content)
240
+
241
+ if df is not None:
242
+ full_analysis.append(f"# Analysis for: {file_name}")
243
+ api_result = analyzer.get_analysis_and_code(df, model, detail_level, language, additional_specs)
244
+ full_analysis.append(api_result.get("analysis", ""))
245
+ full_code.append(f"# Code generated for: {file_name}\n" + api_result.get("code", ""))
246
+ # Break the loop so only one file is analyzed at a time, to avoid confusion
247
+ break
248
+
249
+ if not full_analysis:
250
+ return "No valid CSV/Excel files found to analyze.", ""
251
 
252
+ return "\n\n".join(full_analysis), "\n\n".join(full_code)
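For a quick local smoke test of this pipeline outside Gradio, something like the following works (a sketch; it assumes NEBIUS_API_KEY is set and that the example CSV from the __main__ block exists — any object with a .name attribute pointing at a file path is enough, since that is all process_files reads):

from types import SimpleNamespace

fake_upload = SimpleNamespace(name="examples/biomass_models_comparison.csv")
analysis_md, code_py = process_files([fake_upload], "Qwen/Qwen3-14B", "summarized", "en", "")
print(analysis_md[:300])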
253
 
254
+ # --- Gradio interface ---
255
  def create_interface():
256
+ current_language = "es"
257
+
258
+ def update_language(language):
 
 
 
 
259
  t = TRANSLATIONS[language]
 
260
  return [
261
+ gr.update(value=f"# {t['title']}"), gr.update(value=t['subtitle']),
262
+ gr.update(label=t['upload_files']), gr.update(label=t['select_model']),
263
+ gr.update(label=t['select_language']), gr.update(label=t['detail_level']),
264
+ gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
265
+ gr.update(value=t['analyze_button']), gr.update(label=t['export_format']),
266
+ gr.update(value=t['export_button']), gr.update(label=t['comparative_analysis']),
267
+ gr.update(label=t['implementation_code']), gr.update(label=t['data_format'])
 
 
 
 
 
 
 
268
  ]
269
+
270
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
271
+ app_state = gr.State({"analysis": "", "code": "", "language": "es"})
272
+
273
+ title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
274
+ subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
275
 
276
  with gr.Row():
277
  with gr.Column(scale=1):
278
+ files_input = gr.File(label=TRANSLATIONS[current_language]['upload_files'], file_count="multiple", file_types=[".csv", ".xlsx", ".xls"], type="filepath")
 
 
 
 
 
279
 
 
280
  default_model = "Qwen/Qwen3-14B"
281
+ model_selector = gr.Dropdown(choices=list(NEBIUS_MODELS.keys()), value=default_model, label=TRANSLATIONS[current_language]['select_model'])
282
 
283
+ detail_level_selector = gr.Radio(choices=[(TRANSLATIONS[current_language]['detailed'], "detailed"), (TRANSLATIONS[current_language]['summarized'], "summarized")], value="detailed", label=TRANSLATIONS[current_language]['detail_level'])
 
 
 
 
 
 
 
284
 
285
+ additional_specs_input = gr.Textbox(label=TRANSLATIONS[current_language]['additional_specs'], placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'], lines=3)
 
 
 
 
 
 
286
 
287
+ language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es")], value="es", label=TRANSLATIONS[current_language]['select_language'])
 
 
 
 
288
 
289
+ analyze_btn = gr.Button(TRANSLATIONS[current_language]['analyze_button'], variant="primary")
 
 
 
290
 
291
+ gr.Markdown("---")
 
 
 
 
292
 
293
+ export_format_selector = gr.Radio(choices=["DOCX", "PDF"], value="PDF", label=TRANSLATIONS[current_language]['export_format'])
294
+ export_btn = gr.Button(TRANSLATIONS[current_language]['export_button'])
295
+ export_file_output = gr.File(label="Download Report", visible=False)
296
+
 
297
  with gr.Column(scale=2):
298
+ analysis_output = gr.Markdown(label=TRANSLATIONS[current_language]['comparative_analysis'])
299
+ code_output = gr.Code(label=TRANSLATIONS[current_language]['implementation_code'], language="python", interactive=True)
300
+
301
+ def run_analysis(files, model, detail, lang, specs, state):
302
+ analysis, code = process_files(files, model, detail, lang, specs)
303
+ state["analysis"] = analysis
304
+ state["code"] = code
305
+ state["language"] = lang
306
+ return analysis, code, state
307
+
308
+ def run_export(state, format):
309
+ if not state["analysis"]:
310
+ return gr.update(visible=False)
 
 
 
 
 
 
311
 
312
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
313
+ ext = "docx" if format == "DOCX" else "pdf"
314
+ filename = f"analysis_report_{timestamp}.{ext}"
 
 
 
315
 
316
+ if format == "DOCX":
317
+ ReportExporter.export_to_docx(state["analysis"], filename, state["language"])
318
+ else:
319
+ ReportExporter.export_to_pdf(state["analysis"], filename, state["language"])
320
+
321
+ return gr.update(value=filename, visible=True)
322
+
323
  analyze_btn.click(
324
+ fn=run_analysis,
325
+ inputs=[files_input, model_selector, detail_level_selector, language_selector, additional_specs_input, app_state],
326
+ outputs=[analysis_output, code_output, app_state]
327
  )
328
 
 
 
 
 
 
 
 
329
  export_btn.click(
330
+ fn=run_export,
331
+ inputs=[app_state, export_format_selector],
332
+ outputs=[export_file_output]
333
  )
334
+
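Note that update_language is defined above but never connected to language_selector in this revision, so switching the dropdown does not retranslate the UI. One possible wiring, sketched here rather than taken from the commit (the last update returned by update_language targets a data-format accordion that this version of the interface does not create, so only the first twelve updates are used):

language_selector.change(
    fn=lambda lang: update_language(lang)[:12],
    inputs=[language_selector],
    outputs=[title_text, subtitle_text, files_input, model_selector,
             language_selector, detail_level_selector, additional_specs_input,
             analyze_btn, export_format_selector, export_btn,
             analysis_output, code_output],
)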
335
  return demo
336
 
 
337
  def main():
 
338
  if not os.getenv("NEBIUS_API_KEY"):
339
+ print("⚠️ NEBIUS_API_KEY not found. Please set it as an environment variable.")
340
+ return gr.Interface(fn=lambda: TRANSLATIONS['en']['error_no_api'], inputs=[], outputs="text", title="Configuration Error")
 
 
 
 
 
341
 
342
  return create_interface()
343
 
 
344
  if __name__ == "__main__":
345
+ # Create example files for Gradio if they do not exist
346
+ if not os.path.exists("examples"):
347
+ os.makedirs("examples")
348
+ if not os.path.exists("examples/biomass_models_comparison.csv"):
349
+ pd.DataFrame({
350
+ 'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
351
+ 'Model': ['Monod', 'Logistic', 'Monod', 'Logistic'],
352
+ 'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass'],
353
+ 'R2': [0.98, 0.99, 0.97, 0.985],
354
+ 'RMSE': [0.02, 0.01, 0.03, 0.015]
355
+ }).to_csv("examples/biomass_models_comparison.csv", index=False)
356
+
357
  demo = main()
358
  if demo:
359
+ demo.launch(server_name="0.0.0.0", server_port=7860)
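For completeness, the imports in this version imply roughly the following dependencies for the Space (a sketch of a requirements.txt; openpyxl is assumed because pandas needs it to read .xlsx files, and versions are deliberately left unpinned):

gradio
openai
PyPDF2
pandas
numpy
openpyxl
python-docx
reportlab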