C2MV committed on
Commit
384eeae
·
verified ·
1 Parent(s): 1d1e729

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +914 -515
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- #import anthropic
3
  import PyPDF2
4
  import pandas as pd
5
  import numpy as np
@@ -11,31 +11,43 @@ import tempfile
11
  from typing import Dict, List, Tuple, Union, Optional
12
  import re
13
  from pathlib import Path
14
- import openpyxl
15
  from dataclasses import dataclass
16
  from enum import Enum
17
- from docx import Document
18
- from docx.shared import Inches, Pt, RGBColor
19
- from docx.enum.text import WD_ALIGN_PARAGRAPH
20
- from reportlab.lib import colors
21
- from reportlab.lib.pagesizes import letter, A4
22
- from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
23
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
- from reportlab.lib.units import inch
25
- from reportlab.pdfbase import pdfmetrics
26
- from reportlab.pdfbase.ttfonts import TTFont
27
- import matplotlib.pyplot as plt
 
 
28
  from datetime import datetime
29
 
 
 
 
30
  # Configuración para HuggingFace
31
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
32
 
33
- # Inicializar cliente Anthropic
34
- #client = anthropic.Anthropic()
 
 
 
 
 
 
 
 
 
 
35
 
36
- # Inicializar cliente Nebius
37
- from openai import OpenAI
38
- client = OpenAI(api_key=os.getenv("NEBIUS_API_KEY"))
39
 
40
  # Sistema de traducción - Actualizado con nuevas entradas
41
  TRANSLATIONS = {
@@ -43,7 +55,7 @@ TRANSLATIONS = {
43
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
44
  'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
45
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
46
- 'select_model': '🤖 Claude Model',
47
  'select_language': '🌐 Language',
48
  'select_theme': '🎨 Theme',
49
  'detail_level': '📋 Analysis detail level',
@@ -60,7 +72,7 @@ TRANSLATIONS = {
60
  'dark': 'Dark',
61
  'best_for': 'Best for',
62
  'loading': 'Loading...',
63
- 'error_no_api': 'Please configure ANTHROPIC_API_KEY in HuggingFace Space secrets',
64
  'error_no_files': 'Please upload fitting result files to analyze',
65
  'report_exported': 'Report exported successfully as',
66
  'specialized_in': '🎯 Specialized in:',
@@ -74,7 +86,7 @@ TRANSLATIONS = {
74
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
75
  'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
76
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
77
- 'select_model': '🤖 Modelo Claude',
78
  'select_language': '🌐 Idioma',
79
  'select_theme': '🎨 Tema',
80
  'detail_level': '📋 Nivel de detalle del análisis',
@@ -91,7 +103,7 @@ TRANSLATIONS = {
91
  'dark': 'Oscuro',
92
  'best_for': 'Mejor para',
93
  'loading': 'Cargando...',
94
- 'error_no_api': 'Por favor configura ANTHROPIC_API_KEY en los secretos del Space',
95
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
96
  'report_exported': 'Reporte exportado exitosamente como',
97
  'specialized_in': '🎯 Especializado en:',
@@ -105,7 +117,7 @@ TRANSLATIONS = {
105
  'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
106
  'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
107
  'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
108
- 'select_model': '🤖 Modèle Claude',
109
  'select_language': '🌐 Langue',
110
  'select_theme': '🎨 Thème',
111
  'detail_level': '📋 Niveau de détail',
@@ -122,7 +134,7 @@ TRANSLATIONS = {
122
  'dark': 'Sombre',
123
  'best_for': 'Meilleur pour',
124
  'loading': 'Chargement...',
125
- 'error_no_api': 'Veuillez configurer ANTHROPIC_API_KEY',
126
  'error_no_files': 'Veuillez télécharger des fichiers à analyser',
127
  'report_exported': 'Rapport exporté avec succès comme',
128
  'specialized_in': '🎯 Spécialisé dans:',
@@ -136,7 +148,7 @@ TRANSLATIONS = {
136
  'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
137
  'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
138
  'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
139
- 'select_model': '🤖 Claude Modell',
140
  'select_language': '🌐 Sprache',
141
  'select_theme': '🎨 Thema',
142
  'detail_level': '📋 Detailgrad der Analyse',
@@ -153,7 +165,7 @@ TRANSLATIONS = {
153
  'dark': 'Dunkel',
154
  'best_for': 'Am besten für',
155
  'loading': 'Laden...',
156
- 'error_no_api': 'Bitte konfigurieren Sie ANTHROPIC_API_KEY',
157
  'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
158
  'report_exported': 'Bericht erfolgreich exportiert als',
159
  'specialized_in': '🎯 Spezialisiert auf:',
@@ -167,7 +179,7 @@ TRANSLATIONS = {
167
  'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
168
  'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
169
  'upload_files': '📁 Carregar resultados (CSV/Excel)',
170
- 'select_model': '🤖 Modelo Claude',
171
  'select_language': '🌐 Idioma',
172
  'select_theme': '🎨 Tema',
173
  'detail_level': '📋 Nível de detalhe',
@@ -184,7 +196,7 @@ TRANSLATIONS = {
184
  'dark': 'Escuro',
185
  'best_for': 'Melhor para',
186
  'loading': 'Carregando...',
187
- 'error_no_api': 'Por favor configure ANTHROPIC_API_KEY',
188
  'error_no_files': 'Por favor carregue arquivos para analisar',
189
  'report_exported': 'Relatório exportado com sucesso como',
190
  'specialized_in': '🎯 Especializado em:',
@@ -245,21 +257,21 @@ class ModelRegistry:
245
  def __init__(self):
246
  self.models = {}
247
  self._initialize_default_models()
248
-
249
  def register_model(self, model: MathematicalModel):
250
  """Registra un nuevo modelo matemático"""
251
  if model.category not in self.models:
252
  self.models[model.category] = {}
253
  self.models[model.category][model.name] = model
254
-
255
  def get_model(self, category: str, name: str) -> MathematicalModel:
256
  """Obtiene un modelo específico"""
257
  return self.models.get(category, {}).get(name)
258
-
259
  def get_all_models(self) -> Dict:
260
  """Retorna todos los modelos registrados"""
261
  return self.models
262
-
263
  def _initialize_default_models(self):
264
  """Inicializa los modelos por defecto"""
265
  # Modelos de crecimiento
@@ -272,7 +284,7 @@ class ModelRegistry:
272
  category="crecimiento_biomasa",
273
  biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante"
274
  ))
275
-
276
  self.register_model(MathematicalModel(
277
  name="Logístico",
278
  equation="dX/dt = μmax × X × (1 - X/Xmax)",
@@ -282,7 +294,7 @@ class ModelRegistry:
282
  category="crecimiento_biomasa",
283
  biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema"
284
  ))
285
-
286
  self.register_model(MathematicalModel(
287
  name="Gompertz",
288
  equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
@@ -296,18 +308,26 @@ class ModelRegistry:
296
  # Instancia global del registro
297
  model_registry = ModelRegistry()
298
 
299
- CLAUDE_MODELS = {
300
- "Qwen/Qwen3-14B": {
 
301
  "name": "Qwen 3-14B",
302
- "description": "Modelo Qwen 3-14B para análisis detallado",
303
- "max_tokens": 4096,
304
- "best_for": "Análisis técnico y científico"
305
- }
 
 
 
 
 
 
 
306
  }
307
 
308
  class FileProcessor:
309
  """Clase para procesar diferentes tipos de archivos"""
310
-
311
  @staticmethod
312
  def extract_text_from_pdf(pdf_file) -> str:
313
  """Extrae texto de un archivo PDF"""
@@ -319,23 +339,25 @@ class FileProcessor:
319
  return text
320
  except Exception as e:
321
  return f"Error reading PDF: {str(e)}"
322
-
323
  @staticmethod
324
  def read_csv(csv_file) -> pd.DataFrame:
325
  """Lee archivo CSV"""
326
  try:
327
  return pd.read_csv(io.BytesIO(csv_file))
328
  except Exception as e:
 
329
  return None
330
-
331
  @staticmethod
332
  def read_excel(excel_file) -> pd.DataFrame:
333
  """Lee archivo Excel"""
334
  try:
335
  return pd.read_excel(io.BytesIO(excel_file))
336
  except Exception as e:
 
337
  return None
338
-
339
  @staticmethod
340
  def extract_from_zip(zip_file) -> List[Tuple[str, bytes]]:
341
  """Extrae archivos de un ZIP"""
@@ -352,21 +374,36 @@ class FileProcessor:
352
 
353
  class ReportExporter:
354
  """Clase para exportar reportes a diferentes formatos"""
355
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  @staticmethod
357
  def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
358
  """Exporta el contenido a un archivo DOCX"""
359
- doc = Document()
360
-
361
  # Configurar estilos
362
  title_style = doc.styles['Title']
363
- title_style.font.size = Pt(24)
364
  title_style.font.bold = True
365
-
366
  heading_style = doc.styles['Heading 1']
367
- heading_style.font.size = Pt(18)
368
  heading_style.font.bold = True
369
-
370
  # Título
371
  title_text = {
372
  'en': 'Comparative Analysis Report - Biotechnological Models',
@@ -375,9 +412,9 @@ class ReportExporter:
375
  'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
376
  'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
377
  }
378
-
379
  doc.add_heading(title_text.get(language, title_text['en']), 0)
380
-
381
  # Fecha
382
  date_text = {
383
  'en': 'Generated on',
@@ -388,14 +425,14 @@ class ReportExporter:
388
  }
389
  doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
390
  doc.add_paragraph()
391
-
392
  # Procesar contenido
393
  lines = content.split('\n')
394
  current_paragraph = None
395
-
396
  for line in lines:
397
  line = line.strip()
398
-
399
  if line.startswith('###'):
400
  doc.add_heading(line.replace('###', '').strip(), level=2)
401
  elif line.startswith('##'):
@@ -419,36 +456,36 @@ class ReportExporter:
419
  elif line:
420
  # Párrafo normal
421
  doc.add_paragraph(line)
422
-
423
  # Guardar documento
424
  doc.save(filename)
425
  return filename
426
-
427
  @staticmethod
428
  def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
429
  """Exporta el contenido a un archivo PDF"""
430
  # Crear documento PDF
431
- doc = SimpleDocTemplate(filename, pagesize=letter)
432
  story = []
433
- styles = getSampleStyleSheet()
434
-
435
  # Estilos personalizados
436
- title_style = ParagraphStyle(
437
  'CustomTitle',
438
  parent=styles['Title'],
439
  fontSize=24,
440
- textColor=colors.HexColor('#1f4788'),
441
  spaceAfter=30
442
  )
443
-
444
- heading_style = ParagraphStyle(
445
  'CustomHeading',
446
  parent=styles['Heading1'],
447
  fontSize=16,
448
- textColor=colors.HexColor('#2e5090'),
449
  spaceAfter=12
450
  )
451
-
452
  # Título
453
  title_text = {
454
  'en': 'Comparative Analysis Report - Biotechnological Models',
@@ -457,9 +494,9 @@ class ReportExporter:
457
  'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
458
  'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
459
  }
460
-
461
- story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
462
-
463
  # Fecha
464
  date_text = {
465
  'en': 'Generated on',
@@ -468,483 +505,781 @@ class ReportExporter:
468
  'de': 'Erstellt am',
469
  'pt': 'Gerado em'
470
  }
471
- story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
472
- story.append(Spacer(1, 0.5*inch))
473
-
474
  # Procesar contenido
475
  lines = content.split('\n')
476
-
477
  for line in lines:
478
  line = line.strip()
479
-
480
  if not line:
481
- story.append(Spacer(1, 0.2*inch))
482
  elif line.startswith('###'):
483
- story.append(Paragraph(line.replace('###', '').strip(), styles['Heading3']))
484
  elif line.startswith('##'):
485
- story.append(Paragraph(line.replace('##', '').strip(), styles['Heading2']))
486
  elif line.startswith('#'):
487
- story.append(Paragraph(line.replace('#', '').strip(), heading_style))
488
  elif line.startswith('**') and line.endswith('**'):
489
  text = line.replace('**', '')
490
- story.append(Paragraph(f"<b>{text}</b>", styles['Normal']))
491
  elif line.startswith('- ') or line.startswith('* '):
492
- story.append(Paragraph(f"• {line[2:]}", styles['Normal']))
493
  elif line == '---' or line.startswith('==='):
494
- story.append(Spacer(1, 0.3*inch))
495
- story.append(Paragraph("_" * 70, styles['Normal']))
496
- story.append(Spacer(1, 0.3*inch))
497
  else:
498
  # Limpiar caracteres especiales para PDF
499
  clean_line = line.replace('📊', '[GRAPH]').replace('🎯', '[TARGET]').replace('🔍', '[SEARCH]').replace('💡', '[TIP]')
500
- story.append(Paragraph(clean_line, styles['Normal']))
501
-
502
  # Construir PDF
503
  doc.build(story)
504
  return filename
505
 
 
506
  class AIAnalyzer:
507
- """Clase para análisis con IA"""
508
-
509
  def __init__(self, client, model_registry):
 
510
  self.client = client
511
  self.model_registry = model_registry
512
-
 
 
 
513
  def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
514
  """Detecta el tipo de análisis necesario"""
515
  if isinstance(content, pd.DataFrame):
516
  columns = [col.lower() for col in content.columns]
517
-
518
  fitting_indicators = [
519
- 'r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic',
520
  'parameter', 'param', 'coefficient', 'fit',
521
  'model', 'equation', 'goodness', 'chi_square',
522
  'p_value', 'confidence', 'standard_error', 'se'
523
  ]
524
-
525
  has_fitting_results = any(indicator in ' '.join(columns) for indicator in fitting_indicators)
526
-
527
  if has_fitting_results:
528
  return AnalysisType.FITTING_RESULTS
529
  else:
 
530
  return AnalysisType.DATA_FITTING
531
-
532
- # Ejemplo de prompt mejorado:
533
- prompt = f"""
534
- {lang_prefix}
535
- Based on the following data:
536
- {data.to_string()}
537
-
538
- Perform a comparative analysis of mathematical models. Include:
539
- 1. Best model for each variable type (biomass, substrate, product)
540
- 2. Comparison metrics (R², RMSE, AIC, BIC)
541
- 3. Justification for model selection
542
- 4. Implementation code with embedded data values
543
  """
544
-
545
- response = self.client.chat.completions.create(
546
- model="Qwen/Qwen3-14B",
547
- messages=[{"role": "user", "content": prompt}],
548
- max_tokens=4096
549
- )
550
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  def get_language_prompt_prefix(self, language: str) -> str:
552
  """Obtiene el prefijo del prompt según el idioma"""
553
  prefixes = {
554
- 'en': "Please respond in English. ",
555
- 'es': "Por favor responde en español. ",
556
- 'fr': "Veuillez répondre en français. ",
557
- 'de': "Bitte antworten Sie auf Deutsch. ",
558
- 'pt': "Por favor responda em português. "
559
  }
560
  return prefixes.get(language, prefixes['en'])
561
-
562
- def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
563
  language: str = "en", additional_specs: str = "") -> Dict:
564
  """Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
565
-
566
- # Preparar resumen completo de los datos
567
  data_summary = f"""
568
- FITTING RESULTS DATA:
569
-
570
- Data structure:
 
571
  - Columns: {list(data.columns)}
572
  - Number of models evaluated: {len(data)}
573
-
574
- Complete data:
575
- {data.to_string()}
576
-
577
- Descriptive statistics:
578
- {data.describe().to_string()}
579
  """
580
-
581
- # Extraer valores para usar en el código
582
- data_dict = data.to_dict('records')
583
-
584
- # Obtener prefijo de idioma
585
  lang_prefix = self.get_language_prompt_prefix(language)
586
-
587
- # Agregar especificaciones adicionales del usuario si existen
588
  user_specs_section = f"""
589
-
590
- USER ADDITIONAL SPECIFICATIONS:
591
  {additional_specs}
592
-
593
- Please ensure to address these specific requirements in your analysis.
594
  """ if additional_specs else ""
595
-
596
- # Prompt mejorado con instrucciones específicas para cada nivel
 
597
  if detail_level == "detailed":
598
- prompt = f"""
599
- {lang_prefix}
600
-
601
- You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
602
-
 
603
  {user_specs_section}
604
-
605
- DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
606
-
607
- PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
608
-
609
- 1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
610
- - List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
611
- - For EACH experiment, identify:
612
- * Experimental conditions
613
- * Number of models tested
614
- * Variables measured (biomass, substrate, product)
615
-
616
- 2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
617
- For EACH EXPERIMENT separately:
618
- - Identify ALL fitted mathematical models BY NAME
619
- - Classify them: biomass growth, substrate consumption, product formation
620
- - Show the mathematical equation of each model
621
- - List parameter values obtained for that specific experiment
622
-
623
- 3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
624
- Create a section for EACH EXPERIMENT showing:
625
-
626
- **EXPERIMENT [Name/Condition]:**
627
-
628
- a) **BIOMASS MODELS** (if applicable):
629
- - Best model: [Name] with R²=[value], RMSE=[value]
630
- - Parameters: μmax=[value], Xmax=[value], etc.
631
- - Ranking of all biomass models tested
632
-
633
- b) **SUBSTRATE MODELS** (if applicable):
634
- - Best model: [Name] with R²=[value], RMSE=[value]
635
- - Parameters: Ks=[value], Yxs=[value], etc.
636
- - Ranking of all substrate models tested
637
-
638
- c) **PRODUCT MODELS** (if applicable):
639
- - Best model: [Name] with R²=[value], RMSE=[value]
640
- - Parameters: α=[value], β=[value], etc.
641
- - Ranking of all product models tested
642
-
643
- 4. **DETAILED COMPARATIVE TABLES**
644
-
645
- **Table 1: Summary by Experiment and Variable Type**
646
- | Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
647
- |------------|----------|------------|-------|------|----------------|---------|
648
- | Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
649
- | Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
650
- | Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
651
- | Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
652
-
653
- **Table 2: Complete Model Comparison Across All Experiments**
654
- | Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
655
-
656
- 5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
657
- - Compare how parameters change between experiments
658
- - Identify trends (e.g., μmax increases with temperature)
659
- - Calculate average parameters and variability
660
- - Suggest optimal conditions based on parameters
661
-
662
- 6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
663
- For each experiment, explain:
664
- - What the parameter values mean biologically
665
- - Whether values are realistic for the conditions
666
- - Key differences between experiments
667
- - Critical control parameters identified
668
-
669
- 7. **OVERALL BEST MODELS DETERMINATION**
670
- - **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
671
- - **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
672
- - **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
673
-
674
- Justify with numerical evidence from multiple experiments.
675
-
676
- 8. **CONCLUSIONS AND RECOMMENDATIONS**
677
- - Which models are most robust across different conditions
678
- - Specific models to use for each experimental condition
679
- - Confidence intervals and prediction reliability
680
- - Scale-up recommendations with specific values
681
-
682
- Use Markdown format with clear structure. Include ALL numerical values from the data.
683
- Create clear sections for EACH EXPERIMENT.
684
  """
 
 
685
  else: # summarized
686
- prompt = f"""
687
- {lang_prefix}
688
-
689
- You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
690
-
 
691
  {user_specs_section}
692
-
693
- DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
694
-
695
  PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
696
-
697
- 1. **EXPERIMENTS OVERVIEW**
698
- - Total experiments analyzed: [number]
699
- - Conditions tested: [list]
700
- - Variables measured: biomass/substrate/product
701
-
702
- 2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
703
-
704
- 📊 **EXPERIMENT 1 [Name/Condition]:**
705
- - Biomass: [Model] (R²=[value])
706
- - Substrate: [Model] (R²=[value])
707
- - Product: [Model] (R²=[value])
708
-
709
- 📊 **EXPERIMENT 2 [Name/Condition]:**
710
- - Biomass: [Model] (R²=[value])
711
- - Substrate: [Model] (R²=[value])
712
- - Product: [Model] (R²=[value])
713
-
714
- [Continue for all experiments...]
715
-
716
- 3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
717
- 🏆 **Best Models Overall:**
718
- - **Biomass**: [Model] - Best in [X]/[Y] experiments
719
- - **Substrate**: [Model] - Average R²=[value]
720
- - **Product**: [Model] - Most consistent performance
721
-
722
- 4. **QUICK COMPARISON TABLE**
723
- | Experiment | Best Biomass | Best Substrate | Best Product | Overall R² |
724
- |------------|--------------|----------------|--------------|------------|
725
- | Exp1 | [Model] | [Model] | [Model] | [avg] |
726
- | Exp2 | [Model] | [Model] | [Model] | [avg] |
727
-
728
- 5. **KEY FINDINGS**
729
- - Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
730
- - Best conditions identified: [specific values]
731
- - Most robust models: [list with reasons]
732
-
733
- 6. **PRACTICAL RECOMMENDATIONS**
734
- - For biomass prediction: Use [Model]
735
- - For substrate monitoring: Use [Model]
736
- - For product estimation: Use [Model]
737
- - Critical parameters: [list with values]
738
-
739
- Keep it concise but include ALL experiments and model names with their key metrics.
740
  """
741
-
 
 
742
  try:
 
743
  response = self.client.chat.completions.create(
744
- model="Qwen/Qwen3-14B", # o el modelo específico
745
- messages=[{"role": "user", "content": prompt}],
746
- max_tokens=3000,
747
- temperature=0.7
 
748
  )
749
-
750
- # Análisis adicional para generar código con valores numéricos reales
751
- code_prompt = f"""
752
- {lang_prefix}
753
-
754
- Based on the analysis and this actual data:
755
- {data.to_string()}
756
-
757
- Generate Python code that:
758
-
759
- 1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
760
- 2. Implements analysis BY EXPERIMENT showing:
761
- - Best models for each experiment
762
- - Comparison across experiments
763
- - Parameter evolution between conditions
764
- 3. Includes visualization functions that:
765
- - Show results PER EXPERIMENT
766
- - Compare models across experiments
767
- - Display parameter trends
768
- 4. Shows the best model for biomass, substrate, and product separately
769
-
770
- The code must include:
771
- - Data loading with experiment identification
772
- - Model comparison by experiment and variable type
773
- - Visualization showing results per experiment
774
- - Overall best model selection with justification
775
- - Functions to predict using the best models for each category
776
-
777
- Make sure to include comments indicating which model won for each variable type and why.
778
-
779
- Format: Complete, executable Python code with actual data values embedded.
780
  """
781
-
782
- code_response = self.client.messages.create(
783
- model=claude_model,
784
- max_tokens=3000,
785
- messages=[{
786
- "role": "user",
787
- "content": code_prompt
788
- }]
 
789
  )
790
-
 
 
 
 
 
 
 
 
 
 
 
 
791
  return {
792
  "tipo": "Comparative Analysis of Mathematical Models",
793
- "analisis_completo": response.choices[0].message.content,
794
- "codigo_implementacion": code_response.content[0].text,
795
  "resumen_datos": {
796
  "n_modelos": len(data),
797
  "columnas": list(data.columns),
798
- "metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
799
  for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
 
800
  "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
801
  "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
802
- "datos_completos": data_dict # Incluir todos los datos para el código
803
  }
804
  }
805
-
806
  except Exception as e:
 
807
  return {"error": str(e)}
808
 
809
- def process_files(files, claude_model: str, detail_level: str = "detailed",
810
  language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
811
  """Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
 
 
 
 
 
812
  processor = FileProcessor()
813
- analyzer = AIAnalyzer(client, model_registry)
814
  results = []
815
  all_code = []
816
-
817
  for file in files:
818
  if file is None:
819
  continue
820
-
821
  file_name = file.name if hasattr(file, 'name') else "archivo"
822
  file_ext = Path(file_name).suffix.lower()
823
-
824
- with open(file.name, 'rb') as f:
825
- file_content = f.read()
826
-
827
- if file_ext in ['.csv', '.xlsx', '.xls']:
828
- if language == 'es':
829
- results.append(f"## 📊 Análisis de Resultados: {file_name}")
830
  else:
831
- results.append(f"## 📊 Results Analysis: {file_name}")
832
-
833
- if file_ext == '.csv':
834
- df = processor.read_csv(file_content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
835
  else:
836
- df = processor.read_excel(file_content)
837
-
838
- if df is not None:
839
- analysis_type = analyzer.detect_analysis_type(df)
840
-
841
- if analysis_type == AnalysisType.FITTING_RESULTS:
842
- result = analyzer.analyze_fitting_results(
843
- df, claude_model, detail_level, language, additional_specs
844
- )
845
-
846
- if language == 'es':
847
- results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
848
- else:
849
- results.append("### 🎯 COMPARATIVE ANALYSIS OF MATHEMATICAL MODELS")
850
-
851
- results.append(result.get("analisis_completo", ""))
852
- if "codigo_implementacion" in result:
853
- all_code.append(result["codigo_implementacion"])
854
-
855
- results.append("\n---\n")
856
-
857
  analysis_text = "\n".join(results)
858
- code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
859
-
 
 
 
860
  return analysis_text, code_text
861
 
862
- def generate_implementation_code(analysis_results: str, data: pd.DataFrame) -> str:
863
- """
864
- Genera código de implementación con análisis por experimento y valores numéricos embebidos
865
- """
866
- # Extraer datos únicos por experimento
867
- experiments = data['Experiment'].unique()
868
-
869
- # Iniciar construcción del código
870
- code_parts = [
871
- "\"\"\"\nSistema de Análisis de Modelos Biotecnológicos\nGenerado automáticamente con valores reales de datos\n\"\"\"",
872
- "import numpy as np",
873
- "import pandas as pd",
874
- "import matplotlib.pyplot as plt",
875
- "from scipy.optimize import curve_fit",
876
- "from sklearn.metrics import r2_score, mean_squared_error",
877
- ""
878
- ]
879
-
880
- # Agregar datos embebidos
881
- code_parts.append("# Datos embebidos con resultados de ajuste de modelos")
882
- code_parts.append("model_data = {")
883
- for col in data.columns:
884
- code_parts.append(f" '{col}': {data[col].tolist()},")
885
- code_parts.append("}")
886
- code_parts.append("df = pd.DataFrame(model_data)")
887
- code_parts.append("")
888
-
889
- # Función para cargar datos
890
- code_parts.append("def load_data():")
891
- code_parts.append(" \"\"\"Carga los datos de ajuste de modelos\"\"\"")
892
- code_parts.append(" print('Datos cargados con éxito. Estructura:')")
893
- code_parts.append(" print(df.head())")
894
- code_parts.append(" return df")
895
- code_parts.append("")
896
-
897
- # Función para comparar modelos por experimento
898
- code_parts.append("def compare_models_by_experiment(df):")
899
- code_parts.append(" \"\"\"Compara modelos por experimento y tipo de variable\"\"\"")
900
- code_parts.append(" results = {}")
901
- code_parts.append(" for exp in df['Experiment'].unique():")
902
- code_parts.append(" exp_data = df[df['Experiment'] == exp]")
903
- code_parts.append(" best_models = {}")
904
- code_parts.append(" for var_type in ['biomass', 'substrate', 'product']:")
905
- code_parts.append(" # Filtrar por tipo de variable si existe")
906
- code_parts.append(" var_data = exp_data[exp_data['Type'].str.lower() == var_type] if 'Type' in exp_data.columns else exp_data")
907
- code_parts.append(" if not var_data.empty:")
908
- code_parts.append(" best_idx = var_data['R2'].idxmax()")
909
- code_parts.append(" best_model = var_data.iloc[best_idx]")
910
- code_parts.append(" best_models[var_type] = {")
911
- code_parts.append(" 'model': best_model['Model'],")
912
- code_parts.append(" 'r2': best_model['R2'],")
913
- code_parts.append(" 'rmse': best_model['RMSE'],")
914
- code_parts.append(" 'params': {k: v for k, v in best_model.items() if k not in ['Model', 'R2', 'RMSE', 'Experiment', 'Type']}")
915
- code_parts.append(" }")
916
- code_parts.append(" results[exp] = best_models")
917
- code_parts.append(" print('\\nModelos seleccionados por experimento:')")
918
- code_parts.append(" for exp, models in results.items():")
919
- code_parts.append(" print(f'\\n{exp}:')")
920
- code_parts.append(" for var, info in models.items():")
921
- code_parts.append(" print(f' {var}: {info[\"model\"]}' + (f' (R²={info[\"r2\"]:.3f})' if info[\"r2\"] else ''))")
922
- code_parts.append(" return results")
923
- code_parts.append("")
924
-
925
- # Función para visualizar resultados
926
- code_parts.append("def plot_results_by_experiment(df, results):")
927
- code_parts.append(" \"\"\"Visualiza los resultados por experimento\"\"\"")
928
- code_parts.append(" n_experiments = len(results)")
929
- code_parts.append(" fig, axes = plt.subplots(nrows=1, ncols=n_experiments, figsize=(6*n_experiments, 5))")
930
- code_parts.append(" if n_experiments == 1: axes = [axes]")
931
- code_parts.append(" for ax, (exp, models) in zip(axes, results.items()):")
932
- code_parts.append(" exp_data = df[df['Experiment'] == exp]")
933
- code_parts.append(" exp_data.plot(kind='bar', x='Model', y='R2', ax=ax, title=f'{exp} - R² Comparison', legend=False)")
934
- code_parts.append(" ax.set_ylabel('R² Score')")
935
- code_parts.append(" ax.axhline(y=0.95, color='r', linestyle='--', label='Threshold (0.95)')")
936
- code_parts.append(" ax.legend()")
937
- code_parts.append(" plt.tight_layout()")
938
- code_parts.append(" plt.show()")
939
- code_parts.append("")
940
-
941
- # Ejemplo de uso
942
- code_parts.append("if __name__ == '__main__':")
943
- code_parts.append(" df = load_data()")
944
- code_parts.append(" results = compare_models_by_experiment(df)")
945
- code_parts.append(" plot_results_by_experiment(df, results)")
946
-
947
- return "\n".join(code_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
948
 
949
  # Estado global para almacenar resultados
950
  class AppState:
@@ -966,41 +1301,52 @@ def export_report(export_format: str, language: str) -> Tuple[str, str]:
966
  'pt': "Nenhuma análise disponível para exportar"
967
  }
968
  return error_msg.get(language, error_msg['en']), ""
969
-
970
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
971
-
972
  try:
 
973
  if export_format == "DOCX":
974
  filename = f"biotech_analysis_report_{timestamp}.docx"
975
  ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
976
  else: # PDF
977
  filename = f"biotech_analysis_report_{timestamp}.pdf"
978
  ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)
979
-
980
  success_msg = TRANSLATIONS[language]['report_exported']
981
  return f"{success_msg} {filename}", filename
982
  except Exception as e:
983
- return f"Error: {str(e)}", ""
 
 
984
 
985
  # Interfaz Gradio con soporte multiidioma y temas
986
  def create_interface():
987
  # Estado inicial
988
  current_theme = "light"
989
  current_language = "en"
990
-
991
  def update_interface_language(language):
992
  """Actualiza el idioma de la interfaz"""
993
  app_state.current_language = language
994
  t = TRANSLATIONS[language]
995
-
 
 
 
 
 
 
 
 
996
  return [
997
  gr.update(value=f"# {t['title']}"), # title_text
998
  gr.update(value=t['subtitle']), # subtitle_text
999
  gr.update(label=t['upload_files']), # files_input
1000
- gr.update(label=t['select_model']), # model_selector
1001
  gr.update(label=t['select_language']), # language_selector
1002
  gr.update(label=t['select_theme']), # theme_selector
1003
- gr.update(label=t['detail_level']), # detail_level
1004
  gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
1005
  gr.update(value=t['analyze_button']), # analyze_btn
1006
  gr.update(label=t['export_format']), # export_format
@@ -1009,18 +1355,30 @@ def create_interface():
1009
  gr.update(label=t['implementation_code']), # code_output
1010
  gr.update(label=t['data_format']) # data_format_accordion
1011
  ]
1012
-
1013
  def process_and_store(files, model, detail, language, additional_specs):
1014
- """Procesa archivos y almacena resultados"""
1015
  if not files:
1016
  error_msg = TRANSLATIONS[language]['error_no_files']
1017
- return error_msg, ""
1018
-
 
 
 
1019
  analysis, code = process_files(files, model, detail, language, additional_specs)
 
 
1020
  app_state.current_analysis = analysis
1021
  app_state.current_code = code
 
1022
  return analysis, code
1023
-
 
 
 
 
 
 
1024
  with gr.Blocks(theme=THEMES[current_theme]) as demo:
1025
  # Componentes de UI
1026
  with gr.Row():
@@ -1030,35 +1388,35 @@ def create_interface():
1030
  with gr.Column(scale=1):
1031
  with gr.Row():
1032
  language_selector = gr.Dropdown(
1033
- choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
1034
  ("Deutsch", "de"), ("Português", "pt")],
1035
- value="en",
1036
  label=TRANSLATIONS[current_language]['select_language'],
1037
  interactive=True
1038
  )
1039
  theme_selector = gr.Dropdown(
1040
  choices=[("Light", "light"), ("Dark", "dark")],
1041
- value="light",
1042
  label=TRANSLATIONS[current_language]['select_theme'],
1043
  interactive=True
1044
  )
1045
-
1046
  with gr.Row():
1047
  with gr.Column(scale=1):
1048
  files_input = gr.File(
1049
  label=TRANSLATIONS[current_language]['upload_files'],
1050
  file_count="multiple",
1051
- file_types=[".csv", ".xlsx", ".xls", ".pdf", ".zip"],
1052
- type="filepath"
1053
  )
1054
 
1055
  model_selector = gr.Dropdown(
1056
- choices=list(CLAUDE_MODELS.keys()),
1057
- value="Qwen/Qwen3-14B", # ✅ Modelo válido
1058
  label=TRANSLATIONS[current_language]['select_model'],
1059
- info=f"{TRANSLATIONS[current_language]['best_for']}: {CLAUDE_MODELS['Qwen/Qwen3-14B']['best_for']}"
1060
  )
1061
-
1062
  detail_level = gr.Radio(
1063
  choices=[
1064
  (TRANSLATIONS[current_language]['detailed'], "detailed"),
@@ -1067,7 +1425,7 @@ def create_interface():
1067
  value="detailed",
1068
  label=TRANSLATIONS[current_language]['detail_level']
1069
  )
1070
-
1071
  # Nueva entrada para especificaciones adicionales
1072
  additional_specs = gr.Textbox(
1073
  label=TRANSLATIONS[current_language]['additional_specs'],
@@ -1076,83 +1434,84 @@ def create_interface():
1076
  max_lines=5,
1077
  interactive=True
1078
  )
1079
-
1080
  analyze_btn = gr.Button(
1081
  TRANSLATIONS[current_language]['analyze_button'],
1082
  variant="primary",
1083
  size="lg"
1084
  )
1085
-
1086
  gr.Markdown("---")
1087
-
1088
  export_format = gr.Radio(
1089
  choices=["DOCX", "PDF"],
1090
  value="PDF",
1091
  label=TRANSLATIONS[current_language]['export_format']
1092
  )
1093
-
1094
  export_btn = gr.Button(
1095
  TRANSLATIONS[current_language]['export_button'],
1096
  variant="secondary"
1097
  )
1098
-
1099
  export_status = gr.Textbox(
1100
  label="Export Status",
1101
  interactive=False,
1102
  visible=False
1103
  )
1104
-
1105
  export_file = gr.File(
1106
  label="Download Report",
1107
  visible=False
1108
  )
1109
-
1110
  with gr.Column(scale=2):
1111
  analysis_output = gr.Markdown(
1112
  label=TRANSLATIONS[current_language]['comparative_analysis']
1113
  )
1114
-
1115
  code_output = gr.Code(
1116
  label=TRANSLATIONS[current_language]['implementation_code'],
1117
  language="python",
1118
  interactive=True,
1119
  lines=20
1120
  )
1121
-
1122
  data_format_accordion = gr.Accordion(
1123
  label=TRANSLATIONS[current_language]['data_format'],
1124
  open=False
1125
  )
1126
-
1127
  with data_format_accordion:
1128
  gr.Markdown("""
1129
  ### Expected CSV/Excel structure:
1130
-
1131
  | Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
1132
  |------------|-------|------|-----|------|-----|-----|--------|-------|------------|
1133
  | pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
1134
  | pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
1135
  | pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
1136
  | pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
1137
-
1138
  **Important columns:**
1139
- - **Experiment**: Experimental condition identifier
1140
- - **Model**: Model name
1141
- - **Type**: Variable type (Biomass/Substrate/Product)
1142
- - **R2, RMSE**: Fit quality metrics
1143
- - **Parameters**: Model-specific parameters
1144
  """)
1145
-
1146
- # Definir ejemplos
1147
  examples = gr.Examples(
1148
  examples=[
1149
- [["examples/biomass_models_comparison.csv"], "Qwen/Qwen3-14B", "detailed", ""],
1150
- [["examples/substrate_kinetics_results.xlsx"], "Qwen/Qwen3-14B", "summarized", "Focus on temperature effects"]
1151
  ],
1152
  inputs=[files_input, model_selector, detail_level, additional_specs],
1153
  label=TRANSLATIONS[current_language]['examples']
1154
  )
1155
-
 
1156
  # Eventos - Actualizado para incluir additional_specs
1157
  language_selector.change(
1158
  update_interface_language,
@@ -1160,55 +1519,95 @@ def create_interface():
1160
  outputs=[
1161
  title_text, subtitle_text, files_input, model_selector,
1162
  language_selector, theme_selector, detail_level, additional_specs,
1163
- analyze_btn, export_format, export_btn, analysis_output,
1164
  code_output, data_format_accordion
1165
  ]
1166
  )
1167
-
1168
  def change_theme(theme_name):
1169
  """Cambia el tema de la interfaz"""
1170
- # Nota: En Gradio actual, cambiar el tema dinámicamente requiere recargar
1171
- # Esta es una limitación conocida
1172
- return gr.Info("Theme will be applied on next page load")
1173
-
 
 
1174
  theme_selector.change(
1175
  change_theme,
1176
  inputs=[theme_selector],
1177
- outputs=[]
1178
  )
1179
-
1180
  analyze_btn.click(
1181
  fn=process_and_store,
1182
  inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
1183
  outputs=[analysis_output, code_output]
1184
  )
1185
-
1186
  def handle_export(format, language):
1187
  status, file = export_report(format, language)
1188
- if file:
1189
- return gr.update(value=status, visible=True), gr.update(value=file, visible=True)
 
1190
  else:
1191
- return gr.update(value=status, visible=True), gr.update(visible=False)
1192
-
 
 
1193
  export_btn.click(
1194
  fn=handle_export,
1195
  inputs=[export_format, language_selector],
1196
  outputs=[export_status, export_file]
1197
  )
1198
-
1199
  return demo
1200
 
1201
  # Función principal
1202
  def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
1203
  return create_interface()
1204
 
1205
  # Para ejecución local
1206
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1207
  demo = main()
1208
  if demo:
1209
  demo.launch(
1210
  server_name="0.0.0.0",
1211
  server_port=7860,
1212
- debug=True,
1213
  share=False
1214
  )
 
1
  import gradio as gr
2
+ # import anthropic # Removed Anthropic import
3
  import PyPDF2
4
  import pandas as pd
5
  import numpy as np
 
11
  from typing import Dict, List, Tuple, Union, Optional
12
  import re
13
  from pathlib import Path
14
+ import openpyxl # Needed for reading .xlsx
15
  from dataclasses import dataclass
16
  from enum import Enum
17
+ # No need for docx, reportlab, matplotlib if only text/code output is used and not generating them internally
18
+ # import docx
19
+ # from docx.shared import Inches, Pt, RGBColor
20
+ # from docx.enum.text import WD_ALIGN_PARAGRAPH
21
+ # import reportlab
22
+ # from reportlab.lib import colors
23
+ # from reportlab.lib.pagesizes import letter, A4
24
+ # from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
25
+ # from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
26
+ # from reportlab.lib.units import inch
27
+ # from reportlab.pdfbase import pdfmetrics
28
+ # from reportlab.pdfbase.ttfonts import TTFont
29
+ # import matplotlib.pyplot as plt # Moved to the implementation code section
30
  from datetime import datetime
31
 
32
+ # Import OpenAI for Qwen access
33
+ from openai import OpenAI
34
+
35
  # Configuración para HuggingFace
36
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
37
 
38
+ # Initialize OpenAI client for Qwen
39
+ # Read API key from NEBIUS_API_KEY environment variable
40
+ NEBIUS_API_KEY = os.environ.get("NEBIUS_API_KEY")
41
+ if NEBIUS_API_KEY:
42
+ openai_client = OpenAI(
43
+ base_url="https://api.studio.nebius.com/v1/",
44
+ api_key=NEBIUS_API_KEY
45
+ )
46
+ print("OpenAI client initialized for Nebius Qwen endpoint.")
47
+ else:
48
+ openai_client = None
49
+ print("NEBIUS_API_KEY not found. OpenAI client not initialized.")
50
 
 
 
 
51
 
52
  # Sistema de traducción - Actualizado con nuevas entradas
53
  TRANSLATIONS = {
 
55
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
56
  'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
57
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
58
+ 'select_model': '🤖 Qwen Model', # Changed label
59
  'select_language': '🌐 Language',
60
  'select_theme': '🎨 Theme',
61
  'detail_level': '📋 Analysis detail level',
 
72
  'dark': 'Dark',
73
  'best_for': 'Best for',
74
  'loading': 'Loading...',
75
+ 'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets', # Changed message
76
  'error_no_files': 'Please upload fitting result files to analyze',
77
  'report_exported': 'Report exported successfully as',
78
  'specialized_in': '🎯 Specialized in:',
 
86
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
87
  'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
88
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
89
+ 'select_model': '🤖 Modelo Qwen', # Changed label
90
  'select_language': '🌐 Idioma',
91
  'select_theme': '🎨 Tema',
92
  'detail_level': '📋 Nivel de detalle del análisis',
 
103
  'dark': 'Oscuro',
104
  'best_for': 'Mejor para',
105
  'loading': 'Cargando...',
106
+ 'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space', # Changed message
107
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
108
  'report_exported': 'Reporte exportado exitosamente como',
109
  'specialized_in': '🎯 Especializado en:',
 
117
  'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
118
  'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
119
  'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
120
+ 'select_model': '🤖 Modèle Qwen', # Changed label
121
  'select_language': '🌐 Langue',
122
  'select_theme': '🎨 Thème',
123
  'detail_level': '📋 Niveau de détail',
 
134
  'dark': 'Sombre',
135
  'best_for': 'Meilleur pour',
136
  'loading': 'Chargement...',
137
+ 'error_no_api': 'Veuillez configurer NEBIUS_API_KEY', # Changed message
138
  'error_no_files': 'Veuillez télécharger des fichiers à analyser',
139
  'report_exported': 'Rapport exporté avec succès comme',
140
  'specialized_in': '🎯 Spécialisé dans:',
 
148
  'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
149
  'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
150
  'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
151
+ 'select_model': '🤖 Qwen Modell', # Changed label
152
  'select_language': '🌐 Sprache',
153
  'select_theme': '🎨 Thema',
154
  'detail_level': '📋 Detailgrad der Analyse',
 
165
  'dark': 'Dunkel',
166
  'best_for': 'Am besten für',
167
  'loading': 'Laden...',
168
+ 'error_no_api': 'Bitte konfigurieren Sie NEBIUS_API_KEY', # Changed message
169
  'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
170
  'report_exported': 'Bericht erfolgreich exportiert als',
171
  'specialized_in': '🎯 Spezialisiert auf:',
 
179
  'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
180
  'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
181
  'upload_files': '📁 Carregar resultados (CSV/Excel)',
182
+ 'select_model': '🤖 Modelo Qwen', # Changed label
183
  'select_language': '🌐 Idioma',
184
  'select_theme': '🎨 Tema',
185
  'detail_level': '📋 Nível de detalhe',
 
196
  'dark': 'Escuro',
197
  'best_for': 'Melhor para',
198
  'loading': 'Carregando...',
199
+ 'error_no_api': 'Por favor configure NEBIUS_API_KEY', # Changed message
200
  'error_no_files': 'Por favor carregue arquivos para analisar',
201
  'report_exported': 'Relatório exportado com sucesso como',
202
  'specialized_in': '🎯 Especializado em:',
 
257
  def __init__(self):
258
  self.models = {}
259
  self._initialize_default_models()
260
+
261
  def register_model(self, model: MathematicalModel):
262
  """Registra un nuevo modelo matemático"""
263
  if model.category not in self.models:
264
  self.models[model.category] = {}
265
  self.models[model.category][model.name] = model
266
+
267
  def get_model(self, category: str, name: str) -> MathematicalModel:
268
  """Obtiene un modelo específico"""
269
  return self.models.get(category, {}).get(name)
270
+
271
  def get_all_models(self) -> Dict:
272
  """Retorna todos los modelos registrados"""
273
  return self.models
274
+
275
  def _initialize_default_models(self):
276
  """Inicializa los modelos por defecto"""
277
  # Modelos de crecimiento
 
284
  category="crecimiento_biomasa",
285
  biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante"
286
  ))
287
+
288
  self.register_model(MathematicalModel(
289
  name="Logístico",
290
  equation="dX/dt = μmax × X × (1 - X/Xmax)",
 
294
  category="crecimiento_biomasa",
295
  biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema"
296
  ))
297
+
298
  self.register_model(MathematicalModel(
299
  name="Gompertz",
300
  equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
 
308
  # Instancia global del registro
309
  model_registry = ModelRegistry()
310
 
311
+ # Available Qwen Models (Updated from Claude)
312
+ QWEN_MODELS = {
313
+ "Qwen/Qwen3-14B": { # Using the model specified by the user
314
  "name": "Qwen 3-14B",
315
+ "description": "A powerful Qwen model suitable for complex analysis.",
316
+ "max_tokens": 8192, # Example context window, adjust based on actual model specs
317
+ "best_for": "Detailed analysis and code generation"
318
+ },
319
+ # Add other Qwen models if available and desired, e.g.:
320
+ # "Qwen/Qwen3-7B": {
321
+ # "name": "Qwen 3-7B",
322
+ # "description": "Faster Qwen model",
323
+ # "max_tokens": 8192,
324
+ # "best_for": "Quicker analysis"
325
+ # }
326
  }
327
 
328
  class FileProcessor:
329
  """Clase para procesar diferentes tipos de archivos"""
330
+
331
  @staticmethod
332
  def extract_text_from_pdf(pdf_file) -> str:
333
  """Extrae texto de un archivo PDF"""
 
339
  return text
340
  except Exception as e:
341
  return f"Error reading PDF: {str(e)}"
342
+
343
  @staticmethod
344
  def read_csv(csv_file) -> pd.DataFrame:
345
  """Lee archivo CSV"""
346
  try:
347
  return pd.read_csv(io.BytesIO(csv_file))
348
  except Exception as e:
349
+ print(f"Error reading CSV: {e}")
350
  return None
351
+
352
  @staticmethod
353
  def read_excel(excel_file) -> pd.DataFrame:
354
  """Lee archivo Excel"""
355
  try:
356
  return pd.read_excel(io.BytesIO(excel_file))
357
  except Exception as e:
358
+ print(f"Error reading Excel: {e}")
359
  return None
360
+
361
  @staticmethod
362
  def extract_from_zip(zip_file) -> List[Tuple[str, bytes]]:
363
  """Extrae archivos de un ZIP"""
 
374
 
375
  class ReportExporter:
376
  """Clase para exportar reportes a diferentes formatos"""
377
+ # Keep ReportExporter as is, as it processes the analysis text,
378
+ # not the AI interaction itself. It might need docx/reportlab imports
379
+ # re-added if generating those formats. Assuming they are needed for export_to_docx/pdf.
380
+ # Re-adding necessary imports for ReportExporter
381
+ from docx import Document
382
+ from docx.shared import Inches, Pt, RGBColor
383
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
384
+ from reportlab.lib import colors
385
+ from reportlab.lib.pagesizes import letter, A4
386
+ from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
387
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
388
+ from reportlab.lib.units import inch
389
+ # pdfmetrics and TTFont might be needed for custom fonts if used, keep them for now.
390
+ from reportlab.pdfbase import pdfmetrics
391
+ from reportlab.pdfbase.ttfonts import TTFont
392
+
393
  @staticmethod
394
  def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
395
  """Exporta el contenido a un archivo DOCX"""
396
+ doc = ReportExporter.Document()
397
+
398
  # Configurar estilos
399
  title_style = doc.styles['Title']
400
+ title_style.font.size = ReportExporter.Pt(24)
401
  title_style.font.bold = True
402
+
403
  heading_style = doc.styles['Heading 1']
404
+ heading_style.font.size = ReportExporter.Pt(18)
405
  heading_style.font.bold = True
406
+
407
  # Título
408
  title_text = {
409
  'en': 'Comparative Analysis Report - Biotechnological Models',
 
412
  'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
413
  'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
414
  }
415
+
416
  doc.add_heading(title_text.get(language, title_text['en']), 0)
417
+
418
  # Fecha
419
  date_text = {
420
  'en': 'Generated on',
 
425
  }
426
  doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
427
  doc.add_paragraph()
428
+
429
  # Procesar contenido
430
  lines = content.split('\n')
431
  current_paragraph = None
432
+
433
  for line in lines:
434
  line = line.strip()
435
+
436
  if line.startswith('###'):
437
  doc.add_heading(line.replace('###', '').strip(), level=2)
438
  elif line.startswith('##'):
 
456
  elif line:
457
  # Párrafo normal
458
  doc.add_paragraph(line)
459
+
460
  # Guardar documento
461
  doc.save(filename)
462
  return filename
463
+
464
  @staticmethod
465
  def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
466
  """Exporta el contenido a un archivo PDF"""
467
  # Crear documento PDF
468
+ doc = ReportExporter.SimpleDocTemplate(filename, pagesize=ReportExporter.letter)
469
  story = []
470
+ styles = ReportExporter.getSampleStyleSheet()
471
+
472
  # Estilos personalizados
473
+ title_style = ReportExporter.ParagraphStyle(
474
  'CustomTitle',
475
  parent=styles['Title'],
476
  fontSize=24,
477
+ textColor=ReportExporter.colors.HexColor('#1f4788'),
478
  spaceAfter=30
479
  )
480
+
481
+ heading_style = ReportExporter.ParagraphStyle(
482
  'CustomHeading',
483
  parent=styles['Heading1'],
484
  fontSize=16,
485
+ textColor=ReportExporter.colors.HexColor('#2e5090'),
486
  spaceAfter=12
487
  )
488
+
489
  # Título
490
  title_text = {
491
  'en': 'Comparative Analysis Report - Biotechnological Models',
 
494
  'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
495
  'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
496
  }
497
+
498
+ story.append(ReportExporter.Paragraph(title_text.get(language, title_text['en']), title_style))
499
+
500
  # Fecha
501
  date_text = {
502
  'en': 'Generated on',
 
505
  'de': 'Erstellt am',
506
  'pt': 'Gerado em'
507
  }
508
+ story.append(ReportExporter.Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
509
+ story.append(ReportExporter.Spacer(1, 0.5*ReportExporter.inch))
510
+
511
  # Procesar contenido
512
  lines = content.split('\n')
513
+
514
  for line in lines:
515
  line = line.strip()
516
+
517
  if not line:
518
+ story.append(ReportExporter.Spacer(1, 0.2*ReportExporter.inch))
519
  elif line.startswith('###'):
520
+ story.append(ReportExporter.Paragraph(line.replace('###', '').strip(), styles['Heading3']))
521
  elif line.startswith('##'):
522
+ story.append(ReportExporter.Paragraph(line.replace('##', '').strip(), styles['Heading2']))
523
  elif line.startswith('#'):
524
+ story.append(ReportExporter.Paragraph(line.replace('#', '').strip(), heading_style))
525
  elif line.startswith('**') and line.endswith('**'):
526
  text = line.replace('**', '')
527
+ story.append(ReportExporter.Paragraph(f"<b>{text}</b>", styles['Normal']))
528
  elif line.startswith('- ') or line.startswith('* '):
529
+ story.append(ReportExporter.Paragraph(f"• {line[2:]}", styles['Normal']))
530
  elif line == '---' or line.startswith('==='):
531
+ story.append(ReportExporter.Spacer(1, 0.3*ReportExporter.inch))
532
+ story.append(ReportExporter.Paragraph("_" * 70, styles['Normal']))
533
+ story.append(ReportExporter.Spacer(1, 0.3*ReportExporter.inch))
534
  else:
535
  # Limpiar caracteres especiales para PDF
536
  clean_line = line.replace('📊', '[GRAPH]').replace('🎯', '[TARGET]').replace('🔍', '[SEARCH]').replace('💡', '[TIP]')
537
+ story.append(ReportExporter.Paragraph(clean_line, styles['Normal']))
538
+
539
  # Construir PDF
540
  doc.build(story)
541
  return filename
542
 
543
+
544
  class AIAnalyzer:
545
+ """Clase para análisis con IA (usando OpenAI for Qwen)"""
546
+
547
  def __init__(self, client, model_registry):
548
+ # client is now an OpenAI client instance
549
  self.client = client
550
  self.model_registry = model_registry
551
+ # Qwen specific parameters from user example
552
+ self.temperature = 0.6
553
+ self.top_p = 0.95
554
+
555
  def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
556
  """Detecta el tipo de análisis necesario"""
557
  if isinstance(content, pd.DataFrame):
558
  columns = [col.lower() for col in content.columns]
559
+
560
  fitting_indicators = [
561
+ 'r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic',
562
  'parameter', 'param', 'coefficient', 'fit',
563
  'model', 'equation', 'goodness', 'chi_square',
564
  'p_value', 'confidence', 'standard_error', 'se'
565
  ]
566
+
567
  has_fitting_results = any(indicator in ' '.join(columns) for indicator in fitting_indicators)
568
+
569
  if has_fitting_results:
570
  return AnalysisType.FITTING_RESULTS
571
  else:
572
+ # Assuming any dataframe without clear fitting metrics is raw data
573
  return AnalysisType.DATA_FITTING
574
+
575
+ # Use a quick Qwen model for type detection
576
+ # Using the same model as the main analysis for simplicity, could use a smaller one if available
577
+ model_for_detection = list(QWEN_MODELS.keys())[0] # Use the first available Qwen model
578
+
579
+ prompt = """
580
+ Analyze this content and determine if it is:
581
+ 1. A scientific article describing biotechnological mathematical models
582
+ 2. Experimental data for parameter fitting
583
+ 3. Model fitting results (with parameters, R², RMSE, etc.)
584
+
585
+ Reply only with: "MODEL", "DATA" or "RESULTS". Be very concise.
586
  """
587
+
588
+ try:
589
+ response = self.client.chat.completions.create(
590
+ model=model_for_detection,
591
+ temperature=0.1, # Lower temp for deterministic output
592
+ max_tokens=10,
593
+ messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
594
+ )
595
+
596
+ # Extract text from OpenAI response
597
+ result = response.choices[0].message.content.strip().upper()
598
+
599
+ if "MODEL" in result:
600
+ return AnalysisType.MATHEMATICAL_MODEL
601
+ elif "RESULTS" in result:
602
+ return AnalysisType.FITTING_RESULTS
603
+ elif "DATA" in result:
604
+ return AnalysisType.DATA_FITTING
605
+ else:
606
+ return AnalysisType.UNKNOWN
607
+
608
+ except Exception as e:
609
+ print(f"Error during analysis type detection: {e}")
610
+ return AnalysisType.UNKNOWN
611
+
612
  def get_language_prompt_prefix(self, language: str) -> str:
613
  """Obtiene el prefijo del prompt según el idioma"""
614
  prefixes = {
615
+ 'en': "Please respond exclusively in English. ",
616
+ 'es': "Por favor responde exclusivamente en español. ",
617
+ 'fr': "Veuillez répondre exclusivement en français. ",
618
+ 'de': "Bitte antworten Sie ausschließlich auf Deutsch. ",
619
+ 'pt': "Por favor responda exclusivamente em português. "
620
  }
621
  return prefixes.get(language, prefixes['en'])
622
+
623
+ def analyze_fitting_results(self, data: pd.DataFrame, qwen_model: str, detail_level: str = "detailed",
624
  language: str = "en", additional_specs: str = "") -> Dict:
625
  """Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
626
+
627
+ # Prepare comprehensive data summary for the model
628
  data_summary = f"""
629
+ FITTING RESULTS DATA (as JSON records for parsing):
630
+ {json.dumps(data.to_dict('records'), indent=2)}
631
+
632
+ DATA OVERVIEW:
633
  - Columns: {list(data.columns)}
634
  - Number of models evaluated: {len(data)}
 
 
 
 
 
 
635
  """
636
+
637
+ # Get language prefix
 
 
 
638
  lang_prefix = self.get_language_prompt_prefix(language)
639
+
640
+ # Add user additional specifications if they exist
641
  user_specs_section = f"""
642
+
643
+ USER ADDITIONAL SPECIFICATIONS / FOCUS AREAS:
644
  {additional_specs}
645
+
646
+ Please ensure your analysis incorporates these specific requirements and focus areas.
647
  """ if additional_specs else ""
648
+
649
+ # Prompt enhanced with specific instructions for each level
650
+ # Added system message for better role adherence
651
  if detail_level == "detailed":
652
+ messages = [
653
+ {"role": "system", "content": f"{lang_prefix} You are an expert in biotechnology and mathematical modeling, specializing in the comparative analysis of model fitting results. Your task is to provide a comprehensive, structured analysis of the provided data, focusing on the comparative performance of models across different experimental conditions. Include specific numerical values from the data in your analysis. Use Markdown formatting."}
654
+ ]
655
+ prompt_content = f"""
656
+ Analyze these kinetic/biotechnological model fitting results.
657
+
658
  {user_specs_section}
659
+
660
+ DETAIL LEVEL: DETAILED - Provide comprehensive analysis structured BY EXPERIMENT/CONDITION.
661
+
662
+ PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT/CONDITION:
663
+
664
+ 1. **IDENTIFY ALL EXPERIMENTS/CONDITIONS:** List and describe each unique experimental condition present in the data (e.g., pH levels, temperatures, media compositions).
665
+ 2. **MODELS TESTED PER EXPERIMENT:** For EACH experiment, list ALL fitted mathematical models tested. Classify them (Biomass, Substrate, Product, etc.) if a 'Type' column exists.
666
+ 3. **DETAILED COMPARISON PER EXPERIMENT:** Create a dedicated section for *each* experiment. Within each experiment section:
667
+ * Report the experimental condition.
668
+ * For each Variable Type (Biomass, Substrate, Product) analyzed in this experiment (if applicable):
669
+ * Identify the **Best Model** based on R² (primary metric) and RMSE (secondary metric). State its name and the exact R² and RMSE values for this experiment.
670
+ * List the values of the main parameters obtained for the best model in this specific experiment.
671
+ * Provide a ranked list of *all* models tested for this variable type in this experiment, showing Model Name, R², and RMSE.
672
+ 4. **COMPARATIVE TABLES (Across Experiments):**
673
+ * Create a summary table showing the Best Model, R², and RMSE for EACH Variable Type within EACH Experiment.
674
+ * Create a table summarizing the performance (Average R², Average RMSE, Number of experiments tested) of key models across *all* experiments where they were applied.
675
+ 5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS:** Analyze how the key parameters (e.g., μmax, Ks, Xmax) for frequently used or important models change from one experimental condition to another. Identify trends or sensitivities to experimental conditions.
676
+ 6. **BIOLOGICAL INTERPRETATION & EXPERIMENTAL INSIGHTS:** For each experiment, provide a brief biological interpretation based on the fitting results and parameter values. Discuss whether the parameter values are biologically reasonable for the given conditions. Highlight key differences or findings between experiments.
677
+ 7. **OVERALL BEST MODELS:** Based on performance across *all* experiments, identify the overall best model(s) for Biomass, Substrate, and Product (if applicable). Justify your selection with average metrics and consistency across conditions, citing numerical evidence.
678
+ 8. **CONCLUSIONS AND RECOMMENDATIONS:** Summarize the main findings. Recommend which models are most robust or suitable for different types of analysis or specific experimental conditions. Discuss practical implications, confidence levels, and potential considerations for scale-up or further research based on the analysis.
679
+
680
+ Use clear Markdown headings (`#`, `##`, `###`), bold text (`**text**`), and lists (`- ` or `1. `). Include ALL relevant numerical values from the provided data.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  """
682
+ messages.append({"role": "user", "content": f"{prompt_content}\n\n{data_summary}"})
683
+
684
  else: # summarized
685
+ messages = [
686
+ {"role": "system", "content": f"{lang_prefix} You are an expert in biotechnology, providing a concise comparative analysis of mathematical model fitting results across different experiments. Focus on identifying the best models per experiment and overall winners. Include essential numerical information. Use Markdown formatting."}
687
+ ]
688
+ prompt_content = f"""
689
+ Analyze these kinetic/biotechnological model fitting results CONCISELY but completely, structured BY EXPERIMENT/CONDITION.
690
+
691
  {user_specs_section}
692
+
693
+ DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information.
694
+
695
  PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
696
+
697
+ 1. **EXPERIMENTS OVERVIEW:** Briefly state the total number of experiments/conditions analyzed and list the types of experimental conditions covered. Mention the variables measured (Biomass, Substrate, Product).
698
+ 2. **BEST MODELS QUICK SUMMARY BY EXPERIMENT:** For *each* experiment/condition, clearly state:
699
+ * The experimental condition name.
700
+ * The Best Model found for Biomass (with its R² value).
701
+ * The Best Model found for Substrate (with its R² value).
702
+ * The Best Model found for Product (with its R² value).
703
+ (Only include variable types present in the experiment).
704
+ 3. **OVERALL BEST MODELS ACROSS ALL EXPERIMENTS:** Identify the single best model overall for Biomass, Substrate, and Product based on average performance or frequency of being the best model across experiments. State their average R² (if applicable) and mention how many experiments they were tested in.
705
+ 4. **SUMMARY TABLE:** Provide a concise table summarizing the Best Model and its R²/RMSE for each Experiment and Variable Type combination.
706
+ 5. **KEY FINDINGS & PARAMETER RANGES:** Highlight the most important findings. Briefly mention the observed range or average values for key parameters (e.g., μmax, Ks) across the experiments.
707
+ 6. **PRACTICAL RECOMMENDATIONS:** Offer concise recommendations on which models are most suitable for which variables or conditions based on the analysis.
708
+
709
+ Keep it concise but include ALL experiments, model names, and their key R² or RMSE metrics. Use Markdown.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  """
711
+ messages.append({"role": "user", "content": f"{prompt_content}\n\n{data_summary}"})
712
+
713
+
714
  try:
715
+ # Main analysis call
716
  response = self.client.chat.completions.create(
717
+ model=qwen_model,
718
+ messages=messages,
719
+ temperature=self.temperature, # Use defined temperature
720
+ top_p=self.top_p, # Use defined top_p
721
+ max_tokens=QWEN_MODELS.get(qwen_model, {}).get("max_tokens", 4000) # Use model max tokens, default 4000
722
  )
723
+
724
+ # Extract analysis text
725
+ analysis_text = response.choices[0].message.content
726
+
727
+ # Generate implementation code - This prompt is adjusted to match the analysis structure
728
+ # Also using a system message for the code generation role
729
+ code_messages = [
730
+ {"role": "system", "content": f"{lang_prefix} You are an expert Python programmer specializing in biotechnological modeling and data analysis. Your task is to generate executable Python code based on the provided data and analysis. The code should implement the comparison of models by experiment and variable type, identify best models, and include basic plotting functions. Ensure actual numerical values from the data are used where appropriate for demonstration or analysis within the code."}
731
+ ]
732
+ code_prompt_content = f"""
733
+ Generate complete, executable Python code to analyze and visualize the biotechnological model fitting results provided earlier.
734
+
735
+ Use the actual data, which looks like this (as JSON records):
736
+ {json.dumps(data.to_dict('records'), indent=2)}
737
+
738
+ The code should:
739
+ 1. Load this specific dataset.
740
+ 2. Implement a class or functions to analyze model fitting results.
741
+ 3. Perform analysis BY EXPERIMENT AND VARIABLE TYPE (Biomass, Substrate, Product), identifying the best model for each combination based on R² and RMSE.
742
+ 4. Identify overall best models across all experiments for each variable type.
743
+ 5. Include functions to generate visualizations comparing model performance (e.g., R² values) across experiments and variable types.
744
+ 6. Include comments explaining the logic and findings, especially which model was best for which category/experiment and why.
745
+ 7. Provide example usage of the code with the embedded data.
746
+
747
+ Make the code robust and well-commented. Focus on clear data handling, analysis, and visualization.
 
 
 
 
 
 
748
  """
749
+ code_messages.append({"role": "user", "content": code_prompt_content})
750
+
751
+
752
+ code_response = self.client.chat.completions.create(
753
+ model=qwen_model, # Use the same Qwen model for consistency
754
+ messages=code_messages,
755
+ temperature=0.5, # Slightly lower temp for more structured code
756
+ top_p=0.9,
757
+ max_tokens=3000 # Code might be shorter than analysis
758
  )
759
+
760
+ # Extract code text, handle potential code block markdown
761
+ code_text_raw = code_response.choices[0].message.content
762
+ # Remove markdown code block fences if present
763
+ if code_text_raw.startswith("```python"):
764
+ code_text = code_text_raw.strip().replace("```python\n", "", 1).strip("```")
765
+ elif code_text_raw.startswith("```"):
766
+ # Handle generic code blocks
767
+ code_text = code_text_raw.strip().replace("```\n", "", 1).strip("```")
768
+ else:
769
+ code_text = code_text_raw
770
+
771
+
772
  return {
773
  "tipo": "Comparative Analysis of Mathematical Models",
774
+ "analisis_completo": analysis_text,
775
+ "codigo_implementacion": code_text,
776
  "resumen_datos": {
777
  "n_modelos": len(data),
778
  "columnas": list(data.columns),
779
+ "metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
780
  for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
781
+ # Safely get best R2 and model name if columns exist
782
  "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
783
  "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
784
+ "datos_completos": data.to_dict('records') # Include all data for code
785
  }
786
  }
787
+
788
  except Exception as e:
789
+ print(f"Error during AI analysis: {e}")
790
  return {"error": str(e)}
791
 
792
def _resolve_uploaded_file(file) -> Tuple[str, str]:
    """Return ``(path, display_name)`` for a single uploaded item.

    Accepts either a plain path string (what ``gr.File(type="filepath")``
    delivers) or an object exposing the path through a ``name`` attribute.

    Raises:
        TypeError: if the item is neither a string nor has a ``name``.
    """
    if isinstance(file, str):
        file_path = file
    elif hasattr(file, 'name'):
        file_path = file.name
    else:
        raise TypeError("Unexpected file input type")
    # Only the base name is shown to the user; the full path is used for reading.
    return file_path, Path(file_path).name


def process_files(files, qwen_model: str, detail_level: str = "detailed",
                  language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
    """Analyze every uploaded CSV/Excel file and collect report text and code.

    Args:
        files: iterable of uploaded items (path strings or objects with a
            ``name`` attribute); ``None`` entries are ignored.
        qwen_model: model identifier forwarded to the analyzer.
        detail_level: ``"detailed"`` or ``"summarized"``, forwarded to the analyzer.
        language: UI language code; only ``'es'`` selects the Spanish messages
            written here, every other value falls back to English.
        additional_specs: free-text user instructions forwarded to the analyzer.

    Returns:
        ``(analysis_markdown, code_text)``. When no file produced code, the
        static fallback from ``generate_implementation_code`` is returned.
    """
    # Without an API client nothing can be analyzed; report that and hand back
    # the fallback code so the code panel is still populated.
    if openai_client is None:
        error_msg = TRANSLATIONS.get(language, TRANSLATIONS['en'])['error_no_api']
        return error_msg, generate_implementation_code(error_msg)

    def _t(es_text: str, en_text: str) -> str:
        """Pick the Spanish or English variant of a message."""
        return es_text if language == 'es' else en_text

    analyzer = AIAnalyzer(openai_client, model_registry)
    results = []
    all_code = []

    for file in files or []:
        if file is None:
            continue

        # Placeholder name used in the error message if the item cannot be resolved.
        file_name = "archivo"

        try:
            # The extension must come from the real path: deriving it from a
            # placeholder name would classify every upload as unsupported.
            file_path, file_name = _resolve_uploaded_file(file)
            file_ext = Path(file_name).suffix.lower()

            if file_ext in ('.csv', '.xlsx', '.xls'):
                results.append(_t(f"## 📊 Análisis de Resultados: {file_name}",
                                  f"## 📊 Results Analysis: {file_name}"))

                if file_ext == '.csv':
                    df = pd.read_csv(file_path)
                else:
                    df = pd.read_excel(file_path)

                if df is not None and not df.empty:
                    analysis_type = analyzer.detect_analysis_type(df)

                    if analysis_type == AnalysisType.FITTING_RESULTS:
                        result = analyzer.analyze_fitting_results(
                            df, qwen_model, detail_level, language, additional_specs
                        )

                        if "error" in result:
                            results.append(f"Error during analysis of {file_name}: {result['error']}")
                        else:
                            results.append(_t("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS",
                                              "### 🎯 COMPARATIVE ANALYSIS OF MATHEMATICAL MODELS"))
                            results.append(result.get("analisis_completo", ""))
                            if "codigo_implementacion" in result:
                                all_code.append(result["codigo_implementacion"])
                    elif analysis_type == AnalysisType.DATA_FITTING:
                        # Raw experimental data: acknowledged but not analyzed.
                        results.append(_t(f"### 📈 Datos Experimentales Detectados: {file_name}",
                                          f"### 📈 Experimental Data Detected: {file_name}"))
                        results.append(_t(
                            "Se detectaron datos experimentales. Esta herramienta se especializa en *resultados de ajuste*.",
                            "Experimental data was detected. This tool specializes in *fitting results*."))
                    elif analysis_type == AnalysisType.MATHEMATICAL_MODEL:
                        results.append(_t(f"### 🔬 Descripción de Modelo Detectada: {file_name}",
                                          f"### 🔬 Mathematical Model Description Detected: {file_name}"))
                        results.append(_t(
                            "Se detectó una descripción de modelo matemático. Esta herramienta se especializa en análisis comparativos de *resultados de ajuste*.",
                            "A mathematical model description was detected. This tool specializes in comparative analysis of *fitting results*."))
                    else:  # Unknown
                        results.append(_t(f"### 🤔 Tipo de Contenido Desconocido: {file_name}",
                                          f"### 🤔 Unknown Content Type: {file_name}"))
                        results.append(_t(
                            "El tipo de contenido en este archivo no pudo ser determinado. Por favor, sube archivos con resultados de ajuste de modelos (con columnas como 'R2', 'RMSE', 'Model', etc.).",
                            "The type of content in this file could not be determined. Please upload files containing model fitting results (with columns like 'R2', 'RMSE', 'Model', etc.)."))
                else:
                    results.append(_t(f"### ⚠️ Error al leer o archivo vacío: {file_name}",
                                      f"### ⚠️ Error reading or empty file: {file_name}"))
            else:
                # Only tabular fitting results (CSV/Excel) are supported.
                results.append(_t(f"### ⚠️ Formato de archivo no soportado: {file_name}",
                                  f"### ⚠️ Unsupported file format: {file_name}"))

        except Exception as e:
            # Per-file boundary: one unreadable upload must not abort the batch,
            # so the failure is reported inline and processing continues.
            results.append(_t(f"### ❌ Error inesperado al procesar {file_name}: {str(e)}",
                              f"### Unexpected error processing {file_name}: {str(e)}"))

        results.append("\n---\n")  # Separator between files

    analysis_text = "\n".join(results)

    # Prefer the code produced by the analyzer; fall back to the static
    # template only when no file yielded any.
    if all_code:
        code_text = "\n\n# === Combined Implementation Code ===\n\n" + "\n\n".join(all_code)
    else:
        code_text = generate_implementation_code(analysis_text)

    return analysis_text, code_text
919
 
920
+
921
def generate_implementation_code(analysis_results: str) -> str:
    """Return a static fallback analysis script as Python source text.

    The script is a fixed template used when no AI-generated code is
    available. It defines ``ExperimentalModelAnalyzer`` and an example
    ``__main__`` section with embedded sample data.

    Args:
        analysis_results: analysis text from the caller. It is accepted for
            interface compatibility and is not used to build the template.

    Returns:
        The template as a string of Python source code.
    """
    # NOTE: this is a plain (non-f) string. Inner docstring quotes are escaped
    # and "\\n" is doubled so the emitted script contains the two-character
    # escape sequence rather than a literal line break.
    code = """
# Fallback Implementation Code (Generated if AI code generation fails)
# This code provides a basic structure for analyzing fitting results.
# Replace placeholder data with your actual results dataframe.

from typing import Dict

import numpy as np
import pandas as pd
# Matplotlib and Seaborn imports moved here as they are for the generated code
import matplotlib.pyplot as plt
import seaborn as sns

# Visualization configuration
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

class ExperimentalModelAnalyzer:
    \"\"\"
    Basic class for comparative analysis of biotechnological models across multiple experiments.
    This is a fallback implementation.
    \"\"\"

    def __init__(self, results_df: pd.DataFrame = None):
        # Work on a copy so adding the default column never alters the caller's DataFrame
        self.results_df = results_df.copy() if results_df is not None else None
        if self.results_df is not None and 'Experiment' not in self.results_df.columns:
            # Add a default experiment if none exists
            self.results_df['Experiment'] = 'Default_Experiment'

    def load_results(self, file_path: str = None, data_dict: dict = None):
        \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
        if data_dict:
            self.results_df = pd.DataFrame(data_dict)
        elif file_path:
            if file_path.endswith('.csv'):
                self.results_df = pd.read_csv(file_path)
            else:
                self.results_df = pd.read_excel(file_path)

        if self.results_df is not None and 'Experiment' not in self.results_df.columns:
            self.results_df['Experiment'] = 'Default_Experiment'

        if self.results_df is not None:
            print(f" Data loaded: {len(self.results_df)} models")
            print(f"📊 Available columns: {list(self.results_df.columns)}")
            if 'Experiment' in self.results_df.columns:
                print(f"🧪 Experiments found: {self.results_df['Experiment'].unique()}")

    def analyze_by_experiment(self,
                              experiment_col: str = 'Experiment',
                              model_col: str = 'Model',
                              type_col: str = 'Type',
                              r2_col: str = 'R2',
                              rmse_col: str = 'RMSE') -> Dict:
        \"\"\"
        Analyze models by experiment and variable type.
        Identifies best models for biomass, substrate, and product in each experiment.
        \"\"\"
        if self.results_df is None or self.results_df.empty:
            print("⚠️ No data loaded for analysis.")
            return {}

        # Remember the column names so the cross-experiment summary reads the same keys
        self._result_columns = (model_col, r2_col, rmse_col)

        results_by_exp = {}
        experiments = self.results_df[experiment_col].unique()

        print("\\n" + "="*80)
        print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
        print("="*80)

        for exp in experiments:
            print(f"\\n🧪 EXPERIMENT: {exp}")
            print("-"*50)

            exp_data = self.results_df[self.results_df[experiment_col] == exp].copy() # Use copy to avoid SettingWithCopyWarning
            results_by_exp[exp] = {}

            var_types = exp_data[type_col].unique() if type_col in exp_data.columns else ['All_Types']

            for var_type in var_types:
                # Type labels may be non-string values, so convert before upper-casing
                type_label = str(var_type).upper()

                if type_col in exp_data.columns:
                    var_data = exp_data[exp_data[type_col] == var_type]
                else:
                    var_data = exp_data # Analyze all together if no type column

                has_r2_col = r2_col in var_data.columns

                if not var_data.empty and has_r2_col and var_data[r2_col].notna().any():
                    # Find best model for this variable type (or all) based on R2
                    best_idx = var_data[r2_col].idxmax()
                    best_model = var_data.loc[best_idx]

                    results_by_exp[exp][var_type] = {
                        'best_model': best_model.get(model_col, 'N/A'),
                        'r2': best_model.get(r2_col, np.nan),
                        'rmse': best_model.get(rmse_col, np.nan),
                        'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records') if {model_col, r2_col, rmse_col}.issubset(var_data.columns) else var_data.to_dict('records')
                    }

                    print(f"\\n 📈 {type_label}:")
                    print(f" Best Model: {results_by_exp[exp][var_type]['best_model']}")
                    print(f" R² = {results_by_exp[exp][var_type]['r2']:.4f}" if pd.notna(results_by_exp[exp][var_type]['r2']) else "R² = N/A")
                    print(f" RMSE = {results_by_exp[exp][var_type]['rmse']:.4f}" if pd.notna(results_by_exp[exp][var_type]['rmse']) else "RMSE = N/A")

                    # Show all models for this variable
                    if 'all_models' in results_by_exp[exp][var_type]:
                        print(f"\\n All {var_type} models tested:")
                        for model_entry in results_by_exp[exp][var_type]['all_models']:
                            r2_val = model_entry.get(r2_col, np.nan)
                            rmse_val = model_entry.get(rmse_col, np.nan)
                            model_name = model_entry.get(model_col, 'N/A')
                            print(f" - {model_name}: R²={r2_val:.4f}" if pd.notna(r2_val) else f" - {model_name}: R²=N/A", end="")
                            print(f", RMSE={rmse_val:.4f}" if pd.notna(rmse_val) else ", RMSE=N/A")
                elif not var_data.empty and has_r2_col:
                    # Every R² is missing: idxmax() has nothing to pick from
                    print(f"\\n 📈 {type_label}:")
                    print(f" No valid '{r2_col}' values found for comparison.")
                elif not var_data.empty:
                    print(f"\\n 📈 {type_label}:")
                    print(f" No '{r2_col}' column found for comparison.")
                else:
                    print(f"\\n 📈 {type_label}:")
                    print(f" No data found for this variable type.")


        self.best_models_by_experiment = results_by_exp
        return results_by_exp

    def _determine_overall_best_models(self):
        \"\"\"Determine the best models across all experiments\"\"\"
        if not hasattr(self, 'best_models_by_experiment') or not self.best_models_by_experiment:
            print("⚠️ No experimental analysis available to determine overall models.")
            return {}

        # Use the same column names that analyze_by_experiment was called with
        model_col, r2_col, rmse_col = getattr(self, '_result_columns', ('Model', 'R2', 'RMSE'))

        print("\\n" + "="*80)
        print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
        print("="*80)

        model_performance = {}

        for exp, exp_results in self.best_models_by_experiment.items():
            for var_type, var_results in exp_results.items():
                if var_type not in model_performance:
                    model_performance[var_type] = {}

                # Use the list of all models analyzed for this type in this experiment
                models_in_exp_type = var_results.get('all_models', [])

                for model_data in models_in_exp_type:
                    model_name = model_data.get(model_col, 'Unknown Model') # Use .get for safety
                    r2_val = model_data.get(r2_col)
                    rmse_val = model_data.get(rmse_col)

                    if model_name not in model_performance[var_type]:
                        model_performance[var_type][model_name] = {
                            'r2_values': [],
                            'rmse_values': [],
                            'experiments': []
                        }

                    if r2_val is not None:
                        model_performance[var_type][model_name]['r2_values'].append(r2_val)
                    if rmse_val is not None:
                        model_performance[var_type][model_name]['rmse_values'].append(rmse_val)

                    if exp not in model_performance[var_type][model_name]['experiments']:
                        model_performance[var_type][model_name]['experiments'].append(exp)

        overall_best_models = {}

        # Calculate average performance and select best
        for var_type, models in model_performance.items():
            best_avg_r2 = -np.inf # Use -infinity to ensure any valid R2 is better
            best_model_info = None
            # Type labels may be non-string values, so convert before changing case
            type_name = str(var_type)

            print(f"\\n📊 {type_name.upper()} MODELS:")
            if not models:
                print(" No models found for this type.")
                continue

            for model_name, perf_data in models.items():
                # Calculate average R2, ignoring NaNs
                r2_values = [v for v in perf_data['r2_values'] if v is not None and not np.isnan(v)]
                avg_r2 = np.mean(r2_values) if r2_values else -np.inf # Handle case with no valid R2

                # Calculate average RMSE, ignoring NaNs
                rmse_values = [v for v in perf_data['rmse_values'] if v is not None and not np.isnan(v)]
                avg_rmse = np.mean(rmse_values) if rmse_values else np.inf # Handle case with no valid RMSE

                n_exp = len(perf_data['experiments'])

                print(f" {model_name}:")
                print(f" Average R² = {avg_r2:.4f}" if avg_r2 > -np.inf else " Average R² = N/A")
                print(f" Average RMSE = {avg_rmse:.4f}" if avg_rmse < np.inf else " Average RMSE = N/A")
                print(f" Tested in {n_exp} experiments")

                # Selection logic: prioritize higher average R2. Could add secondary criteria (e.g., lower RMSE, consistency)
                if avg_r2 > best_avg_r2:
                    best_avg_r2 = avg_r2
                    best_model_info = {
                        'name': model_name,
                        'avg_r2': avg_r2,
                        'avg_rmse': avg_rmse,
                        'n_experiments': n_exp
                    }
                elif avg_r2 == best_avg_r2 and avg_rmse < (best_model_info['avg_rmse'] if best_model_info and best_model_info['avg_rmse'] < np.inf else np.inf):
                    # Tie-breaking: prefer lower average RMSE if R2 is the same
                    best_model_info = {
                        'name': model_name,
                        'avg_r2': avg_r2,
                        'avg_rmse': avg_rmse,
                        'n_experiments': n_exp
                    }


            if best_model_info and type_name.lower() in ['biomass', 'substrate', 'product', 'all_types']:
                # Assign to standard keys if they exist
                target_key = type_name.lower() if type_name.lower() in ['biomass', 'substrate', 'product'] else 'overall'
                overall_best_models[target_key] = best_model_info
                print(f"\\n 🏆 BEST {type_name.upper()} MODEL: {best_model_info['name']} (Avg R²={best_model_info['avg_r2']:.4f})" if best_model_info['avg_r2'] > -np.inf else f"\\n 🏆 BEST {type_name.upper()} MODEL: {best_model_info['name']} (Avg R²=N/A)")
            elif best_model_info:
                # Add other types found
                overall_best_models[var_type] = best_model_info


        self.overall_best_models = overall_best_models
        return overall_best_models


    def create_comparison_visualizations(self):
        \"\"\"Create visualizations comparing models across experiments\"\"\"
        if not hasattr(self, 'best_models_by_experiment') or not self.best_models_by_experiment:
            print("⚠️ No analysis results to visualize.")
            return # Exit if no data

        # Prepare data for visualization - focusing on R2 for best models per experiment/type
        plot_data = []
        for exp, results in self.best_models_by_experiment.items():
            for var_type, var_results in results.items():
                plot_data.append({
                    'Experiment': exp,
                    'Variable_Type': var_type,
                    'Best_Model': var_results.get('best_model'),
                    'R2': var_results.get('r2')
                })

        plot_df = pd.DataFrame(plot_data)
        plot_df = plot_df.dropna(subset=['R2']) # Only plot entries with R2

        if plot_df.empty:
            print("⚠️ No valid R² data available for visualization.")
            return

        # Use Seaborn for better aesthetics
        plt.figure(figsize=(14, 8))
        sns.barplot(data=plot_df, x='Experiment', y='R2', hue='Variable_Type', palette='viridis')

        plt.title('Best Model R² Comparison by Experiment and Variable Type', fontsize=16)
        plt.xlabel('Experiment', fontsize=12)
        plt.ylabel('R²', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.ylim(0, 1.05) # R2 is typically between 0 and 1
        plt.legend(title='Variable Type')
        plt.tight_layout() # Adjust layout to prevent labels overlapping
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.show()

        # Optional: Add more plots if needed, e.g., parameter trends


    def generate_summary_table(self) -> pd.DataFrame:
        \"\"\"Generate a summary table of best models by experiment and type\"\"\"
        if not hasattr(self, 'best_models_by_experiment') or not self.best_models_by_experiment:
            print("⚠️ No analysis results to generate summary table.")
            return pd.DataFrame()

        summary_data = []

        for exp, results in self.best_models_by_experiment.items():
            for var_type, var_results in results.items():
                summary_data.append({
                    'Experiment': exp,
                    'Variable_Type': var_type,
                    'Best_Model': var_results.get('best_model', 'N/A'),
                    'R2': var_results.get('r2', np.nan),
                    'RMSE': var_results.get('rmse', np.nan)
                })

        summary_df = pd.DataFrame(summary_data)

        print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
        print("="*80)
        if not summary_df.empty:
            # Format R2 and RMSE for display
            summary_df_display = summary_df.copy()
            if 'R2' in summary_df_display.columns:
                summary_df_display['R2'] = summary_df_display['R2'].apply(lambda x: f'{x:.4f}' if pd.notna(x) else 'N/A')
            if 'RMSE' in summary_df_display.columns:
                summary_df_display['RMSE'] = summary_df_display['RMSE'].apply(lambda x: f'{x:.4f}' if pd.notna(x) else 'N/A')

            print(summary_df_display.to_string(index=False))
        else:
            print("No data to display in the summary table.")

        return summary_df

# Example usage for the fallback code structure
# Note: The AI-generated code should ideally replace this example usage
# but this part demonstrates how the generated code might be used.
if __name__ == "__main__":
    print("🧬 Experimental Model Comparison System (Fallback Code Example)")
    print("="*60)

    # --- Placeholder Example Data ---
    # This data structure should match the format the AI expects and uses
    # in the generated code. It includes 'Experiment' and 'Type'.
    fallback_example_data = {
        'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
                       'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
                       'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
        'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
                  'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
                  'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
        'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
                 'Substrate', 'Substrate', 'Substrate', 'Substrate',
                 'Product', 'Product', 'Product', 'Product'],
        'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
               0.9723, 0.9856, 0.9698, 0.9812,
               0.9634, 0.9512, 0.9687, 0.9423],
        'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
                 0.0312, 0.0245, 0.0334, 0.0289,
                 0.0412, 0.0523, 0.0389, 0.0567],
        'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
                   np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
        'Ks': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
               2.1, 1.8, 2.3, 1.9, np.nan, np.nan, np.nan, np.nan]
    }

    # Create analyzer instance using the fallback data
    analyzer = ExperimentalModelAnalyzer(results_df=pd.DataFrame(fallback_example_data))

    # Analyze by experiment
    analysis_results = analyzer.analyze_by_experiment()

    # Determine overall best models
    overall_best = analyzer._determine_overall_best_models()
    print(f"Overall Best Models (Determined by Fallback): {overall_best}")


    # Create visualizations (will use the best_models_by_experiment attribute)
    print("\\nAttempting to create visualizations...")
    try:
        analyzer.create_comparison_visualizations()
    except Exception as e:
        print(f"Error creating visualization: {e}")
        print("This might happen if data structure or plotting logic is not fully compatible.")


    # Generate summary table
    summary_table = analyzer.generate_summary_table()

    print("\\n✨ Fallback Analysis complete!")

# --- End of Fallback Code Example ---
"""
    return code
1282
+
1283
 
1284
  # Estado global para almacenar resultados
1285
  class AppState:
 
1301
  'pt': "Nenhuma análise disponível para exportar"
1302
  }
1303
  return error_msg.get(language, error_msg['en']), ""
1304
+
1305
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1306
+
1307
  try:
1308
+ # Ensure ReportExporter is used correctly with its static methods
1309
  if export_format == "DOCX":
1310
  filename = f"biotech_analysis_report_{timestamp}.docx"
1311
  ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
1312
  else: # PDF
1313
  filename = f"biotech_analysis_report_{timestamp}.pdf"
1314
  ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)
1315
+
1316
  success_msg = TRANSLATIONS[language]['report_exported']
1317
  return f"{success_msg} {filename}", filename
1318
  except Exception as e:
1319
+ # Provide more specific error details for export
1320
+ return f"Error exporting report: {str(e)}", ""
1321
+
1322
 
1323
  # Interfaz Gradio con soporte multiidioma y temas
1324
  def create_interface():
1325
  # Estado inicial
1326
  current_theme = "light"
1327
  current_language = "en"
1328
+
1329
  def update_interface_language(language):
1330
  """Actualiza el idioma de la interfaz"""
1331
  app_state.current_language = language
1332
  t = TRANSLATIONS[language]
1333
+
1334
+ # Build model choices string with descriptions for info text
1335
+ model_info_str = ""
1336
+ # Default model might change based on QWEN_MODELS keys
1337
+ default_model_key = list(QWEN_MODELS.keys())[0] if QWEN_MODELS else "Qwen/Qwen3-14B"
1338
+ if default_model_key in QWEN_MODELS:
1339
+ model_info_str = f"{t['best_for']}: {QWEN_MODELS[default_model_key]['best_for']}"
1340
+
1341
+
1342
  return [
1343
  gr.update(value=f"# {t['title']}"), # title_text
1344
  gr.update(value=t['subtitle']), # subtitle_text
1345
  gr.update(label=t['upload_files']), # files_input
1346
+ gr.update(label=t['select_model'], info=model_info_str), # model_selector
1347
  gr.update(label=t['select_language']), # language_selector
1348
  gr.update(label=t['select_theme']), # theme_selector
1349
+ gr.update(label=t['detail_level'], choices=[(t['detailed'], "detailed"), (t['summarized'], "summarized")]), # detail_level
1350
  gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
1351
  gr.update(value=t['analyze_button']), # analyze_btn
1352
  gr.update(label=t['export_format']), # export_format
 
1355
  gr.update(label=t['implementation_code']), # code_output
1356
  gr.update(label=t['data_format']) # data_format_accordion
1357
  ]
1358
+
1359
  def process_and_store(files, model, detail, language, additional_specs):
1360
+ """Procesa files y almacena resultados"""
1361
  if not files:
1362
  error_msg = TRANSLATIONS[language]['error_no_files']
1363
+ app_state.current_analysis = error_msg
1364
+ app_state.current_code = generate_implementation_code(error_msg) # Provide fallback code even on file error
1365
+ return error_msg, app_state.current_code
1366
+
1367
+ # Assuming files is a list of strings (filepaths) from Gradio
1368
  analysis, code = process_files(files, model, detail, language, additional_specs)
1369
+
1370
+ # Store results in app state
1371
  app_state.current_analysis = analysis
1372
  app_state.current_code = code
1373
+
1374
  return analysis, code
1375
+
1376
+ # Get default model key safely
1377
+ default_qwen_model_key = list(QWEN_MODELS.keys())[0] if QWEN_MODELS else ""
1378
+ default_qwen_model_info = QWEN_MODELS.get(default_qwen_model_key, {})
1379
+ default_model_info_str = f"{TRANSLATIONS[current_language]['best_for']}: {default_qwen_model_info.get('best_for', 'N/A')}"
1380
+
1381
+
1382
  with gr.Blocks(theme=THEMES[current_theme]) as demo:
1383
  # Componentes de UI
1384
  with gr.Row():
 
1388
  with gr.Column(scale=1):
1389
  with gr.Row():
1390
  language_selector = gr.Dropdown(
1391
+ choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
1392
  ("Deutsch", "de"), ("Português", "pt")],
1393
+ value=current_language,
1394
  label=TRANSLATIONS[current_language]['select_language'],
1395
  interactive=True
1396
  )
1397
  theme_selector = gr.Dropdown(
1398
  choices=[("Light", "light"), ("Dark", "dark")],
1399
+ value=current_theme,
1400
  label=TRANSLATIONS[current_language]['select_theme'],
1401
  interactive=True
1402
  )
1403
+
1404
  with gr.Row():
1405
  with gr.Column(scale=1):
1406
  files_input = gr.File(
1407
  label=TRANSLATIONS[current_language]['upload_files'],
1408
  file_count="multiple",
1409
+ file_types=[".csv", ".xlsx", ".xls"], # Focusing on data files
1410
+ type="filepath" # Get file path as string
1411
  )
1412
 
1413
  model_selector = gr.Dropdown(
1414
+ choices=list(QWEN_MODELS.keys()),
1415
+ value=default_qwen_model_key,
1416
  label=TRANSLATIONS[current_language]['select_model'],
1417
+ info=default_model_info_str
1418
  )
1419
+
1420
  detail_level = gr.Radio(
1421
  choices=[
1422
  (TRANSLATIONS[current_language]['detailed'], "detailed"),
 
1425
  value="detailed",
1426
  label=TRANSLATIONS[current_language]['detail_level']
1427
  )
1428
+
1429
  # Nueva entrada para especificaciones adicionales
1430
  additional_specs = gr.Textbox(
1431
  label=TRANSLATIONS[current_language]['additional_specs'],
 
1434
  max_lines=5,
1435
  interactive=True
1436
  )
1437
+
1438
  analyze_btn = gr.Button(
1439
  TRANSLATIONS[current_language]['analyze_button'],
1440
  variant="primary",
1441
  size="lg"
1442
  )
1443
+
1444
  gr.Markdown("---")
1445
+
1446
  export_format = gr.Radio(
1447
  choices=["DOCX", "PDF"],
1448
  value="PDF",
1449
  label=TRANSLATIONS[current_language]['export_format']
1450
  )
1451
+
1452
  export_btn = gr.Button(
1453
  TRANSLATIONS[current_language]['export_button'],
1454
  variant="secondary"
1455
  )
1456
+
1457
  export_status = gr.Textbox(
1458
  label="Export Status",
1459
  interactive=False,
1460
  visible=False
1461
  )
1462
+
1463
  export_file = gr.File(
1464
  label="Download Report",
1465
  visible=False
1466
  )
1467
+
1468
  with gr.Column(scale=2):
1469
  analysis_output = gr.Markdown(
1470
  label=TRANSLATIONS[current_language]['comparative_analysis']
1471
  )
1472
+
1473
  code_output = gr.Code(
1474
  label=TRANSLATIONS[current_language]['implementation_code'],
1475
  language="python",
1476
  interactive=True,
1477
  lines=20
1478
  )
1479
+
1480
  data_format_accordion = gr.Accordion(
1481
  label=TRANSLATIONS[current_language]['data_format'],
1482
  open=False
1483
  )
1484
+
1485
  with data_format_accordion:
1486
  gr.Markdown("""
1487
  ### Expected CSV/Excel structure:
1488
+
1489
  | Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
1490
  |------------|-------|------|-----|------|-----|-----|--------|-------|------------|
1491
  | pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
1492
  | pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
1493
  | pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
1494
  | pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
1495
+
1496
  **Important columns:**
1497
+ - **Experiment**: Experimental condition identifier (Optional, but recommended for comparative analysis)
1498
+ - **Model**: Model name (e.g., Monod, Logistic)
1499
+ - **Type**: Variable type (Biomass, Substrate, Product) (Optional, but recommended for analysis by type)
1500
+ - **R2, RMSE**: Fit quality metrics (At least one needed for comparison)
1501
+ - **Parameters**: Columns for model-specific parameters (e.g., mu_max, Ks, Xmax)
1502
  """)
1503
+
1504
+ # Definir ejemplos (Update example paths if necessary)
1505
  examples = gr.Examples(
1506
  examples=[
1507
+ [["examples/biomass_models_comparison.csv"], list(QWEN_MODELS.keys())[0] if QWEN_MODELS else "", "detailed", ""],
1508
+ [["examples/substrate_kinetics_results.xlsx"], list(QWEN_MODELS.keys())[0] if QWEN_MODELS else "", "summarized", "Focus on temperature effects"]
1509
  ],
1510
  inputs=[files_input, model_selector, detail_level, additional_specs],
1511
  label=TRANSLATIONS[current_language]['examples']
1512
  )
1513
+
1514
+
1515
  # Eventos - Actualizado para incluir additional_specs
1516
  language_selector.change(
1517
  update_interface_language,
 
1519
  outputs=[
1520
  title_text, subtitle_text, files_input, model_selector,
1521
  language_selector, theme_selector, detail_level, additional_specs,
1522
+ analyze_btn, export_format, export_btn, analysis_output,
1523
  code_output, data_format_accordion
1524
  ]
1525
  )
1526
+
1527
  def change_theme(theme_name):
1528
  """Cambia el tema de la interfaz"""
1529
+ # Note: Dynamic theme switching in Gradio might require a page reload for full effect.
1530
+ # This function primarily triggers the UI update but the theme itself is set at gr.Blocks creation.
1531
+ # Returning gr.Info is a common way to indicate the change.
1532
+ # To truly change theme dynamically, you might need Javascript or specific Gradio features.
1533
+ return gr.Info("Theme applied. May require page refresh for full effect on all components.")
1534
+
1535
  theme_selector.change(
1536
  change_theme,
1537
  inputs=[theme_selector],
1538
+ outputs=[] # No direct UI output change from this function in the current structure
1539
  )
1540
+
1541
  analyze_btn.click(
1542
  fn=process_and_store,
1543
  inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
1544
  outputs=[analysis_output, code_output]
1545
  )
1546
+
1547
  def handle_export(format, language):
1548
  status, file = export_report(format, language)
1549
+ # Check if the file was successfully created before making the download button visible
1550
+ if file and os.path.exists(file):
1551
+ return gr.update(value=status, visible=True), gr.update(value=file, visible=True, label=f"Download {format}")
1552
  else:
1553
+ # Hide the download button if no file was created
1554
+ return gr.update(value=status, visible=True), gr.update(value=None, visible=False)
1555
+
1556
+
1557
  export_btn.click(
1558
  fn=handle_export,
1559
  inputs=[export_format, language_selector],
1560
  outputs=[export_status, export_file]
1561
  )
1562
+
1563
  return demo
1564
 
1565
  # Función principal
1566
  def main():
1567
+ # Check for the specific API key required for Qwen
1568
+ if openai_client is None:
1569
+ print("⚠️ NEBIUS_API_KEY environment variable not found. Please configure it.")
1570
+ return gr.Interface(
1571
+ fn=lambda: TRANSLATIONS['en']['error_no_api'], # Display error message in UI
1572
+ inputs=None, # No inputs needed for just showing error
1573
+ outputs=gr.Textbox(label="Configuration Error"),
1574
+ title=TRANSLATIONS['en']['title'],
1575
+ description="Failed to initialize AI client.",
1576
+ theme=THEMES['light'] # Use a default theme
1577
+ )
1578
+
1579
+ # Proceed with creating the interface if client is initialized
1580
  return create_interface()
1581
 
1582
  # Para ejecución local
1583
  if __name__ == "__main__":
1584
+ # Ensure Gradio example paths exist for the examples section
1585
+ if not os.path.exists("examples"):
1586
+ os.makedirs("examples")
1587
+ # Create dummy example files if they don't exist
1588
+ if not os.path.exists("examples/biomass_models_comparison.csv"):
1589
+ dummy_csv_data = {'Experiment': ['ExpA', 'ExpA', 'ExpB', 'ExpB'],
1590
+ 'Model': ['Monod', 'Logistic', 'Monod', 'Logistic'],
1591
+ 'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass'],
1592
+ 'R2': [0.98, 0.97, 0.95, 0.96],
1593
+ 'RMSE': [0.02, 0.03, 0.04, 0.035],
1594
+ 'mu_max': [0.5, 0.48, 0.4, 0.38]}
1595
+ pd.DataFrame(dummy_csv_data).to_csv("examples/biomass_models_comparison.csv", index=False)
1596
+
1597
+ if not os.path.exists("examples/substrate_kinetics_results.xlsx"):
1598
+ dummy_excel_data = {'Experiment': ['Temp25', 'Temp25', 'Temp30', 'Temp30'],
1599
+ 'Model': ['First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate'],
1600
+ 'Type': ['Substrate', 'Substrate', 'Substrate', 'Substrate'],
1601
+ 'R2': [0.99, 0.98, 0.97, 0.985],
1602
+ 'RMSE': [0.015, 0.02, 0.025, 0.018],
1603
+ 'Ks': [1.5, 1.2, 1.8, 1.4]}
1604
+ pd.DataFrame(dummy_excel_data).to_excel("examples/substrate_kinetics_results.xlsx", index=False)
1605
+
1606
+
1607
  demo = main()
1608
  if demo:
1609
  demo.launch(
1610
  server_name="0.0.0.0",
1611
  server_port=7860,
 
1612
  share=False
1613
  )