C2MV commited on
Commit
7512c33
·
verified ·
1 Parent(s): 751ba70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +558 -255
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import PyPDF2
3
  import pandas as pd
4
  import numpy as np
@@ -25,24 +26,20 @@ from reportlab.pdfbase import pdfmetrics
25
  from reportlab.pdfbase.ttfonts import TTFont
26
  import matplotlib.pyplot as plt
27
  from datetime import datetime
28
- from openai import OpenAI
29
 
30
  # Configuración para HuggingFace
31
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
32
 
33
- # Inicializar cliente Qwen
34
- client = OpenAI(
35
- base_url="https://api.studio.nebius.com/v1/",
36
- api_key=os.environ.get("NEBIUS_API_KEY")
37
- )
38
 
39
- # Sistema de traducción
40
  TRANSLATIONS = {
41
  'en': {
42
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
43
  'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
44
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
45
- 'select_model': '🤖 Qwen Model',
46
  'select_language': '🌐 Language',
47
  'select_theme': '🎨 Theme',
48
  'detail_level': '📋 Analysis detail level',
@@ -59,7 +56,7 @@ TRANSLATIONS = {
59
  'dark': 'Dark',
60
  'best_for': 'Best for',
61
  'loading': 'Loading...',
62
- 'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets',
63
  'error_no_files': 'Please upload fitting result files to analyze',
64
  'report_exported': 'Report exported successfully as',
65
  'specialized_in': '🎯 Specialized in:',
@@ -67,24 +64,13 @@ TRANSLATIONS = {
67
  'what_analyzes': '🔍 What it specifically analyzes:',
68
  'tips': '💡 Tips for better results:',
69
  'additional_specs': '📝 Additional specifications for analysis',
70
- 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...',
71
- 'input_tokens': '🔢 Input tokens (0-1M)',
72
- 'output_tokens': '🔢 Output tokens (0-1M)',
73
- 'token_info': 'ℹ️ Token usage information',
74
- 'input_token_count': 'Input tokens used',
75
- 'output_token_count': 'Output tokens used',
76
- 'total_token_count': 'Total tokens used',
77
- 'token_cost': 'Estimated cost',
78
- 'thinking_process': '🧠 Thinking Process',
79
- 'analysis_report': '📊 Analysis Report',
80
- 'code_output': '💻 Implementation Code',
81
- 'token_usage': '💰 Token Usage'
82
  },
83
  'es': {
84
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
85
  'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
86
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
87
- 'select_model': '🤖 Modelo Qwen',
88
  'select_language': '🌐 Idioma',
89
  'select_theme': '🎨 Tema',
90
  'detail_level': '📋 Nivel de detalle del análisis',
@@ -101,7 +87,7 @@ TRANSLATIONS = {
101
  'dark': 'Oscuro',
102
  'best_for': 'Mejor para',
103
  'loading': 'Cargando...',
104
- 'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space',
105
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
106
  'report_exported': 'Reporte exportado exitosamente como',
107
  'specialized_in': '🎯 Especializado en:',
@@ -109,18 +95,100 @@ TRANSLATIONS = {
109
  'what_analyzes': '🔍 Qué analiza específicamente:',
110
  'tips': '💡 Tips para mejores resultados:',
111
  'additional_specs': '📝 Especificaciones adicionales para el análisis',
112
- 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...',
113
- 'input_tokens': '🔢 Tokens de entrada (0-1M)',
114
- 'output_tokens': '🔢 Tokens de salida (0-1M)',
115
- 'token_info': 'ℹ️ Información de uso de tokens',
116
- 'input_token_count': 'Tokens de entrada usados',
117
- 'output_token_count': 'Tokens de salida usados',
118
- 'total_token_count': 'Total de tokens usados',
119
- 'token_cost': 'Costo estimado',
120
- 'thinking_process': '🧠 Proceso de Pensamiento',
121
- 'analysis_report': '📊 Reporte de Análisis',
122
- 'code_output': '💻 Código de Implementación',
123
- 'token_usage': '💰 Uso de Tokens'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  }
126
 
@@ -224,31 +292,37 @@ class ModelRegistry:
224
  # Instancia global del registro
225
  model_registry = ModelRegistry()
226
 
227
- # Modelos de Qwen disponibles
228
- QWEN_MODELS = {
229
- "Qwen/Qwen3-14B": {
230
- "name": "Qwen 3 14B",
231
- "description": "Modelo potente multilingüe de Alibaba",
232
- "max_tokens": 1000000,
233
- "best_for": "Análisis complejos y detallados",
234
- "input_cost": 0.0000007,
235
- "output_cost": 0.0000021
236
  },
237
- "Qwen/Qwen3-7B": {
238
- "name": "Qwen 3 7B",
239
- "description": "Modelo equilibrado para uso general",
240
- "max_tokens": 1000000,
241
- "best_for": "Análisis rápidos y precisos",
242
- "input_cost": 0.00000035,
243
- "output_cost": 0.00000105
244
  },
245
- "Qwen/Qwen1.5-14B": {
246
- "name": "Qwen 1.5 14B",
247
- "description": "Modelo avanzado para tareas complejas",
248
- "max_tokens": 1000000,
249
- "best_for": "Análisis técnicos detallados",
250
- "input_cost": 0.0000007,
251
- "output_cost": 0.0000021
 
 
 
 
 
 
 
 
 
 
252
  }
253
  }
254
 
@@ -318,6 +392,9 @@ class ReportExporter:
318
  title_text = {
319
  'en': 'Comparative Analysis Report - Biotechnological Models',
320
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
 
 
 
321
  }
322
 
323
  doc.add_heading(title_text.get(language, title_text['en']), 0)
@@ -326,6 +403,9 @@ class ReportExporter:
326
  date_text = {
327
  'en': 'Generated on',
328
  'es': 'Generado el',
 
 
 
329
  }
330
  doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
331
  doc.add_paragraph()
@@ -394,6 +474,9 @@ class ReportExporter:
394
  title_text = {
395
  'en': 'Comparative Analysis Report - Biotechnological Models',
396
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
 
 
 
397
  }
398
 
399
  story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
@@ -402,6 +485,9 @@ class ReportExporter:
402
  date_text = {
403
  'en': 'Generated on',
404
  'es': 'Generado el',
 
 
 
405
  }
406
  story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
407
  story.append(Spacer(1, 0.5*inch))
@@ -439,28 +525,13 @@ class ReportExporter:
439
  return filename
440
 
441
  class AIAnalyzer:
442
- """Clase para análisis con IA usando Qwen"""
443
 
444
  def __init__(self, client, model_registry):
445
  self.client = client
446
  self.model_registry = model_registry
447
- self.token_usage = {
448
- 'input_tokens': 0,
449
- 'output_tokens': 0,
450
- 'total_tokens': 0,
451
- 'estimated_cost': 0.0
452
- }
453
 
454
- def reset_token_usage(self):
455
- """Reinicia el contador de tokens"""
456
- self.token_usage = {
457
- 'input_tokens': 0,
458
- 'output_tokens': 0,
459
- 'total_tokens': 0,
460
- 'estimated_cost': 0.0
461
- }
462
-
463
- def detect_analysis_type(self, content: Union[str, pd.DataFrame], max_tokens: int = 1000) -> AnalysisType:
464
  """Detecta el tipo de análisis necesario"""
465
  if isinstance(content, pd.DataFrame):
466
  columns = [col.lower() for col in content.columns]
@@ -489,20 +560,13 @@ class AIAnalyzer:
489
  """
490
 
491
  try:
492
- response = self.client.chat.completions.create(
493
- model="Qwen/Qwen3-14B",
494
- max_tokens=min(max_tokens, 10000),
495
- temperature=0.0,
496
- messages=[{"role": "user", "content": f"{prompt}\n\n{content[:10000]}"}]
497
  )
498
 
499
- # Registrar uso de tokens
500
- if response.usage:
501
- self.token_usage['input_tokens'] += response.usage.prompt_tokens
502
- self.token_usage['output_tokens'] += response.usage.completion_tokens
503
- self.token_usage['total_tokens'] += response.usage.total_tokens
504
-
505
- result = response.choices[0].message.content.strip().upper()
506
  if "MODEL" in result:
507
  return AnalysisType.MATHEMATICAL_MODEL
508
  elif "RESULTS" in result:
@@ -512,8 +576,7 @@ class AIAnalyzer:
512
  else:
513
  return AnalysisType.UNKNOWN
514
 
515
- except Exception as e:
516
- print(f"Error en detección de tipo: {str(e)}")
517
  return AnalysisType.UNKNOWN
518
 
519
  def get_language_prompt_prefix(self, language: str) -> str:
@@ -521,13 +584,15 @@ class AIAnalyzer:
521
  prefixes = {
522
  'en': "Please respond in English. ",
523
  'es': "Por favor responde en español. ",
 
 
 
524
  }
525
  return prefixes.get(language, prefixes['en'])
526
 
527
- def analyze_fitting_results(self, data: pd.DataFrame, qwen_model: str, detail_level: str = "detailed",
528
- language: str = "en", additional_specs: str = "",
529
- max_input_tokens: int = 100000, max_output_tokens: int = 100000) -> Dict:
530
- """Analiza resultados de ajuste de modelos usando Qwen"""
531
 
532
  # Preparar resumen completo de los datos
533
  data_summary = f"""
@@ -537,10 +602,16 @@ class AIAnalyzer:
537
  - Columns: {list(data.columns)}
538
  - Number of models evaluated: {len(data)}
539
 
540
- Complete data (first 5 rows):
541
- {data.head().to_string()}
 
 
 
542
  """
543
 
 
 
 
544
  # Obtener prefijo de idioma
545
  lang_prefix = self.get_language_prompt_prefix(language)
546
 
@@ -700,32 +771,21 @@ class AIAnalyzer:
700
  """
701
 
702
  try:
703
- # Análisis principal
704
- response = self.client.chat.completions.create(
705
- model=qwen_model,
706
- max_tokens=min(max_output_tokens, 100000),
707
- temperature=0.3,
708
  messages=[{
709
  "role": "user",
710
  "content": f"{prompt}\n\n{data_summary}"
711
  }]
712
  )
713
 
714
- # Registrar uso de tokens
715
- if response.usage:
716
- self.token_usage['input_tokens'] += response.usage.prompt_tokens
717
- self.token_usage['output_tokens'] += response.usage.completion_tokens
718
- self.token_usage['total_tokens'] += response.usage.total_tokens
719
- self.token_usage['estimated_cost'] = self.calculate_cost(qwen_model, response.usage)
720
-
721
- analysis_result = response.choices[0].message.content
722
-
723
- # Generación de código
724
  code_prompt = f"""
725
  {lang_prefix}
726
 
727
  Based on the analysis and this actual data:
728
- {data.head().to_string()}
729
 
730
  Generate Python code that:
731
 
@@ -752,29 +812,19 @@ class AIAnalyzer:
752
  Format: Complete, executable Python code with actual data values embedded.
753
  """
754
 
755
- code_response = self.client.chat.completions.create(
756
- model=qwen_model,
757
- max_tokens=min(max_output_tokens, 100000),
758
- temperature=0.1,
759
  messages=[{
760
  "role": "user",
761
  "content": code_prompt
762
  }]
763
  )
764
 
765
- # Registrar uso de tokens
766
- if code_response.usage:
767
- self.token_usage['input_tokens'] += code_response.usage.prompt_tokens
768
- self.token_usage['output_tokens'] += code_response.usage.completion_tokens
769
- self.token_usage['total_tokens'] += code_response.usage.total_tokens
770
- self.token_usage['estimated_cost'] += self.calculate_cost(qwen_model, code_response.usage)
771
-
772
- code_result = code_response.choices[0].message.content
773
-
774
  return {
775
  "tipo": "Comparative Analysis of Mathematical Models",
776
- "analisis_completo": analysis_result,
777
- "codigo_implementacion": code_result,
778
  "resumen_datos": {
779
  "n_modelos": len(data),
780
  "columnas": list(data.columns),
@@ -782,35 +832,20 @@ class AIAnalyzer:
782
  for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
783
  "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
784
  "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
 
785
  }
786
  }
787
 
788
  except Exception as e:
789
- print(f"Error en análisis: {str(e)}")
790
  return {"error": str(e)}
791
-
792
- def calculate_cost(self, model_name: str, usage) -> float:
793
- """Calcula el costo estimado en dólares"""
794
- if model_name not in QWEN_MODELS:
795
- return 0.0
796
-
797
- model_info = QWEN_MODELS[model_name]
798
- input_cost = model_info.get('input_cost', 0.0)
799
- output_cost = model_info.get('output_cost', 0.0)
800
-
801
- return (usage.prompt_tokens * input_cost) + (usage.completion_tokens * output_cost)
802
 
803
- def process_files(files, qwen_model: str, detail_level: str = "detailed",
804
- language: str = "en", additional_specs: str = "",
805
- max_input_tokens: int = 10000, max_output_tokens: int = 10000) -> Tuple[str, str, str, Dict]:
806
- """Procesa múltiples archivos usando Qwen"""
807
  processor = FileProcessor()
808
  analyzer = AIAnalyzer(client, model_registry)
809
- analyzer.reset_token_usage()
810
-
811
  results = []
812
  all_code = []
813
- thinking_process = []
814
 
815
  for file in files:
816
  if file is None:
@@ -825,26 +860,20 @@ def process_files(files, qwen_model: str, detail_level: str = "detailed",
825
  if file_ext in ['.csv', '.xlsx', '.xls']:
826
  if language == 'es':
827
  results.append(f"## 📊 Análisis de Resultados: {file_name}")
828
- thinking_process.append(f"### 🔍 Procesando archivo: {file_name}")
829
  else:
830
  results.append(f"## 📊 Results Analysis: {file_name}")
831
- thinking_process.append(f"### 🔍 Processing file: {file_name}")
832
 
833
  if file_ext == '.csv':
834
  df = processor.read_csv(file_content)
835
- thinking_process.append("✅ Archivo CSV leído correctamente" if language == 'es' else "✅ CSV file read successfully")
836
  else:
837
  df = processor.read_excel(file_content)
838
- thinking_process.append("✅ Archivo Excel leído correctamente" if language == 'es' else "✅ Excel file read successfully")
839
 
840
  if df is not None:
841
- analysis_type = analyzer.detect_analysis_type(df, max_input_tokens)
842
- thinking_process.append(f"🔎 Tipo de análisis detectado: {analysis_type.value}" if language == 'es' else f"🔎 Analysis type detected: {analysis_type.value}")
843
 
844
  if analysis_type == AnalysisType.FITTING_RESULTS:
845
  result = analyzer.analyze_fitting_results(
846
- df, qwen_model, detail_level, language, additional_specs,
847
- max_input_tokens, max_output_tokens
848
  )
849
 
850
  if language == 'es':
@@ -857,56 +886,367 @@ def process_files(files, qwen_model: str, detail_level: str = "detailed",
857
  all_code.append(result["codigo_implementacion"])
858
 
859
  results.append("\n---\n")
860
- thinking_process.append("\n---\n")
861
 
862
  analysis_text = "\n".join(results)
863
  code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
864
- thinking_text = "\n".join(thinking_process)
865
 
866
- # Agregar información de tokens al proceso de pensamiento
867
- token_info = analyzer.token_usage
868
- if language == 'es':
869
- thinking_text += f"""
870
-
871
- ### 💰 USO DE TOKENS
872
- - Tokens de entrada usados: {token_info['input_tokens']}
873
- - Tokens de salida usados: {token_info['output_tokens']}
874
- - Total de tokens: {token_info['total_tokens']}
875
- - Costo estimado: ${token_info['estimated_cost']:.6f}
876
- """
877
- else:
878
- thinking_text += f"""
879
-
880
- ### 💰 TOKEN USAGE
881
- - Input tokens used: {token_info['input_tokens']}
882
- - Output tokens used: {token_info['output_tokens']}
883
- - Total tokens: {token_info['total_tokens']}
884
- - Estimated cost: ${token_info['estimated_cost']:.6f}
885
- """
886
-
887
- return thinking_text, analysis_text, code_text, token_info
888
 
889
  def generate_implementation_code(analysis_results: str) -> str:
890
  """Genera código de implementación con análisis por experimento"""
891
- # (El código de implementación se mantiene igual que en la versión anterior)
892
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
893
 
894
  # Estado global para almacenar resultados
895
  class AppState:
896
  def __init__(self):
897
- self.current_thinking = ""
898
  self.current_analysis = ""
899
  self.current_code = ""
900
  self.current_language = "en"
901
- self.token_usage = {}
902
 
903
  app_state = AppState()
904
 
905
  def export_report(export_format: str, language: str) -> Tuple[str, str]:
906
  """Exporta el reporte al formato seleccionado"""
907
  if not app_state.current_analysis:
908
- error_msg = TRANSLATIONS[language]['error_no_files']
909
- return error_msg, ""
 
 
 
 
 
 
910
 
911
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
912
 
@@ -943,47 +1283,26 @@ def create_interface():
943
  gr.update(label=t['select_theme']), # theme_selector
944
  gr.update(label=t['detail_level']), # detail_level
945
  gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
946
- gr.update(label=t['input_tokens']), # input_tokens_slider
947
- gr.update(label=t['output_tokens']), # output_tokens_slider
948
  gr.update(value=t['analyze_button']), # analyze_btn
949
  gr.update(label=t['export_format']), # export_format
950
  gr.update(value=t['export_button']), # export_btn
951
- gr.update(label=t['thinking_process']), # thinking_output
952
- gr.update(label=t['analysis_report']), # analysis_output
953
- gr.update(label=t['code_output']), # code_output
954
- gr.update(label=t['token_usage']), # token_usage_output
955
  gr.update(label=t['data_format']) # data_format_accordion
956
  ]
957
 
958
- def process_and_store(files, model, detail, language, additional_specs, input_tokens, output_tokens):
959
  """Procesa archivos y almacena resultados"""
960
  if not files:
961
  error_msg = TRANSLATIONS[language]['error_no_files']
962
- return error_msg, "", "", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0, "estimated_cost": 0.0}
963
-
964
- thinking, analysis, code, token_usage = process_files(
965
- files, model, detail, language, additional_specs,
966
- input_tokens, output_tokens
967
- )
968
 
969
- app_state.current_thinking = thinking
970
  app_state.current_analysis = analysis
971
  app_state.current_code = code
972
- app_state.token_usage = token_usage
973
-
974
- # Formatear información de tokens
975
- t = TRANSLATIONS[language]
976
- token_info = f"""
977
- ### {t['token_info']}
978
- - **{t['input_token_count']}:** {token_usage['input_tokens']}
979
- - **{t['output_token_count']}:** {token_usage['output_tokens']}
980
- - **{t['total_token_count']}:** {token_usage['total_tokens']}
981
- - **{t['token_cost']}:** ${token_usage['estimated_cost']:.6f}
982
- """
983
-
984
- return thinking, analysis, code, token_info
985
 
986
- with gr.Blocks(theme=THEMES[current_theme], title="Biotech Model Analyzer") as demo:
987
  # Componentes de UI
988
  with gr.Row():
989
  with gr.Column(scale=3):
@@ -992,7 +1311,8 @@ def create_interface():
992
  with gr.Column(scale=1):
993
  with gr.Row():
994
  language_selector = gr.Dropdown(
995
- choices=[("English", "en"), ("Español", "es")],
 
996
  value="en",
997
  label=TRANSLATIONS[current_language]['select_language'],
998
  interactive=True
@@ -1014,10 +1334,10 @@ def create_interface():
1014
  )
1015
 
1016
  model_selector = gr.Dropdown(
1017
- choices=list(QWEN_MODELS.keys()),
1018
- value="Qwen/Qwen3-14B",
1019
  label=TRANSLATIONS[current_language]['select_model'],
1020
- info=f"{TRANSLATIONS[current_language]['best_for']}: {QWEN_MODELS['Qwen/Qwen3-14B']['best_for']}"
1021
  )
1022
 
1023
  detail_level = gr.Radio(
@@ -1029,6 +1349,7 @@ def create_interface():
1029
  label=TRANSLATIONS[current_language]['detail_level']
1030
  )
1031
 
 
1032
  additional_specs = gr.Textbox(
1033
  label=TRANSLATIONS[current_language]['additional_specs'],
1034
  placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
@@ -1037,25 +1358,6 @@ def create_interface():
1037
  interactive=True
1038
  )
1039
 
1040
- # Nuevos sliders para tokens
1041
- input_tokens_slider = gr.Slider(
1042
- minimum=1000,
1043
- maximum=1000000,
1044
- value=10000,
1045
- step=1000,
1046
- label=TRANSLATIONS[current_language]['input_tokens'],
1047
- info="Máximo tokens para entrada (0-1 millón)"
1048
- )
1049
-
1050
- output_tokens_slider = gr.Slider(
1051
- minimum=1000,
1052
- maximum=1000000,
1053
- value=30000,
1054
- step=1000,
1055
- label=TRANSLATIONS[current_language]['output_tokens'],
1056
- info="Máximo tokens para salida (0-1 millón)"
1057
- )
1058
-
1059
  analyze_btn = gr.Button(
1060
  TRANSLATIONS[current_language]['analyze_button'],
1061
  variant="primary",
@@ -1087,24 +1389,15 @@ def create_interface():
1087
  )
1088
 
1089
  with gr.Column(scale=2):
1090
- # Nuevos outputs separados
1091
- thinking_output = gr.Markdown(
1092
- label=TRANSLATIONS[current_language]['thinking_process']
1093
- )
1094
-
1095
  analysis_output = gr.Markdown(
1096
- label=TRANSLATIONS[current_language]['analysis_report']
1097
  )
1098
 
1099
  code_output = gr.Code(
1100
- label=TRANSLATIONS[current_language]['code_output'],
1101
  language="python",
1102
  interactive=True,
1103
- lines=15
1104
- )
1105
-
1106
- token_usage_output = gr.Markdown(
1107
- label=TRANSLATIONS[current_language]['token_usage']
1108
  )
1109
 
1110
  data_format_accordion = gr.Accordion(
@@ -1131,21 +1424,32 @@ def create_interface():
1131
  - **Parameters**: Model-specific parameters
1132
  """)
1133
 
1134
- # Eventos
 
 
 
 
 
 
 
 
 
 
1135
  language_selector.change(
1136
  update_interface_language,
1137
  inputs=[language_selector],
1138
  outputs=[
1139
  title_text, subtitle_text, files_input, model_selector,
1140
  language_selector, theme_selector, detail_level, additional_specs,
1141
- input_tokens_slider, output_tokens_slider, analyze_btn, export_format,
1142
- export_btn, thinking_output, analysis_output, code_output,
1143
- token_usage_output, data_format_accordion
1144
  ]
1145
  )
1146
 
1147
  def change_theme(theme_name):
1148
  """Cambia el tema de la interfaz"""
 
 
1149
  return gr.Info("Theme will be applied on next page load")
1150
 
1151
  theme_selector.change(
@@ -1156,9 +1460,8 @@ def create_interface():
1156
 
1157
  analyze_btn.click(
1158
  fn=process_and_store,
1159
- inputs=[files_input, model_selector, detail_level, language_selector,
1160
- additional_specs, input_tokens_slider, output_tokens_slider],
1161
- outputs=[thinking_output, analysis_output, code_output, token_usage_output]
1162
  )
1163
 
1164
  def handle_export(format, language):
@@ -1178,8 +1481,8 @@ def create_interface():
1178
 
1179
  # Función principal
1180
  def main():
1181
- if not os.getenv("NEBIUS_API_KEY"):
1182
- print("⚠️ Configure NEBIUS_API_KEY in HuggingFace Space secrets")
1183
  return gr.Interface(
1184
  fn=lambda x: TRANSLATIONS['en']['error_no_api'],
1185
  inputs=gr.Textbox(),
 
1
  import gradio as gr
2
+ import anthropic
3
  import PyPDF2
4
  import pandas as pd
5
  import numpy as np
 
26
  from reportlab.pdfbase.ttfonts import TTFont
27
  import matplotlib.pyplot as plt
28
  from datetime import datetime
 
29
 
30
  # Configuración para HuggingFace
31
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
32
 
33
+ # Inicializar cliente Anthropic
34
+ client = anthropic.Anthropic()
 
 
 
35
 
36
+ # Sistema de traducción - Actualizado con nuevas entradas
37
  TRANSLATIONS = {
38
  'en': {
39
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
40
  'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
41
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
42
+ 'select_model': '🤖 Claude Model',
43
  'select_language': '🌐 Language',
44
  'select_theme': '🎨 Theme',
45
  'detail_level': '📋 Analysis detail level',
 
56
  'dark': 'Dark',
57
  'best_for': 'Best for',
58
  'loading': 'Loading...',
59
+ 'error_no_api': 'Please configure ANTHROPIC_API_KEY in HuggingFace Space secrets',
60
  'error_no_files': 'Please upload fitting result files to analyze',
61
  'report_exported': 'Report exported successfully as',
62
  'specialized_in': '🎯 Specialized in:',
 
64
  'what_analyzes': '🔍 What it specifically analyzes:',
65
  'tips': '💡 Tips for better results:',
66
  'additional_specs': '📝 Additional specifications for analysis',
67
+ 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
 
 
 
 
 
 
 
 
 
 
 
68
  },
69
  'es': {
70
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
71
  'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
72
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
73
+ 'select_model': '🤖 Modelo Claude',
74
  'select_language': '🌐 Idioma',
75
  'select_theme': '🎨 Tema',
76
  'detail_level': '📋 Nivel de detalle del análisis',
 
87
  'dark': 'Oscuro',
88
  'best_for': 'Mejor para',
89
  'loading': 'Cargando...',
90
+ 'error_no_api': 'Por favor configura ANTHROPIC_API_KEY en los secretos del Space',
91
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
92
  'report_exported': 'Reporte exportado exitosamente como',
93
  'specialized_in': '🎯 Especializado en:',
 
95
  'what_analyzes': '🔍 Qué analiza específicamente:',
96
  'tips': '💡 Tips para mejores resultados:',
97
  'additional_specs': '📝 Especificaciones adicionales para el análisis',
98
+ 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
99
+ },
100
+ 'fr': {
101
+ 'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
102
+ 'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
103
+ 'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
104
+ 'select_model': '🤖 Modèle Claude',
105
+ 'select_language': '🌐 Langue',
106
+ 'select_theme': '🎨 Thème',
107
+ 'detail_level': '📋 Niveau de détail',
108
+ 'detailed': 'Détaillé',
109
+ 'summarized': 'Résumé',
110
+ 'analyze_button': '🚀 Analyser et Comparer',
111
+ 'export_format': '📄 Format d\'export',
112
+ 'export_button': '💾 Exporter le Rapport',
113
+ 'comparative_analysis': '📊 Analyse Comparative',
114
+ 'implementation_code': '💻 Code d\'Implémentation',
115
+ 'data_format': '📋 Format de données attendu',
116
+ 'examples': '📚 Exemples d\'analyse',
117
+ 'light': 'Clair',
118
+ 'dark': 'Sombre',
119
+ 'best_for': 'Meilleur pour',
120
+ 'loading': 'Chargement...',
121
+ 'error_no_api': 'Veuillez configurer ANTHROPIC_API_KEY',
122
+ 'error_no_files': 'Veuillez télécharger des fichiers à analyser',
123
+ 'report_exported': 'Rapport exporté avec succès comme',
124
+ 'specialized_in': '🎯 Spécialisé dans:',
125
+ 'metrics_analyzed': '📊 Métriques analysées:',
126
+ 'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
127
+ 'tips': '💡 Conseils pour de meilleurs résultats:',
128
+ 'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
129
+ 'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
130
+ },
131
+ 'de': {
132
+ 'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
133
+ 'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
134
+ 'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
135
+ 'select_model': '🤖 Claude Modell',
136
+ 'select_language': '🌐 Sprache',
137
+ 'select_theme': '🎨 Thema',
138
+ 'detail_level': '📋 Detailgrad der Analyse',
139
+ 'detailed': 'Detailliert',
140
+ 'summarized': 'Zusammengefasst',
141
+ 'analyze_button': '🚀 Analysieren und Vergleichen',
142
+ 'export_format': '📄 Exportformat',
143
+ 'export_button': '💾 Bericht Exportieren',
144
+ 'comparative_analysis': '📊 Vergleichende Analyse',
145
+ 'implementation_code': '💻 Implementierungscode',
146
+ 'data_format': '📋 Erwartetes Datenformat',
147
+ 'examples': '📚 Analysebeispiele',
148
+ 'light': 'Hell',
149
+ 'dark': 'Dunkel',
150
+ 'best_for': 'Am besten für',
151
+ 'loading': 'Laden...',
152
+ 'error_no_api': 'Bitte konfigurieren Sie ANTHROPIC_API_KEY',
153
+ 'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
154
+ 'report_exported': 'Bericht erfolgreich exportiert als',
155
+ 'specialized_in': '🎯 Spezialisiert auf:',
156
+ 'metrics_analyzed': '📊 Analysierte Metriken:',
157
+ 'what_analyzes': '🔍 Was spezifisch analysiert wird:',
158
+ 'tips': '💡 Tipps für bessere Ergebnisse:',
159
+ 'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
160
+ 'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
161
+ },
162
+ 'pt': {
163
+ 'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
164
+ 'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
165
+ 'upload_files': '📁 Carregar resultados (CSV/Excel)',
166
+ 'select_model': '🤖 Modelo Claude',
167
+ 'select_language': '🌐 Idioma',
168
+ 'select_theme': '🎨 Tema',
169
+ 'detail_level': '📋 Nível de detalhe',
170
+ 'detailed': 'Detalhado',
171
+ 'summarized': 'Resumido',
172
+ 'analyze_button': '🚀 Analisar e Comparar',
173
+ 'export_format': '📄 Formato de exportação',
174
+ 'export_button': '💾 Exportar Relatório',
175
+ 'comparative_analysis': '📊 Análise Comparativa',
176
+ 'implementation_code': '💻 Código de Implementação',
177
+ 'data_format': '📋 Formato de dados esperado',
178
+ 'examples': '📚 Exemplos de análise',
179
+ 'light': 'Claro',
180
+ 'dark': 'Escuro',
181
+ 'best_for': 'Melhor para',
182
+ 'loading': 'Carregando...',
183
+ 'error_no_api': 'Por favor configure ANTHROPIC_API_KEY',
184
+ 'error_no_files': 'Por favor carregue arquivos para analisar',
185
+ 'report_exported': 'Relatório exportado com sucesso como',
186
+ 'specialized_in': '🎯 Especializado em:',
187
+ 'metrics_analyzed': '📊 Métricas analisadas:',
188
+ 'what_analyzes': '🔍 O que analisa especificamente:',
189
+ 'tips': '💡 Dicas para melhores resultados:',
190
+ 'additional_specs': '📝 Especificações adicionais para a análise',
191
+ 'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
192
  }
193
  }
194
 
 
292
  # Instancia global del registro
293
  model_registry = ModelRegistry()
294
 
295
+ # Modelos de Claude disponibles
296
+ CLAUDE_MODELS = {
297
+ "claude-opus-4-20250514": {
298
+ "name": "Claude Opus 4 (Latest)",
299
+ "description": "Modelo más potente para desafíos complejos",
300
+ "max_tokens": 4000,
301
+ "best_for": "Análisis muy detallados y complejos"
 
 
302
  },
303
+ "claude-sonnet-4-20250514": {
304
+ "name": "Claude Sonnet 4 (Latest)",
305
+ "description": "Modelo inteligente y eficiente para uso cotidiano",
306
+ "max_tokens": 4000,
307
+ "best_for": "Análisis general, recomendado para la mayoría de casos"
 
 
308
  },
309
+ "claude-3-5-haiku-20241022": {
310
+ "name": "Claude 3.5 Haiku (Latest)",
311
+ "description": "Modelo más rápido para tareas diarias",
312
+ "max_tokens": 4000,
313
+ "best_for": "Análisis rápidos y económicos"
314
+ },
315
+ "claude-3-7-sonnet-20250219": {
316
+ "name": "Claude 3.7 Sonnet",
317
+ "description": "Modelo avanzado de la serie 3.7",
318
+ "max_tokens": 4000,
319
+ "best_for": "Análisis equilibrados con alta calidad"
320
+ },
321
+ "claude-3-5-sonnet-20241022": {
322
+ "name": "Claude 3.5 Sonnet (Oct 2024)",
323
+ "description": "Excelente balance entre velocidad y capacidad",
324
+ "max_tokens": 4000,
325
+ "best_for": "Análisis rápidos y precisos"
326
  }
327
  }
328
 
 
392
  title_text = {
393
  'en': 'Comparative Analysis Report - Biotechnological Models',
394
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
395
+ 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
396
+ 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
397
+ 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
398
  }
399
 
400
  doc.add_heading(title_text.get(language, title_text['en']), 0)
 
403
  date_text = {
404
  'en': 'Generated on',
405
  'es': 'Generado el',
406
+ 'fr': 'Généré le',
407
+ 'de': 'Erstellt am',
408
+ 'pt': 'Gerado em'
409
  }
410
  doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
411
  doc.add_paragraph()
 
474
  title_text = {
475
  'en': 'Comparative Analysis Report - Biotechnological Models',
476
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
477
+ 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
478
+ 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
479
+ 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
480
  }
481
 
482
  story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
 
485
  date_text = {
486
  'en': 'Generated on',
487
  'es': 'Generado el',
488
+ 'fr': 'Généré le',
489
+ 'de': 'Erstellt am',
490
+ 'pt': 'Gerado em'
491
  }
492
  story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
493
  story.append(Spacer(1, 0.5*inch))
 
525
  return filename
526
 
527
  class AIAnalyzer:
528
+ """Clase para análisis con IA"""
529
 
530
  def __init__(self, client, model_registry):
531
  self.client = client
532
  self.model_registry = model_registry
 
 
 
 
 
 
533
 
534
+ def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
 
 
 
 
 
 
 
 
 
535
  """Detecta el tipo de análisis necesario"""
536
  if isinstance(content, pd.DataFrame):
537
  columns = [col.lower() for col in content.columns]
 
560
  """
561
 
562
  try:
563
+ response = self.client.messages.create(
564
+ model="claude-3-haiku-20240307",
565
+ max_tokens=10,
566
+ messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
 
567
  )
568
 
569
+ result = response.content[0].text.strip().upper()
 
 
 
 
 
 
570
  if "MODEL" in result:
571
  return AnalysisType.MATHEMATICAL_MODEL
572
  elif "RESULTS" in result:
 
576
  else:
577
  return AnalysisType.UNKNOWN
578
 
579
+ except:
 
580
  return AnalysisType.UNKNOWN
581
 
582
  def get_language_prompt_prefix(self, language: str) -> str:
 
584
  prefixes = {
585
  'en': "Please respond in English. ",
586
  'es': "Por favor responde en español. ",
587
+ 'fr': "Veuillez répondre en français. ",
588
+ 'de': "Bitte antworten Sie auf Deutsch. ",
589
+ 'pt': "Por favor responda em português. "
590
  }
591
  return prefixes.get(language, prefixes['en'])
592
 
593
+ def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
594
+ language: str = "en", additional_specs: str = "") -> Dict:
595
+ """Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
 
596
 
597
  # Preparar resumen completo de los datos
598
  data_summary = f"""
 
602
  - Columns: {list(data.columns)}
603
  - Number of models evaluated: {len(data)}
604
 
605
+ Complete data:
606
+ {data.to_string()}
607
+
608
+ Descriptive statistics:
609
+ {data.describe().to_string()}
610
  """
611
 
612
+ # Extraer valores para usar en el código
613
+ data_dict = data.to_dict('records')
614
+
615
  # Obtener prefijo de idioma
616
  lang_prefix = self.get_language_prompt_prefix(language)
617
 
 
771
  """
772
 
773
  try:
774
+ response = self.client.messages.create(
775
+ model=claude_model,
776
+ max_tokens=4000,
 
 
777
  messages=[{
778
  "role": "user",
779
  "content": f"{prompt}\n\n{data_summary}"
780
  }]
781
  )
782
 
783
+ # Análisis adicional para generar código con valores numéricos reales
 
 
 
 
 
 
 
 
 
784
  code_prompt = f"""
785
  {lang_prefix}
786
 
787
  Based on the analysis and this actual data:
788
+ {data.to_string()}
789
 
790
  Generate Python code that:
791
 
 
812
  Format: Complete, executable Python code with actual data values embedded.
813
  """
814
 
815
+ code_response = self.client.messages.create(
816
+ model=claude_model,
817
+ max_tokens=3000,
 
818
  messages=[{
819
  "role": "user",
820
  "content": code_prompt
821
  }]
822
  )
823
 
 
 
 
 
 
 
 
 
 
824
  return {
825
  "tipo": "Comparative Analysis of Mathematical Models",
826
+ "analisis_completo": response.content[0].text,
827
+ "codigo_implementacion": code_response.content[0].text,
828
  "resumen_datos": {
829
  "n_modelos": len(data),
830
  "columnas": list(data.columns),
 
832
  for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
833
  "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
834
  "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
835
+ "datos_completos": data_dict # Incluir todos los datos para el código
836
  }
837
  }
838
 
839
  except Exception as e:
 
840
  return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
841
 
842
+ def process_files(files, claude_model: str, detail_level: str = "detailed",
843
+ language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
844
+ """Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
 
845
  processor = FileProcessor()
846
  analyzer = AIAnalyzer(client, model_registry)
 
 
847
  results = []
848
  all_code = []
 
849
 
850
  for file in files:
851
  if file is None:
 
860
  if file_ext in ['.csv', '.xlsx', '.xls']:
861
  if language == 'es':
862
  results.append(f"## 📊 Análisis de Resultados: {file_name}")
 
863
  else:
864
  results.append(f"## 📊 Results Analysis: {file_name}")
 
865
 
866
  if file_ext == '.csv':
867
  df = processor.read_csv(file_content)
 
868
  else:
869
  df = processor.read_excel(file_content)
 
870
 
871
  if df is not None:
872
+ analysis_type = analyzer.detect_analysis_type(df)
 
873
 
874
  if analysis_type == AnalysisType.FITTING_RESULTS:
875
  result = analyzer.analyze_fitting_results(
876
+ df, claude_model, detail_level, language, additional_specs
 
877
  )
878
 
879
  if language == 'es':
 
886
  all_code.append(result["codigo_implementacion"])
887
 
888
  results.append("\n---\n")
 
889
 
890
  analysis_text = "\n".join(results)
891
  code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
 
892
 
893
+ return analysis_text, code_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
894
 
895
def generate_implementation_code(analysis_results: str) -> str:
    """Return a self-contained Python script for per-experiment model comparison.

    The returned source is a static template (the ``analysis_results`` argument
    is currently not interpolated into it — kept for interface compatibility
    with callers that pass the analysis text). It is used as a fallback when
    the AI-generated implementation code is unavailable.

    Args:
        analysis_results: Markdown text of the comparative analysis (unused).

    Returns:
        Executable Python source code as a single string, defining an
        ``ExperimentalModelAnalyzer`` class plus an example ``__main__`` block.
    """
    code = """
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import curve_fit, differential_evolution
from sklearn.metrics import r2_score, mean_squared_error
import seaborn as sns
from typing import Dict, List, Tuple, Optional

# Visualization configuration
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

class ExperimentalModelAnalyzer:
    \"\"\"
    Class for comparative analysis of biotechnological models across multiple experiments.
    Analyzes biomass, substrate and product models separately for each experimental condition.
    \"\"\"

    def __init__(self):
        self.results_df = None
        self.experiments = {}
        self.best_models_by_experiment = {}
        self.overall_best_models = {
            'biomass': None,
            'substrate': None,
            'product': None
        }

    def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
        \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
        if data_dict:
            self.results_df = pd.DataFrame(data_dict)
        elif file_path:
            if file_path.endswith('.csv'):
                self.results_df = pd.read_csv(file_path)
            else:
                self.results_df = pd.read_excel(file_path)

        print(f"✅ Data loaded: {len(self.results_df)} models")
        print(f"📊 Available columns: {list(self.results_df.columns)}")

        # Identify experiments
        if 'Experiment' in self.results_df.columns:
            self.experiments = self.results_df.groupby('Experiment').groups
            print(f"🧪 Experiments found: {list(self.experiments.keys())}")

        return self.results_df

    def analyze_by_experiment(self,
                              experiment_col: str = 'Experiment',
                              model_col: str = 'Model',
                              type_col: str = 'Type',
                              r2_col: str = 'R2',
                              rmse_col: str = 'RMSE') -> Dict:
        \"\"\"
        Analyze models by experiment and variable type.
        Identifies best models for biomass, substrate, and product in each experiment.
        \"\"\"
        if self.results_df is None:
            raise ValueError("First load data with load_results()")

        results_by_exp = {}

        # Get unique experiments
        if experiment_col in self.results_df.columns:
            experiments = self.results_df[experiment_col].unique()
        else:
            experiments = ['All_Data']
            self.results_df[experiment_col] = 'All_Data'

        print("\\n" + "="*80)
        print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
        print("="*80)

        for exp in experiments:
            print(f"\\n🧪 EXPERIMENT: {exp}")
            print("-"*50)

            exp_data = self.results_df[self.results_df[experiment_col] == exp]
            results_by_exp[exp] = {}

            # Analyze by variable type if available
            if type_col in exp_data.columns:
                var_types = exp_data[type_col].unique()

                for var_type in var_types:
                    var_data = exp_data[exp_data[type_col] == var_type]

                    if not var_data.empty:
                        # Find best model for this variable type
                        best_idx = var_data[r2_col].idxmax()
                        best_model = var_data.loc[best_idx]

                        results_by_exp[exp][var_type] = {
                            'best_model': best_model[model_col],
                            'r2': best_model[r2_col],
                            'rmse': best_model[rmse_col],
                            'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
                        }

                        print(f"\\n  📈 {var_type.upper()}:")
                        print(f"     Best Model: {best_model[model_col]}")
                        print(f"     R² = {best_model[r2_col]:.4f}")
                        print(f"     RMSE = {best_model[rmse_col]:.4f}")

                        # Show all models for this variable
                        print(f"\\n     All {var_type} models tested:")
                        for _, row in var_data.iterrows():
                            print(f"       - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
            else:
                # If no type column, analyze all models together
                best_idx = exp_data[r2_col].idxmax()
                best_model = exp_data.loc[best_idx]

                results_by_exp[exp]['all'] = {
                    'best_model': best_model[model_col],
                    'r2': best_model[r2_col],
                    'rmse': best_model[rmse_col],
                    'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
                }

        self.best_models_by_experiment = results_by_exp

        # Determine overall best models
        self._determine_overall_best_models()

        return results_by_exp

    def _determine_overall_best_models(self):
        \"\"\"Determine the best models across all experiments\"\"\"
        print("\\n" + "="*80)
        print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
        print("="*80)

        # Aggregate performance by model and type
        model_performance = {}

        for exp, exp_results in self.best_models_by_experiment.items():
            for var_type, var_results in exp_results.items():
                if var_type not in model_performance:
                    model_performance[var_type] = {}

                for model_data in var_results['all_models']:
                    model_name = model_data['Model']
                    if model_name not in model_performance[var_type]:
                        model_performance[var_type][model_name] = {
                            'r2_values': [],
                            'rmse_values': [],
                            'experiments': []
                        }

                    model_performance[var_type][model_name]['r2_values'].append(model_data['R2'])
                    model_performance[var_type][model_name]['rmse_values'].append(model_data['RMSE'])
                    model_performance[var_type][model_name]['experiments'].append(exp)

        # Calculate average performance and select best
        for var_type, models in model_performance.items():
            best_avg_r2 = -1
            best_model = None

            print(f"\\n📊 {var_type.upper()} MODELS:")
            for model_name, perf_data in models.items():
                avg_r2 = np.mean(perf_data['r2_values'])
                avg_rmse = np.mean(perf_data['rmse_values'])
                n_exp = len(perf_data['experiments'])

                print(f"   {model_name}:")
                print(f"      Average R² = {avg_r2:.4f}")
                print(f"      Average RMSE = {avg_rmse:.4f}")
                print(f"      Tested in {n_exp} experiments")

                if avg_r2 > best_avg_r2:
                    best_avg_r2 = avg_r2
                    best_model = {
                        'name': model_name,
                        'avg_r2': avg_r2,
                        'avg_rmse': avg_rmse,
                        'n_experiments': n_exp
                    }

            if var_type.lower() in ['biomass', 'substrate', 'product']:
                self.overall_best_models[var_type.lower()] = best_model
                print(f"\\n   🏆 BEST {var_type.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")

    def create_comparison_visualizations(self):
        \"\"\"Create visualizations comparing models across experiments\"\"\"
        if not self.best_models_by_experiment:
            raise ValueError("First run analyze_by_experiment()")

        # Prepare data for visualization
        experiments = []
        biomass_r2 = []
        substrate_r2 = []
        product_r2 = []

        for exp, results in self.best_models_by_experiment.items():
            experiments.append(exp)
            biomass_r2.append(results.get('Biomass', {}).get('r2', 0))
            substrate_r2.append(results.get('Substrate', {}).get('r2', 0))
            product_r2.append(results.get('Product', {}).get('r2', 0))

        # Create figure with subplots
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)

        # 1. R² comparison by experiment and variable type
        ax1 = axes[0, 0]
        x = np.arange(len(experiments))
        width = 0.25

        ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
        ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
        ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)

        ax1.set_xlabel('Experiment')
        ax1.set_ylabel('R²')
        ax1.set_title('Best Model R² by Experiment and Variable Type')
        ax1.set_xticks(x)
        ax1.set_xticklabels(experiments, rotation=45, ha='right')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Add value labels
        for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
            if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
            if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
            if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)

        # 2. Model frequency heatmap
        ax2 = axes[0, 1]
        # This would show which models appear most frequently as best
        # Implementation depends on actual data structure
        ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
                 ha='center', va='center', transform=ax2.transAxes)
        ax2.set_title('Most Frequently Selected Models')

        # 3. Parameter evolution across experiments
        ax3 = axes[1, 0]
        ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
                 ha='center', va='center', transform=ax3.transAxes)
        ax3.set_title('Parameter Trends')

        # 4. Overall best models summary
        ax4 = axes[1, 1]
        ax4.axis('off')

        summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
        for var_type, model_info in self.overall_best_models.items():
            if model_info:
                summary_text += f"{var_type.upper()}:\\n"
                summary_text += f"  Model: {model_info['name']}\\n"
                summary_text += f"  Avg R²: {model_info['avg_r2']:.4f}\\n"
                summary_text += f"  Tested in: {model_info['n_experiments']} experiments\\n\\n"

        ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
                 fontsize=12, verticalalignment='top', fontfamily='monospace')
        ax4.set_title('Overall Best Models Summary')

        plt.tight_layout()
        plt.show()

    def generate_summary_table(self) -> pd.DataFrame:
        \"\"\"Generate a summary table of best models by experiment and type\"\"\"
        summary_data = []

        for exp, results in self.best_models_by_experiment.items():
            for var_type, var_results in results.items():
                summary_data.append({
                    'Experiment': exp,
                    'Variable_Type': var_type,
                    'Best_Model': var_results['best_model'],
                    'R2': var_results['r2'],
                    'RMSE': var_results['rmse']
                })

        summary_df = pd.DataFrame(summary_data)

        print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
        print("="*80)
        print(summary_df.to_string(index=False))

        return summary_df

# Example usage
if __name__ == "__main__":
    print("🧬 Experimental Model Comparison System")
    print("="*60)

    # Example data structure with experiments
    example_data = {
        'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
                       'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
                       'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
        'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
                  'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
                  'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
        'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
                 'Substrate', 'Substrate', 'Substrate', 'Substrate',
                 'Product', 'Product', 'Product', 'Product'],
        'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
               0.9723, 0.9856, 0.9698, 0.9812,
               0.9634, 0.9512, 0.9687, 0.9423],
        'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
                 0.0312, 0.0245, 0.0334, 0.0289,
                 0.0412, 0.0523, 0.0389, 0.0567],
        'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
                   None, None, None, None, None, None, None, None],
        'Ks': [None, None, None, None, None, None,
               2.1, 1.8, 2.3, 1.9, None, None, None, None]
    }

    # Create analyzer
    analyzer = ExperimentalModelAnalyzer()

    # Load data
    analyzer.load_results(data_dict=example_data)

    # Analyze by experiment
    results = analyzer.analyze_by_experiment()

    # Create visualizations
    analyzer.create_comparison_visualizations()

    # Generate summary table
    summary = analyzer.generate_summary_table()

    print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
    """

    return code
1229
 
1230
# Global state holding the most recent analysis results, shared by UI callbacks.
class AppState:
    """Mutable container for the outputs of the last analysis run.

    The analyze handler writes here; the export handlers read from here so a
    report can be generated without re-running the analysis.
    """

    def __init__(self):
        # Markdown of the last comparative analysis and its generated code
        # (both empty until the first analysis completes).
        self.current_analysis = self.current_code = ""
        # Language code of the UI; defaults to English.
        self.current_language = "en"


# Module-level singleton used by every event handler.
app_state = AppState()
1238
 
1239
  def export_report(export_format: str, language: str) -> Tuple[str, str]:
1240
  """Exporta el reporte al formato seleccionado"""
1241
  if not app_state.current_analysis:
1242
+ error_msg = {
1243
+ 'en': "No analysis available to export",
1244
+ 'es': "No hay análisis disponible para exportar",
1245
+ 'fr': "Aucune analyse disponible pour exporter",
1246
+ 'de': "Keine Analyse zum Exportieren verfügbar",
1247
+ 'pt': "Nenhuma análise disponível para exportar"
1248
+ }
1249
+ return error_msg.get(language, error_msg['en']), ""
1250
 
1251
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1252
 
 
1283
  gr.update(label=t['select_theme']), # theme_selector
1284
  gr.update(label=t['detail_level']), # detail_level
1285
  gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
 
 
1286
  gr.update(value=t['analyze_button']), # analyze_btn
1287
  gr.update(label=t['export_format']), # export_format
1288
  gr.update(value=t['export_button']), # export_btn
1289
+ gr.update(label=t['comparative_analysis']), # analysis_output
1290
+ gr.update(label=t['implementation_code']), # code_output
 
 
1291
  gr.update(label=t['data_format']) # data_format_accordion
1292
  ]
1293
 
1294
def process_and_store(files, model, detail, language, additional_specs):
    """Run the file-analysis pipeline and cache its outputs in the app state.

    Returns a (analysis_markdown, implementation_code) pair for the two UI
    output widgets. When no files were uploaded, returns a localized error
    message and an empty code string instead.
    """
    # Guard: nothing uploaded — surface the localized error in the analysis pane.
    if not files:
        return TRANSLATIONS[language]['error_no_files'], ""

    analysis_md, impl_code = process_files(files, model, detail, language, additional_specs)

    # Cache so the export handlers can reuse the results later.
    # NOTE(review): app_state.current_language is never updated here — confirm
    # whether the stored language is meant to track the UI selection.
    app_state.current_analysis = analysis_md
    app_state.current_code = impl_code

    return analysis_md, impl_code
 
 
 
 
 
 
 
 
 
 
 
 
1304
 
1305
+ with gr.Blocks(theme=THEMES[current_theme]) as demo:
1306
  # Componentes de UI
1307
  with gr.Row():
1308
  with gr.Column(scale=3):
 
1311
  with gr.Column(scale=1):
1312
  with gr.Row():
1313
  language_selector = gr.Dropdown(
1314
+ choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
1315
+ ("Deutsch", "de"), ("Português", "pt")],
1316
  value="en",
1317
  label=TRANSLATIONS[current_language]['select_language'],
1318
  interactive=True
 
1334
  )
1335
 
1336
  model_selector = gr.Dropdown(
1337
+ choices=list(CLAUDE_MODELS.keys()),
1338
+ value="claude-3-5-sonnet-20241022",
1339
  label=TRANSLATIONS[current_language]['select_model'],
1340
+ info=f"{TRANSLATIONS[current_language]['best_for']}: {CLAUDE_MODELS['claude-3-5-sonnet-20241022']['best_for']}"
1341
  )
1342
 
1343
  detail_level = gr.Radio(
 
1349
  label=TRANSLATIONS[current_language]['detail_level']
1350
  )
1351
 
1352
+ # Nueva entrada para especificaciones adicionales
1353
  additional_specs = gr.Textbox(
1354
  label=TRANSLATIONS[current_language]['additional_specs'],
1355
  placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
 
1358
  interactive=True
1359
  )
1360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1361
  analyze_btn = gr.Button(
1362
  TRANSLATIONS[current_language]['analyze_button'],
1363
  variant="primary",
 
1389
  )
1390
 
1391
  with gr.Column(scale=2):
 
 
 
 
 
1392
  analysis_output = gr.Markdown(
1393
+ label=TRANSLATIONS[current_language]['comparative_analysis']
1394
  )
1395
 
1396
  code_output = gr.Code(
1397
+ label=TRANSLATIONS[current_language]['implementation_code'],
1398
  language="python",
1399
  interactive=True,
1400
+ lines=20
 
 
 
 
1401
  )
1402
 
1403
  data_format_accordion = gr.Accordion(
 
1424
  - **Parameters**: Model-specific parameters
1425
  """)
1426
 
1427
+ # Definir ejemplos
1428
+ examples = gr.Examples(
1429
+ examples=[
1430
+ [["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed", ""],
1431
+ [["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized", "Focus on temperature effects"]
1432
+ ],
1433
+ inputs=[files_input, model_selector, detail_level, additional_specs],
1434
+ label=TRANSLATIONS[current_language]['examples']
1435
+ )
1436
+
1437
+ # Eventos - Actualizado para incluir additional_specs
1438
  language_selector.change(
1439
  update_interface_language,
1440
  inputs=[language_selector],
1441
  outputs=[
1442
  title_text, subtitle_text, files_input, model_selector,
1443
  language_selector, theme_selector, detail_level, additional_specs,
1444
+ analyze_btn, export_format, export_btn, analysis_output,
1445
+ code_output, data_format_accordion
 
1446
  ]
1447
  )
1448
 
1449
  def change_theme(theme_name):
1450
  """Cambia el tema de la interfaz"""
1451
+ # Nota: En Gradio actual, cambiar el tema dinámicamente requiere recargar
1452
+ # Esta es una limitación conocida
1453
  return gr.Info("Theme will be applied on next page load")
1454
 
1455
  theme_selector.change(
 
1460
 
1461
  analyze_btn.click(
1462
  fn=process_and_store,
1463
+ inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
1464
+ outputs=[analysis_output, code_output]
 
1465
  )
1466
 
1467
  def handle_export(format, language):
 
1481
 
1482
  # Función principal
1483
  def main():
1484
+ if not os.getenv("ANTHROPIC_API_KEY"):
1485
+ print("⚠️ Configure ANTHROPIC_API_KEY in HuggingFace Space secrets")
1486
  return gr.Interface(
1487
  fn=lambda x: TRANSLATIONS['en']['error_no_api'],
1488
  inputs=gr.Textbox(),