C2MV committed on
Commit
f5ee395
·
verified ·
1 Parent(s): e58e835

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -508
app.py CHANGED
@@ -25,7 +25,7 @@ from reportlab.pdfbase import pdfmetrics
25
  from reportlab.pdfbase.ttfonts import TTFont
26
  import matplotlib.pyplot as plt
27
  from datetime import datetime
28
- from openai import OpenAI # Replaced Anthropic with OpenAI for Qwen
29
 
30
  # Configuración para HuggingFace
31
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
@@ -36,7 +36,7 @@ client = OpenAI(
36
  api_key=os.environ.get("NEBIUS_API_KEY")
37
  )
38
 
39
- # Sistema de traducción - Actualizado con nuevas entradas
40
  TRANSLATIONS = {
41
  'en': {
42
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
@@ -67,7 +67,18 @@ TRANSLATIONS = {
67
  'what_analyzes': '🔍 What it specifically analyzes:',
68
  'tips': '💡 Tips for better results:',
69
  'additional_specs': '📝 Additional specifications for analysis',
70
- 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  'es': {
73
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
@@ -98,100 +109,18 @@ TRANSLATIONS = {
98
  'what_analyzes': '🔍 Qué analiza específicamente:',
99
  'tips': '💡 Tips para mejores resultados:',
100
  'additional_specs': '📝 Especificaciones adicionales para el análisis',
101
- 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
102
- },
103
- 'fr': {
104
- 'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
105
- 'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
106
- 'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
107
- 'select_model': '🤖 Modèle Qwen',
108
- 'select_language': '🌐 Langue',
109
- 'select_theme': '🎨 Thème',
110
- 'detail_level': '📋 Niveau de détail',
111
- 'detailed': 'Détaillé',
112
- 'summarized': 'Résumé',
113
- 'analyze_button': '🚀 Analyser et Comparer',
114
- 'export_format': '📄 Format d\'export',
115
- 'export_button': '💾 Exporter le Rapport',
116
- 'comparative_analysis': '📊 Analyse Comparative',
117
- 'implementation_code': '💻 Code d\'Implémentation',
118
- 'data_format': '📋 Format de données attendu',
119
- 'examples': '📚 Exemples d\'analyse',
120
- 'light': 'Clair',
121
- 'dark': 'Sombre',
122
- 'best_for': 'Meilleur pour',
123
- 'loading': 'Chargement...',
124
- 'error_no_api': 'Veuillez configurer NEBIUS_API_KEY',
125
- 'error_no_files': 'Veuillez télécharger des fichiers à analyser',
126
- 'report_exported': 'Rapport exporté avec succès comme',
127
- 'specialized_in': '🎯 Spécialisé dans:',
128
- 'metrics_analyzed': '📊 Métriques analysées:',
129
- 'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
130
- 'tips': '💡 Conseils pour de meilleurs résultats:',
131
- 'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
132
- 'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
133
- },
134
- 'de': {
135
- 'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
136
- 'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
137
- 'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
138
- 'select_model': '🤖 Qwen Modell',
139
- 'select_language': '🌐 Sprache',
140
- 'select_theme': '🎨 Thema',
141
- 'detail_level': '📋 Detailgrad der Analyse',
142
- 'detailed': 'Detailliert',
143
- 'summarized': 'Zusammengefasst',
144
- 'analyze_button': '🚀 Analysieren und Vergleichen',
145
- 'export_format': '📄 Exportformat',
146
- 'export_button': '💾 Bericht Exportieren',
147
- 'comparative_analysis': '📊 Vergleichende Analyse',
148
- 'implementation_code': '💻 Implementierungscode',
149
- 'data_format': '📋 Erwartetes Datenformat',
150
- 'examples': '📚 Analysebeispiele',
151
- 'light': 'Hell',
152
- 'dark': 'Dunkel',
153
- 'best_for': 'Am besten für',
154
- 'loading': 'Laden...',
155
- 'error_no_api': 'Bitte konfigurieren Sie NEBIUS_API_KEY',
156
- 'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
157
- 'report_exported': 'Bericht erfolgreich exportiert als',
158
- 'specialized_in': '🎯 Spezialisiert auf:',
159
- 'metrics_analyzed': '📊 Analysierte Metriken:',
160
- 'what_analyzes': '🔍 Was spezifisch analysiert wird:',
161
- 'tips': '💡 Tipps für bessere Ergebnisse:',
162
- 'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
163
- 'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
164
- },
165
- 'pt': {
166
- 'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
167
- 'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
168
- 'upload_files': '📁 Carregar resultados (CSV/Excel)',
169
- 'select_model': '🤖 Modelo Qwen',
170
- 'select_language': '🌐 Idioma',
171
- 'select_theme': '🎨 Tema',
172
- 'detail_level': '📋 Nível de detalhe',
173
- 'detailed': 'Detalhado',
174
- 'summarized': 'Resumido',
175
- 'analyze_button': '🚀 Analisar e Comparar',
176
- 'export_format': '📄 Formato de exportação',
177
- 'export_button': '💾 Exportar Relatório',
178
- 'comparative_analysis': '📊 Análise Comparativa',
179
- 'implementation_code': '💻 Código de Implementação',
180
- 'data_format': '📋 Formato de dados esperado',
181
- 'examples': '📚 Exemplos de análise',
182
- 'light': 'Claro',
183
- 'dark': 'Escuro',
184
- 'best_for': 'Melhor para',
185
- 'loading': 'Carregando...',
186
- 'error_no_api': 'Por favor configure NEBIUS_API_KEY',
187
- 'error_no_files': 'Por favor carregue arquivos para analisar',
188
- 'report_exported': 'Relatório exportado com sucesso como',
189
- 'specialized_in': '🎯 Especializado em:',
190
- 'metrics_analyzed': '📊 Métricas analisadas:',
191
- 'what_analyzes': '🔍 O que analisa especificamente:',
192
- 'tips': '💡 Dicas para melhores resultados:',
193
- 'additional_specs': '📝 Especificações adicionais para a análise',
194
- 'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
195
  }
196
  }
197
 
@@ -300,20 +229,26 @@ QWEN_MODELS = {
300
  "Qwen/Qwen3-14B": {
301
  "name": "Qwen 3 14B",
302
  "description": "Modelo potente multilingüe de Alibaba",
303
- "max_tokens": 10000,
304
- "best_for": "Análisis complejos y detallados"
 
 
305
  },
306
  "Qwen/Qwen3-7B": {
307
  "name": "Qwen 3 7B",
308
  "description": "Modelo equilibrado para uso general",
309
- "max_tokens": 10000,
310
- "best_for": "Análisis rápidos y precisos"
 
 
311
  },
312
  "Qwen/Qwen1.5-14B": {
313
  "name": "Qwen 1.5 14B",
314
  "description": "Modelo avanzado para tareas complejas",
315
- "max_tokens": 10000,
316
- "best_for": "Análisis técnicos detallados"
 
 
317
  }
318
  }
319
 
@@ -383,9 +318,6 @@ class ReportExporter:
383
  title_text = {
384
  'en': 'Comparative Analysis Report - Biotechnological Models',
385
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
386
- 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
387
- 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
388
- 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
389
  }
390
 
391
  doc.add_heading(title_text.get(language, title_text['en']), 0)
@@ -394,9 +326,6 @@ class ReportExporter:
394
  date_text = {
395
  'en': 'Generated on',
396
  'es': 'Generado el',
397
- 'fr': 'Généré le',
398
- 'de': 'Erstellt am',
399
- 'pt': 'Gerado em'
400
  }
401
  doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
402
  doc.add_paragraph()
@@ -465,9 +394,6 @@ class ReportExporter:
465
  title_text = {
466
  'en': 'Comparative Analysis Report - Biotechnological Models',
467
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
468
- 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
469
- 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
470
- 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
471
  }
472
 
473
  story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
@@ -476,9 +402,6 @@ class ReportExporter:
476
  date_text = {
477
  'en': 'Generated on',
478
  'es': 'Generado el',
479
- 'fr': 'Généré le',
480
- 'de': 'Erstellt am',
481
- 'pt': 'Gerado em'
482
  }
483
  story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
484
  story.append(Spacer(1, 0.5*inch))
@@ -521,8 +444,23 @@ class AIAnalyzer:
521
  def __init__(self, client, model_registry):
522
  self.client = client
523
  self.model_registry = model_registry
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
525
- def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
526
  """Detecta el tipo de análisis necesario"""
527
  if isinstance(content, pd.DataFrame):
528
  columns = [col.lower() for col in content.columns]
@@ -553,11 +491,17 @@ class AIAnalyzer:
553
  try:
554
  response = self.client.chat.completions.create(
555
  model="Qwen/Qwen3-14B",
556
- max_tokens=10000,
557
  temperature=0.0,
558
- messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
559
  )
560
 
 
 
 
 
 
 
561
  result = response.choices[0].message.content.strip().upper()
562
  if "MODEL" in result:
563
  return AnalysisType.MATHEMATICAL_MODEL
@@ -577,14 +521,12 @@ class AIAnalyzer:
577
  prefixes = {
578
  'en': "Please respond in English. ",
579
  'es': "Por favor responde en español. ",
580
- 'fr': "Veuillez répondre en français. ",
581
- 'de': "Bitte antworten Sie auf Deutsch. ",
582
- 'pt': "Por favor responda em português. "
583
  }
584
  return prefixes.get(language, prefixes['en'])
585
 
586
  def analyze_fitting_results(self, data: pd.DataFrame, qwen_model: str, detail_level: str = "detailed",
587
- language: str = "en", additional_specs: str = "") -> Dict:
 
588
  """Analiza resultados de ajuste de modelos usando Qwen"""
589
 
590
  # Preparar resumen completo de los datos
@@ -595,16 +537,10 @@ class AIAnalyzer:
595
  - Columns: {list(data.columns)}
596
  - Number of models evaluated: {len(data)}
597
 
598
- Complete data:
599
- {data.to_string()}
600
-
601
- Descriptive statistics:
602
- {data.describe().to_string()}
603
  """
604
 
605
- # Extraer valores para usar en el código
606
- data_dict = data.to_dict('records')
607
-
608
  # Obtener prefijo de idioma
609
  lang_prefix = self.get_language_prompt_prefix(language)
610
 
@@ -767,7 +703,7 @@ class AIAnalyzer:
767
  # Análisis principal
768
  response = self.client.chat.completions.create(
769
  model=qwen_model,
770
- max_tokens=10000,
771
  temperature=0.3,
772
  messages=[{
773
  "role": "user",
@@ -775,6 +711,13 @@ class AIAnalyzer:
775
  }]
776
  )
777
 
 
 
 
 
 
 
 
778
  analysis_result = response.choices[0].message.content
779
 
780
  # Generación de código
@@ -782,7 +725,7 @@ class AIAnalyzer:
782
  {lang_prefix}
783
 
784
  Based on the analysis and this actual data:
785
- {data.to_string()}
786
 
787
  Generate Python code that:
788
 
@@ -811,7 +754,7 @@ class AIAnalyzer:
811
 
812
  code_response = self.client.chat.completions.create(
813
  model=qwen_model,
814
- max_tokens=10000,
815
  temperature=0.1,
816
  messages=[{
817
  "role": "user",
@@ -819,6 +762,13 @@ class AIAnalyzer:
819
  }]
820
  )
821
 
 
 
 
 
 
 
 
822
  code_result = code_response.choices[0].message.content
823
 
824
  return {
@@ -832,21 +782,35 @@ class AIAnalyzer:
832
  for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
833
  "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
834
  "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
835
- "datos_completos": data_dict # Incluir todos los datos para el código
836
  }
837
  }
838
 
839
  except Exception as e:
840
  print(f"Error en análisis: {str(e)}")
841
  return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
842
 
843
  def process_files(files, qwen_model: str, detail_level: str = "detailed",
844
- language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
 
845
  """Procesa múltiples archivos usando Qwen"""
846
  processor = FileProcessor()
847
  analyzer = AIAnalyzer(client, model_registry)
 
 
848
  results = []
849
  all_code = []
 
850
 
851
  for file in files:
852
  if file is None:
@@ -861,20 +825,26 @@ def process_files(files, qwen_model: str, detail_level: str = "detailed",
861
  if file_ext in ['.csv', '.xlsx', '.xls']:
862
  if language == 'es':
863
  results.append(f"## 📊 Análisis de Resultados: {file_name}")
 
864
  else:
865
  results.append(f"## 📊 Results Analysis: {file_name}")
 
866
 
867
  if file_ext == '.csv':
868
  df = processor.read_csv(file_content)
 
869
  else:
870
  df = processor.read_excel(file_content)
 
871
 
872
  if df is not None:
873
- analysis_type = analyzer.detect_analysis_type(df)
 
874
 
875
  if analysis_type == AnalysisType.FITTING_RESULTS:
876
  result = analyzer.analyze_fitting_results(
877
- df, qwen_model, detail_level, language, additional_specs
 
878
  )
879
 
880
  if language == 'es':
@@ -887,367 +857,56 @@ def process_files(files, qwen_model: str, detail_level: str = "detailed",
887
  all_code.append(result["codigo_implementacion"])
888
 
889
  results.append("\n---\n")
 
890
 
891
  analysis_text = "\n".join(results)
892
  code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
 
893
 
894
- return analysis_text, code_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
 
896
  def generate_implementation_code(analysis_results: str) -> str:
897
  """Genera código de implementación con análisis por experimento"""
898
- code = """
899
- import numpy as np
900
- import pandas as pd
901
- import matplotlib.pyplot as plt
902
- from scipy.integrate import odeint
903
- from scipy.optimize import curve_fit, differential_evolution
904
- from sklearn.metrics import r2_score, mean_squared_error
905
- import seaborn as sns
906
- from typing import Dict, List, Tuple, Optional
907
-
908
- # Visualization configuration
909
- plt.style.use('seaborn-v0_8-darkgrid')
910
- sns.set_palette("husl")
911
-
912
- class ExperimentalModelAnalyzer:
913
- \"\"\"
914
- Class for comparative analysis of biotechnological models across multiple experiments.
915
- Analyzes biomass, substrate and product models separately for each experimental condition.
916
- \"\"\"
917
-
918
- def __init__(self):
919
- self.results_df = None
920
- self.experiments = {}
921
- self.best_models_by_experiment = {}
922
- self.overall_best_models = {
923
- 'biomass': None,
924
- 'substrate': None,
925
- 'product': None
926
- }
927
-
928
- def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
929
- \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
930
- if data_dict:
931
- self.results_df = pd.DataFrame(data_dict)
932
- elif file_path:
933
- if file_path.endswith('.csv'):
934
- self.results_df = pd.read_csv(file_path)
935
- else:
936
- self.results_df = pd.read_excel(file_path)
937
-
938
- print(f"✅ Data loaded: {len(self.results_df)} models")
939
- print(f"📊 Available columns: {list(self.results_df.columns)}")
940
-
941
- # Identify experiments
942
- if 'Experiment' in self.results_df.columns:
943
- self.experiments = self.results_df.groupby('Experiment').groups
944
- print(f"🧪 Experiments found: {list(self.experiments.keys())}")
945
-
946
- return self.results_df
947
-
948
- def analyze_by_experiment(self,
949
- experiment_col: str = 'Experiment',
950
- model_col: str = 'Model',
951
- type_col: str = 'Type',
952
- r2_col: str = 'R2',
953
- rmse_col: str = 'RMSE') -> Dict:
954
- \"\"\"
955
- Analyze models by experiment and variable type.
956
- Identifies best models for biomass, substrate, and product in each experiment.
957
- \"\"\"
958
- if self.results_df is None:
959
- raise ValueError("First load data with load_results()")
960
-
961
- results_by_exp = {}
962
-
963
- # Get unique experiments
964
- if experiment_col in self.results_df.columns:
965
- experiments = self.results_df[experiment_col].unique()
966
- else:
967
- experiments = ['All_Data']
968
- self.results_df[experiment_col] = 'All_Data'
969
-
970
- print("\\n" + "="*80)
971
- print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
972
- print("="*80)
973
-
974
- for exp in experiments:
975
- print(f"\\n🧪 EXPERIMENT: {exp}")
976
- print("-"*50)
977
-
978
- exp_data = self.results_df[self.results_df[experiment_col] == exp]
979
- results_by_exp[exp] = {}
980
-
981
- # Analyze by variable type if available
982
- if type_col in exp_data.columns:
983
- var_types = exp_data[type_col].unique()
984
-
985
- for var_type in var_types:
986
- var_data = exp_data[exp_data[type_col] == var_type]
987
-
988
- if not var_data.empty:
989
- # Find best model for this variable type
990
- best_idx = var_data[r2_col].idxmax()
991
- best_model = var_data.loc[best_idx]
992
-
993
- results_by_exp[exp][var_type] = {
994
- 'best_model': best_model[model_col],
995
- 'r2': best_model[r2_col],
996
- 'rmse': best_model[rmse_col],
997
- 'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
998
- }
999
-
1000
- print(f"\\n 📈 {var_type.upper()}:")
1001
- print(f" Best Model: {best_model[model_col]}")
1002
- print(f" R² = {best_model[r2_col]:.4f}")
1003
- print(f" RMSE = {best_model[rmse_col]:.4f}")
1004
-
1005
- # Show all models for this variable
1006
- print(f"\\n All {var_type} models tested:")
1007
- for _, row in var_data.iterrows():
1008
- print(f" - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
1009
- else:
1010
- # If no type column, analyze all models together
1011
- best_idx = exp_data[r2_col].idxmax()
1012
- best_model = exp_data.loc[best_idx]
1013
-
1014
- results_by_exp[exp]['all'] = {
1015
- 'best_model': best_model[model_col],
1016
- 'r2': best_model[r2_col],
1017
- 'rmse': best_model[rmse_col],
1018
- 'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
1019
- }
1020
-
1021
- self.best_models_by_experiment = results_by_exp
1022
-
1023
- # Determine overall best models
1024
- self._determine_overall_best_models()
1025
-
1026
- return results_by_exp
1027
-
1028
- def _determine_overall_best_models(self):
1029
- \"\"\"Determine the best models across all experiments\"\"\"
1030
- print("\\n" + "="*80)
1031
- print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
1032
- print("="*80)
1033
-
1034
- # Aggregate performance by model and type
1035
- model_performance = {}
1036
-
1037
- for exp, exp_results in self.best_models_by_experiment.items():
1038
- for var_type, var_results in exp_results.items():
1039
- if var_type not in model_performance:
1040
- model_performance[var_type] = {}
1041
-
1042
- for model_data in var_results['all_models']:
1043
- model_name = model_data['Model']
1044
- if model_name not in model_performance[var_type]:
1045
- model_performance[var_type][model_name] = {
1046
- 'r2_values': [],
1047
- 'rmse_values': [],
1048
- 'experiments': []
1049
- }
1050
-
1051
- model_performance[var_type][model_name]['r2_values'].append(model_data['R2'])
1052
- model_performance[var_type][model_name]['rmse_values'].append(model_data['RMSE'])
1053
- model_performance[var_type][model_name]['experiments'].append(exp)
1054
-
1055
- # Calculate average performance and select best
1056
- for var_type, models in model_performance.items():
1057
- best_avg_r2 = -1
1058
- best_model = None
1059
-
1060
- print(f"\\n📊 {var_type.upper()} MODELS:")
1061
- for model_name, perf_data in models.items():
1062
- avg_r2 = np.mean(perf_data['r2_values'])
1063
- avg_rmse = np.mean(perf_data['rmse_values'])
1064
- n_exp = len(perf_data['experiments'])
1065
-
1066
- print(f" {model_name}:")
1067
- print(f" Average R² = {avg_r2:.4f}")
1068
- print(f" Average RMSE = {avg_rmse:.4f}")
1069
- print(f" Tested in {n_exp} experiments")
1070
-
1071
- if avg_r2 > best_avg_r2:
1072
- best_avg_r2 = avg_r2
1073
- best_model = {
1074
- 'name': model_name,
1075
- 'avg_r2': avg_r2,
1076
- 'avg_rmse': avg_rmse,
1077
- 'n_experiments': n_exp
1078
- }
1079
-
1080
- if var_type.lower() in ['biomass', 'substrate', 'product']:
1081
- self.overall_best_models[var_type.lower()] = best_model
1082
- print(f"\\n 🏆 BEST {var_type.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")
1083
-
1084
- def create_comparison_visualizations(self):
1085
- \"\"\"Create visualizations comparing models across experiments\"\"\"
1086
- if not self.best_models_by_experiment:
1087
- raise ValueError("First run analyze_by_experiment()")
1088
-
1089
- # Prepare data for visualization
1090
- experiments = []
1091
- biomass_r2 = []
1092
- substrate_r2 = []
1093
- product_r2 = []
1094
-
1095
- for exp, results in self.best_models_by_experiment.items():
1096
- experiments.append(exp)
1097
- biomass_r2.append(results.get('Biomass', {}).get('r2', 0))
1098
- substrate_r2.append(results.get('Substrate', {}).get('r2', 0))
1099
- product_r2.append(results.get('Product', {}).get('r2', 0))
1100
-
1101
- # Create figure with subplots
1102
- fig, axes = plt.subplots(2, 2, figsize=(15, 12))
1103
- fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)
1104
-
1105
- # 1. R² comparison by experiment and variable type
1106
- ax1 = axes[0, 0]
1107
- x = np.arange(len(experiments))
1108
- width = 0.25
1109
-
1110
- ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
1111
- ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
1112
- ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)
1113
-
1114
- ax1.set_xlabel('Experiment')
1115
- ax1.set_ylabel('R²')
1116
- ax1.set_title('Best Model R² by Experiment and Variable Type')
1117
- ax1.set_xticks(x)
1118
- ax1.set_xticklabels(experiments, rotation=45, ha='right')
1119
- ax1.legend()
1120
- ax1.grid(True, alpha=0.3)
1121
-
1122
- # Add value labels
1123
- for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
1124
- if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
1125
- if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
1126
- if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)
1127
-
1128
- # 2. Model frequency heatmap
1129
- ax2 = axes[0, 1]
1130
- # This would show which models appear most frequently as best
1131
- # Implementation depends on actual data structure
1132
- ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
1133
- ha='center', va='center', transform=ax2.transAxes)
1134
- ax2.set_title('Most Frequently Selected Models')
1135
-
1136
- # 3. Parameter evolution across experiments
1137
- ax3 = axes[1, 0]
1138
- ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
1139
- ha='center', va='center', transform=ax3.transAxes)
1140
- ax3.set_title('Parameter Trends')
1141
-
1142
- # 4. Overall best models summary
1143
- ax4 = axes[1, 1]
1144
- ax4.axis('off')
1145
-
1146
- summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
1147
- for var_type, model_info in self.overall_best_models.items():
1148
- if model_info:
1149
- summary_text += f"{var_type.upper()}:\\n"
1150
- summary_text += f" Model: {model_info['name']}\\n"
1151
- summary_text += f" Avg R²: {model_info['avg_r2']:.4f}\\n"
1152
- summary_text += f" Tested in: {model_info['n_experiments']} experiments\\n\\n"
1153
-
1154
- ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
1155
- fontsize=12, verticalalignment='top', fontfamily='monospace')
1156
- ax4.set_title('Overall Best Models Summary')
1157
-
1158
- plt.tight_layout()
1159
- plt.show()
1160
-
1161
- def generate_summary_table(self) -> pd.DataFrame:
1162
- \"\"\"Generate a summary table of best models by experiment and type\"\"\"
1163
- summary_data = []
1164
-
1165
- for exp, results in self.best_models_by_experiment.items():
1166
- for var_type, var_results in results.items():
1167
- summary_data.append({
1168
- 'Experiment': exp,
1169
- 'Variable_Type': var_type,
1170
- 'Best_Model': var_results['best_model'],
1171
- 'R2': var_results['r2'],
1172
- 'RMSE': var_results['rmse']
1173
- })
1174
-
1175
- summary_df = pd.DataFrame(summary_data)
1176
-
1177
- print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
1178
- print("="*80)
1179
- print(summary_df.to_string(index=False))
1180
-
1181
- return summary_df
1182
-
1183
- # Example usage
1184
- if __name__ == "__main__":
1185
- print("🧬 Experimental Model Comparison System")
1186
- print("="*60)
1187
-
1188
- # Example data structure with experiments
1189
- example_data = {
1190
- 'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
1191
- 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
1192
- 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
1193
- 'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
1194
- 'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
1195
- 'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
1196
- 'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
1197
- 'Substrate', 'Substrate', 'Substrate', 'Substrate',
1198
- 'Product', 'Product', 'Product', 'Product'],
1199
- 'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
1200
- 0.9723, 0.9856, 0.9698, 0.9812,
1201
- 0.9634, 0.9512, 0.9687, 0.9423],
1202
- 'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
1203
- 0.0312, 0.0245, 0.0334, 0.0289,
1204
- 0.0412, 0.0523, 0.0389, 0.0567],
1205
- 'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
1206
- None, None, None, None, None, None, None, None],
1207
- 'Ks': [None, None, None, None, None, None,
1208
- 2.1, 1.8, 2.3, 1.9, None, None, None, None]
1209
- }
1210
-
1211
- # Create analyzer
1212
- analyzer = ExperimentalModelAnalyzer()
1213
-
1214
- # Load data
1215
- analyzer.load_results(data_dict=example_data)
1216
-
1217
- # Analyze by experiment
1218
- results = analyzer.analyze_by_experiment()
1219
-
1220
- # Create visualizations
1221
- analyzer.create_comparison_visualizations()
1222
-
1223
- # Generate summary table
1224
- summary = analyzer.generate_summary_table()
1225
-
1226
- print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
1227
- """
1228
-
1229
- return code
1230
 
1231
  # Estado global para almacenar resultados
1232
  class AppState:
1233
  def __init__(self):
 
1234
  self.current_analysis = ""
1235
  self.current_code = ""
1236
  self.current_language = "en"
 
1237
 
1238
  app_state = AppState()
1239
 
1240
  def export_report(export_format: str, language: str) -> Tuple[str, str]:
1241
  """Exporta el reporte al formato seleccionado"""
1242
  if not app_state.current_analysis:
1243
- error_msg = {
1244
- 'en': "No analysis available to export",
1245
- 'es': "No hay análisis disponible para exportar",
1246
- 'fr': "Aucune analyse disponible pour exporter",
1247
- 'de': "Keine Analyse zum Exportieren verfügbar",
1248
- 'pt': "Nenhuma análise disponível para exportar"
1249
- }
1250
- return error_msg.get(language, error_msg['en']), ""
1251
 
1252
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1253
 
@@ -1284,26 +943,47 @@ def create_interface():
1284
  gr.update(label=t['select_theme']), # theme_selector
1285
  gr.update(label=t['detail_level']), # detail_level
1286
  gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
 
 
1287
  gr.update(value=t['analyze_button']), # analyze_btn
1288
  gr.update(label=t['export_format']), # export_format
1289
  gr.update(value=t['export_button']), # export_btn
1290
- gr.update(label=t['comparative_analysis']), # analysis_output
1291
- gr.update(label=t['implementation_code']), # code_output
 
 
1292
  gr.update(label=t['data_format']) # data_format_accordion
1293
  ]
1294
 
1295
- def process_and_store(files, model, detail, language, additional_specs):
1296
  """Procesa archivos y almacena resultados"""
1297
  if not files:
1298
  error_msg = TRANSLATIONS[language]['error_no_files']
1299
- return error_msg, ""
1300
 
1301
- analysis, code = process_files(files, model, detail, language, additional_specs)
 
 
 
 
 
1302
  app_state.current_analysis = analysis
1303
  app_state.current_code = code
1304
- return analysis, code
 
 
 
 
 
 
 
 
 
 
 
 
1305
 
1306
- with gr.Blocks(theme=THEMES[current_theme]) as demo:
1307
  # Componentes de UI
1308
  with gr.Row():
1309
  with gr.Column(scale=3):
@@ -1312,8 +992,7 @@ def create_interface():
1312
  with gr.Column(scale=1):
1313
  with gr.Row():
1314
  language_selector = gr.Dropdown(
1315
- choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
1316
- ("Deutsch", "de"), ("Português", "pt")],
1317
  value="en",
1318
  label=TRANSLATIONS[current_language]['select_language'],
1319
  interactive=True
@@ -1350,7 +1029,6 @@ def create_interface():
1350
  label=TRANSLATIONS[current_language]['detail_level']
1351
  )
1352
 
1353
- # Nueva entrada para especificaciones adicionales
1354
  additional_specs = gr.Textbox(
1355
  label=TRANSLATIONS[current_language]['additional_specs'],
1356
  placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
@@ -1359,6 +1037,25 @@ def create_interface():
1359
  interactive=True
1360
  )
1361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1362
  analyze_btn = gr.Button(
1363
  TRANSLATIONS[current_language]['analyze_button'],
1364
  variant="primary",
@@ -1390,15 +1087,24 @@ def create_interface():
1390
  )
1391
 
1392
  with gr.Column(scale=2):
 
 
 
 
 
1393
  analysis_output = gr.Markdown(
1394
- label=TRANSLATIONS[current_language]['comparative_analysis']
1395
  )
1396
 
1397
  code_output = gr.Code(
1398
- label=TRANSLATIONS[current_language]['implementation_code'],
1399
  language="python",
1400
  interactive=True,
1401
- lines=20
 
 
 
 
1402
  )
1403
 
1404
  data_format_accordion = gr.Accordion(
@@ -1425,32 +1131,21 @@ def create_interface():
1425
  - **Parameters**: Model-specific parameters
1426
  """)
1427
 
1428
- # Definir ejemplos
1429
- examples = gr.Examples(
1430
- examples=[
1431
- [["examples/biomass_models_comparison.csv"], "Qwen/Qwen3-14B", "detailed", ""],
1432
- [["examples/substrate_kinetics_results.xlsx"], "Qwen/Qwen3-14B", "summarized", "Focus on temperature effects"]
1433
- ],
1434
- inputs=[files_input, model_selector, detail_level, additional_specs],
1435
- label=TRANSLATIONS[current_language]['examples']
1436
- )
1437
-
1438
- # Eventos - Actualizado para incluir additional_specs
1439
  language_selector.change(
1440
  update_interface_language,
1441
  inputs=[language_selector],
1442
  outputs=[
1443
  title_text, subtitle_text, files_input, model_selector,
1444
  language_selector, theme_selector, detail_level, additional_specs,
1445
- analyze_btn, export_format, export_btn, analysis_output,
1446
- code_output, data_format_accordion
 
1447
  ]
1448
  )
1449
 
1450
  def change_theme(theme_name):
1451
  """Cambia el tema de la interfaz"""
1452
- # Nota: En Gradio actual, cambiar el tema dinámicamente requiere recargar
1453
- # Esta es una limitación conocida
1454
  return gr.Info("Theme will be applied on next page load")
1455
 
1456
  theme_selector.change(
@@ -1461,8 +1156,9 @@ def create_interface():
1461
 
1462
  analyze_btn.click(
1463
  fn=process_and_store,
1464
- inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
1465
- outputs=[analysis_output, code_output]
 
1466
  )
1467
 
1468
  def handle_export(format, language):
 
25
  from reportlab.pdfbase.ttfonts import TTFont
26
  import matplotlib.pyplot as plt
27
  from datetime import datetime
28
+ from openai import OpenAI
29
 
30
  # Configuración para HuggingFace
31
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
 
36
  api_key=os.environ.get("NEBIUS_API_KEY")
37
  )
38
 
39
+ # Sistema de traducción
40
  TRANSLATIONS = {
41
  'en': {
42
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
 
67
  'what_analyzes': '🔍 What it specifically analyzes:',
68
  'tips': '💡 Tips for better results:',
69
  'additional_specs': '📝 Additional specifications for analysis',
70
+ 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...',
71
+ 'input_tokens': '🔢 Input tokens (0-1M)',
72
+ 'output_tokens': '🔢 Output tokens (0-1M)',
73
+ 'token_info': 'ℹ️ Token usage information',
74
+ 'input_token_count': 'Input tokens used',
75
+ 'output_token_count': 'Output tokens used',
76
+ 'total_token_count': 'Total tokens used',
77
+ 'token_cost': 'Estimated cost',
78
+ 'thinking_process': '🧠 Thinking Process',
79
+ 'analysis_report': '📊 Analysis Report',
80
+ 'code_output': '💻 Implementation Code',
81
+ 'token_usage': '💰 Token Usage'
82
  },
83
  'es': {
84
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
 
109
  'what_analyzes': '🔍 Qué analiza específicamente:',
110
  'tips': '💡 Tips para mejores resultados:',
111
  'additional_specs': '📝 Especificaciones adicionales para el análisis',
112
+ 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...',
113
+ 'input_tokens': '🔢 Tokens de entrada (0-1M)',
114
+ 'output_tokens': '🔢 Tokens de salida (0-1M)',
115
+ 'token_info': 'ℹ️ Información de uso de tokens',
116
+ 'input_token_count': 'Tokens de entrada usados',
117
+ 'output_token_count': 'Tokens de salida usados',
118
+ 'total_token_count': 'Total de tokens usados',
119
+ 'token_cost': 'Costo estimado',
120
+ 'thinking_process': '🧠 Proceso de Pensamiento',
121
+ 'analysis_report': '📊 Reporte de Análisis',
122
+ 'code_output': '💻 Código de Implementación',
123
+ 'token_usage': '💰 Uso de Tokens'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  }
126
 
 
229
  "Qwen/Qwen3-14B": {
230
  "name": "Qwen 3 14B",
231
  "description": "Modelo potente multilingüe de Alibaba",
232
+ "max_tokens": 1000000,
233
+ "best_for": "Análisis complejos y detallados",
234
+ "input_cost": 0.0000007,
235
+ "output_cost": 0.0000021
236
  },
237
  "Qwen/Qwen3-7B": {
238
  "name": "Qwen 3 7B",
239
  "description": "Modelo equilibrado para uso general",
240
+ "max_tokens": 1000000,
241
+ "best_for": "Análisis rápidos y precisos",
242
+ "input_cost": 0.00000035,
243
+ "output_cost": 0.00000105
244
  },
245
  "Qwen/Qwen1.5-14B": {
246
  "name": "Qwen 1.5 14B",
247
  "description": "Modelo avanzado para tareas complejas",
248
+ "max_tokens": 1000000,
249
+ "best_for": "Análisis técnicos detallados",
250
+ "input_cost": 0.0000007,
251
+ "output_cost": 0.0000021
252
  }
253
  }
254
 
 
318
  title_text = {
319
  'en': 'Comparative Analysis Report - Biotechnological Models',
320
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
 
 
 
321
  }
322
 
323
  doc.add_heading(title_text.get(language, title_text['en']), 0)
 
326
  date_text = {
327
  'en': 'Generated on',
328
  'es': 'Generado el',
 
 
 
329
  }
330
  doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
331
  doc.add_paragraph()
 
394
  title_text = {
395
  'en': 'Comparative Analysis Report - Biotechnological Models',
396
  'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
 
 
 
397
  }
398
 
399
  story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
 
402
  date_text = {
403
  'en': 'Generated on',
404
  'es': 'Generado el',
 
 
 
405
  }
406
  story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
407
  story.append(Spacer(1, 0.5*inch))
 
444
  def __init__(self, client, model_registry):
445
  self.client = client
446
  self.model_registry = model_registry
447
+ self.token_usage = {
448
+ 'input_tokens': 0,
449
+ 'output_tokens': 0,
450
+ 'total_tokens': 0,
451
+ 'estimated_cost': 0.0
452
+ }
453
+
454
+ def reset_token_usage(self):
455
+ """Reinicia el contador de tokens"""
456
+ self.token_usage = {
457
+ 'input_tokens': 0,
458
+ 'output_tokens': 0,
459
+ 'total_tokens': 0,
460
+ 'estimated_cost': 0.0
461
+ }
462
 
463
+ def detect_analysis_type(self, content: Union[str, pd.DataFrame], max_tokens: int = 1000) -> AnalysisType:
464
  """Detecta el tipo de análisis necesario"""
465
  if isinstance(content, pd.DataFrame):
466
  columns = [col.lower() for col in content.columns]
 
491
  try:
492
  response = self.client.chat.completions.create(
493
  model="Qwen/Qwen3-14B",
494
+ max_tokens=min(max_tokens, 100),
495
  temperature=0.0,
496
+ messages=[{"role": "user", "content": f"{prompt}\n\n{content[:5000]}"}]
497
  )
498
 
499
+ # Registrar uso de tokens
500
+ if response.usage:
501
+ self.token_usage['input_tokens'] += response.usage.prompt_tokens
502
+ self.token_usage['output_tokens'] += response.usage.completion_tokens
503
+ self.token_usage['total_tokens'] += response.usage.total_tokens
504
+
505
  result = response.choices[0].message.content.strip().upper()
506
  if "MODEL" in result:
507
  return AnalysisType.MATHEMATICAL_MODEL
 
521
  prefixes = {
522
  'en': "Please respond in English. ",
523
  'es': "Por favor responde en español. ",
 
 
 
524
  }
525
  return prefixes.get(language, prefixes['en'])
526
 
527
  def analyze_fitting_results(self, data: pd.DataFrame, qwen_model: str, detail_level: str = "detailed",
528
+ language: str = "en", additional_specs: str = "",
529
+ max_input_tokens: int = 4000, max_output_tokens: int = 4000) -> Dict:
530
  """Analiza resultados de ajuste de modelos usando Qwen"""
531
 
532
  # Preparar resumen completo de los datos
 
537
  - Columns: {list(data.columns)}
538
  - Number of models evaluated: {len(data)}
539
 
540
+ Complete data (first 5 rows):
541
+ {data.head().to_string()}
 
 
 
542
  """
543
 
 
 
 
544
  # Obtener prefijo de idioma
545
  lang_prefix = self.get_language_prompt_prefix(language)
546
 
 
703
  # Análisis principal
704
  response = self.client.chat.completions.create(
705
  model=qwen_model,
706
+ max_tokens=min(max_output_tokens, 4000),
707
  temperature=0.3,
708
  messages=[{
709
  "role": "user",
 
711
  }]
712
  )
713
 
714
+ # Registrar uso de tokens
715
+ if response.usage:
716
+ self.token_usage['input_tokens'] += response.usage.prompt_tokens
717
+ self.token_usage['output_tokens'] += response.usage.completion_tokens
718
+ self.token_usage['total_tokens'] += response.usage.total_tokens
719
+ self.token_usage['estimated_cost'] = self.calculate_cost(qwen_model, response.usage)
720
+
721
  analysis_result = response.choices[0].message.content
722
 
723
  # Generación de código
 
725
  {lang_prefix}
726
 
727
  Based on the analysis and this actual data:
728
+ {data.head().to_string()}
729
 
730
  Generate Python code that:
731
 
 
754
 
755
  code_response = self.client.chat.completions.create(
756
  model=qwen_model,
757
+ max_tokens=min(max_output_tokens, 3000),
758
  temperature=0.1,
759
  messages=[{
760
  "role": "user",
 
762
  }]
763
  )
764
 
765
+ # Registrar uso de tokens
766
+ if code_response.usage:
767
+ self.token_usage['input_tokens'] += code_response.usage.prompt_tokens
768
+ self.token_usage['output_tokens'] += code_response.usage.completion_tokens
769
+ self.token_usage['total_tokens'] += code_response.usage.total_tokens
770
+ self.token_usage['estimated_cost'] += self.calculate_cost(qwen_model, code_response.usage)
771
+
772
  code_result = code_response.choices[0].message.content
773
 
774
  return {
 
782
  for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
783
  "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
784
  "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
 
785
  }
786
  }
787
 
788
  except Exception as e:
789
  print(f"Error en análisis: {str(e)}")
790
  return {"error": str(e)}
791
+
792
+ def calculate_cost(self, model_name: str, usage) -> float:
793
+ """Calcula el costo estimado en dólares"""
794
+ if model_name not in QWEN_MODELS:
795
+ return 0.0
796
+
797
+ model_info = QWEN_MODELS[model_name]
798
+ input_cost = model_info.get('input_cost', 0.0)
799
+ output_cost = model_info.get('output_cost', 0.0)
800
+
801
+ return (usage.prompt_tokens * input_cost) + (usage.completion_tokens * output_cost)
802
 
803
  def process_files(files, qwen_model: str, detail_level: str = "detailed",
804
+ language: str = "en", additional_specs: str = "",
805
+ max_input_tokens: int = 4000, max_output_tokens: int = 4000) -> Tuple[str, str, str, Dict]:
806
  """Procesa múltiples archivos usando Qwen"""
807
  processor = FileProcessor()
808
  analyzer = AIAnalyzer(client, model_registry)
809
+ analyzer.reset_token_usage()
810
+
811
  results = []
812
  all_code = []
813
+ thinking_process = []
814
 
815
  for file in files:
816
  if file is None:
 
825
  if file_ext in ['.csv', '.xlsx', '.xls']:
826
  if language == 'es':
827
  results.append(f"## 📊 Análisis de Resultados: {file_name}")
828
+ thinking_process.append(f"### 🔍 Procesando archivo: {file_name}")
829
  else:
830
  results.append(f"## 📊 Results Analysis: {file_name}")
831
+ thinking_process.append(f"### 🔍 Processing file: {file_name}")
832
 
833
  if file_ext == '.csv':
834
  df = processor.read_csv(file_content)
835
+ thinking_process.append("✅ Archivo CSV leído correctamente" if language == 'es' else "✅ CSV file read successfully")
836
  else:
837
  df = processor.read_excel(file_content)
838
+ thinking_process.append("✅ Archivo Excel leído correctamente" if language == 'es' else "✅ Excel file read successfully")
839
 
840
  if df is not None:
841
+ analysis_type = analyzer.detect_analysis_type(df, max_input_tokens)
842
+ thinking_process.append(f"🔎 Tipo de análisis detectado: {analysis_type.value}" if language == 'es' else f"🔎 Analysis type detected: {analysis_type.value}")
843
 
844
  if analysis_type == AnalysisType.FITTING_RESULTS:
845
  result = analyzer.analyze_fitting_results(
846
+ df, qwen_model, detail_level, language, additional_specs,
847
+ max_input_tokens, max_output_tokens
848
  )
849
 
850
  if language == 'es':
 
857
  all_code.append(result["codigo_implementacion"])
858
 
859
  results.append("\n---\n")
860
+ thinking_process.append("\n---\n")
861
 
862
  analysis_text = "\n".join(results)
863
  code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
864
+ thinking_text = "\n".join(thinking_process)
865
 
866
+ # Agregar información de tokens al proceso de pensamiento
867
+ token_info = analyzer.token_usage
868
+ if language == 'es':
869
+ thinking_text += f"""
870
+
871
+ ### 💰 USO DE TOKENS
872
+ - Tokens de entrada usados: {token_info['input_tokens']}
873
+ - Tokens de salida usados: {token_info['output_tokens']}
874
+ - Total de tokens: {token_info['total_tokens']}
875
+ - Costo estimado: ${token_info['estimated_cost']:.6f}
876
+ """
877
+ else:
878
+ thinking_text += f"""
879
+
880
+ ### 💰 TOKEN USAGE
881
+ - Input tokens used: {token_info['input_tokens']}
882
+ - Output tokens used: {token_info['output_tokens']}
883
+ - Total tokens: {token_info['total_tokens']}
884
+ - Estimated cost: ${token_info['estimated_cost']:.6f}
885
+ """
886
+
887
+ return thinking_text, analysis_text, code_text, token_info
888
 
889
  def generate_implementation_code(analysis_results: str) -> str:
890
  """Genera código de implementación con análisis por experimento"""
891
+ # (El código de implementación se mantiene igual que en la versión anterior)
892
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
893
 
894
  # Estado global para almacenar resultados
895
  class AppState:
896
  def __init__(self):
897
+ self.current_thinking = ""
898
  self.current_analysis = ""
899
  self.current_code = ""
900
  self.current_language = "en"
901
+ self.token_usage = {}
902
 
903
  app_state = AppState()
904
 
905
  def export_report(export_format: str, language: str) -> Tuple[str, str]:
906
  """Exporta el reporte al formato seleccionado"""
907
  if not app_state.current_analysis:
908
+ error_msg = TRANSLATIONS[language]['error_no_files']
909
+ return error_msg, ""
 
 
 
 
 
 
910
 
911
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
912
 
 
943
  gr.update(label=t['select_theme']), # theme_selector
944
  gr.update(label=t['detail_level']), # detail_level
945
  gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
946
+ gr.update(label=t['input_tokens']), # input_tokens_slider
947
+ gr.update(label=t['output_tokens']), # output_tokens_slider
948
  gr.update(value=t['analyze_button']), # analyze_btn
949
  gr.update(label=t['export_format']), # export_format
950
  gr.update(value=t['export_button']), # export_btn
951
+ gr.update(label=t['thinking_process']), # thinking_output
952
+ gr.update(label=t['analysis_report']), # analysis_output
953
+ gr.update(label=t['code_output']), # code_output
954
+ gr.update(label=t['token_usage']), # token_usage_output
955
  gr.update(label=t['data_format']) # data_format_accordion
956
  ]
957
 
958
+ def process_and_store(files, model, detail, language, additional_specs, input_tokens, output_tokens):
959
  """Procesa archivos y almacena resultados"""
960
  if not files:
961
  error_msg = TRANSLATIONS[language]['error_no_files']
962
+ return error_msg, "", "", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0, "estimated_cost": 0.0}
963
 
964
+ thinking, analysis, code, token_usage = process_files(
965
+ files, model, detail, language, additional_specs,
966
+ input_tokens, output_tokens
967
+ )
968
+
969
+ app_state.current_thinking = thinking
970
  app_state.current_analysis = analysis
971
  app_state.current_code = code
972
+ app_state.token_usage = token_usage
973
+
974
+ # Formatear información de tokens
975
+ t = TRANSLATIONS[language]
976
+ token_info = f"""
977
+ ### {t['token_info']}
978
+ - **{t['input_token_count']}:** {token_usage['input_tokens']}
979
+ - **{t['output_token_count']}:** {token_usage['output_tokens']}
980
+ - **{t['total_token_count']}:** {token_usage['total_tokens']}
981
+ - **{t['token_cost']}:** ${token_usage['estimated_cost']:.6f}
982
+ """
983
+
984
+ return thinking, analysis, code, token_info
985
 
986
+ with gr.Blocks(theme=THEMES[current_theme], title="Biotech Model Analyzer") as demo:
987
  # Componentes de UI
988
  with gr.Row():
989
  with gr.Column(scale=3):
 
992
  with gr.Column(scale=1):
993
  with gr.Row():
994
  language_selector = gr.Dropdown(
995
+ choices=[("English", "en"), ("Español", "es")],
 
996
  value="en",
997
  label=TRANSLATIONS[current_language]['select_language'],
998
  interactive=True
 
1029
  label=TRANSLATIONS[current_language]['detail_level']
1030
  )
1031
 
 
1032
  additional_specs = gr.Textbox(
1033
  label=TRANSLATIONS[current_language]['additional_specs'],
1034
  placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
 
1037
  interactive=True
1038
  )
1039
 
1040
+ # Nuevos sliders para tokens
1041
+ input_tokens_slider = gr.Slider(
1042
+ minimum=1000,
1043
+ maximum=1000000,
1044
+ value=4000,
1045
+ step=1000,
1046
+ label=TRANSLATIONS[current_language]['input_tokens'],
1047
+ info="Máximo tokens para entrada (0-1 millón)"
1048
+ )
1049
+
1050
+ output_tokens_slider = gr.Slider(
1051
+ minimum=1000,
1052
+ maximum=1000000,
1053
+ value=4000,
1054
+ step=1000,
1055
+ label=TRANSLATIONS[current_language]['output_tokens'],
1056
+ info="Máximo tokens para salida (0-1 millón)"
1057
+ )
1058
+
1059
  analyze_btn = gr.Button(
1060
  TRANSLATIONS[current_language]['analyze_button'],
1061
  variant="primary",
 
1087
  )
1088
 
1089
  with gr.Column(scale=2):
1090
+ # Nuevos outputs separados
1091
+ thinking_output = gr.Markdown(
1092
+ label=TRANSLATIONS[current_language]['thinking_process']
1093
+ )
1094
+
1095
  analysis_output = gr.Markdown(
1096
+ label=TRANSLATIONS[current_language]['analysis_report']
1097
  )
1098
 
1099
  code_output = gr.Code(
1100
+ label=TRANSLATIONS[current_language]['code_output'],
1101
  language="python",
1102
  interactive=True,
1103
+ lines=15
1104
+ )
1105
+
1106
+ token_usage_output = gr.Markdown(
1107
+ label=TRANSLATIONS[current_language]['token_usage']
1108
  )
1109
 
1110
  data_format_accordion = gr.Accordion(
 
1131
  - **Parameters**: Model-specific parameters
1132
  """)
1133
 
1134
+ # Eventos
 
 
 
 
 
 
 
 
 
 
1135
  language_selector.change(
1136
  update_interface_language,
1137
  inputs=[language_selector],
1138
  outputs=[
1139
  title_text, subtitle_text, files_input, model_selector,
1140
  language_selector, theme_selector, detail_level, additional_specs,
1141
+ input_tokens_slider, output_tokens_slider, analyze_btn, export_format,
1142
+ export_btn, thinking_output, analysis_output, code_output,
1143
+ token_usage_output, data_format_accordion
1144
  ]
1145
  )
1146
 
1147
  def change_theme(theme_name):
1148
  """Cambia el tema de la interfaz"""
 
 
1149
  return gr.Info("Theme will be applied on next page load")
1150
 
1151
  theme_selector.change(
 
1156
 
1157
  analyze_btn.click(
1158
  fn=process_and_store,
1159
+ inputs=[files_input, model_selector, detail_level, language_selector,
1160
+ additional_specs, input_tokens_slider, output_tokens_slider],
1161
+ outputs=[thinking_output, analysis_output, code_output, token_usage_output]
1162
  )
1163
 
1164
  def handle_export(format, language):