C2MV committed on
Commit 195647b · verified · 1 Parent(s): f57d15a

Update app.py

Files changed (1)
  1. app.py +443 -299
app.py CHANGED
@@ -33,7 +33,7 @@ os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
33
  # Initialize Anthropic client
34
  client = anthropic.Anthropic()
35
 
36
- # Translation system
37
  TRANSLATIONS = {
38
  'en': {
39
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
@@ -62,7 +62,9 @@ TRANSLATIONS = {
62
  'specialized_in': '🎯 Specialized in:',
63
  'metrics_analyzed': '📊 Analyzed metrics:',
64
  'what_analyzes': '🔍 What it specifically analyzes:',
65
- 'tips': '💡 Tips for better results:'
 
 
66
  },
67
  'es': {
68
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
@@ -91,7 +93,9 @@ TRANSLATIONS = {
91
  'specialized_in': '🎯 Especializado en:',
92
  'metrics_analyzed': '📊 Métricas analizadas:',
93
  'what_analyzes': '🔍 Qué analiza específicamente:',
94
- 'tips': '💡 Tips para mejores resultados:'
 
 
95
  },
96
  'fr': {
97
  'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
@@ -120,7 +124,9 @@ TRANSLATIONS = {
120
  'specialized_in': '🎯 Spécialisé dans:',
121
  'metrics_analyzed': '📊 Métriques analysées:',
122
  'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
123
- 'tips': '💡 Conseils pour de meilleurs résultats:'
 
 
124
  },
125
  'de': {
126
  'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
@@ -149,7 +155,9 @@ TRANSLATIONS = {
149
  'specialized_in': '🎯 Spezialisiert auf:',
150
  'metrics_analyzed': '📊 Analysierte Metriken:',
151
  'what_analyzes': '🔍 Was spezifisch analysiert wird:',
152
- 'tips': '💡 Tipps für bessere Ergebnisse:'
 
 
153
  },
154
  'pt': {
155
  'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
@@ -178,7 +186,9 @@ TRANSLATIONS = {
178
  'specialized_in': '🎯 Especializado em:',
179
  'metrics_analyzed': '📊 Métricas analisadas:',
180
  'what_analyzes': '🔍 O que analisa especificamente:',
181
- 'tips': '💡 Dicas para melhores resultados:'
 
 
182
  }
183
  }
184
 
@@ -568,8 +578,9 @@ class AIAnalyzer:
568
  }
569
  return prefixes.get(language, prefixes['en'])
570
 
571
- def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed", language: str = "en") -> Dict:
572
- """Analiza resultados de ajuste de modelos con soporte multiidioma"""
 
573
 
574
  # Prepare a complete summary of the data
575
  data_summary = f"""
@@ -592,6 +603,15 @@ class AIAnalyzer:
592
  # Get the language prefix
593
  lang_prefix = self.get_language_prompt_prefix(language)
594
 
 
595
  # Improved prompt with level-specific instructions
596
  if detail_level == "detailed":
597
  prompt = f"""
@@ -599,122 +619,143 @@ class AIAnalyzer:
599
 
600
  You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
601
 
602
- DETAIL LEVEL: DETAILED - Provide comprehensive analysis
603
 
604
- PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS:
605
 
606
- 1. **MODEL IDENTIFICATION AND CLASSIFICATION**
607
- - Identify ALL fitted mathematical models BY NAME (e.g., "Monod", "Logistic", "Gompertz", etc.)
608
- - Classify them by type: biomass growth, substrate consumption, product formation
609
- - Indicate the mathematical equation of each model
610
- - Mention which experiments/conditions were tested
611
 
612
- 2. **COMPARATIVE ANALYSIS OF FIT QUALITY**
613
- - Compare ALL available indicators: R², RMSE, AIC, BIC, etc.
614
- - Create a detailed ranking from best to worst model with exact values
615
- - For the TOP 3 models, specify:
616
- * Model name: [exact name from data]
617
- * R² value: [exact value]
618
- * RMSE value: [exact value]
619
- * Key parameters and their values
620
- - Identify significant differences between models
621
- - Detect possible overfitting or underfitting
622
 
623
- 3. **DETERMINATION OF THE BEST MODEL PER CATEGORY**
624
- - **BEST OVERALL MODEL**: [Name] with R²=[value], RMSE=[value]
625
- - **BEST BIOMASS MODEL** (if applicable): [Name] with parameters
626
- - **BEST SUBSTRATE MODEL** (if applicable): [Name] with parameters
627
- - **BEST PRODUCT MODEL** (if applicable): [Name] with parameters
628
- - Justify NUMERICALLY why each is the best
 
 
 
629
 
630
- 4. **DETAILED ANALYSIS BY VARIABLE TYPE**
631
- a) **BIOMASS (if applicable)**:
632
- - Growth parameters (μmax, Xmax, etc.) with exact values
633
- - Doubling time calculations
634
- - Biomass productivity
635
- - Compare parameters between models numerically
636
 
637
- b) **SUBSTRATE (if applicable)**:
638
- - Affinity constants (Ks, Km) with exact values
639
- - Consumption rates
640
- - Yield Yx/s calculations
641
- - Utilization efficiency percentages
 
 
642
 
643
- c) **PRODUCT (if applicable)**:
644
- - Production parameters (α, β) with exact values
645
- - Specific productivity calculations
646
- - Yield Yp/x values
647
- - Production type classification
648
 
649
- 5. **BIOLOGICAL INTERPRETATION OF PARAMETERS**
650
- - Explain what EACH parameter means biologically
651
- - Compare parameter values between models
652
- - Evaluate if values are realistic for the biological system
653
- - Identify critical process control parameters
654
 
655
- 6. **DETAILED CONCLUSIONS WITH NUMERICAL CONTENT**
656
- - List the winning model for each category with full statistics
657
- - Provide confidence intervals if available
658
- - Indicate optimal operating conditions based on parameters
659
- - Suggest specific design values for scale-up
 
660
 
661
- 7. **PRACTICAL RECOMMENDATIONS**
662
- - Which specific models to use for different predictions
663
- - Limitations of each selected model
664
- - Recommended validation experiments
665
- - Industrial implementation considerations
 
666
 
667
- 8. **COMPREHENSIVE COMPARATIVE TABLE**
668
- Create a detailed table with ALL models showing:
669
- | Model Name | Type | R² | RMSE | AIC | BIC | Key Parameters | Best For | Ranking |
 
 
670
 
671
- Use Markdown format with clear structure and include ALL numerical values from the data.
 
672
  """
673
  else: # summarized
674
  prompt = f"""
675
  {lang_prefix}
676
 
677
- You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis of these fitting results.
 
 
678
 
679
- DETAIL LEVEL: SUMMARIZED - Be concise but include all essential information
680
 
681
  PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
682
 
683
- 1. **QUICK MODEL OVERVIEW**
684
- - List ALL models tested: [names]
685
- - Categories covered: biomass/substrate/product
 
686
 
687
- 2. **BEST MODELS - TOP PERFORMERS**
688
- 🏆 **OVERALL WINNER**: [Model Name]
689
- - R² = [exact value]
690
- - RMSE = [exact value]
691
- - Key parameters: [list with values]
 
692
 
693
- 📊 **BY CATEGORY**:
694
- - **Biomass**: [Model] (R²=[value], μmax=[value])
695
- - **Substrate**: [Model] (R²=[value], Ks=[value])
696
- - **Product**: [Model] (R²=[value], key param=[value])
 
 
697
 
698
- 3. **KEY NUMERICAL FINDINGS**
699
- - Best fit achieved: R² = [value] with [model]
700
- - Parameter ranges: μmax=[min-max], Ks=[min-max]
701
- - Productivity values: [specific numbers]
702
- - Yields: Yx/s=[value], Yp/x=[value]
703
 
704
  4. **QUICK COMPARISON TABLE**
705
- | Rank | Model | R² | RMSE | Best Application |
706
- |------|-------|-----|------|------------------|
707
- | 1 | [Name]| [#] | [#] | [Use case] |
708
- | 2 | [Name]| [#] | [#] | [Use case] |
709
- | 3 | [Name]| [#] | [#] | [Use case] |
 
 
 
 
710
 
711
- 5. **PRACTICAL CONCLUSIONS**
712
- - Use [Model X] for biomass prediction (R²=[value])
713
- - Use [Model Y] for substrate monitoring (R²=[value])
714
- - Critical parameters for control: [list with values]
715
- - Scale-up recommendation: [specific values]
716
 
717
- Keep it concise but include ALL model names and key numerical values.
718
  """
719
 
720
  try:
@@ -737,18 +778,24 @@ class AIAnalyzer:
737
  Generate Python code that:
738
 
739
  1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
740
- 2. Implements the best models identified with their EXACT parameters
741
- 3. Includes visualization functions that use the REAL data values
742
- 4. Shows comparative analysis with the SPECIFIC numbers from the results
 
 
 
 
 
 
743
 
744
  The code must include:
745
- - Data loading section with the actual values hardcoded as example
746
- - Model implementation with the exact parameter values found
747
- - Visualization showing the actual R², RMSE values in graphs
748
- - Comparison functions using the real numerical data
749
- - Predictions using the best model's actual parameters
750
 
751
- Make sure to include comments indicating which model won and why, with its exact statistics.
752
 
753
  Format: Complete, executable Python code with actual data values embedded.
754
  """
@@ -780,8 +827,9 @@ class AIAnalyzer:
780
  except Exception as e:
781
  return {"error": str(e)}
782
 
783
- def process_files(files, claude_model: str, detail_level: str = "detailed", language: str = "en") -> Tuple[str, str]:
784
- """Procesa múltiples archivos con soporte de idioma"""
 
785
  processor = FileProcessor()
786
  analyzer = AIAnalyzer(client, model_registry)
787
  results = []
@@ -812,7 +860,9 @@ def process_files(files, claude_model: str, detail_level: str = "detailed", lang
812
  analysis_type = analyzer.detect_analysis_type(df)
813
 
814
  if analysis_type == AnalysisType.FITTING_RESULTS:
815
- result = analyzer.analyze_fitting_results(df, claude_model, detail_level, language)
 
 
816
 
817
  if language == 'es':
818
  results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
@@ -831,7 +881,7 @@ def process_files(files, claude_model: str, detail_level: str = "detailed", lang
831
  return analysis_text, code_text
832
 
833
  def generate_implementation_code(analysis_results: str) -> str:
834
- """Genera código de implementación con valores numéricos del análisis"""
835
  code = """
836
  import numpy as np
837
  import pandas as pd
@@ -846,16 +896,21 @@ from typing import Dict, List, Tuple, Optional
846
  plt.style.use('seaborn-v0_8-darkgrid')
847
  sns.set_palette("husl")
848
 
849
- class ComparativeModelAnalyzer:
850
  \"\"\"
851
- Class for comparative analysis of biotechnological model fitting results.
852
- Specialized in comparing biomass, substrate and product models.
853
  \"\"\"
854
 
855
  def __init__(self):
856
  self.results_df = None
857
- self.best_models = {}
858
- self.model_rankings = {}
 
 
 
 
 
859
 
860
  def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
861
  \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
@@ -870,221 +925,292 @@ class ComparativeModelAnalyzer:
870
  print(f"✅ Data loaded: {len(self.results_df)} models")
871
  print(f"📊 Available columns: {list(self.results_df.columns)}")
872
 
 
 
 
 
 
873
  return self.results_df
874
 
875
- def analyze_model_quality(self,
 
 
 
876
  r2_col: str = 'R2',
877
- rmse_col: str = 'RMSE',
878
- aic_col: Optional[str] = 'AIC',
879
- bic_col: Optional[str] = 'BIC',
880
- model_col: str = 'Model') -> pd.DataFrame:
881
  \"\"\"
882
- Analyze and compare the fit quality of all models.
883
- Create a ranking based on multiple metrics.
884
  \"\"\"
885
  if self.results_df is None:
886
  raise ValueError("First load data with load_results()")
887
 
888
- # Create comparison DataFrame
889
- comparison = self.results_df.copy()
890
-
891
- # Calculate composite score
892
- scores = pd.DataFrame(index=comparison.index)
893
-
894
- # Normalize metrics (0-1)
895
- if r2_col in comparison.columns:
896
- scores['r2_score'] = comparison[r2_col] # Already between 0-1
897
-
898
- if rmse_col in comparison.columns:
899
- # Invert and normalize RMSE (lower is better)
900
- max_rmse = comparison[rmse_col].max()
901
- scores['rmse_score'] = 1 - (comparison[rmse_col] / max_rmse)
902
-
903
- if aic_col and aic_col in comparison.columns:
904
- # Invert and normalize AIC (lower is better)
905
- min_aic = comparison[aic_col].min()
906
- max_aic = comparison[aic_col].max()
907
- scores['aic_score'] = 1 - ((comparison[aic_col] - min_aic) / (max_aic - min_aic))
908
-
909
- if bic_col and bic_col in comparison.columns:
910
- # Invert and normalize BIC (lower is better)
911
- min_bic = comparison[bic_col].min()
912
- max_bic = comparison[bic_col].max()
913
- scores['bic_score'] = 1 - ((comparison[bic_col] - min_bic) / (max_bic - min_bic))
914
-
915
- # Calculate total score (weighted average)
916
- weights = {
917
- 'r2_score': 0.4,
918
- 'rmse_score': 0.3,
919
- 'aic_score': 0.15,
920
- 'bic_score': 0.15
921
- }
922
 
923
- scores['total_score'] = 0
924
- for metric, weight in weights.items():
925
- if metric in scores.columns:
926
- scores['total_score'] += scores[metric] * weight
 
 
927
 
928
- # Add score to comparison DataFrame
929
- comparison['Score'] = scores['total_score']
930
- comparison['Ranking'] = comparison['Score'].rank(ascending=False).astype(int)
931
 
932
- # Sort by ranking
933
- comparison = comparison.sort_values('Ranking')
 
 
 
 
934
 
935
- # Identify best models by category
936
- if 'Type' in comparison.columns:
937
- for model_type in comparison['Type'].unique():
938
- type_models = comparison[comparison['Type'] == model_type]
939
- if not type_models.empty:
940
- best_idx = type_models['Score'].idxmax()
941
- self.best_models[model_type] = type_models.loc[best_idx]
942
 
943
- # Best overall model
944
- best_idx = comparison['Score'].idxmax()
945
- self.best_models['overall'] = comparison.loc[best_idx]
946
 
947
- # Print comparison table with actual values
 
 
 
948
  print("\\n" + "="*80)
949
- print("📊 MODEL COMPARISON TABLE - ACTUAL RESULTS")
950
  print("="*80)
951
 
952
- print(f"\\n{'Rank':<6} {'Model':<20} {'R²':<8} {'RMSE':<10} {'AIC':<10} {'BIC':<10} {'Score':<8}")
953
- print("-"*80)
954
-
955
- for idx, row in comparison.iterrows():
956
- rank = row['Ranking']
957
- model = row.get(model_col, f'Model_{idx}')[:20]
958
- r2 = row.get(r2_col, 0)
959
- rmse = row.get(rmse_col, 0)
960
- aic = row.get(aic_col, 'N/A')
961
- bic = row.get(bic_col, 'N/A')
962
- score = row['Score']
 
 
 
 
963
 
964
- print(f"{rank:<6} {model:<20} {r2:<8.4f} {rmse:<10.4f} ", end="")
965
- if isinstance(aic, (int, float)):
966
- print(f"{aic:<10.2f} ", end="")
967
- else:
968
- print(f"{'N/A':<10} ", end="")
969
- if isinstance(bic, (int, float)):
970
- print(f"{bic:<10.2f} ", end="")
971
- else:
972
- print(f"{'N/A':<10} ", end="")
973
- print(f"{score:<8.4f}")
974
-
975
- print("\\n🏆 BEST MODELS BY CATEGORY:")
976
- for category, model_data in self.best_models.items():
977
- if isinstance(model_data, pd.Series):
978
- print(f"\\n{category.upper()}:")
979
- print(f" Model: {model_data.get(model_col, 'Unknown')}")
980
- print(f" R² = {model_data.get(r2_col, 0):.4f}")
981
- print(f" RMSE = {model_data.get(rmse_col, 0):.4f}")
982
-
983
- self.model_rankings = comparison
984
- return comparison
 
 
985
 
986
- def visualize_comparison(self):
987
- \"\"\"Create visualization of model comparison with actual data\"\"\"
988
- if self.model_rankings is None:
989
- raise ValueError("First run analyze_model_quality()")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
990
 
991
- fig, axes = plt.subplots(2, 2, figsize=(14, 10))
992
- fig.suptitle('Model Comparison - Actual Fitting Results', fontsize=16)
 
993
 
994
- # 1. R² comparison
995
- ax1 = axes[0, 0]
996
- models = self.model_rankings.get('Model', self.model_rankings.index)
997
- r2_values = self.model_rankings.get('R2', [])
998
- ax1.bar(range(len(models)), r2_values, color='skyblue')
999
- ax1.set_xlabel('Models')
1000
  ax1.set_ylabel('R²')
1001
- ax1.set_title('R² Comparison')
1002
- ax1.set_xticks(range(len(models)))
1003
- ax1.set_xticklabels(models, rotation=45, ha='right')
1004
- ax1.axhline(y=0.95, color='r', linestyle='--', label='Excellent fit (0.95)')
1005
  ax1.legend()
 
1006
 
1007
- # Add actual values on bars
1008
- for i, v in enumerate(r2_values):
1009
- ax1.text(i, v + 0.01, f'{v:.3f}', ha='center', va='bottom')
 
 
1010
 
1011
- # 2. RMSE comparison
1012
  ax2 = axes[0, 1]
1013
- rmse_values = self.model_rankings.get('RMSE', [])
1014
- ax2.bar(range(len(models)), rmse_values, color='salmon')
1015
- ax2.set_xlabel('Models')
1016
- ax2.set_ylabel('RMSE')
1017
- ax2.set_title('RMSE Comparison (Lower is Better)')
1018
- ax2.set_xticks(range(len(models)))
1019
- ax2.set_xticklabels(models, rotation=45, ha='right')
1020
-
1021
- # Add actual values on bars
1022
- for i, v in enumerate(rmse_values):
1023
- ax2.text(i, v + 0.001, f'{v:.3f}', ha='center', va='bottom')
1024
-
1025
- # 3. Combined score
1026
  ax3 = axes[1, 0]
1027
- scores = self.model_rankings.get('Score', [])
1028
- ax3.bar(range(len(models)), scores, color='lightgreen')
1029
- ax3.set_xlabel('Models')
1030
- ax3.set_ylabel('Combined Score')
1031
- ax3.set_title('Overall Model Score')
1032
- ax3.set_xticks(range(len(models)))
1033
- ax3.set_xticklabels(models, rotation=45, ha='right')
1034
-
1035
- # 4. Ranking visualization
1036
  ax4 = axes[1, 1]
1037
- rankings = self.model_rankings.get('Ranking', [])
1038
- ax4.scatter(r2_values, rmse_values, s=100, c=rankings, cmap='viridis')
1039
- ax4.set_xlabel('R²')
1040
- ax4.set_ylabel('RMSE')
1041
- ax4.set_title('R² vs RMSE (color = ranking)')
1042
-
1043
- # Annotate best model
1044
- best_model = self.best_models.get('overall')
1045
- if isinstance(best_model, pd.Series):
1046
- best_r2 = best_model.get('R2', 0)
1047
- best_rmse = best_model.get('RMSE', 0)
1048
- best_name = best_model.get('Model', 'Best')
1049
- ax4.annotate(f'Best: {best_name}',
1050
- xy=(best_r2, best_rmse),
1051
- xytext=(best_r2-0.05, best_rmse+0.01),
1052
- arrowprops=dict(arrowstyle='->', color='red'))
1053
 
1054
  plt.tight_layout()
1055
  plt.show()
 
 
 
 
1056
 
1057
- # Example usage with actual data
1058
  if __name__ == "__main__":
1059
- print("🧬 Biotechnological Model Comparative Analysis System")
1060
  print("="*60)
1061
 
1062
- # Example data structure (replace with your actual data)
1063
  example_data = {
1064
- 'Model': ['Monod', 'Logistic', 'Gompertz', 'Modified_Gompertz'],
1065
- 'Type': ['Substrate', 'Biomass', 'Biomass', 'Biomass'],
1066
- 'R2': [0.9845, 0.9912, 0.9956, 0.9889],
1067
- 'RMSE': [0.0234, 0.0189, 0.0145, 0.0201],
1068
- 'AIC': [-45.23, -48.91, -52.34, -47.56],
1069
- 'BIC': [-42.11, -45.79, -49.22, -44.44],
1070
- 'mu_max': [0.45, 0.48, 0.52, 0.49],
1071
- 'Ks': [2.1, None, None, None],
1072
- 'Xmax': [None, 12.5, 13.1, 12.8]
 
 
1073
  }
1074
 
1075
  # Create analyzer
1076
- analyzer = ComparativeModelAnalyzer()
1077
 
1078
  # Load data
1079
  analyzer.load_results(data_dict=example_data)
1080
 
1081
- # Analyze
1082
- results = analyzer.analyze_model_quality()
 
 
 
1083
 
1084
- # Visualize
1085
- analyzer.visualize_comparison()
1086
 
1087
- print("\\n✨ Analysis complete! Best models identified with actual parameters.")
1088
  """
1089
 
1090
  return code
@@ -1144,6 +1270,7 @@ def create_interface():
1144
  gr.update(label=t['select_language']), # language_selector
1145
  gr.update(label=t['select_theme']), # theme_selector
1146
  gr.update(label=t['detail_level']), # detail_level
 
1147
  gr.update(value=t['analyze_button']), # analyze_btn
1148
  gr.update(label=t['export_format']), # export_format
1149
  gr.update(value=t['export_button']), # export_btn
@@ -1152,13 +1279,13 @@ def create_interface():
1152
  gr.update(label=t['data_format']) # data_format_accordion
1153
  ]
1154
 
1155
- def process_and_store(files, model, detail, language):
1156
  """Procesa archivos y almacena resultados"""
1157
  if not files:
1158
  error_msg = TRANSLATIONS[language]['error_no_files']
1159
  return error_msg, ""
1160
 
1161
- analysis, code = process_files(files, model, detail, language)
1162
  app_state.current_analysis = analysis
1163
  app_state.current_code = code
1164
  return analysis, code
@@ -1210,6 +1337,15 @@ def create_interface():
1210
  label=TRANSLATIONS[current_language]['detail_level']
1211
  )
1212
 
 
 
 
 
 
 
 
 
 
1213
  analyze_btn = gr.Button(
1214
  TRANSLATIONS[current_language]['analyze_button'],
1215
  variant="primary",
@@ -1261,32 +1397,40 @@ def create_interface():
1261
  gr.Markdown("""
1262
  ### Expected CSV/Excel structure:
1263
 
1264
- | Model | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
1265
- |-------|-----|------|-----|-----|--------|-------|------------|
1266
- | Monod | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
1267
- | Logistic | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
1268
- | Gompertz | 0.992 | 0.018 | -48.5 | -45.2 | 0.48 | - | {...} |
 
 
1269
  """)
1270
 
1271
  # Define examples
1272
  examples = gr.Examples(
1273
  examples=[
1274
- [["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed"],
1275
- [["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized"]
1276
  ],
1277
- inputs=[files_input, model_selector, detail_level],
1278
  label=TRANSLATIONS[current_language]['examples']
1279
  )
1280
 
1281
- # Events - Fixed: removing examples from the outputs
1282
  language_selector.change(
1283
  update_interface_language,
1284
  inputs=[language_selector],
1285
  outputs=[
1286
  title_text, subtitle_text, files_input, model_selector,
1287
- language_selector, theme_selector, detail_level, analyze_btn,
1288
- export_format, export_btn, analysis_output, code_output,
1289
- data_format_accordion # Removed examples_label
1290
  ]
1291
  )
1292
 
@@ -1304,7 +1448,7 @@ def create_interface():
1304
 
1305
  analyze_btn.click(
1306
  fn=process_and_store,
1307
- inputs=[files_input, model_selector, detail_level, language_selector],
1308
  outputs=[analysis_output, code_output]
1309
  )
1310
 
 
33
  # Initialize Anthropic client
34
  client = anthropic.Anthropic()
35
 
36
+ # Translation system - Updated with new entries
37
  TRANSLATIONS = {
38
  'en': {
39
  'title': '🧬 Comparative Analyzer of Biotechnological Models',
 
62
  'specialized_in': '🎯 Specialized in:',
63
  'metrics_analyzed': '📊 Analyzed metrics:',
64
  'what_analyzes': '🔍 What it specifically analyzes:',
65
+ 'tips': '💡 Tips for better results:',
66
+ 'additional_specs': '📝 Additional specifications for analysis',
67
+ 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
68
  },
69
  'es': {
70
  'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
 
93
  'specialized_in': '🎯 Especializado en:',
94
  'metrics_analyzed': '📊 Métricas analizadas:',
95
  'what_analyzes': '🔍 Qué analiza específicamente:',
96
+ 'tips': '💡 Tips para mejores resultados:',
97
+ 'additional_specs': '📝 Especificaciones adicionales para el análisis',
98
+ 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
99
  },
100
  'fr': {
101
  'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
 
124
  'specialized_in': '🎯 Spécialisé dans:',
125
  'metrics_analyzed': '📊 Métriques analysées:',
126
  'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
127
+ 'tips': '💡 Conseils pour de meilleurs résultats:',
128
+ 'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
129
+ 'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
130
  },
131
  'de': {
132
  'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
 
155
  'specialized_in': '🎯 Spezialisiert auf:',
156
  'metrics_analyzed': '📊 Analysierte Metriken:',
157
  'what_analyzes': '🔍 Was spezifisch analysiert wird:',
158
+ 'tips': '💡 Tipps für bessere Ergebnisse:',
159
+ 'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
160
+ 'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
161
  },
162
  'pt': {
163
  'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
 
186
  'specialized_in': '🎯 Especializado em:',
187
  'metrics_analyzed': '📊 Métricas analisadas:',
188
  'what_analyzes': '🔍 O que analisa especificamente:',
189
+ 'tips': '💡 Dicas para melhores resultados:',
190
+ 'additional_specs': '📝 Especificações adicionais para a análise',
191
+ 'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
192
  }
193
  }
194
 
 
578
  }
579
  return prefixes.get(language, prefixes['en'])
580
 
581
+ def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
582
+ language: str = "en", additional_specs: str = "") -> Dict:
583
+ """Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
584
 
585
  # Prepare a complete summary of the data
586
  data_summary = f"""
 
603
  # Get the language prefix
604
  lang_prefix = self.get_language_prompt_prefix(language)
605
 
606
+ # Add the user's additional specifications if provided
607
+ user_specs_section = f"""
608
+
609
+ USER ADDITIONAL SPECIFICATIONS:
610
+ {additional_specs}
611
+
612
+ Please ensure to address these specific requirements in your analysis.
613
+ """ if additional_specs else ""
614
+
615
  # Improved prompt with level-specific instructions
616
  if detail_level == "detailed":
617
  prompt = f"""
 
619
 
620
  You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
621
 
622
+ {user_specs_section}
623
 
624
+ DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
625
 
626
+ PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
 
 
 
 
627
 
628
+ 1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
629
+ - List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
630
+ - For EACH experiment, identify:
631
+ * Experimental conditions
632
+ * Number of models tested
633
+ * Variables measured (biomass, substrate, product)
 
 
 
 
634
 
635
+ 2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
636
+ For EACH EXPERIMENT separately:
637
+ - Identify ALL fitted mathematical models BY NAME
638
+ - Classify them: biomass growth, substrate consumption, product formation
639
+ - Show the mathematical equation of each model
640
+ - List parameter values obtained for that specific experiment
641
+
642
+ 3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
643
+ Create a section for EACH EXPERIMENT showing:
644
+
645
+ **EXPERIMENT [Name/Condition]:**
646
+
647
+ a) **BIOMASS MODELS** (if applicable):
648
+ - Best model: [Name] with R²=[value], RMSE=[value]
649
+ - Parameters: μmax=[value], Xmax=[value], etc.
650
+ - Ranking of all biomass models tested
651
+
652
+ b) **SUBSTRATE MODELS** (if applicable):
653
+ - Best model: [Name] with R²=[value], RMSE=[value]
654
+ - Parameters: Ks=[value], Yxs=[value], etc.
655
+ - Ranking of all substrate models tested
656
+
657
+ c) **PRODUCT MODELS** (if applicable):
658
+ - Best model: [Name] with R²=[value], RMSE=[value]
659
+ - Parameters: α=[value], β=[value], etc.
660
+ - Ranking of all product models tested
661
 
662
+ 4. **DETAILED COMPARATIVE TABLES**
 
 
 
 
 
663
 
664
+ **Table 1: Summary by Experiment and Variable Type**
665
+ | Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
666
+ |------------|----------|------------|-------|------|----------------|---------|
667
+ | Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
668
+ | Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
669
+ | Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
670
+ | Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
671
 
672
+ **Table 2: Complete Model Comparison Across All Experiments**
673
+ | Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
 
 
 
674
 
675
+ 5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
676
+ - Compare how parameters change between experiments
677
+ - Identify trends (e.g., μmax increases with temperature)
678
+ - Calculate average parameters and variability
679
+ - Suggest optimal conditions based on parameters
680
 
681
+ 6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
682
+ For each experiment, explain:
683
+ - What the parameter values mean biologically
684
+ - Whether values are realistic for the conditions
685
+ - Key differences between experiments
686
+ - Critical control parameters identified
687
 
688
+ 7. **OVERALL BEST MODELS DETERMINATION**
689
+ - **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
690
+ - **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
691
+ - **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
692
+
693
+ Justify with numerical evidence from multiple experiments.
694
 
695
+ 8. **CONCLUSIONS AND RECOMMENDATIONS**
696
+ - Which models are most robust across different conditions
697
+ - Specific models to use for each experimental condition
698
+ - Confidence intervals and prediction reliability
699
+ - Scale-up recommendations with specific values
700
 
701
+ Use Markdown format with clear structure. Include ALL numerical values from the data.
702
+ Create clear sections for EACH EXPERIMENT.
703
  """
704
  else: # summarized
705
  prompt = f"""
706
  {lang_prefix}
707
 
708
+ You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
709
+
710
+ {user_specs_section}
711
 
712
+ DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
713
 
714
  PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
715
 
716
+ 1. **EXPERIMENTS OVERVIEW**
717
+ - Total experiments analyzed: [number]
718
+ - Conditions tested: [list]
719
+ - Variables measured: biomass/substrate/product
720
 
721
+ 2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
722
+
723
+ 📊 **EXPERIMENT 1 [Name/Condition]:**
724
+ - Biomass: [Model] (R²=[value])
725
+ - Substrate: [Model] (R²=[value])
726
+ - Product: [Model] (R²=[value])
727
 
728
+ 📊 **EXPERIMENT 2 [Name/Condition]:**
729
+ - Biomass: [Model] (R²=[value])
730
+ - Substrate: [Model] (R²=[value])
731
+ - Product: [Model] (R²=[value])
732
+
733
+ [Continue for all experiments...]
734
 
735
+ 3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
736
+ 🏆 **Best Models Overall:**
737
+ - **Biomass**: [Model] - Best in [X]/[Y] experiments
738
+ - **Substrate**: [Model] - Average R²=[value]
739
+ - **Product**: [Model] - Most consistent performance
740
 
741
  4. **QUICK COMPARISON TABLE**
742
+ | Experiment | Best Biomass | Best Substrate | Best Product | Overall |
743
+ |------------|--------------|----------------|--------------|------------|
744
+ | Exp1 | [Model] | [Model] | [Model] | [avg] |
745
+ | Exp2 | [Model] | [Model] | [Model] | [avg] |
746
+
747
+ 5. **KEY FINDINGS**
748
+ - Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
749
+ - Best conditions identified: [specific values]
750
+ - Most robust models: [list with reasons]
751
 
752
+ 6. **PRACTICAL RECOMMENDATIONS**
753
+ - For biomass prediction: Use [Model]
754
+ - For substrate monitoring: Use [Model]
755
+ - For product estimation: Use [Model]
756
+ - Critical parameters: [list with values]
757
 
758
+ Keep it concise but include ALL experiments and model names with their key metrics.
759
  """
760
 
761
  try:
 
778
  Generate Python code that:
779
 
780
  1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
781
+ 2. Implements analysis BY EXPERIMENT showing:
782
+ - Best models for each experiment
783
+ - Comparison across experiments
784
+ - Parameter evolution between conditions
785
+ 3. Includes visualization functions that:
786
+ - Show results PER EXPERIMENT
787
+ - Compare models across experiments
788
+ - Display parameter trends
789
+ 4. Shows the best model for biomass, substrate, and product separately
790
 
791
  The code must include:
792
+ - Data loading with experiment identification
793
+ - Model comparison by experiment and variable type
794
+ - Visualization showing results per experiment
795
+ - Overall best model selection with justification
796
+ - Functions to predict using the best models for each category
797
 
798
+ Make sure to include comments indicating which model won for each variable type and why.
799
 
800
  Format: Complete, executable Python code with actual data values embedded.
801
  """
 
827
  except Exception as e:
828
  return {"error": str(e)}
829
 
830
+ def process_files(files, claude_model: str, detail_level: str = "detailed",
831
+ language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
832
+ """Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
833
  processor = FileProcessor()
834
  analyzer = AIAnalyzer(client, model_registry)
835
  results = []
 
860
  analysis_type = analyzer.detect_analysis_type(df)
861
 
862
  if analysis_type == AnalysisType.FITTING_RESULTS:
863
+ result = analyzer.analyze_fitting_results(
864
+ df, claude_model, detail_level, language, additional_specs
865
+ )
866
 
867
  if language == 'es':
868
  results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
 
881
  return analysis_text, code_text
882
 
883
  def generate_implementation_code(analysis_results: str) -> str:
884
+ """Genera código de implementación con análisis por experimento"""
885
  code = """
886
  import numpy as np
887
  import pandas as pd
 
896
  plt.style.use('seaborn-v0_8-darkgrid')
897
  sns.set_palette("husl")
898
 
899
+ class ExperimentalModelAnalyzer:
900
  \"\"\"
901
+ Class for comparative analysis of biotechnological models across multiple experiments.
902
+ Analyzes biomass, substrate and product models separately for each experimental condition.
903
  \"\"\"
904
 
905
  def __init__(self):
906
  self.results_df = None
907
+ self.experiments = {}
908
+ self.best_models_by_experiment = {}
909
+ self.overall_best_models = {
910
+ 'biomass': None,
911
+ 'substrate': None,
912
+ 'product': None
913
+ }
914
 
915
  def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
916
  \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
 
925
  print(f"✅ Data loaded: {len(self.results_df)} models")
926
  print(f"📊 Available columns: {list(self.results_df.columns)}")
927
 
928
+ # Identify experiments
929
+ if 'Experiment' in self.results_df.columns:
930
+ self.experiments = self.results_df.groupby('Experiment').groups
931
+ print(f"🧪 Experiments found: {list(self.experiments.keys())}")
932
+
933
  return self.results_df
934
 
935
+ def analyze_by_experiment(self,
936
+ experiment_col: str = 'Experiment',
937
+ model_col: str = 'Model',
938
+ type_col: str = 'Type',
939
  r2_col: str = 'R2',
940
+ rmse_col: str = 'RMSE') -> Dict:
 
 
 
941
  \"\"\"
942
+ Analyze models by experiment and variable type.
943
+ Identifies best models for biomass, substrate, and product in each experiment.
944
  \"\"\"
945
  if self.results_df is None:
946
  raise ValueError("First load data with load_results()")
947
 
948
+ results_by_exp = {}
 
 
 
 
949
 
950
+ # Get unique experiments
951
+ if experiment_col in self.results_df.columns:
952
+ experiments = self.results_df[experiment_col].unique()
953
+ else:
954
+ experiments = ['All_Data']
955
+ self.results_df[experiment_col] = 'All_Data'
956
 
957
+ print("\\n" + "="*80)
958
+ print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
959
+ print("="*80)
960
 
961
+ for exp in experiments:
962
+ print(f"\\n🧪 EXPERIMENT: {exp}")
963
+ print("-"*50)
964
+
965
+ exp_data = self.results_df[self.results_df[experiment_col] == exp]
966
+ results_by_exp[exp] = {}
967
+
968
+ # Analyze by variable type if available
969
+ if type_col in exp_data.columns:
970
+ var_types = exp_data[type_col].unique()
971
+
972
+ for var_type in var_types:
973
+ var_data = exp_data[exp_data[type_col] == var_type]
974
+
975
+ if not var_data.empty:
976
+ # Find best model for this variable type
977
+ best_idx = var_data[r2_col].idxmax()
978
+ best_model = var_data.loc[best_idx]
979
+
980
+ results_by_exp[exp][var_type] = {
981
+ 'best_model': best_model[model_col],
982
+ 'r2': best_model[r2_col],
983
+ 'rmse': best_model[rmse_col],
984
+ 'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
985
+ }
986
+
987
+ print(f"\\n 📈 {var_type.upper()}:")
988
+ print(f" Best Model: {best_model[model_col]}")
989
+ print(f" R² = {best_model[r2_col]:.4f}")
990
+ print(f" RMSE = {best_model[rmse_col]:.4f}")
991
+
992
+ # Show all models for this variable
993
+ print(f"\\n All {var_type} models tested:")
994
+ for _, row in var_data.iterrows():
995
+ print(f" - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
996
+ else:
997
+ # If no type column, analyze all models together
998
+ best_idx = exp_data[r2_col].idxmax()
999
+ best_model = exp_data.loc[best_idx]
1000
+
1001
+ results_by_exp[exp]['all'] = {
1002
+ 'best_model': best_model[model_col],
1003
+ 'r2': best_model[r2_col],
1004
+ 'rmse': best_model[rmse_col],
1005
+ 'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
1006
+ }
1007
 
1008
+ self.best_models_by_experiment = results_by_exp
 
 
1009
 
1010
+ # Determine overall best models
1011
+ self._determine_overall_best_models()
 
1012
 
1013
+ return results_by_exp
1014
+
1015
+ def _determine_overall_best_models(self):
1016
+ \"\"\"Determine the best models across all experiments\"\"\"
1017
  print("\\n" + "="*80)
1018
+ print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
1019
  print("="*80)
1020
 
1021
+ # Aggregate performance by model and type
1022
+ model_performance = {}
1023
+
1024
+ for exp, exp_results in self.best_models_by_experiment.items():
1025
+ for var_type, var_results in exp_results.items():
1026
+ if var_type not in model_performance:
1027
+ model_performance[var_type] = {}
1028
+
1029
+ for model_data in var_results['all_models']:
1030
+ model_name = model_data['Model']
1031
+ if model_name not in model_performance[var_type]:
1032
+ model_performance[var_type][model_name] = {
1033
+ 'r2_values': [],
1034
+ 'rmse_values': [],
1035
+ 'experiments': []
1036
+ }
1037
+
1038
+ model_performance[var_type][model_name]['r2_values'].append(model_data['R2'])
1039
+ model_performance[var_type][model_name]['rmse_values'].append(model_data['RMSE'])
1040
+ model_performance[var_type][model_name]['experiments'].append(exp)
1041
+
1042
+ # Calculate average performance and select best
1043
+ for var_type, models in model_performance.items():
1044
+ best_avg_r2 = -1
1045
+ best_model = None
1046
 
1047
+ print(f"\\n📊 {var_type.upper()} MODELS:")
1048
+ for model_name, perf_data in models.items():
1049
+ avg_r2 = np.mean(perf_data['r2_values'])
1050
+ avg_rmse = np.mean(perf_data['rmse_values'])
1051
+ n_exp = len(perf_data['experiments'])
1052
+
1053
+ print(f" {model_name}:")
1054
+ print(f" Average R² = {avg_r2:.4f}")
1055
+ print(f" Average RMSE = {avg_rmse:.4f}")
1056
+ print(f" Tested in {n_exp} experiments")
1057
+
1058
+ if avg_r2 > best_avg_r2:
1059
+ best_avg_r2 = avg_r2
1060
+ best_model = {
1061
+ 'name': model_name,
1062
+ 'avg_r2': avg_r2,
1063
+ 'avg_rmse': avg_rmse,
1064
+ 'n_experiments': n_exp
1065
+ }
1066
+
1067
+ if var_type.lower() in ['biomass', 'substrate', 'product']:
1068
+ self.overall_best_models[var_type.lower()] = best_model
1069
+ print(f"\\n 🏆 BEST {var_type.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")
1070
 
1071
+ def create_comparison_visualizations(self):
1072
+ \"\"\"Create visualizations comparing models across experiments\"\"\"
1073
+ if not self.best_models_by_experiment:
1074
+ raise ValueError("First run analyze_by_experiment()")
1075
+
1076
+ # Prepare data for visualization
1077
+ experiments = []
1078
+ biomass_r2 = []
1079
+ substrate_r2 = []
1080
+ product_r2 = []
1081
+
1082
+ for exp, results in self.best_models_by_experiment.items():
1083
+ experiments.append(exp)
1084
+ biomass_r2.append(results.get('Biomass', {}).get('r2', 0))
1085
+ substrate_r2.append(results.get('Substrate', {}).get('r2', 0))
1086
+ product_r2.append(results.get('Product', {}).get('r2', 0))
1087
+
1088
+ # Create figure with subplots
1089
+ fig, axes = plt.subplots(2, 2, figsize=(15, 12))
1090
+ fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)
1091
+
1092
+ # 1. R² comparison by experiment and variable type
1093
+ ax1 = axes[0, 0]
1094
+ x = np.arange(len(experiments))
1095
+ width = 0.25
1096
 
1097
+ ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
1098
+ ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
1099
+ ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)
1100
 
1101
+ ax1.set_xlabel('Experiment')
 
 
1102
  ax1.set_ylabel('R²')
1103
+ ax1.set_title('Best Model by Experiment and Variable Type')
1104
+ ax1.set_xticks(x)
1105
+ ax1.set_xticklabels(experiments, rotation=45, ha='right')
 
1106
  ax1.legend()
1107
+ ax1.grid(True, alpha=0.3)
1108
 
1109
+ # Add value labels
1110
+ for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
1111
+ if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
1112
+ if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
1113
+ if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)
1114
 
1115
+ # 2. Model frequency heatmap
1116
  ax2 = axes[0, 1]
1117
+ # This would show which models appear most frequently as best
1118
+ # Implementation depends on actual data structure
1119
+ ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
1120
+ ha='center', va='center', transform=ax2.transAxes)
1121
+ ax2.set_title('Most Frequently Selected Models')
1122
+
1123
+ # 3. Parameter evolution across experiments
 
 
1124
  ax3 = axes[1, 0]
1125
+ ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
1126
+ ha='center', va='center', transform=ax3.transAxes)
1127
+ ax3.set_title('Parameter Trends')
1128
+
1129
+ # 4. Overall best models summary
 
 
1130
  ax4 = axes[1, 1]
1131
+ ax4.axis('off')
1132
+
1133
+ summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
1134
+ for var_type, model_info in self.overall_best_models.items():
1135
+ if model_info:
1136
+ summary_text += f"{var_type.upper()}:\\n"
1137
+ summary_text += f" Model: {model_info['name']}\\n"
1138
+ summary_text += f" Avg R²: {model_info['avg_r2']:.4f}\\n"
1139
+ summary_text += f" Tested in: {model_info['n_experiments']} experiments\\n\\n"
1140
+
1141
+ ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
1142
+ fontsize=12, verticalalignment='top', fontfamily='monospace')
1143
+ ax4.set_title('Overall Best Models Summary')
 
 
 
1144
 
1145
  plt.tight_layout()
1146
  plt.show()
1147
+
1148
+ def generate_summary_table(self) -> pd.DataFrame:
1149
+ \"\"\"Generate a summary table of best models by experiment and type\"\"\"
1150
+ summary_data = []
1151
+
1152
+ for exp, results in self.best_models_by_experiment.items():
1153
+ for var_type, var_results in results.items():
1154
+ summary_data.append({
1155
+ 'Experiment': exp,
1156
+ 'Variable_Type': var_type,
1157
+ 'Best_Model': var_results['best_model'],
1158
+ 'R2': var_results['r2'],
1159
+ 'RMSE': var_results['rmse']
1160
+ })
1161
+
1162
+ summary_df = pd.DataFrame(summary_data)
1163
+
1164
+ print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
1165
+ print("="*80)
1166
+ print(summary_df.to_string(index=False))
1167
+
1168
+ return summary_df
1169
 
1170
+ # Example usage
1171
  if __name__ == "__main__":
1172
+ print("🧬 Experimental Model Comparison System")
1173
  print("="*60)
1174
 
1175
+ # Example data structure with experiments
1176
  example_data = {
1177
+ 'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
1178
+ 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
1179
+ 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
1180
+ 'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
1181
+ 'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
1182
+ 'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
1183
+ 'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
1184
+ 'Substrate', 'Substrate', 'Substrate', 'Substrate',
1185
+ 'Product', 'Product', 'Product', 'Product'],
1186
+ 'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
1187
+ 0.9723, 0.9856, 0.9698, 0.9812,
1188
+ 0.9634, 0.9512, 0.9687, 0.9423],
1189
+ 'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
1190
+ 0.0312, 0.0245, 0.0334, 0.0289,
1191
+ 0.0412, 0.0523, 0.0389, 0.0567],
1192
+ 'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
1193
+ None, None, None, None, None, None, None, None],
1194
+ 'Ks': [None, None, None, None, None, None,
1195
+ 2.1, 1.8, 2.3, 1.9, None, None, None, None]
1196
  }
1197
 
1198
  # Create analyzer
1199
+ analyzer = ExperimentalModelAnalyzer()
1200
 
1201
  # Load data
1202
  analyzer.load_results(data_dict=example_data)
1203
 
1204
+ # Analyze by experiment
1205
+ results = analyzer.analyze_by_experiment()
1206
+
1207
+ # Create visualizations
1208
+ analyzer.create_comparison_visualizations()
1209
 
1210
+ # Generate summary table
1211
+ summary = analyzer.generate_summary_table()
1212
 
1213
+ print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
1214
  """
1215
 
1216
  return code
 
1270
  gr.update(label=t['select_language']), # language_selector
1271
  gr.update(label=t['select_theme']), # theme_selector
1272
  gr.update(label=t['detail_level']), # detail_level
1273
+ gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
1274
  gr.update(value=t['analyze_button']), # analyze_btn
1275
  gr.update(label=t['export_format']), # export_format
1276
  gr.update(value=t['export_button']), # export_btn
 
1279
  gr.update(label=t['data_format']) # data_format_accordion
1280
  ]
1281
 
1282
+ def process_and_store(files, model, detail, language, additional_specs):
1283
  """Procesa archivos y almacena resultados"""
1284
  if not files:
1285
  error_msg = TRANSLATIONS[language]['error_no_files']
1286
  return error_msg, ""
1287
 
1288
+ analysis, code = process_files(files, model, detail, language, additional_specs)
1289
  app_state.current_analysis = analysis
1290
  app_state.current_code = code
1291
  return analysis, code
 
1337
  label=TRANSLATIONS[current_language]['detail_level']
1338
  )
1339
 
1340
+ # New input for additional specifications
1341
+ additional_specs = gr.Textbox(
1342
+ label=TRANSLATIONS[current_language]['additional_specs'],
1343
+ placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
1344
+ lines=3,
1345
+ max_lines=5,
1346
+ interactive=True
1347
+ )
1348
+
1349
  analyze_btn = gr.Button(
1350
  TRANSLATIONS[current_language]['analyze_button'],
1351
  variant="primary",
 
1397
  gr.Markdown("""
1398
  ### Expected CSV/Excel structure:
1399
 
1400
+ | Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
1401
+ |------------|-------|------|-----|------|-----|-----|--------|-------|------------|
1402
+ | pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
1403
+ | pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
1404
+ | pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
1405
+ | pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
1406
+
1407
+ **Important columns:**
1408
+ - **Experiment**: Experimental condition identifier
1409
+ - **Model**: Model name
1410
+ - **Type**: Variable type (Biomass/Substrate/Product)
1411
+ - **R2, RMSE**: Fit quality metrics
1412
+ - **Parameters**: Model-specific parameters
1413
  """)
1414
 
1415
  # Define examples
1416
  examples = gr.Examples(
1417
  examples=[
1418
+ [["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed", ""],
1419
+ [["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized", "Focus on temperature effects"]
1420
  ],
1421
+ inputs=[files_input, model_selector, detail_level, additional_specs],
1422
  label=TRANSLATIONS[current_language]['examples']
1423
  )
1424
 
1425
+ # Events - Updated to include additional_specs
1426
  language_selector.change(
1427
  update_interface_language,
1428
  inputs=[language_selector],
1429
  outputs=[
1430
  title_text, subtitle_text, files_input, model_selector,
1431
+ language_selector, theme_selector, detail_level, additional_specs,
1432
+ analyze_btn, export_format, export_btn, analysis_output,
1433
+ code_output, data_format_accordion
1434
  ]
1435
  )
1436
 
 
1448
 
1449
  analyze_btn.click(
1450
  fn=process_and_store,
1451
+ inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
1452
  outputs=[analysis_output, code_output]
1453
  )
1454
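
For reference, a minimal sketch (assumed usage, not part of the diff) of calling the updated pipeline outside the interface with the new additional_specs argument:

```python
# Sketch only: values mirror the Gradio example above; the file path is assumed to exist.
analysis_md, code_py = process_files(
    ["examples/biomass_models_comparison.csv"],
    "claude-3-5-sonnet-20241022",
    detail_level="detailed",
    language="en",
    additional_specs="Focus on temperature effects",
)
print(analysis_md[:500])
```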