Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -33,7 +33,7 @@ os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
|
|
33 |
# Inicializar cliente Anthropic
|
34 |
client = anthropic.Anthropic()
|
35 |
|
36 |
-
# Sistema de traducción
|
37 |
TRANSLATIONS = {
|
38 |
'en': {
|
39 |
'title': '🧬 Comparative Analyzer of Biotechnological Models',
|
@@ -62,7 +62,9 @@ TRANSLATIONS = {
|
|
62 |
'specialized_in': '🎯 Specialized in:',
|
63 |
'metrics_analyzed': '📊 Analyzed metrics:',
|
64 |
'what_analyzes': '🔍 What it specifically analyzes:',
|
65 |
-
'tips': '💡 Tips for better results:'
|
|
|
|
|
66 |
},
|
67 |
'es': {
|
68 |
'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
|
@@ -91,7 +93,9 @@ TRANSLATIONS = {
|
|
91 |
'specialized_in': '🎯 Especializado en:',
|
92 |
'metrics_analyzed': '📊 Métricas analizadas:',
|
93 |
'what_analyzes': '🔍 Qué analiza específicamente:',
|
94 |
-
'tips': '💡 Tips para mejores resultados:'
|
|
|
|
|
95 |
},
|
96 |
'fr': {
|
97 |
'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
|
@@ -120,7 +124,9 @@ TRANSLATIONS = {
|
|
120 |
'specialized_in': '🎯 Spécialisé dans:',
|
121 |
'metrics_analyzed': '📊 Métriques analysées:',
|
122 |
'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
|
123 |
-
'tips': '💡 Conseils pour de meilleurs résultats:'
|
|
|
|
|
124 |
},
|
125 |
'de': {
|
126 |
'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
|
@@ -149,7 +155,9 @@ TRANSLATIONS = {
|
|
149 |
'specialized_in': '🎯 Spezialisiert auf:',
|
150 |
'metrics_analyzed': '📊 Analysierte Metriken:',
|
151 |
'what_analyzes': '🔍 Was spezifisch analysiert wird:',
|
152 |
-
'tips': '💡 Tipps für bessere Ergebnisse:'
|
|
|
|
|
153 |
},
|
154 |
'pt': {
|
155 |
'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
|
@@ -178,7 +186,9 @@ TRANSLATIONS = {
|
|
178 |
'specialized_in': '🎯 Especializado em:',
|
179 |
'metrics_analyzed': '📊 Métricas analisadas:',
|
180 |
'what_analyzes': '🔍 O que analisa especificamente:',
|
181 |
-
'tips': '💡 Dicas para melhores resultados:'
|
|
|
|
|
182 |
}
|
183 |
}
|
184 |
|
@@ -568,8 +578,9 @@ class AIAnalyzer:
|
|
568 |
}
|
569 |
return prefixes.get(language, prefixes['en'])
|
570 |
|
571 |
-
def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
|
572 |
-
|
|
|
573 |
|
574 |
# Preparar resumen completo de los datos
|
575 |
data_summary = f"""
|
@@ -592,6 +603,15 @@ class AIAnalyzer:
|
|
592 |
# Obtener prefijo de idioma
|
593 |
lang_prefix = self.get_language_prompt_prefix(language)
|
594 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
595 |
# Prompt mejorado con instrucciones específicas para cada nivel
|
596 |
if detail_level == "detailed":
|
597 |
prompt = f"""
|
@@ -599,122 +619,143 @@ class AIAnalyzer:
|
|
599 |
|
600 |
You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
|
601 |
|
602 |
-
|
603 |
|
604 |
-
|
605 |
|
606 |
-
|
607 |
-
- Identify ALL fitted mathematical models BY NAME (e.g., "Monod", "Logistic", "Gompertz", etc.)
|
608 |
-
- Classify them by type: biomass growth, substrate consumption, product formation
|
609 |
-
- Indicate the mathematical equation of each model
|
610 |
-
- Mention which experiments/conditions were tested
|
611 |
|
612 |
-
|
613 |
-
-
|
614 |
-
-
|
615 |
-
|
616 |
-
*
|
617 |
-
*
|
618 |
-
* RMSE value: [exact value]
|
619 |
-
* Key parameters and their values
|
620 |
-
- Identify significant differences between models
|
621 |
-
- Detect possible overfitting or underfitting
|
622 |
|
623 |
-
|
624 |
-
|
625 |
-
-
|
626 |
-
-
|
627 |
-
-
|
628 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
629 |
|
630 |
-
4. **DETAILED
|
631 |
-
a) **BIOMASS (if applicable)**:
|
632 |
-
- Growth parameters (μmax, Xmax, etc.) with exact values
|
633 |
-
- Doubling time calculations
|
634 |
-
- Biomass productivity
|
635 |
-
- Compare parameters between models numerically
|
636 |
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
|
|
|
|
642 |
|
643 |
-
|
644 |
-
|
645 |
-
- Specific productivity calculations
|
646 |
-
- Yield Yp/x values
|
647 |
-
- Production type classification
|
648 |
|
649 |
-
5. **
|
650 |
-
-
|
651 |
-
-
|
652 |
-
-
|
653 |
-
-
|
654 |
|
655 |
-
6. **
|
656 |
-
|
657 |
-
-
|
658 |
-
-
|
659 |
-
-
|
|
|
660 |
|
661 |
-
7. **
|
662 |
-
-
|
663 |
-
-
|
664 |
-
-
|
665 |
-
|
|
|
666 |
|
667 |
-
8. **
|
668 |
-
|
669 |
-
|
|
|
|
|
670 |
|
671 |
-
Use Markdown format with clear structure
|
|
|
672 |
"""
|
673 |
else: # summarized
|
674 |
prompt = f"""
|
675 |
{lang_prefix}
|
676 |
|
677 |
-
You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis
|
|
|
|
|
678 |
|
679 |
-
DETAIL LEVEL: SUMMARIZED - Be concise but include all essential information
|
680 |
|
681 |
PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
|
682 |
|
683 |
-
1. **
|
684 |
-
-
|
685 |
-
-
|
|
|
686 |
|
687 |
-
2. **BEST MODELS -
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
|
|
692 |
|
693 |
-
📊 **
|
694 |
-
|
695 |
-
|
696 |
-
|
|
|
|
|
697 |
|
698 |
-
3. **
|
699 |
-
|
700 |
-
-
|
701 |
-
-
|
702 |
-
-
|
703 |
|
704 |
4. **QUICK COMPARISON TABLE**
|
705 |
-
|
|
706 |
-
|
707 |
-
|
|
708 |
-
|
|
709 |
-
|
|
|
|
|
|
|
|
|
710 |
|
711 |
-
|
712 |
-
-
|
713 |
-
-
|
714 |
-
-
|
715 |
-
-
|
716 |
|
717 |
-
Keep it concise but include ALL model names
|
718 |
"""
|
719 |
|
720 |
try:
|
@@ -737,18 +778,24 @@ class AIAnalyzer:
|
|
737 |
Generate Python code that:
|
738 |
|
739 |
1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
|
740 |
-
2. Implements
|
741 |
-
|
742 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
743 |
|
744 |
The code must include:
|
745 |
-
- Data loading
|
746 |
-
- Model
|
747 |
-
- Visualization showing
|
748 |
-
-
|
749 |
-
-
|
750 |
|
751 |
-
Make sure to include comments indicating which model won
|
752 |
|
753 |
Format: Complete, executable Python code with actual data values embedded.
|
754 |
"""
|
@@ -780,8 +827,9 @@ class AIAnalyzer:
|
|
780 |
except Exception as e:
|
781 |
return {"error": str(e)}
|
782 |
|
783 |
-
def process_files(files, claude_model: str, detail_level: str = "detailed",
|
784 |
-
|
|
|
785 |
processor = FileProcessor()
|
786 |
analyzer = AIAnalyzer(client, model_registry)
|
787 |
results = []
|
@@ -812,7 +860,9 @@ def process_files(files, claude_model: str, detail_level: str = "detailed", lang
|
|
812 |
analysis_type = analyzer.detect_analysis_type(df)
|
813 |
|
814 |
if analysis_type == AnalysisType.FITTING_RESULTS:
|
815 |
-
result = analyzer.analyze_fitting_results(
|
|
|
|
|
816 |
|
817 |
if language == 'es':
|
818 |
results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
|
@@ -831,7 +881,7 @@ def process_files(files, claude_model: str, detail_level: str = "detailed", lang
|
|
831 |
return analysis_text, code_text
|
832 |
|
833 |
def generate_implementation_code(analysis_results: str) -> str:
|
834 |
-
"""Genera código de implementación con
|
835 |
code = """
|
836 |
import numpy as np
|
837 |
import pandas as pd
|
@@ -846,16 +896,21 @@ from typing import Dict, List, Tuple, Optional
|
|
846 |
plt.style.use('seaborn-v0_8-darkgrid')
|
847 |
sns.set_palette("husl")
|
848 |
|
849 |
-
class
|
850 |
\"\"\"
|
851 |
-
Class for comparative analysis of biotechnological
|
852 |
-
|
853 |
\"\"\"
|
854 |
|
855 |
def __init__(self):
|
856 |
self.results_df = None
|
857 |
-
self.
|
858 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
859 |
|
860 |
def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
|
861 |
\"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
|
@@ -870,221 +925,292 @@ class ComparativeModelAnalyzer:
|
|
870 |
print(f"✅ Data loaded: {len(self.results_df)} models")
|
871 |
print(f"📊 Available columns: {list(self.results_df.columns)}")
|
872 |
|
|
|
|
|
|
|
|
|
|
|
873 |
return self.results_df
|
874 |
|
875 |
-
def
|
|
|
|
|
|
|
876 |
r2_col: str = 'R2',
|
877 |
-
rmse_col: str = 'RMSE'
|
878 |
-
aic_col: Optional[str] = 'AIC',
|
879 |
-
bic_col: Optional[str] = 'BIC',
|
880 |
-
model_col: str = 'Model') -> pd.DataFrame:
|
881 |
\"\"\"
|
882 |
-
Analyze
|
883 |
-
|
884 |
\"\"\"
|
885 |
if self.results_df is None:
|
886 |
raise ValueError("First load data with load_results()")
|
887 |
|
888 |
-
|
889 |
-
comparison = self.results_df.copy()
|
890 |
-
|
891 |
-
# Calculate composite score
|
892 |
-
scores = pd.DataFrame(index=comparison.index)
|
893 |
-
|
894 |
-
# Normalize metrics (0-1)
|
895 |
-
if r2_col in comparison.columns:
|
896 |
-
scores['r2_score'] = comparison[r2_col] # Already between 0-1
|
897 |
-
|
898 |
-
if rmse_col in comparison.columns:
|
899 |
-
# Invert and normalize RMSE (lower is better)
|
900 |
-
max_rmse = comparison[rmse_col].max()
|
901 |
-
scores['rmse_score'] = 1 - (comparison[rmse_col] / max_rmse)
|
902 |
-
|
903 |
-
if aic_col and aic_col in comparison.columns:
|
904 |
-
# Invert and normalize AIC (lower is better)
|
905 |
-
min_aic = comparison[aic_col].min()
|
906 |
-
max_aic = comparison[aic_col].max()
|
907 |
-
scores['aic_score'] = 1 - ((comparison[aic_col] - min_aic) / (max_aic - min_aic))
|
908 |
-
|
909 |
-
if bic_col and bic_col in comparison.columns:
|
910 |
-
# Invert and normalize BIC (lower is better)
|
911 |
-
min_bic = comparison[bic_col].min()
|
912 |
-
max_bic = comparison[bic_col].max()
|
913 |
-
scores['bic_score'] = 1 - ((comparison[bic_col] - min_bic) / (max_bic - min_bic))
|
914 |
-
|
915 |
-
# Calculate total score (weighted average)
|
916 |
-
weights = {
|
917 |
-
'r2_score': 0.4,
|
918 |
-
'rmse_score': 0.3,
|
919 |
-
'aic_score': 0.15,
|
920 |
-
'bic_score': 0.15
|
921 |
-
}
|
922 |
|
923 |
-
|
924 |
-
|
925 |
-
|
926 |
-
|
|
|
|
|
927 |
|
928 |
-
|
929 |
-
|
930 |
-
|
931 |
|
932 |
-
|
933 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
934 |
|
935 |
-
|
936 |
-
if 'Type' in comparison.columns:
|
937 |
-
for model_type in comparison['Type'].unique():
|
938 |
-
type_models = comparison[comparison['Type'] == model_type]
|
939 |
-
if not type_models.empty:
|
940 |
-
best_idx = type_models['Score'].idxmax()
|
941 |
-
self.best_models[model_type] = type_models.loc[best_idx]
|
942 |
|
943 |
-
#
|
944 |
-
|
945 |
-
self.best_models['overall'] = comparison.loc[best_idx]
|
946 |
|
947 |
-
|
|
|
|
|
|
|
948 |
print("\\n" + "="*80)
|
949 |
-
print("
|
950 |
print("="*80)
|
951 |
|
952 |
-
|
953 |
-
|
954 |
-
|
955 |
-
for
|
956 |
-
|
957 |
-
|
958 |
-
|
959 |
-
|
960 |
-
|
961 |
-
|
962 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
963 |
|
964 |
-
print(f"
|
965 |
-
|
966 |
-
|
967 |
-
|
968 |
-
|
969 |
-
|
970 |
-
print(f"{
|
971 |
-
|
972 |
-
print(f"
|
973 |
-
|
974 |
-
|
975 |
-
|
976 |
-
|
977 |
-
|
978 |
-
|
979 |
-
|
980 |
-
|
981 |
-
|
982 |
-
|
983 |
-
|
984 |
-
|
|
|
|
|
985 |
|
986 |
-
def
|
987 |
-
\"\"\"Create
|
988 |
-
if self.
|
989 |
-
raise ValueError("First run
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
990 |
|
991 |
-
|
992 |
-
|
|
|
993 |
|
994 |
-
|
995 |
-
ax1 = axes[0, 0]
|
996 |
-
models = self.model_rankings.get('Model', self.model_rankings.index)
|
997 |
-
r2_values = self.model_rankings.get('R2', [])
|
998 |
-
ax1.bar(range(len(models)), r2_values, color='skyblue')
|
999 |
-
ax1.set_xlabel('Models')
|
1000 |
ax1.set_ylabel('R²')
|
1001 |
-
ax1.set_title('R²
|
1002 |
-
ax1.set_xticks(
|
1003 |
-
ax1.set_xticklabels(
|
1004 |
-
ax1.axhline(y=0.95, color='r', linestyle='--', label='Excellent fit (0.95)')
|
1005 |
ax1.legend()
|
|
|
1006 |
|
1007 |
-
# Add
|
1008 |
-
for i,
|
1009 |
-
ax1.text(i,
|
|
|
|
|
1010 |
|
1011 |
-
# 2.
|
1012 |
ax2 = axes[0, 1]
|
1013 |
-
|
1014 |
-
|
1015 |
-
ax2.
|
1016 |
-
|
1017 |
-
ax2.set_title('
|
1018 |
-
|
1019 |
-
|
1020 |
-
|
1021 |
-
# Add actual values on bars
|
1022 |
-
for i, v in enumerate(rmse_values):
|
1023 |
-
ax2.text(i, v + 0.001, f'{v:.3f}', ha='center', va='bottom')
|
1024 |
-
|
1025 |
-
# 3. Combined score
|
1026 |
ax3 = axes[1, 0]
|
1027 |
-
|
1028 |
-
|
1029 |
-
ax3.
|
1030 |
-
|
1031 |
-
|
1032 |
-
ax3.set_xticks(range(len(models)))
|
1033 |
-
ax3.set_xticklabels(models, rotation=45, ha='right')
|
1034 |
-
|
1035 |
-
# 4. Ranking visualization
|
1036 |
ax4 = axes[1, 1]
|
1037 |
-
|
1038 |
-
|
1039 |
-
|
1040 |
-
|
1041 |
-
|
1042 |
-
|
1043 |
-
|
1044 |
-
|
1045 |
-
|
1046 |
-
|
1047 |
-
|
1048 |
-
|
1049 |
-
|
1050 |
-
xy=(best_r2, best_rmse),
|
1051 |
-
xytext=(best_r2-0.05, best_rmse+0.01),
|
1052 |
-
arrowprops=dict(arrowstyle='->', color='red'))
|
1053 |
|
1054 |
plt.tight_layout()
|
1055 |
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1056 |
|
1057 |
-
# Example usage
|
1058 |
if __name__ == "__main__":
|
1059 |
-
print("🧬
|
1060 |
print("="*60)
|
1061 |
|
1062 |
-
# Example data structure
|
1063 |
example_data = {
|
1064 |
-
'
|
1065 |
-
|
1066 |
-
|
1067 |
-
'
|
1068 |
-
|
1069 |
-
|
1070 |
-
'
|
1071 |
-
|
1072 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1073 |
}
|
1074 |
|
1075 |
# Create analyzer
|
1076 |
-
analyzer =
|
1077 |
|
1078 |
# Load data
|
1079 |
analyzer.load_results(data_dict=example_data)
|
1080 |
|
1081 |
-
# Analyze
|
1082 |
-
results = analyzer.
|
|
|
|
|
|
|
1083 |
|
1084 |
-
#
|
1085 |
-
analyzer.
|
1086 |
|
1087 |
-
print("\\n✨ Analysis complete! Best models identified
|
1088 |
"""
|
1089 |
|
1090 |
return code
|
@@ -1144,6 +1270,7 @@ def create_interface():
|
|
1144 |
gr.update(label=t['select_language']), # language_selector
|
1145 |
gr.update(label=t['select_theme']), # theme_selector
|
1146 |
gr.update(label=t['detail_level']), # detail_level
|
|
|
1147 |
gr.update(value=t['analyze_button']), # analyze_btn
|
1148 |
gr.update(label=t['export_format']), # export_format
|
1149 |
gr.update(value=t['export_button']), # export_btn
|
@@ -1152,13 +1279,13 @@ def create_interface():
|
|
1152 |
gr.update(label=t['data_format']) # data_format_accordion
|
1153 |
]
|
1154 |
|
1155 |
-
def process_and_store(files, model, detail, language):
|
1156 |
"""Procesa archivos y almacena resultados"""
|
1157 |
if not files:
|
1158 |
error_msg = TRANSLATIONS[language]['error_no_files']
|
1159 |
return error_msg, ""
|
1160 |
|
1161 |
-
analysis, code = process_files(files, model, detail, language)
|
1162 |
app_state.current_analysis = analysis
|
1163 |
app_state.current_code = code
|
1164 |
return analysis, code
|
@@ -1210,6 +1337,15 @@ def create_interface():
|
|
1210 |
label=TRANSLATIONS[current_language]['detail_level']
|
1211 |
)
|
1212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1213 |
analyze_btn = gr.Button(
|
1214 |
TRANSLATIONS[current_language]['analyze_button'],
|
1215 |
variant="primary",
|
@@ -1261,32 +1397,40 @@ def create_interface():
|
|
1261 |
gr.Markdown("""
|
1262 |
### Expected CSV/Excel structure:
|
1263 |
|
1264 |
-
| Model | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
|
1265 |
-
|
1266 |
-
| Monod | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
|
1267 |
-
| Logistic | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
|
1268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1269 |
""")
|
1270 |
|
1271 |
# Definir ejemplos
|
1272 |
examples = gr.Examples(
|
1273 |
examples=[
|
1274 |
-
[["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed"],
|
1275 |
-
[["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized"]
|
1276 |
],
|
1277 |
-
inputs=[files_input, model_selector, detail_level],
|
1278 |
label=TRANSLATIONS[current_language]['examples']
|
1279 |
)
|
1280 |
|
1281 |
-
# Eventos -
|
1282 |
language_selector.change(
|
1283 |
update_interface_language,
|
1284 |
inputs=[language_selector],
|
1285 |
outputs=[
|
1286 |
title_text, subtitle_text, files_input, model_selector,
|
1287 |
-
language_selector, theme_selector, detail_level,
|
1288 |
-
export_format, export_btn, analysis_output,
|
1289 |
-
data_format_accordion
|
1290 |
]
|
1291 |
)
|
1292 |
|
@@ -1304,7 +1448,7 @@ def create_interface():
|
|
1304 |
|
1305 |
analyze_btn.click(
|
1306 |
fn=process_and_store,
|
1307 |
-
inputs=[files_input, model_selector, detail_level, language_selector],
|
1308 |
outputs=[analysis_output, code_output]
|
1309 |
)
|
1310 |
|
|
|
33 |
# Inicializar cliente Anthropic
|
34 |
client = anthropic.Anthropic()
|
35 |
|
36 |
+
# Sistema de traducción - Actualizado con nuevas entradas
|
37 |
TRANSLATIONS = {
|
38 |
'en': {
|
39 |
'title': '🧬 Comparative Analyzer of Biotechnological Models',
|
|
|
62 |
'specialized_in': '🎯 Specialized in:',
|
63 |
'metrics_analyzed': '📊 Analyzed metrics:',
|
64 |
'what_analyzes': '🔍 What it specifically analyzes:',
|
65 |
+
'tips': '💡 Tips for better results:',
|
66 |
+
'additional_specs': '📝 Additional specifications for analysis',
|
67 |
+
'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
|
68 |
},
|
69 |
'es': {
|
70 |
'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
|
|
|
93 |
'specialized_in': '🎯 Especializado en:',
|
94 |
'metrics_analyzed': '📊 Métricas analizadas:',
|
95 |
'what_analyzes': '🔍 Qué analiza específicamente:',
|
96 |
+
'tips': '💡 Tips para mejores resultados:',
|
97 |
+
'additional_specs': '📝 Especificaciones adicionales para el análisis',
|
98 |
+
'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
|
99 |
},
|
100 |
'fr': {
|
101 |
'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
|
|
|
124 |
'specialized_in': '🎯 Spécialisé dans:',
|
125 |
'metrics_analyzed': '📊 Métriques analysées:',
|
126 |
'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
|
127 |
+
'tips': '💡 Conseils pour de meilleurs résultats:',
|
128 |
+
'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
|
129 |
+
'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
|
130 |
},
|
131 |
'de': {
|
132 |
'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
|
|
|
155 |
'specialized_in': '🎯 Spezialisiert auf:',
|
156 |
'metrics_analyzed': '📊 Analysierte Metriken:',
|
157 |
'what_analyzes': '🔍 Was spezifisch analysiert wird:',
|
158 |
+
'tips': '💡 Tipps für bessere Ergebnisse:',
|
159 |
+
'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
|
160 |
+
'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
|
161 |
},
|
162 |
'pt': {
|
163 |
'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
|
|
|
186 |
'specialized_in': '🎯 Especializado em:',
|
187 |
'metrics_analyzed': '📊 Métricas analisadas:',
|
188 |
'what_analyzes': '🔍 O que analisa especificamente:',
|
189 |
+
'tips': '💡 Dicas para melhores resultados:',
|
190 |
+
'additional_specs': '📝 Especificações adicionais para a análise',
|
191 |
+
'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
|
192 |
}
|
193 |
}
|
194 |
|
|
|
578 |
}
|
579 |
return prefixes.get(language, prefixes['en'])
|
580 |
|
581 |
+
def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
|
582 |
+
language: str = "en", additional_specs: str = "") -> Dict:
|
583 |
+
"""Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
|
584 |
|
585 |
# Preparar resumen completo de los datos
|
586 |
data_summary = f"""
|
|
|
603 |
# Obtener prefijo de idioma
|
604 |
lang_prefix = self.get_language_prompt_prefix(language)
|
605 |
|
606 |
+
# Agregar especificaciones adicionales del usuario si existen
|
607 |
+
user_specs_section = f"""
|
608 |
+
|
609 |
+
USER ADDITIONAL SPECIFICATIONS:
|
610 |
+
{additional_specs}
|
611 |
+
|
612 |
+
Please ensure to address these specific requirements in your analysis.
|
613 |
+
""" if additional_specs else ""
|
614 |
+
|
615 |
# Prompt mejorado con instrucciones específicas para cada nivel
|
616 |
if detail_level == "detailed":
|
617 |
prompt = f"""
|
|
|
619 |
|
620 |
You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
|
621 |
|
622 |
+
{user_specs_section}
|
623 |
|
624 |
+
DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
|
625 |
|
626 |
+
PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
|
|
|
|
|
|
|
|
|
627 |
|
628 |
+
1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
|
629 |
+
- List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
|
630 |
+
- For EACH experiment, identify:
|
631 |
+
* Experimental conditions
|
632 |
+
* Number of models tested
|
633 |
+
* Variables measured (biomass, substrate, product)
|
|
|
|
|
|
|
|
|
634 |
|
635 |
+
2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
|
636 |
+
For EACH EXPERIMENT separately:
|
637 |
+
- Identify ALL fitted mathematical models BY NAME
|
638 |
+
- Classify them: biomass growth, substrate consumption, product formation
|
639 |
+
- Show the mathematical equation of each model
|
640 |
+
- List parameter values obtained for that specific experiment
|
641 |
+
|
642 |
+
3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
|
643 |
+
Create a section for EACH EXPERIMENT showing:
|
644 |
+
|
645 |
+
**EXPERIMENT [Name/Condition]:**
|
646 |
+
|
647 |
+
a) **BIOMASS MODELS** (if applicable):
|
648 |
+
- Best model: [Name] with R²=[value], RMSE=[value]
|
649 |
+
- Parameters: μmax=[value], Xmax=[value], etc.
|
650 |
+
- Ranking of all biomass models tested
|
651 |
+
|
652 |
+
b) **SUBSTRATE MODELS** (if applicable):
|
653 |
+
- Best model: [Name] with R²=[value], RMSE=[value]
|
654 |
+
- Parameters: Ks=[value], Yxs=[value], etc.
|
655 |
+
- Ranking of all substrate models tested
|
656 |
+
|
657 |
+
c) **PRODUCT MODELS** (if applicable):
|
658 |
+
- Best model: [Name] with R²=[value], RMSE=[value]
|
659 |
+
- Parameters: α=[value], β=[value], etc.
|
660 |
+
- Ranking of all product models tested
|
661 |
|
662 |
+
4. **DETAILED COMPARATIVE TABLES**
|
|
|
|
|
|
|
|
|
|
|
663 |
|
664 |
+
**Table 1: Summary by Experiment and Variable Type**
|
665 |
+
| Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
|
666 |
+
|------------|----------|------------|-------|------|----------------|---------|
|
667 |
+
| Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
|
668 |
+
| Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
|
669 |
+
| Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
|
670 |
+
| Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
|
671 |
|
672 |
+
**Table 2: Complete Model Comparison Across All Experiments**
|
673 |
+
| Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
|
|
|
|
|
|
|
674 |
|
675 |
+
5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
|
676 |
+
- Compare how parameters change between experiments
|
677 |
+
- Identify trends (e.g., μmax increases with temperature)
|
678 |
+
- Calculate average parameters and variability
|
679 |
+
- Suggest optimal conditions based on parameters
|
680 |
|
681 |
+
6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
|
682 |
+
For each experiment, explain:
|
683 |
+
- What the parameter values mean biologically
|
684 |
+
- Whether values are realistic for the conditions
|
685 |
+
- Key differences between experiments
|
686 |
+
- Critical control parameters identified
|
687 |
|
688 |
+
7. **OVERALL BEST MODELS DETERMINATION**
|
689 |
+
- **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
|
690 |
+
- **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
|
691 |
+
- **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
|
692 |
+
|
693 |
+
Justify with numerical evidence from multiple experiments.
|
694 |
|
695 |
+
8. **CONCLUSIONS AND RECOMMENDATIONS**
|
696 |
+
- Which models are most robust across different conditions
|
697 |
+
- Specific models to use for each experimental condition
|
698 |
+
- Confidence intervals and prediction reliability
|
699 |
+
- Scale-up recommendations with specific values
|
700 |
|
701 |
+
Use Markdown format with clear structure. Include ALL numerical values from the data.
|
702 |
+
Create clear sections for EACH EXPERIMENT.
|
703 |
"""
|
704 |
else: # summarized
|
705 |
prompt = f"""
|
706 |
{lang_prefix}
|
707 |
|
708 |
+
You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
|
709 |
+
|
710 |
+
{user_specs_section}
|
711 |
|
712 |
+
DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
|
713 |
|
714 |
PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
|
715 |
|
716 |
+
1. **EXPERIMENTS OVERVIEW**
|
717 |
+
- Total experiments analyzed: [number]
|
718 |
+
- Conditions tested: [list]
|
719 |
+
- Variables measured: biomass/substrate/product
|
720 |
|
721 |
+
2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
|
722 |
+
|
723 |
+
📊 **EXPERIMENT 1 [Name/Condition]:**
|
724 |
+
- Biomass: [Model] (R²=[value])
|
725 |
+
- Substrate: [Model] (R²=[value])
|
726 |
+
- Product: [Model] (R²=[value])
|
727 |
|
728 |
+
📊 **EXPERIMENT 2 [Name/Condition]:**
|
729 |
+
- Biomass: [Model] (R²=[value])
|
730 |
+
- Substrate: [Model] (R²=[value])
|
731 |
+
- Product: [Model] (R²=[value])
|
732 |
+
|
733 |
+
[Continue for all experiments...]
|
734 |
|
735 |
+
3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
|
736 |
+
🏆 **Best Models Overall:**
|
737 |
+
- **Biomass**: [Model] - Best in [X]/[Y] experiments
|
738 |
+
- **Substrate**: [Model] - Average R²=[value]
|
739 |
+
- **Product**: [Model] - Most consistent performance
|
740 |
|
741 |
4. **QUICK COMPARISON TABLE**
|
742 |
+
| Experiment | Best Biomass | Best Substrate | Best Product | Overall R² |
|
743 |
+
|------------|--------------|----------------|--------------|------------|
|
744 |
+
| Exp1 | [Model] | [Model] | [Model] | [avg] |
|
745 |
+
| Exp2 | [Model] | [Model] | [Model] | [avg] |
|
746 |
+
|
747 |
+
5. **KEY FINDINGS**
|
748 |
+
- Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
|
749 |
+
- Best conditions identified: [specific values]
|
750 |
+
- Most robust models: [list with reasons]
|
751 |
|
752 |
+
6. **PRACTICAL RECOMMENDATIONS**
|
753 |
+
- For biomass prediction: Use [Model]
|
754 |
+
- For substrate monitoring: Use [Model]
|
755 |
+
- For product estimation: Use [Model]
|
756 |
+
- Critical parameters: [list with values]
|
757 |
|
758 |
+
Keep it concise but include ALL experiments and model names with their key metrics.
|
759 |
"""
|
760 |
|
761 |
try:
|
|
|
778 |
Generate Python code that:
|
779 |
|
780 |
1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
|
781 |
+
2. Implements analysis BY EXPERIMENT showing:
|
782 |
+
- Best models for each experiment
|
783 |
+
- Comparison across experiments
|
784 |
+
- Parameter evolution between conditions
|
785 |
+
3. Includes visualization functions that:
|
786 |
+
- Show results PER EXPERIMENT
|
787 |
+
- Compare models across experiments
|
788 |
+
- Display parameter trends
|
789 |
+
4. Shows the best model for biomass, substrate, and product separately
|
790 |
|
791 |
The code must include:
|
792 |
+
- Data loading with experiment identification
|
793 |
+
- Model comparison by experiment and variable type
|
794 |
+
- Visualization showing results per experiment
|
795 |
+
- Overall best model selection with justification
|
796 |
+
- Functions to predict using the best models for each category
|
797 |
|
798 |
+
Make sure to include comments indicating which model won for each variable type and why.
|
799 |
|
800 |
Format: Complete, executable Python code with actual data values embedded.
|
801 |
"""
|
|
|
827 |
except Exception as e:
|
828 |
return {"error": str(e)}
|
829 |
|
830 |
+
def process_files(files, claude_model: str, detail_level: str = "detailed",
|
831 |
+
language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
|
832 |
+
"""Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
|
833 |
processor = FileProcessor()
|
834 |
analyzer = AIAnalyzer(client, model_registry)
|
835 |
results = []
|
|
|
860 |
analysis_type = analyzer.detect_analysis_type(df)
|
861 |
|
862 |
if analysis_type == AnalysisType.FITTING_RESULTS:
|
863 |
+
result = analyzer.analyze_fitting_results(
|
864 |
+
df, claude_model, detail_level, language, additional_specs
|
865 |
+
)
|
866 |
|
867 |
if language == 'es':
|
868 |
results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
|
|
|
881 |
return analysis_text, code_text
|
882 |
|
883 |
def generate_implementation_code(analysis_results: str) -> str:
|
884 |
+
"""Genera código de implementación con análisis por experimento"""
|
885 |
code = """
|
886 |
import numpy as np
|
887 |
import pandas as pd
|
|
|
896 |
plt.style.use('seaborn-v0_8-darkgrid')
|
897 |
sns.set_palette("husl")
|
898 |
|
899 |
+
class ExperimentalModelAnalyzer:
|
900 |
\"\"\"
|
901 |
+
Class for comparative analysis of biotechnological models across multiple experiments.
|
902 |
+
Analyzes biomass, substrate and product models separately for each experimental condition.
|
903 |
\"\"\"
|
904 |
|
905 |
def __init__(self):
|
906 |
self.results_df = None
|
907 |
+
self.experiments = {}
|
908 |
+
self.best_models_by_experiment = {}
|
909 |
+
self.overall_best_models = {
|
910 |
+
'biomass': None,
|
911 |
+
'substrate': None,
|
912 |
+
'product': None
|
913 |
+
}
|
914 |
|
915 |
def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
|
916 |
\"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
|
|
|
925 |
print(f"✅ Data loaded: {len(self.results_df)} models")
|
926 |
print(f"📊 Available columns: {list(self.results_df.columns)}")
|
927 |
|
928 |
+
# Identify experiments
|
929 |
+
if 'Experiment' in self.results_df.columns:
|
930 |
+
self.experiments = self.results_df.groupby('Experiment').groups
|
931 |
+
print(f"🧪 Experiments found: {list(self.experiments.keys())}")
|
932 |
+
|
933 |
return self.results_df
|
934 |
|
935 |
+
def analyze_by_experiment(self,
|
936 |
+
experiment_col: str = 'Experiment',
|
937 |
+
model_col: str = 'Model',
|
938 |
+
type_col: str = 'Type',
|
939 |
r2_col: str = 'R2',
|
940 |
+
rmse_col: str = 'RMSE') -> Dict:
|
|
|
|
|
|
|
941 |
\"\"\"
|
942 |
+
Analyze models by experiment and variable type.
|
943 |
+
Identifies best models for biomass, substrate, and product in each experiment.
|
944 |
\"\"\"
|
945 |
if self.results_df is None:
|
946 |
raise ValueError("First load data with load_results()")
|
947 |
|
948 |
+
results_by_exp = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
949 |
|
950 |
+
# Get unique experiments
|
951 |
+
if experiment_col in self.results_df.columns:
|
952 |
+
experiments = self.results_df[experiment_col].unique()
|
953 |
+
else:
|
954 |
+
experiments = ['All_Data']
|
955 |
+
self.results_df[experiment_col] = 'All_Data'
|
956 |
|
957 |
+
print("\\n" + "="*80)
|
958 |
+
print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
|
959 |
+
print("="*80)
|
960 |
|
961 |
+
for exp in experiments:
|
962 |
+
print(f"\\n🧪 EXPERIMENT: {exp}")
|
963 |
+
print("-"*50)
|
964 |
+
|
965 |
+
exp_data = self.results_df[self.results_df[experiment_col] == exp]
|
966 |
+
results_by_exp[exp] = {}
|
967 |
+
|
968 |
+
# Analyze by variable type if available
|
969 |
+
if type_col in exp_data.columns:
|
970 |
+
var_types = exp_data[type_col].unique()
|
971 |
+
|
972 |
+
for var_type in var_types:
|
973 |
+
var_data = exp_data[exp_data[type_col] == var_type]
|
974 |
+
|
975 |
+
if not var_data.empty:
|
976 |
+
# Find best model for this variable type
|
977 |
+
best_idx = var_data[r2_col].idxmax()
|
978 |
+
best_model = var_data.loc[best_idx]
|
979 |
+
|
980 |
+
results_by_exp[exp][var_type] = {
|
981 |
+
'best_model': best_model[model_col],
|
982 |
+
'r2': best_model[r2_col],
|
983 |
+
'rmse': best_model[rmse_col],
|
984 |
+
'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
|
985 |
+
}
|
986 |
+
|
987 |
+
print(f"\\n 📈 {var_type.upper()}:")
|
988 |
+
print(f" Best Model: {best_model[model_col]}")
|
989 |
+
print(f" R² = {best_model[r2_col]:.4f}")
|
990 |
+
print(f" RMSE = {best_model[rmse_col]:.4f}")
|
991 |
+
|
992 |
+
# Show all models for this variable
|
993 |
+
print(f"\\n All {var_type} models tested:")
|
994 |
+
for _, row in var_data.iterrows():
|
995 |
+
print(f" - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
|
996 |
+
else:
|
997 |
+
# If no type column, analyze all models together
|
998 |
+
best_idx = exp_data[r2_col].idxmax()
|
999 |
+
best_model = exp_data.loc[best_idx]
|
1000 |
+
|
1001 |
+
results_by_exp[exp]['all'] = {
|
1002 |
+
'best_model': best_model[model_col],
|
1003 |
+
'r2': best_model[r2_col],
|
1004 |
+
'rmse': best_model[rmse_col],
|
1005 |
+
'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
|
1006 |
+
}
|
1007 |
|
1008 |
+
self.best_models_by_experiment = results_by_exp
|
|
|
|
|
|
|
|
|
|
|
|
|
1009 |
|
1010 |
+
# Determine overall best models
|
1011 |
+
self._determine_overall_best_models()
|
|
|
1012 |
|
1013 |
+
return results_by_exp
|
1014 |
+
|
1015 |
+
def _determine_overall_best_models(self):
    '''Rank every model by its mean R² across experiments, per variable type.

    Reads ``self.best_models_by_experiment`` (experiment -> variable type ->
    result dict holding an ``'all_models'`` list of records), prints the
    average R² and RMSE of each model, and stores the winner of the biomass,
    substrate and product groups in ``self.overall_best_models`` under the
    lower-cased type name. Other variable types are printed but not stored.

    The stored winner is ``None`` when no model of that type has a
    comparable average R² (for example when every average is NaN).
    '''
    print("\\n" + "="*80)
    print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
    print("="*80)

    # Aggregate performance by variable type, then by model name.
    # NOTE(review): records are read with the literal keys 'Model', 'R2' and
    # 'RMSE' -- confirm they match the column names used to build 'all_models'.
    model_performance = {}

    for exp, exp_results in self.best_models_by_experiment.items():
        for var_type, var_results in exp_results.items():
            per_type = model_performance.setdefault(var_type, {})

            for model_data in var_results['all_models']:
                perf = per_type.setdefault(model_data['Model'], {
                    'r2_values': [],
                    'rmse_values': [],
                    'experiments': []
                })
                perf['r2_values'].append(model_data['R2'])
                perf['rmse_values'].append(model_data['RMSE'])
                perf['experiments'].append(exp)

    # Calculate average performance and select the best model of each type.
    for var_type, models in model_performance.items():
        type_name = str(var_type)

        # R² has no lower bound (a fit can be arbitrarily worse than the
        # mean), so the running maximum must start below every real value.
        best_avg_r2 = float('-inf')
        best_model = None

        print(f"\\n📊 {type_name.upper()} MODELS:")
        for model_name, perf_data in models.items():
            avg_r2 = np.mean(perf_data['r2_values'])
            avg_rmse = np.mean(perf_data['rmse_values'])
            n_exp = len(perf_data['experiments'])

            print(f" {model_name}:")
            print(f" Average R² = {avg_r2:.4f}")
            print(f" Average RMSE = {avg_rmse:.4f}")
            print(f" Tested in {n_exp} experiments")

            # A NaN average never compares greater, so it cannot win.
            if avg_r2 > best_avg_r2:
                best_avg_r2 = avg_r2
                best_model = {
                    'name': model_name,
                    'avg_r2': avg_r2,
                    'avg_rmse': avg_rmse,
                    'n_experiments': n_exp
                }

        if type_name.lower() in ('biomass', 'substrate', 'product'):
            self.overall_best_models[type_name.lower()] = best_model
            if best_model is None:
                # Nothing was selectable; report it instead of indexing None.
                print(f"\\n ⚠ No {type_name.upper()} model has a comparable average R²")
            else:
                print(f"\\n 🏆 BEST {type_name.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")
|
1070 |
|
1071 |
+
def create_comparison_visualizations(self):
    '''Draw a 2x2 figure comparing the best models across experiments.

    Panels: (1) grouped bars with the best R² per experiment for biomass,
    substrate and product, (2) and (3) text placeholders, (4) a text summary
    built from ``self.overall_best_models``. The figure is shown with
    ``plt.show()``; nothing is returned.

    Variable types are matched case-insensitively, the same way the overall
    ranking lower-cases them, so 'biomass' and 'Biomass' land in the same
    bar series. A type missing from an experiment is plotted as 0.

    Raises:
        ValueError: if ``self.best_models_by_experiment`` is empty.
    '''
    if not self.best_models_by_experiment:
        raise ValueError("First run analyze_by_experiment()")

    # Prepare data for visualization: one R² series per variable type.
    experiments = []
    r2_by_type = {'Biomass': [], 'Substrate': [], 'Product': []}

    for exp, results in self.best_models_by_experiment.items():
        experiments.append(exp)
        # Normalise the keys once so the lookup ignores letter case.
        by_lower_name = {str(key).lower(): value for key, value in results.items()}
        for label, series in r2_by_type.items():
            series.append(by_lower_name.get(label.lower(), {}).get('r2', 0))

    # Create figure with subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)

    # 1. R² comparison by experiment and variable type
    ax1 = axes[0, 0]
    x = np.arange(len(experiments))
    width = 0.25

    # (series label, horizontal offset of its bar within a group, colour)
    bar_specs = [
        ('Biomass', -width, 'green'),
        ('Substrate', 0, 'blue'),
        ('Product', width, 'red'),
    ]
    for label, offset, color in bar_specs:
        ax1.bar(x + offset, r2_by_type[label], width, label=label, color=color, alpha=0.8)

    ax1.set_xlabel('Experiment')
    ax1.set_ylabel('R²')
    ax1.set_title('Best Model R² by Experiment and Variable Type')
    ax1.set_xticks(x)
    ax1.set_xticklabels(experiments, rotation=45, ha='right')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Add value labels above every bar with a positive value.
    for label, offset, _ in bar_specs:
        for i, value in enumerate(r2_by_type[label]):
            if value > 0:
                ax1.text(i + offset, value + 0.01, f'{value:.3f}',
                         ha='center', va='bottom', fontsize=8)

    # 2. Model frequency panel (placeholder text only)
    ax2 = axes[0, 1]
    ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
             ha='center', va='center', transform=ax2.transAxes)
    ax2.set_title('Most Frequently Selected Models')

    # 3. Parameter evolution panel (placeholder text only)
    ax3 = axes[1, 0]
    ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
             ha='center', va='center', transform=ax3.transAxes)
    ax3.set_title('Parameter Trends')

    # 4. Overall best models summary
    ax4 = axes[1, 1]
    ax4.axis('off')

    summary_parts = ["🏆 OVERALL BEST MODELS\\n\\n"]
    for var_type, model_info in self.overall_best_models.items():
        # Types without a selected model are left out of the summary.
        if model_info:
            summary_parts.append(
                f"{var_type.upper()}:\\n"
                f" Model: {model_info['name']}\\n"
                f" Avg R²: {model_info['avg_r2']:.4f}\\n"
                f" Tested in: {model_info['n_experiments']} experiments\\n\\n"
            )
    summary_text = "".join(summary_parts)

    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
             fontsize=12, verticalalignment='top', fontfamily='monospace')
    ax4.set_title('Overall Best Models Summary')

    plt.tight_layout()
    plt.show()
|
1147 |
+
|
1148 |
+
def generate_summary_table(self) -> pd.DataFrame:
    '''Build, print and return one row per (experiment, variable type).

    Each row carries the best model stored in
    ``self.best_models_by_experiment`` for that pair together with its R²
    and RMSE. The table is printed without the index before being returned.
    '''
    rows = [
        {
            'Experiment': experiment,
            'Variable_Type': variable_type,
            'Best_Model': outcome['best_model'],
            'R2': outcome['r2'],
            'RMSE': outcome['rmse'],
        }
        for experiment, per_type in self.best_models_by_experiment.items()
        for variable_type, outcome in per_type.items()
    ]
    table = pd.DataFrame(rows)

    print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
    print("=" * 80)
    print(table.to_string(index=False))

    return table
|
1169 |
|
1170 |
+
# Example usage
|
1171 |
if __name__ == "__main__":
    # Demo run: feed a small in-memory dataset through the whole pipeline.
    print("🧬 Experimental Model Comparison System")
    print("=" * 60)

    # Column-oriented demo data, 14 rows: 6 biomass fits, then 4 substrate
    # fits, then 4 product fits, spread over two pH experiments.
    example_data = {
        'Experiment': ['pH_7.0'] * 3 + ['pH_7.5'] * 3
                      + ['pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'] * 2,
        'Model': ['Monod', 'Logistic', 'Gompertz'] * 2
                 + ['First_Order', 'Monod_Substrate'] * 2
                 + ['Luedeking_Piret', 'Linear'] * 2,
        'Type': ['Biomass'] * 6 + ['Substrate'] * 4 + ['Product'] * 4,
        'R2': [
            0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
            0.9723, 0.9856, 0.9698, 0.9812,
            0.9634, 0.9512, 0.9687, 0.9423,
        ],
        'RMSE': [
            0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
            0.0312, 0.0245, 0.0334, 0.0289,
            0.0412, 0.0523, 0.0389, 0.0567,
        ],
        # mu_max is only filled in for the biomass rows.
        'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49] + [None] * 8,
        # Ks is only filled in for the substrate rows.
        'Ks': [None] * 6 + [2.1, 1.8, 2.3, 1.9] + [None] * 4,
    }

    # Build the analyzer and hand it the dataset.
    analyzer = ExperimentalModelAnalyzer()
    analyzer.load_results(data_dict=example_data)

    # Per-experiment analysis, then the figure, then the summary table.
    results = analyzer.analyze_by_experiment()
    analyzer.create_comparison_visualizations()
    summary = analyzer.generate_summary_table()

    print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
|
1214 |
"""
|
1215 |
|
1216 |
return code
|
|
|
1270 |
gr.update(label=t['select_language']), # language_selector
|
1271 |
gr.update(label=t['select_theme']), # theme_selector
|
1272 |
gr.update(label=t['detail_level']), # detail_level
|
1273 |
+
gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
|
1274 |
gr.update(value=t['analyze_button']), # analyze_btn
|
1275 |
gr.update(label=t['export_format']), # export_format
|
1276 |
gr.update(value=t['export_button']), # export_btn
|
|
|
1279 |
gr.update(label=t['data_format']) # data_format_accordion
|
1280 |
]
|
1281 |
|
1282 |
+
def process_and_store(files, model, detail, language, additional_specs):
    """Process the uploaded files and keep the results in ``app_state``.

    Returns an ``(analysis, code)`` tuple. When no files are given, the
    tuple is the localized 'error_no_files' message and an empty string,
    and ``app_state`` is left untouched.
    """
    if files:
        analysis_text, generated_code = process_files(
            files, model, detail, language, additional_specs
        )
        # Store the latest outputs on the shared application state.
        app_state.current_analysis, app_state.current_code = analysis_text, generated_code
        return analysis_text, generated_code

    return TRANSLATIONS[language]['error_no_files'], ""
|
|
|
1337 |
label=TRANSLATIONS[current_language]['detail_level']
|
1338 |
)
|
1339 |
|
1340 |
+
# Nueva entrada para especificaciones adicionales
|
1341 |
+
additional_specs = gr.Textbox(
|
1342 |
+
label=TRANSLATIONS[current_language]['additional_specs'],
|
1343 |
+
placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
|
1344 |
+
lines=3,
|
1345 |
+
max_lines=5,
|
1346 |
+
interactive=True
|
1347 |
+
)
|
1348 |
+
|
1349 |
analyze_btn = gr.Button(
|
1350 |
TRANSLATIONS[current_language]['analyze_button'],
|
1351 |
variant="primary",
|
|
|
1397 |
gr.Markdown("""
|
1398 |
### Expected CSV/Excel structure:
|
1399 |
|
1400 |
+
| Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
|
1401 |
+
|------------|-------|------|-----|------|-----|-----|--------|-------|------------|
|
1402 |
+
| pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
|
1403 |
+
| pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
|
1404 |
+
| pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
|
1405 |
+
| pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
|
1406 |
+
|
1407 |
+
**Important columns:**
|
1408 |
+
- **Experiment**: Experimental condition identifier
|
1409 |
+
- **Model**: Model name
|
1410 |
+
- **Type**: Variable type (Biomass/Substrate/Product)
|
1411 |
+
- **R2, RMSE**: Fit quality metrics
|
1412 |
+
- **Parameters**: Model-specific parameters
|
1413 |
""")
|
1414 |
|
1415 |
# Definir ejemplos
|
1416 |
examples = gr.Examples(
|
1417 |
examples=[
|
1418 |
+
[["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed", ""],
|
1419 |
+
[["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized", "Focus on temperature effects"]
|
1420 |
],
|
1421 |
+
inputs=[files_input, model_selector, detail_level, additional_specs],
|
1422 |
label=TRANSLATIONS[current_language]['examples']
|
1423 |
)
|
1424 |
|
1425 |
+
# Eventos - Actualizado para incluir additional_specs
|
1426 |
language_selector.change(
|
1427 |
update_interface_language,
|
1428 |
inputs=[language_selector],
|
1429 |
outputs=[
|
1430 |
title_text, subtitle_text, files_input, model_selector,
|
1431 |
+
language_selector, theme_selector, detail_level, additional_specs,
|
1432 |
+
analyze_btn, export_format, export_btn, analysis_output,
|
1433 |
+
code_output, data_format_accordion
|
1434 |
]
|
1435 |
)
|
1436 |
|
|
|
1448 |
|
1449 |
analyze_btn.click(
|
1450 |
fn=process_and_store,
|
1451 |
+
inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
|
1452 |
outputs=[analysis_output, code_output]
|
1453 |
)
|
1454 |
|