C2MV committed on
Commit af1d89d · verified · 1 Parent(s): 1cac78f

Update app.py

Files changed (1)
  1. app.py +1418 -242
app.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from openai import OpenAI
  import PyPDF2
  import pandas as pd
  import numpy as np
@@ -8,352 +8,1528 @@ import os
  import json
  import zipfile
  import tempfile
- from typing import Dict, List, Tuple, Union
  from pathlib import Path
  from docx import Document
- from docx.shared import Pt
  from reportlab.lib import colors
- from reportlab.lib.pagesizes import letter
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
  from reportlab.lib.units import inch
  from datetime import datetime

  # Configuración para HuggingFace
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

- # Inicializar cliente OpenAI para Nebius
  client = OpenAI(
-     base_url="https://api.studio.nebius.com/v1/",
      api_key=os.environ.get("NEBIUS_API_KEY")
  )

- # Sistema de traducción
  TRANSLATIONS = {
      'en': {
-         'title': '🧬 API-Powered Biotechnological Model Analyzer',
-         'subtitle': 'Upload your model fitting results and let the AI perform a complete comparative analysis.',
          'upload_files': '📁 Upload fitting results (CSV/Excel)',
-         'select_model': '🤖 AI Model',
          'select_language': '🌐 Language',
-         'detail_level': '📋 Analysis Detail Level',
          'detailed': 'Detailed',
          'summarized': 'Summarized',
-         'analyze_button': '🚀 Analyze with AI',
-         'export_format': '📄 Export Format',
-         'export_button': '💾 Export Analysis',
-         'comparative_analysis': '📊 AI-Generated Analysis',
-         'implementation_code': '💻 AI-Generated Implementation Code',
-         'data_format': '📋 Expected Data Format',
-         'error_no_api': 'Please configure NEBIUS_API_KEY in HuggingFace Space secrets',
          'error_no_files': 'Please upload fitting result files to analyze',
          'report_exported': 'Report exported successfully as',
-         'additional_specs': '📝 Additional Specifications for Analysis',
-         'additional_specs_placeholder': 'e.g., "Focus on the effect of temperature" or "Provide scale-up recommendations"...'
      },
      'es': {
-         'title': '🧬 Analizador Biotecnológico Impulsado por API',
-         'subtitle': 'Sube los resultados de ajuste de tus modelos y deja que la IA realice un análisis comparativo completo.',
          'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
-         'select_model': '🤖 Modelo de IA',
          'select_language': '🌐 Idioma',
-         'detail_level': '📋 Nivel de Detalle del Análisis',
          'detailed': 'Detallado',
          'summarized': 'Resumido',
-         'analyze_button': '🚀 Analizar con IA',
-         'export_format': '📄 Formato de Exportación',
-         'export_button': '💾 Exportar Análisis',
-         'comparative_analysis': '📊 Análisis Generado por IA',
-         'implementation_code': '💻 Código de Implementación Generado por IA',
-         'data_format': '📋 Formato de Datos Esperado',
-         'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos del Space',
          'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
          'report_exported': 'Reporte exportado exitosamente como',
-         'additional_specs': '📝 Especificaciones Adicionales para el Análisis',
-         'additional_specs_placeholder': 'Ej: "Enfócate en el efecto de la temperatura" o "Provee recomendaciones de escalado"...'
      },
  }

- # Modelos de Nebius disponibles
- NEBIUS_MODELS = {
-     "Qwen/Qwen3-14B": {
-         "name": "Qwen 3 (14B)",
-         "description": "Modelo potente y versátil de la familia Qwen.",
      },
  }

  class FileProcessor:
      @staticmethod
-     def read_csv(csv_file: bytes) -> pd.DataFrame:
-         try: return pd.read_csv(io.BytesIO(csv_file))
-         except Exception: return None

      @staticmethod
-     def read_excel(excel_file: bytes) -> pd.DataFrame:
-         try: return pd.read_excel(io.BytesIO(excel_file))
-         except Exception: return None

  class ReportExporter:
      @staticmethod
-     def export_to_docx(content: str, filename: str, language: str = 'en'):
          doc = Document()
-         doc.add_heading(TRANSLATIONS[language]['title'], 0)
-         doc.add_paragraph(f"{TRANSLATIONS[language]['report_exported'].split(' as')[0]}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
          doc.add_paragraph()
-         for line in content.split('\n'):
-             if line.startswith('### '): doc.add_heading(line[4:], level=3)
-             elif line.startswith('## '): doc.add_heading(line[3:], level=2)
-             elif line.startswith('# '): doc.add_heading(line[2:], level=1)
-             else: doc.add_paragraph(line)
          doc.save(filename)
          return filename
-
      @staticmethod
-     def export_to_pdf(content: str, filename: str, language: str = 'en'):
          doc = SimpleDocTemplate(filename, pagesize=letter)
          styles = getSampleStyleSheet()
-         story = [Paragraph(TRANSLATIONS[language]['title'], styles['h1'])]
-         for line in content.split('\n'):
-             if line.startswith('### '): story.append(Paragraph(line[4:], styles['h3']))
-             elif line.startswith('## '): story.append(Paragraph(line[3:], styles['h2']))
-             elif line.startswith('# '): story.append(Paragraph(line[2:], styles['h1']))
-             else: story.append(Paragraph(line, styles['BodyText']))
          doc.build(story)
          return filename

  class AIAnalyzer:
-     """
-     Clase que interactúa exclusivamente con la API para obtener análisis y código.
-     No contiene lógica de análisis predefinida.
-     """
-     def __init__(self, client):
          self.client = client
-
-     def get_analysis_and_code(self, data: pd.DataFrame, model: str, detail_level: str, language: str, additional_specs: str) -> Dict[str, str]:
-         """
-         Realiza una única llamada a la API para obtener tanto el análisis
-         como el código de implementación en un formato JSON.
-         """
-         lang_instruction = TRANSLATIONS[language]['additional_specs_placeholder']  # Reutilizamos un texto traducido

-         # Prompt unificado que solicita una respuesta JSON con dos claves
-         prompt = f"""
-         Act as an expert in biotechnology and data science. Your task is to analyze the provided model fitting results and generate both a textual analysis and a Python implementation script.
-
-         The user has provided the following data from a CSV/Excel file:
-         --- DATA ---
-         {data.to_string()}
-         --- END DATA ---
-
-         User requirements:
-         - Language for the analysis: {language}
-         - Detail level: {detail_level}
-         - Additional specifications: "{additional_specs if additional_specs else 'None'}"
-
-         Based on all the information above, perform the following two tasks:
-
-         TASK 1: GENERATE TEXTUAL ANALYSIS
-         Write a comprehensive comparative analysis in Markdown format.
-         - If detail_level is 'detailed', provide an in-depth, experiment-by-experiment comparison, parameter analysis, biological interpretation, and robust conclusions.
-         - If detail_level is 'summarized', provide a concise overview, highlight the best models per experiment, and give clear, practical recommendations.
-         - The analysis MUST be in {language}.
-
-         TASK 2: GENERATE PYTHON CODE
-         Write a complete, executable Python script that a researcher can use to replicate and visualize this analysis.
-         - The script should include data loading (embed the provided data directly).
-         - It must contain functions to compare models and find the best ones.
-         - It must include plotting functions (using matplotlib or seaborn) to visualize the results, such as comparing R² values across experiments.
-         - The code should be well-commented.
-
-         IMPORTANT: Your final output must be a single, valid JSON object containing two keys: "analysis" and "code".
-         Example format:
-         {{
-             "analysis": "### Comparative Analysis\\n\\nHere is the detailed analysis in Markdown...",
-             "code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Your Python code here..."
-         }}
-
-         Do not add any text or explanations outside of the JSON object.
          """
-
          try:
              response = self.client.chat.completions.create(
-                 model=model,
                  temperature=0.6,
                  top_p=0.95,
-                 max_tokens=4000,  # Usar un valor alto para permitir respuestas completas
-                 messages=[{"role": "user", "content": prompt}]
              )

-             raw_response_text = response.choices[0].message.content

-             # Intentar parsear la respuesta JSON
-             try:
-                 # Limpiar el texto para asegurar que sea un JSON válido
-                 json_text = raw_response_text[raw_response_text.find('{'):raw_response_text.rfind('}')+1]
-                 parsed_json = json.loads(json_text)
-                 return {
-                     "analysis": parsed_json.get("analysis", "API did not return an analysis."),
-                     "code": parsed_json.get("code", "# API did not return code.")
-                 }
-             except (json.JSONDecodeError, IndexError):
-                 # Si falla el parseo, devolver el texto crudo como análisis
-                 return {
-                     "analysis": f"API returned a non-JSON response:\n\n{raw_response_text}",
-                     "code": "# Could not parse API response to extract code."
-                 }
-
-         except Exception as e:
-             error_message = f"An error occurred while calling the API: {str(e)}"
              return {
-                 "analysis": error_message,
-                 "code": f"# {error_message}"
              }
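The brace-slicing used in get_analysis_and_code is a common way to salvage a JSON object from a chatty model reply; a minimal standalone sketch of the same idea (the sample reply string is illustrative only):

import json

def extract_json_block(raw: str) -> dict:
    # Keep only the text between the first '{' and the last '}' and parse it.
    start, end = raw.find('{'), raw.rfind('}')
    if start == -1 or end == -1:
        raise ValueError("No JSON object found in the reply")
    return json.loads(raw[start:end + 1])

sample = 'Sure, here it is:\n{"analysis": "## Summary", "code": "print(1)"}\nHope this helps!'
print(extract_json_block(sample)["analysis"])  # -> ## Summary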
 
 
 

- def process_files(files: List, model: str, detail_level: str, language: str, additional_specs: str) -> Tuple[str, str]:
-     """
-     Procesa los archivos subidos, llama al analizador de IA y devuelve los resultados.
-     """
-     if not files:
-         return TRANSLATIONS[language]['error_no_files'], ""
-
      processor = FileProcessor()
-     analyzer = AIAnalyzer(client)

-     # Por simplicidad, se procesa solo el primer archivo válido
-     full_analysis = []
-     full_code = []
-
      for file in files:
-         if file is None: continue
-
-         file_name = file.name
          file_ext = Path(file_name).suffix.lower()

          with open(file.name, 'rb') as f:
              file_content = f.read()

-         df = None
-         if file_ext == '.csv':
-             df = processor.read_csv(file_content)
-         elif file_ext in ['.xlsx', '.xls']:
-             df = processor.read_excel(file_content)
-
-         if df is not None:
-             full_analysis.append(f"# Analysis for: {file_name}")
-             api_result = analyzer.get_analysis_and_code(df, model, detail_level, language, additional_specs)
-             full_analysis.append(api_result.get("analysis", ""))
-             full_code.append(f"# Code generated for: {file_name}\n" + api_result.get("code", ""))
-             # Rompemos el bucle para analizar solo un archivo a la vez y evitar confusión
-             break
-
-     if not full_analysis:
-         return "No valid CSV/Excel files found to analyze.", ""

-     return "\n\n".join(full_analysis), "\n\n".join(full_code)

- # --- Interfaz de Gradio ---
- def create_interface():
-     current_language = "es"

-     def update_language(language):
          t = TRANSLATIONS[language]
          return [
-             gr.update(value=f"# {t['title']}"), gr.update(value=t['subtitle']),
-             gr.update(label=t['upload_files']), gr.update(label=t['select_model']),
-             gr.update(label=t['select_language']), gr.update(label=t['detail_level']),
-             gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
-             gr.update(value=t['analyze_button']), gr.update(label=t['export_format']),
-             gr.update(value=t['export_button']), gr.update(label=t['comparative_analysis']),
-             gr.update(label=t['implementation_code']), gr.update(label=t['data_format'])
          ]
-
-     with gr.Blocks(theme=gr.themes.Soft()) as demo:
-         app_state = gr.State({"analysis": "", "code": "", "language": "es"})
-
-         title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
-         subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])

          with gr.Row():
              with gr.Column(scale=1):
-                 files_input = gr.File(label=TRANSLATIONS[current_language]['upload_files'], file_count="multiple", file_types=[".csv", ".xlsx", ".xls"], type="filepath")
-
-                 default_model = "Qwen/Qwen3-14B"
-                 model_selector = gr.Dropdown(choices=list(NEBIUS_MODELS.keys()), value=default_model, label=TRANSLATIONS[current_language]['select_model'])

-                 detail_level_selector = gr.Radio(choices=[(TRANSLATIONS[current_language]['detailed'], "detailed"), (TRANSLATIONS[current_language]['summarized'], "summarized")], value="detailed", label=TRANSLATIONS[current_language]['detail_level'])

-                 additional_specs_input = gr.Textbox(label=TRANSLATIONS[current_language]['additional_specs'], placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'], lines=3)

-                 language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es")], value="es", label=TRANSLATIONS[current_language]['select_language'])

-                 analyze_btn = gr.Button(TRANSLATIONS[current_language]['analyze_button'], variant="primary")

                  gr.Markdown("---")

-                 export_format_selector = gr.Radio(choices=["DOCX", "PDF"], value="PDF", label=TRANSLATIONS[current_language]['export_format'])
-                 export_btn = gr.Button(TRANSLATIONS[current_language]['export_button'])
-                 export_file_output = gr.File(label="Download Report", visible=False)
-
              with gr.Column(scale=2):
-                 analysis_output = gr.Markdown(label=TRANSLATIONS[current_language]['comparative_analysis'])
-                 code_output = gr.Code(label=TRANSLATIONS[current_language]['implementation_code'], language="python", interactive=True)
-
-         def run_analysis(files, model, detail, lang, specs, state):
-             analysis, code = process_files(files, model, detail, lang, specs)
-             state["analysis"] = analysis
-             state["code"] = code
-             state["language"] = lang
-             return analysis, code, state
-
-         def run_export(state, format):
-             if not state["analysis"]:
-                 return gr.update(visible=False)

-             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-             ext = "docx" if format == "DOCX" else "pdf"
-             filename = f"analysis_report_{timestamp}.{ext}"

-             if format == "DOCX":
-                 ReportExporter.export_to_docx(state["analysis"], filename, state["language"])
-             else:
-                 ReportExporter.export_to_pdf(state["analysis"], filename, state["language"])
-
-             return gr.update(value=filename, visible=True)
-
          analyze_btn.click(
-             fn=run_analysis,
-             inputs=[files_input, model_selector, detail_level_selector, language_selector, additional_specs_input, app_state],
-             outputs=[analysis_output, code_output, app_state]
          )

          export_btn.click(
-             fn=run_export,
-             inputs=[app_state, export_format_selector],
-             outputs=[export_file_output]
          )
-
          return demo

  def main():
-     if not os.getenv("NEBIUS_API_KEY"):
-         print("⚠️ NEBIUS_API_KEY not found. Please set it as an environment variable.")
-         return gr.Interface(fn=lambda: TRANSLATIONS['en']['error_no_api'], inputs=[], outputs="text", title="Configuration Error")

      return create_interface()

  if __name__ == "__main__":
-     # Crear archivos de ejemplo para Gradio si no existen
-     if not os.path.exists("examples"):
-         os.makedirs("examples")
-     if not os.path.exists("examples/biomass_models_comparison.csv"):
-         pd.DataFrame({
-             'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
-             'Model': ['Monod', 'Logistic', 'Monod', 'Logistic'],
-             'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass'],
-             'R2': [0.98, 0.99, 0.97, 0.985],
-             'RMSE': [0.02, 0.01, 0.03, 0.015]
-         }).to_csv("examples/biomass_models_comparison.csv", index=False)
-
      demo = main()
      if demo:
-         demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
  import gradio as gr
+ #import anthropic
  import PyPDF2
  import pandas as pd
  import numpy as np

  import json
  import zipfile
  import tempfile
+ from typing import Dict, List, Tuple, Union, Optional
+ import re
  from pathlib import Path
+ import openpyxl
+ from dataclasses import dataclass
+ from enum import Enum
  from docx import Document
+ from docx.shared import Inches, Pt, RGBColor
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
  from reportlab.lib import colors
+ from reportlab.lib.pagesizes import letter, A4
+ from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
  from reportlab.lib.units import inch
+ from reportlab.pdfbase import pdfmetrics
+ from reportlab.pdfbase.ttfonts import TTFont
+ import matplotlib.pyplot as plt
  from datetime import datetime

  # Configuración para HuggingFace
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

+ # Inicializar cliente Anthropic
+ #client = anthropic.Anthropic()
+
+ # Inicializar cliente Nebius
+ import os
+ from openai import OpenAI
+
  client = OpenAI(
+     base_url="https://api.studio.nebius.com/v1/",
      api_key=os.environ.get("NEBIUS_API_KEY")
  )
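Everything below depends on this client, so a quick smoke test can be worth running; this is only a sketch, reusing the same OpenAI-compatible call shape and model name that appear later in the file:

def check_nebius_connection() -> bool:
    # Assumes NEBIUS_API_KEY is set in the Space secrets; model name mirrors the CLAUDE_MODELS entry below.
    try:
        resp = client.chat.completions.create(
            model="Qwen/Qwen3-14B",
            max_tokens=5,
            messages=[{"role": "user", "content": "ping"}],
        )
        return bool(resp.choices[0].message.content)
    except Exception as exc:
        print(f"Nebius API check failed: {exc}")
        return False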
 
45
+ # Sistema de traducción - Actualizado con nuevas entradas
46
  TRANSLATIONS = {
47
  'en': {
48
+ 'title': '🧬 Comparative Analyzer of Biotechnological Models',
49
+ 'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
50
  'upload_files': '📁 Upload fitting results (CSV/Excel)',
51
+ 'select_model': '🤖 Claude Model',
52
  'select_language': '🌐 Language',
53
+ 'select_theme': '🎨 Theme',
54
+ 'detail_level': '📋 Analysis detail level',
55
  'detailed': 'Detailed',
56
  'summarized': 'Summarized',
57
+ 'analyze_button': '🚀 Analyze and Compare Models',
58
+ 'export_format': '📄 Export format',
59
+ 'export_button': '💾 Export Report',
60
+ 'comparative_analysis': '📊 Comparative Analysis',
61
+ 'implementation_code': '💻 Implementation Code',
62
+ 'data_format': '📋 Expected data format',
63
+ 'examples': '📚 Analysis examples',
64
+ 'light': 'Light',
65
+ 'dark': 'Dark',
66
+ 'best_for': 'Best for',
67
+ 'loading': 'Loading...',
68
+ 'error_no_api': 'Please configure ANTHROPIC_API_KEY in HuggingFace Space secrets',
69
  'error_no_files': 'Please upload fitting result files to analyze',
70
  'report_exported': 'Report exported successfully as',
71
+ 'specialized_in': '🎯 Specialized in:',
72
+ 'metrics_analyzed': '📊 Analyzed metrics:',
73
+ 'what_analyzes': '🔍 What it specifically analyzes:',
74
+ 'tips': '💡 Tips for better results:',
75
+ 'additional_specs': '📝 Additional specifications for analysis',
76
+ 'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
77
  },
78
  'es': {
79
+ 'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
80
+ 'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
81
  'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
82
+ 'select_model': '🤖 Modelo Claude',
83
  'select_language': '🌐 Idioma',
84
+ 'select_theme': '🎨 Tema',
85
+ 'detail_level': '📋 Nivel de detalle del análisis',
86
  'detailed': 'Detallado',
87
  'summarized': 'Resumido',
88
+ 'analyze_button': '🚀 Analizar y Comparar Modelos',
89
+ 'export_format': '📄 Formato de exportación',
90
+ 'export_button': '💾 Exportar Reporte',
91
+ 'comparative_analysis': '📊 Análisis Comparativo',
92
+ 'implementation_code': '💻 Código de Implementación',
93
+ 'data_format': '📋 Formato de datos esperado',
94
+ 'examples': '📚 Ejemplos de análisis',
95
+ 'light': 'Claro',
96
+ 'dark': 'Oscuro',
97
+ 'best_for': 'Mejor para',
98
+ 'loading': 'Cargando...',
99
+ 'error_no_api': 'Por favor configura ANTHROPIC_API_KEY en los secretos del Space',
100
  'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
101
  'report_exported': 'Reporte exportado exitosamente como',
102
+ 'specialized_in': '🎯 Especializado en:',
103
+ 'metrics_analyzed': '📊 Métricas analizadas:',
104
+ 'what_analyzes': '🔍 Qué analiza específicamente:',
105
+ 'tips': '💡 Tips para mejores resultados:',
106
+ 'additional_specs': '📝 Especificaciones adicionales para el análisis',
107
+ 'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
108
  },
109
+ 'fr': {
110
+ 'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
111
+ 'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
112
+ 'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
113
+ 'select_model': '🤖 Modèle Claude',
114
+ 'select_language': '🌐 Langue',
115
+ 'select_theme': '🎨 Thème',
116
+ 'detail_level': '📋 Niveau de détail',
117
+ 'detailed': 'Détaillé',
118
+ 'summarized': 'Résumé',
119
+ 'analyze_button': '🚀 Analyser et Comparer',
120
+ 'export_format': '📄 Format d\'export',
121
+ 'export_button': '💾 Exporter le Rapport',
122
+ 'comparative_analysis': '📊 Analyse Comparative',
123
+ 'implementation_code': '💻 Code d\'Implémentation',
124
+ 'data_format': '📋 Format de données attendu',
125
+ 'examples': '📚 Exemples d\'analyse',
126
+ 'light': 'Clair',
127
+ 'dark': 'Sombre',
128
+ 'best_for': 'Meilleur pour',
129
+ 'loading': 'Chargement...',
130
+ 'error_no_api': 'Veuillez configurer ANTHROPIC_API_KEY',
131
+ 'error_no_files': 'Veuillez télécharger des fichiers à analyser',
132
+ 'report_exported': 'Rapport exporté avec succès comme',
133
+ 'specialized_in': '🎯 Spécialisé dans:',
134
+ 'metrics_analyzed': '📊 Métriques analysées:',
135
+ 'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
136
+ 'tips': '💡 Conseils pour de meilleurs résultats:',
137
+ 'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
138
+ 'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
139
+ },
140
+ 'de': {
141
+ 'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
142
+ 'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
143
+ 'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
144
+ 'select_model': '🤖 Claude Modell',
145
+ 'select_language': '🌐 Sprache',
146
+ 'select_theme': '🎨 Thema',
147
+ 'detail_level': '📋 Detailgrad der Analyse',
148
+ 'detailed': 'Detailliert',
149
+ 'summarized': 'Zusammengefasst',
150
+ 'analyze_button': '🚀 Analysieren und Vergleichen',
151
+ 'export_format': '📄 Exportformat',
152
+ 'export_button': '💾 Bericht Exportieren',
153
+ 'comparative_analysis': '📊 Vergleichende Analyse',
154
+ 'implementation_code': '💻 Implementierungscode',
155
+ 'data_format': '📋 Erwartetes Datenformat',
156
+ 'examples': '📚 Analysebeispiele',
157
+ 'light': 'Hell',
158
+ 'dark': 'Dunkel',
159
+ 'best_for': 'Am besten für',
160
+ 'loading': 'Laden...',
161
+ 'error_no_api': 'Bitte konfigurieren Sie ANTHROPIC_API_KEY',
162
+ 'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
163
+ 'report_exported': 'Bericht erfolgreich exportiert als',
164
+ 'specialized_in': '🎯 Spezialisiert auf:',
165
+ 'metrics_analyzed': '📊 Analysierte Metriken:',
166
+ 'what_analyzes': '🔍 Was spezifisch analysiert wird:',
167
+ 'tips': '💡 Tipps für bessere Ergebnisse:',
168
+ 'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
169
+ 'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
170
+ },
171
+ 'pt': {
172
+ 'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
173
+ 'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
174
+ 'upload_files': '📁 Carregar resultados (CSV/Excel)',
175
+ 'select_model': '🤖 Modelo Claude',
176
+ 'select_language': '🌐 Idioma',
177
+ 'select_theme': '🎨 Tema',
178
+ 'detail_level': '📋 Nível de detalhe',
179
+ 'detailed': 'Detalhado',
180
+ 'summarized': 'Resumido',
181
+ 'analyze_button': '🚀 Analisar e Comparar',
182
+ 'export_format': '📄 Formato de exportação',
183
+ 'export_button': '💾 Exportar Relatório',
184
+ 'comparative_analysis': '📊 Análise Comparativa',
185
+ 'implementation_code': '💻 Código de Implementação',
186
+ 'data_format': '📋 Formato de dados esperado',
187
+ 'examples': '📚 Exemplos de análise',
188
+ 'light': 'Claro',
189
+ 'dark': 'Escuro',
190
+ 'best_for': 'Melhor para',
191
+ 'loading': 'Carregando...',
192
+ 'error_no_api': 'Por favor configure ANTHROPIC_API_KEY',
193
+ 'error_no_files': 'Por favor carregue arquivos para analisar',
194
+ 'report_exported': 'Relatório exportado com sucesso como',
195
+ 'specialized_in': '🎯 Especializado em:',
196
+ 'metrics_analyzed': '📊 Métricas analisadas:',
197
+ 'what_analyzes': '🔍 O que analisa especificamente:',
198
+ 'tips': '💡 Dicas para melhores resultados:',
199
+ 'additional_specs': '📝 Especificações adicionais para a análise',
200
+ 'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
201
+ }
202
  }
203
 
204
+ # Temas disponibles
205
+ THEMES = {
206
+ 'light': gr.themes.Soft(),
207
+ 'dark': gr.themes.Base(
208
+ primary_hue="blue",
209
+ secondary_hue="gray",
210
+ neutral_hue="gray",
211
+ font=["Arial", "sans-serif"]
212
+ ).set(
213
+ body_background_fill="dark",
214
+ body_background_fill_dark="*neutral_950",
215
+ button_primary_background_fill="*primary_600",
216
+ button_primary_background_fill_hover="*primary_500",
217
+ button_primary_text_color="white",
218
+ block_background_fill="*neutral_800",
219
+ block_border_color="*neutral_700",
220
+ block_label_text_color="*neutral_200",
221
+ block_title_text_color="*neutral_100",
222
+ checkbox_background_color="*neutral_700",
223
+ checkbox_background_color_selected="*primary_600",
224
+ input_background_fill="*neutral_700",
225
+ input_border_color="*neutral_600",
226
+ input_placeholder_color="*neutral_400"
227
+ )
228
+ }
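The keys line up with the 'light'/'dark' entries in TRANSLATIONS, so a theme can be resolved when the interface is built; a small sketch (the helper name is illustrative, not part of the app):

def resolve_theme(theme_key: str):
    # Fall back to the light theme for unknown keys.
    return THEMES.get(theme_key, THEMES['light'])

# Typical use when constructing the UI:
# with gr.Blocks(theme=resolve_theme('dark')) as demo:
#     ...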
229
+
230
+ # Enum para tipos de análisis
231
+ class AnalysisType(Enum):
232
+ MATHEMATICAL_MODEL = "mathematical_model"
233
+ DATA_FITTING = "data_fitting"
234
+ FITTING_RESULTS = "fitting_results"
235
+ UNKNOWN = "unknown"
236
+
237
+ # Estructura modular para modelos
238
+ @dataclass
239
+ class MathematicalModel:
240
+ name: str
241
+ equation: str
242
+ parameters: List[str]
243
+ application: str
244
+ sources: List[str]
245
+ category: str
246
+ biological_meaning: str
247
+
248
+ # Sistema de registro de modelos escalable
249
+ class ModelRegistry:
250
+ def __init__(self):
251
+ self.models = {}
252
+ self._initialize_default_models()
253
+
254
+ def register_model(self, model: MathematicalModel):
255
+ """Registra un nuevo modelo matemático"""
256
+ if model.category not in self.models:
257
+ self.models[model.category] = {}
258
+ self.models[model.category][model.name] = model
259
+
260
+ def get_model(self, category: str, name: str) -> MathematicalModel:
261
+ """Obtiene un modelo específico"""
262
+ return self.models.get(category, {}).get(name)
263
+
264
+ def get_all_models(self) -> Dict:
265
+ """Retorna todos los modelos registrados"""
266
+ return self.models
267
+
268
+ def _initialize_default_models(self):
269
+ """Inicializa los modelos por defecto"""
270
+ # Modelos de crecimiento
271
+ self.register_model(MathematicalModel(
272
+ name="Monod",
273
+ equation="μ = μmax × (S / (Ks + S))",
274
+ parameters=["μmax (h⁻¹)", "Ks (g/L)"],
275
+ application="Crecimiento limitado por sustrato único",
276
+ sources=["Cambridge", "MIT", "DTU"],
277
+ category="crecimiento_biomasa",
278
+ biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante"
279
+ ))
280
+
281
+ self.register_model(MathematicalModel(
282
+ name="Logístico",
283
+ equation="dX/dt = μmax × X × (1 - X/Xmax)",
284
+ parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
285
+ application="Sistemas cerrados batch",
286
+ sources=["Cranfield", "Swansea", "HAL Theses"],
287
+ category="crecimiento_biomasa",
288
+ biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema"
289
+ ))
290
+
291
+ self.register_model(MathematicalModel(
292
+ name="Gompertz",
293
+ equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
294
+ parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
295
+ application="Crecimiento con fase lag pronunciada",
296
+ sources=["Lund University", "NC State"],
297
+ category="crecimiento_biomasa",
298
+ biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario"
299
+ ))
300
+
301
+ # Instancia global del registro
302
+ model_registry = ModelRegistry()
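The registry is designed to grow without touching the class itself; a short sketch of registering and retrieving an extra model (the Luedeking-Piret entry here is only an example, not one of the defaults):

model_registry.register_model(MathematicalModel(
    name="Luedeking-Piret",
    equation="dP/dt = α·(dX/dt) + β·X",
    parameters=["α (g/g)", "β (g/(g·h))"],
    application="Formación de producto asociada y no asociada al crecimiento",
    sources=["Example entry"],
    category="formacion_producto",
    biological_meaning="Relaciona la formación de producto con el crecimiento y con la biomasa presente"
))

monod = model_registry.get_model("crecimiento_biomasa", "Monod")
print(monod.equation)  # μ = μmax × (S / (Ks + S))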
303
+
304
+ '''
305
+ # Modelos de Claude disponibles
306
+ CLAUDE_MODELS = {
307
+ "claude-opus-4-20250514": {
308
+ "name": "Claude Opus 4 (Latest)",
309
+ "description": "Modelo más potente para desafíos complejos",
310
+ "max_tokens": 4000,
311
+ "best_for": "Análisis muy detallados y complejos"
312
  },
313
+ "claude-sonnet-4-20250514": {
314
+ "name": "Claude Sonnet 4 (Latest)",
315
+ "description": "Modelo inteligente y eficiente para uso cotidiano",
316
+ "max_tokens": 4000,
317
+ "best_for": "Análisis general, recomendado para la mayoría de casos"
318
+ },
319
+ "claude-3-5-haiku-20241022": {
320
+ "name": "Claude 3.5 Haiku (Latest)",
321
+ "description": "Modelo más rápido para tareas diarias",
322
+ "max_tokens": 4000,
323
+ "best_for": "Análisis rápidos y económicos"
324
+ },
325
+ "claude-3-7-sonnet-20250219": {
326
+ "name": "Claude 3.7 Sonnet",
327
+ "description": "Modelo avanzado de la serie 3.7",
328
+ "max_tokens": 4000,
329
+ "best_for": "Análisis equilibrados con alta calidad"
330
+ },
331
+ "claude-3-5-sonnet-20241022": {
332
+ "name": "Claude 3.5 Sonnet (Oct 2024)",
333
+ "description": "Excelente balance entre velocidad y capacidad",
334
+ "max_tokens": 4000,
335
+ "best_for": "Análisis rápidos y precisos"
336
+ }
337
+ }
338
+
339
+ '''
340
+
341
+ CLAUDE_MODELS = {
342
+ "Qwen/Qwen3-14B": {
343
+ "name": "Qwen 3-14B",
344
+ "description": "Modelo Qwen 3-14B para análisis detallado",
345
+ "max_tokens": 4096,
346
+ "best_for": "Análisis técnico y científico"
347
+ }
348
  }
349
 
350
  class FileProcessor:
351
+ """Clase para procesar diferentes tipos de archivos"""
352
+
353
  @staticmethod
354
+ def extract_text_from_pdf(pdf_file) -> str:
355
+ """Extrae texto de un archivo PDF"""
356
+ try:
357
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
358
+ text = ""
359
+ for page in pdf_reader.pages:
360
+ text += page.extract_text() + "\n"
361
+ return text
362
+ except Exception as e:
363
+ return f"Error reading PDF: {str(e)}"
364
 
365
  @staticmethod
366
+ def read_csv(csv_file) -> pd.DataFrame:
367
+ """Lee archivo CSV"""
368
+ try:
369
+ return pd.read_csv(io.BytesIO(csv_file))
370
+ except Exception as e:
371
+ return None
372
+
373
+ @staticmethod
374
+ def read_excel(excel_file) -> pd.DataFrame:
375
+ """Lee archivo Excel"""
376
+ try:
377
+ return pd.read_excel(io.BytesIO(excel_file))
378
+ except Exception as e:
379
+ return None
380
+
381
+ @staticmethod
382
+ def extract_from_zip(zip_file) -> List[Tuple[str, bytes]]:
383
+ """Extrae archivos de un ZIP"""
384
+ files = []
385
+ try:
386
+ with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
387
+ for file_name in zip_ref.namelist():
388
+ if not file_name.startswith('__MACOSX'):
389
+ file_data = zip_ref.read(file_name)
390
+ files.append((file_name, file_data))
391
+ except Exception as e:
392
+ print(f"Error processing ZIP: {e}")
393
+ return files
394
 
395
  class ReportExporter:
396
+ """Clase para exportar reportes a diferentes formatos"""
397
+
398
  @staticmethod
399
+ def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
400
+ """Exporta el contenido a un archivo DOCX"""
401
  doc = Document()
402
+
403
+ # Configurar estilos
404
+ title_style = doc.styles['Title']
405
+ title_style.font.size = Pt(24)
406
+ title_style.font.bold = True
407
+
408
+ heading_style = doc.styles['Heading 1']
409
+ heading_style.font.size = Pt(18)
410
+ heading_style.font.bold = True
411
+
412
+ # Título
413
+ title_text = {
414
+ 'en': 'Comparative Analysis Report - Biotechnological Models',
415
+ 'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
416
+ 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
417
+ 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
418
+ 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
419
+ }
420
+
421
+ doc.add_heading(title_text.get(language, title_text['en']), 0)
422
+
423
+ # Fecha
424
+ date_text = {
425
+ 'en': 'Generated on',
426
+ 'es': 'Generado el',
427
+ 'fr': 'Généré le',
428
+ 'de': 'Erstellt am',
429
+ 'pt': 'Gerado em'
430
+ }
431
+ doc.add_paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
432
  doc.add_paragraph()
433
+
434
+ # Procesar contenido
435
+ lines = content.split('\n')
436
+ current_paragraph = None
437
+
438
+ for line in lines:
439
+ line = line.strip()
440
+
441
+ if line.startswith('###'):
442
+ doc.add_heading(line.replace('###', '').strip(), level=2)
443
+ elif line.startswith('##'):
444
+ doc.add_heading(line.replace('##', '').strip(), level=1)
445
+ elif line.startswith('#'):
446
+ doc.add_heading(line.replace('#', '').strip(), level=0)
447
+ elif line.startswith('**') and line.endswith('**'):
448
+ # Texto en negrita
449
+ p = doc.add_paragraph()
450
+ run = p.add_run(line.replace('**', ''))
451
+ run.bold = True
452
+ elif line.startswith('- ') or line.startswith('* '):
453
+ # Lista
454
+ doc.add_paragraph(line[2:], style='List Bullet')
455
+ elif line.startswith(tuple('0123456789')):
456
+ # Lista numerada
457
+ doc.add_paragraph(line, style='List Number')
458
+ elif line == '---' or line.startswith('==='):
459
+ # Separador
460
+ doc.add_paragraph('_' * 50)
461
+ elif line:
462
+ # Párrafo normal
463
+ doc.add_paragraph(line)
464
+
465
+ # Guardar documento
466
  doc.save(filename)
467
  return filename
468
+
469
  @staticmethod
470
+ def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
471
+ """Exporta el contenido a un archivo PDF"""
472
+ # Crear documento PDF
473
  doc = SimpleDocTemplate(filename, pagesize=letter)
474
+ story = []
475
  styles = getSampleStyleSheet()
476
+
477
+ # Estilos personalizados
478
+ title_style = ParagraphStyle(
479
+ 'CustomTitle',
480
+ parent=styles['Title'],
481
+ fontSize=24,
482
+ textColor=colors.HexColor('#1f4788'),
483
+ spaceAfter=30
484
+ )
485
+
486
+ heading_style = ParagraphStyle(
487
+ 'CustomHeading',
488
+ parent=styles['Heading1'],
489
+ fontSize=16,
490
+ textColor=colors.HexColor('#2e5090'),
491
+ spaceAfter=12
492
+ )
493
+
494
+ # Título
495
+ title_text = {
496
+ 'en': 'Comparative Analysis Report - Biotechnological Models',
497
+ 'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
498
+ 'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
499
+ 'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
500
+ 'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
501
+ }
502
+
503
+ story.append(Paragraph(title_text.get(language, title_text['en']), title_style))
504
+
505
+ # Fecha
506
+ date_text = {
507
+ 'en': 'Generated on',
508
+ 'es': 'Generado el',
509
+ 'fr': 'Généré le',
510
+ 'de': 'Erstellt am',
511
+ 'pt': 'Gerado em'
512
+ }
513
+ story.append(Paragraph(f"{date_text.get(language, date_text['en'])}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
514
+ story.append(Spacer(1, 0.5*inch))
515
+
516
+ # Procesar contenido
517
+ lines = content.split('\n')
518
+
519
+ for line in lines:
520
+ line = line.strip()
521
+
522
+ if not line:
523
+ story.append(Spacer(1, 0.2*inch))
524
+ elif line.startswith('###'):
525
+ story.append(Paragraph(line.replace('###', '').strip(), styles['Heading3']))
526
+ elif line.startswith('##'):
527
+ story.append(Paragraph(line.replace('##', '').strip(), styles['Heading2']))
528
+ elif line.startswith('#'):
529
+ story.append(Paragraph(line.replace('#', '').strip(), heading_style))
530
+ elif line.startswith('**') and line.endswith('**'):
531
+ text = line.replace('**', '')
532
+ story.append(Paragraph(f"<b>{text}</b>", styles['Normal']))
533
+ elif line.startswith('- ') or line.startswith('* '):
534
+ story.append(Paragraph(f"• {line[2:]}", styles['Normal']))
535
+ elif line == '---' or line.startswith('==='):
536
+ story.append(Spacer(1, 0.3*inch))
537
+ story.append(Paragraph("_" * 70, styles['Normal']))
538
+ story.append(Spacer(1, 0.3*inch))
539
+ else:
540
+ # Limpiar caracteres especiales para PDF
541
+ clean_line = line.replace('📊', '[GRAPH]').replace('🎯', '[TARGET]').replace('🔍', '[SEARCH]').replace('💡', '[TIP]')
542
+ story.append(Paragraph(clean_line, styles['Normal']))
543
+
544
+ # Construir PDF
545
  doc.build(story)
546
  return filename
547
 
548
  class AIAnalyzer:
549
+ """Clase para análisis con IA"""
550
+
551
+ def __init__(self, client, model_registry):
 
 
552
  self.client = client
553
+ self.model_registry = model_registry
554
+
555
+ def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
556
+ """Detecta el tipo de análisis necesario"""
557
+ if isinstance(content, pd.DataFrame):
558
+ columns = [col.lower() for col in content.columns]
559
+
560
+ fitting_indicators = [
561
+ 'r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic',
562
+ 'parameter', 'param', 'coefficient', 'fit',
563
+ 'model', 'equation', 'goodness', 'chi_square',
564
+ 'p_value', 'confidence', 'standard_error', 'se'
565
+ ]
566
+
567
+ has_fitting_results = any(indicator in ' '.join(columns) for indicator in fitting_indicators)
568
+
569
+ if has_fitting_results:
570
+ return AnalysisType.FITTING_RESULTS
571
+ else:
572
+ return AnalysisType.DATA_FITTING
573
 
574
+ prompt = """
575
+ Analyze this content and determine if it is:
576
+ 1. A scientific article describing biotechnological mathematical models
577
+ 2. Experimental data for parameter fitting
578
+ 3. Model fitting results (with parameters, R², RMSE, etc.)
579
+
580
+         Reply only with: "MODEL", "DATA" or "RESULTS"
  """
582
+
583
  try:
584
+ # Cliente Nebius
585
  response = self.client.chat.completions.create(
586
+ model="Qwen/Qwen3-14B",
587
  temperature=0.6,
588
  top_p=0.95,
589
+ max_tokens=10,
590
+ messages=[{"role": "user", "content": f"{prompt}\n{content[:1000]}"}]
591
  )
592
+
593
+ #Cliente Anthropic
594
+ #response = self.client.messages.create(
595
+ #model="claude-3-haiku-20240307",
596
+ #max_tokens=10,
597
+ #messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
598
+ #)
599
+
600
+ #result = response.content[0].text.strip().upper()
601
+ result = response.choices[0].message.content.strip().upper()
602
+ if "MODEL" in result:
603
+ return AnalysisType.MATHEMATICAL_MODEL
604
+ elif "RESULTS" in result:
605
+ return AnalysisType.FITTING_RESULTS
606
+ elif "DATA" in result:
607
+ return AnalysisType.DATA_FITTING
608
+ else:
609
+ return AnalysisType.UNKNOWN
610
+
611
+ except:
612
+ return AnalysisType.UNKNOWN
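For CSV/Excel uploads the DataFrame branch above is the one that matters: any column name containing a goodness-of-fit keyword routes the table to the fitting-results path. An illustrative check:

demo_df = pd.DataFrame({
    'Experiment': ['pH_7.0', 'pH_7.5'],
    'Model': ['Monod', 'Logistic'],
    'R2': [0.98, 0.99],
    'RMSE': [0.02, 0.01],
})
analyzer = AIAnalyzer(client, model_registry)
print(analyzer.detect_analysis_type(demo_df))  # expected: AnalysisType.FITTING_RESULTS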
613
+
614
+ def get_language_prompt_prefix(self, language: str) -> str:
615
+ """Obtiene el prefijo del prompt según el idioma"""
616
+ prefixes = {
617
+ 'en': "Please respond in English. ",
618
+ 'es': "Por favor responde en español. ",
619
+ 'fr': "Veuillez répondre en français. ",
620
+ 'de': "Bitte antworten Sie auf Deutsch. ",
621
+ 'pt': "Por favor responda em português. "
622
+ }
623
+ return prefixes.get(language, prefixes['en'])
624
+
625
+ def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
626
+ language: str = "en", additional_specs: str = "") -> Dict:
627
+ """Analiza resultados de ajuste de modelos con soporte multiidioma y especificaciones adicionales"""
628
+
629
+ # Preparar resumen completo de los datos
630
+ data_summary = f"""
631
+ FITTING RESULTS DATA:
632
+
633
+ Data structure:
634
+ - Columns: {list(data.columns)}
635
+ - Number of models evaluated: {len(data)}
636
+
637
+ Complete data:
638
+ {data.to_string()}
639
+
640
+ Descriptive statistics:
641
+ {data.describe().to_string()}
642
+ """
643
+
644
+ # Extraer valores para usar en el código
645
+ data_dict = data.to_dict('records')
646
+
647
+ # Obtener prefijo de idioma
648
+ lang_prefix = self.get_language_prompt_prefix(language)
649
+
650
+ # Agregar especificaciones adicionales del usuario si existen
651
+ user_specs_section = f"""
652
+
653
+ USER ADDITIONAL SPECIFICATIONS:
654
+ {additional_specs}
655
+
656
+ Please ensure to address these specific requirements in your analysis.
657
+ """ if additional_specs else ""
658
+
659
+ # Prompt mejorado con instrucciones específicas para cada nivel
660
+ if detail_level == "detailed":
661
+ prompt = f"""
662
+ {lang_prefix}
663
 
664
+ You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
665
+
666
+ {user_specs_section}
667
+
668
+ DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
669
+
670
+ PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
671
+
672
+ 1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
673
+ - List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
674
+ - For EACH experiment, identify:
675
+ * Experimental conditions
676
+ * Number of models tested
677
+ * Variables measured (biomass, substrate, product)
678
+
679
+ 2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
680
+ For EACH EXPERIMENT separately:
681
+ - Identify ALL fitted mathematical models BY NAME
682
+ - Classify them: biomass growth, substrate consumption, product formation
683
+ - Show the mathematical equation of each model
684
+ - List parameter values obtained for that specific experiment
685
+
686
+ 3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
687
+ Create a section for EACH EXPERIMENT showing:
688
+
689
+ **EXPERIMENT [Name/Condition]:**
690
+
691
+ a) **BIOMASS MODELS** (if applicable):
692
+ - Best model: [Name] with R²=[value], RMSE=[value]
693
+ - Parameters: μmax=[value], Xmax=[value], etc.
694
+ - Ranking of all biomass models tested
695
+
696
+ b) **SUBSTRATE MODELS** (if applicable):
697
+ - Best model: [Name] with R²=[value], RMSE=[value]
698
+ - Parameters: Ks=[value], Yxs=[value], etc.
699
+ - Ranking of all substrate models tested
700
+
701
+ c) **PRODUCT MODELS** (if applicable):
702
+ - Best model: [Name] with R²=[value], RMSE=[value]
703
+ - Parameters: α=[value], β=[value], etc.
704
+ - Ranking of all product models tested
705
+
706
+ 4. **DETAILED COMPARATIVE TABLES**
707
+
708
+ **Table 1: Summary by Experiment and Variable Type**
709
+ | Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
710
+ |------------|----------|------------|-------|------|----------------|---------|
711
+ | Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
712
+ | Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
713
+ | Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
714
+ | Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
715
+
716
+ **Table 2: Complete Model Comparison Across All Experiments**
717
+ | Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
718
+
719
+ 5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
720
+ - Compare how parameters change between experiments
721
+ - Identify trends (e.g., μmax increases with temperature)
722
+ - Calculate average parameters and variability
723
+ - Suggest optimal conditions based on parameters
724
+
725
+ 6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
726
+ For each experiment, explain:
727
+ - What the parameter values mean biologically
728
+ - Whether values are realistic for the conditions
729
+ - Key differences between experiments
730
+ - Critical control parameters identified
731
+
732
+ 7. **OVERALL BEST MODELS DETERMINATION**
733
+ - **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
734
+ - **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
735
+ - **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
736
+
737
+ Justify with numerical evidence from multiple experiments.
738
+
739
+ 8. **CONCLUSIONS AND RECOMMENDATIONS**
740
+ - Which models are most robust across different conditions
741
+ - Specific models to use for each experimental condition
742
+ - Confidence intervals and prediction reliability
743
+ - Scale-up recommendations with specific values
744
+
745
+ Use Markdown format with clear structure. Include ALL numerical values from the data.
746
+ Create clear sections for EACH EXPERIMENT.
747
+ """
748
+ else: # summarized
749
+ prompt = f"""
750
+ {lang_prefix}
751
+
752
+ You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
753
+
754
+ {user_specs_section}
755
+
756
+ DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
757
+
758
+ PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
759
+
760
+ 1. **EXPERIMENTS OVERVIEW**
761
+ - Total experiments analyzed: [number]
762
+ - Conditions tested: [list]
763
+ - Variables measured: biomass/substrate/product
764
+
765
+ 2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
766
+
767
+ 📊 **EXPERIMENT 1 [Name/Condition]:**
768
+ - Biomass: [Model] (R²=[value])
769
+ - Substrate: [Model] (R²=[value])
770
+ - Product: [Model] (R²=[value])
771
+
772
+ 📊 **EXPERIMENT 2 [Name/Condition]:**
773
+ - Biomass: [Model] (R²=[value])
774
+ - Substrate: [Model] (R²=[value])
775
+ - Product: [Model] (R²=[value])
776
+
777
+ [Continue for all experiments...]
778
+
779
+ 3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
780
+ 🏆 **Best Models Overall:**
781
+ - **Biomass**: [Model] - Best in [X]/[Y] experiments
782
+ - **Substrate**: [Model] - Average R²=[value]
783
+ - **Product**: [Model] - Most consistent performance
784
+
785
+ 4. **QUICK COMPARISON TABLE**
786
+ | Experiment | Best Biomass | Best Substrate | Best Product | Overall R² |
787
+ |------------|--------------|----------------|--------------|------------|
788
+ | Exp1 | [Model] | [Model] | [Model] | [avg] |
789
+ | Exp2 | [Model] | [Model] | [Model] | [avg] |
790
+
791
+ 5. **KEY FINDINGS**
792
+ - Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
793
+ - Best conditions identified: [specific values]
794
+ - Most robust models: [list with reasons]
795
+
796
+ 6. **PRACTICAL RECOMMENDATIONS**
797
+ - For biomass prediction: Use [Model]
798
+ - For substrate monitoring: Use [Model]
799
+ - For product estimation: Use [Model]
800
+ - Critical parameters: [list with values]
801
+
802
+ Keep it concise but include ALL experiments and model names with their key metrics.
803
+ """
804
+
805
+ try:
806
+ response = self.client.messages.create(
807
+ model=claude_model,
808
+ max_tokens=4000,
809
+ messages=[{
810
+ "role": "user",
811
+ "content": f"{prompt}\n\n{data_summary}"
812
+ }]
813
+ )
814
+
815
+ # Análisis adicional para generar código con valores numéricos reales
816
+ code_prompt = f"""
817
+ {lang_prefix}
818
+
819
+ Based on the analysis and this actual data:
820
+ {data.to_string()}
821
+
822
+ Generate Python code that:
823
+
824
+ 1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
825
+ 2. Implements analysis BY EXPERIMENT showing:
826
+ - Best models for each experiment
827
+ - Comparison across experiments
828
+ - Parameter evolution between conditions
829
+ 3. Includes visualization functions that:
830
+ - Show results PER EXPERIMENT
831
+ - Compare models across experiments
832
+ - Display parameter trends
833
+ 4. Shows the best model for biomass, substrate, and product separately
834
+
835
+ The code must include:
836
+ - Data loading with experiment identification
837
+ - Model comparison by experiment and variable type
838
+ - Visualization showing results per experiment
839
+ - Overall best model selection with justification
840
+ - Functions to predict using the best models for each category
841
+
842
+ Make sure to include comments indicating which model won for each variable type and why.
843
+
844
+ Format: Complete, executable Python code with actual data values embedded.
845
+ """
846
+
847
+ code_response = self.client.messages.create(
848
+ model=claude_model,
849
+ max_tokens=3000,
850
+ messages=[{
851
+ "role": "user",
852
+ "content": code_prompt
853
+ }]
854
+ )
  return {
857
+ "tipo": "Comparative Analysis of Mathematical Models",
858
+ "analisis_completo": response.content[0].text,
859
+ "codigo_implementacion": code_response.content[0].text,
860
+ "resumen_datos": {
861
+ "n_modelos": len(data),
862
+ "columnas": list(data.columns),
863
+ "metricas_disponibles": [col for col in data.columns if any(metric in col.lower()
864
+ for metric in ['r2', 'rmse', 'aic', 'bic', 'mse'])],
865
+ "mejor_r2": data['R2'].max() if 'R2' in data.columns else None,
866
+ "mejor_modelo_r2": data.loc[data['R2'].idxmax()]['Model'] if 'R2' in data.columns and 'Model' in data.columns else None,
867
+ "datos_completos": data_dict # Incluir todos los datos para el código
868
+ }
869
  }
870
+
871
+ except Exception as e:
872
+ return {"error": str(e)}
873
 
874
+ def process_files(files, claude_model: str, detail_level: str = "detailed",
875
+ language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
876
+ """Procesa múltiples archivos con soporte de idioma y especificaciones adicionales"""
 
 
 
 
877
  processor = FileProcessor()
878
+ analyzer = AIAnalyzer(client, model_registry)
879
+ results = []
880
+ all_code = []
881
 
 
 
 
 
882
  for file in files:
883
+ if file is None:
884
+ continue
885
+
886
+ file_name = file.name if hasattr(file, 'name') else "archivo"
887
  file_ext = Path(file_name).suffix.lower()
888
 
889
  with open(file.name, 'rb') as f:
890
  file_content = f.read()
891
 
892
+ if file_ext in ['.csv', '.xlsx', '.xls']:
893
+ if language == 'es':
894
+ results.append(f"## 📊 Análisis de Resultados: {file_name}")
895
+ else:
896
+ results.append(f"## 📊 Results Analysis: {file_name}")
897
+
898
+ if file_ext == '.csv':
899
+ df = processor.read_csv(file_content)
900
+ else:
901
+ df = processor.read_excel(file_content)
902
+
903
+ if df is not None:
904
+ analysis_type = analyzer.detect_analysis_type(df)
905
+
906
+ if analysis_type == AnalysisType.FITTING_RESULTS:
907
+ result = analyzer.analyze_fitting_results(
908
+ df, claude_model, detail_level, language, additional_specs
909
+ )
910
+
911
+ if language == 'es':
912
+ results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
913
+ else:
914
+ results.append("### 🎯 COMPARATIVE ANALYSIS OF MATHEMATICAL MODELS")
915
+
916
+ results.append(result.get("analisis_completo", ""))
917
+ if "codigo_implementacion" in result:
918
+ all_code.append(result["codigo_implementacion"])
919
+
920
+ results.append("\n---\n")
921
+
922
+ analysis_text = "\n".join(results)
923
+ code_text = "\n\n# " + "="*50 + "\n\n".join(all_code) if all_code else generate_implementation_code(analysis_text)
924
+
925
+ return analysis_text, code_text
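A typical invocation mirrors what the Gradio callback passes in; a hedged sketch (the sample path and the SimpleNamespace stand-in for an upload object are illustrative):

from types import SimpleNamespace

demo_upload = SimpleNamespace(name="examples/biomass_models_comparison.csv")  # illustrative path
analysis_md, generated_code = process_files(
    [demo_upload],
    claude_model="Qwen/Qwen3-14B",
    detail_level="summarized",
    language="es",
    additional_specs="Enfócate en el efecto del pH",
)
print(analysis_md[:500])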
926
 
927
+ def generate_implementation_code(analysis_results: str) -> str:
928
+ """Genera código de implementación con análisis por experimento"""
929
+ code = """
930
+ import numpy as np
931
+ import pandas as pd
932
+ import matplotlib.pyplot as plt
933
+ from scipy.integrate import odeint
934
+ from scipy.optimize import curve_fit, differential_evolution
935
+ from sklearn.metrics import r2_score, mean_squared_error
936
+ import seaborn as sns
937
+ from typing import Dict, List, Tuple, Optional
938
 
939
+ # Visualization configuration
+ plt.style.use('seaborn-v0_8-darkgrid')
+ sns.set_palette("husl")
+ 
+ class ExperimentalModelAnalyzer:
+     \"\"\"
+     Class for comparative analysis of biotechnological models across multiple experiments.
+     Analyzes biomass, substrate and product models separately for each experimental condition.
+     \"\"\"
+ 
+     def __init__(self):
+         self.results_df = None
+         self.experiments = {}
+         self.best_models_by_experiment = {}
+         self.overall_best_models = {
+             'biomass': None,
+             'substrate': None,
+             'product': None
+         }
+ 
+     def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
+         \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
+         if data_dict:
+             self.results_df = pd.DataFrame(data_dict)
+         elif file_path:
+             if file_path.endswith('.csv'):
+                 self.results_df = pd.read_csv(file_path)
+             else:
+                 self.results_df = pd.read_excel(file_path)
+ 
+         print(f"✅ Data loaded: {len(self.results_df)} models")
+         print(f"📊 Available columns: {list(self.results_df.columns)}")
+ 
+         # Identify experiments
+         if 'Experiment' in self.results_df.columns:
+             self.experiments = self.results_df.groupby('Experiment').groups
+             print(f"🧪 Experiments found: {list(self.experiments.keys())}")
+ 
+         return self.results_df
+ 
+     def analyze_by_experiment(self,
+                               experiment_col: str = 'Experiment',
+                               model_col: str = 'Model',
+                               type_col: str = 'Type',
+                               r2_col: str = 'R2',
+                               rmse_col: str = 'RMSE') -> Dict:
+         \"\"\"
+         Analyze models by experiment and variable type.
+         Identifies best models for biomass, substrate, and product in each experiment.
+         \"\"\"
+         if self.results_df is None:
+             raise ValueError("First load data with load_results()")
+ 
+         results_by_exp = {}
+ 
+         # Get unique experiments
+         if experiment_col in self.results_df.columns:
+             experiments = self.results_df[experiment_col].unique()
+         else:
+             experiments = ['All_Data']
+             self.results_df[experiment_col] = 'All_Data'
+ 
+         print("\\n" + "="*80)
+         print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
+         print("="*80)
+ 
+         for exp in experiments:
+             print(f"\\n🧪 EXPERIMENT: {exp}")
+             print("-"*50)
+ 
+             exp_data = self.results_df[self.results_df[experiment_col] == exp]
+             results_by_exp[exp] = {}
+ 
+             # Analyze by variable type if available
+             if type_col in exp_data.columns:
+                 var_types = exp_data[type_col].unique()
+ 
+                 for var_type in var_types:
+                     var_data = exp_data[exp_data[type_col] == var_type]
+ 
+                     if not var_data.empty:
+                         # Find best model for this variable type
+                         best_idx = var_data[r2_col].idxmax()
+                         best_model = var_data.loc[best_idx]
+ 
+                         results_by_exp[exp][var_type] = {
+                             'best_model': best_model[model_col],
+                             'r2': best_model[r2_col],
+                             'rmse': best_model[rmse_col],
+                             'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
+                         }
+ 
+                         print(f"\\n  📈 {var_type.upper()}:")
+                         print(f"     Best Model: {best_model[model_col]}")
+                         print(f"     R² = {best_model[r2_col]:.4f}")
+                         print(f"     RMSE = {best_model[rmse_col]:.4f}")
+ 
+                         # Show all models for this variable
+                         print(f"\\n     All {var_type} models tested:")
+                         for _, row in var_data.iterrows():
+                             print(f"       - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
+             else:
+                 # If no type column, analyze all models together
+                 best_idx = exp_data[r2_col].idxmax()
+                 best_model = exp_data.loc[best_idx]
+ 
+                 results_by_exp[exp]['all'] = {
+                     'best_model': best_model[model_col],
+                     'r2': best_model[r2_col],
+                     'rmse': best_model[rmse_col],
+                     'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
+                 }
+ 
+         self.best_models_by_experiment = results_by_exp
+ 
+         # Determine overall best models
+         self._determine_overall_best_models()
+ 
+         return results_by_exp
+ 
+     def _determine_overall_best_models(self):
+         \"\"\"Determine the best models across all experiments\"\"\"
+         print("\\n" + "="*80)
+         print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
+         print("="*80)
+ 
+         # Aggregate performance by model and type
+         model_performance = {}
+ 
+         for exp, exp_results in self.best_models_by_experiment.items():
+             for var_type, var_results in exp_results.items():
+                 if var_type not in model_performance:
+                     model_performance[var_type] = {}
+ 
+                 for model_data in var_results['all_models']:
+                     model_name = model_data['Model']
+                     if model_name not in model_performance[var_type]:
+                         model_performance[var_type][model_name] = {
+                             'r2_values': [],
+                             'rmse_values': [],
+                             'experiments': []
+                         }
+ 
+                     model_performance[var_type][model_name]['r2_values'].append(model_data['R2'])
+                     model_performance[var_type][model_name]['rmse_values'].append(model_data['RMSE'])
+                     model_performance[var_type][model_name]['experiments'].append(exp)
+ 
+         # Calculate average performance and select best
+         for var_type, models in model_performance.items():
+             best_avg_r2 = -1
+             best_model = None
+ 
+             print(f"\\n📊 {var_type.upper()} MODELS:")
+             for model_name, perf_data in models.items():
+                 avg_r2 = np.mean(perf_data['r2_values'])
+                 avg_rmse = np.mean(perf_data['rmse_values'])
+                 n_exp = len(perf_data['experiments'])
+ 
+                 print(f"   {model_name}:")
+                 print(f"     Average R² = {avg_r2:.4f}")
+                 print(f"     Average RMSE = {avg_rmse:.4f}")
+                 print(f"     Tested in {n_exp} experiments")
+ 
+                 if avg_r2 > best_avg_r2:
+                     best_avg_r2 = avg_r2
+                     best_model = {
+                         'name': model_name,
+                         'avg_r2': avg_r2,
+                         'avg_rmse': avg_rmse,
+                         'n_experiments': n_exp
+                     }
+ 
+             if var_type.lower() in ['biomass', 'substrate', 'product']:
+                 self.overall_best_models[var_type.lower()] = best_model
+                 print(f"\\n   🏆 BEST {var_type.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")
+ 
+     def create_comparison_visualizations(self):
+         \"\"\"Create visualizations comparing models across experiments\"\"\"
+         if not self.best_models_by_experiment:
+             raise ValueError("First run analyze_by_experiment()")
+ 
+         # Prepare data for visualization
+         experiments = []
+         biomass_r2 = []
+         substrate_r2 = []
+         product_r2 = []
+ 
+         for exp, results in self.best_models_by_experiment.items():
+             experiments.append(exp)
+             biomass_r2.append(results.get('Biomass', {}).get('r2', 0))
+             substrate_r2.append(results.get('Substrate', {}).get('r2', 0))
+             product_r2.append(results.get('Product', {}).get('r2', 0))
+ 
+         # Create figure with subplots
+         fig, axes = plt.subplots(2, 2, figsize=(15, 12))
+         fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)
+ 
+         # 1. R² comparison by experiment and variable type
+         ax1 = axes[0, 0]
+         x = np.arange(len(experiments))
+         width = 0.25
+ 
+         ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
+         ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
+         ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)
+ 
+         ax1.set_xlabel('Experiment')
+         ax1.set_ylabel('R²')
+         ax1.set_title('Best Model R² by Experiment and Variable Type')
+         ax1.set_xticks(x)
+         ax1.set_xticklabels(experiments, rotation=45, ha='right')
+         ax1.legend()
+         ax1.grid(True, alpha=0.3)
+ 
+         # Add value labels
+         for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
+             if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
+             if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
+             if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)
+ 
+         # 2. Model frequency heatmap
+         ax2 = axes[0, 1]
+         # This would show which models appear most frequently as best
+         # Implementation depends on actual data structure
+         ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
+                  ha='center', va='center', transform=ax2.transAxes)
+         ax2.set_title('Most Frequently Selected Models')
+ 
+         # 3. Parameter evolution across experiments
+         ax3 = axes[1, 0]
+         ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
+                  ha='center', va='center', transform=ax3.transAxes)
+         ax3.set_title('Parameter Trends')
+ 
+         # 4. Overall best models summary
+         ax4 = axes[1, 1]
+         ax4.axis('off')
+ 
+         summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
+         for var_type, model_info in self.overall_best_models.items():
+             if model_info:
+                 summary_text += f"{var_type.upper()}:\\n"
+                 summary_text += f"  Model: {model_info['name']}\\n"
+                 summary_text += f"  Avg R²: {model_info['avg_r2']:.4f}\\n"
+                 summary_text += f"  Tested in: {model_info['n_experiments']} experiments\\n\\n"
+ 
+         ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
+                  fontsize=12, verticalalignment='top', fontfamily='monospace')
+         ax4.set_title('Overall Best Models Summary')
+ 
+         plt.tight_layout()
+         plt.show()
+ 
+     def generate_summary_table(self) -> pd.DataFrame:
+         \"\"\"Generate a summary table of best models by experiment and type\"\"\"
+         summary_data = []
+ 
+         for exp, results in self.best_models_by_experiment.items():
+             for var_type, var_results in results.items():
+                 summary_data.append({
+                     'Experiment': exp,
+                     'Variable_Type': var_type,
+                     'Best_Model': var_results['best_model'],
+                     'R2': var_results['r2'],
+                     'RMSE': var_results['rmse']
+                 })
+ 
+         summary_df = pd.DataFrame(summary_data)
+ 
+         print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
+         print("="*80)
+         print(summary_df.to_string(index=False))
+ 
+         return summary_df
 
+ # Example usage
+ if __name__ == "__main__":
+     print("🧬 Experimental Model Comparison System")
+     print("="*60)
+ 
+     # Example data structure with experiments
+     example_data = {
+         'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
+                        'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
+                        'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
+         'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
+                   'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
+                   'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
+         'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
+                  'Substrate', 'Substrate', 'Substrate', 'Substrate',
+                  'Product', 'Product', 'Product', 'Product'],
+         'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
+                0.9723, 0.9856, 0.9698, 0.9812,
+                0.9634, 0.9512, 0.9687, 0.9423],
+         'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
+                  0.0312, 0.0245, 0.0334, 0.0289,
+                  0.0412, 0.0523, 0.0389, 0.0567],
+         'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
+                    None, None, None, None, None, None, None, None],
+         'Ks': [None, None, None, None, None, None,
+                2.1, 1.8, 2.3, 1.9, None, None, None, None]
+     }
+ 
+     # Create analyzer
+     analyzer = ExperimentalModelAnalyzer()
+ 
+     # Load data
+     analyzer.load_results(data_dict=example_data)
+ 
+     # Analyze by experiment
+     results = analyzer.analyze_by_experiment()
+ 
+     # Create visualizations
+     analyzer.create_comparison_visualizations()
+ 
+     # Generate summary table
+     summary = analyzer.generate_summary_table()
+ 
+     print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
+ """
+ 
+     return code
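+ # Usage sketch (illustrative only, not called anywhere else in this module): the
+ # returned string is plain Python source, so it could be written out as a standalone
+ # script for the user, e.g.
+ #     from pathlib import Path
+ #     Path("model_comparison_template.py").write_text(generate_implementation_code(""), encoding="utf-8")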
+ 
+ # Global state to store results
+ class AppState:
+     def __init__(self):
+         self.current_analysis = ""
+         self.current_code = ""
+         self.current_language = "en"
+ 
+ app_state = AppState()
+ 
+ def export_report(export_format: str, language: str) -> Tuple[str, str]:
+     """Exports the report in the selected format"""
+     if not app_state.current_analysis:
+         error_msg = {
+             'en': "No analysis available to export",
+             'es': "No hay análisis disponible para exportar",
+             'fr': "Aucune analyse disponible pour exporter",
+             'de': "Keine Analyse zum Exportieren verfügbar",
+             'pt': "Nenhuma análise disponível para exportar"
+         }
+         return error_msg.get(language, error_msg['en']), ""
+ 
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ 
+     try:
+         if export_format == "DOCX":
+             filename = f"biotech_analysis_report_{timestamp}.docx"
+             ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
+         else:  # PDF
+             filename = f"biotech_analysis_report_{timestamp}.pdf"
+             ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)
+ 
+         success_msg = TRANSLATIONS[language]['report_exported']
+         return f"{success_msg} {filename}", filename
+     except Exception as e:
+         return f"Error: {str(e)}", ""
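+ # export_report assumes a ReportExporter helper (export_to_docx / export_to_pdf) is
+ # defined elsewhere in app.py. An illustrative call would be export_report("PDF", "en"),
+ # which returns a (status_message, filename) tuple.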
+
1298
+ # Interfaz Gradio con soporte multiidioma y temas
1299
+ def create_interface():
1300
+ # Estado inicial
1301
+ current_theme = "light"
1302
+ current_language = "en"
1303
+
1304
+ def update_interface_language(language):
1305
+ """Actualiza el idioma de la interfaz"""
1306
+ app_state.current_language = language
1307
  t = TRANSLATIONS[language]
1308
+
1309
  return [
1310
+ gr.update(value=f"# {t['title']}"), # title_text
1311
+ gr.update(value=t['subtitle']), # subtitle_text
1312
+ gr.update(label=t['upload_files']), # files_input
1313
+ gr.update(label=t['select_model']), # model_selector
1314
+ gr.update(label=t['select_language']), # language_selector
1315
+ gr.update(label=t['select_theme']), # theme_selector
1316
+ gr.update(label=t['detail_level']), # detail_level
1317
+ gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']), # additional_specs
1318
+ gr.update(value=t['analyze_button']), # analyze_btn
1319
+ gr.update(label=t['export_format']), # export_format
1320
+ gr.update(value=t['export_button']), # export_btn
1321
+ gr.update(label=t['comparative_analysis']), # analysis_output
1322
+ gr.update(label=t['implementation_code']), # code_output
1323
+ gr.update(label=t['data_format']) # data_format_accordion
1324
  ]
1325
+
1326
+ def process_and_store(files, model, detail, language, additional_specs):
1327
+ """Procesa archivos y almacena resultados"""
1328
+ if not files:
1329
+ error_msg = TRANSLATIONS[language]['error_no_files']
1330
+ return error_msg, ""
1331
 
1332
+ analysis, code = process_files(files, model, detail, language, additional_specs)
1333
+ app_state.current_analysis = analysis
1334
+ app_state.current_code = code
1335
+ return analysis, code
1336
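+     # process_and_store assumes process_files(files, model, detail, language,
+     # additional_specs) is defined earlier in app.py and returns (analysis, code).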
+ 
+     with gr.Blocks(theme=THEMES[current_theme]) as demo:
+         # UI components
         with gr.Row():
+             with gr.Column(scale=3):
+                 title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
+                 subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
             with gr.Column(scale=1):
+                 with gr.Row():
+                     language_selector = gr.Dropdown(
+                         choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
+                                  ("Deutsch", "de"), ("Português", "pt")],
+                         value="en",
+                         label=TRANSLATIONS[current_language]['select_language'],
+                         interactive=True
+                     )
+                     theme_selector = gr.Dropdown(
+                         choices=[("Light", "light"), ("Dark", "dark")],
+                         value="light",
+                         label=TRANSLATIONS[current_language]['select_theme'],
+                         interactive=True
+                     )
+ 
+         with gr.Row():
+             with gr.Column(scale=1):
+                 files_input = gr.File(
+                     label=TRANSLATIONS[current_language]['upload_files'],
+                     file_count="multiple",
+                     file_types=[".csv", ".xlsx", ".xls", ".pdf", ".zip"],
+                     type="filepath"
+                 )
 
+                 model_selector = gr.Dropdown(
+                     choices=list(CLAUDE_MODELS.keys()),
+                     value="claude-3-5-sonnet-20241022",
+                     label=TRANSLATIONS[current_language]['select_model'],
+                     info=f"{TRANSLATIONS[current_language]['best_for']}: {CLAUDE_MODELS['claude-3-5-sonnet-20241022']['best_for']}"
+                 )
 
+                 detail_level = gr.Radio(
+                     choices=[
+                         (TRANSLATIONS[current_language]['detailed'], "detailed"),
+                         (TRANSLATIONS[current_language]['summarized'], "summarized")
+                     ],
+                     value="detailed",
+                     label=TRANSLATIONS[current_language]['detail_level']
+                 )
 
+                 # New input for additional analysis specifications
+                 additional_specs = gr.Textbox(
+                     label=TRANSLATIONS[current_language]['additional_specs'],
+                     placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
+                     lines=3,
+                     max_lines=5,
+                     interactive=True
+                 )
 
+                 analyze_btn = gr.Button(
+                     TRANSLATIONS[current_language]['analyze_button'],
+                     variant="primary",
+                     size="lg"
+                 )
 
                 gr.Markdown("---")
 
+                 export_format = gr.Radio(
+                     choices=["DOCX", "PDF"],
+                     value="PDF",
+                     label=TRANSLATIONS[current_language]['export_format']
+                 )
+ 
+                 export_btn = gr.Button(
+                     TRANSLATIONS[current_language]['export_button'],
+                     variant="secondary"
+                 )
+ 
+                 export_status = gr.Textbox(
+                     label="Export Status",
+                     interactive=False,
+                     visible=False
+                 )
+ 
+                 export_file = gr.File(
+                     label="Download Report",
+                     visible=False
+                 )
+ 
             with gr.Column(scale=2):
+                 analysis_output = gr.Markdown(
+                     label=TRANSLATIONS[current_language]['comparative_analysis']
+                 )
+ 
+                 code_output = gr.Code(
+                     label=TRANSLATIONS[current_language]['implementation_code'],
+                     language="python",
+                     interactive=True,
+                     lines=20
+                 )
+ 
+                 data_format_accordion = gr.Accordion(
+                     label=TRANSLATIONS[current_language]['data_format'],
+                     open=False
+                 )
+ 
+                 with data_format_accordion:
+                     gr.Markdown("""
+ ### Expected CSV/Excel structure:
 
+ | Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
+ |------------|-------|------|-----|------|-----|-----|--------|-------|------------|
+ | pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
+ | pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
+ | pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
+ | pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |
 
+ **Important columns:**
+ - **Experiment**: Experimental condition identifier
+ - **Model**: Model name
+ - **Type**: Variable type (Biomass/Substrate/Product)
+ - **R2, RMSE**: Fit quality metrics
+ - **Parameters**: Model-specific parameters
+                     """)
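+                     # Minimal CSV matching the layout above (illustrative):
+                     #   Experiment,Model,Type,R2,RMSE
+                     #   pH_7.0,Monod,Biomass,0.985,0.023
+                     #   pH_7.0,Logistic,Biomass,0.976,0.031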
+ 
+         # Define examples
+         examples = gr.Examples(
+             examples=[
+                 [["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed", ""],
+                 [["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized", "Focus on temperature effects"]
+             ],
+             inputs=[files_input, model_selector, detail_level, additional_specs],
+             label=TRANSLATIONS[current_language]['examples']
+         )
+ 
+         # Events - updated to include additional_specs
+         language_selector.change(
+             update_interface_language,
+             inputs=[language_selector],
+             outputs=[
+                 title_text, subtitle_text, files_input, model_selector,
+                 language_selector, theme_selector, detail_level, additional_specs,
+                 analyze_btn, export_format, export_btn, analysis_output,
+                 code_output, data_format_accordion
+             ]
+         )
+ 
+         def change_theme(theme_name):
+             """Changes the interface theme"""
+             # Note: in current Gradio, changing the theme dynamically requires a reload
+             # This is a known limitation
+             return gr.Info("Theme will be applied on next page load")
+ 
+         theme_selector.change(
+             change_theme,
+             inputs=[theme_selector],
+             outputs=[]
+         )
+ 
         analyze_btn.click(
+             fn=process_and_store,
+             inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
+             outputs=[analysis_output, code_output]
         )
 
+         def handle_export(format, language):
+             status, file = export_report(format, language)
+             if file:
+                 return gr.update(value=status, visible=True), gr.update(value=file, visible=True)
+             else:
+                 return gr.update(value=status, visible=True), gr.update(visible=False)
+ 
         export_btn.click(
+             fn=handle_export,
+             inputs=[export_format, language_selector],
+             outputs=[export_status, export_file]
         )
+ 
     return demo
 
+ # Main function
 def main():
+     if not os.getenv("NEBIUS_API_KEY"):
+         print("⚠️ Configure NEBIUS_API_KEY in HuggingFace Space secrets")
+         return gr.Interface(
+             fn=lambda x: TRANSLATIONS['en']['error_no_api'],
+             inputs=gr.Textbox(),
+             outputs=gr.Textbox(),
+             title="Configuration Error"
+         )
 
     return create_interface()
 
+ # For local execution
 if __name__ == "__main__":
     demo = main()
     if demo:
+         demo.launch(
+             server_name="0.0.0.0",
+             server_port=7860,
+             share=False
+         )
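+ # Local run (illustrative): `python app.py`, then open http://localhost:7860 in a browser.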