# Hugging Face Spaces page header (status: Running)
import gradio as gr | |
from openai import OpenAI | |
import PyPDF2 | |
import pandas as pd | |
import numpy as np | |
import io | |
import os | |
import json | |
import zipfile | |
import tempfile | |
from typing import Dict, List, Tuple, Union, Optional | |
import re | |
from pathlib import Path | |
import openpyxl | |
from dataclasses import dataclass | |
from enum import Enum | |
from docx import Document | |
from docx.shared import Inches, Pt, RGBColor | |
from docx.enum.text import WD_ALIGN_PARAGRAPH | |
from reportlab.lib import colors | |
from reportlab.lib.pagesizes import letter | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.units import inch | |
import matplotlib.pyplot as plt | |
from datetime import datetime | |
# --- Qwen API configuration ---
# Gradio setting: switch usage analytics off.
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

# Build an OpenAI-compatible client that targets the Nebius endpoint (Qwen).
# `client` is None whenever the API key is absent or construction fails, and
# the rest of the module checks for that before calling the API.
client = None
try:
    if "NEBIUS_API_KEY" in os.environ:
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=os.environ.get("NEBIUS_API_KEY"),
        )
    else:
        print("⚠️ ADVERTENCIA: La variable de entorno NEBIUS_API_KEY no está configurada. El analizador no funcionará.")
except Exception as e:
    print(f"Error al inicializar el cliente OpenAI para Nebius: {e}")
    client = None
# --- Complete translation system ---
# UI strings keyed by language code, then by message key. Every language
# defines the same set of keys.
#
# 'response_prefix' is the instruction prepended to the prompts sent to the
# model. It names the language in words and keeps a trailing space because it
# is concatenated directly with the text that follows.
TRANSLATIONS = {
    'en': {
        'title': '🧬 Comparative Analyzer of Biotechnological Models',
        'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
        'upload_files': '📁 Upload fitting results (CSV/Excel)',
        'select_model': '🤖 Qwen Model',
        'select_language': '🌐 Language',
        'select_theme': '🎨 Theme',
        'detail_level': '📋 Analysis detail level',
        'detailed': 'Detailed',
        'summarized': 'Summarized',
        'analyze_button': '🚀 Analyze and Compare Models',
        'export_format': '📄 Export format',
        'export_button': '💾 Export Report',
        'comparative_analysis': '📊 Comparative Analysis',
        'implementation_code': '💻 Implementation Code',
        'data_format': '📋 Expected data format',
        'examples': '📚 Analysis examples',
        'light': 'Light',
        'dark': 'Dark',
        'best_for': 'Best for',
        'loading': 'Loading...',
        'error_no_api': 'Please configure NEBIUS_API_KEY in the environment variables or Space secrets',
        'error_no_files': 'Please upload fitting result files to analyze',
        'report_exported': 'Report exported successfully as',
        'specialized_in': '🎯 Specialized in:',
        'metrics_analyzed': '📊 Analyzed metrics:',
        'what_analyzes': '🔍 What it specifically analyzes:',
        'tips': '💡 Tips for better results:',
        'additional_specs': '📝 Additional specifications for analysis',
        'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...',
        'response_prefix': 'Please respond in English. '
    },
    'es': {
        'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
        'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
        'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
        'select_model': '🤖 Modelo Qwen',
        'select_language': '🌐 Idioma',
        'select_theme': '🎨 Tema',
        'detail_level': '📋 Nivel de detalle del análisis',
        'detailed': 'Detallado',
        'summarized': 'Resumido',
        'analyze_button': '🚀 Analizar y Comparar Modelos',
        'export_format': '📄 Formato de exportación',
        'export_button': '💾 Exportar Reporte',
        'comparative_analysis': '📊 Análisis Comparativo',
        'implementation_code': '💻 Código de Implementación',
        'data_format': '📋 Formato de datos esperado',
        'examples': '📚 Ejemplos de análisis',
        'light': 'Claro',
        'dark': 'Oscuro',
        'best_for': 'Mejor para',
        'loading': 'Cargando...',
        'error_no_api': 'Por favor configura NEBIUS_API_KEY en las variables de entorno o secretos del Space',
        'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
        'report_exported': 'Reporte exportado exitosamente como',
        'specialized_in': '🎯 Especializado en:',
        'metrics_analyzed': '📊 Métricas analizadas:',
        'what_analyzes': '🔍 Qué analiza específicamente:',
        'tips': '💡 Tips para mejores resultados:',
        'additional_specs': '📝 Especificaciones adicionales para el análisis',
        'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...',
        'response_prefix': 'Please respond in Spanish. '
    },
    'fr': {
        'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
        'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
        'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
        'select_model': '🤖 Modèle Qwen',
        'select_language': '🌐 Langue',
        'select_theme': '🎨 Thème',
        'detail_level': '📋 Niveau de détail',
        'detailed': 'Détaillé',
        'summarized': 'Résumé',
        'analyze_button': '🚀 Analyser et Comparer',
        'export_format': '📄 Format d\'export',
        'export_button': '💾 Exporter le Rapport',
        'comparative_analysis': '📊 Analyse Comparative',
        'implementation_code': '💻 Code d\'Implémentation',
        'data_format': '📋 Format de données attendu',
        'examples': '📚 Exemples d\'analyse',
        'light': 'Clair',
        'dark': 'Sombre',
        'best_for': 'Meilleur pour',
        'loading': 'Chargement...',
        'error_no_api': 'Veuillez configurer NEBIUS_API_KEY',
        'error_no_files': 'Veuillez télécharger des fichiers à analyser',
        'report_exported': 'Rapport exporté avec succès comme',
        'specialized_in': '🎯 Spécialisé dans:',
        'metrics_analyzed': '📊 Métriques analysées:',
        'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
        'tips': '💡 Conseils pour de meilleurs résultats:',
        'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
        'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...',
        'response_prefix': 'Please respond in French. '
    },
    'de': {
        'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
        'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
        'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
        'select_model': '🤖 Qwen Modell',
        'select_language': '🌐 Sprache',
        'select_theme': '🎨 Thema',
        'detail_level': '📋 Detailgrad der Analyse',
        'detailed': 'Detailliert',
        'summarized': 'Zusammengefasst',
        'analyze_button': '🚀 Analysieren und Vergleichen',
        'export_format': '📄 Exportformat',
        'export_button': '💾 Bericht Exportieren',
        'comparative_analysis': '📊 Vergleichende Analyse',
        'implementation_code': '💻 Implementierungscode',
        'data_format': '📋 Erwartetes Datenformat',
        'examples': '📚 Analysebeispiele',
        'light': 'Hell',
        'dark': 'Dunkel',
        'best_for': 'Am besten für',
        'loading': 'Laden...',
        'error_no_api': 'Bitte konfigurieren Sie NEBIUS_API_KEY',
        'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
        'report_exported': 'Bericht erfolgreich exportiert als',
        'specialized_in': '🎯 Spezialisiert auf:',
        'metrics_analyzed': '📊 Analysierte Metriken:',
        'what_analyzes': '🔍 Was spezifisch analysiert wird:',
        'tips': '💡 Tipps für bessere Ergebnisse:',
        'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
        'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...',
        'response_prefix': 'Please respond in German. '
    },
    'pt': {
        'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
        'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
        'upload_files': '📁 Carregar resultados (CSV/Excel)',
        'select_model': '🤖 Modelo Qwen',
        'select_language': '🌐 Idioma',
        'select_theme': '🎨 Tema',
        'detail_level': '📋 Nível de detalhe',
        'detailed': 'Detalhado',
        'summarized': 'Resumido',
        'analyze_button': '🚀 Analisar e Comparar',
        'export_format': '📄 Formato de exportação',
        'export_button': '💾 Exportar Relatório',
        'comparative_analysis': '📊 Análise Comparativa',
        'implementation_code': '💻 Código de Implementação',
        'data_format': '📋 Formato de dados esperado',
        'examples': '📚 Exemplos de análise',
        'light': 'Claro',
        'dark': 'Escuro',
        'best_for': 'Melhor para',
        'loading': 'Carregando...',
        'error_no_api': 'Por favor configure NEBIUS_API_KEY',
        'error_no_files': 'Por favor carregue arquivos para analisar',
        'report_exported': 'Relatório exportado com sucesso como',
        'specialized_in': '🎯 Especializado em:',
        'metrics_analyzed': '📊 Métricas analisadas:',
        'what_analyzes': '🔍 O que analisa especificamente:',
        'tips': '💡 Dicas para melhores resultados:',
        'additional_specs': '📝 Especificações adicionais para a análise',
        'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...',
        'response_prefix': 'Please respond in Portuguese. '
    }
}
# --- Interface themes ---
# Gradio theme objects keyed by lowercase theme name. Only 'light' is applied
# by the visible part of this file (see `gr.Blocks(theme=THEMES['light'])`).
THEMES = {
    'light': gr.themes.Soft(),
    'dark': gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Arial", "sans-serif"]
    ).set(
        # Theme values are emitted as CSS, so they must be a CSS value or a
        # "*variable" reference. The bare word "dark" is neither; use the same
        # shade as the dark-mode fill so the body is dark in both modes.
        body_background_fill="*neutral_950",
        body_background_fill_dark="*neutral_950",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_hover="*primary_500",
        button_primary_text_color="white",
        block_background_fill="*neutral_800",
        block_border_color="*neutral_700",
        block_label_text_color="*neutral_200",
        block_title_text_color="*neutral_100",
        checkbox_background_color="*neutral_700",
        checkbox_background_color_selected="*primary_600",
        input_background_fill="*neutral_700",
        input_border_color="*neutral_600",
        input_placeholder_color="*neutral_400"
    )
}
# --- Data classes and structures ---
class AnalysisType(Enum):
    """Closed set of analysis categories, each with a lowercase string value.

    Not referenced anywhere else in the visible part of this file.
    """
    MATHEMATICAL_MODEL = "mathematical_model"
    DATA_FITTING = "data_fitting"
    FITTING_RESULTS = "fitting_results"
    UNKNOWN = "unknown"
@dataclass
class MathematicalModel:
    """Descriptive record for one mathematical model held in the registry.

    The ``@dataclass`` decorator is required: ``ModelRegistry`` constructs
    instances with keyword arguments, which a bare class with only
    annotations would reject with ``TypeError``.
    """
    name: str                 # display name; second-level key in the registry
    equation: str             # human-readable equation text
    parameters: List[str]     # parameter labels
    application: str          # short description of where the model applies
    sources: List[str]        # free-text source names
    category: str             # first-level key in the registry
    biological_meaning: str   # prose interpretation of the model
class ModelRegistry:
    """Two-level catalogue of models: category -> model name -> model."""

    def __init__(self):
        self.models = {}
        self._initialize_default_models()

    def register_model(self, model: MathematicalModel):
        """Store ``model`` under its category and name, replacing any previous entry."""
        same_category = self.models.setdefault(model.category, {})
        same_category[model.name] = model

    def get_model(self, category: str, name: str) -> Optional[MathematicalModel]:
        """Return the model registered under ``category``/``name``, or None."""
        same_category = self.models.get(category)
        if same_category is None:
            return None
        return same_category.get(name)

    def get_all_models(self) -> Dict:
        """Return the internal category -> name -> model mapping (not a copy)."""
        return self.models

    def _initialize_default_models(self):
        """Register the three built-in biomass growth models, in a fixed order."""
        biomass_defaults = (
            dict(
                name="Monod",
                equation="μ = μmax × (S / (Ks + S))",
                parameters=["μmax (h⁻¹)", "Ks (g/L)"],
                application="Crecimiento limitado por sustrato único",
                sources=["Cambridge", "MIT", "DTU"],
                biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante",
            ),
            dict(
                name="Logístico",
                equation="dX/dt = μmax × X × (1 - X/Xmax)",
                parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
                application="Sistemas cerrados batch",
                sources=["Cranfield", "Swansea", "HAL Theses"],
                biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema",
            ),
            dict(
                name="Gompertz",
                equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
                parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
                application="Crecimiento con fase lag pronunciada",
                sources=["Lund University", "NC State"],
                biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario",
            ),
        )
        for spec in biomass_defaults:
            self.register_model(MathematicalModel(category="crecimiento_biomasa", **spec))
# Module-wide registry instance; its constructor loads the default models.
model_registry = ModelRegistry()

# Selectable Qwen models, keyed by the model identifier that is passed to the
# chat-completions API; the values are descriptive metadata for the UI.
QWEN_MODELS = {
    "Qwen/Qwen3-14B": dict(
        name="Qwen 3 14B",
        description="Modelo potente y versátil de la serie Qwen.",
        max_tokens=4096,
        best_for="Análisis complejos y generación de código detallado.",
    ),
}
# --- Processing and export classes ---
class FileProcessor:
    """Stateless helpers that decode the raw bytes of uploaded files.

    All methods are static so they work both as ``FileProcessor.read_csv(...)``
    and on an instance. Without ``@staticmethod`` an instance call would pass
    the instance as the first (bytes) argument.
    """

    @staticmethod
    def extract_text_from_pdf(pdf_file: bytes) -> str:
        """Return the text of every page, each followed by a newline.

        Pages that yield no text are skipped. On any failure the returned
        string is an ``"Error reading PDF: ..."`` message rather than an
        exception.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
            page_texts = (page.extract_text() for page in pdf_reader.pages)
            return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    @staticmethod
    def read_csv(csv_file: bytes) -> Optional[pd.DataFrame]:
        """Parse CSV bytes into a DataFrame; return None if parsing fails."""
        try:
            return pd.read_csv(io.BytesIO(csv_file))
        except Exception:
            # Best effort by design: callers treat None as "could not process".
            return None

    @staticmethod
    def read_excel(excel_file: bytes) -> Optional[pd.DataFrame]:
        """Parse Excel bytes into a DataFrame; return None if parsing fails."""
        try:
            return pd.read_excel(io.BytesIO(excel_file))
        except Exception:
            # Best effort by design: callers treat None as "could not process".
            return None

    @staticmethod
    def extract_from_zip(zip_file: bytes) -> List[Tuple[str, bytes]]:
        """Return ``(member name, content)`` for each regular file in the archive.

        Directory entries and anything under ``__MACOSX`` are skipped. If the
        archive cannot be read, the error is printed and whatever was
        collected so far (possibly nothing) is returned.
        """
        files = []
        try:
            with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
                for file_name in zip_ref.namelist():
                    if file_name.startswith('__MACOSX') or file_name.endswith('/'):
                        continue
                    files.append((file_name, zip_ref.read(file_name)))
        except Exception as e:
            print(f"Error processing ZIP: {e}")
        return files
class ReportExporter:
    """Write the Markdown-style analysis text to a DOCX or PDF file.

    Only a line-oriented subset of Markdown is recognised: ``#`` headings
    (rendered at levels 1-3), whole-line ``**bold**``, and ``- `` / ``* ``
    bullets. Every other non-empty line becomes a plain paragraph.
    """

    # ASCII stand-ins for emoji that can cause problems in ReportLab.
    _EMOJI_TAGS = {
        '🧬': '[DNA]', '🤖': '[BOT]', '📁': '[FILE]', '🚀': '[ROCKET]', '📊': '[GRAPH]',
        '💻': '[CODE]', '💾': '[SAVE]', '🎯': '[TARGET]', '🔍': '[SEARCH]', '💡': '[TIP]',
    }
    # Label for the generation timestamp; other languages fall back to English.
    _DATE_LABELS = {'en': 'Generated on', 'es': 'Generado el'}

    @staticmethod
    def _split_heading(line: str) -> Tuple[int, str]:
        """Return ``(level, text)`` for a line starting with ``#``.

        Only the leading run of ``#`` is removed, so ``#`` characters inside
        the heading text survive. Levels above 3 are clamped to 3.
        """
        text = line.lstrip('#')
        level = min(len(line) - len(text), 3)
        return level, text.strip()

    @staticmethod
    def _header_lines(language: str) -> Tuple[str, str]:
        """Return the report title and the timestamp line for ``language``.

        Unknown language codes fall back to English instead of raising.
        """
        title = TRANSLATIONS.get(language, TRANSLATIONS['en'])['title']
        labels = ReportExporter._DATE_LABELS
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        return title, f"{labels.get(language, labels['en'])}: {stamp}"

    @staticmethod
    def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
        """Render ``content`` into a Word document saved at ``filename``.

        Args:
            content: Markdown-style text, one construct per line.
            filename: Destination path of the ``.docx`` file.
            language: Language code used for the title and date label.

        Returns:
            ``filename``, unchanged.
        """
        doc = Document()
        title_style = doc.styles['Title']
        title_style.font.size = Pt(24)
        title_style.font.bold = True
        heading1_style = doc.styles['Heading 1']
        heading1_style.font.size = Pt(18)
        heading1_style.font.bold = True

        title_text, date_line = ReportExporter._header_lines(language)
        doc.add_heading(title_text, 0)
        doc.add_paragraph(date_line)
        doc.add_paragraph()

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('#'):
                level, text = ReportExporter._split_heading(line)
                doc.add_heading(text, level=level)
            elif line.startswith('**') and line.endswith('**'):
                doc.add_paragraph().add_run(line.replace('**', '')).bold = True
            elif line.startswith(('- ', '* ')):
                doc.add_paragraph(line[2:], style='List Bullet')
            else:
                doc.add_paragraph(line)
        doc.save(filename)
        return filename

    @staticmethod
    def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
        """Render ``content`` into a PDF saved at ``filename``.

        Args:
            content: Markdown-style text, one construct per line.
            filename: Destination path of the ``.pdf`` file.
            language: Language code used for the title and date label.

        Returns:
            ``filename``, unchanged.
        """
        # Local import so this block does not depend on a new file-level import.
        from xml.sax.saxutils import escape

        def pdf_text(text: str) -> str:
            # Paragraph parses its text as XML-like markup, so '<', '>' and '&'
            # from the analysis must be escaped. Emoji are swapped for tags in
            # every kind of line, not just plain paragraphs.
            for emoji, tag in ReportExporter._EMOJI_TAGS.items():
                text = text.replace(emoji, tag)
            return escape(text)

        doc = SimpleDocTemplate(filename, pagesize=letter)
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle('CustomTitle', parent=styles['Title'], fontSize=24, textColor=colors.HexColor('#1f4788'), spaceAfter=20)
        heading_style = ParagraphStyle('CustomHeading1', parent=styles['Heading1'], fontSize=16, textColor=colors.HexColor('#2e5090'), spaceAfter=12)
        heading_styles = {1: heading_style, 2: styles['Heading2'], 3: styles['Heading3']}

        title_text, date_line = ReportExporter._header_lines(language)
        story = [
            Paragraph(pdf_text(title_text), title_style),
            Paragraph(pdf_text(date_line), styles['Normal']),
            Spacer(1, 0.3 * inch),
        ]

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('#'):
                level, text = ReportExporter._split_heading(line)
                story.append(Paragraph(pdf_text(text), heading_styles[level]))
            elif line.startswith('**') and line.endswith('**'):
                story.append(Paragraph(f"<b>{pdf_text(line.replace('**', ''))}</b>", styles['Normal']))
            elif line.startswith(('- ', '* ')):
                # bulletText already draws the bullet; repeating it in the
                # text would print it twice.
                story.append(Paragraph(pdf_text(line[2:]), styles['Normal'], bulletText='•'))
            else:
                story.append(Paragraph(pdf_text(line), styles['Normal']))
            story.append(Spacer(1, 0.1 * inch))
        doc.build(story)
        return filename
# --- AI analyzer class (Qwen) ---
class AIAnalyzer:
    """Sends fitting-result tables to a chat-completions model for analysis."""

    def __init__(self, client, model_registry):
        # `client` is an OpenAI-style client, or None when none is configured.
        self.client = client
        self.model_registry = model_registry

    def get_language_prompt_prefix(self, language: str) -> str:
        """Return the instruction that tells the model which language to answer in.

        Uses the ``'response_prefix'`` entry of the language's translation
        table when present, otherwise a generic English sentence naming the
        language code. Unknown codes use the English table.
        """
        return TRANSLATIONS.get(language, TRANSLATIONS['en']).get('response_prefix', f"Please respond in {language}. ")

    def _complete(self, qwen_model: str, prompt: str, data_summary: str, max_tokens: int, temperature: float) -> str:
        """Run one single-turn completion and return its text.

        The API may return no text (``content`` is None); that is normalised
        to an empty string so callers can always join or display the result.
        """
        response = self.client.chat.completions.create(
            model=qwen_model,
            messages=[{"role": "user", "content": f"{prompt}\n\n{data_summary}"}],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95
        )
        return response.choices[0].message.content or ""

    def analyze_fitting_results(self, data: pd.DataFrame, qwen_model: str, detail_level: str, language: str, additional_specs: str) -> Dict:
        """Ask the model for a written analysis and a companion Python script.

        Args:
            data: Table of fitting results; it is sent to the model in full.
            qwen_model: Model identifier passed to the API.
            detail_level: ``"detailed"`` selects the long prompt; any other
                value selects the summarized prompt.
            language: Language code for the response-language instruction.
            additional_specs: Optional user text added to the analysis prompt.

        Returns:
            ``{"analisis_completo": str, "codigo_implementacion": str}`` on
            success, or ``{"error": str}`` when no client is configured or the
            API call raises.
        """
        if self.client is None:
            return {"error": "AI client not initialized. Check NEBIUS_API_KEY."}

        data_summary = f"FITTING RESULTS DATA:\n- Columns: {list(data.columns)}\n- Number of models/entries: {len(data)}\n- Full Data:\n{data.to_string()}"
        lang_prefix = self.get_language_prompt_prefix(language)
        user_specs_section = f"\nADDITIONAL USER SPECIFICATIONS: {additional_specs}\n" if additional_specs else ""

        if detail_level == "detailed":
            prompt = f"""{lang_prefix}{user_specs_section}
You are an expert biotechnologist and data scientist. Analyze the provided model fitting results in detail.
1. **Overall Summary:** Briefly describe the dataset (number of experiments, models, metrics).
2. **Analysis by Experiment/Condition:** For each unique experiment/condition found in the data:
- Identify the best performing model for each category (e.g., Biomass, Substrate, Product) based on R2 (higher is better) and RMSE (lower is better).
- List the best model's name, its key metrics (R2, RMSE), and its fitted parameters.
- Provide a brief biological interpretation of the parameters for the best model in that context.
3. **Overall Best Models:** Identify the most robust models across all experiments for each category. Justify your choice (e.g., "Model X was best in 3 out of 4 experiments").
4. **Parameter Trends:** If possible, comment on how key parameters (like μmax, Ks) change across different experimental conditions.
5. **Conclusion and Recommendations:** Summarize the findings and recommend which models to use for future predictions under specific conditions.
Format the entire response using clear Markdown headers, lists, and bold text."""
        else:  # summarized
            prompt = f"""{lang_prefix}{user_specs_section}
You are an expert biotechnologist. Provide a concise, summarized analysis of the provided model fitting results.
1. **Best Models Summary Table:** Create a table that shows the best model for each Experiment and Type (Biomass, Substrate, etc.) along with its R2 value.
2. **Overall Winners:** State the single best model for Biomass, Substrate, and Product across all experiments.
3. **Key Insights:** In 2-3 bullet points, what are the most important findings? (e.g., "Gompertz model consistently outperforms others for biomass.", "μmax is highest at pH 7.5.").
Format as a brief and clear Markdown report."""

        code_prompt = f"""{lang_prefix}
Based on the provided data, generate a single, complete, and executable Python script for analysis.
The script must:
1. Contain the provided data hardcoded into a pandas DataFrame.
2. Define functions to analyze the data to find the best model per experiment and type.
3. Include functions to create visualizations using matplotlib or seaborn, such as:
- A bar chart comparing the R2 values of the best models across experiments.
- A summary table of the best models.
4. Have a `if __name__ == "__main__":` block that runs the analysis and shows the plots.
The code should be well-commented and self-contained."""

        try:
            # Main analysis first, then code generation; the lower temperature
            # makes the generated code more deterministic.
            analysis_text = self._complete(qwen_model, prompt, data_summary, max_tokens=4000, temperature=0.6)
            code_text = self._complete(qwen_model, code_prompt, data_summary, max_tokens=3500, temperature=0.4)
        except Exception as e:
            return {"error": f"An error occurred with the AI API: {str(e)}"}
        return {
            "analisis_completo": analysis_text,
            "codigo_implementacion": code_text,
        }
# --- Application logic ---
def process_files(files, model_name: str, detail_level: str, language: str, additional_specs: str) -> Tuple[str, str]:
    """Analyze every uploaded CSV/Excel file and combine the results.

    Args:
        files: Uploaded files, each either a filesystem path (``str`` or
            path-like) or an object exposing the path as ``.name``.
        model_name: Model identifier passed to the analyzer.
        detail_level: ``"detailed"`` or ``"summarized"``.
        language: Language code for messages and the model's response.
        additional_specs: Optional extra instructions for the analysis.

    Returns:
        ``(analysis_markdown, code_text)``. Per-file problems are reported
        inside the analysis text instead of being raised. If no file produced
        code, the generic template from ``generate_implementation_code`` is
        returned as the code.
    """
    if not files:
        return TRANSLATIONS[language]['error_no_files'], "Please upload files first."
    if client is None:
        return TRANSLATIONS[language]['error_no_api'], "AI client is not configured."

    analyzer = AIAnalyzer(client, model_registry)
    all_analysis_parts = []
    all_code_parts = []
    for file in files:
        # Resolve the path before the try block so the error handler below can
        # always name the file, whatever shape the upload object has.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name
        try:
            with open(file_path, 'rb') as f:
                file_content = f.read()
            file_name = os.path.basename(file_path)
            file_ext = Path(file_name).suffix.lower()

            df = None
            if file_ext == '.csv':
                df = FileProcessor.read_csv(file_content)
            elif file_ext in ['.xlsx', '.xls']:
                df = FileProcessor.read_excel(file_content)

            if df is None:
                all_analysis_parts.append(f"# Could not process file: {file_name}")
                continue

            all_analysis_parts.append(f"# Analysis for: {file_name}")
            result = analyzer.analyze_fitting_results(df, model_name, detail_level, language, additional_specs)
            if "error" in result:
                all_analysis_parts.append(f"An error occurred: {result['error']}")
            else:
                all_analysis_parts.append(result.get("analisis_completo", "No analysis generated."))
                all_code_parts.append(f"# Code generated from: {file_name}\n{result.get('codigo_implementacion', '# No code generated.')}")
        except Exception as e:
            all_analysis_parts.append(f"# Error processing {file_path}: {str(e)}")

    final_analysis = "\n\n---\n\n".join(all_analysis_parts)
    if all_code_parts:
        # The rule goes *between* scripts. It is written as a comment so the
        # combined text stays valid Python when several files were analyzed.
        separator = "\n\n# " + "=" * 78 + "\n\n"
        final_code = separator.join(all_code_parts)
    else:
        final_code = generate_implementation_code(final_analysis)
    return final_analysis, final_code
def generate_implementation_code(analysis_results: str) -> str:
    """Return a generic, runnable analysis script used as a fallback.

    This is used when the API produced no code. The template is static:
    ``analysis_results`` is accepted for interface compatibility but is not
    read. The template text is indented so that it is valid Python as
    returned.
    """
    return """
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


def analyze_data(df):
    \"\"\"
    Analyzes the dataframe to find the best model per experiment and type.
    This is a template function. You may need to adapt it to your specific column names.
    \"\"\"
    # Assuming columns 'Experiment', 'Type', 'R2', 'Model' exist
    if not all(col in df.columns for col in ['Experiment', 'Type', 'R2', 'Model']):
        print("DataFrame is missing required columns: 'Experiment', 'Type', 'R2', 'Model'")
        return None
    # Find the index of the max R2 for each group
    best_models_idx = df.loc[df.groupby(['Experiment', 'Type'])['R2'].idxmax()]
    print("--- Best Models by Experiment and Type ---")
    print(best_models_idx[['Experiment', 'Type', 'Model', 'R2']].to_string(index=False))
    return best_models_idx


def visualize_results(best_models_df):
    \"\"\"
    Creates a bar plot to visualize the R2 scores of the best models.
    \"\"\"
    if best_models_df is None:
        print("Cannot visualize results. Analysis failed.")
        return
    plt.figure(figsize=(12, 7))
    sns.barplot(data=best_models_df, x='Experiment', y='R2', hue='Type', palette='viridis')
    plt.title('Best Model Performance (R²) by Experiment and Type', fontsize=16)
    plt.xlabel('Experiment', fontsize=12)
    plt.ylabel('R² Score', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.ylim(bottom=max(0, best_models_df['R2'].min() - 0.05), top=1.0)
    plt.legend(title='Variable Type')
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    # TODO: Replace this with your actual data
    # This is placeholder data.
    data = {
        'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.0', 'pH_7.5'],
        'Model': ['Monod', 'Logistic', 'Monod', 'Logistic', 'FirstOrder', 'FirstOrder'],
        'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Substrate', 'Substrate'],
        'R2': [0.98, 0.99, 0.97, 0.96, 0.95, 0.99],
        'RMSE': [0.1, 0.05, 0.12, 0.15, 0.2, 0.08]
    }
    df = pd.DataFrame(data)
    print("--- Input Data ---")
    print(df)
    print("\\n")
    best_models = analyze_data(df)
    visualize_results(best_models)
"""
class AppState:
    """Mutable holder for the latest analysis text, generated code and UI language."""

    def __init__(self):
        self.current_language = "en"
        # Both text slots start out empty.
        self.current_analysis = self.current_code = ""


# NOTE(review): this is one module-level instance, so everything that imports
# this module reads and writes the same stored analysis.
app_state = AppState()
def export_report(export_format: str, language: str) -> Tuple[str, Optional[str]]:
    """Export the stored analysis to a DOCX or PDF file.

    Args:
        export_format: ``"DOCX"`` for Word output; any other value gives PDF.
        language: Language code for messages and report headings. Unknown
            codes fall back to English.

    Returns:
        ``(status_message, path)``. ``path`` is None when there is nothing to
        export or the export failed.
    """
    messages = TRANSLATIONS.get(language, TRANSLATIONS['en'])
    if not app_state.current_analysis:
        # The replace() only changes the Spanish message ("analizar" ->
        # "exportar"); other languages reuse the upload prompt unchanged.
        return messages['error_no_files'].replace('analizar', 'exportar'), None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if export_format == "DOCX":
        extension, exporter = "docx", ReportExporter.export_to_docx
    else:  # PDF
        extension, exporter = "pdf", ReportExporter.export_to_pdf

    try:
        # A fresh temporary directory per export: the timestamp only has
        # one-second resolution, so exports started in the same second would
        # otherwise overwrite each other, and reports would pile up in the
        # working directory.
        out_dir = tempfile.mkdtemp(prefix="biotech_report_")
        report_path = os.path.join(out_dir, f"biotech_analysis_report_{timestamp}.{extension}")
        exporter(app_state.current_analysis, report_path, language)
        return f"{messages['report_exported']} {os.path.basename(report_path)}", report_path
    except Exception as e:
        return f"Error during export: {str(e)}", None
# --- Gradio interface ---
def create_interface():
    """Build and return the Gradio Blocks application (not yet launched)."""
    current_language = "en"

    def update_interface_language(language: str):
        """Return one update per localized component, in the wiring order below."""
        app_state.current_language = language
        t = TRANSLATIONS[language]
        return [
            gr.update(value=f"# {t['title']}"),
            gr.update(value=t['subtitle']),
            gr.update(label=t['upload_files']),
            gr.update(label=t['select_model']),
            gr.update(label=t['select_language']),
            gr.update(label=t['select_theme']),
            gr.update(label=t['detail_level'], choices=[(t['detailed'], "detailed"), (t['summarized'], "summarized")]),
            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
            gr.update(value=t['analyze_button']),
            gr.update(label=t['export_format']),
            gr.update(value=t['export_button']),
            gr.update(label=t['comparative_analysis']),
            gr.update(label=t['implementation_code']),
            gr.update(label=t['data_format']),
            gr.update(label=t['examples'])
        ]

    def process_and_store(files, model, detail, language, additional_specs):
        """Run the analysis and keep the results in ``app_state`` for export."""
        if not files:
            return TRANSLATIONS[language]['error_no_files'], ""
        analysis, code = process_files(files, model, detail, language, additional_specs)
        app_state.current_analysis = analysis
        app_state.current_code = code
        return analysis, code

    with gr.Blocks(theme=THEMES['light']) as demo:
        # UI structure
        with gr.Column():
            with gr.Row():
                with gr.Column(scale=3):
                    title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
                    subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
                with gr.Column(scale=1):
                    language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es"), ("Français", "fr"), ("Deutsch", "de"), ("Português", "pt")], value=current_language, label="Language", interactive=True)
                    theme_selector = gr.Dropdown(choices=["Light", "Dark"], value="Light", label="Theme", interactive=True, visible=False)  # Theme switching is complex, hiding for now
            with gr.Row():
                with gr.Column(scale=1):
                    files_input = gr.File(label=TRANSLATIONS[current_language]['upload_files'], file_count="multiple", file_types=[".csv", ".xlsx", ".xls"], type="filepath")
                    model_selector = gr.Dropdown(choices=list(QWEN_MODELS.keys()), value="Qwen/Qwen3-14B", label=TRANSLATIONS[current_language]['select_model'], info=QWEN_MODELS["Qwen/Qwen3-14B"]['best_for'])
                    detail_level = gr.Radio(choices=[(TRANSLATIONS[current_language]['detailed'], "detailed"), (TRANSLATIONS[current_language]['summarized'], "summarized")], value="detailed", label=TRANSLATIONS[current_language]['detail_level'])
                    additional_specs = gr.Textbox(label=TRANSLATIONS[current_language]['additional_specs'], placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'], lines=3, max_lines=5, interactive=True)
                    analyze_btn = gr.Button(TRANSLATIONS[current_language]['analyze_button'], variant="primary", size="lg")
                    gr.Markdown("---")
                    export_format = gr.Radio(choices=["PDF", "DOCX"], value="PDF", label=TRANSLATIONS[current_language]['export_format'])
                    export_btn = gr.Button(TRANSLATIONS[current_language]['export_button'], variant="secondary")
                    export_status = gr.Textbox(label="Export Status", interactive=False, visible=False)
                    export_file = gr.File(label="Download Report", visible=False)
                with gr.Column(scale=2):
                    analysis_output = gr.Markdown(label=TRANSLATIONS[current_language]['comparative_analysis'])
                    code_output = gr.Code(label=TRANSLATIONS[current_language]['implementation_code'], language="python", interactive=True, lines=20)
            with gr.Accordion(label=TRANSLATIONS[current_language]['data_format'], open=False) as data_format_accordion:
                # Kept flush-left so no line is indented enough to be read as a
                # Markdown code block. The blank line before the table keeps it
                # out of the last list item.
                gr.Markdown("""
### Expected CSV/Excel Structure:
The file should contain columns representing your model fitting results. Essential columns are:
- **Experiment**: An identifier for the experimental condition (e.g., `pH_7.0`, `Temp_30C`).
- **Model**: The name of the mathematical model (e.g., `Monod`, `Logistic`).
- **Type**: The type of process being modeled (e.g., `Biomass`, `Substrate`, `Product`). This is crucial for categorical analysis.
- **R2 / R_squared**: The coefficient of determination. Higher is better.
- **RMSE**: Root Mean Squared Error. Lower is better.
- **[Parameter_Columns]**: Additional columns for each model parameter (e.g., `mu_max`, `Ks`, `Xmax`).

| Experiment | Model | Type | R2 | RMSE | mu_max |
|:-----------|:---------|:----------|------:|-------:|--------:|
| pH_7.0 | Monod | Biomass | 0.985 | 0.023 | 0.45 |
| pH_7.0 | Logistic | Biomass | 0.991 | 0.018 | 0.48 |
| pH_7.5 | Monod | Biomass | 0.978 | 0.027 | 0.43 |
""")
            examples_ui = gr.Examples(
                examples=[
                    [["examples/biomass_models_comparison.csv"], "Qwen/Qwen3-14B", "detailed", ""],
                    [["examples/substrate_kinetics_results.xlsx"], "Qwen/Qwen3-14B", "summarized", "Focus on temperature effects and which temp is optimal."]
                ],
                inputs=[files_input, model_selector, detail_level, additional_specs],
                label=TRANSLATIONS[current_language]['examples']
            )

        # Event handlers
        language_selector.change(
            update_interface_language,
            inputs=[language_selector],
            # gr.Examples is a helper, not a component, so it cannot be an
            # event output. Its `dataset` attribute is the component that
            # carries the label.
            outputs=[title_text, subtitle_text, files_input, model_selector, language_selector, theme_selector, detail_level, additional_specs, analyze_btn, export_format, export_btn, analysis_output, code_output, data_format_accordion, examples_ui.dataset]
        )
        analyze_btn.click(
            fn=process_and_store,
            inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
            outputs=[analysis_output, code_output],
            api_name="analyze"
        )

        def handle_export(fmt, lang):
            """Export the stored analysis; show the status and, on success, the file."""
            status, file_path = export_report(fmt, lang)
            if file_path:
                return gr.update(value=status, visible=True), gr.update(value=file_path, visible=True)
            return gr.update(value=status, visible=True), gr.update(visible=False)

        export_btn.click(
            fn=handle_export,
            inputs=[export_format, language_selector],
            outputs=[export_status, export_file]
        )
    return demo
# --- Main entry point ---
def main():
    """Return the app to launch.

    This is the full interface when an API client exists; otherwise it is a
    minimal Interface whose only output is the missing-API-key message.
    """
    if client is not None:
        return create_interface()
    return gr.Interface(
        fn=lambda: TRANSLATIONS['en']['error_no_api'],
        inputs=[],
        outputs=gr.Textbox(label="Error"),
        title="Configuration Error",
    )
if __name__ == "__main__":
    # Create the example folder and files if they do not exist, so the
    # examples referenced by the UI do not make it fail.
    examples_dir = "examples"
    if not os.path.exists(examples_dir):
        os.makedirs(examples_dir)

    biomass_csv = "examples/biomass_models_comparison.csv"
    if not os.path.exists(biomass_csv):
        biomass_rows = {
            'Experiment': ['Exp1_pH7', 'Exp1_pH7', 'Exp2_pH8', 'Exp2_pH8'],
            'Model': ['Monod', 'Logistic', 'Monod', 'Logistic'],
            'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass'],
            'R2': [0.98, 0.99, 0.97, 0.96],
            'RMSE': [0.1, 0.05, 0.12, 0.15],
            'mu_max': [0.5, 0.52, 0.45, 0.46],
        }
        pd.DataFrame(biomass_rows).to_csv(biomass_csv, index=False)

    substrate_xlsx = "examples/substrate_kinetics_results.xlsx"
    if not os.path.exists(substrate_xlsx):
        substrate_rows = {
            'Experiment': ['T30C', 'T30C', 'T37C', 'T37C'],
            'Model': ['FirstOrder', 'MichaelisMenten', 'FirstOrder', 'MichaelisMenten'],
            'Type': ['Substrate', 'Substrate', 'Substrate', 'Substrate'],
            'R2': [0.95, 0.94, 0.99, 0.98],
            'RMSE': [0.2, 0.25, 0.08, 0.1],
            'Ks': [None, 1.5, None, 1.2],
        }
        pd.DataFrame(substrate_rows).to_excel(substrate_xlsx, index=False)

    # Build the app and serve it on all interfaces, port 7860, without a share link.
    demo = main()
    if demo:
        demo.launch(server_name="0.0.0.0", server_port=7860, share=False)