# Project-HF-2025 / app.py
# Hugging Face file-viewer header (kept for provenance):
#   uploader: C2MV · commit message: "Update app.py" · commit a7929b6 (verified) · size 36.9 kB
import gradio as gr
from openai import OpenAI
import PyPDF2
import pandas as pd
import numpy as np
import io
import os
import json
import zipfile
import tempfile
from typing import Dict, List, Tuple, Union, Optional
import re
from pathlib import Path
import openpyxl
from dataclasses import dataclass
from enum import Enum
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
import matplotlib.pyplot as plt
from datetime import datetime
# --- Qwen API configuration ---
# Gradio settings: switch off Gradio's usage analytics.
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

# Build an OpenAI-compatible client that points at the Nebius endpoint (Qwen).
# `client` is left as None when the key is missing/blank or construction fails;
# the rest of the file checks for None before making API calls.
client = None
try:
    # Read the key once; a blank or whitespace-only value is treated as
    # "not configured" instead of producing a client that cannot authenticate.
    _nebius_api_key = os.environ.get("NEBIUS_API_KEY", "").strip()
    if not _nebius_api_key:
        print("⚠️ ADVERTENCIA: La variable de entorno NEBIUS_API_KEY no está configurada. El analizador no funcionará.")
    else:
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=_nebius_api_key
        )
except Exception as e:
    print(f"Error al inicializar el cliente OpenAI para Nebius: {e}")
    client = None
# --- Full translation system ---
# UI strings keyed first by language code ('en', 'es', 'fr', 'de', 'pt') and
# then by message key. Every language defines the same set of keys; callers in
# this file look strings up as TRANSLATIONS[language][key].
TRANSLATIONS = {
    'en': {
        'title': '🧬 Comparative Analyzer of Biotechnological Models',
        'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
        'upload_files': '📁 Upload fitting results (CSV/Excel)',
        'select_model': '🤖 Qwen Model',
        'select_language': '🌐 Language',
        'select_theme': '🎨 Theme',
        'detail_level': '📋 Analysis detail level',
        'detailed': 'Detailed',
        'summarized': 'Summarized',
        'analyze_button': '🚀 Analyze and Compare Models',
        'export_format': '📄 Export format',
        'export_button': '💾 Export Report',
        'comparative_analysis': '📊 Comparative Analysis',
        'implementation_code': '💻 Implementation Code',
        'data_format': '📋 Expected data format',
        'examples': '📚 Analysis examples',
        'light': 'Light',
        'dark': 'Dark',
        'best_for': 'Best for',
        'loading': 'Loading...',
        'error_no_api': 'Please configure NEBIUS_API_KEY in the environment variables or Space secrets',
        'error_no_files': 'Please upload fitting result files to analyze',
        'report_exported': 'Report exported successfully as',
        'specialized_in': '🎯 Specialized in:',
        'metrics_analyzed': '📊 Analyzed metrics:',
        'what_analyzes': '🔍 What it specifically analyzes:',
        'tips': '💡 Tips for better results:',
        'additional_specs': '📝 Additional specifications for analysis',
        'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...'
    },
    'es': {
        'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
        'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
        'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
        'select_model': '🤖 Modelo Qwen',
        'select_language': '🌐 Idioma',
        'select_theme': '🎨 Tema',
        'detail_level': '📋 Nivel de detalle del análisis',
        'detailed': 'Detallado',
        'summarized': 'Resumido',
        'analyze_button': '🚀 Analizar y Comparar Modelos',
        'export_format': '📄 Formato de exportación',
        'export_button': '💾 Exportar Reporte',
        'comparative_analysis': '📊 Análisis Comparativo',
        'implementation_code': '💻 Código de Implementación',
        'data_format': '📋 Formato de datos esperado',
        'examples': '📚 Ejemplos de análisis',
        'light': 'Claro',
        'dark': 'Oscuro',
        'best_for': 'Mejor para',
        'loading': 'Cargando...',
        'error_no_api': 'Por favor configura NEBIUS_API_KEY en las variables de entorno o secretos del Space',
        'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
        'report_exported': 'Reporte exportado exitosamente como',
        'specialized_in': '🎯 Especializado en:',
        'metrics_analyzed': '📊 Métricas analizadas:',
        'what_analyzes': '🔍 Qué analiza específicamente:',
        'tips': '💡 Tips para mejores resultados:',
        'additional_specs': '📝 Especificaciones adicionales para el análisis',
        'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...'
    },
    'fr': {
        'title': '🧬 Analyseur Comparatif de Modèles Biotechnologiques',
        'subtitle': 'Spécialisé dans l\'analyse comparative des résultats d\'ajustement',
        'upload_files': '📁 Télécharger les résultats (CSV/Excel)',
        'select_model': '🤖 Modèle Qwen',
        'select_language': '🌐 Langue',
        'select_theme': '🎨 Thème',
        'detail_level': '📋 Niveau de détail',
        'detailed': 'Détaillé',
        'summarized': 'Résumé',
        'analyze_button': '🚀 Analyser et Comparer',
        'export_format': '📄 Format d\'export',
        'export_button': '💾 Exporter le Rapport',
        'comparative_analysis': '📊 Analyse Comparative',
        'implementation_code': '💻 Code d\'Implémentation',
        'data_format': '📋 Format de données attendu',
        'examples': '📚 Exemples d\'analyse',
        'light': 'Clair',
        'dark': 'Sombre',
        'best_for': 'Meilleur pour',
        'loading': 'Chargement...',
        'error_no_api': 'Veuillez configurer NEBIUS_API_KEY',
        'error_no_files': 'Veuillez télécharger des fichiers à analyser',
        'report_exported': 'Rapport exporté avec succès comme',
        'specialized_in': '🎯 Spécialisé dans:',
        'metrics_analyzed': '📊 Métriques analysées:',
        'what_analyzes': '🔍 Ce qu\'il analyse spécifiquement:',
        'tips': '💡 Conseils pour de meilleurs résultats:',
        'additional_specs': '📝 Spécifications supplémentaires pour l\'analyse',
        'additional_specs_placeholder': 'Ajoutez des exigences spécifiques ou des domaines d\'intérêt pour l\'analyse...'
    },
    'de': {
        'title': '🧬 Vergleichender Analysator für Biotechnologische Modelle',
        'subtitle': 'Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen',
        'upload_files': '📁 Ergebnisse hochladen (CSV/Excel)',
        'select_model': '🤖 Qwen Modell',
        'select_language': '🌐 Sprache',
        'select_theme': '🎨 Thema',
        'detail_level': '📋 Detailgrad der Analyse',
        'detailed': 'Detailliert',
        'summarized': 'Zusammengefasst',
        'analyze_button': '🚀 Analysieren und Vergleichen',
        'export_format': '📄 Exportformat',
        'export_button': '💾 Bericht Exportieren',
        'comparative_analysis': '📊 Vergleichende Analyse',
        'implementation_code': '💻 Implementierungscode',
        'data_format': '📋 Erwartetes Datenformat',
        'examples': '📚 Analysebeispiele',
        'light': 'Hell',
        'dark': 'Dunkel',
        'best_for': 'Am besten für',
        'loading': 'Laden...',
        'error_no_api': 'Bitte konfigurieren Sie NEBIUS_API_KEY',
        'error_no_files': 'Bitte laden Sie Dateien zur Analyse hoch',
        'report_exported': 'Bericht erfolgreich exportiert als',
        'specialized_in': '🎯 Spezialisiert auf:',
        'metrics_analyzed': '📊 Analysierte Metriken:',
        'what_analyzes': '🔍 Was spezifisch analysiert wird:',
        'tips': '💡 Tipps für bessere Ergebnisse:',
        'additional_specs': '📝 Zusätzliche Spezifikationen für die Analyse',
        'additional_specs_placeholder': 'Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...'
    },
    'pt': {
        'title': '🧬 Analisador Comparativo de Modelos Biotecnológicos',
        'subtitle': 'Especializado em análise comparativa de resultados de ajuste',
        'upload_files': '📁 Carregar resultados (CSV/Excel)',
        'select_model': '🤖 Modelo Qwen',
        'select_language': '🌐 Idioma',
        'select_theme': '🎨 Tema',
        'detail_level': '📋 Nível de detalhe',
        'detailed': 'Detalhado',
        'summarized': 'Resumido',
        'analyze_button': '🚀 Analisar e Comparar',
        'export_format': '📄 Formato de exportação',
        'export_button': '💾 Exportar Relatório',
        'comparative_analysis': '📊 Análise Comparativa',
        'implementation_code': '💻 Código de Implementação',
        'data_format': '📋 Formato de dados esperado',
        'examples': '📚 Exemplos de análise',
        'light': 'Claro',
        'dark': 'Escuro',
        'best_for': 'Melhor para',
        'loading': 'Carregando...',
        'error_no_api': 'Por favor configure NEBIUS_API_KEY',
        'error_no_files': 'Por favor carregue arquivos para analisar',
        'report_exported': 'Relatório exportado com sucesso como',
        'specialized_in': '🎯 Especializado em:',
        'metrics_analyzed': '📊 Métricas analisadas:',
        'what_analyzes': '🔍 O que analisa especificamente:',
        'tips': '💡 Dicas para melhores resultados:',
        'additional_specs': '📝 Especificações adicionais para a análise',
        'additional_specs_placeholder': 'Adicione requisitos específicos ou áreas de foco para a análise...'
    }
}
# --- Interface themes ---
# Gradio theme objects keyed by theme id. Only 'light' is applied by
# create_interface(); 'dark' is prepared for the (currently hidden) selector.
THEMES = {
    'light': gr.themes.Soft(),
    'dark': gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Arial", "sans-serif"]
    ).set(
        # Was the literal "dark", which is neither a CSS colour nor a theme
        # reference; use the same near-black shade as the dark-mode variant.
        body_background_fill="*neutral_950",
        body_background_fill_dark="*neutral_950",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_hover="*primary_500",
        button_primary_text_color="white",
        block_background_fill="*neutral_800",
        block_border_color="*neutral_700",
        block_label_text_color="*neutral_200",
        block_title_text_color="*neutral_100",
        checkbox_background_color="*neutral_700",
        checkbox_background_color_selected="*primary_600",
        input_background_fill="*neutral_700",
        input_border_color="*neutral_600",
        input_placeholder_color="*neutral_400"
    )
}
# --- Data classes and structures ---
class AnalysisType(Enum):
    """Labels for the kind of content an input may represent.

    Not referenced elsewhere in the visible part of this file.
    """
    MATHEMATICAL_MODEL = "mathematical_model"
    DATA_FITTING = "data_fitting"
    FITTING_RESULTS = "fitting_results"
    UNKNOWN = "unknown"
@dataclass
class MathematicalModel:
    """Descriptive record for one mathematical model stored in ModelRegistry."""
    # Display name; second-level key inside the registry.
    name: str
    # Human-readable formula text.
    equation: str
    # Parameter labels (the built-in entries include units, e.g. "μmax (h⁻¹)").
    parameters: List[str]
    # Short description of when the model applies.
    application: str
    # Free-text source labels (presumably institutions/references — TODO confirm).
    sources: List[str]
    # Grouping key; first-level key inside the registry.
    category: str
    # Plain-language interpretation of the model.
    biological_meaning: str
class ModelRegistry:
    """Two-level catalogue of models: ``models[category][name] -> MathematicalModel``."""

    def __init__(self):
        self.models = {}
        self._initialize_default_models()

    def register_model(self, model: MathematicalModel):
        """Store ``model`` under its category and name, replacing any same-named entry."""
        same_category = self.models.setdefault(model.category, {})
        same_category[model.name] = model

    def get_model(self, category: str, name: str) -> Optional[MathematicalModel]:
        """Return the model registered as ``category``/``name``, or None if absent."""
        same_category = self.models.get(category)
        if same_category is None:
            return None
        return same_category.get(name)

    def get_all_models(self) -> Dict:
        """Return the internal category -> name -> model mapping (not a copy)."""
        return self.models

    def _initialize_default_models(self):
        """Register the three built-in biomass-growth models, in a fixed order."""
        default_specs = (
            dict(
                name="Monod",
                equation="μ = μmax × (S / (Ks + S))",
                parameters=["μmax (h⁻¹)", "Ks (g/L)"],
                application="Crecimiento limitado por sustrato único",
                sources=["Cambridge", "MIT", "DTU"],
                category="crecimiento_biomasa",
                biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante",
            ),
            dict(
                name="Logístico",
                equation="dX/dt = μmax × X × (1 - X/Xmax)",
                parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
                application="Sistemas cerrados batch",
                sources=["Cranfield", "Swansea", "HAL Theses"],
                category="crecimiento_biomasa",
                biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema",
            ),
            dict(
                name="Gompertz",
                equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
                parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
                application="Crecimiento con fase lag pronunciada",
                sources=["Lund University", "NC State"],
                category="crecimiento_biomasa",
                biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario",
            ),
        )
        for spec in default_specs:
            self.register_model(MathematicalModel(**spec))
# Module-level registry instance; handed to AIAnalyzer by process_files().
model_registry = ModelRegistry()

# Selectable Qwen models keyed by the model id sent to the API. The keys feed
# the model dropdown and 'best_for' is shown as its info text. 'max_tokens' is
# not read by the visible code (the analyzer hard-codes its own limits).
QWEN_MODELS = {
    "Qwen/Qwen3-14B": {
        "name": "Qwen 3 14B",
        "description": "Modelo potente y versátil de la serie Qwen.",
        "max_tokens": 4096,
        "best_for": "Análisis complejos y generación de código detallado."
    }
}
# --- Processing and export classes ---
class FileProcessor:
    """Stateless helpers that turn raw uploaded bytes into text or DataFrames."""

    @staticmethod
    def extract_text_from_pdf(pdf_file: bytes) -> str:
        """Return the text of every page of a PDF, one page per line block.

        Pages without extractable text are skipped. On failure the returned
        string is an ``"Error reading PDF: ..."`` message rather than an
        exception, so callers always receive a ``str``.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
            page_texts = (page.extract_text() for page in pdf_reader.pages)
            return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    @staticmethod
    def read_csv(csv_file: bytes) -> Optional[pd.DataFrame]:
        """Parse CSV bytes into a DataFrame, or return None if parsing fails.

        UTF-8 is tried first; files that are not valid UTF-8 (common for
        spreadsheet exports) are retried as Latin-1 instead of being rejected.
        Failures are printed, matching ``extract_from_zip``.
        """
        try:
            try:
                return pd.read_csv(io.BytesIO(csv_file))
            except UnicodeDecodeError:
                return pd.read_csv(io.BytesIO(csv_file), encoding="latin-1")
        except Exception as e:
            print(f"Error reading CSV: {e}")
            return None

    @staticmethod
    def read_excel(excel_file: bytes) -> Optional[pd.DataFrame]:
        """Parse Excel bytes into a DataFrame, or return None if parsing fails.

        Failures are printed, matching ``extract_from_zip``.
        """
        try:
            return pd.read_excel(io.BytesIO(excel_file))
        except Exception as e:
            print(f"Error reading Excel: {e}")
            return None

    @staticmethod
    def extract_from_zip(zip_file: bytes) -> List[Tuple[str, bytes]]:
        """Return ``(member_name, member_bytes)`` for each regular file in a ZIP.

        Directory entries and macOS ``__MACOSX`` metadata are skipped. If the
        archive cannot be read, the error is printed and whatever was
        collected so far (possibly an empty list) is returned.
        """
        files = []
        try:
            with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
                for file_name in zip_ref.namelist():
                    if file_name.startswith('__MACOSX') or file_name.endswith('/'):
                        continue
                    files.append((file_name, zip_ref.read(file_name)))
        except Exception as e:
            print(f"Error processing ZIP: {e}")
        return files
class ReportExporter:
    """Render the Markdown-style analysis text into DOCX or PDF report files."""

    # Localized label printed in front of the generation timestamp.
    _DATE_LABELS = {
        'en': 'Generated on',
        'es': 'Generado el',
        'fr': 'Généré le',
        'de': 'Erstellt am',
        'pt': 'Gerado em',
    }

    # Emoji used by the UI strings, mapped to ASCII tags for the PDF output
    # (these characters can give problems in ReportLab).
    _EMOJI_FALLBACKS = {
        '🧬': '[DNA]',
        '🤖': '[BOT]',
        '📁': '[FILE]',
        '🚀': '[ROCKET]',
        '📊': '[GRAPH]',
        '💻': '[CODE]',
        '💾': '[SAVE]',
        '🎯': '[TARGET]',
        '🔍': '[SEARCH]',
        '💡': '[TIP]',
    }

    # Matches one **bold** span; the group captures the text between markers.
    _BOLD_SPAN = re.compile(r'\*\*(.+?)\*\*')

    @staticmethod
    def _split_heading(line: str) -> Tuple[int, str]:
        """Return ``(level, text)`` for a heading line, or ``(0, line)`` otherwise.

        Only the leading ``#`` markers are removed, so a ``#`` inside the
        heading text survives. Levels deeper than 3 are reported as 3.
        """
        if not line.startswith('#'):
            return 0, line
        text = line.lstrip('#')
        return min(len(line) - len(text), 3), text.strip()

    @classmethod
    def _to_pdf_markup(cls, text: str) -> str:
        """Convert one line of report text into safe ReportLab paragraph markup.

        Known emoji become ASCII tags, ``& < >`` are escaped because Paragraph
        parses its text as XML-like markup, and ``**bold**`` spans become
        ``<b>`` tags.
        """
        from xml.sax.saxutils import escape  # stdlib; local to keep file imports untouched

        for emoji, tag in cls._EMOJI_FALLBACKS.items():
            text = text.replace(emoji, tag)
        return cls._BOLD_SPAN.sub(r'<b>\1</b>', escape(text))

    @classmethod
    def _add_docx_runs(cls, paragraph, text: str) -> None:
        """Append ``text`` to a python-docx paragraph, making ``**spans**`` bold."""
        # re.split with one capture group alternates plain / captured chunks,
        # so odd positions are the bold spans.
        for position, chunk in enumerate(cls._BOLD_SPAN.split(text)):
            if not chunk:
                continue
            run = paragraph.add_run(chunk)
            if position % 2:
                run.bold = True

    @staticmethod
    def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
        """Write ``content`` as a Word document at ``filename`` and return the path.

        ``#``/``##``/``###`` lines become headings, ``- ``/``* `` lines become
        bullets, everything else a paragraph. Unknown ``language`` codes fall
        back to English for the title and date label.
        """
        doc = Document()
        title_style = doc.styles['Title']
        title_style.font.size = Pt(24)
        title_style.font.bold = True
        heading1_style = doc.styles['Heading 1']
        heading1_style.font.size = Pt(18)
        heading1_style.font.bold = True

        title_text = TRANSLATIONS.get(language, TRANSLATIONS['en'])['title']
        doc.add_heading(title_text, 0)
        date_label = ReportExporter._DATE_LABELS.get(language, ReportExporter._DATE_LABELS['en'])
        doc.add_paragraph(f"{date_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        doc.add_paragraph()

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            level, heading_text = ReportExporter._split_heading(line)
            if level:
                doc.add_heading(heading_text, level=level)
            elif line.startswith(('- ', '* ')):
                ReportExporter._add_docx_runs(doc.add_paragraph(style='List Bullet'), line[2:])
            else:
                ReportExporter._add_docx_runs(doc.add_paragraph(), line)

        doc.save(filename)
        return filename

    @staticmethod
    def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
        """Write ``content`` as a letter-size PDF at ``filename`` and return the path.

        Uses the same line classification as ``export_to_docx``. All text is
        passed through ``_to_pdf_markup`` so characters such as ``<`` or ``&``
        in the analysis cannot break ReportLab's markup parser.
        """
        doc = SimpleDocTemplate(filename, pagesize=letter)
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle('CustomTitle', parent=styles['Title'], fontSize=24, textColor=colors.HexColor('#1f4788'), spaceAfter=20)
        heading_style = ParagraphStyle('CustomHeading1', parent=styles['Heading1'], fontSize=16, textColor=colors.HexColor('#2e5090'), spaceAfter=12)
        # Indented style so the bullet glyph sits in the margin of the item.
        bullet_style = ParagraphStyle('CustomBullet', parent=styles['Normal'], leftIndent=18, bulletIndent=6)
        heading_styles = {1: heading_style, 2: styles['Heading2'], 3: styles['Heading3']}
        markup = ReportExporter._to_pdf_markup

        title_text = TRANSLATIONS.get(language, TRANSLATIONS['en'])['title']
        date_label = ReportExporter._DATE_LABELS.get(language, ReportExporter._DATE_LABELS['en'])
        story = [
            Paragraph(markup(title_text), title_style),
            Paragraph(f"{date_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']),
            Spacer(1, 0.3 * inch),
        ]

        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            level, heading_text = ReportExporter._split_heading(line)
            if level:
                story.append(Paragraph(markup(heading_text), heading_styles[level]))
            elif line.startswith(('- ', '* ')):
                # bulletText already draws the glyph; do not repeat it inline.
                story.append(Paragraph(markup(line[2:]), bullet_style, bulletText='•'))
            else:
                story.append(Paragraph(markup(line), styles['Normal']))
            story.append(Spacer(1, 0.1 * inch))

        doc.build(story)
        return filename
# --- AI analyzer class (Qwen) ---
class AIAnalyzer:
    """Sends fitting-result tables to a chat-completions model and collects its answers."""

    # Language names for the prompt prefix, so the model is asked for
    # "Spanish" rather than the bare code "es".
    _LANGUAGE_NAMES = {
        'en': 'English',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'pt': 'Portuguese',
    }

    def __init__(self, client, model_registry):
        # client: object exposing chat.completions.create(...), or None.
        self.client = client
        self.model_registry = model_registry

    @classmethod
    def _language_name(cls, language: str) -> str:
        """Return the English name for a language code, or the code itself if unknown."""
        return cls._LANGUAGE_NAMES.get(language, language)

    def get_language_prompt_prefix(self, language: str) -> str:
        """Return the instruction that tells the model which language to answer in.

        A ``'response_prefix'`` entry in TRANSLATIONS wins when present;
        otherwise a generic "Please respond in <language name>. " is built.
        """
        default_prefix = f"Please respond in {self._language_name(language)}. "
        return TRANSLATIONS.get(language, TRANSLATIONS['en']).get('response_prefix', default_prefix)

    def _complete(self, qwen_model: str, prompt: str, max_tokens: int, temperature: float) -> Optional[str]:
        """Run one single-turn chat completion and return the message text (may be None)."""
        response = self.client.chat.completions.create(
            model=qwen_model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95
        )
        return response.choices[0].message.content

    def analyze_fitting_results(self, data: pd.DataFrame, qwen_model: str, detail_level: str, language: str, additional_specs: str) -> Dict:
        """Ask the model for a written analysis and a companion Python script.

        Args:
            data: table of fitting results; it is embedded in full in the prompt.
            qwen_model: model id passed to the API.
            detail_level: ``"detailed"`` selects the long prompt; any other
                value selects the summarized prompt.
            language: language code used for the response-language prefix.
            additional_specs: optional free text added to the analysis prompt.

        Returns:
            ``{"analisis_completo": str, "codigo_implementacion": str}`` on
            success, or ``{"error": str}`` if the client is missing or the
            API call fails.
        """
        if self.client is None:
            return {"error": "AI client not initialized. Check NEBIUS_API_KEY."}

        data_summary = f"FITTING RESULTS DATA:\n- Columns: {list(data.columns)}\n- Number of models/entries: {len(data)}\n- Full Data:\n{data.to_string()}"
        lang_prefix = self.get_language_prompt_prefix(language)
        user_specs_section = f"\nADDITIONAL USER SPECIFICATIONS: {additional_specs}\n" if additional_specs else ""

        if detail_level == "detailed":
            prompt = f"""{lang_prefix}{user_specs_section}
You are an expert biotechnologist and data scientist. Analyze the provided model fitting results in detail.
1. **Overall Summary:** Briefly describe the dataset (number of experiments, models, metrics).
2. **Analysis by Experiment/Condition:** For each unique experiment/condition found in the data:
- Identify the best performing model for each category (e.g., Biomass, Substrate, Product) based on R2 (higher is better) and RMSE (lower is better).
- List the best model's name, its key metrics (R2, RMSE), and its fitted parameters.
- Provide a brief biological interpretation of the parameters for the best model in that context.
3. **Overall Best Models:** Identify the most robust models across all experiments for each category. Justify your choice (e.g., "Model X was best in 3 out of 4 experiments").
4. **Parameter Trends:** If possible, comment on how key parameters (like μmax, Ks) change across different experimental conditions.
5. **Conclusion and Recommendations:** Summarize the findings and recommend which models to use for future predictions under specific conditions.
Format the entire response using clear Markdown headers, lists, and bold text."""
        else:  # summarized
            prompt = f"""{lang_prefix}{user_specs_section}
You are an expert biotechnologist. Provide a concise, summarized analysis of the provided model fitting results.
1. **Best Models Summary Table:** Create a table that shows the best model for each Experiment and Type (Biomass, Substrate, etc.) along with its R2 value.
2. **Overall Winners:** State the single best model for Biomass, Substrate, and Product across all experiments.
3. **Key Insights:** In 2-3 bullet points, what are the most important findings? (e.g., "Gompertz model consistently outperforms others for biomass.", "μmax is highest at pH 7.5.").
Format as a brief and clear Markdown report."""

        code_prompt = f"""{lang_prefix}
Based on the provided data, generate a single, complete, and executable Python script for analysis.
The script must:
1. Contain the provided data hardcoded into a pandas DataFrame.
2. Define functions to analyze the data to find the best model per experiment and type.
3. Include functions to create visualizations using matplotlib or seaborn, such as:
- A bar chart comparing the R2 values of the best models across experiments.
- A summary table of the best models.
4. Have a `if __name__ == "__main__":` block that runs the analysis and shows the plots.
The code should be well-commented and self-contained."""

        try:
            # Main analysis
            analysis_text = self._complete(qwen_model, f"{prompt}\n\n{data_summary}", 4000, 0.6)
            # Code generation; lower temperature for more deterministic code
            code_text = self._complete(qwen_model, f"{code_prompt}\n\n{data_summary}", 3500, 0.4)
        except Exception as e:
            return {"error": f"An error occurred with the AI API: {str(e)}"}

        # The API may return no text (content=None); hand callers a string so
        # that joining the results later cannot fail.
        return {
            "analisis_completo": analysis_text or "No analysis generated.",
            "codigo_implementacion": code_text or "# No code generated.",
        }
# --- Application logic ---
def process_files(files, model_name: str, detail_level: str, language: str, additional_specs: str) -> Tuple[str, str]:
    """Analyze every uploaded CSV/Excel file and merge the results.

    Args:
        files: uploaded items from the Gradio file input; either plain path
            strings (``gr.File(type="filepath")``) or objects with a ``.name``
            path attribute.
        model_name: model id forwarded to the analyzer.
        detail_level: ``"detailed"`` or ``"summarized"``.
        language: language code for messages and the response language.
        additional_specs: optional free-text instructions for the analysis.

    Returns:
        ``(analysis_markdown, python_code)``. Per-file failures are reported
        inside the analysis text rather than raised. If no code was produced
        for any file, the generic template from
        ``generate_implementation_code`` is returned instead.
    """
    if not files:
        return TRANSLATIONS[language]['error_no_files'], "Please upload files first."
    if client is None:
        return TRANSLATIONS[language]['error_no_api'], "AI client is not configured."

    analyzer = AIAnalyzer(client, model_registry)
    all_analysis_parts = []
    all_code_parts = []

    for file in files:
        # With type="filepath" Gradio passes str paths, which have no `.name`;
        # resolve the path once so the error handler below can use it too.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = getattr(file, "name", str(file))
        try:
            with open(file_path, 'rb') as f:
                file_content = f.read()
            file_name = os.path.basename(file_path)
            file_ext = Path(file_name).suffix.lower()

            df = None
            if file_ext == '.csv':
                df = FileProcessor.read_csv(file_content)
            elif file_ext in ['.xlsx', '.xls']:
                df = FileProcessor.read_excel(file_content)

            if df is None:
                all_analysis_parts.append(f"# Could not process file: {file_name}")
                continue

            all_analysis_parts.append(f"# Analysis for: {file_name}")
            result = analyzer.analyze_fitting_results(df, model_name, detail_level, language, additional_specs)
            if "error" in result:
                all_analysis_parts.append(f"An error occurred: {result['error']}")
            else:
                all_analysis_parts.append(result.get("analisis_completo", "No analysis generated."))
                all_code_parts.append(f"# Code generated from: {file_name}\n{result.get('codigo_implementacion', '# No code generated.')}")
        except Exception as e:
            all_analysis_parts.append(f"# Error processing {file_path}: {str(e)}")

    final_analysis = "\n\n---\n\n".join(all_analysis_parts)
    if all_code_parts:
        # Put the rule BETWEEN sections (the old expression prepended it once
        # because `+` binds tighter than the intended join), and make it a
        # comment so the combined text is still valid Python.
        section_separator = "\n\n# " + "=" * 78 + "\n\n"
        final_code = section_separator.join(all_code_parts)
    else:
        final_code = generate_implementation_code(final_analysis)
    return final_analysis, final_code
def generate_implementation_code(analysis_results: str) -> str:
    """Return a generic, runnable analysis script used when the API produced no code.

    Args:
        analysis_results: accepted for interface compatibility; the template
            does not depend on it.

    Returns:
        Python source text (starting with a newline) that defines
        ``analyze_data`` and ``visualize_results`` and runs them on
        placeholder data.
    """
    # This is a fallback in case the API fails to generate code.
    # It provides a generic template. The template must carry its own
    # indentation: it is shown to the user as code to copy and run.
    return """
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def analyze_data(df):
    \"\"\"
    Analyzes the dataframe to find the best model per experiment and type.
    This is a template function. You may need to adapt it to your specific column names.
    \"\"\"
    # Assuming columns 'Experiment', 'Type', 'R2', 'Model' exist
    if not all(col in df.columns for col in ['Experiment', 'Type', 'R2', 'Model']):
        print("DataFrame is missing required columns: 'Experiment', 'Type', 'R2', 'Model'")
        return None
    # Find the index of the max R2 for each group
    best_models_idx = df.loc[df.groupby(['Experiment', 'Type'])['R2'].idxmax()]
    print("--- Best Models by Experiment and Type ---")
    print(best_models_idx[['Experiment', 'Type', 'Model', 'R2']].to_string(index=False))
    return best_models_idx

def visualize_results(best_models_df):
    \"\"\"
    Creates a bar plot to visualize the R2 scores of the best models.
    \"\"\"
    if best_models_df is None:
        print("Cannot visualize results. Analysis failed.")
        return
    plt.figure(figsize=(12, 7))
    sns.barplot(data=best_models_df, x='Experiment', y='R2', hue='Type', palette='viridis')
    plt.title('Best Model Performance (R²) by Experiment and Type', fontsize=16)
    plt.xlabel('Experiment', fontsize=12)
    plt.ylabel('R² Score', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.ylim(bottom=max(0, best_models_df['R2'].min() - 0.05), top=1.0)
    plt.legend(title='Variable Type')
    plt.tight_layout()
    plt.show()

if __name__ == '__main__':
    # TODO: Replace this with your actual data
    # This is placeholder data.
    data = {
        'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.0', 'pH_7.5'],
        'Model': ['Monod', 'Logistic', 'Monod', 'Logistic', 'FirstOrder', 'FirstOrder'],
        'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Substrate', 'Substrate'],
        'R2': [0.98, 0.99, 0.97, 0.96, 0.95, 0.99],
        'RMSE': [0.1, 0.05, 0.12, 0.15, 0.2, 0.08]
    }
    df = pd.DataFrame(data)
    print("--- Input Data ---")
    print(df)
    print("\\n")
    best_models = analyze_data(df)
    visualize_results(best_models)
"""
class AppState:
    """Holder for the most recent analysis so the export step can reuse it."""
    def __init__(self):
        # Last analysis text; export_report() reads it and refuses to export when empty.
        self.current_analysis = ""
        # Last generated code; stored by the UI handler, not read in the visible code.
        self.current_code = ""
        # Language code last chosen in the UI.
        self.current_language = "en"


# Single module-level instance.
# NOTE(review): being module-level, it looks like this is shared by every
# session of the app — confirm whether per-session state is needed.
app_state = AppState()
def export_report(export_format: str, language: str) -> Tuple[str, Optional[str]]:
    """Write the stored analysis to a timestamped DOCX or PDF file.

    Returns ``(status_message, file_path)``; ``file_path`` is None when there
    is nothing to export or the export raised. Any format other than
    ``"DOCX"`` is written as PDF.
    """
    analysis_text = app_state.current_analysis
    if not analysis_text:
        # Reuses the "no files" message, swapping the verb where it occurs.
        return TRANSLATIONS[language]['error_no_files'].replace('analizar', 'exportar'), None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    wants_docx = export_format == "DOCX"
    try:
        extension = "docx" if wants_docx else "pdf"
        filename = f"biotech_analysis_report_{timestamp}.{extension}"
        if wants_docx:
            ReportExporter.export_to_docx(analysis_text, filename, language)
        else:  # PDF
            ReportExporter.export_to_pdf(analysis_text, filename, language)
        return f"{TRANSLATIONS[language]['report_exported']} {filename}", filename
    except Exception as e:
        return f"Error during export: {str(e)}", None
# --- Gradio interface ---
def create_interface():
    """Build and return the Gradio Blocks app (layout plus event wiring).

    NOTE(review): the exact nesting of the layout containers was reconstructed
    from the ``with`` statements; verify the accordion/examples placement.
    """
    current_language = "en"

    def update_interface_language(language: str):
        """Return one update per output component, in the order wired below."""
        app_state.current_language = language
        t = TRANSLATIONS[language]
        return [
            gr.update(value=f"# {t['title']}"),
            gr.update(value=t['subtitle']),
            gr.update(label=t['upload_files']),
            gr.update(label=t['select_model']),
            gr.update(label=t['select_language']),
            gr.update(label=t['select_theme']),
            gr.update(label=t['detail_level'], choices=[(t['detailed'], "detailed"), (t['summarized'], "summarized")]),
            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
            gr.update(value=t['analyze_button']),
            gr.update(label=t['export_format']),
            gr.update(value=t['export_button']),
            gr.update(label=t['comparative_analysis']),
            gr.update(label=t['implementation_code']),
            gr.update(label=t['data_format']),
            gr.update(label=t['examples'])
        ]

    def process_and_store(files, model, detail, language, additional_specs):
        """Run the analysis and remember the result for a later export."""
        if not files:
            return TRANSLATIONS[language]['error_no_files'], ""
        analysis, code = process_files(files, model, detail, language, additional_specs)
        app_state.current_analysis = analysis
        app_state.current_code = code
        return analysis, code

    with gr.Blocks(theme=THEMES['light']) as demo:
        # UI structure
        with gr.Column():
            with gr.Row():
                with gr.Column(scale=3):
                    title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
                    subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
                with gr.Column(scale=1):
                    language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es"), ("Français", "fr"), ("Deutsch", "de"), ("Português", "pt")], value=current_language, label="Language", interactive=True)
                    theme_selector = gr.Dropdown(choices=["Light", "Dark"], value="Light", label="Theme", interactive=True, visible=False)  # Theme switching is complex, hiding for now
            with gr.Row():
                with gr.Column(scale=1):
                    files_input = gr.File(label=TRANSLATIONS[current_language]['upload_files'], file_count="multiple", file_types=[".csv", ".xlsx", ".xls"], type="filepath")
                    model_selector = gr.Dropdown(choices=list(QWEN_MODELS.keys()), value="Qwen/Qwen3-14B", label=TRANSLATIONS[current_language]['select_model'], info=QWEN_MODELS["Qwen/Qwen3-14B"]['best_for'])
                    detail_level = gr.Radio(choices=[(TRANSLATIONS[current_language]['detailed'], "detailed"), (TRANSLATIONS[current_language]['summarized'], "summarized")], value="detailed", label=TRANSLATIONS[current_language]['detail_level'])
                    additional_specs = gr.Textbox(label=TRANSLATIONS[current_language]['additional_specs'], placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'], lines=3, max_lines=5, interactive=True)
                    analyze_btn = gr.Button(TRANSLATIONS[current_language]['analyze_button'], variant="primary", size="lg")
                    gr.Markdown("---")
                    export_format = gr.Radio(choices=["PDF", "DOCX"], value="PDF", label=TRANSLATIONS[current_language]['export_format'])
                    export_btn = gr.Button(TRANSLATIONS[current_language]['export_button'], variant="secondary")
                    export_status = gr.Textbox(label="Export Status", interactive=False, visible=False)
                    export_file = gr.File(label="Download Report", visible=False)
                with gr.Column(scale=2):
                    analysis_output = gr.Markdown(label=TRANSLATIONS[current_language]['comparative_analysis'])
                    code_output = gr.Code(label=TRANSLATIONS[current_language]['implementation_code'], language="python", interactive=True, lines=20)
            with gr.Accordion(label=TRANSLATIONS[current_language]['data_format'], open=False) as data_format_accordion:
                gr.Markdown("""
### Expected CSV/Excel Structure:
The file should contain columns representing your model fitting results. Essential columns are:
- **Experiment**: An identifier for the experimental condition (e.g., `pH_7.0`, `Temp_30C`).
- **Model**: The name of the mathematical model (e.g., `Monod`, `Logistic`).
- **Type**: The type of process being modeled (e.g., `Biomass`, `Substrate`, `Product`). This is crucial for categorical analysis.
- **R2 / R_squared**: The coefficient of determination. Higher is better.
- **RMSE**: Root Mean Squared Error. Lower is better.
- **[Parameter_Columns]**: Additional columns for each model parameter (e.g., `mu_max`, `Ks`, `Xmax`).
| Experiment | Model | Type | R2 | RMSE | mu_max |
|:-----------|:---------|:----------|------:|-------:|--------:|
| pH_7.0 | Monod | Biomass | 0.985 | 0.023 | 0.45 |
| pH_7.0 | Logistic | Biomass | 0.991 | 0.018 | 0.48 |
| pH_7.5 | Monod | Biomass | 0.978 | 0.027 | 0.43 |
""")
            examples_ui = gr.Examples(
                examples=[
                    [["examples/biomass_models_comparison.csv"], "Qwen/Qwen3-14B", "detailed", ""],
                    [["examples/substrate_kinetics_results.xlsx"], "Qwen/Qwen3-14B", "summarized", "Focus on temperature effects and which temp is optimal."]
                ],
                inputs=[files_input, model_selector, detail_level, additional_specs],
                label=TRANSLATIONS[current_language]['examples']
            )

        # Event Handlers
        # gr.Examples is a helper object, not a component, so it cannot be an
        # event output; its underlying Dataset component carries the label.
        language_selector.change(
            update_interface_language,
            inputs=[language_selector],
            outputs=[title_text, subtitle_text, files_input, model_selector, language_selector, theme_selector, detail_level, additional_specs, analyze_btn, export_format, export_btn, analysis_output, code_output, data_format_accordion, examples_ui.dataset]
        )
        analyze_btn.click(
            fn=process_and_store,
            inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
            outputs=[analysis_output, code_output],
            api_name="analyze"
        )

        def handle_export(fmt, lang):
            """Export the stored analysis and reveal the status box and download link."""
            status, file_path = export_report(fmt, lang)
            if file_path:
                return gr.update(value=status, visible=True), gr.update(value=file_path, visible=True)
            else:
                return gr.update(value=status, visible=True), gr.update(visible=False)

        export_btn.click(
            fn=handle_export,
            inputs=[export_format, language_selector],
            outputs=[export_status, export_file]
        )
    return demo
# --- Main entry point ---
def main():
    """Return the app to launch: the full UI, or an error page when no API client exists."""
    if client is not None:
        return create_interface()

    # No API client: serve a minimal page that only shows the configuration error.
    error_page = gr.Interface(
        fn=lambda: TRANSLATIONS['en']['error_no_api'],
        inputs=[],
        outputs=gr.Textbox(label="Error"),
        title="Configuration Error"
    )
    return error_page
if __name__ == "__main__":
    # Create the example folder and files if they are missing so the UI
    # (which references them in gr.Examples) does not fail.
    examples_dir = "examples"
    biomass_csv = "examples/biomass_models_comparison.csv"
    substrate_xlsx = "examples/substrate_kinetics_results.xlsx"

    if not os.path.exists(examples_dir):
        os.makedirs(examples_dir)

    if not os.path.exists(biomass_csv):
        biomass_example = pd.DataFrame({
            'Experiment': ['Exp1_pH7', 'Exp1_pH7', 'Exp2_pH8', 'Exp2_pH8'],
            'Model': ['Monod', 'Logistic', 'Monod', 'Logistic'],
            'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass'],
            'R2': [0.98, 0.99, 0.97, 0.96],
            'RMSE': [0.1, 0.05, 0.12, 0.15],
            'mu_max': [0.5, 0.52, 0.45, 0.46]
        })
        biomass_example.to_csv(biomass_csv, index=False)

    if not os.path.exists(substrate_xlsx):
        substrate_example = pd.DataFrame({
            'Experiment': ['T30C', 'T30C', 'T37C', 'T37C'],
            'Model': ['FirstOrder', 'MichaelisMenten', 'FirstOrder', 'MichaelisMenten'],
            'Type': ['Substrate', 'Substrate', 'Substrate', 'Substrate'],
            'R2': [0.95, 0.94, 0.99, 0.98],
            'RMSE': [0.2, 0.25, 0.08, 0.1],
            'Ks': [None, 1.5, None, 1.2]
        })
        substrate_example.to_excel(substrate_xlsx, index=False)

    demo = main()
    if demo:
        demo.launch(server_name="0.0.0.0", server_port=7860, share=False)