# Source: Hugging Face repository "Project-HF-2025", file app.py
# Last commit: "Update app.py" by C2MV (11f0136, verified) - 19.5 kB
import gradio as gr
from openai import OpenAI
import PyPDF2
import pandas as pd
import numpy as np
import io
import os
import json
import zipfile
import tempfile
from typing import Dict, List, Tuple, Union, Optional
import re
from pathlib import Path
import openpyxl
from dataclasses import dataclass
from enum import Enum
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import matplotlib.pyplot as plt
from datetime import datetime
# Hugging Face configuration: turn off Gradio usage analytics.
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

# OpenAI-compatible client pointed at Nebius AI (Qwen models).
# The OpenAI constructor raises when it is given no API key (and OPENAI_API_KEY
# is not set either). That would abort the import before main() gets the chance
# to show its "Configuration Error" screen, so a non-empty placeholder is passed
# when NEBIUS_API_KEY is unset. main() checks the variable itself and does not
# build the analyzer UI without a real key.
client = OpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=os.environ.get("NEBIUS_API_KEY") or "NEBIUS_API_KEY-not-configured",
)
# Translation table: language code -> {UI string key -> localized text}.
# Every language must define the same set of keys; 'generated_on' is read by
# the report exporter when it stamps the creation time on a document.
TRANSLATIONS = {
    'en': {
        'title': '🧬 Comparative Analyzer of Biotechnological Models',
        'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
        'upload_files': '📁 Upload fitting results (CSV/Excel)',
        'select_model': '🤖 AI Model',
        'select_language': '🌐 Language',
        'select_theme': '🎨 Theme',
        'detail_level': '📋 Analysis detail level',
        'detailed': 'Detailed',
        'summarized': 'Summarized',
        'analyze_button': '🚀 Analyze and Compare Models',
        'export_format': '📄 Export format',
        'export_button': '💾 Export Report',
        'comparative_analysis': '📊 Comparative Analysis',
        'implementation_code': '💻 Full Implementation Code (AI-Generated)',
        'data_format': '📋 Expected data format',
        'examples': '📚 Analysis examples',
        'light': 'Light',
        'dark': 'Dark',
        'best_for': 'Best for',
        'loading': 'Loading...',
        'error_no_api': 'Please configure NEBIUS_API_KEY in your environment secrets',
        'error_no_files': 'Please upload fitting result files to analyze',
        'report_exported': 'Report exported successfully as',
        'generated_on': 'Generated on',
        'specialized_in': '🎯 Specialized in:',
        'metrics_analyzed': '📊 Analyzed metrics:',
        'what_analyzes': '🔍 What it specifically analyzes:',
        'tips': '💡 Tips for better results:',
        'additional_specs': '📝 Additional specifications for analysis',
        'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...',
    },
    'es': {
        'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
        'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
        'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
        'select_model': '🤖 Modelo de IA',
        'select_language': '🌐 Idioma',
        'select_theme': '🎨 Tema',
        'detail_level': '📋 Nivel de detalle del análisis',
        'detailed': 'Detallado',
        'summarized': 'Resumido',
        'analyze_button': '🚀 Analizar y Comparar Modelos',
        'export_format': '📄 Formato de exportación',
        'export_button': '💾 Exportar Reporte',
        'comparative_analysis': '📊 Análisis Comparativo',
        'implementation_code': '💻 Código de Implementación Completo (Generado por IA)',
        'data_format': '📋 Formato de datos esperado',
        'examples': '📚 Ejemplos de análisis',
        'light': 'Claro',
        'dark': 'Oscuro',
        'best_for': 'Mejor para',
        'loading': 'Cargando...',
        'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos de tu entorno',
        'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
        'report_exported': 'Reporte exportado exitosamente como',
        'generated_on': 'Generado el',
        'specialized_in': '🎯 Especializado en:',
        'metrics_analyzed': '📊 Métricas analizadas:',
        'what_analyzes': '🔍 Qué analiza específicamente:',
        'tips': '💡 Tips para mejores resultados:',
        'additional_specs': '📝 Especificaciones adicionales para el análisis',
        'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...',
    },
}
# Themes and structural classes (unchanged).
# THEMES maps a theme key ('light' / 'dark') to a Gradio theme object.
THEMES = {
    'light': gr.themes.Soft(),
    'dark': gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Arial", "sans-serif"],
    ).set(
        # NOTE(review): the literal "dark" is passed as the fill value as-is;
        # confirm this is the intended value for this setting.
        body_background_fill="dark",
        body_background_fill_dark="*neutral_950",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_hover="*primary_500",
        button_primary_text_color="white",
        block_background_fill="*neutral_800",
        block_border_color="*neutral_700",
        block_label_text_color="*neutral_200",
        block_title_text_color="*neutral_100",
        checkbox_background_color="*neutral_700",
        checkbox_background_color_selected="*primary_600",
        input_background_fill="*neutral_700",
        input_border_color="*neutral_600",
        input_placeholder_color="*neutral_400",
    ),
}
class AnalysisType(Enum):
    """Analysis categories; each member's value is its lowercase string tag."""

    MATHEMATICAL_MODEL = "mathematical_model"
    DATA_FITTING = "data_fitting"
    FITTING_RESULTS = "fitting_results"
    UNKNOWN = "unknown"
@dataclass
class MathematicalModel:
    """Descriptive record for one mathematical model held in the registry."""

    name: str                # key of the model inside its category
    equation: str            # equation written as display text
    parameters: List[str]    # parameter labels
    application: str         # short description of where the model applies
    sources: List[str]       # names of the cited sources
    category: str            # top-level registry key the model is filed under
    biological_meaning: str  # plain-language interpretation of the model
class ModelRegistry:
    """Two-level catalogue of models: category -> model name -> MathematicalModel."""

    def __init__(self):
        self.models = {}
        self._initialize_default_models()

    def register_model(self, model: MathematicalModel):
        """Store ``model`` under its category and name, replacing any same-named entry."""
        self.models.setdefault(model.category, {})[model.name] = model

    def get_model(self, category: str, name: str) -> MathematicalModel:
        """Return the model filed as ``name`` in ``category``; None when either is unknown."""
        models_in_category = self.models.get(category, {})
        return models_in_category.get(name)

    def get_all_models(self) -> Dict:
        """Return the full category -> name -> model mapping (the live dict, not a copy)."""
        return self.models

    def _initialize_default_models(self):
        """Register the three built-in biomass growth models."""
        default_models = (
            MathematicalModel(
                name="Monod",
                equation="μ = μmax × (S / (Ks + S))",
                parameters=["μmax (h⁻¹)", "Ks (g/L)"],
                application="Crecimiento limitado por sustrato único",
                sources=["Cambridge", "MIT", "DTU"],
                category="crecimiento_biomasa",
                biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante",
            ),
            MathematicalModel(
                name="Logístico",
                equation="dX/dt = μmax × X × (1 - X/Xmax)",
                parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
                application="Sistemas cerrados batch",
                sources=["Cranfield", "Swansea", "HAL Theses"],
                category="crecimiento_biomasa",
                biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema",
            ),
            MathematicalModel(
                name="Gompertz",
                equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
                parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
                application="Crecimiento con fase lag pronunciada",
                sources=["Lund University", "NC State"],
                category="crecimiento_biomasa",
                biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario",
            ),
        )
        for default_model in default_models:
            self.register_model(default_model)
# Module-level registry instance; its constructor registers the default models.
model_registry = ModelRegistry()
# Selectable AI models, keyed by the model identifier sent to the API.
# Each entry carries display metadata; 'best_for' is shown next to the selector.
AI_MODELS = {
    "Qwen/Qwen3-14B": {
        "name": "Qwen 3 14B (Nebius)",
        "description": "Modelo potente de la serie Qwen, accedido vía Nebius AI.",
        "max_tokens": 8000,
        "best_for": "Análisis complejos y generación de código detallado.",
    },
}
class FileProcessor:
    """Parses raw uploaded file bytes into pandas DataFrames."""

    @staticmethod
    def _load(reader, payload, kind: str) -> Optional[pd.DataFrame]:
        """Run ``reader`` over ``payload`` wrapped in a BytesIO; return None on failure.

        Parsing stays best-effort (callers test the result for None), but the
        failure is logged with its traceback instead of being discarded, so an
        unreadable upload can be diagnosed.
        """
        import logging  # local import keeps this block self-contained

        try:
            return reader(io.BytesIO(payload))
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not parse uploaded %s content", kind, exc_info=True
            )
            return None

    @staticmethod
    def read_csv(csv_file) -> Optional[pd.DataFrame]:
        """Parse CSV bytes with ``pd.read_csv``; return None if they cannot be parsed."""
        return FileProcessor._load(pd.read_csv, csv_file, "CSV")

    @staticmethod
    def read_excel(excel_file) -> Optional[pd.DataFrame]:
        """Parse Excel bytes with ``pd.read_excel``; return None if they cannot be parsed."""
        return FileProcessor._load(pd.read_excel, excel_file, "Excel")
class ReportExporter:
    """Writes a report to disk as a Word (.docx) or PDF document."""

    @staticmethod
    def _labels(language: str) -> dict:
        """Return the UI strings for ``language``, using English for unknown codes."""
        return TRANSLATIONS.get(language, TRANSLATIONS['en'])

    @staticmethod
    def _timestamp_line(labels: dict) -> str:
        """Build the "<label>: <now>" line stamped on every report."""
        # Use .get with a default: the translation table may not define 'generated_on'.
        label = labels.get('generated_on', 'Generated on')
        return f"{label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"

    @staticmethod
    def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
        """Write the title, a timestamp and ``content`` to a .docx file.

        ``content`` is written as plain text, one paragraph per line (Markdown
        markers are not interpreted). Returns ``filename``.
        """
        labels = ReportExporter._labels(language)
        doc = Document()
        doc.add_heading(labels['title'], 0)
        doc.add_paragraph(ReportExporter._timestamp_line(labels))
        for line in (content or "").splitlines():
            doc.add_paragraph(line)
        doc.save(filename)
        return filename

    @staticmethod
    def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
        """Write the title, a timestamp and ``content`` to a letter-size PDF.

        ``content`` is written as plain text, one paragraph per line (Markdown
        markers are not interpreted). Returns ``filename``.
        """
        # Paragraph parses XML-like inline markup, so literal text is escaped first.
        from xml.sax.saxutils import escape

        labels = ReportExporter._labels(language)
        styles = getSampleStyleSheet()
        story = [
            Paragraph(escape(labels['title']), styles['Title']),
            Paragraph(escape(ReportExporter._timestamp_line(labels)), styles['Normal']),
            Spacer(1, 12),
        ]
        for line in (content or "").splitlines():
            if line.strip():
                story.append(Paragraph(escape(line), styles['Normal']))
            else:
                # Blank source lines become a small vertical gap.
                story.append(Spacer(1, 6))
        doc = SimpleDocTemplate(filename, pagesize=letter)
        doc.build(story)
        return filename
class AIAnalyzer:
    """Asks the chat-completions API for a written analysis and a Python script."""

    # First fenced block in a reply: an opening ``` with an optional language
    # tag, then everything up to the closing ``` (or to the end of the text when
    # the reply was cut off before the fence was closed).
    _CODE_FENCE = re.compile(r"```[\w+-]*[ \t]*\r?\n(.*?)(?:```|\Z)", re.DOTALL)

    def __init__(self, client: "OpenAI", model_registry: "ModelRegistry"):
        self.client = client
        self.model_registry = model_registry

    def get_language_prompt_prefix(self, language: str) -> str:
        """Return the "respond in <language>" sentence; English for unknown codes."""
        prefixes = {
            'en': "Please respond in English. ",
            'es': "Por favor responde en español. ",
            'fr': "Veuillez répondre en français. ",
            'de': "Bitte antworten Sie auf Deutsch. ",
            'pt': "Por favor responda em português. ",
        }
        return prefixes.get(language, prefixes['en'])

    @classmethod
    def _extract_code(cls, raw: Optional[str]) -> str:
        """Return the code inside the first Markdown fence of ``raw``.

        Handles replies that put text before or after the fence, or that use a
        bare or differently tagged fence. Without any fence the stripped text is
        returned unchanged; a missing reply yields an empty string.
        """
        if not raw:
            return ""
        fenced = cls._CODE_FENCE.search(raw)
        return (fenced.group(1) if fenced else raw).strip()

    def _complete(self, ai_model: str, prompt: str, temperature: float) -> str:
        """Send ``prompt`` as a single user message and return the reply text ("" if absent)."""
        response = self.client.chat.completions.create(
            model=ai_model,
            max_tokens=4000,
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content or ""

    def analyze_fitting_results(self, data: pd.DataFrame, ai_model: str, detail_level: str = "detailed",
                                language: str = "en", additional_specs: str = "") -> Dict:
        """Request a comparative analysis and a standalone script for ``data``.

        Args:
            data: Table of fitting results; it is rendered with ``to_string`` and
                embedded in both prompts.
            ai_model: Model identifier passed to the API.
            detail_level: Upper-cased into the analysis prompt.
            language: Language code used to pick the response-language prefix.
            additional_specs: Optional free text appended to the analysis prompt.

        Returns:
            ``{"analisis_completo": str, "codigo_implementacion": str}`` on
            success, or ``{"error": str}`` if an API call raises.
        """
        table_text = data.to_string()  # rendered once, shared by both prompts
        data_summary = f"FITTING RESULTS DATA:\n\n{table_text}"
        lang_prefix = self.get_language_prompt_prefix(language)
        user_specs_section = f"\n\nUSER ADDITIONAL SPECIFICATIONS:\n{additional_specs}" if additional_specs else ""

        # Prompt for the written analysis.
        analysis_prompt = (
            f"{lang_prefix}\nYou are an expert in biotechnology and mathematical modeling. "
            f"Analyze these model fitting results.\n{user_specs_section}\n"
            f"DETAIL LEVEL: {detail_level.upper()}\n\n"
            "Provide a comprehensive comparative analysis based on the provided data. "
            "Structure your response clearly using Markdown. "
            "Identify the best models for each experimental condition and justify your choices with metrics like R² and RMSE. "
            "Conclude with overall recommendations.\n\n"
            f"{data_summary}"
        )

        # Prompt for code generation. The text is kept flush-left because every
        # character inside the triple-quoted string is sent to the model.
        code_prompt = f"""
{lang_prefix}
Based on the following data, generate a SINGLE, COMPLETE, and EXECUTABLE Python script.
**Requirements for the script:**
1. **Self-Contained:** The script must be runnable as-is. It should NOT require any external CSV/Excel files.
2. **Embed Data:** You MUST embed the provided data directly into the script, for example, by creating a pandas DataFrame from a dictionary or a multiline string.
3. **Full Analysis:** The script should perform a complete analysis similar to the text report:
- Identify the best model (based on R²) for each 'Experiment' and 'Type' (Biomass, Substrate, Product).
- Print a clear summary table of the findings.
4. **Visualization:** The script MUST generate at least one publication-quality plot using Matplotlib or Seaborn to visually compare the performance (e.g., R² values) of the best models across different experiments. The plot must be clearly labeled.
5. **Code Quality:** Use clear variable names, comments, and functions or a class structure to organize the code logically.
6. **No Placeholders:** Do not use placeholder comments like '# Add visualization here'. Implement the full functionality.
**Data to use:**
```
{table_text}
```
Generate only the Python code, starting with `import pandas as pd`.
"""
        try:
            analysis_text = self._complete(ai_model, analysis_prompt, temperature=0.6)
            # Slightly lower temperature: more deterministic output for code.
            raw_code = self._complete(ai_model, code_prompt, temperature=0.4)
        except Exception as e:
            return {"error": str(e)}
        return {
            "analisis_completo": analysis_text,
            "codigo_implementacion": self._extract_code(raw_code),
        }
def process_files(files, ai_model: str, detail_level: str = "detailed",
                  language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
    """Analyze every uploaded CSV/Excel file and merge the results.

    Args:
        files: Uploaded files, given either as objects exposing a ``name`` path
            attribute or as plain path strings.
        ai_model: Model identifier forwarded to the analyzer.
        detail_level: Detail level forwarded to the analyzer.
        language: UI language code; unknown codes fall back to English, as the
            analyzer's prompt prefix does.
        additional_specs: Optional user instructions forwarded to the analyzer.

    Returns:
        ``(analysis_markdown, generated_code)``. With no files, the first
        element is the localized "no files" message and the second is "".
    """
    labels = TRANSLATIONS.get(language, TRANSLATIONS['en'])
    if not files:
        return labels['error_no_files'], ""

    processor = FileProcessor()
    analyzer = AIAnalyzer(client, model_registry)
    supported_extensions = {'.csv', '.xlsx', '.xls'}
    all_analysis = []
    all_code = []

    for file in files:
        # Resolve the path once so objects with .name and bare strings both work.
        file_name = str(getattr(file, 'name', file))
        file_ext = Path(file_name).suffix.lower()
        if file_ext not in supported_extensions:
            continue  # other file types are skipped without being read

        # The translated label already starts with the chart emoji.
        all_analysis.append(f"## {labels['comparative_analysis']}: {file_name}")

        try:
            with open(file_name, 'rb') as f:
                file_content = f.read()
        except OSError:
            # One unreadable upload must not abort the remaining files.
            df = None
        else:
            if file_ext == '.csv':
                df = processor.read_csv(file_content)
            else:
                df = processor.read_excel(file_content)

        if df is None:
            all_analysis.append("Could not read the file content.")
        else:
            result = analyzer.analyze_fitting_results(df, ai_model, detail_level, language, additional_specs)
            if "error" in result:
                all_analysis.append(f"An error occurred: {result['error']}")
            else:
                all_analysis.append(result.get("analisis_completo", "No analysis generated."))
                # Always use the AI-generated code; there is no local fallback.
                all_code.append(
                    f"# Code generated for file: {file_name}\n"
                    + result.get("codigo_implementacion", "# No code was generated for this file.")
                )
        all_analysis.append("\n---\n")

    final_analysis = "\n".join(all_analysis)
    final_code = "\n\n".join(all_code)
    return final_analysis, final_code
# --- CAMBIO 3: La función `generate_implementation_code` ha sido eliminada por completo. ---
# Application state and export function (unchanged)
class AppState:
    """Mutable holder for the latest analysis text, generated code and UI language."""

    def __init__(self):
        self.current_analysis = ""
        self.current_code = ""
        self.current_language = "en"


# Single shared instance used by the UI callbacks.
app_state = AppState()
def export_report(export_format: str, language: str) -> Tuple[str, str]:
    """Placeholder for report export; currently performs no work.

    NOTE(review): the export logic is not present in this file. Despite the
    annotated return type, the function returns None.
    """
    return None
def create_interface():
    """Build and return the Gradio Blocks UI for the analyzer.

    The layout has a header row (title, subtitle, language and theme selectors)
    and a main row (inputs on the left, analysis and generated code on the
    right). Changing the language relabels the components; the analyze button
    runs ``process_files`` and stores its results in ``app_state``.
    """
    current_language = "en"
    ui = TRANSLATIONS[current_language]

    def update_interface_language(language):
        """Return one update per component listed in the language change event."""
        app_state.current_language = language
        t = TRANSLATIONS[language]
        # Order must match the `outputs` list of language_selector.change below.
        return [
            gr.update(value=f"# {t['title']}"),
            gr.update(value=t['subtitle']),
            gr.update(label=t['upload_files']),
            gr.update(label=t['select_model']),
            gr.update(label=t['select_language']),
            gr.update(label=t['select_theme']),
            gr.update(label=t['detail_level']),
            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
            gr.update(value=t['analyze_button']),
            gr.update(label=t['comparative_analysis']),
            gr.update(label=t['implementation_code']),
        ]

    def process_and_store(files, model, detail, language, additional_specs):
        """Run the analysis and remember its results in the shared app state."""
        analysis, code = process_files(files, model, detail, language, additional_specs)
        app_state.current_analysis = analysis
        app_state.current_code = code
        return analysis, code

    with gr.Blocks(theme=THEMES['light']) as demo:
        with gr.Row():
            with gr.Column(scale=3):
                title_text = gr.Markdown(f"# {ui['title']}")
                subtitle_text = gr.Markdown(ui['subtitle'])
            with gr.Column(scale=1):
                language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es")], value="en", label=ui['select_language'], interactive=True)
                theme_selector = gr.Dropdown(choices=[("Light", "light"), ("Dark", "dark")], value="light", label=ui['select_theme'], interactive=True)
        with gr.Row():
            with gr.Column(scale=1):
                files_input = gr.File(label=ui['upload_files'], file_count="multiple", file_types=[".csv", ".xlsx", ".xls"], type="filepath")
                default_model = "Qwen/Qwen3-14B"
                model_selector = gr.Dropdown(choices=list(AI_MODELS.keys()), value=default_model, label=ui['select_model'], info=f"{ui['best_for']}: {AI_MODELS[default_model]['best_for']}")
                detail_level = gr.Radio(choices=[(ui['detailed'], "detailed"), (ui['summarized'], "summarized")], value="detailed", label=ui['detail_level'])
                additional_specs = gr.Textbox(label=ui['additional_specs'], placeholder=ui['additional_specs_placeholder'], lines=3, interactive=True)
                analyze_btn = gr.Button(ui['analyze_button'], variant="primary", size="lg")
                # NOTE(review): the export controls (format selector, export
                # button) and the data-format accordion are not defined in this
                # function. When they are added, also add them to the update
                # list above and to the outputs of the language change event.
            with gr.Column(scale=2):
                analysis_output = gr.Markdown(label=ui['comparative_analysis'])
                # More visible lines so the full generated script is readable.
                code_output = gr.Code(label=ui['implementation_code'], language="python", interactive=True, lines=25)

        # Only components created above may be listed here; naming a component
        # that does not exist raises NameError while the UI is being built.
        language_selector.change(
            update_interface_language,
            inputs=[language_selector],
            outputs=[
                title_text, subtitle_text, files_input, model_selector,
                language_selector, theme_selector, detail_level,
                additional_specs, analyze_btn, analysis_output, code_output,
            ],
        )
        analyze_btn.click(
            fn=process_and_store,
            inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
            outputs=[analysis_output, code_output],
        )
    return demo
def main():
    """Return the analyzer UI, or a minimal error UI when NEBIUS_API_KEY is unset."""
    if os.getenv("NEBIUS_API_KEY"):
        return create_interface()

    # No API key: print a console hint and serve a stub interface whose only
    # response is the English "configure the key" message.
    print("⚠️ Configure NEBIUS_API_KEY in your environment secrets")
    return gr.Interface(
        fn=lambda x: TRANSLATIONS['en']['error_no_api'],
        inputs=gr.Textbox(),
        outputs=gr.Textbox(),
        title="Configuration Error",
    )
# Script entry point: build the interface and serve it on all network
# interfaces at port 7860.
if __name__ == "__main__":
    demo = main()
    # Launch only when main() returned a usable interface object.
    if demo:
        demo.launch(server_name="0.0.0.0", server_port=7860)