Spaces:

C2MV
/

Project-HF-2025

Sleeping

App Files Files Community

Project-HF-2025 / app.py

C2MV

Update app.py

11f0136 verified 2 months ago

raw

history blame

19.5 kB

	import gradio as gr
	from openai import OpenAI
	import PyPDF2
	import pandas as pd
	import numpy as np
	import io
	import os
	import json
	import zipfile
	import tempfile
	from typing import Dict, List, Tuple, Union, Optional
	import re
	from pathlib import Path
	import openpyxl
	from dataclasses import dataclass
	from enum import Enum
	from docx import Document
	from docx.shared import Inches, Pt, RGBColor
	from docx.enum.text import WD_ALIGN_PARAGRAPH
	from reportlab.lib import colors
	from reportlab.lib.pagesizes import letter, A4
	from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import inch
	from reportlab.pdfbase import pdfmetrics
	from reportlab.pdfbase.ttfonts import TTFont
	import matplotlib.pyplot as plt
	from datetime import datetime

	# Hugging Face Spaces configuration: switch off Gradio usage analytics.
	os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

	# OpenAI-compatible client pointed at the Nebius AI Studio endpoint (Qwen models).
	# The OpenAI constructor raises when it is given no API key, which would crash
	# this module at import time and make the "Configuration Error" screen built by
	# main() unreachable. A placeholder keeps the import alive; main() still checks
	# the real environment variable before exposing the analysis UI.
	client = OpenAI(
	    base_url="https://api.studio.nebius.com/v1/",
	    api_key=os.environ.get("NEBIUS_API_KEY") or "missing-nebius-api-key",
	)

	# UI translation table: language code -> {string key -> localized text}.
	# Both languages must expose the same keys. 'generated_on' is read by
	# ReportExporter.export_to_docx and was previously missing from both tables.
	TRANSLATIONS = {
	    'en': {
	        'title': '🧬 Comparative Analyzer of Biotechnological Models',
	        'subtitle': 'Specialized in comparative analysis of mathematical model fitting results',
	        'upload_files': '📁 Upload fitting results (CSV/Excel)',
	        'select_model': '🤖 AI Model',
	        'select_language': '🌐 Language',
	        'select_theme': '🎨 Theme',
	        'detail_level': '📋 Analysis detail level',
	        'detailed': 'Detailed',
	        'summarized': 'Summarized',
	        'analyze_button': '🚀 Analyze and Compare Models',
	        'export_format': '📄 Export format',
	        'export_button': '💾 Export Report',
	        'comparative_analysis': '📊 Comparative Analysis',
	        'implementation_code': '💻 Full Implementation Code (AI-Generated)',
	        'data_format': '📋 Expected data format',
	        'examples': '📚 Analysis examples',
	        'light': 'Light',
	        'dark': 'Dark',
	        'best_for': 'Best for',
	        'loading': 'Loading...',
	        'error_no_api': 'Please configure NEBIUS_API_KEY in your environment secrets',
	        'error_no_files': 'Please upload fitting result files to analyze',
	        'report_exported': 'Report exported successfully as',
	        'generated_on': 'Generated on',
	        'specialized_in': '🎯 Specialized in:',
	        'metrics_analyzed': '📊 Analyzed metrics:',
	        'what_analyzes': '🔍 What it specifically analyzes:',
	        'tips': '💡 Tips for better results:',
	        'additional_specs': '📝 Additional specifications for analysis',
	        'additional_specs_placeholder': 'Add any specific requirements or focus areas for the analysis...',
	    },
	    'es': {
	        'title': '🧬 Analizador Comparativo de Modelos Biotecnológicos',
	        'subtitle': 'Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos',
	        'upload_files': '📁 Subir resultados de ajuste (CSV/Excel)',
	        'select_model': '🤖 Modelo de IA',
	        'select_language': '🌐 Idioma',
	        'select_theme': '🎨 Tema',
	        'detail_level': '📋 Nivel de detalle del análisis',
	        'detailed': 'Detallado',
	        'summarized': 'Resumido',
	        'analyze_button': '🚀 Analizar y Comparar Modelos',
	        'export_format': '📄 Formato de exportación',
	        'export_button': '💾 Exportar Reporte',
	        'comparative_analysis': '📊 Análisis Comparativo',
	        'implementation_code': '💻 Código de Implementación Completo (Generado por IA)',
	        'data_format': '📋 Formato de datos esperado',
	        'examples': '📚 Ejemplos de análisis',
	        'light': 'Claro',
	        'dark': 'Oscuro',
	        'best_for': 'Mejor para',
	        'loading': 'Cargando...',
	        'error_no_api': 'Por favor configura NEBIUS_API_KEY en los secretos de tu entorno',
	        'error_no_files': 'Por favor sube archivos con resultados de ajuste para analizar',
	        'report_exported': 'Reporte exportado exitosamente como',
	        'generated_on': 'Generado el',
	        'specialized_in': '🎯 Especializado en:',
	        'metrics_analyzed': '📊 Métricas analizadas:',
	        'what_analyzes': '🔍 Qué analiza específicamente:',
	        'tips': '💡 Tips para mejores resultados:',
	        'additional_specs': '📝 Especificaciones adicionales para el análisis',
	        'additional_specs_placeholder': 'Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...',
	    },
	}


	# UI themes, keyed by the values offered in the theme selector.
	# A theme variable that points at a core palette entry must be written with a
	# leading "*" (e.g. "*neutral_800"). A bare name such as "neutral_800" is
	# emitted as a literal, invalid CSS value and the override is silently ignored.
	# The page background was previously "dark", which is not a CSS colour; it now
	# uses the same palette entry as the dark-mode background.
	THEMES = {
	    'light': gr.themes.Soft(),
	    'dark': gr.themes.Base(
	        primary_hue="blue",
	        secondary_hue="gray",
	        neutral_hue="gray",
	        font=["Arial", "sans-serif"],
	    ).set(
	        body_background_fill="*neutral_950",
	        body_background_fill_dark="*neutral_950",
	        button_primary_background_fill="*primary_600",
	        button_primary_background_fill_hover="*primary_500",
	        button_primary_text_color="white",
	        block_background_fill="*neutral_800",
	        block_border_color="*neutral_700",
	        block_label_text_color="*neutral_200",
	        block_title_text_color="*neutral_100",
	        checkbox_background_color="*neutral_700",
	        checkbox_background_color_selected="*primary_600",
	        input_background_fill="*neutral_700",
	        input_border_color="*neutral_600",
	        input_placeholder_color="*neutral_400",
	    ),
	}
	class AnalysisType(Enum):
	    """Enumeration of the analysis categories known to the application."""

	    MATHEMATICAL_MODEL = "mathematical_model"
	    DATA_FITTING = "data_fitting"
	    FITTING_RESULTS = "fitting_results"
	    UNKNOWN = "unknown"
	@dataclass
	class MathematicalModel:
	    """Record describing one mathematical model stored in the model registry."""

	    name: str                # key of the model inside its category
	    equation: str            # equation written as display text
	    parameters: List[str]    # parameter labels
	    application: str         # free-text description of where the model applies
	    sources: List[str]       # reference/source labels
	    category: str            # registry category the model is filed under
	    biological_meaning: str  # free-text interpretation of the model
	class ModelRegistry:
	    """Two-level catalogue of mathematical models: category -> name -> model."""

	    def __init__(self):
	        """Create an empty registry and fill it with the built-in models."""
	        self.models = {}
	        self._initialize_default_models()

	    def register_model(self, model: "MathematicalModel") -> None:
	        """Store *model* under its category and name, replacing any previous entry."""
	        self.models.setdefault(model.category, {})[model.name] = model

	    def get_model(self, category: str, name: str) -> Optional["MathematicalModel"]:
	        """Return the model filed under *category*/*name*, or None when absent."""
	        return self.models.get(category, {}).get(name)

	    def get_all_models(self) -> Dict:
	        """Return the internal category -> name -> model mapping (not a copy)."""
	        return self.models

	    def _initialize_default_models(self):
	        """Register the built-in biomass growth models (Monod, Logístico, Gompertz)."""
	        defaults = (
	            MathematicalModel(
	                name="Monod",
	                equation="μ = μmax × (S / (Ks + S))",
	                parameters=["μmax (h⁻¹)", "Ks (g/L)"],
	                application="Crecimiento limitado por sustrato único",
	                sources=["Cambridge", "MIT", "DTU"],
	                category="crecimiento_biomasa",
	                biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante",
	            ),
	            MathematicalModel(
	                name="Logístico",
	                equation="dX/dt = μmax × X × (1 - X/Xmax)",
	                parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
	                application="Sistemas cerrados batch",
	                sources=["Cranfield", "Swansea", "HAL Theses"],
	                category="crecimiento_biomasa",
	                biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema",
	            ),
	            MathematicalModel(
	                name="Gompertz",
	                equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
	                parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
	                application="Crecimiento con fase lag pronunciada",
	                sources=["Lund University", "NC State"],
	                category="crecimiento_biomasa",
	                biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario",
	            ),
	        )
	        for default_model in defaults:
	            self.register_model(default_model)
	# Module-level registry instance; constructing it registers the default models.
	model_registry = ModelRegistry()
	# Catalogue of selectable AI models: model identifier -> display metadata.
	AI_MODELS = {
	    "Qwen/Qwen3-14B": {
	        "name": "Qwen 3 14B (Nebius)",
	        "description": "Modelo potente de la serie Qwen, accedido vía Nebius AI.",
	        "max_tokens": 8000,
	        "best_for": "Análisis complejos y generación de código detallado.",
	    },
	}
	class FileProcessor:
	    """Parses uploaded tabular files, received as raw bytes, into DataFrames.

	    Both readers are best-effort: any parsing failure yields ``None`` instead of
	    an exception, and callers are expected to check for it.
	    """

	    @staticmethod
	    def read_csv(csv_file) -> Optional[pd.DataFrame]:
	        """Parse CSV bytes; return the DataFrame, or None if parsing fails."""
	        try:
	            return pd.read_csv(io.BytesIO(csv_file))
	        except Exception:
	            # Deliberately broad: an unreadable upload is reported to the user
	            # by the caller rather than aborting the whole batch.
	            return None

	    @staticmethod
	    def read_excel(excel_file) -> Optional[pd.DataFrame]:
	        """Parse Excel workbook bytes; return the DataFrame, or None if parsing fails."""
	        try:
	            return pd.read_excel(io.BytesIO(excel_file))
	        except Exception:
	            # Same best-effort contract as read_csv.
	            return None
	class ReportExporter:
	    """Writes a text report to a DOCX or PDF file."""

	    @staticmethod
	    def _label(language: str, key: str, default: str) -> str:
	        """Look up a UI string, falling back to English and then to *default*.

	        Prevents a KeyError when the language is not in TRANSLATIONS or when a
	        key (such as 'generated_on') is absent from the table.
	        """
	        english = TRANSLATIONS.get('en', {})
	        table = TRANSLATIONS.get(language, english)
	        return table.get(key, english.get(key, default))

	    @staticmethod
	    def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
	        """Write *content* to a Word document at *filename* and return *filename*.

	        The document starts with the localized application title and a
	        generation timestamp, followed by one paragraph per non-empty line of
	        *content* (the text is written as-is; Markdown is not rendered).
	        """
	        title = ReportExporter._label(language, 'title', 'Report')
	        generated_on = ReportExporter._label(language, 'generated_on', 'Generated on')

	        doc = Document()
	        doc.add_heading(title, 0)
	        doc.add_paragraph(f"{generated_on}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
	        for line in (content or "").splitlines():
	            if line.strip():
	                doc.add_paragraph(line)
	        doc.save(filename)
	        return filename

	    @staticmethod
	    def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
	        """Write *content* to a letter-sized PDF at *filename* and return *filename*.

	        One paragraph is emitted per non-empty line of *content*. *language* is
	        accepted for signature parity with export_to_docx and is not used here.
	        """
	        # Paragraph parses its text as XML-like markup, so raw "<" / "&" in the
	        # report must be escaped before being handed over.
	        from xml.sax.saxutils import escape

	        body_style = getSampleStyleSheet()['Normal']
	        story = [
	            Paragraph(escape(line), body_style)
	            for line in (content or "").splitlines()
	            if line.strip()
	        ]
	        doc = SimpleDocTemplate(filename, pagesize=letter)
	        doc.build(story)
	        return filename

	class AIAnalyzer:
	    """Asks the chat-completions client for a written analysis and a Python script."""

	    # A complete fenced block: opening fence with optional language tag, body, closing fence.
	    _FENCED_BLOCK = re.compile(r"```[\w+-]*[ \t]*\r?\n(.*?)```", re.DOTALL)
	    # An opening fence (with optional language tag) at the very start of the text.
	    _OPENING_FENCE = re.compile(r"^```[\w+-]*[ \t]*\r?\n?")

	    def __init__(self, client: "OpenAI", model_registry: "ModelRegistry"):
	        """Keep references to the API client and the model registry."""
	        self.client = client
	        self.model_registry = model_registry

	    def get_language_prompt_prefix(self, language: str) -> str:
	        """Return the instruction telling the model which language to answer in.

	        Unknown language codes fall back to the English instruction.
	        """
	        prefixes = {
	            'en': "Please respond in English. ",
	            'es': "Por favor responde en español. ",
	            'fr': "Veuillez répondre en français. ",
	            'de': "Bitte antworten Sie auf Deutsch. ",
	            'pt': "Por favor responda em português. ",
	        }
	        return prefixes.get(language, prefixes['en'])

	    @staticmethod
	    def _strip_code_fences(text: Optional[str]) -> str:
	        """Return the code contained in a model reply, without Markdown fences.

	        Handles a fenced block with any (or no) language tag, prose around the
	        fenced block, a reply cut off before the closing fence, and a reply that
	        is already bare code. ``None`` yields an empty string.
	        """
	        cleaned = (text or "").strip()
	        fenced = AIAnalyzer._FENCED_BLOCK.search(cleaned)
	        if fenced:
	            return fenced.group(1).strip()
	        # No complete block: drop a dangling opening and/or closing fence.
	        cleaned = AIAnalyzer._OPENING_FENCE.sub("", cleaned)
	        if cleaned.endswith("```"):
	            cleaned = cleaned[:-3]
	        return cleaned.strip()

	    @staticmethod
	    def _build_code_prompt(lang_prefix: str, data_table: str) -> str:
	        """Assemble the prompt requesting a self-contained analysis script."""
	        return "\n".join([
	            "",
	            lang_prefix,
	            "",
	            "Based on the following data, generate a SINGLE, COMPLETE, and EXECUTABLE Python script.",
	            "",
	            "Requirements for the script:",
	            "1. Self-Contained: The script must be runnable as-is. It should NOT require any external CSV/Excel files.",
	            "2. Embed Data: You MUST embed the provided data directly into the script, for example, by creating a pandas DataFrame from a dictionary or a multiline string.",
	            "3. Full Analysis: The script should perform a complete analysis similar to the text report:",
	            "- Identify the best model (based on R²) for each 'Experiment' and 'Type' (Biomass, Substrate, Product).",
	            "- Print a clear summary table of the findings.",
	            "4. Visualization: The script MUST generate at least one publication-quality plot using Matplotlib or Seaborn to visually compare the performance (e.g., R² values) of the best models across different experiments. The plot must be clearly labeled.",
	            "5. Code Quality: Use clear variable names, comments, and functions or a class structure to organize the code logically.",
	            "6. No Placeholders: Do not use placeholder comments like '# Add visualization here'. Implement the full functionality.",
	            "",
	            "Data to use:",
	            "```",
	            data_table,
	            "```",
	            "",
	            "Generate only the Python code, starting with `import pandas as pd`.",
	            "",
	        ])

	    def analyze_fitting_results(self, data: pd.DataFrame, ai_model: str, detail_level: str = "detailed",
	                                language: str = "en", additional_specs: str = "") -> Dict:
	        """Run the two model calls (text analysis, then code generation).

	        Args:
	            data: table of fitting results; rendered into both prompts via ``to_string()``.
	            ai_model: model identifier passed to the chat-completions endpoint.
	            detail_level: requested level of detail, upper-cased into the prompt.
	            language: language code selecting the response-language instruction.
	            additional_specs: optional free-text requirements appended to the analysis prompt.

	        Returns:
	            ``{"analisis_completo": str, "codigo_implementacion": str}`` on success,
	            or ``{"error": str}`` when any step raises.
	        """
	        data_table = data.to_string()  # rendered once, shared by both prompts
	        data_summary = f"FITTING RESULTS DATA:\n\n{data_table}"
	        lang_prefix = self.get_language_prompt_prefix(language)
	        user_specs_section = f"\n\nUSER ADDITIONAL SPECIFICATIONS:\n{additional_specs}" if additional_specs else ""

	        # Prompt for the written analysis.
	        analysis_prompt = f"{lang_prefix}\nYou are an expert in biotechnology and mathematical modeling. Analyze these model fitting results.\n{user_specs_section}\nDETAIL LEVEL: {detail_level.upper()}\n\nProvide a comprehensive comparative analysis based on the provided data. Structure your response clearly using Markdown. Identify the best models for each experimental condition and justify your choices with metrics like R² and RMSE. Conclude with overall recommendations.\n\n{data_summary}"

	        # Prompt for the generated script.
	        code_prompt = self._build_code_prompt(lang_prefix, data_table)

	        try:
	            # Call for the written analysis.
	            analysis_response = self.client.chat.completions.create(
	                model=ai_model,
	                max_tokens=4000,
	                temperature=0.6,
	                messages=[{"role": "user", "content": analysis_prompt}],
	            )
	            analysis_text = analysis_response.choices[0].message.content or ""

	            # Call for the code generation.
	            code_response = self.client.chat.completions.create(
	                model=ai_model,
	                max_tokens=4000,
	                temperature=0.4,  # slightly more deterministic for code
	                messages=[{"role": "user", "content": code_prompt}],
	            )
	            code_text = self._strip_code_fences(code_response.choices[0].message.content)

	            return {
	                "analisis_completo": analysis_text,
	                "codigo_implementacion": code_text,
	            }
	        except Exception as e:
	            # Boundary with the remote service: surface the failure to the UI
	            # as data instead of crashing the request handler.
	            return {"error": str(e)}

	def process_files(files, ai_model: str, detail_level: str = "detailed",
	                  language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
	    """Analyze every uploaded CSV/Excel file and merge the results.

	    Args:
	        files: uploaded files, either filesystem paths (what
	            ``gr.File(type="filepath")`` delivers) or objects exposing ``.name``.
	        ai_model: model identifier forwarded to the analyzer.
	        detail_level: level of detail forwarded to the analyzer.
	        language: key into TRANSLATIONS, also forwarded to the analyzer.
	        additional_specs: optional free-text requirements forwarded to the analyzer.

	    Returns:
	        ``(analysis_markdown, generated_code)``. When no files are given the
	        first element is the localized "no files" message and the second is "".
	    """
	    if not files:
	        return TRANSLATIONS[language]['error_no_files'], ""

	    processor = FileProcessor()
	    analyzer = AIAnalyzer(client, model_registry)
	    all_analysis = []
	    all_code = []

	    for file in files:
	        # Uploads arrive as plain path strings with type="filepath"; reading
	        # ``file.name`` on a str raised AttributeError, so resolve the path first.
	        file_path = Path(file if isinstance(file, (str, os.PathLike)) else file.name)
	        file_name = file_path.name
	        file_ext = file_path.suffix.lower()

	        if file_ext not in ('.csv', '.xlsx', '.xls'):
	            continue  # unsupported types are skipped without output

	        all_analysis.append(f"## 📊 {TRANSLATIONS[language]['comparative_analysis']}: {file_name}")

	        try:
	            file_content = file_path.read_bytes()
	        except OSError:
	            df = None  # reported below as an unreadable file
	        else:
	            if file_ext == '.csv':
	                df = processor.read_csv(file_content)
	            else:
	                df = processor.read_excel(file_content)

	        if df is None:
	            all_analysis.append("Could not read the file content.")
	        else:
	            result = analyzer.analyze_fitting_results(df, ai_model, detail_level, language, additional_specs)
	            if "error" in result:
	                all_analysis.append(f"An error occurred: {result['error']}")
	            else:
	                all_analysis.append(result.get("analisis_completo", "No analysis generated."))
	                # Always use the AI-generated code; there is no local fallback.
	                all_code.append(f"# Code generated for file: {file_name}\n" + result.get("codigo_implementacion", "# No code was generated for this file."))
	        all_analysis.append("\n---\n")

	    final_analysis = "\n".join(all_analysis)
	    final_code = "\n\n".join(all_code)

	    return final_analysis, final_code

	# --- CAMBIO 3: La función `generate_implementation_code` ha sido eliminada por completo. ---

	# Application state; a single module-level instance is created right below.
	class AppState:
	    """Mutable holder for the latest analysis text, generated code and UI language."""

	    def __init__(self):
	        """Start with empty analysis/code and English as the language."""
	        self.current_analysis = ""
	        self.current_code = ""
	        self.current_language = "en"


	app_state = AppState()
	def export_report(export_format: str, language: str) -> Tuple[str, str]:
	    """Placeholder for the report export; currently does nothing.

	    NOTE(review): the body is a stub, so the call returns None even though the
	    signature announces a 2-tuple of strings. Any caller that unpacks the
	    result will fail until the export logic is implemented.
	    """
	    return None

	def create_interface():
	    """Build the Gradio Blocks application and return it without launching it."""
	    current_language = "en"  # language used for the initial labels

	    def update_interface_language(language):
	        """Store the chosen language and return one update per wired output.

	        The order of the returned updates must match the ``outputs`` list of
	        ``language_selector.change`` below.
	        """
	        app_state.current_language = language
	        t = TRANSLATIONS[language]
	        return [
	            gr.update(value=f"# {t['title']}"),
	            gr.update(value=t['subtitle']),
	            gr.update(label=t['upload_files']),
	            gr.update(label=t['select_model']),
	            gr.update(label=t['select_language']),
	            gr.update(label=t['select_theme']),
	            gr.update(label=t['detail_level']),
	            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),
	            gr.update(value=t['analyze_button']),
	            gr.update(label=t['comparative_analysis']),
	            gr.update(label=t['implementation_code']),
	        ]

	    def process_and_store(files, model, detail, language, additional_specs):
	        """Run the analysis and remember its results in the shared app state."""
	        analysis, code = process_files(files, model, detail, language, additional_specs)
	        app_state.current_analysis = analysis
	        app_state.current_code = code
	        return analysis, code

	    labels = TRANSLATIONS[current_language]

	    with gr.Blocks(theme=THEMES['light']) as demo:
	        # Header: title/subtitle on the left, language and theme selectors on the right.
	        with gr.Row():
	            with gr.Column(scale=3):
	                title_text = gr.Markdown(f"# {labels['title']}")
	                subtitle_text = gr.Markdown(labels['subtitle'])
	            with gr.Column(scale=1):
	                language_selector = gr.Dropdown(choices=[("English", "en"), ("Español", "es")], value="en", label=labels['select_language'], interactive=True)
	                theme_selector = gr.Dropdown(choices=[("Light", "light"), ("Dark", "dark")], value="light", label=labels['select_theme'], interactive=True)

	        # Body: inputs on the left, results on the right.
	        with gr.Row():
	            with gr.Column(scale=1):
	                files_input = gr.File(label=labels['upload_files'], file_count="multiple", file_types=[".csv", ".xlsx", ".xls"], type="filepath")
	                default_model = "Qwen/Qwen3-14B"
	                model_selector = gr.Dropdown(choices=list(AI_MODELS.keys()), value=default_model, label=labels['select_model'], info=f"{labels['best_for']}: {AI_MODELS[default_model]['best_for']}")
	                detail_level = gr.Radio(choices=[(labels['detailed'], "detailed"), (labels['summarized'], "summarized")], value="detailed", label=labels['detail_level'])
	                additional_specs = gr.Textbox(label=labels['additional_specs'], placeholder=labels['additional_specs_placeholder'], lines=3, interactive=True)
	                analyze_btn = gr.Button(labels['analyze_button'], variant="primary", size="lg")

	            with gr.Column(scale=2):
	                analysis_output = gr.Markdown(label=labels['comparative_analysis'])
	                # Extra lines so the complete generated script is visible.
	                code_output = gr.Code(label=labels['implementation_code'], language="python", interactive=True, lines=25)

	        # NOTE(review): the language handler used to list export_format,
	        # export_btn and data_format_accordion as outputs, but those components
	        # are never created in this function, which raised NameError while the
	        # UI was being built. They were removed from the outputs (together with
	        # their three updates above); re-add both sides when the export widgets
	        # and the data-format accordion are actually defined.
	        language_selector.change(
	            update_interface_language,
	            inputs=[language_selector],
	            outputs=[
	                title_text, subtitle_text, files_input, model_selector,
	                language_selector, theme_selector, detail_level, additional_specs,
	                analyze_btn, analysis_output, code_output,
	            ],
	        )
	        analyze_btn.click(
	            fn=process_and_store,
	            inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
	            outputs=[analysis_output, code_output],
	        )

	    return demo

	def main():
	    """Return the interface to launch.

	    With NEBIUS_API_KEY set this is the full application; otherwise a warning
	    is printed and a minimal interface that only reports the configuration
	    error is returned.
	    """
	    if os.getenv("NEBIUS_API_KEY"):
	        return create_interface()

	    print("⚠️ Configure NEBIUS_API_KEY in your environment secrets")
	    return gr.Interface(
	        fn=lambda x: TRANSLATIONS['en']['error_no_api'],
	        inputs=gr.Textbox(),
	        outputs=gr.Textbox(),
	        title="Configuration Error",
	    )

	# Script entry point: build the interface and serve it on all network
	# interfaces at port 7860.
	if __name__ == "__main__":
	    demo = main()
	    if demo:
	        demo.launch(
	            server_name="0.0.0.0",
	            server_port=7860,
	        )