Project-HF-2025-3

Sleeping

App Files Files Community

Project-HF-2025-3 / app.py

C2MV

Update app.py

5c5c390 verified about 2 months ago

raw

history blame contribute delete

22.9 kB

	"""
	CÓDIGO COMPLETO Y CORREGIDO - VERSIÓN CON AGENTES FUNCIONALES
	- CORREGIDO: La clase `CoordinatorAgent` ahora utiliza activamente el `CodeAgent` para tomar decisiones.
	- AÑADIDO: Una lógica de fallback robusta que usa reglas simples si el modelo de IA no está disponible.
	- MANTIENE: Todas las correcciones previas de `smolagents`, `pandoc` y la UI.
	"""

	import gradio as gr
	from gradio_client import Client, handle_file
	import pandas as pd
	import json
	import tempfile
	import os
	from datetime import datetime
	import plotly.graph_objects as go
	import plotly.express as px
	import numpy as np
	from smolagents import CodeAgent, tool, InferenceClientModel
	import logging
	import shutil
	# pypandoc ya no es necesario para la lógica principal, pero se deja por si se reintroduce
	# import pypandoc

	# --- CONFIGURATION AND REMOTE CLIENTS ---
	# Each remote dependency is created best-effort: on failure the corresponding
	# module-level name is set to None and the rest of the module checks for that.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Gradio client for the "C2MV/BiotechU4" Space.
	try:
	    biotech_client = Client("C2MV/BiotechU4")
	except Exception as e:
	    logger.error(f"❌ Error BiotechU4: {e}")
	    biotech_client = None
	else:
	    logger.info("✅ Cliente BiotechU4 inicializado.")

	# Gradio client for the "C2MV/Project-HF-2025-2" Space.
	try:
	    analysis_client = Client("C2MV/Project-HF-2025-2")
	except Exception as e:
	    logger.error(f"❌ Error Project-HF-2025-2: {e}")
	    analysis_client = None
	else:
	    logger.info("✅ Cliente Project-HF-2025-2 inicializado.")

	# Language model handed to the CodeAgent; when it is None the coordinator
	# uses its rule-based fallback instead.
	try:
	    hf_engine = InferenceClientModel(model_id="mistralai/Mistral-7B-Instruct-v0.2")
	except Exception as e:
	    # The cause is included in the message so a failed start-up can be
	    # diagnosed, in line with the two client handlers above.
	    logger.warning(f"No se pudo inicializar HF Inference ({e}). Agentes usarán lógica simple de fallback.")
	    hf_engine = None
	else:
	    logger.info("✅ Modelo de lenguaje para agentes inicializado.")

	# ============================================================================
	# 🤖 SISTEMA DE AGENTES (CON LÓGICA CORREGIDA)
	# ============================================================================

	class BiotechAgentTools:
	    """Groups the two smolagents tools that the coordinator hands to its agent."""

	    # NOTE: the docstrings of both tools are kept verbatim (in Spanish).
	    # According to the smolagents documentation, `@tool` builds the tool's
	    # description and argument descriptions from the docstring and type
	    # hints, so that text is runtime data rather than plain documentation.
	    # NOTE(review): the functions take no `self`; callers in this file invoke
	    # them as `tools.<name>(arg)`, which presumably works because `@tool`
	    # replaces each function with a callable tool object — confirm.

	    @tool
	    def analyze_data_characteristics(data_info: str) -> dict:
	        """
	        Analiza las características de los datos biotecnológicos subidos para determinar el tipo de experimento.
	        Args:
	        data_info (str): Información sobre el archivo de datos, como su nombre.
	        Returns:
	        dict: Diccionario con tipo de experimento, modelos recomendados, parámetros y calidad de datos.
	        """
	        try:
	            text = data_info.lower()
	            # Candidate models are only recommended if they appear in the full catalogue.
	            catalogue = ['logistic', 'gompertz', 'moser', 'baranyi', 'monod', 'contois', 'andrews', 'tessier', 'richards', 'stannard', 'huang']
	            growth_family = [name for name in ['logistic', 'gompertz', 'baranyi', 'richards'] if name in catalogue]
	            fermentation_family = [name for name in ['monod', 'contois', 'andrews', 'moser'] if name in catalogue]

	            # Keyword match on the lower-cased description; growth keywords win over fermentation ones.
	            if any(keyword in text for keyword in ("biomass", "growth")):
	                kind = "growth_kinetics"
	                models = growth_family
	                params = {"component": "biomass", "use_de": True, "maxfev": 75000}
	            elif any(keyword in text for keyword in ("ferment", "substrate")):
	                kind = "fermentation"
	                models = fermentation_family
	                params = {"component": "all", "use_de": False, "maxfev": 50000}
	            else:
	                kind = "general_biotech"
	                models = growth_family
	                params = {"component": "all", "use_de": False, "maxfev": 50000}

	            result = {
	                "experiment_type": kind,
	                "recommended_models": models,
	                "suggested_params": params,
	                "data_quality": "good",
	            }
	            logger.info(f"Herramienta 'analyze_data_characteristics' ejecutada. Resultado: {result['experiment_type']}")
	            return result
	        except Exception as e:
	            # Any failure (e.g. a non-string argument) yields a conservative default result.
	            logger.error(f"Error en herramienta 'analyze_data_characteristics': {str(e)}")
	            return {
	                "experiment_type": "error",
	                "recommended_models": ['logistic', 'gompertz'],
	                "suggested_params": {"component": "all", "use_de": False, "maxfev": 50000},
	                "data_quality": "unknown",
	            }

	    @tool
	    def prepare_ia_context(data_summary: str) -> str:
	        """
	        Prepara un contexto enriquecido y específico para un análisis de IA posterior, basado en un resumen del experimento.
	        Args:
	        data_summary (str): Un resumen del tipo de experimento (ej. 'Análisis de cinética de crecimiento').
	        Returns:
	        str: El contexto detallado y estructurado para la IA.
	        """
	        try:
	            # The continuation lines of this literal are runtime text and are
	            # reproduced exactly as they appear in the source.
	            context_text = f"""CONTEXTO BIOTECNOLÓGICO ESPECÍFICO:
	Resultados del modelado: {data_summary}
	Por favor, enfócate en:
	1. Interpretación biológica de los parámetros ajustados.
	2. Comparación de la bondad de ajuste entre modelos.
	3. Implicaciones prácticas para el proceso.
	4. Recomendaciones para la optimización.
	5. Identificación de posibles limitaciones."""
	        except Exception as e:
	            # On failure the unmodified summary is returned.
	            logger.error(f"Error en herramienta 'prepare_ia_context': {str(e)}")
	            return data_summary
	        logger.info("Herramienta 'prepare_ia_context' ejecutada.")
	        return context_text

	class CoordinatorAgent:
	    """Derives an optimized analysis configuration for an uploaded data file.

	    When a language model is available (`hf_engine` is truthy) a `CodeAgent`
	    equipped with the `BiotechAgentTools` tools is asked to do the work;
	    otherwise, or when the agent fails, a rule-based fallback calls the same
	    tools directly.
	    """

	    def __init__(self):
	        self.tools = BiotechAgentTools()
	        # The agent is only built when the language model could be created.
	        self.agent = (
	            CodeAgent(
	                tools=[self.tools.analyze_data_characteristics, self.tools.prepare_ia_context],
	                model=hf_engine,
	            )
	            if hf_engine
	            else None
	        )

	    @staticmethod
	    def _merge_characteristics(current_config: dict, characteristics: dict) -> dict:
	        """Return a shallow copy of *current_config* overridden by the recommended settings.

	        Raises:
	            KeyError: if *characteristics* lacks `recommended_models` or one of
	                the expected `suggested_params` entries.
	        """
	        params = characteristics["suggested_params"]
	        merged = current_config.copy()
	        merged.update({
	            "models": characteristics["recommended_models"],
	            "component": params["component"],
	            "use_de": params["use_de"],
	            "maxfev": params["maxfev"],
	        })
	        return merged

	    def _fallback_logic(self, file_info: str, current_config: dict) -> dict:
	        """Rule-based optimization used when the LLM agent is unavailable or failed.

	        Args:
	            file_info: Description of the uploaded file (the caller passes its name).
	            current_config: The user's configuration; it is copied, not mutated.

	        Returns:
	            dict with keys `config`, `analysis` and `recommendations`.
	        """
	        logger.warning("⚙️ Usando lógica de fallback (sin LLM) para el análisis.")
	        characteristics = self.tools.analyze_data_characteristics(file_info)
	        experiment_type = characteristics["experiment_type"]

	        # An "error" analysis leaves the user's settings untouched.
	        if experiment_type == "error":
	            optimized_config = current_config.copy()
	        else:
	            optimized_config = self._merge_characteristics(current_config, characteristics)

	        # Only the two specific experiment types get a generated AI context;
	        # for anything else the user's `additional_specs` is kept.
	        if experiment_type == "growth_kinetics":
	            optimized_config["additional_specs"] = self.tools.prepare_ia_context("Análisis de cinética de crecimiento.")
	        elif experiment_type == "fermentation":
	            optimized_config["additional_specs"] = self.tools.prepare_ia_context("Análisis de datos de fermentación.")

	        logger.info(f"✅ Lógica de fallback optimizó la configuración para: {experiment_type}")
	        return {
	            "config": optimized_config,
	            "analysis": characteristics,
	            "recommendations": f"Configuración optimizada por reglas para {experiment_type}",
	        }

	    def analyze_and_optimize(self, file_info: str, current_config: dict) -> dict:
	        """Optimize *current_config* with the LLM agent, falling back to rules on any failure.

	        Args:
	            file_info: Description of the uploaded file (the caller passes its name).
	            current_config: The user's configuration; it is copied, not mutated.

	        Returns:
	            dict with keys `config`, `analysis` and `recommendations`.
	        """
	        logger.info("🤖 Agente Coordinador iniciando análisis...")
	        if not self.agent:
	            # No agent was built, so go straight to the rule-based path.
	            return self._fallback_logic(file_info, current_config)

	        try:
	            logger.info("🧠 Usando CodeAgent (modelo de lenguaje) para el análisis.")
	            # 1. Let the agent pick the tool that analyses the file.
	            prompt1 = f"Analyze the characteristics of the data file to determine the experiment type and recommend optimal parameters. The file info is: '{file_info}'"
	            characteristics = self.agent.run(prompt1)

	            if not isinstance(characteristics, dict) or "experiment_type" not in characteristics:
	                raise ValueError(f"El agente no devolvió un diccionario de características válido. Recibido: {characteristics}")

	            optimized_config = self._merge_characteristics(current_config, characteristics)

	            # 2. Let the agent pick the tool that prepares the AI context.
	            prompt2 = f"The experiment has been identified as '{characteristics['experiment_type']}'. Prepare a rich, specific context for a follow-up AI analysis based on this type."
	            additional_specs = self.agent.run(prompt2)
	            # The agent's answer is free-form; only a non-empty string may
	            # replace the specs the user typed.
	            if isinstance(additional_specs, str) and additional_specs.strip():
	                optimized_config["additional_specs"] = additional_specs
	            else:
	                logger.warning("⚠️ El agente no devolvió un contexto de texto válido; se conservan las especificaciones del usuario.")

	            logger.info(f"✅ Agente LLM optimizó la configuración para: {characteristics['experiment_type']}")
	            return {
	                "config": optimized_config,
	                "analysis": characteristics,
	                "recommendations": f"Configuración optimizada por IA para {characteristics['experiment_type']}",
	            }
	        except Exception as e:
	            logger.error(f"❌ Error durante la ejecución del CodeAgent: {e}. Usando lógica de fallback.")
	            return self._fallback_logic(file_info, current_config)

	class BiotechAgentSystem:
	    """Entry point to the agent layer: wraps a `CoordinatorAgent` and never raises."""

	    def __init__(self):
	        self.coordinator = CoordinatorAgent()
	        logger.info("🚀 Sistema de agentes inicializado")

	    def process_with_agents(self, file_info: str, user_config: dict) -> dict:
	        """Run the coordinator and tag its result with a `success` flag.

	        Returns:
	            On success, the coordinator's result dict with `"success": True`
	            placed first. On any exception, a dict with `"success": False`,
	            the untouched *user_config* as `config`, an `analysis` whose
	            `experiment_type` is `"error"`, and the error text in
	            `recommendations`.
	        """
	        try:
	            outcome = self.coordinator.analyze_and_optimize(file_info, user_config)
	            return {"success": True, **outcome}
	        except Exception as e:
	            logger.error(f"❌ Error en sistema de agentes: {str(e)}")
	            failure_report = {
	                "success": False,
	                "config": user_config,
	                "analysis": {"experiment_type": "error"},
	                "recommendations": f"Error: {str(e)}",
	            }
	            return failure_report

	# --- PIPELINE AND UI FUNCTIONS (unchanged) ---
	# Module-level agent system; the pipeline function calls its process_with_agents().
	agent_system = BiotechAgentSystem()

	def create_dummy_plot():
	    """Build the empty placeholder figure used when there is nothing to plot."""
	    placeholder = go.Figure(go.Scatter(x=[], y=[]))
	    hint = dict(text="Sube un archivo y ejecuta el pipeline", showarrow=False)
	    placeholder.update_layout(
	        title="Esperando resultados...",
	        template="plotly_white",
	        height=500,
	        annotations=[hint],
	    )
	    return placeholder

	def parse_plot_data(plot_info):
	    """Convert a plot payload into a Plotly figure, or the placeholder figure.

	    Accepted payloads: a dict carrying a JSON string under 'plot', a JSON
	    string, or a plain figure dict. Empty input, unsupported types and
	    parsing failures (which are logged) all yield `create_dummy_plot()`.
	    """
	    if plot_info:
	        try:
	            if isinstance(plot_info, dict):
	                if 'plot' in plot_info:
	                    return go.Figure(json.loads(plot_info['plot']))
	                return go.Figure(plot_info)
	            if isinstance(plot_info, str):
	                return go.Figure(json.loads(plot_info))
	        except Exception as e:
	            logger.error(f"Error parseando gráfico: {e}")
	    return create_dummy_plot()

	def _remove_temp_file(path):
	    """Delete *path* if it is set and still exists (cleanup of the CSV bridge file)."""
	    if path and os.path.exists(path):
	        os.remove(path)


	def process_complete_pipeline_with_agents(
	        file, models, component, use_de, maxfev, exp_names,
	        ia_model, detail_level, language, additional_specs, max_output_tokens,
	        use_personal_key, personal_api_key,
	        progress=gr.Progress()):
	    """Run the full pipeline: agent optimization, biotech modeling, AI report, Markdown export.

	    Steps:
	        1. The agent system may override models/component/use_de/maxfev/additional_specs.
	        2. `biotech_client` runs the modeling (`/run_analysis_wrapper`).
	        3. The resulting table is written to a temporary CSV file.
	        4. The analysis client picks a chunk column and generates the report.
	        5. The report text is saved under `exported_reports/` as a `.md` file.

	    Args:
	        file: Uploaded data file; either an object exposing `.name` or a path string.
	        models, component, use_de, maxfev, exp_names: Modeling settings forwarded
	            to the biotech client (possibly overridden by the agents).
	        ia_model, detail_level, language, additional_specs, max_output_tokens:
	            Report settings forwarded to the analysis client.
	        use_personal_key, personal_api_key: When both are truthy, a dedicated
	            analysis client is created with that token.
	        progress: Gradio progress tracker (the default instance is how Gradio
	            injects it).

	    Returns:
	        7-tuple `(figure, table, report, code, report_path, status_log, report_path)`.
	        On failure the entries not yet produced are None and the status log
	        ends with a line starting with "❌".
	    """
	    progress_updates = []

	    def failure(message, plot=None, table=None, report=None):
	        """Build the 7-tuple for an aborted run, appending the "❌" line to the log."""
	        log = "\n".join([*progress_updates, f"❌ {message}"])
	        shown_plot = plot if plot is not None else create_dummy_plot()
	        return (shown_plot, table, report, None, None, log, None)

	    progress(0, desc="🚀 Iniciando Pipeline...")
	    if not file:
	        return failure("Por favor, sube un archivo.")
	    if not models:
	        return failure("Por favor, selecciona al menos un modelo.")

	    # Accept both a file-like value with `.name` and a plain path string.
	    file_path = getattr(file, "name", file)

	    # --- Step 1: let the agents tune the configuration ---
	    progress(0.1, desc="🤖 Activando agentes...")
	    user_config = {"models": models, "component": component, "use_de": use_de, "maxfev": maxfev, "additional_specs": additional_specs}
	    agent_result = agent_system.process_with_agents(f"Archivo: {os.path.basename(file_path)}", user_config)
	    if agent_result["success"]:
	        optimized_config = agent_result["config"]
	        progress_updates.extend([
	            f"✅ Agentes detectaron: {agent_result['analysis']['experiment_type']}",
	            f"🎯 {agent_result['recommendations']}",
	        ])
	        models = optimized_config.get("models", models)
	        component = optimized_config.get("component", component)
	        use_de = optimized_config.get("use_de", use_de)
	        maxfev = optimized_config.get("maxfev", maxfev)
	        additional_specs = optimized_config.get("additional_specs", additional_specs)
	    else:
	        progress_updates.append(f"⚠️ Agentes no optimizaron: {agent_result['recommendations']}")

	    # --- Step 2: biotech modeling on the remote Space ---
	    progress(0.2, desc="🔬 Ejecutando análisis biotech...")
	    if not biotech_client:
	        return failure("Cliente BiotechU4 no disponible.")
	    try:
	        plot_info, df_data, status = biotech_client.predict(
	            file=handle_file(file_path), models=models, component=component, use_de=use_de,
	            maxfev=maxfev, exp_names=exp_names, api_name="/run_analysis_wrapper")
	        progress_updates.append(f"✅ Análisis BiotechU4: {status}")
	    except Exception as e:
	        return failure(f"Error: {e}")
	    figure = parse_plot_data(plot_info)
	    if "Error" in status or not df_data:
	        return failure(status, plot=figure)

	    # --- Step 3: bridge the modeling table to the report service as a CSV file ---
	    progress(0.4, desc="🌉 Creando puente de datos...")
	    temp_csv_file = None
	    try:
	        df = pd.DataFrame(df_data['data'], columns=df_data['headers'])
	        # Reserve a path, close the handle, then write by name so the file is
	        # never opened twice at once.
	        with tempfile.NamedTemporaryFile(mode='w+', suffix='.csv', delete=False, encoding='utf-8') as temp_f:
	            temp_csv_file = temp_f.name
	        df.to_csv(temp_csv_file, index=False)
	        progress_updates.append("✅ Puente de datos creado.")
	    except Exception as e:
	        # Do not leave a half-written temporary file behind.
	        _remove_temp_file(temp_csv_file)
	        return failure(f"Error: {e}", plot=figure, table=df_data)

	    # --- Step 4: AI report ---
	    progress(0.5, desc=f"🤖 Generando informe IA...")
	    if not analysis_client:
	        _remove_temp_file(temp_csv_file)
	        return failure("Cliente de análisis no disponible.", plot=figure, table=df_data)
	    try:
	        current_analysis_client = analysis_client
	        if use_personal_key and personal_api_key:
	            current_analysis_client = Client("C2MV/Project-HF-2025-2", hf_token=personal_api_key)
	            progress_updates.append("🔑 Usando clave API personal.")
	        progress(0.6, desc="🔎 Determinando columnas...")
	        chunk_update_dict = current_analysis_client.predict(files=[handle_file(temp_csv_file)], api_name="/update_chunk_column_selector")
	        if not isinstance(chunk_update_dict, dict) or 'choices' not in chunk_update_dict:
	            raise ValueError(f"Formato inesperado: {chunk_update_dict}")
	        # The first offered choice is used as the grouping column.
	        selected_chunk_column = chunk_update_dict['choices'][0][0]
	        progress_updates.append(f"✅ Columna de agrupación: '{selected_chunk_column}'")
	        result = current_analysis_client.predict(
	            files=[handle_file(temp_csv_file)], chunk_column=selected_chunk_column, qwen_model=ia_model,
	            detail_level=detail_level, language=language, additional_specs=additional_specs,
	            max_output_tokens=max_output_tokens, api_name="/process_files_and_analyze")
	        if not isinstance(result, tuple) or len(result) != 4:
	            raise ValueError(f"Respuesta inesperada: '{result}'.")
	        thinking_process, analysis_report, implementation_code, token_usage = result
	        progress_updates.extend([f"✅ Informe IA generado. {token_usage}", f"🧠 Pensamiento: {thinking_process}"])
	    except Exception as e:
	        error_msg = f"Error generando informe IA: {e}"
	        return failure(error_msg, plot=figure, table=df_data, report=error_msg)
	    finally:
	        # The CSV bridge is only needed while the report is generated.
	        _remove_temp_file(temp_csv_file)

	    # --- Step 5: direct export to Markdown (.md) ---
	    progress(0.9, desc="📄 Generando archivo de reporte (.md)...")
	    final_report_path = None
	    if analysis_report and isinstance(analysis_report, str):
	        export_dir = "exported_reports"
	        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	        final_report_path = os.path.join(export_dir, f"report_{timestamp}.md")
	        try:
	            # Directory creation is inside the try so a failure is reported
	            # in the status log instead of aborting the whole run.
	            os.makedirs(export_dir, exist_ok=True)
	            with open(final_report_path, 'w', encoding='utf-8') as f:
	                f.write(analysis_report)
	            progress_updates.append(f"✅ ¡Éxito! Informe exportado como: {os.path.basename(final_report_path)}")
	        except Exception as e:
	            error_msg = f"❌ No se pudo escribir el archivo de reporte: {e}"
	            progress_updates.append(error_msg)
	            logger.error(error_msg)
	            final_report_path = None
	    else:
	        progress_updates.append("⚠️ No se puede exportar: no hay contenido de informe.")

	    progress(1, desc="🎉 Pipeline Completado")

	    return (figure, df_data, analysis_report, implementation_code,
	            final_report_path, "\n".join(progress_updates), final_report_path)

	# --- USER INTERFACE (unchanged) ---
	# Model names offered by the "Models" checkbox group.
	BIOTECH_MODELS = ['logistic', 'gompertz', 'moser', 'baranyi', 'monod', 'contois', 'andrews', 'tessier', 'richards', 'stannard', 'huang']
	# Choices of the "IA Model" dropdown; the first entry is its default value.
	IA_MODELS = ["deepseek-ai/DeepSeek-V3-0324"]
	# Theme and extra CSS passed to gr.Blocks; `.file-upload` is the class assigned to the file input.
	theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo", neutral_hue="slate")
	custom_css = ".file-upload { border: 2px dashed #3b82f6; } button.primary { background: linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%); }"

	if __name__ == "__main__":

	    def toggle_api_key_visibility(checked):
	        """Return a Textbox update whose visibility follows the checkbox state."""
	        return gr.Textbox(visible=checked)

	    def update_download_link(file_path):
	        """Return Markdown linking to the exported report, or a notice when there is none."""
	        if not file_path or not os.path.exists(file_path):
	            return "No se generó ningún archivo para descargar."
	        filename = os.path.basename(file_path)
	        return f"¡Archivo listo! 👉 [Descargar '{filename}'](/file={file_path})"

	    with gr.Blocks(theme=theme, title="BioTech Analysis & Report Generator", css=custom_css) as demo:
	        gr.Markdown("# 🧬 BioTech Analysis & Report Generator")
	        gr.Markdown("## Full Pipeline: Biotech Modeling → AI Reporting")

	        with gr.Accordion("🤖 How the AI Agents Work", open=False):
	            gr.Markdown("El sistema utiliza agentes para analizar el tipo de datos, optimizar parámetros y preparar el contexto para un análisis de IA profundo y relevante.")

	        with gr.Row():
	            # Left column: every input of the pipeline.
	            with gr.Column(scale=1):
	                gr.Markdown("## 📊 Configuration")
	                file_input = gr.File(
	                    label="📁 Data File (CSV/Excel)",
	                    file_types=[".csv", ".xlsx", ".xls"],
	                    elem_classes=["file-upload"],
	                )
	                if not os.path.exists("examples"):
	                    os.makedirs("examples")
	                # The example entry is only shown when the sample workbook is present.
	                if os.path.exists("examples/archivo.xlsx"):
	                    gr.Examples(examples=["examples/archivo.xlsx"], inputs=[file_input])
	                with gr.Accordion("🔬 Biotech Analysis Parameters", open=True):
	                    models_input = gr.CheckboxGroup(choices=BIOTECH_MODELS, value=['logistic', 'gompertz'], label="📊 Models")
	                    component_input = gr.Dropdown(['all', 'biomass', 'substrate', 'product'], value='all', label="📈 Component")
	                    exp_names_input = gr.Textbox(label="🏷️ Exp. Names", value="Analysis")
	                    use_de_input = gr.Checkbox(label="🧮 Use Diff. Evolution", value=False)
	                    maxfev_input = gr.Slider(label="🔄 Max Iterations", minimum=10000, maximum=100000, value=50000, step=1000)
	                with gr.Accordion("🤖 AI Report Parameters", open=True):
	                    ia_model_input = gr.Dropdown(choices=IA_MODELS, value=IA_MODELS[0], label="🤖 IA Model")
	                    detail_level_input = gr.Radio(['detailed', 'summarized'], value='detailed', label="📋 Detail Level")
	                    max_output_tokens_input = gr.Slider(minimum=1000, maximum=32000, value=4000, step=100, label="🔢 Max Tokens")
	                    additional_specs_input = gr.Textbox(label="📝 Add. Specs", placeholder="AI Agents will customize this...", lines=2)
	                with gr.Accordion("⚙️ Global & Export", open=True):
	                    language_input = gr.Dropdown(['en', 'es'], value='en', label="🌐 Language")
	                    gr.Markdown("📄 Formato de Exportación: El informe se generará como un archivo de texto (`.md`).")
	                with gr.Accordion("🔑 Personal API Key", open=False):
	                    use_personal_key_input = gr.Checkbox(label="Use Personal HF Token", value=False)
	                    personal_api_key_input = gr.Textbox(label="HF Token", type="password", visible=False)
	                process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary", size="lg")

	            # Right column: status log and results.
	            with gr.Column(scale=2):
	                gr.Markdown("## 📈 Results")
	                status_output = gr.Textbox(label="📊 Process Status Log", lines=8, interactive=False)
	                with gr.Tabs():
	                    with gr.TabItem("📊 Visualization"):
	                        plot_output = gr.Plot()
	                    with gr.TabItem("📋 Modeling Table"):
	                        table_output = gr.Dataframe()
	                    with gr.TabItem("📝 AI Report"):
	                        analysis_output = gr.Markdown()
	                    with gr.TabItem("💻 Code"):
	                        code_output = gr.Code(language="python")

	                # NOTE(review): nesting was reconstructed from a source whose
	                # indentation was lost; confirm these widgets belong to the
	                # results column rather than below the row.
	                download_link_markdown = gr.Markdown(value="El enlace de descarga aparecerá aquí al finalizar.", label="🔗 Enlace de Descarga")
	                report_output = gr.File(label="📥 Descargar Informe Final (Componente)", interactive=False)
	                report_path_state = gr.State(value=None)

	        use_personal_key_input.change(
	            fn=toggle_api_key_visibility,
	            inputs=use_personal_key_input,
	            outputs=personal_api_key_input,
	        )

	        # Order matches the positional parameters of the pipeline function.
	        pipeline_inputs = [
	            file_input, models_input, component_input, use_de_input, maxfev_input, exp_names_input,
	            ia_model_input, detail_level_input, language_input, additional_specs_input,
	            max_output_tokens_input, use_personal_key_input, personal_api_key_input,
	        ]
	        # Order matches the 7-tuple returned by the pipeline function.
	        pipeline_outputs = [
	            plot_output, table_output, analysis_output, code_output,
	            report_output, status_output, report_path_state,
	        ]
	        process_btn.click(
	            fn=process_complete_pipeline_with_agents,
	            inputs=pipeline_inputs,
	            outputs=pipeline_outputs,
	        )

	        # Refresh the Markdown download link whenever a new report path is stored.
	        report_path_state.change(
	            fn=update_download_link,
	            inputs=report_path_state,
	            outputs=download_link_markdown,
	        )

	    demo.launch(show_error=True, debug=True)