## Iris Startup Lab
'''
<(*)
  ( >)
  /|
'''
###### CSV Text Analyzer Shiny App ######
#
# Shiny application that loads a CSV file, runs Spanish sentiment and emotion
# classification (pysentimiento) over user-selected text columns, shows the
# enriched table, plots the label distributions and offers the result as a
# CSV download.

import os
from pathlib import Path
import io

# ---------------------------------------------------------------------------
# Cache configuration
#
# This section must run BEFORE matplotlib / pysentimiento are imported, because
# it sets the environment variables (HF_HOME, MPLCONFIGDIR, XDG_CACHE_HOME)
# that tell those libraries where to keep their caches.
# ---------------------------------------------------------------------------
print("--- Environment Variable Check for Cache Config ---")
print(f"Initial os.environ.get('HF_SPACE_ID'): {os.environ.get('HF_SPACE_ID')}")
print(f"Initial os.environ.get('HOME'): {os.environ.get('HOME')}")
print(f"Initial os.environ.get('USER'): {os.environ.get('USER')}")
print(f"Initial os.environ.get('XDG_CACHE_HOME'): {os.environ.get('XDG_CACHE_HOME')}")
print("-------------------------------------------------")

try:
    # Detect Hugging Face Spaces. HF_SPACE_ID is kept for backward
    # compatibility; SPACE_ID is also accepted.
    # NOTE(review): SPACE_ID is believed to be the variable Spaces actually
    # sets -- confirm against the Spaces documentation.
    hf_space_id_value = os.environ.get('HF_SPACE_ID') or os.environ.get('SPACE_ID')
    is_huggingface_spaces_by_id = bool(hf_space_id_value)

    current_home_dir_str = os.path.expanduser('~')
    current_home_dir = Path(current_home_dir_str)
    is_root_home = (current_home_dir_str == "/")

    print(f"DEBUG: HF_SPACE_ID raw value: '{hf_space_id_value}', is_huggingface_spaces_by_id: {is_huggingface_spaces_by_id}")
    print(f"DEBUG: os.path.expanduser('~') resolved to: {current_home_dir_str}, is_root_home: {is_root_home}")

    tmp_dir = Path("/tmp")
    tmp_exists = tmp_dir.exists()
    tmp_writable = os.access(str(tmp_dir), os.W_OK) if tmp_exists else False
    print(f"DEBUG: /tmp exists: {tmp_exists}, /tmp writable: {tmp_writable}")

    if is_huggingface_spaces_by_id:
        base_cache_path = tmp_dir / "iris_csv_analyzer_cache"
        print(f"INFO: Detected Hugging Face Spaces environment (by HF_SPACE_ID). Using /tmp for cache. Base path: {base_cache_path}")
    elif is_root_home and tmp_exists and tmp_writable:
        # A home directory of "/" is typical of containers; fall back to /tmp.
        base_cache_path = tmp_dir / "iris_csv_analyzer_cache"
        print(f"INFO: Detected container-like environment (home is '/' and /tmp is writable). Using /tmp for cache. Base path: {base_cache_path}")
    else:
        # Probe whether ~/.cache is writable by creating and removing a
        # throw-away directory.
        can_write_to_home_cache = False
        if current_home_dir_str != "/":
            try:
                home_cache_test_path = current_home_dir / ".cache" / "_test_writability_csv_analyzer"
                os.makedirs(home_cache_test_path, exist_ok=True)
                os.rmdir(home_cache_test_path)
                can_write_to_home_cache = True
            except OSError:
                can_write_to_home_cache = False

        if can_write_to_home_cache:
            base_cache_path = current_home_dir / ".cache" / "iris_csv_analyzer_cache"
            print(f"INFO: Detected standard local environment. Using home-based .cache: {base_cache_path}")
        else:
            # Last resort: keep the cache next to this script.
            script_dir_cache = Path(__file__).resolve().parent / ".app_cache_csv_analyzer"
            base_cache_path = script_dir_cache / "iris_csv_analyzer_cache"
            print(f"INFO: Home dir ('{current_home_dir_str}') not suitable for .cache or /tmp fallback failed. Using script-relative cache: {base_cache_path}")

    os.makedirs(base_cache_path, exist_ok=True)
    print(f"DEBUG: Ensured base_cache_path exists: {base_cache_path}")

    hf_cache_path = base_cache_path / "huggingface"
    os.environ['HF_HOME'] = str(hf_cache_path)
    print(f"DEBUG: Setting HF_HOME to: {hf_cache_path}")

    mpl_cache_path = base_cache_path / "matplotlib"
    os.environ['MPLCONFIGDIR'] = str(mpl_cache_path)
    print(f"DEBUG: Setting MPLCONFIGDIR to: {mpl_cache_path}")

    os.environ['XDG_CACHE_HOME'] = str(base_cache_path)
    print(f"DEBUG: Setting XDG_CACHE_HOME to: {base_cache_path}")

    os.makedirs(hf_cache_path, exist_ok=True)
    os.makedirs(mpl_cache_path, exist_ok=True)

    print(f"INFO: Final Cache directory base set to: {base_cache_path}")
    print(f"INFO: Final HF_HOME set to: {os.environ.get('HF_HOME')}")
    print(f"INFO: Final MPLCONFIGDIR set to: {os.environ.get('MPLCONFIGDIR')}")
    print(f"INFO: Final XDG_CACHE_HOME set to: {os.environ.get('XDG_CACHE_HOME')}")
except Exception as e:
    # Top-level boundary: cache setup is best-effort, the app must still start.
    print(f"CRITICAL WARNING: An unexpected error occurred during cache setup: {e}")
    import traceback
    traceback.print_exc()
    print("Proceeding without custom cache paths. This may impact model downloading/caching.")

from shiny import App, render, ui, reactive
import pandas as pd
import shinyswatch
from dotenv import load_dotenv
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
from pysentimiento import create_analyzer
import matplotlib.pyplot as plt
import seaborn as sns

load_dotenv()

# Encodings tried, in order, when reading an uploaded CSV. ``None`` is the
# pandas default (UTF-8); the others cover typical Excel exports.
_CSV_ENCODINGS = (None, "cp1252", "latin-1")


def _read_csv_with_fallback(path):
    """Read a CSV file, retrying with legacy encodings on decode errors.

    Args:
        path: Filesystem path of the CSV file.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        UnicodeDecodeError: If every encoding in ``_CSV_ENCODINGS`` fails.
        Exception: Any other error raised by ``pandas.read_csv`` propagates.
    """
    last_error = None
    for encoding in _CSV_ENCODINGS:
        try:
            return pd.read_csv(path, encoding=encoding)
        except UnicodeDecodeError as err:
            last_error = err
    raise last_error


def _is_missing_text(value):
    """Return True when *value* is None or a scalar missing marker (NaN/NaT)."""
    return value is None or (pd.api.types.is_scalar(value) and pd.isna(value))


def _placeholder_figure(message):
    """Build a dark, axis-less figure that only shows *message*, centred."""
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.text(0.5, 0.5, message, ha="center", va="center",
            fontsize=12, color="white", wrap=True)
    fig.patch.set_facecolor('#222222')
    ax.set_facecolor('#222222')
    ax.set_xticks([])
    ax.set_yticks([])
    return fig


def _plot_category_distribution(df_input, column_name, *, categories, palette,
                                title, xlabel, figsize, bar_width,
                                annotation_separator, annotation_fontsize,
                                rotate_labels=False):
    """Draw a bar chart with the count of each category in one column.

    The input frame is not modified. Values outside *categories* become
    missing and are not counted.

    Args:
        df_input: Frame containing *column_name*.
        column_name: Column holding the category labels.
        categories: Ordered list of labels to show on the x axis.
        palette: Mapping label -> bar colour.
        title: Figure title.
        xlabel: Label of the x axis.
        figsize: Matplotlib figure size tuple.
        bar_width: Relative bar width passed to seaborn.
        annotation_separator: Text between the count and the percentage.
        annotation_fontsize: Font size of the per-bar annotation.
        rotate_labels: Rotate x tick labels by 45 degrees when True.

    Returns:
        The matplotlib ``Figure``.
    """
    plt.style.use('seaborn-v0_8-darkgrid')

    labels = pd.Series(
        pd.Categorical(df_input[column_name], categories=categories, ordered=True),
        name=column_name,
    )
    counts = labels.value_counts().reindex(categories, fill_value=0)
    bar_colors = [palette.get(label, '#cccccc') for label in counts.index]

    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x=counts.index, y=counts.values, palette=bar_colors, ax=ax, width=bar_width)
    ax.set_title(title, fontsize=16, pad=20)
    ax.set_xlabel(xlabel, fontsize=14, labelpad=15)
    ax.set_ylabel('Número de Registros', fontsize=14, labelpad=15)
    ax.tick_params(axis='both', which='major', labelsize=12)
    if rotate_labels:
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    total = counts.sum()
    if total > 0:
        tallest = counts.max()
        # Small gap between the top of the bar and its annotation.
        offset = tallest * 0.015 if tallest > 0 else 0.15
        for position, count in enumerate(counts.values):
            if count > 0:
                percentage = (count / total) * 100
                annotation_text = f"{count}{annotation_separator}({percentage:.1f}%)"
                ax.text(position, count + offset, annotation_text, ha='center',
                        va='bottom', fontsize=annotation_fontsize, color='black')

    fig.tight_layout()
    return fig


# App UI
app_ui = ui.page_fixed(
    ui.tags.head(
        ui.include_css(Path(__file__).parent / "styles.css"),
        ui.tags.link(
            rel="stylesheet",
            href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
        )
    ),
    ui.layout_sidebar(
        ui.sidebar(
            ui.h1("Iris Startup Lab"),
            ui.hr(),
            ui.h2("Iris CSV Analyzer"),
            ui.hr(),
            ui.markdown("### Analizador de Emociones y Sentimientos en Texto"),
            ui.hr(),
            ui.input_file("file_upload", "Cargar archivo CSV (.csv) \n Próximamente archivos Excel", accept=[".csv"], multiple=False),
            ui.output_ui("column_selector_ui"),
            ui.input_action_button("analyze_button", "Analizar Columnas Seleccionadas", icon=ui.tags.i(class_="fas fa-cogs"), class_="btn-custom-green"),
            width=350
        ),
        ui.navset_card_tab(
            ui.nav_panel(
                "Datos Procesados",
                ui.output_data_frame("processed_table_output"),
                ui.download_button("download_csv_button", "Descargar CSV Procesado", icon=ui.tags.i(class_="fas fa-download"), class_="btn-custom-purple btn-sm mt-2"),
                icon=ui.tags.i(class_="fas fa-table-list")
            ),
            ui.nav_panel(
                "Análisis de Sentimiento",
                ui.output_ui("sentiment_plot_column_selector_ui"),
                ui.output_plot("sentiment_chart_output"),
                icon=ui.tags.i(class_="fa-solid fa-magnifying-glass-chart")
            ),
            ui.nav_panel(
                "Análisis de Emociones",
                ui.output_ui("emotion_plot_column_selector_ui"),
                ui.output_plot("emotion_chart_output"),
                icon=ui.tags.i(class_="fa-solid fa-icons")
            )
        )
    ),
    # Other themes tried: darkly, minty, spacelab.
    theme=shinyswatch.theme.morph()
)


# Server Logic
def server(input, output, session):
    """Wire the reactive logic of the app for one user session.

    Args:
        input: Shiny input accessor.
        output: Shiny output registrar.
        session: Shiny session object.
    """
    uploaded_data_rv = reactive.Value(pd.DataFrame())
    processed_data_rv = reactive.Value(pd.DataFrame())

    # Model instances are held in reactive values so they load lazily.
    pysentimiento_sentiment_analyzer_rv = reactive.Value(None)
    pysentimiento_emotion_analyzer_rv = reactive.Value(None)

    def _ensure_analyzer(analyzer_rv, task, label):
        """Load the pysentimiento analyzer for *task* if not loaded yet.

        Args:
            analyzer_rv: Reactive value that stores the analyzer.
            task: pysentimiento task name ("sentiment" or "emotion").
            label: Spanish word used in log and notification messages.

        Returns:
            True when the analyzer is available, False when loading failed.
        """
        if analyzer_rv.get() is not None:
            return True
        try:
            print(f"Cargando modelo de {label} Pysentimiento...")
            analyzer_rv.set(create_analyzer(task=task, lang="es"))
            print(f"Modelo de {label} Pysentimiento cargado.")
            return True
        except Exception as e:
            # Model loading can fail in many ways (network, disk, import);
            # report to the user instead of crashing the session.
            print(f"Error al cargar modelo de {label} Pysentimiento: {e}")
            ui.notification_show(f"Error al cargar modelo de {label}: {e}", type="error", duration=None)
            return False

    def _ensure_pysentimiento_sentiment_analyzer():
        """Make sure the sentiment model is loaded; return success flag."""
        return _ensure_analyzer(pysentimiento_sentiment_analyzer_rv, "sentiment", "sentimiento")

    def _ensure_pysentimiento_emotion_analyzer():
        """Make sure the emotion model is loaded; return success flag."""
        return _ensure_analyzer(pysentimiento_emotion_analyzer_rv, "emotion", "emociones")

    def run_sentiment_analysis(text):
        """Classify *text* and return a Spanish sentiment label.

        Missing values (None/NaN) and blank strings yield "Neutral" without
        calling the model. Failures are reported as error labels, never
        raised.
        """
        if _is_missing_text(text):
            # Without this, str(NaN) would send the word "nan" to the model.
            return "Neutral"
        text = str(text)
        if not text.strip():
            return "Neutral"
        if not _ensure_pysentimiento_sentiment_analyzer():
            return "Error: Modelo de Sentimiento no cargado"

        analyzer = pysentimiento_sentiment_analyzer_rv.get()
        if not analyzer:
            return "Error: Modelo no disponible"
        try:
            result = analyzer.predict(text)
            sentiment_map = {"POS": "Positivo", "NEG": "Negativo", "NEU": "Neutral"}
            return sentiment_map.get(result.output, "Neutral")
        except Exception as e:
            print(f"Error en análisis de sentimiento Pysentimiento para '{text[:50]}...': {e}")
            return "Error en análisis"

    def run_emotion_analysis(text):
        """Classify *text* and return a Spanish emotion label.

        Missing values (None/NaN) and blank strings yield "Neutral" without
        calling the model. Failures are reported as error labels, never
        raised.
        """
        if _is_missing_text(text):
            # Without this, str(NaN) would send the word "nan" to the model.
            return "Neutral"
        text = str(text)
        if not text.strip():
            return "Neutral"
        if not _ensure_pysentimiento_emotion_analyzer():
            return "Error: Modelo de Emociones no cargado"

        analyzer = pysentimiento_emotion_analyzer_rv.get()
        if not analyzer:
            return "Error: Modelo no disponible"
        try:
            result = analyzer.predict(text)
            emotion_map_es = {
                "joy": "Alegría",
                "sadness": "Tristeza",
                "anger": "Enojo",
                "fear": "Miedo",
                "surprise": "Sorpresa",
                "disgust": "Asco",
                "others": "Neutral",  # the "others" output is shown as neutral
            }
            return emotion_map_es.get(result.output, "Desconocida")
        except Exception as e:
            print(f"Error en análisis de emoción Pysentimiento para '{text[:50]}...': {e}")
            return "Error en análisis"

    @reactive.Effect
    @reactive.event(input.file_upload)
    def handle_file_upload():
        """Load the uploaded CSV into the reactive stores (or clear them)."""
        file_infos = input.file_upload()
        if not file_infos:
            uploaded_data_rv.set(pd.DataFrame())
            processed_data_rv.set(pd.DataFrame())
            return

        file_info = file_infos[0]  # single-file upload (multiple=False)
        try:
            print(f"Cargando archivo: {file_info['name']}")
            df = _read_csv_with_fallback(file_info["datapath"])
            uploaded_data_rv.set(df)
            # Show the raw data in the table until an analysis is run.
            processed_data_rv.set(df.copy())
            ui.notification_show(f"Archivo '{file_info['name']}' cargado exitosamente.", type="message")
            print(f"Archivo '{file_info['name']}' cargado. Columnas: {df.columns.tolist()}")
        except Exception as e:
            uploaded_data_rv.set(pd.DataFrame())
            processed_data_rv.set(pd.DataFrame())
            print(f"Error al leer CSV: {e}")
            ui.notification_show(f"Error al leer el archivo CSV: {e}", type="error", duration=None)

    @output
    @render.ui
    def column_selector_ui():
        """Render the checkbox group used to pick the columns to analyze."""
        df = uploaded_data_rv.get()
        if df.empty:
            return ui.markdown("_Cargue un archivo CSV para ver las columnas._")

        # Heuristic: object-typed columns are the likely text columns; when
        # there are none, any column may be pre-selected.
        potential_text_cols = [col for col in df.columns if df[col].dtype == 'object']
        if not potential_text_cols:
            potential_text_cols = df.columns.tolist()

        return ui.input_checkbox_group(
            "text_columns_selector",
            "Seleccionar columnas de texto para analizar:",
            choices=df.columns.tolist(),
            selected=potential_text_cols[:1]
        )

    @reactive.Effect
    @reactive.event(input.analyze_button)
    def trigger_analysis():
        """Run sentiment and emotion analysis over the selected columns."""
        # Check the data first: the column selector input does not exist until
        # a file is loaded, and reading it earlier would silently abort this
        # effect before the warning below could be shown.
        original_df = uploaded_data_rv.get()
        if original_df.empty:
            ui.notification_show("Por favor, cargue un archivo CSV primero.", type="warning")
            return

        selected_cols = input.text_columns_selector()
        if not selected_cols:
            ui.notification_show("Por favor, seleccione al menos una columna para analizar.", type="warning")
            return

        # Load both models up front so a failure is reported once, not per row.
        if not _ensure_pysentimiento_sentiment_analyzer() or not _ensure_pysentimiento_emotion_analyzer():
            ui.notification_show("No se pudieron cargar los modelos de análisis. Intente de nuevo.", type="error", duration=None)
            return

        df_to_process = original_df.copy()

        with ui.Progress(min=0, max=len(selected_cols) * 2) as p:
            p.set(message="Analizando texto...", detail="Iniciando...")
            current_step = 0
            for col_name in selected_cols:
                if col_name not in df_to_process.columns:
                    print(f"Advertencia: La columna '{col_name}' no se encontró en el DataFrame cargado.")
                    # Keep the bar in sync: a skipped column still uses 2 steps.
                    current_step += 2
                    continue

                sentiment_col_name = f"{col_name}_sentimiento"
                emotion_col_name = f"{col_name}_emocion"

                p.set(current_step, detail=f"Analizando sentimiento para '{col_name}'...")
                df_to_process[sentiment_col_name] = df_to_process[col_name].apply(run_sentiment_analysis)
                current_step += 1

                p.set(current_step, detail=f"Analizando emoción para '{col_name}'...")
                df_to_process[emotion_col_name] = df_to_process[col_name].apply(run_emotion_analysis)
                current_step += 1

        processed_data_rv.set(df_to_process)
        ui.notification_show("Análisis completado.", type="message")

    @output
    @render.data_frame
    def processed_table_output():
        """Show the processed table, the raw upload, or a hint message."""
        df = processed_data_rv.get()
        if not df.empty:
            return render.DataGrid(df, height="500px", width="100%")

        uploaded = uploaded_data_rv.get()
        if not uploaded.empty:
            return render.DataGrid(uploaded, height="500px", width="100%")

        hint = pd.DataFrame({"Mensaje": ["Cargue un archivo y presione 'Analizar' para ver los resultados."]})
        return render.DataGrid(hint, height="100px")

    # --- Plotting UI and Logic ---
    def get_analysis_columns(df, suffix):
        """Return the columns of *df* whose name ends with *suffix*."""
        if df.empty:
            return []
        return [col for col in df.columns if col.endswith(suffix)]

    @output
    @render.ui
    def sentiment_plot_column_selector_ui():
        """Render the selector of sentiment columns available for plotting."""
        sentiment_cols = get_analysis_columns(processed_data_rv.get(), "_sentimiento")
        if not sentiment_cols:
            return ui.markdown("_No hay columnas de sentimiento analizadas para graficar._")
        return ui.input_select("selected_sentiment_col_for_plot", "Graficar sentimiento de:", choices=sentiment_cols, selected=sentiment_cols[0])

    @output
    @render.ui
    def emotion_plot_column_selector_ui():
        """Render the selector of emotion columns available for plotting."""
        emotion_cols = get_analysis_columns(processed_data_rv.get(), "_emocion")
        if not emotion_cols:
            return ui.markdown("_No hay columnas de emoción analizadas para graficar._")
        return ui.input_select("selected_emotion_col_for_plot", "Graficar emoción de:", choices=emotion_cols, selected=emotion_cols[0])

    def plot_sentiment_distribution_seaborn(df_input: pd.DataFrame, sentiment_column_name: str):
        """Return a bar chart of the sentiment labels in one column.

        A placeholder figure is returned when the frame is empty or the
        column is absent. *df_input* is not modified.
        """
        if df_input.empty or sentiment_column_name not in df_input.columns:
            return _placeholder_figure('Sin datos de sentimiento para graficar.')

        return _plot_category_distribution(
            df_input,
            sentiment_column_name,
            categories=['Positivo', 'Neutral', 'Negativo', 'Error en análisis'],
            palette={"Positivo": "#2ECC71", "Neutral": "#F1C40F", "Negativo": "#E74C3C", "Error en análisis": "#7f8c8d"},
            title=f'Análisis de Sentimiento ({sentiment_column_name.replace("_sentimiento", "")})',
            xlabel='Sentimiento',
            figsize=(7, 5),
            bar_width=0.6,
            annotation_separator=" ",
            annotation_fontsize=9,
        )

    def plot_emotion_distribution_seaborn(df_input: pd.DataFrame, emotion_column_name: str):
        """Return a bar chart of the emotion labels in one column.

        A placeholder figure is returned when the frame is empty or the
        column is absent. *df_input* is not modified.
        """
        if df_input.empty or emotion_column_name not in df_input.columns:
            return _placeholder_figure('Sin datos de emoción para graficar.')

        return _plot_category_distribution(
            df_input,
            emotion_column_name,
            categories=["Alegría", "Tristeza", "Enojo", "Miedo", "Sorpresa", "Asco", "Neutral", "Desconocida", "Error en análisis"],
            palette={
                "Alegría": "#4CAF50", "Tristeza": "#2196F3", "Enojo": "#F44336",
                "Miedo": "#9C27B0", "Sorpresa": "#FFC107", "Asco": "#795548",
                "Neutral": "#9E9E9E", "Desconocida": "#607D8B", "Error en análisis": "#BDBDBD"
            },
            title=f'Análisis de Emociones ({emotion_column_name.replace("_emocion", "")})',
            xlabel='Emoción',
            figsize=(8, 6),  # slightly larger: more categories
            bar_width=0.7,
            annotation_separator="\n",  # newline so the label fits the bar
            annotation_fontsize=8,
            rotate_labels=True,
        )

    @output
    @render.plot
    def sentiment_chart_output():
        """Plot the sentiment distribution of the selected column."""
        df = processed_data_rv.get()
        # Decide on the placeholder before touching the select input: that
        # input only exists once sentiment columns are available, and reading
        # it earlier would leave the plot blank instead of showing the hint.
        if df.empty or not get_analysis_columns(df, "_sentimiento"):
            return _placeholder_figure("Cargue datos y seleccione una columna de sentimiento para ver el gráfico.")

        selected_col = input.selected_sentiment_col_for_plot()
        if not selected_col:
            return _placeholder_figure("Cargue datos y seleccione una columna de sentimiento para ver el gráfico.")
        return plot_sentiment_distribution_seaborn(df, selected_col)

    @output
    @render.plot
    def emotion_chart_output():
        """Plot the emotion distribution of the selected column."""
        df = processed_data_rv.get()
        # Same ordering rationale as in sentiment_chart_output.
        if df.empty or not get_analysis_columns(df, "_emocion"):
            return _placeholder_figure("Cargue datos y seleccione una columna de emoción para ver el gráfico.")

        selected_col = input.selected_emotion_col_for_plot()
        if not selected_col:
            return _placeholder_figure("Cargue datos y seleccione una columna de emoción para ver el gráfico.")
        return plot_emotion_distribution_seaborn(df, selected_col)

    @render.download(filename="datos_analizados.csv")
    async def download_csv_button():
        """Stream the processed table as a UTF-8 (with BOM) CSV file."""
        df_to_download = processed_data_rv.get()
        if df_to_download.empty:
            yield "No hay datos para descargar."
            return

        # utf-8-sig adds a BOM so spreadsheet tools detect the encoding.
        with io.BytesIO() as buf:
            df_to_download.to_csv(buf, index=False, encoding='utf-8-sig')
            buf.seek(0)
            yield buf.read()


app = App(app_ui, server)

if __name__ == "__main__":
    # Models are loaded lazily on first use inside a session; there is no
    # reactive context here in which to pre-load them.
    app.run()