# AGRO-LEO Ecuador: Dash dashboard for GLDAS Noah agro-climatic data.
import base64
import io
import os
import warnings

import dash
import dash_bootstrap_components as dbc
import matplotlib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, dash_table
from plotly.subplots import make_subplots
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, export_text, plot_tree

# Figures are rendered inside server callbacks, so a non-interactive backend
# must be selected before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402

warnings.filterwarnings('ignore')

# --------------------------------------------------
# Data loading
# --------------------------------------------------
DATA_PATH = os.environ.get(
    "GLDAS_CSV",
    "ecuador_gldas_enriquecido_solo_tierra.csv",
)

df = pd.read_csv(DATA_PATH, encoding="utf-8-sig")

# Validate the schema before touching any column, so that a malformed CSV
# produces the descriptive ValueError instead of a bare KeyError.
req = {"lat", "lon", "provincia", "canton", "time"}
faltan = req - set(df.columns)
if faltan:
    raise ValueError(f"Faltan columnas en el CSV: {faltan}")

df["time"] = pd.to_datetime(df["time"])
df["year"] = df["time"].dt.year
df["month"] = df["time"].dt.month

# Seasons as used in this dashboard for Ecuador (only two):
# dry "Verano" = June-November, rainy "Invierno" = December-May.
df["season"] = df["month"].map({
    12: "Invierno", 1: "Invierno", 2: "Invierno",
    3: "Invierno", 4: "Invierno", 5: "Invierno",
    6: "Verano", 7: "Verano", 8: "Verano",
    9: "Verano", 10: "Verano", 11: "Verano",
})

# Candidate variables; only those present in the CSV and numeric are offered.
CAND_VARS = [
    "SoilMoi0_10cm_inst", "SoilMoi10_40cm_inst", "RootMoist_inst",
    "SoilTMP0_10cm_inst", "Tair_f_inst", "Rainf_tavg", "Evap_tavg",
    "SWdown_f_tavg", "LWdown_f_tavg", "Qs_acc", "Qsb_acc", "Qsm_acc",
    "CanopInt_inst",
]
NUM_VARS = [
    v for v in CAND_VARS
    if v in df.columns and pd.api.types.is_numeric_dtype(df[v])
]

# Human-readable (user-facing) names for each variable.
LABELS = {
    "SoilMoi0_10cm_inst": "Humedad Suelo Superficial (0-10cm)",
    "SoilMoi10_40cm_inst": "Humedad Suelo Profundo (10-40cm)",
    "RootMoist_inst": "Humedad Zona Radicular",
    "SoilTMP0_10cm_inst": "Temperatura del Suelo",
    "Tair_f_inst": "Temperatura del Aire",
    "Rainf_tavg": "Precipitación Media",
    "Evap_tavg": "Evapotranspiración",
    "SWdown_f_tavg": "Radiación Solar Incidente",
    "LWdown_f_tavg": "Radiación Térmica Incidente",
    "Qs_acc": "Escorrentía Superficial",
    "Qsb_acc": "Escorrentía Subsuperficial",
    "Qsm_acc": "Derretimiento de Nieve",
    "CanopInt_inst": "Intercepción del Dosel",
}

# Display units for each variable.
UNITS = {
    "SoilMoi0_10cm_inst": "kg/m²",
    "SoilMoi10_40cm_inst": "kg/m²",
    "RootMoist_inst": "kg/m²",
    "SoilTMP0_10cm_inst": "K",
    "Tair_f_inst": "K",
    "Rainf_tavg": "kg·m⁻²·s⁻¹",
    "Evap_tavg": "kg·m⁻²·s⁻¹",
    "SWdown_f_tavg": "W·m⁻²",
    "LWdown_f_tavg": "W·m⁻²",
    "Qs_acc": "kg/m²",
    "Qsb_acc": "kg/m²",
    "Qsm_acc": "kg/m²",
    "CanopInt_inst": "kg/m²",
}

PROVINCIAS = sorted(df["provincia"].dropna().unique().tolist())
CANTONES_POR_PROV = {
    p: sorted(df.loc[df["provincia"] == p, "canton"].dropna().unique().tolist())
    for p in PROVINCIAS
}
# Every canton in the file, including rows whose province is missing.
TODOS_LOS_CANTONES = sorted(df["canton"].dropna().unique().tolist())
YEARS = sorted(df["year"].unique().tolist())
SEASONS = ["Verano", "Invierno"]  # Only two seasons for Ecuador
MIN_DATE = df["time"].min().date()
MAX_DATE = df["time"].max().date()

app = Dash(
    __name__,
    external_stylesheets=[
        dbc.themes.BOOTSTRAP,
        "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css",
    ],
    suppress_callback_exceptions=True,
)
server = app.server

# --------------------------------------------------
# Custom styles
# --------------------------------------------------
CARD_STYLE = {
    "box-shadow": "0 4px 6px rgba(0, 0, 0, 0.1)",
    "border": "none",
    "border-radius": "10px",
    "margin-bottom": "20px",
}

HEADER_STYLE = {
    "background": "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    "color": "white",
    "padding": "30px 0",
    "margin-bottom": "30px",
    "border-radius": "0 0 20px 20px",
}

# (label, value) of every tab, in display order.
_TABS = [
    ("📊 Dashboard Principal", "dashboard"),
    ("🗺️ Análisis Espacial", "spatial"),
    ("📈 Series Temporales", "temporal"),
    ("🌳 Árbol de Clasificación", "tree_classification"),
    ("📋 Estadísticas Detalladas", "statistics"),
]

# --------------------------------------------------
# Main layout
# --------------------------------------------------
app.layout = dbc.Container([
    # Header
    html.Div([
        html.H1([
            html.I(className="fas fa-seedling me-3"),
            "AGRO-LEO ECUADOR",
        ], className="text-center mb-2", style={"font-weight": "bold"}),
        html.P(
            "Sistema de Monitoreo Agroclimático basado en datos GLDAS Noah 0.25° (2020-2024)",
            className="text-center mb-0",
            style={"opacity": "0.9"},
        ),
    ], style=HEADER_STYLE),

    # Main controls
    dbc.Card([
        dbc.CardBody([
            dbc.Row([
                dbc.Col([
                    dbc.Label(
                        [html.I(className="fas fa-chart-line me-2"), "Variable Agroclimática"],
                        className="fw-bold",
                    ),
                    dcc.Dropdown(
                        id="sel-var",
                        options=[{"label": LABELS.get(v, v), "value": v} for v in NUM_VARS],
                        value=NUM_VARS[0] if NUM_VARS else None,
                        clearable=False,
                        style={"border-radius": "8px"},
                    ),
                ], md=3),
                dbc.Col([
                    dbc.Label(
                        [html.I(className="fas fa-calendar me-2"), "Período de Análisis"],
                        className="fw-bold",
                    ),
                    dcc.Dropdown(
                        id="sel-years",
                        options=[{"label": f"Año {y}", "value": y} for y in YEARS],
                        value=YEARS,
                        multi=True,
                    ),
                ], md=3),
                dbc.Col([
                    dbc.Label(
                        [html.I(className="fas fa-map-marker-alt me-2"), "Región"],
                        className="fw-bold",
                    ),
                    dcc.Dropdown(
                        id="sel-provinces",
                        options=[{"label": p, "value": p} for p in PROVINCIAS],
                        value=[],
                        multi=True,
                        placeholder="Todas las provincias",
                    ),
                ], md=3),
                dbc.Col([
                    dbc.Label(
                        [html.I(className="fas fa-leaf me-2"), "Estación"],
                        className="fw-bold",
                    ),
                    dcc.Dropdown(
                        id="sel-season",
                        options=[{"label": s, "value": s} for s in SEASONS],
                        value=[],
                        multi=True,
                        placeholder="Ambas estaciones",
                    ),
                ], md=3),
            ], className="g-3"),

            html.Hr(style={"margin": "20px 0"}),

            dbc.Row([
                dbc.Col([
                    dbc.Label("Cantones (opcional)"),
                    dcc.Dropdown(
                        id="sel-cantons",
                        options=[],
                        value=[],
                        multi=True,
                        placeholder="Seleccionar cantones",
                    ),
                ], md=6),
                dbc.Col([
                    dbc.Label("Rango de Fechas"),
                    dcc.DatePickerRange(
                        id="sel-dates",
                        start_date=str(MIN_DATE),
                        end_date=str(MAX_DATE),
                        display_format="DD/MM/YYYY",
                    ),
                ], md=6),
            ], className="g-3"),
        ])
    ], style=CARD_STYLE),

    # Main KPIs (filled by a callback)
    html.Div(id="main-kpis"),

    # Tabs
    dbc.Card([
        dbc.CardBody([
            dcc.Tabs(
                id="main-tabs",
                value="dashboard",
                children=[
                    dcc.Tab(
                        label=tab_label,
                        value=tab_value,
                        className="custom-tab",
                        selected_className="custom-tab--selected",
                    )
                    for tab_label, tab_value in _TABS
                ],
                style={"margin-bottom": "20px"},
            ),
            html.Div(id="tab-content"),
        ])
    ], style=CARD_STYLE),

    # Footer
    html.Hr(style={"margin-top": "40px"}),
    html.Div([
        html.P([
            "© 2025 AGRO-LEO Ecuador • ",
            html.A("Datos GLDAS", href="https://ldas.gsfc.nasa.gov/gldas", target="_blank"),
            " • Desarrollado para agricultura de precisión",
        ], className="text-center text-muted mb-0")
    ], style={"padding": "20px 0"}),
], fluid=True, style={"background-color": "#f8f9fa", "min-height": "100vh", "padding": "0"})


def _axis_label(var):
    """Return the user-facing '<label> (<unit>)' text for a variable."""
    return f"{LABELS.get(var, var)} ({UNITS.get(var, '')})"


def filtrar_datos(df_orig, var, years, provinces, cantons, season, dates):
    """Return the rows of ``df_orig`` that match the selected filters.

    Args:
        df_orig: Frame with ``year``, ``provincia``, ``canton``, ``season``
            and ``time`` columns.
        var: Selected variable; not used for filtering, kept so existing
            callers keep working.
        years, provinces, cantons, season: Collections of accepted values.
            An empty or ``None`` collection disables that filter.
        dates: ``(start, end)`` pair. The filter is applied only when both
            ends are truthy. An end value without a time-of-day component
            (what the date picker sends) covers that whole day.

    Returns:
        A new DataFrame; ``df_orig`` is never modified.
    """
    mask = pd.Series(True, index=df_orig.index)

    if years:
        mask &= df_orig["year"].isin(years)
    if provinces:
        mask &= df_orig["provincia"].isin(provinces)
    if cantons:
        mask &= df_orig["canton"].isin(cantons)
    if season:
        mask &= df_orig["season"].isin(season)

    if dates and dates[0] and dates[1]:
        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])
        stamps = df_orig["time"]
        if end == end.normalize():
            # A bare date means "through the end of that day"; comparing
            # against midnight would drop sub-daily records of the last day.
            mask &= (stamps >= start) & (stamps < end + pd.Timedelta(days=1))
        else:
            mask &= (stamps >= start) & (stamps <= end)

    # Boolean indexing always builds a new frame, so no explicit copy of the
    # full dataset is needed.
    return df_orig[mask]


def _kpi_card(icon_class, value_text, caption):
    """Build one KPI column: icon, headline value and small caption."""
    return dbc.Col([
        dbc.Card([
            dbc.CardBody([
                html.Div([
                    html.I(className=icon_class),
                    html.H4(value_text, className="mb-1"),
                    html.P(caption, className="text-muted mb-0 small"),
                ], className="text-center")
            ])
        ], style=CARD_STYLE)
    ], md=2)


def crear_kpis_principales(df_f, var):
    """Build the row of headline KPI cards for the filtered data.

    Returns an empty ``html.Div`` when ``df_f`` is empty or lacks ``var``.
    When ``var`` has no non-null values the numeric KPIs are shown as 0.
    """
    if df_f.empty or var not in df_f.columns:
        return html.Div()

    total_registros = len(df_f)
    fechas_unicas = df_f["time"].nunique()
    ubicaciones = df_f[["lat", "lon"]].drop_duplicates().shape[0]
    na_percent = df_f[var].isna().mean() * 100

    var_data = df_f[var].dropna()
    if len(var_data) > 0:
        promedio = var_data.mean()
        minimo = var_data.min()
        maximo = var_data.max()
    else:
        promedio = minimo = maximo = 0

    tarjetas = [
        ("fas fa-database fa-2x text-primary mb-2",
         f"{total_registros:,}", "Registros Totales"),
        ("fas fa-calendar fa-2x text-success mb-2",
         f"{fechas_unicas}", "Fechas Únicas"),
        ("fas fa-map-pin fa-2x text-info mb-2",
         f"{ubicaciones}", "Ubicaciones"),
        ("fas fa-chart-bar fa-2x text-warning mb-2",
         f"{promedio:.2f}", f"Promedio ({UNITS.get(var, '')})"),
        ("fas fa-arrows-alt-v fa-2x text-danger mb-2",
         f"{minimo:.1f} - {maximo:.1f}", "Rango Min-Max"),
        ("fas fa-exclamation-triangle fa-2x text-secondary mb-2",
         f"{na_percent:.1f}%", "Datos Faltantes"),
    ]
    return dbc.Row(
        [_kpi_card(icono, valor, texto) for icono, valor, texto in tarjetas],
        className="g-3 mb-4",
    )


def render_dashboard(df_f, var):
    """Main dashboard: histogram, time series, per-province and per-season bars."""
    label = LABELS.get(var, var)
    axis_label = _axis_label(var)

    # Value distribution
    fig_dist = px.histogram(
        df_f, x=var, nbins=30,
        title=f"Distribución de {label}",
        labels={var: axis_label},
    )
    fig_dist.update_layout(showlegend=False)

    # Mean over time with a +/- 1 standard deviation band
    df_time = df_f.groupby('time')[var].agg(['mean', 'std']).reset_index()
    fig_time = go.Figure()
    fig_time.add_trace(go.Scatter(
        x=df_time['time'], y=df_time['mean'],
        mode='lines', name='Promedio',
        line=dict(color='#1f77b4', width=2),
    ))
    fig_time.add_trace(go.Scatter(
        x=df_time['time'], y=df_time['mean'] + df_time['std'],
        mode='lines', name='+ 1 Desv. Std',
        line=dict(width=0), showlegend=False, hoverinfo='skip',
    ))
    # 'tonexty' fills between this lower bound and the upper bound above.
    fig_time.add_trace(go.Scatter(
        x=df_time['time'], y=df_time['mean'] - df_time['std'],
        mode='lines', name='- 1 Desv. Std',
        line=dict(width=0), fillcolor='rgba(31, 119, 180, 0.2)',
        fill='tonexty', showlegend=False, hoverinfo='skip',
    ))
    fig_time.update_layout(
        title=f"Evolución Temporal de {label}",
        xaxis_title="Fecha",
        yaxis_title=axis_label,
    )

    # Mean per province
    if 'provincia' in df_f.columns:
        df_prov = df_f.groupby('provincia')[var].agg(['mean', 'count']).reset_index()
        df_prov = df_prov.sort_values('mean', ascending=True)
        fig_prov = px.bar(
            df_prov, x='mean', y='provincia',
            title=f"Promedio por Provincia - {label}",
            labels={'mean': axis_label, 'provincia': 'Provincia'},
            orientation='h',
        )
        fig_prov.update_layout(height=400)
    else:
        fig_prov = go.Figure()

    # Mean per season (two-season scheme)
    if 'season' in df_f.columns:
        df_season = df_f.groupby('season')[var].agg(['mean', 'std']).reset_index()
        fig_season = px.bar(
            df_season, x='season', y='mean', error_y='std',
            title=f"Variación Estacional Ecuador - {label}",
            labels={'mean': axis_label, 'season': 'Estación'},
            color='season',
            color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'},
        )
        fig_season.update_layout(
            annotations=[
                dict(text="Verano: Junio-Noviembre (seco)",
                     xref="paper", yref="paper", x=0.02, y=0.98,
                     showarrow=False, font_size=10),
                dict(text="Invierno: Diciembre-Mayo (lluvioso)",
                     xref="paper", yref="paper", x=0.02, y=0.93,
                     showarrow=False, font_size=10),
            ]
        )
    else:
        fig_season = go.Figure()

    return dbc.Row([
        dbc.Col([dcc.Graph(figure=fig, config={"displayModeBar": True})], md=6)
        for fig in (fig_dist, fig_time, fig_prov, fig_season)
    ], className="g-3")


def render_spatial_analysis(df_f, var):
    """Spatial analysis: interactive point map coloured by ``var``.

    At most 10,000 points are drawn. The subsample is seeded so the map does
    not reshuffle every time the tab is re-rendered with the same filters.
    """
    max_points = 10000
    if len(df_f) > max_points:
        df_map = df_f.sample(max_points, random_state=42)
    else:
        df_map = df_f

    fig_map = px.scatter_mapbox(
        df_map,
        lat="lat", lon="lon", color=var,
        hover_data={"provincia": True, "canton": True, "time": True, var: ":.3f"},
        zoom=5.2, height=600,
        color_continuous_scale="Viridis",
        title=f"Distribución Espacial - {LABELS.get(var, var)}",
    )
    fig_map.update_traces(marker={"size": 6})
    fig_map.update_layout(mapbox_style="open-street-map")

    return dbc.Row([
        dbc.Col([
            dcc.Graph(figure=fig_map, config={"displayModeBar": True})
        ], md=12),
    ], className="g-3")


def render_temporal_analysis_simple(df_f, var):
    """Time-series tab shell: a year selector plus placeholders.

    The ``temporal-analysis-content`` and ``temporal-year-info`` containers
    are filled by the callback listening on ``temporal-year-selector``.
    ``var`` is not used here.
    """
    # Plain Python ints, like the module-level YEARS list.
    years_available = sorted(df_f['year'].unique().tolist())

    year_selector = dbc.Card([
        dbc.CardBody([
            dbc.Row([
                dbc.Col([
                    dbc.Label("Seleccionar Año para Análisis:"),
                    dcc.Dropdown(
                        id="temporal-year-selector",
                        options=[{"label": f"Año {y}", "value": y} for y in years_available],
                        value=years_available[-1],  # latest year by default
                        clearable=False,
                    ),
                ], md=4),
                dbc.Col([
                    html.Div(id="temporal-year-info", className="mt-3")
                ], md=8),
            ])
        ])
    ], className="mb-4")

    return html.Div([
        year_selector,
        html.Div(id="temporal-analysis-content"),
    ])


def create_variable_classes(df_f, var):
    """Label every row of ``df_f[var]`` with a quartile-based class.

    Returns:
        ``(classes, quartiles)`` where ``classes`` is a Series aligned with
        ``df_f`` holding 'Bajo' (<= Q1), 'Medio-Bajo' (<= Q2),
        'Medio-Alto' (<= Q3), 'Alto' (> Q3) or 'Sin datos' (missing value),
        and ``quartiles`` is ``{'q25': ..., 'q50': ..., 'q75': ...}``
        computed on the non-null values.
    """
    values = df_f[var]
    var_data = values.dropna()

    q25 = var_data.quantile(0.25)
    q50 = var_data.quantile(0.50)
    q75 = var_data.quantile(0.75)

    # np.select takes the first matching condition, i.e. the same precedence
    # as an if/elif chain, but vectorised over the whole column.
    etiquetas = np.select(
        [
            values.isna().to_numpy(),
            (values <= q25).to_numpy(),
            (values <= q50).to_numpy(),
            (values <= q75).to_numpy(),
        ],
        ['Sin datos', 'Bajo', 'Medio-Bajo', 'Medio-Alto'],
        default='Alto',
    )
    classes = pd.Series(etiquetas, index=values.index, name=values.name, dtype=object)
    return classes, {'q25': q25, 'q50': q50, 'q75': q75}


def render_tree_classification(df_f, var):
    """Classification-tree tab: fit a decision tree and report on it.

    The target is the quartile class of ``var``. Features are up to six of
    the other numeric variables plus a cyclic month encoding and
    label-encoded province and season. Returns a layout with the tree
    image, class thresholds, accuracy, confusion matrix and feature
    importances, or a ``dbc.Alert`` when there is not enough data or
    anything fails.
    """
    class_order = ['Bajo', 'Medio-Bajo', 'Medio-Alto', 'Alto']
    derived_names = {
        'provincia_encoded': 'Provincia',
        'season_encoded': 'Estación',
        'month_sin': 'Mes (Sin)',
        'month_cos': 'Mes (Cos)',
    }

    try:
        # Target classes
        df_tree = df_f.copy()
        df_tree['target_class'], quartiles = create_variable_classes(df_tree, var)

        numeric_vars = [v for v in NUM_VARS if v != var and v in df_tree.columns]
        if len(numeric_vars) < 2:
            return dbc.Alert(
                "No hay suficientes variables para crear el árbol de clasificación.",
                color="warning",
            )

        # Cyclic month encoding so that December and January are neighbours.
        df_tree['month_sin'] = np.sin(2 * np.pi * df_tree['month'] / 12)
        df_tree['month_cos'] = np.cos(2 * np.pi * df_tree['month'] / 12)

        # Categorical variables as integer codes
        df_tree['provincia_encoded'] = LabelEncoder().fit_transform(
            df_tree['provincia'].astype(str)
        )
        df_tree['season_encoded'] = LabelEncoder().fit_transform(
            df_tree['season'].astype(str)
        )

        feature_cols = numeric_vars[:6] + [
            'month_sin', 'month_cos', 'provincia_encoded', 'season_encoded'
        ]
        feature_cols = [col for col in feature_cols if col in df_tree.columns]
        feature_names_readable = [
            LABELS[col] if col in LABELS else derived_names.get(col, col)
            for col in feature_cols
        ]

        # Impute missing features with the column median; drop unlabeled rows.
        X = df_tree[feature_cols].fillna(df_tree[feature_cols].median())
        y = df_tree['target_class']
        valid_mask = y != 'Sin datos'
        X = X[valid_mask]
        y = y[valid_mask]

        if len(X) < 100:
            return dbc.Alert(
                "No hay suficientes datos válidos para entrenar el árbol.",
                color="warning",
            )

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )

        tree_model = DecisionTreeClassifier(
            max_depth=5,
            min_samples_split=50,
            min_samples_leaf=20,
            random_state=42,
        )
        tree_model.fit(X_train, y_train)
        y_pred = tree_model.predict(X_test)

        # Render the tree to a base64 PNG. The figure is closed even when
        # plotting or saving fails so that failed requests do not leak it.
        fig, ax = plt.subplots(figsize=(25, 15))
        try:
            plot_tree(
                tree_model,
                feature_names=feature_names_readable,
                # Use the classes the model actually saw: with tied quartiles
                # fewer than four classes exist and a fixed list would
                # mislabel the nodes.
                class_names=[str(c) for c in tree_model.classes_],
                filled=True,
                rounded=True,
                fontsize=12,
                proportion=True,
                impurity=True,
                ax=ax,
            )
            ax.set_title(
                f"Árbol de Clasificación - {LABELS.get(var, var)}",
                fontsize=20, fontweight='bold', pad=20,
            )
            buffer = io.BytesIO()
            fig.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
        finally:
            plt.close(fig)
        tree_plot_url = base64.b64encode(buffer.getvalue()).decode()

        # Model metrics
        accuracy = accuracy_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred, labels=class_order)

        fig_cm = px.imshow(
            cm,
            labels=dict(x="Predicho", y="Real", color="Frecuencia"),
            x=class_order,
            y=class_order,
            title="Matriz de Confusión",
            color_continuous_scale="Blues",
            text_auto=True,
        )

        # Feature importances
        importance_df = pd.DataFrame({
            'Variable': feature_names_readable,
            'Importancia': tree_model.feature_importances_,
        }).sort_values('Importancia', ascending=True)

        fig_importance = px.bar(
            importance_df.tail(10),
            x='Importancia', y='Variable',
            title="Importancia de Variables en el Árbol",
            orientation='h',
            color='Importancia',
            color_continuous_scale='Viridis',
        )

        # Class thresholds
        unit = UNITS.get(var, '')
        class_info = dbc.Card([
            dbc.CardHeader("Información de las Clases"),
            dbc.CardBody([
                html.P(f"Bajo: ≤ {quartiles['q25']:.3f} {unit}", className="mb-2"),
                html.P(f"Medio-Bajo: {quartiles['q25']:.3f} - {quartiles['q50']:.3f} {unit}",
                       className="mb-2"),
                html.P(f"Medio-Alto: {quartiles['q50']:.3f} - {quartiles['q75']:.3f} {unit}",
                       className="mb-2"),
                html.P(f"Alto: > {quartiles['q75']:.3f} {unit}", className="mb-0"),
            ])
        ])

        metrics_card = dbc.Card([
            dbc.CardHeader("Métricas del Modelo"),
            dbc.CardBody([
                html.H4(f"Precisión: {accuracy:.3f}", className="text-primary mb-3"),
                html.P(f"Datos de entrenamiento: {len(X_train):,}", className="mb-2"),
                html.P(f"Datos de prueba: {len(X_test):,}", className="mb-2"),
                html.P(f"Profundidad del árbol: {tree_model.get_depth()}", className="mb-0"),
            ])
        ])

        return dbc.Row([
            # Class information and metrics
            dbc.Col([
                class_info,
                html.Br(),
                metrics_card,
            ], md=3),

            # Tree image
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader("Visualización del Árbol de Clasificación"),
                    dbc.CardBody([
                        html.Img(src=f"data:image/png;base64,{tree_plot_url}",
                                 style={"width": "100%", "height": "auto"}),
                        html.Hr(),
                        dbc.Alert([
                            html.H6("Interpretación:", className="mb-2"),
                            html.P("• Cada nodo muestra la condición de división y las muestras",
                                   className="mb-1"),
                            html.P("• Los colores representan las diferentes clases",
                                   className="mb-1"),
                            html.P("• Las hojas muestran la clasificación final",
                                   className="mb-0"),
                        ], color="info"),
                    ])
                ])
            ], md=9),

            # Confusion matrix
            dbc.Col([
                dcc.Graph(figure=fig_cm, config={"displayModeBar": True})
            ], md=6),

            # Feature importances
            dbc.Col([
                dcc.Graph(figure=fig_importance, config={"displayModeBar": True})
            ], md=6),
        ], className="g-3")

    except Exception as e:
        # UI boundary: show the failure in the tab instead of breaking the
        # callback.
        return dbc.Alert(f"Error al crear el árbol de clasificación: {str(e)}", color="danger")


def render_detailed_statistics(df_f, var):
    """Detailed statistics tab for ``var``.

    Shows a descriptive-statistics table, the histogram against a fitted
    normal curve, box plots per province and per season, the strongest
    correlations with the other numeric variables, and per-season and
    per-province summary tables. Returns a warning alert when ``var`` has
    no non-null values.
    """
    from scipy import stats as scipy_stats

    var_data = df_f[var].dropna()
    if len(var_data) == 0:
        return dbc.Alert("No hay datos disponibles para análisis estadístico.", color="warning")

    label = LABELS.get(var, var)
    unit = UNITS.get(var, '')
    axis_label = _axis_label(var)

    # Basic statistics
    media = var_data.mean()
    sigma = var_data.std()
    q1 = var_data.quantile(0.25)
    q3 = var_data.quantile(0.75)
    modas = var_data.mode()
    stats = {
        'Media': media,
        'Mediana': var_data.median(),
        'Moda': modas.iloc[0] if not modas.empty else var_data.median(),
        'Desviación Estándar': sigma,
        'Varianza': var_data.var(),
        'Mínimo': var_data.min(),
        'Máximo': var_data.max(),
        'Rango': var_data.max() - var_data.min(),
        'Q1 (Percentil 25)': q1,
        'Q3 (Percentil 75)': q3,
        'IQR': q3 - q1,
        'Coef. Variación %': (sigma / media) * 100 if media != 0 else 0,
    }
    stats_df = pd.DataFrame([
        {'Estadística': k, 'Valor': f"{v:.3f}", 'Unidad': unit}
        for k, v in stats.items()
    ])

    summary_funcs = ['count', 'mean', 'median', 'std', 'min', 'max']
    summary_cols = ['Registros', 'Media', 'Mediana', 'Desv. Std', 'Mínimo', 'Máximo']

    # Per-province statistics
    if 'provincia' in df_f.columns:
        prov_stats = df_f.groupby('provincia')[var].agg(summary_funcs).round(3).reset_index()
        prov_stats.columns = ['Provincia'] + summary_cols
    else:
        prov_stats = pd.DataFrame()

    # Per-season statistics (Verano / Invierno)
    season_stats = df_f.groupby('season')[var].agg(summary_funcs).round(3).reset_index()
    season_stats.columns = ['Estación'] + summary_cols
    season_stats['Período'] = season_stats['Estación'].map({
        'Verano': 'Jun-Nov (Seco)',
        'Invierno': 'Dic-May (Lluvioso)',
    })

    # Box plot per province
    fig_box = px.box(
        df_f, x='provincia', y=var,
        title=f"Distribución por Provincia - {label}",
        labels={var: axis_label, 'provincia': 'Provincia'},
    )
    fig_box.update_xaxes(tickangle=45)

    # Observed distribution vs. normal curve with the same mean and std
    fig_hist_normal = go.Figure()
    fig_hist_normal.add_trace(go.Histogram(
        x=var_data,
        nbinsx=40,
        name='Datos Observados',
        marker_color='lightblue',
        opacity=0.7,
        histnorm='probability density',
    ))
    # A normal density is undefined for a single value or constant data.
    if np.isfinite(sigma) and sigma > 0:
        x_norm = np.linspace(var_data.min(), var_data.max(), 100)
        y_norm = scipy_stats.norm.pdf(x_norm, media, sigma)
        fig_hist_normal.add_trace(go.Scatter(
            x=x_norm, y=y_norm,
            mode='lines',
            name='Distribución Normal Teórica',
            line=dict(color='red', width=3),
        ))
    fig_hist_normal.update_layout(
        title=f"Distribución vs Normal Teórica - {label}",
        xaxis_title=axis_label,
        yaxis_title="Densidad de Probabilidad",
    )

    # Correlations with (at most eight of) the other numeric variables
    numeric_cols = [col for col in NUM_VARS if col in df_f.columns and col != var]
    if len(numeric_cols) > 1:
        corr_data = df_f[[var] + numeric_cols[:8]].corr()[var].drop(var)
        corr_df = pd.DataFrame({
            'Variable': [LABELS.get(col, col) for col in corr_data.index],
            'Correlación': corr_data.values,
        }).sort_values('Correlación', key=abs, ascending=False)

        fig_corr = px.bar(
            corr_df.head(8),
            x='Correlación', y='Variable',
            title=f"Correlaciones más Fuertes con {label}",
            orientation='h',
            color='Correlación',
            color_continuous_scale='RdBu_r',
            range_color=[-1, 1],
        )
    else:
        fig_corr = go.Figure()

    # Seasonal variation
    fig_seasonal = px.box(
        df_f, x='season', y=var,
        color='season',
        title=f"Variación Estacional Ecuador - {label}",
        labels={var: axis_label, 'season': 'Estación'},
        color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'},
    )
    fig_seasonal.update_layout(
        annotations=[
            dict(text="Verano: Junio-Noviembre (época seca)",
                 xref="paper", yref="paper", x=0.02, y=0.98,
                 showarrow=False, font_size=11, bgcolor="rgba(255,255,255,0.8)"),
            dict(text="Invierno: Diciembre-Mayo (época lluviosa)",
                 xref="paper", yref="paper", x=0.02, y=0.93,
                 showarrow=False, font_size=11, bgcolor="rgba(255,255,255,0.8)"),
        ]
    )

    if not season_stats.empty:
        season_table = dash_table.DataTable(
            data=season_stats.to_dict('records'),
            columns=[{"name": i, "id": i} for i in season_stats.columns],
            style_cell={'textAlign': 'center', 'fontSize': '12px'},
            style_header={'backgroundColor': '#e8f5e8', 'fontWeight': 'bold'},
            style_data_conditional=[
                {
                    'if': {'filter_query': '{Estación} = Verano'},
                    'backgroundColor': '#fff3cd',
                },
                {
                    'if': {'filter_query': '{Estación} = Invierno'},
                    'backgroundColor': '#d1ecf1',
                },
            ],
        )
    else:
        season_table = html.P("No hay datos estacionales disponibles", className="text-muted")

    if not prov_stats.empty:
        prov_table = dash_table.DataTable(
            data=prov_stats.to_dict('records'),
            columns=[{"name": i, "id": i} for i in prov_stats.columns],
            style_cell={'textAlign': 'center', 'fontSize': '11px'},
            style_header={'backgroundColor': '#e3f2fd', 'fontWeight': 'bold'},
            style_data_conditional=[
                {
                    'if': {'row_index': 'odd'},
                    'backgroundColor': '#f8f9fa',
                }
            ],
            page_size=10,
            sort_action="native",
        )
    else:
        prov_table = html.P("No hay datos provinciales disponibles", className="text-muted")

    return dbc.Row([
        # Descriptive statistics
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-chart-pie me-2"),
                    f"Estadísticas Descriptivas - {label}",
                ]),
                dbc.CardBody([
                    dash_table.DataTable(
                        data=stats_df.to_dict('records'),
                        columns=[{"name": i, "id": i} for i in stats_df.columns],
                        style_cell={'textAlign': 'left', 'fontSize': '14px'},
                        style_header={'backgroundColor': '#f8f9fa', 'fontWeight': 'bold'},
                        style_data_conditional=[
                            {
                                'if': {'row_index': 'odd'},
                                'backgroundColor': '#f8f9fa',
                            }
                        ],
                    )
                ])
            ], style=CARD_STYLE)
        ], md=6),

        # Distribution vs. normal
        dbc.Col([
            dcc.Graph(figure=fig_hist_normal, config={"displayModeBar": True})
        ], md=6),

        # Box plot per province
        dbc.Col([
            dcc.Graph(figure=fig_box, config={"displayModeBar": True})
        ], md=6),

        # Correlations
        dbc.Col([
            dcc.Graph(figure=fig_corr, config={"displayModeBar": True})
        ], md=6),

        # Seasonal variation
        dbc.Col([
            dcc.Graph(figure=fig_seasonal, config={"displayModeBar": True})
        ], md=12),

        # Per-season statistics
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-leaf me-2"),
                    "Estadísticas por Estación Climática (Ecuador)",
                ]),
                dbc.CardBody([season_table]),
            ], style=CARD_STYLE)
        ], md=6),

        # Per-province statistics
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-map me-2"),
                    "Estadísticas por Provincia",
                ]),
                dbc.CardBody([prov_table]),
            ], style=CARD_STYLE)
        ], md=6),
    ], className="g-3")


# Callbacks
@app.callback(
    Output("sel-cantons", "options"),
    Output("sel-cantons", "value"),
    Input("sel-provinces", "value"),
)
def actualizar_cantones(provinces):
    """Refresh the canton dropdown when the province selection changes.

    With no province selected every canton is offered; otherwise only the
    cantons of the selected provinces. The current canton selection is
    always cleared.
    """
    if not provinces:
        cantones = TODOS_LOS_CANTONES
    else:
        # Use the table built at start-up instead of rescanning the dataset.
        cantones = sorted({
            c for p in provinces for c in CANTONES_POR_PROV.get(p, [])
        })
    return [{"label": c, "value": c} for c in cantones], []


@app.callback(
    Output("main-kpis", "children"),
    [Input("sel-var", "value"),
     Input("sel-years", "value"),
     Input("sel-provinces", "value"),
     Input("sel-cantons", "value"),
     Input("sel-season", "value"),
     Input("sel-dates", "start_date"),
     Input("sel-dates", "end_date")]
)
def actualizar_kpis(var, years, provinces, cantons, season, start_date, end_date):
    """Recompute the KPI cards whenever any filter changes."""
    if not var:
        return html.Div()

    # An empty year selection means "all years".
    years = years or YEARS
    df_f = filtrar_datos(df, var, years, provinces, cantons, season, (start_date, end_date))
    return crear_kpis_principales(df_f, var)

temporal por año @app.callback( [Output("temporal-analysis-content", "children"), Output("temporal-year-info", "children")], [Input("temporal-year-selector", "value")], [State("sel-var", "value"), State("sel-provinces", "value"), State("sel-cantons", "value"), State("sel-season", "value")] ) def update_temporal_analysis(selected_year, var, provinces, cantons, season): if not selected_year or not var: return html.Div(), html.Div() # Filtrar datos para el año seleccionado df_year = df[df['year'] == selected_year].copy() # Aplicar filtros adicionales if provinces: df_year = df_year[df_year["provincia"].isin(provinces)] if cantons: df_year = df_year[df_year["canton"].isin(cantons)] if season: df_year = df_year[df_year["season"].isin(season)] if df_year.empty: return dbc.Alert(f"No hay datos para el año {selected_year} con los filtros seleccionados.", color="warning"), html.Div() # Información del año year_info = dbc.Alert([ html.H6(f"Análisis del Año {selected_year}", className="mb-2"), html.P(f"Registros encontrados: {len(df_year):,}", className="mb-1"), html.P(f"Rango de fechas: {df_year['time'].min().strftime('%d/%m/%Y')} - {df_year['time'].max().strftime('%d/%m/%Y')}", className="mb-0") ], color="info") # Gráficos del año seleccionado # 1. 
Serie temporal mensual df_monthly = df_year.groupby(df_year['time'].dt.month)[var].agg(['mean', 'std', 'count']).reset_index() df_monthly['month_name'] = df_monthly['time'].map({ 1: 'Ene', 2: 'Feb', 3: 'Mar', 4: 'Abr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Ago', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dic' }) fig_monthly = go.Figure() fig_monthly.add_trace(go.Scatter( x=df_monthly['month_name'], y=df_monthly['mean'], mode='lines+markers', name='Promedio Mensual', line=dict(color='#1f77b4', width=3), marker=dict(size=8), error_y=dict(type='data', array=df_monthly['std'], visible=True) )) fig_monthly.update_layout( title=f"Evolución Mensual {selected_year} - {LABELS.get(var, var)}", xaxis_title="Mes", yaxis_title=f"{LABELS.get(var, var)} ({UNITS.get(var, '')})", hovermode='x unified' ) # 2. Distribución por estaciones df_seasonal = df_year.groupby('season')[var].agg(['mean', 'std', 'count']).reset_index() fig_seasonal = px.bar( df_seasonal, x='season', y='mean', error_y='std', title=f"Comparación Estacional {selected_year} - {LABELS.get(var, var)}", labels={'mean': f"{LABELS.get(var, var)} ({UNITS.get(var, '')})", 'season': 'Estación'}, color='season', color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'} ) # 3. Distribución de valores fig_dist = px.histogram( df_year, x=var, nbins=30, title=f"Distribución de Valores {selected_year} - {LABELS.get(var, var)}", labels={var: f"{LABELS.get(var, var)} ({UNITS.get(var, '')})"} ) # 4. 
Evolución diaria (si hay suficientes datos) if len(df_year) > 50: df_daily = df_year.groupby('time')[var].mean().reset_index() fig_daily = px.line( df_daily, x='time', y=var, title=f"Evolución Diaria {selected_year} - {LABELS.get(var, var)}", labels={'time': 'Fecha', var: f"{LABELS.get(var, var)} ({UNITS.get(var, '')})"} ) fig_daily.update_traces(line_color='#2ca02c') daily_graph = dbc.Col([ dcc.Graph(figure=fig_daily, config={"displayModeBar": True}) ], md=12) else: daily_graph = html.Div() content = dbc.Row([ dbc.Col([ dcc.Graph(figure=fig_monthly, config={"displayModeBar": True}) ], md=6), dbc.Col([ dcc.Graph(figure=fig_seasonal, config={"displayModeBar": True}) ], md=6), dbc.Col([ dcc.Graph(figure=fig_dist, config={"displayModeBar": True}) ], md=6), dbc.Col([ # Estadísticas del año dbc.Card([ dbc.CardHeader(f"Estadísticas {selected_year}"), dbc.CardBody([ html.P(f"Promedio: {df_year[var].mean():.3f} {UNITS.get(var, '')}", className="mb-2"), html.P(f"Mediana: {df_year[var].median():.3f} {UNITS.get(var, '')}", className="mb-2"), html.P(f"Desv. 
Estándar: {df_year[var].std():.3f}", className="mb-2"), html.P(f"Mínimo: {df_year[var].min():.3f} {UNITS.get(var, '')}", className="mb-2"), html.P(f"Máximo: {df_year[var].max():.3f} {UNITS.get(var, '')}", className="mb-0"), ]) ]) ], md=6), daily_graph ], className="g-3") return content, year_info @app.callback( Output("tab-content", "children"), [Input("main-tabs", "value"), Input("sel-var", "value"), Input("sel-years", "value"), Input("sel-provinces", "value"), Input("sel-cantons", "value"), Input("sel-season", "value"), Input("sel-dates", "start_date"), Input("sel-dates", "end_date")] ) def render_tab_content(active_tab, var, years, provinces, cantons, season, start_date, end_date): if not var: return dbc.Alert("Por favor selecciona una variable para continuar.", color="info") years = years or YEARS df_f = filtrar_datos(df, var, years, provinces, cantons, season, (start_date, end_date)) if df_f.empty: return dbc.Alert("No hay datos disponibles con los filtros seleccionados.", color="warning") if active_tab == "dashboard": return render_dashboard(df_f, var) elif active_tab == "spatial": return render_spatial_analysis(df_f, var) elif active_tab == "temporal": return render_temporal_analysis_simple(df_f, var) elif active_tab == "tree_classification": return render_tree_classification(df_f, var) elif active_tab == "statistics": return render_detailed_statistics(df_f, var) # CSS personalizado mejorado app.index_string = '''
{%metas%}