Spaces:
Running
Running
import os | |
import pandas as pd | |
import numpy as np | |
import dash | |
from dash import Dash, html, dcc, Input, Output, State, dash_table | |
import dash_bootstrap_components as dbc | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
from sklearn.tree import DecisionTreeClassifier, export_text, plot_tree | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix | |
import matplotlib.pyplot as plt | |
import io | |
import base64 | |
import warnings | |
warnings.filterwarnings('ignore') | |
# Path of the input CSV; the GLDAS_CSV environment variable overrides the default.
DATA_PATH = os.environ.get(
    "GLDAS_CSV",
    "ecuador_gldas_enriquecido_solo_tierra.csv",
)

df = pd.read_csv(DATA_PATH, encoding="utf-8-sig")

# Validate the schema BEFORE touching any column, so that a CSV without
# e.g. "time" fails with the descriptive ValueError below rather than a
# bare KeyError raised by the derived-column code.
req = {"lat", "lon", "provincia", "canton", "time"}
faltan = req - set(df.columns)
if faltan:
    raise ValueError(f"Faltan columnas en el CSV: {faltan}")

# Derived calendar columns used by the filters and the charts.
df["time"] = pd.to_datetime(df["time"])
df["year"] = df["time"].dt.year
df["month"] = df["time"].dt.month

# Two-season scheme used for Ecuador:
#   "Verano"   (dry season):   June - November
#   "Invierno" (rainy season): December - May
df["season"] = df["month"].map({
    12: "Invierno", 1: "Invierno", 2: "Invierno", 3: "Invierno", 4: "Invierno", 5: "Invierno",
    6: "Verano", 7: "Verano", 8: "Verano", 9: "Verano", 10: "Verano", 11: "Verano",
})
# Candidate variable columns, in the order they are offered in the UI.
CAND_VARS = [
    "SoilMoi0_10cm_inst",
    "SoilMoi10_40cm_inst",
    "RootMoist_inst",
    "SoilTMP0_10cm_inst",
    "Tair_f_inst",
    "Rainf_tavg",
    "Evap_tavg",
    "SWdown_f_tavg",
    "LWdown_f_tavg",
    "Qs_acc",
    "Qsb_acc",
    "Qsm_acc",
    "CanopInt_inst",
]

# Only the candidates that are actually present in the CSV and numeric.
NUM_VARS = [
    name
    for name in CAND_VARS
    if name in df.columns and pd.api.types.is_numeric_dtype(df[name])
]

# Display label (Spanish, shown to the user) for each variable column.
LABELS = {
    "SoilMoi0_10cm_inst": "Humedad Suelo Superficial (0-10cm)",
    "SoilMoi10_40cm_inst": "Humedad Suelo Profundo (10-40cm)",
    "RootMoist_inst": "Humedad Zona Radicular",
    "SoilTMP0_10cm_inst": "Temperatura del Suelo",
    "Tair_f_inst": "Temperatura del Aire",
    "Rainf_tavg": "Precipitación Media",
    "Evap_tavg": "Evapotranspiración",
    "SWdown_f_tavg": "Radiación Solar Incidente",
    "LWdown_f_tavg": "Radiación Térmica Incidente",
    "Qs_acc": "Escorrentía Superficial",
    "Qsb_acc": "Escorrentía Subsuperficial",
    "Qsm_acc": "Derretimiento de Nieve",
    "CanopInt_inst": "Intercepción del Dosel",
}

# Unit string appended to axis titles and KPI captions for each variable.
UNITS = {
    "SoilMoi0_10cm_inst": "kg/m²",
    "SoilMoi10_40cm_inst": "kg/m²",
    "RootMoist_inst": "kg/m²",
    "SoilTMP0_10cm_inst": "K",
    "Tair_f_inst": "K",
    "Rainf_tavg": "kg·m⁻²·s⁻¹",
    "Evap_tavg": "kg·m⁻²·s⁻¹",
    "SWdown_f_tavg": "W·m⁻²",
    "LWdown_f_tavg": "W·m⁻²",
    "Qs_acc": "kg/m²",
    "Qsb_acc": "kg/m²",
    "Qsm_acc": "kg/m²",
    "CanopInt_inst": "kg/m²",
}

# Sorted province names, and for each province its sorted canton names.
PROVINCIAS = sorted(df["provincia"].dropna().unique().tolist())
CANTONES_POR_PROV = {
    provincia: sorted(
        df.loc[df["provincia"] == provincia, "canton"].dropna().unique().tolist()
    )
    for provincia in PROVINCIAS
}

YEARS = sorted(df["year"].unique().tolist())
SEASONS = ["Verano", "Invierno"]  # only two seasons are used for Ecuador

# First and last calendar dates found in the data (date-picker defaults).
MIN_DATE = df["time"].min().date()
MAX_DATE = df["time"].max().date()
# Stylesheets loaded by the page: the Bootstrap theme plus the Font Awesome
# CSS that provides the "fas fa-*" icon classes used throughout the layout.
_EXTERNAL_STYLESHEETS = [
    dbc.themes.BOOTSTRAP,
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css",
]

app = Dash(
    __name__,
    external_stylesheets=_EXTERNAL_STYLESHEETS,
    suppress_callback_exceptions=True,
)

# Module-level handle to the app's underlying server object
# (presumably consumed by the hosting process -- confirm with deployment).
server = app.server
# --------------------------------------------------
# Custom styles
# --------------------------------------------------

# Inline style shared by every card in the page.
CARD_STYLE = {
    "box-shadow": "0 4px 6px rgba(0, 0, 0, 0.1)",
    "border": "none",
    "border-radius": "10px",
    "margin-bottom": "20px",
}

# Inline style of the gradient banner at the top of the page.
HEADER_STYLE = {
    "background": "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    "color": "white",
    "padding": "30px 0",
    "margin-bottom": "30px",
    "border-radius": "0 0 20px 20px",
}
# --------------------------------------------------
# Main layout
# --------------------------------------------------

# Header banner: application title and tagline.
_header = html.Div(
    [
        html.H1(
            [html.I(className="fas fa-seedling me-3"), "AGRO-LEO ECUADOR"],
            className="text-center mb-2",
            style={"font-weight": "bold"},
        ),
        html.P(
            "Sistema de Monitoreo Agroclimático basado en datos GLDAS Noah 0.25° (2020-2024)",
            className="text-center mb-0",
            style={"opacity": "0.9"},
        ),
    ],
    style=HEADER_STYLE,
)

# First row of controls: variable, years, provinces and season.
_primary_filters = dbc.Row(
    [
        dbc.Col(
            [
                dbc.Label(
                    [html.I(className="fas fa-chart-line me-2"), "Variable Agroclimática"],
                    className="fw-bold",
                ),
                dcc.Dropdown(
                    id="sel-var",
                    options=[{"label": LABELS.get(v, v), "value": v} for v in NUM_VARS],
                    value=NUM_VARS[0] if NUM_VARS else None,
                    clearable=False,
                    style={"border-radius": "8px"},
                ),
            ],
            md=3,
        ),
        dbc.Col(
            [
                dbc.Label(
                    [html.I(className="fas fa-calendar me-2"), "Período de Análisis"],
                    className="fw-bold",
                ),
                dcc.Dropdown(
                    id="sel-years",
                    options=[{"label": f"Año {y}", "value": y} for y in YEARS],
                    value=YEARS,
                    multi=True,
                ),
            ],
            md=3,
        ),
        dbc.Col(
            [
                dbc.Label(
                    [html.I(className="fas fa-map-marker-alt me-2"), "Región"],
                    className="fw-bold",
                ),
                dcc.Dropdown(
                    id="sel-provinces",
                    options=[{"label": p, "value": p} for p in PROVINCIAS],
                    value=[],
                    multi=True,
                    placeholder="Todas las provincias",
                ),
            ],
            md=3,
        ),
        dbc.Col(
            [
                dbc.Label(
                    [html.I(className="fas fa-leaf me-2"), "Estación"],
                    className="fw-bold",
                ),
                dcc.Dropdown(
                    id="sel-season",
                    options=[{"label": s, "value": s} for s in SEASONS],
                    value=[],
                    multi=True,
                    placeholder="Ambas estaciones",
                ),
            ],
            md=3,
        ),
    ],
    className="g-3",
)

# Second row of controls: optional cantons and the date range.
_secondary_filters = dbc.Row(
    [
        dbc.Col(
            [
                dbc.Label("Cantones (opcional)"),
                dcc.Dropdown(
                    id="sel-cantons",
                    options=[],
                    value=[],
                    multi=True,
                    placeholder="Seleccionar cantones",
                ),
            ],
            md=6,
        ),
        dbc.Col(
            [
                dbc.Label("Rango de Fechas"),
                dcc.DatePickerRange(
                    id="sel-dates",
                    start_date=str(MIN_DATE),
                    end_date=str(MAX_DATE),
                    display_format="DD/MM/YYYY",
                ),
            ],
            md=6,
        ),
    ],
    className="g-3",
)

_controls_card = dbc.Card(
    [
        dbc.CardBody(
            [
                _primary_filters,
                html.Hr(style={"margin": "20px 0"}),
                _secondary_filters,
            ]
        )
    ],
    style=CARD_STYLE,
)

# (label, value) of every tab, in display order.
_TAB_SPECS = [
    ("📊 Dashboard Principal", "dashboard"),
    ("🗺️ Análisis Espacial", "spatial"),
    ("📈 Series Temporales", "temporal"),
    ("🌳 Árbol de Clasificación", "tree_classification"),
    ("📋 Estadísticas Detalladas", "statistics"),
]

_tabs_card = dbc.Card(
    [
        dbc.CardBody(
            [
                dcc.Tabs(
                    id="main-tabs",
                    value="dashboard",
                    children=[
                        dcc.Tab(
                            label=tab_label,
                            value=tab_value,
                            className="custom-tab",
                            selected_className="custom-tab--selected",
                        )
                        for tab_label, tab_value in _TAB_SPECS
                    ],
                    style={"margin-bottom": "20px"},
                ),
                # Placeholder that receives the content of the selected tab.
                html.Div(id="tab-content"),
            ]
        )
    ],
    style=CARD_STYLE,
)

_footer = html.Div(
    [
        html.P(
            [
                "© 2025 AGRO-LEO Ecuador • ",
                html.A("Datos GLDAS", href="https://ldas.gsfc.nasa.gov/gldas", target="_blank"),
                " • Desarrollado para agricultura de precisión",
            ],
            className="text-center text-muted mb-0",
        )
    ],
    style={"padding": "20px 0"},
)

app.layout = dbc.Container(
    [
        _header,
        _controls_card,
        # Placeholder that receives the KPI cards.
        html.Div(id="main-kpis"),
        _tabs_card,
        html.Hr(style={"margin-top": "40px"}),
        _footer,
    ],
    fluid=True,
    style={"background-color": "#f8f9fa", "min-height": "100vh", "padding": "0"},
)
def filtrar_datos(df_orig, var, years, provinces, cantons, season, dates):
    """Return the rows of ``df_orig`` that match the selected filters.

    Args:
        df_orig: DataFrame with ``year``, ``provincia``, ``canton``,
            ``season`` and datetime ``time`` columns. It is never modified.
        var: Selected variable name. Unused here; kept so the signature
            stays compatible with existing callers.
        years, provinces, cantons, season: Collections of accepted values
            for the corresponding column. An empty or ``None`` collection
            means "no restriction".
        dates: ``(start, end)`` pair (strings or datetimes) or ``None``.
            Each bound is applied independently when it is set. An end
            bound without a time-of-day component (e.g. ``"2024-12-31"``)
            covers that whole calendar day.

    Returns:
        A new DataFrame holding only the matching rows.
    """
    # Accumulate one boolean mask rather than copying the full frame first
    # and re-slicing it once per filter.
    mask = np.ones(len(df_orig), dtype=bool)

    membership_filters = (
        ("year", years),
        ("provincia", provinces),
        ("canton", cantons),
        ("season", season),
    )
    for column, selected in membership_filters:
        if selected:
            mask &= df_orig[column].isin(selected).to_numpy()

    start = dates[0] if dates else None
    end = dates[1] if dates else None

    if start:
        mask &= (df_orig["time"] >= pd.to_datetime(start)).to_numpy()

    if end:
        end_ts = pd.to_datetime(end)
        if end_ts == end_ts.normalize():
            # A date-only bound parses to midnight; comparing with "<=" would
            # drop every record stamped later on that same day.
            mask &= (df_orig["time"] < end_ts + pd.Timedelta(days=1)).to_numpy()
        else:
            mask &= (df_orig["time"] <= end_ts).to_numpy()

    # Boolean indexing returns a new object, so the caller's frame is untouched.
    return df_orig[mask]
def crear_kpis_principales(df_f, var):
    """Build the row of six KPI cards for the filtered data.

    Args:
        df_f: Filtered DataFrame (needs ``time``, ``lat``, ``lon`` and ``var``).
        var: Name of the variable column being summarised.

    Returns:
        An empty ``html.Div`` when ``df_f`` is empty or lacks ``var``;
        otherwise a ``dbc.Row`` with the cards: record count, unique dates,
        unique locations, mean, min-max range and percentage of missing values.
    """
    if df_f.empty or var not in df_f.columns:
        return html.Div()

    def tarjeta(icon_classes, headline, caption):
        # One KPI card: icon, headline figure and a small muted caption.
        contenido = html.Div(
            [
                html.I(className=icon_classes),
                html.H4(headline, className="mb-1"),
                html.P(caption, className="text-muted mb-0 small"),
            ],
            className="text-center",
        )
        return dbc.Col([dbc.Card([dbc.CardBody([contenido])], style=CARD_STYLE)], md=2)

    total_registros = len(df_f)
    fechas_unicas = df_f["time"].nunique()
    ubicaciones = df_f[["lat", "lon"]].drop_duplicates().shape[0]
    na_percent = df_f[var].isna().mean() * 100

    # Summary statistics fall back to 0 when every value is missing.
    promedio = minimo = maximo = desv_std = 0
    observados = df_f[var].dropna()
    if len(observados) > 0:
        promedio = observados.mean()
        minimo = observados.min()
        maximo = observados.max()
        desv_std = observados.std()  # computed but not displayed

    tarjetas = [
        tarjeta("fas fa-database fa-2x text-primary mb-2",
                f"{total_registros:,}", "Registros Totales"),
        tarjeta("fas fa-calendar fa-2x text-success mb-2",
                f"{fechas_unicas}", "Fechas Únicas"),
        tarjeta("fas fa-map-pin fa-2x text-info mb-2",
                f"{ubicaciones}", "Ubicaciones"),
        tarjeta("fas fa-chart-bar fa-2x text-warning mb-2",
                f"{promedio:.2f}", f"Promedio ({UNITS.get(var, '')})"),
        tarjeta("fas fa-arrows-alt-v fa-2x text-danger mb-2",
                f"{minimo:.1f} - {maximo:.1f}", "Rango Min-Max"),
        tarjeta("fas fa-exclamation-triangle fa-2x text-secondary mb-2",
                f"{na_percent:.1f}%", "Datos Faltantes"),
    ]
    return dbc.Row(tarjetas, className="g-3 mb-4")
def render_dashboard(df_f, var):
    """Build the main dashboard tab.

    Produces four half-width charts for ``var``: a histogram, the per-date
    mean with a +/- one standard deviation band, the mean per province and
    the mean per season.

    Args:
        df_f: Filtered DataFrame.
        var: Name of the variable column to plot.

    Returns:
        A ``dbc.Row`` holding the four graphs.
    """
    var_label = LABELS.get(var, var)
    axis_label = f"{var_label} ({UNITS.get(var, '')})"

    def as_column(figure):
        # Wrap a figure in a half-width column with the mode bar enabled.
        return dbc.Col([dcc.Graph(figure=figure, config={"displayModeBar": True})], md=6)

    # 1. Histogram of the variable.
    fig_dist = px.histogram(
        df_f,
        x=var,
        nbins=30,
        title=f"Distribución de {var_label}",
        labels={var: axis_label},
    )
    fig_dist.update_layout(showlegend=False)

    # 2. Per-date mean with a shaded band of +/- one standard deviation.
    per_date = df_f.groupby('time')[var].agg(['mean', 'std']).reset_index()
    upper_band = per_date['mean'] + per_date['std']
    lower_band = per_date['mean'] - per_date['std']

    fig_time = go.Figure()
    fig_time.add_trace(
        go.Scatter(
            x=per_date['time'],
            y=per_date['mean'],
            mode='lines',
            name='Promedio',
            line=dict(color='#1f77b4', width=2),
        )
    )
    fig_time.add_trace(
        go.Scatter(
            x=per_date['time'],
            y=upper_band,
            mode='lines',
            name='+ 1 Desv. Std',
            line=dict(width=0),
            showlegend=False,
            hoverinfo='skip',
        )
    )
    # 'tonexty' fills from this trace up to the previous one (the upper band).
    fig_time.add_trace(
        go.Scatter(
            x=per_date['time'],
            y=lower_band,
            mode='lines',
            name='- 1 Desv. Std',
            line=dict(width=0),
            fillcolor='rgba(31, 119, 180, 0.2)',
            fill='tonexty',
            showlegend=False,
            hoverinfo='skip',
        )
    )
    fig_time.update_layout(
        title=f"Evolución Temporal de {var_label}",
        xaxis_title="Fecha",
        yaxis_title=axis_label,
    )

    # 3. Mean per province, sorted ascending, as horizontal bars.
    fig_prov = go.Figure()
    if 'provincia' in df_f.columns:
        per_province = (
            df_f.groupby('provincia')[var]
            .agg(['mean', 'count'])
            .reset_index()
            .sort_values('mean', ascending=True)
        )
        fig_prov = px.bar(
            per_province,
            x='mean',
            y='provincia',
            title=f"Promedio por Provincia - {var_label}",
            labels={'mean': axis_label, 'provincia': 'Provincia'},
            orientation='h',
        )
        fig_prov.update_layout(height=400)

    # 4. Mean per season (two-season scheme) with std error bars.
    fig_season = go.Figure()
    if 'season' in df_f.columns:
        per_season = df_f.groupby('season')[var].agg(['mean', 'std']).reset_index()
        fig_season = px.bar(
            per_season,
            x='season',
            y='mean',
            error_y='std',
            title=f"Variación Estacional Ecuador - {var_label}",
            labels={'mean': axis_label, 'season': 'Estación'},
            color='season',
            color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'},
        )
        season_notes = [
            dict(text="Verano: Junio-Noviembre (seco)", xref="paper", yref="paper",
                 x=0.02, y=0.98, showarrow=False, font_size=10),
            dict(text="Invierno: Diciembre-Mayo (lluvioso)", xref="paper", yref="paper",
                 x=0.02, y=0.93, showarrow=False, font_size=10),
        ]
        fig_season.update_layout(annotations=season_notes)

    return dbc.Row(
        [as_column(fig) for fig in (fig_dist, fig_time, fig_prov, fig_season)],
        className="g-3",
    )
def render_spatial_analysis(df_f, var, max_points=10000):
    """Build the spatial-analysis tab: a scatter map coloured by ``var``.

    Args:
        df_f: Filtered DataFrame with ``lat``, ``lon``, ``provincia``,
            ``canton``, ``time`` and ``var`` columns.
        var: Name of the variable column used for the marker colour.
        max_points: Maximum number of rows drawn on the map. Larger frames
            are down-sampled to this size. Defaults to 10000, the value
            previously hard-coded.

    Returns:
        A ``dbc.Row`` containing the map graph.
    """
    if len(df_f) > max_points:
        # A fixed seed keeps the plotted subset stable between re-renders;
        # an unseeded sample redraws a different set of points every time.
        df_plot = df_f.sample(max_points, random_state=42)
    else:
        df_plot = df_f

    fig_map = px.scatter_mapbox(
        df_plot,
        lat="lat", lon="lon", color=var,
        hover_data={"provincia": True, "canton": True, "time": True, var: ":.3f"},
        zoom=5.2, height=600,
        color_continuous_scale="Viridis",
        title=f"Distribución Espacial - {LABELS.get(var, var)}"
    )
    fig_map.update_traces(marker={"size": 6})
    fig_map.update_layout(mapbox_style="open-street-map")

    return dbc.Row([
        dbc.Col([
            dcc.Graph(figure=fig_map, config={"displayModeBar": True})
        ], md=12),
    ], className="g-3")
def render_temporal_analysis_simple(df_f, var):
    """Build the time-series tab skeleton with a year selector.

    Only the selector and two empty placeholders (``temporal-year-info`` and
    ``temporal-analysis-content``) are created here.

    Args:
        df_f: Filtered DataFrame with a ``year`` column.
        var: Selected variable name. Unused here; kept for a signature
            consistent with the other ``render_*`` functions.

    Returns:
        An ``html.Div`` with the selector card and the content placeholder.
    """
    # Compute the year list once. An empty frame yields no options and no
    # default value instead of an IndexError on ``[-1]``.
    available_years = sorted(df_f['year'].unique().tolist())
    default_year = available_years[-1] if available_years else None  # latest year

    year_selector = dbc.Card([
        dbc.CardBody([
            dbc.Row([
                dbc.Col([
                    dbc.Label("Seleccionar Año para Análisis:"),
                    dcc.Dropdown(
                        id="temporal-year-selector",
                        options=[{"label": f"Año {y}", "value": y} for y in available_years],
                        value=default_year,
                        clearable=False
                    )
                ], md=4),
                dbc.Col([
                    html.Div(id="temporal-year-info", className="mt-3")
                ], md=8)
            ])
        ])
    ], className="mb-4")

    return html.Div([
        year_selector,
        html.Div(id="temporal-analysis-content")
    ])
def create_variable_classes(df_f, var):
    """Label every value of ``df_f[var]`` with its quartile class.

    The cut points are the 25th, 50th and 75th percentiles of the non-missing
    values. Upper bounds are inclusive: a value equal to a cut point falls in
    the lower class.

    Args:
        df_f: DataFrame containing the ``var`` column.
        var: Name of the numeric column to classify.

    Returns:
        A tuple ``(classes, cuts)``. ``classes`` is a Series aligned with
        ``df_f[var]`` holding 'Bajo', 'Medio-Bajo', 'Medio-Alto', 'Alto', or
        'Sin datos' for missing values. ``cuts`` is a dict with the keys
        'q25', 'q50' and 'q75'.
    """
    serie = df_f[var]
    observados = serie.dropna()

    cuts = {
        'q25': observados.quantile(0.25),
        'q50': observados.quantile(0.50),
        'q75': observados.quantile(0.75),
    }

    # Conditions are evaluated in order and the first match wins, which
    # mirrors an if/elif chain; anything left over is above q75.
    condiciones = [
        serie.isna().to_numpy(),
        (serie <= cuts['q25']).to_numpy(),
        (serie <= cuts['q50']).to_numpy(),
        (serie <= cuts['q75']).to_numpy(),
    ]
    etiquetas = ['Sin datos', 'Bajo', 'Medio-Bajo', 'Medio-Alto']
    clases = np.select(condiciones, etiquetas, default='Alto')

    return pd.Series(clases, index=serie.index, name=serie.name), cuts
def render_tree_classification(df_f, var):
    """Build the classification-tree tab for ``var``.

    ``var`` is discretised into quartile classes with
    ``create_variable_classes``. A depth-limited decision tree is then
    trained to predict that class from up to six other numeric variables,
    a cyclic month encoding, the province and the season.

    Args:
        df_f: Filtered DataFrame with ``month``, ``provincia``, ``season``
            and the numeric variable columns.
        var: Name of the target variable column.

    Returns:
        A ``dbc.Row`` with the class thresholds, model metrics, the rendered
        tree image, the confusion matrix and the feature importances. A
        ``dbc.Alert`` is returned instead when there are too few features or
        rows, or when any step raises.
    """
    try:
        # Work on a copy: helper columns are added below.
        df_tree = df_f.copy()
        df_tree['target_class'], quartiles = create_variable_classes(df_tree, var)

        numeric_vars = [v for v in NUM_VARS if v != var and v in df_tree.columns]
        if len(numeric_vars) < 2:
            return dbc.Alert("No hay suficientes variables para crear el árbol de clasificación.", color="warning")

        # Cyclic month encoding, so that December and January end up adjacent.
        df_tree['month_sin'] = np.sin(2 * np.pi * df_tree['month'] / 12)
        df_tree['month_cos'] = np.cos(2 * np.pi * df_tree['month'] / 12)

        # Integer-encode the categorical columns.
        le_prov = LabelEncoder()
        df_tree['provincia_encoded'] = le_prov.fit_transform(df_tree['provincia'].astype(str))
        le_season = LabelEncoder()
        df_tree['season_encoded'] = le_season.fit_transform(df_tree['season'].astype(str))

        feature_cols = numeric_vars[:6] + ['month_sin', 'month_cos', 'provincia_encoded', 'season_encoded']
        feature_cols = [col for col in feature_cols if col in df_tree.columns]

        # Human-readable names for the plots (variable labels take priority).
        engineered_names = {
            'provincia_encoded': 'Provincia',
            'season_encoded': 'Estación',
            'month_sin': 'Mes (Sin)',
            'month_cos': 'Mes (Cos)',
        }
        feature_names_readable = [
            LABELS.get(col, engineered_names.get(col, col)) for col in feature_cols
        ]

        # Missing feature values are imputed with the column median.
        X = df_tree[feature_cols].fillna(df_tree[feature_cols].median())
        y = df_tree['target_class']

        # Rows whose target is missing cannot be used for training.
        valid_mask = y != 'Sin datos'
        X = X[valid_mask]
        y = y[valid_mask]
        if len(X) < 100:
            return dbc.Alert("No hay suficientes datos válidos para entrenar el árbol.", color="warning")

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        tree_model = DecisionTreeClassifier(
            max_depth=5,
            min_samples_split=50,
            min_samples_leaf=20,
            random_state=42
        )
        tree_model.fit(X_train, y_train)
        y_pred = tree_model.predict(X_test)

        # Render the tree on an explicit figure and always close it, so a
        # failure while plotting or saving does not leave the figure open.
        fig_tree, ax_tree = plt.subplots(figsize=(25, 15))
        try:
            plot_tree(
                tree_model,
                feature_names=feature_names_readable,
                # Use the classes the model actually saw. With collapsed
                # quartiles (e.g. a mostly-zero variable) fewer than four
                # classes exist and a hard-coded list would mislabel nodes.
                class_names=[str(c) for c in tree_model.classes_],
                filled=True,
                rounded=True,
                fontsize=12,
                proportion=True,
                impurity=True,
                ax=ax_tree
            )
            ax_tree.set_title(f"Árbol de Clasificación - {LABELS.get(var, var)}",
                              fontsize=20, fontweight='bold', pad=20)
            buffer = io.BytesIO()
            fig_tree.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
        finally:
            plt.close(fig_tree)
        # PNG bytes embedded below as a base64 data URI.
        tree_plot_url = base64.b64encode(buffer.getvalue()).decode()

        accuracy = accuracy_score(y_test, y_pred)

        # Confusion matrix with classes in increasing order of the variable.
        class_order = ['Bajo', 'Medio-Bajo', 'Medio-Alto', 'Alto']
        cm = confusion_matrix(y_test, y_pred, labels=class_order)
        fig_cm = px.imshow(
            cm,
            labels=dict(x="Predicho", y="Real", color="Frecuencia"),
            x=class_order,
            y=class_order,
            title="Matriz de Confusión",
            color_continuous_scale="Blues",
            text_auto=True
        )

        # Feature importances; ascending sort puts the largest bar on top.
        importance_df = pd.DataFrame({
            'Variable': feature_names_readable,
            'Importancia': tree_model.feature_importances_
        }).sort_values('Importancia', ascending=True)
        fig_importance = px.bar(
            importance_df.tail(10), x='Importancia', y='Variable',
            title="Importancia de Variables en el Árbol",
            orientation='h',
            color='Importancia',
            color_continuous_scale='Viridis'
        )

        unit = UNITS.get(var, '')
        class_info = dbc.Card([
            dbc.CardHeader("Información de las Clases"),
            dbc.CardBody([
                html.P(f"Bajo: ≤ {quartiles['q25']:.3f} {unit}", className="mb-2"),
                html.P(f"Medio-Bajo: {quartiles['q25']:.3f} - {quartiles['q50']:.3f} {unit}", className="mb-2"),
                html.P(f"Medio-Alto: {quartiles['q50']:.3f} - {quartiles['q75']:.3f} {unit}", className="mb-2"),
                html.P(f"Alto: > {quartiles['q75']:.3f} {unit}", className="mb-0"),
            ])
        ])

        metrics_card = dbc.Card([
            dbc.CardHeader("Métricas del Modelo"),
            dbc.CardBody([
                html.H4(f"Precisión: {accuracy:.3f}", className="text-primary mb-3"),
                html.P(f"Datos de entrenamiento: {len(X_train):,}", className="mb-2"),
                html.P(f"Datos de prueba: {len(X_test):,}", className="mb-2"),
                html.P(f"Profundidad del árbol: {tree_model.get_depth()}", className="mb-0"),
            ])
        ])

        return dbc.Row([
            # Class thresholds and model metrics
            dbc.Col([
                class_info,
                html.Br(),
                metrics_card
            ], md=3),
            # Tree image
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader("Visualización del Árbol de Clasificación"),
                    dbc.CardBody([
                        html.Img(src=f"data:image/png;base64,{tree_plot_url}", style={"width": "100%", "height": "auto"}),
                        html.Hr(),
                        dbc.Alert([
                            html.H6("Interpretación:", className="mb-2"),
                            html.P("• Cada nodo muestra la condición de división y las muestras", className="mb-1"),
                            html.P("• Los colores representan las diferentes clases", className="mb-1"),
                            html.P("• Las hojas muestran la clasificación final", className="mb-0"),
                        ], color="info")
                    ])
                ])
            ], md=9),
            # Confusion matrix
            dbc.Col([
                dcc.Graph(figure=fig_cm, config={"displayModeBar": True})
            ], md=6),
            # Feature importances
            dbc.Col([
                dcc.Graph(figure=fig_importance, config={"displayModeBar": True})
            ], md=6)
        ], className="g-3")
    except Exception as e:
        # UI boundary: show the failure in the tab instead of breaking the page.
        return dbc.Alert(f"Error al crear el árbol de clasificación: {str(e)}", color="danger")
def render_detailed_statistics(df_f, var):
    """Build the detailed-statistics tab for ``var``.

    Contents: a descriptive-statistics table, a histogram against a fitted
    normal curve, a box plot per province, a bar chart of correlations with
    the other numeric variables, a box plot per season, and per-season and
    per-province summary tables.

    Args:
        df_f: Filtered DataFrame with ``provincia``, ``season`` and the
            numeric variable columns.
        var: Name of the variable column to analyse.

    Returns:
        A ``dbc.Row`` with all panels, or a warning ``dbc.Alert`` when
        ``var`` has no non-missing values.
    """
    var_data = df_f[var].dropna()
    if len(var_data) == 0:
        return dbc.Alert("No hay datos disponibles para análisis estadístico.", color="warning")

    var_label = LABELS.get(var, var)
    unit = UNITS.get(var, '')
    axis_label = f"{var_label} ({unit})"

    # Compute each statistic once; several are reused below.
    media = var_data.mean()
    mediana = var_data.median()
    desv = var_data.std()
    minimo = var_data.min()
    maximo = var_data.max()
    q1 = var_data.quantile(0.25)
    q3 = var_data.quantile(0.75)
    modas = var_data.mode()

    stats = {
        'Media': media,
        'Mediana': mediana,
        'Moda': modas.iloc[0] if not modas.empty else mediana,
        'Desviación Estándar': desv,
        'Varianza': var_data.var(),
        'Mínimo': minimo,
        'Máximo': maximo,
        'Rango': maximo - minimo,
        'Q1 (Percentil 25)': q1,
        'Q3 (Percentil 75)': q3,
        'IQR': q3 - q1,
        'Coef. Variación %': (desv / media) * 100 if media != 0 else 0
    }
    stats_df = pd.DataFrame([
        {'Estadística': k, 'Valor': f"{v:.3f}", 'Unidad': unit}
        for k, v in stats.items()
    ])

    summary_aggs = ['count', 'mean', 'median', 'std', 'min', 'max']

    # Summary per province
    if 'provincia' in df_f.columns:
        prov_stats = df_f.groupby('provincia')[var].agg(summary_aggs).round(3).reset_index()
        prov_stats.columns = ['Provincia', 'Registros', 'Media', 'Mediana', 'Desv. Std', 'Mínimo', 'Máximo']
    else:
        prov_stats = pd.DataFrame()

    # Summary per season (Verano / Invierno)
    season_stats = df_f.groupby('season')[var].agg(summary_aggs).round(3).reset_index()
    season_stats.columns = ['Estación', 'Registros', 'Media', 'Mediana', 'Desv. Std', 'Mínimo', 'Máximo']
    season_stats['Período'] = season_stats['Estación'].map({
        'Verano': 'Jun-Nov (Seco)',
        'Invierno': 'Dic-May (Lluvioso)'
    })

    # Box plot per province
    fig_box = px.box(
        df_f, x='provincia', y=var,
        title=f"Distribución por Provincia - {var_label}",
        labels={var: axis_label, 'provincia': 'Provincia'}
    )
    fig_box.update_xaxes(tickangle=45)

    # Observed density against a normal curve with the sample mean and std
    fig_hist_normal = go.Figure()
    fig_hist_normal.add_trace(go.Histogram(
        x=var_data, nbinsx=40, name='Datos Observados',
        marker_color='lightblue', opacity=0.7, histnorm='probability density'
    ))
    x_norm = np.linspace(minimo, maximo, 100)
    # Imported here, after the empty-data guard, as in the original code path.
    from scipy import stats as scipy_stats
    y_norm = scipy_stats.norm.pdf(x_norm, media, desv)
    fig_hist_normal.add_trace(go.Scatter(
        x=x_norm, y=y_norm, mode='lines',
        name='Distribución Normal Teórica',
        line=dict(color='red', width=3)
    ))
    fig_hist_normal.update_layout(
        title=f"Distribución vs Normal Teórica - {var_label}",
        xaxis_title=axis_label,
        yaxis_title="Densidad de Probabilidad"
    )

    # Correlation of `var` with up to eight other numeric variables.
    # A single other variable is enough for a meaningful chart, so the
    # condition is "at least one" rather than "more than one".
    numeric_cols = [col for col in NUM_VARS if col in df_f.columns and col != var]
    if numeric_cols:
        corr_data = df_f[[var] + numeric_cols[:8]].corr()[var].drop(var)
        corr_df = pd.DataFrame({
            'Variable': [LABELS.get(col, col) for col in corr_data.index],
            'Correlación': corr_data.values
        }).sort_values('Correlación', key=abs, ascending=False)
        fig_corr = px.bar(
            corr_df.head(8), x='Correlación', y='Variable',
            title=f"Correlaciones más Fuertes con {var_label}",
            orientation='h',
            color='Correlación',
            color_continuous_scale='RdBu_r',
            range_color=[-1, 1]
        )
    else:
        fig_corr = go.Figure()

    # Box plot per season
    fig_seasonal = px.box(
        df_f, x='season', y=var, color='season',
        title=f"Variación Estacional Ecuador - {var_label}",
        labels={var: axis_label, 'season': 'Estación'},
        color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'}
    )
    fig_seasonal.update_layout(
        annotations=[
            dict(text="Verano: Junio-Noviembre (época seca)", xref="paper", yref="paper",
                 x=0.02, y=0.98, showarrow=False, font_size=11, bgcolor="rgba(255,255,255,0.8)"),
            dict(text="Invierno: Diciembre-Mayo (época lluviosa)", xref="paper", yref="paper",
                 x=0.02, y=0.93, showarrow=False, font_size=11, bgcolor="rgba(255,255,255,0.8)")
        ]
    )

    # Per-season table, or a fallback note when there is nothing to show.
    if not season_stats.empty:
        season_table = dash_table.DataTable(
            data=season_stats.to_dict('records'),
            columns=[{"name": i, "id": i} for i in season_stats.columns],
            style_cell={'textAlign': 'center', 'fontSize': '12px'},
            style_header={'backgroundColor': '#e8f5e8', 'fontWeight': 'bold'},
            style_data_conditional=[
                {
                    'if': {'filter_query': '{Estación} = Verano'},
                    'backgroundColor': '#fff3cd',
                },
                {
                    'if': {'filter_query': '{Estación} = Invierno'},
                    'backgroundColor': '#d1ecf1',
                }
            ]
        )
    else:
        season_table = html.P("No hay datos estacionales disponibles", className="text-muted")

    # Per-province table, or a fallback note when there is nothing to show.
    if not prov_stats.empty:
        prov_table = dash_table.DataTable(
            data=prov_stats.to_dict('records'),
            columns=[{"name": i, "id": i} for i in prov_stats.columns],
            style_cell={'textAlign': 'center', 'fontSize': '11px'},
            style_header={'backgroundColor': '#e3f2fd', 'fontWeight': 'bold'},
            style_data_conditional=[
                {
                    'if': {'row_index': 'odd'},
                    'backgroundColor': '#f8f9fa'
                }
            ],
            page_size=10,
            sort_action="native"
        )
    else:
        prov_table = html.P("No hay datos provinciales disponibles", className="text-muted")

    return dbc.Row([
        # Descriptive statistics
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-chart-pie me-2"),
                    f"Estadísticas Descriptivas - {var_label}"
                ]),
                dbc.CardBody([
                    dash_table.DataTable(
                        data=stats_df.to_dict('records'),
                        columns=[{"name": i, "id": i} for i in stats_df.columns],
                        style_cell={'textAlign': 'left', 'fontSize': '14px'},
                        style_header={'backgroundColor': '#f8f9fa', 'fontWeight': 'bold'},
                        style_data_conditional=[
                            {
                                'if': {'row_index': 'odd'},
                                'backgroundColor': '#f8f9fa'
                            }
                        ]
                    )
                ])
            ], style=CARD_STYLE)
        ], md=6),
        # Histogram vs normal curve
        dbc.Col([
            dcc.Graph(figure=fig_hist_normal, config={"displayModeBar": True})
        ], md=6),
        # Box plot per province
        dbc.Col([
            dcc.Graph(figure=fig_box, config={"displayModeBar": True})
        ], md=6),
        # Correlations
        dbc.Col([
            dcc.Graph(figure=fig_corr, config={"displayModeBar": True})
        ], md=6),
        # Seasonal variation
        dbc.Col([
            dcc.Graph(figure=fig_seasonal, config={"displayModeBar": True})
        ], md=12),
        # Per-season table
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-leaf me-2"),
                    "Estadísticas por Estación Climática (Ecuador)"
                ]),
                dbc.CardBody([season_table])
            ], style=CARD_STYLE)
        ], md=6),
        # Per-province table
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-map me-2"),
                    "Estadísticas por Provincia"
                ]),
                dbc.CardBody([prov_table])
            ], style=CARD_STYLE)
        ], md=6),
    ], className="g-3")
# Callbacks | |
def actualizar_cantones(provinces):
    """Return the canton dropdown options for the selected provinces.

    Args:
        provinces: Selected province names. Empty or ``None`` means every
            canton in the data set is offered.

    Returns:
        A tuple ``(options, value)``: the sorted canton options as
        ``{"label", "value"}`` dicts, and an empty list.
    """
    # NOTE(review): no @app.callback decorator is visible on this function in
    # this part of the file -- confirm it is registered as a callback elsewhere.
    subset = df.loc[df["provincia"].isin(provinces)] if provinces else df
    nombres = sorted(subset["canton"].dropna().unique().tolist())
    opciones = [{"label": nombre, "value": nombre} for nombre in nombres]
    return opciones, []
def actualizar_kpis(var, years, provinces, cantons, season, start_date, end_date):
    """Rebuild the KPI cards for the current filter selection.

    Args:
        var: Selected variable column. A falsy value yields an empty Div.
        years: Selected years. A falsy value falls back to all ``YEARS``.
        provinces, cantons, season: Selected values for each filter.
        start_date, end_date: Bounds of the selected date range.

    Returns:
        The component produced by ``crear_kpis_principales`` for the
        filtered data, or an empty ``html.Div`` when no variable is selected.
    """
    # NOTE(review): no @app.callback decorator is visible on this function in
    # this part of the file -- confirm it is registered as a callback elsewhere.
    if var:
        selected_years = years if years else YEARS
        filtrado = filtrar_datos(
            df, var, selected_years, provinces, cantons, season,
            (start_date, end_date),
        )
        return crear_kpis_principales(filtrado, var)
    return html.Div()
# Callback for the per-year temporal analysis.

# Spanish month abbreviations used as x-axis categories in the monthly chart.
_TEMPORAL_MONTH_ABBR = {
    1: 'Ene', 2: 'Feb', 3: 'Mar', 4: 'Abr', 5: 'May', 6: 'Jun',
    7: 'Jul', 8: 'Ago', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dic',
}

# The daily series is only drawn when the filtered year has more rows than this.
_TEMPORAL_MIN_ROWS_FOR_DAILY = 50


def _temporal_axis_label(var):
    """Return the axis label ``"<readable name> (<unit>)"`` for ``var``."""
    return f"{LABELS.get(var, var)} ({UNITS.get(var, '')})"


def _temporal_monthly_figure(df_year, var, selected_year):
    """Build the line chart of the monthly mean of ``var`` with std error bars."""
    # Group on the precomputed ``month`` column so the key column keeps a
    # truthful name after ``reset_index``.
    monthly = df_year.groupby('month')[var].agg(['mean', 'std', 'count']).reset_index()
    monthly['month_name'] = monthly['month'].map(_TEMPORAL_MONTH_ABBR)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=monthly['month_name'],
        y=monthly['mean'],
        mode='lines+markers',
        name='Promedio Mensual',
        line=dict(color='#1f77b4', width=3),
        marker=dict(size=8),
        error_y=dict(type='data', array=monthly['std'], visible=True),
    ))
    fig.update_layout(
        title=f"Evolución Mensual {selected_year} - {LABELS.get(var, var)}",
        xaxis_title="Mes",
        yaxis_title=_temporal_axis_label(var),
        hovermode='x unified',
    )
    return fig


def _temporal_seasonal_figure(df_year, var, selected_year):
    """Build the bar chart comparing the mean of ``var`` between seasons."""
    seasonal = df_year.groupby('season')[var].agg(['mean', 'std', 'count']).reset_index()
    return px.bar(
        seasonal, x='season', y='mean',
        error_y='std',
        title=f"Comparación Estacional {selected_year} - {LABELS.get(var, var)}",
        labels={'mean': _temporal_axis_label(var), 'season': 'Estación'},
        color='season',
        color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'},
    )


def _temporal_distribution_figure(df_year, var, selected_year):
    """Build the 30-bin histogram of the values of ``var``."""
    return px.histogram(
        df_year, x=var, nbins=30,
        title=f"Distribución de Valores {selected_year} - {LABELS.get(var, var)}",
        labels={var: _temporal_axis_label(var)},
    )


def _temporal_daily_column(df_year, var, selected_year):
    """Return the full-width column with the per-timestamp mean of ``var``.

    Returns an empty ``html.Div`` when the year has too few rows for the
    series to be drawn.
    """
    if len(df_year) <= _TEMPORAL_MIN_ROWS_FOR_DAILY:
        return html.Div()

    daily = df_year.groupby('time')[var].mean().reset_index()
    fig = px.line(
        daily, x='time', y=var,
        title=f"Evolución Diaria {selected_year} - {LABELS.get(var, var)}",
        labels={'time': 'Fecha', var: _temporal_axis_label(var)},
    )
    fig.update_traces(line_color='#2ca02c')
    return dbc.Col([
        dcc.Graph(figure=fig, config={"displayModeBar": True})
    ], md=12)


def _temporal_stats_card(values, var, selected_year):
    """Return the card listing mean, median, std, min and max of ``values``."""
    unit = UNITS.get(var, '')
    return dbc.Card([
        dbc.CardHeader(f"Estadísticas {selected_year}"),
        dbc.CardBody([
            html.P(f"Promedio: {values.mean():.3f} {unit}", className="mb-2"),
            html.P(f"Mediana: {values.median():.3f} {unit}", className="mb-2"),
            html.P(f"Desv. Estándar: {values.std():.3f}", className="mb-2"),
            html.P(f"Mínimo: {values.min():.3f} {unit}", className="mb-2"),
            html.P(f"Máximo: {values.max():.3f} {unit}", className="mb-0"),
        ]),
    ])


def update_temporal_analysis(selected_year, var, provinces, cantons, season):
    """Render the analysis of one variable for a single selected year.

    Args:
        selected_year: Year to analyse; compared against ``df['year']``.
        var: Column of the module-level ``df`` to analyse.
        provinces: Optional list of provinces to keep (falsy = all).
        cantons: Optional list of cantons to keep (falsy = all).
        season: Optional list of seasons to keep (falsy = all).

    Returns:
        A 2-tuple ``(content, year_info)``:

        * no year or no variable selected: two empty ``html.Div``;
        * no usable data after filtering: a warning ``dbc.Alert`` and an
          empty ``html.Div``;
        * otherwise: a ``dbc.Row`` with the monthly, seasonal and histogram
          charts, a statistics card and (when there are enough rows) the
          daily series, plus an info ``dbc.Alert`` with the record count and
          date range.
    """
    if not selected_year or not var:
        return html.Div(), html.Div()

    # Combine every active filter into one boolean mask over ``df``.
    mask = df['year'] == selected_year
    if provinces:
        mask &= df["provincia"].isin(provinces)
    if cantons:
        mask &= df["canton"].isin(cantons)
    if season:
        mask &= df["season"].isin(season)
    df_year = df[mask]

    # Rows that exist but hold only missing values for ``var`` would produce
    # empty charts and "nan" statistics, so treat that case as "no data" too.
    if df_year.empty or df_year[var].isna().all():
        return dbc.Alert(f"No hay datos para el año {selected_year} con los filtros seleccionados.", color="warning"), html.Div()

    first_date = df_year['time'].min().strftime('%d/%m/%Y')
    last_date = df_year['time'].max().strftime('%d/%m/%Y')
    year_info = dbc.Alert([
        html.H6(f"Análisis del Año {selected_year}", className="mb-2"),
        html.P(f"Registros encontrados: {len(df_year):,}", className="mb-1"),
        html.P(f"Rango de fechas: {first_date} - {last_date}", className="mb-0"),
    ], color="info")

    def half_width_graph(fig):
        # Every chart except the daily series occupies half of the row.
        return dbc.Col([
            dcc.Graph(figure=fig, config={"displayModeBar": True})
        ], md=6)

    content = dbc.Row([
        half_width_graph(_temporal_monthly_figure(df_year, var, selected_year)),
        half_width_graph(_temporal_seasonal_figure(df_year, var, selected_year)),
        half_width_graph(_temporal_distribution_figure(df_year, var, selected_year)),
        dbc.Col([
            _temporal_stats_card(df_year[var], var, selected_year)
        ], md=6),
        _temporal_daily_column(df_year, var, selected_year),
    ], className="g-3")
    return content, year_info
def render_tab_content(active_tab, var, years, provinces, cantons, season, start_date, end_date):
    """Render the body of the active tab for the current filter selection.

    Args:
        active_tab: Identifier of the selected tab (``"dashboard"``,
            ``"spatial"``, ``"temporal"``, ``"tree_classification"`` or
            ``"statistics"``).
        var: Name of the selected variable.
        years: Selected years; a falsy value falls back to the module-level
            ``YEARS``.
        provinces: Province filter, forwarded to ``filtrar_datos``.
        cantons: Canton filter, forwarded to ``filtrar_datos``.
        season: Season filter, forwarded to ``filtrar_datos``.
        start_date: Start of the date range.
        end_date: End of the date range; passed together with ``start_date``
            as a ``(start_date, end_date)`` tuple.

    Returns:
        An info alert when no variable is selected, a warning alert when the
        filtered data is empty, the output of the renderer that matches
        ``active_tab`` otherwise, or ``None`` for an unrecognised tab.
    """
    if not var:
        return dbc.Alert("Por favor selecciona una variable para continuar.", color="info")

    selected_years = years if years else YEARS
    date_range = (start_date, end_date)
    df_f = filtrar_datos(df, var, selected_years, provinces, cantons, season, date_range)
    if df_f.empty:
        return dbc.Alert("No hay datos disponibles con los filtros seleccionados.", color="warning")

    # Tab identifier -> function that renders it from (filtered data, variable).
    tab_renderers = (
        ("dashboard", render_dashboard),
        ("spatial", render_spatial_analysis),
        ("temporal", render_temporal_analysis_simple),
        ("tree_classification", render_tree_classification),
        ("statistics", render_detailed_statistics),
    )
    for tab_id, renderer in tab_renderers:
        if active_tab == tab_id:
            return renderer(df_f, var)
    return None
# Improved custom CSS.
# Assigns the HTML page template to `app.index_string` (`app` is defined
# outside this part of the file, presumably the Dash application).
# The template embeds a <style> block covering: tab appearance (.custom-tab,
# its :hover and --selected states), a hover lift on .card, the body font
# stack, the .fas icon colour, .prediction-highlight, DataTable cell borders,
# a .loading-spinner rotation animation, and smaller tabs below 768px.
# The {%metas%}, {%title%}, {%favicon%}, {%css%}, {%app_entry%}, {%config%},
# {%scripts%} and {%renderer%} tokens are template placeholders and must stay.
# NOTE(review): every line of this literal ends in " | |"; this looks like
# extraction residue rather than intended markup -- confirm against the file.
app.index_string = ''' | |
<!DOCTYPE html> | |
<html> | |
<head> | |
{%metas%} | |
<title>{%title%}</title> | |
{%favicon%} | |
{%css%} | |
<style> | |
.custom-tab { | |
background-color: #f8f9fa !important; | |
border: 1px solid #dee2e6 !important; | |
border-radius: 8px 8px 0 0 !important; | |
margin-right: 4px !important; | |
font-weight: 500 !important; | |
transition: all 0.3s ease !important; | |
padding: 12px 20px !important; | |
} | |
.custom-tab:hover { | |
background-color: #e9ecef !important; | |
transform: translateY(-2px) !important; | |
} | |
.custom-tab--selected { | |
background-color: #007bff !important; | |
color: white !important; | |
border-color: #007bff !important; | |
box-shadow: 0 4px 8px rgba(0,123,255,0.3) !important; | |
} | |
.card { | |
transition: transform 0.2s ease, box-shadow 0.2s ease !important; | |
} | |
.card:hover { | |
transform: translateY(-2px) !important; | |
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important; | |
} | |
body { | |
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important; | |
} | |
.fas { | |
color: #007bff !important; | |
} | |
/* Estilos para las predicciones */ | |
.prediction-highlight { | |
background: linear-gradient(45deg, #28a745, #20c997) !important; | |
color: white !important; | |
border-radius: 8px !important; | |
padding: 10px !important; | |
margin: 5px 0 !important; | |
} | |
/* Mejorar visualización de tablas */ | |
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table { | |
border-collapse: separate !important; | |
border-spacing: 0 !important; | |
} | |
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table th, | |
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table td { | |
border: 1px solid #dee2e6 !important; | |
border-top: none !important; | |
border-left: none !important; | |
} | |
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table th:first-child, | |
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table td:first-child { | |
border-left: 1px solid #dee2e6 !important; | |
} | |
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table tr:first-child th { | |
border-top: 1px solid #dee2e6 !important; | |
} | |
/* Animaciones suaves */ | |
.loading-spinner { | |
animation: spin 1s linear infinite !important; | |
} | |
@keyframes spin { | |
0% { transform: rotate(0deg); } | |
100% { transform: rotate(360deg); } | |
} | |
/* Responsive mejoras */ | |
@media (max-width: 768px) { | |
.custom-tab { | |
font-size: 12px !important; | |
padding: 8px 12px !important; | |
} | |
} | |
</style> | |
</head> | |
<body> | |
{%app_entry%} | |
<footer> | |
{%config%} | |
{%scripts%} | |
{%renderer%} | |
</footer> | |
</body> | |
</html> | |
''' | |
if __name__ == "__main__":
    # Script entry point: serve the app on all network interfaces. The port is
    # read from the PORT environment variable and defaults to 7860.
    port = int(os.environ.get("PORT", 7860))
    # `run_server` is the legacy alias of `run` and is no longer available in
    # recent Dash releases. Prefer `run`; the `or` short-circuits so the old
    # name is only looked up on releases that do not provide `run` yet.
    run_app = getattr(app, "run", None) or app.run_server
    run_app(
        host="0.0.0.0",
        port=port,
        debug=False,
    )