# TEST / app.py
# Jose440car's picture
# Update app.py
# a5d7317 verified
import os
import pandas as pd
import numpy as np
import dash
from dash import Dash, html, dcc, Input, Output, State, dash_table
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.tree import DecisionTreeClassifier, export_text, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import io
import base64
import warnings
# Suppress every Python warning for the whole process.
warnings.filterwarnings('ignore')

# The CSV location can be overridden with the GLDAS_CSV environment variable.
DATA_PATH = os.environ.get(
    "GLDAS_CSV",
    "ecuador_gldas_enriquecido_solo_tierra.csv"
)
df = pd.read_csv(DATA_PATH, encoding="utf-8-sig")

# Validate the schema BEFORE any column is used: a CSV without "time" would
# otherwise fail with a bare KeyError below instead of this explicit message.
req = {"lat", "lon", "provincia", "canton", "time"}
faltan = req - set(df.columns)
if faltan:
    raise ValueError(f"Faltan columnas en el CSV: {faltan}")

# Derived calendar columns used by the filters and the charts.
df["time"] = pd.to_datetime(df["time"])
df["year"] = df["time"].dt.year
df["month"] = df["time"].dt.month

# Seasons as used in this app for Ecuador (two only):
# dry "Verano" = June-November, rainy "Invierno" = December-May.
df["season"] = df["month"].map({
    12: "Invierno", 1: "Invierno", 2: "Invierno", 3: "Invierno", 4: "Invierno", 5: "Invierno",
    6: "Verano", 7: "Verano", 8: "Verano", 9: "Verano", 10: "Verano", 11: "Verano"
})
# GLDAS variables the app knows how to display; only those actually present
# in the CSV with a numeric dtype end up in NUM_VARS.
CAND_VARS = [
    "SoilMoi0_10cm_inst", "SoilMoi10_40cm_inst", "RootMoist_inst",
    "SoilTMP0_10cm_inst", "Tair_f_inst", "Rainf_tavg", "Evap_tavg",
    "SWdown_f_tavg", "LWdown_f_tavg", "Qs_acc", "Qsb_acc", "Qsm_acc", "CanopInt_inst",
]
NUM_VARS = [
    name
    for name in CAND_VARS
    if name in df.columns and pd.api.types.is_numeric_dtype(df[name])
]

# Human-readable (Spanish) label shown in the UI for each variable.
LABELS = {
    "SoilMoi0_10cm_inst": "Humedad Suelo Superficial (0-10cm)",
    "SoilMoi10_40cm_inst": "Humedad Suelo Profundo (10-40cm)",
    "RootMoist_inst": "Humedad Zona Radicular",
    "SoilTMP0_10cm_inst": "Temperatura del Suelo",
    "Tair_f_inst": "Temperatura del Aire",
    "Rainf_tavg": "Precipitación Media",
    "Evap_tavg": "Evapotranspiración",
    "SWdown_f_tavg": "Radiación Solar Incidente",
    "LWdown_f_tavg": "Radiación Térmica Incidente",
    "Qs_acc": "Escorrentía Superficial",
    "Qsb_acc": "Escorrentía Subsuperficial",
    "Qsm_acc": "Derretimiento de Nieve",
    "CanopInt_inst": "Intercepción del Dosel",
}

# Unit string appended to axis titles and KPI captions.
UNITS = {
    "SoilMoi0_10cm_inst": "kg/m²", "SoilMoi10_40cm_inst": "kg/m²", "RootMoist_inst": "kg/m²",
    "SoilTMP0_10cm_inst": "K", "Tair_f_inst": "K", "Rainf_tavg": "kg·m⁻²·s⁻¹",
    "Evap_tavg": "kg·m⁻²·s⁻¹", "SWdown_f_tavg": "W·m⁻²", "LWdown_f_tavg": "W·m⁻²",
    "Qs_acc": "kg/m²", "Qsb_acc": "kg/m²", "Qsm_acc": "kg/m²", "CanopInt_inst": "kg/m²",
}

# Sorted province names, and for each province its sorted canton names.
PROVINCIAS = sorted(df["provincia"].dropna().unique().tolist())
CANTONES_POR_PROV = dict(
    (prov, sorted(df.loc[df["provincia"] == prov, "canton"].dropna().unique().tolist()))
    for prov in PROVINCIAS
)

# Years present in the data, ascending.
YEARS = df["year"].unique().tolist()
YEARS.sort()

# Only two seasons are used for Ecuador.
SEASONS = ["Verano", "Invierno"]

# Calendar bounds of the dataset, used as the default date-picker range.
MIN_DATE, MAX_DATE = df["time"].min().date(), df["time"].max().date()
# The Dash application. Callback exceptions are suppressed because some
# callback targets (e.g. "temporal-year-selector") only exist after a tab has
# been rendered by another callback, so they are absent from the initial layout.
app = Dash(
    __name__,
    suppress_callback_exceptions=True,
    external_stylesheets=[
        dbc.themes.BOOTSTRAP,
        "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css",
    ],
)

# Module-level handle on the app's underlying server object
# (presumably for a WSGI runner such as gunicorn - confirm against deployment).
server = app.server
# --------------------------------------------------
# Custom styles
# --------------------------------------------------
# Dash `style` dictionaries take camelCased CSS property names
# ("boxShadow", not "box-shadow"); kebab-case keys make React emit
# "unsupported style property" warnings.

# Shared look for every card in the app.
CARD_STYLE = {
    "boxShadow": "0 4px 6px rgba(0, 0, 0, 0.1)",
    "border": "none",
    "borderRadius": "10px",
    "marginBottom": "20px",
}

# Gradient banner at the top of the page.
HEADER_STYLE = {
    "background": "linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    "color": "white",
    "padding": "30px 0",
    "marginBottom": "30px",
    "borderRadius": "0 0 20px 20px",
}
# --------------------------------------------------
# Main layout
# --------------------------------------------------
# Page structure: header, filter card, KPI row (filled by a callback),
# tab bar with a callback-rendered content area, footer.
# Inline `style` dicts use camelCased CSS property names, as Dash expects.
app.layout = dbc.Container([
    # Header
    html.Div([
        html.H1([
            html.I(className="fas fa-seedling me-3"),
            "AGRO-LEO ECUADOR"
        ], className="text-center mb-2", style={"fontWeight": "bold"}),
        html.P("Sistema de Monitoreo Agroclimático basado en datos GLDAS Noah 0.25° (2020-2024)",
               className="text-center mb-0", style={"opacity": "0.9"})
    ], style=HEADER_STYLE),

    # Global filter controls
    dbc.Card([
        dbc.CardBody([
            dbc.Row([
                dbc.Col([
                    dbc.Label([html.I(className="fas fa-chart-line me-2"), "Variable Agroclimática"], className="fw-bold"),
                    dcc.Dropdown(
                        id="sel-var",
                        options=[{"label": LABELS.get(v, v), "value": v} for v in NUM_VARS],
                        # Guard against a CSV that contains none of the known variables.
                        value=NUM_VARS[0] if NUM_VARS else None,
                        clearable=False,
                        style={"borderRadius": "8px"}
                    ),
                ], md=3),
                dbc.Col([
                    dbc.Label([html.I(className="fas fa-calendar me-2"), "Período de Análisis"], className="fw-bold"),
                    dcc.Dropdown(
                        id="sel-years",
                        options=[{"label": f"Año {y}", "value": y} for y in YEARS],
                        value=YEARS,
                        multi=True
                    ),
                ], md=3),
                dbc.Col([
                    dbc.Label([html.I(className="fas fa-map-marker-alt me-2"), "Región"], className="fw-bold"),
                    dcc.Dropdown(
                        id="sel-provinces",
                        options=[{"label": p, "value": p} for p in PROVINCIAS],
                        # An empty selection means "no province filter".
                        value=[],
                        multi=True,
                        placeholder="Todas las provincias"
                    ),
                ], md=3),
                dbc.Col([
                    dbc.Label([html.I(className="fas fa-leaf me-2"), "Estación"], className="fw-bold"),
                    dcc.Dropdown(
                        id="sel-season",
                        options=[{"label": s, "value": s} for s in SEASONS],
                        # An empty selection means "both seasons".
                        value=[],
                        multi=True,
                        placeholder="Ambas estaciones"
                    ),
                ], md=3),
            ], className="g-3"),
            html.Hr(style={"margin": "20px 0"}),
            dbc.Row([
                dbc.Col([
                    dbc.Label("Cantones (opcional)"),
                    # Options are filled by the `actualizar_cantones` callback.
                    dcc.Dropdown(id="sel-cantons", options=[], value=[], multi=True, placeholder="Seleccionar cantones"),
                ], md=6),
                dbc.Col([
                    dbc.Label("Rango de Fechas"),
                    dcc.DatePickerRange(
                        id="sel-dates",
                        start_date=str(MIN_DATE),
                        end_date=str(MAX_DATE),
                        display_format="DD/MM/YYYY"
                    ),
                ], md=6),
            ], className="g-3"),
        ])
    ], style=CARD_STYLE),

    # KPI cards (rendered by `actualizar_kpis`)
    html.Div(id="main-kpis"),

    # Tabs; the content area is rendered by `render_tab_content`
    dbc.Card([
        dbc.CardBody([
            dcc.Tabs(
                id="main-tabs",
                value="dashboard",
                children=[
                    dcc.Tab(
                        label=tab_label,
                        value=tab_value,
                        className="custom-tab",
                        selected_className="custom-tab--selected"
                    )
                    for tab_label, tab_value in [
                        ("📊 Dashboard Principal", "dashboard"),
                        ("🗺️ Análisis Espacial", "spatial"),
                        ("📈 Series Temporales", "temporal"),
                        ("🌳 Árbol de Clasificación", "tree_classification"),
                        ("📋 Estadísticas Detalladas", "statistics"),
                    ]
                ],
                style={"marginBottom": "20px"}
            ),
            html.Div(id="tab-content")
        ])
    ], style=CARD_STYLE),

    # Footer
    html.Hr(style={"marginTop": "40px"}),
    html.Div([
        html.P([
            "© 2025 AGRO-LEO Ecuador • ",
            html.A("Datos GLDAS", href="https://ldas.gsfc.nasa.gov/gldas", target="_blank"),
            " • Desarrollado para agricultura de precisión"
        ], className="text-center text-muted mb-0")
    ], style={"padding": "20px 0"})
], fluid=True, style={"backgroundColor": "#f8f9fa", "minHeight": "100vh", "padding": "0"})
def filtrar_datos(df_orig, var, years, provinces, cantons, season, dates):
    """Return a filtered copy of ``df_orig`` according to the selected filters.

    Every filter is optional: an empty or ``None`` value means "do not filter
    on this dimension".

    Args:
        df_orig: Frame with ``year``, ``provincia``, ``canton``, ``season``
            and ``time`` columns.
        var: Selected variable name. Unused here; kept so the call signature
            stays the same for existing callers.
        years: Iterable of years to keep.
        provinces: Iterable of province names to keep.
        cantons: Iterable of canton names to keep.
        season: Iterable of season names to keep.
        dates: ``(start, end)`` pair; applied only when both ends are set.
            When ``end`` carries no time of day (as the date picker's
            ``YYYY-MM-DD`` strings do) the whole end day is included, so
            intraday rows on the last selected day are not dropped.

    Returns:
        A new DataFrame (never a view of ``df_orig``) with the matching rows.
    """
    # One boolean mask, narrowed filter by filter, then a single row selection.
    mask = np.ones(len(df_orig), dtype=bool)
    if years:
        mask &= df_orig["year"].isin(years).to_numpy()
    if provinces:
        mask &= df_orig["provincia"].isin(provinces).to_numpy()
    if cantons:
        mask &= df_orig["canton"].isin(cantons).to_numpy()
    if season:
        mask &= df_orig["season"].isin(season).to_numpy()
    if dates and dates[0] and dates[1]:
        start = pd.to_datetime(dates[0])
        end = pd.to_datetime(dates[1])
        times = df_orig["time"]
        mask &= (times >= start).to_numpy()
        if end == end.normalize():
            # Date-only upper bound: include every timestamp of that day.
            mask &= (times < end + pd.Timedelta(days=1)).to_numpy()
        else:
            # An explicit time of day is respected exactly.
            mask &= (times <= end).to_numpy()
    return df_orig[mask].copy()
def crear_kpis_principales(df_f, var):
    """Build the row of six KPI cards for the filtered data.

    Args:
        df_f: Filtered frame with ``time``, ``lat``, ``lon`` and ``var`` columns.
        var: Name of the variable being summarised.

    Returns:
        A ``dbc.Row`` of six cards (row count, distinct timestamps, distinct
        lat/lon pairs, mean, min-max range, percentage of missing values), or
        an empty ``html.Div`` when ``df_f`` is empty or lacks ``var``.
    """
    if df_f.empty or var not in df_f.columns:
        return html.Div()

    serie = df_f[var]
    validos = serie.dropna()
    if len(validos) > 0:
        media, menor, mayor = validos.mean(), validos.min(), validos.max()
    else:
        # No valid values at all: show zeros rather than NaN.
        media = menor = mayor = 0

    # (icon classes, headline value, caption) for each card, in display order.
    tarjetas = [
        ("fas fa-database fa-2x text-primary mb-2",
         f"{len(df_f):,}", "Registros Totales"),
        ("fas fa-calendar fa-2x text-success mb-2",
         f"{df_f['time'].nunique()}", "Fechas Únicas"),
        ("fas fa-map-pin fa-2x text-info mb-2",
         f"{df_f[['lat', 'lon']].drop_duplicates().shape[0]}", "Ubicaciones"),
        ("fas fa-chart-bar fa-2x text-warning mb-2",
         f"{media:.2f}", f"Promedio ({UNITS.get(var, '')})"),
        ("fas fa-arrows-alt-v fa-2x text-danger mb-2",
         f"{menor:.1f} - {mayor:.1f}", "Rango Min-Max"),
        ("fas fa-exclamation-triangle fa-2x text-secondary mb-2",
         f"{serie.isna().mean() * 100:.1f}%", "Datos Faltantes"),
    ]

    return dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.Div([
                        html.I(className=icono),
                        html.H4(valor, className="mb-1"),
                        html.P(leyenda, className="text-muted mb-0 small")
                    ], className="text-center")
                ])
            ], style=CARD_STYLE)
        ], md=2)
        for icono, valor, leyenda in tarjetas
    ], className="g-3 mb-4")
def render_dashboard(df_f, var):
    """Build the main dashboard tab: four charts in a 2x2 grid.

    Charts: histogram of ``var``; per-timestamp mean with a +/- 1 standard
    deviation band; mean per province (horizontal bars); mean per season with
    standard-deviation error bars.

    Args:
        df_f: Filtered frame.
        var: Name of the variable to plot.

    Returns:
        A ``dbc.Row`` holding four half-width ``dcc.Graph`` columns.
    """
    nombre = LABELS.get(var, var)
    eje = f"{nombre} ({UNITS.get(var, '')})"

    def _columna(figura):
        # Each chart takes half the row width on medium+ screens.
        return dbc.Col([
            dcc.Graph(figure=figura, config={"displayModeBar": True})
        ], md=6)

    # 1. Value distribution.
    hist = px.histogram(
        df_f, x=var, nbins=30,
        title=f"Distribución de {nombre}",
        labels={var: eje}
    )
    hist.update_layout(showlegend=False)

    # 2. Mean over time with a shaded +/- 1 std band.
    por_fecha = df_f.groupby('time')[var].agg(['mean', 'std']).reset_index()
    fechas, media, desv = por_fecha['time'], por_fecha['mean'], por_fecha['std']
    serie = go.Figure()
    serie.add_trace(go.Scatter(
        x=fechas, y=media,
        mode='lines', name='Promedio',
        line=dict(color='#1f77b4', width=2)
    ))
    # Invisible upper bound; the next trace fills down to it ('tonexty').
    serie.add_trace(go.Scatter(
        x=fechas, y=media + desv,
        mode='lines', name='+ 1 Desv. Std', line=dict(width=0),
        showlegend=False, hoverinfo='skip'
    ))
    serie.add_trace(go.Scatter(
        x=fechas, y=media - desv,
        mode='lines', name='- 1 Desv. Std', line=dict(width=0),
        fillcolor='rgba(31, 119, 180, 0.2)', fill='tonexty',
        showlegend=False, hoverinfo='skip'
    ))
    serie.update_layout(
        title=f"Evolución Temporal de {nombre}",
        xaxis_title="Fecha",
        yaxis_title=eje
    )

    # 3. Mean per province, sorted ascending so the largest bar is on top.
    if 'provincia' not in df_f.columns:
        barras_prov = go.Figure()
    else:
        por_prov = (
            df_f.groupby('provincia')[var]
            .agg(['mean', 'count'])
            .reset_index()
            .sort_values('mean', ascending=True)
        )
        barras_prov = px.bar(
            por_prov, x='mean', y='provincia',
            title=f"Promedio por Provincia - {nombre}",
            labels={'mean': eje, 'provincia': 'Provincia'},
            orientation='h'
        )
        barras_prov.update_layout(height=400)

    # 4. Mean per season (two seasons for Ecuador).
    if 'season' not in df_f.columns:
        barras_est = go.Figure()
    else:
        por_est = df_f.groupby('season')[var].agg(['mean', 'std']).reset_index()
        barras_est = px.bar(
            por_est, x='season', y='mean',
            error_y='std',
            title=f"Variación Estacional Ecuador - {nombre}",
            labels={'mean': eje, 'season': 'Estación'},
            color='season',
            color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'}
        )
        notas = [
            ("Verano: Junio-Noviembre (seco)", 0.98),
            ("Invierno: Diciembre-Mayo (lluvioso)", 0.93),
        ]
        barras_est.update_layout(
            annotations=[
                dict(text=texto, xref="paper", yref="paper",
                     x=0.02, y=altura, showarrow=False, font_size=10)
                for texto, altura in notas
            ]
        )

    return dbc.Row(
        [_columna(f) for f in (hist, serie, barras_prov, barras_est)],
        className="g-3"
    )
def render_spatial_analysis(df_f, var, max_points=10000):
    """Build the spatial tab: a scatter map of ``var`` coloured by value.

    Args:
        df_f: Filtered frame with ``lat``, ``lon``, ``provincia``, ``canton``,
            ``time`` and ``var`` columns.
        var: Name of the variable used for the colour scale.
        max_points: Upper bound on plotted rows; larger frames are
            down-sampled to keep the figure light.

    Returns:
        A ``dbc.Row`` with a single full-width map.
    """
    if len(df_f) > max_points:
        # Fixed seed: the same filters always draw the same points, so the
        # map does not reshuffle on every re-render.
        df_plot = df_f.sample(max_points, random_state=42)
    else:
        df_plot = df_f

    fig_map = px.scatter_mapbox(
        df_plot,
        lat="lat", lon="lon", color=var,
        hover_data={"provincia": True, "canton": True, "time": True, var: ":.3f"},
        zoom=5.2, height=600,
        color_continuous_scale="Viridis",
        title=f"Distribución Espacial - {LABELS.get(var, var)}"
    )
    fig_map.update_traces(marker={"size": 6})
    # OpenStreetMap tiles need no Mapbox access token.
    fig_map.update_layout(mapbox_style="open-street-map")

    return dbc.Row([
        dbc.Col([
            dcc.Graph(figure=fig_map, config={"displayModeBar": True})
        ], md=12),
    ], className="g-3")
def render_temporal_analysis_simple(df_f, var):
    """Build the shell of the time-series tab: a year selector plus placeholders.

    The charts themselves are produced by the ``update_temporal_analysis``
    callback, which writes into the ``temporal-analysis-content`` and
    ``temporal-year-info`` containers created here.

    Args:
        df_f: Filtered frame; its ``year`` column provides the selectable years.
        var: Selected variable (not used when building the shell).

    Returns:
        An ``html.Div`` with the selector card and an empty content container.
    """
    anios = sorted(df_f['year'].unique())

    selector = dcc.Dropdown(
        id="temporal-year-selector",
        options=[{"label": f"Año {a}", "value": a} for a in anios],
        value=anios[-1],  # most recent year by default
        clearable=False
    )

    tarjeta = dbc.Card([
        dbc.CardBody([
            dbc.Row([
                dbc.Col([
                    dbc.Label("Seleccionar Año para Análisis:"),
                    selector
                ], md=4),
                dbc.Col([
                    html.Div(id="temporal-year-info", className="mt-3")
                ], md=8)
            ])
        ])
    ], className="mb-4")

    return html.Div([
        tarjeta,
        html.Div(id="temporal-analysis-content")
    ])
def create_variable_classes(df_f, var):
    """Label every row of ``df_f[var]`` with its quartile class.

    Classes: ``'Bajo'`` (<= Q1), ``'Medio-Bajo'`` (<= median),
    ``'Medio-Alto'`` (<= Q3), ``'Alto'`` (> Q3) and ``'Sin datos'`` for
    missing values. Quartiles are computed on the non-missing values only.

    Args:
        df_f: Frame containing the ``var`` column.
        var: Name of the numeric column to classify.

    Returns:
        ``(classes, quartiles)`` where ``classes`` is a Series aligned with
        ``df_f`` and ``quartiles`` is ``{'q25': ..., 'q50': ..., 'q75': ...}``.
    """
    values = df_f[var]
    valid = values.dropna()
    q25 = valid.quantile(0.25)
    q50 = valid.quantile(0.50)
    q75 = valid.quantile(0.75)

    # Vectorised instead of a per-row Python callback; np.select picks the
    # first matching condition, so the order mirrors an if/elif chain.
    labels = np.select(
        [values.isna(), values <= q25, values <= q50, values <= q75],
        ['Sin datos', 'Bajo', 'Medio-Bajo', 'Medio-Alto'],
        default='Alto',
    )
    classes = pd.Series(labels, index=values.index, name=values.name)
    return classes, {'q25': q25, 'q50': q50, 'q75': q75}
def render_tree_classification(df_f, var):
    """Build the classification-tree tab for ``var``.

    ``var`` is binned into quartile classes (see ``create_variable_classes``)
    and a depth-limited decision tree is trained to predict that class from
    up to six other numeric variables, a cyclic month encoding, and
    label-encoded province and season.

    Args:
        df_f: Filtered frame.
        var: Name of the target variable.

    Returns:
        A ``dbc.Row`` with class thresholds, model metrics, the rendered tree
        image, the confusion matrix and feature importances; or a
        ``dbc.Alert`` when there is too little data or anything fails.
    """
    try:
        # Target: quartile class of the selected variable.
        df_tree = df_f.copy()
        df_tree['target_class'], quartiles = create_variable_classes(df_tree, var)

        # Candidate predictors: every other known numeric variable.
        numeric_vars = [v for v in NUM_VARS if v != var and v in df_tree.columns]
        if len(numeric_vars) < 2:
            return dbc.Alert("No hay suficientes variables para crear el árbol de clasificación.", color="warning")

        # Cyclic month encoding so December and January end up adjacent.
        df_tree['month_sin'] = np.sin(2 * np.pi * df_tree['month'] / 12)
        df_tree['month_cos'] = np.cos(2 * np.pi * df_tree['month'] / 12)

        # Integer-encode the categorical columns.
        le_prov = LabelEncoder()
        df_tree['provincia_encoded'] = le_prov.fit_transform(df_tree['provincia'].astype(str))
        le_season = LabelEncoder()
        df_tree['season_encoded'] = le_season.fit_transform(df_tree['season'].astype(str))

        feature_cols = numeric_vars[:6] + ['month_sin', 'month_cos', 'provincia_encoded', 'season_encoded']
        feature_cols = [col for col in feature_cols if col in df_tree.columns]

        # Display names for the engineered features; GLDAS variables use LABELS.
        engineered_names = {
            'provincia_encoded': 'Provincia',
            'season_encoded': 'Estación',
            'month_sin': 'Mes (Sin)',
            'month_cos': 'Mes (Cos)',
        }
        feature_names_readable = [
            LABELS.get(col, engineered_names.get(col, col)) for col in feature_cols
        ]

        # Missing predictors are imputed with the column median.
        X = df_tree[feature_cols].fillna(df_tree[feature_cols].median())
        y = df_tree['target_class']

        # Rows whose target is missing cannot be used.
        valid_mask = y != 'Sin datos'
        X = X[valid_mask]
        y = y[valid_mask]
        if len(X) < 100:
            return dbc.Alert("No hay suficientes datos válidos para entrenar el árbol.", color="warning")

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        tree_model = DecisionTreeClassifier(
            max_depth=5,
            min_samples_split=50,
            min_samples_leaf=20,
            random_state=42
        )
        tree_model.fit(X_train, y_train)
        y_pred = tree_model.predict(X_test)

        # Render the tree to an in-memory PNG. Class names come from the
        # fitted model: with tied quartiles a class can be absent, and a
        # hard-coded list would then mislabel the nodes.
        fig, ax = plt.subplots(figsize=(25, 15))
        try:
            plot_tree(
                tree_model,
                feature_names=feature_names_readable,
                class_names=[str(c) for c in tree_model.classes_],
                filled=True,
                rounded=True,
                fontsize=12,
                proportion=True,
                impurity=True,
                ax=ax
            )
            ax.set_title(f"Árbol de Clasificación - {LABELS.get(var, var)}", fontsize=20, fontweight='bold', pad=20)
            buffer = io.BytesIO()
            fig.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)
        tree_plot_url = base64.b64encode(buffer.getvalue()).decode()

        accuracy = accuracy_score(y_test, y_pred)

        # Confusion matrix with classes in ascending order of magnitude.
        class_order = ['Bajo', 'Medio-Bajo', 'Medio-Alto', 'Alto']
        cm = confusion_matrix(y_test, y_pred, labels=class_order)
        fig_cm = px.imshow(
            cm,
            labels=dict(x="Predicho", y="Real", color="Frecuencia"),
            x=class_order,
            y=class_order,
            title="Matriz de Confusión",
            color_continuous_scale="Blues",
            text_auto=True
        )

        # Feature importances, ascending so the top feature is drawn last.
        importance_df = pd.DataFrame({
            'Variable': feature_names_readable,
            'Importancia': tree_model.feature_importances_
        }).sort_values('Importancia', ascending=True)
        fig_importance = px.bar(
            importance_df.tail(10), x='Importancia', y='Variable',
            title="Importancia de Variables en el Árbol",
            orientation='h',
            color='Importancia',
            color_continuous_scale='Viridis'
        )

        unit = UNITS.get(var, '')
        class_info = dbc.Card([
            dbc.CardHeader("Información de las Clases"),
            dbc.CardBody([
                html.P(f"Bajo: ≤ {quartiles['q25']:.3f} {unit}", className="mb-2"),
                html.P(f"Medio-Bajo: {quartiles['q25']:.3f} - {quartiles['q50']:.3f} {unit}", className="mb-2"),
                html.P(f"Medio-Alto: {quartiles['q50']:.3f} - {quartiles['q75']:.3f} {unit}", className="mb-2"),
                html.P(f"Alto: > {quartiles['q75']:.3f} {unit}", className="mb-0"),
            ])
        ])

        metrics_card = dbc.Card([
            dbc.CardHeader("Métricas del Modelo"),
            dbc.CardBody([
                html.H4(f"Precisión: {accuracy:.3f}", className="text-primary mb-3"),
                html.P(f"Datos de entrenamiento: {len(X_train):,}", className="mb-2"),
                html.P(f"Datos de prueba: {len(X_test):,}", className="mb-2"),
                html.P(f"Profundidad del árbol: {tree_model.get_depth()}", className="mb-0"),
            ])
        ])

        return dbc.Row([
            # Class thresholds and metrics
            dbc.Col([
                class_info,
                html.Br(),
                metrics_card
            ], md=3),
            # Tree image
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader("Visualización del Árbol de Clasificación"),
                    dbc.CardBody([
                        html.Img(src=f"data:image/png;base64,{tree_plot_url}", style={"width": "100%", "height": "auto"}),
                        html.Hr(),
                        dbc.Alert([
                            html.H6("Interpretación:", className="mb-2"),
                            html.P("• Cada nodo muestra la condición de división y las muestras", className="mb-1"),
                            html.P("• Los colores representan las diferentes clases", className="mb-1"),
                            html.P("• Las hojas muestran la clasificación final", className="mb-0"),
                        ], color="info")
                    ])
                ])
            ], md=9),
            # Confusion matrix
            dbc.Col([
                dcc.Graph(figure=fig_cm, config={"displayModeBar": True})
            ], md=6),
            # Feature importances
            dbc.Col([
                dcc.Graph(figure=fig_importance, config={"displayModeBar": True})
            ], md=6)
        ], className="g-3")
    except Exception as e:
        # UI boundary: show the failure in the tab instead of breaking the callback.
        return dbc.Alert(f"Error al crear el árbol de clasificación: {str(e)}", color="danger")
def render_detailed_statistics(df_f, var):
    """Build the detailed-statistics tab for ``var``.

    Contents: descriptive-statistics table, histogram against a fitted normal
    curve, box plot per province, correlation bars against the other numeric
    variables, seasonal box plot, and per-season / per-province tables.

    Args:
        df_f: Filtered frame.
        var: Name of the variable to analyse.

    Returns:
        A ``dbc.Row`` with all the components, or a warning ``dbc.Alert``
        when ``var`` has no non-missing values.
    """
    var_data = df_f[var].dropna()
    if len(var_data) == 0:
        return dbc.Alert("No hay datos disponibles para análisis estadístico.", color="warning")

    nombre = LABELS.get(var, var)
    unidad = UNITS.get(var, '')
    eje = f"{nombre} ({unidad})"

    # Compute the shared aggregates once.
    media = var_data.mean()
    desv = var_data.std()
    q1 = var_data.quantile(0.25)
    q3 = var_data.quantile(0.75)
    modas = var_data.mode()

    stats = {
        'Media': media,
        'Mediana': var_data.median(),
        'Moda': modas.iloc[0] if not modas.empty else var_data.median(),
        'Desviación Estándar': desv,
        'Varianza': var_data.var(),
        'Mínimo': var_data.min(),
        'Máximo': var_data.max(),
        'Rango': var_data.max() - var_data.min(),
        'Q1 (Percentil 25)': q1,
        'Q3 (Percentil 75)': q3,
        'IQR': q3 - q1,
        # Guard against division by zero for zero-mean variables.
        'Coef. Variación %': (desv / media) * 100 if media != 0 else 0
    }
    stats_df = pd.DataFrame([
        {'Estadística': k, 'Valor': f"{v:.3f}", 'Unidad': unidad}
        for k, v in stats.items()
    ])

    # Per-province summary table.
    if 'provincia' in df_f.columns:
        prov_stats = df_f.groupby('provincia')[var].agg([
            'count', 'mean', 'median', 'std', 'min', 'max'
        ]).round(3).reset_index()
        prov_stats.columns = ['Provincia', 'Registros', 'Media', 'Mediana', 'Desv. Std', 'Mínimo', 'Máximo']
    else:
        prov_stats = pd.DataFrame()

    # Per-season summary table (Ecuador: Verano / Invierno).
    season_stats = df_f.groupby('season')[var].agg([
        'count', 'mean', 'median', 'std', 'min', 'max'
    ]).round(3).reset_index()
    season_stats.columns = ['Estación', 'Registros', 'Media', 'Mediana', 'Desv. Std', 'Mínimo', 'Máximo']
    season_stats['Período'] = season_stats['Estación'].map({
        'Verano': 'Jun-Nov (Seco)',
        'Invierno': 'Dic-May (Lluvioso)'
    })

    # Box plot per province.
    fig_box = px.box(
        df_f, x='provincia', y=var,
        title=f"Distribución por Provincia - {nombre}",
        labels={var: eje, 'provincia': 'Provincia'}
    )
    fig_box.update_xaxes(tickangle=45)

    # Observed density versus a normal curve with the same mean and std.
    fig_hist_normal = go.Figure()
    fig_hist_normal.add_trace(go.Histogram(
        x=var_data, nbinsx=40, name='Datos Observados',
        marker_color='lightblue', opacity=0.7, histnorm='probability density'
    ))
    # The curve is undefined for constant data (std == 0) or a single
    # observation (std is NaN), so it is only drawn when std is positive.
    if np.isfinite(desv) and desv > 0:
        from scipy import stats as scipy_stats
        x_norm = np.linspace(var_data.min(), var_data.max(), 100)
        y_norm = scipy_stats.norm.pdf(x_norm, media, desv)
        fig_hist_normal.add_trace(go.Scatter(
            x=x_norm, y=y_norm, mode='lines',
            name='Distribución Normal Teórica',
            line=dict(color='red', width=3)
        ))
    fig_hist_normal.update_layout(
        title=f"Distribución vs Normal Teórica - {nombre}",
        xaxis_title=eje,
        yaxis_title="Densidad de Probabilidad"
    )

    # Correlation of `var` with up to eight other numeric variables; a single
    # other variable is enough for a meaningful bar.
    numeric_cols = [col for col in NUM_VARS if col in df_f.columns and col != var]
    if numeric_cols:
        corr_data = df_f[[var] + numeric_cols[:8]].corr()[var].drop(var)
        corr_df = pd.DataFrame({
            'Variable': [LABELS.get(col, col) for col in corr_data.index],
            'Correlación': corr_data.values
        }).sort_values('Correlación', key=abs, ascending=False)
        fig_corr = px.bar(
            corr_df.head(8), x='Correlación', y='Variable',
            title=f"Correlaciones más Fuertes con {nombre}",
            orientation='h',
            color='Correlación',
            color_continuous_scale='RdBu_r',
            range_color=[-1, 1]
        )
    else:
        fig_corr = go.Figure()

    # Seasonal box plot.
    fig_seasonal = px.box(
        df_f, x='season', y=var, color='season',
        title=f"Variación Estacional Ecuador - {nombre}",
        labels={var: eje, 'season': 'Estación'},
        color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'}
    )
    fig_seasonal.update_layout(
        annotations=[
            dict(text="Verano: Junio-Noviembre (época seca)", xref="paper", yref="paper",
                 x=0.02, y=0.98, showarrow=False, font_size=11, bgcolor="rgba(255,255,255,0.8)"),
            dict(text="Invierno: Diciembre-Mayo (época lluviosa)", xref="paper", yref="paper",
                 x=0.02, y=0.93, showarrow=False, font_size=11, bgcolor="rgba(255,255,255,0.8)")
        ]
    )

    return dbc.Row([
        # Descriptive statistics
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-chart-pie me-2"),
                    f"Estadísticas Descriptivas - {nombre}"
                ]),
                dbc.CardBody([
                    dash_table.DataTable(
                        data=stats_df.to_dict('records'),
                        columns=[{"name": i, "id": i} for i in stats_df.columns],
                        style_cell={'textAlign': 'left', 'fontSize': '14px'},
                        style_header={'backgroundColor': '#f8f9fa', 'fontWeight': 'bold'},
                        style_data_conditional=[
                            {
                                'if': {'row_index': 'odd'},
                                'backgroundColor': '#f8f9fa'
                            }
                        ]
                    )
                ])
            ], style=CARD_STYLE)
        ], md=6),
        # Histogram vs normal curve
        dbc.Col([
            dcc.Graph(figure=fig_hist_normal, config={"displayModeBar": True})
        ], md=6),
        # Box plot per province
        dbc.Col([
            dcc.Graph(figure=fig_box, config={"displayModeBar": True})
        ], md=6),
        # Correlations
        dbc.Col([
            dcc.Graph(figure=fig_corr, config={"displayModeBar": True})
        ], md=6),
        # Seasonal variation
        dbc.Col([
            dcc.Graph(figure=fig_seasonal, config={"displayModeBar": True})
        ], md=12),
        # Per-season table
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-leaf me-2"),
                    "Estadísticas por Estación Climática (Ecuador)"
                ]),
                dbc.CardBody([
                    dash_table.DataTable(
                        data=season_stats.to_dict('records') if not season_stats.empty else [],
                        columns=[{"name": i, "id": i} for i in season_stats.columns] if not season_stats.empty else [],
                        style_cell={'textAlign': 'center', 'fontSize': '12px'},
                        style_header={'backgroundColor': '#e8f5e8', 'fontWeight': 'bold'},
                        style_data_conditional=[
                            {
                                'if': {'filter_query': '{Estación} = Verano'},
                                'backgroundColor': '#fff3cd',
                            },
                            {
                                'if': {'filter_query': '{Estación} = Invierno'},
                                'backgroundColor': '#d1ecf1',
                            }
                        ]
                    ) if not season_stats.empty else html.P("No hay datos estacionales disponibles", className="text-muted")
                ])
            ], style=CARD_STYLE)
        ], md=6),
        # Per-province table
        dbc.Col([
            dbc.Card([
                dbc.CardHeader([
                    html.I(className="fas fa-map me-2"),
                    "Estadísticas por Provincia"
                ]),
                dbc.CardBody([
                    dash_table.DataTable(
                        data=prov_stats.to_dict('records') if not prov_stats.empty else [],
                        columns=[{"name": i, "id": i} for i in prov_stats.columns] if not prov_stats.empty else [],
                        style_cell={'textAlign': 'center', 'fontSize': '11px'},
                        style_header={'backgroundColor': '#e3f2fd', 'fontWeight': 'bold'},
                        style_data_conditional=[
                            {
                                'if': {'row_index': 'odd'},
                                'backgroundColor': '#f8f9fa'
                            }
                        ],
                        page_size=10,
                        sort_action="native"
                    ) if not prov_stats.empty else html.P("No hay datos provinciales disponibles", className="text-muted")
                ])
            ], style=CARD_STYLE)
        ], md=6),
    ], className="g-3")
# Callbacks
@app.callback(
    Output("sel-cantons", "options"),
    Output("sel-cantons", "value"),
    Input("sel-provinces", "value"),
)
def actualizar_cantones(provinces):
    """Refresh the canton dropdown whenever the province selection changes.

    Args:
        provinces: Selected province names; empty or ``None`` means all.

    Returns:
        ``(options, value)``: the sorted cantons of the selected provinces
        (or of the whole dataset) as dropdown options, and an empty
        selection, so cantons from a previous province choice are cleared.
    """
    if provinces:
        cantones_col = df.loc[df["provincia"].isin(provinces), "canton"]
    else:
        cantones_col = df["canton"]
    nombres = sorted(cantones_col.dropna().unique().tolist())
    opciones = [{"label": nombre, "value": nombre} for nombre in nombres]
    return opciones, []
@app.callback(
    Output("main-kpis", "children"),
    [Input("sel-var", "value"),
     Input("sel-years", "value"),
     Input("sel-provinces", "value"),
     Input("sel-cantons", "value"),
     Input("sel-season", "value"),
     Input("sel-dates", "start_date"),
     Input("sel-dates", "end_date")]
)
def actualizar_kpis(var, years, provinces, cantons, season, start_date, end_date):
    """Re-render the KPI row whenever any global filter changes.

    Returns:
        The KPI cards for the filtered data, or an empty ``html.Div`` when
        no variable is selected.
    """
    if var:
        # A cleared year selector falls back to every available year.
        seleccion = filtrar_datos(
            df, var, years or YEARS, provinces, cantons, season,
            (start_date, end_date),
        )
        return crear_kpis_principales(seleccion, var)
    return html.Div()
# Callback for the per-year analysis inside the time-series tab
@app.callback(
    [Output("temporal-analysis-content", "children"),
     Output("temporal-year-info", "children")],
    [Input("temporal-year-selector", "value")],
    [State("sel-var", "value"),
     State("sel-provinces", "value"),
     State("sel-cantons", "value"),
     State("sel-season", "value"),
     State("sel-dates", "start_date"),
     State("sel-dates", "end_date")]
)
def update_temporal_analysis(selected_year, var, provinces, cantons, season,
                             start_date=None, end_date=None):
    """Render the charts of the time-series tab for one selected year.

    The global filters are read as ``State`` and applied through
    ``filtrar_datos``, so this tab honours the same province, canton, season
    and date-range filters as the other tabs.

    Args:
        selected_year: Year chosen in the tab's own selector.
        var: Selected variable name.
        provinces: Selected provinces (empty means all).
        cantons: Selected cantons (empty means all).
        season: Selected seasons (empty means both).
        start_date: Start of the global date range, if set.
        end_date: End of the global date range, if set.

    Returns:
        ``(content, info)``: the chart grid and an info alert; when nothing
        matches, a warning alert and an empty ``html.Div``.
    """
    if not selected_year or not var:
        return html.Div(), html.Div()

    df_year = filtrar_datos(
        df, var, [selected_year], provinces, cantons, season,
        (start_date, end_date),
    )
    if df_year.empty:
        return dbc.Alert(f"No hay datos para el año {selected_year} con los filtros seleccionados.", color="warning"), html.Div()

    nombre = LABELS.get(var, var)
    unidad = UNITS.get(var, '')
    eje = f"{nombre} ({unidad})"

    # Summary banner for the selected year.
    year_info = dbc.Alert([
        html.H6(f"Análisis del Año {selected_year}", className="mb-2"),
        html.P(f"Registros encontrados: {len(df_year):,}", className="mb-1"),
        html.P(f"Rango de fechas: {df_year['time'].min().strftime('%d/%m/%Y')} - {df_year['time'].max().strftime('%d/%m/%Y')}", className="mb-0")
    ], color="info")

    # 1. Monthly mean with standard-deviation error bars.
    df_monthly = df_year.groupby(df_year['time'].dt.month)[var].agg(['mean', 'std', 'count']).reset_index()
    # After reset_index the grouping key (month number) sits in column 'time'.
    df_monthly['month_name'] = df_monthly['time'].map({
        1: 'Ene', 2: 'Feb', 3: 'Mar', 4: 'Abr', 5: 'May', 6: 'Jun',
        7: 'Jul', 8: 'Ago', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dic'
    })
    fig_monthly = go.Figure()
    fig_monthly.add_trace(go.Scatter(
        x=df_monthly['month_name'],
        y=df_monthly['mean'],
        mode='lines+markers',
        name='Promedio Mensual',
        line=dict(color='#1f77b4', width=3),
        marker=dict(size=8),
        error_y=dict(type='data', array=df_monthly['std'], visible=True)
    ))
    fig_monthly.update_layout(
        title=f"Evolución Mensual {selected_year} - {nombre}",
        xaxis_title="Mes",
        yaxis_title=eje,
        hovermode='x unified'
    )

    # 2. Season comparison.
    df_seasonal = df_year.groupby('season')[var].agg(['mean', 'std', 'count']).reset_index()
    fig_seasonal = px.bar(
        df_seasonal, x='season', y='mean',
        error_y='std',
        title=f"Comparación Estacional {selected_year} - {nombre}",
        labels={'mean': eje, 'season': 'Estación'},
        color='season',
        color_discrete_map={'Verano': '#ff7f0e', 'Invierno': '#1f77b4'}
    )

    # 3. Value distribution.
    fig_dist = px.histogram(
        df_year, x=var, nbins=30,
        title=f"Distribución de Valores {selected_year} - {nombre}",
        labels={var: eje}
    )

    # 4. Per-timestamp mean, only when there are more than 50 rows.
    if len(df_year) > 50:
        df_daily = df_year.groupby('time')[var].mean().reset_index()
        fig_daily = px.line(
            df_daily, x='time', y=var,
            title=f"Evolución Diaria {selected_year} - {nombre}",
            labels={'time': 'Fecha', var: eje}
        )
        fig_daily.update_traces(line_color='#2ca02c')
        daily_graph = dbc.Col([
            dcc.Graph(figure=fig_daily, config={"displayModeBar": True})
        ], md=12)
    else:
        daily_graph = html.Div()

    serie = df_year[var]
    content = dbc.Row([
        dbc.Col([
            dcc.Graph(figure=fig_monthly, config={"displayModeBar": True})
        ], md=6),
        dbc.Col([
            dcc.Graph(figure=fig_seasonal, config={"displayModeBar": True})
        ], md=6),
        dbc.Col([
            dcc.Graph(figure=fig_dist, config={"displayModeBar": True})
        ], md=6),
        dbc.Col([
            # Headline statistics for the year
            dbc.Card([
                dbc.CardHeader(f"Estadísticas {selected_year}"),
                dbc.CardBody([
                    html.P(f"Promedio: {serie.mean():.3f} {unidad}", className="mb-2"),
                    html.P(f"Mediana: {serie.median():.3f} {unidad}", className="mb-2"),
                    html.P(f"Desv. Estándar: {serie.std():.3f}", className="mb-2"),
                    html.P(f"Mínimo: {serie.min():.3f} {unidad}", className="mb-2"),
                    html.P(f"Máximo: {serie.max():.3f} {unidad}", className="mb-0"),
                ])
            ])
        ], md=6),
        daily_graph
    ], className="g-3")
    return content, year_info
@app.callback(
    Output("tab-content", "children"),
    [Input("main-tabs", "value"),
     Input("sel-var", "value"),
     Input("sel-years", "value"),
     Input("sel-provinces", "value"),
     Input("sel-cantons", "value"),
     Input("sel-season", "value"),
     Input("sel-dates", "start_date"),
     Input("sel-dates", "end_date")]
)
def render_tab_content(active_tab, var, years, provinces, cantons, season, start_date, end_date):
    """Render the body of the active tab for the current global filters.

    Returns:
        The component tree built by the active tab's renderer; an info alert
        when no variable is selected; a warning alert when the filters match
        no rows; ``None`` for an unknown tab value.
    """
    if not var:
        return dbc.Alert("Por favor selecciona una variable para continuar.", color="info")

    # A cleared year selector falls back to every available year.
    seleccion = filtrar_datos(
        df, var, years or YEARS, provinces, cantons, season,
        (start_date, end_date),
    )
    if seleccion.empty:
        return dbc.Alert("No hay datos disponibles con los filtros seleccionados.", color="warning")

    # Tab value -> renderer; every renderer takes (filtered frame, variable).
    vistas = {
        "dashboard": render_dashboard,
        "spatial": render_spatial_analysis,
        "temporal": render_temporal_analysis_simple,
        "tree_classification": render_tree_classification,
        "statistics": render_detailed_statistics,
    }
    vista = vistas.get(active_tab)
    if vista is None:
        return None
    return vista(seleccion, var)
# Custom page template: Dash's HTML index with an inline <style> block that
# styles the tabs (.custom-tab / .custom-tab--selected), cards, Font Awesome
# icons (.fas) and DataTable borders. The {%...%} placeholders are filled in
# by Dash when the page is served.
app.index_string = '''
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>{%title%}</title>
{%favicon%}
{%css%}
<style>
.custom-tab {
background-color: #f8f9fa !important;
border: 1px solid #dee2e6 !important;
border-radius: 8px 8px 0 0 !important;
margin-right: 4px !important;
font-weight: 500 !important;
transition: all 0.3s ease !important;
padding: 12px 20px !important;
}
.custom-tab:hover {
background-color: #e9ecef !important;
transform: translateY(-2px) !important;
}
.custom-tab--selected {
background-color: #007bff !important;
color: white !important;
border-color: #007bff !important;
box-shadow: 0 4px 8px rgba(0,123,255,0.3) !important;
}
.card {
transition: transform 0.2s ease, box-shadow 0.2s ease !important;
}
.card:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
}
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
}
.fas {
color: #007bff !important;
}
/* Estilos para las predicciones */
.prediction-highlight {
background: linear-gradient(45deg, #28a745, #20c997) !important;
color: white !important;
border-radius: 8px !important;
padding: 10px !important;
margin: 5px 0 !important;
}
/* Mejorar visualización de tablas */
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table {
border-collapse: separate !important;
border-spacing: 0 !important;
}
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table th,
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table td {
border: 1px solid #dee2e6 !important;
border-top: none !important;
border-left: none !important;
}
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table th:first-child,
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table td:first-child {
border-left: 1px solid #dee2e6 !important;
}
.dash-table-container .dash-spreadsheet-container .dash-spreadsheet-inner table tr:first-child th {
border-top: 1px solid #dee2e6 !important;
}
/* Animaciones suaves */
.loading-spinner {
animation: spin 1s linear infinite !important;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Responsive mejoras */
@media (max-width: 768px) {
.custom-tab {
font-size: 12px !important;
padding: 8px 12px !important;
}
}
</style>
</head>
<body>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
</footer>
</body>
</html>
'''
if __name__ == "__main__":
    # Listen on every interface; the port comes from the PORT environment
    # variable and defaults to 7860.
    port = int(os.environ.get("PORT", 7860))
    # `run_server` is Dash's legacy entry point; use `app.run` when the
    # installed Dash provides it and fall back to `run_server` otherwise.
    run_app = getattr(app, "run", None) or app.run_server
    run_app(
        host="0.0.0.0",
        port=port,
        debug=False
    )