"""Gradio app for rockfall risk prediction with a trained XGBoost classifier.

Loads a JSON-serialized ``XGBClassifier`` sitting next to this file and
exposes two tabs: a single-row form and a CSV/Excel batch scorer.
"""

import os
import tempfile

import numpy as np
import pandas as pd
import xgboost as xgb
import gradio as gr

# ----- config -----
# Exact feature columns (and order) the model was trained on.
FEATURES = [
    "aspect", "clay",
    "contextual_slope_1000m", "contextual_slope_300m", "contextual_slope_5000m",
    "curvature", "elevation", "hand",
    "ndvi_change", "ndvi_post", "ndvi_pre",
    "organicC", "rainfall", "relief", "ruggedness",
    "sand", "silt", "twi",
]

# map predicted class -> human label (edit to match your training)
LABEL_MAP = {0: "Low", 1: "Medium", 2: "High", 3: "Extreme"}

# ----- load model safely (json) -----
# JSON format avoids pickle; path is resolved relative to this file so the
# app works regardless of the current working directory.
MODEL_PATH = os.path.join(os.path.dirname(__file__), "rockfall_xgb.json")
model = xgb.XGBClassifier()
model.load_model(MODEL_PATH)


# ----- helpers -----
def _predict_df(df: pd.DataFrame) -> pd.DataFrame:
    """Score *df* and return a copy with prediction columns appended.

    Adds ``predicted_class``, ``predicted_label``, one ``prob_<label>``
    column per class, and — when a ``target`` column is present — a
    boolean ``correct`` column.

    Raises:
        ValueError: if any column listed in ``FEATURES`` is missing.
    """
    # Enforce feature presence, order, and numeric dtype before predicting.
    missing = [c for c in FEATURES if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
    X = df[FEATURES].astype(float)

    preds = model.predict(X)
    probs = model.predict_proba(X)

    out = df.copy()
    out["predicted_class"] = preds.astype(int)
    out["predicted_label"] = [LABEL_MAP.get(int(c), str(int(c))) for c in preds]

    # add per-class probabilities
    for i in range(probs.shape[1]):
        label = LABEL_MAP.get(i, str(i))
        out[f"prob_{label}"] = probs[:, i]

    # if ground truth exists, mark correctness
    if "target" in out.columns:
        out["correct"] = (
            out["target"].astype(int) == out["predicted_class"].astype(int)
        )
    return out


# ----- Gradio functions -----
def predict_single(*vals):
    """Score one row of feature values (ordered as in ``FEATURES``).

    Returns:
        tuple: (predicted label str, DataFrame of class probabilities
        sorted descending).
    """
    row = pd.DataFrame([vals], columns=FEATURES)
    scored = _predict_df(row)
    label = scored.loc[0, "predicted_label"]

    # build a tidy prob table for display
    prob_cols = [c for c in scored.columns if c.startswith("prob_")]
    probs = (
        scored.loc[0, prob_cols]
        .rename_axis("class")
        .reset_index(name="probability")
    )
    probs["class"] = probs["class"].str.replace("prob_", "", regex=False)
    probs = probs.sort_values("probability", ascending=False)
    return label, probs


def predict_batch(file):
    """Score an uploaded CSV/Excel file; return (scored df, download path)."""
    # gr.File(type="filepath") passes a plain string path; older Gradio
    # versions pass a tempfile wrapper exposing .name. Support both —
    # calling file.name on a str would raise AttributeError.
    path = file if isinstance(file, str) else file.name

    # accept CSV or Excel, dispatched on the file extension
    if os.path.basename(path).lower().endswith((".xls", ".xlsx")):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)

    scored = _predict_df(df)

    # write downloadable CSV to the platform's temp dir (portable,
    # unlike a hard-coded /tmp path)
    out_path = os.path.join(tempfile.gettempdir(), "rockfall_predictions.csv")
    scored.to_csv(out_path, index=False)
    return scored, out_path


# ----- UI -----
with gr.Blocks(title="⛏️ Rockfall Risk Prediction") as demo:
    gr.Markdown(
        "## ⛏️ Rockfall Risk Prediction\n"
        "Enter features or upload a file to get class and probabilities."
    )

    with gr.Tab("Single prediction"):
        inputs = [gr.Number(label=col) for col in FEATURES]
        pred_label = gr.Label(label="Predicted Risk")
        prob_table = gr.Dataframe(
            headers=["class", "probability"],
            label="Probabilities (descending)",
        )
        gr.Button("Predict").click(
            predict_single, inputs=inputs, outputs=[pred_label, prob_table]
        )

    with gr.Tab("Batch (CSV/Excel)"):
        up = gr.File(
            label="Upload CSV/Excel with required columns",
            file_count="single",
            type="filepath",
        )
        out_df = gr.Dataframe(label="Scored Data")
        out_file = gr.File(label="Download predictions CSV")
        gr.Button("Run batch").click(
            predict_batch, inputs=[up], outputs=[out_df, out_file]
        )

# Spaces will call demo.launch() automatically
if __name__ == "__main__":
    demo.launch()