# Rockfall risk prediction — Gradio app scoring features with a saved XGBoost classifier.
import os
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
import xgboost as xgb
# ----- config -----
# Input columns the model was trained on. Order matters: _predict_df selects
# df[FEATURES], so this must match the column order used at training time
# (presumably it does — TODO confirm against the training script).
FEATURES = [
"aspect","clay",
"contextual_slope_1000m","contextual_slope_300m","contextual_slope_5000m",
"curvature","elevation","hand",
"ndvi_change","ndvi_post","ndvi_pre",
"organicC","rainfall","relief","ruggedness",
"sand","silt","twi"
]
# map predicted class -> human label (edit to match your training)
LABEL_MAP = {0: "Low", 1: "Medium", 2: "High", 3: "Extreme"}
# ----- load model safely (json) -----
# Model file lives next to this script; JSON is the portable XGBoost format.
MODEL_PATH = os.path.join(os.path.dirname(__file__), "rockfall_xgb.json")
# NOTE: the model is loaded once at import time; a missing/corrupt file fails fast here.
model = xgb.XGBClassifier()
model.load_model(MODEL_PATH)
# ----- helpers -----
def _predict_df(df: pd.DataFrame) -> pd.DataFrame:
    """Score *df* with the global model.

    Returns a copy of *df* with ``predicted_class``, ``predicted_label`` and
    one ``prob_<label>`` column per class appended. When a ``target`` column
    is present, a boolean ``correct`` column is added as well.

    Raises ValueError if any column in FEATURES is absent.
    """
    absent = [name for name in FEATURES if name not in df.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")
    # Select in training order and coerce to float before scoring.
    feature_matrix = df[FEATURES].astype(float)
    classes = model.predict(feature_matrix)
    class_probs = model.predict_proba(feature_matrix)
    scored = df.copy()
    scored["predicted_class"] = classes.astype(int)
    scored["predicted_label"] = [
        LABEL_MAP.get(int(cls), str(int(cls))) for cls in classes
    ]
    # One probability column per model output class.
    for idx in range(class_probs.shape[1]):
        scored[f"prob_{LABEL_MAP.get(idx, str(idx))}"] = class_probs[:, idx]
    # Mark correctness whenever ground truth is supplied.
    if "target" in scored.columns:
        scored["correct"] = (
            scored["target"].astype(int) == scored["predicted_class"].astype(int)
        )
    return scored
# ----- Gradio functions -----
def predict_single(*vals):
    """Score one manually-entered feature row.

    Returns (predicted label, DataFrame of per-class probabilities sorted
    descending). *vals* arrive in FEATURES order from the Gradio inputs.
    """
    frame = pd.DataFrame([vals], columns=FEATURES)
    result = _predict_df(frame)
    predicted = result.loc[0, "predicted_label"]
    # Reshape the prob_* columns into a two-column (class, probability) table.
    prob_columns = [col for col in result.columns if col.startswith("prob_")]
    table = (
        result.loc[0, prob_columns]
        .rename_axis("class")
        .reset_index(name="probability")
    )
    table["class"] = table["class"].str.replace("prob_", "", regex=False)
    table = table.sort_values("probability", ascending=False)
    return predicted, table
def predict_batch(file):
    """Score an uploaded CSV or Excel file of feature rows.

    Returns (scored DataFrame, path to a downloadable CSV of the scores).
    Raises ValueError (via _predict_df) if required columns are missing.
    """
    # gr.File(type="filepath") passes a plain path string; older Gradio
    # versions passed a tempfile wrapper with a .name attribute. Accept both
    # (the original `file.name` raised AttributeError on a str path).
    path = file if isinstance(file, str) else file.name
    name = os.path.basename(path)
    if name.lower().endswith((".xls", ".xlsx")):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)
    scored = _predict_df(df)
    # Write to the system temp dir (portable, unlike a hard-coded /tmp/).
    out_path = os.path.join(tempfile.gettempdir(), "rockfall_predictions.csv")
    scored.to_csv(out_path, index=False)
    return scored, out_path
# ----- UI -----
# Two-tab interface: single-row manual entry and batch file scoring.
with gr.Blocks(title="⛏️ Rockfall Risk Prediction") as demo:
    gr.Markdown("## ⛏️ Rockfall Risk Prediction\nEnter features or upload a file to get class and probabilities.")
    with gr.Tab("Single prediction"):
        # One numeric field per model feature, in FEATURES (training) order.
        inputs = [gr.Number(label=col) for col in FEATURES]
        pred_label = gr.Label(label="Predicted Risk")
        prob_table = gr.Dataframe(headers=["class","probability"], label="Probabilities (descending)")
        gr.Button("Predict").click(predict_single, inputs=inputs, outputs=[pred_label, prob_table])
    with gr.Tab("Batch (CSV/Excel)"):
        # type="filepath" delivers the upload to predict_batch as a path string.
        up = gr.File(label="Upload CSV/Excel with required columns", file_count="single", type="filepath")
        out_df = gr.Dataframe(label="Scored Data")
        out_file = gr.File(label="Download predictions CSV")
        gr.Button("Run batch").click(predict_batch, inputs=[up], outputs=[out_df, out_file])
# Spaces will call demo.launch() automatically
if __name__ == "__main__":
    # Fixed: the original line ended with a stray "|" (paste artifact) that
    # made the file a SyntaxError.
    demo.launch()