import gradio as gr
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.preprocessing import RobustScaler
from sklearn.metrics.pairwise import euclidean_distances

# Read the customer table and discard the identifier column, which is not
# one of the clustering features listed below.
# NOTE(review): the file name suggests the values may already be scaled;
# confirm that applying RobustScaler on top of it is intended.
data = pd.read_csv("scaled_dataset.csv").drop(columns="CUST_ID")

# Columns fed to every clustering model, in the order the UI collects them.
selected_features = [
    # Balance and purchase amounts
    "BALANCE", "BALANCE_FREQUENCY",
    "PURCHASES", "ONEOFF_PURCHASES", "INSTALLMENTS_PURCHASES",
    "CASH_ADVANCE",
    # Usage frequencies
    "PURCHASES_FREQUENCY", "ONEOFF_PURCHASES_FREQUENCY",
    "PURCHASES_INSTALLMENTS_FREQUENCY", "CASH_ADVANCE_FREQUENCY",
    # Transaction counts
    "CASH_ADVANCE_TRX", "PURCHASES_TRX",
    # Limits, payments and account age
    "CREDIT_LIMIT", "PAYMENTS", "MINIMUM_PAYMENTS",
    "PRC_FULL_PAYMENT", "TENURE",
]

# Feature matrix as a plain NumPy array (rows = customers).
X = data.loc[:, selected_features].to_numpy()

# Fit the scaler once on the full dataset; the same fitted instance is reused
# later to transform individual rows entered through the UI.
scaler = RobustScaler().fit(X)
X_scaled = scaler.transform(X)

# Fit the two-cluster KMeans model once at import time and label every
# training row with it.
kmeans_model = KMeans(n_clusters=2, random_state=42).fit(X_scaled)
all_labels = kmeans_model.predict(X_scaled)

# Mean BALANCE per cluster decides which label is presented as "high spend".
cluster0_balance = data.loc[all_labels == 0, "BALANCE"].mean()
cluster1_balance = data.loc[all_labels == 1, "BALANCE"].mean()

# Cluster 0 is "high" only when its mean is strictly greater; ties (and NaN
# comparisons) fall through to cluster 1.
_high_spend_label = 0 if cluster0_balance > cluster1_balance else 1
cluster_meaning = {
    _high_spend_label: "High Spend Customer",
    1 - _high_spend_label: "Low Spend Customer",
}

# Fit the hierarchical and DBSCAN models once on the scaled training data.
hierarchical_model = AgglomerativeClustering(n_clusters=2)
hierarchical_model.fit(X_scaled)

dbscan_model = DBSCAN(eps=0.5, min_samples=5)
dbscan_model.fit(X_scaled)

# Keep the core samples and their cluster labels side by side so a new point
# can be matched against its nearest core sample.
dbscan_core_samples = dbscan_model.components_
dbscan_core_labels = np.take(dbscan_model.labels_, dbscan_model.core_sample_indices_)


# Per-process caches. The fitted models and `data` do not change after
# start-up, so values derived from them only need to be computed once.
_KMEANS_BY_K = {}
_DBSCAN_HIGH_SPEND = {}


def _parse_cluster_count(k):
    """Return ``k`` as an int, treating ``None`` (empty UI field) as 2.

    Raises:
        ValueError: if ``k`` is not a whole number greater than or equal to 2.
        TypeError: if ``k`` cannot be converted to a number at all.
    """
    if k is None:
        return 2
    as_float = float(k)
    if not as_float.is_integer() or as_float < 2:
        raise ValueError(f"invalid cluster count: {k!r}")
    return int(as_float)


def _kmeans_for(k):
    """Return ``(model, high_spend_label)`` for ``k`` clusters, fitting on first use."""
    if k not in _KMEANS_BY_K:
        model = KMeans(n_clusters=k, random_state=42).fit(X_scaled)
        mean_balance = {
            label: data.loc[model.labels_ == label, "BALANCE"].mean()
            for label in np.unique(model.labels_)
        }
        _KMEANS_BY_K[k] = (model, max(mean_balance, key=mean_balance.get))
    return _KMEANS_BY_K[k]


def _dbscan_high_spend_cluster():
    """Return the non-noise DBSCAN cluster with the highest mean BALANCE (cached)."""
    if "label" not in _DBSCAN_HIGH_SPEND:
        labels = dbscan_model.labels_
        mean_balance = {
            label: data.loc[labels == label, "BALANCE"].mean()
            for label in np.unique(labels)
            if label != -1  # -1 marks noise, not a cluster
        }
        _DBSCAN_HIGH_SPEND["label"] = max(mean_balance, key=mean_balance.get)
    return _DBSCAN_HIGH_SPEND["label"]


def predict_cluster(username, password, algorithm, k, *features):
    """Assign one customer row to a cluster and describe it as high/low spend.

    Args:
        username: Login name entered in the UI.
        password: Password entered in the UI.
        algorithm: One of "KMeans", "Hierarchical" or "DBSCAN".
        k: Requested number of clusters. Only used for KMeans; ``None`` means 2.
        *features: One numeric value per entry of ``selected_features``, in
            the same order, in the same units as the training data.

    Returns:
        A human-readable status string: the prediction on success, or a
        message explaining why no prediction was made (bad login, invalid
        algorithm, invalid ``k``, missing/non-numeric feature values, or a
        DBSCAN outlier).
    """
    # NOTE(review): credentials are hard-coded in source. Move them to
    # configuration / a secret store before exposing this app beyond a demo.
    if username != "kavin" or password != "1234":
        return "❌ Invalid login. Please try again."

    if algorithm not in ("KMeans", "Hierarchical", "DBSCAN"):
        return "⚠️ Please select a valid clustering algorithm."

    n_clusters = 2
    if algorithm == "KMeans":
        try:
            n_clusters = _parse_cluster_count(k)
        except (TypeError, ValueError):
            return "⚠️ Number of clusters must be a whole number of at least 2."

    # A cleared number field arrives as None; reject it (and NaN/inf) here
    # with a message instead of letting the scaler raise.
    try:
        row = np.asarray([features], dtype=float)
    except (TypeError, ValueError):
        return "⚠️ Please enter a numeric value for every feature."
    if row.ndim != 2 or not np.isfinite(row).all():
        return "⚠️ Please enter a numeric value for every feature."
    if row.shape[1] != len(selected_features):
        return (
            f"⚠️ Expected {len(selected_features)} feature values, "
            f"got {row.shape[1]}."
        )

    features_scaled = scaler.transform(row)

    if algorithm == "KMeans":
        if n_clusters == 2:
            # Default case: reuse the model and labels prepared at start-up.
            cluster = kmeans_model.predict(features_scaled)[0]
            cluster_type = cluster_meaning.get(cluster, "Unknown Cluster")
        else:
            if n_clusters > len(X_scaled):
                return (
                    "⚠️ Number of clusters cannot exceed the number of "
                    f"training rows ({len(X_scaled)})."
                )
            model, high_spend_cluster = _kmeans_for(n_clusters)
            cluster = model.predict(features_scaled)[0]
            # With more than two clusters, only the highest-balance cluster
            # is called "high spend" (same convention as the DBSCAN branch).
            cluster_type = (
                "High Spend Customer" if cluster == high_spend_cluster
                else "Low Spend Customer"
            )
        return f"✅ Cluster {cluster} → {cluster_type} (KMeans, k={n_clusters})"

    if algorithm == "Hierarchical":
        # The model is refit on the training data plus the new row, so this
        # branch reclusters the whole dataset on every request.
        stacked = np.vstack([X_scaled, features_scaled])
        labels = AgglomerativeClustering(n_clusters=2).fit_predict(stacked)
        cluster = labels[-1]
        train_labels = labels[:-1]
        point_balance = row[0, selected_features.index("BALANCE")]

        def mean_balance(label):
            members = train_labels == label
            if not members.any():
                # The new row is alone in this cluster: its own balance is
                # the cluster mean (avoids comparing against NaN).
                return point_balance
            return data.loc[members, "BALANCE"].mean()

        high_spend_cluster = 0 if mean_balance(0) > mean_balance(1) else 1
        cluster_type = (
            "High Spend Customer" if cluster == high_spend_cluster
            else "Low Spend Customer"
        )
        return f"✅ Cluster {cluster} → {cluster_type} (Hierarchical Clustering)"

    # algorithm == "DBSCAN": a new point joins the cluster of its nearest
    # core sample if that sample lies within eps; otherwise it is noise.
    outlier_message = "🚨 This data point is considered an OUTLIER (noise) by DBSCAN."
    if len(dbscan_core_samples) == 0:
        # No core samples means nothing to match against.
        return outlier_message

    dists = euclidean_distances(features_scaled, dbscan_core_samples)
    nearest_idx = int(np.argmin(dists))
    nearest_dist = dists[0, nearest_idx]
    if nearest_dist > dbscan_model.eps:
        return outlier_message

    cluster = dbscan_core_labels[nearest_idx]
    cluster_type = (
        "High Spend Customer" if cluster == _dbscan_high_spend_cluster()
        else "Low Spend Customer"
    )
    return f"✅ Cluster {cluster} → {cluster_type} (DBSCAN, dist={nearest_dist:.2f})"


# Build the single-tab UI: credentials, algorithm choice, one numeric input
# per feature, and a button wired to predict_cluster.
with gr.Blocks() as demo:
    with gr.Tab("🔑 Login & Predict Cluster"):
        gr.Markdown("## 🔒 Login and Select Clustering Method")

        # Credentials are passed to predict_cluster on every click.
        username = gr.Textbox(placeholder="Enter username", label="Username")
        password = gr.Textbox(
            placeholder="Enter password",
            label="Password",
            type="password",
        )

        algorithm = gr.Dropdown(
            choices=["KMeans", "Hierarchical", "DBSCAN"],
            value="KMeans",
            label="Select Clustering Algorithm",
        )

        k_value = gr.Number(value=2, label="Number of Clusters (only for KMeans)")

        # One number field per feature, pre-filled with that column's median
        # and created in the same order as selected_features.
        with gr.Accordion("🔧 Enter Feature Values", open=True):
            inputs = [
                gr.Number(label=name, value=float(data[name].median()))
                for name in selected_features
            ]

        btn = gr.Button("🔍 Predict Cluster")
        output = gr.Textbox(label="Prediction Result", interactive=False)

        # Argument order must match predict_cluster's signature:
        # (username, password, algorithm, k, *features).
        btn.click(
            predict_cluster,
            [username, password, algorithm, k_value, *inputs],
            output,
        )

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()