# Page header captured with this file: author kavin2906, commit "Update app.py" (94b680f, verified).
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.preprocessing import RobustScaler
from sklearn.metrics.pairwise import euclidean_distances
# --- Dataset -----------------------------------------------------------------
# Read the customer table and discard the identifier column, which is not one
# of the model inputs listed below.
data = pd.read_csv("scaled_dataset.csv")
data.drop(columns=["CUST_ID"], inplace=True)

# Columns fed to every clustering model, in the order the UI presents them.
selected_features = [
    "BALANCE",
    "BALANCE_FREQUENCY",
    "PURCHASES",
    "ONEOFF_PURCHASES",
    "INSTALLMENTS_PURCHASES",
    "CASH_ADVANCE",
    "PURCHASES_FREQUENCY",
    "ONEOFF_PURCHASES_FREQUENCY",
    "PURCHASES_INSTALLMENTS_FREQUENCY",
    "CASH_ADVANCE_FREQUENCY",
    "CASH_ADVANCE_TRX",
    "PURCHASES_TRX",
    "CREDIT_LIMIT",
    "PAYMENTS",
    "MINIMUM_PAYMENTS",
    "PRC_FULL_PAYMENT",
    "TENURE",
]
X = data.loc[:, selected_features].values

# --- Scaling -----------------------------------------------------------------
# Fit the scaler once on the full feature matrix; the same fitted scaler is
# reused later to transform individual user-supplied rows.
scaler = RobustScaler().fit(X)
X_scaled = scaler.transform(X)
# --- KMeans ------------------------------------------------------------------
# Fit a two-cluster model once at import time and label the training rows.
kmeans_model = KMeans(n_clusters=2, random_state=42).fit(X_scaled)
all_labels = kmeans_model.predict(X_scaled)

# Give each cluster id a human-readable name: the cluster whose members have
# the larger mean BALANCE is called "High Spend", the other "Low Spend".
cluster0_balance = data.loc[all_labels == 0, "BALANCE"].mean()
cluster1_balance = data.loc[all_labels == 1, "BALANCE"].mean()
cluster_meaning = (
    {0: "High Spend Customer", 1: "Low Spend Customer"}
    if cluster0_balance > cluster1_balance
    else {1: "High Spend Customer", 0: "Low Spend Customer"}
)
# --- Hierarchical and DBSCAN -------------------------------------------------
# Both models are fitted once on the scaled training matrix.
hierarchical_model = AgglomerativeClustering(n_clusters=2)
hierarchical_model.fit(X_scaled)

dbscan_model = DBSCAN(eps=0.5, min_samples=5)
dbscan_model.fit(X_scaled)

# Keep the core samples together with the cluster label of each one, so a new
# point can later be matched against its nearest core sample.
dbscan_core_samples = dbscan_model.components_
dbscan_core_labels = np.take(
    dbscan_model.labels_, dbscan_model.core_sample_indices_
)
def predict_cluster(username, password, algorithm, k, *features):
    """Check the login, then assign one customer record to a cluster.

    Args:
        username: Login name entered in the UI.
        password: Password entered in the UI.
        algorithm: ``"KMeans"``, ``"Hierarchical"`` or ``"DBSCAN"``.
        k: Requested number of clusters. Not used: the KMeans branch always
            queries the pre-fit two-cluster model. The parameter is kept so
            callers that pass it positionally keep working.
        *features: Raw feature values for one customer, in the column order
            the module-level ``scaler`` was fitted on.

    Returns:
        A status string for display: the predicted cluster with its
        high/low-spend label, an outlier notice (DBSCAN only), or an error
        message for a bad login, unusable feature values, or an unknown
        algorithm name.
    """
    # NOTE(security): credentials are hard-coded in source; move them to
    # configuration or a secret store before exposing this app publicly.
    if username != "kavin" or password != "1234":
        return "❌ Invalid login. Please try again."

    # An emptied numeric field arrives as None, and the models cannot handle
    # NaN/inf, so reject anything that is not a finite number up front
    # instead of letting the scaler or a model raise.
    invalid_features = "⚠️ Please enter a valid number for every feature."
    try:
        feature_row = np.asarray([features], dtype=float)
    except (TypeError, ValueError):
        return invalid_features
    if feature_row.size == 0 or not np.isfinite(feature_row).all():
        return invalid_features
    try:
        features_scaled = scaler.transform(feature_row)
    except ValueError:
        # Raised e.g. when the number of values does not match the scaler.
        return invalid_features

    if algorithm == "KMeans":
        cluster = kmeans_model.predict(features_scaled)[0]
        cluster_type = cluster_meaning.get(cluster, "Unknown Cluster")
        return f"✅ Cluster {cluster} → {cluster_type} (KMeans, k=2)"

    if algorithm == "Hierarchical":
        # AgglomerativeClustering cannot label unseen points, so the new row
        # is appended to the training matrix and everything is re-clustered.
        # NOTE: this refit happens on every request.
        combined = np.vstack([X_scaled, features_scaled])
        labels = AgglomerativeClustering(n_clusters=2).fit_predict(combined)
        cluster = labels[-1]
        train_labels = labels[:-1]
        # Cluster ids may differ between refits, so the high/low naming is
        # recomputed from the mean BALANCE of the training rows each time.
        cluster0_balance = data.loc[train_labels == 0, "BALANCE"].mean()
        cluster1_balance = data.loc[train_labels == 1, "BALANCE"].mean()
        high_spend_cluster = 0 if cluster0_balance > cluster1_balance else 1
        cluster_type = (
            "High Spend Customer"
            if cluster == high_spend_cluster
            else "Low Spend Customer"
        )
        return f"✅ Cluster {cluster} → {cluster_type} (Hierarchical Clustering)"

    if algorithm == "DBSCAN":
        outlier = "🚨 This data point is considered an OUTLIER (noise) by DBSCAN."
        # With no core samples there is nothing to attach the point to
        # (and argmin over an empty array would raise).
        if len(dbscan_core_samples) == 0:
            return outlier

        # The point joins the cluster of its nearest core sample, provided
        # that sample lies within eps; otherwise it is treated as noise.
        dists = euclidean_distances(features_scaled, dbscan_core_samples)[0]
        nearest_idx = int(np.argmin(dists))
        nearest_dist = dists[nearest_idx]
        if nearest_dist <= dbscan_model.eps:
            cluster = dbscan_core_labels[nearest_idx]
            train_labels = dbscan_model.labels_
            # Mean BALANCE per real cluster (label -1 is DBSCAN noise).
            cluster_balances = {
                c: data.loc[train_labels == c, "BALANCE"].mean()
                for c in np.unique(train_labels)
                if c != -1
            }
            high_spend_cluster = max(cluster_balances, key=cluster_balances.get)
            cluster_type = (
                "High Spend Customer"
                if cluster == high_spend_cluster
                else "Low Spend Customer"
            )
            return (
                f"✅ Cluster {cluster} → {cluster_type} "
                f"(DBSCAN, dist={nearest_dist:.2f})"
            )
        return outlier

    return "⚠️ Please select a valid clustering algorithm."
# --- Gradio UI ---------------------------------------------------------------
# One tab containing the login fields, the algorithm selector, one numeric
# input per feature, and a button that sends everything to predict_cluster.
with gr.Blocks() as demo:
    with gr.Tab("🔑 Login & Predict Cluster"):
        gr.Markdown("## 🔒 Login and Select Clustering Method")
        username = gr.Textbox(label="Username", placeholder="Enter username")
        password = gr.Textbox(
            label="Password", type="password", placeholder="Enter password"
        )
        algorithm = gr.Dropdown(
            ["KMeans", "Hierarchical", "DBSCAN"],
            label="Select Clustering Algorithm",
            value="KMeans",
        )
        k_value = gr.Number(label="Number of Clusters (only for KMeans)", value=2)

        # One numeric field per model feature, pre-filled with that column's
        # median so the form is immediately submittable.
        with gr.Accordion("🔧 Enter Feature Values", open=True):
            inputs = [
                gr.Number(label=feature, value=float(data[feature].median()))
                for feature in selected_features
            ]

        btn = gr.Button("🔍 Predict Cluster")
        output = gr.Textbox(label="Prediction Result", interactive=False)

        # Argument order must match predict_cluster's signature:
        # (username, password, algorithm, k, *features).
        btn.click(
            fn=predict_cluster,
            inputs=[username, password, algorithm, k_value] + inputs,
            outputs=output,
        )

if __name__ == "__main__":
    demo.launch()