import hmac
import os
from functools import lru_cache

import gradio as gr
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.preprocessing import RobustScaler
from sklearn.metrics.pairwise import euclidean_distances

# Load and preprocess data
data = pd.read_csv("scaled_dataset.csv")
data.drop("CUST_ID", axis=1, inplace=True)

selected_features = [
    "BALANCE",
    "BALANCE_FREQUENCY",
    "PURCHASES",
    "ONEOFF_PURCHASES",
    "INSTALLMENTS_PURCHASES",
    "CASH_ADVANCE",
    "PURCHASES_FREQUENCY",
    "ONEOFF_PURCHASES_FREQUENCY",
    "PURCHASES_INSTALLMENTS_FREQUENCY",
    "CASH_ADVANCE_FREQUENCY",
    "CASH_ADVANCE_TRX",
    "PURCHASES_TRX",
    "CREDIT_LIMIT",
    "PAYMENTS",
    "MINIMUM_PAYMENTS",
    "PRC_FULL_PAYMENT",
    "TENURE",
]
X = data[selected_features].values

# Scale features
scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

# Pre-fit KMeans model and compute cluster meaning
kmeans_model = KMeans(n_clusters=2, random_state=42)
kmeans_model.fit(X_scaled)
all_labels = kmeans_model.predict(X_scaled)
cluster0_balance = data.loc[all_labels == 0, "BALANCE"].mean()
cluster1_balance = data.loc[all_labels == 1, "BALANCE"].mean()
if cluster0_balance > cluster1_balance:
    cluster_meaning = {0: "High Spend Customer", 1: "Low Spend Customer"}
else:
    cluster_meaning = {1: "High Spend Customer", 0: "Low Spend Customer"}

# Pre-fit DBSCAN and Hierarchical models
hierarchical_model = AgglomerativeClustering(n_clusters=2).fit(X_scaled)
dbscan_model = DBSCAN(eps=0.5, min_samples=5).fit(X_scaled)
dbscan_core_samples = dbscan_model.components_
dbscan_core_labels = dbscan_model.labels_[dbscan_model.core_sample_indices_]

# Login credentials. The literal defaults are kept only so existing deployments
# keep working; set the environment variables to override them.
# NOTE(review): credentials embedded in source should be replaced by real
# authentication before this app is exposed publicly.
_EXPECTED_USERNAME = os.environ.get("CLUSTER_APP_USERNAME", "kavin")
_EXPECTED_PASSWORD = os.environ.get("CLUSTER_APP_PASSWORD", "1234")

_INVALID_FEATURES_MESSAGE = "⚠️ Please enter a valid number for every feature."
_DBSCAN_OUTLIER_MESSAGE = "🚨 This data point is considered an OUTLIER (noise) by DBSCAN."


def _highest_balance_cluster(labels, skip_noise=False):
    """Return the label whose training rows have the highest mean BALANCE.

    ``labels`` holds one cluster label per row of ``data``. With
    ``skip_noise=True`` the label ``-1`` is ignored. Returns ``None`` when no
    label remains.
    """
    mean_balance = {}
    for label in np.unique(labels):
        if skip_noise and label == -1:
            continue
        mean_balance[label] = data.loc[labels == label, "BALANCE"].mean()
    return max(mean_balance, key=mean_balance.get) if mean_balance else None


# The DBSCAN fit never changes after start-up, so its "high spend" cluster is
# computed once here instead of on every prediction request.
_dbscan_high_spend_cluster = _highest_balance_cluster(
    dbscan_model.labels_, skip_noise=True
)


def _credentials_ok(username, password):
    """Return True when both credentials match, using constant-time comparison."""

    def matches(given, expected):
        if not isinstance(given, str):
            return False
        # compare_digest only accepts non-ASCII text as bytes.
        return hmac.compare_digest(given.encode("utf-8"), expected.encode("utf-8"))

    # Evaluate both comparisons so timing does not reveal which field failed.
    username_ok = matches(username, _EXPECTED_USERNAME)
    password_ok = matches(password, _EXPECTED_PASSWORD)
    return username_ok and password_ok


def _parse_k(k):
    """Return ``k`` as an int in [2, number of samples], 2 if blank, else None."""
    if k is None:
        return 2
    try:
        as_float = float(k)
    except (TypeError, ValueError):
        return None
    if not as_float.is_integer() or not 2 <= as_float <= len(X_scaled):
        return None
    return int(as_float)


@lru_cache(maxsize=8)
def _kmeans_for_k(n_clusters):
    """Fit a KMeans model with ``n_clusters`` clusters, cached per value.

    Returns ``(model, high_spend_cluster)`` where ``high_spend_cluster`` is the
    label with the highest mean BALANCE among the training rows.
    """
    model = KMeans(n_clusters=n_clusters, random_state=42).fit(X_scaled)
    return model, _highest_balance_cluster(model.labels_)


def _predict_kmeans(features_scaled, k):
    """Assign the scaled row to a KMeans cluster, honouring the requested ``k``."""
    n_clusters = _parse_k(k)
    if n_clusters is None:
        return (
            "⚠️ Number of clusters must be a whole number between 2 and "
            f"{len(X_scaled)}."
        )
    if n_clusters == 2:
        # Default case: reuse the model and labelling fitted at start-up.
        cluster = kmeans_model.predict(features_scaled)[0]
        cluster_type = cluster_meaning.get(cluster, "Unknown Cluster")
    else:
        model, high_spend_cluster = _kmeans_for_k(n_clusters)
        cluster = model.predict(features_scaled)[0]
        cluster_type = (
            "High Spend Customer"
            if cluster == high_spend_cluster
            else "Low Spend Customer"
        )
    return f"✅ Cluster {cluster} → {cluster_type} (KMeans, k={n_clusters})"


def _predict_hierarchical(features_scaled):
    """Cluster the scaled row together with the training data and label it."""
    # AgglomerativeClustering offers no predict(), so the new row is appended
    # to the training matrix and the whole set is clustered again.
    new_data = np.vstack([X_scaled, features_scaled])
    labels = AgglomerativeClustering(n_clusters=2).fit_predict(new_data)
    cluster = labels[-1]
    train_labels = labels[:-1]
    balance0 = data.loc[train_labels == 0, "BALANCE"].mean()
    balance1 = data.loc[train_labels == 1, "BALANCE"].mean()
    # NOTE(review): if the new row ends up alone in its cluster, one of these
    # means is NaN and the comparison falls through to cluster 1.
    high_spend_cluster = 0 if balance0 > balance1 else 1
    cluster_type = (
        "High Spend Customer" if cluster == high_spend_cluster else "Low Spend Customer"
    )
    return f"✅ Cluster {cluster} → {cluster_type} (Hierarchical Clustering)"


def _predict_dbscan(features_scaled):
    """Assign the scaled row to the cluster of its nearest DBSCAN core sample."""
    if len(dbscan_core_samples) == 0:
        # No core samples means no clusters exist; argmin over an empty
        # distance matrix would raise, so report the row as noise instead.
        return _DBSCAN_OUTLIER_MESSAGE
    dists = euclidean_distances(features_scaled, dbscan_core_samples)
    nearest_idx = np.argmin(dists)
    nearest_dist = dists[0, nearest_idx]
    if nearest_dist <= dbscan_model.eps:
        cluster = dbscan_core_labels[nearest_idx]
        cluster_type = (
            "High Spend Customer"
            if cluster == _dbscan_high_spend_cluster
            else "Low Spend Customer"
        )
        return f"✅ Cluster {cluster} → {cluster_type} (DBSCAN, dist={nearest_dist:.2f})"
    return _DBSCAN_OUTLIER_MESSAGE


def predict_cluster(username, password, algorithm, k, *features):
    """Authenticate the caller and classify one customer row.

    Args:
        username: Login name entered in the UI.
        password: Password entered in the UI.
        algorithm: One of ``"KMeans"``, ``"Hierarchical"`` or ``"DBSCAN"``.
        k: Requested number of clusters; used only by KMeans. ``None`` falls
            back to 2.
        *features: One raw (unscaled) value per entry of ``selected_features``,
            in the same order.

    Returns:
        A human-readable result or error message string.
    """
    if not _credentials_ok(username, password):
        return "❌ Invalid login. Please try again."

    # Blank number fields arrive as None; reject them (and any other
    # non-numeric or non-finite value) before they reach the scaler.
    try:
        row = np.asarray(features, dtype=float).reshape(1, -1)
    except (TypeError, ValueError):
        return _INVALID_FEATURES_MESSAGE
    if row.shape[1] != len(selected_features) or not np.isfinite(row).all():
        return _INVALID_FEATURES_MESSAGE

    features_scaled = scaler.transform(row)

    if algorithm == "KMeans":
        return _predict_kmeans(features_scaled, k)
    if algorithm == "Hierarchical":
        return _predict_hierarchical(features_scaled)
    if algorithm == "DBSCAN":
        return _predict_dbscan(features_scaled)
    return "⚠️ Please select a valid clustering algorithm."
# Build the single-tab UI: credentials, algorithm choice, one numeric input
# per feature, and a button that sends everything to predict_cluster.
with gr.Blocks() as demo:
    with gr.Tab("🔑 Login & Predict Cluster"):
        gr.Markdown("## 🔒 Login and Select Clustering Method")

        username = gr.Textbox(label="Username", placeholder="Enter username")
        password = gr.Textbox(
            label="Password",
            type="password",
            placeholder="Enter password",
        )
        algorithm = gr.Dropdown(
            ["KMeans", "Hierarchical", "DBSCAN"],
            label="Select Clustering Algorithm",
            value="KMeans",
        )
        k_value = gr.Number(label="Number of Clusters (only for KMeans)", value=2)

        with gr.Accordion("🔧 Enter Feature Values", open=True):
            # Each field is pre-filled with the dataset median for its column.
            inputs = [
                gr.Number(label=name, value=float(data[name].median()))
                for name in selected_features
            ]

        btn = gr.Button("🔍 Predict Cluster")
        output = gr.Textbox(label="Prediction Result", interactive=False)

        # Argument order must match predict_cluster's signature:
        # username, password, algorithm, k, then the feature values.
        btn.click(
            fn=predict_cluster,
            inputs=[username, password, algorithm, k_value, *inputs],
            outputs=output,
        )

if __name__ == "__main__":
    demo.launch()