Spaces:

kavin2906
/

cluster_creditcard

Running

App Files Files Community

cluster_creditcard / app.py

kavin2906

Update app.py

94b680f verified 2 days ago

raw

history blame contribute delete

4.99 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
	from sklearn.preprocessing import RobustScaler
	from sklearn.metrics.pairwise import euclidean_distances

	# Read the dataset and drop the CUST_ID column, which is not among the model features.
	data = pd.read_csv("scaled_dataset.csv").drop(columns=["CUST_ID"])

	# Columns fed to the clustering models; the UI builds one input per entry, in this order.
	selected_features = [
	    "BALANCE",
	    "BALANCE_FREQUENCY",
	    "PURCHASES",
	    "ONEOFF_PURCHASES",
	    "INSTALLMENTS_PURCHASES",
	    "CASH_ADVANCE",
	    "PURCHASES_FREQUENCY",
	    "ONEOFF_PURCHASES_FREQUENCY",
	    "PURCHASES_INSTALLMENTS_FREQUENCY",
	    "CASH_ADVANCE_FREQUENCY",
	    "CASH_ADVANCE_TRX",
	    "PURCHASES_TRX",
	    "CREDIT_LIMIT",
	    "PAYMENTS",
	    "MINIMUM_PAYMENTS",
	    "PRC_FULL_PAYMENT",
	    "TENURE",
	]

	# Feature matrix as a plain NumPy array.
	X = data[selected_features].values

	# Fit the scaler once on the full matrix; the same fitted scaler transforms new inputs later.
	scaler = RobustScaler().fit(X)
	X_scaled = scaler.transform(X)

	# Fit a two-cluster KMeans model up front and name each cluster by its mean BALANCE.
	kmeans_model = KMeans(n_clusters=2, random_state=42).fit(X_scaled)
	all_labels = kmeans_model.predict(X_scaled)

	# Mean BALANCE of the rows assigned to cluster 0 and cluster 1 respectively.
	cluster0_balance, cluster1_balance = (
	    data.loc[all_labels == cluster_id, "BALANCE"].mean() for cluster_id in (0, 1)
	)

	# The cluster with the strictly larger mean BALANCE is "High Spend";
	# a tie (or a NaN mean) falls through to cluster 1.
	_high_cluster = 0 if cluster0_balance > cluster1_balance else 1
	cluster_meaning = {
	    _high_cluster: "High Spend Customer",
	    1 - _high_cluster: "Low Spend Customer",
	}

	# Fit the hierarchical and DBSCAN models once, when the module is loaded.
	hierarchical_model = AgglomerativeClustering(n_clusters=2)
	hierarchical_model.fit(X_scaled)

	dbscan_model = DBSCAN(eps=0.5, min_samples=5)
	dbscan_model.fit(X_scaled)

	# Cluster label of every DBSCAN core sample, aligned row-for-row with the
	# core-sample coordinates below; together they let a new point be matched
	# to its nearest core sample.
	dbscan_core_labels = dbscan_model.labels_[dbscan_model.core_sample_indices_]
	dbscan_core_samples = dbscan_model.components_


	def predict_cluster(username, password, algorithm, k, *features):
	    """Check the login and assign one record to a cluster.

	    Args:
	        username: Login name entered in the form.
	        password: Login password entered in the form.
	        algorithm: One of "KMeans", "Hierarchical" or "DBSCAN".
	        k: Requested number of clusters. Accepted so the UI wiring keeps
	            working, but not used: the KMeans model is pre-fit with two
	            clusters.
	        *features: Feature values, in the same column order the scaler
	            was fitted on.

	    Returns:
	        A status string: the predicted cluster and its label, or a message
	        explaining why no prediction was made (bad login, invalid feature
	        values, DBSCAN outlier, unknown algorithm).
	    """
	    # NOTE(review): credentials are hard-coded in source; consider moving
	    # them to deployment secrets/configuration.
	    if username != "kavin" or password != "1234":
	        return "❌ Invalid login. Please try again."

	    # A cleared numeric field may arrive as None (and text could be
	    # non-numeric); reject those, plus NaN/inf, with a readable message
	    # instead of letting the scaler raise.
	    try:
	        feature_row = [float(value) for value in features]
	    except (TypeError, ValueError):
	        feature_row = []
	    if not feature_row or not np.isfinite(feature_row).all():
	        return "⚠️ Please enter a valid number for every feature."

	    features_scaled = scaler.transform([feature_row])

	    if algorithm == "KMeans":
	        cluster = kmeans_model.predict(features_scaled)[0]
	        cluster_type = cluster_meaning.get(cluster, "Unknown Cluster")
	        return f"✅ Cluster {cluster} → {cluster_type} (KMeans, k=2)"

	    if algorithm == "Hierarchical":
	        # AgglomerativeClustering has no predict(), so the new row is
	        # appended to the training matrix and everything is re-clustered.
	        combined = np.vstack([X_scaled, features_scaled])
	        labels = AgglomerativeClustering(n_clusters=2).fit_predict(combined)
	        cluster = labels[-1]
	        train_labels = labels[:-1]

	        # Cluster ids can swap between fits, so the high-balance cluster is
	        # re-derived from the training rows on every call.
	        cluster0_balance = data.loc[train_labels == 0, "BALANCE"].mean()
	        cluster1_balance = data.loc[train_labels == 1, "BALANCE"].mean()
	        high_spend_cluster = 0 if cluster0_balance > cluster1_balance else 1

	        cluster_type = (
	            "High Spend Customer"
	            if cluster == high_spend_cluster
	            else "Low Spend Customer"
	        )
	        return f"✅ Cluster {cluster} → {cluster_type} (Hierarchical Clustering)"

	    if algorithm == "DBSCAN":
	        outlier_message = (
	            "🚨 This data point is considered an OUTLIER (noise) by DBSCAN."
	        )

	        # With no core samples every point is noise, and argmin over an
	        # empty distance row would raise.
	        if len(dbscan_core_samples) == 0:
	            return outlier_message

	        # A new point joins the cluster of its nearest core sample, but only
	        # when that core sample lies within eps.
	        dists = euclidean_distances(features_scaled, dbscan_core_samples)
	        nearest_idx = int(np.argmin(dists[0]))
	        nearest_dist = dists[0, nearest_idx]
	        if nearest_dist > dbscan_model.eps:
	            return outlier_message

	        cluster = dbscan_core_labels[nearest_idx]

	        # Mean BALANCE of every non-noise cluster; the largest one is
	        # labelled "High Spend", all others "Low Spend".
	        cluster_balances = {
	            c: data.iloc[np.where(dbscan_model.labels_ == c)[0]]["BALANCE"].mean()
	            for c in np.unique(dbscan_model.labels_)
	            if c != -1
	        }
	        high_spend_cluster = max(cluster_balances, key=cluster_balances.get)
	        cluster_type = (
	            "High Spend Customer"
	            if cluster == high_spend_cluster
	            else "Low Spend Customer"
	        )
	        return f"✅ Cluster {cluster} → {cluster_type} (DBSCAN, dist={nearest_dist:.2f})"

	    return "⚠️ Please select a valid clustering algorithm."


	# Build the Gradio interface: one tab holding the login fields, the
	# algorithm selector, the feature inputs and the prediction output.
	with gr.Blocks() as demo:
	    with gr.Tab("🔑 Login & Predict Cluster"):
	        gr.Markdown("## 🔒 Login and Select Clustering Method")

	        username = gr.Textbox(label="Username", placeholder="Enter username")
	        password = gr.Textbox(
	            label="Password",
	            type="password",
	            placeholder="Enter password",
	        )

	        algorithm = gr.Dropdown(
	            choices=["KMeans", "Hierarchical", "DBSCAN"],
	            value="KMeans",
	            label="Select Clustering Algorithm",
	        )

	        k_value = gr.Number(value=2, label="Number of Clusters (only for KMeans)")

	        with gr.Accordion("🔧 Enter Feature Values", open=True):
	            # One numeric field per feature, pre-filled with that column's median.
	            inputs = [
	                gr.Number(label=feature, value=float(data[feature].median()))
	                for feature in selected_features
	            ]

	        btn = gr.Button("🔍 Predict Cluster")
	        output = gr.Textbox(label="Prediction Result", interactive=False)

	        # Argument order must match predict_cluster's signature:
	        # username, password, algorithm, k, then the feature values.
	        btn.click(
	            fn=predict_cluster,
	            inputs=[username, password, algorithm, k_value, *inputs],
	            outputs=output,
	        )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()