Spaces:
Sleeping
Sleeping
File size: 3,782 Bytes
4b452bf 1356b9c 9caa383 1356b9c bba2321 1356b9c 95641e3 7e33362 95641e3 1356b9c ff91270 e89459c 95641e3 bcf9156 1356b9c 9caa383 1356b9c 9caa383 bcf9156 1356b9c 9caa383 1356b9c 9caa383 1356b9c 9caa383 1356b9c 9caa383 1356b9c 045d115 bcf9156 1356b9c facbf76 1356b9c 3f8aba3 1356b9c 045d115 1356b9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import joblib
from tensorflow.keras.models import load_model
from keras.models import load_model
# Load the dataset, the fitted scaler and the trained Keras models.
# NOTE(review): joblib.load unpickles the file, so scaler.pkl must come from
# a trusted source.
df = pd.read_csv("dataset.csv")
scaler = joblib.load("scaler.pkl")
# Both models are only used for inference here, so skip restoring the
# training configuration (optimizer/loss) for the encoder as well; this keeps
# the two loads consistent and avoids failures when the saved loss/metric
# cannot be deserialized.
encoder = load_model("encoder.h5", compile=False)
autoencoder = load_model("autoencoder.h5", compile=False)

# Work out which columns the scaler was fitted on.
if hasattr(scaler, 'feature_names_in_'):
    feature_cols = scaler.feature_names_in_.tolist()
else:
    # Fallback: exclude known non-feature columns.
    # NOTE(review): Index.difference returns the remaining labels sorted, so
    # the features are fed in alphabetical order here -- confirm this matches
    # the column order the scaler and encoder were trained with.
    feature_cols = df.columns.difference([
        'COMM_NAME', 'COMM_CODE', 'COMM_WT', 'Cluster', 'Reconstruction_Error',
        'Anomaly', 'tSNE_1', 'tSNE_2', 'PCA_1', 'PCA_2'
    ]).tolist()

# Transform only the original features the scaler expects, then project them
# through the encoder.
X_scaled = scaler.transform(df[feature_cols])
encoded_data = encoder.predict(X_scaled)

# Add t-SNE and PCA embeddings if the CSV does not already provide them.
if 'tSNE_1' not in df.columns:
    # t-SNE requires perplexity < n_samples; cap the default of 30 so small
    # datasets do not fail. Larger datasets keep perplexity at 30.
    tsne_perplexity = min(30.0, float(len(encoded_data) - 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
    tsne_result = tsne.fit_transform(encoded_data)
    df['tSNE_1'] = tsne_result[:, 0]
    df['tSNE_2'] = tsne_result[:, 1]

if 'PCA_1' not in df.columns:
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(encoded_data)
    df['PCA_1'] = pca_result[:, 0]
    df['PCA_2'] = pca_result[:, 1]
# Gradio UI functions
def plot_cluster_visualization(plot_type, cluster_id):
    """Scatter-plot every cluster in the selected 2-D embedding.

    Args:
        plot_type: 't-SNE' selects the tSNE_1/tSNE_2 columns; any other value
            selects PCA_1/PCA_2.
        cluster_id: 'All', or a cluster label (anything accepted by int())
            whose points are additionally outlined in black.

    Returns:
        The matplotlib Figure holding the plot.

    Raises:
        ValueError: if cluster_id is not 'All' and cannot be parsed by int().
    """
    # Build the figure without pyplot: pyplot keeps a global reference to
    # every figure it creates (never released here) and its "current figure"
    # state is shared between concurrent callbacks.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(8, 6))
    ax = fig.subplots()

    x, y = ('tSNE_1', 'tSNE_2') if plot_type == 't-SNE' else ('PCA_1', 'PCA_2')

    # One scatter series per cluster so each gets its own colour and legend entry.
    for cluster in sorted(df['Cluster'].unique()):
        subset = df[df['Cluster'] == cluster]
        ax.scatter(subset[x], subset[y], label=f'Cluster {cluster}', s=60)

    if cluster_id != 'All':
        # The dropdown delivers labels as strings; compare as integers.
        cluster_id = int(cluster_id)
        selected = df[df['Cluster'] == cluster_id]
        # Hollow black rings drawn on top of the selected cluster's points.
        ax.scatter(selected[x], selected[y], edgecolor='black', facecolor='none',
                   s=120, label='Selected Cluster')

    ax.set_title(f"{plot_type} Visualization of Clusters")
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.legend()
    ax.grid(True)
    return fig
def show_cluster_commodities(cluster_id, top_n):
    """Return the top_n rows with the highest reconstruction error.

    Args:
        cluster_id: 'All' to rank every row, otherwise a cluster label
            (anything accepted by int()) to restrict the ranking to.
        top_n: number of rows to keep after sorting.

    Returns:
        A DataFrame with the COMM_NAME, Cluster, Reconstruction_Error and
        Anomaly columns, sorted by Reconstruction_Error descending.
    """
    shown_columns = ['COMM_NAME', 'Cluster', 'Reconstruction_Error', 'Anomaly']

    if cluster_id == 'All':
        candidates = df
    else:
        # The dropdown delivers labels as strings; compare as integers.
        candidates = df[df['Cluster'] == int(cluster_id)]

    ranked = candidates.sort_values(by='Reconstruction_Error', ascending=False)
    return ranked[shown_columns].head(top_n)
def show_anomalies(top_n):
    """Return the top_n flagged rows with the highest reconstruction error.

    Args:
        top_n: number of rows to keep after sorting.

    Returns:
        A DataFrame with the COMM_NAME, Cluster and Reconstruction_Error
        columns, sorted by Reconstruction_Error descending.
    """
    # The Anomaly column is used directly as the row selector
    # (assumes it is boolean -- TODO confirm against dataset.csv).
    flagged = df[df['Anomaly']]
    ranked = flagged.sort_values(by='Reconstruction_Error', ascending=False)
    shown_columns = ['COMM_NAME', 'Cluster', 'Reconstruction_Error']
    return ranked[shown_columns].head(top_n)
# Gradio UI layout
# Dropdown entries: 'All' followed by every cluster label as a string.
cluster_options = ['All'] + [str(label) for label in sorted(df['Cluster'].unique())]

with gr.Blocks() as demo:
    # NOTE(review): the leading "π" may be a mis-decoded emoji -- confirm
    # against the original file before changing it.
    gr.Markdown("# π Commodity Index Clustering + Anomaly Detection (Autoencoder)")

    # Controls shared by the three actions below.
    with gr.Row():
        plot_type = gr.Radio(choices=["t-SNE", "PCA"], label="Plot Type", value="t-SNE")
        cluster_choice = gr.Dropdown(choices=cluster_options, label="Cluster", value='All')
        top_n = gr.Slider(minimum=5, maximum=50, step=1, label="Top N Results", value=10)

    # Output area: the plot next to the results table.
    with gr.Row():
        plot_output = gr.Plot()
        table_output = gr.Dataframe()

    # Each button is wired to its callback right after it is created.
    plot_button = gr.Button("Show Cluster Visualization")
    plot_button.click(
        plot_cluster_visualization,
        [plot_type, cluster_choice],
        plot_output,
    )

    cluster_table_btn = gr.Button("Show Cluster Commodities")
    cluster_table_btn.click(
        show_cluster_commodities,
        [cluster_choice, top_n],
        table_output,
    )

    anomaly_btn = gr.Button("Show Top Anomalies")
    anomaly_btn.click(show_anomalies, [top_n], table_output)

demo.launch()
|