File size: 3,782 Bytes
4b452bf
1356b9c
9caa383
1356b9c
 
 
bba2321
1356b9c
95641e3
7e33362
95641e3
1356b9c
 
 
ff91270
e89459c
95641e3
 
 
 
 
 
 
 
 
 
 
 
bcf9156
 
 
1356b9c
 
 
 
 
9caa383
1356b9c
 
 
 
 
9caa383
bcf9156
1356b9c
 
 
 
 
 
9caa383
1356b9c
 
 
 
9caa383
1356b9c
 
 
 
 
 
9caa383
1356b9c
 
 
 
 
 
 
9caa383
1356b9c
 
 
045d115
bcf9156
1356b9c
 
facbf76
1356b9c
 
 
 
3f8aba3
1356b9c
 
 
 
 
 
 
 
 
 
 
 
045d115
1356b9c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import joblib
from tensorflow.keras.models import load_model
from keras.models import load_model

# Load data and models
# All artifact paths are relative to the working directory of the process.
df = pd.read_csv("dataset.csv")
# NOTE(review): joblib.load unpickles the file, so scaler.pkl must come from a
# trusted source.
scaler = joblib.load("scaler.pkl")
# Both networks are only used for inference in this app, so neither needs its
# training configuration restored; load the encoder the same way as the
# autoencoder.
encoder = load_model("encoder.h5", compile=False)
autoencoder = load_model("autoencoder.h5", compile=False)

# Safely extract correct feature columns for scaler
if hasattr(scaler, 'feature_names_in_'):
    # The scaler recorded the column names it was fitted on; use them as-is.
    feature_cols = scaler.feature_names_in_.tolist()
else:
    # Fallback: exclude known non-feature columns.
    # The remaining columns are kept in their original file order on purpose:
    # Index.difference() sorts its result, which would feed the scaler the
    # columns in alphabetical order instead of the order they appear in.
    # NOTE(review): assumes dataset.csv keeps the column order the scaler was
    # fitted with - confirm against the training pipeline.
    _NON_FEATURE_COLS = {
        'COMM_NAME', 'COMM_CODE', 'COMM_WT', 'Cluster', 'Reconstruction_Error',
        'Anomaly', 'tSNE_1', 'tSNE_2', 'PCA_1', 'PCA_2',
    }
    feature_cols = [col for col in df.columns if col not in _NON_FEATURE_COLS]

# Transform only the original features the scaler expects
X_scaled = scaler.transform(df[feature_cols])
# Latent representation used below for the 2-D embeddings.
encoded_data = encoder.predict(X_scaled)

# Add t-SNE and PCA embeddings if not already present
# Each embedding is computed from the encoder output only when the dataset
# does not already carry the corresponding columns.
if 'tSNE_1' not in df.columns:
    # t-SNE needs perplexity < number of samples. 30 is the library default,
    # so larger datasets behave exactly as before, while small ones no longer
    # raise.
    tsne_perplexity = min(30.0, len(encoded_data) - 1)
    tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
    tsne_result = tsne.fit_transform(encoded_data)
    df['tSNE_1'] = tsne_result[:, 0]
    df['tSNE_2'] = tsne_result[:, 1]

if 'PCA_1' not in df.columns:
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(encoded_data)
    df['PCA_1'] = pca_result[:, 0]
    df['PCA_2'] = pca_result[:, 1]

# Gradio UI functions
def plot_cluster_visualization(plot_type, cluster_id):
    """Scatter-plot every row of ``df`` in 2-D, one colour per cluster.

    Args:
        plot_type: ``'t-SNE'`` plots the ``tSNE_1``/``tSNE_2`` columns; any
            other value plots ``PCA_1``/``PCA_2``.
        cluster_id: ``'All'`` or ``None`` for no highlight; otherwise the
            cluster label (as shown in the dropdown) whose points get an
            extra black outline.

    Returns:
        A new ``matplotlib.figure.Figure`` containing the plot.
    """
    # Build the figure without pyplot. pyplot keeps every figure it creates
    # alive in a global registry until it is explicitly closed, so creating
    # one per button click would leak memory, and its implicit "current
    # figure" state is shared between concurrent requests.
    from matplotlib.figure import Figure

    x, y = ('tSNE_1', 'tSNE_2') if plot_type == 't-SNE' else ('PCA_1', 'PCA_2')

    fig = Figure(figsize=(8, 6))
    ax = fig.subplots()

    labels = df['Cluster']
    for cluster in sorted(labels.unique()):
        subset = df[labels == cluster]
        ax.scatter(subset[x], subset[y], label=f'Cluster {cluster}', s=60)

    if cluster_id is not None and cluster_id != 'All':
        try:
            mask = labels == int(cluster_id)
        except (TypeError, ValueError):
            # Label is not an integer (e.g. '1.0'): match it against the
            # string form of the labels, which is how the dropdown choices
            # are built.
            mask = labels.astype(str) == str(cluster_id)
        selected = df[mask]
        ax.scatter(selected[x], selected[y], edgecolor='black', facecolor='none', s=120, label='Selected Cluster')

    ax.set_title(f"{plot_type} Visualization of Clusters")
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.legend()
    ax.grid(True)
    return fig

def show_cluster_commodities(cluster_id, top_n):
    """Return the rows with the highest reconstruction error in a cluster.

    Args:
        cluster_id: ``'All'`` or ``None`` to rank every row of ``df``;
            otherwise the cluster label to restrict the ranking to.
        top_n: Maximum number of rows to return. Coerced to ``int`` because
            ``DataFrame.head`` rejects float counts.

    Returns:
        A DataFrame with the columns ``COMM_NAME``, ``Cluster``,
        ``Reconstruction_Error`` and ``Anomaly``, sorted by
        ``Reconstruction_Error`` in descending order.
    """
    if cluster_id is None or cluster_id == 'All':
        candidates = df
    else:
        labels = df['Cluster']
        try:
            mask = labels == int(cluster_id)
        except (TypeError, ValueError):
            # Label is not an integer (e.g. '1.0'): compare against the
            # string form of the labels instead of failing.
            mask = labels.astype(str) == str(cluster_id)
        candidates = df[mask]

    result = candidates.sort_values(by='Reconstruction_Error', ascending=False)
    return result[['COMM_NAME', 'Cluster', 'Reconstruction_Error', 'Anomaly']].head(int(top_n))

def show_anomalies(top_n):
    """Return the flagged rows of ``df`` with the highest reconstruction error.

    Args:
        top_n: Maximum number of rows to return. Coerced to ``int`` because
            ``DataFrame.head`` rejects float counts.

    Returns:
        A DataFrame with the columns ``COMM_NAME``, ``Cluster`` and
        ``Reconstruction_Error``, sorted by ``Reconstruction_Error`` in
        descending order.
    """
    # Cast the flag to bool before using it as a mask: indexing a DataFrame
    # with a non-boolean Series is treated as column selection, so a 0/1
    # integer flag would raise KeyError instead of filtering rows.
    # NOTE(review): assumes 'Anomaly' is a boolean or 0/1 flag - confirm.
    is_anomaly = df['Anomaly'].astype(bool)
    anomalies = df[is_anomaly].sort_values(by='Reconstruction_Error', ascending=False)
    return anomalies[['COMM_NAME', 'Cluster', 'Reconstruction_Error']].head(int(top_n))

# Gradio UI layout
with gr.Blocks() as demo:
    # Page heading (the leading character is the bar-chart emoji, U+1F4CA).
    gr.Markdown("# 📊 Commodity Index Clustering + Anomaly Detection (Autoencoder)")

    # Controls shared by all three actions below.
    with gr.Row():
        plot_type = gr.Radio(["t-SNE", "PCA"], label="Plot Type", value="t-SNE")
        # Choices are the string form of every cluster label, plus 'All'.
        cluster_choice = gr.Dropdown(['All'] + list(map(str, sorted(df['Cluster'].unique()))), label="Cluster", value='All')
        top_n = gr.Slider(5, 50, step=1, label="Top N Results", value=10)

    # Output area: plot and table next to each other.
    with gr.Row():
        plot_output = gr.Plot()
        table_output = gr.Dataframe()

    plot_button = gr.Button("Show Cluster Visualization")
    plot_button.click(fn=plot_cluster_visualization, inputs=[plot_type, cluster_choice], outputs=plot_output)

    # Both table actions write to the same Dataframe component.
    cluster_table_btn = gr.Button("Show Cluster Commodities")
    cluster_table_btn.click(fn=show_cluster_commodities, inputs=[cluster_choice, top_n], outputs=table_output)

    anomaly_btn = gr.Button("Show Top Anomalies")
    anomaly_btn.click(fn=show_anomalies, inputs=[top_n], outputs=table_output)

demo.launch()