Spaces:
Build error
Build error
import gradio as gr | |
from smart_open import open | |
import gensim | |
from gensim.similarities.annoy import AnnoyIndexer | |
import plotly.express as px | |
import pandas as pd | |
import numpy as np | |
import pacmap | |
def load_gensim(fname, *, binary=False, num_trees=100):
    """Load word vectors into gensim and build an Annoy index over them.

    Args:
        fname: Path to a vector file in word2vec format.
        binary: Forwarded to ``load_word2vec_format``; ``False`` (the
            default) reads the plain-text variant of the format.
        num_trees: Number of trees handed to ``AnnoyIndexer``. The default
            of 100 is the value this loader has always used.

    Returns:
        A ``(model, annoy_index)`` tuple: the loaded ``KeyedVectors`` and the
        ``AnnoyIndexer`` built from it.
    """
    model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=binary)
    # Search using the Annoy indexer; faster than an exact scan.
    annoy_index = AnnoyIndexer(model, num_trees)
    return model, annoy_index
def searchNexplore(word, final_dfs, model, annoy_index, topn):
    """Find the approximate nearest neighbours of ``word`` and their rows.

    Args:
        word: Key to look up in ``model``.
        final_dfs: DataFrame indexed by the same keys as ``model``; the rows
            of the neighbours are selected from it with ``.loc``.
        model: Object supporting ``model[word]`` and
            ``model.most_similar(..., topn=, indexer=)``.
        annoy_index: Indexer passed through to ``most_similar``.
        topn: Number of neighbours requested. Coerced to ``int`` so a float
            (for example one delivered by a UI slider) is accepted.

    Returns:
        A ``(searched_df, approximate_neighbors)`` tuple: the selected rows of
        ``final_dfs`` and the raw result of ``most_similar``.

    Raises:
        KeyError: If ``word`` is unknown to ``model`` or a neighbour label is
            missing from ``final_dfs``.
    """
    vector = model[word]
    approximate_neighbors = model.most_similar(
        [vector], topn=int(topn), indexer=annoy_index
    )
    # Each neighbour's first element is its label; the rest is discarded here.
    rows = [neighbor[0] for neighbor in approximate_neighbors]
    searched_df = final_dfs.loc[rows]
    return searched_df, approximate_neighbors
def embedding_dim_reduction(
    embeddings,
    n_dim=2,
    n_neighbors=10,
    MN_ratio=0.5,
    FP_ratio=2.0,
    lr=0.05,
    num_iters=1000,
    random_state=None,
):
    """Perform PaCMAP dimension reduction.

    Selection of values:
      1. Default transforms: MN_ratio=0.5, FP_ratio=2.0
      2. For heavy transformations: MN_ratio=30, FP_ratio=100.0

    Args:
        embeddings: Data to reduce; passed unchanged to
            ``PaCMAP.fit_transform``.
        n_dim: Number of output components.
        n_neighbors: Forwarded to ``PaCMAP`` as ``n_neighbors``.
        MN_ratio: Forwarded to ``PaCMAP`` as ``MN_ratio``.
        FP_ratio: Forwarded to ``PaCMAP`` as ``FP_ratio``.
        lr: Learning rate forwarded to ``PaCMAP``; defaults to the value
            this function previously hard-coded.
        num_iters: Iteration count forwarded to ``PaCMAP``; defaults to the
            value this function previously hard-coded.
        random_state: Optional seed for reproducible layouts. It is only
            forwarded when set, so the default call is exactly the one this
            function has always made.

    Returns:
        The result of ``fit_transform`` with PCA initialisation.
    """
    extra_options = {}
    if random_state is not None:
        extra_options["random_state"] = random_state
    reducer = pacmap.PaCMAP(
        n_components=n_dim,
        n_neighbors=n_neighbors,
        MN_ratio=MN_ratio,
        FP_ratio=FP_ratio,
        lr=lr,
        num_iters=num_iters,
        verbose=False,
        **extra_options,
    )
    return reducer.fit_transform(embeddings, init="pca")
# Load the word vectors and their Annoy index once, at import time.
model, annoy_index = load_gensim("embedding_dump.txt")

# Raw embeddings table; the "Unnamed: 0" column (presumably the CSV's
# header-less first column -- confirm against the file) becomes the index.
final_dfs = pd.read_csv("raw_embeddings_allinone.csv").set_index("Unnamed: 0")
def get_semantic(input_text, topn):
    """Build 2-D and 3-D neighbour plots for ``input_text``.

    Uses the module-level ``final_dfs``, ``model`` and ``annoy_index``.

    Args:
        input_text: Key to search for in the embedding model.
        topn: Number of neighbours to retrieve. Coerced to ``int`` because
            the value arrives from a UI slider.

    Returns:
        A ``(fig1, fig2, approximate_neighbors)`` tuple: the 2-D scatter, the
        3-D scatter, and the neighbour list returned by ``searchNexplore``.

    Raises:
        ValueError: If ``topn`` is smaller than 1; with no neighbours there
            is nothing to reduce or plot.
    """
    topn = int(topn)
    if topn < 1:
        raise ValueError("topn must be at least 1")

    searched_df, approximate_neighbors = searchNexplore(
        input_text, final_dfs, model, annoy_index, topn
    )
    # The same labels drive hover text and colouring in both figures.
    labels = searched_df.index.tolist()

    coords_2d = embedding_dim_reduction(
        searched_df, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
    )
    fig1 = px.scatter(
        x=coords_2d[:, 0],
        y=coords_2d[:, 1],
        hover_name=labels,
        color=labels,
    )

    coords_3d = embedding_dim_reduction(
        searched_df, n_dim=3, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
    )
    fig2 = px.scatter_3d(
        x=coords_3d[:, 0],
        y=coords_3d[:, 1],
        z=coords_3d[:, 2],
        hover_name=labels,
        color=labels,
    )
    return fig1, fig2, approximate_neighbors
# Build the UI and launch it as separate statements so that ``iface`` names
# the Interface itself rather than whatever ``launch()`` returns.
iface = gr.Interface(
    fn=get_semantic,
    inputs=[
        "text",
        gr.Slider(0, 1000, value=100),
    ],
    outputs=["plot", "plot", "list"],
    examples=[["SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100]],
    title="Sentiment Explorer",
    description="Get Sentiment search results",
    theme="peach",
)
iface.launch(inline=False)