import gradio as gr
from smart_open import open
import gensim
from gensim.similarities.annoy import AnnoyIndexer
import plotly.express as px
import pandas as pd
import numpy as np
import pacmap

# Load pre-trained word vectors into a gensim KeyedVectors model
def load_gensim(fname):
    model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=False)
    # Build an Annoy index (100 trees) for faster approximate nearest-neighbour search
    annoy_index = AnnoyIndexer(model, 100)
    return model, annoy_index
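
# A minimal comparison sketch, assuming `model` and `annoy_index` were returned by
# load_gensim above; "some_word" is a hypothetical vocabulary entry:
#
#     exact  = model.most_similar("some_word", topn=10)                       # brute-force search
#     approx = model.most_similar("some_word", topn=10, indexer=annoy_index)  # Annoy-backed, faster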

def searchNexplore(word, final_dfs, model, annoy_index, topn):
    """Return the embeddings of the topn approximate neighbours of `word`."""
    vector = model[word]
    approximate_neighbors = model.most_similar([vector], topn=topn, indexer=annoy_index)
    # Keep only the neighbour labels and use them to select rows from the embedding table
    rows = [neighbor for neighbor, _ in approximate_neighbors]
    searched_df = final_dfs.loc[rows]
    return searched_df, approximate_neighbors

def embedding_dim_reduction(
    embeddings, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
):
    """
    Perform PaCMAP dimensionality reduction.

    Selection of values:
    1. Default transforms: MN_ratio=0.5, FP_ratio=2.0
    2. For heavy transformations: MN_ratio=30, FP_ratio=100.0
    """
    reducer = pacmap.PaCMAP(
        n_components=n_dim,
        n_neighbors=n_neighbors,
        MN_ratio=MN_ratio,
        FP_ratio=FP_ratio,
        lr=0.05,
        num_iters=1000,
        verbose=False,
    )
    # PaCMAP expects a numpy array; convert in case a DataFrame is passed in
    reduced_embeddings = reducer.fit_transform(np.asarray(embeddings, dtype=np.float32), init="pca")
    return reduced_embeddings
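
# A minimal, hypothetical usage sketch of the "heavy transformation" setting mentioned in the
# docstring above; `demo_vectors` is an assumed placeholder array, not part of this app:
#
#     demo_vectors = np.random.rand(200, 50).astype(np.float32)
#     demo_2d = embedding_dim_reduction(
#         demo_vectors, n_dim=2, n_neighbors=10, MN_ratio=30, FP_ratio=100.0
#     )  # -> (200, 2) array of 2-D coordinates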

# Load the word2vec-format embedding dump, plus the raw embedding table indexed by label
model, annoy_index = load_gensim("embedding_dump.txt")
final_dfs = pd.read_csv("raw_embeddings_allinone.csv")
final_dfs.set_index("Unnamed: 0", inplace=True)

def get_semantic(input_text, topn):
    # Retrieve the topn approximate neighbours of the query term
    searched_df, approximate_neighbors = searchNexplore(
        input_text, final_dfs, model, annoy_index, topn
    )
    # 2-D PaCMAP projection of the neighbourhood
    reduced_embeddings = embedding_dim_reduction(
        searched_df, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
    )
    fig1 = px.scatter(
        x=reduced_embeddings[:, 0],
        y=reduced_embeddings[:, 1],
        hover_name=searched_df.index.tolist(),
        color=searched_df.index.tolist(),
    )
    # 3-D PaCMAP projection of the same neighbourhood
    reduced_embeddings = embedding_dim_reduction(
        searched_df, n_dim=3, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
    )
    fig2 = px.scatter_3d(
        x=reduced_embeddings[:, 0],
        y=reduced_embeddings[:, 1],
        z=reduced_embeddings[:, 2],
        hover_name=searched_df.index.tolist(),
        color=searched_df.index.tolist(),
    )
    return fig1, fig2, approximate_neighbors
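
# A minimal sketch of calling the handler directly, using the query term from the example below:
#
#     fig_2d, fig_3d, neighbours = get_semantic("SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100)
#     # neighbours is a list of (label, similarity score) tuples; fig_2d / fig_3d are plotly figures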

iface = gr.Interface(
    fn=get_semantic,
    inputs=[
        "text",
        gr.Slider(0, 1000, value=100),
    ],
    # gradio has no "list" output component; render the neighbour list as JSON instead
    outputs=["plot", "plot", "json"],
    examples=[["SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100]],
    title="Semantic Explorer",
    description="Get semantic search results",
    theme="peach",
).launch(inline=False)