Spaces:

hellorahulk
/

semantic-search

Build error

App Files Files Community

semantic-search / app.py

hellorahulk

Update app.py

d6b4f91 almost 3 years ago

raw

history blame contribute delete

2.79 kB

	import gradio as gr

	from smart_open import open
	import gensim
	from gensim.similarities.annoy import AnnoyIndexer
	import plotly.express as px
	import pandas as pd
	import numpy as np
	import pacmap


	# Load into gensim model
	def load_gensim(fname, num_trees=100, binary=False):
	    """Load word2vec-format vectors and build an Annoy index over them.

	    Args:
	        fname: Path of the embedding file in word2vec format.
	        num_trees: Number of trees passed to ``AnnoyIndexer``. Defaults to
	            100, the value that used to be hard-coded here.
	        binary: Forwarded to ``load_word2vec_format``. The default
	            ``False`` reads the text format, as before.

	    Returns:
	        A ``(model, annoy_index)`` tuple: the loaded ``KeyedVectors`` and
	        the ``AnnoyIndexer`` built from it.
	    """
	    model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=binary)
	    # Build the Annoy index once up front; it is what makes the later
	    # approximate-neighbour searches fast.
	    annoy_index = AnnoyIndexer(model, num_trees)
	    return model, annoy_index


	def searchNexplore(word, final_dfs, model, annoy_index, topn):
	    """Find the approximate nearest neighbours of ``word`` and fetch their rows.

	    Args:
	        word: Key to look up in ``model``.
	        final_dfs: DataFrame whose index labels include the keys returned
	            by ``model.most_similar``.
	        model: Object supporting ``model[word]`` and
	            ``most_similar(positive, topn=..., indexer=...)``.
	        annoy_index: Indexer forwarded to ``most_similar``.
	        topn: Number of neighbours to request.

	    Returns:
	        ``(searched_df, approximate_neighbors)``: the rows of ``final_dfs``
	        selected by the neighbour keys, in neighbour order, and the
	        unmodified ``most_similar`` result.
	    """
	    vector = model[word]
	    approximate_neighbors = model.most_similar(
	        [vector], topn=topn, indexer=annoy_index
	    )
	    # The first element of each neighbour entry is its key; those keys are
	    # the row labels to pull from the embedding table.
	    rows = [neighbor[0] for neighbor in approximate_neighbors]
	    searched_df = final_dfs.loc[rows]
	    return searched_df, approximate_neighbors


	def embedding_dim_reduction(
	    embeddings, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
	):
	    """
	    Reduce ``embeddings`` to ``n_dim`` dimensions with PaCMAP.

	    Suggested ratio settings:
	    1. Default transforms: MN_ratio=0.5, FP_ratio=2.0
	    2. Heavy transformations: MN_ratio=30, FP_ratio=100.0

	    The learning rate (0.05), iteration count (1000), quiet mode and the
	    PCA initialisation are fixed inside this function.

	    Returns whatever ``PaCMAP.fit_transform`` produces for ``embeddings``.
	    """
	    pacmap_options = {
	        "n_components": n_dim,
	        "n_neighbors": n_neighbors,
	        "MN_ratio": MN_ratio,
	        "FP_ratio": FP_ratio,
	        "lr": 0.05,
	        "num_iters": 1000,
	        "verbose": False,
	    }
	    projector = pacmap.PaCMAP(**pacmap_options)
	    return projector.fit_transform(embeddings, init="pca")


	# Load the embedding model, its Annoy index and the embedding table once,
	# at import time, so every request reuses them.
	model, annoy_index = load_gensim("embedding_dump.txt")
	# The CSV's "Unnamed: 0" column holds the row labels; make it the index so
	# rows can be selected by label with .loc.
	final_dfs = pd.read_csv("raw_embeddings_allinone.csv").set_index("Unnamed: 0")


	def get_semantic(input_text, topn):
	    """Gradio callback: search neighbours of ``input_text`` and plot them.

	    Looks up the approximate nearest neighbours of ``input_text`` using the
	    module-level ``final_dfs``, ``model`` and ``annoy_index``, reduces the
	    matching rows to 2 and 3 dimensions, and builds one scatter plot each.

	    Args:
	        input_text: Key to search for.
	        topn: Number of neighbours to return. Converted with ``int()``
	            because UI sliders may deliver the value as a float.

	    Returns:
	        ``(fig1, fig2, approximate_neighbors)``: the 2-D scatter figure,
	        the 3-D scatter figure, and the neighbour list from
	        ``searchNexplore``.

	    Raises:
	        ValueError: If ``topn`` is smaller than 1; there would be nothing
	            to reduce or plot.
	    """
	    # The neighbour search needs a whole number of results.
	    topn = int(topn)
	    if topn < 1:
	        raise ValueError("topn must be at least 1")

	    searched_df, approximate_neighbors = searchNexplore(
	        input_text, final_dfs, model, annoy_index, topn
	    )

	    # The same labels drive hover text and colouring in both figures.
	    labels = searched_df.index.tolist()

	    coords_2d = embedding_dim_reduction(
	        searched_df, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
	    )
	    fig1 = px.scatter(
	        x=coords_2d[:, 0],
	        y=coords_2d[:, 1],
	        hover_name=labels,
	        color=labels,
	    )

	    coords_3d = embedding_dim_reduction(
	        searched_df, n_dim=3, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
	    )
	    fig2 = px.scatter_3d(
	        x=coords_3d[:, 0],
	        y=coords_3d[:, 1],
	        z=coords_3d[:, 2],
	        hover_name=labels,
	        color=labels,
	    )

	    return fig1, fig2, approximate_neighbors


	# NOTE(review): the title and description say "Sentiment", but the callback
	# runs a nearest-neighbour embedding search; confirm whether "Semantic" was
	# intended before changing the user-facing text.
	iface = gr.Interface(
	    fn=get_semantic,
	    inputs=[
	        "text",
	        # The neighbour count must be a whole number of at least 1.
	        gr.Slider(1, 1000, value=100, step=1),
	    ],
	    outputs=["plot", "plot", "list"],
	    examples=[["SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100]],
	    title="Sentiment Explorer",
	    description="Get Sentiment search results",
	    theme="peach",
	)
	# Launch in a separate statement so `iface` stays bound to the Interface
	# itself rather than to launch()'s return value.
	iface.launch(inline=False)