Spaces:

gigisan81
/

Enron

Runtime error

App Files Files Community

Enron / app.py

gigisan81

Update app.py

c198e10 verified over 1 year ago

raw

history blame contribute delete

1.67 kB

	# https://huggingface.co/spaces/gigisan81/Enron

	import pandas as pd
	from transformers import AutoTokenizer, AutoModel
	from scipy.spatial.distance import cosine
	import torch
	import gradio as gr

	# Load the dataset.
	# NOTE(review): this looks like a Google Colab Drive mount path; it
	# presumably does not exist on other hosts -- confirm, and point it at a
	# file that ships with the app if this is meant to run elsewhere.
	dataset_path = '/content/drive/MyDrive/emails_folder/emails.csv'
	df = pd.read_csv(dataset_path)

	# Load the pre-trained model and tokenizer.
	# `from_pretrained` needs the fully qualified Hub repo id: this checkpoint
	# is published under the "sentence-transformers" organisation, and the bare
	# name is not resolved by the plain `transformers` loaders.
	model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModel.from_pretrained(model_name)

	# Tokenize every email once at start-up; the result is reused by
	# find_most_relevant_email for each question.
	# NOTE(review): assumes the CSV has a 'text' column holding strings -- confirm
	# against the actual file.
	email_tokens = tokenizer(df['text'].tolist(), padding=True, truncation=True, return_tensors="pt")

	# Helper that embeds the whole email corpus a single time.
	def _get_email_embeddings():
	    """Return the pooled embedding of every email, computing it only once.

	    The tensor is cached on the function object so the full set of emails is
	    not pushed through the model again for each incoming question.
	    """
	    cached = getattr(_get_email_embeddings, "_cache", None)
	    if cached is None:
	        with torch.no_grad():
	            cached = model(**email_tokens).pooler_output
	        _get_email_embeddings._cache = cached
	    return cached


	# Function to find the most relevant email based on the question
	def find_most_relevant_email(question):
	    """Return the text of the email whose embedding is closest to ``question``.

	    Args:
	        question: The user's query string.

	    Returns:
	        The ``'text'`` value of the row in ``df`` with the highest cosine
	        similarity to the question embedding.
	    """
	    # Tokenize the question
	    question_tokens = tokenizer(question, padding=True, truncation=True, return_tensors="pt")
	    # Encode the question; the email embeddings come from the one-time cache.
	    # NOTE(review): `pooler_output` is kept from the original code; sentence-
	    # transformers checkpoints are presumably meant to be mean-pooled over the
	    # attention mask -- confirm against the model card.
	    with torch.no_grad():
	        question_embedding = model(**question_tokens).pooler_output
	    email_embeddings = _get_email_embeddings()
	    # scipy's `cosine` only compares two 1-D vectors and accepts no `axis`
	    # argument, so the previous call raised TypeError. torch broadcasts the
	    # single question row against every email row and returns one similarity
	    # score per email.
	    similarities = torch.nn.functional.cosine_similarity(question_embedding, email_embeddings, dim=1)
	    # Get the index of the most similar email
	    most_similar_index = similarities.argmax().item()
	    # argmax yields a row position, so look the text up positionally.
	    return df['text'].iloc[most_similar_index]

	# Wire find_most_relevant_email into a Gradio UI with a text input and a
	# text output.
	iface = gr.Interface(
	    find_most_relevant_email,
	    "text",
	    "text",
	    description="Enter a question and get the most relevant email from the dataset.",
	    title="Email Question Answering System",
	)

	# Start serving the interface.
	iface.launch()