ReithBjarkan's picture
Added instructions.
0e965c1
raw
history blame
4.71 kB
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import numpy as np
import pandas as pd
# ---- Page heading ----
st.title("Keyword Cosine Similarity Tool")

# ---- Usage overview shown above the form ----
st.header("How to Use This Tool")
overview_text = """
Ever have to review a long list of queries to determine if they were even relevant to your target keyword? This Space aims to automate that process by entering your primary keyword and a list of related queries from any source you might do keyword research.
The result is an ordered list of your comparison keywords based on the cosine similarity of the embeddings of each query.
-- Quickly remove irrelevant keywords from your keyword research and move to the next step in your optimization!
"""
st.markdown(overview_text)

# ---- User inputs ----
st.header("Input Parameters")
primary_keyword = st.text_input(
    "Primary Keyword",
    placeholder="Enter your primary keyword",
)
# The widget stays registered under the "keywords" session-state key; its
# return value is the same text that key holds.
keywords = st.text_area(
    "Keywords to Compare",
    placeholder="Enter keywords separated by new lines or commas",
    help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.",
    key="keywords",
)
embedding_model_options = [
    "sentence-transformers/LaBSE",
    "sentence-transformers/all-MiniLM-L6-v2",
    "OpenAI Embeddings",
]
model_name = st.selectbox("Select Embedding Model", embedding_model_options)
# Only consulted when the "OpenAI Embeddings" option is selected.
openai_api_key = st.text_input("OpenAI API Key (optional)", type="password")
# Process Button

# Models loaded locally through sentence-transformers.
_LOCAL_EMBEDDING_MODELS = (
    "sentence-transformers/LaBSE",
    "sentence-transformers/all-MiniLM-L6-v2",
)
_OPENAI_MODEL_LABEL = "OpenAI Embeddings"
_OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of texts sent per OpenAI request, so a long keyword list needs a
# handful of requests instead of one per keyword.
_OPENAI_BATCH_SIZE = 512


def _split_keywords(raw_text):
    """Split raw text on commas and newlines, dropping blank entries."""
    return [kw.strip() for kw in raw_text.replace(",", "\n").split("\n") if kw.strip()]


def _embed_with_openai(texts, api_key):
    """Return a 2-D array holding one OpenAI embedding per entry of ``texts``.

    Works with both the openai>=1.0 client class and the legacy module-level
    ``openai.Embedding`` API, whichever the installed package provides.
    """
    if hasattr(openai, "OpenAI"):
        client = openai.OpenAI(api_key=api_key)
    else:
        client = None
        openai.api_key = api_key

    vectors = []
    for start in range(0, len(texts), _OPENAI_BATCH_SIZE):
        batch = texts[start:start + _OPENAI_BATCH_SIZE]
        if client is not None:
            data = client.embeddings.create(model=_OPENAI_EMBEDDING_MODEL, input=batch).data
            # Order by the reported index so row i matches batch[i].
            ordered = sorted(data, key=lambda item: item.index)
            vectors.extend(item.embedding for item in ordered)
        else:
            data = openai.Embedding.create(model=_OPENAI_EMBEDDING_MODEL, input=batch)["data"]
            ordered = sorted(data, key=lambda item: item["index"])
            vectors.extend(item["embedding"] for item in ordered)
    return np.array(vectors)


def _run_similarity_comparison(primary_text, raw_keywords, selected_model, api_key):
    """Embed the inputs, rank the keywords by cosine similarity and render the results.

    Every validation failure is reported with ``st.error`` followed by an early
    return, so the rest of the page still renders and no later step runs with
    missing embeddings.
    """
    primary = (primary_text or "").strip()
    # Process keywords: split by commas and/or new lines
    keyword_list = _split_keywords(raw_keywords or "")
    # Input made only of whitespace/commas yields nothing to compare.
    if not primary or not keyword_list:
        st.error("Please provide both the primary keyword and keywords to compare.")
        return

    if selected_model in _LOCAL_EMBEDDING_MODELS:
        st.info(f"Loading model: {selected_model}")
        model = SentenceTransformer(selected_model)
        st.info("Generating embeddings...")
        # Request NumPy arrays so scikit-learn can consume them directly.
        primary_embedding = model.encode(primary, convert_to_numpy=True)
        keyword_embeddings = model.encode(keyword_list, convert_to_numpy=True)
    elif selected_model == _OPENAI_MODEL_LABEL:
        if not api_key:
            st.error("Please provide your OpenAI API key for this model.")
            return
        st.info("Generating OpenAI embeddings...")
        # Embed the primary keyword together with the candidates in one batch.
        all_embeddings = _embed_with_openai([primary, *keyword_list], api_key)
        primary_embedding = all_embeddings[0]
        keyword_embeddings = all_embeddings[1:]
    else:
        st.error("Invalid model selection.")
        return

    # Calculate cosine similarities
    st.info("Calculating cosine similarities...")
    query_matrix = np.asarray(primary_embedding).reshape(1, -1)
    similarities = cosine_similarity(query_matrix, np.asarray(keyword_embeddings))[0]

    # Sort results by cosine similarity (highest first)
    st.info("Sorting results...")
    results = [
        {"Keyword": kw, "Cosine Similarity": sim}
        for kw, sim in zip(keyword_list, similarities)
    ]
    sorted_results = sorted(results, key=lambda row: row["Cosine Similarity"], reverse=True)

    # Display results
    st.header("Results")
    df_results = pd.DataFrame(sorted_results)
    st.table(df_results)

    # Download results as CSV
    st.download_button(
        label="Download Results as CSV",
        data=df_results.to_csv(index=False),
        file_name="cosine_similarity_results.csv",
        mime="text/csv",
    )

    # Debugging/Intermediate Data
    st.header("Debugging Info")
    st.write("Primary Embedding:", primary_embedding)
    st.write("Keyword Embeddings:", keyword_embeddings)


if st.button("Calculate Similarities"):
    _run_similarity_comparison(primary_keyword, keywords, model_name, openai_api_key)
# Footer: a divider, then the author credit and the feedback link,
# rendered in that order.
footer_lines = (
    "---",
    "Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan)",
    "[Report a bug or make a suggestion](mailto:rybacorn[at]gmail[dot]com?subject=Keyword%20Cosine%20Similarity%20Tool%20Bug/Feedback)",
)
for footer_line in footer_lines:
    st.markdown(footer_line)