# Spaces: Sleeping
# (Hosting-page status banner captured together with the source; it is not
# part of the program.)
import os

import streamlit as st
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from sentence_transformers import SentenceTransformer, util
# Streamlit app: upload .txt files and search them by BERT sentence-embedding
# similarity. Streamlit re-executes this script from the top on every user
# interaction, so everything below must also work on a run with no new upload.

# Load BERT model and create vectorizer
model_name = "bert-base-nli-mean-tokens"


@st.cache_resource
def _load_model(name):
    """Return the SentenceTransformer called *name*.

    Wrapped in ``st.cache_resource`` so the model is built once and reused,
    instead of being reloaded on every script rerun.
    """
    return SentenceTransformer(name)


model = _load_model(model_name)
vectorizer = model.encode

# Create docs folder if it doesn't exist
docs_path = "docs"
os.makedirs(docs_path, exist_ok=True)


def _load_corpus(folder):
    """Load every ``.txt`` document in *folder* and embed its text.

    Returns:
        A ``(documents, embeddings)`` pair. ``documents`` is the list produced
        by ``SimpleDirectoryReader.load_data()`` and ``embeddings`` is the
        tensor returned by ``vectorizer`` for their texts, in the same order.
        When the folder holds no readable ``.txt`` file the pair is
        ``([], None)``.
    """
    try:
        # NOTE(review): SimpleDirectoryReader is expected to raise ValueError
        # when it finds no matching file (e.g. on first launch, before any
        # upload) -- confirm against the installed llama_index version.
        reader = SimpleDirectoryReader(
            input_dir=folder,
            filename_as_id=True,
            required_exts=[".txt"],
        )
    except ValueError:
        return [], None

    docs = reader.load_data()
    if not docs:
        return [], None

    # One batched encode call; tensors are what util.semantic_search expects.
    embeddings = vectorizer([doc.text for doc in docs], convert_to_tensor=True)
    return docs, embeddings


# Streamlit app layout
st.title("Semantic and Similarity Search with BERT")

# File upload functionality
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
if uploaded_file is not None:
    # The file name is supplied by the client: keep only its final path
    # component so the upload cannot be written outside the docs folder.
    safe_name = os.path.basename(uploaded_file.name)
    with open(os.path.join(docs_path, safe_name), "wb") as f:
        f.write(uploaded_file.getbuffer())

# (Re)load documents and embeddings on every run, not only right after an
# upload, so a query never refers to an index that was not built in this run.
documents, document_embeddings = _load_corpus(docs_path)

if uploaded_file is not None:
    # Success message
    st.success("File uploaded & processed successfully!")

query = st.text_input("Enter your search query:")
if query:
    if not documents:
        st.warning("Upload a text file before searching.")
    else:
        # Rank all documents against the query once; the single best hit and
        # the top-5 list are both taken from this one ranking.
        query_embedding = vectorizer(query, convert_to_tensor=True)
        hits = util.semantic_search(
            query_embedding, document_embeddings, top_k=5
        )[0]
        similar_documents = [documents[hit["corpus_id"]] for hit in hits]

        # Semantic search (approximated): the best-scoring document
        most_similar_doc = similar_documents[0]
        st.subheader("Semantic Search Results")
        st.write(most_similar_doc.text)

        # Similarity search: show top 5 results
        st.subheader("Similarity Search Results")
        for doc in similar_documents:
            st.write(f"- {doc.doc_id}: {doc.text[:50]}...")