"""Streamlit app: upload text files and search them with BERT sentence embeddings.

Uploaded ``.txt`` files are saved under ``docs/``, every text file in that
folder is embedded with a SentenceTransformer model, and the user's query is
ranked against those embeddings by cosine similarity.
"""

import os

import streamlit as st
from llama_index import SimpleDirectoryReader, VectorStoreIndex  # noqa: F401
from sentence_transformers import SentenceTransformer, util

# Load BERT model and create vectorizer.
model_name = "bert-base-nli-mean-tokens"
model = SentenceTransformer(model_name)
vectorizer = model.encode

# Create docs folder if it doesn't exist.
docs_path = "docs"
os.makedirs(docs_path, exist_ok=True)

# Streamlit app layout.
st.title("Semantic and Similarity Search with BERT")

# Search state. Both stay empty until a file has been uploaded and embedded,
# so the query branch below can tell "nothing indexed yet" apart from a
# ready corpus instead of failing on an undefined name.
documents = []
document_embeddings = None

# File upload functionality.
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
if uploaded_file is not None:
    # The file name is supplied by the client; keep only its final path
    # component so the upload cannot be written outside ``docs_path``.
    safe_name = os.path.basename(uploaded_file.name)
    with open(os.path.join(docs_path, safe_name), "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Reload documents after the upload so the new file is searchable.
    documents = SimpleDirectoryReader(
        input_dir=docs_path, filename_as_id=True, required_exts=[".txt"]
    ).load_data()

    if documents:
        # Encode all documents in a single batched call; tensors are what
        # ``util.semantic_search`` expects for the corpus.
        document_embeddings = vectorizer(
            [doc.text for doc in documents], convert_to_tensor=True
        )
        # Success message.
        st.success("File uploaded & processed successfully!")

query = st.text_input("Enter your search query:")
if query:
    if document_embeddings is None:
        st.warning("Upload a text file before searching.")
    else:
        # Rank documents by cosine similarity to the query. The result for
        # the single query is a list of {"corpus_id", "score"} dicts sorted
        # best-first, capped at 5 entries.
        query_embedding = vectorizer(query, convert_to_tensor=True)
        hits = util.semantic_search(
            query_embedding, document_embeddings, top_k=5
        )[0]
        similar_documents = [documents[hit["corpus_id"]] for hit in hits]

        # Semantic search: show the single best match in full.
        most_similar_doc = similar_documents[0]
        st.subheader("Semantic Search Results")
        st.write(most_similar_doc.text)

        # Similarity search: show the top 5 results as short previews.
        st.subheader("Similarity Search Results")
        for doc in similar_documents:
            # NOTE(review): llama_index documents expose their identifier as
            # ``doc_id`` — confirm against the installed llama_index version.
            st.write(f"- {doc.doc_id}: {doc.text[:50]}...")