# Changi Airport RAG chatbot — Streamlit app (deployed as a Hugging Face Space).
# Disable Streamlit telemetry; must be set before streamlit is imported.
import os
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"

import pickle
import warnings

import numpy as np
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline

# Silence noisy deprecation/usage warnings from the ML stack.
warnings.filterwarnings("ignore")
# ==================== Load Embeddings & Docs ====================
# NOTE(review): `embeddings` and `documents` are loaded here but never
# referenced again in this file — retrieval goes through Pinecone below.
# Presumably kept for parity with the indexing script; confirm before removing.
try:
    embeddings = np.load("src/embeddings.npy")
    with open("src/documents.pkl", "rb") as pkl_file:
        documents = pickle.load(pkl_file)
except Exception as load_err:
    # Surface the failure in the UI and halt the script run.
    st.error(f"β Error loading embeddings or documents: {load_err}")
    st.stop()
# ==================== Setup Pinecone ====================
# Connect to the managed Pinecone index holding the pre-computed document
# vectors. Exposes `pc` (client) and `index` (index handle) at module level.
try:
    # Check the key explicitly so the user sees a clear message instead of a
    # bare KeyError('PINECONE_API_KEY') from os.environ[...].
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        raise RuntimeError("PINECONE_API_KEY environment variable is not set")
    pc = Pinecone(api_key=api_key)
    # Index name must match the 384-dim index built for all-MiniLM-L6-v2.
    index = pc.Index("changi-rag-384")
except Exception as e:
    st.error(f"β Error connecting to Pinecone: {e}")
    st.stop()
# ==================== Embedding Model ====================
# LangChain wrapper around the same sentence-transformers checkpoint used to
# build the index; its 384-dim output must match the "changi-rag-384" index.
# (Removed: a duplicate HuggingFaceEmbeddings import — already imported at the
# top of the file — and an unused raw SentenceTransformer instance whose
# `model` name was immediately shadowed by the FLAN-T5 model below.)
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# ==================== Vector Store & Retriever ====================
# Wrap the existing Pinecone index for LangChain; stored metadata keeps the
# raw chunk text under the "page_content" key.
vectorstore = PineconeVectorStore(
    embedding=embed_model,
    index=index,
    text_key="page_content",
)
retriever = vectorstore.as_retriever()
# ==================== HuggingFace QA Model ====================
# FLAN-T5 (seq2seq) served through a local transformers pipeline, then wrapped
# so LangChain's RetrievalQA chain can use it as the answer generator.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
qa_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
)
llm = HuggingFacePipeline(pipeline=qa_pipeline)

# Retrieval-augmented QA chain: retriever supplies context, FLAN-T5 answers.
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
# ==================== Streamlit UI ====================
# NOTE(review): Streamlit requires set_page_config to be the FIRST st.* call
# in a run; the st.error/st.stop failure paths above can execute before this
# line — confirm whether this should be hoisted to the top of the script.
st.set_page_config(page_title="Changi RAG Chatbot", layout="wide")
st.title("π« Changi Airport RAG Chatbot")

query = st.text_input("Ask me anything about Changi Airport facilities:")
if query:
    with st.spinner("Thinking..."):
        try:
            answer = qa.run(query)
            st.write("### βοΈ Answer:")
            st.success(answer)
        except Exception as err:
            st.error(f"β οΈ Failed to generate answer: {err}")