# Changi Airport RAG chatbot — Streamlit app.
import os
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"
import pickle
import numpy as np
import streamlit as st
import warnings
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import os
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
warnings.filterwarnings("ignore")
# ==================== Load Embeddings & Docs ====================
# Startup preload of the serialized corpus; any failure aborts the app with
# a visible banner rather than a traceback.
# NOTE(review): neither `embeddings` nor `documents` is referenced again in
# this file — retrieval goes through Pinecone below. Confirm before removing.
try:
    with open("src/documents.pkl", "rb") as doc_file:
        documents = pickle.load(doc_file)
    embeddings = np.load("src/embeddings.npy")
except Exception as exc:
    st.error(f"❌ Error loading embeddings or documents: {exc}")
    st.stop()
# ==================== Setup Pinecone ====================
# Connect to the pre-built "changi-rag-384" index (384-dim, matching
# all-MiniLM-L6-v2 embeddings). Defines `pc` and `index` for the blocks below.
try:
    # Fail with a clear message when the key is missing; the original
    # os.environ["PINECONE_API_KEY"] surfaced only "KeyError: 'PINECONE_API_KEY'"
    # in the error banner, which is opaque to users.
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        raise RuntimeError("PINECONE_API_KEY environment variable is not set")
    pc = Pinecone(api_key=api_key)
    index = pc.Index("changi-rag-384")
except Exception as e:
    st.error(f"❌ Error connecting to Pinecone: {e}")
    st.stop()
# ==================== Embedding Model ====================
# Query-time embedder for the vector store; must match the model used to
# build the index (all-MiniLM-L6-v2, 384 dims).
# The original additionally instantiated a bare SentenceTransformer copy of
# the same model into `model`, which was never read — that redundant second
# model load (and its imports) has been removed.
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# ==================== Vector Store & Retriever ====================
# Expose the Pinecone index through LangChain's vector-store interface and
# its default similarity retriever. `text_key` names the metadata field that
# holds the raw document text.
vectorstore = PineconeVectorStore(index=index, embedding=embed_model, text_key="page_content")
retriever = vectorstore.as_retriever()
# ==================== HuggingFace QA Model ====================
# FLAN-T5 (seq2seq) served via a transformers text2text pipeline, wrapped as
# a LangChain LLM and combined with the Pinecone retriever into the `qa`
# RetrievalQA chain used by the UI below.
QA_MODEL_NAME = "google/flan-t5-base"
flan_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_NAME)
flan_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_NAME)
generation_pipeline = pipeline(
    "text2text-generation",
    model=flan_model,
    tokenizer=flan_tokenizer,
)
langchain_llm = HuggingFacePipeline(pipeline=generation_pipeline)
qa = RetrievalQA.from_chain_type(llm=langchain_llm, retriever=retriever)
# ==================== Streamlit UI ====================
# Single-input chat page: take a question, run it through the RetrievalQA
# chain, and render either the answer or an error banner.
st.set_page_config(page_title="Changi RAG Chatbot", layout="wide")
st.title("🛫 Changi Airport RAG Chatbot")

user_question = st.text_input("Ask me anything about Changi Airport facilities:")
if user_question:
    with st.spinner("Thinking..."):
        try:
            # NOTE(review): `qa.run` is deprecated in recent LangChain in
            # favour of `qa.invoke` — confirm installed version before changing.
            answer = qa.run(user_question)
            st.write("### ✈️ Answer:")
            st.success(answer)
        except Exception as err:
            st.error(f"⚠️ Failed to generate answer: {err}")