File size: 2,546 Bytes
94fd389
 
 
95b733e
94fd389
afd55aa
 
94fd389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27f3810
 
c47a688
94fd389
 
 
 
c47a688
94fd389
 
 
 
 
 
 
 
 
95b733e
 
 
4e4d80a
 
95b733e
94fd389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afd55aa
94fd389
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Disable Streamlit's usage-statistics phone-home; set before streamlit is
# imported so the setting is in effect when the module initializes.
import os
os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"


import pickle
import numpy as np
import streamlit as st
import warnings
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import os  # NOTE(review): duplicate of the `import os` above — harmless but redundant

from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline

# Suppress ALL warnings for a clean UI; note this also hides deprecation notices.
warnings.filterwarnings("ignore")

# ==================== Load Embeddings & Docs ====================



try:
    # Precomputed document embeddings (numpy array saved with np.save) and the
    # pickled document list they were built from.
    # NOTE(review): neither `embeddings` nor `documents` is referenced again in
    # this script — retrieval goes through the Pinecone index below. Confirm
    # whether these loads are still needed.
    embeddings = np.load("src/embeddings.npy")
    # SECURITY: pickle.load executes arbitrary code from the file; acceptable
    # only because src/documents.pkl is produced by this project, not user input.
    with open("src/documents.pkl", "rb") as f:
        documents = pickle.load(f)  
except Exception as e:
    # Surface the failure in the Streamlit UI and halt the script.
    st.error(f"❌ Error loading embeddings or documents: {e}")
    st.stop()


# ==================== Setup Pinecone ====================
# Connect to the managed Pinecone service and open the index that stores the
# Changi document vectors (384-dim, matching all-MiniLM-L6-v2). Any failure —
# missing PINECONE_API_KEY, network error, unknown index — is shown in the UI
# and the script is halted.
try:
    pinecone_client = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
    index = pinecone_client.Index("changi-rag-384")
except Exception as e:
    st.error(f"❌ Error connecting to Pinecone: {e}")
    st.stop()

# ==================== Embedding Model ====================
# Query-time embedder for retrieval; MUST match the model that produced the
# vectors in the "changi-rag-384" Pinecone index (all-MiniLM-L6-v2, 384-dim).
#
# Fix: the original additionally instantiated a raw SentenceTransformer bound
# to `model`, but that object was never used and the name `model` was
# immediately re-bound by the flan-t5 load further down — it only loaded the
# same MiniLM weights a second time. The dead load and its import are removed,
# along with a redundant mid-file re-import of HuggingFaceEmbeddings (already
# imported at the top of the file).
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# ==================== Vector Store & Retriever ====================
# Expose the Pinecone index as a LangChain vector store and derive a retriever
# from it; "page_content" is the metadata key that holds each chunk's text.
vectorstore = PineconeVectorStore(index=index, embedding=embed_model, text_key="page_content")
retriever = vectorstore.as_retriever()

# ==================== HuggingFace QA Model ====================
# Generation side of the RAG chain: flan-t5-base wrapped as a LangChain LLM,
# combined with the Pinecone-backed retriever into a RetrievalQA chain (`qa`).
GENERATOR_MODEL = "google/flan-t5-base"

flan_tokenizer = AutoTokenizer.from_pretrained(GENERATOR_MODEL)
flan_model = AutoModelForSeq2SeqLM.from_pretrained(GENERATOR_MODEL)

llm = HuggingFacePipeline(
    pipeline=pipeline("text2text-generation", model=flan_model, tokenizer=flan_tokenizer)
)
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# ==================== Streamlit UI ====================
# NOTE(review): st.set_page_config must be the FIRST Streamlit call in the
# script. If any st.error/st.stop in the setup blocks above fires first, this
# line raises StreamlitAPIException — consider moving it to the top of the file.
st.set_page_config(page_title="Changi RAG Chatbot", layout="wide")
st.title("🛫 Changi Airport RAG Chatbot")

# Free-text question box; `query` is "" (falsy) until the user enters text.
query = st.text_input("Ask me anything about Changi Airport facilities:")

if query:
    with st.spinner("Thinking..."):
        try:
            # NOTE(review): Chain.run is deprecated in newer LangChain releases;
            # qa.invoke({"query": query}) is the current API — confirm the
            # pinned langchain version before migrating.
            response = qa.run(query)
            st.write("### ✈️ Answer:")
            st.success(response)
        except Exception as e:
            # Retrieval or generation failure — show the error instead of crashing.
            st.error(f"⚠️ Failed to generate answer: {e}")