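"""Chainlit RAG app for textile Q&A.

Builds (or loads) a FAISS vectorstore over local textile notes and answers
questions with Mistral-7B-Instruct via a LangChain RetrievalQA chain.
"""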
import os
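# Chainlit writes its config and file store at startup; on hosts with a
# read-only app directory (e.g. Hugging Face Spaces) only /tmp is writable,
# so redirect both paths before importing chainlit.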
os.environ["CHAINLIT_CONFIG_DIR"] = "/tmp/.chainlit"
os.environ["CHAINLIT_FILES_PATH"] = "/tmp/.chainlit/files"
import chainlit as cl
# NOTE: on langchain >= 0.2 these imports move to langchain_community
# (e.g. from langchain_community.vectorstores import FAISS); the paths
# below assume the older monolithic langchain package.
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
# === Step 1: Build or load vectorstore ===
VECTORSTORE_DIR = "vectorstore"  # persisted FAISS index (use a /tmp path if the app dir is read-only)
DATA_PATH = "data/textile_notes.txt"  # your textile documents
def build_vectorstore():
    loader = TextLoader(DATA_PATH)
    documents = loader.load()
    # ~500-character chunks with 50-character overlap keep each retrieved
    # piece small enough to stuff several into one prompt.
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
    db = FAISS.from_documents(chunks, embeddings)
    db.save_local(VECTORSTORE_DIR)
    return db
def load_vectorstore():
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
    # allow_dangerous_deserialization (required on recent langchain releases)
    # is safe here because the index was built locally by this app.
    return FAISS.load_local(VECTORSTORE_DIR, embeddings, allow_dangerous_deserialization=True)
# === Step 2: Load LLM and create QA chain ===
def load_qa_chain():
    # Load the local vectorstore if it exists, else build it from DATA_PATH.
    if os.path.exists(VECTORSTORE_DIR):
        vectorstore = load_vectorstore()
    else:
        vectorstore = build_vectorstore()

    # Load the open-weights Mistral 7B Instruct model. At ~7B parameters it
    # needs roughly 15 GB of GPU memory in float16.
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.1",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    # Prompt template for friendly, user-focused answers grounded in the
    # retrieved context.
    prompt_template = """
Answer the question using ONLY the context below.
Be clear, helpful, and friendly.

Context:
{context}

Question:
{question}
"""
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )

    # "stuff" chain type: concatenate the top-4 retrieved chunks directly
    # into the prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=True,
    )
    return qa_chain
# === Chainlit event handlers ===
@cl.on_chat_start
async def on_chat_start():
    # Model loading is slow and blocking, so run it in a worker thread to keep
    # the event loop responsive. (Caching the chain at module level would avoid
    # reloading the model for every new chat session.)
    qa = await cl.make_async(load_qa_chain)()
    cl.user_session.set("qa_chain", qa)
    await cl.Message(
        "👋 Hi! Ask me anything about textiles and I'll answer using our custom documents."
    ).send()


@cl.on_message
async def on_message(message: cl.Message):
    qa = cl.user_session.get("qa_chain")
    # With return_source_documents=True the chain has two outputs, so qa.run()
    # raises; call the chain with a dict and read the "result" key instead.
    res = await cl.make_async(qa)({"query": message.content})
    await cl.Message(res["result"]).send()
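# Run locally with: chainlit run app.py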