# CHAT-WITH-PDFS / app.py
import os
from dotenv import load_dotenv
load_dotenv()
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
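# NOTE: the imports above assume the split LangChain packages are installed.
# A minimal requirements sketch (package names assumed, pin versions as needed):
#   streamlit, python-dotenv, PyPDF2, faiss-cpu, sentence-transformers,
#   langchain, langchain-community, langchain-huggingface, langchain-groq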
# function to get raw text from pages in multiple PDFs
def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may yield nothing for scanned/image-only pages
            text += page.extract_text() or ""
    return text
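# Usage sketch: get_pdf_text expects the list of uploaded file objects returned by
# st.file_uploader below and concatenates the extracted text of every page of every
# uploaded PDF into a single string.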
# function to break the raw text into chunks
def get_text_chunks(raw_text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=6000,
        chunk_overlap=1000,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_text(raw_text)
    return chunks
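# With these settings the splitter prefers paragraph, line and word boundaries,
# producing chunks of at most 6000 characters that overlap by roughly 1000 characters
# so that context is not lost at chunk boundaries.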
# function to return a vectorstore with embeddings of the chunks
def get_vectorstore(text_chunks):
    if not text_chunks:
        st.error("The uploaded file couldn't be processed", icon="⚠️")
        return None
    vectorstore = FAISS.from_texts(text_chunks, HuggingFaceEmbeddings())
    return vectorstore
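# HuggingFaceEmbeddings() falls back to its default sentence-transformers model
# (all-mpnet-base-v2 at the time of writing), which is downloaded on first use;
# pass model_name=... to choose a different embedding model.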
# setting up the chatbot
groq_api_key = os.getenv("GROQ_API_KEY_GIT")
llm = ChatGroq(
model="llama3-8b-8192",
api_key=groq_api_key,
)
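# The Groq key is read from the GROQ_API_KEY_GIT environment variable (populated from a
# local .env file by load_dotenv above, or from the host's secrets). If the variable is
# missing, api_key is None and requests to Groq will fail at query time.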
# setting up the RAG chain
def get_conversation_chain(vectorstore):
    if vectorstore:
        retriever = vectorstore.as_retriever()
        contextualize_q_system_prompt = (
            "Given a chat history and the latest user question "
            "which might reference context in the chat history, "
            "formulate a standalone question which can be understood "
            "without the chat history. Do NOT answer the question, "
            "just reformulate it if needed and otherwise return it as is."
        )
        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        history_aware_retriever = create_history_aware_retriever(
            llm, retriever, contextualize_q_prompt
        )
        system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If the question is not related to the context provided, say that you "
            "don't know the answer. Use three sentences maximum and keep the "
            "answer concise."
            "\n\n"
            "{context}"
        )
        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        question_answer_chain = create_stuff_documents_chain(
            llm,
            qa_prompt,
        )
        rag_chain = create_retrieval_chain(
            history_aware_retriever,
            question_answer_chain,
        )
        st.toast("You are ready to chat", icon="🎉")
        return rag_chain
    else:
        return None
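# Usage sketch (mirrors the calls further down): the returned chain is invoked as
#   rag_chain.invoke({"input": question, "chat_history": [HumanMessage(...), AIMessage(...)]})
# and returns a dict whose "answer" key holds the model's reply and whose "context" key
# holds the retrieved documents.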
def main():
    # setting up page title
    st.set_page_config(
        page_title="Chat with multiple PDFs",
        page_icon=":books:",
        layout="wide",  # optional: set layout to wide for more space
    )
    st.title("Chat with multiple PDFs :books:")

    # Sidebar for Navigation
    with st.sidebar:
        st.header("Navigation")
        selection = st.radio(
            "Go to",
            ("Upload & Process PDFs", "Chat with PDFs", "View Chat History"),
            index=0,
        )

    # INITIALIZING SESSION STATE VARIABLES
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "display_chat_history" not in st.session_state:
        st.session_state.display_chat_history = []
    if "raw_text" not in st.session_state:
        st.session_state.raw_text = ""
    if "rag_chain" not in st.session_state:
        st.session_state.rag_chain = None
    # Conditional display based on sidebar selection
    if selection == "Upload & Process PDFs":
        # file uploader to allow users to upload files
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
            disabled=False,
        )
        if not pdf_docs:
            st.info("Kindly upload a PDF to chat :)")
        if st.button("Process"):
            if pdf_docs:
                with st.spinner("Kindly wait while we process your documents"):
                    # get pdf text
                    st.session_state.raw_text = get_pdf_text(pdf_docs)
                    # get the text chunks
                    text_chunks = get_text_chunks(st.session_state.raw_text)
                    # create vector store with embeddings
                    vectorstore = get_vectorstore(text_chunks)
                    # create conversation chain
                    st.session_state.rag_chain = get_conversation_chain(vectorstore)
            else:
                st.toast("Kindly upload a PDF first", icon="⚠️")
    elif selection == "Chat with PDFs":
        if st.session_state.raw_text and st.session_state.rag_chain:
            user_input = st.chat_input("Ask a question about your documents:")
            if user_input:
                # Display the user input
                with st.chat_message("user"):
                    st.markdown(user_input)
                # Append user input to display chat history
                st.session_state.display_chat_history.append(
                    {"role": "user", "content": user_input}
                )
                try:
                    # Generate response
                    response = st.session_state.rag_chain.invoke(
                        {"input": user_input, "chat_history": st.session_state.chat_history}
                    )
                    # Display response to user
                    with st.chat_message("assistant"):
                        st.markdown(response["answer"])
                    # Append response to display chat history
                    st.session_state.display_chat_history.append(
                        {"role": "assistant", "content": response["answer"]}
                    )
                    # Appending the user and bot responses to the chat history for the model
                    st.session_state.chat_history.extend(
                        [
                            HumanMessage(content=user_input),
                            AIMessage(content=response["answer"]),
                        ]
                    )
                except Exception:
                    st.error(
                        "Could not generate a response. You may have exhausted your API limits."
                    )
        else:
            st.info("Please upload and process PDFs first!")
    elif selection == "View Chat History":
        st.header("Chat History")
        if st.session_state.display_chat_history:
            for chat in st.session_state.display_chat_history:
                with st.chat_message(chat["role"]):
                    st.markdown(chat["content"])
        else:
            st.info("No chat history available yet!")


if __name__ == "__main__":
    main()
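# To run locally (assuming a .env file with GROQ_API_KEY_GIT set):
#   streamlit run app.py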