import os

from dotenv import load_dotenv

load_dotenv()

import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage, AIMessage

# function to get the raw text from the pages of multiple PDFs
def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text
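
# Note (assumption, not stated in the original code): PdfReader only reads the
# text layer of a PDF, so scanned/image-only pages yield little or no text;
# get_vectorstore() below guards against the resulting empty chunk list.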

# function to break the raw text into chunks
def get_text_chunks(raw_text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=6000,
        chunk_overlap=1000,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_text(raw_text)
    return chunks
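
# chunk_size and chunk_overlap above are measured in characters (length_function=len),
# so consecutive 6000-character chunks share roughly 1000 characters of context and
# sentences are less likely to be cut off at a chunk boundary.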

# function to return a vectorstore with embeddings of the chunks
def get_vectorstore(text_chunks):
    if not text_chunks:
        st.error("The above file couldn't be processed", icon="⚠️")
        return None
    vectorstore = FAISS.from_texts(text_chunks, HuggingFaceEmbeddings())
    return vectorstore
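
# HuggingFaceEmbeddings() is instantiated with no arguments here, so it falls back to
# the library's default sentence-transformers model. A minimal sketch of pinning the
# model explicitly (the model name below is an assumption, not part of this app):
#
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     vectorstore = FAISS.from_texts(text_chunks, embeddings)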

# setting up the chatbot
groq_api_key = os.getenv("GROQ_API_KEY_GIT")
llm = ChatGroq(
    model="llama3-8b-8192",
    api_key=groq_api_key,
)
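
# Assumption: GROQ_API_KEY_GIT is expected to be set in the environment or in the
# .env file picked up by load_dotenv() above, e.g.
#
#     GROQ_API_KEY_GIT=<your Groq API key>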

# setting up the RAG chain
def get_conversation_chain(vectorstore):
    if vectorstore:
        retriever = vectorstore.as_retriever()
        contextualize_q_system_prompt = (
            "Given a chat history and the latest user question "
            "which might reference context in the chat history, "
            "formulate a standalone question which can be understood "
            "without the chat history. Do NOT answer the question, "
            "just reformulate it if needed and otherwise return it as is."
        )
        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        history_aware_retriever = create_history_aware_retriever(
            llm, retriever, contextualize_q_prompt
        )
        system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If the question is not related to the context provided, "
            "say that you don't know the answer. Use three sentences maximum "
            "and keep the answer concise."
            "\n\n"
            "{context}"
        )
        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        question_answer_chain = create_stuff_documents_chain(
            llm,
            qa_prompt,
        )
        rag_chain = create_retrieval_chain(
            history_aware_retriever,
            question_answer_chain,
        )
st.toast("You are ready to chat", icon="๐") | |
return rag_chain | |
else: | |
return | |
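
# Illustrative usage sketch of the returned chain outside the Streamlit flow
# (the question string below is made up):
#
#     chain = get_conversation_chain(get_vectorstore(get_text_chunks(raw_text)))
#     result = chain.invoke({"input": "What is this document about?", "chat_history": []})
#     print(result["answer"])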

def main():
    # setting up page title
    st.set_page_config(
        page_title="Chat with multiple PDFs",
        page_icon=":books:",
        layout="wide",  # optional: set layout to wide for more space
    )
    st.title("Chat with multiple PDFs :books:")

    # Sidebar for Navigation
    with st.sidebar:
        st.header("Navigation")
        selection = st.radio(
            "Go to",
            ("Upload & Process PDFs", "Chat with PDFs", "View Chat History"),
            index=0,
        )
    # INITIALIZING SESSION STATE VARIABLES
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "display_chat_history" not in st.session_state:
        st.session_state.display_chat_history = []
    if "raw_text" not in st.session_state:
        st.session_state.raw_text = ""
    # rag_chain must exist before the "Chat with PDFs" branch checks it,
    # otherwise opening that tab before processing raises an AttributeError
    if "rag_chain" not in st.session_state:
        st.session_state.rag_chain = None
    # Conditional display based on sidebar selection
    if selection == "Upload & Process PDFs":
        # file uploader to allow users to upload files
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
            disabled=False,
        )
        if not pdf_docs:
            st.info("Kindly upload a PDF to chat :)")
        if st.button("Process"):
            if pdf_docs:
                with st.spinner("Kindly wait while we process your documents"):
                    # get pdf text
                    st.session_state.raw_text = get_pdf_text(pdf_docs)
                    # get the text chunks
                    text_chunks = get_text_chunks(st.session_state.raw_text)
                    # create vector store with embeddings
                    vectorstore = get_vectorstore(text_chunks)
                    # create conversation chain
                    st.session_state.rag_chain = get_conversation_chain(vectorstore)
            else:
                st.toast("Kindly upload a PDF first", icon="⚠️")
elif selection == "Chat with PDFs": | |
if st.session_state.raw_text != "" and st.session_state.rag_chain: | |
user_input = st.chat_input("Ask a question about your documents:") | |
if user_input: | |
# Display the user input | |
with st.chat_message("user"): | |
st.markdown(user_input) | |
# Append user input to display chat history | |
st.session_state.display_chat_history.append( | |
{"role": "user", "content": user_input} | |
) | |
try: | |
# Generate response | |
response = st.session_state.rag_chain.invoke( | |
{"input": user_input, "chat_history": st.session_state.chat_history} | |
) | |
# Display response to user | |
with st.chat_message("assistant"): | |
st.markdown(response["answer"]) | |
# Append response to display chat history | |
st.session_state.display_chat_history.append( | |
{"role": "assistant", "content": response["answer"]} | |
) | |
# Appending the user and bot responses to the chat history for the model | |
st.session_state.chat_history.extend( | |
[ | |
HumanMessage(content=user_input), | |
AIMessage(content=response["answer"]), | |
] | |
) | |
except: | |
st.error("You have exhausted your API limits!!") | |
else: | |
st.info("Please upload and process PDFs first!") | |
elif selection == "View Chat History": | |
st.header("Chat History") | |
if st.session_state.display_chat_history: | |
for chat in st.session_state.display_chat_history: | |
with st.chat_message(chat["role"]): | |
st.markdown(chat["content"]) | |
else: | |
st.info("No chat history available yet!") | |

if __name__ == "__main__":
    main()
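
# To launch the app locally (the file name below is an assumption):
#
#     streamlit run app.py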