import os

from dotenv import load_dotenv

load_dotenv()  # pull GROQ_API_KEY_GIT (and any other secrets) from a local .env

import streamlit as st
from PyPDF2 import PdfReader
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
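
# Streamlit app: chat with multiple uploaded PDFs through a history-aware
# RAG pipeline (PyPDF2 extraction -> chunking -> FAISS index over
# HuggingFace embeddings -> Groq-hosted Llama 3 for answering).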


# function to get raw text from the pages of multiple PDFs
def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no extractable text
            text += page.extract_text() or ""
    return text
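
# Example usage (hypothetical file names; any file-like object accepted by
# PdfReader works, including Streamlit's UploadedFile):
#   raw_text = get_pdf_text([open("report.pdf", "rb"), open("notes.pdf", "rb")])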


# function to break the raw text into chunks
def get_text_chunks(raw_text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=6000,
        chunk_overlap=1000,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_text(raw_text)
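
# The 1000-character overlap repeats the tail of each chunk at the start of
# the next one, so facts that straddle a chunk boundary remain retrievable.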


# function to return a vectorstore with embeddings of the chunks
def get_vectorstore(text_chunks):
    if not text_chunks:
        st.error("The above file couldn't be processed", icon="⚠️")
        return None
    return FAISS.from_texts(text_chunks, HuggingFaceEmbeddings())
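
# Note: HuggingFaceEmbeddings() with no arguments falls back to a default
# sentence-transformers model, which is downloaded on first use, so the
# first "Process" click may be slow while the weights are fetched.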


# setting up the chatbot
groq_api_key = os.getenv("GROQ_API_KEY_GIT")
llm = ChatGroq(
    model="llama3-8b-8192",
    api_key=groq_api_key,
)
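
# The key is read from GROQ_API_KEY_GIT (loaded from .env above); without a
# valid key, requests to the Groq API will fail.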


# setting up the RAG chain
def get_conversation_chain(vectorstore):
    if not vectorstore:
        return None
    retriever = vectorstore.as_retriever()
    # First stage: rewrite the latest user question into a standalone one so
    # that retrieval also works for follow-up questions.
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )
    # Second stage: answer the (possibly rewritten) question from the
    # retrieved context.
    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If the question is not related to the context "
        "provided, say that you don't know the answer. Use three "
        "sentences maximum and keep the answer concise."
        "\n\n"
        "{context}"
    )
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
    st.toast("You are ready to chat", icon="🎉")
    return rag_chain
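
# The chain is invoked with a dict and returns a dict, roughly:
#   result = rag_chain.invoke({"input": question, "chat_history": messages})
#   result["answer"]   # the generated reply
#   result["context"]  # the retrieved source documents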


def main():
    # setting up the page title
    st.set_page_config(
        page_title="Chat with multiple PDFs",
        page_icon=":books:",
        layout="wide",  # optional: set layout to wide for more space
    )
    st.title("Chat with multiple PDFs :books:")

    # sidebar for navigation
    with st.sidebar:
        st.header("Navigation")
        selection = st.radio(
            "Go to",
            ("Upload & Process PDFs", "Chat with PDFs", "View Chat History"),
            index=0,
        )

    # initializing session state variables (Streamlit reruns this script on
    # every interaction, so anything that must persist lives in session state)
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "display_chat_history" not in st.session_state:
        st.session_state.display_chat_history = []
    if "raw_text" not in st.session_state:
        st.session_state.raw_text = ""
    if "rag_chain" not in st.session_state:
        # set up front so the "Chat with PDFs" page can safely check it
        st.session_state.rag_chain = None

    # conditional display based on sidebar selection
    if selection == "Upload & Process PDFs":
        # file uploader to allow users to upload files
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
        )
        if not pdf_docs:
            st.info("Kindly upload a PDF to chat :)")
        if st.button("Process"):
            if pdf_docs:
                with st.spinner("Kindly wait while we process your documents"):
                    # get the pdf text
                    st.session_state.raw_text = get_pdf_text(pdf_docs)
                    # get the text chunks
                    text_chunks = get_text_chunks(st.session_state.raw_text)
                    # create a vector store with embeddings
                    vectorstore = get_vectorstore(text_chunks)
                    # create the conversation chain
                    st.session_state.rag_chain = get_conversation_chain(vectorstore)
            else:
                st.toast("Kindly upload a PDF", icon="⚠️")
elif selection == "Chat with PDFs":
if st.session_state.raw_text != "" and st.session_state.rag_chain:
user_input = st.chat_input("Ask a question about your documents:")
if user_input:
# Display the user input
with st.chat_message("user"):
st.markdown(user_input)
# Append user input to display chat history
st.session_state.display_chat_history.append(
{"role": "user", "content": user_input}
)
try:
# Generate response
response = st.session_state.rag_chain.invoke(
{"input": user_input, "chat_history": st.session_state.chat_history}
)
# Display response to user
with st.chat_message("assistant"):
st.markdown(response["answer"])
# Append response to display chat history
st.session_state.display_chat_history.append(
{"role": "assistant", "content": response["answer"]}
)
# Appending the user and bot responses to the chat history for the model
st.session_state.chat_history.extend(
[
HumanMessage(content=user_input),
AIMessage(content=response["answer"]),
]
)
except:
st.error("You have exhausted your API limits!!")
else:
st.info("Please upload and process PDFs first!")
elif selection == "View Chat History":
st.header("Chat History")
if st.session_state.display_chat_history:
for chat in st.session_state.display_chat_history:
with st.chat_message(chat["role"]):
st.markdown(chat["content"])
else:
st.info("No chat history available yet!")
if __name__ == "__main__":
main() |
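
# To try the app locally (assuming this file is saved as app.py and the
# imports above are installed):
#   streamlit run app.py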