Spaces:

Mythus
/

BooksCheating

Paused

App Files Files Community

BooksCheating / app_chat.py

Mythus

Upload 7 files

22c5eeb verified over 1 year ago

raw

history blame contribute delete

4.69 kB

	import streamlit as st
	import os
	from constants import search_number_messages
	from langchain_utils import initialize_chat_conversation
	from search_indexing import download_and_index_pdf
	import re


	def remove_url(url_to_remove):
	    """
	    Button callback: drop ``url_to_remove`` from the URLs kept in the session state.

	    Does nothing when the URL is not currently stored.
	    """
	    stored_urls = st.session_state.urls
	    if url_to_remove not in stored_urls:
	        return
	    stored_urls.remove(url_to_remove)


	# Page title
	st.set_page_config(page_title='Talk with PDFs using LLMs - Beta')
	st.title('Talk with PDFs using LLMs - (Beta)')

	# Seed the session state on the first run of a session; keys that already exist are left untouched.
	_session_defaults = (
	    # FAISS index cache, kept so the same PDFs are not downloaded and embedded again on every question
	    ('faiss_index', {'indexed_urls': [], 'index': None}),
	    # Conversation memory used by Langchain
	    ('conversation_memory', None),
	    # Chat history used by StreamLit (for display purposes)
	    ("messages", []),
	    # URLs added by the user in the UI
	    ('urls', []),
	)
	for _state_key, _initial_value in _session_defaults:
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	with st.sidebar:

	    # Step 1: API key typed by the user (masked input). Read later when a chat message is submitted.
	    openai_api_key = st.text_input('Step 1 - OpenAI API Key:', type='password')

	    # Add/Remove URLs form
	    with st.form('urls-form', clear_on_submit=True):
	        url = st.text_input('Step 2 - URLs to relevant PDFs: ')
	        add_url_button = st.form_submit_button('Add')
	        if add_url_button:
	            # Ignore blank submissions and surrounding whitespace so that an empty
	            # string is never stored as a "URL" and later handed to the PDF downloader.
	            new_url = url.strip()
	            if new_url and new_url not in st.session_state.urls:
	                st.session_state.urls.append(new_url)

	    # Display a container with the URLs added by the user so far
	    with st.container():
	        if st.session_state.urls:
	            st.header('URLs added:')
	            # A dedicated loop name keeps the form's `url` value from being rebound here.
	            for added_url in st.session_state.urls:
	                st.write(added_url)
	                # The widget key embeds the URL, so each Remove button is unique per entry.
	                st.button(label='Remove', key=f"Remove {added_url}", on_click=remove_url, kwargs={'url_to_remove': added_url})
	                st.divider()

	# On every rerun, replay the stored chat history so earlier turns stay visible
	for past_message in st.session_state.messages:
	    st.chat_message(past_message["role"]).markdown(past_message["content"])

	# React to user input
	if query_text := st.chat_input("Your message"):

	    # Export the key only when one was typed, so that a key already present in the
	    # environment is not overwritten with an empty string.
	    if openai_api_key:
	        os.environ['OPENAI_API_KEY'] = openai_api_key

	    # Display user message in chat message container, and append to session state
	    st.chat_message("user").markdown(query_text)
	    st.session_state.messages.append({"role": "user", "content": query_text})

	    # Check if FAISS index already exists, or if it needs to be created as it includes new URLs
	    index_state = st.session_state['faiss_index']
	    session_urls = st.session_state.urls
	    if index_state['index'] is None or set(index_state['indexed_urls']) != set(session_urls):
	        with st.spinner('Downloading and indexing PDFs...'):
	            faiss_index = download_and_index_pdf(session_urls)
	        index_state['index'] = faiss_index
	        # Store a *copy* of the URL list, and only after indexing succeeded. Storing the
	        # session list itself would alias it: later adds/removes would change both lists
	        # at once and the comparison above could never detect that a rebuild is needed.
	        index_state['indexed_urls'] = list(session_urls)
	    else:
	        faiss_index = index_state['index']

	    # Check if conversation memory has already been initialized and is part of the session state
	    # NOTE(review): an existing conversation is reused as-is, so an index rebuilt above is not
	    # passed to it here - confirm whether initialize_chat_conversation must be re-run after a rebuild.
	    if st.session_state['conversation_memory'] is None:
	        conversation = initialize_chat_conversation(faiss_index)
	        st.session_state['conversation_memory'] = conversation
	    else:
	        conversation = st.session_state['conversation_memory']

	    # Search PDF snippets using the last few user messages
	    recent_messages = st.session_state.messages[-search_number_messages:]
	    user_messages_history = '\n'.join(
	        message['content'] for message in recent_messages if message['role'] == 'user'
	    )

	    with st.spinner('Querying OpenAI GPT...'):
	        response = conversation.predict(input=query_text, user_messages_history=user_messages_history)

	    # Display assistant response in chat message container
	    with st.chat_message("assistant"):
	        st.markdown(response)
	        # NOTE(review): index 1 is presumably the snippet-holding memory set up in langchain_utils - confirm.
	        snippet_memory = conversation.memory.memories[1]
	        # Raw-string pattern (a plain "\d" is an invalid escape sequence), compiled once for the loop.
	        # Matches both the <START_SNIPPET_PAGE_n> and <END_SNIPPET_PAGE_n> tags.
	        snippet_tag = re.compile(r"<(?:START|END)_SNIPPET_PAGE_\d+>")
	        for page_number, snippet in zip(snippet_memory.pages, snippet_memory.snippets):
	            with st.expander(f'Snippet from page {page_number + 1}'):
	                # Remove the <START> and <END> tags from the snippets before displaying them
	                st.markdown(snippet_tag.sub('', snippet))

	    # Add assistant response to chat history
	    st.session_state.messages.append({"role": "assistant", "content": response})