# Initial implementation of multidoc QA (commit 0beb6cf, by shaolang)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import GutenbergLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
# Gutenberg e-text paths for the two source books.
GUTENBERG_PATHS = [
    'cache/epub/35688/pg35688.txt',
    'files/23718/23718-readme.txt',
]

# Fetch both texts once at import time.
docs = []
for _path in GUTENBERG_PATHS:
    docs.extend(GutenbergLoader(f'https://www.gutenberg.org/{_path}').load())

# Split into ~1000-character, non-overlapping chunks for retrieval.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator="\n")
texts = text_splitter.split_documents(docs)

# Lazily-initialized globals: populated on the first call to make_inference,
# once the user-supplied OpenAI API key is available.
chain = None
embeddings = None
docsearch = None
def make_inference(openai_api_key, query):
    """Answer *query* over the pre-split Gutenberg texts with a "stuff" QA chain.

    The chain, embeddings, and Chroma retriever are built lazily on the first
    call (they need the user-supplied OpenAI API key) and cached in module
    globals for subsequent calls.

    Args:
        openai_api_key: OpenAI API key; surrounding whitespace is ignored.
        query: Natural-language question to answer.

    Returns:
        The LLM's answer string.
    """
    global chain, embeddings, docsearch
    # Strip once and use consistently — the original stripped the key for the
    # LLM but passed it unstripped to the embeddings client.
    api_key = openai_api_key.strip()
    if chain is None:
        # NOTE(review): the cached chain keeps the *first* key supplied; a
        # different key on a later call is silently ignored.
        chain = load_qa_chain(OpenAI(temperature=0, openai_api_key=api_key), chain_type="stuff")
        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        # Bug fix: Chroma.from_documents derives metadatas from the documents
        # themselves and forwards them to from_texts(metadatas=...), so also
        # passing a metadatas= kwarg raises a duplicate-keyword TypeError.
        # Attach the source index to each document's metadata instead.
        for i, doc in enumerate(texts):
            doc.metadata["source"] = str(i)
        docsearch = Chroma.from_documents(texts, embeddings).as_retriever()
    relevant_docs = docsearch.get_relevant_documents(query)
    return chain.run(input_documents=relevant_docs, question=query)
if __name__ == "__main__":
    # Serve the QA function through a simple Gradio web UI.
    import gradio as gr

    gr.Interface(
        make_inference,
        [
            # Fix: the gr.inputs / gr.outputs namespaces were deprecated in
            # Gradio 3 and removed in Gradio 4; top-level components work in
            # both. type="password" keeps the API key masked in the browser.
            gr.Textbox(lines=1, label="OpenAI API Key", type="password"),
            gr.Textbox(lines=2, label="Query"),
        ],
        gr.Textbox(label="Response"),
        title="Multiple Document QA with LangChain",
        description="Ask me anything about Lewis Carroll's Alice in Wonderland and Through the Looking-Glass",
    ).launch()