# Hugging Face Spaces app.py (author: andreinigo, commit 6c74bc5)
import PyPDF2
import pandas as pd
import os
import gradio as gr
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import openai
def proper_query(query):
    """Rewrite *query* as a grammatically correct question (same language).

    Sends the raw question to the completion endpoint and returns the
    model's corrected version of it.
    """
    fix_prompt = (
        f"The following text is a question: {query}\n"
        "How should that question be modified so it becomes correctly written in that same language?\n"
        "Fixed question:"
    )
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=fix_prompt,
        max_tokens=1000,
        temperature=0.2,
    )
    return completion.choices[0].text
def extract_text_from_pdf(file_path, splitter = "\n\n"):
    """Extract the full text of a PDF and split it into delimiter-prefixed chunks.

    Parameters
    ----------
    file_path : str
        Path to the PDF file to read.
    splitter : str
        Delimiter used to cut the text into chunks (e.g. 'ARTÍCULO' to
        split a statute by article). Defaults to a blank line.

    Returns
    -------
    list[str]
        One chunk per delimiter occurrence, each re-prefixed with the
        delimiter. The preamble before the first delimiter is discarded.
    """
    with open(file_path, 'rb') as file:
        pdf = PyPDF2.PdfReader(file)
        # join() avoids quadratic += concatenation, and extract_text()
        # can return None for pages with no extractable text.
        text = ''.join(page.extract_text() or '' for page in pdf.pages)
    chunks = text.split(splitter)
    # Re-attach the delimiter so each chunk keeps its heading; chunks[0]
    # is whatever precedes the first delimiter and is intentionally dropped.
    chunks = [splitter + chunk for chunk in chunks[1:]]
    return chunks
# --- One-time setup: API key, corpus extraction, vector index -------------

# The Space secret stores the key without its "sk-" prefix; restore it.
os.environ["OPENAI_API_KEY"] = 'sk-' + os.environ["OPENAI_API_KEY"]
embeddings = OpenAIEmbeddings()

# One chunk per "ARTÍCULO" (article) of the Durango traffic regulation.
text = extract_text_from_pdf('transito-dgo.pdf', 'ARTÍCULO')

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)
# BUG FIX: split_text() expects a single string, but extract_text_from_pdf()
# returns a *list* of chunks. Split each article separately and flatten so
# every piece stays within chunk_size.
texts = [
    piece
    for chunk in text
    for piece in text_splitter.split_text(chunk)
]
docsearch = FAISS.from_texts(texts, embeddings)
# Prompt used to refine an existing answer with additional retrieved context.
# Module-level constants: previously these templates and PromptTemplate
# objects were rebuilt on every single request.
_REFINE_TEMPLATE = (
    "The original question is as follows: {question}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "You have the opportunity to refine the existing answer,"
    "only if needed, exclusively with the context below.\n"
    "------------\n"
    "{context_str}\n"
    "------------\n"
    "If that context is not helpful to answer the question, then omit it.\n"
    "Shorten the answer if possible.\n"
    "Reply in the same language as the question.\n"
    "If the given context is not helpful to improve the answer to the question, then return the existing answer.\n"
    "Answer:"
)
_REFINE_PROMPT = PromptTemplate(
    input_variables=["question", "existing_answer", "context_str"],
    template=_REFINE_TEMPLATE,
)

# Prompt used to produce the first answer from the retrieved context.
_INITIAL_QA_TEMPLATE = (
    "Context information is below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question: {question}\n"
    "If the context is not helpful to answer the question, then you will refuse to answer due to policy guidelines.\n"
)
_INITIAL_QA_PROMPT = PromptTemplate(
    input_variables=["context_str", "question"], template=_INITIAL_QA_TEMPLATE
)


def asesor_transito(query):
    """Answer a question about the Durango traffic regulation.

    Pipeline: normalize the question via the LLM, retrieve the most
    similar regulation chunks from the FAISS index, then run a "refine"
    QA chain (initial answer + iterative refinement over each document).

    Parameters
    ----------
    query : str
        The user's question, in any language.

    Returns
    -------
    str
        The chain's final answer text.
    """
    # Fix grammar/spelling first so the similarity search gets a clean query.
    query = proper_query(query)
    docs = docsearch.similarity_search(query)
    chain = load_qa_chain(
        OpenAI(temperature=0.3),
        chain_type="refine",
        return_refine_steps=False,
        question_prompt=_INITIAL_QA_PROMPT,
        refine_prompt=_REFINE_PROMPT,
    )
    result = chain(
        {"input_documents": docs, "question": query},
        return_only_outputs=True,
    )
    return result['output_text']
# --- Gradio UI ------------------------------------------------------------
question_box = gr.Textbox(label="Pregunta / Question:", lines=3,)
answer_box = gr.Textbox(label="Respuesta: / Answer: ")

demo = gr.Interface(
    fn=asesor_transito,
    inputs=[question_box],
    outputs=[answer_box],
    title="Asesor de Reglamento de Tránsito Durango",
    description="Soy Viv, tu asesora personalizada para responder cualquier pregunta sobre el reglamento de tránsito del estado de Durango. Puedes preguntarme en cualquier idioma.",
    examples=[
        ["cuál es la multa por no llevar casco?"],
        ["qué pasa si no tengo licencia de conducir?"],
    ],
)

if __name__ == "__main__":
    demo.launch()