Spaces:
Runtime error
Runtime error
import PyPDF2 | |
import pandas as pd | |
import os | |
import gradio as gr | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores.faiss import FAISS | |
from langchain.docstore.document import Document | |
from langchain.prompts import PromptTemplate | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.llms import OpenAI | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
import openai | |
def proper_query(query):
    """Ask the completion model to rewrite a question so it is correctly written.

    Args:
        query: The user's question, in any language.

    Returns:
        The corrected question with surrounding whitespace removed. If
        ``query`` is empty or only whitespace it is returned unchanged and no
        API call is made. If the model returns only whitespace, the original
        ``query`` is returned instead.
    """
    # Nothing to correct: do not spend an API call on a blank question.
    if not query or not query.strip():
        return query

    prompt = f"The following text is a question: {query}\nHow should that question be modified so it becomes correctly written in that same language?\nFixed question:"
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1000,
        temperature=0.2,
    )
    # The completion continues right after "Fixed question:", so it may start
    # with a space or newline that should not leak into the search query.
    fixed = response.choices[0].text.strip()
    # Keep the user's wording rather than searching with an empty string.
    return fixed or query
def extract_text_from_pdf(file_path, splitter="\n\n"):
    """Read a PDF and return its text cut into pieces at every ``splitter``.

    Args:
        file_path: Path of the PDF file to read.
        splitter: Marker string at which the extracted text is cut.

    Returns:
        A list of strings, one per occurrence of ``splitter`` in the text,
        each beginning with ``splitter`` itself. Whatever precedes the first
        occurrence is not included.
    """
    with open(file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Pages are read while the file is still open.
        full_text = ''.join(page.extract_text() for page in reader.pages)

    # str.split removes the marker, so it is put back at the front of every
    # section; the leading piece (before the first marker) is discarded.
    _preamble, *sections = full_text.split(splitter)
    return [splitter + section for section in sections]
# The original code always prepends 'sk-' to OPENAI_API_KEY, which suggests the
# variable is stored without that prefix. Add it only when it is missing so an
# already-complete key is not turned into 'sk-sk-...'.
_api_key = os.environ["OPENAI_API_KEY"]
if not _api_key.startswith('sk-'):
    _api_key = 'sk-' + _api_key
os.environ["OPENAI_API_KEY"] = _api_key
# `openai` was imported before the variable was rewritten; set the key on the
# module explicitly in case it captured the unprefixed value at import time.
openai.api_key = _api_key

embeddings = OpenAIEmbeddings()

# One string per 'ARTÍCULO' marker found in the PDF (a list, not a single string).
text = extract_text_from_pdf('transito-dgo.pdf', 'ARTÍCULO')

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)

# split_text() takes a single string, so every extracted section is split on
# its own and the resulting pieces are flattened into one list. This also keeps
# a piece from spanning two different sections.
texts = [
    piece
    for section in text
    for piece in text_splitter.split_text(section)
]
docsearch = FAISS.from_texts(texts, embeddings)
def asesor_transito(query):
    """Answer a question using the indexed document as the only context.

    The question is first rewritten by ``proper_query``, then the most similar
    indexed chunks are fetched from ``docsearch`` and passed through a
    "refine" question-answering chain: an initial answer is produced from the
    first chunk and then revised against each following chunk.

    Args:
        query: The user's question, in any language.

    Returns:
        The chain's final answer text, or an empty string when ``query`` is
        empty or only whitespace (no API call is made in that case).
    """
    # No question, no answer: skip the completion, embedding and QA calls.
    if not query or not query.strip():
        return ""

    query = proper_query(query)
    docs = docsearch.similarity_search(query)

    # Adjacent string literals are concatenated as-is, so each fragment must
    # carry its own trailing space or newline.
    refine_prompt_template = (
        "The original question is as follows: {question}\n"
        "We have provided an existing answer: {existing_answer}\n"
        "You have the opportunity to refine the existing answer, "
        "only if needed, exclusively with the context below.\n"
        "------------\n"
        "{context_str}\n"
        "------------\n"
        "If that context is not helpful to answer the question, then omit it.\n"
        "Shorten the answer if possible.\n"
        "Reply in the same language as the question.\n"
        "If the given context is not helpful to improve the answer to the question, then return the existing answer.\n"
        "Answer:"
    )
    refine_prompt = PromptTemplate(
        input_variables=["question", "existing_answer", "context_str"],
        template=refine_prompt_template,
    )

    initial_qa_template = (
        "Context information is below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the question: {question}\n"
        "If the context is not helpful to answer the question, then you will refuse to answer due to policy guidelines.\n"
    )
    initial_qa_prompt = PromptTemplate(
        input_variables=["context_str", "question"],
        template=initial_qa_template,
    )

    chain = load_qa_chain(
        OpenAI(temperature=0.3),
        chain_type="refine",
        return_refine_steps=False,
        question_prompt=initial_qa_prompt,
        refine_prompt=refine_prompt,
    )
    result = chain(
        {"input_documents": docs, "question": query},
        return_only_outputs=True,
    )
    return result['output_text']
# Input and output widgets of the web form (created in the same order as before).
_question_box = gr.Textbox(label="Pregunta / Question:", lines=3)
_answer_box = gr.Textbox(label="Respuesta: / Answer: ")

# Sample questions offered as one-click examples in the UI.
_example_questions = [
    ["cuál es la multa por no llevar casco?"],
    ["qué pasa si no tengo licencia de conducir?"],
]

# Gradio front end: one text question in, one text answer out, both handled
# by asesor_transito.
demo = gr.Interface(
    fn=asesor_transito,
    inputs=[_question_box],
    outputs=[_answer_box],
    title="Asesor de Reglamento de Tránsito Durango",
    description="Soy Viv, tu asesora personalizada para responder cualquier pregunta sobre el reglamento de tránsito del estado de Durango. Puedes preguntarme en cualquier idioma.",
    examples=_example_questions,
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()