File size: 4,189 Bytes
7a96a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02e8449
7a96a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3883f12
 
7a96a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ea89ae
7a96a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02e8449
7a96a6f
 
 
 
6c74bc5
7a96a6f
 
 
 
 
 
 
02e8449
7a96a6f
02e8449
7a96a6f
02e8449
7a96a6f
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import PyPDF2
import pandas as pd
import os
import gradio as gr
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import openai

def proper_query(query):
    """Ask the LLM to rewrite *query* as a correctly written question.

    The rewrite is asked for in the same language as the original question;
    this is used as a normalization step before similarity search.

    Args:
        query: Raw user question, possibly misspelled or ungrammatical.

    Returns:
        The corrected question text. Surrounding whitespace is stripped,
        since the completions API tends to prepend a newline to the text —
        feeding that raw into the vector search degrades matching.
    """
    prompt = (
        f"The following text is a question: {query}\n"
        "How should that question be modified so it becomes correctly "
        "written in that same language?\nFixed question:"
    )
    # NOTE(review): text-davinci-003 / openai.Completion is a legacy API —
    # confirm the pinned openai package version still supports it.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1000,
        temperature=0.2,
    )
    return response.choices[0].text.strip()

def extract_text_from_pdf(file_path, splitter = "\n\n"):
    """Extract a PDF's text and split it into chunks on *splitter*.

    Args:
        file_path: Path of the PDF file to read.
        splitter: Separator string marking chunk boundaries
            (e.g. "ARTÍCULO" for article headings).

    Returns:
        A list of chunks with *splitter* re-prepended to each, so every
        chunk keeps its heading. Any text that precedes the first
        occurrence of *splitter* (e.g. a title page or preamble) is
        discarded, matching the original behavior.
    """
    with open(file_path, 'rb') as file:
        pdf = PyPDF2.PdfReader(file)
        # join() instead of repeated `+=` — avoids quadratic string build.
        text = ''.join(page.extract_text() for page in pdf.pages)
    # split() removes the separator; re-attach it and drop the preamble
    # chunk that appears before the first separator.
    return [splitter + chunk for chunk in text.split(splitter)[1:]]

# --- One-time setup at import: API key, PDF ingestion, vector index. ---

# The stored secret omits the "sk-" prefix; restore it before use.
os.environ["OPENAI_API_KEY"] = 'sk-'+ os.environ["OPENAI_API_KEY"]
embeddings = OpenAIEmbeddings()
# Split the Durango traffic-regulation PDF on its "ARTÍCULO" headings.
text = extract_text_from_pdf('transito-dgo.pdf','ARTÍCULO')

text_splitter = RecursiveCharacterTextSplitter(
    # Cap each indexed chunk at 500 characters, no overlap.
    chunk_size = 500,
    chunk_overlap  = 0,
    length_function = len,
)

# NOTE(review): `text` is a *list* of chunks, but split_text() documents a
# single `str` argument — confirm the pinned langchain version accepts a
# list here; otherwise join the chunks first or use create_documents().
texts = text_splitter.split_text(text)

# Build the FAISS index once at startup; every query reuses it.
docsearch = FAISS.from_texts(texts, embeddings)

def asesor_transito(query):
    """Answer a question about the Durango traffic regulations.

    The question is first normalized via ``proper_query``, then used to
    retrieve relevant chunks from the FAISS index, and finally answered
    by a refine-style QA chain.

    Args:
        query: User question, in any language.

    Returns:
        The chain's final answer text.
    """
    fixed_query = proper_query(query)
    relevant_docs = docsearch.similarity_search(fixed_query)

    # Prompt for the first pass: answer strictly from retrieved context.
    first_pass_template = (
        "Context information is below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the question: {question}\n"
        "If the context is not helpful to answer the question, then you will refuse to answer due to policy guidelines.\n"
    )
    first_pass_prompt = PromptTemplate(
        input_variables=["context_str", "question"],
        template=first_pass_template,
    )

    # Prompt for each refinement pass over additional context chunks.
    refine_template = (
        "The original question is as follows: {question}\n"
        "We have provided an existing answer: {existing_answer}\n"
        "You have the opportunity to refine the existing answer,"
        "only if needed, exclusively with the context below.\n"
        "------------\n"
        "{context_str}\n"
        "------------\n"
        "If that context is not helpful to answer the question, then omit it.\n"
        "Shorten the answer if possible.\n"
        "Reply in the same language as the question.\n"
        "If the given context is not helpful to improve the answer to the question, then return the existing answer.\n"
        "Answer:"
    )
    refine_prompt = PromptTemplate(
        input_variables=["question", "existing_answer", "context_str"],
        template=refine_template,
    )

    qa_chain = load_qa_chain(
        OpenAI(temperature=0.3),
        chain_type="refine",
        return_refine_steps=False,
        question_prompt=first_pass_prompt,
        refine_prompt=refine_prompt,
    )
    result = qa_chain(
        {"input_documents": relevant_docs, "question": fixed_query},
        return_only_outputs=True,
    )
    return result['output_text']

# Gradio UI: one question textbox in, one answer textbox out.
_question_box = gr.Textbox(label="Pregunta / Question:", lines=3,)
_answer_box = gr.Textbox(label="Respuesta: / Answer: ")

demo = gr.Interface(
    fn=asesor_transito,
    inputs=[_question_box],
    outputs=[_answer_box],
    title="Asesor de Reglamento de Tránsito Durango",
    description="Soy Viv, tu asesora personalizada para responder cualquier pregunta sobre el reglamento de tránsito del estado de Durango. Puedes preguntarme en cualquier idioma.",
    examples=[
        ["cuál es la multa por no llevar casco?"],
        ["qué pasa si no tengo licencia de conducir?"],
    ],
)

# Start the web server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()