Spaces:

ALVHB95
/

TFM_DataScience_APP

Running

ALVHB95 committed on Jan 30, 2024

Commit

ed4a174

1 Parent(s): 687f0cb

app16

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,7 +49,12 @@ image_gradio_app = gr.Interface(
 loader = PyPDFDirectoryLoader('pdfs')
 data=loader.load()
 # split documents
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
 docs = text_splitter.split_documents(data)
 # define embedding
 embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
@@ -65,7 +70,12 @@ vectordb = Chroma.from_documents(
 )
 # define retriever
 retriever = vectordb.as_retriever(search_type="mmr")
-template = """Your name is AngryGreta and you are a recycling chatbot with the objective and poorpose to help people with waste management to improve environmental situation. Use the following pieces of context to answer the question if the question is related with recycling. Answer in the same language of the question. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
 context: {context}
 chat history: {chat_history}
 question: {question}
@@ -92,7 +102,7 @@ llm = HuggingFaceHub(
 )
 llm_chain = LLMChain(llm=llm, prompt=qa_prompt)
-memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", output_key='answer', return_messages=True)
 qa_chain = ConversationalRetrievalChain.from_llm(
     llm = llm,

 loader = PyPDFDirectoryLoader('pdfs')
 data=loader.load()
 # split documents
+# RecursiveCharacterTextSplitter is configured with a list via `separators`;
+# the singular `separator` keyword is a CharacterTextSplitter parameter and is
+# not accepted by this class. Newline is tried first, then finer fallbacks so
+# that long lines without a newline can still be cut down to chunk_size.
+text_splitter = RecursiveCharacterTextSplitter(
+    separators=["\n", " ", ""],
+    chunk_size=1000,
+    chunk_overlap=150,
+    length_function=len,
+)
 docs = text_splitter.split_documents(data)
 # define embedding
 embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
 )
 # define retriever
 retriever = vectordb.as_retriever(search_type="mmr")
+template = """
+Your name is AngryGreta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
+Use the following pieces of context to answer the question if the question is related with recycling /
+No more than two chunks of context /
+Answer in the same language of the question /
+Always say "thanks for asking!" at the end of the answer.
 context: {context}
 chat history: {chat_history}
 question: {question}
 )
 llm_chain = LLMChain(llm=llm, prompt=qa_prompt)
+memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", output_key='answer', return_messages=False)
 qa_chain = ConversationalRetrievalChain.from_llm(
     llm = llm,