ALVHB95 committed on
Commit
7168ed6
·
1 Parent(s): 8879568
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -51,8 +51,8 @@ loader = PyPDFDirectoryLoader('pdfs')
51
  data=loader.load()
52
  # split documents
53
  text_splitter = RecursiveCharacterTextSplitter(
54
- chunk_size=500,
55
- chunk_overlap=70,
56
  length_function=len
57
  )
58
  docs = text_splitter.split_documents(data)
@@ -69,7 +69,7 @@ vectordb = Chroma.from_documents(
69
  persist_directory=persist_directory
70
  )
71
  # define retriever
72
- retriever = vectordb.as_retriever(search_kwargs={"k": 1}, search_type="mmr")
73
 
74
  class FinalAnswer(BaseModel):
75
  question: str = Field(description="the original question")
@@ -81,7 +81,10 @@ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
81
  template = """
82
  Your name is AngryGreta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
83
  Use the following pieces of context to answer the question /
84
- Answer in the same language of the question /
 
 
 
85
  Context: {context}
86
  User: {question}
87
  {format_instructions}
@@ -99,10 +102,11 @@ llm = HuggingFaceHub(
99
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
100
  task="text-generation",
101
  model_kwargs={
102
- "max_new_tokens": 1024,
103
  "top_k": 30,
104
  "temperature": 0.1,
105
  "repetition_penalty": 1.03,
 
106
  },
107
  )
108
 
 
51
  data=loader.load()
52
  # split documents
53
  text_splitter = RecursiveCharacterTextSplitter(
54
+ chunk_size=1024,
55
+ chunk_overlap=150,
56
  length_function=len
57
  )
58
  docs = text_splitter.split_documents(data)
 
69
  persist_directory=persist_directory
70
  )
71
  # define retriever
72
+ retriever = vectordb.as_retriever(search_kwargs={"k": 2}, search_type="mmr")
73
 
74
  class FinalAnswer(BaseModel):
75
  question: str = Field(description="the original question")
 
81
  template = """
82
  Your name is AngryGreta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
83
  Use the following pieces of context to answer the question /
84
+ If the question is English answer in English /
85
+ If the question is Spanish answer in Spanish /
86
+ Do not mention the word context when you answer a question, use the word database instead /
87
+ Answer the question fully and provide as much relevant detail as possible. Do not cut your response short
88
  Context: {context}
89
  User: {question}
90
  {format_instructions}
 
102
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
103
  task="text-generation",
104
  model_kwargs={
105
+ "max_new_tokens": 2000,
106
  "top_k": 30,
107
  "temperature": 0.1,
108
  "repetition_penalty": 1.03,
109
+ "early_stopping": "never"
110
  },
111
  )
112