Spaces:
Running
Running
app20
Browse files
app.py
CHANGED
@@ -51,8 +51,8 @@ loader = PyPDFDirectoryLoader('pdfs')
|
|
51 |
data=loader.load()
|
52 |
# split documents
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
54 |
-
chunk_size=
|
55 |
-
chunk_overlap=
|
56 |
length_function=len
|
57 |
)
|
58 |
docs = text_splitter.split_documents(data)
|
@@ -69,7 +69,7 @@ vectordb = Chroma.from_documents(
|
|
69 |
persist_directory=persist_directory
|
70 |
)
|
71 |
# define retriever
|
72 |
-
retriever = vectordb.as_retriever(search_kwargs={"k":
|
73 |
|
74 |
class FinalAnswer(BaseModel):
|
75 |
question: str = Field(description="the original question")
|
@@ -81,7 +81,10 @@ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
|
|
81 |
template = """
|
82 |
Your name is AngryGreta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
|
83 |
Use the following pieces of context to answer the question /
|
84 |
-
|
|
|
|
|
|
|
85 |
Context: {context}
|
86 |
User: {question}
|
87 |
{format_instructions}
|
@@ -99,10 +102,11 @@ llm = HuggingFaceHub(
|
|
99 |
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
100 |
task="text-generation",
|
101 |
model_kwargs={
|
102 |
-
"max_new_tokens":
|
103 |
"top_k": 30,
|
104 |
"temperature": 0.1,
|
105 |
"repetition_penalty": 1.03,
|
|
|
106 |
},
|
107 |
)
|
108 |
|
|
|
51 |
data=loader.load()
|
52 |
# split documents
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
54 |
+
chunk_size=1024,
|
55 |
+
chunk_overlap=150,
|
56 |
length_function=len
|
57 |
)
|
58 |
docs = text_splitter.split_documents(data)
|
|
|
69 |
persist_directory=persist_directory
|
70 |
)
|
71 |
# define retriever
|
72 |
+
retriever = vectordb.as_retriever(search_kwargs={"k": 2}, search_type="mmr")
|
73 |
|
74 |
class FinalAnswer(BaseModel):
|
75 |
question: str = Field(description="the original question")
|
|
|
81 |
template = """
|
82 |
Your name is AngryGreta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
|
83 |
Use the following pieces of context to answer the question /
|
84 |
+
If the question is English answer in English /
|
85 |
+
If the question is Spanish answer in Spanish /
|
86 |
+
Do not mention the word context when you answer a question, use the word database instead /
|
87 |
+
Answer the question fully and provide as much relevant detail as possible. Do not cut your response short
|
88 |
Context: {context}
|
89 |
User: {question}
|
90 |
{format_instructions}
|
|
|
102 |
repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
103 |
task="text-generation",
|
104 |
model_kwargs={
|
105 |
+
"max_new_tokens": 2000,
|
106 |
"top_k": 30,
|
107 |
"temperature": 0.1,
|
108 |
"repetition_penalty": 1.03,
|
109 |
+
"early_stopping": "never"
|
110 |
},
|
111 |
)
|
112 |
|