Update app.py
app.py CHANGED

@@ -41,36 +41,35 @@ class ZephyrLLM(LLM):
         self.temperature = temperature
 
     def _call(self, prompt, stop=None):
+        # Format as chat message
+        messages = [{"role": "user", "content": prompt}]
+
+        # Apply Zephyr's chat template
+        formatted_prompt = self.tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        # Send request to Hugging Face Inference API
+        payload = {
+            "inputs": formatted_prompt,
+            "parameters": {
+                "max_new_tokens": self.max_new_tokens,
+                "temperature": self.temperature
+            }
+        }
+        response = requests.post(self.api_url, headers=self.headers, json=payload)
+
+        if response.status_code == 200:
+            full_response = response.json()[0]["generated_text"]
+
+            # Extract the assistant reply from the full response
+            # After <|assistant|>\n, everything is the model's answer
+            if "<|assistant|>" in full_response:
+                return full_response.split("<|assistant|>")[-1].strip()
+            else:
+                return full_response.strip()
+
         else:
+            raise Exception(f"Failed call [{response.status_code}]: {response.text}")
 
 
     @property
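For completeness, a hypothetical way to drive the updated class from LangChain; the constructor parameters shown here are guesses, since the commit only shows the tail of __init__ and the rewritten _call body:

import os

# Hypothetical usage; api_token, max_new_tokens and temperature are assumed
# constructor parameters, inferred from the attributes that _call reads.
llm = ZephyrLLM(
    api_token=os.environ["HF_TOKEN"],
    max_new_tokens=256,
    temperature=0.7,
)
print(llm.invoke("Explain LangChain in one sentence."))

On older LangChain releases the custom LLM is called directly, llm("..."), instead of through invoke; both routes end up in the _call method changed by this commit.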