MuntasirHossain committed on
Commit f99faa1 · verified · 1 Parent(s): ba41222

Update app.py

Files changed (1)
  1. app.py +28 -29
app.py CHANGED
@@ -41,36 +41,35 @@ class ZephyrLLM(LLM):
         self.temperature = temperature
 
     def _call(self, prompt, stop=None):
-        # Format as chat message
-        messages = [{"role": "user", "content": prompt}]
-
-        # Apply Zephyr's chat template
-        formatted_prompt = self.tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-
-        # Send request to Hugging Face Inference API
-        payload = {
-            "inputs": formatted_prompt,
-            "parameters": {
-                "max_new_tokens": self.max_new_tokens,
-                "temperature": self.temperature
-            }
-        }
-        response = requests.post(self.api_url, headers=self.headers, json=payload)
-
-        if response.status_code == 200:
-            full_response = response.json()[0]["generated_text"]
-
-            # Extract the assistant reply from the full response
-            # After <|assistant|>\n, everything is the model's answer
-            if "<|assistant|>" in full_response:
-                return full_response.split("<|assistant|>")[-1].strip()
+        # Format as chat message
+        messages = [{"role": "user", "content": prompt}]
+
+        # Apply Zephyr's chat template
+        formatted_prompt = self.tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        # Send request to Hugging Face Inference API
+        payload = {
+            "inputs": formatted_prompt,
+            "parameters": {
+                "max_new_tokens": self.max_new_tokens,
+                "temperature": self.temperature
+            }
+        }
+        response = requests.post(self.api_url, headers=self.headers, json=payload)
+
+        if response.status_code == 200:
+            full_response = response.json()[0]["generated_text"]
+
+            # Extract the assistant reply from the full response
+            # After <|assistant|>\n, everything is the model's answer
+            if "<|assistant|>" in full_response:
+                return full_response.split("<|assistant|>")[-1].strip()
+            else:
+                return full_response.strip()
+
         else:
-            return full_response.strip()
-
-        else:
-            raise Exception(f"Failed call [{response.status_code}]: {response.text}")
+            raise Exception(f"Failed call [{response.status_code}]: {response.text}")
 
 
     @property
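
Note: below is a minimal standalone sketch of the request/response path that _call implements, for readers following the change. It assumes the HuggingFaceH4/zephyr-7b-beta model id, the public Inference API endpoint URL, and an HF_TOKEN environment variable; none of these appear in the hunk, which only shows how the constructor's api_url, headers, and tokenizer are used.

    import os
    import requests
    from transformers import AutoTokenizer

    # Assumed model id and endpoint; the diff does not show how api_url,
    # headers, or the tokenizer are configured in ZephyrLLM.__init__.
    MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
    API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
    HEADERS = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # assumed to hold a valid token

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

    # Same formatting step as _call: wrap the prompt as a chat message and
    # apply Zephyr's chat template, which ends with the <|assistant|> tag.
    messages = [{"role": "user", "content": "What is LangChain?"}]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    payload = {
        "inputs": formatted_prompt,
        "parameters": {"max_new_tokens": 256, "temperature": 0.7},
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload)
    response.raise_for_status()

    # The generated_text typically echoes the formatted prompt, so everything
    # after the last <|assistant|> tag is the model's reply.
    full_response = response.json()[0]["generated_text"]
    reply = full_response.split("<|assistant|>")[-1].strip()
    print(reply)

Because the Inference API's text-generation output typically includes the prompt along with the completion, splitting on the last <|assistant|> tag (as _call does) is what isolates the assistant's reply rather than returning generated_text as-is.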