Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -3,29 +3,39 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
 # Download the model from Hugging Face Hub
-print("Downloading model...")
+print("===== Downloading model... =====")
 model_path = hf_hub_download(
-    repo_id="…
-    filename="…
+    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
+    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf"
 )
-print("Model downloaded to: …
+print(f"Model downloaded to: {model_path}")
 
 # Load the model with llama-cpp-python
-print("Loading model...")
+print("===== Loading model... =====")
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048
+    n_ctx=2048,  # adjust as per RAM
+    n_threads=8  # adjust based on your Space CPU (8 is good default)
 )
 print("Model loaded.")
 
-# Chat function
-def chat(…
-    print(f"User input: {…
+# Chat function with Dolphin 3.0 template
+def chat(user_input):
+    print(f"User input: {user_input}")
+    full_prompt = f"""### System:
+You are Dolphin 3.0, a helpful and friendly AI assistant.
+
+### User:
+{user_input}
+
+### Assistant:"""
+
     output = llm(
-        …
+        full_prompt,
         max_tokens=512,
-        stop=["</s>", "User:", "Assistant:"]
+        stop=["</s>", "### User:", "### Assistant:"]
     )
+
     reply = output['choices'][0]['text'].strip()
     print(f"Model reply: {reply}")
     return reply
@@ -35,8 +45,8 @@ iface = gr.Interface(
     fn=chat,
     inputs="text",
     outputs="text",
-    title="Llama 2…
-    description="Running Llama 2…
+    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
+    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
 )
 
 iface.launch()
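A note on the design choice in the chat function: the commit hand-builds a "### System: / ### User: / ### Assistant:" prompt and relies on explicit stop strings. llama-cpp-python also exposes create_chat_completion, which applies the chat template stored in the GGUF metadata instead. A minimal sketch of that alternative (not part of this commit):

# Alternative to the hand-built template: let llama-cpp-python apply
# the chat template embedded in the GGUF file.
out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are Dolphin 3.0, a helpful and friendly AI assistant."},
        {"role": "user", "content": user_input},
    ],
    max_tokens=512,
)
reply = out["choices"][0]["message"]["content"].strip()

With this variant the library reads the template and end-of-turn tokens from the model file, so the hand-written stop list is usually unnecessary.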
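For the Space to build, the three imported packages have to be declared. A minimal requirements.txt sketch inferred from the imports in app.py (the file and any version pins are an assumption, not part of this commit):

# requirements.txt (hypothetical; pin versions as needed)
huggingface_hub
llama-cpp-python
gradio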
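Once the Space is running, the gr.Interface endpoint can also be called programmatically. A minimal sketch with gradio_client, assuming the Space is public; "user/dolphin-chat" is a hypothetical Space ID standing in for the real one:

from gradio_client import Client

# Hypothetical Space ID -- replace with the actual owner/name
client = Client("user/dolphin-chat")

# gr.Interface(fn=chat, inputs="text", outputs="text") exposes a single
# /predict endpoint: one text input (user message), one text output (reply)
reply = client.predict("Hello, who are you?", api_name="/predict")
print(reply)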