Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -3,29 +3,39 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
 # Download the model from Hugging Face Hub
-print("Downloading model...")
+print("===== Downloading model... =====")
 model_path = hf_hub_download(
-    repo_id="…
-    filename="…
+    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
+    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf"
 )
-print("Model downloaded to: …
+print(f"Model downloaded to: {model_path}")
 
 # Load the model with llama-cpp-python
-print("Loading model...")
+print("===== Loading model... =====")
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048
+    n_ctx=2048,  # adjust as per RAM
+    n_threads=8  # adjust based on your Space CPU (8 is good default)
 )
 print("Model loaded.")
 
-# Chat function
-def chat(…
-    print(f"User input: {…
+# Chat function with Dolphin 3.0 template
+def chat(user_input):
+    print(f"User input: {user_input}")
+    full_prompt = f"""### System:
+You are Dolphin 3.0, a helpful and friendly AI assistant.
+
+### User:
+{user_input}
+
+### Assistant:"""
+
     output = llm(
-        …
+        full_prompt,
         max_tokens=512,
-        stop=["</s>", "User:", "Assistant:"]
+        stop=["</s>", "### User:", "### Assistant:"]
     )
+
     reply = output['choices'][0]['text'].strip()
     print(f"Model reply: {reply}")
     return reply
@@ -35,8 +45,8 @@ iface = gr.Interface(
     fn=chat,
     inputs="text",
     outputs="text",
-    title="Llama 2…
-    description="Running Llama 2…
+    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
+    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
 )
 
 iface.launch()
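A note on the design choice in the chat function: the commit hand-builds a "### System: / ### User: / ### Assistant:" prompt and relies on explicit stop strings. llama-cpp-python also exposes create_chat_completion, which applies the chat template stored in the GGUF metadata instead. A minimal sketch of that alternative (not part of this commit):

# Alternative to the hand-built template: let llama-cpp-python apply
# the chat template embedded in the GGUF file.
out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are Dolphin 3.0, a helpful and friendly AI assistant."},
        {"role": "user", "content": user_input},
    ],
    max_tokens=512,
)
reply = out["choices"][0]["message"]["content"].strip()

With this variant the library reads the template and end-of-turn tokens from the model file, so the hand-written stop list is usually unnecessary.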
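For the Space to build, the three imported packages have to be declared. A minimal requirements.txt sketch inferred from the imports in app.py (the file and any version pins are an assumption, not part of this commit):

# requirements.txt (hypothetical; pin versions as needed)
huggingface_hub
llama-cpp-python
gradio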
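Once the Space is running, the gr.Interface endpoint can also be called programmatically. A minimal sketch with gradio_client, assuming the Space is public; "user/dolphin-chat" is a hypothetical Space ID standing in for the real one:

from gradio_client import Client

# Hypothetical Space ID -- replace with the actual owner/name
client = Client("user/dolphin-chat")

# gr.Interface(fn=chat, inputs="text", outputs="text") exposes a single
# /predict endpoint: one text input (user message), one text output (reply)
reply = client.predict("Hello, who are you?", api_name="/predict")
print(reply)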