vinuajeesh committed · Commit 633a2af · verified · 1 Parent(s): 1cc005a

Update app.py

Files changed (1):
  1. app.py +55 -23
app.py CHANGED
@@ -1,8 +1,9 @@
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+import os

-# Download the model from Hugging Face Hub
+# --- 1. Model Downloading (No changes) ---
 print("===== Downloading model... =====")
 model_path = hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
@@ -10,43 +11,74 @@ model_path = hf_hub_download(
 )
 print(f"Model downloaded to: {model_path}")

-# Load the model with llama-cpp-python
+# --- 2. Model Loading (Optimized for HF Space CPU) ---
 print("===== Loading model... =====")
+
+# This will correctly get the number of CPUs allocated to your Space (often 2 on free tier)
+n_threads = os.cpu_count()
+
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048, # adjust as per RAM
-    n_threads=8 # adjust based on your Space CPU (8 is good default)
+    n_ctx=2048,
+    n_threads=n_threads,
+    # --- !! KEY CHANGE FOR HUGGING FACE SPACES !! ---
+    # Set n_gpu_layers to 0. Free Spaces run on CPUs.
+    # Setting this to a non-zero value will cause errors without a GPU upgrade.
+    n_gpu_layers=0
 )
-print("Model loaded.")
+print(f"Model loaded for CPU execution with {n_threads} threads.")
+
+
+# --- 3. Chat Function with Streaming (Still the best!) ---
+def chat(message, history):
+    # This function remains the same, as streaming is even MORE important on a slower CPU!
+    history_prompt = ""
+    for user_msg, assistant_msg in history:
+        history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"

-# Chat function with Dolphin 3.0 template
-def chat(user_input):
-    print(f"User input: {user_input}")
     full_prompt = f"""### System:
 You are Dolphin 3.0, a helpful and friendly AI assistant.

-### User:
-{user_input}
+{history_prompt}### User:
+{message}

 ### Assistant:"""

-    output = llm(
+    stream = llm(
         full_prompt,
-        max_tokens=512,
-        stop=["</s>", "### User:", "### Assistant:"]
+        max_tokens=1024,
+        stop=["</s>", "### User:", "### Assistant:"],
+        stream=True
     )

-    reply = output['choices'][0]['text'].strip()
-    print(f"Model reply: {reply}")
-    return reply
+    partial_message = ""
+    for output in stream:
+        token = output['choices'][0]['text']
+        partial_message += token
+        yield partial_message

-# Gradio UI
-iface = gr.Interface(
+# --- 4. The Enhanced Chatbot UI (Perfect for Spaces!) ---
+iface = gr.ChatInterface(
     fn=chat,
-    inputs="text",
-    outputs="text",
-    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
-    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
+    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
+    description="A sleek, streaming chat interface running on a CPU Space.",
+    chatbot=gr.Chatbot(height=500),
+    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
+    theme="soft",
+    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
+    cache_examples=False,
+    retry_btn=None,
+    undo_btn="Delete Previous",
+    clear_btn="Clear Chat",
 )

-iface.launch()
+# --- Pro-Tip: Create a requirements.txt file for your Space! ---
+# Your Space needs to know what libraries to install. Create a file
+# named `requirements.txt` in your repository with the following lines:
+#
+# gradio
+# llama-cpp-python
+# huggingface_hub
+
+if __name__ == "__main__":
+    iface.launch()
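
A note on the new thread logic: `os.cpu_count()` reports the host machine's logical CPUs, which inside a containerized Space can be higher than the cores the Space is actually allocated. A hedged alternative sketch, using the Linux-only `os.sched_getaffinity` (the `allocated_threads` helper and its fallback are illustrative, not part of this commit):

```python
import os

# Sketch, not part of this commit: derive a thread count from the CPU set the
# process is actually allowed to run on, falling back to os.cpu_count().
def allocated_threads() -> int:  # hypothetical helper
    try:
        # Linux-only: the CPUs this process may be scheduled on, which tends to
        # respect container CPU limits better than os.cpu_count().
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count() or 1

n_threads = allocated_threads()
```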
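To make the prompt construction in `chat()` concrete, here is what `full_prompt` expands to for one prior turn. The history pair and message are made-up values; the template strings are copied from the diff above:

```python
# (user, assistant) pairs, as gr.ChatInterface passes them to chat()
history = [("Hello!", "Hi there! How can I help?")]
message = "What is the capital of India?"

history_prompt = ""
for user_msg, assistant_msg in history:
    history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"

full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.

{history_prompt}### User:
{message}

### Assistant:"""
print(full_prompt)
```

The stop strings `"### User:"` and `"### Assistant:"` in the `llm()` call then keep the model from generating the next turn of the conversation itself.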
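One hedged follow-up to the requirements.txt pro-tip: the `retry_btn`, `undo_btn`, and `clear_btn` keyword arguments, and the tuple-style `history` this code iterates over, belong to the Gradio 4.x `gr.ChatInterface` API and were removed in Gradio 5, so an unpinned `gradio` line may break this Space on a fresh build. Pinning along these lines (the `<5` constraint is a suggestion, not part of the commit) would be safer:

```
gradio<5
llama-cpp-python
huggingface_hub
```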
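Because `iface.launch()` now sits behind an `if __name__ == "__main__":` guard, `app.py` can also be imported and the generator exercised without starting the Gradio server. A minimal smoke-test sketch (importing will still download and load the model, which takes a while on first run):

```python
from app import chat

# chat() is a generator that yields the accumulated reply after each token.
reply = ""
for partial in chat("Say hello in one sentence.", history=[]):
    reply = partial
print(reply)
```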