Spaces: Runtime error
Update app.py
app.py CHANGED
```diff
@@ -2,7 +2,7 @@ import gradio as gr
 
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-#
+# Download the model from Hugging Face Hub
 print("===== Downloading model... =====")
 model_path = hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
@@ -10,107 +10,43 @@ model_path = hf_hub_download(
 )
 print(f"Model downloaded to: {model_path}")
 
+# Load the model with llama-cpp-python
 print("===== Loading model... =====")
 llm = Llama(
     model_path=model_path,
-    n_ctx=
-    n_threads=8
-    n_gpu_layers=0
+    n_ctx=2048,  # adjust as per RAM
+    n_threads=8  # adjust based on your Space CPU (8 is good default)
 )
 print("Model loaded.")
 
-def chat_stream(user_message, chat_history, system_prompt, temperature, top_p):
-    messages = [{"role": "system", "content": system_prompt}]
-    for human, ai in chat_history:
-        messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": ai})
-    messages.append({"role": "user", "content": user_message})
-
-    chat_history.append([user_message, ""])
-
-    stream = llm.create_chat_completion(
-        messages=messages,
-        temperature=temperature,
-        top_p=top_p,
-        max_tokens=1024,
-        stream=True,
-    )
-
-    for chunk in stream:
-        delta = chunk['choices'][0]['delta']
-        if 'content' in delta:
-            chat_history[-1][1] += delta['content']
-            yield chat_history
-
-# --- 3. ADVANCED GRADIO UI ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="#chatbot { min-height: 600px; }") as demo:
-    gr.Markdown("## 🐬 Dolphin 3.0 - Upgraded Chat Interface")
-
-    with gr.Row():
-        with gr.Column(scale=4):
-            chatbot = gr.Chatbot(
-                [],
-                elem_id="chatbot",
-                bubble_full_width=False,
-                avatar_images=(("human.png", "dolphin.png")),
-                label="Chat with Dolphin 3.0"
-            )
-            chat_history = gr.State([])
-
-            with gr.Row():
-                message = gr.Textbox(
-                    label="Type your message here...",
-                    placeholder="What's on your mind?",
-                    lines=1,
-                    scale=7,
-                )
-                send_button = gr.Button("Send", variant="primary", scale=1)
-
-        with gr.Column(scale=1):
-            with gr.Accordion("Advanced Settings", open=False):
-                system_prompt = gr.Textbox(value=default_system_prompt, label="System Prompt", lines=3)
-                temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, label="Temperature")
-                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p")
-
-            clear_button = gr.Button("🗑️ Clear Chat")
-            regenerate_button = gr.Button("🔄 Regenerate")
-
-    # --- 4. EVENT HANDLERS ---
-    def user_submit(user_message, history, system, temp, top_p):
-        yield gr.update(value=""), history + [[user_message, None]]
-        for updated_history in chat_stream(user_message, history, system, temp, top_p):
-            yield gr.update(value=""), updated_history
-
-    message.submit(user_submit, [message, chat_history, system_prompt, temperature, top_p], [message, chatbot])
-    send_button.click(user_submit, [message, chat_history, system_prompt, temperature, top_p], [message, chatbot])
-
-    def clear_chat():
-        return [], []
-
-    clear_button.click(clear_chat, [], [chatbot, chat_history], queue=False)
-
-    def regenerate_response(history, system, temp, top_p):
-        last_user_message = history[-1][0]
-        reduced_history = history[:-1]
-        for updated_history in chat_stream(last_user_message, reduced_history, system, temp, top_p):
-            yield updated_history
-
-    regenerate_button.click(
-        regenerate_response,
-        [chat_history, system_prompt, temperature, top_p],
-        [chatbot]
-    )
-
-if __name__ == "__main__":
-    demo.queue()
-    # The show_api=False parameter tells Gradio to not build the API page, avoiding the bug.
-    demo.launch(debug=True, show_api=False)
+# Chat function with Dolphin 3.0 template
+def chat(user_input):
+    print(f"User input: {user_input}")
+    full_prompt = f"""### System:
+You are Dolphin 3.0, a helpful and friendly AI assistant.
+
+### User:
+{user_input}
+
+### Assistant:"""
+
+    output = llm(
+        full_prompt,
+        max_tokens=512,
+        stop=["</s>", "### User:", "### Assistant:"]
+    )
+
+    reply = output['choices'][0]['text'].strip()
+    print(f"Model reply: {reply}")
+    return reply
+
+# Gradio UI
+iface = gr.Interface(
+    fn=chat,
+    inputs="text",
+    outputs="text",
+    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
+    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
+)
+
+iface.launch()
```
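The new `chat()` assembles a `### System:` / `### User:` prompt by hand, while the removed code used `llm.create_chat_completion()`. Dolphin GGUF files ship a chat template in their metadata, and llama-cpp-python applies it automatically through the chat API, so a hand-built layout that drifts from the model's trained template can degrade replies. A minimal sketch of the same single-turn function on the chat API (the name `chat_via_template` and the inlined system prompt are illustrative):

```python
# Sketch: the same single-turn chat via llama-cpp-python's chat API,
# which applies the chat template embedded in the GGUF metadata.
# Assumes `llm` is the Llama instance created in app.py above.
def chat_via_template(user_input):
    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are Dolphin 3.0, a helpful and friendly AI assistant."},
            {"role": "user", "content": user_input},
        ],
        max_tokens=512,
    )
    # Non-streaming chat completions return the text under choices[0]["message"].
    return output["choices"][0]["message"]["content"].strip()
```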
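If the Blocks UI was dropped only to get past the runtime error, `gr.ChatInterface` offers a middle ground: a ready-made chat UI with history, without the custom layout, state wiring, and event handlers that were removed. A sketch assuming a Gradio version that passes history as `(user, assistant)` pairs; `chat_fn` is an illustrative name:

```python
import gradio as gr

# Assumes `llm` is the Llama instance created in app.py above.
def chat_fn(message, history):
    # Fold prior (user, assistant) turns into a messages list,
    # mirroring what the removed chat_stream() did.
    messages = [{"role": "system", "content": "You are Dolphin 3.0, a helpful and friendly AI assistant."}]
    for user_turn, ai_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": ai_turn})
    messages.append({"role": "user", "content": message})
    out = llm.create_chat_completion(messages=messages, max_tokens=512)
    return out["choices"][0]["message"]["content"]

demo = gr.ChatInterface(chat_fn, title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat")
demo.launch()
```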