import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
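# Assumed requirements.txt for this Space (not shown in the source; package
# names only, unpinned):
#   gradio
#   huggingface_hub
#   llama-cpp-python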
# --- 1. MODEL LOADING ---
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf"
)
print(f"Model downloaded to: {model_path}")

print("===== Loading model... =====")
llm = Llama(
    model_path=model_path,
    n_ctx=1096,       # context window size in tokens
    n_threads=8,      # CPU threads used for inference
    n_gpu_layers=0    # 0 = CPU-only; raise if a GPU build of llama-cpp-python is available
)
print("Model loaded.")
# --- 2. UPGRADED CHAT & STREAMING LOGIC ---
default_system_prompt = "You are Dolphin 3.0, a helpful and friendly AI assistant."

def chat_stream(user_message, chat_history, system_prompt, temperature, top_p):
    # Rebuild the full conversation in OpenAI-style message format.
    messages = [{"role": "system", "content": system_prompt}]
    for human, ai in chat_history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_message})

    # Append a placeholder pair that the stream fills in token by token.
    # This mutates chat_history in place, which keeps the gr.State list
    # in sync with what the Chatbot displays.
    chat_history.append([user_message, ""])

    stream = llm.create_chat_completion(
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=1024,
        stream=True,
    )
    for chunk in stream:
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:
            chat_history[-1][1] += delta['content']
            yield chat_history
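# Note on the stream above: each chunk mirrors the OpenAI delta format, roughly
#   {"choices": [{"delta": {"content": "Hel"}, "index": 0, ...}]}
# and the first chunk usually carries only {"role": "assistant"} with no text,
# which is why chat_stream checks for the 'content' key before concatenating.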
# --- 3. ADVANCED GRADIO UI ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="#chatbot { min-height: 600px; }") as demo:
    gr.Markdown("## 💬 Dolphin 3.0 - Upgraded Chat Interface")
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                [],
                elem_id="chatbot",
                bubble_full_width=False,
                # These image files are assumed to ship alongside the app.
                avatar_images=("human.png", "dolphin.png"),
                label="Chat with Dolphin 3.0"
            )
            chat_history = gr.State([])
            with gr.Row():
                message = gr.Textbox(
                    label="Type your message here...",
                    placeholder="What's on your mind?",
                    lines=1,
                    scale=7,
                )
                send_button = gr.Button("Send", variant="primary", scale=1)
        with gr.Column(scale=1):
            with gr.Accordion("Advanced Settings", open=False):
                system_prompt = gr.Textbox(value=default_system_prompt, label="System Prompt", lines=3)
                temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p")
            clear_button = gr.Button("🗑️ Clear Chat")
            regenerate_button = gr.Button("🔄 Regenerate")
    # --- 4. EVENT HANDLERS ---
    def user_submit(user_message, history, system, temp, top_p):
        # Clear the textbox immediately and show the user's message with a pending reply.
        yield gr.update(value=""), history + [[user_message, None]]
        for updated_history in chat_stream(user_message, history, system, temp, top_p):
            yield gr.update(value=""), updated_history

    message.submit(user_submit, [message, chat_history, system_prompt, temperature, top_p], [message, chatbot])
    send_button.click(user_submit, [message, chat_history, system_prompt, temperature, top_p], [message, chatbot])

    def clear_chat():
        return [], []

    clear_button.click(clear_chat, [], [chatbot, chat_history], queue=False)

    def regenerate_response(history, system, temp, top_p):
        if not history:
            return
        # Pop the last exchange in place so the gr.State list stays aliased to
        # the list chat_stream mutates; slicing (history[:-1]) would create a
        # copy and leave the stored state out of sync with the displayed chat.
        last_user_message, _ = history.pop()
        for updated_history in chat_stream(last_user_message, history, system, temp, top_p):
            yield updated_history

    regenerate_button.click(
        regenerate_response,
        [chat_history, system_prompt, temperature, top_p],
        [chatbot]
    )
# --- 5. LAUNCH THE APP (WITH THE BUG FIX) ---
if __name__ == "__main__":
    demo.queue()
    # show_api=False skips building the API/schema page, sidestepping a Gradio
    # bug that can crash the app at startup on some versions.
    demo.launch(debug=True, show_api=False)
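# To run locally (assuming the file is saved as app.py, the usual Spaces
# convention, and the dependencies listed above are installed):
#   python app.py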