# (paste residue from the Hugging Face Spaces status page, kept as a comment
#  so the file parses: "Spaces: / Runtime error / Runtime error")
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os

# --- 1. Model Downloading ---
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf",
)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
# os.cpu_count() may return None on some platforms; fall back to 2, the
# typical free-tier Space CPU allocation.
n_threads = os.cpu_count() or 2
llm = Llama(
    model_path=model_path,
    n_ctx=2048,            # context window: prompt + completion must fit here
    n_threads=n_threads,
    # Free Spaces run on CPU only. A non-zero n_gpu_layers causes errors
    # without a GPU upgrade, so keep all layers on the CPU.
    n_gpu_layers=0,
)
print(f"Model loaded for CPU execution with {n_threads} threads.")
# --- 3. Chat Function with Streaming (Still the best!) --- | |
def chat(message, history):
    """Stream a reply from the local Dolphin GGUF model.

    Args:
        message: The user's latest message (str).
        history: Prior conversation turns. Accepts either the legacy Gradio
            "tuples" format ([(user, assistant), ...]) or the Gradio 5
            "messages" format ([{"role": ..., "content": ...}, ...]).

    Yields:
        str: The progressively accumulated assistant reply, so the UI can
        render tokens as they arrive (streaming matters even more on CPU).
    """
    # Rebuild the transcript in the model's "### Role:" prompt format.
    history_prompt = ""
    for turn in history:
        if isinstance(turn, dict):
            # Gradio 5 "messages" format: one dict per message. Tuple
            # unpacking a dict here would silently yield its keys instead.
            label = "User" if turn.get("role") == "user" else "Assistant"
            history_prompt += f"### {label}:\n{turn.get('content', '')}\n\n"
        else:
            # Legacy "tuples" format: one (user, assistant) pair per turn.
            user_msg, assistant_msg = turn
            history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.
{history_prompt}### User:
{message}
### Assistant:"""
    stream = llm(
        full_prompt,
        max_tokens=1024,
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True,
    )
    partial_message = ""
    for output in stream:
        token = output['choices'][0]['text']
        partial_message += token
        yield partial_message
# --- 4. The Enhanced Chatbot UI (Perfect for Spaces!) --- | |
# --- 4. The Chatbot UI ---
# NOTE(review): retry_btn / undo_btn / clear_btn were removed from
# gr.ChatInterface in Gradio 5.x and raise TypeError there — a likely cause
# of the Space's "Runtime error". They are omitted; Gradio 5 ships its own
# retry/undo/clear controls. Confirm against the pinned gradio version.
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    cache_examples=False,
)
# --- Pro-Tip: Create a requirements.txt file for your Space! ---
# Your Space needs to know what libraries to install. Create a file
# named `requirements.txt` in your repository with the following lines:
#
# gradio
# llama-cpp-python
# huggingface_hub
# Launch the Gradio app only when run as a script (Spaces executes app.py
# directly; importing this module elsewhere must not start a server).
if __name__ == "__main__":
    iface.launch()