# Hugging Face Spaces app: streaming llama.cpp (GGUF) chat UI on CPU.
# NOTE(review): the "Spaces:" / "Runtime error" lines here were web-page
# paste residue, not code; replaced with this header.
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. Model Downloading ---
# hf_hub_download caches the file in the HF cache and returns its local path,
# so repeated startups do not re-download.
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="RichardErkhov/openai-community_-_gpt2-xl-gguf",
    filename="gpt2-xl.Q6_K.gguf",
)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
# os.cpu_count() can return None on exotic platforms; fall back to 1 thread
# so the Llama constructor and the status print below always get an int.
n_threads = os.cpu_count() or 1
llm = Llama(
    model_path=model_path,
    n_ctx=2048,        # context window: prompt + completion token budget
    n_threads=n_threads,
    n_gpu_layers=0,    # CPU-only Space: keep every layer on the CPU
)
print(f"Model loaded for CPU execution with {n_threads} threads.")
# --- 3. Chat Function with Streaming (No changes) --- | |
# --- 3. Chat Function with Streaming ---
def chat(message, history):
    """Stream a model completion for *message* given the chat *history*.

    Parameters:
        message: the user's new input string.
        history: list of (user_msg, assistant_msg) pairs, as supplied by
            gr.ChatInterface in tuple format.

    Yields the accumulated partial response after each token, which Gradio
    renders as a live-updating message.
    """
    # Rebuild the running conversation in the "### Role:" prompt format.
    history_prompt = "".join(
        f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        for user_msg, assistant_msg in history
    )
    # NOTE(review): the system line claims "Dolphin 3.0", but the model loaded
    # above is GPT-2 XL, a base (non-chat) model -- confirm which model is
    # actually intended before trusting this persona/prompt format.
    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.
{history_prompt}### User:
{message}
### Assistant:"""
    stream = llm(
        full_prompt,
        max_tokens=1024,
        # Stop on role markers so the model does not keep writing both sides
        # of the dialogue. NOTE(review): "</s>" is not GPT-2's EOS token
        # ("<|endoftext|>") -- verify against the loaded model's tokenizer.
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True,
    )
    partial_message = ""
    for output in stream:
        # Each streamed chunk carries one text fragment in OpenAI-style shape.
        partial_message += output['choices'][0]['text']
        yield partial_message
# --- 4. The Enhanced Chatbot UI (MAXIMUM COMPATIBILITY) ---
# No custom button arguments are passed, so this works on older Gradio
# versions too; Gradio adds the default 'Undo' and 'Clear' buttons itself.
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    # Caching would run the model on every example at startup; skip it.
    cache_examples=False,
)

if __name__ == "__main__":
    iface.launch()