"""Streaming GPT-2 XL chat demo for a CPU-only Hugging Face Space.

Downloads a GGUF quantization of GPT-2 XL, loads it with llama.cpp,
and serves a streaming gr.ChatInterface.
"""
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. Model Downloading ---
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="RichardErkhov/openai-community_-_gpt2-xl-gguf",
    filename="gpt2-xl.Q6_K.gguf",
)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (optimized for a CPU-only HF Space) ---
print("===== Loading model... =====")
# Bug fix: os.cpu_count() can return None on some platforms; fall back to a
# sane default instead of passing None to llama.cpp.
n_threads = os.cpu_count() or 4
llm = Llama(
    model_path=model_path,
    n_ctx=2048,        # context window, in tokens
    n_threads=n_threads,
    n_gpu_layers=0,    # force pure-CPU inference (no GPU on this Space)
)
print(f"Model loaded for CPU execution with {n_threads} threads.")


# --- 3. Chat Function with Streaming ---
def chat(message, history):
    """Generate a streamed assistant reply for *message* given *history*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns as supplied by gr.ChatInterface.
        NOTE(review): this is the legacy tuple history format; recent Gradio
        versions default to ``type="messages"`` (list of dicts) — confirm
        against the pinned Gradio version.

    Yields
    ------
    str
        The partial assistant reply so far; Gradio re-renders the message
        with each yielded value, producing the streaming effect.
    """
    # Re-serialize prior turns into the "### User / ### Assistant" template
    # used by the prompt below.
    history_prompt = ""
    for user_msg, assistant_msg in history:
        history_prompt += (
            f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        )

    # NOTE(review): the persona says "Dolphin 3.0" but the loaded model is
    # GPT-2 XL — the branding is misleading; confirm whether this is intended.
    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.

{history_prompt}### User:
{message}

### Assistant:"""

    # Bug fix: the original stop list contained an empty string (""), which is
    # a useless stop sequence (likely a stripped special token). GPT-2's
    # end-of-text marker is "<|endoftext|>", so stop on that instead.
    stream = llm(
        full_prompt,
        max_tokens=1024,
        stop=["<|endoftext|>", "### User:", "### Assistant:"],
        stream=True,
    )

    partial_message = ""
    for output in stream:
        token = output["choices"][0]["text"]
        partial_message += token
        yield partial_message


# --- 4. The Enhanced Chatbot UI (MAXIMUM COMPATIBILITY) ---
# Custom button arguments are deliberately omitted so this works on older
# Gradio versions; Gradio adds the default 'Undo' and 'Clear' buttons itself.
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(
        placeholder="Ask me something... I'm all yours.",
        container=False,
        scale=7,
    ),
    theme="soft",
    examples=[
        ["Hello!"],
        ["Write a short poem about the stars."],
        ["What is the capital of India?"],
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    iface.launch()