import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. Model Downloading ---
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf",
)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
# os.cpu_count() reports the number of CPUs visible to the process
# (often 2 on the free tier of Spaces).
n_threads = os.cpu_count()
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=n_threads,
    # Free Spaces run on CPU only, so n_gpu_layers must stay 0;
    # a non-zero value fails without a GPU upgrade.
    n_gpu_layers=0,
)
print(f"Model loaded for CPU execution with {n_threads} threads.")

# --- 3. Chat Function with Streaming ---
def chat(message, history):
    # Streaming matters even more on a slow CPU: the user sees tokens as
    # they are generated instead of waiting for the whole reply.
    history_prompt = ""
    for user_msg, assistant_msg in history:
        history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.
{history_prompt}### User:
{message}
### Assistant:"""
    stream = llm(
        full_prompt,
        max_tokens=1024,
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True,
    )
    partial_message = ""
    for output in stream:
        token = output['choices'][0]['text']
        partial_message += token
        yield partial_message
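
# Note: llama-cpp-python also exposes llm.create_chat_completion(messages=[...]),
# which can apply the chat template stored in the GGUF metadata; the manual
# "### User:/### Assistant:" prompt above is kept for transparency.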

# --- 4. The Chatbot UI ---
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear Chat",
)

# --- Pro-Tip: Create a requirements.txt file for your Space! ---
# Your Space needs to know what libraries to install. Create a file
# named `requirements.txt` in your repository with the following lines:
#
# gradio
# llama-cpp-python
# huggingface_hub
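#
# Note: the retry_btn / undo_btn / clear_btn arguments used by gr.ChatInterface
# above exist in Gradio 4.x but were removed in Gradio 5, so you may need to
# pin the version, e.g. `gradio<5`.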

if __name__ == "__main__":
    iface.launch()