import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. Model Downloading ---
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
    filename="Dolphin3.0-Llama3.2-3B-Q4_K_M.gguf",
)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
# os.cpu_count() reports the number of CPUs visible to the process
# (often 2 on the free tier of Spaces).
n_threads = os.cpu_count()
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=n_threads,
    # Free Spaces run on CPU only, so n_gpu_layers must stay 0;
    # a non-zero value fails without a GPU upgrade.
    n_gpu_layers=0,
)
print(f"Model loaded for CPU execution with {n_threads} threads.")

# --- 3. Chat Function with Streaming ---
def chat(message, history):
    # Streaming matters even more on a slow CPU: the user sees tokens as
    # they are generated instead of waiting for the whole reply.
    history_prompt = ""
    for user_msg, assistant_msg in history:
        history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.
{history_prompt}### User:
{message}
### Assistant:"""
    stream = llm(
        full_prompt,
        max_tokens=1024,
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True,
    )
    partial_message = ""
    for output in stream:
        token = output['choices'][0]['text']
        partial_message += token
        yield partial_message
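
# Note: llama-cpp-python also exposes llm.create_chat_completion(messages=[...]),
# which can apply the chat template stored in the GGUF metadata; the manual
# "### User:/### Assistant:" prompt above is kept for transparency.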

# --- 4. The Chatbot UI ---
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear Chat",
)

# --- Pro-Tip: Create a requirements.txt file for your Space! ---
# Your Space needs to know what libraries to install. Create a file
# named `requirements.txt` in your repository with the following lines:
#
# gradio
# llama-cpp-python
# huggingface_hub
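#
# Note: the retry_btn / undo_btn / clear_btn arguments used by gr.ChatInterface
# above exist in Gradio 4.x but were removed in Gradio 5, so you may need to
# pin the version, e.g. `gradio<5`.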

if __name__ == "__main__":
    iface.launch()