from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import gradio as gr

# Download the GGUF model into the local Hugging Face cache and get its path
model_path = hf_hub_download(
    repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    filename="openhermes-2.5-mistral-7b.Q4_K_M.gguf"
)

# Load the model with a 2048-token context window
llm = Llama(model_path=model_path, n_ctx=2048)
def chatbot_response(message, history):
    # OpenHermes-2.5 was trained on the ChatML prompt format, so each turn is
    # wrapped in <|im_start|>role ... <|im_end|> markers. With ChatInterface's
    # default settings, history arrives as (user, assistant) tuples.
    prompt = ""
    for user, bot in history[-4:]:  # keep the last 4 turns to fit the 2048-token context
        prompt += f"<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n{bot}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    try:
        output = llm(prompt, max_tokens=256, stop=["<|im_end|>"], temperature=0.7)
        return output["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error: {e}"
demo = gr.ChatInterface(
    fn=chatbot_response,
    title="🧠 Free CPU Chatbot (OpenHermes-2.5)",
    description="A lightweight, high-quality chatbot that runs on CPU using llama.cpp",
    theme="soft",
    examples=["What's your name?", "Tell me a joke", "What is Python used for?"]
)
if __name__ == "__main__":
    demo.launch(share=True)  # share=True gives a public link when run locally; Spaces ignores it
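
To deploy this as a Hugging Face Space (or run it locally), the three imports map to three pip packages. A minimal requirements.txt sketch follows; the package names are the real PyPI names, but the unpinned versions are an assumption, so pin whatever versions work for you:

    llama-cpp-python
    huggingface_hub
    gradio

Note that installing llama-cpp-python from PyPI compiles llama.cpp during the build, which can take several minutes on the free CPU Spaces hardware, and the first request after startup is slower because the model file has to be downloaded and loaded.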