Spaces:
Running
Running
File size: 1,237 Bytes
fd1145f e347bc6 359a761 fd1145f e347bc6 fd1145f e4cfecd e347bc6 359a761 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import gradio as gr
# Automatically download model to local path
# hf_hub_download fetches (and caches) the 4-bit quantized GGUF weights from
# the Hugging Face Hub and returns the local path inside the HF cache dir,
# so repeated startups reuse the cached file instead of re-downloading.
model_path = hf_hub_download(
repo_id="TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
filename="openhermes-2.5-mistral-7b.Q4_K_M.gguf"
)
# Load the model
# n_ctx=2048 caps the combined prompt + generation window at 2048 tokens,
# which is why chatbot_response truncates the conversation history.
llm = Llama(model_path=model_path, n_ctx=2048)
def chatbot_response(message, history):
    """Generate a reply from the local GGUF model for gr.ChatInterface.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns as supplied by gr.ChatInterface.

    Returns
    -------
    str
        The model's reply, or an ``"Error: ..."`` string if inference fails.
    """
    # OpenHermes-2.5-Mistral-7B is trained on the ChatML template
    # (<|im_start|>role\n...<|im_end|>), not the <|user|>/<|end|> style the
    # original code used. With the wrong template the stop token never
    # matches the model's end-of-turn marker, so every reply runs to
    # max_tokens, and response quality degrades.
    parts = []
    # Keep only the last 4 turns so the prompt stays within n_ctx=2048.
    for user_msg, bot_msg in history[-4:]:
        parts.append(f"<|im_start|>user\n{user_msg}<|im_end|>\n")
        parts.append(f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n")
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(parts)
    try:
        output = llm(prompt, max_tokens=256, stop=["<|im_end|>"], temperature=0.7)
        return output["choices"][0]["text"].strip()
    except Exception as e:
        # Best-effort boundary: surface the failure in the chat UI rather
        # than crashing the Gradio worker.
        return f"Error: {e}"
# Build the Gradio chat UI. ChatInterface wires chatbot_response to a chat
# widget and manages the message box, per-session history, and example
# buttons; the "soft" theme and title/description are purely cosmetic.
demo = gr.ChatInterface(
fn=chatbot_response,
title="🧠 Free CPU Chatbot (OpenHermes-2.5)",
description="A lightweight, high-quality chatbot that runs on CPU using llama.cpp",
theme="soft",
examples=["What's your name?", "Tell me a joke", "What is Python used for?"]
)
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    # share=True asks Gradio for a temporary public URL in addition to the
    # local server.
    demo.launch(share=True)
|