# llama_gguf / app.py — Hugging Face Space entry point
# (originally uploaded by K00B404, commit 138c1ab)
# Gradio chat app backed by a local llama.cpp model
# (DarkIdol Llama 3.1 8B Instruct, IQ2_M GGUF quantization).
import os

import gradio as gr
from llama_cpp import Llama

# Constants
MODEL_FILENAME = "DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-IQ2_M.gguf"
MODEL_URL = f"https://huggingface.co/bartowski/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF/resolve/main/{MODEL_FILENAME}"
SYSTEM_PROMPT = "You are a helpful, uncensored AI assistant who responds clearly and concisely."

# Step 1: Ensure model is present. Download only when missing — the original
# also ran an unconditional wget at import time, which re-fetched the multi-GB
# file (saved as model.gguf.1, model.gguf.2, ...) on every app restart.
if not os.path.exists(MODEL_FILENAME):
    print("Downloading model...")
    os.system(f"wget {MODEL_URL}")

# Step 2: Load model
llm = Llama(
    model_path="./" + MODEL_FILENAME,
    n_ctx=2048,       # context window size in tokens
    n_threads=4,
    use_mlock=True,   # lock model pages in RAM to avoid swapping
)
# Step 3: Prompt template using DarkIdol's format
def format_prompt(user_input, history):
    """Build a Llama-3-style chat prompt string.

    Concatenates the system prompt, every prior (user, assistant) turn in
    *history*, and the new *user_input*, using the <|start_header_id|> /
    <|eot_id|> special tokens, ending with an open assistant header so the
    model continues as the assistant.
    """
    turns = []
    for past_user, past_assistant in history:
        turns.append(
            f"<|start_header_id|>user<|end_header_id|>\n{past_user}<|eot_id|>"
            f"<|start_header_id|>assistant<|end_header_id|>\n{past_assistant}<|eot_id|>\n"
        )
    chat_history = "".join(turns)
    return (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>{SYSTEM_PROMPT}<|eot_id|>"
        f"{chat_history}"
        f"<|start_header_id|>user<|end_header_id|>\n{user_input}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n"
    )
# Step 4: Chat logic
def chat_with_llama(user_input, chat_history):
    """Run one inference turn and return the model's reply text.

    Formats the conversation with format_prompt, generates up to 256 tokens
    with the module-level llm, and returns the stripped completion text.
    """
    prompt = format_prompt(user_input, chat_history)
    result = llm(
        prompt,
        max_tokens=256,
        stop=["<|eot_id|>"],  # halt at the model's end-of-turn marker
        echo=False,           # return only the completion, not the prompt
    )
    return result["choices"][0]["text"].strip()
# Step 5: Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message", placeholder="Type something...")
    clear = gr.Button("Clear")
    state = gr.State([])  # list of (user, assistant) tuples across turns

    def user_submit(user_input, history):
        """Handle a submitted message: generate a reply, append the turn,
        clear the textbox, and refresh both the chat display and the state."""
        reply = chat_with_llama(user_input, history)
        # Build a new list instead of mutating the State list in place.
        history = history + [(user_input, reply)]
        return "", history, history

    # Write the updated history back to both the Chatbot and the State
    # explicitly. The original mutated the State list in place and omitted
    # `state` from the outputs, which worked only by object identity.
    msg.submit(user_submit, [msg, state], [msg, chatbot, state])
    clear.click(lambda: ([], []), None, [state, chatbot])

demo.launch()