Spaces: Runtime error
import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Read the HF token from the environment; it must be configured as a secret
# in the Space settings, otherwise hf_token is None and gated downloads fail
hf_token = os.getenv("HUGGINGFACE_TOKEN")
# Your fine-tuned model
model_id = "alphaoumardev/Llama3-8B-noryu-instruct"

# Authenticate when loading the tokenizer and model; `use_auth_token` is
# deprecated in recent transformers releases in favor of `token`
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
model.eval()
# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
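# Note: an 8B-parameter model loaded in default fp32 precision needs roughly
# 32 GB of RAM, which exceeds the free CPU Space tier and is a common cause of
# this kind of runtime error. A hedged alternative, assuming GPU hardware and
# that the accelerate package is installed, loads in half precision instead:
#
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id, token=hf_token,
#       torch_dtype=torch.float16, device_map="auto",
#   )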
def chat(user_input, history=None):
    # Avoid a mutable default argument; Gradio passes the state explicitly
    if history is None:
        history = []
    history.append({"role": "user", "content": user_input})
    # Format the prompt as plain role-prefixed turns
    prompt = ""
    for turn in history:
        prompt += f"{turn['role']}: {turn['content']}\n"
    prompt += "assistant:"
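    # Llama 3 instruct checkpoints are trained on a specific chat template, so
    # plain "role: content" prompts often degrade output quality. Assuming the
    # tokenizer ships a chat template (transformers >= 4.34), a sketch using it:
    #
    #   prompt = tokenizer.apply_chat_template(
    #       history, tokenize=False, add_generation_prompt=True
    #   )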
    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
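    # Assumption: Llama 3 instruct models end assistant turns with <|eot_id|>
    # rather than the plain EOS token, so generation may run past the answer.
    # A hedged fix is passing that id as an extra stop token:
    #
    #   eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    #   outputs = model.generate(**inputs, max_new_tokens=200,
    #                            eos_token_id=[tokenizer.eos_token_id, eot_id])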
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    assistant_reply = output_text.split("assistant:")[-1].strip()
    history.append({"role": "assistant", "content": assistant_reply})
    # gr.Chatbot (classic tuple mode) expects a list of (user, assistant) pairs
    chat_history = [
        (h["content"], history[i + 1]["content"])
        for i, h in enumerate(history[:-1])
        if h["role"] == "user"
    ]
    return chat_history, history
# Gradio Blocks UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])  # memory of the conversation
    txt = gr.Textbox(show_label=False, placeholder="Type your message...")
    txt.submit(chat, [txt, state], [chatbot, state])

demo.launch()
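The tuple conversion inside chat exists only because the classic gr.Chatbot
renders (user, assistant) pairs. Recent Gradio releases (roughly 4.44 and
later) also accept the {"role": ..., "content": ...} dict format directly,
which would let the function return the history unchanged. A minimal sketch,
assuming such a Gradio version is pinned in requirements.txt:

with gr.Blocks() as demo:
    # type="messages" renders openai-style role/content dicts directly,
    # so no tuple conversion is needed and chat can return history twice
    chatbot = gr.Chatbot(type="messages")
    state = gr.State([])
    txt = gr.Textbox(show_label=False, placeholder="Type your message...")
    txt.submit(chat, [txt, state], [chatbot, state])

demo.launch()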