Spaces:
Runtime error
Runtime error
File size: 1,921 Bytes
51e03da 6331ef2 ff380d5 51e03da ff380d5 51e03da ff380d5 51e03da ff380d5 51e03da ff380d5 51e03da ff380d5 51e03da ff380d5 51e03da ff380d5 51e03da ff380d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Read the Hugging Face access token from the Space's environment/secrets.
# May be None for public models; from_pretrained accepts token=None.
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# The fine-tuned model to serve.
model_id = "alphaoumardev/Llama3-8B-noryu-instruct"

# Load tokenizer and model, authenticating with the token.
# NOTE: `use_auth_token=` is deprecated in transformers and has been removed
# in recent releases; `token=` is the supported parameter.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
model.eval()  # inference only — disable dropout etc.

# Move the model to GPU when available, otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def chat(user_input, history=None):
    """Generate one assistant turn and return the updated conversation.

    Args:
        user_input: The user's latest message.
        history: Running conversation as a list of
            ``{"role": ..., "content": ...}`` dicts. Defaults to a fresh
            list when not supplied.

    Returns:
        A tuple ``(chat_history, history)`` where ``chat_history`` is the
        ``[(user_msg, assistant_msg), ...]`` pair list Gradio's Chatbot
        expects, and ``history`` is the full role/content dict list kept
        in the session state.
    """
    # BUG FIX: the original used a mutable default (`history=[]`), which is
    # shared across calls — conversation state would leak between sessions
    # whenever the default was used. Use a None sentinel instead.
    history = [] if history is None else history
    history.append({"role": "user", "content": user_input})

    # Build a plain role-prefixed transcript as the prompt, ending with an
    # open "assistant:" marker for the model to complete.
    prompt = "".join(f"{turn['role']}: {turn['content']}\n" for turn in history)
    prompt += "assistant:"

    # Tokenize on the model's device and sample a completion.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # generate() echoes the prompt, so everything after the last
    # "assistant:" marker is the newly generated reply.
    assistant_reply = output_text.split("assistant:")[-1].strip()
    history.append({"role": "assistant", "content": assistant_reply})

    # Gradio's Chatbot component displays (user, assistant) tuples; pair
    # each user turn with the turn that immediately follows it.
    chat_history = [
        (h["content"], history[i + 1]["content"])
        for i, h in enumerate(history[:-1])
        if h["role"] == "user"
    ]
    return chat_history, history
# Gradio Blocks UI: a chat transcript display, hidden per-session state that
# holds the role/content history, and a textbox that submits on Enter.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])  # memory of the conversation (per browser session)
    txt = gr.Textbox(show_label=False, placeholder="Type your message...")
    # On submit, chat() receives (message, history) and returns the pair
    # list for the Chatbot plus the updated history for the State.
    txt.submit(chat, [txt, state], [chatbot, state])
demo.launch()
|