# NOTE(review): the lines that were here ("Spaces: Sleeping", file size,
# commit hashes, and a line-number gutter) were Hugging Face web-page chrome
# captured by the scrape, not part of the application source. Collapsed into
# this comment so the file parses as Python.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces
# --- Module-level model setup -------------------------------------------
# NOTE: these statements run at import time and download/load a ~2.7B-param
# model, so importing this module is slow and requires network + GPU.
model_id = "microsoft/phi-2"
device = "cuda" # ZeroGPU space
precision = torch.float16  # half precision halves GPU memory vs float32
# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id, torch_dtype=precision
).to(device)
@spaces.GPU
def respond(message, history):
    """Generate an assistant reply to *message* given the chat *history*.

    Args:
        message: The user's latest input string.
        history: List of (user, assistant) pairs from the Chatbot component;
            may be ``None`` on the first turn.

    Returns:
        ``("", updated_history)`` — the empty string clears the input
        textbox, the history list feeds the Chatbot component.
    """
    history = history or []
    # Rebuild the whole conversation as a flat "User:/Assistant:" transcript.
    prompt = "".join(f"User: {u}\nAssistant: {b}\n" for u, b in history)
    prompt += f"User: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # inference_mode: skip autograd bookkeeping — generation never backprops.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            # phi-2's tokenizer defines no pad token; reuse EOS to silence
            # the "Setting pad_token_id to eos_token_id" runtime warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the first line of the continuation past the prompt, so any
    # hallucinated follow-up "User: ..." turns are discarded.
    response = decoded[len(prompt):].strip().split("\n")[0]
    # Append the new (user, assistant) pair for the Chatbot component.
    history.append([message, response])
    # Return "" to clear the textbox, and the updated history for the Chatbot.
    return "", history
# --- Gradio UI ----------------------------------------------------------
# (Fixed: the scraped source had a stray trailing " |" after demo.launch,
# which was a SyntaxError.)
with gr.Blocks() as demo:
    gr.Markdown("## Phi-2 Chatbot (ZeroGPU‑safe)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    clear = gr.Button("Clear")
    # on submit: feed (msg, history) → (clear box, updated history)
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    # Clearing sets the Chatbot value to None; queue=False runs immediately.
    clear.click(lambda: None, None, chatbot, queue=False)
# NOTE(review): share=True is ignored when running inside a HF Space — it
# only matters for local runs; kept as-is to preserve behavior.
demo.launch(share=True)