# Spaces: Sleeping
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import torch | |
import spaces | |
# Checkpoint to serve, target device, and the dtype the weights are loaded in.
model_id = "microsoft/phi-2"
device = "cuda"  # ZeroGPU space
precision = torch.float16

# Build the tokenizer and the causal LM from the same checkpoint, then place
# the model on the target device as a separate step.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=precision)
model = model.to(device)
@spaces.GPU  # ZeroGPU only attaches a GPU while a decorated function is running
def respond(message, history):
    """Generate one assistant reply and add the exchange to the chat history.

    Args:
        message: Text the user just submitted.
        history: Previous turns as a list of (user, bot) pairs; ``None`` or an
            empty list starts a fresh conversation.

    Returns:
        A 2-tuple ``("", history)``: the empty string clears the textbox and
        ``history`` is the updated list of turns for the Chatbot.
    """
    # Work on a copy so the list handed in by the caller is never mutated.
    history = list(history or [])

    # Nothing to answer: leave the conversation untouched instead of sampling
    # a reply to an empty prompt.
    if not message or not message.strip():
        return "", history

    # Flatten the conversation into the plain "User:/Assistant:" transcript
    # used as the prompt, ending with an open assistant turn.
    turns = [f"User: {u}\nAssistant: {b}\n" for u, b in history]
    turns.append(f"User: {message}\nAssistant:")
    prompt = "".join(turns)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
        # Set explicitly so generate() does not have to fall back to a
        # default padding id on every call.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Drop the prompt at the token level. Slicing the decoded string by
    # len(prompt) is fragile because decoding is not guaranteed to reproduce
    # the prompt text character for character.
    prompt_len = inputs["input_ids"].shape[1]
    decoded = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Keep only the first line so a hallucinated follow-up "User:" turn is
    # never shown as part of the reply.
    response = decoded.strip().split("\n")[0]

    # Append the exchange as a [user, bot] pair of plain strings.
    history.append([message, response])
    # Return "" to clear the textbox, and the updated history for the Chatbot.
    return "", history
# Chat UI: a Chatbot pane, a textbox whose submit event drives `respond`,
# and a button that resets the conversation.
with gr.Blocks() as demo:
    gr.Markdown("## Phi-2 Chatbot (ZeroGPU‑safe)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    clear = gr.Button("Clear")

    # On submit: feed (msg, history) -> (cleared box, updated history).
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    # Returning None empties the Chatbot; this UI-only update skips the queue.
    clear.click(lambda: None, None, chatbot, queue=False)

# share=True is not supported on Hugging Face Spaces (Gradio warns and ignores
# it there), so launch with the defaults; the Space already serves the app.
demo.launch()