import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces
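# `spaces` provides the @spaces.GPU decorator used on Hugging Face ZeroGPU
# Spaces: a GPU is attached only while a decorated function is running.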
model_id = "microsoft/phi-2"
device = "cuda" # ZeroGPU space
precision = torch.float16
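# Hedged fallback for running this file locally (an illustrative addition,
# not part of the original Space; on ZeroGPU, CUDA should report as
# available at startup, so this branch should not trigger there):
if not torch.cuda.is_available():
    device, precision = "cpu", torch.float32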
# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=precision
).to(device)
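# Note: on ZeroGPU the `spaces` package intercepts CUDA usage, so .to(device)
# at import time is safe even though a physical GPU is only attached while a
# @spaces.GPU-decorated function runs (my reading of ZeroGPU's behavior).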
@spaces.GPU
def respond(message, history):
    # history comes in as a list of [user, assistant] pairs
    history = history or []

    # Rebuild the whole conversation as a plain-text prompt.
    prompt = ""
    for u, b in history:
        prompt += f"User: {u}\nAssistant: {b}\n"
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
    )

    # Decode only the newly generated tokens; slicing the decoded string with
    # len(prompt) is fragile because detokenization can alter whitespace.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # The model tends to continue with the next "User:" turn, so keep only
    # the first line of the completion.
    response = response.strip().split("\n")[0]

    # Append the turn as a pair of plain strings.
    history.append([message, response])
    # Return "" to clear the textbox, and the updated history for the Chatbot.
    return "", history
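
# Optional streaming variant — a sketch, not wired into the UI below. It
# reuses the same prompt format but streams tokens with transformers'
# TextIteratorStreamer so the Chatbot updates incrementally. The function
# name respond_stream and the threading setup are illustrative additions,
# not part of the original app.
from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU
def respond_stream(message, history):
    history = history or []
    prompt = ""
    for u, b in history:
        prompt += f"User: {u}\nAssistant: {b}\n"
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it in a background thread and consume
    # decoded text chunks from the streamer as they arrive.
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        ),
    ).start()

    history.append([message, ""])
    partial = ""
    for chunk in streamer:
        partial += chunk
        # Mirror respond(): keep only the first line of the reply.
        history[-1][1] = partial.split("\n")[0].strip()
        yield "", history

# To try it, swap the submit handler in the Blocks below:
# msg.submit(respond_stream, inputs=[msg, chatbot], outputs=[msg, chatbot])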
with gr.Blocks() as demo:
    gr.Markdown("## Phi-2 Chatbot (ZeroGPU-safe)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    clear = gr.Button("Clear")

    # on submit: feed (msg, history) → (clear box, updated history)
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    # Returning None resets the Chatbot; queue=False runs the reset immediately.
    clear.click(lambda: None, None, chatbot, queue=False)
# share=True only matters for local runs; a Hugging Face Space ignores it.
demo.launch(share=True)