File size: 1,630 Bytes
48bd006
 
 
381bccf
48bd006
edad343
381bccf
 
0085f71
31adafd
21ea286
edad343
31adafd
a474927
 
381bccf
31adafd
 
48bd006
 
31adafd
 
1b182bc
48bd006
6ebe49c
edad343
 
 
 
 
 
 
 
0085f71
edad343
9ee0060
 
06e9c5a
9ee0060
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces

# --- Model configuration -----------------------------------------------
# Checkpoint served by this Space; phi-2 is small enough for a single GPU.
model_id = "microsoft/phi-2"
device = "cuda"  # ZeroGPU space
# fp16 halves weight memory vs fp32; fine for inference-only use.
precision = torch.float16

# Load tokenizer & model
# NOTE(review): loading straight onto "cuda" at import time relies on the
# ZeroGPU runtime intercepting CUDA init before a GPU is attached — this is
# the documented ZeroGPU pattern, but verify against the Space's SDK version.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=precision
).to(device)

@spaces.GPU
def respond(message, history):
    """Generate one assistant turn for the chat loop.

    Args:
        message: The new user message (plain string).
        history: List of [user, bot] string pairs from the Chatbot
            component; may be None/empty on the first turn.

    Returns:
        Tuple of ("", updated_history): the empty string clears the input
        textbox, and the updated history re-renders the Chatbot.
    """
    # history comes in as a list of (user, bot) tuples
    history = history or []

    # Rebuild the whole conversation as a flat transcript for the model
    # to continue.
    prompt = "".join(f"User: {u}\nAssistant: {b}\n" for u, b in history)
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # inference_mode: no autograd bookkeeping during generation.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated token ids. Slicing the decoded
    # string by len(prompt) is fragile: a tokenize/decode round-trip can
    # alter whitespace or special tokens, so the decoded prefix need not
    # match the prompt byte-for-byte.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Keep only the first line so the model cannot fabricate additional
    # "User:" turns inside a single reply.
    response = response.strip().split("\n")[0]

    # append to history as a pair of plain strings
    history.append([message, response])
    # return "" to clear the textbox, and the updated history for the Chatbot
    return "", history

# --- UI wiring ----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Phi-2 Chatbot (ZeroGPU‑safe)")
    # Chat transcript; its value is the [user, bot] pair list that
    # respond() receives and returns.
    chatbot = gr.Chatbot()
    msg     = gr.Textbox(label="Your message")
    clear   = gr.Button("Clear")

    # on submit: feed (msg, history) → (clear box, updated history)
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    # Returning None resets the Chatbot to empty; queue=False runs the
    # reset immediately instead of waiting behind queued generations.
    clear.click(lambda: None, None, chatbot, queue=False)

# share=True requests a public gradio.live link; on Hugging Face Spaces the
# app is already public, so this flag is typically ignored there.
demo.launch(share=True)