File size: 1,630 Bytes
48bd006
 
 
381bccf
48bd006
edad343
381bccf
 
0085f71
31adafd
21ea286
edad343
31adafd
a474927
 
381bccf
31adafd
 
48bd006
 
31adafd
 
1b182bc
48bd006
6ebe49c
edad343
 
 
 
 
 
 
 
0085f71
edad343
9ee0060
 
06e9c5a
9ee0060
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces

# --- Model configuration -----------------------------------------------
# Checkpoint served by this Space; phi-2 is small enough for a single GPU.
model_id = "microsoft/phi-2"
device = "cuda"  # ZeroGPU space
# fp16 halves weight memory vs fp32; fine for inference-only use.
precision = torch.float16

# Load tokenizer & model
# NOTE(review): loading straight onto "cuda" at import time relies on the
# ZeroGPU runtime intercepting CUDA init before a GPU is attached — this is
# the documented ZeroGPU pattern, but verify against the Space's SDK version.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=precision
).to(device)

@spaces.GPU
def respond(message, history):
    """Generate one assistant turn for the chat loop.

    Args:
        message: The new user message (plain string).
        history: List of [user, bot] string pairs from the Chatbot
            component; may be None/empty on the first turn.

    Returns:
        Tuple of ("", updated_history): the empty string clears the input
        textbox, and the updated history re-renders the Chatbot.
    """
    # history comes in as a list of (user, bot) tuples
    history = history or []

    # Rebuild the whole conversation as a flat transcript for the model
    # to continue.
    prompt = "".join(f"User: {u}\nAssistant: {b}\n" for u, b in history)
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # inference_mode: no autograd bookkeeping during generation.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated token ids. Slicing the decoded
    # string by len(prompt) is fragile: a tokenize/decode round-trip can
    # alter whitespace or special tokens, so the decoded prefix need not
    # match the prompt byte-for-byte.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Keep only the first line so the model cannot fabricate additional
    # "User:" turns inside a single reply.
    response = response.strip().split("\n")[0]

    # append to history as a pair of plain strings
    history.append([message, response])
    # return "" to clear the textbox, and the updated history for the Chatbot
    return "", history

# --- UI wiring ----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Phi-2 Chatbot (ZeroGPU‑safe)")
    # Chat transcript; its value is the [user, bot] pair list that
    # respond() receives and returns.
    chatbot = gr.Chatbot()
    msg     = gr.Textbox(label="Your message")
    clear   = gr.Button("Clear")

    # on submit: feed (msg, history) → (clear box, updated history)
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    # Returning None resets the Chatbot to empty; queue=False runs the
    # reset immediately instead of waiting behind queued generations.
    clear.click(lambda: None, None, chatbot, queue=False)

# share=True requests a public gradio.live link; on Hugging Face Spaces the
# app is already public, so this flag is typically ignored there.
demo.launch(share=True)