import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model and tokenizer once, at import time. Both are module-level
# globals that chat() reads on every call.
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" leaves weight placement to the library instead of pinning
# the model to one explicit device.
# NOTE(review): no dtype argument is passed here — confirm the precision the
# weights end up in fits the memory available for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

# Define chat function
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: Text of the new user message.
        history: List of ``{"role": ..., "content": ...}`` dicts holding the
            conversation so far. It is extended in place with the new user
            turn and the generated assistant turn.

    Returns:
        A ``(reply, history)`` tuple: the decoded assistant text and the same
        ``history`` list, now including the two new turns.
    """
    conversation = history + [{"role": "user", "content": message}]
    # add_generation_prompt=True ends the prompt with the assistant turn
    # header so the model answers instead of continuing the user's text.
    # return_dict=True also yields the attention mask for generate().
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True)

    # generate() returns the prompt followed by the continuation. Slice off
    # the prompt by length rather than splitting the decoded text on a marker
    # string that may not be present in it.
    prompt_length = inputs["input_ids"].shape[-1]
    reply = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return reply, history

# UI with Gradio
with gr.Blocks() as demo:
    # NOTE(review): the history handed to this component is a list of
    # {"role": ..., "content": ...} dicts. Whether gr.Chatbot() accepts that
    # shape without extra arguments depends on the installed Gradio version —
    # confirm against the pinned version.
    chatbot = gr.Chatbot()
    # Per-session conversation history, passed to and returned from respond().
    state = gr.State([])
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
        send = gr.Button("Send")

    def respond(message, history):
        """Handle one submitted message.

        Args:
            message: Current contents of the textbox.
            history: Conversation history held in ``state``.

        Returns:
            A ``(textbox_value, chatbot_value, state_value)`` tuple. The
            textbox is cleared; the chatbot and the state both receive the
            (possibly updated) history.
        """
        # Skip blank submissions so the model is not asked to answer nothing.
        if not message or not message.strip():
            return "", history, history
        _, updated_history = chat(message, history)
        return "", updated_history, updated_history

    # The button and the Enter key trigger the same handler; msg is also an
    # output so the input box is emptied after each send.
    send.click(respond, [msg, state], [msg, chatbot, state])
    msg.submit(respond, [msg, state], [msg, chatbot, state])

demo.launch()