import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model ID on Hugging Face Hub
model_id = "Phonepadith/aidc-llm-laos-10k-gemma-3-4b-it"

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Chat function: gr.ChatInterface calls this with (message, history) and
# expects only the new reply string back -- it manages the history itself,
# so we must not append to or return the history here.
def chat_fn(message, history):
    # Format the chat history into a plain-text prompt for the model
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.7
    )
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text includes the full prompt; keep only the newest
    # assistant turn after the final "Assistant:" marker
    reply = reply.split("Assistant:")[-1].strip()
    return reply

# Create Gradio Chat UI
chatbot = gr.Chatbot()
demo = gr.ChatInterface(
    fn=chat_fn,
    chatbot=chatbot,
    title="💬 Lao Chatbot - Gemma 3 4B IT Fine-tuned",
    description="Chat in Lao with the fine-tuned `Phonepadith/aidc-llm-laos-10k-gemma-3-4b-it` model.",
    theme="soft"
)

if __name__ == "__main__":
    demo.launch()
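
# Optional alternative: Gemma instruction-tuned checkpoints ship a chat
# template, so building the prompt with tokenizer.apply_chat_template should
# match the formatting the model was fine-tuned with more closely than the
# hand-rolled "User:/Assistant:" prompt above. A minimal sketch, assuming this
# checkpoint's tokenizer includes that template; to use it, define it before
# the gr.ChatInterface call and pass fn=chat_fn_with_template instead.
def chat_fn_with_template(message, history):
    # Rebuild the conversation in the role/content message format
    messages = []
    for user, bot in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    # Let the tokenizer apply the model's own chat template
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    outputs = model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.7
    )
    # Decode only the newly generated tokens, skipping the prompt, so no
    # string splitting on "Assistant:" is needed
    reply = tokenizer.decode(
        outputs[0][input_ids.shape[-1]:],
        skip_special_tokens=True
    )
    return reply.strip()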