import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model ID on Hugging Face Hub
model_id = "Phonepadith/aidc-llm-laos-10k-gemma-3-4b-it"

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Chat function: gr.ChatInterface calls this with (message, history) and
# expects only the new reply string back -- it manages the history itself,
# so we must not append to or return the history here.
def chat_fn(message, history):
    # Format the chat history into a plain-text prompt for the model
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.7
    )
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text includes the full prompt; keep only the newest
    # assistant turn after the final "Assistant:" marker
    reply = reply.split("Assistant:")[-1].strip()
    return reply

# Create Gradio Chat UI
chatbot = gr.Chatbot()
demo = gr.ChatInterface(
    fn=chat_fn,
    chatbot=chatbot,
    title="💬 Lao Chatbot - Gemma 3 4B IT Fine-tuned",
    description="Chat in Lao with the fine-tuned `Phonepadith/aidc-llm-laos-10k-gemma-3-4b-it` model.",
    theme="soft"
)

if __name__ == "__main__":
    demo.launch()
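
# Optional alternative: Gemma instruction-tuned checkpoints ship a chat
# template, so building the prompt with tokenizer.apply_chat_template should
# match the formatting the model was fine-tuned with more closely than the
# hand-rolled "User:/Assistant:" prompt above. A minimal sketch, assuming this
# checkpoint's tokenizer includes that template; to use it, define it before
# the gr.ChatInterface call and pass fn=chat_fn_with_template instead.
def chat_fn_with_template(message, history):
    # Rebuild the conversation in the role/content message format
    messages = []
    for user, bot in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    # Let the tokenizer apply the model's own chat template
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    outputs = model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.9,
        temperature=0.7
    )
    # Decode only the newly generated tokens, skipping the prompt, so no
    # string splitting on "Assistant:" is needed
    reply = tokenizer.decode(
        outputs[0][input_ids.shape[-1]:],
        skip_special_tokens=True
    )
    return reply.strip()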