import threading

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Lightweight model that fits on a basic CPU instance
model_id = "microsoft/phi-2"

# Load tokenizer & model. float32 is used here because float16 inference is
# not reliably supported on CPU (it can raise "not implemented for 'Half'").
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Streaming generation: model.generate runs on a background thread while the
# streamer yields decoded text chunks as they are produced.
def generate_reply(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True keeps the echoed prompt out of the streamed output
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=256,
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
    )
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

# Chat handler: rebuild the transcript as a plain-text prompt, then stream the
# model's reply back into the chat history.
def chat_fn(message, history):
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nBot: {bot}\n"
    prompt += f"User: {message}\nBot:"
    for partial in generate_reply(prompt):
        # Truncate if the model starts generating the next "User:" turn itself
        reply = partial.split("User:")[0].strip()
        yield history + [(message, reply)], history + [(message, reply)]

# Logo URL for the page header
logo_url = "https://kliacustoms.net/gudang/logo.jpg"

# UI with a blue theme
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="blue")) as demo:
    gr.HTML(f"""
        <div style="text-align: center;">
            <img src="{logo_url}" alt="logo" style="height: 80px;">
            <p>Experimental chatbot for a customs simulation at KLIA</p>
        </div>
    """)
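    # --- The script above ends mid-layout, so the wiring below is a hedged
    # --- sketch, not the original author's code. It assumes a Chatbot display,
    # --- a gr.State holding the (user, bot) history that chat_fn yields twice,
    # --- and a Textbox submit trigger; the names `chatbot`, `state`, and `msg`
    # --- are illustrative.
    chatbot = gr.Chatbot(label="KLIA Customs Simulation")
    state = gr.State([])
    msg = gr.Textbox(placeholder="Type your message...", show_label=False)

    # chat_fn is a generator, so each yielded (display, history) pair is
    # streamed into the two output components as tokens arrive.
    msg.submit(chat_fn, inputs=[msg, state], outputs=[chatbot, state])

# queue() enables streaming of generator outputs to the browser
demo.queue().launch()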