import gradio as gr
import torch
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import (
    AutoProcessor,
    FuyuForCausalLM,
    LlavaNextForConditionalGeneration,
)

# === 1. Chat Model ===
chat_client = InferenceClient("openchat/openchat-3.5-1210")

# === 2. LLaVA 1.6 (Mistral) ===
# LLaVA 1.6 is the LLaVA-NeXT architecture, so it needs the LlavaNext* class
# rather than LlavaForConditionalGeneration; the repo id also carries a "v"
# (llava-v1.6-...), not "llava-1.6-...".
LLAVA_ID = "llava-hf/llava-v1.6-mistral-7b-hf"
llava_proc = AutoProcessor.from_pretrained(LLAVA_ID)
llava_model = LlavaNextForConditionalGeneration.from_pretrained(
    LLAVA_ID, torch_dtype=torch.float16, device_map="auto"
)

# === 3. Fuyu ===
fuyu_proc = AutoProcessor.from_pretrained("adept/fuyu-8b")
fuyu_model = FuyuForCausalLM.from_pretrained(
    "adept/fuyu-8b", torch_dtype=torch.float16, device_map="auto"
)

# === 4. Combined Multi-Model Handler ===
def smart_respond(message, history, image=None):
    """Route to both vision models when an image is attached, else to OpenChat."""
    if image is not None:
        results = []

        # -- LLaVA
        try:
            # LLaVA 1.6 (Mistral) expects the [INST] ... [/INST] chat template
            # with an <image> placeholder for the pixels.
            prompt = f"[INST] <image>\n{message} [/INST]"
            # device_map="auto" decides placement, so move inputs to the
            # model's device and cast the float tensors to the fp16 weights.
            inputs = llava_proc(text=prompt, images=image, return_tensors="pt").to(
                llava_model.device, dtype=torch.float16
            )
            output = llava_model.generate(**inputs, max_new_tokens=512)
            llava_reply = llava_proc.decode(output[0], skip_special_tokens=True)
            # generate() echoes the prompt; keep only the answer after [/INST].
            llava_reply = llava_reply.split("[/INST]")[-1].strip()
            results.append(f"🦙 **LLaVA 1.6**:\n{llava_reply}")
        except Exception as e:
            results.append(f"LLaVA error: {e}")

        # -- Fuyu
        try:
            # Fuyu prompts conventionally end with a newline.
            fuyu_inputs = fuyu_proc(
                images=image, text=message + "\n", return_tensors="pt"
            ).to(fuyu_model.device)
            output = fuyu_model.generate(**fuyu_inputs, max_new_tokens=512)
            # Decode only the newly generated tokens, not the echoed prompt.
            new_tokens = output[0][fuyu_inputs["input_ids"].shape[1]:]
            fuyu_reply = fuyu_proc.decode(new_tokens, skip_special_tokens=True)
            results.append(f"🧠 **Fuyu**:\n{fuyu_reply}")
        except Exception as e:
            results.append(f"Fuyu error: {e}")

        yield "\n\n---\n\n".join(results)
    else:
        # === Text chat via OpenChat ===
        messages = [
            {"role": "system", "content": "Elaina adalah AI ramah dalam bahasa Indonesia."}
        ]
        for user, bot in history:
            if user:
                messages.append({"role": "user", "content": user})
            if bot:
                messages.append({"role": "assistant", "content": bot})
        messages.append({"role": "user", "content": message})

        response = ""
        for chunk in chat_client.chat_completion(messages, max_tokens=512, stream=True):
            # The final streamed chunk may carry an empty delta.
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response

# === 5. Gradio Interface (ChatGPT-style) ===
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Elaina AI — Teks + Gambar (LLaVA + Fuyu + OpenChat)")
    chatbot = gr.Chatbot()
    state = gr.State([])
    with gr.Row():
        msg = gr.Textbox(placeholder="Ketik pesan kamu...", scale=4)
        img = gr.Image(type="pil", label="(Opsional) Tambahkan gambar")
        btn = gr.Button("Kirim")

    def user_submit(message, image, history):
        history = history or []
        history.append((message, None))
        return "", history, image

    def bot_response(history, image):
        user_msg = history[-1][0]
        for result in smart_respond(user_msg, history[:-1], image):
            yield history[:-1] + [(user_msg, result)]

    # bot_response takes two inputs, so pass both the history state and the
    # image component. (Mirroring the same chain on msg.submit would add
    # Enter-to-send.)
    btn.click(user_submit, [msg, img, state], [msg, state, img]) \
        .then(bot_response, [state, img], chatbot) \
        .then(lambda x: x, chatbot, state)

# === 6. Run ===
if __name__ == "__main__":
    demo.queue()  # needed for generator (streaming) handlers in Gradio 3.x
    demo.launch()
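
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the app): a quick smoke test of
# smart_respond without launching the UI. Assumes a CUDA machine with the
# checkpoints already downloaded; "sample.jpg" is a placeholder path, not a
# file shipped with this script.
#
# from PIL import Image
# demo_img = Image.open("sample.jpg")
# for partial in smart_respond("Apa isi gambar ini?", [], image=demo_img):
#     print(partial)  # prints the combined LLaVA + Fuyu markdown block
#
# for partial in smart_respond("Halo, siapa kamu?", []):
#     pass
# print(partial)  # final accumulated OpenChat response
# ---------------------------------------------------------------------------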