import gradio as gr
import torch
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import (
    AutoProcessor,
    FuyuForCausalLM,
    LlavaNextForConditionalGeneration,
)

# === 1. Chat Model ===
chat_client = InferenceClient("openchat/openchat-3.5-1210")

# === 2. LLaVA 1.6 (Mistral) ===
# LLaVA 1.6 is the LLaVA-NeXT architecture, so it needs the LlavaNext* class
# rather than LlavaForConditionalGeneration; the repo id also carries a "v"
# (llava-v1.6-...), not "llava-1.6-...".
LLAVA_ID = "llava-hf/llava-v1.6-mistral-7b-hf"
llava_proc = AutoProcessor.from_pretrained(LLAVA_ID)
llava_model = LlavaNextForConditionalGeneration.from_pretrained(
    LLAVA_ID, torch_dtype=torch.float16, device_map="auto"
)

# === 3. Fuyu ===
fuyu_proc = AutoProcessor.from_pretrained("adept/fuyu-8b")
fuyu_model = FuyuForCausalLM.from_pretrained(
    "adept/fuyu-8b", torch_dtype=torch.float16, device_map="auto"
)

# === 4. Combined Multi-Model Handler ===
def smart_respond(message, history, image=None):
    """Route to both vision models when an image is attached, else to OpenChat."""
    if image is not None:
        results = []

        # -- LLaVA
        try:
            # LLaVA 1.6 (Mistral) expects the [INST] ... [/INST] chat template
            # with an <image> placeholder for the pixels.
            prompt = f"[INST] <image>\n{message} [/INST]"
            # device_map="auto" decides placement, so move inputs to the
            # model's device and cast the float tensors to the fp16 weights.
            inputs = llava_proc(text=prompt, images=image, return_tensors="pt").to(
                llava_model.device, dtype=torch.float16
            )
            output = llava_model.generate(**inputs, max_new_tokens=512)
            llava_reply = llava_proc.decode(output[0], skip_special_tokens=True)
            # generate() echoes the prompt; keep only the answer after [/INST].
            llava_reply = llava_reply.split("[/INST]")[-1].strip()
            results.append(f"🦙 **LLaVA 1.6**:\n{llava_reply}")
        except Exception as e:
            results.append(f"LLaVA error: {e}")

        # -- Fuyu
        try:
            # Fuyu prompts conventionally end with a newline.
            fuyu_inputs = fuyu_proc(
                images=image, text=message + "\n", return_tensors="pt"
            ).to(fuyu_model.device)
            output = fuyu_model.generate(**fuyu_inputs, max_new_tokens=512)
            # Decode only the newly generated tokens, not the echoed prompt.
            new_tokens = output[0][fuyu_inputs["input_ids"].shape[1]:]
            fuyu_reply = fuyu_proc.decode(new_tokens, skip_special_tokens=True)
            results.append(f"🧠 **Fuyu**:\n{fuyu_reply}")
        except Exception as e:
            results.append(f"Fuyu error: {e}")

        yield "\n\n---\n\n".join(results)
    else:
        # === Text chat via OpenChat ===
        messages = [
            {"role": "system", "content": "Elaina adalah AI ramah dalam bahasa Indonesia."}
        ]
        for user, bot in history:
            if user:
                messages.append({"role": "user", "content": user})
            if bot:
                messages.append({"role": "assistant", "content": bot})
        messages.append({"role": "user", "content": message})

        response = ""
        for chunk in chat_client.chat_completion(messages, max_tokens=512, stream=True):
            # The final streamed chunk may carry an empty delta.
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response

# === 5. Gradio Interface (ChatGPT-style) ===
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Elaina AI — Teks + Gambar (LLaVA + Fuyu + OpenChat)")
    chatbot = gr.Chatbot()
    state = gr.State([])
    with gr.Row():
        msg = gr.Textbox(placeholder="Ketik pesan kamu...", scale=4)
        img = gr.Image(type="pil", label="(Opsional) Tambahkan gambar")
        btn = gr.Button("Kirim")

    def user_submit(message, image, history):
        history = history or []
        history.append((message, None))
        return "", history, image

    def bot_response(history, image):
        user_msg = history[-1][0]
        for result in smart_respond(user_msg, history[:-1], image):
            yield history[:-1] + [(user_msg, result)]

    # bot_response takes two inputs, so pass both the history state and the
    # image component. (Mirroring the same chain on msg.submit would add
    # Enter-to-send.)
    btn.click(user_submit, [msg, img, state], [msg, state, img]) \
        .then(bot_response, [state, img], chatbot) \
        .then(lambda x: x, chatbot, state)

# === 6. Run ===
if __name__ == "__main__":
    demo.queue()  # needed for generator (streaming) handlers in Gradio 3.x
    demo.launch()
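
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the app): a quick smoke test of
# smart_respond without launching the UI. Assumes a CUDA machine with the
# checkpoints already downloaded; "sample.jpg" is a placeholder path, not a
# file shipped with this script.
#
# from PIL import Image
# demo_img = Image.open("sample.jpg")
# for partial in smart_respond("Apa isi gambar ini?", [], image=demo_img):
#     print(partial)  # prints the combined LLaVA + Fuyu markdown block
#
# for partial in smart_respond("Halo, siapa kamu?", []):
#     pass
# print(partial)  # final accumulated OpenChat response
# ---------------------------------------------------------------------------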