import spaces
import torch
import gradio as gr
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from functools import lru_cache

MODEL_ID = "unsloth/Qwen2.5-VL-3B-Instruct"


@lru_cache(maxsize=1)
def _load_model():
    """Load and cache the model and processor inside the GPU worker."""
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID, torch_dtype=torch.bfloat16
    ).to("cuda")
    adapter_path = "thangvip/qwen-2.5-vl-3b-lora-brainrot-new"
    model.load_adapter(adapter_path)
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    return model, processor


@spaces.GPU
def gpu_inference(image_path: str, prompt: str) -> str:
    """Perform inference entirely in the GPU subprocess."""
    model, processor = _load_model()

    # Load and preprocess the image; downscale wide images to limit memory use
    image = Image.open(image_path).convert("RGB")
    if image.width > 512:
        ratio = image.height / image.width
        image = image.resize((512, int(512 * ratio)), Image.Resampling.LANCZOS)

    # Build the conversation in the Qwen2.5-VL chat format
    system_msg = "You are BrainRot Bot.\n"
    conversation = [
        {"role": "system", "content": [{"type": "text", "text": system_msg}]},
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # Tokenize, generate, decode
    chat_input = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[chat_input], images=[image], return_tensors="pt"
    ).to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=1024)
    decoded = processor.batch_decode(
        output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

    # Extract the assistant portion of the decoded transcript
    return decoded.split("assistant", 1)[-1].strip().lstrip(":").strip()


# Message handling
def add_message(history, user_input):
    if history is None:
        history = []
    for f in user_input.get("files", []):
        history.append({"role": "user", "content": (f,)})
    text = user_input.get("text", "")
    if text:
        history.append({"role": "user", "content": text})
    return history, gr.MultimodalTextbox(value=None)


def inference_interface(history):
    if not history:
        return history, gr.MultimodalTextbox(value=None)

    # Most recent user text message
    user_text = next(
        (
            m["content"]
            for m in reversed(history)
            if m["role"] == "user" and isinstance(m["content"], str)
        ),
        None,
    )
    if user_text is None:
        return history, gr.MultimodalTextbox(value=None)

    # Most recent user image
    image_path = next(
        (
            m["content"][0]
            for m in reversed(history)
            if m["role"] == "user" and isinstance(m["content"], tuple)
        ),
        None,
    )
    if image_path is None:
        return history, gr.MultimodalTextbox(value=None)

    # GPU inference
    reply = gpu_inference(image_path, user_text)
    history.append({"role": "assistant", "content": reply})
    return history, gr.MultimodalTextbox(value=None)


def build_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# qwen-2.5-vl-3b-lora-brr\nAsk me anything about brainrot memes.")
        chatbot = gr.Chatbot([], type="messages", label="Conversation")
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_types=["image"],
            placeholder="Enter text and upload an image.",
            show_label=True,
        )

        submit_evt = chat_input.submit(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        submit_evt.then(inference_interface, [chatbot], [chatbot, chat_input])

        with gr.Row():
            send_btn = gr.Button("Send")
            clear_btn = gr.ClearButton([chatbot, chat_input])

        send_click = send_btn.click(
            add_message, [chatbot, chat_input], [chatbot, chat_input]
        )
        send_click.then(inference_interface, [chatbot], [chatbot, chat_input])

    return demo


if __name__ == "__main__":
    demo = build_demo()
    demo.launch(share=True)