import gradio as gr
import os
from huggingface_hub import InferenceClient
from gradio_client import Client, handle_file


def infer(text_prompt, action_prompt, profile: gr.OAuthProfile, oauth_token: gr.OAuthToken):
    """Generate an icon image from a text prompt, then animate it into a video.

    The function runs two remote calls in sequence:

    1. ``InferenceClient.text_to_image`` (provider ``fal-ai``) renders the
       icon described by ``text_prompt``.
    2. The ``Lightricks/ltx-video-distilled`` Space is called through
       ``gradio_client`` to turn that image into a short video driven by
       ``action_prompt``.

    Args:
        text_prompt: Description of the icon to generate.
        action_prompt: Description of the motion to apply; fixed style
            keywords are appended to it before it is sent.
        profile: Not used in the body. It is not listed in the click
            handler's ``inputs``, so it is supplied by Gradio's OAuth
            integration based on the type annotation (see Gradio OAuth docs).
        oauth_token: Supplied the same way as ``profile``; its ``token`` is
            used to authenticate both remote calls.

    Returns:
        A ``(image_path, video_path)`` tuple: the path of the temporary PNG
        and the ``'video'`` entry of the first element of the Space's result.

    Raises:
        gr.Error: If ``text_prompt`` is empty or whitespace only.
    """
    import tempfile

    # Fail fast with a user-visible message instead of spending a remote
    # inference call on an empty prompt.
    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please enter an icon label before submitting.")

    # SECURITY: the token is a credential; it must never be printed or logged.
    gradio_auth_token = oauth_token.token

    inf_client = InferenceClient(
        provider="fal-ai",
        api_key=gradio_auth_token,
    )

    # output is a PIL.Image object
    image = inf_client.text_to_image(
        text_prompt,
        model="multimodalart/isometric-skeumorphic-3d-bnb",
    )

    # Reserve a unique .png path. delete=False because the path is returned to
    # the caller and must outlive this function.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
        temp_image_path = tmp_file.name
    # Write only after the handle above is closed, so the same path is never
    # open twice at once.
    image.save(temp_image_path, format="PNG")

    print(f"✅ Image saved temporarily at: {temp_image_path}")

    gr_client = Client("Lightricks/ltx-video-distilled", hf_token=gradio_auth_token)

    # Append the fixed style keywords to the user's motion description.
    action_prompt = f"{action_prompt}, RBNBICN, icon, white background, isometric perspective"
    result = gr_client.predict(
        prompt=action_prompt,
        negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
        input_image_filepath=handle_file(temp_image_path),
        input_video_filepath=None,
        height_ui=512,
        width_ui=704,
        mode="image-to-video",
        duration_ui=2,
        ui_frames_to_use=9,
        seed_ui=42,
        randomize_seed=True,
        ui_guidance_scale=1,
        improve_texture_flag=True,
        api_name="/image_to_video",
    )
    print(result)
    # The first element of the result is a mapping whose 'video' entry is the
    # generated video.
    video_path = result[0]['video']
    return temp_image_path, video_path

# Build the page: a login button, two prompt fields, a submit button and the
# two result viewers, stacked in a single column.
with gr.Blocks() as demo:
    with gr.Column():
        gr.LoginButton()

        # Prompt inputs, in the order `infer` receives them.
        text_prompt = gr.Textbox(label="Icon label")
        action_prompt = gr.Textbox(label="Action label")
        submit_btn = gr.Button("Submit")

        # Result viewers, in the order `infer` returns its values.
        image_out = gr.Image()
        video_out = gr.Video()

    # Clicking submit runs `infer` on both prompts and routes its
    # (image, video) return values to the viewers above.
    submit_btn.click(
        infer,
        [text_prompt, action_prompt],
        [image_out, video_out],
    )

demo.launch(show_error=True)