"""Gradio demo: generate a lip-synced video from an audio + video pair.

Builds a command line for a diffusion-based talking-face-generation
script, executes it with subprocess, and serves the result through a
small Gradio UI.
"""

import os
import shlex
import subprocess

import cv2  # noqa: F401 -- kept: presumably used by the generation pipeline; confirm
import gradio as gr
import numpy as np  # noqa: F401 -- kept for the same reason

# --- Paths and model configuration ---
sample_mode = "cross"  # "reconstruction" or "cross"
model_path = "checkpoints/checkpoint.pt"
pads = "0,0,0,0"
generate_from_filelist = 0  # 0 means real-time generation


def process_video(audio_path, video_path):
    """Run the generation script on (audio_path, video_path).

    Returns:
        "output.mp4" on success, or a string starting with "Error:"
        describing what went wrong (callers test for the ".mp4" suffix).
    """
    # Step 1: both input files must exist before we shell out.
    audio_exists = os.path.exists(audio_path)
    video_exists = os.path.exists(video_path)
    print(f"Audio exists: {audio_exists}, Video exists: {video_exists}")
    if not (audio_exists and video_exists):
        return "Error: One or both input files do not exist."

    # Select sampling flags from the configured sample mode.
    if sample_mode == "reconstruction":
        sample_input_flags = (
            "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
        )
    elif sample_mode == "cross":
        sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
    else:
        return "Error: sample_mode can only be 'cross' or 'reconstruction'"

    # Model flags and configurations.
    MODEL_FLAGS = (
        "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True "
        "--num_channels 128 --num_head_channels 64 --num_res_blocks 2 "
        "--resblock_updown True --use_fp16 True --use_scale_shift_norm False"
    )
    DIFFUSION_FLAGS = (
        "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear "
        "--rescale_timesteps False"
    )
    SAMPLE_FLAGS = (
        f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 "
        f"--use_ddim True --model_path={model_path}"
    )
    DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
    TFG_FLAGS = (
        "--face_hide_percentage 0.5 --use_ref=True --use_audio=True "
        "--audio_as_style=True"
    )
    # BUG FIX: quote the user-supplied paths -- Gradio temp-file names
    # often contain spaces, which would split the shell command.
    GEN_FLAGS = (
        f"--generate_from_filelist {generate_from_filelist} "
        f"--video_path={shlex.quote(video_path)} "
        f"--audio_path={shlex.quote(audio_path)} "
        "--out_path=output.mp4 --save_orig=False --face_det_batch_size 16 "
        f"--pads {pads} --is_voxceleb2=False"
    )

    # Step 2: combine all flags into one command.
    command = (
        f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} "
        f"{SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
    )
    # BUG FIX: the original f-string was broken across a line break
    # inside the literal; re-joined into a single valid statement.
    print(f"Running command: {command}")

    # Step 3: execute the command and capture output.
    # NOTE(review): shell=True on a partially user-derived string is
    # risky; paths are now shlex-quoted, but a list + shell=False would
    # be safer if the flag strings were restructured.
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    if result.returncode != 0:
        return f"Error during video generation: {result.stderr}"

    # Step 4: verify that the output video was actually produced.
    if not os.path.exists("output.mp4"):
        return "Error: Output video not generated."

    print("Video generation successful!")
    return "output.mp4"


# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("### Upload an Audio and Video file to generate an output video.")
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    video_input = gr.Video(label="Upload Video")
    output_video = gr.Video(label="Generated Video")

    # BUG FIX: the original called create_test_video() here, a function
    # that is defined nowhere in the file -- it raised NameError at
    # startup. The call has been removed.

    def inference(audio, video):
        """Gradio handler: return the generated video path or surface an error."""
        result = process_video(audio, video)
        if result.endswith(".mp4"):
            return result  # Path to the generated video.
        # BUG FIX: returning an error string to a gr.Video output fails
        # (and the original double-prefixed "Error: Error: ...");
        # raise gr.Error so the UI shows the message properly.
        raise gr.Error(result)

    gr.Interface(
        fn=inference,
        inputs=[audio_input, video_input],
        outputs=output_video,
    ).launch()