"""Gradio demo: generate a lip-synced video from an audio + video pair.

Builds a command line for a diffusion-based talking-face-generation
script, executes it with subprocess, and serves the result through a
small Gradio UI.
"""

import os
import shlex
import subprocess

import cv2  # noqa: F401 -- kept: presumably used by the generation pipeline; confirm
import gradio as gr
import numpy as np  # noqa: F401 -- kept for the same reason

# --- Paths and model configuration ---
sample_mode = "cross"  # "reconstruction" or "cross"
model_path = "checkpoints/checkpoint.pt"
pads = "0,0,0,0"
generate_from_filelist = 0  # 0 means real-time generation


def process_video(audio_path, video_path):
    """Run the generation script on (audio_path, video_path).

    Returns:
        "output.mp4" on success, or a string starting with "Error:"
        describing what went wrong (callers test for the ".mp4" suffix).
    """
    # Step 1: both input files must exist before we shell out.
    audio_exists = os.path.exists(audio_path)
    video_exists = os.path.exists(video_path)
    print(f"Audio exists: {audio_exists}, Video exists: {video_exists}")
    if not (audio_exists and video_exists):
        return "Error: One or both input files do not exist."

    # Select sampling flags from the configured sample mode.
    if sample_mode == "reconstruction":
        sample_input_flags = (
            "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
        )
    elif sample_mode == "cross":
        sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
    else:
        return "Error: sample_mode can only be 'cross' or 'reconstruction'"

    # Model flags and configurations.
    MODEL_FLAGS = (
        "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True "
        "--num_channels 128 --num_head_channels 64 --num_res_blocks 2 "
        "--resblock_updown True --use_fp16 True --use_scale_shift_norm False"
    )
    DIFFUSION_FLAGS = (
        "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear "
        "--rescale_timesteps False"
    )
    SAMPLE_FLAGS = (
        f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 "
        f"--use_ddim True --model_path={model_path}"
    )
    DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
    TFG_FLAGS = (
        "--face_hide_percentage 0.5 --use_ref=True --use_audio=True "
        "--audio_as_style=True"
    )
    # BUG FIX: quote the user-supplied paths -- Gradio temp-file names
    # often contain spaces, which would split the shell command.
    GEN_FLAGS = (
        f"--generate_from_filelist {generate_from_filelist} "
        f"--video_path={shlex.quote(video_path)} "
        f"--audio_path={shlex.quote(audio_path)} "
        "--out_path=output.mp4 --save_orig=False --face_det_batch_size 16 "
        f"--pads {pads} --is_voxceleb2=False"
    )

    # Step 2: combine all flags into one command.
    command = (
        f"python your_model_script.py {MODEL_FLAGS} {DIFFUSION_FLAGS} "
        f"{SAMPLE_FLAGS} {DATA_FLAGS} {TFG_FLAGS} {GEN_FLAGS}"
    )
    # BUG FIX: the original f-string was broken across a line break
    # inside the literal; re-joined into a single valid statement.
    print(f"Running command: {command}")

    # Step 3: execute the command and capture output.
    # NOTE(review): shell=True on a partially user-derived string is
    # risky; paths are now shlex-quoted, but a list + shell=False would
    # be safer if the flag strings were restructured.
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    if result.returncode != 0:
        return f"Error during video generation: {result.stderr}"

    # Step 4: verify that the output video was actually produced.
    if not os.path.exists("output.mp4"):
        return "Error: Output video not generated."

    print("Video generation successful!")
    return "output.mp4"


# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("### Upload an Audio and Video file to generate an output video.")
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    video_input = gr.Video(label="Upload Video")
    output_video = gr.Video(label="Generated Video")

    # BUG FIX: the original called create_test_video() here, a function
    # that is defined nowhere in the file -- it raised NameError at
    # startup. The call has been removed.

    def inference(audio, video):
        """Gradio handler: return the generated video path or surface an error."""
        result = process_video(audio, video)
        if result.endswith(".mp4"):
            return result  # Path to the generated video.
        # BUG FIX: returning an error string to a gr.Video output fails
        # (and the original double-prefixed "Error: Error: ...");
        # raise gr.Error so the UI shows the message properly.
        raise gr.Error(result)

    gr.Interface(
        fn=inference,
        inputs=[audio_input, video_input],
        outputs=output_video,
    ).launch()