# diff2lip / app.py — Gradio demo app for Diff2Lip audio-driven lip synthesis
# (Hugging Face Space by darshankr, revision 6c27de5)
import os
import shlex
import subprocess

import cv2
import gradio as gr
import numpy as np
# Paths and Model Config
sample_mode = "cross" # "reconstruction" or "cross"
model_path = "checkpoints/checkpoint.pt"
pads = "0,0,0,0"
generate_from_filelist = 0 # 0 means real-time generation
def process_video(audio_path, video_path):
    """Run the Diff2Lip sampling script on an audio/video pair.

    Parameters
    ----------
    audio_path : str
        Path to the driving audio file.
    video_path : str
        Path to the input face video.

    Returns
    -------
    str
        ``"output.mp4"`` on success, otherwise a human-readable message
        starting with ``"Error:"``.
    """
    # Step 1: Check if input files exist
    audio_exists = os.path.exists(audio_path)
    video_exists = os.path.exists(video_path)
    print(f"Audio exists: {audio_exists}, Video exists: {video_exists}")
    if not (audio_exists and video_exists):
        return "Error: One or both input files do not exist."

    # Set flags based on sample mode
    if sample_mode == "reconstruction":
        sample_input_flags = "--sampling_input_type=first_frame --sampling_ref_type=first_frame"
    elif sample_mode == "cross":
        sample_input_flags = "--sampling_input_type=gt --sampling_ref_type=gt"
    else:
        return "Error: sample_mode can only be 'cross' or 'reconstruction'"

    # Model flags and configurations (constant flag groups; no user input here)
    MODEL_FLAGS = "--attention_resolutions 32,16,8 --class_cond False --learn_sigma True --num_channels 128 --num_head_channels 64 --num_res_blocks 2 --resblock_updown True --use_fp16 True --use_scale_shift_norm False"
    DIFFUSION_FLAGS = "--predict_xstart False --diffusion_steps 1000 --noise_schedule linear --rescale_timesteps False"
    SAMPLE_FLAGS = f"--sampling_seed=7 {sample_input_flags} --timestep_respacing ddim25 --use_ddim True --model_path={model_path}"
    DATA_FLAGS = "--nframes 5 --nrefer 1 --image_size 128 --sampling_batch_size=32"
    TFG_FLAGS = "--face_hide_percentage 0.5 --use_ref=True --use_audio=True --audio_as_style=True"

    # Step 2: Build the command as an argv list instead of a shell string.
    # shell=True with f-string-interpolated, user-supplied paths is a shell
    # injection vector and breaks on paths containing spaces or quotes.
    command = ["python", "your_model_script.py"]
    for flag_group in (MODEL_FLAGS, DIFFUSION_FLAGS, SAMPLE_FLAGS, DATA_FLAGS, TFG_FLAGS):
        command.extend(shlex.split(flag_group))
    # Path-bearing arguments are appended as whole argv tokens so arbitrary
    # filenames survive intact.
    command.extend([
        "--generate_from_filelist", str(generate_from_filelist),
        f"--video_path={video_path}",
        f"--audio_path={audio_path}",
        "--out_path=output.mp4",
        "--save_orig=False",
        "--face_det_batch_size", "16",
        "--pads", pads,
        "--is_voxceleb2=False",
    ])
    print(f"Running command: {' '.join(command)}")

    # Step 3: Execute the command and capture output
    result = subprocess.run(command, capture_output=True, text=True)
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    if result.returncode != 0:
        return f"Error during video generation: {result.stderr}"

    # Step 4: Verify that the output video is generated correctly
    if not os.path.exists("output.mp4"):
        return "Error: Output video not generated."

    print("Video generation successful!")
    return "output.mp4"
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### Upload an Audio and Video file to generate an output video.")
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    video_input = gr.Video(label="Upload Video")
    output_video = gr.Video(label="Generated Video")
    generate_button = gr.Button("Generate")

    def inference(audio, video):
        """Bridge the UI to process_video.

        Returns the generated video path; raises gr.Error so failures are
        shown in the UI instead of being fed to the Video component, which
        cannot render an error string.
        """
        result = process_video(audio, video)
        if result.endswith(".mp4"):
            return result  # Return path to the generated video
        raise gr.Error(result)  # Display any errors

    # Wire the button to the components defined above. The original code
    # called an undefined create_test_video() (NameError at startup) and
    # launched a separate gr.Interface inside the Blocks context while
    # `demo` itself was never launched.
    generate_button.click(
        fn=inference,
        inputs=[audio_input, video_input],
        outputs=output_video,
    )

demo.launch()