import contextlib
import logging
import os
import shutil
import subprocess
import sys
import urllib.request
import uuid
from pathlib import Path

import gradio as gr
from PIL import Image
from pydub import AudioSegment

logger = logging.getLogger(__name__)


def _remove_quietly(*paths):
    """Delete each of *paths*, ignoring files that are missing or undeletable.

    Used only for best-effort cleanup of scratch files, so a failure here must
    never mask the error (or the result) of the operation being cleaned up.
    """
    for path in paths:
        with contextlib.suppress(OSError):
            os.remove(path)


def _download_checkpoint(url, dest):
    """Download *url* to *dest*, never leaving a partial file at *dest*.

    The payload is streamed to a sibling ``.part`` file and moved into place
    only after the transfer completes, so an interrupted download cannot be
    mistaken for a valid checkpoint on the next start.

    Raises:
        OSError: on network or filesystem failure (``urllib.error.URLError``
            is an ``OSError`` subclass).
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        with urllib.request.urlopen(url) as response, open(partial, "wb") as fh:
            shutil.copyfileobj(response, fh)
        os.replace(partial, dest)
    finally:
        # After a successful os.replace the .part file is gone and this is a no-op.
        _remove_quietly(partial)


# ──────────────────────────────────────────────
# 1. Download Wav2Lip model checkpoint
# ──────────────────────────────────────────────
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

if not MODEL_PATH.exists():
    try:
        _download_checkpoint(MODEL_URL, MODEL_PATH)
    except OSError as exc:
        # Keep the app importable; generate() reports the missing checkpoint
        # to the user instead of the whole process dying at start-up.
        logger.warning("Could not download %s: %s", MODEL_URL, exc)


# ──────────────────────────────────────────────
# 2. Preprocess image and audio (no cropping)
# ──────────────────────────────────────────────
def preprocess(image, audio_file):
    """Write the inputs to uniquely named files in the working directory.

    The image is saved as JPEG and the audio is re-encoded as 16 kHz mono WAV.
    The output path is only reserved here; nothing is written to it.

    Args:
        image: PIL image to animate.
        audio_file: Path to an audio file in any format pydub can decode.

    Returns:
        A ``(img_path, wav_path, out_path)`` tuple of path strings sharing one
        random prefix.

    Raises:
        ValueError: if either input is ``None``.
        Exception: whatever PIL or pydub raise while encoding; any file
            already written is removed before the error propagates.
    """
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")

    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"

    try:
        # JPEG cannot store alpha or palette modes; PIL raises on e.g. RGBA
        # uploads (PNG with transparency) unless the image is converted first.
        if image.mode != "RGB":
            image = image.convert("RGB")
        image.save(img_path)

        seg = AudioSegment.from_file(audio_file)
        seg = seg.set_frame_rate(16000).set_channels(1)
        seg.export(wav_path, format="wav")
    except Exception:
        _remove_quietly(img_path, wav_path)
        raise

    return img_path, wav_path, out_path


# ──────────────────────────────────────────────
# 3. Main inference function
# ──────────────────────────────────────────────
def generate(image, audio):
    """Run Wav2Lip on one image/audio pair and return the result video path.

    Args:
        image: PIL image supplied by the Gradio image input.
        audio: Path of the uploaded audio file.

    Returns:
        Path of the generated MP4.

    Raises:
        gr.Error: if the checkpoint is missing, the inputs cannot be
            preprocessed, the inference script exits non-zero, or no output
            file is produced. Errors are raised rather than returned because
            the output component is a video: a returned message string would
            be interpreted as a file path.
    """
    if not MODEL_PATH.exists():
        raise gr.Error(f"❌ Wav2Lip checkpoint not found: {MODEL_PATH}")

    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        raise gr.Error(f"❌ {e}") from e

    try:
        subprocess.run(
            [
                # Same interpreter (and virtualenv) as this app, not whatever
                # "python" happens to be first on PATH.
                sys.executable, "inference.py",
                "--checkpoint_path", str(MODEL_PATH),
                "--face", img,
                "--audio", wav,
                "--outfile", out_vid,
                "--resize_factor", "1",
                # NOTE(review): upstream Wav2Lip documents --pads as
                # (top, bottom, left, right) — confirm the trailing 20 is
                # meant to be right-side padding.
                "--pads", "0", "20", "0", "20",
                "--fps", "25",
                "--nosmooth",
            ],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        _remove_quietly(out_vid)  # drop any partial output
        raise gr.Error(f"❌ Wav2Lip failed: {e}") from e
    finally:
        # The intermediates are only needed by the subprocess; without this
        # every request leaves a .jpg and .wav behind in the working directory.
        _remove_quietly(img, wav)

    if not Path(out_vid).exists():
        raise gr.Error("❌ Generation failed.")
    return out_vid
# ──────────────────────────────────────────────
# 4. Gradio interface
# ──────────────────────────────────────────────
# `live` is deliberately left at its default (off). In live mode the callback
# fires on every input change, which would start an inference subprocess as
# soon as the first of the two inputs is uploaded and again on each edit.
# With it off, the job runs once, when the user presses Submit.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ High-Quality Wav2Lip (No Crop, Full Image)",
    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()