Spaces:
Sleeping
Sleeping
import os
import subprocess
import sys
import uuid
from pathlib import Path

import gradio as gr
from PIL import Image
from pydub import AudioSegment
# ----------------------------------------------
# 1. Download Wav2Lip model checkpoint
# ----------------------------------------------
# Local file name of the Wav2Lip GAN checkpoint and the URL it is fetched from.
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

if not MODEL_PATH.exists():
    # Download into a side file and rename it only after wget succeeds, so an
    # interrupted or failed transfer can never leave a truncated checkpoint
    # that the exists() check above would accept on the next start.
    _partial_path = MODEL_PATH.with_name(MODEL_PATH.name + ".part")
    try:
        # Argument list instead of a shell string: nothing in the URL or the
        # path is subject to shell quoting or word splitting.
        _wget_status = subprocess.run(
            ["wget", "-q", MODEL_URL, "-O", str(_partial_path)]
        ).returncode
    except OSError:
        # wget is not installed or could not be started.
        _wget_status = -1

    if _wget_status == 0 and _partial_path.exists():
        _partial_path.replace(MODEL_PATH)
    else:
        # Stay best-effort (the app still starts), but do not keep junk
        # around and make the failure visible in the logs.
        if _partial_path.exists():
            _partial_path.unlink()
        print(f"WARNING: could not download {MODEL_URL}; {MODEL_PATH} is missing.")
# ----------------------------------------------
# 2. Preprocess image and audio (no cropping)
# ----------------------------------------------
def preprocess(image, audio_file):
    """Write the inputs to uniquely named files for the inference script.

    The image is saved as a JPEG and the audio is re-encoded as a
    16 kHz mono WAV. All three returned names share one random prefix and
    are relative to the current working directory.

    Args:
        image: Image object exposing ``mode``, ``convert`` and ``save``
            (the Gradio input is configured with ``type="pil"``).
        audio_file: Path of the audio file to convert.

    Returns:
        A tuple ``(img_path, wav_path, out_path)``. ``out_path`` is only a
        reserved name for the result video; nothing is written to it here.

    Raises:
        ValueError: If ``image`` or ``audio_file`` is ``None``.
    """
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")

    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"

    try:
        # JPEG has no alpha or palette support, so e.g. an RGBA PNG upload
        # would make save() raise; normalise to RGB first.
        if image.mode != "RGB":
            image = image.convert("RGB")
        image.save(img_path)

        seg = AudioSegment.from_file(audio_file)
        seg = seg.set_frame_rate(16000).set_channels(1)
        seg.export(wav_path, format="wav")
    except Exception:
        # Do not leave half-written inputs behind when a later step fails.
        for partial in (img_path, wav_path):
            if Path(partial).exists():
                Path(partial).unlink()
        raise

    return img_path, wav_path, out_path
# ----------------------------------------------
# 3. Main inference function
# ----------------------------------------------
def generate(image, audio):
    """Lip-sync ``image`` to ``audio`` by running ``inference.py``.

    Args:
        image: Image from the Gradio image input, or ``None``.
        audio: Path of the uploaded audio file, or ``None``.

    Returns:
        Path of the generated MP4 file.

    Raises:
        gr.Error: If preprocessing fails, the inference script exits with a
            non-zero status, or no output file was produced.
    """
    # The output component is a gr.Video, which treats any returned string as
    # a file path. Failures are therefore raised as gr.Error (shown to the
    # user by Gradio) instead of being returned as message strings.
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        raise gr.Error(str(e)) from e

    command = [
        # Run the script with the interpreter that is running this app so it
        # sees the same installed packages.
        sys.executable or "python", "inference.py",
        "--checkpoint_path", str(MODEL_PATH),
        "--face", img,
        "--audio", wav,
        "--outfile", out_vid,
        "--resize_factor", "1",
        # NOTE(review): Wav2Lip documents --pads as (top, bottom, left, right);
        # confirm that padding the right side by 20 is intended.
        "--pads", "0", "20", "0", "20",
        "--fps", "25",
        "--nosmooth",
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        raise gr.Error(f"Wav2Lip failed: {e}") from e
    finally:
        # The intermediate inputs are only needed by the subprocess; remove
        # them so they do not pile up in the working directory per request.
        for leftover in (img, wav):
            try:
                Path(leftover).unlink()
            except OSError:
                pass  # best-effort cleanup; a missing file is fine

    if not Path(out_vid).exists():
        raise gr.Error("Generation failed.")
    return out_vid
# ----------------------------------------------
# 4. Gradio interface
# ----------------------------------------------
# Web UI: one image and one audio clip in, one video out.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ High-Quality Wav2Lip (No Crop, Full Image)",
    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
    allow_flagging="never",
    # Not live: each run spawns the inference subprocess, and live mode would
    # trigger it on every input change, including while only one of the two
    # inputs has been provided. Run on explicit submit instead.
    live=False,
)

if __name__ == "__main__":
    demo.launch()