# wav2lip_api / app.py
# mich123geb's picture
# Update app.py
# 4f314db verified
import os
import subprocess
import sys
import uuid
from pathlib import Path

import gradio as gr
from PIL import Image
from pydub import AudioSegment
# ──────────────────────────────────────────────
# 1. Download Wav2Lip model checkpoint
# ──────────────────────────────────────────────
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
if not MODEL_PATH.exists():
os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
# ──────────────────────────────────────────────
# 2. Preprocess image and audio (no cropping)
# ──────────────────────────────────────────────
def preprocess(image, audio_file):
    """Write the inputs to disk in the form passed to the Wav2Lip script.

    The image is saved as a JPEG and the audio is re-encoded as a 16 kHz mono
    WAV. The three file names share one random hex id and are relative to the
    current working directory.

    Args:
        image: Image object exposing ``mode``, ``convert`` and ``save`` the
            way a PIL image does.
        audio_file: Path of an audio file readable by
            ``AudioSegment.from_file``.

    Returns:
        A ``(img_path, wav_path, out_path)`` tuple of strings. ``out_path``
        is only a reserved name; nothing is written to it here.

    Raises:
        ValueError: If ``image`` or ``audio_file`` is ``None``.
        Exception: Anything raised while saving the image or converting the
            audio is re-raised after the files written so far are removed.
    """
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")

    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"

    try:
        # JPEG has no alpha channel or palette, so saving e.g. an RGBA or P
        # image would raise. Only modes the JPEG writer rejects are
        # converted; everything else is saved exactly as before.
        if getattr(image, "mode", "RGB") not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            image = image.convert("RGB")
        image.save(img_path)

        seg = AudioSegment.from_file(audio_file)
        seg = seg.set_frame_rate(16000).set_channels(1)
        seg.export(wav_path, format="wav")
    except Exception:
        # Do not leave half of the input pair behind when the other half
        # could not be produced.
        for leftover in (img_path, wav_path):
            Path(leftover).unlink(missing_ok=True)
        raise

    return img_path, wav_path, out_path
# ──────────────────────────────────────────────
# 3. Main inference function
# ──────────────────────────────────────────────
def generate(image, audio):
    """Run Wav2Lip on one image and one audio clip and return the video path.

    The inputs are written to disk by ``preprocess`` and ``inference.py`` is
    run as a subprocess on them. The intermediate image and WAV files are
    deleted once the subprocess has finished, whether or not it succeeded.

    Args:
        image: Image object as accepted by ``preprocess``.
        audio: Path of the input audio file.

    Returns:
        Path (string) of the generated MP4.

    Raises:
        gr.Error: If preprocessing fails, the subprocess exits with a
            non-zero status, or no output file was produced. The output of
            this function feeds a video component, which would treat a
            returned error string as a file path, so failures are raised
            instead of returned.
    """
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        raise gr.Error(f"❌ {e}") from e

    try:
        subprocess.run(
            [
                # Use the interpreter running this app so the subprocess sees
                # the same installed packages; fall back to PATH lookup only
                # if the interpreter path is unknown.
                sys.executable or "python", "inference.py",
                "--checkpoint_path", str(MODEL_PATH),
                "--face", img,
                "--audio", wav,
                "--outfile", out_vid,
                "--resize_factor", "1",
                "--pads", "0", "20", "0", "20",
                "--fps", "25",
                "--nosmooth",
            ],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # A failed run may have left a partial video behind.
        Path(out_vid).unlink(missing_ok=True)
        raise gr.Error(f"❌ Wav2Lip failed: {e}") from e
    finally:
        # subprocess.run has returned (or raised), so the inputs are no
        # longer in use; without this every request leaks two files.
        for tmp in (img, wav):
            Path(tmp).unlink(missing_ok=True)

    if not Path(out_vid).exists():
        raise gr.Error("❌ Generation failed.")
    return out_vid
# ──────────────────────────────────────────────
# 4. Gradio interface
# ──────────────────────────────────────────────
# Web UI: an image input and an audio input feed `generate`; its return value
# is shown in a video player.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ High-Quality Wav2Lip (No Crop, Full Image)",
    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
    allow_flagging="never",
    # Re-run `generate` whenever an input changes instead of waiting for a
    # submit click.
    live=True,
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()