Spaces:
Sleeping
Sleeping
File size: 3,367 Bytes
952337e 13089ed a30d89d bfd9324 13089ed a37c88f a30d89d 0f154d9 a30d89d 0f154d9 a30d89d a37c88f a30d89d 0f154d9 a30d89d 952337e a30d89d 0f154d9 952337e c12b434 952337e a30d89d 0f154d9 a30d89d bfd9324 a30d89d bfd9324 a30d89d 0f154d9 a30d89d bfd9324 0f154d9 4f314db 0f154d9 952337e a30d89d 952337e a30d89d 0f154d9 a30d89d 952337e 0f154d9 a30d89d 0f154d9 a30d89d 0f154d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
import subprocess
import urllib.request
import uuid
from pathlib import Path

import gradio as gr
from PIL import Image
from pydub import AudioSegment
# ----------------------------------------------
# 1. Download Wav2Lip model checkpoint
# ----------------------------------------------
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
if not MODEL_PATH.exists():
    # Fetch with urllib rather than `os.system("wget ...")`: no dependency on
    # a wget binary, no shell-injection surface from interpolated paths, and
    # a failed download raises instead of being silently ignored.
    urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)
# ββββββββββββββββββββββββββββββββββββββββββββββ
# 2. Preprocess image and audio (no cropping)
# ββββββββββββββββββββββββββββββββββββββββββββββ
# ----------------------------------------------
# 2. Preprocess image and audio (no cropping)
# ----------------------------------------------
def preprocess(image, audio_file):
    """Write the inputs to uniquely named files in the working directory.

    Parameters
    ----------
    image : PIL.Image.Image
        Source face image (any mode; converted to RGB before the JPEG save).
    audio_file : str
        Path to the uploaded audio file (any format ffmpeg/pydub can read).

    Returns
    -------
    tuple[str, str, str]
        (JPEG image path, 16 kHz mono WAV path, output MP4 path).

    Raises
    ------
    ValueError
        If either input is missing.
    """
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")
    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"
    # JPEG cannot store an alpha channel; Gradio frequently delivers RGBA or
    # palette-mode images, which would make a bare .save() raise OSError.
    image.convert("RGB").save(img_path)
    # Wav2Lip expects 16 kHz mono audio.
    seg = AudioSegment.from_file(audio_file)
    seg = seg.set_frame_rate(16000).set_channels(1)
    seg.export(wav_path, format="wav")
    return img_path, wav_path, out_path
# ββββββββββββββββββββββββββββββββββββββββββββββ
# 3. Main inference function
# ββββββββββββββββββββββββββββββββββββββββββββββ
# ----------------------------------------------
# 3. Main inference function
# ----------------------------------------------
def generate(image, audio):
    """Run Wav2Lip inference on one image/audio pair.

    Parameters
    ----------
    image : PIL.Image.Image
        Face image from the Gradio Image component.
    audio : str
        Filepath from the Gradio Audio component.

    Returns
    -------
    str
        Path of the generated MP4 on success, or an error string
        (prefixed "β ") that the UI shows in place of the video.
    """
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        return f"β {e}"
    try:
        subprocess.run(
            [
                "python", "inference.py",
                "--checkpoint_path", str(MODEL_PATH),
                "--face", img,
                "--audio", wav,
                "--outfile", out_vid,
                "--resize_factor", "1",
                # Extra padding below the mouth improves chin coverage.
                "--pads", "0", "20", "0", "20",
                "--fps", "25",
                "--nosmooth",
            ],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return f"β Wav2Lip failed: {e}"
    finally:
        # The intermediate JPEG/WAV are no longer needed once inference has
        # run (or failed); remove them so repeated requests do not litter
        # the working directory. The output video must survive for Gradio.
        for tmp in (img, wav):
            Path(tmp).unlink(missing_ok=True)
    return out_vid if Path(out_vid).exists() else "β Generation failed."
# ----------------------------------------------
# 4. Gradio interface
# ----------------------------------------------
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="π£οΈ High-Quality Wav2Lip (No Crop, Full Image)",
    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
    allow_flagging="never",
    # NOTE: `live=True` was removed — live mode re-runs `generate` (a
    # multi-second blocking subprocess) on every input change; inference
    # should only start when the user clicks Submit.
)

if __name__ == "__main__":
    demo.launch()