Spaces:

mich123geb
/

wav2lip_api

Sleeping

App Files Files Community

wav2lip_api / app.py

mich123geb

Update app.py

4f314db verified 27 days ago

raw

history blame contribute delete

3.37 kB

	import os
	import uuid
	import subprocess
	from pathlib import Path

	import gradio as gr
	from PIL import Image
	from pydub import AudioSegment

	# ──────────────────────────────────────────────
	# 1. Download Wav2Lip model checkpoint
	# ──────────────────────────────────────────────
	MODEL_PATH = Path("wav2lip_gan.pth")
	MODEL_URL = "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"

	# Fetch the checkpoint once, at import time, if it is not already on disk.
	# The download goes to a sibling ".part" file and is only moved onto
	# MODEL_PATH after wget exits successfully: wget creates its -O target
	# before the transfer finishes, so writing to MODEL_PATH directly could leave
	# an empty/truncated checkpoint that the exists() check would accept forever.
	if not MODEL_PATH.exists():
	    _partial = MODEL_PATH.with_name(MODEL_PATH.name + ".part")
	    try:
	        subprocess.run(
	            ["wget", "-q", MODEL_URL, "-O", str(_partial)],
	            check=True,
	        )
	        _partial.replace(MODEL_PATH)
	    except (OSError, subprocess.CalledProcessError) as err:
	        # Best effort, as before: the app still starts, but no broken file is
	        # left behind, so the next start retries the download.
	        _partial.unlink(missing_ok=True)
	        print(f"⚠️ Could not download {MODEL_PATH} from {MODEL_URL}: {err}")

	# ──────────────────────────────────────────────
	# 2. Preprocess image and audio (no cropping)
	# ──────────────────────────────────────────────
	def preprocess(image, audio_file):
	    """Write the uploaded image and audio to uniquely named scratch files.

	    The image is saved as ``<uid>.jpg`` and the audio is re-encoded to a
	    16 kHz mono ``<uid>.wav``; ``<uid>_result.mp4`` is reserved as the output
	    name but is not created here. All paths are relative to the current
	    working directory.

	    Args:
	        image: PIL image to save (any mode; converted to RGB if necessary).
	        audio_file: Path to an audio file readable by ``AudioSegment.from_file``.

	    Returns:
	        Tuple ``(img_path, wav_path, out_path)`` of path strings.

	    Raises:
	        ValueError: If ``image`` or ``audio_file`` is ``None``.
	        Exception: Whatever saving the image or decoding/exporting the audio
	            raises; any scratch file already written is removed first.
	    """
	    if image is None or audio_file is None:
	        raise ValueError("Both an image and an audio file are required.")

	    uid = uuid.uuid4().hex
	    img_path = f"{uid}.jpg"
	    wav_path = f"{uid}.wav"
	    out_path = f"{uid}_result.mp4"

	    # The JPEG writer rejects modes such as RGBA / P / LA, so normalise first.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    try:
	        image.save(img_path)

	        seg = AudioSegment.from_file(audio_file)
	        seg = seg.set_frame_rate(16000).set_channels(1)
	        seg.export(wav_path, format="wav")
	    except Exception:
	        # Do not leave half-written scratch files behind on failure.
	        for leftover in (img_path, wav_path):
	            Path(leftover).unlink(missing_ok=True)
	        raise

	    return img_path, wav_path, out_path

	# ──────────────────────────────────────────────
	# 3. Main inference function
	# ──────────────────────────────────────────────
	def generate(image, audio):
	    """Run Wav2Lip on one image/audio pair and return the resulting video path.

	    The inputs are written to scratch files by ``preprocess``, ``inference.py``
	    is executed as a subprocess with the checkpoint at ``MODEL_PATH``, and the
	    scratch inputs are deleted afterwards. The output video is left on disk
	    so the caller can serve it.

	    Args:
	        image: PIL image containing the face.
	        audio: Path to the driving audio file.

	    Returns:
	        Path (str) of the generated MP4.

	    Raises:
	        gr.Error: If preprocessing fails, ``inference.py`` exits with a
	            non-zero status, or no output file was produced. Errors are raised
	            rather than returned because the interface's output is a video
	            component, which would treat a returned message as a file path.
	    """
	    # Local import: only needed to locate the interpreter running this app.
	    import sys

	    try:
	        img, wav, out_vid = preprocess(image, audio)
	    except Exception as e:
	        raise gr.Error(f"❌ {e}") from e

	    try:
	        subprocess.run(
	            [
	                # Use the current interpreter so inference.py sees the same
	                # installed packages, whatever "python" resolves to on PATH.
	                sys.executable, "inference.py",
	                "--checkpoint_path", str(MODEL_PATH),
	                "--face", img,
	                "--audio", wav,
	                "--outfile", out_vid,
	                "--resize_factor", "1",
	                "--pads", "0", "20", "0", "20",
	                "--fps", "25",
	                "--nosmooth",
	            ],
	            check=True,
	        )
	    except subprocess.CalledProcessError as e:
	        raise gr.Error(f"❌ Wav2Lip failed: {e}") from e
	    finally:
	        # The scratch inputs are per-request; remove them whether or not
	        # inference succeeded so they do not accumulate on disk.
	        for scratch in (img, wav):
	            Path(scratch).unlink(missing_ok=True)

	    if not Path(out_vid).exists():
	        raise gr.Error("❌ Generation failed.")
	    return out_vid

	# ──────────────────────────────────────────────
	# 4. Gradio interface
	# ──────────────────────────────────────────────
	# Components are built first so the Interface call below stays compact.
	_image_input = gr.Image(type="pil", label="Image (Full Resolution - Face Visible)")
	_audio_input = gr.Audio(type="filepath", label="Audio (any format)")
	_video_output = gr.Video(label="Talking-head MP4")

	# Wire `generate` to one image + one audio input and a single video output.
	demo = gr.Interface(
	    fn=generate,
	    inputs=[_image_input, _audio_input],
	    outputs=_video_output,
	    title="🗣️ High-Quality Wav2Lip (No Crop, Full Image)",
	    description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
	    allow_flagging="never",
	    live=True,
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()