import torch
import gc
import os
import subprocess
import datetime

class AudioVideoSync:
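    """Thin wrapper around the Wav2Lip inference script.

    Lip-syncs a face video to a given audio track by invoking Wav2Lip's
    inference.py as a subprocess, clearing and reporting GPU memory around
    the run.
    """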
    def __init__(self, wav2lip_dir="Wav2Lip"):
        self.wav2lip_dir = wav2lip_dir
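        # Pre-trained GAN checkpoint, expected under <wav2lip_dir>/checkpoints/wav2lip_gan.pth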
        self.checkpoint_path = os.path.join(wav2lip_dir, "checkpoints", "wav2lip_gan.pth")

    def print_memory_usage(self, stage=""):
        """Prints GPU memory usage at different stages."""
        os.system('nvidia-smi')
        print(f"[{stage}] Allocated Memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
        print(f"[{stage}] Reserved Memory: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")
        print("-" * 50)

    def sync_audio_video(self, video_path, audio_path, output_video=None):
        """Syncs audio and video using Wav2Lip."""
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        result_video = output_video or f"result_{timestamp}.mp4"

        # Before running Wav2Lip, clear Python and CUDA caches and report memory usage
        gc.collect()
        torch.cuda.empty_cache()
        self.print_memory_usage("Before Wav2Lip Inference")

        # Run Wav2Lip inference as a subprocess
        print("Running Wav2Lip lip-sync inference...")
        result = subprocess.run([
            "python", os.path.join(self.wav2lip_dir, "inference.py"),
            "--checkpoint_path", self.checkpoint_path,
            "--face", video_path,
            "--audio", audio_path,
            "--outfile", result_video,
            "--wav2lip_batch_size", "1",
            "--resize_factor", "2",  # Better accuracy for lips
            "--nosmooth"  # Ensures smoother transitions
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        if result.returncode != 0:
            print("Error in Wav2Lip inference:", result.stderr.decode())
            return None

        # After inference, free memory and report usage again
        gc.collect()
        torch.cuda.empty_cache()
        self.print_memory_usage("After Wav2Lip Inference")

        print(f" Output saved at: {result_video}")
        return result_video
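

# Example usage: a minimal, illustrative sketch. The input paths below are
# hypothetical and should point to an existing face video and audio track;
# the Wav2Lip repo (with checkpoints/wav2lip_gan.pth) is assumed to be cloned
# into ./Wav2Lip.
if __name__ == "__main__":
    syncer = AudioVideoSync(wav2lip_dir="Wav2Lip")
    output = syncer.sync_audio_video(
        video_path="inputs/face.mp4",    # hypothetical input video
        audio_path="inputs/speech.wav",  # hypothetical driving audio
    )
    if output:
        print(f"Lip-synced video available at: {output}")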