File size: 5,248 Bytes
d4a8a38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""

Adapted from:

https://github.com/genmoai/mochi/blob/main/demos/fine_tuner/trim_and_crop_videos.py

"""

from pathlib import Path
import shutil

import click
from moviepy.editor import VideoFileClip
from tqdm import tqdm


@click.command()
@click.argument("folder", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_folder", type=click.Path(dir_okay=True))
@click.option("--num_frames", "-f", type=float, default=30, help="Number of frames")
@click.option("--resolution", "-r", type=str, default="480x848", help="Video resolution")
@click.option("--force_upsample", is_flag=True, help="Force upsample.")
def truncate_videos(folder, output_folder, num_frames, resolution, force_upsample):
    """Truncate all MP4 and MOV files in FOLDER to specified number of frames and resolution"""
    # NOTE: the docstring above doubles as the click ``--help`` text, so the
    # detailed contract lives in comments instead.
    #
    # folder:         directory searched recursively for .mp4/.mov files
    #                 (extension matched case-insensitively).
    # output_folder:  destination root; the input directory layout is mirrored
    #                 and every output is written as ``<name>.mp4`` next to a
    #                 ``<name>.txt`` caption (copied if present, else empty).
    # num_frames:     number of frames to keep, counted at 30 fps.
    # resolution:     "<height>x<width>" of the output, e.g. "480x848".
    # force_upsample: upsample inputs smaller than the target instead of
    #                 skipping them.
    #
    # Raises click.BadParameter for a malformed resolution; any error while
    # processing a file is reported on stderr and re-raised.
    fps = 30
    video_suffixes = {".mp4", ".mov"}

    input_path = Path(folder)
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    # Parse target resolution ("<height>x<width>"); fail with a usage error
    # rather than a bare unpacking ValueError or a later ZeroDivisionError.
    try:
        target_height, target_width = map(int, resolution.lower().split("x"))
    except ValueError as err:
        raise click.BadParameter(
            f"expected '<height>x<width>' (e.g. 480x848), got {resolution!r}",
            param_hint="--resolution",
        ) from err
    if target_height <= 0 or target_width <= 0:
        raise click.BadParameter(
            f"height and width must be positive, got {resolution!r}",
            param_hint="--resolution",
        )
    target_ratio = target_width / target_height

    # Calculate duration (small margin so the last requested frame is included)
    duration = (num_frames / fps) + 0.09

    # Find all MP4 and MOV files. A single case-insensitive pass avoids
    # processing the same file twice on case-insensitive filesystems (where
    # "*.mp4" and "*.MP4" match the same entries) and also picks up
    # mixed-case extensions. Sorting makes the processing order deterministic.
    video_files = sorted(
        candidate
        for candidate in input_path.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in video_suffixes
    )

    # Output parameters for consistent MP4 encoding
    output_params = {
        "codec": "libx264",
        "audio": False,  # Disable audio
        "preset": "medium",  # Balance between speed and quality
        "bitrate": "5000k",  # Adjust as needed
    }

    for file_path in tqdm(video_files):
        # Every clip created for this file; all are closed in ``finally`` so
        # that skipped or failing files do not leak readers.
        clips = []
        try:
            relative_path = file_path.relative_to(input_path)
            output_file = output_path / relative_path.with_suffix(".mp4")
            output_file.parent.mkdir(parents=True, exist_ok=True)

            click.echo(f"Processing: {file_path}")
            video = VideoFileClip(str(file_path))
            clips.append(video)

            # Skip if video is too short
            if video.duration < duration:
                click.echo(f"Skipping {file_path} as it is too short")
                continue

            # Skip (or upsample) if target resolution is larger than input
            if target_width > video.w or target_height > video.h:
                if not force_upsample:
                    click.echo(
                        f"Skipping {file_path} as target resolution {resolution} is larger than input {video.w}x{video.h}"
                    )
                    continue
                click.echo(
                    f"{file_path} as target resolution {resolution} is larger than input {video.w}x{video.h}. So, upsampling the video."
                )
                video = video.resize(width=target_width, height=target_height)
                clips.append(video)

            # First truncate duration
            truncated = video.subclip(0, duration)
            clips.append(truncated)

            # Centre-crop to the target aspect ratio, then scale to the target size
            current_ratio = truncated.w / truncated.h
            if current_ratio > target_ratio:
                # Video is wider than target ratio - crop width
                new_width = int(truncated.h * target_ratio)
                x1 = (truncated.w - new_width) // 2
                cropped = truncated.crop(x1=x1, width=new_width)
            else:
                # Video is taller than target ratio - crop height
                new_height = int(truncated.w / target_ratio)
                y1 = (truncated.h - new_height) // 2
                cropped = truncated.crop(y1=y1, height=new_height)
            clips.append(cropped)

            # Scale to the target size and set FPS to 30
            final = cropped.resize((target_width, target_height)).set_fps(fps)
            clips.append(final)

            # Copy the caption that sits next to the video, or create an empty one
            txt_file_path = file_path.with_suffix(".txt")
            output_txt_file = output_path / relative_path.with_suffix(".txt")
            output_txt_file.parent.mkdir(parents=True, exist_ok=True)
            if txt_file_path.exists():
                shutil.copy(txt_file_path, output_txt_file)
                click.echo(f"Copied {txt_file_path} to {output_txt_file}")
            else:
                # Print warning in bold yellow with a warning emoji
                click.echo(
                    f"\033[1;33m⚠️  Warning: No caption found for {file_path}, using an empty caption. This may hurt fine-tuning quality.\033[0m"
                )
                output_txt_file.touch()

            # Write the output file
            final.write_videofile(str(output_file), **output_params)

        except Exception as e:
            click.echo(f"\033[1;31m Error processing {file_path}: {str(e)}\033[0m", err=True)
            raise
        finally:
            # Clean up on success, skip and failure alike
            for clip in reversed(clips):
                clip.close()


# Script entry point: invoke the click command, which reads its arguments
# and options from the command line.
if __name__ == "__main__":
    truncate_videos()