import math
from functools import lru_cache
from typing import List, Optional, Tuple

import cv2
import numpy
from cv2.typing import Size

import facefusion.choices
from facefusion.common_helper import is_windows
from facefusion.filesystem import get_file_extension, is_image, is_video
from facefusion.types import Duration, Fps, Orientation, Resolution, VisionFrame


@lru_cache()
def read_static_image(image_path : str) -> Optional[VisionFrame]:
    return read_image(image_path)


def read_static_images(image_paths : List[str]) -> List[VisionFrame]:
    frames = []

    if image_paths:
        for image_path in image_paths:
            frames.append(read_static_image(image_path))
    return frames


def read_image(image_path : str) -> Optional[VisionFrame]:
    if is_image(image_path):
        if is_windows():
            # cv2.imread() cannot handle non-ASCII paths on Windows, so decode from a raw buffer instead
            image_buffer = numpy.fromfile(image_path, dtype = numpy.uint8)
            return cv2.imdecode(image_buffer, cv2.IMREAD_COLOR)
        return cv2.imread(image_path)
    return None


def write_image(image_path : str, vision_frame : VisionFrame) -> bool:
    if image_path:
        if is_windows():
            # cv2.imwrite() cannot handle non-ASCII paths on Windows, so encode in memory and write the buffer
            image_file_extension = get_file_extension(image_path)
            _, vision_frame = cv2.imencode(image_file_extension, vision_frame)
            vision_frame.tofile(image_path)
            return is_image(image_path)
        return cv2.imwrite(image_path, vision_frame)
    return False
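

# Usage sketch for the read/write pair (hypothetical paths, assuming valid image files):
#
#   vision_frame = read_image('face.jpg')            # BGR frame or None
#   if vision_frame is not None:
#       write_image('face_copy.png', vision_frame)   # True on success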


def detect_image_resolution(image_path : str) -> Optional[Resolution]:
    if is_image(image_path):
        image = read_image(image_path)

        # guard against decode failures before touching the shape
        if image is not None:
            height, width = image.shape[:2]

            if width > 0 and height > 0:
                return width, height
    return None


def restrict_image_resolution(image_path : str, resolution : Resolution) -> Resolution:
    if is_image(image_path):
        image_resolution = detect_image_resolution(image_path)

        if image_resolution and image_resolution < resolution:
            return image_resolution
    return resolution


def create_image_resolutions(resolution : Resolution) -> List[str]:
    resolutions = []
    temp_resolutions = []

    if resolution:
        width, height = resolution
        temp_resolutions.append(normalize_resolution(resolution))

        for image_template_size in facefusion.choices.image_template_sizes:
            temp_resolutions.append(normalize_resolution((width * image_template_size, height * image_template_size)))
        temp_resolutions = sorted(set(temp_resolutions))

        for temp_resolution in temp_resolutions:
            resolutions.append(pack_resolution(temp_resolution))
    return resolutions
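

# Example (the exact list depends on facefusion.choices.image_template_sizes,
# assumed here to hold scale factors such as 0.5, 1, 2):
#
#   create_image_resolutions((1280, 720))
#   # -> [ '640x360', '1280x720', '2560x1440' ]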


def read_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]:
    if is_video(video_path):
        video_capture = cv2.VideoCapture(video_path)

        if video_capture.isOpened():
            frame_total = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
            # clamp the seek position to a valid zero-based frame index
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, max(0, min(frame_total, frame_number - 1)))
            has_vision_frame, vision_frame = video_capture.read()
            video_capture.release()

            if has_vision_frame:
                return vision_frame
    return None
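

# Usage sketch (hypothetical path, assuming a readable video):
#
#   vision_frame = read_video_frame('clip.mp4', 42)
#   if vision_frame is not None:
#       height, width = vision_frame.shape[:2]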


def count_video_frame_total(video_path : str) -> int:
    if is_video(video_path):
        video_capture = cv2.VideoCapture(video_path)

        if video_capture.isOpened():
            video_frame_total = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
            video_capture.release()
            return video_frame_total
    return 0


def predict_video_frame_total(video_path : str, fps : Fps, trim_frame_start : int, trim_frame_end : int) -> int:
    if is_video(video_path):
        target_video_fps = detect_video_fps(video_path)
        extract_frame_total = count_trim_frame_total(video_path, trim_frame_start, trim_frame_end) * fps / target_video_fps
        return math.floor(extract_frame_total)
    return 0


def detect_video_fps(video_path : str) -> Optional[float]:
    if is_video(video_path):
        video_capture = cv2.VideoCapture(video_path)

        if video_capture.isOpened():
            video_fps = video_capture.get(cv2.CAP_PROP_FPS)
            video_capture.release()
            return video_fps
    return None


def restrict_video_fps(video_path : str, fps : Fps) -> Fps:
    if is_video(video_path):
        video_fps = detect_video_fps(video_path)

        if video_fps and video_fps < fps:
            return video_fps
    return fps


def detect_video_duration(video_path : str) -> Duration:
    video_frame_total = count_video_frame_total(video_path)
    video_fps = detect_video_fps(video_path)

    if video_frame_total and video_fps:
        return video_frame_total / video_fps
    return 0
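

# Worked example: a clip with 1500 frames at 25 fps gives
# detect_video_duration(...) == 1500 / 25 == 60.0 seconds.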


def count_trim_frame_total(video_path : str, trim_frame_start : Optional[int], trim_frame_end : Optional[int]) -> int:
    trim_frame_start, trim_frame_end = restrict_trim_frame(video_path, trim_frame_start, trim_frame_end)
    return trim_frame_end - trim_frame_start


def restrict_trim_frame(video_path : str, trim_frame_start : Optional[int], trim_frame_end : Optional[int]) -> Tuple[int, int]:
    video_frame_total = count_video_frame_total(video_path)

    # clamp whichever bounds were provided into [0, video_frame_total]
    if isinstance(trim_frame_start, int):
        trim_frame_start = max(0, min(trim_frame_start, video_frame_total))
    if isinstance(trim_frame_end, int):
        trim_frame_end = max(0, min(trim_frame_end, video_frame_total))
    # fill missing bounds with the full frame range
    if isinstance(trim_frame_start, int) and isinstance(trim_frame_end, int):
        return trim_frame_start, trim_frame_end
    if isinstance(trim_frame_start, int):
        return trim_frame_start, video_frame_total
    if isinstance(trim_frame_end, int):
        return 0, trim_frame_end
    return 0, video_frame_total
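

# Worked example for a 100 frame video (hypothetical path):
#
#   restrict_trim_frame('clip.mp4', -10, 250)    # -> (0, 100), both bounds clamped
#   restrict_trim_frame('clip.mp4', 25, None)    # -> (25, 100), open end filled
#   restrict_trim_frame('clip.mp4', None, None)  # -> (0, 100), full range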


def detect_video_resolution(video_path : str) -> Optional[Resolution]:
    if is_video(video_path):
        video_capture = cv2.VideoCapture(video_path)

        if video_capture.isOpened():
            width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)
            height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
            video_capture.release()
            return int(width), int(height)
    return None


def restrict_video_resolution(video_path : str, resolution : Resolution) -> Resolution:
    if is_video(video_path):
        video_resolution = detect_video_resolution(video_path)

        if video_resolution and video_resolution < resolution:
            return video_resolution
    return resolution


def create_video_resolutions(resolution : Resolution) -> List[str]:
    resolutions = []
    temp_resolutions = []

    if resolution:
        width, height = resolution
        temp_resolutions.append(normalize_resolution(resolution))

        # scale the shorter side to each template size while keeping the aspect ratio
        for video_template_size in facefusion.choices.video_template_sizes:
            if width > height:
                temp_resolutions.append(normalize_resolution((video_template_size * width / height, video_template_size)))
            else:
                temp_resolutions.append(normalize_resolution((video_template_size, video_template_size * height / width)))
        temp_resolutions = sorted(set(temp_resolutions))

        for temp_resolution in temp_resolutions:
            resolutions.append(pack_resolution(temp_resolution))
    return resolutions


def normalize_resolution(resolution : Tuple[float, float]) -> Resolution:
    width, height = resolution

    if width > 0 and height > 0:
        # round both sides to even numbers, as many video codecs require
        normalize_width = round(width / 2) * 2
        normalize_height = round(height / 2) * 2
        return normalize_width, normalize_height
    return 0, 0


def pack_resolution(resolution : Resolution) -> str:
    width, height = normalize_resolution(resolution)
    return str(width) + 'x' + str(height)


def unpack_resolution(resolution : str) -> Resolution:
    width, height = map(int, resolution.split('x'))
    return width, height
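

# Round trip example (pack_resolution() normalizes both sides to even numbers):
#
#   pack_resolution((1919, 1079))   # -> '1920x1080'
#   unpack_resolution('1920x1080')  # -> (1920, 1080)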


def detect_frame_orientation(vision_frame : VisionFrame) -> Orientation:
    height, width = vision_frame.shape[:2]

    if width > height:
        return 'landscape'
    return 'portrait'


def restrict_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame:
    height, width = vision_frame.shape[:2]
    restrict_width, restrict_height = resolution

    if height > restrict_height or width > restrict_width:
        scale = min(restrict_height / height, restrict_width / width)
        new_width = int(width * scale)
        new_height = int(height * scale)
        return cv2.resize(vision_frame, (new_width, new_height))
    return vision_frame
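

# Example: restricting a 3840x2160 frame to (1920, 1080) downscales it to
# 1920x1080, while a frame already within bounds is returned unchanged.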


def fit_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame:
    fit_width, fit_height = resolution
    height, width = vision_frame.shape[:2]
    scale = min(fit_height / height, fit_width / width)
    new_width = int(width * scale)
    new_height = int(height * scale)
    paste_vision_frame = cv2.resize(vision_frame, (new_width, new_height))
    x_pad = (fit_width - new_width) // 2
    y_pad = (fit_height - new_height) // 2
    temp_vision_frame = numpy.pad(paste_vision_frame, ((y_pad, fit_height - new_height - y_pad), (x_pad, fit_width - new_width - x_pad), (0, 0)))
    return temp_vision_frame
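

# Example: fitting a 1920x1080 frame into (1280, 1280) scales it to 1280x720
# and letterboxes it with 280 black rows above and below.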


def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame:
    return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB)


def conditional_match_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> VisionFrame:
    histogram_factor = calc_histogram_difference(source_vision_frame, target_vision_frame)
    target_vision_frame = blend_vision_frames(target_vision_frame, match_frame_color(source_vision_frame, target_vision_frame), histogram_factor)
    return target_vision_frame


def match_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> VisionFrame:
    # equalize at three coarse sizes first, then once at full resolution
    color_difference_sizes = numpy.linspace(16, target_vision_frame.shape[0], 3, endpoint = False)

    for color_difference_size in color_difference_sizes:
        source_vision_frame = equalize_frame_color(source_vision_frame, target_vision_frame, normalize_resolution((color_difference_size, color_difference_size)))
    target_vision_frame = equalize_frame_color(source_vision_frame, target_vision_frame, target_vision_frame.shape[:2][::-1])
    return target_vision_frame
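

# The coarse-to-fine passes above correct large scale color shifts without
# copying fine detail from the source into the target.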


def equalize_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame, size : Size) -> VisionFrame:
    # measure the color difference on a downscaled copy of both frames
    source_frame_resize = cv2.resize(source_vision_frame, size, interpolation = cv2.INTER_AREA).astype(numpy.float32)
    target_frame_resize = cv2.resize(target_vision_frame, size, interpolation = cv2.INTER_AREA).astype(numpy.float32)
    color_difference_vision_frame = numpy.subtract(source_frame_resize, target_frame_resize)
    # upscale the difference map and apply it to the full resolution target
    color_difference_vision_frame = cv2.resize(color_difference_vision_frame, target_vision_frame.shape[:2][::-1], interpolation = cv2.INTER_CUBIC)
    target_vision_frame = numpy.add(target_vision_frame, color_difference_vision_frame).clip(0, 255).astype(numpy.uint8)
    return target_vision_frame


def calc_histogram_difference(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> float:
    histogram_source = cv2.calcHist([ cv2.cvtColor(source_vision_frame, cv2.COLOR_BGR2HSV) ], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ])
    histogram_target = cv2.calcHist([ cv2.cvtColor(target_vision_frame, cv2.COLOR_BGR2HSV) ], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ])
    histogram_difference = float(numpy.interp(cv2.compareHist(histogram_source, histogram_target, cv2.HISTCMP_CORREL), [ -1, 1 ], [ 0, 1 ]))
    return histogram_difference
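

# cv2.HISTCMP_CORREL yields a correlation in [-1, 1] which numpy.interp remaps
# to [0, 1], so identical frames score 1.0 and a correlation of 0.5 maps to 0.75.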


def blend_vision_frames(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame, blend_factor : float) -> VisionFrame:
    blend_vision_frame = cv2.addWeighted(source_vision_frame, 1 - blend_factor, target_vision_frame, blend_factor, 0)
    return blend_vision_frame


def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[VisionFrame], int, int]:
    # pad the frame border, then pad again so the area divides evenly into tiles
    vision_frame = numpy.pad(vision_frame, ((size[1], size[1]), (size[1], size[1]), (0, 0)))
    tile_width = size[0] - 2 * size[2]
    pad_size_bottom = size[2] + tile_width - vision_frame.shape[0] % tile_width
    pad_size_right = size[2] + tile_width - vision_frame.shape[1] % tile_width
    pad_vision_frame = numpy.pad(vision_frame, ((size[2], pad_size_bottom), (size[2], pad_size_right), (0, 0)))
    pad_height, pad_width = pad_vision_frame.shape[:2]
    row_range = range(size[2], pad_height - size[2], tile_width)
    col_range = range(size[2], pad_width - size[2], tile_width)
    tile_vision_frames = []

    # cut overlapping tiles, each extended by size[2] pixels on every side
    for row_vision_frame in row_range:
        top = row_vision_frame - size[2]
        bottom = row_vision_frame + size[2] + tile_width
        for column_vision_frame in col_range:
            left = column_vision_frame - size[2]
            right = column_vision_frame + size[2] + tile_width
            tile_vision_frames.append(pad_vision_frame[top:bottom, left:right, :])
    return tile_vision_frames, pad_width, pad_height
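

# Tiling sketch, assuming size is (tile_size, frame_pad, tile_pad) as the
# callers pass it: with size = (256, 16, 8) each tile spans 256 pixels, of
# which 2 * 8 overlap its neighbours, so tiles advance in strides of 240.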


def merge_tile_frames(tile_vision_frames : List[VisionFrame], temp_width : int, temp_height : int, pad_width : int, pad_height : int, size : Size) -> VisionFrame:
    merge_vision_frame = numpy.zeros((pad_height, pad_width, 3)).astype(numpy.uint8)
    tile_width = tile_vision_frames[0].shape[1] - 2 * size[2]
    tiles_per_row = min(pad_width // tile_width, len(tile_vision_frames))

    for index, tile_vision_frame in enumerate(tile_vision_frames):
        # strip the overlap border and paste the core of the tile into its grid cell
        tile_vision_frame = tile_vision_frame[size[2]:-size[2], size[2]:-size[2]]
        row_index = index // tiles_per_row
        col_index = index % tiles_per_row
        top = row_index * tile_vision_frame.shape[0]
        bottom = top + tile_vision_frame.shape[0]
        left = col_index * tile_vision_frame.shape[1]
        right = left + tile_vision_frame.shape[1]
        merge_vision_frame[top:bottom, left:right, :] = tile_vision_frame
    # crop away the frame padding added by create_tile_frames()
    merge_vision_frame = merge_vision_frame[size[1] : size[1] + temp_height, size[1] : size[1] + temp_width, :]
    return merge_vision_frame
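

# Round trip sketch (hypothetical frame; temp_width and temp_height are the
# dimensions of the frame that was passed to create_tile_frames()):
#
#   size = (256, 16, 8)
#   height, width = vision_frame.shape[:2]
#   tiles, pad_width, pad_height = create_tile_frames(vision_frame, size)
#   merged = merge_tile_frames(tiles, width, height, pad_width, pad_height, size)
#   assert merged.shape[:2] == (height, width)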