import math
from functools import lru_cache
from typing import List, Optional, Tuple

import cv2
import numpy
from cv2.typing import Size

import facefusion.choices
from facefusion.common_helper import is_windows
from facefusion.filesystem import get_file_extension, is_image, is_video
from facefusion.types import Duration, Fps, Orientation, Resolution, VisionFrame


@lru_cache()
def read_static_image(image_path : str) -> Optional[VisionFrame]:
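	# cache decoded images so repeated reads of the same path are served from memory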
	return read_image(image_path)


def read_static_images(image_paths : List[str]) -> List[VisionFrame]:
	frames = []

	if image_paths:
		for image_path in image_paths:
			frames.append(read_static_image(image_path))
	return frames


def read_image(image_path : str) -> Optional[VisionFrame]:
	if is_image(image_path):
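		# decode through an in-memory buffer so paths with non-ASCII characters also work on Windows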
		if is_windows():
			image_buffer = numpy.fromfile(image_path, dtype = numpy.uint8)
			return cv2.imdecode(image_buffer, cv2.IMREAD_COLOR)
		return cv2.imread(image_path)
	return None


def write_image(image_path : str, vision_frame : VisionFrame) -> bool:
	if image_path:
		if is_windows():
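			# encode in memory and write with tofile so non-ASCII paths also work on Windows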
			image_file_extension = get_file_extension(image_path)
			_, vision_frame = cv2.imencode(image_file_extension, vision_frame)
			vision_frame.tofile(image_path)
			return is_image(image_path)
		return cv2.imwrite(image_path, vision_frame)
	return False


def detect_image_resolution(image_path : str) -> Optional[Resolution]:
	if is_image(image_path):
		image = read_image(image_path)
		height, width = image.shape[:2]

		if width > 0 and height > 0:
			return width, height
	return None


def restrict_image_resolution(image_path : str, resolution : Resolution) -> Resolution:
	if is_image(image_path):
		image_resolution = detect_image_resolution(image_path)
		if image_resolution < resolution:
			return image_resolution
	return resolution


def create_image_resolutions(resolution : Resolution) -> List[str]:
	resolutions = []
	temp_resolutions = []

	if resolution:
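		# collect the base resolution and every template multiple as packed 'WIDTHxHEIGHT' strings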
		width, height = resolution
		temp_resolutions.append(normalize_resolution(resolution))
		for image_template_size in facefusion.choices.image_template_sizes:
			temp_resolutions.append(normalize_resolution((width * image_template_size, height * image_template_size)))
		temp_resolutions = sorted(set(temp_resolutions))
		for temp_resolution in temp_resolutions:
			resolutions.append(pack_resolution(temp_resolution))
	return resolutions


def read_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]:
	if is_video(video_path):
		video_capture = cv2.VideoCapture(video_path)
		if video_capture.isOpened():
			frame_total = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
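			# clamp the seek position so it never runs past the end of the video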
			video_capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1))
			has_vision_frame, vision_frame = video_capture.read()
			video_capture.release()
			if has_vision_frame:
				return vision_frame
	return None


def count_video_frame_total(video_path : str) -> int:
	if is_video(video_path):
		video_capture = cv2.VideoCapture(video_path)
		if video_capture.isOpened():
			video_frame_total = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
			video_capture.release()
			return video_frame_total
	return 0


def predict_video_frame_total(video_path : str, fps : Fps, trim_frame_start : int, trim_frame_end : int) -> int:
	if is_video(video_path):
		target_video_fps = detect_video_fps(video_path)
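		# convert the trimmed frame count from the native video fps to the requested fps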
		extract_frame_total = count_trim_frame_total(video_path, trim_frame_start, trim_frame_end) * fps / target_video_fps
		return math.floor(extract_frame_total)
	return 0


def detect_video_fps(video_path : str) -> Optional[Fps]:
	if is_video(video_path):
		video_capture = cv2.VideoCapture(video_path)
		if video_capture.isOpened():
			video_fps = video_capture.get(cv2.CAP_PROP_FPS)
			video_capture.release()
			return video_fps
	return None


def restrict_video_fps(video_path : str, fps : Fps) -> Fps:
	if is_video(video_path):
		video_fps = detect_video_fps(video_path)
		if video_fps < fps:
			return video_fps
	return fps


def detect_video_duration(video_path : str) -> Duration:
	video_frame_total = count_video_frame_total(video_path)
	video_fps = detect_video_fps(video_path)

	if video_frame_total and video_fps:
		return video_frame_total / video_fps
	return 0


def count_trim_frame_total(video_path : str, trim_frame_start : Optional[int], trim_frame_end : Optional[int]) -> int:
	trim_frame_start, trim_frame_end = restrict_trim_frame(video_path, trim_frame_start, trim_frame_end)

	return trim_frame_end - trim_frame_start


def restrict_trim_frame(video_path : str, trim_frame_start : Optional[int], trim_frame_end : Optional[int]) -> Tuple[int, int]:
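	# clamp the given bounds into [0, video_frame_total] and fall back to the full range when a bound is missing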
	video_frame_total = count_video_frame_total(video_path)

	if isinstance(trim_frame_start, int):
		trim_frame_start = max(0, min(trim_frame_start, video_frame_total))
	if isinstance(trim_frame_end, int):
		trim_frame_end = max(0, min(trim_frame_end, video_frame_total))

	if isinstance(trim_frame_start, int) and isinstance(trim_frame_end, int):
		return trim_frame_start, trim_frame_end
	if isinstance(trim_frame_start, int):
		return trim_frame_start, video_frame_total
	if isinstance(trim_frame_end, int):
		return 0, trim_frame_end

	return 0, video_frame_total


def detect_video_resolution(video_path : str) -> Optional[Resolution]:
	if is_video(video_path):
		video_capture = cv2.VideoCapture(video_path)
		if video_capture.isOpened():
			width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)
			height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
			video_capture.release()
			return int(width), int(height)
	return None


def restrict_video_resolution(video_path : str, resolution : Resolution) -> Resolution:
	if is_video(video_path):
		video_resolution = detect_video_resolution(video_path)
		if video_resolution < resolution:
			return video_resolution
	return resolution


def create_video_resolutions(resolution : Resolution) -> List[str]:
	resolutions = []
	temp_resolutions = []

	if resolution:
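		# apply each template size to the shorter side and scale the longer side to keep the aspect ratio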
		width, height = resolution
		temp_resolutions.append(normalize_resolution(resolution))
		for video_template_size in facefusion.choices.video_template_sizes:
			if width > height:
				temp_resolutions.append(normalize_resolution((video_template_size * width / height, video_template_size)))
			else:
				temp_resolutions.append(normalize_resolution((video_template_size, video_template_size * height / width)))
		temp_resolutions = sorted(set(temp_resolutions))
		for temp_resolution in temp_resolutions:
			resolutions.append(pack_resolution(temp_resolution))
	return resolutions


def normalize_resolution(resolution : Tuple[float, float]) -> Resolution:
	width, height = resolution

	if width > 0 and height > 0:
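		# snap both dimensions to the nearest even value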
		normalize_width = round(width / 2) * 2
		normalize_height = round(height / 2) * 2
		return normalize_width, normalize_height
	return 0, 0


def pack_resolution(resolution : Resolution) -> str:
	width, height = normalize_resolution(resolution)
	return str(width) + 'x' + str(height)


def unpack_resolution(resolution : str) -> Resolution:
	width, height = map(int, resolution.split('x'))
	return width, height


def detect_frame_orientation(vision_frame : VisionFrame) -> Orientation:
	height, width = vision_frame.shape[:2]

	if width > height:
		return 'landscape'
	return 'portrait'


def restrict_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame:
	height, width = vision_frame.shape[:2]
	restrict_width, restrict_height = resolution

	if height > restrict_height or width > restrict_width:
		scale = min(restrict_height / height, restrict_width / width)
		new_width = int(width * scale)
		new_height = int(height * scale)
		return cv2.resize(vision_frame, (new_width, new_height))
	return vision_frame


def fit_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame:
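	# scale the frame to fit inside the target resolution and pad the remaining borders evenly (letterbox)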
	fit_width, fit_height = resolution
	height, width = vision_frame.shape[:2]
	scale = min(fit_height / height, fit_width / width)
	new_width = int(width * scale)
	new_height = int(height * scale)
	paste_vision_frame = cv2.resize(vision_frame, (new_width, new_height))
	x_pad = (fit_width - new_width) // 2
	y_pad = (fit_height - new_height) // 2
	temp_vision_frame = numpy.pad(paste_vision_frame, ((y_pad, fit_height - new_height - y_pad), (x_pad, fit_width - new_width - x_pad), (0, 0)))
	return temp_vision_frame


def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame:
	return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB)


def conditional_match_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> VisionFrame:
	histogram_factor = calc_histogram_difference(source_vision_frame, target_vision_frame)
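	# the closer the histograms, the stronger the color matched frame is blended in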
	target_vision_frame = blend_vision_frames(target_vision_frame, match_frame_color(source_vision_frame, target_vision_frame), histogram_factor)
	return target_vision_frame


def match_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> VisionFrame:
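	# transfer the source color distribution onto the target frame from coarse to fine scales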
	color_difference_sizes = numpy.linspace(16, target_vision_frame.shape[0], 3, endpoint = False)

	for color_difference_size in color_difference_sizes:
		source_vision_frame = equalize_frame_color(source_vision_frame, target_vision_frame, normalize_resolution((color_difference_size, color_difference_size)))
	target_vision_frame = equalize_frame_color(source_vision_frame, target_vision_frame, target_vision_frame.shape[:2][::-1])
	return target_vision_frame


def equalize_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame, size : Size) -> VisionFrame:
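	# add the low resolution color difference between source and target back onto the target at full resolution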
	source_frame_resize = cv2.resize(source_vision_frame, size, interpolation = cv2.INTER_AREA).astype(numpy.float32)
	target_frame_resize = cv2.resize(target_vision_frame, size, interpolation = cv2.INTER_AREA).astype(numpy.float32)
	color_difference_vision_frame = numpy.subtract(source_frame_resize, target_frame_resize)
	color_difference_vision_frame = cv2.resize(color_difference_vision_frame, target_vision_frame.shape[:2][::-1], interpolation = cv2.INTER_CUBIC)
	target_vision_frame = numpy.add(target_vision_frame, color_difference_vision_frame).clip(0, 255).astype(numpy.uint8)
	return target_vision_frame


def calc_histogram_difference(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> float:
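	# map the HSV histogram correlation from [-1, 1] onto a [0, 1] similarity score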
	histogram_source = cv2.calcHist([cv2.cvtColor(source_vision_frame, cv2.COLOR_BGR2HSV)], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ])
	histogram_target = cv2.calcHist([cv2.cvtColor(target_vision_frame, cv2.COLOR_BGR2HSV)], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ])
	histogram_difference = float(numpy.interp(cv2.compareHist(histogram_source, histogram_target, cv2.HISTCMP_CORREL), [ -1, 1 ], [ 0, 1 ]))
	return histogram_difference


def blend_vision_frames(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame, blend_factor : float) -> VisionFrame:
	blend_vision_frame = cv2.addWeighted(source_vision_frame, 1 - blend_factor, target_vision_frame, blend_factor, 0)
	return blend_vision_frame


def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[VisionFrame], int, int]:
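	# size = (tile size, frame padding, tile overlap); each tile is size[0] x size[0] pixels and overlaps its neighbours by size[2]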
	vision_frame = numpy.pad(vision_frame, ((size[1], size[1]), (size[1], size[1]), (0, 0)))
	tile_width = size[0] - 2 * size[2]
	pad_size_bottom = size[2] + tile_width - vision_frame.shape[0] % tile_width
	pad_size_right = size[2] + tile_width - vision_frame.shape[1] % tile_width
	pad_vision_frame = numpy.pad(vision_frame, ((size[2], pad_size_bottom), (size[2], pad_size_right), (0, 0)))
	pad_height, pad_width = pad_vision_frame.shape[:2]
	row_range = range(size[2], pad_height - size[2], tile_width)
	col_range = range(size[2], pad_width - size[2], tile_width)
	tile_vision_frames = []

	for row_vision_frame in row_range:
		top = row_vision_frame - size[2]
		bottom = row_vision_frame + size[2] + tile_width

		for column_vision_frame in col_range:
			left = column_vision_frame - size[2]
			right = column_vision_frame + size[2] + tile_width
			tile_vision_frames.append(pad_vision_frame[top:bottom, left:right, :])

	return tile_vision_frames, pad_width, pad_height


def merge_tile_frames(tile_vision_frames : List[VisionFrame], temp_width : int, temp_height : int, pad_width : int, pad_height : int, size : Size) -> VisionFrame:
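	# trim the size[2] overlap from every tile, stitch the tiles row by row and crop away the size[1] frame padding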
	merge_vision_frame = numpy.zeros((pad_height, pad_width, 3)).astype(numpy.uint8)
	tile_width = tile_vision_frames[0].shape[1] - 2 * size[2]
	tiles_per_row = min(pad_width // tile_width, len(tile_vision_frames))

	for index, tile_vision_frame in enumerate(tile_vision_frames):
		tile_vision_frame = tile_vision_frame[size[2]:-size[2], size[2]:-size[2]]
		row_index = index // tiles_per_row
		col_index = index % tiles_per_row
		top = row_index * tile_vision_frame.shape[0]
		bottom = top + tile_vision_frame.shape[0]
		left = col_index * tile_vision_frame.shape[1]
		right = left + tile_vision_frame.shape[1]
		merge_vision_frame[top:bottom, left:right, :] = tile_vision_frame

	merge_vision_frame = merge_vision_frame[size[1] : size[1] + temp_height, size[1] : size[1] + temp_width, :]
	return merge_vision_frame