Spaces:
Sleeping
Sleeping
""" | |
画像の拡張処理を行うための関数群を提供します。 | |
1. 画像の平均色を計算する関数 | |
2. 画像を指定された角度で回転させ、平均色で余白を埋める関数 | |
3. 回転した画像から最大の長方形を切り出す関数 | |
4. ランダムな正方形を切り出す関数 | |
""" | |
import os | |
from PIL import Image, ImageStat, ImageOps | |
from collections import Counter | |
import random | |
import math | |
from tqdm import tqdm | |
import argparse | |
def get_average_color(image): | |
"""画像の平均色を取得""" | |
# RGBAの場合はRGBに変換 | |
if image.mode == 'RGBA': | |
image = image.convert('RGB') | |
stat = ImageStat.Stat(image) | |
r, g, b = map(int, stat.mean) | |
return (r, g, b) | |
def get_edge_mode_color(img, edge_width=10): | |
"""画像の外周の最頻値(mode)を取得する""" | |
# 外周の10ピクセル領域を取得 | |
left = img.crop((0, 0, edge_width, img.height)) # 左端 | |
right = img.crop((img.width - edge_width, 0, img.width, img.height)) # 右端 | |
top = img.crop((0, 0, img.width, edge_width)) # 上端 | |
bottom = img.crop((0, img.height - edge_width, img.width, img.height)) # 下端 | |
# 各領域のピクセルデータを取得して結合 | |
colors = list(left.getdata()) + list(right.getdata()) + list(top.getdata()) + list(bottom.getdata()) | |
# 最頻値(mode)を計算 | |
mode_color = Counter(colors).most_common(1)[0][0] # 最も頻繁に出現する色を取得 | |
return mode_color | |
def rotate_image(image, angle, fill_color=(255, 255, 255)): | |
"""画像を指定された角度で回転させ、指定された色で余白を埋める""" | |
return image.rotate(angle, expand=True, fillcolor=fill_color) | |
def crop_square(cropped_rect_image, left, top, crop_size): | |
"""ランダムな正方形を切り出す""" | |
return cropped_rect_image.crop((left, top, left + crop_size, top + crop_size)) | |
def apply_random_flip(image, is_horizontal): | |
"""画像にランダムなフリップ(水平または垂直)を適用する""" | |
if is_horizontal: | |
return ImageOps.mirror(image) # 水平フリップ | |
return image | |
def process_image_pair( | |
source_image, | |
target_image, | |
output_size=(1024, 1024), | |
is_flip=False, | |
rotation_range=40, | |
min_scale=0.6, | |
max_scale=1.2, | |
source_is_avg_color_fill=False, | |
source_is_edge_mode_fill=False, | |
target_is_avg_color_fill=True, | |
target_is_edge_mode_fill=False, | |
expand_to_long_side=False | |
): | |
"""1組の画像に対して拡張処理を行う""" | |
orig_source_width, orig_source_height = source_image.size | |
orig_target_width, orig_target_height = target_image.size | |
# ソース画像の余白の色を決定 | |
if source_is_edge_mode_fill: | |
source_fill_color = get_edge_mode_color(source_image, edge_width=10) | |
elif source_is_avg_color_fill: | |
source_fill_color = get_average_color(source_image) | |
else: | |
source_fill_color = (255, 255, 255) | |
# ターゲット画像の余白の色を決定 | |
if target_is_edge_mode_fill: | |
target_fill_color = get_edge_mode_color(target_image, edge_width=10) | |
elif target_is_avg_color_fill: | |
target_fill_color = get_average_color(target_image) | |
else: | |
target_fill_color = (255, 255, 255) | |
base_source = source_image | |
base_target = target_image | |
# 長辺を基準にする場合の処理を追加 | |
if expand_to_long_side: | |
# sourceの長辺を取得して正方形のキャンバスを作成 | |
source_long_side = max(base_source.width, base_source.height) | |
source_canvas = Image.new("RGB", (source_long_side, source_long_side), source_fill_color) | |
# 中央に配置 | |
source_paste_x = (source_long_side - base_source.width) // 2 | |
source_paste_y = (source_long_side - base_source.height) // 2 | |
source_canvas.paste(base_source, (source_paste_x, source_paste_y)) | |
base_source = source_canvas | |
# targetも同様に処理 | |
target_long_side = max(base_target.width, base_target.height) | |
target_canvas = Image.new("RGB", (target_long_side, target_long_side), target_fill_color) | |
target_paste_x = (target_long_side - base_target.width) // 2 | |
target_paste_y = (target_long_side - base_target.height) // 2 | |
target_canvas.paste(base_target, (target_paste_x, target_paste_y)) | |
base_target = target_canvas | |
if rotation_range > 0: | |
angle = random.uniform(-rotation_range, rotation_range) | |
rotated_source = rotate_image(source_image, angle, source_fill_color) | |
rotated_target = rotate_image(target_image, angle, target_fill_color) | |
base_source = rotated_source | |
base_target = rotated_target | |
if is_flip: | |
is_horizontal = random.choice([True, False]) | |
flipped_source = apply_random_flip(base_source, is_horizontal) | |
flipped_target = apply_random_flip(base_target, is_horizontal) | |
base_source = flipped_source | |
base_target = flipped_target | |
scale = random.uniform(min_scale, max_scale) | |
canvas_scale = 1/scale | |
if canvas_scale > 1.0: | |
# 新規画像(canvas)を作成し中心に画像を配置 | |
scaled_source = Image.new("RGB", (int(base_source.width*canvas_scale), int(base_source.height*canvas_scale)), source_fill_color) | |
scaled_target = Image.new("RGB", (int(base_target.width*canvas_scale), int(base_target.height*canvas_scale)), target_fill_color) | |
scaled_source.paste(base_source, (int((scaled_source.width-base_source.width)/2), int((scaled_source.height-base_source.height)/2))) | |
scaled_target.paste(base_target, (int((scaled_target.width-base_target.width)/2), int((scaled_target.height-base_target.height)/2))) | |
else: | |
scaled_source = base_source | |
scaled_target = base_target | |
base_source_width, base_source_height = base_source.size | |
base_source_max_square_size = min(base_source_height, base_source_width) | |
crop_source_size = int(base_source_max_square_size * canvas_scale) | |
base_target_width, base_target_height = base_target.size | |
base_target_max_square_size = min(base_target_height, base_target_width) | |
crop_target_size = int(base_target_max_square_size * canvas_scale) | |
scaled_source_width, scaled_source_height = scaled_source.size | |
left_source = random.randint(0, scaled_source_width - crop_source_size) | |
top_source = random.randint(0, scaled_source_height - crop_source_size) | |
# sourceとtargetの位置合わせ. この場合、sourceとtargetのアスペクト比は同じと仮定 | |
left_target = left_source * orig_target_width // orig_source_width | |
top_target = top_source * orig_target_height // orig_source_height | |
final_source = crop_square(scaled_source, left_source, top_source, crop_source_size).resize(output_size) | |
final_target = crop_square(scaled_target, left_target, top_target, crop_target_size).resize(output_size) | |
return final_source, final_target | |
def process_images( | |
source_img, | |
target_img, | |
num_copies, | |
output_size, | |
is_flip, | |
rotation_range, | |
min_scale, | |
max_scale, | |
source_is_avg_color_fill, | |
source_is_edge_mode_fill, | |
target_is_avg_color_fill, | |
target_is_edge_mode_fill, | |
expand_to_long_side | |
): | |
aug_sources = [] | |
aug_targets = [] | |
for i in range(num_copies): | |
# 拡張処理を実行 | |
aug_source, aug_target = process_image_pair( | |
source_img, | |
target_img, | |
output_size, | |
is_flip, | |
rotation_range, | |
min_scale, | |
max_scale, | |
source_is_avg_color_fill, | |
source_is_edge_mode_fill, | |
target_is_avg_color_fill, | |
target_is_edge_mode_fill, | |
expand_to_long_side | |
) | |
aug_sources.append(aug_source) | |
aug_targets.append(aug_target) | |
return aug_sources, aug_targets | |
if __name__ == '__main__': | |
args = argparse.ArgumentParser() | |
args.add_argument('source_folder', type=str, help='source画像フォルダのパス / Path to source image folder') | |
args.add_argument('target_folder', type=str, help='target画像フォルダのパス / Path to target image folder') | |
args.add_argument('output_folder', type=str, help='出力先フォルダのパス / Path to output folder') | |
args.add_argument('--output_size', '-s', type=int, default=1024, | |
help='出力画像の一辺のサイズ / Output image size') | |
args.add_argument('--num_copies', '-n', type=int, default=1, | |
help='出力画像の枚数 / Number of augmented copies') | |
args.add_argument('--is_flip', '-f', type=bool, default=True, | |
help='フリップを適用するかどうか / Whether to apply random flip') | |
args.add_argument('--rotation_range', '-r', type=int, default=0, | |
help='回転角度の範囲 / Range of rotation angle') | |
args.add_argument('--min_scale', '--ms', type=float, default=1.0, | |
help='最小の画像サイズ / Minimum scale of the image') | |
args.add_argument('--max_scale', '--xs', type=float, default=1.0, | |
help='最大の画像サイズ / Maximum scale of the image') | |
args.add_argument('--source_is_avg_color_fill', '--sa', type=bool, default=True, | |
help='source画像を平均色で余白を埋めるかどうか / Whether to fill source image padding with average color') | |
args.add_argument('--source_is_edge_mode_fill', '--se', type=bool, default=False, | |
help='source画像を外周の最頻値で余白を埋めるかどうか / Whether to fill source image padding with edge mode color') | |
args.add_argument('--target_is_avg_color_fill', '--ta', type=bool, default=False, | |
help='target画像を平均色で余白を埋めるかどうか / Whether to fill target image padding with average color') | |
args.add_argument('--target_is_edge_mode_fill', '--te', type=bool, default=False, | |
help='target画像を外周の最頻値で余白を埋めるかどうか / Whether to fill target image padding with edge mode color') | |
args.add_argument('--expand_to_long_side', '--el', type=bool, default=False, | |
help='長辺まで拡張して正方形にするかどうか / Whether to expand the image to a square using the long side') | |
args = args.parse_args() | |
output_path_source = os.path.join(args.output_folder, 'aug_source') | |
output_path_target = os.path.join(args.output_folder, 'aug_target') | |
# 画像処理 | |
for image_name in tqdm(os.listdir(args.source_folder)): | |
if image_name.endswith('.jpg'): | |
source_path = os.path.join(args.source_folder, image_name) | |
target_path = os.path.join(args.target_folder, image_name) | |
process_images( | |
source_path, | |
target_path, | |
args.num_copies, | |
(args.output_size, args.output_size), | |
args.is_flip, | |
args.rotation_range, | |
args.min_scale, | |
args.max_scale, | |
args.source_is_avg_color_fill, | |
args.source_is_edge_mode_fill, | |
args.target_is_avg_color_fill, | |
args.target_is_edge_mode_fill, | |
args.expand_to_long_side | |
) | |