pair-images-aug-test / dataset_aug.py
yeq6x's picture
req
e317ed3
"""
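Helper functions for augmenting paired (source / target) images.
1. Compute the average color of an image
2. Find the most frequent color along the border of an image
3. Rotate an image by a given angle and fill the exposed margin with a given color
4. Crop a square from an image at a given position
5. Mirror an image horizontally
6. Apply the same random rotation / flip / scale / crop to a source-target pair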
"""
import os
from PIL import Image, ImageStat, ImageOps
from collections import Counter
import random
import math
from tqdm import tqdm
import argparse
def get_average_color(image):
    """Return the average color of an image as an ``(r, g, b)`` tuple of ints.

    Args:
        image: A PIL image in any mode.

    Returns:
        A 3-tuple with the per-channel means of the RGB version of the
        image, each truncated to an integer.
    """
    # ImageStat reports one mean per band. Anything that is not already
    # three-band RGB (RGBA, L, LA, P, CMYK, ...) is normalized first;
    # otherwise the three-way unpacking below fails or the values are not
    # RGB components at all.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    r, g, b = (int(channel_mean) for channel_mean in ImageStat.Stat(image).mean)
    return (r, g, b)
def get_edge_mode_color(img, edge_width=10):
    """Return the most frequent pixel value found along the image border.

    Four strips of thickness ``edge_width`` (left, right, top, bottom) are
    cropped from ``img`` and their pixels are tallied together. The corner
    areas belong to two strips each and are therefore counted twice.

    Args:
        img: Image object providing ``width``, ``height``, ``crop`` and
            ``getdata``.
        edge_width: Thickness of each border strip in pixels.

    Returns:
        The pixel value with the highest count, in whatever form
        ``getdata()`` yields it.
    """
    w, h = img.width, img.height
    strip_boxes = (
        (0, 0, edge_width, h),        # left strip
        (w - edge_width, 0, w, h),    # right strip
        (0, 0, w, edge_width),        # top strip
        (0, h - edge_width, w, h),    # bottom strip
    )
    # Tally strip by strip, in the fixed order above, so that ties are
    # resolved in favor of the value encountered first.
    tally = Counter()
    for box in strip_boxes:
        tally.update(img.crop(box).getdata())
    top_entry = tally.most_common(1)[0]
    return top_entry[0]
def rotate_image(image, angle, fill_color=(255, 255, 255)):
    """Rotate an image and paint the uncovered margin with a solid color.

    Args:
        image: Image to rotate; its ``rotate`` method is called.
        angle: Rotation angle handed to ``image.rotate``.
        fill_color: Color used for the area outside the original image.
            Defaults to white.

    Returns:
        The rotated image. ``expand=True`` is requested, so the result is
        sized to hold the whole rotated picture.
    """
    rotate_options = {'expand': True, 'fillcolor': fill_color}
    rotated = image.rotate(angle, **rotate_options)
    return rotated
def crop_square(cropped_rect_image, left, top, crop_size):
    """Cut a ``crop_size`` x ``crop_size`` square out of an image.

    The position is supplied by the caller; nothing is randomized here.

    Args:
        cropped_rect_image: Image to crop; its ``crop`` method is called.
        left: X coordinate of the square's left edge.
        top: Y coordinate of the square's top edge.
        crop_size: Side length of the square.

    Returns:
        The result of ``crop`` for the box
        ``(left, top, left + crop_size, top + crop_size)``.
    """
    right = left + crop_size
    bottom = top + crop_size
    box = (left, top, right, bottom)
    return cropped_rect_image.crop(box)
def apply_random_flip(image, is_horizontal):
    """Mirror the image left-to-right on request, otherwise return it as is.

    Despite the name, this function draws no random numbers and never flips
    vertically: the caller decides ``is_horizontal``, and a false value
    means "leave the image untouched".

    Args:
        image: Image to process.
        is_horizontal: When true, return ``ImageOps.mirror(image)``.

    Returns:
        The mirrored image, or the very same ``image`` object when
        ``is_horizontal`` is false.
    """
    if not is_horizontal:
        return image
    return ImageOps.mirror(image)  # horizontal flip
def process_image_pair(
    source_image,
    target_image,
    output_size=(1024, 1024),
    is_flip=False,
    rotation_range=40,
    min_scale=0.6,
    max_scale=1.2,
    source_is_avg_color_fill=False,
    source_is_edge_mode_fill=False,
    target_is_avg_color_fill=True,
    target_is_edge_mode_fill=False,
    expand_to_long_side=False
):
    """Apply one randomly drawn augmentation identically to an image pair.

    Steps, in order: optional padding to a square (long side), optional
    random rotation, optional random horizontal mirroring, random scaling,
    a random square crop, and a resize to ``output_size``. The same angle,
    flip decision and scale are used for both images. The crop position is
    drawn for the source and mapped onto the target by the ratio of the
    original image sizes, which assumes both images share one aspect ratio.

    Args:
        source_image: Source PIL image.
        target_image: Target PIL image.
        output_size: ``(width, height)`` both results are resized to.
        is_flip: When true, both images are mirrored horizontally with
            probability 1/2.
        rotation_range: The angle is drawn uniformly from
            ``[-rotation_range, rotation_range]``; a value ``<= 0``
            disables rotation.
        min_scale: Lower bound of the uniformly drawn scale factor.
        max_scale: Upper bound of the uniformly drawn scale factor. A scale
            below 1 pads the image with the fill color (zoom out); a scale
            above 1 shrinks the crop window (zoom in).
        source_is_avg_color_fill: Fill source margins with its average color.
        source_is_edge_mode_fill: Fill source margins with its most frequent
            border color (takes priority over the average color).
        target_is_avg_color_fill: Same as above, for the target image.
        target_is_edge_mode_fill: Same as above, for the target image.
        expand_to_long_side: Pad each image to a square whose side is its
            longer edge before any other step.

    Returns:
        ``(final_source, final_target)``, both resized to ``output_size``.

    Raises:
        ValueError: If the drawn scale factor is not positive.
    """
    orig_source_width, orig_source_height = source_image.size
    orig_target_width, orig_target_height = target_image.size

    # Fill-color priority: border mode color, then average color, then white.
    if source_is_edge_mode_fill:
        source_fill_color = get_edge_mode_color(source_image, edge_width=10)
    elif source_is_avg_color_fill:
        source_fill_color = get_average_color(source_image)
    else:
        source_fill_color = (255, 255, 255)

    if target_is_edge_mode_fill:
        target_fill_color = get_edge_mode_color(target_image, edge_width=10)
    elif target_is_avg_color_fill:
        target_fill_color = get_average_color(target_image)
    else:
        target_fill_color = (255, 255, 255)

    base_source = source_image
    base_target = target_image

    if expand_to_long_side:
        # Center each image on a square canvas sized by its longer edge.
        source_long_side = max(base_source.width, base_source.height)
        source_canvas = Image.new("RGB", (source_long_side, source_long_side), source_fill_color)
        source_paste_x = (source_long_side - base_source.width) // 2
        source_paste_y = (source_long_side - base_source.height) // 2
        source_canvas.paste(base_source, (source_paste_x, source_paste_y))
        base_source = source_canvas

        target_long_side = max(base_target.width, base_target.height)
        target_canvas = Image.new("RGB", (target_long_side, target_long_side), target_fill_color)
        target_paste_x = (target_long_side - base_target.width) // 2
        target_paste_y = (target_long_side - base_target.height) // 2
        target_canvas.paste(base_target, (target_paste_x, target_paste_y))
        base_target = target_canvas

    if rotation_range > 0:
        angle = random.uniform(-rotation_range, rotation_range)
        # Rotate the working images, not the raw inputs, so the square
        # padding applied above is not thrown away.
        base_source = rotate_image(base_source, angle, source_fill_color)
        base_target = rotate_image(base_target, angle, target_fill_color)

    if is_flip:
        is_horizontal = random.choice([True, False])
        base_source = apply_random_flip(base_source, is_horizontal)
        base_target = apply_random_flip(base_target, is_horizontal)

    scale = random.uniform(min_scale, max_scale)
    if scale <= 0:
        raise ValueError(
            "scale must be positive, got {!r} (min_scale={!r}, max_scale={!r})".format(
                scale, min_scale, max_scale
            )
        )
    canvas_scale = 1 / scale

    if canvas_scale > 1.0:
        # Zoom out: center the image on a larger canvas of the fill color.
        scaled_source = Image.new(
            "RGB",
            (int(base_source.width * canvas_scale), int(base_source.height * canvas_scale)),
            source_fill_color,
        )
        scaled_target = Image.new(
            "RGB",
            (int(base_target.width * canvas_scale), int(base_target.height * canvas_scale)),
            target_fill_color,
        )
        scaled_source.paste(
            base_source,
            ((scaled_source.width - base_source.width) // 2,
             (scaled_source.height - base_source.height) // 2),
        )
        scaled_target.paste(
            base_target,
            ((scaled_target.width - base_target.width) // 2,
             (scaled_target.height - base_target.height) // 2),
        )
    else:
        # Zoom in (or no scaling): only the crop window below gets smaller.
        scaled_source = base_source
        scaled_target = base_target

    # The crop side is the short edge of the unscaled image times the canvas
    # scale, so it always fits inside the (possibly padded) scaled image.
    crop_source_size = int(min(base_source.size) * canvas_scale)
    crop_target_size = int(min(base_target.size) * canvas_scale)

    scaled_source_width, scaled_source_height = scaled_source.size
    left_source = random.randint(0, scaled_source_width - crop_source_size)
    top_source = random.randint(0, scaled_source_height - crop_source_size)

    # Align the target crop with the source crop; this assumes source and
    # target have the same aspect ratio.
    left_target = left_source * orig_target_width // orig_source_width
    top_target = top_source * orig_target_height // orig_source_height

    final_source = crop_square(scaled_source, left_source, top_source, crop_source_size).resize(output_size)
    final_target = crop_square(scaled_target, left_target, top_target, crop_target_size).resize(output_size)
    return final_source, final_target
def process_images(
    source_img,
    target_img,
    num_copies,
    output_size,
    is_flip,
    rotation_range,
    min_scale,
    max_scale,
    source_is_avg_color_fill,
    source_is_edge_mode_fill,
    target_is_avg_color_fill,
    target_is_edge_mode_fill,
    expand_to_long_side
):
    """Produce ``num_copies`` independently augmented versions of one pair.

    Every copy comes from a separate ``process_image_pair`` call with the
    same settings, so each one gets its own random draw.

    Args:
        source_img: Source image handed to ``process_image_pair``.
        target_img: Target image handed to ``process_image_pair``.
        num_copies: Number of augmented pairs to generate.
        output_size, is_flip, rotation_range, min_scale, max_scale,
        source_is_avg_color_fill, source_is_edge_mode_fill,
        target_is_avg_color_fill, target_is_edge_mode_fill,
        expand_to_long_side: Forwarded unchanged to ``process_image_pair``.

    Returns:
        ``(aug_sources, aug_targets)``: two lists of equal length where the
        entries at the same index belong together.
    """
    pair_options = dict(
        output_size=output_size,
        is_flip=is_flip,
        rotation_range=rotation_range,
        min_scale=min_scale,
        max_scale=max_scale,
        source_is_avg_color_fill=source_is_avg_color_fill,
        source_is_edge_mode_fill=source_is_edge_mode_fill,
        target_is_avg_color_fill=target_is_avg_color_fill,
        target_is_edge_mode_fill=target_is_edge_mode_fill,
        expand_to_long_side=expand_to_long_side,
    )
    aug_sources, aug_targets = [], []
    for _ in range(num_copies):
        # Run one augmentation and file each half into its own list.
        new_source, new_target = process_image_pair(source_img, target_img, **pair_options)
        aug_sources.append(new_source)
        aug_targets.append(new_target)
    return aug_sources, aug_targets
if __name__ == '__main__':
    # Command-line entry point: augment every ``.jpg`` in ``source_folder``
    # together with the same-named file in ``target_folder`` and write the
    # results to ``<output_folder>/aug_source`` and ``<output_folder>/aug_target``.

    def _str_to_bool(value):
        """Parse a command-line boolean such as ``True``, ``false``, ``1`` or ``no``."""
        # ``type=bool`` would turn every non-empty string (including
        # "False") into True, so the text is interpreted explicitly.
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument('source_folder', type=str, help='source画像フォルダのパス / Path to source image folder')
    parser.add_argument('target_folder', type=str, help='target画像フォルダのパス / Path to target image folder')
    parser.add_argument('output_folder', type=str, help='出力先フォルダのパス / Path to output folder')
    parser.add_argument('--output_size', '-s', type=int, default=1024,
                        help='出力画像の一辺のサイズ / Output image size')
    parser.add_argument('--num_copies', '-n', type=int, default=1,
                        help='出力画像の枚数 / Number of augmented copies')
    parser.add_argument('--is_flip', '-f', type=_str_to_bool, default=True,
                        help='フリップを適用するかどうか / Whether to apply random flip')
    parser.add_argument('--rotation_range', '-r', type=int, default=0,
                        help='回転角度の範囲 / Range of rotation angle')
    parser.add_argument('--min_scale', '--ms', type=float, default=1.0,
                        help='最小の画像サイズ / Minimum scale of the image')
    parser.add_argument('--max_scale', '--xs', type=float, default=1.0,
                        help='最大の画像サイズ / Maximum scale of the image')
    parser.add_argument('--source_is_avg_color_fill', '--sa', type=_str_to_bool, default=True,
                        help='source画像を平均色で余白を埋めるかどうか / Whether to fill source image padding with average color')
    parser.add_argument('--source_is_edge_mode_fill', '--se', type=_str_to_bool, default=False,
                        help='source画像を外周の最頻値で余白を埋めるかどうか / Whether to fill source image padding with edge mode color')
    parser.add_argument('--target_is_avg_color_fill', '--ta', type=_str_to_bool, default=False,
                        help='target画像を平均色で余白を埋めるかどうか / Whether to fill target image padding with average color')
    parser.add_argument('--target_is_edge_mode_fill', '--te', type=_str_to_bool, default=False,
                        help='target画像を外周の最頻値で余白を埋めるかどうか / Whether to fill target image padding with edge mode color')
    parser.add_argument('--expand_to_long_side', '--el', type=_str_to_bool, default=False,
                        help='長辺まで拡張して正方形にするかどうか / Whether to expand the image to a square using the long side')
    args = parser.parse_args()

    output_path_source = os.path.join(args.output_folder, 'aug_source')
    output_path_target = os.path.join(args.output_folder, 'aug_target')
    os.makedirs(output_path_source, exist_ok=True)
    os.makedirs(output_path_target, exist_ok=True)

    # Process every source image that has the '.jpg' extension.
    for image_name in tqdm(os.listdir(args.source_folder)):
        if not image_name.endswith('.jpg'):
            continue
        source_path = os.path.join(args.source_folder, image_name)
        target_path = os.path.join(args.target_folder, image_name)

        # The augmentation functions work on PIL images, not on paths.
        # Converting to RGB yields loaded copies that stay valid after the
        # files are closed and that can always be saved as JPEG.
        with Image.open(source_path) as opened_source, Image.open(target_path) as opened_target:
            source_img = opened_source.convert('RGB')
            target_img = opened_target.convert('RGB')

        aug_sources, aug_targets = process_images(
            source_img,
            target_img,
            args.num_copies,
            (args.output_size, args.output_size),
            args.is_flip,
            args.rotation_range,
            args.min_scale,
            args.max_scale,
            args.source_is_avg_color_fill,
            args.source_is_edge_mode_fill,
            args.target_is_avg_color_fill,
            args.target_is_edge_mode_fill,
            args.expand_to_long_side
        )

        # Matching source/target copies share one file name:
        # "<original stem>_<copy index>.jpg".
        stem = os.path.splitext(image_name)[0]
        for copy_index, (aug_source, aug_target) in enumerate(zip(aug_sources, aug_targets)):
            output_name = '{}_{}.jpg'.format(stem, copy_index)
            aug_source.save(os.path.join(output_path_source, output_name))
            aug_target.save(os.path.join(output_path_target, output_name))