Spaces:

yeq6x
/

pair-images-aug-test

Sleeping

App Files Files Community

pair-images-aug-test / dataset_aug.py

yeq6x

req

e317ed3 6 months ago

raw

history blame contribute delete

11.5 kB

	"""
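	Helper functions for image augmentation.

	1. A function that computes the average color of an image
	2. A function that rotates an image by a given angle and fills the margins with the average color
	3. A function that crops the largest rectangle out of a rotated image
	4. A function that crops a random square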
	"""

	import os
	from PIL import Image, ImageStat, ImageOps
	from collections import Counter
	import random
	import math
	from tqdm import tqdm
	import argparse

	def get_average_color(image):
		"""Return the average color of ``image`` as an ``(r, g, b)`` tuple of ints.

		The image is converted to RGB first whenever its mode is anything else,
		so the per-band means always unpack into exactly three values. Each
		mean is truncated with ``int``.
		"""
		# ImageStat reports one mean per band; normalising to RGB keeps the
		# three-way unpacking below valid for RGBA, L, P, CMYK, ... inputs
		# (previously only RGBA was handled and other modes failed to unpack).
		if image.mode != 'RGB':
			image = image.convert('RGB')

		red, green, blue = (int(band_mean) for band_mean in ImageStat.Stat(image).mean)
		return (red, green, blue)

	def get_edge_mode_color(img, edge_width=10):
		"""Return the most frequent pixel value in the outer border of ``img``.

		Four strips are sampled: the left and right strips are ``edge_width``
		pixels wide and span the full height, the top and bottom strips are
		``edge_width`` pixels high and span the full width. Corner pixels
		therefore belong to two strips and are counted twice.

		Args:
			img: Image-like object exposing ``width``, ``height``, ``crop(box)``
				and, on the cropped result, ``getdata()``.
			edge_width: Thickness of the border strips in pixels.

		Returns:
			The pixel value (as yielded by ``getdata()``) that occurs most often
			in the sampled strips; ties go to the value seen first.

		Raises:
			ValueError: If ``edge_width`` is smaller than 1.
		"""
		if edge_width < 1:
			raise ValueError("edge_width must be at least 1")

		# Clamp the strips to the image so crop() never reaches outside it;
		# an out-of-bounds crop would add padding pixels that skew the mode.
		strip_w = min(edge_width, img.width)
		strip_h = min(edge_width, img.height)
		strips = (
			(0, 0, strip_w, img.height),                      # left edge
			(img.width - strip_w, 0, img.width, img.height),  # right edge
			(0, 0, img.width, strip_h),                       # top edge
			(0, img.height - strip_h, img.width, img.height), # bottom edge
		)

		# Feed each strip straight into the counter instead of concatenating
		# four pixel lists first.
		counts = Counter()
		for box in strips:
			counts.update(img.crop(box).getdata())

		return counts.most_common(1)[0][0]

	def rotate_image(image, angle, fill_color=(255, 255, 255)):
		"""Rotate ``image`` by ``angle`` and fill the uncovered area with ``fill_color``.

		The call is forwarded to ``image.rotate`` with ``expand=True`` and
		``fillcolor=fill_color``; the rotated image is returned.
		"""
		return image.rotate(angle, expand=True, fillcolor=fill_color)

	def crop_square(cropped_rect_image, left, top, crop_size):
		"""Crop a ``crop_size`` x ``crop_size`` square whose top-left corner is ``(left, top)``.

		The position is taken as given; any randomness in where the square
		lands is the caller's responsibility.
		"""
		right = left + crop_size
		bottom = top + crop_size
		return cropped_rect_image.crop((left, top, right, bottom))

	def apply_random_flip(image, is_horizontal):
		"""Mirror ``image`` left-to-right when ``is_horizontal`` is true.

		When ``is_horizontal`` is false the image is returned unchanged. Despite
		the name, this function makes no random choice and never flips
		vertically; the caller decides whether a flip happens.
		"""
		if not is_horizontal:
			return image
		return ImageOps.mirror(image)  # horizontal flip

	def process_image_pair(
		source_image,
		target_image,
		output_size=(1024, 1024),
		is_flip=False,
		rotation_range=40,
		min_scale=0.6,
		max_scale=1.2,
		source_is_avg_color_fill=False,
		source_is_edge_mode_fill=False,
		target_is_avg_color_fill=True,
		target_is_edge_mode_fill=False,
		expand_to_long_side=False
	):
		"""Apply one random augmentation to a source/target image pair.

		Both images receive the same random rotation angle, the same flip
		decision and the same scale, and are cropped at corresponding positions.

		Steps, in order:
			1. Pick a padding color per image (edge mode, else average, else white).
			2. Optionally pad each image to a square of its long side.
			3. Optionally rotate by a random angle in ``[-rotation_range, rotation_range]``.
			4. Optionally (50% chance when ``is_flip``) mirror horizontally.
			5. Draw ``scale`` from ``[min_scale, max_scale]``; for ``scale < 1`` the
			   image is centered on a larger padded canvas, for ``scale > 1`` a
			   smaller square is cropped.
			6. Crop a randomly positioned square and resize it to ``output_size``.

		Args:
			source_image: Source image.
			target_image: Target image; assumed to share the source's aspect ratio.
			output_size: ``(width, height)`` of the returned images.
			is_flip: Enable the random horizontal flip.
			rotation_range: Maximum absolute rotation angle; ``0`` disables rotation.
			min_scale: Lower bound of the random scale (must be positive).
			max_scale: Upper bound of the random scale (must be positive).
			source_is_avg_color_fill: Pad the source with its average color.
			source_is_edge_mode_fill: Pad the source with its most frequent edge
				color (takes precedence over the average color).
			target_is_avg_color_fill: Pad the target with its average color.
			target_is_edge_mode_fill: Pad the target with its most frequent edge
				color (takes precedence over the average color).
			expand_to_long_side: Pad each image to a square before augmenting.

		Returns:
			Tuple ``(final_source, final_target)`` of the augmented images.

		Raises:
			ValueError: If ``min_scale`` or ``max_scale`` is not positive.
		"""
		# A zero or negative scale would divide by zero or produce a negative
		# canvas size further down, so reject it up front.
		if min_scale <= 0 or max_scale <= 0:
			raise ValueError("min_scale and max_scale must be positive")

		orig_source_width, orig_source_height = source_image.size
		orig_target_width, orig_target_height = target_image.size

		# Padding colors are derived from the untouched inputs.
		source_fill_color = _resolve_fill_color(
			source_image, source_is_edge_mode_fill, source_is_avg_color_fill
		)
		target_fill_color = _resolve_fill_color(
			target_image, target_is_edge_mode_fill, target_is_avg_color_fill
		)

		base_source = source_image
		base_target = target_image

		# Optionally pad each image to a square whose side is its long side.
		if expand_to_long_side:
			source_long_side = max(base_source.width, base_source.height)
			base_source = _center_on_canvas(
				base_source, (source_long_side, source_long_side), source_fill_color
			)
			target_long_side = max(base_target.width, base_target.height)
			base_target = _center_on_canvas(
				base_target, (target_long_side, target_long_side), target_fill_color
			)

		if rotation_range > 0:
			angle = random.uniform(-rotation_range, rotation_range)
			# Rotate the current working images (not the raw inputs) so the
			# square padding applied above is not thrown away.
			base_source = rotate_image(base_source, angle, source_fill_color)
			base_target = rotate_image(base_target, angle, target_fill_color)

		if is_flip:
			is_horizontal = random.choice([True, False])
			base_source = apply_random_flip(base_source, is_horizontal)
			base_target = apply_random_flip(base_target, is_horizontal)

		scale = random.uniform(min_scale, max_scale)
		canvas_scale = 1 / scale

		if canvas_scale > 1.0:
			# Zooming out: center the image on a larger padded canvas.
			scaled_source = _center_on_canvas(
				base_source,
				(int(base_source.width * canvas_scale), int(base_source.height * canvas_scale)),
				source_fill_color,
			)
			scaled_target = _center_on_canvas(
				base_target,
				(int(base_target.width * canvas_scale), int(base_target.height * canvas_scale)),
				target_fill_color,
			)
		else:
			scaled_source = base_source
			scaled_target = base_target

		# Side of the square to cut: the short side of the working image,
		# scaled by the same factor as the canvas.
		crop_source_size = int(min(base_source.size) * canvas_scale)
		crop_target_size = int(min(base_target.size) * canvas_scale)

		left_source = random.randint(0, scaled_source.width - crop_source_size)
		top_source = random.randint(0, scaled_source.height - crop_source_size)

		# Map the source crop origin onto the target. This assumes source and
		# target share the same aspect ratio.
		left_target = left_source * orig_target_width // orig_source_width
		top_target = top_source * orig_target_height // orig_source_height

		final_source = crop_square(
			scaled_source, left_source, top_source, crop_source_size
		).resize(output_size)
		final_target = crop_square(
			scaled_target, left_target, top_target, crop_target_size
		).resize(output_size)

		return final_source, final_target


	def _resolve_fill_color(image, use_edge_mode, use_average):
		"""Pick the padding color for ``image``: edge mode, else average, else white."""
		if use_edge_mode:
			return get_edge_mode_color(image, edge_width=10)
		if use_average:
			return get_average_color(image)
		return (255, 255, 255)


	def _center_on_canvas(image, canvas_size, fill_color):
		"""Paste ``image`` centered on a new RGB canvas filled with ``fill_color``."""
		canvas = Image.new("RGB", canvas_size, fill_color)
		offset_x = (canvas.width - image.width) // 2
		offset_y = (canvas.height - image.height) // 2
		canvas.paste(image, (offset_x, offset_y))
		return canvas

	def process_images(
		source_img,
		target_img,
		num_copies,
		output_size,
		is_flip,
		rotation_range,
		min_scale,
		max_scale,
		source_is_avg_color_fill,
		source_is_edge_mode_fill,
		target_is_avg_color_fill,
		target_is_edge_mode_fill,
		expand_to_long_side
	):
		"""Produce ``num_copies`` augmented versions of one source/target pair.

		Every copy comes from an independent call to ``process_image_pair``
		with the same settings.

		Returns:
			Tuple ``(aug_sources, aug_targets)`` of two lists with ``num_copies``
			entries each; entry ``i`` of both lists comes from the same call.
			Both lists are empty when ``num_copies`` is zero or negative.
		"""
		aug_sources = []
		aug_targets = []

		for _ in range(num_copies):
			# Keyword arguments keep the long option list from silently
			# shifting if the callee's parameter order ever changes.
			aug_source, aug_target = process_image_pair(
				source_img,
				target_img,
				output_size=output_size,
				is_flip=is_flip,
				rotation_range=rotation_range,
				min_scale=min_scale,
				max_scale=max_scale,
				source_is_avg_color_fill=source_is_avg_color_fill,
				source_is_edge_mode_fill=source_is_edge_mode_fill,
				target_is_avg_color_fill=target_is_avg_color_fill,
				target_is_edge_mode_fill=target_is_edge_mode_fill,
				expand_to_long_side=expand_to_long_side,
			)
			aug_sources.append(aug_source)
			aug_targets.append(aug_target)

		return aug_sources, aug_targets

	def _str_to_bool(value):
		"""Convert a command-line token such as ``True``/``false``/``1``/``no`` to a bool.

		``type=bool`` would treat every non-empty string, including ``"False"``,
		as true, so boolean options use this converter instead.
		"""
		if isinstance(value, bool):
			return value
		token = value.strip().lower()
		if token in ('true', 't', 'yes', 'y', '1'):
			return True
		if token in ('false', 'f', 'no', 'n', '0'):
			return False
		raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


	def main():
		"""Augment every JPEG pair found in the source/target folders.

		For each ``.jpg``/``.jpeg`` file in ``source_folder`` the file with the
		same name is loaded from ``target_folder``. The augmented copies are
		written to ``<output_folder>/aug_source`` and
		``<output_folder>/aug_target`` as ``<stem>_<copy index><ext>``.
		"""
		parser = argparse.ArgumentParser()
		parser.add_argument('source_folder', type=str, help='source画像フォルダのパス / Path to source image folder')
		parser.add_argument('target_folder', type=str, help='target画像フォルダのパス / Path to target image folder')
		parser.add_argument('output_folder', type=str, help='出力先フォルダのパス / Path to output folder')
		parser.add_argument('--output_size', '-s', type=int, default=1024,
			help='出力画像の一辺のサイズ / Output image size')
		parser.add_argument('--num_copies', '-n', type=int, default=1,
			help='出力画像の枚数 / Number of augmented copies')
		parser.add_argument('--is_flip', '-f', type=_str_to_bool, default=True,
			help='フリップを適用するかどうか / Whether to apply random flip')
		parser.add_argument('--rotation_range', '-r', type=int, default=0,
			help='回転角度の範囲 / Range of rotation angle')
		parser.add_argument('--min_scale', '--ms', type=float, default=1.0,
			help='最小の画像サイズ / Minimum scale of the image')
		parser.add_argument('--max_scale', '--xs', type=float, default=1.0,
			help='最大の画像サイズ / Maximum scale of the image')
		parser.add_argument('--source_is_avg_color_fill', '--sa', type=_str_to_bool, default=True,
			help='source画像を平均色で余白を埋めるかどうか / Whether to fill source image padding with average color')
		parser.add_argument('--source_is_edge_mode_fill', '--se', type=_str_to_bool, default=False,
			help='source画像を外周の最頻値で余白を埋めるかどうか / Whether to fill source image padding with edge mode color')
		parser.add_argument('--target_is_avg_color_fill', '--ta', type=_str_to_bool, default=False,
			help='target画像を平均色で余白を埋めるかどうか / Whether to fill target image padding with average color')
		parser.add_argument('--target_is_edge_mode_fill', '--te', type=_str_to_bool, default=False,
			help='target画像を外周の最頻値で余白を埋めるかどうか / Whether to fill target image padding with edge mode color')
		parser.add_argument('--expand_to_long_side', '--el', type=_str_to_bool, default=False,
			help='長辺まで拡張して正方形にするかどうか / Whether to expand the image to a square using the long side')
		args = parser.parse_args()

		output_path_source = os.path.join(args.output_folder, 'aug_source')
		output_path_target = os.path.join(args.output_folder, 'aug_target')
		os.makedirs(output_path_source, exist_ok=True)
		os.makedirs(output_path_target, exist_ok=True)

		# Process every JPEG in the source folder together with its same-named target.
		for image_name in tqdm(os.listdir(args.source_folder)):
			if not image_name.lower().endswith(('.jpg', '.jpeg')):
				continue

			source_path = os.path.join(args.source_folder, image_name)
			target_path = os.path.join(args.target_folder, image_name)
			if not os.path.isfile(target_path):
				# Report through tqdm so the progress bar is not garbled.
				tqdm.write('Skipping {}: no matching target image'.format(image_name))
				continue

			# process_images expects opened images, not file paths.
			with Image.open(source_path) as source_img, Image.open(target_path) as target_img:
				aug_sources, aug_targets = process_images(
					source_img,
					target_img,
					args.num_copies,
					(args.output_size, args.output_size),
					args.is_flip,
					args.rotation_range,
					args.min_scale,
					args.max_scale,
					args.source_is_avg_color_fill,
					args.source_is_edge_mode_fill,
					args.target_is_avg_color_fill,
					args.target_is_edge_mode_fill,
					args.expand_to_long_side
				)

			# Write each augmented pair under the same file name in both folders
			# so source and target stay matched.
			stem, ext = os.path.splitext(image_name)
			for index, (aug_source, aug_target) in enumerate(zip(aug_sources, aug_targets)):
				out_name = '{}_{}{}'.format(stem, index, ext)
				aug_source.save(os.path.join(output_path_source, out_name))
				aug_target.save(os.path.join(output_path_target, out_name))


	if __name__ == '__main__':
		main()