import math
import random
from enum import Enum
from typing import Dict, Optional, Sequence, Tuple, Union

import cv2
import numpy as np
import skimage.transform

from custom_albumentations.core.bbox_utils import denormalize_bbox, normalize_bbox

from ... import random_utils
from ...core.transforms_interface import (
    BoxInternalType,
    DualTransform,
    ImageColorType,
    KeypointInternalType,
    ScaleFloatType,
    to_tuple,
)
from ..functional import bbox_from_mask
from . import functional as F

__all__ = [
    "ShiftScaleRotate",
    "ElasticTransform",
    "Perspective",
    "Affine",
    "PiecewiseAffine",
    "VerticalFlip",
    "HorizontalFlip",
    "Flip",
    "Transpose",
    "OpticalDistortion",
    "GridDistortion",
    "PadIfNeeded",
]


class ShiftScaleRotate(DualTransform):
    """Randomly apply affine transforms: translate, scale and rotate the input.

    Args:
        shift_limit ((float, float) or float): shift factor range for both height and width. If shift_limit
            is a single float value, the range will be (-shift_limit, shift_limit). Absolute values for lower and
            upper bounds should lie in the range [0, 1]. Default: (-0.0625, 0.0625).
        scale_limit ((float, float) or float): scaling factor range. If scale_limit is a single float value, the
            range will be (-scale_limit, scale_limit). Note that the scale_limit will be biased by 1.
            If scale_limit is a tuple, like (low, high), sampling will be done from the range (1 + low, 1 + high).
            Default: (-0.1, 0.1).
        rotate_limit ((int, int) or int): rotation range. If rotate_limit is a single int value, the
            range will be (-rotate_limit, rotate_limit). Default: (-45, 45).
        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_REFLECT_101.
        value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
        mask_value (int, float, list of int, list of float): padding value if border_mode is
            cv2.BORDER_CONSTANT, applied to masks.
        shift_limit_x ((float, float) or float): shift factor range for width. If it is set, this value
            instead of shift_limit will be used for shifting width. If shift_limit_x is a single float value,
            the range will be (-shift_limit_x, shift_limit_x). Absolute values for lower and upper bounds should
            lie in the range [0, 1]. Default: None.
        shift_limit_y ((float, float) or float): shift factor range for height. If it is set, this value
            instead of shift_limit will be used for shifting height. If shift_limit_y is a single float value,
            the range will be (-shift_limit_y, shift_limit_y). Absolute values for lower and upper bounds should
            lie in the range [0, 1]. Default: None.
        rotate_method (str): rotation method used for the bounding boxes. Should be one of "largest_box" or "ellipse".
            Default: "largest_box".
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, keypoints

    Image types:
        uint8, float32
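
    Example:
        A minimal usage sketch (not from the original source). It assumes this
        transform is re-exported from the package root, as in upstream
        albumentations; the import path is an assumption::

            import numpy as np
            import custom_albumentations as A

            # One shift/scale/angle triple is sampled per call via get_params().
            aug = A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, p=1.0)
            image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
            augmented = aug(image=image)["image"]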
""" | |
def __init__( | |
self, | |
shift_limit=0.0625, | |
scale_limit=0.1, | |
rotate_limit=45, | |
interpolation=cv2.INTER_LINEAR, | |
border_mode=cv2.BORDER_REFLECT_101, | |
value=None, | |
mask_value=None, | |
shift_limit_x=None, | |
shift_limit_y=None, | |
rotate_method="largest_box", | |
always_apply=False, | |
p=0.5, | |
): | |
super(ShiftScaleRotate, self).__init__(always_apply, p) | |
self.shift_limit_x = to_tuple(shift_limit_x if shift_limit_x is not None else shift_limit) | |
self.shift_limit_y = to_tuple(shift_limit_y if shift_limit_y is not None else shift_limit) | |
self.scale_limit = to_tuple(scale_limit, bias=1.0) | |
self.rotate_limit = to_tuple(rotate_limit) | |
self.interpolation = interpolation | |
self.border_mode = border_mode | |
self.value = value | |
self.mask_value = mask_value | |
self.rotate_method = rotate_method | |
if self.rotate_method not in ["largest_box", "ellipse"]: | |
raise ValueError(f"Rotation method {self.rotate_method} is not valid.") | |
def apply(self, img, angle=0, scale=0, dx=0, dy=0, interpolation=cv2.INTER_LINEAR, **params): | |
return F.shift_scale_rotate(img, angle, scale, dx, dy, interpolation, self.border_mode, self.value) | |
def apply_to_mask(self, img, angle=0, scale=0, dx=0, dy=0, **params): | |
return F.shift_scale_rotate(img, angle, scale, dx, dy, cv2.INTER_NEAREST, self.border_mode, self.mask_value) | |
def apply_to_keypoint(self, keypoint, angle=0, scale=0, dx=0, dy=0, rows=0, cols=0, **params): | |
return F.keypoint_shift_scale_rotate(keypoint, angle, scale, dx, dy, rows, cols) | |
def get_params(self): | |
return { | |
"angle": random.uniform(self.rotate_limit[0], self.rotate_limit[1]), | |
"scale": random.uniform(self.scale_limit[0], self.scale_limit[1]), | |
"dx": random.uniform(self.shift_limit_x[0], self.shift_limit_x[1]), | |
"dy": random.uniform(self.shift_limit_y[0], self.shift_limit_y[1]), | |
} | |
def apply_to_bbox(self, bbox, angle, scale, dx, dy, **params): | |
return F.bbox_shift_scale_rotate(bbox, angle, scale, dx, dy, self.rotate_method, **params) | |
def get_transform_init_args(self): | |
return { | |
"shift_limit_x": self.shift_limit_x, | |
"shift_limit_y": self.shift_limit_y, | |
"scale_limit": to_tuple(self.scale_limit, bias=-1.0), | |
"rotate_limit": self.rotate_limit, | |
"interpolation": self.interpolation, | |
"border_mode": self.border_mode, | |
"value": self.value, | |
"mask_value": self.mask_value, | |
"rotate_method": self.rotate_method, | |
} | |


class ElasticTransform(DualTransform):
    """Elastic deformation of images as described in [Simard2003]_ (with modifications).

    Based on https://gist.github.com/ernestum/601cdf56d2b424757de5

    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
       Convolutional Neural Networks applied to Visual Document Analysis", in
       Proc. of the International Conference on Document Analysis and
       Recognition, 2003.

    Args:
        alpha (float): scaling factor for the displacement fields; larger values produce stronger deformations.
        sigma (float): Gaussian filter parameter.
        alpha_affine (float): The range will be (-alpha_affine, alpha_affine).
        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_REFLECT_101.
        value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
        mask_value (int, float, list of int, list of float): padding value if border_mode is
            cv2.BORDER_CONSTANT, applied to masks.
        approximate (bool): whether to smooth the displacement map with a fixed kernel size.
            Enabling this option gives ~2X speedup on large images.
        same_dxdy (bool): whether to use the same random displacement for x and y.
            Enabling this option gives ~2X speedup.

    Targets:
        image, mask, bbox

    Image types:
        uint8, float32
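
    Example:
        A usage sketch (not from the original source; the package-root import
        is an assumption). Masks are warped with nearest-neighbor
        interpolation, so label values are preserved::

            import numpy as np
            import custom_albumentations as A

            aug = A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=1.0)
            image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
            mask = np.zeros((256, 256), dtype=np.uint8)
            out = aug(image=image, mask=mask)
            warped_image, warped_mask = out["image"], out["mask"]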
""" | |
def __init__( | |
self, | |
alpha=1, | |
sigma=50, | |
alpha_affine=50, | |
interpolation=cv2.INTER_LINEAR, | |
border_mode=cv2.BORDER_REFLECT_101, | |
value=None, | |
mask_value=None, | |
always_apply=False, | |
approximate=False, | |
same_dxdy=False, | |
p=0.5, | |
): | |
super(ElasticTransform, self).__init__(always_apply, p) | |
self.alpha = alpha | |
self.alpha_affine = alpha_affine | |
self.sigma = sigma | |
self.interpolation = interpolation | |
self.border_mode = border_mode | |
self.value = value | |
self.mask_value = mask_value | |
self.approximate = approximate | |
self.same_dxdy = same_dxdy | |
def apply(self, img, random_state=None, interpolation=cv2.INTER_LINEAR, **params): | |
return F.elastic_transform( | |
img, | |
self.alpha, | |
self.sigma, | |
self.alpha_affine, | |
interpolation, | |
self.border_mode, | |
self.value, | |
np.random.RandomState(random_state), | |
self.approximate, | |
self.same_dxdy, | |
) | |
def apply_to_mask(self, img, random_state=None, **params): | |
return F.elastic_transform( | |
img, | |
self.alpha, | |
self.sigma, | |
self.alpha_affine, | |
cv2.INTER_NEAREST, | |
self.border_mode, | |
self.mask_value, | |
np.random.RandomState(random_state), | |
self.approximate, | |
self.same_dxdy, | |
) | |
def apply_to_bbox(self, bbox, random_state=None, **params): | |
rows, cols = params["rows"], params["cols"] | |
mask = np.zeros((rows, cols), dtype=np.uint8) | |
bbox_denorm = F.denormalize_bbox(bbox, rows, cols) | |
x_min, y_min, x_max, y_max = bbox_denorm[:4] | |
x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max) | |
mask[y_min:y_max, x_min:x_max] = 1 | |
mask = F.elastic_transform( | |
mask, | |
self.alpha, | |
self.sigma, | |
self.alpha_affine, | |
cv2.INTER_NEAREST, | |
self.border_mode, | |
self.mask_value, | |
np.random.RandomState(random_state), | |
self.approximate, | |
) | |
bbox_returned = bbox_from_mask(mask) | |
bbox_returned = F.normalize_bbox(bbox_returned, rows, cols) | |
return bbox_returned | |
def get_params(self): | |
return {"random_state": random.randint(0, 10000)} | |
def get_transform_init_args_names(self): | |
return ( | |
"alpha", | |
"sigma", | |
"alpha_affine", | |
"interpolation", | |
"border_mode", | |
"value", | |
"mask_value", | |
"approximate", | |
"same_dxdy", | |
) | |


class Perspective(DualTransform):
    """Perform a random four point perspective transform of the input.

    Args:
        scale (float or (float, float)): standard deviation of the normal distributions. These are used to sample
            the random distances of the subimage's corners from the full image's corners.
            If scale is a single float value, the range will be (0, scale). Default: (0.05, 0.1).
        keep_size (bool): Whether to resize images back to their original size after applying the perspective
            transform. If set to False, the resulting images may end up having different shapes
            and will always be a list, never an array. Default: True.
        pad_mode (OpenCV flag): OpenCV border mode.
        pad_val (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
            Default: 0.
        mask_pad_val (int, float, list of int, list of float): padding value for mask
            if border_mode is cv2.BORDER_CONSTANT. Default: 0.
        fit_output (bool): If True, the image plane size and position will be adjusted to still capture
            the whole image after perspective transformation. (Followed by image resizing if keep_size is set to True.)
            Otherwise, parts of the transformed image may be outside of the image plane.
            This setting should not be set to True when using large scale values as it could lead to very large images.
            Default: False.
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, keypoints, bboxes

    Image types:
        uint8, float32
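
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). With keep_size=True the warped image is resized back
        to its input shape::

            import numpy as np
            import custom_albumentations as A

            aug = A.Perspective(scale=(0.05, 0.1), keep_size=True, p=1.0)
            image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
            out = aug(image=image)["image"]
            assert out.shape == image.shape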
""" | |
def __init__( | |
self, | |
scale=(0.05, 0.1), | |
keep_size=True, | |
pad_mode=cv2.BORDER_CONSTANT, | |
pad_val=0, | |
mask_pad_val=0, | |
fit_output=False, | |
interpolation=cv2.INTER_LINEAR, | |
always_apply=False, | |
p=0.5, | |
): | |
super().__init__(always_apply, p) | |
self.scale = to_tuple(scale, 0) | |
self.keep_size = keep_size | |
self.pad_mode = pad_mode | |
self.pad_val = pad_val | |
self.mask_pad_val = mask_pad_val | |
self.fit_output = fit_output | |
self.interpolation = interpolation | |
def apply(self, img, matrix=None, max_height=None, max_width=None, **params): | |
return F.perspective( | |
img, matrix, max_width, max_height, self.pad_val, self.pad_mode, self.keep_size, params["interpolation"] | |
) | |
def apply_to_bbox(self, bbox, matrix=None, max_height=None, max_width=None, **params): | |
return F.perspective_bbox(bbox, params["rows"], params["cols"], matrix, max_width, max_height, self.keep_size) | |
def apply_to_keypoint(self, keypoint, matrix=None, max_height=None, max_width=None, **params): | |
return F.perspective_keypoint( | |
keypoint, params["rows"], params["cols"], matrix, max_width, max_height, self.keep_size | |
) | |

    @property
    def targets_as_params(self):
        return ["image"]

    def get_params_dependent_on_targets(self, params):
        h, w = params["image"].shape[:2]

        scale = random_utils.uniform(*self.scale)
        points = random_utils.normal(0, scale, [4, 2])
        points = np.mod(np.abs(points), 0.32)

        # top left -- no changes needed, just use jitter
        # top right
        points[1, 0] = 1.0 - points[1, 0]  # w = 1.0 - jitter
        # bottom right
        points[2] = 1.0 - points[2]  # w = 1.0 - jitter
        # bottom left
        points[3, 1] = 1.0 - points[3, 1]  # h = 1.0 - jitter

        points[:, 0] *= w
        points[:, 1] *= h

        # Obtain a consistent order of the points and unpack them individually.
        # Warning: don't just do (tl, tr, br, bl) = _order_points(...)
        # here, because the reordered points are used further below.
        points = self._order_points(points)
        tl, tr, br, bl = points

        # compute the width of the new image, which will be the
        # maximum distance between bottom-right and bottom-left
        # x-coordinates or the top-right and top-left x-coordinates
        min_width = None
        max_width = None
        while min_width is None or min_width < 2:
            width_top = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
            width_bottom = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
            max_width = int(max(width_top, width_bottom))
            min_width = int(min(width_top, width_bottom))
            if min_width < 2:
                step_size = (2 - min_width) / 2
                tl[0] -= step_size
                tr[0] += step_size
                bl[0] -= step_size
                br[0] += step_size

        # compute the height of the new image, which will be the maximum distance between the top-right
        # and bottom-right y-coordinates or the top-left and bottom-left y-coordinates
        min_height = None
        max_height = None
        while min_height is None or min_height < 2:
            height_right = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
            height_left = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
            max_height = int(max(height_right, height_left))
            min_height = int(min(height_right, height_left))
            if min_height < 2:
                step_size = (2 - min_height) / 2
                tl[1] -= step_size
                tr[1] -= step_size
                bl[1] += step_size
                br[1] += step_size

        # now that we have the dimensions of the new image, construct
        # the set of destination points to obtain a "birds eye view"
        # (i.e. top-down view) of the image, again specifying points
        # in the top-left, top-right, bottom-right, and bottom-left order.
        # do not use width-1 or height-1 here, as for e.g. width=3, height=2
        # the bottom right coordinate is at (3.0, 2.0) and not (2.0, 1.0)
        dst = np.array([[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]], dtype=np.float32)

        # compute the perspective transform matrix and then apply it
        m = cv2.getPerspectiveTransform(points, dst)

        if self.fit_output:
            m, max_width, max_height = self._expand_transform(m, (h, w))

        return {"matrix": m, "max_height": max_height, "max_width": max_width, "interpolation": self.interpolation}

    @classmethod
    def _expand_transform(cls, matrix, shape):
        height, width = shape
        # do not use width-1 or height-1 here, as for e.g. width=3, height=2
        # the bottom right coordinate is at (3.0, 2.0) and not (2.0, 1.0)
        rect = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)
        dst = cv2.perspectiveTransform(np.array([rect]), matrix)[0]

        # get min x, y over the transformed 4 points
        # then modify target points by subtracting these minima => shift to (0, 0)
        dst -= dst.min(axis=0, keepdims=True)
        dst = np.around(dst, decimals=0)

        matrix_expanded = cv2.getPerspectiveTransform(rect, dst)
        max_width, max_height = dst.max(axis=0)
        return matrix_expanded, int(max_width), int(max_height)

    @staticmethod
    def _order_points(pts: np.ndarray) -> np.ndarray:
        pts = np.array(sorted(pts, key=lambda x: x[0]))
        left = pts[:2]  # points with the smallest x coordinate - left points
        right = pts[2:]  # points with the greatest x coordinate - right points

        if left[0][1] < left[1][1]:
            tl, bl = left
        else:
            bl, tl = left

        if right[0][1] < right[1][1]:
            tr, br = right
        else:
            br, tr = right

        return np.array([tl, tr, br, bl], dtype=np.float32)

    def get_transform_init_args_names(self):
        return "scale", "keep_size", "pad_mode", "pad_val", "mask_pad_val", "fit_output", "interpolation"


class Affine(DualTransform):
    """Augmentation to apply affine transformations to images.

    This is mostly a wrapper around the corresponding classes and functions in OpenCV.

    Affine transformations involve:

        - Translation ("move" image on the x-/y-axis)
        - Rotation
        - Scaling ("zoom" in/out)
        - Shear (move one side of the image, turning a square into a trapezoid)

    All such transformations can create "new" pixels in the image without a defined content, e.g.
    if the image is translated to the left, pixels are created on the right.
    A method has to be defined to deal with these pixel values.
    The parameters `cval` and `mode` of this class deal with this.

    Some transformations involve interpolations between several pixels
    of the input image to generate output pixel values. The parameters `interpolation` and
    `mask_interpolation` deal with the method of interpolation used for this.

    Args:
        scale (number, tuple of number or dict): Scaling factor to use, where ``1.0`` denotes "no change" and
            ``0.5`` is zoomed out to ``50`` percent of the original size.
                * If a single number, then that value will be used for all images.
                * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from the interval ``[a, b]``.
                  The same range will be used for both x- and y-axis. To keep the aspect ratio, set
                  ``keep_ratio=True``, then the same value will be used for both x- and y-axis.
                * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
                  Each of these keys can have the same values as described above.
                  Using a dictionary allows to set different values for the two axis and sampling will then happen
                  *independently* per axis, resulting in samples that differ between the axes. Note that when
                  ``keep_ratio=True``, the x- and y-axis ranges should be the same.
        translate_percent (None, number, tuple of number or dict): Translation as a fraction of the image height/width
            (x-translation, y-translation), where ``0`` denotes "no change"
            and ``0.5`` denotes "half of the axis size".
                * If ``None`` then equivalent to ``0.0`` unless `translate_px` has a value other than ``None``.
                * If a single number, then that value will be used for all images.
                * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from the interval ``[a, b]``.
                  That sampled fraction value will be used identically for both x- and y-axis.
                * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
                  Each of these keys can have the same values as described above.
                  Using a dictionary allows to set different values for the two axis and sampling will then happen
                  *independently* per axis, resulting in samples that differ between the axes.
        translate_px (None, int, tuple of int or dict): Translation in pixels.
                * If ``None`` then equivalent to ``0`` unless `translate_percent` has a value other than ``None``.
                * If a single int, then that value will be used for all images.
                * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from
                  the discrete interval ``[a..b]``. That number will be used identically for both x- and y-axis.
                * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
                  Each of these keys can have the same values as described above.
                  Using a dictionary allows to set different values for the two axis and sampling will then happen
                  *independently* per axis, resulting in samples that differ between the axes.
        rotate (number or tuple of number): Rotation in degrees (**NOT** radians), i.e. expected value range is
            around ``[-360, 360]``. Rotation happens around the *center* of the image,
            not the top left corner as in some other frameworks.
                * If a number, then that value will be used for all images.
                * If a tuple ``(a, b)``, then a value will be uniformly sampled per image from the interval ``[a, b]``
                  and used as the rotation value.
        shear (number, tuple of number or dict): Shear in degrees (**NOT** radians), i.e. expected value range is
            around ``[-360, 360]``, with reasonable values being in the range of ``[-45, 45]``.
                * If a number, then that value will be used for all images as
                  the shear on the x-axis (no shear on the y-axis will be done).
                * If a tuple ``(a, b)``, then two values will be uniformly sampled per image
                  from the interval ``[a, b]`` and be used as the x- and y-shear value.
                * If a dictionary, then it is expected to have the keys ``x`` and/or ``y``.
                  Each of these keys can have the same values as described above.
                  Using a dictionary allows to set different values for the two axis and sampling will then happen
                  *independently* per axis, resulting in samples that differ between the axes.
        interpolation (int): OpenCV interpolation flag.
        mask_interpolation (int): OpenCV interpolation flag.
        cval (number or sequence of number): The constant value to use when filling in newly created pixels.
            (E.g. translating by 1px to the right will create a new 1px-wide column of pixels
            on the left of the image).
            The value is only used when `mode=constant`. The expected value range is ``[0, 255]`` for ``uint8`` images.
        cval_mask (number or tuple of number): Same as cval but only for masks.
        mode (int): OpenCV border flag.
        fit_output (bool): If True, the image plane size and position will be adjusted to tightly capture
            the whole image after affine transformation (`translate_percent` and `translate_px` are ignored).
            Otherwise (``False``), parts of the transformed image may end up outside the image plane.
            Fitting the output shape can be useful to avoid corners of the image being outside the image plane
            after applying rotations. Default: False.
        keep_ratio (bool): When True, the original aspect ratio will be kept when the random scale is applied.
            Default: False.
        rotate_method (str): rotation method used for the bounding boxes. Should be one of "largest_box" or
            "ellipse" [1].
            Default: "largest_box".
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, keypoints, bboxes

    Image types:
        uint8, float32

    Reference:
        [1] https://arxiv.org/abs/2109.13488
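
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). Dict arguments sample x and y independently;
        keep_ratio ties the two scale factors together::

            import numpy as np
            import custom_albumentations as A

            aug = A.Affine(
                scale=(0.9, 1.1),
                translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
                rotate=(-15, 15),
                shear={"x": (-10, 10)},
                keep_ratio=True,
                p=1.0,
            )
            image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
            out = aug(image=image)["image"]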
""" | |
def __init__( | |
self, | |
scale: Optional[Union[float, Sequence[float], dict]] = None, | |
translate_percent: Optional[Union[float, Sequence[float], dict]] = None, | |
translate_px: Optional[Union[int, Sequence[int], dict]] = None, | |
rotate: Optional[Union[float, Sequence[float]]] = None, | |
shear: Optional[Union[float, Sequence[float], dict]] = None, | |
interpolation: int = cv2.INTER_LINEAR, | |
mask_interpolation: int = cv2.INTER_NEAREST, | |
cval: Union[int, float, Sequence[int], Sequence[float]] = 0, | |
cval_mask: Union[int, float, Sequence[int], Sequence[float]] = 0, | |
mode: int = cv2.BORDER_CONSTANT, | |
fit_output: bool = False, | |
keep_ratio: bool = False, | |
rotate_method: str = "largest_box", | |
always_apply: bool = False, | |
p: float = 0.5, | |
): | |
super().__init__(always_apply=always_apply, p=p) | |
params = [scale, translate_percent, translate_px, rotate, shear] | |
if all([p is None for p in params]): | |
scale = {"x": (0.9, 1.1), "y": (0.9, 1.1)} | |
translate_percent = {"x": (-0.1, 0.1), "y": (-0.1, 0.1)} | |
rotate = (-15, 15) | |
shear = {"x": (-10, 10), "y": (-10, 10)} | |
else: | |
scale = scale if scale is not None else 1.0 | |
rotate = rotate if rotate is not None else 0.0 | |
shear = shear if shear is not None else 0.0 | |
self.interpolation = interpolation | |
self.mask_interpolation = mask_interpolation | |
self.cval = cval | |
self.cval_mask = cval_mask | |
self.mode = mode | |
self.scale = self._handle_dict_arg(scale, "scale") | |
self.translate_percent, self.translate_px = self._handle_translate_arg(translate_px, translate_percent) | |
self.rotate = to_tuple(rotate, rotate) | |
self.fit_output = fit_output | |
self.shear = self._handle_dict_arg(shear, "shear") | |
self.keep_ratio = keep_ratio | |
self.rotate_method = rotate_method | |
if self.keep_ratio and self.scale["x"] != self.scale["y"]: | |
raise ValueError( | |
"When keep_ratio is True, the x and y scale range should be identical. got {}".format(self.scale) | |
) | |

    def get_transform_init_args_names(self):
        return (
            "interpolation",
            "mask_interpolation",
            "cval",
            "mode",
            "scale",
            "translate_percent",
            "translate_px",
            "rotate",
            "fit_output",
            "shear",
            "cval_mask",
            "keep_ratio",
            "rotate_method",
        )

    @staticmethod
    def _handle_dict_arg(val: Union[float, Sequence[float], dict], name: str, default: float = 1.0):
        if isinstance(val, dict):
            if "x" not in val and "y" not in val:
                raise ValueError(
                    f'Expected {name} dictionary to contain at least key "x" or key "y". Found neither of them.'
                )
            x = val.get("x", default)
            y = val.get("y", default)
            return {"x": to_tuple(x, x), "y": to_tuple(y, y)}
        return {"x": to_tuple(val, val), "y": to_tuple(val, val)}

    @classmethod
    def _handle_translate_arg(
        cls,
        translate_px: Optional[Union[float, Sequence[float], dict]],
        translate_percent: Optional[Union[float, Sequence[float], dict]],
    ):
        if translate_percent is None and translate_px is None:
            translate_px = 0

        if translate_percent is not None and translate_px is not None:
            raise ValueError("Expected either translate_percent or translate_px to be provided, but both were given.")

        if translate_percent is not None:
            # translate by percent
            return cls._handle_dict_arg(translate_percent, "translate_percent", default=0.0), translate_px

        if translate_px is None:
            raise ValueError("translate_px is None.")

        # translate by pixels
        return translate_percent, cls._handle_dict_arg(translate_px, "translate_px")

    def apply(
        self,
        img: np.ndarray,
        matrix: skimage.transform.ProjectiveTransform = None,
        output_shape: Sequence[int] = (),
        **params
    ) -> np.ndarray:
        return F.warp_affine(
            img,
            matrix,
            interpolation=self.interpolation,
            cval=self.cval,
            mode=self.mode,
            output_shape=output_shape,
        )

    def apply_to_mask(
        self,
        img: np.ndarray,
        matrix: skimage.transform.ProjectiveTransform = None,
        output_shape: Sequence[int] = (),
        **params
    ) -> np.ndarray:
        return F.warp_affine(
            img,
            matrix,
            interpolation=self.mask_interpolation,
            cval=self.cval_mask,
            mode=self.mode,
            output_shape=output_shape,
        )

    def apply_to_bbox(
        self,
        bbox: BoxInternalType,
        matrix: skimage.transform.ProjectiveTransform = None,
        rows: int = 0,
        cols: int = 0,
        output_shape: Sequence[int] = (),
        **params
    ) -> BoxInternalType:
        return F.bbox_affine(bbox, matrix, self.rotate_method, rows, cols, output_shape)

    def apply_to_keypoint(
        self,
        keypoint: KeypointInternalType,
        matrix: Optional[skimage.transform.ProjectiveTransform] = None,
        scale: Optional[dict] = None,
        **params
    ) -> KeypointInternalType:
        assert scale is not None and matrix is not None
        return F.keypoint_affine(keypoint, matrix=matrix, scale=scale)

    @property
    def targets_as_params(self):
        return ["image"]

    def get_params_dependent_on_targets(self, params: dict) -> dict:
        h, w = params["image"].shape[:2]

        translate: Dict[str, Union[int, float]]
        if self.translate_px is not None:
            translate = {key: random.randint(*value) for key, value in self.translate_px.items()}
        elif self.translate_percent is not None:
            translate = {key: random.uniform(*value) for key, value in self.translate_percent.items()}
            translate["x"] = translate["x"] * w
            translate["y"] = translate["y"] * h
        else:
            translate = {"x": 0, "y": 0}

        # Look to issue https://github.com/albumentations-team/albumentations/issues/1079
        shear = {key: -random.uniform(*value) for key, value in self.shear.items()}
        scale = {key: random.uniform(*value) for key, value in self.scale.items()}
        if self.keep_ratio:
            scale["y"] = scale["x"]

        # Look to issue https://github.com/albumentations-team/albumentations/issues/1079
        rotate = -random.uniform(*self.rotate)

        # for images we use additional shifts of (0.5, 0.5) as otherwise
        # we get an ugly black border for 90deg rotations
        shift_x = w / 2 - 0.5
        shift_y = h / 2 - 0.5

        matrix_to_topleft = skimage.transform.SimilarityTransform(translation=[-shift_x, -shift_y])
        matrix_shear_y_rot = skimage.transform.AffineTransform(rotation=-np.pi / 2)
        matrix_shear_y = skimage.transform.AffineTransform(shear=np.deg2rad(shear["y"]))
        matrix_shear_y_rot_inv = skimage.transform.AffineTransform(rotation=np.pi / 2)
        matrix_transforms = skimage.transform.AffineTransform(
            scale=(scale["x"], scale["y"]),
            translation=(translate["x"], translate["y"]),
            rotation=np.deg2rad(rotate),
            shear=np.deg2rad(shear["x"]),
        )
        matrix_to_center = skimage.transform.SimilarityTransform(translation=[shift_x, shift_y])
        matrix = (
            matrix_to_topleft
            + matrix_shear_y_rot
            + matrix_shear_y
            + matrix_shear_y_rot_inv
            + matrix_transforms
            + matrix_to_center
        )
        if self.fit_output:
            matrix, output_shape = self._compute_affine_warp_output_shape(matrix, params["image"].shape)
        else:
            output_shape = params["image"].shape

        return {
            "rotate": rotate,
            "scale": scale,
            "matrix": matrix,
            "output_shape": output_shape,
        }

    @staticmethod
    def _compute_affine_warp_output_shape(
        matrix: skimage.transform.ProjectiveTransform, input_shape: Sequence[int]
    ) -> Tuple[skimage.transform.ProjectiveTransform, Sequence[int]]:
        height, width = input_shape[:2]

        if height == 0 or width == 0:
            return matrix, input_shape

        # determine shape of output image
        corners = np.array([[0, 0], [0, height - 1], [width - 1, height - 1], [width - 1, 0]])
        corners = matrix(corners)
        minc = corners[:, 0].min()
        minr = corners[:, 1].min()
        maxc = corners[:, 0].max()
        maxr = corners[:, 1].max()
        out_height = maxr - minr + 1
        out_width = maxc - minc + 1
        if len(input_shape) == 3:
            output_shape = np.ceil((out_height, out_width, input_shape[2]))
        else:
            output_shape = np.ceil((out_height, out_width))
        output_shape_tuple = tuple([int(v) for v in output_shape.tolist()])

        # fit output image in new shape
        translation = (-minc, -minr)
        matrix_to_fit = skimage.transform.SimilarityTransform(translation=translation)
        matrix = matrix + matrix_to_fit
        return matrix, output_shape_tuple


class PiecewiseAffine(DualTransform):
    """Apply affine transformations that differ between local neighbourhoods.

    This augmentation places a regular grid of points on an image and randomly moves the neighbourhood of these
    points around via affine transformations. This leads to local distortions.

    This is mostly a wrapper around scikit-image's ``PiecewiseAffine``.
    See also ``Affine`` for a similar technique.

    Note:
        This augmenter is very slow. Try to use ``ElasticTransformation`` instead, which is at least 10x faster.

    Note:
        For coordinate-based inputs (keypoints, bounding boxes, polygons, ...),
        this augmenter still has to perform an image-based augmentation,
        which makes it significantly slower for such inputs than other transforms, and not fully correct.

    Args:
        scale (float, tuple of float): Each point on the regular grid is moved around via a normal distribution.
            This scale factor is equivalent to the normal distribution's sigma.
            Note that the jitter (how far each point is moved in which direction) is multiplied by the height/width of
            the image if ``absolute_scale=False`` (default), so this scale can be the same for different sized images.
            Recommended values are in the range ``0.01`` to ``0.05`` (weak to strong augmentations).
                * If a single ``float``, then that value will always be used as the scale.
                * If a tuple ``(a, b)`` of ``float`` s, then a random value will
                  be uniformly sampled per image from the interval ``[a, b]``.
        nb_rows (int, tuple of int): Number of rows of points that the regular grid should have.
            Must be at least ``2``. For large images, you might want to pick a higher value than ``4``.
            You might have to then adjust scale to lower values.
                * If a single ``int``, then that value will always be used as the number of rows.
                * If a tuple ``(a, b)``, then a value from the discrete interval
                  ``[a..b]`` will be uniformly sampled per image.
        nb_cols (int, tuple of int): Number of columns. Analogous to `nb_rows`.
        interpolation (int): The order of interpolation. The order has to be in the range 0-5:
            - 0: Nearest-neighbor
            - 1: Bi-linear (default)
            - 2: Bi-quadratic
            - 3: Bi-cubic
            - 4: Bi-quartic
            - 5: Bi-quintic
        mask_interpolation (int): same as interpolation but for mask.
        cval (number): The constant value to use when filling in newly created pixels.
        cval_mask (number): Same as cval but only for masks.
        mode (str): {'constant', 'edge', 'symmetric', 'reflect', 'wrap'}, optional
            Points outside the boundaries of the input are filled according
            to the given mode. Modes match the behaviour of `numpy.pad`.
        absolute_scale (bool): Take `scale` as an absolute value rather than a relative value.
        keypoints_threshold (float): Used as threshold in conversion from distance maps to keypoints.
            The search for keypoints works by searching for the
            argmin (non-inverted) or argmax (inverted) in each channel. This
            parameter contains the maximum (non-inverted) or minimum (inverted) value to accept in order to view a hit
            as a keypoint. Use ``None`` to use no min/max. Default: 0.01

    Targets:
        image, mask, keypoints, bboxes

    Image types:
        uint8, float32
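
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). A 4x4 grid of control points is jittered by roughly
        3-5% of the image size::

            import numpy as np
            import custom_albumentations as A

            aug = A.PiecewiseAffine(scale=(0.03, 0.05), nb_rows=4, nb_cols=4, p=1.0)
            image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
            out = aug(image=image)["image"]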
""" | |
def __init__( | |
self, | |
scale: ScaleFloatType = (0.03, 0.05), | |
nb_rows: Union[int, Sequence[int]] = 4, | |
nb_cols: Union[int, Sequence[int]] = 4, | |
interpolation: int = 1, | |
mask_interpolation: int = 0, | |
cval: int = 0, | |
cval_mask: int = 0, | |
mode: str = "constant", | |
absolute_scale: bool = False, | |
always_apply: bool = False, | |
keypoints_threshold: float = 0.01, | |
p: float = 0.5, | |
): | |
super(PiecewiseAffine, self).__init__(always_apply, p) | |
self.scale = to_tuple(scale, scale) | |
self.nb_rows = to_tuple(nb_rows, nb_rows) | |
self.nb_cols = to_tuple(nb_cols, nb_cols) | |
self.interpolation = interpolation | |
self.mask_interpolation = mask_interpolation | |
self.cval = cval | |
self.cval_mask = cval_mask | |
self.mode = mode | |
self.absolute_scale = absolute_scale | |
self.keypoints_threshold = keypoints_threshold | |
def get_transform_init_args_names(self): | |
return ( | |
"scale", | |
"nb_rows", | |
"nb_cols", | |
"interpolation", | |
"mask_interpolation", | |
"cval", | |
"cval_mask", | |
"mode", | |
"absolute_scale", | |
"keypoints_threshold", | |
) | |

    @property
    def targets_as_params(self):
        return ["image"]

    def get_params_dependent_on_targets(self, params) -> dict:
        h, w = params["image"].shape[:2]

        nb_rows = np.clip(random.randint(*self.nb_rows), 2, None)
        nb_cols = np.clip(random.randint(*self.nb_cols), 2, None)
        nb_cells = nb_cols * nb_rows
        scale = random.uniform(*self.scale)

        jitter: np.ndarray = random_utils.normal(0, scale, (nb_cells, 2))
        if not np.any(jitter > 0):
            # Retry a few times to avoid a degenerate all-zero jitter.
            # See: https://github.com/albumentations-team/albumentations/issues/1442
            for _ in range(10):
                jitter = random_utils.normal(0, scale, (nb_cells, 2))
                if np.any(jitter > 0):
                    break
            if not np.any(jitter > 0):
                return {"matrix": None}

        y = np.linspace(0, h, nb_rows)
        x = np.linspace(0, w, nb_cols)

        # (H, W) and (H, W) for H=rows, W=cols
        xx_src, yy_src = np.meshgrid(x, y)

        # (1, HW, 2) => (HW, 2) for H=rows, W=cols
        points_src = np.dstack([yy_src.flat, xx_src.flat])[0]

        if self.absolute_scale:
            jitter[:, 0] = jitter[:, 0] / h if h > 0 else 0.0
            jitter[:, 1] = jitter[:, 1] / w if w > 0 else 0.0

        jitter[:, 0] = jitter[:, 0] * h
        jitter[:, 1] = jitter[:, 1] * w

        points_dest = np.copy(points_src)
        points_dest[:, 0] = points_dest[:, 0] + jitter[:, 0]
        points_dest[:, 1] = points_dest[:, 1] + jitter[:, 1]

        # Restrict all destination points to be inside the image plane.
        # This is necessary, as otherwise keypoints could be augmented
        # outside of the image plane and these would be replaced by
        # (-1, -1), which would not conform with the behaviour of the other augmenters.
        points_dest[:, 0] = np.clip(points_dest[:, 0], 0, h - 1)
        points_dest[:, 1] = np.clip(points_dest[:, 1], 0, w - 1)

        matrix = skimage.transform.PiecewiseAffineTransform()
        matrix.estimate(points_src[:, ::-1], points_dest[:, ::-1])

        return {
            "matrix": matrix,
        }

    def apply(
        self, img: np.ndarray, matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None, **params
    ) -> np.ndarray:
        return F.piecewise_affine(img, matrix, self.interpolation, self.mode, self.cval)

    def apply_to_mask(
        self, img: np.ndarray, matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None, **params
    ) -> np.ndarray:
        return F.piecewise_affine(img, matrix, self.mask_interpolation, self.mode, self.cval_mask)

    def apply_to_bbox(
        self,
        bbox: BoxInternalType,
        rows: int = 0,
        cols: int = 0,
        matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None,
        **params
    ) -> BoxInternalType:
        return F.bbox_piecewise_affine(bbox, matrix, rows, cols, self.keypoints_threshold)

    def apply_to_keypoint(
        self,
        keypoint: KeypointInternalType,
        rows: int = 0,
        cols: int = 0,
        matrix: Optional[skimage.transform.PiecewiseAffineTransform] = None,
        **params
    ):
        return F.keypoint_piecewise_affine(keypoint, matrix, rows, cols, self.keypoints_threshold)


class PadIfNeeded(DualTransform):
    """Pad the sides of the image if its height or width is less than the desired size.

    Args:
        min_height (int): minimal result image height.
        min_width (int): minimal result image width.
        pad_height_divisor (int): if not None, ensures the image height is divisible by this value.
        pad_width_divisor (int): if not None, ensures the image width is divisible by this value.
        position (Union[str, PositionType]): position of the image. Should be PositionType.CENTER,
            PositionType.TOP_LEFT, PositionType.TOP_RIGHT, PositionType.BOTTOM_LEFT, PositionType.BOTTOM_RIGHT,
            or PositionType.RANDOM. Default: PositionType.CENTER.
        border_mode (OpenCV flag): OpenCV border mode.
        value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
        mask_value (int, float, list of int, list of float): padding value for mask
            if border_mode is cv2.BORDER_CONSTANT.
        p (float): probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bbox, keypoints

    Image types:
        uint8, float32
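
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). A 100x80 image is padded up to at least 128x128 and
        stays centered by default::

            import cv2
            import numpy as np
            import custom_albumentations as A

            aug = A.PadIfNeeded(min_height=128, min_width=128, border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0)
            padded = aug(image=np.zeros((100, 80, 3), dtype=np.uint8))["image"]
            assert padded.shape[:2] == (128, 128)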
""" | |
class PositionType(Enum): | |
CENTER = "center" | |
TOP_LEFT = "top_left" | |
TOP_RIGHT = "top_right" | |
BOTTOM_LEFT = "bottom_left" | |
BOTTOM_RIGHT = "bottom_right" | |
RANDOM = "random" | |
def __init__( | |
self, | |
min_height: Optional[int] = 1024, | |
min_width: Optional[int] = 1024, | |
pad_height_divisor: Optional[int] = None, | |
pad_width_divisor: Optional[int] = None, | |
position: Union[PositionType, str] = PositionType.CENTER, | |
border_mode: int = cv2.BORDER_REFLECT_101, | |
value: Optional[ImageColorType] = None, | |
mask_value: Optional[ImageColorType] = None, | |
always_apply: bool = False, | |
p: float = 1.0, | |
): | |
if (min_height is None) == (pad_height_divisor is None): | |
raise ValueError("Only one of 'min_height' and 'pad_height_divisor' parameters must be set") | |
if (min_width is None) == (pad_width_divisor is None): | |
raise ValueError("Only one of 'min_width' and 'pad_width_divisor' parameters must be set") | |
super(PadIfNeeded, self).__init__(always_apply, p) | |
self.min_height = min_height | |
self.min_width = min_width | |
self.pad_width_divisor = pad_width_divisor | |
self.pad_height_divisor = pad_height_divisor | |
self.position = PadIfNeeded.PositionType(position) | |
self.border_mode = border_mode | |
self.value = value | |
self.mask_value = mask_value | |
def update_params(self, params, **kwargs): | |
params = super(PadIfNeeded, self).update_params(params, **kwargs) | |
rows = params["rows"] | |
cols = params["cols"] | |
if self.min_height is not None: | |
if rows < self.min_height: | |
h_pad_top = int((self.min_height - rows) / 2.0) | |
h_pad_bottom = self.min_height - rows - h_pad_top | |
else: | |
h_pad_top = 0 | |
h_pad_bottom = 0 | |
else: | |
pad_remained = rows % self.pad_height_divisor | |
pad_rows = self.pad_height_divisor - pad_remained if pad_remained > 0 else 0 | |
h_pad_top = pad_rows // 2 | |
h_pad_bottom = pad_rows - h_pad_top | |
if self.min_width is not None: | |
if cols < self.min_width: | |
w_pad_left = int((self.min_width - cols) / 2.0) | |
w_pad_right = self.min_width - cols - w_pad_left | |
else: | |
w_pad_left = 0 | |
w_pad_right = 0 | |
else: | |
pad_remainder = cols % self.pad_width_divisor | |
pad_cols = self.pad_width_divisor - pad_remainder if pad_remainder > 0 else 0 | |
w_pad_left = pad_cols // 2 | |
w_pad_right = pad_cols - w_pad_left | |
h_pad_top, h_pad_bottom, w_pad_left, w_pad_right = self.__update_position_params( | |
h_top=h_pad_top, h_bottom=h_pad_bottom, w_left=w_pad_left, w_right=w_pad_right | |
) | |
params.update( | |
{ | |
"pad_top": h_pad_top, | |
"pad_bottom": h_pad_bottom, | |
"pad_left": w_pad_left, | |
"pad_right": w_pad_right, | |
} | |
) | |
return params | |

    def apply(
        self, img: np.ndarray, pad_top: int = 0, pad_bottom: int = 0, pad_left: int = 0, pad_right: int = 0, **params
    ) -> np.ndarray:
        return F.pad_with_params(
            img,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            border_mode=self.border_mode,
            value=self.value,
        )

    def apply_to_mask(
        self, img: np.ndarray, pad_top: int = 0, pad_bottom: int = 0, pad_left: int = 0, pad_right: int = 0, **params
    ) -> np.ndarray:
        return F.pad_with_params(
            img,
            pad_top,
            pad_bottom,
            pad_left,
            pad_right,
            border_mode=self.border_mode,
            value=self.mask_value,
        )

    def apply_to_bbox(
        self,
        bbox: BoxInternalType,
        pad_top: int = 0,
        pad_bottom: int = 0,
        pad_left: int = 0,
        pad_right: int = 0,
        rows: int = 0,
        cols: int = 0,
        **params
    ) -> BoxInternalType:
        x_min, y_min, x_max, y_max = denormalize_bbox(bbox, rows, cols)[:4]
        bbox = x_min + pad_left, y_min + pad_top, x_max + pad_left, y_max + pad_top
        return normalize_bbox(bbox, rows + pad_top + pad_bottom, cols + pad_left + pad_right)

    def apply_to_keypoint(
        self,
        keypoint: KeypointInternalType,
        pad_top: int = 0,
        pad_bottom: int = 0,
        pad_left: int = 0,
        pad_right: int = 0,
        **params
    ) -> KeypointInternalType:
        x, y, angle, scale = keypoint[:4]
        return x + pad_left, y + pad_top, angle, scale

    def get_transform_init_args_names(self):
        return (
            "min_height",
            "min_width",
            "pad_height_divisor",
            "pad_width_divisor",
            "border_mode",
            "value",
            "mask_value",
        )

    def __update_position_params(
        self, h_top: int, h_bottom: int, w_left: int, w_right: int
    ) -> Tuple[int, int, int, int]:
        if self.position == PadIfNeeded.PositionType.TOP_LEFT:
            h_bottom += h_top
            w_right += w_left
            h_top = 0
            w_left = 0

        elif self.position == PadIfNeeded.PositionType.TOP_RIGHT:
            h_bottom += h_top
            w_left += w_right
            h_top = 0
            w_right = 0

        elif self.position == PadIfNeeded.PositionType.BOTTOM_LEFT:
            h_top += h_bottom
            w_right += w_left
            h_bottom = 0
            w_left = 0

        elif self.position == PadIfNeeded.PositionType.BOTTOM_RIGHT:
            h_top += h_bottom
            w_left += w_right
            h_bottom = 0
            w_right = 0

        elif self.position == PadIfNeeded.PositionType.RANDOM:
            h_pad = h_top + h_bottom
            w_pad = w_left + w_right
            h_top = random.randint(0, h_pad)
            h_bottom = h_pad - h_top
            w_left = random.randint(0, w_pad)
            w_right = w_pad - w_left

        return h_top, h_bottom, w_left, w_right


class VerticalFlip(DualTransform):
    """Flip the input vertically around the x-axis.

    Args:
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def apply(self, img: np.ndarray, **params) -> np.ndarray:
        return F.vflip(img)

    def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
        return F.bbox_vflip(bbox, **params)

    def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
        return F.keypoint_vflip(keypoint, **params)

    def get_transform_init_args_names(self):
        return ()


class HorizontalFlip(DualTransform):
    """Flip the input horizontally around the y-axis.

    Args:
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def apply(self, img: np.ndarray, **params) -> np.ndarray:
        if img.ndim == 3 and img.shape[2] > 1 and img.dtype == np.uint8:
            # OpenCV is faster than numpy only for non-grayscale 8-bit images.
            return F.hflip_cv2(img)

        return F.hflip(img)

    def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
        return F.bbox_hflip(bbox, **params)

    def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
        return F.keypoint_hflip(keypoint, **params)

    def get_transform_init_args_names(self):
        return ()


class Flip(DualTransform):
    """Flip the input either horizontally, vertically or both horizontally and vertically.

    Args:
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
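
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). The flip code d is sampled from {-1, 0, 1}: 0 flips
        vertically, 1 horizontally, -1 does both::

            import numpy as np
            import custom_albumentations as A

            aug = A.Flip(p=1.0)
            image = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
            flipped = aug(image=image)["image"]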
""" | |
def apply(self, img: np.ndarray, d: int = 0, **params) -> np.ndarray: | |
"""Args: | |
d (int): code that specifies how to flip the input. 0 for vertical flipping, 1 for horizontal flipping, | |
-1 for both vertical and horizontal flipping (which is also could be seen as rotating the input by | |
180 degrees). | |
""" | |
return F.random_flip(img, d) | |
def get_params(self): | |
# Random int in the range [-1, 1] | |
return {"d": random.randint(-1, 1)} | |
def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType: | |
return F.bbox_flip(bbox, **params) | |
def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType: | |
return F.keypoint_flip(keypoint, **params) | |
def get_transform_init_args_names(self): | |
return () | |


class Transpose(DualTransform):
    """Transpose the input by swapping rows and columns.

    Args:
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask, bboxes, keypoints

    Image types:
        uint8, float32
    """

    def apply(self, img: np.ndarray, **params) -> np.ndarray:
        return F.transpose(img)

    def apply_to_bbox(self, bbox: BoxInternalType, **params) -> BoxInternalType:
        return F.bbox_transpose(bbox, 0, **params)

    def apply_to_keypoint(self, keypoint: KeypointInternalType, **params) -> KeypointInternalType:
        return F.keypoint_transpose(keypoint)

    def get_transform_init_args_names(self):
        return ()


class OpticalDistortion(DualTransform):
    """Apply camera lens (barrel / pincushion) distortion to the input.

    Args:
        distort_limit (float, (float, float)): If distort_limit is a single float, the range
            will be (-distort_limit, distort_limit). Default: (-0.05, 0.05).
        shift_limit (float, (float, float)): If shift_limit is a single float, the range
            will be (-shift_limit, shift_limit). Default: (-0.05, 0.05).
        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_REFLECT_101.
        value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
        mask_value (int, float, list of int, list of float): padding value if border_mode is
            cv2.BORDER_CONSTANT, applied to masks.

    Targets:
        image, mask, bbox

    Image types:
        uint8, float32
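
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). The sampled k controls the distortion strength, and
        dx/dy shift the distortion center::

            import numpy as np
            import custom_albumentations as A

            aug = A.OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=1.0)
            image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
            out = aug(image=image)["image"]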
""" | |
def __init__( | |
self, | |
distort_limit: ScaleFloatType = 0.05, | |
shift_limit: ScaleFloatType = 0.05, | |
interpolation: int = cv2.INTER_LINEAR, | |
border_mode: int = cv2.BORDER_REFLECT_101, | |
value: Optional[ImageColorType] = None, | |
mask_value: Optional[ImageColorType] = None, | |
always_apply: bool = False, | |
p: float = 0.5, | |
): | |
super(OpticalDistortion, self).__init__(always_apply, p) | |
self.shift_limit = to_tuple(shift_limit) | |
self.distort_limit = to_tuple(distort_limit) | |
self.interpolation = interpolation | |
self.border_mode = border_mode | |
self.value = value | |
self.mask_value = mask_value | |
def apply( | |
self, img: np.ndarray, k: int = 0, dx: int = 0, dy: int = 0, interpolation: int = cv2.INTER_LINEAR, **params | |
) -> np.ndarray: | |
return F.optical_distortion(img, k, dx, dy, interpolation, self.border_mode, self.value) | |
def apply_to_mask(self, img: np.ndarray, k: int = 0, dx: int = 0, dy: int = 0, **params) -> np.ndarray: | |
return F.optical_distortion(img, k, dx, dy, cv2.INTER_NEAREST, self.border_mode, self.mask_value) | |
def apply_to_bbox(self, bbox: BoxInternalType, k: int = 0, dx: int = 0, dy: int = 0, **params) -> BoxInternalType: | |
rows, cols = params["rows"], params["cols"] | |
mask = np.zeros((rows, cols), dtype=np.uint8) | |
bbox_denorm = F.denormalize_bbox(bbox, rows, cols) | |
x_min, y_min, x_max, y_max = bbox_denorm[:4] | |
x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max) | |
mask[y_min:y_max, x_min:x_max] = 1 | |
mask = F.optical_distortion(mask, k, dx, dy, cv2.INTER_NEAREST, self.border_mode, self.mask_value) | |
bbox_returned = bbox_from_mask(mask) | |
bbox_returned = F.normalize_bbox(bbox_returned, rows, cols) | |
return bbox_returned | |
def get_params(self): | |
return { | |
"k": random.uniform(self.distort_limit[0], self.distort_limit[1]), | |
"dx": round(random.uniform(self.shift_limit[0], self.shift_limit[1])), | |
"dy": round(random.uniform(self.shift_limit[0], self.shift_limit[1])), | |
} | |
def get_transform_init_args_names(self): | |
return ( | |
"distort_limit", | |
"shift_limit", | |
"interpolation", | |
"border_mode", | |
"value", | |
"mask_value", | |
) | |


class GridDistortion(DualTransform):
    """Apply grid distortion: the input is divided into a grid of cells whose sizes are randomly
    stretched or compressed.

    Args:
        num_steps (int): count of grid cells on each side.
        distort_limit (float, (float, float)): If distort_limit is a single float, the range
            will be (-distort_limit, distort_limit). Default: (-0.3, 0.3).
        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        border_mode (OpenCV flag): flag that is used to specify the pixel extrapolation method. Should be one of:
            cv2.BORDER_CONSTANT, cv2.BORDER_REPLICATE, cv2.BORDER_REFLECT, cv2.BORDER_WRAP, cv2.BORDER_REFLECT_101.
            Default: cv2.BORDER_REFLECT_101.
        value (int, float, list of int, list of float): padding value if border_mode is cv2.BORDER_CONSTANT.
        mask_value (int, float, list of int, list of float): padding value if border_mode is
            cv2.BORDER_CONSTANT, applied to masks.
        normalized (bool): if true, the distortion is normalized so that it does not go outside the image.
            Default: False. See https://github.com/albumentations-team/albumentations/pull/722 for details.

    Targets:
        image, mask

    Image types:
        uint8, float32
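
    Example:
        A usage sketch (import path assumed, mirroring upstream
        albumentations). With normalized=True the sampled steps are rescaled
        so the warp stays inside the image bounds::

            import numpy as np
            import custom_albumentations as A

            aug = A.GridDistortion(num_steps=5, distort_limit=0.3, normalized=True, p=1.0)
            image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
            out = aug(image=image)["image"]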
""" | |
def __init__( | |
self, | |
num_steps: int = 5, | |
distort_limit: ScaleFloatType = 0.3, | |
interpolation: int = cv2.INTER_LINEAR, | |
border_mode: int = cv2.BORDER_REFLECT_101, | |
value: Optional[ImageColorType] = None, | |
mask_value: Optional[ImageColorType] = None, | |
normalized: bool = False, | |
always_apply: bool = False, | |
p: float = 0.5, | |
): | |
super(GridDistortion, self).__init__(always_apply, p) | |
self.num_steps = num_steps | |
self.distort_limit = to_tuple(distort_limit) | |
self.interpolation = interpolation | |
self.border_mode = border_mode | |
self.value = value | |
self.mask_value = mask_value | |
self.normalized = normalized | |
def apply( | |
self, img: np.ndarray, stepsx: Tuple = (), stepsy: Tuple = (), interpolation: int = cv2.INTER_LINEAR, **params | |
) -> np.ndarray: | |
return F.grid_distortion(img, self.num_steps, stepsx, stepsy, interpolation, self.border_mode, self.value) | |
def apply_to_mask(self, img: np.ndarray, stepsx: Tuple = (), stepsy: Tuple = (), **params) -> np.ndarray: | |
return F.grid_distortion( | |
img, self.num_steps, stepsx, stepsy, cv2.INTER_NEAREST, self.border_mode, self.mask_value | |
) | |
def apply_to_bbox(self, bbox: BoxInternalType, stepsx: Tuple = (), stepsy: Tuple = (), **params) -> BoxInternalType: | |
rows, cols = params["rows"], params["cols"] | |
mask = np.zeros((rows, cols), dtype=np.uint8) | |
bbox_denorm = F.denormalize_bbox(bbox, rows, cols) | |
x_min, y_min, x_max, y_max = bbox_denorm[:4] | |
x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max) | |
mask[y_min:y_max, x_min:x_max] = 1 | |
mask = F.grid_distortion( | |
mask, self.num_steps, stepsx, stepsy, cv2.INTER_NEAREST, self.border_mode, self.mask_value | |
) | |
bbox_returned = bbox_from_mask(mask) | |
bbox_returned = F.normalize_bbox(bbox_returned, rows, cols) | |
return bbox_returned | |
def _normalize(self, h, w, xsteps, ysteps): | |
# compensate for smaller last steps in source image. | |
x_step = w // self.num_steps | |
last_x_step = min(w, ((self.num_steps + 1) * x_step)) - (self.num_steps * x_step) | |
xsteps[-1] *= last_x_step / x_step | |
y_step = h // self.num_steps | |
last_y_step = min(h, ((self.num_steps + 1) * y_step)) - (self.num_steps * y_step) | |
ysteps[-1] *= last_y_step / y_step | |
# now normalize such that distortion never leaves image bounds. | |
tx = w / math.floor(w / self.num_steps) | |
ty = h / math.floor(h / self.num_steps) | |
xsteps = np.array(xsteps) * (tx / np.sum(xsteps)) | |
ysteps = np.array(ysteps) * (ty / np.sum(ysteps)) | |
return {"stepsx": xsteps, "stepsy": ysteps} | |

    @property
    def targets_as_params(self):
        return ["image"]

    def get_params_dependent_on_targets(self, params):
        h, w = params["image"].shape[:2]

        stepsx = [1 + random.uniform(self.distort_limit[0], self.distort_limit[1]) for _ in range(self.num_steps + 1)]
        stepsy = [1 + random.uniform(self.distort_limit[0], self.distort_limit[1]) for _ in range(self.num_steps + 1)]

        if self.normalized:
            return self._normalize(h, w, stepsx, stepsy)

        return {"stepsx": stepsx, "stepsy": stepsy}

    def get_transform_init_args_names(self):
        return "num_steps", "distort_limit", "interpolation", "border_mode", "value", "mask_value", "normalized"