JasonSmithSO's picture
Upload 777 files
0034848 verified
from __future__ import division
from typing import Any, Dict, List, Optional, Sequence, Tuple, TypeVar, cast
import numpy as np
from .transforms_interface import BoxInternalType, BoxType
from .utils import DataProcessor, Params
__all__ = [
"normalize_bbox",
"denormalize_bbox",
"normalize_bboxes",
"denormalize_bboxes",
"calculate_bbox_area",
"filter_bboxes_by_visibility",
"convert_bbox_to_albumentations",
"convert_bbox_from_albumentations",
"convert_bboxes_to_albumentations",
"convert_bboxes_from_albumentations",
"check_bbox",
"check_bboxes",
"filter_bboxes",
"union_of_bboxes",
"BboxProcessor",
"BboxParams",
]
TBox = TypeVar("TBox", BoxType, BoxInternalType)
class BboxParams(Params):
"""
Parameters of bounding boxes
Args:
format (str): format of bounding boxes. Should be 'coco', 'pascal_voc', 'albumentations' or 'yolo'.
The `coco` format
`[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
The `pascal_voc` format
`[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
The `albumentations` format
is like `pascal_voc`, but normalized,
in other words: `[x_min, y_min, x_max, y_max]`, e.g. [0.2, 0.3, 0.4, 0.5].
The `yolo` format
`[x, y, width, height]`, e.g. [0.1, 0.2, 0.3, 0.4];
`x`, `y` - normalized bbox center; `width`, `height` - normalized bbox width and height.
label_fields (list): list of fields that are joined with boxes, e.g labels.
Should be same type as boxes.
min_area (float): minimum area of a bounding box. All bounding boxes whose
visible area in pixels is less than this value will be removed. Default: 0.0.
min_visibility (float): minimum fraction of area for a bounding box
to remain this box in list. Default: 0.0.
min_width (float): Minimum width of a bounding box. All bounding boxes whose width is
less than this value will be removed. Default: 0.0.
min_height (float): Minimum height of a bounding box. All bounding boxes whose height is
less than this value will be removed. Default: 0.0.
check_each_transform (bool): if `True`, then bboxes will be checked after each dual transform.
Default: `True`
"""
def __init__(
self,
format: str,
label_fields: Optional[Sequence[str]] = None,
min_area: float = 0.0,
min_visibility: float = 0.0,
min_width: float = 0.0,
min_height: float = 0.0,
check_each_transform: bool = True,
):
super(BboxParams, self).__init__(format, label_fields)
self.min_area = min_area
self.min_visibility = min_visibility
self.min_width = min_width
self.min_height = min_height
self.check_each_transform = check_each_transform
def _to_dict(self) -> Dict[str, Any]:
data = super(BboxParams, self)._to_dict()
data.update(
{
"min_area": self.min_area,
"min_visibility": self.min_visibility,
"min_width": self.min_width,
"min_height": self.min_height,
"check_each_transform": self.check_each_transform,
}
)
return data
@classmethod
def is_serializable(cls) -> bool:
return True
@classmethod
def get_class_fullname(cls) -> str:
return "BboxParams"
class BboxProcessor(DataProcessor):
def __init__(self, params: BboxParams, additional_targets: Optional[Dict[str, str]] = None):
super().__init__(params, additional_targets)
@property
def default_data_name(self) -> str:
return "bboxes"
def ensure_data_valid(self, data: Dict[str, Any]) -> None:
for data_name in self.data_fields:
data_exists = data_name in data and len(data[data_name])
if data_exists and len(data[data_name][0]) < 5:
if self.params.label_fields is None:
raise ValueError(
"Please specify 'label_fields' in 'bbox_params' or add labels to the end of bbox "
"because bboxes must have labels"
)
if self.params.label_fields:
if not all(i in data.keys() for i in self.params.label_fields):
raise ValueError("Your 'label_fields' are not valid - them must have same names as params in dict")
def filter(self, data: Sequence, rows: int, cols: int) -> List:
self.params: BboxParams
return filter_bboxes(
data,
rows,
cols,
min_area=self.params.min_area,
min_visibility=self.params.min_visibility,
min_width=self.params.min_width,
min_height=self.params.min_height,
)
def check(self, data: Sequence, rows: int, cols: int) -> None:
check_bboxes(data)
def convert_from_albumentations(self, data: Sequence, rows: int, cols: int) -> List[BoxType]:
return convert_bboxes_from_albumentations(data, self.params.format, rows, cols, check_validity=True)
def convert_to_albumentations(self, data: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
return convert_bboxes_to_albumentations(data, self.params.format, rows, cols, check_validity=True)
def normalize_bbox(bbox: TBox, rows: int, cols: int) -> TBox:
"""Normalize coordinates of a bounding box. Divide x-coordinates by image width and y-coordinates
by image height.
Args:
bbox: Denormalized bounding box `(x_min, y_min, x_max, y_max)`.
rows: Image height.
cols: Image width.
Returns:
Normalized bounding box `(x_min, y_min, x_max, y_max)`.
Raises:
ValueError: If rows or cols is less or equal zero
"""
if rows <= 0:
raise ValueError("Argument rows must be positive integer")
if cols <= 0:
raise ValueError("Argument cols must be positive integer")
tail: Tuple[Any, ...]
(x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
x_min, x_max = x_min / cols, x_max / cols
y_min, y_max = y_min / rows, y_max / rows
return cast(BoxType, (x_min, y_min, x_max, y_max) + tail) # type: ignore
def denormalize_bbox(bbox: TBox, rows: int, cols: int) -> TBox:
"""Denormalize coordinates of a bounding box. Multiply x-coordinates by image width and y-coordinates
by image height. This is an inverse operation for :func:`~albumentations.augmentations.bbox.normalize_bbox`.
Args:
bbox: Normalized bounding box `(x_min, y_min, x_max, y_max)`.
rows: Image height.
cols: Image width.
Returns:
Denormalized bounding box `(x_min, y_min, x_max, y_max)`.
Raises:
ValueError: If rows or cols is less or equal zero
"""
tail: Tuple[Any, ...]
(x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
if rows <= 0:
raise ValueError("Argument rows must be positive integer")
if cols <= 0:
raise ValueError("Argument cols must be positive integer")
x_min, x_max = x_min * cols, x_max * cols
y_min, y_max = y_min * rows, y_max * rows
return cast(BoxType, (x_min, y_min, x_max, y_max) + tail) # type: ignore
def normalize_bboxes(bboxes: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
"""Normalize a list of bounding boxes.
Args:
bboxes: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
rows: Image height.
cols: Image width.
Returns:
Normalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
"""
return [normalize_bbox(bbox, rows, cols) for bbox in bboxes]
def denormalize_bboxes(bboxes: Sequence[BoxType], rows: int, cols: int) -> List[BoxType]:
"""Denormalize a list of bounding boxes.
Args:
bboxes: Normalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
rows: Image height.
cols: Image width.
Returns:
List: Denormalized bounding boxes `[(x_min, y_min, x_max, y_max)]`.
"""
return [denormalize_bbox(bbox, rows, cols) for bbox in bboxes]
def calculate_bbox_area(bbox: BoxType, rows: int, cols: int) -> float:
"""Calculate the area of a bounding box in (fractional) pixels.
Args:
bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
rows: Image height.
cols: Image width.
Return:
Area in (fractional) pixels of the (denormalized) bounding box.
"""
bbox = denormalize_bbox(bbox, rows, cols)
x_min, y_min, x_max, y_max = bbox[:4]
area = (x_max - x_min) * (y_max - y_min)
return area
def filter_bboxes_by_visibility(
original_shape: Sequence[int],
bboxes: Sequence[BoxType],
transformed_shape: Sequence[int],
transformed_bboxes: Sequence[BoxType],
threshold: float = 0.0,
min_area: float = 0.0,
) -> List[BoxType]:
"""Filter bounding boxes and return only those boxes whose visibility after transformation is above
the threshold and minimal area of bounding box in pixels is more then min_area.
Args:
original_shape: Original image shape `(height, width, ...)`.
bboxes: Original bounding boxes `[(x_min, y_min, x_max, y_max)]`.
transformed_shape: Transformed image shape `(height, width)`.
transformed_bboxes: Transformed bounding boxes `[(x_min, y_min, x_max, y_max)]`.
threshold: visibility threshold. Should be a value in the range [0.0, 1.0].
min_area: Minimal area threshold.
Returns:
Filtered bounding boxes `[(x_min, y_min, x_max, y_max)]`.
"""
img_height, img_width = original_shape[:2]
transformed_img_height, transformed_img_width = transformed_shape[:2]
visible_bboxes = []
for bbox, transformed_bbox in zip(bboxes, transformed_bboxes):
if not all(0.0 <= value <= 1.0 for value in transformed_bbox[:4]):
continue
bbox_area = calculate_bbox_area(bbox, img_height, img_width)
transformed_bbox_area = calculate_bbox_area(transformed_bbox, transformed_img_height, transformed_img_width)
if transformed_bbox_area < min_area:
continue
visibility = transformed_bbox_area / bbox_area
if visibility >= threshold:
visible_bboxes.append(transformed_bbox)
return visible_bboxes
def convert_bbox_to_albumentations(
bbox: BoxType, source_format: str, rows: int, cols: int, check_validity: bool = False
) -> BoxType:
"""Convert a bounding box from a format specified in `source_format` to the format used by albumentations:
normalized coordinates of top-left and bottom-right corners of the bounding box in a form of
`(x_min, y_min, x_max, y_max)` e.g. `(0.15, 0.27, 0.67, 0.5)`.
Args:
bbox: A bounding box tuple.
source_format: format of the bounding box. Should be 'coco', 'pascal_voc', or 'yolo'.
check_validity: Check if all boxes are valid boxes.
rows: Image height.
cols: Image width.
Returns:
tuple: A bounding box `(x_min, y_min, x_max, y_max)`.
Note:
The `coco` format of a bounding box looks like `(x_min, y_min, width, height)`, e.g. (97, 12, 150, 200).
The `pascal_voc` format of a bounding box looks like `(x_min, y_min, x_max, y_max)`, e.g. (97, 12, 247, 212).
The `yolo` format of a bounding box looks like `(x, y, width, height)`, e.g. (0.3, 0.1, 0.05, 0.07);
where `x`, `y` coordinates of the center of the box, all values normalized to 1 by image height and width.
Raises:
ValueError: if `target_format` is not equal to `coco` or `pascal_voc`, or `yolo`.
ValueError: If in YOLO format all labels not in range (0, 1).
"""
if source_format not in {"coco", "pascal_voc", "yolo"}:
raise ValueError(
f"Unknown source_format {source_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'"
)
if source_format == "coco":
(x_min, y_min, width, height), tail = bbox[:4], bbox[4:]
x_max = x_min + width
y_max = y_min + height
elif source_format == "yolo":
# https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/scripts/voc_label.py#L12
_bbox = np.array(bbox[:4])
if check_validity and np.any((_bbox <= 0) | (_bbox > 1)):
raise ValueError("In YOLO format all coordinates must be float and in range (0, 1]")
(x, y, w, h), tail = bbox[:4], bbox[4:]
w_half, h_half = w / 2, h / 2
x_min = x - w_half
y_min = y - h_half
x_max = x_min + w
y_max = y_min + h
else:
(x_min, y_min, x_max, y_max), tail = bbox[:4], bbox[4:]
bbox = (x_min, y_min, x_max, y_max) + tuple(tail) # type: ignore
if source_format != "yolo":
bbox = normalize_bbox(bbox, rows, cols)
if check_validity:
check_bbox(bbox)
return bbox
def convert_bbox_from_albumentations(
bbox: BoxType, target_format: str, rows: int, cols: int, check_validity: bool = False
) -> BoxType:
"""Convert a bounding box from the format used by albumentations to a format, specified in `target_format`.
Args:
bbox: An albumentations bounding box `(x_min, y_min, x_max, y_max)`.
target_format: required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
rows: Image height.
cols: Image width.
check_validity: Check if all boxes are valid boxes.
Returns:
tuple: A bounding box.
Note:
The `coco` format of a bounding box looks like `[x_min, y_min, width, height]`, e.g. [97, 12, 150, 200].
The `pascal_voc` format of a bounding box looks like `[x_min, y_min, x_max, y_max]`, e.g. [97, 12, 247, 212].
The `yolo` format of a bounding box looks like `[x, y, width, height]`, e.g. [0.3, 0.1, 0.05, 0.07].
Raises:
ValueError: if `target_format` is not equal to `coco`, `pascal_voc` or `yolo`.
"""
if target_format not in {"coco", "pascal_voc", "yolo"}:
raise ValueError(
f"Unknown target_format {target_format}. Supported formats are: 'coco', 'pascal_voc' and 'yolo'"
)
if check_validity:
check_bbox(bbox)
if target_format != "yolo":
bbox = denormalize_bbox(bbox, rows, cols)
if target_format == "coco":
(x_min, y_min, x_max, y_max), tail = bbox[:4], tuple(bbox[4:])
width = x_max - x_min
height = y_max - y_min
bbox = cast(BoxType, (x_min, y_min, width, height) + tail)
elif target_format == "yolo":
(x_min, y_min, x_max, y_max), tail = bbox[:4], bbox[4:]
x = (x_min + x_max) / 2.0
y = (y_min + y_max) / 2.0
w = x_max - x_min
h = y_max - y_min
bbox = cast(BoxType, (x, y, w, h) + tail)
return bbox
def convert_bboxes_to_albumentations(
bboxes: Sequence[BoxType], source_format, rows, cols, check_validity=False
) -> List[BoxType]:
"""Convert a list bounding boxes from a format specified in `source_format` to the format used by albumentations"""
return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
def convert_bboxes_from_albumentations(
bboxes: Sequence[BoxType], target_format: str, rows: int, cols: int, check_validity: bool = False
) -> List[BoxType]:
"""Convert a list of bounding boxes from the format used by albumentations to a format, specified
in `target_format`.
Args:
bboxes: List of albumentation bounding box `(x_min, y_min, x_max, y_max)`.
target_format: required format of the output bounding box. Should be 'coco', 'pascal_voc' or 'yolo'.
rows: Image height.
cols: Image width.
check_validity: Check if all boxes are valid boxes.
Returns:
List of bounding boxes.
"""
return [convert_bbox_from_albumentations(bbox, target_format, rows, cols, check_validity) for bbox in bboxes]
def check_bbox(bbox: BoxType) -> None:
"""Check if bbox boundaries are in range 0, 1 and minimums are lesser then maximums"""
for name, value in zip(["x_min", "y_min", "x_max", "y_max"], bbox[:4]):
if not 0 <= value <= 1 and not np.isclose(value, 0) and not np.isclose(value, 1):
raise ValueError(f"Expected {name} for bbox {bbox} to be in the range [0.0, 1.0], got {value}.")
x_min, y_min, x_max, y_max = bbox[:4]
if x_max <= x_min:
raise ValueError(f"x_max is less than or equal to x_min for bbox {bbox}.")
if y_max <= y_min:
raise ValueError(f"y_max is less than or equal to y_min for bbox {bbox}.")
def check_bboxes(bboxes: Sequence[BoxType]) -> None:
"""Check if bboxes boundaries are in range 0, 1 and minimums are lesser then maximums"""
for bbox in bboxes:
check_bbox(bbox)
def filter_bboxes(
bboxes: Sequence[BoxType],
rows: int,
cols: int,
min_area: float = 0.0,
min_visibility: float = 0.0,
min_width: float = 0.0,
min_height: float = 0.0,
) -> List[BoxType]:
"""Remove bounding boxes that either lie outside of the visible area by more then min_visibility
or whose area in pixels is under the threshold set by `min_area`. Also it crops boxes to final image size.
Args:
bboxes: List of albumentation bounding box `(x_min, y_min, x_max, y_max)`.
rows: Image height.
cols: Image width.
min_area: Minimum area of a bounding box. All bounding boxes whose visible area in pixels.
is less than this value will be removed. Default: 0.0.
min_visibility: Minimum fraction of area for a bounding box to remain this box in list. Default: 0.0.
min_width: Minimum width of a bounding box. All bounding boxes whose width is
less than this value will be removed. Default: 0.0.
min_height: Minimum height of a bounding box. All bounding boxes whose height is
less than this value will be removed. Default: 0.0.
Returns:
List of bounding boxes.
"""
resulting_boxes: List[BoxType] = []
for bbox in bboxes:
# Calculate areas of bounding box before and after clipping.
transformed_box_area = calculate_bbox_area(bbox, rows, cols)
bbox, tail = cast(BoxType, tuple(np.clip(bbox[:4], 0, 1.0))), tuple(bbox[4:])
clipped_box_area = calculate_bbox_area(bbox, rows, cols)
# Calculate width and height of the clipped bounding box.
x_min, y_min, x_max, y_max = denormalize_bbox(bbox, rows, cols)[:4]
clipped_width, clipped_height = x_max - x_min, y_max - y_min
if (
clipped_box_area != 0 # to ensure transformed_box_area!=0 and to handle min_area=0 or min_visibility=0
and clipped_box_area >= min_area
and clipped_box_area / transformed_box_area >= min_visibility
and clipped_width >= min_width
and clipped_height >= min_height
):
resulting_boxes.append(cast(BoxType, bbox + tail))
return resulting_boxes
def union_of_bboxes(height: int, width: int, bboxes: Sequence[BoxType], erosion_rate: float = 0.0) -> BoxType:
"""Calculate union of bounding boxes.
Args:
height (float): Height of image or space.
width (float): Width of image or space.
bboxes (List[tuple]): List like bounding boxes. Format is `[(x_min, y_min, x_max, y_max)]`.
erosion_rate (float): How much each bounding box can be shrinked, useful for erosive cropping.
Set this in range [0, 1]. 0 will not be erosive at all, 1.0 can make any bbox to lose its volume.
Returns:
tuple: A bounding box `(x_min, y_min, x_max, y_max)`.
"""
x1, y1 = width, height
x2, y2 = 0, 0
for bbox in bboxes:
x_min, y_min, x_max, y_max = bbox[:4]
w, h = x_max - x_min, y_max - y_min
lim_x1, lim_y1 = x_min + erosion_rate * w, y_min + erosion_rate * h
lim_x2, lim_y2 = x_max - erosion_rate * w, y_max - erosion_rate * h
x1, y1 = np.min([x1, lim_x1]), np.min([y1, lim_y1])
x2, y2 = np.max([x2, lim_x2]), np.max([y2, lim_y2])
return x1, y1, x2, y2