File size: 3,449 Bytes
a249588
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple

import numpy as np

from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec

# Large-magnitude sentinel coordinates. They are written over invisible
# keypoints before the min/max reductions in bounding-box expansion, so
# that (for coordinates inside this range) such keypoints never decide
# the result.
INF = 1e6
NEG_INF = -INF


class BaseAnnotationProcessor(BaseKeypointCodec):
    """Common parent class for annotation processors.

    It only supplies a ``decode`` method that does nothing.
    """

    def decode(self, *args, **kwargs):
        """Accept any arguments, perform no work and return ``None``."""
        return None


@KEYPOINT_CODECS.register_module()
class YOLOXPoseAnnotationProcessor(BaseAnnotationProcessor):
    """Convert dataset annotations to the input format of YOLOX-Pose.

    This processor optionally expands bounding boxes so that they enclose
    the visible keypoints, and converts category IDs to labels by
    subtracting one.

    Args:
        extend_bbox (bool, optional): Whether to expand the bounding box
            to include all visible keypoints. Defaults to False.
        input_size (tuple, optional): The size of the input image for the
            model, formatted as (h, w). This argument is necessary for the
            codec in deployment, but it is not used by this class.
            Defaults to None.
    """

    # Annotation fields that ``encode`` accepts besides the keypoints.
    auxiliary_encode_keys = {'category_id', 'bbox'}
    # Key renaming tables. They are not read inside this class; presumably
    # they are consumed by the data packing step - verify against caller.
    label_mapping_table = dict(
        bbox='bboxes',
        bbox_labels='labels',
        keypoints='keypoints',
        keypoints_visible='keypoints_visible',
        area='areas',
    )
    instance_mapping_table = dict(
        bbox='bboxes',
        bbox_score='bbox_scores',
        keypoints='keypoints',
        keypoints_visible='keypoints_visible',
        # remove 'bbox_scales' in default instance_mapping_table to avoid
        # length mismatch during training with multiple datasets
    )

    def __init__(self,
                 extend_bbox: bool = False,
                 input_size: Optional[Tuple] = None):
        super().__init__()
        # ``input_size`` is accepted but intentionally not stored.
        self.extend_bbox = extend_bbox

    def encode(self,
               keypoints: Optional[np.ndarray] = None,
               keypoints_visible: Optional[np.ndarray] = None,
               bbox: Optional[np.ndarray] = None,
               category_id: Optional[List[int]] = None
               ) -> Dict[str, np.ndarray]:
        """Encode keypoints, bounding boxes, and category IDs.

        The input arrays are never modified; an expanded bounding box is
        returned as a new array.

        Args:
            keypoints (np.ndarray, optional): Keypoints array. The
                reduction runs over axis 1, so a layout of
                (instances, keypoints, 2) is assumed. Defaults to None.
            keypoints_visible (np.ndarray, optional): Visibility array for
                keypoints. Entries equal to 0 mark keypoints that are
                ignored during expansion. For a 3-dim array only the
                first channel of the last axis is used. If None, every
                keypoint is treated as visible. Defaults to None.
            bbox (np.ndarray, optional): Bounding box array. The first two
                values of the last axis are treated as the lower corner
                and the remaining values as the upper corner. Defaults to
                None.
            category_id (List[int], optional): List of category IDs.
                Defaults to None.

        Returns:
            Dict[str, np.ndarray]: Encoded annotations. Contains ``bbox``
            when ``extend_bbox`` is enabled and ``bbox`` is given, and
            ``bbox_labels`` when ``category_id`` is given.
        """
        results = {}

        if self.extend_bbox and bbox is not None:
            # Expand a copy so the caller's annotation array stays intact.
            bbox = bbox.copy()

            # Without keypoints there is nothing to expand towards, and a
            # min/max over an empty axis would raise, so skip in that case.
            if keypoints is not None and keypoints.size > 0:
                if keypoints_visible is None:
                    # No visibility information: nothing is masked out.
                    invisible = np.zeros(keypoints.shape[:-1], dtype=bool)
                else:
                    # Handle keypoints visibility
                    if keypoints_visible.ndim == 3:
                        keypoints_visible = keypoints_visible[..., 0]
                    invisible = keypoints_visible == 0

                # Invisible keypoints are replaced by sentinels so that
                # they cannot become the minimum / maximum coordinate.
                kpts_min = keypoints.copy()
                kpts_min[invisible] = INF
                lower = kpts_min.min(axis=1)
                bbox[..., :2] = np.minimum(bbox[..., :2], lower)

                kpts_max = keypoints.copy()
                kpts_max[invisible] = NEG_INF
                upper = kpts_max.max(axis=1)
                bbox[..., 2:] = np.maximum(bbox[..., 2:], upper)

            results['bbox'] = bbox

        if category_id is not None:
            # Convert category IDs to labels.
            # NOTE(review): the int8 cast is kept for output compatibility;
            # category IDs above 127 would wrap around - confirm that the
            # supported datasets stay within this range.
            bbox_labels = np.array(category_id).astype(np.int8) - 1
            results['bbox_labels'] = bbox_labels

        return results