Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,298 Bytes
a249588 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from mmpose.structures import bbox_cs2xyxy, bbox_xyxy2cs
from .base import BaseKeypointCodec
@KEYPOINT_CODECS.register_module()
class EDPoseLabel(BaseKeypointCodec):
    r"""Generate keypoint and label coordinates for `ED-Pose`_ by
    Yang J. et al (2023).

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D),
            divided by the image size
        - keypoints_visible (np.ndarray): Keypoint visibility. When it is
            not supplied, an all-ones array in shape (N, K) is generated
        - area (np.ndarray): Area divided by ``w * h``, in shape (N)
        - bbox (np.ndarray): Bbox in shape (N, 4), i.e. the concatenated
            outputs of ``bbox_xyxy2cs`` divided by ``[w, h, w, h]``

    Args:
        num_select (int): The number of candidate instances kept by
            :meth:`decode`. Defaults to 100
        num_keypoints (int): The number of keypoints per instance.
            Defaults to 17
    """

    auxiliary_encode_keys = {'area', 'bboxes', 'img_shape'}
    instance_mapping_table = dict(
        bbox='bboxes',
        keypoints='keypoints',
        keypoints_visible='keypoints_visible',
        area='areas',
    )

    def __init__(self, num_select: int = 100, num_keypoints: int = 17):
        super().__init__()

        self.num_select = num_select
        self.num_keypoints = num_keypoints

    def encode(
        self,
        img_shape,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None,
        area: Optional[np.ndarray] = None,
        bboxes: Optional[np.ndarray] = None,
    ) -> dict:
        """Encoding keypoints, area and bbox from input image space to
        normalized space.

        Args:
            img_shape (Sequence[int]): The shape of image, unpacked as
                (width, height).
            keypoints (np.ndarray): Keypoint coordinates in
                shape (N, K, D). The last axis is divided by
                ``[w, h]``, so D is presumably 2.
            keypoints_visible (np.ndarray, optional): Keypoint visibility
                in shape (N, K). If None, all keypoints are marked visible.
            area (np.ndarray, optional): Instance areas in image space.
                Left as None when not given.
            bboxes (np.ndarray, optional): Bounding boxes accepted by
                ``bbox_xyxy2cs`` (presumably xyxy format in shape (N, 4)).
                Left as None when not given.

        Returns:
            encoded (dict): Contains the following items:

                - keypoints (np.ndarray): The keypoints divided by the
                    image size, in shape like (N, K, D).
                - area (np.ndarray | None): The area divided by ``w * h``.
                - bbox (np.ndarray | None): The ``bbox_xyxy2cs`` outputs
                    concatenated along the last axis and divided by
                    ``[w, h, w, h]``.
                - keypoints_visible (np.ndarray): Keypoint visibility,
                    returned unchanged when it was supplied.
        """
        w, h = img_shape

        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        if bboxes is not None:
            bboxes = np.concatenate(bbox_xyxy2cs(bboxes), axis=-1)
            bboxes = bboxes / np.array([w, h, w, h], dtype=np.float32)

        if area is not None:
            area = area / float(w * h)

        if keypoints is not None:
            keypoints = keypoints / np.array([w, h], dtype=np.float32)

        encoded = dict(
            keypoints=keypoints,
            area=area,
            bbox=bboxes,
            keypoints_visible=keypoints_visible)

        return encoded

    def decode(self, input_shapes: np.ndarray, pred_logits: np.ndarray,
               pred_boxes: np.ndarray, pred_keypoints: np.ndarray) -> tuple:
        """Select the final top-k keypoints, and decode the results from
        normalize size to origin input size.

        Args:
            input_shapes (np.ndarray): The size of the resized input image.
                It is split in two halves along axis 0, read as
                (height, width).
            pred_logits (np.ndarray): The result of score. It is flattened
                for ranking, and a flat index is mapped back to a row with
                ``// pred_logits.shape[1]``, so a 2-D array is expected.
            pred_boxes (np.ndarray): The result of bbox, one row per row of
                ``pred_logits``. The first two columns and the remaining
                columns are passed to ``bbox_cs2xyxy`` as separate
                arguments.
            pred_keypoints (np.ndarray): The result of keypoints, one row
                per row of ``pred_logits``. Only the first
                ``2 * num_keypoints`` columns are used.

        Returns:
            tuple: Decoded boxes, keypoints, and keypoint scores.

            - boxes (np.ndarray): Selected boxes scaled by
                ``[img_w, img_h, img_w, img_h]``.
            - keypoints (np.ndarray): Selected keypoints scaled by
                ``[img_w, img_h]``, in shape (num_selected, K, 2).
            - scores (np.ndarray): The selected score repeated for each
                keypoint, in shape (num_selected, K).
        """
        num_keypoints = self.num_keypoints
        prob = pred_logits.reshape(-1)

        # Rank all flattened scores in descending order and keep at most
        # ``num_select`` of them.
        topk_indexes = np.argsort(-prob)[:self.num_select]
        topk_values = prob[topk_indexes]
        scores = np.tile(topk_values[:, np.newaxis], [1, num_keypoints])

        # A flat index maps back to its row of ``pred_logits``; boxes and
        # keypoints are both gathered with this one row index.
        topk_rows = topk_indexes // pred_logits.shape[1]

        # Scale factors that convert relative to absolute coordinates.
        img_h, img_w = np.split(input_shapes, 2, axis=0)
        box_scale = np.hstack([img_w, img_h, img_w, img_h])
        keypoint_scale = np.tile(np.hstack([img_w, img_h]), [num_keypoints])

        # Decode bounding boxes
        boxes = bbox_cs2xyxy(*np.split(pred_boxes, [2], axis=-1))
        boxes = boxes[topk_rows] * box_scale[np.newaxis, :]

        # Decode keypoints: keep the leading (x, y) pairs of each selected
        # row and drop the trailing columns.
        keypoints = pred_keypoints[topk_rows][:, :(num_keypoints * 2)]
        keypoints = keypoints * keypoint_scale[np.newaxis, :]
        keypoints = keypoints.reshape(-1, num_keypoints, 2)

        return boxes, keypoints, scores
|