Spaces:
Running
on
Zero
Running
on
Zero
File size: 11,313 Bytes
a249588 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import cv2
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils import (generate_offset_heatmap, generate_onehot_heatmaps,
get_heatmap_maximum, refine_keypoints_dark_udp)
@KEYPOINT_CODECS.register_module()
class OneHotHeatmap(BaseKeypointCodec):
    r"""Encode keypoints as heatmaps on a (possibly padded) activation map.

    The coordinate handling follows the UDP codec described in `The Devil is
    in the Details: Delving into Unbiased Data Processing for Human Pose
    Estimation`_ by Huang et al (2020), with one extension: the activation
    map may cover a different area than the input image (see ``amap_scale``).
    The centers of the activation map and of the input image are aligned.

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmaps (np.ndarray): The generated heatmap in shape (C_out, H, W)
            where [W, H] is the `heatmap_size`, and the C_out is the output
            channel number which depends on the `heatmap_type`. If
            `heatmap_type=='gaussian'`, C_out equals to keypoint number K;
            if `heatmap_type=='combined'`, C_out equals to K*3
            (x_offset, y_offset and class label)
        - keypoint_weights (np.ndarray): The target weights in shape (K,)
        - annotated (np.ndarray): Boolean mask, ``keypoints_visible > 0``
        - in_image (np.ndarray): Boolean mask of keypoints whose activation
            map coordinates fall inside ``[0, W) x [0, H)``
        - keypoints_scaled (np.ndarray): The input keypoints, unchanged
        - heatmap_keypoints (np.ndarray): The keypoints in activation map
            coordinates
        - identification_similarity (float): The ``id_similarity`` argument,
            passed through

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        heatmap_type (str): The heatmap type to encode the keypoints. Options
            are:

            - ``'gaussian'``: Heatmap produced by
                ``generate_onehot_heatmaps``
            - ``'combined'``: Combination of a binary label map and offset
                maps for X and Y axes.

        sigma (float): The sigma value forwarded to the heatmap generator
            when ``heatmap_type=='gaussian'``. Defaults to 2.0
        radius_factor (float): The radius factor of the binary label
            map when ``heatmap_type=='combined'``. The positive region is
            defined as the neighbor of the keypoint with the radius
            :math:`r=radius_factor*max(W, H)`. Defaults to 0.0546875
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation in DarkPose. Defaults to 11
        increase_sigma_with_padding (bool): Forwarded unchanged to
            ``generate_onehot_heatmaps`` when ``heatmap_type=='gaussian'``.
            Defaults to ``False``
        amap_scale (float): Size of the activation map relative to the input
            image, i.e. ``amap_size = input_size * amap_scale``.
            Defaults to 1.0
        normalize (float, optional): If not ``None``, every heatmap channel
            with a positive sum is divided by that sum and all heatmaps are
            then multiplied by this value. Defaults to ``None``

    .. _`The Devil is in the Details: Delving into Unbiased Data Processing for
    Human Pose Estimation`: https://arxiv.org/abs/1911.07524
    """

    label_mapping_table = dict(keypoint_weights='keypoint_weights', )
    field_mapping_table = dict(heatmaps='heatmaps', )

    def __init__(self,
                 input_size: Tuple[int, int],
                 heatmap_size: Tuple[int, int],
                 heatmap_type: str = 'gaussian',
                 sigma: float = 2.,
                 radius_factor: float = 0.0546875,
                 blur_kernel_size: int = 11,
                 increase_sigma_with_padding: bool = False,
                 amap_scale: float = 1.0,
                 normalize: Optional[float] = None,
                 ) -> None:
        super().__init__()
        self.input_size = np.array(input_size)
        self.heatmap_size = np.array(heatmap_size)
        self.sigma = sigma
        self.radius_factor = radius_factor
        self.heatmap_type = heatmap_type
        self.blur_kernel_size = blur_kernel_size
        self.increase_sigma_with_padding = increase_sigma_with_padding
        self.normalize = normalize

        # Activation map extent expressed in input-image pixels.
        self.amap_size = self.input_size * amap_scale
        # Input-image pixels per heatmap cell; both extents are measured
        # between the first and the last pixel center, hence the "- 1".
        self.scale_factor = ((self.amap_size - 1) /
                             (self.heatmap_size - 1)).astype(np.float32)

        # The activation map shares its center with the input image, so its
        # top-left corner is offset by half of the size difference.
        self.input_center = self.input_size / 2
        self.top_left = self.input_center - self.amap_size / 2

        if self.heatmap_type not in {'gaussian', 'combined'}:
            raise self._invalid_heatmap_type_error()

    def _invalid_heatmap_type_error(self) -> ValueError:
        """Build the error raised for an unsupported ``heatmap_type``."""
        return ValueError(
            f'{self.__class__.__name__} got invalid `heatmap_type` value '
            f'{self.heatmap_type}. Should be one of '
            '{"gaussian", "combined"}')

    def _kpts_to_activation_pts(self, keypoints: np.ndarray) -> np.ndarray:
        """Transform the keypoint coordinates to the activation space.

        In the original UDPHeatmap, activation map is the same as the input
        image space with different resolution but in this case we allow the
        activation map to have different size (padding) than the input image
        space. Centers of activation map and input image space are aligned.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in the input image
                space; the last axis must broadcast against [x, y]

        Returns:
            np.ndarray: ``(keypoints - top_left) / scale_factor``
        """
        shifted = keypoints - self.top_left
        return shifted / self.scale_factor

    def _activation_pts_to_kpts(self, keypoints: np.ndarray) -> np.ndarray:
        """Transform the points in activation map to keypoint coordinates.

        In the original UDPHeatmap, activation map is the same as the input
        image space with different resolution but in this case we allow the
        activation map to have different size (padding) than the input image
        space. Centers of activation map and input image space are aligned.

        Args:
            keypoints (np.ndarray): Coordinates in the activation map; the
                last axis must broadcast against [x, y]

        Returns:
            np.ndarray: ``keypoints / [W - 1, H - 1] * amap_size + top_left``
        """
        W, H = self.heatmap_size
        # NOTE(review): this multiplies by ``amap_size`` whereas the forward
        # transform divides by ``(amap_size - 1) / (heatmap_size - 1)``, so
        # the two are not exact inverses. Presumably kept to match how the
        # result is consumed downstream -- confirm before changing.
        transformed_keypoints = keypoints / [W - 1, H - 1] * self.amap_size
        transformed_keypoints += self.top_left
        return transformed_keypoints

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None,
               id_similarity: Optional[float] = 0.0,
               keypoints_visibility: Optional[np.ndarray] = None) -> dict:
        """Encode keypoints into heatmaps. Note that the original keypoint
        coordinates should be in the input image space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). Defaults to all ones
            id_similarity (float): The usefulness of the identity information
                for the whole pose. Defaults to 0.0
            keypoints_visibility (np.ndarray): The visibility bit for each
                keypoint (N, K). Defaults to all zeros

        Returns:
            dict:
            - heatmaps (np.ndarray): The generated heatmap in shape
                (C_out, H, W) where [W, H] is the `heatmap_size`, and the
                C_out is the output channel number which depends on the
                `heatmap_type`. If `heatmap_type=='gaussian'`, C_out equals to
                keypoint number K; if `heatmap_type=='combined'`, C_out
                equals to K*3 (x_offset, y_offset and class label)
            - keypoint_weights (np.ndarray): The target weights in shape
                (K,)
            - annotated (np.ndarray): ``keypoints_visible > 0``
            - in_image (np.ndarray): Whether each keypoint lies inside the
                activation map, in shape (N, K)
            - keypoints_scaled (np.ndarray): The input ``keypoints``
            - heatmap_keypoints (np.ndarray): The keypoints in activation
                map coordinates
            - identification_similarity (float): The ``id_similarity``
                argument

        Raises:
            ValueError: If ``heatmap_type`` is not supported.
        """
        assert keypoints.shape[0] == 1, (
            f'{self.__class__.__name__} only support single-instance '
            'keypoint encoding')

        if keypoints_visibility is None:
            keypoints_visibility = np.zeros(
                keypoints.shape[:2], dtype=np.float32)

        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        if self.heatmap_type == 'gaussian':
            heatmaps, keypoint_weights = generate_onehot_heatmaps(
                heatmap_size=self.heatmap_size,
                keypoints=self._kpts_to_activation_pts(keypoints),
                keypoints_visible=keypoints_visible,
                sigma=self.sigma,
                keypoints_visibility=keypoints_visibility,
                increase_sigma_with_padding=self.increase_sigma_with_padding)
        elif self.heatmap_type == 'combined':
            heatmaps, keypoint_weights = generate_offset_heatmap(
                heatmap_size=self.heatmap_size,
                keypoints=self._kpts_to_activation_pts(keypoints),
                keypoints_visible=keypoints_visible,
                radius_factor=self.radius_factor)
        else:
            # Only reachable if ``heatmap_type`` was changed after __init__.
            raise self._invalid_heatmap_type_error()

        if self.normalize is not None:
            # Rescale every non-empty channel to sum to ``normalize``.
            # All-zero channels are skipped; eps guards the division.
            heatmaps_sum = np.sum(heatmaps, axis=(1, 2), keepdims=False)
            mask = heatmaps_sum > 0
            eps = np.finfo(np.float32).eps
            heatmaps[mask, :, :] = heatmaps[mask, :, :] / (
                heatmaps_sum[mask, None, None] + eps)
            heatmaps = heatmaps * self.normalize

        annotated = keypoints_visible > 0

        # Computed separately from the arrays handed to the generators above
        # so the returned coordinates never alias a helper's input.
        heatmap_keypoints = self._kpts_to_activation_pts(keypoints)
        xs = heatmap_keypoints[:, :, 0]
        ys = heatmap_keypoints[:, :, 1]
        in_image = ((xs >= 0) & (xs < self.heatmap_size[0]) &
                    (ys >= 0) & (ys < self.heatmap_size[1]))

        encoded = dict(
            heatmaps=heatmaps,
            keypoint_weights=keypoint_weights,
            annotated=annotated,
            in_image=in_image,
            keypoints_scaled=keypoints,
            heatmap_keypoints=heatmap_keypoints,
            identification_similarity=id_similarity,
        )

        return encoded

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Decode keypoint coordinates from heatmaps. The decoded keypoint
        coordinates are in the input image space.

        Args:
            encoded (np.ndarray): Heatmaps in shape (K, H, W)

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction

        Raises:
            ValueError: If ``heatmap_type`` is not supported.
        """
        # Work on a copy: the 'combined' branch blurs the maps in place.
        heatmaps = encoded.copy()

        if self.heatmap_type == 'gaussian':
            keypoints, scores = get_heatmap_maximum(heatmaps)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None]
            scores = scores[None]

            keypoints = refine_keypoints_dark_udp(
                keypoints, heatmaps, blur_kernel_size=self.blur_kernel_size)

        elif self.heatmap_type == 'combined':
            # Channels are interleaved per keypoint as
            # (class label, x_offset, y_offset).
            _K, H, W = heatmaps.shape
            K = _K // 3

            # Apply Gaussian blur on classification maps (in place)
            ks = 2 * self.blur_kernel_size + 1
            for cls_heatmap in heatmaps[::3]:
                cv2.GaussianBlur(cls_heatmap, (ks, ks), 0, cls_heatmap)

            # valid radius
            radius = self.radius_factor * max(W, H)

            x_offset = heatmaps[1::3].flatten() * radius
            y_offset = heatmaps[2::3].flatten() * radius
            keypoints, scores = get_heatmap_maximum(heatmaps=heatmaps[::3])

            # Flat index of every maximum inside the flattened offset maps:
            # position within its own H*W plane plus the plane's start.
            index = (keypoints[..., 0] + keypoints[..., 1] * W).flatten()
            index += W * H * np.arange(0, K)
            index = index.astype(int)
            keypoints += np.stack((x_offset[index], y_offset[index]), axis=-1)
            # unsqueeze the instance dimension for single-instance results
            keypoints = keypoints[None].astype(np.float32)
            scores = scores[None]

        else:
            # Mirrors ``encode``; only reachable if ``heatmap_type`` was
            # changed after __init__.
            raise self._invalid_heatmap_type_error()

        keypoints = self._activation_pts_to_kpts(keypoints)

        return keypoints, scores
|