Spaces:
Running
on
Zero
Running
on
Zero
""" | |
This module provides high-level interfaces to run MMDetection and MMPose
models sequentially. Users can call run_MMDetector and run_MMPose from
other scripts (e.g., bmp_demo.py) to perform object detection and
pose estimation in a clean, modular fashion.
""" | |
import numpy as np | |
from mmdet.apis import inference_detector | |
from mmengine.structures import InstanceData | |
from mmpose.apis import inference_topdown | |
from mmpose.evaluation.functional import nms | |
from mmpose.structures import merge_data_samples | |
def run_MMDetector(detector, image, det_cat_id: int = 0, bbox_thr: float = 0.3, nms_thr: float = 0.3) -> InstanceData:
    """
    Run an MMDetection model and return the filtered, de-duplicated detections.

    Detections are kept when their label equals ``det_cat_id`` and their score
    is strictly greater than ``bbox_thr``; the survivors are sorted by
    descending score and passed through Non-Maximum Suppression (NMS).

    Args:
        detector: An initialized MMDetection detector model.
        image: Input image as file path or BGR numpy array.
        det_cat_id: Category ID to filter detections (default is 0 for 'person').
        bbox_thr: Score threshold; only detections scoring above it are kept.
        nms_thr: IoU threshold for Non-Maximum Suppression (NMS).

    Returns:
        InstanceData: A structure with ``bboxes`` (N, 4) and ``bbox_scores`` (N,).
        ``masks`` is attached when the detector predicts masks. When nothing
        passes the filter, an empty structure is returned whose ``masks`` field
        is an empty (0, 1, 1) placeholder.
    """
    # Run detection and move the predictions to numpy
    det_result = inference_detector(detector, image)
    pred_instances = det_result.pred_instances.cpu().numpy()

    # Filter by category and score
    keep_mask = np.logical_and(pred_instances.labels == det_cat_id, pred_instances.scores > bbox_thr)
    if not np.any(keep_mask):
        # Return empty structure if nothing passes threshold
        return InstanceData(bboxes=np.zeros((0, 4)), bbox_scores=np.zeros((0,)), masks=np.zeros((0, 1, 1)))

    # Aggregate bboxes and scores into an (N, 5) array, the layout nms() expects
    bboxes = np.concatenate((pred_instances.bboxes, pred_instances.scores[:, None]), axis=1)[keep_mask]

    # Masks are optional: box-only detectors do not provide them
    masks = getattr(pred_instances, "masks", None)
    if masks is not None:
        masks = masks[keep_mask]

    # Sort detections by descending score
    order = np.argsort(bboxes[:, 4])[::-1]
    bboxes = bboxes[order]
    if masks is not None:
        masks = masks[order]

    # Apply Non-Maximum Suppression
    keep_indices = nms(bboxes, nms_thr)
    bboxes = bboxes[keep_indices]

    det_instances = InstanceData(bboxes=bboxes[:, :4], bbox_scores=bboxes[:, 4])
    if masks is not None:
        # InstanceData only accepts sized values, so the field is attached
        # only when masks exist instead of being set to None.
        det_instances.masks = masks[keep_indices]
    return det_instances
def run_MMPose(pose_estimator, image, detections: InstanceData, kpt_thr: float = 0.3) -> InstanceData:
    """
    Run an MMPose top-down model to estimate human pose given detected bounding boxes.

    Args:
        pose_estimator: An initialized MMPose model.
        image: Input image as file path or RGB/BGR numpy array.
        detections: InstanceData from run_MMDetector containing bboxes,
            bbox_scores and, optionally, masks.
        kpt_thr: Keypoint score threshold. Kept for interface compatibility;
            it is currently NOT applied, so all keypoints are returned
            regardless of their score.

    Returns:
        InstanceData: The merged pose predictions (``pred_instances``) of the
        top-down model, with the detection masks attached as ``pred_masks``
        when masks were provided. When ``detections`` is empty, a structure
        with empty ``keypoints`` (0, 17, 3) and ``keypoint_scores`` (0, 17)
        placeholders plus the original bboxes, bbox_scores and masks (if any)
        is returned instead.
    """
    bboxes = detections.bboxes
    # Masks are optional; tolerate detections that carry no ``masks`` field
    masks = getattr(detections, "masks", None)

    if bboxes.shape[0] == 0:
        # No detections => empty pose data
        empty_fields = {
            "keypoints": np.zeros((0, 17, 3)),
            "keypoint_scores": np.zeros((0, 17)),
            "bboxes": bboxes,
            "bbox_scores": detections.bbox_scores,
        }
        if masks is not None:
            # InstanceData only accepts sized values, so skip the field
            # rather than storing None.
            empty_fields["masks"] = masks
        return InstanceData(**empty_fields)

    # Run top-down pose estimation
    # NOTE(review): the ``masks`` keyword presumably relies on a project-patched
    # MMPose ``inference_topdown`` -- confirm against the installed version.
    pose_results = inference_topdown(pose_estimator, image, bboxes, masks=masks)
    data_samples = merge_data_samples(pose_results)

    # Attach masks back into the data_samples if available
    if masks is not None:
        data_samples.pred_instances.pred_masks = masks

    return data_samples.pred_instances