""" This module provides high-level interfaces to run MMDetection and MMPose models sequentially. Users can call run_MMDetector and run_MMPose from other scripts (e.g., bmp_demo.py) to perform object detection and pose estimation in a clean, modular fashion. """ import numpy as np from mmdet.apis import inference_detector from mmengine.structures import InstanceData from mmpose.apis import inference_topdown from mmpose.evaluation.functional import nms from mmpose.structures import merge_data_samples def run_MMDetector(detector, image, det_cat_id: int = 0, bbox_thr: float = 0.3, nms_thr: float = 0.3) -> InstanceData: """ Run an MMDetection model to detect bounding boxes (and masks) in an image. Args: detector: An initialized MMDetection detector model. image: Input image as file path or BGR numpy array. det_cat_id: Category ID to filter detections (default is 0 for 'person'). bbox_thr: Minimum bounding box score threshold. nms_thr: IoU threshold for Non-Maximum Suppression (NMS). Returns: InstanceData: A structure containing filtered bboxes, bbox_scores, and masks (if available). """ # Run detection det_result = inference_detector(detector, image) pred_instances = det_result.pred_instances.cpu().numpy() # Aggregate bboxes and scores into an (N, 5) array bboxes_all = np.concatenate((pred_instances.bboxes, pred_instances.scores[:, None]), axis=1) # Filter by category and score keep_mask = np.logical_and(pred_instances.labels == det_cat_id, pred_instances.scores > bbox_thr) if not np.any(keep_mask): # Return empty structure if nothing passes threshold return InstanceData(bboxes=np.zeros((0, 4)), bbox_scores=np.zeros((0,)), masks=np.zeros((0, 1, 1))) bboxes = bboxes_all[keep_mask] masks = getattr(pred_instances, "masks", None) if masks is not None: masks = masks[keep_mask] # Sort detections by descending score order = np.argsort(bboxes[:, 4])[::-1] bboxes = bboxes[order] if masks is not None: masks = masks[order] # Apply Non-Maximum Suppression keep_indices = nms(bboxes, nms_thr) bboxes = bboxes[keep_indices] if masks is not None: masks = masks[keep_indices] # Construct InstanceData to return det_instances = InstanceData(bboxes=bboxes[:, :4], bbox_scores=bboxes[:, 4], masks=masks) return det_instances def run_MMPose(pose_estimator, image, detections: InstanceData, kpt_thr: float = 0.3) -> InstanceData: """ Run an MMPose top-down model to estimate human pose given detected bounding boxes. Args: pose_estimator: An initialized MMPose model. image: Input image as file path or RGB/BGR numpy array. detections: InstanceData from run_MMDetector containing bboxes and masks. kpt_thr: Minimum keypoint score threshold to filter low-confidence joints. Returns: InstanceData: A structure containing estimated keypoints, keypoint_scores, original bboxes, and masks (if provided). """ # Extract bounding boxes bboxes = detections.bboxes if bboxes.shape[0] == 0: # No detections => empty pose data return InstanceData( keypoints=np.zeros((0, 17, 3)), keypoint_scores=np.zeros((0, 17)), bboxes=bboxes, bbox_scores=detections.bbox_scores, masks=detections.masks, ) # Run top-down pose estimation pose_results = inference_topdown(pose_estimator, image, bboxes, masks=detections.masks) data_samples = merge_data_samples(pose_results) # Attach masks back into the data_samples if available if detections.masks is not None: data_samples.pred_instances.pred_masks = detections.masks # Filter out low-confidence keypoints kp_scores = data_samples.pred_instances.keypoint_scores kp_mask = kp_scores >= kpt_thr # data_samples.pred_instances.keypoints[~kp_mask] = [0, 0, 0] # Return final InstanceData for poses return data_samples.pred_instances