"""
This module provides high-level interfaces to run MMDetection and MMPose
models sequentially. Users can call run_MMDetector and run_MMPose from
other scripts (e.g., bmp_demo.py) to perform object detection and
pose estimation in a clean, modular fashion.
"""

import numpy as np
from mmdet.apis import inference_detector
from mmengine.structures import InstanceData

from mmpose.apis import inference_topdown
from mmpose.evaluation.functional import nms
from mmpose.structures import merge_data_samples


def run_MMDetector(detector, image, det_cat_id: int = 0, bbox_thr: float = 0.3, nms_thr: float = 0.3) -> InstanceData:
    """
    Run an MMDetection model to detect bounding boxes (and masks) in an image.

    Args:
        detector: An initialized MMDetection detector model.
        image: Input image as file path or BGR numpy array.
        det_cat_id: Category ID to filter detections (default is 0 for 'person').
        bbox_thr: Minimum bounding box score threshold.
        nms_thr: IoU threshold for Non-Maximum Suppression (NMS).

    Returns:
        InstanceData: A structure containing filtered bboxes, bbox_scores, and masks (if available).
    """
    # Run detection
    det_result = inference_detector(detector, image)
    pred_instances = det_result.pred_instances.cpu().numpy()

    # Aggregate bboxes and scores into an (N, 5) array
    bboxes_all = np.concatenate((pred_instances.bboxes, pred_instances.scores[:, None]), axis=1)

    # Filter by category and score
    keep_mask = np.logical_and(pred_instances.labels == det_cat_id, pred_instances.scores > bbox_thr)
    if not np.any(keep_mask):
        # Return an empty structure if nothing passes the threshold
        return InstanceData(bboxes=np.zeros((0, 4)), bbox_scores=np.zeros((0,)))

    bboxes = bboxes_all[keep_mask]
    masks = getattr(pred_instances, "masks", None)
    if masks is not None:
        masks = masks[keep_mask]

    # Sort detections by descending score
    order = np.argsort(bboxes[:, 4])[::-1]
    bboxes = bboxes[order]
    if masks is not None:
        masks = masks[order]

    # Apply Non-Maximum Suppression
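    # mmpose's `nms` takes an (N, 5) array of [x1, y1, x2, y2, score] rows and
    # returns the indices of the detections to keep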
    keep_indices = nms(bboxes, nms_thr)
    bboxes = bboxes[keep_indices]
    if masks is not None:
        masks = masks[keep_indices]

    # Construct the InstanceData to return; masks are attached only when the
    # detector actually produced them (e.g. instance-segmentation models),
    # since every field set on an InstanceData must be a sized container.
    det_instances = InstanceData(bboxes=bboxes[:, :4], bbox_scores=bboxes[:, 4])
    if masks is not None:
        det_instances.masks = masks
    return det_instances
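
# For reference, the fields on the returned InstanceData follow the usual
# MMDetection conventions (shapes shown are typical, not enforced here):
#
#     det_instances.bboxes       # (N, 4) float array of [x1, y1, x2, y2]
#     det_instances.bbox_scores  # (N,) float array of detection confidences
#     det_instances.masks        # (N, H, W) boolean masks, present only for
#                                # instance-segmentation detectors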


def run_MMPose(pose_estimator, image, detections: InstanceData, kpt_thr: float = 0.3) -> InstanceData:
    """
    Run an MMPose top-down model to estimate human pose given detected bounding boxes.

    Args:
        pose_estimator: An initialized MMPose model.
        image: Input image as file path or BGR numpy array.
        detections: InstanceData from run_MMDetector containing bboxes,
            bbox_scores, and (optionally) masks.
        kpt_thr: Minimum keypoint score threshold to filter low-confidence joints.

    Returns:
        InstanceData: A structure containing estimated keypoints, keypoint_scores,
                      original bboxes, and masks (if provided).
    """
    # Extract bounding boxes and optional instance masks
    bboxes = detections.bboxes
    masks = getattr(detections, "masks", None)
    if bboxes.shape[0] == 0:
        # No detections => return empty pose data
        empty_instances = InstanceData(
            keypoints=np.zeros((0, 17, 3)),
            keypoint_scores=np.zeros((0, 17)),
            bboxes=bboxes,
            bbox_scores=detections.bbox_scores,
        )
        if masks is not None:
            empty_instances.masks = masks
        return empty_instances

    # Run top-down pose estimation on the detected boxes, then merge the
    # per-person results into a single data sample
    pose_results = inference_topdown(pose_estimator, image, bboxes, masks=masks)
    data_samples = merge_data_samples(pose_results)

    # Attach the detection masks back onto the merged predictions if available
    if masks is not None:
        data_samples.pred_instances.pred_masks = masks

    # Filter out low-confidence keypoints by zeroing their coordinates
    kp_scores = data_samples.pred_instances.keypoint_scores
    kp_mask = kp_scores >= kpt_thr
    data_samples.pred_instances.keypoints[~kp_mask] = 0

    # Return final InstanceData for poses
    return data_samples.pred_instances
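
# For reference, the predictions returned by run_MMPose can be iterated per
# person (a sketch; the keypoint count K depends on the pose model's dataset,
# e.g. K = 17 for COCO-style models):
#
#     for kpts, scores in zip(pose_instances.keypoints, pose_instances.keypoint_scores):
#         # kpts: (K, 2) x/y coordinates; scores: (K,) confidences
#         ...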