import numpy as np


def select_mask(masks, rule="largest"):
    """Pick one mask record out of ``masks`` according to ``rule``.

    Args:
        masks: Iterable of mask records; each record must support
            ``record["area"]`` lookup.
        rule: Selection strategy. Only ``"largest"`` is implemented, which
            returns the record with the greatest ``"area"`` value.

    Returns:
        The selected mask record (the same object that was passed in).

    Raises:
        ValueError: If ``rule`` is not recognised, or if ``masks`` is empty.
    """
    if rule == "largest":
        # ``default=None`` lets us report an empty input explicitly instead
        # of surfacing the opaque "max() arg is an empty sequence" error.
        best = max(masks, key=lambda m: m["area"], default=None)
        if best is None:
            raise ValueError("No masks to select from")
        return best
    raise ValueError("Unknown mask-selection rule")


def _resize_mask_to(mask, target_shape):
    """Resize boolean ``mask`` to ``target_shape`` (rows, cols) with nearest-neighbour."""
    # Imported lazily so that callers whose mask already matches the depth
    # map do not need OpenCV installed.
    import cv2

    mask_uint8 = mask.astype(np.uint8) * 255
    resized = cv2.resize(
        mask_uint8,
        (target_shape[1], target_shape[0]),  # cv2 expects (width, height)
        interpolation=cv2.INTER_NEAREST,
    )
    return resized > 127  # Convert back to boolean


def pixel_to_metric(mask, depth, fx_px, fy_px):
    """Estimate distance and size of the masked object from a depth map.

    The distance is the median of the depth values under the mask. Width and
    height are the mask's bounding-box extent in pixels scaled by
    ``distance / focal_length`` for the respective axis.

    Args:
        mask: 2-D array marking object pixels. Boolean arrays are used as-is;
            for any other dtype every nonzero element counts as an object
            pixel. If its shape differs from ``depth`` it is resized to the
            depth map's shape using nearest-neighbour interpolation.
        depth: 2-D array of depth values, one per pixel. The returned
            distances are in whatever unit this array uses.
        fx_px: Divisor applied to the horizontal extent (focal length along
            x, in pixels, judging by the name).
        fy_px: Divisor applied to the vertical extent (focal length along y,
            in pixels, judging by the name).

    Returns:
        A dict with float entries ``distance_m``, ``width_m`` and
        ``height_m``. All three are ``0.0`` when the mask selects no pixels.
    """
    mask = np.asarray(mask)
    depth = np.asarray(depth)

    # Normalise to a boolean mask up front. Indexing ``depth`` with an
    # integer mask would be fancy (index) selection rather than masking, and
    # a 0/1 integer mask would be wiped out by the ``> 127`` threshold used
    # after resizing.
    if mask.dtype != bool:
        mask = mask != 0

    # Handle shape mismatch between mask and depth
    if mask.shape != depth.shape:
        print(f"⚠️ Shape mismatch: mask {mask.shape} vs depth {depth.shape}")
        mask = _resize_mask_to(mask, depth.shape)
        print(f"✅ Resized mask to {mask.shape}")

    ys, xs = np.nonzero(mask)
    if xs.size == 0:
        print("⚠️ Empty mask - no object pixels found")
        return dict(distance_m=0.0, width_m=0.0, height_m=0.0)

    # Inclusive pixel extent: a mask covering columns 2..4 is 3 pixels wide,
    # and a single-pixel object is 1 pixel wide rather than 0.
    bbox_w = xs.max() - xs.min() + 1
    bbox_h = ys.max() - ys.min() + 1

    distance = float(np.median(depth[mask]))
    width_m = float((bbox_w * distance) / fx_px)
    height_m = float((bbox_h * distance) / fy_px)
    return dict(distance_m=distance, width_m=width_m, height_m=height_m)