opencv_zoo / tools /quantize /transform.py

Wanli

Update handpose estimation model from MediaPipe (2023feb) (#133)

50fc340 over 2 years ago

5.07 kB

	# This file is part of OpenCV Zoo project.
	# It is subject to the license terms in the LICENSE file found in the same directory.
	#
	# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
	# Third party copyrights are property of their respective owners.

	import collections
	import numpy as np
	import cv2 as cv
	import sys

	class Compose:
	def __init__(self, transforms=[]):
	self.transforms = transforms

	def __call__(self, img):
	for t in self.transforms:
	img = t(img)
	if img is None:
	break
	return img

	class Resize:
	def __init__(self, size, interpolation=cv.INTER_LINEAR):
	self.size = size
	self.interpolation = interpolation

	def __call__(self, img):
	return cv.resize(img, self.size)

	class CenterCrop:
	def __init__(self, size):
	self.size = size # w, h

	def __call__(self, img):
	h, w, _ = img.shape
	ws = int(w / 2 - self.size[0] / 2)
	hs = int(h / 2 - self.size[1] / 2)
	return img[hs:hs+self.size[1], ws:ws+self.size[0], :]

	class Normalize:
	def __init__(self, mean=None, std=None):
	self.mean = mean
	self.std = std

	def __call__(self, img):
	img = img.astype("float32")
	if self.mean is not None:
	img[:, :, 0] = img[:, :, 0] - self.mean[0]
	img[:, :, 1] = img[:, :, 1] - self.mean[1]
	img[:, :, 2] = img[:, :, 2] - self.mean[2]
	if self.std is not None:
	img[:, :, 0] = img[:, :, 0] / self.std[0]
	img[:, :, 1] = img[:, :, 1] / self.std[1]
	img[:, :, 2] = img[:, :, 2] / self.std[2]
	return img

	class ColorConvert:
	def __init__(self, ctype):
	self.ctype = ctype

	def __call__(self, img):
	return cv.cvtColor(img, self.ctype)

	class HandAlign:
	def __init__(self, model):
	self.model = model
	sys.path.append('../../models/palm_detection_mediapipe')
	from mp_palmdet import MPPalmDet
	self.palm_detector = MPPalmDet(modelPath='../../models/palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx', nmsThreshold=0.3, scoreThreshold=0.9)

	def __call__(self, img):
	return self.mp_handpose_align(img)

	def mp_handpose_align(self, img):
	palms = self.palm_detector.infer(img)
	if len(palms) == 0:
	return None
	palm = palms[0]
	palm_bbox = palm[0:4].reshape(2, 2)
	palm_landmarks = palm[4:18].reshape(7, 2)
	p1 = palm_landmarks[0]
	p2 = palm_landmarks[2]
	radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0])
	radians = radians - 2 * np.pi * np.floor((radians + np.pi) / (2 * np.pi))
	angle = np.rad2deg(radians)
	# get bbox center
	center_palm_bbox = np.sum(palm_bbox, axis=0) / 2
	# get rotation matrix
	rotation_matrix = cv.getRotationMatrix2D(center_palm_bbox, angle, 1.0)
	# get rotated image
	rotated_image = cv.warpAffine(img, rotation_matrix, (img.shape[1], img.shape[0]))
	# get bounding boxes from rotated palm landmarks
	homogeneous_coord = np.c_[palm_landmarks, np.ones(palm_landmarks.shape[0])]
	rotated_palm_landmarks = np.array([
	np.dot(homogeneous_coord, rotation_matrix[0]),
	np.dot(homogeneous_coord, rotation_matrix[1])])
	# get landmark bounding box
	rotated_palm_bbox = np.array([
	np.amin(rotated_palm_landmarks, axis=1),
	np.amax(rotated_palm_landmarks, axis=1)]) # [top-left, bottom-right]

	# shift bounding box
	wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
	shift_vector = [0, -0.1] * wh_rotated_palm_bbox
	rotated_palm_bbox = rotated_palm_bbox + shift_vector
	# squarify bounding boxx
	center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
	wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
	new_half_size = np.amax(wh_rotated_palm_bbox) / 2
	rotated_palm_bbox = np.array([
	center_rotated_plam_bbox - new_half_size,
	center_rotated_plam_bbox + new_half_size])

	# enlarge bounding box
	center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
	wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
	new_half_size = wh_rotated_palm_bbox * 1.5
	rotated_palm_bbox = np.array([
	center_rotated_plam_bbox - new_half_size,
	center_rotated_plam_bbox + new_half_size])

	# Crop the rotated image by the bounding box
	[[x1, y1], [x2, y2]] = rotated_palm_bbox.astype(np.int32)
	diff = np.maximum([-x1, -y1, x2 - rotated_image.shape[1], y2 - rotated_image.shape[0]], 0)
	[x1, y1, x2, y2] = [x1, y1, x2, y2] + diff
	crop = rotated_image[y1:y2, x1:x2, :]
	crop = cv.copyMakeBorder(crop, diff[1], diff[3], diff[0], diff[2], cv.BORDER_CONSTANT, value=(0, 0, 0))
	return crop