from typing import Callable, Dict, Optional, Tuple, Union

import cv2
import numpy as np
import PIL
import PIL.Image
import torch
from huggingface_hub import hf_hub_download

from .body_estimation import Wholebody, resize_image
from .draw import draw_openpose


class DWposeDetector:
    def __init__(self, device: str = "cpu"):
        # Fetch the YOLOX detector and DWPose wholebody ONNX weights into ./checkpoints.
        hf_hub_download("RedHash/DWPose", "yolox_l.onnx", local_dir="./checkpoints")
        hf_hub_download("RedHash/DWPose", "dw-ll_ucoco_384.onnx", local_dir="./checkpoints")
        self.pose_estimation = Wholebody(
            device=device, model_det="checkpoints/yolox_l.onnx", model_pose="checkpoints/dw-ll_ucoco_384.onnx"
        )
    def _format_pose(self, candidates, scores, width, height):
        num_candidates, _, locs = candidates.shape

        # Normalize keypoint coordinates to [0, 1] relative to the detection image size.
        candidates[..., 0] /= float(width)
        candidates[..., 1] /= float(height)

        # The first 18 keypoints per candidate are the OpenPose-style body points.
        bodies = candidates[:, :18].copy()
        bodies_flat = bodies.reshape(num_candidates * 18, locs)

        # Replace body confidence scores with flat indices into bodies_flat; low-confidence
        # points (score <= 0.3) are marked as -1 so they can be skipped when drawing.
        body_scores = scores[:, :18]
        for i in range(len(body_scores)):
            for j in range(len(body_scores[i])):
                if body_scores[i][j] > 0.3:
                    body_scores[i][j] = int(18 * i + j)
                else:
                    body_scores[i][j] = -1

        faces = candidates[:, 24:92]
        faces_scores = scores[:, 24:92]

        # Stack the two hand keypoint groups along the candidate axis.
        hands = np.vstack([candidates[:, 92:113], candidates[:, 113:]])
        hands_scores = np.vstack([scores[:, 92:113], scores[:, 113:]])

        pose = dict(
            bodies=bodies_flat,
            bodies_multi=bodies,
            body_scores=body_scores,
            hands=hands,
            hands_scores=hands_scores,
            faces=faces,
            faces_scores=faces_scores,
            num_candidates=num_candidates,
        )
        return pose
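
    # Illustrative helper (a sketch, not part of the original class): shows how the
    # flattened `bodies` array and the index-valued `body_scores` built in _format_pose
    # can be read back per candidate. The name and placement are assumptions made for
    # illustration only.
    @staticmethod
    def _body_keypoints_for_candidate(pose: Dict, candidate_idx: int) -> np.ndarray:
        # body_scores stores flat indices into `bodies` for confident keypoints and -1
        # for the rest, so dropping the -1 entries recovers the normalized (x, y)
        # coordinates belonging to one candidate.
        indices = pose["body_scores"][candidate_idx].astype(int)
        return pose["bodies"][indices[indices >= 0]]
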
    @torch.inference_mode()
    def __call__(
        self,
        image: Union[PIL.Image.Image, np.ndarray],
        detect_resolution: int = 512,
        draw_pose: Optional[Callable] = draw_openpose,
        output_type: str = "pil",
        **kwargs,
    ) -> Union[Dict, Tuple[Union[PIL.Image.Image, np.ndarray], Dict]]:
        if not isinstance(image, np.ndarray):
            image = np.array(image.convert("RGB"))
        image = image.copy()
        original_height, original_width, _ = image.shape

        # Run detection at the reduced resolution, then map the drawn result back to
        # the original image size.
        image = resize_image(image, target_resolution=detect_resolution)
        height, width, _ = image.shape

        candidates, scores = self.pose_estimation(image)
        pose = self._format_pose(candidates, scores, width, height)
        if not draw_pose:
            return pose

        pose_image = draw_pose(pose, height=height, width=width, **kwargs)
        pose_image = cv2.resize(pose_image, (original_width, original_height), interpolation=cv2.INTER_LANCZOS4)
        if output_type == "pil":
            pose_image = PIL.Image.fromarray(pose_image)
        elif output_type == "np":
            pass
        else:
            raise ValueError("output_type should be 'pil' or 'np'")
        return pose_image, pose
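

# Minimal usage sketch (assumptions: an `example.jpg` next to this file and a CPU run;
# the checkpoint locations come from __init__ above). It shows the two return modes:
# an annotated image plus the pose dict when drawing is enabled, or the pose dict alone
# when draw_pose is None.
if __name__ == "__main__":
    detector = DWposeDetector(device="cpu")
    input_image = PIL.Image.open("example.jpg")

    # Draw the detected pose and keep the raw keypoint data alongside it.
    pose_image, pose = detector(input_image, detect_resolution=512, output_type="pil")
    pose_image.save("example_pose.png")

    # Skip drawing entirely and work with the normalized keypoints only.
    pose_only = detector(input_image, draw_pose=None)
    print(pose_only["num_candidates"], pose_only["bodies"].shape)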