smog

File size: 10,983 Bytes

fe64bad

# # -*- coding: utf-8 -*-
#
# # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# # holder of all proprietary rights on this computer program.
# # You can only use this computer program if you have closed
# # a license agreement with MPG or you get the right to use the computer
# # program from someone who is authorized to grant you that right.
# # Any use of the computer program without a valid license is prohibited and
# # liable to prosecution.
# #
# # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# # for Intelligent Systems. All rights reserved.
# #
# # Contact: ps-license@tuebingen.mpg.de
#
# import torch
# import joblib
# import numpy as np
# import os.path as osp
# from torch.utils.data import Dataset
#
# # from lib.core.config import VIBE_DB_DIR
# VIBE_DB_DIR = '../VIBE/data/vibe_db'
# # from lib.data_utils.img_utils import split_into_chunks
#
# def split_into_chunks(vid_names, seqlen, stride):
#     video_start_end_indices = []
#
#     video_names, group = np.unique(vid_names, return_index=True)
#     perm = np.argsort(group)
#     video_names, group = video_names[perm], group[perm]
#
#     indices = np.split(np.arange(0, vid_names.shape[0]), group[1:])
#
#     for idx in range(len(video_names)):
#         indexes = indices[idx]
#         if indexes.shape[0] < seqlen:
#             continue
#         chunks = view_as_windows(indexes, (seqlen,), step=stride)
#         start_finish = chunks[:, (0, -1)].tolist()
#         video_start_end_indices += start_finish
#
#     return video_start_end_indices
#
# class AMASS(Dataset):
#     def __init__(self, seqlen):
#         self.seqlen = seqlen
#
#         self.stride = seqlen
#
#         self.db = self.load_db()
#         self.vid_indices = split_into_chunks(self.db['vid_name'], self.seqlen, self.stride)
#         del self.db['vid_name']
#         print(f'AMASS dataset number of videos: {len(self.vid_indices)}')
#
#     def __len__(self):
#         return len(self.vid_indices)
#
#     def __getitem__(self, index):
#         return self.get_single_item(index)
#
#     def load_db(self):
#         db_file = osp.join(VIBE_DB_DIR, 'amass_db.pt')
#         db = joblib.load(db_file)
#         return db
#
#     def get_single_item(self, index):
#         start_index, end_index = self.vid_indices[index]
#         thetas = self.db['theta'][start_index:end_index+1]
#
#         cam = np.array([1., 0., 0.])[None, ...]
#         cam = np.repeat(cam, thetas.shape[0], axis=0)
#         theta = np.concatenate([cam, thetas], axis=-1)
#
#         target = {
#             'theta': torch.from_numpy(theta).float(),  # cam, pose and shape
#         }
#         return target

import os
import numpy as np
import joblib
from .dataset import Dataset
from src.config import ROT_CONVENTION_TO_ROT_NUMBER
from src import config
from PIL import Image
import sys

sys.path.append('')

# action2motion_joints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 21, 24, 38]
# change 0 and 8
action2motion_joints = [8, 1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 21, 24, 38]  # [18,]

from src.utils.action_label_to_idx import action_label_to_idx, idx_to_action_label


def get_z(cam_s, cam_pos, joints, img_size, flength):
    """

    Solves for the depth offset of the model to approx. orth with persp camera.

    """
    # Translate the model itself: Solve the best z that maps to orth_proj points
    joints_orth_target = (cam_s * (joints[:, :2] + cam_pos) + 1) * 0.5 * img_size
    height3d = np.linalg.norm(np.max(joints[:, :2], axis=0) - np.min(joints[:, :2], axis=0))
    height2d = np.linalg.norm(np.max(joints_orth_target, axis=0) - np.min(joints_orth_target, axis=0))
    tz = np.array(flength * (height3d / height2d))
    return float(tz)


def get_trans_from_vibe(vibe, use_z=True):
    alltrans = []
    for t in range(vibe["joints3d"].shape[0]):
        # Convert crop cam to orig cam
        # No need! Because `convert_crop_cam_to_orig_img` from demoutils of vibe
        # does this already for us :)
        # Its format is: [sx, sy, tx, ty]
        cam_orig = vibe["orig_cam"][t]
        x = cam_orig[2]
        y = cam_orig[3]
        if use_z:
            z = get_z(cam_s=cam_orig[0],  # TODO: There are two scales instead of 1.
                      cam_pos=cam_orig[2:4],
                      joints=vibe['joints3d'][t],
                      img_size=480,
                      flength=500)
            # z = 500 / (0.5 * 480 * cam_orig[0])
        else:
            z = 0
        trans = [x, y, z]
        alltrans.append(trans)
    alltrans = np.array(alltrans)
    return alltrans - alltrans[0]


class AMASS(Dataset):
    dataname = "amass"

    def __init__(self, datapath="data/amass/amass_30fps_legacy_db.pt", split="train", use_z=1, **kwargs):
        assert '_db.pt' in datapath
        self.datapath = datapath.replace('_db.pt', '_{}.pt'.format(split))
        assert os.path.exists(self.datapath)
        print('datapath used by amass is [{}]'.format(self.datapath))
        super().__init__(**kwargs)

        self.dataname = "amass"

        # FIXME - hardcoded:
        self.rot_convention = 'legacy'
        self.use_betas = False
        self.use_gender = False
        self.use_body_features = False
        if 'clip_preprocess' in kwargs.keys():
            self.clip_preprocess = kwargs['clip_preprocess']

        self.use_z = (use_z != 0)

        # keep_actions = [6, 7, 8, 9, 22, 23, 24, 38, 80, 93, 99, 100, 102]
        dummy_class = [0]
        genders = config.GENDERS
        self.num_classes = len(dummy_class)

        self.db = self.load_db()
        self._joints3d = []
        self._poses = []
        self._num_frames_in_video = []
        self._actions = []
        self._betas = []
        self._genders = []
        self._heights = []
        self._masses = []
        self._clip_images = []
        self._clip_texts = []
        self._clip_pathes = []
        self._actions_cat = []
        self.clip_label_text = "text_raw_labels"  # "text_proc_labels"

        seq_len = 100
        n_sequences = len(self.db['thetas'])
        # split sequences
        for seq_idx in range(n_sequences):
            n_sub_seq = self.db['thetas'][seq_idx].shape[0] // seq_len
            if n_sub_seq == 0: continue
            n_frames_in_use = n_sub_seq * seq_len
            joints3d = np.split(self.db['joints3d'][seq_idx][:n_frames_in_use], n_sub_seq)
            poses = np.split(self.db['thetas'][seq_idx][:n_frames_in_use], n_sub_seq)
            self._joints3d.extend(joints3d)
            self._poses.extend(poses)
            self._num_frames_in_video.extend([seq_len] * n_sub_seq)

            if 'action_cat' in self.db:
                self._actions_cat.extend(np.split(self.db['action_cat'][seq_idx][:n_frames_in_use], n_sub_seq))

            if self.use_betas:
                self._betas.extend(np.split(self.db['betas'][seq_idx][:n_frames_in_use], n_sub_seq))
            if self.use_gender:
                self._genders.extend([str(self.db['genders'][seq_idx]).replace("b'female'", "female").replace("b'male'",
                                                                                                              "male")] * n_sub_seq)
            if self.use_body_features:
                self._heights.extend([self.db['heights'][seq_idx]] * n_sub_seq)
                self._masses.extend([self.db['masses'][seq_idx]] * n_sub_seq)
            if 'clip_images' in self.db.keys():
                images = [np.squeeze(e) for e in np.split(self.db['clip_images'][seq_idx][:n_sub_seq], n_sub_seq)]
                processed_images = [self.clip_preprocess(Image.fromarray(img)) for img in images]
                self._clip_images.extend(processed_images)
            if self.clip_label_text in self.db:
                self._clip_texts.extend(np.split(self.db[self.clip_label_text][seq_idx][:n_frames_in_use], n_sub_seq))
            if 'clip_pathes' in self.db:
                self._clip_pathes.extend(np.split(self.db['clip_pathes'][seq_idx][:n_sub_seq], n_sub_seq))
            if 'clip_images_emb' in self.db.keys():
                self._clip_images_emb.extend(np.split(self.db['clip_images_emb'][seq_idx][:n_sub_seq], n_sub_seq))



            actions = [0] * n_sub_seq
            self._actions.extend(actions)

        assert len(self._num_frames_in_video) == len(self._poses) == len(self._joints3d) == len(self._actions)
        if self.use_betas:
            assert len(self._poses) == len(self._betas)
        if self.use_gender:
            assert len(self._poses) == len(self._genders)
        if 'clip_images' in self.db.keys():
            assert len(self._poses) == len(self._clip_images)

        self._actions = np.array(self._actions)
        self._num_frames_in_video = np.array(self._num_frames_in_video)

        N = len(self._poses)
        # same set for training and testing
        self._train = np.arange(N)
        self._test = np.arange(N)

        self._action_to_label = {x: i for i, x in enumerate(dummy_class)}
        self._label_to_action = {i: x for i, x in enumerate(dummy_class)}

        self._gender_to_label = {x: i for i, x in enumerate(genders)}
        self._label_to_gender = {i: x for i, x in enumerate(genders)}

        self._action_classes = idx_to_action_label

    def load_db(self):
        # Load amass dataset encoded to a .db file
        # The loaded data is structured:
        # {
        #     'theta': [data_size, 82] (float64) (structured [pose(72), betas(10)])
        #     'vid_name': [data_size] (str)
        # }
        # data_size should be [16275369]
        db_file = self.datapath
        db = joblib.load(db_file)

        if 'clip_images' in db and db['clip_images'][0] is None:  # No images added
            del db['clip_images']

        return db

    def _load_joints3D(self, ind, frame_ix):
        joints3D = self._joints3d[ind][frame_ix]
        return joints3D

    def _load_rotvec(self, ind, frame_ix):
        pose = self._poses[ind][frame_ix, :].reshape(-1, ROT_CONVENTION_TO_ROT_NUMBER[self.rot_convention] + 1,
                                                     3)  # +1 for global orientation
        return pose

    def _load_betas(self, ind, frame_ix):
        betas = self._betas[ind][frame_ix].transpose((1, 0))
        return betas

    def _load_gender(self, ind, frame_ix):
        gender = self._gender_to_label[self._genders[ind]]
        return gender

    def _load_body_features(self, ind, frame_ix):
        return {'mass': float(self._masses[ind]), 'height': float(self._heights[ind])}


if __name__ == "__main__":
    dataset = AMASS()