COCO_ROOT = "path/to/COCO/"
MPII_ROOT = "path/to/MPII/"
AIC_ROOT = "path/to/AIC/"
OCHUMAN_ROOT = "path/to/OCHuman/"

BATCH_SIZE = 64

COCO_NAME = "COCO"
MPII_NAME = "MPII"
AIC_NAME = "AIC"
OCHUMAN_NAME = "OCHuman"
_base_ = ['../_base_/default_runtime.py']

# resume = True
load_from = "work_dirs/ViTb-multi/epoch_210.pth"

# runtime
train_cfg = dict(max_epochs=210, val_interval=5)
# optimizer
custom_imports = dict(
    imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
    allow_failed_imports=False)

optim_wrapper = dict(
    optimizer=dict(
        type='AdamW', lr=5e-4 * BATCH_SIZE / 64, betas=(0.9, 0.999),
        weight_decay=0.1),
    paramwise_cfg=dict(
        num_layers=12,
        layer_decay_rate=0.75,
        custom_keys={
            'bias': dict(decay_mult=0.0),
            'pos_embed': dict(decay_mult=0.0),
            'relative_position_bias_table': dict(decay_mult=0.0),
            'norm': dict(decay_mult=0.0),
        },
    ),
    constructor='LayerDecayOptimWrapperConstructor',
    clip_grad=dict(max_norm=1., norm_type=2),
)
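# LayerDecayOptimWrapperConstructor gives later transformer blocks larger
# learning rates than earlier ones. A minimal sketch of the usual scaling rule
# (the exact depth indexing is an assumption for illustration):
#
#   def lr_scale(depth: int, num_layers: int = 12, decay: float = 0.75) -> float:
#       """LR multiplier for parameters at `depth` (0 = patch embed)."""
#       return decay ** (num_layers + 1 - depth)
#
#   lr_scale(0)    # patch embed: 0.75 ** 13 ~= 0.024
#   lr_scale(13)   # head: 0.75 ** 0 == 1.0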
# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]
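# Resulting schedule: the LR warms up linearly from 0.001x to 1x over the first
# 500 iterations, then drops 10x at epoch 170 and again at epoch 200. Derived
# values for this config (before any auto_scale_lr rescaling):
_BASE_LR = 5e-4 * BATCH_SIZE / 64    # epochs 0-169 after warm-up: 5e-4
_LR_AFTER_170 = _BASE_LR * 0.1       # epochs 170-199: 5e-5
_LR_AFTER_200 = _BASE_LR * 0.01      # epochs 200-209: 5e-6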
# automatically scale LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
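# When auto LR scaling is enabled (e.g. via --auto-scale-lr in the MM train
# scripts), the optimizer LR is multiplied by
# (total actual batch size / base_batch_size). A worked example assuming an
# 8-GPU run, which is an assumption, not something this config pins down:
#
#   scale = 8 * BATCH_SIZE / 512              # = 1.0, so the LR is unchanged
#   scaled_lr = 5e-4 * BATCH_SIZE / 64 * scale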
# hooks
default_hooks = dict(
    checkpoint=dict(
        save_best='{}/AP'.format(COCO_NAME), rule='greater',
        max_keep_ckpts=1))
# codec settings
codec = dict(
    type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
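# UDPHeatmap renders each keypoint as a Gaussian (sigma = 2 heatmap pixels)
# using unbiased data processing. input_size and heatmap_size are (w, h), with
# the heatmap at 1/4 of the input resolution:
assert codec['input_size'][0] == 4 * codec['heatmap_size'][0]
assert codec['input_size'][1] == 4 * codec['heatmap_size'][1]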
# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='mmpretrain.VisionTransformer',
        arch='base',
        img_size=(256, 192),
        patch_size=16,
        qkv_bias=True,
        drop_path_rate=0.3,
        with_cls_token=False,
        out_type='featmap',
        patch_cfg=dict(padding=2),
        init_cfg=None,
        # init_cfg=dict(
        #     type='Pretrained',
        #     checkpoint='models/pretrained/mae_pretrain_vit_base_20230913.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=768,
        out_channels=21,
        deconv_out_channels=(256, 256),
        deconv_kernel_sizes=(4, 4),
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=False,
    ))
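# Shape walk-through (illustrative): with patch_size=16 and patch padding 2, a
# 256x192 crop produces a (256 + 4 - 16) // 16 + 1 = 16 by
# (192 + 4 - 16) // 16 + 1 = 12 token feature map; the head's two 4x4 deconvs
# each upsample 2x, giving the 64x48 heatmap the codec expects, with one of the
# 21 output channels per merged keypoint:
_FEAT_H = (256 + 2 * 2 - 16) // 16 + 1    # 16
_FEAT_W = (192 + 2 * 2 - 16) // 16 + 1    # 12
assert (_FEAT_W * 4, _FEAT_H * 4) == codec['heatmap_size']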
# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(
        type='MaskBackground',
        prob=1.0,
        continue_on_failure=False,
        alpha=0.2,
        dilate_prob=0.5,
        dilate_amount=0.1,
        erode_prob=0.5,
        erode_amount=0.5,
    ),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
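# MaskBackground is a project-specific transform (not in upstream MMPose) that
# suppresses background pixels using a person segmentation mask; the
# dilate/erode options randomly grow or shrink that mask. A minimal sketch of
# the presumed core operation, where the alpha-blend semantics and the helper
# below are assumptions for illustration:
#
#   import numpy as np
#
#   def mask_background(img: np.ndarray, person_mask: np.ndarray,
#                       alpha: float = 0.2) -> np.ndarray:
#       """Keep person pixels; scale background pixels down by `alpha`."""
#       weight = np.where(person_mask[..., None] > 0, 1.0, alpha)
#       return (img.astype(np.float32) * weight).astype(img.dtype)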
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='MaskBackground', continue_on_failure=False, alpha=0.2),
    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
    dict(type='PackPoseInputs')
]
# # base dataset settings
# data_root = TRAIN_ROOT
# val_data_root = VAL_ROOT
# dataset_type = 'CocoDataset'
# data_mode = 'topdown'
coco_train_dataset = dict(
    type="CocoDataset",
    data_root=COCO_ROOT,
    data_mode="topdown",
    ann_file='annotations/person_keypoints_train2017.json',
    data_prefix=dict(img='train2017/'),
    pipeline=[],
    test_mode=False,
)

coco_val_dataset = dict(
    type="CocoDataset",
    data_root=COCO_ROOT,
    data_mode="topdown",
    ann_file="annotations/person_keypoints_val2017.json",
    bbox_file=COCO_ROOT + "/detections/rtmdet-l-ins-mask.json",
    filter_cfg=dict(bbox_score_thr=0.3),
    data_prefix=dict(img='val2017/'),
    pipeline=[],
    test_mode=True,
)
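# At evaluation time, person boxes come from an RTMDet-L instance-segmentation
# detector rather than ground truth. The bbox_file uses the standard COCO
# detection-results format (values below are placeholders):
#
#   [{"image_id": 1234, "category_id": 1,
#     "bbox": [x, y, width, height], "score": 0.98}, ...]
#
# filter_cfg then discards detections with score below 0.3.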
mpii_train_dataset = dict(
    type="MpiiDataset",
    data_root=MPII_ROOT,
    data_mode="topdown",
    ann_file="annotations/mpii_sam_train.json",
    data_prefix=dict(img='images/'),
    pipeline=[],
    test_mode=False,
)

mpii_val_dataset = dict(
    type="MpiiDataset",
    data_root=MPII_ROOT,
    data_mode="topdown",
    ann_file="annotations/mpii_sam_val.json",
    data_prefix=dict(img='images/'),
    pipeline=[],
    test_mode=True,
)

aic_train_dataset = dict(
    type="AicDataset",
    data_root=AIC_ROOT,
    data_mode="topdown",
    ann_file="annotations/aic_sam_train.json",
    data_prefix=dict(img='images/'),
    pipeline=[],
    test_mode=False,
)

aic_val_dataset = dict(
    type="AicDataset",
    data_root=AIC_ROOT,
    data_mode="topdown",
    ann_file="annotations/aic_sam_val.json",
    data_prefix=dict(img='images/'),
    pipeline=[],
    test_mode=True,
)
ochuman_val_dataset = dict(
    type="OCHumanDataset",
    data_root=OCHUMAN_ROOT,
    data_mode="topdown",
    ann_file="annotations/person_keypoints_val2017.json",
    data_prefix=dict(img='val2017/'),
    # bbox_file=OCHUMAN_ROOT + "/detections/rtmdet-l-ins.json",
    # filter_cfg=dict(bbox_score_thr=0.3),
    pipeline=[],
    test_mode=True,
)
combined_val_dataset = dict(
    type='CombinedDataset',
    metainfo=dict(from_file='configs/_base_/datasets/merged_COCO_AIC_MPII.py'),
    datasets=[
        coco_val_dataset, mpii_val_dataset, aic_val_dataset,
        ochuman_val_dataset
    ],
    pipeline=val_pipeline,
    test_mode=True,
    keypoints_mapping=[
        # COCO: identity mapping; the merged skeleton is based on COCO
        {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
         9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16},
        # MPII -> COCO joints plus the additional merged-only joints
        {0: 16, 1: 14, 2: 12, 3: 11, 4: 13, 5: 15, 6: 20, 7: 17, 8: 18,
         9: 19, 10: 10, 11: 8, 12: 6, 13: 5, 14: 7, 15: 9},
        # AIC -> COCO joints plus the additional merged-only joints
        {0: 6, 1: 8, 2: 10, 3: 5, 4: 7, 5: 9, 6: 12, 7: 14, 8: 16,
         9: 11, 10: 13, 11: 15, 12: 19, 13: 17},
        # OCHuman: identity mapping (OCHuman uses the COCO keypoint set)
        {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
         9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16},
    ],
)
combined_train_dataset = dict(
    type='CombinedDataset',
    metainfo=dict(from_file='configs/_base_/datasets/merged_COCO_AIC_MPII.py'),
    datasets=[coco_train_dataset, mpii_train_dataset, aic_train_dataset],
    pipeline=train_pipeline,
    test_mode=False,
    keypoints_mapping=[
        # COCO: identity mapping; the merged skeleton is based on COCO
        {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
         9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16},
        # MPII -> COCO joints plus the additional merged-only joints
        {0: 16, 1: 14, 2: 12, 3: 11, 4: 13, 5: 15, 6: 20, 7: 17, 8: 18,
         9: 19, 10: 10, 11: 8, 12: 6, 13: 5, 14: 7, 15: 9},
        # AIC -> COCO joints plus the additional merged-only joints
        {0: 6, 1: 8, 2: 10, 3: 5, 4: 7, 5: 9, 6: 12, 7: 14, 8: 16,
         9: 11, 10: 13, 11: 15, 12: 19, 13: 17},
    ],
)
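# Illustrative consistency checks (not read by MMPose): each mapping dict sends
# source-dataset joint indices to slots in the 21-joint merged skeleton, i.e.
# the 17 COCO joints plus four extra slots (17-20) for joints only annotated in
# MPII/AIC. For example, MPII joint 9 (head top) lands in extra slot 19, and
# the train/val MPII mappings agree:
assert combined_train_dataset['keypoints_mapping'][1][9] == 19
assert (combined_train_dataset['keypoints_mapping'][1]
        == combined_val_dataset['keypoints_mapping'][1])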
# data loaders
train_dataloader = dict(
    batch_size=BATCH_SIZE,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(
        type='MultiSourceSampler',
        batch_size=BATCH_SIZE,
        source_ratio=[1, 1, 1],
        shuffle=True,
    ),
    dataset=combined_train_dataset,
)
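# With source_ratio=[1, 1, 1], MultiSourceSampler builds every 64-image batch
# from (roughly) equal thirds of COCO, MPII and AIC, so smaller sources are
# effectively oversampled relative to their size. After integer rounding, one
# source absorbs the remainder (which source gets it is an implementation
# detail), e.g. a 22/21/21 split:
_per_source = [BATCH_SIZE - 2 * (BATCH_SIZE // 3), BATCH_SIZE // 3, BATCH_SIZE // 3]
assert _per_source == [22, 21, 21] and sum(_per_source) == BATCH_SIZE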
val_dataloader = dict(
    batch_size=128,
    num_workers=8,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=combined_val_dataset,
)
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(
    type='MultiDatasetEvaluator',
    metrics=[
        dict(
            type='CocoMetric',
            ann_file=COCO_ROOT + 'annotations/person_keypoints_val2017.json',
            prefix=COCO_NAME,
            nms_mode='none',
            outfile_prefix='COCO_MaskPose',
            ignore_stats=[
                'AP .5', 'AP .75', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'
            ],
        ),
        dict(type='PCKAccuracy', prefix=MPII_NAME),
        dict(type='PCKAccuracy', prefix=AIC_NAME),
        dict(
            type='CocoMetric',
            ann_file=OCHUMAN_ROOT + 'annotations/person_keypoints_val2017.json',
            prefix=OCHUMAN_NAME,
            outfile_prefix='ochuman',
            nms_mode='none',
            ignore_stats=[
                'AP .5', 'AP .75', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'
            ],
        ),
    ],
    datasets=combined_val_dataset['datasets'],
)
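# MultiDatasetEvaluator pairs metrics with datasets positionally: the first
# CocoMetric scores COCO, the two PCKAccuracy metrics score MPII and AIC, and
# the second CocoMetric scores OCHuman. Each `prefix` namespaces the logged
# results, which is what lets the checkpoint hook above track 'COCO/AP':
assert '{}/AP'.format(COCO_NAME) == 'COCO/AP'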
test_evaluator = val_evaluator