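# MMPose-style top-down config: a ViT-Base heatmap estimator trained jointly on COCO,
# MPII and AIC with background masking ("MaskPose"), evaluated on COCO, MPII, AIC and
# OCHuman. Replace the path placeholders below with the actual dataset roots.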
COCO_ROOT = "path/to/COCO/"
MPII_ROOT = "path/to/MPII/"
AIC_ROOT = "path/to/AIC/"
OCHUMAN_ROOT = "path/to/OCHuman/"
BATCH_SIZE = 64
COCO_NAME = "COCO"
MPII_NAME = "MPII"
AIC_NAME = "AIC"
OCHUMAN_NAME = "OCHuman"
_base_ = ['../_base_/default_runtime.py']
# resume = True
load_from = "work_dirs/ViTb-multi/epoch_210.pth"
# runtime
train_cfg = dict(max_epochs=210, val_interval=5)
# optimizer
custom_imports = dict(
imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
allow_failed_imports=False)
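# AdamW with layer-wise LR decay, as in the ViTPose recipe: the base LR follows the
# linear scaling rule (5e-4 at batch size 64) and layers closer to the input receive
# progressively smaller LRs via layer_decay_rate=0.75.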
optim_wrapper = dict(
optimizer=dict(
type='AdamW', lr=5e-4*BATCH_SIZE/64, betas=(0.9, 0.999), weight_decay=0.1),
paramwise_cfg=dict(
num_layers=12,
layer_decay_rate=0.75,
custom_keys={
'bias': dict(decay_mult=0.0),
'pos_embed': dict(decay_mult=0.0),
'relative_position_bias_table': dict(decay_mult=0.0),
'norm': dict(decay_mult=0.0),
},
),
constructor='LayerDecayOptimWrapperConstructor',
clip_grad=dict(max_norm=1., norm_type=2),
)
# learning policy
param_scheduler = [
dict(
type='LinearLR', begin=0, end=500, start_factor=0.001,
by_epoch=False), # warm-up
dict(
type='MultiStepLR',
begin=0,
end=210,
milestones=[170, 200],
gamma=0.1,
by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
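# Note: MMEngine applies this rescaling only when auto LR scaling is enabled at launch
# (e.g. with the --auto-scale-lr flag); otherwise the LR defined above is used as-is.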
# hooks
default_hooks = dict(
checkpoint=dict(save_best='{}/AP'.format(COCO_NAME), rule='greater', max_keep_ckpts=1))
# codec settings
codec = dict(
type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
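# UDP codec: input_size is (w, h) = (192, 256); heatmap_size (48, 64) is a stride-4
# grid with Gaussian targets of sigma=2. The backbone's img_size below is (h, w).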
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
type='mmpretrain.VisionTransformer',
arch='base',
img_size=(256, 192),
patch_size=16,
qkv_bias=True,
drop_path_rate=0.3,
with_cls_token=False,
out_type='featmap',
patch_cfg=dict(padding=2),
init_cfg=None,
# init_cfg=dict(
# type='Pretrained',
# checkpoint='models/pretrained/mae_pretrain_vit_base_20230913.pth'),
),
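# The head predicts 21 heatmaps: the 17 COCO keypoints plus 4 extra joints
# (neck/thorax, upper neck, head top and pelvis, judging from the mappings below)
# contributed by MPII and AIC in the merged skeleton.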
head=dict(
type='HeatmapHead',
in_channels=768,
out_channels=21,
deconv_out_channels=(256, 256),
deconv_kernel_sizes=(4, 4),
loss=dict(type='KeypointMSELoss', use_target_weight=True),
decoder=codec),
test_cfg=dict(
flip_test=True,
flip_mode='heatmap',
shift_heatmap=False,
))
# pipelines
train_pipeline = [
dict(type='LoadImage'),
dict(type='GetBBoxCenterScale'),
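# MaskBackground is a custom transform that attenuates pixels outside the person's
# segmentation mask (alpha controls how much background is kept); the random mask
# dilation/erosion here presumably serves as a robustness augmentation.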
dict(
type='MaskBackground',
prob=1.0,
continue_on_failure=False,
alpha=0.2,
dilate_prob=0.5,
dilate_amount=0.1,
erode_prob=0.5,
erode_amount=0.5,
),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(type='RandomBBoxTransform'),
dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage'),
dict(type='GetBBoxCenterScale'),
dict(type='MaskBackground', continue_on_failure=False, alpha=0.2),
dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
dict(type='PackPoseInputs')
]
# # base dataset settings
# data_root = TRAIN_ROOT
# val_data_root = VAL_ROOT
# dataset_type = 'CocoDataset'
# data_mode = 'topdown'
coco_train_dataset = dict(
type="CocoDataset",
data_root=COCO_ROOT,
data_mode="topdown",
ann_file='annotations/person_keypoints_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=[],
test_mode=False,
)
coco_val_dataset = dict(
type="CocoDataset",
data_root=COCO_ROOT,
data_mode="topdown",
ann_file="annotations/person_keypoints_val2017.json",
bbox_file=COCO_ROOT + "/detections/rtmdet-l-ins-mask.json",
filter_cfg=dict(bbox_score_thr=0.3),
data_prefix=dict(img='val2017/'),
pipeline=[],
test_mode=True,
)
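# COCO validation uses detector-predicted boxes (an RTMDet-L instance-segmentation
# model, score threshold 0.3), matching the usual top-down evaluation protocol; the
# validation sets below use ground-truth boxes since no bbox_file is given.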
mpii_train_dataset = dict(
type="MpiiDataset",
data_root=MPII_ROOT,
data_mode="topdown",
ann_file="annotations/mpii_sam_train.json",
data_prefix=dict(img='images/'),
pipeline=[],
test_mode=False,
)
mpii_val_dataset = dict(
type="MpiiDataset",
data_root=MPII_ROOT,
data_mode="topdown",
ann_file="annotations/mpii_sam_val.json",
data_prefix=dict(img='images/'),
pipeline=[],
test_mode=True,
)
aic_train_dataset = dict(
type="AicDataset",
data_root=AIC_ROOT,
data_mode="topdown",
ann_file="annotations/aic_sam_train.json",
data_prefix=dict(img='images/'),
pipeline=[],
test_mode=False,
)
aic_val_dataset = dict(
type="AicDataset",
data_root=AIC_ROOT,
data_mode="topdown",
ann_file="annotations/aic_sam_val.json",
data_prefix=dict(img='images/'),
pipeline=[],
test_mode=True,
)
ochuman_val_dataset = dict(
type="OCHumanDataset",
data_root=OCHUMAN_ROOT,
data_mode="topdown",
ann_file="annotations/person_keypoints_val2017.json",
data_prefix=dict(img='val2017/'),
# bbox_file=OCHUMAN_ROOT + "/detections/rtmdet-l-ins.json",
# filter_cfg=dict(bbox_score_thr=0.3),
pipeline=[],
test_mode=True,
)
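# CombinedDataset concatenates the per-source datasets; keypoints_mapping remaps each
# source's joint indices into the shared 21-keypoint metainfo (merged_COCO_AIC_MPII.py),
# where indices 0-16 follow COCO order and 17-20 hold the extra joints.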
combined_val_dataset = dict(
type='CombinedDataset',
metainfo=dict(from_file='configs/_base_/datasets/merged_COCO_AIC_MPII.py'),
datasets=[coco_val_dataset, mpii_val_dataset, aic_val_dataset, ochuman_val_dataset],
pipeline=val_pipeline,
test_mode=True,
keypoints_mapping=[
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16}, # Identity mapping for COCO; the merged skeleton follows COCO ordering
{0: 16, 1: 14, 2: 12, 3: 11, 4: 13, 5: 15, 6: 20, 7: 17, 8: 18,
9: 19, 10: 10, 11: 8, 12: 6, 13: 5, 14: 7, 15: 9}, # MPII -> COCO and additional points
{0: 6, 1: 8, 2: 10, 3: 5, 4: 7, 5: 9, 6: 12, 7: 14, 8: 16,
9: 11, 10: 13, 11: 15, 12: 19, 13: 17}, # AIC -> COCO and additional points
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16}, # Identity mapping for OCHuman, which uses the COCO keypoint ordering
],
)
combined_train_dataset = dict(
type='CombinedDataset',
metainfo=dict(from_file='configs/_base_/datasets/merged_COCO_AIC_MPII.py'),
datasets=[coco_train_dataset, mpii_train_dataset, aic_train_dataset],
pipeline=train_pipeline,
test_mode=False,
keypoints_mapping=[
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16}, # Identity mapping for COCO; the merged skeleton follows COCO ordering
{0: 16, 1: 14, 2: 12, 3: 11, 4: 13, 5: 15, 6: 20, 7: 17, 8: 18,
9: 19, 10: 10, 11: 8, 12: 6, 13: 5, 14: 7, 15: 9}, # MPII -> COCO and additional points
{0: 6, 1: 8, 2: 10, 3: 5, 4: 7, 5: 9, 6: 12, 7: 14, 8: 16,
9: 11, 10: 13, 11: 15, 12: 19, 13: 17}, # AIC -> COCO and additional points
],
)
# data loaders
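# MultiSourceSampler fills each batch from the three training sources according to
# source_ratio (1:1:1 here), so every 64-sample batch mixes COCO, MPII and AIC roughly equally.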
train_dataloader = dict(
batch_size=BATCH_SIZE,
num_workers=8,
persistent_workers=True,
sampler=dict(
type='MultiSourceSampler',
batch_size=BATCH_SIZE,
source_ratio=[1, 1, 1],
shuffle=True,
),
dataset=combined_train_dataset,
)
val_dataloader = dict(
batch_size=128,
num_workers=8,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=combined_val_dataset,
)
test_dataloader = val_dataloader
# evaluators
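# MultiDatasetEvaluator pairs each metric with the dataset at the same position in
# `datasets`: COCO and OCHuman are scored with COCO-style AP, MPII and AIC with PCK.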
val_evaluator = dict(
type='MultiDatasetEvaluator',
metrics=[
dict(type='CocoMetric',
ann_file=COCO_ROOT + 'annotations/person_keypoints_val2017.json',
prefix=COCO_NAME,
nms_mode='none',
outfile_prefix='COCO_MaskPose',
ignore_stats=['AP .5', 'AP .75', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'],
),
dict(type='PCKAccuracy',
prefix=MPII_NAME,
),
dict(type='PCKAccuracy',
prefix=AIC_NAME,
),
dict(type='CocoMetric',
ann_file=OCHUMAN_ROOT + 'annotations/person_keypoints_val2017.json',
prefix=OCHUMAN_NAME,
outfile_prefix='ochuman',
nms_mode='none',
ignore_stats=['AP .5', 'AP .75', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'],
),
],
datasets=combined_val_dataset['datasets'],
)
test_evaluator = val_evaluator