# Experiment configuration: a Diffusion_TS model plus solver and dataloader
# settings that reference MuJoCoDataset for both the train and test splits.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not valid block YAML (and the first inline "#" commented out everything
# after it). The nesting below was reconstructed from key semantics; confirm
# the two spots marked NOTE(review) against the code that reads this file.

model:
  # Dotted path of the model class to build; `params` holds its settings.
  target: Models.interpretable_diffusion.gaussian_diffusion.Diffusion_TS
  params:
    seq_length: 24  # same value as dataloader `window`
    feature_size: 14  # same value as dataloader `dim`
    n_layer_enc: 3
    n_layer_dec: 2
    d_model: 64  # 4 X 16
    timesteps: 1000
    sampling_timesteps: 1000  # equal to `timesteps`
    loss_type: 'l1'
    beta_schedule: 'cosine'
    n_heads: 4
    mlp_hidden_times: 4
    attn_pd: 0.0
    resid_pd: 0.0
    kernel_size: 1
    padding_size: 0

solver:
  base_lr: 1.0e-5
  max_epochs: 14000
  results_folder: ./Checkpoints_mujoco
  gradient_accumulate_every: 2
  save_cycle: 1400  # max_epochs // 10

  ema:
    decay: 0.995
    update_interval: 10

  scheduler:
    target: engine.lr_sch.ReduceLROnPlateauWithWarmup
    params:
      factor: 0.5
      patience: 3000
      # NOTE(review): min_lr has the same value as solver.base_lr (1.0e-5),
      # while warmup_lr (8.0e-4) is higher than both - confirm intended.
      min_lr: 1.0e-5
      threshold: 1.0e-1
      threshold_mode: rel
      warmup_lr: 8.0e-4
      warmup: 500
      verbose: false

dataloader:
  train_dataset:
    target: utils.data_utils.mujoco_dataset.MuJoCoDataset
    params:
      num: 10000
      dim: 14
      window: 24  # seq_length
      save2npy: true
      neg_one_to_one: true
      seed: 123
      period: train

  test_dataset:
    target: utils.data_utils.mujoco_dataset.MuJoCoDataset
    params:
      num: 1000
      dim: 14
      window: 24  # seq_length
      save2npy: true
      neg_one_to_one: true
      seed: 123
      style: separate
      period: test
      distribution: geometric
    # NOTE(review): the next three keys are placed beside `params` (presumably
    # sampling options rather than dataset constructor arguments) - verify
    # against the consumer; the flattened source did not show their level.
    coefficient: 1.0e-2
    step_size: 5.0e-2
    sampling_steps: 250

  # NOTE(review): assumed to apply to the dataloader as a whole rather than
  # to test_dataset only - confirm against the consumer.
  batch_size: 128
  sample_size: 256
  shuffle: true