# Training configuration with two top-level sections:
#   model  - class path (`target`) plus its constructor-style `params`
#   solver - optimisation settings, EMA settings and the LR scheduler
#
# NOTE(review): the block structure below was rebuilt from a copy of this
# file whose line breaks and indentation had been lost. Placing `ema` and
# `scheduler` under `solver` follows the key order and the target/params
# pairing; confirm against the code that reads this config.

model:
  target: Models.interpretable_diffusion.gaussian_diffusion.Diffusion_TS
  params:
    seq_length: 100
    feature_size: 14
    n_layer_enc: 3
    n_layer_dec: 3
    d_model: 64  # 4 X 16 (presumably n_heads x 16 per head; confirm)
    timesteps: 500
    sampling_timesteps: 500
    loss_type: 'l1'
    beta_schedule: 'cosine'
    n_heads: 4
    mlp_hidden_times: 4
    attn_pd: 0
    resid_pd: 0
    kernel_size: 1
    padding_size: 0

solver:
  base_lr: 1.0e-5
  max_epochs: 12000
  results_folder: ./Checkpoints_mujoco_sssd
  gradient_accumulate_every: 2
  save_cycle: 1200  # max_epochs // 10

  ema:
    decay: 0.995
    update_interval: 10

  scheduler:
    target: engine.lr_sch.ReduceLROnPlateauWithWarmup
    params:
      factor: 0.5
      patience: 3000
      # Same value as solver.base_lr above.
      min_lr: 1.0e-5
      threshold: 1.0e-1
      threshold_mode: rel
      warmup_lr: 8.0e-4
      warmup: 500
      verbose: false