{ "trainer": { "dict_class": "VSAEMultiGaussian", "trainer_class": "VSAEMultiGaussianTrainer", "activation_dim": 2048, "dict_size": 8192, "lr": 0.001, "kl_coeff": 50, "warmup_steps": 500, "sparsity_warmup_steps": 500, "corr_rate": 0.0, "var_flag": 0, "steps": 10000, "decay_start": 8000, "use_april_update_mode": true, "seed": null, "device": "cuda", "layer": 0, "lm_name": "gelu-1l", "wandb_name": "VSAEMulti_gelu-1l_d8192_lr0.001_kl50_corr0.0_trainer_0", "submodule_name": null }, "buffer": { "d_submodule": 2048, "n_ctxs": 3000, "ctx_len": 128, "refresh_batch_size": 32, "out_batch_size": 1024, "device": "cuda" } }