_wandb:
value:
cli_version: 0.20.1
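# The "m" list below is wandb's auto-generated metric registry for this run: the
# "1" field of each entry holds a logged metric key, and the remaining numbered
# fields appear to be internal wandb metric options.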
m:
- "1": step/ppo-update-after
"6":
- 3
"7": []
- "1": ppo/std/std_actor_clipfrac
"5": 1
"6":
- 1
- 3
"7": []
- "1": ued/distances/levels_cheese_is_in_corner_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": step/env-step-all-after
"6":
- 3
"7": []
- "1": ued/layout/prop_walls_wavg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/visit_patterns/prev_batch_level_ids_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_actor_clipfrac
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_advantage
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/avg_critic_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/cheese-corne_dist_wavg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/layout/prop_walls_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_critic_clipfrac
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/layout/num_walls_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/avg_benchmark_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/cheese-corne_dist_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_grad_norm_pre_clip
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/solvable_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/scoring/scores_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/visit_patterns/first_visit_time_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_critic_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_critic_clipfrac
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_entropy
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/lvl_benchmark_regret_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/visit_patterns/last_visit_time_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/eta
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_actor_approxkl3
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/avg_benchmark_regret
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/layout/prop_walls_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_critic_clipfrac
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/lvl_benchmark_regret_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/mouse-cheese_dist_solvable_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/visit_patterns/num_replay_batches
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_grad_norm_pre_clip
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/avg_benchmark_regret
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/avg_benchmark_regret
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/solvable_wavg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/visit_patterns/avg_first_visit_time
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_actor_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": ued/distances/cheese-corner_dist_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/visit_patterns/avg_last_visit_time
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_actor_approxkl1
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/solvable_num
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/layout/num_walls_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/avg_benchmark_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/avg_benchmark_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/mouse-cheese_dist_finite_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/mouse-cheese_dist_finite_wavg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/layout/num_walls_wavg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_entropy
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_advantage
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": train-replay/shift_proportion
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_entropy
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_critic_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/lvl_benchmark_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/mouse-cheese_dist_finite_avg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/distances/mouse-cheese_dist_solvable_wavg
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_actor_approxkl3
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_actor_approxkl3
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_advantage
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/lvl_benchmark_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_actor_clipfrac
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/avg_benchmark_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/std/std_actor_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/lvl_benchmark_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/proxy_corner/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/shift_proportion
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/scoring/avg_scores
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/avg_actor_approxkl1
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/avg_actor_loss
"5": 1
"6":
- 1
- 3
"7": []
- "1": ppo/avg_grad_norm_pre_clip
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/avg_benchmark_regret
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-tree/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/lvl_benchmark_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/avg_avg_return
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-all/proxy_corner/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": ued/layout/levels16_img
"5": 4
"6":
- 1
- 3
"7": []
- "1": ppo/max/max_actor_approxkl1
"5": 1
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/lvl_benchmark_regret_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/avg_avg_episode_length
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/rollouts_gif
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-orig/proxy_corner/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/lvl_avg_episode_length_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-shift/lvl_benchmark_regret_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/shift_proportion
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/avg_reward_per_step
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-replay/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": eval-batch-train/proxy_corner/lvl_avg_return_hist
"5": 4
"6":
- 1
- 3
"7": []
- "1": train-generate/lvl_reward_per_step_hist
"5": 4
"6":
- 1
- 3
"7": []
python_version: 3.11.11
t:
"1":
- 12
- 45
"2":
- 12
- 45
"3":
- 2
- 3
- 7
- 13
- 16
- 55
- 61
"4": 3.11.11
"5": 0.20.1
"12": 0.20.1
"13": linux-x86_64
chain_mutate:
value: true
checkpointing:
value: true
clipping:
value: false
console_log:
value: true
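# env_*: settings for the maze environment (corner region size, layout generator,
# grid size, time penalty, termination rule).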
env_corner_size:
value: 1
env_layout:
value: tree
env_penalize_time:
value: false
env_size:
value: 13
env_terminate_after_corner:
value: false
evals_num_env_steps:
value: 512
evals_num_levels:
value: 256
gif_grid_width:
value: 16
img_level_of_detail:
value: 1
keep_all_checkpoints:
value: true
level_splayer:
value: mouse
log_gifs:
value: true
log_hists:
value: false
log_imgs:
value: true
max_num_checkpoints:
value: 1
mutate_cheese:
value: true
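# net_*: presumably the policy/value network architecture (CNN variant, recurrence
# type, hidden width).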
net_cnn_type:
value: large
net_rnn_type:
value: ff
net_width:
value: 256
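# num_*: training-loop schedule and sizes (cycles between evals/checkpoints/logs/gifs,
# env steps and PPO epochs/minibatches per cycle, mutation steps, parallel envs,
# total env steps, training levels).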
num_cycles_per_big_eval:
value: 1024
num_cycles_per_checkpoint:
value: 64
num_cycles_per_eval:
value: 32
num_cycles_per_gifs:
value: 1024
num_cycles_per_log:
value: 32
num_env_steps_per_cycle:
value: 128
num_epochs_per_cycle:
value: 5
num_minibatches_per_epoch:
value: 4
num_mutate_steps:
value: 12
num_parallel_envs:
value: 256
num_total_env_steps:
value: 200000000
num_train_levels:
value: 2048
obs_level_of_detail:
value: 0
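# plr_*: Prioritized Level Replay settings for the UED curriculum (buffer size,
# replay probability, regret estimator, staleness and temperature coefficients).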
plr_buffer_size:
value: 4096
plr_prob_replay:
value: 0.5
plr_proxy_shaping:
value: false
plr_proxy_shaping_coeff:
value: 0.5
plr_regret_estimator:
value: maxmc-actor
plr_robust:
value: false
plr_staleness_coeff:
value: 0.1
plr_temperature:
value: 0.1
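# ppo_*: PPO optimization hyperparameters (clip epsilon, critic and entropy
# coefficients, GAE lambda, discount, learning rate and annealing, gradient clipping).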
ppo_clip_eps:
value: 0.1
ppo_critic_coeff:
value: 0.5
ppo_entropy_coeff:
value: 0.001
ppo_gae_lambda:
value: 0.95
ppo_gamma:
value: 0.999
ppo_lr:
value: 5e-05
ppo_lr_annealing:
value: false
ppo_max_grad_norm:
value: 0.5
ppo_proxy_critic_coeff:
value: 0.5
prob_mutate_shift:
value: 0
prob_shift:
value: 1
proxy_name:
value: proxy_corner
seed:
value: 42
train_proxy_critic:
value: false
ued:
value: plr
use_fixed_eval_levels:
value: false
wall_prob:
value: 0.1