|
_wandb: |
|
value: |
|
cli_version: 0.20.1 |
|
m: |
|
- "1": step/ppo-update-after |
|
"6": |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_actor_clipfrac |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/levels_cheese_is_in_corner_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": step/env-step-all-after |
|
"6": |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/prop_walls_wavg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/visit_patterns/prev_batch_level_ids_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_actor_clipfrac |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_advantage |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_critic_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/cheese-corne_dist_wavg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/prop_walls_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_critic_clipfrac |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/num_walls_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/avg_benchmark_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/cheese-corne_dist_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_grad_norm_pre_clip |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/solvable_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/scoring/scores_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/visit_patterns/first_visit_time_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_critic_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_critic_clipfrac |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_entropy |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/lvl_benchmark_regret_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/visit_patterns/last_visit_time_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/eta |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_actor_approxkl3 |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/avg_benchmark_regret |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/prop_walls_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_critic_clipfrac |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/lvl_benchmark_regret_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/mouse-cheese_dist_solvable_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/visit_patterns/num_replay_batches |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_grad_norm_pre_clip |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/avg_benchmark_regret |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/avg_benchmark_regret |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/solvable_wavg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/visit_patterns/avg_first_visit_time |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_actor_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/cheese-corner_dist_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/visit_patterns/avg_last_visit_time |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_actor_approxkl1 |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/solvable_num |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/num_walls_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/avg_benchmark_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/avg_benchmark_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/mouse-cheese_dist_finite_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/mouse-cheese_dist_finite_wavg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/num_walls_wavg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_entropy |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_advantage |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/shift_proportion |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_entropy |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_critic_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/lvl_benchmark_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/mouse-cheese_dist_finite_avg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/distances/mouse-cheese_dist_solvable_wavg |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_actor_approxkl3 |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_actor_approxkl3 |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_advantage |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/lvl_benchmark_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_actor_clipfrac |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/avg_benchmark_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/std/std_actor_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/lvl_benchmark_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/proxy_corner/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/shift_proportion |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/scoring/avg_scores |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_actor_approxkl1 |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_actor_loss |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/avg_grad_norm_pre_clip |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/avg_benchmark_regret |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-tree/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/lvl_benchmark_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/avg_avg_return |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-all/proxy_corner/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ued/layout/levels16_img |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": ppo/max/max_actor_approxkl1 |
|
"5": 1 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/lvl_benchmark_regret_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/avg_avg_episode_length |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/rollouts_gif |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-orig/proxy_corner/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/lvl_avg_episode_length_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-shift/lvl_benchmark_regret_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/shift_proportion |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/avg_reward_per_step |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-replay/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": eval-batch-train/proxy_corner/lvl_avg_return_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
- "1": train-generate/lvl_reward_per_step_hist |
|
"5": 4 |
|
"6": |
|
- 1 |
|
- 3 |
|
"7": [] |
|
python_version: 3.11.11 |
|
t: |
|
"1": |
|
- 12 |
|
- 45 |
|
"2": |
|
- 12 |
|
- 45 |
|
"3": |
|
- 2 |
|
- 3 |
|
- 7 |
|
- 13 |
|
- 16 |
|
- 55 |
|
- 61 |
|
"4": 3.11.11 |
|
"5": 0.20.1 |
|
"12": 0.20.1 |
|
"13": linux-x86_64 |
|
chain_mutate: |
|
value: true |
|
checkpointing: |
|
value: true |
|
clipping: |
|
value: false |
|
console_log: |
|
value: true |
|
env_corner_size: |
|
value: 1 |
|
env_layout: |
|
value: tree |
|
env_penalize_time: |
|
value: false |
|
env_size: |
|
value: 13 |
|
env_terminate_after_corner: |
|
value: false |
|
evals_num_env_steps: |
|
value: 512 |
|
evals_num_levels: |
|
value: 256 |
|
gif_grid_width: |
|
value: 16 |
|
img_level_of_detail: |
|
value: 1 |
|
keep_all_checkpoints: |
|
value: true |
|
level_splayer: |
|
value: mouse |
|
log_gifs: |
|
value: true |
|
log_hists: |
|
value: false |
|
log_imgs: |
|
value: true |
|
max_num_checkpoints: |
|
value: 1 |
|
mutate_cheese: |
|
value: true |
|
net_cnn_type: |
|
value: large |
|
net_rnn_type: |
|
value: ff |
|
net_width: |
|
value: 256 |
|
num_cycles_per_big_eval: |
|
value: 1024 |
|
num_cycles_per_checkpoint: |
|
value: 64 |
|
num_cycles_per_eval: |
|
value: 32 |
|
num_cycles_per_gifs: |
|
value: 1024 |
|
num_cycles_per_log: |
|
value: 32 |
|
num_env_steps_per_cycle: |
|
value: 128 |
|
num_epochs_per_cycle: |
|
value: 5 |
|
num_minibatches_per_epoch: |
|
value: 4 |
|
num_mutate_steps: |
|
value: 12 |
|
num_parallel_envs: |
|
value: 256 |
|
num_total_env_steps: |
|
value: 200000000 |
|
num_train_levels: |
|
value: 2048 |
|
obs_level_of_detail: |
|
value: 0 |
|
plr_buffer_size: |
|
value: 4096 |
|
plr_prob_replay: |
|
value: 0.5 |
|
plr_proxy_shaping: |
|
value: false |
|
plr_proxy_shaping_coeff: |
|
value: 0.5 |
|
plr_regret_estimator: |
|
value: maxmc-actor |
|
plr_robust: |
|
value: false |
|
plr_staleness_coeff: |
|
value: 0.1 |
|
plr_temperature: |
|
value: 0.1 |
|
ppo_clip_eps: |
|
value: 0.1 |
|
ppo_critic_coeff: |
|
value: 0.5 |
|
ppo_entropy_coeff: |
|
value: 0.001 |
|
ppo_gae_lambda: |
|
value: 0.95 |
|
ppo_gamma: |
|
value: 0.999 |
|
ppo_lr: |
|
value: 5e-05 |
|
ppo_lr_annealing: |
|
value: false |
|
ppo_max_grad_norm: |
|
value: 0.5 |
|
ppo_proxy_critic_coeff: |
|
value: 0.5 |
|
prob_mutate_shift: |
|
value: 0 |
|
prob_shift: |
|
value: 1 |
|
proxy_name: |
|
value: proxy_corner |
|
seed: |
|
value: 42 |
|
train_proxy_critic: |
|
value: false |
|
ued: |
|
value: plr |
|
use_fixed_eval_levels: |
|
value: false |
|
wall_prob: |
|
value: 0.1 |
|
|