{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22399944000139999, "eval_steps": 500, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015101841517857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 606.2172241210938, "completions/mean_terminated_length": 552.70703125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 9.333310000058333e-05, "grad_norm": 0.1384008526802063, "learning_rate": 2e-07, "loss": -0.0147, "num_tokens": 40532698.0, "reward": 0.4949602484703064, "reward_std": 0.2629406154155731, "rewards/simpleverify_reward/mean": 0.4949602484703064, "rewards/simpleverify_reward/std": 0.4999788999557495, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00018666620000116666, "grad_norm": 0.13450804352760315, "learning_rate": 2e-07, "loss": 0.0449, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00027999930000175, "grad_norm": 0.1348700374364853, "learning_rate": 2e-07, "loss": 0.0278, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0003733324000023333, "grad_norm": 0.1597312092781067, "learning_rate": 2e-07, "loss": -0.0179, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0004666655000029167, "grad_norm": 0.1616683453321457, "learning_rate": 2e-07, "loss": 0.0235, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0005599986000035, "grad_norm": 0.1294235736131668, "learning_rate": 2e-07, "loss": -0.0039, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0006533317000040833, "grad_norm": 0.12849809229373932, "learning_rate": 2e-07, "loss": 0.0026, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0007466648000046666, "grad_norm": 0.12978479266166687, "learning_rate": 2e-07, "loss": -0.0078, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00083999790000525, "grad_norm": 0.13568826019763947, "learning_rate": 2e-07, "loss": 0.04, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0009333310000058334, "grad_norm": 0.14199160039424896, "learning_rate": 2e-07, "loss": 0.0345, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0010266641000064166, "grad_norm": 0.14209267497062683, "learning_rate": 2e-07, "loss": 0.0078, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.001119997200007, "grad_norm": 0.14880694448947906, "learning_rate": 2e-07, "loss": 0.0261, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0012133303000075833, "grad_norm": 0.14299704134464264, "learning_rate": 2e-07, "loss": -0.0346, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0013066634000081666, "grad_norm": 0.14938555657863617, "learning_rate": 2e-07, "loss": 0.0212, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00139999650000875, "grad_norm": 0.14810962975025177, "learning_rate": 2e-07, "loss": -0.0232, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0014933296000093333, "grad_norm": 0.14529141783714294, "learning_rate": 2e-07, "loss": 0.0658, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0015866627000099165, "grad_norm": 0.1455274224281311, "learning_rate": 2e-07, "loss": -0.0176, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0016799958000105, "grad_norm": 0.16608372330665588, "learning_rate": 2e-07, "loss": 0.0133, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0017733289000110833, "grad_norm": 0.15505066514015198, "learning_rate": 2e-07, "loss": 0.0161, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0018666620000116667, "grad_norm": 0.14904393255710602, "learning_rate": 2e-07, "loss": 0.0315, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00195999510001225, "grad_norm": 0.14434808492660522, "learning_rate": 2e-07, "loss": 0.035, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.002053328200012833, "grad_norm": 0.1571751832962036, "learning_rate": 2e-07, "loss": -0.017, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0021466613000134167, "grad_norm": 0.16357989609241486, "learning_rate": 2e-07, "loss": 0.0199, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.002239994400014, "grad_norm": 0.18290925025939941, "learning_rate": 2e-07, "loss": 0.0082, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.002333327500014583, "grad_norm": 0.1514730155467987, "learning_rate": 2e-07, "loss": 0.0354, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0024266606000151666, "grad_norm": 0.14194783568382263, "learning_rate": 2e-07, "loss": 0.0168, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00251999370001575, "grad_norm": 0.1521742343902588, "learning_rate": 2e-07, "loss": 0.0935, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.002613326800016333, "grad_norm": 0.13186417520046234, "learning_rate": 2e-07, "loss": -0.0332, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0027066599000169166, "grad_norm": 0.20116032660007477, "learning_rate": 2e-07, "loss": 0.006, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0027999930000175, "grad_norm": 0.1515357494354248, "learning_rate": 2e-07, "loss": -0.0256, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.002893326100018083, "grad_norm": 0.15172889828681946, "learning_rate": 2e-07, "loss": 0.019, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0029866592000186666, "grad_norm": 0.15013325214385986, "learning_rate": 2e-07, "loss": -0.0013, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00307999230001925, "grad_norm": 0.15168893337249756, "learning_rate": 2e-07, "loss": 0.0498, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.003173325400019833, "grad_norm": 0.18933169543743134, "learning_rate": 2e-07, "loss": 0.0524, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0032666585000204165, "grad_norm": 0.16392450034618378, "learning_rate": 2e-07, "loss": 0.0415, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.003359991600021, "grad_norm": 0.19064961373806, "learning_rate": 2e-07, "loss": 0.0346, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0034533247000215835, "grad_norm": 0.1580137312412262, "learning_rate": 2e-07, "loss": 0.0544, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0035466578000221665, "grad_norm": 0.19043877720832825, "learning_rate": 2e-07, "loss": 0.0709, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00363999090002275, "grad_norm": 0.204293355345726, "learning_rate": 2e-07, "loss": 0.0375, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0037333240000233334, "grad_norm": 0.16858668625354767, "learning_rate": 2e-07, "loss": -0.0164, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0038266571000239165, "grad_norm": 0.16858167946338654, "learning_rate": 2e-07, "loss": 0.0138, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0039199902000245, "grad_norm": 0.1629207730293274, "learning_rate": 2e-07, "loss": 0.0197, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004013323300025083, "grad_norm": 0.21567024290561676, "learning_rate": 2e-07, "loss": 0.048, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004106656400025666, "grad_norm": 0.22543172538280487, "learning_rate": 2e-07, "loss": 0.0087, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00419998950002625, "grad_norm": 0.1762998253107071, "learning_rate": 2e-07, "loss": 0.0206, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004293322600026833, "grad_norm": 0.20864303410053253, "learning_rate": 2e-07, "loss": 0.0375, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004386655700027417, "grad_norm": 0.19752885401248932, "learning_rate": 2e-07, "loss": 0.0183, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004479988800028, "grad_norm": 0.1848761886358261, "learning_rate": 2e-07, "loss": 0.0478, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004573321900028583, "grad_norm": 0.27787908911705017, "learning_rate": 2e-07, "loss": 0.0309, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004666655000029166, "grad_norm": 0.2812020778656006, "learning_rate": 2e-07, "loss": 0.0198, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00475998810002975, "grad_norm": 0.21986417472362518, "learning_rate": 2e-07, "loss": 0.0374, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004853321200030333, "grad_norm": 0.2621350884437561, "learning_rate": 2e-07, "loss": 0.021, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.004946654300030917, "grad_norm": 0.20501263439655304, "learning_rate": 2e-07, "loss": 0.0191, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0050399874000315, "grad_norm": 0.1719241738319397, "learning_rate": 2e-07, "loss": -0.0386, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005133320500032083, "grad_norm": 0.2824048697948456, "learning_rate": 2e-07, "loss": -0.034, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005226653600032666, "grad_norm": 0.2427029013633728, "learning_rate": 2e-07, "loss": -0.0367, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00531998670003325, "grad_norm": 0.22163940966129303, "learning_rate": 2e-07, "loss": 0.0107, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005413319800033833, "grad_norm": 0.35862430930137634, "learning_rate": 2e-07, "loss": 0.0754, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005506652900034417, "grad_norm": 0.2770756483078003, "learning_rate": 2e-07, "loss": 0.02, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005599986000035, "grad_norm": 0.25597915053367615, "learning_rate": 2e-07, "loss": 0.0175, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005693319100035584, "grad_norm": 0.2557786703109741, "learning_rate": 2e-07, "loss": -0.0224, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005786652200036166, "grad_norm": 0.30547863245010376, "learning_rate": 2e-07, "loss": 0.0494, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00587998530003675, "grad_norm": 0.3096722364425659, "learning_rate": 2e-07, "loss": -0.0235, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.005973318400037333, "grad_norm": 0.26014670729637146, "learning_rate": 2e-07, "loss": 0.0719, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01416015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 602.6310424804688, "completions/mean_terminated_length": 552.453857421875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.006066651500037917, "grad_norm": 0.1104929968714714, "learning_rate": 2e-07, "loss": 0.0383, "num_tokens": 80742274.0, "reward": 0.5538853406906128, "reward_std": 0.21083912253379822, "rewards/simpleverify_reward/mean": 0.5538853406906128, "rewards/simpleverify_reward/std": 0.49709224700927734, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0061599846000385, "grad_norm": 0.11229357123374939, "learning_rate": 2e-07, "loss": 0.0065, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0062533177000390835, "grad_norm": 0.12859536707401276, "learning_rate": 2e-07, "loss": 0.0236, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.006346650800039666, "grad_norm": 0.11728814244270325, "learning_rate": 2e-07, "loss": 0.0706, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00643998390004025, "grad_norm": 0.13092705607414246, "learning_rate": 2e-07, "loss": 0.0183, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.006533317000040833, "grad_norm": 0.11778075993061066, "learning_rate": 2e-07, "loss": 0.0162, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0066266501000414165, "grad_norm": 0.1319500207901001, "learning_rate": 2e-07, "loss": 0.0406, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.006719983200042, "grad_norm": 0.11401458084583282, "learning_rate": 2e-07, "loss": -0.0647, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0068133163000425835, "grad_norm": 0.11675283312797546, "learning_rate": 2e-07, "loss": 0.0509, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.006906649400043167, "grad_norm": 0.12296499311923981, "learning_rate": 2e-07, "loss": 0.0684, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0069999825000437495, "grad_norm": 0.1141868457198143, "learning_rate": 2e-07, "loss": 0.0491, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007093315600044333, "grad_norm": 0.11803154647350311, "learning_rate": 2e-07, "loss": -0.0113, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0071866487000449165, "grad_norm": 0.12159748375415802, "learning_rate": 2e-07, "loss": 0.0246, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0072799818000455, "grad_norm": 0.13280890882015228, "learning_rate": 2e-07, "loss": 0.044, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007373314900046083, "grad_norm": 0.11563581228256226, "learning_rate": 2e-07, "loss": 0.0591, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007466648000046667, "grad_norm": 0.12397162616252899, "learning_rate": 2e-07, "loss": 0.0293, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0075599811000472495, "grad_norm": 0.12922480702400208, "learning_rate": 2e-07, "loss": 0.0283, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007653314200047833, "grad_norm": 0.11023665219545364, "learning_rate": 2e-07, "loss": 0.0284, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007746647300048416, "grad_norm": 0.11985830962657928, "learning_rate": 2e-07, "loss": 0.0829, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007839980400049, "grad_norm": 0.1204838901758194, "learning_rate": 2e-07, "loss": 0.0369, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.007933313500049582, "grad_norm": 0.12884598970413208, "learning_rate": 2e-07, "loss": -0.0038, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008026646600050166, "grad_norm": 0.11503426730632782, "learning_rate": 2e-07, "loss": 0.0121, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00811997970005075, "grad_norm": 0.1093788594007492, "learning_rate": 2e-07, "loss": 0.046, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008213312800051333, "grad_norm": 0.1122165322303772, "learning_rate": 2e-07, "loss": 0.0541, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008306645900051916, "grad_norm": 0.11874477565288544, "learning_rate": 2e-07, "loss": -0.024, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0083999790000525, "grad_norm": 0.12048187106847763, "learning_rate": 2e-07, "loss": 0.0459, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008493312100053083, "grad_norm": 0.13318213820457458, "learning_rate": 2e-07, "loss": 0.0106, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008586645200053667, "grad_norm": 0.13598953187465668, "learning_rate": 2e-07, "loss": 0.0471, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00867997830005425, "grad_norm": 0.1182006448507309, "learning_rate": 2e-07, "loss": 0.0114, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008773311400054834, "grad_norm": 0.1281680166721344, "learning_rate": 2e-07, "loss": 0.0171, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008866644500055417, "grad_norm": 0.12943843007087708, "learning_rate": 2e-07, "loss": -0.0092, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.008959977600056, "grad_norm": 0.11512872576713562, "learning_rate": 2e-07, "loss": 0.0429, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009053310700056582, "grad_norm": 0.12001234292984009, "learning_rate": 2e-07, "loss": -0.0073, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009146643800057166, "grad_norm": 0.1210739016532898, "learning_rate": 2e-07, "loss": 0.0295, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00923997690005775, "grad_norm": 0.124770887196064, "learning_rate": 2e-07, "loss": 0.02, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009333310000058333, "grad_norm": 0.11245358735322952, "learning_rate": 2e-07, "loss": 0.0193, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009426643100058916, "grad_norm": 0.11386345326900482, "learning_rate": 2e-07, "loss": 0.0639, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0095199762000595, "grad_norm": 0.12418945878744125, "learning_rate": 2e-07, "loss": 0.0139, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009613309300060083, "grad_norm": 0.12058115750551224, "learning_rate": 2e-07, "loss": 0.0484, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009706642400060667, "grad_norm": 0.12680678069591522, "learning_rate": 2e-07, "loss": 0.002, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.00979997550006125, "grad_norm": 0.13684795796871185, "learning_rate": 2e-07, "loss": 0.0507, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009893308600061834, "grad_norm": 0.11940007656812668, "learning_rate": 2e-07, "loss": 0.0325, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.009986641700062417, "grad_norm": 0.11589401960372925, "learning_rate": 2e-07, "loss": 0.0425, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010079974800063, "grad_norm": 0.129934161901474, "learning_rate": 2e-07, "loss": 0.0255, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010173307900063584, "grad_norm": 0.1248784214258194, "learning_rate": 2e-07, "loss": 0.0002, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010266641000064166, "grad_norm": 0.12240680307149887, "learning_rate": 2e-07, "loss": 0.0264, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010359974100064749, "grad_norm": 0.12803420424461365, "learning_rate": 2e-07, "loss": 0.0163, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010453307200065333, "grad_norm": 0.11498578637838364, "learning_rate": 2e-07, "loss": 0.0484, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010546640300065916, "grad_norm": 0.11552631109952927, "learning_rate": 2e-07, "loss": 0.0551, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0106399734000665, "grad_norm": 0.11569365859031677, "learning_rate": 2e-07, "loss": 0.0066, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010733306500067083, "grad_norm": 0.11575288325548172, "learning_rate": 2e-07, "loss": 0.0326, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.010826639600067666, "grad_norm": 0.11555801331996918, "learning_rate": 2e-07, "loss": -0.0182, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01091997270006825, "grad_norm": 0.12371595948934555, "learning_rate": 2e-07, "loss": -0.0353, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011013305800068833, "grad_norm": 0.11161545664072037, "learning_rate": 2e-07, "loss": -0.0043, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011106638900069417, "grad_norm": 0.125113844871521, "learning_rate": 2e-07, "loss": 0.0181, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01119997200007, "grad_norm": 0.12157007306814194, "learning_rate": 2e-07, "loss": -0.01, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011293305100070584, "grad_norm": 0.12889772653579712, "learning_rate": 2e-07, "loss": 0.0606, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011386638200071167, "grad_norm": 0.12442421913146973, "learning_rate": 2e-07, "loss": 0.0378, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011479971300071749, "grad_norm": 0.12123935669660568, "learning_rate": 2e-07, "loss": 0.0293, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011573304400072332, "grad_norm": 0.11454013735055923, "learning_rate": 2e-07, "loss": -0.033, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011666637500072916, "grad_norm": 0.14208470284938812, "learning_rate": 2e-07, "loss": 0.0569, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0117599706000735, "grad_norm": 0.13243162631988525, "learning_rate": 2e-07, "loss": -0.0346, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011853303700074083, "grad_norm": 0.11157102137804031, "learning_rate": 2e-07, "loss": 0.0, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.011946636800074666, "grad_norm": 0.13012441992759705, "learning_rate": 2e-07, "loss": 0.011, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013863699776785698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 605.7926025390625, "completions/mean_terminated_length": 556.72509765625, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.01203996990007525, "grad_norm": 0.12900763750076294, "learning_rate": 2e-07, "loss": 0.0005, "num_tokens": 121267841.0, "reward": 0.552001953125, "reward_std": 0.21141129732131958, "rewards/simpleverify_reward/mean": 0.552001953125, "rewards/simpleverify_reward/std": 0.49729272723197937, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012133303000075833, "grad_norm": 0.11976109445095062, "learning_rate": 2e-07, "loss": 0.0134, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012226636100076417, "grad_norm": 0.1242467537522316, "learning_rate": 2e-07, "loss": -0.0174, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012319969200077, "grad_norm": 0.1271137148141861, "learning_rate": 2e-07, "loss": 0.0942, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012413302300077584, "grad_norm": 0.12402600049972534, "learning_rate": 2e-07, "loss": -0.0061, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012506635400078167, "grad_norm": 0.115492083132267, "learning_rate": 2e-07, "loss": 0.0171, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01259996850007875, "grad_norm": 0.12605193257331848, "learning_rate": 2e-07, "loss": 0.0221, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012693301600079332, "grad_norm": 0.10870201885700226, "learning_rate": 2e-07, "loss": 0.0135, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012786634700079916, "grad_norm": 0.11895795166492462, "learning_rate": 2e-07, "loss": 0.0295, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0128799678000805, "grad_norm": 0.1160581037402153, "learning_rate": 2e-07, "loss": -0.0232, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.012973300900081083, "grad_norm": 0.12504318356513977, "learning_rate": 2e-07, "loss": 0.0529, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013066634000081666, "grad_norm": 0.11358392238616943, "learning_rate": 2e-07, "loss": 0.0407, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01315996710008225, "grad_norm": 0.11238788813352585, "learning_rate": 2e-07, "loss": 0.0048, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013253300200082833, "grad_norm": 0.11288043856620789, "learning_rate": 2e-07, "loss": 0.012, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013346633300083417, "grad_norm": 0.12846171855926514, "learning_rate": 2e-07, "loss": -0.0201, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013439966400084, "grad_norm": 0.1235656589269638, "learning_rate": 2e-07, "loss": 0.0596, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013533299500084583, "grad_norm": 0.1266559213399887, "learning_rate": 2e-07, "loss": 0.0439, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013626632600085167, "grad_norm": 0.11292146891355515, "learning_rate": 2e-07, "loss": 0.0322, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01371996570008575, "grad_norm": 0.12272363156080246, "learning_rate": 2e-07, "loss": 0.0592, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013813298800086334, "grad_norm": 0.11472923308610916, "learning_rate": 2e-07, "loss": 0.0068, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013906631900086916, "grad_norm": 0.11798423528671265, "learning_rate": 2e-07, "loss": 0.0426, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.013999965000087499, "grad_norm": 0.12502296268939972, "learning_rate": 2e-07, "loss": 0.0166, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014093298100088083, "grad_norm": 0.11589020490646362, "learning_rate": 2e-07, "loss": 0.0337, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014186631200088666, "grad_norm": 0.12701231241226196, "learning_rate": 2e-07, "loss": 0.0482, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01427996430008925, "grad_norm": 0.10634884983301163, "learning_rate": 2e-07, "loss": 0.0042, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014373297400089833, "grad_norm": 0.11809704452753067, "learning_rate": 2e-07, "loss": 0.0196, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014466630500090416, "grad_norm": 0.13478153944015503, "learning_rate": 2e-07, "loss": 0.0058, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014559963600091, "grad_norm": 0.1195162758231163, "learning_rate": 2e-07, "loss": 0.056, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014653296700091583, "grad_norm": 0.11595071107149124, "learning_rate": 2e-07, "loss": 0.0649, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014746629800092167, "grad_norm": 0.12632755935192108, "learning_rate": 2e-07, "loss": -0.0015, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01483996290009275, "grad_norm": 0.11408816277980804, "learning_rate": 2e-07, "loss": 0.051, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.014933296000093334, "grad_norm": 0.1233525425195694, "learning_rate": 2e-07, "loss": 0.0814, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015026629100093917, "grad_norm": 0.11964831501245499, "learning_rate": 2e-07, "loss": -0.0034, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015119962200094499, "grad_norm": 0.11419013142585754, "learning_rate": 2e-07, "loss": 0.0566, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015213295300095082, "grad_norm": 0.11575242877006531, "learning_rate": 2e-07, "loss": 0.0263, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015306628400095666, "grad_norm": 0.11876792460680008, "learning_rate": 2e-07, "loss": -0.0166, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01539996150009625, "grad_norm": 0.12759201228618622, "learning_rate": 2e-07, "loss": 0.0211, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015493294600096833, "grad_norm": 0.12791675329208374, "learning_rate": 2e-07, "loss": 0.0246, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015586627700097416, "grad_norm": 0.11425651609897614, "learning_rate": 2e-07, "loss": -0.0075, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015679960800098, "grad_norm": 0.12347857654094696, "learning_rate": 2e-07, "loss": 0.0169, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015773293900098583, "grad_norm": 0.1158008798956871, "learning_rate": 2e-07, "loss": -0.0052, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.015866627000099165, "grad_norm": 0.12241604924201965, "learning_rate": 2e-07, "loss": 0.0353, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01595996010009975, "grad_norm": 0.11978351324796677, "learning_rate": 2e-07, "loss": 0.0497, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016053293200100332, "grad_norm": 0.11186036467552185, "learning_rate": 2e-07, "loss": -0.0239, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016146626300100917, "grad_norm": 0.11632370948791504, "learning_rate": 2e-07, "loss": 0.0577, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0162399594001015, "grad_norm": 0.12881873548030853, "learning_rate": 2e-07, "loss": 0.0534, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016333292500102084, "grad_norm": 0.11721882969141006, "learning_rate": 2e-07, "loss": -0.018, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016426625600102666, "grad_norm": 0.1351327896118164, "learning_rate": 2e-07, "loss": -0.0178, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01651995870010325, "grad_norm": 0.11248568445444107, "learning_rate": 2e-07, "loss": -0.0057, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016613291800103833, "grad_norm": 0.11764824390411377, "learning_rate": 2e-07, "loss": 0.0408, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016706624900104418, "grad_norm": 0.13204790651798248, "learning_rate": 2e-07, "loss": 0.0438, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016799958000105, "grad_norm": 0.12130002677440643, "learning_rate": 2e-07, "loss": 0.0207, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016893291100105585, "grad_norm": 0.11385934054851532, "learning_rate": 2e-07, "loss": 0.0587, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.016986624200106167, "grad_norm": 0.11729420721530914, "learning_rate": 2e-07, "loss": 0.0419, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017079957300106748, "grad_norm": 0.11200563609600067, "learning_rate": 2e-07, "loss": 0.0386, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017173290400107333, "grad_norm": 0.12005419284105301, "learning_rate": 2e-07, "loss": -0.0178, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017266623500107915, "grad_norm": 0.1239481270313263, "learning_rate": 2e-07, "loss": 0.042, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0173599566001085, "grad_norm": 0.11413433402776718, "learning_rate": 2e-07, "loss": 0.0456, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017453289700109082, "grad_norm": 0.12707901000976562, "learning_rate": 2e-07, "loss": 0.0366, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017546622800109667, "grad_norm": 0.12394179403781891, "learning_rate": 2e-07, "loss": 0.0301, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01763995590011025, "grad_norm": 0.1205565333366394, "learning_rate": 2e-07, "loss": 0.0374, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017733289000110834, "grad_norm": 0.11749666929244995, "learning_rate": 2e-07, "loss": -0.0035, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017826622100111416, "grad_norm": 0.11596448719501495, "learning_rate": 2e-07, "loss": 0.0627, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.017919955200112, "grad_norm": 0.12997643649578094, "learning_rate": 2e-07, "loss": 0.0235, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014439174107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 607.7674560546875, "completions/mean_terminated_length": 556.6622924804688, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.018013288300112583, "grad_norm": 0.12148799747228622, "learning_rate": 2e-07, "loss": 0.0042, "num_tokens": 161796159.0, "reward": 0.5532052516937256, "reward_std": 0.20556531846523285, "rewards/simpleverify_reward/mean": 0.5532051920890808, "rewards/simpleverify_reward/std": 0.4971654415130615, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.018106621400113165, "grad_norm": 0.11413375288248062, "learning_rate": 2e-07, "loss": 0.0473, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01819995450011375, "grad_norm": 0.12304969131946564, "learning_rate": 2e-07, "loss": 0.0029, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01829328760011433, "grad_norm": 0.1279825121164322, "learning_rate": 2e-07, "loss": -0.0084, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.018386620700114917, "grad_norm": 0.10730310529470444, "learning_rate": 2e-07, "loss": -0.0079, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0184799538001155, "grad_norm": 0.12900662422180176, "learning_rate": 2e-07, "loss": 0.0582, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.018573286900116084, "grad_norm": 0.1261545717716217, "learning_rate": 2e-07, "loss": 0.0585, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.018666620000116665, "grad_norm": 0.1182534247636795, "learning_rate": 2e-07, "loss": 0.0575, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01875995310011725, "grad_norm": 0.11720357090234756, "learning_rate": 2e-07, "loss": 0.016, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.018853286200117832, "grad_norm": 0.12043602764606476, "learning_rate": 2e-07, "loss": 0.0563, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.018946619300118418, "grad_norm": 0.11089618504047394, "learning_rate": 2e-07, "loss": 0.0261, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019039952400119, "grad_norm": 0.10863696038722992, "learning_rate": 2e-07, "loss": 0.0051, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019133285500119585, "grad_norm": 0.11304053664207458, "learning_rate": 2e-07, "loss": 0.0508, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019226618600120166, "grad_norm": 0.11691603809595108, "learning_rate": 2e-07, "loss": -0.0158, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019319951700120748, "grad_norm": 0.1256258636713028, "learning_rate": 2e-07, "loss": 0.0878, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019413284800121333, "grad_norm": 0.12094900012016296, "learning_rate": 2e-07, "loss": 0.0259, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019506617900121915, "grad_norm": 0.13027654588222504, "learning_rate": 2e-07, "loss": 0.0189, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0195999510001225, "grad_norm": 0.1252318024635315, "learning_rate": 2e-07, "loss": 0.0193, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019693284100123082, "grad_norm": 0.11989153176546097, "learning_rate": 2e-07, "loss": 0.0605, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019786617200123667, "grad_norm": 0.1289915293455124, "learning_rate": 2e-07, "loss": -0.0137, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.01987995030012425, "grad_norm": 0.11391989886760712, "learning_rate": 2e-07, "loss": 0.0234, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.019973283400124834, "grad_norm": 0.11034820228815079, "learning_rate": 2e-07, "loss": 0.0079, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020066616500125416, "grad_norm": 0.12173809111118317, "learning_rate": 2e-07, "loss": 0.007, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020159949600126, "grad_norm": 0.11439619958400726, "learning_rate": 2e-07, "loss": -0.0044, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020253282700126583, "grad_norm": 0.12216250598430634, "learning_rate": 2e-07, "loss": -0.0015, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020346615800127168, "grad_norm": 0.11854705214500427, "learning_rate": 2e-07, "loss": -0.0137, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02043994890012775, "grad_norm": 0.11740019917488098, "learning_rate": 2e-07, "loss": 0.0347, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02053328200012833, "grad_norm": 0.11064712703227997, "learning_rate": 2e-07, "loss": 0.0802, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020626615100128916, "grad_norm": 0.1068812757730484, "learning_rate": 2e-07, "loss": 0.0338, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020719948200129498, "grad_norm": 0.11171789467334747, "learning_rate": 2e-07, "loss": -0.0278, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020813281300130083, "grad_norm": 0.12349051237106323, "learning_rate": 2e-07, "loss": -0.0184, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.020906614400130665, "grad_norm": 0.1268196702003479, "learning_rate": 2e-07, "loss": -0.0025, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02099994750013125, "grad_norm": 0.11517728120088577, "learning_rate": 2e-07, "loss": 0.0095, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021093280600131832, "grad_norm": 0.11174178868532181, "learning_rate": 2e-07, "loss": 0.0186, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021186613700132417, "grad_norm": 0.12372954189777374, "learning_rate": 2e-07, "loss": 0.0155, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021279946800133, "grad_norm": 0.1380620300769806, "learning_rate": 2e-07, "loss": -0.0025, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021373279900133584, "grad_norm": 0.12020408362150192, "learning_rate": 2e-07, "loss": 0.0184, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021466613000134166, "grad_norm": 0.10718788951635361, "learning_rate": 2e-07, "loss": 0.0039, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02155994610013475, "grad_norm": 0.11507224291563034, "learning_rate": 2e-07, "loss": 0.0373, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021653279200135333, "grad_norm": 0.12471979856491089, "learning_rate": 2e-07, "loss": 0.0121, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.021746612300135915, "grad_norm": 0.1148390918970108, "learning_rate": 2e-07, "loss": 0.0884, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0218399454001365, "grad_norm": 0.10520583391189575, "learning_rate": 2e-07, "loss": -0.0148, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02193327850013708, "grad_norm": 0.12078914046287537, "learning_rate": 2e-07, "loss": 0.0656, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022026611600137667, "grad_norm": 0.12394321709871292, "learning_rate": 2e-07, "loss": 0.0058, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02211994470013825, "grad_norm": 0.11081356555223465, "learning_rate": 2e-07, "loss": 0.0124, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022213277800138834, "grad_norm": 0.13361530005931854, "learning_rate": 2e-07, "loss": 0.0723, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022306610900139415, "grad_norm": 0.11601588129997253, "learning_rate": 2e-07, "loss": 0.0673, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02239994400014, "grad_norm": 0.1133677214384079, "learning_rate": 2e-07, "loss": 0.0653, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022493277100140582, "grad_norm": 0.11076609790325165, "learning_rate": 2e-07, "loss": 0.0399, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022586610200141168, "grad_norm": 0.11817794293165207, "learning_rate": 2e-07, "loss": 0.0336, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02267994330014175, "grad_norm": 0.11222653090953827, "learning_rate": 2e-07, "loss": 0.0529, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022773276400142334, "grad_norm": 0.11679072678089142, "learning_rate": 2e-07, "loss": 0.0135, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022866609500142916, "grad_norm": 0.11334266513586044, "learning_rate": 2e-07, "loss": 0.0316, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.022959942600143498, "grad_norm": 0.11308363825082779, "learning_rate": 2e-07, "loss": 0.0573, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023053275700144083, "grad_norm": 0.12372971326112747, "learning_rate": 2e-07, "loss": 0.082, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023146608800144665, "grad_norm": 0.12267614156007767, "learning_rate": 2e-07, "loss": -0.0151, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02323994190014525, "grad_norm": 0.11206039041280746, "learning_rate": 2e-07, "loss": 0.0176, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023333275000145832, "grad_norm": 0.12499067932367325, "learning_rate": 2e-07, "loss": 0.0357, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023426608100146417, "grad_norm": 0.12153958529233932, "learning_rate": 2e-07, "loss": 0.0173, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023519941200147, "grad_norm": 0.12343767285346985, "learning_rate": 2e-07, "loss": 0.0651, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023613274300147584, "grad_norm": 0.11580204218626022, "learning_rate": 2e-07, "loss": 0.0082, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023706607400148166, "grad_norm": 0.15583446621894836, "learning_rate": 2e-07, "loss": 0.0155, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02379994050014875, "grad_norm": 0.11579901725053787, "learning_rate": 2e-07, "loss": 0.0042, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.023893273600149333, "grad_norm": 0.10898569226264954, "learning_rate": 2e-07, "loss": 0.0009, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014212472098214302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 606.5999145507812, "completions/mean_terminated_length": 556.2919311523438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.023986606700149918, "grad_norm": 0.12543149292469025, "learning_rate": 2e-07, "loss": 0.0598, "num_tokens": 202181815.0, "reward": 0.5600237250328064, "reward_std": 0.20419713854789734, "rewards/simpleverify_reward/mean": 0.5600237250328064, "rewards/simpleverify_reward/std": 0.4963884651660919, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0240799398001505, "grad_norm": 0.11861643195152283, "learning_rate": 2e-07, "loss": 0.0575, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02417327290015108, "grad_norm": 0.11964251101016998, "learning_rate": 2e-07, "loss": 0.0071, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024266606000151666, "grad_norm": 0.11687079817056656, "learning_rate": 2e-07, "loss": 0.0029, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024359939100152248, "grad_norm": 0.12559251487255096, "learning_rate": 2e-07, "loss": 0.0233, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024453272200152833, "grad_norm": 0.11664198338985443, "learning_rate": 2e-07, "loss": 0.0217, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024546605300153415, "grad_norm": 0.12424785643815994, "learning_rate": 2e-07, "loss": 0.0445, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024639938400154, "grad_norm": 0.11672412604093552, "learning_rate": 2e-07, "loss": 0.0064, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024733271500154582, "grad_norm": 0.11804558336734772, "learning_rate": 2e-07, "loss": 0.0397, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.024826604600155167, "grad_norm": 0.11709222942590714, "learning_rate": 2e-07, "loss": 0.0379, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02491993770015575, "grad_norm": 0.12235447019338608, "learning_rate": 2e-07, "loss": -0.0187, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025013270800156334, "grad_norm": 0.12662118673324585, "learning_rate": 2e-07, "loss": 0.0304, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025106603900156916, "grad_norm": 0.10628720372915268, "learning_rate": 2e-07, "loss": 0.0409, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0251999370001575, "grad_norm": 0.13236229121685028, "learning_rate": 2e-07, "loss": -0.0009, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025293270100158083, "grad_norm": 0.10969637334346771, "learning_rate": 2e-07, "loss": -0.0023, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025386603200158665, "grad_norm": 0.11970086395740509, "learning_rate": 2e-07, "loss": 0.0604, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02547993630015925, "grad_norm": 0.1275329291820526, "learning_rate": 2e-07, "loss": 0.0549, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02557326940015983, "grad_norm": 0.136390820145607, "learning_rate": 2e-07, "loss": 0.0522, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025666602500160417, "grad_norm": 0.11464012414216995, "learning_rate": 2e-07, "loss": 0.0298, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025759935600161, "grad_norm": 0.12442323565483093, "learning_rate": 2e-07, "loss": 0.0481, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025853268700161584, "grad_norm": 0.12034337222576141, "learning_rate": 2e-07, "loss": 0.0217, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.025946601800162165, "grad_norm": 0.12065837532281876, "learning_rate": 2e-07, "loss": 0.0426, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02603993490016275, "grad_norm": 0.1317777931690216, "learning_rate": 2e-07, "loss": -0.0177, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026133268000163332, "grad_norm": 0.12822996079921722, "learning_rate": 2e-07, "loss": -0.0009, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026226601100163918, "grad_norm": 0.11570777744054794, "learning_rate": 2e-07, "loss": 0.0552, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0263199342001645, "grad_norm": 0.12981736660003662, "learning_rate": 2e-07, "loss": 0.0368, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026413267300165084, "grad_norm": 0.12223238497972488, "learning_rate": 2e-07, "loss": -0.0233, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026506600400165666, "grad_norm": 0.11930467188358307, "learning_rate": 2e-07, "loss": 0.0127, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026599933500166248, "grad_norm": 0.11253789812326431, "learning_rate": 2e-07, "loss": 0.0343, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026693266600166833, "grad_norm": 0.10800273716449738, "learning_rate": 2e-07, "loss": 0.0182, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026786599700167415, "grad_norm": 0.12859350442886353, "learning_rate": 2e-07, "loss": 0.0228, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026879932800168, "grad_norm": 0.12045934796333313, "learning_rate": 2e-07, "loss": 0.0165, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.026973265900168582, "grad_norm": 0.11929840594530106, "learning_rate": 2e-07, "loss": 0.0396, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027066599000169167, "grad_norm": 0.11182188987731934, "learning_rate": 2e-07, "loss": 0.0102, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02715993210016975, "grad_norm": 0.11942093819379807, "learning_rate": 2e-07, "loss": -0.0245, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027253265200170334, "grad_norm": 0.116909921169281, "learning_rate": 2e-07, "loss": 0.0272, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027346598300170916, "grad_norm": 0.12012860178947449, "learning_rate": 2e-07, "loss": 0.0483, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0274399314001715, "grad_norm": 0.11081382632255554, "learning_rate": 2e-07, "loss": 0.0389, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027533264500172083, "grad_norm": 0.11655379086732864, "learning_rate": 2e-07, "loss": 0.0133, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027626597600172668, "grad_norm": 0.12386627495288849, "learning_rate": 2e-07, "loss": 0.011, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02771993070017325, "grad_norm": 0.13282302021980286, "learning_rate": 2e-07, "loss": 0.0175, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02781326380017383, "grad_norm": 0.12305509299039841, "learning_rate": 2e-07, "loss": 0.0232, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027906596900174416, "grad_norm": 0.12764370441436768, "learning_rate": 2e-07, "loss": 0.0465, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.027999930000174998, "grad_norm": 0.11132095009088516, "learning_rate": 2e-07, "loss": 0.0498, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028093263100175583, "grad_norm": 0.11686699837446213, "learning_rate": 2e-07, "loss": 0.0014, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028186596200176165, "grad_norm": 0.1156989187002182, "learning_rate": 2e-07, "loss": 0.031, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02827992930017675, "grad_norm": 0.12015429139137268, "learning_rate": 2e-07, "loss": 0.092, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028373262400177332, "grad_norm": 0.10886608809232712, "learning_rate": 2e-07, "loss": 0.038, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028466595500177917, "grad_norm": 0.12010408937931061, "learning_rate": 2e-07, "loss": -0.0338, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0285599286001785, "grad_norm": 0.12743006646633148, "learning_rate": 2e-07, "loss": 0.065, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028653261700179084, "grad_norm": 0.11614226549863815, "learning_rate": 2e-07, "loss": 0.0538, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028746594800179666, "grad_norm": 0.11234788596630096, "learning_rate": 2e-07, "loss": 0.0313, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02883992790018025, "grad_norm": 0.11347447335720062, "learning_rate": 2e-07, "loss": 0.015, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.028933261000180833, "grad_norm": 0.1314207762479782, "learning_rate": 2e-07, "loss": -0.0089, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029026594100181415, "grad_norm": 0.12442416697740555, "learning_rate": 2e-07, "loss": 0.0896, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029119927200182, "grad_norm": 0.11614236235618591, "learning_rate": 2e-07, "loss": 0.0082, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02921326030018258, "grad_norm": 0.12755386531352997, "learning_rate": 2e-07, "loss": 0.0157, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029306593400183167, "grad_norm": 0.11465509980916977, "learning_rate": 2e-07, "loss": -0.0032, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.02939992650018375, "grad_norm": 0.12995555996894836, "learning_rate": 2e-07, "loss": 0.0251, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029493259600184334, "grad_norm": 0.13707414269447327, "learning_rate": 2e-07, "loss": 0.0313, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029586592700184915, "grad_norm": 0.12843500077724457, "learning_rate": 2e-07, "loss": 0.0607, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0296799258001855, "grad_norm": 0.11542893201112747, "learning_rate": 2e-07, "loss": -0.0149, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029773258900186082, "grad_norm": 0.12541645765304565, "learning_rate": 2e-07, "loss": 0.0162, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.029866592000186667, "grad_norm": 0.1312170922756195, "learning_rate": 2e-07, "loss": 0.014, "step": 320 }, { "clip_ratio/high_max": 0.007659014401724562, "clip_ratio/high_mean": 0.003439755688305013, "clip_ratio/low_mean": 0.002222136659838725, "clip_ratio/low_min": 0.0002078174802591093, "clip_ratio/region_mean": 0.005661892442731187, "completions/clipped_ratio": 0.014404296875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 609.98681640625, "completions/mean_terminated_length": 559.039306640625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.02995992510018725, "grad_norm": 0.11453958600759506, "learning_rate": 2e-07, "loss": 0.0271, "num_tokens": 242896201.0, "reward": 0.5606515407562256, "reward_std": 0.20513592660427094, "rewards/simpleverify_reward/mean": 0.5606514811515808, "rewards/simpleverify_reward/std": 0.49631205201148987, "step": 321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030053258200187834, "grad_norm": 0.11788046360015869, "learning_rate": 2e-07, "loss": -0.0174, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030146591300188416, "grad_norm": 0.12439335137605667, "learning_rate": 2e-07, "loss": 0.0568, "step": 323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030239924400188998, "grad_norm": 0.11043991148471832, "learning_rate": 2e-07, "loss": 0.0301, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030333257500189583, "grad_norm": 0.15738534927368164, "learning_rate": 2e-07, "loss": 0.0921, "step": 325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030426590600190165, "grad_norm": 0.1203802227973938, "learning_rate": 2e-07, "loss": 0.0169, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03051992370019075, "grad_norm": 0.12494948506355286, "learning_rate": 2e-07, "loss": 0.0357, "step": 327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030613256800191332, "grad_norm": 0.11916864663362503, "learning_rate": 2e-07, "loss": -0.0414, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030706589900191917, "grad_norm": 0.11881279945373535, "learning_rate": 2e-07, "loss": 0.0315, "step": 329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0307999230001925, "grad_norm": 0.12076196819543839, "learning_rate": 2e-07, "loss": 0.0072, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030893256100193084, "grad_norm": 0.12143860012292862, "learning_rate": 2e-07, "loss": 0.0246, "step": 331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.030986589200193666, "grad_norm": 0.12284310907125473, "learning_rate": 2e-07, "loss": -0.0249, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03107992230019425, "grad_norm": 0.11416839808225632, "learning_rate": 2e-07, "loss": 0.0132, "step": 333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.031173255400194833, "grad_norm": 0.1261385828256607, "learning_rate": 2e-07, "loss": 0.0864, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03126658850019542, "grad_norm": 0.11621243506669998, "learning_rate": 2e-07, "loss": -0.0043, "step": 335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.031359921600196, "grad_norm": 0.11200471967458725, "learning_rate": 2e-07, "loss": 0.008, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03145325470019658, "grad_norm": 0.13593006134033203, "learning_rate": 2e-07, "loss": -0.0065, "step": 337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.031546587800197166, "grad_norm": 0.12017598003149033, "learning_rate": 2e-07, "loss": 0.0413, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03163992090019775, "grad_norm": 0.11766191571950912, "learning_rate": 2e-07, "loss": 0.0022, "step": 339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03173325400019833, "grad_norm": 0.12265715003013611, "learning_rate": 2e-07, "loss": -0.0016, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.031826587100198915, "grad_norm": 0.1262984275817871, "learning_rate": 2e-07, "loss": 0.0071, "step": 341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0319199202001995, "grad_norm": 0.11907317489385605, "learning_rate": 2e-07, "loss": 0.0478, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.032013253300200085, "grad_norm": 0.1633232980966568, "learning_rate": 2e-07, "loss": 0.0285, "step": 343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.032106586400200664, "grad_norm": 0.11573426425457001, "learning_rate": 2e-07, "loss": 0.0139, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03219991950020125, "grad_norm": 0.11459071934223175, "learning_rate": 2e-07, "loss": 0.0555, "step": 345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.032293252600201834, "grad_norm": 0.11378493905067444, "learning_rate": 2e-07, "loss": 0.0129, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03238658570020242, "grad_norm": 0.11712933331727982, "learning_rate": 2e-07, "loss": 0.0479, "step": 347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.032479918800203, "grad_norm": 0.12378206849098206, "learning_rate": 2e-07, "loss": -0.0291, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03257325190020358, "grad_norm": 0.11774018406867981, "learning_rate": 2e-07, "loss": 0.0424, "step": 349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03266658500020417, "grad_norm": 0.11160732060670853, "learning_rate": 2e-07, "loss": 0.0686, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.032759918100204746, "grad_norm": 0.1186843290925026, "learning_rate": 2e-07, "loss": 0.0547, "step": 351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03285325120020533, "grad_norm": 0.1090741753578186, "learning_rate": 2e-07, "loss": 0.0303, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03294658430020592, "grad_norm": 0.11925852298736572, "learning_rate": 2e-07, "loss": 0.0263, "step": 353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0330399174002065, "grad_norm": 0.1273898333311081, "learning_rate": 2e-07, "loss": 0.0313, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03313325050020708, "grad_norm": 0.128389373421669, "learning_rate": 2e-07, "loss": 0.011, "step": 355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.033226583600207665, "grad_norm": 0.12090560793876648, "learning_rate": 2e-07, "loss": 0.0393, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03331991670020825, "grad_norm": 0.12877219915390015, "learning_rate": 2e-07, "loss": 0.0315, "step": 357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.033413249800208836, "grad_norm": 0.13702577352523804, "learning_rate": 2e-07, "loss": 0.0422, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.033506582900209414, "grad_norm": 0.12415044009685516, "learning_rate": 2e-07, "loss": 0.023, "step": 359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03359991600021, "grad_norm": 0.11190974712371826, "learning_rate": 2e-07, "loss": 0.0343, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.033693249100210584, "grad_norm": 0.11857246607542038, "learning_rate": 2e-07, "loss": 0.0174, "step": 361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03378658220021117, "grad_norm": 0.11898130923509598, "learning_rate": 2e-07, "loss": 0.0709, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03387991530021175, "grad_norm": 0.12032792717218399, "learning_rate": 2e-07, "loss": 0.051, "step": 363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03397324840021233, "grad_norm": 0.11882678419351578, "learning_rate": 2e-07, "loss": 0.0545, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03406658150021292, "grad_norm": 0.11008977144956589, "learning_rate": 2e-07, "loss": 0.048, "step": 365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.034159914600213497, "grad_norm": 0.11396142095327377, "learning_rate": 2e-07, "loss": 0.0365, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03425324770021408, "grad_norm": 0.1285429745912552, "learning_rate": 2e-07, "loss": 0.0125, "step": 367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03434658080021467, "grad_norm": 0.12839408218860626, "learning_rate": 2e-07, "loss": 0.0241, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03443991390021525, "grad_norm": 0.11845999211072922, "learning_rate": 2e-07, "loss": 0.0288, "step": 369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03453324700021583, "grad_norm": 0.10861969739198685, "learning_rate": 2e-07, "loss": 0.0435, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.034626580100216416, "grad_norm": 0.11874452233314514, "learning_rate": 2e-07, "loss": 0.0309, "step": 371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.034719913200217, "grad_norm": 0.1427195817232132, "learning_rate": 2e-07, "loss": 0.043, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.034813246300217586, "grad_norm": 0.12500236928462982, "learning_rate": 2e-07, "loss": 0.0071, "step": 373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.034906579400218164, "grad_norm": 0.12108566612005234, "learning_rate": 2e-07, "loss": 0.0399, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03499991250021875, "grad_norm": 0.11117847263813019, "learning_rate": 2e-07, "loss": 0.0311, "step": 375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.035093245600219335, "grad_norm": 0.11412937939167023, "learning_rate": 2e-07, "loss": 0.0649, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03518657870021991, "grad_norm": 0.11501997709274292, "learning_rate": 2e-07, "loss": 0.0279, "step": 377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0352799118002205, "grad_norm": 0.10797776281833649, "learning_rate": 2e-07, "loss": 0.0364, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03537324490022108, "grad_norm": 0.1246580109000206, "learning_rate": 2e-07, "loss": -0.0078, "step": 379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03546657800022167, "grad_norm": 0.11960603296756744, "learning_rate": 2e-07, "loss": 0.0679, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03555991110022225, "grad_norm": 0.12690047919750214, "learning_rate": 2e-07, "loss": -0.0046, "step": 381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03565324420022283, "grad_norm": 0.1212008073925972, "learning_rate": 2e-07, "loss": 0.0458, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03574657730022342, "grad_norm": 0.11976614594459534, "learning_rate": 2e-07, "loss": 0.0094, "step": 383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.035839910400224, "grad_norm": 0.12451492995023727, "learning_rate": 2e-07, "loss": 0.0013, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015363420758928603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 616.3922729492188, "completions/mean_terminated_length": 562.0994873046875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.03593324350022458, "grad_norm": 0.11171372979879379, "learning_rate": 2e-07, "loss": 0.0331, "num_tokens": 283969903.0, "reward": 0.5599365234375, "reward_std": 0.2054661214351654, "rewards/simpleverify_reward/mean": 0.5599365234375, "rewards/simpleverify_reward/std": 0.4963989853858948, "step": 385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.036026576600225166, "grad_norm": 0.11575938761234283, "learning_rate": 2e-07, "loss": 0.0334, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03611990970022575, "grad_norm": 0.12197114527225494, "learning_rate": 2e-07, "loss": 0.0425, "step": 387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03621324280022633, "grad_norm": 0.1176382303237915, "learning_rate": 2e-07, "loss": 0.0689, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.036306575900226914, "grad_norm": 0.12390800565481186, "learning_rate": 2e-07, "loss": 0.0209, "step": 389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0363999090002275, "grad_norm": 0.11948312073945999, "learning_rate": 2e-07, "loss": 0.0087, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.036493242100228085, "grad_norm": 0.13085635006427765, "learning_rate": 2e-07, "loss": 0.0376, "step": 391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03658657520022866, "grad_norm": 0.119266077876091, "learning_rate": 2e-07, "loss": -0.0046, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03667990830022925, "grad_norm": 0.11295431852340698, "learning_rate": 2e-07, "loss": 0.0317, "step": 393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.036773241400229834, "grad_norm": 0.11514849960803986, "learning_rate": 2e-07, "loss": 0.057, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03686657450023042, "grad_norm": 0.11986764520406723, "learning_rate": 2e-07, "loss": 0.0096, "step": 395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.036959907600231, "grad_norm": 0.1266181766986847, "learning_rate": 2e-07, "loss": 0.0448, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03705324070023158, "grad_norm": 0.13177832961082458, "learning_rate": 2e-07, "loss": 0.0036, "step": 397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03714657380023217, "grad_norm": 0.11497318744659424, "learning_rate": 2e-07, "loss": -0.0068, "step": 398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03723990690023275, "grad_norm": 0.13660897314548492, "learning_rate": 2e-07, "loss": 0.0214, "step": 399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03733324000023333, "grad_norm": 0.1235608235001564, "learning_rate": 2e-07, "loss": 0.0762, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.037426573100233916, "grad_norm": 0.10542558878660202, "learning_rate": 2e-07, "loss": 0.0184, "step": 401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0375199062002345, "grad_norm": 0.11293075233697891, "learning_rate": 2e-07, "loss": 0.0687, "step": 402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03761323930023508, "grad_norm": 0.11791659146547318, "learning_rate": 2e-07, "loss": -0.0036, "step": 403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.037706572400235665, "grad_norm": 0.11492893844842911, "learning_rate": 2e-07, "loss": -0.0362, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03779990550023625, "grad_norm": 0.12748655676841736, "learning_rate": 2e-07, "loss": 0.0239, "step": 405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.037893238600236835, "grad_norm": 0.11961893737316132, "learning_rate": 2e-07, "loss": 0.0365, "step": 406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03798657170023741, "grad_norm": 0.11468523740768433, "learning_rate": 2e-07, "loss": 0.0155, "step": 407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.038079904800238, "grad_norm": 0.1257142424583435, "learning_rate": 2e-07, "loss": 0.0635, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.038173237900238584, "grad_norm": 0.12088916450738907, "learning_rate": 2e-07, "loss": -0.0164, "step": 409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03826657100023917, "grad_norm": 0.1149362325668335, "learning_rate": 2e-07, "loss": 0.0101, "step": 410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03835990410023975, "grad_norm": 0.11720459908246994, "learning_rate": 2e-07, "loss": 0.0574, "step": 411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03845323720024033, "grad_norm": 0.11584582179784775, "learning_rate": 2e-07, "loss": 0.0412, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03854657030024092, "grad_norm": 0.12471593916416168, "learning_rate": 2e-07, "loss": 0.0329, "step": 413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.038639903400241496, "grad_norm": 0.11379550397396088, "learning_rate": 2e-07, "loss": 0.044, "step": 414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03873323650024208, "grad_norm": 0.11682140827178955, "learning_rate": 2e-07, "loss": -0.018, "step": 415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.038826569600242666, "grad_norm": 0.1138271912932396, "learning_rate": 2e-07, "loss": 0.015, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03891990270024325, "grad_norm": 0.1217326819896698, "learning_rate": 2e-07, "loss": -0.0015, "step": 417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03901323580024383, "grad_norm": 0.12329524010419846, "learning_rate": 2e-07, "loss": -0.0005, "step": 418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.039106568900244415, "grad_norm": 0.12177414447069168, "learning_rate": 2e-07, "loss": 0.0277, "step": 419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.039199902000245, "grad_norm": 0.12002681940793991, "learning_rate": 2e-07, "loss": 0.0399, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.039293235100245585, "grad_norm": 0.12456163763999939, "learning_rate": 2e-07, "loss": 0.0044, "step": 421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.039386568200246164, "grad_norm": 0.11519532650709152, "learning_rate": 2e-07, "loss": 0.028, "step": 422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03947990130024675, "grad_norm": 0.11130990833044052, "learning_rate": 2e-07, "loss": 0.0393, "step": 423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.039573234400247334, "grad_norm": 0.11588078737258911, "learning_rate": 2e-07, "loss": 0.0315, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03966656750024792, "grad_norm": 0.11550088226795197, "learning_rate": 2e-07, "loss": 0.058, "step": 425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0397599006002485, "grad_norm": 0.12595392763614655, "learning_rate": 2e-07, "loss": 0.0118, "step": 426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03985323370024908, "grad_norm": 0.12484121322631836, "learning_rate": 2e-07, "loss": 0.0389, "step": 427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.03994656680024967, "grad_norm": 0.11769852787256241, "learning_rate": 2e-07, "loss": 0.035, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.040039899900250246, "grad_norm": 0.11621078103780746, "learning_rate": 2e-07, "loss": 0.0064, "step": 429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04013323300025083, "grad_norm": 0.11027857661247253, "learning_rate": 2e-07, "loss": 0.0507, "step": 430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04022656610025142, "grad_norm": 0.11220718175172806, "learning_rate": 2e-07, "loss": 0.0227, "step": 431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.040319899200252, "grad_norm": 0.12183266133069992, "learning_rate": 2e-07, "loss": 0.053, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04041323230025258, "grad_norm": 0.12204910814762115, "learning_rate": 2e-07, "loss": 0.019, "step": 433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.040506565400253165, "grad_norm": 0.12495990842580795, "learning_rate": 2e-07, "loss": 0.0201, "step": 434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04059989850025375, "grad_norm": 0.12262093275785446, "learning_rate": 2e-07, "loss": 0.0418, "step": 435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.040693231600254336, "grad_norm": 0.12112521380186081, "learning_rate": 2e-07, "loss": 0.0599, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.040786564700254914, "grad_norm": 0.11415773630142212, "learning_rate": 2e-07, "loss": 0.0425, "step": 437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0408798978002555, "grad_norm": 0.11473115533590317, "learning_rate": 2e-07, "loss": 0.0512, "step": 438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.040973230900256084, "grad_norm": 0.11164270341396332, "learning_rate": 2e-07, "loss": 0.0042, "step": 439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04106656400025666, "grad_norm": 0.1157250851392746, "learning_rate": 2e-07, "loss": 0.0145, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04115989710025725, "grad_norm": 0.13490960001945496, "learning_rate": 2e-07, "loss": 0.0383, "step": 441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04125323020025783, "grad_norm": 0.11610034853219986, "learning_rate": 2e-07, "loss": 0.0263, "step": 442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04134656330025842, "grad_norm": 0.1263304501771927, "learning_rate": 2e-07, "loss": 0.0536, "step": 443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.041439896400258996, "grad_norm": 0.1265595555305481, "learning_rate": 2e-07, "loss": 0.0161, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04153322950025958, "grad_norm": 0.12974615395069122, "learning_rate": 2e-07, "loss": 0.0123, "step": 445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04162656260026017, "grad_norm": 0.11630167812108994, "learning_rate": 2e-07, "loss": 0.024, "step": 446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04171989570026075, "grad_norm": 0.12233869731426239, "learning_rate": 2e-07, "loss": 0.0134, "step": 447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04181322880026133, "grad_norm": 0.12354002892971039, "learning_rate": 2e-07, "loss": 0.035, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014369419642857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 609.72900390625, "completions/mean_terminated_length": 558.9029541015625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.041906561900261916, "grad_norm": 0.12005385011434555, "learning_rate": 2e-07, "loss": 0.039, "num_tokens": 324656226.0, "reward": 0.5673479437828064, "reward_std": 0.2025642991065979, "rewards/simpleverify_reward/mean": 0.5673479437828064, "rewards/simpleverify_reward/std": 0.4954477846622467, "step": 449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0419998950002625, "grad_norm": 0.111904576420784, "learning_rate": 2e-07, "loss": 0.0713, "step": 450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.042093228100263086, "grad_norm": 0.11571154743432999, "learning_rate": 2e-07, "loss": 0.0067, "step": 451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.042186561200263664, "grad_norm": 0.12770473957061768, "learning_rate": 2e-07, "loss": 0.0297, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04227989430026425, "grad_norm": 0.10647688060998917, "learning_rate": 2e-07, "loss": 0.0154, "step": 453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.042373227400264835, "grad_norm": 0.11231386661529541, "learning_rate": 2e-07, "loss": 0.0165, "step": 454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04246656050026541, "grad_norm": 0.11723629385232925, "learning_rate": 2e-07, "loss": 0.0302, "step": 455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.042559893600266, "grad_norm": 0.13797250390052795, "learning_rate": 2e-07, "loss": -0.013, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04265322670026658, "grad_norm": 0.1287396252155304, "learning_rate": 2e-07, "loss": 0.0386, "step": 457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04274655980026717, "grad_norm": 0.11675171554088593, "learning_rate": 2e-07, "loss": 0.0429, "step": 458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04283989290026775, "grad_norm": 0.1157287135720253, "learning_rate": 2e-07, "loss": 0.0297, "step": 459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04293322600026833, "grad_norm": 0.11516328901052475, "learning_rate": 2e-07, "loss": -0.0116, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04302655910026892, "grad_norm": 0.11692577600479126, "learning_rate": 2e-07, "loss": 0.0477, "step": 461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0431198922002695, "grad_norm": 0.12244383245706558, "learning_rate": 2e-07, "loss": 0.0036, "step": 462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04321322530027008, "grad_norm": 0.1258409023284912, "learning_rate": 2e-07, "loss": 0.055, "step": 463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.043306558400270666, "grad_norm": 0.12354937195777893, "learning_rate": 2e-07, "loss": 0.0226, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04339989150027125, "grad_norm": 0.11941950768232346, "learning_rate": 2e-07, "loss": 0.02, "step": 465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04349322460027183, "grad_norm": 0.12062560766935349, "learning_rate": 2e-07, "loss": 0.0144, "step": 466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.043586557700272414, "grad_norm": 0.11342823505401611, "learning_rate": 2e-07, "loss": 0.0512, "step": 467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.043679890800273, "grad_norm": 0.1262807548046112, "learning_rate": 2e-07, "loss": -0.0059, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.043773223900273585, "grad_norm": 0.11998648941516876, "learning_rate": 2e-07, "loss": 0.055, "step": 469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04386655700027416, "grad_norm": 0.11615382134914398, "learning_rate": 2e-07, "loss": -0.0117, "step": 470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04395989010027475, "grad_norm": 0.13024789094924927, "learning_rate": 2e-07, "loss": -0.0026, "step": 471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.044053223200275334, "grad_norm": 0.12381007522344589, "learning_rate": 2e-07, "loss": 0.0091, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04414655630027592, "grad_norm": 0.12598036229610443, "learning_rate": 2e-07, "loss": 0.0025, "step": 473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0442398894002765, "grad_norm": 0.12260746210813522, "learning_rate": 2e-07, "loss": 0.0382, "step": 474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04433322250027708, "grad_norm": 0.12364880740642548, "learning_rate": 2e-07, "loss": 0.062, "step": 475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04442655560027767, "grad_norm": 0.11674712598323822, "learning_rate": 2e-07, "loss": 0.0135, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04451988870027825, "grad_norm": 0.13197995722293854, "learning_rate": 2e-07, "loss": 0.0884, "step": 477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04461322180027883, "grad_norm": 0.13497941195964813, "learning_rate": 2e-07, "loss": 0.0441, "step": 478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.044706554900279416, "grad_norm": 0.11924539506435394, "learning_rate": 2e-07, "loss": 0.0474, "step": 479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04479988800028, "grad_norm": 0.103081613779068, "learning_rate": 2e-07, "loss": 0.0364, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04489322110028058, "grad_norm": 0.13224312663078308, "learning_rate": 2e-07, "loss": 0.0208, "step": 481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.044986554200281165, "grad_norm": 0.11899323016405106, "learning_rate": 2e-07, "loss": 0.0197, "step": 482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04507988730028175, "grad_norm": 0.12379021942615509, "learning_rate": 2e-07, "loss": 0.005, "step": 483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.045173220400282335, "grad_norm": 0.11254280060529709, "learning_rate": 2e-07, "loss": 0.047, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04526655350028291, "grad_norm": 0.1226729080080986, "learning_rate": 2e-07, "loss": 0.0211, "step": 485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0453598866002835, "grad_norm": 0.12863241136074066, "learning_rate": 2e-07, "loss": -0.0305, "step": 486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.045453219700284084, "grad_norm": 0.12958678603172302, "learning_rate": 2e-07, "loss": 0.0206, "step": 487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04554655280028467, "grad_norm": 0.12602970004081726, "learning_rate": 2e-07, "loss": 0.0223, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04563988590028525, "grad_norm": 0.11636984348297119, "learning_rate": 2e-07, "loss": 0.0206, "step": 489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04573321900028583, "grad_norm": 0.11772146075963974, "learning_rate": 2e-07, "loss": 0.0514, "step": 490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04582655210028642, "grad_norm": 0.1220271959900856, "learning_rate": 2e-07, "loss": -0.0046, "step": 491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.045919885200286996, "grad_norm": 0.11762391030788422, "learning_rate": 2e-07, "loss": 0.041, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04601321830028758, "grad_norm": 0.11399685591459274, "learning_rate": 2e-07, "loss": 0.0506, "step": 493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.046106551400288166, "grad_norm": 0.12811368703842163, "learning_rate": 2e-07, "loss": 0.0331, "step": 494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04619988450028875, "grad_norm": 0.11819914728403091, "learning_rate": 2e-07, "loss": 0.0667, "step": 495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04629321760028933, "grad_norm": 0.12240269035100937, "learning_rate": 2e-07, "loss": 0.0431, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.046386550700289915, "grad_norm": 0.11342491954565048, "learning_rate": 2e-07, "loss": 0.0047, "step": 497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0464798838002905, "grad_norm": 0.12665295600891113, "learning_rate": 2e-07, "loss": 0.0138, "step": 498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.046573216900291085, "grad_norm": 0.11746233701705933, "learning_rate": 2e-07, "loss": 0.031, "step": 499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.046666550000291664, "grad_norm": 0.11802034080028534, "learning_rate": 2e-07, "loss": 0.0471, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04675988310029225, "grad_norm": 0.17467068135738373, "learning_rate": 2e-07, "loss": 0.0101, "step": 501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.046853216200292834, "grad_norm": 0.1267123967409134, "learning_rate": 2e-07, "loss": 0.0123, "step": 502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04694654930029342, "grad_norm": 0.12186948955059052, "learning_rate": 2e-07, "loss": 0.0087, "step": 503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.047039882400294, "grad_norm": 0.11872043460607529, "learning_rate": 2e-07, "loss": 0.0029, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04713321550029458, "grad_norm": 0.10396856814622879, "learning_rate": 2e-07, "loss": -0.0218, "step": 505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04722654860029517, "grad_norm": 0.11877740174531937, "learning_rate": 2e-07, "loss": 0.0688, "step": 506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.047319881700295746, "grad_norm": 0.11273297667503357, "learning_rate": 2e-07, "loss": 0.0157, "step": 507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04741321480029633, "grad_norm": 0.12132605165243149, "learning_rate": 2e-07, "loss": 0.057, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04750654790029692, "grad_norm": 0.11349218338727951, "learning_rate": 2e-07, "loss": 0.0336, "step": 509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0475998810002975, "grad_norm": 0.12423590570688248, "learning_rate": 2e-07, "loss": 0.0168, "step": 510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04769321410029808, "grad_norm": 0.12998363375663757, "learning_rate": 2e-07, "loss": 0.014, "step": 511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.047786547200298665, "grad_norm": 0.1233840137720108, "learning_rate": 2e-07, "loss": 0.0127, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014753069196428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 616.5823974609375, "completions/mean_terminated_length": 564.4816284179688, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.04787988030029925, "grad_norm": 0.12010251730680466, "learning_rate": 2e-07, "loss": 0.0267, "num_tokens": 365726406.0, "reward": 0.5629185438156128, "reward_std": 0.20161347091197968, "rewards/simpleverify_reward/mean": 0.5629185438156128, "rewards/simpleverify_reward/std": 0.496029794216156, "step": 513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.047973213400299836, "grad_norm": 0.12401369959115982, "learning_rate": 2e-07, "loss": 0.0836, "step": 514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.048066546500300414, "grad_norm": 0.12161466479301453, "learning_rate": 2e-07, "loss": 0.0406, "step": 515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.048159879600301, "grad_norm": 0.11125585436820984, "learning_rate": 2e-07, "loss": 0.0133, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.048253212700301584, "grad_norm": 0.11706260591745377, "learning_rate": 2e-07, "loss": -0.0356, "step": 517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04834654580030216, "grad_norm": 0.12694256007671356, "learning_rate": 2e-07, "loss": -0.0134, "step": 518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04843987890030275, "grad_norm": 0.12294843792915344, "learning_rate": 2e-07, "loss": 0.0201, "step": 519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04853321200030333, "grad_norm": 0.1124502494931221, "learning_rate": 2e-07, "loss": 0.0809, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04862654510030392, "grad_norm": 0.112555593252182, "learning_rate": 2e-07, "loss": 0.0489, "step": 521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.048719878200304496, "grad_norm": 0.11587313562631607, "learning_rate": 2e-07, "loss": 0.0318, "step": 522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04881321130030508, "grad_norm": 0.12432841211557388, "learning_rate": 2e-07, "loss": 0.0464, "step": 523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04890654440030567, "grad_norm": 0.11995571106672287, "learning_rate": 2e-07, "loss": 0.0179, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04899987750030625, "grad_norm": 0.12222591787576675, "learning_rate": 2e-07, "loss": 0.0345, "step": 525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04909321060030683, "grad_norm": 0.12418544292449951, "learning_rate": 2e-07, "loss": -0.0131, "step": 526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.049186543700307415, "grad_norm": 0.12256152182817459, "learning_rate": 2e-07, "loss": 0.0498, "step": 527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.049279876800308, "grad_norm": 0.11545681953430176, "learning_rate": 2e-07, "loss": 0.0307, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.049373209900308586, "grad_norm": 0.11925394088029861, "learning_rate": 2e-07, "loss": 0.0234, "step": 529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.049466543000309164, "grad_norm": 0.12285978347063065, "learning_rate": 2e-07, "loss": -0.0017, "step": 530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04955987610030975, "grad_norm": 0.12846523523330688, "learning_rate": 2e-07, "loss": 0.0268, "step": 531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.049653209200310335, "grad_norm": 0.12642474472522736, "learning_rate": 2e-07, "loss": 0.0386, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04974654230031091, "grad_norm": 0.11340638250112534, "learning_rate": 2e-07, "loss": 0.0533, "step": 533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0498398754003115, "grad_norm": 0.1254989504814148, "learning_rate": 2e-07, "loss": -0.0036, "step": 534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.04993320850031208, "grad_norm": 0.11798431724309921, "learning_rate": 2e-07, "loss": 0.0462, "step": 535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05002654160031267, "grad_norm": 0.120912104845047, "learning_rate": 2e-07, "loss": 0.0146, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05011987470031325, "grad_norm": 0.12695574760437012, "learning_rate": 2e-07, "loss": 0.0651, "step": 537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05021320780031383, "grad_norm": 0.11621187627315521, "learning_rate": 2e-07, "loss": 0.0308, "step": 538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05030654090031442, "grad_norm": 0.11845751851797104, "learning_rate": 2e-07, "loss": 0.0305, "step": 539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.050399874000315, "grad_norm": 0.11774702370166779, "learning_rate": 2e-07, "loss": 0.0046, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05049320710031558, "grad_norm": 0.11731205135583878, "learning_rate": 2e-07, "loss": 0.0199, "step": 541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.050586540200316166, "grad_norm": 0.12474443763494492, "learning_rate": 2e-07, "loss": 0.017, "step": 542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05067987330031675, "grad_norm": 0.12901581823825836, "learning_rate": 2e-07, "loss": 0.0712, "step": 543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05077320640031733, "grad_norm": 0.12137272953987122, "learning_rate": 2e-07, "loss": -0.0208, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.050866539500317914, "grad_norm": 0.11454559862613678, "learning_rate": 2e-07, "loss": 0.0446, "step": 545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0509598726003185, "grad_norm": 0.11357691884040833, "learning_rate": 2e-07, "loss": -0.0066, "step": 546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.051053205700319085, "grad_norm": 0.12128932029008865, "learning_rate": 2e-07, "loss": 0.0085, "step": 547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05114653880031966, "grad_norm": 0.12682893872261047, "learning_rate": 2e-07, "loss": 0.0452, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05123987190032025, "grad_norm": 0.11873123049736023, "learning_rate": 2e-07, "loss": 0.0378, "step": 549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05133320500032083, "grad_norm": 0.11859776824712753, "learning_rate": 2e-07, "loss": -0.0072, "step": 550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05142653810032142, "grad_norm": 0.13037604093551636, "learning_rate": 2e-07, "loss": -0.0015, "step": 551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.051519871200322, "grad_norm": 0.12165237218141556, "learning_rate": 2e-07, "loss": 0.0473, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05161320430032258, "grad_norm": 0.10922785103321075, "learning_rate": 2e-07, "loss": 0.0226, "step": 553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05170653740032317, "grad_norm": 0.10690672695636749, "learning_rate": 2e-07, "loss": 0.0224, "step": 554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05179987050032375, "grad_norm": 0.11512351036071777, "learning_rate": 2e-07, "loss": 0.004, "step": 555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05189320360032433, "grad_norm": 0.1353617012500763, "learning_rate": 2e-07, "loss": -0.0119, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.051986536700324916, "grad_norm": 0.21192969381809235, "learning_rate": 2e-07, "loss": 0.0529, "step": 557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0520798698003255, "grad_norm": 0.11621417105197906, "learning_rate": 2e-07, "loss": 0.0025, "step": 558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05217320290032608, "grad_norm": 0.1171375960111618, "learning_rate": 2e-07, "loss": 0.0645, "step": 559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.052266536000326665, "grad_norm": 0.12459057569503784, "learning_rate": 2e-07, "loss": 0.0123, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05235986910032725, "grad_norm": 0.1224704161286354, "learning_rate": 2e-07, "loss": 0.0502, "step": 561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.052453202200327835, "grad_norm": 0.12132956087589264, "learning_rate": 2e-07, "loss": 0.0058, "step": 562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05254653530032841, "grad_norm": 0.1108483150601387, "learning_rate": 2e-07, "loss": 0.0575, "step": 563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.052639868400329, "grad_norm": 0.12339775264263153, "learning_rate": 2e-07, "loss": -0.012, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.052733201500329584, "grad_norm": 0.10971226543188095, "learning_rate": 2e-07, "loss": 0.04, "step": 565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05282653460033017, "grad_norm": 0.12245997786521912, "learning_rate": 2e-07, "loss": 0.0452, "step": 566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05291986770033075, "grad_norm": 0.12251701205968857, "learning_rate": 2e-07, "loss": 0.0105, "step": 567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05301320080033133, "grad_norm": 0.11216636747121811, "learning_rate": 2e-07, "loss": 0.024, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05310653390033192, "grad_norm": 0.1176990494132042, "learning_rate": 2e-07, "loss": 0.0197, "step": 569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.053199867000332496, "grad_norm": 0.12697085738182068, "learning_rate": 2e-07, "loss": 0.0239, "step": 570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05329320010033308, "grad_norm": 0.12195334583520889, "learning_rate": 2e-07, "loss": 0.0298, "step": 571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.053386533200333666, "grad_norm": 0.11798093467950821, "learning_rate": 2e-07, "loss": 0.06, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05347986630033425, "grad_norm": 0.11503004282712936, "learning_rate": 2e-07, "loss": -0.0118, "step": 573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05357319940033483, "grad_norm": 0.11584347486495972, "learning_rate": 2e-07, "loss": 0.0595, "step": 574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.053666532500335415, "grad_norm": 0.12356378883123398, "learning_rate": 2e-07, "loss": 0.0522, "step": 575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.053759865600336, "grad_norm": 0.12094777077436447, "learning_rate": 2e-07, "loss": 0.0167, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015869140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 625.546630859375, "completions/mean_terminated_length": 569.58544921875, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.053853198700336585, "grad_norm": 0.12044382840394974, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 407349056.0, "reward": 0.5599365234375, "reward_std": 0.20234021544456482, "rewards/simpleverify_reward/mean": 0.5599365234375, "rewards/simpleverify_reward/std": 0.4963989853858948, "step": 577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.053946531800337164, "grad_norm": 0.11953672766685486, "learning_rate": 2e-07, "loss": 0.0715, "step": 578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05403986490033775, "grad_norm": 0.12127688527107239, "learning_rate": 2e-07, "loss": 0.0208, "step": 579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.054133198000338334, "grad_norm": 0.12586002051830292, "learning_rate": 2e-07, "loss": -0.038, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05422653110033892, "grad_norm": 0.10979132354259491, "learning_rate": 2e-07, "loss": 0.0477, "step": 581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0543198642003395, "grad_norm": 0.12793982028961182, "learning_rate": 2e-07, "loss": 0.0262, "step": 582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05441319730034008, "grad_norm": 0.1122364029288292, "learning_rate": 2e-07, "loss": 0.0315, "step": 583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05450653040034067, "grad_norm": 0.1163412481546402, "learning_rate": 2e-07, "loss": 0.0696, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.054599863500341246, "grad_norm": 0.11988294869661331, "learning_rate": 2e-07, "loss": 0.0072, "step": 585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05469319660034183, "grad_norm": 0.11390503495931625, "learning_rate": 2e-07, "loss": -0.0012, "step": 586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.054786529700342416, "grad_norm": 0.12465047091245651, "learning_rate": 2e-07, "loss": 0.0085, "step": 587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.054879862800343, "grad_norm": 0.11787502467632294, "learning_rate": 2e-07, "loss": 0.0265, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05497319590034358, "grad_norm": 0.12637466192245483, "learning_rate": 2e-07, "loss": 0.0607, "step": 589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.055066529000344165, "grad_norm": 0.11292491853237152, "learning_rate": 2e-07, "loss": 0.0227, "step": 590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05515986210034475, "grad_norm": 0.12993048131465912, "learning_rate": 2e-07, "loss": 0.0229, "step": 591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.055253195200345336, "grad_norm": 0.10965694487094879, "learning_rate": 2e-07, "loss": 0.0405, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.055346528300345914, "grad_norm": 0.12179083377122879, "learning_rate": 2e-07, "loss": 0.0402, "step": 593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0554398614003465, "grad_norm": 0.1250147670507431, "learning_rate": 2e-07, "loss": -0.0507, "step": 594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.055533194500347084, "grad_norm": 0.11787499487400055, "learning_rate": 2e-07, "loss": -0.0143, "step": 595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05562652760034766, "grad_norm": 0.12058252096176147, "learning_rate": 2e-07, "loss": 0.0578, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05571986070034825, "grad_norm": 0.11161614954471588, "learning_rate": 2e-07, "loss": 0.0465, "step": 597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05581319380034883, "grad_norm": 0.11848585307598114, "learning_rate": 2e-07, "loss": 0.0747, "step": 598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05590652690034942, "grad_norm": 0.1354919672012329, "learning_rate": 2e-07, "loss": 0.0425, "step": 599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.055999860000349996, "grad_norm": 0.12854740023612976, "learning_rate": 2e-07, "loss": 0.0058, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05609319310035058, "grad_norm": 0.1257350742816925, "learning_rate": 2e-07, "loss": 0.0417, "step": 601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05618652620035117, "grad_norm": 0.11217089742422104, "learning_rate": 2e-07, "loss": 0.0166, "step": 602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05627985930035175, "grad_norm": 0.12380943447351456, "learning_rate": 2e-07, "loss": 0.0433, "step": 603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05637319240035233, "grad_norm": 0.12836982309818268, "learning_rate": 2e-07, "loss": 0.0084, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.056466525500352915, "grad_norm": 0.12369835376739502, "learning_rate": 2e-07, "loss": 0.0228, "step": 605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0565598586003535, "grad_norm": 0.12817060947418213, "learning_rate": 2e-07, "loss": 0.0585, "step": 606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.056653191700354086, "grad_norm": 0.1351020783185959, "learning_rate": 2e-07, "loss": 0.0436, "step": 607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.056746524800354664, "grad_norm": 0.13817666471004486, "learning_rate": 2e-07, "loss": 0.0267, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05683985790035525, "grad_norm": 0.1242447942495346, "learning_rate": 2e-07, "loss": 0.0016, "step": 609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.056933191000355834, "grad_norm": 0.12094396352767944, "learning_rate": 2e-07, "loss": 0.043, "step": 610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05702652410035641, "grad_norm": 0.11039764434099197, "learning_rate": 2e-07, "loss": 0.0177, "step": 611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.057119857200357, "grad_norm": 0.12412373721599579, "learning_rate": 2e-07, "loss": 0.0507, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05721319030035758, "grad_norm": 0.12555938959121704, "learning_rate": 2e-07, "loss": 0.0393, "step": 613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05730652340035817, "grad_norm": 0.11187642067670822, "learning_rate": 2e-07, "loss": 0.0236, "step": 614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05739985650035875, "grad_norm": 0.11063692718744278, "learning_rate": 2e-07, "loss": 0.0386, "step": 615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05749318960035933, "grad_norm": 0.12599587440490723, "learning_rate": 2e-07, "loss": 0.0117, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05758652270035992, "grad_norm": 0.12586668133735657, "learning_rate": 2e-07, "loss": 0.0194, "step": 617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0576798558003605, "grad_norm": 0.13283438980579376, "learning_rate": 2e-07, "loss": 0.0414, "step": 618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05777318890036108, "grad_norm": 0.12420965731143951, "learning_rate": 2e-07, "loss": 0.0465, "step": 619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.057866522000361666, "grad_norm": 0.1160416528582573, "learning_rate": 2e-07, "loss": 0.0362, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05795985510036225, "grad_norm": 0.12362019717693329, "learning_rate": 2e-07, "loss": 0.0924, "step": 621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05805318820036283, "grad_norm": 0.12439006567001343, "learning_rate": 2e-07, "loss": 0.0085, "step": 622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.058146521300363414, "grad_norm": 0.13101525604724884, "learning_rate": 2e-07, "loss": 0.0178, "step": 623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.058239854400364, "grad_norm": 0.1352616548538208, "learning_rate": 2e-07, "loss": 0.0304, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.058333187500364585, "grad_norm": 0.12140699476003647, "learning_rate": 2e-07, "loss": 0.0126, "step": 625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05842652060036516, "grad_norm": 0.12117926776409149, "learning_rate": 2e-07, "loss": 0.0585, "step": 626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05851985370036575, "grad_norm": 0.13625787198543549, "learning_rate": 2e-07, "loss": -0.0058, "step": 627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05861318680036633, "grad_norm": 0.1238526925444603, "learning_rate": 2e-07, "loss": 0.0615, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05870651990036692, "grad_norm": 0.12498913705348969, "learning_rate": 2e-07, "loss": 0.0354, "step": 629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0587998530003675, "grad_norm": 0.10788042098283768, "learning_rate": 2e-07, "loss": 0.0406, "step": 630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05889318610036808, "grad_norm": 0.09922719746828079, "learning_rate": 2e-07, "loss": 0.0742, "step": 631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05898651920036867, "grad_norm": 0.12002391368150711, "learning_rate": 2e-07, "loss": 0.0098, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05907985230036925, "grad_norm": 0.13086529076099396, "learning_rate": 2e-07, "loss": 0.0533, "step": 633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05917318540036983, "grad_norm": 0.1261654645204544, "learning_rate": 2e-07, "loss": 0.0205, "step": 634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.059266518500370416, "grad_norm": 0.12553098797798157, "learning_rate": 2e-07, "loss": -0.0116, "step": 635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.059359851600371, "grad_norm": 0.12851980328559875, "learning_rate": 2e-07, "loss": 0.0237, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05945318470037158, "grad_norm": 0.12041840702295303, "learning_rate": 2e-07, "loss": -0.0246, "step": 637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.059546517800372165, "grad_norm": 0.11303598433732986, "learning_rate": 2e-07, "loss": -0.0066, "step": 638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.05963985090037275, "grad_norm": 0.13044410943984985, "learning_rate": 2e-07, "loss": 0.0573, "step": 639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.059733184000373335, "grad_norm": 0.11876334995031357, "learning_rate": 2e-07, "loss": 0.036, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014107840401785698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 615.0515747070312, "completions/mean_terminated_length": 565.2401733398438, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.05982651710037391, "grad_norm": 0.12729071080684662, "learning_rate": 2e-07, "loss": 0.0173, "num_tokens": 448368123.0, "reward": 0.5781599283218384, "reward_std": 0.19839850068092346, "rewards/simpleverify_reward/mean": 0.5781598687171936, "rewards/simpleverify_reward/std": 0.49385756254196167, "step": 641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0599198502003745, "grad_norm": 0.13411562144756317, "learning_rate": 2e-07, "loss": 0.0245, "step": 642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.060013183300375084, "grad_norm": 0.129892036318779, "learning_rate": 2e-07, "loss": 0.0098, "step": 643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06010651640037567, "grad_norm": 0.11809277534484863, "learning_rate": 2e-07, "loss": 0.0443, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06019984950037625, "grad_norm": 0.10522212833166122, "learning_rate": 2e-07, "loss": 0.0345, "step": 645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06029318260037683, "grad_norm": 0.13501420617103577, "learning_rate": 2e-07, "loss": 0.0616, "step": 646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06038651570037742, "grad_norm": 0.12211109697818756, "learning_rate": 2e-07, "loss": 0.0129, "step": 647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.060479848800377996, "grad_norm": 0.11742499470710754, "learning_rate": 2e-07, "loss": 0.0271, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06057318190037858, "grad_norm": 0.11299612373113632, "learning_rate": 2e-07, "loss": 0.0164, "step": 649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.060666515000379166, "grad_norm": 0.1275217980146408, "learning_rate": 2e-07, "loss": 0.0818, "step": 650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06075984810037975, "grad_norm": 0.1271250993013382, "learning_rate": 2e-07, "loss": 0.0342, "step": 651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06085318120038033, "grad_norm": 0.13171058893203735, "learning_rate": 2e-07, "loss": -0.0025, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.060946514300380915, "grad_norm": 0.12337896227836609, "learning_rate": 2e-07, "loss": 0.015, "step": 653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0610398474003815, "grad_norm": 0.12394645810127258, "learning_rate": 2e-07, "loss": 0.0436, "step": 654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.061133180500382085, "grad_norm": 0.11018452793359756, "learning_rate": 2e-07, "loss": 0.079, "step": 655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.061226513600382663, "grad_norm": 0.11741793155670166, "learning_rate": 2e-07, "loss": 0.0177, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06131984670038325, "grad_norm": 0.11924643069505692, "learning_rate": 2e-07, "loss": 0.0028, "step": 657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.061413179800383834, "grad_norm": 0.11948590725660324, "learning_rate": 2e-07, "loss": -0.019, "step": 658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06150651290038442, "grad_norm": 0.11440945416688919, "learning_rate": 2e-07, "loss": 0.0107, "step": 659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.061599846000385, "grad_norm": 0.12413817644119263, "learning_rate": 2e-07, "loss": 0.0293, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06169317910038558, "grad_norm": 0.12349940836429596, "learning_rate": 2e-07, "loss": 0.0244, "step": 661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06178651220038617, "grad_norm": 0.13031722605228424, "learning_rate": 2e-07, "loss": -0.003, "step": 662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.061879845300386746, "grad_norm": 0.11165805160999298, "learning_rate": 2e-07, "loss": 0.0323, "step": 663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06197317840038733, "grad_norm": 0.11943699419498444, "learning_rate": 2e-07, "loss": 0.0322, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.062066511500387916, "grad_norm": 0.10237414389848709, "learning_rate": 2e-07, "loss": 0.0357, "step": 665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0621598446003885, "grad_norm": 0.14724992215633392, "learning_rate": 2e-07, "loss": 0.0206, "step": 666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06225317770038908, "grad_norm": 0.11832781881093979, "learning_rate": 2e-07, "loss": 0.0615, "step": 667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.062346510800389665, "grad_norm": 0.12036070972681046, "learning_rate": 2e-07, "loss": -0.0115, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06243984390039025, "grad_norm": 0.12463673949241638, "learning_rate": 2e-07, "loss": 0.0009, "step": 669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06253317700039084, "grad_norm": 0.12530124187469482, "learning_rate": 2e-07, "loss": 0.0237, "step": 670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06262651010039141, "grad_norm": 0.13599103689193726, "learning_rate": 2e-07, "loss": 0.0792, "step": 671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.062719843200392, "grad_norm": 0.12094172090291977, "learning_rate": 2e-07, "loss": -0.0046, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06281317630039258, "grad_norm": 0.11020464450120926, "learning_rate": 2e-07, "loss": 0.0168, "step": 673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06290650940039316, "grad_norm": 0.12409074604511261, "learning_rate": 2e-07, "loss": 0.0158, "step": 674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06299984250039375, "grad_norm": 0.12593719363212585, "learning_rate": 2e-07, "loss": 0.0537, "step": 675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06309317560039433, "grad_norm": 0.1168484091758728, "learning_rate": 2e-07, "loss": 0.0272, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06318650870039491, "grad_norm": 0.11330760270357132, "learning_rate": 2e-07, "loss": 0.0236, "step": 677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0632798418003955, "grad_norm": 0.125793918967247, "learning_rate": 2e-07, "loss": 0.0541, "step": 678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06337317490039608, "grad_norm": 0.12242653220891953, "learning_rate": 2e-07, "loss": -0.0508, "step": 679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06346650800039666, "grad_norm": 0.1316857784986496, "learning_rate": 2e-07, "loss": 0.0169, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06355984110039725, "grad_norm": 0.1167135164141655, "learning_rate": 2e-07, "loss": 0.0087, "step": 681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06365317420039783, "grad_norm": 0.11635976284742355, "learning_rate": 2e-07, "loss": 0.0711, "step": 682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06374650730039842, "grad_norm": 0.10801029950380325, "learning_rate": 2e-07, "loss": 0.06, "step": 683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.063839840400399, "grad_norm": 0.12761594355106354, "learning_rate": 2e-07, "loss": 0.0146, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06393317350039958, "grad_norm": 0.12739582359790802, "learning_rate": 2e-07, "loss": 0.0169, "step": 685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06402650660040017, "grad_norm": 0.12135310471057892, "learning_rate": 2e-07, "loss": -0.0238, "step": 686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06411983970040075, "grad_norm": 0.12800715863704681, "learning_rate": 2e-07, "loss": 0.0015, "step": 687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06421317280040133, "grad_norm": 0.11485518515110016, "learning_rate": 2e-07, "loss": 0.0185, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06430650590040192, "grad_norm": 0.10507942736148834, "learning_rate": 2e-07, "loss": -0.0023, "step": 689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0643998390004025, "grad_norm": 0.11504718661308289, "learning_rate": 2e-07, "loss": 0.0298, "step": 690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06449317210040308, "grad_norm": 0.14104816317558289, "learning_rate": 2e-07, "loss": 0.0274, "step": 691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06458650520040367, "grad_norm": 0.11345266550779343, "learning_rate": 2e-07, "loss": 0.0339, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06467983830040425, "grad_norm": 0.12505804002285004, "learning_rate": 2e-07, "loss": 0.0422, "step": 693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06477317140040484, "grad_norm": 0.12232425808906555, "learning_rate": 2e-07, "loss": 0.0532, "step": 694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06486650450040542, "grad_norm": 0.10989726334810257, "learning_rate": 2e-07, "loss": 0.0472, "step": 695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.064959837600406, "grad_norm": 0.12593911588191986, "learning_rate": 2e-07, "loss": 0.0156, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06505317070040659, "grad_norm": 0.12165261805057526, "learning_rate": 2e-07, "loss": 0.0283, "step": 697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06514650380040717, "grad_norm": 0.11726663261651993, "learning_rate": 2e-07, "loss": 0.0509, "step": 698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06523983690040774, "grad_norm": 0.11849267780780792, "learning_rate": 2e-07, "loss": 0.024, "step": 699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06533317000040834, "grad_norm": 0.11234334856271744, "learning_rate": 2e-07, "loss": 0.018, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06542650310040891, "grad_norm": 0.12003482133150101, "learning_rate": 2e-07, "loss": -0.0038, "step": 701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06551983620040949, "grad_norm": 0.12619444727897644, "learning_rate": 2e-07, "loss": 0.022, "step": 702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06561316930041008, "grad_norm": 0.12507031857967377, "learning_rate": 2e-07, "loss": 0.0494, "step": 703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06570650240041066, "grad_norm": 0.12343506515026093, "learning_rate": 2e-07, "loss": 0.0418, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014299665178571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 620.75537109375, "completions/mean_terminated_length": 570.339599609375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.06579983550041126, "grad_norm": 0.12528972327709198, "learning_rate": 2e-07, "loss": -0.0102, "num_tokens": 489691551.0, "reward": 0.5700858235359192, "reward_std": 0.19754192233085632, "rewards/simpleverify_reward/mean": 0.5700858235359192, "rewards/simpleverify_reward/std": 0.4950679540634155, "step": 705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06589316860041183, "grad_norm": 0.1104116141796112, "learning_rate": 2e-07, "loss": 0.0105, "step": 706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06598650170041241, "grad_norm": 0.12788976728916168, "learning_rate": 2e-07, "loss": -0.0125, "step": 707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.066079834800413, "grad_norm": 0.1163189634680748, "learning_rate": 2e-07, "loss": 0.0251, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06617316790041358, "grad_norm": 0.11647484451532364, "learning_rate": 2e-07, "loss": 0.0366, "step": 709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06626650100041416, "grad_norm": 0.12332244962453842, "learning_rate": 2e-07, "loss": 0.0463, "step": 710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06635983410041475, "grad_norm": 0.11935242265462875, "learning_rate": 2e-07, "loss": 0.0586, "step": 711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06645316720041533, "grad_norm": 0.10812757164239883, "learning_rate": 2e-07, "loss": 0.0196, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06654650030041591, "grad_norm": 0.1112031415104866, "learning_rate": 2e-07, "loss": 0.054, "step": 713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0666398334004165, "grad_norm": 0.13041889667510986, "learning_rate": 2e-07, "loss": 0.021, "step": 714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06673316650041708, "grad_norm": 0.12210946530103683, "learning_rate": 2e-07, "loss": 0.0012, "step": 715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06682649960041767, "grad_norm": 0.12628982961177826, "learning_rate": 2e-07, "loss": 0.0403, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06691983270041825, "grad_norm": 0.12646602094173431, "learning_rate": 2e-07, "loss": 0.0136, "step": 717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06701316580041883, "grad_norm": 0.1331997960805893, "learning_rate": 2e-07, "loss": 0.0267, "step": 718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06710649890041942, "grad_norm": 0.13744772970676422, "learning_rate": 2e-07, "loss": 0.0239, "step": 719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06719983200042, "grad_norm": 0.12118663638830185, "learning_rate": 2e-07, "loss": 0.0253, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06729316510042058, "grad_norm": 0.1154777854681015, "learning_rate": 2e-07, "loss": 0.0117, "step": 721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06738649820042117, "grad_norm": 0.11925779283046722, "learning_rate": 2e-07, "loss": 0.0025, "step": 722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06747983130042175, "grad_norm": 0.11497519910335541, "learning_rate": 2e-07, "loss": 0.0385, "step": 723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06757316440042234, "grad_norm": 0.10929624736309052, "learning_rate": 2e-07, "loss": 0.0265, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06766649750042292, "grad_norm": 0.12020684033632278, "learning_rate": 2e-07, "loss": 0.052, "step": 725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0677598306004235, "grad_norm": 0.1540072113275528, "learning_rate": 2e-07, "loss": 0.0138, "step": 726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06785316370042409, "grad_norm": 0.10292994976043701, "learning_rate": 2e-07, "loss": -0.0066, "step": 727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06794649680042467, "grad_norm": 0.13004785776138306, "learning_rate": 2e-07, "loss": 0.0245, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06803982990042524, "grad_norm": 0.12006964534521103, "learning_rate": 2e-07, "loss": 0.0026, "step": 729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06813316300042584, "grad_norm": 0.11545645445585251, "learning_rate": 2e-07, "loss": 0.0154, "step": 730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06822649610042641, "grad_norm": 0.11766402423381805, "learning_rate": 2e-07, "loss": 0.0505, "step": 731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06831982920042699, "grad_norm": 0.11055393517017365, "learning_rate": 2e-07, "loss": 0.0176, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06841316230042759, "grad_norm": 0.11462956666946411, "learning_rate": 2e-07, "loss": 0.0122, "step": 733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06850649540042816, "grad_norm": 0.10686997324228287, "learning_rate": 2e-07, "loss": 0.0605, "step": 734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06859982850042876, "grad_norm": 0.1133904904127121, "learning_rate": 2e-07, "loss": 0.0305, "step": 735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06869316160042933, "grad_norm": 0.1171252503991127, "learning_rate": 2e-07, "loss": 0.0631, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06878649470042991, "grad_norm": 0.11815008521080017, "learning_rate": 2e-07, "loss": 0.0314, "step": 737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0688798278004305, "grad_norm": 0.12277720123529434, "learning_rate": 2e-07, "loss": 0.0232, "step": 738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06897316090043108, "grad_norm": 0.12605513632297516, "learning_rate": 2e-07, "loss": 0.0418, "step": 739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06906649400043166, "grad_norm": 0.12493447959423065, "learning_rate": 2e-07, "loss": 0.0183, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06915982710043225, "grad_norm": 0.1309005320072174, "learning_rate": 2e-07, "loss": 0.0358, "step": 741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06925316020043283, "grad_norm": 0.11564654856920242, "learning_rate": 2e-07, "loss": 0.0537, "step": 742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06934649330043341, "grad_norm": 0.11595161259174347, "learning_rate": 2e-07, "loss": -0.0148, "step": 743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.069439826400434, "grad_norm": 0.1234142929315567, "learning_rate": 2e-07, "loss": 0.0345, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06953315950043458, "grad_norm": 0.12163940072059631, "learning_rate": 2e-07, "loss": 0.0429, "step": 745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06962649260043517, "grad_norm": 0.12371745705604553, "learning_rate": 2e-07, "loss": 0.0779, "step": 746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06971982570043575, "grad_norm": 0.12303513288497925, "learning_rate": 2e-07, "loss": 0.0324, "step": 747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06981315880043633, "grad_norm": 0.1302696019411087, "learning_rate": 2e-07, "loss": 0.0414, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.06990649190043692, "grad_norm": 0.11138232797384262, "learning_rate": 2e-07, "loss": 0.0375, "step": 749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0699998250004375, "grad_norm": 0.11783741414546967, "learning_rate": 2e-07, "loss": 0.0422, "step": 750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07009315810043808, "grad_norm": 0.11582102626562119, "learning_rate": 2e-07, "loss": 0.0314, "step": 751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07018649120043867, "grad_norm": 0.11814740300178528, "learning_rate": 2e-07, "loss": 0.0264, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07027982430043925, "grad_norm": 0.12032865732908249, "learning_rate": 2e-07, "loss": 0.027, "step": 753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07037315740043983, "grad_norm": 0.12482430040836334, "learning_rate": 2e-07, "loss": -0.0004, "step": 754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07046649050044042, "grad_norm": 0.1225215271115303, "learning_rate": 2e-07, "loss": 0.0662, "step": 755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.070559823600441, "grad_norm": 0.1313469111919403, "learning_rate": 2e-07, "loss": 0.038, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07065315670044159, "grad_norm": 0.12063515931367874, "learning_rate": 2e-07, "loss": -0.0188, "step": 757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07074648980044217, "grad_norm": 0.12330294400453568, "learning_rate": 2e-07, "loss": 0.0557, "step": 758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07083982290044274, "grad_norm": 0.11578790843486786, "learning_rate": 2e-07, "loss": 0.036, "step": 759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07093315600044334, "grad_norm": 0.12653319537639618, "learning_rate": 2e-07, "loss": 0.0478, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07102648910044392, "grad_norm": 0.11110926419496536, "learning_rate": 2e-07, "loss": 0.0131, "step": 761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0711198222004445, "grad_norm": 0.12657906115055084, "learning_rate": 2e-07, "loss": -0.0323, "step": 762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07121315530044509, "grad_norm": 0.12415838986635208, "learning_rate": 2e-07, "loss": 0.0426, "step": 763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07130648840044566, "grad_norm": 0.12819182872772217, "learning_rate": 2e-07, "loss": 0.0306, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07139982150044624, "grad_norm": 0.1336245983839035, "learning_rate": 2e-07, "loss": -0.043, "step": 765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07149315460044683, "grad_norm": 0.12627893686294556, "learning_rate": 2e-07, "loss": 0.0531, "step": 766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07158648770044741, "grad_norm": 0.12721318006515503, "learning_rate": 2e-07, "loss": -0.0013, "step": 767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.071679820800448, "grad_norm": 0.13815297186374664, "learning_rate": 2e-07, "loss": 0.0259, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013462611607142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 610.3123779296875, "completions/mean_terminated_length": 562.74560546875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.07177315390044858, "grad_norm": 0.13740846514701843, "learning_rate": 2e-07, "loss": 0.0006, "num_tokens": 530465705.0, "reward": 0.5800955891609192, "reward_std": 0.194438174366951, "rewards/simpleverify_reward/mean": 0.5800955891609192, "rewards/simpleverify_reward/std": 0.493547260761261, "step": 769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07186648700044916, "grad_norm": 0.10117055475711823, "learning_rate": 2e-07, "loss": 0.0509, "step": 770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07195982010044975, "grad_norm": 0.12342734634876251, "learning_rate": 2e-07, "loss": 0.0357, "step": 771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07205315320045033, "grad_norm": 0.12916740775108337, "learning_rate": 2e-07, "loss": 0.034, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07214648630045091, "grad_norm": 0.11947473883628845, "learning_rate": 2e-07, "loss": 0.0202, "step": 773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0722398194004515, "grad_norm": 0.12576515972614288, "learning_rate": 2e-07, "loss": 0.0005, "step": 774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07233315250045208, "grad_norm": 0.10413672775030136, "learning_rate": 2e-07, "loss": 0.0016, "step": 775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07242648560045266, "grad_norm": 0.1263851374387741, "learning_rate": 2e-07, "loss": 0.0381, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07251981870045325, "grad_norm": 0.12243737280368805, "learning_rate": 2e-07, "loss": -0.0041, "step": 777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07261315180045383, "grad_norm": 0.11369921267032623, "learning_rate": 2e-07, "loss": 0.0585, "step": 778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07270648490045442, "grad_norm": 0.12998270988464355, "learning_rate": 2e-07, "loss": 0.0066, "step": 779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.072799818000455, "grad_norm": 0.12854069471359253, "learning_rate": 2e-07, "loss": 0.0701, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07289315110045558, "grad_norm": 0.12057509273290634, "learning_rate": 2e-07, "loss": 0.035, "step": 781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07298648420045617, "grad_norm": 0.1271604746580124, "learning_rate": 2e-07, "loss": 0.0202, "step": 782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07307981730045675, "grad_norm": 0.12926845252513885, "learning_rate": 2e-07, "loss": 0.0012, "step": 783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07317315040045733, "grad_norm": 0.12242759019136429, "learning_rate": 2e-07, "loss": 0.0325, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07326648350045792, "grad_norm": 0.12419641762971878, "learning_rate": 2e-07, "loss": -0.0034, "step": 785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0733598166004585, "grad_norm": 0.10994675010442734, "learning_rate": 2e-07, "loss": 0.005, "step": 786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07345314970045909, "grad_norm": 0.12860162556171417, "learning_rate": 2e-07, "loss": -0.009, "step": 787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07354648280045967, "grad_norm": 0.12840574979782104, "learning_rate": 2e-07, "loss": 0.0636, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07363981590046025, "grad_norm": 0.12296827137470245, "learning_rate": 2e-07, "loss": 0.0429, "step": 789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07373314900046084, "grad_norm": 0.11585507541894913, "learning_rate": 2e-07, "loss": 0.0262, "step": 790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07382648210046142, "grad_norm": 0.12902520596981049, "learning_rate": 2e-07, "loss": 0.0103, "step": 791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.073919815200462, "grad_norm": 0.11695924401283264, "learning_rate": 2e-07, "loss": 0.0638, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07401314830046259, "grad_norm": 0.13254743814468384, "learning_rate": 2e-07, "loss": 0.0255, "step": 793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07410648140046316, "grad_norm": 0.11255477368831635, "learning_rate": 2e-07, "loss": 0.0662, "step": 794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07419981450046374, "grad_norm": 0.12763340771198273, "learning_rate": 2e-07, "loss": 0.0833, "step": 795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07429314760046433, "grad_norm": 0.12116031348705292, "learning_rate": 2e-07, "loss": 0.0073, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07438648070046491, "grad_norm": 0.11752741038799286, "learning_rate": 2e-07, "loss": 0.0479, "step": 797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0744798138004655, "grad_norm": 0.1262942999601364, "learning_rate": 2e-07, "loss": -0.0255, "step": 798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07457314690046608, "grad_norm": 0.11733046919107437, "learning_rate": 2e-07, "loss": -0.0031, "step": 799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07466648000046666, "grad_norm": 0.12669570744037628, "learning_rate": 2e-07, "loss": 0.0052, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07475981310046725, "grad_norm": 0.12124934792518616, "learning_rate": 2e-07, "loss": 0.0378, "step": 801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07485314620046783, "grad_norm": 0.12422847002744675, "learning_rate": 2e-07, "loss": 0.0019, "step": 802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07494647930046841, "grad_norm": 0.11205616593360901, "learning_rate": 2e-07, "loss": 0.0084, "step": 803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.075039812400469, "grad_norm": 0.12309475988149643, "learning_rate": 2e-07, "loss": 0.0715, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07513314550046958, "grad_norm": 0.11718499660491943, "learning_rate": 2e-07, "loss": -0.0219, "step": 805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07522647860047016, "grad_norm": 0.11949222534894943, "learning_rate": 2e-07, "loss": 0.0122, "step": 806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07531981170047075, "grad_norm": 0.12406980991363525, "learning_rate": 2e-07, "loss": 0.0576, "step": 807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07541314480047133, "grad_norm": 0.14561213552951813, "learning_rate": 2e-07, "loss": 0.0315, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07550647790047192, "grad_norm": 0.11671137809753418, "learning_rate": 2e-07, "loss": 0.0299, "step": 809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0755998110004725, "grad_norm": 0.12124525755643845, "learning_rate": 2e-07, "loss": -0.0034, "step": 810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07569314410047308, "grad_norm": 0.11499109119176865, "learning_rate": 2e-07, "loss": 0.0489, "step": 811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07578647720047367, "grad_norm": 0.11485016345977783, "learning_rate": 2e-07, "loss": 0.0293, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07587981030047425, "grad_norm": 0.12557214498519897, "learning_rate": 2e-07, "loss": 0.0221, "step": 813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07597314340047483, "grad_norm": 0.13099895417690277, "learning_rate": 2e-07, "loss": 0.0095, "step": 814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07606647650047542, "grad_norm": 0.12623251974582672, "learning_rate": 2e-07, "loss": -0.0348, "step": 815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.076159809600476, "grad_norm": 0.11864347010850906, "learning_rate": 2e-07, "loss": 0.0391, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07625314270047658, "grad_norm": 0.12183661013841629, "learning_rate": 2e-07, "loss": 0.0529, "step": 817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07634647580047717, "grad_norm": 0.132380411028862, "learning_rate": 2e-07, "loss": 0.012, "step": 818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07643980890047775, "grad_norm": 0.11724260449409485, "learning_rate": 2e-07, "loss": 0.0049, "step": 819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07653314200047834, "grad_norm": 0.12289160490036011, "learning_rate": 2e-07, "loss": 0.0227, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07662647510047892, "grad_norm": 0.1231679618358612, "learning_rate": 2e-07, "loss": -0.0061, "step": 821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0767198082004795, "grad_norm": 0.12912705540657043, "learning_rate": 2e-07, "loss": 0.0406, "step": 822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07681314130048009, "grad_norm": 0.12607784569263458, "learning_rate": 2e-07, "loss": 0.0058, "step": 823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07690647440048066, "grad_norm": 0.11383119225502014, "learning_rate": 2e-07, "loss": 0.0103, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07699980750048124, "grad_norm": 0.11440588533878326, "learning_rate": 2e-07, "loss": 0.0441, "step": 825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07709314060048184, "grad_norm": 0.15354876220226288, "learning_rate": 2e-07, "loss": 0.1046, "step": 826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07718647370048241, "grad_norm": 0.1340963989496231, "learning_rate": 2e-07, "loss": 0.0378, "step": 827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07727980680048299, "grad_norm": 0.12661057710647583, "learning_rate": 2e-07, "loss": 0.0518, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07737313990048358, "grad_norm": 0.13152505457401276, "learning_rate": 2e-07, "loss": 0.0164, "step": 829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07746647300048416, "grad_norm": 0.11410197615623474, "learning_rate": 2e-07, "loss": 0.0579, "step": 830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07755980610048475, "grad_norm": 0.11915868520736694, "learning_rate": 2e-07, "loss": 0.0111, "step": 831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07765313920048533, "grad_norm": 0.12689052522182465, "learning_rate": 2e-07, "loss": 0.0209, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013916015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 620.7677612304688, "completions/mean_terminated_length": 571.7238159179688, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.07774647230048591, "grad_norm": 0.10465911775827408, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 571812769.0, "reward": 0.5783517360687256, "reward_std": 0.19668343663215637, "rewards/simpleverify_reward/mean": 0.5783516764640808, "rewards/simpleverify_reward/std": 0.4938271641731262, "step": 833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0778398054004865, "grad_norm": 0.10756061226129532, "learning_rate": 2e-07, "loss": 0.0025, "step": 834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07793313850048708, "grad_norm": 0.1274019032716751, "learning_rate": 2e-07, "loss": 0.0706, "step": 835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07802647160048766, "grad_norm": 0.12987348437309265, "learning_rate": 2e-07, "loss": -0.032, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07811980470048825, "grad_norm": 0.11911167204380035, "learning_rate": 2e-07, "loss": 0.0797, "step": 837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07821313780048883, "grad_norm": 0.12521375715732574, "learning_rate": 2e-07, "loss": 0.0468, "step": 838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07830647090048942, "grad_norm": 0.11838897317647934, "learning_rate": 2e-07, "loss": 0.0323, "step": 839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07839980400049, "grad_norm": 0.12225809693336487, "learning_rate": 2e-07, "loss": -0.0205, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07849313710049058, "grad_norm": 0.11803404986858368, "learning_rate": 2e-07, "loss": -0.0037, "step": 841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07858647020049117, "grad_norm": 0.1223907396197319, "learning_rate": 2e-07, "loss": -0.0084, "step": 842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07867980330049175, "grad_norm": 0.13088178634643555, "learning_rate": 2e-07, "loss": 0.0267, "step": 843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07877313640049233, "grad_norm": 0.1251993626356125, "learning_rate": 2e-07, "loss": 0.0387, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07886646950049292, "grad_norm": 0.12215556204319, "learning_rate": 2e-07, "loss": 0.0091, "step": 845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0789598026004935, "grad_norm": 0.128033846616745, "learning_rate": 2e-07, "loss": 0.0022, "step": 846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07905313570049408, "grad_norm": 0.1244906485080719, "learning_rate": 2e-07, "loss": 0.0296, "step": 847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07914646880049467, "grad_norm": 0.13499657809734344, "learning_rate": 2e-07, "loss": 0.022, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07923980190049525, "grad_norm": 0.11610082536935806, "learning_rate": 2e-07, "loss": 0.0274, "step": 849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07933313500049584, "grad_norm": 0.12622587382793427, "learning_rate": 2e-07, "loss": 0.0085, "step": 850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07942646810049642, "grad_norm": 0.12759481370449066, "learning_rate": 2e-07, "loss": 0.0486, "step": 851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.079519801200497, "grad_norm": 0.11895918101072311, "learning_rate": 2e-07, "loss": 0.0291, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07961313430049759, "grad_norm": 0.12240754812955856, "learning_rate": 2e-07, "loss": 0.0018, "step": 853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07970646740049817, "grad_norm": 0.13033229112625122, "learning_rate": 2e-07, "loss": 0.0423, "step": 854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07979980050049874, "grad_norm": 0.1316843330860138, "learning_rate": 2e-07, "loss": 0.0134, "step": 855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07989313360049934, "grad_norm": 0.12169570475816727, "learning_rate": 2e-07, "loss": 0.0428, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.07998646670049991, "grad_norm": 0.12846745550632477, "learning_rate": 2e-07, "loss": 0.0183, "step": 857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08007979980050049, "grad_norm": 0.1295788586139679, "learning_rate": 2e-07, "loss": 0.0014, "step": 858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08017313290050108, "grad_norm": 0.12061750143766403, "learning_rate": 2e-07, "loss": 0.0294, "step": 859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08026646600050166, "grad_norm": 0.12721611559391022, "learning_rate": 2e-07, "loss": 0.0245, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08035979910050225, "grad_norm": 0.1279670000076294, "learning_rate": 2e-07, "loss": 0.0683, "step": 861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08045313220050283, "grad_norm": 0.10878216475248337, "learning_rate": 2e-07, "loss": 0.0068, "step": 862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08054646530050341, "grad_norm": 0.1190527155995369, "learning_rate": 2e-07, "loss": 0.0346, "step": 863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.080639798400504, "grad_norm": 0.1198086366057396, "learning_rate": 2e-07, "loss": -0.0105, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08073313150050458, "grad_norm": 0.1230640709400177, "learning_rate": 2e-07, "loss": -0.0022, "step": 865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08082646460050516, "grad_norm": 0.12793295085430145, "learning_rate": 2e-07, "loss": 0.0478, "step": 866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08091979770050575, "grad_norm": 0.1162162646651268, "learning_rate": 2e-07, "loss": 0.0049, "step": 867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08101313080050633, "grad_norm": 0.12349235266447067, "learning_rate": 2e-07, "loss": 0.0411, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08110646390050691, "grad_norm": 0.12162021547555923, "learning_rate": 2e-07, "loss": 0.0037, "step": 869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0811997970005075, "grad_norm": 0.1159018874168396, "learning_rate": 2e-07, "loss": 0.0168, "step": 870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08129313010050808, "grad_norm": 0.13062871992588043, "learning_rate": 2e-07, "loss": 0.0835, "step": 871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08138646320050867, "grad_norm": 0.11619515717029572, "learning_rate": 2e-07, "loss": -0.0007, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08147979630050925, "grad_norm": 0.12824323773384094, "learning_rate": 2e-07, "loss": 0.0625, "step": 873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08157312940050983, "grad_norm": 0.133867546916008, "learning_rate": 2e-07, "loss": 0.0118, "step": 874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08166646250051042, "grad_norm": 0.1203899085521698, "learning_rate": 2e-07, "loss": 0.0304, "step": 875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.081759795600511, "grad_norm": 0.11946319788694382, "learning_rate": 2e-07, "loss": 0.0062, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08185312870051158, "grad_norm": 0.12081809341907501, "learning_rate": 2e-07, "loss": 0.0314, "step": 877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08194646180051217, "grad_norm": 0.12231713533401489, "learning_rate": 2e-07, "loss": -0.0148, "step": 878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08203979490051275, "grad_norm": 0.12683087587356567, "learning_rate": 2e-07, "loss": 0.0375, "step": 879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08213312800051333, "grad_norm": 0.117578886449337, "learning_rate": 2e-07, "loss": 0.0885, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08222646110051392, "grad_norm": 0.13788649439811707, "learning_rate": 2e-07, "loss": 0.0422, "step": 881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0823197942005145, "grad_norm": 0.1260453760623932, "learning_rate": 2e-07, "loss": 0.0614, "step": 882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08241312730051509, "grad_norm": 0.12063919752836227, "learning_rate": 2e-07, "loss": 0.0148, "step": 883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08250646040051567, "grad_norm": 0.11219226568937302, "learning_rate": 2e-07, "loss": 0.0447, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08259979350051624, "grad_norm": 0.12594319880008698, "learning_rate": 2e-07, "loss": -0.0184, "step": 885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08269312660051684, "grad_norm": 0.1485334038734436, "learning_rate": 2e-07, "loss": 0.0517, "step": 886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08278645970051741, "grad_norm": 0.11263196915388107, "learning_rate": 2e-07, "loss": 0.0144, "step": 887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08287979280051799, "grad_norm": 0.12246878445148468, "learning_rate": 2e-07, "loss": 0.0342, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08297312590051859, "grad_norm": 0.11180509626865387, "learning_rate": 2e-07, "loss": 0.026, "step": 889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08306645900051916, "grad_norm": 0.12872332334518433, "learning_rate": 2e-07, "loss": 0.0288, "step": 890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08315979210051976, "grad_norm": 0.12065563350915909, "learning_rate": 2e-07, "loss": 0.0306, "step": 891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08325312520052033, "grad_norm": 0.132719486951828, "learning_rate": 2e-07, "loss": 0.056, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08334645830052091, "grad_norm": 0.1211332380771637, "learning_rate": 2e-07, "loss": 0.0119, "step": 893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0834397914005215, "grad_norm": 0.13359197974205017, "learning_rate": 2e-07, "loss": 0.0638, "step": 894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08353312450052208, "grad_norm": 0.13562211394309998, "learning_rate": 2e-07, "loss": 0.0588, "step": 895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08362645760052266, "grad_norm": 0.1118512749671936, "learning_rate": 2e-07, "loss": 0.0426, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013480050223214302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 622.762451171875, "completions/mean_terminated_length": 575.30322265625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.08371979070052325, "grad_norm": 0.12406393885612488, "learning_rate": 2e-07, "loss": 0.0198, "num_tokens": 613244209.0, "reward": 0.577340304851532, "reward_std": 0.1989586055278778, "rewards/simpleverify_reward/mean": 0.5773402452468872, "rewards/simpleverify_reward/std": 0.493986576795578, "step": 897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08381312380052383, "grad_norm": 0.11742869764566422, "learning_rate": 2e-07, "loss": 0.0467, "step": 898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08390645690052441, "grad_norm": 0.12272965908050537, "learning_rate": 2e-07, "loss": 0.0292, "step": 899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.083999790000525, "grad_norm": 0.12145863473415375, "learning_rate": 2e-07, "loss": 0.0382, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08409312310052558, "grad_norm": 0.12996797263622284, "learning_rate": 2e-07, "loss": 0.012, "step": 901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08418645620052617, "grad_norm": 0.12358734011650085, "learning_rate": 2e-07, "loss": 0.0215, "step": 902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08427978930052675, "grad_norm": 0.1235094666481018, "learning_rate": 2e-07, "loss": 0.0651, "step": 903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08437312240052733, "grad_norm": 0.11705267429351807, "learning_rate": 2e-07, "loss": 0.022, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08446645550052792, "grad_norm": 0.12949518859386444, "learning_rate": 2e-07, "loss": 0.0665, "step": 905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0845597886005285, "grad_norm": 0.11463525891304016, "learning_rate": 2e-07, "loss": 0.0223, "step": 906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08465312170052908, "grad_norm": 0.12273424863815308, "learning_rate": 2e-07, "loss": 0.0309, "step": 907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08474645480052967, "grad_norm": 0.12761981785297394, "learning_rate": 2e-07, "loss": 0.0209, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08483978790053025, "grad_norm": 0.12849204242229462, "learning_rate": 2e-07, "loss": 0.0329, "step": 909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08493312100053083, "grad_norm": 0.13091443479061127, "learning_rate": 2e-07, "loss": -0.0119, "step": 910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08502645410053142, "grad_norm": 0.1361619532108307, "learning_rate": 2e-07, "loss": 0.0113, "step": 911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.085119787200532, "grad_norm": 0.11390850692987442, "learning_rate": 2e-07, "loss": 0.0746, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08521312030053259, "grad_norm": 0.11825183033943176, "learning_rate": 2e-07, "loss": 0.0625, "step": 913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08530645340053317, "grad_norm": 0.1295870989561081, "learning_rate": 2e-07, "loss": 0.0284, "step": 914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08539978650053374, "grad_norm": 0.11985878646373749, "learning_rate": 2e-07, "loss": 0.0665, "step": 915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08549311960053434, "grad_norm": 0.118943952023983, "learning_rate": 2e-07, "loss": 0.0489, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08558645270053492, "grad_norm": 0.1350439190864563, "learning_rate": 2e-07, "loss": 0.0474, "step": 917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0856797858005355, "grad_norm": 0.11932313442230225, "learning_rate": 2e-07, "loss": 0.0262, "step": 918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08577311890053609, "grad_norm": 0.11024530231952667, "learning_rate": 2e-07, "loss": 0.0309, "step": 919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08586645200053666, "grad_norm": 0.11782639473676682, "learning_rate": 2e-07, "loss": 0.0105, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08595978510053724, "grad_norm": 0.12905670702457428, "learning_rate": 2e-07, "loss": -0.0011, "step": 921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08605311820053783, "grad_norm": 0.1251768171787262, "learning_rate": 2e-07, "loss": 0.0531, "step": 922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08614645130053841, "grad_norm": 0.11842817068099976, "learning_rate": 2e-07, "loss": 0.0083, "step": 923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.086239784400539, "grad_norm": 0.12237225472927094, "learning_rate": 2e-07, "loss": 0.0174, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08633311750053958, "grad_norm": 0.11277849227190018, "learning_rate": 2e-07, "loss": 0.0261, "step": 925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08642645060054016, "grad_norm": 0.11197086423635483, "learning_rate": 2e-07, "loss": 0.0356, "step": 926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08651978370054075, "grad_norm": 0.19528403878211975, "learning_rate": 2e-07, "loss": 0.0454, "step": 927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08661311680054133, "grad_norm": 0.12729953229427338, "learning_rate": 2e-07, "loss": 0.0159, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08670644990054191, "grad_norm": 0.12793217599391937, "learning_rate": 2e-07, "loss": 0.046, "step": 929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0867997830005425, "grad_norm": 0.11462729424238205, "learning_rate": 2e-07, "loss": 0.003, "step": 930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08689311610054308, "grad_norm": 0.11786491423845291, "learning_rate": 2e-07, "loss": 0.0147, "step": 931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08698644920054366, "grad_norm": 0.12702345848083496, "learning_rate": 2e-07, "loss": 0.0343, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08707978230054425, "grad_norm": 0.1180480569601059, "learning_rate": 2e-07, "loss": 0.0417, "step": 933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08717311540054483, "grad_norm": 0.11706763505935669, "learning_rate": 2e-07, "loss": 0.0586, "step": 934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08726644850054542, "grad_norm": 0.14973102509975433, "learning_rate": 2e-07, "loss": -0.0179, "step": 935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.087359781600546, "grad_norm": 0.12751786410808563, "learning_rate": 2e-07, "loss": -0.0014, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08745311470054658, "grad_norm": 0.12770016491413116, "learning_rate": 2e-07, "loss": 0.0779, "step": 937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08754644780054717, "grad_norm": 0.11583378911018372, "learning_rate": 2e-07, "loss": 0.0166, "step": 938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08763978090054775, "grad_norm": 0.1234976127743721, "learning_rate": 2e-07, "loss": 0.0623, "step": 939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08773311400054833, "grad_norm": 0.12373754382133484, "learning_rate": 2e-07, "loss": 0.0113, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08782644710054892, "grad_norm": 0.12702524662017822, "learning_rate": 2e-07, "loss": 0.0505, "step": 941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0879197802005495, "grad_norm": 0.12134741246700287, "learning_rate": 2e-07, "loss": 0.027, "step": 942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08801311330055009, "grad_norm": 0.1295042335987091, "learning_rate": 2e-07, "loss": 0.0766, "step": 943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08810644640055067, "grad_norm": 0.11991428583860397, "learning_rate": 2e-07, "loss": 0.0078, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08819977950055125, "grad_norm": 0.12392047792673111, "learning_rate": 2e-07, "loss": 0.0931, "step": 945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08829311260055184, "grad_norm": 0.13601675629615784, "learning_rate": 2e-07, "loss": 0.017, "step": 946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08838644570055242, "grad_norm": 0.12515506148338318, "learning_rate": 2e-07, "loss": 0.0152, "step": 947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.088479778800553, "grad_norm": 0.14124850928783417, "learning_rate": 2e-07, "loss": 0.0055, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08857311190055359, "grad_norm": 0.13649940490722656, "learning_rate": 2e-07, "loss": 0.0017, "step": 949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08866644500055416, "grad_norm": 0.12156989425420761, "learning_rate": 2e-07, "loss": -0.0188, "step": 950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08875977810055474, "grad_norm": 0.1219983845949173, "learning_rate": 2e-07, "loss": 0.0455, "step": 951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08885311120055533, "grad_norm": 0.13010255992412567, "learning_rate": 2e-07, "loss": -0.001, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08894644430055591, "grad_norm": 0.1486521065235138, "learning_rate": 2e-07, "loss": 0.0179, "step": 953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0890397774005565, "grad_norm": 0.12371961772441864, "learning_rate": 2e-07, "loss": 0.0225, "step": 954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08913311050055708, "grad_norm": 0.10351689904928207, "learning_rate": 2e-07, "loss": 0.0148, "step": 955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08922644360055766, "grad_norm": 0.14067280292510986, "learning_rate": 2e-07, "loss": 0.0402, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08931977670055825, "grad_norm": 0.12496519088745117, "learning_rate": 2e-07, "loss": -0.0029, "step": 957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08941310980055883, "grad_norm": 0.12028787285089493, "learning_rate": 2e-07, "loss": 0.0401, "step": 958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08950644290055941, "grad_norm": 0.11538785696029663, "learning_rate": 2e-07, "loss": 0.0085, "step": 959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08959977600056, "grad_norm": 0.121493399143219, "learning_rate": 2e-07, "loss": 0.0532, "step": 960 }, { "clip_ratio/high_max": 0.010437682183692232, "clip_ratio/high_mean": 0.0051509564218577, "clip_ratio/low_mean": 0.0035535662100301124, "clip_ratio/low_min": 0.00044791259279008955, "clip_ratio/region_mean": 0.008704522508196533, "completions/clipped_ratio": 0.013218470982142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 613.3426513671875, "completions/mean_terminated_length": 566.6906127929688, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.08969310910056058, "grad_norm": 0.11790748685598373, "learning_rate": 2e-07, "loss": 0.0131, "num_tokens": 654088676.0, "reward": 0.5857980251312256, "reward_std": 0.2002045214176178, "rewards/simpleverify_reward/mean": 0.5857979655265808, "rewards/simpleverify_reward/std": 0.49258795380592346, "step": 961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08978644220056116, "grad_norm": 0.12580302357673645, "learning_rate": 2e-07, "loss": 0.0554, "step": 962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08987977530056175, "grad_norm": 0.12604328989982605, "learning_rate": 2e-07, "loss": 0.0033, "step": 963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.08997310840056233, "grad_norm": 0.12717796862125397, "learning_rate": 2e-07, "loss": -0.0242, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09006644150056292, "grad_norm": 0.13788248598575592, "learning_rate": 2e-07, "loss": 0.0084, "step": 965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0901597746005635, "grad_norm": 0.11159532517194748, "learning_rate": 2e-07, "loss": 0.0279, "step": 966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09025310770056408, "grad_norm": 0.13069631159305573, "learning_rate": 2e-07, "loss": 0.0046, "step": 967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09034644080056467, "grad_norm": 0.12297243624925613, "learning_rate": 2e-07, "loss": 0.0487, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09043977390056525, "grad_norm": 0.13381244242191315, "learning_rate": 2e-07, "loss": 0.0543, "step": 969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09053310700056583, "grad_norm": 0.1314571052789688, "learning_rate": 2e-07, "loss": -0.0218, "step": 970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09062644010056642, "grad_norm": 0.1321249008178711, "learning_rate": 2e-07, "loss": 0.0477, "step": 971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.090719773200567, "grad_norm": 0.12110405415296555, "learning_rate": 2e-07, "loss": 0.0448, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09081310630056758, "grad_norm": 0.11183035373687744, "learning_rate": 2e-07, "loss": 0.0389, "step": 973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09090643940056817, "grad_norm": 0.1100270226597786, "learning_rate": 2e-07, "loss": 0.0172, "step": 974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09099977250056875, "grad_norm": 0.14608921110630035, "learning_rate": 2e-07, "loss": 0.042, "step": 975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09109310560056934, "grad_norm": 0.24353210628032684, "learning_rate": 2e-07, "loss": 0.0256, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09118643870056992, "grad_norm": 0.13845276832580566, "learning_rate": 2e-07, "loss": 0.0278, "step": 977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0912797718005705, "grad_norm": 0.10552609711885452, "learning_rate": 2e-07, "loss": 0.0217, "step": 978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09137310490057109, "grad_norm": 0.13302023708820343, "learning_rate": 2e-07, "loss": 0.0148, "step": 979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09146643800057166, "grad_norm": 0.1335231512784958, "learning_rate": 2e-07, "loss": -0.0143, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09155977110057224, "grad_norm": 0.11808809638023376, "learning_rate": 2e-07, "loss": 0.0531, "step": 981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09165310420057284, "grad_norm": 0.12824606895446777, "learning_rate": 2e-07, "loss": 0.0357, "step": 982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09174643730057341, "grad_norm": 0.1350392997264862, "learning_rate": 2e-07, "loss": 0.0734, "step": 983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09183977040057399, "grad_norm": 0.12262378633022308, "learning_rate": 2e-07, "loss": 0.0402, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09193310350057458, "grad_norm": 0.12261346727609634, "learning_rate": 2e-07, "loss": 0.0163, "step": 985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09202643660057516, "grad_norm": 0.12925438582897186, "learning_rate": 2e-07, "loss": 0.046, "step": 986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09211976970057575, "grad_norm": 0.1323062628507614, "learning_rate": 2e-07, "loss": 0.0801, "step": 987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09221310280057633, "grad_norm": 0.12085221707820892, "learning_rate": 2e-07, "loss": 0.0439, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09230643590057691, "grad_norm": 0.12385974824428558, "learning_rate": 2e-07, "loss": 0.0125, "step": 989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0923997690005775, "grad_norm": 0.14545592665672302, "learning_rate": 2e-07, "loss": -0.0456, "step": 990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09249310210057808, "grad_norm": 0.1537473350763321, "learning_rate": 2e-07, "loss": 0.0719, "step": 991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09258643520057866, "grad_norm": 0.13486535847187042, "learning_rate": 2e-07, "loss": -0.0114, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09267976830057925, "grad_norm": 0.1258154660463333, "learning_rate": 2e-07, "loss": 0.0479, "step": 993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09277310140057983, "grad_norm": 0.13114792108535767, "learning_rate": 2e-07, "loss": 0.046, "step": 994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09286643450058042, "grad_norm": 0.13173554837703705, "learning_rate": 2e-07, "loss": 0.0251, "step": 995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.092959767600581, "grad_norm": 0.13012327253818512, "learning_rate": 2e-07, "loss": -0.0174, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09305310070058158, "grad_norm": 0.12421735376119614, "learning_rate": 2e-07, "loss": 0.0358, "step": 997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09314643380058217, "grad_norm": 0.12075912952423096, "learning_rate": 2e-07, "loss": 0.0116, "step": 998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09323976690058275, "grad_norm": 0.12936711311340332, "learning_rate": 2e-07, "loss": 0.0216, "step": 999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09333310000058333, "grad_norm": 0.131841242313385, "learning_rate": 2e-07, "loss": 0.0509, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09342643310058392, "grad_norm": 0.12027263641357422, "learning_rate": 2e-07, "loss": 0.0358, "step": 1001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0935197662005845, "grad_norm": 0.13181601464748383, "learning_rate": 2e-07, "loss": -0.0164, "step": 1002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09361309930058508, "grad_norm": 0.11895671486854553, "learning_rate": 2e-07, "loss": 0.0009, "step": 1003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09370643240058567, "grad_norm": 0.12323889881372452, "learning_rate": 2e-07, "loss": 0.0323, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09379976550058625, "grad_norm": 0.13781878352165222, "learning_rate": 2e-07, "loss": 0.0301, "step": 1005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09389309860058684, "grad_norm": 0.12690988183021545, "learning_rate": 2e-07, "loss": 0.0072, "step": 1006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09398643170058742, "grad_norm": 0.14287704229354858, "learning_rate": 2e-07, "loss": 0.0427, "step": 1007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.094079764800588, "grad_norm": 0.12391387671232224, "learning_rate": 2e-07, "loss": 0.0214, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09417309790058859, "grad_norm": 0.1258545070886612, "learning_rate": 2e-07, "loss": 0.0312, "step": 1009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09426643100058917, "grad_norm": 0.13660462200641632, "learning_rate": 2e-07, "loss": 0.0565, "step": 1010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09435976410058974, "grad_norm": 0.1260143518447876, "learning_rate": 2e-07, "loss": 0.0246, "step": 1011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09445309720059034, "grad_norm": 0.12764915823936462, "learning_rate": 2e-07, "loss": 0.0397, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09454643030059091, "grad_norm": 0.1330089122056961, "learning_rate": 2e-07, "loss": 0.025, "step": 1013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09463976340059149, "grad_norm": 0.12474074959754944, "learning_rate": 2e-07, "loss": 0.0636, "step": 1014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09473309650059208, "grad_norm": 0.1148066520690918, "learning_rate": 2e-07, "loss": 0.0544, "step": 1015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09482642960059266, "grad_norm": 0.1243598684668541, "learning_rate": 2e-07, "loss": 0.0361, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09491976270059325, "grad_norm": 0.12512850761413574, "learning_rate": 2e-07, "loss": -0.001, "step": 1017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09501309580059383, "grad_norm": 0.12310733646154404, "learning_rate": 2e-07, "loss": 0.0651, "step": 1018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09510642890059441, "grad_norm": 0.12769833207130432, "learning_rate": 2e-07, "loss": 0.037, "step": 1019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.095199762000595, "grad_norm": 0.13213273882865906, "learning_rate": 2e-07, "loss": 0.0175, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09529309510059558, "grad_norm": 0.11583854258060455, "learning_rate": 2e-07, "loss": 0.0038, "step": 1021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09538642820059616, "grad_norm": 0.1235031858086586, "learning_rate": 2e-07, "loss": 0.0518, "step": 1022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09547976130059675, "grad_norm": 0.12434105575084686, "learning_rate": 2e-07, "loss": 0.0566, "step": 1023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09557309440059733, "grad_norm": 0.11817721277475357, "learning_rate": 2e-07, "loss": 0.0169, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014177594866071397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 622.6942138671875, "completions/mean_terminated_length": 572.742919921875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.09566642750059791, "grad_norm": 0.1316581666469574, "learning_rate": 2e-07, "loss": -0.0141, "num_tokens": 695637763.0, "reward": 0.5737479329109192, "reward_std": 0.19330938160419464, "rewards/simpleverify_reward/mean": 0.5737479329109192, "rewards/simpleverify_reward/std": 0.4945356249809265, "step": 1025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0957597606005985, "grad_norm": 0.12498697638511658, "learning_rate": 2e-07, "loss": 0.0301, "step": 1026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09585309370059908, "grad_norm": 0.13646142184734344, "learning_rate": 2e-07, "loss": 0.0424, "step": 1027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09594642680059967, "grad_norm": 0.12600235641002655, "learning_rate": 2e-07, "loss": -0.058, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09603975990060025, "grad_norm": 0.12201765924692154, "learning_rate": 2e-07, "loss": 0.0038, "step": 1029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09613309300060083, "grad_norm": 0.11507520079612732, "learning_rate": 2e-07, "loss": -0.0047, "step": 1030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09622642610060142, "grad_norm": 0.12242674827575684, "learning_rate": 2e-07, "loss": 0.0513, "step": 1031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.096319759200602, "grad_norm": 0.13105055689811707, "learning_rate": 2e-07, "loss": 0.0651, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09641309230060258, "grad_norm": 0.11185221374034882, "learning_rate": 2e-07, "loss": 0.0484, "step": 1033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09650642540060317, "grad_norm": 0.13074176013469696, "learning_rate": 2e-07, "loss": 0.022, "step": 1034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09659975850060375, "grad_norm": 0.10907220095396042, "learning_rate": 2e-07, "loss": 0.0097, "step": 1035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09669309160060433, "grad_norm": 0.11873073875904083, "learning_rate": 2e-07, "loss": 0.0114, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09678642470060492, "grad_norm": 0.11945661157369614, "learning_rate": 2e-07, "loss": -0.004, "step": 1037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0968797578006055, "grad_norm": 0.1256019026041031, "learning_rate": 2e-07, "loss": 0.0505, "step": 1038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09697309090060609, "grad_norm": 0.13739430904388428, "learning_rate": 2e-07, "loss": 0.0016, "step": 1039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09706642400060667, "grad_norm": 0.12666425108909607, "learning_rate": 2e-07, "loss": 0.0507, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09715975710060724, "grad_norm": 0.11463279277086258, "learning_rate": 2e-07, "loss": 0.0116, "step": 1041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09725309020060784, "grad_norm": 0.12110929936170578, "learning_rate": 2e-07, "loss": 0.0401, "step": 1042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09734642330060841, "grad_norm": 0.1359110325574875, "learning_rate": 2e-07, "loss": 0.0182, "step": 1043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09743975640060899, "grad_norm": 0.11389145255088806, "learning_rate": 2e-07, "loss": 0.0293, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09753308950060958, "grad_norm": 0.1231398656964302, "learning_rate": 2e-07, "loss": -0.0253, "step": 1045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09762642260061016, "grad_norm": 0.17772337794303894, "learning_rate": 2e-07, "loss": 0.0316, "step": 1046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09771975570061076, "grad_norm": 0.11407757550477982, "learning_rate": 2e-07, "loss": 0.0395, "step": 1047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09781308880061133, "grad_norm": 0.1250477433204651, "learning_rate": 2e-07, "loss": 0.0315, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09790642190061191, "grad_norm": 0.1850556880235672, "learning_rate": 2e-07, "loss": 0.0119, "step": 1049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0979997550006125, "grad_norm": 0.1270701289176941, "learning_rate": 2e-07, "loss": -0.0083, "step": 1050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09809308810061308, "grad_norm": 0.1168687492609024, "learning_rate": 2e-07, "loss": 0.0472, "step": 1051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09818642120061366, "grad_norm": 0.12744373083114624, "learning_rate": 2e-07, "loss": 0.0008, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09827975430061425, "grad_norm": 0.1210247203707695, "learning_rate": 2e-07, "loss": 0.0268, "step": 1053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09837308740061483, "grad_norm": 0.12781542539596558, "learning_rate": 2e-07, "loss": 0.0288, "step": 1054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09846642050061541, "grad_norm": 0.14108054339885712, "learning_rate": 2e-07, "loss": 0.0538, "step": 1055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.098559753600616, "grad_norm": 0.11946498602628708, "learning_rate": 2e-07, "loss": 0.025, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09865308670061658, "grad_norm": 0.33612948656082153, "learning_rate": 2e-07, "loss": 0.0307, "step": 1057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09874641980061717, "grad_norm": 0.1263088434934616, "learning_rate": 2e-07, "loss": 0.014, "step": 1058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09883975290061775, "grad_norm": 0.1360570341348648, "learning_rate": 2e-07, "loss": 0.0537, "step": 1059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09893308600061833, "grad_norm": 0.11681803315877914, "learning_rate": 2e-07, "loss": 0.0073, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09902641910061892, "grad_norm": 0.12059196829795837, "learning_rate": 2e-07, "loss": 0.0361, "step": 1061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.0991197522006195, "grad_norm": 0.13508820533752441, "learning_rate": 2e-07, "loss": 0.0639, "step": 1062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09921308530062008, "grad_norm": 0.12632830440998077, "learning_rate": 2e-07, "loss": 0.0638, "step": 1063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09930641840062067, "grad_norm": 0.12242299318313599, "learning_rate": 2e-07, "loss": 0.0208, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09939975150062125, "grad_norm": 0.1329362392425537, "learning_rate": 2e-07, "loss": 0.0094, "step": 1065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09949308460062183, "grad_norm": 0.12316466867923737, "learning_rate": 2e-07, "loss": 0.0322, "step": 1066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09958641770062242, "grad_norm": 0.11811467260122299, "learning_rate": 2e-07, "loss": -0.0181, "step": 1067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.099679750800623, "grad_norm": 0.1136048287153244, "learning_rate": 2e-07, "loss": 0.0292, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09977308390062359, "grad_norm": 0.14453837275505066, "learning_rate": 2e-07, "loss": -0.0128, "step": 1069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09986641700062417, "grad_norm": 0.1360822319984436, "learning_rate": 2e-07, "loss": 0.0367, "step": 1070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.09995975010062474, "grad_norm": 0.12935367226600647, "learning_rate": 2e-07, "loss": 0.0279, "step": 1071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10005308320062534, "grad_norm": 0.12355770915746689, "learning_rate": 2e-07, "loss": 0.0386, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10014641630062592, "grad_norm": 0.1192934438586235, "learning_rate": 2e-07, "loss": 0.0574, "step": 1073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1002397494006265, "grad_norm": 0.129846453666687, "learning_rate": 2e-07, "loss": 0.0484, "step": 1074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10033308250062709, "grad_norm": 0.1394384503364563, "learning_rate": 2e-07, "loss": 0.0356, "step": 1075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10042641560062766, "grad_norm": 0.12558715045452118, "learning_rate": 2e-07, "loss": 0.0689, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10051974870062824, "grad_norm": 0.1378021091222763, "learning_rate": 2e-07, "loss": 0.0504, "step": 1077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10061308180062883, "grad_norm": 0.11771351844072342, "learning_rate": 2e-07, "loss": 0.0605, "step": 1078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10070641490062941, "grad_norm": 0.124101422727108, "learning_rate": 2e-07, "loss": 0.069, "step": 1079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10079974800063, "grad_norm": 0.12010723352432251, "learning_rate": 2e-07, "loss": -0.0035, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10089308110063058, "grad_norm": 0.1290493756532669, "learning_rate": 2e-07, "loss": 0.0495, "step": 1081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10098641420063116, "grad_norm": 0.11392639577388763, "learning_rate": 2e-07, "loss": 0.0006, "step": 1082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10107974730063175, "grad_norm": 0.12000121176242828, "learning_rate": 2e-07, "loss": 0.018, "step": 1083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10117308040063233, "grad_norm": 0.12304224818944931, "learning_rate": 2e-07, "loss": 0.0629, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10126641350063291, "grad_norm": 0.16488148272037506, "learning_rate": 2e-07, "loss": 0.0297, "step": 1085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1013597466006335, "grad_norm": 0.1308477520942688, "learning_rate": 2e-07, "loss": 0.0248, "step": 1086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10145307970063408, "grad_norm": 0.12387493252754211, "learning_rate": 2e-07, "loss": -0.0092, "step": 1087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10154641280063466, "grad_norm": 0.11677070707082748, "learning_rate": 2e-07, "loss": 0.0413, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0135498046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 618.5260620117188, "completions/mean_terminated_length": 570.7597045898438, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.10163974590063525, "grad_norm": 0.129710853099823, "learning_rate": 2e-07, "loss": 0.0266, "num_tokens": 736832311.0, "reward": 0.5871233344078064, "reward_std": 0.19259746372699738, "rewards/simpleverify_reward/mean": 0.5871233344078064, "rewards/simpleverify_reward/std": 0.4923553168773651, "step": 1089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10173307900063583, "grad_norm": 0.12425310909748077, "learning_rate": 2e-07, "loss": 0.0202, "step": 1090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10182641210063642, "grad_norm": 0.11199373751878738, "learning_rate": 2e-07, "loss": 0.062, "step": 1091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.101919745200637, "grad_norm": 0.1283552348613739, "learning_rate": 2e-07, "loss": 0.0056, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10201307830063758, "grad_norm": 0.12982302904129028, "learning_rate": 2e-07, "loss": 0.0258, "step": 1093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10210641140063817, "grad_norm": 0.1246814951300621, "learning_rate": 2e-07, "loss": 0.0061, "step": 1094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10219974450063875, "grad_norm": 0.12260641157627106, "learning_rate": 2e-07, "loss": 0.0315, "step": 1095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10229307760063933, "grad_norm": 0.12535782158374786, "learning_rate": 2e-07, "loss": 0.0385, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10238641070063992, "grad_norm": 0.11780212819576263, "learning_rate": 2e-07, "loss": 0.0473, "step": 1097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1024797438006405, "grad_norm": 0.12184536457061768, "learning_rate": 2e-07, "loss": 0.0091, "step": 1098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10257307690064109, "grad_norm": 0.16125360131263733, "learning_rate": 2e-07, "loss": 0.0209, "step": 1099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10266641000064167, "grad_norm": 0.129767045378685, "learning_rate": 2e-07, "loss": 0.0204, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10275974310064225, "grad_norm": 0.12149225920438766, "learning_rate": 2e-07, "loss": 0.0449, "step": 1101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10285307620064284, "grad_norm": 0.11585115641355515, "learning_rate": 2e-07, "loss": 0.0016, "step": 1102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10294640930064342, "grad_norm": 0.11527204513549805, "learning_rate": 2e-07, "loss": 0.0237, "step": 1103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.103039742400644, "grad_norm": 0.12590822577476501, "learning_rate": 2e-07, "loss": 0.0478, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10313307550064459, "grad_norm": 0.13409964740276337, "learning_rate": 2e-07, "loss": 0.0078, "step": 1105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10322640860064516, "grad_norm": 0.1281125694513321, "learning_rate": 2e-07, "loss": 0.0423, "step": 1106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10331974170064574, "grad_norm": 0.13257557153701782, "learning_rate": 2e-07, "loss": 0.005, "step": 1107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10341307480064633, "grad_norm": 0.11419656872749329, "learning_rate": 2e-07, "loss": 0.0004, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10350640790064691, "grad_norm": 0.12049297243356705, "learning_rate": 2e-07, "loss": 0.0565, "step": 1109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1035997410006475, "grad_norm": 0.15598013997077942, "learning_rate": 2e-07, "loss": 0.0524, "step": 1110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10369307410064808, "grad_norm": 0.13944809138774872, "learning_rate": 2e-07, "loss": 0.024, "step": 1111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10378640720064866, "grad_norm": 0.12656055390834808, "learning_rate": 2e-07, "loss": 0.0231, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10387974030064925, "grad_norm": 0.14490559697151184, "learning_rate": 2e-07, "loss": 0.0587, "step": 1113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10397307340064983, "grad_norm": 0.1195591539144516, "learning_rate": 2e-07, "loss": 0.0106, "step": 1114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10406640650065041, "grad_norm": 0.1294788271188736, "learning_rate": 2e-07, "loss": 0.0526, "step": 1115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.104159739600651, "grad_norm": 0.13575363159179688, "learning_rate": 2e-07, "loss": 0.0186, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10425307270065158, "grad_norm": 0.1433030515909195, "learning_rate": 2e-07, "loss": 0.0229, "step": 1117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10434640580065216, "grad_norm": 0.1215771734714508, "learning_rate": 2e-07, "loss": -0.0045, "step": 1118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10443973890065275, "grad_norm": 0.12646552920341492, "learning_rate": 2e-07, "loss": 0.0145, "step": 1119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10453307200065333, "grad_norm": 0.1285991221666336, "learning_rate": 2e-07, "loss": 0.0268, "step": 1120 }, { "clip_ratio/high_max": 0.011275653843767941, "clip_ratio/high_mean": 0.00543773511890322, "clip_ratio/low_mean": 0.003624753844633233, "clip_ratio/low_min": 0.0002658166085893754, "clip_ratio/region_mean": 0.00906248899991624, "completions/clipped_ratio": 0.013636997767857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 621.5830078125, "completions/mean_terminated_length": 573.54736328125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.10462640510065392, "grad_norm": 0.12258480489253998, "learning_rate": 2e-07, "loss": 0.0079, "num_tokens": 778202160.0, "reward": 0.5866001844406128, "reward_std": 0.19235768914222717, "rewards/simpleverify_reward/mean": 0.5866001844406128, "rewards/simpleverify_reward/std": 0.4924476146697998, "step": 1121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1047197382006545, "grad_norm": 0.12275806814432144, "learning_rate": 2e-07, "loss": 0.0424, "step": 1122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10481307130065508, "grad_norm": 0.1303173154592514, "learning_rate": 2e-07, "loss": 0.0309, "step": 1123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10490640440065567, "grad_norm": 0.12321112304925919, "learning_rate": 2e-07, "loss": -0.0192, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10499973750065625, "grad_norm": 0.1279294639825821, "learning_rate": 2e-07, "loss": -0.0149, "step": 1125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10509307060065683, "grad_norm": 0.12685002386569977, "learning_rate": 2e-07, "loss": 0.0356, "step": 1126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10518640370065742, "grad_norm": 0.13565027713775635, "learning_rate": 2e-07, "loss": -0.0219, "step": 1127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.105279736800658, "grad_norm": 0.11720327287912369, "learning_rate": 2e-07, "loss": 0.028, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10537306990065858, "grad_norm": 0.1343325823545456, "learning_rate": 2e-07, "loss": 0.0607, "step": 1129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10546640300065917, "grad_norm": 0.12469281256198883, "learning_rate": 2e-07, "loss": -0.0125, "step": 1130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10555973610065975, "grad_norm": 0.12600409984588623, "learning_rate": 2e-07, "loss": -0.0212, "step": 1131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10565306920066034, "grad_norm": 0.12666453421115875, "learning_rate": 2e-07, "loss": 0.042, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10574640230066092, "grad_norm": 0.12581267952919006, "learning_rate": 2e-07, "loss": 0.0004, "step": 1133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1058397354006615, "grad_norm": 0.11753392219543457, "learning_rate": 2e-07, "loss": 0.0252, "step": 1134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10593306850066209, "grad_norm": 0.12723000347614288, "learning_rate": 2e-07, "loss": 0.0637, "step": 1135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10602640160066266, "grad_norm": 0.12447874993085861, "learning_rate": 2e-07, "loss": 0.0335, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10611973470066324, "grad_norm": 0.14147357642650604, "learning_rate": 2e-07, "loss": 0.0608, "step": 1137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10621306780066384, "grad_norm": 0.13559502363204956, "learning_rate": 2e-07, "loss": 0.0269, "step": 1138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10630640090066441, "grad_norm": 0.10702571272850037, "learning_rate": 2e-07, "loss": 0.0123, "step": 1139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10639973400066499, "grad_norm": 0.12850673496723175, "learning_rate": 2e-07, "loss": 0.0162, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10649306710066558, "grad_norm": 0.11446702480316162, "learning_rate": 2e-07, "loss": 0.005, "step": 1141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10658640020066616, "grad_norm": 0.12650370597839355, "learning_rate": 2e-07, "loss": 0.002, "step": 1142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10667973330066675, "grad_norm": 0.1344020813703537, "learning_rate": 2e-07, "loss": -0.0006, "step": 1143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10677306640066733, "grad_norm": 0.12671393156051636, "learning_rate": 2e-07, "loss": 0.007, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10686639950066791, "grad_norm": 0.12316898256540298, "learning_rate": 2e-07, "loss": 0.0167, "step": 1145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1069597326006685, "grad_norm": 0.12120793759822845, "learning_rate": 2e-07, "loss": 0.0165, "step": 1146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10705306570066908, "grad_norm": 0.12227485328912735, "learning_rate": 2e-07, "loss": 0.014, "step": 1147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10714639880066966, "grad_norm": 0.1323586404323578, "learning_rate": 2e-07, "loss": 0.0436, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10723973190067025, "grad_norm": 0.12394601851701736, "learning_rate": 2e-07, "loss": 0.0321, "step": 1149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10733306500067083, "grad_norm": 0.12051504105329514, "learning_rate": 2e-07, "loss": 0.0496, "step": 1150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10742639810067142, "grad_norm": 0.12830033898353577, "learning_rate": 2e-07, "loss": 0.0517, "step": 1151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.107519731200672, "grad_norm": 0.1275833398103714, "learning_rate": 2e-07, "loss": -0.0025, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10761306430067258, "grad_norm": 0.12625139951705933, "learning_rate": 2e-07, "loss": 0.0164, "step": 1153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10770639740067317, "grad_norm": 0.12693054974079132, "learning_rate": 2e-07, "loss": 0.0518, "step": 1154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10779973050067375, "grad_norm": 0.12615393102169037, "learning_rate": 2e-07, "loss": 0.0444, "step": 1155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10789306360067433, "grad_norm": 0.13960233330726624, "learning_rate": 2e-07, "loss": -0.0057, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10798639670067492, "grad_norm": 0.14542271196842194, "learning_rate": 2e-07, "loss": 0.0071, "step": 1157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1080797298006755, "grad_norm": 0.1156541183590889, "learning_rate": 2e-07, "loss": 0.0337, "step": 1158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10817306290067608, "grad_norm": 0.1293029934167862, "learning_rate": 2e-07, "loss": 0.0044, "step": 1159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10826639600067667, "grad_norm": 0.13470028340816498, "learning_rate": 2e-07, "loss": 0.0357, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10835972910067725, "grad_norm": 0.11810430884361267, "learning_rate": 2e-07, "loss": 0.0222, "step": 1161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10845306220067784, "grad_norm": 0.12567394971847534, "learning_rate": 2e-07, "loss": 0.0182, "step": 1162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10854639530067842, "grad_norm": 0.12532854080200195, "learning_rate": 2e-07, "loss": 0.0081, "step": 1163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.108639728400679, "grad_norm": 0.12335190176963806, "learning_rate": 2e-07, "loss": 0.0085, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10873306150067959, "grad_norm": 0.11937543749809265, "learning_rate": 2e-07, "loss": 0.028, "step": 1165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10882639460068017, "grad_norm": 0.1282988339662552, "learning_rate": 2e-07, "loss": 0.0289, "step": 1166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10891972770068074, "grad_norm": 0.11780912429094315, "learning_rate": 2e-07, "loss": 0.0319, "step": 1167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10901306080068134, "grad_norm": 0.13476280868053436, "learning_rate": 2e-07, "loss": 0.0439, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10910639390068191, "grad_norm": 0.1252293437719345, "learning_rate": 2e-07, "loss": 0.0191, "step": 1169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10919972700068249, "grad_norm": 0.11742155253887177, "learning_rate": 2e-07, "loss": 0.0373, "step": 1170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10929306010068308, "grad_norm": 0.11951114982366562, "learning_rate": 2e-07, "loss": 0.0836, "step": 1171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10938639320068366, "grad_norm": 0.12027372419834137, "learning_rate": 2e-07, "loss": 0.0779, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10947972630068425, "grad_norm": 0.12384378910064697, "learning_rate": 2e-07, "loss": 0.0222, "step": 1173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10957305940068483, "grad_norm": 0.12592965364456177, "learning_rate": 2e-07, "loss": 0.037, "step": 1174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10966639250068541, "grad_norm": 0.14081540703773499, "learning_rate": 2e-07, "loss": 0.1091, "step": 1175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.109759725600686, "grad_norm": 0.12784026563167572, "learning_rate": 2e-07, "loss": 0.0305, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10985305870068658, "grad_norm": 0.1324957311153412, "learning_rate": 2e-07, "loss": -0.0032, "step": 1177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.10994639180068716, "grad_norm": 0.29785025119781494, "learning_rate": 2e-07, "loss": 0.0256, "step": 1178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11003972490068775, "grad_norm": 0.1288004070520401, "learning_rate": 2e-07, "loss": 0.0335, "step": 1179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11013305800068833, "grad_norm": 0.1405888944864273, "learning_rate": 2e-07, "loss": 0.0984, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11022639110068891, "grad_norm": 0.11853551864624023, "learning_rate": 2e-07, "loss": 0.0486, "step": 1181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1103197242006895, "grad_norm": 0.12615849077701569, "learning_rate": 2e-07, "loss": 0.0701, "step": 1182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11041305730069008, "grad_norm": 0.12660016119480133, "learning_rate": 2e-07, "loss": 0.022, "step": 1183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11050639040069067, "grad_norm": 0.13127849996089935, "learning_rate": 2e-07, "loss": 0.0544, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0126953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 608.6767578125, "completions/mean_terminated_length": 563.8348388671875, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.11059972350069125, "grad_norm": 0.13055351376533508, "learning_rate": 2e-07, "loss": -0.0166, "num_tokens": 818799714.0, "reward": 0.5965750813484192, "reward_std": 0.18743781745433807, "rewards/simpleverify_reward/mean": 0.5965750813484192, "rewards/simpleverify_reward/std": 0.49058884382247925, "step": 1185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11069305660069183, "grad_norm": 0.12696699798107147, "learning_rate": 2e-07, "loss": 0.0274, "step": 1186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11078638970069242, "grad_norm": 0.1355103999376297, "learning_rate": 2e-07, "loss": -0.0022, "step": 1187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.110879722800693, "grad_norm": 0.12477428466081619, "learning_rate": 2e-07, "loss": -0.0208, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11097305590069358, "grad_norm": 0.12387882173061371, "learning_rate": 2e-07, "loss": 0.0336, "step": 1189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11106638900069417, "grad_norm": 0.12007330358028412, "learning_rate": 2e-07, "loss": 0.0144, "step": 1190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11115972210069475, "grad_norm": 0.12894201278686523, "learning_rate": 2e-07, "loss": -0.0123, "step": 1191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11125305520069532, "grad_norm": 0.11981654167175293, "learning_rate": 2e-07, "loss": 0.0037, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11134638830069592, "grad_norm": 0.1369446963071823, "learning_rate": 2e-07, "loss": 0.0503, "step": 1193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1114397214006965, "grad_norm": 0.12924689054489136, "learning_rate": 2e-07, "loss": 0.038, "step": 1194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11153305450069709, "grad_norm": 0.116915762424469, "learning_rate": 2e-07, "loss": 0.1079, "step": 1195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11162638760069767, "grad_norm": 0.11777377128601074, "learning_rate": 2e-07, "loss": -0.0137, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11171972070069824, "grad_norm": 0.1275106519460678, "learning_rate": 2e-07, "loss": 0.0412, "step": 1197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11181305380069884, "grad_norm": 0.13204318284988403, "learning_rate": 2e-07, "loss": 0.0141, "step": 1198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11190638690069941, "grad_norm": 0.1237158551812172, "learning_rate": 2e-07, "loss": 0.0397, "step": 1199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11199972000069999, "grad_norm": 0.13569581508636475, "learning_rate": 2e-07, "loss": 0.044, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11209305310070058, "grad_norm": 0.11376943439245224, "learning_rate": 2e-07, "loss": 0.0012, "step": 1201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11218638620070116, "grad_norm": 0.12897680699825287, "learning_rate": 2e-07, "loss": 0.0278, "step": 1202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11227971930070174, "grad_norm": 0.12899047136306763, "learning_rate": 2e-07, "loss": 0.0144, "step": 1203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11237305240070233, "grad_norm": 0.14368315041065216, "learning_rate": 2e-07, "loss": 0.0578, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11246638550070291, "grad_norm": 0.12675721943378448, "learning_rate": 2e-07, "loss": 0.0301, "step": 1205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1125597186007035, "grad_norm": 0.14048494398593903, "learning_rate": 2e-07, "loss": 0.0417, "step": 1206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11265305170070408, "grad_norm": 0.1362312138080597, "learning_rate": 2e-07, "loss": 0.1037, "step": 1207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11274638480070466, "grad_norm": 0.12211427837610245, "learning_rate": 2e-07, "loss": 0.019, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11283971790070525, "grad_norm": 0.13619595766067505, "learning_rate": 2e-07, "loss": -0.0398, "step": 1209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11293305100070583, "grad_norm": 0.11813727021217346, "learning_rate": 2e-07, "loss": 0.0109, "step": 1210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11302638410070641, "grad_norm": 0.17518094182014465, "learning_rate": 2e-07, "loss": 0.0258, "step": 1211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.113119717200707, "grad_norm": 0.1293713003396988, "learning_rate": 2e-07, "loss": 0.0342, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11321305030070758, "grad_norm": 0.12582015991210938, "learning_rate": 2e-07, "loss": 0.0084, "step": 1213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11330638340070817, "grad_norm": 0.13867539167404175, "learning_rate": 2e-07, "loss": 0.0422, "step": 1214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11339971650070875, "grad_norm": 0.13374628126621246, "learning_rate": 2e-07, "loss": 0.0642, "step": 1215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11349304960070933, "grad_norm": 0.12595230340957642, "learning_rate": 2e-07, "loss": 0.0163, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11358638270070992, "grad_norm": 0.1340281218290329, "learning_rate": 2e-07, "loss": -0.005, "step": 1217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1136797158007105, "grad_norm": 0.12060722708702087, "learning_rate": 2e-07, "loss": 0.0036, "step": 1218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11377304890071108, "grad_norm": 0.12858417630195618, "learning_rate": 2e-07, "loss": 0.0018, "step": 1219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11386638200071167, "grad_norm": 0.13602882623672485, "learning_rate": 2e-07, "loss": 0.0213, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11395971510071225, "grad_norm": 0.13078676164150238, "learning_rate": 2e-07, "loss": 0.0145, "step": 1221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11405304820071283, "grad_norm": 0.1232333853840828, "learning_rate": 2e-07, "loss": 0.0592, "step": 1222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11414638130071342, "grad_norm": 0.10972514748573303, "learning_rate": 2e-07, "loss": 0.0369, "step": 1223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.114239714400714, "grad_norm": 0.1329655796289444, "learning_rate": 2e-07, "loss": 0.0343, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11433304750071459, "grad_norm": 0.1647261679172516, "learning_rate": 2e-07, "loss": 0.0282, "step": 1225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11442638060071517, "grad_norm": 0.23968738317489624, "learning_rate": 2e-07, "loss": 0.0271, "step": 1226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11451971370071574, "grad_norm": 0.12331417948007584, "learning_rate": 2e-07, "loss": 0.0584, "step": 1227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11461304680071634, "grad_norm": 0.12272704392671585, "learning_rate": 2e-07, "loss": 0.0427, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11470637990071691, "grad_norm": 0.12608398497104645, "learning_rate": 2e-07, "loss": 0.015, "step": 1229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1147997130007175, "grad_norm": 0.12190182507038116, "learning_rate": 2e-07, "loss": 0.07, "step": 1230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11489304610071809, "grad_norm": 0.1147938147187233, "learning_rate": 2e-07, "loss": 0.0108, "step": 1231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11498637920071866, "grad_norm": 0.11709746718406677, "learning_rate": 2e-07, "loss": -0.0009, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11507971230071924, "grad_norm": 0.1322403997182846, "learning_rate": 2e-07, "loss": 0.0431, "step": 1233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11517304540071983, "grad_norm": 0.1292407363653183, "learning_rate": 2e-07, "loss": 0.0141, "step": 1234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11526637850072041, "grad_norm": 0.2323407083749771, "learning_rate": 2e-07, "loss": 0.0406, "step": 1235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.115359711600721, "grad_norm": 0.13938459753990173, "learning_rate": 2e-07, "loss": -0.0354, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11545304470072158, "grad_norm": 0.12293282151222229, "learning_rate": 2e-07, "loss": 0.0237, "step": 1237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11554637780072216, "grad_norm": 0.1341751217842102, "learning_rate": 2e-07, "loss": 0.0267, "step": 1238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11563971090072275, "grad_norm": 0.12490011006593704, "learning_rate": 2e-07, "loss": 0.045, "step": 1239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11573304400072333, "grad_norm": 0.1277049034833908, "learning_rate": 2e-07, "loss": 0.0421, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11582637710072391, "grad_norm": 0.1135387122631073, "learning_rate": 2e-07, "loss": 0.081, "step": 1241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1159197102007245, "grad_norm": 0.11486272513866425, "learning_rate": 2e-07, "loss": 0.0155, "step": 1242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11601304330072508, "grad_norm": 0.13372312486171722, "learning_rate": 2e-07, "loss": 0.0285, "step": 1243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11610637640072566, "grad_norm": 0.1335843801498413, "learning_rate": 2e-07, "loss": 0.0242, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11619970950072625, "grad_norm": 0.1441110223531723, "learning_rate": 2e-07, "loss": 0.0589, "step": 1245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11629304260072683, "grad_norm": 0.13124550879001617, "learning_rate": 2e-07, "loss": 0.0586, "step": 1246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11638637570072742, "grad_norm": 0.12812970578670502, "learning_rate": 2e-07, "loss": 0.0505, "step": 1247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.116479708800728, "grad_norm": 0.14105205237865448, "learning_rate": 2e-07, "loss": 0.0411, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011858258928571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 608.3168334960938, "completions/mean_terminated_length": 566.462646484375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.11657304190072858, "grad_norm": 0.12530314922332764, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 859315523.0, "reward": 0.5994175672531128, "reward_std": 0.1901005059480667, "rewards/simpleverify_reward/mean": 0.5994175672531128, "rewards/simpleverify_reward/std": 0.4900206923484802, "step": 1249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11666637500072917, "grad_norm": 0.13039752840995789, "learning_rate": 2e-07, "loss": 0.001, "step": 1250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11675970810072975, "grad_norm": 0.14744889736175537, "learning_rate": 2e-07, "loss": 0.0467, "step": 1251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11685304120073033, "grad_norm": 0.14962077140808105, "learning_rate": 2e-07, "loss": 0.0156, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11694637430073092, "grad_norm": 0.1135014146566391, "learning_rate": 2e-07, "loss": 0.0235, "step": 1253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1170397074007315, "grad_norm": 0.12785571813583374, "learning_rate": 2e-07, "loss": -0.0171, "step": 1254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11713304050073207, "grad_norm": 0.1328587681055069, "learning_rate": 2e-07, "loss": 0.0297, "step": 1255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11722637360073267, "grad_norm": 0.12974928319454193, "learning_rate": 2e-07, "loss": 0.0488, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11731970670073325, "grad_norm": 0.13236884772777557, "learning_rate": 2e-07, "loss": -0.0041, "step": 1257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11741303980073384, "grad_norm": 0.12275372445583344, "learning_rate": 2e-07, "loss": 0.0284, "step": 1258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11750637290073442, "grad_norm": 0.12199831753969193, "learning_rate": 2e-07, "loss": 0.0517, "step": 1259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.117599706000735, "grad_norm": 0.10939118266105652, "learning_rate": 2e-07, "loss": 0.0514, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11769303910073559, "grad_norm": 0.1254461705684662, "learning_rate": 2e-07, "loss": 0.0361, "step": 1261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11778637220073616, "grad_norm": 0.14978745579719543, "learning_rate": 2e-07, "loss": 0.0263, "step": 1262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11787970530073674, "grad_norm": 0.13184259831905365, "learning_rate": 2e-07, "loss": -0.0347, "step": 1263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11797303840073733, "grad_norm": 0.1274329572916031, "learning_rate": 2e-07, "loss": 0.0283, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11806637150073791, "grad_norm": 0.12235521525144577, "learning_rate": 2e-07, "loss": 0.0188, "step": 1265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1181597046007385, "grad_norm": 0.12771612405776978, "learning_rate": 2e-07, "loss": 0.0374, "step": 1266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11825303770073908, "grad_norm": 0.14621379971504211, "learning_rate": 2e-07, "loss": 0.0711, "step": 1267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11834637080073966, "grad_norm": 0.12454205751419067, "learning_rate": 2e-07, "loss": 0.0449, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11843970390074025, "grad_norm": 0.12909890711307526, "learning_rate": 2e-07, "loss": 0.0406, "step": 1269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11853303700074083, "grad_norm": 0.1296859085559845, "learning_rate": 2e-07, "loss": 0.0226, "step": 1270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11862637010074141, "grad_norm": 0.12741021811962128, "learning_rate": 2e-07, "loss": 0.0523, "step": 1271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.118719703200742, "grad_norm": 0.1277891993522644, "learning_rate": 2e-07, "loss": -0.017, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11881303630074258, "grad_norm": 0.13933908939361572, "learning_rate": 2e-07, "loss": 0.0436, "step": 1273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11890636940074316, "grad_norm": 0.12748397886753082, "learning_rate": 2e-07, "loss": 0.0197, "step": 1274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11899970250074375, "grad_norm": 0.1312733292579651, "learning_rate": 2e-07, "loss": 0.1026, "step": 1275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11909303560074433, "grad_norm": 0.19052410125732422, "learning_rate": 2e-07, "loss": 0.0519, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11918636870074492, "grad_norm": 0.1322513371706009, "learning_rate": 2e-07, "loss": -0.0025, "step": 1277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1192797018007455, "grad_norm": 0.12174084037542343, "learning_rate": 2e-07, "loss": 0.0332, "step": 1278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11937303490074608, "grad_norm": 0.12063100934028625, "learning_rate": 2e-07, "loss": 0.0359, "step": 1279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11946636800074667, "grad_norm": 0.1366986632347107, "learning_rate": 2e-07, "loss": 0.0171, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11955970110074725, "grad_norm": 0.13286036252975464, "learning_rate": 2e-07, "loss": 0.0453, "step": 1281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11965303420074783, "grad_norm": 0.12996463477611542, "learning_rate": 2e-07, "loss": 0.0296, "step": 1282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11974636730074842, "grad_norm": 0.13416169583797455, "learning_rate": 2e-07, "loss": 0.0216, "step": 1283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.119839700400749, "grad_norm": 0.12199593335390091, "learning_rate": 2e-07, "loss": 0.0118, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.11993303350074958, "grad_norm": 0.12001723796129227, "learning_rate": 2e-07, "loss": 0.0646, "step": 1285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12002636660075017, "grad_norm": 0.12678152322769165, "learning_rate": 2e-07, "loss": -0.0011, "step": 1286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12011969970075075, "grad_norm": 0.12558075785636902, "learning_rate": 2e-07, "loss": 0.0131, "step": 1287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12021303280075134, "grad_norm": 0.13155266642570496, "learning_rate": 2e-07, "loss": 0.0163, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12030636590075192, "grad_norm": 0.13256101310253143, "learning_rate": 2e-07, "loss": 0.0163, "step": 1289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1203996990007525, "grad_norm": 0.15142793953418732, "learning_rate": 2e-07, "loss": 0.0032, "step": 1290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12049303210075309, "grad_norm": 0.13562975823879242, "learning_rate": 2e-07, "loss": 0.0168, "step": 1291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12058636520075366, "grad_norm": 0.1157451644539833, "learning_rate": 2e-07, "loss": 0.0323, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12067969830075424, "grad_norm": 0.12735344469547272, "learning_rate": 2e-07, "loss": 0.0513, "step": 1293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12077303140075484, "grad_norm": 0.1280812919139862, "learning_rate": 2e-07, "loss": 0.0328, "step": 1294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12086636450075541, "grad_norm": 0.142857164144516, "learning_rate": 2e-07, "loss": 0.0534, "step": 1295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12095969760075599, "grad_norm": 0.13524094223976135, "learning_rate": 2e-07, "loss": 0.0192, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12105303070075658, "grad_norm": 0.13018885254859924, "learning_rate": 2e-07, "loss": 0.052, "step": 1297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12114636380075716, "grad_norm": 0.12639300525188446, "learning_rate": 2e-07, "loss": -0.0129, "step": 1298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12123969690075775, "grad_norm": 0.13235361874103546, "learning_rate": 2e-07, "loss": -0.0366, "step": 1299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12133303000075833, "grad_norm": 0.14917443692684174, "learning_rate": 2e-07, "loss": 0.0161, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12142636310075891, "grad_norm": 0.14884494245052338, "learning_rate": 2e-07, "loss": 0.0409, "step": 1301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1215196962007595, "grad_norm": 0.15109430253505707, "learning_rate": 2e-07, "loss": 0.0272, "step": 1302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12161302930076008, "grad_norm": 0.13050200045108795, "learning_rate": 2e-07, "loss": 0.0099, "step": 1303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12170636240076066, "grad_norm": 0.11470898985862732, "learning_rate": 2e-07, "loss": 0.0107, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12179969550076125, "grad_norm": 0.14495515823364258, "learning_rate": 2e-07, "loss": 0.0504, "step": 1305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12189302860076183, "grad_norm": 0.1229834333062172, "learning_rate": 2e-07, "loss": -0.014, "step": 1306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12198636170076241, "grad_norm": 0.1366640031337738, "learning_rate": 2e-07, "loss": 0.0114, "step": 1307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.122079694800763, "grad_norm": 0.13780227303504944, "learning_rate": 2e-07, "loss": 0.0231, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12217302790076358, "grad_norm": 0.1299963891506195, "learning_rate": 2e-07, "loss": -0.0283, "step": 1309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12226636100076417, "grad_norm": 0.12133779376745224, "learning_rate": 2e-07, "loss": -0.0305, "step": 1310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12235969410076475, "grad_norm": 0.12540125846862793, "learning_rate": 2e-07, "loss": 0.0219, "step": 1311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12245302720076533, "grad_norm": 0.13978445529937744, "learning_rate": 2e-07, "loss": 0.0207, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012555803571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 607.9358520507812, "completions/mean_terminated_length": 563.5834350585938, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.12254636030076592, "grad_norm": 0.11809849739074707, "learning_rate": 2e-07, "loss": 0.0165, "num_tokens": 899854737.0, "reward": 0.5961216688156128, "reward_std": 0.19023947417736053, "rewards/simpleverify_reward/mean": 0.5961216688156128, "rewards/simpleverify_reward/std": 0.49067792296409607, "step": 1313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1226396934007665, "grad_norm": 0.12058080732822418, "learning_rate": 2e-07, "loss": 0.0601, "step": 1314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12273302650076708, "grad_norm": 0.125620037317276, "learning_rate": 2e-07, "loss": 0.016, "step": 1315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12282635960076767, "grad_norm": 0.12974177300930023, "learning_rate": 2e-07, "loss": 0.0512, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12291969270076825, "grad_norm": 0.1465127170085907, "learning_rate": 2e-07, "loss": 0.0096, "step": 1317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12301302580076884, "grad_norm": 0.14485350251197815, "learning_rate": 2e-07, "loss": -0.0101, "step": 1318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12310635890076942, "grad_norm": 0.1286245733499527, "learning_rate": 2e-07, "loss": 0.0203, "step": 1319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12319969200077, "grad_norm": 0.12529346346855164, "learning_rate": 2e-07, "loss": 0.0662, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12329302510077059, "grad_norm": 0.1323009878396988, "learning_rate": 2e-07, "loss": -0.0042, "step": 1321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12338635820077117, "grad_norm": 0.11834096163511276, "learning_rate": 2e-07, "loss": 0.0508, "step": 1322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12347969130077174, "grad_norm": 0.12331033498048782, "learning_rate": 2e-07, "loss": 0.0369, "step": 1323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12357302440077234, "grad_norm": 0.12092196941375732, "learning_rate": 2e-07, "loss": 0.0079, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12366635750077291, "grad_norm": 0.13256673514842987, "learning_rate": 2e-07, "loss": 0.0218, "step": 1325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12375969060077349, "grad_norm": 0.13517709076404572, "learning_rate": 2e-07, "loss": 0.1028, "step": 1326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12385302370077408, "grad_norm": 0.12170856446027756, "learning_rate": 2e-07, "loss": 0.0972, "step": 1327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12394635680077466, "grad_norm": 0.12465059012174606, "learning_rate": 2e-07, "loss": 0.0062, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12403968990077525, "grad_norm": 0.13149112462997437, "learning_rate": 2e-07, "loss": 0.0087, "step": 1329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12413302300077583, "grad_norm": 0.11944835633039474, "learning_rate": 2e-07, "loss": 0.015, "step": 1330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12422635610077641, "grad_norm": 0.12846983969211578, "learning_rate": 2e-07, "loss": -0.0237, "step": 1331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.124319689200777, "grad_norm": 0.12928855419158936, "learning_rate": 2e-07, "loss": -0.0063, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12441302230077758, "grad_norm": 0.12158067524433136, "learning_rate": 2e-07, "loss": 0.0373, "step": 1333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12450635540077816, "grad_norm": 0.1204054057598114, "learning_rate": 2e-07, "loss": -0.0123, "step": 1334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12459968850077875, "grad_norm": 0.12858541309833527, "learning_rate": 2e-07, "loss": 0.047, "step": 1335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12469302160077933, "grad_norm": 0.12837396562099457, "learning_rate": 2e-07, "loss": 0.0171, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12478635470077991, "grad_norm": 0.1236601248383522, "learning_rate": 2e-07, "loss": 0.0411, "step": 1337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1248796878007805, "grad_norm": 0.133812814950943, "learning_rate": 2e-07, "loss": 0.0218, "step": 1338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12497302090078108, "grad_norm": 0.12431017309427261, "learning_rate": 2e-07, "loss": 0.0609, "step": 1339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12506635400078167, "grad_norm": 0.1297278255224228, "learning_rate": 2e-07, "loss": 0.0609, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12515968710078224, "grad_norm": 0.13260437548160553, "learning_rate": 2e-07, "loss": 0.023, "step": 1341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12525302020078283, "grad_norm": 0.13998012244701385, "learning_rate": 2e-07, "loss": 0.0367, "step": 1342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12534635330078342, "grad_norm": 0.13109180331230164, "learning_rate": 2e-07, "loss": 0.0317, "step": 1343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.125439686400784, "grad_norm": 0.1368030607700348, "learning_rate": 2e-07, "loss": 0.0573, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12553301950078458, "grad_norm": 0.13046666979789734, "learning_rate": 2e-07, "loss": 0.0615, "step": 1345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12562635260078517, "grad_norm": 0.12309705466032028, "learning_rate": 2e-07, "loss": 0.013, "step": 1346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12571968570078576, "grad_norm": 0.14114674925804138, "learning_rate": 2e-07, "loss": 0.0253, "step": 1347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12581301880078632, "grad_norm": 0.1223163977265358, "learning_rate": 2e-07, "loss": 0.0312, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12590635190078692, "grad_norm": 0.1385715752840042, "learning_rate": 2e-07, "loss": -0.0011, "step": 1349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1259996850007875, "grad_norm": 0.12719270586967468, "learning_rate": 2e-07, "loss": -0.0216, "step": 1350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12609301810078807, "grad_norm": 0.1360722929239273, "learning_rate": 2e-07, "loss": 0.0301, "step": 1351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12618635120078867, "grad_norm": 0.1249774917960167, "learning_rate": 2e-07, "loss": 0.0363, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12627968430078926, "grad_norm": 0.12800733745098114, "learning_rate": 2e-07, "loss": 0.0119, "step": 1353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12637301740078982, "grad_norm": 0.17600104212760925, "learning_rate": 2e-07, "loss": 0.0151, "step": 1354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12646635050079041, "grad_norm": 0.15301010012626648, "learning_rate": 2e-07, "loss": 0.0049, "step": 1355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.126559683600791, "grad_norm": 0.1212579682469368, "learning_rate": 2e-07, "loss": 0.0208, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12665301670079157, "grad_norm": 0.13823603093624115, "learning_rate": 2e-07, "loss": -0.0217, "step": 1357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12674634980079216, "grad_norm": 0.12493650615215302, "learning_rate": 2e-07, "loss": 0.005, "step": 1358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12683968290079276, "grad_norm": 0.13408571481704712, "learning_rate": 2e-07, "loss": 0.068, "step": 1359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12693301600079332, "grad_norm": 0.14997972548007965, "learning_rate": 2e-07, "loss": 0.0373, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1270263491007939, "grad_norm": 0.13704782724380493, "learning_rate": 2e-07, "loss": 0.0693, "step": 1361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1271196822007945, "grad_norm": 0.12894350290298462, "learning_rate": 2e-07, "loss": 0.0351, "step": 1362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12721301530079507, "grad_norm": 0.14157943427562714, "learning_rate": 2e-07, "loss": 0.022, "step": 1363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12730634840079566, "grad_norm": 0.13020886480808258, "learning_rate": 2e-07, "loss": 0.0376, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12739968150079625, "grad_norm": 0.11284741759300232, "learning_rate": 2e-07, "loss": 0.049, "step": 1365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12749301460079684, "grad_norm": 0.12828324735164642, "learning_rate": 2e-07, "loss": 0.0226, "step": 1366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1275863477007974, "grad_norm": 0.14629149436950684, "learning_rate": 2e-07, "loss": 0.0188, "step": 1367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.127679680800798, "grad_norm": 0.12480209767818451, "learning_rate": 2e-07, "loss": -0.0348, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1277730139007986, "grad_norm": 0.14701150357723236, "learning_rate": 2e-07, "loss": 0.0068, "step": 1369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12786634700079916, "grad_norm": 0.12233898043632507, "learning_rate": 2e-07, "loss": -0.0118, "step": 1370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12795968010079975, "grad_norm": 0.13516168296337128, "learning_rate": 2e-07, "loss": 0.0266, "step": 1371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12805301320080034, "grad_norm": 0.14430968463420868, "learning_rate": 2e-07, "loss": 0.0487, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1281463463008009, "grad_norm": 0.12676605582237244, "learning_rate": 2e-07, "loss": 0.0945, "step": 1373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1282396794008015, "grad_norm": 0.1392187625169754, "learning_rate": 2e-07, "loss": 0.0426, "step": 1374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1283330125008021, "grad_norm": 0.12715740501880646, "learning_rate": 2e-07, "loss": 0.0379, "step": 1375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12842634560080265, "grad_norm": 0.13816578686237335, "learning_rate": 2e-07, "loss": 0.0257, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013044084821428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 614.19140625, "completions/mean_terminated_length": 568.1741333007812, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.12851967870080325, "grad_norm": 0.12122588604688644, "learning_rate": 2e-07, "loss": 0.0611, "num_tokens": 940763185.0, "reward": 0.5921630859375, "reward_std": 0.18556271493434906, "rewards/simpleverify_reward/mean": 0.5921630859375, "rewards/simpleverify_reward/std": 0.49143680930137634, "step": 1377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12861301180080384, "grad_norm": 0.1205112412571907, "learning_rate": 2e-07, "loss": 0.0333, "step": 1378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1287063449008044, "grad_norm": 0.12911224365234375, "learning_rate": 2e-07, "loss": 0.073, "step": 1379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.128799678000805, "grad_norm": 0.12581101059913635, "learning_rate": 2e-07, "loss": 0.0222, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1288930111008056, "grad_norm": 0.11776664108037949, "learning_rate": 2e-07, "loss": 0.0123, "step": 1381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12898634420080615, "grad_norm": 0.11414356529712677, "learning_rate": 2e-07, "loss": 0.0512, "step": 1382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12907967730080674, "grad_norm": 0.1338527351617813, "learning_rate": 2e-07, "loss": 0.0084, "step": 1383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12917301040080734, "grad_norm": 0.1056576818227768, "learning_rate": 2e-07, "loss": 0.0722, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1292663435008079, "grad_norm": 0.12868031859397888, "learning_rate": 2e-07, "loss": 0.0189, "step": 1385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1293596766008085, "grad_norm": 0.11210151761770248, "learning_rate": 2e-07, "loss": 0.0338, "step": 1386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12945300970080909, "grad_norm": 0.1337253302335739, "learning_rate": 2e-07, "loss": -0.0242, "step": 1387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12954634280080968, "grad_norm": 0.13245192170143127, "learning_rate": 2e-07, "loss": -0.009, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12963967590081024, "grad_norm": 0.1296364814043045, "learning_rate": 2e-07, "loss": 0.041, "step": 1389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12973300900081083, "grad_norm": 0.11979067325592041, "learning_rate": 2e-07, "loss": 0.0326, "step": 1390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.12982634210081143, "grad_norm": 0.11506450176239014, "learning_rate": 2e-07, "loss": 0.0056, "step": 1391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.129919675200812, "grad_norm": 0.12648504972457886, "learning_rate": 2e-07, "loss": 0.0118, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13001300830081258, "grad_norm": 0.1077185645699501, "learning_rate": 2e-07, "loss": 0.0581, "step": 1393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13010634140081317, "grad_norm": 0.14138028025627136, "learning_rate": 2e-07, "loss": 0.0433, "step": 1394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13019967450081374, "grad_norm": 0.1270858645439148, "learning_rate": 2e-07, "loss": 0.017, "step": 1395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13029300760081433, "grad_norm": 0.12222479283809662, "learning_rate": 2e-07, "loss": -0.0203, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13038634070081492, "grad_norm": 0.1286737322807312, "learning_rate": 2e-07, "loss": 0.0113, "step": 1397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1304796738008155, "grad_norm": 0.12092230468988419, "learning_rate": 2e-07, "loss": 0.0144, "step": 1398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13057300690081608, "grad_norm": 0.1365211457014084, "learning_rate": 2e-07, "loss": 0.0064, "step": 1399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13066634000081667, "grad_norm": 0.12843672931194305, "learning_rate": 2e-07, "loss": 0.0236, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13075967310081724, "grad_norm": 0.11588891595602036, "learning_rate": 2e-07, "loss": 0.0152, "step": 1401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13085300620081783, "grad_norm": 0.1260339617729187, "learning_rate": 2e-07, "loss": 0.0571, "step": 1402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13094633930081842, "grad_norm": 0.1260555386543274, "learning_rate": 2e-07, "loss": 0.0285, "step": 1403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13103967240081899, "grad_norm": 0.13749821484088898, "learning_rate": 2e-07, "loss": -0.0126, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13113300550081958, "grad_norm": 0.12537643313407898, "learning_rate": 2e-07, "loss": 0.0342, "step": 1405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13122633860082017, "grad_norm": 0.13549663126468658, "learning_rate": 2e-07, "loss": -0.0033, "step": 1406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13131967170082076, "grad_norm": 0.1296100616455078, "learning_rate": 2e-07, "loss": 0.0417, "step": 1407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13141300480082133, "grad_norm": 0.11951570212841034, "learning_rate": 2e-07, "loss": 0.0588, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13150633790082192, "grad_norm": 0.12128618359565735, "learning_rate": 2e-07, "loss": 0.0182, "step": 1409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1315996710008225, "grad_norm": 0.1259547919034958, "learning_rate": 2e-07, "loss": -0.0133, "step": 1410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13169300410082307, "grad_norm": 0.12305407971143723, "learning_rate": 2e-07, "loss": 0.0231, "step": 1411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13178633720082367, "grad_norm": 0.12852798402309418, "learning_rate": 2e-07, "loss": -0.0356, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13187967030082426, "grad_norm": 0.12451092898845673, "learning_rate": 2e-07, "loss": 0.0249, "step": 1413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13197300340082482, "grad_norm": 0.12386109679937363, "learning_rate": 2e-07, "loss": 0.0406, "step": 1414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13206633650082542, "grad_norm": 0.12550488114356995, "learning_rate": 2e-07, "loss": 0.03, "step": 1415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.132159669600826, "grad_norm": 0.13374298810958862, "learning_rate": 2e-07, "loss": 0.0468, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13225300270082657, "grad_norm": 0.13182391226291656, "learning_rate": 2e-07, "loss": -0.0005, "step": 1417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13234633580082716, "grad_norm": 0.12399030476808548, "learning_rate": 2e-07, "loss": 0.0296, "step": 1418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13243966890082776, "grad_norm": 0.13912323117256165, "learning_rate": 2e-07, "loss": 0.0109, "step": 1419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13253300200082832, "grad_norm": 0.1259942352771759, "learning_rate": 2e-07, "loss": 0.0146, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1326263351008289, "grad_norm": 0.12687213718891144, "learning_rate": 2e-07, "loss": -0.0082, "step": 1421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1327196682008295, "grad_norm": 0.13498739898204803, "learning_rate": 2e-07, "loss": 0.0356, "step": 1422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13281300130083007, "grad_norm": 0.14409327507019043, "learning_rate": 2e-07, "loss": 0.0678, "step": 1423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13290633440083066, "grad_norm": 0.1604636013507843, "learning_rate": 2e-07, "loss": 0.0247, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13299966750083125, "grad_norm": 0.13310372829437256, "learning_rate": 2e-07, "loss": -0.0224, "step": 1425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13309300060083182, "grad_norm": 0.12996575236320496, "learning_rate": 2e-07, "loss": -0.0049, "step": 1426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1331863337008324, "grad_norm": 0.1303420513868332, "learning_rate": 2e-07, "loss": 0.0134, "step": 1427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.133279666800833, "grad_norm": 0.14204761385917664, "learning_rate": 2e-07, "loss": -0.0087, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1333729999008336, "grad_norm": 0.13351668417453766, "learning_rate": 2e-07, "loss": 0.0417, "step": 1429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13346633300083416, "grad_norm": 0.13873291015625, "learning_rate": 2e-07, "loss": 0.0429, "step": 1430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13355966610083475, "grad_norm": 0.13662871718406677, "learning_rate": 2e-07, "loss": 0.0197, "step": 1431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13365299920083534, "grad_norm": 0.1312268078327179, "learning_rate": 2e-07, "loss": 0.0233, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1337463323008359, "grad_norm": 0.14710059762001038, "learning_rate": 2e-07, "loss": 0.0508, "step": 1433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1338396654008365, "grad_norm": 0.21976804733276367, "learning_rate": 2e-07, "loss": 0.0348, "step": 1434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1339329985008371, "grad_norm": 0.11967466026544571, "learning_rate": 2e-07, "loss": 0.0112, "step": 1435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13402633160083766, "grad_norm": 0.13249734044075012, "learning_rate": 2e-07, "loss": 0.0539, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13411966470083825, "grad_norm": 0.1204148679971695, "learning_rate": 2e-07, "loss": 0.0565, "step": 1437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13421299780083884, "grad_norm": 0.20165148377418518, "learning_rate": 2e-07, "loss": 0.0353, "step": 1438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1343063309008394, "grad_norm": 0.13655179738998413, "learning_rate": 2e-07, "loss": 0.0441, "step": 1439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13439966400084, "grad_norm": 0.1327507644891739, "learning_rate": 2e-07, "loss": 0.0275, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01318359375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 620.673095703125, "completions/mean_terminated_length": 574.24365234375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.1344929971008406, "grad_norm": 0.14014360308647156, "learning_rate": 2e-07, "loss": 0.0198, "num_tokens": 982072539.0, "reward": 0.5868443250656128, "reward_std": 0.18858738243579865, "rewards/simpleverify_reward/mean": 0.5868443250656128, "rewards/simpleverify_reward/std": 0.49240460991859436, "step": 1441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13458633020084115, "grad_norm": 0.1204172894358635, "learning_rate": 2e-07, "loss": -0.0031, "step": 1442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13467966330084175, "grad_norm": 0.11600620299577713, "learning_rate": 2e-07, "loss": 0.0314, "step": 1443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13477299640084234, "grad_norm": 0.12386095523834229, "learning_rate": 2e-07, "loss": 0.0401, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1348663295008429, "grad_norm": 0.12171697616577148, "learning_rate": 2e-07, "loss": -0.0037, "step": 1445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1349596626008435, "grad_norm": 0.12403026968240738, "learning_rate": 2e-07, "loss": 0.0421, "step": 1446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1350529957008441, "grad_norm": 0.13129982352256775, "learning_rate": 2e-07, "loss": 0.0079, "step": 1447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13514632880084468, "grad_norm": 0.11801241338253021, "learning_rate": 2e-07, "loss": -0.0167, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13523966190084524, "grad_norm": 0.1307920664548874, "learning_rate": 2e-07, "loss": 0.0087, "step": 1449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13533299500084583, "grad_norm": 0.13329008221626282, "learning_rate": 2e-07, "loss": 0.0434, "step": 1450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13542632810084643, "grad_norm": 0.12755557894706726, "learning_rate": 2e-07, "loss": -0.0231, "step": 1451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.135519661200847, "grad_norm": 0.12927190959453583, "learning_rate": 2e-07, "loss": -0.0311, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13561299430084758, "grad_norm": 0.12124588340520859, "learning_rate": 2e-07, "loss": 0.0296, "step": 1453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13570632740084818, "grad_norm": 0.12378164380788803, "learning_rate": 2e-07, "loss": 0.0516, "step": 1454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13579966050084874, "grad_norm": 0.1416238248348236, "learning_rate": 2e-07, "loss": 0.0491, "step": 1455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13589299360084933, "grad_norm": 0.12794120609760284, "learning_rate": 2e-07, "loss": 0.0571, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13598632670084992, "grad_norm": 0.12237957864999771, "learning_rate": 2e-07, "loss": 0.015, "step": 1457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1360796598008505, "grad_norm": 0.13529318571090698, "learning_rate": 2e-07, "loss": 0.0529, "step": 1458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13617299290085108, "grad_norm": 0.1210915669798851, "learning_rate": 2e-07, "loss": 0.0111, "step": 1459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13626632600085167, "grad_norm": 0.14912164211273193, "learning_rate": 2e-07, "loss": 0.0318, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13635965910085224, "grad_norm": 0.13777463138103485, "learning_rate": 2e-07, "loss": 0.012, "step": 1461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13645299220085283, "grad_norm": 0.1252203732728958, "learning_rate": 2e-07, "loss": 0.0339, "step": 1462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13654632530085342, "grad_norm": 0.13650867342948914, "learning_rate": 2e-07, "loss": -0.0138, "step": 1463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13663965840085399, "grad_norm": 0.12399674206972122, "learning_rate": 2e-07, "loss": 0.0206, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13673299150085458, "grad_norm": 0.13404396176338196, "learning_rate": 2e-07, "loss": 0.0047, "step": 1465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13682632460085517, "grad_norm": 0.12794950604438782, "learning_rate": 2e-07, "loss": 0.0254, "step": 1466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13691965770085573, "grad_norm": 0.12589828670024872, "learning_rate": 2e-07, "loss": 0.0579, "step": 1467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13701299080085633, "grad_norm": 0.11313801258802414, "learning_rate": 2e-07, "loss": 0.0313, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13710632390085692, "grad_norm": 0.12220331281423569, "learning_rate": 2e-07, "loss": 0.0137, "step": 1469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1371996570008575, "grad_norm": 0.13046205043792725, "learning_rate": 2e-07, "loss": 0.0429, "step": 1470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13729299010085808, "grad_norm": 0.13688932359218597, "learning_rate": 2e-07, "loss": -0.0201, "step": 1471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13738632320085867, "grad_norm": 0.1379668116569519, "learning_rate": 2e-07, "loss": -0.0078, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13747965630085926, "grad_norm": 0.12323816120624542, "learning_rate": 2e-07, "loss": 0.0377, "step": 1473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13757298940085982, "grad_norm": 0.1078827902674675, "learning_rate": 2e-07, "loss": 0.0429, "step": 1474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13766632250086042, "grad_norm": 0.13498204946517944, "learning_rate": 2e-07, "loss": 0.0071, "step": 1475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.137759655600861, "grad_norm": 0.14558745920658112, "learning_rate": 2e-07, "loss": 0.0167, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13785298870086157, "grad_norm": 0.13273562490940094, "learning_rate": 2e-07, "loss": 0.025, "step": 1477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13794632180086216, "grad_norm": 0.13516178727149963, "learning_rate": 2e-07, "loss": 0.052, "step": 1478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13803965490086276, "grad_norm": 0.16784650087356567, "learning_rate": 2e-07, "loss": 0.0406, "step": 1479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13813298800086332, "grad_norm": 0.1249307319521904, "learning_rate": 2e-07, "loss": 0.0471, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1382263211008639, "grad_norm": 0.14602787792682648, "learning_rate": 2e-07, "loss": 0.0162, "step": 1481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1383196542008645, "grad_norm": 0.12841884791851044, "learning_rate": 2e-07, "loss": 0.0472, "step": 1482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13841298730086507, "grad_norm": 0.13268157839775085, "learning_rate": 2e-07, "loss": 0.0471, "step": 1483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13850632040086566, "grad_norm": 0.13661326467990875, "learning_rate": 2e-07, "loss": 0.0489, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13859965350086625, "grad_norm": 0.12214091420173645, "learning_rate": 2e-07, "loss": 0.0501, "step": 1485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13869298660086682, "grad_norm": 0.13982731103897095, "learning_rate": 2e-07, "loss": -0.046, "step": 1486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1387863197008674, "grad_norm": 0.11978689581155777, "learning_rate": 2e-07, "loss": 0.0229, "step": 1487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.138879652800868, "grad_norm": 0.13548658788204193, "learning_rate": 2e-07, "loss": 0.0752, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13897298590086857, "grad_norm": 0.1313246190547943, "learning_rate": 2e-07, "loss": 0.0415, "step": 1489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13906631900086916, "grad_norm": 0.12334547936916351, "learning_rate": 2e-07, "loss": 0.0192, "step": 1490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13915965210086975, "grad_norm": 0.1284998506307602, "learning_rate": 2e-07, "loss": 0.0127, "step": 1491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13925298520087034, "grad_norm": 0.132889062166214, "learning_rate": 2e-07, "loss": 0.008, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1393463183008709, "grad_norm": 0.13802756369113922, "learning_rate": 2e-07, "loss": 0.0654, "step": 1493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1394396514008715, "grad_norm": 0.1258484125137329, "learning_rate": 2e-07, "loss": 0.0397, "step": 1494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1395329845008721, "grad_norm": 0.14119204878807068, "learning_rate": 2e-07, "loss": 0.0943, "step": 1495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13962631760087266, "grad_norm": 0.14203029870986938, "learning_rate": 2e-07, "loss": 0.0544, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13971965070087325, "grad_norm": 0.13593608140945435, "learning_rate": 2e-07, "loss": 0.0188, "step": 1497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.13981298380087384, "grad_norm": 0.13752004504203796, "learning_rate": 2e-07, "loss": -0.0276, "step": 1498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1399063169008744, "grad_norm": 0.13230733573436737, "learning_rate": 2e-07, "loss": -0.0003, "step": 1499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.139999650000875, "grad_norm": 0.12464489787817001, "learning_rate": 2e-07, "loss": 0.0332, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1400929831008756, "grad_norm": 0.14144942164421082, "learning_rate": 2e-07, "loss": 0.0688, "step": 1501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14018631620087615, "grad_norm": 0.1382383406162262, "learning_rate": 2e-07, "loss": 0.0209, "step": 1502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14027964930087675, "grad_norm": 0.13249830901622772, "learning_rate": 2e-07, "loss": 0.027, "step": 1503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14037298240087734, "grad_norm": 0.1336497962474823, "learning_rate": 2e-07, "loss": 0.0216, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012712751116071397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 611.5469970703125, "completions/mean_terminated_length": 566.6795654296875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.1404663155008779, "grad_norm": 0.13943322002887726, "learning_rate": 2e-07, "loss": 0.0564, "num_tokens": 1022796423.0, "reward": 0.5952671766281128, "reward_std": 0.183739572763443, "rewards/simpleverify_reward/mean": 0.5952671766281128, "rewards/simpleverify_reward/std": 0.4908445477485657, "step": 1505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1405596486008785, "grad_norm": 0.12328717112541199, "learning_rate": 2e-07, "loss": -0.0193, "step": 1506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1406529817008791, "grad_norm": 0.13059201836585999, "learning_rate": 2e-07, "loss": -0.0285, "step": 1507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14074631480087965, "grad_norm": 0.12330266833305359, "learning_rate": 2e-07, "loss": -0.0017, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14083964790088024, "grad_norm": 0.13692261278629303, "learning_rate": 2e-07, "loss": 0.0583, "step": 1509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14093298100088084, "grad_norm": 0.14435957372188568, "learning_rate": 2e-07, "loss": 0.0028, "step": 1510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14102631410088143, "grad_norm": 0.11849499493837357, "learning_rate": 2e-07, "loss": 0.0233, "step": 1511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.141119647200882, "grad_norm": 0.1235920712351799, "learning_rate": 2e-07, "loss": 0.0416, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14121298030088258, "grad_norm": 0.12330605834722519, "learning_rate": 2e-07, "loss": -0.0135, "step": 1513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14130631340088318, "grad_norm": 0.12435866892337799, "learning_rate": 2e-07, "loss": -0.0175, "step": 1514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14139964650088374, "grad_norm": 0.1300330013036728, "learning_rate": 2e-07, "loss": 0.0019, "step": 1515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14149297960088433, "grad_norm": 0.1270255744457245, "learning_rate": 2e-07, "loss": 0.0404, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14158631270088493, "grad_norm": 0.13278484344482422, "learning_rate": 2e-07, "loss": 0.055, "step": 1517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1416796458008855, "grad_norm": 0.1299923062324524, "learning_rate": 2e-07, "loss": -0.0178, "step": 1518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14177297890088608, "grad_norm": 0.13562241196632385, "learning_rate": 2e-07, "loss": 0.0229, "step": 1519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14186631200088667, "grad_norm": 0.12498418241739273, "learning_rate": 2e-07, "loss": 0.0491, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14195964510088724, "grad_norm": 0.13515660166740417, "learning_rate": 2e-07, "loss": 0.038, "step": 1521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14205297820088783, "grad_norm": 0.1273382306098938, "learning_rate": 2e-07, "loss": 0.0904, "step": 1522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14214631130088842, "grad_norm": 0.12775585055351257, "learning_rate": 2e-07, "loss": 0.0804, "step": 1523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.142239644400889, "grad_norm": 0.13200220465660095, "learning_rate": 2e-07, "loss": 0.0376, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14233297750088958, "grad_norm": 0.12758012115955353, "learning_rate": 2e-07, "loss": 0.0164, "step": 1525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14242631060089017, "grad_norm": 0.12444531917572021, "learning_rate": 2e-07, "loss": 0.0083, "step": 1526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14251964370089074, "grad_norm": 0.1462988406419754, "learning_rate": 2e-07, "loss": 0.0337, "step": 1527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14261297680089133, "grad_norm": 0.12262451648712158, "learning_rate": 2e-07, "loss": 0.0468, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14270630990089192, "grad_norm": 0.13249804079532623, "learning_rate": 2e-07, "loss": 0.0675, "step": 1529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14279964300089248, "grad_norm": 0.12518811225891113, "learning_rate": 2e-07, "loss": 0.0297, "step": 1530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14289297610089308, "grad_norm": 0.1315504014492035, "learning_rate": 2e-07, "loss": 0.0685, "step": 1531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14298630920089367, "grad_norm": 0.13933368027210236, "learning_rate": 2e-07, "loss": -0.0068, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14307964230089426, "grad_norm": 0.1306922286748886, "learning_rate": 2e-07, "loss": 0.0372, "step": 1533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14317297540089483, "grad_norm": 0.12907284498214722, "learning_rate": 2e-07, "loss": 0.0044, "step": 1534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14326630850089542, "grad_norm": 0.11949587613344193, "learning_rate": 2e-07, "loss": 0.0582, "step": 1535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.143359641600896, "grad_norm": 0.11586958169937134, "learning_rate": 2e-07, "loss": 0.0154, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14345297470089657, "grad_norm": 0.141885906457901, "learning_rate": 2e-07, "loss": 0.0884, "step": 1537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14354630780089717, "grad_norm": 0.13956746459007263, "learning_rate": 2e-07, "loss": 0.0612, "step": 1538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14363964090089776, "grad_norm": 0.13310831785202026, "learning_rate": 2e-07, "loss": 0.0481, "step": 1539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14373297400089832, "grad_norm": 0.12801481783390045, "learning_rate": 2e-07, "loss": 0.0222, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14382630710089891, "grad_norm": 0.1253482550382614, "learning_rate": 2e-07, "loss": 0.0084, "step": 1541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1439196402008995, "grad_norm": 0.13254080712795258, "learning_rate": 2e-07, "loss": 0.0194, "step": 1542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14401297330090007, "grad_norm": 0.12622767686843872, "learning_rate": 2e-07, "loss": 0.0332, "step": 1543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14410630640090066, "grad_norm": 0.131544828414917, "learning_rate": 2e-07, "loss": 0.0271, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14419963950090126, "grad_norm": 0.12295348197221756, "learning_rate": 2e-07, "loss": 0.0163, "step": 1545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14429297260090182, "grad_norm": 0.12304048985242844, "learning_rate": 2e-07, "loss": 0.0437, "step": 1546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1443863057009024, "grad_norm": 0.12579649686813354, "learning_rate": 2e-07, "loss": 0.0166, "step": 1547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.144479638800903, "grad_norm": 0.11576581746339798, "learning_rate": 2e-07, "loss": -0.0036, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14457297190090357, "grad_norm": 0.13440829515457153, "learning_rate": 2e-07, "loss": 0.0583, "step": 1549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14466630500090416, "grad_norm": 0.1419072151184082, "learning_rate": 2e-07, "loss": 0.0432, "step": 1550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14475963810090475, "grad_norm": 0.1361137479543686, "learning_rate": 2e-07, "loss": 0.0056, "step": 1551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14485297120090532, "grad_norm": 0.13247188925743103, "learning_rate": 2e-07, "loss": 0.0514, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1449463043009059, "grad_norm": 0.12846198678016663, "learning_rate": 2e-07, "loss": 0.0092, "step": 1553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1450396374009065, "grad_norm": 0.11642246693372726, "learning_rate": 2e-07, "loss": 0.072, "step": 1554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1451329705009071, "grad_norm": 0.12781000137329102, "learning_rate": 2e-07, "loss": -0.0145, "step": 1555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14522630360090766, "grad_norm": 0.13026975095272064, "learning_rate": 2e-07, "loss": 0.0429, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14531963670090825, "grad_norm": 0.1336844265460968, "learning_rate": 2e-07, "loss": -0.0242, "step": 1557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14541296980090884, "grad_norm": 0.13330115377902985, "learning_rate": 2e-07, "loss": 0.0334, "step": 1558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1455063029009094, "grad_norm": 0.13167071342468262, "learning_rate": 2e-07, "loss": 0.014, "step": 1559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14559963600091, "grad_norm": 0.1251688450574875, "learning_rate": 2e-07, "loss": 0.0574, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1456929691009106, "grad_norm": 0.12467975169420242, "learning_rate": 2e-07, "loss": 0.02, "step": 1561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14578630220091116, "grad_norm": 0.1277707815170288, "learning_rate": 2e-07, "loss": 0.0408, "step": 1562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14587963530091175, "grad_norm": 0.1245904341340065, "learning_rate": 2e-07, "loss": 0.006, "step": 1563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14597296840091234, "grad_norm": 0.14062905311584473, "learning_rate": 2e-07, "loss": 0.014, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1460663015009129, "grad_norm": 0.1285715103149414, "learning_rate": 2e-07, "loss": -0.0086, "step": 1565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1461596346009135, "grad_norm": 0.1425938755273819, "learning_rate": 2e-07, "loss": -0.0099, "step": 1566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1462529677009141, "grad_norm": 0.12996259331703186, "learning_rate": 2e-07, "loss": 0.0447, "step": 1567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14634630080091465, "grad_norm": 0.12410619109869003, "learning_rate": 2e-07, "loss": -0.0095, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012102399553571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 614.2689208984375, "completions/mean_terminated_length": 571.6153564453125, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.14643963390091524, "grad_norm": 0.1255834847688675, "learning_rate": 2e-07, "loss": 0.0413, "num_tokens": 1063670202.0, "reward": 0.5979527235031128, "reward_std": 0.1826418787240982, "rewards/simpleverify_reward/mean": 0.5979527235031128, "rewards/simpleverify_reward/std": 0.490315705537796, "step": 1569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14653296700091584, "grad_norm": 0.1274181604385376, "learning_rate": 2e-07, "loss": 0.0088, "step": 1570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1466263001009164, "grad_norm": 0.126582533121109, "learning_rate": 2e-07, "loss": 0.0316, "step": 1571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.146719633200917, "grad_norm": 0.13323159515857697, "learning_rate": 2e-07, "loss": 0.0302, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14681296630091759, "grad_norm": 0.12743723392486572, "learning_rate": 2e-07, "loss": 0.0898, "step": 1573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14690629940091818, "grad_norm": 0.12763725221157074, "learning_rate": 2e-07, "loss": 0.0363, "step": 1574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14699963250091874, "grad_norm": 0.11844750493764877, "learning_rate": 2e-07, "loss": 0.0203, "step": 1575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14709296560091933, "grad_norm": 0.12216989696025848, "learning_rate": 2e-07, "loss": 0.0264, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14718629870091993, "grad_norm": 0.12879298627376556, "learning_rate": 2e-07, "loss": 0.0503, "step": 1577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1472796318009205, "grad_norm": 0.13290274143218994, "learning_rate": 2e-07, "loss": 0.0776, "step": 1578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14737296490092108, "grad_norm": 0.13088519871234894, "learning_rate": 2e-07, "loss": 0.0466, "step": 1579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14746629800092168, "grad_norm": 0.130288764834404, "learning_rate": 2e-07, "loss": 0.0197, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14755963110092224, "grad_norm": 0.1316644251346588, "learning_rate": 2e-07, "loss": 0.0481, "step": 1581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14765296420092283, "grad_norm": 0.12390497326850891, "learning_rate": 2e-07, "loss": 0.0011, "step": 1582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14774629730092342, "grad_norm": 0.13405756652355194, "learning_rate": 2e-07, "loss": 0.0033, "step": 1583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.147839630400924, "grad_norm": 0.13446617126464844, "learning_rate": 2e-07, "loss": -0.004, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14793296350092458, "grad_norm": 0.1343328207731247, "learning_rate": 2e-07, "loss": 0.0169, "step": 1585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14802629660092517, "grad_norm": 0.16320449113845825, "learning_rate": 2e-07, "loss": 0.0245, "step": 1586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14811962970092574, "grad_norm": 0.12486258149147034, "learning_rate": 2e-07, "loss": 0.0286, "step": 1587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14821296280092633, "grad_norm": 0.11726725846529007, "learning_rate": 2e-07, "loss": 0.0086, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14830629590092692, "grad_norm": 0.1314283311367035, "learning_rate": 2e-07, "loss": 0.0489, "step": 1589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14839962900092749, "grad_norm": 0.1410963237285614, "learning_rate": 2e-07, "loss": 0.0157, "step": 1590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14849296210092808, "grad_norm": 0.1482408493757248, "learning_rate": 2e-07, "loss": 0.0194, "step": 1591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14858629520092867, "grad_norm": 0.1295221894979477, "learning_rate": 2e-07, "loss": 0.03, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14867962830092923, "grad_norm": 0.12675008177757263, "learning_rate": 2e-07, "loss": 0.0152, "step": 1593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14877296140092983, "grad_norm": 0.1261344999074936, "learning_rate": 2e-07, "loss": 0.0292, "step": 1594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14886629450093042, "grad_norm": 0.11228286474943161, "learning_rate": 2e-07, "loss": 0.0003, "step": 1595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.148959627600931, "grad_norm": 0.13561365008354187, "learning_rate": 2e-07, "loss": -0.0352, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14905296070093157, "grad_norm": 0.11695383489131927, "learning_rate": 2e-07, "loss": 0.0326, "step": 1597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14914629380093217, "grad_norm": 0.1335514932870865, "learning_rate": 2e-07, "loss": 0.0571, "step": 1598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14923962690093276, "grad_norm": 0.1267106533050537, "learning_rate": 2e-07, "loss": 0.0173, "step": 1599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14933296000093332, "grad_norm": 0.1275610476732254, "learning_rate": 2e-07, "loss": 0.0153, "step": 1600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14942629310093392, "grad_norm": 0.15320239961147308, "learning_rate": 2e-07, "loss": 0.0447, "step": 1601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1495196262009345, "grad_norm": 0.13889829814434052, "learning_rate": 2e-07, "loss": 0.0228, "step": 1602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14961295930093507, "grad_norm": 0.14359885454177856, "learning_rate": 2e-07, "loss": 0.0692, "step": 1603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14970629240093566, "grad_norm": 0.12624703347682953, "learning_rate": 2e-07, "loss": 0.0049, "step": 1604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14979962550093626, "grad_norm": 0.1442633420228958, "learning_rate": 2e-07, "loss": 0.0469, "step": 1605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.14989295860093682, "grad_norm": 0.1283549815416336, "learning_rate": 2e-07, "loss": 0.0172, "step": 1606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1499862917009374, "grad_norm": 0.12885038554668427, "learning_rate": 2e-07, "loss": 0.0271, "step": 1607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.150079624800938, "grad_norm": 0.1230170726776123, "learning_rate": 2e-07, "loss": 0.0375, "step": 1608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15017295790093857, "grad_norm": 0.1217385083436966, "learning_rate": 2e-07, "loss": 0.0408, "step": 1609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15026629100093916, "grad_norm": 0.1265076845884323, "learning_rate": 2e-07, "loss": -0.0227, "step": 1610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15035962410093975, "grad_norm": 0.13014468550682068, "learning_rate": 2e-07, "loss": 0.0351, "step": 1611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15045295720094032, "grad_norm": 0.13369403779506683, "learning_rate": 2e-07, "loss": 0.0302, "step": 1612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1505462903009409, "grad_norm": 0.12770317494869232, "learning_rate": 2e-07, "loss": -0.0355, "step": 1613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1506396234009415, "grad_norm": 0.13406477868556976, "learning_rate": 2e-07, "loss": 0.011, "step": 1614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1507329565009421, "grad_norm": 0.1345720887184143, "learning_rate": 2e-07, "loss": 0.0324, "step": 1615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15082628960094266, "grad_norm": 0.13447582721710205, "learning_rate": 2e-07, "loss": 0.058, "step": 1616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15091962270094325, "grad_norm": 0.14136497676372528, "learning_rate": 2e-07, "loss": 0.0127, "step": 1617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15101295580094384, "grad_norm": 0.13039113581180573, "learning_rate": 2e-07, "loss": 0.0265, "step": 1618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1511062889009444, "grad_norm": 0.13440050184726715, "learning_rate": 2e-07, "loss": 0.0415, "step": 1619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.151199622000945, "grad_norm": 0.1323552429676056, "learning_rate": 2e-07, "loss": 0.027, "step": 1620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1512929551009456, "grad_norm": 0.144898921251297, "learning_rate": 2e-07, "loss": 0.0341, "step": 1621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15138628820094616, "grad_norm": 0.14660273492336273, "learning_rate": 2e-07, "loss": -0.0229, "step": 1622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15147962130094675, "grad_norm": 0.1274280548095703, "learning_rate": 2e-07, "loss": 0.0251, "step": 1623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15157295440094734, "grad_norm": 0.12751837074756622, "learning_rate": 2e-07, "loss": -0.0064, "step": 1624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1516662875009479, "grad_norm": 0.12840381264686584, "learning_rate": 2e-07, "loss": 0.0723, "step": 1625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1517596206009485, "grad_norm": 0.13688528537750244, "learning_rate": 2e-07, "loss": 0.0275, "step": 1626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1518529537009491, "grad_norm": 0.1506073921918869, "learning_rate": 2e-07, "loss": 0.0403, "step": 1627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15194628680094965, "grad_norm": 0.1503155380487442, "learning_rate": 2e-07, "loss": 0.0364, "step": 1628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15203961990095025, "grad_norm": 0.13525450229644775, "learning_rate": 2e-07, "loss": 0.0368, "step": 1629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15213295300095084, "grad_norm": 0.14401701092720032, "learning_rate": 2e-07, "loss": 0.0278, "step": 1630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1522262861009514, "grad_norm": 0.1395573616027832, "learning_rate": 2e-07, "loss": -0.0194, "step": 1631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.152319619200952, "grad_norm": 0.14466142654418945, "learning_rate": 2e-07, "loss": 0.0168, "step": 1632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011300223214285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 607.0234375, "completions/mean_terminated_length": 567.1466674804688, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.1524129523009526, "grad_norm": 0.12688760459423065, "learning_rate": 2e-07, "loss": 0.0459, "num_tokens": 1104186637.0, "reward": 0.6036028265953064, "reward_std": 0.18167415261268616, "rewards/simpleverify_reward/mean": 0.6036028265953064, "rewards/simpleverify_reward/std": 0.4891529977321625, "step": 1633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15250628540095315, "grad_norm": 0.1347307711839676, "learning_rate": 2e-07, "loss": 0.0385, "step": 1634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15259961850095374, "grad_norm": 0.1331218034029007, "learning_rate": 2e-07, "loss": 0.0188, "step": 1635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15269295160095434, "grad_norm": 0.12972086668014526, "learning_rate": 2e-07, "loss": -0.0072, "step": 1636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15278628470095493, "grad_norm": 0.12005738168954849, "learning_rate": 2e-07, "loss": 0.0089, "step": 1637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1528796178009555, "grad_norm": 0.1425633728504181, "learning_rate": 2e-07, "loss": 0.0432, "step": 1638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15297295090095608, "grad_norm": 0.1493898630142212, "learning_rate": 2e-07, "loss": 0.045, "step": 1639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15306628400095668, "grad_norm": 0.1384979635477066, "learning_rate": 2e-07, "loss": -0.0196, "step": 1640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15315961710095724, "grad_norm": 0.12740084528923035, "learning_rate": 2e-07, "loss": 0.0237, "step": 1641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15325295020095783, "grad_norm": 0.13496433198451996, "learning_rate": 2e-07, "loss": 0.0684, "step": 1642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15334628330095842, "grad_norm": 0.12255571037530899, "learning_rate": 2e-07, "loss": 0.0213, "step": 1643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.153439616400959, "grad_norm": 0.12416941672563553, "learning_rate": 2e-07, "loss": 0.0114, "step": 1644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15353294950095958, "grad_norm": 0.12521426379680634, "learning_rate": 2e-07, "loss": 0.0226, "step": 1645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15362628260096017, "grad_norm": 0.12777189910411835, "learning_rate": 2e-07, "loss": 0.0468, "step": 1646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15371961570096074, "grad_norm": 0.1303749531507492, "learning_rate": 2e-07, "loss": 0.0167, "step": 1647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15381294880096133, "grad_norm": 0.12536653876304626, "learning_rate": 2e-07, "loss": 0.0759, "step": 1648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15390628190096192, "grad_norm": 0.14741076529026031, "learning_rate": 2e-07, "loss": 0.0454, "step": 1649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1539996150009625, "grad_norm": 0.12200335413217545, "learning_rate": 2e-07, "loss": 0.0044, "step": 1650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15409294810096308, "grad_norm": 0.13597562909126282, "learning_rate": 2e-07, "loss": 0.0445, "step": 1651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15418628120096367, "grad_norm": 0.1319851577281952, "learning_rate": 2e-07, "loss": 0.0355, "step": 1652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15427961430096424, "grad_norm": 0.15145090222358704, "learning_rate": 2e-07, "loss": 0.0221, "step": 1653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15437294740096483, "grad_norm": 0.12274263799190521, "learning_rate": 2e-07, "loss": 0.0358, "step": 1654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15446628050096542, "grad_norm": 0.13365498185157776, "learning_rate": 2e-07, "loss": 0.0548, "step": 1655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15455961360096598, "grad_norm": 0.13028521835803986, "learning_rate": 2e-07, "loss": 0.0694, "step": 1656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15465294670096658, "grad_norm": 0.13391505181789398, "learning_rate": 2e-07, "loss": 0.036, "step": 1657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15474627980096717, "grad_norm": 0.12943685054779053, "learning_rate": 2e-07, "loss": 0.0018, "step": 1658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15483961290096776, "grad_norm": 0.1325240135192871, "learning_rate": 2e-07, "loss": 0.0232, "step": 1659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15493294600096832, "grad_norm": 0.12339305132627487, "learning_rate": 2e-07, "loss": 0.0233, "step": 1660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15502627910096892, "grad_norm": 0.12262265384197235, "learning_rate": 2e-07, "loss": 0.0404, "step": 1661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1551196122009695, "grad_norm": 0.13826113939285278, "learning_rate": 2e-07, "loss": 0.0393, "step": 1662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15521294530097007, "grad_norm": 0.1265203207731247, "learning_rate": 2e-07, "loss": 0.0413, "step": 1663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15530627840097067, "grad_norm": 0.12628337740898132, "learning_rate": 2e-07, "loss": -0.0049, "step": 1664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15539961150097126, "grad_norm": 0.13126251101493835, "learning_rate": 2e-07, "loss": 0.0898, "step": 1665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15549294460097182, "grad_norm": 0.12042747437953949, "learning_rate": 2e-07, "loss": -0.0074, "step": 1666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15558627770097241, "grad_norm": 0.1237117350101471, "learning_rate": 2e-07, "loss": 0.0042, "step": 1667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.155679610800973, "grad_norm": 0.1371576488018036, "learning_rate": 2e-07, "loss": 0.0465, "step": 1668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15577294390097357, "grad_norm": 0.14915134012699127, "learning_rate": 2e-07, "loss": 0.0424, "step": 1669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15586627700097416, "grad_norm": 0.11963173002004623, "learning_rate": 2e-07, "loss": 0.0036, "step": 1670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15595961010097475, "grad_norm": 0.13197095692157745, "learning_rate": 2e-07, "loss": 0.061, "step": 1671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15605294320097532, "grad_norm": 0.12244408577680588, "learning_rate": 2e-07, "loss": 0.0374, "step": 1672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1561462763009759, "grad_norm": 0.11905831098556519, "learning_rate": 2e-07, "loss": 0.0247, "step": 1673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1562396094009765, "grad_norm": 0.1333603411912918, "learning_rate": 2e-07, "loss": 0.0248, "step": 1674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15633294250097707, "grad_norm": 0.1352948248386383, "learning_rate": 2e-07, "loss": 0.0265, "step": 1675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15642627560097766, "grad_norm": 0.12792669236660004, "learning_rate": 2e-07, "loss": 0.0017, "step": 1676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15651960870097825, "grad_norm": 0.12432333827018738, "learning_rate": 2e-07, "loss": 0.0217, "step": 1677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15661294180097884, "grad_norm": 0.13010576367378235, "learning_rate": 2e-07, "loss": 0.0282, "step": 1678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1567062749009794, "grad_norm": 0.12494930624961853, "learning_rate": 2e-07, "loss": -0.0043, "step": 1679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15679960800098, "grad_norm": 0.13553807139396667, "learning_rate": 2e-07, "loss": 0.0217, "step": 1680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1568929411009806, "grad_norm": 0.13172224164009094, "learning_rate": 2e-07, "loss": 0.0329, "step": 1681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15698627420098116, "grad_norm": 0.13221698999404907, "learning_rate": 2e-07, "loss": 0.0364, "step": 1682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15707960730098175, "grad_norm": 0.1474255472421646, "learning_rate": 2e-07, "loss": 0.0274, "step": 1683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15717294040098234, "grad_norm": 0.12861399352550507, "learning_rate": 2e-07, "loss": 0.0088, "step": 1684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1572662735009829, "grad_norm": 0.15298102796077728, "learning_rate": 2e-07, "loss": 0.0064, "step": 1685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1573596066009835, "grad_norm": 0.13587716221809387, "learning_rate": 2e-07, "loss": -0.0259, "step": 1686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1574529397009841, "grad_norm": 0.1306609809398651, "learning_rate": 2e-07, "loss": 0.0436, "step": 1687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15754627280098465, "grad_norm": 0.13562940061092377, "learning_rate": 2e-07, "loss": 0.03, "step": 1688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15763960590098525, "grad_norm": 0.12904827296733856, "learning_rate": 2e-07, "loss": -0.0048, "step": 1689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15773293900098584, "grad_norm": 0.13245359063148499, "learning_rate": 2e-07, "loss": -0.0109, "step": 1690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1578262721009864, "grad_norm": 0.12895280122756958, "learning_rate": 2e-07, "loss": 0.009, "step": 1691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.157919605200987, "grad_norm": 0.128696471452713, "learning_rate": 2e-07, "loss": 0.0514, "step": 1692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1580129383009876, "grad_norm": 0.26684674620628357, "learning_rate": 2e-07, "loss": 0.0167, "step": 1693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15810627140098815, "grad_norm": 0.13796038925647736, "learning_rate": 2e-07, "loss": 0.0098, "step": 1694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15819960450098874, "grad_norm": 0.11801528930664062, "learning_rate": 2e-07, "loss": 0.0306, "step": 1695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15829293760098934, "grad_norm": 0.13526540994644165, "learning_rate": 2e-07, "loss": -0.0183, "step": 1696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011579241071428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 612.7501220703125, "completions/mean_terminated_length": 571.9441528320312, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.1583862707009899, "grad_norm": 0.1290045976638794, "learning_rate": 2e-07, "loss": 0.0604, "num_tokens": 1145024370.0, "reward": 0.600045382976532, "reward_std": 0.18511967360973358, "rewards/simpleverify_reward/mean": 0.6000453233718872, "rewards/simpleverify_reward/std": 0.48989295959472656, "step": 1697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1584796038009905, "grad_norm": 0.12889961898326874, "learning_rate": 2e-07, "loss": 0.0152, "step": 1698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15857293690099108, "grad_norm": 0.14831697940826416, "learning_rate": 2e-07, "loss": -0.031, "step": 1699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15866627000099168, "grad_norm": 0.12529629468917847, "learning_rate": 2e-07, "loss": -0.0083, "step": 1700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15875960310099224, "grad_norm": 0.12842491269111633, "learning_rate": 2e-07, "loss": 0.0244, "step": 1701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15885293620099283, "grad_norm": 0.12892888486385345, "learning_rate": 2e-07, "loss": 0.0271, "step": 1702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15894626930099343, "grad_norm": 0.13366591930389404, "learning_rate": 2e-07, "loss": -0.0035, "step": 1703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.159039602400994, "grad_norm": 0.12670020759105682, "learning_rate": 2e-07, "loss": 0.01, "step": 1704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15913293550099458, "grad_norm": 0.125052809715271, "learning_rate": 2e-07, "loss": 0.0182, "step": 1705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15922626860099517, "grad_norm": 0.15874795615673065, "learning_rate": 2e-07, "loss": 0.0309, "step": 1706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15931960170099574, "grad_norm": 0.1257004290819168, "learning_rate": 2e-07, "loss": 0.0212, "step": 1707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15941293480099633, "grad_norm": 0.12522895634174347, "learning_rate": 2e-07, "loss": 0.0208, "step": 1708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15950626790099692, "grad_norm": 0.14015363156795502, "learning_rate": 2e-07, "loss": 0.0433, "step": 1709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1595996010009975, "grad_norm": 0.12972092628479004, "learning_rate": 2e-07, "loss": -0.0161, "step": 1710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15969293410099808, "grad_norm": 0.13276153802871704, "learning_rate": 2e-07, "loss": 0.0178, "step": 1711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15978626720099867, "grad_norm": 0.13770943880081177, "learning_rate": 2e-07, "loss": 0.0452, "step": 1712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15987960030099924, "grad_norm": 0.12955541908740997, "learning_rate": 2e-07, "loss": 0.0334, "step": 1713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.15997293340099983, "grad_norm": 0.12294356524944305, "learning_rate": 2e-07, "loss": 0.0232, "step": 1714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16006626650100042, "grad_norm": 0.12066502869129181, "learning_rate": 2e-07, "loss": 0.0322, "step": 1715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16015959960100098, "grad_norm": 0.1262289434671402, "learning_rate": 2e-07, "loss": -0.0043, "step": 1716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16025293270100158, "grad_norm": 0.14107133448123932, "learning_rate": 2e-07, "loss": 0.0102, "step": 1717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16034626580100217, "grad_norm": 0.14218828082084656, "learning_rate": 2e-07, "loss": 0.0111, "step": 1718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16043959890100276, "grad_norm": 0.14309479296207428, "learning_rate": 2e-07, "loss": 0.0131, "step": 1719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16053293200100333, "grad_norm": 0.1403043270111084, "learning_rate": 2e-07, "loss": 0.1061, "step": 1720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16062626510100392, "grad_norm": 0.13049417734146118, "learning_rate": 2e-07, "loss": 0.0245, "step": 1721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1607195982010045, "grad_norm": 0.1225542277097702, "learning_rate": 2e-07, "loss": 0.0147, "step": 1722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16081293130100507, "grad_norm": 0.13677062094211578, "learning_rate": 2e-07, "loss": 0.0073, "step": 1723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16090626440100567, "grad_norm": 0.1378423124551773, "learning_rate": 2e-07, "loss": 0.0263, "step": 1724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16099959750100626, "grad_norm": 0.13612021505832672, "learning_rate": 2e-07, "loss": 0.0208, "step": 1725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16109293060100682, "grad_norm": 0.13025879859924316, "learning_rate": 2e-07, "loss": 0.0352, "step": 1726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16118626370100742, "grad_norm": 0.144289031624794, "learning_rate": 2e-07, "loss": 0.0164, "step": 1727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.161279596801008, "grad_norm": 0.1390911191701889, "learning_rate": 2e-07, "loss": 0.0224, "step": 1728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16137292990100857, "grad_norm": 0.12302779406309128, "learning_rate": 2e-07, "loss": -0.0004, "step": 1729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16146626300100916, "grad_norm": 0.1456684023141861, "learning_rate": 2e-07, "loss": 0.0059, "step": 1730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16155959610100976, "grad_norm": 0.1347608119249344, "learning_rate": 2e-07, "loss": 0.0385, "step": 1731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16165292920101032, "grad_norm": 0.14202958345413208, "learning_rate": 2e-07, "loss": 0.0581, "step": 1732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1617462623010109, "grad_norm": 0.13702383637428284, "learning_rate": 2e-07, "loss": 0.0365, "step": 1733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1618395954010115, "grad_norm": 0.13873878121376038, "learning_rate": 2e-07, "loss": 0.0259, "step": 1734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16193292850101207, "grad_norm": 0.14012522995471954, "learning_rate": 2e-07, "loss": 0.0488, "step": 1735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16202626160101266, "grad_norm": 0.12661084532737732, "learning_rate": 2e-07, "loss": 0.0148, "step": 1736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16211959470101325, "grad_norm": 0.13942812383174896, "learning_rate": 2e-07, "loss": 0.0093, "step": 1737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16221292780101382, "grad_norm": 0.1348314881324768, "learning_rate": 2e-07, "loss": 0.0025, "step": 1738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1623062609010144, "grad_norm": 0.14213192462921143, "learning_rate": 2e-07, "loss": 0.0373, "step": 1739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.162399594001015, "grad_norm": 0.1281989961862564, "learning_rate": 2e-07, "loss": 0.0614, "step": 1740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1624929271010156, "grad_norm": 0.15294545888900757, "learning_rate": 2e-07, "loss": -0.0002, "step": 1741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16258626020101616, "grad_norm": 0.1489385962486267, "learning_rate": 2e-07, "loss": 0.0383, "step": 1742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16267959330101675, "grad_norm": 0.14791050553321838, "learning_rate": 2e-07, "loss": 0.0546, "step": 1743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16277292640101734, "grad_norm": 0.13886301219463348, "learning_rate": 2e-07, "loss": 0.0453, "step": 1744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1628662595010179, "grad_norm": 0.16963832080364227, "learning_rate": 2e-07, "loss": 0.0487, "step": 1745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1629595926010185, "grad_norm": 0.1556134670972824, "learning_rate": 2e-07, "loss": 0.0122, "step": 1746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1630529257010191, "grad_norm": 0.14118660986423492, "learning_rate": 2e-07, "loss": 0.0362, "step": 1747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16314625880101966, "grad_norm": 0.13312576711177826, "learning_rate": 2e-07, "loss": -0.0247, "step": 1748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16323959190102025, "grad_norm": 0.13028459250926971, "learning_rate": 2e-07, "loss": 0.0041, "step": 1749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16333292500102084, "grad_norm": 0.1503874510526657, "learning_rate": 2e-07, "loss": 0.0951, "step": 1750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1634262581010214, "grad_norm": 0.1632710099220276, "learning_rate": 2e-07, "loss": 0.0346, "step": 1751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.163519591201022, "grad_norm": 0.1434256136417389, "learning_rate": 2e-07, "loss": 0.0327, "step": 1752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1636129243010226, "grad_norm": 0.1334051638841629, "learning_rate": 2e-07, "loss": 0.0567, "step": 1753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16370625740102315, "grad_norm": 0.13846084475517273, "learning_rate": 2e-07, "loss": 0.0048, "step": 1754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16379959050102375, "grad_norm": 0.14348450303077698, "learning_rate": 2e-07, "loss": 0.0007, "step": 1755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16389292360102434, "grad_norm": 0.13149744272232056, "learning_rate": 2e-07, "loss": 0.0598, "step": 1756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1639862567010249, "grad_norm": 0.14540605247020721, "learning_rate": 2e-07, "loss": -0.0143, "step": 1757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1640795898010255, "grad_norm": 0.15157799422740936, "learning_rate": 2e-07, "loss": 0.0368, "step": 1758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16417292290102609, "grad_norm": 0.15297657251358032, "learning_rate": 2e-07, "loss": 0.0196, "step": 1759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16426625600102665, "grad_norm": 0.1308118999004364, "learning_rate": 2e-07, "loss": 0.0305, "step": 1760 }, { "clip_ratio/high_max": 0.018076628795824945, "clip_ratio/high_mean": 0.008838482841383666, "clip_ratio/low_mean": 0.003733100224053487, "clip_ratio/low_min": 0.0004693133814726025, "clip_ratio/region_mean": 0.012571583036333323, "completions/clipped_ratio": 0.011038643973214302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 611.9730834960938, "completions/mean_terminated_length": 573.0847778320312, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.16435958910102724, "grad_norm": 0.10833572596311569, "learning_rate": 2e-07, "loss": 0.0141, "num_tokens": 1185818079.0, "reward": 0.6115373969078064, "reward_std": 0.180352583527565, "rewards/simpleverify_reward/mean": 0.6115373969078064, "rewards/simpleverify_reward/std": 0.4874049425125122, "step": 1761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16445292220102783, "grad_norm": 0.1127336174249649, "learning_rate": 2e-07, "loss": 0.0355, "step": 1762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16454625530102843, "grad_norm": 0.13127727806568146, "learning_rate": 2e-07, "loss": 0.0356, "step": 1763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.164639588401029, "grad_norm": 0.13594135642051697, "learning_rate": 2e-07, "loss": 0.0436, "step": 1764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16473292150102958, "grad_norm": 0.1298554688692093, "learning_rate": 2e-07, "loss": -0.0052, "step": 1765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16482625460103018, "grad_norm": 0.12493173032999039, "learning_rate": 2e-07, "loss": 0.0685, "step": 1766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16491958770103074, "grad_norm": 0.13796378672122955, "learning_rate": 2e-07, "loss": -0.0158, "step": 1767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16501292080103133, "grad_norm": 0.13567331433296204, "learning_rate": 2e-07, "loss": 0.0082, "step": 1768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16510625390103192, "grad_norm": 0.13345091044902802, "learning_rate": 2e-07, "loss": 0.0117, "step": 1769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1651995870010325, "grad_norm": 0.13981086015701294, "learning_rate": 2e-07, "loss": 0.0136, "step": 1770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16529292010103308, "grad_norm": 0.14303749799728394, "learning_rate": 2e-07, "loss": 0.0095, "step": 1771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16538625320103367, "grad_norm": 0.12364780157804489, "learning_rate": 2e-07, "loss": 0.028, "step": 1772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16547958630103424, "grad_norm": 0.14133331179618835, "learning_rate": 2e-07, "loss": 0.0188, "step": 1773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16557291940103483, "grad_norm": 0.13271479308605194, "learning_rate": 2e-07, "loss": -0.0079, "step": 1774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16566625250103542, "grad_norm": 0.12968213856220245, "learning_rate": 2e-07, "loss": 0.0744, "step": 1775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16575958560103599, "grad_norm": 0.13478386402130127, "learning_rate": 2e-07, "loss": 0.0292, "step": 1776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16585291870103658, "grad_norm": 0.13739782571792603, "learning_rate": 2e-07, "loss": 0.022, "step": 1777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16594625180103717, "grad_norm": 0.15036891400814056, "learning_rate": 2e-07, "loss": -0.0148, "step": 1778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16603958490103773, "grad_norm": 0.12279122322797775, "learning_rate": 2e-07, "loss": 0.0167, "step": 1779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16613291800103833, "grad_norm": 0.14099600911140442, "learning_rate": 2e-07, "loss": 0.0456, "step": 1780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16622625110103892, "grad_norm": 0.17009855806827545, "learning_rate": 2e-07, "loss": 0.0197, "step": 1781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1663195842010395, "grad_norm": 0.13186194002628326, "learning_rate": 2e-07, "loss": 0.0417, "step": 1782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16641291730104008, "grad_norm": 0.12062453478574753, "learning_rate": 2e-07, "loss": -0.0157, "step": 1783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16650625040104067, "grad_norm": 0.1374969184398651, "learning_rate": 2e-07, "loss": 0.0262, "step": 1784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16659958350104126, "grad_norm": 0.1265432983636856, "learning_rate": 2e-07, "loss": 0.0123, "step": 1785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16669291660104182, "grad_norm": 0.15387628972530365, "learning_rate": 2e-07, "loss": 0.0475, "step": 1786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16678624970104242, "grad_norm": 0.13528841733932495, "learning_rate": 2e-07, "loss": 0.0213, "step": 1787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.166879582801043, "grad_norm": 0.1590741127729416, "learning_rate": 2e-07, "loss": 0.02, "step": 1788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16697291590104357, "grad_norm": 0.14132629334926605, "learning_rate": 2e-07, "loss": 0.025, "step": 1789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16706624900104416, "grad_norm": 0.13157938420772552, "learning_rate": 2e-07, "loss": -0.004, "step": 1790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16715958210104476, "grad_norm": 0.1383260041475296, "learning_rate": 2e-07, "loss": 0.039, "step": 1791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16725291520104532, "grad_norm": 0.12811826169490814, "learning_rate": 2e-07, "loss": 0.0213, "step": 1792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1673462483010459, "grad_norm": 0.14032649993896484, "learning_rate": 2e-07, "loss": 0.0318, "step": 1793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1674395814010465, "grad_norm": 0.13948988914489746, "learning_rate": 2e-07, "loss": 0.0479, "step": 1794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16753291450104707, "grad_norm": 0.14622651040554047, "learning_rate": 2e-07, "loss": 0.0349, "step": 1795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16762624760104766, "grad_norm": 0.14201799035072327, "learning_rate": 2e-07, "loss": 0.011, "step": 1796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16771958070104825, "grad_norm": 0.13881975412368774, "learning_rate": 2e-07, "loss": 0.0214, "step": 1797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16781291380104882, "grad_norm": 0.13623951375484467, "learning_rate": 2e-07, "loss": 0.0196, "step": 1798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1679062469010494, "grad_norm": 0.15504132211208344, "learning_rate": 2e-07, "loss": 0.0214, "step": 1799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16799958000105, "grad_norm": 0.13203254342079163, "learning_rate": 2e-07, "loss": -0.0011, "step": 1800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16809291310105057, "grad_norm": 0.14779958128929138, "learning_rate": 2e-07, "loss": 0.0437, "step": 1801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16818624620105116, "grad_norm": 0.13587374985218048, "learning_rate": 2e-07, "loss": 0.0465, "step": 1802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16827957930105175, "grad_norm": 0.14497019350528717, "learning_rate": 2e-07, "loss": 0.0636, "step": 1803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16837291240105234, "grad_norm": 0.1412143111228943, "learning_rate": 2e-07, "loss": 0.0519, "step": 1804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1684662455010529, "grad_norm": 0.13916926085948944, "learning_rate": 2e-07, "loss": 0.0206, "step": 1805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1685595786010535, "grad_norm": 0.13273264467716217, "learning_rate": 2e-07, "loss": 0.0105, "step": 1806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1686529117010541, "grad_norm": 0.13299500942230225, "learning_rate": 2e-07, "loss": 0.0405, "step": 1807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16874624480105466, "grad_norm": 0.16355222463607788, "learning_rate": 2e-07, "loss": 0.0545, "step": 1808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16883957790105525, "grad_norm": 0.14443746209144592, "learning_rate": 2e-07, "loss": 0.0568, "step": 1809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16893291100105584, "grad_norm": 0.13638895750045776, "learning_rate": 2e-07, "loss": 0.0095, "step": 1810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1690262441010564, "grad_norm": 0.13642951846122742, "learning_rate": 2e-07, "loss": 0.0211, "step": 1811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.169119577201057, "grad_norm": 0.14684197306632996, "learning_rate": 2e-07, "loss": 0.0252, "step": 1812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1692129103010576, "grad_norm": 0.15036767721176147, "learning_rate": 2e-07, "loss": 0.0117, "step": 1813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16930624340105815, "grad_norm": 0.12976665794849396, "learning_rate": 2e-07, "loss": 0.0508, "step": 1814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16939957650105875, "grad_norm": 0.12477904558181763, "learning_rate": 2e-07, "loss": 0.0155, "step": 1815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16949290960105934, "grad_norm": 0.14248622953891754, "learning_rate": 2e-07, "loss": 0.0571, "step": 1816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1695862427010599, "grad_norm": 0.1477234810590744, "learning_rate": 2e-07, "loss": 0.007, "step": 1817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1696795758010605, "grad_norm": 0.1388540118932724, "learning_rate": 2e-07, "loss": 0.0404, "step": 1818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1697729089010611, "grad_norm": 0.13827331364154816, "learning_rate": 2e-07, "loss": 0.0086, "step": 1819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16986624200106165, "grad_norm": 0.1328304260969162, "learning_rate": 2e-07, "loss": 0.0031, "step": 1820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.16995957510106224, "grad_norm": 0.16423270106315613, "learning_rate": 2e-07, "loss": 0.0478, "step": 1821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17005290820106284, "grad_norm": 0.17471261322498322, "learning_rate": 2e-07, "loss": 0.077, "step": 1822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17014624130106343, "grad_norm": 0.14542202651500702, "learning_rate": 2e-07, "loss": 0.0019, "step": 1823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.170239574401064, "grad_norm": 0.14537037909030914, "learning_rate": 2e-07, "loss": -0.0299, "step": 1824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009905133928571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 617.5223388671875, "completions/mean_terminated_length": 582.7228393554688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.17033290750106458, "grad_norm": 0.12343145161867142, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 1226943024.0, "reward": 0.6091831922531128, "reward_std": 0.18122167885303497, "rewards/simpleverify_reward/mean": 0.6091831922531128, "rewards/simpleverify_reward/std": 0.48793768882751465, "step": 1825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17042624060106518, "grad_norm": 0.12463295459747314, "learning_rate": 2e-07, "loss": 0.1004, "step": 1826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17051957370106574, "grad_norm": 0.12601059675216675, "learning_rate": 2e-07, "loss": 0.0171, "step": 1827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17061290680106633, "grad_norm": 0.1419769674539566, "learning_rate": 2e-07, "loss": -0.0393, "step": 1828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17070623990106693, "grad_norm": 0.13415008783340454, "learning_rate": 2e-07, "loss": -0.0069, "step": 1829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1707995730010675, "grad_norm": 0.13519296050071716, "learning_rate": 2e-07, "loss": -0.0246, "step": 1830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17089290610106808, "grad_norm": 0.13976716995239258, "learning_rate": 2e-07, "loss": 0.0529, "step": 1831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17098623920106867, "grad_norm": 0.13812245428562164, "learning_rate": 2e-07, "loss": 0.0206, "step": 1832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17107957230106924, "grad_norm": 0.13825631141662598, "learning_rate": 2e-07, "loss": 0.0094, "step": 1833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17117290540106983, "grad_norm": 0.14670385420322418, "learning_rate": 2e-07, "loss": 0.041, "step": 1834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17126623850107042, "grad_norm": 0.12002365291118622, "learning_rate": 2e-07, "loss": -0.0302, "step": 1835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.171359571601071, "grad_norm": 0.15034233033657074, "learning_rate": 2e-07, "loss": 0.0327, "step": 1836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17145290470107158, "grad_norm": 0.14089319109916687, "learning_rate": 2e-07, "loss": 0.0384, "step": 1837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17154623780107217, "grad_norm": 0.1912098526954651, "learning_rate": 2e-07, "loss": 0.0373, "step": 1838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17163957090107274, "grad_norm": 0.12161079794168472, "learning_rate": 2e-07, "loss": 0.023, "step": 1839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17173290400107333, "grad_norm": 0.14579805731773376, "learning_rate": 2e-07, "loss": 0.0029, "step": 1840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17182623710107392, "grad_norm": 0.12602153420448303, "learning_rate": 2e-07, "loss": 0.0902, "step": 1841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17191957020107448, "grad_norm": 0.13122965395450592, "learning_rate": 2e-07, "loss": -0.0086, "step": 1842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17201290330107508, "grad_norm": 0.14741212129592896, "learning_rate": 2e-07, "loss": 0.0245, "step": 1843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17210623640107567, "grad_norm": 0.1368563175201416, "learning_rate": 2e-07, "loss": 0.0862, "step": 1844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17219956950107626, "grad_norm": 0.13294988870620728, "learning_rate": 2e-07, "loss": 0.0676, "step": 1845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17229290260107682, "grad_norm": 0.1595952808856964, "learning_rate": 2e-07, "loss": 0.0328, "step": 1846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17238623570107742, "grad_norm": 0.1500539928674698, "learning_rate": 2e-07, "loss": 0.0238, "step": 1847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.172479568801078, "grad_norm": 0.14594227075576782, "learning_rate": 2e-07, "loss": 0.0801, "step": 1848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17257290190107857, "grad_norm": 0.13235926628112793, "learning_rate": 2e-07, "loss": 0.0286, "step": 1849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17266623500107917, "grad_norm": 0.12428905814886093, "learning_rate": 2e-07, "loss": 0.0015, "step": 1850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17275956810107976, "grad_norm": 0.1370844841003418, "learning_rate": 2e-07, "loss": 0.0355, "step": 1851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17285290120108032, "grad_norm": 0.1450466364622116, "learning_rate": 2e-07, "loss": -0.0069, "step": 1852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17294623430108091, "grad_norm": 0.15126030147075653, "learning_rate": 2e-07, "loss": -0.0009, "step": 1853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1730395674010815, "grad_norm": 0.17182661592960358, "learning_rate": 2e-07, "loss": 0.02, "step": 1854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17313290050108207, "grad_norm": 0.14480751752853394, "learning_rate": 2e-07, "loss": 0.0011, "step": 1855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17322623360108266, "grad_norm": 0.16947631537914276, "learning_rate": 2e-07, "loss": 0.0086, "step": 1856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17331956670108326, "grad_norm": 0.13966505229473114, "learning_rate": 2e-07, "loss": 0.0229, "step": 1857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17341289980108382, "grad_norm": 0.136332705616951, "learning_rate": 2e-07, "loss": 0.0121, "step": 1858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1735062329010844, "grad_norm": 0.1282629668712616, "learning_rate": 2e-07, "loss": 0.0134, "step": 1859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.173599566001085, "grad_norm": 0.12012812495231628, "learning_rate": 2e-07, "loss": -0.0126, "step": 1860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17369289910108557, "grad_norm": 0.14423710107803345, "learning_rate": 2e-07, "loss": 0.0481, "step": 1861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17378623220108616, "grad_norm": 0.15105998516082764, "learning_rate": 2e-07, "loss": 0.0624, "step": 1862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17387956530108675, "grad_norm": 0.1641751080751419, "learning_rate": 2e-07, "loss": 0.059, "step": 1863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17397289840108732, "grad_norm": 0.1319325864315033, "learning_rate": 2e-07, "loss": 0.0417, "step": 1864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1740662315010879, "grad_norm": 0.1350196897983551, "learning_rate": 2e-07, "loss": 0.0371, "step": 1865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1741595646010885, "grad_norm": 0.14809785783290863, "learning_rate": 2e-07, "loss": 0.0168, "step": 1866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1742528977010891, "grad_norm": 0.12649235129356384, "learning_rate": 2e-07, "loss": -0.0114, "step": 1867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17434623080108966, "grad_norm": 0.14483027160167694, "learning_rate": 2e-07, "loss": 0.0109, "step": 1868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17443956390109025, "grad_norm": 0.13062791526317596, "learning_rate": 2e-07, "loss": 0.009, "step": 1869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17453289700109084, "grad_norm": 0.13055458664894104, "learning_rate": 2e-07, "loss": 0.0324, "step": 1870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1746262301010914, "grad_norm": 0.14936086535453796, "learning_rate": 2e-07, "loss": 0.0341, "step": 1871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.174719563201092, "grad_norm": 0.1262301653623581, "learning_rate": 2e-07, "loss": 0.0143, "step": 1872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1748128963010926, "grad_norm": 0.13482236862182617, "learning_rate": 2e-07, "loss": -0.0176, "step": 1873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17490622940109316, "grad_norm": 0.1613515168428421, "learning_rate": 2e-07, "loss": 0.0465, "step": 1874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17499956250109375, "grad_norm": 0.15323977172374725, "learning_rate": 2e-07, "loss": 0.0259, "step": 1875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17509289560109434, "grad_norm": 0.15485461056232452, "learning_rate": 2e-07, "loss": 0.0373, "step": 1876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1751862287010949, "grad_norm": 0.13084079325199127, "learning_rate": 2e-07, "loss": 0.0189, "step": 1877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1752795618010955, "grad_norm": 0.14444264769554138, "learning_rate": 2e-07, "loss": 0.0516, "step": 1878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1753728949010961, "grad_norm": 0.1359085738658905, "learning_rate": 2e-07, "loss": 0.0212, "step": 1879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17546622800109665, "grad_norm": 0.14733566343784332, "learning_rate": 2e-07, "loss": 0.0036, "step": 1880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17555956110109724, "grad_norm": 0.1420356184244156, "learning_rate": 2e-07, "loss": 0.0256, "step": 1881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17565289420109784, "grad_norm": 0.1636306196451187, "learning_rate": 2e-07, "loss": 0.015, "step": 1882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1757462273010984, "grad_norm": 0.14768120646476746, "learning_rate": 2e-07, "loss": 0.0305, "step": 1883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.175839560401099, "grad_norm": 0.16058096289634705, "learning_rate": 2e-07, "loss": -0.0201, "step": 1884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17593289350109959, "grad_norm": 0.14790402352809906, "learning_rate": 2e-07, "loss": 0.0167, "step": 1885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17602622660110018, "grad_norm": 0.13940365612506866, "learning_rate": 2e-07, "loss": 0.0464, "step": 1886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17611955970110074, "grad_norm": 0.13371802866458893, "learning_rate": 2e-07, "loss": 0.0241, "step": 1887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17621289280110133, "grad_norm": 0.14206700026988983, "learning_rate": 2e-07, "loss": -0.0048, "step": 1888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010637555803571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 632.0046997070312, "completions/mean_terminated_length": 594.760009765625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.17630622590110193, "grad_norm": 0.12968692183494568, "learning_rate": 2e-07, "loss": 0.0453, "num_tokens": 1268941588.0, "reward": 0.6137521266937256, "reward_std": 0.17672185599803925, "rewards/simpleverify_reward/mean": 0.6137520670890808, "rewards/simpleverify_reward/std": 0.4868928790092468, "step": 1889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1763995590011025, "grad_norm": 0.1270974576473236, "learning_rate": 2e-07, "loss": -0.0061, "step": 1890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17649289210110308, "grad_norm": 0.12535491585731506, "learning_rate": 2e-07, "loss": 0.0367, "step": 1891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17658622520110367, "grad_norm": 0.12181065231561661, "learning_rate": 2e-07, "loss": 0.0281, "step": 1892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17667955830110424, "grad_norm": 0.13414418697357178, "learning_rate": 2e-07, "loss": 0.0629, "step": 1893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17677289140110483, "grad_norm": 0.13094033300876617, "learning_rate": 2e-07, "loss": 0.0186, "step": 1894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17686622450110542, "grad_norm": 0.15238836407661438, "learning_rate": 2e-07, "loss": -0.0039, "step": 1895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.176959557601106, "grad_norm": 0.1314670443534851, "learning_rate": 2e-07, "loss": 0.005, "step": 1896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17705289070110658, "grad_norm": 0.14410607516765594, "learning_rate": 2e-07, "loss": -0.0036, "step": 1897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17714622380110717, "grad_norm": 0.1851828396320343, "learning_rate": 2e-07, "loss": 0.059, "step": 1898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17723955690110774, "grad_norm": 0.12756502628326416, "learning_rate": 2e-07, "loss": -0.01, "step": 1899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17733289000110833, "grad_norm": 0.12007372826337814, "learning_rate": 2e-07, "loss": 0.0301, "step": 1900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17742622310110892, "grad_norm": 0.1303112953901291, "learning_rate": 2e-07, "loss": 0.0174, "step": 1901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17751955620110949, "grad_norm": 0.1678982675075531, "learning_rate": 2e-07, "loss": 0.0266, "step": 1902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17761288930111008, "grad_norm": 0.1291574388742447, "learning_rate": 2e-07, "loss": 0.0172, "step": 1903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17770622240111067, "grad_norm": 0.1233028993010521, "learning_rate": 2e-07, "loss": -0.0103, "step": 1904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17779955550111123, "grad_norm": 0.13970455527305603, "learning_rate": 2e-07, "loss": -0.0202, "step": 1905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17789288860111183, "grad_norm": 0.13417783379554749, "learning_rate": 2e-07, "loss": 0.0313, "step": 1906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17798622170111242, "grad_norm": 0.1270092874765396, "learning_rate": 2e-07, "loss": 0.0073, "step": 1907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.178079554801113, "grad_norm": 0.14816221594810486, "learning_rate": 2e-07, "loss": 0.0441, "step": 1908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17817288790111357, "grad_norm": 0.139995738863945, "learning_rate": 2e-07, "loss": 0.0535, "step": 1909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17826622100111417, "grad_norm": 0.13110674917697906, "learning_rate": 2e-07, "loss": 0.014, "step": 1910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17835955410111476, "grad_norm": 0.14220739901065826, "learning_rate": 2e-07, "loss": 0.0105, "step": 1911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17845288720111532, "grad_norm": 0.1262693703174591, "learning_rate": 2e-07, "loss": 0.0082, "step": 1912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17854622030111592, "grad_norm": 0.13542824983596802, "learning_rate": 2e-07, "loss": 0.0486, "step": 1913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1786395534011165, "grad_norm": 0.13898195326328278, "learning_rate": 2e-07, "loss": 0.0035, "step": 1914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17873288650111707, "grad_norm": 0.1645851880311966, "learning_rate": 2e-07, "loss": 0.0194, "step": 1915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17882621960111766, "grad_norm": 0.13773660361766815, "learning_rate": 2e-07, "loss": 0.0151, "step": 1916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17891955270111826, "grad_norm": 0.1356283575296402, "learning_rate": 2e-07, "loss": -0.0031, "step": 1917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17901288580111882, "grad_norm": 0.11995309591293335, "learning_rate": 2e-07, "loss": 0.0519, "step": 1918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1791062189011194, "grad_norm": 0.13299036026000977, "learning_rate": 2e-07, "loss": 0.0273, "step": 1919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17919955200112, "grad_norm": 0.13857421278953552, "learning_rate": 2e-07, "loss": 0.0211, "step": 1920 }, { "clip_ratio/high_max": 0.01674611441558227, "clip_ratio/high_mean": 0.007233279859065078, "clip_ratio/low_mean": 0.0036107119158259593, "clip_ratio/low_min": 0.0003255704759794753, "clip_ratio/region_mean": 0.010843991651199758, "completions/clipped_ratio": 0.008335658482142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 619.3406372070312, "completions/mean_terminated_length": 590.1167602539062, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.17929288510112057, "grad_norm": 0.09886615723371506, "learning_rate": 2e-07, "loss": 0.0146, "num_tokens": 1310170984.0, "reward": 0.6195417642593384, "reward_std": 0.17761610448360443, "rewards/simpleverify_reward/mean": 0.6195417046546936, "rewards/simpleverify_reward/std": 0.48550376296043396, "step": 1921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17938621820112116, "grad_norm": 0.12264218926429749, "learning_rate": 2e-07, "loss": 0.0389, "step": 1922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17947955130112175, "grad_norm": 0.13081879913806915, "learning_rate": 2e-07, "loss": 0.0346, "step": 1923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17957288440112232, "grad_norm": 0.1340312957763672, "learning_rate": 2e-07, "loss": 0.0165, "step": 1924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1796662175011229, "grad_norm": 0.1363367736339569, "learning_rate": 2e-07, "loss": 0.0437, "step": 1925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1797595506011235, "grad_norm": 0.14061513543128967, "learning_rate": 2e-07, "loss": 0.0012, "step": 1926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1798528837011241, "grad_norm": 0.13663895428180695, "learning_rate": 2e-07, "loss": -0.0332, "step": 1927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.17994621680112466, "grad_norm": 0.13007336854934692, "learning_rate": 2e-07, "loss": 0.0286, "step": 1928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18003954990112525, "grad_norm": 0.12456807494163513, "learning_rate": 2e-07, "loss": -0.0047, "step": 1929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18013288300112584, "grad_norm": 0.14594756066799164, "learning_rate": 2e-07, "loss": 0.028, "step": 1930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1802262161011264, "grad_norm": 0.1262948215007782, "learning_rate": 2e-07, "loss": -0.0199, "step": 1931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.180319549201127, "grad_norm": 0.12121875584125519, "learning_rate": 2e-07, "loss": 0.01, "step": 1932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1804128823011276, "grad_norm": 0.1400436908006668, "learning_rate": 2e-07, "loss": 0.0231, "step": 1933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18050621540112816, "grad_norm": 0.13571596145629883, "learning_rate": 2e-07, "loss": -0.0381, "step": 1934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18059954850112875, "grad_norm": 0.12717264890670776, "learning_rate": 2e-07, "loss": 0.0003, "step": 1935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18069288160112934, "grad_norm": 0.11864596605300903, "learning_rate": 2e-07, "loss": 0.0501, "step": 1936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1807862147011299, "grad_norm": 0.16038836538791656, "learning_rate": 2e-07, "loss": 0.051, "step": 1937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1808795478011305, "grad_norm": 0.12937262654304504, "learning_rate": 2e-07, "loss": 0.0376, "step": 1938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1809728809011311, "grad_norm": 0.1536262035369873, "learning_rate": 2e-07, "loss": 0.0422, "step": 1939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18106621400113165, "grad_norm": 0.12450678646564484, "learning_rate": 2e-07, "loss": 0.0078, "step": 1940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18115954710113225, "grad_norm": 0.1392851024866104, "learning_rate": 2e-07, "loss": 0.0366, "step": 1941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18125288020113284, "grad_norm": 0.12198419123888016, "learning_rate": 2e-07, "loss": -0.0018, "step": 1942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1813462133011334, "grad_norm": 0.1918262392282486, "learning_rate": 2e-07, "loss": 0.049, "step": 1943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.181439546401134, "grad_norm": 0.13931629061698914, "learning_rate": 2e-07, "loss": 0.0782, "step": 1944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1815328795011346, "grad_norm": 0.15207862854003906, "learning_rate": 2e-07, "loss": 0.0325, "step": 1945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18162621260113515, "grad_norm": 0.11981978267431259, "learning_rate": 2e-07, "loss": 0.0334, "step": 1946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18171954570113574, "grad_norm": 0.14882461726665497, "learning_rate": 2e-07, "loss": 0.0173, "step": 1947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18181287880113634, "grad_norm": 0.13769622147083282, "learning_rate": 2e-07, "loss": 0.0025, "step": 1948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18190621190113693, "grad_norm": 0.18945784866809845, "learning_rate": 2e-07, "loss": 0.0167, "step": 1949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1819995450011375, "grad_norm": 0.1421399563550949, "learning_rate": 2e-07, "loss": -0.0259, "step": 1950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18209287810113808, "grad_norm": 0.13331574201583862, "learning_rate": 2e-07, "loss": -0.0295, "step": 1951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18218621120113868, "grad_norm": 0.14329425990581512, "learning_rate": 2e-07, "loss": 0.0432, "step": 1952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18227954430113924, "grad_norm": 0.14803071320056915, "learning_rate": 2e-07, "loss": 0.013, "step": 1953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18237287740113983, "grad_norm": 0.16635844111442566, "learning_rate": 2e-07, "loss": 0.0408, "step": 1954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18246621050114042, "grad_norm": 0.13200974464416504, "learning_rate": 2e-07, "loss": 0.0292, "step": 1955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.182559543601141, "grad_norm": 0.1481582671403885, "learning_rate": 2e-07, "loss": 0.0122, "step": 1956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18265287670114158, "grad_norm": 0.1292586624622345, "learning_rate": 2e-07, "loss": 0.0407, "step": 1957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18274620980114217, "grad_norm": 0.13599826395511627, "learning_rate": 2e-07, "loss": 0.0012, "step": 1958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18283954290114274, "grad_norm": 0.14243608713150024, "learning_rate": 2e-07, "loss": 0.0235, "step": 1959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18293287600114333, "grad_norm": 0.12822553515434265, "learning_rate": 2e-07, "loss": 0.0219, "step": 1960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18302620910114392, "grad_norm": 0.1303199827671051, "learning_rate": 2e-07, "loss": 0.0244, "step": 1961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1831195422011445, "grad_norm": 0.1413554698228836, "learning_rate": 2e-07, "loss": 0.0261, "step": 1962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18321287530114508, "grad_norm": 0.14116507768630981, "learning_rate": 2e-07, "loss": -0.0197, "step": 1963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18330620840114567, "grad_norm": 0.11513558775186539, "learning_rate": 2e-07, "loss": 0.0384, "step": 1964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18339954150114623, "grad_norm": 0.1423267126083374, "learning_rate": 2e-07, "loss": 0.0146, "step": 1965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18349287460114683, "grad_norm": 0.1290888786315918, "learning_rate": 2e-07, "loss": -0.0045, "step": 1966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18358620770114742, "grad_norm": 0.1253574788570404, "learning_rate": 2e-07, "loss": 0.0003, "step": 1967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18367954080114798, "grad_norm": 0.15042288601398468, "learning_rate": 2e-07, "loss": 0.0353, "step": 1968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18377287390114858, "grad_norm": 0.1410323530435562, "learning_rate": 2e-07, "loss": 0.0099, "step": 1969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18386620700114917, "grad_norm": 0.1407528519630432, "learning_rate": 2e-07, "loss": 0.0243, "step": 1970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18395954010114976, "grad_norm": 0.14316454529762268, "learning_rate": 2e-07, "loss": 0.074, "step": 1971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18405287320115032, "grad_norm": 0.13648678362369537, "learning_rate": 2e-07, "loss": 0.0411, "step": 1972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18414620630115092, "grad_norm": 0.16273942589759827, "learning_rate": 2e-07, "loss": 0.0054, "step": 1973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1842395394011515, "grad_norm": 0.14621920883655548, "learning_rate": 2e-07, "loss": 0.0527, "step": 1974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18433287250115207, "grad_norm": 0.17995776236057281, "learning_rate": 2e-07, "loss": -0.01, "step": 1975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18442620560115267, "grad_norm": 0.1395529806613922, "learning_rate": 2e-07, "loss": 0.0605, "step": 1976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18451953870115326, "grad_norm": 0.136857271194458, "learning_rate": 2e-07, "loss": 0.0215, "step": 1977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18461287180115382, "grad_norm": 0.13114769756793976, "learning_rate": 2e-07, "loss": 0.0209, "step": 1978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1847062049011544, "grad_norm": 0.1532019078731537, "learning_rate": 2e-07, "loss": 0.057, "step": 1979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.184799538001155, "grad_norm": 0.13745979964733124, "learning_rate": 2e-07, "loss": 0.0009, "step": 1980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18489287110115557, "grad_norm": 0.14160163700580597, "learning_rate": 2e-07, "loss": 0.0729, "step": 1981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18498620420115616, "grad_norm": 0.1361771821975708, "learning_rate": 2e-07, "loss": 0.0142, "step": 1982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18507953730115675, "grad_norm": 0.14404182136058807, "learning_rate": 2e-07, "loss": 0.0106, "step": 1983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18517287040115732, "grad_norm": 0.13711047172546387, "learning_rate": 2e-07, "loss": -0.0038, "step": 1984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009381975446428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 626.4524536132812, "completions/mean_terminated_length": 593.5928344726562, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.1852662035011579, "grad_norm": 0.12949836254119873, "learning_rate": 2e-07, "loss": -0.0001, "num_tokens": 1351795941.0, "reward": 0.6247907876968384, "reward_std": 0.17726653814315796, "rewards/simpleverify_reward/mean": 0.6247907280921936, "rewards/simpleverify_reward/std": 0.48418107628822327, "step": 1985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1853595366011585, "grad_norm": 0.13212190568447113, "learning_rate": 2e-07, "loss": -0.0413, "step": 1986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18545286970115907, "grad_norm": 0.14804911613464355, "learning_rate": 2e-07, "loss": 0.0191, "step": 1987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18554620280115966, "grad_norm": 0.13002555072307587, "learning_rate": 2e-07, "loss": -0.0198, "step": 1988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18563953590116025, "grad_norm": 0.14281854033470154, "learning_rate": 2e-07, "loss": 0.0198, "step": 1989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18573286900116084, "grad_norm": 0.12832076847553253, "learning_rate": 2e-07, "loss": 0.0755, "step": 1990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1858262021011614, "grad_norm": 0.12089355289936066, "learning_rate": 2e-07, "loss": 0.0357, "step": 1991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.185919535201162, "grad_norm": 0.14158028364181519, "learning_rate": 2e-07, "loss": -0.0045, "step": 1992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1860128683011626, "grad_norm": 0.12692396342754364, "learning_rate": 2e-07, "loss": 0.0369, "step": 1993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18610620140116316, "grad_norm": 0.1321793794631958, "learning_rate": 2e-07, "loss": 0.0077, "step": 1994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18619953450116375, "grad_norm": 0.12042239308357239, "learning_rate": 2e-07, "loss": -0.0038, "step": 1995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18629286760116434, "grad_norm": 0.1490997076034546, "learning_rate": 2e-07, "loss": 0.0656, "step": 1996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1863862007011649, "grad_norm": 0.13332825899124146, "learning_rate": 2e-07, "loss": 0.0277, "step": 1997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1864795338011655, "grad_norm": 0.13467949628829956, "learning_rate": 2e-07, "loss": 0.0452, "step": 1998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1865728669011661, "grad_norm": 0.13089565932750702, "learning_rate": 2e-07, "loss": 0.0751, "step": 1999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18666620000116665, "grad_norm": 0.14168663322925568, "learning_rate": 2e-07, "loss": 0.0361, "step": 2000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18675953310116725, "grad_norm": 0.13525070250034332, "learning_rate": 2e-07, "loss": 0.066, "step": 2001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18685286620116784, "grad_norm": 0.13899831473827362, "learning_rate": 2e-07, "loss": 0.0526, "step": 2002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1869461993011684, "grad_norm": 0.1302626132965088, "learning_rate": 2e-07, "loss": 0.0542, "step": 2003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.187039532401169, "grad_norm": 0.1303328573703766, "learning_rate": 2e-07, "loss": 0.0404, "step": 2004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1871328655011696, "grad_norm": 0.11603037267923355, "learning_rate": 2e-07, "loss": 0.0121, "step": 2005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18722619860117015, "grad_norm": 0.16481482982635498, "learning_rate": 2e-07, "loss": -0.0204, "step": 2006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18731953170117074, "grad_norm": 0.13323041796684265, "learning_rate": 2e-07, "loss": 0.0204, "step": 2007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18741286480117134, "grad_norm": 0.13928939402103424, "learning_rate": 2e-07, "loss": 0.0451, "step": 2008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1875061979011719, "grad_norm": 0.1318858414888382, "learning_rate": 2e-07, "loss": 0.0075, "step": 2009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1875995310011725, "grad_norm": 0.12346776574850082, "learning_rate": 2e-07, "loss": -0.0176, "step": 2010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18769286410117308, "grad_norm": 0.1380702406167984, "learning_rate": 2e-07, "loss": -0.0018, "step": 2011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18778619720117368, "grad_norm": 0.1435062736272812, "learning_rate": 2e-07, "loss": 0.0031, "step": 2012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18787953030117424, "grad_norm": 0.11607632040977478, "learning_rate": 2e-07, "loss": 0.034, "step": 2013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18797286340117483, "grad_norm": 0.14182893931865692, "learning_rate": 2e-07, "loss": -0.0043, "step": 2014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18806619650117543, "grad_norm": 0.127472922205925, "learning_rate": 2e-07, "loss": 0.0377, "step": 2015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.188159529601176, "grad_norm": 0.13234007358551025, "learning_rate": 2e-07, "loss": 0.0441, "step": 2016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18825286270117658, "grad_norm": 0.1482890546321869, "learning_rate": 2e-07, "loss": 0.0209, "step": 2017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18834619580117717, "grad_norm": 0.13471786677837372, "learning_rate": 2e-07, "loss": 0.0245, "step": 2018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18843952890117774, "grad_norm": 0.1459258496761322, "learning_rate": 2e-07, "loss": -0.0077, "step": 2019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18853286200117833, "grad_norm": 0.1374923288822174, "learning_rate": 2e-07, "loss": 0.0266, "step": 2020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18862619510117892, "grad_norm": 0.15669749677181244, "learning_rate": 2e-07, "loss": 0.0346, "step": 2021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1887195282011795, "grad_norm": 0.13293330371379852, "learning_rate": 2e-07, "loss": 0.009, "step": 2022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18881286130118008, "grad_norm": 0.13252577185630798, "learning_rate": 2e-07, "loss": 0.0119, "step": 2023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18890619440118067, "grad_norm": 0.15301652252674103, "learning_rate": 2e-07, "loss": 0.0378, "step": 2024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18899952750118124, "grad_norm": 0.22243423759937286, "learning_rate": 2e-07, "loss": 0.0204, "step": 2025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18909286060118183, "grad_norm": 0.2164534330368042, "learning_rate": 2e-07, "loss": 0.0202, "step": 2026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18918619370118242, "grad_norm": 0.11972451210021973, "learning_rate": 2e-07, "loss": 0.059, "step": 2027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18927952680118298, "grad_norm": 0.20433871448040009, "learning_rate": 2e-07, "loss": -0.0025, "step": 2028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18937285990118358, "grad_norm": 0.13868603110313416, "learning_rate": 2e-07, "loss": 0.055, "step": 2029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18946619300118417, "grad_norm": 0.12149716168642044, "learning_rate": 2e-07, "loss": 0.0281, "step": 2030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18955952610118473, "grad_norm": 0.13556385040283203, "learning_rate": 2e-07, "loss": -0.0038, "step": 2031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18965285920118533, "grad_norm": 0.12901639938354492, "learning_rate": 2e-07, "loss": 0.0028, "step": 2032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18974619230118592, "grad_norm": 0.14690649509429932, "learning_rate": 2e-07, "loss": -0.0078, "step": 2033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1898395254011865, "grad_norm": 0.12696930766105652, "learning_rate": 2e-07, "loss": 0.0937, "step": 2034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.18993285850118707, "grad_norm": 0.13723741471767426, "learning_rate": 2e-07, "loss": 0.0676, "step": 2035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19002619160118767, "grad_norm": 0.2401839941740036, "learning_rate": 2e-07, "loss": 0.0778, "step": 2036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19011952470118826, "grad_norm": 0.13668695092201233, "learning_rate": 2e-07, "loss": 0.0071, "step": 2037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19021285780118882, "grad_norm": 0.14171245694160461, "learning_rate": 2e-07, "loss": -0.0104, "step": 2038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19030619090118941, "grad_norm": 0.13272352516651154, "learning_rate": 2e-07, "loss": -0.0116, "step": 2039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19039952400119, "grad_norm": 0.17398177087306976, "learning_rate": 2e-07, "loss": 0.0223, "step": 2040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19049285710119057, "grad_norm": 0.11527741700410843, "learning_rate": 2e-07, "loss": -0.0561, "step": 2041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19058619020119116, "grad_norm": 0.1422957479953766, "learning_rate": 2e-07, "loss": 0.0371, "step": 2042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19067952330119176, "grad_norm": 0.12242025136947632, "learning_rate": 2e-07, "loss": 0.0418, "step": 2043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19077285640119232, "grad_norm": 0.1306590735912323, "learning_rate": 2e-07, "loss": 0.0283, "step": 2044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1908661895011929, "grad_norm": 0.12630999088287354, "learning_rate": 2e-07, "loss": 0.0125, "step": 2045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1909595226011935, "grad_norm": 0.14573551714420319, "learning_rate": 2e-07, "loss": 0.0296, "step": 2046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19105285570119407, "grad_norm": 0.12549155950546265, "learning_rate": 2e-07, "loss": 0.0583, "step": 2047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19114618880119466, "grad_norm": 0.12897123396396637, "learning_rate": 2e-07, "loss": 0.0142, "step": 2048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008980887276785698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 621.1405029296875, "completions/mean_terminated_length": 589.6502685546875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.19123952190119525, "grad_norm": 0.13816216588020325, "learning_rate": 2e-07, "loss": 0.0352, "num_tokens": 1393137939.0, "reward": 0.6235700845718384, "reward_std": 0.17483706772327423, "rewards/simpleverify_reward/mean": 0.6235700249671936, "rewards/simpleverify_reward/std": 0.48449406027793884, "step": 2049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19133285500119582, "grad_norm": 0.13441555202007294, "learning_rate": 2e-07, "loss": 0.0308, "step": 2050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1914261881011964, "grad_norm": 0.1378239244222641, "learning_rate": 2e-07, "loss": -0.0108, "step": 2051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.191519521201197, "grad_norm": 0.14629487693309784, "learning_rate": 2e-07, "loss": 0.035, "step": 2052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1916128543011976, "grad_norm": 0.12785065174102783, "learning_rate": 2e-07, "loss": 0.0105, "step": 2053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19170618740119816, "grad_norm": 0.13275893032550812, "learning_rate": 2e-07, "loss": 0.0131, "step": 2054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19179952050119875, "grad_norm": 0.13304942846298218, "learning_rate": 2e-07, "loss": -0.0095, "step": 2055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19189285360119934, "grad_norm": 0.12484243512153625, "learning_rate": 2e-07, "loss": 0.0526, "step": 2056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1919861867011999, "grad_norm": 0.13004404306411743, "learning_rate": 2e-07, "loss": 0.0123, "step": 2057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1920795198012005, "grad_norm": 0.2022404670715332, "learning_rate": 2e-07, "loss": -0.003, "step": 2058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1921728529012011, "grad_norm": 0.14473304152488708, "learning_rate": 2e-07, "loss": 0.0643, "step": 2059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19226618600120166, "grad_norm": 0.14179791510105133, "learning_rate": 2e-07, "loss": -0.0284, "step": 2060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19235951910120225, "grad_norm": 0.1339181810617447, "learning_rate": 2e-07, "loss": -0.0113, "step": 2061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19245285220120284, "grad_norm": 0.1328258365392685, "learning_rate": 2e-07, "loss": -0.0076, "step": 2062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1925461853012034, "grad_norm": 0.14393477141857147, "learning_rate": 2e-07, "loss": 0.0397, "step": 2063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.192639518401204, "grad_norm": 0.12550607323646545, "learning_rate": 2e-07, "loss": 0.024, "step": 2064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1927328515012046, "grad_norm": 0.13866785168647766, "learning_rate": 2e-07, "loss": 0.0372, "step": 2065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19282618460120515, "grad_norm": 0.13745541870594025, "learning_rate": 2e-07, "loss": 0.0199, "step": 2066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19291951770120574, "grad_norm": 0.21414770185947418, "learning_rate": 2e-07, "loss": 0.0039, "step": 2067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19301285080120634, "grad_norm": 0.12601198256015778, "learning_rate": 2e-07, "loss": 0.0283, "step": 2068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1931061839012069, "grad_norm": 0.13887163996696472, "learning_rate": 2e-07, "loss": 0.0486, "step": 2069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1931995170012075, "grad_norm": 0.12245846539735794, "learning_rate": 2e-07, "loss": 0.0181, "step": 2070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19329285010120809, "grad_norm": 0.1408935785293579, "learning_rate": 2e-07, "loss": 0.0514, "step": 2071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19338618320120865, "grad_norm": 0.13606879115104675, "learning_rate": 2e-07, "loss": 0.0526, "step": 2072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19347951630120924, "grad_norm": 0.1314115673303604, "learning_rate": 2e-07, "loss": 0.0333, "step": 2073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19357284940120983, "grad_norm": 0.1365850865840912, "learning_rate": 2e-07, "loss": 0.0171, "step": 2074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19366618250121043, "grad_norm": 0.11020214110612869, "learning_rate": 2e-07, "loss": 0.0461, "step": 2075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.193759515601211, "grad_norm": 0.42245760560035706, "learning_rate": 2e-07, "loss": 0.0189, "step": 2076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19385284870121158, "grad_norm": 0.17467431724071503, "learning_rate": 2e-07, "loss": 0.011, "step": 2077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19394618180121218, "grad_norm": 0.12905199825763702, "learning_rate": 2e-07, "loss": 0.0286, "step": 2078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19403951490121274, "grad_norm": 0.1307622492313385, "learning_rate": 2e-07, "loss": -0.0261, "step": 2079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19413284800121333, "grad_norm": 0.12891347706317902, "learning_rate": 2e-07, "loss": 0.0312, "step": 2080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19422618110121392, "grad_norm": 0.16293098032474518, "learning_rate": 2e-07, "loss": 0.0479, "step": 2081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1943195142012145, "grad_norm": 0.13904277980327606, "learning_rate": 2e-07, "loss": 0.0098, "step": 2082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19441284730121508, "grad_norm": 0.14107856154441833, "learning_rate": 2e-07, "loss": 0.0439, "step": 2083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19450618040121567, "grad_norm": 0.13813064992427826, "learning_rate": 2e-07, "loss": 0.0029, "step": 2084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19459951350121624, "grad_norm": 0.1166517436504364, "learning_rate": 2e-07, "loss": 0.0467, "step": 2085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19469284660121683, "grad_norm": 0.1782504916191101, "learning_rate": 2e-07, "loss": -0.0165, "step": 2086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19478617970121742, "grad_norm": 0.13020426034927368, "learning_rate": 2e-07, "loss": 0.0333, "step": 2087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19487951280121799, "grad_norm": 0.1313367635011673, "learning_rate": 2e-07, "loss": 0.0231, "step": 2088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19497284590121858, "grad_norm": 0.1328446865081787, "learning_rate": 2e-07, "loss": 0.0104, "step": 2089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19506617900121917, "grad_norm": 0.14593270421028137, "learning_rate": 2e-07, "loss": 0.0168, "step": 2090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19515951210121973, "grad_norm": 0.1278373748064041, "learning_rate": 2e-07, "loss": 0.0039, "step": 2091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19525284520122033, "grad_norm": 0.13942936062812805, "learning_rate": 2e-07, "loss": 0.0317, "step": 2092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19534617830122092, "grad_norm": 0.1356835663318634, "learning_rate": 2e-07, "loss": 0.0623, "step": 2093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1954395114012215, "grad_norm": 0.14816170930862427, "learning_rate": 2e-07, "loss": 0.0392, "step": 2094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19553284450122208, "grad_norm": 0.1527928113937378, "learning_rate": 2e-07, "loss": 0.0111, "step": 2095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19562617760122267, "grad_norm": 0.11939983814954758, "learning_rate": 2e-07, "loss": -0.0336, "step": 2096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19571951070122326, "grad_norm": 0.12709404528141022, "learning_rate": 2e-07, "loss": 0.0484, "step": 2097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19581284380122382, "grad_norm": 0.1341167837381363, "learning_rate": 2e-07, "loss": 0.0576, "step": 2098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19590617690122442, "grad_norm": 0.11741939932107925, "learning_rate": 2e-07, "loss": -0.0014, "step": 2099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.195999510001225, "grad_norm": 0.155959814786911, "learning_rate": 2e-07, "loss": 0.0223, "step": 2100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19609284310122557, "grad_norm": 0.1319066435098648, "learning_rate": 2e-07, "loss": 0.0044, "step": 2101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19618617620122616, "grad_norm": 0.12806671857833862, "learning_rate": 2e-07, "loss": 0.036, "step": 2102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19627950930122676, "grad_norm": 0.13918417692184448, "learning_rate": 2e-07, "loss": 0.0182, "step": 2103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19637284240122732, "grad_norm": 0.13889160752296448, "learning_rate": 2e-07, "loss": -0.0009, "step": 2104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1964661755012279, "grad_norm": 0.1448560208082199, "learning_rate": 2e-07, "loss": 0.0411, "step": 2105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1965595086012285, "grad_norm": 0.12901759147644043, "learning_rate": 2e-07, "loss": 0.0262, "step": 2106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19665284170122907, "grad_norm": 0.14847217500209808, "learning_rate": 2e-07, "loss": 0.0547, "step": 2107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19674617480122966, "grad_norm": 0.17445239424705505, "learning_rate": 2e-07, "loss": 0.0248, "step": 2108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19683950790123025, "grad_norm": 0.12774260342121124, "learning_rate": 2e-07, "loss": 0.0214, "step": 2109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19693284100123082, "grad_norm": 0.12575243413448334, "learning_rate": 2e-07, "loss": -0.0035, "step": 2110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1970261741012314, "grad_norm": 0.14290933310985565, "learning_rate": 2e-07, "loss": 0.0552, "step": 2111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.197119507201232, "grad_norm": 0.14731821417808533, "learning_rate": 2e-07, "loss": 0.0309, "step": 2112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009852818080357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 629.9486083984375, "completions/mean_terminated_length": 595.4583740234375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.19721284030123257, "grad_norm": 0.13937370479106903, "learning_rate": 2e-07, "loss": 0.0692, "num_tokens": 1435032328.0, "reward": 0.6246512532234192, "reward_std": 0.1760890781879425, "rewards/simpleverify_reward/mean": 0.6246512532234192, "rewards/simpleverify_reward/std": 0.48421710729599, "step": 2113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19730617340123316, "grad_norm": 0.13251301646232605, "learning_rate": 2e-07, "loss": 0.041, "step": 2114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19739950650123375, "grad_norm": 0.13234274089336395, "learning_rate": 2e-07, "loss": -0.0099, "step": 2115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19749283960123434, "grad_norm": 0.1307438164949417, "learning_rate": 2e-07, "loss": 0.0473, "step": 2116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1975861727012349, "grad_norm": 0.13851957023143768, "learning_rate": 2e-07, "loss": -0.0179, "step": 2117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1976795058012355, "grad_norm": 0.10829498618841171, "learning_rate": 2e-07, "loss": 0.0232, "step": 2118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1977728389012361, "grad_norm": 0.14571848511695862, "learning_rate": 2e-07, "loss": 0.0283, "step": 2119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19786617200123666, "grad_norm": 0.12630069255828857, "learning_rate": 2e-07, "loss": 0.0295, "step": 2120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19795950510123725, "grad_norm": 0.11628395318984985, "learning_rate": 2e-07, "loss": 0.0504, "step": 2121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19805283820123784, "grad_norm": 0.13721053302288055, "learning_rate": 2e-07, "loss": 0.0328, "step": 2122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1981461713012384, "grad_norm": 0.12931771576404572, "learning_rate": 2e-07, "loss": 0.0114, "step": 2123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.198239504401239, "grad_norm": 0.12818796932697296, "learning_rate": 2e-07, "loss": 0.0193, "step": 2124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1983328375012396, "grad_norm": 0.12518827617168427, "learning_rate": 2e-07, "loss": 0.0132, "step": 2125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19842617060124015, "grad_norm": 0.1292812079191208, "learning_rate": 2e-07, "loss": 0.016, "step": 2126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19851950370124075, "grad_norm": 0.12755903601646423, "learning_rate": 2e-07, "loss": 0.0367, "step": 2127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19861283680124134, "grad_norm": 0.14565172791481018, "learning_rate": 2e-07, "loss": 0.0083, "step": 2128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1987061699012419, "grad_norm": 0.1382758468389511, "learning_rate": 2e-07, "loss": 0.0432, "step": 2129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1987995030012425, "grad_norm": 0.1382252722978592, "learning_rate": 2e-07, "loss": 0.0303, "step": 2130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1988928361012431, "grad_norm": 0.13032706081867218, "learning_rate": 2e-07, "loss": 0.0342, "step": 2131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19898616920124365, "grad_norm": 0.13422849774360657, "learning_rate": 2e-07, "loss": 0.001, "step": 2132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19907950230124424, "grad_norm": 0.13972826302051544, "learning_rate": 2e-07, "loss": 0.008, "step": 2133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19917283540124484, "grad_norm": 0.14237481355667114, "learning_rate": 2e-07, "loss": -0.0133, "step": 2134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1992661685012454, "grad_norm": 0.12254750728607178, "learning_rate": 2e-07, "loss": -0.0098, "step": 2135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.199359501601246, "grad_norm": 0.12538908421993256, "learning_rate": 2e-07, "loss": -0.0114, "step": 2136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19945283470124658, "grad_norm": 0.1335274875164032, "learning_rate": 2e-07, "loss": 0.0143, "step": 2137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19954616780124718, "grad_norm": 0.1363525390625, "learning_rate": 2e-07, "loss": -0.0333, "step": 2138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19963950090124774, "grad_norm": 0.12447486072778702, "learning_rate": 2e-07, "loss": 0.0264, "step": 2139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19973283400124833, "grad_norm": 0.1231338232755661, "learning_rate": 2e-07, "loss": 0.0623, "step": 2140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.19982616710124892, "grad_norm": 0.13693514466285706, "learning_rate": 2e-07, "loss": 0.0119, "step": 2141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.1999195002012495, "grad_norm": 0.13250130414962769, "learning_rate": 2e-07, "loss": -0.0269, "step": 2142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20001283330125008, "grad_norm": 0.126338392496109, "learning_rate": 2e-07, "loss": 0.0172, "step": 2143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20010616640125067, "grad_norm": 0.1334684193134308, "learning_rate": 2e-07, "loss": 0.049, "step": 2144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20019949950125124, "grad_norm": 0.16273587942123413, "learning_rate": 2e-07, "loss": 0.032, "step": 2145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20029283260125183, "grad_norm": 0.12837401032447815, "learning_rate": 2e-07, "loss": -0.0034, "step": 2146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20038616570125242, "grad_norm": 0.11919860541820526, "learning_rate": 2e-07, "loss": 0.0311, "step": 2147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.200479498801253, "grad_norm": 0.14974726736545563, "learning_rate": 2e-07, "loss": -0.0173, "step": 2148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20057283190125358, "grad_norm": 0.12926071882247925, "learning_rate": 2e-07, "loss": 0.0275, "step": 2149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20066616500125417, "grad_norm": 0.13605505228042603, "learning_rate": 2e-07, "loss": 0.0283, "step": 2150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20075949810125474, "grad_norm": 0.14126770198345184, "learning_rate": 2e-07, "loss": -0.0326, "step": 2151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20085283120125533, "grad_norm": 0.2894771695137024, "learning_rate": 2e-07, "loss": 0.0647, "step": 2152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20094616430125592, "grad_norm": 0.13805672526359558, "learning_rate": 2e-07, "loss": 0.0097, "step": 2153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20103949740125648, "grad_norm": 0.16674578189849854, "learning_rate": 2e-07, "loss": -0.0191, "step": 2154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20113283050125708, "grad_norm": 0.13608667254447937, "learning_rate": 2e-07, "loss": 0.0144, "step": 2155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20122616360125767, "grad_norm": 0.13364991545677185, "learning_rate": 2e-07, "loss": 0.0863, "step": 2156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20131949670125826, "grad_norm": 0.1404200792312622, "learning_rate": 2e-07, "loss": 0.005, "step": 2157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20141282980125882, "grad_norm": 0.16753411293029785, "learning_rate": 2e-07, "loss": 0.0109, "step": 2158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20150616290125942, "grad_norm": 0.13755065202713013, "learning_rate": 2e-07, "loss": 0.0564, "step": 2159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20159949600126, "grad_norm": 0.15062540769577026, "learning_rate": 2e-07, "loss": 0.0459, "step": 2160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20169282910126057, "grad_norm": 0.13488022983074188, "learning_rate": 2e-07, "loss": -0.0031, "step": 2161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20178616220126117, "grad_norm": 0.14370547235012054, "learning_rate": 2e-07, "loss": 0.0303, "step": 2162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20187949530126176, "grad_norm": 0.14172962307929993, "learning_rate": 2e-07, "loss": -0.0111, "step": 2163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20197282840126232, "grad_norm": 0.13476237654685974, "learning_rate": 2e-07, "loss": 0.0357, "step": 2164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20206616150126291, "grad_norm": 0.1525455266237259, "learning_rate": 2e-07, "loss": -0.0049, "step": 2165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2021594946012635, "grad_norm": 0.13718955218791962, "learning_rate": 2e-07, "loss": 0.0217, "step": 2166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20225282770126407, "grad_norm": 0.1331365704536438, "learning_rate": 2e-07, "loss": 0.0386, "step": 2167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20234616080126466, "grad_norm": 0.1426374316215515, "learning_rate": 2e-07, "loss": 0.0006, "step": 2168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20243949390126526, "grad_norm": 0.1502739042043686, "learning_rate": 2e-07, "loss": 0.0516, "step": 2169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20253282700126582, "grad_norm": 0.14679895341396332, "learning_rate": 2e-07, "loss": 0.007, "step": 2170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2026261601012664, "grad_norm": 0.14002646505832672, "learning_rate": 2e-07, "loss": 0.0381, "step": 2171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.202719493201267, "grad_norm": 0.16434414684772491, "learning_rate": 2e-07, "loss": 0.0556, "step": 2172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20281282630126757, "grad_norm": 0.14029616117477417, "learning_rate": 2e-07, "loss": -0.0146, "step": 2173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20290615940126816, "grad_norm": 0.15086455643177032, "learning_rate": 2e-07, "loss": 0.06, "step": 2174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20299949250126875, "grad_norm": 0.13477592170238495, "learning_rate": 2e-07, "loss": 0.0468, "step": 2175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20309282560126932, "grad_norm": 0.1533563733100891, "learning_rate": 2e-07, "loss": 0.0095, "step": 2176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010114397321428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 627.794921875, "completions/mean_terminated_length": 592.3577270507812, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.2031861587012699, "grad_norm": 0.12680374085903168, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 1476804146.0, "reward": 0.6364397406578064, "reward_std": 0.1716804802417755, "rewards/simpleverify_reward/mean": 0.6364397406578064, "rewards/simpleverify_reward/std": 0.4810282588005066, "step": 2177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2032794918012705, "grad_norm": 0.13784533739089966, "learning_rate": 2e-07, "loss": 0.0165, "step": 2178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2033728249012711, "grad_norm": 0.1357499212026596, "learning_rate": 2e-07, "loss": 0.0141, "step": 2179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20346615800127166, "grad_norm": 0.1367364376783371, "learning_rate": 2e-07, "loss": 0.0288, "step": 2180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20355949110127225, "grad_norm": 0.1486557126045227, "learning_rate": 2e-07, "loss": 0.0366, "step": 2181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20365282420127284, "grad_norm": 0.12914809584617615, "learning_rate": 2e-07, "loss": 0.0411, "step": 2182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2037461573012734, "grad_norm": 0.13629017770290375, "learning_rate": 2e-07, "loss": 0.029, "step": 2183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.203839490401274, "grad_norm": 0.13471633195877075, "learning_rate": 2e-07, "loss": 0.044, "step": 2184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2039328235012746, "grad_norm": 0.13050241768360138, "learning_rate": 2e-07, "loss": 0.0171, "step": 2185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20402615660127515, "grad_norm": 0.13189242780208588, "learning_rate": 2e-07, "loss": 0.0159, "step": 2186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20411948970127575, "grad_norm": 0.13076217472553253, "learning_rate": 2e-07, "loss": 0.0522, "step": 2187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20421282280127634, "grad_norm": 0.1266879290342331, "learning_rate": 2e-07, "loss": 0.0081, "step": 2188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2043061559012769, "grad_norm": 0.15487824380397797, "learning_rate": 2e-07, "loss": 0.0271, "step": 2189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2043994890012775, "grad_norm": 0.1416330635547638, "learning_rate": 2e-07, "loss": -0.0033, "step": 2190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2044928221012781, "grad_norm": 0.14031192660331726, "learning_rate": 2e-07, "loss": 0.0407, "step": 2191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20458615520127865, "grad_norm": 0.14013458788394928, "learning_rate": 2e-07, "loss": 0.0393, "step": 2192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20467948830127924, "grad_norm": 0.13578231632709503, "learning_rate": 2e-07, "loss": 0.0078, "step": 2193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20477282140127984, "grad_norm": 0.12682318687438965, "learning_rate": 2e-07, "loss": -0.0261, "step": 2194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2048661545012804, "grad_norm": 0.12077781558036804, "learning_rate": 2e-07, "loss": 0.014, "step": 2195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.204959487601281, "grad_norm": 0.13998205959796906, "learning_rate": 2e-07, "loss": 0.0243, "step": 2196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20505282070128159, "grad_norm": 0.11716945469379425, "learning_rate": 2e-07, "loss": 0.037, "step": 2197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20514615380128218, "grad_norm": 0.13920149207115173, "learning_rate": 2e-07, "loss": 0.0311, "step": 2198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20523948690128274, "grad_norm": 0.1269838511943817, "learning_rate": 2e-07, "loss": 0.0311, "step": 2199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20533282000128333, "grad_norm": 0.16309025883674622, "learning_rate": 2e-07, "loss": 0.0078, "step": 2200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20542615310128393, "grad_norm": 0.14266115427017212, "learning_rate": 2e-07, "loss": 0.0378, "step": 2201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2055194862012845, "grad_norm": 0.1387840062379837, "learning_rate": 2e-07, "loss": -0.0118, "step": 2202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20561281930128508, "grad_norm": 0.13319367170333862, "learning_rate": 2e-07, "loss": -0.0146, "step": 2203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20570615240128567, "grad_norm": 0.13310669362545013, "learning_rate": 2e-07, "loss": 0.0308, "step": 2204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20579948550128624, "grad_norm": 0.12871991097927094, "learning_rate": 2e-07, "loss": 0.0469, "step": 2205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20589281860128683, "grad_norm": 0.12678378820419312, "learning_rate": 2e-07, "loss": 0.0002, "step": 2206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20598615170128742, "grad_norm": 0.13681796193122864, "learning_rate": 2e-07, "loss": -0.0137, "step": 2207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.206079484801288, "grad_norm": 0.14061179757118225, "learning_rate": 2e-07, "loss": 0.0623, "step": 2208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20617281790128858, "grad_norm": 0.12841878831386566, "learning_rate": 2e-07, "loss": 0.0103, "step": 2209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20626615100128917, "grad_norm": 0.13524489104747772, "learning_rate": 2e-07, "loss": 0.0338, "step": 2210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20635948410128974, "grad_norm": 0.12110421061515808, "learning_rate": 2e-07, "loss": 0.0493, "step": 2211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20645281720129033, "grad_norm": 0.1355554312467575, "learning_rate": 2e-07, "loss": -0.0267, "step": 2212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20654615030129092, "grad_norm": 0.14319929480552673, "learning_rate": 2e-07, "loss": 0.0102, "step": 2213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20663948340129148, "grad_norm": 0.13738730549812317, "learning_rate": 2e-07, "loss": 0.0249, "step": 2214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20673281650129208, "grad_norm": 0.13152654469013214, "learning_rate": 2e-07, "loss": -0.0043, "step": 2215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20682614960129267, "grad_norm": 0.1363997757434845, "learning_rate": 2e-07, "loss": 0.0259, "step": 2216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20691948270129323, "grad_norm": 0.1330546736717224, "learning_rate": 2e-07, "loss": 0.038, "step": 2217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20701281580129383, "grad_norm": 0.12124863266944885, "learning_rate": 2e-07, "loss": 0.013, "step": 2218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20710614890129442, "grad_norm": 0.14265406131744385, "learning_rate": 2e-07, "loss": 0.0454, "step": 2219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.207199482001295, "grad_norm": 0.13326601684093475, "learning_rate": 2e-07, "loss": 0.045, "step": 2220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20729281510129557, "grad_norm": 0.14154662191867828, "learning_rate": 2e-07, "loss": 0.0147, "step": 2221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20738614820129617, "grad_norm": 0.13577722012996674, "learning_rate": 2e-07, "loss": -0.029, "step": 2222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20747948130129676, "grad_norm": 0.13199861347675323, "learning_rate": 2e-07, "loss": 0.0437, "step": 2223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20757281440129732, "grad_norm": 0.13864371180534363, "learning_rate": 2e-07, "loss": 0.0142, "step": 2224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20766614750129792, "grad_norm": 0.13210584223270416, "learning_rate": 2e-07, "loss": 0.0534, "step": 2225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2077594806012985, "grad_norm": 0.12624220550060272, "learning_rate": 2e-07, "loss": -0.0017, "step": 2226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20785281370129907, "grad_norm": 0.17965185642242432, "learning_rate": 2e-07, "loss": 0.0245, "step": 2227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20794614680129966, "grad_norm": 0.137176513671875, "learning_rate": 2e-07, "loss": 0.0185, "step": 2228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20803947990130026, "grad_norm": 0.13284356892108917, "learning_rate": 2e-07, "loss": 0.0839, "step": 2229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20813281300130082, "grad_norm": 0.14883238077163696, "learning_rate": 2e-07, "loss": -0.0086, "step": 2230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2082261461013014, "grad_norm": 0.14229613542556763, "learning_rate": 2e-07, "loss": 0.0463, "step": 2231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.208319479201302, "grad_norm": 0.1320336014032364, "learning_rate": 2e-07, "loss": 0.0207, "step": 2232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20841281230130257, "grad_norm": 0.13614624738693237, "learning_rate": 2e-07, "loss": 0.0313, "step": 2233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20850614540130316, "grad_norm": 0.13710087537765503, "learning_rate": 2e-07, "loss": 0.0244, "step": 2234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20859947850130375, "grad_norm": 0.14568546414375305, "learning_rate": 2e-07, "loss": 0.0227, "step": 2235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20869281160130432, "grad_norm": 0.13461199402809143, "learning_rate": 2e-07, "loss": 0.0749, "step": 2236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2087861447013049, "grad_norm": 0.13689970970153809, "learning_rate": 2e-07, "loss": -0.0181, "step": 2237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2088794778013055, "grad_norm": 0.11833404004573822, "learning_rate": 2e-07, "loss": 0.0521, "step": 2238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20897281090130607, "grad_norm": 0.13790574669837952, "learning_rate": 2e-07, "loss": 0.0366, "step": 2239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20906614400130666, "grad_norm": 0.13585473597049713, "learning_rate": 2e-07, "loss": 0.0397, "step": 2240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010585239955357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 643.7006225585938, "completions/mean_terminated_length": 606.7662353515625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.20915947710130725, "grad_norm": 0.11351791024208069, "learning_rate": 2e-07, "loss": 0.0011, "num_tokens": 1519406149.0, "reward": 0.6213204860687256, "reward_std": 0.17462831735610962, "rewards/simpleverify_reward/mean": 0.6213204264640808, "rewards/simpleverify_reward/std": 0.4850623607635498, "step": 2241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20925281020130784, "grad_norm": 0.14349308609962463, "learning_rate": 2e-07, "loss": 0.0218, "step": 2242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2093461433013084, "grad_norm": 0.12408077716827393, "learning_rate": 2e-07, "loss": 0.0374, "step": 2243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.209439476401309, "grad_norm": 0.1251942366361618, "learning_rate": 2e-07, "loss": -0.0147, "step": 2244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2095328095013096, "grad_norm": 0.13669657707214355, "learning_rate": 2e-07, "loss": -0.0164, "step": 2245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20962614260131016, "grad_norm": 0.11843916773796082, "learning_rate": 2e-07, "loss": 0.0416, "step": 2246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20971947570131075, "grad_norm": 0.1312740594148636, "learning_rate": 2e-07, "loss": 0.0419, "step": 2247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.20981280880131134, "grad_norm": 0.12000513076782227, "learning_rate": 2e-07, "loss": 0.0008, "step": 2248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2099061419013119, "grad_norm": 0.1360626369714737, "learning_rate": 2e-07, "loss": 0.0131, "step": 2249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2099994750013125, "grad_norm": 0.12431520223617554, "learning_rate": 2e-07, "loss": 0.0612, "step": 2250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2100928081013131, "grad_norm": 0.12137124687433243, "learning_rate": 2e-07, "loss": 0.0171, "step": 2251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21018614120131365, "grad_norm": 0.1274702101945877, "learning_rate": 2e-07, "loss": 0.0361, "step": 2252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21027947430131425, "grad_norm": 0.15389187633991241, "learning_rate": 2e-07, "loss": 0.025, "step": 2253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21037280740131484, "grad_norm": 0.13659945130348206, "learning_rate": 2e-07, "loss": 0.0464, "step": 2254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2104661405013154, "grad_norm": 0.12139477580785751, "learning_rate": 2e-07, "loss": 0.0245, "step": 2255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.210559473601316, "grad_norm": 0.14551663398742676, "learning_rate": 2e-07, "loss": 0.0459, "step": 2256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2106528067013166, "grad_norm": 0.1292411983013153, "learning_rate": 2e-07, "loss": -0.0302, "step": 2257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21074613980131715, "grad_norm": 0.13494211435317993, "learning_rate": 2e-07, "loss": 0.0259, "step": 2258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21083947290131774, "grad_norm": 0.1279744803905487, "learning_rate": 2e-07, "loss": 0.0587, "step": 2259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21093280600131833, "grad_norm": 0.12434187531471252, "learning_rate": 2e-07, "loss": -0.0053, "step": 2260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21102613910131893, "grad_norm": 0.12350655347108841, "learning_rate": 2e-07, "loss": 0.0447, "step": 2261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2111194722013195, "grad_norm": 0.13516467809677124, "learning_rate": 2e-07, "loss": 0.0352, "step": 2262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21121280530132008, "grad_norm": 0.14506122469902039, "learning_rate": 2e-07, "loss": -0.0034, "step": 2263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21130613840132068, "grad_norm": 0.12468710541725159, "learning_rate": 2e-07, "loss": 0.0409, "step": 2264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21139947150132124, "grad_norm": 0.12582720816135406, "learning_rate": 2e-07, "loss": 0.003, "step": 2265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21149280460132183, "grad_norm": 0.13199901580810547, "learning_rate": 2e-07, "loss": 0.0292, "step": 2266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21158613770132242, "grad_norm": 0.1256171464920044, "learning_rate": 2e-07, "loss": 0.0249, "step": 2267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.211679470801323, "grad_norm": 0.13507325947284698, "learning_rate": 2e-07, "loss": -0.0002, "step": 2268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21177280390132358, "grad_norm": 0.12713368237018585, "learning_rate": 2e-07, "loss": 0.0476, "step": 2269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21186613700132417, "grad_norm": 0.13991907238960266, "learning_rate": 2e-07, "loss": 0.0596, "step": 2270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21195947010132474, "grad_norm": 0.1254131942987442, "learning_rate": 2e-07, "loss": -0.0131, "step": 2271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21205280320132533, "grad_norm": 0.14195223152637482, "learning_rate": 2e-07, "loss": 0.0292, "step": 2272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21214613630132592, "grad_norm": 0.12449552863836288, "learning_rate": 2e-07, "loss": 0.0115, "step": 2273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21223946940132649, "grad_norm": 0.14159028232097626, "learning_rate": 2e-07, "loss": -0.0045, "step": 2274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21233280250132708, "grad_norm": 0.12826327979564667, "learning_rate": 2e-07, "loss": 0.0026, "step": 2275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21242613560132767, "grad_norm": 0.1404125988483429, "learning_rate": 2e-07, "loss": 0.0625, "step": 2276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21251946870132823, "grad_norm": 0.14949820935726166, "learning_rate": 2e-07, "loss": 0.0298, "step": 2277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21261280180132883, "grad_norm": 0.13975505530834198, "learning_rate": 2e-07, "loss": 0.0067, "step": 2278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21270613490132942, "grad_norm": 0.14105868339538574, "learning_rate": 2e-07, "loss": 0.0092, "step": 2279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21279946800132998, "grad_norm": 0.1289086639881134, "learning_rate": 2e-07, "loss": -0.0123, "step": 2280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21289280110133058, "grad_norm": 0.14238393306732178, "learning_rate": 2e-07, "loss": 0.0284, "step": 2281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21298613420133117, "grad_norm": 0.149738147854805, "learning_rate": 2e-07, "loss": 0.0456, "step": 2282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21307946730133176, "grad_norm": 0.1313241720199585, "learning_rate": 2e-07, "loss": 0.0281, "step": 2283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21317280040133232, "grad_norm": 0.14210982620716095, "learning_rate": 2e-07, "loss": 0.0126, "step": 2284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21326613350133292, "grad_norm": 0.12760142982006073, "learning_rate": 2e-07, "loss": 0.0226, "step": 2285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2133594666013335, "grad_norm": 0.13607583940029144, "learning_rate": 2e-07, "loss": 0.0626, "step": 2286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21345279970133407, "grad_norm": 0.1333509385585785, "learning_rate": 2e-07, "loss": 0.0021, "step": 2287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21354613280133466, "grad_norm": 0.13705310225486755, "learning_rate": 2e-07, "loss": 0.0315, "step": 2288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21363946590133526, "grad_norm": 0.15610744059085846, "learning_rate": 2e-07, "loss": 0.0385, "step": 2289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21373279900133582, "grad_norm": 0.13528165221214294, "learning_rate": 2e-07, "loss": 0.0944, "step": 2290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2138261321013364, "grad_norm": 0.15812267363071442, "learning_rate": 2e-07, "loss": 0.0481, "step": 2291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.213919465201337, "grad_norm": 0.1344677209854126, "learning_rate": 2e-07, "loss": 0.0048, "step": 2292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21401279830133757, "grad_norm": 0.13811402022838593, "learning_rate": 2e-07, "loss": 0.062, "step": 2293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21410613140133816, "grad_norm": 0.13607700169086456, "learning_rate": 2e-07, "loss": 0.0908, "step": 2294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21419946450133875, "grad_norm": 0.13305304944515228, "learning_rate": 2e-07, "loss": -0.0208, "step": 2295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21429279760133932, "grad_norm": 0.14102695882320404, "learning_rate": 2e-07, "loss": 0.0151, "step": 2296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2143861307013399, "grad_norm": 0.12146824598312378, "learning_rate": 2e-07, "loss": 0.0352, "step": 2297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2144794638013405, "grad_norm": 0.14284424483776093, "learning_rate": 2e-07, "loss": 0.0351, "step": 2298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21457279690134107, "grad_norm": 0.11069469898939133, "learning_rate": 2e-07, "loss": 0.0269, "step": 2299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21466613000134166, "grad_norm": 0.13185881078243256, "learning_rate": 2e-07, "loss": 0.0139, "step": 2300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21475946310134225, "grad_norm": 0.12587696313858032, "learning_rate": 2e-07, "loss": 0.0278, "step": 2301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21485279620134284, "grad_norm": 0.1323939561843872, "learning_rate": 2e-07, "loss": 0.0077, "step": 2302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2149461293013434, "grad_norm": 0.14714030921459198, "learning_rate": 2e-07, "loss": 0.0295, "step": 2303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.215039462401344, "grad_norm": 0.13663747906684875, "learning_rate": 2e-07, "loss": -0.0263, "step": 2304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010271344866071397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 632.8683471679688, "completions/mean_terminated_length": 596.9281616210938, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.2151327955013446, "grad_norm": 0.1281842738389969, "learning_rate": 2e-07, "loss": 0.0707, "num_tokens": 1561440709.0, "reward": 0.6358293890953064, "reward_std": 0.16936269402503967, "rewards/simpleverify_reward/mean": 0.6358293890953064, "rewards/simpleverify_reward/std": 0.48120108246803284, "step": 2305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21522612860134516, "grad_norm": 0.1206863522529602, "learning_rate": 2e-07, "loss": 0.028, "step": 2306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21531946170134575, "grad_norm": 0.11955222487449646, "learning_rate": 2e-07, "loss": 0.0428, "step": 2307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21541279480134634, "grad_norm": 0.11896443367004395, "learning_rate": 2e-07, "loss": 0.0444, "step": 2308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2155061279013469, "grad_norm": 0.12493612617254257, "learning_rate": 2e-07, "loss": 0.0201, "step": 2309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2155994610013475, "grad_norm": 0.14842964708805084, "learning_rate": 2e-07, "loss": -0.0116, "step": 2310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2156927941013481, "grad_norm": 0.13239946961402893, "learning_rate": 2e-07, "loss": 0.0182, "step": 2311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21578612720134865, "grad_norm": 0.13741400837898254, "learning_rate": 2e-07, "loss": 0.0348, "step": 2312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21587946030134925, "grad_norm": 0.1294623762369156, "learning_rate": 2e-07, "loss": 0.0509, "step": 2313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21597279340134984, "grad_norm": 0.13177455961704254, "learning_rate": 2e-07, "loss": 0.0228, "step": 2314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2160661265013504, "grad_norm": 0.1309087574481964, "learning_rate": 2e-07, "loss": 0.0447, "step": 2315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.216159459601351, "grad_norm": 0.13819552958011627, "learning_rate": 2e-07, "loss": 0.0275, "step": 2316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2162527927013516, "grad_norm": 0.12594059109687805, "learning_rate": 2e-07, "loss": 0.0309, "step": 2317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21634612580135215, "grad_norm": 0.1281934678554535, "learning_rate": 2e-07, "loss": 0.0194, "step": 2318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21643945890135274, "grad_norm": 0.14670638740062714, "learning_rate": 2e-07, "loss": 0.0018, "step": 2319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21653279200135334, "grad_norm": 0.14128999412059784, "learning_rate": 2e-07, "loss": 0.0563, "step": 2320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2166261251013539, "grad_norm": 0.12362868338823318, "learning_rate": 2e-07, "loss": 0.0145, "step": 2321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2167194582013545, "grad_norm": 0.1351068913936615, "learning_rate": 2e-07, "loss": 0.0204, "step": 2322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21681279130135508, "grad_norm": 0.13755571842193604, "learning_rate": 2e-07, "loss": 0.0185, "step": 2323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21690612440135568, "grad_norm": 0.1624317467212677, "learning_rate": 2e-07, "loss": 0.0205, "step": 2324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21699945750135624, "grad_norm": 0.12980917096138, "learning_rate": 2e-07, "loss": 0.0097, "step": 2325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21709279060135683, "grad_norm": 0.14179974794387817, "learning_rate": 2e-07, "loss": 0.0443, "step": 2326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21718612370135743, "grad_norm": 0.11776971817016602, "learning_rate": 2e-07, "loss": 0.0006, "step": 2327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.217279456801358, "grad_norm": 0.13906636834144592, "learning_rate": 2e-07, "loss": 0.0555, "step": 2328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21737278990135858, "grad_norm": 0.1274702548980713, "learning_rate": 2e-07, "loss": -0.01, "step": 2329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21746612300135917, "grad_norm": 0.1376921832561493, "learning_rate": 2e-07, "loss": 0.0302, "step": 2330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21755945610135974, "grad_norm": 0.12755465507507324, "learning_rate": 2e-07, "loss": 0.0226, "step": 2331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21765278920136033, "grad_norm": 0.1292119175195694, "learning_rate": 2e-07, "loss": 0.0206, "step": 2332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21774612230136092, "grad_norm": 0.13331297039985657, "learning_rate": 2e-07, "loss": 0.0274, "step": 2333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2178394554013615, "grad_norm": 0.14038382470607758, "learning_rate": 2e-07, "loss": 0.0308, "step": 2334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21793278850136208, "grad_norm": 0.13483187556266785, "learning_rate": 2e-07, "loss": 0.0172, "step": 2335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21802612160136267, "grad_norm": 0.12990142405033112, "learning_rate": 2e-07, "loss": 0.0536, "step": 2336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21811945470136324, "grad_norm": 0.1341685801744461, "learning_rate": 2e-07, "loss": 0.0293, "step": 2337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21821278780136383, "grad_norm": 0.12747296690940857, "learning_rate": 2e-07, "loss": 0.0039, "step": 2338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21830612090136442, "grad_norm": 0.12237270921468735, "learning_rate": 2e-07, "loss": 0.0464, "step": 2339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21839945400136498, "grad_norm": 0.13124117255210876, "learning_rate": 2e-07, "loss": 0.0649, "step": 2340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21849278710136558, "grad_norm": 0.11371566355228424, "learning_rate": 2e-07, "loss": -0.0112, "step": 2341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21858612020136617, "grad_norm": 0.13129395246505737, "learning_rate": 2e-07, "loss": 0.044, "step": 2342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21867945330136673, "grad_norm": 0.13029909133911133, "learning_rate": 2e-07, "loss": -0.0044, "step": 2343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21877278640136733, "grad_norm": 0.13145920634269714, "learning_rate": 2e-07, "loss": 0.0218, "step": 2344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21886611950136792, "grad_norm": 0.14842307567596436, "learning_rate": 2e-07, "loss": 0.0169, "step": 2345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2189594526013685, "grad_norm": 0.12834519147872925, "learning_rate": 2e-07, "loss": 0.0503, "step": 2346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21905278570136907, "grad_norm": 0.1272391974925995, "learning_rate": 2e-07, "loss": 0.0131, "step": 2347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21914611880136967, "grad_norm": 0.13513964414596558, "learning_rate": 2e-07, "loss": 0.0132, "step": 2348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21923945190137026, "grad_norm": 0.13601651787757874, "learning_rate": 2e-07, "loss": 0.0034, "step": 2349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21933278500137082, "grad_norm": 0.13371379673480988, "learning_rate": 2e-07, "loss": 0.0125, "step": 2350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21942611810137141, "grad_norm": 0.1232561469078064, "learning_rate": 2e-07, "loss": 0.0378, "step": 2351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.219519451201372, "grad_norm": 0.14330853521823883, "learning_rate": 2e-07, "loss": 0.0338, "step": 2352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21961278430137257, "grad_norm": 0.13117177784442902, "learning_rate": 2e-07, "loss": 0.0047, "step": 2353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21970611740137316, "grad_norm": 0.12683221697807312, "learning_rate": 2e-07, "loss": 0.0099, "step": 2354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21979945050137376, "grad_norm": 0.122899129986763, "learning_rate": 2e-07, "loss": -0.0044, "step": 2355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.21989278360137432, "grad_norm": 0.127985417842865, "learning_rate": 2e-07, "loss": 0.0392, "step": 2356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2199861167013749, "grad_norm": 0.12008952349424362, "learning_rate": 2e-07, "loss": 0.0156, "step": 2357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2200794498013755, "grad_norm": 0.13086745142936707, "learning_rate": 2e-07, "loss": 0.0197, "step": 2358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22017278290137607, "grad_norm": 0.1184086874127388, "learning_rate": 2e-07, "loss": 0.0382, "step": 2359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22026611600137666, "grad_norm": 0.13691842555999756, "learning_rate": 2e-07, "loss": 0.0018, "step": 2360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22035944910137725, "grad_norm": 0.13860419392585754, "learning_rate": 2e-07, "loss": 0.0011, "step": 2361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22045278220137782, "grad_norm": 0.1200907751917839, "learning_rate": 2e-07, "loss": 0.0242, "step": 2362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2205461153013784, "grad_norm": 0.13657982647418976, "learning_rate": 2e-07, "loss": 0.0019, "step": 2363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.220639448401379, "grad_norm": 0.12590554356575012, "learning_rate": 2e-07, "loss": 0.0541, "step": 2364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2207327815013796, "grad_norm": 0.13369132578372955, "learning_rate": 2e-07, "loss": 0.0193, "step": 2365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22082611460138016, "grad_norm": 0.12960048019886017, "learning_rate": 2e-07, "loss": 0.038, "step": 2366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22091944770138075, "grad_norm": 0.1389169991016388, "learning_rate": 2e-07, "loss": -0.0024, "step": 2367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22101278080138134, "grad_norm": 0.13383693993091583, "learning_rate": 2e-07, "loss": 0.0107, "step": 2368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0101318359375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 630.2313232421875, "completions/mean_terminated_length": 594.75732421875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.2211061139013819, "grad_norm": 0.13836167752742767, "learning_rate": 2e-07, "loss": 0.0412, "num_tokens": 1603306448.0, "reward": 0.630685031414032, "reward_std": 0.16827768087387085, "rewards/simpleverify_reward/mean": 0.6306849718093872, "rewards/simpleverify_reward/std": 0.48262354731559753, "step": 2369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2211994470013825, "grad_norm": 0.15980982780456543, "learning_rate": 2e-07, "loss": 0.0297, "step": 2370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2212927801013831, "grad_norm": 0.12128932029008865, "learning_rate": 2e-07, "loss": 0.0037, "step": 2371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22138611320138366, "grad_norm": 0.14348870515823364, "learning_rate": 2e-07, "loss": 0.0195, "step": 2372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22147944630138425, "grad_norm": 0.12883640825748444, "learning_rate": 2e-07, "loss": 0.0048, "step": 2373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22157277940138484, "grad_norm": 0.14356324076652527, "learning_rate": 2e-07, "loss": 0.0414, "step": 2374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2216661125013854, "grad_norm": 0.12526337802410126, "learning_rate": 2e-07, "loss": 0.0135, "step": 2375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.221759445601386, "grad_norm": 0.1389387995004654, "learning_rate": 2e-07, "loss": 0.0172, "step": 2376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2218527787013866, "grad_norm": 0.12593252956867218, "learning_rate": 2e-07, "loss": 0.0213, "step": 2377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22194611180138715, "grad_norm": 0.1451130509376526, "learning_rate": 2e-07, "loss": 0.0589, "step": 2378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22203944490138774, "grad_norm": 0.1360453963279724, "learning_rate": 2e-07, "loss": 0.0411, "step": 2379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22213277800138834, "grad_norm": 0.1293872594833374, "learning_rate": 2e-07, "loss": 0.031, "step": 2380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2222261111013889, "grad_norm": 0.14260068535804749, "learning_rate": 2e-07, "loss": -0.0169, "step": 2381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2223194442013895, "grad_norm": 0.13326534628868103, "learning_rate": 2e-07, "loss": -0.0326, "step": 2382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22241277730139009, "grad_norm": 0.14006340503692627, "learning_rate": 2e-07, "loss": 0.0154, "step": 2383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22250611040139065, "grad_norm": 0.12549535930156708, "learning_rate": 2e-07, "loss": 0.0346, "step": 2384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22259944350139124, "grad_norm": 0.13087137043476105, "learning_rate": 2e-07, "loss": 0.0127, "step": 2385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22269277660139183, "grad_norm": 0.1305152177810669, "learning_rate": 2e-07, "loss": 0.0032, "step": 2386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22278610970139243, "grad_norm": 0.13426345586776733, "learning_rate": 2e-07, "loss": -0.023, "step": 2387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.222879442801393, "grad_norm": 0.13531267642974854, "learning_rate": 2e-07, "loss": 0.0707, "step": 2388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22297277590139358, "grad_norm": 0.13917961716651917, "learning_rate": 2e-07, "loss": 0.0696, "step": 2389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22306610900139417, "grad_norm": 0.133058100938797, "learning_rate": 2e-07, "loss": -0.0011, "step": 2390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22315944210139474, "grad_norm": 0.136958047747612, "learning_rate": 2e-07, "loss": 0.0428, "step": 2391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22325277520139533, "grad_norm": 0.1331188976764679, "learning_rate": 2e-07, "loss": 0.0489, "step": 2392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22334610830139592, "grad_norm": 0.12682807445526123, "learning_rate": 2e-07, "loss": 0.0557, "step": 2393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.2234394414013965, "grad_norm": 0.14918924868106842, "learning_rate": 2e-07, "loss": 0.0415, "step": 2394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22353277450139708, "grad_norm": 0.12926289439201355, "learning_rate": 2e-07, "loss": 0.0046, "step": 2395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22362610760139767, "grad_norm": 0.11474764347076416, "learning_rate": 2e-07, "loss": 0.0149, "step": 2396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22371944070139824, "grad_norm": 0.13529391586780548, "learning_rate": 2e-07, "loss": 0.0207, "step": 2397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22381277380139883, "grad_norm": 0.12376414984464645, "learning_rate": 2e-07, "loss": 0.0359, "step": 2398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22390610690139942, "grad_norm": 0.1280665248632431, "learning_rate": 2e-07, "loss": 0.0509, "step": 2399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "epoch": 0.22399944000139999, "grad_norm": 0.1242837980389595, "learning_rate": 2e-07, "loss": 0.0324, "step": 2400 }, { "epoch": 0.22399944000139999, "step": 2400, "total_flos": 0.0, "train_loss": 0.004557218201868333, "train_runtime": 34670.8361, "train_samples_per_second": 62.023, "train_steps_per_second": 0.069 } ], "logging_steps": 1, "max_steps": 2400, "num_input_tokens_seen": 1603306448, "num_train_epochs": 1, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }