|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.17561912602044316, |
|
"eval_steps": 500, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00021952390752555395, |
|
"grad_norm": 129536.0, |
|
"learning_rate": 1.99775e-05, |
|
"loss": 11.4186, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0004390478150511079, |
|
"grad_norm": 154.0, |
|
"learning_rate": 1.9952500000000003e-05, |
|
"loss": 6.1068, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0006585717225766619, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 1.9927500000000002e-05, |
|
"loss": 4.8048, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0008780956301022158, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 1.99025e-05, |
|
"loss": 0.2014, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0010976195376277698, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 1.98775e-05, |
|
"loss": 0.2177, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0013171434451533237, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.98525e-05, |
|
"loss": 0.2286, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0015366673526788777, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.9827500000000003e-05, |
|
"loss": 0.1803, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0017561912602044316, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.9802500000000002e-05, |
|
"loss": 0.1814, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0019757151677299856, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.97775e-05, |
|
"loss": 0.1881, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0021952390752555395, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.97525e-05, |
|
"loss": 0.1877, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0024147629827810935, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 1.97275e-05, |
|
"loss": 0.1892, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0026342868903066474, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 1.9702500000000003e-05, |
|
"loss": 0.1921, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0028538107978322014, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.9677500000000003e-05, |
|
"loss": 0.2261, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0030733347053577553, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.9652500000000002e-05, |
|
"loss": 0.2294, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0032928586128833093, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 1.96275e-05, |
|
"loss": 0.1871, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0035123825204088632, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.96025e-05, |
|
"loss": 0.1888, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.003731906427934417, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.9577500000000004e-05, |
|
"loss": 0.1925, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.003951430335459971, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.95525e-05, |
|
"loss": 0.1974, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.004170954242985525, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.9527500000000002e-05, |
|
"loss": 0.2109, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.004390478150511079, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.9502500000000002e-05, |
|
"loss": 0.2015, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004610002058036633, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.94775e-05, |
|
"loss": 0.1631, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.004829525965562187, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 1.94525e-05, |
|
"loss": 0.195, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.005049049873087741, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.94275e-05, |
|
"loss": 0.1882, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.005268573780613295, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.9402500000000003e-05, |
|
"loss": 0.1956, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.005488097688138849, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.9377500000000002e-05, |
|
"loss": 0.2236, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.005707621595664403, |
|
"grad_norm": 0.65625, |
|
"learning_rate": 1.93525e-05, |
|
"loss": 0.1906, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.005927145503189957, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.93275e-05, |
|
"loss": 0.2174, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.006146669410715511, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.93025e-05, |
|
"loss": 0.1898, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.006366193318241065, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.9277500000000003e-05, |
|
"loss": 0.2021, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0065857172257666186, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 1.92525e-05, |
|
"loss": 0.192, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0068052411332921725, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 1.9227500000000002e-05, |
|
"loss": 0.2299, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.0070247650408177265, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 1.92025e-05, |
|
"loss": 0.2137, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.00724428894834328, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 1.91775e-05, |
|
"loss": 0.1918, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.007463812855868834, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 1.91525e-05, |
|
"loss": 0.2303, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.007683336763394388, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.91275e-05, |
|
"loss": 0.1957, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.007902860670919942, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.9102500000000002e-05, |
|
"loss": 0.2029, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.008122384578445496, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.90775e-05, |
|
"loss": 0.219, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.00834190848597105, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.90525e-05, |
|
"loss": 0.2189, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.008561432393496604, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.90275e-05, |
|
"loss": 0.2014, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.008780956301022158, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.9002500000000003e-05, |
|
"loss": 0.1757, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.009000480208547712, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.8977500000000003e-05, |
|
"loss": 0.2146, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.009220004116073266, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.8952500000000002e-05, |
|
"loss": 0.1767, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.00943952802359882, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.89275e-05, |
|
"loss": 0.1913, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.009659051931124374, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.89025e-05, |
|
"loss": 0.1938, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.009878575838649928, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.8877500000000003e-05, |
|
"loss": 0.1898, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.010098099746175482, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.8852500000000003e-05, |
|
"loss": 0.214, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.010317623653701036, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.8827500000000002e-05, |
|
"loss": 0.191, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.01053714756122659, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 1.88025e-05, |
|
"loss": 0.207, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.010756671468752144, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.87775e-05, |
|
"loss": 0.2191, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.010976195376277698, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.8752500000000004e-05, |
|
"loss": 0.2271, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.011195719283803252, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 1.87275e-05, |
|
"loss": 0.1823, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.011415243191328805, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.8702500000000003e-05, |
|
"loss": 0.2054, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.01163476709885436, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 1.8677500000000002e-05, |
|
"loss": 0.2131, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.011854291006379913, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.86525e-05, |
|
"loss": 0.1569, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.012073814913905467, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.86275e-05, |
|
"loss": 0.1877, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.012293338821431021, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.86025e-05, |
|
"loss": 0.2228, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.012512862728956575, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 1.8577500000000003e-05, |
|
"loss": 0.2096, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.01273238663648213, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.8552500000000002e-05, |
|
"loss": 0.1861, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.012951910544007683, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 1.8527500000000002e-05, |
|
"loss": 0.1832, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.013171434451533237, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 1.85025e-05, |
|
"loss": 0.2182, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.013390958359058791, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.84775e-05, |
|
"loss": 0.2285, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.013610482266584345, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 1.8452500000000003e-05, |
|
"loss": 0.2024, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.013830006174109899, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.8427500000000003e-05, |
|
"loss": 0.2115, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.014049530081635453, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.8402500000000002e-05, |
|
"loss": 0.227, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.014269053989161007, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.83775e-05, |
|
"loss": 0.185, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.01448857789668656, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.83525e-05, |
|
"loss": 0.1754, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.014708101804212115, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.8327500000000004e-05, |
|
"loss": 0.2198, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.014927625711737669, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 1.83025e-05, |
|
"loss": 0.1966, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.015147149619263223, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.8277500000000002e-05, |
|
"loss": 0.187, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.015366673526788777, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.8252500000000002e-05, |
|
"loss": 0.2088, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01558619743431433, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.82275e-05, |
|
"loss": 0.2132, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.015805721341839885, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.82025e-05, |
|
"loss": 0.2149, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.01602524524936544, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.81775e-05, |
|
"loss": 0.1931, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.016244769156890992, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.8152500000000003e-05, |
|
"loss": 0.1721, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.016464293064416546, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.8127500000000002e-05, |
|
"loss": 0.1911, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.0166838169719421, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.81025e-05, |
|
"loss": 0.2294, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.016903340879467654, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.80775e-05, |
|
"loss": 0.1867, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.017122864786993208, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 1.80525e-05, |
|
"loss": 0.1997, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.017342388694518762, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.8027500000000003e-05, |
|
"loss": 0.2095, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.017561912602044316, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.80025e-05, |
|
"loss": 0.1862, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01778143650956987, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.7977500000000002e-05, |
|
"loss": 0.1971, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.018000960417095424, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.79525e-05, |
|
"loss": 0.193, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.018220484324620978, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.79275e-05, |
|
"loss": 0.2195, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.018440008232146532, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.79025e-05, |
|
"loss": 0.1954, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.018659532139672086, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.78775e-05, |
|
"loss": 0.1546, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.01887905604719764, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.7852500000000002e-05, |
|
"loss": 0.188, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.019098579954723194, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.78275e-05, |
|
"loss": 0.2128, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.019318103862248748, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.78025e-05, |
|
"loss": 0.1846, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.0195376277697743, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.77775e-05, |
|
"loss": 0.1938, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.019757151677299856, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 1.77525e-05, |
|
"loss": 0.1781, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01997667558482541, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 1.7727500000000003e-05, |
|
"loss": 0.1848, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.020196199492350964, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.7702500000000002e-05, |
|
"loss": 0.1739, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.020415723399876518, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.76775e-05, |
|
"loss": 0.1878, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.02063524730740207, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.76525e-05, |
|
"loss": 0.2028, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.020854771214927625, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.76275e-05, |
|
"loss": 0.1897, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.02107429512245318, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.7602500000000003e-05, |
|
"loss": 0.1748, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.021293819029978733, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 1.75775e-05, |
|
"loss": 0.1997, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.021513342937504287, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.7552500000000002e-05, |
|
"loss": 0.1989, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.02173286684502984, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.75275e-05, |
|
"loss": 0.1836, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.021952390752555395, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.7502500000000004e-05, |
|
"loss": 0.2198, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02217191466008095, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.74775e-05, |
|
"loss": 0.1847, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.022391438567606503, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.7452500000000003e-05, |
|
"loss": 0.1954, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.022610962475132057, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 1.7427500000000002e-05, |
|
"loss": 0.2007, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.02283048638265761, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 1.74025e-05, |
|
"loss": 0.2068, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.023050010290183165, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.73775e-05, |
|
"loss": 0.1898, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.02326953419770872, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.73525e-05, |
|
"loss": 0.1661, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.023489058105234273, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 1.7327500000000003e-05, |
|
"loss": 0.1745, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.023708582012759827, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 1.7302500000000002e-05, |
|
"loss": 0.1691, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.02392810592028538, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.7277500000000002e-05, |
|
"loss": 0.204, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.024147629827810935, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 1.72525e-05, |
|
"loss": 0.1775, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.02436715373533649, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.72275e-05, |
|
"loss": 0.1953, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.024586677642862043, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.7202500000000003e-05, |
|
"loss": 0.1916, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.024806201550387597, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.7177500000000003e-05, |
|
"loss": 0.1762, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.02502572545791315, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.7152500000000002e-05, |
|
"loss": 0.2267, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.025245249365438704, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.71275e-05, |
|
"loss": 0.1924, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.02546477327296426, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.71025e-05, |
|
"loss": 0.2081, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.025684297180489812, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.7077500000000004e-05, |
|
"loss": 0.1693, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.025903821088015366, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 1.70525e-05, |
|
"loss": 0.2278, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.02612334499554092, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.7027500000000003e-05, |
|
"loss": 0.1819, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.026342868903066474, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.7002500000000002e-05, |
|
"loss": 0.147, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.026562392810592028, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 1.69775e-05, |
|
"loss": 0.1857, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.026781916718117582, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.69525e-05, |
|
"loss": 0.1665, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.027001440625643136, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 1.69275e-05, |
|
"loss": 0.2032, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.02722096453316869, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 1.6902500000000003e-05, |
|
"loss": 0.182, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.027440488440694244, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.6877500000000002e-05, |
|
"loss": 0.1936, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.027660012348219798, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.6852500000000002e-05, |
|
"loss": 0.1971, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.027879536255745352, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.68275e-05, |
|
"loss": 0.1824, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.028099060163270906, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.68025e-05, |
|
"loss": 0.1731, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.02831858407079646, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.6777500000000003e-05, |
|
"loss": 0.1818, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.028538107978322014, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.67525e-05, |
|
"loss": 0.1959, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.028757631885847568, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.6727500000000002e-05, |
|
"loss": 0.1777, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.02897715579337312, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 1.67025e-05, |
|
"loss": 0.1719, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.029196679700898676, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.66775e-05, |
|
"loss": 0.1985, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.02941620360842423, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.66525e-05, |
|
"loss": 0.1841, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.029635727515949783, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.66275e-05, |
|
"loss": 0.2174, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.029855251423475337, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.6602500000000002e-05, |
|
"loss": 0.1925, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.03007477533100089, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.6577500000000002e-05, |
|
"loss": 0.1686, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.030294299238526445, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 1.65525e-05, |
|
"loss": 0.1877, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.030513823146052, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.65275e-05, |
|
"loss": 0.2048, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.030733347053577553, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.65025e-05, |
|
"loss": 0.2031, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.030952870961103107, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 1.6477500000000003e-05, |
|
"loss": 0.217, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.03117239486862866, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.6452500000000002e-05, |
|
"loss": 0.1783, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.031391918776154215, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.64275e-05, |
|
"loss": 0.1761, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.03161144268367977, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.64025e-05, |
|
"loss": 0.182, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.03183096659120532, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 1.63775e-05, |
|
"loss": 0.1732, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.03205049049873088, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.6352500000000003e-05, |
|
"loss": 0.1951, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.03227001440625643, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.63275e-05, |
|
"loss": 0.2427, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.032489538313781985, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.6302500000000002e-05, |
|
"loss": 0.1904, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.03270906222130754, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.62775e-05, |
|
"loss": 0.1938, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.03292858612883309, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 1.62525e-05, |
|
"loss": 0.1673, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03314811003635865, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.62275e-05, |
|
"loss": 0.2362, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.0333676339438842, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.62025e-05, |
|
"loss": 0.1716, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.033587157851409755, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.6177500000000002e-05, |
|
"loss": 0.1872, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.03380668175893531, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.61525e-05, |
|
"loss": 0.2068, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.03402620566646086, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.61275e-05, |
|
"loss": 0.1902, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.034245729573986416, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.61025e-05, |
|
"loss": 0.1866, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.03446525348151197, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 1.60775e-05, |
|
"loss": 0.1694, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.034684777389037524, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 1.6052500000000003e-05, |
|
"loss": 0.1915, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.03490430129656308, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.60275e-05, |
|
"loss": 0.2041, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.03512382520408863, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.60025e-05, |
|
"loss": 0.2597, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.035343349111614186, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.59775e-05, |
|
"loss": 0.2025, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.03556287301913974, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.5952500000000004e-05, |
|
"loss": 0.178, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.035782396926665294, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.5927500000000003e-05, |
|
"loss": 0.2145, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.03600192083419085, |
|
"grad_norm": 0.625, |
|
"learning_rate": 1.5902500000000002e-05, |
|
"loss": 0.2196, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.0362214447417164, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 1.5877500000000002e-05, |
|
"loss": 0.236, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.036440968649241956, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.58525e-05, |
|
"loss": 0.1927, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.03666049255676751, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.5827500000000004e-05, |
|
"loss": 0.1991, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.036880016464293064, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.58025e-05, |
|
"loss": 0.2282, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.03709954037181862, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.5777500000000003e-05, |
|
"loss": 0.1799, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.03731906427934417, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.5752500000000002e-05, |
|
"loss": 0.1936, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.037538588186869726, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.57275e-05, |
|
"loss": 0.1886, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.03775811209439528, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 1.57025e-05, |
|
"loss": 0.1771, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.037977636001920834, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.56775e-05, |
|
"loss": 0.1853, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.03819715990944639, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.5652500000000003e-05, |
|
"loss": 0.1987, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.03841668381697194, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.5627500000000002e-05, |
|
"loss": 0.2049, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.038636207724497496, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.5602500000000002e-05, |
|
"loss": 0.1691, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.03885573163202305, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.55775e-05, |
|
"loss": 0.1981, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.0390752555395486, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 1.55525e-05, |
|
"loss": 0.1842, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.03929477944707416, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.5527500000000003e-05, |
|
"loss": 0.1696, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.03951430335459971, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.55025e-05, |
|
"loss": 0.2018, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.039733827262125265, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 1.5477500000000002e-05, |
|
"loss": 0.201, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.03995335116965082, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.54525e-05, |
|
"loss": 0.1855, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.04017287507717637, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.54275e-05, |
|
"loss": 0.1679, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.04039239898470193, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.54025e-05, |
|
"loss": 0.1916, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.04061192289222748, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.53775e-05, |
|
"loss": 0.1715, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.040831446799753035, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.5352500000000003e-05, |
|
"loss": 0.1899, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.04105097070727859, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.5327500000000002e-05, |
|
"loss": 0.193, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.04127049461480414, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.53025e-05, |
|
"loss": 0.1822, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.0414900185223297, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 1.52775e-05, |
|
"loss": 0.1502, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.04170954242985525, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.5252500000000002e-05, |
|
"loss": 0.1909, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.041929066337380805, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.5227500000000001e-05, |
|
"loss": 0.1853, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.04214859024490636, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.5202500000000002e-05, |
|
"loss": 0.1905, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.04236811415243191, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.51775e-05, |
|
"loss": 0.1877, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.04258763805995747, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 1.5152500000000001e-05, |
|
"loss": 0.1947, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.04280716196748302, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.5127500000000002e-05, |
|
"loss": 0.1985, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.043026685875008575, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.5102500000000002e-05, |
|
"loss": 0.1846, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.04324620978253413, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.5077500000000001e-05, |
|
"loss": 0.2175, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.04346573369005968, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.50525e-05, |
|
"loss": 0.203, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.043685257597585236, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 1.5027500000000001e-05, |
|
"loss": 0.1688, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.04390478150511079, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.5002500000000002e-05, |
|
"loss": 0.1951, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.044124305412636344, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.49775e-05, |
|
"loss": 0.1931, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.0443438293201619, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.4952500000000001e-05, |
|
"loss": 0.2017, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.04456335322768745, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 1.49275e-05, |
|
"loss": 0.2095, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.044782877135213006, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 1.4902500000000002e-05, |
|
"loss": 0.1814, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.04500240104273856, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.48775e-05, |
|
"loss": 0.1912, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.045221924950264114, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.48525e-05, |
|
"loss": 0.1799, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.04544144885778967, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.4827500000000002e-05, |
|
"loss": 0.1754, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.04566097276531522, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.4802500000000003e-05, |
|
"loss": 0.2019, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.045880496672840776, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.47775e-05, |
|
"loss": 0.1767, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.04610002058036633, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.4752500000000001e-05, |
|
"loss": 0.1762, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.046319544487891884, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.4727500000000001e-05, |
|
"loss": 0.2086, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.04653906839541744, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.4702500000000002e-05, |
|
"loss": 0.2083, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.04675859230294299, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.4677500000000003e-05, |
|
"loss": 0.1774, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.046978116210468546, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.46525e-05, |
|
"loss": 0.1908, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.0471976401179941, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.4627500000000002e-05, |
|
"loss": 0.2223, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.047417164025519654, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.4602500000000001e-05, |
|
"loss": 0.2084, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.04763668793304521, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.4577500000000002e-05, |
|
"loss": 0.2029, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.04785621184057076, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.45525e-05, |
|
"loss": 0.1641, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.048075735748096315, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.4527500000000001e-05, |
|
"loss": 0.2053, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.04829525965562187, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.4502500000000002e-05, |
|
"loss": 0.1783, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.04851478356314742, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 1.4477500000000002e-05, |
|
"loss": 0.1733, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.04873430747067298, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.4452500000000001e-05, |
|
"loss": 0.1852, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.04895383137819853, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.44275e-05, |
|
"loss": 0.1626, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.049173355285724085, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.4402500000000001e-05, |
|
"loss": 0.1804, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.04939287919324964, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.4377500000000003e-05, |
|
"loss": 0.1895, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.04961240310077519, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 1.43525e-05, |
|
"loss": 0.1911, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.04983192700830075, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.4327500000000001e-05, |
|
"loss": 0.1903, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.0500514509158263, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.43025e-05, |
|
"loss": 0.2024, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.050270974823351855, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 1.4277500000000002e-05, |
|
"loss": 0.1669, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.05049049873087741, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.42525e-05, |
|
"loss": 0.1819, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.05071002263840296, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 1.42275e-05, |
|
"loss": 0.1808, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.05092954654592852, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 1.4202500000000002e-05, |
|
"loss": 0.1809, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.05114907045345407, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.4177500000000001e-05, |
|
"loss": 0.177, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.051368594360979625, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.41525e-05, |
|
"loss": 0.1882, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.05158811826850518, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.41275e-05, |
|
"loss": 0.1705, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.05180764217603073, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.4102500000000001e-05, |
|
"loss": 0.1671, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.05202716608355629, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.4077500000000002e-05, |
|
"loss": 0.1622, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.05224668999108184, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.4052500000000001e-05, |
|
"loss": 0.1847, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.052466213898607394, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.40275e-05, |
|
"loss": 0.1789, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.05268573780613295, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 1.4002500000000002e-05, |
|
"loss": 0.1815, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0529052617136585, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.3977500000000001e-05, |
|
"loss": 0.2352, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.053124785621184056, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 1.3952500000000002e-05, |
|
"loss": 0.1681, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.05334430952870961, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.39275e-05, |
|
"loss": 0.2157, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.053563833436235164, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.3902500000000001e-05, |
|
"loss": 0.1829, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.05378335734376072, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 1.3877500000000002e-05, |
|
"loss": 0.1778, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.05400288125128627, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 1.3852500000000002e-05, |
|
"loss": 0.2, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.054222405158811826, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.3827500000000001e-05, |
|
"loss": 0.1975, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.05444192906633738, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.38025e-05, |
|
"loss": 0.1732, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.054661452973862934, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.3777500000000001e-05, |
|
"loss": 0.2128, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.05488097688138849, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.3752500000000003e-05, |
|
"loss": 0.2146, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.05510050078891404, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.37275e-05, |
|
"loss": 0.1893, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.055320024696439596, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.3702500000000001e-05, |
|
"loss": 0.2053, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.05553954860396515, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.36775e-05, |
|
"loss": 0.1867, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.055759072511490704, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.3652500000000002e-05, |
|
"loss": 0.2008, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.05597859641901626, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.36275e-05, |
|
"loss": 0.1968, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.05619812032654181, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.36025e-05, |
|
"loss": 0.199, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.056417644234067366, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 1.3577500000000002e-05, |
|
"loss": 0.1708, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.05663716814159292, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.3552500000000001e-05, |
|
"loss": 0.1923, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.056856692049118474, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 1.35275e-05, |
|
"loss": 0.2035, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.05707621595664403, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.35025e-05, |
|
"loss": 0.2349, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.05729573986416958, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.3477500000000001e-05, |
|
"loss": 0.1965, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.057515263771695135, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.3452500000000002e-05, |
|
"loss": 0.1683, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.05773478767922069, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.3427500000000001e-05, |
|
"loss": 0.2099, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.05795431158674624, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.3402500000000001e-05, |
|
"loss": 0.1802, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.0581738354942718, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 1.33775e-05, |
|
"loss": 0.1875, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.05839335940179735, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.3352500000000001e-05, |
|
"loss": 0.1849, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.058612883309322905, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.3327500000000002e-05, |
|
"loss": 0.1549, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.05883240721684846, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.33025e-05, |
|
"loss": 0.1936, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.05905193112437401, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.3277500000000001e-05, |
|
"loss": 0.1891, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.05927145503189957, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.3252500000000002e-05, |
|
"loss": 0.1879, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.05949097893942512, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 1.3227500000000002e-05, |
|
"loss": 0.1838, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.059710502846950675, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.3202500000000001e-05, |
|
"loss": 0.1807, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.05993002675447623, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 1.31775e-05, |
|
"loss": 0.1519, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.06014955066200178, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.3152500000000002e-05, |
|
"loss": 0.1843, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.06036907456952734, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 1.3127500000000003e-05, |
|
"loss": 0.2063, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.06058859847705289, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 1.31025e-05, |
|
"loss": 0.1785, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.060808122384578445, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.3077500000000001e-05, |
|
"loss": 0.1707, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.061027646292104, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.30525e-05, |
|
"loss": 0.1761, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.06124717019962955, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.3027500000000002e-05, |
|
"loss": 0.1782, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.061466694107155107, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 1.30025e-05, |
|
"loss": 0.1988, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.06168621801468066, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.29775e-05, |
|
"loss": 0.1701, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.061905741922206214, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.2952500000000002e-05, |
|
"loss": 0.2079, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.06212526582973177, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.2927500000000001e-05, |
|
"loss": 0.1982, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.06234478973725732, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.29025e-05, |
|
"loss": 0.1798, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.06256431364478288, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.28775e-05, |
|
"loss": 0.1582, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.06278383755230843, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.2852500000000001e-05, |
|
"loss": 0.2233, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.06300336145983398, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.2827500000000002e-05, |
|
"loss": 0.1952, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.06322288536735954, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.2802500000000002e-05, |
|
"loss": 0.1854, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.06344240927488509, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.2777500000000001e-05, |
|
"loss": 0.205, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.06366193318241065, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 1.27525e-05, |
|
"loss": 0.1899, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.0638814570899362, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.2727500000000001e-05, |
|
"loss": 0.1874, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.06410098099746175, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.2702500000000002e-05, |
|
"loss": 0.186, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.06432050490498731, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 1.26775e-05, |
|
"loss": 0.1619, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.06454002881251286, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.2652500000000001e-05, |
|
"loss": 0.1959, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.06475955272003842, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.26275e-05, |
|
"loss": 0.1919, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.06497907662756397, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.2602500000000002e-05, |
|
"loss": 0.1998, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.06519860053508952, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.25775e-05, |
|
"loss": 0.2058, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.06541812444261508, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 1.25525e-05, |
|
"loss": 0.1861, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.06563764835014063, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.2527500000000002e-05, |
|
"loss": 0.2199, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.06585717225766619, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.2502500000000003e-05, |
|
"loss": 0.1804, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.06607669616519174, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 1.24775e-05, |
|
"loss": 0.1742, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.0662962200727173, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.2452500000000001e-05, |
|
"loss": 0.1782, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.06651574398024285, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 1.2427500000000001e-05, |
|
"loss": 0.1822, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.0667352678877684, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.2402500000000002e-05, |
|
"loss": 0.1902, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.06695479179529396, |
|
"grad_norm": 2.34375, |
|
"learning_rate": 1.23775e-05, |
|
"loss": 0.1937, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.06717431570281951, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 1.23525e-05, |
|
"loss": 0.1919, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.06739383961034506, |
|
"grad_norm": 5.625, |
|
"learning_rate": 1.2327500000000002e-05, |
|
"loss": 0.1763, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.06761336351787062, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.2302500000000001e-05, |
|
"loss": 0.1956, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.06783288742539617, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.22775e-05, |
|
"loss": 0.2159, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.06805241133292173, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.22525e-05, |
|
"loss": 0.1775, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.06827193524044728, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.2227500000000001e-05, |
|
"loss": 0.1617, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.06849145914797283, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.2202500000000002e-05, |
|
"loss": 0.1725, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.06871098305549839, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.2177500000000002e-05, |
|
"loss": 0.2037, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.06893050696302394, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.2152500000000001e-05, |
|
"loss": 0.21, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.0691500308705495, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.21275e-05, |
|
"loss": 0.1816, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.06936955477807505, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.2102500000000001e-05, |
|
"loss": 0.1811, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.0695890786856006, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.2077500000000003e-05, |
|
"loss": 0.1659, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.06980860259312616, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.20525e-05, |
|
"loss": 0.1924, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.07002812650065171, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 1.2027500000000001e-05, |
|
"loss": 0.1891, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.07024765040817726, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.20025e-05, |
|
"loss": 0.1836, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.07046717431570282, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.1977500000000002e-05, |
|
"loss": 0.2071, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.07068669822322837, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.19525e-05, |
|
"loss": 0.2144, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.07090622213075393, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.19275e-05, |
|
"loss": 0.1881, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.07112574603827948, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.1902500000000002e-05, |
|
"loss": 0.1751, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.07134526994580503, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.1877500000000001e-05, |
|
"loss": 0.1844, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.07156479385333059, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.18525e-05, |
|
"loss": 0.177, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.07178431776085614, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 1.18275e-05, |
|
"loss": 0.1831, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.0720038416683817, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.1802500000000001e-05, |
|
"loss": 0.1793, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.07222336557590725, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.1777500000000002e-05, |
|
"loss": 0.1824, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.0724428894834328, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.17525e-05, |
|
"loss": 0.2064, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.07266241339095836, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.17275e-05, |
|
"loss": 0.1824, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.07288193729848391, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 1.1702500000000002e-05, |
|
"loss": 0.1792, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.07310146120600947, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.1677500000000001e-05, |
|
"loss": 0.1882, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.07332098511353502, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.16525e-05, |
|
"loss": 0.1945, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.07354050902106057, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.16275e-05, |
|
"loss": 0.2059, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.07376003292858613, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 1.1602500000000001e-05, |
|
"loss": 0.2108, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.07397955683611168, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.1577500000000002e-05, |
|
"loss": 0.1833, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.07419908074363724, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.1552500000000002e-05, |
|
"loss": 0.1719, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.07441860465116279, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.1527500000000001e-05, |
|
"loss": 0.191, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.07463812855868834, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.15025e-05, |
|
"loss": 0.1933, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.0748576524662139, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.1477500000000001e-05, |
|
"loss": 0.2048, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.07507717637373945, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.1452500000000003e-05, |
|
"loss": 0.1669, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.075296700281265, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.14275e-05, |
|
"loss": 0.1684, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.07551622418879056, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.1402500000000001e-05, |
|
"loss": 0.1744, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.07573574809631611, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.13775e-05, |
|
"loss": 0.1822, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.07595527200384167, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 1.1352500000000002e-05, |
|
"loss": 0.1808, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.07617479591136722, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.13275e-05, |
|
"loss": 0.1872, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.07639431981889278, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.13025e-05, |
|
"loss": 0.192, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.07661384372641833, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.1277500000000002e-05, |
|
"loss": 0.219, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.07683336763394388, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 1.1252500000000001e-05, |
|
"loss": 0.2029, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.07705289154146944, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.12275e-05, |
|
"loss": 0.1787, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.07727241544899499, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 1.12025e-05, |
|
"loss": 0.1613, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.07749193935652054, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.1177500000000001e-05, |
|
"loss": 0.1823, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.0777114632640461, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 1.1152500000000002e-05, |
|
"loss": 0.1823, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.07793098717157165, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.11275e-05, |
|
"loss": 0.194, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.0781505110790972, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 1.1102500000000001e-05, |
|
"loss": 0.202, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.07837003498662276, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.10775e-05, |
|
"loss": 0.1599, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.07858955889414831, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.1052500000000001e-05, |
|
"loss": 0.183, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.07880908280167387, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.1027499999999999e-05, |
|
"loss": 0.1697, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.07902860670919942, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.10025e-05, |
|
"loss": 0.1872, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.07924813061672498, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.0977500000000001e-05, |
|
"loss": 0.2008, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.07946765452425053, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.0952500000000002e-05, |
|
"loss": 0.1857, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.07968717843177608, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.0927500000000002e-05, |
|
"loss": 0.2013, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.07990670233930164, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.0902500000000001e-05, |
|
"loss": 0.1996, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.08012622624682719, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.08775e-05, |
|
"loss": 0.1866, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.08034575015435275, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.0852500000000002e-05, |
|
"loss": 0.1854, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.0805652740618783, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.0827500000000003e-05, |
|
"loss": 0.2041, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.08078479796940385, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.08025e-05, |
|
"loss": 0.2103, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.08100432187692941, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.0777500000000001e-05, |
|
"loss": 0.1689, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.08122384578445496, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.07525e-05, |
|
"loss": 0.2075, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.08144336969198052, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.0727500000000002e-05, |
|
"loss": 0.1958, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.08166289359950607, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.07025e-05, |
|
"loss": 0.2133, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.08188241750703162, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 1.06775e-05, |
|
"loss": 0.2199, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.08210194141455718, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 1.0652500000000002e-05, |
|
"loss": 0.1816, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.08232146532208273, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.0627500000000001e-05, |
|
"loss": 0.1618, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.08254098922960829, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.06025e-05, |
|
"loss": 0.2193, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.08276051313713384, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.05775e-05, |
|
"loss": 0.1626, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.0829800370446594, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.0552500000000001e-05, |
|
"loss": 0.1873, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.08319956095218495, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.0527500000000002e-05, |
|
"loss": 0.2129, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.0834190848597105, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 1.05025e-05, |
|
"loss": 0.1906, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.08363860876723606, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 1.0477500000000001e-05, |
|
"loss": 0.1711, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.08385813267476161, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.04525e-05, |
|
"loss": 0.1813, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.08407765658228716, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 1.0427500000000001e-05, |
|
"loss": 0.1996, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.08429718048981272, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.0402499999999999e-05, |
|
"loss": 0.1536, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.08451670439733827, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 1.03775e-05, |
|
"loss": 0.1617, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.08473622830486383, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 1.0352500000000001e-05, |
|
"loss": 0.1874, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.08495575221238938, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.03275e-05, |
|
"loss": 0.2133, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.08517527611991493, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.0302500000000002e-05, |
|
"loss": 0.1796, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.08539480002744049, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.02775e-05, |
|
"loss": 0.1698, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.08561432393496604, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 1.02525e-05, |
|
"loss": 0.1938, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.0858338478424916, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.0227500000000002e-05, |
|
"loss": 0.1816, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.08605337175001715, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.0202500000000003e-05, |
|
"loss": 0.1677, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.0862728956575427, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.01775e-05, |
|
"loss": 0.1956, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.08649241956506826, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.0152500000000001e-05, |
|
"loss": 0.1769, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.08671194347259381, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.0127500000000001e-05, |
|
"loss": 0.2014, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.08693146738011936, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 1.0102500000000002e-05, |
|
"loss": 0.2199, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.08715099128764492, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.00775e-05, |
|
"loss": 0.1815, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.08737051519517047, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.00525e-05, |
|
"loss": 0.1797, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.08759003910269603, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.0027500000000002e-05, |
|
"loss": 0.2142, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.08780956301022158, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 1.0002500000000001e-05, |
|
"loss": 0.2015, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.08802908691774713, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 9.9775e-06, |
|
"loss": 0.1653, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.08824861082527269, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 9.9525e-06, |
|
"loss": 0.1784, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.08846813473279824, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 9.927500000000001e-06, |
|
"loss": 0.1915, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.0886876586403238, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 9.9025e-06, |
|
"loss": 0.1997, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.08890718254784935, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 9.877500000000002e-06, |
|
"loss": 0.1816, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.0891267064553749, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 9.852500000000001e-06, |
|
"loss": 0.1954, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.08934623036290046, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 9.8275e-06, |
|
"loss": 0.1532, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.08956575427042601, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 9.8025e-06, |
|
"loss": 0.2117, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.08978527817795157, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 9.7775e-06, |
|
"loss": 0.1957, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.09000480208547712, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 9.7525e-06, |
|
"loss": 0.1683, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.09022432599300267, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 9.727500000000001e-06, |
|
"loss": 0.202, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.09044384990052823, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 9.7025e-06, |
|
"loss": 0.1806, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.09066337380805378, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 9.6775e-06, |
|
"loss": 0.1941, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.09088289771557934, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 9.652500000000001e-06, |
|
"loss": 0.1824, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.09110242162310489, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 9.6275e-06, |
|
"loss": 0.1489, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.09132194553063044, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 9.602500000000002e-06, |
|
"loss": 0.1578, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.091541469438156, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 9.577500000000001e-06, |
|
"loss": 0.1837, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.09176099334568155, |
|
"grad_norm": 0.75, |
|
"learning_rate": 9.5525e-06, |
|
"loss": 0.1943, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.0919805172532071, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 9.5275e-06, |
|
"loss": 0.1938, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.09220004116073266, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 9.502500000000001e-06, |
|
"loss": 0.1722, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.09241956506825821, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 9.4775e-06, |
|
"loss": 0.1893, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.09263908897578377, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 9.452500000000001e-06, |
|
"loss": 0.1843, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.09285861288330932, |
|
"grad_norm": 0.75, |
|
"learning_rate": 9.4275e-06, |
|
"loss": 0.2091, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.09307813679083488, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 9.402500000000002e-06, |
|
"loss": 0.1744, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.09329766069836043, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 9.377500000000001e-06, |
|
"loss": 0.1934, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.09351718460588598, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 9.3525e-06, |
|
"loss": 0.1982, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.09373670851341154, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 9.3275e-06, |
|
"loss": 0.1705, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.09395623242093709, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 9.302500000000001e-06, |
|
"loss": 0.1701, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.09417575632846265, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 9.2775e-06, |
|
"loss": 0.1956, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.0943952802359882, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 9.252500000000002e-06, |
|
"loss": 0.1792, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.09461480414351375, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 9.227500000000001e-06, |
|
"loss": 0.1744, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.09483432805103931, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 9.2025e-06, |
|
"loss": 0.2185, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.09505385195856486, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 9.1775e-06, |
|
"loss": 0.1655, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.09527337586609042, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 9.152500000000001e-06, |
|
"loss": 0.2083, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.09549289977361597, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 9.1275e-06, |
|
"loss": 0.2039, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.09571242368114152, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 9.102500000000001e-06, |
|
"loss": 0.1923, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.09593194758866708, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 9.0775e-06, |
|
"loss": 0.2013, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.09615147149619263, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 9.0525e-06, |
|
"loss": 0.1856, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.09637099540371818, |
|
"grad_norm": 0.75, |
|
"learning_rate": 9.027500000000001e-06, |
|
"loss": 0.2029, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.09659051931124374, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 9.0025e-06, |
|
"loss": 0.1753, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.09681004321876929, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 8.977500000000002e-06, |
|
"loss": 0.1767, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.09702956712629485, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 8.952500000000001e-06, |
|
"loss": 0.1727, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.0972490910338204, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 8.9275e-06, |
|
"loss": 0.1802, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.09746861494134595, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 8.9025e-06, |
|
"loss": 0.217, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.09768813884887151, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 8.877500000000001e-06, |
|
"loss": 0.1644, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.09790766275639706, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 8.8525e-06, |
|
"loss": 0.195, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.09812718666392262, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 8.827500000000001e-06, |
|
"loss": 0.1872, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.09834671057144817, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 8.802500000000001e-06, |
|
"loss": 0.196, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.09856623447897372, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 8.7775e-06, |
|
"loss": 0.186, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.09878575838649928, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 8.7525e-06, |
|
"loss": 0.1871, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.09900528229402483, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 8.7275e-06, |
|
"loss": 0.189, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.09922480620155039, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 8.7025e-06, |
|
"loss": 0.1596, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.09944433010907594, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 8.677500000000001e-06, |
|
"loss": 0.1781, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.0996638540166015, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 8.6525e-06, |
|
"loss": 0.1879, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.09988337792412705, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 8.627500000000002e-06, |
|
"loss": 0.2002, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.1001029018316526, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 8.602500000000001e-06, |
|
"loss": 0.2004, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.10032242573917816, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 8.5775e-06, |
|
"loss": 0.1844, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.10054194964670371, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 8.5525e-06, |
|
"loss": 0.1701, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.10076147355422926, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 8.527500000000001e-06, |
|
"loss": 0.2176, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.10098099746175482, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 8.5025e-06, |
|
"loss": 0.1736, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.10120052136928037, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 8.477500000000001e-06, |
|
"loss": 0.1813, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.10142004527680593, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 8.4525e-06, |
|
"loss": 0.1878, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.10163956918433148, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 8.4275e-06, |
|
"loss": 0.1796, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.10185909309185703, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 8.402500000000001e-06, |
|
"loss": 0.1702, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.10207861699938259, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 8.3775e-06, |
|
"loss": 0.1689, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.10229814090690814, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 8.352500000000002e-06, |
|
"loss": 0.1922, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.1025176648144337, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 8.327500000000001e-06, |
|
"loss": 0.1813, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.10273718872195925, |
|
"grad_norm": 0.65625, |
|
"learning_rate": 8.3025e-06, |
|
"loss": 0.1486, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.1029567126294848, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 8.2775e-06, |
|
"loss": 0.1606, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.10317623653701036, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 8.252500000000001e-06, |
|
"loss": 0.2014, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.10339576044453591, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 8.2275e-06, |
|
"loss": 0.1757, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.10361528435206147, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 8.202500000000002e-06, |
|
"loss": 0.1895, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.10383480825958702, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 8.177500000000001e-06, |
|
"loss": 0.1753, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.10405433216711257, |
|
"grad_norm": 0.75, |
|
"learning_rate": 8.1525e-06, |
|
"loss": 0.1705, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.10427385607463813, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 8.1275e-06, |
|
"loss": 0.217, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.10449337998216368, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 8.1025e-06, |
|
"loss": 0.2155, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.10471290388968924, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 8.0775e-06, |
|
"loss": 0.1972, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.10493242779721479, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 8.052500000000001e-06, |
|
"loss": 0.1927, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.10515195170474034, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 8.0275e-06, |
|
"loss": 0.2067, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.1053714756122659, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 8.0025e-06, |
|
"loss": 0.1802, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.10559099951979145, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 7.9775e-06, |
|
"loss": 0.1808, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.105810523427317, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 7.9525e-06, |
|
"loss": 0.1641, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.10603004733484256, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 7.9275e-06, |
|
"loss": 0.175, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.10624957124236811, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 7.902500000000001e-06, |
|
"loss": 0.1984, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.10646909514989367, |
|
"grad_norm": 1.0, |
|
"learning_rate": 7.8775e-06, |
|
"loss": 0.182, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.10668861905741922, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 7.852500000000001e-06, |
|
"loss": 0.1785, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.10690814296494477, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 7.827500000000001e-06, |
|
"loss": 0.1917, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.10712766687247033, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 7.8025e-06, |
|
"loss": 0.1747, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.10734719077999588, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 7.777500000000001e-06, |
|
"loss": 0.1806, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.10756671468752144, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 7.7525e-06, |
|
"loss": 0.1672, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.10778623859504699, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 7.727500000000002e-06, |
|
"loss": 0.2223, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.10800576250257254, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 7.702500000000001e-06, |
|
"loss": 0.1946, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.1082252864100981, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 7.6775e-06, |
|
"loss": 0.1682, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.10844481031762365, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 7.6525e-06, |
|
"loss": 0.1494, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.1086643342251492, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 7.627500000000001e-06, |
|
"loss": 0.1856, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.10888385813267476, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 7.6025000000000005e-06, |
|
"loss": 0.1922, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.10910338204020031, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 7.577500000000001e-06, |
|
"loss": 0.1919, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.10932290594772587, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 7.5525e-06, |
|
"loss": 0.1758, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.10954242985525142, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 7.527500000000001e-06, |
|
"loss": 0.2053, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.10976195376277698, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 7.502500000000001e-06, |
|
"loss": 0.1742, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.10998147767030253, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 7.477500000000001e-06, |
|
"loss": 0.2089, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.11020100157782808, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 7.4525e-06, |
|
"loss": 0.1788, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.11042052548535364, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 7.4275000000000005e-06, |
|
"loss": 0.1981, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.11064004939287919, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 7.4025e-06, |
|
"loss": 0.1708, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.11085957330040475, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 7.377500000000001e-06, |
|
"loss": 0.1621, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.1110790972079303, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 7.3525e-06, |
|
"loss": 0.1805, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.11129862111545585, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 7.3275000000000006e-06, |
|
"loss": 0.201, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.11151814502298141, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 7.3025e-06, |
|
"loss": 0.2034, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.11173766893050696, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 7.277500000000001e-06, |
|
"loss": 0.1858, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.11195719283803252, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 7.2525000000000004e-06, |
|
"loss": 0.2008, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.11217671674555807, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 7.227500000000001e-06, |
|
"loss": 0.1954, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.11239624065308362, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 7.2025e-06, |
|
"loss": 0.1638, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.11261576456060918, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 7.1775e-06, |
|
"loss": 0.1845, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.11283528846813473, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 7.152500000000001e-06, |
|
"loss": 0.1953, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.11305481237566029, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 7.127500000000001e-06, |
|
"loss": 0.1937, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.11327433628318584, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 7.102500000000001e-06, |
|
"loss": 0.1736, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.1134938601907114, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 7.0775000000000004e-06, |
|
"loss": 0.2043, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.11371338409823695, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 7.052500000000001e-06, |
|
"loss": 0.1758, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.1139329080057625, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 7.0275e-06, |
|
"loss": 0.1895, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.11415243191328805, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 7.002500000000001e-06, |
|
"loss": 0.2119, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.11437195582081361, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 6.9775000000000005e-06, |
|
"loss": 0.1839, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.11459147972833916, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 6.952500000000001e-06, |
|
"loss": 0.186, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.11481100363586472, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 6.9275e-06, |
|
"loss": 0.1887, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.11503052754339027, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 6.902500000000001e-06, |
|
"loss": 0.1916, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.11525005145091582, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 6.877500000000001e-06, |
|
"loss": 0.1755, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.11546957535844138, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 6.852500000000001e-06, |
|
"loss": 0.2005, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.11568909926596693, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 6.8275e-06, |
|
"loss": 0.229, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.11590862317349249, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 6.8025000000000005e-06, |
|
"loss": 0.1747, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.11612814708101804, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 6.7775e-06, |
|
"loss": 0.1735, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.1163476709885436, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 6.752500000000001e-06, |
|
"loss": 0.1971, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.11656719489606915, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 6.7275e-06, |
|
"loss": 0.1885, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.1167867188035947, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 6.702500000000001e-06, |
|
"loss": 0.1801, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.11700624271112026, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 6.6775e-06, |
|
"loss": 0.2179, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.11722576661864581, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 6.6525e-06, |
|
"loss": 0.1607, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.11744529052617136, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 6.6275e-06, |
|
"loss": 0.1931, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.11766481443369692, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 6.602500000000001e-06, |
|
"loss": 0.2009, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.11788433834122247, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 6.5775e-06, |
|
"loss": 0.1551, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.11810386224874803, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 6.5525e-06, |
|
"loss": 0.1775, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.11832338615627358, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 6.5275000000000015e-06, |
|
"loss": 0.1673, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.11854291006379913, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 6.502500000000001e-06, |
|
"loss": 0.1785, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.11876243397132469, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 6.477500000000001e-06, |
|
"loss": 0.1902, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.11898195787885024, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 6.4525000000000005e-06, |
|
"loss": 0.1794, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.1192014817863758, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 6.427500000000001e-06, |
|
"loss": 0.1954, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.11942100569390135, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 6.4025e-06, |
|
"loss": 0.21, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.1196405296014269, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 6.377500000000001e-06, |
|
"loss": 0.1887, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.11986005350895246, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 6.352500000000001e-06, |
|
"loss": 0.1928, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.12007957741647801, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 6.327500000000001e-06, |
|
"loss": 0.2024, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.12029910132400357, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 6.3025e-06, |
|
"loss": 0.1855, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.12051862523152912, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 6.2775000000000005e-06, |
|
"loss": 0.1693, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.12073814913905467, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 6.2525e-06, |
|
"loss": 0.1777, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.12095767304658023, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 6.227500000000001e-06, |
|
"loss": 0.221, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.12117719695410578, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 6.2025e-06, |
|
"loss": 0.1692, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.12139672086163134, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 6.1775000000000006e-06, |
|
"loss": 0.192, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.12161624476915689, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 6.1525e-06, |
|
"loss": 0.1882, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.12183576867668244, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 6.127500000000001e-06, |
|
"loss": 0.2051, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.122055292584208, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 6.1025000000000004e-06, |
|
"loss": 0.2132, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.12227481649173355, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 6.077500000000001e-06, |
|
"loss": 0.1776, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.1224943403992591, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 6.0525e-06, |
|
"loss": 0.2029, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.12271386430678466, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 6.0275e-06, |
|
"loss": 0.209, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.12293338821431021, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 6.0025e-06, |
|
"loss": 0.1967, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.12315291212183577, |
|
"grad_norm": 1.0, |
|
"learning_rate": 5.977500000000001e-06, |
|
"loss": 0.2383, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.12337243602936132, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 5.9525e-06, |
|
"loss": 0.163, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.12359195993688687, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 5.9275e-06, |
|
"loss": 0.196, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.12381148384441243, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 5.902500000000001e-06, |
|
"loss": 0.2017, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.12403100775193798, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 5.8775e-06, |
|
"loss": 0.1859, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.12425053165946354, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 5.852500000000001e-06, |
|
"loss": 0.2002, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.12447005556698909, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 5.8275000000000005e-06, |
|
"loss": 0.1784, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.12468957947451464, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 5.802500000000001e-06, |
|
"loss": 0.1809, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.1249091033820402, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 5.7775e-06, |
|
"loss": 0.1663, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.12512862728956575, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 5.752500000000001e-06, |
|
"loss": 0.173, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.1253481511970913, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 5.727500000000001e-06, |
|
"loss": 0.1743, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.12556767510461686, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 5.702500000000001e-06, |
|
"loss": 0.2364, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.12578719901214241, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 5.6775e-06, |
|
"loss": 0.212, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.12600672291966797, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 5.6525000000000005e-06, |
|
"loss": 0.1757, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.12622624682719352, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 5.6275e-06, |
|
"loss": 0.1868, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.12644577073471908, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 5.602500000000001e-06, |
|
"loss": 0.1901, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.12666529464224463, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 5.5775e-06, |
|
"loss": 0.2302, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.12688481854977018, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 5.552500000000001e-06, |
|
"loss": 0.1639, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.12710434245729574, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 5.5275e-06, |
|
"loss": 0.1778, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.1273238663648213, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 5.5025e-06, |
|
"loss": 0.1727, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.12754339027234685, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 5.4775e-06, |
|
"loss": 0.1731, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.1277629141798724, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 5.452500000000001e-06, |
|
"loss": 0.1762, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.12798243808739795, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 5.4275e-06, |
|
"loss": 0.1874, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.1282019619949235, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 5.4025e-06, |
|
"loss": 0.199, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.12842148590244906, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 5.3775e-06, |
|
"loss": 0.1935, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.12864100980997462, |
|
"grad_norm": 0.875, |
|
"learning_rate": 5.352500000000001e-06, |
|
"loss": 0.1862, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.12886053371750017, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 5.3275e-06, |
|
"loss": 0.1693, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.12908005762502572, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 5.3025000000000005e-06, |
|
"loss": 0.1972, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.12929958153255128, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 5.277500000000001e-06, |
|
"loss": 0.1742, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.12951910544007683, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 5.2525e-06, |
|
"loss": 0.1765, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.12973862934760239, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 5.227500000000001e-06, |
|
"loss": 0.1713, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.12995815325512794, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 5.202500000000001e-06, |
|
"loss": 0.1647, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.1301776771626535, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 5.177500000000001e-06, |
|
"loss": 0.1795, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.13039720107017905, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 5.1525e-06, |
|
"loss": 0.2107, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.1306167249777046, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 5.1275000000000005e-06, |
|
"loss": 0.1919, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.13083624888523016, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 5.1025e-06, |
|
"loss": 0.1755, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.1310557727927557, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 5.077500000000001e-06, |
|
"loss": 0.1673, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.13127529670028126, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 5.0525e-06, |
|
"loss": 0.2152, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.13149482060780682, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 5.0275000000000006e-06, |
|
"loss": 0.2161, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.13171434451533237, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 5.0025e-06, |
|
"loss": 0.1849, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13193386842285793, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 4.977500000000001e-06, |
|
"loss": 0.1786, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.13215339233038348, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 4.9525000000000004e-06, |
|
"loss": 0.1818, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.13237291623790903, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 4.927500000000001e-06, |
|
"loss": 0.182, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.1325924401454346, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 4.902500000000001e-06, |
|
"loss": 0.1778, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.13281196405296014, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 4.8775e-06, |
|
"loss": 0.165, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.1330314879604857, |
|
"grad_norm": 0.875, |
|
"learning_rate": 4.8525000000000006e-06, |
|
"loss": 0.2036, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.13325101186801125, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 4.827500000000001e-06, |
|
"loss": 0.1749, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.1334705357755368, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 4.8025e-06, |
|
"loss": 0.1979, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.13369005968306236, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 4.7775e-06, |
|
"loss": 0.1883, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.1339095835905879, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 4.752500000000001e-06, |
|
"loss": 0.1742, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.13412910749811346, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 4.7275e-06, |
|
"loss": 0.1704, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.13434863140563902, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 4.7025e-06, |
|
"loss": 0.1963, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.13456815531316457, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 4.6775000000000005e-06, |
|
"loss": 0.1935, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.13478767922069013, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 4.652500000000001e-06, |
|
"loss": 0.1756, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.13500720312821568, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 4.6275e-06, |
|
"loss": 0.1922, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.13522672703574123, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 4.6025e-06, |
|
"loss": 0.2005, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.1354462509432668, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 4.577500000000001e-06, |
|
"loss": 0.184, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.13566577485079234, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 4.5525e-06, |
|
"loss": 0.1865, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.1358852987583179, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 4.5275e-06, |
|
"loss": 0.201, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.13610482266584345, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 4.5025000000000005e-06, |
|
"loss": 0.2129, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.136324346573369, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 4.4775e-06, |
|
"loss": 0.2103, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.13654387048089456, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 4.4525e-06, |
|
"loss": 0.1933, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.1367633943884201, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 4.4275e-06, |
|
"loss": 0.1837, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.13698291829594567, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 4.4025e-06, |
|
"loss": 0.1696, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.13720244220347122, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 4.3775e-06, |
|
"loss": 0.1885, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.13742196611099677, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 4.3525e-06, |
|
"loss": 0.175, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.13764149001852233, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 4.3275000000000005e-06, |
|
"loss": 0.1905, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.13786101392604788, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 4.302500000000001e-06, |
|
"loss": 0.1854, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.13808053783357344, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 4.2775e-06, |
|
"loss": 0.2116, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.138300061741099, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 4.2525e-06, |
|
"loss": 0.184, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.13851958564862454, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 4.227500000000001e-06, |
|
"loss": 0.1831, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.1387391095561501, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 4.202500000000001e-06, |
|
"loss": 0.1765, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.13895863346367565, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 4.1775e-06, |
|
"loss": 0.1742, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.1391781573712012, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 4.1525000000000005e-06, |
|
"loss": 0.2031, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.13939768127872676, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 4.127500000000001e-06, |
|
"loss": 0.1976, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.1396172051862523, |
|
"grad_norm": 0.625, |
|
"learning_rate": 4.1025e-06, |
|
"loss": 0.1701, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.13983672909377787, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 4.0775e-06, |
|
"loss": 0.1827, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.14005625300130342, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 4.052500000000001e-06, |
|
"loss": 0.1767, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.14027577690882898, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.0275e-06, |
|
"loss": 0.1869, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.14049530081635453, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 4.0025e-06, |
|
"loss": 0.1954, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.14071482472388008, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 3.9775000000000005e-06, |
|
"loss": 0.1762, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.14093434863140564, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 3.9525e-06, |
|
"loss": 0.1865, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.1411538725389312, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 3.9275e-06, |
|
"loss": 0.1816, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.14137339644645674, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 3.9025e-06, |
|
"loss": 0.1757, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.1415929203539823, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 3.8775000000000006e-06, |
|
"loss": 0.1836, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.14181244426150785, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 3.8525e-06, |
|
"loss": 0.1953, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.1420319681690334, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 3.8275e-06, |
|
"loss": 0.1605, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.14225149207655896, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 3.8025e-06, |
|
"loss": 0.1865, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.14247101598408451, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 3.7775000000000003e-06, |
|
"loss": 0.1746, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.14269053989161007, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 3.7525e-06, |
|
"loss": 0.1572, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.14291006379913562, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 3.7275000000000007e-06, |
|
"loss": 0.1942, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.14312958770666118, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 3.7025000000000005e-06, |
|
"loss": 0.1841, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.14334911161418673, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 3.6775000000000004e-06, |
|
"loss": 0.1964, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.14356863552171228, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 3.6525000000000006e-06, |
|
"loss": 0.198, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.14378815942923784, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 3.6275000000000004e-06, |
|
"loss": 0.1773, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.1440076833367634, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 3.6025000000000002e-06, |
|
"loss": 0.1699, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.14422720724428895, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 3.5775000000000005e-06, |
|
"loss": 0.2117, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.1444467311518145, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 3.5525000000000003e-06, |
|
"loss": 0.1783, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.14466625505934005, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 3.5275000000000005e-06, |
|
"loss": 0.1608, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.1448857789668656, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 3.5025000000000003e-06, |
|
"loss": 0.1933, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.14510530287439116, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 3.4775e-06, |
|
"loss": 0.2031, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.14532482678191672, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.4525000000000004e-06, |
|
"loss": 0.188, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.14554435068944227, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 3.4275000000000002e-06, |
|
"loss": 0.1767, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.14576387459696782, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 3.4025000000000005e-06, |
|
"loss": 0.1888, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.14598339850449338, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 3.3775000000000003e-06, |
|
"loss": 0.1918, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.14620292241201893, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 3.3525e-06, |
|
"loss": 0.167, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.14642244631954449, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 3.3275000000000003e-06, |
|
"loss": 0.1635, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.14664197022707004, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 3.3025e-06, |
|
"loss": 0.2107, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.1468614941345956, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 3.2775e-06, |
|
"loss": 0.1872, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.14708101804212115, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 3.2525e-06, |
|
"loss": 0.1627, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.1473005419496467, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 3.2275e-06, |
|
"loss": 0.1499, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.14752006585717226, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 3.2025000000000003e-06, |
|
"loss": 0.1921, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.1477395897646978, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 3.1775e-06, |
|
"loss": 0.1811, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.14795911367222336, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 3.1525e-06, |
|
"loss": 0.176, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.14817863757974892, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 3.1275e-06, |
|
"loss": 0.2066, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.14839816148727447, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 3.1025000000000004e-06, |
|
"loss": 0.1424, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.14861768539480003, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 3.0775000000000006e-06, |
|
"loss": 0.1997, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.14883720930232558, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 3.0525000000000004e-06, |
|
"loss": 0.1976, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.14905673320985113, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 3.0275000000000002e-06, |
|
"loss": 0.1596, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.1492762571173767, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 3.0025000000000005e-06, |
|
"loss": 0.1694, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.14949578102490224, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 2.9775000000000003e-06, |
|
"loss": 0.1774, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.1497153049324278, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 2.9525000000000005e-06, |
|
"loss": 0.1849, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.14993482883995335, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 2.9275000000000003e-06, |
|
"loss": 0.2215, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.1501543527474789, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 2.9025e-06, |
|
"loss": 0.1916, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.15037387665500446, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 2.8775000000000004e-06, |
|
"loss": 0.185, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.15059340056253, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 2.8525000000000002e-06, |
|
"loss": 0.1826, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.15081292447005556, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 2.8275e-06, |
|
"loss": 0.1935, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.15103244837758112, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 2.8025000000000003e-06, |
|
"loss": 0.1683, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.15125197228510667, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 2.7775e-06, |
|
"loss": 0.2083, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.15147149619263223, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 2.7525000000000003e-06, |
|
"loss": 0.1656, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.15169102010015778, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 2.7275e-06, |
|
"loss": 0.1748, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.15191054400768333, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 2.7025e-06, |
|
"loss": 0.2087, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.1521300679152089, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 2.6775e-06, |
|
"loss": 0.1721, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.15234959182273444, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 2.6525e-06, |
|
"loss": 0.2098, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.15256911573026, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 2.6275000000000003e-06, |
|
"loss": 0.1765, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.15278863963778555, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 2.6025e-06, |
|
"loss": 0.1839, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.1530081635453111, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 2.5775e-06, |
|
"loss": 0.1648, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.15322768745283666, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 2.5525e-06, |
|
"loss": 0.1808, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.1534472113603622, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 2.5275e-06, |
|
"loss": 0.1903, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.15366673526788777, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 2.5024999999999998e-06, |
|
"loss": 0.1867, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.15388625917541332, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 2.4775e-06, |
|
"loss": 0.1942, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.15410578308293887, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 2.4525000000000002e-06, |
|
"loss": 0.1753, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.15432530699046443, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 2.4275e-06, |
|
"loss": 0.1916, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.15454483089798998, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 2.4025000000000003e-06, |
|
"loss": 0.1735, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.15476435480551554, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 2.3775e-06, |
|
"loss": 0.1675, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.1549838787130411, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 2.3525e-06, |
|
"loss": 0.176, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.15520340262056664, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 2.3275e-06, |
|
"loss": 0.1785, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.1554229265280922, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 2.3025000000000004e-06, |
|
"loss": 0.1981, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.15564245043561775, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 2.2775000000000002e-06, |
|
"loss": 0.2026, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.1558619743431433, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 2.2525e-06, |
|
"loss": 0.1676, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.15608149825066886, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 2.2275000000000003e-06, |
|
"loss": 0.1657, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.1563010221581944, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 2.2025e-06, |
|
"loss": 0.1702, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.15652054606571997, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 2.1775000000000003e-06, |
|
"loss": 0.1788, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.15674006997324552, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 2.1525e-06, |
|
"loss": 0.1713, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.15695959388077108, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 2.1275e-06, |
|
"loss": 0.1754, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.15717911778829663, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 2.1025e-06, |
|
"loss": 0.1924, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.15739864169582218, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 2.0775e-06, |
|
"loss": 0.1997, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.15761816560334774, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 2.0525000000000003e-06, |
|
"loss": 0.1917, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.1578376895108733, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 2.0275000000000005e-06, |
|
"loss": 0.2014, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.15805721341839885, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 2.0025000000000003e-06, |
|
"loss": 0.1756, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.1582767373259244, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 1.9775e-06, |
|
"loss": 0.1767, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.15849626123344995, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.9525000000000004e-06, |
|
"loss": 0.1863, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.1587157851409755, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.9275e-06, |
|
"loss": 0.2036, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.15893530904850106, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.9025000000000002e-06, |
|
"loss": 0.1922, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.15915483295602662, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.8775000000000002e-06, |
|
"loss": 0.1937, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.15937435686355217, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.8525e-06, |
|
"loss": 0.2084, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.15959388077107772, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.8275e-06, |
|
"loss": 0.2239, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.15981340467860328, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.8025000000000001e-06, |
|
"loss": 0.1826, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.16003292858612883, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.7775000000000001e-06, |
|
"loss": 0.1847, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.16025245249365438, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.7525e-06, |
|
"loss": 0.2061, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.16047197640117994, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.7275e-06, |
|
"loss": 0.1872, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.1606915003087055, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 1.7025000000000002e-06, |
|
"loss": 0.1826, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.16091102421623105, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.6775000000000002e-06, |
|
"loss": 0.1821, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.1611305481237566, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 1.6525000000000003e-06, |
|
"loss": 0.1842, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.16135007203128215, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 1.6275e-06, |
|
"loss": 0.1754, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.1615695959388077, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.6025000000000001e-06, |
|
"loss": 0.1928, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.16178911984633326, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.5775000000000001e-06, |
|
"loss": 0.1871, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.16200864375385882, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.5525000000000002e-06, |
|
"loss": 0.2064, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.16222816766138437, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.5275000000000002e-06, |
|
"loss": 0.2008, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.16244769156890992, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.5025e-06, |
|
"loss": 0.1788, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.16266721547643548, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.4775e-06, |
|
"loss": 0.1762, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.16288673938396103, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.4525e-06, |
|
"loss": 0.1807, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.1631062632914866, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.4275e-06, |
|
"loss": 0.2052, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.16332578719901214, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.4025000000000003e-06, |
|
"loss": 0.1669, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.1635453111065377, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.3775000000000002e-06, |
|
"loss": 0.1858, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.16376483501406325, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 1.3525000000000002e-06, |
|
"loss": 0.1636, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.1639843589215888, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.3275000000000002e-06, |
|
"loss": 0.1912, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.16420388282911436, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.3025000000000002e-06, |
|
"loss": 0.2127, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.1644234067366399, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.2775e-06, |
|
"loss": 0.1856, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.16464293064416546, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.2525e-06, |
|
"loss": 0.1888, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.16486245455169102, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.2275000000000001e-06, |
|
"loss": 0.2093, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.16508197845921657, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.2025000000000001e-06, |
|
"loss": 0.1947, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.16530150236674213, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.1775e-06, |
|
"loss": 0.2203, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.16552102627426768, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.1525000000000002e-06, |
|
"loss": 0.1957, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.16574055018179323, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 1.1275000000000002e-06, |
|
"loss": 0.1815, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.1659600740893188, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 1.1025e-06, |
|
"loss": 0.163, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.16617959799684434, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.0775e-06, |
|
"loss": 0.167, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.1663991219043699, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.0525e-06, |
|
"loss": 0.1934, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.16661864581189545, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.0275000000000001e-06, |
|
"loss": 0.1982, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.166838169719421, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.0025000000000001e-06, |
|
"loss": 0.202, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.16705769362694656, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 9.775000000000002e-07, |
|
"loss": 0.1615, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.1672772175344721, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 9.525000000000001e-07, |
|
"loss": 0.2037, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.16749674144199767, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 9.275000000000001e-07, |
|
"loss": 0.2211, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.16771626534952322, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 9.025e-07, |
|
"loss": 0.1871, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.16793578925704877, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 8.775000000000001e-07, |
|
"loss": 0.2264, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.16815531316457433, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 8.525000000000001e-07, |
|
"loss": 0.1951, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.16837483707209988, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 8.275000000000001e-07, |
|
"loss": 0.1819, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.16859436097962543, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 8.025e-07, |
|
"loss": 0.1665, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.168813884887151, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 7.775000000000001e-07, |
|
"loss": 0.1548, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.16903340879467654, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 7.525e-07, |
|
"loss": 0.1992, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.1692529327022021, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 7.275e-07, |
|
"loss": 0.1725, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.16947245660972765, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 7.025000000000002e-07, |
|
"loss": 0.1665, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.1696919805172532, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 6.775000000000001e-07, |
|
"loss": 0.1567, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.16991150442477876, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 6.525000000000001e-07, |
|
"loss": 0.1834, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.1701310283323043, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 6.275e-07, |
|
"loss": 0.1979, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.17035055223982987, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 6.025000000000001e-07, |
|
"loss": 0.2028, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.17057007614735542, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 5.775000000000001e-07, |
|
"loss": 0.181, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.17078960005488097, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 5.525e-07, |
|
"loss": 0.1798, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.17100912396240653, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 5.275e-07, |
|
"loss": 0.1906, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.17122864786993208, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 5.025000000000001e-07, |
|
"loss": 0.1689, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.17144817177745764, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 4.775000000000001e-07, |
|
"loss": 0.1893, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.1716676956849832, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 4.525e-07, |
|
"loss": 0.1728, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.17188721959250874, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 4.275000000000001e-07, |
|
"loss": 0.1853, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.1721067435000343, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 4.0250000000000006e-07, |
|
"loss": 0.1792, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.17232626740755985, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 3.7750000000000004e-07, |
|
"loss": 0.1823, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.1725457913150854, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 3.525e-07, |
|
"loss": 0.1791, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.17276531522261096, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.2750000000000004e-07, |
|
"loss": 0.192, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.17298483913013651, |
|
"grad_norm": 0.875, |
|
"learning_rate": 3.025e-07, |
|
"loss": 0.1608, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.17320436303766207, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 2.7750000000000004e-07, |
|
"loss": 0.1995, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.17342388694518762, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 2.525e-07, |
|
"loss": 0.186, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.17364341085271318, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 2.2750000000000002e-07, |
|
"loss": 0.1908, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.17386293476023873, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 2.0250000000000002e-07, |
|
"loss": 0.1637, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.17408245866776428, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.775e-07, |
|
"loss": 0.1692, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.17430198257528984, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 1.5250000000000002e-07, |
|
"loss": 0.1555, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.1745215064828154, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 1.275e-07, |
|
"loss": 0.1798, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.17474103039034095, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.0250000000000001e-07, |
|
"loss": 0.2014, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.1749605542978665, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 7.750000000000001e-08, |
|
"loss": 0.1988, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.17518007820539205, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 5.250000000000001e-08, |
|
"loss": 0.2301, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.1753996021129176, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 2.75e-08, |
|
"loss": 0.1639, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.17561912602044316, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 2.5e-09, |
|
"loss": 0.1793, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 8000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5267428972077793e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|