|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.968804495252858, |
|
"eval_steps": 500, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006458696635019053, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 1.9988374346056966e-05, |
|
"loss": 1.2716, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0012917393270038106, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 1.997545695278693e-05, |
|
"loss": 0.5754, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.001937608990505716, |
|
"grad_norm": 1.53125, |
|
"learning_rate": 1.996253955951689e-05, |
|
"loss": 0.5303, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0025834786540076212, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.9949622166246855e-05, |
|
"loss": 0.4799, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0032293483175095264, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.9936704772976815e-05, |
|
"loss": 0.5005, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.003875217981011432, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.9923787379706776e-05, |
|
"loss": 0.5047, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004521087644513337, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.991086998643674e-05, |
|
"loss": 0.5359, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0051669573080152425, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.98979525931667e-05, |
|
"loss": 0.5002, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005812826971517148, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.9885035199896665e-05, |
|
"loss": 0.4836, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006458696635019053, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 1.9872117806626625e-05, |
|
"loss": 0.4985, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0071045662985209585, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.9859200413356586e-05, |
|
"loss": 0.4608, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.007750435962022864, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.9846283020086546e-05, |
|
"loss": 0.4479, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.008396305625524769, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.983336562681651e-05, |
|
"loss": 0.4692, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.009042175289026674, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.982044823354647e-05, |
|
"loss": 0.4382, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.00968804495252858, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 1.9807530840276435e-05, |
|
"loss": 0.4905, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010333914616030485, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.9794613447006395e-05, |
|
"loss": 0.4983, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01097978427953239, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.9781696053736356e-05, |
|
"loss": 0.4793, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.011625653943034296, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.976877866046632e-05, |
|
"loss": 0.4776, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.012271523606536201, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.975586126719628e-05, |
|
"loss": 0.4828, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.012917393270038106, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.9742943873926245e-05, |
|
"loss": 0.4848, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.013563262933540012, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.9730026480656205e-05, |
|
"loss": 0.456, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.014209132597041917, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.971710908738617e-05, |
|
"loss": 0.4839, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.014855002260543822, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.970419169411613e-05, |
|
"loss": 0.5081, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.015500871924045728, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.969127430084609e-05, |
|
"loss": 0.489, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01614674158754763, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.9678356907576054e-05, |
|
"loss": 0.462, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.016792611251049538, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.9665439514306015e-05, |
|
"loss": 0.4653, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.017438480914551444, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.965252212103598e-05, |
|
"loss": 0.435, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.018084350578053347, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.9639604727765936e-05, |
|
"loss": 0.4497, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.018730220241555254, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.96266873344959e-05, |
|
"loss": 0.4648, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.01937608990505716, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.961376994122586e-05, |
|
"loss": 0.4975, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.020021959568559063, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.9600852547955825e-05, |
|
"loss": 0.4695, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02066782923206097, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.9587935154685785e-05, |
|
"loss": 0.4662, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.021313698895562876, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.957501776141575e-05, |
|
"loss": 0.4465, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.02195956855906478, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.956210036814571e-05, |
|
"loss": 0.4287, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.022605438222566686, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.954918297487567e-05, |
|
"loss": 0.4362, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.023251307886068592, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.9536265581605635e-05, |
|
"loss": 0.4555, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.023897177549570495, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.9523348188335595e-05, |
|
"loss": 0.4631, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.024543047213072402, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.951043079506556e-05, |
|
"loss": 0.4535, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02518891687657431, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.949751340179552e-05, |
|
"loss": 0.4533, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.02583478654007621, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.948459600852548e-05, |
|
"loss": 0.4782, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.026480656203578118, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.9471678615255444e-05, |
|
"loss": 0.47, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.027126525867080024, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.9458761221985405e-05, |
|
"loss": 0.4593, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.027772395530581927, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.944584382871537e-05, |
|
"loss": 0.4724, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.028418265194083834, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.943292643544533e-05, |
|
"loss": 0.426, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.02906413485758574, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.942000904217529e-05, |
|
"loss": 0.3985, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.029710004521087643, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.940709164890525e-05, |
|
"loss": 0.4164, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03035587418458955, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.9394174255635215e-05, |
|
"loss": 0.4387, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.031001743848091456, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.9381256862365175e-05, |
|
"loss": 0.4655, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.03164761351159336, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.936833946909514e-05, |
|
"loss": 0.4184, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.03229348317509526, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 1.93554220758251e-05, |
|
"loss": 0.4113, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03293935283859717, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.934250468255506e-05, |
|
"loss": 0.4463, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.033585222502099076, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.9329587289285024e-05, |
|
"loss": 0.455, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03423109216560098, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.9316669896014985e-05, |
|
"loss": 0.4951, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03487696182910289, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.930375250274495e-05, |
|
"loss": 0.4477, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.035522831492604795, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.929083510947491e-05, |
|
"loss": 0.4095, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.036168701156106695, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.9277917716204874e-05, |
|
"loss": 0.4777, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.0368145708196086, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.9265000322934834e-05, |
|
"loss": 0.4488, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.03746044048311051, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.9252082929664795e-05, |
|
"loss": 0.4323, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.038106310146612414, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.923916553639476e-05, |
|
"loss": 0.4281, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.03875217981011432, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.922624814312472e-05, |
|
"loss": 0.4037, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03939804947361623, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.9213330749854683e-05, |
|
"loss": 0.4093, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.04004391913711813, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.920041335658464e-05, |
|
"loss": 0.3905, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.04068978880062003, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.9187495963314605e-05, |
|
"loss": 0.3851, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.04133565846412194, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.9174578570044565e-05, |
|
"loss": 0.436, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.041981528127623846, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.916166117677453e-05, |
|
"loss": 0.446, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.04262739779112575, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.914874378350449e-05, |
|
"loss": 0.4071, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.04327326745462766, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 1.9135826390234454e-05, |
|
"loss": 0.4127, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.04391913711812956, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.9122908996964414e-05, |
|
"loss": 0.4071, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.044565006781631465, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.9109991603694375e-05, |
|
"loss": 0.4482, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.04521087644513337, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.909707421042434e-05, |
|
"loss": 0.4328, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04585674610863528, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.90841568171543e-05, |
|
"loss": 0.3965, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.046502615772137185, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.9071239423884263e-05, |
|
"loss": 0.4269, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.04714848543563909, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.9058322030614224e-05, |
|
"loss": 0.4305, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.04779435509914099, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.9045404637344185e-05, |
|
"loss": 0.4325, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.0484402247626429, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.903248724407415e-05, |
|
"loss": 0.4243, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.049086094426144804, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.901956985080411e-05, |
|
"loss": 0.4072, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.04973196408964671, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.9006652457534073e-05, |
|
"loss": 0.3744, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.05037783375314862, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.8993735064264034e-05, |
|
"loss": 0.4234, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.05102370341665052, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.8980817670993994e-05, |
|
"loss": 0.4271, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.05166957308015242, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.8967900277723955e-05, |
|
"loss": 0.4418, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05231544274365433, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.895498288445392e-05, |
|
"loss": 0.3877, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.052961312407156236, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.894206549118388e-05, |
|
"loss": 0.3883, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.05360718207065814, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.8929148097913844e-05, |
|
"loss": 0.3925, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.05425305173416005, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.8916230704643804e-05, |
|
"loss": 0.4525, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.054898921397661955, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.8903313311373765e-05, |
|
"loss": 0.422, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.055544791061163855, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.889039591810373e-05, |
|
"loss": 0.4227, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.05619066072466576, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.887747852483369e-05, |
|
"loss": 0.4237, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.05683653038816767, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.8864561131563653e-05, |
|
"loss": 0.4033, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.057482400051669574, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.8851643738293614e-05, |
|
"loss": 0.4373, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.05812826971517148, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.8838726345023578e-05, |
|
"loss": 0.4513, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05877413937867338, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.882580895175354e-05, |
|
"loss": 0.3729, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.05942000904217529, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.88128915584835e-05, |
|
"loss": 0.413, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.06006587870567719, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.8799974165213463e-05, |
|
"loss": 0.3959, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.0607117483691791, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.8787056771943424e-05, |
|
"loss": 0.407, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.061357618032681006, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.8774139378673388e-05, |
|
"loss": 0.4124, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.06200348769618291, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.8761221985403345e-05, |
|
"loss": 0.4375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.06264935735968481, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.874830459213331e-05, |
|
"loss": 0.403, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.06329522702318673, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.873538719886327e-05, |
|
"loss": 0.4728, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.06394109668668863, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.8722469805593233e-05, |
|
"loss": 0.4148, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.06458696635019053, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 1.8709552412323194e-05, |
|
"loss": 0.4078, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06523283601369244, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.8696635019053158e-05, |
|
"loss": 0.3877, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.06587870567719434, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.868371762578312e-05, |
|
"loss": 0.4138, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.06652457534069625, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.867080023251308e-05, |
|
"loss": 0.4336, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.06717044500419815, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.8657882839243043e-05, |
|
"loss": 0.4201, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.06781631466770006, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.8644965445973004e-05, |
|
"loss": 0.3994, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.06846218433120196, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.8632048052702968e-05, |
|
"loss": 0.4029, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.06910805399470386, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.861913065943293e-05, |
|
"loss": 0.3882, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.06975392365820578, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.860621326616289e-05, |
|
"loss": 0.3975, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.07039979332170768, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.8593295872892853e-05, |
|
"loss": 0.4082, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.07104566298520959, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.8580378479622814e-05, |
|
"loss": 0.4019, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.07169153264871149, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.8567461086352778e-05, |
|
"loss": 0.4039, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.07233740231221339, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.8554543693082738e-05, |
|
"loss": 0.3988, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.0729832719757153, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.85416262998127e-05, |
|
"loss": 0.3993, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.0736291416392172, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.852870890654266e-05, |
|
"loss": 0.3876, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.07427501130271912, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.8515791513272623e-05, |
|
"loss": 0.3997, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.07492088096622102, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.8502874120002584e-05, |
|
"loss": 0.4093, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.07556675062972293, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.8489956726732548e-05, |
|
"loss": 0.4015, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.07621262029322483, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.847703933346251e-05, |
|
"loss": 0.3889, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.07685848995672673, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.846412194019247e-05, |
|
"loss": 0.4231, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.07750435962022864, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.8451204546922433e-05, |
|
"loss": 0.4312, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.07815022928373054, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.8438287153652394e-05, |
|
"loss": 0.4293, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.07879609894723245, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.8425369760382358e-05, |
|
"loss": 0.4149, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.07944196861073435, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.8412452367112318e-05, |
|
"loss": 0.3975, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.08008783827423625, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.839953497384228e-05, |
|
"loss": 0.3924, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.08073370793773817, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.8386617580572243e-05, |
|
"loss": 0.3891, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.08137957760124007, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.8373700187302203e-05, |
|
"loss": 0.4134, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.08202544726474198, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.8360782794032167e-05, |
|
"loss": 0.404, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.08267131692824388, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.8347865400762128e-05, |
|
"loss": 0.3847, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.08331718659174578, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.8334948007492092e-05, |
|
"loss": 0.4006, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.08396305625524769, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.832203061422205e-05, |
|
"loss": 0.3921, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08460892591874959, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 1.8309113220952013e-05, |
|
"loss": 0.4274, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.0852547955822515, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.8296195827681974e-05, |
|
"loss": 0.3612, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.0859006652457534, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.8283278434411938e-05, |
|
"loss": 0.4426, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.08654653490925532, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.82703610411419e-05, |
|
"loss": 0.424, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.08719240457275722, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.8257443647871862e-05, |
|
"loss": 0.3938, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.08783827423625912, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.8244526254601823e-05, |
|
"loss": 0.3766, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.08848414389976103, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.8231608861331784e-05, |
|
"loss": 0.4252, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.08913001356326293, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.8218691468061748e-05, |
|
"loss": 0.3753, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.08977588322676484, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.8205774074791708e-05, |
|
"loss": 0.3706, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.09042175289026674, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.8192856681521672e-05, |
|
"loss": 0.436, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.09106762255376864, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.8179939288251633e-05, |
|
"loss": 0.3956, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.09171349221727056, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.8167021894981593e-05, |
|
"loss": 0.3947, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.09235936188077246, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.8154104501711557e-05, |
|
"loss": 0.3785, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.09300523154427437, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.8141187108441518e-05, |
|
"loss": 0.391, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.09365110120777627, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.8128269715171482e-05, |
|
"loss": 0.3503, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.09429697087127818, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.8115352321901442e-05, |
|
"loss": 0.3873, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.09494284053478008, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.8102434928631403e-05, |
|
"loss": 0.4106, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.09558871019828198, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.8089517535361364e-05, |
|
"loss": 0.382, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.0962345798617839, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 1.8076600142091328e-05, |
|
"loss": 0.3992, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.0968804495252858, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.8063682748821288e-05, |
|
"loss": 0.4325, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09752631918878771, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.8050765355551252e-05, |
|
"loss": 0.3857, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.09817218885228961, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.8037847962281213e-05, |
|
"loss": 0.3752, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.09881805851579151, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.8024930569011173e-05, |
|
"loss": 0.382, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.09946392817929342, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.8012013175741137e-05, |
|
"loss": 0.4044, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.10010979784279532, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.7999095782471098e-05, |
|
"loss": 0.4115, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.10075566750629723, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.7986178389201062e-05, |
|
"loss": 0.3554, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.10140153716979913, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.7973260995931023e-05, |
|
"loss": 0.4131, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.10204740683330105, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.7960343602660983e-05, |
|
"loss": 0.3967, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.10269327649680295, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.7947426209390947e-05, |
|
"loss": 0.4074, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.10333914616030485, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.7934508816120908e-05, |
|
"loss": 0.3977, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.10398501582380676, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.7921591422850872e-05, |
|
"loss": 0.3739, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.10463088548730866, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.7908674029580832e-05, |
|
"loss": 0.3866, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.10527675515081057, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.7895756636310796e-05, |
|
"loss": 0.4252, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.10592262481431247, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.7882839243040754e-05, |
|
"loss": 0.4244, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.10656849447781437, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.7869921849770718e-05, |
|
"loss": 0.3704, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.10721436414131628, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.7857004456500678e-05, |
|
"loss": 0.3986, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.10786023380481818, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.7844087063230642e-05, |
|
"loss": 0.3588, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.1085061034683201, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.7831169669960603e-05, |
|
"loss": 0.3863, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.109151973131822, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.7818252276690567e-05, |
|
"loss": 0.3947, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.10979784279532391, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.7805334883420527e-05, |
|
"loss": 0.3786, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.11044371245882581, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.7792417490150488e-05, |
|
"loss": 0.401, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.11108958212232771, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.7779500096880452e-05, |
|
"loss": 0.3877, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.11173545178582962, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.7766582703610413e-05, |
|
"loss": 0.3789, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.11238132144933152, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.7753665310340376e-05, |
|
"loss": 0.3633, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.11302719111283344, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.7740747917070337e-05, |
|
"loss": 0.3516, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.11367306077633534, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.7727830523800298e-05, |
|
"loss": 0.3654, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.11431893043983724, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.771491313053026e-05, |
|
"loss": 0.4036, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.11496480010333915, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.7701995737260222e-05, |
|
"loss": 0.3714, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.11561066976684105, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.7689078343990186e-05, |
|
"loss": 0.3859, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.11625653943034296, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.7676160950720147e-05, |
|
"loss": 0.3662, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.11690240909384486, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.7663243557450107e-05, |
|
"loss": 0.3789, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.11754827875734676, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.7650326164180068e-05, |
|
"loss": 0.381, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.11819414842084867, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.7637408770910032e-05, |
|
"loss": 0.3797, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.11884001808435057, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.7624491377639993e-05, |
|
"loss": 0.3987, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.11948588774785249, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.7611573984369957e-05, |
|
"loss": 0.3614, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.12013175741135439, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.7598656591099917e-05, |
|
"loss": 0.3693, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.1207776270748563, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.7585739197829878e-05, |
|
"loss": 0.4046, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.1214234967383582, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.7572821804559842e-05, |
|
"loss": 0.3848, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.1220693664018601, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.7559904411289802e-05, |
|
"loss": 0.3783, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.12271523606536201, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.7546987018019766e-05, |
|
"loss": 0.3475, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.12336110572886391, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 1.7534069624749727e-05, |
|
"loss": 0.379, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.12400697539236583, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.7521152231479688e-05, |
|
"loss": 0.3901, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.12465284505586773, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.750823483820965e-05, |
|
"loss": 0.3238, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.12529871471936962, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.7495317444939612e-05, |
|
"loss": 0.3564, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.12594458438287154, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.7482400051669576e-05, |
|
"loss": 0.374, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.12659045404637345, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.7469482658399537e-05, |
|
"loss": 0.3689, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.12723632370987534, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.74565652651295e-05, |
|
"loss": 0.3852, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.12788219337337725, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.7443647871859458e-05, |
|
"loss": 0.3713, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.12852806303687916, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.7430730478589422e-05, |
|
"loss": 0.3635, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.12917393270038105, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.7417813085319383e-05, |
|
"loss": 0.3597, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12981980236388296, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.7404895692049346e-05, |
|
"loss": 0.363, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.13046567202738488, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.7391978298779307e-05, |
|
"loss": 0.3515, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.1311115416908868, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.737906090550927e-05, |
|
"loss": 0.3909, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.13175741135438868, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.736614351223923e-05, |
|
"loss": 0.3673, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.1324032810178906, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.7353226118969192e-05, |
|
"loss": 0.3656, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.1330491506813925, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.7340308725699156e-05, |
|
"loss": 0.3491, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.1336950203448944, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.7327391332429117e-05, |
|
"loss": 0.3741, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.1343408900083963, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.731447393915908e-05, |
|
"loss": 0.3811, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.13498675967189822, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.730155654588904e-05, |
|
"loss": 0.361, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.13563262933540013, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.7288639152619002e-05, |
|
"loss": 0.3211, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.13627849899890201, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.7275721759348966e-05, |
|
"loss": 0.3374, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.13692436866240393, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.7262804366078927e-05, |
|
"loss": 0.3585, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.13757023832590584, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.724988697280889e-05, |
|
"loss": 0.3484, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.13821610798940773, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.723696957953885e-05, |
|
"loss": 0.3619, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.13886197765290964, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.7224052186268812e-05, |
|
"loss": 0.3664, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.13950784731641155, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.7211134792998772e-05, |
|
"loss": 0.3225, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.14015371697991344, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.7198217399728736e-05, |
|
"loss": 0.3962, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.14079958664341535, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.7185300006458697e-05, |
|
"loss": 0.3453, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.14144545630691727, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.717238261318866e-05, |
|
"loss": 0.3801, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.14209132597041918, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.715946521991862e-05, |
|
"loss": 0.3448, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.14273719563392107, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.7146547826648582e-05, |
|
"loss": 0.337, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.14338306529742298, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.7133630433378546e-05, |
|
"loss": 0.3336, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.1440289349609249, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.7120713040108507e-05, |
|
"loss": 0.3712, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.14467480462442678, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.710779564683847e-05, |
|
"loss": 0.3873, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.1453206742879287, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.709487825356843e-05, |
|
"loss": 0.3807, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.1459665439514306, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.7081960860298392e-05, |
|
"loss": 0.3768, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.14661241361493252, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.7069043467028356e-05, |
|
"loss": 0.3609, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.1472582832784344, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.7056126073758316e-05, |
|
"loss": 0.3956, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.14790415294193632, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.704320868048828e-05, |
|
"loss": 0.348, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.14855002260543823, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.703029128721824e-05, |
|
"loss": 0.3529, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.14919589226894012, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.7017373893948205e-05, |
|
"loss": 0.374, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.14984176193244203, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.7004456500678162e-05, |
|
"loss": 0.3567, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.15048763159594394, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.6991539107408126e-05, |
|
"loss": 0.3336, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.15113350125944586, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.6978621714138087e-05, |
|
"loss": 0.3622, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.15177937092294774, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.696570432086805e-05, |
|
"loss": 0.3463, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.15242524058644966, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.695278692759801e-05, |
|
"loss": 0.3618, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.15307111024995157, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.6939869534327975e-05, |
|
"loss": 0.361, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.15371697991345346, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.6926952141057936e-05, |
|
"loss": 0.3525, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.15436284957695537, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.6914034747787897e-05, |
|
"loss": 0.3987, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.15500871924045728, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.690111735451786e-05, |
|
"loss": 0.3815, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.15565458890395917, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.688819996124782e-05, |
|
"loss": 0.3661, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.15630045856746108, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.6875282567977785e-05, |
|
"loss": 0.3854, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.156946328230963, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.6862365174707746e-05, |
|
"loss": 0.3461, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.1575921978944649, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.6849447781437706e-05, |
|
"loss": 0.3339, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.1582380675579668, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.683653038816767e-05, |
|
"loss": 0.3447, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.1588839372214687, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.682361299489763e-05, |
|
"loss": 0.3569, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.15952980688497062, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.6810695601627595e-05, |
|
"loss": 0.3774, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.1601756765484725, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.6797778208357556e-05, |
|
"loss": 0.3591, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.16082154621197442, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.6784860815087516e-05, |
|
"loss": 0.353, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.16146741587547633, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.6771943421817477e-05, |
|
"loss": 0.3687, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16211328553897825, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.675902602854744e-05, |
|
"loss": 0.3569, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.16275915520248013, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.67461086352774e-05, |
|
"loss": 0.3871, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.16340502486598205, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.6733191242007365e-05, |
|
"loss": 0.3491, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.16405089452948396, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.6720273848737326e-05, |
|
"loss": 0.3472, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.16469676419298585, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.6707356455467287e-05, |
|
"loss": 0.3888, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.16534263385648776, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.669443906219725e-05, |
|
"loss": 0.3699, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.16598850351998967, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.668152166892721e-05, |
|
"loss": 0.3709, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.16663437318349156, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.6668604275657175e-05, |
|
"loss": 0.3407, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.16728024284699347, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.6655686882387136e-05, |
|
"loss": 0.3095, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.16792611251049538, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.6642769489117096e-05, |
|
"loss": 0.3539, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.1685719821739973, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.662985209584706e-05, |
|
"loss": 0.3644, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.16921785183749918, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.661693470257702e-05, |
|
"loss": 0.338, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.1698637215010011, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.6604017309306985e-05, |
|
"loss": 0.3815, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.170509591164503, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.6591099916036945e-05, |
|
"loss": 0.3428, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.1711554608280049, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.657818252276691e-05, |
|
"loss": 0.3627, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.1718013304915068, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.6565265129496867e-05, |
|
"loss": 0.339, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.17244720015500872, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.655234773622683e-05, |
|
"loss": 0.3156, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.17309306981851064, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.653943034295679e-05, |
|
"loss": 0.3778, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.17373893948201252, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.6526512949686755e-05, |
|
"loss": 0.3228, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.17438480914551444, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.6513595556416716e-05, |
|
"loss": 0.3097, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.17503067880901635, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.6500678163146676e-05, |
|
"loss": 0.3373, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.17567654847251823, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.648776076987664e-05, |
|
"loss": 0.2963, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.17632241813602015, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.64748433766066e-05, |
|
"loss": 0.3356, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.17696828779952206, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.6461925983336565e-05, |
|
"loss": 0.3537, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.17761415746302398, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.6449008590066526e-05, |
|
"loss": 0.3363, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.17826002712652586, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.643609119679649e-05, |
|
"loss": 0.3279, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.17890589679002777, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.642317380352645e-05, |
|
"loss": 0.3453, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.1795517664535297, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.641025641025641e-05, |
|
"loss": 0.3551, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.18019763611703157, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.6397339016986375e-05, |
|
"loss": 0.3309, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.1808435057805335, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.6384421623716335e-05, |
|
"loss": 0.3477, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1814893754440354, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.63715042304463e-05, |
|
"loss": 0.3189, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.18213524510753729, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.635858683717626e-05, |
|
"loss": 0.3501, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.1827811147710392, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.634566944390622e-05, |
|
"loss": 0.376, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.1834269844345411, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.633275205063618e-05, |
|
"loss": 0.3182, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.18407285409804303, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.6319834657366145e-05, |
|
"loss": 0.348, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.1847187237615449, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.6306917264096106e-05, |
|
"loss": 0.3668, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.18536459342504683, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.629399987082607e-05, |
|
"loss": 0.3126, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.18601046308854874, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.628108247755603e-05, |
|
"loss": 0.3166, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.18665633275205062, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.626816508428599e-05, |
|
"loss": 0.3214, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.18730220241555254, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.6255247691015955e-05, |
|
"loss": 0.3319, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.18794807207905445, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.6242330297745915e-05, |
|
"loss": 0.3513, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.18859394174255636, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.622941290447588e-05, |
|
"loss": 0.3384, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.18923981140605825, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.621649551120584e-05, |
|
"loss": 0.3425, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.18988568106956016, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.62035781179358e-05, |
|
"loss": 0.3595, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.19053155073306208, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.6190660724665765e-05, |
|
"loss": 0.316, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.19117742039656396, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.6177743331395725e-05, |
|
"loss": 0.3667, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.19182329006006588, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.616482593812569e-05, |
|
"loss": 0.3272, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.1924691597235678, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.615190854485565e-05, |
|
"loss": 0.3412, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.19311502938706968, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.6138991151585614e-05, |
|
"loss": 0.3202, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.1937608990505716, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.612607375831557e-05, |
|
"loss": 0.3119, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1944067687140735, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.6113156365045535e-05, |
|
"loss": 0.3172, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.19505263837757542, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.6100238971775496e-05, |
|
"loss": 0.3259, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.1956985080410773, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.608732157850546e-05, |
|
"loss": 0.3456, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.19634437770457921, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.607440418523542e-05, |
|
"loss": 0.3434, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.19699024736808113, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.606148679196538e-05, |
|
"loss": 0.3182, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.19763611703158301, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.6048569398695345e-05, |
|
"loss": 0.3398, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.19828198669508493, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.6035652005425305e-05, |
|
"loss": 0.3491, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.19892785635858684, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.602273461215527e-05, |
|
"loss": 0.3299, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.19957372602208875, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.600981721888523e-05, |
|
"loss": 0.3862, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.20021959568559064, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.5996899825615194e-05, |
|
"loss": 0.3252, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.20086546534909255, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.5983982432345154e-05, |
|
"loss": 0.3303, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.20151133501259447, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.5971065039075115e-05, |
|
"loss": 0.293, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.20215720467609635, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.595814764580508e-05, |
|
"loss": 0.3045, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.20280307433959827, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.594523025253504e-05, |
|
"loss": 0.3352, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.20344894400310018, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.5932312859265004e-05, |
|
"loss": 0.3099, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.2040948136666021, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.5919395465994964e-05, |
|
"loss": 0.3435, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.20474068333010398, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.5906478072724925e-05, |
|
"loss": 0.3232, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.2053865529936059, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.5893560679454885e-05, |
|
"loss": 0.3215, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.2060324226571078, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.588064328618485e-05, |
|
"loss": 0.2984, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.2066782923206097, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.586772589291481e-05, |
|
"loss": 0.3075, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.2073241619841116, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.5854808499644774e-05, |
|
"loss": 0.3388, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.20797003164761352, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.5841891106374735e-05, |
|
"loss": 0.3372, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.2086159013111154, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.5828973713104695e-05, |
|
"loss": 0.3434, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.20926177097461732, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.581605631983466e-05, |
|
"loss": 0.3172, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.20990764063811923, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.580313892656462e-05, |
|
"loss": 0.2827, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.21055351030162114, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 1.5790221533294584e-05, |
|
"loss": 0.3381, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.21119937996512303, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.5777304140024544e-05, |
|
"loss": 0.3342, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.21184524962862494, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.5764386746754505e-05, |
|
"loss": 0.325, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.21249111929212686, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.575146935348447e-05, |
|
"loss": 0.3414, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.21313698895562874, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 1.573855196021443e-05, |
|
"loss": 0.3553, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.21378285861913066, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.5725634566944394e-05, |
|
"loss": 0.3177, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.21442872828263257, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.5712717173674354e-05, |
|
"loss": 0.3091, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.21507459794613448, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.5699799780404318e-05, |
|
"loss": 0.3097, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.21572046760963637, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.5686882387134275e-05, |
|
"loss": 0.3581, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.21636633727313828, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.567396499386424e-05, |
|
"loss": 0.3502, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.2170122069366402, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.56610476005942e-05, |
|
"loss": 0.3187, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.21765807660014208, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.5648130207324164e-05, |
|
"loss": 0.3103, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.218303946263644, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.5635212814054124e-05, |
|
"loss": 0.3385, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.2189498159271459, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.5622295420784085e-05, |
|
"loss": 0.3202, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.21959568559064782, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.560937802751405e-05, |
|
"loss": 0.3226, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.2202415552541497, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.559646063424401e-05, |
|
"loss": 0.309, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.22088742491765162, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.5583543240973974e-05, |
|
"loss": 0.359, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.22153329458115353, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.5570625847703934e-05, |
|
"loss": 0.3195, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.22217916424465542, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.5557708454433898e-05, |
|
"loss": 0.3031, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.22282503390815733, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.554479106116386e-05, |
|
"loss": 0.347, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.22347090357165925, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.553187366789382e-05, |
|
"loss": 0.3115, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.22411677323516113, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.5518956274623783e-05, |
|
"loss": 0.3012, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.22476264289866305, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.5506038881353744e-05, |
|
"loss": 0.343, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.22540851256216496, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.5493121488083708e-05, |
|
"loss": 0.3255, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.22605438222566687, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.548020409481367e-05, |
|
"loss": 0.3245, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22670025188916876, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.546728670154363e-05, |
|
"loss": 0.3372, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.22734612155267067, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.545436930827359e-05, |
|
"loss": 0.3241, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.22799199121617258, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.5441451915003554e-05, |
|
"loss": 0.3369, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.22863786087967447, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.5428534521733514e-05, |
|
"loss": 0.3031, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.22928373054317638, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.541561712846348e-05, |
|
"loss": 0.3163, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.2299296002066783, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.540269973519344e-05, |
|
"loss": 0.3225, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.2305754698701802, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.53897823419234e-05, |
|
"loss": 0.2971, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.2312213395336821, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.5376864948653364e-05, |
|
"loss": 0.2844, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.231867209197184, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.5363947555383324e-05, |
|
"loss": 0.3211, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.23251307886068592, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.5351030162113288e-05, |
|
"loss": 0.338, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.2331589485241878, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.533811276884325e-05, |
|
"loss": 0.3553, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.23380481818768972, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 1.532519537557321e-05, |
|
"loss": 0.31, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.23445068785119164, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.5312277982303173e-05, |
|
"loss": 0.3114, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.23509655751469352, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.5299360589033134e-05, |
|
"loss": 0.3469, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.23574242717819544, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.5286443195763098e-05, |
|
"loss": 0.3169, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.23638829684169735, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.527352580249306e-05, |
|
"loss": 0.3241, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.23703416650519926, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.5260608409223022e-05, |
|
"loss": 0.3186, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.23768003616870115, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.5247691015952981e-05, |
|
"loss": 0.3087, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.23832590583220306, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.5234773622682944e-05, |
|
"loss": 0.3515, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.23897177549570497, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.5221856229412906e-05, |
|
"loss": 0.3323, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.23961764515920686, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.5208938836142868e-05, |
|
"loss": 0.3137, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.24026351482270877, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.519602144287283e-05, |
|
"loss": 0.321, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.2409093844862107, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 1.5183104049602791e-05, |
|
"loss": 0.3145, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.2415552541497126, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.5170186656332753e-05, |
|
"loss": 0.3122, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.2422011238132145, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.5157269263062716e-05, |
|
"loss": 0.306, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.2428469934767164, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.5144351869792678e-05, |
|
"loss": 0.2876, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.2434928631402183, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.513143447652264e-05, |
|
"loss": 0.3036, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.2441387328037202, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.5118517083252603e-05, |
|
"loss": 0.3037, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.2447846024672221, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.5105599689982561e-05, |
|
"loss": 0.347, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.24543047213072403, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.5092682296712524e-05, |
|
"loss": 0.3208, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.24607634179422594, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.5079764903442486e-05, |
|
"loss": 0.3294, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.24672221145772782, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.5066847510172448e-05, |
|
"loss": 0.3173, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.24736808112122974, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.505393011690241e-05, |
|
"loss": 0.3107, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.24801395078473165, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.5041012723632373e-05, |
|
"loss": 0.2944, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.24865982044823354, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.5028095330362334e-05, |
|
"loss": 0.329, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.24930569011173545, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.5015177937092296e-05, |
|
"loss": 0.2837, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.24995155977523736, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.5002260543822258e-05, |
|
"loss": 0.2982, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.25059742943873925, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.498934315055222e-05, |
|
"loss": 0.346, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.2512432991022412, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.4976425757282183e-05, |
|
"loss": 0.2859, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.2518891687657431, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.4963508364012143e-05, |
|
"loss": 0.3087, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.25253503842924496, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.4950590970742106e-05, |
|
"loss": 0.3128, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.2531809080927469, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.4937673577472068e-05, |
|
"loss": 0.3046, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.2538267777562488, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.492475618420203e-05, |
|
"loss": 0.2898, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.2544726474197507, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.4911838790931992e-05, |
|
"loss": 0.3, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.2551185170832526, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.4898921397661955e-05, |
|
"loss": 0.2813, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.2557643867467545, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.4886004004391914e-05, |
|
"loss": 0.3018, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.4873086611121876e-05, |
|
"loss": 0.3272, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.25705612607375833, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.4860169217851838e-05, |
|
"loss": 0.3298, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.2577019957372602, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.48472518245818e-05, |
|
"loss": 0.2867, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.2583478654007621, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.4834334431311763e-05, |
|
"loss": 0.2922, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25899373506426404, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.4821417038041725e-05, |
|
"loss": 0.2894, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.2596396047277659, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.4808499644771686e-05, |
|
"loss": 0.2941, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.26028547439126787, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.4795582251501648e-05, |
|
"loss": 0.3188, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.26093134405476975, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.478266485823161e-05, |
|
"loss": 0.3216, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.26157721371827164, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.4769747464961573e-05, |
|
"loss": 0.3011, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.2622230833817736, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.4756830071691535e-05, |
|
"loss": 0.2879, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.26286895304527547, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.4743912678421495e-05, |
|
"loss": 0.3318, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.26351482270877735, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.4730995285151458e-05, |
|
"loss": 0.3285, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.2641606923722793, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.471807789188142e-05, |
|
"loss": 0.3014, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.2648065620357812, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.4705160498611382e-05, |
|
"loss": 0.3336, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.26545243169928306, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.4692243105341345e-05, |
|
"loss": 0.3544, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.266098301362785, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.4679325712071307e-05, |
|
"loss": 0.3025, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.2667441710262869, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.4666408318801266e-05, |
|
"loss": 0.2966, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.2673900406897888, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.4653490925531228e-05, |
|
"loss": 0.2699, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.2680359103532907, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.464057353226119e-05, |
|
"loss": 0.2826, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.2686817800167926, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.4627656138991153e-05, |
|
"loss": 0.3193, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.2693276496802945, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.4614738745721115e-05, |
|
"loss": 0.296, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.26997351934379643, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.4601821352451076e-05, |
|
"loss": 0.2911, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.2706193890072983, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.4588903959181038e-05, |
|
"loss": 0.2901, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.27126525867080026, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.4575986565911e-05, |
|
"loss": 0.3118, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.27191112833430214, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.4563069172640962e-05, |
|
"loss": 0.2657, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.27255699799780403, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.4550151779370925e-05, |
|
"loss": 0.3434, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.27320286766130597, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.4537234386100887e-05, |
|
"loss": 0.3357, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.27384873732480786, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.4524316992830848e-05, |
|
"loss": 0.2931, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.27449460698830974, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.451139959956081e-05, |
|
"loss": 0.2707, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.2751404766518117, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 1.4498482206290772e-05, |
|
"loss": 0.279, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.27578634631531357, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.4485564813020735e-05, |
|
"loss": 0.2904, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.27643221597881545, |
|
"grad_norm": 1.7265625, |
|
"learning_rate": 1.4472647419750697e-05, |
|
"loss": 0.3024, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.2770780856423174, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.4459730026480659e-05, |
|
"loss": 0.2895, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.2777239553058193, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.4446812633210618e-05, |
|
"loss": 0.3145, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.27836982496932117, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.443389523994058e-05, |
|
"loss": 0.2871, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.2790156946328231, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.4420977846670543e-05, |
|
"loss": 0.3401, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.279661564296325, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 1.4408060453400505e-05, |
|
"loss": 0.2879, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.2803074339598269, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.4395143060130467e-05, |
|
"loss": 0.2919, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.2809533036233288, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.4382225666860428e-05, |
|
"loss": 0.2694, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.2815991732868307, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.436930827359039e-05, |
|
"loss": 0.2994, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.28224504295033265, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.4356390880320352e-05, |
|
"loss": 0.2842, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.28289091261383453, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.4343473487050315e-05, |
|
"loss": 0.2762, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.2835367822773364, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.4330556093780277e-05, |
|
"loss": 0.3197, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.28418265194083836, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.431763870051024e-05, |
|
"loss": 0.3159, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.28482852160434025, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.43047213072402e-05, |
|
"loss": 0.3014, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.28547439126784213, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.4291803913970162e-05, |
|
"loss": 0.2796, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.2861202609313441, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 1.4278886520700124e-05, |
|
"loss": 0.2716, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.28676613059484596, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.4265969127430087e-05, |
|
"loss": 0.2985, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.28741200025834784, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.4253051734160049e-05, |
|
"loss": 0.2737, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.2880578699218498, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.4240134340890011e-05, |
|
"loss": 0.2981, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.28870373958535167, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.422721694761997e-05, |
|
"loss": 0.306, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.28934960924885356, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.4214299554349932e-05, |
|
"loss": 0.2849, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.2899954789123555, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.4201382161079895e-05, |
|
"loss": 0.2926, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.2906413485758574, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.4188464767809857e-05, |
|
"loss": 0.2816, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2912872182393593, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.417554737453982e-05, |
|
"loss": 0.282, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.2919330879028612, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.416262998126978e-05, |
|
"loss": 0.3208, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.2925789575663631, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.4149712587999742e-05, |
|
"loss": 0.2701, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.29322482722986504, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.4136795194729705e-05, |
|
"loss": 0.3363, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.2938706968933669, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 1.4123877801459667e-05, |
|
"loss": 0.2998, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.2945165665568688, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.4110960408189629e-05, |
|
"loss": 0.2947, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.29516243622037075, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.4098043014919591e-05, |
|
"loss": 0.3119, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.29580830588387264, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.4085125621649552e-05, |
|
"loss": 0.2674, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.2964541755473745, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.4072208228379514e-05, |
|
"loss": 0.2893, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.29710004521087646, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.4059290835109477e-05, |
|
"loss": 0.3205, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.29774591487437835, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.4046373441839439e-05, |
|
"loss": 0.2671, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.29839178453788023, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.4033456048569401e-05, |
|
"loss": 0.2869, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.2990376542013822, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.4020538655299363e-05, |
|
"loss": 0.2731, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.29968352386488406, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.4007621262029322e-05, |
|
"loss": 0.2805, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.30032939352838595, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.3994703868759285e-05, |
|
"loss": 0.3045, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.3009752631918879, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.3981786475489247e-05, |
|
"loss": 0.3243, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.3016211328553898, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.396886908221921e-05, |
|
"loss": 0.2969, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.3022670025188917, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.3955951688949172e-05, |
|
"loss": 0.2728, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.3029128721823936, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.3943034295679132e-05, |
|
"loss": 0.2671, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.3035587418458955, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.3930116902409094e-05, |
|
"loss": 0.301, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.3042046115093974, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.3917199509139057e-05, |
|
"loss": 0.2767, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.3048504811728993, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.3904282115869019e-05, |
|
"loss": 0.2887, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.3054963508364012, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.3891364722598981e-05, |
|
"loss": 0.2729, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.30614222049990314, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.3878447329328944e-05, |
|
"loss": 0.2962, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.306788090163405, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.3865529936058904e-05, |
|
"loss": 0.2769, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.3074339598269069, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.3852612542788866e-05, |
|
"loss": 0.296, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.30807982949040885, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.3839695149518829e-05, |
|
"loss": 0.2779, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.30872569915391074, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.3826777756248791e-05, |
|
"loss": 0.285, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.3093715688174126, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.3813860362978753e-05, |
|
"loss": 0.2754, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.31001743848091456, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.3800942969708716e-05, |
|
"loss": 0.2928, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.31066330814441645, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.3788025576438675e-05, |
|
"loss": 0.2771, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.31130917780791834, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.3775108183168637e-05, |
|
"loss": 0.2937, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.3119550474714203, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.3762190789898599e-05, |
|
"loss": 0.3024, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.31260091713492216, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.3749273396628561e-05, |
|
"loss": 0.2915, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.3132467867984241, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.3736356003358524e-05, |
|
"loss": 0.2942, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.313892656461926, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.3723438610088484e-05, |
|
"loss": 0.2808, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.3145385261254279, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.3710521216818447e-05, |
|
"loss": 0.2821, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.3151843957889298, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.3697603823548409e-05, |
|
"loss": 0.27, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.3158302654524317, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.3684686430278371e-05, |
|
"loss": 0.2942, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.3164761351159336, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.3671769037008333e-05, |
|
"loss": 0.2887, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.31712200477943553, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.3658851643738296e-05, |
|
"loss": 0.3196, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.3177678744429374, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.3645934250468256e-05, |
|
"loss": 0.2701, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.3184137441064393, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.3633016857198219e-05, |
|
"loss": 0.2718, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.31905961376994124, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.3620099463928181e-05, |
|
"loss": 0.2818, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.3197054834334431, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.3607182070658143e-05, |
|
"loss": 0.2695, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.320351353096945, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.3594264677388106e-05, |
|
"loss": 0.2907, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.32099722276044695, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.3581347284118068e-05, |
|
"loss": 0.264, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.32164309242394884, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.3568429890848027e-05, |
|
"loss": 0.3022, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.3222889620874507, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.3555512497577989e-05, |
|
"loss": 0.294, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.32293483175095267, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 1.3542595104307951e-05, |
|
"loss": 0.2754, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32358070141445455, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.3529677711037914e-05, |
|
"loss": 0.3165, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.3242265710779565, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.3516760317767876e-05, |
|
"loss": 0.3159, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.3248724407414584, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.3503842924497836e-05, |
|
"loss": 0.2921, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.32551831040496026, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.3490925531227799e-05, |
|
"loss": 0.3127, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.3261641800684622, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.3478008137957761e-05, |
|
"loss": 0.2831, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.3268100497319641, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.3465090744687723e-05, |
|
"loss": 0.3182, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.327455919395466, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.3452173351417686e-05, |
|
"loss": 0.268, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.3281017890589679, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.3439255958147648e-05, |
|
"loss": 0.2848, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.3287476587224698, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.3426338564877609e-05, |
|
"loss": 0.2831, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.3293935283859717, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.341342117160757e-05, |
|
"loss": 0.2941, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.33003939804947363, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.3400503778337533e-05, |
|
"loss": 0.2867, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.3306852677129755, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.3387586385067495e-05, |
|
"loss": 0.2909, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.3313311373764774, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.3374668991797458e-05, |
|
"loss": 0.2824, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.33197700703997934, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 1.336175159852742e-05, |
|
"loss": 0.2925, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.33262287670348123, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.3348834205257379e-05, |
|
"loss": 0.3143, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.3332687463669831, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.3335916811987341e-05, |
|
"loss": 0.3114, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.33391461603048506, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.3322999418717303e-05, |
|
"loss": 0.2604, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.33456048569398694, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.3310082025447266e-05, |
|
"loss": 0.3184, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.3352063553574889, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.3297164632177228e-05, |
|
"loss": 0.2632, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.33585222502099077, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.3284247238907189e-05, |
|
"loss": 0.3283, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.33649809468449265, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.3271329845637151e-05, |
|
"loss": 0.2789, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.3371439643479946, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.3258412452367113e-05, |
|
"loss": 0.2834, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.3377898340114965, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.3245495059097076e-05, |
|
"loss": 0.2747, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.33843570367499837, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.3232577665827038e-05, |
|
"loss": 0.2696, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.3390815733385003, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.3219660272557e-05, |
|
"loss": 0.2821, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.3397274430020022, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.320674287928696e-05, |
|
"loss": 0.2733, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.3403733126655041, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.3193825486016923e-05, |
|
"loss": 0.2673, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.341019182329006, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.3180908092746885e-05, |
|
"loss": 0.253, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.3416650519925079, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.3167990699476848e-05, |
|
"loss": 0.3147, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.3423109216560098, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.315507330620681e-05, |
|
"loss": 0.2985, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.34295679131951173, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.3142155912936772e-05, |
|
"loss": 0.2319, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.3436026609830136, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.3129238519666731e-05, |
|
"loss": 0.3024, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.34424853064651556, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 1.3116321126396693e-05, |
|
"loss": 0.2603, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.34489440031001745, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.3103403733126656e-05, |
|
"loss": 0.2716, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.34554026997351933, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.3090486339856618e-05, |
|
"loss": 0.2717, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.3461861396370213, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.307756894658658e-05, |
|
"loss": 0.2988, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.34683200930052316, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.306465155331654e-05, |
|
"loss": 0.3058, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.34747787896402504, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.3051734160046503e-05, |
|
"loss": 0.2729, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.348123748627527, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.3038816766776465e-05, |
|
"loss": 0.2547, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.34876961829102887, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 1.3025899373506428e-05, |
|
"loss": 0.3013, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.34941548795453076, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.301298198023639e-05, |
|
"loss": 0.2928, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.3500613576180327, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.3000064586966352e-05, |
|
"loss": 0.2673, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.3507072272815346, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.2987147193696313e-05, |
|
"loss": 0.2968, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.35135309694503647, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.2974229800426275e-05, |
|
"loss": 0.2581, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.3519989666085384, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.2961312407156237e-05, |
|
"loss": 0.2805, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.3526448362720403, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.29483950138862e-05, |
|
"loss": 0.2609, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.3532907059355422, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.2935477620616162e-05, |
|
"loss": 0.2851, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.3539365755990441, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.2922560227346124e-05, |
|
"loss": 0.2649, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.354582445262546, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.2909642834076083e-05, |
|
"loss": 0.3208, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.35522831492604795, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.2896725440806046e-05, |
|
"loss": 0.2663, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.35587418458954984, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.2883808047536008e-05, |
|
"loss": 0.266, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.3565200542530517, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.287089065426597e-05, |
|
"loss": 0.2681, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.35716592391655366, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.2857973260995932e-05, |
|
"loss": 0.3058, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.35781179358005555, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 1.2845055867725893e-05, |
|
"loss": 0.271, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.35845766324355743, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.2832138474455855e-05, |
|
"loss": 0.2786, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.3591035329070594, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.2819221081185818e-05, |
|
"loss": 0.2897, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.35974940257056126, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.280630368791578e-05, |
|
"loss": 0.262, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.36039527223406315, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.2793386294645742e-05, |
|
"loss": 0.2695, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.3610411418975651, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.2780468901375704e-05, |
|
"loss": 0.2609, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.361687011561067, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.2767551508105665e-05, |
|
"loss": 0.2691, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.36233288122456886, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.2754634114835627e-05, |
|
"loss": 0.2583, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.3629787508880708, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.274171672156559e-05, |
|
"loss": 0.2713, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.3636246205515727, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.2728799328295552e-05, |
|
"loss": 0.2948, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.36427049021507457, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.2715881935025514e-05, |
|
"loss": 0.268, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.3649163598785765, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.2702964541755477e-05, |
|
"loss": 0.3048, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.3655622295420784, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.2690047148485435e-05, |
|
"loss": 0.3148, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.36620809920558034, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.2677129755215398e-05, |
|
"loss": 0.2606, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.3668539688690822, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.266421236194536e-05, |
|
"loss": 0.2501, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.3674998385325841, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.2651294968675322e-05, |
|
"loss": 0.2725, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.36814570819608605, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.2638377575405285e-05, |
|
"loss": 0.2557, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.36879157785958794, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.2625460182135245e-05, |
|
"loss": 0.2807, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.3694374475230898, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.2612542788865207e-05, |
|
"loss": 0.2331, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.37008331718659176, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.259962539559517e-05, |
|
"loss": 0.2674, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.37072918685009365, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 1.2586708002325132e-05, |
|
"loss": 0.2602, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.37137505651359554, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.2573790609055094e-05, |
|
"loss": 0.2682, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.3720209261770975, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.2560873215785057e-05, |
|
"loss": 0.2859, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.37266679584059936, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.2547955822515017e-05, |
|
"loss": 0.2611, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.37331266550410125, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 1.253503842924498e-05, |
|
"loss": 0.3003, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.3739585351676032, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.2522121035974942e-05, |
|
"loss": 0.2663, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.3746044048311051, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.2509203642704904e-05, |
|
"loss": 0.2889, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.37525027449460696, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.2496286249434866e-05, |
|
"loss": 0.284, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.3758961441581089, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.2483368856164825e-05, |
|
"loss": 0.2516, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.3765420138216108, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.2470451462894788e-05, |
|
"loss": 0.2857, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.37718788348511273, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.245753406962475e-05, |
|
"loss": 0.2398, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.3778337531486146, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.2444616676354712e-05, |
|
"loss": 0.2716, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.3784796228121165, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.2431699283084674e-05, |
|
"loss": 0.2544, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.37912549247561844, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.2418781889814637e-05, |
|
"loss": 0.264, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.3797713621391203, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.2405864496544597e-05, |
|
"loss": 0.2627, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.3804172318026222, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.239294710327456e-05, |
|
"loss": 0.2469, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.38106310146612415, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.2380029710004522e-05, |
|
"loss": 0.237, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.38170897112962604, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.2367112316734484e-05, |
|
"loss": 0.2634, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.3823548407931279, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.2354194923464447e-05, |
|
"loss": 0.2816, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.38300071045662987, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.2341277530194409e-05, |
|
"loss": 0.2863, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.38364658012013175, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.232836013692437e-05, |
|
"loss": 0.3067, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.38429244978363364, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.2315442743654332e-05, |
|
"loss": 0.2695, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.3849383194471356, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.2302525350384294e-05, |
|
"loss": 0.2605, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.38558418911063747, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.2289607957114256e-05, |
|
"loss": 0.2201, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.38623005877413935, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.2276690563844219e-05, |
|
"loss": 0.2648, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.3868759284376413, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.2263773170574177e-05, |
|
"loss": 0.2734, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.3875217981011432, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.225085577730414e-05, |
|
"loss": 0.2623, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3881676677646451, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.2237938384034102e-05, |
|
"loss": 0.2523, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.388813537428147, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.2225020990764064e-05, |
|
"loss": 0.2922, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.3894594070916489, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.2212103597494027e-05, |
|
"loss": 0.2432, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.39010527675515083, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.2199186204223989e-05, |
|
"loss": 0.2515, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.3907511464186527, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.218626881095395e-05, |
|
"loss": 0.27, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.3913970160821546, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.2173351417683912e-05, |
|
"loss": 0.2588, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.39204288574565654, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.2160434024413874e-05, |
|
"loss": 0.2737, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.39268875540915843, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.2147516631143836e-05, |
|
"loss": 0.2602, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.3933346250726603, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.2134599237873799e-05, |
|
"loss": 0.2524, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.39398049473616226, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.2121681844603761e-05, |
|
"loss": 0.2579, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.39462636439966414, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.2108764451333722e-05, |
|
"loss": 0.2479, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.39527223406316603, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.2095847058063684e-05, |
|
"loss": 0.287, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.39591810372666797, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.2082929664793646e-05, |
|
"loss": 0.2955, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.39656397339016985, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 1.2070012271523608e-05, |
|
"loss": 0.2678, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.3972098430536718, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.205709487825357e-05, |
|
"loss": 0.277, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.3978557127171737, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.204417748498353e-05, |
|
"loss": 0.27, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.39850158238067557, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.2031260091713492e-05, |
|
"loss": 0.236, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.3991474520441775, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.2018342698443454e-05, |
|
"loss": 0.2735, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.3997933217076794, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.2005425305173417e-05, |
|
"loss": 0.2839, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.4004391913711813, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.1992507911903379e-05, |
|
"loss": 0.2568, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.4010850610346832, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.1979590518633341e-05, |
|
"loss": 0.2549, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.4017309306981851, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.1966673125363302e-05, |
|
"loss": 0.2614, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.402376800361687, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.1953755732093264e-05, |
|
"loss": 0.2681, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.40302267002518893, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.1940838338823226e-05, |
|
"loss": 0.2931, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.4036685396886908, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.1927920945553189e-05, |
|
"loss": 0.3026, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.4043144093521927, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.1915003552283151e-05, |
|
"loss": 0.2875, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.40496027901569465, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.1902086159013113e-05, |
|
"loss": 0.254, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.40560614867919653, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.1889168765743074e-05, |
|
"loss": 0.2546, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.4062520183426984, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.1876251372473036e-05, |
|
"loss": 0.278, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.40689788800620036, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.1863333979202998e-05, |
|
"loss": 0.263, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.40754375766970224, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.185041658593296e-05, |
|
"loss": 0.2772, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.4081896273332042, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.1837499192662923e-05, |
|
"loss": 0.2636, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.40883549699670607, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.1824581799392882e-05, |
|
"loss": 0.2763, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.40948136666020796, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 1.1811664406122844e-05, |
|
"loss": 0.282, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.4101272363237099, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.1798747012852806e-05, |
|
"loss": 0.2645, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.4107731059872118, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.1785829619582769e-05, |
|
"loss": 0.252, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.41141897565071367, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.1772912226312731e-05, |
|
"loss": 0.2339, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.4120648453142156, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 1.1759994833042693e-05, |
|
"loss": 0.2572, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.4127107149777175, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.1747077439772654e-05, |
|
"loss": 0.2661, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.4133565846412194, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.1734160046502616e-05, |
|
"loss": 0.2379, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.4140024543047213, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.1721242653232578e-05, |
|
"loss": 0.2577, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.4146483239682232, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.170832525996254e-05, |
|
"loss": 0.2605, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.4152941936317251, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.1695407866692503e-05, |
|
"loss": 0.2856, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.41594006329522704, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.1682490473422465e-05, |
|
"loss": 0.2529, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.4165859329587289, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.1669573080152426e-05, |
|
"loss": 0.25, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.4172318026222308, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.1656655686882388e-05, |
|
"loss": 0.2936, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.41787767228573275, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.164373829361235e-05, |
|
"loss": 0.2371, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.41852354194923463, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.1630820900342313e-05, |
|
"loss": 0.281, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.4191694116127366, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.1617903507072275e-05, |
|
"loss": 0.2726, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.41981528127623846, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.1604986113802234e-05, |
|
"loss": 0.2745, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.42046115093974035, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.1592068720532196e-05, |
|
"loss": 0.2778, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.4211070206032423, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.1579151327262159e-05, |
|
"loss": 0.2831, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.4217528902667442, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.1566233933992121e-05, |
|
"loss": 0.2526, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.42239875993024606, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.1553316540722083e-05, |
|
"loss": 0.2756, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.423044629593748, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.1540399147452045e-05, |
|
"loss": 0.2947, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.4236904992572499, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.1527481754182006e-05, |
|
"loss": 0.2641, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.42433636892075177, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.1514564360911968e-05, |
|
"loss": 0.2662, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.4249822385842537, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.150164696764193e-05, |
|
"loss": 0.2915, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.4256281082477556, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.1488729574371893e-05, |
|
"loss": 0.2619, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.4262739779112575, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.1475812181101855e-05, |
|
"loss": 0.2574, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4269198475747594, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.1462894787831817e-05, |
|
"loss": 0.2781, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.4275657172382613, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.1449977394561778e-05, |
|
"loss": 0.2637, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.4282115869017632, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.143706000129174e-05, |
|
"loss": 0.2976, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.42885745656526514, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.1424142608021703e-05, |
|
"loss": 0.2766, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.429503326228767, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.1411225214751665e-05, |
|
"loss": 0.2573, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.43014919589226897, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 1.1398307821481627e-05, |
|
"loss": 0.2483, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.43079506555577085, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.1385390428211586e-05, |
|
"loss": 0.2724, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.43144093521927274, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.1372473034941548e-05, |
|
"loss": 0.2643, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.4320868048827747, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.135955564167151e-05, |
|
"loss": 0.2381, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.43273267454627656, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.1346638248401473e-05, |
|
"loss": 0.2742, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.43337854420977845, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.1333720855131435e-05, |
|
"loss": 0.2807, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.4340244138732804, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.1320803461861398e-05, |
|
"loss": 0.2549, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.4346702835367823, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.1307886068591358e-05, |
|
"loss": 0.2546, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.43531615320028416, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.129496867532132e-05, |
|
"loss": 0.2787, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.4359620228637861, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.1282051282051283e-05, |
|
"loss": 0.2977, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.436607892527288, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.1269133888781245e-05, |
|
"loss": 0.2579, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.4372537621907899, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.1256216495511207e-05, |
|
"loss": 0.2587, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.4378996318542918, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.124329910224117e-05, |
|
"loss": 0.2931, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.4385455015177937, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.123038170897113e-05, |
|
"loss": 0.2854, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.43919137118129564, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.1217464315701093e-05, |
|
"loss": 0.2496, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.43983724084479753, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.1204546922431055e-05, |
|
"loss": 0.2726, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.4404831105082994, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.1191629529161017e-05, |
|
"loss": 0.2489, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.44112898017180135, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.117871213589098e-05, |
|
"loss": 0.2744, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.44177484983530324, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.1165794742620938e-05, |
|
"loss": 0.2838, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.4424207194988051, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 1.11528773493509e-05, |
|
"loss": 0.2332, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.44306658916230707, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.1139959956080863e-05, |
|
"loss": 0.2553, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.44371245882580895, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 1.1127042562810825e-05, |
|
"loss": 0.2643, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.44435832848931084, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.1114125169540788e-05, |
|
"loss": 0.265, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.4450041981528128, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.110120777627075e-05, |
|
"loss": 0.2502, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.44565006781631467, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.108829038300071e-05, |
|
"loss": 0.2605, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.44629593747981655, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.1075372989730673e-05, |
|
"loss": 0.2276, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.4469418071433185, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.1062455596460635e-05, |
|
"loss": 0.2222, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.4475876768068204, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.1049538203190597e-05, |
|
"loss": 0.2718, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.44823354647032226, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.103662080992056e-05, |
|
"loss": 0.244, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.4488794161338242, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.1023703416650522e-05, |
|
"loss": 0.258, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.4495252857973261, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.1010786023380482e-05, |
|
"loss": 0.243, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.45017115546082803, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.0997868630110445e-05, |
|
"loss": 0.2232, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.4508170251243299, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.0984951236840407e-05, |
|
"loss": 0.2422, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.4514628947878318, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.097203384357037e-05, |
|
"loss": 0.2531, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.45210876445133374, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.0959116450300332e-05, |
|
"loss": 0.2793, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.45275463411483563, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.094619905703029e-05, |
|
"loss": 0.2557, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.4534005037783375, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.0933281663760253e-05, |
|
"loss": 0.2386, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.45404637344183946, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.0920364270490215e-05, |
|
"loss": 0.2314, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.45469224310534134, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 1.0907446877220177e-05, |
|
"loss": 0.2889, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.45533811276884323, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.089452948395014e-05, |
|
"loss": 0.3016, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.45598398243234517, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.0881612090680102e-05, |
|
"loss": 0.2784, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.45662985209584706, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.0868694697410063e-05, |
|
"loss": 0.2575, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.45727572175934894, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.0855777304140025e-05, |
|
"loss": 0.2651, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.4579215914228509, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.0842859910869987e-05, |
|
"loss": 0.2405, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.45856746108635277, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.082994251759995e-05, |
|
"loss": 0.2219, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.45921333074985465, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.0817025124329912e-05, |
|
"loss": 0.2152, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.4598592004133566, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.0804107731059874e-05, |
|
"loss": 0.2447, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.4605050700768585, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.0791190337789835e-05, |
|
"loss": 0.2324, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.4611509397403604, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.0778272944519797e-05, |
|
"loss": 0.2615, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.4617968094038623, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.076535555124976e-05, |
|
"loss": 0.2184, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.4624426790673642, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.0752438157979721e-05, |
|
"loss": 0.2694, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.46308854873086613, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.0739520764709684e-05, |
|
"loss": 0.2464, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.463734418394368, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.0726603371439643e-05, |
|
"loss": 0.2583, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.4643802880578699, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.0713685978169605e-05, |
|
"loss": 0.2577, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.46502615772137185, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.0700768584899567e-05, |
|
"loss": 0.2363, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.46567202738487373, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.068785119162953e-05, |
|
"loss": 0.2979, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.4663178970483756, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.0674933798359492e-05, |
|
"loss": 0.2314, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.46696376671187756, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.0662016405089454e-05, |
|
"loss": 0.2572, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.46760963637537944, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.0649099011819415e-05, |
|
"loss": 0.2777, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.46825550603888133, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.0636181618549377e-05, |
|
"loss": 0.2364, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.46890137570238327, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.062326422527934e-05, |
|
"loss": 0.2629, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.46954724536588516, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.0610346832009302e-05, |
|
"loss": 0.2379, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.47019311502938704, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.0597429438739264e-05, |
|
"loss": 0.2396, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.470838984692889, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.0584512045469226e-05, |
|
"loss": 0.2371, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.47148485435639087, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.0571594652199187e-05, |
|
"loss": 0.2627, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.4721307240198928, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.0558677258929149e-05, |
|
"loss": 0.2372, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.4727765936833947, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.0545759865659111e-05, |
|
"loss": 0.2485, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.4734224633468966, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.0532842472389074e-05, |
|
"loss": 0.256, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.4740683330103985, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 1.0519925079119036e-05, |
|
"loss": 0.2518, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.4747142026739004, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.0507007685848995e-05, |
|
"loss": 0.2475, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.4753600723374023, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.0494090292578957e-05, |
|
"loss": 0.2587, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.47600594200090424, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.048117289930892e-05, |
|
"loss": 0.2741, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.4766518116644061, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.0468255506038882e-05, |
|
"loss": 0.2456, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.477297681327908, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.0455338112768844e-05, |
|
"loss": 0.2799, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.47794355099140995, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.0442420719498806e-05, |
|
"loss": 0.2491, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.47858942065491183, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.0429503326228767e-05, |
|
"loss": 0.2457, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.4792352903184137, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 1.041658593295873e-05, |
|
"loss": 0.2494, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.47988115998191566, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 1.0403668539688691e-05, |
|
"loss": 0.2613, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.48052702964541755, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.0390751146418654e-05, |
|
"loss": 0.254, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.48117289930891943, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.0377833753148616e-05, |
|
"loss": 0.234, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.4818187689724214, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.0364916359878577e-05, |
|
"loss": 0.256, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.48246463863592326, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.0351998966608539e-05, |
|
"loss": 0.2439, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.4831105082994252, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.0339081573338501e-05, |
|
"loss": 0.234, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.4837563779629271, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.0326164180068464e-05, |
|
"loss": 0.2987, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.484402247626429, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.0313246786798426e-05, |
|
"loss": 0.2478, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4850481172899309, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.0300329393528388e-05, |
|
"loss": 0.2426, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.4856939869534328, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.0287412000258347e-05, |
|
"loss": 0.2623, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.4863398566169347, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.027449460698831e-05, |
|
"loss": 0.2589, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.4869857262804366, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.0261577213718272e-05, |
|
"loss": 0.2339, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.4876315959439385, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.0248659820448234e-05, |
|
"loss": 0.2472, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.4882774656074404, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.0235742427178196e-05, |
|
"loss": 0.2423, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.48892333527094234, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.0222825033908158e-05, |
|
"loss": 0.2387, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.4895692049344442, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.0209907640638119e-05, |
|
"loss": 0.2336, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.4902150745979461, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.0196990247368081e-05, |
|
"loss": 0.2365, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.49086094426144805, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.0184072854098044e-05, |
|
"loss": 0.2368, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.49150681392494994, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.0171155460828006e-05, |
|
"loss": 0.2262, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.4921526835884519, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.0158238067557968e-05, |
|
"loss": 0.2606, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.49279855325195376, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.0145320674287929e-05, |
|
"loss": 0.2414, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.49344442291545565, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.0132403281017891e-05, |
|
"loss": 0.244, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.4940902925789576, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.0119485887747853e-05, |
|
"loss": 0.247, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.4947361622424595, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.0106568494477816e-05, |
|
"loss": 0.2467, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.49538203190596136, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.0093651101207778e-05, |
|
"loss": 0.2655, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.4960279015694633, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.008073370793774e-05, |
|
"loss": 0.2706, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.4966737712329652, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.00678163146677e-05, |
|
"loss": 0.2847, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.4973196408964671, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.0054898921397662e-05, |
|
"loss": 0.2563, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.497965510559969, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 1.0041981528127624e-05, |
|
"loss": 0.2613, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.4986113802234709, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.0029064134857586e-05, |
|
"loss": 0.2822, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.4992572498869728, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.0016146741587548e-05, |
|
"loss": 0.2212, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.49990311955047473, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.000322934831751e-05, |
|
"loss": 0.2724, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.5005489892139766, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 9.990311955047473e-06, |
|
"loss": 0.1998, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.5011948588774785, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 9.977394561777434e-06, |
|
"loss": 0.2457, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.5018407285409804, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 9.964477168507396e-06, |
|
"loss": 0.275, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.5024865982044824, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 9.951559775237358e-06, |
|
"loss": 0.2456, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.5031324678679843, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 9.938642381967319e-06, |
|
"loss": 0.2707, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.5037783375314862, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 9.925724988697281e-06, |
|
"loss": 0.2261, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.504424207194988, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 9.912807595427243e-06, |
|
"loss": 0.2636, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.5050700768584899, |
|
"grad_norm": 1.25, |
|
"learning_rate": 9.899890202157206e-06, |
|
"loss": 0.274, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.5057159465219918, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 9.886972808887168e-06, |
|
"loss": 0.2692, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.5063618161854938, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 9.87405541561713e-06, |
|
"loss": 0.2192, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.5070076858489957, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 9.86113802234709e-06, |
|
"loss": 0.2687, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.5076535555124976, |
|
"grad_norm": 1.25, |
|
"learning_rate": 9.848220629077053e-06, |
|
"loss": 0.2762, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.5082994251759995, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 9.835303235807014e-06, |
|
"loss": 0.2624, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.5089452948395013, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 9.822385842536976e-06, |
|
"loss": 0.2207, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.5095911645030033, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 9.809468449266938e-06, |
|
"loss": 0.2796, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.5102370341665052, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 9.7965510559969e-06, |
|
"loss": 0.2686, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.5108829038300071, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 9.783633662726863e-06, |
|
"loss": 0.2711, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.511528773493509, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 9.770716269456825e-06, |
|
"loss": 0.2388, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.5121746431570109, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.757798876186786e-06, |
|
"loss": 0.266, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 9.744881482916748e-06, |
|
"loss": 0.2336, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.5134663824840148, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 9.73196408964671e-06, |
|
"loss": 0.2289, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.5141122521475167, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 9.719046696376671e-06, |
|
"loss": 0.2497, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.5147581218110185, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 9.706129303106633e-06, |
|
"loss": 0.2412, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.5154039914745204, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 9.693211909836595e-06, |
|
"loss": 0.2535, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.5160498611380223, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 9.680294516566558e-06, |
|
"loss": 0.2646, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.5166957308015242, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 9.66737712329652e-06, |
|
"loss": 0.2541, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5173416004650262, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 9.654459730026482e-06, |
|
"loss": 0.2489, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.5179874701285281, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 9.641542336756443e-06, |
|
"loss": 0.2325, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.51863333979203, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 9.628624943486405e-06, |
|
"loss": 0.2153, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.5192792094555319, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.615707550216366e-06, |
|
"loss": 0.2447, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.5199250791190337, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 9.602790156946328e-06, |
|
"loss": 0.2733, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.5205709487825357, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 9.58987276367629e-06, |
|
"loss": 0.2482, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.5212168184460376, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 9.576955370406253e-06, |
|
"loss": 0.2449, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.5218626881095395, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 9.564037977136215e-06, |
|
"loss": 0.2589, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.5225085577730414, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 9.551120583866177e-06, |
|
"loss": 0.2875, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.5231544274365433, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 9.538203190596138e-06, |
|
"loss": 0.2275, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5238002971000452, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 9.5252857973261e-06, |
|
"loss": 0.271, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.5244461667635472, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 9.512368404056062e-06, |
|
"loss": 0.3096, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.525092036427049, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 9.499451010786023e-06, |
|
"loss": 0.2679, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.5257379060905509, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 9.486533617515985e-06, |
|
"loss": 0.266, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.5263837757540528, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 9.473616224245948e-06, |
|
"loss": 0.2349, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.5270296454175547, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 9.46069883097591e-06, |
|
"loss": 0.2319, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.5276755150810566, |
|
"grad_norm": 1.25, |
|
"learning_rate": 9.447781437705872e-06, |
|
"loss": 0.2353, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.5283213847445586, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 9.434864044435835e-06, |
|
"loss": 0.246, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.5289672544080605, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 9.421946651165795e-06, |
|
"loss": 0.2627, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.5296131240715624, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 9.409029257895757e-06, |
|
"loss": 0.2387, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.5302589937350642, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 9.396111864625718e-06, |
|
"loss": 0.2612, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.5309048633985661, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 9.38319447135568e-06, |
|
"loss": 0.2385, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.5315507330620681, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 9.370277078085643e-06, |
|
"loss": 0.2664, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.53219660272557, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 9.357359684815605e-06, |
|
"loss": 0.2554, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.5328424723890719, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 9.344442291545567e-06, |
|
"loss": 0.2444, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.5334883420525738, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 9.33152489827553e-06, |
|
"loss": 0.224, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.5341342117160757, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 9.31860750500549e-06, |
|
"loss": 0.2496, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.5347800813795776, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 9.305690111735452e-06, |
|
"loss": 0.2633, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.5354259510430796, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 9.292772718465415e-06, |
|
"loss": 0.2992, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.5360718207065814, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 9.279855325195375e-06, |
|
"loss": 0.2912, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.5367176903700833, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 9.266937931925338e-06, |
|
"loss": 0.2553, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.5373635600335852, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.2540205386553e-06, |
|
"loss": 0.2505, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.5380094296970871, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.241103145385262e-06, |
|
"loss": 0.268, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.538655299360589, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 9.228185752115224e-06, |
|
"loss": 0.2382, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.539301169024091, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 9.215268358845187e-06, |
|
"loss": 0.2399, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.5399470386875929, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 9.202350965575147e-06, |
|
"loss": 0.2322, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.5405929083510947, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 9.18943357230511e-06, |
|
"loss": 0.2495, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.5412387780145966, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 9.17651617903507e-06, |
|
"loss": 0.2474, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.5418846476780985, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 9.163598785765032e-06, |
|
"loss": 0.2348, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.5425305173416005, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 9.150681392494995e-06, |
|
"loss": 0.2457, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.5431763870051024, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 9.137763999224957e-06, |
|
"loss": 0.2355, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.5438222566686043, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 9.12484660595492e-06, |
|
"loss": 0.2834, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.5444681263321062, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 9.111929212684882e-06, |
|
"loss": 0.2525, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.5451139959956081, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 9.099011819414842e-06, |
|
"loss": 0.2204, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.5457598656591099, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 9.086094426144805e-06, |
|
"loss": 0.2495, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.5464057353226119, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 9.073177032874767e-06, |
|
"loss": 0.2312, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.5470516049861138, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.060259639604727e-06, |
|
"loss": 0.2916, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.5476974746496157, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 9.04734224633469e-06, |
|
"loss": 0.2585, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.5483433443131176, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 9.034424853064652e-06, |
|
"loss": 0.242, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.5489892139766195, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 9.021507459794614e-06, |
|
"loss": 0.2534, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5496350836401214, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 9.008590066524577e-06, |
|
"loss": 0.2025, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.5502809533036234, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 8.995672673254539e-06, |
|
"loss": 0.2508, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.5509268229671253, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 8.9827552799845e-06, |
|
"loss": 0.2332, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.5515726926306271, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 8.969837886714462e-06, |
|
"loss": 0.2065, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.552218562294129, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 8.956920493444422e-06, |
|
"loss": 0.2772, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.5528644319576309, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 8.944003100174385e-06, |
|
"loss": 0.234, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.5535103016211329, |
|
"grad_norm": 1.0, |
|
"learning_rate": 8.931085706904347e-06, |
|
"loss": 0.2368, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.5541561712846348, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 8.91816831363431e-06, |
|
"loss": 0.2195, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.5548020409481367, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 8.905250920364272e-06, |
|
"loss": 0.2484, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.5554479106116386, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 8.892333527094234e-06, |
|
"loss": 0.2895, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.5560937802751404, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 8.879416133824194e-06, |
|
"loss": 0.2546, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.5567396499386423, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 8.866498740554157e-06, |
|
"loss": 0.2201, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.5573855196021443, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 8.853581347284119e-06, |
|
"loss": 0.2487, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.5580313892656462, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 8.84066395401408e-06, |
|
"loss": 0.2372, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.5586772589291481, |
|
"grad_norm": 1.125, |
|
"learning_rate": 8.827746560744042e-06, |
|
"loss": 0.2551, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.55932312859265, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 8.814829167474004e-06, |
|
"loss": 0.244, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.5599689982561519, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 8.801911774203966e-06, |
|
"loss": 0.2379, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.5606148679196538, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 8.788994380933929e-06, |
|
"loss": 0.2401, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.5612607375831558, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 8.77607698766389e-06, |
|
"loss": 0.2448, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.5619066072466576, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 8.763159594393852e-06, |
|
"loss": 0.25, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.5625524769101595, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 8.750242201123814e-06, |
|
"loss": 0.2246, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.5631983465736614, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 8.737324807853775e-06, |
|
"loss": 0.2884, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.5638442162371633, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 8.724407414583737e-06, |
|
"loss": 0.2578, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.5644900859006653, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 8.711490021313699e-06, |
|
"loss": 0.2126, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.5651359555641672, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 8.698572628043661e-06, |
|
"loss": 0.2831, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.5657818252276691, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 8.685655234773624e-06, |
|
"loss": 0.2257, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.566427694891171, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 8.672737841503586e-06, |
|
"loss": 0.2834, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.5670735645546728, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 8.659820448233547e-06, |
|
"loss": 0.2592, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.5677194342181747, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 8.646903054963509e-06, |
|
"loss": 0.2134, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.5683653038816767, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 8.633985661693471e-06, |
|
"loss": 0.2348, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.5690111735451786, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 8.621068268423432e-06, |
|
"loss": 0.2694, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.5696570432086805, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 8.608150875153394e-06, |
|
"loss": 0.2366, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.5703029128721824, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 8.595233481883356e-06, |
|
"loss": 0.2335, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.5709487825356843, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 8.582316088613319e-06, |
|
"loss": 0.2394, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.5715946521991863, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 8.569398695343281e-06, |
|
"loss": 0.2623, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.5722405218626881, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 8.556481302073242e-06, |
|
"loss": 0.2294, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.57288639152619, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 8.543563908803204e-06, |
|
"loss": 0.2336, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.5735322611896919, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 8.530646515533166e-06, |
|
"loss": 0.2838, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.5741781308531938, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 8.517729122263127e-06, |
|
"loss": 0.2696, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.5748240005166957, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 8.504811728993089e-06, |
|
"loss": 0.2556, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.5754698701801977, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 8.491894335723051e-06, |
|
"loss": 0.2377, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.5761157398436996, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 8.478976942453014e-06, |
|
"loss": 0.2368, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.5767616095072015, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 8.466059549182976e-06, |
|
"loss": 0.2255, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.5774074791707033, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 8.453142155912938e-06, |
|
"loss": 0.2601, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.5780533488342052, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 8.440224762642899e-06, |
|
"loss": 0.2555, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.5786992184977071, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 8.427307369372861e-06, |
|
"loss": 0.2149, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.5793450881612091, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 8.414389976102823e-06, |
|
"loss": 0.2565, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.579990957824711, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 8.401472582832784e-06, |
|
"loss": 0.2314, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.5806368274882129, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 8.388555189562746e-06, |
|
"loss": 0.2334, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.5812826971517148, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 8.375637796292709e-06, |
|
"loss": 0.2555, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5819285668152167, |
|
"grad_norm": 1.25, |
|
"learning_rate": 8.36272040302267e-06, |
|
"loss": 0.2389, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.5825744364787186, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 8.349803009752633e-06, |
|
"loss": 0.2529, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.5832203061422205, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 8.336885616482594e-06, |
|
"loss": 0.258, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.5838661758057224, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 8.323968223212556e-06, |
|
"loss": 0.2563, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.5845120454692243, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 8.311050829942518e-06, |
|
"loss": 0.2306, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.5851579151327262, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 8.298133436672479e-06, |
|
"loss": 0.2295, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.5858037847962281, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 8.285216043402441e-06, |
|
"loss": 0.2589, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.5864496544597301, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 8.272298650132403e-06, |
|
"loss": 0.2552, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.587095524123232, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 8.259381256862366e-06, |
|
"loss": 0.2173, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.5877413937867338, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 8.246463863592328e-06, |
|
"loss": 0.235, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.5883872634502357, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 8.23354647032229e-06, |
|
"loss": 0.2277, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.5890331331137376, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 8.220629077052251e-06, |
|
"loss": 0.1985, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.5896790027772395, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 8.207711683782213e-06, |
|
"loss": 0.2056, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.5903248724407415, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 8.194794290512176e-06, |
|
"loss": 0.2722, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.5909707421042434, |
|
"grad_norm": 1.71875, |
|
"learning_rate": 8.181876897242136e-06, |
|
"loss": 0.2612, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.5916166117677453, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 8.168959503972098e-06, |
|
"loss": 0.2506, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.5922624814312472, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 8.15604211070206e-06, |
|
"loss": 0.2495, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.592908351094749, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 8.143124717432023e-06, |
|
"loss": 0.2629, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.593554220758251, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 8.130207324161985e-06, |
|
"loss": 0.257, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.5942000904217529, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 8.117289930891946e-06, |
|
"loss": 0.2712, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.5948459600852548, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 8.104372537621908e-06, |
|
"loss": 0.2282, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.5954918297487567, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 8.09145514435187e-06, |
|
"loss": 0.2401, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.5961376994122586, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 8.078537751081831e-06, |
|
"loss": 0.2296, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.5967835690757605, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 8.065620357811793e-06, |
|
"loss": 0.2449, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.5974294387392625, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 8.052702964541756e-06, |
|
"loss": 0.2533, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.5980753084027643, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 8.039785571271718e-06, |
|
"loss": 0.2334, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.5987211780662662, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 8.02686817800168e-06, |
|
"loss": 0.2295, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.5993670477297681, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 8.013950784731643e-06, |
|
"loss": 0.252, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.60001291739327, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 8.001033391461603e-06, |
|
"loss": 0.2393, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.6006587870567719, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 7.988115998191565e-06, |
|
"loss": 0.2367, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.6013046567202739, |
|
"grad_norm": 1.125, |
|
"learning_rate": 7.975198604921528e-06, |
|
"loss": 0.2473, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.6019505263837758, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 7.962281211651488e-06, |
|
"loss": 0.2641, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.6025963960472777, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 7.94936381838145e-06, |
|
"loss": 0.2767, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.6032422657107795, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 7.936446425111413e-06, |
|
"loss": 0.2438, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.6038881353742814, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 7.923529031841375e-06, |
|
"loss": 0.2143, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.6045340050377834, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 7.910611638571337e-06, |
|
"loss": 0.2204, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.6051798747012853, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 7.897694245301298e-06, |
|
"loss": 0.2618, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.6058257443647872, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 7.88477685203126e-06, |
|
"loss": 0.2456, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.6064716140282891, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 7.871859458761223e-06, |
|
"loss": 0.2472, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.607117483691791, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 7.858942065491185e-06, |
|
"loss": 0.2297, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.6077633533552929, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 7.846024672221146e-06, |
|
"loss": 0.2648, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.6084092230187949, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 7.833107278951108e-06, |
|
"loss": 0.1953, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.6090550926822967, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 7.82018988568107e-06, |
|
"loss": 0.2508, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.6097009623457986, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 7.807272492411032e-06, |
|
"loss": 0.2198, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.6103468320093005, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 7.794355099140995e-06, |
|
"loss": 0.2324, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.6109927016728024, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 7.781437705870955e-06, |
|
"loss": 0.2298, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.6116385713363043, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 7.768520312600918e-06, |
|
"loss": 0.2689, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.6122844409998063, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 7.75560291933088e-06, |
|
"loss": 0.2756, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.6129303106633082, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 7.742685526060842e-06, |
|
"loss": 0.233, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.61357618032681, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 7.729768132790804e-06, |
|
"loss": 0.2231, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6142220499903119, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 7.716850739520765e-06, |
|
"loss": 0.2582, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.6148679196538138, |
|
"grad_norm": 1.0, |
|
"learning_rate": 7.703933346250727e-06, |
|
"loss": 0.2592, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.6155137893173158, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 7.69101595298069e-06, |
|
"loss": 0.2449, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.6161596589808177, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 7.67809855971065e-06, |
|
"loss": 0.2509, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.6168055286443196, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 7.665181166440613e-06, |
|
"loss": 0.225, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.6174513983078215, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 7.652263773170575e-06, |
|
"loss": 0.2244, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.6180972679713234, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 7.639346379900537e-06, |
|
"loss": 0.2768, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.6187431376348252, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 7.6264289866304986e-06, |
|
"loss": 0.2491, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.6193890072983272, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 7.613511593360461e-06, |
|
"loss": 0.2623, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.6200348769618291, |
|
"grad_norm": 1.125, |
|
"learning_rate": 7.600594200090422e-06, |
|
"loss": 0.2585, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.620680746625331, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 7.587676806820385e-06, |
|
"loss": 0.2312, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.6213266162888329, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 7.574759413550347e-06, |
|
"loss": 0.2359, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.6219724859523348, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 7.5618420202803075e-06, |
|
"loss": 0.2409, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.6226183556158367, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 7.54892462701027e-06, |
|
"loss": 0.2221, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.6232642252793387, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 7.536007233740232e-06, |
|
"loss": 0.2712, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.6239100949428406, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 7.5230898404701935e-06, |
|
"loss": 0.2658, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.6245559646063424, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 7.510172447200156e-06, |
|
"loss": 0.2249, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.6252018342698443, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 7.497255053930117e-06, |
|
"loss": 0.2309, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.6258477039333462, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 7.4843376606600795e-06, |
|
"loss": 0.2182, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.6264935735968482, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 7.471420267390042e-06, |
|
"loss": 0.2291, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.6271394432603501, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 7.458502874120002e-06, |
|
"loss": 0.2266, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.627785312923852, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 7.445585480849965e-06, |
|
"loss": 0.2655, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.6284311825873539, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 7.432668087579927e-06, |
|
"loss": 0.2227, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.6290770522508558, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 7.4197506943098884e-06, |
|
"loss": 0.2341, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.6297229219143576, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 7.406833301039851e-06, |
|
"loss": 0.2642, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.6303687915778596, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 7.393915907769813e-06, |
|
"loss": 0.2253, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.6310146612413615, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 7.3809985144997745e-06, |
|
"loss": 0.2435, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.6316605309048634, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 7.368081121229737e-06, |
|
"loss": 0.2608, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.6323064005683653, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 7.355163727959699e-06, |
|
"loss": 0.2654, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.6329522702318672, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 7.34224633468966e-06, |
|
"loss": 0.2321, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.6335981398953691, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 7.329328941419622e-06, |
|
"loss": 0.2545, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.6342440095588711, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 7.316411548149584e-06, |
|
"loss": 0.2016, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.6348898792223729, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 7.303494154879546e-06, |
|
"loss": 0.2529, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.6355357488858748, |
|
"grad_norm": 1.0, |
|
"learning_rate": 7.290576761609508e-06, |
|
"loss": 0.2683, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.6361816185493767, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 7.277659368339469e-06, |
|
"loss": 0.2553, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.6368274882128786, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 7.264741975069432e-06, |
|
"loss": 0.2363, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.6374733578763806, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 7.251824581799394e-06, |
|
"loss": 0.2267, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.6381192275398825, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 7.238907188529355e-06, |
|
"loss": 0.241, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.6387650972033844, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 7.225989795259317e-06, |
|
"loss": 0.2146, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.6394109668668863, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 7.213072401989279e-06, |
|
"loss": 0.261, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.6400568365303881, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 7.200155008719241e-06, |
|
"loss": 0.2299, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.64070270619389, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 7.187237615449203e-06, |
|
"loss": 0.2633, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.641348575857392, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 7.174320222179165e-06, |
|
"loss": 0.2379, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.6419944455208939, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 7.161402828909127e-06, |
|
"loss": 0.2405, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.6426403151843958, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 7.148485435639089e-06, |
|
"loss": 0.2508, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.6432861848478977, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 7.135568042369051e-06, |
|
"loss": 0.24, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.6439320545113996, |
|
"grad_norm": 1.125, |
|
"learning_rate": 7.122650649099012e-06, |
|
"loss": 0.2258, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.6445779241749015, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 7.109733255828974e-06, |
|
"loss": 0.2058, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.6452237938384034, |
|
"grad_norm": 1.25, |
|
"learning_rate": 7.096815862558936e-06, |
|
"loss": 0.2515, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.6458696635019053, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 7.083898469288898e-06, |
|
"loss": 0.269, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.6465155331654072, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 7.07098107601886e-06, |
|
"loss": 0.2313, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.6471614028289091, |
|
"grad_norm": 1.125, |
|
"learning_rate": 7.058063682748822e-06, |
|
"loss": 0.2284, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.647807272492411, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 7.045146289478784e-06, |
|
"loss": 0.2329, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.648453142155913, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 7.032228896208746e-06, |
|
"loss": 0.2498, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.6490990118194149, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 7.019311502938707e-06, |
|
"loss": 0.2136, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.6497448814829168, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 7.006394109668669e-06, |
|
"loss": 0.2427, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.6503907511464186, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 6.993476716398631e-06, |
|
"loss": 0.2034, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.6510366208099205, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 6.980559323128593e-06, |
|
"loss": 0.273, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.6516824904734224, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 6.967641929858555e-06, |
|
"loss": 0.254, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.6523283601369244, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 6.954724536588517e-06, |
|
"loss": 0.2433, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.6529742298004263, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 6.941807143318479e-06, |
|
"loss": 0.2463, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.6536200994639282, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 6.928889750048441e-06, |
|
"loss": 0.2632, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.6542659691274301, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 6.915972356778403e-06, |
|
"loss": 0.2612, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.654911838790932, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 6.903054963508364e-06, |
|
"loss": 0.2633, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.6555577084544338, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 6.890137570238326e-06, |
|
"loss": 0.2134, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.6562035781179358, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 6.877220176968288e-06, |
|
"loss": 0.2409, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.6568494477814377, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 6.86430278369825e-06, |
|
"loss": 0.2656, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.6574953174449396, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 6.851385390428212e-06, |
|
"loss": 0.2124, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.6581411871084415, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 6.838467997158174e-06, |
|
"loss": 0.2451, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.6587870567719434, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 6.825550603888136e-06, |
|
"loss": 0.2633, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.6594329264354454, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 6.812633210618098e-06, |
|
"loss": 0.2689, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.6600787960989473, |
|
"grad_norm": 1.125, |
|
"learning_rate": 6.799715817348059e-06, |
|
"loss": 0.2153, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.6607246657624491, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 6.786798424078021e-06, |
|
"loss": 0.1888, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.661370535425951, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 6.7738810308079835e-06, |
|
"loss": 0.2652, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.6620164050894529, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 6.760963637537945e-06, |
|
"loss": 0.2182, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.6626622747529548, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 6.748046244267907e-06, |
|
"loss": 0.2141, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.6633081444164568, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 6.7351288509978696e-06, |
|
"loss": 0.2405, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.6639540140799587, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 6.722211457727831e-06, |
|
"loss": 0.2127, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.6645998837434606, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 6.709294064457793e-06, |
|
"loss": 0.2512, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.6652457534069625, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 6.696376671187756e-06, |
|
"loss": 0.2451, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.6658916230704643, |
|
"grad_norm": 1.375, |
|
"learning_rate": 6.683459277917716e-06, |
|
"loss": 0.2546, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.6665374927339662, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 6.6705418846476785e-06, |
|
"loss": 0.208, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.6671833623974682, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 6.65762449137764e-06, |
|
"loss": 0.2314, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.6678292320609701, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 6.644707098107602e-06, |
|
"loss": 0.2281, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.668475101724472, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 6.6317897048375645e-06, |
|
"loss": 0.2652, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.6691209713879739, |
|
"grad_norm": 1.125, |
|
"learning_rate": 6.618872311567526e-06, |
|
"loss": 0.215, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.6697668410514758, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 6.605954918297488e-06, |
|
"loss": 0.2365, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.6704127107149778, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 6.5930375250274505e-06, |
|
"loss": 0.2547, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.6710585803784797, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 6.580120131757411e-06, |
|
"loss": 0.2333, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.6717044500419815, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 6.567202738487373e-06, |
|
"loss": 0.259, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.6723503197054834, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 6.554285345217336e-06, |
|
"loss": 0.2513, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.6729961893689853, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 6.541367951947297e-06, |
|
"loss": 0.2835, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.6736420590324872, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 6.5284505586772594e-06, |
|
"loss": 0.2441, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.6742879286959892, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 6.515533165407222e-06, |
|
"loss": 0.229, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.6749337983594911, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 6.502615772137183e-06, |
|
"loss": 0.214, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.675579668022993, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 6.4896983788671455e-06, |
|
"loss": 0.2228, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.6762255376864948, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 6.476780985597108e-06, |
|
"loss": 0.2207, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.6768714073499967, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 6.463863592327068e-06, |
|
"loss": 0.2385, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.6775172770134987, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 6.450946199057031e-06, |
|
"loss": 0.2337, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.6781631466770006, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 6.438028805786992e-06, |
|
"loss": 0.2325, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6788090163405025, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 6.425111412516954e-06, |
|
"loss": 0.2066, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.6794548860040044, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 6.412194019246917e-06, |
|
"loss": 0.2501, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.6801007556675063, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 6.399276625976878e-06, |
|
"loss": 0.2366, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.6807466253310082, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 6.38635923270684e-06, |
|
"loss": 0.2267, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.6813924949945102, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 6.373441839436803e-06, |
|
"loss": 0.2168, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.682038364658012, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 6.360524446166763e-06, |
|
"loss": 0.2523, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.6826842343215139, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 6.347607052896726e-06, |
|
"loss": 0.2334, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.6833301039850158, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 6.334689659626688e-06, |
|
"loss": 0.2319, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.6839759736485177, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 6.321772266356649e-06, |
|
"loss": 0.2315, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.6846218433120196, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 6.308854873086612e-06, |
|
"loss": 0.2355, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.6852677129755216, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 6.295937479816574e-06, |
|
"loss": 0.2184, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.6859135826390235, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 6.283020086546535e-06, |
|
"loss": 0.2648, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.6865594523025254, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 6.270102693276498e-06, |
|
"loss": 0.2613, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.6872053219660272, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 6.25718530000646e-06, |
|
"loss": 0.2433, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.6878511916295291, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 6.2442679067364205e-06, |
|
"loss": 0.214, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.6884970612930311, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 6.231350513466383e-06, |
|
"loss": 0.2556, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.689142930956533, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 6.218433120196344e-06, |
|
"loss": 0.252, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.6897888006200349, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 6.2055157269263066e-06, |
|
"loss": 0.2207, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.6904346702835368, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 6.192598333656269e-06, |
|
"loss": 0.2363, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.6910805399470387, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 6.17968094038623e-06, |
|
"loss": 0.2087, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.6917264096105405, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 6.166763547116193e-06, |
|
"loss": 0.2266, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.6923722792740425, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 0.2275, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.6930181489375444, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 6.1409287605761155e-06, |
|
"loss": 0.2398, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.6936640186010463, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 6.128011367306078e-06, |
|
"loss": 0.2038, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.6943098882645482, |
|
"grad_norm": 1.25, |
|
"learning_rate": 6.11509397403604e-06, |
|
"loss": 0.2484, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.6949557579280501, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 6.1021765807660015e-06, |
|
"loss": 0.2225, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.695601627591552, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 6.089259187495964e-06, |
|
"loss": 0.2146, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.696247497255054, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 6.076341794225926e-06, |
|
"loss": 0.2215, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.6968933669185559, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 6.0634244009558875e-06, |
|
"loss": 0.2484, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.6975392365820577, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 6.05050700768585e-06, |
|
"loss": 0.2411, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.6981851062455596, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 6.037589614415812e-06, |
|
"loss": 0.2753, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.6988309759090615, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 6.024672221145773e-06, |
|
"loss": 0.2271, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.6994768455725635, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 6.011754827875735e-06, |
|
"loss": 0.2329, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.7001227152360654, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 5.9988374346056964e-06, |
|
"loss": 0.2663, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.7007685848995673, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 5.985920041335659e-06, |
|
"loss": 0.2558, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.7014144545630692, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 5.973002648065621e-06, |
|
"loss": 0.2417, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.702060324226571, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 5.9600852547955825e-06, |
|
"loss": 0.2314, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.7027061938900729, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 5.947167861525545e-06, |
|
"loss": 0.2169, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.7033520635535749, |
|
"grad_norm": 1.125, |
|
"learning_rate": 5.934250468255507e-06, |
|
"loss": 0.2502, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.7039979332170768, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 5.921333074985468e-06, |
|
"loss": 0.2163, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.7046438028805787, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 5.90841568171543e-06, |
|
"loss": 0.231, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.7052896725440806, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 5.895498288445392e-06, |
|
"loss": 0.2155, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.7059355422075825, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 5.882580895175354e-06, |
|
"loss": 0.2304, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.7065814118710844, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 5.869663501905316e-06, |
|
"loss": 0.2275, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.7072272815345864, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 5.856746108635278e-06, |
|
"loss": 0.2555, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.7078731511980882, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 5.84382871536524e-06, |
|
"loss": 0.2211, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.7085190208615901, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 5.830911322095202e-06, |
|
"loss": 0.2312, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.709164890525092, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 5.817993928825163e-06, |
|
"loss": 0.2354, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.7098107601885939, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 5.805076535555125e-06, |
|
"loss": 0.244, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.7104566298520959, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 5.792159142285087e-06, |
|
"loss": 0.2207, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.7111024995155978, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 5.779241749015049e-06, |
|
"loss": 0.2197, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.7117483691790997, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 5.766324355745011e-06, |
|
"loss": 0.2475, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.7123942388426016, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 5.753406962474973e-06, |
|
"loss": 0.2199, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.7130401085061034, |
|
"grad_norm": 1.25, |
|
"learning_rate": 5.740489569204935e-06, |
|
"loss": 0.2299, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.7136859781696053, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 5.727572175934897e-06, |
|
"loss": 0.2194, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.7143318478331073, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 5.714654782664859e-06, |
|
"loss": 0.2057, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.7149777174966092, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 5.701737389394821e-06, |
|
"loss": 0.2703, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.7156235871601111, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 5.688819996124782e-06, |
|
"loss": 0.223, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.716269456823613, |
|
"grad_norm": 1.25, |
|
"learning_rate": 5.675902602854744e-06, |
|
"loss": 0.2439, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.7169153264871149, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 5.662985209584706e-06, |
|
"loss": 0.2441, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.7175611961506168, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 5.650067816314668e-06, |
|
"loss": 0.2307, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.7182070658141188, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 5.6371504230446304e-06, |
|
"loss": 0.228, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.7188529354776206, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 5.624233029774592e-06, |
|
"loss": 0.2389, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.7194988051411225, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 5.611315636504554e-06, |
|
"loss": 0.2133, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.7201446748046244, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 5.598398243234516e-06, |
|
"loss": 0.2637, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.7207905444681263, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 5.585480849964478e-06, |
|
"loss": 0.2252, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.7214364141316283, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 5.57256345669444e-06, |
|
"loss": 0.2309, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.7220822837951302, |
|
"grad_norm": 1.25, |
|
"learning_rate": 5.559646063424401e-06, |
|
"loss": 0.233, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.7227281534586321, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 5.546728670154363e-06, |
|
"loss": 0.1879, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.723374023122134, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 5.533811276884325e-06, |
|
"loss": 0.2332, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.7240198927856358, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 5.520893883614287e-06, |
|
"loss": 0.2329, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.7246657624491377, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 5.507976490344249e-06, |
|
"loss": 0.2142, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.7253116321126397, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 5.495059097074211e-06, |
|
"loss": 0.2515, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.7259575017761416, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 5.482141703804173e-06, |
|
"loss": 0.1971, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.7266033714396435, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 5.469224310534135e-06, |
|
"loss": 0.227, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.7272492411031454, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 5.456306917264097e-06, |
|
"loss": 0.2482, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.7278951107666473, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 5.443389523994058e-06, |
|
"loss": 0.2288, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.7285409804301491, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 5.43047213072402e-06, |
|
"loss": 0.2573, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.7291868500936511, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 5.417554737453983e-06, |
|
"loss": 0.2271, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.729832719757153, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 5.404637344183944e-06, |
|
"loss": 0.2434, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.7304785894206549, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 5.391719950913906e-06, |
|
"loss": 0.2314, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.7311244590841568, |
|
"grad_norm": 1.71875, |
|
"learning_rate": 5.378802557643868e-06, |
|
"loss": 0.2399, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.7317703287476587, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 5.36588516437383e-06, |
|
"loss": 0.2411, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.7324161984111607, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 5.352967771103792e-06, |
|
"loss": 0.257, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.7330620680746626, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 5.340050377833753e-06, |
|
"loss": 0.2474, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.7337079377381645, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 5.327132984563715e-06, |
|
"loss": 0.237, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.7343538074016663, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 5.3142155912936775e-06, |
|
"loss": 0.2134, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.7349996770651682, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 5.301298198023639e-06, |
|
"loss": 0.2061, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.7356455467286701, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 5.288380804753601e-06, |
|
"loss": 0.2141, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.7362914163921721, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 5.2754634114835636e-06, |
|
"loss": 0.2399, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.736937286055674, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 5.262546018213525e-06, |
|
"loss": 0.2636, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.7375831557191759, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 5.249628624943487e-06, |
|
"loss": 0.2606, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.7382290253826778, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 5.23671123167345e-06, |
|
"loss": 0.2397, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.7388748950461796, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 5.22379383840341e-06, |
|
"loss": 0.2462, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.7395207647096815, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 5.2108764451333725e-06, |
|
"loss": 0.2722, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.7401666343731835, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 5.197959051863335e-06, |
|
"loss": 0.2328, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.7408125040366854, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 5.185041658593296e-06, |
|
"loss": 0.2192, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.7414583737001873, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 5.1721242653232585e-06, |
|
"loss": 0.2083, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.7421042433636892, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 5.15920687205322e-06, |
|
"loss": 0.2051, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.7427501130271911, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 5.146289478783182e-06, |
|
"loss": 0.2385, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.7433959826906931, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 5.1333720855131445e-06, |
|
"loss": 0.2129, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.744041852354195, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 5.120454692243105e-06, |
|
"loss": 0.231, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.7446877220176968, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 5.1075372989730674e-06, |
|
"loss": 0.2169, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.7453335916811987, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 5.09461990570303e-06, |
|
"loss": 0.2281, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.7459794613447006, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 5.081702512432991e-06, |
|
"loss": 0.1929, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.7466253310082025, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 5.0687851191629535e-06, |
|
"loss": 0.2483, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.7472712006717045, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 5.055867725892916e-06, |
|
"loss": 0.2081, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.7479170703352064, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 5.042950332622877e-06, |
|
"loss": 0.2112, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.7485629399987083, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 5.0300329393528395e-06, |
|
"loss": 0.2387, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.7492088096622102, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 5.017115546082802e-06, |
|
"loss": 0.2498, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.749854679325712, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 5.004198152812762e-06, |
|
"loss": 0.2093, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.7505005489892139, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 4.991280759542725e-06, |
|
"loss": 0.2232, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.7511464186527159, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.978363366272687e-06, |
|
"loss": 0.2384, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.7517922883162178, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 4.965445973002648e-06, |
|
"loss": 0.2139, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.7524381579797197, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.952528579732611e-06, |
|
"loss": 0.2457, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.7530840276432216, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 4.939611186462572e-06, |
|
"loss": 0.2236, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.7537298973067235, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 4.926693793192534e-06, |
|
"loss": 0.241, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.7543757669702255, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 4.913776399922496e-06, |
|
"loss": 0.2113, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.7550216366337273, |
|
"grad_norm": 1.0, |
|
"learning_rate": 4.900859006652458e-06, |
|
"loss": 0.2375, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.7556675062972292, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 4.88794161338242e-06, |
|
"loss": 0.2422, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.7563133759607311, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.875024220112382e-06, |
|
"loss": 0.2266, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.756959245624233, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 4.862106826842344e-06, |
|
"loss": 0.2342, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.7576051152877349, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.849189433572306e-06, |
|
"loss": 0.205, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.7582509849512369, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 4.836272040302267e-06, |
|
"loss": 0.229, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.7588968546147388, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.823354647032229e-06, |
|
"loss": 0.2068, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.7595427242782407, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 4.810437253762192e-06, |
|
"loss": 0.2329, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.7601885939417425, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.797519860492153e-06, |
|
"loss": 0.2479, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.7608344636052444, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 4.7846024672221145e-06, |
|
"loss": 0.219, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.7614803332687463, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 4.771685073952077e-06, |
|
"loss": 0.214, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.7621262029322483, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 4.758767680682039e-06, |
|
"loss": 0.2501, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.7627720725957502, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.7458502874120006e-06, |
|
"loss": 0.2031, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.7634179422592521, |
|
"grad_norm": 0.875, |
|
"learning_rate": 4.732932894141963e-06, |
|
"loss": 0.2355, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.764063811922754, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 4.720015500871924e-06, |
|
"loss": 0.2322, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.7647096815862559, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 4.707098107601887e-06, |
|
"loss": 0.2518, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.7653555512497578, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.694180714331848e-06, |
|
"loss": 0.227, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.7660014209132597, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 4.68126332106181e-06, |
|
"loss": 0.2343, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.7666472905767616, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.668345927791772e-06, |
|
"loss": 0.2372, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.7672931602402635, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 4.655428534521734e-06, |
|
"loss": 0.2584, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.7679390299037654, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.642511141251696e-06, |
|
"loss": 0.2545, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.7685848995672673, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 4.629593747981658e-06, |
|
"loss": 0.2579, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 4.616676354711619e-06, |
|
"loss": 0.2326, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.7698766388942712, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 4.6037589614415815e-06, |
|
"loss": 0.2593, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.770522508557773, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.590841568171544e-06, |
|
"loss": 0.2103, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.7711683782212749, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 4.577924174901505e-06, |
|
"loss": 0.2412, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.7718142478847768, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 4.565006781631467e-06, |
|
"loss": 0.2634, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.7724601175482787, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 4.552089388361429e-06, |
|
"loss": 0.2483, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.7731059872117807, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 4.539171995091391e-06, |
|
"loss": 0.2265, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.7737518568752826, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 4.526254601821353e-06, |
|
"loss": 0.2277, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.7743977265387845, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.513337208551315e-06, |
|
"loss": 0.2311, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.7750435962022864, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.5004198152812765e-06, |
|
"loss": 0.2154, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7756894658657882, |
|
"grad_norm": 1.0, |
|
"learning_rate": 4.487502422011239e-06, |
|
"loss": 0.2349, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 0.7763353355292902, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.4745850287412e-06, |
|
"loss": 0.2194, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.7769812051927921, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.4616676354711625e-06, |
|
"loss": 0.2572, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 0.777627074856294, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 4.448750242201124e-06, |
|
"loss": 0.2068, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.7782729445197959, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 4.435832848931086e-06, |
|
"loss": 0.2165, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.7789188141832978, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 4.4229154556610485e-06, |
|
"loss": 0.2507, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.7795646838467997, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 4.40999806239101e-06, |
|
"loss": 0.2445, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 0.7802105535103017, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.397080669120971e-06, |
|
"loss": 0.2526, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.7808564231738035, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 4.384163275850934e-06, |
|
"loss": 0.2274, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.7815022928373054, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.371245882580896e-06, |
|
"loss": 0.2206, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.7821481625008073, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 4.3583284893108574e-06, |
|
"loss": 0.2237, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 0.7827940321643092, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.345411096040819e-06, |
|
"loss": 0.2292, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.7834399018278112, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.332493702770781e-06, |
|
"loss": 0.2187, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 0.7840857714913131, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.3195763095007435e-06, |
|
"loss": 0.2322, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.784731641154815, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 4.306658916230705e-06, |
|
"loss": 0.2421, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.7853775108183169, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.293741522960667e-06, |
|
"loss": 0.2402, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.7860233804818187, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 4.280824129690629e-06, |
|
"loss": 0.2079, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 0.7866692501453206, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.267906736420591e-06, |
|
"loss": 0.2632, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.7873151198088226, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 4.254989343150552e-06, |
|
"loss": 0.2151, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 0.7879609894723245, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.242071949880515e-06, |
|
"loss": 0.2426, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.7886068591358264, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.229154556610476e-06, |
|
"loss": 0.2211, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 0.7892527287993283, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 4.216237163340438e-06, |
|
"loss": 0.2445, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.7898985984628302, |
|
"grad_norm": 1.25, |
|
"learning_rate": 4.203319770070401e-06, |
|
"loss": 0.2213, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 0.7905444681263321, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 4.190402376800362e-06, |
|
"loss": 0.2377, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.791190337789834, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 4.177484983530324e-06, |
|
"loss": 0.2229, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.7918362074533359, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 4.164567590260286e-06, |
|
"loss": 0.2211, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.7924820771168378, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.151650196990248e-06, |
|
"loss": 0.2128, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 0.7931279467803397, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 4.13873280372021e-06, |
|
"loss": 0.2372, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.7937738164438416, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 4.125815410450171e-06, |
|
"loss": 0.2202, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 0.7944196861073436, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 4.112898017180133e-06, |
|
"loss": 0.2204, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.7950655557708455, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 4.099980623910096e-06, |
|
"loss": 0.2391, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 0.7957114254343474, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 4.087063230640057e-06, |
|
"loss": 0.2184, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.7963572950978492, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 4.074145837370019e-06, |
|
"loss": 0.2371, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 0.7970031647613511, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.061228444099981e-06, |
|
"loss": 0.2155, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.797649034424853, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 4.048311050829943e-06, |
|
"loss": 0.2208, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.798294904088355, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.0353936575599046e-06, |
|
"loss": 0.2519, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.7989407737518569, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.022476264289867e-06, |
|
"loss": 0.2373, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 0.7995866434153588, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.009558871019828e-06, |
|
"loss": 0.2572, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.8002325130788607, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 3.996641477749791e-06, |
|
"loss": 0.2135, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 0.8008783827423626, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 3.983724084479753e-06, |
|
"loss": 0.2309, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.8015242524058644, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 3.970806691209714e-06, |
|
"loss": 0.2218, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 0.8021701220693664, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 3.957889297939676e-06, |
|
"loss": 0.2109, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.8028159917328683, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 3.944971904669638e-06, |
|
"loss": 0.2329, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 0.8034618613963702, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 3.9320545113996e-06, |
|
"loss": 0.2506, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.8041077310598721, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 3.919137118129562e-06, |
|
"loss": 0.2371, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.804753600723374, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 3.906219724859523e-06, |
|
"loss": 0.2433, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.805399470386876, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 3.8933023315894855e-06, |
|
"loss": 0.2441, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 0.8060453400503779, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 3.880384938319448e-06, |
|
"loss": 0.2258, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.8066912097138798, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 3.867467545049409e-06, |
|
"loss": 0.245, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 0.8073370793773816, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 3.854550151779371e-06, |
|
"loss": 0.2194, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8079829490408835, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 3.841632758509333e-06, |
|
"loss": 0.227, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 0.8086288187043854, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 3.828715365239295e-06, |
|
"loss": 0.2206, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.8092746883678874, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 3.815797971969257e-06, |
|
"loss": 0.2514, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 0.8099205580313893, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 3.802880578699219e-06, |
|
"loss": 0.2319, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.8105664276948912, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 3.789963185429181e-06, |
|
"loss": 0.1914, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.8112122973583931, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 3.7770457921591423e-06, |
|
"loss": 0.2361, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.811858167021895, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 3.7641283988891046e-06, |
|
"loss": 0.2408, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 0.8125040366853968, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 3.7512110056190665e-06, |
|
"loss": 0.2192, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.8131499063488988, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 3.7382936123490284e-06, |
|
"loss": 0.2201, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 0.8137957760124007, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 3.72537621907899e-06, |
|
"loss": 0.2429, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.8144416456759026, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 3.712458825808952e-06, |
|
"loss": 0.2223, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 0.8150875153394045, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 3.699541432538914e-06, |
|
"loss": 0.206, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.8157333850029064, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.686624039268876e-06, |
|
"loss": 0.2308, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 0.8163792546664084, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 3.673706645998838e-06, |
|
"loss": 0.2061, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.8170251243299103, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 3.6607892527287996e-06, |
|
"loss": 0.2433, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.8176709939934121, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 3.6478718594587614e-06, |
|
"loss": 0.221, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.818316863656914, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 3.6349544661887233e-06, |
|
"loss": 0.1949, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 0.8189627333204159, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.6220370729186856e-06, |
|
"loss": 0.274, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.8196086029839178, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 3.609119679648647e-06, |
|
"loss": 0.1969, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 0.8202544726474198, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 3.596202286378609e-06, |
|
"loss": 0.2273, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.8209003423109217, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 3.583284893108571e-06, |
|
"loss": 0.2622, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.8215462119744236, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 3.570367499838533e-06, |
|
"loss": 0.2085, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.8221920816379255, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 3.5574501065684945e-06, |
|
"loss": 0.242, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 0.8228379513014273, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 3.5445327132984564e-06, |
|
"loss": 0.2249, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.8234838209649292, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 3.5316153200284187e-06, |
|
"loss": 0.2375, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.8241296906284312, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 3.5186979267583805e-06, |
|
"loss": 0.231, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.8247755602919331, |
|
"grad_norm": 1.375, |
|
"learning_rate": 3.505780533488342e-06, |
|
"loss": 0.2467, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 0.825421429955435, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 3.4928631402183043e-06, |
|
"loss": 0.2118, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.8260672996189369, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.479945746948266e-06, |
|
"loss": 0.2308, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 0.8267131692824388, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 3.467028353678228e-06, |
|
"loss": 0.2351, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.8273590389459408, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 3.4541109604081903e-06, |
|
"loss": 0.2173, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 0.8280049086094426, |
|
"grad_norm": 1.125, |
|
"learning_rate": 3.4411935671381517e-06, |
|
"loss": 0.2532, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.8286507782729445, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.4282761738681136e-06, |
|
"loss": 0.2222, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 0.8292966479364464, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 3.4153587805980755e-06, |
|
"loss": 0.2353, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.8299425175999483, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 3.4024413873280378e-06, |
|
"loss": 0.2213, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.8305883872634502, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 3.3895239940579992e-06, |
|
"loss": 0.2482, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.8312342569269522, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 3.376606600787961e-06, |
|
"loss": 0.2217, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 0.8318801265904541, |
|
"grad_norm": 1.125, |
|
"learning_rate": 3.3636892075179234e-06, |
|
"loss": 0.2221, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.832525996253956, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 3.3507718142478852e-06, |
|
"loss": 0.2003, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 0.8331718659174578, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 3.3378544209778467e-06, |
|
"loss": 0.2185, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.8338177355809597, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 3.3249370277078086e-06, |
|
"loss": 0.2438, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 0.8344636052444616, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 3.312019634437771e-06, |
|
"loss": 0.2259, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.8351094749079636, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 3.2991022411677327e-06, |
|
"loss": 0.2109, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 0.8357553445714655, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 3.286184847897694e-06, |
|
"loss": 0.2293, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.8364012142349674, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 3.2732674546276565e-06, |
|
"loss": 0.2258, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.8370470838984693, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.2603500613576183e-06, |
|
"loss": 0.2252, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.8376929535619712, |
|
"grad_norm": 1.125, |
|
"learning_rate": 3.24743266808758e-06, |
|
"loss": 0.2463, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 0.8383388232254732, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 3.2345152748175425e-06, |
|
"loss": 0.2751, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.838984692888975, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 3.221597881547504e-06, |
|
"loss": 0.2129, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 0.8396305625524769, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 3.208680488277466e-06, |
|
"loss": 0.2241, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8402764322159788, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 3.1957630950074277e-06, |
|
"loss": 0.1997, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 0.8409223018794807, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 3.18284570173739e-06, |
|
"loss": 0.1954, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.8415681715429826, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 3.1699283084673514e-06, |
|
"loss": 0.2343, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 0.8422140412064846, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 3.1570109151973133e-06, |
|
"loss": 0.2288, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.8428599108699865, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 3.1440935219272756e-06, |
|
"loss": 0.2359, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.8435057805334883, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 3.1311761286572374e-06, |
|
"loss": 0.2185, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.8441516501969902, |
|
"grad_norm": 1.0, |
|
"learning_rate": 3.118258735387199e-06, |
|
"loss": 0.2491, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 0.8447975198604921, |
|
"grad_norm": 1.375, |
|
"learning_rate": 3.1053413421171607e-06, |
|
"loss": 0.2181, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.845443389523994, |
|
"grad_norm": 1.0, |
|
"learning_rate": 3.092423948847123e-06, |
|
"loss": 0.2381, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 0.846089259187496, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 3.079506555577085e-06, |
|
"loss": 0.2612, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.8467351288509979, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 3.0665891623070463e-06, |
|
"loss": 0.229, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 0.8473809985144998, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 3.0536717690370086e-06, |
|
"loss": 0.2439, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.8480268681780017, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.0407543757669705e-06, |
|
"loss": 0.2199, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 0.8486727378415035, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 3.0278369824969324e-06, |
|
"loss": 0.2042, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.8493186075050055, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.014919589226894e-06, |
|
"loss": 0.2303, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.8499644771685074, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 3.002002195956856e-06, |
|
"loss": 0.212, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.8506103468320093, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 2.989084802686818e-06, |
|
"loss": 0.2391, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 0.8512562164955112, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 2.97616740941678e-06, |
|
"loss": 0.2093, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.8519020861590131, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 2.963250016146742e-06, |
|
"loss": 0.2014, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 0.852547955822515, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 2.9503326228767036e-06, |
|
"loss": 0.2446, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.853193825486017, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.9374152296066654e-06, |
|
"loss": 0.2072, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 0.8538396951495189, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 2.9244978363366277e-06, |
|
"loss": 0.2366, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.8544855648130207, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 2.9115804430665896e-06, |
|
"loss": 0.2186, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 0.8551314344765226, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 2.898663049796551e-06, |
|
"loss": 0.229, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.8557773041400245, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 2.885745656526513e-06, |
|
"loss": 0.2373, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.8564231738035264, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 2.872828263256475e-06, |
|
"loss": 0.2457, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.8570690434670284, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 2.859910869986437e-06, |
|
"loss": 0.2358, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 0.8577149131305303, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 2.8469934767163985e-06, |
|
"loss": 0.223, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.8583607827940322, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 2.834076083446361e-06, |
|
"loss": 0.226, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 0.859006652457534, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 2.8211586901763227e-06, |
|
"loss": 0.2203, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.8596525221210359, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 2.8082412969062845e-06, |
|
"loss": 0.2594, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 0.8602983917845379, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 2.795323903636246e-06, |
|
"loss": 0.226, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.8609442614480398, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 2.7824065103662083e-06, |
|
"loss": 0.2076, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.8615901311115417, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 2.76948911709617e-06, |
|
"loss": 0.231, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.8622360007750436, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 2.756571723826132e-06, |
|
"loss": 0.226, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.8628818704385455, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 2.7436543305560943e-06, |
|
"loss": 0.2411, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.8635277401020474, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 2.7307369372860557e-06, |
|
"loss": 0.2044, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 0.8641736097655494, |
|
"grad_norm": 0.875, |
|
"learning_rate": 2.7178195440160176e-06, |
|
"loss": 0.2203, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.8648194794290512, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 2.70490215074598e-06, |
|
"loss": 0.1989, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 0.8654653490925531, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 2.6919847574759418e-06, |
|
"loss": 0.2382, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.866111218756055, |
|
"grad_norm": 1.25, |
|
"learning_rate": 2.6790673642059032e-06, |
|
"loss": 0.2477, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 0.8667570884195569, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 2.666149970935865e-06, |
|
"loss": 0.2271, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.8674029580830588, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 2.6532325776658274e-06, |
|
"loss": 0.2345, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 0.8680488277465608, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 2.6403151843957892e-06, |
|
"loss": 0.1954, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.8686946974100627, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 2.6273977911257507e-06, |
|
"loss": 0.2317, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.8693405670735646, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 2.614480397855713e-06, |
|
"loss": 0.2256, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.8699864367370664, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 2.601563004585675e-06, |
|
"loss": 0.2492, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 0.8706323064005683, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 2.5886456113156367e-06, |
|
"loss": 0.197, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.8712781760640703, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 2.575728218045598e-06, |
|
"loss": 0.2342, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 0.8719240457275722, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 2.5628108247755604e-06, |
|
"loss": 0.2428, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8725699153910741, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 2.5498934315055223e-06, |
|
"loss": 0.2235, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 0.873215785054576, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 2.536976038235484e-06, |
|
"loss": 0.2374, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 0.8738616547180779, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 2.5240586449654465e-06, |
|
"loss": 0.2008, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 0.8745075243815797, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 2.511141251695408e-06, |
|
"loss": 0.2279, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 0.8751533940450817, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 2.4982238584253698e-06, |
|
"loss": 0.2138, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.8757992637085836, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 2.4853064651553317e-06, |
|
"loss": 0.2408, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 0.8764451333720855, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 2.472389071885294e-06, |
|
"loss": 0.2262, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 0.8770910030355874, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 2.4594716786152554e-06, |
|
"loss": 0.2629, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 0.8777368726990893, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 2.4465542853452177e-06, |
|
"loss": 0.2294, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 0.8783827423625913, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 2.433636892075179e-06, |
|
"loss": 0.2014, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.8790286120260932, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 2.4207194988051414e-06, |
|
"loss": 0.2162, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 0.8796744816895951, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 2.4078021055351033e-06, |
|
"loss": 0.2502, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 0.8803203513530969, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 2.394884712265065e-06, |
|
"loss": 0.2121, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 0.8809662210165988, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 2.381967318995027e-06, |
|
"loss": 0.2182, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.8816120906801007, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 2.369049925724989e-06, |
|
"loss": 0.2202, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.8822579603436027, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 2.3561325324549508e-06, |
|
"loss": 0.2262, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 0.8829038300071046, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 2.3432151391849126e-06, |
|
"loss": 0.2558, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 0.8835496996706065, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 2.3302977459148745e-06, |
|
"loss": 0.2542, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 0.8841955693341084, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 2.3173803526448368e-06, |
|
"loss": 0.221, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 0.8848414389976103, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 2.3044629593747982e-06, |
|
"loss": 0.2304, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.8854873086611121, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 2.2915455661047605e-06, |
|
"loss": 0.2542, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 0.8861331783246141, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 2.278628172834722e-06, |
|
"loss": 0.2394, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 0.886779047988116, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 2.2657107795646843e-06, |
|
"loss": 0.2272, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 0.8874249176516179, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 2.252793386294646e-06, |
|
"loss": 0.2146, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 0.8880707873151198, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 2.239875993024608e-06, |
|
"loss": 0.2356, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.8887166569786217, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 2.22695859975457e-06, |
|
"loss": 0.2072, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.8893625266421237, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.2140412064845317e-06, |
|
"loss": 0.2024, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 0.8900083963056256, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 2.2011238132144936e-06, |
|
"loss": 0.2202, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 0.8906542659691274, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 2.1882064199444555e-06, |
|
"loss": 0.2418, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 0.8913001356326293, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 2.1752890266744173e-06, |
|
"loss": 0.2116, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.8919460052961312, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 2.162371633404379e-06, |
|
"loss": 0.2606, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 0.8925918749596331, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 2.149454240134341e-06, |
|
"loss": 0.2238, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 0.8932377446231351, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 2.136536846864303e-06, |
|
"loss": 0.234, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 0.893883614286637, |
|
"grad_norm": 1.25, |
|
"learning_rate": 2.123619453594265e-06, |
|
"loss": 0.2401, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 0.8945294839501389, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 2.1107020603242267e-06, |
|
"loss": 0.2205, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.8951753536136408, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 2.097784667054189e-06, |
|
"loss": 0.2125, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 0.8958212232771426, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 2.0848672737841504e-06, |
|
"loss": 0.2512, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 0.8964670929406445, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 2.0719498805141127e-06, |
|
"loss": 0.2168, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 0.8971129626041465, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 2.059032487244074e-06, |
|
"loss": 0.2138, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 0.8977588322676484, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 2.0461150939740364e-06, |
|
"loss": 0.2093, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.8984047019311503, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 2.033197700703998e-06, |
|
"loss": 0.2138, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 0.8990505715946522, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 2.02028030743396e-06, |
|
"loss": 0.2486, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 0.8996964412581541, |
|
"grad_norm": 1.0, |
|
"learning_rate": 2.007362914163922e-06, |
|
"loss": 0.2278, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 0.9003423109216561, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.994445520893884e-06, |
|
"loss": 0.2334, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 0.900988180585158, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.9815281276238458e-06, |
|
"loss": 0.2393, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.9016340502486598, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.9686107343538076e-06, |
|
"loss": 0.2319, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 0.9022799199121617, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.9556933410837695e-06, |
|
"loss": 0.2443, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 0.9029257895756636, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 1.9427759478137314e-06, |
|
"loss": 0.2578, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 0.9035716592391655, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.9298585545436932e-06, |
|
"loss": 0.2175, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 0.9042175289026675, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.916941161273655e-06, |
|
"loss": 0.2073, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9048633985661694, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.904023768003617e-06, |
|
"loss": 0.2171, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 0.9055092682296713, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 1.8911063747335788e-06, |
|
"loss": 0.222, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 0.9061551378931731, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.8781889814635407e-06, |
|
"loss": 0.2034, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 0.906801007556675, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.8652715881935026e-06, |
|
"loss": 0.2077, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 0.9074468772201769, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.8523541949234647e-06, |
|
"loss": 0.2269, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.9080927468836789, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.8394368016534265e-06, |
|
"loss": 0.2174, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 0.9087386165471808, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.8265194083833884e-06, |
|
"loss": 0.2188, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 0.9093844862106827, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.8136020151133503e-06, |
|
"loss": 0.2332, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.9100303558741846, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.8006846218433121e-06, |
|
"loss": 0.1985, |
|
"step": 14090 |
|
}, |
|
{ |
|
"epoch": 0.9106762255376865, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.7877672285732742e-06, |
|
"loss": 0.2411, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.9113220952011885, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.7748498353032359e-06, |
|
"loss": 0.257, |
|
"step": 14110 |
|
}, |
|
{ |
|
"epoch": 0.9119679648646903, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.761932442033198e-06, |
|
"loss": 0.2211, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 0.9126138345281922, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.7490150487631596e-06, |
|
"loss": 0.2456, |
|
"step": 14130 |
|
}, |
|
{ |
|
"epoch": 0.9132597041916941, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.7360976554931217e-06, |
|
"loss": 0.2163, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 0.913905573855196, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.7231802622230838e-06, |
|
"loss": 0.2068, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.9145514435186979, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.7102628689530454e-06, |
|
"loss": 0.216, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 0.9151973131821999, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.6973454756830075e-06, |
|
"loss": 0.2269, |
|
"step": 14170 |
|
}, |
|
{ |
|
"epoch": 0.9158431828457018, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.6844280824129691e-06, |
|
"loss": 0.2435, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 0.9164890525092036, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.6715106891429312e-06, |
|
"loss": 0.2096, |
|
"step": 14190 |
|
}, |
|
{ |
|
"epoch": 0.9171349221727055, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.6585932958728929e-06, |
|
"loss": 0.2148, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.9177807918362074, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.645675902602855e-06, |
|
"loss": 0.212, |
|
"step": 14210 |
|
}, |
|
{ |
|
"epoch": 0.9184266614997093, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.6327585093328168e-06, |
|
"loss": 0.2286, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 0.9190725311632113, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.6198411160627787e-06, |
|
"loss": 0.2263, |
|
"step": 14230 |
|
}, |
|
{ |
|
"epoch": 0.9197184008267132, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.6069237227927406e-06, |
|
"loss": 0.2046, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 0.9203642704902151, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.5940063295227024e-06, |
|
"loss": 0.2153, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.921010140153717, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.5810889362526643e-06, |
|
"loss": 0.2215, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.9216560098172188, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.5681715429826264e-06, |
|
"loss": 0.2487, |
|
"step": 14270 |
|
}, |
|
{ |
|
"epoch": 0.9223018794807208, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.555254149712588e-06, |
|
"loss": 0.2326, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 0.9229477491442227, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.5423367564425501e-06, |
|
"loss": 0.2476, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 0.9235936188077246, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.5294193631725118e-06, |
|
"loss": 0.2299, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.9242394884712265, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.5165019699024739e-06, |
|
"loss": 0.2373, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 0.9248853581347284, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.5035845766324355e-06, |
|
"loss": 0.2055, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 0.9255312277982303, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.4906671833623976e-06, |
|
"loss": 0.2434, |
|
"step": 14330 |
|
}, |
|
{ |
|
"epoch": 0.9261770974617323, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.4777497900923597e-06, |
|
"loss": 0.2256, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 0.9268229671252342, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.4648323968223213e-06, |
|
"loss": 0.2611, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.927468836788736, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.4519150035522834e-06, |
|
"loss": 0.273, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 0.9281147064522379, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.438997610282245e-06, |
|
"loss": 0.22, |
|
"step": 14370 |
|
}, |
|
{ |
|
"epoch": 0.9287605761157398, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.4260802170122071e-06, |
|
"loss": 0.2239, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 0.9294064457792417, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.413162823742169e-06, |
|
"loss": 0.2509, |
|
"step": 14390 |
|
}, |
|
{ |
|
"epoch": 0.9300523154427437, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 1.4002454304721309e-06, |
|
"loss": 0.2163, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.9306981851062456, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.3873280372020927e-06, |
|
"loss": 0.2509, |
|
"step": 14410 |
|
}, |
|
{ |
|
"epoch": 0.9313440547697475, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.3744106439320546e-06, |
|
"loss": 0.2203, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 0.9319899244332494, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.3614932506620165e-06, |
|
"loss": 0.2271, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 0.9326357940967512, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.3485758573919783e-06, |
|
"loss": 0.2165, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 0.9332816637602532, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.3356584641219402e-06, |
|
"loss": 0.2613, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.9339275334237551, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.3227410708519023e-06, |
|
"loss": 0.2163, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 0.934573403087257, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.309823677581864e-06, |
|
"loss": 0.2471, |
|
"step": 14470 |
|
}, |
|
{ |
|
"epoch": 0.9352192727507589, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.296906284311826e-06, |
|
"loss": 0.2347, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 0.9358651424142608, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.2839888910417877e-06, |
|
"loss": 0.2137, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 0.9365110120777627, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.2710714977717498e-06, |
|
"loss": 0.1864, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.9371568817412647, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.2581541045017118e-06, |
|
"loss": 0.2092, |
|
"step": 14510 |
|
}, |
|
{ |
|
"epoch": 0.9378027514047665, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.2452367112316735e-06, |
|
"loss": 0.2157, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 0.9384486210682684, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.2323193179616356e-06, |
|
"loss": 0.2132, |
|
"step": 14530 |
|
}, |
|
{ |
|
"epoch": 0.9390944907317703, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 1.2194019246915974e-06, |
|
"loss": 0.2278, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 0.9397403603952722, |
|
"grad_norm": 1.5, |
|
"learning_rate": 1.2064845314215593e-06, |
|
"loss": 0.2301, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.9403862300587741, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 1.1935671381515212e-06, |
|
"loss": 0.2384, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 0.9410320997222761, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.180649744881483e-06, |
|
"loss": 0.2372, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 0.941677969385778, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.167732351611445e-06, |
|
"loss": 0.2263, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 0.9423238390492799, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.1548149583414068e-06, |
|
"loss": 0.2134, |
|
"step": 14590 |
|
}, |
|
{ |
|
"epoch": 0.9429697087127817, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.1418975650713686e-06, |
|
"loss": 0.218, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.9436155783762836, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.1289801718013305e-06, |
|
"loss": 0.2383, |
|
"step": 14610 |
|
}, |
|
{ |
|
"epoch": 0.9442614480397856, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.1160627785312924e-06, |
|
"loss": 0.2381, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 0.9449073177032875, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.1031453852612543e-06, |
|
"loss": 0.2117, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 0.9455531873667894, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.0902279919912161e-06, |
|
"loss": 0.2245, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 0.9461990570302913, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.0773105987211782e-06, |
|
"loss": 0.2186, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.9468449266937932, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.06439320545114e-06, |
|
"loss": 0.2165, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 0.947490796357295, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.051475812181102e-06, |
|
"loss": 0.2171, |
|
"step": 14670 |
|
}, |
|
{ |
|
"epoch": 0.948136666020797, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.0385584189110638e-06, |
|
"loss": 0.2335, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 0.9487825356842989, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.0256410256410257e-06, |
|
"loss": 0.2102, |
|
"step": 14690 |
|
}, |
|
{ |
|
"epoch": 0.9494284053478008, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.0127236323709875e-06, |
|
"loss": 0.2354, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.9500742750113027, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 9.998062391009496e-07, |
|
"loss": 0.2335, |
|
"step": 14710 |
|
}, |
|
{ |
|
"epoch": 0.9507201446748046, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 9.868888458309115e-07, |
|
"loss": 0.2285, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.9513660143383065, |
|
"grad_norm": 0.875, |
|
"learning_rate": 9.739714525608734e-07, |
|
"loss": 0.2098, |
|
"step": 14730 |
|
}, |
|
{ |
|
"epoch": 0.9520118840018085, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 9.610540592908352e-07, |
|
"loss": 0.2188, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 0.9526577536653104, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 9.481366660207971e-07, |
|
"loss": 0.2318, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.9533036233288122, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 9.35219272750759e-07, |
|
"loss": 0.2758, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 0.9539494929923141, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 9.223018794807209e-07, |
|
"loss": 0.2229, |
|
"step": 14770 |
|
}, |
|
{ |
|
"epoch": 0.954595362655816, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 9.093844862106828e-07, |
|
"loss": 0.1928, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 0.955241232319318, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 8.964670929406447e-07, |
|
"loss": 0.2239, |
|
"step": 14790 |
|
}, |
|
{ |
|
"epoch": 0.9558871019828199, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 8.835496996706065e-07, |
|
"loss": 0.2304, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.9565329716463218, |
|
"grad_norm": 1.125, |
|
"learning_rate": 8.706323064005684e-07, |
|
"loss": 0.2196, |
|
"step": 14810 |
|
}, |
|
{ |
|
"epoch": 0.9571788413098237, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 8.577149131305303e-07, |
|
"loss": 0.2376, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 0.9578247109733256, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 8.447975198604922e-07, |
|
"loss": 0.2115, |
|
"step": 14830 |
|
}, |
|
{ |
|
"epoch": 0.9584705806368274, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 8.318801265904541e-07, |
|
"loss": 0.2222, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 0.9591164503003294, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 8.18962733320416e-07, |
|
"loss": 0.2235, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.9597623199638313, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 8.060453400503778e-07, |
|
"loss": 0.223, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 0.9604081896273332, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 7.931279467803397e-07, |
|
"loss": 0.242, |
|
"step": 14870 |
|
}, |
|
{ |
|
"epoch": 0.9610540592908351, |
|
"grad_norm": 1.0, |
|
"learning_rate": 7.802105535103016e-07, |
|
"loss": 0.24, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.961699928954337, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 7.672931602402637e-07, |
|
"loss": 0.2555, |
|
"step": 14890 |
|
}, |
|
{ |
|
"epoch": 0.9623457986178389, |
|
"grad_norm": 1.375, |
|
"learning_rate": 7.543757669702255e-07, |
|
"loss": 0.2124, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.9629916682813409, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 7.414583737001874e-07, |
|
"loss": 0.2361, |
|
"step": 14910 |
|
}, |
|
{ |
|
"epoch": 0.9636375379448427, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 7.285409804301493e-07, |
|
"loss": 0.2524, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 0.9642834076083446, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 7.156235871601111e-07, |
|
"loss": 0.2455, |
|
"step": 14930 |
|
}, |
|
{ |
|
"epoch": 0.9649292772718465, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 7.027061938900731e-07, |
|
"loss": 0.2373, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 0.9655751469353484, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 6.89788800620035e-07, |
|
"loss": 0.2175, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.9662210165988504, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 6.768714073499968e-07, |
|
"loss": 0.2294, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 0.9668668862623523, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 6.639540140799587e-07, |
|
"loss": 0.2087, |
|
"step": 14970 |
|
}, |
|
{ |
|
"epoch": 0.9675127559258542, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 6.510366208099206e-07, |
|
"loss": 0.2346, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 0.9681586255893561, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 6.381192275398824e-07, |
|
"loss": 0.2342, |
|
"step": 14990 |
|
}, |
|
{ |
|
"epoch": 0.968804495252858, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 6.252018342698444e-07, |
|
"loss": 0.2246, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 15483, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5678418788963066e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|