|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0051031256644696, |
|
"eval_steps": 500, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0015464017165059054, |
|
"grad_norm": 4352.0, |
|
"learning_rate": 1.9972307692307693e-05, |
|
"loss": 10.9174, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0030928034330118107, |
|
"grad_norm": 71168.0, |
|
"learning_rate": 1.9941538461538464e-05, |
|
"loss": 11.9649, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004639205149517716, |
|
"grad_norm": 190.0, |
|
"learning_rate": 1.9910769230769232e-05, |
|
"loss": 5.27, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0061856068660236215, |
|
"grad_norm": 16.125, |
|
"learning_rate": 1.9880000000000003e-05, |
|
"loss": 0.3647, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.007732008582529527, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 1.984923076923077e-05, |
|
"loss": 0.3099, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.009278410299035433, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 1.9818461538461538e-05, |
|
"loss": 0.2842, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.010824812015541337, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 1.978769230769231e-05, |
|
"loss": 0.2943, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.012371213732047243, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.9756923076923077e-05, |
|
"loss": 0.3539, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.013917615448553147, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.9726153846153848e-05, |
|
"loss": 0.259, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.015464017165059053, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.9695384615384616e-05, |
|
"loss": 0.2741, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01701041888156496, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.9664615384615387e-05, |
|
"loss": 0.281, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.018556820598070865, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 1.9633846153846155e-05, |
|
"loss": 0.2586, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.020103222314576768, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 1.9603076923076926e-05, |
|
"loss": 0.2776, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.021649624031082674, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.9572307692307693e-05, |
|
"loss": 0.3186, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02319602574758858, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.9541538461538464e-05, |
|
"loss": 0.3315, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.024742427464094486, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.9510769230769232e-05, |
|
"loss": 0.257, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.026288829180600392, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.948e-05, |
|
"loss": 0.2592, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.027835230897106295, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.944923076923077e-05, |
|
"loss": 0.2703, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0293816326136122, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 1.941846153846154e-05, |
|
"loss": 0.2547, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.030928034330118107, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.938769230769231e-05, |
|
"loss": 0.3182, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03247443604662401, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 1.9356923076923077e-05, |
|
"loss": 0.3005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03402083776312992, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.932615384615385e-05, |
|
"loss": 0.2693, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.035567239479635825, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.929538461538462e-05, |
|
"loss": 0.2925, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03711364119614173, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.9264615384615387e-05, |
|
"loss": 0.3165, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03866004291264763, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.9233846153846155e-05, |
|
"loss": 0.2606, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.040206444629153536, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.9203076923076923e-05, |
|
"loss": 0.324, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04175284634565944, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.9172307692307694e-05, |
|
"loss": 0.2787, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04329924806216535, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.914153846153846e-05, |
|
"loss": 0.3092, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.044845649778671254, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.9110769230769233e-05, |
|
"loss": 0.2831, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.04639205149517716, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.908e-05, |
|
"loss": 0.282, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.047938453211683066, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 1.904923076923077e-05, |
|
"loss": 0.3863, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04948485492818897, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.901846153846154e-05, |
|
"loss": 0.246, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05103125664469488, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.898769230769231e-05, |
|
"loss": 0.3483, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.052577658361200784, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.8956923076923078e-05, |
|
"loss": 0.4107, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05412406007770668, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.892615384615385e-05, |
|
"loss": 0.2813, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05567046179421259, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.8895384615384617e-05, |
|
"loss": 0.283, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.057216863510718495, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.8864615384615384e-05, |
|
"loss": 0.268, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.0587632652272244, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.8833846153846155e-05, |
|
"loss": 0.2852, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06030966694373031, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 1.8803076923076923e-05, |
|
"loss": 0.2477, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06185606866023621, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.8772307692307694e-05, |
|
"loss": 0.2418, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06340247037674211, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.8741538461538462e-05, |
|
"loss": 0.2218, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06494887209324803, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.8710769230769233e-05, |
|
"loss": 0.2616, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.06649527380975392, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.8680000000000004e-05, |
|
"loss": 0.3475, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.06804167552625984, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.8649230769230772e-05, |
|
"loss": 0.3025, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.06958807724276574, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.861846153846154e-05, |
|
"loss": 0.3119, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07113447895927165, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.8587692307692307e-05, |
|
"loss": 0.3004, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07268088067577755, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.8556923076923078e-05, |
|
"loss": 0.2957, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.07422728239228346, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.8526153846153846e-05, |
|
"loss": 0.3162, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.07577368410878936, |
|
"grad_norm": 1.75, |
|
"learning_rate": 1.8495384615384617e-05, |
|
"loss": 0.3637, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.07732008582529526, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.8464615384615385e-05, |
|
"loss": 0.2379, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07886648754180117, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 1.8433846153846156e-05, |
|
"loss": 0.3098, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.08041288925830707, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.8403076923076924e-05, |
|
"loss": 0.3977, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08195929097481298, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.8372307692307695e-05, |
|
"loss": 0.3034, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.08350569269131888, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.8341538461538462e-05, |
|
"loss": 0.2327, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.0850520944078248, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.8310769230769233e-05, |
|
"loss": 0.2561, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0865984961243307, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 1.828e-05, |
|
"loss": 0.3739, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.08814489784083661, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.824923076923077e-05, |
|
"loss": 0.3605, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.08969129955734251, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.821846153846154e-05, |
|
"loss": 0.2557, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09123770127384842, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.8187692307692308e-05, |
|
"loss": 0.2806, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.09278410299035432, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.815692307692308e-05, |
|
"loss": 0.2977, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09433050470686022, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.8126153846153846e-05, |
|
"loss": 0.2845, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.09587690642336613, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.8095384615384618e-05, |
|
"loss": 0.3309, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.09742330813987203, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.806461538461539e-05, |
|
"loss": 0.3197, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.09896970985637794, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.8033846153846156e-05, |
|
"loss": 0.2654, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10051611157288384, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.8003076923076924e-05, |
|
"loss": 0.2954, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.10206251328938976, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.7972307692307692e-05, |
|
"loss": 0.3237, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.10360891500589565, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.7941538461538463e-05, |
|
"loss": 0.2887, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.10515531672240157, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.791076923076923e-05, |
|
"loss": 0.3018, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.10670171843890747, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.788e-05, |
|
"loss": 0.261, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.10824812015541337, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 1.784923076923077e-05, |
|
"loss": 0.254, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10979452187191928, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.781846153846154e-05, |
|
"loss": 0.2944, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.11134092358842518, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.778769230769231e-05, |
|
"loss": 0.3163, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.11288732530493109, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 1.775692307692308e-05, |
|
"loss": 0.2838, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.11443372702143699, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 1.7726153846153847e-05, |
|
"loss": 0.236, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1159801287379429, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.7695384615384618e-05, |
|
"loss": 0.2164, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1175265304544488, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.7664615384615386e-05, |
|
"loss": 0.3331, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.11907293217095472, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.7633846153846153e-05, |
|
"loss": 0.303, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.12061933388746061, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.7603076923076924e-05, |
|
"loss": 0.3264, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.12216573560396653, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.7572307692307692e-05, |
|
"loss": 0.2097, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.12371213732047243, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.7541538461538463e-05, |
|
"loss": 0.2456, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12525853903697834, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.751076923076923e-05, |
|
"loss": 0.2877, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.12680494075348422, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.7480000000000002e-05, |
|
"loss": 0.2902, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.12835134246999014, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.7449230769230773e-05, |
|
"loss": 0.2357, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.12989774418649605, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.741846153846154e-05, |
|
"loss": 0.2926, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.13144414590300196, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.738769230769231e-05, |
|
"loss": 0.2301, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.13299054761950785, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.7356923076923076e-05, |
|
"loss": 0.2501, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.13453694933601376, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.7326153846153847e-05, |
|
"loss": 0.2393, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.13608335105251967, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.7295384615384615e-05, |
|
"loss": 0.2337, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1376297527690256, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.7264615384615386e-05, |
|
"loss": 0.3147, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.13917615448553147, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.7233846153846154e-05, |
|
"loss": 0.2949, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14072255620203739, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.7203076923076925e-05, |
|
"loss": 0.3394, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1422689579185433, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.7172307692307696e-05, |
|
"loss": 0.3119, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.14381535963504918, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.7141538461538464e-05, |
|
"loss": 0.2959, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.1453617613515551, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.711076923076923e-05, |
|
"loss": 0.2677, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.146908163068061, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.7080000000000002e-05, |
|
"loss": 0.2575, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.14845456478456692, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 1.704923076923077e-05, |
|
"loss": 0.2419, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1500009665010728, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.7018461538461538e-05, |
|
"loss": 0.2631, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.15154736821757872, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 1.698769230769231e-05, |
|
"loss": 0.2415, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.15309376993408463, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.6956923076923077e-05, |
|
"loss": 0.2498, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.15464017165059052, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.6926153846153848e-05, |
|
"loss": 0.2845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15618657336709643, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.6895384615384615e-05, |
|
"loss": 0.3159, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.15773297508360234, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.6864615384615387e-05, |
|
"loss": 0.2969, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.15927937680010826, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.6833846153846158e-05, |
|
"loss": 0.3195, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.16082577851661414, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.6803076923076925e-05, |
|
"loss": 0.3086, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.16237218023312006, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.6772307692307693e-05, |
|
"loss": 0.297, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.16391858194962597, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.674153846153846e-05, |
|
"loss": 0.2677, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.16546498366613188, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.6710769230769232e-05, |
|
"loss": 0.294, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.16701138538263777, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.668e-05, |
|
"loss": 0.2483, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.16855778709914368, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.664923076923077e-05, |
|
"loss": 0.2564, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.1701041888156496, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.661846153846154e-05, |
|
"loss": 0.2363, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17165059053215548, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.658769230769231e-05, |
|
"loss": 0.2486, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.1731969922486614, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.655692307692308e-05, |
|
"loss": 0.3142, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.1747433939651673, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 1.6526153846153848e-05, |
|
"loss": 0.4319, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.17628979568167322, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.6495384615384616e-05, |
|
"loss": 0.2727, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.1778361973981791, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 1.6464615384615387e-05, |
|
"loss": 0.2472, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.17938259911468502, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.6433846153846155e-05, |
|
"loss": 0.3036, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.18092900083119093, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.6403076923076922e-05, |
|
"loss": 0.2199, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.18247540254769684, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.6372307692307693e-05, |
|
"loss": 0.2474, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.18402180426420273, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.634153846153846e-05, |
|
"loss": 0.2892, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.18556820598070864, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 1.6310769230769232e-05, |
|
"loss": 0.3317, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18711460769721455, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.628e-05, |
|
"loss": 0.3066, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.18866100941372044, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 1.624923076923077e-05, |
|
"loss": 0.2811, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.19020741113022635, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.6218461538461542e-05, |
|
"loss": 0.2503, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.19175381284673226, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.618769230769231e-05, |
|
"loss": 0.3128, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.19330021456323818, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.6156923076923078e-05, |
|
"loss": 0.3067, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.19484661627974406, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.6126153846153845e-05, |
|
"loss": 0.2975, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.19639301799624997, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.6095384615384616e-05, |
|
"loss": 0.3083, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.1979394197127559, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 1.6064615384615384e-05, |
|
"loss": 0.2786, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.19948582142926177, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.6033846153846155e-05, |
|
"loss": 0.404, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.20103222314576769, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 1.6003076923076923e-05, |
|
"loss": 0.3213, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2025786248622736, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.5972307692307694e-05, |
|
"loss": 0.24, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.2041250265787795, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 1.5941538461538465e-05, |
|
"loss": 0.2711, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2056714282952854, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.5910769230769233e-05, |
|
"loss": 0.2493, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2072178300117913, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.588e-05, |
|
"loss": 0.353, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.20876423172829722, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.584923076923077e-05, |
|
"loss": 0.2534, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.21031063344480314, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.581846153846154e-05, |
|
"loss": 0.2088, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.21185703516130902, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.5787692307692307e-05, |
|
"loss": 0.3146, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.21340343687781493, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.5756923076923078e-05, |
|
"loss": 0.2947, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.21494983859432085, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.5726153846153846e-05, |
|
"loss": 0.2039, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.21649624031082673, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.5695384615384617e-05, |
|
"loss": 0.252, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.21804264202733264, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.5664615384615388e-05, |
|
"loss": 0.2689, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.21958904374383856, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.5633846153846156e-05, |
|
"loss": 0.3239, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.22113544546034447, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.5603076923076927e-05, |
|
"loss": 0.2891, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.22268184717685036, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.5572307692307694e-05, |
|
"loss": 0.3306, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.22422824889335627, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.5541538461538462e-05, |
|
"loss": 0.2971, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.22577465060986218, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.551076923076923e-05, |
|
"loss": 0.2892, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2273210523263681, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.548e-05, |
|
"loss": 0.2773, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.22886745404287398, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.544923076923077e-05, |
|
"loss": 0.2767, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.2304138557593799, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.541846153846154e-05, |
|
"loss": 0.2899, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.2319602574758858, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.5387692307692307e-05, |
|
"loss": 0.2521, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2335066591923917, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.535692307692308e-05, |
|
"loss": 0.2479, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.2350530609088976, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.532615384615385e-05, |
|
"loss": 0.3154, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.23659946262540352, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.5295384615384617e-05, |
|
"loss": 0.3391, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.23814586434190943, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.5264615384615385e-05, |
|
"loss": 0.265, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.23969226605841532, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.5233846153846154e-05, |
|
"loss": 0.2949, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.24123866777492123, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.5203076923076925e-05, |
|
"loss": 0.3136, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.24278506949142714, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.5172307692307693e-05, |
|
"loss": 0.3073, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.24433147120793305, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.5141538461538463e-05, |
|
"loss": 0.3271, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.24587787292443894, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 1.5110769230769232e-05, |
|
"loss": 0.2722, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.24742427464094485, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.5080000000000001e-05, |
|
"loss": 0.3513, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.24897067635745077, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.504923076923077e-05, |
|
"loss": 0.2212, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.2505170780739567, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.501846153846154e-05, |
|
"loss": 0.2914, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.25206347979046256, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.498769230769231e-05, |
|
"loss": 0.281, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.25360988150696845, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.4956923076923077e-05, |
|
"loss": 0.2509, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.2551562832234744, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.4926153846153848e-05, |
|
"loss": 0.2994, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2567026849399803, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.4895384615384616e-05, |
|
"loss": 0.2839, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.2582490866564862, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.4864615384615385e-05, |
|
"loss": 0.229, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.2597954883729921, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.4833846153846155e-05, |
|
"loss": 0.2381, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.261341890089498, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.4803076923076924e-05, |
|
"loss": 0.3495, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.2628882918060039, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.4772307692307692e-05, |
|
"loss": 0.2756, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2644346935225098, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.4741538461538463e-05, |
|
"loss": 0.3189, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.2659810952390157, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 1.4710769230769232e-05, |
|
"loss": 0.289, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.26752749695552164, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.4680000000000002e-05, |
|
"loss": 0.2848, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.2690738986720275, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 1.4649230769230771e-05, |
|
"loss": 0.3115, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.2706203003885334, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.4618461538461539e-05, |
|
"loss": 0.3075, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.27216670210503935, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.458769230769231e-05, |
|
"loss": 0.2705, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.27371310382154523, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 1.4556923076923078e-05, |
|
"loss": 0.266, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.2752595055380512, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.4526153846153847e-05, |
|
"loss": 0.3179, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.27680590725455706, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.4495384615384616e-05, |
|
"loss": 0.2775, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.27835230897106295, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.4464615384615386e-05, |
|
"loss": 0.3279, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2798987106875689, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 1.4433846153846155e-05, |
|
"loss": 0.2373, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.28144511240407477, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.4403076923076925e-05, |
|
"loss": 0.2216, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.28299151412058066, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.4372307692307694e-05, |
|
"loss": 0.3206, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.2845379158370866, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.4341538461538462e-05, |
|
"loss": 0.2467, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.2860843175535925, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.4310769230769233e-05, |
|
"loss": 0.2818, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.28763071927009837, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 1.428e-05, |
|
"loss": 0.2477, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.2891771209866043, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.4249230769230772e-05, |
|
"loss": 0.2576, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.2907235227031102, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.421846153846154e-05, |
|
"loss": 0.2841, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.2922699244196161, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 1.4187692307692309e-05, |
|
"loss": 0.3371, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.293816326136122, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.4156923076923076e-05, |
|
"loss": 0.3037, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2953627278526279, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.4126153846153847e-05, |
|
"loss": 0.2526, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.29690912956913385, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 1.4095384615384617e-05, |
|
"loss": 0.2125, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.29845553128563973, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 1.4064615384615386e-05, |
|
"loss": 0.2783, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.3000019330021456, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.4033846153846156e-05, |
|
"loss": 0.3131, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.30154833471865156, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.4003076923076923e-05, |
|
"loss": 0.3226, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.30309473643515744, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.3972307692307694e-05, |
|
"loss": 0.2819, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3046411381516633, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.3941538461538462e-05, |
|
"loss": 0.2868, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.30618753986816927, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 1.3910769230769232e-05, |
|
"loss": 0.2615, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.30773394158467515, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.3880000000000001e-05, |
|
"loss": 0.255, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.30928034330118104, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.384923076923077e-05, |
|
"loss": 0.251, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.310826745017687, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.3818461538461541e-05, |
|
"loss": 0.2983, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.31237314673419286, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.3787692307692309e-05, |
|
"loss": 0.2705, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.3139195484506988, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.3756923076923079e-05, |
|
"loss": 0.2937, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.3154659501672047, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.3726153846153846e-05, |
|
"loss": 0.3296, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.3170123518837106, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.3695384615384617e-05, |
|
"loss": 0.2666, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3185587536002165, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.3664615384615385e-05, |
|
"loss": 0.3124, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.3201051553167224, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.3633846153846156e-05, |
|
"loss": 0.3752, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.3216515570332283, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.3603076923076924e-05, |
|
"loss": 0.2622, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.3231979587497342, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 1.3572307692307693e-05, |
|
"loss": 0.2526, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.3247443604662401, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.3541538461538464e-05, |
|
"loss": 0.2775, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.326290762182746, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.3510769230769232e-05, |
|
"loss": 0.3322, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.32783716389925194, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.3480000000000001e-05, |
|
"loss": 0.2897, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.3293835656157578, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.344923076923077e-05, |
|
"loss": 0.2815, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.33092996733226376, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.341846153846154e-05, |
|
"loss": 0.2959, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.33247636904876965, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.3387692307692308e-05, |
|
"loss": 0.2571, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.33402277076527553, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.3356923076923079e-05, |
|
"loss": 0.247, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.3355691724817815, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.3326153846153847e-05, |
|
"loss": 0.248, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.33711557419828736, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 1.3295384615384616e-05, |
|
"loss": 0.2438, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.33866197591479325, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.3264615384615385e-05, |
|
"loss": 0.3612, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.3402083776312992, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.3233846153846155e-05, |
|
"loss": 0.3287, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.34175477934780507, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.3203076923076926e-05, |
|
"loss": 0.2756, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.34330118106431096, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.3172307692307694e-05, |
|
"loss": 0.2886, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.3448475827808169, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.3141538461538463e-05, |
|
"loss": 0.2446, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.3463939844973228, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 1.311076923076923e-05, |
|
"loss": 0.3064, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3479403862138287, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.3080000000000002e-05, |
|
"loss": 0.2376, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3494867879303346, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 1.304923076923077e-05, |
|
"loss": 0.2932, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.3510331896468405, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.301846153846154e-05, |
|
"loss": 0.2979, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.35257959136334643, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 1.2987692307692308e-05, |
|
"loss": 0.2897, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.3541259930798523, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.2956923076923078e-05, |
|
"loss": 0.2744, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.3556723947963582, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.2926153846153849e-05, |
|
"loss": 0.2708, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.35721879651286415, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.2895384615384616e-05, |
|
"loss": 0.2224, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.35876519822937003, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.2864615384615386e-05, |
|
"loss": 0.2728, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.3603115999458759, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.2833846153846155e-05, |
|
"loss": 0.2661, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.36185800166238186, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.2803076923076925e-05, |
|
"loss": 0.2892, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.36340440337888774, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.2772307692307692e-05, |
|
"loss": 0.3092, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3649508050953937, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.2741538461538463e-05, |
|
"loss": 0.2542, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.36649720681189957, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.2710769230769231e-05, |
|
"loss": 0.3589, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.36804360852840545, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 1.268e-05, |
|
"loss": 0.2237, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.3695900102449114, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.264923076923077e-05, |
|
"loss": 0.3413, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.3711364119614173, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.261846153846154e-05, |
|
"loss": 0.2556, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.37268281367792316, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.258769230769231e-05, |
|
"loss": 0.3087, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.3742292153944291, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.2556923076923078e-05, |
|
"loss": 0.2609, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.375775617110935, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.2526153846153848e-05, |
|
"loss": 0.2572, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.3773220188274409, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.2495384615384615e-05, |
|
"loss": 0.3003, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.3788684205439468, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.2464615384615386e-05, |
|
"loss": 0.259, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.3804148222604527, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 1.2433846153846154e-05, |
|
"loss": 0.2606, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.38196122397695864, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.2403076923076925e-05, |
|
"loss": 0.2351, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.3835076256934645, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.2372307692307693e-05, |
|
"loss": 0.2664, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.3850540274099704, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.2341538461538462e-05, |
|
"loss": 0.245, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.38660042912647635, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.2310769230769233e-05, |
|
"loss": 0.2781, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.38814683084298224, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.2280000000000001e-05, |
|
"loss": 0.2847, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.3896932325594881, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 1.224923076923077e-05, |
|
"loss": 0.3223, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.39123963427599406, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.221846153846154e-05, |
|
"loss": 0.3068, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.39278603599249995, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 1.218769230769231e-05, |
|
"loss": 0.2393, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.39433243770900583, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.2156923076923077e-05, |
|
"loss": 0.2918, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3958788394255118, |
|
"grad_norm": 0.84375, |
|
"learning_rate": 1.2126153846153848e-05, |
|
"loss": 0.2146, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.39742524114201766, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.2095384615384616e-05, |
|
"loss": 0.3178, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.39897164285852355, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.2064615384615385e-05, |
|
"loss": 0.2247, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.4005180445750295, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 1.2033846153846154e-05, |
|
"loss": 0.2684, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.40206444629153537, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.2003076923076924e-05, |
|
"loss": 0.2332, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4036108480080413, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.1972307692307695e-05, |
|
"loss": 0.4153, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.4051572497245472, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.1941538461538463e-05, |
|
"loss": 0.2559, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.4067036514410531, |
|
"grad_norm": 1.53125, |
|
"learning_rate": 1.1910769230769232e-05, |
|
"loss": 0.2974, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.408250053157559, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.188e-05, |
|
"loss": 0.3523, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.4097964548740649, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.1849230769230771e-05, |
|
"loss": 0.2619, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.4113428565905708, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.1818461538461539e-05, |
|
"loss": 0.2833, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.41288925830707673, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.178769230769231e-05, |
|
"loss": 0.244, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.4144356600235826, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 1.1756923076923077e-05, |
|
"loss": 0.2238, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.4159820617400885, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.1726153846153847e-05, |
|
"loss": 0.2839, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.41752846345659445, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 1.1695384615384618e-05, |
|
"loss": 0.3264, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.41907486517310033, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.1664615384615386e-05, |
|
"loss": 0.2501, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.42062126688960627, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 1.1633846153846155e-05, |
|
"loss": 0.3618, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.42216766860611216, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.1603076923076924e-05, |
|
"loss": 0.2353, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.42371407032261804, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 1.1572307692307694e-05, |
|
"loss": 0.2745, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.425260472039124, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 1.1541538461538461e-05, |
|
"loss": 0.2673, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.42680687375562987, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 1.1510769230769232e-05, |
|
"loss": 0.2448, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.42835327547213575, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 1.148e-05, |
|
"loss": 0.2428, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.4298996771886417, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 1.144923076923077e-05, |
|
"loss": 0.2259, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.4314460789051476, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.141846153846154e-05, |
|
"loss": 0.3214, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.43299248062165346, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 1.1387692307692308e-05, |
|
"loss": 0.272, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4345388823381594, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.135692307692308e-05, |
|
"loss": 0.3055, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.4360852840546653, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.1326153846153847e-05, |
|
"loss": 0.3783, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.43763168577117123, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 1.1295384615384617e-05, |
|
"loss": 0.2318, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.4391780874876771, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.1264615384615384e-05, |
|
"loss": 0.3072, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.440724489204183, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.1233846153846155e-05, |
|
"loss": 0.3272, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.44227089092068894, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.1203076923076923e-05, |
|
"loss": 0.2422, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.4438172926371948, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.1172307692307694e-05, |
|
"loss": 0.2424, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.4453636943537007, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.1141538461538462e-05, |
|
"loss": 0.288, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.44691009607020665, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 1.1110769230769231e-05, |
|
"loss": 0.2376, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.44845649778671254, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.1080000000000002e-05, |
|
"loss": 0.2541, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.4500028995032184, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.104923076923077e-05, |
|
"loss": 0.2928, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.45154930121972436, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 1.101846153846154e-05, |
|
"loss": 0.2582, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.45309570293623025, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.0987692307692309e-05, |
|
"loss": 0.2548, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.4546421046527362, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.0956923076923078e-05, |
|
"loss": 0.3462, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.4561885063692421, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.0926153846153846e-05, |
|
"loss": 0.3076, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.45773490808574796, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.0895384615384617e-05, |
|
"loss": 0.2761, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.4592813098022539, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.0864615384615385e-05, |
|
"loss": 0.3359, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.4608277115187598, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.0833846153846154e-05, |
|
"loss": 0.3213, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.46237411323526567, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 1.0803076923076925e-05, |
|
"loss": 0.2917, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.4639205149517716, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.0772307692307693e-05, |
|
"loss": 0.2774, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4654669166682775, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 1.0741538461538464e-05, |
|
"loss": 0.3373, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.4670133183847834, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 1.0710769230769232e-05, |
|
"loss": 0.248, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.4685597201012893, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.0680000000000001e-05, |
|
"loss": 0.2782, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.4701061218177952, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.0649230769230769e-05, |
|
"loss": 0.3041, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.47165252353430115, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 1.061846153846154e-05, |
|
"loss": 0.2109, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.47319892525080703, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.0587692307692308e-05, |
|
"loss": 0.2815, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.4747453269673129, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.0556923076923079e-05, |
|
"loss": 0.2775, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.47629172868381886, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 1.0526153846153846e-05, |
|
"loss": 0.2645, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.47783813040032475, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.0495384615384616e-05, |
|
"loss": 0.2738, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.47938453211683063, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.0464615384615387e-05, |
|
"loss": 0.2912, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.48093093383333657, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 1.0433846153846155e-05, |
|
"loss": 0.217, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.48247733554984246, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.0403076923076924e-05, |
|
"loss": 0.3397, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.48402373726634834, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.0372307692307693e-05, |
|
"loss": 0.215, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.4855701389828543, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.0341538461538463e-05, |
|
"loss": 0.2587, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.48711654069936017, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.031076923076923e-05, |
|
"loss": 0.2183, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.4886629424158661, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.0280000000000002e-05, |
|
"loss": 0.2551, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.490209344132372, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.024923076923077e-05, |
|
"loss": 0.2389, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.4917557458488779, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.0218461538461539e-05, |
|
"loss": 0.2774, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.4933021475653838, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.018769230769231e-05, |
|
"loss": 0.2608, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.4948485492818897, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.0156923076923077e-05, |
|
"loss": 0.3287, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.4963949509983956, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.0126153846153849e-05, |
|
"loss": 0.236, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.49794135271490153, |
|
"grad_norm": 1.0, |
|
"learning_rate": 1.0095384615384616e-05, |
|
"loss": 0.259, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.4994877544314074, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.0064615384615386e-05, |
|
"loss": 0.2668, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.5010341561479134, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.0033846153846153e-05, |
|
"loss": 0.3078, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.5025805578644192, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.0003076923076924e-05, |
|
"loss": 0.2674, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5041269595809251, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 9.972307692307694e-06, |
|
"loss": 0.276, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.505673361297431, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 9.941538461538463e-06, |
|
"loss": 0.2331, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.5072197630139369, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 9.910769230769231e-06, |
|
"loss": 0.2518, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.5087661647304429, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 9.88e-06, |
|
"loss": 0.3217, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.5103125664469488, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 9.84923076923077e-06, |
|
"loss": 0.2582, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5118589681634547, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 9.818461538461539e-06, |
|
"loss": 0.2967, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.5134053698799605, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 9.787692307692308e-06, |
|
"loss": 0.2508, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.5149517715964664, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 9.756923076923078e-06, |
|
"loss": 0.247, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.5164981733129724, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 9.726153846153847e-06, |
|
"loss": 0.2664, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.5180445750294783, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 9.695384615384617e-06, |
|
"loss": 0.2963, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.5195909767459842, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 9.664615384615386e-06, |
|
"loss": 0.2869, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5211373784624901, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 9.633846153846155e-06, |
|
"loss": 0.2125, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.522683780178996, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 9.603076923076923e-06, |
|
"loss": 0.2379, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.5242301818955019, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 9.572307692307693e-06, |
|
"loss": 0.2897, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.5257765836120079, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 9.541538461538462e-06, |
|
"loss": 0.2954, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5273229853285137, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 9.510769230769231e-06, |
|
"loss": 0.257, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.5288693870450196, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 9.48e-06, |
|
"loss": 0.3107, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.5304157887615255, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 9.44923076923077e-06, |
|
"loss": 0.2178, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.5319621904780314, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 9.41846153846154e-06, |
|
"loss": 0.2407, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.5335085921945374, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 9.387692307692309e-06, |
|
"loss": 0.284, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.5350549939110433, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 9.356923076923078e-06, |
|
"loss": 0.2358, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.5366013956275492, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 9.326153846153848e-06, |
|
"loss": 0.2455, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.538147797344055, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 9.295384615384615e-06, |
|
"loss": 0.3416, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.5396941990605609, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 9.264615384615385e-06, |
|
"loss": 0.2908, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.5412406007770668, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 9.233846153846154e-06, |
|
"loss": 0.2648, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5427870024935728, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 9.203076923076924e-06, |
|
"loss": 0.2159, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.5443334042100787, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 9.172307692307693e-06, |
|
"loss": 0.3019, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5458798059265846, |
|
"grad_norm": 1.4375, |
|
"learning_rate": 9.141538461538462e-06, |
|
"loss": 0.2886, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.5474262076430905, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 9.110769230769232e-06, |
|
"loss": 0.2674, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.5489726093595964, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 9.080000000000001e-06, |
|
"loss": 0.2809, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.5505190110761023, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 9.04923076923077e-06, |
|
"loss": 0.3105, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.5520654127926082, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 9.01846153846154e-06, |
|
"loss": 0.3115, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.5536118145091141, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 8.987692307692308e-06, |
|
"loss": 0.2605, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.55515821622562, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 8.956923076923077e-06, |
|
"loss": 0.2281, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.5567046179421259, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 8.926153846153846e-06, |
|
"loss": 0.2732, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5582510196586318, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 8.895384615384616e-06, |
|
"loss": 0.2134, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.5597974213751378, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 8.864615384615385e-06, |
|
"loss": 0.2788, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.5613438230916437, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 8.833846153846155e-06, |
|
"loss": 0.2558, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.5628902248081495, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 8.803076923076924e-06, |
|
"loss": 0.2719, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.5644366265246554, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 8.772307692307693e-06, |
|
"loss": 0.2596, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.5659830282411613, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 8.741538461538463e-06, |
|
"loss": 0.2484, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.5675294299576673, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 8.710769230769232e-06, |
|
"loss": 0.2734, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.5690758316741732, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 8.68e-06, |
|
"loss": 0.2728, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.5706222333906791, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 8.64923076923077e-06, |
|
"loss": 0.2746, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.572168635107185, |
|
"grad_norm": 0.94140625, |
|
"learning_rate": 8.618461538461539e-06, |
|
"loss": 0.2767, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5737150368236908, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 8.587692307692308e-06, |
|
"loss": 0.2798, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.5752614385401967, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 8.556923076923077e-06, |
|
"loss": 0.2573, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.5768078402567027, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 8.526153846153847e-06, |
|
"loss": 0.2756, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.5783542419732086, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 8.495384615384616e-06, |
|
"loss": 0.2819, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.5799006436897145, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 8.464615384615386e-06, |
|
"loss": 0.22, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.5814470454062204, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 8.433846153846155e-06, |
|
"loss": 0.2857, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.5829934471227263, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 8.403076923076924e-06, |
|
"loss": 0.2803, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.5845398488392322, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 8.372307692307692e-06, |
|
"loss": 0.2207, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.5860862505557382, |
|
"grad_norm": 1.375, |
|
"learning_rate": 8.341538461538462e-06, |
|
"loss": 0.2684, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.587632652272244, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 8.310769230769231e-06, |
|
"loss": 0.2353, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5891790539887499, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 8.28e-06, |
|
"loss": 0.3244, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.5907254557052558, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 8.24923076923077e-06, |
|
"loss": 0.288, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.5922718574217617, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 8.218461538461539e-06, |
|
"loss": 0.261, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.5938182591382677, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 8.187692307692309e-06, |
|
"loss": 0.3277, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.5953646608547736, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 8.156923076923078e-06, |
|
"loss": 0.2727, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.5969110625712795, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 8.126153846153847e-06, |
|
"loss": 0.2319, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.5984574642877853, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 8.095384615384617e-06, |
|
"loss": 0.252, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.6000038660042912, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 8.064615384615384e-06, |
|
"loss": 0.2683, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.6015502677207971, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 8.033846153846154e-06, |
|
"loss": 0.3251, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.6030966694373031, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 8.003076923076923e-06, |
|
"loss": 0.3153, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.604643071153809, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 7.972307692307693e-06, |
|
"loss": 0.3234, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.6061894728703149, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 7.941538461538462e-06, |
|
"loss": 0.2812, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.6077358745868208, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 7.910769230769231e-06, |
|
"loss": 0.2959, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.6092822763033267, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 7.88e-06, |
|
"loss": 0.2818, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.6108286780198326, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 7.84923076923077e-06, |
|
"loss": 0.3152, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.6123750797363385, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 7.81846153846154e-06, |
|
"loss": 0.2988, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.6139214814528444, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 7.787692307692309e-06, |
|
"loss": 0.2835, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.6154678831693503, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 7.756923076923077e-06, |
|
"loss": 0.3486, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.6170142848858562, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 7.726153846153846e-06, |
|
"loss": 0.2934, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.6185606866023621, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 7.695384615384615e-06, |
|
"loss": 0.2678, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6201070883188681, |
|
"grad_norm": 0.90625, |
|
"learning_rate": 7.664615384615385e-06, |
|
"loss": 0.2608, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.621653490035374, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 7.633846153846154e-06, |
|
"loss": 0.289, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.6231998917518798, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 7.6030769230769245e-06, |
|
"loss": 0.2856, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.6247462934683857, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 7.572307692307693e-06, |
|
"loss": 0.2569, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.6262926951848916, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 7.5415384615384624e-06, |
|
"loss": 0.2727, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.6278390969013976, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 7.510769230769232e-06, |
|
"loss": 0.279, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.6293854986179035, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 7.48e-06, |
|
"loss": 0.3606, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.6309319003344094, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 7.44923076923077e-06, |
|
"loss": 0.2959, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.6324783020509153, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 7.418461538461539e-06, |
|
"loss": 0.2622, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.6340247037674211, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 7.387692307692308e-06, |
|
"loss": 0.2207, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.635571105483927, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 7.356923076923077e-06, |
|
"loss": 0.3007, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.637117507200433, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 7.326153846153847e-06, |
|
"loss": 0.2815, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.6386639089169389, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 7.295384615384617e-06, |
|
"loss": 0.2587, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.6402103106334448, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 7.264615384615385e-06, |
|
"loss": 0.2999, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.6417567123499507, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 7.233846153846155e-06, |
|
"loss": 0.2398, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.6433031140664566, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 7.203076923076924e-06, |
|
"loss": 0.2716, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6448495157829626, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 7.172307692307693e-06, |
|
"loss": 0.2222, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.6463959174994685, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 7.141538461538462e-06, |
|
"loss": 0.285, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.6479423192159743, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 7.1107692307692314e-06, |
|
"loss": 0.3562, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.6494887209324802, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 7.08e-06, |
|
"loss": 0.283, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6510351226489861, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 7.049230769230769e-06, |
|
"loss": 0.2915, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.652581524365492, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 7.01846153846154e-06, |
|
"loss": 0.2424, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.654127926081998, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 6.987692307692309e-06, |
|
"loss": 0.2456, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.6556743277985039, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 6.9569230769230776e-06, |
|
"loss": 0.2946, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.6572207295150098, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 6.926153846153847e-06, |
|
"loss": 0.3338, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.6587671312315156, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 6.895384615384616e-06, |
|
"loss": 0.2645, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.6603135329480215, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 6.864615384615385e-06, |
|
"loss": 0.2671, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.6618599346645275, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 6.833846153846154e-06, |
|
"loss": 0.2627, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.6634063363810334, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 6.803076923076924e-06, |
|
"loss": 0.2972, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.6649527380975393, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 6.772307692307692e-06, |
|
"loss": 0.2637, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.6664991398140452, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 6.741538461538462e-06, |
|
"loss": 0.2459, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.6680455415305511, |
|
"grad_norm": 0.93359375, |
|
"learning_rate": 6.710769230769232e-06, |
|
"loss": 0.3008, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.669591943247057, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 6.680000000000001e-06, |
|
"loss": 0.2881, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.671138344963563, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 6.64923076923077e-06, |
|
"loss": 0.2887, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.6726847466800688, |
|
"grad_norm": 0.875, |
|
"learning_rate": 6.618461538461539e-06, |
|
"loss": 0.3097, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.6742311483965747, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 6.587692307692309e-06, |
|
"loss": 0.2623, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.6757775501130806, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 6.556923076923077e-06, |
|
"loss": 0.2589, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.6773239518295865, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 6.5261538461538465e-06, |
|
"loss": 0.2149, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.6788703535460925, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 6.495384615384616e-06, |
|
"loss": 0.22, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.6804167552625984, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 6.4646153846153845e-06, |
|
"loss": 0.2636, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.6819631569791043, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 6.433846153846154e-06, |
|
"loss": 0.3088, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.6835095586956101, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 6.403076923076924e-06, |
|
"loss": 0.2378, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.685055960412116, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 6.3723076923076935e-06, |
|
"loss": 0.2405, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.6866023621286219, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 6.341538461538462e-06, |
|
"loss": 0.2825, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.6881487638451279, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 6.3107692307692315e-06, |
|
"loss": 0.3513, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.6896951655616338, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 6.280000000000001e-06, |
|
"loss": 0.3271, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.6912415672781397, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 6.249230769230769e-06, |
|
"loss": 0.2742, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.6927879689946456, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 6.218461538461539e-06, |
|
"loss": 0.2642, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.6943343707111514, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 6.187692307692308e-06, |
|
"loss": 0.2364, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.6958807724276574, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 6.156923076923077e-06, |
|
"loss": 0.2883, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6974271741441633, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 6.126153846153846e-06, |
|
"loss": 0.2219, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.6989735758606692, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 6.095384615384616e-06, |
|
"loss": 0.2639, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.7005199775771751, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 6.064615384615386e-06, |
|
"loss": 0.2764, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.702066379293681, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 6.033846153846154e-06, |
|
"loss": 0.3097, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.7036127810101869, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 6.003076923076924e-06, |
|
"loss": 0.2262, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.7051591827266929, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 5.972307692307693e-06, |
|
"loss": 0.2537, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.7067055844431988, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 5.941538461538462e-06, |
|
"loss": 0.347, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.7082519861597046, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 5.910769230769231e-06, |
|
"loss": 0.3193, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.7097983878762105, |
|
"grad_norm": 0.75, |
|
"learning_rate": 5.8800000000000005e-06, |
|
"loss": 0.273, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.7113447895927164, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 5.849230769230769e-06, |
|
"loss": 0.2902, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.7128911913092224, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 5.818461538461538e-06, |
|
"loss": 0.3653, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.7144375930257283, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 5.787692307692309e-06, |
|
"loss": 0.3106, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.7159839947422342, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 5.756923076923078e-06, |
|
"loss": 0.2368, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.7175303964587401, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 5.726153846153847e-06, |
|
"loss": 0.249, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.719076798175246, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 5.695384615384616e-06, |
|
"loss": 0.3709, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.7206231998917518, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 5.664615384615385e-06, |
|
"loss": 0.2921, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.7221696016082578, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 5.633846153846154e-06, |
|
"loss": 0.3115, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.7237160033247637, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 5.603076923076923e-06, |
|
"loss": 0.2479, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.7252624050412696, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 5.572307692307693e-06, |
|
"loss": 0.2297, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.7268088067577755, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 5.541538461538461e-06, |
|
"loss": 0.2454, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.7283552084742814, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 5.5107692307692315e-06, |
|
"loss": 0.2849, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.7299016101907874, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 5.480000000000001e-06, |
|
"loss": 0.2797, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.7314480119072932, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 5.44923076923077e-06, |
|
"loss": 0.3882, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.7329944136237991, |
|
"grad_norm": 0.90234375, |
|
"learning_rate": 5.418461538461539e-06, |
|
"loss": 0.2509, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.734540815340305, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 5.387692307692308e-06, |
|
"loss": 0.2408, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.7360872170568109, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 5.356923076923078e-06, |
|
"loss": 0.2413, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.7376336187733168, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 5.326153846153846e-06, |
|
"loss": 0.2432, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.7391800204898228, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 5.2953846153846156e-06, |
|
"loss": 0.277, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.7407264222063287, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 5.264615384615385e-06, |
|
"loss": 0.2486, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.7422728239228346, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 5.2338461538461535e-06, |
|
"loss": 0.3, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7438192256393404, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 5.203076923076924e-06, |
|
"loss": 0.26, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.7453656273558463, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 5.172307692307693e-06, |
|
"loss": 0.2937, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.7469120290723523, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 5.1415384615384625e-06, |
|
"loss": 0.3057, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.7484584307888582, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 5.110769230769231e-06, |
|
"loss": 0.3284, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.7500048325053641, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 5.0800000000000005e-06, |
|
"loss": 0.2434, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.75155123422187, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 5.04923076923077e-06, |
|
"loss": 0.257, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.7530976359383759, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 5.0184615384615384e-06, |
|
"loss": 0.2714, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.7546440376548817, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 4.987692307692308e-06, |
|
"loss": 0.2182, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.7561904393713877, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 4.956923076923077e-06, |
|
"loss": 0.2855, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.7577368410878936, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.926153846153847e-06, |
|
"loss": 0.2774, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.7592832428043995, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 4.895384615384616e-06, |
|
"loss": 0.2489, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.7608296445209054, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.8646153846153846e-06, |
|
"loss": 0.3157, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.7623760462374113, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 4.833846153846154e-06, |
|
"loss": 0.2704, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.7639224479539173, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 4.803076923076923e-06, |
|
"loss": 0.2995, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.7654688496704232, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 4.772307692307693e-06, |
|
"loss": 0.2422, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.767015251386929, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.741538461538462e-06, |
|
"loss": 0.2692, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.7685616531034349, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 4.710769230769231e-06, |
|
"loss": 0.2704, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.7701080548199408, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 4.680000000000001e-06, |
|
"loss": 0.3147, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.7716544565364467, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 4.6492307692307695e-06, |
|
"loss": 0.2867, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.7732008582529527, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.618461538461539e-06, |
|
"loss": 0.2896, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7747472599694586, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 4.587692307692308e-06, |
|
"loss": 0.2335, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.7762936616859645, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.556923076923077e-06, |
|
"loss": 0.2441, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.7778400634024704, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 4.526153846153847e-06, |
|
"loss": 0.3049, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.7793864651189762, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.495384615384616e-06, |
|
"loss": 0.2605, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.7809328668354822, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 4.464615384615385e-06, |
|
"loss": 0.2876, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.7824792685519881, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.433846153846154e-06, |
|
"loss": 0.3434, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.784025670268494, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.403076923076923e-06, |
|
"loss": 0.2956, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.7855720719849999, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 4.372307692307693e-06, |
|
"loss": 0.3175, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.7871184737015058, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 4.341538461538462e-06, |
|
"loss": 0.2914, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.7886648754180117, |
|
"grad_norm": 1.125, |
|
"learning_rate": 4.310769230769231e-06, |
|
"loss": 0.2657, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.7902112771345177, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 4.2800000000000005e-06, |
|
"loss": 0.3168, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.7917576788510235, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 4.249230769230769e-06, |
|
"loss": 0.2422, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.7933040805675294, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 4.218461538461539e-06, |
|
"loss": 0.2651, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.7948504822840353, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 4.187692307692308e-06, |
|
"loss": 0.245, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.7963968840005412, |
|
"grad_norm": 0.82421875, |
|
"learning_rate": 4.156923076923077e-06, |
|
"loss": 0.3055, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.7979432857170471, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 4.126153846153847e-06, |
|
"loss": 0.2992, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.7994896874335531, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 4.095384615384615e-06, |
|
"loss": 0.3123, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.801036089150059, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 4.0646153846153854e-06, |
|
"loss": 0.2849, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.8025824908665649, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 4.033846153846154e-06, |
|
"loss": 0.317, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.8041288925830707, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 4.003076923076923e-06, |
|
"loss": 0.2567, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.8056752942995766, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 3.972307692307693e-06, |
|
"loss": 0.2918, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.8072216960160826, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 3.941538461538461e-06, |
|
"loss": 0.3973, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.8087680977325885, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 3.9107692307692316e-06, |
|
"loss": 0.3034, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.8103144994490944, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 3.88e-06, |
|
"loss": 0.2369, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.8118609011656003, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 3.8492307692307695e-06, |
|
"loss": 0.261, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.8134073028821062, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 3.818461538461539e-06, |
|
"loss": 0.2657, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.814953704598612, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 3.787692307692308e-06, |
|
"loss": 0.2336, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.816500106315118, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 3.7569230769230773e-06, |
|
"loss": 0.2683, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.8180465080316239, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.7261538461538467e-06, |
|
"loss": 0.2703, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.8195929097481298, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.6953846153846156e-06, |
|
"loss": 0.2907, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.8211393114646357, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 3.6646153846153846e-06, |
|
"loss": 0.3177, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.8226857131811416, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 3.633846153846154e-06, |
|
"loss": 0.3023, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.8242321148976476, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 3.6030769230769234e-06, |
|
"loss": 0.2169, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.8257785166141535, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 3.572307692307693e-06, |
|
"loss": 0.24, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.8273249183306594, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 3.5415384615384618e-06, |
|
"loss": 0.3394, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.8288713200471652, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 3.5107692307692307e-06, |
|
"loss": 0.2527, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.8304177217636711, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 3.48e-06, |
|
"loss": 0.2447, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.831964123480177, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 3.4492307692307695e-06, |
|
"loss": 0.2509, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.833510525196683, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 3.418461538461539e-06, |
|
"loss": 0.3633, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.8350569269131889, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 3.387692307692308e-06, |
|
"loss": 0.3206, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.8366033286296948, |
|
"grad_norm": 1.5078125, |
|
"learning_rate": 3.356923076923077e-06, |
|
"loss": 0.3542, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.8381497303462007, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 3.3261538461538463e-06, |
|
"loss": 0.2731, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.8396961320627065, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 3.2953846153846157e-06, |
|
"loss": 0.3256, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.8412425337792125, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 3.264615384615385e-06, |
|
"loss": 0.2471, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8427889354957184, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 3.233846153846154e-06, |
|
"loss": 0.2755, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.8443353372122243, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 3.203076923076923e-06, |
|
"loss": 0.3139, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.8458817389287302, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 3.1723076923076924e-06, |
|
"loss": 0.2722, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.8474281406452361, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 3.141538461538462e-06, |
|
"loss": 0.3058, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.848974542361742, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 3.110769230769231e-06, |
|
"loss": 0.2424, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.850520944078248, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 3.08e-06, |
|
"loss": 0.2752, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8520673457947538, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 3.049230769230769e-06, |
|
"loss": 0.2309, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.8536137475112597, |
|
"grad_norm": 0.85546875, |
|
"learning_rate": 3.0184615384615385e-06, |
|
"loss": 0.33, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.8551601492277656, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 2.987692307692308e-06, |
|
"loss": 0.2942, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.8567065509442715, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 2.9569230769230773e-06, |
|
"loss": 0.3103, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.8582529526607775, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 2.9261538461538463e-06, |
|
"loss": 0.2775, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.8597993543772834, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 2.8953846153846153e-06, |
|
"loss": 0.2941, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.8613457560937893, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 2.8646153846153847e-06, |
|
"loss": 0.2591, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.8628921578102952, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.833846153846154e-06, |
|
"loss": 0.2801, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.864438559526801, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 2.8030769230769234e-06, |
|
"loss": 0.3041, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.8659849612433069, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 2.7723076923076924e-06, |
|
"loss": 0.2866, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8675313629598129, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 2.7415384615384614e-06, |
|
"loss": 0.3128, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.8690777646763188, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.710769230769231e-06, |
|
"loss": 0.3121, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.8706241663928247, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 2.68e-06, |
|
"loss": 0.2264, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.8721705681093306, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.6492307692307696e-06, |
|
"loss": 0.2619, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.8737169698258365, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 2.6184615384615385e-06, |
|
"loss": 0.2631, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.8752633715423425, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 2.587692307692308e-06, |
|
"loss": 0.2636, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.8768097732588483, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 2.5569230769230773e-06, |
|
"loss": 0.3569, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.8783561749753542, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 2.5261538461538463e-06, |
|
"loss": 0.2297, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.8799025766918601, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 2.4953846153846157e-06, |
|
"loss": 0.2181, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.881448978408366, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 2.4646153846153847e-06, |
|
"loss": 0.3117, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.8829953801248719, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 2.433846153846154e-06, |
|
"loss": 0.3071, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.8845417818413779, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 2.4030769230769235e-06, |
|
"loss": 0.2599, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.8860881835578838, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 2.3723076923076924e-06, |
|
"loss": 0.265, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.8876345852743897, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 2.341538461538462e-06, |
|
"loss": 0.2922, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.8891809869908955, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 2.310769230769231e-06, |
|
"loss": 0.3616, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.8907273887074014, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 2.28e-06, |
|
"loss": 0.2587, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.8922737904239074, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 2.2492307692307696e-06, |
|
"loss": 0.335, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.8938201921404133, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 2.218461538461539e-06, |
|
"loss": 0.288, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.8953665938569192, |
|
"grad_norm": 0.921875, |
|
"learning_rate": 2.187692307692308e-06, |
|
"loss": 0.2932, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.8969129955734251, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 2.156923076923077e-06, |
|
"loss": 0.282, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.898459397289931, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 2.1261538461538463e-06, |
|
"loss": 0.2073, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.9000057990064368, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 2.0953846153846157e-06, |
|
"loss": 0.2583, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.9015522007229428, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 2.064615384615385e-06, |
|
"loss": 0.2805, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.9030986024394487, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 2.033846153846154e-06, |
|
"loss": 0.2416, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.9046450041559546, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 2.003076923076923e-06, |
|
"loss": 0.2826, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.9061914058724605, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.9723076923076924e-06, |
|
"loss": 0.3072, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.9077378075889664, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.941538461538462e-06, |
|
"loss": 0.3551, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.9092842093054724, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 1.9107692307692312e-06, |
|
"loss": 0.3224, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.9108306110219783, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 1.8800000000000002e-06, |
|
"loss": 0.2501, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.9123770127384841, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.8492307692307692e-06, |
|
"loss": 0.2555, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.91392341445499, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 1.8184615384615386e-06, |
|
"loss": 0.329, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.9154698161714959, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.7876923076923078e-06, |
|
"loss": 0.3193, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.9170162178880018, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.7569230769230772e-06, |
|
"loss": 0.3162, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.9185626196045078, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 1.7261538461538463e-06, |
|
"loss": 0.29, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.9201090213210137, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.6953846153846153e-06, |
|
"loss": 0.3122, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.9216554230375196, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.6646153846153847e-06, |
|
"loss": 0.2374, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.9232018247540255, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.6338461538461539e-06, |
|
"loss": 0.2562, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.9247482264705313, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.6030769230769233e-06, |
|
"loss": 0.2854, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.9262946281870373, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.5723076923076925e-06, |
|
"loss": 0.3549, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.9278410299035432, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.5415384615384614e-06, |
|
"loss": 0.4152, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9293874316200491, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.5107692307692308e-06, |
|
"loss": 0.2626, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.930933833336555, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.48e-06, |
|
"loss": 0.3302, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.9324802350530609, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 1.4492307692307694e-06, |
|
"loss": 0.3063, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.9340266367695668, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 1.4184615384615386e-06, |
|
"loss": 0.2758, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.9355730384860728, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.3876923076923076e-06, |
|
"loss": 0.2999, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.9371194402025786, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.356923076923077e-06, |
|
"loss": 0.3438, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.9386658419190845, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 1.3261538461538461e-06, |
|
"loss": 0.2368, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.9402122436355904, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 1.2953846153846155e-06, |
|
"loss": 0.2524, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9417586453520963, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.2646153846153847e-06, |
|
"loss": 0.2264, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.9433050470686023, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.233846153846154e-06, |
|
"loss": 0.2067, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9448514487851082, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.2030769230769233e-06, |
|
"loss": 0.231, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.9463978505016141, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 1.1723076923076925e-06, |
|
"loss": 0.2731, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.94794425221812, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.1415384615384617e-06, |
|
"loss": 0.2837, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.9494906539346258, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.1107692307692309e-06, |
|
"loss": 0.2272, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.9510370556511317, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.08e-06, |
|
"loss": 0.3152, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.9525834573676377, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.0492307692307694e-06, |
|
"loss": 0.3017, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.9541298590841436, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 1.0184615384615386e-06, |
|
"loss": 0.3378, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.9556762608006495, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 9.876923076923078e-07, |
|
"loss": 0.2503, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.9572226625171554, |
|
"grad_norm": 0.87109375, |
|
"learning_rate": 9.56923076923077e-07, |
|
"loss": 0.3438, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.9587690642336613, |
|
"grad_norm": 0.875, |
|
"learning_rate": 9.261538461538462e-07, |
|
"loss": 0.2667, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.9603154659501673, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 8.953846153846155e-07, |
|
"loss": 0.2745, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.9618618676666731, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 8.646153846153847e-07, |
|
"loss": 0.2683, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.963408269383179, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 8.338461538461539e-07, |
|
"loss": 0.2422, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.9649546710996849, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 8.030769230769231e-07, |
|
"loss": 0.2588, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.9665010728161908, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 7.723076923076923e-07, |
|
"loss": 0.2812, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.9680474745326967, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 7.415384615384616e-07, |
|
"loss": 0.3232, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.9695938762492027, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 7.107692307692309e-07, |
|
"loss": 0.2422, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.9711402779657086, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 6.800000000000001e-07, |
|
"loss": 0.2833, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.9726866796822145, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 6.492307692307692e-07, |
|
"loss": 0.2517, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.9742330813987203, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 6.184615384615385e-07, |
|
"loss": 0.2534, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.9757794831152262, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 5.876923076923077e-07, |
|
"loss": 0.2911, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.9773258848317322, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 5.56923076923077e-07, |
|
"loss": 0.2628, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.9788722865482381, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 5.261538461538462e-07, |
|
"loss": 0.3191, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.980418688264744, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 4.953846153846155e-07, |
|
"loss": 0.3225, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.9819650899812499, |
|
"grad_norm": 0.92578125, |
|
"learning_rate": 4.6461538461538465e-07, |
|
"loss": 0.2836, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.9835114916977558, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 4.3384615384615384e-07, |
|
"loss": 0.303, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.9850578934142616, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.0307692307692313e-07, |
|
"loss": 0.2381, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.9866042951307676, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 3.7230769230769236e-07, |
|
"loss": 0.2821, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.9881506968472735, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 3.4153846153846155e-07, |
|
"loss": 0.3187, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.9896970985637794, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 3.107692307692308e-07, |
|
"loss": 0.2993, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.9912435002802853, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 2.8e-07, |
|
"loss": 0.2923, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.9927899019967912, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 2.4923076923076926e-07, |
|
"loss": 0.2407, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.9943363037132972, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 2.1846153846153847e-07, |
|
"loss": 0.257, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.9958827054298031, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 1.8769230769230773e-07, |
|
"loss": 0.2531, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.997429107146309, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.5692307692307694e-07, |
|
"loss": 0.273, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.9989755088628148, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 1.2615384615384617e-07, |
|
"loss": 0.313, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.0004639205149517, |
|
"grad_norm": 0.75, |
|
"learning_rate": 9.53846153846154e-08, |
|
"loss": 0.2218, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.0020103222314576, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 6.461538461538462e-08, |
|
"loss": 0.3402, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.0035567239479637, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 3.384615384615385e-08, |
|
"loss": 0.2327, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.0051031256644696, |
|
"grad_norm": 0.98046875, |
|
"learning_rate": 3.0769230769230774e-09, |
|
"loss": 0.2185, |
|
"step": 6500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4935783864782275e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|