{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 11030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00045330915684496827, "grad_norm": 7.529475230749862, "learning_rate": 7.252946509519493e-08, "loss": 1.4215, "step": 1 }, { "epoch": 0.0009066183136899365, "grad_norm": 7.593120325509581, "learning_rate": 1.4505893019038986e-07, "loss": 1.4242, "step": 2 }, { "epoch": 0.0013599274705349048, "grad_norm": 7.532775709452766, "learning_rate": 2.175883952855848e-07, "loss": 1.4193, "step": 3 }, { "epoch": 0.001813236627379873, "grad_norm": 7.480629522246691, "learning_rate": 2.901178603807797e-07, "loss": 1.4377, "step": 4 }, { "epoch": 0.0022665457842248413, "grad_norm": 7.533367455654741, "learning_rate": 3.626473254759746e-07, "loss": 1.424, "step": 5 }, { "epoch": 0.0027198549410698096, "grad_norm": 7.503278620384197, "learning_rate": 4.351767905711696e-07, "loss": 1.4218, "step": 6 }, { "epoch": 0.003173164097914778, "grad_norm": 7.414722368465223, "learning_rate": 5.077062556663645e-07, "loss": 1.4318, "step": 7 }, { "epoch": 0.003626473254759746, "grad_norm": 7.392160161716042, "learning_rate": 5.802357207615594e-07, "loss": 1.4235, "step": 8 }, { "epoch": 0.004079782411604714, "grad_norm": 7.091971499091691, "learning_rate": 6.527651858567544e-07, "loss": 1.4262, "step": 9 }, { "epoch": 0.004533091568449683, "grad_norm": 6.927167730241256, "learning_rate": 7.252946509519492e-07, "loss": 1.4008, "step": 10 }, { "epoch": 0.004986400725294651, "grad_norm": 6.872660690947453, "learning_rate": 7.978241160471442e-07, "loss": 1.4105, "step": 11 }, { "epoch": 0.005439709882139619, "grad_norm": 5.825265694236906, "learning_rate": 8.703535811423392e-07, "loss": 1.3582, "step": 12 }, { "epoch": 0.0058930190389845875, "grad_norm": 5.69185883661227, "learning_rate": 9.428830462375341e-07, "loss": 1.3446, "step": 13 }, { "epoch": 0.006346328195829556, "grad_norm": 5.686582467137656, "learning_rate": 1.015412511332729e-06, "loss": 1.3501, "step": 14 }, { "epoch": 0.006799637352674524, "grad_norm": 5.397470900512968, "learning_rate": 1.087941976427924e-06, "loss": 1.3717, "step": 15 }, { "epoch": 0.007252946509519492, "grad_norm": 4.265133635491454, "learning_rate": 1.1604714415231189e-06, "loss": 1.3252, "step": 16 }, { "epoch": 0.0077062556663644605, "grad_norm": 3.0568764658023695, "learning_rate": 1.2330009066183137e-06, "loss": 1.3057, "step": 17 }, { "epoch": 0.008159564823209429, "grad_norm": 2.982365453971247, "learning_rate": 1.3055303717135088e-06, "loss": 1.3353, "step": 18 }, { "epoch": 0.008612873980054397, "grad_norm": 2.876089762919186, "learning_rate": 1.3780598368087036e-06, "loss": 1.3148, "step": 19 }, { "epoch": 0.009066183136899365, "grad_norm": 2.479210068210551, "learning_rate": 1.4505893019038985e-06, "loss": 1.3243, "step": 20 }, { "epoch": 0.009519492293744334, "grad_norm": 2.370547996501153, "learning_rate": 1.5231187669990935e-06, "loss": 1.296, "step": 21 }, { "epoch": 0.009972801450589302, "grad_norm": 2.4189834603812606, "learning_rate": 1.5956482320942884e-06, "loss": 1.283, "step": 22 }, { "epoch": 0.01042611060743427, "grad_norm": 4.275657305949915, "learning_rate": 1.6681776971894835e-06, "loss": 1.2747, "step": 23 }, { "epoch": 0.010879419764279238, "grad_norm": 4.732041812425561, "learning_rate": 1.7407071622846783e-06, "loss": 1.2956, "step": 24 }, { "epoch": 0.011332728921124207, "grad_norm": 4.927270309461219, "learning_rate": 1.8132366273798732e-06, "loss": 1.3214, "step": 25 }, { "epoch": 0.011786038077969175, "grad_norm": 4.56380071180813, "learning_rate": 1.8857660924750682e-06, "loss": 1.2854, "step": 26 }, { "epoch": 0.012239347234814143, "grad_norm": 4.299328907403423, "learning_rate": 1.958295557570263e-06, "loss": 1.2559, "step": 27 }, { "epoch": 0.012692656391659111, "grad_norm": 3.8168000380553355, "learning_rate": 2.030825022665458e-06, "loss": 1.2572, "step": 28 }, { "epoch": 0.01314596554850408, "grad_norm": 3.213128102048786, "learning_rate": 2.1033544877606528e-06, "loss": 1.2756, "step": 29 }, { "epoch": 0.013599274705349048, "grad_norm": 2.219329310202926, "learning_rate": 2.175883952855848e-06, "loss": 1.2569, "step": 30 }, { "epoch": 0.014052583862194016, "grad_norm": 1.7568293989130694, "learning_rate": 2.248413417951043e-06, "loss": 1.2699, "step": 31 }, { "epoch": 0.014505893019038985, "grad_norm": 1.6642781528914266, "learning_rate": 2.3209428830462377e-06, "loss": 1.2148, "step": 32 }, { "epoch": 0.014959202175883953, "grad_norm": 1.6444882858479255, "learning_rate": 2.3934723481414326e-06, "loss": 1.2067, "step": 33 }, { "epoch": 0.015412511332728921, "grad_norm": 1.444218972205397, "learning_rate": 2.4660018132366274e-06, "loss": 1.1976, "step": 34 }, { "epoch": 0.01586582048957389, "grad_norm": 1.2167499792974639, "learning_rate": 2.5385312783318227e-06, "loss": 1.2053, "step": 35 }, { "epoch": 0.016319129646418858, "grad_norm": 1.063596741114456, "learning_rate": 2.6110607434270176e-06, "loss": 1.205, "step": 36 }, { "epoch": 0.016772438803263828, "grad_norm": 1.0019747182103695, "learning_rate": 2.683590208522213e-06, "loss": 1.185, "step": 37 }, { "epoch": 0.017225747960108794, "grad_norm": 0.9229724935151243, "learning_rate": 2.7561196736174073e-06, "loss": 1.1722, "step": 38 }, { "epoch": 0.017679057116953764, "grad_norm": 0.8714312092546272, "learning_rate": 2.8286491387126025e-06, "loss": 1.1883, "step": 39 }, { "epoch": 0.01813236627379873, "grad_norm": 0.7943330216747643, "learning_rate": 2.901178603807797e-06, "loss": 1.1936, "step": 40 }, { "epoch": 0.0185856754306437, "grad_norm": 0.7253701008160783, "learning_rate": 2.9737080689029922e-06, "loss": 1.1729, "step": 41 }, { "epoch": 0.019038984587488667, "grad_norm": 0.69653499597173, "learning_rate": 3.046237533998187e-06, "loss": 1.1528, "step": 42 }, { "epoch": 0.019492293744333637, "grad_norm": 0.6689308321427192, "learning_rate": 3.1187669990933824e-06, "loss": 1.1364, "step": 43 }, { "epoch": 0.019945602901178604, "grad_norm": 0.6639943306036193, "learning_rate": 3.1912964641885768e-06, "loss": 1.1176, "step": 44 }, { "epoch": 0.020398912058023574, "grad_norm": 0.6367525434075686, "learning_rate": 3.263825929283772e-06, "loss": 1.1492, "step": 45 }, { "epoch": 0.02085222121486854, "grad_norm": 0.635669356253204, "learning_rate": 3.336355394378967e-06, "loss": 1.1288, "step": 46 }, { "epoch": 0.02130553037171351, "grad_norm": 0.6442838488693255, "learning_rate": 3.4088848594741618e-06, "loss": 1.1523, "step": 47 }, { "epoch": 0.021758839528558477, "grad_norm": 0.6285851181409213, "learning_rate": 3.4814143245693566e-06, "loss": 1.1134, "step": 48 }, { "epoch": 0.022212148685403447, "grad_norm": 0.5319666055071556, "learning_rate": 3.553943789664552e-06, "loss": 1.1196, "step": 49 }, { "epoch": 0.022665457842248413, "grad_norm": 0.44072232117257715, "learning_rate": 3.6264732547597463e-06, "loss": 1.1076, "step": 50 }, { "epoch": 0.023118766999093383, "grad_norm": 0.5115195003085903, "learning_rate": 3.6990027198549416e-06, "loss": 1.1108, "step": 51 }, { "epoch": 0.02357207615593835, "grad_norm": 0.5768766956470096, "learning_rate": 3.7715321849501364e-06, "loss": 1.1074, "step": 52 }, { "epoch": 0.02402538531278332, "grad_norm": 0.4752090887531169, "learning_rate": 3.844061650045331e-06, "loss": 1.0964, "step": 53 }, { "epoch": 0.024478694469628286, "grad_norm": 0.3990349919601281, "learning_rate": 3.916591115140526e-06, "loss": 1.1129, "step": 54 }, { "epoch": 0.024932003626473256, "grad_norm": 0.44473257086688445, "learning_rate": 3.989120580235721e-06, "loss": 1.1191, "step": 55 }, { "epoch": 0.025385312783318223, "grad_norm": 0.46856393244209626, "learning_rate": 4.061650045330916e-06, "loss": 1.112, "step": 56 }, { "epoch": 0.025838621940163193, "grad_norm": 0.4009684218791682, "learning_rate": 4.1341795104261115e-06, "loss": 1.0957, "step": 57 }, { "epoch": 0.02629193109700816, "grad_norm": 0.4385633000094962, "learning_rate": 4.2067089755213055e-06, "loss": 1.0889, "step": 58 }, { "epoch": 0.02674524025385313, "grad_norm": 0.42631185708881886, "learning_rate": 4.279238440616501e-06, "loss": 1.0952, "step": 59 }, { "epoch": 0.027198549410698096, "grad_norm": 0.3346718217262581, "learning_rate": 4.351767905711696e-06, "loss": 1.0702, "step": 60 }, { "epoch": 0.027651858567543066, "grad_norm": 0.40613340660380465, "learning_rate": 4.424297370806891e-06, "loss": 1.1127, "step": 61 }, { "epoch": 0.028105167724388033, "grad_norm": 0.3614821600947752, "learning_rate": 4.496826835902086e-06, "loss": 1.0921, "step": 62 }, { "epoch": 0.028558476881233003, "grad_norm": 0.25465498353508886, "learning_rate": 4.569356300997281e-06, "loss": 1.0749, "step": 63 }, { "epoch": 0.02901178603807797, "grad_norm": 0.3073199891997316, "learning_rate": 4.6418857660924755e-06, "loss": 1.0895, "step": 64 }, { "epoch": 0.02946509519492294, "grad_norm": 0.3006958992766178, "learning_rate": 4.71441523118767e-06, "loss": 1.0852, "step": 65 }, { "epoch": 0.029918404351767906, "grad_norm": 0.2837198992495677, "learning_rate": 4.786944696282865e-06, "loss": 1.061, "step": 66 }, { "epoch": 0.030371713508612876, "grad_norm": 0.2857505959419577, "learning_rate": 4.859474161378061e-06, "loss": 1.0745, "step": 67 }, { "epoch": 0.030825022665457842, "grad_norm": 0.2653991335834452, "learning_rate": 4.932003626473255e-06, "loss": 1.055, "step": 68 }, { "epoch": 0.03127833182230281, "grad_norm": 0.25194464043191933, "learning_rate": 5.00453309156845e-06, "loss": 1.0748, "step": 69 }, { "epoch": 0.03173164097914778, "grad_norm": 0.272630914249413, "learning_rate": 5.077062556663645e-06, "loss": 1.0763, "step": 70 }, { "epoch": 0.03218495013599275, "grad_norm": 0.2527844916171606, "learning_rate": 5.14959202175884e-06, "loss": 1.0583, "step": 71 }, { "epoch": 0.032638259292837715, "grad_norm": 0.24485480400333573, "learning_rate": 5.222121486854035e-06, "loss": 1.0374, "step": 72 }, { "epoch": 0.03309156844968268, "grad_norm": 0.22405491451290266, "learning_rate": 5.294650951949229e-06, "loss": 1.0705, "step": 73 }, { "epoch": 0.033544877606527655, "grad_norm": 0.23431603942798793, "learning_rate": 5.367180417044426e-06, "loss": 1.0725, "step": 74 }, { "epoch": 0.03399818676337262, "grad_norm": 0.21663120224720253, "learning_rate": 5.43970988213962e-06, "loss": 1.0711, "step": 75 }, { "epoch": 0.03445149592021759, "grad_norm": 0.22246624907104942, "learning_rate": 5.5122393472348145e-06, "loss": 1.0612, "step": 76 }, { "epoch": 0.034904805077062555, "grad_norm": 0.21851808101971187, "learning_rate": 5.584768812330009e-06, "loss": 1.0664, "step": 77 }, { "epoch": 0.03535811423390753, "grad_norm": 0.20227337165615802, "learning_rate": 5.657298277425205e-06, "loss": 1.0556, "step": 78 }, { "epoch": 0.035811423390752495, "grad_norm": 0.25861105320060773, "learning_rate": 5.7298277425204e-06, "loss": 1.0437, "step": 79 }, { "epoch": 0.03626473254759746, "grad_norm": 0.26878735990491354, "learning_rate": 5.802357207615594e-06, "loss": 1.0554, "step": 80 }, { "epoch": 0.03671804170444243, "grad_norm": 0.2601458095321201, "learning_rate": 5.874886672710789e-06, "loss": 1.0476, "step": 81 }, { "epoch": 0.0371713508612874, "grad_norm": 0.23420150673811044, "learning_rate": 5.9474161378059845e-06, "loss": 1.039, "step": 82 }, { "epoch": 0.03762466001813237, "grad_norm": 0.23480027015431468, "learning_rate": 6.019945602901179e-06, "loss": 1.0429, "step": 83 }, { "epoch": 0.038077969174977334, "grad_norm": 0.24891656391471473, "learning_rate": 6.092475067996374e-06, "loss": 1.0432, "step": 84 }, { "epoch": 0.0385312783318223, "grad_norm": 0.23861585885709863, "learning_rate": 6.165004533091569e-06, "loss": 1.0473, "step": 85 }, { "epoch": 0.038984587488667274, "grad_norm": 0.2322146070647115, "learning_rate": 6.237533998186765e-06, "loss": 1.0433, "step": 86 }, { "epoch": 0.03943789664551224, "grad_norm": 0.21109490343434156, "learning_rate": 6.310063463281959e-06, "loss": 1.0463, "step": 87 }, { "epoch": 0.03989120580235721, "grad_norm": 0.216219946579301, "learning_rate": 6.3825929283771536e-06, "loss": 1.0273, "step": 88 }, { "epoch": 0.040344514959202174, "grad_norm": 0.21641766636978838, "learning_rate": 6.455122393472348e-06, "loss": 1.0541, "step": 89 }, { "epoch": 0.04079782411604715, "grad_norm": 0.21008896150899378, "learning_rate": 6.527651858567544e-06, "loss": 1.0455, "step": 90 }, { "epoch": 0.041251133272892114, "grad_norm": 0.23417944081803094, "learning_rate": 6.600181323662739e-06, "loss": 1.0303, "step": 91 }, { "epoch": 0.04170444242973708, "grad_norm": 0.22395410726447243, "learning_rate": 6.672710788757934e-06, "loss": 1.0503, "step": 92 }, { "epoch": 0.04215775158658205, "grad_norm": 0.26822216050774617, "learning_rate": 6.745240253853128e-06, "loss": 0.9992, "step": 93 }, { "epoch": 0.04261106074342702, "grad_norm": 0.21999717180540845, "learning_rate": 6.8177697189483235e-06, "loss": 1.0432, "step": 94 }, { "epoch": 0.04306436990027199, "grad_norm": 0.3236383930535883, "learning_rate": 6.890299184043518e-06, "loss": 1.0317, "step": 95 }, { "epoch": 0.043517679057116954, "grad_norm": 0.25512225750318396, "learning_rate": 6.962828649138713e-06, "loss": 1.0215, "step": 96 }, { "epoch": 0.04397098821396192, "grad_norm": 0.26450325288157267, "learning_rate": 7.035358114233908e-06, "loss": 1.0393, "step": 97 }, { "epoch": 0.044424297370806894, "grad_norm": 0.24804464453064065, "learning_rate": 7.107887579329104e-06, "loss": 1.029, "step": 98 }, { "epoch": 0.04487760652765186, "grad_norm": 0.2919095912577784, "learning_rate": 7.180417044424299e-06, "loss": 1.0368, "step": 99 }, { "epoch": 0.04533091568449683, "grad_norm": 0.24562955224846647, "learning_rate": 7.252946509519493e-06, "loss": 1.0218, "step": 100 }, { "epoch": 0.04578422484134179, "grad_norm": 0.33344637601345345, "learning_rate": 7.3254759746146875e-06, "loss": 1.0547, "step": 101 }, { "epoch": 0.04623753399818677, "grad_norm": 0.321768239448474, "learning_rate": 7.398005439709883e-06, "loss": 1.0237, "step": 102 }, { "epoch": 0.04669084315503173, "grad_norm": 0.24083502679003194, "learning_rate": 7.470534904805078e-06, "loss": 1.0195, "step": 103 }, { "epoch": 0.0471441523118767, "grad_norm": 0.28060603553154856, "learning_rate": 7.543064369900273e-06, "loss": 1.0351, "step": 104 }, { "epoch": 0.047597461468721666, "grad_norm": 0.2738387759702386, "learning_rate": 7.615593834995467e-06, "loss": 1.0212, "step": 105 }, { "epoch": 0.04805077062556664, "grad_norm": 0.3116966323450892, "learning_rate": 7.688123300090663e-06, "loss": 1.0472, "step": 106 }, { "epoch": 0.048504079782411606, "grad_norm": 0.27690805315664624, "learning_rate": 7.760652765185858e-06, "loss": 1.0135, "step": 107 }, { "epoch": 0.04895738893925657, "grad_norm": 0.35925442687867143, "learning_rate": 7.833182230281052e-06, "loss": 1.024, "step": 108 }, { "epoch": 0.04941069809610154, "grad_norm": 0.4725508242687737, "learning_rate": 7.905711695376246e-06, "loss": 1.0296, "step": 109 }, { "epoch": 0.04986400725294651, "grad_norm": 0.5860799696821, "learning_rate": 7.978241160471442e-06, "loss": 1.0325, "step": 110 }, { "epoch": 0.05031731640979148, "grad_norm": 0.6164419910581893, "learning_rate": 8.050770625566638e-06, "loss": 1.0345, "step": 111 }, { "epoch": 0.050770625566636446, "grad_norm": 0.5174761037366001, "learning_rate": 8.123300090661832e-06, "loss": 1.0114, "step": 112 }, { "epoch": 0.05122393472348141, "grad_norm": 0.3168303533663923, "learning_rate": 8.195829555757027e-06, "loss": 1.0336, "step": 113 }, { "epoch": 0.051677243880326386, "grad_norm": 0.3589573933522987, "learning_rate": 8.268359020852223e-06, "loss": 1.0227, "step": 114 }, { "epoch": 0.05213055303717135, "grad_norm": 0.5555446423775562, "learning_rate": 8.340888485947417e-06, "loss": 1.0336, "step": 115 }, { "epoch": 0.05258386219401632, "grad_norm": 0.5298312726366445, "learning_rate": 8.413417951042611e-06, "loss": 1.0297, "step": 116 }, { "epoch": 0.053037171350861285, "grad_norm": 0.45833430552891236, "learning_rate": 8.485947416137807e-06, "loss": 1.0175, "step": 117 }, { "epoch": 0.05349048050770626, "grad_norm": 0.3404266340027579, "learning_rate": 8.558476881233002e-06, "loss": 1.0138, "step": 118 }, { "epoch": 0.053943789664551225, "grad_norm": 0.2716326187250938, "learning_rate": 8.631006346328196e-06, "loss": 1.0209, "step": 119 }, { "epoch": 0.05439709882139619, "grad_norm": 0.2169059375583515, "learning_rate": 8.703535811423392e-06, "loss": 1.0212, "step": 120 }, { "epoch": 0.05485040797824116, "grad_norm": 0.2684269012519948, "learning_rate": 8.776065276518586e-06, "loss": 1.0082, "step": 121 }, { "epoch": 0.05530371713508613, "grad_norm": 0.30791696302426647, "learning_rate": 8.848594741613782e-06, "loss": 1.0324, "step": 122 }, { "epoch": 0.0557570262919311, "grad_norm": 0.33670968089785597, "learning_rate": 8.921124206708976e-06, "loss": 1.0181, "step": 123 }, { "epoch": 0.056210335448776065, "grad_norm": 0.30814426757289026, "learning_rate": 8.993653671804172e-06, "loss": 1.0349, "step": 124 }, { "epoch": 0.05666364460562103, "grad_norm": 0.2710148391522126, "learning_rate": 9.066183136899366e-06, "loss": 1.0403, "step": 125 }, { "epoch": 0.057116953762466005, "grad_norm": 0.30956819531611085, "learning_rate": 9.138712601994561e-06, "loss": 1.0221, "step": 126 }, { "epoch": 0.05757026291931097, "grad_norm": 0.28018797445897453, "learning_rate": 9.211242067089757e-06, "loss": 0.9988, "step": 127 }, { "epoch": 0.05802357207615594, "grad_norm": 0.29807694967288845, "learning_rate": 9.283771532184951e-06, "loss": 1.0079, "step": 128 }, { "epoch": 0.058476881233000905, "grad_norm": 0.3581679458803219, "learning_rate": 9.356300997280145e-06, "loss": 1.0088, "step": 129 }, { "epoch": 0.05893019038984588, "grad_norm": 0.4378519762890707, "learning_rate": 9.42883046237534e-06, "loss": 1.0326, "step": 130 }, { "epoch": 0.059383499546690845, "grad_norm": 0.4608327122340969, "learning_rate": 9.501359927470536e-06, "loss": 1.0248, "step": 131 }, { "epoch": 0.05983680870353581, "grad_norm": 0.6226650794548556, "learning_rate": 9.57388939256573e-06, "loss": 1.0072, "step": 132 }, { "epoch": 0.06029011786038078, "grad_norm": 0.8050827947500385, "learning_rate": 9.646418857660926e-06, "loss": 1.013, "step": 133 }, { "epoch": 0.06074342701722575, "grad_norm": 0.7301147675463479, "learning_rate": 9.718948322756122e-06, "loss": 1.014, "step": 134 }, { "epoch": 0.06119673617407072, "grad_norm": 0.6438107049965188, "learning_rate": 9.791477787851316e-06, "loss": 1.0017, "step": 135 }, { "epoch": 0.061650045330915684, "grad_norm": 0.5337604175814439, "learning_rate": 9.86400725294651e-06, "loss": 1.0164, "step": 136 }, { "epoch": 0.06210335448776065, "grad_norm": 0.6247160761796741, "learning_rate": 9.936536718041705e-06, "loss": 1.02, "step": 137 }, { "epoch": 0.06255666364460562, "grad_norm": 0.9056770429202234, "learning_rate": 1.00090661831369e-05, "loss": 1.0117, "step": 138 }, { "epoch": 0.06300997280145058, "grad_norm": 0.9037833745324745, "learning_rate": 1.0081595648232093e-05, "loss": 1.0253, "step": 139 }, { "epoch": 0.06346328195829556, "grad_norm": 0.7331943366534031, "learning_rate": 1.015412511332729e-05, "loss": 1.0037, "step": 140 }, { "epoch": 0.06391659111514053, "grad_norm": 0.6175146976953618, "learning_rate": 1.0226654578422487e-05, "loss": 1.0018, "step": 141 }, { "epoch": 0.0643699002719855, "grad_norm": 0.7702300635686421, "learning_rate": 1.029918404351768e-05, "loss": 1.0247, "step": 142 }, { "epoch": 0.06482320942883046, "grad_norm": 0.9590076569081369, "learning_rate": 1.0371713508612875e-05, "loss": 1.0145, "step": 143 }, { "epoch": 0.06527651858567543, "grad_norm": 0.7708905008564033, "learning_rate": 1.044424297370807e-05, "loss": 1.0148, "step": 144 }, { "epoch": 0.0657298277425204, "grad_norm": 0.42277990735301435, "learning_rate": 1.0516772438803264e-05, "loss": 0.99, "step": 145 }, { "epoch": 0.06618313689936536, "grad_norm": 0.4997673663935069, "learning_rate": 1.0589301903898458e-05, "loss": 1.0151, "step": 146 }, { "epoch": 0.06663644605621033, "grad_norm": 0.5779083721673532, "learning_rate": 1.0661831368993654e-05, "loss": 1.0145, "step": 147 }, { "epoch": 0.06708975521305531, "grad_norm": 0.5388433807596953, "learning_rate": 1.0734360834088851e-05, "loss": 1.0177, "step": 148 }, { "epoch": 0.06754306436990028, "grad_norm": 0.5567911105247827, "learning_rate": 1.0806890299184045e-05, "loss": 1.0088, "step": 149 }, { "epoch": 0.06799637352674524, "grad_norm": 0.506075219557083, "learning_rate": 1.087941976427924e-05, "loss": 1.0219, "step": 150 }, { "epoch": 0.06844968268359021, "grad_norm": 0.5889903383048785, "learning_rate": 1.0951949229374435e-05, "loss": 0.9782, "step": 151 }, { "epoch": 0.06890299184043518, "grad_norm": 0.5350192314416039, "learning_rate": 1.1024478694469629e-05, "loss": 1.0064, "step": 152 }, { "epoch": 0.06935630099728014, "grad_norm": 0.5303676810012745, "learning_rate": 1.1097008159564823e-05, "loss": 1.0204, "step": 153 }, { "epoch": 0.06980961015412511, "grad_norm": 0.5079071512985294, "learning_rate": 1.1169537624660019e-05, "loss": 1.005, "step": 154 }, { "epoch": 0.07026291931097008, "grad_norm": 0.47473225387067336, "learning_rate": 1.1242067089755213e-05, "loss": 1.0377, "step": 155 }, { "epoch": 0.07071622846781506, "grad_norm": 0.42462480410500053, "learning_rate": 1.131459655485041e-05, "loss": 0.9941, "step": 156 }, { "epoch": 0.07116953762466002, "grad_norm": 0.35964292572208717, "learning_rate": 1.1387126019945604e-05, "loss": 1.0001, "step": 157 }, { "epoch": 0.07162284678150499, "grad_norm": 0.39773833837524003, "learning_rate": 1.14596554850408e-05, "loss": 0.992, "step": 158 }, { "epoch": 0.07207615593834996, "grad_norm": 0.479362738779813, "learning_rate": 1.1532184950135994e-05, "loss": 0.987, "step": 159 }, { "epoch": 0.07252946509519492, "grad_norm": 0.6632019070593157, "learning_rate": 1.1604714415231188e-05, "loss": 0.9943, "step": 160 }, { "epoch": 0.07298277425203989, "grad_norm": 0.9541332299472309, "learning_rate": 1.1677243880326384e-05, "loss": 0.9834, "step": 161 }, { "epoch": 0.07343608340888486, "grad_norm": 1.1416313556087756, "learning_rate": 1.1749773345421578e-05, "loss": 1.0205, "step": 162 }, { "epoch": 0.07388939256572982, "grad_norm": 0.7004538190727048, "learning_rate": 1.1822302810516773e-05, "loss": 1.0019, "step": 163 }, { "epoch": 0.0743427017225748, "grad_norm": 0.6050101670133614, "learning_rate": 1.1894832275611969e-05, "loss": 0.9974, "step": 164 }, { "epoch": 0.07479601087941977, "grad_norm": 0.8872309721836288, "learning_rate": 1.1967361740707165e-05, "loss": 1.0094, "step": 165 }, { "epoch": 0.07524932003626474, "grad_norm": 1.07492902627576, "learning_rate": 1.2039891205802359e-05, "loss": 1.0086, "step": 166 }, { "epoch": 0.0757026291931097, "grad_norm": 0.85153474556294, "learning_rate": 1.2112420670897553e-05, "loss": 0.9942, "step": 167 }, { "epoch": 0.07615593834995467, "grad_norm": 0.8083358228009153, "learning_rate": 1.2184950135992748e-05, "loss": 1.0054, "step": 168 }, { "epoch": 0.07660924750679964, "grad_norm": 0.8246194549076243, "learning_rate": 1.2257479601087942e-05, "loss": 1.0078, "step": 169 }, { "epoch": 0.0770625566636446, "grad_norm": 0.6179745477622758, "learning_rate": 1.2330009066183138e-05, "loss": 0.9781, "step": 170 }, { "epoch": 0.07751586582048957, "grad_norm": 0.8043698107366198, "learning_rate": 1.2402538531278332e-05, "loss": 1.0099, "step": 171 }, { "epoch": 0.07796917497733455, "grad_norm": 0.9066933771301231, "learning_rate": 1.247506799637353e-05, "loss": 0.9886, "step": 172 }, { "epoch": 0.07842248413417952, "grad_norm": 0.6347094122637169, "learning_rate": 1.2547597461468723e-05, "loss": 0.9943, "step": 173 }, { "epoch": 0.07887579329102448, "grad_norm": 0.6365785592681293, "learning_rate": 1.2620126926563917e-05, "loss": 0.9873, "step": 174 }, { "epoch": 0.07932910244786945, "grad_norm": 0.7711016299374871, "learning_rate": 1.2692656391659113e-05, "loss": 0.9857, "step": 175 }, { "epoch": 0.07978241160471441, "grad_norm": 0.7236955138393364, "learning_rate": 1.2765185856754307e-05, "loss": 1.0181, "step": 176 }, { "epoch": 0.08023572076155938, "grad_norm": 0.9003477864097517, "learning_rate": 1.2837715321849503e-05, "loss": 1.0146, "step": 177 }, { "epoch": 0.08068902991840435, "grad_norm": 1.1278349840855353, "learning_rate": 1.2910244786944697e-05, "loss": 0.9764, "step": 178 }, { "epoch": 0.08114233907524931, "grad_norm": 0.6811372412745094, "learning_rate": 1.298277425203989e-05, "loss": 0.9956, "step": 179 }, { "epoch": 0.0815956482320943, "grad_norm": 0.5658465562911695, "learning_rate": 1.3055303717135088e-05, "loss": 0.9769, "step": 180 }, { "epoch": 0.08204895738893926, "grad_norm": 0.7869886779910564, "learning_rate": 1.3127833182230282e-05, "loss": 1.0278, "step": 181 }, { "epoch": 0.08250226654578423, "grad_norm": 0.8547891846663555, "learning_rate": 1.3200362647325478e-05, "loss": 0.999, "step": 182 }, { "epoch": 0.0829555757026292, "grad_norm": 1.066852201180377, "learning_rate": 1.3272892112420672e-05, "loss": 1.007, "step": 183 }, { "epoch": 0.08340888485947416, "grad_norm": 0.8352376405215245, "learning_rate": 1.3345421577515868e-05, "loss": 0.9839, "step": 184 }, { "epoch": 0.08386219401631913, "grad_norm": 0.5699671212503709, "learning_rate": 1.3417951042611062e-05, "loss": 0.996, "step": 185 }, { "epoch": 0.0843155031731641, "grad_norm": 0.9273513907060452, "learning_rate": 1.3490480507706256e-05, "loss": 0.9951, "step": 186 }, { "epoch": 0.08476881233000906, "grad_norm": 1.0662364604979808, "learning_rate": 1.3563009972801451e-05, "loss": 1.0038, "step": 187 }, { "epoch": 0.08522212148685404, "grad_norm": 0.8284705529549362, "learning_rate": 1.3635539437896647e-05, "loss": 1.0117, "step": 188 }, { "epoch": 0.08567543064369901, "grad_norm": 0.5995211711330223, "learning_rate": 1.3708068902991843e-05, "loss": 0.9843, "step": 189 }, { "epoch": 0.08612873980054397, "grad_norm": 0.5064728784890009, "learning_rate": 1.3780598368087037e-05, "loss": 0.9806, "step": 190 }, { "epoch": 0.08658204895738894, "grad_norm": 0.9197902804616978, "learning_rate": 1.3853127833182232e-05, "loss": 0.9975, "step": 191 }, { "epoch": 0.08703535811423391, "grad_norm": 0.9641847114468183, "learning_rate": 1.3925657298277426e-05, "loss": 0.9976, "step": 192 }, { "epoch": 0.08748866727107887, "grad_norm": 0.799915175155661, "learning_rate": 1.399818676337262e-05, "loss": 0.9962, "step": 193 }, { "epoch": 0.08794197642792384, "grad_norm": 0.648345510976243, "learning_rate": 1.4070716228467816e-05, "loss": 0.9752, "step": 194 }, { "epoch": 0.0883952855847688, "grad_norm": 0.6423807015901696, "learning_rate": 1.414324569356301e-05, "loss": 0.9731, "step": 195 }, { "epoch": 0.08884859474161379, "grad_norm": 1.1101079338102626, "learning_rate": 1.4215775158658207e-05, "loss": 0.9768, "step": 196 }, { "epoch": 0.08930190389845875, "grad_norm": 1.5007306788581445, "learning_rate": 1.4288304623753401e-05, "loss": 0.9954, "step": 197 }, { "epoch": 0.08975521305530372, "grad_norm": 0.42375501441243135, "learning_rate": 1.4360834088848597e-05, "loss": 0.996, "step": 198 }, { "epoch": 0.09020852221214869, "grad_norm": 1.4560313524567314, "learning_rate": 1.4433363553943791e-05, "loss": 1.0033, "step": 199 }, { "epoch": 0.09066183136899365, "grad_norm": 1.1422298759333185, "learning_rate": 1.4505893019038985e-05, "loss": 0.9744, "step": 200 }, { "epoch": 0.09111514052583862, "grad_norm": 0.7038514687452473, "learning_rate": 1.4578422484134181e-05, "loss": 0.997, "step": 201 }, { "epoch": 0.09156844968268359, "grad_norm": 1.1676043384874575, "learning_rate": 1.4650951949229375e-05, "loss": 0.9795, "step": 202 }, { "epoch": 0.09202175883952855, "grad_norm": 1.1845334269071475, "learning_rate": 1.4723481414324569e-05, "loss": 0.9782, "step": 203 }, { "epoch": 0.09247506799637353, "grad_norm": 0.8555969826316577, "learning_rate": 1.4796010879419766e-05, "loss": 0.9908, "step": 204 }, { "epoch": 0.0929283771532185, "grad_norm": 0.930955148540943, "learning_rate": 1.4868540344514962e-05, "loss": 0.9693, "step": 205 }, { "epoch": 0.09338168631006347, "grad_norm": 0.824673597329167, "learning_rate": 1.4941069809610156e-05, "loss": 1.0046, "step": 206 }, { "epoch": 0.09383499546690843, "grad_norm": 0.9628763437161365, "learning_rate": 1.501359927470535e-05, "loss": 0.9728, "step": 207 }, { "epoch": 0.0942883046237534, "grad_norm": 1.124075851760181, "learning_rate": 1.5086128739800546e-05, "loss": 0.9887, "step": 208 }, { "epoch": 0.09474161378059837, "grad_norm": 1.0020541511811916, "learning_rate": 1.515865820489574e-05, "loss": 0.9742, "step": 209 }, { "epoch": 0.09519492293744333, "grad_norm": 1.428557914150642, "learning_rate": 1.5231187669990934e-05, "loss": 0.9868, "step": 210 }, { "epoch": 0.0956482320942883, "grad_norm": 0.4816310505349378, "learning_rate": 1.530371713508613e-05, "loss": 0.9689, "step": 211 }, { "epoch": 0.09610154125113328, "grad_norm": 1.350677712085829, "learning_rate": 1.5376246600181325e-05, "loss": 0.9773, "step": 212 }, { "epoch": 0.09655485040797825, "grad_norm": 0.8961976530458231, "learning_rate": 1.544877606527652e-05, "loss": 0.9582, "step": 213 }, { "epoch": 0.09700815956482321, "grad_norm": 0.8280523544487235, "learning_rate": 1.5521305530371716e-05, "loss": 1.0079, "step": 214 }, { "epoch": 0.09746146872166818, "grad_norm": 1.1733044914183084, "learning_rate": 1.559383499546691e-05, "loss": 0.9795, "step": 215 }, { "epoch": 0.09791477787851315, "grad_norm": 1.014229491267023, "learning_rate": 1.5666364460562104e-05, "loss": 0.9773, "step": 216 }, { "epoch": 0.09836808703535811, "grad_norm": 1.1072318673235464, "learning_rate": 1.57388939256573e-05, "loss": 0.9792, "step": 217 }, { "epoch": 0.09882139619220308, "grad_norm": 0.60564072292483, "learning_rate": 1.5811423390752492e-05, "loss": 0.993, "step": 218 }, { "epoch": 0.09927470534904805, "grad_norm": 0.920081494826898, "learning_rate": 1.5883952855847688e-05, "loss": 0.9778, "step": 219 }, { "epoch": 0.09972801450589303, "grad_norm": 1.2326616239964998, "learning_rate": 1.5956482320942884e-05, "loss": 0.979, "step": 220 }, { "epoch": 0.10018132366273799, "grad_norm": 0.8094055421302522, "learning_rate": 1.602901178603808e-05, "loss": 0.9739, "step": 221 }, { "epoch": 0.10063463281958296, "grad_norm": 1.0843736866199611, "learning_rate": 1.6101541251133275e-05, "loss": 0.9846, "step": 222 }, { "epoch": 0.10108794197642793, "grad_norm": 0.8290507828181226, "learning_rate": 1.617407071622847e-05, "loss": 0.9699, "step": 223 }, { "epoch": 0.10154125113327289, "grad_norm": 0.8769177830467694, "learning_rate": 1.6246600181323663e-05, "loss": 1.0043, "step": 224 }, { "epoch": 0.10199456029011786, "grad_norm": 0.9536416098834379, "learning_rate": 1.631912964641886e-05, "loss": 1.0005, "step": 225 }, { "epoch": 0.10244786944696282, "grad_norm": 0.7300608573017586, "learning_rate": 1.6391659111514055e-05, "loss": 0.9705, "step": 226 }, { "epoch": 0.10290117860380779, "grad_norm": 0.763092364228021, "learning_rate": 1.6464188576609247e-05, "loss": 0.9785, "step": 227 }, { "epoch": 0.10335448776065277, "grad_norm": 0.8079383183643218, "learning_rate": 1.6536718041704446e-05, "loss": 0.9923, "step": 228 }, { "epoch": 0.10380779691749774, "grad_norm": 0.8517826259405005, "learning_rate": 1.660924750679964e-05, "loss": 0.9818, "step": 229 }, { "epoch": 0.1042611060743427, "grad_norm": 0.8998822996749852, "learning_rate": 1.6681776971894834e-05, "loss": 0.9849, "step": 230 }, { "epoch": 0.10471441523118767, "grad_norm": 1.2408709545670018, "learning_rate": 1.675430643699003e-05, "loss": 0.9742, "step": 231 }, { "epoch": 0.10516772438803264, "grad_norm": 0.9556970521991514, "learning_rate": 1.6826835902085222e-05, "loss": 0.9904, "step": 232 }, { "epoch": 0.1056210335448776, "grad_norm": 0.812130685400505, "learning_rate": 1.6899365367180418e-05, "loss": 0.9818, "step": 233 }, { "epoch": 0.10607434270172257, "grad_norm": 0.4449645505521567, "learning_rate": 1.6971894832275613e-05, "loss": 0.9902, "step": 234 }, { "epoch": 0.10652765185856754, "grad_norm": 0.7252411381260219, "learning_rate": 1.7044424297370806e-05, "loss": 0.9667, "step": 235 }, { "epoch": 0.10698096101541252, "grad_norm": 0.9295651078163392, "learning_rate": 1.7116953762466005e-05, "loss": 0.9821, "step": 236 }, { "epoch": 0.10743427017225748, "grad_norm": 0.9572789366170193, "learning_rate": 1.71894832275612e-05, "loss": 0.9636, "step": 237 }, { "epoch": 0.10788757932910245, "grad_norm": 0.9985681552390233, "learning_rate": 1.7262012692656393e-05, "loss": 0.9577, "step": 238 }, { "epoch": 0.10834088848594742, "grad_norm": 1.2711800486169227, "learning_rate": 1.733454215775159e-05, "loss": 0.9807, "step": 239 }, { "epoch": 0.10879419764279238, "grad_norm": 0.6670602415088438, "learning_rate": 1.7407071622846784e-05, "loss": 0.9862, "step": 240 }, { "epoch": 0.10924750679963735, "grad_norm": 0.8376552771623001, "learning_rate": 1.7479601087941977e-05, "loss": 0.9601, "step": 241 }, { "epoch": 0.10970081595648232, "grad_norm": 1.343570555732653, "learning_rate": 1.7552130553037172e-05, "loss": 0.9864, "step": 242 }, { "epoch": 0.11015412511332728, "grad_norm": 0.735222859876767, "learning_rate": 1.7624660018132368e-05, "loss": 0.9788, "step": 243 }, { "epoch": 0.11060743427017226, "grad_norm": 1.1559965308172442, "learning_rate": 1.7697189483227564e-05, "loss": 0.9786, "step": 244 }, { "epoch": 0.11106074342701723, "grad_norm": 1.2826780864038205, "learning_rate": 1.776971894832276e-05, "loss": 0.9881, "step": 245 }, { "epoch": 0.1115140525838622, "grad_norm": 0.7814106001336353, "learning_rate": 1.784224841341795e-05, "loss": 0.9544, "step": 246 }, { "epoch": 0.11196736174070716, "grad_norm": 1.6537276432939085, "learning_rate": 1.7914777878513147e-05, "loss": 0.9917, "step": 247 }, { "epoch": 0.11242067089755213, "grad_norm": 0.6893437570159018, "learning_rate": 1.7987307343608343e-05, "loss": 1.003, "step": 248 }, { "epoch": 0.1128739800543971, "grad_norm": 1.6849725670625908, "learning_rate": 1.8059836808703535e-05, "loss": 0.9828, "step": 249 }, { "epoch": 0.11332728921124206, "grad_norm": 1.0060674291542064, "learning_rate": 1.813236627379873e-05, "loss": 0.9615, "step": 250 }, { "epoch": 0.11378059836808703, "grad_norm": 2.0804817610239903, "learning_rate": 1.8204895738893927e-05, "loss": 0.9939, "step": 251 }, { "epoch": 0.11423390752493201, "grad_norm": 1.9634789743524923, "learning_rate": 1.8277425203989122e-05, "loss": 0.9896, "step": 252 }, { "epoch": 0.11468721668177698, "grad_norm": 1.1649779335436878, "learning_rate": 1.8349954669084318e-05, "loss": 1.0184, "step": 253 }, { "epoch": 0.11514052583862194, "grad_norm": 1.5843507590428396, "learning_rate": 1.8422484134179514e-05, "loss": 0.9832, "step": 254 }, { "epoch": 0.11559383499546691, "grad_norm": 1.0529136255633502, "learning_rate": 1.8495013599274706e-05, "loss": 0.9868, "step": 255 }, { "epoch": 0.11604714415231188, "grad_norm": 1.887089900140882, "learning_rate": 1.8567543064369902e-05, "loss": 0.9755, "step": 256 }, { "epoch": 0.11650045330915684, "grad_norm": 1.5891049260652443, "learning_rate": 1.8640072529465098e-05, "loss": 0.9647, "step": 257 }, { "epoch": 0.11695376246600181, "grad_norm": 1.2323878574116947, "learning_rate": 1.871260199456029e-05, "loss": 0.988, "step": 258 }, { "epoch": 0.11740707162284678, "grad_norm": 1.4017061386902538, "learning_rate": 1.8785131459655486e-05, "loss": 0.959, "step": 259 }, { "epoch": 0.11786038077969176, "grad_norm": 0.9841075490698694, "learning_rate": 1.885766092475068e-05, "loss": 0.975, "step": 260 }, { "epoch": 0.11831368993653672, "grad_norm": 1.425459904030462, "learning_rate": 1.8930190389845877e-05, "loss": 0.9987, "step": 261 }, { "epoch": 0.11876699909338169, "grad_norm": 1.0006046414278666, "learning_rate": 1.9002719854941073e-05, "loss": 0.9618, "step": 262 }, { "epoch": 0.11922030825022666, "grad_norm": 1.463377849964184, "learning_rate": 1.9075249320036265e-05, "loss": 0.9887, "step": 263 }, { "epoch": 0.11967361740707162, "grad_norm": 1.2191563633199565, "learning_rate": 1.914777878513146e-05, "loss": 0.9855, "step": 264 }, { "epoch": 0.12012692656391659, "grad_norm": 1.2698795528056874, "learning_rate": 1.9220308250226656e-05, "loss": 0.9757, "step": 265 }, { "epoch": 0.12058023572076156, "grad_norm": 1.4800822421542457, "learning_rate": 1.9292837715321852e-05, "loss": 0.9955, "step": 266 }, { "epoch": 0.12103354487760652, "grad_norm": 1.2941761703168384, "learning_rate": 1.9365367180417044e-05, "loss": 0.9769, "step": 267 }, { "epoch": 0.1214868540344515, "grad_norm": 1.137685934242054, "learning_rate": 1.9437896645512243e-05, "loss": 0.9638, "step": 268 }, { "epoch": 0.12194016319129647, "grad_norm": 0.9786061476311878, "learning_rate": 1.9510426110607436e-05, "loss": 0.9866, "step": 269 }, { "epoch": 0.12239347234814144, "grad_norm": 1.14610021536446, "learning_rate": 1.958295557570263e-05, "loss": 0.9424, "step": 270 }, { "epoch": 0.1228467815049864, "grad_norm": 1.2594427817814677, "learning_rate": 1.9655485040797827e-05, "loss": 0.9687, "step": 271 }, { "epoch": 0.12330009066183137, "grad_norm": 0.8612758658610435, "learning_rate": 1.972801450589302e-05, "loss": 0.9848, "step": 272 }, { "epoch": 0.12375339981867634, "grad_norm": 0.8756591780106714, "learning_rate": 1.9800543970988215e-05, "loss": 0.9605, "step": 273 }, { "epoch": 0.1242067089755213, "grad_norm": 0.7538940670576584, "learning_rate": 1.987307343608341e-05, "loss": 0.9873, "step": 274 }, { "epoch": 0.12466001813236627, "grad_norm": 0.9105521249427928, "learning_rate": 1.9945602901178603e-05, "loss": 0.9761, "step": 275 }, { "epoch": 0.12511332728921123, "grad_norm": 1.2082970794151782, "learning_rate": 2.00181323662738e-05, "loss": 0.9647, "step": 276 }, { "epoch": 0.1255666364460562, "grad_norm": 1.2276525054309486, "learning_rate": 2.0090661831368995e-05, "loss": 0.9652, "step": 277 }, { "epoch": 0.12601994560290117, "grad_norm": 0.9486143367623325, "learning_rate": 2.0163191296464187e-05, "loss": 0.9894, "step": 278 }, { "epoch": 0.12647325475974613, "grad_norm": 1.0961709262304196, "learning_rate": 2.0235720761559383e-05, "loss": 0.9903, "step": 279 }, { "epoch": 0.12692656391659113, "grad_norm": 1.0835334523571587, "learning_rate": 2.030825022665458e-05, "loss": 0.9718, "step": 280 }, { "epoch": 0.1273798730734361, "grad_norm": 1.5108390987852842, "learning_rate": 2.0380779691749777e-05, "loss": 0.9911, "step": 281 }, { "epoch": 0.12783318223028106, "grad_norm": 0.6959667630768904, "learning_rate": 2.0453309156844973e-05, "loss": 0.9889, "step": 282 }, { "epoch": 0.12828649138712603, "grad_norm": 1.1060822358941296, "learning_rate": 2.0525838621940165e-05, "loss": 0.9748, "step": 283 }, { "epoch": 0.128739800543971, "grad_norm": 1.7081303899139821, "learning_rate": 2.059836808703536e-05, "loss": 0.9791, "step": 284 }, { "epoch": 0.12919310970081596, "grad_norm": 0.6993825425452259, "learning_rate": 2.0670897552130557e-05, "loss": 0.9776, "step": 285 }, { "epoch": 0.12964641885766093, "grad_norm": 2.0172542972716916, "learning_rate": 2.074342701722575e-05, "loss": 0.9558, "step": 286 }, { "epoch": 0.1300997280145059, "grad_norm": 1.126456177113453, "learning_rate": 2.0815956482320945e-05, "loss": 0.9888, "step": 287 }, { "epoch": 0.13055303717135086, "grad_norm": 2.127455302311884, "learning_rate": 2.088848594741614e-05, "loss": 0.9933, "step": 288 }, { "epoch": 0.13100634632819583, "grad_norm": 1.693138414413347, "learning_rate": 2.0961015412511333e-05, "loss": 0.983, "step": 289 }, { "epoch": 0.1314596554850408, "grad_norm": 1.6609776648326933, "learning_rate": 2.103354487760653e-05, "loss": 0.9613, "step": 290 }, { "epoch": 0.13191296464188576, "grad_norm": 1.6803908880826948, "learning_rate": 2.1106074342701724e-05, "loss": 0.9858, "step": 291 }, { "epoch": 0.13236627379873073, "grad_norm": 1.3168606047561777, "learning_rate": 2.1178603807796916e-05, "loss": 0.989, "step": 292 }, { "epoch": 0.1328195829555757, "grad_norm": 1.4157167392194014, "learning_rate": 2.1251133272892112e-05, "loss": 0.9664, "step": 293 }, { "epoch": 0.13327289211242066, "grad_norm": 1.1724638158572707, "learning_rate": 2.1323662737987308e-05, "loss": 0.9722, "step": 294 }, { "epoch": 0.13372620126926563, "grad_norm": 1.506810988004093, "learning_rate": 2.1396192203082504e-05, "loss": 0.9827, "step": 295 }, { "epoch": 0.13417951042611062, "grad_norm": 1.1259386386552064, "learning_rate": 2.1468721668177703e-05, "loss": 0.9659, "step": 296 }, { "epoch": 0.1346328195829556, "grad_norm": 1.6238681862761164, "learning_rate": 2.1541251133272895e-05, "loss": 0.983, "step": 297 }, { "epoch": 0.13508612873980055, "grad_norm": 1.5296050415433289, "learning_rate": 2.161378059836809e-05, "loss": 0.9803, "step": 298 }, { "epoch": 0.13553943789664552, "grad_norm": 1.2091591954175966, "learning_rate": 2.1686310063463286e-05, "loss": 0.9696, "step": 299 }, { "epoch": 0.1359927470534905, "grad_norm": 1.4703297260596262, "learning_rate": 2.175883952855848e-05, "loss": 0.9707, "step": 300 }, { "epoch": 0.13644605621033545, "grad_norm": 1.088299707082397, "learning_rate": 2.1831368993653674e-05, "loss": 0.9799, "step": 301 }, { "epoch": 0.13689936536718042, "grad_norm": 1.6130249012013766, "learning_rate": 2.190389845874887e-05, "loss": 0.985, "step": 302 }, { "epoch": 0.1373526745240254, "grad_norm": 1.299180705339519, "learning_rate": 2.1976427923844062e-05, "loss": 0.9602, "step": 303 }, { "epoch": 0.13780598368087035, "grad_norm": 1.3590180237916514, "learning_rate": 2.2048957388939258e-05, "loss": 0.9836, "step": 304 }, { "epoch": 0.13825929283771532, "grad_norm": 1.274439141028146, "learning_rate": 2.2121486854034454e-05, "loss": 0.9868, "step": 305 }, { "epoch": 0.1387126019945603, "grad_norm": 1.214938895567437, "learning_rate": 2.2194016319129646e-05, "loss": 0.9945, "step": 306 }, { "epoch": 0.13916591115140525, "grad_norm": 1.0867048046166174, "learning_rate": 2.2266545784224842e-05, "loss": 0.9891, "step": 307 }, { "epoch": 0.13961922030825022, "grad_norm": 1.152651322304279, "learning_rate": 2.2339075249320037e-05, "loss": 0.9784, "step": 308 }, { "epoch": 0.14007252946509519, "grad_norm": 0.9335177965692247, "learning_rate": 2.2411604714415233e-05, "loss": 0.9822, "step": 309 }, { "epoch": 0.14052583862194015, "grad_norm": 1.2073636881570384, "learning_rate": 2.2484134179510425e-05, "loss": 0.9672, "step": 310 }, { "epoch": 0.14097914777878512, "grad_norm": 1.0938314217297167, "learning_rate": 2.255666364460562e-05, "loss": 0.9764, "step": 311 }, { "epoch": 0.1414324569356301, "grad_norm": 1.0785867621298086, "learning_rate": 2.262919310970082e-05, "loss": 0.9597, "step": 312 }, { "epoch": 0.14188576609247508, "grad_norm": 0.9920753641718812, "learning_rate": 2.2701722574796016e-05, "loss": 0.9663, "step": 313 }, { "epoch": 0.14233907524932005, "grad_norm": 0.7983365951041685, "learning_rate": 2.2774252039891208e-05, "loss": 0.9667, "step": 314 }, { "epoch": 0.142792384406165, "grad_norm": 1.146961954359942, "learning_rate": 2.2846781504986404e-05, "loss": 0.9607, "step": 315 }, { "epoch": 0.14324569356300998, "grad_norm": 0.7097464715104542, "learning_rate": 2.29193109700816e-05, "loss": 0.9664, "step": 316 }, { "epoch": 0.14369900271985495, "grad_norm": 1.0022962311934114, "learning_rate": 2.2991840435176792e-05, "loss": 0.9649, "step": 317 }, { "epoch": 0.1441523118766999, "grad_norm": 0.9211817305255298, "learning_rate": 2.3064369900271988e-05, "loss": 0.9681, "step": 318 }, { "epoch": 0.14460562103354488, "grad_norm": 1.4108296547781745, "learning_rate": 2.3136899365367183e-05, "loss": 0.9641, "step": 319 }, { "epoch": 0.14505893019038985, "grad_norm": 1.1814512581491157, "learning_rate": 2.3209428830462376e-05, "loss": 0.9798, "step": 320 }, { "epoch": 0.1455122393472348, "grad_norm": 1.2438981011225425, "learning_rate": 2.328195829555757e-05, "loss": 0.986, "step": 321 }, { "epoch": 0.14596554850407978, "grad_norm": 0.924826557267048, "learning_rate": 2.3354487760652767e-05, "loss": 0.947, "step": 322 }, { "epoch": 0.14641885766092474, "grad_norm": 1.0860833968842445, "learning_rate": 2.3427017225747963e-05, "loss": 0.9624, "step": 323 }, { "epoch": 0.1468721668177697, "grad_norm": 1.0217514014564444, "learning_rate": 2.3499546690843155e-05, "loss": 0.977, "step": 324 }, { "epoch": 0.14732547597461468, "grad_norm": 1.1674151382952693, "learning_rate": 2.357207615593835e-05, "loss": 0.9866, "step": 325 }, { "epoch": 0.14777878513145964, "grad_norm": 1.222662801119326, "learning_rate": 2.3644605621033546e-05, "loss": 0.9663, "step": 326 }, { "epoch": 0.1482320942883046, "grad_norm": 1.1876386584855878, "learning_rate": 2.371713508612874e-05, "loss": 0.9822, "step": 327 }, { "epoch": 0.1486854034451496, "grad_norm": 1.7198416796464207, "learning_rate": 2.3789664551223938e-05, "loss": 0.9473, "step": 328 }, { "epoch": 0.14913871260199457, "grad_norm": 0.7915454311641326, "learning_rate": 2.3862194016319134e-05, "loss": 0.9725, "step": 329 }, { "epoch": 0.14959202175883954, "grad_norm": 2.3746230624113895, "learning_rate": 2.393472348141433e-05, "loss": 0.9798, "step": 330 }, { "epoch": 0.1500453309156845, "grad_norm": 1.5656498130662204, "learning_rate": 2.400725294650952e-05, "loss": 0.9755, "step": 331 }, { "epoch": 0.15049864007252947, "grad_norm": 2.30620570887288, "learning_rate": 2.4079782411604717e-05, "loss": 0.9673, "step": 332 }, { "epoch": 0.15095194922937444, "grad_norm": 2.2084039737648116, "learning_rate": 2.4152311876699913e-05, "loss": 0.9932, "step": 333 }, { "epoch": 0.1514052583862194, "grad_norm": 1.35532113659019, "learning_rate": 2.4224841341795105e-05, "loss": 0.9762, "step": 334 }, { "epoch": 0.15185856754306437, "grad_norm": 1.8516249415137251, "learning_rate": 2.42973708068903e-05, "loss": 0.9902, "step": 335 }, { "epoch": 0.15231187669990934, "grad_norm": 0.9894275372304367, "learning_rate": 2.4369900271985497e-05, "loss": 0.9772, "step": 336 }, { "epoch": 0.1527651858567543, "grad_norm": 1.9629261005028331, "learning_rate": 2.4442429737080692e-05, "loss": 0.9646, "step": 337 }, { "epoch": 0.15321849501359927, "grad_norm": 1.630421617469693, "learning_rate": 2.4514959202175885e-05, "loss": 0.9771, "step": 338 }, { "epoch": 0.15367180417044424, "grad_norm": 1.8564933288855712, "learning_rate": 2.458748866727108e-05, "loss": 0.9571, "step": 339 }, { "epoch": 0.1541251133272892, "grad_norm": 1.5492286378045599, "learning_rate": 2.4660018132366276e-05, "loss": 0.9696, "step": 340 }, { "epoch": 0.15457842248413417, "grad_norm": 1.486217931479215, "learning_rate": 2.473254759746147e-05, "loss": 0.9867, "step": 341 }, { "epoch": 0.15503173164097914, "grad_norm": 1.388439102854886, "learning_rate": 2.4805077062556664e-05, "loss": 0.9458, "step": 342 }, { "epoch": 0.1554850407978241, "grad_norm": 1.2965776663451756, "learning_rate": 2.487760652765186e-05, "loss": 0.9591, "step": 343 }, { "epoch": 0.1559383499546691, "grad_norm": 1.2478183763765311, "learning_rate": 2.495013599274706e-05, "loss": 0.9543, "step": 344 }, { "epoch": 0.15639165911151406, "grad_norm": 0.85743320943999, "learning_rate": 2.502266545784225e-05, "loss": 0.982, "step": 345 }, { "epoch": 0.15684496826835903, "grad_norm": 1.6825091253771363, "learning_rate": 2.5095194922937447e-05, "loss": 0.9658, "step": 346 }, { "epoch": 0.157298277425204, "grad_norm": 1.1383786372012867, "learning_rate": 2.5167724388032643e-05, "loss": 0.9673, "step": 347 }, { "epoch": 0.15775158658204896, "grad_norm": 1.2181311015413976, "learning_rate": 2.5240253853127835e-05, "loss": 0.975, "step": 348 }, { "epoch": 0.15820489573889393, "grad_norm": 1.0559719089348336, "learning_rate": 2.531278331822303e-05, "loss": 0.9801, "step": 349 }, { "epoch": 0.1586582048957389, "grad_norm": 1.090044494260907, "learning_rate": 2.5385312783318226e-05, "loss": 0.9279, "step": 350 }, { "epoch": 0.15911151405258386, "grad_norm": 1.6691200224256781, "learning_rate": 2.5457842248413422e-05, "loss": 0.9802, "step": 351 }, { "epoch": 0.15956482320942883, "grad_norm": 0.9696518951664035, "learning_rate": 2.5530371713508614e-05, "loss": 0.9394, "step": 352 }, { "epoch": 0.1600181323662738, "grad_norm": 1.410040240273291, "learning_rate": 2.560290117860381e-05, "loss": 0.9638, "step": 353 }, { "epoch": 0.16047144152311876, "grad_norm": 1.1460934676665542, "learning_rate": 2.5675430643699006e-05, "loss": 0.9613, "step": 354 }, { "epoch": 0.16092475067996373, "grad_norm": 1.4017681591240083, "learning_rate": 2.5747960108794198e-05, "loss": 0.9737, "step": 355 }, { "epoch": 0.1613780598368087, "grad_norm": 1.0099084176329762, "learning_rate": 2.5820489573889394e-05, "loss": 0.9812, "step": 356 }, { "epoch": 0.16183136899365366, "grad_norm": 1.1334909486260827, "learning_rate": 2.589301903898459e-05, "loss": 0.9628, "step": 357 }, { "epoch": 0.16228467815049863, "grad_norm": 1.0710329320227614, "learning_rate": 2.596554850407978e-05, "loss": 0.9621, "step": 358 }, { "epoch": 0.1627379873073436, "grad_norm": 0.875927793017003, "learning_rate": 2.6038077969174977e-05, "loss": 0.9619, "step": 359 }, { "epoch": 0.1631912964641886, "grad_norm": 1.0576806114027948, "learning_rate": 2.6110607434270176e-05, "loss": 0.978, "step": 360 }, { "epoch": 0.16364460562103356, "grad_norm": 0.9739823039410348, "learning_rate": 2.6183136899365372e-05, "loss": 0.9654, "step": 361 }, { "epoch": 0.16409791477787852, "grad_norm": 1.0171786127394473, "learning_rate": 2.6255666364460564e-05, "loss": 0.9782, "step": 362 }, { "epoch": 0.1645512239347235, "grad_norm": 1.2685989662829482, "learning_rate": 2.632819582955576e-05, "loss": 0.9664, "step": 363 }, { "epoch": 0.16500453309156846, "grad_norm": 1.3795232726736537, "learning_rate": 2.6400725294650956e-05, "loss": 0.9466, "step": 364 }, { "epoch": 0.16545784224841342, "grad_norm": 1.143035224843098, "learning_rate": 2.6473254759746148e-05, "loss": 0.9764, "step": 365 }, { "epoch": 0.1659111514052584, "grad_norm": 1.035017254464268, "learning_rate": 2.6545784224841344e-05, "loss": 0.9614, "step": 366 }, { "epoch": 0.16636446056210336, "grad_norm": 1.2512499706125675, "learning_rate": 2.661831368993654e-05, "loss": 0.9669, "step": 367 }, { "epoch": 0.16681776971894832, "grad_norm": 1.434821260736265, "learning_rate": 2.6690843155031735e-05, "loss": 0.9628, "step": 368 }, { "epoch": 0.1672710788757933, "grad_norm": 0.9861234853952395, "learning_rate": 2.6763372620126928e-05, "loss": 0.9353, "step": 369 }, { "epoch": 0.16772438803263826, "grad_norm": 1.657770515919325, "learning_rate": 2.6835902085222123e-05, "loss": 0.9765, "step": 370 }, { "epoch": 0.16817769718948322, "grad_norm": 0.784806763311947, "learning_rate": 2.690843155031732e-05, "loss": 0.9748, "step": 371 }, { "epoch": 0.1686310063463282, "grad_norm": 1.8103571432207946, "learning_rate": 2.698096101541251e-05, "loss": 0.9728, "step": 372 }, { "epoch": 0.16908431550317315, "grad_norm": 1.2109196259265576, "learning_rate": 2.7053490480507707e-05, "loss": 0.9528, "step": 373 }, { "epoch": 0.16953762466001812, "grad_norm": 1.755987961675713, "learning_rate": 2.7126019945602903e-05, "loss": 0.955, "step": 374 }, { "epoch": 0.1699909338168631, "grad_norm": 1.47446086150359, "learning_rate": 2.7198549410698095e-05, "loss": 0.9719, "step": 375 }, { "epoch": 0.17044424297370808, "grad_norm": 1.4309535484584595, "learning_rate": 2.7271078875793294e-05, "loss": 0.9636, "step": 376 }, { "epoch": 0.17089755213055305, "grad_norm": 1.3375420224856274, "learning_rate": 2.734360834088849e-05, "loss": 0.9496, "step": 377 }, { "epoch": 0.17135086128739802, "grad_norm": 1.4029269470347197, "learning_rate": 2.7416137805983685e-05, "loss": 0.9686, "step": 378 }, { "epoch": 0.17180417044424298, "grad_norm": 1.1303986834868789, "learning_rate": 2.7488667271078878e-05, "loss": 0.9509, "step": 379 }, { "epoch": 0.17225747960108795, "grad_norm": 1.2601602176914741, "learning_rate": 2.7561196736174073e-05, "loss": 0.94, "step": 380 }, { "epoch": 0.17271078875793291, "grad_norm": 1.0864166540196005, "learning_rate": 2.763372620126927e-05, "loss": 0.9515, "step": 381 }, { "epoch": 0.17316409791477788, "grad_norm": 1.4109073582275067, "learning_rate": 2.7706255666364465e-05, "loss": 0.9492, "step": 382 }, { "epoch": 0.17361740707162285, "grad_norm": 1.019247148616662, "learning_rate": 2.7778785131459657e-05, "loss": 0.9542, "step": 383 }, { "epoch": 0.17407071622846781, "grad_norm": 1.4792739894601246, "learning_rate": 2.7851314596554853e-05, "loss": 0.9812, "step": 384 }, { "epoch": 0.17452402538531278, "grad_norm": 1.1364851097800064, "learning_rate": 2.792384406165005e-05, "loss": 0.9637, "step": 385 }, { "epoch": 0.17497733454215775, "grad_norm": 1.1947650453129484, "learning_rate": 2.799637352674524e-05, "loss": 0.9531, "step": 386 }, { "epoch": 0.17543064369900271, "grad_norm": 1.192453878730054, "learning_rate": 2.8068902991840437e-05, "loss": 0.975, "step": 387 }, { "epoch": 0.17588395285584768, "grad_norm": 1.3248162957588472, "learning_rate": 2.8141432456935632e-05, "loss": 0.9646, "step": 388 }, { "epoch": 0.17633726201269265, "grad_norm": 1.2805823370885974, "learning_rate": 2.8213961922030825e-05, "loss": 0.9403, "step": 389 }, { "epoch": 0.1767905711695376, "grad_norm": 1.266465542509188, "learning_rate": 2.828649138712602e-05, "loss": 0.9736, "step": 390 }, { "epoch": 0.17724388032638258, "grad_norm": 2.0371687395076705, "learning_rate": 2.8359020852221216e-05, "loss": 0.958, "step": 391 }, { "epoch": 0.17769718948322757, "grad_norm": 0.9297561826921558, "learning_rate": 2.8431550317316415e-05, "loss": 0.9381, "step": 392 }, { "epoch": 0.17815049864007254, "grad_norm": 2.7848167599641656, "learning_rate": 2.8504079782411607e-05, "loss": 0.9506, "step": 393 }, { "epoch": 0.1786038077969175, "grad_norm": 2.1763201975486255, "learning_rate": 2.8576609247506803e-05, "loss": 0.9662, "step": 394 }, { "epoch": 0.17905711695376247, "grad_norm": 2.4169897403665606, "learning_rate": 2.8649138712602e-05, "loss": 0.95, "step": 395 }, { "epoch": 0.17951042611060744, "grad_norm": 1.674867088697692, "learning_rate": 2.8721668177697194e-05, "loss": 0.9526, "step": 396 }, { "epoch": 0.1799637352674524, "grad_norm": 2.651554107237099, "learning_rate": 2.8794197642792387e-05, "loss": 0.9594, "step": 397 }, { "epoch": 0.18041704442429737, "grad_norm": 1.6943939649347142, "learning_rate": 2.8866727107887582e-05, "loss": 0.9418, "step": 398 }, { "epoch": 0.18087035358114234, "grad_norm": 3.24733464439908, "learning_rate": 2.8939256572982778e-05, "loss": 0.9807, "step": 399 }, { "epoch": 0.1813236627379873, "grad_norm": 2.848802228919831, "learning_rate": 2.901178603807797e-05, "loss": 0.96, "step": 400 }, { "epoch": 0.18177697189483227, "grad_norm": 2.3903543908947307, "learning_rate": 2.9084315503173166e-05, "loss": 0.9622, "step": 401 }, { "epoch": 0.18223028105167724, "grad_norm": 2.570811576203462, "learning_rate": 2.9156844968268362e-05, "loss": 0.9618, "step": 402 }, { "epoch": 0.1826835902085222, "grad_norm": 1.7840259015514057, "learning_rate": 2.9229374433363554e-05, "loss": 0.9641, "step": 403 }, { "epoch": 0.18313689936536717, "grad_norm": 1.9099188763348687, "learning_rate": 2.930190389845875e-05, "loss": 0.9565, "step": 404 }, { "epoch": 0.18359020852221214, "grad_norm": 1.9098174286301464, "learning_rate": 2.9374433363553945e-05, "loss": 0.9709, "step": 405 }, { "epoch": 0.1840435176790571, "grad_norm": 1.3590025674105495, "learning_rate": 2.9446962828649138e-05, "loss": 0.9554, "step": 406 }, { "epoch": 0.18449682683590207, "grad_norm": 1.7368624137802966, "learning_rate": 2.9519492293744334e-05, "loss": 0.9706, "step": 407 }, { "epoch": 0.18495013599274707, "grad_norm": 1.3745574589171, "learning_rate": 2.9592021758839533e-05, "loss": 0.9597, "step": 408 }, { "epoch": 0.18540344514959203, "grad_norm": 1.4778064141083451, "learning_rate": 2.9664551223934728e-05, "loss": 0.9602, "step": 409 }, { "epoch": 0.185856754306437, "grad_norm": 1.0653730846884035, "learning_rate": 2.9737080689029924e-05, "loss": 0.9636, "step": 410 }, { "epoch": 0.18631006346328197, "grad_norm": 2.1234201480340276, "learning_rate": 2.9809610154125116e-05, "loss": 0.9707, "step": 411 }, { "epoch": 0.18676337262012693, "grad_norm": 1.0590017972746357, "learning_rate": 2.9882139619220312e-05, "loss": 0.9585, "step": 412 }, { "epoch": 0.1872166817769719, "grad_norm": 2.7210232300497235, "learning_rate": 2.9954669084315508e-05, "loss": 0.9452, "step": 413 }, { "epoch": 0.18766999093381687, "grad_norm": 2.1877292119928664, "learning_rate": 3.00271985494107e-05, "loss": 0.9723, "step": 414 }, { "epoch": 0.18812330009066183, "grad_norm": 2.0841889264754125, "learning_rate": 3.0099728014505896e-05, "loss": 0.989, "step": 415 }, { "epoch": 0.1885766092475068, "grad_norm": 1.8469443919409643, "learning_rate": 3.017225747960109e-05, "loss": 0.9734, "step": 416 }, { "epoch": 0.18902991840435177, "grad_norm": 1.6506258498520425, "learning_rate": 3.0244786944696284e-05, "loss": 0.9404, "step": 417 }, { "epoch": 0.18948322756119673, "grad_norm": 1.923147191356826, "learning_rate": 3.031731640979148e-05, "loss": 0.9625, "step": 418 }, { "epoch": 0.1899365367180417, "grad_norm": 1.2048827962157127, "learning_rate": 3.0389845874886675e-05, "loss": 0.9528, "step": 419 }, { "epoch": 0.19038984587488667, "grad_norm": 2.4279715097804995, "learning_rate": 3.0462375339981867e-05, "loss": 0.974, "step": 420 }, { "epoch": 0.19084315503173163, "grad_norm": 1.6050707457366387, "learning_rate": 3.053490480507706e-05, "loss": 0.9527, "step": 421 }, { "epoch": 0.1912964641885766, "grad_norm": 2.541110518933097, "learning_rate": 3.060743427017226e-05, "loss": 0.962, "step": 422 }, { "epoch": 0.19174977334542156, "grad_norm": 1.836933897500519, "learning_rate": 3.0679963735267454e-05, "loss": 0.9794, "step": 423 }, { "epoch": 0.19220308250226656, "grad_norm": 2.4305183812740028, "learning_rate": 3.075249320036265e-05, "loss": 0.984, "step": 424 }, { "epoch": 0.19265639165911153, "grad_norm": 1.5581896763457288, "learning_rate": 3.0825022665457846e-05, "loss": 0.9655, "step": 425 }, { "epoch": 0.1931097008159565, "grad_norm": 2.58055892095061, "learning_rate": 3.089755213055304e-05, "loss": 0.9613, "step": 426 }, { "epoch": 0.19356300997280146, "grad_norm": 1.9075019676816203, "learning_rate": 3.097008159564824e-05, "loss": 0.968, "step": 427 }, { "epoch": 0.19401631912964643, "grad_norm": 2.752592179918321, "learning_rate": 3.104261106074343e-05, "loss": 0.9619, "step": 428 }, { "epoch": 0.1944696282864914, "grad_norm": 2.1888998537634015, "learning_rate": 3.111514052583862e-05, "loss": 0.9445, "step": 429 }, { "epoch": 0.19492293744333636, "grad_norm": 2.4776533717422744, "learning_rate": 3.118766999093382e-05, "loss": 0.9449, "step": 430 }, { "epoch": 0.19537624660018132, "grad_norm": 2.3166200470254923, "learning_rate": 3.126019945602901e-05, "loss": 0.9533, "step": 431 }, { "epoch": 0.1958295557570263, "grad_norm": 2.0543035818440196, "learning_rate": 3.133272892112421e-05, "loss": 0.9733, "step": 432 }, { "epoch": 0.19628286491387126, "grad_norm": 1.725738023355161, "learning_rate": 3.1405258386219405e-05, "loss": 0.9399, "step": 433 }, { "epoch": 0.19673617407071622, "grad_norm": 2.190044694058987, "learning_rate": 3.14777878513146e-05, "loss": 0.9537, "step": 434 }, { "epoch": 0.1971894832275612, "grad_norm": 1.681817873763034, "learning_rate": 3.1550317316409796e-05, "loss": 0.9815, "step": 435 }, { "epoch": 0.19764279238440616, "grad_norm": 2.4082255773002337, "learning_rate": 3.1622846781504985e-05, "loss": 0.9555, "step": 436 }, { "epoch": 0.19809610154125112, "grad_norm": 1.6480505623629258, "learning_rate": 3.169537624660018e-05, "loss": 0.9342, "step": 437 }, { "epoch": 0.1985494106980961, "grad_norm": 2.5776498102159335, "learning_rate": 3.1767905711695376e-05, "loss": 0.957, "step": 438 }, { "epoch": 0.19900271985494106, "grad_norm": 1.9078277283950271, "learning_rate": 3.184043517679057e-05, "loss": 0.9373, "step": 439 }, { "epoch": 0.19945602901178605, "grad_norm": 2.568161324239812, "learning_rate": 3.191296464188577e-05, "loss": 0.9794, "step": 440 }, { "epoch": 0.19990933816863102, "grad_norm": 2.2894333232044555, "learning_rate": 3.1985494106980963e-05, "loss": 0.9586, "step": 441 }, { "epoch": 0.20036264732547598, "grad_norm": 2.123231739178061, "learning_rate": 3.205802357207616e-05, "loss": 0.9494, "step": 442 }, { "epoch": 0.20081595648232095, "grad_norm": 2.0163618298621904, "learning_rate": 3.2130553037171355e-05, "loss": 0.946, "step": 443 }, { "epoch": 0.20126926563916592, "grad_norm": 2.1520317891360015, "learning_rate": 3.220308250226655e-05, "loss": 0.9458, "step": 444 }, { "epoch": 0.20172257479601088, "grad_norm": 1.8116655801577795, "learning_rate": 3.2275611967361746e-05, "loss": 0.9492, "step": 445 }, { "epoch": 0.20217588395285585, "grad_norm": 2.009897325477093, "learning_rate": 3.234814143245694e-05, "loss": 0.9541, "step": 446 }, { "epoch": 0.20262919310970082, "grad_norm": 1.5442159378463132, "learning_rate": 3.242067089755213e-05, "loss": 0.9457, "step": 447 }, { "epoch": 0.20308250226654578, "grad_norm": 2.173506011999066, "learning_rate": 3.2493200362647327e-05, "loss": 0.9721, "step": 448 }, { "epoch": 0.20353581142339075, "grad_norm": 1.643863843059418, "learning_rate": 3.256572982774252e-05, "loss": 0.9632, "step": 449 }, { "epoch": 0.20398912058023572, "grad_norm": 2.209913866183974, "learning_rate": 3.263825929283772e-05, "loss": 0.9707, "step": 450 }, { "epoch": 0.20444242973708068, "grad_norm": 1.661173082303144, "learning_rate": 3.2710788757932914e-05, "loss": 0.9391, "step": 451 }, { "epoch": 0.20489573889392565, "grad_norm": 2.253982197167722, "learning_rate": 3.278331822302811e-05, "loss": 0.9684, "step": 452 }, { "epoch": 0.20534904805077062, "grad_norm": 2.0170203884876687, "learning_rate": 3.28558476881233e-05, "loss": 0.9648, "step": 453 }, { "epoch": 0.20580235720761558, "grad_norm": 1.7536772328596786, "learning_rate": 3.2928377153218494e-05, "loss": 0.9788, "step": 454 }, { "epoch": 0.20625566636446055, "grad_norm": 1.4875251459649068, "learning_rate": 3.300090661831369e-05, "loss": 0.9608, "step": 455 }, { "epoch": 0.20670897552130554, "grad_norm": 2.0338352212820685, "learning_rate": 3.307343608340889e-05, "loss": 0.9463, "step": 456 }, { "epoch": 0.2071622846781505, "grad_norm": 1.626294211075029, "learning_rate": 3.314596554850408e-05, "loss": 0.9316, "step": 457 }, { "epoch": 0.20761559383499548, "grad_norm": 1.91432661999181, "learning_rate": 3.321849501359928e-05, "loss": 0.9559, "step": 458 }, { "epoch": 0.20806890299184044, "grad_norm": 1.73936737312804, "learning_rate": 3.329102447869447e-05, "loss": 0.9504, "step": 459 }, { "epoch": 0.2085222121486854, "grad_norm": 1.8456621164350187, "learning_rate": 3.336355394378967e-05, "loss": 0.9613, "step": 460 }, { "epoch": 0.20897552130553038, "grad_norm": 1.5793257081467367, "learning_rate": 3.3436083408884864e-05, "loss": 0.9824, "step": 461 }, { "epoch": 0.20942883046237534, "grad_norm": 1.716702803775347, "learning_rate": 3.350861287398006e-05, "loss": 0.955, "step": 462 }, { "epoch": 0.2098821396192203, "grad_norm": 1.5963732824544927, "learning_rate": 3.3581142339075255e-05, "loss": 0.9643, "step": 463 }, { "epoch": 0.21033544877606528, "grad_norm": 1.576318357655231, "learning_rate": 3.3653671804170444e-05, "loss": 0.939, "step": 464 }, { "epoch": 0.21078875793291024, "grad_norm": 1.3341066719418122, "learning_rate": 3.372620126926564e-05, "loss": 0.966, "step": 465 }, { "epoch": 0.2112420670897552, "grad_norm": 1.577007332868618, "learning_rate": 3.3798730734360836e-05, "loss": 0.9605, "step": 466 }, { "epoch": 0.21169537624660018, "grad_norm": 1.2438279883518881, "learning_rate": 3.387126019945603e-05, "loss": 0.95, "step": 467 }, { "epoch": 0.21214868540344514, "grad_norm": 1.7311023717383647, "learning_rate": 3.394378966455123e-05, "loss": 0.9691, "step": 468 }, { "epoch": 0.2126019945602901, "grad_norm": 1.3532327479345592, "learning_rate": 3.401631912964642e-05, "loss": 0.9519, "step": 469 }, { "epoch": 0.21305530371713507, "grad_norm": 1.4283240442390355, "learning_rate": 3.408884859474161e-05, "loss": 0.9568, "step": 470 }, { "epoch": 0.21350861287398004, "grad_norm": 1.1996727896280002, "learning_rate": 3.416137805983681e-05, "loss": 0.9535, "step": 471 }, { "epoch": 0.21396192203082504, "grad_norm": 1.454659709827934, "learning_rate": 3.423390752493201e-05, "loss": 0.96, "step": 472 }, { "epoch": 0.21441523118767, "grad_norm": 1.4237580965066585, "learning_rate": 3.4306436990027205e-05, "loss": 0.9482, "step": 473 }, { "epoch": 0.21486854034451497, "grad_norm": 1.2459222998644557, "learning_rate": 3.43789664551224e-05, "loss": 0.9298, "step": 474 }, { "epoch": 0.21532184950135994, "grad_norm": 1.2253481611987795, "learning_rate": 3.445149592021759e-05, "loss": 0.9601, "step": 475 }, { "epoch": 0.2157751586582049, "grad_norm": 1.459057401423948, "learning_rate": 3.4524025385312786e-05, "loss": 0.9606, "step": 476 }, { "epoch": 0.21622846781504987, "grad_norm": 1.1233323514766091, "learning_rate": 3.459655485040798e-05, "loss": 0.9721, "step": 477 }, { "epoch": 0.21668177697189483, "grad_norm": 1.3043576100009113, "learning_rate": 3.466908431550318e-05, "loss": 0.9525, "step": 478 }, { "epoch": 0.2171350861287398, "grad_norm": 1.3832254596026332, "learning_rate": 3.474161378059837e-05, "loss": 0.9594, "step": 479 }, { "epoch": 0.21758839528558477, "grad_norm": 1.2584601603856145, "learning_rate": 3.481414324569357e-05, "loss": 0.9455, "step": 480 }, { "epoch": 0.21804170444242973, "grad_norm": 1.3025648768868698, "learning_rate": 3.488667271078876e-05, "loss": 0.9502, "step": 481 }, { "epoch": 0.2184950135992747, "grad_norm": 1.275222027612667, "learning_rate": 3.495920217588395e-05, "loss": 0.9436, "step": 482 }, { "epoch": 0.21894832275611967, "grad_norm": 1.7548243760034752, "learning_rate": 3.503173164097915e-05, "loss": 0.9569, "step": 483 }, { "epoch": 0.21940163191296463, "grad_norm": 0.8399736419025778, "learning_rate": 3.5104261106074345e-05, "loss": 0.9551, "step": 484 }, { "epoch": 0.2198549410698096, "grad_norm": 1.129363645823298, "learning_rate": 3.517679057116954e-05, "loss": 0.9859, "step": 485 }, { "epoch": 0.22030825022665457, "grad_norm": 1.1626536011306632, "learning_rate": 3.5249320036264736e-05, "loss": 0.9453, "step": 486 }, { "epoch": 0.22076155938349953, "grad_norm": 1.864673231136819, "learning_rate": 3.532184950135993e-05, "loss": 0.9649, "step": 487 }, { "epoch": 0.22121486854034453, "grad_norm": 1.1042580111971072, "learning_rate": 3.539437896645513e-05, "loss": 0.9576, "step": 488 }, { "epoch": 0.2216681776971895, "grad_norm": 1.1836427905143154, "learning_rate": 3.546690843155032e-05, "loss": 0.9517, "step": 489 }, { "epoch": 0.22212148685403446, "grad_norm": 2.342247134267181, "learning_rate": 3.553943789664552e-05, "loss": 0.9655, "step": 490 }, { "epoch": 0.22257479601087943, "grad_norm": 1.2170209970010542, "learning_rate": 3.5611967361740714e-05, "loss": 0.9519, "step": 491 }, { "epoch": 0.2230281051677244, "grad_norm": 3.6069422122775756, "learning_rate": 3.56844968268359e-05, "loss": 0.966, "step": 492 }, { "epoch": 0.22348141432456936, "grad_norm": 3.4020997299570754, "learning_rate": 3.57570262919311e-05, "loss": 0.9622, "step": 493 }, { "epoch": 0.22393472348141433, "grad_norm": 2.0703202379124086, "learning_rate": 3.5829555757026295e-05, "loss": 0.9673, "step": 494 }, { "epoch": 0.2243880326382593, "grad_norm": 1.8798428035499974, "learning_rate": 3.590208522212149e-05, "loss": 0.9524, "step": 495 }, { "epoch": 0.22484134179510426, "grad_norm": 2.0689412249061245, "learning_rate": 3.5974614687216686e-05, "loss": 0.9547, "step": 496 }, { "epoch": 0.22529465095194923, "grad_norm": 1.8412218637991171, "learning_rate": 3.604714415231188e-05, "loss": 0.9648, "step": 497 }, { "epoch": 0.2257479601087942, "grad_norm": 1.7173059139337934, "learning_rate": 3.611967361740707e-05, "loss": 0.9845, "step": 498 }, { "epoch": 0.22620126926563916, "grad_norm": 1.622864022129563, "learning_rate": 3.6192203082502266e-05, "loss": 0.9494, "step": 499 }, { "epoch": 0.22665457842248413, "grad_norm": 1.2183858503471552, "learning_rate": 3.626473254759746e-05, "loss": 0.949, "step": 500 }, { "epoch": 0.2271078875793291, "grad_norm": 2.020147289846656, "learning_rate": 3.633726201269266e-05, "loss": 0.9955, "step": 501 }, { "epoch": 0.22756119673617406, "grad_norm": 1.4447638200900164, "learning_rate": 3.6409791477787854e-05, "loss": 0.9498, "step": 502 }, { "epoch": 0.22801450589301903, "grad_norm": 1.7123218331671592, "learning_rate": 3.648232094288305e-05, "loss": 0.9547, "step": 503 }, { "epoch": 0.22846781504986402, "grad_norm": 1.1189017955401865, "learning_rate": 3.6554850407978245e-05, "loss": 0.958, "step": 504 }, { "epoch": 0.228921124206709, "grad_norm": 1.5710798014022602, "learning_rate": 3.662737987307344e-05, "loss": 0.9481, "step": 505 }, { "epoch": 0.22937443336355395, "grad_norm": 1.0928368581759351, "learning_rate": 3.6699909338168636e-05, "loss": 0.9696, "step": 506 }, { "epoch": 0.22982774252039892, "grad_norm": 1.4562855441557099, "learning_rate": 3.677243880326383e-05, "loss": 0.9558, "step": 507 }, { "epoch": 0.2302810516772439, "grad_norm": 1.27946568955478, "learning_rate": 3.684496826835903e-05, "loss": 0.9729, "step": 508 }, { "epoch": 0.23073436083408885, "grad_norm": 1.3648165411198019, "learning_rate": 3.691749773345422e-05, "loss": 0.9641, "step": 509 }, { "epoch": 0.23118766999093382, "grad_norm": 1.2389923769002713, "learning_rate": 3.699002719854941e-05, "loss": 0.9485, "step": 510 }, { "epoch": 0.23164097914777879, "grad_norm": 1.2884338009450194, "learning_rate": 3.706255666364461e-05, "loss": 0.9554, "step": 511 }, { "epoch": 0.23209428830462375, "grad_norm": 1.2635214843844798, "learning_rate": 3.7135086128739804e-05, "loss": 0.96, "step": 512 }, { "epoch": 0.23254759746146872, "grad_norm": 1.3449930784708084, "learning_rate": 3.7207615593835e-05, "loss": 0.9783, "step": 513 }, { "epoch": 0.23300090661831369, "grad_norm": 1.2017070929523874, "learning_rate": 3.7280145058930195e-05, "loss": 0.9523, "step": 514 }, { "epoch": 0.23345421577515865, "grad_norm": 1.182331614691288, "learning_rate": 3.7352674524025384e-05, "loss": 0.9534, "step": 515 }, { "epoch": 0.23390752493200362, "grad_norm": 2.144501459609031, "learning_rate": 3.742520398912058e-05, "loss": 0.9432, "step": 516 }, { "epoch": 0.23436083408884859, "grad_norm": 1.2747142778267662, "learning_rate": 3.7497733454215775e-05, "loss": 0.9566, "step": 517 }, { "epoch": 0.23481414324569355, "grad_norm": 2.4620074969854047, "learning_rate": 3.757026291931097e-05, "loss": 0.9471, "step": 518 }, { "epoch": 0.23526745240253852, "grad_norm": 1.8801220243051684, "learning_rate": 3.764279238440617e-05, "loss": 0.9402, "step": 519 }, { "epoch": 0.2357207615593835, "grad_norm": 2.119286911603709, "learning_rate": 3.771532184950136e-05, "loss": 0.9531, "step": 520 }, { "epoch": 0.23617407071622848, "grad_norm": 1.1300278682233014, "learning_rate": 3.778785131459656e-05, "loss": 0.9375, "step": 521 }, { "epoch": 0.23662737987307345, "grad_norm": 1.6033175128383725, "learning_rate": 3.7860380779691754e-05, "loss": 0.9422, "step": 522 }, { "epoch": 0.2370806890299184, "grad_norm": 1.4232237030481933, "learning_rate": 3.793291024478695e-05, "loss": 0.957, "step": 523 }, { "epoch": 0.23753399818676338, "grad_norm": 1.3808325787943572, "learning_rate": 3.8005439709882145e-05, "loss": 0.9476, "step": 524 }, { "epoch": 0.23798730734360835, "grad_norm": 1.6691142411930429, "learning_rate": 3.807796917497734e-05, "loss": 0.9373, "step": 525 }, { "epoch": 0.2384406165004533, "grad_norm": 1.133448296100496, "learning_rate": 3.815049864007253e-05, "loss": 0.9161, "step": 526 }, { "epoch": 0.23889392565729828, "grad_norm": 1.926215865516284, "learning_rate": 3.8223028105167726e-05, "loss": 0.9573, "step": 527 }, { "epoch": 0.23934723481414324, "grad_norm": 1.340633701403204, "learning_rate": 3.829555757026292e-05, "loss": 0.9557, "step": 528 }, { "epoch": 0.2398005439709882, "grad_norm": 2.1109409055127695, "learning_rate": 3.836808703535812e-05, "loss": 0.9671, "step": 529 }, { "epoch": 0.24025385312783318, "grad_norm": 1.5979412222712766, "learning_rate": 3.844061650045331e-05, "loss": 0.9672, "step": 530 }, { "epoch": 0.24070716228467814, "grad_norm": 1.8683967877175744, "learning_rate": 3.851314596554851e-05, "loss": 0.939, "step": 531 }, { "epoch": 0.2411604714415231, "grad_norm": 1.9360932158405308, "learning_rate": 3.8585675430643704e-05, "loss": 0.9697, "step": 532 }, { "epoch": 0.24161378059836808, "grad_norm": 1.5259129054713283, "learning_rate": 3.865820489573889e-05, "loss": 0.9466, "step": 533 }, { "epoch": 0.24206708975521304, "grad_norm": 1.6509895065249705, "learning_rate": 3.873073436083409e-05, "loss": 0.9299, "step": 534 }, { "epoch": 0.242520398912058, "grad_norm": 1.269177380338361, "learning_rate": 3.8803263825929284e-05, "loss": 0.9565, "step": 535 }, { "epoch": 0.242973708068903, "grad_norm": 1.0338895898209688, "learning_rate": 3.887579329102449e-05, "loss": 0.959, "step": 536 }, { "epoch": 0.24342701722574797, "grad_norm": 1.4818339572638375, "learning_rate": 3.8948322756119676e-05, "loss": 0.9588, "step": 537 }, { "epoch": 0.24388032638259294, "grad_norm": 1.2722675390262228, "learning_rate": 3.902085222121487e-05, "loss": 0.9638, "step": 538 }, { "epoch": 0.2443336355394379, "grad_norm": 1.3612266051887767, "learning_rate": 3.909338168631007e-05, "loss": 0.9274, "step": 539 }, { "epoch": 0.24478694469628287, "grad_norm": 1.480495981829154, "learning_rate": 3.916591115140526e-05, "loss": 0.9382, "step": 540 }, { "epoch": 0.24524025385312784, "grad_norm": 1.2829854988717788, "learning_rate": 3.923844061650046e-05, "loss": 0.959, "step": 541 }, { "epoch": 0.2456935630099728, "grad_norm": 1.3780139592094205, "learning_rate": 3.9310970081595654e-05, "loss": 0.9221, "step": 542 }, { "epoch": 0.24614687216681777, "grad_norm": 1.239582924070583, "learning_rate": 3.938349954669084e-05, "loss": 0.9503, "step": 543 }, { "epoch": 0.24660018132366274, "grad_norm": 1.2936459163175882, "learning_rate": 3.945602901178604e-05, "loss": 0.9498, "step": 544 }, { "epoch": 0.2470534904805077, "grad_norm": 1.3775933047312408, "learning_rate": 3.9528558476881235e-05, "loss": 0.9483, "step": 545 }, { "epoch": 0.24750679963735267, "grad_norm": 1.5598119027454163, "learning_rate": 3.960108794197643e-05, "loss": 0.9447, "step": 546 }, { "epoch": 0.24796010879419764, "grad_norm": 1.1998876824176332, "learning_rate": 3.9673617407071626e-05, "loss": 0.969, "step": 547 }, { "epoch": 0.2484134179510426, "grad_norm": 1.0134801174318884, "learning_rate": 3.974614687216682e-05, "loss": 0.9386, "step": 548 }, { "epoch": 0.24886672710788757, "grad_norm": 1.4381390276751846, "learning_rate": 3.981867633726202e-05, "loss": 0.9718, "step": 549 }, { "epoch": 0.24932003626473254, "grad_norm": 1.4605196802219678, "learning_rate": 3.9891205802357206e-05, "loss": 0.9513, "step": 550 }, { "epoch": 0.2497733454215775, "grad_norm": 1.4001566651018313, "learning_rate": 3.99637352674524e-05, "loss": 0.9295, "step": 551 }, { "epoch": 0.25022665457842247, "grad_norm": 1.291957654122112, "learning_rate": 4.00362647325476e-05, "loss": 0.9462, "step": 552 }, { "epoch": 0.25067996373526746, "grad_norm": 1.1999730788625291, "learning_rate": 4.01087941976428e-05, "loss": 0.9489, "step": 553 }, { "epoch": 0.2511332728921124, "grad_norm": 1.5790423358996064, "learning_rate": 4.018132366273799e-05, "loss": 0.9635, "step": 554 }, { "epoch": 0.2515865820489574, "grad_norm": 1.2381244912841012, "learning_rate": 4.0253853127833185e-05, "loss": 0.9403, "step": 555 }, { "epoch": 0.25203989120580234, "grad_norm": 1.307376724226094, "learning_rate": 4.0326382592928374e-05, "loss": 0.9663, "step": 556 }, { "epoch": 0.25249320036264733, "grad_norm": 1.3002750678175563, "learning_rate": 4.0398912058023576e-05, "loss": 0.9529, "step": 557 }, { "epoch": 0.25294650951949227, "grad_norm": 1.6072989036837477, "learning_rate": 4.0471441523118765e-05, "loss": 0.9564, "step": 558 }, { "epoch": 0.25339981867633726, "grad_norm": 1.0194923838105554, "learning_rate": 4.054397098821397e-05, "loss": 0.9326, "step": 559 }, { "epoch": 0.25385312783318226, "grad_norm": 1.3024416403067682, "learning_rate": 4.061650045330916e-05, "loss": 0.9556, "step": 560 }, { "epoch": 0.2543064369900272, "grad_norm": 1.7419771580738306, "learning_rate": 4.068902991840435e-05, "loss": 0.9594, "step": 561 }, { "epoch": 0.2547597461468722, "grad_norm": 0.8971046707659623, "learning_rate": 4.0761559383499555e-05, "loss": 0.9467, "step": 562 }, { "epoch": 0.25521305530371713, "grad_norm": 1.645313176607941, "learning_rate": 4.0834088848594744e-05, "loss": 0.9312, "step": 563 }, { "epoch": 0.2556663644605621, "grad_norm": 1.195448452751475, "learning_rate": 4.0906618313689946e-05, "loss": 0.9589, "step": 564 }, { "epoch": 0.25611967361740706, "grad_norm": 1.6457482280515048, "learning_rate": 4.0979147778785135e-05, "loss": 0.9454, "step": 565 }, { "epoch": 0.25657298277425206, "grad_norm": 0.7854798695255296, "learning_rate": 4.105167724388033e-05, "loss": 0.9635, "step": 566 }, { "epoch": 0.257026291931097, "grad_norm": 1.488885666451011, "learning_rate": 4.112420670897552e-05, "loss": 0.9485, "step": 567 }, { "epoch": 0.257479601087942, "grad_norm": 1.1775176610973483, "learning_rate": 4.119673617407072e-05, "loss": 0.9373, "step": 568 }, { "epoch": 0.25793291024478693, "grad_norm": 1.6447196547334462, "learning_rate": 4.126926563916591e-05, "loss": 0.9698, "step": 569 }, { "epoch": 0.2583862194016319, "grad_norm": 1.064849081769065, "learning_rate": 4.1341795104261113e-05, "loss": 0.9679, "step": 570 }, { "epoch": 0.25883952855847686, "grad_norm": 1.9217854084732204, "learning_rate": 4.14143245693563e-05, "loss": 0.957, "step": 571 }, { "epoch": 0.25929283771532186, "grad_norm": 1.2819150792240919, "learning_rate": 4.14868540344515e-05, "loss": 0.9487, "step": 572 }, { "epoch": 0.2597461468721668, "grad_norm": 1.540776515928187, "learning_rate": 4.1559383499546694e-05, "loss": 0.9262, "step": 573 }, { "epoch": 0.2601994560290118, "grad_norm": 1.7253705410680575, "learning_rate": 4.163191296464189e-05, "loss": 0.96, "step": 574 }, { "epoch": 0.2606527651858567, "grad_norm": 1.1089352134676513, "learning_rate": 4.170444242973708e-05, "loss": 0.9744, "step": 575 }, { "epoch": 0.2611060743427017, "grad_norm": 1.777148657032061, "learning_rate": 4.177697189483228e-05, "loss": 0.949, "step": 576 }, { "epoch": 0.2615593834995467, "grad_norm": 1.4040459375685417, "learning_rate": 4.1849501359927477e-05, "loss": 0.9524, "step": 577 }, { "epoch": 0.26201269265639165, "grad_norm": 1.363661384446174, "learning_rate": 4.1922030825022666e-05, "loss": 0.9248, "step": 578 }, { "epoch": 0.26246600181323665, "grad_norm": 1.6117646709108437, "learning_rate": 4.199456029011787e-05, "loss": 0.9726, "step": 579 }, { "epoch": 0.2629193109700816, "grad_norm": 1.2476370201670928, "learning_rate": 4.206708975521306e-05, "loss": 0.9327, "step": 580 }, { "epoch": 0.2633726201269266, "grad_norm": 1.4314195517941304, "learning_rate": 4.213961922030826e-05, "loss": 0.9463, "step": 581 }, { "epoch": 0.2638259292837715, "grad_norm": 1.5430061204680638, "learning_rate": 4.221214868540345e-05, "loss": 0.9456, "step": 582 }, { "epoch": 0.2642792384406165, "grad_norm": 1.0488828633476652, "learning_rate": 4.2284678150498644e-05, "loss": 0.9476, "step": 583 }, { "epoch": 0.26473254759746145, "grad_norm": 2.1197775593819923, "learning_rate": 4.235720761559383e-05, "loss": 0.9314, "step": 584 }, { "epoch": 0.26518585675430645, "grad_norm": 1.8056138737176077, "learning_rate": 4.2429737080689035e-05, "loss": 0.9614, "step": 585 }, { "epoch": 0.2656391659111514, "grad_norm": 1.5801111277044997, "learning_rate": 4.2502266545784224e-05, "loss": 0.966, "step": 586 }, { "epoch": 0.2660924750679964, "grad_norm": 1.5349632629898555, "learning_rate": 4.257479601087943e-05, "loss": 0.9573, "step": 587 }, { "epoch": 0.2665457842248413, "grad_norm": 1.556743587544592, "learning_rate": 4.2647325475974616e-05, "loss": 0.9504, "step": 588 }, { "epoch": 0.2669990933816863, "grad_norm": 1.1596358445369075, "learning_rate": 4.271985494106981e-05, "loss": 0.9672, "step": 589 }, { "epoch": 0.26745240253853125, "grad_norm": 1.195398568484075, "learning_rate": 4.279238440616501e-05, "loss": 0.9626, "step": 590 }, { "epoch": 0.26790571169537625, "grad_norm": 1.1033787807812672, "learning_rate": 4.28649138712602e-05, "loss": 0.9523, "step": 591 }, { "epoch": 0.26835902085222124, "grad_norm": 1.0042108309685107, "learning_rate": 4.2937443336355405e-05, "loss": 0.9548, "step": 592 }, { "epoch": 0.2688123300090662, "grad_norm": 1.9032736640558985, "learning_rate": 4.3009972801450594e-05, "loss": 0.9399, "step": 593 }, { "epoch": 0.2692656391659112, "grad_norm": 1.136651589882562, "learning_rate": 4.308250226654579e-05, "loss": 0.956, "step": 594 }, { "epoch": 0.2697189483227561, "grad_norm": 1.7103361967878274, "learning_rate": 4.315503173164098e-05, "loss": 0.9482, "step": 595 }, { "epoch": 0.2701722574796011, "grad_norm": 1.541116995730545, "learning_rate": 4.322756119673618e-05, "loss": 0.9682, "step": 596 }, { "epoch": 0.27062556663644605, "grad_norm": 1.1267202170601, "learning_rate": 4.330009066183137e-05, "loss": 0.942, "step": 597 }, { "epoch": 0.27107887579329104, "grad_norm": 1.565888393703851, "learning_rate": 4.337262012692657e-05, "loss": 0.9377, "step": 598 }, { "epoch": 0.271532184950136, "grad_norm": 1.4273425430379525, "learning_rate": 4.344514959202176e-05, "loss": 0.9437, "step": 599 }, { "epoch": 0.271985494106981, "grad_norm": 1.5600849488326662, "learning_rate": 4.351767905711696e-05, "loss": 0.9476, "step": 600 }, { "epoch": 0.2724388032638259, "grad_norm": 1.135573122780415, "learning_rate": 4.3590208522212146e-05, "loss": 0.9428, "step": 601 }, { "epoch": 0.2728921124206709, "grad_norm": 1.3490206870238195, "learning_rate": 4.366273798730735e-05, "loss": 0.9691, "step": 602 }, { "epoch": 0.27334542157751585, "grad_norm": 1.128610946917998, "learning_rate": 4.373526745240254e-05, "loss": 0.9489, "step": 603 }, { "epoch": 0.27379873073436084, "grad_norm": 1.6060508246099943, "learning_rate": 4.380779691749774e-05, "loss": 0.9397, "step": 604 }, { "epoch": 0.2742520398912058, "grad_norm": 1.6422481783015415, "learning_rate": 4.388032638259293e-05, "loss": 0.9442, "step": 605 }, { "epoch": 0.2747053490480508, "grad_norm": 1.1786944961955894, "learning_rate": 4.3952855847688125e-05, "loss": 0.9471, "step": 606 }, { "epoch": 0.2751586582048957, "grad_norm": 1.4822634265150343, "learning_rate": 4.402538531278332e-05, "loss": 0.9806, "step": 607 }, { "epoch": 0.2756119673617407, "grad_norm": 1.1157046712485972, "learning_rate": 4.4097914777878516e-05, "loss": 0.9593, "step": 608 }, { "epoch": 0.2760652765185857, "grad_norm": 1.2696351292897434, "learning_rate": 4.417044424297372e-05, "loss": 0.9485, "step": 609 }, { "epoch": 0.27651858567543064, "grad_norm": 1.5004429262170311, "learning_rate": 4.424297370806891e-05, "loss": 0.9605, "step": 610 }, { "epoch": 0.27697189483227563, "grad_norm": 1.3166953584045524, "learning_rate": 4.43155031731641e-05, "loss": 0.9308, "step": 611 }, { "epoch": 0.2774252039891206, "grad_norm": 1.5383699431969209, "learning_rate": 4.438803263825929e-05, "loss": 0.955, "step": 612 }, { "epoch": 0.27787851314596557, "grad_norm": 1.3181004348883552, "learning_rate": 4.4460562103354495e-05, "loss": 0.9264, "step": 613 }, { "epoch": 0.2783318223028105, "grad_norm": 1.4599614054002643, "learning_rate": 4.4533091568449684e-05, "loss": 0.9313, "step": 614 }, { "epoch": 0.2787851314596555, "grad_norm": 1.23469334382025, "learning_rate": 4.4605621033544886e-05, "loss": 0.9534, "step": 615 }, { "epoch": 0.27923844061650044, "grad_norm": 1.45190394219971, "learning_rate": 4.4678150498640075e-05, "loss": 0.9223, "step": 616 }, { "epoch": 0.27969174977334543, "grad_norm": 1.9880340072203195, "learning_rate": 4.475067996373527e-05, "loss": 0.9432, "step": 617 }, { "epoch": 0.28014505893019037, "grad_norm": 0.7192033033206872, "learning_rate": 4.4823209428830466e-05, "loss": 0.9431, "step": 618 }, { "epoch": 0.28059836808703537, "grad_norm": 1.4351317849040863, "learning_rate": 4.489573889392566e-05, "loss": 0.942, "step": 619 }, { "epoch": 0.2810516772438803, "grad_norm": 1.8447619120650107, "learning_rate": 4.496826835902085e-05, "loss": 0.9388, "step": 620 }, { "epoch": 0.2815049864007253, "grad_norm": 1.2907046235938708, "learning_rate": 4.504079782411605e-05, "loss": 0.9585, "step": 621 }, { "epoch": 0.28195829555757024, "grad_norm": 1.2251979616107977, "learning_rate": 4.511332728921124e-05, "loss": 0.9501, "step": 622 }, { "epoch": 0.28241160471441523, "grad_norm": 1.551927285602187, "learning_rate": 4.518585675430644e-05, "loss": 0.946, "step": 623 }, { "epoch": 0.2828649138712602, "grad_norm": 2.061551751088097, "learning_rate": 4.525838621940164e-05, "loss": 0.9285, "step": 624 }, { "epoch": 0.28331822302810517, "grad_norm": 0.7512781294983745, "learning_rate": 4.533091568449683e-05, "loss": 0.9371, "step": 625 }, { "epoch": 0.28377153218495016, "grad_norm": 2.0405974876420636, "learning_rate": 4.540344514959203e-05, "loss": 0.9567, "step": 626 }, { "epoch": 0.2842248413417951, "grad_norm": 1.4742751324848076, "learning_rate": 4.547597461468722e-05, "loss": 0.9422, "step": 627 }, { "epoch": 0.2846781504986401, "grad_norm": 1.5328943781014133, "learning_rate": 4.5548504079782416e-05, "loss": 0.9357, "step": 628 }, { "epoch": 0.28513145965548503, "grad_norm": 1.3232319267999346, "learning_rate": 4.5621033544877605e-05, "loss": 0.9566, "step": 629 }, { "epoch": 0.28558476881233, "grad_norm": 1.1144389268150887, "learning_rate": 4.569356300997281e-05, "loss": 0.9292, "step": 630 }, { "epoch": 0.28603807796917496, "grad_norm": 1.2136622195056797, "learning_rate": 4.5766092475068e-05, "loss": 0.9519, "step": 631 }, { "epoch": 0.28649138712601996, "grad_norm": 1.306717656271579, "learning_rate": 4.58386219401632e-05, "loss": 0.9279, "step": 632 }, { "epoch": 0.2869446962828649, "grad_norm": 1.4432515955110041, "learning_rate": 4.591115140525839e-05, "loss": 0.9349, "step": 633 }, { "epoch": 0.2873980054397099, "grad_norm": 1.4164479779407202, "learning_rate": 4.5983680870353584e-05, "loss": 0.9474, "step": 634 }, { "epoch": 0.28785131459655483, "grad_norm": 1.5294999958974749, "learning_rate": 4.605621033544878e-05, "loss": 0.9445, "step": 635 }, { "epoch": 0.2883046237533998, "grad_norm": 1.5895802553551033, "learning_rate": 4.6128739800543975e-05, "loss": 0.9322, "step": 636 }, { "epoch": 0.28875793291024476, "grad_norm": 1.498006153484362, "learning_rate": 4.6201269265639164e-05, "loss": 0.9553, "step": 637 }, { "epoch": 0.28921124206708976, "grad_norm": 1.1755218537203898, "learning_rate": 4.627379873073437e-05, "loss": 0.9383, "step": 638 }, { "epoch": 0.2896645512239347, "grad_norm": 1.4137079765174556, "learning_rate": 4.6346328195829556e-05, "loss": 0.9408, "step": 639 }, { "epoch": 0.2901178603807797, "grad_norm": 1.0248597069673635, "learning_rate": 4.641885766092475e-05, "loss": 0.9334, "step": 640 }, { "epoch": 0.2905711695376247, "grad_norm": 2.2747519060711303, "learning_rate": 4.6491387126019954e-05, "loss": 0.9202, "step": 641 }, { "epoch": 0.2910244786944696, "grad_norm": 1.3427678665040443, "learning_rate": 4.656391659111514e-05, "loss": 0.9299, "step": 642 }, { "epoch": 0.2914777878513146, "grad_norm": 1.6888287052454223, "learning_rate": 4.6636446056210345e-05, "loss": 0.9339, "step": 643 }, { "epoch": 0.29193109700815956, "grad_norm": 2.0069576737966837, "learning_rate": 4.6708975521305534e-05, "loss": 0.9366, "step": 644 }, { "epoch": 0.29238440616500455, "grad_norm": 1.0789915284508653, "learning_rate": 4.678150498640073e-05, "loss": 0.9531, "step": 645 }, { "epoch": 0.2928377153218495, "grad_norm": 2.023101410388226, "learning_rate": 4.6854034451495925e-05, "loss": 0.9714, "step": 646 }, { "epoch": 0.2932910244786945, "grad_norm": 1.1164339111917163, "learning_rate": 4.692656391659112e-05, "loss": 0.9527, "step": 647 }, { "epoch": 0.2937443336355394, "grad_norm": 1.9092266421533028, "learning_rate": 4.699909338168631e-05, "loss": 0.9579, "step": 648 }, { "epoch": 0.2941976427923844, "grad_norm": 1.4251562678565066, "learning_rate": 4.707162284678151e-05, "loss": 0.943, "step": 649 }, { "epoch": 0.29465095194922936, "grad_norm": 1.643575347582023, "learning_rate": 4.71441523118767e-05, "loss": 0.9401, "step": 650 }, { "epoch": 0.29510426110607435, "grad_norm": 1.427653154543193, "learning_rate": 4.72166817769719e-05, "loss": 0.9246, "step": 651 }, { "epoch": 0.2955575702629193, "grad_norm": 1.6420314732057328, "learning_rate": 4.728921124206709e-05, "loss": 0.9473, "step": 652 }, { "epoch": 0.2960108794197643, "grad_norm": 1.5372403835920674, "learning_rate": 4.736174070716229e-05, "loss": 0.9394, "step": 653 }, { "epoch": 0.2964641885766092, "grad_norm": 1.443002825729381, "learning_rate": 4.743427017225748e-05, "loss": 0.9411, "step": 654 }, { "epoch": 0.2969174977334542, "grad_norm": 1.383833767613978, "learning_rate": 4.750679963735268e-05, "loss": 0.9527, "step": 655 }, { "epoch": 0.2973708068902992, "grad_norm": 1.2672921924395384, "learning_rate": 4.7579329102447876e-05, "loss": 0.9497, "step": 656 }, { "epoch": 0.29782411604714415, "grad_norm": 1.322260122751291, "learning_rate": 4.7651858567543065e-05, "loss": 0.9572, "step": 657 }, { "epoch": 0.29827742520398914, "grad_norm": 1.4198954924053768, "learning_rate": 4.772438803263827e-05, "loss": 0.9245, "step": 658 }, { "epoch": 0.2987307343608341, "grad_norm": 1.6074043812265746, "learning_rate": 4.7796917497733456e-05, "loss": 0.9393, "step": 659 }, { "epoch": 0.2991840435176791, "grad_norm": 1.3889537307043998, "learning_rate": 4.786944696282866e-05, "loss": 0.947, "step": 660 }, { "epoch": 0.299637352674524, "grad_norm": 1.4247655267517096, "learning_rate": 4.794197642792385e-05, "loss": 0.9405, "step": 661 }, { "epoch": 0.300090661831369, "grad_norm": 1.20279970873019, "learning_rate": 4.801450589301904e-05, "loss": 0.9587, "step": 662 }, { "epoch": 0.30054397098821395, "grad_norm": 1.3786019714696676, "learning_rate": 4.808703535811424e-05, "loss": 0.9411, "step": 663 }, { "epoch": 0.30099728014505894, "grad_norm": 1.929617923565853, "learning_rate": 4.8159564823209434e-05, "loss": 0.9344, "step": 664 }, { "epoch": 0.3014505893019039, "grad_norm": 1.0067407455516777, "learning_rate": 4.823209428830462e-05, "loss": 0.9178, "step": 665 }, { "epoch": 0.3019038984587489, "grad_norm": 1.549228113666535, "learning_rate": 4.8304623753399826e-05, "loss": 0.9343, "step": 666 }, { "epoch": 0.3023572076155938, "grad_norm": 0.7627259150470267, "learning_rate": 4.8377153218495015e-05, "loss": 0.9463, "step": 667 }, { "epoch": 0.3028105167724388, "grad_norm": 1.118071455001717, "learning_rate": 4.844968268359021e-05, "loss": 0.9403, "step": 668 }, { "epoch": 0.30326382592928375, "grad_norm": 2.085374202560522, "learning_rate": 4.8522212148685406e-05, "loss": 0.951, "step": 669 }, { "epoch": 0.30371713508612874, "grad_norm": 1.4349210170197022, "learning_rate": 4.85947416137806e-05, "loss": 0.9274, "step": 670 }, { "epoch": 0.3041704442429737, "grad_norm": 1.5069733806820143, "learning_rate": 4.866727107887579e-05, "loss": 0.9479, "step": 671 }, { "epoch": 0.3046237533998187, "grad_norm": 1.7533379438880692, "learning_rate": 4.873980054397099e-05, "loss": 0.9279, "step": 672 }, { "epoch": 0.30507706255666367, "grad_norm": 1.5943690943812319, "learning_rate": 4.881233000906619e-05, "loss": 0.9211, "step": 673 }, { "epoch": 0.3055303717135086, "grad_norm": 1.8387553355893573, "learning_rate": 4.8884859474161385e-05, "loss": 0.9294, "step": 674 }, { "epoch": 0.3059836808703536, "grad_norm": 1.1760719368516273, "learning_rate": 4.895738893925658e-05, "loss": 0.9575, "step": 675 }, { "epoch": 0.30643699002719854, "grad_norm": 1.4345071390854536, "learning_rate": 4.902991840435177e-05, "loss": 0.9344, "step": 676 }, { "epoch": 0.30689029918404354, "grad_norm": 1.2391889309053328, "learning_rate": 4.910244786944697e-05, "loss": 0.9552, "step": 677 }, { "epoch": 0.3073436083408885, "grad_norm": 2.318642027700126, "learning_rate": 4.917497733454216e-05, "loss": 0.9248, "step": 678 }, { "epoch": 0.30779691749773347, "grad_norm": 1.2668720948466181, "learning_rate": 4.9247506799637356e-05, "loss": 0.9259, "step": 679 }, { "epoch": 0.3082502266545784, "grad_norm": 2.398702632279838, "learning_rate": 4.932003626473255e-05, "loss": 0.9419, "step": 680 }, { "epoch": 0.3087035358114234, "grad_norm": 2.0310362041673673, "learning_rate": 4.939256572982775e-05, "loss": 0.9293, "step": 681 }, { "epoch": 0.30915684496826834, "grad_norm": 1.4453520499587376, "learning_rate": 4.946509519492294e-05, "loss": 0.9602, "step": 682 }, { "epoch": 0.30961015412511333, "grad_norm": 2.011099408055547, "learning_rate": 4.953762466001814e-05, "loss": 0.9286, "step": 683 }, { "epoch": 0.3100634632819583, "grad_norm": 1.2684667838083392, "learning_rate": 4.961015412511333e-05, "loss": 0.9299, "step": 684 }, { "epoch": 0.31051677243880327, "grad_norm": 1.5747713645808834, "learning_rate": 4.9682683590208524e-05, "loss": 0.9476, "step": 685 }, { "epoch": 0.3109700815956482, "grad_norm": 1.123407952679056, "learning_rate": 4.975521305530372e-05, "loss": 0.9522, "step": 686 }, { "epoch": 0.3114233907524932, "grad_norm": 1.597207251130449, "learning_rate": 4.9827742520398915e-05, "loss": 0.956, "step": 687 }, { "epoch": 0.3118766999093382, "grad_norm": 1.6820258157634909, "learning_rate": 4.990027198549412e-05, "loss": 0.9559, "step": 688 }, { "epoch": 0.31233000906618313, "grad_norm": 1.4018764858507986, "learning_rate": 4.9972801450589307e-05, "loss": 0.9442, "step": 689 }, { "epoch": 0.31278331822302813, "grad_norm": 1.6131708884834055, "learning_rate": 5.00453309156845e-05, "loss": 0.9326, "step": 690 }, { "epoch": 0.31323662737987307, "grad_norm": 1.2462099631286407, "learning_rate": 5.01178603807797e-05, "loss": 0.9487, "step": 691 }, { "epoch": 0.31368993653671806, "grad_norm": 1.5208327473941317, "learning_rate": 5.0190389845874894e-05, "loss": 0.9424, "step": 692 }, { "epoch": 0.314143245693563, "grad_norm": 1.4826036015931, "learning_rate": 5.026291931097008e-05, "loss": 0.9336, "step": 693 }, { "epoch": 0.314596554850408, "grad_norm": 1.0222214380788672, "learning_rate": 5.0335448776065285e-05, "loss": 0.9289, "step": 694 }, { "epoch": 0.31504986400725293, "grad_norm": 2.2856275746771444, "learning_rate": 5.0407978241160474e-05, "loss": 0.9511, "step": 695 }, { "epoch": 0.3155031731640979, "grad_norm": 1.606043549452128, "learning_rate": 5.048050770625567e-05, "loss": 0.968, "step": 696 }, { "epoch": 0.31595648232094287, "grad_norm": 1.8605764784677907, "learning_rate": 5.0553037171350865e-05, "loss": 0.9382, "step": 697 }, { "epoch": 0.31640979147778786, "grad_norm": 1.9170253947578217, "learning_rate": 5.062556663644606e-05, "loss": 0.9168, "step": 698 }, { "epoch": 0.3168631006346328, "grad_norm": 1.4747125669832035, "learning_rate": 5.069809610154125e-05, "loss": 0.9577, "step": 699 }, { "epoch": 0.3173164097914778, "grad_norm": 1.1456782744336196, "learning_rate": 5.077062556663645e-05, "loss": 0.9277, "step": 700 }, { "epoch": 0.31776971894832273, "grad_norm": 2.4945305919427265, "learning_rate": 5.084315503173164e-05, "loss": 0.9382, "step": 701 }, { "epoch": 0.3182230281051677, "grad_norm": 1.7506456691391596, "learning_rate": 5.0915684496826844e-05, "loss": 0.9247, "step": 702 }, { "epoch": 0.31867633726201267, "grad_norm": 1.9290101775128465, "learning_rate": 5.098821396192203e-05, "loss": 0.9278, "step": 703 }, { "epoch": 0.31912964641885766, "grad_norm": 1.943948849995102, "learning_rate": 5.106074342701723e-05, "loss": 0.9409, "step": 704 }, { "epoch": 0.31958295557570265, "grad_norm": 1.0925635074806124, "learning_rate": 5.113327289211243e-05, "loss": 0.9767, "step": 705 }, { "epoch": 0.3200362647325476, "grad_norm": 2.7443835444337688, "learning_rate": 5.120580235720762e-05, "loss": 0.9459, "step": 706 }, { "epoch": 0.3204895738893926, "grad_norm": 1.6399571873406757, "learning_rate": 5.1278331822302816e-05, "loss": 0.9416, "step": 707 }, { "epoch": 0.3209428830462375, "grad_norm": 2.772354970587644, "learning_rate": 5.135086128739801e-05, "loss": 0.9653, "step": 708 }, { "epoch": 0.3213961922030825, "grad_norm": 2.414772295833044, "learning_rate": 5.142339075249321e-05, "loss": 0.9551, "step": 709 }, { "epoch": 0.32184950135992746, "grad_norm": 2.060630552139286, "learning_rate": 5.1495920217588396e-05, "loss": 0.9738, "step": 710 }, { "epoch": 0.32230281051677245, "grad_norm": 2.2322406420972327, "learning_rate": 5.15684496826836e-05, "loss": 0.9462, "step": 711 }, { "epoch": 0.3227561196736174, "grad_norm": 1.5836345924954258, "learning_rate": 5.164097914777879e-05, "loss": 0.9437, "step": 712 }, { "epoch": 0.3232094288304624, "grad_norm": 2.456757066423452, "learning_rate": 5.171350861287398e-05, "loss": 0.9554, "step": 713 }, { "epoch": 0.3236627379873073, "grad_norm": 1.694641394167381, "learning_rate": 5.178603807796918e-05, "loss": 0.947, "step": 714 }, { "epoch": 0.3241160471441523, "grad_norm": 2.243767862017921, "learning_rate": 5.1858567543064374e-05, "loss": 0.91, "step": 715 }, { "epoch": 0.32456935630099726, "grad_norm": 1.872888216798527, "learning_rate": 5.193109700815956e-05, "loss": 0.936, "step": 716 }, { "epoch": 0.32502266545784225, "grad_norm": 1.6499139879123348, "learning_rate": 5.2003626473254766e-05, "loss": 0.9561, "step": 717 }, { "epoch": 0.3254759746146872, "grad_norm": 14.27191138969359, "learning_rate": 5.2076155938349955e-05, "loss": 0.9815, "step": 718 }, { "epoch": 0.3259292837715322, "grad_norm": 4.906331644511764, "learning_rate": 5.214868540344516e-05, "loss": 0.979, "step": 719 }, { "epoch": 0.3263825929283772, "grad_norm": 2.70976793228864, "learning_rate": 5.222121486854035e-05, "loss": 0.9663, "step": 720 }, { "epoch": 0.3268359020852221, "grad_norm": 3.5571947908840644, "learning_rate": 5.229374433363554e-05, "loss": 0.9513, "step": 721 }, { "epoch": 0.3272892112420671, "grad_norm": 3.6818530706869774, "learning_rate": 5.2366273798730744e-05, "loss": 0.9455, "step": 722 }, { "epoch": 0.32774252039891205, "grad_norm": 2.645555123605198, "learning_rate": 5.243880326382593e-05, "loss": 0.965, "step": 723 }, { "epoch": 0.32819582955575705, "grad_norm": 2.784085621744387, "learning_rate": 5.251133272892113e-05, "loss": 0.9533, "step": 724 }, { "epoch": 0.328649138712602, "grad_norm": 2.3024027886423517, "learning_rate": 5.2583862194016325e-05, "loss": 0.9354, "step": 725 }, { "epoch": 0.329102447869447, "grad_norm": 2.579127658747222, "learning_rate": 5.265639165911152e-05, "loss": 0.9397, "step": 726 }, { "epoch": 0.3295557570262919, "grad_norm": 1.2708395013442761, "learning_rate": 5.272892112420671e-05, "loss": 0.9657, "step": 727 }, { "epoch": 0.3300090661831369, "grad_norm": 1.9629383819187782, "learning_rate": 5.280145058930191e-05, "loss": 0.952, "step": 728 }, { "epoch": 0.33046237533998185, "grad_norm": 1.8655648944363101, "learning_rate": 5.28739800543971e-05, "loss": 0.953, "step": 729 }, { "epoch": 0.33091568449682685, "grad_norm": 1.2396388000732834, "learning_rate": 5.2946509519492296e-05, "loss": 0.9355, "step": 730 }, { "epoch": 0.3313689936536718, "grad_norm": 1.8809795764883877, "learning_rate": 5.301903898458749e-05, "loss": 0.9526, "step": 731 }, { "epoch": 0.3318223028105168, "grad_norm": 1.3191286502350539, "learning_rate": 5.309156844968269e-05, "loss": 0.9319, "step": 732 }, { "epoch": 0.3322756119673617, "grad_norm": 1.9798303247571511, "learning_rate": 5.3164097914777877e-05, "loss": 0.9374, "step": 733 }, { "epoch": 0.3327289211242067, "grad_norm": 1.2759488480503673, "learning_rate": 5.323662737987308e-05, "loss": 0.965, "step": 734 }, { "epoch": 0.33318223028105165, "grad_norm": 1.8322738341727243, "learning_rate": 5.330915684496827e-05, "loss": 0.963, "step": 735 }, { "epoch": 0.33363553943789664, "grad_norm": 1.6352896069820073, "learning_rate": 5.338168631006347e-05, "loss": 0.9737, "step": 736 }, { "epoch": 0.33408884859474164, "grad_norm": 1.3918609316239046, "learning_rate": 5.3454215775158666e-05, "loss": 0.9438, "step": 737 }, { "epoch": 0.3345421577515866, "grad_norm": 1.3228035517395398, "learning_rate": 5.3526745240253855e-05, "loss": 0.9208, "step": 738 }, { "epoch": 0.33499546690843157, "grad_norm": 1.6255087275273532, "learning_rate": 5.359927470534906e-05, "loss": 0.9404, "step": 739 }, { "epoch": 0.3354487760652765, "grad_norm": 1.3172359407571101, "learning_rate": 5.3671804170444246e-05, "loss": 0.941, "step": 740 }, { "epoch": 0.3359020852221215, "grad_norm": 1.6297905344371948, "learning_rate": 5.374433363553944e-05, "loss": 0.9437, "step": 741 }, { "epoch": 0.33635539437896644, "grad_norm": 1.3056419101852335, "learning_rate": 5.381686310063464e-05, "loss": 0.9404, "step": 742 }, { "epoch": 0.33680870353581144, "grad_norm": 1.3377445143115474, "learning_rate": 5.3889392565729834e-05, "loss": 0.9263, "step": 743 }, { "epoch": 0.3372620126926564, "grad_norm": 1.001597063894214, "learning_rate": 5.396192203082502e-05, "loss": 0.9271, "step": 744 }, { "epoch": 0.33771532184950137, "grad_norm": 1.871707356585933, "learning_rate": 5.4034451495920225e-05, "loss": 0.9371, "step": 745 }, { "epoch": 0.3381686310063463, "grad_norm": 1.4830158478531945, "learning_rate": 5.4106980961015414e-05, "loss": 0.9522, "step": 746 }, { "epoch": 0.3386219401631913, "grad_norm": 1.6566586212983272, "learning_rate": 5.4179510426110616e-05, "loss": 0.9524, "step": 747 }, { "epoch": 0.33907524932003624, "grad_norm": 0.9045872877310464, "learning_rate": 5.4252039891205805e-05, "loss": 0.9213, "step": 748 }, { "epoch": 0.33952855847688124, "grad_norm": 2.151902261591363, "learning_rate": 5.4324569356301e-05, "loss": 0.9256, "step": 749 }, { "epoch": 0.3399818676337262, "grad_norm": 1.4616099276475119, "learning_rate": 5.439709882139619e-05, "loss": 0.9431, "step": 750 }, { "epoch": 0.34043517679057117, "grad_norm": 1.5446030109830196, "learning_rate": 5.446962828649139e-05, "loss": 0.9301, "step": 751 }, { "epoch": 0.34088848594741616, "grad_norm": 1.7241557764299809, "learning_rate": 5.454215775158659e-05, "loss": 0.9531, "step": 752 }, { "epoch": 0.3413417951042611, "grad_norm": 1.2094845365091573, "learning_rate": 5.4614687216681784e-05, "loss": 0.9412, "step": 753 }, { "epoch": 0.3417951042611061, "grad_norm": 1.5959627438521686, "learning_rate": 5.468721668177698e-05, "loss": 0.9491, "step": 754 }, { "epoch": 0.34224841341795104, "grad_norm": 1.6644343552917442, "learning_rate": 5.475974614687217e-05, "loss": 0.933, "step": 755 }, { "epoch": 0.34270172257479603, "grad_norm": 1.3011371533464957, "learning_rate": 5.483227561196737e-05, "loss": 0.9344, "step": 756 }, { "epoch": 0.34315503173164097, "grad_norm": 1.1195681959219173, "learning_rate": 5.490480507706256e-05, "loss": 0.9609, "step": 757 }, { "epoch": 0.34360834088848596, "grad_norm": 1.8164413707936073, "learning_rate": 5.4977334542157755e-05, "loss": 0.9417, "step": 758 }, { "epoch": 0.3440616500453309, "grad_norm": 1.1700058888793645, "learning_rate": 5.504986400725295e-05, "loss": 0.9279, "step": 759 }, { "epoch": 0.3445149592021759, "grad_norm": 1.8051358340955026, "learning_rate": 5.512239347234815e-05, "loss": 0.9457, "step": 760 }, { "epoch": 0.34496826835902084, "grad_norm": 1.6711570478921167, "learning_rate": 5.5194922937443336e-05, "loss": 0.9456, "step": 761 }, { "epoch": 0.34542157751586583, "grad_norm": 1.3647272668717862, "learning_rate": 5.526745240253854e-05, "loss": 0.9558, "step": 762 }, { "epoch": 0.34587488667271077, "grad_norm": 1.9918458135029675, "learning_rate": 5.533998186763373e-05, "loss": 0.9307, "step": 763 }, { "epoch": 0.34632819582955576, "grad_norm": 1.5588487059053593, "learning_rate": 5.541251133272893e-05, "loss": 0.9268, "step": 764 }, { "epoch": 0.3467815049864007, "grad_norm": 1.703386531274259, "learning_rate": 5.548504079782412e-05, "loss": 0.9188, "step": 765 }, { "epoch": 0.3472348141432457, "grad_norm": 1.2934814450755638, "learning_rate": 5.5557570262919314e-05, "loss": 0.9598, "step": 766 }, { "epoch": 0.34768812330009063, "grad_norm": 1.2928405130100107, "learning_rate": 5.56300997280145e-05, "loss": 0.9512, "step": 767 }, { "epoch": 0.34814143245693563, "grad_norm": 1.2435153039805282, "learning_rate": 5.5702629193109706e-05, "loss": 0.9217, "step": 768 }, { "epoch": 0.3485947416137806, "grad_norm": 1.3857379328018322, "learning_rate": 5.57751586582049e-05, "loss": 0.9231, "step": 769 }, { "epoch": 0.34904805077062556, "grad_norm": 1.9927241506804212, "learning_rate": 5.58476881233001e-05, "loss": 0.9254, "step": 770 }, { "epoch": 0.34950135992747056, "grad_norm": 1.2773892745695101, "learning_rate": 5.592021758839529e-05, "loss": 0.9343, "step": 771 }, { "epoch": 0.3499546690843155, "grad_norm": 1.1659303152428873, "learning_rate": 5.599274705349048e-05, "loss": 0.9351, "step": 772 }, { "epoch": 0.3504079782411605, "grad_norm": 1.7074661165938192, "learning_rate": 5.6065276518585684e-05, "loss": 0.9458, "step": 773 }, { "epoch": 0.35086128739800543, "grad_norm": 1.2843992359116825, "learning_rate": 5.613780598368087e-05, "loss": 0.9228, "step": 774 }, { "epoch": 0.3513145965548504, "grad_norm": 1.7481393842284374, "learning_rate": 5.6210335448776075e-05, "loss": 0.9444, "step": 775 }, { "epoch": 0.35176790571169536, "grad_norm": 1.4815623776744518, "learning_rate": 5.6282864913871264e-05, "loss": 0.9576, "step": 776 }, { "epoch": 0.35222121486854036, "grad_norm": 1.2764403249645446, "learning_rate": 5.635539437896646e-05, "loss": 0.9382, "step": 777 }, { "epoch": 0.3526745240253853, "grad_norm": 1.4924814269355533, "learning_rate": 5.642792384406165e-05, "loss": 0.9512, "step": 778 }, { "epoch": 0.3531278331822303, "grad_norm": 1.402499178070738, "learning_rate": 5.650045330915685e-05, "loss": 0.9368, "step": 779 }, { "epoch": 0.3535811423390752, "grad_norm": 1.237132969496704, "learning_rate": 5.657298277425204e-05, "loss": 0.9425, "step": 780 }, { "epoch": 0.3540344514959202, "grad_norm": 1.9800239458870281, "learning_rate": 5.664551223934724e-05, "loss": 0.9427, "step": 781 }, { "epoch": 0.35448776065276516, "grad_norm": 1.437096883657844, "learning_rate": 5.671804170444243e-05, "loss": 0.9463, "step": 782 }, { "epoch": 0.35494106980961015, "grad_norm": 1.217399965920037, "learning_rate": 5.679057116953763e-05, "loss": 0.936, "step": 783 }, { "epoch": 0.35539437896645515, "grad_norm": 1.413852336694359, "learning_rate": 5.686310063463283e-05, "loss": 0.9411, "step": 784 }, { "epoch": 0.3558476881233001, "grad_norm": 1.8809740802111852, "learning_rate": 5.693563009972802e-05, "loss": 0.9372, "step": 785 }, { "epoch": 0.3563009972801451, "grad_norm": 1.2321508482998587, "learning_rate": 5.7008159564823215e-05, "loss": 0.9542, "step": 786 }, { "epoch": 0.35675430643699, "grad_norm": 1.7736862433223881, "learning_rate": 5.708068902991841e-05, "loss": 0.94, "step": 787 }, { "epoch": 0.357207615593835, "grad_norm": 0.9134031731076103, "learning_rate": 5.7153218495013606e-05, "loss": 0.9347, "step": 788 }, { "epoch": 0.35766092475067995, "grad_norm": 2.252952662307836, "learning_rate": 5.7225747960108795e-05, "loss": 0.952, "step": 789 }, { "epoch": 0.35811423390752495, "grad_norm": 1.3692228101803525, "learning_rate": 5.7298277425204e-05, "loss": 0.9355, "step": 790 }, { "epoch": 0.3585675430643699, "grad_norm": 2.189743158739513, "learning_rate": 5.7370806890299186e-05, "loss": 0.9489, "step": 791 }, { "epoch": 0.3590208522212149, "grad_norm": 1.9551980650289926, "learning_rate": 5.744333635539439e-05, "loss": 0.9664, "step": 792 }, { "epoch": 0.3594741613780598, "grad_norm": 1.7212370425969903, "learning_rate": 5.751586582048958e-05, "loss": 0.9241, "step": 793 }, { "epoch": 0.3599274705349048, "grad_norm": 1.6201884833517672, "learning_rate": 5.7588395285584773e-05, "loss": 0.9431, "step": 794 }, { "epoch": 0.36038077969174975, "grad_norm": 1.709171079486392, "learning_rate": 5.766092475067996e-05, "loss": 0.9317, "step": 795 }, { "epoch": 0.36083408884859475, "grad_norm": 1.4776356790789862, "learning_rate": 5.7733454215775165e-05, "loss": 0.9312, "step": 796 }, { "epoch": 0.3612873980054397, "grad_norm": 1.5880136309183253, "learning_rate": 5.7805983680870354e-05, "loss": 0.9222, "step": 797 }, { "epoch": 0.3617407071622847, "grad_norm": 1.082206323201442, "learning_rate": 5.7878513145965556e-05, "loss": 0.9551, "step": 798 }, { "epoch": 0.3621940163191296, "grad_norm": 1.864204107029482, "learning_rate": 5.7951042611060745e-05, "loss": 0.943, "step": 799 }, { "epoch": 0.3626473254759746, "grad_norm": 1.4318810249564151, "learning_rate": 5.802357207615594e-05, "loss": 0.9451, "step": 800 }, { "epoch": 0.3631006346328196, "grad_norm": 1.7663768843759915, "learning_rate": 5.809610154125114e-05, "loss": 0.9478, "step": 801 }, { "epoch": 0.36355394378966455, "grad_norm": 1.714925029259117, "learning_rate": 5.816863100634633e-05, "loss": 0.9627, "step": 802 }, { "epoch": 0.36400725294650954, "grad_norm": 1.4279510579227686, "learning_rate": 5.8241160471441535e-05, "loss": 0.9544, "step": 803 }, { "epoch": 0.3644605621033545, "grad_norm": 1.2960248997622423, "learning_rate": 5.8313689936536724e-05, "loss": 0.9186, "step": 804 }, { "epoch": 0.3649138712601995, "grad_norm": 1.6499717232077822, "learning_rate": 5.838621940163192e-05, "loss": 0.9447, "step": 805 }, { "epoch": 0.3653671804170444, "grad_norm": 1.1813548839874675, "learning_rate": 5.845874886672711e-05, "loss": 0.9348, "step": 806 }, { "epoch": 0.3658204895738894, "grad_norm": 1.7394480002465147, "learning_rate": 5.853127833182231e-05, "loss": 0.9454, "step": 807 }, { "epoch": 0.36627379873073435, "grad_norm": 1.312484754311586, "learning_rate": 5.86038077969175e-05, "loss": 0.9315, "step": 808 }, { "epoch": 0.36672710788757934, "grad_norm": 1.6765711002743922, "learning_rate": 5.86763372620127e-05, "loss": 0.9535, "step": 809 }, { "epoch": 0.3671804170444243, "grad_norm": 1.381483561502983, "learning_rate": 5.874886672710789e-05, "loss": 0.9387, "step": 810 }, { "epoch": 0.3676337262012693, "grad_norm": 1.502672669938309, "learning_rate": 5.882139619220309e-05, "loss": 0.9515, "step": 811 }, { "epoch": 0.3680870353581142, "grad_norm": 1.8710689693066422, "learning_rate": 5.8893925657298276e-05, "loss": 0.9355, "step": 812 }, { "epoch": 0.3685403445149592, "grad_norm": 1.1328187776829437, "learning_rate": 5.896645512239348e-05, "loss": 0.944, "step": 813 }, { "epoch": 0.36899365367180414, "grad_norm": 1.3399127517108294, "learning_rate": 5.903898458748867e-05, "loss": 0.9397, "step": 814 }, { "epoch": 0.36944696282864914, "grad_norm": 2.009219902389139, "learning_rate": 5.911151405258387e-05, "loss": 0.934, "step": 815 }, { "epoch": 0.36990027198549413, "grad_norm": 1.4688141732459803, "learning_rate": 5.9184043517679065e-05, "loss": 0.9458, "step": 816 }, { "epoch": 0.37035358114233907, "grad_norm": 1.3381098366428306, "learning_rate": 5.9256572982774254e-05, "loss": 0.9442, "step": 817 }, { "epoch": 0.37080689029918407, "grad_norm": 1.3616784643936284, "learning_rate": 5.9329102447869457e-05, "loss": 0.944, "step": 818 }, { "epoch": 0.371260199456029, "grad_norm": 1.4096871244479137, "learning_rate": 5.9401631912964645e-05, "loss": 0.94, "step": 819 }, { "epoch": 0.371713508612874, "grad_norm": 1.7074436844594758, "learning_rate": 5.947416137805985e-05, "loss": 0.9417, "step": 820 }, { "epoch": 0.37216681776971894, "grad_norm": 1.7678346549776505, "learning_rate": 5.954669084315504e-05, "loss": 0.9286, "step": 821 }, { "epoch": 0.37262012692656393, "grad_norm": 1.1637863567745292, "learning_rate": 5.961922030825023e-05, "loss": 0.9511, "step": 822 }, { "epoch": 0.37307343608340887, "grad_norm": 1.7514825295503125, "learning_rate": 5.969174977334542e-05, "loss": 0.9473, "step": 823 }, { "epoch": 0.37352674524025387, "grad_norm": 1.2437716011129283, "learning_rate": 5.9764279238440624e-05, "loss": 0.9224, "step": 824 }, { "epoch": 0.3739800543970988, "grad_norm": 1.7196545486493893, "learning_rate": 5.983680870353581e-05, "loss": 0.9404, "step": 825 }, { "epoch": 0.3744333635539438, "grad_norm": 1.8427541021855762, "learning_rate": 5.9909338168631015e-05, "loss": 0.9276, "step": 826 }, { "epoch": 0.37488667271078874, "grad_norm": 1.1210243295638511, "learning_rate": 5.9981867633726204e-05, "loss": 0.9356, "step": 827 }, { "epoch": 0.37533998186763373, "grad_norm": 1.424003799396712, "learning_rate": 6.00543970988214e-05, "loss": 0.9593, "step": 828 }, { "epoch": 0.37579329102447867, "grad_norm": 1.1238202116507956, "learning_rate": 6.012692656391659e-05, "loss": 0.9162, "step": 829 }, { "epoch": 0.37624660018132366, "grad_norm": 2.026538280877823, "learning_rate": 6.019945602901179e-05, "loss": 0.9294, "step": 830 }, { "epoch": 0.3766999093381686, "grad_norm": 1.2014594325435937, "learning_rate": 6.027198549410698e-05, "loss": 0.9257, "step": 831 }, { "epoch": 0.3771532184950136, "grad_norm": 2.0270748289882388, "learning_rate": 6.034451495920218e-05, "loss": 0.9555, "step": 832 }, { "epoch": 0.3776065276518586, "grad_norm": 1.2228762637781396, "learning_rate": 6.041704442429738e-05, "loss": 0.9481, "step": 833 }, { "epoch": 0.37805983680870353, "grad_norm": 1.4314068051907045, "learning_rate": 6.048957388939257e-05, "loss": 0.9441, "step": 834 }, { "epoch": 0.3785131459655485, "grad_norm": 1.3228392319424185, "learning_rate": 6.056210335448777e-05, "loss": 0.9312, "step": 835 }, { "epoch": 0.37896645512239346, "grad_norm": 1.2413311415999304, "learning_rate": 6.063463281958296e-05, "loss": 0.9141, "step": 836 }, { "epoch": 0.37941976427923846, "grad_norm": 1.3113889397002814, "learning_rate": 6.070716228467816e-05, "loss": 0.9445, "step": 837 }, { "epoch": 0.3798730734360834, "grad_norm": 2.017958389580416, "learning_rate": 6.077969174977335e-05, "loss": 0.9346, "step": 838 }, { "epoch": 0.3803263825929284, "grad_norm": 1.2966653428498385, "learning_rate": 6.0852221214868546e-05, "loss": 0.9323, "step": 839 }, { "epoch": 0.38077969174977333, "grad_norm": 1.175181285226416, "learning_rate": 6.0924750679963735e-05, "loss": 0.9346, "step": 840 }, { "epoch": 0.3812330009066183, "grad_norm": 2.021480541116888, "learning_rate": 6.099728014505894e-05, "loss": 0.9446, "step": 841 }, { "epoch": 0.38168631006346326, "grad_norm": 1.5891051212701277, "learning_rate": 6.106980961015413e-05, "loss": 0.9231, "step": 842 }, { "epoch": 0.38213961922030826, "grad_norm": 1.064099475896568, "learning_rate": 6.114233907524933e-05, "loss": 0.9452, "step": 843 }, { "epoch": 0.3825929283771532, "grad_norm": 2.7015999552489935, "learning_rate": 6.121486854034452e-05, "loss": 0.9431, "step": 844 }, { "epoch": 0.3830462375339982, "grad_norm": 1.7418737344370814, "learning_rate": 6.128739800543972e-05, "loss": 0.9238, "step": 845 }, { "epoch": 0.38349954669084313, "grad_norm": 2.627607625135701, "learning_rate": 6.135992747053491e-05, "loss": 0.9571, "step": 846 }, { "epoch": 0.3839528558476881, "grad_norm": 2.54241893808046, "learning_rate": 6.143245693563011e-05, "loss": 0.9514, "step": 847 }, { "epoch": 0.3844061650045331, "grad_norm": 1.8302121868747863, "learning_rate": 6.15049864007253e-05, "loss": 0.933, "step": 848 }, { "epoch": 0.38485947416137806, "grad_norm": 1.6333171259177501, "learning_rate": 6.157751586582049e-05, "loss": 0.9463, "step": 849 }, { "epoch": 0.38531278331822305, "grad_norm": 1.914548102894475, "learning_rate": 6.165004533091569e-05, "loss": 0.9507, "step": 850 }, { "epoch": 0.385766092475068, "grad_norm": 1.3907372666515196, "learning_rate": 6.172257479601088e-05, "loss": 0.9486, "step": 851 }, { "epoch": 0.386219401631913, "grad_norm": 2.024963232012732, "learning_rate": 6.179510426110608e-05, "loss": 0.9523, "step": 852 }, { "epoch": 0.3866727107887579, "grad_norm": 1.2764898812678038, "learning_rate": 6.186763372620127e-05, "loss": 0.9403, "step": 853 }, { "epoch": 0.3871260199456029, "grad_norm": 1.9531234990880517, "learning_rate": 6.194016319129647e-05, "loss": 0.9407, "step": 854 }, { "epoch": 0.38757932910244786, "grad_norm": 1.5240135873469607, "learning_rate": 6.201269265639166e-05, "loss": 0.9375, "step": 855 }, { "epoch": 0.38803263825929285, "grad_norm": 1.8581356343156015, "learning_rate": 6.208522212148687e-05, "loss": 0.9165, "step": 856 }, { "epoch": 0.3884859474161378, "grad_norm": 1.725383299895511, "learning_rate": 6.215775158658205e-05, "loss": 0.9106, "step": 857 }, { "epoch": 0.3889392565729828, "grad_norm": 1.58018603371685, "learning_rate": 6.223028105167724e-05, "loss": 0.9226, "step": 858 }, { "epoch": 0.3893925657298277, "grad_norm": 1.4709829394769822, "learning_rate": 6.230281051677245e-05, "loss": 0.9524, "step": 859 }, { "epoch": 0.3898458748866727, "grad_norm": 1.5012042692618373, "learning_rate": 6.237533998186764e-05, "loss": 0.9279, "step": 860 }, { "epoch": 0.39029918404351766, "grad_norm": 1.222271787969785, "learning_rate": 6.244786944696282e-05, "loss": 0.9202, "step": 861 }, { "epoch": 0.39075249320036265, "grad_norm": 1.4335192520689135, "learning_rate": 6.252039891205803e-05, "loss": 0.9341, "step": 862 }, { "epoch": 0.3912058023572076, "grad_norm": 1.6968967796136794, "learning_rate": 6.259292837715322e-05, "loss": 0.938, "step": 863 }, { "epoch": 0.3916591115140526, "grad_norm": 2.033955446949894, "learning_rate": 6.266545784224842e-05, "loss": 0.9264, "step": 864 }, { "epoch": 0.3921124206708976, "grad_norm": 1.1319894877171877, "learning_rate": 6.273798730734362e-05, "loss": 0.91, "step": 865 }, { "epoch": 0.3925657298277425, "grad_norm": 1.446720954686739, "learning_rate": 6.281051677243881e-05, "loss": 0.9597, "step": 866 }, { "epoch": 0.3930190389845875, "grad_norm": 1.1355051979362119, "learning_rate": 6.288304623753401e-05, "loss": 0.9239, "step": 867 }, { "epoch": 0.39347234814143245, "grad_norm": 1.73059005014981, "learning_rate": 6.29555757026292e-05, "loss": 0.9549, "step": 868 }, { "epoch": 0.39392565729827744, "grad_norm": 1.35999332508219, "learning_rate": 6.302810516772439e-05, "loss": 0.943, "step": 869 }, { "epoch": 0.3943789664551224, "grad_norm": 1.4683590015016095, "learning_rate": 6.310063463281959e-05, "loss": 0.9381, "step": 870 }, { "epoch": 0.3948322756119674, "grad_norm": 1.179008047372574, "learning_rate": 6.317316409791478e-05, "loss": 0.9174, "step": 871 }, { "epoch": 0.3952855847688123, "grad_norm": 1.7643075715157752, "learning_rate": 6.324569356300997e-05, "loss": 0.9233, "step": 872 }, { "epoch": 0.3957388939256573, "grad_norm": 1.387620600407311, "learning_rate": 6.331822302810517e-05, "loss": 0.943, "step": 873 }, { "epoch": 0.39619220308250225, "grad_norm": 1.6175734965492337, "learning_rate": 6.339075249320036e-05, "loss": 0.9374, "step": 874 }, { "epoch": 0.39664551223934724, "grad_norm": 2.29583331129652, "learning_rate": 6.346328195829556e-05, "loss": 0.9313, "step": 875 }, { "epoch": 0.3970988213961922, "grad_norm": 0.9501061734503268, "learning_rate": 6.353581142339075e-05, "loss": 0.9396, "step": 876 }, { "epoch": 0.3975521305530372, "grad_norm": 2.3376966689093863, "learning_rate": 6.360834088848596e-05, "loss": 0.9267, "step": 877 }, { "epoch": 0.3980054397098821, "grad_norm": 1.6714058145697481, "learning_rate": 6.368087035358114e-05, "loss": 0.9337, "step": 878 }, { "epoch": 0.3984587488667271, "grad_norm": 1.5636515921010241, "learning_rate": 6.375339981867635e-05, "loss": 0.9301, "step": 879 }, { "epoch": 0.3989120580235721, "grad_norm": 1.589175417373944, "learning_rate": 6.382592928377154e-05, "loss": 0.9354, "step": 880 }, { "epoch": 0.39936536718041704, "grad_norm": 1.4358513671945847, "learning_rate": 6.389845874886674e-05, "loss": 0.9318, "step": 881 }, { "epoch": 0.39981867633726204, "grad_norm": 2.0755915247955485, "learning_rate": 6.397098821396193e-05, "loss": 0.9283, "step": 882 }, { "epoch": 0.400271985494107, "grad_norm": 1.5438756859296723, "learning_rate": 6.404351767905712e-05, "loss": 0.9404, "step": 883 }, { "epoch": 0.40072529465095197, "grad_norm": 1.1137385313055184, "learning_rate": 6.411604714415232e-05, "loss": 0.9395, "step": 884 }, { "epoch": 0.4011786038077969, "grad_norm": 1.9809110569943833, "learning_rate": 6.418857660924751e-05, "loss": 0.949, "step": 885 }, { "epoch": 0.4016319129646419, "grad_norm": 1.1910777816308293, "learning_rate": 6.426110607434271e-05, "loss": 0.9313, "step": 886 }, { "epoch": 0.40208522212148684, "grad_norm": 2.1660806740082315, "learning_rate": 6.43336355394379e-05, "loss": 0.9405, "step": 887 }, { "epoch": 0.40253853127833183, "grad_norm": 1.3323787517285712, "learning_rate": 6.44061650045331e-05, "loss": 0.9421, "step": 888 }, { "epoch": 0.4029918404351768, "grad_norm": 1.8324767131959123, "learning_rate": 6.447869446962829e-05, "loss": 0.9393, "step": 889 }, { "epoch": 0.40344514959202177, "grad_norm": 1.465930308979378, "learning_rate": 6.455122393472349e-05, "loss": 0.9481, "step": 890 }, { "epoch": 0.4038984587488667, "grad_norm": 1.773266930159506, "learning_rate": 6.462375339981868e-05, "loss": 0.9232, "step": 891 }, { "epoch": 0.4043517679057117, "grad_norm": 1.4240441690792642, "learning_rate": 6.469628286491388e-05, "loss": 0.9281, "step": 892 }, { "epoch": 0.40480507706255664, "grad_norm": 1.2483571738783068, "learning_rate": 6.476881233000907e-05, "loss": 0.9315, "step": 893 }, { "epoch": 0.40525838621940163, "grad_norm": 2.3218767888054117, "learning_rate": 6.484134179510426e-05, "loss": 0.955, "step": 894 }, { "epoch": 0.4057116953762466, "grad_norm": 1.5962884848038854, "learning_rate": 6.491387126019945e-05, "loss": 0.9453, "step": 895 }, { "epoch": 0.40616500453309157, "grad_norm": 1.9981418057687077, "learning_rate": 6.498640072529465e-05, "loss": 0.9514, "step": 896 }, { "epoch": 0.40661831368993656, "grad_norm": 1.9826590766827636, "learning_rate": 6.505893019038986e-05, "loss": 0.9407, "step": 897 }, { "epoch": 0.4070716228467815, "grad_norm": 1.651000508081076, "learning_rate": 6.513145965548504e-05, "loss": 0.945, "step": 898 }, { "epoch": 0.4075249320036265, "grad_norm": 1.08598493225144, "learning_rate": 6.520398912058025e-05, "loss": 0.9412, "step": 899 }, { "epoch": 0.40797824116047143, "grad_norm": 1.750476079696457, "learning_rate": 6.527651858567544e-05, "loss": 0.9472, "step": 900 }, { "epoch": 0.4084315503173164, "grad_norm": 1.4679979289095233, "learning_rate": 6.534904805077064e-05, "loss": 0.947, "step": 901 }, { "epoch": 0.40888485947416137, "grad_norm": 1.9173282776325855, "learning_rate": 6.542157751586583e-05, "loss": 0.9255, "step": 902 }, { "epoch": 0.40933816863100636, "grad_norm": 1.4773057222190584, "learning_rate": 6.549410698096103e-05, "loss": 0.9321, "step": 903 }, { "epoch": 0.4097914777878513, "grad_norm": 1.6709503751089287, "learning_rate": 6.556663644605622e-05, "loss": 0.9573, "step": 904 }, { "epoch": 0.4102447869446963, "grad_norm": 1.6020648585906692, "learning_rate": 6.563916591115141e-05, "loss": 0.919, "step": 905 }, { "epoch": 0.41069809610154123, "grad_norm": 1.487934521806381, "learning_rate": 6.57116953762466e-05, "loss": 0.9116, "step": 906 }, { "epoch": 0.4111514052583862, "grad_norm": 1.3687883772116523, "learning_rate": 6.57842248413418e-05, "loss": 0.926, "step": 907 }, { "epoch": 0.41160471441523117, "grad_norm": 1.833150663003179, "learning_rate": 6.585675430643699e-05, "loss": 0.9363, "step": 908 }, { "epoch": 0.41205802357207616, "grad_norm": 1.5368831743113036, "learning_rate": 6.592928377153219e-05, "loss": 0.9148, "step": 909 }, { "epoch": 0.4125113327289211, "grad_norm": 1.513761134710497, "learning_rate": 6.600181323662738e-05, "loss": 0.9261, "step": 910 }, { "epoch": 0.4129646418857661, "grad_norm": 1.6763655748813047, "learning_rate": 6.607434270172258e-05, "loss": 0.9169, "step": 911 }, { "epoch": 0.4134179510426111, "grad_norm": 1.4085450863245177, "learning_rate": 6.614687216681778e-05, "loss": 0.9159, "step": 912 }, { "epoch": 0.413871260199456, "grad_norm": 1.5639693571864681, "learning_rate": 6.621940163191297e-05, "loss": 0.916, "step": 913 }, { "epoch": 0.414324569356301, "grad_norm": 1.7572519553089596, "learning_rate": 6.629193109700816e-05, "loss": 0.9535, "step": 914 }, { "epoch": 0.41477787851314596, "grad_norm": 2.0709414092633525, "learning_rate": 6.636446056210336e-05, "loss": 0.9392, "step": 915 }, { "epoch": 0.41523118766999095, "grad_norm": 0.7585721348321177, "learning_rate": 6.643699002719855e-05, "loss": 0.8992, "step": 916 }, { "epoch": 0.4156844968268359, "grad_norm": 2.063707486862334, "learning_rate": 6.650951949229374e-05, "loss": 0.9389, "step": 917 }, { "epoch": 0.4161378059836809, "grad_norm": 1.8206549581627278, "learning_rate": 6.658204895738894e-05, "loss": 0.939, "step": 918 }, { "epoch": 0.4165911151405258, "grad_norm": 1.1900306558083373, "learning_rate": 6.665457842248413e-05, "loss": 0.9314, "step": 919 }, { "epoch": 0.4170444242973708, "grad_norm": 2.187042650518004, "learning_rate": 6.672710788757934e-05, "loss": 0.9388, "step": 920 }, { "epoch": 0.41749773345421576, "grad_norm": 1.3145749436236447, "learning_rate": 6.679963735267453e-05, "loss": 0.9323, "step": 921 }, { "epoch": 0.41795104261106075, "grad_norm": 1.609851220728418, "learning_rate": 6.687216681776973e-05, "loss": 0.9334, "step": 922 }, { "epoch": 0.4184043517679057, "grad_norm": 1.7458796730496562, "learning_rate": 6.694469628286492e-05, "loss": 0.9349, "step": 923 }, { "epoch": 0.4188576609247507, "grad_norm": 1.2191972068500994, "learning_rate": 6.701722574796012e-05, "loss": 0.9386, "step": 924 }, { "epoch": 0.4193109700815956, "grad_norm": 2.6586821050446847, "learning_rate": 6.708975521305531e-05, "loss": 0.9669, "step": 925 }, { "epoch": 0.4197642792384406, "grad_norm": 1.3176141361787714, "learning_rate": 6.716228467815051e-05, "loss": 0.9209, "step": 926 }, { "epoch": 0.42021758839528556, "grad_norm": 2.8726390473026027, "learning_rate": 6.72348141432457e-05, "loss": 0.9523, "step": 927 }, { "epoch": 0.42067089755213055, "grad_norm": 2.139370711372781, "learning_rate": 6.730734360834089e-05, "loss": 0.9492, "step": 928 }, { "epoch": 0.42112420670897555, "grad_norm": 1.7433666846502294, "learning_rate": 6.737987307343609e-05, "loss": 0.951, "step": 929 }, { "epoch": 0.4215775158658205, "grad_norm": 1.7675896957281527, "learning_rate": 6.745240253853128e-05, "loss": 0.9316, "step": 930 }, { "epoch": 0.4220308250226655, "grad_norm": 1.855870845526705, "learning_rate": 6.752493200362648e-05, "loss": 0.9625, "step": 931 }, { "epoch": 0.4224841341795104, "grad_norm": 1.4116875109813614, "learning_rate": 6.759746146872167e-05, "loss": 0.9318, "step": 932 }, { "epoch": 0.4229374433363554, "grad_norm": 1.473417657477488, "learning_rate": 6.766999093381687e-05, "loss": 0.932, "step": 933 }, { "epoch": 0.42339075249320035, "grad_norm": 1.9827973039569222, "learning_rate": 6.774252039891206e-05, "loss": 0.9521, "step": 934 }, { "epoch": 0.42384406165004535, "grad_norm": 1.490915646142894, "learning_rate": 6.781504986400727e-05, "loss": 0.9339, "step": 935 }, { "epoch": 0.4242973708068903, "grad_norm": 1.4891955713786058, "learning_rate": 6.788757932910245e-05, "loss": 0.9397, "step": 936 }, { "epoch": 0.4247506799637353, "grad_norm": 1.3194849777903461, "learning_rate": 6.796010879419766e-05, "loss": 0.9414, "step": 937 }, { "epoch": 0.4252039891205802, "grad_norm": 1.502342131945211, "learning_rate": 6.803263825929285e-05, "loss": 0.9439, "step": 938 }, { "epoch": 0.4256572982774252, "grad_norm": 1.774141083835349, "learning_rate": 6.810516772438803e-05, "loss": 0.9257, "step": 939 }, { "epoch": 0.42611060743427015, "grad_norm": 1.7455053940178245, "learning_rate": 6.817769718948322e-05, "loss": 0.9387, "step": 940 }, { "epoch": 0.42656391659111514, "grad_norm": 1.129544706531614, "learning_rate": 6.825022665457843e-05, "loss": 0.9403, "step": 941 }, { "epoch": 0.4270172257479601, "grad_norm": 1.732948916377446, "learning_rate": 6.832275611967361e-05, "loss": 0.9193, "step": 942 }, { "epoch": 0.4274705349048051, "grad_norm": 1.0596313411874538, "learning_rate": 6.839528558476882e-05, "loss": 0.9328, "step": 943 }, { "epoch": 0.42792384406165007, "grad_norm": 2.510228751742774, "learning_rate": 6.846781504986402e-05, "loss": 0.9321, "step": 944 }, { "epoch": 0.428377153218495, "grad_norm": 1.5354028712389074, "learning_rate": 6.854034451495921e-05, "loss": 0.933, "step": 945 }, { "epoch": 0.42883046237534, "grad_norm": 2.10013097564691, "learning_rate": 6.861287398005441e-05, "loss": 0.9582, "step": 946 }, { "epoch": 0.42928377153218494, "grad_norm": 1.9929757857944115, "learning_rate": 6.86854034451496e-05, "loss": 0.9406, "step": 947 }, { "epoch": 0.42973708068902994, "grad_norm": 1.963260940250398, "learning_rate": 6.87579329102448e-05, "loss": 0.941, "step": 948 }, { "epoch": 0.4301903898458749, "grad_norm": 1.4080118567743063, "learning_rate": 6.883046237533999e-05, "loss": 0.9593, "step": 949 }, { "epoch": 0.43064369900271987, "grad_norm": 2.7590175421937264, "learning_rate": 6.890299184043518e-05, "loss": 0.9396, "step": 950 }, { "epoch": 0.4310970081595648, "grad_norm": 2.320743470688124, "learning_rate": 6.897552130553037e-05, "loss": 0.9304, "step": 951 }, { "epoch": 0.4315503173164098, "grad_norm": 1.9443881398778053, "learning_rate": 6.904805077062557e-05, "loss": 0.9351, "step": 952 }, { "epoch": 0.43200362647325474, "grad_norm": 2.468051713952071, "learning_rate": 6.912058023572076e-05, "loss": 0.9404, "step": 953 }, { "epoch": 0.43245693563009974, "grad_norm": 1.2599390167703235, "learning_rate": 6.919310970081596e-05, "loss": 0.947, "step": 954 }, { "epoch": 0.4329102447869447, "grad_norm": 3.5081738768640727, "learning_rate": 6.926563916591115e-05, "loss": 0.9288, "step": 955 }, { "epoch": 0.43336355394378967, "grad_norm": 2.2382158006370503, "learning_rate": 6.933816863100635e-05, "loss": 0.9456, "step": 956 }, { "epoch": 0.4338168631006346, "grad_norm": 3.705209928534828, "learning_rate": 6.941069809610154e-05, "loss": 0.9513, "step": 957 }, { "epoch": 0.4342701722574796, "grad_norm": 2.9758727994960004, "learning_rate": 6.948322756119675e-05, "loss": 0.9585, "step": 958 }, { "epoch": 0.43472348141432454, "grad_norm": 2.9047355279969183, "learning_rate": 6.955575702629193e-05, "loss": 0.9442, "step": 959 }, { "epoch": 0.43517679057116954, "grad_norm": 3.4944956515289642, "learning_rate": 6.962828649138714e-05, "loss": 0.9593, "step": 960 }, { "epoch": 0.43563009972801453, "grad_norm": 1.9226738845289713, "learning_rate": 6.970081595648233e-05, "loss": 0.9415, "step": 961 }, { "epoch": 0.43608340888485947, "grad_norm": 1.6124738141894455, "learning_rate": 6.977334542157751e-05, "loss": 0.9145, "step": 962 }, { "epoch": 0.43653671804170446, "grad_norm": 1.970471438700279, "learning_rate": 6.984587488667272e-05, "loss": 0.9516, "step": 963 }, { "epoch": 0.4369900271985494, "grad_norm": 1.5307380655043137, "learning_rate": 6.99184043517679e-05, "loss": 0.9313, "step": 964 }, { "epoch": 0.4374433363553944, "grad_norm": 2.025095875444784, "learning_rate": 6.999093381686311e-05, "loss": 0.9345, "step": 965 }, { "epoch": 0.43789664551223934, "grad_norm": 1.2270479229092894, "learning_rate": 7.00634632819583e-05, "loss": 0.9428, "step": 966 }, { "epoch": 0.43834995466908433, "grad_norm": 2.1293985378337625, "learning_rate": 7.01359927470535e-05, "loss": 0.9093, "step": 967 }, { "epoch": 0.43880326382592927, "grad_norm": 1.7044946143740973, "learning_rate": 7.020852221214869e-05, "loss": 0.9276, "step": 968 }, { "epoch": 0.43925657298277426, "grad_norm": 1.7963157746965182, "learning_rate": 7.028105167724389e-05, "loss": 0.9396, "step": 969 }, { "epoch": 0.4397098821396192, "grad_norm": 1.8697551917468913, "learning_rate": 7.035358114233908e-05, "loss": 0.923, "step": 970 }, { "epoch": 0.4401631912964642, "grad_norm": 1.328352473910411, "learning_rate": 7.042611060743428e-05, "loss": 0.9222, "step": 971 }, { "epoch": 0.44061650045330913, "grad_norm": 1.478687393293392, "learning_rate": 7.049864007252947e-05, "loss": 0.9366, "step": 972 }, { "epoch": 0.44106980961015413, "grad_norm": 1.2856007597418637, "learning_rate": 7.057116953762466e-05, "loss": 0.9094, "step": 973 }, { "epoch": 0.44152311876699907, "grad_norm": 1.7851810947990185, "learning_rate": 7.064369900271986e-05, "loss": 0.9502, "step": 974 }, { "epoch": 0.44197642792384406, "grad_norm": 1.0345446187242393, "learning_rate": 7.071622846781505e-05, "loss": 0.9339, "step": 975 }, { "epoch": 0.44242973708068906, "grad_norm": 2.0394740698837945, "learning_rate": 7.078875793291025e-05, "loss": 0.9386, "step": 976 }, { "epoch": 0.442883046237534, "grad_norm": 1.4928779330991742, "learning_rate": 7.086128739800544e-05, "loss": 0.9379, "step": 977 }, { "epoch": 0.443336355394379, "grad_norm": 1.9033789297205554, "learning_rate": 7.093381686310065e-05, "loss": 0.9333, "step": 978 }, { "epoch": 0.44378966455122393, "grad_norm": 1.4671009456033464, "learning_rate": 7.100634632819584e-05, "loss": 0.9476, "step": 979 }, { "epoch": 0.4442429737080689, "grad_norm": 2.1270428907674583, "learning_rate": 7.107887579329104e-05, "loss": 0.9523, "step": 980 }, { "epoch": 0.44469628286491386, "grad_norm": 1.8338154927689072, "learning_rate": 7.115140525838623e-05, "loss": 0.9733, "step": 981 }, { "epoch": 0.44514959202175886, "grad_norm": 1.5092680025813463, "learning_rate": 7.122393472348143e-05, "loss": 0.9506, "step": 982 }, { "epoch": 0.4456029011786038, "grad_norm": 1.5110762059552627, "learning_rate": 7.129646418857662e-05, "loss": 0.9435, "step": 983 }, { "epoch": 0.4460562103354488, "grad_norm": 1.4695380475526965, "learning_rate": 7.13689936536718e-05, "loss": 0.9302, "step": 984 }, { "epoch": 0.4465095194922937, "grad_norm": 1.4501562669917534, "learning_rate": 7.1441523118767e-05, "loss": 0.9055, "step": 985 }, { "epoch": 0.4469628286491387, "grad_norm": 1.9374464873842485, "learning_rate": 7.15140525838622e-05, "loss": 0.9205, "step": 986 }, { "epoch": 0.44741613780598366, "grad_norm": 1.6017113416133755, "learning_rate": 7.158658204895739e-05, "loss": 0.9268, "step": 987 }, { "epoch": 0.44786944696282865, "grad_norm": 0.9925648829984232, "learning_rate": 7.165911151405259e-05, "loss": 0.926, "step": 988 }, { "epoch": 0.4483227561196736, "grad_norm": 1.580300006432411, "learning_rate": 7.173164097914778e-05, "loss": 0.9295, "step": 989 }, { "epoch": 0.4487760652765186, "grad_norm": 1.3785101751945472, "learning_rate": 7.180417044424298e-05, "loss": 0.9155, "step": 990 }, { "epoch": 0.4492293744333635, "grad_norm": 1.9267176644549786, "learning_rate": 7.187669990933817e-05, "loss": 0.9396, "step": 991 }, { "epoch": 0.4496826835902085, "grad_norm": 1.568814431355168, "learning_rate": 7.194922937443337e-05, "loss": 0.9509, "step": 992 }, { "epoch": 0.4501359927470535, "grad_norm": 1.5381497981036112, "learning_rate": 7.202175883952857e-05, "loss": 0.9323, "step": 993 }, { "epoch": 0.45058930190389845, "grad_norm": 1.6920723928858534, "learning_rate": 7.209428830462376e-05, "loss": 0.9194, "step": 994 }, { "epoch": 0.45104261106074345, "grad_norm": 1.3137412035397498, "learning_rate": 7.216681776971895e-05, "loss": 0.9234, "step": 995 }, { "epoch": 0.4514959202175884, "grad_norm": 1.9229419765415254, "learning_rate": 7.223934723481414e-05, "loss": 0.948, "step": 996 }, { "epoch": 0.4519492293744334, "grad_norm": 1.239145594527727, "learning_rate": 7.231187669990934e-05, "loss": 0.9514, "step": 997 }, { "epoch": 0.4524025385312783, "grad_norm": 1.6807576549877472, "learning_rate": 7.238440616500453e-05, "loss": 0.9326, "step": 998 }, { "epoch": 0.4528558476881233, "grad_norm": 2.103544411539409, "learning_rate": 7.245693563009974e-05, "loss": 0.9547, "step": 999 }, { "epoch": 0.45330915684496825, "grad_norm": 0.9493300849718785, "learning_rate": 7.252946509519492e-05, "loss": 0.9291, "step": 1000 }, { "epoch": 0.45376246600181325, "grad_norm": 1.0630335941273665, "learning_rate": 7.260199456029013e-05, "loss": 0.9218, "step": 1001 }, { "epoch": 0.4542157751586582, "grad_norm": 1.3308930955965812, "learning_rate": 7.267452402538532e-05, "loss": 0.9596, "step": 1002 }, { "epoch": 0.4546690843155032, "grad_norm": 2.150122918889247, "learning_rate": 7.274705349048052e-05, "loss": 0.9426, "step": 1003 }, { "epoch": 0.4551223934723481, "grad_norm": 1.6883739049117814, "learning_rate": 7.281958295557571e-05, "loss": 0.9241, "step": 1004 }, { "epoch": 0.4555757026291931, "grad_norm": 1.417905367224721, "learning_rate": 7.289211242067091e-05, "loss": 0.9238, "step": 1005 }, { "epoch": 0.45602901178603805, "grad_norm": 1.5758494544426542, "learning_rate": 7.29646418857661e-05, "loss": 0.9361, "step": 1006 }, { "epoch": 0.45648232094288305, "grad_norm": 1.2800568807162367, "learning_rate": 7.303717135086129e-05, "loss": 0.9421, "step": 1007 }, { "epoch": 0.45693563009972804, "grad_norm": 2.3279690848714707, "learning_rate": 7.310970081595649e-05, "loss": 0.9439, "step": 1008 }, { "epoch": 0.457388939256573, "grad_norm": 1.2441247004377207, "learning_rate": 7.318223028105168e-05, "loss": 0.9351, "step": 1009 }, { "epoch": 0.457842248413418, "grad_norm": 1.8489834056260686, "learning_rate": 7.325475974614688e-05, "loss": 0.9571, "step": 1010 }, { "epoch": 0.4582955575702629, "grad_norm": 1.18632322569053, "learning_rate": 7.332728921124207e-05, "loss": 0.9328, "step": 1011 }, { "epoch": 0.4587488667271079, "grad_norm": 2.130749616424102, "learning_rate": 7.339981867633727e-05, "loss": 0.9397, "step": 1012 }, { "epoch": 0.45920217588395285, "grad_norm": 1.8435400088620908, "learning_rate": 7.347234814143246e-05, "loss": 0.9469, "step": 1013 }, { "epoch": 0.45965548504079784, "grad_norm": 1.5466130380534353, "learning_rate": 7.354487760652766e-05, "loss": 0.9468, "step": 1014 }, { "epoch": 0.4601087941976428, "grad_norm": 1.7102975341037632, "learning_rate": 7.361740707162285e-05, "loss": 0.9448, "step": 1015 }, { "epoch": 0.4605621033544878, "grad_norm": 1.696463562610429, "learning_rate": 7.368993653671806e-05, "loss": 0.9411, "step": 1016 }, { "epoch": 0.4610154125113327, "grad_norm": 1.413872623013266, "learning_rate": 7.376246600181324e-05, "loss": 0.9633, "step": 1017 }, { "epoch": 0.4614687216681777, "grad_norm": 1.6939628491066452, "learning_rate": 7.383499546690843e-05, "loss": 0.9352, "step": 1018 }, { "epoch": 0.46192203082502264, "grad_norm": 1.2403461460914742, "learning_rate": 7.390752493200364e-05, "loss": 0.9384, "step": 1019 }, { "epoch": 0.46237533998186764, "grad_norm": 1.8243041776791569, "learning_rate": 7.398005439709882e-05, "loss": 0.934, "step": 1020 }, { "epoch": 0.4628286491387126, "grad_norm": 1.5417445980794333, "learning_rate": 7.405258386219401e-05, "loss": 0.942, "step": 1021 }, { "epoch": 0.46328195829555757, "grad_norm": 1.4465034253728488, "learning_rate": 7.412511332728922e-05, "loss": 0.9036, "step": 1022 }, { "epoch": 0.4637352674524025, "grad_norm": 1.0618175991445225, "learning_rate": 7.41976427923844e-05, "loss": 0.946, "step": 1023 }, { "epoch": 0.4641885766092475, "grad_norm": 1.8891330937985298, "learning_rate": 7.427017225747961e-05, "loss": 0.938, "step": 1024 }, { "epoch": 0.4646418857660925, "grad_norm": 1.821385470525838, "learning_rate": 7.434270172257481e-05, "loss": 0.9531, "step": 1025 }, { "epoch": 0.46509519492293744, "grad_norm": 1.2519590241371303, "learning_rate": 7.441523118767e-05, "loss": 0.9259, "step": 1026 }, { "epoch": 0.46554850407978243, "grad_norm": 1.4444719397577677, "learning_rate": 7.44877606527652e-05, "loss": 0.9378, "step": 1027 }, { "epoch": 0.46600181323662737, "grad_norm": 1.326672762837731, "learning_rate": 7.456029011786039e-05, "loss": 0.9247, "step": 1028 }, { "epoch": 0.46645512239347237, "grad_norm": 0.8931438874695452, "learning_rate": 7.463281958295558e-05, "loss": 0.9371, "step": 1029 }, { "epoch": 0.4669084315503173, "grad_norm": 1.722591982160326, "learning_rate": 7.470534904805077e-05, "loss": 0.9191, "step": 1030 }, { "epoch": 0.4673617407071623, "grad_norm": 1.3548159637884043, "learning_rate": 7.477787851314597e-05, "loss": 0.9276, "step": 1031 }, { "epoch": 0.46781504986400724, "grad_norm": 2.0565819452148775, "learning_rate": 7.485040797824116e-05, "loss": 0.9261, "step": 1032 }, { "epoch": 0.46826835902085223, "grad_norm": 1.6696871761574352, "learning_rate": 7.492293744333636e-05, "loss": 0.9372, "step": 1033 }, { "epoch": 0.46872166817769717, "grad_norm": 0.82577844790418, "learning_rate": 7.499546690843155e-05, "loss": 0.9207, "step": 1034 }, { "epoch": 0.46917497733454216, "grad_norm": 1.9443843401600236, "learning_rate": 7.506799637352675e-05, "loss": 0.9354, "step": 1035 }, { "epoch": 0.4696282864913871, "grad_norm": 2.0138320341947327, "learning_rate": 7.514052583862194e-05, "loss": 0.9227, "step": 1036 }, { "epoch": 0.4700815956482321, "grad_norm": 1.1650082821792327, "learning_rate": 7.521305530371714e-05, "loss": 0.9328, "step": 1037 }, { "epoch": 0.47053490480507704, "grad_norm": 1.2838474467658123, "learning_rate": 7.528558476881233e-05, "loss": 0.9362, "step": 1038 }, { "epoch": 0.47098821396192203, "grad_norm": 1.0453670570329108, "learning_rate": 7.535811423390754e-05, "loss": 0.9229, "step": 1039 }, { "epoch": 0.471441523118767, "grad_norm": 2.0724878446960027, "learning_rate": 7.543064369900273e-05, "loss": 0.962, "step": 1040 }, { "epoch": 0.47189483227561196, "grad_norm": 1.3337189561193297, "learning_rate": 7.550317316409791e-05, "loss": 0.9352, "step": 1041 }, { "epoch": 0.47234814143245696, "grad_norm": 2.693288053840458, "learning_rate": 7.557570262919312e-05, "loss": 0.9483, "step": 1042 }, { "epoch": 0.4728014505893019, "grad_norm": 1.3480366895442522, "learning_rate": 7.56482320942883e-05, "loss": 0.9276, "step": 1043 }, { "epoch": 0.4732547597461469, "grad_norm": 3.269449355440945, "learning_rate": 7.572076155938351e-05, "loss": 0.9343, "step": 1044 }, { "epoch": 0.47370806890299183, "grad_norm": 2.5634358365156875, "learning_rate": 7.57932910244787e-05, "loss": 0.9544, "step": 1045 }, { "epoch": 0.4741613780598368, "grad_norm": 2.2653530336177266, "learning_rate": 7.58658204895739e-05, "loss": 0.9343, "step": 1046 }, { "epoch": 0.47461468721668176, "grad_norm": 2.0650438598951073, "learning_rate": 7.593834995466909e-05, "loss": 0.9451, "step": 1047 }, { "epoch": 0.47506799637352676, "grad_norm": 1.946766269386176, "learning_rate": 7.601087941976429e-05, "loss": 0.9368, "step": 1048 }, { "epoch": 0.4755213055303717, "grad_norm": 1.9837057639824804, "learning_rate": 7.608340888485948e-05, "loss": 0.9326, "step": 1049 }, { "epoch": 0.4759746146872167, "grad_norm": 1.7696256080976231, "learning_rate": 7.615593834995468e-05, "loss": 0.9364, "step": 1050 }, { "epoch": 0.47642792384406163, "grad_norm": 1.6890934312484855, "learning_rate": 7.622846781504987e-05, "loss": 0.9371, "step": 1051 }, { "epoch": 0.4768812330009066, "grad_norm": 2.7295061742800923, "learning_rate": 7.630099728014506e-05, "loss": 0.9653, "step": 1052 }, { "epoch": 0.47733454215775156, "grad_norm": 1.700768595181305, "learning_rate": 7.637352674524026e-05, "loss": 0.9319, "step": 1053 }, { "epoch": 0.47778785131459656, "grad_norm": 2.660900188884576, "learning_rate": 7.644605621033545e-05, "loss": 0.9156, "step": 1054 }, { "epoch": 0.4782411604714415, "grad_norm": 1.8192445686234342, "learning_rate": 7.651858567543064e-05, "loss": 0.9432, "step": 1055 }, { "epoch": 0.4786944696282865, "grad_norm": 2.61197597226296, "learning_rate": 7.659111514052584e-05, "loss": 0.9189, "step": 1056 }, { "epoch": 0.4791477787851315, "grad_norm": 1.723966847834538, "learning_rate": 7.666364460562105e-05, "loss": 0.9382, "step": 1057 }, { "epoch": 0.4796010879419764, "grad_norm": 1.7936903514703189, "learning_rate": 7.673617407071623e-05, "loss": 0.9424, "step": 1058 }, { "epoch": 0.4800543970988214, "grad_norm": 1.9636976024452146, "learning_rate": 7.680870353581144e-05, "loss": 0.9529, "step": 1059 }, { "epoch": 0.48050770625566636, "grad_norm": 1.3505731920784558, "learning_rate": 7.688123300090663e-05, "loss": 0.9118, "step": 1060 }, { "epoch": 0.48096101541251135, "grad_norm": 1.2878004364864275, "learning_rate": 7.695376246600183e-05, "loss": 0.9216, "step": 1061 }, { "epoch": 0.4814143245693563, "grad_norm": 2.6822139725243637, "learning_rate": 7.702629193109702e-05, "loss": 0.9486, "step": 1062 }, { "epoch": 0.4818676337262013, "grad_norm": 1.4387186659833817, "learning_rate": 7.70988213961922e-05, "loss": 0.9162, "step": 1063 }, { "epoch": 0.4823209428830462, "grad_norm": 3.3730033560990575, "learning_rate": 7.717135086128741e-05, "loss": 0.96, "step": 1064 }, { "epoch": 0.4827742520398912, "grad_norm": 2.436141421851693, "learning_rate": 7.72438803263826e-05, "loss": 0.9433, "step": 1065 }, { "epoch": 0.48322756119673616, "grad_norm": 3.2372033928135977, "learning_rate": 7.731640979147779e-05, "loss": 0.9245, "step": 1066 }, { "epoch": 0.48368087035358115, "grad_norm": 2.260827881164444, "learning_rate": 7.738893925657299e-05, "loss": 0.9502, "step": 1067 }, { "epoch": 0.4841341795104261, "grad_norm": 3.584449133109595, "learning_rate": 7.746146872166818e-05, "loss": 0.957, "step": 1068 }, { "epoch": 0.4845874886672711, "grad_norm": 3.089701408521804, "learning_rate": 7.753399818676338e-05, "loss": 0.9348, "step": 1069 }, { "epoch": 0.485040797824116, "grad_norm": 2.298167656109134, "learning_rate": 7.760652765185857e-05, "loss": 0.9082, "step": 1070 }, { "epoch": 0.485494106980961, "grad_norm": 2.1529663896142273, "learning_rate": 7.767905711695377e-05, "loss": 0.9233, "step": 1071 }, { "epoch": 0.485947416137806, "grad_norm": 2.467847476419537, "learning_rate": 7.775158658204897e-05, "loss": 0.9277, "step": 1072 }, { "epoch": 0.48640072529465095, "grad_norm": 1.7350022379515349, "learning_rate": 7.782411604714416e-05, "loss": 0.9274, "step": 1073 }, { "epoch": 0.48685403445149594, "grad_norm": 3.203641244436875, "learning_rate": 7.789664551223935e-05, "loss": 0.9306, "step": 1074 }, { "epoch": 0.4873073436083409, "grad_norm": 2.9372658375626886, "learning_rate": 7.796917497733455e-05, "loss": 0.9453, "step": 1075 }, { "epoch": 0.4877606527651859, "grad_norm": 2.2765481770407003, "learning_rate": 7.804170444242974e-05, "loss": 0.9351, "step": 1076 }, { "epoch": 0.4882139619220308, "grad_norm": 1.8437389756468139, "learning_rate": 7.811423390752493e-05, "loss": 0.9491, "step": 1077 }, { "epoch": 0.4886672710788758, "grad_norm": 3.007804058290887, "learning_rate": 7.818676337262013e-05, "loss": 0.9533, "step": 1078 }, { "epoch": 0.48912058023572075, "grad_norm": 2.3478189255955613, "learning_rate": 7.825929283771532e-05, "loss": 0.9308, "step": 1079 }, { "epoch": 0.48957388939256574, "grad_norm": 2.6457468583946366, "learning_rate": 7.833182230281053e-05, "loss": 0.9334, "step": 1080 }, { "epoch": 0.4900271985494107, "grad_norm": 2.486224967214583, "learning_rate": 7.840435176790571e-05, "loss": 0.9388, "step": 1081 }, { "epoch": 0.4904805077062557, "grad_norm": 2.238906257278672, "learning_rate": 7.847688123300092e-05, "loss": 0.9277, "step": 1082 }, { "epoch": 0.4909338168631006, "grad_norm": 1.8644857548968845, "learning_rate": 7.85494106980961e-05, "loss": 0.9227, "step": 1083 }, { "epoch": 0.4913871260199456, "grad_norm": 2.7479078988924313, "learning_rate": 7.862194016319131e-05, "loss": 0.9429, "step": 1084 }, { "epoch": 0.49184043517679055, "grad_norm": 2.4983002854989325, "learning_rate": 7.86944696282865e-05, "loss": 0.9331, "step": 1085 }, { "epoch": 0.49229374433363554, "grad_norm": 2.2531732778404154, "learning_rate": 7.876699909338169e-05, "loss": 0.9152, "step": 1086 }, { "epoch": 0.4927470534904805, "grad_norm": 2.0842759437699603, "learning_rate": 7.883952855847689e-05, "loss": 0.9418, "step": 1087 }, { "epoch": 0.4932003626473255, "grad_norm": 2.303072083728707, "learning_rate": 7.891205802357208e-05, "loss": 0.9364, "step": 1088 }, { "epoch": 0.49365367180417047, "grad_norm": 1.8734469770043933, "learning_rate": 7.898458748866728e-05, "loss": 0.9462, "step": 1089 }, { "epoch": 0.4941069809610154, "grad_norm": 2.774160319433171, "learning_rate": 7.905711695376247e-05, "loss": 0.9294, "step": 1090 }, { "epoch": 0.4945602901178604, "grad_norm": 2.5669691956525025, "learning_rate": 7.912964641885767e-05, "loss": 0.919, "step": 1091 }, { "epoch": 0.49501359927470534, "grad_norm": 1.8666203696065424, "learning_rate": 7.920217588395286e-05, "loss": 0.9266, "step": 1092 }, { "epoch": 0.49546690843155033, "grad_norm": 1.7351050736888558, "learning_rate": 7.927470534904806e-05, "loss": 0.9531, "step": 1093 }, { "epoch": 0.4959202175883953, "grad_norm": 2.6803966572788314, "learning_rate": 7.934723481414325e-05, "loss": 0.9467, "step": 1094 }, { "epoch": 0.49637352674524027, "grad_norm": 2.289641312674465, "learning_rate": 7.941976427923845e-05, "loss": 0.9038, "step": 1095 }, { "epoch": 0.4968268359020852, "grad_norm": 2.425630692644342, "learning_rate": 7.949229374433364e-05, "loss": 0.932, "step": 1096 }, { "epoch": 0.4972801450589302, "grad_norm": 2.2846383408461426, "learning_rate": 7.956482320942883e-05, "loss": 0.9415, "step": 1097 }, { "epoch": 0.49773345421577514, "grad_norm": 2.0699897017654996, "learning_rate": 7.963735267452403e-05, "loss": 0.9381, "step": 1098 }, { "epoch": 0.49818676337262013, "grad_norm": 1.6040296720231002, "learning_rate": 7.970988213961922e-05, "loss": 0.9077, "step": 1099 }, { "epoch": 0.4986400725294651, "grad_norm": 2.9515464867478127, "learning_rate": 7.978241160471441e-05, "loss": 0.9248, "step": 1100 }, { "epoch": 0.49909338168631007, "grad_norm": 2.7109636171352878, "learning_rate": 7.985494106980962e-05, "loss": 0.9466, "step": 1101 }, { "epoch": 0.499546690843155, "grad_norm": 1.7343425595722934, "learning_rate": 7.99274705349048e-05, "loss": 0.9299, "step": 1102 }, { "epoch": 0.5, "grad_norm": 1.4764011414013212, "learning_rate": 8e-05, "loss": 0.9386, "step": 1103 }, { "epoch": 0.5004533091568449, "grad_norm": 2.695670112637328, "learning_rate": 7.999999799694122e-05, "loss": 0.9023, "step": 1104 }, { "epoch": 0.50090661831369, "grad_norm": 2.2925051754476518, "learning_rate": 7.99999919877651e-05, "loss": 0.9321, "step": 1105 }, { "epoch": 0.5013599274705349, "grad_norm": 2.2740432513920323, "learning_rate": 7.99999819724722e-05, "loss": 0.9286, "step": 1106 }, { "epoch": 0.5018132366273799, "grad_norm": 2.2060117856176227, "learning_rate": 7.999996795106356e-05, "loss": 0.9207, "step": 1107 }, { "epoch": 0.5022665457842248, "grad_norm": 2.15396850113845, "learning_rate": 7.999994992354056e-05, "loss": 0.9383, "step": 1108 }, { "epoch": 0.5027198549410699, "grad_norm": 1.8069551985266765, "learning_rate": 7.999992788990503e-05, "loss": 0.9442, "step": 1109 }, { "epoch": 0.5031731640979148, "grad_norm": 2.3582115363757667, "learning_rate": 7.999990185015916e-05, "loss": 0.9224, "step": 1110 }, { "epoch": 0.5036264732547597, "grad_norm": 2.0277008831098047, "learning_rate": 7.999987180430555e-05, "loss": 0.9092, "step": 1111 }, { "epoch": 0.5040797824116047, "grad_norm": 2.2899359432657618, "learning_rate": 7.999983775234725e-05, "loss": 0.9266, "step": 1112 }, { "epoch": 0.5045330915684497, "grad_norm": 1.9631974550532858, "learning_rate": 7.999979969428761e-05, "loss": 0.9345, "step": 1113 }, { "epoch": 0.5049864007252947, "grad_norm": 2.1380498594261144, "learning_rate": 7.99997576301305e-05, "loss": 0.9049, "step": 1114 }, { "epoch": 0.5054397098821396, "grad_norm": 1.9504991230032644, "learning_rate": 7.999971155988009e-05, "loss": 0.9342, "step": 1115 }, { "epoch": 0.5058930190389845, "grad_norm": 2.341121499622892, "learning_rate": 7.999966148354102e-05, "loss": 0.9186, "step": 1116 }, { "epoch": 0.5063463281958296, "grad_norm": 1.9971894307381017, "learning_rate": 7.999960740111828e-05, "loss": 0.9294, "step": 1117 }, { "epoch": 0.5067996373526745, "grad_norm": 2.0086681838912948, "learning_rate": 7.999954931261732e-05, "loss": 0.9267, "step": 1118 }, { "epoch": 0.5072529465095195, "grad_norm": 1.6778690218469297, "learning_rate": 7.999948721804394e-05, "loss": 0.9151, "step": 1119 }, { "epoch": 0.5077062556663645, "grad_norm": 2.530369648740528, "learning_rate": 7.999942111740435e-05, "loss": 0.9356, "step": 1120 }, { "epoch": 0.5081595648232095, "grad_norm": 2.312473866902849, "learning_rate": 7.999935101070518e-05, "loss": 0.941, "step": 1121 }, { "epoch": 0.5086128739800544, "grad_norm": 1.6581041886500867, "learning_rate": 7.999927689795345e-05, "loss": 0.952, "step": 1122 }, { "epoch": 0.5090661831368993, "grad_norm": 1.3904681149437466, "learning_rate": 7.999919877915658e-05, "loss": 0.9541, "step": 1123 }, { "epoch": 0.5095194922937444, "grad_norm": 2.497886380008753, "learning_rate": 7.999911665432241e-05, "loss": 0.9381, "step": 1124 }, { "epoch": 0.5099728014505893, "grad_norm": 1.8857460370201293, "learning_rate": 7.999903052345913e-05, "loss": 0.9345, "step": 1125 }, { "epoch": 0.5104261106074343, "grad_norm": 2.1569080203720494, "learning_rate": 7.99989403865754e-05, "loss": 0.9197, "step": 1126 }, { "epoch": 0.5108794197642792, "grad_norm": 1.9615290493573445, "learning_rate": 7.999884624368025e-05, "loss": 0.9378, "step": 1127 }, { "epoch": 0.5113327289211242, "grad_norm": 1.993796741702161, "learning_rate": 7.999874809478308e-05, "loss": 0.9248, "step": 1128 }, { "epoch": 0.5117860380779692, "grad_norm": 1.510655058083619, "learning_rate": 7.999864593989372e-05, "loss": 0.9496, "step": 1129 }, { "epoch": 0.5122393472348141, "grad_norm": 2.4776453926090585, "learning_rate": 7.999853977902244e-05, "loss": 0.9512, "step": 1130 }, { "epoch": 0.5126926563916591, "grad_norm": 2.103657395433103, "learning_rate": 7.999842961217983e-05, "loss": 0.9367, "step": 1131 }, { "epoch": 0.5131459655485041, "grad_norm": 1.9812817697375218, "learning_rate": 7.999831543937695e-05, "loss": 0.9307, "step": 1132 }, { "epoch": 0.513599274705349, "grad_norm": 1.697164342042479, "learning_rate": 7.999819726062522e-05, "loss": 0.9359, "step": 1133 }, { "epoch": 0.514052583862194, "grad_norm": 1.9589356983806658, "learning_rate": 7.999807507593648e-05, "loss": 0.946, "step": 1134 }, { "epoch": 0.514505893019039, "grad_norm": 1.4351637273159832, "learning_rate": 7.999794888532299e-05, "loss": 0.9311, "step": 1135 }, { "epoch": 0.514959202175884, "grad_norm": 2.502173387766251, "learning_rate": 7.999781868879735e-05, "loss": 0.9466, "step": 1136 }, { "epoch": 0.5154125113327289, "grad_norm": 2.0352046025474713, "learning_rate": 7.999768448637261e-05, "loss": 0.9388, "step": 1137 }, { "epoch": 0.5158658204895739, "grad_norm": 1.881696184478307, "learning_rate": 7.999754627806223e-05, "loss": 0.9446, "step": 1138 }, { "epoch": 0.5163191296464189, "grad_norm": 1.8376949912854326, "learning_rate": 7.999740406388004e-05, "loss": 0.9462, "step": 1139 }, { "epoch": 0.5167724388032638, "grad_norm": 1.8035638595031986, "learning_rate": 7.999725784384029e-05, "loss": 0.931, "step": 1140 }, { "epoch": 0.5172257479601088, "grad_norm": 1.3988736284030177, "learning_rate": 7.99971076179576e-05, "loss": 0.9448, "step": 1141 }, { "epoch": 0.5176790571169537, "grad_norm": 1.368310315317596, "learning_rate": 7.999695338624704e-05, "loss": 0.9166, "step": 1142 }, { "epoch": 0.5181323662737988, "grad_norm": 1.1847295092246892, "learning_rate": 7.999679514872404e-05, "loss": 0.9199, "step": 1143 }, { "epoch": 0.5185856754306437, "grad_norm": 1.6034456483432264, "learning_rate": 7.999663290540447e-05, "loss": 0.9346, "step": 1144 }, { "epoch": 0.5190389845874886, "grad_norm": 1.1223924091267217, "learning_rate": 7.999646665630457e-05, "loss": 0.9315, "step": 1145 }, { "epoch": 0.5194922937443336, "grad_norm": 2.2708987539565246, "learning_rate": 7.999629640144097e-05, "loss": 0.9488, "step": 1146 }, { "epoch": 0.5199456029011786, "grad_norm": 1.7764572636641531, "learning_rate": 7.999612214083075e-05, "loss": 0.9195, "step": 1147 }, { "epoch": 0.5203989120580236, "grad_norm": 1.550365515234303, "learning_rate": 7.999594387449135e-05, "loss": 0.9383, "step": 1148 }, { "epoch": 0.5208522212148685, "grad_norm": 1.7845138918426924, "learning_rate": 7.999576160244063e-05, "loss": 0.9345, "step": 1149 }, { "epoch": 0.5213055303717135, "grad_norm": 0.9496993566026093, "learning_rate": 7.999557532469683e-05, "loss": 0.9338, "step": 1150 }, { "epoch": 0.5217588395285585, "grad_norm": 1.4664221535426507, "learning_rate": 7.999538504127863e-05, "loss": 0.9516, "step": 1151 }, { "epoch": 0.5222121486854034, "grad_norm": 1.0281250562577597, "learning_rate": 7.999519075220506e-05, "loss": 0.9194, "step": 1152 }, { "epoch": 0.5226654578422484, "grad_norm": 1.5976559252038354, "learning_rate": 7.999499245749559e-05, "loss": 0.9383, "step": 1153 }, { "epoch": 0.5231187669990934, "grad_norm": 1.654659855912099, "learning_rate": 7.99947901571701e-05, "loss": 0.9203, "step": 1154 }, { "epoch": 0.5235720761559384, "grad_norm": 0.9466408928244222, "learning_rate": 7.999458385124881e-05, "loss": 0.9326, "step": 1155 }, { "epoch": 0.5240253853127833, "grad_norm": 1.8925501229806927, "learning_rate": 7.999437353975243e-05, "loss": 0.9143, "step": 1156 }, { "epoch": 0.5244786944696282, "grad_norm": 1.4996031490785258, "learning_rate": 7.999415922270198e-05, "loss": 0.9327, "step": 1157 }, { "epoch": 0.5249320036264733, "grad_norm": 1.4119384062680873, "learning_rate": 7.999394090011896e-05, "loss": 0.9482, "step": 1158 }, { "epoch": 0.5253853127833182, "grad_norm": 1.1803796683777192, "learning_rate": 7.99937185720252e-05, "loss": 0.9299, "step": 1159 }, { "epoch": 0.5258386219401632, "grad_norm": 1.8550921726156167, "learning_rate": 7.999349223844299e-05, "loss": 0.9373, "step": 1160 }, { "epoch": 0.5262919310970081, "grad_norm": 1.1433555743769983, "learning_rate": 7.9993261899395e-05, "loss": 0.9201, "step": 1161 }, { "epoch": 0.5267452402538532, "grad_norm": 1.5592202948602767, "learning_rate": 7.999302755490429e-05, "loss": 0.932, "step": 1162 }, { "epoch": 0.5271985494106981, "grad_norm": 1.2656121878911029, "learning_rate": 7.999278920499434e-05, "loss": 0.9446, "step": 1163 }, { "epoch": 0.527651858567543, "grad_norm": 1.2928889801566954, "learning_rate": 7.9992546849689e-05, "loss": 0.9319, "step": 1164 }, { "epoch": 0.528105167724388, "grad_norm": 1.3797198159110364, "learning_rate": 7.999230048901257e-05, "loss": 0.9183, "step": 1165 }, { "epoch": 0.528558476881233, "grad_norm": 1.5960311777159253, "learning_rate": 7.999205012298972e-05, "loss": 0.9271, "step": 1166 }, { "epoch": 0.529011786038078, "grad_norm": 0.9653553560637108, "learning_rate": 7.99917957516455e-05, "loss": 0.9255, "step": 1167 }, { "epoch": 0.5294650951949229, "grad_norm": 1.7923802030136942, "learning_rate": 7.999153737500539e-05, "loss": 0.9154, "step": 1168 }, { "epoch": 0.529918404351768, "grad_norm": 1.3826587408325095, "learning_rate": 7.999127499309528e-05, "loss": 0.9532, "step": 1169 }, { "epoch": 0.5303717135086129, "grad_norm": 1.4846656499424202, "learning_rate": 7.999100860594148e-05, "loss": 0.9265, "step": 1170 }, { "epoch": 0.5308250226654578, "grad_norm": 1.4535982006707384, "learning_rate": 7.999073821357062e-05, "loss": 0.9457, "step": 1171 }, { "epoch": 0.5312783318223028, "grad_norm": 1.5863041789555974, "learning_rate": 7.99904638160098e-05, "loss": 0.9429, "step": 1172 }, { "epoch": 0.5317316409791478, "grad_norm": 1.2546242586166059, "learning_rate": 7.999018541328647e-05, "loss": 0.9277, "step": 1173 }, { "epoch": 0.5321849501359928, "grad_norm": 1.4142749526831937, "learning_rate": 7.998990300542858e-05, "loss": 0.9371, "step": 1174 }, { "epoch": 0.5326382592928377, "grad_norm": 1.3169646383955897, "learning_rate": 7.998961659246435e-05, "loss": 0.9317, "step": 1175 }, { "epoch": 0.5330915684496826, "grad_norm": 1.0731343832370837, "learning_rate": 7.998932617442251e-05, "loss": 0.9134, "step": 1176 }, { "epoch": 0.5335448776065277, "grad_norm": 1.4176139039321232, "learning_rate": 7.998903175133212e-05, "loss": 0.9276, "step": 1177 }, { "epoch": 0.5339981867633726, "grad_norm": 1.4422524326429709, "learning_rate": 7.998873332322267e-05, "loss": 0.9258, "step": 1178 }, { "epoch": 0.5344514959202176, "grad_norm": 1.5488328519118832, "learning_rate": 7.998843089012406e-05, "loss": 0.9338, "step": 1179 }, { "epoch": 0.5349048050770625, "grad_norm": 1.1924329779085927, "learning_rate": 7.998812445206657e-05, "loss": 0.929, "step": 1180 }, { "epoch": 0.5353581142339076, "grad_norm": 1.5778714592525653, "learning_rate": 7.998781400908089e-05, "loss": 0.9354, "step": 1181 }, { "epoch": 0.5358114233907525, "grad_norm": 1.4769312241470074, "learning_rate": 7.998749956119812e-05, "loss": 0.9259, "step": 1182 }, { "epoch": 0.5362647325475974, "grad_norm": 1.0079471194190697, "learning_rate": 7.998718110844973e-05, "loss": 0.9068, "step": 1183 }, { "epoch": 0.5367180417044425, "grad_norm": 1.9703380999389104, "learning_rate": 7.998685865086766e-05, "loss": 0.9184, "step": 1184 }, { "epoch": 0.5371713508612874, "grad_norm": 1.0223939694929478, "learning_rate": 7.998653218848416e-05, "loss": 0.9493, "step": 1185 }, { "epoch": 0.5376246600181324, "grad_norm": 1.2875613889844806, "learning_rate": 7.998620172133194e-05, "loss": 0.9417, "step": 1186 }, { "epoch": 0.5380779691749773, "grad_norm": 1.5459725293825066, "learning_rate": 7.998586724944411e-05, "loss": 0.9454, "step": 1187 }, { "epoch": 0.5385312783318223, "grad_norm": 1.054289505560474, "learning_rate": 7.998552877285417e-05, "loss": 0.9192, "step": 1188 }, { "epoch": 0.5389845874886673, "grad_norm": 1.817488217352224, "learning_rate": 7.998518629159599e-05, "loss": 0.9387, "step": 1189 }, { "epoch": 0.5394378966455122, "grad_norm": 1.162264441379759, "learning_rate": 7.998483980570389e-05, "loss": 0.9366, "step": 1190 }, { "epoch": 0.5398912058023572, "grad_norm": 1.9368595731406375, "learning_rate": 7.998448931521257e-05, "loss": 0.9244, "step": 1191 }, { "epoch": 0.5403445149592022, "grad_norm": 1.1519053366737986, "learning_rate": 7.998413482015714e-05, "loss": 0.9509, "step": 1192 }, { "epoch": 0.5407978241160472, "grad_norm": 2.133965877373918, "learning_rate": 7.998377632057308e-05, "loss": 0.9249, "step": 1193 }, { "epoch": 0.5412511332728921, "grad_norm": 1.7186559651137223, "learning_rate": 7.998341381649634e-05, "loss": 0.926, "step": 1194 }, { "epoch": 0.541704442429737, "grad_norm": 1.3636483375018569, "learning_rate": 7.998304730796318e-05, "loss": 0.9326, "step": 1195 }, { "epoch": 0.5421577515865821, "grad_norm": 1.4906512702901547, "learning_rate": 7.998267679501031e-05, "loss": 0.9365, "step": 1196 }, { "epoch": 0.542611060743427, "grad_norm": 1.2461624683024761, "learning_rate": 7.998230227767486e-05, "loss": 0.929, "step": 1197 }, { "epoch": 0.543064369900272, "grad_norm": 1.4783227903279204, "learning_rate": 7.998192375599435e-05, "loss": 0.9255, "step": 1198 }, { "epoch": 0.543517679057117, "grad_norm": 1.2780109133170123, "learning_rate": 7.998154123000666e-05, "loss": 0.9361, "step": 1199 }, { "epoch": 0.543970988213962, "grad_norm": 1.452412540590342, "learning_rate": 7.998115469975011e-05, "loss": 0.9511, "step": 1200 }, { "epoch": 0.5444242973708069, "grad_norm": 1.1088623461061695, "learning_rate": 7.998076416526342e-05, "loss": 0.963, "step": 1201 }, { "epoch": 0.5448776065276518, "grad_norm": 1.6431126138045689, "learning_rate": 7.998036962658569e-05, "loss": 0.9382, "step": 1202 }, { "epoch": 0.5453309156844969, "grad_norm": 0.9347390343264433, "learning_rate": 7.997997108375644e-05, "loss": 0.9279, "step": 1203 }, { "epoch": 0.5457842248413418, "grad_norm": 1.7019736890586254, "learning_rate": 7.99795685368156e-05, "loss": 0.9393, "step": 1204 }, { "epoch": 0.5462375339981868, "grad_norm": 1.3404063688894605, "learning_rate": 7.997916198580346e-05, "loss": 0.901, "step": 1205 }, { "epoch": 0.5466908431550317, "grad_norm": 1.360721351867984, "learning_rate": 7.997875143076075e-05, "loss": 0.9416, "step": 1206 }, { "epoch": 0.5471441523118767, "grad_norm": 1.151947628961951, "learning_rate": 7.99783368717286e-05, "loss": 0.949, "step": 1207 }, { "epoch": 0.5475974614687217, "grad_norm": 1.5328976542994077, "learning_rate": 7.997791830874851e-05, "loss": 0.9304, "step": 1208 }, { "epoch": 0.5480507706255666, "grad_norm": 2.661320412141175, "learning_rate": 7.99774957418624e-05, "loss": 0.981, "step": 1209 }, { "epoch": 0.5485040797824116, "grad_norm": 2.093724040810777, "learning_rate": 7.997706917111263e-05, "loss": 0.9455, "step": 1210 }, { "epoch": 0.5489573889392566, "grad_norm": 3.5408644929709077, "learning_rate": 7.997663859654188e-05, "loss": 0.9449, "step": 1211 }, { "epoch": 0.5494106980961015, "grad_norm": 2.4651567078238648, "learning_rate": 7.997620401819327e-05, "loss": 0.9304, "step": 1212 }, { "epoch": 0.5498640072529465, "grad_norm": 458.4225741727906, "learning_rate": 7.997576543611035e-05, "loss": 5.2168, "step": 1213 }, { "epoch": 0.5503173164097914, "grad_norm": 313.7124630810254, "learning_rate": 7.997532285033703e-05, "loss": 6.9433, "step": 1214 }, { "epoch": 0.5507706255666365, "grad_norm": 6.360882298827062, "learning_rate": 7.997487626091764e-05, "loss": 1.0227, "step": 1215 }, { "epoch": 0.5512239347234814, "grad_norm": 3.158002357006737, "learning_rate": 7.997442566789692e-05, "loss": 0.9592, "step": 1216 }, { "epoch": 0.5516772438803264, "grad_norm": 3.692694630644349, "learning_rate": 7.997397107131998e-05, "loss": 0.9621, "step": 1217 }, { "epoch": 0.5521305530371714, "grad_norm": 4.965938920823364, "learning_rate": 7.997351247123235e-05, "loss": 0.9933, "step": 1218 }, { "epoch": 0.5525838621940163, "grad_norm": 8.78811334609886, "learning_rate": 7.997304986767997e-05, "loss": 1.2122, "step": 1219 }, { "epoch": 0.5530371713508613, "grad_norm": 92.38026935671674, "learning_rate": 7.997258326070917e-05, "loss": 2.2687, "step": 1220 }, { "epoch": 0.5534904805077062, "grad_norm": 81.58266715608298, "learning_rate": 7.997211265036668e-05, "loss": 1.5842, "step": 1221 }, { "epoch": 0.5539437896645513, "grad_norm": 11.537055091633448, "learning_rate": 7.997163803669964e-05, "loss": 1.3824, "step": 1222 }, { "epoch": 0.5543970988213962, "grad_norm": 7.456925964856799, "learning_rate": 7.997115941975556e-05, "loss": 1.154, "step": 1223 }, { "epoch": 0.5548504079782411, "grad_norm": 6.303112625292072, "learning_rate": 7.99706767995824e-05, "loss": 1.1473, "step": 1224 }, { "epoch": 0.5553037171350861, "grad_norm": 3.3845628152008422, "learning_rate": 7.997019017622848e-05, "loss": 1.0683, "step": 1225 }, { "epoch": 0.5557570262919311, "grad_norm": 1.7646647907574375, "learning_rate": 7.996969954974255e-05, "loss": 1.0392, "step": 1226 }, { "epoch": 0.5562103354487761, "grad_norm": 4.115075438053526, "learning_rate": 7.996920492017373e-05, "loss": 1.0611, "step": 1227 }, { "epoch": 0.556663644605621, "grad_norm": 3.3623463084528726, "learning_rate": 7.996870628757159e-05, "loss": 1.1618, "step": 1228 }, { "epoch": 0.557116953762466, "grad_norm": 48.15532672489662, "learning_rate": 7.996820365198603e-05, "loss": 2.1243, "step": 1229 }, { "epoch": 0.557570262919311, "grad_norm": 21.134767678430123, "learning_rate": 7.996769701346741e-05, "loss": 1.5914, "step": 1230 }, { "epoch": 0.5580235720761559, "grad_norm": 75.78279999219518, "learning_rate": 7.996718637206649e-05, "loss": 5.1073, "step": 1231 }, { "epoch": 0.5584768812330009, "grad_norm": 126.13380963663259, "learning_rate": 7.996667172783438e-05, "loss": 6.331, "step": 1232 }, { "epoch": 0.5589301903898459, "grad_norm": 51.96350051580755, "learning_rate": 7.996615308082265e-05, "loss": 1.5288, "step": 1233 }, { "epoch": 0.5593834995466909, "grad_norm": 28.80271282637818, "learning_rate": 7.996563043108322e-05, "loss": 2.1403, "step": 1234 }, { "epoch": 0.5598368087035358, "grad_norm": 97.37830535250542, "learning_rate": 7.996510377866844e-05, "loss": 2.3812, "step": 1235 }, { "epoch": 0.5602901178603807, "grad_norm": 49.158381903799615, "learning_rate": 7.996457312363107e-05, "loss": 3.1171, "step": 1236 }, { "epoch": 0.5607434270172258, "grad_norm": 12.38216600435011, "learning_rate": 7.996403846602424e-05, "loss": 1.8746, "step": 1237 }, { "epoch": 0.5611967361740707, "grad_norm": 4.324586692507766, "learning_rate": 7.99634998059015e-05, "loss": 1.4435, "step": 1238 }, { "epoch": 0.5616500453309157, "grad_norm": 3.3462980222683325, "learning_rate": 7.996295714331683e-05, "loss": 1.4027, "step": 1239 }, { "epoch": 0.5621033544877606, "grad_norm": 3.605305558795563, "learning_rate": 7.996241047832453e-05, "loss": 1.3149, "step": 1240 }, { "epoch": 0.5625566636446057, "grad_norm": 4.471247860157743, "learning_rate": 7.996185981097939e-05, "loss": 1.2441, "step": 1241 }, { "epoch": 0.5630099728014506, "grad_norm": 2.885832077838531, "learning_rate": 7.996130514133653e-05, "loss": 1.2062, "step": 1242 }, { "epoch": 0.5634632819582955, "grad_norm": 2.6359807712590837, "learning_rate": 7.996074646945152e-05, "loss": 1.1397, "step": 1243 }, { "epoch": 0.5639165911151405, "grad_norm": 2.7926486576159095, "learning_rate": 7.996018379538031e-05, "loss": 1.0891, "step": 1244 }, { "epoch": 0.5643699002719855, "grad_norm": 2.5005195936382316, "learning_rate": 7.995961711917926e-05, "loss": 1.1666, "step": 1245 }, { "epoch": 0.5648232094288305, "grad_norm": 2.584819171582866, "learning_rate": 7.99590464409051e-05, "loss": 1.1488, "step": 1246 }, { "epoch": 0.5652765185856754, "grad_norm": 61.156546306424815, "learning_rate": 7.995847176061501e-05, "loss": 1.5666, "step": 1247 }, { "epoch": 0.5657298277425205, "grad_norm": 3.021898463670056, "learning_rate": 7.995789307836653e-05, "loss": 1.1849, "step": 1248 }, { "epoch": 0.5661831368993654, "grad_norm": 1.6544180970863034, "learning_rate": 7.995731039421763e-05, "loss": 1.1253, "step": 1249 }, { "epoch": 0.5666364460562103, "grad_norm": 2.6431154418775753, "learning_rate": 7.995672370822667e-05, "loss": 1.1008, "step": 1250 }, { "epoch": 0.5670897552130553, "grad_norm": 2.200698773121035, "learning_rate": 7.995613302045239e-05, "loss": 1.1226, "step": 1251 }, { "epoch": 0.5675430643699003, "grad_norm": 26.565601334752664, "learning_rate": 7.995553833095397e-05, "loss": 1.3236, "step": 1252 }, { "epoch": 0.5679963735267453, "grad_norm": 6.4679922883095236, "learning_rate": 7.995493963979094e-05, "loss": 1.2446, "step": 1253 }, { "epoch": 0.5684496826835902, "grad_norm": 3.493636257874444, "learning_rate": 7.995433694702329e-05, "loss": 1.1342, "step": 1254 }, { "epoch": 0.5689029918404351, "grad_norm": 1.675744974231861, "learning_rate": 7.995373025271138e-05, "loss": 1.0905, "step": 1255 }, { "epoch": 0.5693563009972802, "grad_norm": 1.8046539521240326, "learning_rate": 7.995311955691595e-05, "loss": 1.0564, "step": 1256 }, { "epoch": 0.5698096101541251, "grad_norm": 1.8734850415748778, "learning_rate": 7.995250485969818e-05, "loss": 1.0821, "step": 1257 }, { "epoch": 0.5702629193109701, "grad_norm": 1.9209030292315845, "learning_rate": 7.995188616111963e-05, "loss": 1.1502, "step": 1258 }, { "epoch": 0.570716228467815, "grad_norm": 1.5317055867851805, "learning_rate": 7.995126346124226e-05, "loss": 1.0637, "step": 1259 }, { "epoch": 0.57116953762466, "grad_norm": 1.5788066546923587, "learning_rate": 7.995063676012845e-05, "loss": 1.0521, "step": 1260 }, { "epoch": 0.571622846781505, "grad_norm": 1.8816817566078492, "learning_rate": 7.995000605784095e-05, "loss": 1.0306, "step": 1261 }, { "epoch": 0.5720761559383499, "grad_norm": 2.884448257782959, "learning_rate": 7.994937135444293e-05, "loss": 1.0761, "step": 1262 }, { "epoch": 0.572529465095195, "grad_norm": 1.8975965334554525, "learning_rate": 7.994873264999798e-05, "loss": 1.0934, "step": 1263 }, { "epoch": 0.5729827742520399, "grad_norm": 2.233408106192115, "learning_rate": 7.994808994457002e-05, "loss": 1.0733, "step": 1264 }, { "epoch": 0.5734360834088849, "grad_norm": 1.5195251817120963, "learning_rate": 7.994744323822347e-05, "loss": 1.0589, "step": 1265 }, { "epoch": 0.5738893925657298, "grad_norm": 1.1418443348878775, "learning_rate": 7.994679253102305e-05, "loss": 1.0423, "step": 1266 }, { "epoch": 0.5743427017225748, "grad_norm": 1.6079964726642064, "learning_rate": 7.994613782303397e-05, "loss": 1.0534, "step": 1267 }, { "epoch": 0.5747960108794198, "grad_norm": 1.7167391757859043, "learning_rate": 7.994547911432179e-05, "loss": 1.0216, "step": 1268 }, { "epoch": 0.5752493200362647, "grad_norm": 1.953630930445719, "learning_rate": 7.994481640495248e-05, "loss": 1.0546, "step": 1269 }, { "epoch": 0.5757026291931097, "grad_norm": 1.634860135400939, "learning_rate": 7.994414969499241e-05, "loss": 1.0288, "step": 1270 }, { "epoch": 0.5761559383499547, "grad_norm": 1.4514753665306004, "learning_rate": 7.994347898450835e-05, "loss": 1.0309, "step": 1271 }, { "epoch": 0.5766092475067996, "grad_norm": 2.0419521742444844, "learning_rate": 7.994280427356748e-05, "loss": 1.001, "step": 1272 }, { "epoch": 0.5770625566636446, "grad_norm": 1.8733591217118115, "learning_rate": 7.994212556223737e-05, "loss": 1.0026, "step": 1273 }, { "epoch": 0.5775158658204895, "grad_norm": 1.6836158781503174, "learning_rate": 7.994144285058601e-05, "loss": 1.0068, "step": 1274 }, { "epoch": 0.5779691749773346, "grad_norm": 1.1864778050994311, "learning_rate": 7.994075613868175e-05, "loss": 1.0058, "step": 1275 }, { "epoch": 0.5784224841341795, "grad_norm": 1.8067729507238013, "learning_rate": 7.994006542659338e-05, "loss": 0.9946, "step": 1276 }, { "epoch": 0.5788757932910245, "grad_norm": 1.5073763024346656, "learning_rate": 7.993937071439009e-05, "loss": 0.9928, "step": 1277 }, { "epoch": 0.5793291024478694, "grad_norm": 1.927566117439918, "learning_rate": 7.993867200214143e-05, "loss": 0.9681, "step": 1278 }, { "epoch": 0.5797824116047144, "grad_norm": 1.8000031188328407, "learning_rate": 7.993796928991739e-05, "loss": 0.9995, "step": 1279 }, { "epoch": 0.5802357207615594, "grad_norm": 1.907114469392778, "learning_rate": 7.993726257778835e-05, "loss": 0.9595, "step": 1280 }, { "epoch": 0.5806890299184043, "grad_norm": 1.808329208408959, "learning_rate": 7.99365518658251e-05, "loss": 0.9383, "step": 1281 }, { "epoch": 0.5811423390752494, "grad_norm": 1.6880834866555432, "learning_rate": 7.99358371540988e-05, "loss": 0.9799, "step": 1282 }, { "epoch": 0.5815956482320943, "grad_norm": 1.2229675518457344, "learning_rate": 7.993511844268104e-05, "loss": 0.9725, "step": 1283 }, { "epoch": 0.5820489573889392, "grad_norm": 1.5338957692342234, "learning_rate": 7.99343957316438e-05, "loss": 0.9913, "step": 1284 }, { "epoch": 0.5825022665457842, "grad_norm": 1.1766546562858549, "learning_rate": 7.993366902105947e-05, "loss": 0.9664, "step": 1285 }, { "epoch": 0.5829555757026292, "grad_norm": 1.2346382879692552, "learning_rate": 7.993293831100082e-05, "loss": 0.9574, "step": 1286 }, { "epoch": 0.5834088848594742, "grad_norm": 1.6965676322990313, "learning_rate": 7.993220360154104e-05, "loss": 0.9602, "step": 1287 }, { "epoch": 0.5838621940163191, "grad_norm": 1.918588837761549, "learning_rate": 7.99314648927537e-05, "loss": 0.9952, "step": 1288 }, { "epoch": 0.584315503173164, "grad_norm": 1.621675240384828, "learning_rate": 7.99307221847128e-05, "loss": 0.9499, "step": 1289 }, { "epoch": 0.5847688123300091, "grad_norm": 1.3984305779589283, "learning_rate": 7.992997547749273e-05, "loss": 0.9474, "step": 1290 }, { "epoch": 0.585222121486854, "grad_norm": 2.838204096053592, "learning_rate": 7.992922477116824e-05, "loss": 0.9669, "step": 1291 }, { "epoch": 0.585675430643699, "grad_norm": 2.011885667719017, "learning_rate": 7.992847006581456e-05, "loss": 0.9472, "step": 1292 }, { "epoch": 0.5861287398005439, "grad_norm": 2.8116067743456004, "learning_rate": 7.992771136150725e-05, "loss": 0.9471, "step": 1293 }, { "epoch": 0.586582048957389, "grad_norm": 2.3974385914188003, "learning_rate": 7.99269486583223e-05, "loss": 0.9646, "step": 1294 }, { "epoch": 0.5870353581142339, "grad_norm": 2.588487426700054, "learning_rate": 7.99261819563361e-05, "loss": 0.9845, "step": 1295 }, { "epoch": 0.5874886672710788, "grad_norm": 1.9946409335867252, "learning_rate": 7.992541125562544e-05, "loss": 0.9658, "step": 1296 }, { "epoch": 0.5879419764279239, "grad_norm": 2.266049837869208, "learning_rate": 7.992463655626751e-05, "loss": 0.9722, "step": 1297 }, { "epoch": 0.5883952855847688, "grad_norm": 1.5701754684562426, "learning_rate": 7.992385785833988e-05, "loss": 0.9642, "step": 1298 }, { "epoch": 0.5888485947416138, "grad_norm": 2.202067463055082, "learning_rate": 7.992307516192055e-05, "loss": 0.969, "step": 1299 }, { "epoch": 0.5893019038984587, "grad_norm": 1.8095347433948126, "learning_rate": 7.992228846708792e-05, "loss": 0.9603, "step": 1300 }, { "epoch": 0.5897552130553038, "grad_norm": 1.6509282478582363, "learning_rate": 7.992149777392077e-05, "loss": 0.973, "step": 1301 }, { "epoch": 0.5902085222121487, "grad_norm": 2.2915044849714423, "learning_rate": 7.992070308249828e-05, "loss": 0.9433, "step": 1302 }, { "epoch": 0.5906618313689936, "grad_norm": 1.2766915249749922, "learning_rate": 7.991990439290005e-05, "loss": 0.9688, "step": 1303 }, { "epoch": 0.5911151405258386, "grad_norm": 3.2523074456173195, "learning_rate": 7.991910170520608e-05, "loss": 0.9348, "step": 1304 }, { "epoch": 0.5915684496826836, "grad_norm": 2.7062886295586894, "learning_rate": 7.991829501949676e-05, "loss": 0.9717, "step": 1305 }, { "epoch": 0.5920217588395286, "grad_norm": 2.3460593100765474, "learning_rate": 7.991748433585288e-05, "loss": 0.9519, "step": 1306 }, { "epoch": 0.5924750679963735, "grad_norm": 2.050284758739638, "learning_rate": 7.991666965435562e-05, "loss": 0.9528, "step": 1307 }, { "epoch": 0.5929283771532184, "grad_norm": 2.3173908452231053, "learning_rate": 7.991585097508658e-05, "loss": 0.968, "step": 1308 }, { "epoch": 0.5933816863100635, "grad_norm": 1.664965170663303, "learning_rate": 7.991502829812775e-05, "loss": 0.9529, "step": 1309 }, { "epoch": 0.5938349954669084, "grad_norm": 2.8134508013011033, "learning_rate": 7.991420162356154e-05, "loss": 0.9718, "step": 1310 }, { "epoch": 0.5942883046237534, "grad_norm": 2.0525387646376596, "learning_rate": 7.991337095147072e-05, "loss": 0.9534, "step": 1311 }, { "epoch": 0.5947416137805984, "grad_norm": 2.8079330954088935, "learning_rate": 7.99125362819385e-05, "loss": 0.9752, "step": 1312 }, { "epoch": 0.5951949229374434, "grad_norm": 2.557892818975597, "learning_rate": 7.991169761504847e-05, "loss": 0.9403, "step": 1313 }, { "epoch": 0.5956482320942883, "grad_norm": 2.12967401460863, "learning_rate": 7.991085495088464e-05, "loss": 0.9375, "step": 1314 }, { "epoch": 0.5961015412511332, "grad_norm": 1.9215189073485524, "learning_rate": 7.991000828953137e-05, "loss": 0.951, "step": 1315 }, { "epoch": 0.5965548504079783, "grad_norm": 2.3037538539618345, "learning_rate": 7.990915763107347e-05, "loss": 0.9353, "step": 1316 }, { "epoch": 0.5970081595648232, "grad_norm": 1.805875867735959, "learning_rate": 7.990830297559617e-05, "loss": 0.9474, "step": 1317 }, { "epoch": 0.5974614687216682, "grad_norm": 2.8327858032680733, "learning_rate": 7.990744432318502e-05, "loss": 0.9417, "step": 1318 }, { "epoch": 0.5979147778785131, "grad_norm": 2.4619755996781345, "learning_rate": 7.990658167392604e-05, "loss": 0.9295, "step": 1319 }, { "epoch": 0.5983680870353582, "grad_norm": 2.186343817657232, "learning_rate": 7.990571502790563e-05, "loss": 0.9544, "step": 1320 }, { "epoch": 0.5988213961922031, "grad_norm": 2.1313860980545902, "learning_rate": 7.990484438521057e-05, "loss": 0.9239, "step": 1321 }, { "epoch": 0.599274705349048, "grad_norm": 2.0511770893164254, "learning_rate": 7.990396974592807e-05, "loss": 0.9394, "step": 1322 }, { "epoch": 0.599728014505893, "grad_norm": 1.7728446426029978, "learning_rate": 7.990309111014572e-05, "loss": 0.9418, "step": 1323 }, { "epoch": 0.600181323662738, "grad_norm": 2.526656642597573, "learning_rate": 7.990220847795153e-05, "loss": 0.9463, "step": 1324 }, { "epoch": 0.600634632819583, "grad_norm": 2.303369493056542, "learning_rate": 7.990132184943388e-05, "loss": 0.9578, "step": 1325 }, { "epoch": 0.6010879419764279, "grad_norm": 1.9275862722963868, "learning_rate": 7.990043122468159e-05, "loss": 0.9241, "step": 1326 }, { "epoch": 0.601541251133273, "grad_norm": 1.6665595644686135, "learning_rate": 7.989953660378383e-05, "loss": 0.9378, "step": 1327 }, { "epoch": 0.6019945602901179, "grad_norm": 2.3225013070577667, "learning_rate": 7.989863798683024e-05, "loss": 0.9304, "step": 1328 }, { "epoch": 0.6024478694469628, "grad_norm": 1.915796748216751, "learning_rate": 7.989773537391077e-05, "loss": 0.9569, "step": 1329 }, { "epoch": 0.6029011786038078, "grad_norm": 2.5688479199689462, "learning_rate": 7.989682876511588e-05, "loss": 0.934, "step": 1330 }, { "epoch": 0.6033544877606528, "grad_norm": 2.329107561217384, "learning_rate": 7.989591816053631e-05, "loss": 0.9347, "step": 1331 }, { "epoch": 0.6038077969174978, "grad_norm": 1.7195539103111102, "learning_rate": 7.989500356026328e-05, "loss": 0.9449, "step": 1332 }, { "epoch": 0.6042611060743427, "grad_norm": 1.406267482256429, "learning_rate": 7.989408496438841e-05, "loss": 0.9568, "step": 1333 }, { "epoch": 0.6047144152311876, "grad_norm": 2.6848622742825228, "learning_rate": 7.989316237300368e-05, "loss": 0.9483, "step": 1334 }, { "epoch": 0.6051677243880327, "grad_norm": 2.162609365332896, "learning_rate": 7.989223578620149e-05, "loss": 0.9599, "step": 1335 }, { "epoch": 0.6056210335448776, "grad_norm": 2.0175932601545865, "learning_rate": 7.989130520407464e-05, "loss": 0.9449, "step": 1336 }, { "epoch": 0.6060743427017226, "grad_norm": 1.779589988335226, "learning_rate": 7.989037062671634e-05, "loss": 0.9265, "step": 1337 }, { "epoch": 0.6065276518585675, "grad_norm": 2.1826721080643052, "learning_rate": 7.988943205422018e-05, "loss": 0.9348, "step": 1338 }, { "epoch": 0.6069809610154125, "grad_norm": 1.717886833298261, "learning_rate": 7.988848948668018e-05, "loss": 0.9413, "step": 1339 }, { "epoch": 0.6074342701722575, "grad_norm": 2.2886083528021426, "learning_rate": 7.988754292419073e-05, "loss": 0.9215, "step": 1340 }, { "epoch": 0.6078875793291024, "grad_norm": 1.9866475715961616, "learning_rate": 7.988659236684662e-05, "loss": 0.9438, "step": 1341 }, { "epoch": 0.6083408884859474, "grad_norm": 2.0295994444821113, "learning_rate": 7.988563781474305e-05, "loss": 0.9531, "step": 1342 }, { "epoch": 0.6087941976427924, "grad_norm": 1.5789802353855236, "learning_rate": 7.988467926797566e-05, "loss": 0.9425, "step": 1343 }, { "epoch": 0.6092475067996374, "grad_norm": 2.3128570759172025, "learning_rate": 7.988371672664039e-05, "loss": 0.9562, "step": 1344 }, { "epoch": 0.6097008159564823, "grad_norm": 1.7763972510444845, "learning_rate": 7.98827501908337e-05, "loss": 0.9389, "step": 1345 }, { "epoch": 0.6101541251133273, "grad_norm": 2.359117774790098, "learning_rate": 7.988177966065235e-05, "loss": 0.9398, "step": 1346 }, { "epoch": 0.6106074342701723, "grad_norm": 1.9886390515000991, "learning_rate": 7.988080513619356e-05, "loss": 0.9358, "step": 1347 }, { "epoch": 0.6110607434270172, "grad_norm": 2.22824617226276, "learning_rate": 7.987982661755492e-05, "loss": 0.9424, "step": 1348 }, { "epoch": 0.6115140525838622, "grad_norm": 1.906587053746284, "learning_rate": 7.987884410483446e-05, "loss": 0.9463, "step": 1349 }, { "epoch": 0.6119673617407072, "grad_norm": 2.1392049623450737, "learning_rate": 7.987785759813055e-05, "loss": 0.9347, "step": 1350 }, { "epoch": 0.6124206708975521, "grad_norm": 1.8769844890974132, "learning_rate": 7.9876867097542e-05, "loss": 0.943, "step": 1351 }, { "epoch": 0.6128739800543971, "grad_norm": 2.1337407439509306, "learning_rate": 7.987587260316802e-05, "loss": 0.9289, "step": 1352 }, { "epoch": 0.613327289211242, "grad_norm": 1.7303312413707148, "learning_rate": 7.987487411510819e-05, "loss": 0.9369, "step": 1353 }, { "epoch": 0.6137805983680871, "grad_norm": 2.2776315754828045, "learning_rate": 7.987387163346255e-05, "loss": 0.9286, "step": 1354 }, { "epoch": 0.614233907524932, "grad_norm": 1.9913971121024345, "learning_rate": 7.987286515833146e-05, "loss": 0.9322, "step": 1355 }, { "epoch": 0.614687216681777, "grad_norm": 1.9592980400938038, "learning_rate": 7.987185468981574e-05, "loss": 0.9151, "step": 1356 }, { "epoch": 0.6151405258386219, "grad_norm": 1.7373336694492125, "learning_rate": 7.98708402280166e-05, "loss": 0.9459, "step": 1357 }, { "epoch": 0.6155938349954669, "grad_norm": 2.089939243807353, "learning_rate": 7.986982177303564e-05, "loss": 0.9236, "step": 1358 }, { "epoch": 0.6160471441523119, "grad_norm": 1.7025325541452467, "learning_rate": 7.986879932497485e-05, "loss": 0.9217, "step": 1359 }, { "epoch": 0.6165004533091568, "grad_norm": 2.3096298590643203, "learning_rate": 7.986777288393663e-05, "loss": 0.9287, "step": 1360 }, { "epoch": 0.6169537624660019, "grad_norm": 2.0271215119783506, "learning_rate": 7.98667424500238e-05, "loss": 0.9447, "step": 1361 }, { "epoch": 0.6174070716228468, "grad_norm": 1.8882906808108777, "learning_rate": 7.986570802333954e-05, "loss": 0.9269, "step": 1362 }, { "epoch": 0.6178603807796917, "grad_norm": 1.610086784862466, "learning_rate": 7.986466960398744e-05, "loss": 0.9241, "step": 1363 }, { "epoch": 0.6183136899365367, "grad_norm": 2.1475935050972903, "learning_rate": 7.986362719207153e-05, "loss": 0.9232, "step": 1364 }, { "epoch": 0.6187669990933817, "grad_norm": 1.588744859714186, "learning_rate": 7.98625807876962e-05, "loss": 0.9199, "step": 1365 }, { "epoch": 0.6192203082502267, "grad_norm": 2.352295992110243, "learning_rate": 7.986153039096625e-05, "loss": 0.9317, "step": 1366 }, { "epoch": 0.6196736174070716, "grad_norm": 2.0995241256689825, "learning_rate": 7.98604760019869e-05, "loss": 0.9327, "step": 1367 }, { "epoch": 0.6201269265639165, "grad_norm": 1.8197922833989448, "learning_rate": 7.985941762086371e-05, "loss": 0.9342, "step": 1368 }, { "epoch": 0.6205802357207616, "grad_norm": 2.0819771219754437, "learning_rate": 7.985835524770271e-05, "loss": 0.9953, "step": 1369 }, { "epoch": 0.6210335448776065, "grad_norm": 1.2857633386561893, "learning_rate": 7.985728888261027e-05, "loss": 0.9169, "step": 1370 }, { "epoch": 0.6214868540344515, "grad_norm": 1.2483107286995725, "learning_rate": 7.985621852569323e-05, "loss": 0.9301, "step": 1371 }, { "epoch": 0.6219401631912964, "grad_norm": 0.89864803664589, "learning_rate": 7.985514417705877e-05, "loss": 0.9325, "step": 1372 }, { "epoch": 0.6223934723481415, "grad_norm": 1.09216750285962, "learning_rate": 7.985406583681449e-05, "loss": 0.9302, "step": 1373 }, { "epoch": 0.6228467815049864, "grad_norm": 1.292905323669219, "learning_rate": 7.985298350506837e-05, "loss": 0.9301, "step": 1374 }, { "epoch": 0.6233000906618313, "grad_norm": 1.1108136847149448, "learning_rate": 7.985189718192884e-05, "loss": 0.9462, "step": 1375 }, { "epoch": 0.6237533998186764, "grad_norm": 1.1866522145441745, "learning_rate": 7.985080686750468e-05, "loss": 0.9292, "step": 1376 }, { "epoch": 0.6242067089755213, "grad_norm": 1.4535054206120084, "learning_rate": 7.98497125619051e-05, "loss": 0.9456, "step": 1377 }, { "epoch": 0.6246600181323663, "grad_norm": 1.5542611638723598, "learning_rate": 7.984861426523968e-05, "loss": 0.956, "step": 1378 }, { "epoch": 0.6251133272892112, "grad_norm": 0.6866864281773077, "learning_rate": 7.984751197761845e-05, "loss": 0.9383, "step": 1379 }, { "epoch": 0.6255666364460563, "grad_norm": 1.6741905060897702, "learning_rate": 7.984640569915176e-05, "loss": 0.9312, "step": 1380 }, { "epoch": 0.6260199456029012, "grad_norm": 1.311452940152574, "learning_rate": 7.984529542995043e-05, "loss": 0.9386, "step": 1381 }, { "epoch": 0.6264732547597461, "grad_norm": 1.1132470777439514, "learning_rate": 7.984418117012568e-05, "loss": 0.9467, "step": 1382 }, { "epoch": 0.6269265639165911, "grad_norm": 1.264760185545391, "learning_rate": 7.984306291978908e-05, "loss": 0.9292, "step": 1383 }, { "epoch": 0.6273798730734361, "grad_norm": 1.3272033012507551, "learning_rate": 7.984194067905263e-05, "loss": 0.9171, "step": 1384 }, { "epoch": 0.6278331822302811, "grad_norm": 1.3557194290938646, "learning_rate": 7.984081444802872e-05, "loss": 0.9251, "step": 1385 }, { "epoch": 0.628286491387126, "grad_norm": 1.0980682868674414, "learning_rate": 7.983968422683018e-05, "loss": 0.9239, "step": 1386 }, { "epoch": 0.6287398005439709, "grad_norm": 1.5934923601302393, "learning_rate": 7.983855001557015e-05, "loss": 0.9318, "step": 1387 }, { "epoch": 0.629193109700816, "grad_norm": 0.7844962092886303, "learning_rate": 7.983741181436225e-05, "loss": 0.9413, "step": 1388 }, { "epoch": 0.6296464188576609, "grad_norm": 1.3638088264805168, "learning_rate": 7.98362696233205e-05, "loss": 0.9567, "step": 1389 }, { "epoch": 0.6300997280145059, "grad_norm": 1.139579570245386, "learning_rate": 7.983512344255925e-05, "loss": 0.9267, "step": 1390 }, { "epoch": 0.6305530371713509, "grad_norm": 1.3610140489641078, "learning_rate": 7.983397327219333e-05, "loss": 0.9267, "step": 1391 }, { "epoch": 0.6310063463281959, "grad_norm": 0.9320054369917435, "learning_rate": 7.983281911233791e-05, "loss": 0.9409, "step": 1392 }, { "epoch": 0.6314596554850408, "grad_norm": 1.499860486745327, "learning_rate": 7.983166096310859e-05, "loss": 0.9552, "step": 1393 }, { "epoch": 0.6319129646418857, "grad_norm": 1.1558453692058146, "learning_rate": 7.983049882462135e-05, "loss": 0.9162, "step": 1394 }, { "epoch": 0.6323662737987308, "grad_norm": 1.3403899864824378, "learning_rate": 7.98293326969926e-05, "loss": 0.938, "step": 1395 }, { "epoch": 0.6328195829555757, "grad_norm": 1.1588261461086737, "learning_rate": 7.982816258033913e-05, "loss": 0.9193, "step": 1396 }, { "epoch": 0.6332728921124207, "grad_norm": 1.2331229247558908, "learning_rate": 7.982698847477814e-05, "loss": 0.9341, "step": 1397 }, { "epoch": 0.6337262012692656, "grad_norm": 1.2653788870119165, "learning_rate": 7.982581038042718e-05, "loss": 0.9427, "step": 1398 }, { "epoch": 0.6341795104261106, "grad_norm": 1.2767503401600189, "learning_rate": 7.982462829740426e-05, "loss": 0.921, "step": 1399 }, { "epoch": 0.6346328195829556, "grad_norm": 1.3302808801174517, "learning_rate": 7.982344222582779e-05, "loss": 0.9346, "step": 1400 }, { "epoch": 0.6350861287398005, "grad_norm": 0.92780265892514, "learning_rate": 7.982225216581654e-05, "loss": 0.9212, "step": 1401 }, { "epoch": 0.6355394378966455, "grad_norm": 0.8800335726620268, "learning_rate": 7.98210581174897e-05, "loss": 0.9412, "step": 1402 }, { "epoch": 0.6359927470534905, "grad_norm": 1.1153276690092782, "learning_rate": 7.981986008096686e-05, "loss": 0.9287, "step": 1403 }, { "epoch": 0.6364460562103355, "grad_norm": 1.256485378493966, "learning_rate": 7.9818658056368e-05, "loss": 0.9164, "step": 1404 }, { "epoch": 0.6368993653671804, "grad_norm": 1.2733483486197694, "learning_rate": 7.98174520438135e-05, "loss": 0.9435, "step": 1405 }, { "epoch": 0.6373526745240253, "grad_norm": 1.3844232381262884, "learning_rate": 7.981624204342417e-05, "loss": 0.9215, "step": 1406 }, { "epoch": 0.6378059836808704, "grad_norm": 1.201826875599897, "learning_rate": 7.981502805532118e-05, "loss": 0.9408, "step": 1407 }, { "epoch": 0.6382592928377153, "grad_norm": 1.0322254041761534, "learning_rate": 7.981381007962612e-05, "loss": 0.9232, "step": 1408 }, { "epoch": 0.6387126019945603, "grad_norm": 1.2185504932525735, "learning_rate": 7.981258811646095e-05, "loss": 0.9292, "step": 1409 }, { "epoch": 0.6391659111514053, "grad_norm": 0.9029014426623471, "learning_rate": 7.98113621659481e-05, "loss": 0.9132, "step": 1410 }, { "epoch": 0.6396192203082502, "grad_norm": 1.5644726001312201, "learning_rate": 7.981013222821031e-05, "loss": 0.9468, "step": 1411 }, { "epoch": 0.6400725294650952, "grad_norm": 1.3038607170684196, "learning_rate": 7.980889830337077e-05, "loss": 0.9275, "step": 1412 }, { "epoch": 0.6405258386219401, "grad_norm": 1.0410800257093935, "learning_rate": 7.980766039155309e-05, "loss": 0.9446, "step": 1413 }, { "epoch": 0.6409791477787852, "grad_norm": 2.4141088118177843, "learning_rate": 7.980641849288122e-05, "loss": 0.9371, "step": 1414 }, { "epoch": 0.6414324569356301, "grad_norm": 1.228897285490401, "learning_rate": 7.980517260747954e-05, "loss": 0.944, "step": 1415 }, { "epoch": 0.641885766092475, "grad_norm": 2.4479424692656293, "learning_rate": 7.980392273547285e-05, "loss": 0.9284, "step": 1416 }, { "epoch": 0.64233907524932, "grad_norm": 1.7091327710384099, "learning_rate": 7.980266887698632e-05, "loss": 0.9387, "step": 1417 }, { "epoch": 0.642792384406165, "grad_norm": 2.3890263096655855, "learning_rate": 7.980141103214551e-05, "loss": 0.9221, "step": 1418 }, { "epoch": 0.64324569356301, "grad_norm": 1.9193959280156798, "learning_rate": 7.980014920107642e-05, "loss": 0.9273, "step": 1419 }, { "epoch": 0.6436990027198549, "grad_norm": 2.0164249198085744, "learning_rate": 7.979888338390542e-05, "loss": 0.9391, "step": 1420 }, { "epoch": 0.6441523118766999, "grad_norm": 1.6119978119957605, "learning_rate": 7.979761358075926e-05, "loss": 0.9506, "step": 1421 }, { "epoch": 0.6446056210335449, "grad_norm": 1.6736310107523196, "learning_rate": 7.979633979176517e-05, "loss": 0.9432, "step": 1422 }, { "epoch": 0.6450589301903898, "grad_norm": 1.468712745019073, "learning_rate": 7.979506201705067e-05, "loss": 0.9467, "step": 1423 }, { "epoch": 0.6455122393472348, "grad_norm": 0.9726777079324235, "learning_rate": 7.979378025674376e-05, "loss": 0.9193, "step": 1424 }, { "epoch": 0.6459655485040798, "grad_norm": 1.6539041570232453, "learning_rate": 7.97924945109728e-05, "loss": 0.9453, "step": 1425 }, { "epoch": 0.6464188576609248, "grad_norm": 1.3154314528445687, "learning_rate": 7.979120477986658e-05, "loss": 0.9241, "step": 1426 }, { "epoch": 0.6468721668177697, "grad_norm": 1.7102290439434462, "learning_rate": 7.978991106355423e-05, "loss": 0.9502, "step": 1427 }, { "epoch": 0.6473254759746147, "grad_norm": 1.3075561370817466, "learning_rate": 7.978861336216537e-05, "loss": 0.9403, "step": 1428 }, { "epoch": 0.6477787851314597, "grad_norm": 1.9349112092499678, "learning_rate": 7.978731167582995e-05, "loss": 0.9435, "step": 1429 }, { "epoch": 0.6482320942883046, "grad_norm": 1.570407751158983, "learning_rate": 7.97860060046783e-05, "loss": 0.9217, "step": 1430 }, { "epoch": 0.6486854034451496, "grad_norm": 1.421079935982384, "learning_rate": 7.978469634884125e-05, "loss": 0.928, "step": 1431 }, { "epoch": 0.6491387126019945, "grad_norm": 1.2841587321735604, "learning_rate": 7.978338270844994e-05, "loss": 0.94, "step": 1432 }, { "epoch": 0.6495920217588396, "grad_norm": 1.3928384119436406, "learning_rate": 7.97820650836359e-05, "loss": 0.9398, "step": 1433 }, { "epoch": 0.6500453309156845, "grad_norm": 1.3651014096147718, "learning_rate": 7.978074347453115e-05, "loss": 0.9246, "step": 1434 }, { "epoch": 0.6504986400725294, "grad_norm": 1.2096046758140748, "learning_rate": 7.977941788126802e-05, "loss": 0.9657, "step": 1435 }, { "epoch": 0.6509519492293744, "grad_norm": 1.1120361246873771, "learning_rate": 7.977808830397929e-05, "loss": 0.9461, "step": 1436 }, { "epoch": 0.6514052583862194, "grad_norm": 1.2910511666765059, "learning_rate": 7.977675474279809e-05, "loss": 0.9219, "step": 1437 }, { "epoch": 0.6518585675430644, "grad_norm": 1.1215445136594928, "learning_rate": 7.977541719785801e-05, "loss": 0.9141, "step": 1438 }, { "epoch": 0.6523118766999093, "grad_norm": 1.6952095828596256, "learning_rate": 7.9774075669293e-05, "loss": 0.9366, "step": 1439 }, { "epoch": 0.6527651858567544, "grad_norm": 0.9451610027782341, "learning_rate": 7.977273015723741e-05, "loss": 0.9203, "step": 1440 }, { "epoch": 0.6532184950135993, "grad_norm": 1.3362321758857796, "learning_rate": 7.977138066182603e-05, "loss": 0.9232, "step": 1441 }, { "epoch": 0.6536718041704442, "grad_norm": 1.0719712929789755, "learning_rate": 7.977002718319396e-05, "loss": 0.9324, "step": 1442 }, { "epoch": 0.6541251133272892, "grad_norm": 1.3102656091343237, "learning_rate": 7.976866972147682e-05, "loss": 0.9207, "step": 1443 }, { "epoch": 0.6545784224841342, "grad_norm": 1.396227255395904, "learning_rate": 7.97673082768105e-05, "loss": 0.9233, "step": 1444 }, { "epoch": 0.6550317316409792, "grad_norm": 1.3004682718554335, "learning_rate": 7.97659428493314e-05, "loss": 0.9332, "step": 1445 }, { "epoch": 0.6554850407978241, "grad_norm": 1.1222152997595582, "learning_rate": 7.976457343917623e-05, "loss": 0.9408, "step": 1446 }, { "epoch": 0.655938349954669, "grad_norm": 1.010423149381966, "learning_rate": 7.976320004648218e-05, "loss": 0.9328, "step": 1447 }, { "epoch": 0.6563916591115141, "grad_norm": 1.0442575856032936, "learning_rate": 7.976182267138678e-05, "loss": 0.9139, "step": 1448 }, { "epoch": 0.656844968268359, "grad_norm": 1.5087229332014087, "learning_rate": 7.976044131402799e-05, "loss": 0.923, "step": 1449 }, { "epoch": 0.657298277425204, "grad_norm": 1.0492995955091606, "learning_rate": 7.975905597454415e-05, "loss": 0.9302, "step": 1450 }, { "epoch": 0.6577515865820489, "grad_norm": 1.6268835563491717, "learning_rate": 7.975766665307399e-05, "loss": 0.9321, "step": 1451 }, { "epoch": 0.658204895738894, "grad_norm": 1.434609139268748, "learning_rate": 7.975627334975669e-05, "loss": 0.9411, "step": 1452 }, { "epoch": 0.6586582048957389, "grad_norm": 1.152359541686999, "learning_rate": 7.975487606473175e-05, "loss": 0.9208, "step": 1453 }, { "epoch": 0.6591115140525838, "grad_norm": 1.275071016605143, "learning_rate": 7.975347479813915e-05, "loss": 0.9454, "step": 1454 }, { "epoch": 0.6595648232094289, "grad_norm": 1.3747622696785118, "learning_rate": 7.97520695501192e-05, "loss": 0.9252, "step": 1455 }, { "epoch": 0.6600181323662738, "grad_norm": 1.3638940251498712, "learning_rate": 7.975066032081266e-05, "loss": 0.9337, "step": 1456 }, { "epoch": 0.6604714415231188, "grad_norm": 1.1981565751091483, "learning_rate": 7.974924711036068e-05, "loss": 0.9386, "step": 1457 }, { "epoch": 0.6609247506799637, "grad_norm": 1.3092383105588519, "learning_rate": 7.974782991890477e-05, "loss": 0.9412, "step": 1458 }, { "epoch": 0.6613780598368088, "grad_norm": 1.2775713219438978, "learning_rate": 7.974640874658688e-05, "loss": 0.9603, "step": 1459 }, { "epoch": 0.6618313689936537, "grad_norm": 1.1866648580842087, "learning_rate": 7.974498359354933e-05, "loss": 0.9298, "step": 1460 }, { "epoch": 0.6622846781504986, "grad_norm": 1.4329945995979836, "learning_rate": 7.974355445993487e-05, "loss": 0.9003, "step": 1461 }, { "epoch": 0.6627379873073436, "grad_norm": 1.1897078145130777, "learning_rate": 7.974212134588664e-05, "loss": 0.9311, "step": 1462 }, { "epoch": 0.6631912964641886, "grad_norm": 1.382182919223205, "learning_rate": 7.974068425154814e-05, "loss": 0.9431, "step": 1463 }, { "epoch": 0.6636446056210336, "grad_norm": 1.1836673731075391, "learning_rate": 7.973924317706333e-05, "loss": 0.9184, "step": 1464 }, { "epoch": 0.6640979147778785, "grad_norm": 1.403900630926551, "learning_rate": 7.97377981225765e-05, "loss": 0.9106, "step": 1465 }, { "epoch": 0.6645512239347234, "grad_norm": 1.104040275923374, "learning_rate": 7.973634908823243e-05, "loss": 0.919, "step": 1466 }, { "epoch": 0.6650045330915685, "grad_norm": 1.4111445792483353, "learning_rate": 7.97348960741762e-05, "loss": 0.9253, "step": 1467 }, { "epoch": 0.6654578422484134, "grad_norm": 1.295177332094139, "learning_rate": 7.973343908055336e-05, "loss": 0.93, "step": 1468 }, { "epoch": 0.6659111514052584, "grad_norm": 1.1154787319680681, "learning_rate": 7.973197810750981e-05, "loss": 0.907, "step": 1469 }, { "epoch": 0.6663644605621033, "grad_norm": 1.405607701681695, "learning_rate": 7.97305131551919e-05, "loss": 0.9121, "step": 1470 }, { "epoch": 0.6668177697189483, "grad_norm": 1.1353180137855903, "learning_rate": 7.972904422374632e-05, "loss": 0.924, "step": 1471 }, { "epoch": 0.6672710788757933, "grad_norm": 1.3389026335076168, "learning_rate": 7.97275713133202e-05, "loss": 0.9131, "step": 1472 }, { "epoch": 0.6677243880326382, "grad_norm": 1.373350731878485, "learning_rate": 7.972609442406103e-05, "loss": 0.9268, "step": 1473 }, { "epoch": 0.6681776971894833, "grad_norm": 1.324180490903275, "learning_rate": 7.972461355611679e-05, "loss": 0.9102, "step": 1474 }, { "epoch": 0.6686310063463282, "grad_norm": 0.9100181369526462, "learning_rate": 7.972312870963575e-05, "loss": 0.9222, "step": 1475 }, { "epoch": 0.6690843155031732, "grad_norm": 1.1398206666275492, "learning_rate": 7.97216398847666e-05, "loss": 0.9205, "step": 1476 }, { "epoch": 0.6695376246600181, "grad_norm": 1.6321757800330525, "learning_rate": 7.97201470816585e-05, "loss": 0.9219, "step": 1477 }, { "epoch": 0.6699909338168631, "grad_norm": 1.0691110799792862, "learning_rate": 7.971865030046091e-05, "loss": 0.9153, "step": 1478 }, { "epoch": 0.6704442429737081, "grad_norm": 1.4063599964918188, "learning_rate": 7.971714954132379e-05, "loss": 0.9475, "step": 1479 }, { "epoch": 0.670897552130553, "grad_norm": 0.9548924711984405, "learning_rate": 7.97156448043974e-05, "loss": 0.8958, "step": 1480 }, { "epoch": 0.671350861287398, "grad_norm": 1.4095897790969576, "learning_rate": 7.971413608983247e-05, "loss": 0.9282, "step": 1481 }, { "epoch": 0.671804170444243, "grad_norm": 1.300863169704553, "learning_rate": 7.971262339778008e-05, "loss": 0.9176, "step": 1482 }, { "epoch": 0.672257479601088, "grad_norm": 1.4993402999099295, "learning_rate": 7.971110672839175e-05, "loss": 0.9185, "step": 1483 }, { "epoch": 0.6727107887579329, "grad_norm": 0.8135270923635025, "learning_rate": 7.970958608181937e-05, "loss": 0.9189, "step": 1484 }, { "epoch": 0.6731640979147778, "grad_norm": 0.8555422604833746, "learning_rate": 7.970806145821524e-05, "loss": 0.9237, "step": 1485 }, { "epoch": 0.6736174070716229, "grad_norm": 1.1357472013320014, "learning_rate": 7.970653285773207e-05, "loss": 0.947, "step": 1486 }, { "epoch": 0.6740707162284678, "grad_norm": 1.6291106894648493, "learning_rate": 7.970500028052292e-05, "loss": 0.9343, "step": 1487 }, { "epoch": 0.6745240253853128, "grad_norm": 0.9800402802610361, "learning_rate": 7.970346372674131e-05, "loss": 0.9177, "step": 1488 }, { "epoch": 0.6749773345421578, "grad_norm": 1.6021452061858645, "learning_rate": 7.970192319654112e-05, "loss": 0.9235, "step": 1489 }, { "epoch": 0.6754306436990027, "grad_norm": 0.8963986217221879, "learning_rate": 7.970037869007664e-05, "loss": 0.9227, "step": 1490 }, { "epoch": 0.6758839528558477, "grad_norm": 1.568003514975853, "learning_rate": 7.969883020750255e-05, "loss": 0.9117, "step": 1491 }, { "epoch": 0.6763372620126926, "grad_norm": 0.7963495238434515, "learning_rate": 7.969727774897396e-05, "loss": 0.9333, "step": 1492 }, { "epoch": 0.6767905711695377, "grad_norm": 1.436774335982093, "learning_rate": 7.96957213146463e-05, "loss": 0.9245, "step": 1493 }, { "epoch": 0.6772438803263826, "grad_norm": 1.2075870329133558, "learning_rate": 7.969416090467553e-05, "loss": 0.9282, "step": 1494 }, { "epoch": 0.6776971894832275, "grad_norm": 1.152055897003922, "learning_rate": 7.969259651921786e-05, "loss": 0.9189, "step": 1495 }, { "epoch": 0.6781504986400725, "grad_norm": 1.4679513233877814, "learning_rate": 7.969102815843001e-05, "loss": 0.9397, "step": 1496 }, { "epoch": 0.6786038077969175, "grad_norm": 1.3133252538797986, "learning_rate": 7.968945582246903e-05, "loss": 0.916, "step": 1497 }, { "epoch": 0.6790571169537625, "grad_norm": 1.4591851431123457, "learning_rate": 7.96878795114924e-05, "loss": 0.9164, "step": 1498 }, { "epoch": 0.6795104261106074, "grad_norm": 0.9340497400217468, "learning_rate": 7.9686299225658e-05, "loss": 0.9361, "step": 1499 }, { "epoch": 0.6799637352674524, "grad_norm": 1.301829269838523, "learning_rate": 7.96847149651241e-05, "loss": 0.9194, "step": 1500 }, { "epoch": 0.6804170444242974, "grad_norm": 1.0748653925931202, "learning_rate": 7.968312673004936e-05, "loss": 0.9414, "step": 1501 }, { "epoch": 0.6808703535811423, "grad_norm": 1.2738493254433245, "learning_rate": 7.968153452059285e-05, "loss": 0.9141, "step": 1502 }, { "epoch": 0.6813236627379873, "grad_norm": 1.0661727000630012, "learning_rate": 7.967993833691405e-05, "loss": 0.9135, "step": 1503 }, { "epoch": 0.6817769718948323, "grad_norm": 1.5119080456386675, "learning_rate": 7.96783381791728e-05, "loss": 0.9457, "step": 1504 }, { "epoch": 0.6822302810516773, "grad_norm": 1.279320549430296, "learning_rate": 7.967673404752937e-05, "loss": 0.9168, "step": 1505 }, { "epoch": 0.6826835902085222, "grad_norm": 1.0515337888292569, "learning_rate": 7.967512594214441e-05, "loss": 0.9215, "step": 1506 }, { "epoch": 0.6831368993653671, "grad_norm": 1.6463945267980995, "learning_rate": 7.967351386317899e-05, "loss": 0.9142, "step": 1507 }, { "epoch": 0.6835902085222122, "grad_norm": 1.1761736050717322, "learning_rate": 7.967189781079456e-05, "loss": 0.9301, "step": 1508 }, { "epoch": 0.6840435176790571, "grad_norm": 1.1027548558801048, "learning_rate": 7.967027778515297e-05, "loss": 0.9368, "step": 1509 }, { "epoch": 0.6844968268359021, "grad_norm": 1.255542151966555, "learning_rate": 7.966865378641647e-05, "loss": 0.918, "step": 1510 }, { "epoch": 0.684950135992747, "grad_norm": 0.8458071359348894, "learning_rate": 7.966702581474771e-05, "loss": 0.9086, "step": 1511 }, { "epoch": 0.6854034451495921, "grad_norm": 1.3613545985066633, "learning_rate": 7.966539387030973e-05, "loss": 0.9254, "step": 1512 }, { "epoch": 0.685856754306437, "grad_norm": 1.450391164240738, "learning_rate": 7.966375795326599e-05, "loss": 0.9178, "step": 1513 }, { "epoch": 0.6863100634632819, "grad_norm": 1.3428017085178099, "learning_rate": 7.966211806378031e-05, "loss": 0.9326, "step": 1514 }, { "epoch": 0.6867633726201269, "grad_norm": 1.1933103355423667, "learning_rate": 7.966047420201695e-05, "loss": 0.9358, "step": 1515 }, { "epoch": 0.6872166817769719, "grad_norm": 1.1402047730851161, "learning_rate": 7.965882636814053e-05, "loss": 0.9142, "step": 1516 }, { "epoch": 0.6876699909338169, "grad_norm": 1.0385513011125143, "learning_rate": 7.96571745623161e-05, "loss": 0.9285, "step": 1517 }, { "epoch": 0.6881233000906618, "grad_norm": 1.2881533460323966, "learning_rate": 7.965551878470909e-05, "loss": 0.9345, "step": 1518 }, { "epoch": 0.6885766092475069, "grad_norm": 1.2111539197118497, "learning_rate": 7.965385903548531e-05, "loss": 0.9129, "step": 1519 }, { "epoch": 0.6890299184043518, "grad_norm": 1.4794832941761022, "learning_rate": 7.965219531481103e-05, "loss": 0.9101, "step": 1520 }, { "epoch": 0.6894832275611967, "grad_norm": 1.1762039870526118, "learning_rate": 7.965052762285285e-05, "loss": 0.9268, "step": 1521 }, { "epoch": 0.6899365367180417, "grad_norm": 1.3548672335838676, "learning_rate": 7.964885595977778e-05, "loss": 0.9315, "step": 1522 }, { "epoch": 0.6903898458748867, "grad_norm": 1.695712155641778, "learning_rate": 7.964718032575328e-05, "loss": 0.9335, "step": 1523 }, { "epoch": 0.6908431550317317, "grad_norm": 0.6282614902121157, "learning_rate": 7.964550072094714e-05, "loss": 0.9274, "step": 1524 }, { "epoch": 0.6912964641885766, "grad_norm": 1.6417997166383307, "learning_rate": 7.964381714552759e-05, "loss": 0.9339, "step": 1525 }, { "epoch": 0.6917497733454215, "grad_norm": 1.5480116528684826, "learning_rate": 7.964212959966324e-05, "loss": 0.9563, "step": 1526 }, { "epoch": 0.6922030825022666, "grad_norm": 0.8009024921884106, "learning_rate": 7.96404380835231e-05, "loss": 0.9197, "step": 1527 }, { "epoch": 0.6926563916591115, "grad_norm": 1.4332514144118524, "learning_rate": 7.96387425972766e-05, "loss": 0.9177, "step": 1528 }, { "epoch": 0.6931097008159565, "grad_norm": 1.1195741622053257, "learning_rate": 7.963704314109352e-05, "loss": 0.897, "step": 1529 }, { "epoch": 0.6935630099728014, "grad_norm": 1.1922109245131962, "learning_rate": 7.963533971514407e-05, "loss": 0.9292, "step": 1530 }, { "epoch": 0.6940163191296465, "grad_norm": 1.3501485944758176, "learning_rate": 7.963363231959888e-05, "loss": 0.9163, "step": 1531 }, { "epoch": 0.6944696282864914, "grad_norm": 1.0904475231874073, "learning_rate": 7.963192095462893e-05, "loss": 0.9266, "step": 1532 }, { "epoch": 0.6949229374433363, "grad_norm": 1.275256355821671, "learning_rate": 7.963020562040561e-05, "loss": 0.9246, "step": 1533 }, { "epoch": 0.6953762466001813, "grad_norm": 1.0247707258329817, "learning_rate": 7.962848631710073e-05, "loss": 0.9209, "step": 1534 }, { "epoch": 0.6958295557570263, "grad_norm": 1.4914533477544665, "learning_rate": 7.962676304488649e-05, "loss": 0.9341, "step": 1535 }, { "epoch": 0.6962828649138713, "grad_norm": 1.388342477921883, "learning_rate": 7.962503580393546e-05, "loss": 0.9221, "step": 1536 }, { "epoch": 0.6967361740707162, "grad_norm": 1.4582813777650665, "learning_rate": 7.962330459442063e-05, "loss": 0.9259, "step": 1537 }, { "epoch": 0.6971894832275612, "grad_norm": 0.9661722660132974, "learning_rate": 7.962156941651539e-05, "loss": 0.9089, "step": 1538 }, { "epoch": 0.6976427923844062, "grad_norm": 1.7048772714952134, "learning_rate": 7.961983027039356e-05, "loss": 0.9047, "step": 1539 }, { "epoch": 0.6980961015412511, "grad_norm": 0.8187451298745775, "learning_rate": 7.961808715622926e-05, "loss": 0.8927, "step": 1540 }, { "epoch": 0.6985494106980961, "grad_norm": 1.5864101154372818, "learning_rate": 7.96163400741971e-05, "loss": 0.9125, "step": 1541 }, { "epoch": 0.6990027198549411, "grad_norm": 0.9956114535329907, "learning_rate": 7.961458902447206e-05, "loss": 0.9349, "step": 1542 }, { "epoch": 0.699456029011786, "grad_norm": 1.6800659628699306, "learning_rate": 7.96128340072295e-05, "loss": 0.9255, "step": 1543 }, { "epoch": 0.699909338168631, "grad_norm": 1.1004440480411313, "learning_rate": 7.96110750226452e-05, "loss": 0.9305, "step": 1544 }, { "epoch": 0.7003626473254759, "grad_norm": 2.0529920561143915, "learning_rate": 7.96093120708953e-05, "loss": 0.9132, "step": 1545 }, { "epoch": 0.700815956482321, "grad_norm": 1.7491498743857656, "learning_rate": 7.960754515215641e-05, "loss": 0.9264, "step": 1546 }, { "epoch": 0.7012692656391659, "grad_norm": 1.2663240740834958, "learning_rate": 7.960577426660547e-05, "loss": 0.9227, "step": 1547 }, { "epoch": 0.7017225747960109, "grad_norm": 1.6982063174876354, "learning_rate": 7.960399941441982e-05, "loss": 0.934, "step": 1548 }, { "epoch": 0.7021758839528558, "grad_norm": 1.0108445558017256, "learning_rate": 7.960222059577724e-05, "loss": 0.9416, "step": 1549 }, { "epoch": 0.7026291931097008, "grad_norm": 1.7882289612233115, "learning_rate": 7.960043781085588e-05, "loss": 0.9331, "step": 1550 }, { "epoch": 0.7030825022665458, "grad_norm": 1.4991008584259256, "learning_rate": 7.959865105983429e-05, "loss": 0.9269, "step": 1551 }, { "epoch": 0.7035358114233907, "grad_norm": 1.470941404043774, "learning_rate": 7.959686034289143e-05, "loss": 0.919, "step": 1552 }, { "epoch": 0.7039891205802358, "grad_norm": 1.5493392200395835, "learning_rate": 7.959506566020661e-05, "loss": 0.9078, "step": 1553 }, { "epoch": 0.7044424297370807, "grad_norm": 1.207564299086569, "learning_rate": 7.959326701195962e-05, "loss": 0.9274, "step": 1554 }, { "epoch": 0.7048957388939256, "grad_norm": 1.7186556059131886, "learning_rate": 7.959146439833056e-05, "loss": 0.9393, "step": 1555 }, { "epoch": 0.7053490480507706, "grad_norm": 1.1000951526346143, "learning_rate": 7.95896578195e-05, "loss": 0.9312, "step": 1556 }, { "epoch": 0.7058023572076156, "grad_norm": 1.413641672638118, "learning_rate": 7.958784727564885e-05, "loss": 0.9439, "step": 1557 }, { "epoch": 0.7062556663644606, "grad_norm": 1.262466218915977, "learning_rate": 7.958603276695844e-05, "loss": 0.9177, "step": 1558 }, { "epoch": 0.7067089755213055, "grad_norm": 1.0056998750155368, "learning_rate": 7.958421429361051e-05, "loss": 0.9314, "step": 1559 }, { "epoch": 0.7071622846781505, "grad_norm": 1.7201027897721712, "learning_rate": 7.958239185578718e-05, "loss": 0.9297, "step": 1560 }, { "epoch": 0.7076155938349955, "grad_norm": 1.3087421388934883, "learning_rate": 7.9580565453671e-05, "loss": 0.9221, "step": 1561 }, { "epoch": 0.7080689029918404, "grad_norm": 1.8017926198478544, "learning_rate": 7.957873508744483e-05, "loss": 0.9215, "step": 1562 }, { "epoch": 0.7085222121486854, "grad_norm": 1.3442120007254794, "learning_rate": 7.957690075729204e-05, "loss": 0.9065, "step": 1563 }, { "epoch": 0.7089755213055303, "grad_norm": 1.69517653924518, "learning_rate": 7.957506246339631e-05, "loss": 0.9141, "step": 1564 }, { "epoch": 0.7094288304623754, "grad_norm": 1.3037448072054871, "learning_rate": 7.957322020594178e-05, "loss": 0.9034, "step": 1565 }, { "epoch": 0.7098821396192203, "grad_norm": 1.8197518205065386, "learning_rate": 7.957137398511294e-05, "loss": 0.9293, "step": 1566 }, { "epoch": 0.7103354487760652, "grad_norm": 1.545137758794244, "learning_rate": 7.95695238010947e-05, "loss": 0.923, "step": 1567 }, { "epoch": 0.7107887579329103, "grad_norm": 1.4728903059560132, "learning_rate": 7.956766965407235e-05, "loss": 0.9318, "step": 1568 }, { "epoch": 0.7112420670897552, "grad_norm": 1.337121231153321, "learning_rate": 7.956581154423161e-05, "loss": 0.9267, "step": 1569 }, { "epoch": 0.7116953762466002, "grad_norm": 1.4306505531205653, "learning_rate": 7.956394947175855e-05, "loss": 0.9284, "step": 1570 }, { "epoch": 0.7121486854034451, "grad_norm": 1.1227059238548984, "learning_rate": 7.956208343683968e-05, "loss": 0.9136, "step": 1571 }, { "epoch": 0.7126019945602902, "grad_norm": 1.5626711476184012, "learning_rate": 7.956021343966188e-05, "loss": 0.9297, "step": 1572 }, { "epoch": 0.7130553037171351, "grad_norm": 1.2199248538016538, "learning_rate": 7.955833948041245e-05, "loss": 0.934, "step": 1573 }, { "epoch": 0.71350861287398, "grad_norm": 1.5746775926831904, "learning_rate": 7.955646155927903e-05, "loss": 0.9317, "step": 1574 }, { "epoch": 0.713961922030825, "grad_norm": 1.2832423458639428, "learning_rate": 7.955457967644976e-05, "loss": 0.9289, "step": 1575 }, { "epoch": 0.71441523118767, "grad_norm": 1.3186729377921877, "learning_rate": 7.955269383211309e-05, "loss": 0.9246, "step": 1576 }, { "epoch": 0.714868540344515, "grad_norm": 1.3468244005861185, "learning_rate": 7.955080402645786e-05, "loss": 0.9274, "step": 1577 }, { "epoch": 0.7153218495013599, "grad_norm": 1.4321268774150047, "learning_rate": 7.954891025967339e-05, "loss": 0.9124, "step": 1578 }, { "epoch": 0.7157751586582048, "grad_norm": 1.1385986106825017, "learning_rate": 7.954701253194932e-05, "loss": 0.913, "step": 1579 }, { "epoch": 0.7162284678150499, "grad_norm": 1.1239357549832045, "learning_rate": 7.954511084347573e-05, "loss": 0.9348, "step": 1580 }, { "epoch": 0.7166817769718948, "grad_norm": 1.634241588716829, "learning_rate": 7.954320519444306e-05, "loss": 0.9393, "step": 1581 }, { "epoch": 0.7171350861287398, "grad_norm": 1.286046306669166, "learning_rate": 7.954129558504218e-05, "loss": 0.9161, "step": 1582 }, { "epoch": 0.7175883952855848, "grad_norm": 1.1258300707260802, "learning_rate": 7.953938201546433e-05, "loss": 0.9362, "step": 1583 }, { "epoch": 0.7180417044424298, "grad_norm": 0.7990104918191255, "learning_rate": 7.953746448590115e-05, "loss": 0.9166, "step": 1584 }, { "epoch": 0.7184950135992747, "grad_norm": 1.2081486370465535, "learning_rate": 7.953554299654473e-05, "loss": 0.9137, "step": 1585 }, { "epoch": 0.7189483227561196, "grad_norm": 1.4046554229003951, "learning_rate": 7.953361754758747e-05, "loss": 0.9525, "step": 1586 }, { "epoch": 0.7194016319129647, "grad_norm": 1.340047119426043, "learning_rate": 7.953168813922225e-05, "loss": 0.9576, "step": 1587 }, { "epoch": 0.7198549410698096, "grad_norm": 1.0007700379652889, "learning_rate": 7.952975477164226e-05, "loss": 0.9115, "step": 1588 }, { "epoch": 0.7203082502266546, "grad_norm": 1.8016114548086035, "learning_rate": 7.952781744504115e-05, "loss": 0.9078, "step": 1589 }, { "epoch": 0.7207615593834995, "grad_norm": 1.1137649424925504, "learning_rate": 7.952587615961297e-05, "loss": 0.9373, "step": 1590 }, { "epoch": 0.7212148685403446, "grad_norm": 1.2202961906059926, "learning_rate": 7.952393091555211e-05, "loss": 0.936, "step": 1591 }, { "epoch": 0.7216681776971895, "grad_norm": 1.5876694060576904, "learning_rate": 7.952198171305342e-05, "loss": 0.9109, "step": 1592 }, { "epoch": 0.7221214868540344, "grad_norm": 0.911805360479133, "learning_rate": 7.952002855231211e-05, "loss": 0.9208, "step": 1593 }, { "epoch": 0.7225747960108794, "grad_norm": 1.8170330280243425, "learning_rate": 7.951807143352379e-05, "loss": 0.9378, "step": 1594 }, { "epoch": 0.7230281051677244, "grad_norm": 1.066533853604382, "learning_rate": 7.951611035688447e-05, "loss": 0.932, "step": 1595 }, { "epoch": 0.7234814143245694, "grad_norm": 1.9402484332759569, "learning_rate": 7.951414532259056e-05, "loss": 0.948, "step": 1596 }, { "epoch": 0.7239347234814143, "grad_norm": 1.5190925698007571, "learning_rate": 7.951217633083888e-05, "loss": 0.9439, "step": 1597 }, { "epoch": 0.7243880326382592, "grad_norm": 1.3978651455550268, "learning_rate": 7.951020338182661e-05, "loss": 0.9344, "step": 1598 }, { "epoch": 0.7248413417951043, "grad_norm": 1.2580379239658595, "learning_rate": 7.950822647575136e-05, "loss": 0.9317, "step": 1599 }, { "epoch": 0.7252946509519492, "grad_norm": 1.1958836735490492, "learning_rate": 7.95062456128111e-05, "loss": 0.9301, "step": 1600 }, { "epoch": 0.7257479601087942, "grad_norm": 1.3390547132824158, "learning_rate": 7.950426079320426e-05, "loss": 0.9282, "step": 1601 }, { "epoch": 0.7262012692656392, "grad_norm": 1.0677794464942607, "learning_rate": 7.950227201712958e-05, "loss": 0.9226, "step": 1602 }, { "epoch": 0.7266545784224842, "grad_norm": 1.650453563800068, "learning_rate": 7.950027928478626e-05, "loss": 0.9291, "step": 1603 }, { "epoch": 0.7271078875793291, "grad_norm": 1.1822205603930291, "learning_rate": 7.949828259637389e-05, "loss": 0.9191, "step": 1604 }, { "epoch": 0.727561196736174, "grad_norm": 1.2227479869034787, "learning_rate": 7.949628195209242e-05, "loss": 0.9106, "step": 1605 }, { "epoch": 0.7280145058930191, "grad_norm": 1.1223480536684485, "learning_rate": 7.949427735214226e-05, "loss": 0.9361, "step": 1606 }, { "epoch": 0.728467815049864, "grad_norm": 1.5493655098866297, "learning_rate": 7.949226879672413e-05, "loss": 0.9119, "step": 1607 }, { "epoch": 0.728921124206709, "grad_norm": 1.0234500386221204, "learning_rate": 7.949025628603922e-05, "loss": 0.9327, "step": 1608 }, { "epoch": 0.7293744333635539, "grad_norm": 1.492547706848186, "learning_rate": 7.94882398202891e-05, "loss": 0.926, "step": 1609 }, { "epoch": 0.729827742520399, "grad_norm": 1.2332803017498088, "learning_rate": 7.948621939967569e-05, "loss": 0.9105, "step": 1610 }, { "epoch": 0.7302810516772439, "grad_norm": 1.4086917332508235, "learning_rate": 7.948419502440136e-05, "loss": 0.9264, "step": 1611 }, { "epoch": 0.7307343608340888, "grad_norm": 1.1172030351308644, "learning_rate": 7.948216669466886e-05, "loss": 0.9089, "step": 1612 }, { "epoch": 0.7311876699909338, "grad_norm": 1.3503135750035187, "learning_rate": 7.948013441068133e-05, "loss": 0.923, "step": 1613 }, { "epoch": 0.7316409791477788, "grad_norm": 1.2259622128138963, "learning_rate": 7.947809817264231e-05, "loss": 0.9231, "step": 1614 }, { "epoch": 0.7320942883046238, "grad_norm": 1.4232173729274333, "learning_rate": 7.947605798075573e-05, "loss": 0.9193, "step": 1615 }, { "epoch": 0.7325475974614687, "grad_norm": 1.6651003074157964, "learning_rate": 7.947401383522593e-05, "loss": 0.9239, "step": 1616 }, { "epoch": 0.7330009066183137, "grad_norm": 0.9547568321963682, "learning_rate": 7.947196573625763e-05, "loss": 0.9055, "step": 1617 }, { "epoch": 0.7334542157751587, "grad_norm": 1.1535682545394768, "learning_rate": 7.946991368405595e-05, "loss": 0.8959, "step": 1618 }, { "epoch": 0.7339075249320036, "grad_norm": 1.1977697753150058, "learning_rate": 7.946785767882643e-05, "loss": 0.9199, "step": 1619 }, { "epoch": 0.7343608340888486, "grad_norm": 1.622562549131728, "learning_rate": 7.946579772077496e-05, "loss": 0.9161, "step": 1620 }, { "epoch": 0.7348141432456936, "grad_norm": 1.619497455820455, "learning_rate": 7.946373381010786e-05, "loss": 0.9382, "step": 1621 }, { "epoch": 0.7352674524025385, "grad_norm": 0.824707615481893, "learning_rate": 7.946166594703183e-05, "loss": 0.9083, "step": 1622 }, { "epoch": 0.7357207615593835, "grad_norm": 2.6767244545028452, "learning_rate": 7.9459594131754e-05, "loss": 0.9513, "step": 1623 }, { "epoch": 0.7361740707162284, "grad_norm": 1.5565079779736144, "learning_rate": 7.945751836448183e-05, "loss": 0.9203, "step": 1624 }, { "epoch": 0.7366273798730735, "grad_norm": 2.5905223467695744, "learning_rate": 7.945543864542324e-05, "loss": 0.906, "step": 1625 }, { "epoch": 0.7370806890299184, "grad_norm": 1.5067643477298034, "learning_rate": 7.94533549747865e-05, "loss": 0.9336, "step": 1626 }, { "epoch": 0.7375339981867634, "grad_norm": 3.0763962990680787, "learning_rate": 7.945126735278032e-05, "loss": 0.9254, "step": 1627 }, { "epoch": 0.7379873073436083, "grad_norm": 2.2353150935551858, "learning_rate": 7.944917577961377e-05, "loss": 0.9255, "step": 1628 }, { "epoch": 0.7384406165004533, "grad_norm": 2.9059403792442136, "learning_rate": 7.944708025549633e-05, "loss": 0.9362, "step": 1629 }, { "epoch": 0.7388939256572983, "grad_norm": 2.7283581662552407, "learning_rate": 7.944498078063785e-05, "loss": 0.9188, "step": 1630 }, { "epoch": 0.7393472348141432, "grad_norm": 1.996717425245677, "learning_rate": 7.944287735524864e-05, "loss": 0.8976, "step": 1631 }, { "epoch": 0.7398005439709883, "grad_norm": 2.0292707392657685, "learning_rate": 7.944076997953933e-05, "loss": 0.9152, "step": 1632 }, { "epoch": 0.7402538531278332, "grad_norm": 1.9673653874135555, "learning_rate": 7.943865865372101e-05, "loss": 0.9179, "step": 1633 }, { "epoch": 0.7407071622846781, "grad_norm": 1.5470466704608743, "learning_rate": 7.94365433780051e-05, "loss": 0.9275, "step": 1634 }, { "epoch": 0.7411604714415231, "grad_norm": 2.563345794340878, "learning_rate": 7.943442415260347e-05, "loss": 0.9445, "step": 1635 }, { "epoch": 0.7416137805983681, "grad_norm": 1.9136668260515428, "learning_rate": 7.943230097772837e-05, "loss": 0.8974, "step": 1636 }, { "epoch": 0.7420670897552131, "grad_norm": 2.5924886938470455, "learning_rate": 7.943017385359243e-05, "loss": 0.9108, "step": 1637 }, { "epoch": 0.742520398912058, "grad_norm": 2.4536383216965634, "learning_rate": 7.94280427804087e-05, "loss": 0.929, "step": 1638 }, { "epoch": 0.742973708068903, "grad_norm": 1.8114302544939989, "learning_rate": 7.942590775839061e-05, "loss": 0.9285, "step": 1639 }, { "epoch": 0.743427017225748, "grad_norm": 1.6244613942470991, "learning_rate": 7.9423768787752e-05, "loss": 0.9032, "step": 1640 }, { "epoch": 0.7438803263825929, "grad_norm": 2.3530250272162805, "learning_rate": 7.942162586870706e-05, "loss": 0.9161, "step": 1641 }, { "epoch": 0.7443336355394379, "grad_norm": 1.8584388808978303, "learning_rate": 7.941947900147044e-05, "loss": 0.9272, "step": 1642 }, { "epoch": 0.7447869446962828, "grad_norm": 2.4582610775265463, "learning_rate": 7.941732818625716e-05, "loss": 0.9184, "step": 1643 }, { "epoch": 0.7452402538531279, "grad_norm": 2.2643593517328635, "learning_rate": 7.94151734232826e-05, "loss": 0.9353, "step": 1644 }, { "epoch": 0.7456935630099728, "grad_norm": 1.9524050391304608, "learning_rate": 7.941301471276258e-05, "loss": 0.9293, "step": 1645 }, { "epoch": 0.7461468721668177, "grad_norm": 1.6916660364723919, "learning_rate": 7.94108520549133e-05, "loss": 0.9129, "step": 1646 }, { "epoch": 0.7466001813236628, "grad_norm": 2.2214102393711808, "learning_rate": 7.940868544995138e-05, "loss": 0.9177, "step": 1647 }, { "epoch": 0.7470534904805077, "grad_norm": 1.7438225939604073, "learning_rate": 7.940651489809379e-05, "loss": 0.9209, "step": 1648 }, { "epoch": 0.7475067996373527, "grad_norm": 2.369305136182219, "learning_rate": 7.940434039955791e-05, "loss": 0.9491, "step": 1649 }, { "epoch": 0.7479601087941976, "grad_norm": 2.01098161749891, "learning_rate": 7.940216195456153e-05, "loss": 0.9257, "step": 1650 }, { "epoch": 0.7484134179510427, "grad_norm": 2.0615866540966965, "learning_rate": 7.939997956332283e-05, "loss": 0.9127, "step": 1651 }, { "epoch": 0.7488667271078876, "grad_norm": 1.7731931889637351, "learning_rate": 7.939779322606038e-05, "loss": 0.929, "step": 1652 }, { "epoch": 0.7493200362647325, "grad_norm": 2.180067268600542, "learning_rate": 7.939560294299316e-05, "loss": 0.9073, "step": 1653 }, { "epoch": 0.7497733454215775, "grad_norm": 1.7900524959049056, "learning_rate": 7.939340871434052e-05, "loss": 0.92, "step": 1654 }, { "epoch": 0.7502266545784225, "grad_norm": 2.277986104443345, "learning_rate": 7.939121054032222e-05, "loss": 0.9207, "step": 1655 }, { "epoch": 0.7506799637352675, "grad_norm": 1.8719486926156035, "learning_rate": 7.938900842115842e-05, "loss": 0.9303, "step": 1656 }, { "epoch": 0.7511332728921124, "grad_norm": 2.1175402153031624, "learning_rate": 7.938680235706966e-05, "loss": 0.9241, "step": 1657 }, { "epoch": 0.7515865820489573, "grad_norm": 1.8909093687470349, "learning_rate": 7.93845923482769e-05, "loss": 0.9411, "step": 1658 }, { "epoch": 0.7520398912058024, "grad_norm": 2.0361710305834535, "learning_rate": 7.938237839500146e-05, "loss": 0.9206, "step": 1659 }, { "epoch": 0.7524932003626473, "grad_norm": 1.7324229312635189, "learning_rate": 7.938016049746508e-05, "loss": 0.9018, "step": 1660 }, { "epoch": 0.7529465095194923, "grad_norm": 2.1450181548836005, "learning_rate": 7.937793865588988e-05, "loss": 0.9261, "step": 1661 }, { "epoch": 0.7533998186763372, "grad_norm": 1.7843965199903666, "learning_rate": 7.93757128704984e-05, "loss": 0.9089, "step": 1662 }, { "epoch": 0.7538531278331823, "grad_norm": 2.299898765797171, "learning_rate": 7.937348314151356e-05, "loss": 0.9379, "step": 1663 }, { "epoch": 0.7543064369900272, "grad_norm": 2.0620894526521423, "learning_rate": 7.937124946915867e-05, "loss": 0.9361, "step": 1664 }, { "epoch": 0.7547597461468721, "grad_norm": 1.9048367529610168, "learning_rate": 7.936901185365742e-05, "loss": 0.9182, "step": 1665 }, { "epoch": 0.7552130553037172, "grad_norm": 1.6611039239393344, "learning_rate": 7.936677029523394e-05, "loss": 0.9338, "step": 1666 }, { "epoch": 0.7556663644605621, "grad_norm": 2.102048030231007, "learning_rate": 7.936452479411272e-05, "loss": 0.9069, "step": 1667 }, { "epoch": 0.7561196736174071, "grad_norm": 1.7620393819835651, "learning_rate": 7.936227535051863e-05, "loss": 0.9212, "step": 1668 }, { "epoch": 0.756572982774252, "grad_norm": 2.2512977771902256, "learning_rate": 7.9360021964677e-05, "loss": 0.9229, "step": 1669 }, { "epoch": 0.757026291931097, "grad_norm": 2.015352319394172, "learning_rate": 7.93577646368135e-05, "loss": 0.9408, "step": 1670 }, { "epoch": 0.757479601087942, "grad_norm": 1.8535425171906466, "learning_rate": 7.935550336715418e-05, "loss": 0.9121, "step": 1671 }, { "epoch": 0.7579329102447869, "grad_norm": 1.5669436455360037, "learning_rate": 7.935323815592556e-05, "loss": 0.9205, "step": 1672 }, { "epoch": 0.7583862194016319, "grad_norm": 2.3113617324149756, "learning_rate": 7.935096900335446e-05, "loss": 0.9174, "step": 1673 }, { "epoch": 0.7588395285584769, "grad_norm": 1.9216312865218204, "learning_rate": 7.934869590966816e-05, "loss": 0.9202, "step": 1674 }, { "epoch": 0.7592928377153219, "grad_norm": 1.988642637785182, "learning_rate": 7.934641887509434e-05, "loss": 0.916, "step": 1675 }, { "epoch": 0.7597461468721668, "grad_norm": 1.5998802343272702, "learning_rate": 7.934413789986104e-05, "loss": 0.909, "step": 1676 }, { "epoch": 0.7601994560290117, "grad_norm": 2.294474321784248, "learning_rate": 7.934185298419667e-05, "loss": 0.9228, "step": 1677 }, { "epoch": 0.7606527651858568, "grad_norm": 1.9116528021499337, "learning_rate": 7.933956412833012e-05, "loss": 0.9034, "step": 1678 }, { "epoch": 0.7611060743427017, "grad_norm": 1.9371458240693507, "learning_rate": 7.93372713324906e-05, "loss": 0.8914, "step": 1679 }, { "epoch": 0.7615593834995467, "grad_norm": 1.670027982721587, "learning_rate": 7.933497459690774e-05, "loss": 0.9165, "step": 1680 }, { "epoch": 0.7620126926563917, "grad_norm": 2.0939037970480383, "learning_rate": 7.933267392181158e-05, "loss": 0.9104, "step": 1681 }, { "epoch": 0.7624660018132366, "grad_norm": 1.7057591480371763, "learning_rate": 7.933036930743254e-05, "loss": 0.9308, "step": 1682 }, { "epoch": 0.7629193109700816, "grad_norm": 2.1059271346017865, "learning_rate": 7.93280607540014e-05, "loss": 0.9135, "step": 1683 }, { "epoch": 0.7633726201269265, "grad_norm": 1.7272173438059368, "learning_rate": 7.93257482617494e-05, "loss": 0.9051, "step": 1684 }, { "epoch": 0.7638259292837716, "grad_norm": 2.076928966559462, "learning_rate": 7.932343183090814e-05, "loss": 0.9116, "step": 1685 }, { "epoch": 0.7642792384406165, "grad_norm": 1.767246601566885, "learning_rate": 7.932111146170961e-05, "loss": 0.9304, "step": 1686 }, { "epoch": 0.7647325475974615, "grad_norm": 2.0773705998058785, "learning_rate": 7.93187871543862e-05, "loss": 0.9034, "step": 1687 }, { "epoch": 0.7651858567543064, "grad_norm": 1.7156429733025549, "learning_rate": 7.931645890917073e-05, "loss": 0.8961, "step": 1688 }, { "epoch": 0.7656391659111514, "grad_norm": 2.1095110458418267, "learning_rate": 7.93141267262963e-05, "loss": 0.9313, "step": 1689 }, { "epoch": 0.7660924750679964, "grad_norm": 1.7808566243720418, "learning_rate": 7.931179060599658e-05, "loss": 0.9276, "step": 1690 }, { "epoch": 0.7665457842248413, "grad_norm": 1.9722931686024765, "learning_rate": 7.930945054850547e-05, "loss": 0.8984, "step": 1691 }, { "epoch": 0.7669990933816863, "grad_norm": 1.7297142886084518, "learning_rate": 7.930710655405737e-05, "loss": 0.9423, "step": 1692 }, { "epoch": 0.7674524025385313, "grad_norm": 2.09795530852484, "learning_rate": 7.930475862288703e-05, "loss": 0.9084, "step": 1693 }, { "epoch": 0.7679057116953762, "grad_norm": 1.8130848712015828, "learning_rate": 7.93024067552296e-05, "loss": 0.9174, "step": 1694 }, { "epoch": 0.7683590208522212, "grad_norm": 1.9486354746804728, "learning_rate": 7.930005095132063e-05, "loss": 0.8859, "step": 1695 }, { "epoch": 0.7688123300090662, "grad_norm": 1.6610830244142931, "learning_rate": 7.929769121139605e-05, "loss": 0.9079, "step": 1696 }, { "epoch": 0.7692656391659112, "grad_norm": 2.0460180687717573, "learning_rate": 7.929532753569218e-05, "loss": 0.8912, "step": 1697 }, { "epoch": 0.7697189483227561, "grad_norm": 1.79591691277189, "learning_rate": 7.92929599244458e-05, "loss": 0.8982, "step": 1698 }, { "epoch": 0.770172257479601, "grad_norm": 1.8483299038463623, "learning_rate": 7.9290588377894e-05, "loss": 0.9154, "step": 1699 }, { "epoch": 0.7706255666364461, "grad_norm": 1.5011304692640992, "learning_rate": 7.928821289627429e-05, "loss": 0.9085, "step": 1700 }, { "epoch": 0.771078875793291, "grad_norm": 2.129936947091731, "learning_rate": 7.92858334798246e-05, "loss": 0.9058, "step": 1701 }, { "epoch": 0.771532184950136, "grad_norm": 1.7852326615692842, "learning_rate": 7.928345012878321e-05, "loss": 0.9034, "step": 1702 }, { "epoch": 0.7719854941069809, "grad_norm": 1.873329223337439, "learning_rate": 7.928106284338886e-05, "loss": 0.9131, "step": 1703 }, { "epoch": 0.772438803263826, "grad_norm": 1.5280991339987988, "learning_rate": 7.927867162388062e-05, "loss": 0.9191, "step": 1704 }, { "epoch": 0.7728921124206709, "grad_norm": 2.0331663461113405, "learning_rate": 7.927627647049796e-05, "loss": 0.913, "step": 1705 }, { "epoch": 0.7733454215775158, "grad_norm": 1.679945163799018, "learning_rate": 7.92738773834808e-05, "loss": 0.9296, "step": 1706 }, { "epoch": 0.7737987307343608, "grad_norm": 1.9947849618158682, "learning_rate": 7.92714743630694e-05, "loss": 0.9258, "step": 1707 }, { "epoch": 0.7742520398912058, "grad_norm": 1.6150125606886787, "learning_rate": 7.92690674095044e-05, "loss": 0.9051, "step": 1708 }, { "epoch": 0.7747053490480508, "grad_norm": 1.9619595748543135, "learning_rate": 7.926665652302691e-05, "loss": 0.9223, "step": 1709 }, { "epoch": 0.7751586582048957, "grad_norm": 1.6067886747898399, "learning_rate": 7.926424170387837e-05, "loss": 0.8983, "step": 1710 }, { "epoch": 0.7756119673617408, "grad_norm": 1.993624313425568, "learning_rate": 7.926182295230061e-05, "loss": 0.9188, "step": 1711 }, { "epoch": 0.7760652765185857, "grad_norm": 1.6171927259541257, "learning_rate": 7.925940026853591e-05, "loss": 0.9262, "step": 1712 }, { "epoch": 0.7765185856754306, "grad_norm": 1.9063007673850898, "learning_rate": 7.925697365282688e-05, "loss": 0.9072, "step": 1713 }, { "epoch": 0.7769718948322756, "grad_norm": 1.5283610485454961, "learning_rate": 7.925454310541657e-05, "loss": 0.919, "step": 1714 }, { "epoch": 0.7774252039891206, "grad_norm": 1.9007753105093976, "learning_rate": 7.925210862654841e-05, "loss": 0.8952, "step": 1715 }, { "epoch": 0.7778785131459656, "grad_norm": 1.4968084693622616, "learning_rate": 7.924967021646619e-05, "loss": 0.935, "step": 1716 }, { "epoch": 0.7783318223028105, "grad_norm": 1.7585259595766491, "learning_rate": 7.924722787541416e-05, "loss": 0.9213, "step": 1717 }, { "epoch": 0.7787851314596554, "grad_norm": 1.4663408642318976, "learning_rate": 7.92447816036369e-05, "loss": 0.9071, "step": 1718 }, { "epoch": 0.7792384406165005, "grad_norm": 1.7614208457813667, "learning_rate": 7.924233140137944e-05, "loss": 0.9083, "step": 1719 }, { "epoch": 0.7796917497733454, "grad_norm": 1.3948695332698449, "learning_rate": 7.923987726888715e-05, "loss": 0.9333, "step": 1720 }, { "epoch": 0.7801450589301904, "grad_norm": 1.500249770714973, "learning_rate": 7.923741920640582e-05, "loss": 0.9241, "step": 1721 }, { "epoch": 0.7805983680870353, "grad_norm": 1.7991159537585926, "learning_rate": 7.923495721418166e-05, "loss": 0.9429, "step": 1722 }, { "epoch": 0.7810516772438804, "grad_norm": 1.1208894200581272, "learning_rate": 7.923249129246122e-05, "loss": 0.9203, "step": 1723 }, { "epoch": 0.7815049864007253, "grad_norm": 1.9391706689903134, "learning_rate": 7.923002144149147e-05, "loss": 0.9263, "step": 1724 }, { "epoch": 0.7819582955575702, "grad_norm": 1.3737541288004789, "learning_rate": 7.922754766151978e-05, "loss": 0.9153, "step": 1725 }, { "epoch": 0.7824116047144152, "grad_norm": 2.3499996537588013, "learning_rate": 7.922506995279391e-05, "loss": 0.9107, "step": 1726 }, { "epoch": 0.7828649138712602, "grad_norm": 2.0874228541593634, "learning_rate": 7.922258831556198e-05, "loss": 0.9147, "step": 1727 }, { "epoch": 0.7833182230281052, "grad_norm": 1.826134864757779, "learning_rate": 7.922010275007257e-05, "loss": 0.9115, "step": 1728 }, { "epoch": 0.7837715321849501, "grad_norm": 1.4612182341913045, "learning_rate": 7.921761325657462e-05, "loss": 0.923, "step": 1729 }, { "epoch": 0.7842248413417952, "grad_norm": 2.0753558715009097, "learning_rate": 7.921511983531743e-05, "loss": 0.9245, "step": 1730 }, { "epoch": 0.7846781504986401, "grad_norm": 1.4895921997661783, "learning_rate": 7.921262248655072e-05, "loss": 0.9257, "step": 1731 }, { "epoch": 0.785131459655485, "grad_norm": 2.0954247135900634, "learning_rate": 7.921012121052466e-05, "loss": 0.9167, "step": 1732 }, { "epoch": 0.78558476881233, "grad_norm": 1.7627028888287175, "learning_rate": 7.92076160074897e-05, "loss": 0.933, "step": 1733 }, { "epoch": 0.786038077969175, "grad_norm": 1.6025091007530117, "learning_rate": 7.920510687769678e-05, "loss": 0.9221, "step": 1734 }, { "epoch": 0.78649138712602, "grad_norm": 1.2774296962590526, "learning_rate": 7.920259382139717e-05, "loss": 0.9243, "step": 1735 }, { "epoch": 0.7869446962828649, "grad_norm": 1.759656423599581, "learning_rate": 7.92000768388426e-05, "loss": 0.9185, "step": 1736 }, { "epoch": 0.7873980054397098, "grad_norm": 1.1061421552328816, "learning_rate": 7.91975559302851e-05, "loss": 0.9239, "step": 1737 }, { "epoch": 0.7878513145965549, "grad_norm": 2.2393761230187113, "learning_rate": 7.919503109597718e-05, "loss": 0.9352, "step": 1738 }, { "epoch": 0.7883046237533998, "grad_norm": 1.8128884521216173, "learning_rate": 7.919250233617172e-05, "loss": 0.9255, "step": 1739 }, { "epoch": 0.7887579329102448, "grad_norm": 1.6001121445597892, "learning_rate": 7.918996965112195e-05, "loss": 0.9098, "step": 1740 }, { "epoch": 0.7892112420670897, "grad_norm": 1.452754779080375, "learning_rate": 7.918743304108155e-05, "loss": 0.9143, "step": 1741 }, { "epoch": 0.7896645512239348, "grad_norm": 1.6136122025580508, "learning_rate": 7.918489250630456e-05, "loss": 0.9097, "step": 1742 }, { "epoch": 0.7901178603807797, "grad_norm": 1.3009597747442827, "learning_rate": 7.918234804704542e-05, "loss": 0.9206, "step": 1743 }, { "epoch": 0.7905711695376246, "grad_norm": 1.673391552595416, "learning_rate": 7.917979966355897e-05, "loss": 0.9245, "step": 1744 }, { "epoch": 0.7910244786944697, "grad_norm": 1.272014424618839, "learning_rate": 7.917724735610045e-05, "loss": 0.9101, "step": 1745 }, { "epoch": 0.7914777878513146, "grad_norm": 1.7720327122892794, "learning_rate": 7.917469112492545e-05, "loss": 0.912, "step": 1746 }, { "epoch": 0.7919310970081596, "grad_norm": 1.4435121693441268, "learning_rate": 7.917213097029e-05, "loss": 0.9248, "step": 1747 }, { "epoch": 0.7923844061650045, "grad_norm": 1.6110529380554193, "learning_rate": 7.916956689245052e-05, "loss": 0.9268, "step": 1748 }, { "epoch": 0.7928377153218495, "grad_norm": 1.3536267615481845, "learning_rate": 7.91669988916638e-05, "loss": 0.9207, "step": 1749 }, { "epoch": 0.7932910244786945, "grad_norm": 1.383196858775192, "learning_rate": 7.9164426968187e-05, "loss": 0.8997, "step": 1750 }, { "epoch": 0.7937443336355394, "grad_norm": 1.0971948641912048, "learning_rate": 7.916185112227777e-05, "loss": 0.9193, "step": 1751 }, { "epoch": 0.7941976427923844, "grad_norm": 1.7357797790911682, "learning_rate": 7.915927135419406e-05, "loss": 0.9225, "step": 1752 }, { "epoch": 0.7946509519492294, "grad_norm": 1.4461152239869444, "learning_rate": 7.915668766419421e-05, "loss": 0.9312, "step": 1753 }, { "epoch": 0.7951042611060744, "grad_norm": 1.5577993233120675, "learning_rate": 7.915410005253704e-05, "loss": 0.9117, "step": 1754 }, { "epoch": 0.7955575702629193, "grad_norm": 1.5606338547077045, "learning_rate": 7.915150851948165e-05, "loss": 0.9398, "step": 1755 }, { "epoch": 0.7960108794197642, "grad_norm": 1.1999519249466981, "learning_rate": 7.914891306528763e-05, "loss": 0.9257, "step": 1756 }, { "epoch": 0.7964641885766093, "grad_norm": 1.108417109314915, "learning_rate": 7.914631369021491e-05, "loss": 0.9372, "step": 1757 }, { "epoch": 0.7969174977334542, "grad_norm": 1.227350708121383, "learning_rate": 7.914371039452383e-05, "loss": 0.9009, "step": 1758 }, { "epoch": 0.7973708068902992, "grad_norm": 0.9409253492616543, "learning_rate": 7.91411031784751e-05, "loss": 0.9233, "step": 1759 }, { "epoch": 0.7978241160471442, "grad_norm": 1.3499533854938484, "learning_rate": 7.913849204232988e-05, "loss": 0.9202, "step": 1760 }, { "epoch": 0.7982774252039891, "grad_norm": 1.129686996540689, "learning_rate": 7.913587698634962e-05, "loss": 0.9275, "step": 1761 }, { "epoch": 0.7987307343608341, "grad_norm": 1.2648613596190044, "learning_rate": 7.913325801079628e-05, "loss": 0.9288, "step": 1762 }, { "epoch": 0.799184043517679, "grad_norm": 0.9820708876078165, "learning_rate": 7.913063511593215e-05, "loss": 0.9091, "step": 1763 }, { "epoch": 0.7996373526745241, "grad_norm": 1.891956870126177, "learning_rate": 7.91280083020199e-05, "loss": 0.9193, "step": 1764 }, { "epoch": 0.800090661831369, "grad_norm": 1.5204951242229794, "learning_rate": 7.912537756932262e-05, "loss": 0.9433, "step": 1765 }, { "epoch": 0.800543970988214, "grad_norm": 1.473056311902333, "learning_rate": 7.912274291810379e-05, "loss": 0.9237, "step": 1766 }, { "epoch": 0.8009972801450589, "grad_norm": 1.3649679161830286, "learning_rate": 7.912010434862729e-05, "loss": 0.9151, "step": 1767 }, { "epoch": 0.8014505893019039, "grad_norm": 0.9100209246582931, "learning_rate": 7.911746186115735e-05, "loss": 0.9066, "step": 1768 }, { "epoch": 0.8019038984587489, "grad_norm": 1.1790276467554137, "learning_rate": 7.911481545595865e-05, "loss": 0.922, "step": 1769 }, { "epoch": 0.8023572076155938, "grad_norm": 0.7735552370711257, "learning_rate": 7.911216513329621e-05, "loss": 0.9249, "step": 1770 }, { "epoch": 0.8028105167724388, "grad_norm": 0.9378161241453069, "learning_rate": 7.91095108934355e-05, "loss": 0.9114, "step": 1771 }, { "epoch": 0.8032638259292838, "grad_norm": 0.840458219646737, "learning_rate": 7.910685273664232e-05, "loss": 0.9098, "step": 1772 }, { "epoch": 0.8037171350861287, "grad_norm": 1.6211348697595427, "learning_rate": 7.910419066318292e-05, "loss": 0.9317, "step": 1773 }, { "epoch": 0.8041704442429737, "grad_norm": 1.6020391938474858, "learning_rate": 7.91015246733239e-05, "loss": 0.9177, "step": 1774 }, { "epoch": 0.8046237533998187, "grad_norm": 2.1069019058471468, "learning_rate": 7.909885476733227e-05, "loss": 0.9432, "step": 1775 }, { "epoch": 0.8050770625566637, "grad_norm": 1.0992963962272102, "learning_rate": 7.90961809454754e-05, "loss": 0.9453, "step": 1776 }, { "epoch": 0.8055303717135086, "grad_norm": 1.3066059253728206, "learning_rate": 7.909350320802112e-05, "loss": 0.9475, "step": 1777 }, { "epoch": 0.8059836808703535, "grad_norm": 1.676447767810327, "learning_rate": 7.909082155523761e-05, "loss": 0.9423, "step": 1778 }, { "epoch": 0.8064369900271986, "grad_norm": 0.9808954101544356, "learning_rate": 7.908813598739344e-05, "loss": 0.9176, "step": 1779 }, { "epoch": 0.8068902991840435, "grad_norm": 1.7822462719515302, "learning_rate": 7.908544650475756e-05, "loss": 0.9321, "step": 1780 }, { "epoch": 0.8073436083408885, "grad_norm": 1.0642349456647628, "learning_rate": 7.908275310759934e-05, "loss": 0.9363, "step": 1781 }, { "epoch": 0.8077969174977334, "grad_norm": 1.6089464432319618, "learning_rate": 7.908005579618855e-05, "loss": 0.9492, "step": 1782 }, { "epoch": 0.8082502266545785, "grad_norm": 1.3896456894488163, "learning_rate": 7.90773545707953e-05, "loss": 0.9478, "step": 1783 }, { "epoch": 0.8087035358114234, "grad_norm": 1.4608198950446802, "learning_rate": 7.907464943169014e-05, "loss": 0.9286, "step": 1784 }, { "epoch": 0.8091568449682683, "grad_norm": 1.6393990651129717, "learning_rate": 7.907194037914402e-05, "loss": 0.9334, "step": 1785 }, { "epoch": 0.8096101541251133, "grad_norm": 1.3680630152650128, "learning_rate": 7.906922741342823e-05, "loss": 0.9569, "step": 1786 }, { "epoch": 0.8100634632819583, "grad_norm": 1.3451944860453235, "learning_rate": 7.906651053481449e-05, "loss": 0.9488, "step": 1787 }, { "epoch": 0.8105167724388033, "grad_norm": 1.3761097448645225, "learning_rate": 7.906378974357491e-05, "loss": 0.9271, "step": 1788 }, { "epoch": 0.8109700815956482, "grad_norm": 1.0354138815239464, "learning_rate": 7.9061065039982e-05, "loss": 0.9319, "step": 1789 }, { "epoch": 0.8114233907524931, "grad_norm": 1.7840426016503628, "learning_rate": 7.905833642430859e-05, "loss": 0.9201, "step": 1790 }, { "epoch": 0.8118766999093382, "grad_norm": 1.0991144932865402, "learning_rate": 7.905560389682802e-05, "loss": 0.9422, "step": 1791 }, { "epoch": 0.8123300090661831, "grad_norm": 1.7410019219364492, "learning_rate": 7.905286745781393e-05, "loss": 0.9333, "step": 1792 }, { "epoch": 0.8127833182230281, "grad_norm": 1.326663590982856, "learning_rate": 7.905012710754039e-05, "loss": 0.9233, "step": 1793 }, { "epoch": 0.8132366273798731, "grad_norm": 1.4115634050808967, "learning_rate": 7.904738284628186e-05, "loss": 0.9284, "step": 1794 }, { "epoch": 0.8136899365367181, "grad_norm": 0.9962439092830639, "learning_rate": 7.904463467431318e-05, "loss": 0.9295, "step": 1795 }, { "epoch": 0.814143245693563, "grad_norm": 1.381098265914572, "learning_rate": 7.904188259190958e-05, "loss": 0.9229, "step": 1796 }, { "epoch": 0.8145965548504079, "grad_norm": 0.9978437690135863, "learning_rate": 7.90391265993467e-05, "loss": 0.915, "step": 1797 }, { "epoch": 0.815049864007253, "grad_norm": 1.4862059807082395, "learning_rate": 7.903636669690055e-05, "loss": 0.9314, "step": 1798 }, { "epoch": 0.8155031731640979, "grad_norm": 1.1144432734724155, "learning_rate": 7.903360288484757e-05, "loss": 0.9241, "step": 1799 }, { "epoch": 0.8159564823209429, "grad_norm": 1.2328494633110572, "learning_rate": 7.903083516346453e-05, "loss": 0.9214, "step": 1800 }, { "epoch": 0.8164097914777878, "grad_norm": 1.1680709997801715, "learning_rate": 7.902806353302864e-05, "loss": 0.9151, "step": 1801 }, { "epoch": 0.8168631006346329, "grad_norm": 1.330300591677882, "learning_rate": 7.902528799381748e-05, "loss": 0.9247, "step": 1802 }, { "epoch": 0.8173164097914778, "grad_norm": 1.1634559200739671, "learning_rate": 7.902250854610904e-05, "loss": 0.9133, "step": 1803 }, { "epoch": 0.8177697189483227, "grad_norm": 1.1629904041348953, "learning_rate": 7.901972519018168e-05, "loss": 0.9276, "step": 1804 }, { "epoch": 0.8182230281051677, "grad_norm": 0.922258462701729, "learning_rate": 7.901693792631417e-05, "loss": 0.9182, "step": 1805 }, { "epoch": 0.8186763372620127, "grad_norm": 1.4945601542524378, "learning_rate": 7.901414675478566e-05, "loss": 0.9068, "step": 1806 }, { "epoch": 0.8191296464188577, "grad_norm": 1.1690537061761888, "learning_rate": 7.90113516758757e-05, "loss": 0.9325, "step": 1807 }, { "epoch": 0.8195829555757026, "grad_norm": 0.9259118042003727, "learning_rate": 7.90085526898642e-05, "loss": 0.9118, "step": 1808 }, { "epoch": 0.8200362647325476, "grad_norm": 1.073952677083576, "learning_rate": 7.90057497970315e-05, "loss": 0.9234, "step": 1809 }, { "epoch": 0.8204895738893926, "grad_norm": 1.0606122751880336, "learning_rate": 7.900294299765835e-05, "loss": 0.922, "step": 1810 }, { "epoch": 0.8209428830462375, "grad_norm": 0.8178404470192219, "learning_rate": 7.90001322920258e-05, "loss": 0.9353, "step": 1811 }, { "epoch": 0.8213961922030825, "grad_norm": 1.0864148207638016, "learning_rate": 7.89973176804154e-05, "loss": 0.9043, "step": 1812 }, { "epoch": 0.8218495013599275, "grad_norm": 1.396013331016924, "learning_rate": 7.899449916310902e-05, "loss": 0.9179, "step": 1813 }, { "epoch": 0.8223028105167725, "grad_norm": 0.903960400302424, "learning_rate": 7.899167674038895e-05, "loss": 0.921, "step": 1814 }, { "epoch": 0.8227561196736174, "grad_norm": 1.1104747982865313, "learning_rate": 7.898885041253784e-05, "loss": 0.9341, "step": 1815 }, { "epoch": 0.8232094288304623, "grad_norm": 1.546051520835061, "learning_rate": 7.89860201798388e-05, "loss": 0.9156, "step": 1816 }, { "epoch": 0.8236627379873074, "grad_norm": 1.056296595397706, "learning_rate": 7.898318604257525e-05, "loss": 0.9224, "step": 1817 }, { "epoch": 0.8241160471441523, "grad_norm": 1.1857692078339375, "learning_rate": 7.898034800103105e-05, "loss": 0.9347, "step": 1818 }, { "epoch": 0.8245693563009973, "grad_norm": 0.9431938504785246, "learning_rate": 7.897750605549044e-05, "loss": 0.9094, "step": 1819 }, { "epoch": 0.8250226654578422, "grad_norm": 1.00053041695227, "learning_rate": 7.897466020623803e-05, "loss": 0.9049, "step": 1820 }, { "epoch": 0.8254759746146872, "grad_norm": 1.7200724950872759, "learning_rate": 7.897181045355888e-05, "loss": 0.9326, "step": 1821 }, { "epoch": 0.8259292837715322, "grad_norm": 0.8190955956053115, "learning_rate": 7.896895679773836e-05, "loss": 0.929, "step": 1822 }, { "epoch": 0.8263825929283771, "grad_norm": 1.2907631253169443, "learning_rate": 7.89660992390623e-05, "loss": 0.93, "step": 1823 }, { "epoch": 0.8268359020852222, "grad_norm": 1.3315105914287373, "learning_rate": 7.896323777781687e-05, "loss": 0.904, "step": 1824 }, { "epoch": 0.8272892112420671, "grad_norm": 1.5573466864762546, "learning_rate": 7.896037241428867e-05, "loss": 0.9286, "step": 1825 }, { "epoch": 0.827742520398912, "grad_norm": 0.909848425851475, "learning_rate": 7.895750314876467e-05, "loss": 0.9295, "step": 1826 }, { "epoch": 0.828195829555757, "grad_norm": 1.095915624457976, "learning_rate": 7.895462998153225e-05, "loss": 0.9358, "step": 1827 }, { "epoch": 0.828649138712602, "grad_norm": 1.2893677561679044, "learning_rate": 7.895175291287913e-05, "loss": 0.9237, "step": 1828 }, { "epoch": 0.829102447869447, "grad_norm": 1.4999765388227546, "learning_rate": 7.894887194309348e-05, "loss": 0.9015, "step": 1829 }, { "epoch": 0.8295557570262919, "grad_norm": 1.1911744158471365, "learning_rate": 7.894598707246386e-05, "loss": 0.9192, "step": 1830 }, { "epoch": 0.8300090661831369, "grad_norm": 1.0728255119231427, "learning_rate": 7.894309830127915e-05, "loss": 0.9286, "step": 1831 }, { "epoch": 0.8304623753399819, "grad_norm": 1.0087230156208657, "learning_rate": 7.894020562982872e-05, "loss": 0.934, "step": 1832 }, { "epoch": 0.8309156844968268, "grad_norm": 1.522594314527388, "learning_rate": 7.893730905840222e-05, "loss": 0.9018, "step": 1833 }, { "epoch": 0.8313689936536718, "grad_norm": 0.7973183538871538, "learning_rate": 7.89344085872898e-05, "loss": 0.927, "step": 1834 }, { "epoch": 0.8318223028105167, "grad_norm": 1.855544936511742, "learning_rate": 7.893150421678194e-05, "loss": 0.927, "step": 1835 }, { "epoch": 0.8322756119673618, "grad_norm": 0.8056066842071624, "learning_rate": 7.89285959471695e-05, "loss": 0.9172, "step": 1836 }, { "epoch": 0.8327289211242067, "grad_norm": 1.9025083726328658, "learning_rate": 7.892568377874377e-05, "loss": 0.9146, "step": 1837 }, { "epoch": 0.8331822302810517, "grad_norm": 1.2507127246423972, "learning_rate": 7.892276771179642e-05, "loss": 0.9508, "step": 1838 }, { "epoch": 0.8336355394378967, "grad_norm": 2.2560883551688007, "learning_rate": 7.891984774661948e-05, "loss": 0.9015, "step": 1839 }, { "epoch": 0.8340888485947416, "grad_norm": 2.0455518330722375, "learning_rate": 7.891692388350541e-05, "loss": 0.9422, "step": 1840 }, { "epoch": 0.8345421577515866, "grad_norm": 1.372323999919083, "learning_rate": 7.891399612274704e-05, "loss": 0.9311, "step": 1841 }, { "epoch": 0.8349954669084315, "grad_norm": 1.4975814754658494, "learning_rate": 7.89110644646376e-05, "loss": 0.9196, "step": 1842 }, { "epoch": 0.8354487760652766, "grad_norm": 1.233209061589469, "learning_rate": 7.890812890947069e-05, "loss": 0.9191, "step": 1843 }, { "epoch": 0.8359020852221215, "grad_norm": 1.229898355976964, "learning_rate": 7.89051894575403e-05, "loss": 0.9145, "step": 1844 }, { "epoch": 0.8363553943789664, "grad_norm": 1.1743737632891098, "learning_rate": 7.890224610914088e-05, "loss": 0.9159, "step": 1845 }, { "epoch": 0.8368087035358114, "grad_norm": 1.256231992133992, "learning_rate": 7.889929886456716e-05, "loss": 0.9212, "step": 1846 }, { "epoch": 0.8372620126926564, "grad_norm": 1.2181987229451725, "learning_rate": 7.889634772411434e-05, "loss": 0.9256, "step": 1847 }, { "epoch": 0.8377153218495014, "grad_norm": 1.1724636515352895, "learning_rate": 7.889339268807798e-05, "loss": 0.9204, "step": 1848 }, { "epoch": 0.8381686310063463, "grad_norm": 1.5135944704346873, "learning_rate": 7.889043375675404e-05, "loss": 0.9264, "step": 1849 }, { "epoch": 0.8386219401631912, "grad_norm": 1.0930376508916606, "learning_rate": 7.888747093043886e-05, "loss": 0.9148, "step": 1850 }, { "epoch": 0.8390752493200363, "grad_norm": 1.1300623826826586, "learning_rate": 7.888450420942917e-05, "loss": 0.949, "step": 1851 }, { "epoch": 0.8395285584768812, "grad_norm": 1.0776064343592024, "learning_rate": 7.888153359402212e-05, "loss": 0.9133, "step": 1852 }, { "epoch": 0.8399818676337262, "grad_norm": 1.169310277838133, "learning_rate": 7.887855908451519e-05, "loss": 0.9187, "step": 1853 }, { "epoch": 0.8404351767905711, "grad_norm": 1.3006570776997852, "learning_rate": 7.887558068120633e-05, "loss": 0.9509, "step": 1854 }, { "epoch": 0.8408884859474162, "grad_norm": 1.2783096636890436, "learning_rate": 7.887259838439379e-05, "loss": 0.8994, "step": 1855 }, { "epoch": 0.8413417951042611, "grad_norm": 1.3633806409595846, "learning_rate": 7.88696121943763e-05, "loss": 0.9221, "step": 1856 }, { "epoch": 0.841795104261106, "grad_norm": 1.1454200097862939, "learning_rate": 7.88666221114529e-05, "loss": 0.909, "step": 1857 }, { "epoch": 0.8422484134179511, "grad_norm": 0.792917901865371, "learning_rate": 7.886362813592306e-05, "loss": 0.8995, "step": 1858 }, { "epoch": 0.842701722574796, "grad_norm": 0.8726086814655074, "learning_rate": 7.886063026808665e-05, "loss": 0.9291, "step": 1859 }, { "epoch": 0.843155031731641, "grad_norm": 1.148682263673849, "learning_rate": 7.885762850824391e-05, "loss": 0.9091, "step": 1860 }, { "epoch": 0.8436083408884859, "grad_norm": 1.713108628393308, "learning_rate": 7.885462285669547e-05, "loss": 0.9482, "step": 1861 }, { "epoch": 0.844061650045331, "grad_norm": 0.7722409650883388, "learning_rate": 7.885161331374237e-05, "loss": 0.9278, "step": 1862 }, { "epoch": 0.8445149592021759, "grad_norm": 1.1057762932736723, "learning_rate": 7.884859987968602e-05, "loss": 0.9402, "step": 1863 }, { "epoch": 0.8449682683590208, "grad_norm": 1.4314360738551417, "learning_rate": 7.88455825548282e-05, "loss": 0.9309, "step": 1864 }, { "epoch": 0.8454215775158658, "grad_norm": 0.6300549431830416, "learning_rate": 7.884256133947114e-05, "loss": 0.9189, "step": 1865 }, { "epoch": 0.8458748866727108, "grad_norm": 1.1571207443230156, "learning_rate": 7.883953623391739e-05, "loss": 0.9275, "step": 1866 }, { "epoch": 0.8463281958295558, "grad_norm": 1.1712971078196217, "learning_rate": 7.883650723846995e-05, "loss": 0.914, "step": 1867 }, { "epoch": 0.8467815049864007, "grad_norm": 0.9520714310253804, "learning_rate": 7.883347435343215e-05, "loss": 0.9196, "step": 1868 }, { "epoch": 0.8472348141432456, "grad_norm": 1.3441081009257478, "learning_rate": 7.883043757910778e-05, "loss": 0.9054, "step": 1869 }, { "epoch": 0.8476881233000907, "grad_norm": 1.0893051796953523, "learning_rate": 7.882739691580098e-05, "loss": 0.9284, "step": 1870 }, { "epoch": 0.8481414324569356, "grad_norm": 1.4188753979500799, "learning_rate": 7.882435236381626e-05, "loss": 0.8936, "step": 1871 }, { "epoch": 0.8485947416137806, "grad_norm": 1.056036314234669, "learning_rate": 7.882130392345853e-05, "loss": 0.9049, "step": 1872 }, { "epoch": 0.8490480507706256, "grad_norm": 2.0425290191911376, "learning_rate": 7.881825159503314e-05, "loss": 0.9247, "step": 1873 }, { "epoch": 0.8495013599274706, "grad_norm": 0.9805987720396707, "learning_rate": 7.881519537884575e-05, "loss": 0.9249, "step": 1874 }, { "epoch": 0.8499546690843155, "grad_norm": 2.558584057567685, "learning_rate": 7.881213527520247e-05, "loss": 0.9321, "step": 1875 }, { "epoch": 0.8504079782411604, "grad_norm": 2.151800476607534, "learning_rate": 7.880907128440978e-05, "loss": 0.9407, "step": 1876 }, { "epoch": 0.8508612873980055, "grad_norm": 1.7239138527213016, "learning_rate": 7.880600340677454e-05, "loss": 0.9219, "step": 1877 }, { "epoch": 0.8513145965548504, "grad_norm": 1.6079557910022217, "learning_rate": 7.880293164260401e-05, "loss": 0.896, "step": 1878 }, { "epoch": 0.8517679057116954, "grad_norm": 1.658867651037123, "learning_rate": 7.879985599220584e-05, "loss": 0.9122, "step": 1879 }, { "epoch": 0.8522212148685403, "grad_norm": 1.5748051908391758, "learning_rate": 7.879677645588807e-05, "loss": 0.9185, "step": 1880 }, { "epoch": 0.8526745240253853, "grad_norm": 1.3867307016838302, "learning_rate": 7.879369303395911e-05, "loss": 0.9315, "step": 1881 }, { "epoch": 0.8531278331822303, "grad_norm": 1.4805550659807891, "learning_rate": 7.879060572672778e-05, "loss": 0.9154, "step": 1882 }, { "epoch": 0.8535811423390752, "grad_norm": 1.0552828322901946, "learning_rate": 7.878751453450328e-05, "loss": 0.9263, "step": 1883 }, { "epoch": 0.8540344514959202, "grad_norm": 2.0524396724954443, "learning_rate": 7.87844194575952e-05, "loss": 0.928, "step": 1884 }, { "epoch": 0.8544877606527652, "grad_norm": 1.519824649737183, "learning_rate": 7.878132049631353e-05, "loss": 0.9371, "step": 1885 }, { "epoch": 0.8549410698096102, "grad_norm": 2.1640222226736245, "learning_rate": 7.877821765096864e-05, "loss": 0.9399, "step": 1886 }, { "epoch": 0.8553943789664551, "grad_norm": 1.9649005494130922, "learning_rate": 7.877511092187127e-05, "loss": 0.9313, "step": 1887 }, { "epoch": 0.8558476881233001, "grad_norm": 1.950310635034749, "learning_rate": 7.877200030933261e-05, "loss": 0.9175, "step": 1888 }, { "epoch": 0.8563009972801451, "grad_norm": 1.9368759888736782, "learning_rate": 7.876888581366416e-05, "loss": 0.9236, "step": 1889 }, { "epoch": 0.85675430643699, "grad_norm": 1.659245595604548, "learning_rate": 7.876576743517785e-05, "loss": 0.9035, "step": 1890 }, { "epoch": 0.857207615593835, "grad_norm": 1.6672005733032513, "learning_rate": 7.8762645174186e-05, "loss": 0.9056, "step": 1891 }, { "epoch": 0.85766092475068, "grad_norm": 1.5751160697561608, "learning_rate": 7.875951903100132e-05, "loss": 0.9195, "step": 1892 }, { "epoch": 0.858114233907525, "grad_norm": 1.4796178311390398, "learning_rate": 7.87563890059369e-05, "loss": 0.9312, "step": 1893 }, { "epoch": 0.8585675430643699, "grad_norm": 1.6292273630805565, "learning_rate": 7.875325509930622e-05, "loss": 0.9215, "step": 1894 }, { "epoch": 0.8590208522212148, "grad_norm": 0.73607429564796, "learning_rate": 7.875011731142313e-05, "loss": 0.8984, "step": 1895 }, { "epoch": 0.8594741613780599, "grad_norm": 1.6700320955660077, "learning_rate": 7.874697564260191e-05, "loss": 0.942, "step": 1896 }, { "epoch": 0.8599274705349048, "grad_norm": 1.0564377631010868, "learning_rate": 7.874383009315722e-05, "loss": 0.9428, "step": 1897 }, { "epoch": 0.8603807796917498, "grad_norm": 1.5480957520865852, "learning_rate": 7.874068066340407e-05, "loss": 0.9147, "step": 1898 }, { "epoch": 0.8608340888485947, "grad_norm": 0.8970282649964363, "learning_rate": 7.87375273536579e-05, "loss": 0.9235, "step": 1899 }, { "epoch": 0.8612873980054397, "grad_norm": 1.4433493918516522, "learning_rate": 7.87343701642345e-05, "loss": 0.9056, "step": 1900 }, { "epoch": 0.8617407071622847, "grad_norm": 1.243568067087514, "learning_rate": 7.873120909545012e-05, "loss": 0.9286, "step": 1901 }, { "epoch": 0.8621940163191296, "grad_norm": 1.173756480819487, "learning_rate": 7.872804414762129e-05, "loss": 0.9262, "step": 1902 }, { "epoch": 0.8626473254759747, "grad_norm": 1.4821238794615876, "learning_rate": 7.872487532106505e-05, "loss": 0.9174, "step": 1903 }, { "epoch": 0.8631006346328196, "grad_norm": 0.8928453738158387, "learning_rate": 7.872170261609872e-05, "loss": 0.9331, "step": 1904 }, { "epoch": 0.8635539437896645, "grad_norm": 1.930574366895751, "learning_rate": 7.871852603304007e-05, "loss": 0.9371, "step": 1905 }, { "epoch": 0.8640072529465095, "grad_norm": 1.3834056912924555, "learning_rate": 7.871534557220725e-05, "loss": 0.9259, "step": 1906 }, { "epoch": 0.8644605621033545, "grad_norm": 1.6355135580826483, "learning_rate": 7.871216123391878e-05, "loss": 0.9242, "step": 1907 }, { "epoch": 0.8649138712601995, "grad_norm": 1.5058385576921285, "learning_rate": 7.87089730184936e-05, "loss": 0.9287, "step": 1908 }, { "epoch": 0.8653671804170444, "grad_norm": 1.3462543545052719, "learning_rate": 7.8705780926251e-05, "loss": 0.9111, "step": 1909 }, { "epoch": 0.8658204895738894, "grad_norm": 1.0877757397201948, "learning_rate": 7.87025849575107e-05, "loss": 0.926, "step": 1910 }, { "epoch": 0.8662737987307344, "grad_norm": 1.3593626934644014, "learning_rate": 7.869938511259276e-05, "loss": 0.92, "step": 1911 }, { "epoch": 0.8667271078875793, "grad_norm": 1.1836973947487357, "learning_rate": 7.869618139181766e-05, "loss": 0.9158, "step": 1912 }, { "epoch": 0.8671804170444243, "grad_norm": 1.5625996137583116, "learning_rate": 7.869297379550628e-05, "loss": 0.908, "step": 1913 }, { "epoch": 0.8676337262012692, "grad_norm": 1.0627189959446726, "learning_rate": 7.868976232397985e-05, "loss": 0.9415, "step": 1914 }, { "epoch": 0.8680870353581143, "grad_norm": 1.6716850933788188, "learning_rate": 7.868654697756003e-05, "loss": 0.9172, "step": 1915 }, { "epoch": 0.8685403445149592, "grad_norm": 1.4701949106090457, "learning_rate": 7.868332775656883e-05, "loss": 0.9316, "step": 1916 }, { "epoch": 0.8689936536718041, "grad_norm": 1.038897792998551, "learning_rate": 7.868010466132865e-05, "loss": 0.8974, "step": 1917 }, { "epoch": 0.8694469628286491, "grad_norm": 1.0459557711153045, "learning_rate": 7.867687769216233e-05, "loss": 0.9142, "step": 1918 }, { "epoch": 0.8699002719854941, "grad_norm": 1.416156212127103, "learning_rate": 7.867364684939303e-05, "loss": 0.9207, "step": 1919 }, { "epoch": 0.8703535811423391, "grad_norm": 0.9907253177086155, "learning_rate": 7.867041213334433e-05, "loss": 0.9206, "step": 1920 }, { "epoch": 0.870806890299184, "grad_norm": 1.6835315667242408, "learning_rate": 7.86671735443402e-05, "loss": 0.9275, "step": 1921 }, { "epoch": 0.8712601994560291, "grad_norm": 1.4598022631944116, "learning_rate": 7.866393108270502e-05, "loss": 0.9039, "step": 1922 }, { "epoch": 0.871713508612874, "grad_norm": 1.3164116509609889, "learning_rate": 7.86606847487635e-05, "loss": 0.9024, "step": 1923 }, { "epoch": 0.8721668177697189, "grad_norm": 1.3453272903569233, "learning_rate": 7.865743454284077e-05, "loss": 0.91, "step": 1924 }, { "epoch": 0.8726201269265639, "grad_norm": 0.9856561617296831, "learning_rate": 7.865418046526239e-05, "loss": 0.9117, "step": 1925 }, { "epoch": 0.8730734360834089, "grad_norm": 1.2255954495451855, "learning_rate": 7.865092251635421e-05, "loss": 0.9191, "step": 1926 }, { "epoch": 0.8735267452402539, "grad_norm": 0.9522296793462942, "learning_rate": 7.864766069644255e-05, "loss": 0.9114, "step": 1927 }, { "epoch": 0.8739800543970988, "grad_norm": 1.0007976348293464, "learning_rate": 7.864439500585409e-05, "loss": 0.9072, "step": 1928 }, { "epoch": 0.8744333635539437, "grad_norm": 1.593905339012299, "learning_rate": 7.86411254449159e-05, "loss": 0.8908, "step": 1929 }, { "epoch": 0.8748866727107888, "grad_norm": 0.9323896646646574, "learning_rate": 7.863785201395543e-05, "loss": 0.9132, "step": 1930 }, { "epoch": 0.8753399818676337, "grad_norm": 1.1368012813161903, "learning_rate": 7.863457471330052e-05, "loss": 0.9132, "step": 1931 }, { "epoch": 0.8757932910244787, "grad_norm": 1.0172639352303696, "learning_rate": 7.86312935432794e-05, "loss": 0.8964, "step": 1932 }, { "epoch": 0.8762466001813236, "grad_norm": 1.2784740082318715, "learning_rate": 7.862800850422072e-05, "loss": 0.9267, "step": 1933 }, { "epoch": 0.8766999093381687, "grad_norm": 1.591670422534571, "learning_rate": 7.862471959645345e-05, "loss": 0.9229, "step": 1934 }, { "epoch": 0.8771532184950136, "grad_norm": 0.6819382224568834, "learning_rate": 7.8621426820307e-05, "loss": 0.8821, "step": 1935 }, { "epoch": 0.8776065276518585, "grad_norm": 1.053769190765739, "learning_rate": 7.861813017611114e-05, "loss": 0.9235, "step": 1936 }, { "epoch": 0.8780598368087036, "grad_norm": 1.6341179947462707, "learning_rate": 7.861482966419606e-05, "loss": 0.9178, "step": 1937 }, { "epoch": 0.8785131459655485, "grad_norm": 0.9667888648931678, "learning_rate": 7.861152528489228e-05, "loss": 0.9115, "step": 1938 }, { "epoch": 0.8789664551223935, "grad_norm": 1.5333161777049396, "learning_rate": 7.860821703853079e-05, "loss": 0.9404, "step": 1939 }, { "epoch": 0.8794197642792384, "grad_norm": 1.1719089114984325, "learning_rate": 7.86049049254429e-05, "loss": 0.933, "step": 1940 }, { "epoch": 0.8798730734360835, "grad_norm": 1.4785235681664683, "learning_rate": 7.860158894596031e-05, "loss": 0.9034, "step": 1941 }, { "epoch": 0.8803263825929284, "grad_norm": 1.375099759841513, "learning_rate": 7.859826910041512e-05, "loss": 0.9077, "step": 1942 }, { "epoch": 0.8807796917497733, "grad_norm": 1.2023429138552386, "learning_rate": 7.859494538913987e-05, "loss": 0.9129, "step": 1943 }, { "epoch": 0.8812330009066183, "grad_norm": 1.0487767734071967, "learning_rate": 7.85916178124674e-05, "loss": 0.9252, "step": 1944 }, { "epoch": 0.8816863100634633, "grad_norm": 1.315668702077222, "learning_rate": 7.858828637073098e-05, "loss": 0.9331, "step": 1945 }, { "epoch": 0.8821396192203083, "grad_norm": 0.8512507988453885, "learning_rate": 7.858495106426428e-05, "loss": 0.8915, "step": 1946 }, { "epoch": 0.8825929283771532, "grad_norm": 1.2463898176587642, "learning_rate": 7.858161189340133e-05, "loss": 0.9493, "step": 1947 }, { "epoch": 0.8830462375339981, "grad_norm": 1.046572294355905, "learning_rate": 7.857826885847657e-05, "loss": 0.9258, "step": 1948 }, { "epoch": 0.8834995466908432, "grad_norm": 0.9816737819465419, "learning_rate": 7.857492195982479e-05, "loss": 0.9098, "step": 1949 }, { "epoch": 0.8839528558476881, "grad_norm": 1.0349480003304663, "learning_rate": 7.857157119778122e-05, "loss": 0.9183, "step": 1950 }, { "epoch": 0.8844061650045331, "grad_norm": 0.9675622840423959, "learning_rate": 7.856821657268142e-05, "loss": 0.9086, "step": 1951 }, { "epoch": 0.8848594741613781, "grad_norm": 1.1068727139880046, "learning_rate": 7.856485808486139e-05, "loss": 0.9394, "step": 1952 }, { "epoch": 0.885312783318223, "grad_norm": 1.6612026848770702, "learning_rate": 7.856149573465748e-05, "loss": 0.9161, "step": 1953 }, { "epoch": 0.885766092475068, "grad_norm": 0.8600754329714205, "learning_rate": 7.855812952240643e-05, "loss": 0.9185, "step": 1954 }, { "epoch": 0.8862194016319129, "grad_norm": 0.9599775104914932, "learning_rate": 7.855475944844541e-05, "loss": 0.9034, "step": 1955 }, { "epoch": 0.886672710788758, "grad_norm": 0.8665775471465307, "learning_rate": 7.855138551311191e-05, "loss": 0.9257, "step": 1956 }, { "epoch": 0.8871260199456029, "grad_norm": 0.9915757495132598, "learning_rate": 7.854800771674385e-05, "loss": 0.9006, "step": 1957 }, { "epoch": 0.8875793291024479, "grad_norm": 1.124512493442088, "learning_rate": 7.854462605967953e-05, "loss": 0.9238, "step": 1958 }, { "epoch": 0.8880326382592928, "grad_norm": 1.0292277668198335, "learning_rate": 7.854124054225763e-05, "loss": 0.9123, "step": 1959 }, { "epoch": 0.8884859474161378, "grad_norm": 1.6918327331351273, "learning_rate": 7.853785116481721e-05, "loss": 0.9248, "step": 1960 }, { "epoch": 0.8889392565729828, "grad_norm": 1.011139398594677, "learning_rate": 7.853445792769775e-05, "loss": 0.9295, "step": 1961 }, { "epoch": 0.8893925657298277, "grad_norm": 1.5322599696983101, "learning_rate": 7.853106083123906e-05, "loss": 0.9036, "step": 1962 }, { "epoch": 0.8898458748866727, "grad_norm": 0.7864815261429484, "learning_rate": 7.852765987578139e-05, "loss": 0.9174, "step": 1963 }, { "epoch": 0.8902991840435177, "grad_norm": 1.7542102992351025, "learning_rate": 7.852425506166537e-05, "loss": 0.9031, "step": 1964 }, { "epoch": 0.8907524932003626, "grad_norm": 0.9686390603926942, "learning_rate": 7.852084638923198e-05, "loss": 0.9362, "step": 1965 }, { "epoch": 0.8912058023572076, "grad_norm": 1.5086809646566532, "learning_rate": 7.851743385882259e-05, "loss": 0.9296, "step": 1966 }, { "epoch": 0.8916591115140526, "grad_norm": 1.1535192938110832, "learning_rate": 7.851401747077902e-05, "loss": 0.9233, "step": 1967 }, { "epoch": 0.8921124206708976, "grad_norm": 1.4339655942012337, "learning_rate": 7.851059722544341e-05, "loss": 0.9133, "step": 1968 }, { "epoch": 0.8925657298277425, "grad_norm": 1.4053030276027347, "learning_rate": 7.85071731231583e-05, "loss": 0.9258, "step": 1969 }, { "epoch": 0.8930190389845875, "grad_norm": 1.5118303735546952, "learning_rate": 7.850374516426664e-05, "loss": 0.9179, "step": 1970 }, { "epoch": 0.8934723481414325, "grad_norm": 1.4054411787286962, "learning_rate": 7.850031334911172e-05, "loss": 0.9226, "step": 1971 }, { "epoch": 0.8939256572982774, "grad_norm": 1.4225297470218643, "learning_rate": 7.849687767803729e-05, "loss": 0.9288, "step": 1972 }, { "epoch": 0.8943789664551224, "grad_norm": 0.979544679670593, "learning_rate": 7.849343815138743e-05, "loss": 0.9125, "step": 1973 }, { "epoch": 0.8948322756119673, "grad_norm": 1.2215022477361774, "learning_rate": 7.848999476950658e-05, "loss": 0.9198, "step": 1974 }, { "epoch": 0.8952855847688124, "grad_norm": 1.415951447041961, "learning_rate": 7.848654753273964e-05, "loss": 0.9276, "step": 1975 }, { "epoch": 0.8957388939256573, "grad_norm": 1.2800237313489038, "learning_rate": 7.848309644143187e-05, "loss": 0.9175, "step": 1976 }, { "epoch": 0.8961922030825022, "grad_norm": 0.8672779343681286, "learning_rate": 7.847964149592888e-05, "loss": 0.945, "step": 1977 }, { "epoch": 0.8966455122393472, "grad_norm": 1.0887878880562545, "learning_rate": 7.84761826965767e-05, "loss": 0.9167, "step": 1978 }, { "epoch": 0.8970988213961922, "grad_norm": 1.206296033443608, "learning_rate": 7.847272004372175e-05, "loss": 0.9266, "step": 1979 }, { "epoch": 0.8975521305530372, "grad_norm": 1.036993095428557, "learning_rate": 7.846925353771083e-05, "loss": 0.9239, "step": 1980 }, { "epoch": 0.8980054397098821, "grad_norm": 0.9631618052852938, "learning_rate": 7.84657831788911e-05, "loss": 0.9247, "step": 1981 }, { "epoch": 0.898458748866727, "grad_norm": 1.6676870877470913, "learning_rate": 7.846230896761013e-05, "loss": 0.9261, "step": 1982 }, { "epoch": 0.8989120580235721, "grad_norm": 1.110322302655846, "learning_rate": 7.845883090421587e-05, "loss": 0.9078, "step": 1983 }, { "epoch": 0.899365367180417, "grad_norm": 0.9280638904351795, "learning_rate": 7.84553489890567e-05, "loss": 0.9251, "step": 1984 }, { "epoch": 0.899818676337262, "grad_norm": 1.191271806473513, "learning_rate": 7.845186322248127e-05, "loss": 0.9068, "step": 1985 }, { "epoch": 0.900271985494107, "grad_norm": 1.1485611312108646, "learning_rate": 7.844837360483876e-05, "loss": 0.901, "step": 1986 }, { "epoch": 0.900725294650952, "grad_norm": 1.081102725502325, "learning_rate": 7.844488013647863e-05, "loss": 0.9174, "step": 1987 }, { "epoch": 0.9011786038077969, "grad_norm": 1.7013648372956993, "learning_rate": 7.844138281775076e-05, "loss": 0.913, "step": 1988 }, { "epoch": 0.9016319129646418, "grad_norm": 0.8518688363766326, "learning_rate": 7.843788164900542e-05, "loss": 0.9236, "step": 1989 }, { "epoch": 0.9020852221214869, "grad_norm": 0.9602186964432055, "learning_rate": 7.843437663059326e-05, "loss": 0.9257, "step": 1990 }, { "epoch": 0.9025385312783318, "grad_norm": 1.1007205064140608, "learning_rate": 7.843086776286535e-05, "loss": 0.9005, "step": 1991 }, { "epoch": 0.9029918404351768, "grad_norm": 1.5633732150140909, "learning_rate": 7.842735504617308e-05, "loss": 0.9149, "step": 1992 }, { "epoch": 0.9034451495920217, "grad_norm": 1.0836193387359279, "learning_rate": 7.842383848086825e-05, "loss": 0.9182, "step": 1993 }, { "epoch": 0.9038984587488668, "grad_norm": 1.31433170520456, "learning_rate": 7.842031806730308e-05, "loss": 0.9169, "step": 1994 }, { "epoch": 0.9043517679057117, "grad_norm": 0.9113846521936262, "learning_rate": 7.841679380583015e-05, "loss": 0.9224, "step": 1995 }, { "epoch": 0.9048050770625566, "grad_norm": 1.0190960678703165, "learning_rate": 7.84132656968024e-05, "loss": 0.9154, "step": 1996 }, { "epoch": 0.9052583862194016, "grad_norm": 1.5887734062362449, "learning_rate": 7.840973374057321e-05, "loss": 0.9137, "step": 1997 }, { "epoch": 0.9057116953762466, "grad_norm": 1.2529172088740863, "learning_rate": 7.840619793749629e-05, "loss": 0.9007, "step": 1998 }, { "epoch": 0.9061650045330916, "grad_norm": 1.5400381611178562, "learning_rate": 7.840265828792577e-05, "loss": 0.9382, "step": 1999 }, { "epoch": 0.9066183136899365, "grad_norm": 1.075439265816313, "learning_rate": 7.839911479221617e-05, "loss": 0.9251, "step": 2000 }, { "epoch": 0.9070716228467816, "grad_norm": 2.318803572368361, "learning_rate": 7.839556745072237e-05, "loss": 0.9076, "step": 2001 }, { "epoch": 0.9075249320036265, "grad_norm": 1.7876709606412562, "learning_rate": 7.839201626379964e-05, "loss": 0.9012, "step": 2002 }, { "epoch": 0.9079782411604714, "grad_norm": 2.0054168572832514, "learning_rate": 7.838846123180365e-05, "loss": 0.9187, "step": 2003 }, { "epoch": 0.9084315503173164, "grad_norm": 1.689293107845989, "learning_rate": 7.838490235509046e-05, "loss": 0.9477, "step": 2004 }, { "epoch": 0.9088848594741614, "grad_norm": 1.6622720273573734, "learning_rate": 7.838133963401646e-05, "loss": 0.9326, "step": 2005 }, { "epoch": 0.9093381686310064, "grad_norm": 1.535687367949534, "learning_rate": 7.837777306893852e-05, "loss": 0.899, "step": 2006 }, { "epoch": 0.9097914777878513, "grad_norm": 0.967309844911602, "learning_rate": 7.837420266021381e-05, "loss": 0.9142, "step": 2007 }, { "epoch": 0.9102447869446962, "grad_norm": 1.3293369030876259, "learning_rate": 7.837062840819992e-05, "loss": 0.9093, "step": 2008 }, { "epoch": 0.9106980961015413, "grad_norm": 1.5396209453348517, "learning_rate": 7.836705031325483e-05, "loss": 0.8787, "step": 2009 }, { "epoch": 0.9111514052583862, "grad_norm": 0.9061096488375496, "learning_rate": 7.836346837573689e-05, "loss": 0.9106, "step": 2010 }, { "epoch": 0.9116047144152312, "grad_norm": 1.1583135400807418, "learning_rate": 7.835988259600484e-05, "loss": 0.9056, "step": 2011 }, { "epoch": 0.9120580235720761, "grad_norm": 1.088696554693576, "learning_rate": 7.835629297441782e-05, "loss": 0.9363, "step": 2012 }, { "epoch": 0.9125113327289212, "grad_norm": 1.131126391453818, "learning_rate": 7.835269951133533e-05, "loss": 0.9096, "step": 2013 }, { "epoch": 0.9129646418857661, "grad_norm": 0.9493205325075973, "learning_rate": 7.834910220711726e-05, "loss": 0.9125, "step": 2014 }, { "epoch": 0.913417951042611, "grad_norm": 1.1977154060555357, "learning_rate": 7.83455010621239e-05, "loss": 0.9121, "step": 2015 }, { "epoch": 0.9138712601994561, "grad_norm": 1.2816978519780815, "learning_rate": 7.834189607671592e-05, "loss": 0.9347, "step": 2016 }, { "epoch": 0.914324569356301, "grad_norm": 1.5159415415529491, "learning_rate": 7.833828725125437e-05, "loss": 0.9276, "step": 2017 }, { "epoch": 0.914777878513146, "grad_norm": 0.8543607108804667, "learning_rate": 7.833467458610066e-05, "loss": 0.9265, "step": 2018 }, { "epoch": 0.9152311876699909, "grad_norm": 1.0664333823337842, "learning_rate": 7.833105808161663e-05, "loss": 0.9322, "step": 2019 }, { "epoch": 0.915684496826836, "grad_norm": 1.4518516774385624, "learning_rate": 7.832743773816449e-05, "loss": 0.9165, "step": 2020 }, { "epoch": 0.9161378059836809, "grad_norm": 1.4491375808051148, "learning_rate": 7.83238135561068e-05, "loss": 0.9188, "step": 2021 }, { "epoch": 0.9165911151405258, "grad_norm": 1.1109842016709826, "learning_rate": 7.832018553580656e-05, "loss": 0.9268, "step": 2022 }, { "epoch": 0.9170444242973708, "grad_norm": 1.1780706654238313, "learning_rate": 7.831655367762713e-05, "loss": 0.9318, "step": 2023 }, { "epoch": 0.9174977334542158, "grad_norm": 1.243395912827345, "learning_rate": 7.831291798193223e-05, "loss": 0.8992, "step": 2024 }, { "epoch": 0.9179510426110608, "grad_norm": 1.693192681398444, "learning_rate": 7.830927844908598e-05, "loss": 0.9155, "step": 2025 }, { "epoch": 0.9184043517679057, "grad_norm": 0.6778118755232769, "learning_rate": 7.830563507945292e-05, "loss": 0.9197, "step": 2026 }, { "epoch": 0.9188576609247506, "grad_norm": 1.2273949794572174, "learning_rate": 7.830198787339793e-05, "loss": 0.9405, "step": 2027 }, { "epoch": 0.9193109700815957, "grad_norm": 1.995113093100038, "learning_rate": 7.82983368312863e-05, "loss": 0.906, "step": 2028 }, { "epoch": 0.9197642792384406, "grad_norm": 0.8271095546835355, "learning_rate": 7.829468195348364e-05, "loss": 0.9144, "step": 2029 }, { "epoch": 0.9202175883952856, "grad_norm": 2.563433223909738, "learning_rate": 7.829102324035606e-05, "loss": 0.9236, "step": 2030 }, { "epoch": 0.9206708975521306, "grad_norm": 1.5627701966698677, "learning_rate": 7.828736069226997e-05, "loss": 0.9275, "step": 2031 }, { "epoch": 0.9211242067089755, "grad_norm": 2.7296710824367993, "learning_rate": 7.828369430959218e-05, "loss": 0.9174, "step": 2032 }, { "epoch": 0.9215775158658205, "grad_norm": 2.7024211857838147, "learning_rate": 7.828002409268988e-05, "loss": 0.9473, "step": 2033 }, { "epoch": 0.9220308250226654, "grad_norm": 1.8361234295274025, "learning_rate": 7.827635004193068e-05, "loss": 0.9293, "step": 2034 }, { "epoch": 0.9224841341795105, "grad_norm": 1.428809783355935, "learning_rate": 7.827267215768253e-05, "loss": 0.9068, "step": 2035 }, { "epoch": 0.9229374433363554, "grad_norm": 1.5874831759118813, "learning_rate": 7.826899044031378e-05, "loss": 0.8993, "step": 2036 }, { "epoch": 0.9233907524932004, "grad_norm": 1.3359129198897113, "learning_rate": 7.826530489019317e-05, "loss": 0.9245, "step": 2037 }, { "epoch": 0.9238440616500453, "grad_norm": 0.9722651297547459, "learning_rate": 7.826161550768981e-05, "loss": 0.9261, "step": 2038 }, { "epoch": 0.9242973708068903, "grad_norm": 1.6712570611625996, "learning_rate": 7.825792229317321e-05, "loss": 0.914, "step": 2039 }, { "epoch": 0.9247506799637353, "grad_norm": 1.163357024194584, "learning_rate": 7.825422524701325e-05, "loss": 0.9261, "step": 2040 }, { "epoch": 0.9252039891205802, "grad_norm": 1.4634406551460675, "learning_rate": 7.82505243695802e-05, "loss": 0.8953, "step": 2041 }, { "epoch": 0.9256572982774252, "grad_norm": 1.298249299172218, "learning_rate": 7.824681966124473e-05, "loss": 0.9328, "step": 2042 }, { "epoch": 0.9261106074342702, "grad_norm": 1.7048809867193615, "learning_rate": 7.824311112237786e-05, "loss": 0.92, "step": 2043 }, { "epoch": 0.9265639165911151, "grad_norm": 1.097002574606533, "learning_rate": 7.823939875335102e-05, "loss": 0.9132, "step": 2044 }, { "epoch": 0.9270172257479601, "grad_norm": 1.7998620031400903, "learning_rate": 7.8235682554536e-05, "loss": 0.9244, "step": 2045 }, { "epoch": 0.927470534904805, "grad_norm": 1.549324269284471, "learning_rate": 7.823196252630501e-05, "loss": 0.9287, "step": 2046 }, { "epoch": 0.9279238440616501, "grad_norm": 1.2848002863591101, "learning_rate": 7.822823866903062e-05, "loss": 0.912, "step": 2047 }, { "epoch": 0.928377153218495, "grad_norm": 1.6949020669410695, "learning_rate": 7.822451098308575e-05, "loss": 0.927, "step": 2048 }, { "epoch": 0.92883046237534, "grad_norm": 0.9768834410288397, "learning_rate": 7.822077946884379e-05, "loss": 0.9224, "step": 2049 }, { "epoch": 0.929283771532185, "grad_norm": 1.5883945529268555, "learning_rate": 7.821704412667843e-05, "loss": 0.9383, "step": 2050 }, { "epoch": 0.9297370806890299, "grad_norm": 1.501720905766599, "learning_rate": 7.821330495696378e-05, "loss": 0.927, "step": 2051 }, { "epoch": 0.9301903898458749, "grad_norm": 0.9098898339361701, "learning_rate": 7.820956196007434e-05, "loss": 0.9354, "step": 2052 }, { "epoch": 0.9306436990027198, "grad_norm": 1.278482862037537, "learning_rate": 7.820581513638496e-05, "loss": 0.9316, "step": 2053 }, { "epoch": 0.9310970081595649, "grad_norm": 1.2482751172995765, "learning_rate": 7.820206448627092e-05, "loss": 0.9179, "step": 2054 }, { "epoch": 0.9315503173164098, "grad_norm": 0.962851730205354, "learning_rate": 7.819831001010783e-05, "loss": 0.9299, "step": 2055 }, { "epoch": 0.9320036264732547, "grad_norm": 0.945567235695268, "learning_rate": 7.819455170827176e-05, "loss": 0.9193, "step": 2056 }, { "epoch": 0.9324569356300997, "grad_norm": 0.9950976759208129, "learning_rate": 7.819078958113906e-05, "loss": 0.9118, "step": 2057 }, { "epoch": 0.9329102447869447, "grad_norm": 1.2769028079949594, "learning_rate": 7.818702362908656e-05, "loss": 0.9106, "step": 2058 }, { "epoch": 0.9333635539437897, "grad_norm": 1.0418318765631487, "learning_rate": 7.81832538524914e-05, "loss": 0.9008, "step": 2059 }, { "epoch": 0.9338168631006346, "grad_norm": 1.417323799259494, "learning_rate": 7.817948025173115e-05, "loss": 0.9163, "step": 2060 }, { "epoch": 0.9342701722574795, "grad_norm": 0.8815784612032971, "learning_rate": 7.817570282718376e-05, "loss": 0.9298, "step": 2061 }, { "epoch": 0.9347234814143246, "grad_norm": 1.0787158088381572, "learning_rate": 7.817192157922753e-05, "loss": 0.9245, "step": 2062 }, { "epoch": 0.9351767905711695, "grad_norm": 1.2657339195989432, "learning_rate": 7.816813650824115e-05, "loss": 0.9176, "step": 2063 }, { "epoch": 0.9356300997280145, "grad_norm": 1.124232128480022, "learning_rate": 7.816434761460373e-05, "loss": 0.9051, "step": 2064 }, { "epoch": 0.9360834088848595, "grad_norm": 1.262621559009067, "learning_rate": 7.816055489869475e-05, "loss": 0.8969, "step": 2065 }, { "epoch": 0.9365367180417045, "grad_norm": 1.0373148561695584, "learning_rate": 7.815675836089405e-05, "loss": 0.9206, "step": 2066 }, { "epoch": 0.9369900271985494, "grad_norm": 1.2388672399975567, "learning_rate": 7.815295800158184e-05, "loss": 0.8941, "step": 2067 }, { "epoch": 0.9374433363553943, "grad_norm": 0.9015962308792302, "learning_rate": 7.814915382113877e-05, "loss": 0.9174, "step": 2068 }, { "epoch": 0.9378966455122394, "grad_norm": 1.1451217243930738, "learning_rate": 7.814534581994582e-05, "loss": 0.9369, "step": 2069 }, { "epoch": 0.9383499546690843, "grad_norm": 1.494771989405895, "learning_rate": 7.814153399838437e-05, "loss": 0.926, "step": 2070 }, { "epoch": 0.9388032638259293, "grad_norm": 0.7828430186285001, "learning_rate": 7.81377183568362e-05, "loss": 0.9037, "step": 2071 }, { "epoch": 0.9392565729827742, "grad_norm": 0.9180017247655212, "learning_rate": 7.813389889568345e-05, "loss": 0.9043, "step": 2072 }, { "epoch": 0.9397098821396193, "grad_norm": 0.9574009487647769, "learning_rate": 7.813007561530866e-05, "loss": 0.9325, "step": 2073 }, { "epoch": 0.9401631912964642, "grad_norm": 0.8221090824475117, "learning_rate": 7.812624851609472e-05, "loss": 0.9236, "step": 2074 }, { "epoch": 0.9406165004533091, "grad_norm": 1.214653225766719, "learning_rate": 7.812241759842496e-05, "loss": 0.9046, "step": 2075 }, { "epoch": 0.9410698096101541, "grad_norm": 1.1055429364825522, "learning_rate": 7.811858286268303e-05, "loss": 0.9053, "step": 2076 }, { "epoch": 0.9415231187669991, "grad_norm": 1.4509151457640221, "learning_rate": 7.8114744309253e-05, "loss": 0.9173, "step": 2077 }, { "epoch": 0.9419764279238441, "grad_norm": 1.0416451180306128, "learning_rate": 7.811090193851931e-05, "loss": 0.911, "step": 2078 }, { "epoch": 0.942429737080689, "grad_norm": 1.1678126104125133, "learning_rate": 7.810705575086678e-05, "loss": 0.9003, "step": 2079 }, { "epoch": 0.942883046237534, "grad_norm": 1.358430294566764, "learning_rate": 7.810320574668062e-05, "loss": 0.8952, "step": 2080 }, { "epoch": 0.943336355394379, "grad_norm": 0.9188379503403183, "learning_rate": 7.809935192634643e-05, "loss": 0.9018, "step": 2081 }, { "epoch": 0.9437896645512239, "grad_norm": 1.1614789723605714, "learning_rate": 7.809549429025017e-05, "loss": 0.9152, "step": 2082 }, { "epoch": 0.9442429737080689, "grad_norm": 1.3977685369526134, "learning_rate": 7.80916328387782e-05, "loss": 0.9173, "step": 2083 }, { "epoch": 0.9446962828649139, "grad_norm": 0.7419204567584518, "learning_rate": 7.808776757231726e-05, "loss": 0.9094, "step": 2084 }, { "epoch": 0.9451495920217589, "grad_norm": 1.038700963820599, "learning_rate": 7.808389849125442e-05, "loss": 0.9242, "step": 2085 }, { "epoch": 0.9456029011786038, "grad_norm": 1.55581888113832, "learning_rate": 7.808002559597725e-05, "loss": 0.9124, "step": 2086 }, { "epoch": 0.9460562103354487, "grad_norm": 0.919003261652116, "learning_rate": 7.80761488868736e-05, "loss": 0.915, "step": 2087 }, { "epoch": 0.9465095194922938, "grad_norm": 1.6614298071717641, "learning_rate": 7.807226836433174e-05, "loss": 0.925, "step": 2088 }, { "epoch": 0.9469628286491387, "grad_norm": 0.7627365858748864, "learning_rate": 7.80683840287403e-05, "loss": 0.9111, "step": 2089 }, { "epoch": 0.9474161378059837, "grad_norm": 1.4599074222335378, "learning_rate": 7.806449588048833e-05, "loss": 0.925, "step": 2090 }, { "epoch": 0.9478694469628286, "grad_norm": 0.9113280825679893, "learning_rate": 7.806060391996522e-05, "loss": 0.912, "step": 2091 }, { "epoch": 0.9483227561196736, "grad_norm": 1.7599646157165878, "learning_rate": 7.805670814756076e-05, "loss": 0.9003, "step": 2092 }, { "epoch": 0.9487760652765186, "grad_norm": 1.0279100439178879, "learning_rate": 7.805280856366514e-05, "loss": 0.8983, "step": 2093 }, { "epoch": 0.9492293744333635, "grad_norm": 1.834157761292403, "learning_rate": 7.804890516866891e-05, "loss": 0.9042, "step": 2094 }, { "epoch": 0.9496826835902086, "grad_norm": 1.47396455696421, "learning_rate": 7.8044997962963e-05, "loss": 0.9257, "step": 2095 }, { "epoch": 0.9501359927470535, "grad_norm": 1.707625611609699, "learning_rate": 7.804108694693874e-05, "loss": 0.9229, "step": 2096 }, { "epoch": 0.9505893019038985, "grad_norm": 1.5171776073479208, "learning_rate": 7.803717212098782e-05, "loss": 0.931, "step": 2097 }, { "epoch": 0.9510426110607434, "grad_norm": 1.4888033208179747, "learning_rate": 7.803325348550233e-05, "loss": 0.9234, "step": 2098 }, { "epoch": 0.9514959202175884, "grad_norm": 1.3231390706403896, "learning_rate": 7.80293310408747e-05, "loss": 0.9223, "step": 2099 }, { "epoch": 0.9519492293744334, "grad_norm": 1.2486753216920425, "learning_rate": 7.802540478749782e-05, "loss": 0.9064, "step": 2100 }, { "epoch": 0.9524025385312783, "grad_norm": 1.1274407786641953, "learning_rate": 7.802147472576489e-05, "loss": 0.9173, "step": 2101 }, { "epoch": 0.9528558476881233, "grad_norm": 1.2586224091020084, "learning_rate": 7.801754085606952e-05, "loss": 0.9243, "step": 2102 }, { "epoch": 0.9533091568449683, "grad_norm": 1.5011901728348551, "learning_rate": 7.801360317880572e-05, "loss": 0.9001, "step": 2103 }, { "epoch": 0.9537624660018132, "grad_norm": 1.3232696890060862, "learning_rate": 7.800966169436781e-05, "loss": 0.9301, "step": 2104 }, { "epoch": 0.9542157751586582, "grad_norm": 0.9292018390649422, "learning_rate": 7.80057164031506e-05, "loss": 0.9137, "step": 2105 }, { "epoch": 0.9546690843155031, "grad_norm": 1.8011605681197995, "learning_rate": 7.800176730554919e-05, "loss": 0.9092, "step": 2106 }, { "epoch": 0.9551223934723482, "grad_norm": 1.0746033198367946, "learning_rate": 7.79978144019591e-05, "loss": 0.9059, "step": 2107 }, { "epoch": 0.9555757026291931, "grad_norm": 1.3037789972587326, "learning_rate": 7.799385769277621e-05, "loss": 0.893, "step": 2108 }, { "epoch": 0.956029011786038, "grad_norm": 1.1686674618103532, "learning_rate": 7.798989717839682e-05, "loss": 0.9067, "step": 2109 }, { "epoch": 0.956482320942883, "grad_norm": 1.2518188061513011, "learning_rate": 7.798593285921757e-05, "loss": 0.9313, "step": 2110 }, { "epoch": 0.956935630099728, "grad_norm": 1.586823001222998, "learning_rate": 7.798196473563552e-05, "loss": 0.9154, "step": 2111 }, { "epoch": 0.957388939256573, "grad_norm": 1.0722388892936208, "learning_rate": 7.797799280804807e-05, "loss": 0.9147, "step": 2112 }, { "epoch": 0.9578422484134179, "grad_norm": 0.8375675302892033, "learning_rate": 7.797401707685302e-05, "loss": 0.9152, "step": 2113 }, { "epoch": 0.958295557570263, "grad_norm": 1.4175599263106644, "learning_rate": 7.797003754244856e-05, "loss": 0.9355, "step": 2114 }, { "epoch": 0.9587488667271079, "grad_norm": 1.3701079004809034, "learning_rate": 7.796605420523324e-05, "loss": 0.9125, "step": 2115 }, { "epoch": 0.9592021758839528, "grad_norm": 0.8899366841966995, "learning_rate": 7.796206706560602e-05, "loss": 0.9176, "step": 2116 }, { "epoch": 0.9596554850407978, "grad_norm": 1.3156374734731617, "learning_rate": 7.795807612396621e-05, "loss": 0.9093, "step": 2117 }, { "epoch": 0.9601087941976428, "grad_norm": 1.116853026460026, "learning_rate": 7.795408138071352e-05, "loss": 0.9289, "step": 2118 }, { "epoch": 0.9605621033544878, "grad_norm": 1.1251138021677896, "learning_rate": 7.795008283624804e-05, "loss": 0.8893, "step": 2119 }, { "epoch": 0.9610154125113327, "grad_norm": 1.4682909967362272, "learning_rate": 7.794608049097023e-05, "loss": 0.9206, "step": 2120 }, { "epoch": 0.9614687216681777, "grad_norm": 1.2829656286845377, "learning_rate": 7.794207434528094e-05, "loss": 0.9266, "step": 2121 }, { "epoch": 0.9619220308250227, "grad_norm": 1.0067194099581505, "learning_rate": 7.79380643995814e-05, "loss": 0.9294, "step": 2122 }, { "epoch": 0.9623753399818676, "grad_norm": 1.5589323070280092, "learning_rate": 7.793405065427319e-05, "loss": 0.915, "step": 2123 }, { "epoch": 0.9628286491387126, "grad_norm": 1.3503121432410896, "learning_rate": 7.793003310975832e-05, "loss": 0.9258, "step": 2124 }, { "epoch": 0.9632819582955575, "grad_norm": 1.3783276183337374, "learning_rate": 7.79260117664392e-05, "loss": 0.9068, "step": 2125 }, { "epoch": 0.9637352674524026, "grad_norm": 1.138457795312723, "learning_rate": 7.792198662471849e-05, "loss": 0.9316, "step": 2126 }, { "epoch": 0.9641885766092475, "grad_norm": 1.301988403670565, "learning_rate": 7.79179576849994e-05, "loss": 0.913, "step": 2127 }, { "epoch": 0.9646418857660924, "grad_norm": 1.000087106048032, "learning_rate": 7.79139249476854e-05, "loss": 0.9355, "step": 2128 }, { "epoch": 0.9650951949229375, "grad_norm": 1.4736344885644197, "learning_rate": 7.790988841318039e-05, "loss": 0.9125, "step": 2129 }, { "epoch": 0.9655485040797824, "grad_norm": 1.2849744368033131, "learning_rate": 7.790584808188864e-05, "loss": 0.9047, "step": 2130 }, { "epoch": 0.9660018132366274, "grad_norm": 1.0260240895553023, "learning_rate": 7.79018039542148e-05, "loss": 0.933, "step": 2131 }, { "epoch": 0.9664551223934723, "grad_norm": 1.2749394866630974, "learning_rate": 7.78977560305639e-05, "loss": 0.9346, "step": 2132 }, { "epoch": 0.9669084315503174, "grad_norm": 1.2820611389855099, "learning_rate": 7.789370431134137e-05, "loss": 0.9332, "step": 2133 }, { "epoch": 0.9673617407071623, "grad_norm": 1.270380924830418, "learning_rate": 7.788964879695297e-05, "loss": 0.9247, "step": 2134 }, { "epoch": 0.9678150498640072, "grad_norm": 1.119537695743955, "learning_rate": 7.788558948780489e-05, "loss": 0.9411, "step": 2135 }, { "epoch": 0.9682683590208522, "grad_norm": 1.9938783978467594, "learning_rate": 7.788152638430368e-05, "loss": 0.9091, "step": 2136 }, { "epoch": 0.9687216681776972, "grad_norm": 0.9552824489174617, "learning_rate": 7.787745948685628e-05, "loss": 0.9183, "step": 2137 }, { "epoch": 0.9691749773345422, "grad_norm": 2.162970207394887, "learning_rate": 7.787338879586997e-05, "loss": 0.8935, "step": 2138 }, { "epoch": 0.9696282864913871, "grad_norm": 1.5071513852193574, "learning_rate": 7.786931431175248e-05, "loss": 0.9238, "step": 2139 }, { "epoch": 0.970081595648232, "grad_norm": 2.170136250527026, "learning_rate": 7.786523603491186e-05, "loss": 0.929, "step": 2140 }, { "epoch": 0.9705349048050771, "grad_norm": 1.6531311777914721, "learning_rate": 7.786115396575657e-05, "loss": 0.9098, "step": 2141 }, { "epoch": 0.970988213961922, "grad_norm": 1.8240418694804739, "learning_rate": 7.785706810469545e-05, "loss": 0.9238, "step": 2142 }, { "epoch": 0.971441523118767, "grad_norm": 1.431986152326375, "learning_rate": 7.785297845213768e-05, "loss": 0.9243, "step": 2143 }, { "epoch": 0.971894832275612, "grad_norm": 2.000603034835783, "learning_rate": 7.784888500849289e-05, "loss": 0.9342, "step": 2144 }, { "epoch": 0.972348141432457, "grad_norm": 1.5308427617419182, "learning_rate": 7.784478777417102e-05, "loss": 0.9183, "step": 2145 }, { "epoch": 0.9728014505893019, "grad_norm": 1.7363192544268813, "learning_rate": 7.784068674958243e-05, "loss": 0.9118, "step": 2146 }, { "epoch": 0.9732547597461468, "grad_norm": 1.3677834108002378, "learning_rate": 7.783658193513785e-05, "loss": 0.9328, "step": 2147 }, { "epoch": 0.9737080689029919, "grad_norm": 1.7186860883724848, "learning_rate": 7.783247333124838e-05, "loss": 0.9149, "step": 2148 }, { "epoch": 0.9741613780598368, "grad_norm": 1.2238881205558976, "learning_rate": 7.782836093832552e-05, "loss": 0.9365, "step": 2149 }, { "epoch": 0.9746146872166818, "grad_norm": 1.3534348287786235, "learning_rate": 7.782424475678115e-05, "loss": 0.9451, "step": 2150 }, { "epoch": 0.9750679963735267, "grad_norm": 1.1052672191599187, "learning_rate": 7.782012478702751e-05, "loss": 0.9317, "step": 2151 }, { "epoch": 0.9755213055303718, "grad_norm": 1.2303786076626344, "learning_rate": 7.781600102947722e-05, "loss": 0.9209, "step": 2152 }, { "epoch": 0.9759746146872167, "grad_norm": 0.9382553039596676, "learning_rate": 7.781187348454327e-05, "loss": 0.9036, "step": 2153 }, { "epoch": 0.9764279238440616, "grad_norm": 1.2953614727494012, "learning_rate": 7.780774215263908e-05, "loss": 0.9219, "step": 2154 }, { "epoch": 0.9768812330009066, "grad_norm": 1.350788658773205, "learning_rate": 7.780360703417839e-05, "loss": 0.943, "step": 2155 }, { "epoch": 0.9773345421577516, "grad_norm": 0.9159495396952788, "learning_rate": 7.779946812957534e-05, "loss": 0.9061, "step": 2156 }, { "epoch": 0.9777878513145966, "grad_norm": 1.7956150390992585, "learning_rate": 7.779532543924448e-05, "loss": 0.9088, "step": 2157 }, { "epoch": 0.9782411604714415, "grad_norm": 1.134703665517406, "learning_rate": 7.77911789636007e-05, "loss": 0.912, "step": 2158 }, { "epoch": 0.9786944696282865, "grad_norm": 1.6927003256802784, "learning_rate": 7.778702870305928e-05, "loss": 0.93, "step": 2159 }, { "epoch": 0.9791477787851315, "grad_norm": 1.4289654003874595, "learning_rate": 7.778287465803587e-05, "loss": 0.9139, "step": 2160 }, { "epoch": 0.9796010879419764, "grad_norm": 1.5734075035384878, "learning_rate": 7.777871682894654e-05, "loss": 0.8953, "step": 2161 }, { "epoch": 0.9800543970988214, "grad_norm": 1.506554520846465, "learning_rate": 7.777455521620769e-05, "loss": 0.8895, "step": 2162 }, { "epoch": 0.9805077062556664, "grad_norm": 1.385587339502483, "learning_rate": 7.77703898202361e-05, "loss": 0.9061, "step": 2163 }, { "epoch": 0.9809610154125114, "grad_norm": 1.111357073090794, "learning_rate": 7.776622064144897e-05, "loss": 0.901, "step": 2164 }, { "epoch": 0.9814143245693563, "grad_norm": 1.6120495691512586, "learning_rate": 7.776204768026385e-05, "loss": 0.9304, "step": 2165 }, { "epoch": 0.9818676337262012, "grad_norm": 0.9865939177380262, "learning_rate": 7.775787093709868e-05, "loss": 0.8954, "step": 2166 }, { "epoch": 0.9823209428830463, "grad_norm": 1.6466174080538547, "learning_rate": 7.775369041237176e-05, "loss": 0.9184, "step": 2167 }, { "epoch": 0.9827742520398912, "grad_norm": 1.1676816688905982, "learning_rate": 7.77495061065018e-05, "loss": 0.8997, "step": 2168 }, { "epoch": 0.9832275611967362, "grad_norm": 1.435193221174988, "learning_rate": 7.774531801990785e-05, "loss": 0.9363, "step": 2169 }, { "epoch": 0.9836808703535811, "grad_norm": 1.4249992507220068, "learning_rate": 7.774112615300937e-05, "loss": 0.9154, "step": 2170 }, { "epoch": 0.9841341795104261, "grad_norm": 1.1664267552168033, "learning_rate": 7.773693050622619e-05, "loss": 0.9114, "step": 2171 }, { "epoch": 0.9845874886672711, "grad_norm": 1.2961778449704024, "learning_rate": 7.773273107997852e-05, "loss": 0.9009, "step": 2172 }, { "epoch": 0.985040797824116, "grad_norm": 1.1089153579771993, "learning_rate": 7.772852787468693e-05, "loss": 0.9071, "step": 2173 }, { "epoch": 0.985494106980961, "grad_norm": 0.9737238839715389, "learning_rate": 7.772432089077239e-05, "loss": 0.9358, "step": 2174 }, { "epoch": 0.985947416137806, "grad_norm": 1.3153996805282484, "learning_rate": 7.772011012865624e-05, "loss": 0.9012, "step": 2175 }, { "epoch": 0.986400725294651, "grad_norm": 1.1486490387850294, "learning_rate": 7.77158955887602e-05, "loss": 0.9189, "step": 2176 }, { "epoch": 0.9868540344514959, "grad_norm": 1.0995814570715765, "learning_rate": 7.771167727150639e-05, "loss": 0.9073, "step": 2177 }, { "epoch": 0.9873073436083409, "grad_norm": 1.6598417496182507, "learning_rate": 7.770745517731726e-05, "loss": 0.9165, "step": 2178 }, { "epoch": 0.9877606527651859, "grad_norm": 0.7824188656677149, "learning_rate": 7.770322930661566e-05, "loss": 0.8973, "step": 2179 }, { "epoch": 0.9882139619220308, "grad_norm": 1.5409921615137672, "learning_rate": 7.769899965982486e-05, "loss": 0.9192, "step": 2180 }, { "epoch": 0.9886672710788758, "grad_norm": 1.0604310416736975, "learning_rate": 7.769476623736843e-05, "loss": 0.9203, "step": 2181 }, { "epoch": 0.9891205802357208, "grad_norm": 1.5093408369707018, "learning_rate": 7.769052903967039e-05, "loss": 0.898, "step": 2182 }, { "epoch": 0.9895738893925657, "grad_norm": 1.0961621594675746, "learning_rate": 7.768628806715509e-05, "loss": 0.8935, "step": 2183 }, { "epoch": 0.9900271985494107, "grad_norm": 1.0916652263890707, "learning_rate": 7.76820433202473e-05, "loss": 0.9252, "step": 2184 }, { "epoch": 0.9904805077062556, "grad_norm": 1.2429591267128823, "learning_rate": 7.767779479937209e-05, "loss": 0.9137, "step": 2185 }, { "epoch": 0.9909338168631007, "grad_norm": 1.3963081484600093, "learning_rate": 7.767354250495503e-05, "loss": 0.9442, "step": 2186 }, { "epoch": 0.9913871260199456, "grad_norm": 0.9061475553676106, "learning_rate": 7.766928643742195e-05, "loss": 0.9147, "step": 2187 }, { "epoch": 0.9918404351767905, "grad_norm": 1.0183968396184193, "learning_rate": 7.766502659719914e-05, "loss": 0.9313, "step": 2188 }, { "epoch": 0.9922937443336355, "grad_norm": 1.0487807724081928, "learning_rate": 7.766076298471321e-05, "loss": 0.8972, "step": 2189 }, { "epoch": 0.9927470534904805, "grad_norm": 1.1055296900394975, "learning_rate": 7.765649560039119e-05, "loss": 0.915, "step": 2190 }, { "epoch": 0.9932003626473255, "grad_norm": 0.9652677239035385, "learning_rate": 7.765222444466046e-05, "loss": 0.9201, "step": 2191 }, { "epoch": 0.9936536718041704, "grad_norm": 1.1788439268482402, "learning_rate": 7.764794951794879e-05, "loss": 0.9048, "step": 2192 }, { "epoch": 0.9941069809610155, "grad_norm": 0.9870610084492013, "learning_rate": 7.764367082068435e-05, "loss": 0.9188, "step": 2193 }, { "epoch": 0.9945602901178604, "grad_norm": 0.8529555795352023, "learning_rate": 7.763938835329563e-05, "loss": 0.9053, "step": 2194 }, { "epoch": 0.9950135992747053, "grad_norm": 1.1980789220927577, "learning_rate": 7.763510211621153e-05, "loss": 0.9065, "step": 2195 }, { "epoch": 0.9954669084315503, "grad_norm": 1.0324930518255682, "learning_rate": 7.763081210986137e-05, "loss": 0.9267, "step": 2196 }, { "epoch": 0.9959202175883953, "grad_norm": 1.6759204226838986, "learning_rate": 7.762651833467477e-05, "loss": 0.9087, "step": 2197 }, { "epoch": 0.9963735267452403, "grad_norm": 0.9363873854731483, "learning_rate": 7.762222079108178e-05, "loss": 0.9154, "step": 2198 }, { "epoch": 0.9968268359020852, "grad_norm": 0.9101739183691779, "learning_rate": 7.761791947951282e-05, "loss": 0.894, "step": 2199 }, { "epoch": 0.9972801450589301, "grad_norm": 1.1971256271876236, "learning_rate": 7.761361440039866e-05, "loss": 0.9173, "step": 2200 }, { "epoch": 0.9977334542157752, "grad_norm": 1.441666744955298, "learning_rate": 7.760930555417046e-05, "loss": 0.9192, "step": 2201 }, { "epoch": 0.9981867633726201, "grad_norm": 1.4124953002832787, "learning_rate": 7.760499294125978e-05, "loss": 0.922, "step": 2202 }, { "epoch": 0.9986400725294651, "grad_norm": 0.6924001270512046, "learning_rate": 7.760067656209856e-05, "loss": 0.91, "step": 2203 }, { "epoch": 0.99909338168631, "grad_norm": 1.0878264239184325, "learning_rate": 7.759635641711905e-05, "loss": 0.8992, "step": 2204 }, { "epoch": 0.9995466908431551, "grad_norm": 1.7918154205156307, "learning_rate": 7.759203250675397e-05, "loss": 0.9229, "step": 2205 }, { "epoch": 1.0, "grad_norm": 0.8001246597792144, "learning_rate": 7.758770483143634e-05, "loss": 0.9126, "step": 2206 }, { "epoch": 1.000453309156845, "grad_norm": 1.5185579693502667, "learning_rate": 7.758337339159961e-05, "loss": 0.8875, "step": 2207 }, { "epoch": 1.0009066183136899, "grad_norm": 1.0933507833824967, "learning_rate": 7.757903818767759e-05, "loss": 0.8931, "step": 2208 }, { "epoch": 1.001359927470535, "grad_norm": 1.8863710760456849, "learning_rate": 7.757469922010442e-05, "loss": 0.8899, "step": 2209 }, { "epoch": 1.00181323662738, "grad_norm": 0.9833619747869214, "learning_rate": 7.757035648931473e-05, "loss": 0.9037, "step": 2210 }, { "epoch": 1.0022665457842248, "grad_norm": 2.1399333316083426, "learning_rate": 7.756600999574339e-05, "loss": 0.9109, "step": 2211 }, { "epoch": 1.0027198549410699, "grad_norm": 2.0005771211578534, "learning_rate": 7.756165973982576e-05, "loss": 0.9107, "step": 2212 }, { "epoch": 1.0031731640979147, "grad_norm": 1.5542662023629812, "learning_rate": 7.755730572199753e-05, "loss": 0.9068, "step": 2213 }, { "epoch": 1.0036264732547597, "grad_norm": 2.175806139743037, "learning_rate": 7.755294794269474e-05, "loss": 0.8979, "step": 2214 }, { "epoch": 1.0040797824116048, "grad_norm": 1.7977508796479036, "learning_rate": 7.754858640235384e-05, "loss": 0.9194, "step": 2215 }, { "epoch": 1.0045330915684496, "grad_norm": 2.210366813675899, "learning_rate": 7.754422110141165e-05, "loss": 0.924, "step": 2216 }, { "epoch": 1.0049864007252947, "grad_norm": 1.853332220541622, "learning_rate": 7.75398520403054e-05, "loss": 0.8976, "step": 2217 }, { "epoch": 1.0054397098821397, "grad_norm": 2.2972341605743694, "learning_rate": 7.753547921947263e-05, "loss": 0.9, "step": 2218 }, { "epoch": 1.0058930190389845, "grad_norm": 1.633886802763704, "learning_rate": 7.75311026393513e-05, "loss": 0.8844, "step": 2219 }, { "epoch": 1.0063463281958296, "grad_norm": 2.524998367041391, "learning_rate": 7.752672230037973e-05, "loss": 0.8821, "step": 2220 }, { "epoch": 1.0067996373526744, "grad_norm": 2.0718755194235468, "learning_rate": 7.752233820299665e-05, "loss": 0.9025, "step": 2221 }, { "epoch": 1.0072529465095195, "grad_norm": 2.2298908250180824, "learning_rate": 7.751795034764111e-05, "loss": 0.8973, "step": 2222 }, { "epoch": 1.0077062556663645, "grad_norm": 1.9356365820254164, "learning_rate": 7.75135587347526e-05, "loss": 0.8844, "step": 2223 }, { "epoch": 1.0081595648232093, "grad_norm": 2.3273615763303876, "learning_rate": 7.750916336477091e-05, "loss": 0.9057, "step": 2224 }, { "epoch": 1.0086128739800544, "grad_norm": 1.9799922306252278, "learning_rate": 7.750476423813627e-05, "loss": 0.9001, "step": 2225 }, { "epoch": 1.0090661831368994, "grad_norm": 2.0869692332820353, "learning_rate": 7.750036135528928e-05, "loss": 0.8881, "step": 2226 }, { "epoch": 1.0095194922937443, "grad_norm": 1.6856188548995934, "learning_rate": 7.749595471667089e-05, "loss": 0.9074, "step": 2227 }, { "epoch": 1.0099728014505893, "grad_norm": 2.4435857441160196, "learning_rate": 7.749154432272243e-05, "loss": 0.9041, "step": 2228 }, { "epoch": 1.0104261106074344, "grad_norm": 2.124321890291241, "learning_rate": 7.748713017388561e-05, "loss": 0.8972, "step": 2229 }, { "epoch": 1.0108794197642792, "grad_norm": 2.1068105077788317, "learning_rate": 7.748271227060255e-05, "loss": 0.9126, "step": 2230 }, { "epoch": 1.0113327289211242, "grad_norm": 2.066471803816435, "learning_rate": 7.747829061331569e-05, "loss": 0.9293, "step": 2231 }, { "epoch": 1.011786038077969, "grad_norm": 1.8039954906125946, "learning_rate": 7.747386520246788e-05, "loss": 0.9032, "step": 2232 }, { "epoch": 1.0122393472348141, "grad_norm": 1.5807654626841818, "learning_rate": 7.746943603850233e-05, "loss": 0.9019, "step": 2233 }, { "epoch": 1.0126926563916592, "grad_norm": 2.3804846364971013, "learning_rate": 7.746500312186265e-05, "loss": 0.903, "step": 2234 }, { "epoch": 1.013145965548504, "grad_norm": 2.1307147315297237, "learning_rate": 7.74605664529928e-05, "loss": 0.8944, "step": 2235 }, { "epoch": 1.013599274705349, "grad_norm": 1.8019974699856864, "learning_rate": 7.74561260323371e-05, "loss": 0.9054, "step": 2236 }, { "epoch": 1.014052583862194, "grad_norm": 1.5792102590987154, "learning_rate": 7.745168186034031e-05, "loss": 0.909, "step": 2237 }, { "epoch": 1.014505893019039, "grad_norm": 2.4276543434651425, "learning_rate": 7.744723393744753e-05, "loss": 0.9168, "step": 2238 }, { "epoch": 1.014959202175884, "grad_norm": 2.1863054099445702, "learning_rate": 7.744278226410421e-05, "loss": 0.9102, "step": 2239 }, { "epoch": 1.015412511332729, "grad_norm": 1.6551878936554276, "learning_rate": 7.743832684075619e-05, "loss": 0.894, "step": 2240 }, { "epoch": 1.0158658204895739, "grad_norm": 1.4720541685309292, "learning_rate": 7.743386766784971e-05, "loss": 0.904, "step": 2241 }, { "epoch": 1.016319129646419, "grad_norm": 2.3303368847634838, "learning_rate": 7.742940474583138e-05, "loss": 0.8956, "step": 2242 }, { "epoch": 1.0167724388032637, "grad_norm": 2.114358412758921, "learning_rate": 7.742493807514816e-05, "loss": 0.9016, "step": 2243 }, { "epoch": 1.0172257479601088, "grad_norm": 1.711440800557558, "learning_rate": 7.74204676562474e-05, "loss": 0.9022, "step": 2244 }, { "epoch": 1.0176790571169538, "grad_norm": 1.4505947171433313, "learning_rate": 7.741599348957682e-05, "loss": 0.8938, "step": 2245 }, { "epoch": 1.0181323662737987, "grad_norm": 2.299696009950017, "learning_rate": 7.741151557558453e-05, "loss": 0.9039, "step": 2246 }, { "epoch": 1.0185856754306437, "grad_norm": 1.9991585483115963, "learning_rate": 7.740703391471901e-05, "loss": 0.8908, "step": 2247 }, { "epoch": 1.0190389845874888, "grad_norm": 1.7471115864747764, "learning_rate": 7.74025485074291e-05, "loss": 0.9081, "step": 2248 }, { "epoch": 1.0194922937443336, "grad_norm": 1.5589193681773093, "learning_rate": 7.739805935416403e-05, "loss": 0.8848, "step": 2249 }, { "epoch": 1.0199456029011786, "grad_norm": 2.1584689226691904, "learning_rate": 7.739356645537341e-05, "loss": 0.8798, "step": 2250 }, { "epoch": 1.0203989120580235, "grad_norm": 1.9412375402706006, "learning_rate": 7.738906981150722e-05, "loss": 0.8937, "step": 2251 }, { "epoch": 1.0208522212148685, "grad_norm": 1.8215949759328434, "learning_rate": 7.73845694230158e-05, "loss": 0.8989, "step": 2252 }, { "epoch": 1.0213055303717136, "grad_norm": 1.5748510760355887, "learning_rate": 7.738006529034988e-05, "loss": 0.9251, "step": 2253 }, { "epoch": 1.0217588395285584, "grad_norm": 2.3294748424644185, "learning_rate": 7.737555741396055e-05, "loss": 0.9614, "step": 2254 }, { "epoch": 1.0222121486854034, "grad_norm": 2.216438206850962, "learning_rate": 7.73710457942993e-05, "loss": 0.9025, "step": 2255 }, { "epoch": 1.0226654578422485, "grad_norm": 1.3267813490440317, "learning_rate": 7.736653043181801e-05, "loss": 0.9056, "step": 2256 }, { "epoch": 1.0231187669990933, "grad_norm": 1.0981707099163063, "learning_rate": 7.736201132696886e-05, "loss": 0.8839, "step": 2257 }, { "epoch": 1.0235720761559384, "grad_norm": 2.478351679818519, "learning_rate": 7.735748848020447e-05, "loss": 0.9099, "step": 2258 }, { "epoch": 1.0240253853127834, "grad_norm": 2.231708844544931, "learning_rate": 7.735296189197781e-05, "loss": 0.9035, "step": 2259 }, { "epoch": 1.0244786944696282, "grad_norm": 1.7311914536654498, "learning_rate": 7.734843156274225e-05, "loss": 0.9061, "step": 2260 }, { "epoch": 1.0249320036264733, "grad_norm": 1.7117349612462849, "learning_rate": 7.73438974929515e-05, "loss": 0.9019, "step": 2261 }, { "epoch": 1.0253853127833181, "grad_norm": 1.6648973396767386, "learning_rate": 7.733935968305968e-05, "loss": 0.8828, "step": 2262 }, { "epoch": 1.0258386219401632, "grad_norm": 1.2387574746255279, "learning_rate": 7.733481813352123e-05, "loss": 0.9062, "step": 2263 }, { "epoch": 1.0262919310970082, "grad_norm": 2.4416915668388617, "learning_rate": 7.733027284479102e-05, "loss": 0.9199, "step": 2264 }, { "epoch": 1.026745240253853, "grad_norm": 2.232178516225162, "learning_rate": 7.732572381732428e-05, "loss": 0.9186, "step": 2265 }, { "epoch": 1.027198549410698, "grad_norm": 1.5151712264483101, "learning_rate": 7.73211710515766e-05, "loss": 0.8894, "step": 2266 }, { "epoch": 1.0276518585675432, "grad_norm": 1.709864962692262, "learning_rate": 7.731661454800396e-05, "loss": 0.911, "step": 2267 }, { "epoch": 1.028105167724388, "grad_norm": 1.5576594195788114, "learning_rate": 7.731205430706269e-05, "loss": 0.8991, "step": 2268 }, { "epoch": 1.028558476881233, "grad_norm": 1.3548723227923902, "learning_rate": 7.730749032920954e-05, "loss": 0.9018, "step": 2269 }, { "epoch": 1.0290117860380779, "grad_norm": 2094.400847675909, "learning_rate": 7.730292261490156e-05, "loss": 5.1744, "step": 2270 }, { "epoch": 1.029465095194923, "grad_norm": 2347.1983543794768, "learning_rate": 7.729835116459628e-05, "loss": 19.7327, "step": 2271 }, { "epoch": 1.029918404351768, "grad_norm": 484.90331737775756, "learning_rate": 7.729377597875149e-05, "loss": 13.7732, "step": 2272 }, { "epoch": 1.0303717135086128, "grad_norm": 876.832268778176, "learning_rate": 7.728919705782543e-05, "loss": 8.9639, "step": 2273 }, { "epoch": 1.0308250226654578, "grad_norm": 99.49642660724203, "learning_rate": 7.728461440227671e-05, "loss": 9.9606, "step": 2274 }, { "epoch": 1.0312783318223029, "grad_norm": 85.27771864859518, "learning_rate": 7.728002801256428e-05, "loss": 8.7004, "step": 2275 }, { "epoch": 1.0317316409791477, "grad_norm": 78.2805370924912, "learning_rate": 7.727543788914748e-05, "loss": 10.0065, "step": 2276 }, { "epoch": 1.0321849501359928, "grad_norm": 23.41223561039528, "learning_rate": 7.7270844032486e-05, "loss": 8.0816, "step": 2277 }, { "epoch": 1.0326382592928378, "grad_norm": 20.175729082986933, "learning_rate": 7.726624644303998e-05, "loss": 7.3153, "step": 2278 }, { "epoch": 1.0330915684496826, "grad_norm": 9.75742066020185, "learning_rate": 7.726164512126986e-05, "loss": 6.7263, "step": 2279 }, { "epoch": 1.0335448776065277, "grad_norm": 17.648493843880793, "learning_rate": 7.725704006763645e-05, "loss": 6.8283, "step": 2280 }, { "epoch": 1.0339981867633725, "grad_norm": 26.638731568393787, "learning_rate": 7.7252431282601e-05, "loss": 6.7527, "step": 2281 }, { "epoch": 1.0344514959202176, "grad_norm": 23.206981693976875, "learning_rate": 7.724781876662506e-05, "loss": 7.704, "step": 2282 }, { "epoch": 1.0349048050770626, "grad_norm": 12.788331990931729, "learning_rate": 7.72432025201706e-05, "loss": 6.7132, "step": 2283 }, { "epoch": 1.0353581142339074, "grad_norm": 39.03643047084416, "learning_rate": 7.723858254369996e-05, "loss": 7.7887, "step": 2284 }, { "epoch": 1.0358114233907525, "grad_norm": 32.76241818914737, "learning_rate": 7.723395883767584e-05, "loss": 7.7046, "step": 2285 }, { "epoch": 1.0362647325475975, "grad_norm": 24.36971825459457, "learning_rate": 7.72293314025613e-05, "loss": 6.8577, "step": 2286 }, { "epoch": 1.0367180417044424, "grad_norm": 10.303752047838843, "learning_rate": 7.722470023881981e-05, "loss": 6.682, "step": 2287 }, { "epoch": 1.0371713508612874, "grad_norm": 12.555851035553703, "learning_rate": 7.72200653469152e-05, "loss": 6.6666, "step": 2288 }, { "epoch": 1.0376246600181325, "grad_norm": 8.807971772600515, "learning_rate": 7.721542672731165e-05, "loss": 6.5208, "step": 2289 }, { "epoch": 1.0380779691749773, "grad_norm": 22.32535207919814, "learning_rate": 7.721078438047374e-05, "loss": 6.6746, "step": 2290 }, { "epoch": 1.0385312783318223, "grad_norm": 14.38124906748772, "learning_rate": 7.72061383068664e-05, "loss": 6.4662, "step": 2291 }, { "epoch": 1.0389845874886672, "grad_norm": 7.568909580713177, "learning_rate": 7.720148850695499e-05, "loss": 6.4102, "step": 2292 }, { "epoch": 1.0394378966455122, "grad_norm": 7.226379433606147, "learning_rate": 7.719683498120515e-05, "loss": 6.4042, "step": 2293 }, { "epoch": 1.0398912058023573, "grad_norm": 3.2910504108242793, "learning_rate": 7.719217773008297e-05, "loss": 6.2012, "step": 2294 }, { "epoch": 1.040344514959202, "grad_norm": 21.57453009949906, "learning_rate": 7.71875167540549e-05, "loss": 6.6328, "step": 2295 }, { "epoch": 1.0407978241160472, "grad_norm": 22.385754574956085, "learning_rate": 7.718285205358773e-05, "loss": 6.6894, "step": 2296 }, { "epoch": 1.0412511332728922, "grad_norm": 5.945513098375774, "learning_rate": 7.717818362914863e-05, "loss": 6.2616, "step": 2297 }, { "epoch": 1.041704442429737, "grad_norm": 8.173923801414059, "learning_rate": 7.717351148120518e-05, "loss": 6.2885, "step": 2298 }, { "epoch": 1.042157751586582, "grad_norm": 9.016759697090615, "learning_rate": 7.716883561022532e-05, "loss": 6.2979, "step": 2299 }, { "epoch": 1.0426110607434271, "grad_norm": 9.266474932908805, "learning_rate": 7.716415601667733e-05, "loss": 6.2237, "step": 2300 }, { "epoch": 1.043064369900272, "grad_norm": 6.767329605633009, "learning_rate": 7.715947270102989e-05, "loss": 6.1056, "step": 2301 }, { "epoch": 1.043517679057117, "grad_norm": 5.246256794767085, "learning_rate": 7.715478566375205e-05, "loss": 6.0562, "step": 2302 }, { "epoch": 1.0439709882139618, "grad_norm": 7.186198752310072, "learning_rate": 7.715009490531323e-05, "loss": 6.0726, "step": 2303 }, { "epoch": 1.0444242973708069, "grad_norm": 5.689662293806424, "learning_rate": 7.714540042618321e-05, "loss": 5.994, "step": 2304 }, { "epoch": 1.044877606527652, "grad_norm": 6.308013738728765, "learning_rate": 7.714070222683218e-05, "loss": 5.9721, "step": 2305 }, { "epoch": 1.0453309156844968, "grad_norm": 6.862451968431989, "learning_rate": 7.713600030773068e-05, "loss": 6.0071, "step": 2306 }, { "epoch": 1.0457842248413418, "grad_norm": 4.8252196554993, "learning_rate": 7.71312946693496e-05, "loss": 5.9203, "step": 2307 }, { "epoch": 1.0462375339981869, "grad_norm": 3.9848829782143635, "learning_rate": 7.712658531216022e-05, "loss": 5.9275, "step": 2308 }, { "epoch": 1.0466908431550317, "grad_norm": 4.057493290774481, "learning_rate": 7.71218722366342e-05, "loss": 5.8273, "step": 2309 }, { "epoch": 1.0471441523118767, "grad_norm": 3.0565916948383385, "learning_rate": 7.71171554432436e-05, "loss": 5.8217, "step": 2310 }, { "epoch": 1.0475974614687216, "grad_norm": 4.353616486361989, "learning_rate": 7.71124349324608e-05, "loss": 5.766, "step": 2311 }, { "epoch": 1.0480507706255666, "grad_norm": 3.9955243508303635, "learning_rate": 7.710771070475856e-05, "loss": 5.7621, "step": 2312 }, { "epoch": 1.0485040797824117, "grad_norm": 4.112359729899903, "learning_rate": 7.710298276061004e-05, "loss": 5.7338, "step": 2313 }, { "epoch": 1.0489573889392565, "grad_norm": 3.23379318534331, "learning_rate": 7.709825110048874e-05, "loss": 5.6867, "step": 2314 }, { "epoch": 1.0494106980961015, "grad_norm": 2.810271897670167, "learning_rate": 7.709351572486857e-05, "loss": 5.6649, "step": 2315 }, { "epoch": 1.0498640072529466, "grad_norm": 3.276479968147849, "learning_rate": 7.708877663422379e-05, "loss": 5.6716, "step": 2316 }, { "epoch": 1.0503173164097914, "grad_norm": 3.865306941823564, "learning_rate": 7.708403382902902e-05, "loss": 5.6195, "step": 2317 }, { "epoch": 1.0507706255666365, "grad_norm": 4.386260701428785, "learning_rate": 7.707928730975929e-05, "loss": 5.6135, "step": 2318 }, { "epoch": 1.0512239347234815, "grad_norm": 4.133733879646934, "learning_rate": 7.707453707688993e-05, "loss": 5.6432, "step": 2319 }, { "epoch": 1.0516772438803264, "grad_norm": 6.517596080729875, "learning_rate": 7.706978313089675e-05, "loss": 5.6264, "step": 2320 }, { "epoch": 1.0521305530371714, "grad_norm": 4.687582088693947, "learning_rate": 7.706502547225584e-05, "loss": 5.5784, "step": 2321 }, { "epoch": 1.0525838621940162, "grad_norm": 3.181854883766817, "learning_rate": 7.706026410144369e-05, "loss": 5.523, "step": 2322 }, { "epoch": 1.0530371713508613, "grad_norm": 5.174179744759722, "learning_rate": 7.705549901893717e-05, "loss": 5.5056, "step": 2323 }, { "epoch": 1.0534904805077063, "grad_norm": 1.9160133338952412, "learning_rate": 7.705073022521352e-05, "loss": 5.4102, "step": 2324 }, { "epoch": 1.0539437896645512, "grad_norm": 5.385273723434842, "learning_rate": 7.704595772075035e-05, "loss": 5.4396, "step": 2325 }, { "epoch": 1.0543970988213962, "grad_norm": 4.002965601314066, "learning_rate": 7.704118150602565e-05, "loss": 5.3912, "step": 2326 }, { "epoch": 1.0548504079782413, "grad_norm": 3.584810032026845, "learning_rate": 7.703640158151774e-05, "loss": 5.3343, "step": 2327 }, { "epoch": 1.055303717135086, "grad_norm": 3.6560051107641565, "learning_rate": 7.703161794770538e-05, "loss": 5.3136, "step": 2328 }, { "epoch": 1.0557570262919311, "grad_norm": 4.5830305556772215, "learning_rate": 7.702683060506763e-05, "loss": 5.3429, "step": 2329 }, { "epoch": 1.056210335448776, "grad_norm": 3.842265795416284, "learning_rate": 7.702203955408399e-05, "loss": 5.3285, "step": 2330 }, { "epoch": 1.056663644605621, "grad_norm": 2.8899699162486594, "learning_rate": 7.701724479523428e-05, "loss": 5.2961, "step": 2331 }, { "epoch": 1.057116953762466, "grad_norm": 4.346202073452472, "learning_rate": 7.70124463289987e-05, "loss": 5.2977, "step": 2332 }, { "epoch": 1.0575702629193109, "grad_norm": 3.4558992101620363, "learning_rate": 7.700764415585786e-05, "loss": 5.2648, "step": 2333 }, { "epoch": 1.058023572076156, "grad_norm": 5.035945977295431, "learning_rate": 7.700283827629269e-05, "loss": 5.1828, "step": 2334 }, { "epoch": 1.058476881233001, "grad_norm": 3.1414050214624245, "learning_rate": 7.699802869078453e-05, "loss": 5.1401, "step": 2335 }, { "epoch": 1.0589301903898458, "grad_norm": 2.427845230213013, "learning_rate": 7.699321539981504e-05, "loss": 5.1136, "step": 2336 }, { "epoch": 1.0593834995466909, "grad_norm": 6.044614966963591, "learning_rate": 7.698839840386631e-05, "loss": 5.2149, "step": 2337 }, { "epoch": 1.059836808703536, "grad_norm": 4.802677519756708, "learning_rate": 7.698357770342078e-05, "loss": 5.099, "step": 2338 }, { "epoch": 1.0602901178603807, "grad_norm": 7.454029054108183, "learning_rate": 7.697875329896125e-05, "loss": 5.203, "step": 2339 }, { "epoch": 1.0607434270172258, "grad_norm": 4.265031459287632, "learning_rate": 7.697392519097088e-05, "loss": 5.0654, "step": 2340 }, { "epoch": 1.0611967361740706, "grad_norm": 6.179294367563578, "learning_rate": 7.696909337993326e-05, "loss": 5.0645, "step": 2341 }, { "epoch": 1.0616500453309157, "grad_norm": 6.38213675273815, "learning_rate": 7.696425786633228e-05, "loss": 5.1571, "step": 2342 }, { "epoch": 1.0621033544877607, "grad_norm": 2.6829234602743792, "learning_rate": 7.695941865065225e-05, "loss": 4.9171, "step": 2343 }, { "epoch": 1.0625566636446055, "grad_norm": 4.383582946138513, "learning_rate": 7.695457573337781e-05, "loss": 4.9873, "step": 2344 }, { "epoch": 1.0630099728014506, "grad_norm": 3.385949007010201, "learning_rate": 7.694972911499399e-05, "loss": 4.9172, "step": 2345 }, { "epoch": 1.0634632819582956, "grad_norm": 5.6134277605691345, "learning_rate": 7.694487879598623e-05, "loss": 4.9417, "step": 2346 }, { "epoch": 1.0639165911151405, "grad_norm": 3.520010106284252, "learning_rate": 7.694002477684025e-05, "loss": 4.9082, "step": 2347 }, { "epoch": 1.0643699002719855, "grad_norm": 3.034377899870042, "learning_rate": 7.693516705804225e-05, "loss": 4.8911, "step": 2348 }, { "epoch": 1.0648232094288304, "grad_norm": 4.825325152016642, "learning_rate": 7.693030564007871e-05, "loss": 4.9004, "step": 2349 }, { "epoch": 1.0652765185856754, "grad_norm": 4.098330200828925, "learning_rate": 7.692544052343654e-05, "loss": 4.8361, "step": 2350 }, { "epoch": 1.0657298277425205, "grad_norm": 2.8501916794739115, "learning_rate": 7.692057170860296e-05, "loss": 4.8928, "step": 2351 }, { "epoch": 1.0661831368993653, "grad_norm": 3.481166902992882, "learning_rate": 7.691569919606562e-05, "loss": 4.8165, "step": 2352 }, { "epoch": 1.0666364460562103, "grad_norm": 4.548356729234137, "learning_rate": 7.69108229863125e-05, "loss": 4.801, "step": 2353 }, { "epoch": 1.0670897552130554, "grad_norm": 2.8404612214771103, "learning_rate": 7.6905943079832e-05, "loss": 4.754, "step": 2354 }, { "epoch": 1.0675430643699002, "grad_norm": 4.406614412457609, "learning_rate": 7.690105947711284e-05, "loss": 4.7807, "step": 2355 }, { "epoch": 1.0679963735267453, "grad_norm": 2.975650072219128, "learning_rate": 7.68961721786441e-05, "loss": 4.7435, "step": 2356 }, { "epoch": 1.0684496826835903, "grad_norm": 3.2224576890140755, "learning_rate": 7.689128118491528e-05, "loss": 4.689, "step": 2357 }, { "epoch": 1.0689029918404351, "grad_norm": 4.49404226118954, "learning_rate": 7.688638649641625e-05, "loss": 4.7083, "step": 2358 }, { "epoch": 1.0693563009972802, "grad_norm": 3.4028010455065383, "learning_rate": 7.68814881136372e-05, "loss": 4.6854, "step": 2359 }, { "epoch": 1.069809610154125, "grad_norm": 3.417154333709478, "learning_rate": 7.687658603706871e-05, "loss": 4.6616, "step": 2360 }, { "epoch": 1.07026291931097, "grad_norm": 3.109779214135758, "learning_rate": 7.687168026720176e-05, "loss": 4.6825, "step": 2361 }, { "epoch": 1.0707162284678151, "grad_norm": 5.144983619192727, "learning_rate": 7.686677080452766e-05, "loss": 4.6557, "step": 2362 }, { "epoch": 1.07116953762466, "grad_norm": 2.647248585452473, "learning_rate": 7.686185764953812e-05, "loss": 4.559, "step": 2363 }, { "epoch": 1.071622846781505, "grad_norm": 6.056998150479192, "learning_rate": 7.68569408027252e-05, "loss": 4.6205, "step": 2364 }, { "epoch": 1.07207615593835, "grad_norm": 2.9256785892477324, "learning_rate": 7.685202026458134e-05, "loss": 4.5948, "step": 2365 }, { "epoch": 1.0725294650951949, "grad_norm": 2.9901784413984482, "learning_rate": 7.684709603559935e-05, "loss": 4.5207, "step": 2366 }, { "epoch": 1.07298277425204, "grad_norm": 3.9674019615395966, "learning_rate": 7.684216811627238e-05, "loss": 4.5315, "step": 2367 }, { "epoch": 1.0734360834088847, "grad_norm": 2.972142747724148, "learning_rate": 7.683723650709402e-05, "loss": 4.5386, "step": 2368 }, { "epoch": 1.0738893925657298, "grad_norm": 3.9312147811492673, "learning_rate": 7.683230120855815e-05, "loss": 4.5, "step": 2369 }, { "epoch": 1.0743427017225748, "grad_norm": 2.6374426720542585, "learning_rate": 7.682736222115907e-05, "loss": 4.4408, "step": 2370 }, { "epoch": 1.0747960108794197, "grad_norm": 4.274885092364638, "learning_rate": 7.682241954539142e-05, "loss": 4.4518, "step": 2371 }, { "epoch": 1.0752493200362647, "grad_norm": 4.19526951150165, "learning_rate": 7.681747318175025e-05, "loss": 4.3835, "step": 2372 }, { "epoch": 1.0757026291931098, "grad_norm": 2.3921944764250997, "learning_rate": 7.681252313073092e-05, "loss": 4.3843, "step": 2373 }, { "epoch": 1.0761559383499546, "grad_norm": 4.810701961157943, "learning_rate": 7.680756939282922e-05, "loss": 4.3948, "step": 2374 }, { "epoch": 1.0766092475067996, "grad_norm": 3.131931733551208, "learning_rate": 7.680261196854126e-05, "loss": 4.3439, "step": 2375 }, { "epoch": 1.0770625566636447, "grad_norm": 4.569001551357258, "learning_rate": 7.679765085836356e-05, "loss": 4.3168, "step": 2376 }, { "epoch": 1.0775158658204895, "grad_norm": 2.9895574266777274, "learning_rate": 7.679268606279297e-05, "loss": 4.3037, "step": 2377 }, { "epoch": 1.0779691749773346, "grad_norm": 3.8921834062998424, "learning_rate": 7.678771758232675e-05, "loss": 4.2744, "step": 2378 }, { "epoch": 1.0784224841341796, "grad_norm": 2.6288390211254096, "learning_rate": 7.678274541746248e-05, "loss": 4.2429, "step": 2379 }, { "epoch": 1.0788757932910245, "grad_norm": 3.208514566156114, "learning_rate": 7.677776956869817e-05, "loss": 4.2045, "step": 2380 }, { "epoch": 1.0793291024478695, "grad_norm": 4.530669103126331, "learning_rate": 7.677279003653214e-05, "loss": 4.1915, "step": 2381 }, { "epoch": 1.0797824116047143, "grad_norm": 4.665701391689266, "learning_rate": 7.676780682146312e-05, "loss": 4.2446, "step": 2382 }, { "epoch": 1.0802357207615594, "grad_norm": 4.181863311691966, "learning_rate": 7.676281992399019e-05, "loss": 4.1811, "step": 2383 }, { "epoch": 1.0806890299184044, "grad_norm": 4.697938501375869, "learning_rate": 7.67578293446128e-05, "loss": 4.1898, "step": 2384 }, { "epoch": 1.0811423390752493, "grad_norm": 3.407601097260339, "learning_rate": 7.675283508383077e-05, "loss": 4.1687, "step": 2385 }, { "epoch": 1.0815956482320943, "grad_norm": 4.246301736407681, "learning_rate": 7.674783714214429e-05, "loss": 4.0947, "step": 2386 }, { "epoch": 1.0820489573889394, "grad_norm": 3.3101422462549372, "learning_rate": 7.674283552005392e-05, "loss": 4.0747, "step": 2387 }, { "epoch": 1.0825022665457842, "grad_norm": 3.946113429489755, "learning_rate": 7.673783021806061e-05, "loss": 4.0114, "step": 2388 }, { "epoch": 1.0829555757026292, "grad_norm": 3.4848023400151082, "learning_rate": 7.673282123666561e-05, "loss": 3.9321, "step": 2389 }, { "epoch": 1.083408884859474, "grad_norm": 4.972567838511231, "learning_rate": 7.67278085763706e-05, "loss": 4.0086, "step": 2390 }, { "epoch": 1.0838621940163191, "grad_norm": 3.130310871185009, "learning_rate": 7.672279223767764e-05, "loss": 3.9045, "step": 2391 }, { "epoch": 1.0843155031731642, "grad_norm": 4.802960928236241, "learning_rate": 7.67177722210891e-05, "loss": 3.9087, "step": 2392 }, { "epoch": 1.084768812330009, "grad_norm": 5.202314546415333, "learning_rate": 7.671274852710776e-05, "loss": 3.8298, "step": 2393 }, { "epoch": 1.085222121486854, "grad_norm": 3.7508633615579163, "learning_rate": 7.670772115623676e-05, "loss": 3.78, "step": 2394 }, { "epoch": 1.085675430643699, "grad_norm": 6.304773991396164, "learning_rate": 7.67026901089796e-05, "loss": 3.8618, "step": 2395 }, { "epoch": 1.086128739800544, "grad_norm": 6.390770431787671, "learning_rate": 7.669765538584016e-05, "loss": 3.7276, "step": 2396 }, { "epoch": 1.086582048957389, "grad_norm": 3.5866551818913925, "learning_rate": 7.669261698732269e-05, "loss": 3.6477, "step": 2397 }, { "epoch": 1.087035358114234, "grad_norm": 4.538345949554096, "learning_rate": 7.668757491393179e-05, "loss": 3.4833, "step": 2398 }, { "epoch": 1.0874886672710788, "grad_norm": 5.779064664963691, "learning_rate": 7.668252916617242e-05, "loss": 3.2762, "step": 2399 }, { "epoch": 1.087941976427924, "grad_norm": 7.5630192933270015, "learning_rate": 7.667747974454996e-05, "loss": 3.1313, "step": 2400 }, { "epoch": 1.0883952855847687, "grad_norm": 8.763467322067829, "learning_rate": 7.667242664957011e-05, "loss": 2.7427, "step": 2401 }, { "epoch": 1.0888485947416138, "grad_norm": 146.8084897121597, "learning_rate": 7.666736988173897e-05, "loss": 3.0558, "step": 2402 }, { "epoch": 1.0893019038984588, "grad_norm": 17.93007801953832, "learning_rate": 7.666230944156296e-05, "loss": 3.3578, "step": 2403 }, { "epoch": 1.0897552130553037, "grad_norm": 21.370476910915702, "learning_rate": 7.665724532954889e-05, "loss": 3.5115, "step": 2404 }, { "epoch": 1.0902085222121487, "grad_norm": 73.94842449206689, "learning_rate": 7.665217754620399e-05, "loss": 4.7996, "step": 2405 }, { "epoch": 1.0906618313689938, "grad_norm": 32.224541415865374, "learning_rate": 7.664710609203578e-05, "loss": 4.525, "step": 2406 }, { "epoch": 1.0911151405258386, "grad_norm": 25.470087910366463, "learning_rate": 7.66420309675522e-05, "loss": 3.5655, "step": 2407 }, { "epoch": 1.0915684496826836, "grad_norm": 9.028878238788725, "learning_rate": 7.663695217326153e-05, "loss": 2.8089, "step": 2408 }, { "epoch": 1.0920217588395285, "grad_norm": 10.874227060185458, "learning_rate": 7.663186970967242e-05, "loss": 2.3529, "step": 2409 }, { "epoch": 1.0924750679963735, "grad_norm": 3.746355789449805, "learning_rate": 7.662678357729389e-05, "loss": 1.7036, "step": 2410 }, { "epoch": 1.0929283771532186, "grad_norm": 3.0732286491000003, "learning_rate": 7.662169377663535e-05, "loss": 1.415, "step": 2411 }, { "epoch": 1.0933816863100634, "grad_norm": 2.195814918145043, "learning_rate": 7.661660030820656e-05, "loss": 1.2601, "step": 2412 }, { "epoch": 1.0938349954669084, "grad_norm": 1.9478671034936976, "learning_rate": 7.661150317251762e-05, "loss": 1.1993, "step": 2413 }, { "epoch": 1.0942883046237535, "grad_norm": 1.391282981818816, "learning_rate": 7.660640237007905e-05, "loss": 1.1046, "step": 2414 }, { "epoch": 1.0947416137805983, "grad_norm": 2.332372771745188, "learning_rate": 7.660129790140169e-05, "loss": 1.0679, "step": 2415 }, { "epoch": 1.0951949229374434, "grad_norm": 3.2810543061801094, "learning_rate": 7.659618976699678e-05, "loss": 1.0904, "step": 2416 }, { "epoch": 1.0956482320942884, "grad_norm": 4.783434866604042, "learning_rate": 7.659107796737591e-05, "loss": 1.1597, "step": 2417 }, { "epoch": 1.0961015412511332, "grad_norm": 1.4169900057492881, "learning_rate": 7.658596250305104e-05, "loss": 1.0635, "step": 2418 }, { "epoch": 1.0965548504079783, "grad_norm": 2.129068635451705, "learning_rate": 7.658084337453449e-05, "loss": 1.0603, "step": 2419 }, { "epoch": 1.0970081595648231, "grad_norm": 1.452141928685363, "learning_rate": 7.657572058233899e-05, "loss": 1.0662, "step": 2420 }, { "epoch": 1.0974614687216682, "grad_norm": 3.4300726820737903, "learning_rate": 7.657059412697756e-05, "loss": 1.1361, "step": 2421 }, { "epoch": 1.0979147778785132, "grad_norm": 2.3886914950271874, "learning_rate": 7.656546400896367e-05, "loss": 1.0868, "step": 2422 }, { "epoch": 1.098368087035358, "grad_norm": 1.092510608795054, "learning_rate": 7.656033022881108e-05, "loss": 1.0606, "step": 2423 }, { "epoch": 1.098821396192203, "grad_norm": 0.9375230914908772, "learning_rate": 7.655519278703396e-05, "loss": 1.0357, "step": 2424 }, { "epoch": 1.0992747053490481, "grad_norm": 1.139610822744927, "learning_rate": 7.655005168414686e-05, "loss": 1.0064, "step": 2425 }, { "epoch": 1.099728014505893, "grad_norm": 0.8332695051057878, "learning_rate": 7.654490692066467e-05, "loss": 0.9903, "step": 2426 }, { "epoch": 1.100181323662738, "grad_norm": 1.2814771613044904, "learning_rate": 7.653975849710264e-05, "loss": 1.0036, "step": 2427 }, { "epoch": 1.1006346328195828, "grad_norm": 1.4089558691819342, "learning_rate": 7.653460641397642e-05, "loss": 1.0381, "step": 2428 }, { "epoch": 1.101087941976428, "grad_norm": 1.3099162445043189, "learning_rate": 7.652945067180199e-05, "loss": 0.9961, "step": 2429 }, { "epoch": 1.101541251133273, "grad_norm": 1.1267567021261842, "learning_rate": 7.652429127109572e-05, "loss": 1.024, "step": 2430 }, { "epoch": 1.1019945602901178, "grad_norm": 1.5904055784862956, "learning_rate": 7.651912821237431e-05, "loss": 0.9913, "step": 2431 }, { "epoch": 1.1024478694469628, "grad_norm": 1.2487516800208966, "learning_rate": 7.65139614961549e-05, "loss": 0.9597, "step": 2432 }, { "epoch": 1.1029011786038079, "grad_norm": 1.959454225873341, "learning_rate": 7.650879112295494e-05, "loss": 0.9849, "step": 2433 }, { "epoch": 1.1033544877606527, "grad_norm": 1.5929326700362598, "learning_rate": 7.650361709329226e-05, "loss": 0.975, "step": 2434 }, { "epoch": 1.1038077969174978, "grad_norm": 1.8047963560034164, "learning_rate": 7.649843940768503e-05, "loss": 0.9681, "step": 2435 }, { "epoch": 1.1042611060743428, "grad_norm": 1.5581133083443275, "learning_rate": 7.649325806665183e-05, "loss": 0.9457, "step": 2436 }, { "epoch": 1.1047144152311876, "grad_norm": 1.5609699840842997, "learning_rate": 7.648807307071158e-05, "loss": 0.9635, "step": 2437 }, { "epoch": 1.1051677243880327, "grad_norm": 2.0459035237511713, "learning_rate": 7.648288442038357e-05, "loss": 1.016, "step": 2438 }, { "epoch": 1.1056210335448775, "grad_norm": 1.3571992925540473, "learning_rate": 7.647769211618747e-05, "loss": 0.9861, "step": 2439 }, { "epoch": 1.1060743427017226, "grad_norm": 0.990632716941994, "learning_rate": 7.647249615864331e-05, "loss": 1.0008, "step": 2440 }, { "epoch": 1.1065276518585676, "grad_norm": 1.4706179534010668, "learning_rate": 7.646729654827145e-05, "loss": 0.9701, "step": 2441 }, { "epoch": 1.1069809610154124, "grad_norm": 0.8910500972462858, "learning_rate": 7.646209328559268e-05, "loss": 0.9682, "step": 2442 }, { "epoch": 1.1074342701722575, "grad_norm": 1.4283472784253217, "learning_rate": 7.645688637112811e-05, "loss": 0.9548, "step": 2443 }, { "epoch": 1.1078875793291025, "grad_norm": 1.1043942541295964, "learning_rate": 7.645167580539922e-05, "loss": 0.9437, "step": 2444 }, { "epoch": 1.1083408884859474, "grad_norm": 1.8738190408695377, "learning_rate": 7.644646158892785e-05, "loss": 0.9316, "step": 2445 }, { "epoch": 1.1087941976427924, "grad_norm": 1.4746900057808436, "learning_rate": 7.644124372223626e-05, "loss": 0.971, "step": 2446 }, { "epoch": 1.1092475067996372, "grad_norm": 1.1493014519251057, "learning_rate": 7.643602220584701e-05, "loss": 0.9264, "step": 2447 }, { "epoch": 1.1097008159564823, "grad_norm": 1.3187134610505222, "learning_rate": 7.643079704028304e-05, "loss": 0.9422, "step": 2448 }, { "epoch": 1.1101541251133273, "grad_norm": 1.1095596928438682, "learning_rate": 7.64255682260677e-05, "loss": 0.9392, "step": 2449 }, { "epoch": 1.1106074342701722, "grad_norm": 1.4266491678899524, "learning_rate": 7.642033576372463e-05, "loss": 0.9425, "step": 2450 }, { "epoch": 1.1110607434270172, "grad_norm": 1.009834238705206, "learning_rate": 7.64150996537779e-05, "loss": 0.9155, "step": 2451 }, { "epoch": 1.1115140525838623, "grad_norm": 1.433696949184942, "learning_rate": 7.640985989675191e-05, "loss": 0.9407, "step": 2452 }, { "epoch": 1.111967361740707, "grad_norm": 1.0666704419903505, "learning_rate": 7.640461649317146e-05, "loss": 0.9336, "step": 2453 }, { "epoch": 1.1124206708975521, "grad_norm": 1.2984760240676392, "learning_rate": 7.639936944356168e-05, "loss": 0.9417, "step": 2454 }, { "epoch": 1.1128739800543972, "grad_norm": 1.1918224594598372, "learning_rate": 7.639411874844806e-05, "loss": 0.915, "step": 2455 }, { "epoch": 1.113327289211242, "grad_norm": 1.1399726354631918, "learning_rate": 7.638886440835649e-05, "loss": 0.9269, "step": 2456 }, { "epoch": 1.113780598368087, "grad_norm": 1.1259936713883603, "learning_rate": 7.638360642381321e-05, "loss": 0.9201, "step": 2457 }, { "epoch": 1.1142339075249321, "grad_norm": 1.0193578753805663, "learning_rate": 7.637834479534482e-05, "loss": 0.9327, "step": 2458 }, { "epoch": 1.114687216681777, "grad_norm": 1.1044841956360525, "learning_rate": 7.637307952347828e-05, "loss": 0.9497, "step": 2459 }, { "epoch": 1.115140525838622, "grad_norm": 1.012558911050277, "learning_rate": 7.636781060874092e-05, "loss": 0.9082, "step": 2460 }, { "epoch": 1.1155938349954668, "grad_norm": 1.4830525053645778, "learning_rate": 7.636253805166045e-05, "loss": 0.9208, "step": 2461 }, { "epoch": 1.1160471441523119, "grad_norm": 1.146971627522293, "learning_rate": 7.635726185276494e-05, "loss": 0.9358, "step": 2462 }, { "epoch": 1.116500453309157, "grad_norm": 1.023974811328896, "learning_rate": 7.635198201258278e-05, "loss": 0.9309, "step": 2463 }, { "epoch": 1.1169537624660018, "grad_norm": 0.8935474185989489, "learning_rate": 7.63466985316428e-05, "loss": 0.9273, "step": 2464 }, { "epoch": 1.1174070716228468, "grad_norm": 1.0958782202499184, "learning_rate": 7.634141141047414e-05, "loss": 0.9256, "step": 2465 }, { "epoch": 1.1178603807796919, "grad_norm": 1.4887624159912314, "learning_rate": 7.633612064960632e-05, "loss": 0.9266, "step": 2466 }, { "epoch": 1.1183136899365367, "grad_norm": 1.015838922047579, "learning_rate": 7.633082624956922e-05, "loss": 0.9111, "step": 2467 }, { "epoch": 1.1187669990933817, "grad_norm": 0.8695679020538793, "learning_rate": 7.63255282108931e-05, "loss": 0.9355, "step": 2468 }, { "epoch": 1.1192203082502266, "grad_norm": 0.8703763236121019, "learning_rate": 7.632022653410857e-05, "loss": 0.9207, "step": 2469 }, { "epoch": 1.1196736174070716, "grad_norm": 0.812331056610596, "learning_rate": 7.631492121974662e-05, "loss": 0.9301, "step": 2470 }, { "epoch": 1.1201269265639167, "grad_norm": 0.7181298956805691, "learning_rate": 7.630961226833859e-05, "loss": 0.8956, "step": 2471 }, { "epoch": 1.1205802357207615, "grad_norm": 0.808461751198003, "learning_rate": 7.630429968041615e-05, "loss": 0.9347, "step": 2472 }, { "epoch": 1.1210335448776065, "grad_norm": 1.007645175733487, "learning_rate": 7.629898345651141e-05, "loss": 0.9135, "step": 2473 }, { "epoch": 1.1214868540344516, "grad_norm": 1.3499517384440476, "learning_rate": 7.629366359715681e-05, "loss": 0.9187, "step": 2474 }, { "epoch": 1.1219401631912964, "grad_norm": 1.2773696189994, "learning_rate": 7.628834010288513e-05, "loss": 0.9335, "step": 2475 }, { "epoch": 1.1223934723481415, "grad_norm": 1.165589796024637, "learning_rate": 7.628301297422953e-05, "loss": 0.9168, "step": 2476 }, { "epoch": 1.1228467815049865, "grad_norm": 0.6914705711250302, "learning_rate": 7.627768221172356e-05, "loss": 0.9206, "step": 2477 }, { "epoch": 1.1233000906618313, "grad_norm": 0.8911036953948623, "learning_rate": 7.627234781590108e-05, "loss": 0.9118, "step": 2478 }, { "epoch": 1.1237533998186764, "grad_norm": 1.174356734574226, "learning_rate": 7.626700978729637e-05, "loss": 0.9165, "step": 2479 }, { "epoch": 1.1242067089755212, "grad_norm": 1.4243606815522214, "learning_rate": 7.626166812644406e-05, "loss": 0.9031, "step": 2480 }, { "epoch": 1.1246600181323663, "grad_norm": 1.0104136059650182, "learning_rate": 7.62563228338791e-05, "loss": 0.9371, "step": 2481 }, { "epoch": 1.1251133272892113, "grad_norm": 1.3679607130559739, "learning_rate": 7.625097391013686e-05, "loss": 0.9216, "step": 2482 }, { "epoch": 1.1255666364460561, "grad_norm": 0.7605776028218899, "learning_rate": 7.624562135575305e-05, "loss": 0.9193, "step": 2483 }, { "epoch": 1.1260199456029012, "grad_norm": 1.097170344691278, "learning_rate": 7.624026517126372e-05, "loss": 0.9251, "step": 2484 }, { "epoch": 1.126473254759746, "grad_norm": 1.3643178901497581, "learning_rate": 7.623490535720533e-05, "loss": 0.9204, "step": 2485 }, { "epoch": 1.126926563916591, "grad_norm": 0.6805111504924223, "learning_rate": 7.622954191411469e-05, "loss": 0.9136, "step": 2486 }, { "epoch": 1.1273798730734361, "grad_norm": 1.390267924217174, "learning_rate": 7.622417484252893e-05, "loss": 0.916, "step": 2487 }, { "epoch": 1.127833182230281, "grad_norm": 0.6533919223161649, "learning_rate": 7.621880414298562e-05, "loss": 0.9226, "step": 2488 }, { "epoch": 1.128286491387126, "grad_norm": 1.1872894974238155, "learning_rate": 7.621342981602261e-05, "loss": 0.9172, "step": 2489 }, { "epoch": 1.128739800543971, "grad_norm": 0.9217151741251335, "learning_rate": 7.620805186217818e-05, "loss": 0.9202, "step": 2490 }, { "epoch": 1.1291931097008159, "grad_norm": 0.9842007561608125, "learning_rate": 7.620267028199095e-05, "loss": 0.9219, "step": 2491 }, { "epoch": 1.129646418857661, "grad_norm": 1.1846503608849615, "learning_rate": 7.619728507599989e-05, "loss": 0.9174, "step": 2492 }, { "epoch": 1.130099728014506, "grad_norm": 1.472483558658305, "learning_rate": 7.619189624474434e-05, "loss": 0.9138, "step": 2493 }, { "epoch": 1.1305530371713508, "grad_norm": 0.9306921584233746, "learning_rate": 7.618650378876402e-05, "loss": 0.9193, "step": 2494 }, { "epoch": 1.1310063463281959, "grad_norm": 0.9633637319534175, "learning_rate": 7.6181107708599e-05, "loss": 0.907, "step": 2495 }, { "epoch": 1.131459655485041, "grad_norm": 1.0971287559002065, "learning_rate": 7.617570800478972e-05, "loss": 0.9167, "step": 2496 }, { "epoch": 1.1319129646418857, "grad_norm": 1.4526608956706664, "learning_rate": 7.617030467787693e-05, "loss": 0.9026, "step": 2497 }, { "epoch": 1.1323662737987308, "grad_norm": 0.9475142097255123, "learning_rate": 7.616489772840185e-05, "loss": 0.912, "step": 2498 }, { "epoch": 1.1328195829555756, "grad_norm": 0.8775327521121625, "learning_rate": 7.615948715690597e-05, "loss": 0.9284, "step": 2499 }, { "epoch": 1.1332728921124207, "grad_norm": 1.0582659214031427, "learning_rate": 7.615407296393119e-05, "loss": 0.9184, "step": 2500 }, { "epoch": 1.1337262012692657, "grad_norm": 1.5271819299876443, "learning_rate": 7.614865515001974e-05, "loss": 0.9231, "step": 2501 }, { "epoch": 1.1341795104261105, "grad_norm": 1.0015893797072324, "learning_rate": 7.614323371571424e-05, "loss": 0.9146, "step": 2502 }, { "epoch": 1.1346328195829556, "grad_norm": 1.2164207747997662, "learning_rate": 7.613780866155764e-05, "loss": 0.9297, "step": 2503 }, { "epoch": 1.1350861287398006, "grad_norm": 0.7889813222080632, "learning_rate": 7.613237998809333e-05, "loss": 0.9235, "step": 2504 }, { "epoch": 1.1355394378966455, "grad_norm": 0.7859768731989358, "learning_rate": 7.612694769586494e-05, "loss": 0.9322, "step": 2505 }, { "epoch": 1.1359927470534905, "grad_norm": 0.9111834734628061, "learning_rate": 7.612151178541659e-05, "loss": 0.91, "step": 2506 }, { "epoch": 1.1364460562103353, "grad_norm": 1.6831698668773063, "learning_rate": 7.611607225729267e-05, "loss": 0.9041, "step": 2507 }, { "epoch": 1.1368993653671804, "grad_norm": 0.9278636638639448, "learning_rate": 7.611062911203796e-05, "loss": 0.91, "step": 2508 }, { "epoch": 1.1373526745240254, "grad_norm": 1.1417805485928099, "learning_rate": 7.610518235019761e-05, "loss": 0.8866, "step": 2509 }, { "epoch": 1.1378059836808703, "grad_norm": 0.8359143911587421, "learning_rate": 7.609973197231717e-05, "loss": 0.9157, "step": 2510 }, { "epoch": 1.1382592928377153, "grad_norm": 0.9312356921696457, "learning_rate": 7.609427797894244e-05, "loss": 0.8979, "step": 2511 }, { "epoch": 1.1387126019945604, "grad_norm": 1.022535736314734, "learning_rate": 7.608882037061971e-05, "loss": 0.9055, "step": 2512 }, { "epoch": 1.1391659111514052, "grad_norm": 1.503163175442753, "learning_rate": 7.608335914789556e-05, "loss": 0.8992, "step": 2513 }, { "epoch": 1.1396192203082502, "grad_norm": 1.4210352241046167, "learning_rate": 7.607789431131693e-05, "loss": 0.8897, "step": 2514 }, { "epoch": 1.1400725294650953, "grad_norm": 0.5385180223230056, "learning_rate": 7.607242586143116e-05, "loss": 0.9154, "step": 2515 }, { "epoch": 1.1405258386219401, "grad_norm": 1.1859766645777048, "learning_rate": 7.606695379878594e-05, "loss": 0.9441, "step": 2516 }, { "epoch": 1.1409791477787852, "grad_norm": 1.6895843831205228, "learning_rate": 7.606147812392927e-05, "loss": 0.9107, "step": 2517 }, { "epoch": 1.1414324569356302, "grad_norm": 0.8565771715782029, "learning_rate": 7.60559988374096e-05, "loss": 0.8997, "step": 2518 }, { "epoch": 1.141885766092475, "grad_norm": 1.1209485071380931, "learning_rate": 7.605051593977568e-05, "loss": 0.8853, "step": 2519 }, { "epoch": 1.14233907524932, "grad_norm": 1.343868933374682, "learning_rate": 7.604502943157665e-05, "loss": 0.908, "step": 2520 }, { "epoch": 1.142792384406165, "grad_norm": 1.2485935137366657, "learning_rate": 7.603953931336198e-05, "loss": 0.9288, "step": 2521 }, { "epoch": 1.14324569356301, "grad_norm": 1.3532240494401604, "learning_rate": 7.603404558568151e-05, "loss": 0.9002, "step": 2522 }, { "epoch": 1.143699002719855, "grad_norm": 0.9149357600666658, "learning_rate": 7.602854824908548e-05, "loss": 0.9175, "step": 2523 }, { "epoch": 1.1441523118766999, "grad_norm": 1.04936557555316, "learning_rate": 7.602304730412447e-05, "loss": 0.914, "step": 2524 }, { "epoch": 1.144605621033545, "grad_norm": 0.9212649155702058, "learning_rate": 7.601754275134938e-05, "loss": 0.9126, "step": 2525 }, { "epoch": 1.1450589301903897, "grad_norm": 0.932651325087658, "learning_rate": 7.601203459131156e-05, "loss": 0.8986, "step": 2526 }, { "epoch": 1.1455122393472348, "grad_norm": 1.4385094334184956, "learning_rate": 7.600652282456263e-05, "loss": 0.9165, "step": 2527 }, { "epoch": 1.1459655485040798, "grad_norm": 1.2077042690003126, "learning_rate": 7.600100745165462e-05, "loss": 0.9392, "step": 2528 }, { "epoch": 1.1464188576609247, "grad_norm": 1.161081611810291, "learning_rate": 7.599548847313989e-05, "loss": 0.895, "step": 2529 }, { "epoch": 1.1468721668177697, "grad_norm": 0.9915851674186564, "learning_rate": 7.598996588957121e-05, "loss": 0.9079, "step": 2530 }, { "epoch": 1.1473254759746148, "grad_norm": 1.555088850165638, "learning_rate": 7.598443970150167e-05, "loss": 0.9129, "step": 2531 }, { "epoch": 1.1477787851314596, "grad_norm": 0.8546076408452524, "learning_rate": 7.597890990948475e-05, "loss": 0.9151, "step": 2532 }, { "epoch": 1.1482320942883046, "grad_norm": 0.8038005981336634, "learning_rate": 7.597337651407426e-05, "loss": 0.897, "step": 2533 }, { "epoch": 1.1486854034451497, "grad_norm": 0.9638697279369236, "learning_rate": 7.596783951582438e-05, "loss": 0.9274, "step": 2534 }, { "epoch": 1.1491387126019945, "grad_norm": 1.3712369069835817, "learning_rate": 7.596229891528966e-05, "loss": 0.9033, "step": 2535 }, { "epoch": 1.1495920217588396, "grad_norm": 0.825817306897089, "learning_rate": 7.595675471302503e-05, "loss": 0.8914, "step": 2536 }, { "epoch": 1.1500453309156846, "grad_norm": 1.363891096200731, "learning_rate": 7.595120690958573e-05, "loss": 0.8805, "step": 2537 }, { "epoch": 1.1504986400725294, "grad_norm": 1.1947621466232174, "learning_rate": 7.59456555055274e-05, "loss": 0.9182, "step": 2538 }, { "epoch": 1.1509519492293745, "grad_norm": 0.9677551862483756, "learning_rate": 7.594010050140602e-05, "loss": 0.8963, "step": 2539 }, { "epoch": 1.1514052583862193, "grad_norm": 1.2571966850173217, "learning_rate": 7.593454189777797e-05, "loss": 0.8964, "step": 2540 }, { "epoch": 1.1518585675430644, "grad_norm": 0.9208543285835852, "learning_rate": 7.592897969519993e-05, "loss": 0.8932, "step": 2541 }, { "epoch": 1.1523118766999094, "grad_norm": 1.1032560853094908, "learning_rate": 7.592341389422897e-05, "loss": 0.9149, "step": 2542 }, { "epoch": 1.1527651858567542, "grad_norm": 1.4800792449400988, "learning_rate": 7.591784449542254e-05, "loss": 0.8924, "step": 2543 }, { "epoch": 1.1532184950135993, "grad_norm": 0.9366647358575119, "learning_rate": 7.591227149933842e-05, "loss": 0.9124, "step": 2544 }, { "epoch": 1.1536718041704441, "grad_norm": 1.0088030415222509, "learning_rate": 7.590669490653478e-05, "loss": 0.9281, "step": 2545 }, { "epoch": 1.1541251133272892, "grad_norm": 0.9028450410959744, "learning_rate": 7.59011147175701e-05, "loss": 0.9385, "step": 2546 }, { "epoch": 1.1545784224841342, "grad_norm": 0.8259817147521045, "learning_rate": 7.589553093300328e-05, "loss": 0.9043, "step": 2547 }, { "epoch": 1.155031731640979, "grad_norm": 1.4346055352371538, "learning_rate": 7.588994355339353e-05, "loss": 0.9151, "step": 2548 }, { "epoch": 1.155485040797824, "grad_norm": 0.9892515695564202, "learning_rate": 7.588435257930047e-05, "loss": 0.9277, "step": 2549 }, { "epoch": 1.1559383499546692, "grad_norm": 1.4684712179761417, "learning_rate": 7.587875801128402e-05, "loss": 0.9027, "step": 2550 }, { "epoch": 1.156391659111514, "grad_norm": 0.7811831251307954, "learning_rate": 7.587315984990452e-05, "loss": 0.8945, "step": 2551 }, { "epoch": 1.156844968268359, "grad_norm": 0.8768445419436297, "learning_rate": 7.586755809572263e-05, "loss": 0.9185, "step": 2552 }, { "epoch": 1.157298277425204, "grad_norm": 1.177639924039331, "learning_rate": 7.586195274929939e-05, "loss": 0.8989, "step": 2553 }, { "epoch": 1.157751586582049, "grad_norm": 1.26906065150842, "learning_rate": 7.585634381119617e-05, "loss": 0.9115, "step": 2554 }, { "epoch": 1.158204895738894, "grad_norm": 1.2913527725248244, "learning_rate": 7.585073128197474e-05, "loss": 0.9202, "step": 2555 }, { "epoch": 1.158658204895739, "grad_norm": 0.9481087338570369, "learning_rate": 7.584511516219723e-05, "loss": 0.9003, "step": 2556 }, { "epoch": 1.1591115140525838, "grad_norm": 0.9717179926389582, "learning_rate": 7.583949545242606e-05, "loss": 0.905, "step": 2557 }, { "epoch": 1.1595648232094289, "grad_norm": 1.0743527932004222, "learning_rate": 7.583387215322412e-05, "loss": 0.9173, "step": 2558 }, { "epoch": 1.1600181323662737, "grad_norm": 1.55460288529018, "learning_rate": 7.582824526515455e-05, "loss": 0.8988, "step": 2559 }, { "epoch": 1.1604714415231188, "grad_norm": 0.941172773993807, "learning_rate": 7.582261478878093e-05, "loss": 0.9227, "step": 2560 }, { "epoch": 1.1609247506799638, "grad_norm": 1.1106782394955375, "learning_rate": 7.581698072466715e-05, "loss": 0.9024, "step": 2561 }, { "epoch": 1.1613780598368086, "grad_norm": 1.1922328609266923, "learning_rate": 7.581134307337748e-05, "loss": 0.9008, "step": 2562 }, { "epoch": 1.1618313689936537, "grad_norm": 1.511977561625631, "learning_rate": 7.580570183547657e-05, "loss": 0.8876, "step": 2563 }, { "epoch": 1.1622846781504985, "grad_norm": 0.8459893617632162, "learning_rate": 7.580005701152938e-05, "loss": 0.897, "step": 2564 }, { "epoch": 1.1627379873073436, "grad_norm": 1.043130199894637, "learning_rate": 7.579440860210126e-05, "loss": 0.8886, "step": 2565 }, { "epoch": 1.1631912964641886, "grad_norm": 1.1660976348126435, "learning_rate": 7.578875660775793e-05, "loss": 0.9111, "step": 2566 }, { "epoch": 1.1636446056210334, "grad_norm": 1.3563675283910377, "learning_rate": 7.578310102906546e-05, "loss": 0.9044, "step": 2567 }, { "epoch": 1.1640979147778785, "grad_norm": 1.0404595819540663, "learning_rate": 7.577744186659024e-05, "loss": 0.8995, "step": 2568 }, { "epoch": 1.1645512239347235, "grad_norm": 1.5086139584840645, "learning_rate": 7.577177912089907e-05, "loss": 0.928, "step": 2569 }, { "epoch": 1.1650045330915684, "grad_norm": 0.5528351344081979, "learning_rate": 7.576611279255909e-05, "loss": 0.9045, "step": 2570 }, { "epoch": 1.1654578422484134, "grad_norm": 1.3048300757444824, "learning_rate": 7.57604428821378e-05, "loss": 0.9234, "step": 2571 }, { "epoch": 1.1659111514052585, "grad_norm": 1.2997522902374257, "learning_rate": 7.575476939020304e-05, "loss": 0.894, "step": 2572 }, { "epoch": 1.1663644605621033, "grad_norm": 1.0592137658864458, "learning_rate": 7.574909231732307e-05, "loss": 0.9158, "step": 2573 }, { "epoch": 1.1668177697189483, "grad_norm": 1.2537558784035714, "learning_rate": 7.574341166406644e-05, "loss": 0.8945, "step": 2574 }, { "epoch": 1.1672710788757934, "grad_norm": 0.8674377825008158, "learning_rate": 7.573772743100207e-05, "loss": 0.9371, "step": 2575 }, { "epoch": 1.1677243880326382, "grad_norm": 1.1560085041517394, "learning_rate": 7.573203961869927e-05, "loss": 0.8983, "step": 2576 }, { "epoch": 1.1681776971894833, "grad_norm": 0.9042942486061998, "learning_rate": 7.572634822772769e-05, "loss": 0.9196, "step": 2577 }, { "epoch": 1.168631006346328, "grad_norm": 1.3063598672672798, "learning_rate": 7.572065325865733e-05, "loss": 0.8997, "step": 2578 }, { "epoch": 1.1690843155031732, "grad_norm": 0.9140098752941759, "learning_rate": 7.571495471205858e-05, "loss": 0.9024, "step": 2579 }, { "epoch": 1.1695376246600182, "grad_norm": 0.9580370095668953, "learning_rate": 7.570925258850213e-05, "loss": 0.9232, "step": 2580 }, { "epoch": 1.169990933816863, "grad_norm": 1.2970026515606206, "learning_rate": 7.570354688855911e-05, "loss": 0.9118, "step": 2581 }, { "epoch": 1.170444242973708, "grad_norm": 1.2729236936409603, "learning_rate": 7.569783761280093e-05, "loss": 0.9033, "step": 2582 }, { "epoch": 1.1708975521305531, "grad_norm": 0.915531121794545, "learning_rate": 7.56921247617994e-05, "loss": 0.9064, "step": 2583 }, { "epoch": 1.171350861287398, "grad_norm": 0.7837210571598561, "learning_rate": 7.568640833612666e-05, "loss": 0.9091, "step": 2584 }, { "epoch": 1.171804170444243, "grad_norm": 0.8459915595379177, "learning_rate": 7.568068833635526e-05, "loss": 0.9154, "step": 2585 }, { "epoch": 1.1722574796010878, "grad_norm": 1.403026487243595, "learning_rate": 7.567496476305806e-05, "loss": 0.8976, "step": 2586 }, { "epoch": 1.1727107887579329, "grad_norm": 1.0947341757908708, "learning_rate": 7.566923761680828e-05, "loss": 0.9216, "step": 2587 }, { "epoch": 1.173164097914778, "grad_norm": 1.1444152651896877, "learning_rate": 7.566350689817954e-05, "loss": 0.9094, "step": 2588 }, { "epoch": 1.1736174070716228, "grad_norm": 0.721531629063778, "learning_rate": 7.565777260774576e-05, "loss": 0.8845, "step": 2589 }, { "epoch": 1.1740707162284678, "grad_norm": 0.47389687558665466, "learning_rate": 7.565203474608126e-05, "loss": 0.8979, "step": 2590 }, { "epoch": 1.1745240253853129, "grad_norm": 0.7297903795478407, "learning_rate": 7.56462933137607e-05, "loss": 0.894, "step": 2591 }, { "epoch": 1.1749773345421577, "grad_norm": 1.1368486100385988, "learning_rate": 7.564054831135911e-05, "loss": 0.9095, "step": 2592 }, { "epoch": 1.1754306436990027, "grad_norm": 1.430234405634418, "learning_rate": 7.563479973945186e-05, "loss": 0.9066, "step": 2593 }, { "epoch": 1.1758839528558478, "grad_norm": 0.8941898739479598, "learning_rate": 7.562904759861467e-05, "loss": 0.9166, "step": 2594 }, { "epoch": 1.1763372620126926, "grad_norm": 1.0823467345578222, "learning_rate": 7.562329188942366e-05, "loss": 0.9221, "step": 2595 }, { "epoch": 1.1767905711695377, "grad_norm": 1.0339334985644784, "learning_rate": 7.561753261245528e-05, "loss": 0.9211, "step": 2596 }, { "epoch": 1.1772438803263825, "grad_norm": 1.4217404586365392, "learning_rate": 7.561176976828632e-05, "loss": 0.8999, "step": 2597 }, { "epoch": 1.1776971894832275, "grad_norm": 0.8994501375759059, "learning_rate": 7.560600335749398e-05, "loss": 0.9206, "step": 2598 }, { "epoch": 1.1781504986400726, "grad_norm": 1.1773280373775339, "learning_rate": 7.560023338065574e-05, "loss": 0.896, "step": 2599 }, { "epoch": 1.1786038077969174, "grad_norm": 0.883565403360514, "learning_rate": 7.559445983834951e-05, "loss": 0.9033, "step": 2600 }, { "epoch": 1.1790571169537625, "grad_norm": 1.200158202234469, "learning_rate": 7.55886827311535e-05, "loss": 0.8924, "step": 2601 }, { "epoch": 1.1795104261106075, "grad_norm": 1.2745227279743483, "learning_rate": 7.558290205964632e-05, "loss": 0.9089, "step": 2602 }, { "epoch": 1.1799637352674524, "grad_norm": 0.9728126168860493, "learning_rate": 7.557711782440694e-05, "loss": 0.9029, "step": 2603 }, { "epoch": 1.1804170444242974, "grad_norm": 1.4680459273216226, "learning_rate": 7.557133002601465e-05, "loss": 0.8999, "step": 2604 }, { "epoch": 1.1808703535811422, "grad_norm": 0.5381806992809064, "learning_rate": 7.55655386650491e-05, "loss": 0.8952, "step": 2605 }, { "epoch": 1.1813236627379873, "grad_norm": 1.1357832029562371, "learning_rate": 7.555974374209035e-05, "loss": 0.8922, "step": 2606 }, { "epoch": 1.1817769718948323, "grad_norm": 1.403966335894116, "learning_rate": 7.555394525771874e-05, "loss": 0.9089, "step": 2607 }, { "epoch": 1.1822302810516772, "grad_norm": 0.8462602182312048, "learning_rate": 7.554814321251502e-05, "loss": 0.9206, "step": 2608 }, { "epoch": 1.1826835902085222, "grad_norm": 1.6106037251912577, "learning_rate": 7.554233760706028e-05, "loss": 0.8978, "step": 2609 }, { "epoch": 1.1831368993653673, "grad_norm": 0.8502276678738561, "learning_rate": 7.553652844193599e-05, "loss": 0.8999, "step": 2610 }, { "epoch": 1.183590208522212, "grad_norm": 1.1351174253171614, "learning_rate": 7.553071571772393e-05, "loss": 0.9047, "step": 2611 }, { "epoch": 1.1840435176790571, "grad_norm": 1.3217108594022002, "learning_rate": 7.552489943500626e-05, "loss": 0.926, "step": 2612 }, { "epoch": 1.1844968268359022, "grad_norm": 1.4748910644756517, "learning_rate": 7.55190795943655e-05, "loss": 0.8684, "step": 2613 }, { "epoch": 1.184950135992747, "grad_norm": 0.8434609778443781, "learning_rate": 7.551325619638455e-05, "loss": 0.9156, "step": 2614 }, { "epoch": 1.185403445149592, "grad_norm": 1.673707476901838, "learning_rate": 7.55074292416466e-05, "loss": 0.9196, "step": 2615 }, { "epoch": 1.185856754306437, "grad_norm": 0.7427058456753135, "learning_rate": 7.550159873073527e-05, "loss": 0.9253, "step": 2616 }, { "epoch": 1.186310063463282, "grad_norm": 1.8470255622612186, "learning_rate": 7.549576466423449e-05, "loss": 0.9236, "step": 2617 }, { "epoch": 1.186763372620127, "grad_norm": 1.0418666383818267, "learning_rate": 7.548992704272856e-05, "loss": 0.9145, "step": 2618 }, { "epoch": 1.1872166817769718, "grad_norm": 2.349151745715469, "learning_rate": 7.548408586680212e-05, "loss": 0.8904, "step": 2619 }, { "epoch": 1.1876699909338169, "grad_norm": 2.1347961178307138, "learning_rate": 7.547824113704021e-05, "loss": 0.9218, "step": 2620 }, { "epoch": 1.188123300090662, "grad_norm": 1.2925493224659417, "learning_rate": 7.547239285402818e-05, "loss": 0.9318, "step": 2621 }, { "epoch": 1.1885766092475067, "grad_norm": 1.5854116788266157, "learning_rate": 7.546654101835174e-05, "loss": 0.9335, "step": 2622 }, { "epoch": 1.1890299184043518, "grad_norm": 1.2864312829118831, "learning_rate": 7.5460685630597e-05, "loss": 0.9008, "step": 2623 }, { "epoch": 1.1894832275611966, "grad_norm": 1.161844792825842, "learning_rate": 7.545482669135037e-05, "loss": 0.9094, "step": 2624 }, { "epoch": 1.1899365367180417, "grad_norm": 1.0650017475228541, "learning_rate": 7.544896420119865e-05, "loss": 0.9242, "step": 2625 }, { "epoch": 1.1903898458748867, "grad_norm": 1.276221157246264, "learning_rate": 7.544309816072898e-05, "loss": 0.9086, "step": 2626 }, { "epoch": 1.1908431550317315, "grad_norm": 0.9674619073456585, "learning_rate": 7.543722857052885e-05, "loss": 0.9031, "step": 2627 }, { "epoch": 1.1912964641885766, "grad_norm": 1.1380973769767317, "learning_rate": 7.543135543118615e-05, "loss": 0.9094, "step": 2628 }, { "epoch": 1.1917497733454216, "grad_norm": 0.9299169596948971, "learning_rate": 7.542547874328906e-05, "loss": 0.8919, "step": 2629 }, { "epoch": 1.1922030825022665, "grad_norm": 0.9406379871441944, "learning_rate": 7.541959850742617e-05, "loss": 0.9068, "step": 2630 }, { "epoch": 1.1926563916591115, "grad_norm": 0.7344804244396188, "learning_rate": 7.54137147241864e-05, "loss": 0.9196, "step": 2631 }, { "epoch": 1.1931097008159566, "grad_norm": 0.8589554218091947, "learning_rate": 7.540782739415901e-05, "loss": 0.8818, "step": 2632 }, { "epoch": 1.1935630099728014, "grad_norm": 0.91060012356389, "learning_rate": 7.540193651793364e-05, "loss": 0.9094, "step": 2633 }, { "epoch": 1.1940163191296465, "grad_norm": 0.8699935781114383, "learning_rate": 7.53960420961003e-05, "loss": 0.8926, "step": 2634 }, { "epoch": 1.1944696282864915, "grad_norm": 0.9606345747838128, "learning_rate": 7.53901441292493e-05, "loss": 0.9056, "step": 2635 }, { "epoch": 1.1949229374433363, "grad_norm": 1.357214886618566, "learning_rate": 7.538424261797136e-05, "loss": 0.9164, "step": 2636 }, { "epoch": 1.1953762466001814, "grad_norm": 1.202870344900987, "learning_rate": 7.537833756285753e-05, "loss": 0.8896, "step": 2637 }, { "epoch": 1.1958295557570262, "grad_norm": 0.8172164533873729, "learning_rate": 7.537242896449923e-05, "loss": 0.9156, "step": 2638 }, { "epoch": 1.1962828649138713, "grad_norm": 0.737042114223655, "learning_rate": 7.536651682348819e-05, "loss": 0.9094, "step": 2639 }, { "epoch": 1.1967361740707163, "grad_norm": 0.7037588329150696, "learning_rate": 7.536060114041656e-05, "loss": 0.8872, "step": 2640 }, { "epoch": 1.1971894832275611, "grad_norm": 0.9709339373083604, "learning_rate": 7.535468191587682e-05, "loss": 0.9066, "step": 2641 }, { "epoch": 1.1976427923844062, "grad_norm": 1.5548033675403758, "learning_rate": 7.534875915046176e-05, "loss": 0.9166, "step": 2642 }, { "epoch": 1.198096101541251, "grad_norm": 0.8345502512260377, "learning_rate": 7.534283284476459e-05, "loss": 0.9066, "step": 2643 }, { "epoch": 1.198549410698096, "grad_norm": 0.9577566290064596, "learning_rate": 7.533690299937883e-05, "loss": 0.9124, "step": 2644 }, { "epoch": 1.1990027198549411, "grad_norm": 1.1195786940244128, "learning_rate": 7.53309696148984e-05, "loss": 0.9122, "step": 2645 }, { "epoch": 1.199456029011786, "grad_norm": 1.0391190932498615, "learning_rate": 7.53250326919175e-05, "loss": 0.9174, "step": 2646 }, { "epoch": 1.199909338168631, "grad_norm": 1.57935944606239, "learning_rate": 7.531909223103078e-05, "loss": 0.91, "step": 2647 }, { "epoch": 1.200362647325476, "grad_norm": 0.9424544460333251, "learning_rate": 7.531314823283317e-05, "loss": 0.9006, "step": 2648 }, { "epoch": 1.2008159564823209, "grad_norm": 1.1632747270687085, "learning_rate": 7.530720069791997e-05, "loss": 0.8896, "step": 2649 }, { "epoch": 1.201269265639166, "grad_norm": 1.027014364193908, "learning_rate": 7.530124962688686e-05, "loss": 0.9154, "step": 2650 }, { "epoch": 1.201722574796011, "grad_norm": 1.8653499734524381, "learning_rate": 7.529529502032985e-05, "loss": 0.8892, "step": 2651 }, { "epoch": 1.2021758839528558, "grad_norm": 0.7738775264147147, "learning_rate": 7.528933687884533e-05, "loss": 0.9183, "step": 2652 }, { "epoch": 1.2026291931097008, "grad_norm": 2.2101558736675804, "learning_rate": 7.528337520302999e-05, "loss": 0.8964, "step": 2653 }, { "epoch": 1.203082502266546, "grad_norm": 1.1581750053482687, "learning_rate": 7.527740999348093e-05, "loss": 0.9197, "step": 2654 }, { "epoch": 1.2035358114233907, "grad_norm": 2.564208346445842, "learning_rate": 7.527144125079558e-05, "loss": 0.9079, "step": 2655 }, { "epoch": 1.2039891205802358, "grad_norm": 2.1193363294595513, "learning_rate": 7.526546897557173e-05, "loss": 0.9098, "step": 2656 }, { "epoch": 1.2044424297370806, "grad_norm": 2.110680313267792, "learning_rate": 7.525949316840753e-05, "loss": 0.901, "step": 2657 }, { "epoch": 1.2048957388939256, "grad_norm": 1.861241694086222, "learning_rate": 7.525351382990144e-05, "loss": 0.9169, "step": 2658 }, { "epoch": 1.2053490480507707, "grad_norm": 2.043669767995286, "learning_rate": 7.524753096065235e-05, "loss": 0.902, "step": 2659 }, { "epoch": 1.2058023572076155, "grad_norm": 1.758584558250955, "learning_rate": 7.524154456125943e-05, "loss": 0.9125, "step": 2660 }, { "epoch": 1.2062556663644606, "grad_norm": 1.8708228663301656, "learning_rate": 7.523555463232227e-05, "loss": 0.9066, "step": 2661 }, { "epoch": 1.2067089755213056, "grad_norm": 1.623507384699055, "learning_rate": 7.522956117444073e-05, "loss": 0.9078, "step": 2662 }, { "epoch": 1.2071622846781505, "grad_norm": 1.7582875454261735, "learning_rate": 7.522356418821512e-05, "loss": 0.9104, "step": 2663 }, { "epoch": 1.2076155938349955, "grad_norm": 1.5142523014521052, "learning_rate": 7.521756367424603e-05, "loss": 0.9141, "step": 2664 }, { "epoch": 1.2080689029918403, "grad_norm": 1.7421184987618896, "learning_rate": 7.521155963313444e-05, "loss": 0.8948, "step": 2665 }, { "epoch": 1.2085222121486854, "grad_norm": 1.3882994635428016, "learning_rate": 7.520555206548166e-05, "loss": 0.8936, "step": 2666 }, { "epoch": 1.2089755213055304, "grad_norm": 1.3250084633631882, "learning_rate": 7.519954097188939e-05, "loss": 0.919, "step": 2667 }, { "epoch": 1.2094288304623753, "grad_norm": 1.5596474234799707, "learning_rate": 7.519352635295963e-05, "loss": 0.9017, "step": 2668 }, { "epoch": 1.2098821396192203, "grad_norm": 1.015325568934714, "learning_rate": 7.518750820929477e-05, "loss": 0.8914, "step": 2669 }, { "epoch": 1.2103354487760654, "grad_norm": 1.4806305040229892, "learning_rate": 7.518148654149756e-05, "loss": 0.9034, "step": 2670 }, { "epoch": 1.2107887579329102, "grad_norm": 1.4584846208882558, "learning_rate": 7.517546135017106e-05, "loss": 0.9031, "step": 2671 }, { "epoch": 1.2112420670897552, "grad_norm": 0.9064330457912367, "learning_rate": 7.516943263591873e-05, "loss": 0.9017, "step": 2672 }, { "epoch": 1.2116953762466003, "grad_norm": 2.1037956380914293, "learning_rate": 7.516340039934438e-05, "loss": 0.8673, "step": 2673 }, { "epoch": 1.2121486854034451, "grad_norm": 1.3498910115644396, "learning_rate": 7.515736464105212e-05, "loss": 0.8995, "step": 2674 }, { "epoch": 1.2126019945602902, "grad_norm": 2.6960629357798864, "learning_rate": 7.515132536164646e-05, "loss": 0.8998, "step": 2675 }, { "epoch": 1.213055303717135, "grad_norm": 2.4719238049118837, "learning_rate": 7.514528256173227e-05, "loss": 0.9281, "step": 2676 }, { "epoch": 1.21350861287398, "grad_norm": 1.7857512022848434, "learning_rate": 7.513923624191474e-05, "loss": 0.8952, "step": 2677 }, { "epoch": 1.213961922030825, "grad_norm": 1.559938035256402, "learning_rate": 7.513318640279943e-05, "loss": 0.9039, "step": 2678 }, { "epoch": 1.21441523118767, "grad_norm": 1.8757903349320926, "learning_rate": 7.512713304499225e-05, "loss": 0.914, "step": 2679 }, { "epoch": 1.214868540344515, "grad_norm": 1.368938234929664, "learning_rate": 7.512107616909944e-05, "loss": 0.9113, "step": 2680 }, { "epoch": 1.21532184950136, "grad_norm": 2.2012963769539606, "learning_rate": 7.511501577572765e-05, "loss": 0.902, "step": 2681 }, { "epoch": 1.2157751586582048, "grad_norm": 1.8316947084964343, "learning_rate": 7.510895186548383e-05, "loss": 0.9144, "step": 2682 }, { "epoch": 1.21622846781505, "grad_norm": 1.9830067262733106, "learning_rate": 7.510288443897528e-05, "loss": 0.8921, "step": 2683 }, { "epoch": 1.2166817769718947, "grad_norm": 1.7665818930151558, "learning_rate": 7.509681349680971e-05, "loss": 0.9314, "step": 2684 }, { "epoch": 1.2171350861287398, "grad_norm": 2.015599181884209, "learning_rate": 7.50907390395951e-05, "loss": 0.8986, "step": 2685 }, { "epoch": 1.2175883952855848, "grad_norm": 1.7375462144822016, "learning_rate": 7.508466106793987e-05, "loss": 0.8991, "step": 2686 }, { "epoch": 1.2180417044424297, "grad_norm": 1.945211335903417, "learning_rate": 7.50785795824527e-05, "loss": 0.9024, "step": 2687 }, { "epoch": 1.2184950135992747, "grad_norm": 1.6347855504544733, "learning_rate": 7.507249458374271e-05, "loss": 0.9012, "step": 2688 }, { "epoch": 1.2189483227561198, "grad_norm": 2.1300802502252267, "learning_rate": 7.506640607241929e-05, "loss": 0.9136, "step": 2689 }, { "epoch": 1.2194016319129646, "grad_norm": 1.7909918714733484, "learning_rate": 7.506031404909225e-05, "loss": 0.8872, "step": 2690 }, { "epoch": 1.2198549410698096, "grad_norm": 2.03219494262841, "learning_rate": 7.505421851437172e-05, "loss": 0.9305, "step": 2691 }, { "epoch": 1.2203082502266547, "grad_norm": 1.7963836740328543, "learning_rate": 7.504811946886819e-05, "loss": 0.9068, "step": 2692 }, { "epoch": 1.2207615593834995, "grad_norm": 1.9178597756301416, "learning_rate": 7.504201691319248e-05, "loss": 0.9008, "step": 2693 }, { "epoch": 1.2212148685403446, "grad_norm": 1.742025602530271, "learning_rate": 7.503591084795579e-05, "loss": 0.909, "step": 2694 }, { "epoch": 1.2216681776971896, "grad_norm": 1.8654612626709648, "learning_rate": 7.502980127376967e-05, "loss": 0.9031, "step": 2695 }, { "epoch": 1.2221214868540344, "grad_norm": 1.597133069533321, "learning_rate": 7.5023688191246e-05, "loss": 0.885, "step": 2696 }, { "epoch": 1.2225747960108795, "grad_norm": 2.1026772602110246, "learning_rate": 7.501757160099702e-05, "loss": 0.8845, "step": 2697 }, { "epoch": 1.2230281051677243, "grad_norm": 1.8708771698199107, "learning_rate": 7.501145150363533e-05, "loss": 0.8904, "step": 2698 }, { "epoch": 1.2234814143245694, "grad_norm": 1.8735471218277764, "learning_rate": 7.500532789977387e-05, "loss": 0.9136, "step": 2699 }, { "epoch": 1.2239347234814144, "grad_norm": 1.809683873043487, "learning_rate": 7.499920079002595e-05, "loss": 0.9207, "step": 2700 }, { "epoch": 1.2243880326382592, "grad_norm": 1.7742185709976002, "learning_rate": 7.49930701750052e-05, "loss": 0.9259, "step": 2701 }, { "epoch": 1.2248413417951043, "grad_norm": 1.4659117821896706, "learning_rate": 7.498693605532565e-05, "loss": 0.9013, "step": 2702 }, { "epoch": 1.2252946509519491, "grad_norm": 2.0729186372296056, "learning_rate": 7.498079843160161e-05, "loss": 0.9035, "step": 2703 }, { "epoch": 1.2257479601087942, "grad_norm": 1.8879889127138745, "learning_rate": 7.497465730444781e-05, "loss": 0.9067, "step": 2704 }, { "epoch": 1.2262012692656392, "grad_norm": 1.813488353166049, "learning_rate": 7.496851267447929e-05, "loss": 0.8882, "step": 2705 }, { "epoch": 1.226654578422484, "grad_norm": 1.5838815716262524, "learning_rate": 7.496236454231145e-05, "loss": 0.9, "step": 2706 }, { "epoch": 1.227107887579329, "grad_norm": 1.90146878547097, "learning_rate": 7.495621290856006e-05, "loss": 0.8831, "step": 2707 }, { "epoch": 1.2275611967361741, "grad_norm": 1.6420619755310977, "learning_rate": 7.49500577738412e-05, "loss": 0.8989, "step": 2708 }, { "epoch": 1.228014505893019, "grad_norm": 2.082441781138093, "learning_rate": 7.494389913877135e-05, "loss": 0.9048, "step": 2709 }, { "epoch": 1.228467815049864, "grad_norm": 1.9026520482755052, "learning_rate": 7.493773700396729e-05, "loss": 0.8861, "step": 2710 }, { "epoch": 1.228921124206709, "grad_norm": 1.5772012820829349, "learning_rate": 7.49315713700462e-05, "loss": 0.8958, "step": 2711 }, { "epoch": 1.229374433363554, "grad_norm": 1.3780131485779892, "learning_rate": 7.492540223762558e-05, "loss": 0.9041, "step": 2712 }, { "epoch": 1.229827742520399, "grad_norm": 2.1536790237191137, "learning_rate": 7.491922960732327e-05, "loss": 0.8976, "step": 2713 }, { "epoch": 1.230281051677244, "grad_norm": 1.9009457784483477, "learning_rate": 7.49130534797575e-05, "loss": 0.9116, "step": 2714 }, { "epoch": 1.2307343608340888, "grad_norm": 1.7520131747757777, "learning_rate": 7.490687385554679e-05, "loss": 0.9087, "step": 2715 }, { "epoch": 1.2311876699909339, "grad_norm": 1.5823769286929505, "learning_rate": 7.49006907353101e-05, "loss": 0.8909, "step": 2716 }, { "epoch": 1.2316409791477787, "grad_norm": 1.8905399898665851, "learning_rate": 7.489450411966664e-05, "loss": 0.903, "step": 2717 }, { "epoch": 1.2320942883046238, "grad_norm": 1.6931906725993902, "learning_rate": 7.488831400923606e-05, "loss": 0.8816, "step": 2718 }, { "epoch": 1.2325475974614688, "grad_norm": 1.9553616212446379, "learning_rate": 7.488212040463829e-05, "loss": 0.9182, "step": 2719 }, { "epoch": 1.2330009066183136, "grad_norm": 1.7159734030777902, "learning_rate": 7.487592330649364e-05, "loss": 0.8935, "step": 2720 }, { "epoch": 1.2334542157751587, "grad_norm": 1.8015132653417187, "learning_rate": 7.486972271542279e-05, "loss": 0.9022, "step": 2721 }, { "epoch": 1.2339075249320035, "grad_norm": 1.7243408159729097, "learning_rate": 7.486351863204671e-05, "loss": 0.9232, "step": 2722 }, { "epoch": 1.2343608340888486, "grad_norm": 1.8261865516342242, "learning_rate": 7.485731105698679e-05, "loss": 0.8873, "step": 2723 }, { "epoch": 1.2348141432456936, "grad_norm": 1.5991734998721627, "learning_rate": 7.485109999086471e-05, "loss": 0.9246, "step": 2724 }, { "epoch": 1.2352674524025384, "grad_norm": 1.8891654690467674, "learning_rate": 7.484488543430256e-05, "loss": 0.9029, "step": 2725 }, { "epoch": 1.2357207615593835, "grad_norm": 1.683855970941086, "learning_rate": 7.483866738792271e-05, "loss": 0.8914, "step": 2726 }, { "epoch": 1.2361740707162285, "grad_norm": 1.871828144972644, "learning_rate": 7.483244585234794e-05, "loss": 0.8963, "step": 2727 }, { "epoch": 1.2366273798730734, "grad_norm": 1.6746149335558325, "learning_rate": 7.482622082820135e-05, "loss": 0.8954, "step": 2728 }, { "epoch": 1.2370806890299184, "grad_norm": 1.7884878135942115, "learning_rate": 7.481999231610638e-05, "loss": 0.915, "step": 2729 }, { "epoch": 1.2375339981867635, "grad_norm": 1.6936682925557023, "learning_rate": 7.481376031668685e-05, "loss": 0.9166, "step": 2730 }, { "epoch": 1.2379873073436083, "grad_norm": 1.6727218200667755, "learning_rate": 7.480752483056691e-05, "loss": 0.8813, "step": 2731 }, { "epoch": 1.2384406165004533, "grad_norm": 1.4037228894457867, "learning_rate": 7.480128585837106e-05, "loss": 0.8989, "step": 2732 }, { "epoch": 1.2388939256572984, "grad_norm": 2.0601891950161106, "learning_rate": 7.479504340072415e-05, "loss": 0.9031, "step": 2733 }, { "epoch": 1.2393472348141432, "grad_norm": 1.8747959624147512, "learning_rate": 7.478879745825139e-05, "loss": 0.8839, "step": 2734 }, { "epoch": 1.2398005439709883, "grad_norm": 1.5659408410021274, "learning_rate": 7.478254803157832e-05, "loss": 0.9104, "step": 2735 }, { "epoch": 1.240253853127833, "grad_norm": 1.3583009423552534, "learning_rate": 7.477629512133083e-05, "loss": 0.9003, "step": 2736 }, { "epoch": 1.2407071622846781, "grad_norm": 2.008508425319694, "learning_rate": 7.477003872813519e-05, "loss": 0.8869, "step": 2737 }, { "epoch": 1.2411604714415232, "grad_norm": 1.7722915531831243, "learning_rate": 7.476377885261798e-05, "loss": 0.932, "step": 2738 }, { "epoch": 1.241613780598368, "grad_norm": 1.7252519321152524, "learning_rate": 7.475751549540616e-05, "loss": 0.8839, "step": 2739 }, { "epoch": 1.242067089755213, "grad_norm": 1.5274243511320271, "learning_rate": 7.4751248657127e-05, "loss": 0.8876, "step": 2740 }, { "epoch": 1.242520398912058, "grad_norm": 1.8420161707654008, "learning_rate": 7.474497833840816e-05, "loss": 0.9169, "step": 2741 }, { "epoch": 1.242973708068903, "grad_norm": 1.617215497924602, "learning_rate": 7.473870453987761e-05, "loss": 0.9132, "step": 2742 }, { "epoch": 1.243427017225748, "grad_norm": 1.6947892568151723, "learning_rate": 7.473242726216372e-05, "loss": 0.8973, "step": 2743 }, { "epoch": 1.2438803263825928, "grad_norm": 1.4996261694741249, "learning_rate": 7.472614650589517e-05, "loss": 0.9113, "step": 2744 }, { "epoch": 1.2443336355394379, "grad_norm": 1.9355126803283957, "learning_rate": 7.471986227170098e-05, "loss": 0.9103, "step": 2745 }, { "epoch": 1.244786944696283, "grad_norm": 1.7400198163351488, "learning_rate": 7.471357456021054e-05, "loss": 0.9016, "step": 2746 }, { "epoch": 1.2452402538531278, "grad_norm": 1.588577524635543, "learning_rate": 7.47072833720536e-05, "loss": 0.9188, "step": 2747 }, { "epoch": 1.2456935630099728, "grad_norm": 1.3395797782395478, "learning_rate": 7.470098870786021e-05, "loss": 0.9071, "step": 2748 }, { "epoch": 1.2461468721668179, "grad_norm": 2.0503788828560134, "learning_rate": 7.469469056826082e-05, "loss": 0.9158, "step": 2749 }, { "epoch": 1.2466001813236627, "grad_norm": 1.8284025029872917, "learning_rate": 7.468838895388622e-05, "loss": 0.9097, "step": 2750 }, { "epoch": 1.2470534904805077, "grad_norm": 1.5599825751058434, "learning_rate": 7.46820838653675e-05, "loss": 0.8955, "step": 2751 }, { "epoch": 1.2475067996373528, "grad_norm": 1.398240013161401, "learning_rate": 7.467577530333617e-05, "loss": 0.9033, "step": 2752 }, { "epoch": 1.2479601087941976, "grad_norm": 1.9367357360035298, "learning_rate": 7.466946326842402e-05, "loss": 0.9004, "step": 2753 }, { "epoch": 1.2484134179510427, "grad_norm": 1.6601739193168126, "learning_rate": 7.466314776126323e-05, "loss": 0.8973, "step": 2754 }, { "epoch": 1.2488667271078875, "grad_norm": 1.6498426152877235, "learning_rate": 7.465682878248632e-05, "loss": 0.8768, "step": 2755 }, { "epoch": 1.2493200362647325, "grad_norm": 1.4524108849897486, "learning_rate": 7.465050633272617e-05, "loss": 0.8952, "step": 2756 }, { "epoch": 1.2497733454215776, "grad_norm": 1.6907219219133094, "learning_rate": 7.464418041261594e-05, "loss": 0.906, "step": 2757 }, { "epoch": 1.2502266545784224, "grad_norm": 1.3537745282104527, "learning_rate": 7.463785102278925e-05, "loss": 0.8807, "step": 2758 }, { "epoch": 1.2506799637352675, "grad_norm": 1.9229887472129625, "learning_rate": 7.463151816387998e-05, "loss": 0.8928, "step": 2759 }, { "epoch": 1.2511332728921123, "grad_norm": 1.7400181994496158, "learning_rate": 7.462518183652239e-05, "loss": 0.9143, "step": 2760 }, { "epoch": 1.2515865820489573, "grad_norm": 1.46511799618278, "learning_rate": 7.461884204135107e-05, "loss": 0.9046, "step": 2761 }, { "epoch": 1.2520398912058024, "grad_norm": 1.254218207830477, "learning_rate": 7.461249877900097e-05, "loss": 0.9005, "step": 2762 }, { "epoch": 1.2524932003626472, "grad_norm": 1.7215257944744704, "learning_rate": 7.46061520501074e-05, "loss": 0.8947, "step": 2763 }, { "epoch": 1.2529465095194923, "grad_norm": 1.2961843150936996, "learning_rate": 7.4599801855306e-05, "loss": 0.9059, "step": 2764 }, { "epoch": 1.2533998186763373, "grad_norm": 1.875096489384654, "learning_rate": 7.459344819523274e-05, "loss": 0.9015, "step": 2765 }, { "epoch": 1.2538531278331821, "grad_norm": 1.662992670855282, "learning_rate": 7.4587091070524e-05, "loss": 0.9174, "step": 2766 }, { "epoch": 1.2543064369900272, "grad_norm": 1.4680303606350409, "learning_rate": 7.458073048181643e-05, "loss": 0.8962, "step": 2767 }, { "epoch": 1.2547597461468722, "grad_norm": 1.2907941800360798, "learning_rate": 7.457436642974707e-05, "loss": 0.9126, "step": 2768 }, { "epoch": 1.255213055303717, "grad_norm": 1.3213262720109202, "learning_rate": 7.45679989149533e-05, "loss": 0.9028, "step": 2769 }, { "epoch": 1.2556663644605621, "grad_norm": 1.041684042127033, "learning_rate": 7.456162793807284e-05, "loss": 0.9265, "step": 2770 }, { "epoch": 1.2561196736174072, "grad_norm": 1.3429189637957157, "learning_rate": 7.455525349974377e-05, "loss": 0.8939, "step": 2771 }, { "epoch": 1.256572982774252, "grad_norm": 0.8713616023441197, "learning_rate": 7.454887560060452e-05, "loss": 0.9153, "step": 2772 }, { "epoch": 1.257026291931097, "grad_norm": 1.594961744647598, "learning_rate": 7.454249424129383e-05, "loss": 0.8986, "step": 2773 }, { "epoch": 1.257479601087942, "grad_norm": 1.2351292255340587, "learning_rate": 7.453610942245082e-05, "loss": 0.9138, "step": 2774 }, { "epoch": 1.257932910244787, "grad_norm": 1.48091807514733, "learning_rate": 7.452972114471495e-05, "loss": 0.9075, "step": 2775 }, { "epoch": 1.258386219401632, "grad_norm": 1.4994931745104434, "learning_rate": 7.452332940872604e-05, "loss": 0.9066, "step": 2776 }, { "epoch": 1.2588395285584768, "grad_norm": 0.972044662773447, "learning_rate": 7.451693421512421e-05, "loss": 0.8761, "step": 2777 }, { "epoch": 1.2592928377153219, "grad_norm": 1.2024799628999079, "learning_rate": 7.451053556454999e-05, "loss": 0.8998, "step": 2778 }, { "epoch": 1.2597461468721667, "grad_norm": 1.0869982532299058, "learning_rate": 7.450413345764419e-05, "loss": 0.9263, "step": 2779 }, { "epoch": 1.2601994560290117, "grad_norm": 0.9441442222229482, "learning_rate": 7.449772789504803e-05, "loss": 0.9256, "step": 2780 }, { "epoch": 1.2606527651858568, "grad_norm": 0.9780116663416278, "learning_rate": 7.449131887740304e-05, "loss": 0.9187, "step": 2781 }, { "epoch": 1.2611060743427016, "grad_norm": 1.0358536243900056, "learning_rate": 7.448490640535109e-05, "loss": 0.9036, "step": 2782 }, { "epoch": 1.2615593834995467, "grad_norm": 0.9456460198915644, "learning_rate": 7.44784904795344e-05, "loss": 0.9135, "step": 2783 }, { "epoch": 1.2620126926563917, "grad_norm": 0.8749309871844312, "learning_rate": 7.447207110059559e-05, "loss": 0.9035, "step": 2784 }, { "epoch": 1.2624660018132365, "grad_norm": 0.9901683114187291, "learning_rate": 7.446564826917753e-05, "loss": 0.9215, "step": 2785 }, { "epoch": 1.2629193109700816, "grad_norm": 1.1614513822593815, "learning_rate": 7.44592219859235e-05, "loss": 0.9022, "step": 2786 }, { "epoch": 1.2633726201269266, "grad_norm": 0.649150738574164, "learning_rate": 7.445279225147712e-05, "loss": 0.908, "step": 2787 }, { "epoch": 1.2638259292837715, "grad_norm": 0.9139957395998561, "learning_rate": 7.444635906648234e-05, "loss": 0.9053, "step": 2788 }, { "epoch": 1.2642792384406165, "grad_norm": 1.0251810874947496, "learning_rate": 7.443992243158347e-05, "loss": 0.9058, "step": 2789 }, { "epoch": 1.2647325475974616, "grad_norm": 0.7458403985488183, "learning_rate": 7.443348234742513e-05, "loss": 0.9169, "step": 2790 }, { "epoch": 1.2651858567543064, "grad_norm": 0.8833017746077881, "learning_rate": 7.442703881465235e-05, "loss": 0.9184, "step": 2791 }, { "epoch": 1.2656391659111514, "grad_norm": 0.9033171015286059, "learning_rate": 7.442059183391046e-05, "loss": 0.894, "step": 2792 }, { "epoch": 1.2660924750679965, "grad_norm": 0.7257536767082956, "learning_rate": 7.441414140584513e-05, "loss": 0.8793, "step": 2793 }, { "epoch": 1.2665457842248413, "grad_norm": 0.6961662635470214, "learning_rate": 7.44076875311024e-05, "loss": 0.9084, "step": 2794 }, { "epoch": 1.2669990933816864, "grad_norm": 0.8392778304244884, "learning_rate": 7.440123021032863e-05, "loss": 0.8897, "step": 2795 }, { "epoch": 1.2674524025385312, "grad_norm": 0.8951828745794115, "learning_rate": 7.439476944417056e-05, "loss": 0.92, "step": 2796 }, { "epoch": 1.2679057116953762, "grad_norm": 0.9013212835822012, "learning_rate": 7.438830523327525e-05, "loss": 0.8934, "step": 2797 }, { "epoch": 1.2683590208522213, "grad_norm": 0.888067211175819, "learning_rate": 7.43818375782901e-05, "loss": 0.8829, "step": 2798 }, { "epoch": 1.2688123300090661, "grad_norm": 1.0110713461352794, "learning_rate": 7.437536647986287e-05, "loss": 0.9138, "step": 2799 }, { "epoch": 1.2692656391659112, "grad_norm": 1.3409204134814257, "learning_rate": 7.436889193864166e-05, "loss": 0.8924, "step": 2800 }, { "epoch": 1.269718948322756, "grad_norm": 0.4980900470681856, "learning_rate": 7.436241395527492e-05, "loss": 0.9266, "step": 2801 }, { "epoch": 1.270172257479601, "grad_norm": 0.8777564080683881, "learning_rate": 7.435593253041143e-05, "loss": 0.8841, "step": 2802 }, { "epoch": 1.270625566636446, "grad_norm": 1.37943870671828, "learning_rate": 7.434944766470032e-05, "loss": 0.9053, "step": 2803 }, { "epoch": 1.271078875793291, "grad_norm": 0.7554932691753187, "learning_rate": 7.434295935879107e-05, "loss": 0.8878, "step": 2804 }, { "epoch": 1.271532184950136, "grad_norm": 0.7929751180988278, "learning_rate": 7.433646761333353e-05, "loss": 0.9014, "step": 2805 }, { "epoch": 1.271985494106981, "grad_norm": 0.7004902441471335, "learning_rate": 7.432997242897782e-05, "loss": 0.8936, "step": 2806 }, { "epoch": 1.2724388032638259, "grad_norm": 0.7810761362172799, "learning_rate": 7.43234738063745e-05, "loss": 0.9112, "step": 2807 }, { "epoch": 1.272892112420671, "grad_norm": 1.0758208005212655, "learning_rate": 7.43169717461744e-05, "loss": 0.9044, "step": 2808 }, { "epoch": 1.273345421577516, "grad_norm": 0.9937596121398983, "learning_rate": 7.431046624902872e-05, "loss": 0.8982, "step": 2809 }, { "epoch": 1.2737987307343608, "grad_norm": 0.9949584909821253, "learning_rate": 7.430395731558901e-05, "loss": 0.909, "step": 2810 }, { "epoch": 1.2742520398912058, "grad_norm": 0.9974708349463909, "learning_rate": 7.429744494650715e-05, "loss": 0.9124, "step": 2811 }, { "epoch": 1.2747053490480509, "grad_norm": 1.0294334639365597, "learning_rate": 7.429092914243538e-05, "loss": 0.9126, "step": 2812 }, { "epoch": 1.2751586582048957, "grad_norm": 0.9801839918991749, "learning_rate": 7.428440990402629e-05, "loss": 0.9091, "step": 2813 }, { "epoch": 1.2756119673617408, "grad_norm": 0.7966394929115791, "learning_rate": 7.427788723193277e-05, "loss": 0.8904, "step": 2814 }, { "epoch": 1.2760652765185858, "grad_norm": 0.6092948883307407, "learning_rate": 7.427136112680813e-05, "loss": 0.8933, "step": 2815 }, { "epoch": 1.2765185856754306, "grad_norm": 0.49800574782716495, "learning_rate": 7.426483158930593e-05, "loss": 0.9117, "step": 2816 }, { "epoch": 1.2769718948322757, "grad_norm": 0.46278273407850185, "learning_rate": 7.425829862008014e-05, "loss": 0.8868, "step": 2817 }, { "epoch": 1.2774252039891205, "grad_norm": 0.48655073469382504, "learning_rate": 7.425176221978507e-05, "loss": 0.8857, "step": 2818 }, { "epoch": 1.2778785131459656, "grad_norm": 0.7482665169370137, "learning_rate": 7.424522238907534e-05, "loss": 0.9163, "step": 2819 }, { "epoch": 1.2783318223028104, "grad_norm": 1.0582179142978834, "learning_rate": 7.423867912860596e-05, "loss": 0.9086, "step": 2820 }, { "epoch": 1.2787851314596554, "grad_norm": 1.086794828597155, "learning_rate": 7.423213243903222e-05, "loss": 0.9103, "step": 2821 }, { "epoch": 1.2792384406165005, "grad_norm": 0.7942115464105454, "learning_rate": 7.422558232100983e-05, "loss": 0.8933, "step": 2822 }, { "epoch": 1.2796917497733453, "grad_norm": 0.985008591443496, "learning_rate": 7.421902877519477e-05, "loss": 0.9285, "step": 2823 }, { "epoch": 1.2801450589301904, "grad_norm": 0.5046315051217325, "learning_rate": 7.42124718022434e-05, "loss": 0.928, "step": 2824 }, { "epoch": 1.2805983680870354, "grad_norm": 0.5535682697353814, "learning_rate": 7.420591140281246e-05, "loss": 0.8883, "step": 2825 }, { "epoch": 1.2810516772438802, "grad_norm": 0.9130320151061047, "learning_rate": 7.419934757755895e-05, "loss": 0.899, "step": 2826 }, { "epoch": 1.2815049864007253, "grad_norm": 1.1893950467824734, "learning_rate": 7.419278032714026e-05, "loss": 0.929, "step": 2827 }, { "epoch": 1.2819582955575703, "grad_norm": 1.0227651199238244, "learning_rate": 7.418620965221414e-05, "loss": 0.9048, "step": 2828 }, { "epoch": 1.2824116047144152, "grad_norm": 0.85244263384307, "learning_rate": 7.417963555343865e-05, "loss": 0.9128, "step": 2829 }, { "epoch": 1.2828649138712602, "grad_norm": 0.7706451942033589, "learning_rate": 7.417305803147222e-05, "loss": 0.9087, "step": 2830 }, { "epoch": 1.2833182230281053, "grad_norm": 0.7580065360100893, "learning_rate": 7.416647708697357e-05, "loss": 0.9055, "step": 2831 }, { "epoch": 1.28377153218495, "grad_norm": 0.7352430694402456, "learning_rate": 7.415989272060183e-05, "loss": 0.889, "step": 2832 }, { "epoch": 1.2842248413417952, "grad_norm": 0.5719524114791386, "learning_rate": 7.415330493301645e-05, "loss": 0.8995, "step": 2833 }, { "epoch": 1.2846781504986402, "grad_norm": 0.5182972581525928, "learning_rate": 7.414671372487722e-05, "loss": 0.907, "step": 2834 }, { "epoch": 1.285131459655485, "grad_norm": 1.273065220770592, "learning_rate": 7.414011909684424e-05, "loss": 0.8926, "step": 2835 }, { "epoch": 1.28558476881233, "grad_norm": 0.5046523177249785, "learning_rate": 7.4133521049578e-05, "loss": 0.8979, "step": 2836 }, { "epoch": 1.286038077969175, "grad_norm": 0.6296165716369754, "learning_rate": 7.412691958373929e-05, "loss": 0.9351, "step": 2837 }, { "epoch": 1.28649138712602, "grad_norm": 0.6308121397511393, "learning_rate": 7.41203146999893e-05, "loss": 0.9162, "step": 2838 }, { "epoch": 1.2869446962828648, "grad_norm": 0.7474328349549887, "learning_rate": 7.411370639898951e-05, "loss": 0.8819, "step": 2839 }, { "epoch": 1.2873980054397098, "grad_norm": 0.9419680915471272, "learning_rate": 7.410709468140178e-05, "loss": 0.9161, "step": 2840 }, { "epoch": 1.2878513145965549, "grad_norm": 0.9999596514780268, "learning_rate": 7.410047954788828e-05, "loss": 0.9009, "step": 2841 }, { "epoch": 1.2883046237533997, "grad_norm": 1.1302892670679667, "learning_rate": 7.409386099911152e-05, "loss": 0.9063, "step": 2842 }, { "epoch": 1.2887579329102448, "grad_norm": 0.9700571083557177, "learning_rate": 7.408723903573439e-05, "loss": 0.9186, "step": 2843 }, { "epoch": 1.2892112420670898, "grad_norm": 1.002958079331834, "learning_rate": 7.40806136584201e-05, "loss": 0.9039, "step": 2844 }, { "epoch": 1.2896645512239346, "grad_norm": 1.0920836187039127, "learning_rate": 7.407398486783218e-05, "loss": 0.9074, "step": 2845 }, { "epoch": 1.2901178603807797, "grad_norm": 0.9394651351415685, "learning_rate": 7.406735266463452e-05, "loss": 0.8932, "step": 2846 }, { "epoch": 1.2905711695376247, "grad_norm": 0.8839424163854838, "learning_rate": 7.406071704949139e-05, "loss": 0.9, "step": 2847 }, { "epoch": 1.2910244786944696, "grad_norm": 1.091342857410111, "learning_rate": 7.405407802306733e-05, "loss": 0.9039, "step": 2848 }, { "epoch": 1.2914777878513146, "grad_norm": 1.0000072591616014, "learning_rate": 7.404743558602728e-05, "loss": 0.8706, "step": 2849 }, { "epoch": 1.2919310970081597, "grad_norm": 0.9343521101546638, "learning_rate": 7.40407897390365e-05, "loss": 0.8788, "step": 2850 }, { "epoch": 1.2923844061650045, "grad_norm": 0.8964729551799999, "learning_rate": 7.403414048276056e-05, "loss": 0.891, "step": 2851 }, { "epoch": 1.2928377153218495, "grad_norm": 0.8127383621882387, "learning_rate": 7.402748781786545e-05, "loss": 0.9078, "step": 2852 }, { "epoch": 1.2932910244786946, "grad_norm": 0.7593207431366779, "learning_rate": 7.402083174501742e-05, "loss": 0.9076, "step": 2853 }, { "epoch": 1.2937443336355394, "grad_norm": 0.782742940584997, "learning_rate": 7.40141722648831e-05, "loss": 0.8877, "step": 2854 }, { "epoch": 1.2941976427923845, "grad_norm": 0.7358200835686886, "learning_rate": 7.400750937812948e-05, "loss": 0.9092, "step": 2855 }, { "epoch": 1.2946509519492293, "grad_norm": 0.7007752785379677, "learning_rate": 7.400084308542383e-05, "loss": 0.9081, "step": 2856 }, { "epoch": 1.2951042611060744, "grad_norm": 0.658513761499961, "learning_rate": 7.399417338743382e-05, "loss": 0.9165, "step": 2857 }, { "epoch": 1.2955575702629192, "grad_norm": 0.7572329964912522, "learning_rate": 7.398750028482745e-05, "loss": 0.8997, "step": 2858 }, { "epoch": 1.2960108794197642, "grad_norm": 0.9642529612663617, "learning_rate": 7.398082377827303e-05, "loss": 0.8911, "step": 2859 }, { "epoch": 1.2964641885766093, "grad_norm": 1.2778413155741908, "learning_rate": 7.397414386843925e-05, "loss": 0.9197, "step": 2860 }, { "epoch": 1.296917497733454, "grad_norm": 0.660699099373726, "learning_rate": 7.39674605559951e-05, "loss": 0.9008, "step": 2861 }, { "epoch": 1.2973708068902992, "grad_norm": 0.5226072065553381, "learning_rate": 7.396077384160996e-05, "loss": 0.8971, "step": 2862 }, { "epoch": 1.2978241160471442, "grad_norm": 0.8441193802928766, "learning_rate": 7.39540837259535e-05, "loss": 0.8973, "step": 2863 }, { "epoch": 1.298277425203989, "grad_norm": 1.3174905516901871, "learning_rate": 7.394739020969579e-05, "loss": 0.9117, "step": 2864 }, { "epoch": 1.298730734360834, "grad_norm": 0.7690746071873821, "learning_rate": 7.394069329350714e-05, "loss": 0.8953, "step": 2865 }, { "epoch": 1.2991840435176791, "grad_norm": 0.6780358081780408, "learning_rate": 7.393399297805834e-05, "loss": 0.9433, "step": 2866 }, { "epoch": 1.299637352674524, "grad_norm": 0.8345198250897263, "learning_rate": 7.39272892640204e-05, "loss": 0.9045, "step": 2867 }, { "epoch": 1.300090661831369, "grad_norm": 1.0435416767681633, "learning_rate": 7.392058215206474e-05, "loss": 0.8984, "step": 2868 }, { "epoch": 1.300543970988214, "grad_norm": 0.9190976974141719, "learning_rate": 7.391387164286307e-05, "loss": 0.9018, "step": 2869 }, { "epoch": 1.3009972801450589, "grad_norm": 0.9193921432049867, "learning_rate": 7.390715773708749e-05, "loss": 0.8909, "step": 2870 }, { "epoch": 1.301450589301904, "grad_norm": 1.2534185142125913, "learning_rate": 7.390044043541041e-05, "loss": 0.9154, "step": 2871 }, { "epoch": 1.301903898458749, "grad_norm": 0.8798656777462052, "learning_rate": 7.38937197385046e-05, "loss": 0.8975, "step": 2872 }, { "epoch": 1.3023572076155938, "grad_norm": 0.7407851546296778, "learning_rate": 7.388699564704313e-05, "loss": 0.8941, "step": 2873 }, { "epoch": 1.3028105167724389, "grad_norm": 0.6009156886899882, "learning_rate": 7.388026816169947e-05, "loss": 0.9057, "step": 2874 }, { "epoch": 1.3032638259292837, "grad_norm": 0.7959769362547559, "learning_rate": 7.387353728314738e-05, "loss": 0.8883, "step": 2875 }, { "epoch": 1.3037171350861287, "grad_norm": 1.205873502867238, "learning_rate": 7.386680301206097e-05, "loss": 0.8913, "step": 2876 }, { "epoch": 1.3041704442429736, "grad_norm": 0.9440603717321702, "learning_rate": 7.386006534911471e-05, "loss": 0.898, "step": 2877 }, { "epoch": 1.3046237533998186, "grad_norm": 0.8674253529802938, "learning_rate": 7.385332429498339e-05, "loss": 0.8946, "step": 2878 }, { "epoch": 1.3050770625566637, "grad_norm": 0.8661327326303528, "learning_rate": 7.384657985034215e-05, "loss": 0.8944, "step": 2879 }, { "epoch": 1.3055303717135085, "grad_norm": 0.8814598280571068, "learning_rate": 7.383983201586645e-05, "loss": 0.9042, "step": 2880 }, { "epoch": 1.3059836808703535, "grad_norm": 1.0659377707934474, "learning_rate": 7.383308079223215e-05, "loss": 0.92, "step": 2881 }, { "epoch": 1.3064369900271986, "grad_norm": 1.0347338328608489, "learning_rate": 7.382632618011536e-05, "loss": 0.9106, "step": 2882 }, { "epoch": 1.3068902991840434, "grad_norm": 0.8681158264095176, "learning_rate": 7.381956818019258e-05, "loss": 0.9102, "step": 2883 }, { "epoch": 1.3073436083408885, "grad_norm": 0.8611081800663157, "learning_rate": 7.381280679314067e-05, "loss": 0.9033, "step": 2884 }, { "epoch": 1.3077969174977335, "grad_norm": 1.0055356132593627, "learning_rate": 7.380604201963677e-05, "loss": 0.9098, "step": 2885 }, { "epoch": 1.3082502266545784, "grad_norm": 1.001293690927422, "learning_rate": 7.379927386035841e-05, "loss": 0.9017, "step": 2886 }, { "epoch": 1.3087035358114234, "grad_norm": 1.1414233429542675, "learning_rate": 7.379250231598346e-05, "loss": 0.8971, "step": 2887 }, { "epoch": 1.3091568449682685, "grad_norm": 0.9209685151217206, "learning_rate": 7.378572738719007e-05, "loss": 0.8903, "step": 2888 }, { "epoch": 1.3096101541251133, "grad_norm": 0.7673543994027892, "learning_rate": 7.377894907465679e-05, "loss": 0.8913, "step": 2889 }, { "epoch": 1.3100634632819583, "grad_norm": 0.6353892089702549, "learning_rate": 7.377216737906247e-05, "loss": 0.9096, "step": 2890 }, { "epoch": 1.3105167724388034, "grad_norm": 0.6513162745162514, "learning_rate": 7.376538230108636e-05, "loss": 0.8955, "step": 2891 }, { "epoch": 1.3109700815956482, "grad_norm": 0.6326693372597099, "learning_rate": 7.375859384140797e-05, "loss": 0.8794, "step": 2892 }, { "epoch": 1.3114233907524933, "grad_norm": 0.617010622185089, "learning_rate": 7.375180200070718e-05, "loss": 0.9218, "step": 2893 }, { "epoch": 1.3118766999093383, "grad_norm": 0.580381014100176, "learning_rate": 7.374500677966424e-05, "loss": 0.8859, "step": 2894 }, { "epoch": 1.3123300090661831, "grad_norm": 0.4847734638933181, "learning_rate": 7.373820817895968e-05, "loss": 0.8932, "step": 2895 }, { "epoch": 1.3127833182230282, "grad_norm": 0.42468212059722316, "learning_rate": 7.373140619927444e-05, "loss": 0.9063, "step": 2896 }, { "epoch": 1.313236627379873, "grad_norm": 0.5587875451031711, "learning_rate": 7.372460084128971e-05, "loss": 0.9085, "step": 2897 }, { "epoch": 1.313689936536718, "grad_norm": 0.5939978833879807, "learning_rate": 7.37177921056871e-05, "loss": 0.8956, "step": 2898 }, { "epoch": 1.314143245693563, "grad_norm": 0.5739417007163263, "learning_rate": 7.371097999314852e-05, "loss": 0.9078, "step": 2899 }, { "epoch": 1.314596554850408, "grad_norm": 0.5506685952125578, "learning_rate": 7.370416450435622e-05, "loss": 0.9044, "step": 2900 }, { "epoch": 1.315049864007253, "grad_norm": 0.645396556450811, "learning_rate": 7.369734563999278e-05, "loss": 0.8976, "step": 2901 }, { "epoch": 1.3155031731640978, "grad_norm": 0.8290957116952254, "learning_rate": 7.369052340074116e-05, "loss": 0.8871, "step": 2902 }, { "epoch": 1.3159564823209429, "grad_norm": 0.9759915683814087, "learning_rate": 7.368369778728458e-05, "loss": 0.9126, "step": 2903 }, { "epoch": 1.316409791477788, "grad_norm": 1.0707187667451525, "learning_rate": 7.367686880030668e-05, "loss": 0.8999, "step": 2904 }, { "epoch": 1.3168631006346327, "grad_norm": 0.8788747545805095, "learning_rate": 7.36700364404914e-05, "loss": 0.8895, "step": 2905 }, { "epoch": 1.3173164097914778, "grad_norm": 0.756677476564175, "learning_rate": 7.366320070852302e-05, "loss": 0.9217, "step": 2906 }, { "epoch": 1.3177697189483228, "grad_norm": 0.779676925501923, "learning_rate": 7.365636160508615e-05, "loss": 0.9156, "step": 2907 }, { "epoch": 1.3182230281051677, "grad_norm": 1.0372016233599939, "learning_rate": 7.364951913086575e-05, "loss": 0.9073, "step": 2908 }, { "epoch": 1.3186763372620127, "grad_norm": 1.3512113967219046, "learning_rate": 7.364267328654712e-05, "loss": 0.9087, "step": 2909 }, { "epoch": 1.3191296464188578, "grad_norm": 0.7633052012659666, "learning_rate": 7.363582407281588e-05, "loss": 0.8896, "step": 2910 }, { "epoch": 1.3195829555757026, "grad_norm": 0.6735579205731137, "learning_rate": 7.362897149035802e-05, "loss": 0.8872, "step": 2911 }, { "epoch": 1.3200362647325476, "grad_norm": 0.7329080712441788, "learning_rate": 7.36221155398598e-05, "loss": 0.9163, "step": 2912 }, { "epoch": 1.3204895738893927, "grad_norm": 1.0357556201658245, "learning_rate": 7.361525622200792e-05, "loss": 0.8956, "step": 2913 }, { "epoch": 1.3209428830462375, "grad_norm": 1.333439730341162, "learning_rate": 7.360839353748933e-05, "loss": 0.8958, "step": 2914 }, { "epoch": 1.3213961922030826, "grad_norm": 0.6937496447373178, "learning_rate": 7.360152748699136e-05, "loss": 0.9092, "step": 2915 }, { "epoch": 1.3218495013599274, "grad_norm": 0.9403622123520983, "learning_rate": 7.359465807120166e-05, "loss": 0.8837, "step": 2916 }, { "epoch": 1.3223028105167725, "grad_norm": 1.2413947723350593, "learning_rate": 7.358778529080821e-05, "loss": 0.8888, "step": 2917 }, { "epoch": 1.3227561196736173, "grad_norm": 0.9948171359956905, "learning_rate": 7.358090914649935e-05, "loss": 0.9021, "step": 2918 }, { "epoch": 1.3232094288304623, "grad_norm": 1.4461414384242657, "learning_rate": 7.357402963896375e-05, "loss": 0.8918, "step": 2919 }, { "epoch": 1.3236627379873074, "grad_norm": 0.5657998991292276, "learning_rate": 7.356714676889041e-05, "loss": 0.8926, "step": 2920 }, { "epoch": 1.3241160471441522, "grad_norm": 1.22923428244796, "learning_rate": 7.356026053696867e-05, "loss": 0.914, "step": 2921 }, { "epoch": 1.3245693563009973, "grad_norm": 1.1951682932728906, "learning_rate": 7.355337094388821e-05, "loss": 0.9115, "step": 2922 }, { "epoch": 1.3250226654578423, "grad_norm": 1.0103246487817912, "learning_rate": 7.354647799033903e-05, "loss": 0.9163, "step": 2923 }, { "epoch": 1.3254759746146871, "grad_norm": 0.9627538832481481, "learning_rate": 7.35395816770115e-05, "loss": 0.8995, "step": 2924 }, { "epoch": 1.3259292837715322, "grad_norm": 0.6121644218383729, "learning_rate": 7.353268200459628e-05, "loss": 0.8902, "step": 2925 }, { "epoch": 1.3263825929283772, "grad_norm": 0.9171020581164868, "learning_rate": 7.352577897378441e-05, "loss": 0.8763, "step": 2926 }, { "epoch": 1.326835902085222, "grad_norm": 1.0994945969434344, "learning_rate": 7.351887258526725e-05, "loss": 0.9279, "step": 2927 }, { "epoch": 1.3272892112420671, "grad_norm": 1.0649405331579191, "learning_rate": 7.351196283973648e-05, "loss": 0.9058, "step": 2928 }, { "epoch": 1.3277425203989122, "grad_norm": 1.3283440589525823, "learning_rate": 7.350504973788416e-05, "loss": 0.8955, "step": 2929 }, { "epoch": 1.328195829555757, "grad_norm": 0.7402710184124817, "learning_rate": 7.349813328040263e-05, "loss": 0.9027, "step": 2930 }, { "epoch": 1.328649138712602, "grad_norm": 1.1567175009256359, "learning_rate": 7.349121346798461e-05, "loss": 0.9392, "step": 2931 }, { "epoch": 1.329102447869447, "grad_norm": 0.9860586132818991, "learning_rate": 7.348429030132312e-05, "loss": 0.899, "step": 2932 }, { "epoch": 1.329555757026292, "grad_norm": 1.2578597978669448, "learning_rate": 7.347736378111155e-05, "loss": 0.8848, "step": 2933 }, { "epoch": 1.330009066183137, "grad_norm": 0.7984297582789955, "learning_rate": 7.347043390804361e-05, "loss": 0.9268, "step": 2934 }, { "epoch": 1.3304623753399818, "grad_norm": 0.9027271067210628, "learning_rate": 7.346350068281335e-05, "loss": 0.8966, "step": 2935 }, { "epoch": 1.3309156844968268, "grad_norm": 0.839167514927094, "learning_rate": 7.345656410611515e-05, "loss": 0.9119, "step": 2936 }, { "epoch": 1.3313689936536717, "grad_norm": 0.8865241614370813, "learning_rate": 7.344962417864372e-05, "loss": 0.9142, "step": 2937 }, { "epoch": 1.3318223028105167, "grad_norm": 1.0955811283782844, "learning_rate": 7.344268090109414e-05, "loss": 0.9087, "step": 2938 }, { "epoch": 1.3322756119673618, "grad_norm": 1.004664735230567, "learning_rate": 7.343573427416175e-05, "loss": 0.9139, "step": 2939 }, { "epoch": 1.3327289211242066, "grad_norm": 1.153070526870687, "learning_rate": 7.342878429854233e-05, "loss": 0.8863, "step": 2940 }, { "epoch": 1.3331822302810517, "grad_norm": 0.901121427913594, "learning_rate": 7.342183097493192e-05, "loss": 0.8898, "step": 2941 }, { "epoch": 1.3336355394378967, "grad_norm": 0.9130704665676795, "learning_rate": 7.34148743040269e-05, "loss": 0.9146, "step": 2942 }, { "epoch": 1.3340888485947415, "grad_norm": 0.7458073600295562, "learning_rate": 7.340791428652401e-05, "loss": 0.9141, "step": 2943 }, { "epoch": 1.3345421577515866, "grad_norm": 0.798978924186664, "learning_rate": 7.340095092312034e-05, "loss": 0.9141, "step": 2944 }, { "epoch": 1.3349954669084316, "grad_norm": 0.8397225009178588, "learning_rate": 7.339398421451325e-05, "loss": 0.9066, "step": 2945 }, { "epoch": 1.3354487760652765, "grad_norm": 0.7399518594223755, "learning_rate": 7.338701416140051e-05, "loss": 0.9093, "step": 2946 }, { "epoch": 1.3359020852221215, "grad_norm": 0.963820662402483, "learning_rate": 7.338004076448018e-05, "loss": 0.9105, "step": 2947 }, { "epoch": 1.3363553943789666, "grad_norm": 1.2464560978595205, "learning_rate": 7.337306402445066e-05, "loss": 0.9172, "step": 2948 }, { "epoch": 1.3368087035358114, "grad_norm": 0.8981420268898725, "learning_rate": 7.33660839420107e-05, "loss": 0.919, "step": 2949 }, { "epoch": 1.3372620126926564, "grad_norm": 0.8485164708576425, "learning_rate": 7.335910051785938e-05, "loss": 0.8889, "step": 2950 }, { "epoch": 1.3377153218495015, "grad_norm": 0.7660953604999216, "learning_rate": 7.335211375269609e-05, "loss": 0.9051, "step": 2951 }, { "epoch": 1.3381686310063463, "grad_norm": 0.8093275121607648, "learning_rate": 7.334512364722059e-05, "loss": 0.9165, "step": 2952 }, { "epoch": 1.3386219401631914, "grad_norm": 0.7854584158925971, "learning_rate": 7.333813020213295e-05, "loss": 0.8754, "step": 2953 }, { "epoch": 1.3390752493200362, "grad_norm": 0.7794041815683792, "learning_rate": 7.333113341813362e-05, "loss": 0.8855, "step": 2954 }, { "epoch": 1.3395285584768812, "grad_norm": 0.8892802675021511, "learning_rate": 7.33241332959233e-05, "loss": 0.907, "step": 2955 }, { "epoch": 1.339981867633726, "grad_norm": 1.0864284786286493, "learning_rate": 7.331712983620308e-05, "loss": 0.9032, "step": 2956 }, { "epoch": 1.3404351767905711, "grad_norm": 1.1793224887564477, "learning_rate": 7.33101230396744e-05, "loss": 0.902, "step": 2957 }, { "epoch": 1.3408884859474162, "grad_norm": 0.8142491240260086, "learning_rate": 7.330311290703901e-05, "loss": 0.8855, "step": 2958 }, { "epoch": 1.341341795104261, "grad_norm": 0.8129179619106234, "learning_rate": 7.329609943899897e-05, "loss": 0.8926, "step": 2959 }, { "epoch": 1.341795104261106, "grad_norm": 0.848393386834815, "learning_rate": 7.328908263625673e-05, "loss": 0.8936, "step": 2960 }, { "epoch": 1.342248413417951, "grad_norm": 1.0764301336449005, "learning_rate": 7.328206249951502e-05, "loss": 0.9089, "step": 2961 }, { "epoch": 1.342701722574796, "grad_norm": 1.1766561040214507, "learning_rate": 7.327503902947695e-05, "loss": 0.8967, "step": 2962 }, { "epoch": 1.343155031731641, "grad_norm": 0.7618794012748528, "learning_rate": 7.326801222684591e-05, "loss": 0.9031, "step": 2963 }, { "epoch": 1.343608340888486, "grad_norm": 0.5932487548761755, "learning_rate": 7.326098209232568e-05, "loss": 0.8976, "step": 2964 }, { "epoch": 1.3440616500453308, "grad_norm": 0.5494601822471082, "learning_rate": 7.325394862662034e-05, "loss": 0.8969, "step": 2965 }, { "epoch": 1.344514959202176, "grad_norm": 0.536326972056143, "learning_rate": 7.324691183043432e-05, "loss": 0.8897, "step": 2966 }, { "epoch": 1.344968268359021, "grad_norm": 0.5173814225468397, "learning_rate": 7.323987170447237e-05, "loss": 0.8844, "step": 2967 }, { "epoch": 1.3454215775158658, "grad_norm": 0.6087731608555739, "learning_rate": 7.323282824943957e-05, "loss": 0.9057, "step": 2968 }, { "epoch": 1.3458748866727108, "grad_norm": 0.4815626399967318, "learning_rate": 7.322578146604135e-05, "loss": 0.9042, "step": 2969 }, { "epoch": 1.3463281958295559, "grad_norm": 0.6092501507444624, "learning_rate": 7.321873135498348e-05, "loss": 0.8892, "step": 2970 }, { "epoch": 1.3467815049864007, "grad_norm": 0.8397458605222651, "learning_rate": 7.321167791697203e-05, "loss": 0.9106, "step": 2971 }, { "epoch": 1.3472348141432458, "grad_norm": 1.185258525730492, "learning_rate": 7.320462115271342e-05, "loss": 0.9077, "step": 2972 }, { "epoch": 1.3476881233000906, "grad_norm": 1.187085139929021, "learning_rate": 7.319756106291443e-05, "loss": 0.907, "step": 2973 }, { "epoch": 1.3481414324569356, "grad_norm": 0.8844382191121565, "learning_rate": 7.319049764828213e-05, "loss": 0.9025, "step": 2974 }, { "epoch": 1.3485947416137807, "grad_norm": 0.7542142510620641, "learning_rate": 7.318343090952393e-05, "loss": 0.9232, "step": 2975 }, { "epoch": 1.3490480507706255, "grad_norm": 0.6821706312483432, "learning_rate": 7.317636084734763e-05, "loss": 0.9271, "step": 2976 }, { "epoch": 1.3495013599274706, "grad_norm": 0.7835920755661802, "learning_rate": 7.316928746246125e-05, "loss": 0.895, "step": 2977 }, { "epoch": 1.3499546690843154, "grad_norm": 0.8861972417474347, "learning_rate": 7.316221075557327e-05, "loss": 0.8902, "step": 2978 }, { "epoch": 1.3504079782411604, "grad_norm": 0.8355793320663961, "learning_rate": 7.315513072739243e-05, "loss": 0.8933, "step": 2979 }, { "epoch": 1.3508612873980055, "grad_norm": 0.7278685855416024, "learning_rate": 7.31480473786278e-05, "loss": 0.8946, "step": 2980 }, { "epoch": 1.3513145965548503, "grad_norm": 0.9288308013285924, "learning_rate": 7.31409607099888e-05, "loss": 0.9114, "step": 2981 }, { "epoch": 1.3517679057116954, "grad_norm": 1.065770517019232, "learning_rate": 7.313387072218518e-05, "loss": 0.9091, "step": 2982 }, { "epoch": 1.3522212148685404, "grad_norm": 0.8387982656427078, "learning_rate": 7.312677741592703e-05, "loss": 0.8929, "step": 2983 }, { "epoch": 1.3526745240253852, "grad_norm": 0.8368787365083383, "learning_rate": 7.311968079192478e-05, "loss": 0.9099, "step": 2984 }, { "epoch": 1.3531278331822303, "grad_norm": 0.9964965018896457, "learning_rate": 7.311258085088914e-05, "loss": 0.9064, "step": 2985 }, { "epoch": 1.3535811423390753, "grad_norm": 1.2223946955128178, "learning_rate": 7.310547759353122e-05, "loss": 0.9076, "step": 2986 }, { "epoch": 1.3540344514959202, "grad_norm": 0.8855812923490253, "learning_rate": 7.309837102056243e-05, "loss": 0.9131, "step": 2987 }, { "epoch": 1.3544877606527652, "grad_norm": 0.917047765129397, "learning_rate": 7.309126113269451e-05, "loss": 0.9149, "step": 2988 }, { "epoch": 1.3549410698096103, "grad_norm": 2.255240478673682, "learning_rate": 7.308414793063952e-05, "loss": 0.9353, "step": 2989 }, { "epoch": 1.355394378966455, "grad_norm": 1.0842003962060545, "learning_rate": 7.307703141510989e-05, "loss": 0.9013, "step": 2990 }, { "epoch": 1.3558476881233001, "grad_norm": 2.6023703629260986, "learning_rate": 7.306991158681835e-05, "loss": 0.923, "step": 2991 }, { "epoch": 1.3563009972801452, "grad_norm": 2.139635632024924, "learning_rate": 7.306278844647797e-05, "loss": 0.9273, "step": 2992 }, { "epoch": 1.35675430643699, "grad_norm": 1.837525496490951, "learning_rate": 7.305566199480217e-05, "loss": 0.9002, "step": 2993 }, { "epoch": 1.357207615593835, "grad_norm": 1.896821893120221, "learning_rate": 7.304853223250467e-05, "loss": 0.8979, "step": 2994 }, { "epoch": 1.35766092475068, "grad_norm": 1.811058819131917, "learning_rate": 7.304139916029953e-05, "loss": 0.8987, "step": 2995 }, { "epoch": 1.358114233907525, "grad_norm": 1.3692618250991744, "learning_rate": 7.303426277890116e-05, "loss": 0.8989, "step": 2996 }, { "epoch": 1.3585675430643698, "grad_norm": 2.068713287103102, "learning_rate": 7.302712308902429e-05, "loss": 0.8993, "step": 2997 }, { "epoch": 1.3590208522212148, "grad_norm": 1.7359472751304181, "learning_rate": 7.301998009138397e-05, "loss": 0.906, "step": 2998 }, { "epoch": 1.3594741613780599, "grad_norm": 1.9592198148083495, "learning_rate": 7.301283378669562e-05, "loss": 0.9265, "step": 2999 }, { "epoch": 1.3599274705349047, "grad_norm": 1.701003887798315, "learning_rate": 7.300568417567492e-05, "loss": 0.9113, "step": 3000 }, { "epoch": 1.3603807796917498, "grad_norm": 1.800670562434586, "learning_rate": 7.299853125903796e-05, "loss": 0.8912, "step": 3001 }, { "epoch": 1.3608340888485948, "grad_norm": 1.4854558266072382, "learning_rate": 7.299137503750112e-05, "loss": 0.9052, "step": 3002 }, { "epoch": 1.3612873980054396, "grad_norm": 2.1192651319607165, "learning_rate": 7.298421551178109e-05, "loss": 0.9345, "step": 3003 }, { "epoch": 1.3617407071622847, "grad_norm": 1.6734310262770855, "learning_rate": 7.297705268259496e-05, "loss": 0.8981, "step": 3004 }, { "epoch": 1.3621940163191297, "grad_norm": 1.9809457987659935, "learning_rate": 7.296988655066006e-05, "loss": 0.9149, "step": 3005 }, { "epoch": 1.3626473254759746, "grad_norm": 5.009701722759714, "learning_rate": 7.296271711669415e-05, "loss": 0.9164, "step": 3006 }, { "epoch": 1.3631006346328196, "grad_norm": 0.9918078485361573, "learning_rate": 7.295554438141523e-05, "loss": 0.9126, "step": 3007 }, { "epoch": 1.3635539437896647, "grad_norm": 2.1188861746390812, "learning_rate": 7.29483683455417e-05, "loss": 0.9252, "step": 3008 }, { "epoch": 1.3640072529465095, "grad_norm": 7.429519310704249, "learning_rate": 7.294118900979224e-05, "loss": 0.9119, "step": 3009 }, { "epoch": 1.3644605621033545, "grad_norm": 1.5880366888211854, "learning_rate": 7.293400637488588e-05, "loss": 0.9284, "step": 3010 }, { "epoch": 1.3649138712601996, "grad_norm": 1.2605915092832833, "learning_rate": 7.292682044154199e-05, "loss": 0.9289, "step": 3011 }, { "epoch": 1.3653671804170444, "grad_norm": 1.2080984076144865, "learning_rate": 7.291963121048027e-05, "loss": 0.9167, "step": 3012 }, { "epoch": 1.3658204895738895, "grad_norm": 1.2252550985622144, "learning_rate": 7.291243868242073e-05, "loss": 0.914, "step": 3013 }, { "epoch": 1.3662737987307343, "grad_norm": 1.1351607849720324, "learning_rate": 7.290524285808375e-05, "loss": 0.9068, "step": 3014 }, { "epoch": 1.3667271078875793, "grad_norm": 1.0918357622737496, "learning_rate": 7.289804373818996e-05, "loss": 0.9169, "step": 3015 }, { "epoch": 1.3671804170444242, "grad_norm": 1.1556314475864793, "learning_rate": 7.289084132346042e-05, "loss": 0.9109, "step": 3016 }, { "epoch": 1.3676337262012692, "grad_norm": 1.0106086988278864, "learning_rate": 7.288363561461645e-05, "loss": 0.9139, "step": 3017 }, { "epoch": 1.3680870353581143, "grad_norm": 1.4768104474611372, "learning_rate": 7.287642661237974e-05, "loss": 0.8834, "step": 3018 }, { "epoch": 1.368540344514959, "grad_norm": 0.8897538142167286, "learning_rate": 7.286921431747228e-05, "loss": 0.9021, "step": 3019 }, { "epoch": 1.3689936536718041, "grad_norm": 1.7082642205206588, "learning_rate": 7.286199873061639e-05, "loss": 0.8941, "step": 3020 }, { "epoch": 1.3694469628286492, "grad_norm": 1.262854132244431, "learning_rate": 7.285477985253477e-05, "loss": 0.9188, "step": 3021 }, { "epoch": 1.369900271985494, "grad_norm": 1.6166268863048596, "learning_rate": 7.284755768395037e-05, "loss": 0.9024, "step": 3022 }, { "epoch": 1.370353581142339, "grad_norm": 1.3664269984022732, "learning_rate": 7.284033222558656e-05, "loss": 0.9153, "step": 3023 }, { "epoch": 1.3708068902991841, "grad_norm": 1.4970548845893055, "learning_rate": 7.283310347816694e-05, "loss": 0.9045, "step": 3024 }, { "epoch": 1.371260199456029, "grad_norm": 1.2347493884849379, "learning_rate": 7.28258714424155e-05, "loss": 0.906, "step": 3025 }, { "epoch": 1.371713508612874, "grad_norm": 1.4258743208394589, "learning_rate": 7.281863611905659e-05, "loss": 0.9321, "step": 3026 }, { "epoch": 1.372166817769719, "grad_norm": 1.066822734220586, "learning_rate": 7.28113975088148e-05, "loss": 0.9109, "step": 3027 }, { "epoch": 1.3726201269265639, "grad_norm": 1.4779377802135993, "learning_rate": 7.280415561241513e-05, "loss": 0.908, "step": 3028 }, { "epoch": 1.373073436083409, "grad_norm": 1.1846982231679368, "learning_rate": 7.279691043058285e-05, "loss": 0.9114, "step": 3029 }, { "epoch": 1.373526745240254, "grad_norm": 1.419631479888708, "learning_rate": 7.278966196404361e-05, "loss": 0.9037, "step": 3030 }, { "epoch": 1.3739800543970988, "grad_norm": 1.2187336108531548, "learning_rate": 7.278241021352337e-05, "loss": 0.9173, "step": 3031 }, { "epoch": 1.3744333635539439, "grad_norm": 1.299540639552505, "learning_rate": 7.277515517974839e-05, "loss": 0.8787, "step": 3032 }, { "epoch": 1.3748866727107887, "grad_norm": 1.121264866388586, "learning_rate": 7.276789686344529e-05, "loss": 0.9306, "step": 3033 }, { "epoch": 1.3753399818676337, "grad_norm": 1.354883664535278, "learning_rate": 7.276063526534102e-05, "loss": 0.9106, "step": 3034 }, { "epoch": 1.3757932910244786, "grad_norm": 1.046042925375835, "learning_rate": 7.275337038616285e-05, "loss": 0.9224, "step": 3035 }, { "epoch": 1.3762466001813236, "grad_norm": 1.334429141434855, "learning_rate": 7.274610222663838e-05, "loss": 0.9073, "step": 3036 }, { "epoch": 1.3766999093381687, "grad_norm": 1.0528046496888825, "learning_rate": 7.273883078749551e-05, "loss": 0.9061, "step": 3037 }, { "epoch": 1.3771532184950135, "grad_norm": 1.2729576661418813, "learning_rate": 7.273155606946255e-05, "loss": 0.9031, "step": 3038 }, { "epoch": 1.3776065276518585, "grad_norm": 0.9961293793758205, "learning_rate": 7.272427807326803e-05, "loss": 0.9119, "step": 3039 }, { "epoch": 1.3780598368087036, "grad_norm": 1.3631861061418162, "learning_rate": 7.271699679964089e-05, "loss": 0.9295, "step": 3040 }, { "epoch": 1.3785131459655484, "grad_norm": 1.1341420000256996, "learning_rate": 7.270971224931038e-05, "loss": 0.8984, "step": 3041 }, { "epoch": 1.3789664551223935, "grad_norm": 1.2793303006413912, "learning_rate": 7.270242442300604e-05, "loss": 0.9097, "step": 3042 }, { "epoch": 1.3794197642792385, "grad_norm": 1.4908571683886942, "learning_rate": 7.26951333214578e-05, "loss": 0.8988, "step": 3043 }, { "epoch": 1.3798730734360833, "grad_norm": 0.8021902904038337, "learning_rate": 7.268783894539585e-05, "loss": 0.913, "step": 3044 }, { "epoch": 1.3803263825929284, "grad_norm": 1.0067133964596853, "learning_rate": 7.268054129555078e-05, "loss": 0.8995, "step": 3045 }, { "epoch": 1.3807796917497734, "grad_norm": 1.0130872285722223, "learning_rate": 7.267324037265344e-05, "loss": 0.926, "step": 3046 }, { "epoch": 1.3812330009066183, "grad_norm": 0.9092400842347191, "learning_rate": 7.266593617743505e-05, "loss": 0.8961, "step": 3047 }, { "epoch": 1.3816863100634633, "grad_norm": 0.8511664865933317, "learning_rate": 7.265862871062716e-05, "loss": 0.8954, "step": 3048 }, { "epoch": 1.3821396192203084, "grad_norm": 1.0251253404998417, "learning_rate": 7.265131797296162e-05, "loss": 0.9224, "step": 3049 }, { "epoch": 1.3825929283771532, "grad_norm": 0.8387771611555325, "learning_rate": 7.264400396517062e-05, "loss": 0.89, "step": 3050 }, { "epoch": 1.3830462375339982, "grad_norm": 0.5994310370173597, "learning_rate": 7.263668668798669e-05, "loss": 0.9237, "step": 3051 }, { "epoch": 1.383499546690843, "grad_norm": 0.7536322555118232, "learning_rate": 7.262936614214266e-05, "loss": 0.9034, "step": 3052 }, { "epoch": 1.3839528558476881, "grad_norm": 0.8672364962983329, "learning_rate": 7.262204232837173e-05, "loss": 0.8999, "step": 3053 }, { "epoch": 1.3844061650045332, "grad_norm": 0.7078909294873459, "learning_rate": 7.261471524740737e-05, "loss": 0.9009, "step": 3054 }, { "epoch": 1.384859474161378, "grad_norm": 0.6624521564241039, "learning_rate": 7.260738489998343e-05, "loss": 0.9068, "step": 3055 }, { "epoch": 1.385312783318223, "grad_norm": 0.6736424167268686, "learning_rate": 7.260005128683408e-05, "loss": 0.8997, "step": 3056 }, { "epoch": 1.3857660924750679, "grad_norm": 0.6567537795734767, "learning_rate": 7.259271440869377e-05, "loss": 0.8763, "step": 3057 }, { "epoch": 1.386219401631913, "grad_norm": 0.6200385071358802, "learning_rate": 7.258537426629731e-05, "loss": 0.933, "step": 3058 }, { "epoch": 1.386672710788758, "grad_norm": 0.5923664102131942, "learning_rate": 7.257803086037987e-05, "loss": 0.8953, "step": 3059 }, { "epoch": 1.3871260199456028, "grad_norm": 0.6081346986493755, "learning_rate": 7.257068419167688e-05, "loss": 0.8961, "step": 3060 }, { "epoch": 1.3875793291024479, "grad_norm": 0.7177473237668153, "learning_rate": 7.256333426092415e-05, "loss": 0.9119, "step": 3061 }, { "epoch": 1.388032638259293, "grad_norm": 0.7299519832244515, "learning_rate": 7.25559810688578e-05, "loss": 0.8844, "step": 3062 }, { "epoch": 1.3884859474161377, "grad_norm": 0.7917084571358092, "learning_rate": 7.254862461621426e-05, "loss": 0.889, "step": 3063 }, { "epoch": 1.3889392565729828, "grad_norm": 0.8998875446893657, "learning_rate": 7.254126490373031e-05, "loss": 0.8883, "step": 3064 }, { "epoch": 1.3893925657298278, "grad_norm": 1.0313069440224822, "learning_rate": 7.253390193214303e-05, "loss": 0.8929, "step": 3065 }, { "epoch": 1.3898458748866727, "grad_norm": 1.0859954300644439, "learning_rate": 7.252653570218986e-05, "loss": 0.8955, "step": 3066 }, { "epoch": 1.3902991840435177, "grad_norm": 1.0342982364540796, "learning_rate": 7.251916621460855e-05, "loss": 0.9122, "step": 3067 }, { "epoch": 1.3907524932003628, "grad_norm": 0.8757931360518354, "learning_rate": 7.251179347013717e-05, "loss": 0.8959, "step": 3068 }, { "epoch": 1.3912058023572076, "grad_norm": 0.7133227107166102, "learning_rate": 7.250441746951413e-05, "loss": 0.9133, "step": 3069 }, { "epoch": 1.3916591115140526, "grad_norm": 0.6248329769759586, "learning_rate": 7.249703821347815e-05, "loss": 0.8801, "step": 3070 }, { "epoch": 1.3921124206708977, "grad_norm": 0.5536182371402131, "learning_rate": 7.248965570276828e-05, "loss": 0.9045, "step": 3071 }, { "epoch": 1.3925657298277425, "grad_norm": 0.5692936482609597, "learning_rate": 7.24822699381239e-05, "loss": 0.9088, "step": 3072 }, { "epoch": 1.3930190389845876, "grad_norm": 0.9167642309742859, "learning_rate": 7.247488092028473e-05, "loss": 0.9179, "step": 3073 }, { "epoch": 1.3934723481414324, "grad_norm": 0.5224619633356817, "learning_rate": 7.24674886499908e-05, "loss": 0.8899, "step": 3074 }, { "epoch": 1.3939256572982774, "grad_norm": 0.633510493815836, "learning_rate": 7.246009312798245e-05, "loss": 0.9197, "step": 3075 }, { "epoch": 1.3943789664551223, "grad_norm": 0.8054704476527327, "learning_rate": 7.24526943550004e-05, "loss": 0.9035, "step": 3076 }, { "epoch": 1.3948322756119673, "grad_norm": 1.0098177871806395, "learning_rate": 7.24452923317856e-05, "loss": 0.8709, "step": 3077 }, { "epoch": 1.3952855847688124, "grad_norm": 1.2511633827478135, "learning_rate": 7.243788705907943e-05, "loss": 0.8926, "step": 3078 }, { "epoch": 1.3957388939256572, "grad_norm": 0.7971036572323551, "learning_rate": 7.243047853762355e-05, "loss": 0.9211, "step": 3079 }, { "epoch": 1.3961922030825022, "grad_norm": 0.5537317967039815, "learning_rate": 7.242306676815991e-05, "loss": 0.9102, "step": 3080 }, { "epoch": 1.3966455122393473, "grad_norm": 0.5269724859747938, "learning_rate": 7.241565175143086e-05, "loss": 0.89, "step": 3081 }, { "epoch": 1.3970988213961921, "grad_norm": 0.5996892176168334, "learning_rate": 7.240823348817902e-05, "loss": 0.9148, "step": 3082 }, { "epoch": 1.3975521305530372, "grad_norm": 0.8733090915462135, "learning_rate": 7.240081197914734e-05, "loss": 0.9068, "step": 3083 }, { "epoch": 1.3980054397098822, "grad_norm": 1.2711716456994768, "learning_rate": 7.239338722507913e-05, "loss": 0.9031, "step": 3084 }, { "epoch": 1.398458748866727, "grad_norm": 0.8335461743170226, "learning_rate": 7.238595922671797e-05, "loss": 0.9027, "step": 3085 }, { "epoch": 1.398912058023572, "grad_norm": 0.7675453544376697, "learning_rate": 7.237852798480783e-05, "loss": 0.8887, "step": 3086 }, { "epoch": 1.3993653671804172, "grad_norm": 0.6585875448478352, "learning_rate": 7.237109350009294e-05, "loss": 0.9003, "step": 3087 }, { "epoch": 1.399818676337262, "grad_norm": 0.4869195192126909, "learning_rate": 7.236365577331792e-05, "loss": 0.8909, "step": 3088 }, { "epoch": 1.400271985494107, "grad_norm": 0.6857896700898414, "learning_rate": 7.235621480522764e-05, "loss": 0.9192, "step": 3089 }, { "epoch": 1.400725294650952, "grad_norm": 0.6425198597134059, "learning_rate": 7.234877059656736e-05, "loss": 0.8732, "step": 3090 }, { "epoch": 1.401178603807797, "grad_norm": 0.7328659367472543, "learning_rate": 7.234132314808262e-05, "loss": 0.9254, "step": 3091 }, { "epoch": 1.401631912964642, "grad_norm": 1.0270530081731022, "learning_rate": 7.233387246051933e-05, "loss": 0.8932, "step": 3092 }, { "epoch": 1.4020852221214868, "grad_norm": 1.0668215116337025, "learning_rate": 7.232641853462369e-05, "loss": 0.8771, "step": 3093 }, { "epoch": 1.4025385312783318, "grad_norm": 1.0170912868070527, "learning_rate": 7.231896137114222e-05, "loss": 0.8894, "step": 3094 }, { "epoch": 1.4029918404351767, "grad_norm": 1.1977937632125089, "learning_rate": 7.231150097082178e-05, "loss": 0.8919, "step": 3095 }, { "epoch": 1.4034451495920217, "grad_norm": 0.8483567895237624, "learning_rate": 7.230403733440958e-05, "loss": 0.9268, "step": 3096 }, { "epoch": 1.4038984587488668, "grad_norm": 0.7031943523012035, "learning_rate": 7.229657046265308e-05, "loss": 0.9045, "step": 3097 }, { "epoch": 1.4043517679057116, "grad_norm": 0.6536657887081309, "learning_rate": 7.228910035630013e-05, "loss": 0.8866, "step": 3098 }, { "epoch": 1.4048050770625566, "grad_norm": 0.5330166822446128, "learning_rate": 7.22816270160989e-05, "loss": 0.9069, "step": 3099 }, { "epoch": 1.4052583862194017, "grad_norm": 0.4971050452777292, "learning_rate": 7.227415044279783e-05, "loss": 0.8982, "step": 3100 }, { "epoch": 1.4057116953762465, "grad_norm": 0.5106615065069346, "learning_rate": 7.226667063714574e-05, "loss": 0.9105, "step": 3101 }, { "epoch": 1.4061650045330916, "grad_norm": 0.47603904527679813, "learning_rate": 7.225918759989178e-05, "loss": 0.8948, "step": 3102 }, { "epoch": 1.4066183136899366, "grad_norm": 0.5543118502077504, "learning_rate": 7.225170133178535e-05, "loss": 0.9104, "step": 3103 }, { "epoch": 1.4070716228467814, "grad_norm": 0.5874613956526613, "learning_rate": 7.224421183357626e-05, "loss": 0.9136, "step": 3104 }, { "epoch": 1.4075249320036265, "grad_norm": 0.5599790011592388, "learning_rate": 7.22367191060146e-05, "loss": 0.8745, "step": 3105 }, { "epoch": 1.4079782411604715, "grad_norm": 0.5947127328039707, "learning_rate": 7.222922314985076e-05, "loss": 0.9092, "step": 3106 }, { "epoch": 1.4084315503173164, "grad_norm": 0.7564252044594217, "learning_rate": 7.222172396583551e-05, "loss": 0.9003, "step": 3107 }, { "epoch": 1.4088848594741614, "grad_norm": 1.0036329399774147, "learning_rate": 7.22142215547199e-05, "loss": 0.8833, "step": 3108 }, { "epoch": 1.4093381686310065, "grad_norm": 1.4602130310624974, "learning_rate": 7.220671591725532e-05, "loss": 0.9, "step": 3109 }, { "epoch": 1.4097914777878513, "grad_norm": 0.5295598037168637, "learning_rate": 7.21992070541935e-05, "loss": 0.9137, "step": 3110 }, { "epoch": 1.4102447869446963, "grad_norm": 0.8704238297532954, "learning_rate": 7.219169496628645e-05, "loss": 0.8934, "step": 3111 }, { "epoch": 1.4106980961015412, "grad_norm": 1.6719874280270053, "learning_rate": 7.218417965428655e-05, "loss": 0.9198, "step": 3112 }, { "epoch": 1.4111514052583862, "grad_norm": 0.5680983947951569, "learning_rate": 7.217666111894649e-05, "loss": 0.8993, "step": 3113 }, { "epoch": 1.411604714415231, "grad_norm": 1.6069087014303025, "learning_rate": 7.216913936101922e-05, "loss": 0.9354, "step": 3114 }, { "epoch": 1.412058023572076, "grad_norm": 0.9298052138242832, "learning_rate": 7.216161438125811e-05, "loss": 0.8977, "step": 3115 }, { "epoch": 1.4125113327289212, "grad_norm": 1.2467383760289377, "learning_rate": 7.21540861804168e-05, "loss": 0.8917, "step": 3116 }, { "epoch": 1.412964641885766, "grad_norm": 0.8141786047503973, "learning_rate": 7.214655475924926e-05, "loss": 0.8894, "step": 3117 }, { "epoch": 1.413417951042611, "grad_norm": 1.5091750770171173, "learning_rate": 7.213902011850979e-05, "loss": 0.9013, "step": 3118 }, { "epoch": 1.413871260199456, "grad_norm": 0.6256539720341481, "learning_rate": 7.213148225895298e-05, "loss": 0.8975, "step": 3119 }, { "epoch": 1.414324569356301, "grad_norm": 1.4809233233811945, "learning_rate": 7.212394118133381e-05, "loss": 0.9097, "step": 3120 }, { "epoch": 1.414777878513146, "grad_norm": 0.840386050832741, "learning_rate": 7.211639688640753e-05, "loss": 0.9015, "step": 3121 }, { "epoch": 1.415231187669991, "grad_norm": 1.2263624245932374, "learning_rate": 7.210884937492968e-05, "loss": 0.8951, "step": 3122 }, { "epoch": 1.4156844968268358, "grad_norm": 1.0726380072006712, "learning_rate": 7.210129864765621e-05, "loss": 0.908, "step": 3123 }, { "epoch": 1.4161378059836809, "grad_norm": 1.0639714835415737, "learning_rate": 7.209374470534335e-05, "loss": 0.8914, "step": 3124 }, { "epoch": 1.416591115140526, "grad_norm": 0.9118421369173038, "learning_rate": 7.208618754874762e-05, "loss": 0.9198, "step": 3125 }, { "epoch": 1.4170444242973708, "grad_norm": 1.0432771521492963, "learning_rate": 7.207862717862591e-05, "loss": 0.9144, "step": 3126 }, { "epoch": 1.4174977334542158, "grad_norm": 0.8854055089944401, "learning_rate": 7.207106359573543e-05, "loss": 0.9022, "step": 3127 }, { "epoch": 1.4179510426110609, "grad_norm": 1.129746629666448, "learning_rate": 7.206349680083366e-05, "loss": 0.9237, "step": 3128 }, { "epoch": 1.4184043517679057, "grad_norm": 0.8110764572595213, "learning_rate": 7.205592679467845e-05, "loss": 0.9138, "step": 3129 }, { "epoch": 1.4188576609247507, "grad_norm": 0.913628356727519, "learning_rate": 7.204835357802797e-05, "loss": 0.9049, "step": 3130 }, { "epoch": 1.4193109700815956, "grad_norm": 0.8587956730111521, "learning_rate": 7.204077715164068e-05, "loss": 0.9059, "step": 3131 }, { "epoch": 1.4197642792384406, "grad_norm": 0.8200727081282149, "learning_rate": 7.20331975162754e-05, "loss": 0.8994, "step": 3132 }, { "epoch": 1.4202175883952854, "grad_norm": 0.710021777168251, "learning_rate": 7.202561467269125e-05, "loss": 0.9069, "step": 3133 }, { "epoch": 1.4206708975521305, "grad_norm": 0.6298889417491629, "learning_rate": 7.201802862164766e-05, "loss": 0.8953, "step": 3134 }, { "epoch": 1.4211242067089755, "grad_norm": 0.7196142419338882, "learning_rate": 7.201043936390441e-05, "loss": 0.9065, "step": 3135 }, { "epoch": 1.4215775158658204, "grad_norm": 0.8268670517150054, "learning_rate": 7.200284690022158e-05, "loss": 0.8808, "step": 3136 }, { "epoch": 1.4220308250226654, "grad_norm": 1.0723662199491526, "learning_rate": 7.199525123135957e-05, "loss": 0.9024, "step": 3137 }, { "epoch": 1.4224841341795105, "grad_norm": 1.1168430977205495, "learning_rate": 7.198765235807912e-05, "loss": 0.8987, "step": 3138 }, { "epoch": 1.4229374433363553, "grad_norm": 0.9192920649772975, "learning_rate": 7.198005028114128e-05, "loss": 0.9095, "step": 3139 }, { "epoch": 1.4233907524932004, "grad_norm": 0.9078014576672288, "learning_rate": 7.197244500130742e-05, "loss": 0.9187, "step": 3140 }, { "epoch": 1.4238440616500454, "grad_norm": 0.9370924759449211, "learning_rate": 7.196483651933922e-05, "loss": 0.902, "step": 3141 }, { "epoch": 1.4242973708068902, "grad_norm": 1.015949333219389, "learning_rate": 7.19572248359987e-05, "loss": 0.8963, "step": 3142 }, { "epoch": 1.4247506799637353, "grad_norm": 1.0499215638550403, "learning_rate": 7.19496099520482e-05, "loss": 0.8637, "step": 3143 }, { "epoch": 1.4252039891205803, "grad_norm": 0.8935859382194378, "learning_rate": 7.194199186825036e-05, "loss": 0.9103, "step": 3144 }, { "epoch": 1.4256572982774252, "grad_norm": 0.9759864419641953, "learning_rate": 7.193437058536816e-05, "loss": 0.9181, "step": 3145 }, { "epoch": 1.4261106074342702, "grad_norm": 1.0642759563592108, "learning_rate": 7.19267461041649e-05, "loss": 0.8929, "step": 3146 }, { "epoch": 1.4265639165911153, "grad_norm": 1.0527131217410866, "learning_rate": 7.191911842540417e-05, "loss": 0.8937, "step": 3147 }, { "epoch": 1.42701722574796, "grad_norm": 0.8211732886183648, "learning_rate": 7.191148754984992e-05, "loss": 0.921, "step": 3148 }, { "epoch": 1.4274705349048051, "grad_norm": 0.6110793127379316, "learning_rate": 7.190385347826642e-05, "loss": 0.9028, "step": 3149 }, { "epoch": 1.4279238440616502, "grad_norm": 0.5701160373580392, "learning_rate": 7.189621621141823e-05, "loss": 0.8932, "step": 3150 }, { "epoch": 1.428377153218495, "grad_norm": 0.6317923399503657, "learning_rate": 7.188857575007025e-05, "loss": 0.8928, "step": 3151 }, { "epoch": 1.42883046237534, "grad_norm": 0.6769077689047815, "learning_rate": 7.188093209498768e-05, "loss": 0.8929, "step": 3152 }, { "epoch": 1.4292837715321849, "grad_norm": 0.807139540451113, "learning_rate": 7.187328524693605e-05, "loss": 0.9256, "step": 3153 }, { "epoch": 1.42973708068903, "grad_norm": 0.9261470371436863, "learning_rate": 7.186563520668124e-05, "loss": 0.8895, "step": 3154 }, { "epoch": 1.4301903898458748, "grad_norm": 1.1475835298392403, "learning_rate": 7.185798197498943e-05, "loss": 0.911, "step": 3155 }, { "epoch": 1.4306436990027198, "grad_norm": 0.9173025483190557, "learning_rate": 7.185032555262708e-05, "loss": 0.9103, "step": 3156 }, { "epoch": 1.4310970081595649, "grad_norm": 0.8656993318038244, "learning_rate": 7.184266594036102e-05, "loss": 0.8976, "step": 3157 }, { "epoch": 1.4315503173164097, "grad_norm": 0.9513623036828548, "learning_rate": 7.183500313895838e-05, "loss": 0.897, "step": 3158 }, { "epoch": 1.4320036264732547, "grad_norm": 1.1639445581592898, "learning_rate": 7.182733714918662e-05, "loss": 0.8998, "step": 3159 }, { "epoch": 1.4324569356300998, "grad_norm": 0.8347429527317303, "learning_rate": 7.181966797181351e-05, "loss": 0.9209, "step": 3160 }, { "epoch": 1.4329102447869446, "grad_norm": 0.7343427552230148, "learning_rate": 7.181199560760712e-05, "loss": 0.8989, "step": 3161 }, { "epoch": 1.4333635539437897, "grad_norm": 0.6176884725847238, "learning_rate": 7.18043200573359e-05, "loss": 0.8719, "step": 3162 }, { "epoch": 1.4338168631006347, "grad_norm": 0.5016571200979645, "learning_rate": 7.179664132176853e-05, "loss": 0.8781, "step": 3163 }, { "epoch": 1.4342701722574795, "grad_norm": 0.548159810124031, "learning_rate": 7.17889594016741e-05, "loss": 0.9093, "step": 3164 }, { "epoch": 1.4347234814143246, "grad_norm": 0.5698110564381629, "learning_rate": 7.178127429782196e-05, "loss": 0.8944, "step": 3165 }, { "epoch": 1.4351767905711696, "grad_norm": 0.5383089480260332, "learning_rate": 7.177358601098178e-05, "loss": 0.8723, "step": 3166 }, { "epoch": 1.4356300997280145, "grad_norm": 0.6316127284285209, "learning_rate": 7.17658945419236e-05, "loss": 0.8864, "step": 3167 }, { "epoch": 1.4360834088848595, "grad_norm": 0.7192284883510054, "learning_rate": 7.175819989141771e-05, "loss": 0.8863, "step": 3168 }, { "epoch": 1.4365367180417046, "grad_norm": 0.8150562271053579, "learning_rate": 7.175050206023477e-05, "loss": 0.9243, "step": 3169 }, { "epoch": 1.4369900271985494, "grad_norm": 1.0031799094125031, "learning_rate": 7.174280104914574e-05, "loss": 0.9082, "step": 3170 }, { "epoch": 1.4374433363553945, "grad_norm": 1.326102589766879, "learning_rate": 7.173509685892189e-05, "loss": 0.8982, "step": 3171 }, { "epoch": 1.4378966455122393, "grad_norm": 0.6055883263152967, "learning_rate": 7.172738949033481e-05, "loss": 0.9008, "step": 3172 }, { "epoch": 1.4383499546690843, "grad_norm": 0.4496318131656158, "learning_rate": 7.171967894415645e-05, "loss": 0.8873, "step": 3173 }, { "epoch": 1.4388032638259292, "grad_norm": 0.8310801017355538, "learning_rate": 7.1711965221159e-05, "loss": 0.9015, "step": 3174 }, { "epoch": 1.4392565729827742, "grad_norm": 1.3963432452041427, "learning_rate": 7.170424832211504e-05, "loss": 0.9098, "step": 3175 }, { "epoch": 1.4397098821396193, "grad_norm": 1.1955985845802235, "learning_rate": 7.169652824779745e-05, "loss": 0.8918, "step": 3176 }, { "epoch": 1.440163191296464, "grad_norm": 0.7161282403571807, "learning_rate": 7.168880499897939e-05, "loss": 0.8872, "step": 3177 }, { "epoch": 1.4406165004533091, "grad_norm": 1.7793592357349104, "learning_rate": 7.168107857643437e-05, "loss": 0.9027, "step": 3178 }, { "epoch": 1.4410698096101542, "grad_norm": 0.4592110212062206, "learning_rate": 7.167334898093623e-05, "loss": 0.9054, "step": 3179 }, { "epoch": 1.441523118766999, "grad_norm": 1.6785592141925805, "learning_rate": 7.16656162132591e-05, "loss": 0.906, "step": 3180 }, { "epoch": 1.441976427923844, "grad_norm": 0.7854267588153165, "learning_rate": 7.165788027417744e-05, "loss": 0.887, "step": 3181 }, { "epoch": 1.4424297370806891, "grad_norm": 1.2865546417913956, "learning_rate": 7.165014116446603e-05, "loss": 0.8972, "step": 3182 }, { "epoch": 1.442883046237534, "grad_norm": 1.3372357114654336, "learning_rate": 7.164239888489997e-05, "loss": 0.9048, "step": 3183 }, { "epoch": 1.443336355394379, "grad_norm": 1.0213287479972588, "learning_rate": 7.163465343625468e-05, "loss": 0.8951, "step": 3184 }, { "epoch": 1.443789664551224, "grad_norm": 1.3073463128875316, "learning_rate": 7.162690481930586e-05, "loss": 0.9124, "step": 3185 }, { "epoch": 1.4442429737080689, "grad_norm": 1.0900747240007347, "learning_rate": 7.161915303482957e-05, "loss": 0.9088, "step": 3186 }, { "epoch": 1.444696282864914, "grad_norm": 1.475952908792445, "learning_rate": 7.161139808360218e-05, "loss": 0.9135, "step": 3187 }, { "epoch": 1.445149592021759, "grad_norm": 0.6291658619897117, "learning_rate": 7.160363996640035e-05, "loss": 0.912, "step": 3188 }, { "epoch": 1.4456029011786038, "grad_norm": 1.6834802229248673, "learning_rate": 7.159587868400112e-05, "loss": 0.8946, "step": 3189 }, { "epoch": 1.4460562103354488, "grad_norm": 0.7017333978640705, "learning_rate": 7.158811423718177e-05, "loss": 0.8983, "step": 3190 }, { "epoch": 1.4465095194922937, "grad_norm": 1.7767886576195875, "learning_rate": 7.158034662671996e-05, "loss": 0.9244, "step": 3191 }, { "epoch": 1.4469628286491387, "grad_norm": 1.2211575260349345, "learning_rate": 7.157257585339361e-05, "loss": 0.8943, "step": 3192 }, { "epoch": 1.4474161378059835, "grad_norm": 1.8579214654034095, "learning_rate": 7.1564801917981e-05, "loss": 0.8945, "step": 3193 }, { "epoch": 1.4478694469628286, "grad_norm": 2.3661718606658324, "learning_rate": 7.155702482126071e-05, "loss": 0.9237, "step": 3194 }, { "epoch": 1.4483227561196736, "grad_norm": 1.2392845572072022, "learning_rate": 7.154924456401164e-05, "loss": 0.9014, "step": 3195 }, { "epoch": 1.4487760652765185, "grad_norm": 1.432629339909105, "learning_rate": 7.1541461147013e-05, "loss": 0.9143, "step": 3196 }, { "epoch": 1.4492293744333635, "grad_norm": 1.0518103473821645, "learning_rate": 7.153367457104432e-05, "loss": 0.9019, "step": 3197 }, { "epoch": 1.4496826835902086, "grad_norm": 1.5036933885353525, "learning_rate": 7.152588483688547e-05, "loss": 0.9116, "step": 3198 }, { "epoch": 1.4501359927470534, "grad_norm": 1.2542363061599409, "learning_rate": 7.151809194531661e-05, "loss": 0.9115, "step": 3199 }, { "epoch": 1.4505893019038985, "grad_norm": 1.3296676135485954, "learning_rate": 7.15102958971182e-05, "loss": 0.9011, "step": 3200 }, { "epoch": 1.4510426110607435, "grad_norm": 1.1675941773727512, "learning_rate": 7.150249669307104e-05, "loss": 0.9049, "step": 3201 }, { "epoch": 1.4514959202175883, "grad_norm": 0.9200422286976883, "learning_rate": 7.149469433395626e-05, "loss": 0.9076, "step": 3202 }, { "epoch": 1.4519492293744334, "grad_norm": 0.9081903997886037, "learning_rate": 7.148688882055528e-05, "loss": 0.8947, "step": 3203 }, { "epoch": 1.4524025385312784, "grad_norm": 0.7986595306132295, "learning_rate": 7.147908015364986e-05, "loss": 0.9023, "step": 3204 }, { "epoch": 1.4528558476881233, "grad_norm": 0.8251394491236508, "learning_rate": 7.147126833402203e-05, "loss": 0.8927, "step": 3205 }, { "epoch": 1.4533091568449683, "grad_norm": 0.7638759674476218, "learning_rate": 7.146345336245419e-05, "loss": 0.8928, "step": 3206 }, { "epoch": 1.4537624660018134, "grad_norm": 0.6434140766237536, "learning_rate": 7.145563523972902e-05, "loss": 0.8767, "step": 3207 }, { "epoch": 1.4542157751586582, "grad_norm": 0.6829122819946978, "learning_rate": 7.144781396662955e-05, "loss": 0.9007, "step": 3208 }, { "epoch": 1.4546690843155032, "grad_norm": 0.7481273119110196, "learning_rate": 7.143998954393908e-05, "loss": 0.9101, "step": 3209 }, { "epoch": 1.455122393472348, "grad_norm": 0.5772072279549394, "learning_rate": 7.143216197244124e-05, "loss": 0.8871, "step": 3210 }, { "epoch": 1.4555757026291931, "grad_norm": 0.5543038036613563, "learning_rate": 7.142433125292003e-05, "loss": 0.9115, "step": 3211 }, { "epoch": 1.456029011786038, "grad_norm": 0.6387594978308473, "learning_rate": 7.141649738615968e-05, "loss": 0.895, "step": 3212 }, { "epoch": 1.456482320942883, "grad_norm": 0.6522456953706728, "learning_rate": 7.140866037294478e-05, "loss": 0.9038, "step": 3213 }, { "epoch": 1.456935630099728, "grad_norm": 0.6380839782732546, "learning_rate": 7.140082021406023e-05, "loss": 0.91, "step": 3214 }, { "epoch": 1.4573889392565729, "grad_norm": 0.7922824024306959, "learning_rate": 7.139297691029127e-05, "loss": 0.8907, "step": 3215 }, { "epoch": 1.457842248413418, "grad_norm": 0.8003424186443753, "learning_rate": 7.138513046242338e-05, "loss": 0.9064, "step": 3216 }, { "epoch": 1.458295557570263, "grad_norm": 0.8531337210677881, "learning_rate": 7.137728087124246e-05, "loss": 0.907, "step": 3217 }, { "epoch": 1.4587488667271078, "grad_norm": 0.9720263921809812, "learning_rate": 7.136942813753465e-05, "loss": 0.8985, "step": 3218 }, { "epoch": 1.4592021758839528, "grad_norm": 1.0145795529137709, "learning_rate": 7.13615722620864e-05, "loss": 0.8933, "step": 3219 }, { "epoch": 1.459655485040798, "grad_norm": 1.1646601317062875, "learning_rate": 7.135371324568453e-05, "loss": 0.9072, "step": 3220 }, { "epoch": 1.4601087941976427, "grad_norm": 0.8450495510048055, "learning_rate": 7.134585108911612e-05, "loss": 0.8988, "step": 3221 }, { "epoch": 1.4605621033544878, "grad_norm": 0.7412539448104492, "learning_rate": 7.133798579316861e-05, "loss": 0.8734, "step": 3222 }, { "epoch": 1.4610154125113328, "grad_norm": 0.6949117982014194, "learning_rate": 7.133011735862971e-05, "loss": 0.9112, "step": 3223 }, { "epoch": 1.4614687216681777, "grad_norm": 0.6314769986260067, "learning_rate": 7.132224578628749e-05, "loss": 0.902, "step": 3224 }, { "epoch": 1.4619220308250227, "grad_norm": 0.6147014174578541, "learning_rate": 7.131437107693031e-05, "loss": 0.9019, "step": 3225 }, { "epoch": 1.4623753399818678, "grad_norm": 0.698490774256373, "learning_rate": 7.130649323134681e-05, "loss": 0.9046, "step": 3226 }, { "epoch": 1.4628286491387126, "grad_norm": 0.7733269183514531, "learning_rate": 7.1298612250326e-05, "loss": 0.8955, "step": 3227 }, { "epoch": 1.4632819582955576, "grad_norm": 0.8394683642537851, "learning_rate": 7.129072813465723e-05, "loss": 0.9097, "step": 3228 }, { "epoch": 1.4637352674524025, "grad_norm": 1.0632585038930649, "learning_rate": 7.128284088513004e-05, "loss": 0.9105, "step": 3229 }, { "epoch": 1.4641885766092475, "grad_norm": 1.1528534055317488, "learning_rate": 7.127495050253441e-05, "loss": 0.8946, "step": 3230 }, { "epoch": 1.4646418857660926, "grad_norm": 0.8502316711575557, "learning_rate": 7.126705698766057e-05, "loss": 0.8919, "step": 3231 }, { "epoch": 1.4650951949229374, "grad_norm": 0.7220283649943882, "learning_rate": 7.125916034129908e-05, "loss": 0.907, "step": 3232 }, { "epoch": 1.4655485040797824, "grad_norm": 0.5939919084429469, "learning_rate": 7.125126056424082e-05, "loss": 0.8919, "step": 3233 }, { "epoch": 1.4660018132366273, "grad_norm": 0.47246639184105194, "learning_rate": 7.124335765727696e-05, "loss": 0.9084, "step": 3234 }, { "epoch": 1.4664551223934723, "grad_norm": 0.49061150705493495, "learning_rate": 7.123545162119901e-05, "loss": 0.9045, "step": 3235 }, { "epoch": 1.4669084315503174, "grad_norm": 0.6783821355823716, "learning_rate": 7.122754245679877e-05, "loss": 0.9154, "step": 3236 }, { "epoch": 1.4673617407071622, "grad_norm": 1.0047594420961272, "learning_rate": 7.12196301648684e-05, "loss": 0.9037, "step": 3237 }, { "epoch": 1.4678150498640072, "grad_norm": 1.395469813359491, "learning_rate": 7.12117147462003e-05, "loss": 0.8924, "step": 3238 }, { "epoch": 1.4682683590208523, "grad_norm": 1.5092807764264908, "learning_rate": 7.120379620158723e-05, "loss": 0.9194, "step": 3239 }, { "epoch": 1.4687216681776971, "grad_norm": 1.1129504486297903, "learning_rate": 7.11958745318223e-05, "loss": 0.8888, "step": 3240 }, { "epoch": 1.4691749773345422, "grad_norm": 1.8167220245309383, "learning_rate": 7.118794973769881e-05, "loss": 0.9162, "step": 3241 }, { "epoch": 1.4696282864913872, "grad_norm": 0.7429717578141442, "learning_rate": 7.118002182001051e-05, "loss": 0.9172, "step": 3242 }, { "epoch": 1.470081595648232, "grad_norm": 2.506255249944693, "learning_rate": 7.11720907795514e-05, "loss": 0.9047, "step": 3243 }, { "epoch": 1.470534904805077, "grad_norm": 1.7170270088665711, "learning_rate": 7.116415661711579e-05, "loss": 0.8876, "step": 3244 }, { "epoch": 1.4709882139619221, "grad_norm": 2.440143087435275, "learning_rate": 7.115621933349828e-05, "loss": 0.929, "step": 3245 }, { "epoch": 1.471441523118767, "grad_norm": 2.3789029900377345, "learning_rate": 7.114827892949385e-05, "loss": 0.9061, "step": 3246 }, { "epoch": 1.471894832275612, "grad_norm": 1.2676066597334976, "learning_rate": 7.114033540589774e-05, "loss": 0.8941, "step": 3247 }, { "epoch": 1.472348141432457, "grad_norm": 1.013288972686442, "learning_rate": 7.113238876350553e-05, "loss": 0.8943, "step": 3248 }, { "epoch": 1.472801450589302, "grad_norm": 1.9588695326335641, "learning_rate": 7.112443900311308e-05, "loss": 0.8988, "step": 3249 }, { "epoch": 1.473254759746147, "grad_norm": 1.1508903214619797, "learning_rate": 7.111648612551659e-05, "loss": 0.8753, "step": 3250 }, { "epoch": 1.4737080689029918, "grad_norm": 2.4509719459795978, "learning_rate": 7.110853013151255e-05, "loss": 0.8851, "step": 3251 }, { "epoch": 1.4741613780598368, "grad_norm": 2.3735625474918116, "learning_rate": 7.110057102189782e-05, "loss": 0.9052, "step": 3252 }, { "epoch": 1.4746146872166817, "grad_norm": 1.1612416655957616, "learning_rate": 7.109260879746948e-05, "loss": 0.915, "step": 3253 }, { "epoch": 1.4750679963735267, "grad_norm": 1.4232112275275395, "learning_rate": 7.1084643459025e-05, "loss": 0.9134, "step": 3254 }, { "epoch": 1.4755213055303718, "grad_norm": 0.9350642921992706, "learning_rate": 7.107667500736212e-05, "loss": 0.9232, "step": 3255 }, { "epoch": 1.4759746146872166, "grad_norm": 1.6084797159781334, "learning_rate": 7.10687034432789e-05, "loss": 0.9296, "step": 3256 }, { "epoch": 1.4764279238440616, "grad_norm": 0.8871146874826087, "learning_rate": 7.106072876757373e-05, "loss": 0.9069, "step": 3257 }, { "epoch": 1.4768812330009067, "grad_norm": 1.6790697360389144, "learning_rate": 7.105275098104526e-05, "loss": 0.9148, "step": 3258 }, { "epoch": 1.4773345421577515, "grad_norm": 1.447454186711884, "learning_rate": 7.104477008449256e-05, "loss": 0.9188, "step": 3259 }, { "epoch": 1.4777878513145966, "grad_norm": 1.3919892557788291, "learning_rate": 7.103678607871486e-05, "loss": 0.8956, "step": 3260 }, { "epoch": 1.4782411604714416, "grad_norm": 1.6562279076531456, "learning_rate": 7.102879896451184e-05, "loss": 0.8984, "step": 3261 }, { "epoch": 1.4786944696282864, "grad_norm": 1.3354598441288585, "learning_rate": 7.102080874268341e-05, "loss": 0.9049, "step": 3262 }, { "epoch": 1.4791477787851315, "grad_norm": 1.8275006566139365, "learning_rate": 7.10128154140298e-05, "loss": 0.9046, "step": 3263 }, { "epoch": 1.4796010879419765, "grad_norm": 1.597143921726934, "learning_rate": 7.10048189793516e-05, "loss": 0.9324, "step": 3264 }, { "epoch": 1.4800543970988214, "grad_norm": 1.562355115452566, "learning_rate": 7.099681943944965e-05, "loss": 0.9036, "step": 3265 }, { "epoch": 1.4805077062556664, "grad_norm": 1.1563081048454444, "learning_rate": 7.098881679512513e-05, "loss": 0.9202, "step": 3266 }, { "epoch": 1.4809610154125115, "grad_norm": 1.7611516827998022, "learning_rate": 7.098081104717955e-05, "loss": 0.9067, "step": 3267 }, { "epoch": 1.4814143245693563, "grad_norm": 1.2023912024359766, "learning_rate": 7.097280219641468e-05, "loss": 0.9084, "step": 3268 }, { "epoch": 1.4818676337262013, "grad_norm": 1.902415957093118, "learning_rate": 7.096479024363265e-05, "loss": 0.8984, "step": 3269 }, { "epoch": 1.4823209428830462, "grad_norm": 1.443573019419363, "learning_rate": 7.095677518963586e-05, "loss": 0.9017, "step": 3270 }, { "epoch": 1.4827742520398912, "grad_norm": 1.8032899572081773, "learning_rate": 7.094875703522707e-05, "loss": 0.9058, "step": 3271 }, { "epoch": 1.483227561196736, "grad_norm": 1.3539517596332717, "learning_rate": 7.09407357812093e-05, "loss": 0.9125, "step": 3272 }, { "epoch": 1.483680870353581, "grad_norm": 1.835188729033868, "learning_rate": 7.093271142838589e-05, "loss": 0.9107, "step": 3273 }, { "epoch": 1.4841341795104261, "grad_norm": 1.3677815746687316, "learning_rate": 7.092468397756054e-05, "loss": 0.9017, "step": 3274 }, { "epoch": 1.484587488667271, "grad_norm": 1.9611693498289229, "learning_rate": 7.091665342953721e-05, "loss": 0.8921, "step": 3275 }, { "epoch": 1.485040797824116, "grad_norm": 1.5579918630728795, "learning_rate": 7.090861978512018e-05, "loss": 0.9229, "step": 3276 }, { "epoch": 1.485494106980961, "grad_norm": 1.7941446669276915, "learning_rate": 7.090058304511402e-05, "loss": 0.9, "step": 3277 }, { "epoch": 1.485947416137806, "grad_norm": 1.4703322264253023, "learning_rate": 7.089254321032366e-05, "loss": 0.8917, "step": 3278 }, { "epoch": 1.486400725294651, "grad_norm": 1.8397786721283123, "learning_rate": 7.088450028155431e-05, "loss": 0.8937, "step": 3279 }, { "epoch": 1.486854034451496, "grad_norm": 1.5294071751552087, "learning_rate": 7.087645425961148e-05, "loss": 0.9229, "step": 3280 }, { "epoch": 1.4873073436083408, "grad_norm": 1.6780856929444898, "learning_rate": 7.086840514530102e-05, "loss": 0.9194, "step": 3281 }, { "epoch": 1.4877606527651859, "grad_norm": 1.4225281062074104, "learning_rate": 7.086035293942905e-05, "loss": 0.9003, "step": 3282 }, { "epoch": 1.488213961922031, "grad_norm": 1.6329784025062504, "learning_rate": 7.085229764280205e-05, "loss": 0.8972, "step": 3283 }, { "epoch": 1.4886672710788758, "grad_norm": 1.3784895870955867, "learning_rate": 7.084423925622677e-05, "loss": 0.9046, "step": 3284 }, { "epoch": 1.4891205802357208, "grad_norm": 1.5911963236706699, "learning_rate": 7.083617778051027e-05, "loss": 0.8913, "step": 3285 }, { "epoch": 1.4895738893925659, "grad_norm": 1.3756590800513135, "learning_rate": 7.082811321645995e-05, "loss": 0.901, "step": 3286 }, { "epoch": 1.4900271985494107, "grad_norm": 1.5117347204825644, "learning_rate": 7.08200455648835e-05, "loss": 0.8817, "step": 3287 }, { "epoch": 1.4904805077062557, "grad_norm": 1.4644498534901156, "learning_rate": 7.081197482658887e-05, "loss": 0.8817, "step": 3288 }, { "epoch": 1.4909338168631006, "grad_norm": 1.2506868489199763, "learning_rate": 7.080390100238443e-05, "loss": 0.9307, "step": 3289 }, { "epoch": 1.4913871260199456, "grad_norm": 1.3043747747048569, "learning_rate": 7.079582409307877e-05, "loss": 0.8847, "step": 3290 }, { "epoch": 1.4918404351767904, "grad_norm": 0.9792419969841942, "learning_rate": 7.078774409948083e-05, "loss": 0.9146, "step": 3291 }, { "epoch": 1.4922937443336355, "grad_norm": 1.336746189677506, "learning_rate": 7.077966102239983e-05, "loss": 0.893, "step": 3292 }, { "epoch": 1.4927470534904805, "grad_norm": 0.8777292877462655, "learning_rate": 7.077157486264531e-05, "loss": 0.8889, "step": 3293 }, { "epoch": 1.4932003626473254, "grad_norm": 0.8990996081294912, "learning_rate": 7.076348562102713e-05, "loss": 0.8967, "step": 3294 }, { "epoch": 1.4936536718041704, "grad_norm": 0.75040700245139, "learning_rate": 7.075539329835547e-05, "loss": 0.9003, "step": 3295 }, { "epoch": 1.4941069809610155, "grad_norm": 0.9123555127305218, "learning_rate": 7.074729789544077e-05, "loss": 0.8929, "step": 3296 }, { "epoch": 1.4945602901178603, "grad_norm": 0.9509062101615217, "learning_rate": 7.073919941309383e-05, "loss": 0.8916, "step": 3297 }, { "epoch": 1.4950135992747053, "grad_norm": 0.7505501063977574, "learning_rate": 7.073109785212572e-05, "loss": 0.8849, "step": 3298 }, { "epoch": 1.4954669084315504, "grad_norm": 0.5655848842881394, "learning_rate": 7.072299321334783e-05, "loss": 0.8935, "step": 3299 }, { "epoch": 1.4959202175883952, "grad_norm": 0.6540167355922669, "learning_rate": 7.07148854975719e-05, "loss": 0.9066, "step": 3300 }, { "epoch": 1.4963735267452403, "grad_norm": 0.6202326548469641, "learning_rate": 7.070677470560991e-05, "loss": 0.9003, "step": 3301 }, { "epoch": 1.4968268359020853, "grad_norm": 0.6556701993820765, "learning_rate": 7.069866083827418e-05, "loss": 0.912, "step": 3302 }, { "epoch": 1.4972801450589301, "grad_norm": 0.8197576644806988, "learning_rate": 7.069054389637736e-05, "loss": 0.9057, "step": 3303 }, { "epoch": 1.4977334542157752, "grad_norm": 0.852560579976863, "learning_rate": 7.068242388073237e-05, "loss": 0.9042, "step": 3304 }, { "epoch": 1.4981867633726202, "grad_norm": 0.6860141325879088, "learning_rate": 7.067430079215244e-05, "loss": 0.8942, "step": 3305 }, { "epoch": 1.498640072529465, "grad_norm": 0.4526533742470864, "learning_rate": 7.066617463145114e-05, "loss": 0.918, "step": 3306 }, { "epoch": 1.4990933816863101, "grad_norm": 0.7592486861395077, "learning_rate": 7.065804539944232e-05, "loss": 0.8881, "step": 3307 }, { "epoch": 1.499546690843155, "grad_norm": 0.7365517170897338, "learning_rate": 7.064991309694016e-05, "loss": 0.9101, "step": 3308 }, { "epoch": 1.5, "grad_norm": 0.5890823076918769, "learning_rate": 7.064177772475912e-05, "loss": 0.8926, "step": 3309 }, { "epoch": 1.5004533091568448, "grad_norm": 0.5653683138639732, "learning_rate": 7.063363928371399e-05, "loss": 0.9015, "step": 3310 }, { "epoch": 1.50090661831369, "grad_norm": 0.5273691032072996, "learning_rate": 7.062549777461986e-05, "loss": 0.8938, "step": 3311 }, { "epoch": 1.501359927470535, "grad_norm": 0.46541007480786406, "learning_rate": 7.061735319829211e-05, "loss": 0.879, "step": 3312 }, { "epoch": 1.5018132366273798, "grad_norm": 0.4713838799274754, "learning_rate": 7.060920555554646e-05, "loss": 0.8759, "step": 3313 }, { "epoch": 1.5022665457842248, "grad_norm": 0.3955417921863602, "learning_rate": 7.060105484719892e-05, "loss": 0.8903, "step": 3314 }, { "epoch": 1.5027198549410699, "grad_norm": 0.3816685269424007, "learning_rate": 7.05929010740658e-05, "loss": 0.8952, "step": 3315 }, { "epoch": 1.5031731640979147, "grad_norm": 0.5190419671192998, "learning_rate": 7.058474423696372e-05, "loss": 0.9082, "step": 3316 }, { "epoch": 1.5036264732547597, "grad_norm": 0.5454633440090965, "learning_rate": 7.057658433670963e-05, "loss": 0.9086, "step": 3317 }, { "epoch": 1.5040797824116048, "grad_norm": 0.4021834349507755, "learning_rate": 7.056842137412074e-05, "loss": 0.899, "step": 3318 }, { "epoch": 1.5045330915684496, "grad_norm": 0.42788270520017135, "learning_rate": 7.056025535001463e-05, "loss": 0.8997, "step": 3319 }, { "epoch": 1.5049864007252947, "grad_norm": 0.39814113805394696, "learning_rate": 7.055208626520911e-05, "loss": 0.8956, "step": 3320 }, { "epoch": 1.5054397098821397, "grad_norm": 0.4405870168866256, "learning_rate": 7.054391412052237e-05, "loss": 0.9059, "step": 3321 }, { "epoch": 1.5058930190389845, "grad_norm": 0.5366976988878929, "learning_rate": 7.053573891677286e-05, "loss": 0.8991, "step": 3322 }, { "epoch": 1.5063463281958296, "grad_norm": 0.5054686870723185, "learning_rate": 7.052756065477935e-05, "loss": 0.9139, "step": 3323 }, { "epoch": 1.5067996373526746, "grad_norm": 0.43999582393264464, "learning_rate": 7.051937933536094e-05, "loss": 0.9084, "step": 3324 }, { "epoch": 1.5072529465095195, "grad_norm": 0.4926951676183928, "learning_rate": 7.051119495933698e-05, "loss": 0.8976, "step": 3325 }, { "epoch": 1.5077062556663645, "grad_norm": 0.6397742928154375, "learning_rate": 7.050300752752718e-05, "loss": 0.8915, "step": 3326 }, { "epoch": 1.5081595648232096, "grad_norm": 0.8346278706649632, "learning_rate": 7.049481704075152e-05, "loss": 0.8957, "step": 3327 }, { "epoch": 1.5086128739800544, "grad_norm": 0.9004008048402814, "learning_rate": 7.048662349983034e-05, "loss": 0.8894, "step": 3328 }, { "epoch": 1.5090661831368992, "grad_norm": 0.9496221999211547, "learning_rate": 7.047842690558419e-05, "loss": 0.9081, "step": 3329 }, { "epoch": 1.5095194922937445, "grad_norm": 0.9915316006264037, "learning_rate": 7.0470227258834e-05, "loss": 0.8813, "step": 3330 }, { "epoch": 1.5099728014505893, "grad_norm": 0.9669135163970264, "learning_rate": 7.046202456040103e-05, "loss": 0.9077, "step": 3331 }, { "epoch": 1.5104261106074341, "grad_norm": 0.8758135203084915, "learning_rate": 7.045381881110677e-05, "loss": 0.91, "step": 3332 }, { "epoch": 1.5108794197642792, "grad_norm": 0.912517255565864, "learning_rate": 7.044561001177304e-05, "loss": 0.9021, "step": 3333 }, { "epoch": 1.5113327289211242, "grad_norm": 0.7938574796467526, "learning_rate": 7.043739816322199e-05, "loss": 0.9004, "step": 3334 }, { "epoch": 1.511786038077969, "grad_norm": 0.688782370051854, "learning_rate": 7.042918326627607e-05, "loss": 0.9141, "step": 3335 }, { "epoch": 1.5122393472348141, "grad_norm": 0.5326383696137961, "learning_rate": 7.042096532175801e-05, "loss": 0.9114, "step": 3336 }, { "epoch": 1.5126926563916592, "grad_norm": 0.5241141906092749, "learning_rate": 7.041274433049087e-05, "loss": 0.9116, "step": 3337 }, { "epoch": 1.513145965548504, "grad_norm": 0.4547977338761953, "learning_rate": 7.0404520293298e-05, "loss": 0.8799, "step": 3338 }, { "epoch": 1.513599274705349, "grad_norm": 0.3594040940113845, "learning_rate": 7.039629321100307e-05, "loss": 0.909, "step": 3339 }, { "epoch": 1.514052583862194, "grad_norm": 0.3918782801833048, "learning_rate": 7.038806308443003e-05, "loss": 0.8778, "step": 3340 }, { "epoch": 1.514505893019039, "grad_norm": 0.48994967263384365, "learning_rate": 7.037982991440318e-05, "loss": 0.8936, "step": 3341 }, { "epoch": 1.514959202175884, "grad_norm": 0.5697444567085274, "learning_rate": 7.037159370174706e-05, "loss": 0.8858, "step": 3342 }, { "epoch": 1.515412511332729, "grad_norm": 0.7242146399222581, "learning_rate": 7.036335444728659e-05, "loss": 0.9347, "step": 3343 }, { "epoch": 1.5158658204895739, "grad_norm": 0.8943945679676468, "learning_rate": 7.035511215184693e-05, "loss": 0.8991, "step": 3344 }, { "epoch": 1.516319129646419, "grad_norm": 0.9687249309522145, "learning_rate": 7.034686681625356e-05, "loss": 0.8921, "step": 3345 }, { "epoch": 1.516772438803264, "grad_norm": 1.0235305929131058, "learning_rate": 7.033861844133231e-05, "loss": 0.8892, "step": 3346 }, { "epoch": 1.5172257479601088, "grad_norm": 1.0673292046505296, "learning_rate": 7.033036702790926e-05, "loss": 0.8988, "step": 3347 }, { "epoch": 1.5176790571169536, "grad_norm": 0.9591501078290576, "learning_rate": 7.032211257681081e-05, "loss": 0.8905, "step": 3348 }, { "epoch": 1.5181323662737989, "grad_norm": 0.9874092311163275, "learning_rate": 7.031385508886366e-05, "loss": 0.9019, "step": 3349 }, { "epoch": 1.5185856754306437, "grad_norm": 1.0717576107381335, "learning_rate": 7.030559456489485e-05, "loss": 0.9008, "step": 3350 }, { "epoch": 1.5190389845874885, "grad_norm": 0.8679688490971372, "learning_rate": 7.029733100573166e-05, "loss": 0.9016, "step": 3351 }, { "epoch": 1.5194922937443336, "grad_norm": 0.7520545748859458, "learning_rate": 7.028906441220175e-05, "loss": 0.8886, "step": 3352 }, { "epoch": 1.5199456029011786, "grad_norm": 0.6107535407183008, "learning_rate": 7.0280794785133e-05, "loss": 0.8898, "step": 3353 }, { "epoch": 1.5203989120580235, "grad_norm": 0.5336892549567048, "learning_rate": 7.027252212535366e-05, "loss": 0.9014, "step": 3354 }, { "epoch": 1.5208522212148685, "grad_norm": 0.7317971363348532, "learning_rate": 7.026424643369226e-05, "loss": 0.8946, "step": 3355 }, { "epoch": 1.5213055303717136, "grad_norm": 1.0016289026426834, "learning_rate": 7.025596771097765e-05, "loss": 0.8886, "step": 3356 }, { "epoch": 1.5217588395285584, "grad_norm": 1.2878188708118157, "learning_rate": 7.024768595803894e-05, "loss": 0.9133, "step": 3357 }, { "epoch": 1.5222121486854034, "grad_norm": 0.5252883848090227, "learning_rate": 7.023940117570557e-05, "loss": 0.8979, "step": 3358 }, { "epoch": 1.5226654578422485, "grad_norm": 0.5276166515097288, "learning_rate": 7.023111336480733e-05, "loss": 0.8885, "step": 3359 }, { "epoch": 1.5231187669990933, "grad_norm": 0.9789084059029532, "learning_rate": 7.022282252617424e-05, "loss": 0.9036, "step": 3360 }, { "epoch": 1.5235720761559384, "grad_norm": 1.3849525168381924, "learning_rate": 7.021452866063662e-05, "loss": 0.8768, "step": 3361 }, { "epoch": 1.5240253853127834, "grad_norm": 0.4741132653280992, "learning_rate": 7.020623176902518e-05, "loss": 0.9212, "step": 3362 }, { "epoch": 1.5244786944696282, "grad_norm": 1.111462738455124, "learning_rate": 7.019793185217084e-05, "loss": 0.8827, "step": 3363 }, { "epoch": 1.5249320036264733, "grad_norm": 1.497816980937771, "learning_rate": 7.018962891090488e-05, "loss": 0.8846, "step": 3364 }, { "epoch": 1.5253853127833183, "grad_norm": 0.6108396594491565, "learning_rate": 7.018132294605886e-05, "loss": 0.8955, "step": 3365 }, { "epoch": 1.5258386219401632, "grad_norm": 1.6223579011525113, "learning_rate": 7.017301395846465e-05, "loss": 0.9209, "step": 3366 }, { "epoch": 1.526291931097008, "grad_norm": 0.6580085945863995, "learning_rate": 7.016470194895441e-05, "loss": 0.8887, "step": 3367 }, { "epoch": 1.5267452402538533, "grad_norm": 1.50853196468573, "learning_rate": 7.015638691836062e-05, "loss": 0.9198, "step": 3368 }, { "epoch": 1.527198549410698, "grad_norm": 0.7070377971919184, "learning_rate": 7.014806886751607e-05, "loss": 0.9115, "step": 3369 }, { "epoch": 1.527651858567543, "grad_norm": 1.2103878732040383, "learning_rate": 7.013974779725378e-05, "loss": 0.9283, "step": 3370 }, { "epoch": 1.528105167724388, "grad_norm": 0.9769799095285435, "learning_rate": 7.013142370840718e-05, "loss": 0.9198, "step": 3371 }, { "epoch": 1.528558476881233, "grad_norm": 0.9101488604561656, "learning_rate": 7.012309660180997e-05, "loss": 0.8972, "step": 3372 }, { "epoch": 1.5290117860380779, "grad_norm": 0.8561616380193452, "learning_rate": 7.011476647829607e-05, "loss": 0.8887, "step": 3373 }, { "epoch": 1.529465095194923, "grad_norm": 0.7317976020934038, "learning_rate": 7.010643333869983e-05, "loss": 0.9168, "step": 3374 }, { "epoch": 1.529918404351768, "grad_norm": 0.6685895829805628, "learning_rate": 7.00980971838558e-05, "loss": 0.9158, "step": 3375 }, { "epoch": 1.5303717135086128, "grad_norm": 0.8167512804369763, "learning_rate": 7.008975801459887e-05, "loss": 0.9066, "step": 3376 }, { "epoch": 1.5308250226654578, "grad_norm": 0.833754564549505, "learning_rate": 7.008141583176425e-05, "loss": 0.8832, "step": 3377 }, { "epoch": 1.5312783318223029, "grad_norm": 0.8965520665338851, "learning_rate": 7.007307063618744e-05, "loss": 0.8929, "step": 3378 }, { "epoch": 1.5317316409791477, "grad_norm": 0.886864193927665, "learning_rate": 7.00647224287042e-05, "loss": 0.9092, "step": 3379 }, { "epoch": 1.5321849501359928, "grad_norm": 0.8699450942950521, "learning_rate": 7.005637121015065e-05, "loss": 0.8885, "step": 3380 }, { "epoch": 1.5326382592928378, "grad_norm": 0.8137933455051792, "learning_rate": 7.004801698136322e-05, "loss": 0.9095, "step": 3381 }, { "epoch": 1.5330915684496826, "grad_norm": 0.7285902798827357, "learning_rate": 7.003965974317854e-05, "loss": 0.8958, "step": 3382 }, { "epoch": 1.5335448776065277, "grad_norm": 0.7090372173601494, "learning_rate": 7.003129949643368e-05, "loss": 0.8957, "step": 3383 }, { "epoch": 1.5339981867633727, "grad_norm": 0.6531674523852011, "learning_rate": 7.00229362419659e-05, "loss": 0.8869, "step": 3384 }, { "epoch": 1.5344514959202176, "grad_norm": 0.6003696444961539, "learning_rate": 7.001456998061284e-05, "loss": 0.9165, "step": 3385 }, { "epoch": 1.5349048050770624, "grad_norm": 0.47392509676360667, "learning_rate": 7.000620071321236e-05, "loss": 0.9007, "step": 3386 }, { "epoch": 1.5353581142339077, "grad_norm": 0.39918819588074517, "learning_rate": 6.999782844060271e-05, "loss": 0.8948, "step": 3387 }, { "epoch": 1.5358114233907525, "grad_norm": 0.5221063524958204, "learning_rate": 6.998945316362237e-05, "loss": 0.9057, "step": 3388 }, { "epoch": 1.5362647325475973, "grad_norm": 0.9228123442906967, "learning_rate": 6.998107488311016e-05, "loss": 0.8937, "step": 3389 }, { "epoch": 1.5367180417044426, "grad_norm": 0.5343555116261027, "learning_rate": 6.997269359990519e-05, "loss": 0.9028, "step": 3390 }, { "epoch": 1.5371713508612874, "grad_norm": 0.5828914734242364, "learning_rate": 6.996430931484687e-05, "loss": 0.9044, "step": 3391 }, { "epoch": 1.5376246600181322, "grad_norm": 0.5322432898200647, "learning_rate": 6.99559220287749e-05, "loss": 0.9034, "step": 3392 }, { "epoch": 1.5380779691749773, "grad_norm": 0.5744264197458183, "learning_rate": 6.994753174252931e-05, "loss": 0.9047, "step": 3393 }, { "epoch": 1.5385312783318223, "grad_norm": 0.6694680227336541, "learning_rate": 6.99391384569504e-05, "loss": 0.9202, "step": 3394 }, { "epoch": 1.5389845874886672, "grad_norm": 0.8034527954030958, "learning_rate": 6.993074217287877e-05, "loss": 0.8843, "step": 3395 }, { "epoch": 1.5394378966455122, "grad_norm": 1.0222444658171286, "learning_rate": 6.992234289115536e-05, "loss": 0.9249, "step": 3396 }, { "epoch": 1.5398912058023573, "grad_norm": 1.180064655935955, "learning_rate": 6.991394061262137e-05, "loss": 0.9214, "step": 3397 }, { "epoch": 1.540344514959202, "grad_norm": 0.7935822458250976, "learning_rate": 6.99055353381183e-05, "loss": 0.8932, "step": 3398 }, { "epoch": 1.5407978241160472, "grad_norm": 0.6971538712908592, "learning_rate": 6.989712706848799e-05, "loss": 0.8871, "step": 3399 }, { "epoch": 1.5412511332728922, "grad_norm": 0.6533145569028125, "learning_rate": 6.988871580457254e-05, "loss": 0.927, "step": 3400 }, { "epoch": 1.541704442429737, "grad_norm": 0.6150714008753683, "learning_rate": 6.988030154721435e-05, "loss": 0.8704, "step": 3401 }, { "epoch": 1.542157751586582, "grad_norm": 0.4865609556401304, "learning_rate": 6.987188429725613e-05, "loss": 0.9193, "step": 3402 }, { "epoch": 1.5426110607434271, "grad_norm": 0.6035085221770325, "learning_rate": 6.986346405554093e-05, "loss": 0.9232, "step": 3403 }, { "epoch": 1.543064369900272, "grad_norm": 0.7311506136717091, "learning_rate": 6.985504082291203e-05, "loss": 0.906, "step": 3404 }, { "epoch": 1.543517679057117, "grad_norm": 0.8357909530675701, "learning_rate": 6.984661460021306e-05, "loss": 0.896, "step": 3405 }, { "epoch": 1.543970988213962, "grad_norm": 1.026077470920874, "learning_rate": 6.98381853882879e-05, "loss": 0.8744, "step": 3406 }, { "epoch": 1.5444242973708069, "grad_norm": 1.150779634252831, "learning_rate": 6.982975318798079e-05, "loss": 0.8974, "step": 3407 }, { "epoch": 1.5448776065276517, "grad_norm": 0.8562640108628576, "learning_rate": 6.982131800013623e-05, "loss": 0.8882, "step": 3408 }, { "epoch": 1.545330915684497, "grad_norm": 0.6419332139074357, "learning_rate": 6.981287982559903e-05, "loss": 0.9032, "step": 3409 }, { "epoch": 1.5457842248413418, "grad_norm": 0.572468817413538, "learning_rate": 6.98044386652143e-05, "loss": 0.9135, "step": 3410 }, { "epoch": 1.5462375339981866, "grad_norm": 0.5962601192884351, "learning_rate": 6.979599451982743e-05, "loss": 0.914, "step": 3411 }, { "epoch": 1.5466908431550317, "grad_norm": 0.7821330035186534, "learning_rate": 6.978754739028416e-05, "loss": 0.9059, "step": 3412 }, { "epoch": 1.5471441523118767, "grad_norm": 0.791601466619343, "learning_rate": 6.977909727743045e-05, "loss": 0.9048, "step": 3413 }, { "epoch": 1.5475974614687216, "grad_norm": 0.6415978982589958, "learning_rate": 6.977064418211266e-05, "loss": 0.9096, "step": 3414 }, { "epoch": 1.5480507706255666, "grad_norm": 0.6143459349325524, "learning_rate": 6.976218810517734e-05, "loss": 0.8819, "step": 3415 }, { "epoch": 1.5485040797824117, "grad_norm": 0.711029069031847, "learning_rate": 6.975372904747142e-05, "loss": 0.8853, "step": 3416 }, { "epoch": 1.5489573889392565, "grad_norm": 0.8341209328231186, "learning_rate": 6.97452670098421e-05, "loss": 0.883, "step": 3417 }, { "epoch": 1.5494106980961015, "grad_norm": 0.9248672492845741, "learning_rate": 6.973680199313684e-05, "loss": 0.8854, "step": 3418 }, { "epoch": 1.5498640072529466, "grad_norm": 1.1583456357142456, "learning_rate": 6.972833399820348e-05, "loss": 0.9021, "step": 3419 }, { "epoch": 1.5503173164097914, "grad_norm": 0.9140559549931877, "learning_rate": 6.97198630258901e-05, "loss": 0.8932, "step": 3420 }, { "epoch": 1.5507706255666365, "grad_norm": 0.8411550587773674, "learning_rate": 6.97113890770451e-05, "loss": 0.905, "step": 3421 }, { "epoch": 1.5512239347234815, "grad_norm": 0.7360667762364022, "learning_rate": 6.970291215251715e-05, "loss": 0.9285, "step": 3422 }, { "epoch": 1.5516772438803264, "grad_norm": 0.5859223611011717, "learning_rate": 6.969443225315527e-05, "loss": 0.8919, "step": 3423 }, { "epoch": 1.5521305530371714, "grad_norm": 0.5065398620356175, "learning_rate": 6.968594937980873e-05, "loss": 0.9055, "step": 3424 }, { "epoch": 1.5525838621940165, "grad_norm": 0.5969844181116204, "learning_rate": 6.96774635333271e-05, "loss": 0.8979, "step": 3425 }, { "epoch": 1.5530371713508613, "grad_norm": 0.7884158953415088, "learning_rate": 6.966897471456028e-05, "loss": 0.9079, "step": 3426 }, { "epoch": 1.553490480507706, "grad_norm": 0.9884854114470172, "learning_rate": 6.966048292435846e-05, "loss": 0.883, "step": 3427 }, { "epoch": 1.5539437896645514, "grad_norm": 1.0706041689487145, "learning_rate": 6.965198816357209e-05, "loss": 0.8781, "step": 3428 }, { "epoch": 1.5543970988213962, "grad_norm": 0.8713524558609721, "learning_rate": 6.964349043305195e-05, "loss": 0.8833, "step": 3429 }, { "epoch": 1.554850407978241, "grad_norm": 0.8643355559831445, "learning_rate": 6.963498973364914e-05, "loss": 0.8809, "step": 3430 }, { "epoch": 1.555303717135086, "grad_norm": 0.9873693246429236, "learning_rate": 6.962648606621502e-05, "loss": 0.8994, "step": 3431 }, { "epoch": 1.5557570262919311, "grad_norm": 1.0125334462256448, "learning_rate": 6.961797943160123e-05, "loss": 0.8991, "step": 3432 }, { "epoch": 1.556210335448776, "grad_norm": 0.7979227834705195, "learning_rate": 6.960946983065976e-05, "loss": 0.9225, "step": 3433 }, { "epoch": 1.556663644605621, "grad_norm": 0.7605212939947471, "learning_rate": 6.960095726424287e-05, "loss": 0.8914, "step": 3434 }, { "epoch": 1.557116953762466, "grad_norm": 0.982467368105426, "learning_rate": 6.959244173320311e-05, "loss": 0.8929, "step": 3435 }, { "epoch": 1.5575702629193109, "grad_norm": 0.9126229432614992, "learning_rate": 6.958392323839334e-05, "loss": 0.9008, "step": 3436 }, { "epoch": 1.558023572076156, "grad_norm": 0.9147660778390037, "learning_rate": 6.957540178066673e-05, "loss": 0.9056, "step": 3437 }, { "epoch": 1.558476881233001, "grad_norm": 1.040939884411926, "learning_rate": 6.956687736087669e-05, "loss": 0.8953, "step": 3438 }, { "epoch": 1.5589301903898458, "grad_norm": 1.1737851018063692, "learning_rate": 6.9558349979877e-05, "loss": 0.8972, "step": 3439 }, { "epoch": 1.5593834995466909, "grad_norm": 0.742000890846898, "learning_rate": 6.954981963852168e-05, "loss": 0.8995, "step": 3440 }, { "epoch": 1.559836808703536, "grad_norm": 0.42546161088504914, "learning_rate": 6.954128633766508e-05, "loss": 0.9062, "step": 3441 }, { "epoch": 1.5602901178603807, "grad_norm": 0.4421467208248111, "learning_rate": 6.953275007816182e-05, "loss": 0.8928, "step": 3442 }, { "epoch": 1.5607434270172258, "grad_norm": 0.7852179747152528, "learning_rate": 6.952421086086686e-05, "loss": 0.8996, "step": 3443 }, { "epoch": 1.5611967361740708, "grad_norm": 1.194824530971985, "learning_rate": 6.951566868663542e-05, "loss": 0.9029, "step": 3444 }, { "epoch": 1.5616500453309157, "grad_norm": 0.8820077893227413, "learning_rate": 6.950712355632301e-05, "loss": 0.9036, "step": 3445 }, { "epoch": 1.5621033544877605, "grad_norm": 0.6196913640747889, "learning_rate": 6.949857547078545e-05, "loss": 0.9043, "step": 3446 }, { "epoch": 1.5625566636446058, "grad_norm": 0.5204754674157664, "learning_rate": 6.949002443087886e-05, "loss": 0.8825, "step": 3447 }, { "epoch": 1.5630099728014506, "grad_norm": 0.4574161385072954, "learning_rate": 6.948147043745967e-05, "loss": 0.8806, "step": 3448 }, { "epoch": 1.5634632819582954, "grad_norm": 0.43641054433260623, "learning_rate": 6.947291349138455e-05, "loss": 0.8804, "step": 3449 }, { "epoch": 1.5639165911151405, "grad_norm": 0.5079248254867851, "learning_rate": 6.946435359351052e-05, "loss": 0.8948, "step": 3450 }, { "epoch": 1.5643699002719855, "grad_norm": 0.6111032963942783, "learning_rate": 6.945579074469491e-05, "loss": 0.8922, "step": 3451 }, { "epoch": 1.5648232094288304, "grad_norm": 1.5685819015900078, "learning_rate": 6.944722494579527e-05, "loss": 0.9088, "step": 3452 }, { "epoch": 1.5652765185856754, "grad_norm": 0.40374814395006287, "learning_rate": 6.943865619766952e-05, "loss": 0.9255, "step": 3453 }, { "epoch": 1.5657298277425205, "grad_norm": 0.9966534537545266, "learning_rate": 6.943008450117582e-05, "loss": 0.9143, "step": 3454 }, { "epoch": 1.5661831368993653, "grad_norm": 1.6065238905285104, "learning_rate": 6.942150985717266e-05, "loss": 0.9359, "step": 3455 }, { "epoch": 1.5666364460562103, "grad_norm": 0.6146411423743895, "learning_rate": 6.941293226651883e-05, "loss": 0.9035, "step": 3456 }, { "epoch": 1.5670897552130554, "grad_norm": 1.5187342055275161, "learning_rate": 6.94043517300734e-05, "loss": 0.9167, "step": 3457 }, { "epoch": 1.5675430643699002, "grad_norm": 0.9029724386166295, "learning_rate": 6.939576824869571e-05, "loss": 0.9214, "step": 3458 }, { "epoch": 1.5679963735267453, "grad_norm": 1.0426373544466878, "learning_rate": 6.938718182324546e-05, "loss": 0.9077, "step": 3459 }, { "epoch": 1.5684496826835903, "grad_norm": 1.0283102807760638, "learning_rate": 6.937859245458254e-05, "loss": 0.9221, "step": 3460 }, { "epoch": 1.5689029918404351, "grad_norm": 1.0007145305639593, "learning_rate": 6.937000014356728e-05, "loss": 0.9215, "step": 3461 }, { "epoch": 1.5693563009972802, "grad_norm": 1.095064127871526, "learning_rate": 6.936140489106019e-05, "loss": 0.9089, "step": 3462 }, { "epoch": 1.5698096101541252, "grad_norm": 1.0872916499437553, "learning_rate": 6.935280669792208e-05, "loss": 0.899, "step": 3463 }, { "epoch": 1.57026291931097, "grad_norm": 0.7978618995313694, "learning_rate": 6.934420556501414e-05, "loss": 0.9037, "step": 3464 }, { "epoch": 1.570716228467815, "grad_norm": 0.6509039044126267, "learning_rate": 6.933560149319776e-05, "loss": 0.9139, "step": 3465 }, { "epoch": 1.5711695376246602, "grad_norm": 0.6767452105865385, "learning_rate": 6.932699448333468e-05, "loss": 0.8923, "step": 3466 }, { "epoch": 1.571622846781505, "grad_norm": 0.607939486034077, "learning_rate": 6.93183845362869e-05, "loss": 0.9109, "step": 3467 }, { "epoch": 1.5720761559383498, "grad_norm": 0.7156869200550134, "learning_rate": 6.930977165291676e-05, "loss": 0.9057, "step": 3468 }, { "epoch": 1.572529465095195, "grad_norm": 0.7057062992843104, "learning_rate": 6.930115583408684e-05, "loss": 0.893, "step": 3469 }, { "epoch": 1.57298277425204, "grad_norm": 0.7127999203650709, "learning_rate": 6.929253708066004e-05, "loss": 0.8912, "step": 3470 }, { "epoch": 1.5734360834088847, "grad_norm": 0.8789778040263142, "learning_rate": 6.928391539349959e-05, "loss": 0.9152, "step": 3471 }, { "epoch": 1.5738893925657298, "grad_norm": 0.9973433637040727, "learning_rate": 6.927529077346892e-05, "loss": 0.9199, "step": 3472 }, { "epoch": 1.5743427017225748, "grad_norm": 1.069991067544346, "learning_rate": 6.926666322143186e-05, "loss": 0.8899, "step": 3473 }, { "epoch": 1.5747960108794197, "grad_norm": 0.8931699689212121, "learning_rate": 6.925803273825246e-05, "loss": 0.8987, "step": 3474 }, { "epoch": 1.5752493200362647, "grad_norm": 0.7495576945484714, "learning_rate": 6.924939932479509e-05, "loss": 0.8895, "step": 3475 }, { "epoch": 1.5757026291931098, "grad_norm": 0.627885771388301, "learning_rate": 6.924076298192442e-05, "loss": 0.9083, "step": 3476 }, { "epoch": 1.5761559383499546, "grad_norm": 0.5414245600153471, "learning_rate": 6.923212371050539e-05, "loss": 0.9007, "step": 3477 }, { "epoch": 1.5766092475067996, "grad_norm": 0.42650316011778633, "learning_rate": 6.922348151140327e-05, "loss": 0.8728, "step": 3478 }, { "epoch": 1.5770625566636447, "grad_norm": 0.48922025603627517, "learning_rate": 6.921483638548358e-05, "loss": 0.8788, "step": 3479 }, { "epoch": 1.5775158658204895, "grad_norm": 0.6618573444939764, "learning_rate": 6.920618833361218e-05, "loss": 0.8982, "step": 3480 }, { "epoch": 1.5779691749773346, "grad_norm": 0.7672499395943403, "learning_rate": 6.919753735665517e-05, "loss": 0.8826, "step": 3481 }, { "epoch": 1.5784224841341796, "grad_norm": 0.8680653944125539, "learning_rate": 6.918888345547898e-05, "loss": 0.9068, "step": 3482 }, { "epoch": 1.5788757932910245, "grad_norm": 0.9483423307200312, "learning_rate": 6.918022663095035e-05, "loss": 0.9154, "step": 3483 }, { "epoch": 1.5793291024478693, "grad_norm": 1.156731256192679, "learning_rate": 6.917156688393624e-05, "loss": 0.8998, "step": 3484 }, { "epoch": 1.5797824116047146, "grad_norm": 0.7952260685534042, "learning_rate": 6.916290421530398e-05, "loss": 0.8843, "step": 3485 }, { "epoch": 1.5802357207615594, "grad_norm": 0.6311029567435766, "learning_rate": 6.915423862592116e-05, "loss": 0.8964, "step": 3486 }, { "epoch": 1.5806890299184042, "grad_norm": 0.7196815630305526, "learning_rate": 6.914557011665566e-05, "loss": 0.8888, "step": 3487 }, { "epoch": 1.5811423390752495, "grad_norm": 0.7653997489374949, "learning_rate": 6.913689868837564e-05, "loss": 0.9083, "step": 3488 }, { "epoch": 1.5815956482320943, "grad_norm": 0.7094430965687532, "learning_rate": 6.91282243419496e-05, "loss": 0.9025, "step": 3489 }, { "epoch": 1.5820489573889391, "grad_norm": 0.5746846798808669, "learning_rate": 6.911954707824627e-05, "loss": 0.8915, "step": 3490 }, { "epoch": 1.5825022665457842, "grad_norm": 0.48119994967181157, "learning_rate": 6.911086689813473e-05, "loss": 0.8794, "step": 3491 }, { "epoch": 1.5829555757026292, "grad_norm": 0.5783284728165339, "learning_rate": 6.910218380248433e-05, "loss": 0.9377, "step": 3492 }, { "epoch": 1.583408884859474, "grad_norm": 0.7731426429567084, "learning_rate": 6.909349779216466e-05, "loss": 0.8959, "step": 3493 }, { "epoch": 1.5838621940163191, "grad_norm": 1.0121495279820456, "learning_rate": 6.908480886804572e-05, "loss": 0.892, "step": 3494 }, { "epoch": 1.5843155031731642, "grad_norm": 1.1830689501194072, "learning_rate": 6.907611703099767e-05, "loss": 0.9122, "step": 3495 }, { "epoch": 1.584768812330009, "grad_norm": 0.4864961538207769, "learning_rate": 6.906742228189105e-05, "loss": 0.8983, "step": 3496 }, { "epoch": 1.585222121486854, "grad_norm": 0.3634061184925307, "learning_rate": 6.905872462159667e-05, "loss": 0.9106, "step": 3497 }, { "epoch": 1.585675430643699, "grad_norm": 0.6408574796435869, "learning_rate": 6.90500240509856e-05, "loss": 0.9033, "step": 3498 }, { "epoch": 1.586128739800544, "grad_norm": 1.0534346334817264, "learning_rate": 6.904132057092925e-05, "loss": 0.8979, "step": 3499 }, { "epoch": 1.586582048957389, "grad_norm": 1.2632171406502637, "learning_rate": 6.90326141822993e-05, "loss": 0.8998, "step": 3500 }, { "epoch": 1.587035358114234, "grad_norm": 0.5941946024455969, "learning_rate": 6.902390488596772e-05, "loss": 0.9142, "step": 3501 }, { "epoch": 1.5874886672710788, "grad_norm": 0.5120139785163965, "learning_rate": 6.901519268280674e-05, "loss": 0.9169, "step": 3502 }, { "epoch": 1.587941976427924, "grad_norm": 0.9547296666082348, "learning_rate": 6.900647757368897e-05, "loss": 0.8875, "step": 3503 }, { "epoch": 1.588395285584769, "grad_norm": 1.4399489599626973, "learning_rate": 6.89977595594872e-05, "loss": 0.9045, "step": 3504 }, { "epoch": 1.5888485947416138, "grad_norm": 0.4418612797402434, "learning_rate": 6.898903864107459e-05, "loss": 0.91, "step": 3505 }, { "epoch": 1.5893019038984586, "grad_norm": 1.1302565903994612, "learning_rate": 6.898031481932457e-05, "loss": 0.8933, "step": 3506 }, { "epoch": 1.5897552130553039, "grad_norm": 1.314445550346696, "learning_rate": 6.897158809511085e-05, "loss": 0.9006, "step": 3507 }, { "epoch": 1.5902085222121487, "grad_norm": 0.6528430974958581, "learning_rate": 6.896285846930744e-05, "loss": 0.904, "step": 3508 }, { "epoch": 1.5906618313689935, "grad_norm": 1.0282358447529585, "learning_rate": 6.895412594278862e-05, "loss": 0.913, "step": 3509 }, { "epoch": 1.5911151405258386, "grad_norm": 1.3116471961910516, "learning_rate": 6.8945390516429e-05, "loss": 0.9021, "step": 3510 }, { "epoch": 1.5915684496826836, "grad_norm": 0.6634565174511762, "learning_rate": 6.893665219110346e-05, "loss": 0.8976, "step": 3511 }, { "epoch": 1.5920217588395285, "grad_norm": 1.1665099730274597, "learning_rate": 6.892791096768713e-05, "loss": 0.9071, "step": 3512 }, { "epoch": 1.5924750679963735, "grad_norm": 0.955441087396011, "learning_rate": 6.891916684705554e-05, "loss": 0.8959, "step": 3513 }, { "epoch": 1.5929283771532186, "grad_norm": 0.8699826377961576, "learning_rate": 6.891041983008437e-05, "loss": 0.9118, "step": 3514 }, { "epoch": 1.5933816863100634, "grad_norm": 1.0023910274672492, "learning_rate": 6.89016699176497e-05, "loss": 0.9154, "step": 3515 }, { "epoch": 1.5938349954669084, "grad_norm": 0.8842493081990018, "learning_rate": 6.889291711062784e-05, "loss": 0.8958, "step": 3516 }, { "epoch": 1.5942883046237535, "grad_norm": 0.9121814086242158, "learning_rate": 6.888416140989542e-05, "loss": 0.89, "step": 3517 }, { "epoch": 1.5947416137805983, "grad_norm": 1.0061300605239711, "learning_rate": 6.887540281632934e-05, "loss": 0.8772, "step": 3518 }, { "epoch": 1.5951949229374434, "grad_norm": 0.8829363064192381, "learning_rate": 6.886664133080681e-05, "loss": 0.8934, "step": 3519 }, { "epoch": 1.5956482320942884, "grad_norm": 0.7740001831175977, "learning_rate": 6.88578769542053e-05, "loss": 0.9136, "step": 3520 }, { "epoch": 1.5961015412511332, "grad_norm": 0.6317155146785054, "learning_rate": 6.884910968740264e-05, "loss": 0.8864, "step": 3521 }, { "epoch": 1.5965548504079783, "grad_norm": 0.5532794390073704, "learning_rate": 6.884033953127683e-05, "loss": 0.9064, "step": 3522 }, { "epoch": 1.5970081595648233, "grad_norm": 0.5372686427221548, "learning_rate": 6.883156648670626e-05, "loss": 0.8926, "step": 3523 }, { "epoch": 1.5974614687216682, "grad_norm": 0.5870798032007819, "learning_rate": 6.882279055456956e-05, "loss": 0.8883, "step": 3524 }, { "epoch": 1.597914777878513, "grad_norm": 0.5981439537058033, "learning_rate": 6.88140117357457e-05, "loss": 0.897, "step": 3525 }, { "epoch": 1.5983680870353583, "grad_norm": 0.6892704528813236, "learning_rate": 6.880523003111387e-05, "loss": 0.9017, "step": 3526 }, { "epoch": 1.598821396192203, "grad_norm": 0.7381602467190929, "learning_rate": 6.87964454415536e-05, "loss": 0.9088, "step": 3527 }, { "epoch": 1.599274705349048, "grad_norm": 0.7306021159213048, "learning_rate": 6.878765796794467e-05, "loss": 0.8925, "step": 3528 }, { "epoch": 1.599728014505893, "grad_norm": 0.7751719087247492, "learning_rate": 6.877886761116721e-05, "loss": 0.8836, "step": 3529 }, { "epoch": 1.600181323662738, "grad_norm": 0.8028622201532575, "learning_rate": 6.877007437210157e-05, "loss": 0.9233, "step": 3530 }, { "epoch": 1.6006346328195828, "grad_norm": 0.8287567926352678, "learning_rate": 6.876127825162843e-05, "loss": 0.8952, "step": 3531 }, { "epoch": 1.601087941976428, "grad_norm": 0.8324450514479146, "learning_rate": 6.875247925062875e-05, "loss": 0.9065, "step": 3532 }, { "epoch": 1.601541251133273, "grad_norm": 0.8212973184583815, "learning_rate": 6.874367736998377e-05, "loss": 0.9022, "step": 3533 }, { "epoch": 1.6019945602901178, "grad_norm": 0.9071916685634169, "learning_rate": 6.873487261057501e-05, "loss": 0.9018, "step": 3534 }, { "epoch": 1.6024478694469628, "grad_norm": 1.023372185902829, "learning_rate": 6.872606497328433e-05, "loss": 0.8669, "step": 3535 }, { "epoch": 1.6029011786038079, "grad_norm": 0.8402389101488351, "learning_rate": 6.87172544589938e-05, "loss": 0.8916, "step": 3536 }, { "epoch": 1.6033544877606527, "grad_norm": 0.6694585152906725, "learning_rate": 6.870844106858585e-05, "loss": 0.8977, "step": 3537 }, { "epoch": 1.6038077969174978, "grad_norm": 0.47013145062162326, "learning_rate": 6.869962480294315e-05, "loss": 0.8916, "step": 3538 }, { "epoch": 1.6042611060743428, "grad_norm": 0.4905640745963116, "learning_rate": 6.869080566294868e-05, "loss": 0.8982, "step": 3539 }, { "epoch": 1.6047144152311876, "grad_norm": 0.6950153420816058, "learning_rate": 6.86819836494857e-05, "loss": 0.9318, "step": 3540 }, { "epoch": 1.6051677243880327, "grad_norm": 0.38223484725197376, "learning_rate": 6.867315876343776e-05, "loss": 0.8975, "step": 3541 }, { "epoch": 1.6056210335448777, "grad_norm": 0.46016199562336324, "learning_rate": 6.866433100568871e-05, "loss": 0.8929, "step": 3542 }, { "epoch": 1.6060743427017226, "grad_norm": 0.48079690486640153, "learning_rate": 6.865550037712267e-05, "loss": 0.9183, "step": 3543 }, { "epoch": 1.6065276518585674, "grad_norm": 0.4671410304068627, "learning_rate": 6.864666687862404e-05, "loss": 0.911, "step": 3544 }, { "epoch": 1.6069809610154127, "grad_norm": 0.5243218314357839, "learning_rate": 6.863783051107753e-05, "loss": 0.896, "step": 3545 }, { "epoch": 1.6074342701722575, "grad_norm": 0.6184754206189658, "learning_rate": 6.862899127536814e-05, "loss": 0.91, "step": 3546 }, { "epoch": 1.6078875793291023, "grad_norm": 0.7839382450978375, "learning_rate": 6.862014917238112e-05, "loss": 0.9126, "step": 3547 }, { "epoch": 1.6083408884859474, "grad_norm": 0.7956009851337998, "learning_rate": 6.861130420300205e-05, "loss": 0.8831, "step": 3548 }, { "epoch": 1.6087941976427924, "grad_norm": 0.8970589514474093, "learning_rate": 6.860245636811679e-05, "loss": 0.9037, "step": 3549 }, { "epoch": 1.6092475067996372, "grad_norm": 1.0722938229124273, "learning_rate": 6.859360566861145e-05, "loss": 0.9074, "step": 3550 }, { "epoch": 1.6097008159564823, "grad_norm": 0.9411551064417908, "learning_rate": 6.858475210537248e-05, "loss": 0.8965, "step": 3551 }, { "epoch": 1.6101541251133273, "grad_norm": 0.786067315598286, "learning_rate": 6.857589567928657e-05, "loss": 0.8784, "step": 3552 }, { "epoch": 1.6106074342701722, "grad_norm": 0.6288063798432751, "learning_rate": 6.856703639124072e-05, "loss": 0.8685, "step": 3553 }, { "epoch": 1.6110607434270172, "grad_norm": 0.4789087145681329, "learning_rate": 6.855817424212224e-05, "loss": 0.8954, "step": 3554 }, { "epoch": 1.6115140525838623, "grad_norm": 0.5064985476311177, "learning_rate": 6.854930923281866e-05, "loss": 0.9092, "step": 3555 }, { "epoch": 1.611967361740707, "grad_norm": 0.5195751664602414, "learning_rate": 6.854044136421784e-05, "loss": 0.9087, "step": 3556 }, { "epoch": 1.6124206708975521, "grad_norm": 0.571452101387126, "learning_rate": 6.853157063720796e-05, "loss": 0.8919, "step": 3557 }, { "epoch": 1.6128739800543972, "grad_norm": 0.5507847608018447, "learning_rate": 6.852269705267743e-05, "loss": 0.9199, "step": 3558 }, { "epoch": 1.613327289211242, "grad_norm": 0.6065794885807064, "learning_rate": 6.851382061151496e-05, "loss": 0.91, "step": 3559 }, { "epoch": 1.613780598368087, "grad_norm": 0.7928312160475531, "learning_rate": 6.850494131460955e-05, "loss": 0.8954, "step": 3560 }, { "epoch": 1.6142339075249321, "grad_norm": 0.9674501781219166, "learning_rate": 6.84960591628505e-05, "loss": 0.9014, "step": 3561 }, { "epoch": 1.614687216681777, "grad_norm": 1.1199436164614807, "learning_rate": 6.848717415712737e-05, "loss": 0.8925, "step": 3562 }, { "epoch": 1.6151405258386218, "grad_norm": 0.8156634694063205, "learning_rate": 6.847828629833002e-05, "loss": 0.8845, "step": 3563 }, { "epoch": 1.615593834995467, "grad_norm": 0.6375156251789444, "learning_rate": 6.846939558734862e-05, "loss": 0.9097, "step": 3564 }, { "epoch": 1.6160471441523119, "grad_norm": 0.5887163158184114, "learning_rate": 6.846050202507358e-05, "loss": 0.9087, "step": 3565 }, { "epoch": 1.6165004533091567, "grad_norm": 0.6279702972559602, "learning_rate": 6.84516056123956e-05, "loss": 0.9022, "step": 3566 }, { "epoch": 1.616953762466002, "grad_norm": 0.6816655770533142, "learning_rate": 6.844270635020571e-05, "loss": 0.8851, "step": 3567 }, { "epoch": 1.6174070716228468, "grad_norm": 0.6703459016882779, "learning_rate": 6.843380423939519e-05, "loss": 0.9001, "step": 3568 }, { "epoch": 1.6178603807796916, "grad_norm": 0.7050315358211701, "learning_rate": 6.84248992808556e-05, "loss": 0.8983, "step": 3569 }, { "epoch": 1.6183136899365367, "grad_norm": 0.7206324005292364, "learning_rate": 6.841599147547881e-05, "loss": 0.9067, "step": 3570 }, { "epoch": 1.6187669990933817, "grad_norm": 0.7907283833518338, "learning_rate": 6.840708082415698e-05, "loss": 0.8964, "step": 3571 }, { "epoch": 1.6192203082502266, "grad_norm": 0.8946711602852425, "learning_rate": 6.839816732778251e-05, "loss": 0.9122, "step": 3572 }, { "epoch": 1.6196736174070716, "grad_norm": 1.026824527683341, "learning_rate": 6.83892509872481e-05, "loss": 0.9053, "step": 3573 }, { "epoch": 1.6201269265639167, "grad_norm": 1.087309805929588, "learning_rate": 6.83803318034468e-05, "loss": 0.8969, "step": 3574 }, { "epoch": 1.6205802357207615, "grad_norm": 0.8134494673736399, "learning_rate": 6.837140977727183e-05, "loss": 0.8907, "step": 3575 }, { "epoch": 1.6210335448776065, "grad_norm": 0.6260176329622036, "learning_rate": 6.836248490961681e-05, "loss": 0.89, "step": 3576 }, { "epoch": 1.6214868540344516, "grad_norm": 0.5996344648119839, "learning_rate": 6.835355720137556e-05, "loss": 0.9074, "step": 3577 }, { "epoch": 1.6219401631912964, "grad_norm": 0.7918760041083078, "learning_rate": 6.834462665344224e-05, "loss": 0.9141, "step": 3578 }, { "epoch": 1.6223934723481415, "grad_norm": 0.8645609126420297, "learning_rate": 6.833569326671125e-05, "loss": 0.9037, "step": 3579 }, { "epoch": 1.6228467815049865, "grad_norm": 0.772092561099653, "learning_rate": 6.83267570420773e-05, "loss": 0.9234, "step": 3580 }, { "epoch": 1.6233000906618313, "grad_norm": 0.7439394560451666, "learning_rate": 6.831781798043538e-05, "loss": 0.8805, "step": 3581 }, { "epoch": 1.6237533998186764, "grad_norm": 0.872882747883814, "learning_rate": 6.830887608268078e-05, "loss": 0.9161, "step": 3582 }, { "epoch": 1.6242067089755214, "grad_norm": 1.103219780112794, "learning_rate": 6.829993134970903e-05, "loss": 0.901, "step": 3583 }, { "epoch": 1.6246600181323663, "grad_norm": 0.9519555964713374, "learning_rate": 6.829098378241598e-05, "loss": 0.902, "step": 3584 }, { "epoch": 1.625113327289211, "grad_norm": 0.8057963417710626, "learning_rate": 6.828203338169775e-05, "loss": 0.8987, "step": 3585 }, { "epoch": 1.6255666364460564, "grad_norm": 0.6976793922840698, "learning_rate": 6.827308014845078e-05, "loss": 0.9068, "step": 3586 }, { "epoch": 1.6260199456029012, "grad_norm": 0.6189335870160746, "learning_rate": 6.826412408357174e-05, "loss": 0.8892, "step": 3587 }, { "epoch": 1.626473254759746, "grad_norm": 0.57753747397956, "learning_rate": 6.82551651879576e-05, "loss": 0.8804, "step": 3588 }, { "epoch": 1.626926563916591, "grad_norm": 0.4977009010773931, "learning_rate": 6.82462034625056e-05, "loss": 0.8915, "step": 3589 }, { "epoch": 1.6273798730734361, "grad_norm": 0.5367653360098366, "learning_rate": 6.823723890811334e-05, "loss": 0.9062, "step": 3590 }, { "epoch": 1.627833182230281, "grad_norm": 0.6208195288993608, "learning_rate": 6.822827152567861e-05, "loss": 0.9077, "step": 3591 }, { "epoch": 1.628286491387126, "grad_norm": 0.637917027011857, "learning_rate": 6.821930131609952e-05, "loss": 0.8942, "step": 3592 }, { "epoch": 1.628739800543971, "grad_norm": 0.6835031078574401, "learning_rate": 6.821032828027446e-05, "loss": 0.9054, "step": 3593 }, { "epoch": 1.6291931097008159, "grad_norm": 0.8221001229037813, "learning_rate": 6.820135241910214e-05, "loss": 0.8876, "step": 3594 }, { "epoch": 1.629646418857661, "grad_norm": 1.0058274100037405, "learning_rate": 6.819237373348147e-05, "loss": 0.9005, "step": 3595 }, { "epoch": 1.630099728014506, "grad_norm": 1.1072104219587726, "learning_rate": 6.818339222431173e-05, "loss": 0.922, "step": 3596 }, { "epoch": 1.6305530371713508, "grad_norm": 0.8095851153057454, "learning_rate": 6.817440789249242e-05, "loss": 0.8844, "step": 3597 }, { "epoch": 1.6310063463281959, "grad_norm": 0.5972652184055249, "learning_rate": 6.816542073892335e-05, "loss": 0.9032, "step": 3598 }, { "epoch": 1.631459655485041, "grad_norm": 0.4597924359755618, "learning_rate": 6.815643076450464e-05, "loss": 0.9068, "step": 3599 }, { "epoch": 1.6319129646418857, "grad_norm": 0.354782797471971, "learning_rate": 6.814743797013661e-05, "loss": 0.8955, "step": 3600 }, { "epoch": 1.6323662737987308, "grad_norm": 0.4419524471799645, "learning_rate": 6.813844235671996e-05, "loss": 0.8958, "step": 3601 }, { "epoch": 1.6328195829555758, "grad_norm": 0.6275067149799717, "learning_rate": 6.81294439251556e-05, "loss": 0.8846, "step": 3602 }, { "epoch": 1.6332728921124207, "grad_norm": 0.7460816002068694, "learning_rate": 6.812044267634478e-05, "loss": 0.8925, "step": 3603 }, { "epoch": 1.6337262012692655, "grad_norm": 0.8585609070663845, "learning_rate": 6.811143861118897e-05, "loss": 0.911, "step": 3604 }, { "epoch": 1.6341795104261108, "grad_norm": 0.968042733196462, "learning_rate": 6.810243173058996e-05, "loss": 0.9064, "step": 3605 }, { "epoch": 1.6346328195829556, "grad_norm": 1.1540774982159099, "learning_rate": 6.809342203544983e-05, "loss": 0.9083, "step": 3606 }, { "epoch": 1.6350861287398004, "grad_norm": 0.7536147595911316, "learning_rate": 6.808440952667091e-05, "loss": 0.8901, "step": 3607 }, { "epoch": 1.6355394378966455, "grad_norm": 0.5006995481147188, "learning_rate": 6.807539420515584e-05, "loss": 0.9093, "step": 3608 }, { "epoch": 1.6359927470534905, "grad_norm": 0.3862990054187506, "learning_rate": 6.806637607180753e-05, "loss": 0.8893, "step": 3609 }, { "epoch": 1.6364460562103353, "grad_norm": 0.4610610609597157, "learning_rate": 6.805735512752917e-05, "loss": 0.896, "step": 3610 }, { "epoch": 1.6368993653671804, "grad_norm": 0.5494738055477415, "learning_rate": 6.804833137322423e-05, "loss": 0.9134, "step": 3611 }, { "epoch": 1.6373526745240254, "grad_norm": 0.6603965489130472, "learning_rate": 6.803930480979647e-05, "loss": 0.8774, "step": 3612 }, { "epoch": 1.6378059836808703, "grad_norm": 0.8174145941668239, "learning_rate": 6.803027543814993e-05, "loss": 0.9178, "step": 3613 }, { "epoch": 1.6382592928377153, "grad_norm": 0.9111377701935647, "learning_rate": 6.802124325918893e-05, "loss": 0.8862, "step": 3614 }, { "epoch": 1.6387126019945604, "grad_norm": 1.0307734903419772, "learning_rate": 6.801220827381807e-05, "loss": 0.9096, "step": 3615 }, { "epoch": 1.6391659111514052, "grad_norm": 1.0152000356689281, "learning_rate": 6.800317048294221e-05, "loss": 0.9017, "step": 3616 }, { "epoch": 1.6396192203082502, "grad_norm": 0.9345178904602077, "learning_rate": 6.799412988746653e-05, "loss": 0.892, "step": 3617 }, { "epoch": 1.6400725294650953, "grad_norm": 0.8503740206474986, "learning_rate": 6.798508648829649e-05, "loss": 0.9054, "step": 3618 }, { "epoch": 1.6405258386219401, "grad_norm": 0.8173227326952438, "learning_rate": 6.797604028633777e-05, "loss": 0.8846, "step": 3619 }, { "epoch": 1.6409791477787852, "grad_norm": 0.78506646673894, "learning_rate": 6.796699128249639e-05, "loss": 0.8843, "step": 3620 }, { "epoch": 1.6414324569356302, "grad_norm": 0.7206491050075412, "learning_rate": 6.795793947767865e-05, "loss": 0.8951, "step": 3621 }, { "epoch": 1.641885766092475, "grad_norm": 0.6466286842039385, "learning_rate": 6.794888487279111e-05, "loss": 0.8966, "step": 3622 }, { "epoch": 1.6423390752493199, "grad_norm": 0.5722235062625487, "learning_rate": 6.79398274687406e-05, "loss": 0.894, "step": 3623 }, { "epoch": 1.6427923844061652, "grad_norm": 0.48202875307759907, "learning_rate": 6.793076726643426e-05, "loss": 0.8736, "step": 3624 }, { "epoch": 1.64324569356301, "grad_norm": 0.45770681124832197, "learning_rate": 6.792170426677949e-05, "loss": 0.8841, "step": 3625 }, { "epoch": 1.6436990027198548, "grad_norm": 0.45061221530577905, "learning_rate": 6.791263847068397e-05, "loss": 0.8914, "step": 3626 }, { "epoch": 1.6441523118766999, "grad_norm": 0.40975052532294676, "learning_rate": 6.790356987905568e-05, "loss": 0.8951, "step": 3627 }, { "epoch": 1.644605621033545, "grad_norm": 0.40729797859661043, "learning_rate": 6.789449849280286e-05, "loss": 0.8854, "step": 3628 }, { "epoch": 1.6450589301903897, "grad_norm": 0.4760270211102829, "learning_rate": 6.788542431283403e-05, "loss": 0.9038, "step": 3629 }, { "epoch": 1.6455122393472348, "grad_norm": 0.5525679856005661, "learning_rate": 6.787634734005801e-05, "loss": 0.9095, "step": 3630 }, { "epoch": 1.6459655485040798, "grad_norm": 0.6458016338095786, "learning_rate": 6.786726757538387e-05, "loss": 0.8895, "step": 3631 }, { "epoch": 1.6464188576609247, "grad_norm": 0.7300418732779198, "learning_rate": 6.785818501972099e-05, "loss": 0.9084, "step": 3632 }, { "epoch": 1.6468721668177697, "grad_norm": 0.7897662751923281, "learning_rate": 6.7849099673979e-05, "loss": 0.9093, "step": 3633 }, { "epoch": 1.6473254759746148, "grad_norm": 0.9235213687287538, "learning_rate": 6.784001153906783e-05, "loss": 0.8728, "step": 3634 }, { "epoch": 1.6477787851314596, "grad_norm": 1.040689805636278, "learning_rate": 6.783092061589769e-05, "loss": 0.9007, "step": 3635 }, { "epoch": 1.6482320942883046, "grad_norm": 1.0169434785416702, "learning_rate": 6.782182690537905e-05, "loss": 0.8947, "step": 3636 }, { "epoch": 1.6486854034451497, "grad_norm": 0.8831521880702005, "learning_rate": 6.781273040842269e-05, "loss": 0.8976, "step": 3637 }, { "epoch": 1.6491387126019945, "grad_norm": 0.7643503472295496, "learning_rate": 6.780363112593962e-05, "loss": 0.9015, "step": 3638 }, { "epoch": 1.6495920217588396, "grad_norm": 0.6081668797309726, "learning_rate": 6.779452905884119e-05, "loss": 0.8964, "step": 3639 }, { "epoch": 1.6500453309156846, "grad_norm": 0.4635981485410173, "learning_rate": 6.778542420803899e-05, "loss": 0.8932, "step": 3640 }, { "epoch": 1.6504986400725294, "grad_norm": 0.43823324239202754, "learning_rate": 6.77763165744449e-05, "loss": 0.9052, "step": 3641 }, { "epoch": 1.6509519492293743, "grad_norm": 0.5583505297955579, "learning_rate": 6.776720615897107e-05, "loss": 0.8981, "step": 3642 }, { "epoch": 1.6514052583862195, "grad_norm": 0.7704029272789742, "learning_rate": 6.775809296252994e-05, "loss": 0.8918, "step": 3643 }, { "epoch": 1.6518585675430644, "grad_norm": 0.996572135511601, "learning_rate": 6.77489769860342e-05, "loss": 0.9142, "step": 3644 }, { "epoch": 1.6523118766999092, "grad_norm": 1.1759882977162128, "learning_rate": 6.773985823039689e-05, "loss": 0.8821, "step": 3645 }, { "epoch": 1.6527651858567545, "grad_norm": 0.7111418591841604, "learning_rate": 6.773073669653123e-05, "loss": 0.8918, "step": 3646 }, { "epoch": 1.6532184950135993, "grad_norm": 0.5069146978260031, "learning_rate": 6.772161238535079e-05, "loss": 0.9174, "step": 3647 }, { "epoch": 1.6536718041704441, "grad_norm": 0.5277908597859619, "learning_rate": 6.771248529776941e-05, "loss": 0.897, "step": 3648 }, { "epoch": 1.6541251133272892, "grad_norm": 0.5348059146603726, "learning_rate": 6.770335543470119e-05, "loss": 0.8985, "step": 3649 }, { "epoch": 1.6545784224841342, "grad_norm": 0.5616280633571261, "learning_rate": 6.769422279706048e-05, "loss": 0.9051, "step": 3650 }, { "epoch": 1.655031731640979, "grad_norm": 0.639606168592396, "learning_rate": 6.768508738576198e-05, "loss": 0.8811, "step": 3651 }, { "epoch": 1.655485040797824, "grad_norm": 0.8508344692642427, "learning_rate": 6.76759492017206e-05, "loss": 0.8886, "step": 3652 }, { "epoch": 1.6559383499546692, "grad_norm": 1.109872429133024, "learning_rate": 6.76668082458516e-05, "loss": 0.9105, "step": 3653 }, { "epoch": 1.656391659111514, "grad_norm": 0.9279523516546986, "learning_rate": 6.765766451907042e-05, "loss": 0.8766, "step": 3654 }, { "epoch": 1.656844968268359, "grad_norm": 0.716245588675174, "learning_rate": 6.764851802229284e-05, "loss": 0.8944, "step": 3655 }, { "epoch": 1.657298277425204, "grad_norm": 0.5979038407975756, "learning_rate": 6.763936875643495e-05, "loss": 0.9021, "step": 3656 }, { "epoch": 1.657751586582049, "grad_norm": 0.5126493550522623, "learning_rate": 6.763021672241305e-05, "loss": 0.9124, "step": 3657 }, { "epoch": 1.658204895738894, "grad_norm": 0.4817223539229461, "learning_rate": 6.762106192114372e-05, "loss": 0.8955, "step": 3658 }, { "epoch": 1.658658204895739, "grad_norm": 0.553614947200673, "learning_rate": 6.761190435354387e-05, "loss": 0.8946, "step": 3659 }, { "epoch": 1.6591115140525838, "grad_norm": 0.6142691680743441, "learning_rate": 6.760274402053064e-05, "loss": 0.9153, "step": 3660 }, { "epoch": 1.6595648232094289, "grad_norm": 0.7417867760255848, "learning_rate": 6.75935809230215e-05, "loss": 0.9012, "step": 3661 }, { "epoch": 1.660018132366274, "grad_norm": 0.9284362010428335, "learning_rate": 6.758441506193412e-05, "loss": 0.8652, "step": 3662 }, { "epoch": 1.6604714415231188, "grad_norm": 1.1877643344920266, "learning_rate": 6.75752464381865e-05, "loss": 0.9111, "step": 3663 }, { "epoch": 1.6609247506799636, "grad_norm": 0.7336975485877402, "learning_rate": 6.756607505269691e-05, "loss": 0.89, "step": 3664 }, { "epoch": 1.6613780598368089, "grad_norm": 0.49857394156775864, "learning_rate": 6.755690090638388e-05, "loss": 0.9013, "step": 3665 }, { "epoch": 1.6618313689936537, "grad_norm": 0.6023559825660622, "learning_rate": 6.754772400016624e-05, "loss": 0.8933, "step": 3666 }, { "epoch": 1.6622846781504985, "grad_norm": 0.8634041229904621, "learning_rate": 6.753854433496308e-05, "loss": 0.8761, "step": 3667 }, { "epoch": 1.6627379873073436, "grad_norm": 1.0162664068129867, "learning_rate": 6.752936191169378e-05, "loss": 0.8923, "step": 3668 }, { "epoch": 1.6631912964641886, "grad_norm": 0.9127584389342842, "learning_rate": 6.752017673127797e-05, "loss": 0.9115, "step": 3669 }, { "epoch": 1.6636446056210334, "grad_norm": 0.7303160201974402, "learning_rate": 6.751098879463558e-05, "loss": 0.8909, "step": 3670 }, { "epoch": 1.6640979147778785, "grad_norm": 0.7249444205919668, "learning_rate": 6.75017981026868e-05, "loss": 0.9181, "step": 3671 }, { "epoch": 1.6645512239347235, "grad_norm": 0.8057239256199427, "learning_rate": 6.749260465635214e-05, "loss": 0.8982, "step": 3672 }, { "epoch": 1.6650045330915684, "grad_norm": 0.8449071467964603, "learning_rate": 6.74834084565523e-05, "loss": 0.8981, "step": 3673 }, { "epoch": 1.6654578422484134, "grad_norm": 0.9051618252765106, "learning_rate": 6.747420950420835e-05, "loss": 0.8975, "step": 3674 }, { "epoch": 1.6659111514052585, "grad_norm": 0.9487371173374164, "learning_rate": 6.746500780024155e-05, "loss": 0.8858, "step": 3675 }, { "epoch": 1.6663644605621033, "grad_norm": 0.96498300909604, "learning_rate": 6.745580334557352e-05, "loss": 0.8719, "step": 3676 }, { "epoch": 1.6668177697189483, "grad_norm": 0.8586068245256748, "learning_rate": 6.744659614112608e-05, "loss": 0.8952, "step": 3677 }, { "epoch": 1.6672710788757934, "grad_norm": 0.8288546713736509, "learning_rate": 6.743738618782137e-05, "loss": 0.8907, "step": 3678 }, { "epoch": 1.6677243880326382, "grad_norm": 0.7681528985546247, "learning_rate": 6.742817348658181e-05, "loss": 0.8754, "step": 3679 }, { "epoch": 1.6681776971894833, "grad_norm": 0.85301562971831, "learning_rate": 6.741895803833006e-05, "loss": 0.8929, "step": 3680 }, { "epoch": 1.6686310063463283, "grad_norm": 0.9428558596324974, "learning_rate": 6.740973984398908e-05, "loss": 0.8972, "step": 3681 }, { "epoch": 1.6690843155031732, "grad_norm": 0.8594843763822574, "learning_rate": 6.74005189044821e-05, "loss": 0.9071, "step": 3682 }, { "epoch": 1.669537624660018, "grad_norm": 0.6767798687296883, "learning_rate": 6.739129522073262e-05, "loss": 0.8949, "step": 3683 }, { "epoch": 1.6699909338168633, "grad_norm": 0.5425994445064896, "learning_rate": 6.738206879366442e-05, "loss": 0.889, "step": 3684 }, { "epoch": 1.670444242973708, "grad_norm": 0.5217898762513699, "learning_rate": 6.737283962420156e-05, "loss": 0.8958, "step": 3685 }, { "epoch": 1.670897552130553, "grad_norm": 0.5351402152482628, "learning_rate": 6.736360771326836e-05, "loss": 0.8767, "step": 3686 }, { "epoch": 1.671350861287398, "grad_norm": 0.4864511611165778, "learning_rate": 6.735437306178943e-05, "loss": 0.8989, "step": 3687 }, { "epoch": 1.671804170444243, "grad_norm": 0.49233165087484243, "learning_rate": 6.734513567068965e-05, "loss": 0.8995, "step": 3688 }, { "epoch": 1.6722574796010878, "grad_norm": 0.49099332595607836, "learning_rate": 6.733589554089416e-05, "loss": 0.9169, "step": 3689 }, { "epoch": 1.6727107887579329, "grad_norm": 0.4334157734250715, "learning_rate": 6.73266526733284e-05, "loss": 0.9024, "step": 3690 }, { "epoch": 1.673164097914778, "grad_norm": 0.5020563598375011, "learning_rate": 6.731740706891806e-05, "loss": 0.9058, "step": 3691 }, { "epoch": 1.6736174070716228, "grad_norm": 0.6099600697250663, "learning_rate": 6.730815872858912e-05, "loss": 0.9061, "step": 3692 }, { "epoch": 1.6740707162284678, "grad_norm": 0.7551759479782721, "learning_rate": 6.729890765326782e-05, "loss": 0.893, "step": 3693 }, { "epoch": 1.6745240253853129, "grad_norm": 0.9432921798131219, "learning_rate": 6.728965384388069e-05, "loss": 0.8982, "step": 3694 }, { "epoch": 1.6749773345421577, "grad_norm": 1.0980751787921423, "learning_rate": 6.728039730135451e-05, "loss": 0.8882, "step": 3695 }, { "epoch": 1.6754306436990027, "grad_norm": 0.657972417673878, "learning_rate": 6.72711380266164e-05, "loss": 0.9084, "step": 3696 }, { "epoch": 1.6758839528558478, "grad_norm": 0.4973651218380909, "learning_rate": 6.726187602059364e-05, "loss": 0.9147, "step": 3697 }, { "epoch": 1.6763372620126926, "grad_norm": 1.4996818121112656, "learning_rate": 6.725261128421389e-05, "loss": 0.9363, "step": 3698 }, { "epoch": 1.6767905711695377, "grad_norm": 0.4195629783117409, "learning_rate": 6.724334381840501e-05, "loss": 0.9018, "step": 3699 }, { "epoch": 1.6772438803263827, "grad_norm": 0.7428413330413355, "learning_rate": 6.72340736240952e-05, "loss": 0.9029, "step": 3700 }, { "epoch": 1.6776971894832275, "grad_norm": 1.0121418624966192, "learning_rate": 6.722480070221287e-05, "loss": 0.9312, "step": 3701 }, { "epoch": 1.6781504986400724, "grad_norm": 1.0436729931387536, "learning_rate": 6.721552505368673e-05, "loss": 0.9252, "step": 3702 }, { "epoch": 1.6786038077969176, "grad_norm": 1.0192355050113002, "learning_rate": 6.720624667944577e-05, "loss": 0.898, "step": 3703 }, { "epoch": 1.6790571169537625, "grad_norm": 0.8969341625452598, "learning_rate": 6.719696558041926e-05, "loss": 0.9009, "step": 3704 }, { "epoch": 1.6795104261106073, "grad_norm": 0.9116676032642704, "learning_rate": 6.71876817575367e-05, "loss": 0.9215, "step": 3705 }, { "epoch": 1.6799637352674524, "grad_norm": 0.9597856994463121, "learning_rate": 6.717839521172793e-05, "loss": 0.895, "step": 3706 }, { "epoch": 1.6804170444242974, "grad_norm": 1.0134529810888506, "learning_rate": 6.7169105943923e-05, "loss": 0.8933, "step": 3707 }, { "epoch": 1.6808703535811422, "grad_norm": 0.8643560408003673, "learning_rate": 6.715981395505224e-05, "loss": 0.912, "step": 3708 }, { "epoch": 1.6813236627379873, "grad_norm": 0.6983014904557351, "learning_rate": 6.715051924604631e-05, "loss": 0.9015, "step": 3709 }, { "epoch": 1.6817769718948323, "grad_norm": 0.5949719716661933, "learning_rate": 6.714122181783609e-05, "loss": 0.8969, "step": 3710 }, { "epoch": 1.6822302810516772, "grad_norm": 0.550660007948101, "learning_rate": 6.713192167135271e-05, "loss": 0.8784, "step": 3711 }, { "epoch": 1.6826835902085222, "grad_norm": 0.5705232666567623, "learning_rate": 6.712261880752765e-05, "loss": 0.885, "step": 3712 }, { "epoch": 1.6831368993653673, "grad_norm": 0.4739114508269938, "learning_rate": 6.71133132272926e-05, "loss": 0.8986, "step": 3713 }, { "epoch": 1.683590208522212, "grad_norm": 0.33738919297694825, "learning_rate": 6.710400493157956e-05, "loss": 0.9058, "step": 3714 }, { "epoch": 1.6840435176790571, "grad_norm": 0.37003132509339104, "learning_rate": 6.709469392132076e-05, "loss": 0.9218, "step": 3715 }, { "epoch": 1.6844968268359022, "grad_norm": 0.43639486197780836, "learning_rate": 6.708538019744873e-05, "loss": 0.8943, "step": 3716 }, { "epoch": 1.684950135992747, "grad_norm": 0.49065373955235875, "learning_rate": 6.707606376089628e-05, "loss": 0.8904, "step": 3717 }, { "epoch": 1.685403445149592, "grad_norm": 0.48639268746078657, "learning_rate": 6.706674461259647e-05, "loss": 0.8884, "step": 3718 }, { "epoch": 1.685856754306437, "grad_norm": 0.4487821005140561, "learning_rate": 6.705742275348263e-05, "loss": 0.8974, "step": 3719 }, { "epoch": 1.686310063463282, "grad_norm": 0.42988843077441463, "learning_rate": 6.704809818448838e-05, "loss": 0.917, "step": 3720 }, { "epoch": 1.6867633726201268, "grad_norm": 0.4445674090699786, "learning_rate": 6.703877090654761e-05, "loss": 0.9045, "step": 3721 }, { "epoch": 1.687216681776972, "grad_norm": 0.48005739279514725, "learning_rate": 6.702944092059447e-05, "loss": 0.9056, "step": 3722 }, { "epoch": 1.6876699909338169, "grad_norm": 0.5472104317864968, "learning_rate": 6.702010822756339e-05, "loss": 0.8906, "step": 3723 }, { "epoch": 1.6881233000906617, "grad_norm": 0.6867702321684315, "learning_rate": 6.701077282838905e-05, "loss": 0.9036, "step": 3724 }, { "epoch": 1.688576609247507, "grad_norm": 0.7597059566191885, "learning_rate": 6.700143472400643e-05, "loss": 0.9029, "step": 3725 }, { "epoch": 1.6890299184043518, "grad_norm": 0.8504040607145836, "learning_rate": 6.699209391535077e-05, "loss": 0.9032, "step": 3726 }, { "epoch": 1.6894832275611966, "grad_norm": 1.0485991739704106, "learning_rate": 6.698275040335757e-05, "loss": 0.8892, "step": 3727 }, { "epoch": 1.6899365367180417, "grad_norm": 1.0861026237837617, "learning_rate": 6.697340418896261e-05, "loss": 0.8833, "step": 3728 }, { "epoch": 1.6903898458748867, "grad_norm": 0.7860561153205056, "learning_rate": 6.696405527310196e-05, "loss": 0.9055, "step": 3729 }, { "epoch": 1.6908431550317315, "grad_norm": 0.6616057298828738, "learning_rate": 6.695470365671193e-05, "loss": 0.8915, "step": 3730 }, { "epoch": 1.6912964641885766, "grad_norm": 0.5803272888759361, "learning_rate": 6.69453493407291e-05, "loss": 0.9164, "step": 3731 }, { "epoch": 1.6917497733454216, "grad_norm": 0.6309582024625459, "learning_rate": 6.693599232609035e-05, "loss": 0.9117, "step": 3732 }, { "epoch": 1.6922030825022665, "grad_norm": 0.7504843073826812, "learning_rate": 6.69266326137328e-05, "loss": 0.8957, "step": 3733 }, { "epoch": 1.6926563916591115, "grad_norm": 0.9046869842622001, "learning_rate": 6.691727020459386e-05, "loss": 0.879, "step": 3734 }, { "epoch": 1.6931097008159566, "grad_norm": 1.0636558867607164, "learning_rate": 6.69079050996112e-05, "loss": 0.8748, "step": 3735 }, { "epoch": 1.6935630099728014, "grad_norm": 0.8830307993916582, "learning_rate": 6.689853729972276e-05, "loss": 0.8873, "step": 3736 }, { "epoch": 1.6940163191296465, "grad_norm": 0.6500230431042283, "learning_rate": 6.688916680586676e-05, "loss": 0.8913, "step": 3737 }, { "epoch": 1.6944696282864915, "grad_norm": 0.4234680479567685, "learning_rate": 6.687979361898167e-05, "loss": 0.9043, "step": 3738 }, { "epoch": 1.6949229374433363, "grad_norm": 0.5272422074278011, "learning_rate": 6.687041774000627e-05, "loss": 0.8944, "step": 3739 }, { "epoch": 1.6953762466001812, "grad_norm": 0.6133772892180377, "learning_rate": 6.686103916987956e-05, "loss": 0.8907, "step": 3740 }, { "epoch": 1.6958295557570264, "grad_norm": 0.6595386491798364, "learning_rate": 6.685165790954083e-05, "loss": 0.8936, "step": 3741 }, { "epoch": 1.6962828649138713, "grad_norm": 0.5898973452993214, "learning_rate": 6.684227395992963e-05, "loss": 0.877, "step": 3742 }, { "epoch": 1.696736174070716, "grad_norm": 0.4686395112769075, "learning_rate": 6.683288732198583e-05, "loss": 0.9048, "step": 3743 }, { "epoch": 1.6971894832275614, "grad_norm": 0.4454289661900001, "learning_rate": 6.682349799664947e-05, "loss": 0.904, "step": 3744 }, { "epoch": 1.6976427923844062, "grad_norm": 0.48150948527084936, "learning_rate": 6.681410598486098e-05, "loss": 0.8859, "step": 3745 }, { "epoch": 1.698096101541251, "grad_norm": 0.5427687802380835, "learning_rate": 6.680471128756097e-05, "loss": 0.8955, "step": 3746 }, { "epoch": 1.698549410698096, "grad_norm": 0.5763443598996874, "learning_rate": 6.679531390569034e-05, "loss": 0.9225, "step": 3747 }, { "epoch": 1.6990027198549411, "grad_norm": 0.5182690295368858, "learning_rate": 6.678591384019027e-05, "loss": 0.9086, "step": 3748 }, { "epoch": 1.699456029011786, "grad_norm": 0.5274359967336919, "learning_rate": 6.677651109200222e-05, "loss": 0.898, "step": 3749 }, { "epoch": 1.699909338168631, "grad_norm": 0.694981777944451, "learning_rate": 6.676710566206787e-05, "loss": 0.8956, "step": 3750 }, { "epoch": 1.700362647325476, "grad_norm": 0.8942531546597045, "learning_rate": 6.675769755132922e-05, "loss": 0.9109, "step": 3751 }, { "epoch": 1.7008159564823209, "grad_norm": 0.9329848271114726, "learning_rate": 6.674828676072853e-05, "loss": 0.8916, "step": 3752 }, { "epoch": 1.701269265639166, "grad_norm": 0.9854121780559459, "learning_rate": 6.67388732912083e-05, "loss": 0.9163, "step": 3753 }, { "epoch": 1.701722574796011, "grad_norm": 1.0324827382169761, "learning_rate": 6.672945714371136e-05, "loss": 0.9254, "step": 3754 }, { "epoch": 1.7021758839528558, "grad_norm": 0.914469077992546, "learning_rate": 6.67200383191807e-05, "loss": 0.8759, "step": 3755 }, { "epoch": 1.7026291931097008, "grad_norm": 0.8039497076549215, "learning_rate": 6.671061681855968e-05, "loss": 0.886, "step": 3756 }, { "epoch": 1.703082502266546, "grad_norm": 0.7949780318988283, "learning_rate": 6.670119264279188e-05, "loss": 0.9023, "step": 3757 }, { "epoch": 1.7035358114233907, "grad_norm": 0.7945586081577648, "learning_rate": 6.669176579282117e-05, "loss": 0.896, "step": 3758 }, { "epoch": 1.7039891205802358, "grad_norm": 0.7968649866852935, "learning_rate": 6.668233626959166e-05, "loss": 0.8751, "step": 3759 }, { "epoch": 1.7044424297370808, "grad_norm": 0.737507347367117, "learning_rate": 6.667290407404776e-05, "loss": 0.8882, "step": 3760 }, { "epoch": 1.7048957388939256, "grad_norm": 0.7591445747235319, "learning_rate": 6.666346920713415e-05, "loss": 0.8925, "step": 3761 }, { "epoch": 1.7053490480507705, "grad_norm": 0.7263594783407955, "learning_rate": 6.665403166979571e-05, "loss": 0.8925, "step": 3762 }, { "epoch": 1.7058023572076157, "grad_norm": 0.6835212290518361, "learning_rate": 6.664459146297767e-05, "loss": 0.8753, "step": 3763 }, { "epoch": 1.7062556663644606, "grad_norm": 0.6243077507902149, "learning_rate": 6.66351485876255e-05, "loss": 0.9164, "step": 3764 }, { "epoch": 1.7067089755213054, "grad_norm": 0.5839735247493731, "learning_rate": 6.66257030446849e-05, "loss": 0.9065, "step": 3765 }, { "epoch": 1.7071622846781505, "grad_norm": 0.5240044092036779, "learning_rate": 6.661625483510187e-05, "loss": 0.9023, "step": 3766 }, { "epoch": 1.7076155938349955, "grad_norm": 0.5047450028644846, "learning_rate": 6.660680395982274e-05, "loss": 0.8781, "step": 3767 }, { "epoch": 1.7080689029918403, "grad_norm": 0.5207106640565379, "learning_rate": 6.659735041979398e-05, "loss": 0.8971, "step": 3768 }, { "epoch": 1.7085222121486854, "grad_norm": 0.5467005168012489, "learning_rate": 6.65878942159624e-05, "loss": 0.895, "step": 3769 }, { "epoch": 1.7089755213055304, "grad_norm": 0.46690669970698107, "learning_rate": 6.657843534927507e-05, "loss": 0.8672, "step": 3770 }, { "epoch": 1.7094288304623753, "grad_norm": 0.5643991051559255, "learning_rate": 6.656897382067935e-05, "loss": 0.8775, "step": 3771 }, { "epoch": 1.7098821396192203, "grad_norm": 0.6817922498998549, "learning_rate": 6.65595096311228e-05, "loss": 0.8938, "step": 3772 }, { "epoch": 1.7103354487760654, "grad_norm": 0.8069649454047518, "learning_rate": 6.65500427815533e-05, "loss": 0.8992, "step": 3773 }, { "epoch": 1.7107887579329102, "grad_norm": 0.878908968239035, "learning_rate": 6.654057327291899e-05, "loss": 0.9174, "step": 3774 }, { "epoch": 1.7112420670897552, "grad_norm": 0.8866058537228136, "learning_rate": 6.653110110616827e-05, "loss": 0.8898, "step": 3775 }, { "epoch": 1.7116953762466003, "grad_norm": 0.8110320637282362, "learning_rate": 6.652162628224981e-05, "loss": 0.8899, "step": 3776 }, { "epoch": 1.7121486854034451, "grad_norm": 0.7982094768398954, "learning_rate": 6.651214880211252e-05, "loss": 0.9009, "step": 3777 }, { "epoch": 1.7126019945602902, "grad_norm": 0.7736819572755326, "learning_rate": 6.65026686667056e-05, "loss": 0.9144, "step": 3778 }, { "epoch": 1.7130553037171352, "grad_norm": 0.8239181825551296, "learning_rate": 6.649318587697855e-05, "loss": 0.9232, "step": 3779 }, { "epoch": 1.71350861287398, "grad_norm": 0.8426099808775985, "learning_rate": 6.648370043388104e-05, "loss": 0.894, "step": 3780 }, { "epoch": 1.7139619220308249, "grad_norm": 0.8289127373309025, "learning_rate": 6.647421233836312e-05, "loss": 0.8939, "step": 3781 }, { "epoch": 1.7144152311876701, "grad_norm": 0.715196489769326, "learning_rate": 6.646472159137502e-05, "loss": 0.9109, "step": 3782 }, { "epoch": 1.714868540344515, "grad_norm": 0.5926274015402634, "learning_rate": 6.645522819386727e-05, "loss": 0.8922, "step": 3783 }, { "epoch": 1.7153218495013598, "grad_norm": 0.5016107833317854, "learning_rate": 6.644573214679067e-05, "loss": 0.9128, "step": 3784 }, { "epoch": 1.7157751586582048, "grad_norm": 0.5577947121481835, "learning_rate": 6.643623345109629e-05, "loss": 0.8887, "step": 3785 }, { "epoch": 1.71622846781505, "grad_norm": 0.6575413248058302, "learning_rate": 6.642673210773541e-05, "loss": 0.9023, "step": 3786 }, { "epoch": 1.7166817769718947, "grad_norm": 0.6454858363201682, "learning_rate": 6.641722811765966e-05, "loss": 0.8794, "step": 3787 }, { "epoch": 1.7171350861287398, "grad_norm": 0.5553553320804184, "learning_rate": 6.640772148182086e-05, "loss": 0.889, "step": 3788 }, { "epoch": 1.7175883952855848, "grad_norm": 0.5743987620364641, "learning_rate": 6.639821220117116e-05, "loss": 0.8907, "step": 3789 }, { "epoch": 1.7180417044424297, "grad_norm": 0.6109795366310983, "learning_rate": 6.638870027666291e-05, "loss": 0.8933, "step": 3790 }, { "epoch": 1.7184950135992747, "grad_norm": 0.7174648515952795, "learning_rate": 6.637918570924878e-05, "loss": 0.8954, "step": 3791 }, { "epoch": 1.7189483227561198, "grad_norm": 0.9948530487399134, "learning_rate": 6.636966849988167e-05, "loss": 0.9126, "step": 3792 }, { "epoch": 1.7194016319129646, "grad_norm": 0.8561429046844133, "learning_rate": 6.636014864951477e-05, "loss": 0.8893, "step": 3793 }, { "epoch": 1.7198549410698096, "grad_norm": 0.982655180864914, "learning_rate": 6.63506261591015e-05, "loss": 0.9013, "step": 3794 }, { "epoch": 1.7203082502266547, "grad_norm": 0.8598433158700673, "learning_rate": 6.634110102959559e-05, "loss": 0.8881, "step": 3795 }, { "epoch": 1.7207615593834995, "grad_norm": 0.7975662035490311, "learning_rate": 6.633157326195098e-05, "loss": 0.9031, "step": 3796 }, { "epoch": 1.7212148685403446, "grad_norm": 0.8213767149444924, "learning_rate": 6.632204285712194e-05, "loss": 0.9025, "step": 3797 }, { "epoch": 1.7216681776971896, "grad_norm": 0.8390603542069657, "learning_rate": 6.631250981606294e-05, "loss": 0.8822, "step": 3798 }, { "epoch": 1.7221214868540344, "grad_norm": 0.8867047442405118, "learning_rate": 6.630297413972875e-05, "loss": 0.8941, "step": 3799 }, { "epoch": 1.7225747960108793, "grad_norm": 0.8883591245994537, "learning_rate": 6.629343582907439e-05, "loss": 0.8666, "step": 3800 }, { "epoch": 1.7230281051677245, "grad_norm": 0.9130053101224949, "learning_rate": 6.628389488505517e-05, "loss": 0.9061, "step": 3801 }, { "epoch": 1.7234814143245694, "grad_norm": 0.9315465064681377, "learning_rate": 6.627435130862661e-05, "loss": 0.896, "step": 3802 }, { "epoch": 1.7239347234814142, "grad_norm": 0.8855730886374048, "learning_rate": 6.626480510074456e-05, "loss": 0.885, "step": 3803 }, { "epoch": 1.7243880326382592, "grad_norm": 0.7582865654331835, "learning_rate": 6.62552562623651e-05, "loss": 0.8988, "step": 3804 }, { "epoch": 1.7248413417951043, "grad_norm": 0.6938300738007893, "learning_rate": 6.624570479444455e-05, "loss": 0.8962, "step": 3805 }, { "epoch": 1.7252946509519491, "grad_norm": 0.6715901337908422, "learning_rate": 6.623615069793954e-05, "loss": 0.9029, "step": 3806 }, { "epoch": 1.7257479601087942, "grad_norm": 0.6301748227819444, "learning_rate": 6.62265939738069e-05, "loss": 0.8972, "step": 3807 }, { "epoch": 1.7262012692656392, "grad_norm": 0.5106145159005249, "learning_rate": 6.621703462300382e-05, "loss": 0.8871, "step": 3808 }, { "epoch": 1.726654578422484, "grad_norm": 0.48617032380662056, "learning_rate": 6.620747264648767e-05, "loss": 0.8888, "step": 3809 }, { "epoch": 1.727107887579329, "grad_norm": 0.4926378118149106, "learning_rate": 6.619790804521612e-05, "loss": 0.8999, "step": 3810 }, { "epoch": 1.7275611967361741, "grad_norm": 0.5054492657733882, "learning_rate": 6.618834082014709e-05, "loss": 0.8948, "step": 3811 }, { "epoch": 1.728014505893019, "grad_norm": 0.5104879890242653, "learning_rate": 6.617877097223874e-05, "loss": 0.915, "step": 3812 }, { "epoch": 1.728467815049864, "grad_norm": 0.5808415523522088, "learning_rate": 6.616919850244954e-05, "loss": 0.8807, "step": 3813 }, { "epoch": 1.728921124206709, "grad_norm": 0.5875159213497355, "learning_rate": 6.61596234117382e-05, "loss": 0.8986, "step": 3814 }, { "epoch": 1.729374433363554, "grad_norm": 0.6019963313532509, "learning_rate": 6.615004570106371e-05, "loss": 0.9126, "step": 3815 }, { "epoch": 1.729827742520399, "grad_norm": 0.5432024377365419, "learning_rate": 6.614046537138528e-05, "loss": 0.8859, "step": 3816 }, { "epoch": 1.730281051677244, "grad_norm": 0.5211007307797942, "learning_rate": 6.613088242366241e-05, "loss": 0.8942, "step": 3817 }, { "epoch": 1.7307343608340888, "grad_norm": 0.42882286005817083, "learning_rate": 6.61212968588549e-05, "loss": 0.8894, "step": 3818 }, { "epoch": 1.7311876699909337, "grad_norm": 0.3705001444863107, "learning_rate": 6.61117086779227e-05, "loss": 0.8866, "step": 3819 }, { "epoch": 1.731640979147779, "grad_norm": 0.3923278457054998, "learning_rate": 6.610211788182614e-05, "loss": 0.9002, "step": 3820 }, { "epoch": 1.7320942883046238, "grad_norm": 0.4029522158664437, "learning_rate": 6.609252447152578e-05, "loss": 0.9009, "step": 3821 }, { "epoch": 1.7325475974614686, "grad_norm": 0.42717431241190784, "learning_rate": 6.60829284479824e-05, "loss": 0.8927, "step": 3822 }, { "epoch": 1.7330009066183139, "grad_norm": 0.45317075369825865, "learning_rate": 6.607332981215708e-05, "loss": 0.8899, "step": 3823 }, { "epoch": 1.7334542157751587, "grad_norm": 0.4951660274807649, "learning_rate": 6.606372856501116e-05, "loss": 0.8998, "step": 3824 }, { "epoch": 1.7339075249320035, "grad_norm": 0.5735722117630017, "learning_rate": 6.605412470750622e-05, "loss": 0.9015, "step": 3825 }, { "epoch": 1.7343608340888486, "grad_norm": 0.7366905694397976, "learning_rate": 6.604451824060411e-05, "loss": 0.9062, "step": 3826 }, { "epoch": 1.7348141432456936, "grad_norm": 0.8758209207466193, "learning_rate": 6.603490916526697e-05, "loss": 0.8928, "step": 3827 }, { "epoch": 1.7352674524025384, "grad_norm": 0.9454182959991223, "learning_rate": 6.602529748245716e-05, "loss": 0.9013, "step": 3828 }, { "epoch": 1.7357207615593835, "grad_norm": 1.113585441455221, "learning_rate": 6.60156831931373e-05, "loss": 0.8901, "step": 3829 }, { "epoch": 1.7361740707162285, "grad_norm": 0.9720323064604871, "learning_rate": 6.600606629827033e-05, "loss": 0.8992, "step": 3830 }, { "epoch": 1.7366273798730734, "grad_norm": 0.9101580292136993, "learning_rate": 6.599644679881937e-05, "loss": 0.8825, "step": 3831 }, { "epoch": 1.7370806890299184, "grad_norm": 0.7768883089119636, "learning_rate": 6.598682469574789e-05, "loss": 0.9004, "step": 3832 }, { "epoch": 1.7375339981867635, "grad_norm": 0.5823122702737005, "learning_rate": 6.597719999001952e-05, "loss": 0.8997, "step": 3833 }, { "epoch": 1.7379873073436083, "grad_norm": 0.4080454070966192, "learning_rate": 6.596757268259822e-05, "loss": 0.8953, "step": 3834 }, { "epoch": 1.7384406165004533, "grad_norm": 0.39501043267737174, "learning_rate": 6.595794277444822e-05, "loss": 0.8894, "step": 3835 }, { "epoch": 1.7388939256572984, "grad_norm": 0.4899837841423306, "learning_rate": 6.594831026653395e-05, "loss": 0.8785, "step": 3836 }, { "epoch": 1.7393472348141432, "grad_norm": 0.504535228683588, "learning_rate": 6.593867515982016e-05, "loss": 0.886, "step": 3837 }, { "epoch": 1.7398005439709883, "grad_norm": 0.5359420183332634, "learning_rate": 6.592903745527179e-05, "loss": 0.9128, "step": 3838 }, { "epoch": 1.7402538531278333, "grad_norm": 0.6097276592893291, "learning_rate": 6.591939715385415e-05, "loss": 0.883, "step": 3839 }, { "epoch": 1.7407071622846781, "grad_norm": 0.6982149695653571, "learning_rate": 6.590975425653269e-05, "loss": 0.8857, "step": 3840 }, { "epoch": 1.741160471441523, "grad_norm": 0.681480051396439, "learning_rate": 6.59001087642732e-05, "loss": 0.8971, "step": 3841 }, { "epoch": 1.7416137805983682, "grad_norm": 0.6579463397659712, "learning_rate": 6.58904606780417e-05, "loss": 0.906, "step": 3842 }, { "epoch": 1.742067089755213, "grad_norm": 0.6447873866444466, "learning_rate": 6.588080999880445e-05, "loss": 0.8931, "step": 3843 }, { "epoch": 1.742520398912058, "grad_norm": 0.6658858645375951, "learning_rate": 6.587115672752804e-05, "loss": 0.9008, "step": 3844 }, { "epoch": 1.742973708068903, "grad_norm": 0.727296068020592, "learning_rate": 6.586150086517925e-05, "loss": 0.8986, "step": 3845 }, { "epoch": 1.743427017225748, "grad_norm": 0.7730567339317145, "learning_rate": 6.585184241272514e-05, "loss": 0.8883, "step": 3846 }, { "epoch": 1.7438803263825928, "grad_norm": 0.7104772922343539, "learning_rate": 6.584218137113302e-05, "loss": 0.9167, "step": 3847 }, { "epoch": 1.7443336355394379, "grad_norm": 0.636488543636627, "learning_rate": 6.58325177413705e-05, "loss": 0.8848, "step": 3848 }, { "epoch": 1.744786944696283, "grad_norm": 0.5848480830171661, "learning_rate": 6.582285152440541e-05, "loss": 0.8964, "step": 3849 }, { "epoch": 1.7452402538531278, "grad_norm": 0.5642859466717631, "learning_rate": 6.581318272120584e-05, "loss": 0.8862, "step": 3850 }, { "epoch": 1.7456935630099728, "grad_norm": 0.605978126597611, "learning_rate": 6.580351133274017e-05, "loss": 0.9118, "step": 3851 }, { "epoch": 1.7461468721668179, "grad_norm": 0.7174998870431452, "learning_rate": 6.5793837359977e-05, "loss": 0.8881, "step": 3852 }, { "epoch": 1.7466001813236627, "grad_norm": 0.7507816945574268, "learning_rate": 6.57841608038852e-05, "loss": 0.9014, "step": 3853 }, { "epoch": 1.7470534904805077, "grad_norm": 0.7381466820842393, "learning_rate": 6.577448166543394e-05, "loss": 0.8962, "step": 3854 }, { "epoch": 1.7475067996373528, "grad_norm": 0.7061033703928796, "learning_rate": 6.576479994559257e-05, "loss": 0.9021, "step": 3855 }, { "epoch": 1.7479601087941976, "grad_norm": 0.8017412590436906, "learning_rate": 6.575511564533078e-05, "loss": 0.9102, "step": 3856 }, { "epoch": 1.7484134179510427, "grad_norm": 0.871050179415977, "learning_rate": 6.574542876561844e-05, "loss": 0.9043, "step": 3857 }, { "epoch": 1.7488667271078877, "grad_norm": 0.9207077031891274, "learning_rate": 6.573573930742576e-05, "loss": 0.8842, "step": 3858 }, { "epoch": 1.7493200362647325, "grad_norm": 0.9421116313845039, "learning_rate": 6.572604727172316e-05, "loss": 0.8725, "step": 3859 }, { "epoch": 1.7497733454215774, "grad_norm": 0.8930242320511158, "learning_rate": 6.571635265948131e-05, "loss": 0.89, "step": 3860 }, { "epoch": 1.7502266545784226, "grad_norm": 0.7314212836744549, "learning_rate": 6.570665547167116e-05, "loss": 0.8989, "step": 3861 }, { "epoch": 1.7506799637352675, "grad_norm": 0.6052533026437444, "learning_rate": 6.569695570926393e-05, "loss": 0.8848, "step": 3862 }, { "epoch": 1.7511332728921123, "grad_norm": 0.5954474443419195, "learning_rate": 6.568725337323104e-05, "loss": 0.9079, "step": 3863 }, { "epoch": 1.7515865820489573, "grad_norm": 0.5933937915081496, "learning_rate": 6.567754846454424e-05, "loss": 0.8961, "step": 3864 }, { "epoch": 1.7520398912058024, "grad_norm": 0.5033010811582114, "learning_rate": 6.566784098417551e-05, "loss": 0.9, "step": 3865 }, { "epoch": 1.7524932003626472, "grad_norm": 0.6745480915928966, "learning_rate": 6.565813093309706e-05, "loss": 0.9094, "step": 3866 }, { "epoch": 1.7529465095194923, "grad_norm": 0.5303658066288975, "learning_rate": 6.564841831228139e-05, "loss": 0.8985, "step": 3867 }, { "epoch": 1.7533998186763373, "grad_norm": 0.4424898138340191, "learning_rate": 6.563870312270124e-05, "loss": 0.8916, "step": 3868 }, { "epoch": 1.7538531278331821, "grad_norm": 0.4582157865908418, "learning_rate": 6.562898536532962e-05, "loss": 0.893, "step": 3869 }, { "epoch": 1.7543064369900272, "grad_norm": 0.5163874887752307, "learning_rate": 6.561926504113979e-05, "loss": 0.9119, "step": 3870 }, { "epoch": 1.7547597461468722, "grad_norm": 0.4967944932886949, "learning_rate": 6.560954215110529e-05, "loss": 0.8943, "step": 3871 }, { "epoch": 1.755213055303717, "grad_norm": 0.46005451411944337, "learning_rate": 6.559981669619988e-05, "loss": 0.9008, "step": 3872 }, { "epoch": 1.7556663644605621, "grad_norm": 0.4119585395967857, "learning_rate": 6.559008867739758e-05, "loss": 0.8984, "step": 3873 }, { "epoch": 1.7561196736174072, "grad_norm": 0.4700367954175148, "learning_rate": 6.55803580956727e-05, "loss": 0.8788, "step": 3874 }, { "epoch": 1.756572982774252, "grad_norm": 0.5362866873805032, "learning_rate": 6.557062495199976e-05, "loss": 0.9041, "step": 3875 }, { "epoch": 1.757026291931097, "grad_norm": 0.6239473696858702, "learning_rate": 6.556088924735358e-05, "loss": 0.9041, "step": 3876 }, { "epoch": 1.757479601087942, "grad_norm": 0.7469976206294955, "learning_rate": 6.555115098270923e-05, "loss": 0.8944, "step": 3877 }, { "epoch": 1.757932910244787, "grad_norm": 0.8198536451310724, "learning_rate": 6.554141015904203e-05, "loss": 0.8999, "step": 3878 }, { "epoch": 1.7583862194016318, "grad_norm": 0.8876635899111999, "learning_rate": 6.553166677732752e-05, "loss": 0.8948, "step": 3879 }, { "epoch": 1.758839528558477, "grad_norm": 0.9551682706388037, "learning_rate": 6.552192083854154e-05, "loss": 0.8924, "step": 3880 }, { "epoch": 1.7592928377153219, "grad_norm": 0.9824658207551072, "learning_rate": 6.55121723436602e-05, "loss": 0.8874, "step": 3881 }, { "epoch": 1.7597461468721667, "grad_norm": 0.9320226247370559, "learning_rate": 6.55024212936598e-05, "loss": 0.8991, "step": 3882 }, { "epoch": 1.7601994560290117, "grad_norm": 0.9726266399594202, "learning_rate": 6.549266768951697e-05, "loss": 0.8984, "step": 3883 }, { "epoch": 1.7606527651858568, "grad_norm": 1.0195407268979537, "learning_rate": 6.548291153220855e-05, "loss": 0.8808, "step": 3884 }, { "epoch": 1.7611060743427016, "grad_norm": 0.9587036970884857, "learning_rate": 6.547315282271164e-05, "loss": 0.9101, "step": 3885 }, { "epoch": 1.7615593834995467, "grad_norm": 0.9304123116760276, "learning_rate": 6.546339156200363e-05, "loss": 0.9117, "step": 3886 }, { "epoch": 1.7620126926563917, "grad_norm": 0.943472789322733, "learning_rate": 6.54536277510621e-05, "loss": 0.9086, "step": 3887 }, { "epoch": 1.7624660018132365, "grad_norm": 0.9298708448652561, "learning_rate": 6.544386139086494e-05, "loss": 0.9079, "step": 3888 }, { "epoch": 1.7629193109700816, "grad_norm": 0.9460490501966345, "learning_rate": 6.54340924823903e-05, "loss": 0.8933, "step": 3889 }, { "epoch": 1.7633726201269266, "grad_norm": 0.8320295002592298, "learning_rate": 6.542432102661653e-05, "loss": 0.8906, "step": 3890 }, { "epoch": 1.7638259292837715, "grad_norm": 0.7190855287729453, "learning_rate": 6.54145470245223e-05, "loss": 0.8845, "step": 3891 }, { "epoch": 1.7642792384406165, "grad_norm": 0.6403115398602236, "learning_rate": 6.540477047708651e-05, "loss": 0.8954, "step": 3892 }, { "epoch": 1.7647325475974616, "grad_norm": 0.6375405901569852, "learning_rate": 6.539499138528828e-05, "loss": 0.9114, "step": 3893 }, { "epoch": 1.7651858567543064, "grad_norm": 0.872188920017808, "learning_rate": 6.538520975010701e-05, "loss": 0.9121, "step": 3894 }, { "epoch": 1.7656391659111514, "grad_norm": 0.469801325133085, "learning_rate": 6.53754255725224e-05, "loss": 0.9084, "step": 3895 }, { "epoch": 1.7660924750679965, "grad_norm": 0.42107942784942615, "learning_rate": 6.536563885351433e-05, "loss": 0.9142, "step": 3896 }, { "epoch": 1.7665457842248413, "grad_norm": 0.47233665695579885, "learning_rate": 6.535584959406299e-05, "loss": 0.876, "step": 3897 }, { "epoch": 1.7669990933816861, "grad_norm": 0.6114582228394982, "learning_rate": 6.534605779514877e-05, "loss": 0.8789, "step": 3898 }, { "epoch": 1.7674524025385314, "grad_norm": 0.718382956217762, "learning_rate": 6.53362634577524e-05, "loss": 0.925, "step": 3899 }, { "epoch": 1.7679057116953762, "grad_norm": 0.8638036880241637, "learning_rate": 6.532646658285477e-05, "loss": 0.8991, "step": 3900 }, { "epoch": 1.768359020852221, "grad_norm": 0.9486379602176281, "learning_rate": 6.531666717143708e-05, "loss": 0.8928, "step": 3901 }, { "epoch": 1.7688123300090663, "grad_norm": 0.9298539623374948, "learning_rate": 6.530686522448078e-05, "loss": 0.8997, "step": 3902 }, { "epoch": 1.7692656391659112, "grad_norm": 0.9042247778597562, "learning_rate": 6.529706074296754e-05, "loss": 0.8943, "step": 3903 }, { "epoch": 1.769718948322756, "grad_norm": 0.9174738351964395, "learning_rate": 6.528725372787932e-05, "loss": 0.9147, "step": 3904 }, { "epoch": 1.770172257479601, "grad_norm": 0.9926497498819999, "learning_rate": 6.527744418019832e-05, "loss": 0.8756, "step": 3905 }, { "epoch": 1.770625566636446, "grad_norm": 1.094004762405957, "learning_rate": 6.5267632100907e-05, "loss": 0.9217, "step": 3906 }, { "epoch": 1.771078875793291, "grad_norm": 0.7251593759124922, "learning_rate": 6.525781749098807e-05, "loss": 0.9136, "step": 3907 }, { "epoch": 1.771532184950136, "grad_norm": 0.5793960650985687, "learning_rate": 6.524800035142448e-05, "loss": 0.8949, "step": 3908 }, { "epoch": 1.771985494106981, "grad_norm": 0.5178683987637865, "learning_rate": 6.523818068319946e-05, "loss": 0.8939, "step": 3909 }, { "epoch": 1.7724388032638259, "grad_norm": 0.6214311945597412, "learning_rate": 6.522835848729645e-05, "loss": 0.9043, "step": 3910 }, { "epoch": 1.772892112420671, "grad_norm": 0.7057102241480024, "learning_rate": 6.521853376469921e-05, "loss": 0.8951, "step": 3911 }, { "epoch": 1.773345421577516, "grad_norm": 0.8509152630877603, "learning_rate": 6.52087065163917e-05, "loss": 0.9099, "step": 3912 }, { "epoch": 1.7737987307343608, "grad_norm": 0.9916859420947791, "learning_rate": 6.519887674335814e-05, "loss": 0.9001, "step": 3913 }, { "epoch": 1.7742520398912058, "grad_norm": 1.0505585402080069, "learning_rate": 6.518904444658301e-05, "loss": 0.9106, "step": 3914 }, { "epoch": 1.7747053490480509, "grad_norm": 0.9483139640928896, "learning_rate": 6.517920962705106e-05, "loss": 0.8982, "step": 3915 }, { "epoch": 1.7751586582048957, "grad_norm": 0.7123784610485726, "learning_rate": 6.516937228574726e-05, "loss": 0.899, "step": 3916 }, { "epoch": 1.7756119673617408, "grad_norm": 0.47977992333010305, "learning_rate": 6.515953242365686e-05, "loss": 0.9056, "step": 3917 }, { "epoch": 1.7760652765185858, "grad_norm": 0.3608516068576516, "learning_rate": 6.514969004176534e-05, "loss": 0.9068, "step": 3918 }, { "epoch": 1.7765185856754306, "grad_norm": 0.5844397037459649, "learning_rate": 6.513984514105844e-05, "loss": 0.8917, "step": 3919 }, { "epoch": 1.7769718948322755, "grad_norm": 0.7842930246762153, "learning_rate": 6.512999772252217e-05, "loss": 0.8913, "step": 3920 }, { "epoch": 1.7774252039891207, "grad_norm": 0.8174609849640009, "learning_rate": 6.512014778714278e-05, "loss": 0.8963, "step": 3921 }, { "epoch": 1.7778785131459656, "grad_norm": 0.7959454418458121, "learning_rate": 6.511029533590676e-05, "loss": 0.8905, "step": 3922 }, { "epoch": 1.7783318223028104, "grad_norm": 0.8002540276501783, "learning_rate": 6.510044036980087e-05, "loss": 0.8982, "step": 3923 }, { "epoch": 1.7787851314596554, "grad_norm": 0.7432932155279288, "learning_rate": 6.50905828898121e-05, "loss": 0.8828, "step": 3924 }, { "epoch": 1.7792384406165005, "grad_norm": 0.5500938863394628, "learning_rate": 6.508072289692772e-05, "loss": 0.9032, "step": 3925 }, { "epoch": 1.7796917497733453, "grad_norm": 0.40129343625680264, "learning_rate": 6.507086039213522e-05, "loss": 0.8936, "step": 3926 }, { "epoch": 1.7801450589301904, "grad_norm": 0.38872327560774683, "learning_rate": 6.506099537642238e-05, "loss": 0.8961, "step": 3927 }, { "epoch": 1.7805983680870354, "grad_norm": 0.46029416263473816, "learning_rate": 6.505112785077719e-05, "loss": 0.8912, "step": 3928 }, { "epoch": 1.7810516772438802, "grad_norm": 0.5684589562134437, "learning_rate": 6.504125781618793e-05, "loss": 0.8945, "step": 3929 }, { "epoch": 1.7815049864007253, "grad_norm": 0.6770585370003914, "learning_rate": 6.50313852736431e-05, "loss": 0.8916, "step": 3930 }, { "epoch": 1.7819582955575703, "grad_norm": 0.685290463089577, "learning_rate": 6.502151022413147e-05, "loss": 0.9042, "step": 3931 }, { "epoch": 1.7824116047144152, "grad_norm": 0.5879735787329992, "learning_rate": 6.501163266864206e-05, "loss": 0.8893, "step": 3932 }, { "epoch": 1.7828649138712602, "grad_norm": 0.554686072251621, "learning_rate": 6.500175260816413e-05, "loss": 0.893, "step": 3933 }, { "epoch": 1.7833182230281053, "grad_norm": 0.5696899695728963, "learning_rate": 6.499187004368719e-05, "loss": 0.9023, "step": 3934 }, { "epoch": 1.78377153218495, "grad_norm": 0.5822993517865817, "learning_rate": 6.498198497620102e-05, "loss": 0.8874, "step": 3935 }, { "epoch": 1.7842248413417952, "grad_norm": 0.5519207203245556, "learning_rate": 6.497209740669563e-05, "loss": 0.8727, "step": 3936 }, { "epoch": 1.7846781504986402, "grad_norm": 0.6120127481447808, "learning_rate": 6.49622073361613e-05, "loss": 0.9013, "step": 3937 }, { "epoch": 1.785131459655485, "grad_norm": 0.42395159286415424, "learning_rate": 6.495231476558854e-05, "loss": 0.9111, "step": 3938 }, { "epoch": 1.7855847688123299, "grad_norm": 0.44431108615488907, "learning_rate": 6.494241969596811e-05, "loss": 0.8967, "step": 3939 }, { "epoch": 1.7860380779691751, "grad_norm": 0.5220773862685488, "learning_rate": 6.493252212829106e-05, "loss": 0.9114, "step": 3940 }, { "epoch": 1.78649138712602, "grad_norm": 0.6232735636145881, "learning_rate": 6.492262206354863e-05, "loss": 0.9126, "step": 3941 }, { "epoch": 1.7869446962828648, "grad_norm": 0.7884000692950898, "learning_rate": 6.491271950273237e-05, "loss": 0.9024, "step": 3942 }, { "epoch": 1.7873980054397098, "grad_norm": 0.7241839414131812, "learning_rate": 6.490281444683403e-05, "loss": 0.8994, "step": 3943 }, { "epoch": 1.7878513145965549, "grad_norm": 0.7385736722287218, "learning_rate": 6.489290689684563e-05, "loss": 0.9019, "step": 3944 }, { "epoch": 1.7883046237533997, "grad_norm": 0.7963783685677822, "learning_rate": 6.488299685375944e-05, "loss": 0.8968, "step": 3945 }, { "epoch": 1.7887579329102448, "grad_norm": 0.808186177449346, "learning_rate": 6.4873084318568e-05, "loss": 0.8844, "step": 3946 }, { "epoch": 1.7892112420670898, "grad_norm": 0.8346238493126195, "learning_rate": 6.486316929226405e-05, "loss": 0.8909, "step": 3947 }, { "epoch": 1.7896645512239346, "grad_norm": 0.7681008277354979, "learning_rate": 6.485325177584062e-05, "loss": 0.8845, "step": 3948 }, { "epoch": 1.7901178603807797, "grad_norm": 0.6874884834768893, "learning_rate": 6.484333177029101e-05, "loss": 0.9051, "step": 3949 }, { "epoch": 1.7905711695376247, "grad_norm": 0.6403520358270441, "learning_rate": 6.483340927660869e-05, "loss": 0.9011, "step": 3950 }, { "epoch": 1.7910244786944696, "grad_norm": 0.6528229207749708, "learning_rate": 6.482348429578745e-05, "loss": 0.9098, "step": 3951 }, { "epoch": 1.7914777878513146, "grad_norm": 0.6405615949128707, "learning_rate": 6.481355682882131e-05, "loss": 0.874, "step": 3952 }, { "epoch": 1.7919310970081597, "grad_norm": 0.6054370256202548, "learning_rate": 6.480362687670453e-05, "loss": 0.8984, "step": 3953 }, { "epoch": 1.7923844061650045, "grad_norm": 0.6340399127883456, "learning_rate": 6.47936944404316e-05, "loss": 0.9066, "step": 3954 }, { "epoch": 1.7928377153218495, "grad_norm": 0.7007778635819046, "learning_rate": 6.47837595209973e-05, "loss": 0.9177, "step": 3955 }, { "epoch": 1.7932910244786946, "grad_norm": 0.5535012976324646, "learning_rate": 6.477382211939667e-05, "loss": 0.8881, "step": 3956 }, { "epoch": 1.7937443336355394, "grad_norm": 0.5506734007753518, "learning_rate": 6.476388223662494e-05, "loss": 0.9148, "step": 3957 }, { "epoch": 1.7941976427923843, "grad_norm": 0.6117856128478971, "learning_rate": 6.475393987367763e-05, "loss": 0.8924, "step": 3958 }, { "epoch": 1.7946509519492295, "grad_norm": 0.6620927491664353, "learning_rate": 6.47439950315505e-05, "loss": 0.8998, "step": 3959 }, { "epoch": 1.7951042611060744, "grad_norm": 0.6546913914300613, "learning_rate": 6.473404771123951e-05, "loss": 0.8878, "step": 3960 }, { "epoch": 1.7955575702629192, "grad_norm": 0.567335505486501, "learning_rate": 6.472409791374098e-05, "loss": 0.8877, "step": 3961 }, { "epoch": 1.7960108794197642, "grad_norm": 0.5326868279471841, "learning_rate": 6.471414564005137e-05, "loss": 0.8979, "step": 3962 }, { "epoch": 1.7964641885766093, "grad_norm": 0.5443551461847739, "learning_rate": 6.470419089116744e-05, "loss": 0.8994, "step": 3963 }, { "epoch": 1.796917497733454, "grad_norm": 0.5970877612755152, "learning_rate": 6.469423366808618e-05, "loss": 0.9036, "step": 3964 }, { "epoch": 1.7973708068902992, "grad_norm": 0.7627803521191419, "learning_rate": 6.468427397180487e-05, "loss": 0.9115, "step": 3965 }, { "epoch": 1.7978241160471442, "grad_norm": 0.7512052342770181, "learning_rate": 6.467431180332095e-05, "loss": 0.9003, "step": 3966 }, { "epoch": 1.798277425203989, "grad_norm": 0.8586754655519705, "learning_rate": 6.46643471636322e-05, "loss": 0.8864, "step": 3967 }, { "epoch": 1.798730734360834, "grad_norm": 1.0471944996308065, "learning_rate": 6.465438005373658e-05, "loss": 0.9052, "step": 3968 }, { "epoch": 1.7991840435176791, "grad_norm": 0.9078548498210196, "learning_rate": 6.464441047463237e-05, "loss": 0.9086, "step": 3969 }, { "epoch": 1.799637352674524, "grad_norm": 0.7710068412867059, "learning_rate": 6.463443842731799e-05, "loss": 0.8977, "step": 3970 }, { "epoch": 1.800090661831369, "grad_norm": 0.812642241967833, "learning_rate": 6.462446391279222e-05, "loss": 0.8824, "step": 3971 }, { "epoch": 1.800543970988214, "grad_norm": 1.017979015772262, "learning_rate": 6.4614486932054e-05, "loss": 0.8994, "step": 3972 }, { "epoch": 1.8009972801450589, "grad_norm": 1.1314340541546892, "learning_rate": 6.46045074861026e-05, "loss": 0.8841, "step": 3973 }, { "epoch": 1.801450589301904, "grad_norm": 0.7836557092500817, "learning_rate": 6.459452557593745e-05, "loss": 0.8612, "step": 3974 }, { "epoch": 1.801903898458749, "grad_norm": 0.6113651801339595, "learning_rate": 6.458454120255829e-05, "loss": 0.888, "step": 3975 }, { "epoch": 1.8023572076155938, "grad_norm": 0.6499903704622901, "learning_rate": 6.457455436696506e-05, "loss": 0.8939, "step": 3976 }, { "epoch": 1.8028105167724386, "grad_norm": 0.7125541875823916, "learning_rate": 6.456456507015798e-05, "loss": 0.9063, "step": 3977 }, { "epoch": 1.803263825929284, "grad_norm": 0.7521559991669844, "learning_rate": 6.455457331313754e-05, "loss": 0.8903, "step": 3978 }, { "epoch": 1.8037171350861287, "grad_norm": 0.8517386614385944, "learning_rate": 6.454457909690442e-05, "loss": 0.8986, "step": 3979 }, { "epoch": 1.8041704442429736, "grad_norm": 1.0139787251665733, "learning_rate": 6.453458242245955e-05, "loss": 0.8948, "step": 3980 }, { "epoch": 1.8046237533998188, "grad_norm": 0.9946361537199938, "learning_rate": 6.452458329080415e-05, "loss": 0.9061, "step": 3981 }, { "epoch": 1.8050770625566637, "grad_norm": 0.8545022008250847, "learning_rate": 6.451458170293965e-05, "loss": 0.899, "step": 3982 }, { "epoch": 1.8055303717135085, "grad_norm": 0.7098983463628615, "learning_rate": 6.450457765986777e-05, "loss": 0.8808, "step": 3983 }, { "epoch": 1.8059836808703535, "grad_norm": 0.5523863095907762, "learning_rate": 6.449457116259039e-05, "loss": 0.9038, "step": 3984 }, { "epoch": 1.8064369900271986, "grad_norm": 0.6280048853213087, "learning_rate": 6.448456221210974e-05, "loss": 0.8873, "step": 3985 }, { "epoch": 1.8068902991840434, "grad_norm": 0.6931585997955584, "learning_rate": 6.447455080942821e-05, "loss": 0.9055, "step": 3986 }, { "epoch": 1.8073436083408885, "grad_norm": 0.7211032130473216, "learning_rate": 6.44645369555485e-05, "loss": 0.9071, "step": 3987 }, { "epoch": 1.8077969174977335, "grad_norm": 0.7729065475579528, "learning_rate": 6.44545206514735e-05, "loss": 0.9081, "step": 3988 }, { "epoch": 1.8082502266545784, "grad_norm": 0.8040985748073064, "learning_rate": 6.444450189820641e-05, "loss": 0.9149, "step": 3989 }, { "epoch": 1.8087035358114234, "grad_norm": 0.8333131507364324, "learning_rate": 6.443448069675059e-05, "loss": 0.8932, "step": 3990 }, { "epoch": 1.8091568449682685, "grad_norm": 0.7535030664754387, "learning_rate": 6.442445704810973e-05, "loss": 0.9171, "step": 3991 }, { "epoch": 1.8096101541251133, "grad_norm": 0.602605520682078, "learning_rate": 6.441443095328771e-05, "loss": 0.912, "step": 3992 }, { "epoch": 1.8100634632819583, "grad_norm": 0.46861442963762073, "learning_rate": 6.440440241328867e-05, "loss": 0.8876, "step": 3993 }, { "epoch": 1.8105167724388034, "grad_norm": 0.44218978155660876, "learning_rate": 6.439437142911703e-05, "loss": 0.8717, "step": 3994 }, { "epoch": 1.8109700815956482, "grad_norm": 0.5739836978064221, "learning_rate": 6.438433800177736e-05, "loss": 0.8949, "step": 3995 }, { "epoch": 1.811423390752493, "grad_norm": 0.6553232868696338, "learning_rate": 6.43743021322746e-05, "loss": 0.8923, "step": 3996 }, { "epoch": 1.8118766999093383, "grad_norm": 0.6555363807910665, "learning_rate": 6.436426382161384e-05, "loss": 0.8924, "step": 3997 }, { "epoch": 1.8123300090661831, "grad_norm": 0.6092816084886911, "learning_rate": 6.435422307080047e-05, "loss": 0.9014, "step": 3998 }, { "epoch": 1.812783318223028, "grad_norm": 0.5967538354458156, "learning_rate": 6.434417988084006e-05, "loss": 0.8921, "step": 3999 }, { "epoch": 1.8132366273798732, "grad_norm": 0.6252993734760232, "learning_rate": 6.433413425273851e-05, "loss": 0.8992, "step": 4000 }, { "epoch": 1.813689936536718, "grad_norm": 0.6438140435114073, "learning_rate": 6.432408618750188e-05, "loss": 0.9078, "step": 4001 }, { "epoch": 1.814143245693563, "grad_norm": 0.6927700403637734, "learning_rate": 6.431403568613654e-05, "loss": 0.8883, "step": 4002 }, { "epoch": 1.814596554850408, "grad_norm": 0.7466344306303481, "learning_rate": 6.430398274964904e-05, "loss": 0.8973, "step": 4003 }, { "epoch": 1.815049864007253, "grad_norm": 0.7719106751957889, "learning_rate": 6.429392737904628e-05, "loss": 0.8942, "step": 4004 }, { "epoch": 1.8155031731640978, "grad_norm": 0.7181594316748676, "learning_rate": 6.428386957533528e-05, "loss": 0.8778, "step": 4005 }, { "epoch": 1.8159564823209429, "grad_norm": 0.6930944332429062, "learning_rate": 6.427380933952336e-05, "loss": 0.8868, "step": 4006 }, { "epoch": 1.816409791477788, "grad_norm": 0.7235743861483859, "learning_rate": 6.426374667261812e-05, "loss": 0.9238, "step": 4007 }, { "epoch": 1.8168631006346327, "grad_norm": 0.78843477642288, "learning_rate": 6.425368157562731e-05, "loss": 0.8862, "step": 4008 }, { "epoch": 1.8173164097914778, "grad_norm": 0.8281171456660277, "learning_rate": 6.424361404955904e-05, "loss": 0.9034, "step": 4009 }, { "epoch": 1.8177697189483228, "grad_norm": 0.8155061703253222, "learning_rate": 6.423354409542157e-05, "loss": 0.8941, "step": 4010 }, { "epoch": 1.8182230281051677, "grad_norm": 0.6832736705955715, "learning_rate": 6.422347171422343e-05, "loss": 0.8951, "step": 4011 }, { "epoch": 1.8186763372620127, "grad_norm": 0.5639723098737546, "learning_rate": 6.42133969069734e-05, "loss": 0.8893, "step": 4012 }, { "epoch": 1.8191296464188578, "grad_norm": 0.4989781086249519, "learning_rate": 6.420331967468051e-05, "loss": 0.8901, "step": 4013 }, { "epoch": 1.8195829555757026, "grad_norm": 0.5303575396065485, "learning_rate": 6.419324001835403e-05, "loss": 0.8893, "step": 4014 }, { "epoch": 1.8200362647325476, "grad_norm": 0.4845481292112108, "learning_rate": 6.418315793900345e-05, "loss": 0.8833, "step": 4015 }, { "epoch": 1.8204895738893927, "grad_norm": 0.421696371270079, "learning_rate": 6.417307343763852e-05, "loss": 0.8843, "step": 4016 }, { "epoch": 1.8209428830462375, "grad_norm": 0.3890377689463516, "learning_rate": 6.416298651526927e-05, "loss": 0.8927, "step": 4017 }, { "epoch": 1.8213961922030824, "grad_norm": 0.407188576784572, "learning_rate": 6.415289717290589e-05, "loss": 0.8928, "step": 4018 }, { "epoch": 1.8218495013599276, "grad_norm": 0.4133410852250945, "learning_rate": 6.414280541155888e-05, "loss": 0.9053, "step": 4019 }, { "epoch": 1.8223028105167725, "grad_norm": 0.5344800695513124, "learning_rate": 6.413271123223894e-05, "loss": 0.9182, "step": 4020 }, { "epoch": 1.8227561196736173, "grad_norm": 0.6843912186681955, "learning_rate": 6.412261463595706e-05, "loss": 0.8926, "step": 4021 }, { "epoch": 1.8232094288304623, "grad_norm": 0.787716722225233, "learning_rate": 6.411251562372442e-05, "loss": 0.9089, "step": 4022 }, { "epoch": 1.8236627379873074, "grad_norm": 0.9102667090831926, "learning_rate": 6.41024141965525e-05, "loss": 0.9184, "step": 4023 }, { "epoch": 1.8241160471441522, "grad_norm": 1.0049653720687963, "learning_rate": 6.409231035545292e-05, "loss": 0.8875, "step": 4024 }, { "epoch": 1.8245693563009973, "grad_norm": 1.024374646820244, "learning_rate": 6.408220410143768e-05, "loss": 0.9186, "step": 4025 }, { "epoch": 1.8250226654578423, "grad_norm": 0.9469995507119913, "learning_rate": 6.407209543551892e-05, "loss": 0.9217, "step": 4026 }, { "epoch": 1.8254759746146871, "grad_norm": 0.8226472453651121, "learning_rate": 6.406198435870905e-05, "loss": 0.8806, "step": 4027 }, { "epoch": 1.8259292837715322, "grad_norm": 0.698420575060796, "learning_rate": 6.405187087202074e-05, "loss": 0.892, "step": 4028 }, { "epoch": 1.8263825929283772, "grad_norm": 0.5428562348751574, "learning_rate": 6.404175497646686e-05, "loss": 0.8863, "step": 4029 }, { "epoch": 1.826835902085222, "grad_norm": 0.4956474861110213, "learning_rate": 6.403163667306057e-05, "loss": 0.8984, "step": 4030 }, { "epoch": 1.8272892112420671, "grad_norm": 0.475712966491124, "learning_rate": 6.402151596281524e-05, "loss": 0.8926, "step": 4031 }, { "epoch": 1.8277425203989122, "grad_norm": 0.4445182669393753, "learning_rate": 6.401139284674448e-05, "loss": 0.8843, "step": 4032 }, { "epoch": 1.828195829555757, "grad_norm": 0.44391687738554175, "learning_rate": 6.400126732586217e-05, "loss": 0.8832, "step": 4033 }, { "epoch": 1.828649138712602, "grad_norm": 0.47538269588346627, "learning_rate": 6.39911394011824e-05, "loss": 0.869, "step": 4034 }, { "epoch": 1.829102447869447, "grad_norm": 0.49111409393546696, "learning_rate": 6.39810090737195e-05, "loss": 0.8998, "step": 4035 }, { "epoch": 1.829555757026292, "grad_norm": 0.44058060834510626, "learning_rate": 6.397087634448806e-05, "loss": 0.9127, "step": 4036 }, { "epoch": 1.8300090661831367, "grad_norm": 0.3570800723094871, "learning_rate": 6.396074121450292e-05, "loss": 0.8955, "step": 4037 }, { "epoch": 1.830462375339982, "grad_norm": 0.4227033251185345, "learning_rate": 6.395060368477912e-05, "loss": 0.9199, "step": 4038 }, { "epoch": 1.8309156844968268, "grad_norm": 0.43747116863567637, "learning_rate": 6.394046375633196e-05, "loss": 0.8944, "step": 4039 }, { "epoch": 1.8313689936536717, "grad_norm": 0.33313756401294037, "learning_rate": 6.393032143017701e-05, "loss": 0.862, "step": 4040 }, { "epoch": 1.8318223028105167, "grad_norm": 0.3702399352543698, "learning_rate": 6.392017670733003e-05, "loss": 0.8666, "step": 4041 }, { "epoch": 1.8322756119673618, "grad_norm": 0.43761074050401927, "learning_rate": 6.391002958880707e-05, "loss": 0.8656, "step": 4042 }, { "epoch": 1.8327289211242066, "grad_norm": 0.40209711318489816, "learning_rate": 6.389988007562435e-05, "loss": 0.8875, "step": 4043 }, { "epoch": 1.8331822302810517, "grad_norm": 0.46532609912568224, "learning_rate": 6.388972816879841e-05, "loss": 0.8992, "step": 4044 }, { "epoch": 1.8336355394378967, "grad_norm": 0.5425773163951751, "learning_rate": 6.3879573869346e-05, "loss": 0.8707, "step": 4045 }, { "epoch": 1.8340888485947415, "grad_norm": 0.4820382744975377, "learning_rate": 6.386941717828407e-05, "loss": 0.8757, "step": 4046 }, { "epoch": 1.8345421577515866, "grad_norm": 0.5818563067246578, "learning_rate": 6.385925809662987e-05, "loss": 0.8915, "step": 4047 }, { "epoch": 1.8349954669084316, "grad_norm": 0.6938761165412745, "learning_rate": 6.384909662540084e-05, "loss": 0.8848, "step": 4048 }, { "epoch": 1.8354487760652765, "grad_norm": 0.6473807163681281, "learning_rate": 6.38389327656147e-05, "loss": 0.8981, "step": 4049 }, { "epoch": 1.8359020852221215, "grad_norm": 0.6329374696086039, "learning_rate": 6.382876651828938e-05, "loss": 0.87, "step": 4050 }, { "epoch": 1.8363553943789666, "grad_norm": 0.6947342830614672, "learning_rate": 6.381859788444305e-05, "loss": 0.8948, "step": 4051 }, { "epoch": 1.8368087035358114, "grad_norm": 0.751601463875255, "learning_rate": 6.380842686509414e-05, "loss": 0.8951, "step": 4052 }, { "epoch": 1.8372620126926564, "grad_norm": 0.8946998005947113, "learning_rate": 6.379825346126133e-05, "loss": 0.8921, "step": 4053 }, { "epoch": 1.8377153218495015, "grad_norm": 0.9941562268817906, "learning_rate": 6.378807767396347e-05, "loss": 0.872, "step": 4054 }, { "epoch": 1.8381686310063463, "grad_norm": 1.0472902983085444, "learning_rate": 6.377789950421972e-05, "loss": 0.8805, "step": 4055 }, { "epoch": 1.8386219401631911, "grad_norm": 0.8809094161040311, "learning_rate": 6.376771895304948e-05, "loss": 0.9043, "step": 4056 }, { "epoch": 1.8390752493200364, "grad_norm": 0.7751679773248243, "learning_rate": 6.375753602147232e-05, "loss": 0.9096, "step": 4057 }, { "epoch": 1.8395285584768812, "grad_norm": 0.5577774732327065, "learning_rate": 6.374735071050809e-05, "loss": 0.8965, "step": 4058 }, { "epoch": 1.839981867633726, "grad_norm": 0.4919990441430391, "learning_rate": 6.373716302117691e-05, "loss": 0.9028, "step": 4059 }, { "epoch": 1.8404351767905711, "grad_norm": 0.5017142979905649, "learning_rate": 6.372697295449909e-05, "loss": 0.9224, "step": 4060 }, { "epoch": 1.8408884859474162, "grad_norm": 0.5382197372855064, "learning_rate": 6.37167805114952e-05, "loss": 0.8874, "step": 4061 }, { "epoch": 1.841341795104261, "grad_norm": 0.47775932587519826, "learning_rate": 6.370658569318602e-05, "loss": 0.9048, "step": 4062 }, { "epoch": 1.841795104261106, "grad_norm": 0.41785250529954937, "learning_rate": 6.369638850059263e-05, "loss": 0.878, "step": 4063 }, { "epoch": 1.842248413417951, "grad_norm": 0.5508545956692398, "learning_rate": 6.368618893473629e-05, "loss": 0.9026, "step": 4064 }, { "epoch": 1.842701722574796, "grad_norm": 0.4395094233407126, "learning_rate": 6.367598699663852e-05, "loss": 0.8814, "step": 4065 }, { "epoch": 1.843155031731641, "grad_norm": 0.42291348854895067, "learning_rate": 6.366578268732104e-05, "loss": 0.9039, "step": 4066 }, { "epoch": 1.843608340888486, "grad_norm": 0.4517971092596902, "learning_rate": 6.365557600780589e-05, "loss": 0.8733, "step": 4067 }, { "epoch": 1.8440616500453308, "grad_norm": 0.42547289245694814, "learning_rate": 6.36453669591153e-05, "loss": 0.8638, "step": 4068 }, { "epoch": 1.844514959202176, "grad_norm": 0.540508620769893, "learning_rate": 6.363515554227169e-05, "loss": 0.89, "step": 4069 }, { "epoch": 1.844968268359021, "grad_norm": 0.6533314298535922, "learning_rate": 6.362494175829778e-05, "loss": 0.8924, "step": 4070 }, { "epoch": 1.8454215775158658, "grad_norm": 0.7386236500467155, "learning_rate": 6.361472560821656e-05, "loss": 0.8894, "step": 4071 }, { "epoch": 1.8458748866727108, "grad_norm": 0.7699710023062802, "learning_rate": 6.360450709305113e-05, "loss": 0.8943, "step": 4072 }, { "epoch": 1.8463281958295559, "grad_norm": 0.858233340806284, "learning_rate": 6.359428621382495e-05, "loss": 0.8934, "step": 4073 }, { "epoch": 1.8467815049864007, "grad_norm": 0.9797970337084024, "learning_rate": 6.358406297156169e-05, "loss": 0.8992, "step": 4074 }, { "epoch": 1.8472348141432455, "grad_norm": 1.0347782972689834, "learning_rate": 6.35738373672852e-05, "loss": 0.8917, "step": 4075 }, { "epoch": 1.8476881233000908, "grad_norm": 0.9712485973767905, "learning_rate": 6.35636094020196e-05, "loss": 0.8907, "step": 4076 }, { "epoch": 1.8481414324569356, "grad_norm": 0.9601693709074832, "learning_rate": 6.355337907678927e-05, "loss": 0.8769, "step": 4077 }, { "epoch": 1.8485947416137805, "grad_norm": 0.79826809137413, "learning_rate": 6.354314639261881e-05, "loss": 0.8672, "step": 4078 }, { "epoch": 1.8490480507706257, "grad_norm": 0.6729959237535736, "learning_rate": 6.353291135053304e-05, "loss": 0.8902, "step": 4079 }, { "epoch": 1.8495013599274706, "grad_norm": 0.5871278593760876, "learning_rate": 6.352267395155703e-05, "loss": 0.8761, "step": 4080 }, { "epoch": 1.8499546690843154, "grad_norm": 0.4612683591332279, "learning_rate": 6.351243419671611e-05, "loss": 0.8873, "step": 4081 }, { "epoch": 1.8504079782411604, "grad_norm": 0.5546342804051085, "learning_rate": 6.35021920870358e-05, "loss": 0.8869, "step": 4082 }, { "epoch": 1.8508612873980055, "grad_norm": 0.5252835622007773, "learning_rate": 6.349194762354187e-05, "loss": 0.8905, "step": 4083 }, { "epoch": 1.8513145965548503, "grad_norm": 0.6521462851736936, "learning_rate": 6.348170080726035e-05, "loss": 0.8905, "step": 4084 }, { "epoch": 1.8517679057116954, "grad_norm": 0.684383622330021, "learning_rate": 6.34714516392175e-05, "loss": 0.8774, "step": 4085 }, { "epoch": 1.8522212148685404, "grad_norm": 0.7419957387621071, "learning_rate": 6.346120012043976e-05, "loss": 0.9064, "step": 4086 }, { "epoch": 1.8526745240253852, "grad_norm": 0.7139043092854056, "learning_rate": 6.345094625195389e-05, "loss": 0.8819, "step": 4087 }, { "epoch": 1.8531278331822303, "grad_norm": 0.8093266030050058, "learning_rate": 6.344069003478683e-05, "loss": 0.9009, "step": 4088 }, { "epoch": 1.8535811423390753, "grad_norm": 0.8647752045540145, "learning_rate": 6.343043146996577e-05, "loss": 0.91, "step": 4089 }, { "epoch": 1.8540344514959202, "grad_norm": 0.8886768566573897, "learning_rate": 6.342017055851814e-05, "loss": 0.8904, "step": 4090 }, { "epoch": 1.8544877606527652, "grad_norm": 0.8766134205964513, "learning_rate": 6.340990730147159e-05, "loss": 0.892, "step": 4091 }, { "epoch": 1.8549410698096103, "grad_norm": 0.7986308770737333, "learning_rate": 6.339964169985403e-05, "loss": 0.8793, "step": 4092 }, { "epoch": 1.855394378966455, "grad_norm": 0.6747796642476526, "learning_rate": 6.338937375469359e-05, "loss": 0.8998, "step": 4093 }, { "epoch": 1.8558476881233001, "grad_norm": 0.6429973597318264, "learning_rate": 6.337910346701862e-05, "loss": 0.8951, "step": 4094 }, { "epoch": 1.8563009972801452, "grad_norm": 0.6343248615393162, "learning_rate": 6.336883083785773e-05, "loss": 0.8906, "step": 4095 }, { "epoch": 1.85675430643699, "grad_norm": 0.5938451541122313, "learning_rate": 6.335855586823976e-05, "loss": 0.8775, "step": 4096 }, { "epoch": 1.8572076155938348, "grad_norm": 0.5511146992139071, "learning_rate": 6.334827855919375e-05, "loss": 0.9071, "step": 4097 }, { "epoch": 1.8576609247506801, "grad_norm": 0.4784759966541587, "learning_rate": 6.333799891174905e-05, "loss": 0.9097, "step": 4098 }, { "epoch": 1.858114233907525, "grad_norm": 0.43914737942104065, "learning_rate": 6.332771692693515e-05, "loss": 0.8961, "step": 4099 }, { "epoch": 1.8585675430643698, "grad_norm": 0.406110022516985, "learning_rate": 6.331743260578184e-05, "loss": 0.9183, "step": 4100 }, { "epoch": 1.8590208522212148, "grad_norm": 0.5429753823063206, "learning_rate": 6.330714594931913e-05, "loss": 0.9121, "step": 4101 }, { "epoch": 1.8594741613780599, "grad_norm": 0.6871950007371797, "learning_rate": 6.329685695857726e-05, "loss": 0.9198, "step": 4102 }, { "epoch": 1.8599274705349047, "grad_norm": 0.6717183090388308, "learning_rate": 6.328656563458668e-05, "loss": 0.9146, "step": 4103 }, { "epoch": 1.8603807796917498, "grad_norm": 0.5740461762305292, "learning_rate": 6.327627197837814e-05, "loss": 0.8923, "step": 4104 }, { "epoch": 1.8608340888485948, "grad_norm": 0.48804160834297444, "learning_rate": 6.326597599098254e-05, "loss": 0.8797, "step": 4105 }, { "epoch": 1.8612873980054396, "grad_norm": 0.541993031295347, "learning_rate": 6.325567767343106e-05, "loss": 0.9005, "step": 4106 }, { "epoch": 1.8617407071622847, "grad_norm": 0.5812577122013832, "learning_rate": 6.324537702675512e-05, "loss": 0.8786, "step": 4107 }, { "epoch": 1.8621940163191297, "grad_norm": 0.5512877364050504, "learning_rate": 6.323507405198636e-05, "loss": 0.8914, "step": 4108 }, { "epoch": 1.8626473254759746, "grad_norm": 0.6095368961478793, "learning_rate": 6.322476875015663e-05, "loss": 0.8826, "step": 4109 }, { "epoch": 1.8631006346328196, "grad_norm": 0.7043681715200577, "learning_rate": 6.321446112229808e-05, "loss": 0.8842, "step": 4110 }, { "epoch": 1.8635539437896647, "grad_norm": 0.8401566217015518, "learning_rate": 6.320415116944301e-05, "loss": 0.8867, "step": 4111 }, { "epoch": 1.8640072529465095, "grad_norm": 0.8743835914449349, "learning_rate": 6.319383889262399e-05, "loss": 0.8788, "step": 4112 }, { "epoch": 1.8644605621033545, "grad_norm": 0.868300313749438, "learning_rate": 6.318352429287387e-05, "loss": 0.8871, "step": 4113 }, { "epoch": 1.8649138712601996, "grad_norm": 0.8510918845349745, "learning_rate": 6.317320737122565e-05, "loss": 0.879, "step": 4114 }, { "epoch": 1.8653671804170444, "grad_norm": 0.8016918761891219, "learning_rate": 6.316288812871258e-05, "loss": 0.8876, "step": 4115 }, { "epoch": 1.8658204895738892, "grad_norm": 0.673972999657549, "learning_rate": 6.315256656636821e-05, "loss": 0.8905, "step": 4116 }, { "epoch": 1.8662737987307345, "grad_norm": 0.6305804606509811, "learning_rate": 6.314224268522624e-05, "loss": 0.9045, "step": 4117 }, { "epoch": 1.8667271078875793, "grad_norm": 0.6220573310745142, "learning_rate": 6.313191648632067e-05, "loss": 0.9067, "step": 4118 }, { "epoch": 1.8671804170444242, "grad_norm": 0.6226966931583, "learning_rate": 6.312158797068567e-05, "loss": 0.8976, "step": 4119 }, { "epoch": 1.8676337262012692, "grad_norm": 0.5655378787800734, "learning_rate": 6.311125713935569e-05, "loss": 0.886, "step": 4120 }, { "epoch": 1.8680870353581143, "grad_norm": 0.528000777501247, "learning_rate": 6.310092399336537e-05, "loss": 0.8897, "step": 4121 }, { "epoch": 1.868540344514959, "grad_norm": 0.5666141494314196, "learning_rate": 6.309058853374963e-05, "loss": 0.9114, "step": 4122 }, { "epoch": 1.8689936536718041, "grad_norm": 0.5363004619968242, "learning_rate": 6.308025076154358e-05, "loss": 0.9001, "step": 4123 }, { "epoch": 1.8694469628286492, "grad_norm": 0.4356187038939563, "learning_rate": 6.30699106777826e-05, "loss": 0.8883, "step": 4124 }, { "epoch": 1.869900271985494, "grad_norm": 0.414889775620737, "learning_rate": 6.305956828350224e-05, "loss": 0.8995, "step": 4125 }, { "epoch": 1.870353581142339, "grad_norm": 0.4322731374612673, "learning_rate": 6.304922357973837e-05, "loss": 0.9046, "step": 4126 }, { "epoch": 1.8708068902991841, "grad_norm": 0.4668489990958511, "learning_rate": 6.303887656752701e-05, "loss": 0.8641, "step": 4127 }, { "epoch": 1.871260199456029, "grad_norm": 0.4960658713108177, "learning_rate": 6.302852724790444e-05, "loss": 0.898, "step": 4128 }, { "epoch": 1.871713508612874, "grad_norm": 0.5717444253938594, "learning_rate": 6.30181756219072e-05, "loss": 0.8887, "step": 4129 }, { "epoch": 1.872166817769719, "grad_norm": 0.5845325712944254, "learning_rate": 6.300782169057202e-05, "loss": 0.9025, "step": 4130 }, { "epoch": 1.8726201269265639, "grad_norm": 0.5062744504234511, "learning_rate": 6.299746545493587e-05, "loss": 0.8981, "step": 4131 }, { "epoch": 1.873073436083409, "grad_norm": 0.41102953821964666, "learning_rate": 6.298710691603597e-05, "loss": 0.8789, "step": 4132 }, { "epoch": 1.873526745240254, "grad_norm": 0.3039689982001319, "learning_rate": 6.297674607490977e-05, "loss": 0.9131, "step": 4133 }, { "epoch": 1.8739800543970988, "grad_norm": 0.32089866194592365, "learning_rate": 6.29663829325949e-05, "loss": 0.8968, "step": 4134 }, { "epoch": 1.8744333635539436, "grad_norm": 0.4604856098964121, "learning_rate": 6.29560174901293e-05, "loss": 0.9017, "step": 4135 }, { "epoch": 1.874886672710789, "grad_norm": 0.5523610786462327, "learning_rate": 6.294564974855108e-05, "loss": 0.9042, "step": 4136 }, { "epoch": 1.8753399818676337, "grad_norm": 0.5527145208836146, "learning_rate": 6.293527970889859e-05, "loss": 0.8923, "step": 4137 }, { "epoch": 1.8757932910244786, "grad_norm": 0.590473624662427, "learning_rate": 6.292490737221043e-05, "loss": 0.9003, "step": 4138 }, { "epoch": 1.8762466001813236, "grad_norm": 0.6178258083868079, "learning_rate": 6.291453273952544e-05, "loss": 0.8914, "step": 4139 }, { "epoch": 1.8766999093381687, "grad_norm": 0.5564945028463888, "learning_rate": 6.290415581188264e-05, "loss": 0.8988, "step": 4140 }, { "epoch": 1.8771532184950135, "grad_norm": 0.4336021283986337, "learning_rate": 6.289377659032133e-05, "loss": 0.8712, "step": 4141 }, { "epoch": 1.8776065276518585, "grad_norm": 0.422135143551568, "learning_rate": 6.2883395075881e-05, "loss": 0.9105, "step": 4142 }, { "epoch": 1.8780598368087036, "grad_norm": 0.4610779728660864, "learning_rate": 6.28730112696014e-05, "loss": 0.8995, "step": 4143 }, { "epoch": 1.8785131459655484, "grad_norm": 0.4426598163732573, "learning_rate": 6.286262517252251e-05, "loss": 0.8886, "step": 4144 }, { "epoch": 1.8789664551223935, "grad_norm": 0.4878034624739042, "learning_rate": 6.28522367856845e-05, "loss": 0.874, "step": 4145 }, { "epoch": 1.8794197642792385, "grad_norm": 0.4811249425591701, "learning_rate": 6.284184611012783e-05, "loss": 0.8617, "step": 4146 }, { "epoch": 1.8798730734360833, "grad_norm": 0.398523401312176, "learning_rate": 6.283145314689313e-05, "loss": 0.9018, "step": 4147 }, { "epoch": 1.8803263825929284, "grad_norm": 0.43831392720761364, "learning_rate": 6.282105789702131e-05, "loss": 0.8819, "step": 4148 }, { "epoch": 1.8807796917497734, "grad_norm": 0.43299163163530446, "learning_rate": 6.281066036155346e-05, "loss": 0.884, "step": 4149 }, { "epoch": 1.8812330009066183, "grad_norm": 0.46560587410786475, "learning_rate": 6.280026054153094e-05, "loss": 0.881, "step": 4150 }, { "epoch": 1.8816863100634633, "grad_norm": 0.5837526950583559, "learning_rate": 6.278985843799531e-05, "loss": 0.8905, "step": 4151 }, { "epoch": 1.8821396192203084, "grad_norm": 0.6177439838163441, "learning_rate": 6.27794540519884e-05, "loss": 0.8972, "step": 4152 }, { "epoch": 1.8825929283771532, "grad_norm": 0.49614646628841147, "learning_rate": 6.27690473845522e-05, "loss": 0.8962, "step": 4153 }, { "epoch": 1.883046237533998, "grad_norm": 0.3765039661768695, "learning_rate": 6.275863843672899e-05, "loss": 0.8727, "step": 4154 }, { "epoch": 1.8834995466908433, "grad_norm": 0.3845067191840651, "learning_rate": 6.274822720956125e-05, "loss": 0.8825, "step": 4155 }, { "epoch": 1.8839528558476881, "grad_norm": 0.4332445548073036, "learning_rate": 6.273781370409172e-05, "loss": 0.893, "step": 4156 }, { "epoch": 1.884406165004533, "grad_norm": 0.5329190760778711, "learning_rate": 6.272739792136331e-05, "loss": 0.8901, "step": 4157 }, { "epoch": 1.8848594741613782, "grad_norm": 0.6886289420698846, "learning_rate": 6.27169798624192e-05, "loss": 0.9041, "step": 4158 }, { "epoch": 1.885312783318223, "grad_norm": 0.8082198737585714, "learning_rate": 6.270655952830279e-05, "loss": 0.8887, "step": 4159 }, { "epoch": 1.8857660924750679, "grad_norm": 0.9434601235162842, "learning_rate": 6.269613692005771e-05, "loss": 0.9077, "step": 4160 }, { "epoch": 1.886219401631913, "grad_norm": 1.030492716659545, "learning_rate": 6.268571203872784e-05, "loss": 0.8736, "step": 4161 }, { "epoch": 1.886672710788758, "grad_norm": 0.9585947284791836, "learning_rate": 6.267528488535721e-05, "loss": 0.8848, "step": 4162 }, { "epoch": 1.8871260199456028, "grad_norm": 0.9294775200152561, "learning_rate": 6.266485546099017e-05, "loss": 0.9126, "step": 4163 }, { "epoch": 1.8875793291024479, "grad_norm": 0.8593137125795962, "learning_rate": 6.265442376667124e-05, "loss": 0.8898, "step": 4164 }, { "epoch": 1.888032638259293, "grad_norm": 0.8332270135654987, "learning_rate": 6.264398980344518e-05, "loss": 0.895, "step": 4165 }, { "epoch": 1.8884859474161377, "grad_norm": 0.8866029922853187, "learning_rate": 6.2633553572357e-05, "loss": 0.8819, "step": 4166 }, { "epoch": 1.8889392565729828, "grad_norm": 0.9203849273986358, "learning_rate": 6.262311507445191e-05, "loss": 0.9137, "step": 4167 }, { "epoch": 1.8893925657298278, "grad_norm": 0.748997669861131, "learning_rate": 6.261267431077537e-05, "loss": 0.9119, "step": 4168 }, { "epoch": 1.8898458748866727, "grad_norm": 0.63560688651663, "learning_rate": 6.260223128237302e-05, "loss": 0.9064, "step": 4169 }, { "epoch": 1.8902991840435177, "grad_norm": 0.6152824901534124, "learning_rate": 6.259178599029078e-05, "loss": 0.9011, "step": 4170 }, { "epoch": 1.8907524932003628, "grad_norm": 0.7140844205364775, "learning_rate": 6.25813384355748e-05, "loss": 0.871, "step": 4171 }, { "epoch": 1.8912058023572076, "grad_norm": 0.7270944417343485, "learning_rate": 6.257088861927139e-05, "loss": 0.873, "step": 4172 }, { "epoch": 1.8916591115140526, "grad_norm": 0.5855639378814469, "learning_rate": 6.256043654242716e-05, "loss": 0.8876, "step": 4173 }, { "epoch": 1.8921124206708977, "grad_norm": 0.5317384835387086, "learning_rate": 6.254998220608891e-05, "loss": 0.911, "step": 4174 }, { "epoch": 1.8925657298277425, "grad_norm": 0.6507004649219627, "learning_rate": 6.253952561130366e-05, "loss": 0.8879, "step": 4175 }, { "epoch": 1.8930190389845873, "grad_norm": 0.6599048308500733, "learning_rate": 6.252906675911868e-05, "loss": 0.8947, "step": 4176 }, { "epoch": 1.8934723481414326, "grad_norm": 0.7503908320017902, "learning_rate": 6.251860565058146e-05, "loss": 0.885, "step": 4177 }, { "epoch": 1.8939256572982774, "grad_norm": 0.8322102804263362, "learning_rate": 6.250814228673969e-05, "loss": 0.8987, "step": 4178 }, { "epoch": 1.8943789664551223, "grad_norm": 0.8224214780524243, "learning_rate": 6.249767666864132e-05, "loss": 0.9099, "step": 4179 }, { "epoch": 1.8948322756119673, "grad_norm": 0.7650630859008393, "learning_rate": 6.248720879733452e-05, "loss": 0.8991, "step": 4180 }, { "epoch": 1.8952855847688124, "grad_norm": 0.7851325785906426, "learning_rate": 6.247673867386767e-05, "loss": 0.8754, "step": 4181 }, { "epoch": 1.8957388939256572, "grad_norm": 0.7834666143782496, "learning_rate": 6.246626629928938e-05, "loss": 0.8827, "step": 4182 }, { "epoch": 1.8961922030825022, "grad_norm": 0.8234970821669675, "learning_rate": 6.245579167464848e-05, "loss": 0.8752, "step": 4183 }, { "epoch": 1.8966455122393473, "grad_norm": 0.7661353689848013, "learning_rate": 6.244531480099409e-05, "loss": 0.9094, "step": 4184 }, { "epoch": 1.8970988213961921, "grad_norm": 0.6730115779965113, "learning_rate": 6.243483567937541e-05, "loss": 0.9202, "step": 4185 }, { "epoch": 1.8975521305530372, "grad_norm": 0.5434158028316058, "learning_rate": 6.242435431084202e-05, "loss": 0.8924, "step": 4186 }, { "epoch": 1.8980054397098822, "grad_norm": 0.4261984678422516, "learning_rate": 6.241387069644366e-05, "loss": 0.8626, "step": 4187 }, { "epoch": 1.898458748866727, "grad_norm": 0.43787705953311673, "learning_rate": 6.240338483723024e-05, "loss": 0.8905, "step": 4188 }, { "epoch": 1.898912058023572, "grad_norm": 0.46208921219168625, "learning_rate": 6.239289673425202e-05, "loss": 0.8969, "step": 4189 }, { "epoch": 1.8993653671804172, "grad_norm": 0.46883229910730434, "learning_rate": 6.238240638855936e-05, "loss": 0.8888, "step": 4190 }, { "epoch": 1.899818676337262, "grad_norm": 0.494551565410381, "learning_rate": 6.237191380120292e-05, "loss": 0.8963, "step": 4191 }, { "epoch": 1.900271985494107, "grad_norm": 0.4086803674661086, "learning_rate": 6.236141897323357e-05, "loss": 0.8943, "step": 4192 }, { "epoch": 1.900725294650952, "grad_norm": 0.3815528307662305, "learning_rate": 6.23509219057024e-05, "loss": 0.8979, "step": 4193 }, { "epoch": 1.901178603807797, "grad_norm": 0.41735612443239656, "learning_rate": 6.23404225996607e-05, "loss": 0.8861, "step": 4194 }, { "epoch": 1.9016319129646417, "grad_norm": 0.4702989006319909, "learning_rate": 6.232992105616002e-05, "loss": 0.881, "step": 4195 }, { "epoch": 1.902085222121487, "grad_norm": 0.4907959850258766, "learning_rate": 6.231941727625211e-05, "loss": 0.8844, "step": 4196 }, { "epoch": 1.9025385312783318, "grad_norm": 0.49557956459676966, "learning_rate": 6.230891126098899e-05, "loss": 0.8872, "step": 4197 }, { "epoch": 1.9029918404351767, "grad_norm": 0.3907808297848098, "learning_rate": 6.229840301142282e-05, "loss": 0.878, "step": 4198 }, { "epoch": 1.9034451495920217, "grad_norm": 0.3932589724162242, "learning_rate": 6.228789252860607e-05, "loss": 0.901, "step": 4199 }, { "epoch": 1.9038984587488668, "grad_norm": 0.4450864409442323, "learning_rate": 6.227737981359139e-05, "loss": 0.8829, "step": 4200 }, { "epoch": 1.9043517679057116, "grad_norm": 0.5187179840271785, "learning_rate": 6.226686486743162e-05, "loss": 0.883, "step": 4201 }, { "epoch": 1.9048050770625566, "grad_norm": 0.7101165329861685, "learning_rate": 6.22563476911799e-05, "loss": 0.8771, "step": 4202 }, { "epoch": 1.9052583862194017, "grad_norm": 0.9210282316246935, "learning_rate": 6.224582828588955e-05, "loss": 0.8782, "step": 4203 }, { "epoch": 1.9057116953762465, "grad_norm": 1.0089247581137875, "learning_rate": 6.223530665261413e-05, "loss": 0.9046, "step": 4204 }, { "epoch": 1.9061650045330916, "grad_norm": 1.117157083243214, "learning_rate": 6.22247827924074e-05, "loss": 0.8911, "step": 4205 }, { "epoch": 1.9066183136899366, "grad_norm": 0.8106417434587577, "learning_rate": 6.221425670632334e-05, "loss": 0.9235, "step": 4206 }, { "epoch": 1.9070716228467814, "grad_norm": 0.5645741649106086, "learning_rate": 6.220372839541618e-05, "loss": 0.8944, "step": 4207 }, { "epoch": 1.9075249320036265, "grad_norm": 0.44934431989659546, "learning_rate": 6.219319786074038e-05, "loss": 0.8908, "step": 4208 }, { "epoch": 1.9079782411604715, "grad_norm": 0.5115030856649944, "learning_rate": 6.21826651033506e-05, "loss": 0.9028, "step": 4209 }, { "epoch": 1.9084315503173164, "grad_norm": 0.6118699726729472, "learning_rate": 6.21721301243017e-05, "loss": 0.8968, "step": 4210 }, { "epoch": 1.9088848594741614, "grad_norm": 0.7139862763025705, "learning_rate": 6.216159292464881e-05, "loss": 0.8951, "step": 4211 }, { "epoch": 1.9093381686310065, "grad_norm": 0.762435112433903, "learning_rate": 6.215105350544726e-05, "loss": 0.9202, "step": 4212 }, { "epoch": 1.9097914777878513, "grad_norm": 0.8309807016657387, "learning_rate": 6.21405118677526e-05, "loss": 0.901, "step": 4213 }, { "epoch": 1.9102447869446961, "grad_norm": 0.9241390629783984, "learning_rate": 6.21299680126206e-05, "loss": 0.8808, "step": 4214 }, { "epoch": 1.9106980961015414, "grad_norm": 0.8677850520252168, "learning_rate": 6.211942194110726e-05, "loss": 0.8997, "step": 4215 }, { "epoch": 1.9111514052583862, "grad_norm": 0.754971119554309, "learning_rate": 6.210887365426881e-05, "loss": 0.9019, "step": 4216 }, { "epoch": 1.911604714415231, "grad_norm": 0.610379566895126, "learning_rate": 6.20983231531617e-05, "loss": 0.8829, "step": 4217 }, { "epoch": 1.912058023572076, "grad_norm": 0.5797734008758453, "learning_rate": 6.208777043884257e-05, "loss": 0.8755, "step": 4218 }, { "epoch": 1.9125113327289212, "grad_norm": 0.5391142619880548, "learning_rate": 6.207721551236832e-05, "loss": 0.8813, "step": 4219 }, { "epoch": 1.912964641885766, "grad_norm": 0.5285568247382887, "learning_rate": 6.206665837479606e-05, "loss": 0.8815, "step": 4220 }, { "epoch": 1.913417951042611, "grad_norm": 0.5879813087687527, "learning_rate": 6.205609902718309e-05, "loss": 0.9352, "step": 4221 }, { "epoch": 1.913871260199456, "grad_norm": 0.6244821710446665, "learning_rate": 6.204553747058699e-05, "loss": 0.8966, "step": 4222 }, { "epoch": 1.914324569356301, "grad_norm": 0.4929697504688915, "learning_rate": 6.203497370606552e-05, "loss": 0.9061, "step": 4223 }, { "epoch": 1.914777878513146, "grad_norm": 0.38858812445006863, "learning_rate": 6.202440773467667e-05, "loss": 0.8831, "step": 4224 }, { "epoch": 1.915231187669991, "grad_norm": 0.43935886854912526, "learning_rate": 6.201383955747866e-05, "loss": 0.8866, "step": 4225 }, { "epoch": 1.9156844968268358, "grad_norm": 0.519688884581569, "learning_rate": 6.200326917552992e-05, "loss": 0.8717, "step": 4226 }, { "epoch": 1.9161378059836809, "grad_norm": 0.5730348796289453, "learning_rate": 6.199269658988912e-05, "loss": 0.8964, "step": 4227 }, { "epoch": 1.916591115140526, "grad_norm": 0.5451261287093784, "learning_rate": 6.198212180161512e-05, "loss": 0.8755, "step": 4228 }, { "epoch": 1.9170444242973708, "grad_norm": 0.43997480091704416, "learning_rate": 6.1971544811767e-05, "loss": 0.8979, "step": 4229 }, { "epoch": 1.9174977334542158, "grad_norm": 0.4583970602723826, "learning_rate": 6.19609656214041e-05, "loss": 0.8799, "step": 4230 }, { "epoch": 1.9179510426110609, "grad_norm": 0.6090966381987081, "learning_rate": 6.195038423158596e-05, "loss": 0.882, "step": 4231 }, { "epoch": 1.9184043517679057, "grad_norm": 0.7456466894632745, "learning_rate": 6.193980064337232e-05, "loss": 0.8787, "step": 4232 }, { "epoch": 1.9188576609247505, "grad_norm": 0.7985269670056172, "learning_rate": 6.192921485782317e-05, "loss": 0.9163, "step": 4233 }, { "epoch": 1.9193109700815958, "grad_norm": 0.7860100532664257, "learning_rate": 6.19186268759987e-05, "loss": 0.8835, "step": 4234 }, { "epoch": 1.9197642792384406, "grad_norm": 0.8511662831708099, "learning_rate": 6.190803669895932e-05, "loss": 0.8867, "step": 4235 }, { "epoch": 1.9202175883952854, "grad_norm": 0.9065250908434862, "learning_rate": 6.18974443277657e-05, "loss": 0.8833, "step": 4236 }, { "epoch": 1.9206708975521307, "grad_norm": 0.9192455841588082, "learning_rate": 6.188684976347866e-05, "loss": 0.9109, "step": 4237 }, { "epoch": 1.9211242067089755, "grad_norm": 0.8932796101073472, "learning_rate": 6.187625300715929e-05, "loss": 0.8898, "step": 4238 }, { "epoch": 1.9215775158658204, "grad_norm": 0.8907145526040002, "learning_rate": 6.18656540598689e-05, "loss": 0.915, "step": 4239 }, { "epoch": 1.9220308250226654, "grad_norm": 0.7688288842594142, "learning_rate": 6.185505292266898e-05, "loss": 0.8812, "step": 4240 }, { "epoch": 1.9224841341795105, "grad_norm": 0.6152322424720795, "learning_rate": 6.18444495966213e-05, "loss": 0.8946, "step": 4241 }, { "epoch": 1.9229374433363553, "grad_norm": 0.5241064429080163, "learning_rate": 6.183384408278777e-05, "loss": 0.9042, "step": 4242 }, { "epoch": 1.9233907524932004, "grad_norm": 0.46394896121888046, "learning_rate": 6.182323638223061e-05, "loss": 0.8865, "step": 4243 }, { "epoch": 1.9238440616500454, "grad_norm": 0.4332491344178371, "learning_rate": 6.181262649601216e-05, "loss": 0.8876, "step": 4244 }, { "epoch": 1.9242973708068902, "grad_norm": 0.4321025460354482, "learning_rate": 6.180201442519508e-05, "loss": 0.8868, "step": 4245 }, { "epoch": 1.9247506799637353, "grad_norm": 0.4541990926865616, "learning_rate": 6.179140017084219e-05, "loss": 0.8837, "step": 4246 }, { "epoch": 1.9252039891205803, "grad_norm": 0.4196768094800348, "learning_rate": 6.178078373401651e-05, "loss": 0.8735, "step": 4247 }, { "epoch": 1.9256572982774252, "grad_norm": 0.3668710470401487, "learning_rate": 6.177016511578133e-05, "loss": 0.9012, "step": 4248 }, { "epoch": 1.9261106074342702, "grad_norm": 0.4167138737924348, "learning_rate": 6.175954431720013e-05, "loss": 0.891, "step": 4249 }, { "epoch": 1.9265639165911153, "grad_norm": 0.4543816015944773, "learning_rate": 6.174892133933663e-05, "loss": 0.8825, "step": 4250 }, { "epoch": 1.92701722574796, "grad_norm": 0.4538716692970924, "learning_rate": 6.173829618325473e-05, "loss": 0.9037, "step": 4251 }, { "epoch": 1.927470534904805, "grad_norm": 0.36583079523660045, "learning_rate": 6.172766885001857e-05, "loss": 0.9003, "step": 4252 }, { "epoch": 1.9279238440616502, "grad_norm": 0.41013768325084593, "learning_rate": 6.171703934069253e-05, "loss": 0.8928, "step": 4253 }, { "epoch": 1.928377153218495, "grad_norm": 0.5253556854343688, "learning_rate": 6.170640765634117e-05, "loss": 0.8918, "step": 4254 }, { "epoch": 1.9288304623753398, "grad_norm": 0.6185291190824481, "learning_rate": 6.16957737980293e-05, "loss": 0.9169, "step": 4255 }, { "epoch": 1.929283771532185, "grad_norm": 0.7171671339456588, "learning_rate": 6.16851377668219e-05, "loss": 0.888, "step": 4256 }, { "epoch": 1.92973708068903, "grad_norm": 0.731011155423579, "learning_rate": 6.167449956378425e-05, "loss": 0.8899, "step": 4257 }, { "epoch": 1.9301903898458748, "grad_norm": 0.8035207532873982, "learning_rate": 6.166385918998176e-05, "loss": 0.8983, "step": 4258 }, { "epoch": 1.9306436990027198, "grad_norm": 0.8580529674619894, "learning_rate": 6.16532166464801e-05, "loss": 0.9002, "step": 4259 }, { "epoch": 1.9310970081595649, "grad_norm": 0.8741394044748001, "learning_rate": 6.164257193434514e-05, "loss": 0.9041, "step": 4260 }, { "epoch": 1.9315503173164097, "grad_norm": 0.8678009011934474, "learning_rate": 6.1631925054643e-05, "loss": 0.8903, "step": 4261 }, { "epoch": 1.9320036264732547, "grad_norm": 0.8963606588481672, "learning_rate": 6.162127600844e-05, "loss": 0.8857, "step": 4262 }, { "epoch": 1.9324569356300998, "grad_norm": 0.9076901439935136, "learning_rate": 6.161062479680266e-05, "loss": 0.8634, "step": 4263 }, { "epoch": 1.9329102447869446, "grad_norm": 0.9589739700886213, "learning_rate": 6.159997142079774e-05, "loss": 0.892, "step": 4264 }, { "epoch": 1.9333635539437897, "grad_norm": 0.7675740676866123, "learning_rate": 6.15893158814922e-05, "loss": 0.8842, "step": 4265 }, { "epoch": 1.9338168631006347, "grad_norm": 0.7165229454887396, "learning_rate": 6.15786581799532e-05, "loss": 0.873, "step": 4266 }, { "epoch": 1.9342701722574795, "grad_norm": 0.6413095972986829, "learning_rate": 6.15679983172482e-05, "loss": 0.9065, "step": 4267 }, { "epoch": 1.9347234814143246, "grad_norm": 0.5959507575614662, "learning_rate": 6.155733629444477e-05, "loss": 0.8781, "step": 4268 }, { "epoch": 1.9351767905711696, "grad_norm": 0.5757730072599968, "learning_rate": 6.154667211261075e-05, "loss": 0.8805, "step": 4269 }, { "epoch": 1.9356300997280145, "grad_norm": 0.5506610567390939, "learning_rate": 6.15360057728142e-05, "loss": 0.8763, "step": 4270 }, { "epoch": 1.9360834088848595, "grad_norm": 0.5045933130357635, "learning_rate": 6.152533727612338e-05, "loss": 0.8995, "step": 4271 }, { "epoch": 1.9365367180417046, "grad_norm": 0.43833407688861176, "learning_rate": 6.151466662360675e-05, "loss": 0.89, "step": 4272 }, { "epoch": 1.9369900271985494, "grad_norm": 0.3984819308937653, "learning_rate": 6.150399381633303e-05, "loss": 0.886, "step": 4273 }, { "epoch": 1.9374433363553942, "grad_norm": 0.3908058319224183, "learning_rate": 6.149331885537115e-05, "loss": 0.9102, "step": 4274 }, { "epoch": 1.9378966455122395, "grad_norm": 0.30568774209697946, "learning_rate": 6.14826417417902e-05, "loss": 0.8777, "step": 4275 }, { "epoch": 1.9383499546690843, "grad_norm": 0.38274764126330535, "learning_rate": 6.147196247665956e-05, "loss": 0.8949, "step": 4276 }, { "epoch": 1.9388032638259292, "grad_norm": 0.42288933643246535, "learning_rate": 6.146128106104877e-05, "loss": 0.92, "step": 4277 }, { "epoch": 1.9392565729827742, "grad_norm": 0.37369819576539826, "learning_rate": 6.14505974960276e-05, "loss": 0.9016, "step": 4278 }, { "epoch": 1.9397098821396193, "grad_norm": 0.4772975526805561, "learning_rate": 6.143991178266606e-05, "loss": 0.8918, "step": 4279 }, { "epoch": 1.940163191296464, "grad_norm": 0.6056403923211979, "learning_rate": 6.142922392203433e-05, "loss": 0.8858, "step": 4280 }, { "epoch": 1.9406165004533091, "grad_norm": 0.6425484770118037, "learning_rate": 6.141853391520286e-05, "loss": 0.9016, "step": 4281 }, { "epoch": 1.9410698096101542, "grad_norm": 0.6282528323041735, "learning_rate": 6.140784176324226e-05, "loss": 0.8852, "step": 4282 }, { "epoch": 1.941523118766999, "grad_norm": 0.6627161550766829, "learning_rate": 6.139714746722339e-05, "loss": 0.8761, "step": 4283 }, { "epoch": 1.941976427923844, "grad_norm": 0.6518880817874622, "learning_rate": 6.138645102821732e-05, "loss": 0.9157, "step": 4284 }, { "epoch": 1.9424297370806891, "grad_norm": 0.5407958395608782, "learning_rate": 6.137575244729532e-05, "loss": 0.8815, "step": 4285 }, { "epoch": 1.942883046237534, "grad_norm": 0.4812199562122479, "learning_rate": 6.136505172552891e-05, "loss": 0.8698, "step": 4286 }, { "epoch": 1.943336355394379, "grad_norm": 0.468502137267378, "learning_rate": 6.135434886398976e-05, "loss": 0.904, "step": 4287 }, { "epoch": 1.943789664551224, "grad_norm": 0.4057491381982324, "learning_rate": 6.134364386374982e-05, "loss": 0.9068, "step": 4288 }, { "epoch": 1.9442429737080689, "grad_norm": 0.4490928839588027, "learning_rate": 6.13329367258812e-05, "loss": 0.9075, "step": 4289 }, { "epoch": 1.944696282864914, "grad_norm": 0.5002788335352963, "learning_rate": 6.132222745145629e-05, "loss": 0.8932, "step": 4290 }, { "epoch": 1.945149592021759, "grad_norm": 0.39113823697383554, "learning_rate": 6.131151604154764e-05, "loss": 0.8972, "step": 4291 }, { "epoch": 1.9456029011786038, "grad_norm": 0.3742315931682835, "learning_rate": 6.130080249722801e-05, "loss": 0.8982, "step": 4292 }, { "epoch": 1.9460562103354486, "grad_norm": 0.4253567428388246, "learning_rate": 6.129008681957042e-05, "loss": 0.8892, "step": 4293 }, { "epoch": 1.946509519492294, "grad_norm": 0.541038769577248, "learning_rate": 6.127936900964805e-05, "loss": 0.9047, "step": 4294 }, { "epoch": 1.9469628286491387, "grad_norm": 0.5184678848203109, "learning_rate": 6.126864906853435e-05, "loss": 0.9037, "step": 4295 }, { "epoch": 1.9474161378059835, "grad_norm": 0.46337343194061537, "learning_rate": 6.125792699730295e-05, "loss": 0.8932, "step": 4296 }, { "epoch": 1.9478694469628286, "grad_norm": 0.4822340390616215, "learning_rate": 6.124720279702766e-05, "loss": 0.8785, "step": 4297 }, { "epoch": 1.9483227561196736, "grad_norm": 0.523358748970124, "learning_rate": 6.123647646878257e-05, "loss": 0.8937, "step": 4298 }, { "epoch": 1.9487760652765185, "grad_norm": 0.6472220989973186, "learning_rate": 6.122574801364195e-05, "loss": 0.8972, "step": 4299 }, { "epoch": 1.9492293744333635, "grad_norm": 0.7964114888844782, "learning_rate": 6.121501743268029e-05, "loss": 0.8984, "step": 4300 }, { "epoch": 1.9496826835902086, "grad_norm": 0.9023237364327655, "learning_rate": 6.120428472697228e-05, "loss": 0.8904, "step": 4301 }, { "epoch": 1.9501359927470534, "grad_norm": 0.9864911577695431, "learning_rate": 6.119354989759283e-05, "loss": 0.889, "step": 4302 }, { "epoch": 1.9505893019038985, "grad_norm": 0.9994298592943544, "learning_rate": 6.118281294561709e-05, "loss": 0.8791, "step": 4303 }, { "epoch": 1.9510426110607435, "grad_norm": 0.8935174897219039, "learning_rate": 6.117207387212037e-05, "loss": 0.9057, "step": 4304 }, { "epoch": 1.9514959202175883, "grad_norm": 0.7789634876236665, "learning_rate": 6.116133267817822e-05, "loss": 0.9046, "step": 4305 }, { "epoch": 1.9519492293744334, "grad_norm": 0.8086335673337979, "learning_rate": 6.115058936486641e-05, "loss": 0.8952, "step": 4306 }, { "epoch": 1.9524025385312784, "grad_norm": 0.739189957777655, "learning_rate": 6.113984393326092e-05, "loss": 0.8984, "step": 4307 }, { "epoch": 1.9528558476881233, "grad_norm": 0.5209563626200889, "learning_rate": 6.112909638443795e-05, "loss": 0.8878, "step": 4308 }, { "epoch": 1.9533091568449683, "grad_norm": 0.39730948585516196, "learning_rate": 6.111834671947386e-05, "loss": 0.8991, "step": 4309 }, { "epoch": 1.9537624660018134, "grad_norm": 0.5284957214339676, "learning_rate": 6.110759493944528e-05, "loss": 0.8777, "step": 4310 }, { "epoch": 1.9542157751586582, "grad_norm": 0.6021405774055241, "learning_rate": 6.109684104542902e-05, "loss": 0.8954, "step": 4311 }, { "epoch": 1.954669084315503, "grad_norm": 0.5793785787506259, "learning_rate": 6.108608503850215e-05, "loss": 0.8957, "step": 4312 }, { "epoch": 1.9551223934723483, "grad_norm": 0.5801819438491267, "learning_rate": 6.107532691974188e-05, "loss": 0.8882, "step": 4313 }, { "epoch": 1.9555757026291931, "grad_norm": 0.5602207481624712, "learning_rate": 6.10645666902257e-05, "loss": 0.8867, "step": 4314 }, { "epoch": 1.956029011786038, "grad_norm": 0.5624375196923395, "learning_rate": 6.105380435103124e-05, "loss": 0.884, "step": 4315 }, { "epoch": 1.956482320942883, "grad_norm": 0.5344124754639994, "learning_rate": 6.10430399032364e-05, "loss": 0.8887, "step": 4316 }, { "epoch": 1.956935630099728, "grad_norm": 0.5755244449833603, "learning_rate": 6.1032273347919265e-05, "loss": 0.8902, "step": 4317 }, { "epoch": 1.9573889392565729, "grad_norm": 0.7243443814110515, "learning_rate": 6.1021504686158144e-05, "loss": 0.8828, "step": 4318 }, { "epoch": 1.957842248413418, "grad_norm": 0.9171033450072539, "learning_rate": 6.101073391903155e-05, "loss": 0.8989, "step": 4319 }, { "epoch": 1.958295557570263, "grad_norm": 0.9691417315231818, "learning_rate": 6.09999610476182e-05, "loss": 0.9081, "step": 4320 }, { "epoch": 1.9587488667271078, "grad_norm": 0.9149549555820755, "learning_rate": 6.0989186072997046e-05, "loss": 0.8896, "step": 4321 }, { "epoch": 1.9592021758839528, "grad_norm": 0.8287504370187602, "learning_rate": 6.0978408996247214e-05, "loss": 0.8973, "step": 4322 }, { "epoch": 1.959655485040798, "grad_norm": 0.8478279247024751, "learning_rate": 6.096762981844806e-05, "loss": 0.9147, "step": 4323 }, { "epoch": 1.9601087941976427, "grad_norm": 0.8234002259546819, "learning_rate": 6.095684854067918e-05, "loss": 0.8915, "step": 4324 }, { "epoch": 1.9605621033544878, "grad_norm": 0.7232125209730338, "learning_rate": 6.09460651640203e-05, "loss": 0.906, "step": 4325 }, { "epoch": 1.9610154125113328, "grad_norm": 0.5416930816437728, "learning_rate": 6.093527968955144e-05, "loss": 0.8866, "step": 4326 }, { "epoch": 1.9614687216681777, "grad_norm": 0.44836489380888045, "learning_rate": 6.09244921183528e-05, "loss": 0.9044, "step": 4327 }, { "epoch": 1.9619220308250227, "grad_norm": 0.40526191343460977, "learning_rate": 6.091370245150477e-05, "loss": 0.9007, "step": 4328 }, { "epoch": 1.9623753399818678, "grad_norm": 0.4490722379696141, "learning_rate": 6.090291069008798e-05, "loss": 0.8976, "step": 4329 }, { "epoch": 1.9628286491387126, "grad_norm": 0.5305722806181034, "learning_rate": 6.089211683518325e-05, "loss": 0.8918, "step": 4330 }, { "epoch": 1.9632819582955574, "grad_norm": 0.703041415054904, "learning_rate": 6.088132088787161e-05, "loss": 0.9025, "step": 4331 }, { "epoch": 1.9637352674524027, "grad_norm": 0.9014554261444366, "learning_rate": 6.087052284923433e-05, "loss": 0.8925, "step": 4332 }, { "epoch": 1.9641885766092475, "grad_norm": 1.0449274285748011, "learning_rate": 6.085972272035284e-05, "loss": 0.8991, "step": 4333 }, { "epoch": 1.9646418857660923, "grad_norm": 0.9775773966024461, "learning_rate": 6.084892050230883e-05, "loss": 0.8835, "step": 4334 }, { "epoch": 1.9650951949229376, "grad_norm": 0.8607541142786397, "learning_rate": 6.083811619618414e-05, "loss": 0.8899, "step": 4335 }, { "epoch": 1.9655485040797824, "grad_norm": 0.6191432399866257, "learning_rate": 6.082730980306088e-05, "loss": 0.8744, "step": 4336 }, { "epoch": 1.9660018132366273, "grad_norm": 0.3239165100473134, "learning_rate": 6.081650132402132e-05, "loss": 0.9067, "step": 4337 }, { "epoch": 1.9664551223934723, "grad_norm": 0.38564787457188243, "learning_rate": 6.080569076014799e-05, "loss": 0.8803, "step": 4338 }, { "epoch": 1.9669084315503174, "grad_norm": 0.6023621804125251, "learning_rate": 6.0794878112523566e-05, "loss": 0.872, "step": 4339 }, { "epoch": 1.9673617407071622, "grad_norm": 0.6806279127792966, "learning_rate": 6.0784063382231e-05, "loss": 0.8854, "step": 4340 }, { "epoch": 1.9678150498640072, "grad_norm": 0.7779568404396721, "learning_rate": 6.07732465703534e-05, "loss": 0.9002, "step": 4341 }, { "epoch": 1.9682683590208523, "grad_norm": 0.8287067866421964, "learning_rate": 6.076242767797409e-05, "loss": 0.9057, "step": 4342 }, { "epoch": 1.9687216681776971, "grad_norm": 0.840082503668762, "learning_rate": 6.0751606706176646e-05, "loss": 0.8911, "step": 4343 }, { "epoch": 1.9691749773345422, "grad_norm": 0.7888255286854541, "learning_rate": 6.0740783656044805e-05, "loss": 0.9041, "step": 4344 }, { "epoch": 1.9696282864913872, "grad_norm": 0.6637200700186041, "learning_rate": 6.072995852866252e-05, "loss": 0.8765, "step": 4345 }, { "epoch": 1.970081595648232, "grad_norm": 0.4842927185634085, "learning_rate": 6.071913132511395e-05, "loss": 0.8936, "step": 4346 }, { "epoch": 1.970534904805077, "grad_norm": 0.36107916459304384, "learning_rate": 6.0708302046483496e-05, "loss": 0.9073, "step": 4347 }, { "epoch": 1.9709882139619221, "grad_norm": 0.4357845923023063, "learning_rate": 6.069747069385573e-05, "loss": 0.8816, "step": 4348 }, { "epoch": 1.971441523118767, "grad_norm": 0.4317679725227376, "learning_rate": 6.068663726831546e-05, "loss": 0.8932, "step": 4349 }, { "epoch": 1.971894832275612, "grad_norm": 0.5728366923199912, "learning_rate": 6.0675801770947646e-05, "loss": 0.8903, "step": 4350 }, { "epoch": 1.972348141432457, "grad_norm": 0.7370377612545607, "learning_rate": 6.0664964202837534e-05, "loss": 0.8986, "step": 4351 }, { "epoch": 1.972801450589302, "grad_norm": 0.7327223768138144, "learning_rate": 6.065412456507052e-05, "loss": 0.8919, "step": 4352 }, { "epoch": 1.9732547597461467, "grad_norm": 0.631483668950669, "learning_rate": 6.0643282858732234e-05, "loss": 0.8807, "step": 4353 }, { "epoch": 1.973708068902992, "grad_norm": 0.5329538238824669, "learning_rate": 6.063243908490849e-05, "loss": 0.9025, "step": 4354 }, { "epoch": 1.9741613780598368, "grad_norm": 0.5299518406946434, "learning_rate": 6.062159324468534e-05, "loss": 0.8881, "step": 4355 }, { "epoch": 1.9746146872166817, "grad_norm": 0.5628662526790137, "learning_rate": 6.061074533914902e-05, "loss": 0.8613, "step": 4356 }, { "epoch": 1.9750679963735267, "grad_norm": 0.5194948531460198, "learning_rate": 6.059989536938599e-05, "loss": 0.8883, "step": 4357 }, { "epoch": 1.9755213055303718, "grad_norm": 0.5089371081520179, "learning_rate": 6.058904333648288e-05, "loss": 0.8977, "step": 4358 }, { "epoch": 1.9759746146872166, "grad_norm": 0.5876922503846671, "learning_rate": 6.057818924152658e-05, "loss": 0.8967, "step": 4359 }, { "epoch": 1.9764279238440616, "grad_norm": 0.6814718776665586, "learning_rate": 6.056733308560415e-05, "loss": 0.8767, "step": 4360 }, { "epoch": 1.9768812330009067, "grad_norm": 0.6848252663150104, "learning_rate": 6.055647486980286e-05, "loss": 0.8937, "step": 4361 }, { "epoch": 1.9773345421577515, "grad_norm": 0.6516010710405016, "learning_rate": 6.0545614595210205e-05, "loss": 0.9056, "step": 4362 }, { "epoch": 1.9777878513145966, "grad_norm": 0.5904775602325584, "learning_rate": 6.053475226291385e-05, "loss": 0.916, "step": 4363 }, { "epoch": 1.9782411604714416, "grad_norm": 0.6257082925183015, "learning_rate": 6.052388787400173e-05, "loss": 0.9027, "step": 4364 }, { "epoch": 1.9786944696282864, "grad_norm": 0.6519665930887986, "learning_rate": 6.05130214295619e-05, "loss": 0.8863, "step": 4365 }, { "epoch": 1.9791477787851315, "grad_norm": 0.6409833796074436, "learning_rate": 6.0502152930682696e-05, "loss": 0.8875, "step": 4366 }, { "epoch": 1.9796010879419765, "grad_norm": 0.6571912483287786, "learning_rate": 6.049128237845262e-05, "loss": 0.8843, "step": 4367 }, { "epoch": 1.9800543970988214, "grad_norm": 0.684373355520645, "learning_rate": 6.0480409773960386e-05, "loss": 0.8912, "step": 4368 }, { "epoch": 1.9805077062556664, "grad_norm": 0.7074984985386064, "learning_rate": 6.046953511829493e-05, "loss": 0.8873, "step": 4369 }, { "epoch": 1.9809610154125115, "grad_norm": 0.6579066286838406, "learning_rate": 6.045865841254536e-05, "loss": 0.8801, "step": 4370 }, { "epoch": 1.9814143245693563, "grad_norm": 0.5334619814751139, "learning_rate": 6.044777965780103e-05, "loss": 0.8811, "step": 4371 }, { "epoch": 1.9818676337262011, "grad_norm": 0.47795245848301976, "learning_rate": 6.0436898855151486e-05, "loss": 0.8961, "step": 4372 }, { "epoch": 1.9823209428830464, "grad_norm": 0.5781913519452522, "learning_rate": 6.0426016005686455e-05, "loss": 0.8863, "step": 4373 }, { "epoch": 1.9827742520398912, "grad_norm": 0.5339137050202987, "learning_rate": 6.041513111049588e-05, "loss": 0.9075, "step": 4374 }, { "epoch": 1.983227561196736, "grad_norm": 0.5195707578549125, "learning_rate": 6.040424417066993e-05, "loss": 0.8982, "step": 4375 }, { "epoch": 1.983680870353581, "grad_norm": 0.47675457568619184, "learning_rate": 6.039335518729895e-05, "loss": 0.8806, "step": 4376 }, { "epoch": 1.9841341795104261, "grad_norm": 0.5063223286377632, "learning_rate": 6.0382464161473524e-05, "loss": 0.902, "step": 4377 }, { "epoch": 1.984587488667271, "grad_norm": 0.5468461629496405, "learning_rate": 6.037157109428441e-05, "loss": 0.8764, "step": 4378 }, { "epoch": 1.985040797824116, "grad_norm": 0.3895310105963255, "learning_rate": 6.0360675986822574e-05, "loss": 0.8952, "step": 4379 }, { "epoch": 1.985494106980961, "grad_norm": 0.40156797770779484, "learning_rate": 6.03497788401792e-05, "loss": 0.8834, "step": 4380 }, { "epoch": 1.985947416137806, "grad_norm": 0.5177577821827013, "learning_rate": 6.0338879655445664e-05, "loss": 0.8814, "step": 4381 }, { "epoch": 1.986400725294651, "grad_norm": 0.5460472322401857, "learning_rate": 6.0327978433713574e-05, "loss": 0.8779, "step": 4382 }, { "epoch": 1.986854034451496, "grad_norm": 0.6678568574875138, "learning_rate": 6.031707517607469e-05, "loss": 0.9062, "step": 4383 }, { "epoch": 1.9873073436083408, "grad_norm": 0.7610842535823695, "learning_rate": 6.030616988362101e-05, "loss": 0.9046, "step": 4384 }, { "epoch": 1.9877606527651859, "grad_norm": 0.8968038739715484, "learning_rate": 6.029526255744474e-05, "loss": 0.8915, "step": 4385 }, { "epoch": 1.988213961922031, "grad_norm": 0.9513851187766068, "learning_rate": 6.028435319863827e-05, "loss": 0.8856, "step": 4386 }, { "epoch": 1.9886672710788758, "grad_norm": 0.9826235998098587, "learning_rate": 6.0273441808294224e-05, "loss": 0.8718, "step": 4387 }, { "epoch": 1.9891205802357208, "grad_norm": 1.0368441666957495, "learning_rate": 6.02625283875054e-05, "loss": 0.8789, "step": 4388 }, { "epoch": 1.9895738893925659, "grad_norm": 0.8479499367125268, "learning_rate": 6.02516129373648e-05, "loss": 0.8801, "step": 4389 }, { "epoch": 1.9900271985494107, "grad_norm": 4.479442943840953, "learning_rate": 6.0240695458965663e-05, "loss": 0.9343, "step": 4390 }, { "epoch": 1.9904805077062555, "grad_norm": 1.3043542923666624, "learning_rate": 6.0229775953401375e-05, "loss": 0.8863, "step": 4391 }, { "epoch": 1.9909338168631008, "grad_norm": 1.4486937196014096, "learning_rate": 6.021885442176556e-05, "loss": 0.8928, "step": 4392 }, { "epoch": 1.9913871260199456, "grad_norm": 0.6123235250524953, "learning_rate": 6.0207930865152085e-05, "loss": 0.8938, "step": 4393 }, { "epoch": 1.9918404351767904, "grad_norm": 1.9088506495194186, "learning_rate": 6.0197005284654916e-05, "loss": 0.8906, "step": 4394 }, { "epoch": 1.9922937443336355, "grad_norm": 0.8559287457673491, "learning_rate": 6.018607768136832e-05, "loss": 0.8874, "step": 4395 }, { "epoch": 1.9927470534904805, "grad_norm": 2.337706410645413, "learning_rate": 6.017514805638671e-05, "loss": 0.9166, "step": 4396 }, { "epoch": 1.9932003626473254, "grad_norm": 1.8922433633379754, "learning_rate": 6.016421641080474e-05, "loss": 0.8989, "step": 4397 }, { "epoch": 1.9936536718041704, "grad_norm": 1.603650773808297, "learning_rate": 6.0153282745717225e-05, "loss": 0.9009, "step": 4398 }, { "epoch": 1.9941069809610155, "grad_norm": 1.848741165073692, "learning_rate": 6.014234706221922e-05, "loss": 0.9149, "step": 4399 }, { "epoch": 1.9945602901178603, "grad_norm": 1.4699498518903622, "learning_rate": 6.0131409361405956e-05, "loss": 0.9043, "step": 4400 }, { "epoch": 1.9950135992747053, "grad_norm": 1.435666173090582, "learning_rate": 6.012046964437289e-05, "loss": 0.8911, "step": 4401 }, { "epoch": 1.9954669084315504, "grad_norm": 1.2952290581290675, "learning_rate": 6.010952791221565e-05, "loss": 0.9041, "step": 4402 }, { "epoch": 1.9959202175883952, "grad_norm": 1.3585897821350794, "learning_rate": 6.009858416603008e-05, "loss": 0.8762, "step": 4403 }, { "epoch": 1.9963735267452403, "grad_norm": 1.0644695951903798, "learning_rate": 6.008763840691224e-05, "loss": 0.8829, "step": 4404 }, { "epoch": 1.9968268359020853, "grad_norm": 1.5751656808472843, "learning_rate": 6.007669063595838e-05, "loss": 0.9139, "step": 4405 }, { "epoch": 1.9972801450589301, "grad_norm": 0.991250865004389, "learning_rate": 6.0065740854264955e-05, "loss": 0.8996, "step": 4406 }, { "epoch": 1.9977334542157752, "grad_norm": 1.8587232556057705, "learning_rate": 6.00547890629286e-05, "loss": 0.8834, "step": 4407 }, { "epoch": 1.9981867633726202, "grad_norm": 1.4481550775776828, "learning_rate": 6.004383526304619e-05, "loss": 0.9087, "step": 4408 }, { "epoch": 1.998640072529465, "grad_norm": 1.5981815406500834, "learning_rate": 6.003287945571476e-05, "loss": 0.8787, "step": 4409 }, { "epoch": 1.99909338168631, "grad_norm": 1.2621367186773798, "learning_rate": 6.0021921642031593e-05, "loss": 0.8962, "step": 4410 }, { "epoch": 1.9995466908431552, "grad_norm": 1.422988961298709, "learning_rate": 6.001096182309412e-05, "loss": 0.859, "step": 4411 }, { "epoch": 2.0, "grad_norm": 1.0579517603299193, "learning_rate": 6.000000000000001e-05, "loss": 0.9021, "step": 4412 }, { "epoch": 2.000453309156845, "grad_norm": 1.3693503507380156, "learning_rate": 5.998903617384712e-05, "loss": 0.8959, "step": 4413 }, { "epoch": 2.00090661831369, "grad_norm": 0.8828831754594394, "learning_rate": 5.9978070345733505e-05, "loss": 0.8846, "step": 4414 }, { "epoch": 2.001359927470535, "grad_norm": 1.3912387595878701, "learning_rate": 5.996710251675744e-05, "loss": 0.8893, "step": 4415 }, { "epoch": 2.0018132366273798, "grad_norm": 0.8915105445974222, "learning_rate": 5.995613268801737e-05, "loss": 0.9044, "step": 4416 }, { "epoch": 2.002266545784225, "grad_norm": 1.3371715963272475, "learning_rate": 5.994516086061197e-05, "loss": 0.885, "step": 4417 }, { "epoch": 2.00271985494107, "grad_norm": 0.966143456009693, "learning_rate": 5.9934187035640074e-05, "loss": 0.8871, "step": 4418 }, { "epoch": 2.0031731640979147, "grad_norm": 1.4217342810261684, "learning_rate": 5.992321121420079e-05, "loss": 0.8639, "step": 4419 }, { "epoch": 2.00362647325476, "grad_norm": 1.0808071833658555, "learning_rate": 5.991223339739332e-05, "loss": 0.8754, "step": 4420 }, { "epoch": 2.004079782411605, "grad_norm": 1.292437578602291, "learning_rate": 5.9901253586317175e-05, "loss": 0.8632, "step": 4421 }, { "epoch": 2.0045330915684496, "grad_norm": 1.1796723571980072, "learning_rate": 5.989027178207199e-05, "loss": 0.864, "step": 4422 }, { "epoch": 2.0049864007252944, "grad_norm": 1.0903977369284323, "learning_rate": 5.9879287985757624e-05, "loss": 0.8658, "step": 4423 }, { "epoch": 2.0054397098821397, "grad_norm": 1.087617383554328, "learning_rate": 5.986830219847414e-05, "loss": 0.8636, "step": 4424 }, { "epoch": 2.0058930190389845, "grad_norm": 0.8642163672687267, "learning_rate": 5.98573144213218e-05, "loss": 0.8799, "step": 4425 }, { "epoch": 2.0063463281958294, "grad_norm": 0.8183342862442501, "learning_rate": 5.984632465540107e-05, "loss": 0.8581, "step": 4426 }, { "epoch": 2.0067996373526746, "grad_norm": 0.7812570514830235, "learning_rate": 5.983533290181259e-05, "loss": 0.8925, "step": 4427 }, { "epoch": 2.0072529465095195, "grad_norm": 0.8043295712983869, "learning_rate": 5.9824339161657216e-05, "loss": 0.8847, "step": 4428 }, { "epoch": 2.0077062556663643, "grad_norm": 0.7235935773280668, "learning_rate": 5.981334343603603e-05, "loss": 0.8665, "step": 4429 }, { "epoch": 2.0081595648232096, "grad_norm": 0.8315540172254899, "learning_rate": 5.980234572605026e-05, "loss": 0.8974, "step": 4430 }, { "epoch": 2.0086128739800544, "grad_norm": 0.7302624377976467, "learning_rate": 5.9791346032801375e-05, "loss": 0.8799, "step": 4431 }, { "epoch": 2.009066183136899, "grad_norm": 0.5946408202164674, "learning_rate": 5.978034435739101e-05, "loss": 0.852, "step": 4432 }, { "epoch": 2.0095194922937445, "grad_norm": 0.7630621354511823, "learning_rate": 5.976934070092103e-05, "loss": 0.8832, "step": 4433 }, { "epoch": 2.0099728014505893, "grad_norm": 0.6691143664065395, "learning_rate": 5.9758335064493484e-05, "loss": 0.8737, "step": 4434 }, { "epoch": 2.010426110607434, "grad_norm": 0.585357783431062, "learning_rate": 5.97473274492106e-05, "loss": 0.8762, "step": 4435 }, { "epoch": 2.0108794197642794, "grad_norm": 0.61182036773268, "learning_rate": 5.9736317856174844e-05, "loss": 0.8713, "step": 4436 }, { "epoch": 2.0113327289211242, "grad_norm": 0.4714387909412953, "learning_rate": 5.972530628648884e-05, "loss": 0.8585, "step": 4437 }, { "epoch": 2.011786038077969, "grad_norm": 0.4888272816253385, "learning_rate": 5.971429274125545e-05, "loss": 0.8555, "step": 4438 }, { "epoch": 2.0122393472348143, "grad_norm": 0.4310515494877047, "learning_rate": 5.97032772215777e-05, "loss": 0.8752, "step": 4439 }, { "epoch": 2.012692656391659, "grad_norm": 0.5748180008778407, "learning_rate": 5.9692259728558834e-05, "loss": 0.872, "step": 4440 }, { "epoch": 2.013145965548504, "grad_norm": 0.5335792435495892, "learning_rate": 5.9681240263302275e-05, "loss": 0.8985, "step": 4441 }, { "epoch": 2.013599274705349, "grad_norm": 0.4553141987300091, "learning_rate": 5.967021882691166e-05, "loss": 0.8668, "step": 4442 }, { "epoch": 2.014052583862194, "grad_norm": 0.35294536290185724, "learning_rate": 5.965919542049083e-05, "loss": 0.8832, "step": 4443 }, { "epoch": 2.014505893019039, "grad_norm": 0.4033452405015096, "learning_rate": 5.964817004514379e-05, "loss": 0.8698, "step": 4444 }, { "epoch": 2.0149592021758838, "grad_norm": 0.4946055258597081, "learning_rate": 5.9637142701974774e-05, "loss": 0.8668, "step": 4445 }, { "epoch": 2.015412511332729, "grad_norm": 0.561570902098644, "learning_rate": 5.9626113392088214e-05, "loss": 0.8994, "step": 4446 }, { "epoch": 2.015865820489574, "grad_norm": 0.5382797939207867, "learning_rate": 5.9615082116588714e-05, "loss": 0.8791, "step": 4447 }, { "epoch": 2.0163191296464187, "grad_norm": 0.42622687977317536, "learning_rate": 5.96040488765811e-05, "loss": 0.8857, "step": 4448 }, { "epoch": 2.016772438803264, "grad_norm": 0.35887571588245143, "learning_rate": 5.959301367317037e-05, "loss": 0.8666, "step": 4449 }, { "epoch": 2.017225747960109, "grad_norm": 0.3617734286220932, "learning_rate": 5.958197650746173e-05, "loss": 0.872, "step": 4450 }, { "epoch": 2.0176790571169536, "grad_norm": 0.511402810323289, "learning_rate": 5.95709373805606e-05, "loss": 0.8583, "step": 4451 }, { "epoch": 2.018132366273799, "grad_norm": 0.5827522234559757, "learning_rate": 5.955989629357256e-05, "loss": 0.8885, "step": 4452 }, { "epoch": 2.0185856754306437, "grad_norm": 0.530827789283048, "learning_rate": 5.954885324760344e-05, "loss": 0.871, "step": 4453 }, { "epoch": 2.0190389845874885, "grad_norm": 0.41666005056817096, "learning_rate": 5.95378082437592e-05, "loss": 0.876, "step": 4454 }, { "epoch": 2.019492293744334, "grad_norm": 0.3727834656143721, "learning_rate": 5.952676128314605e-05, "loss": 0.8761, "step": 4455 }, { "epoch": 2.0199456029011786, "grad_norm": 0.4116798078709951, "learning_rate": 5.951571236687036e-05, "loss": 0.8677, "step": 4456 }, { "epoch": 2.0203989120580235, "grad_norm": 0.41371495894507987, "learning_rate": 5.9504661496038736e-05, "loss": 0.8715, "step": 4457 }, { "epoch": 2.0208522212148687, "grad_norm": 0.4358248234375833, "learning_rate": 5.9493608671757935e-05, "loss": 0.8833, "step": 4458 }, { "epoch": 2.0213055303717136, "grad_norm": 0.42466589843191915, "learning_rate": 5.9482553895134935e-05, "loss": 0.8752, "step": 4459 }, { "epoch": 2.0217588395285584, "grad_norm": 0.38756646913486437, "learning_rate": 5.9471497167276905e-05, "loss": 0.8747, "step": 4460 }, { "epoch": 2.0222121486854037, "grad_norm": 0.44194452611547214, "learning_rate": 5.9460438489291206e-05, "loss": 0.8779, "step": 4461 }, { "epoch": 2.0226654578422485, "grad_norm": 0.4579894239854562, "learning_rate": 5.94493778622854e-05, "loss": 0.881, "step": 4462 }, { "epoch": 2.0231187669990933, "grad_norm": 0.4158393656748826, "learning_rate": 5.9438315287367234e-05, "loss": 0.8745, "step": 4463 }, { "epoch": 2.023572076155938, "grad_norm": 0.39296016585011584, "learning_rate": 5.942725076564468e-05, "loss": 0.9005, "step": 4464 }, { "epoch": 2.0240253853127834, "grad_norm": 0.41853273507182476, "learning_rate": 5.941618429822585e-05, "loss": 0.8694, "step": 4465 }, { "epoch": 2.0244786944696282, "grad_norm": 0.4928133618014884, "learning_rate": 5.940511588621911e-05, "loss": 0.8762, "step": 4466 }, { "epoch": 2.024932003626473, "grad_norm": 0.5410015683251687, "learning_rate": 5.939404553073297e-05, "loss": 0.8739, "step": 4467 }, { "epoch": 2.0253853127833183, "grad_norm": 0.5260626023718433, "learning_rate": 5.938297323287618e-05, "loss": 0.8718, "step": 4468 }, { "epoch": 2.025838621940163, "grad_norm": 0.346935712879825, "learning_rate": 5.937189899375766e-05, "loss": 0.8768, "step": 4469 }, { "epoch": 2.026291931097008, "grad_norm": 0.3165950763072655, "learning_rate": 5.936082281448652e-05, "loss": 0.8653, "step": 4470 }, { "epoch": 2.0267452402538533, "grad_norm": 0.46830031206037454, "learning_rate": 5.9349744696172075e-05, "loss": 0.8905, "step": 4471 }, { "epoch": 2.027198549410698, "grad_norm": 0.44010011454346676, "learning_rate": 5.933866463992383e-05, "loss": 0.8709, "step": 4472 }, { "epoch": 2.027651858567543, "grad_norm": 0.38155558889248486, "learning_rate": 5.932758264685148e-05, "loss": 0.8494, "step": 4473 }, { "epoch": 2.028105167724388, "grad_norm": 0.41948133521653275, "learning_rate": 5.9316498718064925e-05, "loss": 0.8879, "step": 4474 }, { "epoch": 2.028558476881233, "grad_norm": 0.4553330394118193, "learning_rate": 5.930541285467427e-05, "loss": 0.8626, "step": 4475 }, { "epoch": 2.029011786038078, "grad_norm": 0.3168597042113871, "learning_rate": 5.9294325057789766e-05, "loss": 0.8602, "step": 4476 }, { "epoch": 2.029465095194923, "grad_norm": 0.3315163937698171, "learning_rate": 5.92832353285219e-05, "loss": 0.8734, "step": 4477 }, { "epoch": 2.029918404351768, "grad_norm": 0.38138125461256345, "learning_rate": 5.927214366798136e-05, "loss": 0.8968, "step": 4478 }, { "epoch": 2.030371713508613, "grad_norm": 0.4901611012766882, "learning_rate": 5.9261050077278974e-05, "loss": 0.8633, "step": 4479 }, { "epoch": 2.030825022665458, "grad_norm": 0.5627471632609867, "learning_rate": 5.924995455752582e-05, "loss": 0.856, "step": 4480 }, { "epoch": 2.031278331822303, "grad_norm": 0.620028015855765, "learning_rate": 5.9238857109833145e-05, "loss": 0.851, "step": 4481 }, { "epoch": 2.0317316409791477, "grad_norm": 0.6008454023799015, "learning_rate": 5.922775773531239e-05, "loss": 0.8945, "step": 4482 }, { "epoch": 2.0321849501359925, "grad_norm": 0.5085172928748548, "learning_rate": 5.9216656435075185e-05, "loss": 0.9039, "step": 4483 }, { "epoch": 2.032638259292838, "grad_norm": 0.4428819720239636, "learning_rate": 5.920555321023336e-05, "loss": 0.8826, "step": 4484 }, { "epoch": 2.0330915684496826, "grad_norm": 0.4339254649445022, "learning_rate": 5.919444806189895e-05, "loss": 0.8829, "step": 4485 }, { "epoch": 2.0335448776065275, "grad_norm": 0.48909281783407094, "learning_rate": 5.9183340991184134e-05, "loss": 0.8844, "step": 4486 }, { "epoch": 2.0339981867633727, "grad_norm": 0.6278517793850551, "learning_rate": 5.917223199920134e-05, "loss": 0.8864, "step": 4487 }, { "epoch": 2.0344514959202176, "grad_norm": 0.41668848176498907, "learning_rate": 5.916112108706319e-05, "loss": 0.8727, "step": 4488 }, { "epoch": 2.0349048050770624, "grad_norm": 0.34233275945803454, "learning_rate": 5.915000825588243e-05, "loss": 0.8899, "step": 4489 }, { "epoch": 2.0353581142339077, "grad_norm": 0.3126256014391702, "learning_rate": 5.913889350677207e-05, "loss": 0.8679, "step": 4490 }, { "epoch": 2.0358114233907525, "grad_norm": 0.318959347516722, "learning_rate": 5.9127776840845284e-05, "loss": 0.876, "step": 4491 }, { "epoch": 2.0362647325475973, "grad_norm": 0.3008174222198692, "learning_rate": 5.9116658259215414e-05, "loss": 0.8501, "step": 4492 }, { "epoch": 2.0367180417044426, "grad_norm": 0.3225087480982042, "learning_rate": 5.910553776299605e-05, "loss": 0.8736, "step": 4493 }, { "epoch": 2.0371713508612874, "grad_norm": 0.3727342053793816, "learning_rate": 5.9094415353300914e-05, "loss": 0.8937, "step": 4494 }, { "epoch": 2.0376246600181322, "grad_norm": 0.4665973772767037, "learning_rate": 5.908329103124397e-05, "loss": 0.8741, "step": 4495 }, { "epoch": 2.0380779691749775, "grad_norm": 0.4551070652251744, "learning_rate": 5.9072164797939346e-05, "loss": 0.8715, "step": 4496 }, { "epoch": 2.0385312783318223, "grad_norm": 0.44583425885419264, "learning_rate": 5.9061036654501384e-05, "loss": 0.8852, "step": 4497 }, { "epoch": 2.038984587488667, "grad_norm": 0.3586962098261952, "learning_rate": 5.9049906602044546e-05, "loss": 0.8886, "step": 4498 }, { "epoch": 2.0394378966455124, "grad_norm": 0.2851000208853214, "learning_rate": 5.9038774641683596e-05, "loss": 0.8712, "step": 4499 }, { "epoch": 2.0398912058023573, "grad_norm": 0.28402188052151345, "learning_rate": 5.902764077453341e-05, "loss": 0.8754, "step": 4500 }, { "epoch": 2.040344514959202, "grad_norm": 0.41447315822263037, "learning_rate": 5.901650500170908e-05, "loss": 0.8782, "step": 4501 }, { "epoch": 2.040797824116047, "grad_norm": 0.5275540876483987, "learning_rate": 5.900536732432588e-05, "loss": 0.8809, "step": 4502 }, { "epoch": 2.041251133272892, "grad_norm": 0.5930230498546067, "learning_rate": 5.899422774349929e-05, "loss": 0.8755, "step": 4503 }, { "epoch": 2.041704442429737, "grad_norm": 0.6592702871977485, "learning_rate": 5.898308626034498e-05, "loss": 0.8702, "step": 4504 }, { "epoch": 2.042157751586582, "grad_norm": 0.6660992728523727, "learning_rate": 5.897194287597877e-05, "loss": 0.8868, "step": 4505 }, { "epoch": 2.042611060743427, "grad_norm": 0.6563698092532015, "learning_rate": 5.8960797591516736e-05, "loss": 0.8779, "step": 4506 }, { "epoch": 2.043064369900272, "grad_norm": 0.566987573178707, "learning_rate": 5.8949650408075096e-05, "loss": 0.8832, "step": 4507 }, { "epoch": 2.043517679057117, "grad_norm": 0.5160006553631354, "learning_rate": 5.8938501326770276e-05, "loss": 0.8711, "step": 4508 }, { "epoch": 2.043970988213962, "grad_norm": 0.4475010487591147, "learning_rate": 5.892735034871889e-05, "loss": 0.8772, "step": 4509 }, { "epoch": 2.044424297370807, "grad_norm": 0.32709976595335793, "learning_rate": 5.8916197475037736e-05, "loss": 0.8884, "step": 4510 }, { "epoch": 2.0448776065276517, "grad_norm": 0.304998318082167, "learning_rate": 5.890504270684381e-05, "loss": 0.8757, "step": 4511 }, { "epoch": 2.045330915684497, "grad_norm": 0.323451296570382, "learning_rate": 5.88938860452543e-05, "loss": 0.8992, "step": 4512 }, { "epoch": 2.045784224841342, "grad_norm": 0.31718422595184015, "learning_rate": 5.888272749138657e-05, "loss": 0.8556, "step": 4513 }, { "epoch": 2.0462375339981866, "grad_norm": 0.40201172582155775, "learning_rate": 5.88715670463582e-05, "loss": 0.8986, "step": 4514 }, { "epoch": 2.046690843155032, "grad_norm": 0.4448065920248286, "learning_rate": 5.8860404711286906e-05, "loss": 0.8737, "step": 4515 }, { "epoch": 2.0471441523118767, "grad_norm": 0.4557609138938328, "learning_rate": 5.884924048729066e-05, "loss": 0.8832, "step": 4516 }, { "epoch": 2.0475974614687216, "grad_norm": 0.5649482858837344, "learning_rate": 5.8838074375487595e-05, "loss": 0.8755, "step": 4517 }, { "epoch": 2.048050770625567, "grad_norm": 0.6288555815903284, "learning_rate": 5.8826906376996e-05, "loss": 0.8758, "step": 4518 }, { "epoch": 2.0485040797824117, "grad_norm": 0.6355960053124403, "learning_rate": 5.881573649293442e-05, "loss": 0.8774, "step": 4519 }, { "epoch": 2.0489573889392565, "grad_norm": 0.5451789924822715, "learning_rate": 5.880456472442151e-05, "loss": 0.8719, "step": 4520 }, { "epoch": 2.0494106980961013, "grad_norm": 0.4695490292029792, "learning_rate": 5.879339107257619e-05, "loss": 0.8543, "step": 4521 }, { "epoch": 2.0498640072529466, "grad_norm": 0.3985693126750887, "learning_rate": 5.8782215538517516e-05, "loss": 0.901, "step": 4522 }, { "epoch": 2.0503173164097914, "grad_norm": 0.48176067179448256, "learning_rate": 5.877103812336476e-05, "loss": 0.8675, "step": 4523 }, { "epoch": 2.0507706255666363, "grad_norm": 0.3725372458333377, "learning_rate": 5.8759858828237366e-05, "loss": 0.8896, "step": 4524 }, { "epoch": 2.0512239347234815, "grad_norm": 0.3102898357844386, "learning_rate": 5.8748677654254976e-05, "loss": 0.8862, "step": 4525 }, { "epoch": 2.0516772438803264, "grad_norm": 0.30308879746017403, "learning_rate": 5.8737494602537424e-05, "loss": 0.8711, "step": 4526 }, { "epoch": 2.052130553037171, "grad_norm": 0.3502308720026267, "learning_rate": 5.87263096742047e-05, "loss": 0.8519, "step": 4527 }, { "epoch": 2.0525838621940165, "grad_norm": 0.4163613137660871, "learning_rate": 5.871512287037704e-05, "loss": 0.8768, "step": 4528 }, { "epoch": 2.0530371713508613, "grad_norm": 0.458452895508648, "learning_rate": 5.870393419217483e-05, "loss": 0.8873, "step": 4529 }, { "epoch": 2.053490480507706, "grad_norm": 0.5357598546782473, "learning_rate": 5.869274364071863e-05, "loss": 0.866, "step": 4530 }, { "epoch": 2.0539437896645514, "grad_norm": 0.5779117525894155, "learning_rate": 5.8681551217129215e-05, "loss": 0.8579, "step": 4531 }, { "epoch": 2.054397098821396, "grad_norm": 0.5900039415773937, "learning_rate": 5.867035692252756e-05, "loss": 0.8805, "step": 4532 }, { "epoch": 2.054850407978241, "grad_norm": 0.5554949210045693, "learning_rate": 5.865916075803476e-05, "loss": 0.8751, "step": 4533 }, { "epoch": 2.0553037171350863, "grad_norm": 0.5418874602314095, "learning_rate": 5.864796272477219e-05, "loss": 0.883, "step": 4534 }, { "epoch": 2.055757026291931, "grad_norm": 0.47730641272836577, "learning_rate": 5.863676282386134e-05, "loss": 0.8667, "step": 4535 }, { "epoch": 2.056210335448776, "grad_norm": 0.39539587062452264, "learning_rate": 5.862556105642394e-05, "loss": 0.8763, "step": 4536 }, { "epoch": 2.0566636446056212, "grad_norm": 0.3475106432553535, "learning_rate": 5.8614357423581846e-05, "loss": 0.9027, "step": 4537 }, { "epoch": 2.057116953762466, "grad_norm": 0.3489274705736764, "learning_rate": 5.860315192645714e-05, "loss": 0.8864, "step": 4538 }, { "epoch": 2.057570262919311, "grad_norm": 0.45610203095586327, "learning_rate": 5.859194456617211e-05, "loss": 0.8716, "step": 4539 }, { "epoch": 2.0580235720761557, "grad_norm": 0.4847082788744679, "learning_rate": 5.858073534384917e-05, "loss": 0.8822, "step": 4540 }, { "epoch": 2.058476881233001, "grad_norm": 0.5350081328670659, "learning_rate": 5.856952426061099e-05, "loss": 0.8854, "step": 4541 }, { "epoch": 2.058930190389846, "grad_norm": 0.5486716458468374, "learning_rate": 5.855831131758039e-05, "loss": 0.8608, "step": 4542 }, { "epoch": 2.0593834995466906, "grad_norm": 0.5801203772822727, "learning_rate": 5.8547096515880364e-05, "loss": 0.8826, "step": 4543 }, { "epoch": 2.059836808703536, "grad_norm": 0.5841033495289709, "learning_rate": 5.85358798566341e-05, "loss": 0.867, "step": 4544 }, { "epoch": 2.0602901178603807, "grad_norm": 0.5792692586404584, "learning_rate": 5.8524661340965014e-05, "loss": 0.8786, "step": 4545 }, { "epoch": 2.0607434270172256, "grad_norm": 0.5488692148355424, "learning_rate": 5.851344096999664e-05, "loss": 0.886, "step": 4546 }, { "epoch": 2.061196736174071, "grad_norm": 0.497182685645821, "learning_rate": 5.8502218744852754e-05, "loss": 0.8873, "step": 4547 }, { "epoch": 2.0616500453309157, "grad_norm": 0.42427438262862494, "learning_rate": 5.849099466665728e-05, "loss": 0.8835, "step": 4548 }, { "epoch": 2.0621033544877605, "grad_norm": 0.39151906444581347, "learning_rate": 5.847976873653433e-05, "loss": 0.8772, "step": 4549 }, { "epoch": 2.0625566636446058, "grad_norm": 0.3814635855210955, "learning_rate": 5.846854095560824e-05, "loss": 0.8766, "step": 4550 }, { "epoch": 2.0630099728014506, "grad_norm": 0.4476564703672927, "learning_rate": 5.8457311325003496e-05, "loss": 0.8735, "step": 4551 }, { "epoch": 2.0634632819582954, "grad_norm": 0.5167545301764351, "learning_rate": 5.8446079845844775e-05, "loss": 0.8775, "step": 4552 }, { "epoch": 2.0639165911151407, "grad_norm": 0.5448111244516555, "learning_rate": 5.8434846519256944e-05, "loss": 0.8711, "step": 4553 }, { "epoch": 2.0643699002719855, "grad_norm": 0.6327374305619716, "learning_rate": 5.842361134636505e-05, "loss": 0.8804, "step": 4554 }, { "epoch": 2.0648232094288304, "grad_norm": 0.7069176773001967, "learning_rate": 5.841237432829436e-05, "loss": 0.8935, "step": 4555 }, { "epoch": 2.0652765185856756, "grad_norm": 0.7818359421915334, "learning_rate": 5.840113546617025e-05, "loss": 0.8945, "step": 4556 }, { "epoch": 2.0657298277425205, "grad_norm": 0.811134944073197, "learning_rate": 5.838989476111834e-05, "loss": 0.8745, "step": 4557 }, { "epoch": 2.0661831368993653, "grad_norm": 0.6668574978698705, "learning_rate": 5.837865221426443e-05, "loss": 0.8939, "step": 4558 }, { "epoch": 2.0666364460562106, "grad_norm": 0.5070878676460848, "learning_rate": 5.836740782673447e-05, "loss": 0.872, "step": 4559 }, { "epoch": 2.0670897552130554, "grad_norm": 0.39768941522801304, "learning_rate": 5.835616159965466e-05, "loss": 0.8739, "step": 4560 }, { "epoch": 2.0675430643699, "grad_norm": 0.4090047027500746, "learning_rate": 5.83449135341513e-05, "loss": 0.8731, "step": 4561 }, { "epoch": 2.067996373526745, "grad_norm": 0.5616795952439557, "learning_rate": 5.833366363135095e-05, "loss": 0.8616, "step": 4562 }, { "epoch": 2.0684496826835903, "grad_norm": 0.5731586609990232, "learning_rate": 5.832241189238029e-05, "loss": 0.8825, "step": 4563 }, { "epoch": 2.068902991840435, "grad_norm": 0.6027908315821942, "learning_rate": 5.8311158318366234e-05, "loss": 0.8754, "step": 4564 }, { "epoch": 2.06935630099728, "grad_norm": 0.6650342007694588, "learning_rate": 5.829990291043586e-05, "loss": 0.8769, "step": 4565 }, { "epoch": 2.0698096101541252, "grad_norm": 0.6310788320956693, "learning_rate": 5.828864566971644e-05, "loss": 0.9011, "step": 4566 }, { "epoch": 2.07026291931097, "grad_norm": 0.5442854482003806, "learning_rate": 5.8277386597335394e-05, "loss": 0.9013, "step": 4567 }, { "epoch": 2.070716228467815, "grad_norm": 0.44439252470105706, "learning_rate": 5.826612569442036e-05, "loss": 0.8673, "step": 4568 }, { "epoch": 2.07116953762466, "grad_norm": 0.3099995976809445, "learning_rate": 5.8254862962099154e-05, "loss": 0.8653, "step": 4569 }, { "epoch": 2.071622846781505, "grad_norm": 0.309574362217333, "learning_rate": 5.824359840149978e-05, "loss": 0.8806, "step": 4570 }, { "epoch": 2.07207615593835, "grad_norm": 0.3287322182327067, "learning_rate": 5.8232332013750396e-05, "loss": 0.8717, "step": 4571 }, { "epoch": 2.072529465095195, "grad_norm": 0.35129000567021723, "learning_rate": 5.822106379997939e-05, "loss": 0.8911, "step": 4572 }, { "epoch": 2.07298277425204, "grad_norm": 0.4563554889733326, "learning_rate": 5.82097937613153e-05, "loss": 0.8821, "step": 4573 }, { "epoch": 2.0734360834088847, "grad_norm": 0.5430085354224156, "learning_rate": 5.819852189888684e-05, "loss": 0.8709, "step": 4574 }, { "epoch": 2.07388939256573, "grad_norm": 0.5662771215757916, "learning_rate": 5.818724821382292e-05, "loss": 0.8762, "step": 4575 }, { "epoch": 2.074342701722575, "grad_norm": 0.5102759691754044, "learning_rate": 5.817597270725266e-05, "loss": 0.8716, "step": 4576 }, { "epoch": 2.0747960108794197, "grad_norm": 0.4655079205590319, "learning_rate": 5.816469538030529e-05, "loss": 0.8911, "step": 4577 }, { "epoch": 2.075249320036265, "grad_norm": 0.40470728407168965, "learning_rate": 5.81534162341103e-05, "loss": 0.861, "step": 4578 }, { "epoch": 2.0757026291931098, "grad_norm": 0.3589165193458115, "learning_rate": 5.814213526979733e-05, "loss": 0.8765, "step": 4579 }, { "epoch": 2.0761559383499546, "grad_norm": 0.35522142436602666, "learning_rate": 5.813085248849619e-05, "loss": 0.8459, "step": 4580 }, { "epoch": 2.0766092475067994, "grad_norm": 0.38097401187354657, "learning_rate": 5.811956789133689e-05, "loss": 0.8758, "step": 4581 }, { "epoch": 2.0770625566636447, "grad_norm": 0.3828436982725671, "learning_rate": 5.8108281479449605e-05, "loss": 0.877, "step": 4582 }, { "epoch": 2.0775158658204895, "grad_norm": 0.4578799673155848, "learning_rate": 5.809699325396471e-05, "loss": 0.8897, "step": 4583 }, { "epoch": 2.0779691749773344, "grad_norm": 0.5057619807450817, "learning_rate": 5.8085703216012775e-05, "loss": 0.8876, "step": 4584 }, { "epoch": 2.0784224841341796, "grad_norm": 0.453504883334568, "learning_rate": 5.807441136672449e-05, "loss": 0.8814, "step": 4585 }, { "epoch": 2.0788757932910245, "grad_norm": 0.48533420181974285, "learning_rate": 5.80631177072308e-05, "loss": 0.8984, "step": 4586 }, { "epoch": 2.0793291024478693, "grad_norm": 0.577738069182516, "learning_rate": 5.8051822238662774e-05, "loss": 0.8873, "step": 4587 }, { "epoch": 2.0797824116047146, "grad_norm": 0.6316913766348288, "learning_rate": 5.804052496215169e-05, "loss": 0.8903, "step": 4588 }, { "epoch": 2.0802357207615594, "grad_norm": 0.6148074297108407, "learning_rate": 5.802922587882903e-05, "loss": 0.8961, "step": 4589 }, { "epoch": 2.080689029918404, "grad_norm": 0.556805303827392, "learning_rate": 5.80179249898264e-05, "loss": 0.858, "step": 4590 }, { "epoch": 2.0811423390752495, "grad_norm": 0.502156930135495, "learning_rate": 5.800662229627564e-05, "loss": 0.8633, "step": 4591 }, { "epoch": 2.0815956482320943, "grad_norm": 0.516861659447645, "learning_rate": 5.799531779930872e-05, "loss": 0.8878, "step": 4592 }, { "epoch": 2.082048957388939, "grad_norm": 0.3959355591829808, "learning_rate": 5.798401150005785e-05, "loss": 0.8729, "step": 4593 }, { "epoch": 2.0825022665457844, "grad_norm": 0.4152885924222527, "learning_rate": 5.7972703399655376e-05, "loss": 0.8773, "step": 4594 }, { "epoch": 2.0829555757026292, "grad_norm": 0.45398682943708396, "learning_rate": 5.796139349923382e-05, "loss": 0.8679, "step": 4595 }, { "epoch": 2.083408884859474, "grad_norm": 0.45204155280820224, "learning_rate": 5.795008179992593e-05, "loss": 0.8909, "step": 4596 }, { "epoch": 2.0838621940163193, "grad_norm": 0.3683640763279323, "learning_rate": 5.793876830286458e-05, "loss": 0.8709, "step": 4597 }, { "epoch": 2.084315503173164, "grad_norm": 0.3162672056126719, "learning_rate": 5.792745300918287e-05, "loss": 0.8735, "step": 4598 }, { "epoch": 2.084768812330009, "grad_norm": 0.38447366146164946, "learning_rate": 5.791613592001405e-05, "loss": 0.8826, "step": 4599 }, { "epoch": 2.0852221214868543, "grad_norm": 0.4657804155198531, "learning_rate": 5.790481703649157e-05, "loss": 0.8696, "step": 4600 }, { "epoch": 2.085675430643699, "grad_norm": 0.46632116164070136, "learning_rate": 5.789349635974905e-05, "loss": 0.866, "step": 4601 }, { "epoch": 2.086128739800544, "grad_norm": 0.40981533788970287, "learning_rate": 5.788217389092027e-05, "loss": 0.8738, "step": 4602 }, { "epoch": 2.0865820489573887, "grad_norm": 0.4637229966897603, "learning_rate": 5.787084963113923e-05, "loss": 0.8736, "step": 4603 }, { "epoch": 2.087035358114234, "grad_norm": 0.5656475493216265, "learning_rate": 5.785952358154007e-05, "loss": 0.8701, "step": 4604 }, { "epoch": 2.087488667271079, "grad_norm": 0.6493699059029064, "learning_rate": 5.784819574325713e-05, "loss": 0.8663, "step": 4605 }, { "epoch": 2.0879419764279237, "grad_norm": 0.6854208112126271, "learning_rate": 5.783686611742494e-05, "loss": 0.859, "step": 4606 }, { "epoch": 2.088395285584769, "grad_norm": 0.5986992075995311, "learning_rate": 5.782553470517818e-05, "loss": 0.873, "step": 4607 }, { "epoch": 2.0888485947416138, "grad_norm": 0.6034118560035834, "learning_rate": 5.781420150765174e-05, "loss": 0.8725, "step": 4608 }, { "epoch": 2.0893019038984586, "grad_norm": 0.6086667079890786, "learning_rate": 5.780286652598065e-05, "loss": 0.8945, "step": 4609 }, { "epoch": 2.089755213055304, "grad_norm": 0.5666511613107728, "learning_rate": 5.7791529761300165e-05, "loss": 0.8737, "step": 4610 }, { "epoch": 2.0902085222121487, "grad_norm": 0.5312290562054499, "learning_rate": 5.778019121474568e-05, "loss": 0.8783, "step": 4611 }, { "epoch": 2.0906618313689935, "grad_norm": 0.5310565173988439, "learning_rate": 5.77688508874528e-05, "loss": 0.8684, "step": 4612 }, { "epoch": 2.091115140525839, "grad_norm": 0.4973459647449567, "learning_rate": 5.7757508780557286e-05, "loss": 0.8831, "step": 4613 }, { "epoch": 2.0915684496826836, "grad_norm": 0.4386380950762032, "learning_rate": 5.774616489519507e-05, "loss": 0.8649, "step": 4614 }, { "epoch": 2.0920217588395285, "grad_norm": 0.38225375114037674, "learning_rate": 5.7734819232502284e-05, "loss": 0.8629, "step": 4615 }, { "epoch": 2.0924750679963737, "grad_norm": 0.329672259113762, "learning_rate": 5.772347179361523e-05, "loss": 0.8973, "step": 4616 }, { "epoch": 2.0929283771532186, "grad_norm": 0.45749460914365775, "learning_rate": 5.77121225796704e-05, "loss": 0.8658, "step": 4617 }, { "epoch": 2.0933816863100634, "grad_norm": 0.6076392942989817, "learning_rate": 5.7700771591804426e-05, "loss": 0.8696, "step": 4618 }, { "epoch": 2.093834995466908, "grad_norm": 0.7130098202113795, "learning_rate": 5.768941883115415e-05, "loss": 0.8613, "step": 4619 }, { "epoch": 2.0942883046237535, "grad_norm": 0.7070175174528951, "learning_rate": 5.76780642988566e-05, "loss": 0.8834, "step": 4620 }, { "epoch": 2.0947416137805983, "grad_norm": 0.7106169698148995, "learning_rate": 5.7666707996048954e-05, "loss": 0.8893, "step": 4621 }, { "epoch": 2.095194922937443, "grad_norm": 0.7414541704479877, "learning_rate": 5.7655349923868584e-05, "loss": 0.8788, "step": 4622 }, { "epoch": 2.0956482320942884, "grad_norm": 1.0242800133771288, "learning_rate": 5.7643990083453025e-05, "loss": 0.863, "step": 4623 }, { "epoch": 2.0961015412511332, "grad_norm": 0.7302534872712886, "learning_rate": 5.763262847594e-05, "loss": 0.8744, "step": 4624 }, { "epoch": 2.096554850407978, "grad_norm": 0.6456197575490655, "learning_rate": 5.762126510246741e-05, "loss": 0.8872, "step": 4625 }, { "epoch": 2.0970081595648233, "grad_norm": 0.5871330229244505, "learning_rate": 5.760989996417335e-05, "loss": 0.865, "step": 4626 }, { "epoch": 2.097461468721668, "grad_norm": 0.5192944047824262, "learning_rate": 5.759853306219604e-05, "loss": 0.8751, "step": 4627 }, { "epoch": 2.097914777878513, "grad_norm": 0.3839679482229879, "learning_rate": 5.758716439767392e-05, "loss": 0.8725, "step": 4628 }, { "epoch": 2.0983680870353583, "grad_norm": 0.39742051042731424, "learning_rate": 5.757579397174561e-05, "loss": 0.8744, "step": 4629 }, { "epoch": 2.098821396192203, "grad_norm": 0.4224738640286207, "learning_rate": 5.756442178554988e-05, "loss": 0.8713, "step": 4630 }, { "epoch": 2.099274705349048, "grad_norm": 0.47316581023903964, "learning_rate": 5.755304784022568e-05, "loss": 0.8921, "step": 4631 }, { "epoch": 2.099728014505893, "grad_norm": 0.5350488058782479, "learning_rate": 5.7541672136912164e-05, "loss": 0.8673, "step": 4632 }, { "epoch": 2.100181323662738, "grad_norm": 0.552030295331295, "learning_rate": 5.7530294676748605e-05, "loss": 0.8879, "step": 4633 }, { "epoch": 2.100634632819583, "grad_norm": 0.5863362137388385, "learning_rate": 5.751891546087453e-05, "loss": 0.8792, "step": 4634 }, { "epoch": 2.101087941976428, "grad_norm": 0.6127320480122495, "learning_rate": 5.7507534490429574e-05, "loss": 0.8868, "step": 4635 }, { "epoch": 2.101541251133273, "grad_norm": 0.6424313048407116, "learning_rate": 5.7496151766553595e-05, "loss": 0.8887, "step": 4636 }, { "epoch": 2.1019945602901178, "grad_norm": 0.6566501264031506, "learning_rate": 5.7484767290386596e-05, "loss": 0.8797, "step": 4637 }, { "epoch": 2.102447869446963, "grad_norm": 0.7176836088906126, "learning_rate": 5.747338106306876e-05, "loss": 0.881, "step": 4638 }, { "epoch": 2.102901178603808, "grad_norm": 0.6994836306358369, "learning_rate": 5.746199308574046e-05, "loss": 0.8751, "step": 4639 }, { "epoch": 2.1033544877606527, "grad_norm": 0.7719690254384831, "learning_rate": 5.745060335954223e-05, "loss": 0.9086, "step": 4640 }, { "epoch": 2.1038077969174975, "grad_norm": 0.8030776339876178, "learning_rate": 5.7439211885614804e-05, "loss": 0.8799, "step": 4641 }, { "epoch": 2.104261106074343, "grad_norm": 0.8267811702230194, "learning_rate": 5.7427818665099034e-05, "loss": 0.8832, "step": 4642 }, { "epoch": 2.1047144152311876, "grad_norm": 0.7852534695405192, "learning_rate": 5.741642369913601e-05, "loss": 0.889, "step": 4643 }, { "epoch": 2.1051677243880325, "grad_norm": 0.7826247686699569, "learning_rate": 5.740502698886697e-05, "loss": 0.8749, "step": 4644 }, { "epoch": 2.1056210335448777, "grad_norm": 0.7573024335328822, "learning_rate": 5.739362853543333e-05, "loss": 0.8777, "step": 4645 }, { "epoch": 2.1060743427017226, "grad_norm": 0.673825880883513, "learning_rate": 5.738222833997667e-05, "loss": 0.8491, "step": 4646 }, { "epoch": 2.1065276518585674, "grad_norm": 0.5259398585553748, "learning_rate": 5.7370826403638746e-05, "loss": 0.8815, "step": 4647 }, { "epoch": 2.1069809610154127, "grad_norm": 0.755604763073318, "learning_rate": 5.735942272756151e-05, "loss": 0.9475, "step": 4648 }, { "epoch": 2.1074342701722575, "grad_norm": 0.5010554695851565, "learning_rate": 5.734801731288707e-05, "loss": 0.8791, "step": 4649 }, { "epoch": 2.1078875793291023, "grad_norm": 0.5142385752072223, "learning_rate": 5.733661016075772e-05, "loss": 0.8888, "step": 4650 }, { "epoch": 2.1083408884859476, "grad_norm": 0.5351909018832351, "learning_rate": 5.73252012723159e-05, "loss": 0.884, "step": 4651 }, { "epoch": 2.1087941976427924, "grad_norm": 0.6233419691585887, "learning_rate": 5.731379064870426e-05, "loss": 0.873, "step": 4652 }, { "epoch": 2.1092475067996372, "grad_norm": 0.748858692651273, "learning_rate": 5.730237829106561e-05, "loss": 0.8815, "step": 4653 }, { "epoch": 2.1097008159564825, "grad_norm": 0.8036776360522795, "learning_rate": 5.729096420054291e-05, "loss": 0.8892, "step": 4654 }, { "epoch": 2.1101541251133273, "grad_norm": 0.8288316422191436, "learning_rate": 5.7279548378279335e-05, "loss": 0.8806, "step": 4655 }, { "epoch": 2.110607434270172, "grad_norm": 0.7699178052207398, "learning_rate": 5.72681308254182e-05, "loss": 0.8842, "step": 4656 }, { "epoch": 2.1110607434270174, "grad_norm": 0.6384327740083723, "learning_rate": 5.725671154310302e-05, "loss": 0.8911, "step": 4657 }, { "epoch": 2.1115140525838623, "grad_norm": 0.46565787022588595, "learning_rate": 5.724529053247747e-05, "loss": 0.8864, "step": 4658 }, { "epoch": 2.111967361740707, "grad_norm": 0.3737511507919331, "learning_rate": 5.7233867794685367e-05, "loss": 0.8767, "step": 4659 }, { "epoch": 2.112420670897552, "grad_norm": 0.3841361612981083, "learning_rate": 5.7222443330870775e-05, "loss": 0.8526, "step": 4660 }, { "epoch": 2.112873980054397, "grad_norm": 0.5335958615519514, "learning_rate": 5.7211017142177866e-05, "loss": 0.8915, "step": 4661 }, { "epoch": 2.113327289211242, "grad_norm": 0.715796301500934, "learning_rate": 5.7199589229750996e-05, "loss": 0.8949, "step": 4662 }, { "epoch": 2.113780598368087, "grad_norm": 0.7890551427276133, "learning_rate": 5.718815959473472e-05, "loss": 0.841, "step": 4663 }, { "epoch": 2.114233907524932, "grad_norm": 0.8191190002768579, "learning_rate": 5.717672823827375e-05, "loss": 0.8743, "step": 4664 }, { "epoch": 2.114687216681777, "grad_norm": 0.754121626944105, "learning_rate": 5.716529516151296e-05, "loss": 0.8856, "step": 4665 }, { "epoch": 2.1151405258386218, "grad_norm": 0.7140645830670148, "learning_rate": 5.715386036559741e-05, "loss": 0.8963, "step": 4666 }, { "epoch": 2.115593834995467, "grad_norm": 0.5428496539405008, "learning_rate": 5.714242385167234e-05, "loss": 0.8556, "step": 4667 }, { "epoch": 2.116047144152312, "grad_norm": 0.4631952403846576, "learning_rate": 5.713098562088313e-05, "loss": 0.8875, "step": 4668 }, { "epoch": 2.1165004533091567, "grad_norm": 0.34090913920558635, "learning_rate": 5.711954567437537e-05, "loss": 0.8707, "step": 4669 }, { "epoch": 2.116953762466002, "grad_norm": 0.41471938971692585, "learning_rate": 5.7108104013294806e-05, "loss": 0.877, "step": 4670 }, { "epoch": 2.117407071622847, "grad_norm": 0.5315284875166865, "learning_rate": 5.709666063878733e-05, "loss": 0.8824, "step": 4671 }, { "epoch": 2.1178603807796916, "grad_norm": 0.5788461797531296, "learning_rate": 5.7085215551999054e-05, "loss": 0.9043, "step": 4672 }, { "epoch": 2.118313689936537, "grad_norm": 0.543368707929793, "learning_rate": 5.7073768754076226e-05, "loss": 0.8804, "step": 4673 }, { "epoch": 2.1187669990933817, "grad_norm": 0.5464838963053923, "learning_rate": 5.7062320246165276e-05, "loss": 0.8869, "step": 4674 }, { "epoch": 2.1192203082502266, "grad_norm": 0.5916976530787661, "learning_rate": 5.70508700294128e-05, "loss": 0.9147, "step": 4675 }, { "epoch": 2.119673617407072, "grad_norm": 0.5264703572517389, "learning_rate": 5.7039418104965594e-05, "loss": 0.8507, "step": 4676 }, { "epoch": 2.1201269265639167, "grad_norm": 0.4226319399308346, "learning_rate": 5.702796447397058e-05, "loss": 0.8832, "step": 4677 }, { "epoch": 2.1205802357207615, "grad_norm": 0.3720407034145489, "learning_rate": 5.701650913757488e-05, "loss": 0.8806, "step": 4678 }, { "epoch": 2.1210335448776063, "grad_norm": 0.3282733693813455, "learning_rate": 5.700505209692578e-05, "loss": 0.8687, "step": 4679 }, { "epoch": 2.1214868540344516, "grad_norm": 0.3826340974845139, "learning_rate": 5.6993593353170736e-05, "loss": 0.8874, "step": 4680 }, { "epoch": 2.1219401631912964, "grad_norm": 0.39252324899980395, "learning_rate": 5.698213290745737e-05, "loss": 0.8718, "step": 4681 }, { "epoch": 2.1223934723481412, "grad_norm": 0.45171116029539343, "learning_rate": 5.6970670760933496e-05, "loss": 0.8841, "step": 4682 }, { "epoch": 2.1228467815049865, "grad_norm": 0.5291102940020158, "learning_rate": 5.6959206914747056e-05, "loss": 0.8965, "step": 4683 }, { "epoch": 2.1233000906618313, "grad_norm": 0.5191171476061135, "learning_rate": 5.694774137004621e-05, "loss": 0.883, "step": 4684 }, { "epoch": 2.123753399818676, "grad_norm": 0.43743325851989967, "learning_rate": 5.6936274127979246e-05, "loss": 0.8867, "step": 4685 }, { "epoch": 2.1242067089755214, "grad_norm": 0.4022423752788162, "learning_rate": 5.692480518969467e-05, "loss": 0.8739, "step": 4686 }, { "epoch": 2.1246600181323663, "grad_norm": 0.3709371077010507, "learning_rate": 5.6913334556341095e-05, "loss": 0.877, "step": 4687 }, { "epoch": 2.125113327289211, "grad_norm": 0.37825014117279887, "learning_rate": 5.690186222906737e-05, "loss": 0.8816, "step": 4688 }, { "epoch": 2.1255666364460564, "grad_norm": 0.3487995961534407, "learning_rate": 5.689038820902247e-05, "loss": 0.8581, "step": 4689 }, { "epoch": 2.126019945602901, "grad_norm": 0.31771199042000287, "learning_rate": 5.687891249735554e-05, "loss": 0.9051, "step": 4690 }, { "epoch": 2.126473254759746, "grad_norm": 0.3504111789053097, "learning_rate": 5.686743509521592e-05, "loss": 0.8874, "step": 4691 }, { "epoch": 2.1269265639165913, "grad_norm": 0.4908591847308177, "learning_rate": 5.685595600375311e-05, "loss": 0.8733, "step": 4692 }, { "epoch": 2.127379873073436, "grad_norm": 0.6694949441011244, "learning_rate": 5.684447522411676e-05, "loss": 0.8862, "step": 4693 }, { "epoch": 2.127833182230281, "grad_norm": 0.7598317466381515, "learning_rate": 5.683299275745672e-05, "loss": 0.8714, "step": 4694 }, { "epoch": 2.1282864913871262, "grad_norm": 0.7715605612079511, "learning_rate": 5.6821508604922965e-05, "loss": 0.8677, "step": 4695 }, { "epoch": 2.128739800543971, "grad_norm": 0.824680887745544, "learning_rate": 5.68100227676657e-05, "loss": 0.891, "step": 4696 }, { "epoch": 2.129193109700816, "grad_norm": 0.6423339657145106, "learning_rate": 5.679853524683525e-05, "loss": 0.8658, "step": 4697 }, { "epoch": 2.1296464188576607, "grad_norm": 0.5633415149290758, "learning_rate": 5.6787046043582125e-05, "loss": 0.8953, "step": 4698 }, { "epoch": 2.130099728014506, "grad_norm": 0.5071142237217281, "learning_rate": 5.677555515905701e-05, "loss": 0.8711, "step": 4699 }, { "epoch": 2.130553037171351, "grad_norm": 0.5596001505108458, "learning_rate": 5.6764062594410734e-05, "loss": 0.8566, "step": 4700 }, { "epoch": 2.1310063463281956, "grad_norm": 0.5493285937010377, "learning_rate": 5.6752568350794326e-05, "loss": 0.8596, "step": 4701 }, { "epoch": 2.131459655485041, "grad_norm": 0.5230061026785852, "learning_rate": 5.674107242935896e-05, "loss": 0.9018, "step": 4702 }, { "epoch": 2.1319129646418857, "grad_norm": 0.5465203435438957, "learning_rate": 5.672957483125599e-05, "loss": 0.8721, "step": 4703 }, { "epoch": 2.1323662737987306, "grad_norm": 0.5954743975335948, "learning_rate": 5.6718075557636924e-05, "loss": 0.8737, "step": 4704 }, { "epoch": 2.132819582955576, "grad_norm": 0.6693179218705259, "learning_rate": 5.670657460965347e-05, "loss": 0.8993, "step": 4705 }, { "epoch": 2.1332728921124207, "grad_norm": 0.748755955699696, "learning_rate": 5.669507198845747e-05, "loss": 0.8875, "step": 4706 }, { "epoch": 2.1337262012692655, "grad_norm": 0.7707863937291888, "learning_rate": 5.668356769520093e-05, "loss": 0.8701, "step": 4707 }, { "epoch": 2.1341795104261108, "grad_norm": 0.7309538630188778, "learning_rate": 5.667206173103607e-05, "loss": 0.9016, "step": 4708 }, { "epoch": 2.1346328195829556, "grad_norm": 0.8455954425669935, "learning_rate": 5.666055409711522e-05, "loss": 0.9047, "step": 4709 }, { "epoch": 2.1350861287398004, "grad_norm": 0.48456231383610604, "learning_rate": 5.6649044794590914e-05, "loss": 0.8918, "step": 4710 }, { "epoch": 2.1355394378966457, "grad_norm": 0.4401363000421173, "learning_rate": 5.6637533824615846e-05, "loss": 0.8708, "step": 4711 }, { "epoch": 2.1359927470534905, "grad_norm": 0.4391245453627379, "learning_rate": 5.662602118834286e-05, "loss": 0.8848, "step": 4712 }, { "epoch": 2.1364460562103353, "grad_norm": 0.45205828876205834, "learning_rate": 5.661450688692498e-05, "loss": 0.8519, "step": 4713 }, { "epoch": 2.1368993653671806, "grad_norm": 0.3871553815812103, "learning_rate": 5.660299092151542e-05, "loss": 0.8629, "step": 4714 }, { "epoch": 2.1373526745240254, "grad_norm": 0.34404314281632276, "learning_rate": 5.659147329326754e-05, "loss": 0.8647, "step": 4715 }, { "epoch": 2.1378059836808703, "grad_norm": 0.41403930625900875, "learning_rate": 5.657995400333482e-05, "loss": 0.8922, "step": 4716 }, { "epoch": 2.1382592928377155, "grad_norm": 0.5328621123691513, "learning_rate": 5.656843305287099e-05, "loss": 0.8777, "step": 4717 }, { "epoch": 2.1387126019945604, "grad_norm": 0.6036552244247506, "learning_rate": 5.655691044302991e-05, "loss": 0.8755, "step": 4718 }, { "epoch": 2.139165911151405, "grad_norm": 0.6036307864365794, "learning_rate": 5.654538617496557e-05, "loss": 0.8608, "step": 4719 }, { "epoch": 2.13961922030825, "grad_norm": 0.5761002532993573, "learning_rate": 5.6533860249832186e-05, "loss": 0.8692, "step": 4720 }, { "epoch": 2.1400725294650953, "grad_norm": 0.5656994633887272, "learning_rate": 5.6522332668784106e-05, "loss": 0.9026, "step": 4721 }, { "epoch": 2.14052583862194, "grad_norm": 0.6040761073323289, "learning_rate": 5.651080343297584e-05, "loss": 0.8958, "step": 4722 }, { "epoch": 2.140979147778785, "grad_norm": 0.6034345732453592, "learning_rate": 5.6499272543562096e-05, "loss": 0.8965, "step": 4723 }, { "epoch": 2.1414324569356302, "grad_norm": 0.8640570450195241, "learning_rate": 5.648774000169772e-05, "loss": 0.8948, "step": 4724 }, { "epoch": 2.141885766092475, "grad_norm": 0.5756907102983703, "learning_rate": 5.647620580853772e-05, "loss": 0.8694, "step": 4725 }, { "epoch": 2.14233907524932, "grad_norm": 0.5894778696563956, "learning_rate": 5.646466996523728e-05, "loss": 0.8839, "step": 4726 }, { "epoch": 2.142792384406165, "grad_norm": 0.5771008803546869, "learning_rate": 5.645313247295176e-05, "loss": 0.9251, "step": 4727 }, { "epoch": 2.14324569356301, "grad_norm": 0.5353704086233648, "learning_rate": 5.6441593332836676e-05, "loss": 0.8801, "step": 4728 }, { "epoch": 2.143699002719855, "grad_norm": 0.49326795424486186, "learning_rate": 5.6430052546047686e-05, "loss": 0.8939, "step": 4729 }, { "epoch": 2.1441523118767, "grad_norm": 0.35613806476637405, "learning_rate": 5.6418510113740664e-05, "loss": 0.8879, "step": 4730 }, { "epoch": 2.144605621033545, "grad_norm": 0.323313611906575, "learning_rate": 5.640696603707159e-05, "loss": 0.884, "step": 4731 }, { "epoch": 2.1450589301903897, "grad_norm": 0.35678861587255445, "learning_rate": 5.639542031719664e-05, "loss": 0.8797, "step": 4732 }, { "epoch": 2.145512239347235, "grad_norm": 0.34830692925084633, "learning_rate": 5.6383872955272175e-05, "loss": 0.8992, "step": 4733 }, { "epoch": 2.14596554850408, "grad_norm": 0.38182615973434697, "learning_rate": 5.637232395245467e-05, "loss": 0.9134, "step": 4734 }, { "epoch": 2.1464188576609247, "grad_norm": 0.48956836206613963, "learning_rate": 5.6360773309900804e-05, "loss": 0.8857, "step": 4735 }, { "epoch": 2.1468721668177695, "grad_norm": 0.5512286064207162, "learning_rate": 5.634922102876741e-05, "loss": 0.8804, "step": 4736 }, { "epoch": 2.1473254759746148, "grad_norm": 0.6452385131548105, "learning_rate": 5.633766711021149e-05, "loss": 0.9008, "step": 4737 }, { "epoch": 2.1477787851314596, "grad_norm": 0.6446461657389668, "learning_rate": 5.6326111555390186e-05, "loss": 0.8821, "step": 4738 }, { "epoch": 2.1482320942883044, "grad_norm": 0.651388281279698, "learning_rate": 5.631455436546082e-05, "loss": 0.8885, "step": 4739 }, { "epoch": 2.1486854034451497, "grad_norm": 0.6616127483005828, "learning_rate": 5.63029955415809e-05, "loss": 0.8664, "step": 4740 }, { "epoch": 2.1491387126019945, "grad_norm": 0.7127597259487963, "learning_rate": 5.629143508490806e-05, "loss": 0.8584, "step": 4741 }, { "epoch": 2.1495920217588393, "grad_norm": 0.752114531040743, "learning_rate": 5.6279872996600114e-05, "loss": 0.874, "step": 4742 }, { "epoch": 2.1500453309156846, "grad_norm": 0.7936736970466485, "learning_rate": 5.6268309277815046e-05, "loss": 0.8675, "step": 4743 }, { "epoch": 2.1504986400725294, "grad_norm": 0.7522376111075707, "learning_rate": 5.625674392971099e-05, "loss": 0.869, "step": 4744 }, { "epoch": 2.1509519492293743, "grad_norm": 0.6510091707473514, "learning_rate": 5.624517695344624e-05, "loss": 0.8658, "step": 4745 }, { "epoch": 2.1514052583862195, "grad_norm": 0.49174446368378294, "learning_rate": 5.623360835017929e-05, "loss": 0.8914, "step": 4746 }, { "epoch": 2.1518585675430644, "grad_norm": 0.45264892295694853, "learning_rate": 5.622203812106875e-05, "loss": 0.8801, "step": 4747 }, { "epoch": 2.152311876699909, "grad_norm": 0.4636267984067917, "learning_rate": 5.621046626727342e-05, "loss": 0.8917, "step": 4748 }, { "epoch": 2.1527651858567545, "grad_norm": 0.42840887927397775, "learning_rate": 5.6198892789952244e-05, "loss": 0.8686, "step": 4749 }, { "epoch": 2.1532184950135993, "grad_norm": 0.4070906925043849, "learning_rate": 5.618731769026434e-05, "loss": 0.874, "step": 4750 }, { "epoch": 2.153671804170444, "grad_norm": 0.3696458534859685, "learning_rate": 5.617574096936901e-05, "loss": 0.8614, "step": 4751 }, { "epoch": 2.1541251133272894, "grad_norm": 0.30376194921682864, "learning_rate": 5.616416262842568e-05, "loss": 0.8793, "step": 4752 }, { "epoch": 2.1545784224841342, "grad_norm": 0.2715216726244174, "learning_rate": 5.615258266859396e-05, "loss": 0.8853, "step": 4753 }, { "epoch": 2.155031731640979, "grad_norm": 0.27721574019280637, "learning_rate": 5.6141001091033605e-05, "loss": 0.8702, "step": 4754 }, { "epoch": 2.1554850407978243, "grad_norm": 0.3040289935158504, "learning_rate": 5.612941789690455e-05, "loss": 0.889, "step": 4755 }, { "epoch": 2.155938349954669, "grad_norm": 0.2819501916778873, "learning_rate": 5.6117833087366905e-05, "loss": 0.8635, "step": 4756 }, { "epoch": 2.156391659111514, "grad_norm": 0.2739266064855883, "learning_rate": 5.61062466635809e-05, "loss": 0.8815, "step": 4757 }, { "epoch": 2.1568449682683593, "grad_norm": 0.33205340164607056, "learning_rate": 5.609465862670696e-05, "loss": 0.8859, "step": 4758 }, { "epoch": 2.157298277425204, "grad_norm": 0.39087654516880016, "learning_rate": 5.608306897790565e-05, "loss": 0.877, "step": 4759 }, { "epoch": 2.157751586582049, "grad_norm": 0.447105850088045, "learning_rate": 5.607147771833772e-05, "loss": 0.8647, "step": 4760 }, { "epoch": 2.1582048957388937, "grad_norm": 0.5320308027987666, "learning_rate": 5.605988484916405e-05, "loss": 0.8654, "step": 4761 }, { "epoch": 2.158658204895739, "grad_norm": 0.587378137256658, "learning_rate": 5.6048290371545726e-05, "loss": 0.8876, "step": 4762 }, { "epoch": 2.159111514052584, "grad_norm": 0.6226268668221105, "learning_rate": 5.6036694286643954e-05, "loss": 0.8688, "step": 4763 }, { "epoch": 2.1595648232094287, "grad_norm": 0.71205327854182, "learning_rate": 5.602509659562011e-05, "loss": 0.879, "step": 4764 }, { "epoch": 2.160018132366274, "grad_norm": 0.7108317566602127, "learning_rate": 5.601349729963575e-05, "loss": 0.8968, "step": 4765 }, { "epoch": 2.1604714415231188, "grad_norm": 0.6807788935354908, "learning_rate": 5.600189639985258e-05, "loss": 0.8942, "step": 4766 }, { "epoch": 2.1609247506799636, "grad_norm": 0.6273927691739125, "learning_rate": 5.599029389743246e-05, "loss": 0.8818, "step": 4767 }, { "epoch": 2.161378059836809, "grad_norm": 0.5691993445193647, "learning_rate": 5.59786897935374e-05, "loss": 0.869, "step": 4768 }, { "epoch": 2.1618313689936537, "grad_norm": 0.516323479336695, "learning_rate": 5.5967084089329603e-05, "loss": 0.8638, "step": 4769 }, { "epoch": 2.1622846781504985, "grad_norm": 0.5024972254295083, "learning_rate": 5.59554767859714e-05, "loss": 0.9023, "step": 4770 }, { "epoch": 2.162737987307344, "grad_norm": 0.44222544669750213, "learning_rate": 5.5943867884625314e-05, "loss": 0.8976, "step": 4771 }, { "epoch": 2.1631912964641886, "grad_norm": 0.41875181749299156, "learning_rate": 5.5932257386454e-05, "loss": 0.8787, "step": 4772 }, { "epoch": 2.1636446056210334, "grad_norm": 0.36381104414288074, "learning_rate": 5.592064529262028e-05, "loss": 0.879, "step": 4773 }, { "epoch": 2.1640979147778787, "grad_norm": 0.29929662721714306, "learning_rate": 5.590903160428715e-05, "loss": 0.8854, "step": 4774 }, { "epoch": 2.1645512239347235, "grad_norm": 0.3255872603974135, "learning_rate": 5.589741632261774e-05, "loss": 0.8462, "step": 4775 }, { "epoch": 2.1650045330915684, "grad_norm": 0.4120698753158708, "learning_rate": 5.588579944877538e-05, "loss": 0.8876, "step": 4776 }, { "epoch": 2.165457842248413, "grad_norm": 0.472996073614943, "learning_rate": 5.58741809839235e-05, "loss": 0.8581, "step": 4777 }, { "epoch": 2.1659111514052585, "grad_norm": 0.5114191315501465, "learning_rate": 5.5862560929225745e-05, "loss": 0.869, "step": 4778 }, { "epoch": 2.1663644605621033, "grad_norm": 0.5954189408330235, "learning_rate": 5.5850939285845896e-05, "loss": 0.8769, "step": 4779 }, { "epoch": 2.166817769718948, "grad_norm": 0.7019902537388452, "learning_rate": 5.5839316054947884e-05, "loss": 0.8768, "step": 4780 }, { "epoch": 2.1672710788757934, "grad_norm": 0.7846005442643376, "learning_rate": 5.582769123769583e-05, "loss": 0.8828, "step": 4781 }, { "epoch": 2.1677243880326382, "grad_norm": 0.7981698768835164, "learning_rate": 5.581606483525399e-05, "loss": 0.8763, "step": 4782 }, { "epoch": 2.168177697189483, "grad_norm": 0.7057584252342092, "learning_rate": 5.5804436848786755e-05, "loss": 0.8695, "step": 4783 }, { "epoch": 2.1686310063463283, "grad_norm": 0.5726923723822434, "learning_rate": 5.579280727945874e-05, "loss": 0.8958, "step": 4784 }, { "epoch": 2.169084315503173, "grad_norm": 0.4723522477538353, "learning_rate": 5.5781176128434645e-05, "loss": 0.8849, "step": 4785 }, { "epoch": 2.169537624660018, "grad_norm": 0.3680109338250474, "learning_rate": 5.576954339687938e-05, "loss": 0.8952, "step": 4786 }, { "epoch": 2.1699909338168633, "grad_norm": 0.3588484037761595, "learning_rate": 5.5757909085958016e-05, "loss": 0.8959, "step": 4787 }, { "epoch": 2.170444242973708, "grad_norm": 0.5205557014332567, "learning_rate": 5.574627319683573e-05, "loss": 0.8925, "step": 4788 }, { "epoch": 2.170897552130553, "grad_norm": 0.46398384731607817, "learning_rate": 5.573463573067791e-05, "loss": 0.8659, "step": 4789 }, { "epoch": 2.171350861287398, "grad_norm": 0.5770977296515021, "learning_rate": 5.5722996688650076e-05, "loss": 0.8919, "step": 4790 }, { "epoch": 2.171804170444243, "grad_norm": 0.5753324509141756, "learning_rate": 5.571135607191792e-05, "loss": 0.8856, "step": 4791 }, { "epoch": 2.172257479601088, "grad_norm": 0.5193603289690785, "learning_rate": 5.5699713881647274e-05, "loss": 0.892, "step": 4792 }, { "epoch": 2.172710788757933, "grad_norm": 0.5461966879187565, "learning_rate": 5.568807011900414e-05, "loss": 0.8948, "step": 4793 }, { "epoch": 2.173164097914778, "grad_norm": 1.3883760874905469, "learning_rate": 5.5676424785154677e-05, "loss": 0.8663, "step": 4794 }, { "epoch": 2.1736174070716228, "grad_norm": 0.25862892642773355, "learning_rate": 5.5664777881265214e-05, "loss": 0.8856, "step": 4795 }, { "epoch": 2.174070716228468, "grad_norm": 0.6416874000277034, "learning_rate": 5.56531294085022e-05, "loss": 0.8712, "step": 4796 }, { "epoch": 2.174524025385313, "grad_norm": 1.107366747261881, "learning_rate": 5.564147936803226e-05, "loss": 0.8731, "step": 4797 }, { "epoch": 2.1749773345421577, "grad_norm": 1.0369737954831324, "learning_rate": 5.56298277610222e-05, "loss": 0.8543, "step": 4798 }, { "epoch": 2.1754306436990025, "grad_norm": 0.7235231035832544, "learning_rate": 5.5618174588638965e-05, "loss": 0.8755, "step": 4799 }, { "epoch": 2.175883952855848, "grad_norm": 0.4854571881133867, "learning_rate": 5.5606519852049634e-05, "loss": 0.893, "step": 4800 }, { "epoch": 2.1763372620126926, "grad_norm": 0.483701135145525, "learning_rate": 5.559486355242147e-05, "loss": 0.8828, "step": 4801 }, { "epoch": 2.1767905711695374, "grad_norm": 0.5580550946905651, "learning_rate": 5.558320569092189e-05, "loss": 0.8892, "step": 4802 }, { "epoch": 2.1772438803263827, "grad_norm": 0.6659739002695799, "learning_rate": 5.557154626871847e-05, "loss": 0.8739, "step": 4803 }, { "epoch": 2.1776971894832275, "grad_norm": 0.7555627234308688, "learning_rate": 5.5559885286978925e-05, "loss": 0.8856, "step": 4804 }, { "epoch": 2.1781504986400724, "grad_norm": 0.7521435613717428, "learning_rate": 5.5548222746871143e-05, "loss": 0.8742, "step": 4805 }, { "epoch": 2.1786038077969176, "grad_norm": 0.6485901640816881, "learning_rate": 5.5536558649563145e-05, "loss": 0.8965, "step": 4806 }, { "epoch": 2.1790571169537625, "grad_norm": 0.44045502425760796, "learning_rate": 5.552489299622314e-05, "loss": 0.8783, "step": 4807 }, { "epoch": 2.1795104261106073, "grad_norm": 0.36068432023509794, "learning_rate": 5.551322578801948e-05, "loss": 0.8918, "step": 4808 }, { "epoch": 2.1799637352674526, "grad_norm": 0.4422289096087341, "learning_rate": 5.550155702612065e-05, "loss": 0.8668, "step": 4809 }, { "epoch": 2.1804170444242974, "grad_norm": 0.5130832356770436, "learning_rate": 5.5489886711695345e-05, "loss": 0.878, "step": 4810 }, { "epoch": 2.1808703535811422, "grad_norm": 0.5740766339924029, "learning_rate": 5.547821484591235e-05, "loss": 0.8767, "step": 4811 }, { "epoch": 2.1813236627379875, "grad_norm": 0.6407322148816381, "learning_rate": 5.546654142994065e-05, "loss": 0.8632, "step": 4812 }, { "epoch": 2.1817769718948323, "grad_norm": 0.691986208068822, "learning_rate": 5.5454866464949385e-05, "loss": 0.8747, "step": 4813 }, { "epoch": 2.182230281051677, "grad_norm": 0.7589218133949989, "learning_rate": 5.5443189952107806e-05, "loss": 0.8753, "step": 4814 }, { "epoch": 2.182683590208522, "grad_norm": 0.7744665608296675, "learning_rate": 5.543151189258538e-05, "loss": 0.8756, "step": 4815 }, { "epoch": 2.1831368993653673, "grad_norm": 0.63504006344866, "learning_rate": 5.5419832287551675e-05, "loss": 0.8677, "step": 4816 }, { "epoch": 2.183590208522212, "grad_norm": 0.484932334984617, "learning_rate": 5.5408151138176446e-05, "loss": 0.8879, "step": 4817 }, { "epoch": 2.184043517679057, "grad_norm": 0.39851769587198527, "learning_rate": 5.539646844562961e-05, "loss": 0.8709, "step": 4818 }, { "epoch": 2.184496826835902, "grad_norm": 0.3965952086134999, "learning_rate": 5.538478421108119e-05, "loss": 0.8882, "step": 4819 }, { "epoch": 2.184950135992747, "grad_norm": 0.4080709324690937, "learning_rate": 5.5373098435701423e-05, "loss": 0.8738, "step": 4820 }, { "epoch": 2.185403445149592, "grad_norm": 0.4405543627317717, "learning_rate": 5.5361411120660675e-05, "loss": 0.8825, "step": 4821 }, { "epoch": 2.185856754306437, "grad_norm": 0.415259713788134, "learning_rate": 5.5349722267129455e-05, "loss": 0.861, "step": 4822 }, { "epoch": 2.186310063463282, "grad_norm": 0.35550124372548864, "learning_rate": 5.5338031876278436e-05, "loss": 0.8708, "step": 4823 }, { "epoch": 2.1867633726201268, "grad_norm": 0.3594161961836957, "learning_rate": 5.532633994927845e-05, "loss": 0.8779, "step": 4824 }, { "epoch": 2.187216681776972, "grad_norm": 0.3564011810007226, "learning_rate": 5.5314646487300473e-05, "loss": 0.8517, "step": 4825 }, { "epoch": 2.187669990933817, "grad_norm": 0.33741359935666837, "learning_rate": 5.530295149151564e-05, "loss": 0.8537, "step": 4826 }, { "epoch": 2.1881233000906617, "grad_norm": 0.40117564555283325, "learning_rate": 5.529125496309524e-05, "loss": 0.8779, "step": 4827 }, { "epoch": 2.188576609247507, "grad_norm": 0.46867077935827234, "learning_rate": 5.5279556903210725e-05, "loss": 0.8893, "step": 4828 }, { "epoch": 2.189029918404352, "grad_norm": 0.39507909804924746, "learning_rate": 5.526785731303367e-05, "loss": 0.872, "step": 4829 }, { "epoch": 2.1894832275611966, "grad_norm": 0.5596585036162312, "learning_rate": 5.525615619373583e-05, "loss": 0.8749, "step": 4830 }, { "epoch": 2.189936536718042, "grad_norm": 0.2938861907191448, "learning_rate": 5.524445354648912e-05, "loss": 0.8793, "step": 4831 }, { "epoch": 2.1903898458748867, "grad_norm": 0.40017601681920695, "learning_rate": 5.523274937246557e-05, "loss": 0.8931, "step": 4832 }, { "epoch": 2.1908431550317315, "grad_norm": 0.49940516723268236, "learning_rate": 5.522104367283742e-05, "loss": 0.9138, "step": 4833 }, { "epoch": 2.191296464188577, "grad_norm": 0.5377699275256324, "learning_rate": 5.520933644877701e-05, "loss": 0.8768, "step": 4834 }, { "epoch": 2.1917497733454216, "grad_norm": 0.5701546305217987, "learning_rate": 5.5197627701456846e-05, "loss": 0.8608, "step": 4835 }, { "epoch": 2.1922030825022665, "grad_norm": 0.6162881803333637, "learning_rate": 5.51859174320496e-05, "loss": 0.875, "step": 4836 }, { "epoch": 2.1926563916591117, "grad_norm": 0.6438797643345749, "learning_rate": 5.51742056417281e-05, "loss": 0.863, "step": 4837 }, { "epoch": 2.1931097008159566, "grad_norm": 0.6033240187624191, "learning_rate": 5.51624923316653e-05, "loss": 0.8768, "step": 4838 }, { "epoch": 2.1935630099728014, "grad_norm": 0.5494829430286348, "learning_rate": 5.5150777503034345e-05, "loss": 0.8801, "step": 4839 }, { "epoch": 2.1940163191296462, "grad_norm": 0.4889450959872193, "learning_rate": 5.513906115700849e-05, "loss": 0.8785, "step": 4840 }, { "epoch": 2.1944696282864915, "grad_norm": 0.39439072312148554, "learning_rate": 5.512734329476117e-05, "loss": 0.8995, "step": 4841 }, { "epoch": 2.1949229374433363, "grad_norm": 0.38075368928671915, "learning_rate": 5.511562391746596e-05, "loss": 0.8731, "step": 4842 }, { "epoch": 2.195376246600181, "grad_norm": 0.39419548455520054, "learning_rate": 5.51039030262966e-05, "loss": 0.8958, "step": 4843 }, { "epoch": 2.1958295557570264, "grad_norm": 0.4589942868912707, "learning_rate": 5.509218062242694e-05, "loss": 0.8834, "step": 4844 }, { "epoch": 2.1962828649138713, "grad_norm": 0.47731084170935845, "learning_rate": 5.5080456707031054e-05, "loss": 0.8706, "step": 4845 }, { "epoch": 2.196736174070716, "grad_norm": 0.48985711022665535, "learning_rate": 5.506873128128309e-05, "loss": 0.8815, "step": 4846 }, { "epoch": 2.1971894832275614, "grad_norm": 0.607660396933598, "learning_rate": 5.505700434635741e-05, "loss": 0.8794, "step": 4847 }, { "epoch": 2.197642792384406, "grad_norm": 0.6882828726813031, "learning_rate": 5.50452759034285e-05, "loss": 0.8919, "step": 4848 }, { "epoch": 2.198096101541251, "grad_norm": 0.7007881845207465, "learning_rate": 5.5033545953670984e-05, "loss": 0.8722, "step": 4849 }, { "epoch": 2.1985494106980963, "grad_norm": 0.6421018821114959, "learning_rate": 5.502181449825966e-05, "loss": 0.8809, "step": 4850 }, { "epoch": 2.199002719854941, "grad_norm": 0.5989681950406002, "learning_rate": 5.501008153836946e-05, "loss": 0.8916, "step": 4851 }, { "epoch": 2.199456029011786, "grad_norm": 0.5147421213357903, "learning_rate": 5.499834707517549e-05, "loss": 0.8939, "step": 4852 }, { "epoch": 2.199909338168631, "grad_norm": 0.3650270761523685, "learning_rate": 5.498661110985298e-05, "loss": 0.883, "step": 4853 }, { "epoch": 2.200362647325476, "grad_norm": 0.3325836703189744, "learning_rate": 5.497487364357732e-05, "loss": 0.8789, "step": 4854 }, { "epoch": 2.200815956482321, "grad_norm": 0.5650967544389922, "learning_rate": 5.496313467752405e-05, "loss": 0.8475, "step": 4855 }, { "epoch": 2.2012692656391657, "grad_norm": 0.6532400366740393, "learning_rate": 5.495139421286889e-05, "loss": 0.8645, "step": 4856 }, { "epoch": 2.201722574796011, "grad_norm": 0.6105080788171893, "learning_rate": 5.4939652250787625e-05, "loss": 0.8839, "step": 4857 }, { "epoch": 2.202175883952856, "grad_norm": 0.5501098847107261, "learning_rate": 5.49279087924563e-05, "loss": 0.8844, "step": 4858 }, { "epoch": 2.2026291931097006, "grad_norm": 0.470705631292352, "learning_rate": 5.4916163839051024e-05, "loss": 0.8895, "step": 4859 }, { "epoch": 2.203082502266546, "grad_norm": 0.36550366431691733, "learning_rate": 5.490441739174811e-05, "loss": 0.8784, "step": 4860 }, { "epoch": 2.2035358114233907, "grad_norm": 0.38441660376650016, "learning_rate": 5.4892669451723984e-05, "loss": 0.8825, "step": 4861 }, { "epoch": 2.2039891205802356, "grad_norm": 0.4284247823276656, "learning_rate": 5.488092002015526e-05, "loss": 0.8823, "step": 4862 }, { "epoch": 2.204442429737081, "grad_norm": 0.37614690131027845, "learning_rate": 5.4869169098218656e-05, "loss": 0.9098, "step": 4863 }, { "epoch": 2.2048957388939256, "grad_norm": 0.3365368988372933, "learning_rate": 5.4857416687091055e-05, "loss": 0.8842, "step": 4864 }, { "epoch": 2.2053490480507705, "grad_norm": 0.28962398927359784, "learning_rate": 5.484566278794951e-05, "loss": 0.8915, "step": 4865 }, { "epoch": 2.2058023572076157, "grad_norm": 0.29212648277849734, "learning_rate": 5.483390740197122e-05, "loss": 0.8767, "step": 4866 }, { "epoch": 2.2062556663644606, "grad_norm": 0.3496485369191403, "learning_rate": 5.482215053033349e-05, "loss": 0.8591, "step": 4867 }, { "epoch": 2.2067089755213054, "grad_norm": 0.38541951551843834, "learning_rate": 5.481039217421384e-05, "loss": 0.8641, "step": 4868 }, { "epoch": 2.2071622846781507, "grad_norm": 0.36492493924533803, "learning_rate": 5.4798632334789876e-05, "loss": 0.8844, "step": 4869 }, { "epoch": 2.2076155938349955, "grad_norm": 0.3083565460945788, "learning_rate": 5.478687101323939e-05, "loss": 0.8788, "step": 4870 }, { "epoch": 2.2080689029918403, "grad_norm": 0.3397278867109425, "learning_rate": 5.477510821074032e-05, "loss": 0.8868, "step": 4871 }, { "epoch": 2.2085222121486856, "grad_norm": 0.37721535436489784, "learning_rate": 5.476334392847074e-05, "loss": 0.8673, "step": 4872 }, { "epoch": 2.2089755213055304, "grad_norm": 0.365135371873888, "learning_rate": 5.475157816760888e-05, "loss": 0.8431, "step": 4873 }, { "epoch": 2.2094288304623753, "grad_norm": 0.3674202507657544, "learning_rate": 5.4739810929333096e-05, "loss": 0.8823, "step": 4874 }, { "epoch": 2.2098821396192205, "grad_norm": 0.36957693334447705, "learning_rate": 5.4728042214821945e-05, "loss": 0.8599, "step": 4875 }, { "epoch": 2.2103354487760654, "grad_norm": 0.32938146526170925, "learning_rate": 5.471627202525407e-05, "loss": 0.8802, "step": 4876 }, { "epoch": 2.21078875793291, "grad_norm": 0.3811534035621776, "learning_rate": 5.4704500361808305e-05, "loss": 0.8635, "step": 4877 }, { "epoch": 2.211242067089755, "grad_norm": 0.45016829230187694, "learning_rate": 5.469272722566361e-05, "loss": 0.8676, "step": 4878 }, { "epoch": 2.2116953762466003, "grad_norm": 0.4885957197720917, "learning_rate": 5.468095261799911e-05, "loss": 0.8847, "step": 4879 }, { "epoch": 2.212148685403445, "grad_norm": 0.5366909491823606, "learning_rate": 5.466917653999405e-05, "loss": 0.8701, "step": 4880 }, { "epoch": 2.21260199456029, "grad_norm": 0.6627013125417538, "learning_rate": 5.465739899282787e-05, "loss": 0.8785, "step": 4881 }, { "epoch": 2.213055303717135, "grad_norm": 0.6775193103580031, "learning_rate": 5.464561997768008e-05, "loss": 0.8761, "step": 4882 }, { "epoch": 2.21350861287398, "grad_norm": 0.6383309297624936, "learning_rate": 5.4633839495730415e-05, "loss": 0.8689, "step": 4883 }, { "epoch": 2.213961922030825, "grad_norm": 0.5389244039090572, "learning_rate": 5.462205754815871e-05, "loss": 0.8713, "step": 4884 }, { "epoch": 2.21441523118767, "grad_norm": 0.4703349692916239, "learning_rate": 5.461027413614497e-05, "loss": 0.8671, "step": 4885 }, { "epoch": 2.214868540344515, "grad_norm": 0.44997515536246147, "learning_rate": 5.459848926086935e-05, "loss": 0.8547, "step": 4886 }, { "epoch": 2.21532184950136, "grad_norm": 0.4619712800384572, "learning_rate": 5.458670292351211e-05, "loss": 0.8902, "step": 4887 }, { "epoch": 2.215775158658205, "grad_norm": 0.41683794651439404, "learning_rate": 5.457491512525371e-05, "loss": 0.8572, "step": 4888 }, { "epoch": 2.21622846781505, "grad_norm": 0.34225815762094686, "learning_rate": 5.456312586727472e-05, "loss": 0.88, "step": 4889 }, { "epoch": 2.2166817769718947, "grad_norm": 0.30786849634634705, "learning_rate": 5.455133515075588e-05, "loss": 0.8894, "step": 4890 }, { "epoch": 2.21713508612874, "grad_norm": 0.3309659796576903, "learning_rate": 5.453954297687806e-05, "loss": 0.8764, "step": 4891 }, { "epoch": 2.217588395285585, "grad_norm": 0.31213172977179493, "learning_rate": 5.452774934682227e-05, "loss": 0.87, "step": 4892 }, { "epoch": 2.2180417044424297, "grad_norm": 0.3073605637593185, "learning_rate": 5.451595426176969e-05, "loss": 0.8751, "step": 4893 }, { "epoch": 2.2184950135992745, "grad_norm": 0.3294403843260236, "learning_rate": 5.450415772290163e-05, "loss": 0.8619, "step": 4894 }, { "epoch": 2.2189483227561198, "grad_norm": 1.2612348005615244, "learning_rate": 5.4492359731399555e-05, "loss": 0.8881, "step": 4895 }, { "epoch": 2.2194016319129646, "grad_norm": 0.3016473336579799, "learning_rate": 5.4480560288445055e-05, "loss": 0.8895, "step": 4896 }, { "epoch": 2.2198549410698094, "grad_norm": 0.4156149990700991, "learning_rate": 5.446875939521988e-05, "loss": 0.8648, "step": 4897 }, { "epoch": 2.2203082502266547, "grad_norm": 1.1396226462424783, "learning_rate": 5.445695705290594e-05, "loss": 0.887, "step": 4898 }, { "epoch": 2.2207615593834995, "grad_norm": 0.32799083645265864, "learning_rate": 5.4445153262685256e-05, "loss": 0.8921, "step": 4899 }, { "epoch": 2.2212148685403443, "grad_norm": 0.35448241173431644, "learning_rate": 5.4433348025740014e-05, "loss": 0.866, "step": 4900 }, { "epoch": 2.2216681776971896, "grad_norm": 0.44753793815185605, "learning_rate": 5.4421541343252566e-05, "loss": 0.8883, "step": 4901 }, { "epoch": 2.2221214868540344, "grad_norm": 0.5715928967573293, "learning_rate": 5.4409733216405356e-05, "loss": 0.8829, "step": 4902 }, { "epoch": 2.2225747960108793, "grad_norm": 0.6058075440479654, "learning_rate": 5.439792364638101e-05, "loss": 0.8679, "step": 4903 }, { "epoch": 2.2230281051677245, "grad_norm": 0.6653213051649463, "learning_rate": 5.4386112634362304e-05, "loss": 0.8708, "step": 4904 }, { "epoch": 2.2234814143245694, "grad_norm": 0.7344331929762148, "learning_rate": 5.4374300181532136e-05, "loss": 0.8672, "step": 4905 }, { "epoch": 2.223934723481414, "grad_norm": 0.7616796545505374, "learning_rate": 5.436248628907355e-05, "loss": 0.8621, "step": 4906 }, { "epoch": 2.2243880326382595, "grad_norm": 0.7064080089532961, "learning_rate": 5.435067095816976e-05, "loss": 0.9048, "step": 4907 }, { "epoch": 2.2248413417951043, "grad_norm": 0.6728689050858431, "learning_rate": 5.43388541900041e-05, "loss": 0.8787, "step": 4908 }, { "epoch": 2.225294650951949, "grad_norm": 0.6280895524318776, "learning_rate": 5.432703598576004e-05, "loss": 0.8736, "step": 4909 }, { "epoch": 2.2257479601087944, "grad_norm": 0.5005726675408083, "learning_rate": 5.4315216346621246e-05, "loss": 0.8987, "step": 4910 }, { "epoch": 2.226201269265639, "grad_norm": 0.34535303920323235, "learning_rate": 5.430339527377144e-05, "loss": 0.8777, "step": 4911 }, { "epoch": 2.226654578422484, "grad_norm": 0.32362624671160434, "learning_rate": 5.429157276839457e-05, "loss": 0.8681, "step": 4912 }, { "epoch": 2.2271078875793293, "grad_norm": 0.3635228370634084, "learning_rate": 5.427974883167468e-05, "loss": 0.9012, "step": 4913 }, { "epoch": 2.227561196736174, "grad_norm": 0.41319362980813745, "learning_rate": 5.426792346479598e-05, "loss": 0.8638, "step": 4914 }, { "epoch": 2.228014505893019, "grad_norm": 0.42124915693877135, "learning_rate": 5.425609666894281e-05, "loss": 0.8889, "step": 4915 }, { "epoch": 2.2284678150498642, "grad_norm": 0.38978539297402287, "learning_rate": 5.4244268445299665e-05, "loss": 0.8777, "step": 4916 }, { "epoch": 2.228921124206709, "grad_norm": 0.4163926497266152, "learning_rate": 5.423243879505117e-05, "loss": 0.8719, "step": 4917 }, { "epoch": 2.229374433363554, "grad_norm": 0.44502219985697333, "learning_rate": 5.42206077193821e-05, "loss": 0.8751, "step": 4918 }, { "epoch": 2.2298277425203987, "grad_norm": 0.4479755397982564, "learning_rate": 5.420877521947737e-05, "loss": 0.891, "step": 4919 }, { "epoch": 2.230281051677244, "grad_norm": 0.4852826374781826, "learning_rate": 5.419694129652206e-05, "loss": 0.8852, "step": 4920 }, { "epoch": 2.230734360834089, "grad_norm": 0.607857588408353, "learning_rate": 5.4185105951701335e-05, "loss": 0.892, "step": 4921 }, { "epoch": 2.2311876699909337, "grad_norm": 0.6924000578649511, "learning_rate": 5.417326918620056e-05, "loss": 0.8921, "step": 4922 }, { "epoch": 2.231640979147779, "grad_norm": 0.7452563882033776, "learning_rate": 5.416143100120523e-05, "loss": 0.8925, "step": 4923 }, { "epoch": 2.2320942883046238, "grad_norm": 0.82242660526943, "learning_rate": 5.414959139790096e-05, "loss": 0.8821, "step": 4924 }, { "epoch": 2.2325475974614686, "grad_norm": 0.8230353651301163, "learning_rate": 5.4137750377473536e-05, "loss": 0.8904, "step": 4925 }, { "epoch": 2.233000906618314, "grad_norm": 0.7552090297213019, "learning_rate": 5.412590794110886e-05, "loss": 0.8871, "step": 4926 }, { "epoch": 2.2334542157751587, "grad_norm": 0.7666558954421583, "learning_rate": 5.411406408999298e-05, "loss": 0.8866, "step": 4927 }, { "epoch": 2.2339075249320035, "grad_norm": 0.7119450769112671, "learning_rate": 5.41022188253121e-05, "loss": 0.8682, "step": 4928 }, { "epoch": 2.234360834088849, "grad_norm": 0.5447554275183544, "learning_rate": 5.4090372148252575e-05, "loss": 0.8736, "step": 4929 }, { "epoch": 2.2348141432456936, "grad_norm": 0.3238200196527693, "learning_rate": 5.407852406000086e-05, "loss": 0.8749, "step": 4930 }, { "epoch": 2.2352674524025384, "grad_norm": 0.3463741627888589, "learning_rate": 5.406667456174358e-05, "loss": 0.8747, "step": 4931 }, { "epoch": 2.2357207615593837, "grad_norm": 0.5363215260087922, "learning_rate": 5.40548236546675e-05, "loss": 0.8891, "step": 4932 }, { "epoch": 2.2361740707162285, "grad_norm": 0.5923159149579007, "learning_rate": 5.4042971339959524e-05, "loss": 0.8766, "step": 4933 }, { "epoch": 2.2366273798730734, "grad_norm": 0.6300206713351357, "learning_rate": 5.40311176188067e-05, "loss": 0.8588, "step": 4934 }, { "epoch": 2.237080689029918, "grad_norm": 0.6557782332926844, "learning_rate": 5.401926249239621e-05, "loss": 0.8921, "step": 4935 }, { "epoch": 2.2375339981867635, "grad_norm": 0.6567345814318327, "learning_rate": 5.400740596191538e-05, "loss": 0.8785, "step": 4936 }, { "epoch": 2.2379873073436083, "grad_norm": 0.5290324369527498, "learning_rate": 5.399554802855167e-05, "loss": 0.8859, "step": 4937 }, { "epoch": 2.238440616500453, "grad_norm": 0.41477467026654946, "learning_rate": 5.39836886934927e-05, "loss": 0.8857, "step": 4938 }, { "epoch": 2.2388939256572984, "grad_norm": 0.35049802743413166, "learning_rate": 5.3971827957926214e-05, "loss": 0.8774, "step": 4939 }, { "epoch": 2.239347234814143, "grad_norm": 0.2967265134297082, "learning_rate": 5.395996582304009e-05, "loss": 0.8848, "step": 4940 }, { "epoch": 2.239800543970988, "grad_norm": 0.22962253309348635, "learning_rate": 5.394810229002237e-05, "loss": 0.8851, "step": 4941 }, { "epoch": 2.2402538531278333, "grad_norm": 0.33519773094244487, "learning_rate": 5.3936237360061205e-05, "loss": 0.8742, "step": 4942 }, { "epoch": 2.240707162284678, "grad_norm": 0.4767309593516099, "learning_rate": 5.392437103434491e-05, "loss": 0.8825, "step": 4943 }, { "epoch": 2.241160471441523, "grad_norm": 0.5325117996228746, "learning_rate": 5.391250331406193e-05, "loss": 0.8933, "step": 4944 }, { "epoch": 2.2416137805983682, "grad_norm": 0.6125308357309032, "learning_rate": 5.3900634200400856e-05, "loss": 0.8836, "step": 4945 }, { "epoch": 2.242067089755213, "grad_norm": 0.6908711113165782, "learning_rate": 5.388876369455041e-05, "loss": 0.8816, "step": 4946 }, { "epoch": 2.242520398912058, "grad_norm": 0.7569454476668338, "learning_rate": 5.3876891797699466e-05, "loss": 0.8945, "step": 4947 }, { "epoch": 2.242973708068903, "grad_norm": 0.7544117866772967, "learning_rate": 5.386501851103702e-05, "loss": 0.8674, "step": 4948 }, { "epoch": 2.243427017225748, "grad_norm": 0.6317782282933646, "learning_rate": 5.385314383575225e-05, "loss": 0.8674, "step": 4949 }, { "epoch": 2.243880326382593, "grad_norm": 0.4121317320200252, "learning_rate": 5.384126777303438e-05, "loss": 0.8844, "step": 4950 }, { "epoch": 2.244333635539438, "grad_norm": 0.32722108121801696, "learning_rate": 5.382939032407288e-05, "loss": 0.8634, "step": 4951 }, { "epoch": 2.244786944696283, "grad_norm": 0.23615664452310747, "learning_rate": 5.381751149005729e-05, "loss": 0.8788, "step": 4952 }, { "epoch": 2.2452402538531278, "grad_norm": 0.29805853687512585, "learning_rate": 5.3805631272177304e-05, "loss": 0.8842, "step": 4953 }, { "epoch": 2.245693563009973, "grad_norm": 0.4153851683419297, "learning_rate": 5.379374967162279e-05, "loss": 0.8788, "step": 4954 }, { "epoch": 2.246146872166818, "grad_norm": 0.4987653761799615, "learning_rate": 5.378186668958369e-05, "loss": 0.8688, "step": 4955 }, { "epoch": 2.2466001813236627, "grad_norm": 0.5221076784871495, "learning_rate": 5.3769982327250155e-05, "loss": 0.8782, "step": 4956 }, { "epoch": 2.2470534904805075, "grad_norm": 0.47849718252459017, "learning_rate": 5.37580965858124e-05, "loss": 0.8831, "step": 4957 }, { "epoch": 2.247506799637353, "grad_norm": 0.41440229592063843, "learning_rate": 5.374620946646086e-05, "loss": 0.8818, "step": 4958 }, { "epoch": 2.2479601087941976, "grad_norm": 0.415509843055896, "learning_rate": 5.3734320970386036e-05, "loss": 0.8804, "step": 4959 }, { "epoch": 2.2484134179510424, "grad_norm": 0.416951294917652, "learning_rate": 5.3722431098778594e-05, "loss": 0.8831, "step": 4960 }, { "epoch": 2.2488667271078877, "grad_norm": 0.4081541359955584, "learning_rate": 5.371053985282935e-05, "loss": 0.8703, "step": 4961 }, { "epoch": 2.2493200362647325, "grad_norm": 0.3852870745335183, "learning_rate": 5.3698647233729245e-05, "loss": 0.8879, "step": 4962 }, { "epoch": 2.2497733454215774, "grad_norm": 0.3711964793447073, "learning_rate": 5.368675324266936e-05, "loss": 0.9028, "step": 4963 }, { "epoch": 2.2502266545784226, "grad_norm": 0.3832133757472111, "learning_rate": 5.367485788084091e-05, "loss": 0.8835, "step": 4964 }, { "epoch": 2.2506799637352675, "grad_norm": 0.3499184474090866, "learning_rate": 5.3662961149435265e-05, "loss": 0.8827, "step": 4965 }, { "epoch": 2.2511332728921123, "grad_norm": 0.3505847681953082, "learning_rate": 5.36510630496439e-05, "loss": 0.8664, "step": 4966 }, { "epoch": 2.2515865820489576, "grad_norm": 0.3875724591996002, "learning_rate": 5.363916358265846e-05, "loss": 0.8853, "step": 4967 }, { "epoch": 2.2520398912058024, "grad_norm": 0.4423798690626043, "learning_rate": 5.362726274967069e-05, "loss": 0.8901, "step": 4968 }, { "epoch": 2.252493200362647, "grad_norm": 0.44266919535192817, "learning_rate": 5.361536055187251e-05, "loss": 0.8635, "step": 4969 }, { "epoch": 2.252946509519492, "grad_norm": 0.3812820252713256, "learning_rate": 5.3603456990455956e-05, "loss": 0.8759, "step": 4970 }, { "epoch": 2.2533998186763373, "grad_norm": 0.33979678078742415, "learning_rate": 5.359155206661321e-05, "loss": 0.875, "step": 4971 }, { "epoch": 2.253853127833182, "grad_norm": 2.487346031876852, "learning_rate": 5.3579645781536566e-05, "loss": 0.8976, "step": 4972 }, { "epoch": 2.254306436990027, "grad_norm": 0.45851618394546884, "learning_rate": 5.35677381364185e-05, "loss": 0.8839, "step": 4973 }, { "epoch": 2.2547597461468722, "grad_norm": 0.8555872411749581, "learning_rate": 5.355582913245157e-05, "loss": 0.869, "step": 4974 }, { "epoch": 2.255213055303717, "grad_norm": 1.1815367029521169, "learning_rate": 5.354391877082853e-05, "loss": 0.8614, "step": 4975 }, { "epoch": 2.255666364460562, "grad_norm": 0.7864897484248808, "learning_rate": 5.353200705274221e-05, "loss": 0.8865, "step": 4976 }, { "epoch": 2.256119673617407, "grad_norm": 0.5614992132989326, "learning_rate": 5.3520093979385617e-05, "loss": 0.883, "step": 4977 }, { "epoch": 2.256572982774252, "grad_norm": 0.6492019233691656, "learning_rate": 5.3508179551951876e-05, "loss": 0.8888, "step": 4978 }, { "epoch": 2.257026291931097, "grad_norm": 1.0574370325620897, "learning_rate": 5.349626377163425e-05, "loss": 0.8768, "step": 4979 }, { "epoch": 2.257479601087942, "grad_norm": 0.9690763141578795, "learning_rate": 5.348434663962614e-05, "loss": 0.8331, "step": 4980 }, { "epoch": 2.257932910244787, "grad_norm": 0.6645626032028035, "learning_rate": 5.347242815712109e-05, "loss": 0.8781, "step": 4981 }, { "epoch": 2.2583862194016318, "grad_norm": 0.5950176446511587, "learning_rate": 5.346050832531277e-05, "loss": 0.8603, "step": 4982 }, { "epoch": 2.258839528558477, "grad_norm": 0.6330200219431598, "learning_rate": 5.344858714539499e-05, "loss": 0.8952, "step": 4983 }, { "epoch": 2.259292837715322, "grad_norm": 0.7316350649474501, "learning_rate": 5.343666461856166e-05, "loss": 0.8925, "step": 4984 }, { "epoch": 2.2597461468721667, "grad_norm": 0.8747452672951347, "learning_rate": 5.342474074600689e-05, "loss": 0.8647, "step": 4985 }, { "epoch": 2.260199456029012, "grad_norm": 0.8297700718467051, "learning_rate": 5.341281552892487e-05, "loss": 0.8622, "step": 4986 }, { "epoch": 2.260652765185857, "grad_norm": 0.7267365484923471, "learning_rate": 5.3400888968509974e-05, "loss": 0.8739, "step": 4987 }, { "epoch": 2.2611060743427016, "grad_norm": 0.5779867988548791, "learning_rate": 5.338896106595665e-05, "loss": 0.8661, "step": 4988 }, { "epoch": 2.261559383499547, "grad_norm": 0.467972646611008, "learning_rate": 5.337703182245953e-05, "loss": 0.8773, "step": 4989 }, { "epoch": 2.2620126926563917, "grad_norm": 0.3899872243002075, "learning_rate": 5.336510123921336e-05, "loss": 0.8659, "step": 4990 }, { "epoch": 2.2624660018132365, "grad_norm": 0.4380264150723595, "learning_rate": 5.335316931741301e-05, "loss": 0.8726, "step": 4991 }, { "epoch": 2.262919310970082, "grad_norm": 0.6012968192534384, "learning_rate": 5.334123605825352e-05, "loss": 0.888, "step": 4992 }, { "epoch": 2.2633726201269266, "grad_norm": 1.020586235276103, "learning_rate": 5.332930146293002e-05, "loss": 0.9089, "step": 4993 }, { "epoch": 2.2638259292837715, "grad_norm": 0.5260590922687005, "learning_rate": 5.3317365532637814e-05, "loss": 0.8731, "step": 4994 }, { "epoch": 2.2642792384406167, "grad_norm": 0.49590162826902573, "learning_rate": 5.3305428268572304e-05, "loss": 0.8688, "step": 4995 }, { "epoch": 2.2647325475974616, "grad_norm": 0.5343244651127241, "learning_rate": 5.329348967192906e-05, "loss": 0.9023, "step": 4996 }, { "epoch": 2.2651858567543064, "grad_norm": 0.5207452556644578, "learning_rate": 5.3281549743903754e-05, "loss": 0.8855, "step": 4997 }, { "epoch": 2.265639165911151, "grad_norm": 0.5151874219206892, "learning_rate": 5.3269608485692206e-05, "loss": 0.8741, "step": 4998 }, { "epoch": 2.2660924750679965, "grad_norm": 0.5615767697135656, "learning_rate": 5.325766589849036e-05, "loss": 0.8755, "step": 4999 }, { "epoch": 2.2665457842248413, "grad_norm": 0.4834146583884921, "learning_rate": 5.324572198349432e-05, "loss": 0.8712, "step": 5000 }, { "epoch": 2.266999093381686, "grad_norm": 0.39315507840825753, "learning_rate": 5.3233776741900295e-05, "loss": 0.8703, "step": 5001 }, { "epoch": 2.2674524025385314, "grad_norm": 0.5277356854965118, "learning_rate": 5.3221830174904634e-05, "loss": 0.8704, "step": 5002 }, { "epoch": 2.2679057116953762, "grad_norm": 0.5633057270396042, "learning_rate": 5.320988228370382e-05, "loss": 0.8726, "step": 5003 }, { "epoch": 2.268359020852221, "grad_norm": 0.5042271942740735, "learning_rate": 5.319793306949448e-05, "loss": 0.8881, "step": 5004 }, { "epoch": 2.2688123300090663, "grad_norm": 0.5316450656043279, "learning_rate": 5.3185982533473344e-05, "loss": 0.8847, "step": 5005 }, { "epoch": 2.269265639165911, "grad_norm": 0.5677382587044464, "learning_rate": 5.3174030676837314e-05, "loss": 0.8896, "step": 5006 }, { "epoch": 2.269718948322756, "grad_norm": 0.5735441498328504, "learning_rate": 5.316207750078339e-05, "loss": 0.8837, "step": 5007 }, { "epoch": 2.2701722574796013, "grad_norm": 0.47065583260972177, "learning_rate": 5.315012300650871e-05, "loss": 0.8665, "step": 5008 }, { "epoch": 2.270625566636446, "grad_norm": 0.39232046690221356, "learning_rate": 5.3138167195210576e-05, "loss": 0.8996, "step": 5009 }, { "epoch": 2.271078875793291, "grad_norm": 0.38162674304494193, "learning_rate": 5.3126210068086386e-05, "loss": 0.8715, "step": 5010 }, { "epoch": 2.2715321849501358, "grad_norm": 0.4132927202854033, "learning_rate": 5.311425162633368e-05, "loss": 0.8891, "step": 5011 }, { "epoch": 2.271985494106981, "grad_norm": 0.4571105658114805, "learning_rate": 5.3102291871150134e-05, "loss": 0.8715, "step": 5012 }, { "epoch": 2.272438803263826, "grad_norm": 0.5173474999992865, "learning_rate": 5.309033080373354e-05, "loss": 0.8817, "step": 5013 }, { "epoch": 2.2728921124206707, "grad_norm": 0.417367623325475, "learning_rate": 5.3078368425281855e-05, "loss": 0.8755, "step": 5014 }, { "epoch": 2.273345421577516, "grad_norm": 0.2595865145838421, "learning_rate": 5.306640473699313e-05, "loss": 0.8888, "step": 5015 }, { "epoch": 2.273798730734361, "grad_norm": 0.316100029062705, "learning_rate": 5.305443974006557e-05, "loss": 0.8963, "step": 5016 }, { "epoch": 2.2742520398912056, "grad_norm": 0.3947235730222884, "learning_rate": 5.304247343569751e-05, "loss": 0.8683, "step": 5017 }, { "epoch": 2.274705349048051, "grad_norm": 0.38590778016413424, "learning_rate": 5.3030505825087394e-05, "loss": 0.8741, "step": 5018 }, { "epoch": 2.2751586582048957, "grad_norm": 0.42551502453073575, "learning_rate": 5.301853690943382e-05, "loss": 0.8907, "step": 5019 }, { "epoch": 2.2756119673617405, "grad_norm": 0.4713301597637494, "learning_rate": 5.3006566689935524e-05, "loss": 0.8921, "step": 5020 }, { "epoch": 2.276065276518586, "grad_norm": 0.4397610984079893, "learning_rate": 5.299459516779134e-05, "loss": 0.8688, "step": 5021 }, { "epoch": 2.2765185856754306, "grad_norm": 0.417439130092217, "learning_rate": 5.2982622344200264e-05, "loss": 0.8835, "step": 5022 }, { "epoch": 2.2769718948322755, "grad_norm": 0.4330917656722008, "learning_rate": 5.297064822036141e-05, "loss": 0.8806, "step": 5023 }, { "epoch": 2.2774252039891207, "grad_norm": 0.4318202238973824, "learning_rate": 5.295867279747402e-05, "loss": 0.8832, "step": 5024 }, { "epoch": 2.2778785131459656, "grad_norm": 0.42957641773363203, "learning_rate": 5.294669607673745e-05, "loss": 0.8717, "step": 5025 }, { "epoch": 2.2783318223028104, "grad_norm": 0.44013117780591704, "learning_rate": 5.2934718059351235e-05, "loss": 0.869, "step": 5026 }, { "epoch": 2.2787851314596557, "grad_norm": 0.38962852284880717, "learning_rate": 5.292273874651498e-05, "loss": 0.8769, "step": 5027 }, { "epoch": 2.2792384406165005, "grad_norm": 0.31063958437132627, "learning_rate": 5.2910758139428474e-05, "loss": 0.8811, "step": 5028 }, { "epoch": 2.2796917497733453, "grad_norm": 0.30005275513618057, "learning_rate": 5.2898776239291575e-05, "loss": 0.8776, "step": 5029 }, { "epoch": 2.2801450589301906, "grad_norm": 0.3319341766369852, "learning_rate": 5.2886793047304334e-05, "loss": 0.8769, "step": 5030 }, { "epoch": 2.2805983680870354, "grad_norm": 0.36543539944555237, "learning_rate": 5.2874808564666904e-05, "loss": 0.8706, "step": 5031 }, { "epoch": 2.2810516772438802, "grad_norm": 0.44308961338060404, "learning_rate": 5.286282279257955e-05, "loss": 0.8768, "step": 5032 }, { "epoch": 2.2815049864007255, "grad_norm": 0.9516246451283887, "learning_rate": 5.2850835732242684e-05, "loss": 0.8962, "step": 5033 }, { "epoch": 2.2819582955575703, "grad_norm": 0.48689993239890095, "learning_rate": 5.283884738485686e-05, "loss": 0.8887, "step": 5034 }, { "epoch": 2.282411604714415, "grad_norm": 0.5583619813744088, "learning_rate": 5.282685775162275e-05, "loss": 0.876, "step": 5035 }, { "epoch": 2.2828649138712604, "grad_norm": 0.6366590301385431, "learning_rate": 5.2814866833741125e-05, "loss": 0.865, "step": 5036 }, { "epoch": 2.2833182230281053, "grad_norm": 0.6301623118638524, "learning_rate": 5.280287463241292e-05, "loss": 0.8816, "step": 5037 }, { "epoch": 2.28377153218495, "grad_norm": 0.5670393667917114, "learning_rate": 5.2790881148839204e-05, "loss": 0.8532, "step": 5038 }, { "epoch": 2.284224841341795, "grad_norm": 0.5403972120161724, "learning_rate": 5.2778886384221144e-05, "loss": 0.8797, "step": 5039 }, { "epoch": 2.28467815049864, "grad_norm": 0.46459521065202003, "learning_rate": 5.276689033976006e-05, "loss": 0.869, "step": 5040 }, { "epoch": 2.285131459655485, "grad_norm": 0.39090296406178476, "learning_rate": 5.2754893016657384e-05, "loss": 0.8773, "step": 5041 }, { "epoch": 2.28558476881233, "grad_norm": 0.2765060102428211, "learning_rate": 5.274289441611469e-05, "loss": 0.8746, "step": 5042 }, { "epoch": 2.286038077969175, "grad_norm": 0.3069440243159633, "learning_rate": 5.2730894539333664e-05, "loss": 0.8835, "step": 5043 }, { "epoch": 2.28649138712602, "grad_norm": 0.36096209305210125, "learning_rate": 5.271889338751614e-05, "loss": 0.8646, "step": 5044 }, { "epoch": 2.286944696282865, "grad_norm": 0.32083005698980943, "learning_rate": 5.270689096186406e-05, "loss": 0.8742, "step": 5045 }, { "epoch": 2.28739800543971, "grad_norm": 0.3637239871108701, "learning_rate": 5.269488726357951e-05, "loss": 0.8841, "step": 5046 }, { "epoch": 2.287851314596555, "grad_norm": 0.36993684467670773, "learning_rate": 5.268288229386468e-05, "loss": 0.8727, "step": 5047 }, { "epoch": 2.2883046237533997, "grad_norm": 0.37134007578105926, "learning_rate": 5.267087605392192e-05, "loss": 0.8551, "step": 5048 }, { "epoch": 2.2887579329102445, "grad_norm": 0.41088766816396255, "learning_rate": 5.265886854495368e-05, "loss": 0.8896, "step": 5049 }, { "epoch": 2.28921124206709, "grad_norm": 0.4915116763631163, "learning_rate": 5.264685976816256e-05, "loss": 0.8777, "step": 5050 }, { "epoch": 2.2896645512239346, "grad_norm": 0.5438113142793051, "learning_rate": 5.263484972475126e-05, "loss": 0.8719, "step": 5051 }, { "epoch": 2.2901178603807795, "grad_norm": 0.5915494467176431, "learning_rate": 5.262283841592263e-05, "loss": 0.8604, "step": 5052 }, { "epoch": 2.2905711695376247, "grad_norm": 0.5201504212570348, "learning_rate": 5.2610825842879636e-05, "loss": 0.8827, "step": 5053 }, { "epoch": 2.2910244786944696, "grad_norm": 0.41428905958052575, "learning_rate": 5.2598812006825366e-05, "loss": 0.8794, "step": 5054 }, { "epoch": 2.2914777878513144, "grad_norm": 0.3369609611247756, "learning_rate": 5.2586796908963055e-05, "loss": 0.8816, "step": 5055 }, { "epoch": 2.2919310970081597, "grad_norm": 0.36689500741809444, "learning_rate": 5.257478055049603e-05, "loss": 0.8735, "step": 5056 }, { "epoch": 2.2923844061650045, "grad_norm": 0.38848821142971757, "learning_rate": 5.256276293262779e-05, "loss": 0.8839, "step": 5057 }, { "epoch": 2.2928377153218493, "grad_norm": 0.3313864539112403, "learning_rate": 5.2550744056561904e-05, "loss": 0.8573, "step": 5058 }, { "epoch": 2.2932910244786946, "grad_norm": 0.2910929483517332, "learning_rate": 5.253872392350213e-05, "loss": 0.8861, "step": 5059 }, { "epoch": 2.2937443336355394, "grad_norm": 0.34233108957687525, "learning_rate": 5.25267025346523e-05, "loss": 0.8938, "step": 5060 }, { "epoch": 2.2941976427923843, "grad_norm": 0.3713730167492331, "learning_rate": 5.2514679891216384e-05, "loss": 0.8771, "step": 5061 }, { "epoch": 2.2946509519492295, "grad_norm": 2.9756974546301507, "learning_rate": 5.250265599439851e-05, "loss": 0.8766, "step": 5062 }, { "epoch": 2.2951042611060744, "grad_norm": 0.6053003089756437, "learning_rate": 5.249063084540289e-05, "loss": 0.8586, "step": 5063 }, { "epoch": 2.295557570262919, "grad_norm": 1.0882111775977772, "learning_rate": 5.2478604445433885e-05, "loss": 0.8959, "step": 5064 }, { "epoch": 2.2960108794197644, "grad_norm": 0.9063930065201687, "learning_rate": 5.2466576795695965e-05, "loss": 0.9087, "step": 5065 }, { "epoch": 2.2964641885766093, "grad_norm": 0.5416927118470921, "learning_rate": 5.2454547897393744e-05, "loss": 0.8799, "step": 5066 }, { "epoch": 2.296917497733454, "grad_norm": 0.6762791473839218, "learning_rate": 5.244251775173195e-05, "loss": 0.8739, "step": 5067 }, { "epoch": 2.2973708068902994, "grad_norm": 0.9120377735183902, "learning_rate": 5.243048635991544e-05, "loss": 0.8791, "step": 5068 }, { "epoch": 2.297824116047144, "grad_norm": 0.7657659791905322, "learning_rate": 5.241845372314918e-05, "loss": 0.8824, "step": 5069 }, { "epoch": 2.298277425203989, "grad_norm": 0.4942964214378502, "learning_rate": 5.240641984263828e-05, "loss": 0.875, "step": 5070 }, { "epoch": 2.2987307343608343, "grad_norm": 0.5268891362544752, "learning_rate": 5.2394384719587974e-05, "loss": 0.8812, "step": 5071 }, { "epoch": 2.299184043517679, "grad_norm": 0.6815317915949027, "learning_rate": 5.238234835520361e-05, "loss": 0.8841, "step": 5072 }, { "epoch": 2.299637352674524, "grad_norm": 0.653605196429628, "learning_rate": 5.237031075069068e-05, "loss": 0.8907, "step": 5073 }, { "epoch": 2.3000906618313692, "grad_norm": 0.4980315299456739, "learning_rate": 5.235827190725475e-05, "loss": 0.8832, "step": 5074 }, { "epoch": 2.300543970988214, "grad_norm": 0.5317021086097149, "learning_rate": 5.2346231826101586e-05, "loss": 0.8631, "step": 5075 }, { "epoch": 2.300997280145059, "grad_norm": 0.5038684109979833, "learning_rate": 5.2334190508437006e-05, "loss": 0.8812, "step": 5076 }, { "epoch": 2.3014505893019037, "grad_norm": 0.5189642923644942, "learning_rate": 5.232214795546701e-05, "loss": 0.8758, "step": 5077 }, { "epoch": 2.301903898458749, "grad_norm": 0.5718839443439196, "learning_rate": 5.231010416839768e-05, "loss": 0.8757, "step": 5078 }, { "epoch": 2.302357207615594, "grad_norm": 0.5558348345422731, "learning_rate": 5.229805914843523e-05, "loss": 0.8766, "step": 5079 }, { "epoch": 2.3028105167724386, "grad_norm": 0.6172327843828324, "learning_rate": 5.2286012896786016e-05, "loss": 0.8917, "step": 5080 }, { "epoch": 2.303263825929284, "grad_norm": 0.7199681254847712, "learning_rate": 5.227396541465652e-05, "loss": 0.8741, "step": 5081 }, { "epoch": 2.3037171350861287, "grad_norm": 0.6846495905015629, "learning_rate": 5.22619167032533e-05, "loss": 0.8857, "step": 5082 }, { "epoch": 2.3041704442429736, "grad_norm": 0.5798560199356722, "learning_rate": 5.224986676378309e-05, "loss": 0.8861, "step": 5083 }, { "epoch": 2.304623753399819, "grad_norm": 0.46111812129050866, "learning_rate": 5.2237815597452715e-05, "loss": 0.8653, "step": 5084 }, { "epoch": 2.3050770625566637, "grad_norm": 0.3909274590536595, "learning_rate": 5.222576320546914e-05, "loss": 0.8861, "step": 5085 }, { "epoch": 2.3055303717135085, "grad_norm": 0.2876745681122052, "learning_rate": 5.2213709589039453e-05, "loss": 0.8916, "step": 5086 }, { "epoch": 2.3059836808703538, "grad_norm": 0.33634083865989295, "learning_rate": 5.2201654749370854e-05, "loss": 0.8804, "step": 5087 }, { "epoch": 2.3064369900271986, "grad_norm": 0.3899474401517561, "learning_rate": 5.218959868767068e-05, "loss": 0.8913, "step": 5088 }, { "epoch": 2.3068902991840434, "grad_norm": 0.4146770781301267, "learning_rate": 5.217754140514636e-05, "loss": 0.8573, "step": 5089 }, { "epoch": 2.3073436083408883, "grad_norm": 0.474554635414761, "learning_rate": 5.216548290300548e-05, "loss": 0.8843, "step": 5090 }, { "epoch": 2.3077969174977335, "grad_norm": 0.49725443322927454, "learning_rate": 5.215342318245574e-05, "loss": 0.8905, "step": 5091 }, { "epoch": 2.3082502266545784, "grad_norm": 0.47623548271031363, "learning_rate": 5.214136224470495e-05, "loss": 0.8848, "step": 5092 }, { "epoch": 2.308703535811423, "grad_norm": 0.4500851399358742, "learning_rate": 5.2129300090961055e-05, "loss": 0.8852, "step": 5093 }, { "epoch": 2.3091568449682685, "grad_norm": 0.5339941243214759, "learning_rate": 5.21172367224321e-05, "loss": 0.8705, "step": 5094 }, { "epoch": 2.3096101541251133, "grad_norm": 0.5258800180250066, "learning_rate": 5.210517214032627e-05, "loss": 0.8714, "step": 5095 }, { "epoch": 2.310063463281958, "grad_norm": 0.5427709513100741, "learning_rate": 5.209310634585189e-05, "loss": 0.8841, "step": 5096 }, { "epoch": 2.3105167724388034, "grad_norm": 0.5748053894364157, "learning_rate": 5.208103934021736e-05, "loss": 0.8724, "step": 5097 }, { "epoch": 2.310970081595648, "grad_norm": 0.5421089702955039, "learning_rate": 5.2068971124631235e-05, "loss": 0.8944, "step": 5098 }, { "epoch": 2.311423390752493, "grad_norm": 0.45997215452241474, "learning_rate": 5.205690170030218e-05, "loss": 0.8936, "step": 5099 }, { "epoch": 2.3118766999093383, "grad_norm": 0.40936236295663997, "learning_rate": 5.204483106843897e-05, "loss": 0.8723, "step": 5100 }, { "epoch": 2.312330009066183, "grad_norm": 0.2949387748270129, "learning_rate": 5.203275923025055e-05, "loss": 0.8671, "step": 5101 }, { "epoch": 2.312783318223028, "grad_norm": 0.3094812912814581, "learning_rate": 5.2020686186945935e-05, "loss": 0.8694, "step": 5102 }, { "epoch": 2.3132366273798732, "grad_norm": 0.353427943237671, "learning_rate": 5.200861193973426e-05, "loss": 0.8669, "step": 5103 }, { "epoch": 2.313689936536718, "grad_norm": 0.3488729459474434, "learning_rate": 5.19965364898248e-05, "loss": 0.8737, "step": 5104 }, { "epoch": 2.314143245693563, "grad_norm": 0.4445600216545959, "learning_rate": 5.1984459838426955e-05, "loss": 0.8708, "step": 5105 }, { "epoch": 2.314596554850408, "grad_norm": 0.5475516803230025, "learning_rate": 5.197238198675024e-05, "loss": 0.8804, "step": 5106 }, { "epoch": 2.315049864007253, "grad_norm": 0.65000068529758, "learning_rate": 5.196030293600428e-05, "loss": 0.8956, "step": 5107 }, { "epoch": 2.315503173164098, "grad_norm": 0.6362544307959659, "learning_rate": 5.194822268739883e-05, "loss": 0.87, "step": 5108 }, { "epoch": 2.315956482320943, "grad_norm": 0.6231525014516497, "learning_rate": 5.193614124214377e-05, "loss": 0.896, "step": 5109 }, { "epoch": 2.316409791477788, "grad_norm": 0.6114404275123781, "learning_rate": 5.1924058601449085e-05, "loss": 0.8542, "step": 5110 }, { "epoch": 2.3168631006346327, "grad_norm": 0.6303350757393784, "learning_rate": 5.1911974766524874e-05, "loss": 0.8947, "step": 5111 }, { "epoch": 2.317316409791478, "grad_norm": 0.6551065531583496, "learning_rate": 5.18998897385814e-05, "loss": 0.8712, "step": 5112 }, { "epoch": 2.317769718948323, "grad_norm": 0.645933511632691, "learning_rate": 5.188780351882899e-05, "loss": 0.8861, "step": 5113 }, { "epoch": 2.3182230281051677, "grad_norm": 0.5406105309882809, "learning_rate": 5.187571610847811e-05, "loss": 0.8948, "step": 5114 }, { "epoch": 2.3186763372620125, "grad_norm": 0.5052709117402752, "learning_rate": 5.186362750873937e-05, "loss": 0.8677, "step": 5115 }, { "epoch": 2.3191296464188578, "grad_norm": 0.5146110398280785, "learning_rate": 5.185153772082346e-05, "loss": 0.8694, "step": 5116 }, { "epoch": 2.3195829555757026, "grad_norm": 0.5324263665621561, "learning_rate": 5.1839446745941215e-05, "loss": 0.892, "step": 5117 }, { "epoch": 2.3200362647325474, "grad_norm": 0.6288912165051124, "learning_rate": 5.182735458530358e-05, "loss": 0.8761, "step": 5118 }, { "epoch": 2.3204895738893927, "grad_norm": 0.7236764553421411, "learning_rate": 5.1815261240121635e-05, "loss": 0.8833, "step": 5119 }, { "epoch": 2.3209428830462375, "grad_norm": 0.6400096169365467, "learning_rate": 5.180316671160655e-05, "loss": 0.8816, "step": 5120 }, { "epoch": 2.3213961922030824, "grad_norm": 0.5107271702266878, "learning_rate": 5.179107100096963e-05, "loss": 0.8599, "step": 5121 }, { "epoch": 2.3218495013599276, "grad_norm": 0.4211335615162259, "learning_rate": 5.17789741094223e-05, "loss": 0.8912, "step": 5122 }, { "epoch": 2.3223028105167725, "grad_norm": 0.3932709132601649, "learning_rate": 5.176687603817608e-05, "loss": 0.8911, "step": 5123 }, { "epoch": 2.3227561196736173, "grad_norm": 0.33797087508426954, "learning_rate": 5.1754776788442664e-05, "loss": 0.8881, "step": 5124 }, { "epoch": 2.3232094288304626, "grad_norm": 0.3478156104666955, "learning_rate": 5.1742676361433786e-05, "loss": 0.8736, "step": 5125 }, { "epoch": 2.3236627379873074, "grad_norm": 0.3635186219822697, "learning_rate": 5.173057475836137e-05, "loss": 0.8895, "step": 5126 }, { "epoch": 2.324116047144152, "grad_norm": 0.40953132816091764, "learning_rate": 5.1718471980437415e-05, "loss": 0.8846, "step": 5127 }, { "epoch": 2.324569356300997, "grad_norm": 0.4385997826858702, "learning_rate": 5.170636802887405e-05, "loss": 0.864, "step": 5128 }, { "epoch": 2.3250226654578423, "grad_norm": 0.47642934188993197, "learning_rate": 5.1694262904883515e-05, "loss": 0.8596, "step": 5129 }, { "epoch": 2.325475974614687, "grad_norm": 0.46975218129461566, "learning_rate": 5.1682156609678185e-05, "loss": 0.8916, "step": 5130 }, { "epoch": 2.325929283771532, "grad_norm": 0.47533295385521745, "learning_rate": 5.1670049144470555e-05, "loss": 0.8687, "step": 5131 }, { "epoch": 2.3263825929283772, "grad_norm": 0.4865176149076515, "learning_rate": 5.165794051047319e-05, "loss": 0.8854, "step": 5132 }, { "epoch": 2.326835902085222, "grad_norm": 0.4834853447185496, "learning_rate": 5.1645830708898816e-05, "loss": 0.8787, "step": 5133 }, { "epoch": 2.327289211242067, "grad_norm": 0.42230987505976336, "learning_rate": 5.163371974096028e-05, "loss": 0.8871, "step": 5134 }, { "epoch": 2.327742520398912, "grad_norm": 0.3962770133765391, "learning_rate": 5.162160760787052e-05, "loss": 0.861, "step": 5135 }, { "epoch": 2.328195829555757, "grad_norm": 0.3530624325231341, "learning_rate": 5.160949431084259e-05, "loss": 0.8841, "step": 5136 }, { "epoch": 2.328649138712602, "grad_norm": 0.28917553990583844, "learning_rate": 5.1597379851089704e-05, "loss": 0.8814, "step": 5137 }, { "epoch": 2.329102447869447, "grad_norm": 0.3328333872464081, "learning_rate": 5.1585264229825135e-05, "loss": 0.8789, "step": 5138 }, { "epoch": 2.329555757026292, "grad_norm": 0.49927567831106734, "learning_rate": 5.15731474482623e-05, "loss": 0.8759, "step": 5139 }, { "epoch": 2.3300090661831367, "grad_norm": 0.6988144584216138, "learning_rate": 5.156102950761474e-05, "loss": 0.8844, "step": 5140 }, { "epoch": 2.330462375339982, "grad_norm": 0.8311330340433402, "learning_rate": 5.1548910409096095e-05, "loss": 0.8783, "step": 5141 }, { "epoch": 2.330915684496827, "grad_norm": 0.9418602791887024, "learning_rate": 5.153679015392014e-05, "loss": 0.9004, "step": 5142 }, { "epoch": 2.3313689936536717, "grad_norm": 0.9626952103987497, "learning_rate": 5.152466874330073e-05, "loss": 0.8582, "step": 5143 }, { "epoch": 2.331822302810517, "grad_norm": 0.9246694334593211, "learning_rate": 5.151254617845188e-05, "loss": 0.872, "step": 5144 }, { "epoch": 2.3322756119673618, "grad_norm": 0.7778751756702601, "learning_rate": 5.1500422460587697e-05, "loss": 0.8928, "step": 5145 }, { "epoch": 2.3327289211242066, "grad_norm": 0.548236573353536, "learning_rate": 5.14882975909224e-05, "loss": 0.8662, "step": 5146 }, { "epoch": 2.333182230281052, "grad_norm": 0.32170145827036517, "learning_rate": 5.1476171570670336e-05, "loss": 0.8834, "step": 5147 }, { "epoch": 2.3336355394378967, "grad_norm": 0.3850827734122276, "learning_rate": 5.146404440104597e-05, "loss": 0.8591, "step": 5148 }, { "epoch": 2.3340888485947415, "grad_norm": 0.5410559610502989, "learning_rate": 5.1451916083263865e-05, "loss": 0.8655, "step": 5149 }, { "epoch": 2.334542157751587, "grad_norm": 0.6074854269078052, "learning_rate": 5.143978661853871e-05, "loss": 0.8777, "step": 5150 }, { "epoch": 2.3349954669084316, "grad_norm": 0.6189349716186807, "learning_rate": 5.142765600808529e-05, "loss": 0.8807, "step": 5151 }, { "epoch": 2.3354487760652765, "grad_norm": 0.5712440914220623, "learning_rate": 5.141552425311855e-05, "loss": 0.8885, "step": 5152 }, { "epoch": 2.3359020852221217, "grad_norm": 0.5784284393740415, "learning_rate": 5.14033913548535e-05, "loss": 0.8669, "step": 5153 }, { "epoch": 2.3363553943789666, "grad_norm": 0.45527948857784417, "learning_rate": 5.13912573145053e-05, "loss": 0.8853, "step": 5154 }, { "epoch": 2.3368087035358114, "grad_norm": 0.39111925396151764, "learning_rate": 5.13791221332892e-05, "loss": 0.8728, "step": 5155 }, { "epoch": 2.337262012692656, "grad_norm": 0.4440045292547264, "learning_rate": 5.136698581242057e-05, "loss": 0.8754, "step": 5156 }, { "epoch": 2.3377153218495015, "grad_norm": 0.4638442414683705, "learning_rate": 5.13548483531149e-05, "loss": 0.88, "step": 5157 }, { "epoch": 2.3381686310063463, "grad_norm": 0.38543710349177207, "learning_rate": 5.1342709756587796e-05, "loss": 0.8807, "step": 5158 }, { "epoch": 2.338621940163191, "grad_norm": 0.49678034230445406, "learning_rate": 5.1330570024054974e-05, "loss": 0.8739, "step": 5159 }, { "epoch": 2.3390752493200364, "grad_norm": 0.4058340490379552, "learning_rate": 5.131842915673227e-05, "loss": 0.8647, "step": 5160 }, { "epoch": 2.3395285584768812, "grad_norm": 0.34178387909876046, "learning_rate": 5.130628715583562e-05, "loss": 0.8738, "step": 5161 }, { "epoch": 2.339981867633726, "grad_norm": 0.3707906355106236, "learning_rate": 5.1294144022581086e-05, "loss": 0.8777, "step": 5162 }, { "epoch": 2.3404351767905713, "grad_norm": 0.3807551822305169, "learning_rate": 5.128199975818483e-05, "loss": 0.8721, "step": 5163 }, { "epoch": 2.340888485947416, "grad_norm": 0.3483651911765005, "learning_rate": 5.1269854363863144e-05, "loss": 0.8693, "step": 5164 }, { "epoch": 2.341341795104261, "grad_norm": 0.39384368360741284, "learning_rate": 5.1257707840832423e-05, "loss": 0.895, "step": 5165 }, { "epoch": 2.3417951042611063, "grad_norm": 0.4293949159959352, "learning_rate": 5.124556019030918e-05, "loss": 0.8728, "step": 5166 }, { "epoch": 2.342248413417951, "grad_norm": 0.38168999328726183, "learning_rate": 5.1233411413510026e-05, "loss": 0.8757, "step": 5167 }, { "epoch": 2.342701722574796, "grad_norm": 0.29537106550168524, "learning_rate": 5.1221261511651706e-05, "loss": 0.8744, "step": 5168 }, { "epoch": 2.3431550317316407, "grad_norm": 0.4216072083021824, "learning_rate": 5.1209110485951074e-05, "loss": 0.8923, "step": 5169 }, { "epoch": 2.343608340888486, "grad_norm": 0.5193534376853847, "learning_rate": 5.119695833762507e-05, "loss": 0.8884, "step": 5170 }, { "epoch": 2.344061650045331, "grad_norm": 0.4611016827861858, "learning_rate": 5.1184805067890794e-05, "loss": 0.873, "step": 5171 }, { "epoch": 2.3445149592021757, "grad_norm": 0.5828022923578471, "learning_rate": 5.117265067796541e-05, "loss": 0.8844, "step": 5172 }, { "epoch": 2.344968268359021, "grad_norm": 0.4337649423876924, "learning_rate": 5.116049516906623e-05, "loss": 0.8965, "step": 5173 }, { "epoch": 2.3454215775158658, "grad_norm": 0.4968868377527834, "learning_rate": 5.1148338542410657e-05, "loss": 0.8839, "step": 5174 }, { "epoch": 2.3458748866727106, "grad_norm": 0.4889565559009078, "learning_rate": 5.1136180799216215e-05, "loss": 0.8641, "step": 5175 }, { "epoch": 2.346328195829556, "grad_norm": 0.4204921613224516, "learning_rate": 5.112402194070053e-05, "loss": 0.8945, "step": 5176 }, { "epoch": 2.3467815049864007, "grad_norm": 0.34215359990741845, "learning_rate": 5.111186196808136e-05, "loss": 0.8677, "step": 5177 }, { "epoch": 2.3472348141432455, "grad_norm": 0.3368382671841561, "learning_rate": 5.1099700882576565e-05, "loss": 0.8878, "step": 5178 }, { "epoch": 2.347688123300091, "grad_norm": 0.2780009359193906, "learning_rate": 5.10875386854041e-05, "loss": 0.8943, "step": 5179 }, { "epoch": 2.3481414324569356, "grad_norm": 0.3051221880809021, "learning_rate": 5.1075375377782055e-05, "loss": 0.8801, "step": 5180 }, { "epoch": 2.3485947416137805, "grad_norm": 0.45229058526226484, "learning_rate": 5.106321096092861e-05, "loss": 0.8856, "step": 5181 }, { "epoch": 2.3490480507706257, "grad_norm": 0.5533581789850144, "learning_rate": 5.1051045436062074e-05, "loss": 0.8851, "step": 5182 }, { "epoch": 2.3495013599274706, "grad_norm": 0.5117937693224082, "learning_rate": 5.103887880440086e-05, "loss": 0.8805, "step": 5183 }, { "epoch": 2.3499546690843154, "grad_norm": 0.44818790625902055, "learning_rate": 5.102671106716348e-05, "loss": 0.8866, "step": 5184 }, { "epoch": 2.3504079782411607, "grad_norm": 0.4310051211762868, "learning_rate": 5.101454222556859e-05, "loss": 0.8838, "step": 5185 }, { "epoch": 2.3508612873980055, "grad_norm": 0.4471349314639501, "learning_rate": 5.100237228083492e-05, "loss": 0.8713, "step": 5186 }, { "epoch": 2.3513145965548503, "grad_norm": 0.5237026777332181, "learning_rate": 5.0990201234181335e-05, "loss": 0.878, "step": 5187 }, { "epoch": 2.3517679057116956, "grad_norm": 0.5200550180688442, "learning_rate": 5.09780290868268e-05, "loss": 0.8679, "step": 5188 }, { "epoch": 2.3522212148685404, "grad_norm": 0.46583372339634277, "learning_rate": 5.096585583999039e-05, "loss": 0.8733, "step": 5189 }, { "epoch": 2.3526745240253852, "grad_norm": 0.4055339553221472, "learning_rate": 5.095368149489128e-05, "loss": 0.8642, "step": 5190 }, { "epoch": 2.3531278331822305, "grad_norm": 0.3585713840195202, "learning_rate": 5.094150605274878e-05, "loss": 0.8876, "step": 5191 }, { "epoch": 2.3535811423390753, "grad_norm": 0.33050464454301565, "learning_rate": 5.0929329514782295e-05, "loss": 0.8908, "step": 5192 }, { "epoch": 2.35403445149592, "grad_norm": 0.3197175272778693, "learning_rate": 5.091715188221133e-05, "loss": 0.8936, "step": 5193 }, { "epoch": 2.354487760652765, "grad_norm": 0.27084833017018656, "learning_rate": 5.0904973156255536e-05, "loss": 0.8747, "step": 5194 }, { "epoch": 2.3549410698096103, "grad_norm": 0.33591749226373097, "learning_rate": 5.089279333813461e-05, "loss": 0.8761, "step": 5195 }, { "epoch": 2.355394378966455, "grad_norm": 0.3053155144912048, "learning_rate": 5.088061242906843e-05, "loss": 0.8687, "step": 5196 }, { "epoch": 2.3558476881233, "grad_norm": 0.34539287136017316, "learning_rate": 5.086843043027694e-05, "loss": 0.9042, "step": 5197 }, { "epoch": 2.356300997280145, "grad_norm": 0.4315260495461991, "learning_rate": 5.085624734298019e-05, "loss": 0.8851, "step": 5198 }, { "epoch": 2.35675430643699, "grad_norm": 0.4686442389682185, "learning_rate": 5.084406316839838e-05, "loss": 0.8601, "step": 5199 }, { "epoch": 2.357207615593835, "grad_norm": 0.4239209745507728, "learning_rate": 5.083187790775176e-05, "loss": 0.8835, "step": 5200 }, { "epoch": 2.35766092475068, "grad_norm": 0.36863686272694257, "learning_rate": 5.0819691562260734e-05, "loss": 0.8918, "step": 5201 }, { "epoch": 2.358114233907525, "grad_norm": 0.36043656869160007, "learning_rate": 5.0807504133145795e-05, "loss": 0.8781, "step": 5202 }, { "epoch": 2.3585675430643698, "grad_norm": 0.402662927846602, "learning_rate": 5.079531562162756e-05, "loss": 0.8551, "step": 5203 }, { "epoch": 2.359020852221215, "grad_norm": 0.4092363468730264, "learning_rate": 5.078312602892674e-05, "loss": 0.8955, "step": 5204 }, { "epoch": 2.35947416137806, "grad_norm": 0.37563392518883887, "learning_rate": 5.077093535626415e-05, "loss": 0.8733, "step": 5205 }, { "epoch": 2.3599274705349047, "grad_norm": 0.37605205987525747, "learning_rate": 5.075874360486074e-05, "loss": 0.865, "step": 5206 }, { "epoch": 2.3603807796917495, "grad_norm": 0.42779386906916983, "learning_rate": 5.074655077593754e-05, "loss": 0.8783, "step": 5207 }, { "epoch": 2.360834088848595, "grad_norm": 0.46620694749756686, "learning_rate": 5.073435687071569e-05, "loss": 0.8762, "step": 5208 }, { "epoch": 2.3612873980054396, "grad_norm": 0.4849549343669799, "learning_rate": 5.072216189041646e-05, "loss": 0.8794, "step": 5209 }, { "epoch": 2.3617407071622845, "grad_norm": 0.49769038971729934, "learning_rate": 5.0709965836261194e-05, "loss": 0.8787, "step": 5210 }, { "epoch": 2.3621940163191297, "grad_norm": 0.48215087041434856, "learning_rate": 5.069776870947138e-05, "loss": 0.8837, "step": 5211 }, { "epoch": 2.3626473254759746, "grad_norm": 0.5942078317622705, "learning_rate": 5.068557051126858e-05, "loss": 0.8686, "step": 5212 }, { "epoch": 2.3631006346328194, "grad_norm": 0.6220052861444667, "learning_rate": 5.0673371242874496e-05, "loss": 0.8872, "step": 5213 }, { "epoch": 2.3635539437896647, "grad_norm": 0.6278313720819784, "learning_rate": 5.066117090551091e-05, "loss": 0.8743, "step": 5214 }, { "epoch": 2.3640072529465095, "grad_norm": 0.5902794500122057, "learning_rate": 5.064896950039973e-05, "loss": 0.8822, "step": 5215 }, { "epoch": 2.3644605621033543, "grad_norm": 0.585719762936012, "learning_rate": 5.0636767028762954e-05, "loss": 0.8818, "step": 5216 }, { "epoch": 2.3649138712601996, "grad_norm": 0.4576986358627108, "learning_rate": 5.06245634918227e-05, "loss": 0.8705, "step": 5217 }, { "epoch": 2.3653671804170444, "grad_norm": 0.3988780038973461, "learning_rate": 5.0612358890801195e-05, "loss": 0.8945, "step": 5218 }, { "epoch": 2.3658204895738892, "grad_norm": 0.35860770148192933, "learning_rate": 5.0600153226920745e-05, "loss": 0.8495, "step": 5219 }, { "epoch": 2.3662737987307345, "grad_norm": 0.28636248661423525, "learning_rate": 5.05879465014038e-05, "loss": 0.8838, "step": 5220 }, { "epoch": 2.3667271078875793, "grad_norm": 0.31950518535018685, "learning_rate": 5.0575738715472896e-05, "loss": 0.8758, "step": 5221 }, { "epoch": 2.367180417044424, "grad_norm": 0.31813777487165557, "learning_rate": 5.0563529870350684e-05, "loss": 0.8812, "step": 5222 }, { "epoch": 2.3676337262012694, "grad_norm": 0.37726297520150653, "learning_rate": 5.0551319967259906e-05, "loss": 0.8863, "step": 5223 }, { "epoch": 2.3680870353581143, "grad_norm": 0.5179410195215615, "learning_rate": 5.053910900742343e-05, "loss": 0.8783, "step": 5224 }, { "epoch": 2.368540344514959, "grad_norm": 0.6453536283430271, "learning_rate": 5.05268969920642e-05, "loss": 0.8682, "step": 5225 }, { "epoch": 2.3689936536718044, "grad_norm": 0.7405270076768682, "learning_rate": 5.05146839224053e-05, "loss": 0.8826, "step": 5226 }, { "epoch": 2.369446962828649, "grad_norm": 0.7615068210509947, "learning_rate": 5.050246979966991e-05, "loss": 0.9024, "step": 5227 }, { "epoch": 2.369900271985494, "grad_norm": 0.6572450131157982, "learning_rate": 5.04902546250813e-05, "loss": 0.865, "step": 5228 }, { "epoch": 2.3703535811423393, "grad_norm": 0.549040017268499, "learning_rate": 5.0478038399862866e-05, "loss": 0.9026, "step": 5229 }, { "epoch": 2.370806890299184, "grad_norm": 0.4202395326512334, "learning_rate": 5.046582112523808e-05, "loss": 0.8717, "step": 5230 }, { "epoch": 2.371260199456029, "grad_norm": 0.39389661606813775, "learning_rate": 5.045360280243056e-05, "loss": 0.8662, "step": 5231 }, { "epoch": 2.371713508612874, "grad_norm": 0.47236877116229703, "learning_rate": 5.044138343266399e-05, "loss": 0.8905, "step": 5232 }, { "epoch": 2.372166817769719, "grad_norm": 0.5389172595883874, "learning_rate": 5.0429163017162175e-05, "loss": 0.8921, "step": 5233 }, { "epoch": 2.372620126926564, "grad_norm": 0.7351333476778535, "learning_rate": 5.041694155714904e-05, "loss": 0.8802, "step": 5234 }, { "epoch": 2.3730734360834087, "grad_norm": 0.8304937984465337, "learning_rate": 5.040471905384861e-05, "loss": 0.8817, "step": 5235 }, { "epoch": 2.373526745240254, "grad_norm": 0.8121675164533314, "learning_rate": 5.0392495508484966e-05, "loss": 0.8856, "step": 5236 }, { "epoch": 2.373980054397099, "grad_norm": 0.750201924191632, "learning_rate": 5.038027092228237e-05, "loss": 0.8871, "step": 5237 }, { "epoch": 2.3744333635539436, "grad_norm": 0.7492833077704567, "learning_rate": 5.036804529646513e-05, "loss": 0.8758, "step": 5238 }, { "epoch": 2.374886672710789, "grad_norm": 0.6656684449444542, "learning_rate": 5.035581863225767e-05, "loss": 0.8764, "step": 5239 }, { "epoch": 2.3753399818676337, "grad_norm": 0.43498898544110703, "learning_rate": 5.034359093088455e-05, "loss": 0.8588, "step": 5240 }, { "epoch": 2.3757932910244786, "grad_norm": 0.343128537314031, "learning_rate": 5.033136219357039e-05, "loss": 0.8787, "step": 5241 }, { "epoch": 2.376246600181324, "grad_norm": 0.4291740883524962, "learning_rate": 5.031913242153995e-05, "loss": 0.8882, "step": 5242 }, { "epoch": 2.3766999093381687, "grad_norm": 0.4324491738175963, "learning_rate": 5.030690161601807e-05, "loss": 0.8839, "step": 5243 }, { "epoch": 2.3771532184950135, "grad_norm": 0.48980122881350835, "learning_rate": 5.02946697782297e-05, "loss": 0.8925, "step": 5244 }, { "epoch": 2.3776065276518588, "grad_norm": 0.5409310846802745, "learning_rate": 5.02824369093999e-05, "loss": 0.8954, "step": 5245 }, { "epoch": 2.3780598368087036, "grad_norm": 0.579761531181306, "learning_rate": 5.027020301075382e-05, "loss": 0.8856, "step": 5246 }, { "epoch": 2.3785131459655484, "grad_norm": 0.6588997067023833, "learning_rate": 5.0257968083516735e-05, "loss": 0.858, "step": 5247 }, { "epoch": 2.3789664551223932, "grad_norm": 0.5986181971023108, "learning_rate": 5.024573212891398e-05, "loss": 0.8839, "step": 5248 }, { "epoch": 2.3794197642792385, "grad_norm": 0.46441796761461607, "learning_rate": 5.023349514817105e-05, "loss": 0.8831, "step": 5249 }, { "epoch": 2.3798730734360833, "grad_norm": 0.3809239040926077, "learning_rate": 5.022125714251352e-05, "loss": 0.8824, "step": 5250 }, { "epoch": 2.380326382592928, "grad_norm": 0.3373230625232598, "learning_rate": 5.020901811316703e-05, "loss": 0.8646, "step": 5251 }, { "epoch": 2.3807796917497734, "grad_norm": 0.3937276361406932, "learning_rate": 5.019677806135738e-05, "loss": 0.8742, "step": 5252 }, { "epoch": 2.3812330009066183, "grad_norm": 0.4161859671329133, "learning_rate": 5.018453698831043e-05, "loss": 0.8858, "step": 5253 }, { "epoch": 2.381686310063463, "grad_norm": 0.40599482548173077, "learning_rate": 5.017229489525218e-05, "loss": 0.8753, "step": 5254 }, { "epoch": 2.3821396192203084, "grad_norm": 0.4474202559210028, "learning_rate": 5.0160051783408684e-05, "loss": 0.8952, "step": 5255 }, { "epoch": 2.382592928377153, "grad_norm": 0.5674266096556327, "learning_rate": 5.014780765400616e-05, "loss": 0.8689, "step": 5256 }, { "epoch": 2.383046237533998, "grad_norm": 0.6500478533095756, "learning_rate": 5.013556250827087e-05, "loss": 0.8757, "step": 5257 }, { "epoch": 2.3834995466908433, "grad_norm": 0.599929948729522, "learning_rate": 5.0123316347429204e-05, "loss": 0.8733, "step": 5258 }, { "epoch": 2.383952855847688, "grad_norm": 0.4745172132030613, "learning_rate": 5.011106917270766e-05, "loss": 0.8928, "step": 5259 }, { "epoch": 2.384406165004533, "grad_norm": 0.3359975578868226, "learning_rate": 5.009882098533281e-05, "loss": 0.8847, "step": 5260 }, { "epoch": 2.3848594741613782, "grad_norm": 0.31876293983347137, "learning_rate": 5.0086571786531366e-05, "loss": 0.8823, "step": 5261 }, { "epoch": 2.385312783318223, "grad_norm": 0.30373970289921975, "learning_rate": 5.007432157753011e-05, "loss": 0.8918, "step": 5262 }, { "epoch": 2.385766092475068, "grad_norm": 0.40213496669218873, "learning_rate": 5.006207035955595e-05, "loss": 0.8648, "step": 5263 }, { "epoch": 2.386219401631913, "grad_norm": 0.47114201162898983, "learning_rate": 5.004981813383587e-05, "loss": 0.8622, "step": 5264 }, { "epoch": 2.386672710788758, "grad_norm": 0.4773399681793328, "learning_rate": 5.003756490159696e-05, "loss": 0.9019, "step": 5265 }, { "epoch": 2.387126019945603, "grad_norm": 0.4515997008883914, "learning_rate": 5.0025310664066435e-05, "loss": 0.8725, "step": 5266 }, { "epoch": 2.387579329102448, "grad_norm": 0.3722445707278127, "learning_rate": 5.0013055422471575e-05, "loss": 0.9049, "step": 5267 }, { "epoch": 2.388032638259293, "grad_norm": 0.2990378259026441, "learning_rate": 5.000079917803979e-05, "loss": 0.9035, "step": 5268 }, { "epoch": 2.3884859474161377, "grad_norm": 0.3355148944712761, "learning_rate": 4.998854193199857e-05, "loss": 0.8845, "step": 5269 }, { "epoch": 2.388939256572983, "grad_norm": 0.4064008826711339, "learning_rate": 4.9976283685575516e-05, "loss": 0.8843, "step": 5270 }, { "epoch": 2.389392565729828, "grad_norm": 0.37751422665512047, "learning_rate": 4.996402443999834e-05, "loss": 0.8905, "step": 5271 }, { "epoch": 2.3898458748866727, "grad_norm": 0.38138479335759873, "learning_rate": 4.995176419649483e-05, "loss": 0.903, "step": 5272 }, { "epoch": 2.3902991840435175, "grad_norm": 0.5221723672300087, "learning_rate": 4.993950295629288e-05, "loss": 0.878, "step": 5273 }, { "epoch": 2.3907524932003628, "grad_norm": 0.5252059246503796, "learning_rate": 4.992724072062052e-05, "loss": 0.8627, "step": 5274 }, { "epoch": 2.3912058023572076, "grad_norm": 0.41054216432765633, "learning_rate": 4.991497749070582e-05, "loss": 0.8763, "step": 5275 }, { "epoch": 2.3916591115140524, "grad_norm": 0.3978903455568497, "learning_rate": 4.9902713267776965e-05, "loss": 0.8776, "step": 5276 }, { "epoch": 2.3921124206708977, "grad_norm": 0.39284796261897426, "learning_rate": 4.989044805306229e-05, "loss": 0.858, "step": 5277 }, { "epoch": 2.3925657298277425, "grad_norm": 0.3483218676061506, "learning_rate": 4.987818184779016e-05, "loss": 0.8899, "step": 5278 }, { "epoch": 2.3930190389845873, "grad_norm": 0.35165919445789395, "learning_rate": 4.9865914653189116e-05, "loss": 0.8849, "step": 5279 }, { "epoch": 2.3934723481414326, "grad_norm": 0.4376935752530494, "learning_rate": 4.98536464704877e-05, "loss": 0.8794, "step": 5280 }, { "epoch": 2.3939256572982774, "grad_norm": 0.418201631430149, "learning_rate": 4.984137730091464e-05, "loss": 0.8744, "step": 5281 }, { "epoch": 2.3943789664551223, "grad_norm": 0.23540224254580458, "learning_rate": 4.9829107145698706e-05, "loss": 0.8685, "step": 5282 }, { "epoch": 2.3948322756119675, "grad_norm": 0.3991971719998409, "learning_rate": 4.9816836006068814e-05, "loss": 0.8701, "step": 5283 }, { "epoch": 2.3952855847688124, "grad_norm": 0.5188715496648005, "learning_rate": 4.9804563883253946e-05, "loss": 0.895, "step": 5284 }, { "epoch": 2.395738893925657, "grad_norm": 0.4701030890855219, "learning_rate": 4.9792290778483195e-05, "loss": 0.8802, "step": 5285 }, { "epoch": 2.396192203082502, "grad_norm": 0.43787442100343343, "learning_rate": 4.978001669298573e-05, "loss": 0.8757, "step": 5286 }, { "epoch": 2.3966455122393473, "grad_norm": 0.45217150162321756, "learning_rate": 4.9767741627990864e-05, "loss": 0.8603, "step": 5287 }, { "epoch": 2.397098821396192, "grad_norm": 0.46462237969747927, "learning_rate": 4.975546558472795e-05, "loss": 0.8857, "step": 5288 }, { "epoch": 2.397552130553037, "grad_norm": 0.4682296254190874, "learning_rate": 4.97431885644265e-05, "loss": 0.889, "step": 5289 }, { "epoch": 2.3980054397098822, "grad_norm": 0.43044893115556787, "learning_rate": 4.9730910568316074e-05, "loss": 0.8859, "step": 5290 }, { "epoch": 2.398458748866727, "grad_norm": 0.44493871377918137, "learning_rate": 4.9718631597626366e-05, "loss": 0.8736, "step": 5291 }, { "epoch": 2.398912058023572, "grad_norm": 0.45817808997378673, "learning_rate": 4.970635165358714e-05, "loss": 0.9001, "step": 5292 }, { "epoch": 2.399365367180417, "grad_norm": 0.34489322238493697, "learning_rate": 4.969407073742826e-05, "loss": 0.8771, "step": 5293 }, { "epoch": 2.399818676337262, "grad_norm": 0.3023256652167846, "learning_rate": 4.968178885037972e-05, "loss": 0.8867, "step": 5294 }, { "epoch": 2.400271985494107, "grad_norm": 0.3253410559685095, "learning_rate": 4.966950599367156e-05, "loss": 0.8602, "step": 5295 }, { "epoch": 2.400725294650952, "grad_norm": 0.36854918173878815, "learning_rate": 4.965722216853396e-05, "loss": 0.8857, "step": 5296 }, { "epoch": 2.401178603807797, "grad_norm": 0.4013378516681496, "learning_rate": 4.964493737619717e-05, "loss": 0.8632, "step": 5297 }, { "epoch": 2.4016319129646417, "grad_norm": 0.36633414043186185, "learning_rate": 4.9632651617891564e-05, "loss": 0.8735, "step": 5298 }, { "epoch": 2.402085222121487, "grad_norm": 0.3814883268949225, "learning_rate": 4.962036489484758e-05, "loss": 0.8862, "step": 5299 }, { "epoch": 2.402538531278332, "grad_norm": 0.4818615981477088, "learning_rate": 4.9608077208295784e-05, "loss": 0.8679, "step": 5300 }, { "epoch": 2.4029918404351767, "grad_norm": 0.4767901520874524, "learning_rate": 4.9595788559466825e-05, "loss": 0.8835, "step": 5301 }, { "epoch": 2.403445149592022, "grad_norm": 0.39935492503589937, "learning_rate": 4.958349894959143e-05, "loss": 0.9042, "step": 5302 }, { "epoch": 2.4038984587488668, "grad_norm": 0.32326988315021454, "learning_rate": 4.9571208379900456e-05, "loss": 0.8722, "step": 5303 }, { "epoch": 2.4043517679057116, "grad_norm": 0.23852917856534878, "learning_rate": 4.955891685162483e-05, "loss": 0.8726, "step": 5304 }, { "epoch": 2.404805077062557, "grad_norm": 0.2916444459230298, "learning_rate": 4.9546624365995594e-05, "loss": 0.8761, "step": 5305 }, { "epoch": 2.4052583862194017, "grad_norm": 0.34670697920610843, "learning_rate": 4.953433092424386e-05, "loss": 0.8583, "step": 5306 }, { "epoch": 2.4057116953762465, "grad_norm": 0.42533635847561707, "learning_rate": 4.952203652760088e-05, "loss": 0.8598, "step": 5307 }, { "epoch": 2.406165004533092, "grad_norm": 0.47718051095318076, "learning_rate": 4.9509741177297945e-05, "loss": 0.8785, "step": 5308 }, { "epoch": 2.4066183136899366, "grad_norm": 0.5286572497346848, "learning_rate": 4.949744487456649e-05, "loss": 0.8922, "step": 5309 }, { "epoch": 2.4070716228467814, "grad_norm": 0.559625511273886, "learning_rate": 4.9485147620638e-05, "loss": 0.8713, "step": 5310 }, { "epoch": 2.4075249320036267, "grad_norm": 1.6649756358300813, "learning_rate": 4.947284941674411e-05, "loss": 0.9121, "step": 5311 }, { "epoch": 2.4079782411604715, "grad_norm": 0.4087308094746959, "learning_rate": 4.9460550264116517e-05, "loss": 0.8632, "step": 5312 }, { "epoch": 2.4084315503173164, "grad_norm": 0.9911140040831391, "learning_rate": 4.944825016398701e-05, "loss": 0.8726, "step": 5313 }, { "epoch": 2.408884859474161, "grad_norm": 1.3141164112103585, "learning_rate": 4.943594911758748e-05, "loss": 0.8734, "step": 5314 }, { "epoch": 2.4093381686310065, "grad_norm": 0.47921865905917294, "learning_rate": 4.942364712614991e-05, "loss": 0.8851, "step": 5315 }, { "epoch": 2.4097914777878513, "grad_norm": 0.9008367728581655, "learning_rate": 4.941134419090638e-05, "loss": 0.8833, "step": 5316 }, { "epoch": 2.410244786944696, "grad_norm": 1.5022530685709574, "learning_rate": 4.939904031308908e-05, "loss": 0.8905, "step": 5317 }, { "epoch": 2.4106980961015414, "grad_norm": 0.45219173804991947, "learning_rate": 4.938673549393026e-05, "loss": 0.8647, "step": 5318 }, { "epoch": 2.4111514052583862, "grad_norm": 1.5190519457857659, "learning_rate": 4.9374429734662296e-05, "loss": 0.8789, "step": 5319 }, { "epoch": 2.411604714415231, "grad_norm": 0.642820454199188, "learning_rate": 4.936212303651766e-05, "loss": 0.8785, "step": 5320 }, { "epoch": 2.4120580235720763, "grad_norm": 1.1985528901056135, "learning_rate": 4.934981540072887e-05, "loss": 0.8581, "step": 5321 }, { "epoch": 2.412511332728921, "grad_norm": 1.1159874551073923, "learning_rate": 4.933750682852859e-05, "loss": 0.9068, "step": 5322 }, { "epoch": 2.412964641885766, "grad_norm": 0.8901439040069259, "learning_rate": 4.9325197321149566e-05, "loss": 0.8874, "step": 5323 }, { "epoch": 2.4134179510426113, "grad_norm": 1.1768617584413759, "learning_rate": 4.931288687982462e-05, "loss": 0.8861, "step": 5324 }, { "epoch": 2.413871260199456, "grad_norm": 0.7111282791003398, "learning_rate": 4.930057550578668e-05, "loss": 0.8869, "step": 5325 }, { "epoch": 2.414324569356301, "grad_norm": 0.8333406004627978, "learning_rate": 4.9288263200268776e-05, "loss": 0.8676, "step": 5326 }, { "epoch": 2.4147778785131457, "grad_norm": 0.9604553888190404, "learning_rate": 4.9275949964504e-05, "loss": 0.8629, "step": 5327 }, { "epoch": 2.415231187669991, "grad_norm": 0.9755110167331937, "learning_rate": 4.9263635799725584e-05, "loss": 0.8815, "step": 5328 }, { "epoch": 2.415684496826836, "grad_norm": 0.7880914004844906, "learning_rate": 4.925132070716682e-05, "loss": 0.8991, "step": 5329 }, { "epoch": 2.4161378059836807, "grad_norm": 0.5859988383485621, "learning_rate": 4.923900468806109e-05, "loss": 0.8754, "step": 5330 }, { "epoch": 2.416591115140526, "grad_norm": 0.772890441123839, "learning_rate": 4.922668774364189e-05, "loss": 0.86, "step": 5331 }, { "epoch": 2.4170444242973708, "grad_norm": 0.7029177749858493, "learning_rate": 4.9214369875142796e-05, "loss": 0.8619, "step": 5332 }, { "epoch": 2.4174977334542156, "grad_norm": 0.7938140352681888, "learning_rate": 4.920205108379749e-05, "loss": 0.8808, "step": 5333 }, { "epoch": 2.417951042611061, "grad_norm": 1.0111763653622272, "learning_rate": 4.918973137083972e-05, "loss": 0.866, "step": 5334 }, { "epoch": 2.4184043517679057, "grad_norm": 0.7158813145121151, "learning_rate": 4.9177410737503336e-05, "loss": 0.8848, "step": 5335 }, { "epoch": 2.4188576609247505, "grad_norm": 0.5169104953053236, "learning_rate": 4.91650891850223e-05, "loss": 0.9032, "step": 5336 }, { "epoch": 2.419310970081596, "grad_norm": 0.642535143786341, "learning_rate": 4.915276671463064e-05, "loss": 0.8678, "step": 5337 }, { "epoch": 2.4197642792384406, "grad_norm": 0.6185615625612365, "learning_rate": 4.91404433275625e-05, "loss": 0.8888, "step": 5338 }, { "epoch": 2.4202175883952854, "grad_norm": 0.5914977776951622, "learning_rate": 4.912811902505209e-05, "loss": 0.8724, "step": 5339 }, { "epoch": 2.4206708975521307, "grad_norm": 0.5500711531683257, "learning_rate": 4.9115793808333736e-05, "loss": 0.8756, "step": 5340 }, { "epoch": 2.4211242067089755, "grad_norm": 0.43145703142421565, "learning_rate": 4.910346767864185e-05, "loss": 0.8712, "step": 5341 }, { "epoch": 2.4215775158658204, "grad_norm": 0.4143373357256692, "learning_rate": 4.909114063721092e-05, "loss": 0.872, "step": 5342 }, { "epoch": 2.4220308250226656, "grad_norm": 0.46466776143750127, "learning_rate": 4.9078812685275534e-05, "loss": 0.8866, "step": 5343 }, { "epoch": 2.4224841341795105, "grad_norm": 0.47229848964360904, "learning_rate": 4.906648382407037e-05, "loss": 0.8692, "step": 5344 }, { "epoch": 2.4229374433363553, "grad_norm": 0.5678833161912438, "learning_rate": 4.905415405483021e-05, "loss": 0.8942, "step": 5345 }, { "epoch": 2.4233907524932006, "grad_norm": 0.5818330989239985, "learning_rate": 4.904182337878991e-05, "loss": 0.8963, "step": 5346 }, { "epoch": 2.4238440616500454, "grad_norm": 0.3293747918885042, "learning_rate": 4.902949179718442e-05, "loss": 0.8734, "step": 5347 }, { "epoch": 2.4242973708068902, "grad_norm": 0.3659982104726782, "learning_rate": 4.90171593112488e-05, "loss": 0.8593, "step": 5348 }, { "epoch": 2.4247506799637355, "grad_norm": 0.48225680450863145, "learning_rate": 4.900482592221817e-05, "loss": 0.889, "step": 5349 }, { "epoch": 2.4252039891205803, "grad_norm": 0.3894044916082666, "learning_rate": 4.899249163132775e-05, "loss": 0.8826, "step": 5350 }, { "epoch": 2.425657298277425, "grad_norm": 0.38753294577507386, "learning_rate": 4.8980156439812875e-05, "loss": 0.8596, "step": 5351 }, { "epoch": 2.42611060743427, "grad_norm": 0.5611777184904975, "learning_rate": 4.896782034890894e-05, "loss": 0.8746, "step": 5352 }, { "epoch": 2.4265639165911153, "grad_norm": 0.48845870220857457, "learning_rate": 4.895548335985144e-05, "loss": 0.8805, "step": 5353 }, { "epoch": 2.42701722574796, "grad_norm": 0.37164298002219937, "learning_rate": 4.894314547387597e-05, "loss": 0.8578, "step": 5354 }, { "epoch": 2.427470534904805, "grad_norm": 0.42240882357049603, "learning_rate": 4.893080669221818e-05, "loss": 0.8677, "step": 5355 }, { "epoch": 2.42792384406165, "grad_norm": 0.44143384399888735, "learning_rate": 4.891846701611385e-05, "loss": 0.8668, "step": 5356 }, { "epoch": 2.428377153218495, "grad_norm": 0.3451278203750534, "learning_rate": 4.8906126446798855e-05, "loss": 0.8994, "step": 5357 }, { "epoch": 2.42883046237534, "grad_norm": 0.341554364707886, "learning_rate": 4.889378498550912e-05, "loss": 0.8709, "step": 5358 }, { "epoch": 2.429283771532185, "grad_norm": 0.3274947357226111, "learning_rate": 4.888144263348067e-05, "loss": 0.8751, "step": 5359 }, { "epoch": 2.42973708068903, "grad_norm": 0.33855700938694594, "learning_rate": 4.886909939194964e-05, "loss": 0.8876, "step": 5360 }, { "epoch": 2.4301903898458748, "grad_norm": 0.29724210776160914, "learning_rate": 4.885675526215226e-05, "loss": 0.8739, "step": 5361 }, { "epoch": 2.43064369900272, "grad_norm": 0.29074525376179555, "learning_rate": 4.884441024532479e-05, "loss": 0.9135, "step": 5362 }, { "epoch": 2.431097008159565, "grad_norm": 0.3466653007912508, "learning_rate": 4.883206434270363e-05, "loss": 0.867, "step": 5363 }, { "epoch": 2.4315503173164097, "grad_norm": 0.34054967214749576, "learning_rate": 4.88197175555253e-05, "loss": 0.8667, "step": 5364 }, { "epoch": 2.4320036264732545, "grad_norm": 0.3116610197961883, "learning_rate": 4.880736988502632e-05, "loss": 0.8761, "step": 5365 }, { "epoch": 2.4324569356301, "grad_norm": 0.3407841897692054, "learning_rate": 4.879502133244336e-05, "loss": 0.8735, "step": 5366 }, { "epoch": 2.4329102447869446, "grad_norm": 0.39237164153880744, "learning_rate": 4.878267189901317e-05, "loss": 0.8726, "step": 5367 }, { "epoch": 2.4333635539437894, "grad_norm": 0.3804511172518264, "learning_rate": 4.8770321585972575e-05, "loss": 0.8808, "step": 5368 }, { "epoch": 2.4338168631006347, "grad_norm": 0.3470490241838002, "learning_rate": 4.875797039455849e-05, "loss": 0.8648, "step": 5369 }, { "epoch": 2.4342701722574795, "grad_norm": 0.3427476056245708, "learning_rate": 4.874561832600794e-05, "loss": 0.8684, "step": 5370 }, { "epoch": 2.4347234814143244, "grad_norm": 0.28405800346058446, "learning_rate": 4.873326538155802e-05, "loss": 0.8787, "step": 5371 }, { "epoch": 2.4351767905711696, "grad_norm": 0.2595780592468819, "learning_rate": 4.8720911562445896e-05, "loss": 0.8675, "step": 5372 }, { "epoch": 2.4356300997280145, "grad_norm": 0.3359139785905444, "learning_rate": 4.870855686990885e-05, "loss": 0.853, "step": 5373 }, { "epoch": 2.4360834088848593, "grad_norm": 0.4306238310666022, "learning_rate": 4.8696201305184235e-05, "loss": 0.873, "step": 5374 }, { "epoch": 2.4365367180417046, "grad_norm": 0.4862118549873189, "learning_rate": 4.86838448695095e-05, "loss": 0.878, "step": 5375 }, { "epoch": 2.4369900271985494, "grad_norm": 0.48034229139730245, "learning_rate": 4.867148756412219e-05, "loss": 0.8803, "step": 5376 }, { "epoch": 2.4374433363553942, "grad_norm": 0.3873583583167069, "learning_rate": 4.8659129390259916e-05, "loss": 0.8854, "step": 5377 }, { "epoch": 2.4378966455122395, "grad_norm": 0.5287487006733117, "learning_rate": 4.864677034916037e-05, "loss": 0.8772, "step": 5378 }, { "epoch": 2.4383499546690843, "grad_norm": 0.33218601975714407, "learning_rate": 4.8634410442061376e-05, "loss": 0.8659, "step": 5379 }, { "epoch": 2.438803263825929, "grad_norm": 0.3013210483397206, "learning_rate": 4.862204967020079e-05, "loss": 0.8947, "step": 5380 }, { "epoch": 2.4392565729827744, "grad_norm": 0.3035605189394623, "learning_rate": 4.86096880348166e-05, "loss": 0.8759, "step": 5381 }, { "epoch": 2.4397098821396193, "grad_norm": 0.3365434412978244, "learning_rate": 4.859732553714684e-05, "loss": 0.8615, "step": 5382 }, { "epoch": 2.440163191296464, "grad_norm": 0.4440664398186987, "learning_rate": 4.858496217842967e-05, "loss": 0.8811, "step": 5383 }, { "epoch": 2.4406165004533094, "grad_norm": 0.46360078914224767, "learning_rate": 4.85725979599033e-05, "loss": 0.881, "step": 5384 }, { "epoch": 2.441069809610154, "grad_norm": 0.410080854575634, "learning_rate": 4.8560232882806046e-05, "loss": 0.8718, "step": 5385 }, { "epoch": 2.441523118766999, "grad_norm": 0.39224265698071087, "learning_rate": 4.854786694837632e-05, "loss": 0.8895, "step": 5386 }, { "epoch": 2.4419764279238443, "grad_norm": 0.3950127304075945, "learning_rate": 4.853550015785259e-05, "loss": 0.88, "step": 5387 }, { "epoch": 2.442429737080689, "grad_norm": 0.42424956479796566, "learning_rate": 4.8523132512473436e-05, "loss": 0.8742, "step": 5388 }, { "epoch": 2.442883046237534, "grad_norm": 0.4184611110679396, "learning_rate": 4.851076401347751e-05, "loss": 0.8618, "step": 5389 }, { "epoch": 2.443336355394379, "grad_norm": 0.3547638385885123, "learning_rate": 4.8498394662103564e-05, "loss": 0.8997, "step": 5390 }, { "epoch": 2.443789664551224, "grad_norm": 0.3320813079519983, "learning_rate": 4.8486024459590416e-05, "loss": 0.8794, "step": 5391 }, { "epoch": 2.444242973708069, "grad_norm": 0.4252818055953489, "learning_rate": 4.847365340717698e-05, "loss": 0.879, "step": 5392 }, { "epoch": 2.4446962828649137, "grad_norm": 0.5365666754563821, "learning_rate": 4.846128150610224e-05, "loss": 0.8942, "step": 5393 }, { "epoch": 2.445149592021759, "grad_norm": 0.6231584347487227, "learning_rate": 4.84489087576053e-05, "loss": 0.8594, "step": 5394 }, { "epoch": 2.445602901178604, "grad_norm": 0.545890577061619, "learning_rate": 4.843653516292531e-05, "loss": 0.8707, "step": 5395 }, { "epoch": 2.4460562103354486, "grad_norm": 0.5209378103497359, "learning_rate": 4.842416072330152e-05, "loss": 0.8676, "step": 5396 }, { "epoch": 2.446509519492294, "grad_norm": 0.47848765856981473, "learning_rate": 4.841178543997329e-05, "loss": 0.8827, "step": 5397 }, { "epoch": 2.4469628286491387, "grad_norm": 0.3405159452648545, "learning_rate": 4.839940931418002e-05, "loss": 0.8602, "step": 5398 }, { "epoch": 2.4474161378059835, "grad_norm": 0.3401262770106382, "learning_rate": 4.838703234716122e-05, "loss": 0.8857, "step": 5399 }, { "epoch": 2.447869446962829, "grad_norm": 0.34992444030240655, "learning_rate": 4.837465454015649e-05, "loss": 0.852, "step": 5400 }, { "epoch": 2.4483227561196736, "grad_norm": 0.3676772283001786, "learning_rate": 4.8362275894405495e-05, "loss": 0.8811, "step": 5401 }, { "epoch": 2.4487760652765185, "grad_norm": 0.3456759166161624, "learning_rate": 4.834989641114799e-05, "loss": 0.8448, "step": 5402 }, { "epoch": 2.4492293744333633, "grad_norm": 0.41459541169515207, "learning_rate": 4.833751609162382e-05, "loss": 0.8832, "step": 5403 }, { "epoch": 2.4496826835902086, "grad_norm": 0.4688391009358986, "learning_rate": 4.8325134937072904e-05, "loss": 0.9057, "step": 5404 }, { "epoch": 2.4501359927470534, "grad_norm": 0.5061400858029779, "learning_rate": 4.831275294873527e-05, "loss": 0.8746, "step": 5405 }, { "epoch": 2.4505893019038982, "grad_norm": 0.505192950254817, "learning_rate": 4.8300370127850984e-05, "loss": 0.8698, "step": 5406 }, { "epoch": 2.4510426110607435, "grad_norm": 0.4935309278024096, "learning_rate": 4.828798647566024e-05, "loss": 0.8602, "step": 5407 }, { "epoch": 2.4514959202175883, "grad_norm": 0.37554617537535684, "learning_rate": 4.827560199340329e-05, "loss": 0.8821, "step": 5408 }, { "epoch": 2.451949229374433, "grad_norm": 0.28190709710888806, "learning_rate": 4.8263216682320485e-05, "loss": 0.8752, "step": 5409 }, { "epoch": 2.4524025385312784, "grad_norm": 0.2631669580783247, "learning_rate": 4.825083054365224e-05, "loss": 0.8818, "step": 5410 }, { "epoch": 2.4528558476881233, "grad_norm": 0.3300651620037356, "learning_rate": 4.823844357863907e-05, "loss": 0.8855, "step": 5411 }, { "epoch": 2.453309156844968, "grad_norm": 0.38895715828725225, "learning_rate": 4.8226055788521564e-05, "loss": 0.8782, "step": 5412 }, { "epoch": 2.4537624660018134, "grad_norm": 0.3903585905537491, "learning_rate": 4.8213667174540394e-05, "loss": 0.8699, "step": 5413 }, { "epoch": 2.454215775158658, "grad_norm": 0.4247089499700825, "learning_rate": 4.820127773793631e-05, "loss": 0.8992, "step": 5414 }, { "epoch": 2.454669084315503, "grad_norm": 0.40510595909696406, "learning_rate": 4.818888747995016e-05, "loss": 0.867, "step": 5415 }, { "epoch": 2.4551223934723483, "grad_norm": 0.42302791519979527, "learning_rate": 4.817649640182286e-05, "loss": 0.8823, "step": 5416 }, { "epoch": 2.455575702629193, "grad_norm": 0.44567836538347566, "learning_rate": 4.816410450479542e-05, "loss": 0.8695, "step": 5417 }, { "epoch": 2.456029011786038, "grad_norm": 0.3938033603033576, "learning_rate": 4.815171179010893e-05, "loss": 0.9063, "step": 5418 }, { "epoch": 2.456482320942883, "grad_norm": 0.25421816336664455, "learning_rate": 4.8139318259004526e-05, "loss": 0.851, "step": 5419 }, { "epoch": 2.456935630099728, "grad_norm": 0.2863806000042518, "learning_rate": 4.81269239127235e-05, "loss": 0.8722, "step": 5420 }, { "epoch": 2.457388939256573, "grad_norm": 0.3649254161988491, "learning_rate": 4.811452875250714e-05, "loss": 0.8733, "step": 5421 }, { "epoch": 2.457842248413418, "grad_norm": 0.3583679898444726, "learning_rate": 4.810213277959689e-05, "loss": 0.8901, "step": 5422 }, { "epoch": 2.458295557570263, "grad_norm": 0.37969146290134514, "learning_rate": 4.8089735995234227e-05, "loss": 0.86, "step": 5423 }, { "epoch": 2.458748866727108, "grad_norm": 0.310030674680303, "learning_rate": 4.807733840066072e-05, "loss": 0.8846, "step": 5424 }, { "epoch": 2.459202175883953, "grad_norm": 1.5011075140711663, "learning_rate": 4.806493999711804e-05, "loss": 0.8832, "step": 5425 }, { "epoch": 2.459655485040798, "grad_norm": 0.312930200426933, "learning_rate": 4.805254078584791e-05, "loss": 0.8851, "step": 5426 }, { "epoch": 2.4601087941976427, "grad_norm": 0.44295252042800637, "learning_rate": 4.8040140768092164e-05, "loss": 0.883, "step": 5427 }, { "epoch": 2.460562103354488, "grad_norm": 0.5438805813237402, "learning_rate": 4.8027739945092685e-05, "loss": 0.8675, "step": 5428 }, { "epoch": 2.461015412511333, "grad_norm": 0.4953883978318142, "learning_rate": 4.801533831809147e-05, "loss": 0.8634, "step": 5429 }, { "epoch": 2.4614687216681777, "grad_norm": 0.41036957985936284, "learning_rate": 4.800293588833056e-05, "loss": 0.8852, "step": 5430 }, { "epoch": 2.4619220308250225, "grad_norm": 0.32706401460397166, "learning_rate": 4.799053265705209e-05, "loss": 0.8809, "step": 5431 }, { "epoch": 2.4623753399818678, "grad_norm": 0.3676703315097909, "learning_rate": 4.7978128625498294e-05, "loss": 0.8776, "step": 5432 }, { "epoch": 2.4628286491387126, "grad_norm": 0.5281786060397857, "learning_rate": 4.796572379491148e-05, "loss": 0.859, "step": 5433 }, { "epoch": 2.4632819582955574, "grad_norm": 0.5748808348084197, "learning_rate": 4.7953318166534006e-05, "loss": 0.879, "step": 5434 }, { "epoch": 2.4637352674524027, "grad_norm": 0.5439822147300265, "learning_rate": 4.7940911741608344e-05, "loss": 0.8844, "step": 5435 }, { "epoch": 2.4641885766092475, "grad_norm": 0.4732155872194248, "learning_rate": 4.792850452137704e-05, "loss": 0.8725, "step": 5436 }, { "epoch": 2.4646418857660923, "grad_norm": 0.44541560851412165, "learning_rate": 4.79160965070827e-05, "loss": 0.8604, "step": 5437 }, { "epoch": 2.4650951949229376, "grad_norm": 0.42207221718055365, "learning_rate": 4.7903687699968035e-05, "loss": 0.8641, "step": 5438 }, { "epoch": 2.4655485040797824, "grad_norm": 0.44645801462719004, "learning_rate": 4.789127810127582e-05, "loss": 0.8882, "step": 5439 }, { "epoch": 2.4660018132366273, "grad_norm": 0.4646420383895577, "learning_rate": 4.78788677122489e-05, "loss": 0.8876, "step": 5440 }, { "epoch": 2.4664551223934725, "grad_norm": 0.4679476779051183, "learning_rate": 4.7866456534130235e-05, "loss": 0.8899, "step": 5441 }, { "epoch": 2.4669084315503174, "grad_norm": 0.2950357735465817, "learning_rate": 4.785404456816282e-05, "loss": 0.867, "step": 5442 }, { "epoch": 2.467361740707162, "grad_norm": 0.3245051114115228, "learning_rate": 4.784163181558976e-05, "loss": 0.8839, "step": 5443 }, { "epoch": 2.467815049864007, "grad_norm": 0.43474999744294446, "learning_rate": 4.782921827765423e-05, "loss": 0.8586, "step": 5444 }, { "epoch": 2.4682683590208523, "grad_norm": 0.4369600093457574, "learning_rate": 4.781680395559948e-05, "loss": 0.8739, "step": 5445 }, { "epoch": 2.468721668177697, "grad_norm": 0.3480210349125851, "learning_rate": 4.7804388850668846e-05, "loss": 0.8923, "step": 5446 }, { "epoch": 2.469174977334542, "grad_norm": 0.2755103063838401, "learning_rate": 4.779197296410573e-05, "loss": 0.8637, "step": 5447 }, { "epoch": 2.469628286491387, "grad_norm": 0.3281488805512811, "learning_rate": 4.777955629715362e-05, "loss": 0.876, "step": 5448 }, { "epoch": 2.470081595648232, "grad_norm": 0.4167901063426073, "learning_rate": 4.776713885105608e-05, "loss": 0.8618, "step": 5449 }, { "epoch": 2.470534904805077, "grad_norm": 0.5129828867980764, "learning_rate": 4.7754720627056756e-05, "loss": 0.8967, "step": 5450 }, { "epoch": 2.470988213961922, "grad_norm": 0.4922354772385084, "learning_rate": 4.774230162639938e-05, "loss": 0.8706, "step": 5451 }, { "epoch": 2.471441523118767, "grad_norm": 0.40424830764843783, "learning_rate": 4.772988185032772e-05, "loss": 0.9128, "step": 5452 }, { "epoch": 2.471894832275612, "grad_norm": 0.3680625147931365, "learning_rate": 4.77174613000857e-05, "loss": 0.8558, "step": 5453 }, { "epoch": 2.472348141432457, "grad_norm": 0.42874331181632, "learning_rate": 4.770503997691723e-05, "loss": 0.8751, "step": 5454 }, { "epoch": 2.472801450589302, "grad_norm": 0.41775099228023677, "learning_rate": 4.769261788206638e-05, "loss": 0.8854, "step": 5455 }, { "epoch": 2.4732547597461467, "grad_norm": 0.37562332751480626, "learning_rate": 4.768019501677723e-05, "loss": 0.8804, "step": 5456 }, { "epoch": 2.473708068902992, "grad_norm": 0.35071809313048974, "learning_rate": 4.7667771382293974e-05, "loss": 0.8942, "step": 5457 }, { "epoch": 2.474161378059837, "grad_norm": 0.30103676812697455, "learning_rate": 4.765534697986088e-05, "loss": 0.8646, "step": 5458 }, { "epoch": 2.4746146872166817, "grad_norm": 0.293564168303737, "learning_rate": 4.76429218107223e-05, "loss": 0.8726, "step": 5459 }, { "epoch": 2.475067996373527, "grad_norm": 0.3678766358004095, "learning_rate": 4.763049587612263e-05, "loss": 0.8717, "step": 5460 }, { "epoch": 2.4755213055303718, "grad_norm": 0.44998499702672645, "learning_rate": 4.761806917730637e-05, "loss": 0.8771, "step": 5461 }, { "epoch": 2.4759746146872166, "grad_norm": 0.5032551074736745, "learning_rate": 4.76056417155181e-05, "loss": 0.8602, "step": 5462 }, { "epoch": 2.476427923844062, "grad_norm": 0.43766708010434824, "learning_rate": 4.759321349200245e-05, "loss": 0.87, "step": 5463 }, { "epoch": 2.4768812330009067, "grad_norm": 0.3160371076103249, "learning_rate": 4.7580784508004154e-05, "loss": 0.8755, "step": 5464 }, { "epoch": 2.4773345421577515, "grad_norm": 0.24990572304324826, "learning_rate": 4.7568354764768014e-05, "loss": 0.8598, "step": 5465 }, { "epoch": 2.477787851314597, "grad_norm": 0.25947772495412774, "learning_rate": 4.75559242635389e-05, "loss": 0.8752, "step": 5466 }, { "epoch": 2.4782411604714416, "grad_norm": 0.25761731341511684, "learning_rate": 4.754349300556177e-05, "loss": 0.8628, "step": 5467 }, { "epoch": 2.4786944696282864, "grad_norm": 0.2625332158912322, "learning_rate": 4.753106099208165e-05, "loss": 0.868, "step": 5468 }, { "epoch": 2.4791477787851317, "grad_norm": 0.3220258317203931, "learning_rate": 4.751862822434362e-05, "loss": 0.8802, "step": 5469 }, { "epoch": 2.4796010879419765, "grad_norm": 0.3148641812760161, "learning_rate": 4.7506194703592884e-05, "loss": 0.8933, "step": 5470 }, { "epoch": 2.4800543970988214, "grad_norm": 0.3024835784488438, "learning_rate": 4.749376043107469e-05, "loss": 0.8887, "step": 5471 }, { "epoch": 2.480507706255666, "grad_norm": 0.2555799424149101, "learning_rate": 4.748132540803436e-05, "loss": 0.8691, "step": 5472 }, { "epoch": 2.4809610154125115, "grad_norm": 0.22552656964622808, "learning_rate": 4.746888963571731e-05, "loss": 0.8647, "step": 5473 }, { "epoch": 2.4814143245693563, "grad_norm": 0.2998076257992529, "learning_rate": 4.745645311536901e-05, "loss": 0.8665, "step": 5474 }, { "epoch": 2.481867633726201, "grad_norm": 0.3469190766474229, "learning_rate": 4.744401584823502e-05, "loss": 0.9133, "step": 5475 }, { "epoch": 2.4823209428830464, "grad_norm": 0.35497172980146496, "learning_rate": 4.743157783556096e-05, "loss": 0.8715, "step": 5476 }, { "epoch": 2.482774252039891, "grad_norm": 0.3790858407956024, "learning_rate": 4.741913907859255e-05, "loss": 0.8861, "step": 5477 }, { "epoch": 2.483227561196736, "grad_norm": 0.40301441974026253, "learning_rate": 4.740669957857555e-05, "loss": 0.8924, "step": 5478 }, { "epoch": 2.4836808703535813, "grad_norm": 0.39336991003161526, "learning_rate": 4.739425933675581e-05, "loss": 0.8917, "step": 5479 }, { "epoch": 2.484134179510426, "grad_norm": 0.38153371211692816, "learning_rate": 4.738181835437927e-05, "loss": 0.8899, "step": 5480 }, { "epoch": 2.484587488667271, "grad_norm": 0.4126393039473719, "learning_rate": 4.736937663269193e-05, "loss": 0.8971, "step": 5481 }, { "epoch": 2.485040797824116, "grad_norm": 0.45607865795598984, "learning_rate": 4.735693417293986e-05, "loss": 0.8778, "step": 5482 }, { "epoch": 2.485494106980961, "grad_norm": 0.47118468013311854, "learning_rate": 4.734449097636922e-05, "loss": 0.8835, "step": 5483 }, { "epoch": 2.485947416137806, "grad_norm": 0.45983145616839943, "learning_rate": 4.733204704422621e-05, "loss": 0.8619, "step": 5484 }, { "epoch": 2.4864007252946507, "grad_norm": 0.3917821388370337, "learning_rate": 4.731960237775715e-05, "loss": 0.8849, "step": 5485 }, { "epoch": 2.486854034451496, "grad_norm": 0.3489611217908264, "learning_rate": 4.73071569782084e-05, "loss": 0.8851, "step": 5486 }, { "epoch": 2.487307343608341, "grad_norm": 0.3717532745514684, "learning_rate": 4.72947108468264e-05, "loss": 0.8737, "step": 5487 }, { "epoch": 2.4877606527651857, "grad_norm": 0.34136444150808626, "learning_rate": 4.728226398485767e-05, "loss": 0.8751, "step": 5488 }, { "epoch": 2.488213961922031, "grad_norm": 0.39439871052056846, "learning_rate": 4.726981639354879e-05, "loss": 0.8973, "step": 5489 }, { "epoch": 2.4886672710788758, "grad_norm": 0.3911914411095558, "learning_rate": 4.725736807414645e-05, "loss": 0.8807, "step": 5490 }, { "epoch": 2.4891205802357206, "grad_norm": 0.3717439019880251, "learning_rate": 4.724491902789736e-05, "loss": 0.876, "step": 5491 }, { "epoch": 2.489573889392566, "grad_norm": 0.30808931967290365, "learning_rate": 4.723246925604834e-05, "loss": 0.8831, "step": 5492 }, { "epoch": 2.4900271985494107, "grad_norm": 0.29071206896601326, "learning_rate": 4.722001875984626e-05, "loss": 0.8769, "step": 5493 }, { "epoch": 2.4904805077062555, "grad_norm": 0.2679571443367385, "learning_rate": 4.720756754053809e-05, "loss": 0.8663, "step": 5494 }, { "epoch": 2.490933816863101, "grad_norm": 0.30132822633506945, "learning_rate": 4.7195115599370846e-05, "loss": 0.8805, "step": 5495 }, { "epoch": 2.4913871260199456, "grad_norm": 0.3110757895295651, "learning_rate": 4.718266293759163e-05, "loss": 0.8818, "step": 5496 }, { "epoch": 2.4918404351767904, "grad_norm": 0.4133104171474226, "learning_rate": 4.717020955644762e-05, "loss": 0.866, "step": 5497 }, { "epoch": 2.4922937443336357, "grad_norm": 0.5135775580308751, "learning_rate": 4.715775545718603e-05, "loss": 0.8738, "step": 5498 }, { "epoch": 2.4927470534904805, "grad_norm": 0.5318109087931773, "learning_rate": 4.7145300641054197e-05, "loss": 0.8877, "step": 5499 }, { "epoch": 2.4932003626473254, "grad_norm": 0.5533407222555866, "learning_rate": 4.713284510929952e-05, "loss": 0.8814, "step": 5500 }, { "epoch": 2.4936536718041706, "grad_norm": 0.5875661873525808, "learning_rate": 4.712038886316944e-05, "loss": 0.8766, "step": 5501 }, { "epoch": 2.4941069809610155, "grad_norm": 0.6639789916128458, "learning_rate": 4.7107931903911475e-05, "loss": 0.8826, "step": 5502 }, { "epoch": 2.4945602901178603, "grad_norm": 0.5821591436548675, "learning_rate": 4.709547423277325e-05, "loss": 0.8675, "step": 5503 }, { "epoch": 2.4950135992747056, "grad_norm": 0.4505835361873875, "learning_rate": 4.708301585100243e-05, "loss": 0.8646, "step": 5504 }, { "epoch": 2.4954669084315504, "grad_norm": 0.29680770578957855, "learning_rate": 4.7070556759846764e-05, "loss": 0.8658, "step": 5505 }, { "epoch": 2.495920217588395, "grad_norm": 0.2998148001002336, "learning_rate": 4.705809696055404e-05, "loss": 0.9021, "step": 5506 }, { "epoch": 2.4963735267452405, "grad_norm": 0.41677096812859243, "learning_rate": 4.704563645437217e-05, "loss": 0.8825, "step": 5507 }, { "epoch": 2.4968268359020853, "grad_norm": 0.5364753342296026, "learning_rate": 4.7033175242549105e-05, "loss": 0.8808, "step": 5508 }, { "epoch": 2.49728014505893, "grad_norm": 0.628234727118295, "learning_rate": 4.702071332633286e-05, "loss": 0.8505, "step": 5509 }, { "epoch": 2.497733454215775, "grad_norm": 0.6002009912184146, "learning_rate": 4.700825070697154e-05, "loss": 0.8816, "step": 5510 }, { "epoch": 2.4981867633726202, "grad_norm": 0.5461104230143402, "learning_rate": 4.6995787385713325e-05, "loss": 0.8822, "step": 5511 }, { "epoch": 2.498640072529465, "grad_norm": 0.5037346946584944, "learning_rate": 4.6983323363806435e-05, "loss": 0.8681, "step": 5512 }, { "epoch": 2.49909338168631, "grad_norm": 0.44658140088784437, "learning_rate": 4.6970858642499196e-05, "loss": 0.8556, "step": 5513 }, { "epoch": 2.499546690843155, "grad_norm": 0.3862959815684738, "learning_rate": 4.6958393223039965e-05, "loss": 0.8736, "step": 5514 }, { "epoch": 2.5, "grad_norm": 0.38119942791454575, "learning_rate": 4.694592710667723e-05, "loss": 0.8811, "step": 5515 }, { "epoch": 2.500453309156845, "grad_norm": 0.36826897631871647, "learning_rate": 4.693346029465945e-05, "loss": 0.8882, "step": 5516 }, { "epoch": 2.50090661831369, "grad_norm": 0.3405490447380599, "learning_rate": 4.692099278823525e-05, "loss": 0.8742, "step": 5517 }, { "epoch": 2.501359927470535, "grad_norm": 0.39405322381910607, "learning_rate": 4.69085245886533e-05, "loss": 0.8954, "step": 5518 }, { "epoch": 2.5018132366273798, "grad_norm": 0.40699448101099345, "learning_rate": 4.6896055697162295e-05, "loss": 0.8741, "step": 5519 }, { "epoch": 2.5022665457842246, "grad_norm": 0.3886812539138604, "learning_rate": 4.688358611501104e-05, "loss": 0.8969, "step": 5520 }, { "epoch": 2.50271985494107, "grad_norm": 0.3225494889114954, "learning_rate": 4.687111584344841e-05, "loss": 0.8846, "step": 5521 }, { "epoch": 2.5031731640979147, "grad_norm": 0.34464237842229317, "learning_rate": 4.685864488372334e-05, "loss": 0.8627, "step": 5522 }, { "epoch": 2.5036264732547595, "grad_norm": 0.3536008331243863, "learning_rate": 4.684617323708482e-05, "loss": 0.872, "step": 5523 }, { "epoch": 2.504079782411605, "grad_norm": 0.3630754404561008, "learning_rate": 4.683370090478193e-05, "loss": 0.8741, "step": 5524 }, { "epoch": 2.5045330915684496, "grad_norm": 0.4737337554348843, "learning_rate": 4.682122788806383e-05, "loss": 0.8921, "step": 5525 }, { "epoch": 2.5049864007252944, "grad_norm": 0.4261692111319818, "learning_rate": 4.6808754188179685e-05, "loss": 0.8865, "step": 5526 }, { "epoch": 2.5054397098821397, "grad_norm": 0.5070187530666659, "learning_rate": 4.679627980637881e-05, "loss": 0.8937, "step": 5527 }, { "epoch": 2.5058930190389845, "grad_norm": 0.4031330180568247, "learning_rate": 4.678380474391053e-05, "loss": 0.8759, "step": 5528 }, { "epoch": 2.5063463281958294, "grad_norm": 0.3945110724343244, "learning_rate": 4.677132900202427e-05, "loss": 0.8974, "step": 5529 }, { "epoch": 2.5067996373526746, "grad_norm": 0.3551785048687741, "learning_rate": 4.675885258196952e-05, "loss": 0.9008, "step": 5530 }, { "epoch": 2.5072529465095195, "grad_norm": 0.39029555094330204, "learning_rate": 4.674637548499581e-05, "loss": 0.8747, "step": 5531 }, { "epoch": 2.5077062556663643, "grad_norm": 0.39420885741663986, "learning_rate": 4.673389771235277e-05, "loss": 0.8768, "step": 5532 }, { "epoch": 2.5081595648232096, "grad_norm": 0.33025379409806477, "learning_rate": 4.6721419265290084e-05, "loss": 0.8845, "step": 5533 }, { "epoch": 2.5086128739800544, "grad_norm": 0.30127496197978565, "learning_rate": 4.6708940145057515e-05, "loss": 0.8898, "step": 5534 }, { "epoch": 2.509066183136899, "grad_norm": 0.5433875325828915, "learning_rate": 4.669646035290486e-05, "loss": 0.8973, "step": 5535 }, { "epoch": 2.5095194922937445, "grad_norm": 0.41300550016795223, "learning_rate": 4.668397989008202e-05, "loss": 0.8613, "step": 5536 }, { "epoch": 2.5099728014505893, "grad_norm": 0.30808650016656136, "learning_rate": 4.667149875783895e-05, "loss": 0.8813, "step": 5537 }, { "epoch": 2.510426110607434, "grad_norm": 0.32639035339028305, "learning_rate": 4.665901695742568e-05, "loss": 0.8996, "step": 5538 }, { "epoch": 2.5108794197642794, "grad_norm": 0.30673523523699725, "learning_rate": 4.664653449009228e-05, "loss": 0.8595, "step": 5539 }, { "epoch": 2.5113327289211242, "grad_norm": 0.36308834244712135, "learning_rate": 4.6634051357088925e-05, "loss": 0.8873, "step": 5540 }, { "epoch": 2.511786038077969, "grad_norm": 0.35675671574436957, "learning_rate": 4.662156755966582e-05, "loss": 0.8619, "step": 5541 }, { "epoch": 2.5122393472348143, "grad_norm": 0.3436511527610942, "learning_rate": 4.660908309907328e-05, "loss": 0.8845, "step": 5542 }, { "epoch": 2.512692656391659, "grad_norm": 0.3244206907182932, "learning_rate": 4.6596597976561624e-05, "loss": 0.8739, "step": 5543 }, { "epoch": 2.513145965548504, "grad_norm": 0.329695856707433, "learning_rate": 4.6584112193381314e-05, "loss": 0.8727, "step": 5544 }, { "epoch": 2.5135992747053493, "grad_norm": 0.34081478288744177, "learning_rate": 4.657162575078281e-05, "loss": 0.8898, "step": 5545 }, { "epoch": 2.514052583862194, "grad_norm": 0.34989283637979945, "learning_rate": 4.655913865001667e-05, "loss": 0.8767, "step": 5546 }, { "epoch": 2.514505893019039, "grad_norm": 0.34234840844094755, "learning_rate": 4.6546650892333526e-05, "loss": 0.8836, "step": 5547 }, { "epoch": 2.514959202175884, "grad_norm": 0.34897618193555013, "learning_rate": 4.653416247898404e-05, "loss": 0.8887, "step": 5548 }, { "epoch": 2.515412511332729, "grad_norm": 0.296993339775738, "learning_rate": 4.6521673411219e-05, "loss": 0.8834, "step": 5549 }, { "epoch": 2.515865820489574, "grad_norm": 0.2955291006064988, "learning_rate": 4.650918369028918e-05, "loss": 0.8945, "step": 5550 }, { "epoch": 2.516319129646419, "grad_norm": 0.2959504008900902, "learning_rate": 4.649669331744548e-05, "loss": 0.8531, "step": 5551 }, { "epoch": 2.516772438803264, "grad_norm": 0.28612470879017154, "learning_rate": 4.648420229393886e-05, "loss": 0.8698, "step": 5552 }, { "epoch": 2.517225747960109, "grad_norm": 0.28997928422845576, "learning_rate": 4.647171062102034e-05, "loss": 0.8821, "step": 5553 }, { "epoch": 2.5176790571169536, "grad_norm": 0.28968108489190825, "learning_rate": 4.6459218299940967e-05, "loss": 0.8813, "step": 5554 }, { "epoch": 2.518132366273799, "grad_norm": 0.2676665565152899, "learning_rate": 4.6446725331951885e-05, "loss": 0.8771, "step": 5555 }, { "epoch": 2.5185856754306437, "grad_norm": 0.24592000614192072, "learning_rate": 4.643423171830433e-05, "loss": 0.8723, "step": 5556 }, { "epoch": 2.5190389845874885, "grad_norm": 0.26428602248608424, "learning_rate": 4.642173746024956e-05, "loss": 0.8762, "step": 5557 }, { "epoch": 2.5194922937443334, "grad_norm": 0.284782028266064, "learning_rate": 4.64092425590389e-05, "loss": 0.865, "step": 5558 }, { "epoch": 2.5199456029011786, "grad_norm": 0.3428433157612863, "learning_rate": 4.639674701592376e-05, "loss": 0.867, "step": 5559 }, { "epoch": 2.5203989120580235, "grad_norm": 0.4431863878290127, "learning_rate": 4.6384250832155626e-05, "loss": 0.8937, "step": 5560 }, { "epoch": 2.5208522212148683, "grad_norm": 0.4036896979817021, "learning_rate": 4.637175400898599e-05, "loss": 0.855, "step": 5561 }, { "epoch": 2.5213055303717136, "grad_norm": 0.3582166803819985, "learning_rate": 4.635925654766647e-05, "loss": 0.8693, "step": 5562 }, { "epoch": 2.5217588395285584, "grad_norm": 0.35901143231009336, "learning_rate": 4.6346758449448726e-05, "loss": 0.8807, "step": 5563 }, { "epoch": 2.522212148685403, "grad_norm": 0.382698198700608, "learning_rate": 4.6334259715584454e-05, "loss": 0.8623, "step": 5564 }, { "epoch": 2.5226654578422485, "grad_norm": 0.3285743419884047, "learning_rate": 4.6321760347325465e-05, "loss": 0.8808, "step": 5565 }, { "epoch": 2.5231187669990933, "grad_norm": 0.3131369475548153, "learning_rate": 4.630926034592359e-05, "loss": 0.8964, "step": 5566 }, { "epoch": 2.523572076155938, "grad_norm": 0.373686441087009, "learning_rate": 4.6296759712630755e-05, "loss": 0.8502, "step": 5567 }, { "epoch": 2.5240253853127834, "grad_norm": 0.3791472264793105, "learning_rate": 4.6284258448698924e-05, "loss": 0.8559, "step": 5568 }, { "epoch": 2.5244786944696282, "grad_norm": 0.41782124682304594, "learning_rate": 4.627175655538015e-05, "loss": 0.8758, "step": 5569 }, { "epoch": 2.524932003626473, "grad_norm": 0.4210177110629557, "learning_rate": 4.6259254033926516e-05, "loss": 0.8685, "step": 5570 }, { "epoch": 2.5253853127833183, "grad_norm": 0.4232857505938946, "learning_rate": 4.624675088559019e-05, "loss": 0.9063, "step": 5571 }, { "epoch": 2.525838621940163, "grad_norm": 0.48432957743782346, "learning_rate": 4.623424711162342e-05, "loss": 0.8755, "step": 5572 }, { "epoch": 2.526291931097008, "grad_norm": 0.47967696248391206, "learning_rate": 4.622174271327847e-05, "loss": 0.8728, "step": 5573 }, { "epoch": 2.5267452402538533, "grad_norm": 0.432739144460773, "learning_rate": 4.620923769180771e-05, "loss": 0.8828, "step": 5574 }, { "epoch": 2.527198549410698, "grad_norm": 0.3608612249399145, "learning_rate": 4.6196732048463544e-05, "loss": 0.8705, "step": 5575 }, { "epoch": 2.527651858567543, "grad_norm": 0.38469975183438887, "learning_rate": 4.618422578449845e-05, "loss": 0.862, "step": 5576 }, { "epoch": 2.528105167724388, "grad_norm": 0.41769118796432264, "learning_rate": 4.617171890116495e-05, "loss": 0.8672, "step": 5577 }, { "epoch": 2.528558476881233, "grad_norm": 0.4622651361591734, "learning_rate": 4.615921139971569e-05, "loss": 0.8859, "step": 5578 }, { "epoch": 2.529011786038078, "grad_norm": 0.47186903918049644, "learning_rate": 4.614670328140329e-05, "loss": 0.8727, "step": 5579 }, { "epoch": 2.529465095194923, "grad_norm": 0.4169168798427771, "learning_rate": 4.61341945474805e-05, "loss": 0.8631, "step": 5580 }, { "epoch": 2.529918404351768, "grad_norm": 0.4754082461698161, "learning_rate": 4.6121685199200094e-05, "loss": 0.8815, "step": 5581 }, { "epoch": 2.530371713508613, "grad_norm": 0.6112798373888094, "learning_rate": 4.610917523781493e-05, "loss": 0.8786, "step": 5582 }, { "epoch": 2.530825022665458, "grad_norm": 0.6819981117926267, "learning_rate": 4.609666466457791e-05, "loss": 0.8795, "step": 5583 }, { "epoch": 2.531278331822303, "grad_norm": 0.6127135309259948, "learning_rate": 4.6084153480742e-05, "loss": 0.8641, "step": 5584 }, { "epoch": 2.5317316409791477, "grad_norm": 0.5289093731246506, "learning_rate": 4.607164168756024e-05, "loss": 0.885, "step": 5585 }, { "epoch": 2.532184950135993, "grad_norm": 0.5376356447856495, "learning_rate": 4.605912928628572e-05, "loss": 0.8729, "step": 5586 }, { "epoch": 2.532638259292838, "grad_norm": 0.5581348661960093, "learning_rate": 4.6046616278171594e-05, "loss": 0.8895, "step": 5587 }, { "epoch": 2.5330915684496826, "grad_norm": 0.5920086191151377, "learning_rate": 4.603410266447109e-05, "loss": 0.8749, "step": 5588 }, { "epoch": 2.533544877606528, "grad_norm": 0.5826138046888624, "learning_rate": 4.602158844643746e-05, "loss": 0.8744, "step": 5589 }, { "epoch": 2.5339981867633727, "grad_norm": 0.45993416443986374, "learning_rate": 4.600907362532405e-05, "loss": 0.8813, "step": 5590 }, { "epoch": 2.5344514959202176, "grad_norm": 0.3810068820748897, "learning_rate": 4.5996558202384256e-05, "loss": 0.9037, "step": 5591 }, { "epoch": 2.5349048050770624, "grad_norm": 0.3671513776779765, "learning_rate": 4.598404217887153e-05, "loss": 0.8569, "step": 5592 }, { "epoch": 2.5353581142339077, "grad_norm": 0.37105173979275385, "learning_rate": 4.597152555603939e-05, "loss": 0.8828, "step": 5593 }, { "epoch": 2.5358114233907525, "grad_norm": 0.46734933048562327, "learning_rate": 4.595900833514144e-05, "loss": 0.8655, "step": 5594 }, { "epoch": 2.5362647325475973, "grad_norm": 0.40118874275083755, "learning_rate": 4.594649051743126e-05, "loss": 0.8682, "step": 5595 }, { "epoch": 2.5367180417044426, "grad_norm": 0.3717522587017892, "learning_rate": 4.59339721041626e-05, "loss": 0.8844, "step": 5596 }, { "epoch": 2.5371713508612874, "grad_norm": 0.47801243039621416, "learning_rate": 4.592145309658918e-05, "loss": 0.8754, "step": 5597 }, { "epoch": 2.5376246600181322, "grad_norm": 0.41322232942674636, "learning_rate": 4.590893349596484e-05, "loss": 0.8619, "step": 5598 }, { "epoch": 2.538077969174977, "grad_norm": 0.35177219801734044, "learning_rate": 4.589641330354344e-05, "loss": 0.8666, "step": 5599 }, { "epoch": 2.5385312783318223, "grad_norm": 0.381099929915266, "learning_rate": 4.5883892520578915e-05, "loss": 0.868, "step": 5600 }, { "epoch": 2.538984587488667, "grad_norm": 0.35251671318928646, "learning_rate": 4.587137114832528e-05, "loss": 0.89, "step": 5601 }, { "epoch": 2.539437896645512, "grad_norm": 0.3398903589334157, "learning_rate": 4.585884918803655e-05, "loss": 0.8644, "step": 5602 }, { "epoch": 2.5398912058023573, "grad_norm": 0.3072295729122076, "learning_rate": 4.5846326640966866e-05, "loss": 0.8834, "step": 5603 }, { "epoch": 2.540344514959202, "grad_norm": 0.3011833399784324, "learning_rate": 4.583380350837038e-05, "loss": 0.8887, "step": 5604 }, { "epoch": 2.540797824116047, "grad_norm": 0.3141466562286501, "learning_rate": 4.582127979150133e-05, "loss": 0.8502, "step": 5605 }, { "epoch": 2.541251133272892, "grad_norm": 0.2972282853811104, "learning_rate": 4.580875549161399e-05, "loss": 0.8846, "step": 5606 }, { "epoch": 2.541704442429737, "grad_norm": 0.364804168259479, "learning_rate": 4.579623060996273e-05, "loss": 0.8911, "step": 5607 }, { "epoch": 2.542157751586582, "grad_norm": 0.4113200626744191, "learning_rate": 4.578370514780194e-05, "loss": 0.8729, "step": 5608 }, { "epoch": 2.542611060743427, "grad_norm": 0.3805955860557126, "learning_rate": 4.577117910638607e-05, "loss": 0.8782, "step": 5609 }, { "epoch": 2.543064369900272, "grad_norm": 0.39457328302743366, "learning_rate": 4.575865248696965e-05, "loss": 0.8656, "step": 5610 }, { "epoch": 2.543517679057117, "grad_norm": 0.4767289801118861, "learning_rate": 4.5746125290807276e-05, "loss": 0.8753, "step": 5611 }, { "epoch": 2.543970988213962, "grad_norm": 0.5276293177115423, "learning_rate": 4.573359751915355e-05, "loss": 0.856, "step": 5612 }, { "epoch": 2.544424297370807, "grad_norm": 0.47140701296511245, "learning_rate": 4.572106917326319e-05, "loss": 0.8808, "step": 5613 }, { "epoch": 2.5448776065276517, "grad_norm": 0.48960302561675095, "learning_rate": 4.570854025439094e-05, "loss": 0.8721, "step": 5614 }, { "epoch": 2.545330915684497, "grad_norm": 0.5162647392474662, "learning_rate": 4.5696010763791596e-05, "loss": 0.8904, "step": 5615 }, { "epoch": 2.545784224841342, "grad_norm": 0.48539862142564777, "learning_rate": 4.568348070272005e-05, "loss": 0.8742, "step": 5616 }, { "epoch": 2.5462375339981866, "grad_norm": 0.40860945710613555, "learning_rate": 4.5670950072431204e-05, "loss": 0.8734, "step": 5617 }, { "epoch": 2.546690843155032, "grad_norm": 0.41603439589758817, "learning_rate": 4.5658418874180034e-05, "loss": 0.8733, "step": 5618 }, { "epoch": 2.5471441523118767, "grad_norm": 0.4848041001218365, "learning_rate": 4.5645887109221586e-05, "loss": 0.8807, "step": 5619 }, { "epoch": 2.5475974614687216, "grad_norm": 0.5653492268288695, "learning_rate": 4.5633354778810945e-05, "loss": 0.909, "step": 5620 }, { "epoch": 2.548050770625567, "grad_norm": 0.5163107369991693, "learning_rate": 4.562082188420329e-05, "loss": 0.8868, "step": 5621 }, { "epoch": 2.5485040797824117, "grad_norm": 0.4540242841622267, "learning_rate": 4.560828842665378e-05, "loss": 0.8635, "step": 5622 }, { "epoch": 2.5489573889392565, "grad_norm": 0.40369205314080436, "learning_rate": 4.5595754407417714e-05, "loss": 0.8711, "step": 5623 }, { "epoch": 2.5494106980961018, "grad_norm": 0.44968396158293994, "learning_rate": 4.558321982775039e-05, "loss": 0.8646, "step": 5624 }, { "epoch": 2.5498640072529466, "grad_norm": 0.4974738944636876, "learning_rate": 4.557068468890719e-05, "loss": 0.8808, "step": 5625 }, { "epoch": 2.5503173164097914, "grad_norm": 0.43225671578602665, "learning_rate": 4.5558148992143547e-05, "loss": 0.8661, "step": 5626 }, { "epoch": 2.5507706255666367, "grad_norm": 0.3225112335928232, "learning_rate": 4.5545612738714956e-05, "loss": 0.8783, "step": 5627 }, { "epoch": 2.5512239347234815, "grad_norm": 0.37057779623501735, "learning_rate": 4.553307592987694e-05, "loss": 0.8805, "step": 5628 }, { "epoch": 2.5516772438803264, "grad_norm": 0.46250146260412855, "learning_rate": 4.5520538566885124e-05, "loss": 0.8689, "step": 5629 }, { "epoch": 2.5521305530371716, "grad_norm": 0.5120490177079945, "learning_rate": 4.5508000650995146e-05, "loss": 0.8784, "step": 5630 }, { "epoch": 2.5525838621940165, "grad_norm": 0.5520212654032604, "learning_rate": 4.5495462183462696e-05, "loss": 0.858, "step": 5631 }, { "epoch": 2.5530371713508613, "grad_norm": 0.5785633523185959, "learning_rate": 4.548292316554357e-05, "loss": 0.878, "step": 5632 }, { "epoch": 2.553490480507706, "grad_norm": 0.4627684624490426, "learning_rate": 4.5470383598493576e-05, "loss": 0.8395, "step": 5633 }, { "epoch": 2.5539437896645514, "grad_norm": 0.4075831183523659, "learning_rate": 4.5457843483568583e-05, "loss": 0.8659, "step": 5634 }, { "epoch": 2.554397098821396, "grad_norm": 0.357728844887925, "learning_rate": 4.5445302822024525e-05, "loss": 0.8761, "step": 5635 }, { "epoch": 2.554850407978241, "grad_norm": 0.30264772376768884, "learning_rate": 4.543276161511739e-05, "loss": 0.8862, "step": 5636 }, { "epoch": 2.555303717135086, "grad_norm": 0.2812894123072208, "learning_rate": 4.542021986410321e-05, "loss": 0.8652, "step": 5637 }, { "epoch": 2.555757026291931, "grad_norm": 0.4269714918375115, "learning_rate": 4.540767757023808e-05, "loss": 0.8848, "step": 5638 }, { "epoch": 2.556210335448776, "grad_norm": 0.4141229983561375, "learning_rate": 4.539513473477814e-05, "loss": 0.8867, "step": 5639 }, { "epoch": 2.556663644605621, "grad_norm": 0.42824640985435747, "learning_rate": 4.538259135897962e-05, "loss": 0.893, "step": 5640 }, { "epoch": 2.557116953762466, "grad_norm": 0.40578639215359136, "learning_rate": 4.537004744409874e-05, "loss": 0.873, "step": 5641 }, { "epoch": 2.557570262919311, "grad_norm": 0.4541550316067125, "learning_rate": 4.535750299139183e-05, "loss": 0.8731, "step": 5642 }, { "epoch": 2.5580235720761557, "grad_norm": 0.483508808114005, "learning_rate": 4.534495800211526e-05, "loss": 0.8782, "step": 5643 }, { "epoch": 2.558476881233001, "grad_norm": 0.4468550324494443, "learning_rate": 4.533241247752542e-05, "loss": 0.8839, "step": 5644 }, { "epoch": 2.558930190389846, "grad_norm": 0.3755064520468057, "learning_rate": 4.531986641887882e-05, "loss": 0.9068, "step": 5645 }, { "epoch": 2.5593834995466906, "grad_norm": 0.3476275036756545, "learning_rate": 4.530731982743195e-05, "loss": 0.8773, "step": 5646 }, { "epoch": 2.559836808703536, "grad_norm": 0.34314917695409686, "learning_rate": 4.52947727044414e-05, "loss": 0.8912, "step": 5647 }, { "epoch": 2.5602901178603807, "grad_norm": 0.31070834590444446, "learning_rate": 4.5282225051163805e-05, "loss": 0.8646, "step": 5648 }, { "epoch": 2.5607434270172256, "grad_norm": 0.2929677273066068, "learning_rate": 4.526967686885585e-05, "loss": 0.8816, "step": 5649 }, { "epoch": 2.561196736174071, "grad_norm": 0.2696127611922488, "learning_rate": 4.525712815877427e-05, "loss": 0.8675, "step": 5650 }, { "epoch": 2.5616500453309157, "grad_norm": 0.30711522191219026, "learning_rate": 4.5244578922175846e-05, "loss": 0.8729, "step": 5651 }, { "epoch": 2.5621033544877605, "grad_norm": 0.34052851852907684, "learning_rate": 4.523202916031743e-05, "loss": 0.8713, "step": 5652 }, { "epoch": 2.5625566636446058, "grad_norm": 0.337249068867083, "learning_rate": 4.521947887445592e-05, "loss": 0.8844, "step": 5653 }, { "epoch": 2.5630099728014506, "grad_norm": 0.33644447523440796, "learning_rate": 4.5206928065848266e-05, "loss": 0.8948, "step": 5654 }, { "epoch": 2.5634632819582954, "grad_norm": 0.40899946330440345, "learning_rate": 4.5194376735751456e-05, "loss": 0.8699, "step": 5655 }, { "epoch": 2.5639165911151407, "grad_norm": 0.44467500840305957, "learning_rate": 4.5181824885422555e-05, "loss": 0.8906, "step": 5656 }, { "epoch": 2.5643699002719855, "grad_norm": 0.45367311666123666, "learning_rate": 4.516927251611866e-05, "loss": 0.8892, "step": 5657 }, { "epoch": 2.5648232094288304, "grad_norm": 0.3927187337310338, "learning_rate": 4.515671962909694e-05, "loss": 0.8555, "step": 5658 }, { "epoch": 2.5652765185856756, "grad_norm": 0.3640283912053851, "learning_rate": 4.514416622561458e-05, "loss": 0.8812, "step": 5659 }, { "epoch": 2.5657298277425205, "grad_norm": 0.4836503718470184, "learning_rate": 4.5131612306928866e-05, "loss": 0.8474, "step": 5660 }, { "epoch": 2.5661831368993653, "grad_norm": 0.7336233288049114, "learning_rate": 4.511905787429709e-05, "loss": 0.8856, "step": 5661 }, { "epoch": 2.5666364460562106, "grad_norm": 0.9010918093876618, "learning_rate": 4.510650292897662e-05, "loss": 0.9127, "step": 5662 }, { "epoch": 2.5670897552130554, "grad_norm": 0.770063032526249, "learning_rate": 4.509394747222488e-05, "loss": 0.8849, "step": 5663 }, { "epoch": 2.5675430643699, "grad_norm": 0.8794326844389323, "learning_rate": 4.508139150529933e-05, "loss": 0.8833, "step": 5664 }, { "epoch": 2.5679963735267455, "grad_norm": 0.8023770200580465, "learning_rate": 4.5068835029457475e-05, "loss": 0.8731, "step": 5665 }, { "epoch": 2.5684496826835903, "grad_norm": 0.7334277932828152, "learning_rate": 4.50562780459569e-05, "loss": 0.8749, "step": 5666 }, { "epoch": 2.568902991840435, "grad_norm": 0.6471371707056286, "learning_rate": 4.5043720556055234e-05, "loss": 0.8879, "step": 5667 }, { "epoch": 2.5693563009972804, "grad_norm": 0.5773864789366067, "learning_rate": 4.5031162561010114e-05, "loss": 0.8742, "step": 5668 }, { "epoch": 2.5698096101541252, "grad_norm": 0.48063133253172535, "learning_rate": 4.5018604062079296e-05, "loss": 0.9044, "step": 5669 }, { "epoch": 2.57026291931097, "grad_norm": 0.3008489371816648, "learning_rate": 4.5006045060520516e-05, "loss": 0.8769, "step": 5670 }, { "epoch": 2.570716228467815, "grad_norm": 0.3696237080773799, "learning_rate": 4.499348555759161e-05, "loss": 0.8763, "step": 5671 }, { "epoch": 2.57116953762466, "grad_norm": 0.5180835321680161, "learning_rate": 4.4980925554550455e-05, "loss": 0.8778, "step": 5672 }, { "epoch": 2.571622846781505, "grad_norm": 0.5110341100924983, "learning_rate": 4.4968365052654975e-05, "loss": 0.8701, "step": 5673 }, { "epoch": 2.57207615593835, "grad_norm": 0.504458570988877, "learning_rate": 4.495580405316312e-05, "loss": 0.8904, "step": 5674 }, { "epoch": 2.572529465095195, "grad_norm": 0.49763418682044586, "learning_rate": 4.4943242557332936e-05, "loss": 0.9019, "step": 5675 }, { "epoch": 2.57298277425204, "grad_norm": 0.46809186019585464, "learning_rate": 4.493068056642247e-05, "loss": 0.8694, "step": 5676 }, { "epoch": 2.5734360834088847, "grad_norm": 0.36876933227772707, "learning_rate": 4.491811808168986e-05, "loss": 0.8792, "step": 5677 }, { "epoch": 2.5738893925657296, "grad_norm": 0.31554351649234047, "learning_rate": 4.490555510439327e-05, "loss": 0.8794, "step": 5678 }, { "epoch": 2.574342701722575, "grad_norm": 0.2516346799971417, "learning_rate": 4.489299163579092e-05, "loss": 0.8914, "step": 5679 }, { "epoch": 2.5747960108794197, "grad_norm": 0.3021373522114211, "learning_rate": 4.488042767714107e-05, "loss": 0.8881, "step": 5680 }, { "epoch": 2.5752493200362645, "grad_norm": 0.3561738725728908, "learning_rate": 4.486786322970205e-05, "loss": 0.8791, "step": 5681 }, { "epoch": 2.5757026291931098, "grad_norm": 0.40481507725258103, "learning_rate": 4.4855298294732214e-05, "loss": 0.8639, "step": 5682 }, { "epoch": 2.5761559383499546, "grad_norm": 0.3978864515071203, "learning_rate": 4.484273287348999e-05, "loss": 0.8777, "step": 5683 }, { "epoch": 2.5766092475067994, "grad_norm": 0.4046163241972828, "learning_rate": 4.4830166967233845e-05, "loss": 0.8778, "step": 5684 }, { "epoch": 2.5770625566636447, "grad_norm": 0.41194815890983666, "learning_rate": 4.4817600577222267e-05, "loss": 0.8969, "step": 5685 }, { "epoch": 2.5775158658204895, "grad_norm": 0.3662285400144278, "learning_rate": 4.4805033704713854e-05, "loss": 0.8543, "step": 5686 }, { "epoch": 2.5779691749773344, "grad_norm": 0.38940231410012943, "learning_rate": 4.4792466350967176e-05, "loss": 0.8774, "step": 5687 }, { "epoch": 2.5784224841341796, "grad_norm": 0.3778322286939644, "learning_rate": 4.477989851724092e-05, "loss": 0.8645, "step": 5688 }, { "epoch": 2.5788757932910245, "grad_norm": 0.3984418388550153, "learning_rate": 4.476733020479377e-05, "loss": 0.8853, "step": 5689 }, { "epoch": 2.5793291024478693, "grad_norm": 0.4558898268556811, "learning_rate": 4.475476141488449e-05, "loss": 0.8897, "step": 5690 }, { "epoch": 2.5797824116047146, "grad_norm": 0.5124880709368048, "learning_rate": 4.474219214877188e-05, "loss": 0.8793, "step": 5691 }, { "epoch": 2.5802357207615594, "grad_norm": 0.5037651816298417, "learning_rate": 4.4729622407714796e-05, "loss": 0.8994, "step": 5692 }, { "epoch": 2.580689029918404, "grad_norm": 0.45409391699949103, "learning_rate": 4.471705219297212e-05, "loss": 0.8639, "step": 5693 }, { "epoch": 2.5811423390752495, "grad_norm": 0.538336537054793, "learning_rate": 4.4704481505802804e-05, "loss": 0.8855, "step": 5694 }, { "epoch": 2.5815956482320943, "grad_norm": 0.46643142130707904, "learning_rate": 4.469191034746584e-05, "loss": 0.8633, "step": 5695 }, { "epoch": 2.582048957388939, "grad_norm": 0.3687408265800859, "learning_rate": 4.467933871922027e-05, "loss": 0.8835, "step": 5696 }, { "epoch": 2.5825022665457844, "grad_norm": 0.33312167285425454, "learning_rate": 4.466676662232518e-05, "loss": 0.8926, "step": 5697 }, { "epoch": 2.5829555757026292, "grad_norm": 0.38507409307182017, "learning_rate": 4.465419405803968e-05, "loss": 0.88, "step": 5698 }, { "epoch": 2.583408884859474, "grad_norm": 0.31455941661455844, "learning_rate": 4.464162102762298e-05, "loss": 0.8788, "step": 5699 }, { "epoch": 2.5838621940163193, "grad_norm": 0.2870655576933641, "learning_rate": 4.4629047532334286e-05, "loss": 0.8886, "step": 5700 }, { "epoch": 2.584315503173164, "grad_norm": 0.38957549754911147, "learning_rate": 4.461647357343289e-05, "loss": 0.8814, "step": 5701 }, { "epoch": 2.584768812330009, "grad_norm": 0.4472498700449439, "learning_rate": 4.460389915217809e-05, "loss": 0.8785, "step": 5702 }, { "epoch": 2.5852221214868543, "grad_norm": 0.43267581734007454, "learning_rate": 4.459132426982925e-05, "loss": 0.9012, "step": 5703 }, { "epoch": 2.585675430643699, "grad_norm": 0.35345552873867153, "learning_rate": 4.4578748927645796e-05, "loss": 0.8727, "step": 5704 }, { "epoch": 2.586128739800544, "grad_norm": 0.33847157789730825, "learning_rate": 4.456617312688718e-05, "loss": 0.8849, "step": 5705 }, { "epoch": 2.586582048957389, "grad_norm": 0.32726355554938663, "learning_rate": 4.4553596868812904e-05, "loss": 0.8971, "step": 5706 }, { "epoch": 2.587035358114234, "grad_norm": 0.3188935570137418, "learning_rate": 4.4541020154682535e-05, "loss": 0.8634, "step": 5707 }, { "epoch": 2.587488667271079, "grad_norm": 0.32320672831995667, "learning_rate": 4.452844298575563e-05, "loss": 0.8573, "step": 5708 }, { "epoch": 2.587941976427924, "grad_norm": 0.3764928638710812, "learning_rate": 4.4515865363291865e-05, "loss": 0.8832, "step": 5709 }, { "epoch": 2.588395285584769, "grad_norm": 0.36821670207276413, "learning_rate": 4.4503287288550904e-05, "loss": 0.8694, "step": 5710 }, { "epoch": 2.5888485947416138, "grad_norm": 0.2917796649779044, "learning_rate": 4.449070876279249e-05, "loss": 0.8525, "step": 5711 }, { "epoch": 2.5893019038984586, "grad_norm": 0.37878419367007904, "learning_rate": 4.4478129787276395e-05, "loss": 0.8866, "step": 5712 }, { "epoch": 2.589755213055304, "grad_norm": 0.4591090524141822, "learning_rate": 4.446555036326244e-05, "loss": 0.8802, "step": 5713 }, { "epoch": 2.5902085222121487, "grad_norm": 0.41488536371220175, "learning_rate": 4.44529704920105e-05, "loss": 0.8839, "step": 5714 }, { "epoch": 2.5906618313689935, "grad_norm": 0.37512418539099823, "learning_rate": 4.444039017478047e-05, "loss": 0.8931, "step": 5715 }, { "epoch": 2.5911151405258384, "grad_norm": 0.30735977714735246, "learning_rate": 4.442780941283231e-05, "loss": 0.8724, "step": 5716 }, { "epoch": 2.5915684496826836, "grad_norm": 0.3102749851917463, "learning_rate": 4.441522820742604e-05, "loss": 0.8964, "step": 5717 }, { "epoch": 2.5920217588395285, "grad_norm": 0.29494984594005885, "learning_rate": 4.440264655982167e-05, "loss": 0.8874, "step": 5718 }, { "epoch": 2.5924750679963733, "grad_norm": 0.26271712901420585, "learning_rate": 4.439006447127932e-05, "loss": 0.8741, "step": 5719 }, { "epoch": 2.5929283771532186, "grad_norm": 0.28180180956807316, "learning_rate": 4.43774819430591e-05, "loss": 0.8709, "step": 5720 }, { "epoch": 2.5933816863100634, "grad_norm": 0.31388145979064624, "learning_rate": 4.436489897642121e-05, "loss": 0.8752, "step": 5721 }, { "epoch": 2.593834995466908, "grad_norm": 0.30528708570407836, "learning_rate": 4.435231557262585e-05, "loss": 0.9016, "step": 5722 }, { "epoch": 2.5942883046237535, "grad_norm": 0.28724868025605355, "learning_rate": 4.43397317329333e-05, "loss": 0.919, "step": 5723 }, { "epoch": 2.5947416137805983, "grad_norm": 0.334406618582032, "learning_rate": 4.432714745860386e-05, "loss": 0.8649, "step": 5724 }, { "epoch": 2.595194922937443, "grad_norm": 0.3212619367980426, "learning_rate": 4.4314562750897886e-05, "loss": 0.8947, "step": 5725 }, { "epoch": 2.5956482320942884, "grad_norm": 0.27257397649182113, "learning_rate": 4.430197761107578e-05, "loss": 0.8742, "step": 5726 }, { "epoch": 2.5961015412511332, "grad_norm": 0.2770993557847155, "learning_rate": 4.4289392040397964e-05, "loss": 0.8698, "step": 5727 }, { "epoch": 2.596554850407978, "grad_norm": 0.2905269956290974, "learning_rate": 4.4276806040124926e-05, "loss": 0.8898, "step": 5728 }, { "epoch": 2.5970081595648233, "grad_norm": 0.3669825202668953, "learning_rate": 4.42642196115172e-05, "loss": 0.8777, "step": 5729 }, { "epoch": 2.597461468721668, "grad_norm": 0.38848500141120024, "learning_rate": 4.425163275583535e-05, "loss": 0.8626, "step": 5730 }, { "epoch": 2.597914777878513, "grad_norm": 0.4068459386458721, "learning_rate": 4.4239045474339986e-05, "loss": 0.87, "step": 5731 }, { "epoch": 2.5983680870353583, "grad_norm": 0.5323635823057011, "learning_rate": 4.422645776829175e-05, "loss": 0.8858, "step": 5732 }, { "epoch": 2.598821396192203, "grad_norm": 0.6153081262938523, "learning_rate": 4.421386963895135e-05, "loss": 0.8776, "step": 5733 }, { "epoch": 2.599274705349048, "grad_norm": 0.5952133421207828, "learning_rate": 4.4201281087579515e-05, "loss": 0.8816, "step": 5734 }, { "epoch": 2.599728014505893, "grad_norm": 0.4418600172885792, "learning_rate": 4.4188692115437036e-05, "loss": 0.8749, "step": 5735 }, { "epoch": 2.600181323662738, "grad_norm": 0.43257293809905917, "learning_rate": 4.4176102723784745e-05, "loss": 0.8893, "step": 5736 }, { "epoch": 2.600634632819583, "grad_norm": 0.36171052635555245, "learning_rate": 4.416351291388348e-05, "loss": 0.8877, "step": 5737 }, { "epoch": 2.601087941976428, "grad_norm": 0.28391258487368526, "learning_rate": 4.4150922686994154e-05, "loss": 0.874, "step": 5738 }, { "epoch": 2.601541251133273, "grad_norm": 0.28673330381657725, "learning_rate": 4.4138332044377735e-05, "loss": 0.862, "step": 5739 }, { "epoch": 2.6019945602901178, "grad_norm": 0.35798124729619574, "learning_rate": 4.412574098729519e-05, "loss": 0.8715, "step": 5740 }, { "epoch": 2.602447869446963, "grad_norm": 0.3938417628184884, "learning_rate": 4.411314951700757e-05, "loss": 0.88, "step": 5741 }, { "epoch": 2.602901178603808, "grad_norm": 0.43526997022609004, "learning_rate": 4.410055763477592e-05, "loss": 0.8432, "step": 5742 }, { "epoch": 2.6033544877606527, "grad_norm": 0.4282135460555934, "learning_rate": 4.408796534186139e-05, "loss": 0.887, "step": 5743 }, { "epoch": 2.603807796917498, "grad_norm": 0.39738628391823794, "learning_rate": 4.407537263952511e-05, "loss": 0.8687, "step": 5744 }, { "epoch": 2.604261106074343, "grad_norm": 0.35405538378759904, "learning_rate": 4.4062779529028295e-05, "loss": 0.8758, "step": 5745 }, { "epoch": 2.6047144152311876, "grad_norm": 0.29182031317671425, "learning_rate": 4.405018601163216e-05, "loss": 0.9, "step": 5746 }, { "epoch": 2.605167724388033, "grad_norm": 0.2979642135646095, "learning_rate": 4.4037592088597984e-05, "loss": 0.8805, "step": 5747 }, { "epoch": 2.6056210335448777, "grad_norm": 0.31888868520563146, "learning_rate": 4.40249977611871e-05, "loss": 0.9005, "step": 5748 }, { "epoch": 2.6060743427017226, "grad_norm": 0.3764903222339265, "learning_rate": 4.401240303066086e-05, "loss": 0.8766, "step": 5749 }, { "epoch": 2.6065276518585674, "grad_norm": 0.3960736626450706, "learning_rate": 4.399980789828066e-05, "loss": 0.8803, "step": 5750 }, { "epoch": 2.6069809610154127, "grad_norm": 0.4226663923030406, "learning_rate": 4.398721236530795e-05, "loss": 0.8602, "step": 5751 }, { "epoch": 2.6074342701722575, "grad_norm": 0.4798458895826599, "learning_rate": 4.397461643300419e-05, "loss": 0.8825, "step": 5752 }, { "epoch": 2.6078875793291023, "grad_norm": 0.48902747945191605, "learning_rate": 4.3962020102630935e-05, "loss": 0.8669, "step": 5753 }, { "epoch": 2.608340888485947, "grad_norm": 0.496817597490097, "learning_rate": 4.3949423375449705e-05, "loss": 0.8892, "step": 5754 }, { "epoch": 2.6087941976427924, "grad_norm": 0.42924772479477696, "learning_rate": 4.393682625272214e-05, "loss": 0.8785, "step": 5755 }, { "epoch": 2.6092475067996372, "grad_norm": 0.3372504705196389, "learning_rate": 4.392422873570984e-05, "loss": 0.8524, "step": 5756 }, { "epoch": 2.609700815956482, "grad_norm": 0.30239142715779005, "learning_rate": 4.3911630825674487e-05, "loss": 0.8712, "step": 5757 }, { "epoch": 2.6101541251133273, "grad_norm": 0.33542830465976714, "learning_rate": 4.389903252387783e-05, "loss": 0.8601, "step": 5758 }, { "epoch": 2.610607434270172, "grad_norm": 0.3718406346042426, "learning_rate": 4.388643383158161e-05, "loss": 0.9045, "step": 5759 }, { "epoch": 2.611060743427017, "grad_norm": 0.3762213750421688, "learning_rate": 4.38738347500476e-05, "loss": 0.8766, "step": 5760 }, { "epoch": 2.6115140525838623, "grad_norm": 0.3912593223255763, "learning_rate": 4.3861235280537666e-05, "loss": 0.877, "step": 5761 }, { "epoch": 2.611967361740707, "grad_norm": 0.3331138494581023, "learning_rate": 4.384863542431367e-05, "loss": 0.8739, "step": 5762 }, { "epoch": 2.612420670897552, "grad_norm": 0.3110998285460133, "learning_rate": 4.383603518263752e-05, "loss": 0.8799, "step": 5763 }, { "epoch": 2.612873980054397, "grad_norm": 0.25888158248503396, "learning_rate": 4.382343455677118e-05, "loss": 0.8805, "step": 5764 }, { "epoch": 2.613327289211242, "grad_norm": 0.30273122226976784, "learning_rate": 4.3810833547976636e-05, "loss": 0.8785, "step": 5765 }, { "epoch": 2.613780598368087, "grad_norm": 0.3138459749583387, "learning_rate": 4.3798232157515905e-05, "loss": 0.8732, "step": 5766 }, { "epoch": 2.614233907524932, "grad_norm": 0.346331516778546, "learning_rate": 4.378563038665106e-05, "loss": 0.8774, "step": 5767 }, { "epoch": 2.614687216681777, "grad_norm": 0.29181769816428743, "learning_rate": 4.3773028236644206e-05, "loss": 0.8717, "step": 5768 }, { "epoch": 2.6151405258386218, "grad_norm": 0.2764648938893759, "learning_rate": 4.3760425708757485e-05, "loss": 0.8813, "step": 5769 }, { "epoch": 2.615593834995467, "grad_norm": 0.3262987673122555, "learning_rate": 4.374782280425308e-05, "loss": 0.8558, "step": 5770 }, { "epoch": 2.616047144152312, "grad_norm": 0.37043506128123976, "learning_rate": 4.373521952439321e-05, "loss": 0.8783, "step": 5771 }, { "epoch": 2.6165004533091567, "grad_norm": 0.3240804085931681, "learning_rate": 4.372261587044012e-05, "loss": 0.8651, "step": 5772 }, { "epoch": 2.616953762466002, "grad_norm": 0.24228842606808515, "learning_rate": 4.3710011843656115e-05, "loss": 0.8865, "step": 5773 }, { "epoch": 2.617407071622847, "grad_norm": 0.3504785523642531, "learning_rate": 4.369740744530353e-05, "loss": 0.8455, "step": 5774 }, { "epoch": 2.6178603807796916, "grad_norm": 0.41333872726366994, "learning_rate": 4.3684802676644705e-05, "loss": 0.889, "step": 5775 }, { "epoch": 2.618313689936537, "grad_norm": 0.3853045508958026, "learning_rate": 4.3672197538942064e-05, "loss": 0.8999, "step": 5776 }, { "epoch": 2.6187669990933817, "grad_norm": 0.33784276181636025, "learning_rate": 4.3659592033458055e-05, "loss": 0.8885, "step": 5777 }, { "epoch": 2.6192203082502266, "grad_norm": 0.407903777800908, "learning_rate": 4.3646986161455145e-05, "loss": 0.8792, "step": 5778 }, { "epoch": 2.619673617407072, "grad_norm": 0.2994795091131912, "learning_rate": 4.3634379924195846e-05, "loss": 0.8895, "step": 5779 }, { "epoch": 2.6201269265639167, "grad_norm": 0.25911724616859616, "learning_rate": 4.3621773322942725e-05, "loss": 0.8901, "step": 5780 }, { "epoch": 2.6205802357207615, "grad_norm": 0.24772031148080917, "learning_rate": 4.3609166358958356e-05, "loss": 0.8631, "step": 5781 }, { "epoch": 2.6210335448776068, "grad_norm": 0.27102325218243517, "learning_rate": 4.359655903350537e-05, "loss": 0.8695, "step": 5782 }, { "epoch": 2.6214868540344516, "grad_norm": 0.4073579407986265, "learning_rate": 4.3583951347846434e-05, "loss": 0.8674, "step": 5783 }, { "epoch": 2.6219401631912964, "grad_norm": 0.46448883033579563, "learning_rate": 4.357134330324424e-05, "loss": 0.8697, "step": 5784 }, { "epoch": 2.6223934723481417, "grad_norm": 0.398192848795283, "learning_rate": 4.355873490096151e-05, "loss": 0.852, "step": 5785 }, { "epoch": 2.6228467815049865, "grad_norm": 0.35656782741186027, "learning_rate": 4.3546126142261024e-05, "loss": 0.8777, "step": 5786 }, { "epoch": 2.6233000906618313, "grad_norm": 0.3096534602833606, "learning_rate": 4.3533517028405574e-05, "loss": 0.8818, "step": 5787 }, { "epoch": 2.6237533998186766, "grad_norm": 0.2823762509696381, "learning_rate": 4.352090756065802e-05, "loss": 0.8824, "step": 5788 }, { "epoch": 2.6242067089755214, "grad_norm": 0.28390716482579975, "learning_rate": 4.350829774028122e-05, "loss": 0.8856, "step": 5789 }, { "epoch": 2.6246600181323663, "grad_norm": 0.33504735790868795, "learning_rate": 4.349568756853809e-05, "loss": 0.8814, "step": 5790 }, { "epoch": 2.625113327289211, "grad_norm": 0.3173248374694659, "learning_rate": 4.348307704669158e-05, "loss": 0.8826, "step": 5791 }, { "epoch": 2.6255666364460564, "grad_norm": 0.25683793637902996, "learning_rate": 4.347046617600466e-05, "loss": 0.8768, "step": 5792 }, { "epoch": 2.626019945602901, "grad_norm": 0.2883396588216147, "learning_rate": 4.345785495774037e-05, "loss": 0.9054, "step": 5793 }, { "epoch": 2.626473254759746, "grad_norm": 0.2967380206914427, "learning_rate": 4.3445243393161726e-05, "loss": 0.8776, "step": 5794 }, { "epoch": 2.626926563916591, "grad_norm": 0.3195175924225291, "learning_rate": 4.343263148353184e-05, "loss": 0.8714, "step": 5795 }, { "epoch": 2.627379873073436, "grad_norm": 0.2568592066592151, "learning_rate": 4.342001923011382e-05, "loss": 0.8872, "step": 5796 }, { "epoch": 2.627833182230281, "grad_norm": 0.37513272465755443, "learning_rate": 4.340740663417082e-05, "loss": 0.8916, "step": 5797 }, { "epoch": 2.628286491387126, "grad_norm": 0.46966032949976744, "learning_rate": 4.339479369696603e-05, "loss": 0.884, "step": 5798 }, { "epoch": 2.628739800543971, "grad_norm": 0.533877454098109, "learning_rate": 4.3382180419762686e-05, "loss": 0.8733, "step": 5799 }, { "epoch": 2.629193109700816, "grad_norm": 0.575954919668876, "learning_rate": 4.336956680382402e-05, "loss": 0.8541, "step": 5800 }, { "epoch": 2.6296464188576607, "grad_norm": 0.5676695046754546, "learning_rate": 4.335695285041334e-05, "loss": 0.8503, "step": 5801 }, { "epoch": 2.630099728014506, "grad_norm": 0.5052112294078548, "learning_rate": 4.334433856079397e-05, "loss": 0.8904, "step": 5802 }, { "epoch": 2.630553037171351, "grad_norm": 0.3904114255702677, "learning_rate": 4.333172393622927e-05, "loss": 0.8829, "step": 5803 }, { "epoch": 2.6310063463281956, "grad_norm": 0.24833833698031302, "learning_rate": 4.331910897798261e-05, "loss": 0.8613, "step": 5804 }, { "epoch": 2.631459655485041, "grad_norm": 0.2509472236836431, "learning_rate": 4.330649368731744e-05, "loss": 0.8906, "step": 5805 }, { "epoch": 2.6319129646418857, "grad_norm": 0.28336860891557325, "learning_rate": 4.3293878065497204e-05, "loss": 0.8807, "step": 5806 }, { "epoch": 2.6323662737987306, "grad_norm": 0.2940543741282835, "learning_rate": 4.328126211378541e-05, "loss": 0.8982, "step": 5807 }, { "epoch": 2.632819582955576, "grad_norm": 0.30973995361779977, "learning_rate": 4.326864583344556e-05, "loss": 0.9136, "step": 5808 }, { "epoch": 2.6332728921124207, "grad_norm": 0.3730836125039869, "learning_rate": 4.3256029225741226e-05, "loss": 0.8757, "step": 5809 }, { "epoch": 2.6337262012692655, "grad_norm": 0.5724804258326537, "learning_rate": 4.3243412291936e-05, "loss": 0.8931, "step": 5810 }, { "epoch": 2.6341795104261108, "grad_norm": 0.6626960231900365, "learning_rate": 4.323079503329349e-05, "loss": 0.8765, "step": 5811 }, { "epoch": 2.6346328195829556, "grad_norm": 0.6742700042781729, "learning_rate": 4.321817745107739e-05, "loss": 0.8764, "step": 5812 }, { "epoch": 2.6350861287398004, "grad_norm": 0.6473070579099056, "learning_rate": 4.3205559546551336e-05, "loss": 0.8954, "step": 5813 }, { "epoch": 2.6355394378966457, "grad_norm": 0.5648002729682227, "learning_rate": 4.319294132097908e-05, "loss": 0.8819, "step": 5814 }, { "epoch": 2.6359927470534905, "grad_norm": 0.4221391860665615, "learning_rate": 4.318032277562436e-05, "loss": 0.8645, "step": 5815 }, { "epoch": 2.6364460562103353, "grad_norm": 0.4906998769809952, "learning_rate": 4.316770391175098e-05, "loss": 0.8644, "step": 5816 }, { "epoch": 2.6368993653671806, "grad_norm": 0.4792628542468413, "learning_rate": 4.315508473062273e-05, "loss": 0.865, "step": 5817 }, { "epoch": 2.6373526745240254, "grad_norm": 0.2917770591894847, "learning_rate": 4.314246523350347e-05, "loss": 0.8911, "step": 5818 }, { "epoch": 2.6378059836808703, "grad_norm": 0.4186093173669434, "learning_rate": 4.312984542165709e-05, "loss": 0.8679, "step": 5819 }, { "epoch": 2.6382592928377155, "grad_norm": 0.4908113792354741, "learning_rate": 4.31172252963475e-05, "loss": 0.8751, "step": 5820 }, { "epoch": 2.6387126019945604, "grad_norm": 0.47287779239461586, "learning_rate": 4.310460485883861e-05, "loss": 0.8811, "step": 5821 }, { "epoch": 2.639165911151405, "grad_norm": 0.49025102062293935, "learning_rate": 4.309198411039445e-05, "loss": 0.8871, "step": 5822 }, { "epoch": 2.6396192203082505, "grad_norm": 0.5392233828214541, "learning_rate": 4.307936305227898e-05, "loss": 0.8717, "step": 5823 }, { "epoch": 2.6400725294650953, "grad_norm": 0.5147894169829097, "learning_rate": 4.306674168575624e-05, "loss": 0.8676, "step": 5824 }, { "epoch": 2.64052583862194, "grad_norm": 0.4589206285368694, "learning_rate": 4.305412001209032e-05, "loss": 0.8799, "step": 5825 }, { "epoch": 2.6409791477787854, "grad_norm": 0.3678671390527449, "learning_rate": 4.304149803254531e-05, "loss": 0.9039, "step": 5826 }, { "epoch": 2.6414324569356302, "grad_norm": 0.3578955645728604, "learning_rate": 4.302887574838533e-05, "loss": 0.8642, "step": 5827 }, { "epoch": 2.641885766092475, "grad_norm": 0.2972067330399155, "learning_rate": 4.301625316087453e-05, "loss": 0.8768, "step": 5828 }, { "epoch": 2.64233907524932, "grad_norm": 0.2762311223534963, "learning_rate": 4.300363027127712e-05, "loss": 0.8812, "step": 5829 }, { "epoch": 2.642792384406165, "grad_norm": 0.3240156154705935, "learning_rate": 4.29910070808573e-05, "loss": 0.9028, "step": 5830 }, { "epoch": 2.64324569356301, "grad_norm": 0.2854057619356003, "learning_rate": 4.2978383590879344e-05, "loss": 0.886, "step": 5831 }, { "epoch": 2.643699002719855, "grad_norm": 0.3097857186700373, "learning_rate": 4.296575980260752e-05, "loss": 0.889, "step": 5832 }, { "epoch": 2.6441523118766996, "grad_norm": 0.31030925565946493, "learning_rate": 4.295313571730613e-05, "loss": 0.9016, "step": 5833 }, { "epoch": 2.644605621033545, "grad_norm": 0.32696981343736176, "learning_rate": 4.294051133623952e-05, "loss": 0.8755, "step": 5834 }, { "epoch": 2.6450589301903897, "grad_norm": 0.3638061627642944, "learning_rate": 4.292788666067205e-05, "loss": 0.8828, "step": 5835 }, { "epoch": 2.6455122393472346, "grad_norm": 0.33660761357923985, "learning_rate": 4.2915261691868125e-05, "loss": 0.8679, "step": 5836 }, { "epoch": 2.64596554850408, "grad_norm": 0.29122783392771034, "learning_rate": 4.2902636431092184e-05, "loss": 0.8673, "step": 5837 }, { "epoch": 2.6464188576609247, "grad_norm": 0.3223695944492579, "learning_rate": 4.2890010879608674e-05, "loss": 0.8864, "step": 5838 }, { "epoch": 2.6468721668177695, "grad_norm": 0.32203878766488553, "learning_rate": 4.287738503868207e-05, "loss": 0.899, "step": 5839 }, { "epoch": 2.6473254759746148, "grad_norm": 0.3229040503272708, "learning_rate": 4.286475890957691e-05, "loss": 0.8774, "step": 5840 }, { "epoch": 2.6477787851314596, "grad_norm": 0.39220606244012624, "learning_rate": 4.2852132493557726e-05, "loss": 0.9141, "step": 5841 }, { "epoch": 2.6482320942883044, "grad_norm": 0.4317348318583508, "learning_rate": 4.283950579188908e-05, "loss": 0.881, "step": 5842 }, { "epoch": 2.6486854034451497, "grad_norm": 0.5045309772359299, "learning_rate": 4.28268788058356e-05, "loss": 0.9088, "step": 5843 }, { "epoch": 2.6491387126019945, "grad_norm": 0.45862834633589494, "learning_rate": 4.281425153666188e-05, "loss": 0.8779, "step": 5844 }, { "epoch": 2.6495920217588393, "grad_norm": 0.460202085405437, "learning_rate": 4.2801623985632606e-05, "loss": 0.8935, "step": 5845 }, { "epoch": 2.6500453309156846, "grad_norm": 0.4816735816274237, "learning_rate": 4.278899615401246e-05, "loss": 0.8914, "step": 5846 }, { "epoch": 2.6504986400725294, "grad_norm": 0.5370030872549255, "learning_rate": 4.277636804306615e-05, "loss": 0.8755, "step": 5847 }, { "epoch": 2.6509519492293743, "grad_norm": 0.5231839421615454, "learning_rate": 4.276373965405842e-05, "loss": 0.8609, "step": 5848 }, { "epoch": 2.6514052583862195, "grad_norm": 0.432143436365859, "learning_rate": 4.275111098825403e-05, "loss": 0.8827, "step": 5849 }, { "epoch": 2.6518585675430644, "grad_norm": 0.34828069710668724, "learning_rate": 4.273848204691781e-05, "loss": 0.8695, "step": 5850 }, { "epoch": 2.652311876699909, "grad_norm": 0.3099611075170249, "learning_rate": 4.2725852831314556e-05, "loss": 0.8742, "step": 5851 }, { "epoch": 2.6527651858567545, "grad_norm": 0.24953142850699706, "learning_rate": 4.271322334270912e-05, "loss": 0.8969, "step": 5852 }, { "epoch": 2.6532184950135993, "grad_norm": 0.2713176958787826, "learning_rate": 4.270059358236641e-05, "loss": 0.8807, "step": 5853 }, { "epoch": 2.653671804170444, "grad_norm": 0.30820993441507577, "learning_rate": 4.2687963551551305e-05, "loss": 0.88, "step": 5854 }, { "epoch": 2.6541251133272894, "grad_norm": 0.30084053166345714, "learning_rate": 4.267533325152875e-05, "loss": 0.881, "step": 5855 }, { "epoch": 2.6545784224841342, "grad_norm": 0.3985092286164874, "learning_rate": 4.266270268356372e-05, "loss": 0.8821, "step": 5856 }, { "epoch": 2.655031731640979, "grad_norm": 0.4399328374528351, "learning_rate": 4.265007184892117e-05, "loss": 0.867, "step": 5857 }, { "epoch": 2.6554850407978243, "grad_norm": 0.47195598164238695, "learning_rate": 4.263744074886615e-05, "loss": 0.8941, "step": 5858 }, { "epoch": 2.655938349954669, "grad_norm": 0.44588914378453715, "learning_rate": 4.262480938466368e-05, "loss": 0.8638, "step": 5859 }, { "epoch": 2.656391659111514, "grad_norm": 0.39348822395865923, "learning_rate": 4.261217775757884e-05, "loss": 0.8856, "step": 5860 }, { "epoch": 2.6568449682683593, "grad_norm": 0.4390186223899267, "learning_rate": 4.259954586887673e-05, "loss": 0.8641, "step": 5861 }, { "epoch": 2.657298277425204, "grad_norm": 0.48441472010317743, "learning_rate": 4.258691371982244e-05, "loss": 0.8889, "step": 5862 }, { "epoch": 2.657751586582049, "grad_norm": 0.3694786839760859, "learning_rate": 4.257428131168116e-05, "loss": 0.8795, "step": 5863 }, { "epoch": 2.658204895738894, "grad_norm": 0.31080582528072104, "learning_rate": 4.2561648645718024e-05, "loss": 0.8937, "step": 5864 }, { "epoch": 2.658658204895739, "grad_norm": 0.35847847409543554, "learning_rate": 4.254901572319825e-05, "loss": 0.8771, "step": 5865 }, { "epoch": 2.659111514052584, "grad_norm": 0.3584365745364404, "learning_rate": 4.2536382545387065e-05, "loss": 0.8614, "step": 5866 }, { "epoch": 2.659564823209429, "grad_norm": 0.31289904643898675, "learning_rate": 4.252374911354971e-05, "loss": 0.8842, "step": 5867 }, { "epoch": 2.660018132366274, "grad_norm": 0.3348199855557196, "learning_rate": 4.2511115428951465e-05, "loss": 0.8689, "step": 5868 }, { "epoch": 2.6604714415231188, "grad_norm": 0.4257807747875663, "learning_rate": 4.249848149285765e-05, "loss": 0.8664, "step": 5869 }, { "epoch": 2.6609247506799636, "grad_norm": 0.5494774627436787, "learning_rate": 4.248584730653354e-05, "loss": 0.8872, "step": 5870 }, { "epoch": 2.661378059836809, "grad_norm": 0.49897662152774386, "learning_rate": 4.2473212871244535e-05, "loss": 0.8685, "step": 5871 }, { "epoch": 2.6618313689936537, "grad_norm": 0.3995748871341462, "learning_rate": 4.246057818825599e-05, "loss": 0.872, "step": 5872 }, { "epoch": 2.6622846781504985, "grad_norm": 0.37497146991864033, "learning_rate": 4.24479432588333e-05, "loss": 0.8728, "step": 5873 }, { "epoch": 2.6627379873073433, "grad_norm": 0.3627815118404913, "learning_rate": 4.24353080842419e-05, "loss": 0.8703, "step": 5874 }, { "epoch": 2.6631912964641886, "grad_norm": 0.2903122854041225, "learning_rate": 4.242267266574724e-05, "loss": 0.8742, "step": 5875 }, { "epoch": 2.6636446056210334, "grad_norm": 0.28118488735585306, "learning_rate": 4.2410037004614795e-05, "loss": 0.8682, "step": 5876 }, { "epoch": 2.6640979147778783, "grad_norm": 0.3278685788785086, "learning_rate": 4.239740110211006e-05, "loss": 0.8848, "step": 5877 }, { "epoch": 2.6645512239347235, "grad_norm": 0.40951786156735076, "learning_rate": 4.238476495949855e-05, "loss": 0.8687, "step": 5878 }, { "epoch": 2.6650045330915684, "grad_norm": 0.4480763902041912, "learning_rate": 4.237212857804583e-05, "loss": 0.8893, "step": 5879 }, { "epoch": 2.665457842248413, "grad_norm": 0.40655713162019047, "learning_rate": 4.235949195901747e-05, "loss": 0.8809, "step": 5880 }, { "epoch": 2.6659111514052585, "grad_norm": 0.35080164050978074, "learning_rate": 4.234685510367904e-05, "loss": 0.9031, "step": 5881 }, { "epoch": 2.6663644605621033, "grad_norm": 0.2853865448787389, "learning_rate": 4.233421801329618e-05, "loss": 0.8632, "step": 5882 }, { "epoch": 2.666817769718948, "grad_norm": 0.2772087140132203, "learning_rate": 4.2321580689134524e-05, "loss": 0.8826, "step": 5883 }, { "epoch": 2.6672710788757934, "grad_norm": 0.2505661280512699, "learning_rate": 4.2308943132459746e-05, "loss": 0.8852, "step": 5884 }, { "epoch": 2.6677243880326382, "grad_norm": 0.2737373709428012, "learning_rate": 4.229630534453752e-05, "loss": 0.8915, "step": 5885 }, { "epoch": 2.668177697189483, "grad_norm": 0.41131915031885097, "learning_rate": 4.228366732663356e-05, "loss": 0.8815, "step": 5886 }, { "epoch": 2.6686310063463283, "grad_norm": 0.3864017882923296, "learning_rate": 4.227102908001362e-05, "loss": 0.8627, "step": 5887 }, { "epoch": 2.669084315503173, "grad_norm": 0.35560654019003946, "learning_rate": 4.225839060594343e-05, "loss": 0.8713, "step": 5888 }, { "epoch": 2.669537624660018, "grad_norm": 0.3313132615323578, "learning_rate": 4.22457519056888e-05, "loss": 0.8644, "step": 5889 }, { "epoch": 2.6699909338168633, "grad_norm": 0.2971964740293958, "learning_rate": 4.223311298051551e-05, "loss": 0.8772, "step": 5890 }, { "epoch": 2.670444242973708, "grad_norm": 0.27110315038183075, "learning_rate": 4.222047383168938e-05, "loss": 0.896, "step": 5891 }, { "epoch": 2.670897552130553, "grad_norm": 0.2782855330923926, "learning_rate": 4.2207834460476274e-05, "loss": 0.8777, "step": 5892 }, { "epoch": 2.671350861287398, "grad_norm": 0.2705988207958185, "learning_rate": 4.219519486814206e-05, "loss": 0.8707, "step": 5893 }, { "epoch": 2.671804170444243, "grad_norm": 0.27772960933678814, "learning_rate": 4.218255505595263e-05, "loss": 0.8871, "step": 5894 }, { "epoch": 2.672257479601088, "grad_norm": 0.2883916436668078, "learning_rate": 4.21699150251739e-05, "loss": 0.8873, "step": 5895 }, { "epoch": 2.672710788757933, "grad_norm": 0.2534342328299184, "learning_rate": 4.215727477707179e-05, "loss": 0.8874, "step": 5896 }, { "epoch": 2.673164097914778, "grad_norm": 0.2872441968891712, "learning_rate": 4.214463431291229e-05, "loss": 0.8759, "step": 5897 }, { "epoch": 2.6736174070716228, "grad_norm": 0.28140700323449885, "learning_rate": 4.213199363396135e-05, "loss": 0.8767, "step": 5898 }, { "epoch": 2.674070716228468, "grad_norm": 0.3159937470273067, "learning_rate": 4.2119352741484996e-05, "loss": 0.8641, "step": 5899 }, { "epoch": 2.674524025385313, "grad_norm": 0.2652752852045863, "learning_rate": 4.210671163674922e-05, "loss": 0.8856, "step": 5900 }, { "epoch": 2.6749773345421577, "grad_norm": 0.2557341910114316, "learning_rate": 4.2094070321020085e-05, "loss": 0.8694, "step": 5901 }, { "epoch": 2.675430643699003, "grad_norm": 0.30183663218291523, "learning_rate": 4.208142879556366e-05, "loss": 0.8642, "step": 5902 }, { "epoch": 2.675883952855848, "grad_norm": 0.32393197314966465, "learning_rate": 4.2068787061646005e-05, "loss": 0.8655, "step": 5903 }, { "epoch": 2.6763372620126926, "grad_norm": 0.40389116258897156, "learning_rate": 4.205614512053326e-05, "loss": 0.8794, "step": 5904 }, { "epoch": 2.676790571169538, "grad_norm": 0.47618337815108375, "learning_rate": 4.2043502973491534e-05, "loss": 0.8582, "step": 5905 }, { "epoch": 2.6772438803263827, "grad_norm": 0.447245020333917, "learning_rate": 4.203086062178698e-05, "loss": 0.8713, "step": 5906 }, { "epoch": 2.6776971894832275, "grad_norm": 0.31269328069104824, "learning_rate": 4.2018218066685765e-05, "loss": 0.8722, "step": 5907 }, { "epoch": 2.6781504986400724, "grad_norm": 0.27478430066642073, "learning_rate": 4.200557530945409e-05, "loss": 0.8899, "step": 5908 }, { "epoch": 2.6786038077969176, "grad_norm": 0.3675617812163144, "learning_rate": 4.1992932351358154e-05, "loss": 0.8737, "step": 5909 }, { "epoch": 2.6790571169537625, "grad_norm": 0.41433068559269365, "learning_rate": 4.198028919366417e-05, "loss": 0.8612, "step": 5910 }, { "epoch": 2.6795104261106073, "grad_norm": 0.5532171218032518, "learning_rate": 4.1967645837638414e-05, "loss": 0.8961, "step": 5911 }, { "epoch": 2.679963735267452, "grad_norm": 0.7172143926693726, "learning_rate": 4.195500228454714e-05, "loss": 0.8862, "step": 5912 }, { "epoch": 2.6804170444242974, "grad_norm": 0.7512440389399889, "learning_rate": 4.194235853565665e-05, "loss": 0.8804, "step": 5913 }, { "epoch": 2.6808703535811422, "grad_norm": 0.6048281808234872, "learning_rate": 4.192971459223324e-05, "loss": 0.8912, "step": 5914 }, { "epoch": 2.681323662737987, "grad_norm": 0.5224829983366013, "learning_rate": 4.191707045554323e-05, "loss": 0.8685, "step": 5915 }, { "epoch": 2.6817769718948323, "grad_norm": 0.38414674219716544, "learning_rate": 4.190442612685299e-05, "loss": 0.8738, "step": 5916 }, { "epoch": 2.682230281051677, "grad_norm": 0.3055878846847181, "learning_rate": 4.189178160742887e-05, "loss": 0.8752, "step": 5917 }, { "epoch": 2.682683590208522, "grad_norm": 0.26948131135023795, "learning_rate": 4.187913689853728e-05, "loss": 0.8804, "step": 5918 }, { "epoch": 2.6831368993653673, "grad_norm": 0.2887312826728196, "learning_rate": 4.1866492001444585e-05, "loss": 0.8878, "step": 5919 }, { "epoch": 2.683590208522212, "grad_norm": 0.33721317577252385, "learning_rate": 4.1853846917417246e-05, "loss": 0.8955, "step": 5920 }, { "epoch": 2.684043517679057, "grad_norm": 0.29052083687732266, "learning_rate": 4.184120164772168e-05, "loss": 0.8722, "step": 5921 }, { "epoch": 2.684496826835902, "grad_norm": 0.23176089412570006, "learning_rate": 4.182855619362436e-05, "loss": 0.8834, "step": 5922 }, { "epoch": 2.684950135992747, "grad_norm": 0.24568353692676947, "learning_rate": 4.181591055639177e-05, "loss": 0.8735, "step": 5923 }, { "epoch": 2.685403445149592, "grad_norm": 0.30723156833746207, "learning_rate": 4.180326473729039e-05, "loss": 0.877, "step": 5924 }, { "epoch": 2.685856754306437, "grad_norm": 0.3349188424622035, "learning_rate": 4.1790618737586765e-05, "loss": 0.8863, "step": 5925 }, { "epoch": 2.686310063463282, "grad_norm": 0.4129836390736259, "learning_rate": 4.177797255854739e-05, "loss": 0.9148, "step": 5926 }, { "epoch": 2.6867633726201268, "grad_norm": 0.47872457947805086, "learning_rate": 4.176532620143885e-05, "loss": 0.8787, "step": 5927 }, { "epoch": 2.687216681776972, "grad_norm": 0.5131729817093316, "learning_rate": 4.1752679667527706e-05, "loss": 0.8888, "step": 5928 }, { "epoch": 2.687669990933817, "grad_norm": 0.5269444960836895, "learning_rate": 4.174003295808053e-05, "loss": 0.867, "step": 5929 }, { "epoch": 2.6881233000906617, "grad_norm": 0.5673517450366239, "learning_rate": 4.172738607436395e-05, "loss": 0.8806, "step": 5930 }, { "epoch": 2.688576609247507, "grad_norm": 0.5553724811748905, "learning_rate": 4.171473901764457e-05, "loss": 0.8812, "step": 5931 }, { "epoch": 2.689029918404352, "grad_norm": 0.4685387232190202, "learning_rate": 4.170209178918904e-05, "loss": 0.8681, "step": 5932 }, { "epoch": 2.6894832275611966, "grad_norm": 0.3479199295727952, "learning_rate": 4.168944439026401e-05, "loss": 0.8716, "step": 5933 }, { "epoch": 2.689936536718042, "grad_norm": 0.34764051547335456, "learning_rate": 4.1676796822136165e-05, "loss": 0.8893, "step": 5934 }, { "epoch": 2.6903898458748867, "grad_norm": 0.36165963866523737, "learning_rate": 4.166414908607219e-05, "loss": 0.881, "step": 5935 }, { "epoch": 2.6908431550317315, "grad_norm": 0.3444503276447592, "learning_rate": 4.16515011833388e-05, "loss": 0.8806, "step": 5936 }, { "epoch": 2.691296464188577, "grad_norm": 0.42011275331818937, "learning_rate": 4.163885311520271e-05, "loss": 0.8847, "step": 5937 }, { "epoch": 2.6917497733454216, "grad_norm": 0.5179075973132685, "learning_rate": 4.162620488293065e-05, "loss": 0.8682, "step": 5938 }, { "epoch": 2.6922030825022665, "grad_norm": 0.6096728560682791, "learning_rate": 4.1613556487789405e-05, "loss": 0.8736, "step": 5939 }, { "epoch": 2.6926563916591117, "grad_norm": 0.638169523519956, "learning_rate": 4.160090793104573e-05, "loss": 0.8771, "step": 5940 }, { "epoch": 2.6931097008159566, "grad_norm": 0.5399307165518655, "learning_rate": 4.158825921396643e-05, "loss": 0.8681, "step": 5941 }, { "epoch": 2.6935630099728014, "grad_norm": 0.45818116211970467, "learning_rate": 4.15756103378183e-05, "loss": 0.8782, "step": 5942 }, { "epoch": 2.6940163191296467, "grad_norm": 0.3286360404585114, "learning_rate": 4.156296130386816e-05, "loss": 0.8819, "step": 5943 }, { "epoch": 2.6944696282864915, "grad_norm": 0.2623091319312336, "learning_rate": 4.155031211338284e-05, "loss": 0.8881, "step": 5944 }, { "epoch": 2.6949229374433363, "grad_norm": 0.3173689820462578, "learning_rate": 4.1537662767629216e-05, "loss": 0.8764, "step": 5945 }, { "epoch": 2.695376246600181, "grad_norm": 0.2691697585005706, "learning_rate": 4.152501326787415e-05, "loss": 0.8803, "step": 5946 }, { "epoch": 2.6958295557570264, "grad_norm": 0.26309593713291995, "learning_rate": 4.1512363615384525e-05, "loss": 0.8793, "step": 5947 }, { "epoch": 2.6962828649138713, "grad_norm": 0.33416935768759093, "learning_rate": 4.149971381142724e-05, "loss": 0.8716, "step": 5948 }, { "epoch": 2.696736174070716, "grad_norm": 0.2962857214515543, "learning_rate": 4.14870638572692e-05, "loss": 0.8717, "step": 5949 }, { "epoch": 2.6971894832275614, "grad_norm": 0.24444598035864407, "learning_rate": 4.1474413754177346e-05, "loss": 0.8854, "step": 5950 }, { "epoch": 2.697642792384406, "grad_norm": 0.2726894495346894, "learning_rate": 4.146176350341862e-05, "loss": 0.8823, "step": 5951 }, { "epoch": 2.698096101541251, "grad_norm": 0.32471685385591775, "learning_rate": 4.144911310625998e-05, "loss": 0.8692, "step": 5952 }, { "epoch": 2.698549410698096, "grad_norm": 0.476841940910996, "learning_rate": 4.143646256396841e-05, "loss": 0.894, "step": 5953 }, { "epoch": 2.699002719854941, "grad_norm": 0.6152700218366967, "learning_rate": 4.142381187781091e-05, "loss": 0.8818, "step": 5954 }, { "epoch": 2.699456029011786, "grad_norm": 0.6533981828701466, "learning_rate": 4.1411161049054444e-05, "loss": 0.8861, "step": 5955 }, { "epoch": 2.6999093381686308, "grad_norm": 0.6829375722085027, "learning_rate": 4.139851007896606e-05, "loss": 0.8772, "step": 5956 }, { "epoch": 2.700362647325476, "grad_norm": 0.6868927596274963, "learning_rate": 4.138585896881278e-05, "loss": 0.8839, "step": 5957 }, { "epoch": 2.700815956482321, "grad_norm": 0.6509294104531982, "learning_rate": 4.137320771986165e-05, "loss": 0.8915, "step": 5958 }, { "epoch": 2.7012692656391657, "grad_norm": 0.4934299027781941, "learning_rate": 4.1360556333379725e-05, "loss": 0.8759, "step": 5959 }, { "epoch": 2.701722574796011, "grad_norm": 0.32363131453532934, "learning_rate": 4.134790481063409e-05, "loss": 0.8818, "step": 5960 }, { "epoch": 2.702175883952856, "grad_norm": 0.2594679099221728, "learning_rate": 4.133525315289182e-05, "loss": 0.8756, "step": 5961 }, { "epoch": 2.7026291931097006, "grad_norm": 0.28707011101267516, "learning_rate": 4.132260136142003e-05, "loss": 0.8809, "step": 5962 }, { "epoch": 2.703082502266546, "grad_norm": 0.35410866882957076, "learning_rate": 4.130994943748583e-05, "loss": 0.87, "step": 5963 }, { "epoch": 2.7035358114233907, "grad_norm": 0.4025410458385838, "learning_rate": 4.129729738235633e-05, "loss": 0.8543, "step": 5964 }, { "epoch": 2.7039891205802356, "grad_norm": 0.38736078702543836, "learning_rate": 4.128464519729869e-05, "loss": 0.8892, "step": 5965 }, { "epoch": 2.704442429737081, "grad_norm": 0.31652773194065925, "learning_rate": 4.127199288358007e-05, "loss": 0.8865, "step": 5966 }, { "epoch": 2.7048957388939256, "grad_norm": 0.26715926681557994, "learning_rate": 4.125934044246762e-05, "loss": 0.89, "step": 5967 }, { "epoch": 2.7053490480507705, "grad_norm": 0.2805074199643201, "learning_rate": 4.124668787522852e-05, "loss": 0.8991, "step": 5968 }, { "epoch": 2.7058023572076157, "grad_norm": 0.31435322620047124, "learning_rate": 4.123403518312997e-05, "loss": 0.8791, "step": 5969 }, { "epoch": 2.7062556663644606, "grad_norm": 0.3597502667369465, "learning_rate": 4.122138236743917e-05, "loss": 0.8838, "step": 5970 }, { "epoch": 2.7067089755213054, "grad_norm": 0.45379596259184224, "learning_rate": 4.1208729429423326e-05, "loss": 0.8971, "step": 5971 }, { "epoch": 2.7071622846781507, "grad_norm": 0.48961459625523035, "learning_rate": 4.1196076370349684e-05, "loss": 0.905, "step": 5972 }, { "epoch": 2.7076155938349955, "grad_norm": 0.4663307338804258, "learning_rate": 4.118342319148547e-05, "loss": 0.8869, "step": 5973 }, { "epoch": 2.7080689029918403, "grad_norm": 0.4891803796315549, "learning_rate": 4.117076989409795e-05, "loss": 0.8784, "step": 5974 }, { "epoch": 2.7085222121486856, "grad_norm": 0.3809550715580573, "learning_rate": 4.1158116479454386e-05, "loss": 0.8624, "step": 5975 }, { "epoch": 2.7089755213055304, "grad_norm": 0.25808429784087245, "learning_rate": 4.1145462948822066e-05, "loss": 0.8761, "step": 5976 }, { "epoch": 2.7094288304623753, "grad_norm": 0.3820993258854631, "learning_rate": 4.1132809303468254e-05, "loss": 0.8585, "step": 5977 }, { "epoch": 2.7098821396192205, "grad_norm": 0.3992701627599507, "learning_rate": 4.112015554466025e-05, "loss": 0.8873, "step": 5978 }, { "epoch": 2.7103354487760654, "grad_norm": 0.345360308262866, "learning_rate": 4.110750167366539e-05, "loss": 0.8852, "step": 5979 }, { "epoch": 2.71078875793291, "grad_norm": 0.38122106773967557, "learning_rate": 4.109484769175097e-05, "loss": 0.8947, "step": 5980 }, { "epoch": 2.7112420670897555, "grad_norm": 0.3548071392985407, "learning_rate": 4.108219360018435e-05, "loss": 0.8627, "step": 5981 }, { "epoch": 2.7116953762466003, "grad_norm": 0.33432729980403825, "learning_rate": 4.106953940023286e-05, "loss": 0.8915, "step": 5982 }, { "epoch": 2.712148685403445, "grad_norm": 0.39027894162399307, "learning_rate": 4.105688509316385e-05, "loss": 0.8854, "step": 5983 }, { "epoch": 2.7126019945602904, "grad_norm": 0.2745598593168288, "learning_rate": 4.104423068024469e-05, "loss": 0.8748, "step": 5984 }, { "epoch": 2.713055303717135, "grad_norm": 0.31098669289486836, "learning_rate": 4.103157616274277e-05, "loss": 0.8879, "step": 5985 }, { "epoch": 2.71350861287398, "grad_norm": 0.4192648460998749, "learning_rate": 4.101892154192546e-05, "loss": 0.8861, "step": 5986 }, { "epoch": 2.713961922030825, "grad_norm": 0.4747606692110344, "learning_rate": 4.1006266819060146e-05, "loss": 0.8672, "step": 5987 }, { "epoch": 2.71441523118767, "grad_norm": 0.43552871832219203, "learning_rate": 4.099361199541427e-05, "loss": 0.875, "step": 5988 }, { "epoch": 2.714868540344515, "grad_norm": 0.34312862352767354, "learning_rate": 4.0980957072255226e-05, "loss": 0.8827, "step": 5989 }, { "epoch": 2.71532184950136, "grad_norm": 0.27340115828533307, "learning_rate": 4.096830205085045e-05, "loss": 0.8735, "step": 5990 }, { "epoch": 2.7157751586582046, "grad_norm": 0.24372094309815806, "learning_rate": 4.0955646932467384e-05, "loss": 0.8752, "step": 5991 }, { "epoch": 2.71622846781505, "grad_norm": 0.3132058902858666, "learning_rate": 4.094299171837346e-05, "loss": 0.8831, "step": 5992 }, { "epoch": 2.7166817769718947, "grad_norm": 0.32190924848444563, "learning_rate": 4.093033640983614e-05, "loss": 0.8599, "step": 5993 }, { "epoch": 2.7171350861287396, "grad_norm": 0.21864989817190175, "learning_rate": 4.091768100812291e-05, "loss": 0.8799, "step": 5994 }, { "epoch": 2.717588395285585, "grad_norm": 0.24132713405824183, "learning_rate": 4.090502551450122e-05, "loss": 0.8933, "step": 5995 }, { "epoch": 2.7180417044424297, "grad_norm": 0.22634909298938133, "learning_rate": 4.089236993023857e-05, "loss": 0.8826, "step": 5996 }, { "epoch": 2.7184950135992745, "grad_norm": 0.2517882770631009, "learning_rate": 4.087971425660245e-05, "loss": 0.8742, "step": 5997 }, { "epoch": 2.7189483227561198, "grad_norm": 0.2892341763220258, "learning_rate": 4.086705849486036e-05, "loss": 0.8638, "step": 5998 }, { "epoch": 2.7194016319129646, "grad_norm": 0.2568272999185166, "learning_rate": 4.085440264627981e-05, "loss": 0.8852, "step": 5999 }, { "epoch": 2.7198549410698094, "grad_norm": 0.25704669699039984, "learning_rate": 4.0841746712128325e-05, "loss": 0.8758, "step": 6000 }, { "epoch": 2.7203082502266547, "grad_norm": 0.23431801145560985, "learning_rate": 4.082909069367342e-05, "loss": 0.8671, "step": 6001 }, { "epoch": 2.7207615593834995, "grad_norm": 0.2566072181415202, "learning_rate": 4.081643459218266e-05, "loss": 0.8838, "step": 6002 }, { "epoch": 2.7212148685403443, "grad_norm": 0.28922136454837877, "learning_rate": 4.080377840892357e-05, "loss": 0.8847, "step": 6003 }, { "epoch": 2.7216681776971896, "grad_norm": 0.25096970948278324, "learning_rate": 4.079112214516372e-05, "loss": 0.8735, "step": 6004 }, { "epoch": 2.7221214868540344, "grad_norm": 0.27433399423181626, "learning_rate": 4.077846580217066e-05, "loss": 0.8802, "step": 6005 }, { "epoch": 2.7225747960108793, "grad_norm": 0.2764745961337998, "learning_rate": 4.076580938121196e-05, "loss": 0.8786, "step": 6006 }, { "epoch": 2.7230281051677245, "grad_norm": 0.28063775117628026, "learning_rate": 4.07531528835552e-05, "loss": 0.8829, "step": 6007 }, { "epoch": 2.7234814143245694, "grad_norm": 0.4024887780888554, "learning_rate": 4.074049631046796e-05, "loss": 0.8608, "step": 6008 }, { "epoch": 2.723934723481414, "grad_norm": 0.3277654257683327, "learning_rate": 4.072783966321784e-05, "loss": 0.8555, "step": 6009 }, { "epoch": 2.7243880326382595, "grad_norm": 0.25631089012735225, "learning_rate": 4.071518294307245e-05, "loss": 0.8707, "step": 6010 }, { "epoch": 2.7248413417951043, "grad_norm": 0.305820441867732, "learning_rate": 4.070252615129937e-05, "loss": 0.8852, "step": 6011 }, { "epoch": 2.725294650951949, "grad_norm": 0.27306488039542304, "learning_rate": 4.068986928916624e-05, "loss": 0.8959, "step": 6012 }, { "epoch": 2.7257479601087944, "grad_norm": 0.3127151038755841, "learning_rate": 4.067721235794067e-05, "loss": 0.89, "step": 6013 }, { "epoch": 2.726201269265639, "grad_norm": 0.4458181374608773, "learning_rate": 4.06645553588903e-05, "loss": 0.8758, "step": 6014 }, { "epoch": 2.726654578422484, "grad_norm": 0.36521408448795223, "learning_rate": 4.065189829328275e-05, "loss": 0.8898, "step": 6015 }, { "epoch": 2.7271078875793293, "grad_norm": 0.2565140149743659, "learning_rate": 4.063924116238567e-05, "loss": 0.8988, "step": 6016 }, { "epoch": 2.727561196736174, "grad_norm": 0.2726653368215798, "learning_rate": 4.0626583967466716e-05, "loss": 0.8797, "step": 6017 }, { "epoch": 2.728014505893019, "grad_norm": 0.2508763196205694, "learning_rate": 4.0613926709793524e-05, "loss": 0.9033, "step": 6018 }, { "epoch": 2.7284678150498642, "grad_norm": 0.24413803509720206, "learning_rate": 4.060126939063377e-05, "loss": 0.9064, "step": 6019 }, { "epoch": 2.728921124206709, "grad_norm": 0.2708408388107043, "learning_rate": 4.058861201125512e-05, "loss": 0.8621, "step": 6020 }, { "epoch": 2.729374433363554, "grad_norm": 0.24831515134784013, "learning_rate": 4.057595457292525e-05, "loss": 0.8851, "step": 6021 }, { "epoch": 2.729827742520399, "grad_norm": 0.2341869493615707, "learning_rate": 4.056329707691184e-05, "loss": 0.8736, "step": 6022 }, { "epoch": 2.730281051677244, "grad_norm": 0.29332459711843206, "learning_rate": 4.055063952448257e-05, "loss": 0.8734, "step": 6023 }, { "epoch": 2.730734360834089, "grad_norm": 0.2802576514084309, "learning_rate": 4.053798191690514e-05, "loss": 0.8754, "step": 6024 }, { "epoch": 2.7311876699909337, "grad_norm": 0.2813585848215943, "learning_rate": 4.052532425544723e-05, "loss": 0.8742, "step": 6025 }, { "epoch": 2.731640979147779, "grad_norm": 0.3996362210257967, "learning_rate": 4.0512666541376564e-05, "loss": 0.8695, "step": 6026 }, { "epoch": 2.7320942883046238, "grad_norm": 0.2979513800643169, "learning_rate": 4.050000877596082e-05, "loss": 0.845, "step": 6027 }, { "epoch": 2.7325475974614686, "grad_norm": 0.2583773268288633, "learning_rate": 4.048735096046774e-05, "loss": 0.898, "step": 6028 }, { "epoch": 2.733000906618314, "grad_norm": 0.3281898511411466, "learning_rate": 4.047469309616502e-05, "loss": 0.8786, "step": 6029 }, { "epoch": 2.7334542157751587, "grad_norm": 0.2738948139296202, "learning_rate": 4.046203518432039e-05, "loss": 0.8697, "step": 6030 }, { "epoch": 2.7339075249320035, "grad_norm": 0.36819865471976304, "learning_rate": 4.044937722620159e-05, "loss": 0.8586, "step": 6031 }, { "epoch": 2.7343608340888483, "grad_norm": 0.3974628634450035, "learning_rate": 4.043671922307633e-05, "loss": 0.8604, "step": 6032 }, { "epoch": 2.7348141432456936, "grad_norm": 0.34691672445338917, "learning_rate": 4.0424061176212375e-05, "loss": 0.8686, "step": 6033 }, { "epoch": 2.7352674524025384, "grad_norm": 0.3390832423332367, "learning_rate": 4.041140308687743e-05, "loss": 0.8901, "step": 6034 }, { "epoch": 2.7357207615593833, "grad_norm": 0.39510017874340586, "learning_rate": 4.039874495633925e-05, "loss": 0.8821, "step": 6035 }, { "epoch": 2.7361740707162285, "grad_norm": 0.44464415841892563, "learning_rate": 4.0386086785865606e-05, "loss": 0.879, "step": 6036 }, { "epoch": 2.7366273798730734, "grad_norm": 0.47535428689016745, "learning_rate": 4.037342857672422e-05, "loss": 0.8847, "step": 6037 }, { "epoch": 2.737080689029918, "grad_norm": 0.5004700745636044, "learning_rate": 4.0360770330182875e-05, "loss": 0.8657, "step": 6038 }, { "epoch": 2.7375339981867635, "grad_norm": 0.484323973787452, "learning_rate": 4.034811204750933e-05, "loss": 0.8826, "step": 6039 }, { "epoch": 2.7379873073436083, "grad_norm": 0.37977512205228586, "learning_rate": 4.033545372997133e-05, "loss": 0.8881, "step": 6040 }, { "epoch": 2.738440616500453, "grad_norm": 0.30057796371148504, "learning_rate": 4.032279537883665e-05, "loss": 0.8701, "step": 6041 }, { "epoch": 2.7388939256572984, "grad_norm": 0.3009474780901464, "learning_rate": 4.031013699537307e-05, "loss": 0.8728, "step": 6042 }, { "epoch": 2.739347234814143, "grad_norm": 0.2933731649829465, "learning_rate": 4.029747858084837e-05, "loss": 0.8802, "step": 6043 }, { "epoch": 2.739800543970988, "grad_norm": 0.32274355039059455, "learning_rate": 4.02848201365303e-05, "loss": 0.8799, "step": 6044 }, { "epoch": 2.7402538531278333, "grad_norm": 0.424830162242949, "learning_rate": 4.027216166368665e-05, "loss": 0.8939, "step": 6045 }, { "epoch": 2.740707162284678, "grad_norm": 0.5180981879833223, "learning_rate": 4.025950316358522e-05, "loss": 0.885, "step": 6046 }, { "epoch": 2.741160471441523, "grad_norm": 0.5080829789765267, "learning_rate": 4.0246844637493784e-05, "loss": 0.8766, "step": 6047 }, { "epoch": 2.7416137805983682, "grad_norm": 0.46048977613510356, "learning_rate": 4.0234186086680124e-05, "loss": 0.8691, "step": 6048 }, { "epoch": 2.742067089755213, "grad_norm": 0.42617393048165036, "learning_rate": 4.022152751241205e-05, "loss": 0.8795, "step": 6049 }, { "epoch": 2.742520398912058, "grad_norm": 0.36086397079256266, "learning_rate": 4.020886891595733e-05, "loss": 0.8739, "step": 6050 }, { "epoch": 2.742973708068903, "grad_norm": 0.3199442651353173, "learning_rate": 4.019621029858379e-05, "loss": 0.8885, "step": 6051 }, { "epoch": 2.743427017225748, "grad_norm": 0.3862264237758552, "learning_rate": 4.018355166155922e-05, "loss": 0.8832, "step": 6052 }, { "epoch": 2.743880326382593, "grad_norm": 0.3178923741147127, "learning_rate": 4.0170893006151397e-05, "loss": 0.8665, "step": 6053 }, { "epoch": 2.744333635539438, "grad_norm": 0.3691506307230035, "learning_rate": 4.015823433362815e-05, "loss": 0.8869, "step": 6054 }, { "epoch": 2.744786944696283, "grad_norm": 0.4683025807723327, "learning_rate": 4.0145575645257254e-05, "loss": 0.8645, "step": 6055 }, { "epoch": 2.7452402538531278, "grad_norm": 0.4249543368301541, "learning_rate": 4.0132916942306536e-05, "loss": 0.8819, "step": 6056 }, { "epoch": 2.745693563009973, "grad_norm": 0.34910811868057107, "learning_rate": 4.0120258226043794e-05, "loss": 0.8846, "step": 6057 }, { "epoch": 2.746146872166818, "grad_norm": 0.29174255691405115, "learning_rate": 4.0107599497736834e-05, "loss": 0.8967, "step": 6058 }, { "epoch": 2.7466001813236627, "grad_norm": 0.33195021402509256, "learning_rate": 4.0094940758653466e-05, "loss": 0.8881, "step": 6059 }, { "epoch": 2.747053490480508, "grad_norm": 0.28131354211601683, "learning_rate": 4.008228201006151e-05, "loss": 0.8768, "step": 6060 }, { "epoch": 2.747506799637353, "grad_norm": 0.2814999027192777, "learning_rate": 4.006962325322876e-05, "loss": 0.8808, "step": 6061 }, { "epoch": 2.7479601087941976, "grad_norm": 0.26901645249685946, "learning_rate": 4.005696448942305e-05, "loss": 0.8859, "step": 6062 }, { "epoch": 2.748413417951043, "grad_norm": 0.28180261488438185, "learning_rate": 4.0044305719912166e-05, "loss": 0.8804, "step": 6063 }, { "epoch": 2.7488667271078877, "grad_norm": 0.29066273983994473, "learning_rate": 4.003164694596394e-05, "loss": 0.8687, "step": 6064 }, { "epoch": 2.7493200362647325, "grad_norm": 0.3350927118538583, "learning_rate": 4.001898816884618e-05, "loss": 0.9013, "step": 6065 }, { "epoch": 2.7497733454215774, "grad_norm": 0.32906964042047576, "learning_rate": 4.000632938982669e-05, "loss": 0.8744, "step": 6066 }, { "epoch": 2.7502266545784226, "grad_norm": 0.30377577419761154, "learning_rate": 3.999367061017332e-05, "loss": 0.8796, "step": 6067 }, { "epoch": 2.7506799637352675, "grad_norm": 0.26234732499845886, "learning_rate": 3.9981011831153836e-05, "loss": 0.878, "step": 6068 }, { "epoch": 2.7511332728921123, "grad_norm": 0.27312636558240816, "learning_rate": 3.996835305403608e-05, "loss": 0.8655, "step": 6069 }, { "epoch": 2.751586582048957, "grad_norm": 0.3158322359376824, "learning_rate": 3.995569428008785e-05, "loss": 0.8629, "step": 6070 }, { "epoch": 2.7520398912058024, "grad_norm": 0.2814189862250625, "learning_rate": 3.994303551057697e-05, "loss": 0.891, "step": 6071 }, { "epoch": 2.752493200362647, "grad_norm": 0.34044537512875755, "learning_rate": 3.993037674677125e-05, "loss": 0.8916, "step": 6072 }, { "epoch": 2.752946509519492, "grad_norm": 0.48858575647914193, "learning_rate": 3.9917717989938504e-05, "loss": 0.8946, "step": 6073 }, { "epoch": 2.7533998186763373, "grad_norm": 0.46308291304217525, "learning_rate": 3.990505924134655e-05, "loss": 0.884, "step": 6074 }, { "epoch": 2.753853127833182, "grad_norm": 0.4208294606961422, "learning_rate": 3.989240050226318e-05, "loss": 0.8872, "step": 6075 }, { "epoch": 2.754306436990027, "grad_norm": 0.31893873078045853, "learning_rate": 3.9879741773956226e-05, "loss": 0.8578, "step": 6076 }, { "epoch": 2.7547597461468722, "grad_norm": 0.2711476877933372, "learning_rate": 3.9867083057693484e-05, "loss": 0.8823, "step": 6077 }, { "epoch": 2.755213055303717, "grad_norm": 0.286007341675459, "learning_rate": 3.985442435474275e-05, "loss": 0.8895, "step": 6078 }, { "epoch": 2.755666364460562, "grad_norm": 0.27013961638190637, "learning_rate": 3.9841765666371864e-05, "loss": 0.8831, "step": 6079 }, { "epoch": 2.756119673617407, "grad_norm": 0.28091698928215575, "learning_rate": 3.982910699384862e-05, "loss": 0.8797, "step": 6080 }, { "epoch": 2.756572982774252, "grad_norm": 0.3184092291863173, "learning_rate": 3.9816448338440795e-05, "loss": 0.89, "step": 6081 }, { "epoch": 2.757026291931097, "grad_norm": 0.43354295808174187, "learning_rate": 3.980378970141621e-05, "loss": 0.8969, "step": 6082 }, { "epoch": 2.757479601087942, "grad_norm": 0.4582094356694416, "learning_rate": 3.979113108404266e-05, "loss": 0.885, "step": 6083 }, { "epoch": 2.757932910244787, "grad_norm": 0.4800149996733413, "learning_rate": 3.977847248758795e-05, "loss": 0.8679, "step": 6084 }, { "epoch": 2.7583862194016318, "grad_norm": 0.5185873981838997, "learning_rate": 3.9765813913319876e-05, "loss": 0.8794, "step": 6085 }, { "epoch": 2.758839528558477, "grad_norm": 0.5449660803330681, "learning_rate": 3.9753155362506236e-05, "loss": 0.8832, "step": 6086 }, { "epoch": 2.759292837715322, "grad_norm": 1.61244897869741, "learning_rate": 3.97404968364148e-05, "loss": 0.8814, "step": 6087 }, { "epoch": 2.7597461468721667, "grad_norm": 0.4149699311590512, "learning_rate": 3.972783833631337e-05, "loss": 0.8966, "step": 6088 }, { "epoch": 2.760199456029012, "grad_norm": 0.9749459304727475, "learning_rate": 3.971517986346972e-05, "loss": 0.8984, "step": 6089 }, { "epoch": 2.760652765185857, "grad_norm": 1.2112411355866706, "learning_rate": 3.970252141915166e-05, "loss": 0.8967, "step": 6090 }, { "epoch": 2.7611060743427016, "grad_norm": 0.6375532886505565, "learning_rate": 3.968986300462694e-05, "loss": 0.8974, "step": 6091 }, { "epoch": 2.761559383499547, "grad_norm": 0.48718199656381356, "learning_rate": 3.9677204621163356e-05, "loss": 0.8917, "step": 6092 }, { "epoch": 2.7620126926563917, "grad_norm": 0.7600020928208014, "learning_rate": 3.966454627002868e-05, "loss": 0.8744, "step": 6093 }, { "epoch": 2.7624660018132365, "grad_norm": 0.9781315396251884, "learning_rate": 3.965188795249068e-05, "loss": 0.8725, "step": 6094 }, { "epoch": 2.762919310970082, "grad_norm": 1.0689970495876113, "learning_rate": 3.963922966981713e-05, "loss": 0.8886, "step": 6095 }, { "epoch": 2.7633726201269266, "grad_norm": 0.7023912189087739, "learning_rate": 3.9626571423275786e-05, "loss": 0.8465, "step": 6096 }, { "epoch": 2.7638259292837715, "grad_norm": 0.3962577348791257, "learning_rate": 3.961391321413441e-05, "loss": 0.8748, "step": 6097 }, { "epoch": 2.7642792384406167, "grad_norm": 0.5829269724998474, "learning_rate": 3.9601255043660754e-05, "loss": 0.8666, "step": 6098 }, { "epoch": 2.7647325475974616, "grad_norm": 0.8311273728198666, "learning_rate": 3.958859691312259e-05, "loss": 0.8663, "step": 6099 }, { "epoch": 2.7651858567543064, "grad_norm": 0.8797845088640909, "learning_rate": 3.9575938823787645e-05, "loss": 0.8871, "step": 6100 }, { "epoch": 2.7656391659111517, "grad_norm": 0.789216204999004, "learning_rate": 3.956328077692367e-05, "loss": 0.8889, "step": 6101 }, { "epoch": 2.7660924750679965, "grad_norm": 0.6502077164978416, "learning_rate": 3.955062277379842e-05, "loss": 0.8974, "step": 6102 }, { "epoch": 2.7665457842248413, "grad_norm": 0.5984201887237827, "learning_rate": 3.9537964815679604e-05, "loss": 0.8791, "step": 6103 }, { "epoch": 2.766999093381686, "grad_norm": 0.5387715994898872, "learning_rate": 3.952530690383498e-05, "loss": 0.8736, "step": 6104 }, { "epoch": 2.7674524025385314, "grad_norm": 0.4861922022254606, "learning_rate": 3.951264903953228e-05, "loss": 0.873, "step": 6105 }, { "epoch": 2.7679057116953762, "grad_norm": 0.4148610397037762, "learning_rate": 3.94999912240392e-05, "loss": 0.8822, "step": 6106 }, { "epoch": 2.768359020852221, "grad_norm": 0.38139518568773456, "learning_rate": 3.948733345862346e-05, "loss": 0.8744, "step": 6107 }, { "epoch": 2.7688123300090663, "grad_norm": 0.4898444247025592, "learning_rate": 3.9474675744552784e-05, "loss": 0.8879, "step": 6108 }, { "epoch": 2.769265639165911, "grad_norm": 0.4827180763493824, "learning_rate": 3.946201808309487e-05, "loss": 0.8696, "step": 6109 }, { "epoch": 2.769718948322756, "grad_norm": 0.31831927196581833, "learning_rate": 3.944936047551744e-05, "loss": 0.879, "step": 6110 }, { "epoch": 2.770172257479601, "grad_norm": 0.3605813220292502, "learning_rate": 3.943670292308818e-05, "loss": 0.8735, "step": 6111 }, { "epoch": 2.770625566636446, "grad_norm": 0.45535451402338695, "learning_rate": 3.942404542707476e-05, "loss": 0.8862, "step": 6112 }, { "epoch": 2.771078875793291, "grad_norm": 0.48364775537798727, "learning_rate": 3.9411387988744884e-05, "loss": 0.8919, "step": 6113 }, { "epoch": 2.7715321849501358, "grad_norm": 0.3915572350916613, "learning_rate": 3.939873060936624e-05, "loss": 0.9184, "step": 6114 }, { "epoch": 2.771985494106981, "grad_norm": 0.2546997901684052, "learning_rate": 3.938607329020649e-05, "loss": 0.8799, "step": 6115 }, { "epoch": 2.772438803263826, "grad_norm": 0.35701423346277816, "learning_rate": 3.9373416032533304e-05, "loss": 0.8822, "step": 6116 }, { "epoch": 2.7728921124206707, "grad_norm": 0.459885609354512, "learning_rate": 3.936075883761434e-05, "loss": 0.8649, "step": 6117 }, { "epoch": 2.773345421577516, "grad_norm": 0.3895671268739787, "learning_rate": 3.9348101706717264e-05, "loss": 0.8765, "step": 6118 }, { "epoch": 2.773798730734361, "grad_norm": 0.29711750235080303, "learning_rate": 3.933544464110971e-05, "loss": 0.8805, "step": 6119 }, { "epoch": 2.7742520398912056, "grad_norm": 0.33102378608482685, "learning_rate": 3.932278764205933e-05, "loss": 0.8935, "step": 6120 }, { "epoch": 2.774705349048051, "grad_norm": 0.387540393145134, "learning_rate": 3.9310130710833766e-05, "loss": 0.8839, "step": 6121 }, { "epoch": 2.7751586582048957, "grad_norm": 0.31192713768429314, "learning_rate": 3.9297473848700634e-05, "loss": 0.8822, "step": 6122 }, { "epoch": 2.7756119673617405, "grad_norm": 0.2943472179179411, "learning_rate": 3.928481705692756e-05, "loss": 0.8682, "step": 6123 }, { "epoch": 2.776065276518586, "grad_norm": 0.30992521161847514, "learning_rate": 3.927216033678216e-05, "loss": 0.8698, "step": 6124 }, { "epoch": 2.7765185856754306, "grad_norm": 0.312090975400174, "learning_rate": 3.925950368953205e-05, "loss": 0.902, "step": 6125 }, { "epoch": 2.7769718948322755, "grad_norm": 0.36026862712502605, "learning_rate": 3.9246847116444825e-05, "loss": 0.8987, "step": 6126 }, { "epoch": 2.7774252039891207, "grad_norm": 0.28153717326456584, "learning_rate": 3.923419061878806e-05, "loss": 0.8864, "step": 6127 }, { "epoch": 2.7778785131459656, "grad_norm": 0.246974994925805, "learning_rate": 3.922153419782936e-05, "loss": 0.8801, "step": 6128 }, { "epoch": 2.7783318223028104, "grad_norm": 0.41156265462301106, "learning_rate": 3.920887785483629e-05, "loss": 0.8659, "step": 6129 }, { "epoch": 2.7787851314596557, "grad_norm": 0.2938772884923773, "learning_rate": 3.9196221591076436e-05, "loss": 0.868, "step": 6130 }, { "epoch": 2.7792384406165005, "grad_norm": 0.24135577232290767, "learning_rate": 3.918356540781735e-05, "loss": 0.8866, "step": 6131 }, { "epoch": 2.7796917497733453, "grad_norm": 0.3151626258550177, "learning_rate": 3.9170909306326585e-05, "loss": 0.8802, "step": 6132 }, { "epoch": 2.7801450589301906, "grad_norm": 0.2714048701529538, "learning_rate": 3.9158253287871695e-05, "loss": 0.8665, "step": 6133 }, { "epoch": 2.7805983680870354, "grad_norm": 0.28143568582548667, "learning_rate": 3.9145597353720205e-05, "loss": 0.8781, "step": 6134 }, { "epoch": 2.7810516772438802, "grad_norm": 0.26700452760329224, "learning_rate": 3.9132941505139657e-05, "loss": 0.8828, "step": 6135 }, { "epoch": 2.7815049864007255, "grad_norm": 0.29366619317950715, "learning_rate": 3.9120285743397556e-05, "loss": 0.8823, "step": 6136 }, { "epoch": 2.7819582955575703, "grad_norm": 0.3681690020464256, "learning_rate": 3.9107630069761444e-05, "loss": 0.886, "step": 6137 }, { "epoch": 2.782411604714415, "grad_norm": 0.335748527886002, "learning_rate": 3.909497448549879e-05, "loss": 0.885, "step": 6138 }, { "epoch": 2.7828649138712604, "grad_norm": 0.27099686766519443, "learning_rate": 3.90823189918771e-05, "loss": 0.896, "step": 6139 }, { "epoch": 2.7833182230281053, "grad_norm": 0.2938789639924382, "learning_rate": 3.906966359016386e-05, "loss": 0.8829, "step": 6140 }, { "epoch": 2.78377153218495, "grad_norm": 0.3390834258039275, "learning_rate": 3.9057008281626547e-05, "loss": 0.883, "step": 6141 }, { "epoch": 2.7842248413417954, "grad_norm": 0.410045986622981, "learning_rate": 3.904435306753262e-05, "loss": 0.8946, "step": 6142 }, { "epoch": 2.78467815049864, "grad_norm": 0.4478655141436937, "learning_rate": 3.903169794914955e-05, "loss": 0.8764, "step": 6143 }, { "epoch": 2.785131459655485, "grad_norm": 0.40303359472285216, "learning_rate": 3.9019042927744794e-05, "loss": 0.8894, "step": 6144 }, { "epoch": 2.78558476881233, "grad_norm": 0.2771535642765404, "learning_rate": 3.900638800458575e-05, "loss": 0.8769, "step": 6145 }, { "epoch": 2.786038077969175, "grad_norm": 0.2806444203616299, "learning_rate": 3.899373318093987e-05, "loss": 0.8942, "step": 6146 }, { "epoch": 2.78649138712602, "grad_norm": 0.3123559904952473, "learning_rate": 3.898107845807457e-05, "loss": 0.8611, "step": 6147 }, { "epoch": 2.786944696282865, "grad_norm": 0.3296136432659561, "learning_rate": 3.896842383725725e-05, "loss": 0.8687, "step": 6148 }, { "epoch": 2.7873980054397096, "grad_norm": 0.23674826384797926, "learning_rate": 3.895576931975532e-05, "loss": 0.8612, "step": 6149 }, { "epoch": 2.787851314596555, "grad_norm": 0.27717242273124054, "learning_rate": 3.8943114906836165e-05, "loss": 0.894, "step": 6150 }, { "epoch": 2.7883046237533997, "grad_norm": 0.3448819430387344, "learning_rate": 3.893046059976715e-05, "loss": 0.8835, "step": 6151 }, { "epoch": 2.7887579329102445, "grad_norm": 0.32659024885264465, "learning_rate": 3.8917806399815663e-05, "loss": 0.8669, "step": 6152 }, { "epoch": 2.78921124206709, "grad_norm": 0.2710175205427032, "learning_rate": 3.8905152308249035e-05, "loss": 0.8561, "step": 6153 }, { "epoch": 2.7896645512239346, "grad_norm": 0.25669349458594337, "learning_rate": 3.8892498326334624e-05, "loss": 0.8627, "step": 6154 }, { "epoch": 2.7901178603807795, "grad_norm": 0.3140769530615051, "learning_rate": 3.887984445533976e-05, "loss": 0.8847, "step": 6155 }, { "epoch": 2.7905711695376247, "grad_norm": 0.33710422621512065, "learning_rate": 3.886719069653176e-05, "loss": 0.8861, "step": 6156 }, { "epoch": 2.7910244786944696, "grad_norm": 0.3234057570260941, "learning_rate": 3.885453705117795e-05, "loss": 0.8714, "step": 6157 }, { "epoch": 2.7914777878513144, "grad_norm": 0.30133393000181624, "learning_rate": 3.8841883520545614e-05, "loss": 0.885, "step": 6158 }, { "epoch": 2.7919310970081597, "grad_norm": 0.25134542832365, "learning_rate": 3.882923010590205e-05, "loss": 0.8825, "step": 6159 }, { "epoch": 2.7923844061650045, "grad_norm": 0.23043113982136806, "learning_rate": 3.881657680851453e-05, "loss": 0.8864, "step": 6160 }, { "epoch": 2.7928377153218493, "grad_norm": 0.29487972959925135, "learning_rate": 3.880392362965032e-05, "loss": 0.8939, "step": 6161 }, { "epoch": 2.7932910244786946, "grad_norm": 0.332644057338735, "learning_rate": 3.879127057057668e-05, "loss": 0.8916, "step": 6162 }, { "epoch": 2.7937443336355394, "grad_norm": 0.33783798896329925, "learning_rate": 3.877861763256085e-05, "loss": 0.8643, "step": 6163 }, { "epoch": 2.7941976427923843, "grad_norm": 0.2773299515842344, "learning_rate": 3.876596481687005e-05, "loss": 0.8568, "step": 6164 }, { "epoch": 2.7946509519492295, "grad_norm": 0.2805342412672011, "learning_rate": 3.875331212477149e-05, "loss": 0.8781, "step": 6165 }, { "epoch": 2.7951042611060744, "grad_norm": 0.30141720781108783, "learning_rate": 3.874065955753239e-05, "loss": 0.8887, "step": 6166 }, { "epoch": 2.795557570262919, "grad_norm": 0.3343178801376389, "learning_rate": 3.872800711641994e-05, "loss": 0.8694, "step": 6167 }, { "epoch": 2.7960108794197644, "grad_norm": 0.35514662073302267, "learning_rate": 3.8715354802701316e-05, "loss": 0.8857, "step": 6168 }, { "epoch": 2.7964641885766093, "grad_norm": 0.3379789257917202, "learning_rate": 3.870270261764368e-05, "loss": 0.8708, "step": 6169 }, { "epoch": 2.796917497733454, "grad_norm": 0.2923678471476138, "learning_rate": 3.869005056251419e-05, "loss": 0.8907, "step": 6170 }, { "epoch": 2.7973708068902994, "grad_norm": 0.2939770955730156, "learning_rate": 3.867739863857998e-05, "loss": 0.8885, "step": 6171 }, { "epoch": 2.797824116047144, "grad_norm": 0.3090077960359177, "learning_rate": 3.8664746847108186e-05, "loss": 0.8849, "step": 6172 }, { "epoch": 2.798277425203989, "grad_norm": 0.2596366671302071, "learning_rate": 3.8652095189365927e-05, "loss": 0.8797, "step": 6173 }, { "epoch": 2.7987307343608343, "grad_norm": 0.25571024845196366, "learning_rate": 3.863944366662029e-05, "loss": 0.8797, "step": 6174 }, { "epoch": 2.799184043517679, "grad_norm": 0.2692914071282436, "learning_rate": 3.8626792280138365e-05, "loss": 0.8873, "step": 6175 }, { "epoch": 2.799637352674524, "grad_norm": 0.30313137173987253, "learning_rate": 3.861414103118723e-05, "loss": 0.897, "step": 6176 }, { "epoch": 2.8000906618313692, "grad_norm": 0.2881362476597029, "learning_rate": 3.860148992103395e-05, "loss": 0.8943, "step": 6177 }, { "epoch": 2.800543970988214, "grad_norm": 0.28451432846342567, "learning_rate": 3.858883895094557e-05, "loss": 0.8717, "step": 6178 }, { "epoch": 2.800997280145059, "grad_norm": 0.262528505015299, "learning_rate": 3.857618812218911e-05, "loss": 0.8822, "step": 6179 }, { "epoch": 2.801450589301904, "grad_norm": 0.2485769702666244, "learning_rate": 3.856353743603159e-05, "loss": 0.8685, "step": 6180 }, { "epoch": 2.801903898458749, "grad_norm": 0.27653706007492673, "learning_rate": 3.8550886893740017e-05, "loss": 0.8883, "step": 6181 }, { "epoch": 2.802357207615594, "grad_norm": 0.30835300270246063, "learning_rate": 3.853823649658139e-05, "loss": 0.8852, "step": 6182 }, { "epoch": 2.8028105167724386, "grad_norm": 0.335182763315055, "learning_rate": 3.8525586245822674e-05, "loss": 0.8785, "step": 6183 }, { "epoch": 2.803263825929284, "grad_norm": 0.3163777460339215, "learning_rate": 3.851293614273082e-05, "loss": 0.8838, "step": 6184 }, { "epoch": 2.8037171350861287, "grad_norm": 0.29676650264929405, "learning_rate": 3.8500286188572787e-05, "loss": 0.8716, "step": 6185 }, { "epoch": 2.8041704442429736, "grad_norm": 0.30821578475297845, "learning_rate": 3.8487636384615495e-05, "loss": 0.8612, "step": 6186 }, { "epoch": 2.804623753399819, "grad_norm": 0.2796440368362066, "learning_rate": 3.847498673212586e-05, "loss": 0.8849, "step": 6187 }, { "epoch": 2.8050770625566637, "grad_norm": 0.23638863451854217, "learning_rate": 3.846233723237079e-05, "loss": 0.8847, "step": 6188 }, { "epoch": 2.8055303717135085, "grad_norm": 0.23022938303700224, "learning_rate": 3.844968788661717e-05, "loss": 0.8745, "step": 6189 }, { "epoch": 2.8059836808703533, "grad_norm": 0.4368934830553695, "learning_rate": 3.843703869613186e-05, "loss": 0.8921, "step": 6190 }, { "epoch": 2.8064369900271986, "grad_norm": 0.23328925312044513, "learning_rate": 3.8424389662181716e-05, "loss": 0.8889, "step": 6191 }, { "epoch": 2.8068902991840434, "grad_norm": 0.29137212697585135, "learning_rate": 3.841174078603358e-05, "loss": 0.8841, "step": 6192 }, { "epoch": 2.8073436083408883, "grad_norm": 0.2893179856620248, "learning_rate": 3.839909206895428e-05, "loss": 0.8744, "step": 6193 }, { "epoch": 2.8077969174977335, "grad_norm": 0.3614889637573261, "learning_rate": 3.838644351221061e-05, "loss": 0.8823, "step": 6194 }, { "epoch": 2.8082502266545784, "grad_norm": 0.4064216447541747, "learning_rate": 3.8373795117069353e-05, "loss": 0.8815, "step": 6195 }, { "epoch": 2.808703535811423, "grad_norm": 0.4177631749934668, "learning_rate": 3.836114688479731e-05, "loss": 0.8812, "step": 6196 }, { "epoch": 2.8091568449682685, "grad_norm": 0.38040884353135246, "learning_rate": 3.834849881666121e-05, "loss": 0.8871, "step": 6197 }, { "epoch": 2.8096101541251133, "grad_norm": 0.2930489773960027, "learning_rate": 3.833585091392781e-05, "loss": 0.8609, "step": 6198 }, { "epoch": 2.810063463281958, "grad_norm": 0.1999170860645616, "learning_rate": 3.8323203177863835e-05, "loss": 0.9078, "step": 6199 }, { "epoch": 2.8105167724388034, "grad_norm": 0.2683079445666463, "learning_rate": 3.831055560973599e-05, "loss": 0.857, "step": 6200 }, { "epoch": 2.810970081595648, "grad_norm": 0.428376504917596, "learning_rate": 3.829790821081098e-05, "loss": 0.8656, "step": 6201 }, { "epoch": 2.811423390752493, "grad_norm": 0.5010854410041042, "learning_rate": 3.828526098235545e-05, "loss": 0.8831, "step": 6202 }, { "epoch": 2.8118766999093383, "grad_norm": 0.5393705638104727, "learning_rate": 3.827261392563607e-05, "loss": 0.9013, "step": 6203 }, { "epoch": 2.812330009066183, "grad_norm": 0.5392988909854911, "learning_rate": 3.8259967041919484e-05, "loss": 0.9039, "step": 6204 }, { "epoch": 2.812783318223028, "grad_norm": 0.5095681789973199, "learning_rate": 3.8247320332472314e-05, "loss": 0.8655, "step": 6205 }, { "epoch": 2.8132366273798732, "grad_norm": 0.3780104885461652, "learning_rate": 3.823467379856116e-05, "loss": 0.8754, "step": 6206 }, { "epoch": 2.813689936536718, "grad_norm": 0.2917276115405788, "learning_rate": 3.8222027441452615e-05, "loss": 0.8898, "step": 6207 }, { "epoch": 2.814143245693563, "grad_norm": 0.3022022508112911, "learning_rate": 3.8209381262413255e-05, "loss": 0.879, "step": 6208 }, { "epoch": 2.814596554850408, "grad_norm": 0.34773465479375837, "learning_rate": 3.819673526270962e-05, "loss": 0.8652, "step": 6209 }, { "epoch": 2.815049864007253, "grad_norm": 0.45927220068067093, "learning_rate": 3.818408944360824e-05, "loss": 0.8877, "step": 6210 }, { "epoch": 2.815503173164098, "grad_norm": 0.4759305535510418, "learning_rate": 3.8171443806375646e-05, "loss": 0.8871, "step": 6211 }, { "epoch": 2.815956482320943, "grad_norm": 0.44779298687251967, "learning_rate": 3.8158798352278325e-05, "loss": 0.8764, "step": 6212 }, { "epoch": 2.816409791477788, "grad_norm": 0.2702659621621992, "learning_rate": 3.814615308258277e-05, "loss": 0.8767, "step": 6213 }, { "epoch": 2.8168631006346327, "grad_norm": 0.2852363483009499, "learning_rate": 3.813350799855542e-05, "loss": 0.8909, "step": 6214 }, { "epoch": 2.817316409791478, "grad_norm": 0.35091935373662, "learning_rate": 3.812086310146273e-05, "loss": 0.8853, "step": 6215 }, { "epoch": 2.817769718948323, "grad_norm": 0.40681800255926726, "learning_rate": 3.8108218392571126e-05, "loss": 0.9045, "step": 6216 }, { "epoch": 2.8182230281051677, "grad_norm": 0.3911211144816597, "learning_rate": 3.8095573873147015e-05, "loss": 0.8691, "step": 6217 }, { "epoch": 2.818676337262013, "grad_norm": 0.3285192226252565, "learning_rate": 3.808292954445677e-05, "loss": 0.8549, "step": 6218 }, { "epoch": 2.8191296464188578, "grad_norm": 0.27314813560742146, "learning_rate": 3.8070285407766776e-05, "loss": 0.8725, "step": 6219 }, { "epoch": 2.8195829555757026, "grad_norm": 0.31124059788624614, "learning_rate": 3.8057641464343365e-05, "loss": 0.8897, "step": 6220 }, { "epoch": 2.820036264732548, "grad_norm": 0.36702274758081815, "learning_rate": 3.8044997715452875e-05, "loss": 0.896, "step": 6221 }, { "epoch": 2.8204895738893927, "grad_norm": 0.4996803018547748, "learning_rate": 3.80323541623616e-05, "loss": 0.8729, "step": 6222 }, { "epoch": 2.8209428830462375, "grad_norm": 0.43250589029538605, "learning_rate": 3.801971080633584e-05, "loss": 0.8765, "step": 6223 }, { "epoch": 2.8213961922030824, "grad_norm": 0.28847073333203394, "learning_rate": 3.800706764864187e-05, "loss": 0.8837, "step": 6224 }, { "epoch": 2.8218495013599276, "grad_norm": 0.24532349484706276, "learning_rate": 3.799442469054593e-05, "loss": 0.8848, "step": 6225 }, { "epoch": 2.8223028105167725, "grad_norm": 0.24532081050856266, "learning_rate": 3.798178193331424e-05, "loss": 0.8586, "step": 6226 }, { "epoch": 2.8227561196736173, "grad_norm": 0.23437649157619464, "learning_rate": 3.796913937821303e-05, "loss": 0.8685, "step": 6227 }, { "epoch": 2.823209428830462, "grad_norm": 0.291817385359558, "learning_rate": 3.795649702650848e-05, "loss": 0.8744, "step": 6228 }, { "epoch": 2.8236627379873074, "grad_norm": 0.3280127296310442, "learning_rate": 3.794385487946675e-05, "loss": 0.8924, "step": 6229 }, { "epoch": 2.824116047144152, "grad_norm": 0.2908610842445327, "learning_rate": 3.7931212938354e-05, "loss": 0.9065, "step": 6230 }, { "epoch": 2.824569356300997, "grad_norm": 0.3007160205929872, "learning_rate": 3.7918571204436356e-05, "loss": 0.8898, "step": 6231 }, { "epoch": 2.8250226654578423, "grad_norm": 0.2737593372447412, "learning_rate": 3.790592967897992e-05, "loss": 0.8761, "step": 6232 }, { "epoch": 2.825475974614687, "grad_norm": 0.24773687036020287, "learning_rate": 3.789328836325079e-05, "loss": 0.889, "step": 6233 }, { "epoch": 2.825929283771532, "grad_norm": 0.20511684350673856, "learning_rate": 3.788064725851502e-05, "loss": 0.8574, "step": 6234 }, { "epoch": 2.8263825929283772, "grad_norm": 0.23218064487773796, "learning_rate": 3.7868006366038655e-05, "loss": 0.8814, "step": 6235 }, { "epoch": 2.826835902085222, "grad_norm": 0.2599007257179044, "learning_rate": 3.785536568708772e-05, "loss": 0.9084, "step": 6236 }, { "epoch": 2.827289211242067, "grad_norm": 0.24672088643078985, "learning_rate": 3.784272522292821e-05, "loss": 0.8897, "step": 6237 }, { "epoch": 2.827742520398912, "grad_norm": 0.19809760665439363, "learning_rate": 3.783008497482611e-05, "loss": 0.8752, "step": 6238 }, { "epoch": 2.828195829555757, "grad_norm": 0.27670828318173174, "learning_rate": 3.781744494404737e-05, "loss": 0.8824, "step": 6239 }, { "epoch": 2.828649138712602, "grad_norm": 0.3131078938642687, "learning_rate": 3.780480513185796e-05, "loss": 0.8544, "step": 6240 }, { "epoch": 2.829102447869447, "grad_norm": 0.36709082513300295, "learning_rate": 3.7792165539523746e-05, "loss": 0.8867, "step": 6241 }, { "epoch": 2.829555757026292, "grad_norm": 0.34880320190774944, "learning_rate": 3.777952616831064e-05, "loss": 0.8775, "step": 6242 }, { "epoch": 2.8300090661831367, "grad_norm": 0.2603695762823064, "learning_rate": 3.776688701948452e-05, "loss": 0.8823, "step": 6243 }, { "epoch": 2.830462375339982, "grad_norm": 0.2060386247460911, "learning_rate": 3.775424809431122e-05, "loss": 0.8776, "step": 6244 }, { "epoch": 2.830915684496827, "grad_norm": 0.2685247516109654, "learning_rate": 3.7741609394056575e-05, "loss": 0.8846, "step": 6245 }, { "epoch": 2.8313689936536717, "grad_norm": 0.3386479515299825, "learning_rate": 3.772897091998639e-05, "loss": 0.8772, "step": 6246 }, { "epoch": 2.831822302810517, "grad_norm": 0.3921982492768128, "learning_rate": 3.7716332673366444e-05, "loss": 0.8677, "step": 6247 }, { "epoch": 2.8322756119673618, "grad_norm": 0.4310789573093802, "learning_rate": 3.7703694655462494e-05, "loss": 0.8955, "step": 6248 }, { "epoch": 2.8327289211242066, "grad_norm": 0.38824678844625143, "learning_rate": 3.769105686754027e-05, "loss": 0.8852, "step": 6249 }, { "epoch": 2.833182230281052, "grad_norm": 0.26469265223788385, "learning_rate": 3.767841931086549e-05, "loss": 0.8754, "step": 6250 }, { "epoch": 2.8336355394378967, "grad_norm": 0.31789888068902666, "learning_rate": 3.766578198670383e-05, "loss": 0.8736, "step": 6251 }, { "epoch": 2.8340888485947415, "grad_norm": 0.4011095908883563, "learning_rate": 3.765314489632097e-05, "loss": 0.8681, "step": 6252 }, { "epoch": 2.834542157751587, "grad_norm": 0.3573363892956795, "learning_rate": 3.7640508040982546e-05, "loss": 0.8883, "step": 6253 }, { "epoch": 2.8349954669084316, "grad_norm": 0.34144527486145515, "learning_rate": 3.762787142195417e-05, "loss": 0.8761, "step": 6254 }, { "epoch": 2.8354487760652765, "grad_norm": 0.29531961191026723, "learning_rate": 3.761523504050145e-05, "loss": 0.877, "step": 6255 }, { "epoch": 2.8359020852221217, "grad_norm": 0.26980909636436434, "learning_rate": 3.760259889788995e-05, "loss": 0.8821, "step": 6256 }, { "epoch": 2.8363553943789666, "grad_norm": 0.2996649452134225, "learning_rate": 3.7589962995385205e-05, "loss": 0.8665, "step": 6257 }, { "epoch": 2.8368087035358114, "grad_norm": 0.27870481897620536, "learning_rate": 3.7577327334252764e-05, "loss": 0.8614, "step": 6258 }, { "epoch": 2.8372620126926567, "grad_norm": 0.3348327288805529, "learning_rate": 3.7564691915758116e-05, "loss": 0.892, "step": 6259 }, { "epoch": 2.8377153218495015, "grad_norm": 0.42172275808060417, "learning_rate": 3.755205674116672e-05, "loss": 0.8881, "step": 6260 }, { "epoch": 2.8381686310063463, "grad_norm": 0.36075592528277733, "learning_rate": 3.753942181174403e-05, "loss": 0.8745, "step": 6261 }, { "epoch": 2.838621940163191, "grad_norm": 0.2812676544712938, "learning_rate": 3.7526787128755485e-05, "loss": 0.8934, "step": 6262 }, { "epoch": 2.8390752493200364, "grad_norm": 0.5061876298899771, "learning_rate": 3.7514152693466466e-05, "loss": 0.8931, "step": 6263 }, { "epoch": 2.8395285584768812, "grad_norm": 0.3477799639927694, "learning_rate": 3.750151850714237e-05, "loss": 0.8805, "step": 6264 }, { "epoch": 2.839981867633726, "grad_norm": 0.35464610370986027, "learning_rate": 3.748888457104854e-05, "loss": 0.8876, "step": 6265 }, { "epoch": 2.840435176790571, "grad_norm": 0.2738637658997471, "learning_rate": 3.74762508864503e-05, "loss": 0.8968, "step": 6266 }, { "epoch": 2.840888485947416, "grad_norm": 0.29829559191317934, "learning_rate": 3.746361745461295e-05, "loss": 0.8811, "step": 6267 }, { "epoch": 2.841341795104261, "grad_norm": 0.34403429940806535, "learning_rate": 3.745098427680176e-05, "loss": 0.8829, "step": 6268 }, { "epoch": 2.841795104261106, "grad_norm": 0.3941126470873288, "learning_rate": 3.743835135428198e-05, "loss": 0.8674, "step": 6269 }, { "epoch": 2.842248413417951, "grad_norm": 0.3668782995004719, "learning_rate": 3.742571868831886e-05, "loss": 0.8857, "step": 6270 }, { "epoch": 2.842701722574796, "grad_norm": 0.3214644948270158, "learning_rate": 3.7413086280177565e-05, "loss": 0.895, "step": 6271 }, { "epoch": 2.8431550317316407, "grad_norm": 0.29563649227104344, "learning_rate": 3.7400454131123285e-05, "loss": 0.884, "step": 6272 }, { "epoch": 2.843608340888486, "grad_norm": 0.2878551864573817, "learning_rate": 3.738782224242116e-05, "loss": 0.8807, "step": 6273 }, { "epoch": 2.844061650045331, "grad_norm": 0.2696224241260502, "learning_rate": 3.737519061533632e-05, "loss": 0.8685, "step": 6274 }, { "epoch": 2.8445149592021757, "grad_norm": 0.33592952048804314, "learning_rate": 3.736255925113386e-05, "loss": 0.8943, "step": 6275 }, { "epoch": 2.844968268359021, "grad_norm": 0.40597289648771173, "learning_rate": 3.734992815107884e-05, "loss": 0.8914, "step": 6276 }, { "epoch": 2.8454215775158658, "grad_norm": 0.37716085120558634, "learning_rate": 3.73372973164363e-05, "loss": 0.8591, "step": 6277 }, { "epoch": 2.8458748866727106, "grad_norm": 0.2897183110481195, "learning_rate": 3.7324666748471264e-05, "loss": 0.8859, "step": 6278 }, { "epoch": 2.846328195829556, "grad_norm": 0.2576485231785989, "learning_rate": 3.731203644844871e-05, "loss": 0.8927, "step": 6279 }, { "epoch": 2.8467815049864007, "grad_norm": 0.2452021030567583, "learning_rate": 3.729940641763361e-05, "loss": 0.8758, "step": 6280 }, { "epoch": 2.8472348141432455, "grad_norm": 0.28607442556225193, "learning_rate": 3.728677665729089e-05, "loss": 0.8896, "step": 6281 }, { "epoch": 2.847688123300091, "grad_norm": 0.3214490384250522, "learning_rate": 3.7274147168685464e-05, "loss": 0.8927, "step": 6282 }, { "epoch": 2.8481414324569356, "grad_norm": 0.37352354712729513, "learning_rate": 3.7261517953082206e-05, "loss": 0.8759, "step": 6283 }, { "epoch": 2.8485947416137805, "grad_norm": 0.27539596545444023, "learning_rate": 3.724888901174598e-05, "loss": 0.8652, "step": 6284 }, { "epoch": 2.8490480507706257, "grad_norm": 0.2652966719311011, "learning_rate": 3.7236260345941597e-05, "loss": 0.8806, "step": 6285 }, { "epoch": 2.8495013599274706, "grad_norm": 0.3163811564094791, "learning_rate": 3.7223631956933865e-05, "loss": 0.8878, "step": 6286 }, { "epoch": 2.8499546690843154, "grad_norm": 0.37719536039829077, "learning_rate": 3.7211003845987554e-05, "loss": 0.8634, "step": 6287 }, { "epoch": 2.8504079782411607, "grad_norm": 0.46281040638736143, "learning_rate": 3.71983760143674e-05, "loss": 0.8866, "step": 6288 }, { "epoch": 2.8508612873980055, "grad_norm": 0.43791821127773856, "learning_rate": 3.7185748463338125e-05, "loss": 0.8761, "step": 6289 }, { "epoch": 2.8513145965548503, "grad_norm": 0.38288676416212447, "learning_rate": 3.717312119416441e-05, "loss": 0.9054, "step": 6290 }, { "epoch": 2.8517679057116956, "grad_norm": 0.262836324048374, "learning_rate": 3.716049420811093e-05, "loss": 0.8841, "step": 6291 }, { "epoch": 2.8522212148685404, "grad_norm": 0.22946279365217115, "learning_rate": 3.714786750644229e-05, "loss": 0.868, "step": 6292 }, { "epoch": 2.8526745240253852, "grad_norm": 0.2952245452247361, "learning_rate": 3.71352410904231e-05, "loss": 0.8795, "step": 6293 }, { "epoch": 2.8531278331822305, "grad_norm": 0.3115938555648703, "learning_rate": 3.7122614961317933e-05, "loss": 0.8658, "step": 6294 }, { "epoch": 2.8535811423390753, "grad_norm": 0.3404640950903566, "learning_rate": 3.710998912039133e-05, "loss": 0.8914, "step": 6295 }, { "epoch": 2.85403445149592, "grad_norm": 0.3267461483452965, "learning_rate": 3.7097363568907816e-05, "loss": 0.8929, "step": 6296 }, { "epoch": 2.8544877606527654, "grad_norm": 0.29753155805147846, "learning_rate": 3.708473830813189e-05, "loss": 0.8632, "step": 6297 }, { "epoch": 2.8549410698096103, "grad_norm": 0.20472107916473223, "learning_rate": 3.7072113339327974e-05, "loss": 0.8735, "step": 6298 }, { "epoch": 2.855394378966455, "grad_norm": 0.27413961151924865, "learning_rate": 3.70594886637605e-05, "loss": 0.8727, "step": 6299 }, { "epoch": 2.8558476881233004, "grad_norm": 0.35177233379527045, "learning_rate": 3.704686428269389e-05, "loss": 0.8583, "step": 6300 }, { "epoch": 2.856300997280145, "grad_norm": 0.41580167868503376, "learning_rate": 3.70342401973925e-05, "loss": 0.9026, "step": 6301 }, { "epoch": 2.85675430643699, "grad_norm": 0.439719704858271, "learning_rate": 3.702161640912067e-05, "loss": 0.8731, "step": 6302 }, { "epoch": 2.857207615593835, "grad_norm": 0.4799418083369069, "learning_rate": 3.700899291914271e-05, "loss": 0.8898, "step": 6303 }, { "epoch": 2.85766092475068, "grad_norm": 0.46536076797512116, "learning_rate": 3.6996369728722894e-05, "loss": 0.8783, "step": 6304 }, { "epoch": 2.858114233907525, "grad_norm": 0.3477333549154172, "learning_rate": 3.6983746839125484e-05, "loss": 0.8688, "step": 6305 }, { "epoch": 2.8585675430643698, "grad_norm": 0.23541193593965412, "learning_rate": 3.697112425161469e-05, "loss": 0.8931, "step": 6306 }, { "epoch": 2.8590208522212146, "grad_norm": 0.21850855811286388, "learning_rate": 3.695850196745471e-05, "loss": 0.869, "step": 6307 }, { "epoch": 2.85947416137806, "grad_norm": 0.2927248837224789, "learning_rate": 3.694587998790969e-05, "loss": 0.8809, "step": 6308 }, { "epoch": 2.8599274705349047, "grad_norm": 0.3466992101047666, "learning_rate": 3.693325831424377e-05, "loss": 0.8915, "step": 6309 }, { "epoch": 2.8603807796917495, "grad_norm": 0.3289405722423622, "learning_rate": 3.692063694772103e-05, "loss": 0.8834, "step": 6310 }, { "epoch": 2.860834088848595, "grad_norm": 0.3356347253659286, "learning_rate": 3.690801588960556e-05, "loss": 0.8776, "step": 6311 }, { "epoch": 2.8612873980054396, "grad_norm": 0.370906959629084, "learning_rate": 3.689539514116138e-05, "loss": 0.8795, "step": 6312 }, { "epoch": 2.8617407071622845, "grad_norm": 0.3480855504001318, "learning_rate": 3.688277470365251e-05, "loss": 0.8923, "step": 6313 }, { "epoch": 2.8621940163191297, "grad_norm": 0.2487543819913017, "learning_rate": 3.687015457834291e-05, "loss": 0.8667, "step": 6314 }, { "epoch": 2.8626473254759746, "grad_norm": 0.22926115375153683, "learning_rate": 3.685753476649652e-05, "loss": 0.8893, "step": 6315 }, { "epoch": 2.8631006346328194, "grad_norm": 0.26712157164907346, "learning_rate": 3.684491526937727e-05, "loss": 0.8811, "step": 6316 }, { "epoch": 2.8635539437896647, "grad_norm": 0.2495154246269076, "learning_rate": 3.683229608824904e-05, "loss": 0.8725, "step": 6317 }, { "epoch": 2.8640072529465095, "grad_norm": 0.24827682003181847, "learning_rate": 3.681967722437565e-05, "loss": 0.9063, "step": 6318 }, { "epoch": 2.8644605621033543, "grad_norm": 0.23707730356135825, "learning_rate": 3.680705867902094e-05, "loss": 0.8763, "step": 6319 }, { "epoch": 2.8649138712601996, "grad_norm": 0.22896748830063648, "learning_rate": 3.679444045344868e-05, "loss": 0.881, "step": 6320 }, { "epoch": 2.8653671804170444, "grad_norm": 0.28462730993425167, "learning_rate": 3.678182254892263e-05, "loss": 0.8881, "step": 6321 }, { "epoch": 2.8658204895738892, "grad_norm": 0.30289393149063654, "learning_rate": 3.676920496670652e-05, "loss": 0.8735, "step": 6322 }, { "epoch": 2.8662737987307345, "grad_norm": 0.29819103012196185, "learning_rate": 3.675658770806402e-05, "loss": 0.887, "step": 6323 }, { "epoch": 2.8667271078875793, "grad_norm": 0.28584942376146233, "learning_rate": 3.674397077425878e-05, "loss": 0.8998, "step": 6324 }, { "epoch": 2.867180417044424, "grad_norm": 0.28785731620865845, "learning_rate": 3.673135416655445e-05, "loss": 0.8991, "step": 6325 }, { "epoch": 2.8676337262012694, "grad_norm": 0.27771398714984835, "learning_rate": 3.6718737886214603e-05, "loss": 0.8835, "step": 6326 }, { "epoch": 2.8680870353581143, "grad_norm": 0.23394322669575454, "learning_rate": 3.67061219345028e-05, "loss": 0.8751, "step": 6327 }, { "epoch": 2.868540344514959, "grad_norm": 0.216354254893085, "learning_rate": 3.669350631268257e-05, "loss": 0.8789, "step": 6328 }, { "epoch": 2.8689936536718044, "grad_norm": 0.2196127411437509, "learning_rate": 3.6680891022017393e-05, "loss": 0.8801, "step": 6329 }, { "epoch": 2.869446962828649, "grad_norm": 0.25009155676805717, "learning_rate": 3.666827606377074e-05, "loss": 0.8968, "step": 6330 }, { "epoch": 2.869900271985494, "grad_norm": 0.22181081780704917, "learning_rate": 3.665566143920603e-05, "loss": 0.8631, "step": 6331 }, { "epoch": 2.8703535811423393, "grad_norm": 0.2401624691008328, "learning_rate": 3.664304714958666e-05, "loss": 0.8786, "step": 6332 }, { "epoch": 2.870806890299184, "grad_norm": 0.2530652788361371, "learning_rate": 3.6630433196175986e-05, "loss": 0.8612, "step": 6333 }, { "epoch": 2.871260199456029, "grad_norm": 0.288148201724063, "learning_rate": 3.661781958023732e-05, "loss": 0.9066, "step": 6334 }, { "epoch": 2.871713508612874, "grad_norm": 0.2953000109058463, "learning_rate": 3.660520630303397e-05, "loss": 0.8666, "step": 6335 }, { "epoch": 2.872166817769719, "grad_norm": 0.3480165814656687, "learning_rate": 3.659259336582919e-05, "loss": 0.8924, "step": 6336 }, { "epoch": 2.872620126926564, "grad_norm": 0.3496121385249228, "learning_rate": 3.65799807698862e-05, "loss": 0.8746, "step": 6337 }, { "epoch": 2.873073436083409, "grad_norm": 0.25649684481088075, "learning_rate": 3.656736851646818e-05, "loss": 0.8881, "step": 6338 }, { "epoch": 2.873526745240254, "grad_norm": 0.3076757980472284, "learning_rate": 3.655475660683829e-05, "loss": 0.866, "step": 6339 }, { "epoch": 2.873980054397099, "grad_norm": 0.28855880526667743, "learning_rate": 3.6542145042259646e-05, "loss": 0.8705, "step": 6340 }, { "epoch": 2.8744333635539436, "grad_norm": 0.3246766033441759, "learning_rate": 3.6529533823995345e-05, "loss": 0.8576, "step": 6341 }, { "epoch": 2.874886672710789, "grad_norm": 0.3841388912452113, "learning_rate": 3.651692295330843e-05, "loss": 0.8605, "step": 6342 }, { "epoch": 2.8753399818676337, "grad_norm": 0.3158763249914843, "learning_rate": 3.6504312431461915e-05, "loss": 0.8621, "step": 6343 }, { "epoch": 2.8757932910244786, "grad_norm": 0.27794266589594857, "learning_rate": 3.649170225971879e-05, "loss": 0.8772, "step": 6344 }, { "epoch": 2.8762466001813234, "grad_norm": 0.21065640038123754, "learning_rate": 3.647909243934199e-05, "loss": 0.9021, "step": 6345 }, { "epoch": 2.8766999093381687, "grad_norm": 0.3168525273267963, "learning_rate": 3.646648297159443e-05, "loss": 0.8695, "step": 6346 }, { "epoch": 2.8771532184950135, "grad_norm": 0.2774121546152305, "learning_rate": 3.645387385773899e-05, "loss": 0.8723, "step": 6347 }, { "epoch": 2.8776065276518583, "grad_norm": 0.33754360912613807, "learning_rate": 3.6441265099038505e-05, "loss": 0.9026, "step": 6348 }, { "epoch": 2.8780598368087036, "grad_norm": 0.35199113383002884, "learning_rate": 3.6428656696755776e-05, "loss": 0.8909, "step": 6349 }, { "epoch": 2.8785131459655484, "grad_norm": 0.36792467466337675, "learning_rate": 3.641604865215357e-05, "loss": 0.902, "step": 6350 }, { "epoch": 2.8789664551223932, "grad_norm": 0.41492917775958943, "learning_rate": 3.640344096649463e-05, "loss": 0.8908, "step": 6351 }, { "epoch": 2.8794197642792385, "grad_norm": 0.32883697188020866, "learning_rate": 3.639083364104165e-05, "loss": 0.9063, "step": 6352 }, { "epoch": 2.8798730734360833, "grad_norm": 0.27831450452523354, "learning_rate": 3.6378226677057275e-05, "loss": 0.8853, "step": 6353 }, { "epoch": 2.880326382592928, "grad_norm": 0.29400043060434755, "learning_rate": 3.6365620075804154e-05, "loss": 0.8934, "step": 6354 }, { "epoch": 2.8807796917497734, "grad_norm": 0.29764652047053286, "learning_rate": 3.6353013838544875e-05, "loss": 0.8786, "step": 6355 }, { "epoch": 2.8812330009066183, "grad_norm": 0.3247965590856455, "learning_rate": 3.6340407966541965e-05, "loss": 0.8671, "step": 6356 }, { "epoch": 2.881686310063463, "grad_norm": 0.3650130929798007, "learning_rate": 3.6327802461057957e-05, "loss": 0.8819, "step": 6357 }, { "epoch": 2.8821396192203084, "grad_norm": 0.3791008337664259, "learning_rate": 3.6315197323355315e-05, "loss": 0.8778, "step": 6358 }, { "epoch": 2.882592928377153, "grad_norm": 0.3432307584957783, "learning_rate": 3.630259255469649e-05, "loss": 0.8764, "step": 6359 }, { "epoch": 2.883046237533998, "grad_norm": 0.29557551701593, "learning_rate": 3.62899881563439e-05, "loss": 0.8814, "step": 6360 }, { "epoch": 2.8834995466908433, "grad_norm": 0.24959614340291658, "learning_rate": 3.6277384129559885e-05, "loss": 0.8888, "step": 6361 }, { "epoch": 2.883952855847688, "grad_norm": 0.23213880849470264, "learning_rate": 3.62647804756068e-05, "loss": 0.871, "step": 6362 }, { "epoch": 2.884406165004533, "grad_norm": 0.30701338939245953, "learning_rate": 3.625217719574694e-05, "loss": 0.8872, "step": 6363 }, { "epoch": 2.8848594741613782, "grad_norm": 0.32319995782543204, "learning_rate": 3.623957429124253e-05, "loss": 0.8788, "step": 6364 }, { "epoch": 2.885312783318223, "grad_norm": 0.38715919946982913, "learning_rate": 3.622697176335581e-05, "loss": 0.896, "step": 6365 }, { "epoch": 2.885766092475068, "grad_norm": 0.42892896615837206, "learning_rate": 3.621436961334895e-05, "loss": 0.9029, "step": 6366 }, { "epoch": 2.886219401631913, "grad_norm": 0.44017872638992084, "learning_rate": 3.620176784248411e-05, "loss": 0.8867, "step": 6367 }, { "epoch": 2.886672710788758, "grad_norm": 0.308218895376885, "learning_rate": 3.618916645202338e-05, "loss": 0.8743, "step": 6368 }, { "epoch": 2.887126019945603, "grad_norm": 0.29151149170787855, "learning_rate": 3.617656544322883e-05, "loss": 0.8815, "step": 6369 }, { "epoch": 2.887579329102448, "grad_norm": 0.30140716819053076, "learning_rate": 3.616396481736248e-05, "loss": 0.8896, "step": 6370 }, { "epoch": 2.888032638259293, "grad_norm": 0.2779971764988013, "learning_rate": 3.615136457568633e-05, "loss": 0.8818, "step": 6371 }, { "epoch": 2.8884859474161377, "grad_norm": 0.380363988564186, "learning_rate": 3.613876471946233e-05, "loss": 0.9106, "step": 6372 }, { "epoch": 2.888939256572983, "grad_norm": 0.3806783958355455, "learning_rate": 3.612616524995239e-05, "loss": 0.9036, "step": 6373 }, { "epoch": 2.889392565729828, "grad_norm": 0.33805340697311, "learning_rate": 3.611356616841841e-05, "loss": 0.884, "step": 6374 }, { "epoch": 2.8898458748866727, "grad_norm": 0.30890674844812716, "learning_rate": 3.610096747612218e-05, "loss": 0.8829, "step": 6375 }, { "epoch": 2.890299184043518, "grad_norm": 0.28887758729643676, "learning_rate": 3.608836917432552e-05, "loss": 0.869, "step": 6376 }, { "epoch": 2.8907524932003628, "grad_norm": 0.31048455154557336, "learning_rate": 3.6075771264290175e-05, "loss": 0.8784, "step": 6377 }, { "epoch": 2.8912058023572076, "grad_norm": 0.2993933942534631, "learning_rate": 3.606317374727789e-05, "loss": 0.8837, "step": 6378 }, { "epoch": 2.891659111514053, "grad_norm": 0.2753874464467583, "learning_rate": 3.60505766245503e-05, "loss": 0.9017, "step": 6379 }, { "epoch": 2.8921124206708977, "grad_norm": 0.24541447399100424, "learning_rate": 3.603797989736908e-05, "loss": 0.8865, "step": 6380 }, { "epoch": 2.8925657298277425, "grad_norm": 0.27285323655088006, "learning_rate": 3.6025383566995814e-05, "loss": 0.8757, "step": 6381 }, { "epoch": 2.8930190389845873, "grad_norm": 0.28166966992657017, "learning_rate": 3.6012787634692067e-05, "loss": 0.8705, "step": 6382 }, { "epoch": 2.8934723481414326, "grad_norm": 0.20567609403983206, "learning_rate": 3.600019210171935e-05, "loss": 0.8817, "step": 6383 }, { "epoch": 2.8939256572982774, "grad_norm": 0.2898402138421751, "learning_rate": 3.5987596969339155e-05, "loss": 0.857, "step": 6384 }, { "epoch": 2.8943789664551223, "grad_norm": 0.32718548200030195, "learning_rate": 3.5975002238812915e-05, "loss": 0.8759, "step": 6385 }, { "epoch": 2.894832275611967, "grad_norm": 0.2646266896284652, "learning_rate": 3.596240791140203e-05, "loss": 0.8996, "step": 6386 }, { "epoch": 2.8952855847688124, "grad_norm": 0.3151603110243066, "learning_rate": 3.594981398836786e-05, "loss": 0.8775, "step": 6387 }, { "epoch": 2.895738893925657, "grad_norm": 0.2566423396483705, "learning_rate": 3.593722047097172e-05, "loss": 0.8458, "step": 6388 }, { "epoch": 2.896192203082502, "grad_norm": 0.22397654715690227, "learning_rate": 3.59246273604749e-05, "loss": 0.8712, "step": 6389 }, { "epoch": 2.8966455122393473, "grad_norm": 0.3095059507777026, "learning_rate": 3.5912034658138614e-05, "loss": 0.8639, "step": 6390 }, { "epoch": 2.897098821396192, "grad_norm": 0.27376865650650495, "learning_rate": 3.5899442365224085e-05, "loss": 0.8953, "step": 6391 }, { "epoch": 2.897552130553037, "grad_norm": 0.30369811464748847, "learning_rate": 3.588685048299244e-05, "loss": 0.8903, "step": 6392 }, { "epoch": 2.8980054397098822, "grad_norm": 0.21928197081046782, "learning_rate": 3.587425901270482e-05, "loss": 0.8576, "step": 6393 }, { "epoch": 2.898458748866727, "grad_norm": 0.3793386461073858, "learning_rate": 3.5861667955622285e-05, "loss": 0.881, "step": 6394 }, { "epoch": 2.898912058023572, "grad_norm": 0.4035355649482683, "learning_rate": 3.584907731300586e-05, "loss": 0.8946, "step": 6395 }, { "epoch": 2.899365367180417, "grad_norm": 0.34515478344247774, "learning_rate": 3.5836487086116545e-05, "loss": 0.8767, "step": 6396 }, { "epoch": 2.899818676337262, "grad_norm": 0.33815331941033583, "learning_rate": 3.5823897276215275e-05, "loss": 0.8802, "step": 6397 }, { "epoch": 2.900271985494107, "grad_norm": 0.3311395030125725, "learning_rate": 3.581130788456297e-05, "loss": 0.8838, "step": 6398 }, { "epoch": 2.900725294650952, "grad_norm": 0.317354991862968, "learning_rate": 3.57987189124205e-05, "loss": 0.8746, "step": 6399 }, { "epoch": 2.901178603807797, "grad_norm": 0.2716284646236348, "learning_rate": 3.578613036104867e-05, "loss": 0.8681, "step": 6400 }, { "epoch": 2.9016319129646417, "grad_norm": 0.24803530680959643, "learning_rate": 3.577354223170827e-05, "loss": 0.8724, "step": 6401 }, { "epoch": 2.902085222121487, "grad_norm": 0.33963048419794684, "learning_rate": 3.5760954525660034e-05, "loss": 0.874, "step": 6402 }, { "epoch": 2.902538531278332, "grad_norm": 0.36402417196410397, "learning_rate": 3.574836724416466e-05, "loss": 0.8794, "step": 6403 }, { "epoch": 2.9029918404351767, "grad_norm": 0.3503129974976808, "learning_rate": 3.5735780388482814e-05, "loss": 0.8778, "step": 6404 }, { "epoch": 2.903445149592022, "grad_norm": 0.23457880740039175, "learning_rate": 3.572319395987508e-05, "loss": 0.8825, "step": 6405 }, { "epoch": 2.9038984587488668, "grad_norm": 0.30207153492196326, "learning_rate": 3.571060795960205e-05, "loss": 0.8678, "step": 6406 }, { "epoch": 2.9043517679057116, "grad_norm": 0.34292672151823855, "learning_rate": 3.5698022388924234e-05, "loss": 0.8795, "step": 6407 }, { "epoch": 2.904805077062557, "grad_norm": 0.33893742598845283, "learning_rate": 3.568543724910212e-05, "loss": 0.8804, "step": 6408 }, { "epoch": 2.9052583862194017, "grad_norm": 0.27523417127446637, "learning_rate": 3.567285254139614e-05, "loss": 0.88, "step": 6409 }, { "epoch": 2.9057116953762465, "grad_norm": 0.3549873360601669, "learning_rate": 3.5660268267066704e-05, "loss": 0.883, "step": 6410 }, { "epoch": 2.906165004533092, "grad_norm": 0.3961023841704148, "learning_rate": 3.564768442737415e-05, "loss": 0.8741, "step": 6411 }, { "epoch": 2.9066183136899366, "grad_norm": 0.27088835819864204, "learning_rate": 3.563510102357879e-05, "loss": 0.8975, "step": 6412 }, { "epoch": 2.9070716228467814, "grad_norm": 0.41831031464303836, "learning_rate": 3.562251805694092e-05, "loss": 0.8778, "step": 6413 }, { "epoch": 2.9075249320036267, "grad_norm": 0.4356867228770331, "learning_rate": 3.56099355287207e-05, "loss": 0.8728, "step": 6414 }, { "epoch": 2.9079782411604715, "grad_norm": 0.3214463454165222, "learning_rate": 3.559735344017834e-05, "loss": 0.8892, "step": 6415 }, { "epoch": 2.9084315503173164, "grad_norm": 0.32201918389461204, "learning_rate": 3.558477179257398e-05, "loss": 0.8872, "step": 6416 }, { "epoch": 2.9088848594741616, "grad_norm": 0.32681153159712895, "learning_rate": 3.55721905871677e-05, "loss": 0.877, "step": 6417 }, { "epoch": 2.9093381686310065, "grad_norm": 0.36667003254771235, "learning_rate": 3.555960982521955e-05, "loss": 0.8547, "step": 6418 }, { "epoch": 2.9097914777878513, "grad_norm": 0.2994164021643649, "learning_rate": 3.5547029507989514e-05, "loss": 0.8722, "step": 6419 }, { "epoch": 2.910244786944696, "grad_norm": 0.26927911246212644, "learning_rate": 3.5534449636737574e-05, "loss": 0.8899, "step": 6420 }, { "epoch": 2.9106980961015414, "grad_norm": 0.3300970441425962, "learning_rate": 3.552187021272362e-05, "loss": 0.9058, "step": 6421 }, { "epoch": 2.9111514052583862, "grad_norm": 0.3894729980846933, "learning_rate": 3.550929123720752e-05, "loss": 0.8681, "step": 6422 }, { "epoch": 2.911604714415231, "grad_norm": 0.34176334918939694, "learning_rate": 3.54967127114491e-05, "loss": 0.886, "step": 6423 }, { "epoch": 2.912058023572076, "grad_norm": 0.35737407972166374, "learning_rate": 3.548413463670814e-05, "loss": 0.8861, "step": 6424 }, { "epoch": 2.912511332728921, "grad_norm": 0.32542371867146713, "learning_rate": 3.5471557014244374e-05, "loss": 0.9, "step": 6425 }, { "epoch": 2.912964641885766, "grad_norm": 0.3186876140364769, "learning_rate": 3.545897984531748e-05, "loss": 0.8624, "step": 6426 }, { "epoch": 2.913417951042611, "grad_norm": 0.3300064008147913, "learning_rate": 3.5446403131187096e-05, "loss": 0.8815, "step": 6427 }, { "epoch": 2.913871260199456, "grad_norm": 0.2275166425921681, "learning_rate": 3.5433826873112825e-05, "loss": 0.8812, "step": 6428 }, { "epoch": 2.914324569356301, "grad_norm": 0.2715178095290377, "learning_rate": 3.542125107235421e-05, "loss": 0.8512, "step": 6429 }, { "epoch": 2.9147778785131457, "grad_norm": 0.25898933743105096, "learning_rate": 3.540867573017076e-05, "loss": 0.8738, "step": 6430 }, { "epoch": 2.915231187669991, "grad_norm": 0.258385520701323, "learning_rate": 3.5396100847821926e-05, "loss": 0.8837, "step": 6431 }, { "epoch": 2.915684496826836, "grad_norm": 0.2632362791364958, "learning_rate": 3.538352642656713e-05, "loss": 0.8728, "step": 6432 }, { "epoch": 2.9161378059836807, "grad_norm": 0.28445866835784195, "learning_rate": 3.537095246766573e-05, "loss": 0.8836, "step": 6433 }, { "epoch": 2.916591115140526, "grad_norm": 0.33024389982193597, "learning_rate": 3.535837897237703e-05, "loss": 0.8784, "step": 6434 }, { "epoch": 2.9170444242973708, "grad_norm": 0.3386311507459742, "learning_rate": 3.534580594196033e-05, "loss": 0.8943, "step": 6435 }, { "epoch": 2.9174977334542156, "grad_norm": 0.29164658915367797, "learning_rate": 3.533323337767484e-05, "loss": 0.8734, "step": 6436 }, { "epoch": 2.917951042611061, "grad_norm": 0.25295522028057926, "learning_rate": 3.532066128077975e-05, "loss": 0.8799, "step": 6437 }, { "epoch": 2.9184043517679057, "grad_norm": 0.18969304589536276, "learning_rate": 3.530808965253417e-05, "loss": 0.8907, "step": 6438 }, { "epoch": 2.9188576609247505, "grad_norm": 0.2780606186215746, "learning_rate": 3.529551849419721e-05, "loss": 0.8604, "step": 6439 }, { "epoch": 2.919310970081596, "grad_norm": 0.24413917023290496, "learning_rate": 3.528294780702789e-05, "loss": 0.8826, "step": 6440 }, { "epoch": 2.9197642792384406, "grad_norm": 0.23046913452223125, "learning_rate": 3.527037759228522e-05, "loss": 0.8732, "step": 6441 }, { "epoch": 2.9202175883952854, "grad_norm": 0.30132926086009726, "learning_rate": 3.5257807851228124e-05, "loss": 0.8927, "step": 6442 }, { "epoch": 2.9206708975521307, "grad_norm": 0.3309530568749677, "learning_rate": 3.5245238585115516e-05, "loss": 0.8799, "step": 6443 }, { "epoch": 2.9211242067089755, "grad_norm": 0.33417389195789926, "learning_rate": 3.5232669795206234e-05, "loss": 0.8874, "step": 6444 }, { "epoch": 2.9215775158658204, "grad_norm": 0.3186769422208477, "learning_rate": 3.522010148275909e-05, "loss": 0.86, "step": 6445 }, { "epoch": 2.9220308250226656, "grad_norm": 0.2766732833796192, "learning_rate": 3.520753364903284e-05, "loss": 0.873, "step": 6446 }, { "epoch": 2.9224841341795105, "grad_norm": 0.26589909832121783, "learning_rate": 3.519496629528616e-05, "loss": 0.8833, "step": 6447 }, { "epoch": 2.9229374433363553, "grad_norm": 0.22973576910848856, "learning_rate": 3.518239942277773e-05, "loss": 0.8855, "step": 6448 }, { "epoch": 2.9233907524932006, "grad_norm": 0.30160472775862907, "learning_rate": 3.516983303276616e-05, "loss": 0.8824, "step": 6449 }, { "epoch": 2.9238440616500454, "grad_norm": 0.3821567894698147, "learning_rate": 3.515726712651001e-05, "loss": 0.8803, "step": 6450 }, { "epoch": 2.9242973708068902, "grad_norm": 0.3625653477757634, "learning_rate": 3.5144701705267806e-05, "loss": 0.8772, "step": 6451 }, { "epoch": 2.9247506799637355, "grad_norm": 0.29061210049838543, "learning_rate": 3.5132136770297977e-05, "loss": 0.889, "step": 6452 }, { "epoch": 2.9252039891205803, "grad_norm": 0.19838058252142846, "learning_rate": 3.511957232285895e-05, "loss": 0.8684, "step": 6453 }, { "epoch": 2.925657298277425, "grad_norm": 0.2366551246398075, "learning_rate": 3.510700836420911e-05, "loss": 0.8883, "step": 6454 }, { "epoch": 2.9261106074342704, "grad_norm": 0.3355602945349227, "learning_rate": 3.509444489560675e-05, "loss": 0.8731, "step": 6455 }, { "epoch": 2.9265639165911153, "grad_norm": 0.3820506441063505, "learning_rate": 3.508188191831016e-05, "loss": 0.8762, "step": 6456 }, { "epoch": 2.92701722574796, "grad_norm": 0.343597135716649, "learning_rate": 3.506931943357755e-05, "loss": 0.8684, "step": 6457 }, { "epoch": 2.927470534904805, "grad_norm": 0.328180144108336, "learning_rate": 3.5056757442667084e-05, "loss": 0.8796, "step": 6458 }, { "epoch": 2.92792384406165, "grad_norm": 0.3418191136501641, "learning_rate": 3.5044195946836886e-05, "loss": 0.8808, "step": 6459 }, { "epoch": 2.928377153218495, "grad_norm": 0.2972731756983337, "learning_rate": 3.503163494734504e-05, "loss": 0.8837, "step": 6460 }, { "epoch": 2.92883046237534, "grad_norm": 0.3633853697863633, "learning_rate": 3.501907444544955e-05, "loss": 0.8778, "step": 6461 }, { "epoch": 2.929283771532185, "grad_norm": 0.31179456247022513, "learning_rate": 3.50065144424084e-05, "loss": 0.8798, "step": 6462 }, { "epoch": 2.92973708068903, "grad_norm": 0.2835402164428079, "learning_rate": 3.499395493947949e-05, "loss": 0.8796, "step": 6463 }, { "epoch": 2.9301903898458748, "grad_norm": 0.2727225567340506, "learning_rate": 3.498139593792072e-05, "loss": 0.873, "step": 6464 }, { "epoch": 2.9306436990027196, "grad_norm": 0.2419535166034816, "learning_rate": 3.4968837438989886e-05, "loss": 0.8835, "step": 6465 }, { "epoch": 2.931097008159565, "grad_norm": 0.25281810478796696, "learning_rate": 3.495627944394477e-05, "loss": 0.8642, "step": 6466 }, { "epoch": 2.9315503173164097, "grad_norm": 0.22416206098743827, "learning_rate": 3.494372195404309e-05, "loss": 0.8604, "step": 6467 }, { "epoch": 2.9320036264732545, "grad_norm": 0.26246182727274975, "learning_rate": 3.493116497054252e-05, "loss": 0.8876, "step": 6468 }, { "epoch": 2.9324569356301, "grad_norm": 0.25406053204010337, "learning_rate": 3.491860849470067e-05, "loss": 0.8654, "step": 6469 }, { "epoch": 2.9329102447869446, "grad_norm": 0.24027442949370648, "learning_rate": 3.490605252777514e-05, "loss": 0.8674, "step": 6470 }, { "epoch": 2.9333635539437894, "grad_norm": 0.23561531463647947, "learning_rate": 3.489349707102339e-05, "loss": 0.8877, "step": 6471 }, { "epoch": 2.9338168631006347, "grad_norm": 0.3127458491015143, "learning_rate": 3.488094212570293e-05, "loss": 0.8804, "step": 6472 }, { "epoch": 2.9342701722574795, "grad_norm": 0.2790463365079401, "learning_rate": 3.4868387693071154e-05, "loss": 0.891, "step": 6473 }, { "epoch": 2.9347234814143244, "grad_norm": 0.2799173073654266, "learning_rate": 3.485583377438543e-05, "loss": 0.8867, "step": 6474 }, { "epoch": 2.9351767905711696, "grad_norm": 0.2685514628955899, "learning_rate": 3.4843280370903074e-05, "loss": 0.8933, "step": 6475 }, { "epoch": 2.9356300997280145, "grad_norm": 0.240818857866496, "learning_rate": 3.483072748388136e-05, "loss": 0.892, "step": 6476 }, { "epoch": 2.9360834088848593, "grad_norm": 0.2968604746793986, "learning_rate": 3.481817511457746e-05, "loss": 0.8746, "step": 6477 }, { "epoch": 2.9365367180417046, "grad_norm": 0.27533974327061195, "learning_rate": 3.480562326424855e-05, "loss": 0.8978, "step": 6478 }, { "epoch": 2.9369900271985494, "grad_norm": 0.2703441846893393, "learning_rate": 3.479307193415175e-05, "loss": 0.8903, "step": 6479 }, { "epoch": 2.9374433363553942, "grad_norm": 0.2905876235447651, "learning_rate": 3.478052112554409e-05, "loss": 0.8821, "step": 6480 }, { "epoch": 2.9378966455122395, "grad_norm": 0.20368182257882275, "learning_rate": 3.476797083968258e-05, "loss": 0.8773, "step": 6481 }, { "epoch": 2.9383499546690843, "grad_norm": 0.29152279467646164, "learning_rate": 3.475542107782417e-05, "loss": 0.9051, "step": 6482 }, { "epoch": 2.938803263825929, "grad_norm": 0.31517234450353043, "learning_rate": 3.474287184122575e-05, "loss": 0.8772, "step": 6483 }, { "epoch": 2.9392565729827744, "grad_norm": 0.27541391871415555, "learning_rate": 3.473032313114416e-05, "loss": 0.8737, "step": 6484 }, { "epoch": 2.9397098821396193, "grad_norm": 0.2531834674404645, "learning_rate": 3.47177749488362e-05, "loss": 0.8798, "step": 6485 }, { "epoch": 2.940163191296464, "grad_norm": 0.20629099175195328, "learning_rate": 3.47052272955586e-05, "loss": 0.8739, "step": 6486 }, { "epoch": 2.9406165004533094, "grad_norm": 0.23621884170497576, "learning_rate": 3.469268017256807e-05, "loss": 0.8809, "step": 6487 }, { "epoch": 2.941069809610154, "grad_norm": 0.28052615100694767, "learning_rate": 3.4680133581121194e-05, "loss": 0.8862, "step": 6488 }, { "epoch": 2.941523118766999, "grad_norm": 0.26712683348639177, "learning_rate": 3.4667587522474585e-05, "loss": 0.8759, "step": 6489 }, { "epoch": 2.9419764279238443, "grad_norm": 0.2690903217708349, "learning_rate": 3.4655041997884756e-05, "loss": 0.887, "step": 6490 }, { "epoch": 2.942429737080689, "grad_norm": 0.29636199422420134, "learning_rate": 3.4642497008608177e-05, "loss": 0.8911, "step": 6491 }, { "epoch": 2.942883046237534, "grad_norm": 0.271655945381079, "learning_rate": 3.462995255590128e-05, "loss": 0.8878, "step": 6492 }, { "epoch": 2.943336355394379, "grad_norm": 0.20230405490862358, "learning_rate": 3.46174086410204e-05, "loss": 0.8906, "step": 6493 }, { "epoch": 2.943789664551224, "grad_norm": 0.2596677712045442, "learning_rate": 3.460486526522187e-05, "loss": 0.8887, "step": 6494 }, { "epoch": 2.944242973708069, "grad_norm": 0.26241341680567376, "learning_rate": 3.4592322429761937e-05, "loss": 0.8818, "step": 6495 }, { "epoch": 2.944696282864914, "grad_norm": 0.23887953601441644, "learning_rate": 3.45797801358968e-05, "loss": 0.8694, "step": 6496 }, { "epoch": 2.945149592021759, "grad_norm": 0.25036054829158944, "learning_rate": 3.4567238384882624e-05, "loss": 0.9025, "step": 6497 }, { "epoch": 2.945602901178604, "grad_norm": 0.2689774238592585, "learning_rate": 3.455469717797549e-05, "loss": 0.8672, "step": 6498 }, { "epoch": 2.9460562103354486, "grad_norm": 0.26456926098979167, "learning_rate": 3.454215651643143e-05, "loss": 0.8918, "step": 6499 }, { "epoch": 2.946509519492294, "grad_norm": 0.26869864909425284, "learning_rate": 3.452961640150643e-05, "loss": 0.8728, "step": 6500 }, { "epoch": 2.9469628286491387, "grad_norm": 0.24173958923693353, "learning_rate": 3.4517076834456435e-05, "loss": 0.8751, "step": 6501 }, { "epoch": 2.9474161378059835, "grad_norm": 0.3027534460448017, "learning_rate": 3.450453781653731e-05, "loss": 0.8793, "step": 6502 }, { "epoch": 2.9478694469628284, "grad_norm": 0.34974462548645985, "learning_rate": 3.4491999349004874e-05, "loss": 0.8825, "step": 6503 }, { "epoch": 2.9483227561196736, "grad_norm": 0.28629564413301806, "learning_rate": 3.447946143311488e-05, "loss": 0.8648, "step": 6504 }, { "epoch": 2.9487760652765185, "grad_norm": 0.2412523212040073, "learning_rate": 3.446692407012306e-05, "loss": 0.8811, "step": 6505 }, { "epoch": 2.9492293744333633, "grad_norm": 0.26056106539267954, "learning_rate": 3.445438726128505e-05, "loss": 0.8682, "step": 6506 }, { "epoch": 2.9496826835902086, "grad_norm": 0.2516991668039863, "learning_rate": 3.444185100785645e-05, "loss": 0.8709, "step": 6507 }, { "epoch": 2.9501359927470534, "grad_norm": 0.21404289443480706, "learning_rate": 3.442931531109281e-05, "loss": 0.8686, "step": 6508 }, { "epoch": 2.9505893019038982, "grad_norm": 0.25161822731129607, "learning_rate": 3.4416780172249636e-05, "loss": 0.9019, "step": 6509 }, { "epoch": 2.9510426110607435, "grad_norm": 0.32408478445471306, "learning_rate": 3.440424559258231e-05, "loss": 0.8863, "step": 6510 }, { "epoch": 2.9514959202175883, "grad_norm": 0.3261910884655477, "learning_rate": 3.4391711573346236e-05, "loss": 0.8805, "step": 6511 }, { "epoch": 2.951949229374433, "grad_norm": 0.21609934163344785, "learning_rate": 3.437917811579673e-05, "loss": 0.8919, "step": 6512 }, { "epoch": 2.9524025385312784, "grad_norm": 0.26702812801736076, "learning_rate": 3.436664522118906e-05, "loss": 0.877, "step": 6513 }, { "epoch": 2.9528558476881233, "grad_norm": 0.35241623363690733, "learning_rate": 3.435411289077843e-05, "loss": 0.8815, "step": 6514 }, { "epoch": 2.953309156844968, "grad_norm": 0.27234376495155926, "learning_rate": 3.434158112581998e-05, "loss": 0.8667, "step": 6515 }, { "epoch": 2.9537624660018134, "grad_norm": 0.23098884976644277, "learning_rate": 3.432904992756881e-05, "loss": 0.8883, "step": 6516 }, { "epoch": 2.954215775158658, "grad_norm": 0.29856515092207025, "learning_rate": 3.4316519297279956e-05, "loss": 0.8962, "step": 6517 }, { "epoch": 2.954669084315503, "grad_norm": 0.28734792950495636, "learning_rate": 3.430398923620841e-05, "loss": 0.8562, "step": 6518 }, { "epoch": 2.9551223934723483, "grad_norm": 0.24999850410665997, "learning_rate": 3.4291459745609076e-05, "loss": 0.8808, "step": 6519 }, { "epoch": 2.955575702629193, "grad_norm": 0.2363092762507418, "learning_rate": 3.4278930826736815e-05, "loss": 0.8638, "step": 6520 }, { "epoch": 2.956029011786038, "grad_norm": 0.2733818443109773, "learning_rate": 3.4266402480846455e-05, "loss": 0.8871, "step": 6521 }, { "epoch": 2.956482320942883, "grad_norm": 0.24492814392639028, "learning_rate": 3.425387470919273e-05, "loss": 0.8815, "step": 6522 }, { "epoch": 2.956935630099728, "grad_norm": 0.20925473711238948, "learning_rate": 3.424134751303035e-05, "loss": 0.8786, "step": 6523 }, { "epoch": 2.957388939256573, "grad_norm": 0.239788972496802, "learning_rate": 3.422882089361394e-05, "loss": 0.8927, "step": 6524 }, { "epoch": 2.957842248413418, "grad_norm": 0.25580484875673054, "learning_rate": 3.421629485219807e-05, "loss": 0.8833, "step": 6525 }, { "epoch": 2.958295557570263, "grad_norm": 0.2742829350155543, "learning_rate": 3.4203769390037274e-05, "loss": 0.8759, "step": 6526 }, { "epoch": 2.958748866727108, "grad_norm": 0.2844468762774366, "learning_rate": 3.4191244508386e-05, "loss": 0.8774, "step": 6527 }, { "epoch": 2.959202175883953, "grad_norm": 0.32306820088030347, "learning_rate": 3.417872020849869e-05, "loss": 0.8756, "step": 6528 }, { "epoch": 2.959655485040798, "grad_norm": 0.37004166665614363, "learning_rate": 3.416619649162964e-05, "loss": 0.9089, "step": 6529 }, { "epoch": 2.9601087941976427, "grad_norm": 0.38340134459664443, "learning_rate": 3.415367335903315e-05, "loss": 0.8778, "step": 6530 }, { "epoch": 2.960562103354488, "grad_norm": 0.34011606112963383, "learning_rate": 3.414115081196346e-05, "loss": 0.8877, "step": 6531 }, { "epoch": 2.961015412511333, "grad_norm": 0.25132368168406033, "learning_rate": 3.4128628851674736e-05, "loss": 0.8935, "step": 6532 }, { "epoch": 2.9614687216681777, "grad_norm": 0.2606057998561131, "learning_rate": 3.411610747942109e-05, "loss": 0.8787, "step": 6533 }, { "epoch": 2.961922030825023, "grad_norm": 0.28974582600979065, "learning_rate": 3.410358669645657e-05, "loss": 0.8822, "step": 6534 }, { "epoch": 2.9623753399818678, "grad_norm": 0.33504998361819505, "learning_rate": 3.409106650403517e-05, "loss": 0.8657, "step": 6535 }, { "epoch": 2.9628286491387126, "grad_norm": 0.39895892683610473, "learning_rate": 3.4078546903410825e-05, "loss": 0.8761, "step": 6536 }, { "epoch": 2.9632819582955574, "grad_norm": 0.43155543521112216, "learning_rate": 3.406602789583741e-05, "loss": 0.8759, "step": 6537 }, { "epoch": 2.9637352674524027, "grad_norm": 0.39501896075336135, "learning_rate": 3.4053509482568744e-05, "loss": 0.8727, "step": 6538 }, { "epoch": 2.9641885766092475, "grad_norm": 0.27884842836099927, "learning_rate": 3.404099166485858e-05, "loss": 0.8827, "step": 6539 }, { "epoch": 2.9646418857660923, "grad_norm": 0.3132673438579099, "learning_rate": 3.4028474443960613e-05, "loss": 0.8823, "step": 6540 }, { "epoch": 2.9650951949229376, "grad_norm": 0.2972338850907511, "learning_rate": 3.4015957821128474e-05, "loss": 0.8561, "step": 6541 }, { "epoch": 2.9655485040797824, "grad_norm": 0.2781815711655358, "learning_rate": 3.400344179761575e-05, "loss": 0.8673, "step": 6542 }, { "epoch": 2.9660018132366273, "grad_norm": 0.31033717103892894, "learning_rate": 3.3990926374675955e-05, "loss": 0.8687, "step": 6543 }, { "epoch": 2.966455122393472, "grad_norm": 0.3705863234535467, "learning_rate": 3.3978411553562557e-05, "loss": 0.8845, "step": 6544 }, { "epoch": 2.9669084315503174, "grad_norm": 0.32207809071345045, "learning_rate": 3.396589733552892e-05, "loss": 0.8718, "step": 6545 }, { "epoch": 2.967361740707162, "grad_norm": 0.26876780470532624, "learning_rate": 3.395338372182841e-05, "loss": 0.8888, "step": 6546 }, { "epoch": 2.967815049864007, "grad_norm": 0.3293226607790775, "learning_rate": 3.3940870713714295e-05, "loss": 0.883, "step": 6547 }, { "epoch": 2.9682683590208523, "grad_norm": 0.26606550297267023, "learning_rate": 3.392835831243978e-05, "loss": 0.8819, "step": 6548 }, { "epoch": 2.968721668177697, "grad_norm": 0.2567413064325023, "learning_rate": 3.391584651925802e-05, "loss": 0.886, "step": 6549 }, { "epoch": 2.969174977334542, "grad_norm": 0.3013982991145944, "learning_rate": 3.390333533542211e-05, "loss": 0.8808, "step": 6550 }, { "epoch": 2.969628286491387, "grad_norm": 0.2985465467814195, "learning_rate": 3.389082476218509e-05, "loss": 0.8788, "step": 6551 }, { "epoch": 2.970081595648232, "grad_norm": 0.2720962421111112, "learning_rate": 3.387831480079992e-05, "loss": 0.8899, "step": 6552 }, { "epoch": 2.970534904805077, "grad_norm": 0.2831213326737512, "learning_rate": 3.3865805452519516e-05, "loss": 0.8948, "step": 6553 }, { "epoch": 2.970988213961922, "grad_norm": 0.2184731203603584, "learning_rate": 3.385329671859672e-05, "loss": 0.8803, "step": 6554 }, { "epoch": 2.971441523118767, "grad_norm": 0.22405475348287116, "learning_rate": 3.3840788600284325e-05, "loss": 0.9051, "step": 6555 }, { "epoch": 2.971894832275612, "grad_norm": 0.2619120595248383, "learning_rate": 3.3828281098835054e-05, "loss": 0.8965, "step": 6556 }, { "epoch": 2.972348141432457, "grad_norm": 0.2680598995949106, "learning_rate": 3.381577421550157e-05, "loss": 0.8673, "step": 6557 }, { "epoch": 2.972801450589302, "grad_norm": 0.28575751760087786, "learning_rate": 3.380326795153647e-05, "loss": 0.8809, "step": 6558 }, { "epoch": 2.9732547597461467, "grad_norm": 0.30043629931103066, "learning_rate": 3.3790762308192305e-05, "loss": 0.9112, "step": 6559 }, { "epoch": 2.973708068902992, "grad_norm": 0.24689134245629102, "learning_rate": 3.377825728672154e-05, "loss": 0.8647, "step": 6560 }, { "epoch": 2.974161378059837, "grad_norm": 0.2842602548256748, "learning_rate": 3.376575288837659e-05, "loss": 0.886, "step": 6561 }, { "epoch": 2.9746146872166817, "grad_norm": 0.22269196744245537, "learning_rate": 3.3753249114409805e-05, "loss": 0.8693, "step": 6562 }, { "epoch": 2.975067996373527, "grad_norm": 0.2040247720223917, "learning_rate": 3.374074596607349e-05, "loss": 0.872, "step": 6563 }, { "epoch": 2.9755213055303718, "grad_norm": 0.26650214569408814, "learning_rate": 3.372824344461986e-05, "loss": 0.8739, "step": 6564 }, { "epoch": 2.9759746146872166, "grad_norm": 0.2361811317243839, "learning_rate": 3.3715741551301076e-05, "loss": 0.8854, "step": 6565 }, { "epoch": 2.976427923844062, "grad_norm": 0.2214742952622839, "learning_rate": 3.3703240287369265e-05, "loss": 0.8799, "step": 6566 }, { "epoch": 2.9768812330009067, "grad_norm": 0.24650868104336557, "learning_rate": 3.369073965407643e-05, "loss": 0.8999, "step": 6567 }, { "epoch": 2.9773345421577515, "grad_norm": 0.2274664842890826, "learning_rate": 3.3678239652674555e-05, "loss": 0.8813, "step": 6568 }, { "epoch": 2.977787851314597, "grad_norm": 0.2283322614096848, "learning_rate": 3.3665740284415566e-05, "loss": 0.899, "step": 6569 }, { "epoch": 2.9782411604714416, "grad_norm": 0.28137709030461067, "learning_rate": 3.3653241550551294e-05, "loss": 0.8861, "step": 6570 }, { "epoch": 2.9786944696282864, "grad_norm": 0.2844834054619371, "learning_rate": 3.364074345233354e-05, "loss": 0.8886, "step": 6571 }, { "epoch": 2.9791477787851317, "grad_norm": 0.24658422758424312, "learning_rate": 3.362824599101402e-05, "loss": 0.8918, "step": 6572 }, { "epoch": 2.9796010879419765, "grad_norm": 0.34390647691604737, "learning_rate": 3.361574916784439e-05, "loss": 0.8741, "step": 6573 }, { "epoch": 2.9800543970988214, "grad_norm": 0.4843914824510498, "learning_rate": 3.3603252984076243e-05, "loss": 0.8859, "step": 6574 }, { "epoch": 2.9805077062556666, "grad_norm": 0.5268503570301065, "learning_rate": 3.3590757440961117e-05, "loss": 0.8653, "step": 6575 }, { "epoch": 2.9809610154125115, "grad_norm": 0.48638751103940214, "learning_rate": 3.357826253975046e-05, "loss": 0.8609, "step": 6576 }, { "epoch": 2.9814143245693563, "grad_norm": 0.38567778484926746, "learning_rate": 3.356576828169568e-05, "loss": 0.8887, "step": 6577 }, { "epoch": 2.981867633726201, "grad_norm": 0.306114188735612, "learning_rate": 3.355327466804812e-05, "loss": 0.8888, "step": 6578 }, { "epoch": 2.9823209428830464, "grad_norm": 0.4064204534513284, "learning_rate": 3.3540781700059054e-05, "loss": 0.8797, "step": 6579 }, { "epoch": 2.982774252039891, "grad_norm": 0.38529465769321747, "learning_rate": 3.352828937897967e-05, "loss": 0.8749, "step": 6580 }, { "epoch": 2.983227561196736, "grad_norm": 0.41667025980963945, "learning_rate": 3.351579770606113e-05, "loss": 0.895, "step": 6581 }, { "epoch": 2.983680870353581, "grad_norm": 0.4340750143629695, "learning_rate": 3.3503306682554515e-05, "loss": 0.8806, "step": 6582 }, { "epoch": 2.984134179510426, "grad_norm": 0.40486483279650326, "learning_rate": 3.3490816309710826e-05, "loss": 0.878, "step": 6583 }, { "epoch": 2.984587488667271, "grad_norm": 0.28478419072545974, "learning_rate": 3.347832658878101e-05, "loss": 0.8788, "step": 6584 }, { "epoch": 2.985040797824116, "grad_norm": 0.29235633460386085, "learning_rate": 3.346583752101597e-05, "loss": 0.8687, "step": 6585 }, { "epoch": 2.985494106980961, "grad_norm": 0.37688574759109894, "learning_rate": 3.3453349107666495e-05, "loss": 0.885, "step": 6586 }, { "epoch": 2.985947416137806, "grad_norm": 0.3442058285392243, "learning_rate": 3.344086134998334e-05, "loss": 0.87, "step": 6587 }, { "epoch": 2.9864007252946507, "grad_norm": 0.3429015063797836, "learning_rate": 3.34283742492172e-05, "loss": 0.9086, "step": 6588 }, { "epoch": 2.986854034451496, "grad_norm": 0.4755717216302537, "learning_rate": 3.341588780661869e-05, "loss": 0.8764, "step": 6589 }, { "epoch": 2.987307343608341, "grad_norm": 0.47421943339945194, "learning_rate": 3.340340202343838e-05, "loss": 0.911, "step": 6590 }, { "epoch": 2.9877606527651857, "grad_norm": 0.3938441639819891, "learning_rate": 3.3390916900926736e-05, "loss": 0.8863, "step": 6591 }, { "epoch": 2.988213961922031, "grad_norm": 0.26462295061963054, "learning_rate": 3.337843244033419e-05, "loss": 0.8953, "step": 6592 }, { "epoch": 2.9886672710788758, "grad_norm": 0.33992230867075274, "learning_rate": 3.336594864291109e-05, "loss": 0.8831, "step": 6593 }, { "epoch": 2.9891205802357206, "grad_norm": 0.3993311744244858, "learning_rate": 3.335346550990773e-05, "loss": 0.9166, "step": 6594 }, { "epoch": 2.989573889392566, "grad_norm": 0.41821670178330056, "learning_rate": 3.334098304257434e-05, "loss": 0.8605, "step": 6595 }, { "epoch": 2.9900271985494107, "grad_norm": 0.5476385397592922, "learning_rate": 3.3328501242161055e-05, "loss": 0.8935, "step": 6596 }, { "epoch": 2.9904805077062555, "grad_norm": 0.26235229265599325, "learning_rate": 3.331602010991799e-05, "loss": 0.865, "step": 6597 }, { "epoch": 2.990933816863101, "grad_norm": 0.32902572402215463, "learning_rate": 3.3303539647095154e-05, "loss": 0.8989, "step": 6598 }, { "epoch": 2.9913871260199456, "grad_norm": 0.37421502648932015, "learning_rate": 3.32910598549425e-05, "loss": 0.8689, "step": 6599 }, { "epoch": 2.9918404351767904, "grad_norm": 0.3306939206065631, "learning_rate": 3.327858073470993e-05, "loss": 0.8807, "step": 6600 }, { "epoch": 2.9922937443336357, "grad_norm": 0.23265372020179048, "learning_rate": 3.326610228764724e-05, "loss": 0.8588, "step": 6601 }, { "epoch": 2.9927470534904805, "grad_norm": 0.19740322160704557, "learning_rate": 3.32536245150042e-05, "loss": 0.8764, "step": 6602 }, { "epoch": 2.9932003626473254, "grad_norm": 0.2117471870519739, "learning_rate": 3.324114741803049e-05, "loss": 0.893, "step": 6603 }, { "epoch": 2.9936536718041706, "grad_norm": 0.26676658897211664, "learning_rate": 3.322867099797573e-05, "loss": 0.905, "step": 6604 }, { "epoch": 2.9941069809610155, "grad_norm": 0.311638834321624, "learning_rate": 3.321619525608949e-05, "loss": 0.8859, "step": 6605 }, { "epoch": 2.9945602901178603, "grad_norm": 0.3602172717595824, "learning_rate": 3.320372019362121e-05, "loss": 0.8652, "step": 6606 }, { "epoch": 2.9950135992747056, "grad_norm": 0.331745667636148, "learning_rate": 3.319124581182033e-05, "loss": 0.8769, "step": 6607 }, { "epoch": 2.9954669084315504, "grad_norm": 0.31156131832821554, "learning_rate": 3.3178772111936195e-05, "loss": 0.8718, "step": 6608 }, { "epoch": 2.995920217588395, "grad_norm": 0.2769527648094418, "learning_rate": 3.3166299095218076e-05, "loss": 0.8816, "step": 6609 }, { "epoch": 2.9963735267452405, "grad_norm": 0.3254666184428981, "learning_rate": 3.315382676291519e-05, "loss": 0.8873, "step": 6610 }, { "epoch": 2.9968268359020853, "grad_norm": 0.3543448268156201, "learning_rate": 3.314135511627667e-05, "loss": 0.8815, "step": 6611 }, { "epoch": 2.99728014505893, "grad_norm": 0.3698237809187057, "learning_rate": 3.3128884156551594e-05, "loss": 0.8838, "step": 6612 }, { "epoch": 2.9977334542157754, "grad_norm": 0.3255710803683088, "learning_rate": 3.3116413884988964e-05, "loss": 0.903, "step": 6613 }, { "epoch": 2.9981867633726202, "grad_norm": 0.33446641609932964, "learning_rate": 3.310394430283772e-05, "loss": 0.8765, "step": 6614 }, { "epoch": 2.998640072529465, "grad_norm": 0.3331726930293665, "learning_rate": 3.309147541134671e-05, "loss": 0.8765, "step": 6615 }, { "epoch": 2.99909338168631, "grad_norm": 0.24003905212895507, "learning_rate": 3.3079007211764754e-05, "loss": 0.8827, "step": 6616 }, { "epoch": 2.999546690843155, "grad_norm": 0.31741214913625665, "learning_rate": 3.306653970534056e-05, "loss": 0.8832, "step": 6617 }, { "epoch": 3.0, "grad_norm": 0.36119418274495274, "learning_rate": 3.305407289332279e-05, "loss": 0.8865, "step": 6618 }, { "epoch": 3.000453309156845, "grad_norm": 0.3427513870582783, "learning_rate": 3.3041606776960035e-05, "loss": 0.8588, "step": 6619 }, { "epoch": 3.00090661831369, "grad_norm": 0.29687491490764717, "learning_rate": 3.302914135750081e-05, "loss": 0.894, "step": 6620 }, { "epoch": 3.001359927470535, "grad_norm": 0.2910542471668288, "learning_rate": 3.3016676636193565e-05, "loss": 0.8575, "step": 6621 }, { "epoch": 3.0018132366273798, "grad_norm": 0.33723523466260036, "learning_rate": 3.300421261428668e-05, "loss": 0.8653, "step": 6622 }, { "epoch": 3.002266545784225, "grad_norm": 0.30760435236582284, "learning_rate": 3.299174929302846e-05, "loss": 0.8686, "step": 6623 }, { "epoch": 3.00271985494107, "grad_norm": 0.29241794836477125, "learning_rate": 3.297928667366716e-05, "loss": 0.8751, "step": 6624 }, { "epoch": 3.0031731640979147, "grad_norm": 0.3413778681467983, "learning_rate": 3.296682475745092e-05, "loss": 0.8721, "step": 6625 }, { "epoch": 3.00362647325476, "grad_norm": 0.27871702255072955, "learning_rate": 3.295436354562785e-05, "loss": 0.8724, "step": 6626 }, { "epoch": 3.004079782411605, "grad_norm": 0.246302658681288, "learning_rate": 3.2941903039445984e-05, "loss": 0.8602, "step": 6627 }, { "epoch": 3.0045330915684496, "grad_norm": 0.23813197473731113, "learning_rate": 3.292944324015326e-05, "loss": 0.8807, "step": 6628 }, { "epoch": 3.0049864007252944, "grad_norm": 0.26018928273904224, "learning_rate": 3.291698414899758e-05, "loss": 0.8661, "step": 6629 }, { "epoch": 3.0054397098821397, "grad_norm": 0.25997164301283227, "learning_rate": 3.2904525767226755e-05, "loss": 0.8658, "step": 6630 }, { "epoch": 3.0058930190389845, "grad_norm": 0.26304228731733087, "learning_rate": 3.289206809608854e-05, "loss": 0.8822, "step": 6631 }, { "epoch": 3.0063463281958294, "grad_norm": 0.24866880130486704, "learning_rate": 3.287961113683058e-05, "loss": 0.8749, "step": 6632 }, { "epoch": 3.0067996373526746, "grad_norm": 0.23610086775712671, "learning_rate": 3.2867154890700494e-05, "loss": 0.8515, "step": 6633 }, { "epoch": 3.0072529465095195, "grad_norm": 0.20954519677937103, "learning_rate": 3.285469935894581e-05, "loss": 0.8682, "step": 6634 }, { "epoch": 3.0077062556663643, "grad_norm": 0.21447477148578786, "learning_rate": 3.284224454281398e-05, "loss": 0.8701, "step": 6635 }, { "epoch": 3.0081595648232096, "grad_norm": 0.2189035293562137, "learning_rate": 3.2829790443552396e-05, "loss": 0.874, "step": 6636 }, { "epoch": 3.0086128739800544, "grad_norm": 0.18208221363822405, "learning_rate": 3.2817337062408374e-05, "loss": 0.8823, "step": 6637 }, { "epoch": 3.009066183136899, "grad_norm": 0.22347231324639383, "learning_rate": 3.280488440062916e-05, "loss": 0.8545, "step": 6638 }, { "epoch": 3.0095194922937445, "grad_norm": 0.25593847365872185, "learning_rate": 3.279243245946191e-05, "loss": 0.8533, "step": 6639 }, { "epoch": 3.0099728014505893, "grad_norm": 0.2350071599815082, "learning_rate": 3.277998124015374e-05, "loss": 0.8598, "step": 6640 }, { "epoch": 3.010426110607434, "grad_norm": 0.29604040450071, "learning_rate": 3.276753074395166e-05, "loss": 0.8535, "step": 6641 }, { "epoch": 3.0108794197642794, "grad_norm": 0.25949700824492383, "learning_rate": 3.275508097210265e-05, "loss": 0.8766, "step": 6642 }, { "epoch": 3.0113327289211242, "grad_norm": 0.23165809651591504, "learning_rate": 3.274263192585357e-05, "loss": 0.8726, "step": 6643 }, { "epoch": 3.011786038077969, "grad_norm": 0.45595200440790534, "learning_rate": 3.273018360645122e-05, "loss": 0.8584, "step": 6644 }, { "epoch": 3.0122393472348143, "grad_norm": 0.2404210575738737, "learning_rate": 3.2717736015142346e-05, "loss": 0.8682, "step": 6645 }, { "epoch": 3.012692656391659, "grad_norm": 0.2891761684863227, "learning_rate": 3.270528915317362e-05, "loss": 0.8722, "step": 6646 }, { "epoch": 3.013145965548504, "grad_norm": 0.3510353268291218, "learning_rate": 3.269284302179162e-05, "loss": 0.8564, "step": 6647 }, { "epoch": 3.013599274705349, "grad_norm": 0.3773347017293069, "learning_rate": 3.268039762224286e-05, "loss": 0.8649, "step": 6648 }, { "epoch": 3.014052583862194, "grad_norm": 0.37446029936552694, "learning_rate": 3.2667952955773805e-05, "loss": 0.8842, "step": 6649 }, { "epoch": 3.014505893019039, "grad_norm": 0.3422048707079327, "learning_rate": 3.26555090236308e-05, "loss": 0.8744, "step": 6650 }, { "epoch": 3.0149592021758838, "grad_norm": 0.33035208888546347, "learning_rate": 3.2643065827060144e-05, "loss": 0.871, "step": 6651 }, { "epoch": 3.015412511332729, "grad_norm": 0.30543325203782223, "learning_rate": 3.2630623367308074e-05, "loss": 0.8707, "step": 6652 }, { "epoch": 3.015865820489574, "grad_norm": 0.35531049925181185, "learning_rate": 3.261818164562074e-05, "loss": 0.8675, "step": 6653 }, { "epoch": 3.0163191296464187, "grad_norm": 0.3588206226857358, "learning_rate": 3.26057406632442e-05, "loss": 0.8662, "step": 6654 }, { "epoch": 3.016772438803264, "grad_norm": 0.2860124228921052, "learning_rate": 3.259330042142446e-05, "loss": 0.8675, "step": 6655 }, { "epoch": 3.017225747960109, "grad_norm": 0.36286269197688004, "learning_rate": 3.258086092140746e-05, "loss": 0.8778, "step": 6656 }, { "epoch": 3.0176790571169536, "grad_norm": 0.2673797230391106, "learning_rate": 3.2568422164439044e-05, "loss": 0.8572, "step": 6657 }, { "epoch": 3.018132366273799, "grad_norm": 0.24603533299109567, "learning_rate": 3.255598415176499e-05, "loss": 0.856, "step": 6658 }, { "epoch": 3.0185856754306437, "grad_norm": 0.5526211652638051, "learning_rate": 3.2543546884630995e-05, "loss": 0.8727, "step": 6659 }, { "epoch": 3.0190389845874885, "grad_norm": 0.23766965005764204, "learning_rate": 3.253111036428269e-05, "loss": 0.8669, "step": 6660 }, { "epoch": 3.019492293744334, "grad_norm": 0.25472088905346957, "learning_rate": 3.251867459196564e-05, "loss": 0.8909, "step": 6661 }, { "epoch": 3.0199456029011786, "grad_norm": 0.22818541608560458, "learning_rate": 3.250623956892533e-05, "loss": 0.86, "step": 6662 }, { "epoch": 3.0203989120580235, "grad_norm": 0.24245283290839337, "learning_rate": 3.2493805296407136e-05, "loss": 0.8735, "step": 6663 }, { "epoch": 3.0208522212148687, "grad_norm": 0.3170746078602302, "learning_rate": 3.24813717756564e-05, "loss": 0.8822, "step": 6664 }, { "epoch": 3.0213055303717136, "grad_norm": 0.3657375160422951, "learning_rate": 3.246893900791838e-05, "loss": 0.8879, "step": 6665 }, { "epoch": 3.0217588395285584, "grad_norm": 0.33528572027073683, "learning_rate": 3.245650699443824e-05, "loss": 0.8632, "step": 6666 }, { "epoch": 3.0222121486854037, "grad_norm": 0.3272138373838642, "learning_rate": 3.244407573646111e-05, "loss": 0.8804, "step": 6667 }, { "epoch": 3.0226654578422485, "grad_norm": 0.24330116129969015, "learning_rate": 3.243164523523199e-05, "loss": 0.8703, "step": 6668 }, { "epoch": 3.0231187669990933, "grad_norm": 0.27347677206754123, "learning_rate": 3.241921549199585e-05, "loss": 0.8658, "step": 6669 }, { "epoch": 3.023572076155938, "grad_norm": 0.32387095372015273, "learning_rate": 3.240678650799756e-05, "loss": 0.8773, "step": 6670 }, { "epoch": 3.0240253853127834, "grad_norm": 0.3012960431432415, "learning_rate": 3.239435828448191e-05, "loss": 0.8662, "step": 6671 }, { "epoch": 3.0244786944696282, "grad_norm": 0.23935595716596855, "learning_rate": 3.238193082269365e-05, "loss": 0.8504, "step": 6672 }, { "epoch": 3.024932003626473, "grad_norm": 0.309670880910683, "learning_rate": 3.236950412387739e-05, "loss": 0.8757, "step": 6673 }, { "epoch": 3.0253853127833183, "grad_norm": 0.26966552464401855, "learning_rate": 3.2357078189277713e-05, "loss": 0.8807, "step": 6674 }, { "epoch": 3.025838621940163, "grad_norm": 0.2949868879584435, "learning_rate": 3.2344653020139123e-05, "loss": 0.8581, "step": 6675 }, { "epoch": 3.026291931097008, "grad_norm": 0.3516005204076819, "learning_rate": 3.233222861770603e-05, "loss": 0.871, "step": 6676 }, { "epoch": 3.0267452402538533, "grad_norm": 0.32385515177853386, "learning_rate": 3.231980498322278e-05, "loss": 0.8565, "step": 6677 }, { "epoch": 3.027198549410698, "grad_norm": 0.2610150503970392, "learning_rate": 3.230738211793363e-05, "loss": 0.8762, "step": 6678 }, { "epoch": 3.027651858567543, "grad_norm": 0.20507242790101662, "learning_rate": 3.229496002308277e-05, "loss": 0.8693, "step": 6679 }, { "epoch": 3.028105167724388, "grad_norm": 0.21078917486858387, "learning_rate": 3.228253869991431e-05, "loss": 0.8632, "step": 6680 }, { "epoch": 3.028558476881233, "grad_norm": 0.23939708854114447, "learning_rate": 3.227011814967229e-05, "loss": 0.8856, "step": 6681 }, { "epoch": 3.029011786038078, "grad_norm": 0.24412735734841068, "learning_rate": 3.225769837360065e-05, "loss": 0.8495, "step": 6682 }, { "epoch": 3.029465095194923, "grad_norm": 0.3100403177985203, "learning_rate": 3.2245279372943264e-05, "loss": 0.8612, "step": 6683 }, { "epoch": 3.029918404351768, "grad_norm": 0.32796926619982547, "learning_rate": 3.2232861148943935e-05, "loss": 0.8721, "step": 6684 }, { "epoch": 3.030371713508613, "grad_norm": 0.3258228132759127, "learning_rate": 3.22204437028464e-05, "loss": 0.871, "step": 6685 }, { "epoch": 3.030825022665458, "grad_norm": 0.2609336337646969, "learning_rate": 3.220802703589429e-05, "loss": 0.8603, "step": 6686 }, { "epoch": 3.031278331822303, "grad_norm": 0.20663520178881023, "learning_rate": 3.219561114933116e-05, "loss": 0.8626, "step": 6687 }, { "epoch": 3.0317316409791477, "grad_norm": 0.2514316896768957, "learning_rate": 3.218319604440053e-05, "loss": 0.8662, "step": 6688 }, { "epoch": 3.0321849501359925, "grad_norm": 0.2687393787786191, "learning_rate": 3.217078172234578e-05, "loss": 0.8445, "step": 6689 }, { "epoch": 3.032638259292838, "grad_norm": 0.236979786468636, "learning_rate": 3.2158368184410246e-05, "loss": 0.8587, "step": 6690 }, { "epoch": 3.0330915684496826, "grad_norm": 0.22289273991926387, "learning_rate": 3.2145955431837194e-05, "loss": 0.8465, "step": 6691 }, { "epoch": 3.0335448776065275, "grad_norm": 0.25154570638005525, "learning_rate": 3.213354346586978e-05, "loss": 0.8733, "step": 6692 }, { "epoch": 3.0339981867633727, "grad_norm": 0.32574470457920934, "learning_rate": 3.2121132287751106e-05, "loss": 0.8682, "step": 6693 }, { "epoch": 3.0344514959202176, "grad_norm": 0.3456159802044545, "learning_rate": 3.2108721898724194e-05, "loss": 0.8745, "step": 6694 }, { "epoch": 3.0349048050770624, "grad_norm": 0.29310988994991205, "learning_rate": 3.209631230003197e-05, "loss": 0.849, "step": 6695 }, { "epoch": 3.0353581142339077, "grad_norm": 0.32131298180568957, "learning_rate": 3.2083903492917306e-05, "loss": 0.8733, "step": 6696 }, { "epoch": 3.0358114233907525, "grad_norm": 0.29111748843731894, "learning_rate": 3.2071495478622966e-05, "loss": 0.8479, "step": 6697 }, { "epoch": 3.0362647325475973, "grad_norm": 0.2485975554403887, "learning_rate": 3.205908825839166e-05, "loss": 0.8588, "step": 6698 }, { "epoch": 3.0367180417044426, "grad_norm": 0.3200467320799245, "learning_rate": 3.2046681833466e-05, "loss": 0.8737, "step": 6699 }, { "epoch": 3.0371713508612874, "grad_norm": 0.4373118980735874, "learning_rate": 3.203427620508853e-05, "loss": 0.8617, "step": 6700 }, { "epoch": 3.0376246600181322, "grad_norm": 0.4699262386278818, "learning_rate": 3.202187137450171e-05, "loss": 0.8413, "step": 6701 }, { "epoch": 3.0380779691749775, "grad_norm": 0.4086497677175102, "learning_rate": 3.200946734294792e-05, "loss": 0.8687, "step": 6702 }, { "epoch": 3.0385312783318223, "grad_norm": 0.3530842586933905, "learning_rate": 3.199706411166946e-05, "loss": 0.8575, "step": 6703 }, { "epoch": 3.038984587488667, "grad_norm": 0.35092766569563255, "learning_rate": 3.198466168190855e-05, "loss": 0.8591, "step": 6704 }, { "epoch": 3.0394378966455124, "grad_norm": 0.3479597013899658, "learning_rate": 3.197226005490732e-05, "loss": 0.8717, "step": 6705 }, { "epoch": 3.0398912058023573, "grad_norm": 0.3825536090684345, "learning_rate": 3.195985923190785e-05, "loss": 0.8669, "step": 6706 }, { "epoch": 3.040344514959202, "grad_norm": 0.33058667987959894, "learning_rate": 3.1947459214152094e-05, "loss": 0.8545, "step": 6707 }, { "epoch": 3.040797824116047, "grad_norm": 0.2799138561403128, "learning_rate": 3.193506000288197e-05, "loss": 0.8537, "step": 6708 }, { "epoch": 3.041251133272892, "grad_norm": 0.27158628117619266, "learning_rate": 3.192266159933929e-05, "loss": 0.8498, "step": 6709 }, { "epoch": 3.041704442429737, "grad_norm": 0.2645282957861585, "learning_rate": 3.191026400476579e-05, "loss": 0.8686, "step": 6710 }, { "epoch": 3.042157751586582, "grad_norm": 0.34217845020508875, "learning_rate": 3.189786722040312e-05, "loss": 0.8688, "step": 6711 }, { "epoch": 3.042611060743427, "grad_norm": 0.444871916290934, "learning_rate": 3.188547124749287e-05, "loss": 0.8702, "step": 6712 }, { "epoch": 3.043064369900272, "grad_norm": 0.4170443628805125, "learning_rate": 3.187307608727651e-05, "loss": 0.8742, "step": 6713 }, { "epoch": 3.043517679057117, "grad_norm": 0.2713541064464956, "learning_rate": 3.186068174099548e-05, "loss": 0.8649, "step": 6714 }, { "epoch": 3.043970988213962, "grad_norm": 0.263120752244134, "learning_rate": 3.184828820989108e-05, "loss": 0.8662, "step": 6715 }, { "epoch": 3.044424297370807, "grad_norm": 0.35081852262487395, "learning_rate": 3.183589549520458e-05, "loss": 0.8633, "step": 6716 }, { "epoch": 3.0448776065276517, "grad_norm": 0.30588537516623093, "learning_rate": 3.182350359817714e-05, "loss": 0.8632, "step": 6717 }, { "epoch": 3.045330915684497, "grad_norm": 0.283485184864662, "learning_rate": 3.181111252004985e-05, "loss": 0.8995, "step": 6718 }, { "epoch": 3.045784224841342, "grad_norm": 0.26999905983242595, "learning_rate": 3.17987222620637e-05, "loss": 0.8696, "step": 6719 }, { "epoch": 3.0462375339981866, "grad_norm": 0.2507771580144439, "learning_rate": 3.178633282545963e-05, "loss": 0.8641, "step": 6720 }, { "epoch": 3.046690843155032, "grad_norm": 0.22674899876283372, "learning_rate": 3.177394421147846e-05, "loss": 0.8618, "step": 6721 }, { "epoch": 3.0471441523118767, "grad_norm": 0.21132055775106098, "learning_rate": 3.176155642136095e-05, "loss": 0.8786, "step": 6722 }, { "epoch": 3.0475974614687216, "grad_norm": 0.24181677280227717, "learning_rate": 3.1749169456347775e-05, "loss": 0.8663, "step": 6723 }, { "epoch": 3.048050770625567, "grad_norm": 0.22427388600785875, "learning_rate": 3.173678331767953e-05, "loss": 0.8773, "step": 6724 }, { "epoch": 3.0485040797824117, "grad_norm": 0.24835904761373784, "learning_rate": 3.172439800659672e-05, "loss": 0.8779, "step": 6725 }, { "epoch": 3.0489573889392565, "grad_norm": 0.19602302580347095, "learning_rate": 3.1712013524339774e-05, "loss": 0.8828, "step": 6726 }, { "epoch": 3.0494106980961013, "grad_norm": 0.2518246582184627, "learning_rate": 3.169962987214903e-05, "loss": 0.8794, "step": 6727 }, { "epoch": 3.0498640072529466, "grad_norm": 0.25193542606093694, "learning_rate": 3.1687247051264744e-05, "loss": 0.8579, "step": 6728 }, { "epoch": 3.0503173164097914, "grad_norm": 0.20890757433552287, "learning_rate": 3.167486506292711e-05, "loss": 0.8609, "step": 6729 }, { "epoch": 3.0507706255666363, "grad_norm": 0.21986981776141404, "learning_rate": 3.1662483908376195e-05, "loss": 0.8669, "step": 6730 }, { "epoch": 3.0512239347234815, "grad_norm": 0.24525741890410738, "learning_rate": 3.1650103588852025e-05, "loss": 0.8521, "step": 6731 }, { "epoch": 3.0516772438803264, "grad_norm": 0.20798514758050998, "learning_rate": 3.163772410559451e-05, "loss": 0.8676, "step": 6732 }, { "epoch": 3.052130553037171, "grad_norm": 0.22467663141054062, "learning_rate": 3.1625345459843514e-05, "loss": 0.8796, "step": 6733 }, { "epoch": 3.0525838621940165, "grad_norm": 0.21438806490850498, "learning_rate": 3.161296765283878e-05, "loss": 0.8814, "step": 6734 }, { "epoch": 3.0530371713508613, "grad_norm": 0.21534688998913726, "learning_rate": 3.160059068581999e-05, "loss": 0.8709, "step": 6735 }, { "epoch": 3.053490480507706, "grad_norm": 0.20869449764861464, "learning_rate": 3.158821456002672e-05, "loss": 0.8753, "step": 6736 }, { "epoch": 3.0539437896645514, "grad_norm": 0.21494139267052598, "learning_rate": 3.1575839276698474e-05, "loss": 0.8771, "step": 6737 }, { "epoch": 3.054397098821396, "grad_norm": 0.275035463554469, "learning_rate": 3.15634648370747e-05, "loss": 0.8603, "step": 6738 }, { "epoch": 3.054850407978241, "grad_norm": 0.2716266419644364, "learning_rate": 3.1551091242394726e-05, "loss": 0.8735, "step": 6739 }, { "epoch": 3.0553037171350863, "grad_norm": 0.2539460167247427, "learning_rate": 3.153871849389778e-05, "loss": 0.8857, "step": 6740 }, { "epoch": 3.055757026291931, "grad_norm": 0.2187160088718892, "learning_rate": 3.152634659282305e-05, "loss": 0.8626, "step": 6741 }, { "epoch": 3.056210335448776, "grad_norm": 0.188029512790935, "learning_rate": 3.15139755404096e-05, "loss": 0.8735, "step": 6742 }, { "epoch": 3.0566636446056212, "grad_norm": 0.2465515250303918, "learning_rate": 3.150160533789644e-05, "loss": 0.8825, "step": 6743 }, { "epoch": 3.057116953762466, "grad_norm": 0.21502519166106424, "learning_rate": 3.14892359865225e-05, "loss": 0.8743, "step": 6744 }, { "epoch": 3.057570262919311, "grad_norm": 0.2293548296359317, "learning_rate": 3.147686748752658e-05, "loss": 0.863, "step": 6745 }, { "epoch": 3.0580235720761557, "grad_norm": 0.23965198175725475, "learning_rate": 3.1464499842147424e-05, "loss": 0.858, "step": 6746 }, { "epoch": 3.058476881233001, "grad_norm": 0.24011312586749217, "learning_rate": 3.14521330516237e-05, "loss": 0.8623, "step": 6747 }, { "epoch": 3.058930190389846, "grad_norm": 0.23511051919904577, "learning_rate": 3.143976711719397e-05, "loss": 0.8669, "step": 6748 }, { "epoch": 3.0593834995466906, "grad_norm": 0.279583751988113, "learning_rate": 3.142740204009671e-05, "loss": 0.8658, "step": 6749 }, { "epoch": 3.059836808703536, "grad_norm": 0.25481072534588156, "learning_rate": 3.141503782157035e-05, "loss": 0.8812, "step": 6750 }, { "epoch": 3.0602901178603807, "grad_norm": 0.2180233016209016, "learning_rate": 3.1402674462853164e-05, "loss": 0.8685, "step": 6751 }, { "epoch": 3.0607434270172256, "grad_norm": 0.27888857533477246, "learning_rate": 3.139031196518341e-05, "loss": 0.884, "step": 6752 }, { "epoch": 3.061196736174071, "grad_norm": 0.2559658022227573, "learning_rate": 3.137795032979922e-05, "loss": 0.8778, "step": 6753 }, { "epoch": 3.0616500453309157, "grad_norm": 0.2713691791663324, "learning_rate": 3.136558955793863e-05, "loss": 0.8612, "step": 6754 }, { "epoch": 3.0621033544877605, "grad_norm": 0.34376295093821213, "learning_rate": 3.1353229650839635e-05, "loss": 0.8534, "step": 6755 }, { "epoch": 3.0625566636446058, "grad_norm": 0.2974694040281412, "learning_rate": 3.1340870609740104e-05, "loss": 0.8474, "step": 6756 }, { "epoch": 3.0630099728014506, "grad_norm": 0.28312295862102116, "learning_rate": 3.132851243587782e-05, "loss": 0.8494, "step": 6757 }, { "epoch": 3.0634632819582954, "grad_norm": 0.23205934417820312, "learning_rate": 3.131615513049051e-05, "loss": 0.8631, "step": 6758 }, { "epoch": 3.0639165911151407, "grad_norm": 0.2554512529013222, "learning_rate": 3.130379869481578e-05, "loss": 0.8527, "step": 6759 }, { "epoch": 3.0643699002719855, "grad_norm": 0.25158765321869175, "learning_rate": 3.129144313009118e-05, "loss": 0.8589, "step": 6760 }, { "epoch": 3.0648232094288304, "grad_norm": 0.281872158669947, "learning_rate": 3.1279088437554124e-05, "loss": 0.8511, "step": 6761 }, { "epoch": 3.0652765185856756, "grad_norm": 0.34480079058763563, "learning_rate": 3.1266734618442e-05, "loss": 0.8784, "step": 6762 }, { "epoch": 3.0657298277425205, "grad_norm": 0.3596951866366137, "learning_rate": 3.125438167399207e-05, "loss": 0.8798, "step": 6763 }, { "epoch": 3.0661831368993653, "grad_norm": 0.3307677562570365, "learning_rate": 3.1242029605441515e-05, "loss": 0.8375, "step": 6764 }, { "epoch": 3.0666364460562106, "grad_norm": 0.2778718993836123, "learning_rate": 3.122967841402744e-05, "loss": 0.8572, "step": 6765 }, { "epoch": 3.0670897552130554, "grad_norm": 0.2671070914465489, "learning_rate": 3.121732810098684e-05, "loss": 0.8916, "step": 6766 }, { "epoch": 3.0675430643699, "grad_norm": 0.25663876191191215, "learning_rate": 3.120497866755665e-05, "loss": 0.871, "step": 6767 }, { "epoch": 3.067996373526745, "grad_norm": 0.22022628875884473, "learning_rate": 3.119263011497369e-05, "loss": 0.8547, "step": 6768 }, { "epoch": 3.0684496826835903, "grad_norm": 0.33159800429536174, "learning_rate": 3.1180282444474706e-05, "loss": 0.8698, "step": 6769 }, { "epoch": 3.068902991840435, "grad_norm": 0.33985993996552205, "learning_rate": 3.1167935657296374e-05, "loss": 0.8538, "step": 6770 }, { "epoch": 3.06935630099728, "grad_norm": 0.2906384918099532, "learning_rate": 3.1155589754675226e-05, "loss": 0.8618, "step": 6771 }, { "epoch": 3.0698096101541252, "grad_norm": 0.22472147547669347, "learning_rate": 3.114324473784776e-05, "loss": 0.8581, "step": 6772 }, { "epoch": 3.07026291931097, "grad_norm": 0.20501334517293598, "learning_rate": 3.1130900608050364e-05, "loss": 0.881, "step": 6773 }, { "epoch": 3.070716228467815, "grad_norm": 0.30259799641408597, "learning_rate": 3.1118557366519335e-05, "loss": 0.8519, "step": 6774 }, { "epoch": 3.07116953762466, "grad_norm": 0.39089966854646074, "learning_rate": 3.110621501449089e-05, "loss": 0.8774, "step": 6775 }, { "epoch": 3.071622846781505, "grad_norm": 0.3869260766776637, "learning_rate": 3.109387355320115e-05, "loss": 0.8598, "step": 6776 }, { "epoch": 3.07207615593835, "grad_norm": 0.2369311024203603, "learning_rate": 3.108153298388616e-05, "loss": 0.8455, "step": 6777 }, { "epoch": 3.072529465095195, "grad_norm": 0.19702397504559563, "learning_rate": 3.106919330778184e-05, "loss": 0.8687, "step": 6778 }, { "epoch": 3.07298277425204, "grad_norm": 0.2445466720302129, "learning_rate": 3.105685452612406e-05, "loss": 0.9014, "step": 6779 }, { "epoch": 3.0734360834088847, "grad_norm": 0.30777546600132955, "learning_rate": 3.104451664014858e-05, "loss": 0.8579, "step": 6780 }, { "epoch": 3.07388939256573, "grad_norm": 0.37916253153046603, "learning_rate": 3.103217965109107e-05, "loss": 0.8676, "step": 6781 }, { "epoch": 3.074342701722575, "grad_norm": 0.3219578970098882, "learning_rate": 3.101984356018714e-05, "loss": 0.8821, "step": 6782 }, { "epoch": 3.0747960108794197, "grad_norm": 0.22656272095002375, "learning_rate": 3.1007508368672255e-05, "loss": 0.8638, "step": 6783 }, { "epoch": 3.075249320036265, "grad_norm": 0.2721147590378564, "learning_rate": 3.099517407778184e-05, "loss": 0.8657, "step": 6784 }, { "epoch": 3.0757026291931098, "grad_norm": 0.2801104930863645, "learning_rate": 3.098284068875121e-05, "loss": 0.8576, "step": 6785 }, { "epoch": 3.0761559383499546, "grad_norm": 0.29389991773917234, "learning_rate": 3.0970508202815595e-05, "loss": 0.8876, "step": 6786 }, { "epoch": 3.0766092475067994, "grad_norm": 0.25660024479018345, "learning_rate": 3.095817662121011e-05, "loss": 0.8682, "step": 6787 }, { "epoch": 3.0770625566636447, "grad_norm": 0.23113169305422215, "learning_rate": 3.0945845945169806e-05, "loss": 0.8755, "step": 6788 }, { "epoch": 3.0775158658204895, "grad_norm": 0.26741389644533653, "learning_rate": 3.0933516175929646e-05, "loss": 0.8889, "step": 6789 }, { "epoch": 3.0779691749773344, "grad_norm": 0.321211726324061, "learning_rate": 3.0921187314724486e-05, "loss": 0.87, "step": 6790 }, { "epoch": 3.0784224841341796, "grad_norm": 0.2884657927696219, "learning_rate": 3.090885936278909e-05, "loss": 0.8548, "step": 6791 }, { "epoch": 3.0788757932910245, "grad_norm": 0.2734440470734266, "learning_rate": 3.089653232135815e-05, "loss": 0.8787, "step": 6792 }, { "epoch": 3.0793291024478693, "grad_norm": 0.3457316901955548, "learning_rate": 3.0884206191666264e-05, "loss": 0.8507, "step": 6793 }, { "epoch": 3.0797824116047146, "grad_norm": 0.3475809435760021, "learning_rate": 3.087188097494791e-05, "loss": 0.8767, "step": 6794 }, { "epoch": 3.0802357207615594, "grad_norm": 0.32299222124258553, "learning_rate": 3.0859556672437507e-05, "loss": 0.8642, "step": 6795 }, { "epoch": 3.080689029918404, "grad_norm": 0.30149421657925407, "learning_rate": 3.0847233285369366e-05, "loss": 0.8605, "step": 6796 }, { "epoch": 3.0811423390752495, "grad_norm": 0.2323490755996479, "learning_rate": 3.0834910814977716e-05, "loss": 0.8741, "step": 6797 }, { "epoch": 3.0815956482320943, "grad_norm": 0.2592935781536578, "learning_rate": 3.0822589262496685e-05, "loss": 0.8738, "step": 6798 }, { "epoch": 3.082048957388939, "grad_norm": 0.2849352562102695, "learning_rate": 3.08102686291603e-05, "loss": 0.8518, "step": 6799 }, { "epoch": 3.0825022665457844, "grad_norm": 0.2536403146790516, "learning_rate": 3.079794891620252e-05, "loss": 0.8773, "step": 6800 }, { "epoch": 3.0829555757026292, "grad_norm": 0.27245306849528644, "learning_rate": 3.078563012485722e-05, "loss": 0.856, "step": 6801 }, { "epoch": 3.083408884859474, "grad_norm": 0.28374882079483804, "learning_rate": 3.077331225635812e-05, "loss": 0.8676, "step": 6802 }, { "epoch": 3.0838621940163193, "grad_norm": 0.2995497432608759, "learning_rate": 3.0760995311938916e-05, "loss": 0.8647, "step": 6803 }, { "epoch": 3.084315503173164, "grad_norm": 0.38580238306447145, "learning_rate": 3.0748679292833197e-05, "loss": 0.8531, "step": 6804 }, { "epoch": 3.084768812330009, "grad_norm": 0.35071350904459303, "learning_rate": 3.073636420027443e-05, "loss": 0.8618, "step": 6805 }, { "epoch": 3.0852221214868543, "grad_norm": 0.27843892042802165, "learning_rate": 3.072405003549601e-05, "loss": 0.8922, "step": 6806 }, { "epoch": 3.085675430643699, "grad_norm": 0.21875036802264353, "learning_rate": 3.0711736799731244e-05, "loss": 0.8557, "step": 6807 }, { "epoch": 3.086128739800544, "grad_norm": 0.2649735591384151, "learning_rate": 3.069942449421333e-05, "loss": 0.8742, "step": 6808 }, { "epoch": 3.0865820489573887, "grad_norm": 0.2960817306309814, "learning_rate": 3.068711312017539e-05, "loss": 0.8628, "step": 6809 }, { "epoch": 3.087035358114234, "grad_norm": 0.3700182735228946, "learning_rate": 3.067480267885044e-05, "loss": 0.8541, "step": 6810 }, { "epoch": 3.087488667271079, "grad_norm": 0.4423318544911209, "learning_rate": 3.066249317147141e-05, "loss": 0.8594, "step": 6811 }, { "epoch": 3.0879419764279237, "grad_norm": 0.4192059070514196, "learning_rate": 3.0650184599271144e-05, "loss": 0.8588, "step": 6812 }, { "epoch": 3.088395285584769, "grad_norm": 0.293938115986666, "learning_rate": 3.0637876963482354e-05, "loss": 0.8716, "step": 6813 }, { "epoch": 3.0888485947416138, "grad_norm": 0.22587641925547594, "learning_rate": 3.0625570265337704e-05, "loss": 0.8679, "step": 6814 }, { "epoch": 3.0893019038984586, "grad_norm": 0.4120045515471505, "learning_rate": 3.0613264506069736e-05, "loss": 0.8603, "step": 6815 }, { "epoch": 3.089755213055304, "grad_norm": 0.43371531811405617, "learning_rate": 3.0600959686910945e-05, "loss": 0.8736, "step": 6816 }, { "epoch": 3.0902085222121487, "grad_norm": 0.28241899628509554, "learning_rate": 3.0588655809093634e-05, "loss": 0.8785, "step": 6817 }, { "epoch": 3.0906618313689935, "grad_norm": 0.3213665916410064, "learning_rate": 3.057635287385011e-05, "loss": 0.8756, "step": 6818 }, { "epoch": 3.091115140525839, "grad_norm": 0.3184283888041792, "learning_rate": 3.056405088241254e-05, "loss": 0.8653, "step": 6819 }, { "epoch": 3.0915684496826836, "grad_norm": 0.24828307499568866, "learning_rate": 3.055174983601301e-05, "loss": 0.8605, "step": 6820 }, { "epoch": 3.0920217588395285, "grad_norm": 0.27511479345344436, "learning_rate": 3.05394497358835e-05, "loss": 0.8611, "step": 6821 }, { "epoch": 3.0924750679963737, "grad_norm": 0.3206367546513383, "learning_rate": 3.0527150583255894e-05, "loss": 0.8906, "step": 6822 }, { "epoch": 3.0929283771532186, "grad_norm": 0.2723101010624193, "learning_rate": 3.051485237936201e-05, "loss": 0.863, "step": 6823 }, { "epoch": 3.0933816863100634, "grad_norm": 0.21277987931940595, "learning_rate": 3.050255512543353e-05, "loss": 0.8578, "step": 6824 }, { "epoch": 3.093834995466908, "grad_norm": 0.2593615636408207, "learning_rate": 3.049025882270207e-05, "loss": 0.8569, "step": 6825 }, { "epoch": 3.0942883046237535, "grad_norm": 0.2569106489740553, "learning_rate": 3.047796347239913e-05, "loss": 0.8695, "step": 6826 }, { "epoch": 3.0947416137805983, "grad_norm": 0.25642120103404265, "learning_rate": 3.0465669075756147e-05, "loss": 0.8585, "step": 6827 }, { "epoch": 3.095194922937443, "grad_norm": 0.3045619657923179, "learning_rate": 3.045337563400442e-05, "loss": 0.8516, "step": 6828 }, { "epoch": 3.0956482320942884, "grad_norm": 0.33117620573644785, "learning_rate": 3.0441083148375177e-05, "loss": 0.8651, "step": 6829 }, { "epoch": 3.0961015412511332, "grad_norm": 0.3844423726419092, "learning_rate": 3.042879162009955e-05, "loss": 0.8734, "step": 6830 }, { "epoch": 3.096554850407978, "grad_norm": 0.31604546131645905, "learning_rate": 3.0416501050408573e-05, "loss": 0.8637, "step": 6831 }, { "epoch": 3.0970081595648233, "grad_norm": 0.879526639381843, "learning_rate": 3.0404211440533182e-05, "loss": 0.8569, "step": 6832 }, { "epoch": 3.097461468721668, "grad_norm": 0.24245485349842658, "learning_rate": 3.0391922791704216e-05, "loss": 0.8554, "step": 6833 }, { "epoch": 3.097914777878513, "grad_norm": 0.23635504568142499, "learning_rate": 3.0379635105152417e-05, "loss": 0.8555, "step": 6834 }, { "epoch": 3.0983680870353583, "grad_norm": 0.35372935677364364, "learning_rate": 3.036734838210846e-05, "loss": 0.8765, "step": 6835 }, { "epoch": 3.098821396192203, "grad_norm": 0.2811755392874431, "learning_rate": 3.0355062623802848e-05, "loss": 0.8816, "step": 6836 }, { "epoch": 3.099274705349048, "grad_norm": 0.2632277497800812, "learning_rate": 3.0342777831466065e-05, "loss": 0.8621, "step": 6837 }, { "epoch": 3.099728014505893, "grad_norm": 0.35722221465524673, "learning_rate": 3.0330494006328458e-05, "loss": 0.8935, "step": 6838 }, { "epoch": 3.100181323662738, "grad_norm": 0.3556399812491913, "learning_rate": 3.0318211149620304e-05, "loss": 0.8813, "step": 6839 }, { "epoch": 3.100634632819583, "grad_norm": 0.26161295223227843, "learning_rate": 3.0305929262571747e-05, "loss": 0.8777, "step": 6840 }, { "epoch": 3.101087941976428, "grad_norm": 0.3355366947884857, "learning_rate": 3.0293648346412872e-05, "loss": 0.8549, "step": 6841 }, { "epoch": 3.101541251133273, "grad_norm": 0.4443215306244027, "learning_rate": 3.0281368402373647e-05, "loss": 0.8667, "step": 6842 }, { "epoch": 3.1019945602901178, "grad_norm": 0.3643025077238908, "learning_rate": 3.0269089431683932e-05, "loss": 0.8671, "step": 6843 }, { "epoch": 3.102447869446963, "grad_norm": 0.3141456991150922, "learning_rate": 3.0256811435573508e-05, "loss": 0.8681, "step": 6844 }, { "epoch": 3.102901178603808, "grad_norm": 0.2532473306658191, "learning_rate": 3.0244534415272056e-05, "loss": 0.8842, "step": 6845 }, { "epoch": 3.1033544877606527, "grad_norm": 0.22159979574702565, "learning_rate": 3.0232258372009153e-05, "loss": 0.8855, "step": 6846 }, { "epoch": 3.1038077969174975, "grad_norm": 0.21725910367517562, "learning_rate": 3.0219983307014277e-05, "loss": 0.8728, "step": 6847 }, { "epoch": 3.104261106074343, "grad_norm": 0.2818716071817324, "learning_rate": 3.0207709221516815e-05, "loss": 0.8778, "step": 6848 }, { "epoch": 3.1047144152311876, "grad_norm": 0.3701584554484048, "learning_rate": 3.019543611674606e-05, "loss": 0.8914, "step": 6849 }, { "epoch": 3.1051677243880325, "grad_norm": 0.2735425954876108, "learning_rate": 3.018316399393119e-05, "loss": 0.8581, "step": 6850 }, { "epoch": 3.1056210335448777, "grad_norm": 0.2152253826677586, "learning_rate": 3.0170892854301294e-05, "loss": 0.8831, "step": 6851 }, { "epoch": 3.1060743427017226, "grad_norm": 0.27921545078287807, "learning_rate": 3.0158622699085368e-05, "loss": 0.8765, "step": 6852 }, { "epoch": 3.1065276518585674, "grad_norm": 0.2863374074039194, "learning_rate": 3.0146353529512315e-05, "loss": 0.8741, "step": 6853 }, { "epoch": 3.1069809610154127, "grad_norm": 0.31208843083877, "learning_rate": 3.0134085346810907e-05, "loss": 0.8761, "step": 6854 }, { "epoch": 3.1074342701722575, "grad_norm": 0.2097766456913727, "learning_rate": 3.012181815220984e-05, "loss": 0.8723, "step": 6855 }, { "epoch": 3.1078875793291023, "grad_norm": 0.27810299827040563, "learning_rate": 3.0109551946937722e-05, "loss": 0.8822, "step": 6856 }, { "epoch": 3.1083408884859476, "grad_norm": 0.20210495366346679, "learning_rate": 3.0097286732223038e-05, "loss": 0.8755, "step": 6857 }, { "epoch": 3.1087941976427924, "grad_norm": 0.26804787417995574, "learning_rate": 3.0085022509294207e-05, "loss": 0.8656, "step": 6858 }, { "epoch": 3.1092475067996372, "grad_norm": 0.3211735849155731, "learning_rate": 3.00727592793795e-05, "loss": 0.8545, "step": 6859 }, { "epoch": 3.1097008159564825, "grad_norm": 0.2637493716534845, "learning_rate": 3.0060497043707123e-05, "loss": 0.8724, "step": 6860 }, { "epoch": 3.1101541251133273, "grad_norm": 0.19725349138691253, "learning_rate": 3.004823580350518e-05, "loss": 0.8618, "step": 6861 }, { "epoch": 3.110607434270172, "grad_norm": 0.28347910242781066, "learning_rate": 3.003597556000167e-05, "loss": 0.8843, "step": 6862 }, { "epoch": 3.1110607434270174, "grad_norm": 0.2384525941155289, "learning_rate": 3.002371631442449e-05, "loss": 0.8753, "step": 6863 }, { "epoch": 3.1115140525838623, "grad_norm": 0.2759544014509861, "learning_rate": 3.0011458068001446e-05, "loss": 0.8822, "step": 6864 }, { "epoch": 3.111967361740707, "grad_norm": 0.2358599500585341, "learning_rate": 2.9999200821960226e-05, "loss": 0.8606, "step": 6865 }, { "epoch": 3.112420670897552, "grad_norm": 0.23302433639714829, "learning_rate": 2.998694457752844e-05, "loss": 0.8778, "step": 6866 }, { "epoch": 3.112873980054397, "grad_norm": 0.2299689364446447, "learning_rate": 2.997468933593358e-05, "loss": 0.8681, "step": 6867 }, { "epoch": 3.113327289211242, "grad_norm": 0.23938443684096947, "learning_rate": 2.9962435098403055e-05, "loss": 0.8611, "step": 6868 }, { "epoch": 3.113780598368087, "grad_norm": 0.2838973860665008, "learning_rate": 2.995018186616415e-05, "loss": 0.8637, "step": 6869 }, { "epoch": 3.114233907524932, "grad_norm": 0.21807596184496125, "learning_rate": 2.9937929640444064e-05, "loss": 0.8766, "step": 6870 }, { "epoch": 3.114687216681777, "grad_norm": 0.25863876368555316, "learning_rate": 2.992567842246989e-05, "loss": 0.8561, "step": 6871 }, { "epoch": 3.1151405258386218, "grad_norm": 0.3548794123765615, "learning_rate": 2.991342821346864e-05, "loss": 0.8525, "step": 6872 }, { "epoch": 3.115593834995467, "grad_norm": 0.28561014570436977, "learning_rate": 2.9901179014667213e-05, "loss": 0.8894, "step": 6873 }, { "epoch": 3.116047144152312, "grad_norm": 0.23178391763206013, "learning_rate": 2.988893082729237e-05, "loss": 0.8732, "step": 6874 }, { "epoch": 3.1165004533091567, "grad_norm": 0.24311129148069888, "learning_rate": 2.987668365257082e-05, "loss": 0.8613, "step": 6875 }, { "epoch": 3.116953762466002, "grad_norm": 0.2832967871461975, "learning_rate": 2.986443749172915e-05, "loss": 0.8699, "step": 6876 }, { "epoch": 3.117407071622847, "grad_norm": 0.2695449057594331, "learning_rate": 2.9852192345993857e-05, "loss": 0.8463, "step": 6877 }, { "epoch": 3.1178603807796916, "grad_norm": 0.22375048019439067, "learning_rate": 2.9839948216591326e-05, "loss": 0.8714, "step": 6878 }, { "epoch": 3.118313689936537, "grad_norm": 0.19805607525623262, "learning_rate": 2.982770510474784e-05, "loss": 0.8731, "step": 6879 }, { "epoch": 3.1187669990933817, "grad_norm": 0.20963506475752908, "learning_rate": 2.981546301168958e-05, "loss": 0.8879, "step": 6880 }, { "epoch": 3.1192203082502266, "grad_norm": 0.3017468522612877, "learning_rate": 2.9803221938642637e-05, "loss": 0.8548, "step": 6881 }, { "epoch": 3.119673617407072, "grad_norm": 0.30333019751281254, "learning_rate": 2.979098188683298e-05, "loss": 0.871, "step": 6882 }, { "epoch": 3.1201269265639167, "grad_norm": 0.20599326589431943, "learning_rate": 2.9778742857486487e-05, "loss": 0.8688, "step": 6883 }, { "epoch": 3.1205802357207615, "grad_norm": 0.23675134142309234, "learning_rate": 2.976650485182895e-05, "loss": 0.871, "step": 6884 }, { "epoch": 3.1210335448776063, "grad_norm": 0.1882228290424865, "learning_rate": 2.9754267871086024e-05, "loss": 0.842, "step": 6885 }, { "epoch": 3.1214868540344516, "grad_norm": 0.2705946780293412, "learning_rate": 2.9742031916483278e-05, "loss": 0.8592, "step": 6886 }, { "epoch": 3.1219401631912964, "grad_norm": 0.2936303932848504, "learning_rate": 2.9729796989246185e-05, "loss": 0.8499, "step": 6887 }, { "epoch": 3.1223934723481412, "grad_norm": 0.33988270868072173, "learning_rate": 2.9717563090600108e-05, "loss": 0.8745, "step": 6888 }, { "epoch": 3.1228467815049865, "grad_norm": 0.29108229119724444, "learning_rate": 2.97053302217703e-05, "loss": 0.8765, "step": 6889 }, { "epoch": 3.1233000906618313, "grad_norm": 0.3076975484607649, "learning_rate": 2.969309838398193e-05, "loss": 0.8517, "step": 6890 }, { "epoch": 3.123753399818676, "grad_norm": 0.2982449980796256, "learning_rate": 2.968086757846005e-05, "loss": 0.8625, "step": 6891 }, { "epoch": 3.1242067089755214, "grad_norm": 0.20199119287479758, "learning_rate": 2.9668637806429607e-05, "loss": 0.8717, "step": 6892 }, { "epoch": 3.1246600181323663, "grad_norm": 0.23056446457146773, "learning_rate": 2.9656409069115467e-05, "loss": 0.8667, "step": 6893 }, { "epoch": 3.125113327289211, "grad_norm": 0.3505697114335065, "learning_rate": 2.964418136774234e-05, "loss": 0.857, "step": 6894 }, { "epoch": 3.1255666364460564, "grad_norm": 0.2653925856809053, "learning_rate": 2.963195470353489e-05, "loss": 0.862, "step": 6895 }, { "epoch": 3.126019945602901, "grad_norm": 0.25406560786069815, "learning_rate": 2.9619729077717644e-05, "loss": 0.8627, "step": 6896 }, { "epoch": 3.126473254759746, "grad_norm": 0.3538304461659733, "learning_rate": 2.9607504491515037e-05, "loss": 0.8521, "step": 6897 }, { "epoch": 3.1269265639165913, "grad_norm": 0.30162802017571055, "learning_rate": 2.9595280946151404e-05, "loss": 0.8776, "step": 6898 }, { "epoch": 3.127379873073436, "grad_norm": 0.2837298016404098, "learning_rate": 2.9583058442850966e-05, "loss": 0.8559, "step": 6899 }, { "epoch": 3.127833182230281, "grad_norm": 0.3118800284909707, "learning_rate": 2.9570836982837828e-05, "loss": 0.8632, "step": 6900 }, { "epoch": 3.1282864913871262, "grad_norm": 0.2380000070623176, "learning_rate": 2.9558616567336026e-05, "loss": 0.856, "step": 6901 }, { "epoch": 3.128739800543971, "grad_norm": 0.22886988593491253, "learning_rate": 2.9546397197569456e-05, "loss": 0.8653, "step": 6902 }, { "epoch": 3.129193109700816, "grad_norm": 0.22762245043049278, "learning_rate": 2.9534178874761932e-05, "loss": 0.8703, "step": 6903 }, { "epoch": 3.1296464188576607, "grad_norm": 0.21923899710381167, "learning_rate": 2.9521961600137147e-05, "loss": 0.8713, "step": 6904 }, { "epoch": 3.130099728014506, "grad_norm": 0.2558153359715103, "learning_rate": 2.9509745374918707e-05, "loss": 0.8638, "step": 6905 }, { "epoch": 3.130553037171351, "grad_norm": 0.2869348030842721, "learning_rate": 2.9497530200330094e-05, "loss": 0.8668, "step": 6906 }, { "epoch": 3.1310063463281956, "grad_norm": 0.24925063269858783, "learning_rate": 2.9485316077594707e-05, "loss": 0.8629, "step": 6907 }, { "epoch": 3.131459655485041, "grad_norm": 0.24097197756682648, "learning_rate": 2.9473103007935803e-05, "loss": 0.8623, "step": 6908 }, { "epoch": 3.1319129646418857, "grad_norm": 0.2964704253887973, "learning_rate": 2.946089099257658e-05, "loss": 0.8644, "step": 6909 }, { "epoch": 3.1323662737987306, "grad_norm": 0.24902393049512392, "learning_rate": 2.9448680032740104e-05, "loss": 0.8746, "step": 6910 }, { "epoch": 3.132819582955576, "grad_norm": 0.24108034620988752, "learning_rate": 2.9436470129649322e-05, "loss": 0.8635, "step": 6911 }, { "epoch": 3.1332728921124207, "grad_norm": 0.30161949580671366, "learning_rate": 2.9424261284527117e-05, "loss": 0.8638, "step": 6912 }, { "epoch": 3.1337262012692655, "grad_norm": 0.21399402689817354, "learning_rate": 2.941205349859621e-05, "loss": 0.8737, "step": 6913 }, { "epoch": 3.1341795104261108, "grad_norm": 0.27094288476685724, "learning_rate": 2.939984677307927e-05, "loss": 0.8877, "step": 6914 }, { "epoch": 3.1346328195829556, "grad_norm": 0.31247049854310194, "learning_rate": 2.9387641109198832e-05, "loss": 0.8686, "step": 6915 }, { "epoch": 3.1350861287398004, "grad_norm": 0.19539652709328992, "learning_rate": 2.9375436508177317e-05, "loss": 0.8548, "step": 6916 }, { "epoch": 3.1355394378966457, "grad_norm": 0.24278909466095572, "learning_rate": 2.9363232971237062e-05, "loss": 0.8492, "step": 6917 }, { "epoch": 3.1359927470534905, "grad_norm": 0.30110492259604843, "learning_rate": 2.9351030499600286e-05, "loss": 0.8699, "step": 6918 }, { "epoch": 3.1364460562103353, "grad_norm": 0.29210823241251854, "learning_rate": 2.9338829094489104e-05, "loss": 0.8642, "step": 6919 }, { "epoch": 3.1368993653671806, "grad_norm": 0.26470458692419097, "learning_rate": 2.9326628757125517e-05, "loss": 0.8502, "step": 6920 }, { "epoch": 3.1373526745240254, "grad_norm": 0.27699128918947663, "learning_rate": 2.9314429488731432e-05, "loss": 0.8522, "step": 6921 }, { "epoch": 3.1378059836808703, "grad_norm": 0.28893211730729623, "learning_rate": 2.9302231290528635e-05, "loss": 0.8649, "step": 6922 }, { "epoch": 3.1382592928377155, "grad_norm": 0.2871337408216156, "learning_rate": 2.9290034163738816e-05, "loss": 0.8718, "step": 6923 }, { "epoch": 3.1387126019945604, "grad_norm": 0.2061338152733656, "learning_rate": 2.9277838109583553e-05, "loss": 0.8799, "step": 6924 }, { "epoch": 3.139165911151405, "grad_norm": 0.26905839298579576, "learning_rate": 2.9265643129284317e-05, "loss": 0.8741, "step": 6925 }, { "epoch": 3.13961922030825, "grad_norm": 0.19164297169077849, "learning_rate": 2.9253449224062472e-05, "loss": 0.8458, "step": 6926 }, { "epoch": 3.1400725294650953, "grad_norm": 0.2604966871007538, "learning_rate": 2.9241256395139264e-05, "loss": 0.8773, "step": 6927 }, { "epoch": 3.14052583862194, "grad_norm": 0.2883671430918621, "learning_rate": 2.9229064643735844e-05, "loss": 0.8669, "step": 6928 }, { "epoch": 3.140979147778785, "grad_norm": 0.268920614337893, "learning_rate": 2.9216873971073263e-05, "loss": 0.8761, "step": 6929 }, { "epoch": 3.1414324569356302, "grad_norm": 0.24565496436619613, "learning_rate": 2.920468437837244e-05, "loss": 0.8767, "step": 6930 }, { "epoch": 3.141885766092475, "grad_norm": 0.19426042430093968, "learning_rate": 2.919249586685422e-05, "loss": 0.8604, "step": 6931 }, { "epoch": 3.14233907524932, "grad_norm": 0.17796039708324474, "learning_rate": 2.918030843773929e-05, "loss": 0.8571, "step": 6932 }, { "epoch": 3.142792384406165, "grad_norm": 0.2421543320507795, "learning_rate": 2.9168122092248264e-05, "loss": 0.8767, "step": 6933 }, { "epoch": 3.14324569356301, "grad_norm": 0.28116250681669, "learning_rate": 2.9155936831601645e-05, "loss": 0.864, "step": 6934 }, { "epoch": 3.143699002719855, "grad_norm": 0.2523367884387069, "learning_rate": 2.914375265701982e-05, "loss": 0.8905, "step": 6935 }, { "epoch": 3.1441523118767, "grad_norm": 0.19207956740480206, "learning_rate": 2.9131569569723077e-05, "loss": 0.8634, "step": 6936 }, { "epoch": 3.144605621033545, "grad_norm": 0.24673915044478562, "learning_rate": 2.9119387570931583e-05, "loss": 0.8498, "step": 6937 }, { "epoch": 3.1450589301903897, "grad_norm": 0.22934436090036062, "learning_rate": 2.9107206661865393e-05, "loss": 0.8593, "step": 6938 }, { "epoch": 3.145512239347235, "grad_norm": 0.22207664731420784, "learning_rate": 2.9095026843744477e-05, "loss": 0.8718, "step": 6939 }, { "epoch": 3.14596554850408, "grad_norm": 0.20847860716562241, "learning_rate": 2.9082848117788683e-05, "loss": 0.893, "step": 6940 }, { "epoch": 3.1464188576609247, "grad_norm": 0.2365687492431127, "learning_rate": 2.9070670485217722e-05, "loss": 0.8663, "step": 6941 }, { "epoch": 3.1468721668177695, "grad_norm": 0.198926620615235, "learning_rate": 2.905849394725123e-05, "loss": 0.8593, "step": 6942 }, { "epoch": 3.1473254759746148, "grad_norm": 0.2464997330089236, "learning_rate": 2.9046318505108727e-05, "loss": 0.8584, "step": 6943 }, { "epoch": 3.1477787851314596, "grad_norm": 0.19274254057838472, "learning_rate": 2.9034144160009624e-05, "loss": 0.8717, "step": 6944 }, { "epoch": 3.1482320942883044, "grad_norm": 0.2455409167510631, "learning_rate": 2.9021970913173202e-05, "loss": 0.8802, "step": 6945 }, { "epoch": 3.1486854034451497, "grad_norm": 0.24017678561981254, "learning_rate": 2.9009798765818665e-05, "loss": 0.8358, "step": 6946 }, { "epoch": 3.1491387126019945, "grad_norm": 0.21186281780275526, "learning_rate": 2.899762771916508e-05, "loss": 0.8746, "step": 6947 }, { "epoch": 3.1495920217588393, "grad_norm": 0.23794500124084023, "learning_rate": 2.898545777443141e-05, "loss": 0.8779, "step": 6948 }, { "epoch": 3.1500453309156846, "grad_norm": 0.25705285942466, "learning_rate": 2.8973288932836517e-05, "loss": 0.845, "step": 6949 }, { "epoch": 3.1504986400725294, "grad_norm": 0.2637930197674585, "learning_rate": 2.8961121195599166e-05, "loss": 0.8641, "step": 6950 }, { "epoch": 3.1509519492293743, "grad_norm": 0.20625380347430503, "learning_rate": 2.894895456393795e-05, "loss": 0.8322, "step": 6951 }, { "epoch": 3.1514052583862195, "grad_norm": 0.27409985533010894, "learning_rate": 2.8936789039071408e-05, "loss": 0.8728, "step": 6952 }, { "epoch": 3.1518585675430644, "grad_norm": 0.29902423439141684, "learning_rate": 2.892462462221796e-05, "loss": 0.8768, "step": 6953 }, { "epoch": 3.152311876699909, "grad_norm": 0.2603711946019893, "learning_rate": 2.891246131459591e-05, "loss": 0.8629, "step": 6954 }, { "epoch": 3.1527651858567545, "grad_norm": 0.22087000658575628, "learning_rate": 2.890029911742345e-05, "loss": 0.8631, "step": 6955 }, { "epoch": 3.1532184950135993, "grad_norm": 0.2318627341134867, "learning_rate": 2.8888138031918647e-05, "loss": 0.8582, "step": 6956 }, { "epoch": 3.153671804170444, "grad_norm": 0.27081878757212274, "learning_rate": 2.8875978059299478e-05, "loss": 0.8515, "step": 6957 }, { "epoch": 3.1541251133272894, "grad_norm": 0.28043712343437543, "learning_rate": 2.8863819200783802e-05, "loss": 0.8524, "step": 6958 }, { "epoch": 3.1545784224841342, "grad_norm": 0.2562062438291131, "learning_rate": 2.8851661457589357e-05, "loss": 0.88, "step": 6959 }, { "epoch": 3.155031731640979, "grad_norm": 0.21739776601864433, "learning_rate": 2.8839504830933783e-05, "loss": 0.8772, "step": 6960 }, { "epoch": 3.1554850407978243, "grad_norm": 0.1883227322180692, "learning_rate": 2.8827349322034596e-05, "loss": 0.8752, "step": 6961 }, { "epoch": 3.155938349954669, "grad_norm": 0.2637718834560836, "learning_rate": 2.8815194932109216e-05, "loss": 0.8892, "step": 6962 }, { "epoch": 3.156391659111514, "grad_norm": 0.3055369425840871, "learning_rate": 2.880304166237493e-05, "loss": 0.8906, "step": 6963 }, { "epoch": 3.1568449682683593, "grad_norm": 0.2687555341827451, "learning_rate": 2.8790889514048936e-05, "loss": 0.873, "step": 6964 }, { "epoch": 3.157298277425204, "grad_norm": 0.20049359599370142, "learning_rate": 2.8778738488348294e-05, "loss": 0.8688, "step": 6965 }, { "epoch": 3.157751586582049, "grad_norm": 0.25929527341187325, "learning_rate": 2.8766588586489984e-05, "loss": 0.8566, "step": 6966 }, { "epoch": 3.1582048957388937, "grad_norm": 0.2549711915983522, "learning_rate": 2.875443980969083e-05, "loss": 0.8705, "step": 6967 }, { "epoch": 3.158658204895739, "grad_norm": 0.2480678725472705, "learning_rate": 2.8742292159167583e-05, "loss": 0.88, "step": 6968 }, { "epoch": 3.159111514052584, "grad_norm": 0.2933891121473232, "learning_rate": 2.8730145636136866e-05, "loss": 0.8781, "step": 6969 }, { "epoch": 3.1595648232094287, "grad_norm": 0.27849999837111644, "learning_rate": 2.871800024181518e-05, "loss": 0.8611, "step": 6970 }, { "epoch": 3.160018132366274, "grad_norm": 0.21029203569735633, "learning_rate": 2.8705855977418927e-05, "loss": 0.8891, "step": 6971 }, { "epoch": 3.1604714415231188, "grad_norm": 0.2975711840370749, "learning_rate": 2.869371284416439e-05, "loss": 0.8531, "step": 6972 }, { "epoch": 3.1609247506799636, "grad_norm": 0.3610182089677336, "learning_rate": 2.8681570843267745e-05, "loss": 0.8715, "step": 6973 }, { "epoch": 3.161378059836809, "grad_norm": 0.3089989178348143, "learning_rate": 2.8669429975945036e-05, "loss": 0.8651, "step": 6974 }, { "epoch": 3.1618313689936537, "grad_norm": 0.2466358012750128, "learning_rate": 2.8657290243412217e-05, "loss": 0.8505, "step": 6975 }, { "epoch": 3.1622846781504985, "grad_norm": 0.28684692878251467, "learning_rate": 2.8645151646885117e-05, "loss": 0.8599, "step": 6976 }, { "epoch": 3.162737987307344, "grad_norm": 0.2870145980034456, "learning_rate": 2.8633014187579448e-05, "loss": 0.8557, "step": 6977 }, { "epoch": 3.1631912964641886, "grad_norm": 0.3287629815622556, "learning_rate": 2.8620877866710816e-05, "loss": 0.8811, "step": 6978 }, { "epoch": 3.1636446056210334, "grad_norm": 0.2098727755082707, "learning_rate": 2.860874268549471e-05, "loss": 0.8533, "step": 6979 }, { "epoch": 3.1640979147778787, "grad_norm": 0.27343561685517065, "learning_rate": 2.8596608645146502e-05, "loss": 0.8491, "step": 6980 }, { "epoch": 3.1645512239347235, "grad_norm": 0.25700855470520845, "learning_rate": 2.858447574688146e-05, "loss": 0.8624, "step": 6981 }, { "epoch": 3.1650045330915684, "grad_norm": 0.19862276437408424, "learning_rate": 2.8572343991914713e-05, "loss": 0.852, "step": 6982 }, { "epoch": 3.165457842248413, "grad_norm": 0.2548989110723322, "learning_rate": 2.85602133814613e-05, "loss": 0.8796, "step": 6983 }, { "epoch": 3.1659111514052585, "grad_norm": 0.3387943267318919, "learning_rate": 2.854808391673614e-05, "loss": 0.8688, "step": 6984 }, { "epoch": 3.1663644605621033, "grad_norm": 0.2220967457759192, "learning_rate": 2.8535955598954025e-05, "loss": 0.8498, "step": 6985 }, { "epoch": 3.166817769718948, "grad_norm": 0.247587230959362, "learning_rate": 2.852382842932966e-05, "loss": 0.8558, "step": 6986 }, { "epoch": 3.1672710788757934, "grad_norm": 0.2799334187030419, "learning_rate": 2.85117024090776e-05, "loss": 0.8834, "step": 6987 }, { "epoch": 3.1677243880326382, "grad_norm": 0.21661665103927905, "learning_rate": 2.8499577539412303e-05, "loss": 0.8837, "step": 6988 }, { "epoch": 3.168177697189483, "grad_norm": 0.23548526065761005, "learning_rate": 2.8487453821548138e-05, "loss": 0.8825, "step": 6989 }, { "epoch": 3.1686310063463283, "grad_norm": 0.31261066287782124, "learning_rate": 2.847533125669929e-05, "loss": 0.8658, "step": 6990 }, { "epoch": 3.169084315503173, "grad_norm": 0.21016991904446714, "learning_rate": 2.8463209846079886e-05, "loss": 0.8695, "step": 6991 }, { "epoch": 3.169537624660018, "grad_norm": 0.288324197450245, "learning_rate": 2.845108959090392e-05, "loss": 0.8717, "step": 6992 }, { "epoch": 3.1699909338168633, "grad_norm": 0.3284733319247781, "learning_rate": 2.8438970492385275e-05, "loss": 0.8634, "step": 6993 }, { "epoch": 3.170444242973708, "grad_norm": 0.25002844734240737, "learning_rate": 2.842685255173771e-05, "loss": 0.8446, "step": 6994 }, { "epoch": 3.170897552130553, "grad_norm": 0.348679939161987, "learning_rate": 2.841473577017488e-05, "loss": 0.8646, "step": 6995 }, { "epoch": 3.171350861287398, "grad_norm": 0.3684462226257516, "learning_rate": 2.840262014891031e-05, "loss": 0.8676, "step": 6996 }, { "epoch": 3.171804170444243, "grad_norm": 0.22709302657326774, "learning_rate": 2.8390505689157416e-05, "loss": 0.8742, "step": 6997 }, { "epoch": 3.172257479601088, "grad_norm": 0.27375909349028565, "learning_rate": 2.83783923921295e-05, "loss": 0.8725, "step": 6998 }, { "epoch": 3.172710788757933, "grad_norm": 0.2935445723706622, "learning_rate": 2.8366280259039736e-05, "loss": 0.8811, "step": 6999 }, { "epoch": 3.173164097914778, "grad_norm": 0.36118956440822103, "learning_rate": 2.835416929110119e-05, "loss": 0.8442, "step": 7000 }, { "epoch": 3.1736174070716228, "grad_norm": 0.4061849292837669, "learning_rate": 2.8342059489526824e-05, "loss": 0.8632, "step": 7001 }, { "epoch": 3.174070716228468, "grad_norm": 0.29801711546010046, "learning_rate": 2.8329950855529458e-05, "loss": 0.8884, "step": 7002 }, { "epoch": 3.174524025385313, "grad_norm": 0.24528213775262087, "learning_rate": 2.831784339032181e-05, "loss": 0.8787, "step": 7003 }, { "epoch": 3.1749773345421577, "grad_norm": 0.3336704111641633, "learning_rate": 2.8305737095116484e-05, "loss": 0.8572, "step": 7004 }, { "epoch": 3.1754306436990025, "grad_norm": 0.2999662323022854, "learning_rate": 2.8293631971125955e-05, "loss": 0.8601, "step": 7005 }, { "epoch": 3.175883952855848, "grad_norm": 0.24679009819821005, "learning_rate": 2.8281528019562592e-05, "loss": 0.8697, "step": 7006 }, { "epoch": 3.1763372620126926, "grad_norm": 0.259289536775999, "learning_rate": 2.8269425241638634e-05, "loss": 0.8508, "step": 7007 }, { "epoch": 3.1767905711695374, "grad_norm": 0.21066782706498954, "learning_rate": 2.8257323638566227e-05, "loss": 0.8643, "step": 7008 }, { "epoch": 3.1772438803263827, "grad_norm": 0.19536342416825248, "learning_rate": 2.824522321155736e-05, "loss": 0.8634, "step": 7009 }, { "epoch": 3.1776971894832275, "grad_norm": 0.23538785196555306, "learning_rate": 2.823312396182393e-05, "loss": 0.8806, "step": 7010 }, { "epoch": 3.1781504986400724, "grad_norm": 0.2107859114316996, "learning_rate": 2.8221025890577716e-05, "loss": 0.8495, "step": 7011 }, { "epoch": 3.1786038077969176, "grad_norm": 0.20962995785188374, "learning_rate": 2.8208928999030387e-05, "loss": 0.8811, "step": 7012 }, { "epoch": 3.1790571169537625, "grad_norm": 0.18752409904204942, "learning_rate": 2.8196833288393466e-05, "loss": 0.8656, "step": 7013 }, { "epoch": 3.1795104261106073, "grad_norm": 0.22172138333328842, "learning_rate": 2.8184738759878375e-05, "loss": 0.8546, "step": 7014 }, { "epoch": 3.1799637352674526, "grad_norm": 0.25532128966778467, "learning_rate": 2.8172645414696423e-05, "loss": 0.8616, "step": 7015 }, { "epoch": 3.1804170444242974, "grad_norm": 0.3210163340925334, "learning_rate": 2.81605532540588e-05, "loss": 0.8826, "step": 7016 }, { "epoch": 3.1808703535811422, "grad_norm": 0.207948701120001, "learning_rate": 2.814846227917656e-05, "loss": 0.8593, "step": 7017 }, { "epoch": 3.1813236627379875, "grad_norm": 0.27138805537596256, "learning_rate": 2.8136372491260648e-05, "loss": 0.8767, "step": 7018 }, { "epoch": 3.1817769718948323, "grad_norm": 0.29438984433656523, "learning_rate": 2.8124283891521904e-05, "loss": 0.8784, "step": 7019 }, { "epoch": 3.182230281051677, "grad_norm": 0.20813375758286856, "learning_rate": 2.811219648117103e-05, "loss": 0.8679, "step": 7020 }, { "epoch": 3.182683590208522, "grad_norm": 0.3227121904132127, "learning_rate": 2.8100110261418606e-05, "loss": 0.8637, "step": 7021 }, { "epoch": 3.1831368993653673, "grad_norm": 0.2580579981013693, "learning_rate": 2.8088025233475123e-05, "loss": 0.882, "step": 7022 }, { "epoch": 3.183590208522212, "grad_norm": 0.2778074580435852, "learning_rate": 2.807594139855093e-05, "loss": 0.8486, "step": 7023 }, { "epoch": 3.184043517679057, "grad_norm": 0.2851299918406578, "learning_rate": 2.8063858757856234e-05, "loss": 0.8724, "step": 7024 }, { "epoch": 3.184496826835902, "grad_norm": 0.21771659031868298, "learning_rate": 2.805177731260117e-05, "loss": 0.8584, "step": 7025 }, { "epoch": 3.184950135992747, "grad_norm": 0.2859556541102524, "learning_rate": 2.8039697063995725e-05, "loss": 0.8733, "step": 7026 }, { "epoch": 3.185403445149592, "grad_norm": 0.2567365619659707, "learning_rate": 2.802761801324978e-05, "loss": 0.8596, "step": 7027 }, { "epoch": 3.185856754306437, "grad_norm": 0.1919800119592503, "learning_rate": 2.8015540161573062e-05, "loss": 0.8498, "step": 7028 }, { "epoch": 3.186310063463282, "grad_norm": 0.28941898409851513, "learning_rate": 2.800346351017522e-05, "loss": 0.8561, "step": 7029 }, { "epoch": 3.1867633726201268, "grad_norm": 0.22949077125713613, "learning_rate": 2.7991388060265766e-05, "loss": 0.8917, "step": 7030 }, { "epoch": 3.187216681776972, "grad_norm": 0.23988505864439696, "learning_rate": 2.797931381305409e-05, "loss": 0.861, "step": 7031 }, { "epoch": 3.187669990933817, "grad_norm": 0.22613308710109137, "learning_rate": 2.796724076974946e-05, "loss": 0.8715, "step": 7032 }, { "epoch": 3.1881233000906617, "grad_norm": 0.22539984513030858, "learning_rate": 2.7955168931561034e-05, "loss": 0.8598, "step": 7033 }, { "epoch": 3.188576609247507, "grad_norm": 0.2151448111852147, "learning_rate": 2.794309829969784e-05, "loss": 0.8736, "step": 7034 }, { "epoch": 3.189029918404352, "grad_norm": 0.22673186729104428, "learning_rate": 2.7931028875368782e-05, "loss": 0.8707, "step": 7035 }, { "epoch": 3.1894832275611966, "grad_norm": 0.2250134767305316, "learning_rate": 2.7918960659782652e-05, "loss": 0.8777, "step": 7036 }, { "epoch": 3.189936536718042, "grad_norm": 0.24361982382225164, "learning_rate": 2.790689365414812e-05, "loss": 0.8814, "step": 7037 }, { "epoch": 3.1903898458748867, "grad_norm": 0.26427002660125076, "learning_rate": 2.7894827859673738e-05, "loss": 0.8862, "step": 7038 }, { "epoch": 3.1908431550317315, "grad_norm": 0.2570473797302161, "learning_rate": 2.7882763277567915e-05, "loss": 0.8695, "step": 7039 }, { "epoch": 3.191296464188577, "grad_norm": 0.25718185074528666, "learning_rate": 2.787069990903896e-05, "loss": 0.8848, "step": 7040 }, { "epoch": 3.1917497733454216, "grad_norm": 0.25468066256810284, "learning_rate": 2.7858637755295057e-05, "loss": 0.8619, "step": 7041 }, { "epoch": 3.1922030825022665, "grad_norm": 0.20185331950228208, "learning_rate": 2.784657681754426e-05, "loss": 0.8606, "step": 7042 }, { "epoch": 3.1926563916591117, "grad_norm": 0.22031907612490312, "learning_rate": 2.7834517096994518e-05, "loss": 0.8873, "step": 7043 }, { "epoch": 3.1931097008159566, "grad_norm": 0.2545507893105532, "learning_rate": 2.7822458594853646e-05, "loss": 0.8886, "step": 7044 }, { "epoch": 3.1935630099728014, "grad_norm": 0.2388702367441169, "learning_rate": 2.7810401312329332e-05, "loss": 0.8609, "step": 7045 }, { "epoch": 3.1940163191296462, "grad_norm": 0.20793644433831554, "learning_rate": 2.7798345250629162e-05, "loss": 0.8677, "step": 7046 }, { "epoch": 3.1944696282864915, "grad_norm": 0.20971409853771064, "learning_rate": 2.7786290410960567e-05, "loss": 0.8898, "step": 7047 }, { "epoch": 3.1949229374433363, "grad_norm": 0.23601012708070127, "learning_rate": 2.7774236794530875e-05, "loss": 0.8869, "step": 7048 }, { "epoch": 3.195376246600181, "grad_norm": 0.22272085315299947, "learning_rate": 2.776218440254731e-05, "loss": 0.8773, "step": 7049 }, { "epoch": 3.1958295557570264, "grad_norm": 0.2385071964154399, "learning_rate": 2.775013323621693e-05, "loss": 0.8607, "step": 7050 }, { "epoch": 3.1962828649138713, "grad_norm": 0.22617826734757215, "learning_rate": 2.7738083296746715e-05, "loss": 0.8768, "step": 7051 }, { "epoch": 3.196736174070716, "grad_norm": 0.24289829225010073, "learning_rate": 2.7726034585343495e-05, "loss": 0.8628, "step": 7052 }, { "epoch": 3.1971894832275614, "grad_norm": 0.22609194035250577, "learning_rate": 2.7713987103213994e-05, "loss": 0.877, "step": 7053 }, { "epoch": 3.197642792384406, "grad_norm": 0.2069374358548237, "learning_rate": 2.7701940851564776e-05, "loss": 0.885, "step": 7054 }, { "epoch": 3.198096101541251, "grad_norm": 0.20223546282834698, "learning_rate": 2.7689895831602335e-05, "loss": 0.8567, "step": 7055 }, { "epoch": 3.1985494106980963, "grad_norm": 0.20975401501733387, "learning_rate": 2.7677852044532998e-05, "loss": 0.8736, "step": 7056 }, { "epoch": 3.199002719854941, "grad_norm": 0.17877963739675004, "learning_rate": 2.7665809491562998e-05, "loss": 0.8683, "step": 7057 }, { "epoch": 3.199456029011786, "grad_norm": 0.1879526883847707, "learning_rate": 2.7653768173898424e-05, "loss": 0.8583, "step": 7058 }, { "epoch": 3.199909338168631, "grad_norm": 0.19414071052424592, "learning_rate": 2.7641728092745252e-05, "loss": 0.8472, "step": 7059 }, { "epoch": 3.200362647325476, "grad_norm": 0.19544481430732857, "learning_rate": 2.7629689249309336e-05, "loss": 0.8743, "step": 7060 }, { "epoch": 3.200815956482321, "grad_norm": 0.23621332892840305, "learning_rate": 2.7617651644796395e-05, "loss": 0.8827, "step": 7061 }, { "epoch": 3.2012692656391657, "grad_norm": 0.19546370977419714, "learning_rate": 2.760561528041203e-05, "loss": 0.8653, "step": 7062 }, { "epoch": 3.201722574796011, "grad_norm": 0.18579385596291215, "learning_rate": 2.7593580157361725e-05, "loss": 0.8549, "step": 7063 }, { "epoch": 3.202175883952856, "grad_norm": 0.26885441272519545, "learning_rate": 2.758154627685083e-05, "loss": 0.8573, "step": 7064 }, { "epoch": 3.2026291931097006, "grad_norm": 0.26091255619884696, "learning_rate": 2.7569513640084583e-05, "loss": 0.8367, "step": 7065 }, { "epoch": 3.203082502266546, "grad_norm": 0.21172365081578495, "learning_rate": 2.755748224826806e-05, "loss": 0.8627, "step": 7066 }, { "epoch": 3.2035358114233907, "grad_norm": 0.1751487372063716, "learning_rate": 2.7545452102606262e-05, "loss": 0.8732, "step": 7067 }, { "epoch": 3.2039891205802356, "grad_norm": 0.22509314866384822, "learning_rate": 2.753342320430404e-05, "loss": 0.8728, "step": 7068 }, { "epoch": 3.204442429737081, "grad_norm": 0.30118513214935766, "learning_rate": 2.752139555456613e-05, "loss": 0.8686, "step": 7069 }, { "epoch": 3.2048957388939256, "grad_norm": 0.28159331106267915, "learning_rate": 2.750936915459712e-05, "loss": 0.855, "step": 7070 }, { "epoch": 3.2053490480507705, "grad_norm": 0.2597931143419873, "learning_rate": 2.74973440056015e-05, "loss": 0.8514, "step": 7071 }, { "epoch": 3.2058023572076157, "grad_norm": 0.21359942295248302, "learning_rate": 2.7485320108783626e-05, "loss": 0.8553, "step": 7072 }, { "epoch": 3.2062556663644606, "grad_norm": 0.23610742141066066, "learning_rate": 2.7473297465347716e-05, "loss": 0.8543, "step": 7073 }, { "epoch": 3.2067089755213054, "grad_norm": 0.2893811985593424, "learning_rate": 2.7461276076497882e-05, "loss": 0.8515, "step": 7074 }, { "epoch": 3.2071622846781507, "grad_norm": 0.3248646277597937, "learning_rate": 2.7449255943438102e-05, "loss": 0.8894, "step": 7075 }, { "epoch": 3.2076155938349955, "grad_norm": 0.24170282263243223, "learning_rate": 2.7437237067372225e-05, "loss": 0.8576, "step": 7076 }, { "epoch": 3.2080689029918403, "grad_norm": 0.18407331015871886, "learning_rate": 2.742521944950397e-05, "loss": 0.8671, "step": 7077 }, { "epoch": 3.2085222121486856, "grad_norm": 0.19414469844510868, "learning_rate": 2.741320309103695e-05, "loss": 0.8746, "step": 7078 }, { "epoch": 3.2089755213055304, "grad_norm": 0.2473811281580896, "learning_rate": 2.7401187993174644e-05, "loss": 0.8976, "step": 7079 }, { "epoch": 3.2094288304623753, "grad_norm": 0.2488037555027017, "learning_rate": 2.7389174157120374e-05, "loss": 0.874, "step": 7080 }, { "epoch": 3.2098821396192205, "grad_norm": 0.24246962686473514, "learning_rate": 2.7377161584077375e-05, "loss": 0.8809, "step": 7081 }, { "epoch": 3.2103354487760654, "grad_norm": 0.2120381916257018, "learning_rate": 2.7365150275248743e-05, "loss": 0.8472, "step": 7082 }, { "epoch": 3.21078875793291, "grad_norm": 0.24752629721607464, "learning_rate": 2.7353140231837448e-05, "loss": 0.8674, "step": 7083 }, { "epoch": 3.211242067089755, "grad_norm": 0.2988834764464828, "learning_rate": 2.734113145504632e-05, "loss": 0.8649, "step": 7084 }, { "epoch": 3.2116953762466003, "grad_norm": 0.34368497014264876, "learning_rate": 2.73291239460781e-05, "loss": 0.848, "step": 7085 }, { "epoch": 3.212148685403445, "grad_norm": 0.20917645152831016, "learning_rate": 2.7317117706135338e-05, "loss": 0.8706, "step": 7086 }, { "epoch": 3.21260199456029, "grad_norm": 0.2320938759092581, "learning_rate": 2.7305112736420514e-05, "loss": 0.8619, "step": 7087 }, { "epoch": 3.213055303717135, "grad_norm": 0.28479379728805876, "learning_rate": 2.7293109038135957e-05, "loss": 0.8738, "step": 7088 }, { "epoch": 3.21350861287398, "grad_norm": 0.2332242106505845, "learning_rate": 2.7281106612483878e-05, "loss": 0.8611, "step": 7089 }, { "epoch": 3.213961922030825, "grad_norm": 0.2566764233881585, "learning_rate": 2.726910546066635e-05, "loss": 0.875, "step": 7090 }, { "epoch": 3.21441523118767, "grad_norm": 0.2179824305479911, "learning_rate": 2.7257105583885324e-05, "loss": 0.8637, "step": 7091 }, { "epoch": 3.214868540344515, "grad_norm": 0.29221856109367905, "learning_rate": 2.7245106983342626e-05, "loss": 0.8638, "step": 7092 }, { "epoch": 3.21532184950136, "grad_norm": 0.35866560272387454, "learning_rate": 2.723310966023995e-05, "loss": 0.8841, "step": 7093 }, { "epoch": 3.215775158658205, "grad_norm": 0.3235438664676268, "learning_rate": 2.722111361577886e-05, "loss": 0.8604, "step": 7094 }, { "epoch": 3.21622846781505, "grad_norm": 0.270938992676611, "learning_rate": 2.7209118851160813e-05, "loss": 0.8748, "step": 7095 }, { "epoch": 3.2166817769718947, "grad_norm": 0.19919365792742125, "learning_rate": 2.7197125367587087e-05, "loss": 0.8821, "step": 7096 }, { "epoch": 3.21713508612874, "grad_norm": 0.30125264238415195, "learning_rate": 2.718513316625889e-05, "loss": 0.8737, "step": 7097 }, { "epoch": 3.217588395285585, "grad_norm": 0.3764213625784563, "learning_rate": 2.7173142248377264e-05, "loss": 0.869, "step": 7098 }, { "epoch": 3.2180417044424297, "grad_norm": 0.29590058378547485, "learning_rate": 2.7161152615143138e-05, "loss": 0.8771, "step": 7099 }, { "epoch": 3.2184950135992745, "grad_norm": 0.20479617623226587, "learning_rate": 2.7149164267757315e-05, "loss": 0.8539, "step": 7100 }, { "epoch": 3.2189483227561198, "grad_norm": 0.3535006208266525, "learning_rate": 2.7137177207420458e-05, "loss": 0.8911, "step": 7101 }, { "epoch": 3.2194016319129646, "grad_norm": 0.4255060555756457, "learning_rate": 2.7125191435333103e-05, "loss": 0.8741, "step": 7102 }, { "epoch": 3.2198549410698094, "grad_norm": 0.36524203361390983, "learning_rate": 2.711320695269567e-05, "loss": 0.8643, "step": 7103 }, { "epoch": 3.2203082502266547, "grad_norm": 0.26046864492159105, "learning_rate": 2.7101223760708442e-05, "loss": 0.871, "step": 7104 }, { "epoch": 3.2207615593834995, "grad_norm": 0.21330646450341276, "learning_rate": 2.7089241860571553e-05, "loss": 0.8702, "step": 7105 }, { "epoch": 3.2212148685403443, "grad_norm": 0.3220936461782329, "learning_rate": 2.7077261253485034e-05, "loss": 0.8654, "step": 7106 }, { "epoch": 3.2216681776971896, "grad_norm": 0.3677154370281295, "learning_rate": 2.706528194064878e-05, "loss": 0.8594, "step": 7107 }, { "epoch": 3.2221214868540344, "grad_norm": 0.28743196616696554, "learning_rate": 2.7053303923262557e-05, "loss": 0.8598, "step": 7108 }, { "epoch": 3.2225747960108793, "grad_norm": 0.23276900236703654, "learning_rate": 2.7041327202525996e-05, "loss": 0.8532, "step": 7109 }, { "epoch": 3.2230281051677245, "grad_norm": 0.2869965693119986, "learning_rate": 2.70293517796386e-05, "loss": 0.8693, "step": 7110 }, { "epoch": 3.2234814143245694, "grad_norm": 0.33850937157955635, "learning_rate": 2.7017377655799742e-05, "loss": 0.8671, "step": 7111 }, { "epoch": 3.223934723481414, "grad_norm": 0.46722737811015325, "learning_rate": 2.7005404832208663e-05, "loss": 0.8664, "step": 7112 }, { "epoch": 3.2243880326382595, "grad_norm": 0.39580335601589706, "learning_rate": 2.699343331006449e-05, "loss": 0.8858, "step": 7113 }, { "epoch": 3.2248413417951043, "grad_norm": 0.21555827417178836, "learning_rate": 2.6981463090566187e-05, "loss": 0.8859, "step": 7114 }, { "epoch": 3.225294650951949, "grad_norm": 0.2569025978584129, "learning_rate": 2.696949417491262e-05, "loss": 0.8834, "step": 7115 }, { "epoch": 3.2257479601087944, "grad_norm": 0.3401529856480801, "learning_rate": 2.6957526564302506e-05, "loss": 0.8673, "step": 7116 }, { "epoch": 3.226201269265639, "grad_norm": 0.35108204529770104, "learning_rate": 2.6945560259934437e-05, "loss": 0.8761, "step": 7117 }, { "epoch": 3.226654578422484, "grad_norm": 0.30423180616152007, "learning_rate": 2.6933595263006876e-05, "loss": 0.8677, "step": 7118 }, { "epoch": 3.2271078875793293, "grad_norm": 0.24179504866158522, "learning_rate": 2.692163157471815e-05, "loss": 0.8651, "step": 7119 }, { "epoch": 3.227561196736174, "grad_norm": 0.18829583177857484, "learning_rate": 2.6909669196266458e-05, "loss": 0.8741, "step": 7120 }, { "epoch": 3.228014505893019, "grad_norm": 0.271423702315048, "learning_rate": 2.6897708128849876e-05, "loss": 0.8603, "step": 7121 }, { "epoch": 3.2284678150498642, "grad_norm": 0.25424080401716254, "learning_rate": 2.688574837366633e-05, "loss": 0.8817, "step": 7122 }, { "epoch": 3.228921124206709, "grad_norm": 0.23272947749440964, "learning_rate": 2.6873789931913624e-05, "loss": 0.8606, "step": 7123 }, { "epoch": 3.229374433363554, "grad_norm": 0.18650860545021472, "learning_rate": 2.686183280478943e-05, "loss": 0.8471, "step": 7124 }, { "epoch": 3.2298277425203987, "grad_norm": 0.216730076695262, "learning_rate": 2.6849876993491303e-05, "loss": 0.861, "step": 7125 }, { "epoch": 3.230281051677244, "grad_norm": 0.22202526299806077, "learning_rate": 2.6837922499216634e-05, "loss": 0.8698, "step": 7126 }, { "epoch": 3.230734360834089, "grad_norm": 0.21030005513615974, "learning_rate": 2.682596932316271e-05, "loss": 0.8875, "step": 7127 }, { "epoch": 3.2311876699909337, "grad_norm": 0.21525328437695784, "learning_rate": 2.6814017466526673e-05, "loss": 0.8688, "step": 7128 }, { "epoch": 3.231640979147779, "grad_norm": 0.21869620490153482, "learning_rate": 2.6802066930505543e-05, "loss": 0.8834, "step": 7129 }, { "epoch": 3.2320942883046238, "grad_norm": 0.1715589308237483, "learning_rate": 2.6790117716296193e-05, "loss": 0.8543, "step": 7130 }, { "epoch": 3.2325475974614686, "grad_norm": 0.23488599975971247, "learning_rate": 2.677816982509538e-05, "loss": 0.8758, "step": 7131 }, { "epoch": 3.233000906618314, "grad_norm": 0.2842257436237308, "learning_rate": 2.6766223258099712e-05, "loss": 0.8702, "step": 7132 }, { "epoch": 3.2334542157751587, "grad_norm": 0.2886678403557759, "learning_rate": 2.6754278016505683e-05, "loss": 0.8658, "step": 7133 }, { "epoch": 3.2339075249320035, "grad_norm": 0.18978194652109148, "learning_rate": 2.674233410150964e-05, "loss": 0.8615, "step": 7134 }, { "epoch": 3.234360834088849, "grad_norm": 0.3171838617314516, "learning_rate": 2.6730391514307804e-05, "loss": 0.854, "step": 7135 }, { "epoch": 3.2348141432456936, "grad_norm": 0.2246730078655768, "learning_rate": 2.671845025609626e-05, "loss": 0.8517, "step": 7136 }, { "epoch": 3.2352674524025384, "grad_norm": 0.2979983212519794, "learning_rate": 2.6706510328070944e-05, "loss": 0.8869, "step": 7137 }, { "epoch": 3.2357207615593837, "grad_norm": 0.2538337882814677, "learning_rate": 2.6694571731427693e-05, "loss": 0.8769, "step": 7138 }, { "epoch": 3.2361740707162285, "grad_norm": 0.21841288757666819, "learning_rate": 2.6682634467362186e-05, "loss": 0.875, "step": 7139 }, { "epoch": 3.2366273798730734, "grad_norm": 0.2664456084303657, "learning_rate": 2.6670698537069975e-05, "loss": 0.87, "step": 7140 }, { "epoch": 3.237080689029918, "grad_norm": 0.2157659243014918, "learning_rate": 2.6658763941746486e-05, "loss": 0.8742, "step": 7141 }, { "epoch": 3.2375339981867635, "grad_norm": 0.2775512166239588, "learning_rate": 2.6646830682587005e-05, "loss": 0.8566, "step": 7142 }, { "epoch": 3.2379873073436083, "grad_norm": 0.28042003102580204, "learning_rate": 2.663489876078666e-05, "loss": 0.8865, "step": 7143 }, { "epoch": 3.238440616500453, "grad_norm": 0.21903260680340034, "learning_rate": 2.6622968177540493e-05, "loss": 0.8585, "step": 7144 }, { "epoch": 3.2388939256572984, "grad_norm": 0.2153557837704783, "learning_rate": 2.6611038934043367e-05, "loss": 0.8513, "step": 7145 }, { "epoch": 3.239347234814143, "grad_norm": 0.2464068369116426, "learning_rate": 2.6599111031490043e-05, "loss": 0.8858, "step": 7146 }, { "epoch": 3.239800543970988, "grad_norm": 0.22527666927502402, "learning_rate": 2.6587184471075132e-05, "loss": 0.8672, "step": 7147 }, { "epoch": 3.2402538531278333, "grad_norm": 0.23518422738049147, "learning_rate": 2.6575259253993123e-05, "loss": 0.8731, "step": 7148 }, { "epoch": 3.240707162284678, "grad_norm": 0.2917012366323951, "learning_rate": 2.656333538143835e-05, "loss": 0.8731, "step": 7149 }, { "epoch": 3.241160471441523, "grad_norm": 0.3110784126839292, "learning_rate": 2.655141285460503e-05, "loss": 0.8569, "step": 7150 }, { "epoch": 3.2416137805983682, "grad_norm": 0.21033588192070124, "learning_rate": 2.6539491674687243e-05, "loss": 0.8627, "step": 7151 }, { "epoch": 3.242067089755213, "grad_norm": 0.2746509516116878, "learning_rate": 2.6527571842878915e-05, "loss": 0.8782, "step": 7152 }, { "epoch": 3.242520398912058, "grad_norm": 0.28512839289229863, "learning_rate": 2.6515653360373863e-05, "loss": 0.8742, "step": 7153 }, { "epoch": 3.242973708068903, "grad_norm": 0.1669809324254879, "learning_rate": 2.6503736228365758e-05, "loss": 0.8707, "step": 7154 }, { "epoch": 3.243427017225748, "grad_norm": 0.2679716899524469, "learning_rate": 2.6491820448048134e-05, "loss": 0.8496, "step": 7155 }, { "epoch": 3.243880326382593, "grad_norm": 0.23287084214962503, "learning_rate": 2.6479906020614393e-05, "loss": 0.8728, "step": 7156 }, { "epoch": 3.244333635539438, "grad_norm": 0.2341775333754215, "learning_rate": 2.64679929472578e-05, "loss": 0.8731, "step": 7157 }, { "epoch": 3.244786944696283, "grad_norm": 0.2471120434394026, "learning_rate": 2.645608122917148e-05, "loss": 0.872, "step": 7158 }, { "epoch": 3.2452402538531278, "grad_norm": 0.19723065475617033, "learning_rate": 2.6444170867548432e-05, "loss": 0.8802, "step": 7159 }, { "epoch": 3.245693563009973, "grad_norm": 0.2701897105253945, "learning_rate": 2.643226186358151e-05, "loss": 0.859, "step": 7160 }, { "epoch": 3.246146872166818, "grad_norm": 0.2489064847815917, "learning_rate": 2.6420354218463454e-05, "loss": 0.8557, "step": 7161 }, { "epoch": 3.2466001813236627, "grad_norm": 0.22401903809847865, "learning_rate": 2.6408447933386816e-05, "loss": 0.8638, "step": 7162 }, { "epoch": 3.2470534904805075, "grad_norm": 0.2629862731824095, "learning_rate": 2.6396543009544057e-05, "loss": 0.8717, "step": 7163 }, { "epoch": 3.247506799637353, "grad_norm": 0.20764463310118195, "learning_rate": 2.638463944812751e-05, "loss": 0.8591, "step": 7164 }, { "epoch": 3.2479601087941976, "grad_norm": 0.2336174555147234, "learning_rate": 2.6372737250329322e-05, "loss": 0.8582, "step": 7165 }, { "epoch": 3.2484134179510424, "grad_norm": 0.24144168804807636, "learning_rate": 2.6360836417341555e-05, "loss": 0.8955, "step": 7166 }, { "epoch": 3.2488667271078877, "grad_norm": 0.2532804038784487, "learning_rate": 2.6348936950356113e-05, "loss": 0.8795, "step": 7167 }, { "epoch": 3.2493200362647325, "grad_norm": 0.24524069419826103, "learning_rate": 2.6337038850564745e-05, "loss": 0.864, "step": 7168 }, { "epoch": 3.2497733454215774, "grad_norm": 0.24328112106480598, "learning_rate": 2.6325142119159095e-05, "loss": 0.866, "step": 7169 }, { "epoch": 3.2502266545784226, "grad_norm": 0.24265653351342886, "learning_rate": 2.6313246757330647e-05, "loss": 0.8624, "step": 7170 }, { "epoch": 3.2506799637352675, "grad_norm": 0.1905022866887128, "learning_rate": 2.6301352766270762e-05, "loss": 0.8549, "step": 7171 }, { "epoch": 3.2511332728921123, "grad_norm": 0.26395040765620753, "learning_rate": 2.628946014717066e-05, "loss": 0.864, "step": 7172 }, { "epoch": 3.2515865820489576, "grad_norm": 0.22660274638416356, "learning_rate": 2.627756890122142e-05, "loss": 0.8798, "step": 7173 }, { "epoch": 3.2520398912058024, "grad_norm": 0.2696303704265977, "learning_rate": 2.626567902961398e-05, "loss": 0.8532, "step": 7174 }, { "epoch": 3.252493200362647, "grad_norm": 0.2446973180356918, "learning_rate": 2.625379053353915e-05, "loss": 0.8484, "step": 7175 }, { "epoch": 3.252946509519492, "grad_norm": 0.2320072338316388, "learning_rate": 2.6241903414187597e-05, "loss": 0.8696, "step": 7176 }, { "epoch": 3.2533998186763373, "grad_norm": 0.3160514928618922, "learning_rate": 2.6230017672749865e-05, "loss": 0.8709, "step": 7177 }, { "epoch": 3.253853127833182, "grad_norm": 0.22820551602272077, "learning_rate": 2.6218133310416317e-05, "loss": 0.8691, "step": 7178 }, { "epoch": 3.254306436990027, "grad_norm": 0.2536475280344736, "learning_rate": 2.6206250328377225e-05, "loss": 0.8463, "step": 7179 }, { "epoch": 3.2547597461468722, "grad_norm": 0.29053207278883414, "learning_rate": 2.61943687278227e-05, "loss": 0.8626, "step": 7180 }, { "epoch": 3.255213055303717, "grad_norm": 0.25339554608094167, "learning_rate": 2.618248850994273e-05, "loss": 0.8728, "step": 7181 }, { "epoch": 3.255666364460562, "grad_norm": 0.23716308528287977, "learning_rate": 2.6170609675927146e-05, "loss": 0.8768, "step": 7182 }, { "epoch": 3.256119673617407, "grad_norm": 0.21996537702763674, "learning_rate": 2.6158732226965638e-05, "loss": 0.8583, "step": 7183 }, { "epoch": 3.256572982774252, "grad_norm": 0.2298211011603112, "learning_rate": 2.6146856164247778e-05, "loss": 0.8677, "step": 7184 }, { "epoch": 3.257026291931097, "grad_norm": 0.21336209059841588, "learning_rate": 2.613498148896298e-05, "loss": 0.8446, "step": 7185 }, { "epoch": 3.257479601087942, "grad_norm": 0.24929038618495863, "learning_rate": 2.6123108202300537e-05, "loss": 0.8592, "step": 7186 }, { "epoch": 3.257932910244787, "grad_norm": 0.20481647712834392, "learning_rate": 2.6111236305449598e-05, "loss": 0.8567, "step": 7187 }, { "epoch": 3.2583862194016318, "grad_norm": 0.26356378901967087, "learning_rate": 2.6099365799599153e-05, "loss": 0.868, "step": 7188 }, { "epoch": 3.258839528558477, "grad_norm": 0.21171343151573332, "learning_rate": 2.6087496685938085e-05, "loss": 0.8646, "step": 7189 }, { "epoch": 3.259292837715322, "grad_norm": 0.2195410419152678, "learning_rate": 2.6075628965655105e-05, "loss": 0.8598, "step": 7190 }, { "epoch": 3.2597461468721667, "grad_norm": 0.23217392804223735, "learning_rate": 2.6063762639938808e-05, "loss": 0.8694, "step": 7191 }, { "epoch": 3.260199456029012, "grad_norm": 0.21836930876455946, "learning_rate": 2.6051897709977638e-05, "loss": 0.8669, "step": 7192 }, { "epoch": 3.260652765185857, "grad_norm": 0.1881409107165801, "learning_rate": 2.604003417695992e-05, "loss": 0.8594, "step": 7193 }, { "epoch": 3.2611060743427016, "grad_norm": 0.23980519659286378, "learning_rate": 2.6028172042073793e-05, "loss": 0.8745, "step": 7194 }, { "epoch": 3.261559383499547, "grad_norm": 0.17937022824968818, "learning_rate": 2.6016311306507302e-05, "loss": 0.8556, "step": 7195 }, { "epoch": 3.2620126926563917, "grad_norm": 0.25196923471507576, "learning_rate": 2.600445197144833e-05, "loss": 0.8634, "step": 7196 }, { "epoch": 3.2624660018132365, "grad_norm": 0.2129680476270928, "learning_rate": 2.5992594038084623e-05, "loss": 0.8596, "step": 7197 }, { "epoch": 3.262919310970082, "grad_norm": 0.23806699989488433, "learning_rate": 2.5980737507603794e-05, "loss": 0.8548, "step": 7198 }, { "epoch": 3.2633726201269266, "grad_norm": 0.25569837523023115, "learning_rate": 2.5968882381193306e-05, "loss": 0.8601, "step": 7199 }, { "epoch": 3.2638259292837715, "grad_norm": 0.2481012383341175, "learning_rate": 2.5957028660040492e-05, "loss": 0.8685, "step": 7200 }, { "epoch": 3.2642792384406167, "grad_norm": 0.20935467377760117, "learning_rate": 2.594517634533252e-05, "loss": 0.8545, "step": 7201 }, { "epoch": 3.2647325475974616, "grad_norm": 0.28044167080839527, "learning_rate": 2.5933325438256446e-05, "loss": 0.8636, "step": 7202 }, { "epoch": 3.2651858567543064, "grad_norm": 0.600172375743633, "learning_rate": 2.5921475939999164e-05, "loss": 0.9047, "step": 7203 }, { "epoch": 3.265639165911151, "grad_norm": 0.21991784957649657, "learning_rate": 2.5909627851747442e-05, "loss": 0.8937, "step": 7204 }, { "epoch": 3.2660924750679965, "grad_norm": 0.2578323361758706, "learning_rate": 2.5897781174687907e-05, "loss": 0.8726, "step": 7205 }, { "epoch": 3.2665457842248413, "grad_norm": 0.24146855932270045, "learning_rate": 2.5885935910007033e-05, "loss": 0.8737, "step": 7206 }, { "epoch": 3.266999093381686, "grad_norm": 0.21069116564541093, "learning_rate": 2.5874092058891157e-05, "loss": 0.8606, "step": 7207 }, { "epoch": 3.2674524025385314, "grad_norm": 0.20846931999176452, "learning_rate": 2.586224962252648e-05, "loss": 0.8974, "step": 7208 }, { "epoch": 3.2679057116953762, "grad_norm": 0.2222198350288991, "learning_rate": 2.585040860209905e-05, "loss": 0.8624, "step": 7209 }, { "epoch": 3.268359020852221, "grad_norm": 0.2145151588098671, "learning_rate": 2.5838568998794783e-05, "loss": 0.862, "step": 7210 }, { "epoch": 3.2688123300090663, "grad_norm": 0.18771233216790345, "learning_rate": 2.582673081379945e-05, "loss": 0.8685, "step": 7211 }, { "epoch": 3.269265639165911, "grad_norm": 0.22766542890238503, "learning_rate": 2.581489404829868e-05, "loss": 0.8796, "step": 7212 }, { "epoch": 3.269718948322756, "grad_norm": 0.22297189975469184, "learning_rate": 2.5803058703477958e-05, "loss": 0.872, "step": 7213 }, { "epoch": 3.2701722574796013, "grad_norm": 0.2016709156657167, "learning_rate": 2.5791224780522636e-05, "loss": 0.8659, "step": 7214 }, { "epoch": 3.270625566636446, "grad_norm": 0.33994716063905617, "learning_rate": 2.5779392280617906e-05, "loss": 0.8711, "step": 7215 }, { "epoch": 3.271078875793291, "grad_norm": 0.27385821937610466, "learning_rate": 2.5767561204948838e-05, "loss": 0.8688, "step": 7216 }, { "epoch": 3.2715321849501358, "grad_norm": 0.22521483392498173, "learning_rate": 2.575573155470034e-05, "loss": 0.8596, "step": 7217 }, { "epoch": 3.271985494106981, "grad_norm": 0.18021793682404055, "learning_rate": 2.574390333105719e-05, "loss": 0.869, "step": 7218 }, { "epoch": 3.272438803263826, "grad_norm": 0.19525720550399556, "learning_rate": 2.5732076535204037e-05, "loss": 0.8676, "step": 7219 }, { "epoch": 3.2728921124206707, "grad_norm": 0.21707520082208814, "learning_rate": 2.5720251168325333e-05, "loss": 0.8986, "step": 7220 }, { "epoch": 3.273345421577516, "grad_norm": 0.22890842351880158, "learning_rate": 2.570842723160545e-05, "loss": 0.856, "step": 7221 }, { "epoch": 3.273798730734361, "grad_norm": 0.21221176263638394, "learning_rate": 2.5696604726228575e-05, "loss": 0.8578, "step": 7222 }, { "epoch": 3.2742520398912056, "grad_norm": 0.205906581263666, "learning_rate": 2.5684783653378778e-05, "loss": 0.8527, "step": 7223 }, { "epoch": 3.274705349048051, "grad_norm": 0.24643121228032178, "learning_rate": 2.5672964014239963e-05, "loss": 0.8491, "step": 7224 }, { "epoch": 3.2751586582048957, "grad_norm": 0.2924968080481721, "learning_rate": 2.5661145809995917e-05, "loss": 0.8727, "step": 7225 }, { "epoch": 3.2756119673617405, "grad_norm": 0.3402381960579194, "learning_rate": 2.5649329041830252e-05, "loss": 0.863, "step": 7226 }, { "epoch": 3.276065276518586, "grad_norm": 0.2370025060683127, "learning_rate": 2.5637513710926457e-05, "loss": 0.8548, "step": 7227 }, { "epoch": 3.2765185856754306, "grad_norm": 0.1844643390339872, "learning_rate": 2.562569981846788e-05, "loss": 0.8592, "step": 7228 }, { "epoch": 3.2769718948322755, "grad_norm": 0.2933355931103168, "learning_rate": 2.561388736563771e-05, "loss": 0.875, "step": 7229 }, { "epoch": 3.2774252039891207, "grad_norm": 0.3355513759858816, "learning_rate": 2.5602076353618997e-05, "loss": 0.8654, "step": 7230 }, { "epoch": 3.2778785131459656, "grad_norm": 0.261797833975673, "learning_rate": 2.559026678359466e-05, "loss": 0.8648, "step": 7231 }, { "epoch": 3.2783318223028104, "grad_norm": 0.2512901180005046, "learning_rate": 2.557845865674745e-05, "loss": 0.8732, "step": 7232 }, { "epoch": 3.2787851314596557, "grad_norm": 0.36449086470398495, "learning_rate": 2.5566651974259982e-05, "loss": 0.8741, "step": 7233 }, { "epoch": 3.2792384406165005, "grad_norm": 0.38859708033891127, "learning_rate": 2.5554846737314754e-05, "loss": 0.8751, "step": 7234 }, { "epoch": 3.2796917497733453, "grad_norm": 0.30003897110653677, "learning_rate": 2.554304294709407e-05, "loss": 0.8613, "step": 7235 }, { "epoch": 3.2801450589301906, "grad_norm": 0.2432607407965832, "learning_rate": 2.553124060478012e-05, "loss": 0.8486, "step": 7236 }, { "epoch": 3.2805983680870354, "grad_norm": 0.3121281440126788, "learning_rate": 2.551943971155495e-05, "loss": 0.8555, "step": 7237 }, { "epoch": 3.2810516772438802, "grad_norm": 0.4166510979605544, "learning_rate": 2.550764026860046e-05, "loss": 0.8768, "step": 7238 }, { "epoch": 3.2815049864007255, "grad_norm": 0.29355013564666377, "learning_rate": 2.549584227709838e-05, "loss": 0.8608, "step": 7239 }, { "epoch": 3.2819582955575703, "grad_norm": 0.2463535147767191, "learning_rate": 2.5484045738230327e-05, "loss": 0.8518, "step": 7240 }, { "epoch": 3.282411604714415, "grad_norm": 0.31758183873785556, "learning_rate": 2.5472250653177744e-05, "loss": 0.8679, "step": 7241 }, { "epoch": 3.2828649138712604, "grad_norm": 0.327140414028994, "learning_rate": 2.546045702312196e-05, "loss": 0.8608, "step": 7242 }, { "epoch": 3.2833182230281053, "grad_norm": 0.21536827210763362, "learning_rate": 2.5448664849244134e-05, "loss": 0.8562, "step": 7243 }, { "epoch": 3.28377153218495, "grad_norm": 0.2793684182335213, "learning_rate": 2.5436874132725295e-05, "loss": 0.8739, "step": 7244 }, { "epoch": 3.284224841341795, "grad_norm": 0.24302934516477576, "learning_rate": 2.5425084874746305e-05, "loss": 0.8827, "step": 7245 }, { "epoch": 3.28467815049864, "grad_norm": 0.28997269325863956, "learning_rate": 2.5413297076487903e-05, "loss": 0.8665, "step": 7246 }, { "epoch": 3.285131459655485, "grad_norm": 0.24848505258169346, "learning_rate": 2.5401510739130668e-05, "loss": 0.8896, "step": 7247 }, { "epoch": 3.28558476881233, "grad_norm": 0.22258822136290637, "learning_rate": 2.5389725863855032e-05, "loss": 0.864, "step": 7248 }, { "epoch": 3.286038077969175, "grad_norm": 0.22595528151524127, "learning_rate": 2.53779424518413e-05, "loss": 0.8627, "step": 7249 }, { "epoch": 3.28649138712602, "grad_norm": 0.2721200093799047, "learning_rate": 2.53661605042696e-05, "loss": 0.8473, "step": 7250 }, { "epoch": 3.286944696282865, "grad_norm": 0.18546756174947535, "learning_rate": 2.5354380022319936e-05, "loss": 0.8545, "step": 7251 }, { "epoch": 3.28739800543971, "grad_norm": 0.24921288731354105, "learning_rate": 2.534260100717215e-05, "loss": 0.8617, "step": 7252 }, { "epoch": 3.287851314596555, "grad_norm": 0.2606124915070228, "learning_rate": 2.533082346000595e-05, "loss": 0.8679, "step": 7253 }, { "epoch": 3.2883046237533997, "grad_norm": 0.22189280964315736, "learning_rate": 2.5319047382000893e-05, "loss": 0.8626, "step": 7254 }, { "epoch": 3.2887579329102445, "grad_norm": 0.27493741387362824, "learning_rate": 2.5307272774336386e-05, "loss": 0.863, "step": 7255 }, { "epoch": 3.28921124206709, "grad_norm": 0.1905396804426044, "learning_rate": 2.5295499638191698e-05, "loss": 0.8593, "step": 7256 }, { "epoch": 3.2896645512239346, "grad_norm": 0.2635559493406132, "learning_rate": 2.528372797474595e-05, "loss": 0.8881, "step": 7257 }, { "epoch": 3.2901178603807795, "grad_norm": 0.26346238150119716, "learning_rate": 2.5271957785178075e-05, "loss": 0.8569, "step": 7258 }, { "epoch": 3.2905711695376247, "grad_norm": 0.20083983580494802, "learning_rate": 2.5260189070666918e-05, "loss": 0.8891, "step": 7259 }, { "epoch": 3.2910244786944696, "grad_norm": 0.2292377610820346, "learning_rate": 2.5248421832391144e-05, "loss": 0.8854, "step": 7260 }, { "epoch": 3.2914777878513144, "grad_norm": 0.20237991796976126, "learning_rate": 2.5236656071529274e-05, "loss": 0.866, "step": 7261 }, { "epoch": 3.2919310970081597, "grad_norm": 0.20507197050001277, "learning_rate": 2.522489178925969e-05, "loss": 0.8813, "step": 7262 }, { "epoch": 3.2923844061650045, "grad_norm": 0.265520387977556, "learning_rate": 2.5213128986760618e-05, "loss": 0.8667, "step": 7263 }, { "epoch": 3.2928377153218493, "grad_norm": 0.2717322762172908, "learning_rate": 2.5201367665210134e-05, "loss": 0.8537, "step": 7264 }, { "epoch": 3.2932910244786946, "grad_norm": 0.21465976784970978, "learning_rate": 2.518960782578618e-05, "loss": 0.8598, "step": 7265 }, { "epoch": 3.2937443336355394, "grad_norm": 0.2636118284072555, "learning_rate": 2.5177849469666517e-05, "loss": 0.8727, "step": 7266 }, { "epoch": 3.2941976427923843, "grad_norm": 0.3108380905519306, "learning_rate": 2.5166092598028796e-05, "loss": 0.8578, "step": 7267 }, { "epoch": 3.2946509519492295, "grad_norm": 0.2714714666092737, "learning_rate": 2.5154337212050496e-05, "loss": 0.8542, "step": 7268 }, { "epoch": 3.2951042611060744, "grad_norm": 0.25082476687018124, "learning_rate": 2.5142583312908952e-05, "loss": 0.8523, "step": 7269 }, { "epoch": 3.295557570262919, "grad_norm": 0.2272134057052852, "learning_rate": 2.513083090178136e-05, "loss": 0.8603, "step": 7270 }, { "epoch": 3.2960108794197644, "grad_norm": 0.301578628426096, "learning_rate": 2.5119079979844746e-05, "loss": 0.88, "step": 7271 }, { "epoch": 3.2964641885766093, "grad_norm": 0.2847572128122353, "learning_rate": 2.5107330548276015e-05, "loss": 0.8667, "step": 7272 }, { "epoch": 3.296917497733454, "grad_norm": 0.2590092404066702, "learning_rate": 2.5095582608251894e-05, "loss": 0.8946, "step": 7273 }, { "epoch": 3.2973708068902994, "grad_norm": 0.20103470323713682, "learning_rate": 2.5083836160948973e-05, "loss": 0.8633, "step": 7274 }, { "epoch": 3.297824116047144, "grad_norm": 0.21321814257612687, "learning_rate": 2.5072091207543715e-05, "loss": 0.8812, "step": 7275 }, { "epoch": 3.298277425203989, "grad_norm": 0.25076068659672346, "learning_rate": 2.506034774921238e-05, "loss": 0.8603, "step": 7276 }, { "epoch": 3.2987307343608343, "grad_norm": 0.18444601042255962, "learning_rate": 2.504860578713114e-05, "loss": 0.8556, "step": 7277 }, { "epoch": 3.299184043517679, "grad_norm": 0.22297657833293305, "learning_rate": 2.5036865322475955e-05, "loss": 0.8823, "step": 7278 }, { "epoch": 3.299637352674524, "grad_norm": 0.19050893461929927, "learning_rate": 2.5025126356422692e-05, "loss": 0.8749, "step": 7279 }, { "epoch": 3.3000906618313692, "grad_norm": 0.16719433119134694, "learning_rate": 2.5013388890147033e-05, "loss": 0.8782, "step": 7280 }, { "epoch": 3.300543970988214, "grad_norm": 0.1848411822833879, "learning_rate": 2.500165292482452e-05, "loss": 0.8499, "step": 7281 }, { "epoch": 3.300997280145059, "grad_norm": 0.18356294524521377, "learning_rate": 2.4989918461630546e-05, "loss": 0.8785, "step": 7282 }, { "epoch": 3.3014505893019037, "grad_norm": 0.17451290636808678, "learning_rate": 2.4978185501740352e-05, "loss": 0.8676, "step": 7283 }, { "epoch": 3.301903898458749, "grad_norm": 0.22122964906203751, "learning_rate": 2.4966454046329026e-05, "loss": 0.8626, "step": 7284 }, { "epoch": 3.302357207615594, "grad_norm": 0.19709977064913595, "learning_rate": 2.4954724096571513e-05, "loss": 0.8607, "step": 7285 }, { "epoch": 3.3028105167724386, "grad_norm": 0.190293720782103, "learning_rate": 2.4942995653642594e-05, "loss": 0.8957, "step": 7286 }, { "epoch": 3.303263825929284, "grad_norm": 0.24024737597894244, "learning_rate": 2.4931268718716918e-05, "loss": 0.8654, "step": 7287 }, { "epoch": 3.3037171350861287, "grad_norm": 0.207121067492197, "learning_rate": 2.4919543292968962e-05, "loss": 0.8624, "step": 7288 }, { "epoch": 3.3041704442429736, "grad_norm": 0.22508452056684725, "learning_rate": 2.490781937757307e-05, "loss": 0.8625, "step": 7289 }, { "epoch": 3.304623753399819, "grad_norm": 0.1998057100064862, "learning_rate": 2.4896096973703417e-05, "loss": 0.8754, "step": 7290 }, { "epoch": 3.3050770625566637, "grad_norm": 0.24820569114486504, "learning_rate": 2.4884376082534053e-05, "loss": 0.8586, "step": 7291 }, { "epoch": 3.3055303717135085, "grad_norm": 0.19968215980035242, "learning_rate": 2.4872656705238835e-05, "loss": 0.872, "step": 7292 }, { "epoch": 3.3059836808703538, "grad_norm": 0.19214071303869337, "learning_rate": 2.4860938842991514e-05, "loss": 0.8793, "step": 7293 }, { "epoch": 3.3064369900271986, "grad_norm": 0.2323806203165372, "learning_rate": 2.4849222496965658e-05, "loss": 0.8567, "step": 7294 }, { "epoch": 3.3068902991840434, "grad_norm": 0.19161132451226673, "learning_rate": 2.4837507668334694e-05, "loss": 0.8707, "step": 7295 }, { "epoch": 3.3073436083408883, "grad_norm": 0.23322998237349055, "learning_rate": 2.4825794358271917e-05, "loss": 0.8787, "step": 7296 }, { "epoch": 3.3077969174977335, "grad_norm": 0.21036339068500157, "learning_rate": 2.4814082567950415e-05, "loss": 0.8702, "step": 7297 }, { "epoch": 3.3082502266545784, "grad_norm": 0.20403518421764388, "learning_rate": 2.4802372298543178e-05, "loss": 0.8748, "step": 7298 }, { "epoch": 3.308703535811423, "grad_norm": 0.23622886929569048, "learning_rate": 2.479066355122301e-05, "loss": 0.8512, "step": 7299 }, { "epoch": 3.3091568449682685, "grad_norm": 0.1864450431543695, "learning_rate": 2.4778956327162597e-05, "loss": 0.8552, "step": 7300 }, { "epoch": 3.3096101541251133, "grad_norm": 0.18520853517174934, "learning_rate": 2.4767250627534434e-05, "loss": 0.8751, "step": 7301 }, { "epoch": 3.310063463281958, "grad_norm": 0.18841583044131854, "learning_rate": 2.47555464535109e-05, "loss": 0.8642, "step": 7302 }, { "epoch": 3.3105167724388034, "grad_norm": 0.204737919244479, "learning_rate": 2.4743843806264184e-05, "loss": 0.8652, "step": 7303 }, { "epoch": 3.310970081595648, "grad_norm": 0.1910250736817801, "learning_rate": 2.4732142686966344e-05, "loss": 0.8704, "step": 7304 }, { "epoch": 3.311423390752493, "grad_norm": 0.18115235878352715, "learning_rate": 2.4720443096789292e-05, "loss": 0.8985, "step": 7305 }, { "epoch": 3.3118766999093383, "grad_norm": 0.21460429465542083, "learning_rate": 2.4708745036904774e-05, "loss": 0.8795, "step": 7306 }, { "epoch": 3.312330009066183, "grad_norm": 0.19909666867892925, "learning_rate": 2.4697048508484373e-05, "loss": 0.8583, "step": 7307 }, { "epoch": 3.312783318223028, "grad_norm": 0.24230289134540084, "learning_rate": 2.468535351269954e-05, "loss": 0.8725, "step": 7308 }, { "epoch": 3.3132366273798732, "grad_norm": 0.21925259548345294, "learning_rate": 2.467366005072156e-05, "loss": 0.8573, "step": 7309 }, { "epoch": 3.313689936536718, "grad_norm": 0.1997972022707895, "learning_rate": 2.466196812372157e-05, "loss": 0.8661, "step": 7310 }, { "epoch": 3.314143245693563, "grad_norm": 0.29248434643319643, "learning_rate": 2.4650277732870552e-05, "loss": 0.8644, "step": 7311 }, { "epoch": 3.314596554850408, "grad_norm": 0.24332391654326607, "learning_rate": 2.4638588879339325e-05, "loss": 0.8629, "step": 7312 }, { "epoch": 3.315049864007253, "grad_norm": 0.2235305324364914, "learning_rate": 2.462690156429857e-05, "loss": 0.8587, "step": 7313 }, { "epoch": 3.315503173164098, "grad_norm": 0.21868481450165356, "learning_rate": 2.461521578891881e-05, "loss": 0.8712, "step": 7314 }, { "epoch": 3.315956482320943, "grad_norm": 0.23378131550377254, "learning_rate": 2.4603531554370417e-05, "loss": 0.8653, "step": 7315 }, { "epoch": 3.316409791477788, "grad_norm": 0.24250726082483456, "learning_rate": 2.4591848861823567e-05, "loss": 0.8476, "step": 7316 }, { "epoch": 3.3168631006346327, "grad_norm": 0.2441629900851792, "learning_rate": 2.458016771244834e-05, "loss": 0.8664, "step": 7317 }, { "epoch": 3.317316409791478, "grad_norm": 0.23165313845685076, "learning_rate": 2.4568488107414638e-05, "loss": 0.8725, "step": 7318 }, { "epoch": 3.317769718948323, "grad_norm": 0.23936281132233062, "learning_rate": 2.45568100478922e-05, "loss": 0.8637, "step": 7319 }, { "epoch": 3.3182230281051677, "grad_norm": 0.2210992912502654, "learning_rate": 2.4545133535050628e-05, "loss": 0.8799, "step": 7320 }, { "epoch": 3.3186763372620125, "grad_norm": 0.22893085916322153, "learning_rate": 2.453345857005936e-05, "loss": 0.8516, "step": 7321 }, { "epoch": 3.3191296464188578, "grad_norm": 0.21229239740435452, "learning_rate": 2.4521785154087662e-05, "loss": 0.8673, "step": 7322 }, { "epoch": 3.3195829555757026, "grad_norm": 0.17762506438784695, "learning_rate": 2.451011328830467e-05, "loss": 0.885, "step": 7323 }, { "epoch": 3.3200362647325474, "grad_norm": 0.24127032497896142, "learning_rate": 2.4498442973879354e-05, "loss": 0.8704, "step": 7324 }, { "epoch": 3.3204895738893927, "grad_norm": 0.2461485331126551, "learning_rate": 2.4486774211980535e-05, "loss": 0.8862, "step": 7325 }, { "epoch": 3.3209428830462375, "grad_norm": 0.28523635224798305, "learning_rate": 2.447510700377687e-05, "loss": 0.8601, "step": 7326 }, { "epoch": 3.3213961922030824, "grad_norm": 0.23174445123997792, "learning_rate": 2.4463441350436865e-05, "loss": 0.8651, "step": 7327 }, { "epoch": 3.3218495013599276, "grad_norm": 0.2113625478619112, "learning_rate": 2.4451777253128873e-05, "loss": 0.8771, "step": 7328 }, { "epoch": 3.3223028105167725, "grad_norm": 0.24158191084383612, "learning_rate": 2.4440114713021085e-05, "loss": 0.8607, "step": 7329 }, { "epoch": 3.3227561196736173, "grad_norm": 0.26175817614005087, "learning_rate": 2.4428453731281538e-05, "loss": 0.878, "step": 7330 }, { "epoch": 3.3232094288304626, "grad_norm": 0.20607619610403752, "learning_rate": 2.4416794309078108e-05, "loss": 0.8633, "step": 7331 }, { "epoch": 3.3236627379873074, "grad_norm": 0.2622948139145269, "learning_rate": 2.4405136447578538e-05, "loss": 0.8707, "step": 7332 }, { "epoch": 3.324116047144152, "grad_norm": 0.3376855775675858, "learning_rate": 2.4393480147950372e-05, "loss": 0.8559, "step": 7333 }, { "epoch": 3.324569356300997, "grad_norm": 0.31050753514646545, "learning_rate": 2.4381825411361052e-05, "loss": 0.8715, "step": 7334 }, { "epoch": 3.3250226654578423, "grad_norm": 0.21963012220211064, "learning_rate": 2.4370172238977806e-05, "loss": 0.8758, "step": 7335 }, { "epoch": 3.325475974614687, "grad_norm": 0.21140457357324552, "learning_rate": 2.4358520631967747e-05, "loss": 0.8665, "step": 7336 }, { "epoch": 3.325929283771532, "grad_norm": 0.19903680639872237, "learning_rate": 2.4346870591497825e-05, "loss": 0.8756, "step": 7337 }, { "epoch": 3.3263825929283772, "grad_norm": 0.22298637238334781, "learning_rate": 2.4335222118734806e-05, "loss": 0.8755, "step": 7338 }, { "epoch": 3.326835902085222, "grad_norm": 0.19708223178574724, "learning_rate": 2.4323575214845333e-05, "loss": 0.8731, "step": 7339 }, { "epoch": 3.327289211242067, "grad_norm": 0.18795208243878114, "learning_rate": 2.431192988099587e-05, "loss": 0.8787, "step": 7340 }, { "epoch": 3.327742520398912, "grad_norm": 0.20518206568948014, "learning_rate": 2.430028611835274e-05, "loss": 0.8812, "step": 7341 }, { "epoch": 3.328195829555757, "grad_norm": 0.2027304199896448, "learning_rate": 2.4288643928082092e-05, "loss": 0.8738, "step": 7342 }, { "epoch": 3.328649138712602, "grad_norm": 0.1613764826880516, "learning_rate": 2.427700331134993e-05, "loss": 0.8691, "step": 7343 }, { "epoch": 3.329102447869447, "grad_norm": 0.18225686863459367, "learning_rate": 2.4265364269322103e-05, "loss": 0.877, "step": 7344 }, { "epoch": 3.329555757026292, "grad_norm": 0.17658127210368954, "learning_rate": 2.4253726803164276e-05, "loss": 0.8818, "step": 7345 }, { "epoch": 3.3300090661831367, "grad_norm": 0.19599154665588794, "learning_rate": 2.4242090914041994e-05, "loss": 0.8694, "step": 7346 }, { "epoch": 3.330462375339982, "grad_norm": 0.27954880004017446, "learning_rate": 2.4230456603120626e-05, "loss": 0.8887, "step": 7347 }, { "epoch": 3.330915684496827, "grad_norm": 0.21478286203263205, "learning_rate": 2.4218823871565365e-05, "loss": 0.8618, "step": 7348 }, { "epoch": 3.3313689936536717, "grad_norm": 0.19853589863993507, "learning_rate": 2.4207192720541278e-05, "loss": 0.8549, "step": 7349 }, { "epoch": 3.331822302810517, "grad_norm": 0.21430624242880425, "learning_rate": 2.4195563151213248e-05, "loss": 0.8567, "step": 7350 }, { "epoch": 3.3322756119673618, "grad_norm": 0.1930305847825246, "learning_rate": 2.418393516474602e-05, "loss": 0.856, "step": 7351 }, { "epoch": 3.3327289211242066, "grad_norm": 0.20759351201767337, "learning_rate": 2.4172308762304168e-05, "loss": 0.8591, "step": 7352 }, { "epoch": 3.333182230281052, "grad_norm": 0.24712659785174884, "learning_rate": 2.416068394505213e-05, "loss": 0.8805, "step": 7353 }, { "epoch": 3.3336355394378967, "grad_norm": 0.18672882140340935, "learning_rate": 2.414906071415412e-05, "loss": 0.8643, "step": 7354 }, { "epoch": 3.3340888485947415, "grad_norm": 0.21021783900534544, "learning_rate": 2.413743907077427e-05, "loss": 0.8698, "step": 7355 }, { "epoch": 3.334542157751587, "grad_norm": 0.19718203498016199, "learning_rate": 2.412581901607652e-05, "loss": 0.8582, "step": 7356 }, { "epoch": 3.3349954669084316, "grad_norm": 0.18433164811936728, "learning_rate": 2.4114200551224648e-05, "loss": 0.8409, "step": 7357 }, { "epoch": 3.3354487760652765, "grad_norm": 0.16545551273115316, "learning_rate": 2.4102583677382276e-05, "loss": 0.8735, "step": 7358 }, { "epoch": 3.3359020852221217, "grad_norm": 0.21617974878378216, "learning_rate": 2.4090968395712864e-05, "loss": 0.8807, "step": 7359 }, { "epoch": 3.3363553943789666, "grad_norm": 0.19954472816202595, "learning_rate": 2.407935470737973e-05, "loss": 0.8698, "step": 7360 }, { "epoch": 3.3368087035358114, "grad_norm": 0.18047461840520118, "learning_rate": 2.406774261354601e-05, "loss": 0.8799, "step": 7361 }, { "epoch": 3.337262012692656, "grad_norm": 0.18050448595585988, "learning_rate": 2.4056132115374692e-05, "loss": 0.8834, "step": 7362 }, { "epoch": 3.3377153218495015, "grad_norm": 0.16112310142392453, "learning_rate": 2.404452321402861e-05, "loss": 0.8667, "step": 7363 }, { "epoch": 3.3381686310063463, "grad_norm": 0.1843941879408786, "learning_rate": 2.403291591067041e-05, "loss": 0.8465, "step": 7364 }, { "epoch": 3.338621940163191, "grad_norm": 0.177188021794928, "learning_rate": 2.4021310206462608e-05, "loss": 0.8626, "step": 7365 }, { "epoch": 3.3390752493200364, "grad_norm": 0.18652845947659233, "learning_rate": 2.4009706102567553e-05, "loss": 0.879, "step": 7366 }, { "epoch": 3.3395285584768812, "grad_norm": 0.20489878396214775, "learning_rate": 2.3998103600147422e-05, "loss": 0.8773, "step": 7367 }, { "epoch": 3.339981867633726, "grad_norm": 0.22083212453605092, "learning_rate": 2.3986502700364246e-05, "loss": 0.8524, "step": 7368 }, { "epoch": 3.3404351767905713, "grad_norm": 0.21995459952993993, "learning_rate": 2.3974903404379887e-05, "loss": 0.8643, "step": 7369 }, { "epoch": 3.340888485947416, "grad_norm": 0.15949731449055143, "learning_rate": 2.3963305713356052e-05, "loss": 0.8679, "step": 7370 }, { "epoch": 3.341341795104261, "grad_norm": 0.18708736957314825, "learning_rate": 2.3951709628454277e-05, "loss": 0.8707, "step": 7371 }, { "epoch": 3.3417951042611063, "grad_norm": 0.19903749205162952, "learning_rate": 2.394011515083595e-05, "loss": 0.8679, "step": 7372 }, { "epoch": 3.342248413417951, "grad_norm": 0.2198274561265706, "learning_rate": 2.3928522281662304e-05, "loss": 0.8606, "step": 7373 }, { "epoch": 3.342701722574796, "grad_norm": 0.1867468846513203, "learning_rate": 2.3916931022094366e-05, "loss": 0.8639, "step": 7374 }, { "epoch": 3.3431550317316407, "grad_norm": 0.22919054544778897, "learning_rate": 2.390534137329306e-05, "loss": 0.8953, "step": 7375 }, { "epoch": 3.343608340888486, "grad_norm": 0.19387773856821036, "learning_rate": 2.3893753336419114e-05, "loss": 0.8555, "step": 7376 }, { "epoch": 3.344061650045331, "grad_norm": 0.24287987829428162, "learning_rate": 2.3882166912633108e-05, "loss": 0.8634, "step": 7377 }, { "epoch": 3.3445149592021757, "grad_norm": 0.25249120461952407, "learning_rate": 2.3870582103095455e-05, "loss": 0.8649, "step": 7378 }, { "epoch": 3.344968268359021, "grad_norm": 0.2341102607994931, "learning_rate": 2.385899890896641e-05, "loss": 0.8602, "step": 7379 }, { "epoch": 3.3454215775158658, "grad_norm": 0.24116884045267623, "learning_rate": 2.3847417331406057e-05, "loss": 0.8652, "step": 7380 }, { "epoch": 3.3458748866727106, "grad_norm": 0.20507410340346333, "learning_rate": 2.3835837371574334e-05, "loss": 0.8671, "step": 7381 }, { "epoch": 3.346328195829556, "grad_norm": 0.22946811562919794, "learning_rate": 2.3824259030630998e-05, "loss": 0.8711, "step": 7382 }, { "epoch": 3.3467815049864007, "grad_norm": 0.18128752899838677, "learning_rate": 2.3812682309735663e-05, "loss": 0.8596, "step": 7383 }, { "epoch": 3.3472348141432455, "grad_norm": 0.2003297669859454, "learning_rate": 2.3801107210047773e-05, "loss": 0.8693, "step": 7384 }, { "epoch": 3.347688123300091, "grad_norm": 0.18215673558485823, "learning_rate": 2.3789533732726593e-05, "loss": 0.8719, "step": 7385 }, { "epoch": 3.3481414324569356, "grad_norm": 0.2026371081927297, "learning_rate": 2.3777961878931258e-05, "loss": 0.87, "step": 7386 }, { "epoch": 3.3485947416137805, "grad_norm": 0.2395166034871391, "learning_rate": 2.3766391649820718e-05, "loss": 0.8643, "step": 7387 }, { "epoch": 3.3490480507706257, "grad_norm": 0.2341482814739495, "learning_rate": 2.3754823046553755e-05, "loss": 0.8615, "step": 7388 }, { "epoch": 3.3495013599274706, "grad_norm": 0.1886844424067505, "learning_rate": 2.3743256070289027e-05, "loss": 0.8652, "step": 7389 }, { "epoch": 3.3499546690843154, "grad_norm": 0.2709012875314153, "learning_rate": 2.3731690722184964e-05, "loss": 0.8632, "step": 7390 }, { "epoch": 3.3504079782411607, "grad_norm": 0.25649832351948726, "learning_rate": 2.3720127003399896e-05, "loss": 0.8608, "step": 7391 }, { "epoch": 3.3508612873980055, "grad_norm": 0.2124591997166793, "learning_rate": 2.3708564915091953e-05, "loss": 0.8647, "step": 7392 }, { "epoch": 3.3513145965548503, "grad_norm": 0.24983567369021387, "learning_rate": 2.3697004458419118e-05, "loss": 0.8522, "step": 7393 }, { "epoch": 3.3517679057116956, "grad_norm": 0.17268122068106462, "learning_rate": 2.3685445634539193e-05, "loss": 0.8737, "step": 7394 }, { "epoch": 3.3522212148685404, "grad_norm": 0.20172581727455213, "learning_rate": 2.367388844460983e-05, "loss": 0.8763, "step": 7395 }, { "epoch": 3.3526745240253852, "grad_norm": 0.21872353203318345, "learning_rate": 2.3662332889788527e-05, "loss": 0.8731, "step": 7396 }, { "epoch": 3.3531278331822305, "grad_norm": 0.2037231689179369, "learning_rate": 2.3650778971232596e-05, "loss": 0.8574, "step": 7397 }, { "epoch": 3.3535811423390753, "grad_norm": 0.21459331830167439, "learning_rate": 2.3639226690099206e-05, "loss": 0.8731, "step": 7398 }, { "epoch": 3.35403445149592, "grad_norm": 0.2558054385179132, "learning_rate": 2.3627676047545344e-05, "loss": 0.8618, "step": 7399 }, { "epoch": 3.354487760652765, "grad_norm": 0.21232182859249366, "learning_rate": 2.3616127044727842e-05, "loss": 0.8771, "step": 7400 }, { "epoch": 3.3549410698096103, "grad_norm": 0.204634373559339, "learning_rate": 2.360457968280337e-05, "loss": 0.8585, "step": 7401 }, { "epoch": 3.355394378966455, "grad_norm": 0.2501078674095112, "learning_rate": 2.3593033962928423e-05, "loss": 0.8612, "step": 7402 }, { "epoch": 3.3558476881233, "grad_norm": 0.1806040038598417, "learning_rate": 2.358148988625935e-05, "loss": 0.8708, "step": 7403 }, { "epoch": 3.356300997280145, "grad_norm": 0.23622545208827514, "learning_rate": 2.3569947453952324e-05, "loss": 0.8759, "step": 7404 }, { "epoch": 3.35675430643699, "grad_norm": 0.21687662497839733, "learning_rate": 2.3558406667163337e-05, "loss": 0.8778, "step": 7405 }, { "epoch": 3.357207615593835, "grad_norm": 0.2774970759241026, "learning_rate": 2.354686752704824e-05, "loss": 0.8658, "step": 7406 }, { "epoch": 3.35766092475068, "grad_norm": 0.26984403554044495, "learning_rate": 2.3535330034762724e-05, "loss": 0.8732, "step": 7407 }, { "epoch": 3.358114233907525, "grad_norm": 0.18547867862586442, "learning_rate": 2.352379419146229e-05, "loss": 0.8576, "step": 7408 }, { "epoch": 3.3585675430643698, "grad_norm": 0.24821102663475797, "learning_rate": 2.3512259998302288e-05, "loss": 0.8631, "step": 7409 }, { "epoch": 3.359020852221215, "grad_norm": 0.19678993661593217, "learning_rate": 2.3500727456437904e-05, "loss": 0.877, "step": 7410 }, { "epoch": 3.35947416137806, "grad_norm": 0.22606500707321076, "learning_rate": 2.3489196567024175e-05, "loss": 0.8695, "step": 7411 }, { "epoch": 3.3599274705349047, "grad_norm": 0.18882672352257138, "learning_rate": 2.347766733121592e-05, "loss": 0.853, "step": 7412 }, { "epoch": 3.3603807796917495, "grad_norm": 0.1940982161568819, "learning_rate": 2.3466139750167837e-05, "loss": 0.8616, "step": 7413 }, { "epoch": 3.360834088848595, "grad_norm": 0.19009118497056912, "learning_rate": 2.3454613825034444e-05, "loss": 0.8852, "step": 7414 }, { "epoch": 3.3612873980054396, "grad_norm": 0.19438061184982935, "learning_rate": 2.3443089556970112e-05, "loss": 0.8607, "step": 7415 }, { "epoch": 3.3617407071622845, "grad_norm": 0.2011566913408708, "learning_rate": 2.3431566947129017e-05, "loss": 0.8699, "step": 7416 }, { "epoch": 3.3621940163191297, "grad_norm": 0.18734744372917567, "learning_rate": 2.3420045996665186e-05, "loss": 0.8751, "step": 7417 }, { "epoch": 3.3626473254759746, "grad_norm": 0.1808277257946062, "learning_rate": 2.3408526706732476e-05, "loss": 0.8462, "step": 7418 }, { "epoch": 3.3631006346328194, "grad_norm": 0.1989359609309082, "learning_rate": 2.3397009078484587e-05, "loss": 0.8696, "step": 7419 }, { "epoch": 3.3635539437896647, "grad_norm": 0.21067931947981686, "learning_rate": 2.338549311307502e-05, "loss": 0.8603, "step": 7420 }, { "epoch": 3.3640072529465095, "grad_norm": 0.1759165379699468, "learning_rate": 2.3373978811657155e-05, "loss": 0.8727, "step": 7421 }, { "epoch": 3.3644605621033543, "grad_norm": 0.21974659424340098, "learning_rate": 2.336246617538417e-05, "loss": 0.8612, "step": 7422 }, { "epoch": 3.3649138712601996, "grad_norm": 0.19209284509376004, "learning_rate": 2.3350955205409092e-05, "loss": 0.8685, "step": 7423 }, { "epoch": 3.3653671804170444, "grad_norm": 0.18115003896541887, "learning_rate": 2.333944590288479e-05, "loss": 0.8766, "step": 7424 }, { "epoch": 3.3658204895738892, "grad_norm": 0.2020460377167516, "learning_rate": 2.3327938268963947e-05, "loss": 0.8413, "step": 7425 }, { "epoch": 3.3662737987307345, "grad_norm": 0.19734650860748784, "learning_rate": 2.331643230479907e-05, "loss": 0.8786, "step": 7426 }, { "epoch": 3.3667271078875793, "grad_norm": 0.2323050860076111, "learning_rate": 2.3304928011542546e-05, "loss": 0.8671, "step": 7427 }, { "epoch": 3.367180417044424, "grad_norm": 0.2630722282305213, "learning_rate": 2.3293425390346533e-05, "loss": 0.8583, "step": 7428 }, { "epoch": 3.3676337262012694, "grad_norm": 0.20565599413172445, "learning_rate": 2.328192444236308e-05, "loss": 0.8695, "step": 7429 }, { "epoch": 3.3680870353581143, "grad_norm": 0.303351467252471, "learning_rate": 2.3270425168744025e-05, "loss": 0.8477, "step": 7430 }, { "epoch": 3.368540344514959, "grad_norm": 0.26461492030266315, "learning_rate": 2.3258927570641064e-05, "loss": 0.879, "step": 7431 }, { "epoch": 3.3689936536718044, "grad_norm": 0.2298735742580249, "learning_rate": 2.3247431649205687e-05, "loss": 0.886, "step": 7432 }, { "epoch": 3.369446962828649, "grad_norm": 0.26033882821781745, "learning_rate": 2.3235937405589282e-05, "loss": 0.8689, "step": 7433 }, { "epoch": 3.369900271985494, "grad_norm": 0.16336328375863335, "learning_rate": 2.3224444840943e-05, "loss": 0.8473, "step": 7434 }, { "epoch": 3.3703535811423393, "grad_norm": 0.27477713307106594, "learning_rate": 2.3212953956417885e-05, "loss": 0.8624, "step": 7435 }, { "epoch": 3.370806890299184, "grad_norm": 0.2181918083460668, "learning_rate": 2.3201464753164752e-05, "loss": 0.8811, "step": 7436 }, { "epoch": 3.371260199456029, "grad_norm": 0.27588547352358184, "learning_rate": 2.3189977232334307e-05, "loss": 0.8743, "step": 7437 }, { "epoch": 3.371713508612874, "grad_norm": 0.23573734801351376, "learning_rate": 2.317849139507703e-05, "loss": 0.8778, "step": 7438 }, { "epoch": 3.372166817769719, "grad_norm": 0.2472013573139642, "learning_rate": 2.3167007242543296e-05, "loss": 0.8732, "step": 7439 }, { "epoch": 3.372620126926564, "grad_norm": 0.36694398535801076, "learning_rate": 2.3155524775883258e-05, "loss": 0.8615, "step": 7440 }, { "epoch": 3.3730734360834087, "grad_norm": 0.28697346873489027, "learning_rate": 2.31440439962469e-05, "loss": 0.8724, "step": 7441 }, { "epoch": 3.373526745240254, "grad_norm": 0.19676341700838945, "learning_rate": 2.313256490478409e-05, "loss": 0.877, "step": 7442 }, { "epoch": 3.373980054397099, "grad_norm": 0.23999952474616656, "learning_rate": 2.3121087502644464e-05, "loss": 0.863, "step": 7443 }, { "epoch": 3.3744333635539436, "grad_norm": 0.2725024423702691, "learning_rate": 2.310961179097755e-05, "loss": 0.8557, "step": 7444 }, { "epoch": 3.374886672710789, "grad_norm": 0.23494989240474498, "learning_rate": 2.3098137770932634e-05, "loss": 0.853, "step": 7445 }, { "epoch": 3.3753399818676337, "grad_norm": 0.2637085146031462, "learning_rate": 2.3086665443658908e-05, "loss": 0.8819, "step": 7446 }, { "epoch": 3.3757932910244786, "grad_norm": 0.26118478706564235, "learning_rate": 2.307519481030534e-05, "loss": 0.8836, "step": 7447 }, { "epoch": 3.376246600181324, "grad_norm": 0.2873131136009189, "learning_rate": 2.3063725872020757e-05, "loss": 0.853, "step": 7448 }, { "epoch": 3.3766999093381687, "grad_norm": 0.27211193513788245, "learning_rate": 2.3052258629953807e-05, "loss": 0.877, "step": 7449 }, { "epoch": 3.3771532184950135, "grad_norm": 0.1959094392025743, "learning_rate": 2.3040793085252964e-05, "loss": 0.8742, "step": 7450 }, { "epoch": 3.3776065276518588, "grad_norm": 0.3094167304418335, "learning_rate": 2.302932923906652e-05, "loss": 0.8563, "step": 7451 }, { "epoch": 3.3780598368087036, "grad_norm": 0.3244158517640174, "learning_rate": 2.3017867092542644e-05, "loss": 0.8757, "step": 7452 }, { "epoch": 3.3785131459655484, "grad_norm": 0.24659264148629498, "learning_rate": 2.300640664682927e-05, "loss": 0.8613, "step": 7453 }, { "epoch": 3.3789664551223932, "grad_norm": 0.23067583437740566, "learning_rate": 2.2994947903074234e-05, "loss": 0.8745, "step": 7454 }, { "epoch": 3.3794197642792385, "grad_norm": 0.2986831280045326, "learning_rate": 2.298349086242514e-05, "loss": 0.8646, "step": 7455 }, { "epoch": 3.3798730734360833, "grad_norm": 0.27812657936852364, "learning_rate": 2.2972035526029428e-05, "loss": 0.8466, "step": 7456 }, { "epoch": 3.380326382592928, "grad_norm": 0.1970299267220464, "learning_rate": 2.2960581895034423e-05, "loss": 0.8636, "step": 7457 }, { "epoch": 3.3807796917497734, "grad_norm": 0.210424312270876, "learning_rate": 2.2949129970587203e-05, "loss": 0.8474, "step": 7458 }, { "epoch": 3.3812330009066183, "grad_norm": 0.2585840369714823, "learning_rate": 2.293767975383474e-05, "loss": 0.8805, "step": 7459 }, { "epoch": 3.381686310063463, "grad_norm": 0.2176418430021887, "learning_rate": 2.2926231245923784e-05, "loss": 0.8661, "step": 7460 }, { "epoch": 3.3821396192203084, "grad_norm": 0.20230576807926184, "learning_rate": 2.2914784448000963e-05, "loss": 0.8617, "step": 7461 }, { "epoch": 3.382592928377153, "grad_norm": 0.2569783425545793, "learning_rate": 2.2903339361212672e-05, "loss": 0.8708, "step": 7462 }, { "epoch": 3.383046237533998, "grad_norm": 0.2186063666332499, "learning_rate": 2.2891895986705207e-05, "loss": 0.866, "step": 7463 }, { "epoch": 3.3834995466908433, "grad_norm": 0.27078511199807215, "learning_rate": 2.2880454325624625e-05, "loss": 0.8842, "step": 7464 }, { "epoch": 3.383952855847688, "grad_norm": 0.2497872654968895, "learning_rate": 2.286901437911687e-05, "loss": 0.8856, "step": 7465 }, { "epoch": 3.384406165004533, "grad_norm": 0.20986508749779573, "learning_rate": 2.2857576148327674e-05, "loss": 0.8515, "step": 7466 }, { "epoch": 3.3848594741613782, "grad_norm": 0.21837244220695784, "learning_rate": 2.2846139634402586e-05, "loss": 0.8721, "step": 7467 }, { "epoch": 3.385312783318223, "grad_norm": 0.19057350836318765, "learning_rate": 2.2834704838487047e-05, "loss": 0.8682, "step": 7468 }, { "epoch": 3.385766092475068, "grad_norm": 0.21104312827764415, "learning_rate": 2.282327176172627e-05, "loss": 0.8557, "step": 7469 }, { "epoch": 3.386219401631913, "grad_norm": 0.22793510555738497, "learning_rate": 2.2811840405265306e-05, "loss": 0.8589, "step": 7470 }, { "epoch": 3.386672710788758, "grad_norm": 0.1946928804485043, "learning_rate": 2.280041077024902e-05, "loss": 0.8651, "step": 7471 }, { "epoch": 3.387126019945603, "grad_norm": 0.19920453007978534, "learning_rate": 2.2788982857822167e-05, "loss": 0.8948, "step": 7472 }, { "epoch": 3.387579329102448, "grad_norm": 0.22700903089996052, "learning_rate": 2.277755666912924e-05, "loss": 0.852, "step": 7473 }, { "epoch": 3.388032638259293, "grad_norm": 0.169270608016195, "learning_rate": 2.2766132205314647e-05, "loss": 0.8808, "step": 7474 }, { "epoch": 3.3884859474161377, "grad_norm": 0.20182156293689776, "learning_rate": 2.2754709467522548e-05, "loss": 0.8564, "step": 7475 }, { "epoch": 3.388939256572983, "grad_norm": 1.5602415528511289, "learning_rate": 2.2743288456896995e-05, "loss": 0.8755, "step": 7476 }, { "epoch": 3.389392565729828, "grad_norm": 0.23273044034996854, "learning_rate": 2.2731869174581802e-05, "loss": 0.8642, "step": 7477 }, { "epoch": 3.3898458748866727, "grad_norm": 0.2531043586666865, "learning_rate": 2.272045162172068e-05, "loss": 0.8561, "step": 7478 }, { "epoch": 3.3902991840435175, "grad_norm": 0.21897949994681676, "learning_rate": 2.270903579945709e-05, "loss": 0.9056, "step": 7479 }, { "epoch": 3.3907524932003628, "grad_norm": 0.2863465444316875, "learning_rate": 2.2697621708934405e-05, "loss": 0.8756, "step": 7480 }, { "epoch": 3.3912058023572076, "grad_norm": 0.20299795733572118, "learning_rate": 2.2686209351295752e-05, "loss": 0.8722, "step": 7481 }, { "epoch": 3.3916591115140524, "grad_norm": 0.25150488179257974, "learning_rate": 2.2674798727684104e-05, "loss": 0.8685, "step": 7482 }, { "epoch": 3.3921124206708977, "grad_norm": 0.22187266441207362, "learning_rate": 2.2663389839242294e-05, "loss": 0.8597, "step": 7483 }, { "epoch": 3.3925657298277425, "grad_norm": 0.28991145299765436, "learning_rate": 2.2651982687112934e-05, "loss": 0.859, "step": 7484 }, { "epoch": 3.3930190389845873, "grad_norm": 0.2836074973650536, "learning_rate": 2.2640577272438497e-05, "loss": 0.8526, "step": 7485 }, { "epoch": 3.3934723481414326, "grad_norm": 0.2768097583377648, "learning_rate": 2.2629173596361254e-05, "loss": 0.8829, "step": 7486 }, { "epoch": 3.3939256572982774, "grad_norm": 0.21753831598239579, "learning_rate": 2.2617771660023346e-05, "loss": 0.866, "step": 7487 }, { "epoch": 3.3943789664551223, "grad_norm": 0.18712235084581705, "learning_rate": 2.2606371464566694e-05, "loss": 0.8714, "step": 7488 }, { "epoch": 3.3948322756119675, "grad_norm": 0.2331169066466924, "learning_rate": 2.2594973011133048e-05, "loss": 0.8455, "step": 7489 }, { "epoch": 3.3952855847688124, "grad_norm": 0.1813034668852031, "learning_rate": 2.2583576300863998e-05, "loss": 0.8601, "step": 7490 }, { "epoch": 3.395738893925657, "grad_norm": 0.2099933340929198, "learning_rate": 2.2572181334900983e-05, "loss": 0.8593, "step": 7491 }, { "epoch": 3.396192203082502, "grad_norm": 0.24652328918889627, "learning_rate": 2.2560788114385216e-05, "loss": 0.8591, "step": 7492 }, { "epoch": 3.3966455122393473, "grad_norm": 0.24434066949868113, "learning_rate": 2.254939664045778e-05, "loss": 0.8727, "step": 7493 }, { "epoch": 3.397098821396192, "grad_norm": 0.2516837246828894, "learning_rate": 2.253800691425954e-05, "loss": 0.8645, "step": 7494 }, { "epoch": 3.397552130553037, "grad_norm": 0.19671606612033465, "learning_rate": 2.252661893693125e-05, "loss": 0.8765, "step": 7495 }, { "epoch": 3.3980054397098822, "grad_norm": 0.24083650934037068, "learning_rate": 2.2515232709613407e-05, "loss": 0.871, "step": 7496 }, { "epoch": 3.398458748866727, "grad_norm": 0.2661652833631995, "learning_rate": 2.2503848233446415e-05, "loss": 0.8717, "step": 7497 }, { "epoch": 3.398912058023572, "grad_norm": 0.19327705224451974, "learning_rate": 2.249246550957044e-05, "loss": 0.8638, "step": 7498 }, { "epoch": 3.399365367180417, "grad_norm": 0.18628949831900254, "learning_rate": 2.248108453912548e-05, "loss": 0.8732, "step": 7499 }, { "epoch": 3.399818676337262, "grad_norm": 0.18369692488757752, "learning_rate": 2.2469705323251408e-05, "loss": 0.8787, "step": 7500 }, { "epoch": 3.400271985494107, "grad_norm": 0.20167547279819462, "learning_rate": 2.2458327863087856e-05, "loss": 0.8531, "step": 7501 }, { "epoch": 3.400725294650952, "grad_norm": 0.17934823211631115, "learning_rate": 2.244695215977433e-05, "loss": 0.8779, "step": 7502 }, { "epoch": 3.401178603807797, "grad_norm": 0.18757977220987573, "learning_rate": 2.2435578214450127e-05, "loss": 0.8694, "step": 7503 }, { "epoch": 3.4016319129646417, "grad_norm": 0.20371400599829687, "learning_rate": 2.2424206028254398e-05, "loss": 0.8497, "step": 7504 }, { "epoch": 3.402085222121487, "grad_norm": 0.21166887826962263, "learning_rate": 2.241283560232607e-05, "loss": 0.857, "step": 7505 }, { "epoch": 3.402538531278332, "grad_norm": 0.22557137424565774, "learning_rate": 2.2401466937803962e-05, "loss": 0.8712, "step": 7506 }, { "epoch": 3.4029918404351767, "grad_norm": 0.19265723685370417, "learning_rate": 2.2390100035826662e-05, "loss": 0.8923, "step": 7507 }, { "epoch": 3.403445149592022, "grad_norm": 0.18675757750604674, "learning_rate": 2.23787348975326e-05, "loss": 0.8441, "step": 7508 }, { "epoch": 3.4038984587488668, "grad_norm": 0.16339257272655064, "learning_rate": 2.2367371524060005e-05, "loss": 0.8661, "step": 7509 }, { "epoch": 3.4043517679057116, "grad_norm": 0.20310807004788642, "learning_rate": 2.2356009916546995e-05, "loss": 0.8449, "step": 7510 }, { "epoch": 3.404805077062557, "grad_norm": 0.23596182687487793, "learning_rate": 2.234465007613143e-05, "loss": 0.8765, "step": 7511 }, { "epoch": 3.4052583862194017, "grad_norm": 0.18430061660226768, "learning_rate": 2.2333292003951063e-05, "loss": 0.8618, "step": 7512 }, { "epoch": 3.4057116953762465, "grad_norm": 0.23571233820793622, "learning_rate": 2.232193570114342e-05, "loss": 0.8782, "step": 7513 }, { "epoch": 3.406165004533092, "grad_norm": 0.19154506635183807, "learning_rate": 2.2310581168845858e-05, "loss": 0.8813, "step": 7514 }, { "epoch": 3.4066183136899366, "grad_norm": 0.19881056403893754, "learning_rate": 2.2299228408195597e-05, "loss": 0.8594, "step": 7515 }, { "epoch": 3.4070716228467814, "grad_norm": 0.21438882332799422, "learning_rate": 2.2287877420329615e-05, "loss": 0.8632, "step": 7516 }, { "epoch": 3.4075249320036267, "grad_norm": 0.1864135660103843, "learning_rate": 2.227652820638478e-05, "loss": 0.8625, "step": 7517 }, { "epoch": 3.4079782411604715, "grad_norm": 0.2041717485446374, "learning_rate": 2.226518076749772e-05, "loss": 0.8574, "step": 7518 }, { "epoch": 3.4084315503173164, "grad_norm": 0.18349647778942216, "learning_rate": 2.2253835104804944e-05, "loss": 0.8591, "step": 7519 }, { "epoch": 3.408884859474161, "grad_norm": 0.19687826154870955, "learning_rate": 2.2242491219442718e-05, "loss": 0.8593, "step": 7520 }, { "epoch": 3.4093381686310065, "grad_norm": 0.1703881281800158, "learning_rate": 2.22311491125472e-05, "loss": 0.8688, "step": 7521 }, { "epoch": 3.4097914777878513, "grad_norm": 0.22531627541330432, "learning_rate": 2.2219808785254307e-05, "loss": 0.8759, "step": 7522 }, { "epoch": 3.410244786944696, "grad_norm": 0.15709312436216172, "learning_rate": 2.2208470238699835e-05, "loss": 0.8911, "step": 7523 }, { "epoch": 3.4106980961015414, "grad_norm": 0.20946311012975138, "learning_rate": 2.2197133474019354e-05, "loss": 0.8632, "step": 7524 }, { "epoch": 3.4111514052583862, "grad_norm": 0.20054093347366536, "learning_rate": 2.2185798492348266e-05, "loss": 0.8587, "step": 7525 }, { "epoch": 3.411604714415231, "grad_norm": 0.13993384464595507, "learning_rate": 2.2174465294821825e-05, "loss": 0.8598, "step": 7526 }, { "epoch": 3.4120580235720763, "grad_norm": 0.2127651584506655, "learning_rate": 2.2163133882575075e-05, "loss": 0.8856, "step": 7527 }, { "epoch": 3.412511332728921, "grad_norm": 0.1716287329587766, "learning_rate": 2.215180425674289e-05, "loss": 0.8714, "step": 7528 }, { "epoch": 3.412964641885766, "grad_norm": 0.19515263313282585, "learning_rate": 2.214047641845995e-05, "loss": 0.8693, "step": 7529 }, { "epoch": 3.4134179510426113, "grad_norm": 0.22533691811542883, "learning_rate": 2.2129150368860793e-05, "loss": 0.8421, "step": 7530 }, { "epoch": 3.413871260199456, "grad_norm": 0.18575002686018655, "learning_rate": 2.2117826109079735e-05, "loss": 0.8695, "step": 7531 }, { "epoch": 3.414324569356301, "grad_norm": 0.24348247998780753, "learning_rate": 2.2106503640250965e-05, "loss": 0.8559, "step": 7532 }, { "epoch": 3.4147778785131457, "grad_norm": 0.19591714037489133, "learning_rate": 2.2095182963508426e-05, "loss": 0.8683, "step": 7533 }, { "epoch": 3.415231187669991, "grad_norm": 0.245795393989745, "learning_rate": 2.2083864079985953e-05, "loss": 0.8831, "step": 7534 }, { "epoch": 3.415684496826836, "grad_norm": 0.22895538026283682, "learning_rate": 2.207254699081713e-05, "loss": 0.8481, "step": 7535 }, { "epoch": 3.4161378059836807, "grad_norm": 0.20481855338518917, "learning_rate": 2.2061231697135427e-05, "loss": 0.8584, "step": 7536 }, { "epoch": 3.416591115140526, "grad_norm": 0.25919787448378867, "learning_rate": 2.2049918200074078e-05, "loss": 0.8516, "step": 7537 }, { "epoch": 3.4170444242973708, "grad_norm": 0.22723751521500674, "learning_rate": 2.203860650076619e-05, "loss": 0.8695, "step": 7538 }, { "epoch": 3.4174977334542156, "grad_norm": 0.23517981350326186, "learning_rate": 2.2027296600344648e-05, "loss": 0.8619, "step": 7539 }, { "epoch": 3.417951042611061, "grad_norm": 0.2801979712493573, "learning_rate": 2.2015988499942157e-05, "loss": 0.8582, "step": 7540 }, { "epoch": 3.4184043517679057, "grad_norm": 0.18765723111513163, "learning_rate": 2.2004682200691288e-05, "loss": 0.872, "step": 7541 }, { "epoch": 3.4188576609247505, "grad_norm": 0.2027335933817281, "learning_rate": 2.199337770372437e-05, "loss": 0.8637, "step": 7542 }, { "epoch": 3.419310970081596, "grad_norm": 0.21466016396310136, "learning_rate": 2.1982075010173603e-05, "loss": 0.862, "step": 7543 }, { "epoch": 3.4197642792384406, "grad_norm": 0.23814779528285293, "learning_rate": 2.1970774121170972e-05, "loss": 0.8523, "step": 7544 }, { "epoch": 3.4202175883952854, "grad_norm": 0.2814140169330891, "learning_rate": 2.1959475037848323e-05, "loss": 0.8609, "step": 7545 }, { "epoch": 3.4206708975521307, "grad_norm": 0.17861801970021451, "learning_rate": 2.1948177761337243e-05, "loss": 0.8506, "step": 7546 }, { "epoch": 3.4211242067089755, "grad_norm": 0.2463636506993777, "learning_rate": 2.1936882292769228e-05, "loss": 0.8616, "step": 7547 }, { "epoch": 3.4215775158658204, "grad_norm": 0.28555354178531417, "learning_rate": 2.192558863327552e-05, "loss": 0.8655, "step": 7548 }, { "epoch": 3.4220308250226656, "grad_norm": 0.19673859402943472, "learning_rate": 2.191429678398725e-05, "loss": 0.8756, "step": 7549 }, { "epoch": 3.4224841341795105, "grad_norm": 0.2689886194895122, "learning_rate": 2.190300674603529e-05, "loss": 0.868, "step": 7550 }, { "epoch": 3.4229374433363553, "grad_norm": 0.18658391675758662, "learning_rate": 2.1891718520550408e-05, "loss": 0.8633, "step": 7551 }, { "epoch": 3.4233907524932006, "grad_norm": 0.24874595360097349, "learning_rate": 2.1880432108663118e-05, "loss": 0.8846, "step": 7552 }, { "epoch": 3.4238440616500454, "grad_norm": 0.2852602618979654, "learning_rate": 2.1869147511503822e-05, "loss": 0.8625, "step": 7553 }, { "epoch": 3.4242973708068902, "grad_norm": 0.20977086814288023, "learning_rate": 2.1857864730202687e-05, "loss": 0.8532, "step": 7554 }, { "epoch": 3.4247506799637355, "grad_norm": 0.19643557157288696, "learning_rate": 2.1846583765889707e-05, "loss": 0.8806, "step": 7555 }, { "epoch": 3.4252039891205803, "grad_norm": 0.251353539268238, "learning_rate": 2.1835304619694727e-05, "loss": 0.8778, "step": 7556 }, { "epoch": 3.425657298277425, "grad_norm": 0.1977923776887601, "learning_rate": 2.1824027292747357e-05, "loss": 0.8597, "step": 7557 }, { "epoch": 3.42611060743427, "grad_norm": 0.17527931671152153, "learning_rate": 2.1812751786177087e-05, "loss": 0.8474, "step": 7558 }, { "epoch": 3.4265639165911153, "grad_norm": 0.20850206948582908, "learning_rate": 2.1801478101113167e-05, "loss": 0.8553, "step": 7559 }, { "epoch": 3.42701722574796, "grad_norm": 0.2417711465447516, "learning_rate": 2.1790206238684713e-05, "loss": 0.8757, "step": 7560 }, { "epoch": 3.427470534904805, "grad_norm": 0.18479468316740705, "learning_rate": 2.17789362000206e-05, "loss": 0.8751, "step": 7561 }, { "epoch": 3.42792384406165, "grad_norm": 0.19686289218722064, "learning_rate": 2.17676679862496e-05, "loss": 0.8676, "step": 7562 }, { "epoch": 3.428377153218495, "grad_norm": 0.20399424265108831, "learning_rate": 2.175640159850022e-05, "loss": 0.8642, "step": 7563 }, { "epoch": 3.42883046237534, "grad_norm": 0.2077835441553632, "learning_rate": 2.174513703790085e-05, "loss": 0.8852, "step": 7564 }, { "epoch": 3.429283771532185, "grad_norm": 0.19970637607706057, "learning_rate": 2.1733874305579656e-05, "loss": 0.8774, "step": 7565 }, { "epoch": 3.42973708068903, "grad_norm": 0.1864189108252847, "learning_rate": 2.172261340266463e-05, "loss": 0.854, "step": 7566 }, { "epoch": 3.4301903898458748, "grad_norm": 0.19536602490885868, "learning_rate": 2.1711354330283575e-05, "loss": 0.8771, "step": 7567 }, { "epoch": 3.43064369900272, "grad_norm": 0.17974477861946822, "learning_rate": 2.170009708956415e-05, "loss": 0.8591, "step": 7568 }, { "epoch": 3.431097008159565, "grad_norm": 0.2132146282140445, "learning_rate": 2.168884168163378e-05, "loss": 0.8704, "step": 7569 }, { "epoch": 3.4315503173164097, "grad_norm": 0.24420145135499968, "learning_rate": 2.167758810761972e-05, "loss": 0.8589, "step": 7570 }, { "epoch": 3.4320036264732545, "grad_norm": 0.2194910923764329, "learning_rate": 2.1666336368649072e-05, "loss": 0.88, "step": 7571 }, { "epoch": 3.4324569356301, "grad_norm": 0.21536208302771778, "learning_rate": 2.1655086465848704e-05, "loss": 0.8575, "step": 7572 }, { "epoch": 3.4329102447869446, "grad_norm": 0.24690314108794084, "learning_rate": 2.1643838400345357e-05, "loss": 0.8625, "step": 7573 }, { "epoch": 3.4333635539437894, "grad_norm": 0.25309506472751636, "learning_rate": 2.1632592173265524e-05, "loss": 0.8563, "step": 7574 }, { "epoch": 3.4338168631006347, "grad_norm": 0.21628365836005467, "learning_rate": 2.1621347785735586e-05, "loss": 0.8575, "step": 7575 }, { "epoch": 3.4342701722574795, "grad_norm": 0.1882119746422609, "learning_rate": 2.1610105238881664e-05, "loss": 0.8633, "step": 7576 }, { "epoch": 3.4347234814143244, "grad_norm": 0.23618757158784798, "learning_rate": 2.1598864533829764e-05, "loss": 0.8606, "step": 7577 }, { "epoch": 3.4351767905711696, "grad_norm": 0.22718519409027466, "learning_rate": 2.1587625671705643e-05, "loss": 0.877, "step": 7578 }, { "epoch": 3.4356300997280145, "grad_norm": 0.20953803706078505, "learning_rate": 2.1576388653634942e-05, "loss": 0.892, "step": 7579 }, { "epoch": 3.4360834088848593, "grad_norm": 0.23188512620471546, "learning_rate": 2.1565153480743066e-05, "loss": 0.8448, "step": 7580 }, { "epoch": 3.4365367180417046, "grad_norm": 0.24001957367344218, "learning_rate": 2.155392015415523e-05, "loss": 0.8693, "step": 7581 }, { "epoch": 3.4369900271985494, "grad_norm": 0.20454236868014564, "learning_rate": 2.154268867499652e-05, "loss": 0.8766, "step": 7582 }, { "epoch": 3.4374433363553942, "grad_norm": 0.22499784349363375, "learning_rate": 2.1531459044391763e-05, "loss": 0.8639, "step": 7583 }, { "epoch": 3.4378966455122395, "grad_norm": 0.24397326055813243, "learning_rate": 2.1520231263465698e-05, "loss": 0.8644, "step": 7584 }, { "epoch": 3.4383499546690843, "grad_norm": 0.23651929045697453, "learning_rate": 2.1509005333342746e-05, "loss": 0.8646, "step": 7585 }, { "epoch": 3.438803263825929, "grad_norm": 0.19615226003754233, "learning_rate": 2.1497781255147273e-05, "loss": 0.8757, "step": 7586 }, { "epoch": 3.4392565729827744, "grad_norm": 0.21529551086885773, "learning_rate": 2.1486559030003367e-05, "loss": 0.8838, "step": 7587 }, { "epoch": 3.4397098821396193, "grad_norm": 0.17212709423897182, "learning_rate": 2.1475338659035003e-05, "loss": 0.8646, "step": 7588 }, { "epoch": 3.440163191296464, "grad_norm": 0.21919293684501773, "learning_rate": 2.1464120143365898e-05, "loss": 0.8708, "step": 7589 }, { "epoch": 3.4406165004533094, "grad_norm": 0.2177308132052484, "learning_rate": 2.1452903484119652e-05, "loss": 0.8524, "step": 7590 }, { "epoch": 3.441069809610154, "grad_norm": 0.20385299031901108, "learning_rate": 2.1441688682419618e-05, "loss": 0.8613, "step": 7591 }, { "epoch": 3.441523118766999, "grad_norm": 0.1894404916095677, "learning_rate": 2.143047573938901e-05, "loss": 0.8873, "step": 7592 }, { "epoch": 3.4419764279238443, "grad_norm": 0.20096816013034421, "learning_rate": 2.1419264656150825e-05, "loss": 0.8705, "step": 7593 }, { "epoch": 3.442429737080689, "grad_norm": 0.1908786101861783, "learning_rate": 2.1408055433827907e-05, "loss": 0.8682, "step": 7594 }, { "epoch": 3.442883046237534, "grad_norm": 0.19092044118879808, "learning_rate": 2.1396848073542876e-05, "loss": 0.86, "step": 7595 }, { "epoch": 3.443336355394379, "grad_norm": 0.2004089965277209, "learning_rate": 2.138564257641817e-05, "loss": 0.8836, "step": 7596 }, { "epoch": 3.443789664551224, "grad_norm": 0.1849452121177509, "learning_rate": 2.1374438943576083e-05, "loss": 0.8648, "step": 7597 }, { "epoch": 3.444242973708069, "grad_norm": 0.2015526949587346, "learning_rate": 2.136323717613866e-05, "loss": 0.8722, "step": 7598 }, { "epoch": 3.4446962828649137, "grad_norm": 0.226744023198571, "learning_rate": 2.1352037275227818e-05, "loss": 0.8716, "step": 7599 }, { "epoch": 3.445149592021759, "grad_norm": 0.16586865190793604, "learning_rate": 2.1340839241965235e-05, "loss": 0.8572, "step": 7600 }, { "epoch": 3.445602901178604, "grad_norm": 0.18513491204992716, "learning_rate": 2.132964307747246e-05, "loss": 0.8701, "step": 7601 }, { "epoch": 3.4460562103354486, "grad_norm": 0.17691645026042493, "learning_rate": 2.131844878287078e-05, "loss": 0.8797, "step": 7602 }, { "epoch": 3.446509519492294, "grad_norm": 0.17872784287217755, "learning_rate": 2.1307256359281393e-05, "loss": 0.8732, "step": 7603 }, { "epoch": 3.4469628286491387, "grad_norm": 0.20346857071319144, "learning_rate": 2.129606580782518e-05, "loss": 0.8774, "step": 7604 }, { "epoch": 3.4474161378059835, "grad_norm": 0.1867491081860655, "learning_rate": 2.128487712962297e-05, "loss": 0.8637, "step": 7605 }, { "epoch": 3.447869446962829, "grad_norm": 0.19744468719049116, "learning_rate": 2.12736903257953e-05, "loss": 0.866, "step": 7606 }, { "epoch": 3.4483227561196736, "grad_norm": 0.2111420151612983, "learning_rate": 2.12625053974626e-05, "loss": 0.8555, "step": 7607 }, { "epoch": 3.4487760652765185, "grad_norm": 0.2130873340637586, "learning_rate": 2.125132234574503e-05, "loss": 0.8584, "step": 7608 }, { "epoch": 3.4492293744333633, "grad_norm": 0.20000628925481034, "learning_rate": 2.1240141171762648e-05, "loss": 0.8694, "step": 7609 }, { "epoch": 3.4496826835902086, "grad_norm": 0.20100066364891267, "learning_rate": 2.122896187663526e-05, "loss": 0.8765, "step": 7610 }, { "epoch": 3.4501359927470534, "grad_norm": 0.23301753025674113, "learning_rate": 2.121778446148249e-05, "loss": 0.8574, "step": 7611 }, { "epoch": 3.4505893019038982, "grad_norm": 0.1934312961232302, "learning_rate": 2.1206608927423824e-05, "loss": 0.8754, "step": 7612 }, { "epoch": 3.4510426110607435, "grad_norm": 0.192699365739108, "learning_rate": 2.1195435275578493e-05, "loss": 0.8727, "step": 7613 }, { "epoch": 3.4514959202175883, "grad_norm": 0.20191425889851564, "learning_rate": 2.11842635070656e-05, "loss": 0.8627, "step": 7614 }, { "epoch": 3.451949229374433, "grad_norm": 0.2102993173441929, "learning_rate": 2.1173093623003998e-05, "loss": 0.8564, "step": 7615 }, { "epoch": 3.4524025385312784, "grad_norm": 0.16397675453735583, "learning_rate": 2.1161925624512415e-05, "loss": 0.8521, "step": 7616 }, { "epoch": 3.4528558476881233, "grad_norm": 0.18829632170960223, "learning_rate": 2.1150759512709333e-05, "loss": 0.869, "step": 7617 }, { "epoch": 3.453309156844968, "grad_norm": 0.24594892185693426, "learning_rate": 2.1139595288713094e-05, "loss": 0.8823, "step": 7618 }, { "epoch": 3.4537624660018134, "grad_norm": 0.18230156225850674, "learning_rate": 2.11284329536418e-05, "loss": 0.8806, "step": 7619 }, { "epoch": 3.454215775158658, "grad_norm": 0.19946390038727027, "learning_rate": 2.111727250861343e-05, "loss": 0.8596, "step": 7620 }, { "epoch": 3.454669084315503, "grad_norm": 0.1963452669462669, "learning_rate": 2.110611395474571e-05, "loss": 0.8698, "step": 7621 }, { "epoch": 3.4551223934723483, "grad_norm": 0.2251844356857301, "learning_rate": 2.1094957293156205e-05, "loss": 0.8558, "step": 7622 }, { "epoch": 3.455575702629193, "grad_norm": 0.22513774593872196, "learning_rate": 2.1083802524962274e-05, "loss": 0.8742, "step": 7623 }, { "epoch": 3.456029011786038, "grad_norm": 0.22492362137370414, "learning_rate": 2.107264965128113e-05, "loss": 0.8676, "step": 7624 }, { "epoch": 3.456482320942883, "grad_norm": 0.20827246121507506, "learning_rate": 2.1061498673229748e-05, "loss": 0.8742, "step": 7625 }, { "epoch": 3.456935630099728, "grad_norm": 0.2844944789584176, "learning_rate": 2.1050349591924917e-05, "loss": 0.8622, "step": 7626 }, { "epoch": 3.457388939256573, "grad_norm": 0.1958514370340798, "learning_rate": 2.1039202408483284e-05, "loss": 0.8709, "step": 7627 }, { "epoch": 3.457842248413418, "grad_norm": 0.21079531995528386, "learning_rate": 2.102805712402124e-05, "loss": 0.8589, "step": 7628 }, { "epoch": 3.458295557570263, "grad_norm": 0.22251511217691333, "learning_rate": 2.1016913739655046e-05, "loss": 0.8629, "step": 7629 }, { "epoch": 3.458748866727108, "grad_norm": 0.16297282312999897, "learning_rate": 2.1005772256500717e-05, "loss": 0.8731, "step": 7630 }, { "epoch": 3.459202175883953, "grad_norm": 0.18473549537496742, "learning_rate": 2.0994632675674128e-05, "loss": 0.8586, "step": 7631 }, { "epoch": 3.459655485040798, "grad_norm": 0.15417386051239354, "learning_rate": 2.0983494998290928e-05, "loss": 0.8798, "step": 7632 }, { "epoch": 3.4601087941976427, "grad_norm": 0.18685802412134586, "learning_rate": 2.09723592254666e-05, "loss": 0.8537, "step": 7633 }, { "epoch": 3.460562103354488, "grad_norm": 0.2065173952596365, "learning_rate": 2.0961225358316404e-05, "loss": 0.8663, "step": 7634 }, { "epoch": 3.461015412511333, "grad_norm": 0.16975549913374968, "learning_rate": 2.0950093397955457e-05, "loss": 0.8703, "step": 7635 }, { "epoch": 3.4614687216681777, "grad_norm": 0.1924963111585964, "learning_rate": 2.0938963345498643e-05, "loss": 0.8665, "step": 7636 }, { "epoch": 3.4619220308250225, "grad_norm": 0.17525623636890478, "learning_rate": 2.0927835202060657e-05, "loss": 0.8408, "step": 7637 }, { "epoch": 3.4623753399818678, "grad_norm": 0.18521270420475852, "learning_rate": 2.0916708968756038e-05, "loss": 0.8545, "step": 7638 }, { "epoch": 3.4628286491387126, "grad_norm": 0.21098259421195278, "learning_rate": 2.0905584646699086e-05, "loss": 0.8627, "step": 7639 }, { "epoch": 3.4632819582955574, "grad_norm": 0.164355635484537, "learning_rate": 2.0894462237003966e-05, "loss": 0.8552, "step": 7640 }, { "epoch": 3.4637352674524027, "grad_norm": 0.20864377568471143, "learning_rate": 2.0883341740784603e-05, "loss": 0.8744, "step": 7641 }, { "epoch": 3.4641885766092475, "grad_norm": 0.1682048884768107, "learning_rate": 2.087222315915475e-05, "loss": 0.8674, "step": 7642 }, { "epoch": 3.4646418857660923, "grad_norm": 0.17918097012342793, "learning_rate": 2.0861106493227945e-05, "loss": 0.8556, "step": 7643 }, { "epoch": 3.4650951949229376, "grad_norm": 0.1819500782627258, "learning_rate": 2.0849991744117588e-05, "loss": 0.8775, "step": 7644 }, { "epoch": 3.4655485040797824, "grad_norm": 0.19593823648711486, "learning_rate": 2.0838878912936825e-05, "loss": 0.8712, "step": 7645 }, { "epoch": 3.4660018132366273, "grad_norm": 0.17193266903499133, "learning_rate": 2.0827768000798664e-05, "loss": 0.8798, "step": 7646 }, { "epoch": 3.4664551223934725, "grad_norm": 0.220190142773648, "learning_rate": 2.0816659008815873e-05, "loss": 0.8649, "step": 7647 }, { "epoch": 3.4669084315503174, "grad_norm": 0.19771142350662974, "learning_rate": 2.0805551938101073e-05, "loss": 0.8953, "step": 7648 }, { "epoch": 3.467361740707162, "grad_norm": 0.2604753007149304, "learning_rate": 2.079444678976664e-05, "loss": 0.8802, "step": 7649 }, { "epoch": 3.467815049864007, "grad_norm": 0.1866907308360328, "learning_rate": 2.078334356492483e-05, "loss": 0.8559, "step": 7650 }, { "epoch": 3.4682683590208523, "grad_norm": 0.19964368682543487, "learning_rate": 2.077224226468763e-05, "loss": 0.8717, "step": 7651 }, { "epoch": 3.468721668177697, "grad_norm": 0.22416542213145355, "learning_rate": 2.0761142890166862e-05, "loss": 0.8636, "step": 7652 }, { "epoch": 3.469174977334542, "grad_norm": 0.2534044277570479, "learning_rate": 2.0750045442474195e-05, "loss": 0.8737, "step": 7653 }, { "epoch": 3.469628286491387, "grad_norm": 0.2038718119654201, "learning_rate": 2.0738949922721033e-05, "loss": 0.8558, "step": 7654 }, { "epoch": 3.470081595648232, "grad_norm": 0.19841809957880996, "learning_rate": 2.0727856332018658e-05, "loss": 0.8821, "step": 7655 }, { "epoch": 3.470534904805077, "grad_norm": 0.2422795248354855, "learning_rate": 2.07167646714781e-05, "loss": 0.8595, "step": 7656 }, { "epoch": 3.470988213961922, "grad_norm": 0.5223018126219096, "learning_rate": 2.0705674942210248e-05, "loss": 0.8624, "step": 7657 }, { "epoch": 3.471441523118767, "grad_norm": 0.20100346863251986, "learning_rate": 2.069458714532574e-05, "loss": 0.867, "step": 7658 }, { "epoch": 3.471894832275612, "grad_norm": 0.20990446009196798, "learning_rate": 2.068350128193507e-05, "loss": 0.9009, "step": 7659 }, { "epoch": 3.472348141432457, "grad_norm": 0.1854946285399625, "learning_rate": 2.067241735314852e-05, "loss": 0.858, "step": 7660 }, { "epoch": 3.472801450589302, "grad_norm": 0.18562136128592752, "learning_rate": 2.0661335360076197e-05, "loss": 0.8569, "step": 7661 }, { "epoch": 3.4732547597461467, "grad_norm": 0.26784831890054833, "learning_rate": 2.065025530382794e-05, "loss": 0.8559, "step": 7662 }, { "epoch": 3.473708068902992, "grad_norm": 0.21387068974887402, "learning_rate": 2.06391771855135e-05, "loss": 0.8795, "step": 7663 }, { "epoch": 3.474161378059837, "grad_norm": 0.17802186974381054, "learning_rate": 2.062810100624235e-05, "loss": 0.8714, "step": 7664 }, { "epoch": 3.4746146872166817, "grad_norm": 0.17966408336121362, "learning_rate": 2.0617026767123832e-05, "loss": 0.8702, "step": 7665 }, { "epoch": 3.475067996373527, "grad_norm": 0.23216036070697377, "learning_rate": 2.0605954469267047e-05, "loss": 0.8469, "step": 7666 }, { "epoch": 3.4755213055303718, "grad_norm": 0.38280835367036026, "learning_rate": 2.0594884113780907e-05, "loss": 0.8623, "step": 7667 }, { "epoch": 3.4759746146872166, "grad_norm": 0.18155779845887676, "learning_rate": 2.0583815701774168e-05, "loss": 0.8778, "step": 7668 }, { "epoch": 3.476427923844062, "grad_norm": 0.22701807279382943, "learning_rate": 2.0572749234355335e-05, "loss": 0.8521, "step": 7669 }, { "epoch": 3.4768812330009067, "grad_norm": 0.21094558420661513, "learning_rate": 2.056168471263278e-05, "loss": 0.8846, "step": 7670 }, { "epoch": 3.4773345421577515, "grad_norm": 0.2699038305627486, "learning_rate": 2.0550622137714607e-05, "loss": 0.8836, "step": 7671 }, { "epoch": 3.477787851314597, "grad_norm": 0.21864118916725048, "learning_rate": 2.0539561510708808e-05, "loss": 0.8774, "step": 7672 }, { "epoch": 3.4782411604714416, "grad_norm": 0.20487768401295758, "learning_rate": 2.0528502832723102e-05, "loss": 0.8578, "step": 7673 }, { "epoch": 3.4786944696282864, "grad_norm": 0.2220692361889258, "learning_rate": 2.051744610486507e-05, "loss": 0.8553, "step": 7674 }, { "epoch": 3.4791477787851317, "grad_norm": 0.18708166319192882, "learning_rate": 2.0506391328242065e-05, "loss": 0.8568, "step": 7675 }, { "epoch": 3.4796010879419765, "grad_norm": 0.192717898214449, "learning_rate": 2.0495338503961267e-05, "loss": 0.8513, "step": 7676 }, { "epoch": 3.4800543970988214, "grad_norm": 0.2165819042160191, "learning_rate": 2.0484287633129644e-05, "loss": 0.8615, "step": 7677 }, { "epoch": 3.480507706255666, "grad_norm": 0.1930451844297358, "learning_rate": 2.0473238716853954e-05, "loss": 0.8703, "step": 7678 }, { "epoch": 3.4809610154125115, "grad_norm": 0.2021967165232774, "learning_rate": 2.046219175624081e-05, "loss": 0.8761, "step": 7679 }, { "epoch": 3.4814143245693563, "grad_norm": 0.24771853711782207, "learning_rate": 2.045114675239658e-05, "loss": 0.881, "step": 7680 }, { "epoch": 3.481867633726201, "grad_norm": 0.1840040315665036, "learning_rate": 2.0440103706427442e-05, "loss": 0.858, "step": 7681 }, { "epoch": 3.4823209428830464, "grad_norm": 0.22689109742196983, "learning_rate": 2.0429062619439423e-05, "loss": 0.8701, "step": 7682 }, { "epoch": 3.482774252039891, "grad_norm": 0.2605702933835505, "learning_rate": 2.0418023492538298e-05, "loss": 0.8688, "step": 7683 }, { "epoch": 3.483227561196736, "grad_norm": 0.18542049802935504, "learning_rate": 2.0406986326829653e-05, "loss": 0.8699, "step": 7684 }, { "epoch": 3.4836808703535813, "grad_norm": 0.24536845947311223, "learning_rate": 2.0395951123418926e-05, "loss": 0.8903, "step": 7685 }, { "epoch": 3.484134179510426, "grad_norm": 0.20794050567928019, "learning_rate": 2.0384917883411296e-05, "loss": 0.8611, "step": 7686 }, { "epoch": 3.484587488667271, "grad_norm": 0.22884884680131679, "learning_rate": 2.0373886607911802e-05, "loss": 0.8405, "step": 7687 }, { "epoch": 3.485040797824116, "grad_norm": 0.18729481778663487, "learning_rate": 2.036285729802523e-05, "loss": 0.8554, "step": 7688 }, { "epoch": 3.485494106980961, "grad_norm": 0.17315122122982485, "learning_rate": 2.035182995485622e-05, "loss": 0.8679, "step": 7689 }, { "epoch": 3.485947416137806, "grad_norm": 0.18745776032047848, "learning_rate": 2.0340804579509176e-05, "loss": 0.8764, "step": 7690 }, { "epoch": 3.4864007252946507, "grad_norm": 0.18333588994348235, "learning_rate": 2.0329781173088348e-05, "loss": 0.8567, "step": 7691 }, { "epoch": 3.486854034451496, "grad_norm": 0.2088465090586286, "learning_rate": 2.031875973669774e-05, "loss": 0.8708, "step": 7692 }, { "epoch": 3.487307343608341, "grad_norm": 0.19346630841914003, "learning_rate": 2.0307740271441176e-05, "loss": 0.862, "step": 7693 }, { "epoch": 3.4877606527651857, "grad_norm": 0.20976736667634938, "learning_rate": 2.0296722778422308e-05, "loss": 0.8708, "step": 7694 }, { "epoch": 3.488213961922031, "grad_norm": 0.1760987066982333, "learning_rate": 2.028570725874455e-05, "loss": 0.8668, "step": 7695 }, { "epoch": 3.4886672710788758, "grad_norm": 0.22921949500158015, "learning_rate": 2.027469371351116e-05, "loss": 0.8881, "step": 7696 }, { "epoch": 3.4891205802357206, "grad_norm": 0.20885399961301437, "learning_rate": 2.0263682143825162e-05, "loss": 0.8791, "step": 7697 }, { "epoch": 3.489573889392566, "grad_norm": 0.14494078852377418, "learning_rate": 2.0252672550789408e-05, "loss": 0.8526, "step": 7698 }, { "epoch": 3.4900271985494107, "grad_norm": 0.23477963061950505, "learning_rate": 2.0241664935506533e-05, "loss": 0.8725, "step": 7699 }, { "epoch": 3.4904805077062555, "grad_norm": 0.16810096607247907, "learning_rate": 2.0230659299078982e-05, "loss": 0.8506, "step": 7700 }, { "epoch": 3.490933816863101, "grad_norm": 0.21074136034045, "learning_rate": 2.021965564260899e-05, "loss": 0.8936, "step": 7701 }, { "epoch": 3.4913871260199456, "grad_norm": 0.24925124724234132, "learning_rate": 2.0208653967198638e-05, "loss": 0.8806, "step": 7702 }, { "epoch": 3.4918404351767904, "grad_norm": 0.15585848117563003, "learning_rate": 2.0197654273949743e-05, "loss": 0.8579, "step": 7703 }, { "epoch": 3.4922937443336357, "grad_norm": 0.2196325120263003, "learning_rate": 2.0186656563963983e-05, "loss": 0.8706, "step": 7704 }, { "epoch": 3.4927470534904805, "grad_norm": 0.2150146999232804, "learning_rate": 2.017566083834278e-05, "loss": 0.8538, "step": 7705 }, { "epoch": 3.4932003626473254, "grad_norm": 0.2425815215138058, "learning_rate": 2.016466709818742e-05, "loss": 0.8723, "step": 7706 }, { "epoch": 3.4936536718041706, "grad_norm": 0.2209610104378104, "learning_rate": 2.0153675344598937e-05, "loss": 0.864, "step": 7707 }, { "epoch": 3.4941069809610155, "grad_norm": 0.20821776001259257, "learning_rate": 2.014268557867821e-05, "loss": 0.8702, "step": 7708 }, { "epoch": 3.4945602901178603, "grad_norm": 0.21958358926373048, "learning_rate": 2.0131697801525876e-05, "loss": 0.86, "step": 7709 }, { "epoch": 3.4950135992747056, "grad_norm": 0.2564736179633967, "learning_rate": 2.012071201424239e-05, "loss": 0.8591, "step": 7710 }, { "epoch": 3.4954669084315504, "grad_norm": 0.16891844624872382, "learning_rate": 2.010972821792803e-05, "loss": 0.8845, "step": 7711 }, { "epoch": 3.495920217588395, "grad_norm": 0.21795103756454884, "learning_rate": 2.009874641368283e-05, "loss": 0.8747, "step": 7712 }, { "epoch": 3.4963735267452405, "grad_norm": 0.18699541039491047, "learning_rate": 2.0087766602606687e-05, "loss": 0.8419, "step": 7713 }, { "epoch": 3.4968268359020853, "grad_norm": 0.23282387915399091, "learning_rate": 2.0076788785799222e-05, "loss": 0.8637, "step": 7714 }, { "epoch": 3.49728014505893, "grad_norm": 0.18050765384205564, "learning_rate": 2.0065812964359926e-05, "loss": 0.8445, "step": 7715 }, { "epoch": 3.497733454215775, "grad_norm": 0.20214551104405679, "learning_rate": 2.005483913938803e-05, "loss": 0.8672, "step": 7716 }, { "epoch": 3.4981867633726202, "grad_norm": 0.176653190160293, "learning_rate": 2.0043867311982634e-05, "loss": 0.8562, "step": 7717 }, { "epoch": 3.498640072529465, "grad_norm": 0.22290984991319626, "learning_rate": 2.003289748324257e-05, "loss": 0.8575, "step": 7718 }, { "epoch": 3.49909338168631, "grad_norm": 0.19670027143795368, "learning_rate": 2.002192965426651e-05, "loss": 0.8541, "step": 7719 }, { "epoch": 3.499546690843155, "grad_norm": 0.19097007320263673, "learning_rate": 2.0010963826152895e-05, "loss": 0.8543, "step": 7720 }, { "epoch": 3.5, "grad_norm": 0.21960957409842669, "learning_rate": 2.0000000000000012e-05, "loss": 0.8742, "step": 7721 }, { "epoch": 3.500453309156845, "grad_norm": 0.21140242001560597, "learning_rate": 1.998903817690589e-05, "loss": 0.8728, "step": 7722 }, { "epoch": 3.50090661831369, "grad_norm": 0.21439050630822257, "learning_rate": 1.9978078357968423e-05, "loss": 0.8716, "step": 7723 }, { "epoch": 3.501359927470535, "grad_norm": 0.19503482318541962, "learning_rate": 1.9967120544285254e-05, "loss": 0.883, "step": 7724 }, { "epoch": 3.5018132366273798, "grad_norm": 0.1949150999295767, "learning_rate": 1.995616473695382e-05, "loss": 0.8484, "step": 7725 }, { "epoch": 3.5022665457842246, "grad_norm": 0.18321078717462555, "learning_rate": 1.9945210937071415e-05, "loss": 0.8659, "step": 7726 }, { "epoch": 3.50271985494107, "grad_norm": 0.17364557248504836, "learning_rate": 1.9934259145735052e-05, "loss": 0.874, "step": 7727 }, { "epoch": 3.5031731640979147, "grad_norm": 0.2225927780176339, "learning_rate": 1.9923309364041633e-05, "loss": 0.8727, "step": 7728 }, { "epoch": 3.5036264732547595, "grad_norm": 0.17800535094646863, "learning_rate": 1.9912361593087763e-05, "loss": 0.8656, "step": 7729 }, { "epoch": 3.504079782411605, "grad_norm": 0.18236294905558117, "learning_rate": 1.990141583396993e-05, "loss": 0.8852, "step": 7730 }, { "epoch": 3.5045330915684496, "grad_norm": 0.21256820983924807, "learning_rate": 1.989047208778436e-05, "loss": 0.862, "step": 7731 }, { "epoch": 3.5049864007252944, "grad_norm": 0.17020795096984412, "learning_rate": 1.9879530355627122e-05, "loss": 0.8498, "step": 7732 }, { "epoch": 3.5054397098821397, "grad_norm": 0.2338800217885927, "learning_rate": 1.986859063859404e-05, "loss": 0.86, "step": 7733 }, { "epoch": 3.5058930190389845, "grad_norm": 0.2001605152223514, "learning_rate": 1.985765293778078e-05, "loss": 0.8701, "step": 7734 }, { "epoch": 3.5063463281958294, "grad_norm": 0.24860532279867714, "learning_rate": 1.9846717254282785e-05, "loss": 0.8656, "step": 7735 }, { "epoch": 3.5067996373526746, "grad_norm": 0.23440373042689505, "learning_rate": 1.9835783589195262e-05, "loss": 0.881, "step": 7736 }, { "epoch": 3.5072529465095195, "grad_norm": 0.19581435730398555, "learning_rate": 1.9824851943613294e-05, "loss": 0.8462, "step": 7737 }, { "epoch": 3.5077062556663643, "grad_norm": 0.2503116310607518, "learning_rate": 1.9813922318631698e-05, "loss": 0.8697, "step": 7738 }, { "epoch": 3.5081595648232096, "grad_norm": 0.2429389194128982, "learning_rate": 1.9802994715345108e-05, "loss": 0.8551, "step": 7739 }, { "epoch": 3.5086128739800544, "grad_norm": 0.22923552197329647, "learning_rate": 1.9792069134847938e-05, "loss": 0.8735, "step": 7740 }, { "epoch": 3.509066183136899, "grad_norm": 0.18657073568484103, "learning_rate": 1.978114557823445e-05, "loss": 0.8813, "step": 7741 }, { "epoch": 3.5095194922937445, "grad_norm": 0.2354399904353071, "learning_rate": 1.9770224046598638e-05, "loss": 0.8775, "step": 7742 }, { "epoch": 3.5099728014505893, "grad_norm": 0.18761494298209472, "learning_rate": 1.975930454103436e-05, "loss": 0.8769, "step": 7743 }, { "epoch": 3.510426110607434, "grad_norm": 0.21846635053425428, "learning_rate": 1.97483870626352e-05, "loss": 0.8622, "step": 7744 }, { "epoch": 3.5108794197642794, "grad_norm": 0.15931053811486845, "learning_rate": 1.973747161249461e-05, "loss": 0.8271, "step": 7745 }, { "epoch": 3.5113327289211242, "grad_norm": 0.19729073970988853, "learning_rate": 1.9726558191705772e-05, "loss": 0.8483, "step": 7746 }, { "epoch": 3.511786038077969, "grad_norm": 0.18917776133783226, "learning_rate": 1.9715646801361738e-05, "loss": 0.8538, "step": 7747 }, { "epoch": 3.5122393472348143, "grad_norm": 0.15889289177894, "learning_rate": 1.9704737442555263e-05, "loss": 0.8619, "step": 7748 }, { "epoch": 3.512692656391659, "grad_norm": 0.21388851187024527, "learning_rate": 1.9693830116379003e-05, "loss": 0.8603, "step": 7749 }, { "epoch": 3.513145965548504, "grad_norm": 0.16387178744656675, "learning_rate": 1.9682924823925336e-05, "loss": 0.8588, "step": 7750 }, { "epoch": 3.5135992747053493, "grad_norm": 0.18829656346998083, "learning_rate": 1.967202156628644e-05, "loss": 0.8934, "step": 7751 }, { "epoch": 3.514052583862194, "grad_norm": 0.23661176249109672, "learning_rate": 1.9661120344554346e-05, "loss": 0.884, "step": 7752 }, { "epoch": 3.514505893019039, "grad_norm": 0.26198910470950876, "learning_rate": 1.9650221159820803e-05, "loss": 0.8664, "step": 7753 }, { "epoch": 3.514959202175884, "grad_norm": 0.21834162409988156, "learning_rate": 1.963932401317744e-05, "loss": 0.8725, "step": 7754 }, { "epoch": 3.515412511332729, "grad_norm": 0.20946525797352314, "learning_rate": 1.9628428905715596e-05, "loss": 0.9051, "step": 7755 }, { "epoch": 3.515865820489574, "grad_norm": 0.22812769353458962, "learning_rate": 1.9617535838526486e-05, "loss": 0.8778, "step": 7756 }, { "epoch": 3.516319129646419, "grad_norm": 0.2136733505907888, "learning_rate": 1.9606644812701062e-05, "loss": 0.8686, "step": 7757 }, { "epoch": 3.516772438803264, "grad_norm": 0.1963597371202185, "learning_rate": 1.9595755829330097e-05, "loss": 0.8727, "step": 7758 }, { "epoch": 3.517225747960109, "grad_norm": 0.22306381378505907, "learning_rate": 1.9584868889504133e-05, "loss": 0.8789, "step": 7759 }, { "epoch": 3.5176790571169536, "grad_norm": 0.1895896167459454, "learning_rate": 1.957398399431357e-05, "loss": 0.869, "step": 7760 }, { "epoch": 3.518132366273799, "grad_norm": 0.1864463103063664, "learning_rate": 1.956310114484852e-05, "loss": 0.8924, "step": 7761 }, { "epoch": 3.5185856754306437, "grad_norm": 0.16513617669702488, "learning_rate": 1.9552220342198973e-05, "loss": 0.8665, "step": 7762 }, { "epoch": 3.5190389845874885, "grad_norm": 0.1702654442668788, "learning_rate": 1.954134158745464e-05, "loss": 0.8697, "step": 7763 }, { "epoch": 3.5194922937443334, "grad_norm": 0.15894840772752147, "learning_rate": 1.9530464881705083e-05, "loss": 0.8597, "step": 7764 }, { "epoch": 3.5199456029011786, "grad_norm": 0.17367993960643144, "learning_rate": 1.9519590226039634e-05, "loss": 0.8501, "step": 7765 }, { "epoch": 3.5203989120580235, "grad_norm": 0.15952197555901607, "learning_rate": 1.950871762154739e-05, "loss": 0.8293, "step": 7766 }, { "epoch": 3.5208522212148683, "grad_norm": 0.1799164603549324, "learning_rate": 1.949784706931732e-05, "loss": 0.8792, "step": 7767 }, { "epoch": 3.5213055303717136, "grad_norm": 0.18546211328202575, "learning_rate": 1.9486978570438107e-05, "loss": 0.8614, "step": 7768 }, { "epoch": 3.5217588395285584, "grad_norm": 0.15791583179027474, "learning_rate": 1.9476112125998293e-05, "loss": 0.8651, "step": 7769 }, { "epoch": 3.522212148685403, "grad_norm": 0.22120419970192226, "learning_rate": 1.946524773708615e-05, "loss": 0.8677, "step": 7770 }, { "epoch": 3.5226654578422485, "grad_norm": 0.18386555353664574, "learning_rate": 1.9454385404789812e-05, "loss": 0.8644, "step": 7771 }, { "epoch": 3.5231187669990933, "grad_norm": 0.21248392333301366, "learning_rate": 1.9443525130197142e-05, "loss": 0.8461, "step": 7772 }, { "epoch": 3.523572076155938, "grad_norm": 0.2122315606132163, "learning_rate": 1.9432666914395863e-05, "loss": 0.8751, "step": 7773 }, { "epoch": 3.5240253853127834, "grad_norm": 0.20461617219544095, "learning_rate": 1.942181075847342e-05, "loss": 0.8891, "step": 7774 }, { "epoch": 3.5244786944696282, "grad_norm": 0.20993829900276234, "learning_rate": 1.9410956663517124e-05, "loss": 0.8798, "step": 7775 }, { "epoch": 3.524932003626473, "grad_norm": 0.2603708095551194, "learning_rate": 1.940010463061403e-05, "loss": 0.8747, "step": 7776 }, { "epoch": 3.5253853127833183, "grad_norm": 0.24039665654533532, "learning_rate": 1.9389254660850997e-05, "loss": 0.8642, "step": 7777 }, { "epoch": 3.525838621940163, "grad_norm": 0.19316737469148182, "learning_rate": 1.937840675531467e-05, "loss": 0.8614, "step": 7778 }, { "epoch": 3.526291931097008, "grad_norm": 0.32295171770826, "learning_rate": 1.9367560915091524e-05, "loss": 0.8516, "step": 7779 }, { "epoch": 3.5267452402538533, "grad_norm": 0.28417845837292727, "learning_rate": 1.9356717141267793e-05, "loss": 0.8522, "step": 7780 }, { "epoch": 3.527198549410698, "grad_norm": 0.21214861824934142, "learning_rate": 1.9345875434929496e-05, "loss": 0.8844, "step": 7781 }, { "epoch": 3.527651858567543, "grad_norm": 0.2871870392284906, "learning_rate": 1.933503579716249e-05, "loss": 0.8783, "step": 7782 }, { "epoch": 3.528105167724388, "grad_norm": 0.2442518112237541, "learning_rate": 1.9324198229052364e-05, "loss": 0.8579, "step": 7783 }, { "epoch": 3.528558476881233, "grad_norm": 0.2085532346847097, "learning_rate": 1.931336273168457e-05, "loss": 0.8545, "step": 7784 }, { "epoch": 3.529011786038078, "grad_norm": 0.21449173476164193, "learning_rate": 1.9302529306144273e-05, "loss": 0.8676, "step": 7785 }, { "epoch": 3.529465095194923, "grad_norm": 0.19475011283946755, "learning_rate": 1.9291697953516517e-05, "loss": 0.8611, "step": 7786 }, { "epoch": 3.529918404351768, "grad_norm": 0.18013682324923835, "learning_rate": 1.9280868674886057e-05, "loss": 0.8733, "step": 7787 }, { "epoch": 3.530371713508613, "grad_norm": 0.17125946926399013, "learning_rate": 1.92700414713375e-05, "loss": 0.864, "step": 7788 }, { "epoch": 3.530825022665458, "grad_norm": 0.18304420837060537, "learning_rate": 1.9259216343955205e-05, "loss": 0.8758, "step": 7789 }, { "epoch": 3.531278331822303, "grad_norm": 0.21072244871135523, "learning_rate": 1.9248393293823357e-05, "loss": 0.8623, "step": 7790 }, { "epoch": 3.5317316409791477, "grad_norm": 0.22718102255236955, "learning_rate": 1.9237572322025918e-05, "loss": 0.8582, "step": 7791 }, { "epoch": 3.532184950135993, "grad_norm": 0.17878851365887405, "learning_rate": 1.922675342964661e-05, "loss": 0.8656, "step": 7792 }, { "epoch": 3.532638259292838, "grad_norm": 0.24287873874625307, "learning_rate": 1.921593661776901e-05, "loss": 0.8487, "step": 7793 }, { "epoch": 3.5330915684496826, "grad_norm": 0.20421790605077098, "learning_rate": 1.920512188747643e-05, "loss": 0.862, "step": 7794 }, { "epoch": 3.533544877606528, "grad_norm": 0.2232240114051508, "learning_rate": 1.9194309239852038e-05, "loss": 0.8603, "step": 7795 }, { "epoch": 3.5339981867633727, "grad_norm": 0.21257523447200524, "learning_rate": 1.9183498675978694e-05, "loss": 0.8608, "step": 7796 }, { "epoch": 3.5344514959202176, "grad_norm": 0.180895270510944, "learning_rate": 1.9172690196939148e-05, "loss": 0.8671, "step": 7797 }, { "epoch": 3.5349048050770624, "grad_norm": 0.196662527630458, "learning_rate": 1.9161883803815873e-05, "loss": 0.8562, "step": 7798 }, { "epoch": 3.5353581142339077, "grad_norm": 0.18317690097057104, "learning_rate": 1.9151079497691192e-05, "loss": 0.8573, "step": 7799 }, { "epoch": 3.5358114233907525, "grad_norm": 0.22032055388759297, "learning_rate": 1.9140277279647163e-05, "loss": 0.8862, "step": 7800 }, { "epoch": 3.5362647325475973, "grad_norm": 0.19980912907655526, "learning_rate": 1.9129477150765682e-05, "loss": 0.8666, "step": 7801 }, { "epoch": 3.5367180417044426, "grad_norm": 0.1772357451724393, "learning_rate": 1.9118679112128388e-05, "loss": 0.8553, "step": 7802 }, { "epoch": 3.5371713508612874, "grad_norm": 0.18527339126713013, "learning_rate": 1.9107883164816762e-05, "loss": 0.858, "step": 7803 }, { "epoch": 3.5376246600181322, "grad_norm": 0.18742354929519614, "learning_rate": 1.9097089309912026e-05, "loss": 0.8653, "step": 7804 }, { "epoch": 3.538077969174977, "grad_norm": 0.16614957055171103, "learning_rate": 1.9086297548495242e-05, "loss": 0.8498, "step": 7805 }, { "epoch": 3.5385312783318223, "grad_norm": 0.16491015798900233, "learning_rate": 1.9075507881647223e-05, "loss": 0.8557, "step": 7806 }, { "epoch": 3.538984587488667, "grad_norm": 0.17184914463058032, "learning_rate": 1.906472031044857e-05, "loss": 0.8632, "step": 7807 }, { "epoch": 3.539437896645512, "grad_norm": 0.1913621709609223, "learning_rate": 1.9053934835979723e-05, "loss": 0.8908, "step": 7808 }, { "epoch": 3.5398912058023573, "grad_norm": 0.1793351045588777, "learning_rate": 1.9043151459320838e-05, "loss": 0.8668, "step": 7809 }, { "epoch": 3.540344514959202, "grad_norm": 0.20503664593257212, "learning_rate": 1.903237018155195e-05, "loss": 0.846, "step": 7810 }, { "epoch": 3.540797824116047, "grad_norm": 0.18397241359524244, "learning_rate": 1.902159100375279e-05, "loss": 0.8548, "step": 7811 }, { "epoch": 3.541251133272892, "grad_norm": 0.35481857699691377, "learning_rate": 1.9010813927002964e-05, "loss": 0.8619, "step": 7812 }, { "epoch": 3.541704442429737, "grad_norm": 0.23277592665610947, "learning_rate": 1.9000038952381798e-05, "loss": 0.8829, "step": 7813 }, { "epoch": 3.542157751586582, "grad_norm": 0.22595792699305425, "learning_rate": 1.898926608096847e-05, "loss": 0.8691, "step": 7814 }, { "epoch": 3.542611060743427, "grad_norm": 0.19235288126998773, "learning_rate": 1.897849531384187e-05, "loss": 0.8519, "step": 7815 }, { "epoch": 3.543064369900272, "grad_norm": 0.22056360746524736, "learning_rate": 1.8967726652080758e-05, "loss": 0.8649, "step": 7816 }, { "epoch": 3.543517679057117, "grad_norm": 0.20352119434995755, "learning_rate": 1.8956960096763616e-05, "loss": 0.8607, "step": 7817 }, { "epoch": 3.543970988213962, "grad_norm": 0.1813285344750851, "learning_rate": 1.8946195648968785e-05, "loss": 0.8494, "step": 7818 }, { "epoch": 3.544424297370807, "grad_norm": 0.1881611284109176, "learning_rate": 1.8935433309774316e-05, "loss": 0.861, "step": 7819 }, { "epoch": 3.5448776065276517, "grad_norm": 0.1710087740827164, "learning_rate": 1.8924673080258128e-05, "loss": 0.8588, "step": 7820 }, { "epoch": 3.545330915684497, "grad_norm": 0.204670984127969, "learning_rate": 1.8913914961497866e-05, "loss": 0.8671, "step": 7821 }, { "epoch": 3.545784224841342, "grad_norm": 0.16775259044427956, "learning_rate": 1.890315895457098e-05, "loss": 0.8739, "step": 7822 }, { "epoch": 3.5462375339981866, "grad_norm": 0.19719445384059173, "learning_rate": 1.889240506055474e-05, "loss": 0.8813, "step": 7823 }, { "epoch": 3.546690843155032, "grad_norm": 0.20016397556569282, "learning_rate": 1.888165328052616e-05, "loss": 0.8756, "step": 7824 }, { "epoch": 3.5471441523118767, "grad_norm": 0.2186886041600053, "learning_rate": 1.8870903615562076e-05, "loss": 0.8708, "step": 7825 }, { "epoch": 3.5475974614687216, "grad_norm": 0.13857443903065478, "learning_rate": 1.8860156066739077e-05, "loss": 0.8573, "step": 7826 }, { "epoch": 3.548050770625567, "grad_norm": 0.22427527495128696, "learning_rate": 1.8849410635133595e-05, "loss": 0.8684, "step": 7827 }, { "epoch": 3.5485040797824117, "grad_norm": 0.19900729309861628, "learning_rate": 1.8838667321821786e-05, "loss": 0.8919, "step": 7828 }, { "epoch": 3.5489573889392565, "grad_norm": 0.1817579074357655, "learning_rate": 1.8827926127879645e-05, "loss": 0.8653, "step": 7829 }, { "epoch": 3.5494106980961018, "grad_norm": 0.21279979844047647, "learning_rate": 1.881718705438291e-05, "loss": 0.8738, "step": 7830 }, { "epoch": 3.5498640072529466, "grad_norm": 0.17565335582874478, "learning_rate": 1.8806450102407168e-05, "loss": 0.8516, "step": 7831 }, { "epoch": 3.5503173164097914, "grad_norm": 0.1948419575864782, "learning_rate": 1.8795715273027736e-05, "loss": 0.8778, "step": 7832 }, { "epoch": 3.5507706255666367, "grad_norm": 0.1718527906798051, "learning_rate": 1.8784982567319718e-05, "loss": 0.8706, "step": 7833 }, { "epoch": 3.5512239347234815, "grad_norm": 0.1967726756045344, "learning_rate": 1.877425198635806e-05, "loss": 0.8546, "step": 7834 }, { "epoch": 3.5516772438803264, "grad_norm": 0.16891512547988854, "learning_rate": 1.8763523531217452e-05, "loss": 0.8531, "step": 7835 }, { "epoch": 3.5521305530371716, "grad_norm": 0.1978340800109562, "learning_rate": 1.875279720297237e-05, "loss": 0.8725, "step": 7836 }, { "epoch": 3.5525838621940165, "grad_norm": 0.2260739469549671, "learning_rate": 1.8742073002697076e-05, "loss": 0.8836, "step": 7837 }, { "epoch": 3.5530371713508613, "grad_norm": 0.15530606596903093, "learning_rate": 1.8731350931465667e-05, "loss": 0.8684, "step": 7838 }, { "epoch": 3.553490480507706, "grad_norm": 0.2315872997034847, "learning_rate": 1.8720630990351956e-05, "loss": 0.8823, "step": 7839 }, { "epoch": 3.5539437896645514, "grad_norm": 0.16882545257778944, "learning_rate": 1.8709913180429596e-05, "loss": 0.8713, "step": 7840 }, { "epoch": 3.554397098821396, "grad_norm": 0.21738008063922284, "learning_rate": 1.8699197502771995e-05, "loss": 0.8875, "step": 7841 }, { "epoch": 3.554850407978241, "grad_norm": 0.2502658402044779, "learning_rate": 1.868848395845238e-05, "loss": 0.8559, "step": 7842 }, { "epoch": 3.555303717135086, "grad_norm": 0.15354621655936504, "learning_rate": 1.8677772548543713e-05, "loss": 0.886, "step": 7843 }, { "epoch": 3.555757026291931, "grad_norm": 0.21497518689203812, "learning_rate": 1.8667063274118806e-05, "loss": 0.8646, "step": 7844 }, { "epoch": 3.556210335448776, "grad_norm": 0.1900848494436193, "learning_rate": 1.8656356136250192e-05, "loss": 0.8697, "step": 7845 }, { "epoch": 3.556663644605621, "grad_norm": 0.1625172078751201, "learning_rate": 1.8645651136010255e-05, "loss": 0.8651, "step": 7846 }, { "epoch": 3.557116953762466, "grad_norm": 0.2237986096859722, "learning_rate": 1.8634948274471118e-05, "loss": 0.8848, "step": 7847 }, { "epoch": 3.557570262919311, "grad_norm": 0.19351361642288545, "learning_rate": 1.862424755270468e-05, "loss": 0.8507, "step": 7848 }, { "epoch": 3.5580235720761557, "grad_norm": 0.19082189282626652, "learning_rate": 1.861354897178269e-05, "loss": 0.8622, "step": 7849 }, { "epoch": 3.558476881233001, "grad_norm": 0.21961190517318144, "learning_rate": 1.8602852532776615e-05, "loss": 0.8639, "step": 7850 }, { "epoch": 3.558930190389846, "grad_norm": 0.1626989413174897, "learning_rate": 1.859215823675775e-05, "loss": 0.8654, "step": 7851 }, { "epoch": 3.5593834995466906, "grad_norm": 0.20011460701008651, "learning_rate": 1.8581466084797148e-05, "loss": 0.871, "step": 7852 }, { "epoch": 3.559836808703536, "grad_norm": 0.15676117689917068, "learning_rate": 1.8570776077965693e-05, "loss": 0.8741, "step": 7853 }, { "epoch": 3.5602901178603807, "grad_norm": 0.18918601847364747, "learning_rate": 1.856008821733396e-05, "loss": 0.8801, "step": 7854 }, { "epoch": 3.5607434270172256, "grad_norm": 0.19612350711599222, "learning_rate": 1.8549402503972418e-05, "loss": 0.8844, "step": 7855 }, { "epoch": 3.561196736174071, "grad_norm": 0.19433391032674002, "learning_rate": 1.853871893895124e-05, "loss": 0.8726, "step": 7856 }, { "epoch": 3.5616500453309157, "grad_norm": 0.17501047354988672, "learning_rate": 1.8528037523340452e-05, "loss": 0.8691, "step": 7857 }, { "epoch": 3.5621033544877605, "grad_norm": 0.1904622005124782, "learning_rate": 1.85173582582098e-05, "loss": 0.867, "step": 7858 }, { "epoch": 3.5625566636446058, "grad_norm": 0.19484877540049433, "learning_rate": 1.850668114462886e-05, "loss": 0.885, "step": 7859 }, { "epoch": 3.5630099728014506, "grad_norm": 0.15934863604075908, "learning_rate": 1.8496006183666962e-05, "loss": 0.8943, "step": 7860 }, { "epoch": 3.5634632819582954, "grad_norm": 0.18651001862345548, "learning_rate": 1.848533337639326e-05, "loss": 0.8788, "step": 7861 }, { "epoch": 3.5639165911151407, "grad_norm": 0.2144312323771961, "learning_rate": 1.8474662723876654e-05, "loss": 0.8703, "step": 7862 }, { "epoch": 3.5643699002719855, "grad_norm": 0.183576947861269, "learning_rate": 1.8463994227185815e-05, "loss": 0.8764, "step": 7863 }, { "epoch": 3.5648232094288304, "grad_norm": 0.18940004774130778, "learning_rate": 1.845332788738927e-05, "loss": 0.8823, "step": 7864 }, { "epoch": 3.5652765185856756, "grad_norm": 0.24358915965285427, "learning_rate": 1.844266370555524e-05, "loss": 0.8596, "step": 7865 }, { "epoch": 3.5657298277425205, "grad_norm": 0.26859786567847255, "learning_rate": 1.843200168275181e-05, "loss": 0.8721, "step": 7866 }, { "epoch": 3.5661831368993653, "grad_norm": 0.1598314051607467, "learning_rate": 1.8421341820046787e-05, "loss": 0.8661, "step": 7867 }, { "epoch": 3.5666364460562106, "grad_norm": 0.19320039553185558, "learning_rate": 1.8410684118507817e-05, "loss": 0.8773, "step": 7868 }, { "epoch": 3.5670897552130554, "grad_norm": 0.2273823776408868, "learning_rate": 1.840002857920226e-05, "loss": 0.8743, "step": 7869 }, { "epoch": 3.5675430643699, "grad_norm": 0.1740356395338686, "learning_rate": 1.8389375203197345e-05, "loss": 0.8793, "step": 7870 }, { "epoch": 3.5679963735267455, "grad_norm": 0.16895235905920655, "learning_rate": 1.8378723991559994e-05, "loss": 0.8673, "step": 7871 }, { "epoch": 3.5684496826835903, "grad_norm": 0.23787733174686743, "learning_rate": 1.8368074945357013e-05, "loss": 0.8811, "step": 7872 }, { "epoch": 3.568902991840435, "grad_norm": 0.16201609939485964, "learning_rate": 1.8357428065654874e-05, "loss": 0.8695, "step": 7873 }, { "epoch": 3.5693563009972804, "grad_norm": 0.1704870283174212, "learning_rate": 1.834678335351993e-05, "loss": 0.8654, "step": 7874 }, { "epoch": 3.5698096101541252, "grad_norm": 0.1994890398511594, "learning_rate": 1.8336140810018256e-05, "loss": 0.8589, "step": 7875 }, { "epoch": 3.57026291931097, "grad_norm": 0.17637753759789845, "learning_rate": 1.8325500436215767e-05, "loss": 0.8642, "step": 7876 }, { "epoch": 3.570716228467815, "grad_norm": 0.15163223820018826, "learning_rate": 1.8314862233178093e-05, "loss": 0.8603, "step": 7877 }, { "epoch": 3.57116953762466, "grad_norm": 0.18104926922954168, "learning_rate": 1.830422620197071e-05, "loss": 0.8823, "step": 7878 }, { "epoch": 3.571622846781505, "grad_norm": 0.15971587493097156, "learning_rate": 1.829359234365884e-05, "loss": 0.855, "step": 7879 }, { "epoch": 3.57207615593835, "grad_norm": 0.18282674603847768, "learning_rate": 1.8282960659307476e-05, "loss": 0.8563, "step": 7880 }, { "epoch": 3.572529465095195, "grad_norm": 0.18817726587611433, "learning_rate": 1.8272331149981437e-05, "loss": 0.8597, "step": 7881 }, { "epoch": 3.57298277425204, "grad_norm": 0.14523669984763996, "learning_rate": 1.826170381674528e-05, "loss": 0.8684, "step": 7882 }, { "epoch": 3.5734360834088847, "grad_norm": 0.17609776345106565, "learning_rate": 1.8251078660663383e-05, "loss": 0.8743, "step": 7883 }, { "epoch": 3.5738893925657296, "grad_norm": 0.18048080085025123, "learning_rate": 1.824045568279987e-05, "loss": 0.8744, "step": 7884 }, { "epoch": 3.574342701722575, "grad_norm": 0.17266207206803902, "learning_rate": 1.822983488421868e-05, "loss": 0.8596, "step": 7885 }, { "epoch": 3.5747960108794197, "grad_norm": 0.1426709225459361, "learning_rate": 1.8219216265983494e-05, "loss": 0.8674, "step": 7886 }, { "epoch": 3.5752493200362645, "grad_norm": 0.18030543477947478, "learning_rate": 1.8208599829157826e-05, "loss": 0.8761, "step": 7887 }, { "epoch": 3.5757026291931098, "grad_norm": 0.15742200264915723, "learning_rate": 1.8197985574804927e-05, "loss": 0.8697, "step": 7888 }, { "epoch": 3.5761559383499546, "grad_norm": 0.16684499397492691, "learning_rate": 1.8187373503987837e-05, "loss": 0.8644, "step": 7889 }, { "epoch": 3.5766092475067994, "grad_norm": 0.18158353307330655, "learning_rate": 1.8176763617769406e-05, "loss": 0.8597, "step": 7890 }, { "epoch": 3.5770625566636447, "grad_norm": 0.1836262296386174, "learning_rate": 1.8166155917212242e-05, "loss": 0.8697, "step": 7891 }, { "epoch": 3.5775158658204895, "grad_norm": 0.17441594585558243, "learning_rate": 1.8155550403378712e-05, "loss": 0.8466, "step": 7892 }, { "epoch": 3.5779691749773344, "grad_norm": 0.20936430190464006, "learning_rate": 1.8144947077331027e-05, "loss": 0.8557, "step": 7893 }, { "epoch": 3.5784224841341796, "grad_norm": 0.1476812345752682, "learning_rate": 1.813434594013112e-05, "loss": 0.8453, "step": 7894 }, { "epoch": 3.5788757932910245, "grad_norm": 0.19119905703819437, "learning_rate": 1.8123746992840714e-05, "loss": 0.8771, "step": 7895 }, { "epoch": 3.5793291024478693, "grad_norm": 0.16677136589486224, "learning_rate": 1.8113150236521358e-05, "loss": 0.8646, "step": 7896 }, { "epoch": 3.5797824116047146, "grad_norm": 0.16404769025653995, "learning_rate": 1.810255567223431e-05, "loss": 0.8506, "step": 7897 }, { "epoch": 3.5802357207615594, "grad_norm": 0.16754799816391774, "learning_rate": 1.8091963301040688e-05, "loss": 0.876, "step": 7898 }, { "epoch": 3.580689029918404, "grad_norm": 0.16348319533841688, "learning_rate": 1.8081373124001313e-05, "loss": 0.8651, "step": 7899 }, { "epoch": 3.5811423390752495, "grad_norm": 0.13835896013158888, "learning_rate": 1.8070785142176847e-05, "loss": 0.8612, "step": 7900 }, { "epoch": 3.5815956482320943, "grad_norm": 0.16905024767742888, "learning_rate": 1.8060199356627682e-05, "loss": 0.8863, "step": 7901 }, { "epoch": 3.582048957388939, "grad_norm": 0.13502962639350827, "learning_rate": 1.804961576841405e-05, "loss": 0.8744, "step": 7902 }, { "epoch": 3.5825022665457844, "grad_norm": 0.18221246582298622, "learning_rate": 1.8039034378595898e-05, "loss": 0.8699, "step": 7903 }, { "epoch": 3.5829555757026292, "grad_norm": 0.17072729061530495, "learning_rate": 1.8028455188233003e-05, "loss": 0.8788, "step": 7904 }, { "epoch": 3.583408884859474, "grad_norm": 0.17107369266365866, "learning_rate": 1.8017878198384897e-05, "loss": 0.8883, "step": 7905 }, { "epoch": 3.5838621940163193, "grad_norm": 0.16033148077739845, "learning_rate": 1.800730341011088e-05, "loss": 0.8842, "step": 7906 }, { "epoch": 3.584315503173164, "grad_norm": 0.16958901561495893, "learning_rate": 1.799673082447008e-05, "loss": 0.8519, "step": 7907 }, { "epoch": 3.584768812330009, "grad_norm": 0.175229286106887, "learning_rate": 1.7986160442521333e-05, "loss": 0.8684, "step": 7908 }, { "epoch": 3.5852221214868543, "grad_norm": 0.20163858939342325, "learning_rate": 1.7975592265323332e-05, "loss": 0.886, "step": 7909 }, { "epoch": 3.585675430643699, "grad_norm": 0.15731842693487647, "learning_rate": 1.7965026293934493e-05, "loss": 0.8472, "step": 7910 }, { "epoch": 3.586128739800544, "grad_norm": 0.21916975324628035, "learning_rate": 1.7954462529413032e-05, "loss": 0.8653, "step": 7911 }, { "epoch": 3.586582048957389, "grad_norm": 0.15007466328742503, "learning_rate": 1.7943900972816926e-05, "loss": 0.8514, "step": 7912 }, { "epoch": 3.587035358114234, "grad_norm": 0.1894149684184324, "learning_rate": 1.7933341625203972e-05, "loss": 0.8467, "step": 7913 }, { "epoch": 3.587488667271079, "grad_norm": 0.15828874899348394, "learning_rate": 1.792278448763169e-05, "loss": 0.87, "step": 7914 }, { "epoch": 3.587941976427924, "grad_norm": 0.18252985010726203, "learning_rate": 1.7912229561157444e-05, "loss": 0.8689, "step": 7915 }, { "epoch": 3.588395285584769, "grad_norm": 0.19983547194998527, "learning_rate": 1.7901676846838305e-05, "loss": 0.8541, "step": 7916 }, { "epoch": 3.5888485947416138, "grad_norm": 0.1901724872570542, "learning_rate": 1.7891126345731195e-05, "loss": 0.8551, "step": 7917 }, { "epoch": 3.5893019038984586, "grad_norm": 0.17059496588677145, "learning_rate": 1.788057805889274e-05, "loss": 0.8413, "step": 7918 }, { "epoch": 3.589755213055304, "grad_norm": 0.17159371790761535, "learning_rate": 1.7870031987379413e-05, "loss": 0.8508, "step": 7919 }, { "epoch": 3.5902085222121487, "grad_norm": 0.18287314245267283, "learning_rate": 1.785948813224742e-05, "loss": 0.8562, "step": 7920 }, { "epoch": 3.5906618313689935, "grad_norm": 0.1633411851528427, "learning_rate": 1.784894649455275e-05, "loss": 0.8579, "step": 7921 }, { "epoch": 3.5911151405258384, "grad_norm": 0.19917288388300186, "learning_rate": 1.7838407075351205e-05, "loss": 0.8846, "step": 7922 }, { "epoch": 3.5915684496826836, "grad_norm": 0.16749809134904045, "learning_rate": 1.7827869875698302e-05, "loss": 0.8696, "step": 7923 }, { "epoch": 3.5920217588395285, "grad_norm": 0.20971190234556583, "learning_rate": 1.7817334896649416e-05, "loss": 0.8652, "step": 7924 }, { "epoch": 3.5924750679963733, "grad_norm": 0.19184163827901518, "learning_rate": 1.7806802139259612e-05, "loss": 0.8643, "step": 7925 }, { "epoch": 3.5929283771532186, "grad_norm": 0.16712209823309648, "learning_rate": 1.779627160458382e-05, "loss": 0.8927, "step": 7926 }, { "epoch": 3.5933816863100634, "grad_norm": 0.1953734286388986, "learning_rate": 1.778574329367666e-05, "loss": 0.8593, "step": 7927 }, { "epoch": 3.593834995466908, "grad_norm": 0.17195240517343016, "learning_rate": 1.777521720759262e-05, "loss": 0.8721, "step": 7928 }, { "epoch": 3.5942883046237535, "grad_norm": 0.22102011859468626, "learning_rate": 1.7764693347385868e-05, "loss": 0.8671, "step": 7929 }, { "epoch": 3.5947416137805983, "grad_norm": 0.18394907746250788, "learning_rate": 1.7754171714110463e-05, "loss": 0.8533, "step": 7930 }, { "epoch": 3.595194922937443, "grad_norm": 0.1937066318444046, "learning_rate": 1.774365230882011e-05, "loss": 0.868, "step": 7931 }, { "epoch": 3.5956482320942884, "grad_norm": 0.21559139040056582, "learning_rate": 1.7733135132568402e-05, "loss": 0.8474, "step": 7932 }, { "epoch": 3.5961015412511332, "grad_norm": 0.17683996671416838, "learning_rate": 1.7722620186408638e-05, "loss": 0.8461, "step": 7933 }, { "epoch": 3.596554850407978, "grad_norm": 0.2136419036899005, "learning_rate": 1.7712107471393946e-05, "loss": 0.8577, "step": 7934 }, { "epoch": 3.5970081595648233, "grad_norm": 0.18396219674178255, "learning_rate": 1.7701596988577193e-05, "loss": 0.8662, "step": 7935 }, { "epoch": 3.597461468721668, "grad_norm": 0.17749740369851175, "learning_rate": 1.7691088739011023e-05, "loss": 0.8824, "step": 7936 }, { "epoch": 3.597914777878513, "grad_norm": 0.1875961549592077, "learning_rate": 1.7680582723747894e-05, "loss": 0.8499, "step": 7937 }, { "epoch": 3.5983680870353583, "grad_norm": 0.19578353107418553, "learning_rate": 1.767007894383999e-05, "loss": 0.8792, "step": 7938 }, { "epoch": 3.598821396192203, "grad_norm": 0.1593338664157306, "learning_rate": 1.7659577400339318e-05, "loss": 0.876, "step": 7939 }, { "epoch": 3.599274705349048, "grad_norm": 0.20879796731032485, "learning_rate": 1.764907809429761e-05, "loss": 0.8597, "step": 7940 }, { "epoch": 3.599728014505893, "grad_norm": 0.1993883476256691, "learning_rate": 1.7638581026766436e-05, "loss": 0.8996, "step": 7941 }, { "epoch": 3.600181323662738, "grad_norm": 0.1944162508950195, "learning_rate": 1.7628086198797077e-05, "loss": 0.8567, "step": 7942 }, { "epoch": 3.600634632819583, "grad_norm": 0.20424297759713939, "learning_rate": 1.761759361144065e-05, "loss": 0.8664, "step": 7943 }, { "epoch": 3.601087941976428, "grad_norm": 0.18158389757262183, "learning_rate": 1.7607103265747986e-05, "loss": 0.8764, "step": 7944 }, { "epoch": 3.601541251133273, "grad_norm": 0.17520798809260596, "learning_rate": 1.759661516276976e-05, "loss": 0.849, "step": 7945 }, { "epoch": 3.6019945602901178, "grad_norm": 0.2196086294665604, "learning_rate": 1.7586129303556364e-05, "loss": 0.8857, "step": 7946 }, { "epoch": 3.602447869446963, "grad_norm": 0.1677622141316975, "learning_rate": 1.7575645689157978e-05, "loss": 0.8883, "step": 7947 }, { "epoch": 3.602901178603808, "grad_norm": 0.21179810707922275, "learning_rate": 1.7565164320624597e-05, "loss": 0.8674, "step": 7948 }, { "epoch": 3.6033544877606527, "grad_norm": 0.19299393075470642, "learning_rate": 1.7554685199005945e-05, "loss": 0.8632, "step": 7949 }, { "epoch": 3.603807796917498, "grad_norm": 0.19627017122572196, "learning_rate": 1.754420832535153e-05, "loss": 0.8696, "step": 7950 }, { "epoch": 3.604261106074343, "grad_norm": 0.18566774460353244, "learning_rate": 1.7533733700710636e-05, "loss": 0.8649, "step": 7951 }, { "epoch": 3.6047144152311876, "grad_norm": 0.18907024726033297, "learning_rate": 1.7523261326132352e-05, "loss": 0.8752, "step": 7952 }, { "epoch": 3.605167724388033, "grad_norm": 0.19857504121188743, "learning_rate": 1.7512791202665492e-05, "loss": 0.8735, "step": 7953 }, { "epoch": 3.6056210335448777, "grad_norm": 0.19615994676676593, "learning_rate": 1.7502323331358696e-05, "loss": 0.8888, "step": 7954 }, { "epoch": 3.6060743427017226, "grad_norm": 0.2102305565008125, "learning_rate": 1.7491857713260322e-05, "loss": 0.8734, "step": 7955 }, { "epoch": 3.6065276518585674, "grad_norm": 0.1813678975995554, "learning_rate": 1.7481394349418565e-05, "loss": 0.8785, "step": 7956 }, { "epoch": 3.6069809610154127, "grad_norm": 0.1918944805072537, "learning_rate": 1.747093324088133e-05, "loss": 0.8612, "step": 7957 }, { "epoch": 3.6074342701722575, "grad_norm": 0.23361003978383568, "learning_rate": 1.7460474388696354e-05, "loss": 0.8741, "step": 7958 }, { "epoch": 3.6078875793291023, "grad_norm": 0.16262407696269768, "learning_rate": 1.7450017793911097e-05, "loss": 0.8609, "step": 7959 }, { "epoch": 3.608340888485947, "grad_norm": 0.20190662123390982, "learning_rate": 1.7439563457572846e-05, "loss": 0.8708, "step": 7960 }, { "epoch": 3.6087941976427924, "grad_norm": 0.1672739121136691, "learning_rate": 1.742911138072862e-05, "loss": 0.8764, "step": 7961 }, { "epoch": 3.6092475067996372, "grad_norm": 0.19697890738035612, "learning_rate": 1.741866156442521e-05, "loss": 0.8743, "step": 7962 }, { "epoch": 3.609700815956482, "grad_norm": 0.1640735435018243, "learning_rate": 1.7408214009709222e-05, "loss": 0.8643, "step": 7963 }, { "epoch": 3.6101541251133273, "grad_norm": 0.1673464444974834, "learning_rate": 1.739776871762698e-05, "loss": 0.8623, "step": 7964 }, { "epoch": 3.610607434270172, "grad_norm": 0.183649335449492, "learning_rate": 1.7387325689224645e-05, "loss": 0.8778, "step": 7965 }, { "epoch": 3.611060743427017, "grad_norm": 0.19727896062094838, "learning_rate": 1.737688492554809e-05, "loss": 0.8815, "step": 7966 }, { "epoch": 3.6115140525838623, "grad_norm": 0.1837464439044281, "learning_rate": 1.7366446427643008e-05, "loss": 0.8657, "step": 7967 }, { "epoch": 3.611967361740707, "grad_norm": 0.22327064056289678, "learning_rate": 1.735601019655483e-05, "loss": 0.8671, "step": 7968 }, { "epoch": 3.612420670897552, "grad_norm": 0.17289093229103084, "learning_rate": 1.7345576233328788e-05, "loss": 0.8654, "step": 7969 }, { "epoch": 3.612873980054397, "grad_norm": 0.1586114628918905, "learning_rate": 1.733514453900985e-05, "loss": 0.8731, "step": 7970 }, { "epoch": 3.613327289211242, "grad_norm": 0.19248983822980886, "learning_rate": 1.7324715114642807e-05, "loss": 0.8698, "step": 7971 }, { "epoch": 3.613780598368087, "grad_norm": 0.5164650168306827, "learning_rate": 1.7314287961272178e-05, "loss": 0.8766, "step": 7972 }, { "epoch": 3.614233907524932, "grad_norm": 0.17930781160772497, "learning_rate": 1.7303863079942296e-05, "loss": 0.8745, "step": 7973 }, { "epoch": 3.614687216681777, "grad_norm": 0.1706410101110211, "learning_rate": 1.729344047169721e-05, "loss": 0.8697, "step": 7974 }, { "epoch": 3.6151405258386218, "grad_norm": 0.19594512672439762, "learning_rate": 1.7283020137580812e-05, "loss": 0.8602, "step": 7975 }, { "epoch": 3.615593834995467, "grad_norm": 0.1800660864470222, "learning_rate": 1.7272602078636713e-05, "loss": 0.8684, "step": 7976 }, { "epoch": 3.616047144152312, "grad_norm": 0.22396224199237494, "learning_rate": 1.726218629590829e-05, "loss": 0.8643, "step": 7977 }, { "epoch": 3.6165004533091567, "grad_norm": 0.16952946546914363, "learning_rate": 1.7251772790438752e-05, "loss": 0.8607, "step": 7978 }, { "epoch": 3.616953762466002, "grad_norm": 0.17694721390806475, "learning_rate": 1.7241361563271012e-05, "loss": 0.859, "step": 7979 }, { "epoch": 3.617407071622847, "grad_norm": 0.1729949068743132, "learning_rate": 1.7230952615447814e-05, "loss": 0.8585, "step": 7980 }, { "epoch": 3.6178603807796916, "grad_norm": 0.1557944849186986, "learning_rate": 1.722054594801161e-05, "loss": 0.8718, "step": 7981 }, { "epoch": 3.618313689936537, "grad_norm": 0.17909252107312784, "learning_rate": 1.7210141562004696e-05, "loss": 0.8676, "step": 7982 }, { "epoch": 3.6187669990933817, "grad_norm": 0.1678038917324217, "learning_rate": 1.7199739458469062e-05, "loss": 0.8768, "step": 7983 }, { "epoch": 3.6192203082502266, "grad_norm": 0.16479737384984816, "learning_rate": 1.7189339638446546e-05, "loss": 0.854, "step": 7984 }, { "epoch": 3.619673617407072, "grad_norm": 0.19589664115599273, "learning_rate": 1.7178942102978692e-05, "loss": 0.8523, "step": 7985 }, { "epoch": 3.6201269265639167, "grad_norm": 0.14796775680405558, "learning_rate": 1.716854685310687e-05, "loss": 0.8624, "step": 7986 }, { "epoch": 3.6205802357207615, "grad_norm": 0.19549750342700647, "learning_rate": 1.7158153889872183e-05, "loss": 0.8686, "step": 7987 }, { "epoch": 3.6210335448776068, "grad_norm": 0.15909673729378518, "learning_rate": 1.7147763214315515e-05, "loss": 0.8874, "step": 7988 }, { "epoch": 3.6214868540344516, "grad_norm": 0.15368991472633173, "learning_rate": 1.7137374827477503e-05, "loss": 0.8751, "step": 7989 }, { "epoch": 3.6219401631912964, "grad_norm": 0.17542092383600458, "learning_rate": 1.7126988730398612e-05, "loss": 0.8692, "step": 7990 }, { "epoch": 3.6223934723481417, "grad_norm": 0.18073259874031872, "learning_rate": 1.7116604924119023e-05, "loss": 0.8743, "step": 7991 }, { "epoch": 3.6228467815049865, "grad_norm": 0.19267791946988083, "learning_rate": 1.7106223409678685e-05, "loss": 0.8682, "step": 7992 }, { "epoch": 3.6233000906618313, "grad_norm": 0.16009694184459122, "learning_rate": 1.7095844188117375e-05, "loss": 0.8523, "step": 7993 }, { "epoch": 3.6237533998186766, "grad_norm": 0.2313318108118491, "learning_rate": 1.7085467260474572e-05, "loss": 0.8823, "step": 7994 }, { "epoch": 3.6242067089755214, "grad_norm": 0.17732653946402369, "learning_rate": 1.7075092627789572e-05, "loss": 0.8583, "step": 7995 }, { "epoch": 3.6246600181323663, "grad_norm": 0.20125598920311585, "learning_rate": 1.7064720291101413e-05, "loss": 0.8687, "step": 7996 }, { "epoch": 3.625113327289211, "grad_norm": 0.22273594383176484, "learning_rate": 1.7054350251448937e-05, "loss": 0.8775, "step": 7997 }, { "epoch": 3.6255666364460564, "grad_norm": 0.17504609334601043, "learning_rate": 1.7043982509870702e-05, "loss": 0.8639, "step": 7998 }, { "epoch": 3.626019945602901, "grad_norm": 0.20427926446637135, "learning_rate": 1.7033617067405105e-05, "loss": 0.8876, "step": 7999 }, { "epoch": 3.626473254759746, "grad_norm": 0.21706333999612942, "learning_rate": 1.7023253925090237e-05, "loss": 0.8765, "step": 8000 }, { "epoch": 3.626926563916591, "grad_norm": 0.18551439353722377, "learning_rate": 1.7012893083964027e-05, "loss": 0.8622, "step": 8001 }, { "epoch": 3.627379873073436, "grad_norm": 0.21529365255768626, "learning_rate": 1.700253454506414e-05, "loss": 0.8808, "step": 8002 }, { "epoch": 3.627833182230281, "grad_norm": 0.2015109702648719, "learning_rate": 1.6992178309427986e-05, "loss": 0.8625, "step": 8003 }, { "epoch": 3.628286491387126, "grad_norm": 0.1902064699195901, "learning_rate": 1.698182437809281e-05, "loss": 0.8618, "step": 8004 }, { "epoch": 3.628739800543971, "grad_norm": 0.17934481837581512, "learning_rate": 1.697147275209556e-05, "loss": 0.8514, "step": 8005 }, { "epoch": 3.629193109700816, "grad_norm": 0.1837208500110439, "learning_rate": 1.6961123432473016e-05, "loss": 0.8691, "step": 8006 }, { "epoch": 3.6296464188576607, "grad_norm": 0.17169099245220992, "learning_rate": 1.6950776420261646e-05, "loss": 0.8787, "step": 8007 }, { "epoch": 3.630099728014506, "grad_norm": 0.21606019085563788, "learning_rate": 1.694043171649777e-05, "loss": 0.8676, "step": 8008 }, { "epoch": 3.630553037171351, "grad_norm": 0.18420676176140038, "learning_rate": 1.6930089322217417e-05, "loss": 0.854, "step": 8009 }, { "epoch": 3.6310063463281956, "grad_norm": 0.17207363152895538, "learning_rate": 1.6919749238456435e-05, "loss": 0.8596, "step": 8010 }, { "epoch": 3.631459655485041, "grad_norm": 0.22341014116517013, "learning_rate": 1.6909411466250375e-05, "loss": 0.8704, "step": 8011 }, { "epoch": 3.6319129646418857, "grad_norm": 0.19015100270616217, "learning_rate": 1.6899076006634643e-05, "loss": 0.8843, "step": 8012 }, { "epoch": 3.6323662737987306, "grad_norm": 0.15182482374378478, "learning_rate": 1.688874286064432e-05, "loss": 0.8691, "step": 8013 }, { "epoch": 3.632819582955576, "grad_norm": 0.20497936061045224, "learning_rate": 1.687841202931434e-05, "loss": 0.8786, "step": 8014 }, { "epoch": 3.6332728921124207, "grad_norm": 0.20929298273402383, "learning_rate": 1.6868083513679335e-05, "loss": 0.8638, "step": 8015 }, { "epoch": 3.6337262012692655, "grad_norm": 0.17625376228593043, "learning_rate": 1.6857757314773763e-05, "loss": 0.8757, "step": 8016 }, { "epoch": 3.6341795104261108, "grad_norm": 0.1854350280874724, "learning_rate": 1.6847433433631806e-05, "loss": 0.8548, "step": 8017 }, { "epoch": 3.6346328195829556, "grad_norm": 0.20536086861127179, "learning_rate": 1.6837111871287426e-05, "loss": 0.865, "step": 8018 }, { "epoch": 3.6350861287398004, "grad_norm": 0.17027510603798812, "learning_rate": 1.682679262877438e-05, "loss": 0.8961, "step": 8019 }, { "epoch": 3.6355394378966457, "grad_norm": 0.1737690421785, "learning_rate": 1.681647570712614e-05, "loss": 0.8504, "step": 8020 }, { "epoch": 3.6359927470534905, "grad_norm": 0.1807629577177738, "learning_rate": 1.680616110737601e-05, "loss": 0.8471, "step": 8021 }, { "epoch": 3.6364460562103353, "grad_norm": 0.1487665386927012, "learning_rate": 1.6795848830556995e-05, "loss": 0.8658, "step": 8022 }, { "epoch": 3.6368993653671806, "grad_norm": 0.1696103054879212, "learning_rate": 1.678553887770193e-05, "loss": 0.8383, "step": 8023 }, { "epoch": 3.6373526745240254, "grad_norm": 0.19549690671085967, "learning_rate": 1.677523124984336e-05, "loss": 0.8641, "step": 8024 }, { "epoch": 3.6378059836808703, "grad_norm": 0.1560645196606542, "learning_rate": 1.6764925948013645e-05, "loss": 0.8721, "step": 8025 }, { "epoch": 3.6382592928377155, "grad_norm": 0.18173588659502452, "learning_rate": 1.675462297324489e-05, "loss": 0.8741, "step": 8026 }, { "epoch": 3.6387126019945604, "grad_norm": 0.15821608225535538, "learning_rate": 1.6744322326568957e-05, "loss": 0.8585, "step": 8027 }, { "epoch": 3.639165911151405, "grad_norm": 0.16338467458098535, "learning_rate": 1.6734024009017474e-05, "loss": 0.8734, "step": 8028 }, { "epoch": 3.6396192203082505, "grad_norm": 0.1573719846851183, "learning_rate": 1.6723728021621882e-05, "loss": 0.8679, "step": 8029 }, { "epoch": 3.6400725294650953, "grad_norm": 0.1475513356182183, "learning_rate": 1.6713434365413318e-05, "loss": 0.8584, "step": 8030 }, { "epoch": 3.64052583862194, "grad_norm": 0.16956007923081948, "learning_rate": 1.6703143041422757e-05, "loss": 0.8794, "step": 8031 }, { "epoch": 3.6409791477787854, "grad_norm": 0.14924530485532783, "learning_rate": 1.669285405068089e-05, "loss": 0.8557, "step": 8032 }, { "epoch": 3.6414324569356302, "grad_norm": 0.1852030850705823, "learning_rate": 1.668256739421817e-05, "loss": 0.8725, "step": 8033 }, { "epoch": 3.641885766092475, "grad_norm": 0.15521596890674375, "learning_rate": 1.6672283073064875e-05, "loss": 0.8789, "step": 8034 }, { "epoch": 3.64233907524932, "grad_norm": 0.19666635092283163, "learning_rate": 1.666200108825097e-05, "loss": 0.8602, "step": 8035 }, { "epoch": 3.642792384406165, "grad_norm": 0.15479074598332795, "learning_rate": 1.6651721440806262e-05, "loss": 0.8759, "step": 8036 }, { "epoch": 3.64324569356301, "grad_norm": 0.17820410298877704, "learning_rate": 1.6641444131760254e-05, "loss": 0.8848, "step": 8037 }, { "epoch": 3.643699002719855, "grad_norm": 0.16691763321316133, "learning_rate": 1.6631169162142282e-05, "loss": 0.8608, "step": 8038 }, { "epoch": 3.6441523118766996, "grad_norm": 0.15255499344239304, "learning_rate": 1.6620896532981382e-05, "loss": 0.8586, "step": 8039 }, { "epoch": 3.644605621033545, "grad_norm": 0.1706855636755965, "learning_rate": 1.661062624530642e-05, "loss": 0.8362, "step": 8040 }, { "epoch": 3.6450589301903897, "grad_norm": 0.17820641549673705, "learning_rate": 1.6600358300145965e-05, "loss": 0.8491, "step": 8041 }, { "epoch": 3.6455122393472346, "grad_norm": 0.18377155785712132, "learning_rate": 1.6590092698528414e-05, "loss": 0.867, "step": 8042 }, { "epoch": 3.64596554850408, "grad_norm": 0.17611331850076625, "learning_rate": 1.6579829441481872e-05, "loss": 0.8594, "step": 8043 }, { "epoch": 3.6464188576609247, "grad_norm": 0.193553923494444, "learning_rate": 1.656956853003424e-05, "loss": 0.8763, "step": 8044 }, { "epoch": 3.6468721668177695, "grad_norm": 0.19182056191736171, "learning_rate": 1.655930996521318e-05, "loss": 0.8852, "step": 8045 }, { "epoch": 3.6473254759746148, "grad_norm": 0.18981125034091317, "learning_rate": 1.6549053748046128e-05, "loss": 0.8795, "step": 8046 }, { "epoch": 3.6477787851314596, "grad_norm": 0.1490247879091745, "learning_rate": 1.6538799879560245e-05, "loss": 0.8478, "step": 8047 }, { "epoch": 3.6482320942883044, "grad_norm": 0.20861856924705455, "learning_rate": 1.652854836078252e-05, "loss": 0.8492, "step": 8048 }, { "epoch": 3.6486854034451497, "grad_norm": 0.14277107590038352, "learning_rate": 1.6518299192739662e-05, "loss": 0.8591, "step": 8049 }, { "epoch": 3.6491387126019945, "grad_norm": 0.21159928016246818, "learning_rate": 1.6508052376458136e-05, "loss": 0.8512, "step": 8050 }, { "epoch": 3.6495920217588393, "grad_norm": 0.14490600201452603, "learning_rate": 1.6497807912964215e-05, "loss": 0.8694, "step": 8051 }, { "epoch": 3.6500453309156846, "grad_norm": 0.17874162874082256, "learning_rate": 1.6487565803283898e-05, "loss": 0.8664, "step": 8052 }, { "epoch": 3.6504986400725294, "grad_norm": 0.15950576476298217, "learning_rate": 1.6477326048442973e-05, "loss": 0.8582, "step": 8053 }, { "epoch": 3.6509519492293743, "grad_norm": 0.17450755358836267, "learning_rate": 1.6467088649466965e-05, "loss": 0.8768, "step": 8054 }, { "epoch": 3.6514052583862195, "grad_norm": 0.19597948035416068, "learning_rate": 1.6456853607381207e-05, "loss": 0.8534, "step": 8055 }, { "epoch": 3.6518585675430644, "grad_norm": 0.1408740456181833, "learning_rate": 1.6446620923210734e-05, "loss": 0.8762, "step": 8056 }, { "epoch": 3.652311876699909, "grad_norm": 0.1946124874794978, "learning_rate": 1.6436390597980416e-05, "loss": 0.8525, "step": 8057 }, { "epoch": 3.6527651858567545, "grad_norm": 0.15515136418464623, "learning_rate": 1.642616263271483e-05, "loss": 0.8534, "step": 8058 }, { "epoch": 3.6532184950135993, "grad_norm": 0.1731885757915372, "learning_rate": 1.6415937028438326e-05, "loss": 0.8688, "step": 8059 }, { "epoch": 3.653671804170444, "grad_norm": 0.18222284524472132, "learning_rate": 1.640571378617505e-05, "loss": 0.8591, "step": 8060 }, { "epoch": 3.6541251133272894, "grad_norm": 0.17076228235330962, "learning_rate": 1.639549290694887e-05, "loss": 0.8653, "step": 8061 }, { "epoch": 3.6545784224841342, "grad_norm": 0.16871696590352459, "learning_rate": 1.638527439178346e-05, "loss": 0.8707, "step": 8062 }, { "epoch": 3.655031731640979, "grad_norm": 0.17323270210576913, "learning_rate": 1.637505824170221e-05, "loss": 0.86, "step": 8063 }, { "epoch": 3.6554850407978243, "grad_norm": 0.18583903246272052, "learning_rate": 1.6364844457728338e-05, "loss": 0.8613, "step": 8064 }, { "epoch": 3.655938349954669, "grad_norm": 0.1634810063613239, "learning_rate": 1.635463304088473e-05, "loss": 0.8895, "step": 8065 }, { "epoch": 3.656391659111514, "grad_norm": 0.18913715897728295, "learning_rate": 1.634442399219412e-05, "loss": 0.8553, "step": 8066 }, { "epoch": 3.6568449682683593, "grad_norm": 0.17789236846700787, "learning_rate": 1.6334217312678967e-05, "loss": 0.865, "step": 8067 }, { "epoch": 3.657298277425204, "grad_norm": 0.1784081619202631, "learning_rate": 1.632401300336151e-05, "loss": 0.865, "step": 8068 }, { "epoch": 3.657751586582049, "grad_norm": 0.1896794187124639, "learning_rate": 1.631381106526372e-05, "loss": 0.8709, "step": 8069 }, { "epoch": 3.658204895738894, "grad_norm": 0.19810687382182157, "learning_rate": 1.6303611499407384e-05, "loss": 0.8587, "step": 8070 }, { "epoch": 3.658658204895739, "grad_norm": 0.20334526192838345, "learning_rate": 1.6293414306813976e-05, "loss": 0.85, "step": 8071 }, { "epoch": 3.659111514052584, "grad_norm": 0.1908530735729352, "learning_rate": 1.6283219488504814e-05, "loss": 0.8587, "step": 8072 }, { "epoch": 3.659564823209429, "grad_norm": 0.2979894290148619, "learning_rate": 1.627302704550091e-05, "loss": 0.8832, "step": 8073 }, { "epoch": 3.660018132366274, "grad_norm": 0.2104350016692748, "learning_rate": 1.6262836978823096e-05, "loss": 0.8749, "step": 8074 }, { "epoch": 3.6604714415231188, "grad_norm": 0.2243716814458314, "learning_rate": 1.625264928949192e-05, "loss": 0.8667, "step": 8075 }, { "epoch": 3.6609247506799636, "grad_norm": 0.25638496311495185, "learning_rate": 1.6242463978527695e-05, "loss": 0.8609, "step": 8076 }, { "epoch": 3.661378059836809, "grad_norm": 0.20812398059115253, "learning_rate": 1.6232281046950537e-05, "loss": 0.858, "step": 8077 }, { "epoch": 3.6618313689936537, "grad_norm": 0.16407496028079427, "learning_rate": 1.622210049578027e-05, "loss": 0.8816, "step": 8078 }, { "epoch": 3.6622846781504985, "grad_norm": 0.2462836073550724, "learning_rate": 1.6211922326036534e-05, "loss": 0.8678, "step": 8079 }, { "epoch": 3.6627379873073433, "grad_norm": 0.17322744409598959, "learning_rate": 1.6201746538738677e-05, "loss": 0.8631, "step": 8080 }, { "epoch": 3.6631912964641886, "grad_norm": 0.18369636557260974, "learning_rate": 1.619157313490586e-05, "loss": 0.8703, "step": 8081 }, { "epoch": 3.6636446056210334, "grad_norm": 0.20563199061464635, "learning_rate": 1.6181402115556954e-05, "loss": 0.8473, "step": 8082 }, { "epoch": 3.6640979147778783, "grad_norm": 0.16348257817576609, "learning_rate": 1.6171233481710648e-05, "loss": 0.8584, "step": 8083 }, { "epoch": 3.6645512239347235, "grad_norm": 0.19853626809345218, "learning_rate": 1.6161067234385316e-05, "loss": 0.8519, "step": 8084 }, { "epoch": 3.6650045330915684, "grad_norm": 0.24808981812006003, "learning_rate": 1.615090337459918e-05, "loss": 0.8627, "step": 8085 }, { "epoch": 3.665457842248413, "grad_norm": 0.18642418071317912, "learning_rate": 1.6140741903370144e-05, "loss": 0.8781, "step": 8086 }, { "epoch": 3.6659111514052585, "grad_norm": 0.27714672192179424, "learning_rate": 1.6130582821715937e-05, "loss": 0.8665, "step": 8087 }, { "epoch": 3.6663644605621033, "grad_norm": 0.21828292750124226, "learning_rate": 1.6120426130654006e-05, "loss": 0.8477, "step": 8088 }, { "epoch": 3.666817769718948, "grad_norm": 0.19328436034983248, "learning_rate": 1.6110271831201588e-05, "loss": 0.8701, "step": 8089 }, { "epoch": 3.6672710788757934, "grad_norm": 0.2344698859159846, "learning_rate": 1.610011992437566e-05, "loss": 0.8554, "step": 8090 }, { "epoch": 3.6677243880326382, "grad_norm": 0.15377068287303639, "learning_rate": 1.6089970411192942e-05, "loss": 0.8702, "step": 8091 }, { "epoch": 3.668177697189483, "grad_norm": 0.20821208460054522, "learning_rate": 1.6079823292669976e-05, "loss": 0.8713, "step": 8092 }, { "epoch": 3.6686310063463283, "grad_norm": 0.20268581629827676, "learning_rate": 1.6069678569822993e-05, "loss": 0.8595, "step": 8093 }, { "epoch": 3.669084315503173, "grad_norm": 0.1494799288890957, "learning_rate": 1.6059536243668044e-05, "loss": 0.8587, "step": 8094 }, { "epoch": 3.669537624660018, "grad_norm": 0.1840279379129244, "learning_rate": 1.6049396315220887e-05, "loss": 0.874, "step": 8095 }, { "epoch": 3.6699909338168633, "grad_norm": 0.1718810035709623, "learning_rate": 1.6039258785497094e-05, "loss": 0.8613, "step": 8096 }, { "epoch": 3.670444242973708, "grad_norm": 0.17267680713158304, "learning_rate": 1.602912365551194e-05, "loss": 0.8613, "step": 8097 }, { "epoch": 3.670897552130553, "grad_norm": 0.1646660542962838, "learning_rate": 1.6018990926280513e-05, "loss": 0.8767, "step": 8098 }, { "epoch": 3.671350861287398, "grad_norm": 0.16283725048623932, "learning_rate": 1.6008860598817605e-05, "loss": 0.8692, "step": 8099 }, { "epoch": 3.671804170444243, "grad_norm": 0.16354345336561826, "learning_rate": 1.5998732674137836e-05, "loss": 0.8847, "step": 8100 }, { "epoch": 3.672257479601088, "grad_norm": 0.21326879214161565, "learning_rate": 1.598860715325553e-05, "loss": 0.8585, "step": 8101 }, { "epoch": 3.672710788757933, "grad_norm": 0.15029064803651818, "learning_rate": 1.597848403718478e-05, "loss": 0.8704, "step": 8102 }, { "epoch": 3.673164097914778, "grad_norm": 0.17819195865362894, "learning_rate": 1.5968363326939437e-05, "loss": 0.8569, "step": 8103 }, { "epoch": 3.6736174070716228, "grad_norm": 0.16332345613333366, "learning_rate": 1.5958245023533156e-05, "loss": 0.8409, "step": 8104 }, { "epoch": 3.674070716228468, "grad_norm": 0.14277350221347382, "learning_rate": 1.5948129127979285e-05, "loss": 0.8672, "step": 8105 }, { "epoch": 3.674524025385313, "grad_norm": 0.1779111729716305, "learning_rate": 1.5938015641290962e-05, "loss": 0.8815, "step": 8106 }, { "epoch": 3.6749773345421577, "grad_norm": 0.1528663532082498, "learning_rate": 1.59279045644811e-05, "loss": 0.8577, "step": 8107 }, { "epoch": 3.675430643699003, "grad_norm": 0.1573358829894495, "learning_rate": 1.5917795898562327e-05, "loss": 0.871, "step": 8108 }, { "epoch": 3.675883952855848, "grad_norm": 0.14871758846492802, "learning_rate": 1.590768964454709e-05, "loss": 0.8646, "step": 8109 }, { "epoch": 3.6763372620126926, "grad_norm": 0.1575355449407423, "learning_rate": 1.5897585803447523e-05, "loss": 0.8761, "step": 8110 }, { "epoch": 3.676790571169538, "grad_norm": 0.1682060830986654, "learning_rate": 1.5887484376275586e-05, "loss": 0.8603, "step": 8111 }, { "epoch": 3.6772438803263827, "grad_norm": 0.16798401846977778, "learning_rate": 1.5877385364042943e-05, "loss": 0.8523, "step": 8112 }, { "epoch": 3.6776971894832275, "grad_norm": 0.15159326392767164, "learning_rate": 1.5867288767761065e-05, "loss": 0.8737, "step": 8113 }, { "epoch": 3.6781504986400724, "grad_norm": 0.16893331352490248, "learning_rate": 1.585719458844113e-05, "loss": 0.8655, "step": 8114 }, { "epoch": 3.6786038077969176, "grad_norm": 0.17547041933991073, "learning_rate": 1.584710282709412e-05, "loss": 0.8939, "step": 8115 }, { "epoch": 3.6790571169537625, "grad_norm": 0.1677548785474805, "learning_rate": 1.5837013484730745e-05, "loss": 0.8597, "step": 8116 }, { "epoch": 3.6795104261106073, "grad_norm": 0.527928092072223, "learning_rate": 1.5826926562361474e-05, "loss": 0.8884, "step": 8117 }, { "epoch": 3.679963735267452, "grad_norm": 0.18693655416311228, "learning_rate": 1.5816842060996563e-05, "loss": 0.867, "step": 8118 }, { "epoch": 3.6804170444242974, "grad_norm": 0.19785485355971913, "learning_rate": 1.580675998164598e-05, "loss": 0.8525, "step": 8119 }, { "epoch": 3.6808703535811422, "grad_norm": 0.14204537128047098, "learning_rate": 1.57966803253195e-05, "loss": 0.8678, "step": 8120 }, { "epoch": 3.681323662737987, "grad_norm": 0.20987683065548352, "learning_rate": 1.5786603093026603e-05, "loss": 0.8861, "step": 8121 }, { "epoch": 3.6817769718948323, "grad_norm": 0.17526588083935973, "learning_rate": 1.57765282857766e-05, "loss": 0.8719, "step": 8122 }, { "epoch": 3.682230281051677, "grad_norm": 0.21455305169578828, "learning_rate": 1.5766455904578447e-05, "loss": 0.8632, "step": 8123 }, { "epoch": 3.682683590208522, "grad_norm": 0.1681072135108491, "learning_rate": 1.5756385950440972e-05, "loss": 0.8793, "step": 8124 }, { "epoch": 3.6831368993653673, "grad_norm": 0.20179479114356802, "learning_rate": 1.5746318424372683e-05, "loss": 0.8596, "step": 8125 }, { "epoch": 3.683590208522212, "grad_norm": 0.14074198447608058, "learning_rate": 1.57362533273819e-05, "loss": 0.8662, "step": 8126 }, { "epoch": 3.684043517679057, "grad_norm": 0.20265594361608988, "learning_rate": 1.572619066047664e-05, "loss": 0.8753, "step": 8127 }, { "epoch": 3.684496826835902, "grad_norm": 0.14355946303094314, "learning_rate": 1.5716130424664734e-05, "loss": 0.8774, "step": 8128 }, { "epoch": 3.684950135992747, "grad_norm": 0.17379555009734524, "learning_rate": 1.5706072620953727e-05, "loss": 0.8459, "step": 8129 }, { "epoch": 3.685403445149592, "grad_norm": 0.1754817879349993, "learning_rate": 1.5696017250350955e-05, "loss": 0.8803, "step": 8130 }, { "epoch": 3.685856754306437, "grad_norm": 0.16394554092100733, "learning_rate": 1.5685964313863488e-05, "loss": 0.8605, "step": 8131 }, { "epoch": 3.686310063463282, "grad_norm": 0.1975297890509491, "learning_rate": 1.5675913812498132e-05, "loss": 0.8712, "step": 8132 }, { "epoch": 3.6867633726201268, "grad_norm": 0.15465235926553625, "learning_rate": 1.566586574726152e-05, "loss": 0.8762, "step": 8133 }, { "epoch": 3.687216681776972, "grad_norm": 0.20147965122420813, "learning_rate": 1.5655820119159946e-05, "loss": 0.8569, "step": 8134 }, { "epoch": 3.687669990933817, "grad_norm": 0.16154942000132635, "learning_rate": 1.564577692919955e-05, "loss": 0.8567, "step": 8135 }, { "epoch": 3.6881233000906617, "grad_norm": 0.20837077172151852, "learning_rate": 1.5635736178386157e-05, "loss": 0.8697, "step": 8136 }, { "epoch": 3.688576609247507, "grad_norm": 0.20157546175050758, "learning_rate": 1.5625697867725403e-05, "loss": 0.907, "step": 8137 }, { "epoch": 3.689029918404352, "grad_norm": 0.19739428258458377, "learning_rate": 1.5615661998222633e-05, "loss": 0.8638, "step": 8138 }, { "epoch": 3.6894832275611966, "grad_norm": 0.17816689690543708, "learning_rate": 1.5605628570882986e-05, "loss": 0.8723, "step": 8139 }, { "epoch": 3.689936536718042, "grad_norm": 0.19435095267992455, "learning_rate": 1.5595597586711325e-05, "loss": 0.8606, "step": 8140 }, { "epoch": 3.6903898458748867, "grad_norm": 0.16210518667005377, "learning_rate": 1.5585569046712312e-05, "loss": 0.8571, "step": 8141 }, { "epoch": 3.6908431550317315, "grad_norm": 0.1981508517993694, "learning_rate": 1.557554295189028e-05, "loss": 0.865, "step": 8142 }, { "epoch": 3.691296464188577, "grad_norm": 0.19830541090030132, "learning_rate": 1.5565519303249424e-05, "loss": 0.8716, "step": 8143 }, { "epoch": 3.6917497733454216, "grad_norm": 0.16653408111454074, "learning_rate": 1.5555498101793606e-05, "loss": 0.8793, "step": 8144 }, { "epoch": 3.6922030825022665, "grad_norm": 0.19447782280429304, "learning_rate": 1.5545479348526504e-05, "loss": 0.8373, "step": 8145 }, { "epoch": 3.6926563916591117, "grad_norm": 0.1785473996087168, "learning_rate": 1.5535463044451523e-05, "loss": 0.8718, "step": 8146 }, { "epoch": 3.6931097008159566, "grad_norm": 0.19432310359152247, "learning_rate": 1.55254491905718e-05, "loss": 0.8449, "step": 8147 }, { "epoch": 3.6935630099728014, "grad_norm": 0.17080365043850454, "learning_rate": 1.5515437787890283e-05, "loss": 0.8653, "step": 8148 }, { "epoch": 3.6940163191296467, "grad_norm": 0.1989451706196327, "learning_rate": 1.5505428837409615e-05, "loss": 0.8605, "step": 8149 }, { "epoch": 3.6944696282864915, "grad_norm": 0.15244865500271745, "learning_rate": 1.5495422340132254e-05, "loss": 0.8709, "step": 8150 }, { "epoch": 3.6949229374433363, "grad_norm": 0.18044724334943663, "learning_rate": 1.548541829706035e-05, "loss": 0.871, "step": 8151 }, { "epoch": 3.695376246600181, "grad_norm": 0.15055542774466846, "learning_rate": 1.5475416709195867e-05, "loss": 0.8578, "step": 8152 }, { "epoch": 3.6958295557570264, "grad_norm": 0.16700109790619744, "learning_rate": 1.5465417577540453e-05, "loss": 0.873, "step": 8153 }, { "epoch": 3.6962828649138713, "grad_norm": 0.1588288380832494, "learning_rate": 1.5455420903095597e-05, "loss": 0.8643, "step": 8154 }, { "epoch": 3.696736174070716, "grad_norm": 0.18399319178207088, "learning_rate": 1.5445426686862453e-05, "loss": 0.8772, "step": 8155 }, { "epoch": 3.6971894832275614, "grad_norm": 0.17028380011001673, "learning_rate": 1.5435434929842013e-05, "loss": 0.8486, "step": 8156 }, { "epoch": 3.697642792384406, "grad_norm": 0.1487307226715019, "learning_rate": 1.5425445633034955e-05, "loss": 0.8545, "step": 8157 }, { "epoch": 3.698096101541251, "grad_norm": 0.14702556532670075, "learning_rate": 1.5415458797441723e-05, "loss": 0.8734, "step": 8158 }, { "epoch": 3.698549410698096, "grad_norm": 0.1564723961662751, "learning_rate": 1.540547442406256e-05, "loss": 0.8819, "step": 8159 }, { "epoch": 3.699002719854941, "grad_norm": 0.16402239701629948, "learning_rate": 1.5395492513897417e-05, "loss": 0.8685, "step": 8160 }, { "epoch": 3.699456029011786, "grad_norm": 0.15312989652434095, "learning_rate": 1.538551306794601e-05, "loss": 0.8639, "step": 8161 }, { "epoch": 3.6999093381686308, "grad_norm": 0.19214845022228294, "learning_rate": 1.5375536087207796e-05, "loss": 0.8619, "step": 8162 }, { "epoch": 3.700362647325476, "grad_norm": 0.17239881449215252, "learning_rate": 1.5365561572682026e-05, "loss": 0.8711, "step": 8163 }, { "epoch": 3.700815956482321, "grad_norm": 0.18748130759471954, "learning_rate": 1.535558952536765e-05, "loss": 0.874, "step": 8164 }, { "epoch": 3.7012692656391657, "grad_norm": 0.133597796527886, "learning_rate": 1.534561994626343e-05, "loss": 0.8701, "step": 8165 }, { "epoch": 3.701722574796011, "grad_norm": 0.19401945609234691, "learning_rate": 1.5335652836367808e-05, "loss": 0.8796, "step": 8166 }, { "epoch": 3.702175883952856, "grad_norm": 0.15708511769575037, "learning_rate": 1.532568819667906e-05, "loss": 0.8748, "step": 8167 }, { "epoch": 3.7026291931097006, "grad_norm": 0.19553830697261976, "learning_rate": 1.5315726028195143e-05, "loss": 0.868, "step": 8168 }, { "epoch": 3.703082502266546, "grad_norm": 0.14957212939354664, "learning_rate": 1.530576633191382e-05, "loss": 0.8569, "step": 8169 }, { "epoch": 3.7035358114233907, "grad_norm": 0.1806317877809897, "learning_rate": 1.529580910883256e-05, "loss": 0.855, "step": 8170 }, { "epoch": 3.7039891205802356, "grad_norm": 0.15004929486687488, "learning_rate": 1.528585435994864e-05, "loss": 0.8646, "step": 8171 }, { "epoch": 3.704442429737081, "grad_norm": 0.14501312957665025, "learning_rate": 1.527590208625904e-05, "loss": 0.8629, "step": 8172 }, { "epoch": 3.7048957388939256, "grad_norm": 0.1364160527194733, "learning_rate": 1.526595228876049e-05, "loss": 0.8766, "step": 8173 }, { "epoch": 3.7053490480507705, "grad_norm": 0.1727786349428715, "learning_rate": 1.5256004968449527e-05, "loss": 0.8548, "step": 8174 }, { "epoch": 3.7058023572076157, "grad_norm": 0.13334703764977643, "learning_rate": 1.5246060126322376e-05, "loss": 0.8759, "step": 8175 }, { "epoch": 3.7062556663644606, "grad_norm": 0.1726669504753101, "learning_rate": 1.5236117763375062e-05, "loss": 0.8689, "step": 8176 }, { "epoch": 3.7067089755213054, "grad_norm": 0.1435223588556626, "learning_rate": 1.5226177880603325e-05, "loss": 0.8624, "step": 8177 }, { "epoch": 3.7071622846781507, "grad_norm": 0.17620933731677513, "learning_rate": 1.5216240479002693e-05, "loss": 0.8679, "step": 8178 }, { "epoch": 3.7076155938349955, "grad_norm": 0.17328668832079966, "learning_rate": 1.5206305559568417e-05, "loss": 0.881, "step": 8179 }, { "epoch": 3.7080689029918403, "grad_norm": 0.18882820636908332, "learning_rate": 1.5196373123295502e-05, "loss": 0.8655, "step": 8180 }, { "epoch": 3.7085222121486856, "grad_norm": 0.15672247819451596, "learning_rate": 1.5186443171178708e-05, "loss": 0.8682, "step": 8181 }, { "epoch": 3.7089755213055304, "grad_norm": 0.1753285993831113, "learning_rate": 1.5176515704212564e-05, "loss": 0.8483, "step": 8182 }, { "epoch": 3.7094288304623753, "grad_norm": 0.1672310538812343, "learning_rate": 1.5166590723391318e-05, "loss": 0.8777, "step": 8183 }, { "epoch": 3.7098821396192205, "grad_norm": 0.15503447060333803, "learning_rate": 1.5156668229709009e-05, "loss": 0.8733, "step": 8184 }, { "epoch": 3.7103354487760654, "grad_norm": 0.14990499492018458, "learning_rate": 1.5146748224159372e-05, "loss": 0.8531, "step": 8185 }, { "epoch": 3.71078875793291, "grad_norm": 0.15416481427735193, "learning_rate": 1.513683070773596e-05, "loss": 0.88, "step": 8186 }, { "epoch": 3.7112420670897555, "grad_norm": 0.14790247557063368, "learning_rate": 1.5126915681432021e-05, "loss": 0.8693, "step": 8187 }, { "epoch": 3.7116953762466003, "grad_norm": 0.18575251949849794, "learning_rate": 1.5117003146240565e-05, "loss": 0.8497, "step": 8188 }, { "epoch": 3.712148685403445, "grad_norm": 0.20337901884791262, "learning_rate": 1.510709310315439e-05, "loss": 0.8698, "step": 8189 }, { "epoch": 3.7126019945602904, "grad_norm": 0.1495197926890527, "learning_rate": 1.5097185553165985e-05, "loss": 0.8717, "step": 8190 }, { "epoch": 3.713055303717135, "grad_norm": 0.18138927972483468, "learning_rate": 1.5087280497267647e-05, "loss": 0.8599, "step": 8191 }, { "epoch": 3.71350861287398, "grad_norm": 0.16004324899693176, "learning_rate": 1.5077377936451369e-05, "loss": 0.8503, "step": 8192 }, { "epoch": 3.713961922030825, "grad_norm": 0.1662046373294843, "learning_rate": 1.5067477871708951e-05, "loss": 0.868, "step": 8193 }, { "epoch": 3.71441523118767, "grad_norm": 0.1433015209017861, "learning_rate": 1.5057580304031887e-05, "loss": 0.8668, "step": 8194 }, { "epoch": 3.714868540344515, "grad_norm": 0.16758991791939942, "learning_rate": 1.5047685234411478e-05, "loss": 0.8665, "step": 8195 }, { "epoch": 3.71532184950136, "grad_norm": 0.17817894301999912, "learning_rate": 1.5037792663838705e-05, "loss": 0.8572, "step": 8196 }, { "epoch": 3.7157751586582046, "grad_norm": 0.16218468756169577, "learning_rate": 1.5027902593304378e-05, "loss": 0.8675, "step": 8197 }, { "epoch": 3.71622846781505, "grad_norm": 0.23138678140317648, "learning_rate": 1.5018015023798995e-05, "loss": 0.8768, "step": 8198 }, { "epoch": 3.7166817769718947, "grad_norm": 0.16614099835201357, "learning_rate": 1.500812995631283e-05, "loss": 0.8696, "step": 8199 }, { "epoch": 3.7171350861287396, "grad_norm": 0.17344422687731947, "learning_rate": 1.4998247391835885e-05, "loss": 0.8743, "step": 8200 }, { "epoch": 3.717588395285585, "grad_norm": 0.15319216510752307, "learning_rate": 1.4988367331357956e-05, "loss": 0.8599, "step": 8201 }, { "epoch": 3.7180417044424297, "grad_norm": 0.16702159230105237, "learning_rate": 1.497848977586855e-05, "loss": 0.87, "step": 8202 }, { "epoch": 3.7184950135992745, "grad_norm": 0.16826525072291418, "learning_rate": 1.4968614726356911e-05, "loss": 0.8359, "step": 8203 }, { "epoch": 3.7189483227561198, "grad_norm": 0.16070535388527077, "learning_rate": 1.495874218381209e-05, "loss": 0.8487, "step": 8204 }, { "epoch": 3.7194016319129646, "grad_norm": 0.21435126927653436, "learning_rate": 1.4948872149222818e-05, "loss": 0.8668, "step": 8205 }, { "epoch": 3.7198549410698094, "grad_norm": 0.15526739634986855, "learning_rate": 1.4939004623577643e-05, "loss": 0.8792, "step": 8206 }, { "epoch": 3.7203082502266547, "grad_norm": 0.1953645939077843, "learning_rate": 1.4929139607864786e-05, "loss": 0.8606, "step": 8207 }, { "epoch": 3.7207615593834995, "grad_norm": 0.1885940635552877, "learning_rate": 1.49192771030723e-05, "loss": 0.8769, "step": 8208 }, { "epoch": 3.7212148685403443, "grad_norm": 0.1814993843429299, "learning_rate": 1.4909417110187905e-05, "loss": 0.8776, "step": 8209 }, { "epoch": 3.7216681776971896, "grad_norm": 0.47749769182964347, "learning_rate": 1.4899559630199143e-05, "loss": 0.8661, "step": 8210 }, { "epoch": 3.7221214868540344, "grad_norm": 0.17748854864384392, "learning_rate": 1.4889704664093238e-05, "loss": 0.8458, "step": 8211 }, { "epoch": 3.7225747960108793, "grad_norm": 0.1509969558517047, "learning_rate": 1.487985221285722e-05, "loss": 0.8717, "step": 8212 }, { "epoch": 3.7230281051677245, "grad_norm": 0.19088626176903073, "learning_rate": 1.4870002277477835e-05, "loss": 0.8776, "step": 8213 }, { "epoch": 3.7234814143245694, "grad_norm": 0.16155078053788818, "learning_rate": 1.4860154858941558e-05, "loss": 0.8523, "step": 8214 }, { "epoch": 3.723934723481414, "grad_norm": 0.16122563524841602, "learning_rate": 1.4850309958234674e-05, "loss": 0.8639, "step": 8215 }, { "epoch": 3.7243880326382595, "grad_norm": 0.20732300932193629, "learning_rate": 1.4840467576343146e-05, "loss": 0.8635, "step": 8216 }, { "epoch": 3.7248413417951043, "grad_norm": 0.14587465207615657, "learning_rate": 1.4830627714252747e-05, "loss": 0.8651, "step": 8217 }, { "epoch": 3.725294650951949, "grad_norm": 0.21321508795244012, "learning_rate": 1.4820790372948954e-05, "loss": 0.8524, "step": 8218 }, { "epoch": 3.7257479601087944, "grad_norm": 0.18635509989587787, "learning_rate": 1.4810955553417006e-05, "loss": 0.8468, "step": 8219 }, { "epoch": 3.726201269265639, "grad_norm": 0.16200188033489787, "learning_rate": 1.4801123256641873e-05, "loss": 0.8575, "step": 8220 }, { "epoch": 3.726654578422484, "grad_norm": 0.2280085063729398, "learning_rate": 1.4791293483608317e-05, "loss": 0.8813, "step": 8221 }, { "epoch": 3.7271078875793293, "grad_norm": 0.198441689501382, "learning_rate": 1.4781466235300794e-05, "loss": 0.8861, "step": 8222 }, { "epoch": 3.727561196736174, "grad_norm": 0.17700131189808585, "learning_rate": 1.477164151270356e-05, "loss": 0.873, "step": 8223 }, { "epoch": 3.728014505893019, "grad_norm": 0.1933865515356514, "learning_rate": 1.4761819316800555e-05, "loss": 0.8685, "step": 8224 }, { "epoch": 3.7284678150498642, "grad_norm": 0.17339909681191423, "learning_rate": 1.4751999648575534e-05, "loss": 0.8554, "step": 8225 }, { "epoch": 3.728921124206709, "grad_norm": 0.16229587219067299, "learning_rate": 1.4742182509011937e-05, "loss": 0.8477, "step": 8226 }, { "epoch": 3.729374433363554, "grad_norm": 0.17971616588781578, "learning_rate": 1.4732367899093008e-05, "loss": 0.8889, "step": 8227 }, { "epoch": 3.729827742520399, "grad_norm": 0.1448586020490163, "learning_rate": 1.4722555819801692e-05, "loss": 0.8699, "step": 8228 }, { "epoch": 3.730281051677244, "grad_norm": 0.19154647377976333, "learning_rate": 1.4712746272120688e-05, "loss": 0.8581, "step": 8229 }, { "epoch": 3.730734360834089, "grad_norm": 0.14134163685978207, "learning_rate": 1.4702939257032474e-05, "loss": 0.8637, "step": 8230 }, { "epoch": 3.7311876699909337, "grad_norm": 0.19745338705004073, "learning_rate": 1.469313477551923e-05, "loss": 0.8752, "step": 8231 }, { "epoch": 3.731640979147779, "grad_norm": 0.1500157711629783, "learning_rate": 1.4683332828562926e-05, "loss": 0.8843, "step": 8232 }, { "epoch": 3.7320942883046238, "grad_norm": 0.16708924021533983, "learning_rate": 1.4673533417145227e-05, "loss": 0.8603, "step": 8233 }, { "epoch": 3.7325475974614686, "grad_norm": 0.1426248358680628, "learning_rate": 1.4663736542247606e-05, "loss": 0.8802, "step": 8234 }, { "epoch": 3.733000906618314, "grad_norm": 0.21119021985015837, "learning_rate": 1.4653942204851217e-05, "loss": 0.8656, "step": 8235 }, { "epoch": 3.7334542157751587, "grad_norm": 0.1417592230626061, "learning_rate": 1.464415040593702e-05, "loss": 0.8816, "step": 8236 }, { "epoch": 3.7339075249320035, "grad_norm": 0.20483886792694933, "learning_rate": 1.4634361146485683e-05, "loss": 0.8776, "step": 8237 }, { "epoch": 3.7343608340888483, "grad_norm": 0.1359471450383502, "learning_rate": 1.4624574427477623e-05, "loss": 0.8748, "step": 8238 }, { "epoch": 3.7348141432456936, "grad_norm": 0.19324757881981816, "learning_rate": 1.4614790249892999e-05, "loss": 0.848, "step": 8239 }, { "epoch": 3.7352674524025384, "grad_norm": 0.15176515662909792, "learning_rate": 1.4605008614711746e-05, "loss": 0.8832, "step": 8240 }, { "epoch": 3.7357207615593833, "grad_norm": 0.16566108699500245, "learning_rate": 1.4595229522913506e-05, "loss": 0.8629, "step": 8241 }, { "epoch": 3.7361740707162285, "grad_norm": 0.15889645643644199, "learning_rate": 1.4585452975477706e-05, "loss": 0.8684, "step": 8242 }, { "epoch": 3.7366273798730734, "grad_norm": 0.15518768631776472, "learning_rate": 1.4575678973383478e-05, "loss": 0.8653, "step": 8243 }, { "epoch": 3.737080689029918, "grad_norm": 0.15430280538506766, "learning_rate": 1.4565907517609713e-05, "loss": 0.8796, "step": 8244 }, { "epoch": 3.7375339981867635, "grad_norm": 0.1636681822501628, "learning_rate": 1.4556138609135069e-05, "loss": 0.8677, "step": 8245 }, { "epoch": 3.7379873073436083, "grad_norm": 0.16852847927473705, "learning_rate": 1.4546372248937911e-05, "loss": 0.8832, "step": 8246 }, { "epoch": 3.738440616500453, "grad_norm": 0.16649590406322984, "learning_rate": 1.453660843799639e-05, "loss": 0.8666, "step": 8247 }, { "epoch": 3.7388939256572984, "grad_norm": 0.19187188252303153, "learning_rate": 1.4526847177288356e-05, "loss": 0.8694, "step": 8248 }, { "epoch": 3.739347234814143, "grad_norm": 0.1729471040297731, "learning_rate": 1.4517088467791461e-05, "loss": 0.8686, "step": 8249 }, { "epoch": 3.739800543970988, "grad_norm": 0.17700228944334487, "learning_rate": 1.4507332310483029e-05, "loss": 0.8715, "step": 8250 }, { "epoch": 3.7402538531278333, "grad_norm": 0.16623700905009015, "learning_rate": 1.44975787063402e-05, "loss": 0.8669, "step": 8251 }, { "epoch": 3.740707162284678, "grad_norm": 0.17573568741684095, "learning_rate": 1.4487827656339804e-05, "loss": 0.8609, "step": 8252 }, { "epoch": 3.741160471441523, "grad_norm": 0.17166054090208105, "learning_rate": 1.447807916145846e-05, "loss": 0.8734, "step": 8253 }, { "epoch": 3.7416137805983682, "grad_norm": 0.15151405943510338, "learning_rate": 1.4468333222672497e-05, "loss": 0.861, "step": 8254 }, { "epoch": 3.742067089755213, "grad_norm": 0.18909341870532967, "learning_rate": 1.4458589840957982e-05, "loss": 0.856, "step": 8255 }, { "epoch": 3.742520398912058, "grad_norm": 0.19205586275481024, "learning_rate": 1.4448849017290774e-05, "loss": 0.867, "step": 8256 }, { "epoch": 3.742973708068903, "grad_norm": 0.16306571691730315, "learning_rate": 1.443911075264643e-05, "loss": 0.8986, "step": 8257 }, { "epoch": 3.743427017225748, "grad_norm": 0.17476678996414283, "learning_rate": 1.4429375048000252e-05, "loss": 0.9017, "step": 8258 }, { "epoch": 3.743880326382593, "grad_norm": 0.1652086507976759, "learning_rate": 1.4419641904327329e-05, "loss": 0.8653, "step": 8259 }, { "epoch": 3.744333635539438, "grad_norm": 0.1771980889027628, "learning_rate": 1.4409911322602446e-05, "loss": 0.8651, "step": 8260 }, { "epoch": 3.744786944696283, "grad_norm": 0.19732753642197637, "learning_rate": 1.4400183303800139e-05, "loss": 0.8764, "step": 8261 }, { "epoch": 3.7452402538531278, "grad_norm": 0.13696544915252487, "learning_rate": 1.4390457848894724e-05, "loss": 0.852, "step": 8262 }, { "epoch": 3.745693563009973, "grad_norm": 0.1782108665576462, "learning_rate": 1.438073495886021e-05, "loss": 0.85, "step": 8263 }, { "epoch": 3.746146872166818, "grad_norm": 0.16367230756728773, "learning_rate": 1.4371014634670393e-05, "loss": 0.8607, "step": 8264 }, { "epoch": 3.7466001813236627, "grad_norm": 0.18873607296021952, "learning_rate": 1.436129687729877e-05, "loss": 0.8786, "step": 8265 }, { "epoch": 3.747053490480508, "grad_norm": 0.14872256155987865, "learning_rate": 1.4351581687718631e-05, "loss": 0.8776, "step": 8266 }, { "epoch": 3.747506799637353, "grad_norm": 0.2023205911680118, "learning_rate": 1.4341869066902949e-05, "loss": 0.8701, "step": 8267 }, { "epoch": 3.7479601087941976, "grad_norm": 0.14796383308314104, "learning_rate": 1.4332159015824503e-05, "loss": 0.8757, "step": 8268 }, { "epoch": 3.748413417951043, "grad_norm": 0.20325264018698197, "learning_rate": 1.4322451535455769e-05, "loss": 0.874, "step": 8269 }, { "epoch": 3.7488667271078877, "grad_norm": 0.16121402447515207, "learning_rate": 1.4312746626768963e-05, "loss": 0.8771, "step": 8270 }, { "epoch": 3.7493200362647325, "grad_norm": 0.21926262773846666, "learning_rate": 1.4303044290736092e-05, "loss": 0.891, "step": 8271 }, { "epoch": 3.7497733454215774, "grad_norm": 0.17750537650792728, "learning_rate": 1.429334452832884e-05, "loss": 0.8561, "step": 8272 }, { "epoch": 3.7502266545784226, "grad_norm": 0.16703089801807444, "learning_rate": 1.4283647340518702e-05, "loss": 0.8714, "step": 8273 }, { "epoch": 3.7506799637352675, "grad_norm": 0.2096679364443424, "learning_rate": 1.4273952728276844e-05, "loss": 0.8351, "step": 8274 }, { "epoch": 3.7511332728921123, "grad_norm": 0.14382317294192784, "learning_rate": 1.4264260692574259e-05, "loss": 0.8756, "step": 8275 }, { "epoch": 3.751586582048957, "grad_norm": 0.18377149655085803, "learning_rate": 1.4254571234381568e-05, "loss": 0.8804, "step": 8276 }, { "epoch": 3.7520398912058024, "grad_norm": 0.15476367778621056, "learning_rate": 1.424488435466925e-05, "loss": 0.8505, "step": 8277 }, { "epoch": 3.752493200362647, "grad_norm": 0.15450867483209485, "learning_rate": 1.4235200054407442e-05, "loss": 0.8498, "step": 8278 }, { "epoch": 3.752946509519492, "grad_norm": 0.156744726167166, "learning_rate": 1.4225518334566087e-05, "loss": 0.851, "step": 8279 }, { "epoch": 3.7533998186763373, "grad_norm": 0.14472125625126103, "learning_rate": 1.4215839196114804e-05, "loss": 0.8501, "step": 8280 }, { "epoch": 3.753853127833182, "grad_norm": 0.16895769865759544, "learning_rate": 1.4206162640023019e-05, "loss": 0.8548, "step": 8281 }, { "epoch": 3.754306436990027, "grad_norm": 0.14310657609782787, "learning_rate": 1.4196488667259836e-05, "loss": 0.8739, "step": 8282 }, { "epoch": 3.7547597461468722, "grad_norm": 0.1690246777278434, "learning_rate": 1.4186817278794163e-05, "loss": 0.8708, "step": 8283 }, { "epoch": 3.755213055303717, "grad_norm": 0.15859105978665014, "learning_rate": 1.4177148475594588e-05, "loss": 0.8543, "step": 8284 }, { "epoch": 3.755666364460562, "grad_norm": 0.14795560337470548, "learning_rate": 1.4167482258629503e-05, "loss": 0.8674, "step": 8285 }, { "epoch": 3.756119673617407, "grad_norm": 0.15200839895271667, "learning_rate": 1.4157818628866987e-05, "loss": 0.8633, "step": 8286 }, { "epoch": 3.756572982774252, "grad_norm": 0.2046986019481979, "learning_rate": 1.4148157587274871e-05, "loss": 0.8617, "step": 8287 }, { "epoch": 3.757026291931097, "grad_norm": 0.13649359127067434, "learning_rate": 1.4138499134820763e-05, "loss": 0.869, "step": 8288 }, { "epoch": 3.757479601087942, "grad_norm": 0.15636948910415663, "learning_rate": 1.4128843272471961e-05, "loss": 0.8752, "step": 8289 }, { "epoch": 3.757932910244787, "grad_norm": 0.17545326587738996, "learning_rate": 1.411919000119555e-05, "loss": 0.8611, "step": 8290 }, { "epoch": 3.7583862194016318, "grad_norm": 0.13373128027252218, "learning_rate": 1.410953932195831e-05, "loss": 0.8805, "step": 8291 }, { "epoch": 3.758839528558477, "grad_norm": 0.19943890163606115, "learning_rate": 1.409989123572681e-05, "loss": 0.8833, "step": 8292 }, { "epoch": 3.759292837715322, "grad_norm": 0.15443826628627402, "learning_rate": 1.4090245743467312e-05, "loss": 0.8603, "step": 8293 }, { "epoch": 3.7597461468721667, "grad_norm": 0.19453835880776926, "learning_rate": 1.4080602846145874e-05, "loss": 0.8602, "step": 8294 }, { "epoch": 3.760199456029012, "grad_norm": 0.18066424129669661, "learning_rate": 1.4070962544728209e-05, "loss": 0.851, "step": 8295 }, { "epoch": 3.760652765185857, "grad_norm": 0.15845578196487267, "learning_rate": 1.4061324840179862e-05, "loss": 0.851, "step": 8296 }, { "epoch": 3.7611060743427016, "grad_norm": 0.1579941604671543, "learning_rate": 1.4051689733466054e-05, "loss": 0.8642, "step": 8297 }, { "epoch": 3.761559383499547, "grad_norm": 0.13654469546799458, "learning_rate": 1.404205722555179e-05, "loss": 0.8544, "step": 8298 }, { "epoch": 3.7620126926563917, "grad_norm": 0.19452985310127874, "learning_rate": 1.4032427317401775e-05, "loss": 0.8932, "step": 8299 }, { "epoch": 3.7624660018132365, "grad_norm": 0.14643552816544483, "learning_rate": 1.4022800009980495e-05, "loss": 0.8796, "step": 8300 }, { "epoch": 3.762919310970082, "grad_norm": 0.2316827895033946, "learning_rate": 1.4013175304252134e-05, "loss": 0.8739, "step": 8301 }, { "epoch": 3.7633726201269266, "grad_norm": 0.18704442076109135, "learning_rate": 1.400355320118063e-05, "loss": 0.8709, "step": 8302 }, { "epoch": 3.7638259292837715, "grad_norm": 0.19907020196433298, "learning_rate": 1.3993933701729687e-05, "loss": 0.846, "step": 8303 }, { "epoch": 3.7642792384406167, "grad_norm": 0.19552830480156078, "learning_rate": 1.3984316806862706e-05, "loss": 0.8832, "step": 8304 }, { "epoch": 3.7647325475974616, "grad_norm": 0.17970678827632833, "learning_rate": 1.3974702517542862e-05, "loss": 0.881, "step": 8305 }, { "epoch": 3.7651858567543064, "grad_norm": 0.19463259928283091, "learning_rate": 1.396509083473304e-05, "loss": 0.8695, "step": 8306 }, { "epoch": 3.7656391659111517, "grad_norm": 0.17300586238604768, "learning_rate": 1.3955481759395895e-05, "loss": 0.8654, "step": 8307 }, { "epoch": 3.7660924750679965, "grad_norm": 0.1809293418136271, "learning_rate": 1.3945875292493782e-05, "loss": 0.8765, "step": 8308 }, { "epoch": 3.7665457842248413, "grad_norm": 0.1620891942997032, "learning_rate": 1.3936271434988849e-05, "loss": 0.8721, "step": 8309 }, { "epoch": 3.766999093381686, "grad_norm": 0.15862972847377485, "learning_rate": 1.3926670187842915e-05, "loss": 0.8729, "step": 8310 }, { "epoch": 3.7674524025385314, "grad_norm": 0.18235352662387655, "learning_rate": 1.3917071552017597e-05, "loss": 0.8695, "step": 8311 }, { "epoch": 3.7679057116953762, "grad_norm": 0.20237389120051782, "learning_rate": 1.3907475528474229e-05, "loss": 0.8483, "step": 8312 }, { "epoch": 3.768359020852221, "grad_norm": 0.1577910604967603, "learning_rate": 1.3897882118173853e-05, "loss": 0.872, "step": 8313 }, { "epoch": 3.7688123300090663, "grad_norm": 0.15544590735527447, "learning_rate": 1.388829132207731e-05, "loss": 0.8728, "step": 8314 }, { "epoch": 3.769265639165911, "grad_norm": 0.1624852269789361, "learning_rate": 1.3878703141145132e-05, "loss": 0.8738, "step": 8315 }, { "epoch": 3.769718948322756, "grad_norm": 0.1681644196348099, "learning_rate": 1.3869117576337603e-05, "loss": 0.8879, "step": 8316 }, { "epoch": 3.770172257479601, "grad_norm": 0.1609101687003592, "learning_rate": 1.3859534628614734e-05, "loss": 0.887, "step": 8317 }, { "epoch": 3.770625566636446, "grad_norm": 0.16086914936862504, "learning_rate": 1.384995429893631e-05, "loss": 0.8371, "step": 8318 }, { "epoch": 3.771078875793291, "grad_norm": 0.16205052617992144, "learning_rate": 1.3840376588261802e-05, "loss": 0.8825, "step": 8319 }, { "epoch": 3.7715321849501358, "grad_norm": 0.19218673737643302, "learning_rate": 1.3830801497550473e-05, "loss": 0.87, "step": 8320 }, { "epoch": 3.771985494106981, "grad_norm": 0.17403738388313186, "learning_rate": 1.3821229027761271e-05, "loss": 0.866, "step": 8321 }, { "epoch": 3.772438803263826, "grad_norm": 0.16828445611067652, "learning_rate": 1.3811659179852935e-05, "loss": 0.8581, "step": 8322 }, { "epoch": 3.7728921124206707, "grad_norm": 0.1642792008281721, "learning_rate": 1.3802091954783885e-05, "loss": 0.8698, "step": 8323 }, { "epoch": 3.773345421577516, "grad_norm": 0.1664157315387949, "learning_rate": 1.3792527353512336e-05, "loss": 0.8788, "step": 8324 }, { "epoch": 3.773798730734361, "grad_norm": 0.14638501279193883, "learning_rate": 1.3782965376996177e-05, "loss": 0.8474, "step": 8325 }, { "epoch": 3.7742520398912056, "grad_norm": 0.17821651987174517, "learning_rate": 1.3773406026193099e-05, "loss": 0.8703, "step": 8326 }, { "epoch": 3.774705349048051, "grad_norm": 0.16668880778942421, "learning_rate": 1.3763849302060486e-05, "loss": 0.861, "step": 8327 }, { "epoch": 3.7751586582048957, "grad_norm": 0.18714635693400142, "learning_rate": 1.375429520555546e-05, "loss": 0.8861, "step": 8328 }, { "epoch": 3.7756119673617405, "grad_norm": 0.2538905901201943, "learning_rate": 1.374474373763492e-05, "loss": 0.8736, "step": 8329 }, { "epoch": 3.776065276518586, "grad_norm": 0.17532952597128115, "learning_rate": 1.3735194899255438e-05, "loss": 0.8585, "step": 8330 }, { "epoch": 3.7765185856754306, "grad_norm": 0.22010768433997027, "learning_rate": 1.3725648691373397e-05, "loss": 0.8677, "step": 8331 }, { "epoch": 3.7769718948322755, "grad_norm": 0.23213185665142072, "learning_rate": 1.3716105114944842e-05, "loss": 0.859, "step": 8332 }, { "epoch": 3.7774252039891207, "grad_norm": 0.12876286443812845, "learning_rate": 1.3706564170925631e-05, "loss": 0.8474, "step": 8333 }, { "epoch": 3.7778785131459656, "grad_norm": 0.218004795298325, "learning_rate": 1.3697025860271272e-05, "loss": 0.8579, "step": 8334 }, { "epoch": 3.7783318223028104, "grad_norm": 0.16450310471319518, "learning_rate": 1.3687490183937087e-05, "loss": 0.8778, "step": 8335 }, { "epoch": 3.7787851314596557, "grad_norm": 0.18596375654202427, "learning_rate": 1.3677957142878077e-05, "loss": 0.8766, "step": 8336 }, { "epoch": 3.7792384406165005, "grad_norm": 0.16920179132073182, "learning_rate": 1.3668426738049032e-05, "loss": 0.8594, "step": 8337 }, { "epoch": 3.7796917497733453, "grad_norm": 0.2123769140435139, "learning_rate": 1.3658898970404421e-05, "loss": 0.8596, "step": 8338 }, { "epoch": 3.7801450589301906, "grad_norm": 0.21523849199924477, "learning_rate": 1.364937384089851e-05, "loss": 0.8811, "step": 8339 }, { "epoch": 3.7805983680870354, "grad_norm": 0.24856809878877328, "learning_rate": 1.3639851350485236e-05, "loss": 0.8798, "step": 8340 }, { "epoch": 3.7810516772438802, "grad_norm": 0.19661601337417922, "learning_rate": 1.3630331500118334e-05, "loss": 0.861, "step": 8341 }, { "epoch": 3.7815049864007255, "grad_norm": 0.2204243222261221, "learning_rate": 1.3620814290751234e-05, "loss": 0.8593, "step": 8342 }, { "epoch": 3.7819582955575703, "grad_norm": 0.17062844848184788, "learning_rate": 1.3611299723337097e-05, "loss": 0.8568, "step": 8343 }, { "epoch": 3.782411604714415, "grad_norm": 0.1804731817025905, "learning_rate": 1.3601787798828858e-05, "loss": 0.8678, "step": 8344 }, { "epoch": 3.7828649138712604, "grad_norm": 0.19867466016650293, "learning_rate": 1.3592278518179138e-05, "loss": 0.8662, "step": 8345 }, { "epoch": 3.7833182230281053, "grad_norm": 0.1637325231347063, "learning_rate": 1.3582771882340354e-05, "loss": 0.8759, "step": 8346 }, { "epoch": 3.78377153218495, "grad_norm": 0.21693828924116523, "learning_rate": 1.357326789226459e-05, "loss": 0.8696, "step": 8347 }, { "epoch": 3.7842248413417954, "grad_norm": 0.15405316505299158, "learning_rate": 1.3563766548903722e-05, "loss": 0.8607, "step": 8348 }, { "epoch": 3.78467815049864, "grad_norm": 0.22033050274913982, "learning_rate": 1.355426785320932e-05, "loss": 0.8751, "step": 8349 }, { "epoch": 3.785131459655485, "grad_norm": 0.16688111761974814, "learning_rate": 1.354477180613273e-05, "loss": 0.857, "step": 8350 }, { "epoch": 3.78558476881233, "grad_norm": 0.20018589705891468, "learning_rate": 1.3535278408624976e-05, "loss": 0.8675, "step": 8351 }, { "epoch": 3.786038077969175, "grad_norm": 0.20475536423354568, "learning_rate": 1.3525787661636898e-05, "loss": 0.8716, "step": 8352 }, { "epoch": 3.78649138712602, "grad_norm": 0.19513548749132514, "learning_rate": 1.3516299566118965e-05, "loss": 0.8626, "step": 8353 }, { "epoch": 3.786944696282865, "grad_norm": 0.15150195751141057, "learning_rate": 1.3506814123021474e-05, "loss": 0.8981, "step": 8354 }, { "epoch": 3.7873980054397096, "grad_norm": 0.21741799602015913, "learning_rate": 1.3497331333294402e-05, "loss": 0.8779, "step": 8355 }, { "epoch": 3.787851314596555, "grad_norm": 0.15275789816430904, "learning_rate": 1.34878511978875e-05, "loss": 0.8666, "step": 8356 }, { "epoch": 3.7883046237533997, "grad_norm": 0.2213903402496919, "learning_rate": 1.3478373717750218e-05, "loss": 0.861, "step": 8357 }, { "epoch": 3.7887579329102445, "grad_norm": 0.16064048110076165, "learning_rate": 1.3468898893831735e-05, "loss": 0.8802, "step": 8358 }, { "epoch": 3.78921124206709, "grad_norm": 0.17734577493436499, "learning_rate": 1.3459426727081022e-05, "loss": 0.8568, "step": 8359 }, { "epoch": 3.7896645512239346, "grad_norm": 0.18172427281590245, "learning_rate": 1.3449957218446703e-05, "loss": 0.8515, "step": 8360 }, { "epoch": 3.7901178603807795, "grad_norm": 0.17116698860655613, "learning_rate": 1.344049036887722e-05, "loss": 0.869, "step": 8361 }, { "epoch": 3.7905711695376247, "grad_norm": 0.1970977587226363, "learning_rate": 1.3431026179320661e-05, "loss": 0.8775, "step": 8362 }, { "epoch": 3.7910244786944696, "grad_norm": 0.18011794253749583, "learning_rate": 1.3421564650724932e-05, "loss": 0.8678, "step": 8363 }, { "epoch": 3.7914777878513144, "grad_norm": 0.16151074727382977, "learning_rate": 1.3412105784037603e-05, "loss": 0.8577, "step": 8364 }, { "epoch": 3.7919310970081597, "grad_norm": 0.16881761888289987, "learning_rate": 1.3402649580206037e-05, "loss": 0.8588, "step": 8365 }, { "epoch": 3.7923844061650045, "grad_norm": 0.15322830139922486, "learning_rate": 1.3393196040177263e-05, "loss": 0.864, "step": 8366 }, { "epoch": 3.7928377153218493, "grad_norm": 0.1544647696829164, "learning_rate": 1.338374516489812e-05, "loss": 0.8507, "step": 8367 }, { "epoch": 3.7932910244786946, "grad_norm": 0.15781444127023234, "learning_rate": 1.3374296955315123e-05, "loss": 0.8757, "step": 8368 }, { "epoch": 3.7937443336355394, "grad_norm": 0.20555995587995746, "learning_rate": 1.3364851412374522e-05, "loss": 0.8641, "step": 8369 }, { "epoch": 3.7941976427923843, "grad_norm": 0.14505751728324723, "learning_rate": 1.3355408537022342e-05, "loss": 0.8646, "step": 8370 }, { "epoch": 3.7946509519492295, "grad_norm": 0.14773420555273392, "learning_rate": 1.3345968330204308e-05, "loss": 0.8615, "step": 8371 }, { "epoch": 3.7951042611060744, "grad_norm": 0.14276812579973686, "learning_rate": 1.333653079286588e-05, "loss": 0.8635, "step": 8372 }, { "epoch": 3.795557570262919, "grad_norm": 0.18320058864858763, "learning_rate": 1.332709592595224e-05, "loss": 0.8621, "step": 8373 }, { "epoch": 3.7960108794197644, "grad_norm": 0.150397505091882, "learning_rate": 1.3317663730408349e-05, "loss": 0.8541, "step": 8374 }, { "epoch": 3.7964641885766093, "grad_norm": 0.20013544086034435, "learning_rate": 1.3308234207178838e-05, "loss": 0.8668, "step": 8375 }, { "epoch": 3.796917497733454, "grad_norm": 0.17775328901442364, "learning_rate": 1.3298807357208134e-05, "loss": 0.8715, "step": 8376 }, { "epoch": 3.7973708068902994, "grad_norm": 0.14704009067079046, "learning_rate": 1.328938318144033e-05, "loss": 0.8646, "step": 8377 }, { "epoch": 3.797824116047144, "grad_norm": 0.16942291562988712, "learning_rate": 1.3279961680819313e-05, "loss": 0.8561, "step": 8378 }, { "epoch": 3.798277425203989, "grad_norm": 0.16297248982094464, "learning_rate": 1.3270542856288655e-05, "loss": 0.8729, "step": 8379 }, { "epoch": 3.7987307343608343, "grad_norm": 0.1684345591008798, "learning_rate": 1.3261126708791694e-05, "loss": 0.8704, "step": 8380 }, { "epoch": 3.799184043517679, "grad_norm": 0.19162327727940093, "learning_rate": 1.3251713239271467e-05, "loss": 0.8784, "step": 8381 }, { "epoch": 3.799637352674524, "grad_norm": 0.14561631675319922, "learning_rate": 1.3242302448670783e-05, "loss": 0.8537, "step": 8382 }, { "epoch": 3.8000906618313692, "grad_norm": 0.19005990441076406, "learning_rate": 1.3232894337932148e-05, "loss": 0.8712, "step": 8383 }, { "epoch": 3.800543970988214, "grad_norm": 0.18299265153449598, "learning_rate": 1.3223488907997797e-05, "loss": 0.8612, "step": 8384 }, { "epoch": 3.800997280145059, "grad_norm": 0.1768689942699595, "learning_rate": 1.3214086159809743e-05, "loss": 0.8663, "step": 8385 }, { "epoch": 3.801450589301904, "grad_norm": 0.23570872899830203, "learning_rate": 1.3204686094309667e-05, "loss": 0.8883, "step": 8386 }, { "epoch": 3.801903898458749, "grad_norm": 0.1595681467858597, "learning_rate": 1.3195288712439042e-05, "loss": 0.8722, "step": 8387 }, { "epoch": 3.802357207615594, "grad_norm": 0.22201433306751292, "learning_rate": 1.318589401513902e-05, "loss": 0.8737, "step": 8388 }, { "epoch": 3.8028105167724386, "grad_norm": 0.19104522206873675, "learning_rate": 1.317650200335053e-05, "loss": 0.8519, "step": 8389 }, { "epoch": 3.803263825929284, "grad_norm": 0.17090609273984594, "learning_rate": 1.3167112678014196e-05, "loss": 0.872, "step": 8390 }, { "epoch": 3.8037171350861287, "grad_norm": 0.19264746317614878, "learning_rate": 1.3157726040070387e-05, "loss": 0.8517, "step": 8391 }, { "epoch": 3.8041704442429736, "grad_norm": 0.139432887020941, "learning_rate": 1.3148342090459193e-05, "loss": 0.8786, "step": 8392 }, { "epoch": 3.804623753399819, "grad_norm": 0.162719714495317, "learning_rate": 1.313896083012046e-05, "loss": 0.8586, "step": 8393 }, { "epoch": 3.8050770625566637, "grad_norm": 0.14984127101905992, "learning_rate": 1.3129582259993735e-05, "loss": 0.8528, "step": 8394 }, { "epoch": 3.8055303717135085, "grad_norm": 0.16830487557976112, "learning_rate": 1.3120206381018332e-05, "loss": 0.8647, "step": 8395 }, { "epoch": 3.8059836808703533, "grad_norm": 0.1577112766285116, "learning_rate": 1.3110833194133243e-05, "loss": 0.858, "step": 8396 }, { "epoch": 3.8064369900271986, "grad_norm": 0.15527955639682708, "learning_rate": 1.3101462700277248e-05, "loss": 0.8645, "step": 8397 }, { "epoch": 3.8068902991840434, "grad_norm": 0.19460891753323062, "learning_rate": 1.3092094900388816e-05, "loss": 0.8589, "step": 8398 }, { "epoch": 3.8073436083408883, "grad_norm": 0.1779663265893554, "learning_rate": 1.308272979540615e-05, "loss": 0.8808, "step": 8399 }, { "epoch": 3.8077969174977335, "grad_norm": 0.21100098348289456, "learning_rate": 1.3073367386267214e-05, "loss": 0.8691, "step": 8400 }, { "epoch": 3.8082502266545784, "grad_norm": 0.22608582350690823, "learning_rate": 1.306400767390966e-05, "loss": 0.8728, "step": 8401 }, { "epoch": 3.808703535811423, "grad_norm": 0.16307557618159524, "learning_rate": 1.3054650659270914e-05, "loss": 0.8835, "step": 8402 }, { "epoch": 3.8091568449682685, "grad_norm": 0.18682196587944325, "learning_rate": 1.3045296343288079e-05, "loss": 0.8687, "step": 8403 }, { "epoch": 3.8096101541251133, "grad_norm": 0.13815895635697478, "learning_rate": 1.3035944726898051e-05, "loss": 0.8399, "step": 8404 }, { "epoch": 3.810063463281958, "grad_norm": 0.17853862966677186, "learning_rate": 1.3026595811037387e-05, "loss": 0.8577, "step": 8405 }, { "epoch": 3.8105167724388034, "grad_norm": 0.17811593753319246, "learning_rate": 1.3017249596642443e-05, "loss": 0.8456, "step": 8406 }, { "epoch": 3.810970081595648, "grad_norm": 0.15056544695878224, "learning_rate": 1.3007906084649235e-05, "loss": 0.8564, "step": 8407 }, { "epoch": 3.811423390752493, "grad_norm": 0.19896374246323925, "learning_rate": 1.2998565275993577e-05, "loss": 0.8721, "step": 8408 }, { "epoch": 3.8118766999093383, "grad_norm": 0.15848546826010843, "learning_rate": 1.2989227171610965e-05, "loss": 0.8597, "step": 8409 }, { "epoch": 3.812330009066183, "grad_norm": 0.19067206743759088, "learning_rate": 1.2979891772436629e-05, "loss": 0.8843, "step": 8410 }, { "epoch": 3.812783318223028, "grad_norm": 0.17150509340348072, "learning_rate": 1.2970559079405534e-05, "loss": 0.8624, "step": 8411 }, { "epoch": 3.8132366273798732, "grad_norm": 0.17537138185544382, "learning_rate": 1.2961229093452396e-05, "loss": 0.8407, "step": 8412 }, { "epoch": 3.813689936536718, "grad_norm": 0.15442847953924654, "learning_rate": 1.2951901815511629e-05, "loss": 0.866, "step": 8413 }, { "epoch": 3.814143245693563, "grad_norm": 0.1786161235905587, "learning_rate": 1.2942577246517378e-05, "loss": 0.8623, "step": 8414 }, { "epoch": 3.814596554850408, "grad_norm": 0.18495794079981936, "learning_rate": 1.293325538740355e-05, "loss": 0.8864, "step": 8415 }, { "epoch": 3.815049864007253, "grad_norm": 0.16089857429477591, "learning_rate": 1.2923936239103725e-05, "loss": 0.8723, "step": 8416 }, { "epoch": 3.815503173164098, "grad_norm": 0.16953974561460325, "learning_rate": 1.2914619802551278e-05, "loss": 0.8329, "step": 8417 }, { "epoch": 3.815956482320943, "grad_norm": 0.17502850552483193, "learning_rate": 1.2905306078679245e-05, "loss": 0.856, "step": 8418 }, { "epoch": 3.816409791477788, "grad_norm": 0.15575705356666836, "learning_rate": 1.2895995068420448e-05, "loss": 0.8823, "step": 8419 }, { "epoch": 3.8168631006346327, "grad_norm": 0.23652419248819942, "learning_rate": 1.2886686772707395e-05, "loss": 0.8752, "step": 8420 }, { "epoch": 3.817316409791478, "grad_norm": 0.15010643315483477, "learning_rate": 1.2877381192472358e-05, "loss": 0.8535, "step": 8421 }, { "epoch": 3.817769718948323, "grad_norm": 0.1949655391009599, "learning_rate": 1.2868078328647289e-05, "loss": 0.8468, "step": 8422 }, { "epoch": 3.8182230281051677, "grad_norm": 0.17880365756434383, "learning_rate": 1.2858778182163932e-05, "loss": 0.8596, "step": 8423 }, { "epoch": 3.818676337262013, "grad_norm": 0.14316967559978466, "learning_rate": 1.2849480753953704e-05, "loss": 0.8607, "step": 8424 }, { "epoch": 3.8191296464188578, "grad_norm": 0.1495833552399565, "learning_rate": 1.2840186044947762e-05, "loss": 0.8656, "step": 8425 }, { "epoch": 3.8195829555757026, "grad_norm": 0.15965044946854048, "learning_rate": 1.2830894056077021e-05, "loss": 0.8656, "step": 8426 }, { "epoch": 3.820036264732548, "grad_norm": 0.1763881581501009, "learning_rate": 1.2821604788272075e-05, "loss": 0.8782, "step": 8427 }, { "epoch": 3.8204895738893927, "grad_norm": 0.15161062682095794, "learning_rate": 1.28123182424633e-05, "loss": 0.8664, "step": 8428 }, { "epoch": 3.8209428830462375, "grad_norm": 0.1468231396514731, "learning_rate": 1.2803034419580756e-05, "loss": 0.8795, "step": 8429 }, { "epoch": 3.8213961922030824, "grad_norm": 0.1442005908952038, "learning_rate": 1.2793753320554241e-05, "loss": 0.8863, "step": 8430 }, { "epoch": 3.8218495013599276, "grad_norm": 0.14217102383799893, "learning_rate": 1.2784474946313279e-05, "loss": 0.8817, "step": 8431 }, { "epoch": 3.8223028105167725, "grad_norm": 0.15583091903097898, "learning_rate": 1.2775199297787148e-05, "loss": 0.8744, "step": 8432 }, { "epoch": 3.8227561196736173, "grad_norm": 0.13002418479753983, "learning_rate": 1.276592637590481e-05, "loss": 0.8608, "step": 8433 }, { "epoch": 3.823209428830462, "grad_norm": 0.17362126025606278, "learning_rate": 1.2756656181594998e-05, "loss": 0.8733, "step": 8434 }, { "epoch": 3.8236627379873074, "grad_norm": 0.14286604594117694, "learning_rate": 1.2747388715786117e-05, "loss": 0.857, "step": 8435 }, { "epoch": 3.824116047144152, "grad_norm": 0.17368469679785703, "learning_rate": 1.2738123979406369e-05, "loss": 0.8623, "step": 8436 }, { "epoch": 3.824569356300997, "grad_norm": 0.17456008511617532, "learning_rate": 1.2728861973383611e-05, "loss": 0.8733, "step": 8437 }, { "epoch": 3.8250226654578423, "grad_norm": 0.14747243264649143, "learning_rate": 1.2719602698645485e-05, "loss": 0.8686, "step": 8438 }, { "epoch": 3.825475974614687, "grad_norm": 0.1673701682494268, "learning_rate": 1.2710346156119324e-05, "loss": 0.8786, "step": 8439 }, { "epoch": 3.825929283771532, "grad_norm": 0.14546830098216806, "learning_rate": 1.2701092346732189e-05, "loss": 0.8606, "step": 8440 }, { "epoch": 3.8263825929283772, "grad_norm": 0.20244506645832858, "learning_rate": 1.2691841271410898e-05, "loss": 0.8682, "step": 8441 }, { "epoch": 3.826835902085222, "grad_norm": 0.1768573931463237, "learning_rate": 1.2682592931081947e-05, "loss": 0.8597, "step": 8442 }, { "epoch": 3.827289211242067, "grad_norm": 0.14651519789352568, "learning_rate": 1.2673347326671612e-05, "loss": 0.8606, "step": 8443 }, { "epoch": 3.827742520398912, "grad_norm": 0.17421728879657936, "learning_rate": 1.2664104459105837e-05, "loss": 0.8661, "step": 8444 }, { "epoch": 3.828195829555757, "grad_norm": 0.1716167331177481, "learning_rate": 1.2654864329310357e-05, "loss": 0.8697, "step": 8445 }, { "epoch": 3.828649138712602, "grad_norm": 0.1752100033621081, "learning_rate": 1.2645626938210565e-05, "loss": 0.8671, "step": 8446 }, { "epoch": 3.829102447869447, "grad_norm": 0.12784280213535143, "learning_rate": 1.263639228673164e-05, "loss": 0.8669, "step": 8447 }, { "epoch": 3.829555757026292, "grad_norm": 0.153180970175782, "learning_rate": 1.2627160375798448e-05, "loss": 0.8718, "step": 8448 }, { "epoch": 3.8300090661831367, "grad_norm": 0.13666503894337376, "learning_rate": 1.2617931206335596e-05, "loss": 0.8702, "step": 8449 }, { "epoch": 3.830462375339982, "grad_norm": 0.1638657383786323, "learning_rate": 1.2608704779267394e-05, "loss": 0.868, "step": 8450 }, { "epoch": 3.830915684496827, "grad_norm": 0.1447411120862144, "learning_rate": 1.2599481095517918e-05, "loss": 0.8565, "step": 8451 }, { "epoch": 3.8313689936536717, "grad_norm": 0.1399450860875307, "learning_rate": 1.2590260156010929e-05, "loss": 0.8478, "step": 8452 }, { "epoch": 3.831822302810517, "grad_norm": 0.15337928247368912, "learning_rate": 1.2581041961669955e-05, "loss": 0.8854, "step": 8453 }, { "epoch": 3.8322756119673618, "grad_norm": 0.14906802981387016, "learning_rate": 1.2571826513418194e-05, "loss": 0.894, "step": 8454 }, { "epoch": 3.8327289211242066, "grad_norm": 0.12942424243130418, "learning_rate": 1.2562613812178635e-05, "loss": 0.8619, "step": 8455 }, { "epoch": 3.833182230281052, "grad_norm": 0.15641236086797938, "learning_rate": 1.255340385887394e-05, "loss": 0.8591, "step": 8456 }, { "epoch": 3.8336355394378967, "grad_norm": 0.13888806797451958, "learning_rate": 1.25441966544265e-05, "loss": 0.8665, "step": 8457 }, { "epoch": 3.8340888485947415, "grad_norm": 0.14944400328408458, "learning_rate": 1.2534992199758463e-05, "loss": 0.858, "step": 8458 }, { "epoch": 3.834542157751587, "grad_norm": 0.11996321298360806, "learning_rate": 1.252579049579167e-05, "loss": 0.8303, "step": 8459 }, { "epoch": 3.8349954669084316, "grad_norm": 0.14294776848768556, "learning_rate": 1.2516591543447713e-05, "loss": 0.8521, "step": 8460 }, { "epoch": 3.8354487760652765, "grad_norm": 0.13241143975296418, "learning_rate": 1.250739534364787e-05, "loss": 0.8654, "step": 8461 }, { "epoch": 3.8359020852221217, "grad_norm": 0.12957744818835207, "learning_rate": 1.2498201897313199e-05, "loss": 0.8801, "step": 8462 }, { "epoch": 3.8363553943789666, "grad_norm": 0.14310970424251077, "learning_rate": 1.2489011205364422e-05, "loss": 0.8922, "step": 8463 }, { "epoch": 3.8368087035358114, "grad_norm": 0.13281469585461375, "learning_rate": 1.2479823268722036e-05, "loss": 0.8526, "step": 8464 }, { "epoch": 3.8372620126926567, "grad_norm": 0.14402474480551597, "learning_rate": 1.2470638088306234e-05, "loss": 0.8803, "step": 8465 }, { "epoch": 3.8377153218495015, "grad_norm": 0.14803400725253843, "learning_rate": 1.2461455665036918e-05, "loss": 0.8699, "step": 8466 }, { "epoch": 3.8381686310063463, "grad_norm": 0.15172275193101936, "learning_rate": 1.2452275999833767e-05, "loss": 0.8678, "step": 8467 }, { "epoch": 3.838621940163191, "grad_norm": 0.15293945100876574, "learning_rate": 1.2443099093616136e-05, "loss": 0.8659, "step": 8468 }, { "epoch": 3.8390752493200364, "grad_norm": 0.14190581435577082, "learning_rate": 1.2433924947303102e-05, "loss": 0.8797, "step": 8469 }, { "epoch": 3.8395285584768812, "grad_norm": 0.1853348438393179, "learning_rate": 1.2424753561813518e-05, "loss": 0.8707, "step": 8470 }, { "epoch": 3.839981867633726, "grad_norm": 0.13950077044115547, "learning_rate": 1.2415584938065904e-05, "loss": 0.8653, "step": 8471 }, { "epoch": 3.840435176790571, "grad_norm": 0.1504116677845372, "learning_rate": 1.2406419076978512e-05, "loss": 0.8515, "step": 8472 }, { "epoch": 3.840888485947416, "grad_norm": 0.1693240117603649, "learning_rate": 1.2397255979469365e-05, "loss": 0.8674, "step": 8473 }, { "epoch": 3.841341795104261, "grad_norm": 0.14900318114247985, "learning_rate": 1.2388095646456137e-05, "loss": 0.8718, "step": 8474 }, { "epoch": 3.841795104261106, "grad_norm": 0.16640586155248255, "learning_rate": 1.2378938078856292e-05, "loss": 0.877, "step": 8475 }, { "epoch": 3.842248413417951, "grad_norm": 0.13934250325095954, "learning_rate": 1.236978327758697e-05, "loss": 0.8674, "step": 8476 }, { "epoch": 3.842701722574796, "grad_norm": 0.14234067465217976, "learning_rate": 1.2360631243565062e-05, "loss": 0.8568, "step": 8477 }, { "epoch": 3.8431550317316407, "grad_norm": 0.1346896316705787, "learning_rate": 1.2351481977707151e-05, "loss": 0.8739, "step": 8478 }, { "epoch": 3.843608340888486, "grad_norm": 0.15750083004728233, "learning_rate": 1.2342335480929598e-05, "loss": 0.8739, "step": 8479 }, { "epoch": 3.844061650045331, "grad_norm": 0.12967481242027526, "learning_rate": 1.2333191754148413e-05, "loss": 0.8811, "step": 8480 }, { "epoch": 3.8445149592021757, "grad_norm": 0.4505920311839128, "learning_rate": 1.2324050798279394e-05, "loss": 0.8967, "step": 8481 }, { "epoch": 3.844968268359021, "grad_norm": 0.14367519760147532, "learning_rate": 1.2314912614238033e-05, "loss": 0.8944, "step": 8482 }, { "epoch": 3.8454215775158658, "grad_norm": 0.17737118831045182, "learning_rate": 1.2305777202939523e-05, "loss": 0.8659, "step": 8483 }, { "epoch": 3.8458748866727106, "grad_norm": 0.14817261489705646, "learning_rate": 1.229664456529883e-05, "loss": 0.8796, "step": 8484 }, { "epoch": 3.846328195829556, "grad_norm": 0.1392670931703586, "learning_rate": 1.2287514702230592e-05, "loss": 0.8635, "step": 8485 }, { "epoch": 3.8467815049864007, "grad_norm": 0.13851694059156047, "learning_rate": 1.2278387614649225e-05, "loss": 0.8529, "step": 8486 }, { "epoch": 3.8472348141432455, "grad_norm": 0.15651937586249276, "learning_rate": 1.2269263303468786e-05, "loss": 0.872, "step": 8487 }, { "epoch": 3.847688123300091, "grad_norm": 0.14422516043012434, "learning_rate": 1.2260141769603142e-05, "loss": 0.8716, "step": 8488 }, { "epoch": 3.8481414324569356, "grad_norm": 0.21481379193754693, "learning_rate": 1.2251023013965808e-05, "loss": 0.8839, "step": 8489 }, { "epoch": 3.8485947416137805, "grad_norm": 0.13315848981266973, "learning_rate": 1.224190703747009e-05, "loss": 0.8731, "step": 8490 }, { "epoch": 3.8490480507706257, "grad_norm": 0.1469268395121675, "learning_rate": 1.2232793841028942e-05, "loss": 0.8521, "step": 8491 }, { "epoch": 3.8495013599274706, "grad_norm": 0.13676164710756258, "learning_rate": 1.2223683425555116e-05, "loss": 0.8775, "step": 8492 }, { "epoch": 3.8499546690843154, "grad_norm": 0.17514895990524365, "learning_rate": 1.2214575791961015e-05, "loss": 0.8698, "step": 8493 }, { "epoch": 3.8504079782411607, "grad_norm": 0.13934411086250226, "learning_rate": 1.2205470941158818e-05, "loss": 0.854, "step": 8494 }, { "epoch": 3.8508612873980055, "grad_norm": 0.1320436681160387, "learning_rate": 1.2196368874060381e-05, "loss": 0.8666, "step": 8495 }, { "epoch": 3.8513145965548503, "grad_norm": 0.15798185164080897, "learning_rate": 1.2187269591577327e-05, "loss": 0.8632, "step": 8496 }, { "epoch": 3.8517679057116956, "grad_norm": 0.15112294684968328, "learning_rate": 1.2178173094620962e-05, "loss": 0.8416, "step": 8497 }, { "epoch": 3.8522212148685404, "grad_norm": 0.15933302196830004, "learning_rate": 1.2169079384102319e-05, "loss": 0.8793, "step": 8498 }, { "epoch": 3.8526745240253852, "grad_norm": 0.17033768708730612, "learning_rate": 1.2159988460932181e-05, "loss": 0.8713, "step": 8499 }, { "epoch": 3.8531278331822305, "grad_norm": 0.13366994534847204, "learning_rate": 1.2150900326021007e-05, "loss": 0.8669, "step": 8500 }, { "epoch": 3.8535811423390753, "grad_norm": 0.15964847115033573, "learning_rate": 1.214181498027902e-05, "loss": 0.8734, "step": 8501 }, { "epoch": 3.85403445149592, "grad_norm": 0.1473295387284055, "learning_rate": 1.2132732424616132e-05, "loss": 0.8568, "step": 8502 }, { "epoch": 3.8544877606527654, "grad_norm": 0.1503125010453061, "learning_rate": 1.2123652659941998e-05, "loss": 0.8594, "step": 8503 }, { "epoch": 3.8549410698096103, "grad_norm": 0.16910627635784217, "learning_rate": 1.2114575687165969e-05, "loss": 0.8736, "step": 8504 }, { "epoch": 3.855394378966455, "grad_norm": 0.14369406508842217, "learning_rate": 1.2105501507197146e-05, "loss": 0.8712, "step": 8505 }, { "epoch": 3.8558476881233004, "grad_norm": 0.1714415040807548, "learning_rate": 1.2096430120944333e-05, "loss": 0.8592, "step": 8506 }, { "epoch": 3.856300997280145, "grad_norm": 0.14220689439227696, "learning_rate": 1.2087361529316048e-05, "loss": 0.8773, "step": 8507 }, { "epoch": 3.85675430643699, "grad_norm": 0.17540741181607364, "learning_rate": 1.2078295733220529e-05, "loss": 0.8773, "step": 8508 }, { "epoch": 3.857207615593835, "grad_norm": 0.13502347166103446, "learning_rate": 1.2069232733565763e-05, "loss": 0.8665, "step": 8509 }, { "epoch": 3.85766092475068, "grad_norm": 0.1656901465039133, "learning_rate": 1.2060172531259414e-05, "loss": 0.8711, "step": 8510 }, { "epoch": 3.858114233907525, "grad_norm": 0.13469689144603061, "learning_rate": 1.2051115127208907e-05, "loss": 0.8428, "step": 8511 }, { "epoch": 3.8585675430643698, "grad_norm": 0.1509541877849702, "learning_rate": 1.2042060522321366e-05, "loss": 0.8471, "step": 8512 }, { "epoch": 3.8590208522212146, "grad_norm": 0.14258727332432833, "learning_rate": 1.2033008717503619e-05, "loss": 0.8756, "step": 8513 }, { "epoch": 3.85947416137806, "grad_norm": 0.14307910559329556, "learning_rate": 1.2023959713662255e-05, "loss": 0.8512, "step": 8514 }, { "epoch": 3.8599274705349047, "grad_norm": 0.12504802153071046, "learning_rate": 1.2014913511703528e-05, "loss": 0.8687, "step": 8515 }, { "epoch": 3.8603807796917495, "grad_norm": 0.18156515128316184, "learning_rate": 1.2005870112533478e-05, "loss": 0.8823, "step": 8516 }, { "epoch": 3.860834088848595, "grad_norm": 0.13855156095204954, "learning_rate": 1.1996829517057793e-05, "loss": 0.8866, "step": 8517 }, { "epoch": 3.8612873980054396, "grad_norm": 0.16523724144244042, "learning_rate": 1.1987791726181945e-05, "loss": 0.8614, "step": 8518 }, { "epoch": 3.8617407071622845, "grad_norm": 0.13126751811266654, "learning_rate": 1.1978756740811068e-05, "loss": 0.8545, "step": 8519 }, { "epoch": 3.8621940163191297, "grad_norm": 0.15297004266123265, "learning_rate": 1.1969724561850073e-05, "loss": 0.8645, "step": 8520 }, { "epoch": 3.8626473254759746, "grad_norm": 0.17175040706124806, "learning_rate": 1.1960695190203527e-05, "loss": 0.8508, "step": 8521 }, { "epoch": 3.8631006346328194, "grad_norm": 0.1630344399342654, "learning_rate": 1.1951668626775779e-05, "loss": 0.8915, "step": 8522 }, { "epoch": 3.8635539437896647, "grad_norm": 0.17575601952622028, "learning_rate": 1.1942644872470846e-05, "loss": 0.8512, "step": 8523 }, { "epoch": 3.8640072529465095, "grad_norm": 0.17118434869652346, "learning_rate": 1.193362392819248e-05, "loss": 0.8761, "step": 8524 }, { "epoch": 3.8644605621033543, "grad_norm": 0.1750100283300416, "learning_rate": 1.192460579484417e-05, "loss": 0.8902, "step": 8525 }, { "epoch": 3.8649138712601996, "grad_norm": 0.1587292025321191, "learning_rate": 1.1915590473329108e-05, "loss": 0.8817, "step": 8526 }, { "epoch": 3.8653671804170444, "grad_norm": 0.16564848832728762, "learning_rate": 1.1906577964550196e-05, "loss": 0.8609, "step": 8527 }, { "epoch": 3.8658204895738892, "grad_norm": 0.1903845045538452, "learning_rate": 1.189756826941005e-05, "loss": 0.8486, "step": 8528 }, { "epoch": 3.8662737987307345, "grad_norm": 0.17832396191452124, "learning_rate": 1.1888561388811054e-05, "loss": 0.8716, "step": 8529 }, { "epoch": 3.8667271078875793, "grad_norm": 0.1831648609260826, "learning_rate": 1.1879557323655236e-05, "loss": 0.8537, "step": 8530 }, { "epoch": 3.867180417044424, "grad_norm": 0.1808121827857336, "learning_rate": 1.1870556074844406e-05, "loss": 0.8704, "step": 8531 }, { "epoch": 3.8676337262012694, "grad_norm": 0.1838963716959412, "learning_rate": 1.1861557643280048e-05, "loss": 0.8646, "step": 8532 }, { "epoch": 3.8680870353581143, "grad_norm": 0.1789993688659228, "learning_rate": 1.1852562029863397e-05, "loss": 0.8762, "step": 8533 }, { "epoch": 3.868540344514959, "grad_norm": 0.18993264401190316, "learning_rate": 1.1843569235495376e-05, "loss": 0.8462, "step": 8534 }, { "epoch": 3.8689936536718044, "grad_norm": 0.14016244900659805, "learning_rate": 1.1834579261076655e-05, "loss": 0.8744, "step": 8535 }, { "epoch": 3.869446962828649, "grad_norm": 0.20038580582828347, "learning_rate": 1.1825592107507586e-05, "loss": 0.8616, "step": 8536 }, { "epoch": 3.869900271985494, "grad_norm": 0.15782183435909786, "learning_rate": 1.181660777568828e-05, "loss": 0.8681, "step": 8537 }, { "epoch": 3.8703535811423393, "grad_norm": 0.16541764110380736, "learning_rate": 1.1807626266518542e-05, "loss": 0.8642, "step": 8538 }, { "epoch": 3.870806890299184, "grad_norm": 0.174609802471546, "learning_rate": 1.1798647580897868e-05, "loss": 0.8932, "step": 8539 }, { "epoch": 3.871260199456029, "grad_norm": 0.14786363462034463, "learning_rate": 1.1789671719725541e-05, "loss": 0.855, "step": 8540 }, { "epoch": 3.871713508612874, "grad_norm": 0.15908804066511234, "learning_rate": 1.1780698683900482e-05, "loss": 0.8638, "step": 8541 }, { "epoch": 3.872166817769719, "grad_norm": 0.1727451354405276, "learning_rate": 1.1771728474321402e-05, "loss": 0.8711, "step": 8542 }, { "epoch": 3.872620126926564, "grad_norm": 0.1700045664180198, "learning_rate": 1.1762761091886663e-05, "loss": 0.8748, "step": 8543 }, { "epoch": 3.873073436083409, "grad_norm": 0.1601759429878176, "learning_rate": 1.1753796537494408e-05, "loss": 0.8639, "step": 8544 }, { "epoch": 3.873526745240254, "grad_norm": 0.19445915876048897, "learning_rate": 1.1744834812042427e-05, "loss": 0.8808, "step": 8545 }, { "epoch": 3.873980054397099, "grad_norm": 0.1440379134539822, "learning_rate": 1.1735875916428285e-05, "loss": 0.864, "step": 8546 }, { "epoch": 3.8744333635539436, "grad_norm": 0.18840326966296078, "learning_rate": 1.172691985154923e-05, "loss": 0.8703, "step": 8547 }, { "epoch": 3.874886672710789, "grad_norm": 0.13977817513409788, "learning_rate": 1.1717966618302255e-05, "loss": 0.8689, "step": 8548 }, { "epoch": 3.8753399818676337, "grad_norm": 0.15809465368410877, "learning_rate": 1.1709016217584029e-05, "loss": 0.8682, "step": 8549 }, { "epoch": 3.8757932910244786, "grad_norm": 0.13596875763531366, "learning_rate": 1.170006865029099e-05, "loss": 0.8585, "step": 8550 }, { "epoch": 3.8762466001813234, "grad_norm": 0.12616029531955444, "learning_rate": 1.1691123917319231e-05, "loss": 0.8564, "step": 8551 }, { "epoch": 3.8766999093381687, "grad_norm": 0.14708005932217472, "learning_rate": 1.1682182019564627e-05, "loss": 0.8503, "step": 8552 }, { "epoch": 3.8771532184950135, "grad_norm": 0.15022683417263874, "learning_rate": 1.1673242957922715e-05, "loss": 0.8517, "step": 8553 }, { "epoch": 3.8776065276518583, "grad_norm": 0.15138622593924986, "learning_rate": 1.166430673328876e-05, "loss": 0.8756, "step": 8554 }, { "epoch": 3.8780598368087036, "grad_norm": 0.13028247846472824, "learning_rate": 1.1655373346557775e-05, "loss": 0.8593, "step": 8555 }, { "epoch": 3.8785131459655484, "grad_norm": 0.18259703377915587, "learning_rate": 1.164644279862444e-05, "loss": 0.865, "step": 8556 }, { "epoch": 3.8789664551223932, "grad_norm": 0.13044668182078814, "learning_rate": 1.16375150903832e-05, "loss": 0.8804, "step": 8557 }, { "epoch": 3.8794197642792385, "grad_norm": 0.13767337885449746, "learning_rate": 1.1628590222728167e-05, "loss": 0.8786, "step": 8558 }, { "epoch": 3.8798730734360833, "grad_norm": 0.1532323720418233, "learning_rate": 1.161966819655322e-05, "loss": 0.8524, "step": 8559 }, { "epoch": 3.880326382592928, "grad_norm": 0.1362610757679243, "learning_rate": 1.16107490127519e-05, "loss": 0.8921, "step": 8560 }, { "epoch": 3.8807796917497734, "grad_norm": 0.1456314687862223, "learning_rate": 1.160183267221751e-05, "loss": 0.875, "step": 8561 }, { "epoch": 3.8812330009066183, "grad_norm": 0.14848627750090887, "learning_rate": 1.1592919175843024e-05, "loss": 0.8684, "step": 8562 }, { "epoch": 3.881686310063463, "grad_norm": 0.14651163650805346, "learning_rate": 1.15840085245212e-05, "loss": 0.8655, "step": 8563 }, { "epoch": 3.8821396192203084, "grad_norm": 0.17141817535174908, "learning_rate": 1.157510071914441e-05, "loss": 0.8753, "step": 8564 }, { "epoch": 3.882592928377153, "grad_norm": 0.1524855732966824, "learning_rate": 1.156619576060483e-05, "loss": 0.856, "step": 8565 }, { "epoch": 3.883046237533998, "grad_norm": 0.1597413611713039, "learning_rate": 1.15572936497943e-05, "loss": 0.8654, "step": 8566 }, { "epoch": 3.8834995466908433, "grad_norm": 0.14924542163845025, "learning_rate": 1.1548394387604414e-05, "loss": 0.8669, "step": 8567 }, { "epoch": 3.883952855847688, "grad_norm": 0.14669140809997924, "learning_rate": 1.1539497974926452e-05, "loss": 0.8604, "step": 8568 }, { "epoch": 3.884406165004533, "grad_norm": 0.15551803354483396, "learning_rate": 1.1530604412651396e-05, "loss": 0.8749, "step": 8569 }, { "epoch": 3.8848594741613782, "grad_norm": 0.16844979868748106, "learning_rate": 1.152171370166999e-05, "loss": 0.8676, "step": 8570 }, { "epoch": 3.885312783318223, "grad_norm": 0.13388435165443519, "learning_rate": 1.1512825842872641e-05, "loss": 0.853, "step": 8571 }, { "epoch": 3.885766092475068, "grad_norm": 0.19886804467238703, "learning_rate": 1.1503940837149519e-05, "loss": 0.8486, "step": 8572 }, { "epoch": 3.886219401631913, "grad_norm": 0.16392554226920278, "learning_rate": 1.1495058685390456e-05, "loss": 0.8565, "step": 8573 }, { "epoch": 3.886672710788758, "grad_norm": 0.1908412024209008, "learning_rate": 1.1486179388485056e-05, "loss": 0.8436, "step": 8574 }, { "epoch": 3.887126019945603, "grad_norm": 0.1809576283193221, "learning_rate": 1.1477302947322575e-05, "loss": 0.8546, "step": 8575 }, { "epoch": 3.887579329102448, "grad_norm": 0.1840250425086609, "learning_rate": 1.1468429362792044e-05, "loss": 0.8367, "step": 8576 }, { "epoch": 3.888032638259293, "grad_norm": 0.1620295858694924, "learning_rate": 1.1459558635782156e-05, "loss": 0.8614, "step": 8577 }, { "epoch": 3.8884859474161377, "grad_norm": 0.15530899910928928, "learning_rate": 1.1450690767181359e-05, "loss": 0.8742, "step": 8578 }, { "epoch": 3.888939256572983, "grad_norm": 0.16991936030445193, "learning_rate": 1.1441825757877782e-05, "loss": 0.8623, "step": 8579 }, { "epoch": 3.889392565729828, "grad_norm": 0.14156527506884914, "learning_rate": 1.1432963608759282e-05, "loss": 0.8802, "step": 8580 }, { "epoch": 3.8898458748866727, "grad_norm": 0.17879721678123933, "learning_rate": 1.142410432071344e-05, "loss": 0.8774, "step": 8581 }, { "epoch": 3.890299184043518, "grad_norm": 0.15062293265528467, "learning_rate": 1.141524789462754e-05, "loss": 0.8612, "step": 8582 }, { "epoch": 3.8907524932003628, "grad_norm": 0.14560322398625078, "learning_rate": 1.1406394331388566e-05, "loss": 0.8648, "step": 8583 }, { "epoch": 3.8912058023572076, "grad_norm": 0.199790680175622, "learning_rate": 1.1397543631883226e-05, "loss": 0.8629, "step": 8584 }, { "epoch": 3.891659111514053, "grad_norm": 0.22817501782185273, "learning_rate": 1.138869579699796e-05, "loss": 0.8782, "step": 8585 }, { "epoch": 3.8921124206708977, "grad_norm": 0.18231128429813523, "learning_rate": 1.137985082761889e-05, "loss": 0.8546, "step": 8586 }, { "epoch": 3.8925657298277425, "grad_norm": 0.12974485793526946, "learning_rate": 1.1371008724631882e-05, "loss": 0.8618, "step": 8587 }, { "epoch": 3.8930190389845873, "grad_norm": 0.17752485825500627, "learning_rate": 1.1362169488922477e-05, "loss": 0.8527, "step": 8588 }, { "epoch": 3.8934723481414326, "grad_norm": 0.15337039781296607, "learning_rate": 1.1353333121375978e-05, "loss": 0.8526, "step": 8589 }, { "epoch": 3.8939256572982774, "grad_norm": 0.15101320556964593, "learning_rate": 1.1344499622877342e-05, "loss": 0.8762, "step": 8590 }, { "epoch": 3.8943789664551223, "grad_norm": 0.16858834950795226, "learning_rate": 1.1335668994311297e-05, "loss": 0.8388, "step": 8591 }, { "epoch": 3.894832275611967, "grad_norm": 0.15444611559545607, "learning_rate": 1.1326841236562238e-05, "loss": 0.8565, "step": 8592 }, { "epoch": 3.8952855847688124, "grad_norm": 0.20540609359405804, "learning_rate": 1.1318016350514305e-05, "loss": 0.8587, "step": 8593 }, { "epoch": 3.895738893925657, "grad_norm": 0.1588827793384538, "learning_rate": 1.1309194337051332e-05, "loss": 0.871, "step": 8594 }, { "epoch": 3.896192203082502, "grad_norm": 0.2058557606191269, "learning_rate": 1.1300375197056855e-05, "loss": 0.871, "step": 8595 }, { "epoch": 3.8966455122393473, "grad_norm": 0.1584655587353434, "learning_rate": 1.129155893141416e-05, "loss": 0.8714, "step": 8596 }, { "epoch": 3.897098821396192, "grad_norm": 0.1583999196916172, "learning_rate": 1.1282745541006199e-05, "loss": 0.8682, "step": 8597 }, { "epoch": 3.897552130553037, "grad_norm": 0.16909527097111882, "learning_rate": 1.1273935026715681e-05, "loss": 0.8597, "step": 8598 }, { "epoch": 3.8980054397098822, "grad_norm": 0.15728442861827907, "learning_rate": 1.1265127389424984e-05, "loss": 0.8602, "step": 8599 }, { "epoch": 3.898458748866727, "grad_norm": 0.16006432979443053, "learning_rate": 1.1256322630016241e-05, "loss": 0.8619, "step": 8600 }, { "epoch": 3.898912058023572, "grad_norm": 0.17470398578207377, "learning_rate": 1.124752074937125e-05, "loss": 0.8583, "step": 8601 }, { "epoch": 3.899365367180417, "grad_norm": 0.15191752724006952, "learning_rate": 1.1238721748371587e-05, "loss": 0.8485, "step": 8602 }, { "epoch": 3.899818676337262, "grad_norm": 0.17950513244402092, "learning_rate": 1.1229925627898437e-05, "loss": 0.8597, "step": 8603 }, { "epoch": 3.900271985494107, "grad_norm": 0.1328014454514356, "learning_rate": 1.1221132388832805e-05, "loss": 0.8587, "step": 8604 }, { "epoch": 3.900725294650952, "grad_norm": 0.1879874342912766, "learning_rate": 1.1212342032055333e-05, "loss": 0.8624, "step": 8605 }, { "epoch": 3.901178603807797, "grad_norm": 0.12766134692546963, "learning_rate": 1.1203554558446421e-05, "loss": 0.8738, "step": 8606 }, { "epoch": 3.9016319129646417, "grad_norm": 0.19667264806926277, "learning_rate": 1.1194769968886142e-05, "loss": 0.8944, "step": 8607 }, { "epoch": 3.902085222121487, "grad_norm": 0.1381333012138782, "learning_rate": 1.1185988264254316e-05, "loss": 0.8427, "step": 8608 }, { "epoch": 3.902538531278332, "grad_norm": 0.15436067209592105, "learning_rate": 1.1177209445430451e-05, "loss": 0.8677, "step": 8609 }, { "epoch": 3.9029918404351767, "grad_norm": 0.17428225440225506, "learning_rate": 1.1168433513293753e-05, "loss": 0.8481, "step": 8610 }, { "epoch": 3.903445149592022, "grad_norm": 0.14195531252837665, "learning_rate": 1.1159660468723188e-05, "loss": 0.8763, "step": 8611 }, { "epoch": 3.9038984587488668, "grad_norm": 0.18874685757473597, "learning_rate": 1.1150890312597373e-05, "loss": 0.8574, "step": 8612 }, { "epoch": 3.9043517679057116, "grad_norm": 0.13734934948463917, "learning_rate": 1.1142123045794695e-05, "loss": 0.8666, "step": 8613 }, { "epoch": 3.904805077062557, "grad_norm": 0.18575580485804155, "learning_rate": 1.1133358669193192e-05, "loss": 0.8814, "step": 8614 }, { "epoch": 3.9052583862194017, "grad_norm": 0.15985811458063653, "learning_rate": 1.1124597183670667e-05, "loss": 0.8734, "step": 8615 }, { "epoch": 3.9057116953762465, "grad_norm": 0.18095712512015744, "learning_rate": 1.1115838590104585e-05, "loss": 0.8624, "step": 8616 }, { "epoch": 3.906165004533092, "grad_norm": 0.1572543158421929, "learning_rate": 1.1107082889372172e-05, "loss": 0.8573, "step": 8617 }, { "epoch": 3.9066183136899366, "grad_norm": 0.16612989100203332, "learning_rate": 1.1098330082350305e-05, "loss": 0.8837, "step": 8618 }, { "epoch": 3.9070716228467814, "grad_norm": 0.14420542722180585, "learning_rate": 1.1089580169915638e-05, "loss": 0.8712, "step": 8619 }, { "epoch": 3.9075249320036267, "grad_norm": 0.15357864634479898, "learning_rate": 1.1080833152944481e-05, "loss": 0.8906, "step": 8620 }, { "epoch": 3.9079782411604715, "grad_norm": 0.14877375207397456, "learning_rate": 1.1072089032312876e-05, "loss": 0.8643, "step": 8621 }, { "epoch": 3.9084315503173164, "grad_norm": 0.15752010028964175, "learning_rate": 1.1063347808896561e-05, "loss": 0.8758, "step": 8622 }, { "epoch": 3.9088848594741616, "grad_norm": 0.14396979239672608, "learning_rate": 1.1054609483571017e-05, "loss": 0.873, "step": 8623 }, { "epoch": 3.9093381686310065, "grad_norm": 0.1410950957461436, "learning_rate": 1.104587405721139e-05, "loss": 0.8725, "step": 8624 }, { "epoch": 3.9097914777878513, "grad_norm": 0.15325248595014798, "learning_rate": 1.1037141530692583e-05, "loss": 0.8732, "step": 8625 }, { "epoch": 3.910244786944696, "grad_norm": 0.12398809646344022, "learning_rate": 1.102841190488917e-05, "loss": 0.845, "step": 8626 }, { "epoch": 3.9106980961015414, "grad_norm": 0.12957609751408997, "learning_rate": 1.1019685180675439e-05, "loss": 0.8711, "step": 8627 }, { "epoch": 3.9111514052583862, "grad_norm": 0.14059040702806289, "learning_rate": 1.1010961358925422e-05, "loss": 0.8841, "step": 8628 }, { "epoch": 3.911604714415231, "grad_norm": 0.13405554015259558, "learning_rate": 1.1002240440512812e-05, "loss": 0.8502, "step": 8629 }, { "epoch": 3.912058023572076, "grad_norm": 0.15257616118796305, "learning_rate": 1.0993522426311056e-05, "loss": 0.8719, "step": 8630 }, { "epoch": 3.912511332728921, "grad_norm": 0.13954373216288934, "learning_rate": 1.0984807317193264e-05, "loss": 0.8725, "step": 8631 }, { "epoch": 3.912964641885766, "grad_norm": 0.1343504006699594, "learning_rate": 1.0976095114032303e-05, "loss": 0.853, "step": 8632 }, { "epoch": 3.913417951042611, "grad_norm": 0.14249747916251446, "learning_rate": 1.0967385817700711e-05, "loss": 0.8763, "step": 8633 }, { "epoch": 3.913871260199456, "grad_norm": 0.12344215618273162, "learning_rate": 1.0958679429070762e-05, "loss": 0.8679, "step": 8634 }, { "epoch": 3.914324569356301, "grad_norm": 0.1372252262918707, "learning_rate": 1.0949975949014418e-05, "loss": 0.8611, "step": 8635 }, { "epoch": 3.9147778785131457, "grad_norm": 0.1417135443530343, "learning_rate": 1.094127537840335e-05, "loss": 0.8303, "step": 8636 }, { "epoch": 3.915231187669991, "grad_norm": 0.13558454926719388, "learning_rate": 1.0932577718108966e-05, "loss": 0.8557, "step": 8637 }, { "epoch": 3.915684496826836, "grad_norm": 0.14016416630606435, "learning_rate": 1.0923882969002335e-05, "loss": 0.8604, "step": 8638 }, { "epoch": 3.9161378059836807, "grad_norm": 0.12780154964426704, "learning_rate": 1.0915191131954295e-05, "loss": 0.8784, "step": 8639 }, { "epoch": 3.916591115140526, "grad_norm": 0.148898042467384, "learning_rate": 1.090650220783534e-05, "loss": 0.8588, "step": 8640 }, { "epoch": 3.9170444242973708, "grad_norm": 0.13703745035842052, "learning_rate": 1.0897816197515691e-05, "loss": 0.8652, "step": 8641 }, { "epoch": 3.9174977334542156, "grad_norm": 0.1642774925598848, "learning_rate": 1.088913310186527e-05, "loss": 0.8594, "step": 8642 }, { "epoch": 3.917951042611061, "grad_norm": 0.1254065323805134, "learning_rate": 1.0880452921753739e-05, "loss": 0.8695, "step": 8643 }, { "epoch": 3.9184043517679057, "grad_norm": 0.15458498689104438, "learning_rate": 1.0871775658050411e-05, "loss": 0.885, "step": 8644 }, { "epoch": 3.9188576609247505, "grad_norm": 0.1250843854478672, "learning_rate": 1.086310131162437e-05, "loss": 0.8729, "step": 8645 }, { "epoch": 3.919310970081596, "grad_norm": 0.1389913769821898, "learning_rate": 1.0854429883344357e-05, "loss": 0.8691, "step": 8646 }, { "epoch": 3.9197642792384406, "grad_norm": 0.13657579739554074, "learning_rate": 1.0845761374078857e-05, "loss": 0.8532, "step": 8647 }, { "epoch": 3.9202175883952854, "grad_norm": 0.1517831485604416, "learning_rate": 1.0837095784696028e-05, "loss": 0.8484, "step": 8648 }, { "epoch": 3.9206708975521307, "grad_norm": 0.133266986627788, "learning_rate": 1.0828433116063773e-05, "loss": 0.8829, "step": 8649 }, { "epoch": 3.9211242067089755, "grad_norm": 0.13444205171751292, "learning_rate": 1.0819773369049664e-05, "loss": 0.8552, "step": 8650 }, { "epoch": 3.9215775158658204, "grad_norm": 0.19173438903654452, "learning_rate": 1.0811116544521028e-05, "loss": 0.8747, "step": 8651 }, { "epoch": 3.9220308250226656, "grad_norm": 0.14227121214647617, "learning_rate": 1.0802462643344849e-05, "loss": 0.8609, "step": 8652 }, { "epoch": 3.9224841341795105, "grad_norm": 0.14101021828704413, "learning_rate": 1.0793811666387835e-05, "loss": 0.8912, "step": 8653 }, { "epoch": 3.9229374433363553, "grad_norm": 0.1291497252492836, "learning_rate": 1.078516361451643e-05, "loss": 0.8755, "step": 8654 }, { "epoch": 3.9233907524932006, "grad_norm": 0.12598469168893422, "learning_rate": 1.0776518488596741e-05, "loss": 0.8617, "step": 8655 }, { "epoch": 3.9238440616500454, "grad_norm": 0.14470643854494394, "learning_rate": 1.076787628949462e-05, "loss": 0.8757, "step": 8656 }, { "epoch": 3.9242973708068902, "grad_norm": 0.14553249991260658, "learning_rate": 1.0759237018075593e-05, "loss": 0.8797, "step": 8657 }, { "epoch": 3.9247506799637355, "grad_norm": 0.12715495325463602, "learning_rate": 1.0750600675204921e-05, "loss": 0.8534, "step": 8658 }, { "epoch": 3.9252039891205803, "grad_norm": 0.155330321021393, "learning_rate": 1.0741967261747556e-05, "loss": 0.8734, "step": 8659 }, { "epoch": 3.925657298277425, "grad_norm": 0.1344940385263514, "learning_rate": 1.073333677856816e-05, "loss": 0.8669, "step": 8660 }, { "epoch": 3.9261106074342704, "grad_norm": 0.13408304659875767, "learning_rate": 1.0724709226531086e-05, "loss": 0.8557, "step": 8661 }, { "epoch": 3.9265639165911153, "grad_norm": 0.14071547662531197, "learning_rate": 1.0716084606500434e-05, "loss": 0.8651, "step": 8662 }, { "epoch": 3.92701722574796, "grad_norm": 0.15367516628333575, "learning_rate": 1.0707462919339959e-05, "loss": 0.8503, "step": 8663 }, { "epoch": 3.927470534904805, "grad_norm": 0.1323098942463158, "learning_rate": 1.0698844165913172e-05, "loss": 0.8622, "step": 8664 }, { "epoch": 3.92792384406165, "grad_norm": 0.1307789259709397, "learning_rate": 1.0690228347083247e-05, "loss": 0.874, "step": 8665 }, { "epoch": 3.928377153218495, "grad_norm": 0.14587646530673706, "learning_rate": 1.0681615463713104e-05, "loss": 0.8615, "step": 8666 }, { "epoch": 3.92883046237534, "grad_norm": 0.1542854906007598, "learning_rate": 1.0673005516665339e-05, "loss": 0.8721, "step": 8667 }, { "epoch": 3.929283771532185, "grad_norm": 0.13310115702877678, "learning_rate": 1.0664398506802249e-05, "loss": 0.891, "step": 8668 }, { "epoch": 3.92973708068903, "grad_norm": 0.14761024488803462, "learning_rate": 1.0655794434985873e-05, "loss": 0.8656, "step": 8669 }, { "epoch": 3.9301903898458748, "grad_norm": 0.15589432460314678, "learning_rate": 1.064719330207792e-05, "loss": 0.8498, "step": 8670 }, { "epoch": 3.9306436990027196, "grad_norm": 0.16893228912911887, "learning_rate": 1.0638595108939831e-05, "loss": 0.8614, "step": 8671 }, { "epoch": 3.931097008159565, "grad_norm": 0.14384034116486916, "learning_rate": 1.0629999856432725e-05, "loss": 0.866, "step": 8672 }, { "epoch": 3.9315503173164097, "grad_norm": 0.12930259721262366, "learning_rate": 1.062140754541746e-05, "loss": 0.8841, "step": 8673 }, { "epoch": 3.9320036264732545, "grad_norm": 0.14290958024161035, "learning_rate": 1.0612818176754557e-05, "loss": 0.8675, "step": 8674 }, { "epoch": 3.9324569356301, "grad_norm": 0.1474990706323588, "learning_rate": 1.0604231751304295e-05, "loss": 0.8681, "step": 8675 }, { "epoch": 3.9329102447869446, "grad_norm": 0.1439151493677013, "learning_rate": 1.0595648269926607e-05, "loss": 0.8508, "step": 8676 }, { "epoch": 3.9333635539437894, "grad_norm": 0.1294598773484825, "learning_rate": 1.0587067733481171e-05, "loss": 0.8509, "step": 8677 }, { "epoch": 3.9338168631006347, "grad_norm": 0.1599279286799101, "learning_rate": 1.0578490142827342e-05, "loss": 0.902, "step": 8678 }, { "epoch": 3.9342701722574795, "grad_norm": 0.13184163411544533, "learning_rate": 1.0569915498824197e-05, "loss": 0.8556, "step": 8679 }, { "epoch": 3.9347234814143244, "grad_norm": 0.14215198253059488, "learning_rate": 1.0561343802330496e-05, "loss": 0.8702, "step": 8680 }, { "epoch": 3.9351767905711696, "grad_norm": 0.13617756071087272, "learning_rate": 1.0552775054204742e-05, "loss": 0.8532, "step": 8681 }, { "epoch": 3.9356300997280145, "grad_norm": 0.1359840663647679, "learning_rate": 1.0544209255305113e-05, "loss": 0.8533, "step": 8682 }, { "epoch": 3.9360834088848593, "grad_norm": 0.13003172457941276, "learning_rate": 1.0535646406489484e-05, "loss": 0.862, "step": 8683 }, { "epoch": 3.9365367180417046, "grad_norm": 0.12515953235363889, "learning_rate": 1.0527086508615475e-05, "loss": 0.8789, "step": 8684 }, { "epoch": 3.9369900271985494, "grad_norm": 0.15020916455872405, "learning_rate": 1.0518529562540354e-05, "loss": 0.8663, "step": 8685 }, { "epoch": 3.9374433363553942, "grad_norm": 0.12878626512028118, "learning_rate": 1.0509975569121159e-05, "loss": 0.8527, "step": 8686 }, { "epoch": 3.9378966455122395, "grad_norm": 0.1572511398541704, "learning_rate": 1.050142452921456e-05, "loss": 0.8695, "step": 8687 }, { "epoch": 3.9383499546690843, "grad_norm": 0.12846463129263733, "learning_rate": 1.0492876443677012e-05, "loss": 0.8457, "step": 8688 }, { "epoch": 3.938803263825929, "grad_norm": 0.15437181756221113, "learning_rate": 1.048433131336459e-05, "loss": 0.8569, "step": 8689 }, { "epoch": 3.9392565729827744, "grad_norm": 0.14318743168298415, "learning_rate": 1.0475789139133141e-05, "loss": 0.8623, "step": 8690 }, { "epoch": 3.9397098821396193, "grad_norm": 0.13897463282954756, "learning_rate": 1.0467249921838172e-05, "loss": 0.8599, "step": 8691 }, { "epoch": 3.940163191296464, "grad_norm": 0.15535233352196892, "learning_rate": 1.0458713662334934e-05, "loss": 0.8599, "step": 8692 }, { "epoch": 3.9406165004533094, "grad_norm": 0.1454130341312731, "learning_rate": 1.0450180361478334e-05, "loss": 0.8407, "step": 8693 }, { "epoch": 3.941069809610154, "grad_norm": 0.1389864460255382, "learning_rate": 1.0441650020123011e-05, "loss": 0.851, "step": 8694 }, { "epoch": 3.941523118766999, "grad_norm": 0.16328376977713058, "learning_rate": 1.0433122639123318e-05, "loss": 0.8628, "step": 8695 }, { "epoch": 3.9419764279238443, "grad_norm": 0.12142459268250488, "learning_rate": 1.0424598219333278e-05, "loss": 0.8752, "step": 8696 }, { "epoch": 3.942429737080689, "grad_norm": 0.13394282599323784, "learning_rate": 1.0416076761606657e-05, "loss": 0.8566, "step": 8697 }, { "epoch": 3.942883046237534, "grad_norm": 0.1342314283046109, "learning_rate": 1.0407558266796896e-05, "loss": 0.862, "step": 8698 }, { "epoch": 3.943336355394379, "grad_norm": 0.13547528162100017, "learning_rate": 1.0399042735757146e-05, "loss": 0.8641, "step": 8699 }, { "epoch": 3.943789664551224, "grad_norm": 0.14542393021499944, "learning_rate": 1.0390530169340245e-05, "loss": 0.8489, "step": 8700 }, { "epoch": 3.944242973708069, "grad_norm": 0.13505454446109422, "learning_rate": 1.0382020568398782e-05, "loss": 0.8694, "step": 8701 }, { "epoch": 3.944696282864914, "grad_norm": 0.13300258717877939, "learning_rate": 1.0373513933784998e-05, "loss": 0.8682, "step": 8702 }, { "epoch": 3.945149592021759, "grad_norm": 0.14689992753760656, "learning_rate": 1.036501026635087e-05, "loss": 0.8773, "step": 8703 }, { "epoch": 3.945602901178604, "grad_norm": 0.13038122196512603, "learning_rate": 1.0356509566948047e-05, "loss": 0.8851, "step": 8704 }, { "epoch": 3.9460562103354486, "grad_norm": 0.14758573940529282, "learning_rate": 1.0348011836427925e-05, "loss": 0.8791, "step": 8705 }, { "epoch": 3.946509519492294, "grad_norm": 0.12817005676791982, "learning_rate": 1.0339517075641555e-05, "loss": 0.8562, "step": 8706 }, { "epoch": 3.9469628286491387, "grad_norm": 0.13661525231702767, "learning_rate": 1.0331025285439727e-05, "loss": 0.8268, "step": 8707 }, { "epoch": 3.9474161378059835, "grad_norm": 0.13890764562921967, "learning_rate": 1.0322536466672917e-05, "loss": 0.8611, "step": 8708 }, { "epoch": 3.9478694469628284, "grad_norm": 0.1556129816502613, "learning_rate": 1.0314050620191285e-05, "loss": 0.8669, "step": 8709 }, { "epoch": 3.9483227561196736, "grad_norm": 0.11455118278553558, "learning_rate": 1.0305567746844738e-05, "loss": 0.8875, "step": 8710 }, { "epoch": 3.9487760652765185, "grad_norm": 0.16838250804798463, "learning_rate": 1.0297087847482845e-05, "loss": 0.8628, "step": 8711 }, { "epoch": 3.9492293744333633, "grad_norm": 0.15098269169459483, "learning_rate": 1.028861092295491e-05, "loss": 0.8482, "step": 8712 }, { "epoch": 3.9496826835902086, "grad_norm": 0.16148961520565283, "learning_rate": 1.0280136974109895e-05, "loss": 0.8671, "step": 8713 }, { "epoch": 3.9501359927470534, "grad_norm": 0.15526147367128929, "learning_rate": 1.0271666001796525e-05, "loss": 0.8741, "step": 8714 }, { "epoch": 3.9505893019038982, "grad_norm": 0.17896970549743513, "learning_rate": 1.026319800686316e-05, "loss": 0.8534, "step": 8715 }, { "epoch": 3.9510426110607435, "grad_norm": 0.13656266603278266, "learning_rate": 1.025473299015792e-05, "loss": 0.8597, "step": 8716 }, { "epoch": 3.9514959202175883, "grad_norm": 0.13179814704057755, "learning_rate": 1.0246270952528597e-05, "loss": 0.8534, "step": 8717 }, { "epoch": 3.951949229374433, "grad_norm": 0.14802106084015515, "learning_rate": 1.0237811894822677e-05, "loss": 0.8747, "step": 8718 }, { "epoch": 3.9524025385312784, "grad_norm": 0.15639307932840363, "learning_rate": 1.0229355817887354e-05, "loss": 0.8806, "step": 8719 }, { "epoch": 3.9528558476881233, "grad_norm": 0.16085758490072277, "learning_rate": 1.0220902722569553e-05, "loss": 0.8591, "step": 8720 }, { "epoch": 3.953309156844968, "grad_norm": 0.14307355714463715, "learning_rate": 1.0212452609715853e-05, "loss": 0.8613, "step": 8721 }, { "epoch": 3.9537624660018134, "grad_norm": 0.12845279847114097, "learning_rate": 1.0204005480172574e-05, "loss": 0.8623, "step": 8722 }, { "epoch": 3.954215775158658, "grad_norm": 0.13696584513438725, "learning_rate": 1.019556133478572e-05, "loss": 0.871, "step": 8723 }, { "epoch": 3.954669084315503, "grad_norm": 0.12443051395969457, "learning_rate": 1.0187120174400981e-05, "loss": 0.8758, "step": 8724 }, { "epoch": 3.9551223934723483, "grad_norm": 0.12891759961361418, "learning_rate": 1.0178681999863782e-05, "loss": 0.8821, "step": 8725 }, { "epoch": 3.955575702629193, "grad_norm": 0.20337707644334965, "learning_rate": 1.0170246812019218e-05, "loss": 0.8811, "step": 8726 }, { "epoch": 3.956029011786038, "grad_norm": 0.14760116803243187, "learning_rate": 1.016181461171211e-05, "loss": 0.8861, "step": 8727 }, { "epoch": 3.956482320942883, "grad_norm": 0.12592884439489757, "learning_rate": 1.0153385399786955e-05, "loss": 0.8497, "step": 8728 }, { "epoch": 3.956935630099728, "grad_norm": 0.1621922823306846, "learning_rate": 1.014495917708798e-05, "loss": 0.8653, "step": 8729 }, { "epoch": 3.957388939256573, "grad_norm": 0.12249225312709475, "learning_rate": 1.013653594445907e-05, "loss": 0.8689, "step": 8730 }, { "epoch": 3.957842248413418, "grad_norm": 0.15706399055852563, "learning_rate": 1.0128115702743871e-05, "loss": 0.8569, "step": 8731 }, { "epoch": 3.958295557570263, "grad_norm": 0.12219492458239548, "learning_rate": 1.011969845278566e-05, "loss": 0.864, "step": 8732 }, { "epoch": 3.958748866727108, "grad_norm": 0.14727551523476115, "learning_rate": 1.0111284195427476e-05, "loss": 0.8788, "step": 8733 }, { "epoch": 3.959202175883953, "grad_norm": 0.12479154331817577, "learning_rate": 1.0102872931512025e-05, "loss": 0.8512, "step": 8734 }, { "epoch": 3.959655485040798, "grad_norm": 0.14690136396275896, "learning_rate": 1.00944646618817e-05, "loss": 0.8628, "step": 8735 }, { "epoch": 3.9601087941976427, "grad_norm": 0.12244393377163548, "learning_rate": 1.0086059387378642e-05, "loss": 0.8664, "step": 8736 }, { "epoch": 3.960562103354488, "grad_norm": 0.14554250199608673, "learning_rate": 1.0077657108844656e-05, "loss": 0.8895, "step": 8737 }, { "epoch": 3.961015412511333, "grad_norm": 0.12318832584951761, "learning_rate": 1.0069257827121248e-05, "loss": 0.8652, "step": 8738 }, { "epoch": 3.9614687216681777, "grad_norm": 0.1214618462994439, "learning_rate": 1.006086154304962e-05, "loss": 0.8724, "step": 8739 }, { "epoch": 3.961922030825023, "grad_norm": 0.1394181406387694, "learning_rate": 1.0052468257470713e-05, "loss": 0.8623, "step": 8740 }, { "epoch": 3.9623753399818678, "grad_norm": 0.12284605802002295, "learning_rate": 1.0044077971225112e-05, "loss": 0.88, "step": 8741 }, { "epoch": 3.9628286491387126, "grad_norm": 0.1414362376674521, "learning_rate": 1.003569068515315e-05, "loss": 0.8543, "step": 8742 }, { "epoch": 3.9632819582955574, "grad_norm": 0.12566211476378739, "learning_rate": 1.0027306400094821e-05, "loss": 0.8661, "step": 8743 }, { "epoch": 3.9637352674524027, "grad_norm": 0.1593442418808219, "learning_rate": 1.0018925116889852e-05, "loss": 0.8752, "step": 8744 }, { "epoch": 3.9641885766092475, "grad_norm": 0.13622306590124253, "learning_rate": 1.0010546836377636e-05, "loss": 0.8791, "step": 8745 }, { "epoch": 3.9646418857660923, "grad_norm": 0.1301578646959358, "learning_rate": 1.0002171559397307e-05, "loss": 0.8561, "step": 8746 }, { "epoch": 3.9650951949229376, "grad_norm": 0.15281167935660822, "learning_rate": 9.993799286787639e-06, "loss": 0.8694, "step": 8747 }, { "epoch": 3.9655485040797824, "grad_norm": 0.13573213649975807, "learning_rate": 9.985430019387174e-06, "loss": 0.8562, "step": 8748 }, { "epoch": 3.9660018132366273, "grad_norm": 0.14819383167710523, "learning_rate": 9.977063758034106e-06, "loss": 0.8628, "step": 8749 }, { "epoch": 3.966455122393472, "grad_norm": 0.1594148224959517, "learning_rate": 9.968700503566327e-06, "loss": 0.8593, "step": 8750 }, { "epoch": 3.9669084315503174, "grad_norm": 0.1447243854667321, "learning_rate": 9.960340256821462e-06, "loss": 0.8569, "step": 8751 }, { "epoch": 3.967361740707162, "grad_norm": 0.14314007525307157, "learning_rate": 9.951983018636792e-06, "loss": 0.8502, "step": 8752 }, { "epoch": 3.967815049864007, "grad_norm": 0.1469685565143469, "learning_rate": 9.943628789849349e-06, "loss": 0.8569, "step": 8753 }, { "epoch": 3.9682683590208523, "grad_norm": 0.1332214090461139, "learning_rate": 9.935277571295803e-06, "loss": 0.843, "step": 8754 }, { "epoch": 3.968721668177697, "grad_norm": 0.1423946915148195, "learning_rate": 9.926929363812588e-06, "loss": 0.8677, "step": 8755 }, { "epoch": 3.969174977334542, "grad_norm": 0.13446406308073194, "learning_rate": 9.91858416823576e-06, "loss": 0.8928, "step": 8756 }, { "epoch": 3.969628286491387, "grad_norm": 0.12555958765708736, "learning_rate": 9.910241985401142e-06, "loss": 0.8657, "step": 8757 }, { "epoch": 3.970081595648232, "grad_norm": 0.13680919457835441, "learning_rate": 9.901902816144213e-06, "loss": 0.8857, "step": 8758 }, { "epoch": 3.970534904805077, "grad_norm": 0.12469208801347693, "learning_rate": 9.893566661300187e-06, "loss": 0.8586, "step": 8759 }, { "epoch": 3.970988213961922, "grad_norm": 0.14703146689088337, "learning_rate": 9.885233521703928e-06, "loss": 0.8816, "step": 8760 }, { "epoch": 3.971441523118767, "grad_norm": 0.13448668012663342, "learning_rate": 9.876903398190048e-06, "loss": 0.856, "step": 8761 }, { "epoch": 3.971894832275612, "grad_norm": 0.12891979688366104, "learning_rate": 9.868576291592808e-06, "loss": 0.8593, "step": 8762 }, { "epoch": 3.972348141432457, "grad_norm": 0.15699976148039219, "learning_rate": 9.860252202746223e-06, "loss": 0.8644, "step": 8763 }, { "epoch": 3.972801450589302, "grad_norm": 0.15639993622936055, "learning_rate": 9.851931132483959e-06, "loss": 0.8704, "step": 8764 }, { "epoch": 3.9732547597461467, "grad_norm": 0.14846043684698285, "learning_rate": 9.843613081639382e-06, "loss": 0.8694, "step": 8765 }, { "epoch": 3.973708068902992, "grad_norm": 0.15385297967183176, "learning_rate": 9.835298051045599e-06, "loss": 0.8696, "step": 8766 }, { "epoch": 3.974161378059837, "grad_norm": 0.13808278221249565, "learning_rate": 9.826986041535354e-06, "loss": 0.8669, "step": 8767 }, { "epoch": 3.9746146872166817, "grad_norm": 0.1594223912572024, "learning_rate": 9.818677053941145e-06, "loss": 0.8792, "step": 8768 }, { "epoch": 3.975067996373527, "grad_norm": 0.14651345444536343, "learning_rate": 9.810371089095119e-06, "loss": 0.8645, "step": 8769 }, { "epoch": 3.9755213055303718, "grad_norm": 0.13431190593036008, "learning_rate": 9.802068147829167e-06, "loss": 0.8738, "step": 8770 }, { "epoch": 3.9759746146872166, "grad_norm": 0.1487998032605582, "learning_rate": 9.793768230974824e-06, "loss": 0.8648, "step": 8771 }, { "epoch": 3.976427923844062, "grad_norm": 0.12009280448060282, "learning_rate": 9.785471339363383e-06, "loss": 0.8688, "step": 8772 }, { "epoch": 3.9768812330009067, "grad_norm": 0.13519970908853912, "learning_rate": 9.777177473825774e-06, "loss": 0.8315, "step": 8773 }, { "epoch": 3.9773345421577515, "grad_norm": 0.1352575240503575, "learning_rate": 9.768886635192688e-06, "loss": 0.8586, "step": 8774 }, { "epoch": 3.977787851314597, "grad_norm": 0.11844467221876069, "learning_rate": 9.760598824294428e-06, "loss": 0.8815, "step": 8775 }, { "epoch": 3.9782411604714416, "grad_norm": 0.14519696860338763, "learning_rate": 9.752314041961082e-06, "loss": 0.8483, "step": 8776 }, { "epoch": 3.9786944696282864, "grad_norm": 0.12077496148309745, "learning_rate": 9.744032289022364e-06, "loss": 0.8626, "step": 8777 }, { "epoch": 3.9791477787851317, "grad_norm": 0.13841421970460288, "learning_rate": 9.73575356630775e-06, "loss": 0.8453, "step": 8778 }, { "epoch": 3.9796010879419765, "grad_norm": 0.12403084085973108, "learning_rate": 9.727477874646359e-06, "loss": 0.8578, "step": 8779 }, { "epoch": 3.9800543970988214, "grad_norm": 0.1390862292086526, "learning_rate": 9.719205214867018e-06, "loss": 0.8712, "step": 8780 }, { "epoch": 3.9805077062556666, "grad_norm": 0.12574370015874953, "learning_rate": 9.710935587798276e-06, "loss": 0.8716, "step": 8781 }, { "epoch": 3.9809610154125115, "grad_norm": 0.13748370495007822, "learning_rate": 9.702668994268345e-06, "loss": 0.8752, "step": 8782 }, { "epoch": 3.9814143245693563, "grad_norm": 0.12301394417653655, "learning_rate": 9.694405435105167e-06, "loss": 0.865, "step": 8783 }, { "epoch": 3.981867633726201, "grad_norm": 0.1449191580911615, "learning_rate": 9.686144911136339e-06, "loss": 0.8729, "step": 8784 }, { "epoch": 3.9823209428830464, "grad_norm": 0.12208999601663287, "learning_rate": 9.677887423189203e-06, "loss": 0.8607, "step": 8785 }, { "epoch": 3.982774252039891, "grad_norm": 0.15551233110481227, "learning_rate": 9.66963297209074e-06, "loss": 0.8602, "step": 8786 }, { "epoch": 3.983227561196736, "grad_norm": 0.1253111343497961, "learning_rate": 9.661381558667692e-06, "loss": 0.8623, "step": 8787 }, { "epoch": 3.983680870353581, "grad_norm": 0.14013795012617744, "learning_rate": 9.653133183746428e-06, "loss": 0.892, "step": 8788 }, { "epoch": 3.984134179510426, "grad_norm": 0.12166764019321923, "learning_rate": 9.644887848153076e-06, "loss": 0.8702, "step": 8789 }, { "epoch": 3.984587488667271, "grad_norm": 0.13560915871144025, "learning_rate": 9.63664555271342e-06, "loss": 0.8628, "step": 8790 }, { "epoch": 3.985040797824116, "grad_norm": 0.14467449240094368, "learning_rate": 9.628406298252937e-06, "loss": 0.8522, "step": 8791 }, { "epoch": 3.985494106980961, "grad_norm": 0.12268861835134415, "learning_rate": 9.620170085596831e-06, "loss": 0.8649, "step": 8792 }, { "epoch": 3.985947416137806, "grad_norm": 0.14551154141018466, "learning_rate": 9.611936915569968e-06, "loss": 0.8591, "step": 8793 }, { "epoch": 3.9864007252946507, "grad_norm": 0.12242938013646518, "learning_rate": 9.603706788996954e-06, "loss": 0.853, "step": 8794 }, { "epoch": 3.986854034451496, "grad_norm": 0.14976913767006014, "learning_rate": 9.595479706702013e-06, "loss": 0.8563, "step": 8795 }, { "epoch": 3.987307343608341, "grad_norm": 0.1310257564458422, "learning_rate": 9.58725566950915e-06, "loss": 0.8499, "step": 8796 }, { "epoch": 3.9877606527651857, "grad_norm": 0.1422231950383188, "learning_rate": 9.579034678242002e-06, "loss": 0.8782, "step": 8797 }, { "epoch": 3.988213961922031, "grad_norm": 0.11942374873411316, "learning_rate": 9.57081673372395e-06, "loss": 0.8643, "step": 8798 }, { "epoch": 3.9886672710788758, "grad_norm": 0.15830163565516286, "learning_rate": 9.562601836778018e-06, "loss": 0.8791, "step": 8799 }, { "epoch": 3.9891205802357206, "grad_norm": 0.1460766961024829, "learning_rate": 9.554389988226976e-06, "loss": 0.8682, "step": 8800 }, { "epoch": 3.989573889392566, "grad_norm": 0.16296084568958658, "learning_rate": 9.546181188893241e-06, "loss": 0.8567, "step": 8801 }, { "epoch": 3.9900271985494107, "grad_norm": 0.16107993762741035, "learning_rate": 9.537975439598979e-06, "loss": 0.8683, "step": 8802 }, { "epoch": 3.9904805077062555, "grad_norm": 0.17869675643009536, "learning_rate": 9.52977274116599e-06, "loss": 0.8668, "step": 8803 }, { "epoch": 3.990933816863101, "grad_norm": 0.1386900530287897, "learning_rate": 9.521573094415823e-06, "loss": 0.8765, "step": 8804 }, { "epoch": 3.9913871260199456, "grad_norm": 0.17346408926866413, "learning_rate": 9.513376500169685e-06, "loss": 0.8661, "step": 8805 }, { "epoch": 3.9918404351767904, "grad_norm": 0.13389441947341887, "learning_rate": 9.505182959248476e-06, "loss": 0.8918, "step": 8806 }, { "epoch": 3.9922937443336357, "grad_norm": 0.15733019267051762, "learning_rate": 9.496992472472825e-06, "loss": 0.8669, "step": 8807 }, { "epoch": 3.9927470534904805, "grad_norm": 0.13790454208878558, "learning_rate": 9.488805040663021e-06, "loss": 0.8649, "step": 8808 }, { "epoch": 3.9932003626473254, "grad_norm": 0.13256930765972733, "learning_rate": 9.480620664639066e-06, "loss": 0.8874, "step": 8809 }, { "epoch": 3.9936536718041706, "grad_norm": 0.12424555440560578, "learning_rate": 9.47243934522064e-06, "loss": 0.8576, "step": 8810 }, { "epoch": 3.9941069809610155, "grad_norm": 0.14632271077373574, "learning_rate": 9.464261083227142e-06, "loss": 0.8708, "step": 8811 }, { "epoch": 3.9945602901178603, "grad_norm": 0.1277909972774532, "learning_rate": 9.45608587947763e-06, "loss": 0.8665, "step": 8812 }, { "epoch": 3.9950135992747056, "grad_norm": 0.12606112602982397, "learning_rate": 9.447913734790908e-06, "loss": 0.8727, "step": 8813 }, { "epoch": 3.9954669084315504, "grad_norm": 0.12478148091835455, "learning_rate": 9.439744649985387e-06, "loss": 0.8505, "step": 8814 }, { "epoch": 3.995920217588395, "grad_norm": 0.12811540968803242, "learning_rate": 9.431578625879271e-06, "loss": 0.8376, "step": 8815 }, { "epoch": 3.9963735267452405, "grad_norm": 0.13274959257982602, "learning_rate": 9.423415663290382e-06, "loss": 0.8676, "step": 8816 }, { "epoch": 3.9968268359020853, "grad_norm": 0.13633294419038106, "learning_rate": 9.415255763036288e-06, "loss": 0.8644, "step": 8817 }, { "epoch": 3.99728014505893, "grad_norm": 0.1377666279609311, "learning_rate": 9.407098925934207e-06, "loss": 0.883, "step": 8818 }, { "epoch": 3.9977334542157754, "grad_norm": 0.14993865833086217, "learning_rate": 9.398945152801087e-06, "loss": 0.8511, "step": 8819 }, { "epoch": 3.9981867633726202, "grad_norm": 0.1373170682309801, "learning_rate": 9.390794444453548e-06, "loss": 0.8765, "step": 8820 }, { "epoch": 3.998640072529465, "grad_norm": 0.13858182102999042, "learning_rate": 9.382646801707894e-06, "loss": 0.8724, "step": 8821 }, { "epoch": 3.99909338168631, "grad_norm": 0.1334587026039683, "learning_rate": 9.374502225380153e-06, "loss": 0.8455, "step": 8822 }, { "epoch": 3.999546690843155, "grad_norm": 0.14450740152100897, "learning_rate": 9.36636071628601e-06, "loss": 0.8427, "step": 8823 }, { "epoch": 4.0, "grad_norm": 0.1444769668157407, "learning_rate": 9.358222275240884e-06, "loss": 0.8534, "step": 8824 }, { "epoch": 4.000453309156845, "grad_norm": 0.14835306146966498, "learning_rate": 9.350086903059842e-06, "loss": 0.8501, "step": 8825 }, { "epoch": 4.00090661831369, "grad_norm": 0.12603597586962875, "learning_rate": 9.341954600557685e-06, "loss": 0.8683, "step": 8826 }, { "epoch": 4.001359927470535, "grad_norm": 0.13612796106870126, "learning_rate": 9.333825368548864e-06, "loss": 0.842, "step": 8827 }, { "epoch": 4.00181323662738, "grad_norm": 0.14225838187295856, "learning_rate": 9.325699207847574e-06, "loss": 0.8601, "step": 8828 }, { "epoch": 4.002266545784225, "grad_norm": 0.12723560694376307, "learning_rate": 9.317576119267641e-06, "loss": 0.8603, "step": 8829 }, { "epoch": 4.00271985494107, "grad_norm": 0.14860903945598722, "learning_rate": 9.309456103622652e-06, "loss": 0.8508, "step": 8830 }, { "epoch": 4.003173164097915, "grad_norm": 0.15037494703511728, "learning_rate": 9.301339161725829e-06, "loss": 0.858, "step": 8831 }, { "epoch": 4.0036264732547595, "grad_norm": 0.16753654365818102, "learning_rate": 9.293225294390109e-06, "loss": 0.8639, "step": 8832 }, { "epoch": 4.004079782411605, "grad_norm": 0.15308467130164072, "learning_rate": 9.28511450242811e-06, "loss": 0.8442, "step": 8833 }, { "epoch": 4.00453309156845, "grad_norm": 0.1922809960915847, "learning_rate": 9.277006786652176e-06, "loss": 0.8599, "step": 8834 }, { "epoch": 4.004986400725294, "grad_norm": 0.1458300302271452, "learning_rate": 9.268902147874294e-06, "loss": 0.8546, "step": 8835 }, { "epoch": 4.00543970988214, "grad_norm": 0.16216920052121161, "learning_rate": 9.26080058690619e-06, "loss": 0.845, "step": 8836 }, { "epoch": 4.005893019038985, "grad_norm": 0.13151035978915043, "learning_rate": 9.25270210455925e-06, "loss": 0.8548, "step": 8837 }, { "epoch": 4.006346328195829, "grad_norm": 0.16271452712360024, "learning_rate": 9.244606701644546e-06, "loss": 0.8737, "step": 8838 }, { "epoch": 4.006799637352675, "grad_norm": 0.16344788563548554, "learning_rate": 9.236514378972878e-06, "loss": 0.8511, "step": 8839 }, { "epoch": 4.00725294650952, "grad_norm": 0.14363070358847707, "learning_rate": 9.2284251373547e-06, "loss": 0.8518, "step": 8840 }, { "epoch": 4.007706255666364, "grad_norm": 0.14731087818039784, "learning_rate": 9.220338977600187e-06, "loss": 0.8783, "step": 8841 }, { "epoch": 4.00815956482321, "grad_norm": 0.14369309598127084, "learning_rate": 9.212255900519178e-06, "loss": 0.8524, "step": 8842 }, { "epoch": 4.008612873980055, "grad_norm": 0.16176657096323738, "learning_rate": 9.204175906921233e-06, "loss": 0.8469, "step": 8843 }, { "epoch": 4.009066183136899, "grad_norm": 0.17142995296907243, "learning_rate": 9.196098997615572e-06, "loss": 0.8305, "step": 8844 }, { "epoch": 4.0095194922937445, "grad_norm": 0.1696672857972172, "learning_rate": 9.188025173411135e-06, "loss": 0.852, "step": 8845 }, { "epoch": 4.009972801450589, "grad_norm": 0.16031269288762137, "learning_rate": 9.179954435116527e-06, "loss": 0.8374, "step": 8846 }, { "epoch": 4.010426110607434, "grad_norm": 0.1204832621668557, "learning_rate": 9.171886783540058e-06, "loss": 0.8478, "step": 8847 }, { "epoch": 4.010879419764279, "grad_norm": 0.2093109427314353, "learning_rate": 9.163822219489735e-06, "loss": 0.8411, "step": 8848 }, { "epoch": 4.011332728921124, "grad_norm": 0.1287304162518913, "learning_rate": 9.155760743773233e-06, "loss": 0.8682, "step": 8849 }, { "epoch": 4.011786038077969, "grad_norm": 0.19128984720477232, "learning_rate": 9.147702357197952e-06, "loss": 0.8314, "step": 8850 }, { "epoch": 4.012239347234814, "grad_norm": 0.13765777527312822, "learning_rate": 9.139647060570955e-06, "loss": 0.8545, "step": 8851 }, { "epoch": 4.012692656391659, "grad_norm": 0.1559579712613772, "learning_rate": 9.131594854699001e-06, "loss": 0.8484, "step": 8852 }, { "epoch": 4.013145965548504, "grad_norm": 0.14463300561176964, "learning_rate": 9.123545740388536e-06, "loss": 0.8636, "step": 8853 }, { "epoch": 4.013599274705349, "grad_norm": 0.16378818429073316, "learning_rate": 9.115499718445716e-06, "loss": 0.8281, "step": 8854 }, { "epoch": 4.014052583862194, "grad_norm": 0.15438631176455112, "learning_rate": 9.107456789676354e-06, "loss": 0.8706, "step": 8855 }, { "epoch": 4.014505893019039, "grad_norm": 0.15940368501816382, "learning_rate": 9.099416954886e-06, "loss": 0.8381, "step": 8856 }, { "epoch": 4.014959202175884, "grad_norm": 0.17878953710255382, "learning_rate": 9.091380214879844e-06, "loss": 0.864, "step": 8857 }, { "epoch": 4.015412511332729, "grad_norm": 0.14693643919787422, "learning_rate": 9.083346570462806e-06, "loss": 0.8339, "step": 8858 }, { "epoch": 4.015865820489574, "grad_norm": 0.16597828040746906, "learning_rate": 9.07531602243946e-06, "loss": 0.8753, "step": 8859 }, { "epoch": 4.016319129646419, "grad_norm": 0.16509360563154962, "learning_rate": 9.067288571614114e-06, "loss": 0.8526, "step": 8860 }, { "epoch": 4.0167724388032635, "grad_norm": 0.14018175577764044, "learning_rate": 9.059264218790714e-06, "loss": 0.8454, "step": 8861 }, { "epoch": 4.017225747960109, "grad_norm": 0.1606425222763413, "learning_rate": 9.051242964772947e-06, "loss": 0.836, "step": 8862 }, { "epoch": 4.017679057116954, "grad_norm": 0.13053678311910716, "learning_rate": 9.043224810364153e-06, "loss": 0.859, "step": 8863 }, { "epoch": 4.018132366273798, "grad_norm": 0.1475357916086943, "learning_rate": 9.035209756367366e-06, "loss": 0.8633, "step": 8864 }, { "epoch": 4.018585675430644, "grad_norm": 0.12710768689868787, "learning_rate": 9.027197803585333e-06, "loss": 0.852, "step": 8865 }, { "epoch": 4.019038984587489, "grad_norm": 0.15142123416906478, "learning_rate": 9.019188952820461e-06, "loss": 0.8787, "step": 8866 }, { "epoch": 4.019492293744333, "grad_norm": 0.13843594000520365, "learning_rate": 9.011183204874876e-06, "loss": 0.8588, "step": 8867 }, { "epoch": 4.019945602901179, "grad_norm": 0.15005725833742067, "learning_rate": 9.003180560550352e-06, "loss": 0.8465, "step": 8868 }, { "epoch": 4.020398912058024, "grad_norm": 0.14498263726807234, "learning_rate": 8.995181020648406e-06, "loss": 0.8345, "step": 8869 }, { "epoch": 4.020852221214868, "grad_norm": 0.13340952930699834, "learning_rate": 8.987184585970206e-06, "loss": 0.8653, "step": 8870 }, { "epoch": 4.021305530371714, "grad_norm": 0.14640479095234704, "learning_rate": 8.979191257316611e-06, "loss": 0.8455, "step": 8871 }, { "epoch": 4.021758839528559, "grad_norm": 0.1281460983870801, "learning_rate": 8.971201035488172e-06, "loss": 0.86, "step": 8872 }, { "epoch": 4.022212148685403, "grad_norm": 0.11916897253347933, "learning_rate": 8.96321392128515e-06, "loss": 0.8592, "step": 8873 }, { "epoch": 4.0226654578422485, "grad_norm": 0.14919013644265333, "learning_rate": 8.955229915507458e-06, "loss": 0.8508, "step": 8874 }, { "epoch": 4.023118766999094, "grad_norm": 0.12854263137211366, "learning_rate": 8.94724901895474e-06, "loss": 0.8712, "step": 8875 }, { "epoch": 4.023572076155938, "grad_norm": 0.1303082330104462, "learning_rate": 8.939271232426288e-06, "loss": 0.8518, "step": 8876 }, { "epoch": 4.024025385312783, "grad_norm": 0.12649985609962675, "learning_rate": 8.931296556721115e-06, "loss": 0.8485, "step": 8877 }, { "epoch": 4.024478694469629, "grad_norm": 0.1410921108705833, "learning_rate": 8.923324992637901e-06, "loss": 0.873, "step": 8878 }, { "epoch": 4.024932003626473, "grad_norm": 0.12451106327850135, "learning_rate": 8.915356540975013e-06, "loss": 0.8465, "step": 8879 }, { "epoch": 4.025385312783318, "grad_norm": 0.11929303386390966, "learning_rate": 8.907391202530534e-06, "loss": 0.8739, "step": 8880 }, { "epoch": 4.025838621940164, "grad_norm": 0.13585798456352316, "learning_rate": 8.899428978102192e-06, "loss": 0.8731, "step": 8881 }, { "epoch": 4.026291931097008, "grad_norm": 0.11438944605343496, "learning_rate": 8.891469868487453e-06, "loss": 0.8613, "step": 8882 }, { "epoch": 4.026745240253853, "grad_norm": 0.15037595927633368, "learning_rate": 8.883513874483421e-06, "loss": 0.8489, "step": 8883 }, { "epoch": 4.027198549410698, "grad_norm": 0.12868719350987318, "learning_rate": 8.875560996886934e-06, "loss": 0.8703, "step": 8884 }, { "epoch": 4.027651858567543, "grad_norm": 0.13790889966295172, "learning_rate": 8.86761123649448e-06, "loss": 0.855, "step": 8885 }, { "epoch": 4.028105167724388, "grad_norm": 0.11717823924898468, "learning_rate": 8.859664594102266e-06, "loss": 0.847, "step": 8886 }, { "epoch": 4.028558476881233, "grad_norm": 0.13773574606929975, "learning_rate": 8.85172107050615e-06, "loss": 0.8453, "step": 8887 }, { "epoch": 4.029011786038078, "grad_norm": 0.11246905311859723, "learning_rate": 8.843780666501724e-06, "loss": 0.854, "step": 8888 }, { "epoch": 4.029465095194923, "grad_norm": 0.12714039236400893, "learning_rate": 8.83584338288423e-06, "loss": 0.8702, "step": 8889 }, { "epoch": 4.0299184043517675, "grad_norm": 0.12983882117854878, "learning_rate": 8.827909220448614e-06, "loss": 0.8537, "step": 8890 }, { "epoch": 4.030371713508613, "grad_norm": 0.11042993342696389, "learning_rate": 8.819978179989489e-06, "loss": 0.857, "step": 8891 }, { "epoch": 4.030825022665458, "grad_norm": 0.13606074856951272, "learning_rate": 8.812050262301196e-06, "loss": 0.832, "step": 8892 }, { "epoch": 4.031278331822302, "grad_norm": 0.11695525970198914, "learning_rate": 8.804125468177732e-06, "loss": 0.8721, "step": 8893 }, { "epoch": 4.031731640979148, "grad_norm": 0.14969737000245384, "learning_rate": 8.796203798412769e-06, "loss": 0.8581, "step": 8894 }, { "epoch": 4.032184950135993, "grad_norm": 0.14749912310927354, "learning_rate": 8.788285253799715e-06, "loss": 0.8599, "step": 8895 }, { "epoch": 4.032638259292837, "grad_norm": 0.14059766997136147, "learning_rate": 8.780369835131614e-06, "loss": 0.8429, "step": 8896 }, { "epoch": 4.033091568449683, "grad_norm": 0.1622502914089052, "learning_rate": 8.772457543201237e-06, "loss": 0.858, "step": 8897 }, { "epoch": 4.033544877606528, "grad_norm": 0.1179471286977934, "learning_rate": 8.764548378800998e-06, "loss": 0.852, "step": 8898 }, { "epoch": 4.033998186763372, "grad_norm": 0.15065182964677723, "learning_rate": 8.756642342723056e-06, "loss": 0.8424, "step": 8899 }, { "epoch": 4.034451495920218, "grad_norm": 0.14133594258824772, "learning_rate": 8.748739435759188e-06, "loss": 0.8405, "step": 8900 }, { "epoch": 4.034904805077063, "grad_norm": 0.1357058644896233, "learning_rate": 8.740839658700926e-06, "loss": 0.8618, "step": 8901 }, { "epoch": 4.035358114233907, "grad_norm": 0.17580099474672578, "learning_rate": 8.732943012339432e-06, "loss": 0.8578, "step": 8902 }, { "epoch": 4.0358114233907525, "grad_norm": 0.1480181162118561, "learning_rate": 8.725049497465593e-06, "loss": 0.829, "step": 8903 }, { "epoch": 4.036264732547598, "grad_norm": 0.16967161252506047, "learning_rate": 8.717159114869966e-06, "loss": 0.8318, "step": 8904 }, { "epoch": 4.036718041704442, "grad_norm": 0.12758140426923775, "learning_rate": 8.709271865342783e-06, "loss": 0.8508, "step": 8905 }, { "epoch": 4.037171350861287, "grad_norm": 0.1527136027778194, "learning_rate": 8.701387749673995e-06, "loss": 0.8515, "step": 8906 }, { "epoch": 4.037624660018133, "grad_norm": 0.12942492460835975, "learning_rate": 8.693506768653193e-06, "loss": 0.8696, "step": 8907 }, { "epoch": 4.038077969174977, "grad_norm": 0.14439617032723387, "learning_rate": 8.685628923069713e-06, "loss": 0.8665, "step": 8908 }, { "epoch": 4.038531278331822, "grad_norm": 0.1453252400131079, "learning_rate": 8.67775421371252e-06, "loss": 0.8607, "step": 8909 }, { "epoch": 4.038984587488668, "grad_norm": 0.12712663399204968, "learning_rate": 8.669882641370306e-06, "loss": 0.8486, "step": 8910 }, { "epoch": 4.039437896645512, "grad_norm": 0.1685844196287884, "learning_rate": 8.662014206831406e-06, "loss": 0.8417, "step": 8911 }, { "epoch": 4.039891205802357, "grad_norm": 0.12089877392329519, "learning_rate": 8.654148910883897e-06, "loss": 0.8719, "step": 8912 }, { "epoch": 4.0403445149592025, "grad_norm": 0.1407580318423928, "learning_rate": 8.646286754315486e-06, "loss": 0.8646, "step": 8913 }, { "epoch": 4.040797824116047, "grad_norm": 0.14422474361115017, "learning_rate": 8.638427737913617e-06, "loss": 0.8485, "step": 8914 }, { "epoch": 4.041251133272892, "grad_norm": 0.12870081048315468, "learning_rate": 8.630571862465365e-06, "loss": 0.8399, "step": 8915 }, { "epoch": 4.0417044424297375, "grad_norm": 0.17069394647921501, "learning_rate": 8.622719128757548e-06, "loss": 0.8332, "step": 8916 }, { "epoch": 4.042157751586582, "grad_norm": 0.11838045625570642, "learning_rate": 8.614869537576615e-06, "loss": 0.8723, "step": 8917 }, { "epoch": 4.042611060743427, "grad_norm": 0.13857987741366587, "learning_rate": 8.607023089708746e-06, "loss": 0.8526, "step": 8918 }, { "epoch": 4.043064369900272, "grad_norm": 0.13166229715499453, "learning_rate": 8.599179785939778e-06, "loss": 0.8509, "step": 8919 }, { "epoch": 4.043517679057117, "grad_norm": 0.12409665236857076, "learning_rate": 8.591339627055228e-06, "loss": 0.8485, "step": 8920 }, { "epoch": 4.043970988213962, "grad_norm": 0.12264790907303436, "learning_rate": 8.583502613840337e-06, "loss": 0.8556, "step": 8921 }, { "epoch": 4.044424297370807, "grad_norm": 0.13266873486416692, "learning_rate": 8.575668747079979e-06, "loss": 0.8599, "step": 8922 }, { "epoch": 4.044877606527652, "grad_norm": 0.1396171476755367, "learning_rate": 8.567838027558757e-06, "loss": 0.8511, "step": 8923 }, { "epoch": 4.045330915684497, "grad_norm": 0.13188078125116495, "learning_rate": 8.560010456060928e-06, "loss": 0.8351, "step": 8924 }, { "epoch": 4.045784224841341, "grad_norm": 0.1353556320115364, "learning_rate": 8.55218603337046e-06, "loss": 0.8782, "step": 8925 }, { "epoch": 4.046237533998187, "grad_norm": 0.1512107424481331, "learning_rate": 8.544364760270975e-06, "loss": 0.8668, "step": 8926 }, { "epoch": 4.046690843155032, "grad_norm": 0.12447726531425725, "learning_rate": 8.536546637545817e-06, "loss": 0.866, "step": 8927 }, { "epoch": 4.047144152311876, "grad_norm": 0.1483334553242719, "learning_rate": 8.528731665977981e-06, "loss": 0.8456, "step": 8928 }, { "epoch": 4.047597461468722, "grad_norm": 0.12131721280498348, "learning_rate": 8.52091984635016e-06, "loss": 0.8609, "step": 8929 }, { "epoch": 4.048050770625567, "grad_norm": 0.134748328349641, "learning_rate": 8.513111179444724e-06, "loss": 0.8461, "step": 8930 }, { "epoch": 4.048504079782411, "grad_norm": 0.14873122398009447, "learning_rate": 8.50530566604375e-06, "loss": 0.8525, "step": 8931 }, { "epoch": 4.0489573889392565, "grad_norm": 0.11465134319731685, "learning_rate": 8.497503306928964e-06, "loss": 0.8615, "step": 8932 }, { "epoch": 4.049410698096102, "grad_norm": 0.16791276754561685, "learning_rate": 8.489704102881816e-06, "loss": 0.8562, "step": 8933 }, { "epoch": 4.049864007252946, "grad_norm": 0.12412029258562343, "learning_rate": 8.481908054683411e-06, "loss": 0.8497, "step": 8934 }, { "epoch": 4.050317316409791, "grad_norm": 0.14541365711653056, "learning_rate": 8.474115163114534e-06, "loss": 0.8312, "step": 8935 }, { "epoch": 4.050770625566637, "grad_norm": 0.11532638731606416, "learning_rate": 8.466325428955686e-06, "loss": 0.866, "step": 8936 }, { "epoch": 4.051223934723481, "grad_norm": 0.1326001194675848, "learning_rate": 8.45853885298701e-06, "loss": 0.8573, "step": 8937 }, { "epoch": 4.051677243880326, "grad_norm": 0.14865238055358435, "learning_rate": 8.45075543598838e-06, "loss": 0.862, "step": 8938 }, { "epoch": 4.052130553037172, "grad_norm": 0.1108741257309898, "learning_rate": 8.4429751787393e-06, "loss": 0.8512, "step": 8939 }, { "epoch": 4.052583862194016, "grad_norm": 0.15561197699010706, "learning_rate": 8.435198082019016e-06, "loss": 0.8531, "step": 8940 }, { "epoch": 4.053037171350861, "grad_norm": 0.11328742462487901, "learning_rate": 8.4274241466064e-06, "loss": 0.8771, "step": 8941 }, { "epoch": 4.0534904805077066, "grad_norm": 0.12305460117501513, "learning_rate": 8.419653373280052e-06, "loss": 0.8558, "step": 8942 }, { "epoch": 4.053943789664551, "grad_norm": 0.14980790093969337, "learning_rate": 8.411885762818227e-06, "loss": 0.8608, "step": 8943 }, { "epoch": 4.054397098821396, "grad_norm": 0.12952210377674536, "learning_rate": 8.40412131599888e-06, "loss": 0.8685, "step": 8944 }, { "epoch": 4.0548504079782415, "grad_norm": 0.14034324938322332, "learning_rate": 8.39636003359965e-06, "loss": 0.852, "step": 8945 }, { "epoch": 4.055303717135086, "grad_norm": 0.13393424398400358, "learning_rate": 8.388601916397831e-06, "loss": 0.8478, "step": 8946 }, { "epoch": 4.055757026291931, "grad_norm": 0.14639414632853673, "learning_rate": 8.380846965170444e-06, "loss": 0.8331, "step": 8947 }, { "epoch": 4.056210335448776, "grad_norm": 0.12559595902204782, "learning_rate": 8.373095180694162e-06, "loss": 0.8475, "step": 8948 }, { "epoch": 4.056663644605621, "grad_norm": 0.12798723900081044, "learning_rate": 8.365346563745346e-06, "loss": 0.8359, "step": 8949 }, { "epoch": 4.057116953762466, "grad_norm": 0.12989384135936577, "learning_rate": 8.357601115100036e-06, "loss": 0.8587, "step": 8950 }, { "epoch": 4.057570262919311, "grad_norm": 0.12126930162822108, "learning_rate": 8.349858835533977e-06, "loss": 0.8749, "step": 8951 }, { "epoch": 4.058023572076156, "grad_norm": 0.12944384676263931, "learning_rate": 8.342119725822564e-06, "loss": 0.8679, "step": 8952 }, { "epoch": 4.058476881233001, "grad_norm": 0.12473305968350616, "learning_rate": 8.334383786740915e-06, "loss": 0.861, "step": 8953 }, { "epoch": 4.058930190389846, "grad_norm": 0.13043761346004243, "learning_rate": 8.326651019063781e-06, "loss": 0.8384, "step": 8954 }, { "epoch": 4.059383499546691, "grad_norm": 0.16778272162239477, "learning_rate": 8.318921423565642e-06, "loss": 0.8513, "step": 8955 }, { "epoch": 4.059836808703536, "grad_norm": 0.11228051391750292, "learning_rate": 8.311195001020623e-06, "loss": 0.8629, "step": 8956 }, { "epoch": 4.060290117860381, "grad_norm": 0.12990528572474835, "learning_rate": 8.303471752202564e-06, "loss": 0.8546, "step": 8957 }, { "epoch": 4.060743427017226, "grad_norm": 0.1297179374421311, "learning_rate": 8.295751677884954e-06, "loss": 0.85, "step": 8958 }, { "epoch": 4.061196736174071, "grad_norm": 0.1358992292506874, "learning_rate": 8.288034778841e-06, "loss": 0.8676, "step": 8959 }, { "epoch": 4.061650045330916, "grad_norm": 0.11284338333799473, "learning_rate": 8.280321055843568e-06, "loss": 0.8507, "step": 8960 }, { "epoch": 4.0621033544877605, "grad_norm": 0.11388282401197465, "learning_rate": 8.272610509665191e-06, "loss": 0.847, "step": 8961 }, { "epoch": 4.062556663644606, "grad_norm": 0.11243737101663509, "learning_rate": 8.264903141078124e-06, "loss": 0.8492, "step": 8962 }, { "epoch": 4.06300997280145, "grad_norm": 0.12228355078924327, "learning_rate": 8.257198950854271e-06, "loss": 0.8452, "step": 8963 }, { "epoch": 4.063463281958295, "grad_norm": 0.10670492258070628, "learning_rate": 8.249497939765238e-06, "loss": 0.8499, "step": 8964 }, { "epoch": 4.063916591115141, "grad_norm": 0.12066594024853496, "learning_rate": 8.241800108582292e-06, "loss": 0.8796, "step": 8965 }, { "epoch": 4.064369900271985, "grad_norm": 0.10703009776620899, "learning_rate": 8.234105458076422e-06, "loss": 0.8474, "step": 8966 }, { "epoch": 4.06482320942883, "grad_norm": 0.11277013263859183, "learning_rate": 8.226413989018223e-06, "loss": 0.865, "step": 8967 }, { "epoch": 4.065276518585676, "grad_norm": 0.10932866204557966, "learning_rate": 8.218725702178058e-06, "loss": 0.8526, "step": 8968 }, { "epoch": 4.06572982774252, "grad_norm": 0.11716385456735867, "learning_rate": 8.21104059832591e-06, "loss": 0.8773, "step": 8969 }, { "epoch": 4.066183136899365, "grad_norm": 0.1098449588896559, "learning_rate": 8.203358678231477e-06, "loss": 0.8411, "step": 8970 }, { "epoch": 4.0666364460562106, "grad_norm": 0.1468388961903274, "learning_rate": 8.195679942664112e-06, "loss": 0.8535, "step": 8971 }, { "epoch": 4.067089755213055, "grad_norm": 0.10907941179058932, "learning_rate": 8.188004392392886e-06, "loss": 0.8593, "step": 8972 }, { "epoch": 4.0675430643699, "grad_norm": 0.12346092113292302, "learning_rate": 8.180332028186498e-06, "loss": 0.8531, "step": 8973 }, { "epoch": 4.0679963735267455, "grad_norm": 0.12121900196962161, "learning_rate": 8.172662850813387e-06, "loss": 0.824, "step": 8974 }, { "epoch": 4.06844968268359, "grad_norm": 0.1273420141133991, "learning_rate": 8.164996861041632e-06, "loss": 0.8483, "step": 8975 }, { "epoch": 4.068902991840435, "grad_norm": 0.14017700085089213, "learning_rate": 8.157334059638989e-06, "loss": 0.858, "step": 8976 }, { "epoch": 4.06935630099728, "grad_norm": 0.13247510497528156, "learning_rate": 8.149674447372934e-06, "loss": 0.8579, "step": 8977 }, { "epoch": 4.069809610154125, "grad_norm": 0.13536517824159572, "learning_rate": 8.142018025010583e-06, "loss": 0.8422, "step": 8978 }, { "epoch": 4.07026291931097, "grad_norm": 0.1063894368992025, "learning_rate": 8.134364793318762e-06, "loss": 0.8492, "step": 8979 }, { "epoch": 4.070716228467815, "grad_norm": 0.1373833561565556, "learning_rate": 8.126714753063952e-06, "loss": 0.8431, "step": 8980 }, { "epoch": 4.07116953762466, "grad_norm": 0.12336714916809881, "learning_rate": 8.119067905012339e-06, "loss": 0.8671, "step": 8981 }, { "epoch": 4.071622846781505, "grad_norm": 0.12527202372829768, "learning_rate": 8.111424249929762e-06, "loss": 0.8441, "step": 8982 }, { "epoch": 4.07207615593835, "grad_norm": 0.12311558702232768, "learning_rate": 8.103783788581779e-06, "loss": 0.8495, "step": 8983 }, { "epoch": 4.072529465095195, "grad_norm": 0.11380771843042224, "learning_rate": 8.096146521733579e-06, "loss": 0.8463, "step": 8984 }, { "epoch": 4.07298277425204, "grad_norm": 0.11912476230242883, "learning_rate": 8.088512450150076e-06, "loss": 0.8684, "step": 8985 }, { "epoch": 4.073436083408885, "grad_norm": 0.13780013605564864, "learning_rate": 8.080881574595842e-06, "loss": 0.8501, "step": 8986 }, { "epoch": 4.07388939256573, "grad_norm": 0.11269516914300969, "learning_rate": 8.073253895835122e-06, "loss": 0.8555, "step": 8987 }, { "epoch": 4.074342701722575, "grad_norm": 0.12288320619092298, "learning_rate": 8.065629414631847e-06, "loss": 0.8483, "step": 8988 }, { "epoch": 4.07479601087942, "grad_norm": 0.11503098742373352, "learning_rate": 8.058008131749653e-06, "loss": 0.8457, "step": 8989 }, { "epoch": 4.0752493200362645, "grad_norm": 0.11443739051392744, "learning_rate": 8.050390047951819e-06, "loss": 0.8327, "step": 8990 }, { "epoch": 4.07570262919311, "grad_norm": 0.11863294631335977, "learning_rate": 8.042775164001307e-06, "loss": 0.8749, "step": 8991 }, { "epoch": 4.076155938349955, "grad_norm": 0.10906837550351502, "learning_rate": 8.035163480660792e-06, "loss": 0.8611, "step": 8992 }, { "epoch": 4.076609247506799, "grad_norm": 0.13661900770877455, "learning_rate": 8.027554998692592e-06, "loss": 0.8723, "step": 8993 }, { "epoch": 4.077062556663645, "grad_norm": 0.11837203864747993, "learning_rate": 8.019949718858732e-06, "loss": 0.8366, "step": 8994 }, { "epoch": 4.07751586582049, "grad_norm": 0.1290024014487687, "learning_rate": 8.012347641920883e-06, "loss": 0.8651, "step": 8995 }, { "epoch": 4.077969174977334, "grad_norm": 0.11806784636216891, "learning_rate": 8.00474876864044e-06, "loss": 0.8549, "step": 8996 }, { "epoch": 4.07842248413418, "grad_norm": 0.12880452235103232, "learning_rate": 7.99715309977843e-06, "loss": 0.8534, "step": 8997 }, { "epoch": 4.078875793291025, "grad_norm": 0.12260343674985606, "learning_rate": 7.989560636095603e-06, "loss": 0.8522, "step": 8998 }, { "epoch": 4.079329102447869, "grad_norm": 0.13722634944686107, "learning_rate": 7.98197137835234e-06, "loss": 0.8557, "step": 8999 }, { "epoch": 4.0797824116047146, "grad_norm": 0.12775628577269776, "learning_rate": 7.97438532730876e-06, "loss": 0.8453, "step": 9000 }, { "epoch": 4.080235720761559, "grad_norm": 0.13665582571135834, "learning_rate": 7.966802483724607e-06, "loss": 0.8327, "step": 9001 }, { "epoch": 4.080689029918404, "grad_norm": 0.14048949727883478, "learning_rate": 7.959222848359318e-06, "loss": 0.8586, "step": 9002 }, { "epoch": 4.0811423390752495, "grad_norm": 0.1271314329417991, "learning_rate": 7.95164642197204e-06, "loss": 0.8524, "step": 9003 }, { "epoch": 4.081595648232094, "grad_norm": 0.13554354538895486, "learning_rate": 7.944073205321547e-06, "loss": 0.8472, "step": 9004 }, { "epoch": 4.082048957388939, "grad_norm": 0.11285438770625945, "learning_rate": 7.936503199166346e-06, "loss": 0.882, "step": 9005 }, { "epoch": 4.082502266545784, "grad_norm": 0.1351688658078771, "learning_rate": 7.928936404264585e-06, "loss": 0.8537, "step": 9006 }, { "epoch": 4.082955575702629, "grad_norm": 0.1144240133443146, "learning_rate": 7.921372821374093e-06, "loss": 0.8761, "step": 9007 }, { "epoch": 4.083408884859474, "grad_norm": 0.10973157137024149, "learning_rate": 7.913812451252383e-06, "loss": 0.8559, "step": 9008 }, { "epoch": 4.083862194016319, "grad_norm": 0.12869290227814775, "learning_rate": 7.906255294656665e-06, "loss": 0.859, "step": 9009 }, { "epoch": 4.084315503173164, "grad_norm": 0.11858281176770814, "learning_rate": 7.898701352343794e-06, "loss": 0.8342, "step": 9010 }, { "epoch": 4.084768812330009, "grad_norm": 0.13887567721337266, "learning_rate": 7.891150625070332e-06, "loss": 0.8705, "step": 9011 }, { "epoch": 4.085222121486854, "grad_norm": 0.12904893406327686, "learning_rate": 7.88360311359249e-06, "loss": 0.8284, "step": 9012 }, { "epoch": 4.085675430643699, "grad_norm": 0.14690771919170706, "learning_rate": 7.8760588186662e-06, "loss": 0.8419, "step": 9013 }, { "epoch": 4.086128739800544, "grad_norm": 0.11995078147554958, "learning_rate": 7.868517741047017e-06, "loss": 0.8686, "step": 9014 }, { "epoch": 4.086582048957389, "grad_norm": 0.14499722662569478, "learning_rate": 7.860979881490225e-06, "loss": 0.8451, "step": 9015 }, { "epoch": 4.087035358114234, "grad_norm": 0.13783205307314864, "learning_rate": 7.853445240750751e-06, "loss": 0.8524, "step": 9016 }, { "epoch": 4.087488667271079, "grad_norm": 0.1971416821813805, "learning_rate": 7.84591381958321e-06, "loss": 0.8759, "step": 9017 }, { "epoch": 4.087941976427924, "grad_norm": 0.12597335983445296, "learning_rate": 7.838385618741901e-06, "loss": 0.8621, "step": 9018 }, { "epoch": 4.0883952855847685, "grad_norm": 0.13457835163500306, "learning_rate": 7.830860638980788e-06, "loss": 0.8419, "step": 9019 }, { "epoch": 4.088848594741614, "grad_norm": 0.1373612440507375, "learning_rate": 7.823338881053533e-06, "loss": 0.8527, "step": 9020 }, { "epoch": 4.089301903898459, "grad_norm": 0.1284656884452861, "learning_rate": 7.815820345713443e-06, "loss": 0.8459, "step": 9021 }, { "epoch": 4.089755213055303, "grad_norm": 0.1320127556031706, "learning_rate": 7.808305033713548e-06, "loss": 0.8613, "step": 9022 }, { "epoch": 4.090208522212149, "grad_norm": 0.13659601105470806, "learning_rate": 7.800792945806499e-06, "loss": 0.8633, "step": 9023 }, { "epoch": 4.090661831368994, "grad_norm": 0.12937386229059605, "learning_rate": 7.79328408274469e-06, "loss": 0.8718, "step": 9024 }, { "epoch": 4.091115140525838, "grad_norm": 0.1382751583364022, "learning_rate": 7.785778445280115e-06, "loss": 0.8499, "step": 9025 }, { "epoch": 4.091568449682684, "grad_norm": 0.10914383177855659, "learning_rate": 7.778276034164514e-06, "loss": 0.8474, "step": 9026 }, { "epoch": 4.092021758839529, "grad_norm": 0.16206575643905757, "learning_rate": 7.770776850149256e-06, "loss": 0.8491, "step": 9027 }, { "epoch": 4.092475067996373, "grad_norm": 0.13816019527944118, "learning_rate": 7.763280893985423e-06, "loss": 0.8578, "step": 9028 }, { "epoch": 4.0929283771532186, "grad_norm": 0.15303615177688254, "learning_rate": 7.755788166423746e-06, "loss": 0.8869, "step": 9029 }, { "epoch": 4.093381686310064, "grad_norm": 0.13903435985061363, "learning_rate": 7.748298668214654e-06, "loss": 0.8441, "step": 9030 }, { "epoch": 4.093834995466908, "grad_norm": 0.13081249467470416, "learning_rate": 7.74081240010823e-06, "loss": 0.8745, "step": 9031 }, { "epoch": 4.0942883046237535, "grad_norm": 0.13897458950054764, "learning_rate": 7.733329362854261e-06, "loss": 0.8649, "step": 9032 }, { "epoch": 4.094741613780599, "grad_norm": 0.15260374310543107, "learning_rate": 7.725849557202183e-06, "loss": 0.8583, "step": 9033 }, { "epoch": 4.095194922937443, "grad_norm": 0.12710496985341038, "learning_rate": 7.718372983901114e-06, "loss": 0.881, "step": 9034 }, { "epoch": 4.095648232094288, "grad_norm": 0.15797737317151986, "learning_rate": 7.710899643699878e-06, "loss": 0.8396, "step": 9035 }, { "epoch": 4.096101541251134, "grad_norm": 0.138491001153014, "learning_rate": 7.70342953734693e-06, "loss": 0.8412, "step": 9036 }, { "epoch": 4.096554850407978, "grad_norm": 0.14624531170813362, "learning_rate": 7.695962665590438e-06, "loss": 0.8326, "step": 9037 }, { "epoch": 4.097008159564823, "grad_norm": 0.15740670582685098, "learning_rate": 7.688499029178213e-06, "loss": 0.8278, "step": 9038 }, { "epoch": 4.097461468721669, "grad_norm": 0.13835488323087203, "learning_rate": 7.68103862885779e-06, "loss": 0.8592, "step": 9039 }, { "epoch": 4.097914777878513, "grad_norm": 0.11759332490306096, "learning_rate": 7.673581465376312e-06, "loss": 0.8371, "step": 9040 }, { "epoch": 4.098368087035358, "grad_norm": 0.14014521381621303, "learning_rate": 7.666127539480674e-06, "loss": 0.8588, "step": 9041 }, { "epoch": 4.098821396192203, "grad_norm": 0.7326475531167587, "learning_rate": 7.658676851917386e-06, "loss": 0.8463, "step": 9042 }, { "epoch": 4.099274705349048, "grad_norm": 0.17051386560099382, "learning_rate": 7.651229403432663e-06, "loss": 0.849, "step": 9043 }, { "epoch": 4.099728014505893, "grad_norm": 0.12518931917692847, "learning_rate": 7.643785194772375e-06, "loss": 0.8579, "step": 9044 }, { "epoch": 4.100181323662738, "grad_norm": 0.13881498760519556, "learning_rate": 7.636344226682104e-06, "loss": 0.8522, "step": 9045 }, { "epoch": 4.100634632819583, "grad_norm": 0.15479543872159218, "learning_rate": 7.628906499907063e-06, "loss": 0.8782, "step": 9046 }, { "epoch": 4.101087941976428, "grad_norm": 0.12602846222867695, "learning_rate": 7.6214720151921795e-06, "loss": 0.8562, "step": 9047 }, { "epoch": 4.1015412511332725, "grad_norm": 0.11570495881362565, "learning_rate": 7.614040773282036e-06, "loss": 0.8679, "step": 9048 }, { "epoch": 4.101994560290118, "grad_norm": 0.12637143900947062, "learning_rate": 7.606612774920883e-06, "loss": 0.8565, "step": 9049 }, { "epoch": 4.102447869446963, "grad_norm": 0.12443516788033097, "learning_rate": 7.599188020852666e-06, "loss": 0.8572, "step": 9050 }, { "epoch": 4.102901178603807, "grad_norm": 0.12804651927303826, "learning_rate": 7.591766511820986e-06, "loss": 0.8655, "step": 9051 }, { "epoch": 4.103354487760653, "grad_norm": 0.1287770982703611, "learning_rate": 7.584348248569147e-06, "loss": 0.8854, "step": 9052 }, { "epoch": 4.103807796917498, "grad_norm": 0.12494182439431976, "learning_rate": 7.576933231840087e-06, "loss": 0.844, "step": 9053 }, { "epoch": 4.104261106074342, "grad_norm": 0.1263292124800653, "learning_rate": 7.569521462376466e-06, "loss": 0.8588, "step": 9054 }, { "epoch": 4.104714415231188, "grad_norm": 0.12660506865060964, "learning_rate": 7.56211294092057e-06, "loss": 0.8557, "step": 9055 }, { "epoch": 4.105167724388033, "grad_norm": 0.12615062865924248, "learning_rate": 7.554707668214405e-06, "loss": 0.8535, "step": 9056 }, { "epoch": 4.105621033544877, "grad_norm": 0.12673547758938541, "learning_rate": 7.547305644999614e-06, "loss": 0.8534, "step": 9057 }, { "epoch": 4.106074342701723, "grad_norm": 0.1303764280262862, "learning_rate": 7.5399068720175505e-06, "loss": 0.8634, "step": 9058 }, { "epoch": 4.106527651858568, "grad_norm": 0.12592511269096351, "learning_rate": 7.532511350009208e-06, "loss": 0.8555, "step": 9059 }, { "epoch": 4.106980961015412, "grad_norm": 0.12920386772078565, "learning_rate": 7.525119079715271e-06, "loss": 0.8517, "step": 9060 }, { "epoch": 4.1074342701722575, "grad_norm": 0.11931940062570583, "learning_rate": 7.517730061876105e-06, "loss": 0.8542, "step": 9061 }, { "epoch": 4.107887579329103, "grad_norm": 0.13204866784132874, "learning_rate": 7.510344297231737e-06, "loss": 0.865, "step": 9062 }, { "epoch": 4.108340888485947, "grad_norm": 0.10737235958347198, "learning_rate": 7.502961786521874e-06, "loss": 0.841, "step": 9063 }, { "epoch": 4.108794197642792, "grad_norm": 0.13046726266455574, "learning_rate": 7.495582530485883e-06, "loss": 0.8325, "step": 9064 }, { "epoch": 4.109247506799638, "grad_norm": 0.10901737307426237, "learning_rate": 7.48820652986284e-06, "loss": 0.8504, "step": 9065 }, { "epoch": 4.109700815956482, "grad_norm": 0.13152771190302343, "learning_rate": 7.480833785391457e-06, "loss": 0.833, "step": 9066 }, { "epoch": 4.110154125113327, "grad_norm": 0.11708724387648557, "learning_rate": 7.4734642978101515e-06, "loss": 0.8489, "step": 9067 }, { "epoch": 4.110607434270173, "grad_norm": 0.10152313108839744, "learning_rate": 7.466098067856977e-06, "loss": 0.8406, "step": 9068 }, { "epoch": 4.111060743427017, "grad_norm": 0.11041290890896108, "learning_rate": 7.458735096269714e-06, "loss": 0.8654, "step": 9069 }, { "epoch": 4.111514052583862, "grad_norm": 0.1072296113064941, "learning_rate": 7.451375383785748e-06, "loss": 0.8426, "step": 9070 }, { "epoch": 4.1119673617407075, "grad_norm": 0.11101729677184924, "learning_rate": 7.444018931142212e-06, "loss": 0.8576, "step": 9071 }, { "epoch": 4.112420670897552, "grad_norm": 0.11827926636591171, "learning_rate": 7.4366657390758524e-06, "loss": 0.8577, "step": 9072 }, { "epoch": 4.112873980054397, "grad_norm": 0.11313621979129918, "learning_rate": 7.429315808323125e-06, "loss": 0.8377, "step": 9073 }, { "epoch": 4.1133272892112425, "grad_norm": 0.11292406951488035, "learning_rate": 7.421969139620149e-06, "loss": 0.8603, "step": 9074 }, { "epoch": 4.113780598368087, "grad_norm": 0.11576759445887198, "learning_rate": 7.414625733702694e-06, "loss": 0.8411, "step": 9075 }, { "epoch": 4.114233907524932, "grad_norm": 0.135902706899065, "learning_rate": 7.407285591306248e-06, "loss": 0.8775, "step": 9076 }, { "epoch": 4.114687216681777, "grad_norm": 0.11811175112542013, "learning_rate": 7.39994871316593e-06, "loss": 0.8525, "step": 9077 }, { "epoch": 4.115140525838622, "grad_norm": 0.12405393316270072, "learning_rate": 7.392615100016569e-06, "loss": 0.861, "step": 9078 }, { "epoch": 4.115593834995467, "grad_norm": 0.11601142814005486, "learning_rate": 7.3852847525926274e-06, "loss": 0.8458, "step": 9079 }, { "epoch": 4.116047144152311, "grad_norm": 0.12403802925217497, "learning_rate": 7.377957671628277e-06, "loss": 0.8369, "step": 9080 }, { "epoch": 4.116500453309157, "grad_norm": 0.11428187080689292, "learning_rate": 7.370633857857337e-06, "loss": 0.8463, "step": 9081 }, { "epoch": 4.116953762466002, "grad_norm": 0.13654403034197515, "learning_rate": 7.363313312013325e-06, "loss": 0.8502, "step": 9082 }, { "epoch": 4.117407071622846, "grad_norm": 0.10506747788131507, "learning_rate": 7.355996034829384e-06, "loss": 0.8538, "step": 9083 }, { "epoch": 4.117860380779692, "grad_norm": 0.1202219852951705, "learning_rate": 7.3486820270383915e-06, "loss": 0.8422, "step": 9084 }, { "epoch": 4.118313689936537, "grad_norm": 0.1140819701624222, "learning_rate": 7.341371289372845e-06, "loss": 0.8486, "step": 9085 }, { "epoch": 4.118766999093381, "grad_norm": 0.10065512236322083, "learning_rate": 7.334063822564954e-06, "loss": 0.8603, "step": 9086 }, { "epoch": 4.119220308250227, "grad_norm": 0.11297997673129163, "learning_rate": 7.3267596273465645e-06, "loss": 0.8611, "step": 9087 }, { "epoch": 4.119673617407072, "grad_norm": 0.1040671060084105, "learning_rate": 7.319458704449234e-06, "loss": 0.8553, "step": 9088 }, { "epoch": 4.120126926563916, "grad_norm": 0.11448290811226014, "learning_rate": 7.3121610546041586e-06, "loss": 0.8403, "step": 9089 }, { "epoch": 4.1205802357207615, "grad_norm": 0.1164970713669212, "learning_rate": 7.304866678542213e-06, "loss": 0.8518, "step": 9090 }, { "epoch": 4.121033544877607, "grad_norm": 0.1098105048781621, "learning_rate": 7.2975755769939675e-06, "loss": 0.8463, "step": 9091 }, { "epoch": 4.121486854034451, "grad_norm": 0.12574690917176026, "learning_rate": 7.290287750689629e-06, "loss": 0.8276, "step": 9092 }, { "epoch": 4.121940163191296, "grad_norm": 0.11461684311177497, "learning_rate": 7.283003200359116e-06, "loss": 0.8531, "step": 9093 }, { "epoch": 4.122393472348142, "grad_norm": 0.11038071726741833, "learning_rate": 7.2757219267319735e-06, "loss": 0.8519, "step": 9094 }, { "epoch": 4.122846781504986, "grad_norm": 0.11323196102400976, "learning_rate": 7.2684439305374674e-06, "loss": 0.8507, "step": 9095 }, { "epoch": 4.123300090661831, "grad_norm": 0.12180430807335434, "learning_rate": 7.261169212504487e-06, "loss": 0.8647, "step": 9096 }, { "epoch": 4.123753399818677, "grad_norm": 0.11658862461566208, "learning_rate": 7.253897773361638e-06, "loss": 0.8489, "step": 9097 }, { "epoch": 4.124206708975521, "grad_norm": 0.1255040529193668, "learning_rate": 7.246629613837153e-06, "loss": 0.8514, "step": 9098 }, { "epoch": 4.124660018132366, "grad_norm": 0.10791350435792045, "learning_rate": 7.239364734658986e-06, "loss": 0.8456, "step": 9099 }, { "epoch": 4.1251133272892115, "grad_norm": 0.12401283068257704, "learning_rate": 7.232103136554722e-06, "loss": 0.8424, "step": 9100 }, { "epoch": 4.125566636446056, "grad_norm": 0.1124026202979358, "learning_rate": 7.224844820251631e-06, "loss": 0.8271, "step": 9101 }, { "epoch": 4.126019945602901, "grad_norm": 0.11178944927715072, "learning_rate": 7.217589786476647e-06, "loss": 0.84, "step": 9102 }, { "epoch": 4.1264732547597465, "grad_norm": 0.11949087673510204, "learning_rate": 7.210338035956401e-06, "loss": 0.8687, "step": 9103 }, { "epoch": 4.126926563916591, "grad_norm": 0.11803676886092444, "learning_rate": 7.203089569417167e-06, "loss": 0.8684, "step": 9104 }, { "epoch": 4.127379873073436, "grad_norm": 0.11756518089664175, "learning_rate": 7.19584438758489e-06, "loss": 0.8515, "step": 9105 }, { "epoch": 4.127833182230281, "grad_norm": 0.12579089163913584, "learning_rate": 7.188602491185217e-06, "loss": 0.882, "step": 9106 }, { "epoch": 4.128286491387126, "grad_norm": 0.13782560847658534, "learning_rate": 7.1813638809434285e-06, "loss": 0.8679, "step": 9107 }, { "epoch": 4.128739800543971, "grad_norm": 0.1437528517837005, "learning_rate": 7.1741285575845076e-06, "loss": 0.8394, "step": 9108 }, { "epoch": 4.129193109700816, "grad_norm": 0.13481852222844323, "learning_rate": 7.166896521833076e-06, "loss": 0.8445, "step": 9109 }, { "epoch": 4.129646418857661, "grad_norm": 0.115498567459327, "learning_rate": 7.1596677744134635e-06, "loss": 0.8518, "step": 9110 }, { "epoch": 4.130099728014506, "grad_norm": 0.12917775331761872, "learning_rate": 7.152442316049626e-06, "loss": 0.8812, "step": 9111 }, { "epoch": 4.130553037171351, "grad_norm": 0.11640566515282876, "learning_rate": 7.1452201474652415e-06, "loss": 0.8356, "step": 9112 }, { "epoch": 4.131006346328196, "grad_norm": 0.11243599982182712, "learning_rate": 7.138001269383603e-06, "loss": 0.8494, "step": 9113 }, { "epoch": 4.131459655485041, "grad_norm": 0.13872077841182642, "learning_rate": 7.130785682527732e-06, "loss": 0.8465, "step": 9114 }, { "epoch": 4.131912964641886, "grad_norm": 0.1064427186892733, "learning_rate": 7.123573387620273e-06, "loss": 0.8536, "step": 9115 }, { "epoch": 4.132366273798731, "grad_norm": 0.1359527199112936, "learning_rate": 7.116364385383549e-06, "loss": 0.8411, "step": 9116 }, { "epoch": 4.132819582955576, "grad_norm": 0.10969678779514005, "learning_rate": 7.109158676539589e-06, "loss": 0.8491, "step": 9117 }, { "epoch": 4.133272892112421, "grad_norm": 0.13131595641362045, "learning_rate": 7.101956261810041e-06, "loss": 0.8489, "step": 9118 }, { "epoch": 4.1337262012692655, "grad_norm": 0.11373722424857802, "learning_rate": 7.0947571419162666e-06, "loss": 0.8532, "step": 9119 }, { "epoch": 4.134179510426111, "grad_norm": 0.11285153578659043, "learning_rate": 7.087561317579275e-06, "loss": 0.8604, "step": 9120 }, { "epoch": 4.134632819582956, "grad_norm": 0.12887576088691813, "learning_rate": 7.080368789519743e-06, "loss": 0.8438, "step": 9121 }, { "epoch": 4.1350861287398, "grad_norm": 0.12360425396285449, "learning_rate": 7.0731795584580145e-06, "loss": 0.842, "step": 9122 }, { "epoch": 4.135539437896646, "grad_norm": 0.1294022293749803, "learning_rate": 7.065993625114136e-06, "loss": 0.8665, "step": 9123 }, { "epoch": 4.13599274705349, "grad_norm": 0.1181721823722193, "learning_rate": 7.058810990207776e-06, "loss": 0.8493, "step": 9124 }, { "epoch": 4.136446056210335, "grad_norm": 0.1266110118235886, "learning_rate": 7.05163165445832e-06, "loss": 0.838, "step": 9125 }, { "epoch": 4.136899365367181, "grad_norm": 0.12076625899936964, "learning_rate": 7.0444556185847736e-06, "loss": 0.8463, "step": 9126 }, { "epoch": 4.137352674524025, "grad_norm": 0.11730972356090862, "learning_rate": 7.037282883305865e-06, "loss": 0.8411, "step": 9127 }, { "epoch": 4.13780598368087, "grad_norm": 0.10184597876613248, "learning_rate": 7.0301134493399395e-06, "loss": 0.8467, "step": 9128 }, { "epoch": 4.1382592928377155, "grad_norm": 0.12756540432106217, "learning_rate": 7.022947317405058e-06, "loss": 0.8713, "step": 9129 }, { "epoch": 4.13871260199456, "grad_norm": 0.11848571591741076, "learning_rate": 7.015784488218922e-06, "loss": 0.8689, "step": 9130 }, { "epoch": 4.139165911151405, "grad_norm": 0.14255056472124317, "learning_rate": 7.0086249624988955e-06, "loss": 0.8594, "step": 9131 }, { "epoch": 4.1396192203082505, "grad_norm": 0.10470342273864525, "learning_rate": 7.001468740962049e-06, "loss": 0.846, "step": 9132 }, { "epoch": 4.140072529465095, "grad_norm": 0.1384317349040384, "learning_rate": 6.994315824325082e-06, "loss": 0.8399, "step": 9133 }, { "epoch": 4.14052583862194, "grad_norm": 0.12224664714311477, "learning_rate": 6.987166213304397e-06, "loss": 0.8495, "step": 9134 }, { "epoch": 4.140979147778785, "grad_norm": 0.13085701686657034, "learning_rate": 6.980019908616027e-06, "loss": 0.8512, "step": 9135 }, { "epoch": 4.14143245693563, "grad_norm": 0.1253205297768279, "learning_rate": 6.9728769109757185e-06, "loss": 0.8672, "step": 9136 }, { "epoch": 4.141885766092475, "grad_norm": 0.12024388930127486, "learning_rate": 6.965737221098838e-06, "loss": 0.872, "step": 9137 }, { "epoch": 4.14233907524932, "grad_norm": 0.1396735681121534, "learning_rate": 6.958600839700476e-06, "loss": 0.8345, "step": 9138 }, { "epoch": 4.142792384406165, "grad_norm": 0.1393091112246275, "learning_rate": 6.951467767495343e-06, "loss": 0.8647, "step": 9139 }, { "epoch": 4.14324569356301, "grad_norm": 0.10944710419640542, "learning_rate": 6.9443380051978435e-06, "loss": 0.8633, "step": 9140 }, { "epoch": 4.143699002719855, "grad_norm": 0.12419968337604462, "learning_rate": 6.937211553522032e-06, "loss": 0.8507, "step": 9141 }, { "epoch": 4.1441523118767, "grad_norm": 0.1352893860617036, "learning_rate": 6.930088413181662e-06, "loss": 0.8543, "step": 9142 }, { "epoch": 4.144605621033545, "grad_norm": 0.12330509858584003, "learning_rate": 6.922968584890117e-06, "loss": 0.8509, "step": 9143 }, { "epoch": 4.14505893019039, "grad_norm": 0.13365765993148881, "learning_rate": 6.915852069360491e-06, "loss": 0.8398, "step": 9144 }, { "epoch": 4.145512239347235, "grad_norm": 0.11872607856242338, "learning_rate": 6.9087388673055156e-06, "loss": 0.8439, "step": 9145 }, { "epoch": 4.14596554850408, "grad_norm": 0.14492369392983961, "learning_rate": 6.90162897943758e-06, "loss": 0.858, "step": 9146 }, { "epoch": 4.146418857660925, "grad_norm": 0.10580569384839393, "learning_rate": 6.894522406468791e-06, "loss": 0.8414, "step": 9147 }, { "epoch": 4.1468721668177695, "grad_norm": 0.1280314486045261, "learning_rate": 6.8874191491108675e-06, "loss": 0.8468, "step": 9148 }, { "epoch": 4.147325475974615, "grad_norm": 0.14945879116438382, "learning_rate": 6.880319208075241e-06, "loss": 0.8481, "step": 9149 }, { "epoch": 4.14777878513146, "grad_norm": 0.13207572095463985, "learning_rate": 6.873222584072974e-06, "loss": 0.8731, "step": 9150 }, { "epoch": 4.148232094288304, "grad_norm": 0.15568350853054727, "learning_rate": 6.86612927781483e-06, "loss": 0.8538, "step": 9151 }, { "epoch": 4.14868540344515, "grad_norm": 0.12281181322086547, "learning_rate": 6.859039290011211e-06, "loss": 0.8467, "step": 9152 }, { "epoch": 4.149138712601995, "grad_norm": 0.16653108258515986, "learning_rate": 6.851952621372216e-06, "loss": 0.859, "step": 9153 }, { "epoch": 4.149592021758839, "grad_norm": 0.13202859728769115, "learning_rate": 6.844869272607577e-06, "loss": 0.8547, "step": 9154 }, { "epoch": 4.150045330915685, "grad_norm": 0.15122238003460273, "learning_rate": 6.837789244426729e-06, "loss": 0.8776, "step": 9155 }, { "epoch": 4.15049864007253, "grad_norm": 0.11974182706114833, "learning_rate": 6.8307125375387525e-06, "loss": 0.8713, "step": 9156 }, { "epoch": 4.150951949229374, "grad_norm": 0.12513830622457678, "learning_rate": 6.823639152652384e-06, "loss": 0.8744, "step": 9157 }, { "epoch": 4.1514052583862195, "grad_norm": 0.13485538262721955, "learning_rate": 6.816569090476073e-06, "loss": 0.8578, "step": 9158 }, { "epoch": 4.151858567543064, "grad_norm": 0.1097133329446329, "learning_rate": 6.80950235171789e-06, "loss": 0.8575, "step": 9159 }, { "epoch": 4.152311876699909, "grad_norm": 0.13173016964511677, "learning_rate": 6.802438937085591e-06, "loss": 0.8602, "step": 9160 }, { "epoch": 4.1527651858567545, "grad_norm": 0.12101927551837638, "learning_rate": 6.795378847286587e-06, "loss": 0.8619, "step": 9161 }, { "epoch": 4.153218495013599, "grad_norm": 0.12419743928262195, "learning_rate": 6.78832208302799e-06, "loss": 0.8542, "step": 9162 }, { "epoch": 4.153671804170444, "grad_norm": 0.12456302181299876, "learning_rate": 6.781268645016532e-06, "loss": 0.8427, "step": 9163 }, { "epoch": 4.154125113327289, "grad_norm": 0.12747309399161616, "learning_rate": 6.774218533958659e-06, "loss": 0.8593, "step": 9164 }, { "epoch": 4.154578422484134, "grad_norm": 0.12603498501795313, "learning_rate": 6.76717175056044e-06, "loss": 0.8802, "step": 9165 }, { "epoch": 4.155031731640979, "grad_norm": 0.1294832261636481, "learning_rate": 6.760128295527648e-06, "loss": 0.8489, "step": 9166 }, { "epoch": 4.155485040797824, "grad_norm": 0.11386387760951619, "learning_rate": 6.7530881695656845e-06, "loss": 0.8466, "step": 9167 }, { "epoch": 4.155938349954669, "grad_norm": 0.13535199133327933, "learning_rate": 6.746051373379665e-06, "loss": 0.84, "step": 9168 }, { "epoch": 4.156391659111514, "grad_norm": 0.11919087734945294, "learning_rate": 6.739017907674324e-06, "loss": 0.8486, "step": 9169 }, { "epoch": 4.156844968268359, "grad_norm": 0.12438336419882083, "learning_rate": 6.731987773154096e-06, "loss": 0.8735, "step": 9170 }, { "epoch": 4.157298277425204, "grad_norm": 0.11136424192970786, "learning_rate": 6.724960970523069e-06, "loss": 0.8573, "step": 9171 }, { "epoch": 4.157751586582049, "grad_norm": 0.12374924161559134, "learning_rate": 6.717937500484985e-06, "loss": 0.8583, "step": 9172 }, { "epoch": 4.158204895738894, "grad_norm": 0.10922609024740722, "learning_rate": 6.710917363743283e-06, "loss": 0.8619, "step": 9173 }, { "epoch": 4.158658204895739, "grad_norm": 0.11217443120303842, "learning_rate": 6.703900561001031e-06, "loss": 0.8655, "step": 9174 }, { "epoch": 4.159111514052584, "grad_norm": 0.11427556765563991, "learning_rate": 6.696887092961004e-06, "loss": 0.8565, "step": 9175 }, { "epoch": 4.159564823209429, "grad_norm": 0.1257088707725748, "learning_rate": 6.689876960325601e-06, "loss": 0.8663, "step": 9176 }, { "epoch": 4.1600181323662735, "grad_norm": 0.12882749919766875, "learning_rate": 6.682870163796926e-06, "loss": 0.8627, "step": 9177 }, { "epoch": 4.160471441523119, "grad_norm": 0.12217909386870196, "learning_rate": 6.675866704076725e-06, "loss": 0.8499, "step": 9178 }, { "epoch": 4.160924750679964, "grad_norm": 0.12940448662964354, "learning_rate": 6.668866581866407e-06, "loss": 0.8495, "step": 9179 }, { "epoch": 4.161378059836808, "grad_norm": 0.12489022251233196, "learning_rate": 6.6618697978670536e-06, "loss": 0.8565, "step": 9180 }, { "epoch": 4.161831368993654, "grad_norm": 0.11008951446119612, "learning_rate": 6.654876352779425e-06, "loss": 0.8652, "step": 9181 }, { "epoch": 4.162284678150499, "grad_norm": 0.10893834955125034, "learning_rate": 6.647886247303921e-06, "loss": 0.8613, "step": 9182 }, { "epoch": 4.162737987307343, "grad_norm": 0.108040906794142, "learning_rate": 6.6408994821406395e-06, "loss": 0.8646, "step": 9183 }, { "epoch": 4.163191296464189, "grad_norm": 0.22654620948318063, "learning_rate": 6.633916057989305e-06, "loss": 0.8474, "step": 9184 }, { "epoch": 4.163644605621034, "grad_norm": 0.10178815784755457, "learning_rate": 6.626935975549345e-06, "loss": 0.8534, "step": 9185 }, { "epoch": 4.164097914777878, "grad_norm": 0.11408899882482317, "learning_rate": 6.619959235519831e-06, "loss": 0.8492, "step": 9186 }, { "epoch": 4.1645512239347235, "grad_norm": 0.11902245355390856, "learning_rate": 6.612985838599492e-06, "loss": 0.8549, "step": 9187 }, { "epoch": 4.165004533091569, "grad_norm": 0.12005825187906455, "learning_rate": 6.606015785486754e-06, "loss": 0.8592, "step": 9188 }, { "epoch": 4.165457842248413, "grad_norm": 0.11719157312492222, "learning_rate": 6.599049076879671e-06, "loss": 0.8588, "step": 9189 }, { "epoch": 4.1659111514052585, "grad_norm": 0.1039189148169718, "learning_rate": 6.592085713475995e-06, "loss": 0.8641, "step": 9190 }, { "epoch": 4.166364460562104, "grad_norm": 0.13544964764049489, "learning_rate": 6.585125695973107e-06, "loss": 0.852, "step": 9191 }, { "epoch": 4.166817769718948, "grad_norm": 0.12806131534667387, "learning_rate": 6.578169025068098e-06, "loss": 0.8439, "step": 9192 }, { "epoch": 4.167271078875793, "grad_norm": 0.11235061373506369, "learning_rate": 6.571215701457671e-06, "loss": 0.8584, "step": 9193 }, { "epoch": 4.167724388032639, "grad_norm": 0.13729015587533513, "learning_rate": 6.56426572583825e-06, "loss": 0.8725, "step": 9194 }, { "epoch": 4.168177697189483, "grad_norm": 0.1155443795108974, "learning_rate": 6.5573190989058724e-06, "loss": 0.8627, "step": 9195 }, { "epoch": 4.168631006346328, "grad_norm": 0.1457927531216816, "learning_rate": 6.550375821356283e-06, "loss": 0.8671, "step": 9196 }, { "epoch": 4.169084315503174, "grad_norm": 0.11566688216447041, "learning_rate": 6.543435893884864e-06, "loss": 0.8554, "step": 9197 }, { "epoch": 4.169537624660018, "grad_norm": 0.10883366040889204, "learning_rate": 6.536499317186664e-06, "loss": 0.8384, "step": 9198 }, { "epoch": 4.169990933816863, "grad_norm": 0.11620335148897061, "learning_rate": 6.5295660919563985e-06, "loss": 0.8447, "step": 9199 }, { "epoch": 4.1704442429737085, "grad_norm": 0.11017150585860916, "learning_rate": 6.522636218888463e-06, "loss": 0.8634, "step": 9200 }, { "epoch": 4.170897552130553, "grad_norm": 0.12036709280578578, "learning_rate": 6.515709698676889e-06, "loss": 0.8561, "step": 9201 }, { "epoch": 4.171350861287398, "grad_norm": 0.10154010739155289, "learning_rate": 6.50878653201541e-06, "loss": 0.8604, "step": 9202 }, { "epoch": 4.171804170444243, "grad_norm": 0.11150739956443212, "learning_rate": 6.501866719597383e-06, "loss": 0.8454, "step": 9203 }, { "epoch": 4.172257479601088, "grad_norm": 0.10827825105281429, "learning_rate": 6.4949502621158485e-06, "loss": 0.87, "step": 9204 }, { "epoch": 4.172710788757933, "grad_norm": 0.10616230027699501, "learning_rate": 6.488037160263521e-06, "loss": 0.8602, "step": 9205 }, { "epoch": 4.1731640979147775, "grad_norm": 0.10407896840097525, "learning_rate": 6.481127414732755e-06, "loss": 0.8624, "step": 9206 }, { "epoch": 4.173617407071623, "grad_norm": 0.10609493246593091, "learning_rate": 6.474221026215599e-06, "loss": 0.8303, "step": 9207 }, { "epoch": 4.174070716228468, "grad_norm": 0.09904090451361926, "learning_rate": 6.467317995403726e-06, "loss": 0.8525, "step": 9208 }, { "epoch": 4.174524025385312, "grad_norm": 0.10961427059233535, "learning_rate": 6.460418322988516e-06, "loss": 0.8668, "step": 9209 }, { "epoch": 4.174977334542158, "grad_norm": 0.10258405662654571, "learning_rate": 6.453522009660971e-06, "loss": 0.8349, "step": 9210 }, { "epoch": 4.175430643699003, "grad_norm": 0.12276155844241743, "learning_rate": 6.446629056111797e-06, "loss": 0.8575, "step": 9211 }, { "epoch": 4.175883952855847, "grad_norm": 0.12000748518575019, "learning_rate": 6.43973946303134e-06, "loss": 0.8424, "step": 9212 }, { "epoch": 4.176337262012693, "grad_norm": 0.13780716470148374, "learning_rate": 6.432853231109595e-06, "loss": 0.8595, "step": 9213 }, { "epoch": 4.176790571169538, "grad_norm": 0.11927760703677474, "learning_rate": 6.425970361036258e-06, "loss": 0.8395, "step": 9214 }, { "epoch": 4.177243880326382, "grad_norm": 0.12872653464591013, "learning_rate": 6.419090853500654e-06, "loss": 0.8692, "step": 9215 }, { "epoch": 4.1776971894832275, "grad_norm": 0.11949111970823156, "learning_rate": 6.412214709191804e-06, "loss": 0.8725, "step": 9216 }, { "epoch": 4.178150498640073, "grad_norm": 0.12280602735812184, "learning_rate": 6.405341928798363e-06, "loss": 0.8594, "step": 9217 }, { "epoch": 4.178603807796917, "grad_norm": 0.12160200769371479, "learning_rate": 6.3984725130086604e-06, "loss": 0.8548, "step": 9218 }, { "epoch": 4.1790571169537625, "grad_norm": 0.1396094277762603, "learning_rate": 6.391606462510678e-06, "loss": 0.8667, "step": 9219 }, { "epoch": 4.179510426110608, "grad_norm": 0.12127818142119809, "learning_rate": 6.384743777992093e-06, "loss": 0.8569, "step": 9220 }, { "epoch": 4.179963735267452, "grad_norm": 0.1200469001409995, "learning_rate": 6.3778844601402e-06, "loss": 0.8452, "step": 9221 }, { "epoch": 4.180417044424297, "grad_norm": 0.15950672761348575, "learning_rate": 6.371028509642006e-06, "loss": 0.867, "step": 9222 }, { "epoch": 4.180870353581143, "grad_norm": 0.10537705209927452, "learning_rate": 6.3641759271841285e-06, "loss": 0.8621, "step": 9223 }, { "epoch": 4.181323662737987, "grad_norm": 0.15534321319570465, "learning_rate": 6.357326713452896e-06, "loss": 0.8457, "step": 9224 }, { "epoch": 4.181776971894832, "grad_norm": 0.11136282390247067, "learning_rate": 6.350480869134257e-06, "loss": 0.8555, "step": 9225 }, { "epoch": 4.182230281051678, "grad_norm": 0.12247079409346857, "learning_rate": 6.343638394913858e-06, "loss": 0.8498, "step": 9226 }, { "epoch": 4.182683590208522, "grad_norm": 0.14513548242413435, "learning_rate": 6.336799291476983e-06, "loss": 0.8622, "step": 9227 }, { "epoch": 4.183136899365367, "grad_norm": 0.13991275560222655, "learning_rate": 6.329963559508603e-06, "loss": 0.8649, "step": 9228 }, { "epoch": 4.1835902085222125, "grad_norm": 0.12112986690535807, "learning_rate": 6.323131199693327e-06, "loss": 0.8477, "step": 9229 }, { "epoch": 4.184043517679057, "grad_norm": 0.11795196287438502, "learning_rate": 6.3163022127154286e-06, "loss": 0.8503, "step": 9230 }, { "epoch": 4.184496826835902, "grad_norm": 0.15020847609014834, "learning_rate": 6.309476599258863e-06, "loss": 0.8525, "step": 9231 }, { "epoch": 4.1849501359927475, "grad_norm": 0.12347401004001525, "learning_rate": 6.302654360007223e-06, "loss": 0.869, "step": 9232 }, { "epoch": 4.185403445149592, "grad_norm": 0.14159036794639798, "learning_rate": 6.295835495643792e-06, "loss": 0.8816, "step": 9233 }, { "epoch": 4.185856754306437, "grad_norm": 0.10464352834341265, "learning_rate": 6.289020006851481e-06, "loss": 0.8565, "step": 9234 }, { "epoch": 4.186310063463282, "grad_norm": 0.1378766532499929, "learning_rate": 6.282207894312912e-06, "loss": 0.8618, "step": 9235 }, { "epoch": 4.186763372620127, "grad_norm": 0.11027075725351405, "learning_rate": 6.275399158710298e-06, "loss": 0.8563, "step": 9236 }, { "epoch": 4.187216681776972, "grad_norm": 0.10966754240955932, "learning_rate": 6.2685938007255845e-06, "loss": 0.8436, "step": 9237 }, { "epoch": 4.187669990933816, "grad_norm": 0.12581463362235767, "learning_rate": 6.261791821040324e-06, "loss": 0.8654, "step": 9238 }, { "epoch": 4.188123300090662, "grad_norm": 0.09816889357869087, "learning_rate": 6.254993220335776e-06, "loss": 0.863, "step": 9239 }, { "epoch": 4.188576609247507, "grad_norm": 0.13426479174832998, "learning_rate": 6.248197999292824e-06, "loss": 0.8421, "step": 9240 }, { "epoch": 4.189029918404351, "grad_norm": 0.11242692041117115, "learning_rate": 6.241406158592047e-06, "loss": 0.8431, "step": 9241 }, { "epoch": 4.189483227561197, "grad_norm": 0.11121630728805025, "learning_rate": 6.234617698913648e-06, "loss": 0.8616, "step": 9242 }, { "epoch": 4.189936536718042, "grad_norm": 0.11953251047677875, "learning_rate": 6.227832620937531e-06, "loss": 0.8718, "step": 9243 }, { "epoch": 4.190389845874886, "grad_norm": 0.12821574643950714, "learning_rate": 6.221050925343228e-06, "loss": 0.8532, "step": 9244 }, { "epoch": 4.1908431550317315, "grad_norm": 0.1216296580140616, "learning_rate": 6.214272612809944e-06, "loss": 0.8479, "step": 9245 }, { "epoch": 4.191296464188577, "grad_norm": 0.12263616838477921, "learning_rate": 6.207497684016561e-06, "loss": 0.866, "step": 9246 }, { "epoch": 4.191749773345421, "grad_norm": 0.11687717903551002, "learning_rate": 6.200726139641587e-06, "loss": 0.8704, "step": 9247 }, { "epoch": 4.1922030825022665, "grad_norm": 0.12776236620454687, "learning_rate": 6.1939579803632365e-06, "loss": 0.8668, "step": 9248 }, { "epoch": 4.192656391659112, "grad_norm": 0.11084323299879052, "learning_rate": 6.187193206859339e-06, "loss": 0.8521, "step": 9249 }, { "epoch": 4.193109700815956, "grad_norm": 0.10485771808189624, "learning_rate": 6.180431819807426e-06, "loss": 0.8565, "step": 9250 }, { "epoch": 4.193563009972801, "grad_norm": 0.11174528607948468, "learning_rate": 6.173673819884647e-06, "loss": 0.847, "step": 9251 }, { "epoch": 4.194016319129647, "grad_norm": 0.12315020296741666, "learning_rate": 6.166919207767863e-06, "loss": 0.8684, "step": 9252 }, { "epoch": 4.194469628286491, "grad_norm": 0.1083687961048849, "learning_rate": 6.160167984133538e-06, "loss": 0.8572, "step": 9253 }, { "epoch": 4.194922937443336, "grad_norm": 0.11068850095924941, "learning_rate": 6.15342014965787e-06, "loss": 0.8704, "step": 9254 }, { "epoch": 4.195376246600182, "grad_norm": 0.2940550927354745, "learning_rate": 6.146675705016622e-06, "loss": 0.8427, "step": 9255 }, { "epoch": 4.195829555757026, "grad_norm": 0.10503662350849649, "learning_rate": 6.1399346508853065e-06, "loss": 0.8728, "step": 9256 }, { "epoch": 4.196282864913871, "grad_norm": 0.10465468811838002, "learning_rate": 6.133196987939039e-06, "loss": 0.8237, "step": 9257 }, { "epoch": 4.1967361740707165, "grad_norm": 0.09820645428101261, "learning_rate": 6.126462716852635e-06, "loss": 0.8532, "step": 9258 }, { "epoch": 4.197189483227561, "grad_norm": 0.11623559522807124, "learning_rate": 6.1197318383005465e-06, "loss": 0.8388, "step": 9259 }, { "epoch": 4.197642792384406, "grad_norm": 0.10861694518726926, "learning_rate": 6.1130043529568705e-06, "loss": 0.8532, "step": 9260 }, { "epoch": 4.1980961015412515, "grad_norm": 0.11092750179991301, "learning_rate": 6.106280261495414e-06, "loss": 0.8338, "step": 9261 }, { "epoch": 4.198549410698096, "grad_norm": 0.12564921027752182, "learning_rate": 6.09955956458959e-06, "loss": 0.8583, "step": 9262 }, { "epoch": 4.199002719854941, "grad_norm": 0.10228026499158978, "learning_rate": 6.092842262912518e-06, "loss": 0.8537, "step": 9263 }, { "epoch": 4.199456029011786, "grad_norm": 0.10665772670833125, "learning_rate": 6.086128357136938e-06, "loss": 0.8445, "step": 9264 }, { "epoch": 4.199909338168631, "grad_norm": 0.11414856751345905, "learning_rate": 6.079417847935278e-06, "loss": 0.8462, "step": 9265 }, { "epoch": 4.200362647325476, "grad_norm": 0.10780656560765514, "learning_rate": 6.0727107359796054e-06, "loss": 0.8444, "step": 9266 }, { "epoch": 4.200815956482321, "grad_norm": 0.10497611053199622, "learning_rate": 6.066007021941671e-06, "loss": 0.8426, "step": 9267 }, { "epoch": 4.201269265639166, "grad_norm": 0.1148018409807479, "learning_rate": 6.059306706492854e-06, "loss": 0.8638, "step": 9268 }, { "epoch": 4.201722574796011, "grad_norm": 0.11469442422733421, "learning_rate": 6.052609790304229e-06, "loss": 0.8658, "step": 9269 }, { "epoch": 4.202175883952856, "grad_norm": 0.116687500916083, "learning_rate": 6.045916274046506e-06, "loss": 0.8659, "step": 9270 }, { "epoch": 4.202629193109701, "grad_norm": 0.12375491613635249, "learning_rate": 6.039226158390046e-06, "loss": 0.843, "step": 9271 }, { "epoch": 4.203082502266546, "grad_norm": 0.11451082027538881, "learning_rate": 6.032539444004903e-06, "loss": 0.8667, "step": 9272 }, { "epoch": 4.203535811423391, "grad_norm": 0.09406511336076767, "learning_rate": 6.0258561315607565e-06, "loss": 0.8572, "step": 9273 }, { "epoch": 4.2039891205802356, "grad_norm": 0.10680472371845692, "learning_rate": 6.019176221726981e-06, "loss": 0.8527, "step": 9274 }, { "epoch": 4.204442429737081, "grad_norm": 0.11961799225000169, "learning_rate": 6.012499715172561e-06, "loss": 0.8552, "step": 9275 }, { "epoch": 4.204895738893926, "grad_norm": 0.11169344370584648, "learning_rate": 6.00582661256619e-06, "loss": 0.8553, "step": 9276 }, { "epoch": 4.2053490480507705, "grad_norm": 0.1098964622496317, "learning_rate": 5.999156914576181e-06, "loss": 0.8522, "step": 9277 }, { "epoch": 4.205802357207616, "grad_norm": 0.12310117963609052, "learning_rate": 5.992490621870541e-06, "loss": 0.8317, "step": 9278 }, { "epoch": 4.20625566636446, "grad_norm": 0.12694137132306302, "learning_rate": 5.985827735116902e-06, "loss": 0.8127, "step": 9279 }, { "epoch": 4.206708975521305, "grad_norm": 0.12216874922821466, "learning_rate": 5.979168254982597e-06, "loss": 0.8608, "step": 9280 }, { "epoch": 4.207162284678151, "grad_norm": 0.1256121070914895, "learning_rate": 5.9725121821345625e-06, "loss": 0.8452, "step": 9281 }, { "epoch": 4.207615593834995, "grad_norm": 0.11023339291645128, "learning_rate": 5.965859517239447e-06, "loss": 0.8348, "step": 9282 }, { "epoch": 4.20806890299184, "grad_norm": 0.12728774806571236, "learning_rate": 5.959210260963515e-06, "loss": 0.8539, "step": 9283 }, { "epoch": 4.208522212148686, "grad_norm": 0.12928002477809147, "learning_rate": 5.95256441397273e-06, "loss": 0.8575, "step": 9284 }, { "epoch": 4.20897552130553, "grad_norm": 0.1068292384771184, "learning_rate": 5.94592197693268e-06, "loss": 0.8495, "step": 9285 }, { "epoch": 4.209428830462375, "grad_norm": 0.11215006761974186, "learning_rate": 5.939282950508625e-06, "loss": 0.8454, "step": 9286 }, { "epoch": 4.2098821396192205, "grad_norm": 0.11541798094565697, "learning_rate": 5.932647335365489e-06, "loss": 0.877, "step": 9287 }, { "epoch": 4.210335448776065, "grad_norm": 0.11229217034809173, "learning_rate": 5.926015132167835e-06, "loss": 0.8541, "step": 9288 }, { "epoch": 4.21078875793291, "grad_norm": 0.111668940581694, "learning_rate": 5.919386341579918e-06, "loss": 0.8538, "step": 9289 }, { "epoch": 4.2112420670897555, "grad_norm": 0.10168638124777007, "learning_rate": 5.912760964265607e-06, "loss": 0.8745, "step": 9290 }, { "epoch": 4.2116953762466, "grad_norm": 0.11852430187845218, "learning_rate": 5.906139000888483e-06, "loss": 0.8613, "step": 9291 }, { "epoch": 4.212148685403445, "grad_norm": 0.11097473779527817, "learning_rate": 5.89952045211172e-06, "loss": 0.8624, "step": 9292 }, { "epoch": 4.21260199456029, "grad_norm": 0.11828328801065756, "learning_rate": 5.8929053185982255e-06, "loss": 0.8634, "step": 9293 }, { "epoch": 4.213055303717135, "grad_norm": 0.11453126256270789, "learning_rate": 5.886293601010486e-06, "loss": 0.8509, "step": 9294 }, { "epoch": 4.21350861287398, "grad_norm": 0.10707397802600505, "learning_rate": 5.879685300010707e-06, "loss": 0.8504, "step": 9295 }, { "epoch": 4.213961922030825, "grad_norm": 0.09985719637640858, "learning_rate": 5.8730804162607166e-06, "loss": 0.8544, "step": 9296 }, { "epoch": 4.21441523118767, "grad_norm": 0.10473680610300148, "learning_rate": 5.866478950422028e-06, "loss": 0.8476, "step": 9297 }, { "epoch": 4.214868540344515, "grad_norm": 0.11181689392570071, "learning_rate": 5.8598809031557764e-06, "loss": 0.8626, "step": 9298 }, { "epoch": 4.21532184950136, "grad_norm": 0.10271800578987517, "learning_rate": 5.8532862751228e-06, "loss": 0.8537, "step": 9299 }, { "epoch": 4.215775158658205, "grad_norm": 0.09895343601559733, "learning_rate": 5.846695066983557e-06, "loss": 0.8587, "step": 9300 }, { "epoch": 4.21622846781505, "grad_norm": 0.1191855863690266, "learning_rate": 5.840107279398171e-06, "loss": 0.8605, "step": 9301 }, { "epoch": 4.216681776971895, "grad_norm": 0.10840337046515217, "learning_rate": 5.833522913026444e-06, "loss": 0.8616, "step": 9302 }, { "epoch": 4.2171350861287396, "grad_norm": 0.11195823497766329, "learning_rate": 5.8269419685278e-06, "loss": 0.8528, "step": 9303 }, { "epoch": 4.217588395285585, "grad_norm": 0.11205559945722963, "learning_rate": 5.8203644465613594e-06, "loss": 0.8586, "step": 9304 }, { "epoch": 4.21804170444243, "grad_norm": 0.10553088688181952, "learning_rate": 5.8137903477858636e-06, "loss": 0.8534, "step": 9305 }, { "epoch": 4.2184950135992745, "grad_norm": 0.11969664891872349, "learning_rate": 5.8072196728597455e-06, "loss": 0.8519, "step": 9306 }, { "epoch": 4.21894832275612, "grad_norm": 0.10711410363164875, "learning_rate": 5.80065242244106e-06, "loss": 0.8465, "step": 9307 }, { "epoch": 4.219401631912965, "grad_norm": 0.13720058698852275, "learning_rate": 5.794088597187553e-06, "loss": 0.8593, "step": 9308 }, { "epoch": 4.219854941069809, "grad_norm": 0.10261779548215634, "learning_rate": 5.787528197756591e-06, "loss": 0.8511, "step": 9309 }, { "epoch": 4.220308250226655, "grad_norm": 0.10726231200278503, "learning_rate": 5.780971224805236e-06, "loss": 0.8537, "step": 9310 }, { "epoch": 4.2207615593835, "grad_norm": 0.10128471280005782, "learning_rate": 5.774417678990185e-06, "loss": 0.8219, "step": 9311 }, { "epoch": 4.221214868540344, "grad_norm": 0.11135366702566031, "learning_rate": 5.767867560967788e-06, "loss": 0.8413, "step": 9312 }, { "epoch": 4.22166817769719, "grad_norm": 0.11845038686242822, "learning_rate": 5.7613208713940535e-06, "loss": 0.8498, "step": 9313 }, { "epoch": 4.222121486854035, "grad_norm": 0.10328276111282299, "learning_rate": 5.754777610924666e-06, "loss": 0.8455, "step": 9314 }, { "epoch": 4.222574796010879, "grad_norm": 0.13381764990139686, "learning_rate": 5.748237780214947e-06, "loss": 0.8639, "step": 9315 }, { "epoch": 4.2230281051677245, "grad_norm": 0.12586645054288442, "learning_rate": 5.741701379919873e-06, "loss": 0.8673, "step": 9316 }, { "epoch": 4.223481414324569, "grad_norm": 0.11400364356419372, "learning_rate": 5.7351684106940945e-06, "loss": 0.8725, "step": 9317 }, { "epoch": 4.223934723481414, "grad_norm": 0.11408820718776674, "learning_rate": 5.7286388731918915e-06, "loss": 0.8561, "step": 9318 }, { "epoch": 4.2243880326382595, "grad_norm": 0.10873595425334351, "learning_rate": 5.72211276806724e-06, "loss": 0.8659, "step": 9319 }, { "epoch": 4.224841341795104, "grad_norm": 0.11405418851362532, "learning_rate": 5.715590095973725e-06, "loss": 0.8491, "step": 9320 }, { "epoch": 4.225294650951949, "grad_norm": 0.09764628703002207, "learning_rate": 5.709070857564629e-06, "loss": 0.8649, "step": 9321 }, { "epoch": 4.225747960108794, "grad_norm": 0.10078189761392484, "learning_rate": 5.702555053492859e-06, "loss": 0.8472, "step": 9322 }, { "epoch": 4.226201269265639, "grad_norm": 0.10770302434907847, "learning_rate": 5.696042684411005e-06, "loss": 0.8607, "step": 9323 }, { "epoch": 4.226654578422484, "grad_norm": 0.10247852312021948, "learning_rate": 5.68953375097129e-06, "loss": 0.8592, "step": 9324 }, { "epoch": 4.227107887579329, "grad_norm": 0.11186644405723142, "learning_rate": 5.683028253825611e-06, "loss": 0.8715, "step": 9325 }, { "epoch": 4.227561196736174, "grad_norm": 0.10763191422609084, "learning_rate": 5.676526193625509e-06, "loss": 0.8552, "step": 9326 }, { "epoch": 4.228014505893019, "grad_norm": 0.10445597748414612, "learning_rate": 5.6700275710221746e-06, "loss": 0.8485, "step": 9327 }, { "epoch": 4.228467815049864, "grad_norm": 0.11439752979849928, "learning_rate": 5.6635323866664815e-06, "loss": 0.8497, "step": 9328 }, { "epoch": 4.228921124206709, "grad_norm": 0.10532789658245938, "learning_rate": 5.657040641208924e-06, "loss": 0.8458, "step": 9329 }, { "epoch": 4.229374433363554, "grad_norm": 0.10910047900912362, "learning_rate": 5.650552335299688e-06, "loss": 0.8534, "step": 9330 }, { "epoch": 4.229827742520399, "grad_norm": 0.11213321875075723, "learning_rate": 5.6440674695885875e-06, "loss": 0.8329, "step": 9331 }, { "epoch": 4.2302810516772436, "grad_norm": 0.11244062115429881, "learning_rate": 5.637586044725098e-06, "loss": 0.8338, "step": 9332 }, { "epoch": 4.230734360834089, "grad_norm": 0.10296174672664316, "learning_rate": 5.6311080613583505e-06, "loss": 0.8392, "step": 9333 }, { "epoch": 4.231187669990934, "grad_norm": 0.1080102052999728, "learning_rate": 5.624633520137144e-06, "loss": 0.8663, "step": 9334 }, { "epoch": 4.2316409791477785, "grad_norm": 0.1080181631597641, "learning_rate": 5.618162421709912e-06, "loss": 0.8639, "step": 9335 }, { "epoch": 4.232094288304624, "grad_norm": 0.10768750057752138, "learning_rate": 5.611694766724763e-06, "loss": 0.8353, "step": 9336 }, { "epoch": 4.232547597461469, "grad_norm": 0.10112008038050369, "learning_rate": 5.605230555829444e-06, "loss": 0.8492, "step": 9337 }, { "epoch": 4.233000906618313, "grad_norm": 0.1278606606792553, "learning_rate": 5.598769789671377e-06, "loss": 0.841, "step": 9338 }, { "epoch": 4.233454215775159, "grad_norm": 0.1146502756630061, "learning_rate": 5.5923124688976115e-06, "loss": 0.8587, "step": 9339 }, { "epoch": 4.233907524932004, "grad_norm": 0.11604181836172953, "learning_rate": 5.5858585941548805e-06, "loss": 0.8556, "step": 9340 }, { "epoch": 4.234360834088848, "grad_norm": 0.13779384238999884, "learning_rate": 5.579408166089555e-06, "loss": 0.8748, "step": 9341 }, { "epoch": 4.234814143245694, "grad_norm": 0.10871932387124182, "learning_rate": 5.572961185347652e-06, "loss": 0.8582, "step": 9342 }, { "epoch": 4.235267452402539, "grad_norm": 0.13418790269982767, "learning_rate": 5.566517652574876e-06, "loss": 0.8414, "step": 9343 }, { "epoch": 4.235720761559383, "grad_norm": 0.10358782609726037, "learning_rate": 5.560077568416544e-06, "loss": 0.8476, "step": 9344 }, { "epoch": 4.2361740707162285, "grad_norm": 0.10755056844639473, "learning_rate": 5.5536409335176725e-06, "loss": 0.8655, "step": 9345 }, { "epoch": 4.236627379873074, "grad_norm": 0.11979586333788579, "learning_rate": 5.547207748522887e-06, "loss": 0.8599, "step": 9346 }, { "epoch": 4.237080689029918, "grad_norm": 0.10024369890681302, "learning_rate": 5.540778014076509e-06, "loss": 0.8551, "step": 9347 }, { "epoch": 4.2375339981867635, "grad_norm": 0.11774862609345126, "learning_rate": 5.534351730822476e-06, "loss": 0.8518, "step": 9348 }, { "epoch": 4.237987307343609, "grad_norm": 0.11070042723575589, "learning_rate": 5.52792889940442e-06, "loss": 0.8602, "step": 9349 }, { "epoch": 4.238440616500453, "grad_norm": 0.11020968108368566, "learning_rate": 5.5215095204656e-06, "loss": 0.8607, "step": 9350 }, { "epoch": 4.238893925657298, "grad_norm": 0.11644236195485354, "learning_rate": 5.515093594648928e-06, "loss": 0.8611, "step": 9351 }, { "epoch": 4.239347234814144, "grad_norm": 0.11243270734098021, "learning_rate": 5.50868112259697e-06, "loss": 0.8658, "step": 9352 }, { "epoch": 4.239800543970988, "grad_norm": 0.11156721497527133, "learning_rate": 5.502272104951982e-06, "loss": 0.8658, "step": 9353 }, { "epoch": 4.240253853127833, "grad_norm": 0.10587751721247415, "learning_rate": 5.495866542355814e-06, "loss": 0.8478, "step": 9354 }, { "epoch": 4.240707162284679, "grad_norm": 0.11429100450438912, "learning_rate": 5.489464435450029e-06, "loss": 0.8499, "step": 9355 }, { "epoch": 4.241160471441523, "grad_norm": 0.09881741423187988, "learning_rate": 5.483065784875807e-06, "loss": 0.8531, "step": 9356 }, { "epoch": 4.241613780598368, "grad_norm": 0.09602830238766888, "learning_rate": 5.476670591273978e-06, "loss": 0.8458, "step": 9357 }, { "epoch": 4.242067089755213, "grad_norm": 0.12092474678403503, "learning_rate": 5.470278855285065e-06, "loss": 0.8684, "step": 9358 }, { "epoch": 4.242520398912058, "grad_norm": 0.10648962652748893, "learning_rate": 5.463890577549191e-06, "loss": 0.8705, "step": 9359 }, { "epoch": 4.242973708068903, "grad_norm": 0.12363095031683162, "learning_rate": 5.457505758706187e-06, "loss": 0.8502, "step": 9360 }, { "epoch": 4.2434270172257476, "grad_norm": 0.1132577407585533, "learning_rate": 5.4511243993954935e-06, "loss": 0.8603, "step": 9361 }, { "epoch": 4.243880326382593, "grad_norm": 0.10510825519182798, "learning_rate": 5.444746500256237e-06, "loss": 0.8702, "step": 9362 }, { "epoch": 4.244333635539438, "grad_norm": 0.12852186152621659, "learning_rate": 5.438372061927162e-06, "loss": 0.843, "step": 9363 }, { "epoch": 4.2447869446962825, "grad_norm": 0.11384806556870954, "learning_rate": 5.432001085046712e-06, "loss": 0.8346, "step": 9364 }, { "epoch": 4.245240253853128, "grad_norm": 0.1401611182104351, "learning_rate": 5.425633570252933e-06, "loss": 0.8493, "step": 9365 }, { "epoch": 4.245693563009973, "grad_norm": 0.11122035975397418, "learning_rate": 5.419269518183576e-06, "loss": 0.8444, "step": 9366 }, { "epoch": 4.246146872166817, "grad_norm": 0.12132013555667824, "learning_rate": 5.412908929476009e-06, "loss": 0.8533, "step": 9367 }, { "epoch": 4.246600181323663, "grad_norm": 0.10437153800280212, "learning_rate": 5.406551804767253e-06, "loss": 0.8466, "step": 9368 }, { "epoch": 4.247053490480508, "grad_norm": 0.09962388775745974, "learning_rate": 5.4001981446940084e-06, "loss": 0.8501, "step": 9369 }, { "epoch": 4.247506799637352, "grad_norm": 0.12358672726098141, "learning_rate": 5.393847949892612e-06, "loss": 0.8647, "step": 9370 }, { "epoch": 4.247960108794198, "grad_norm": 0.10868758105750338, "learning_rate": 5.387501220999043e-06, "loss": 0.8585, "step": 9371 }, { "epoch": 4.248413417951043, "grad_norm": 0.11182190142700867, "learning_rate": 5.381157958648948e-06, "loss": 0.8324, "step": 9372 }, { "epoch": 4.248866727107887, "grad_norm": 0.12773445089906132, "learning_rate": 5.374818163477629e-06, "loss": 0.8726, "step": 9373 }, { "epoch": 4.2493200362647325, "grad_norm": 0.12610877268183998, "learning_rate": 5.3684818361200255e-06, "loss": 0.8326, "step": 9374 }, { "epoch": 4.249773345421578, "grad_norm": 0.10544323773546682, "learning_rate": 5.3621489772107596e-06, "loss": 0.8403, "step": 9375 }, { "epoch": 4.250226654578422, "grad_norm": 0.1275797042836495, "learning_rate": 5.355819587384057e-06, "loss": 0.8435, "step": 9376 }, { "epoch": 4.2506799637352675, "grad_norm": 0.12291237752466316, "learning_rate": 5.349493667273851e-06, "loss": 0.8531, "step": 9377 }, { "epoch": 4.251133272892113, "grad_norm": 0.10621663733203143, "learning_rate": 5.343171217513683e-06, "loss": 0.8445, "step": 9378 }, { "epoch": 4.251586582048957, "grad_norm": 0.1294563062386802, "learning_rate": 5.336852238736781e-06, "loss": 0.8743, "step": 9379 }, { "epoch": 4.252039891205802, "grad_norm": 0.12107958258075914, "learning_rate": 5.330536731575993e-06, "loss": 0.8446, "step": 9380 }, { "epoch": 4.252493200362648, "grad_norm": 0.10702216412940735, "learning_rate": 5.324224696663849e-06, "loss": 0.8362, "step": 9381 }, { "epoch": 4.252946509519492, "grad_norm": 0.11003674806614124, "learning_rate": 5.317916134632511e-06, "loss": 0.8727, "step": 9382 }, { "epoch": 4.253399818676337, "grad_norm": 0.12097559631439221, "learning_rate": 5.311611046113792e-06, "loss": 0.8443, "step": 9383 }, { "epoch": 4.253853127833183, "grad_norm": 0.1318708855911713, "learning_rate": 5.305309431739188e-06, "loss": 0.8728, "step": 9384 }, { "epoch": 4.254306436990027, "grad_norm": 0.11794700899462372, "learning_rate": 5.299011292139793e-06, "loss": 0.8576, "step": 9385 }, { "epoch": 4.254759746146872, "grad_norm": 0.1153167315172711, "learning_rate": 5.292716627946415e-06, "loss": 0.8794, "step": 9386 }, { "epoch": 4.2552130553037175, "grad_norm": 0.14628127259254367, "learning_rate": 5.286425439789465e-06, "loss": 0.8506, "step": 9387 }, { "epoch": 4.255666364460562, "grad_norm": 0.10080949378266184, "learning_rate": 5.280137728299028e-06, "loss": 0.8475, "step": 9388 }, { "epoch": 4.256119673617407, "grad_norm": 0.12787132369097434, "learning_rate": 5.273853494104844e-06, "loss": 0.8464, "step": 9389 }, { "epoch": 4.2565729827742524, "grad_norm": 0.1266338485295343, "learning_rate": 5.267572737836291e-06, "loss": 0.8389, "step": 9390 }, { "epoch": 4.257026291931097, "grad_norm": 0.11536559437061109, "learning_rate": 5.261295460122395e-06, "loss": 0.8584, "step": 9391 }, { "epoch": 4.257479601087942, "grad_norm": 0.10459793484821504, "learning_rate": 5.255021661591859e-06, "loss": 0.8651, "step": 9392 }, { "epoch": 4.2579329102447865, "grad_norm": 0.12975402407813713, "learning_rate": 5.248751342873011e-06, "loss": 0.8578, "step": 9393 }, { "epoch": 4.258386219401632, "grad_norm": 0.11876545432276511, "learning_rate": 5.242484504593859e-06, "loss": 0.8441, "step": 9394 }, { "epoch": 4.258839528558477, "grad_norm": 0.11108062706713702, "learning_rate": 5.236221147382021e-06, "loss": 0.8592, "step": 9395 }, { "epoch": 4.259292837715321, "grad_norm": 0.11861158705986341, "learning_rate": 5.229961271864819e-06, "loss": 0.8549, "step": 9396 }, { "epoch": 4.259746146872167, "grad_norm": 0.13449783675737628, "learning_rate": 5.223704878669176e-06, "loss": 0.8717, "step": 9397 }, { "epoch": 4.260199456029012, "grad_norm": 0.11539158073330087, "learning_rate": 5.217451968421689e-06, "loss": 0.8599, "step": 9398 }, { "epoch": 4.260652765185856, "grad_norm": 0.10248729798643265, "learning_rate": 5.211202541748624e-06, "loss": 0.8769, "step": 9399 }, { "epoch": 4.261106074342702, "grad_norm": 0.12015860520327405, "learning_rate": 5.204956599275854e-06, "loss": 0.8522, "step": 9400 }, { "epoch": 4.261559383499547, "grad_norm": 0.11825721636228244, "learning_rate": 5.1987141416289485e-06, "loss": 0.8627, "step": 9401 }, { "epoch": 4.262012692656391, "grad_norm": 0.12952095633049465, "learning_rate": 5.1924751694330954e-06, "loss": 0.8454, "step": 9402 }, { "epoch": 4.2624660018132365, "grad_norm": 0.11656610425335003, "learning_rate": 5.186239683313159e-06, "loss": 0.8412, "step": 9403 }, { "epoch": 4.262919310970082, "grad_norm": 0.12157053783420896, "learning_rate": 5.180007683893626e-06, "loss": 0.8392, "step": 9404 }, { "epoch": 4.263372620126926, "grad_norm": 0.11870763759007198, "learning_rate": 5.173779171798665e-06, "loss": 0.8356, "step": 9405 }, { "epoch": 4.2638259292837715, "grad_norm": 0.13273521144816047, "learning_rate": 5.1675541476520655e-06, "loss": 0.8674, "step": 9406 }, { "epoch": 4.264279238440617, "grad_norm": 0.11846810540247066, "learning_rate": 5.161332612077297e-06, "loss": 0.8804, "step": 9407 }, { "epoch": 4.264732547597461, "grad_norm": 0.13138106084997933, "learning_rate": 5.1551145656974564e-06, "loss": 0.8574, "step": 9408 }, { "epoch": 4.265185856754306, "grad_norm": 0.1413546006972058, "learning_rate": 5.1489000091353e-06, "loss": 0.8551, "step": 9409 }, { "epoch": 4.265639165911152, "grad_norm": 0.1064375540120899, "learning_rate": 5.142688943013223e-06, "loss": 0.8509, "step": 9410 }, { "epoch": 4.266092475067996, "grad_norm": 0.12306216657933157, "learning_rate": 5.1364813679533006e-06, "loss": 0.8499, "step": 9411 }, { "epoch": 4.266545784224841, "grad_norm": 0.12350129562299854, "learning_rate": 5.130277284577223e-06, "loss": 0.8702, "step": 9412 }, { "epoch": 4.266999093381687, "grad_norm": 0.12544466411355604, "learning_rate": 5.124076693506364e-06, "loss": 0.8796, "step": 9413 }, { "epoch": 4.267452402538531, "grad_norm": 0.1264511763387126, "learning_rate": 5.117879595361723e-06, "loss": 0.8562, "step": 9414 }, { "epoch": 4.267905711695376, "grad_norm": 0.1129503665919243, "learning_rate": 5.111685990763948e-06, "loss": 0.8604, "step": 9415 }, { "epoch": 4.2683590208522215, "grad_norm": 0.11690213625384595, "learning_rate": 5.105495880333364e-06, "loss": 0.8597, "step": 9416 }, { "epoch": 4.268812330009066, "grad_norm": 0.11680984826497783, "learning_rate": 5.099309264689911e-06, "loss": 0.8594, "step": 9417 }, { "epoch": 4.269265639165911, "grad_norm": 0.11694535576188944, "learning_rate": 5.093126144453217e-06, "loss": 0.8475, "step": 9418 }, { "epoch": 4.2697189483227564, "grad_norm": 0.1227345302167215, "learning_rate": 5.086946520242521e-06, "loss": 0.8752, "step": 9419 }, { "epoch": 4.270172257479601, "grad_norm": 0.10333917423686452, "learning_rate": 5.080770392676746e-06, "loss": 0.8527, "step": 9420 }, { "epoch": 4.270625566636446, "grad_norm": 0.11945181247336459, "learning_rate": 5.074597762374436e-06, "loss": 0.8635, "step": 9421 }, { "epoch": 4.271078875793291, "grad_norm": 0.10751550453680017, "learning_rate": 5.068428629953807e-06, "loss": 0.8476, "step": 9422 }, { "epoch": 4.271532184950136, "grad_norm": 0.12189492433701817, "learning_rate": 5.062262996032718e-06, "loss": 0.8653, "step": 9423 }, { "epoch": 4.271985494106981, "grad_norm": 0.12010508161147208, "learning_rate": 5.056100861228657e-06, "loss": 0.8689, "step": 9424 }, { "epoch": 4.272438803263826, "grad_norm": 0.10491837628791943, "learning_rate": 5.049942226158804e-06, "loss": 0.8592, "step": 9425 }, { "epoch": 4.272892112420671, "grad_norm": 0.12073533912330082, "learning_rate": 5.043787091439947e-06, "loss": 0.8655, "step": 9426 }, { "epoch": 4.273345421577516, "grad_norm": 0.11986440361180911, "learning_rate": 5.037635457688552e-06, "loss": 0.886, "step": 9427 }, { "epoch": 4.273798730734361, "grad_norm": 0.11633846186557256, "learning_rate": 5.031487325520718e-06, "loss": 0.8288, "step": 9428 }, { "epoch": 4.274252039891206, "grad_norm": 0.12118982925506452, "learning_rate": 5.025342695552202e-06, "loss": 0.8566, "step": 9429 }, { "epoch": 4.274705349048051, "grad_norm": 0.11625845310167425, "learning_rate": 5.019201568398395e-06, "loss": 0.8387, "step": 9430 }, { "epoch": 4.275158658204896, "grad_norm": 0.11102961315599558, "learning_rate": 5.013063944674365e-06, "loss": 0.862, "step": 9431 }, { "epoch": 4.2756119673617405, "grad_norm": 0.10754574765791569, "learning_rate": 5.006929824994795e-06, "loss": 0.8571, "step": 9432 }, { "epoch": 4.276065276518586, "grad_norm": 0.11966213783000947, "learning_rate": 5.000799209974059e-06, "loss": 0.8351, "step": 9433 }, { "epoch": 4.276518585675431, "grad_norm": 0.13116370136713046, "learning_rate": 4.994672100226132e-06, "loss": 0.8621, "step": 9434 }, { "epoch": 4.2769718948322755, "grad_norm": 0.11798105650787924, "learning_rate": 4.98854849636468e-06, "loss": 0.8479, "step": 9435 }, { "epoch": 4.277425203989121, "grad_norm": 0.12818982821870487, "learning_rate": 4.982428399002985e-06, "loss": 0.8583, "step": 9436 }, { "epoch": 4.277878513145966, "grad_norm": 0.10292871168722763, "learning_rate": 4.976311808754011e-06, "loss": 0.8528, "step": 9437 }, { "epoch": 4.27833182230281, "grad_norm": 0.10949013788284272, "learning_rate": 4.970198726230333e-06, "loss": 0.8697, "step": 9438 }, { "epoch": 4.278785131459656, "grad_norm": 0.11127892922284129, "learning_rate": 4.964089152044209e-06, "loss": 0.8673, "step": 9439 }, { "epoch": 4.2792384406165, "grad_norm": 0.11175759097301544, "learning_rate": 4.957983086807527e-06, "loss": 0.8587, "step": 9440 }, { "epoch": 4.279691749773345, "grad_norm": 0.10566471011532225, "learning_rate": 4.951880531131817e-06, "loss": 0.8599, "step": 9441 }, { "epoch": 4.280145058930191, "grad_norm": 0.10621058979691513, "learning_rate": 4.945781485628285e-06, "loss": 0.8401, "step": 9442 }, { "epoch": 4.280598368087035, "grad_norm": 0.10996372232889634, "learning_rate": 4.939685950907751e-06, "loss": 0.8549, "step": 9443 }, { "epoch": 4.28105167724388, "grad_norm": 0.11668648398081727, "learning_rate": 4.9335939275807175e-06, "loss": 0.8367, "step": 9444 }, { "epoch": 4.2815049864007255, "grad_norm": 0.11457019608205951, "learning_rate": 4.927505416257301e-06, "loss": 0.8409, "step": 9445 }, { "epoch": 4.28195829555757, "grad_norm": 0.0973059075421198, "learning_rate": 4.921420417547311e-06, "loss": 0.8313, "step": 9446 }, { "epoch": 4.282411604714415, "grad_norm": 0.13485336858720948, "learning_rate": 4.915338932060146e-06, "loss": 0.8675, "step": 9447 }, { "epoch": 4.2828649138712604, "grad_norm": 0.10635642479184554, "learning_rate": 4.9092609604049025e-06, "loss": 0.8635, "step": 9448 }, { "epoch": 4.283318223028105, "grad_norm": 0.10515579458813351, "learning_rate": 4.9031865031903e-06, "loss": 0.8533, "step": 9449 }, { "epoch": 4.28377153218495, "grad_norm": 0.11355504414130674, "learning_rate": 4.897115561024723e-06, "loss": 0.8709, "step": 9450 }, { "epoch": 4.284224841341795, "grad_norm": 0.10694993288052874, "learning_rate": 4.891048134516178e-06, "loss": 0.8766, "step": 9451 }, { "epoch": 4.28467815049864, "grad_norm": 0.09491488477710823, "learning_rate": 4.884984224272358e-06, "loss": 0.8406, "step": 9452 }, { "epoch": 4.285131459655485, "grad_norm": 0.10819350652950945, "learning_rate": 4.878923830900557e-06, "loss": 0.8476, "step": 9453 }, { "epoch": 4.28558476881233, "grad_norm": 0.09942948889551464, "learning_rate": 4.872866955007762e-06, "loss": 0.8591, "step": 9454 }, { "epoch": 4.286038077969175, "grad_norm": 0.12692701607531626, "learning_rate": 4.866813597200577e-06, "loss": 0.8552, "step": 9455 }, { "epoch": 4.28649138712602, "grad_norm": 0.08936252426155736, "learning_rate": 4.860763758085258e-06, "loss": 0.8507, "step": 9456 }, { "epoch": 4.286944696282865, "grad_norm": 0.10392482731186634, "learning_rate": 4.854717438267731e-06, "loss": 0.8568, "step": 9457 }, { "epoch": 4.28739800543971, "grad_norm": 0.11973129461783441, "learning_rate": 4.8486746383535324e-06, "loss": 0.8404, "step": 9458 }, { "epoch": 4.287851314596555, "grad_norm": 0.10397866851509746, "learning_rate": 4.842635358947889e-06, "loss": 0.8429, "step": 9459 }, { "epoch": 4.2883046237534, "grad_norm": 0.09918074967219842, "learning_rate": 4.8365996006556245e-06, "loss": 0.8427, "step": 9460 }, { "epoch": 4.2887579329102445, "grad_norm": 0.11397775018516902, "learning_rate": 4.830567364081269e-06, "loss": 0.8566, "step": 9461 }, { "epoch": 4.28921124206709, "grad_norm": 0.11430099408649848, "learning_rate": 4.824538649828938e-06, "loss": 0.8483, "step": 9462 }, { "epoch": 4.289664551223935, "grad_norm": 0.11077762632305764, "learning_rate": 4.8185134585024515e-06, "loss": 0.8495, "step": 9463 }, { "epoch": 4.2901178603807795, "grad_norm": 0.09687646417828884, "learning_rate": 4.81249179070523e-06, "loss": 0.8342, "step": 9464 }, { "epoch": 4.290571169537625, "grad_norm": 0.11638021972546764, "learning_rate": 4.806473647040379e-06, "loss": 0.8694, "step": 9465 }, { "epoch": 4.29102447869447, "grad_norm": 0.10666850962698245, "learning_rate": 4.800459028110624e-06, "loss": 0.8713, "step": 9466 }, { "epoch": 4.291477787851314, "grad_norm": 0.1101019335943472, "learning_rate": 4.794447934518345e-06, "loss": 0.8616, "step": 9467 }, { "epoch": 4.29193109700816, "grad_norm": 0.09973690093261338, "learning_rate": 4.788440366865565e-06, "loss": 0.8615, "step": 9468 }, { "epoch": 4.292384406165005, "grad_norm": 0.11819783357109524, "learning_rate": 4.7824363257539784e-06, "loss": 0.8487, "step": 9469 }, { "epoch": 4.292837715321849, "grad_norm": 0.11135519599816826, "learning_rate": 4.776435811784894e-06, "loss": 0.8618, "step": 9470 }, { "epoch": 4.293291024478695, "grad_norm": 0.10157661324596154, "learning_rate": 4.770438825559276e-06, "loss": 0.8599, "step": 9471 }, { "epoch": 4.293744333635539, "grad_norm": 0.10831289783796698, "learning_rate": 4.764445367677755e-06, "loss": 0.8469, "step": 9472 }, { "epoch": 4.294197642792384, "grad_norm": 0.08885808133020377, "learning_rate": 4.758455438740574e-06, "loss": 0.8684, "step": 9473 }, { "epoch": 4.2946509519492295, "grad_norm": 0.09312774153495476, "learning_rate": 4.7524690393476645e-06, "loss": 0.8761, "step": 9474 }, { "epoch": 4.295104261106074, "grad_norm": 0.10203992937574392, "learning_rate": 4.746486170098563e-06, "loss": 0.8604, "step": 9475 }, { "epoch": 4.295557570262919, "grad_norm": 0.10640736984244542, "learning_rate": 4.740506831592488e-06, "loss": 0.8524, "step": 9476 }, { "epoch": 4.2960108794197644, "grad_norm": 0.0940251229458805, "learning_rate": 4.734531024428273e-06, "loss": 0.8415, "step": 9477 }, { "epoch": 4.296464188576609, "grad_norm": 0.11216286505999415, "learning_rate": 4.728558749204428e-06, "loss": 0.8751, "step": 9478 }, { "epoch": 4.296917497733454, "grad_norm": 0.10894941918369268, "learning_rate": 4.722590006519072e-06, "loss": 0.8471, "step": 9479 }, { "epoch": 4.297370806890299, "grad_norm": 0.10362002590229168, "learning_rate": 4.71662479697002e-06, "loss": 0.841, "step": 9480 }, { "epoch": 4.297824116047144, "grad_norm": 0.10769408541525174, "learning_rate": 4.710663121154686e-06, "loss": 0.8557, "step": 9481 }, { "epoch": 4.298277425203989, "grad_norm": 0.10113876077469908, "learning_rate": 4.704704979670145e-06, "loss": 0.8745, "step": 9482 }, { "epoch": 4.298730734360834, "grad_norm": 0.13468151126631867, "learning_rate": 4.698750373113141e-06, "loss": 0.8712, "step": 9483 }, { "epoch": 4.299184043517679, "grad_norm": 0.09716229909622653, "learning_rate": 4.692799302080029e-06, "loss": 0.8283, "step": 9484 }, { "epoch": 4.299637352674524, "grad_norm": 0.09538102293517213, "learning_rate": 4.686851767166847e-06, "loss": 0.8667, "step": 9485 }, { "epoch": 4.300090661831369, "grad_norm": 0.10450673769975234, "learning_rate": 4.68090776896923e-06, "loss": 0.8852, "step": 9486 }, { "epoch": 4.300543970988214, "grad_norm": 0.09646254338253286, "learning_rate": 4.67496730808251e-06, "loss": 0.8477, "step": 9487 }, { "epoch": 4.300997280145059, "grad_norm": 0.09143224224700235, "learning_rate": 4.66903038510162e-06, "loss": 0.8576, "step": 9488 }, { "epoch": 4.301450589301904, "grad_norm": 0.10729702475695306, "learning_rate": 4.663097000621184e-06, "loss": 0.8367, "step": 9489 }, { "epoch": 4.3019038984587485, "grad_norm": 0.11027607683346, "learning_rate": 4.657167155235427e-06, "loss": 0.8625, "step": 9490 }, { "epoch": 4.302357207615594, "grad_norm": 0.11041019796855304, "learning_rate": 4.651240849538256e-06, "loss": 0.8652, "step": 9491 }, { "epoch": 4.302810516772439, "grad_norm": 0.18008272645554746, "learning_rate": 4.645318084123199e-06, "loss": 0.8717, "step": 9492 }, { "epoch": 4.3032638259292835, "grad_norm": 0.11757392896138213, "learning_rate": 4.639398859583444e-06, "loss": 0.8303, "step": 9493 }, { "epoch": 4.303717135086129, "grad_norm": 0.10313646805453407, "learning_rate": 4.633483176511808e-06, "loss": 0.8405, "step": 9494 }, { "epoch": 4.304170444242974, "grad_norm": 0.13187879173390848, "learning_rate": 4.627571035500787e-06, "loss": 0.8708, "step": 9495 }, { "epoch": 4.304623753399818, "grad_norm": 0.129388299109953, "learning_rate": 4.621662437142478e-06, "loss": 0.8642, "step": 9496 }, { "epoch": 4.305077062556664, "grad_norm": 0.13024655901527696, "learning_rate": 4.615757382028645e-06, "loss": 0.861, "step": 9497 }, { "epoch": 4.305530371713509, "grad_norm": 0.11290096095543842, "learning_rate": 4.6098558707507125e-06, "loss": 0.8596, "step": 9498 }, { "epoch": 4.305983680870353, "grad_norm": 0.14015179685155119, "learning_rate": 4.603957903899714e-06, "loss": 0.8735, "step": 9499 }, { "epoch": 4.306436990027199, "grad_norm": 0.12664809981866856, "learning_rate": 4.598063482066368e-06, "loss": 0.8488, "step": 9500 }, { "epoch": 4.306890299184044, "grad_norm": 0.10697018697240818, "learning_rate": 4.592172605840999e-06, "loss": 0.8395, "step": 9501 }, { "epoch": 4.307343608340888, "grad_norm": 0.11929995023283059, "learning_rate": 4.586285275813614e-06, "loss": 0.8385, "step": 9502 }, { "epoch": 4.3077969174977335, "grad_norm": 0.1206629874936286, "learning_rate": 4.580401492573829e-06, "loss": 0.8397, "step": 9503 }, { "epoch": 4.308250226654579, "grad_norm": 0.13012869207594482, "learning_rate": 4.574521256710949e-06, "loss": 0.8485, "step": 9504 }, { "epoch": 4.308703535811423, "grad_norm": 0.1187724718721318, "learning_rate": 4.56864456881386e-06, "loss": 0.8565, "step": 9505 }, { "epoch": 4.3091568449682685, "grad_norm": 0.10201634453835805, "learning_rate": 4.5627714294711555e-06, "loss": 0.8593, "step": 9506 }, { "epoch": 4.309610154125114, "grad_norm": 0.1209323165969495, "learning_rate": 4.556901839271035e-06, "loss": 0.8555, "step": 9507 }, { "epoch": 4.310063463281958, "grad_norm": 0.11583291366414358, "learning_rate": 4.551035798801367e-06, "loss": 0.8564, "step": 9508 }, { "epoch": 4.310516772438803, "grad_norm": 0.10084490061750324, "learning_rate": 4.54517330864964e-06, "loss": 0.8466, "step": 9509 }, { "epoch": 4.310970081595649, "grad_norm": 0.12083796895995842, "learning_rate": 4.539314369403011e-06, "loss": 0.868, "step": 9510 }, { "epoch": 4.311423390752493, "grad_norm": 0.10635355391449064, "learning_rate": 4.5334589816482666e-06, "loss": 0.8592, "step": 9511 }, { "epoch": 4.311876699909338, "grad_norm": 0.10719638644214406, "learning_rate": 4.527607145971833e-06, "loss": 0.8395, "step": 9512 }, { "epoch": 4.312330009066184, "grad_norm": 0.11952298744750013, "learning_rate": 4.5217588629598015e-06, "loss": 0.8443, "step": 9513 }, { "epoch": 4.312783318223028, "grad_norm": 0.10978281508174274, "learning_rate": 4.515914133197883e-06, "loss": 0.8416, "step": 9514 }, { "epoch": 4.313236627379873, "grad_norm": 0.1379165973711206, "learning_rate": 4.5100729572714566e-06, "loss": 0.8446, "step": 9515 }, { "epoch": 4.3136899365367185, "grad_norm": 0.11718628974891973, "learning_rate": 4.504235335765517e-06, "loss": 0.8565, "step": 9516 }, { "epoch": 4.314143245693563, "grad_norm": 0.1286911024183255, "learning_rate": 4.498401269264734e-06, "loss": 0.865, "step": 9517 }, { "epoch": 4.314596554850408, "grad_norm": 0.12531890846946842, "learning_rate": 4.492570758353401e-06, "loss": 0.8595, "step": 9518 }, { "epoch": 4.3150498640072525, "grad_norm": 0.1350955084839858, "learning_rate": 4.486743803615463e-06, "loss": 0.852, "step": 9519 }, { "epoch": 4.315503173164098, "grad_norm": 0.11803466838808419, "learning_rate": 4.480920405634499e-06, "loss": 0.8547, "step": 9520 }, { "epoch": 4.315956482320943, "grad_norm": 0.13201918682227295, "learning_rate": 4.475100564993749e-06, "loss": 0.8714, "step": 9521 }, { "epoch": 4.3164097914777875, "grad_norm": 0.1351907031901681, "learning_rate": 4.469284282276087e-06, "loss": 0.85, "step": 9522 }, { "epoch": 4.316863100634633, "grad_norm": 0.10303937708593833, "learning_rate": 4.463471558064027e-06, "loss": 0.8653, "step": 9523 }, { "epoch": 4.317316409791478, "grad_norm": 0.10580180261451035, "learning_rate": 4.457662392939721e-06, "loss": 0.8465, "step": 9524 }, { "epoch": 4.317769718948322, "grad_norm": 0.11928699503020132, "learning_rate": 4.451856787484991e-06, "loss": 0.8606, "step": 9525 }, { "epoch": 4.318223028105168, "grad_norm": 0.09684698896068986, "learning_rate": 4.44605474228128e-06, "loss": 0.8543, "step": 9526 }, { "epoch": 4.318676337262013, "grad_norm": 0.10387017638729303, "learning_rate": 4.44025625790967e-06, "loss": 0.8628, "step": 9527 }, { "epoch": 4.319129646418857, "grad_norm": 0.12343032104651704, "learning_rate": 4.434461334950908e-06, "loss": 0.8507, "step": 9528 }, { "epoch": 4.319582955575703, "grad_norm": 0.10057760162913464, "learning_rate": 4.428669973985363e-06, "loss": 0.848, "step": 9529 }, { "epoch": 4.320036264732548, "grad_norm": 0.10193084768628718, "learning_rate": 4.42288217559307e-06, "loss": 0.8686, "step": 9530 }, { "epoch": 4.320489573889392, "grad_norm": 0.11610512973770121, "learning_rate": 4.4170979403536805e-06, "loss": 0.8617, "step": 9531 }, { "epoch": 4.3209428830462375, "grad_norm": 0.1011754098509756, "learning_rate": 4.411317268846511e-06, "loss": 0.826, "step": 9532 }, { "epoch": 4.321396192203083, "grad_norm": 0.11062080047292835, "learning_rate": 4.405540161650508e-06, "loss": 0.8413, "step": 9533 }, { "epoch": 4.321849501359927, "grad_norm": 0.11288024863839327, "learning_rate": 4.399766619344275e-06, "loss": 0.8557, "step": 9534 }, { "epoch": 4.3223028105167725, "grad_norm": 0.10392984135756181, "learning_rate": 4.393996642506033e-06, "loss": 0.8418, "step": 9535 }, { "epoch": 4.322756119673618, "grad_norm": 0.10221004906965721, "learning_rate": 4.388230231713677e-06, "loss": 0.8365, "step": 9536 }, { "epoch": 4.323209428830462, "grad_norm": 0.1184698123799368, "learning_rate": 4.382467387544726e-06, "loss": 0.8508, "step": 9537 }, { "epoch": 4.323662737987307, "grad_norm": 0.11139404145218706, "learning_rate": 4.376708110576337e-06, "loss": 0.8283, "step": 9538 }, { "epoch": 4.324116047144153, "grad_norm": 0.1293280305129736, "learning_rate": 4.370952401385334e-06, "loss": 0.8463, "step": 9539 }, { "epoch": 4.324569356300997, "grad_norm": 0.13130020864885236, "learning_rate": 4.36520026054815e-06, "loss": 0.8483, "step": 9540 }, { "epoch": 4.325022665457842, "grad_norm": 0.10265416736527337, "learning_rate": 4.359451688640901e-06, "loss": 0.8491, "step": 9541 }, { "epoch": 4.325475974614688, "grad_norm": 0.13911198165887353, "learning_rate": 4.353706686239307e-06, "loss": 0.8458, "step": 9542 }, { "epoch": 4.325929283771532, "grad_norm": 0.13972188142753109, "learning_rate": 4.347965253918749e-06, "loss": 0.8518, "step": 9543 }, { "epoch": 4.326382592928377, "grad_norm": 0.11062773221908778, "learning_rate": 4.342227392254245e-06, "loss": 0.8645, "step": 9544 }, { "epoch": 4.3268359020852225, "grad_norm": 0.13825099088677043, "learning_rate": 4.336493101820476e-06, "loss": 0.85, "step": 9545 }, { "epoch": 4.327289211242067, "grad_norm": 0.1152814828233211, "learning_rate": 4.330762383191722e-06, "loss": 0.8508, "step": 9546 }, { "epoch": 4.327742520398912, "grad_norm": 0.1102453421761806, "learning_rate": 4.325035236941957e-06, "loss": 0.8524, "step": 9547 }, { "epoch": 4.328195829555757, "grad_norm": 0.12125070409473931, "learning_rate": 4.319311663644747e-06, "loss": 0.8595, "step": 9548 }, { "epoch": 4.328649138712602, "grad_norm": 0.11944307738516023, "learning_rate": 4.313591663873351e-06, "loss": 0.8603, "step": 9549 }, { "epoch": 4.329102447869447, "grad_norm": 0.10231059499214536, "learning_rate": 4.307875238200616e-06, "loss": 0.8539, "step": 9550 }, { "epoch": 4.3295557570262915, "grad_norm": 0.09899868849238445, "learning_rate": 4.302162387199089e-06, "loss": 0.8578, "step": 9551 }, { "epoch": 4.330009066183137, "grad_norm": 0.10592381411424368, "learning_rate": 4.296453111440908e-06, "loss": 0.8566, "step": 9552 }, { "epoch": 4.330462375339982, "grad_norm": 0.0999442419289302, "learning_rate": 4.290747411497869e-06, "loss": 0.8569, "step": 9553 }, { "epoch": 4.330915684496826, "grad_norm": 0.10777718098666025, "learning_rate": 4.285045287941434e-06, "loss": 0.8589, "step": 9554 }, { "epoch": 4.331368993653672, "grad_norm": 0.10130485030246168, "learning_rate": 4.279346741342671e-06, "loss": 0.8676, "step": 9555 }, { "epoch": 4.331822302810517, "grad_norm": 0.10224806496764145, "learning_rate": 4.2736517722723205e-06, "loss": 0.8502, "step": 9556 }, { "epoch": 4.332275611967361, "grad_norm": 0.09471942149170741, "learning_rate": 4.267960381300737e-06, "loss": 0.8575, "step": 9557 }, { "epoch": 4.332728921124207, "grad_norm": 0.10738944572648115, "learning_rate": 4.262272568997938e-06, "loss": 0.8548, "step": 9558 }, { "epoch": 4.333182230281052, "grad_norm": 0.11145823393522777, "learning_rate": 4.256588335933569e-06, "loss": 0.8317, "step": 9559 }, { "epoch": 4.333635539437896, "grad_norm": 0.10349457590701949, "learning_rate": 4.250907682676934e-06, "loss": 0.8552, "step": 9560 }, { "epoch": 4.3340888485947415, "grad_norm": 0.10424759426309722, "learning_rate": 4.245230609796949e-06, "loss": 0.8687, "step": 9561 }, { "epoch": 4.334542157751587, "grad_norm": 0.11571571072291263, "learning_rate": 4.239557117862219e-06, "loss": 0.8526, "step": 9562 }, { "epoch": 4.334995466908431, "grad_norm": 0.1014483877695179, "learning_rate": 4.233887207440926e-06, "loss": 0.8468, "step": 9563 }, { "epoch": 4.3354487760652765, "grad_norm": 0.09928653867961919, "learning_rate": 4.228220879100948e-06, "loss": 0.8384, "step": 9564 }, { "epoch": 4.335902085222122, "grad_norm": 0.11070834729873168, "learning_rate": 4.222558133409771e-06, "loss": 0.8616, "step": 9565 }, { "epoch": 4.336355394378966, "grad_norm": 0.11669036920271618, "learning_rate": 4.216898970934557e-06, "loss": 0.8524, "step": 9566 }, { "epoch": 4.336808703535811, "grad_norm": 0.1029470893360332, "learning_rate": 4.211243392242072e-06, "loss": 0.8375, "step": 9567 }, { "epoch": 4.337262012692657, "grad_norm": 0.11296557078979254, "learning_rate": 4.2055913978987384e-06, "loss": 0.8558, "step": 9568 }, { "epoch": 4.337715321849501, "grad_norm": 0.09849117977114359, "learning_rate": 4.199942988470631e-06, "loss": 0.8572, "step": 9569 }, { "epoch": 4.338168631006346, "grad_norm": 0.10650613882222244, "learning_rate": 4.1942981645234404e-06, "loss": 0.8403, "step": 9570 }, { "epoch": 4.338621940163192, "grad_norm": 0.09972857620497053, "learning_rate": 4.188656926622528e-06, "loss": 0.8647, "step": 9571 }, { "epoch": 4.339075249320036, "grad_norm": 0.10398467325695201, "learning_rate": 4.183019275332862e-06, "loss": 0.862, "step": 9572 }, { "epoch": 4.339528558476881, "grad_norm": 0.10165620157159033, "learning_rate": 4.1773852112190875e-06, "loss": 0.8531, "step": 9573 }, { "epoch": 4.3399818676337265, "grad_norm": 0.09603270807290708, "learning_rate": 4.1717547348454566e-06, "loss": 0.8449, "step": 9574 }, { "epoch": 4.340435176790571, "grad_norm": 0.10599935832801761, "learning_rate": 4.166127846775893e-06, "loss": 0.8703, "step": 9575 }, { "epoch": 4.340888485947416, "grad_norm": 0.10404175868037398, "learning_rate": 4.160504547573938e-06, "loss": 0.8571, "step": 9576 }, { "epoch": 4.341341795104261, "grad_norm": 0.09893859593761839, "learning_rate": 4.154884837802784e-06, "loss": 0.8461, "step": 9577 }, { "epoch": 4.341795104261106, "grad_norm": 0.10600443402056736, "learning_rate": 4.149268718025261e-06, "loss": 0.8505, "step": 9578 }, { "epoch": 4.342248413417951, "grad_norm": 0.10093812164602242, "learning_rate": 4.143656188803835e-06, "loss": 0.8509, "step": 9579 }, { "epoch": 4.342701722574796, "grad_norm": 0.10688816955129887, "learning_rate": 4.138047250700629e-06, "loss": 0.864, "step": 9580 }, { "epoch": 4.343155031731641, "grad_norm": 0.10858618024878669, "learning_rate": 4.132441904277382e-06, "loss": 0.8471, "step": 9581 }, { "epoch": 4.343608340888486, "grad_norm": 0.10059703250552245, "learning_rate": 4.126840150095488e-06, "loss": 0.8402, "step": 9582 }, { "epoch": 4.344061650045331, "grad_norm": 0.09986628807270491, "learning_rate": 4.121241988715992e-06, "loss": 0.8384, "step": 9583 }, { "epoch": 4.344514959202176, "grad_norm": 0.10508639722175796, "learning_rate": 4.1156474206995515e-06, "loss": 0.8431, "step": 9584 }, { "epoch": 4.344968268359021, "grad_norm": 0.091319701351002, "learning_rate": 4.110056446606479e-06, "loss": 0.8488, "step": 9585 }, { "epoch": 4.345421577515866, "grad_norm": 0.1159813804809012, "learning_rate": 4.104469066996739e-06, "loss": 0.8435, "step": 9586 }, { "epoch": 4.345874886672711, "grad_norm": 0.11410085399347059, "learning_rate": 4.098885282429908e-06, "loss": 0.8665, "step": 9587 }, { "epoch": 4.346328195829556, "grad_norm": 0.0939155509126875, "learning_rate": 4.093305093465239e-06, "loss": 0.8405, "step": 9588 }, { "epoch": 4.346781504986401, "grad_norm": 0.11683616940080482, "learning_rate": 4.087728500661579e-06, "loss": 0.8537, "step": 9589 }, { "epoch": 4.3472348141432455, "grad_norm": 0.31087960444833435, "learning_rate": 4.082155504577467e-06, "loss": 0.8786, "step": 9590 }, { "epoch": 4.347688123300091, "grad_norm": 0.10919587748557662, "learning_rate": 4.076586105771032e-06, "loss": 0.8479, "step": 9591 }, { "epoch": 4.348141432456936, "grad_norm": 0.11173245953328728, "learning_rate": 4.071020304800084e-06, "loss": 0.851, "step": 9592 }, { "epoch": 4.3485947416137805, "grad_norm": 0.11307306524317325, "learning_rate": 4.065458102222044e-06, "loss": 0.8488, "step": 9593 }, { "epoch": 4.349048050770626, "grad_norm": 0.3602448715999201, "learning_rate": 4.059899498593978e-06, "loss": 0.862, "step": 9594 }, { "epoch": 4.349501359927471, "grad_norm": 0.09847950734987641, "learning_rate": 4.054344494472608e-06, "loss": 0.858, "step": 9595 }, { "epoch": 4.349954669084315, "grad_norm": 0.1141584765014094, "learning_rate": 4.048793090414278e-06, "loss": 0.853, "step": 9596 }, { "epoch": 4.350407978241161, "grad_norm": 0.10687667359826115, "learning_rate": 4.043245286974981e-06, "loss": 0.8466, "step": 9597 }, { "epoch": 4.350861287398005, "grad_norm": 0.0990372389453144, "learning_rate": 4.037701084710337e-06, "loss": 0.865, "step": 9598 }, { "epoch": 4.35131459655485, "grad_norm": 0.11640275434954254, "learning_rate": 4.032160484175629e-06, "loss": 0.8639, "step": 9599 }, { "epoch": 4.351767905711696, "grad_norm": 0.10415461419773817, "learning_rate": 4.0266234859257554e-06, "loss": 0.8606, "step": 9600 }, { "epoch": 4.35222121486854, "grad_norm": 0.12893834862469303, "learning_rate": 4.021090090515265e-06, "loss": 0.8531, "step": 9601 }, { "epoch": 4.352674524025385, "grad_norm": 0.10611031172750578, "learning_rate": 4.015560298498331e-06, "loss": 0.8334, "step": 9602 }, { "epoch": 4.3531278331822305, "grad_norm": 0.11382673361626583, "learning_rate": 4.0100341104288e-06, "loss": 0.847, "step": 9603 }, { "epoch": 4.353581142339075, "grad_norm": 0.12020468017152526, "learning_rate": 4.004511526860118e-06, "loss": 0.8548, "step": 9604 }, { "epoch": 4.35403445149592, "grad_norm": 0.0935731091970512, "learning_rate": 3.998992548345402e-06, "loss": 0.8363, "step": 9605 }, { "epoch": 4.354487760652765, "grad_norm": 0.09844588795308024, "learning_rate": 3.993477175437379e-06, "loss": 0.8575, "step": 9606 }, { "epoch": 4.35494106980961, "grad_norm": 0.11928770929863536, "learning_rate": 3.987965408688448e-06, "loss": 0.8651, "step": 9607 }, { "epoch": 4.355394378966455, "grad_norm": 0.12285158059445621, "learning_rate": 3.9824572486506106e-06, "loss": 0.8545, "step": 9608 }, { "epoch": 4.3558476881233, "grad_norm": 0.10257203853975304, "learning_rate": 3.9769526958755375e-06, "loss": 0.845, "step": 9609 }, { "epoch": 4.356300997280145, "grad_norm": 0.10556115142264431, "learning_rate": 3.971451750914526e-06, "loss": 0.8905, "step": 9610 }, { "epoch": 4.35675430643699, "grad_norm": 0.10613225750579079, "learning_rate": 3.965954414318498e-06, "loss": 0.8483, "step": 9611 }, { "epoch": 4.357207615593835, "grad_norm": 0.10571964983793551, "learning_rate": 3.960460686638046e-06, "loss": 0.8678, "step": 9612 }, { "epoch": 4.35766092475068, "grad_norm": 0.10357316862065369, "learning_rate": 3.954970568423364e-06, "loss": 0.8564, "step": 9613 }, { "epoch": 4.358114233907525, "grad_norm": 0.10098229263870728, "learning_rate": 3.949484060224324e-06, "loss": 0.8619, "step": 9614 }, { "epoch": 4.35856754306437, "grad_norm": 0.09223865882708672, "learning_rate": 3.9440011625903985e-06, "loss": 0.8621, "step": 9615 }, { "epoch": 4.359020852221215, "grad_norm": 0.0949731744289951, "learning_rate": 3.938521876070729e-06, "loss": 0.8454, "step": 9616 }, { "epoch": 4.35947416137806, "grad_norm": 0.10529520079764994, "learning_rate": 3.933046201214073e-06, "loss": 0.8684, "step": 9617 }, { "epoch": 4.359927470534905, "grad_norm": 0.1013921295258887, "learning_rate": 3.927574138568844e-06, "loss": 0.8487, "step": 9618 }, { "epoch": 4.3603807796917495, "grad_norm": 0.10512189383464561, "learning_rate": 3.92210568868308e-06, "loss": 0.8641, "step": 9619 }, { "epoch": 4.360834088848595, "grad_norm": 0.10144885221817355, "learning_rate": 3.916640852104459e-06, "loss": 0.8692, "step": 9620 }, { "epoch": 4.36128739800544, "grad_norm": 0.10399656601674749, "learning_rate": 3.911179629380297e-06, "loss": 0.8575, "step": 9621 }, { "epoch": 4.3617407071622845, "grad_norm": 0.10221345684263244, "learning_rate": 3.905722021057568e-06, "loss": 0.8584, "step": 9622 }, { "epoch": 4.36219401631913, "grad_norm": 0.10571118812991671, "learning_rate": 3.90026802768285e-06, "loss": 0.8385, "step": 9623 }, { "epoch": 4.362647325475975, "grad_norm": 0.09533912604211935, "learning_rate": 3.894817649802391e-06, "loss": 0.8608, "step": 9624 }, { "epoch": 4.363100634632819, "grad_norm": 0.09564212435108743, "learning_rate": 3.889370887962059e-06, "loss": 0.8561, "step": 9625 }, { "epoch": 4.363553943789665, "grad_norm": 0.10121211777560803, "learning_rate": 3.883927742707347e-06, "loss": 0.8378, "step": 9626 }, { "epoch": 4.36400725294651, "grad_norm": 0.10278931339176604, "learning_rate": 3.878488214583427e-06, "loss": 0.8451, "step": 9627 }, { "epoch": 4.364460562103354, "grad_norm": 0.10225162012214147, "learning_rate": 3.873052304135061e-06, "loss": 0.854, "step": 9628 }, { "epoch": 4.3649138712602, "grad_norm": 0.10042802612294256, "learning_rate": 3.867620011906689e-06, "loss": 0.8665, "step": 9629 }, { "epoch": 4.365367180417044, "grad_norm": 0.1170436312884319, "learning_rate": 3.862191338442353e-06, "loss": 0.8559, "step": 9630 }, { "epoch": 4.365820489573889, "grad_norm": 0.10029886969138437, "learning_rate": 3.856766284285778e-06, "loss": 0.8466, "step": 9631 }, { "epoch": 4.3662737987307345, "grad_norm": 0.10301208871129991, "learning_rate": 3.85134484998027e-06, "loss": 0.8558, "step": 9632 }, { "epoch": 4.366727107887579, "grad_norm": 0.0972354530097949, "learning_rate": 3.8459270360688216e-06, "loss": 0.8304, "step": 9633 }, { "epoch": 4.367180417044424, "grad_norm": 0.11576614932779043, "learning_rate": 3.840512843094027e-06, "loss": 0.8655, "step": 9634 }, { "epoch": 4.367633726201269, "grad_norm": 0.10820969747774425, "learning_rate": 3.835102271598152e-06, "loss": 0.8358, "step": 9635 }, { "epoch": 4.368087035358114, "grad_norm": 0.11014685634309787, "learning_rate": 3.8296953221230685e-06, "loss": 0.8502, "step": 9636 }, { "epoch": 4.368540344514959, "grad_norm": 0.10277250349913092, "learning_rate": 3.824291995210296e-06, "loss": 0.8393, "step": 9637 }, { "epoch": 4.368993653671804, "grad_norm": 0.09626402422812319, "learning_rate": 3.818892291401004e-06, "loss": 0.8625, "step": 9638 }, { "epoch": 4.369446962828649, "grad_norm": 0.1014550351989499, "learning_rate": 3.8134962112359854e-06, "loss": 0.8503, "step": 9639 }, { "epoch": 4.369900271985494, "grad_norm": 0.09762871315857073, "learning_rate": 3.80810375525567e-06, "loss": 0.838, "step": 9640 }, { "epoch": 4.370353581142339, "grad_norm": 0.09763424013515468, "learning_rate": 3.8027149240001194e-06, "loss": 0.8594, "step": 9641 }, { "epoch": 4.370806890299184, "grad_norm": 0.09957803124016645, "learning_rate": 3.7973297180090616e-06, "loss": 0.8461, "step": 9642 }, { "epoch": 4.371260199456029, "grad_norm": 0.10900531023102537, "learning_rate": 3.7919481378218258e-06, "loss": 0.8686, "step": 9643 }, { "epoch": 4.371713508612874, "grad_norm": 0.09429192682357267, "learning_rate": 3.7865701839773983e-06, "loss": 0.8433, "step": 9644 }, { "epoch": 4.372166817769719, "grad_norm": 0.09938614097060985, "learning_rate": 3.7811958570143924e-06, "loss": 0.8854, "step": 9645 }, { "epoch": 4.372620126926564, "grad_norm": 0.10078764935592188, "learning_rate": 3.7758251574710757e-06, "loss": 0.8561, "step": 9646 }, { "epoch": 4.373073436083409, "grad_norm": 0.09299997983013367, "learning_rate": 3.77045808588532e-06, "loss": 0.8712, "step": 9647 }, { "epoch": 4.3735267452402535, "grad_norm": 0.09253436876123078, "learning_rate": 3.765094642794673e-06, "loss": 0.8429, "step": 9648 }, { "epoch": 4.373980054397099, "grad_norm": 0.09596303193399927, "learning_rate": 3.7597348287362833e-06, "loss": 0.8325, "step": 9649 }, { "epoch": 4.374433363553944, "grad_norm": 0.10563443289604134, "learning_rate": 3.754378644246961e-06, "loss": 0.8385, "step": 9650 }, { "epoch": 4.3748866727107885, "grad_norm": 0.12004756884244323, "learning_rate": 3.7490260898631483e-06, "loss": 0.8412, "step": 9651 }, { "epoch": 4.375339981867634, "grad_norm": 0.1145040499860451, "learning_rate": 3.7436771661209e-06, "loss": 0.8451, "step": 9652 }, { "epoch": 4.375793291024479, "grad_norm": 0.10029331501191613, "learning_rate": 3.7383318735559494e-06, "loss": 0.8492, "step": 9653 }, { "epoch": 4.376246600181323, "grad_norm": 0.09695323710686157, "learning_rate": 3.732990212703622e-06, "loss": 0.8724, "step": 9654 }, { "epoch": 4.376699909338169, "grad_norm": 0.12367949002089236, "learning_rate": 3.727652184098922e-06, "loss": 0.8695, "step": 9655 }, { "epoch": 4.377153218495014, "grad_norm": 0.10839472585054065, "learning_rate": 3.7223177882764483e-06, "loss": 0.8455, "step": 9656 }, { "epoch": 4.377606527651858, "grad_norm": 0.09725569739440772, "learning_rate": 3.7169870257704756e-06, "loss": 0.8499, "step": 9657 }, { "epoch": 4.378059836808704, "grad_norm": 0.09301171824587998, "learning_rate": 3.711659897114883e-06, "loss": 0.853, "step": 9658 }, { "epoch": 4.378513145965549, "grad_norm": 0.10158812375584579, "learning_rate": 3.706336402843205e-06, "loss": 0.8554, "step": 9659 }, { "epoch": 4.378966455122393, "grad_norm": 0.1022033921262482, "learning_rate": 3.7010165434885913e-06, "loss": 0.855, "step": 9660 }, { "epoch": 4.3794197642792385, "grad_norm": 0.09176198435582744, "learning_rate": 3.6957003195838615e-06, "loss": 0.8545, "step": 9661 }, { "epoch": 4.379873073436084, "grad_norm": 0.09236450894310463, "learning_rate": 3.690387731661429e-06, "loss": 0.8408, "step": 9662 }, { "epoch": 4.380326382592928, "grad_norm": 0.10110102225560902, "learning_rate": 3.6850787802533884e-06, "loss": 0.8629, "step": 9663 }, { "epoch": 4.380779691749773, "grad_norm": 0.10505161601451171, "learning_rate": 3.67977346589143e-06, "loss": 0.8527, "step": 9664 }, { "epoch": 4.381233000906619, "grad_norm": 0.09366175611523732, "learning_rate": 3.674471789106906e-06, "loss": 0.8642, "step": 9665 }, { "epoch": 4.381686310063463, "grad_norm": 0.1049154822384733, "learning_rate": 3.6691737504307923e-06, "loss": 0.8555, "step": 9666 }, { "epoch": 4.382139619220308, "grad_norm": 0.10503417843245928, "learning_rate": 3.6638793503936954e-06, "loss": 0.8513, "step": 9667 }, { "epoch": 4.382592928377154, "grad_norm": 0.0972081448856853, "learning_rate": 3.6585885895258753e-06, "loss": 0.853, "step": 9668 }, { "epoch": 4.383046237533998, "grad_norm": 0.10266601308681904, "learning_rate": 3.6533014683572066e-06, "loss": 0.8418, "step": 9669 }, { "epoch": 4.383499546690843, "grad_norm": 0.11038921270927661, "learning_rate": 3.6480179874172296e-06, "loss": 0.8609, "step": 9670 }, { "epoch": 4.383952855847689, "grad_norm": 0.10551611496433301, "learning_rate": 3.6427381472350765e-06, "loss": 0.8633, "step": 9671 }, { "epoch": 4.384406165004533, "grad_norm": 0.10219895890471832, "learning_rate": 3.6374619483395555e-06, "loss": 0.8617, "step": 9672 }, { "epoch": 4.384859474161378, "grad_norm": 0.11124512844034248, "learning_rate": 3.6321893912590844e-06, "loss": 0.8558, "step": 9673 }, { "epoch": 4.3853127833182235, "grad_norm": 0.10714297920888102, "learning_rate": 3.6269204765217334e-06, "loss": 0.8595, "step": 9674 }, { "epoch": 4.385766092475068, "grad_norm": 0.10227365861525334, "learning_rate": 3.6216552046551877e-06, "loss": 0.8633, "step": 9675 }, { "epoch": 4.386219401631913, "grad_norm": 0.10477502909275378, "learning_rate": 3.6163935761867943e-06, "loss": 0.847, "step": 9676 }, { "epoch": 4.3866727107887575, "grad_norm": 0.10654321595803819, "learning_rate": 3.6111355916435177e-06, "loss": 0.8494, "step": 9677 }, { "epoch": 4.387126019945603, "grad_norm": 0.10146837981936041, "learning_rate": 3.6058812515519503e-06, "loss": 0.8564, "step": 9678 }, { "epoch": 4.387579329102448, "grad_norm": 0.10184226806034816, "learning_rate": 3.600630556438334e-06, "loss": 0.8515, "step": 9679 }, { "epoch": 4.3880326382592925, "grad_norm": 0.10407372481035414, "learning_rate": 3.595383506828549e-06, "loss": 0.8634, "step": 9680 }, { "epoch": 4.388485947416138, "grad_norm": 0.09397569392939616, "learning_rate": 3.5901401032480964e-06, "loss": 0.8619, "step": 9681 }, { "epoch": 4.388939256572983, "grad_norm": 0.11944355783128183, "learning_rate": 3.584900346222111e-06, "loss": 0.8519, "step": 9682 }, { "epoch": 4.389392565729827, "grad_norm": 0.09765326460821437, "learning_rate": 3.5796642362753866e-06, "loss": 0.8467, "step": 9683 }, { "epoch": 4.389845874886673, "grad_norm": 0.10466545452358128, "learning_rate": 3.5744317739323164e-06, "loss": 0.8657, "step": 9684 }, { "epoch": 4.390299184043518, "grad_norm": 0.11032922103090496, "learning_rate": 3.5692029597169663e-06, "loss": 0.8576, "step": 9685 }, { "epoch": 4.390752493200362, "grad_norm": 0.10521645889673213, "learning_rate": 3.563977794153002e-06, "loss": 0.8476, "step": 9686 }, { "epoch": 4.391205802357208, "grad_norm": 0.10290647419921253, "learning_rate": 3.5587562777637508e-06, "loss": 0.8362, "step": 9687 }, { "epoch": 4.391659111514053, "grad_norm": 0.11708808943113638, "learning_rate": 3.5535384110721503e-06, "loss": 0.8488, "step": 9688 }, { "epoch": 4.392112420670897, "grad_norm": 0.1096964468349136, "learning_rate": 3.5483241946008008e-06, "loss": 0.8459, "step": 9689 }, { "epoch": 4.3925657298277425, "grad_norm": 0.10557024135905096, "learning_rate": 3.5431136288719013e-06, "loss": 0.8522, "step": 9690 }, { "epoch": 4.393019038984588, "grad_norm": 0.10496572702982866, "learning_rate": 3.537906714407329e-06, "loss": 0.8399, "step": 9691 }, { "epoch": 4.393472348141432, "grad_norm": 0.10540550528575668, "learning_rate": 3.532703451728554e-06, "loss": 0.8543, "step": 9692 }, { "epoch": 4.393925657298277, "grad_norm": 0.10534878257029277, "learning_rate": 3.5275038413566987e-06, "loss": 0.8385, "step": 9693 }, { "epoch": 4.394378966455123, "grad_norm": 0.10677609099396294, "learning_rate": 3.5223078838125325e-06, "loss": 0.8458, "step": 9694 }, { "epoch": 4.394832275611967, "grad_norm": 0.10950861727377062, "learning_rate": 3.5171155796164304e-06, "loss": 0.8367, "step": 9695 }, { "epoch": 4.395285584768812, "grad_norm": 0.10253953394513723, "learning_rate": 3.5119269292884384e-06, "loss": 0.8832, "step": 9696 }, { "epoch": 4.395738893925658, "grad_norm": 0.16120492342864598, "learning_rate": 3.5067419333481854e-06, "loss": 0.8561, "step": 9697 }, { "epoch": 4.396192203082502, "grad_norm": 0.13383650953453813, "learning_rate": 3.50156059231499e-06, "loss": 0.8461, "step": 9698 }, { "epoch": 4.396645512239347, "grad_norm": 0.11407210493642476, "learning_rate": 3.4963829067077605e-06, "loss": 0.8707, "step": 9699 }, { "epoch": 4.397098821396193, "grad_norm": 0.1090810604457818, "learning_rate": 3.491208877045069e-06, "loss": 0.857, "step": 9700 }, { "epoch": 4.397552130553037, "grad_norm": 0.11070273732108292, "learning_rate": 3.4860385038451015e-06, "loss": 0.8561, "step": 9701 }, { "epoch": 4.398005439709882, "grad_norm": 0.11323941356089022, "learning_rate": 3.4808717876256925e-06, "loss": 0.8367, "step": 9702 }, { "epoch": 4.3984587488667275, "grad_norm": 0.12183104151503595, "learning_rate": 3.475708728904299e-06, "loss": 0.8483, "step": 9703 }, { "epoch": 4.398912058023572, "grad_norm": 0.10232224326616429, "learning_rate": 3.4705493281980273e-06, "loss": 0.8565, "step": 9704 }, { "epoch": 4.399365367180417, "grad_norm": 0.11340582586219225, "learning_rate": 3.4653935860235887e-06, "loss": 0.847, "step": 9705 }, { "epoch": 4.399818676337262, "grad_norm": 0.13421251813075455, "learning_rate": 3.4602415028973657e-06, "loss": 0.8526, "step": 9706 }, { "epoch": 4.400271985494107, "grad_norm": 0.09827160036045486, "learning_rate": 3.455093079335341e-06, "loss": 0.8517, "step": 9707 }, { "epoch": 4.400725294650952, "grad_norm": 0.10082612529278165, "learning_rate": 3.449948315853142e-06, "loss": 0.8634, "step": 9708 }, { "epoch": 4.4011786038077965, "grad_norm": 0.10212867439306983, "learning_rate": 3.444807212966046e-06, "loss": 0.8498, "step": 9709 }, { "epoch": 4.401631912964642, "grad_norm": 0.10347318213558855, "learning_rate": 3.4396697711889337e-06, "loss": 0.8617, "step": 9710 }, { "epoch": 4.402085222121487, "grad_norm": 0.0969533238072967, "learning_rate": 3.4345359910363497e-06, "loss": 0.8553, "step": 9711 }, { "epoch": 4.402538531278331, "grad_norm": 0.10458744504304496, "learning_rate": 3.4294058730224423e-06, "loss": 0.8547, "step": 9712 }, { "epoch": 4.402991840435177, "grad_norm": 0.10655371484797438, "learning_rate": 3.424279417661023e-06, "loss": 0.8604, "step": 9713 }, { "epoch": 4.403445149592022, "grad_norm": 0.09760931084188945, "learning_rate": 3.419156625465507e-06, "loss": 0.8607, "step": 9714 }, { "epoch": 4.403898458748866, "grad_norm": 0.10307216854059376, "learning_rate": 3.4140374969489788e-06, "loss": 0.8519, "step": 9715 }, { "epoch": 4.404351767905712, "grad_norm": 0.10300644759534751, "learning_rate": 3.408922032624107e-06, "loss": 0.8664, "step": 9716 }, { "epoch": 4.404805077062557, "grad_norm": 0.1021089686946012, "learning_rate": 3.4038102330032374e-06, "loss": 0.8614, "step": 9717 }, { "epoch": 4.405258386219401, "grad_norm": 0.09219312569995125, "learning_rate": 3.398702098598321e-06, "loss": 0.8676, "step": 9718 }, { "epoch": 4.4057116953762465, "grad_norm": 0.10840620595136531, "learning_rate": 3.3935976299209617e-06, "loss": 0.8682, "step": 9719 }, { "epoch": 4.406165004533092, "grad_norm": 0.09915586977050005, "learning_rate": 3.3884968274823813e-06, "loss": 0.8586, "step": 9720 }, { "epoch": 4.406618313689936, "grad_norm": 0.08805013416910085, "learning_rate": 3.383399691793452e-06, "loss": 0.8434, "step": 9721 }, { "epoch": 4.4070716228467814, "grad_norm": 0.09550273455073427, "learning_rate": 3.3783062233646533e-06, "loss": 0.8768, "step": 9722 }, { "epoch": 4.407524932003627, "grad_norm": 0.11032916196539841, "learning_rate": 3.3732164227061115e-06, "loss": 0.8569, "step": 9723 }, { "epoch": 4.407978241160471, "grad_norm": 0.10387542047906377, "learning_rate": 3.3681302903275912e-06, "loss": 0.8563, "step": 9724 }, { "epoch": 4.408431550317316, "grad_norm": 0.09655508271421018, "learning_rate": 3.3630478267384813e-06, "loss": 0.8527, "step": 9725 }, { "epoch": 4.408884859474162, "grad_norm": 0.09399914194021225, "learning_rate": 3.35796903244781e-06, "loss": 0.8686, "step": 9726 }, { "epoch": 4.409338168631006, "grad_norm": 0.09354279900280596, "learning_rate": 3.352893907964219e-06, "loss": 0.8439, "step": 9727 }, { "epoch": 4.409791477787851, "grad_norm": 0.110312309089862, "learning_rate": 3.347822453796017e-06, "loss": 0.8585, "step": 9728 }, { "epoch": 4.410244786944697, "grad_norm": 0.11171176520397486, "learning_rate": 3.342754670451105e-06, "loss": 0.8548, "step": 9729 }, { "epoch": 4.410698096101541, "grad_norm": 0.09355483348094079, "learning_rate": 3.337690558437059e-06, "loss": 0.8546, "step": 9730 }, { "epoch": 4.411151405258386, "grad_norm": 0.09966377054999442, "learning_rate": 3.3326301182610376e-06, "loss": 0.8335, "step": 9731 }, { "epoch": 4.4116047144152315, "grad_norm": 0.10065239198130362, "learning_rate": 3.3275733504298847e-06, "loss": 0.8632, "step": 9732 }, { "epoch": 4.412058023572076, "grad_norm": 0.09492132128863065, "learning_rate": 3.322520255450039e-06, "loss": 0.8469, "step": 9733 }, { "epoch": 4.412511332728921, "grad_norm": 0.09624011061718196, "learning_rate": 3.317470833827585e-06, "loss": 0.8716, "step": 9734 }, { "epoch": 4.412964641885766, "grad_norm": 0.10879379030073634, "learning_rate": 3.312425086068225e-06, "loss": 0.8664, "step": 9735 }, { "epoch": 4.413417951042611, "grad_norm": 0.09849740995088774, "learning_rate": 3.307383012677323e-06, "loss": 0.8378, "step": 9736 }, { "epoch": 4.413871260199456, "grad_norm": 0.09259670797151053, "learning_rate": 3.302344614159849e-06, "loss": 0.8387, "step": 9737 }, { "epoch": 4.414324569356301, "grad_norm": 0.09882535145272149, "learning_rate": 3.297309891020408e-06, "loss": 0.84, "step": 9738 }, { "epoch": 4.414777878513146, "grad_norm": 0.10338550187133277, "learning_rate": 3.292278843763255e-06, "loss": 0.8582, "step": 9739 }, { "epoch": 4.415231187669991, "grad_norm": 0.10665813664268832, "learning_rate": 3.2872514728922522e-06, "loss": 0.868, "step": 9740 }, { "epoch": 4.415684496826836, "grad_norm": 0.09340841740520173, "learning_rate": 3.282227778910918e-06, "loss": 0.8381, "step": 9741 }, { "epoch": 4.416137805983681, "grad_norm": 0.09705487098008145, "learning_rate": 3.277207762322374e-06, "loss": 0.8719, "step": 9742 }, { "epoch": 4.416591115140526, "grad_norm": 0.11035354932311331, "learning_rate": 3.2721914236294095e-06, "loss": 0.8736, "step": 9743 }, { "epoch": 4.417044424297371, "grad_norm": 0.09273209438526024, "learning_rate": 3.2671787633344043e-06, "loss": 0.8698, "step": 9744 }, { "epoch": 4.417497733454216, "grad_norm": 0.09290992494930833, "learning_rate": 3.2621697819394106e-06, "loss": 0.8467, "step": 9745 }, { "epoch": 4.417951042611061, "grad_norm": 0.10322854575396648, "learning_rate": 3.2571644799460756e-06, "loss": 0.863, "step": 9746 }, { "epoch": 4.418404351767906, "grad_norm": 0.10560106991347797, "learning_rate": 3.252162857855714e-06, "loss": 0.847, "step": 9747 }, { "epoch": 4.4188576609247505, "grad_norm": 0.08893353610797623, "learning_rate": 3.2471649161692366e-06, "loss": 0.8649, "step": 9748 }, { "epoch": 4.419310970081596, "grad_norm": 0.09435768705610957, "learning_rate": 3.2421706553872067e-06, "loss": 0.8564, "step": 9749 }, { "epoch": 4.419764279238441, "grad_norm": 0.09767015891797486, "learning_rate": 3.237180076009816e-06, "loss": 0.8598, "step": 9750 }, { "epoch": 4.4202175883952854, "grad_norm": 0.09739244122989335, "learning_rate": 3.2321931785368823e-06, "loss": 0.8463, "step": 9751 }, { "epoch": 4.420670897552131, "grad_norm": 0.09403201410654198, "learning_rate": 3.2272099634678633e-06, "loss": 0.8408, "step": 9752 }, { "epoch": 4.421124206708976, "grad_norm": 0.08746916890168699, "learning_rate": 3.222230431301836e-06, "loss": 0.8414, "step": 9753 }, { "epoch": 4.42157751586582, "grad_norm": 0.1003914419937456, "learning_rate": 3.21725458253753e-06, "loss": 0.8574, "step": 9754 }, { "epoch": 4.422030825022666, "grad_norm": 0.09302141802108978, "learning_rate": 3.212282417673267e-06, "loss": 0.8263, "step": 9755 }, { "epoch": 4.42248413417951, "grad_norm": 0.09921741361103302, "learning_rate": 3.207313937207044e-06, "loss": 0.846, "step": 9756 }, { "epoch": 4.422937443336355, "grad_norm": 0.10279309221297517, "learning_rate": 3.202349141636454e-06, "loss": 0.8717, "step": 9757 }, { "epoch": 4.423390752493201, "grad_norm": 0.09081468215181218, "learning_rate": 3.1973880314587526e-06, "loss": 0.8458, "step": 9758 }, { "epoch": 4.423844061650045, "grad_norm": 0.10504504427488492, "learning_rate": 3.1924306071707956e-06, "loss": 0.8455, "step": 9759 }, { "epoch": 4.42429737080689, "grad_norm": 0.09960677439342813, "learning_rate": 3.187476869269093e-06, "loss": 0.8452, "step": 9760 }, { "epoch": 4.4247506799637355, "grad_norm": 0.09460657129411111, "learning_rate": 3.1825268182497626e-06, "loss": 0.8297, "step": 9761 }, { "epoch": 4.42520398912058, "grad_norm": 0.09794971111919408, "learning_rate": 3.1775804546085863e-06, "loss": 0.8328, "step": 9762 }, { "epoch": 4.425657298277425, "grad_norm": 0.08995589342561698, "learning_rate": 3.1726377788409457e-06, "loss": 0.8461, "step": 9763 }, { "epoch": 4.42611060743427, "grad_norm": 0.11561130514557028, "learning_rate": 3.167698791441858e-06, "loss": 0.8531, "step": 9764 }, { "epoch": 4.426563916591115, "grad_norm": 0.10464441456630647, "learning_rate": 3.162763492905994e-06, "loss": 0.8425, "step": 9765 }, { "epoch": 4.42701722574796, "grad_norm": 0.09827920232204179, "learning_rate": 3.1578318837276155e-06, "loss": 0.8464, "step": 9766 }, { "epoch": 4.427470534904805, "grad_norm": 0.08641858399968314, "learning_rate": 3.152903964400662e-06, "loss": 0.832, "step": 9767 }, { "epoch": 4.42792384406165, "grad_norm": 0.10023147677261487, "learning_rate": 3.1479797354186626e-06, "loss": 0.8515, "step": 9768 }, { "epoch": 4.428377153218495, "grad_norm": 0.11113042510729931, "learning_rate": 3.1430591972748047e-06, "loss": 0.8595, "step": 9769 }, { "epoch": 4.42883046237534, "grad_norm": 0.10227864284096486, "learning_rate": 3.1381423504618813e-06, "loss": 0.8396, "step": 9770 }, { "epoch": 4.429283771532185, "grad_norm": 0.09691220505620973, "learning_rate": 3.1332291954723425e-06, "loss": 0.8507, "step": 9771 }, { "epoch": 4.42973708068903, "grad_norm": 0.09680666034953203, "learning_rate": 3.128319732798244e-06, "loss": 0.8477, "step": 9772 }, { "epoch": 4.430190389845875, "grad_norm": 0.11388904269806488, "learning_rate": 3.123413962931299e-06, "loss": 0.8602, "step": 9773 }, { "epoch": 4.43064369900272, "grad_norm": 0.09267255139185787, "learning_rate": 3.118511886362816e-06, "loss": 0.845, "step": 9774 }, { "epoch": 4.431097008159565, "grad_norm": 0.10039317777803003, "learning_rate": 3.1136135035837633e-06, "loss": 0.853, "step": 9775 }, { "epoch": 4.43155031731641, "grad_norm": 0.10915904053079255, "learning_rate": 3.108718815084717e-06, "loss": 0.8685, "step": 9776 }, { "epoch": 4.4320036264732545, "grad_norm": 0.09065160859662667, "learning_rate": 3.1038278213559112e-06, "loss": 0.8589, "step": 9777 }, { "epoch": 4.4324569356301, "grad_norm": 0.09912757695575959, "learning_rate": 3.098940522887177e-06, "loss": 0.8451, "step": 9778 }, { "epoch": 4.432910244786945, "grad_norm": 0.09891097031211332, "learning_rate": 3.094056920168007e-06, "loss": 0.8691, "step": 9779 }, { "epoch": 4.4333635539437894, "grad_norm": 0.11061656197581696, "learning_rate": 3.089177013687503e-06, "loss": 0.8562, "step": 9780 }, { "epoch": 4.433816863100635, "grad_norm": 0.09998224714576705, "learning_rate": 3.0843008039343903e-06, "loss": 0.8595, "step": 9781 }, { "epoch": 4.43427017225748, "grad_norm": 0.10558042933120089, "learning_rate": 3.079428291397055e-06, "loss": 0.8661, "step": 9782 }, { "epoch": 4.434723481414324, "grad_norm": 0.10673004339557234, "learning_rate": 3.074559476563477e-06, "loss": 0.853, "step": 9783 }, { "epoch": 4.43517679057117, "grad_norm": 0.09457700042252078, "learning_rate": 3.0696943599212915e-06, "loss": 0.8478, "step": 9784 }, { "epoch": 4.435630099728015, "grad_norm": 0.09915679646504516, "learning_rate": 3.0648329419577493e-06, "loss": 0.8679, "step": 9785 }, { "epoch": 4.436083408884859, "grad_norm": 0.09681904705126486, "learning_rate": 3.0599752231597457e-06, "loss": 0.8697, "step": 9786 }, { "epoch": 4.436536718041705, "grad_norm": 0.08577150301262598, "learning_rate": 3.05512120401378e-06, "loss": 0.8567, "step": 9787 }, { "epoch": 4.436990027198549, "grad_norm": 0.09131097826555531, "learning_rate": 3.05027088500601e-06, "loss": 0.8507, "step": 9788 }, { "epoch": 4.437443336355394, "grad_norm": 0.09191315719819511, "learning_rate": 3.0454242666222078e-06, "loss": 0.8468, "step": 9789 }, { "epoch": 4.4378966455122395, "grad_norm": 0.0978098289450232, "learning_rate": 3.0405813493477622e-06, "loss": 0.8503, "step": 9790 }, { "epoch": 4.438349954669084, "grad_norm": 0.08466808310152325, "learning_rate": 3.0357421336677252e-06, "loss": 0.8719, "step": 9791 }, { "epoch": 4.438803263825929, "grad_norm": 0.0939283662030847, "learning_rate": 3.030906620066749e-06, "loss": 0.844, "step": 9792 }, { "epoch": 4.439256572982774, "grad_norm": 0.10196304543846288, "learning_rate": 3.026074809029118e-06, "loss": 0.8702, "step": 9793 }, { "epoch": 4.439709882139619, "grad_norm": 0.09694075454483896, "learning_rate": 3.0212467010387648e-06, "loss": 0.8425, "step": 9794 }, { "epoch": 4.440163191296464, "grad_norm": 0.10642795376050981, "learning_rate": 3.0164222965792355e-06, "loss": 0.8447, "step": 9795 }, { "epoch": 4.440616500453309, "grad_norm": 0.08949875139694335, "learning_rate": 3.0116015961337e-06, "loss": 0.8588, "step": 9796 }, { "epoch": 4.441069809610154, "grad_norm": 0.10195312274270375, "learning_rate": 3.0067846001849753e-06, "loss": 0.8406, "step": 9797 }, { "epoch": 4.441523118766999, "grad_norm": 0.11080134584942648, "learning_rate": 3.001971309215486e-06, "loss": 0.8543, "step": 9798 }, { "epoch": 4.441976427923844, "grad_norm": 0.09599996223468339, "learning_rate": 2.9971617237073157e-06, "loss": 0.8321, "step": 9799 }, { "epoch": 4.442429737080689, "grad_norm": 0.09492513023360381, "learning_rate": 2.9923558441421383e-06, "loss": 0.8577, "step": 9800 }, { "epoch": 4.442883046237534, "grad_norm": 0.09673875243133613, "learning_rate": 2.9875536710012974e-06, "loss": 0.8512, "step": 9801 }, { "epoch": 4.443336355394379, "grad_norm": 0.09972727950810681, "learning_rate": 2.982755204765724e-06, "loss": 0.8576, "step": 9802 }, { "epoch": 4.443789664551224, "grad_norm": 0.09887202990138233, "learning_rate": 2.977960445916015e-06, "loss": 0.8402, "step": 9803 }, { "epoch": 4.444242973708069, "grad_norm": 0.0986590407127255, "learning_rate": 2.9731693949323693e-06, "loss": 0.8586, "step": 9804 }, { "epoch": 4.444696282864914, "grad_norm": 0.09699533073641962, "learning_rate": 2.968382052294634e-06, "loss": 0.8407, "step": 9805 }, { "epoch": 4.4451495920217585, "grad_norm": 0.10219219659671194, "learning_rate": 2.9635984184822695e-06, "loss": 0.8583, "step": 9806 }, { "epoch": 4.445602901178604, "grad_norm": 0.09461433014932304, "learning_rate": 2.958818493974365e-06, "loss": 0.8797, "step": 9807 }, { "epoch": 4.446056210335449, "grad_norm": 0.13382920392572473, "learning_rate": 2.954042279249656e-06, "loss": 0.8689, "step": 9808 }, { "epoch": 4.4465095194922934, "grad_norm": 0.11394606667448955, "learning_rate": 2.949269774786481e-06, "loss": 0.8757, "step": 9809 }, { "epoch": 4.446962828649139, "grad_norm": 0.0987973530497954, "learning_rate": 2.9445009810628346e-06, "loss": 0.8541, "step": 9810 }, { "epoch": 4.447416137805984, "grad_norm": 0.10120029784699906, "learning_rate": 2.939735898556322e-06, "loss": 0.847, "step": 9811 }, { "epoch": 4.447869446962828, "grad_norm": 0.09993735294221158, "learning_rate": 2.934974527744179e-06, "loss": 0.8474, "step": 9812 }, { "epoch": 4.448322756119674, "grad_norm": 0.10204804276275686, "learning_rate": 2.930216869103255e-06, "loss": 0.8559, "step": 9813 }, { "epoch": 4.448776065276519, "grad_norm": 0.08842197328634625, "learning_rate": 2.9254629231100716e-06, "loss": 0.8471, "step": 9814 }, { "epoch": 4.449229374433363, "grad_norm": 0.11073744533902806, "learning_rate": 2.920712690240728e-06, "loss": 0.846, "step": 9815 }, { "epoch": 4.449682683590209, "grad_norm": 0.10336390413990273, "learning_rate": 2.9159661709709852e-06, "loss": 0.8497, "step": 9816 }, { "epoch": 4.450135992747054, "grad_norm": 0.09173579637942562, "learning_rate": 2.9112233657762148e-06, "loss": 0.8522, "step": 9817 }, { "epoch": 4.450589301903898, "grad_norm": 0.09489741668768972, "learning_rate": 2.906484275131436e-06, "loss": 0.8614, "step": 9818 }, { "epoch": 4.4510426110607435, "grad_norm": 0.08413940431918063, "learning_rate": 2.9017488995112608e-06, "loss": 0.8427, "step": 9819 }, { "epoch": 4.451495920217589, "grad_norm": 0.09864943872877066, "learning_rate": 2.897017239389972e-06, "loss": 0.8614, "step": 9820 }, { "epoch": 4.451949229374433, "grad_norm": 0.09791187662433061, "learning_rate": 2.8922892952414527e-06, "loss": 0.8628, "step": 9821 }, { "epoch": 4.452402538531278, "grad_norm": 0.09145485326710591, "learning_rate": 2.8875650675392086e-06, "loss": 0.8583, "step": 9822 }, { "epoch": 4.452855847688124, "grad_norm": 0.09325625247205645, "learning_rate": 2.882844556756399e-06, "loss": 0.8719, "step": 9823 }, { "epoch": 4.453309156844968, "grad_norm": 0.09209951856638655, "learning_rate": 2.878127763365788e-06, "loss": 0.8556, "step": 9824 }, { "epoch": 4.453762466001813, "grad_norm": 0.10331513465910006, "learning_rate": 2.873414687839788e-06, "loss": 0.8521, "step": 9825 }, { "epoch": 4.454215775158659, "grad_norm": 0.09181353142720594, "learning_rate": 2.8687053306504144e-06, "loss": 0.8283, "step": 9826 }, { "epoch": 4.454669084315503, "grad_norm": 0.10194446831682298, "learning_rate": 2.8639996922693325e-06, "loss": 0.8512, "step": 9827 }, { "epoch": 4.455122393472348, "grad_norm": 0.0921352359608796, "learning_rate": 2.8592977731678194e-06, "loss": 0.8722, "step": 9828 }, { "epoch": 4.455575702629194, "grad_norm": 0.08748221355222349, "learning_rate": 2.854599573816792e-06, "loss": 0.8617, "step": 9829 }, { "epoch": 4.456029011786038, "grad_norm": 0.09301592731580965, "learning_rate": 2.8499050946867846e-06, "loss": 0.8355, "step": 9830 }, { "epoch": 4.456482320942883, "grad_norm": 0.10778828863927568, "learning_rate": 2.845214336247968e-06, "loss": 0.8322, "step": 9831 }, { "epoch": 4.4569356300997285, "grad_norm": 0.0976486261967195, "learning_rate": 2.8405272989701215e-06, "loss": 0.8695, "step": 9832 }, { "epoch": 4.457388939256573, "grad_norm": 0.09409332844956186, "learning_rate": 2.8358439833226836e-06, "loss": 0.8536, "step": 9833 }, { "epoch": 4.457842248413418, "grad_norm": 0.09737098838867723, "learning_rate": 2.831164389774688e-06, "loss": 0.8517, "step": 9834 }, { "epoch": 4.4582955575702625, "grad_norm": 0.09700614669816326, "learning_rate": 2.826488518794821e-06, "loss": 0.8674, "step": 9835 }, { "epoch": 4.458748866727108, "grad_norm": 0.09005934951493812, "learning_rate": 2.8218163708513847e-06, "loss": 0.839, "step": 9836 }, { "epoch": 4.459202175883953, "grad_norm": 0.09715632305855748, "learning_rate": 2.817147946412293e-06, "loss": 0.8529, "step": 9837 }, { "epoch": 4.4596554850407975, "grad_norm": 0.09645047169232633, "learning_rate": 2.8124832459451192e-06, "loss": 0.8468, "step": 9838 }, { "epoch": 4.460108794197643, "grad_norm": 0.09504894051272196, "learning_rate": 2.807822269917031e-06, "loss": 0.8637, "step": 9839 }, { "epoch": 4.460562103354488, "grad_norm": 0.09094650600043051, "learning_rate": 2.80316501879486e-06, "loss": 0.8533, "step": 9840 }, { "epoch": 4.461015412511332, "grad_norm": 0.08723174425740271, "learning_rate": 2.798511493045024e-06, "loss": 0.8435, "step": 9841 }, { "epoch": 4.461468721668178, "grad_norm": 0.09301923135505631, "learning_rate": 2.7938616931335994e-06, "loss": 0.8518, "step": 9842 }, { "epoch": 4.461922030825023, "grad_norm": 0.09175170041253297, "learning_rate": 2.789215619526271e-06, "loss": 0.8597, "step": 9843 }, { "epoch": 4.462375339981867, "grad_norm": 0.09470159133265758, "learning_rate": 2.78457327268836e-06, "loss": 0.8528, "step": 9844 }, { "epoch": 4.462828649138713, "grad_norm": 0.10393960949662646, "learning_rate": 2.779934653084806e-06, "loss": 0.8611, "step": 9845 }, { "epoch": 4.463281958295558, "grad_norm": 0.09341483855892176, "learning_rate": 2.7752997611801925e-06, "loss": 0.8522, "step": 9846 }, { "epoch": 4.463735267452402, "grad_norm": 0.09743672675439227, "learning_rate": 2.7706685974387083e-06, "loss": 0.8539, "step": 9847 }, { "epoch": 4.4641885766092475, "grad_norm": 0.10104872190646559, "learning_rate": 2.766041162324169e-06, "loss": 0.8465, "step": 9848 }, { "epoch": 4.464641885766093, "grad_norm": 0.08911639449185851, "learning_rate": 2.761417456300044e-06, "loss": 0.8452, "step": 9849 }, { "epoch": 4.465095194922937, "grad_norm": 0.09487655424798644, "learning_rate": 2.7567974798294073e-06, "loss": 0.8567, "step": 9850 }, { "epoch": 4.465548504079782, "grad_norm": 0.12033869308057313, "learning_rate": 2.7521812333749553e-06, "loss": 0.8645, "step": 9851 }, { "epoch": 4.466001813236628, "grad_norm": 0.10950466556691843, "learning_rate": 2.747568717399012e-06, "loss": 0.8309, "step": 9852 }, { "epoch": 4.466455122393472, "grad_norm": 0.10410776322763514, "learning_rate": 2.742959932363558e-06, "loss": 0.8494, "step": 9853 }, { "epoch": 4.466908431550317, "grad_norm": 0.12036445692177834, "learning_rate": 2.7383548787301537e-06, "loss": 0.8578, "step": 9854 }, { "epoch": 4.467361740707163, "grad_norm": 0.10548359809209708, "learning_rate": 2.7337535569600217e-06, "loss": 0.8568, "step": 9855 }, { "epoch": 4.467815049864007, "grad_norm": 0.10796419262645278, "learning_rate": 2.7291559675139924e-06, "loss": 0.8419, "step": 9856 }, { "epoch": 4.468268359020852, "grad_norm": 0.09817977484298335, "learning_rate": 2.7245621108525333e-06, "loss": 0.8471, "step": 9857 }, { "epoch": 4.468721668177698, "grad_norm": 0.12217156224877203, "learning_rate": 2.7199719874357255e-06, "loss": 0.8465, "step": 9858 }, { "epoch": 4.469174977334542, "grad_norm": 0.09327830857157184, "learning_rate": 2.71538559772329e-06, "loss": 0.8699, "step": 9859 }, { "epoch": 4.469628286491387, "grad_norm": 0.10432114586739148, "learning_rate": 2.710802942174562e-06, "loss": 0.8512, "step": 9860 }, { "epoch": 4.4700815956482325, "grad_norm": 0.1123484154964966, "learning_rate": 2.706224021248516e-06, "loss": 0.8745, "step": 9861 }, { "epoch": 4.470534904805077, "grad_norm": 0.1007055357944021, "learning_rate": 2.701648835403736e-06, "loss": 0.8619, "step": 9862 }, { "epoch": 4.470988213961922, "grad_norm": 0.10860850297377181, "learning_rate": 2.6970773850984388e-06, "loss": 0.8605, "step": 9863 }, { "epoch": 4.471441523118767, "grad_norm": 0.12852550898867424, "learning_rate": 2.6925096707904797e-06, "loss": 0.8441, "step": 9864 }, { "epoch": 4.471894832275612, "grad_norm": 0.09900551249733269, "learning_rate": 2.6879456929373104e-06, "loss": 0.8616, "step": 9865 }, { "epoch": 4.472348141432457, "grad_norm": 0.09769833998997382, "learning_rate": 2.68338545199605e-06, "loss": 0.8599, "step": 9866 }, { "epoch": 4.4728014505893015, "grad_norm": 0.10821005491546797, "learning_rate": 2.6788289484233996e-06, "loss": 0.8658, "step": 9867 }, { "epoch": 4.473254759746147, "grad_norm": 0.10845038914179912, "learning_rate": 2.6742761826757237e-06, "loss": 0.8563, "step": 9868 }, { "epoch": 4.473708068902992, "grad_norm": 0.0909588525949302, "learning_rate": 2.6697271552089808e-06, "loss": 0.862, "step": 9869 }, { "epoch": 4.474161378059836, "grad_norm": 0.10055485778311472, "learning_rate": 2.665181866478781e-06, "loss": 0.8378, "step": 9870 }, { "epoch": 4.474614687216682, "grad_norm": 0.11033577376521939, "learning_rate": 2.660640316940333e-06, "loss": 0.8621, "step": 9871 }, { "epoch": 4.475067996373527, "grad_norm": 0.09420682705616118, "learning_rate": 2.6561025070485034e-06, "loss": 0.864, "step": 9872 }, { "epoch": 4.475521305530371, "grad_norm": 0.08742469922703536, "learning_rate": 2.651568437257752e-06, "loss": 0.8553, "step": 9873 }, { "epoch": 4.475974614687217, "grad_norm": 0.09545635905546843, "learning_rate": 2.6470381080221906e-06, "loss": 0.8339, "step": 9874 }, { "epoch": 4.476427923844062, "grad_norm": 0.10551080412470897, "learning_rate": 2.6425115197955364e-06, "loss": 0.8518, "step": 9875 }, { "epoch": 4.476881233000906, "grad_norm": 0.09970782005370193, "learning_rate": 2.637988673031151e-06, "loss": 0.8432, "step": 9876 }, { "epoch": 4.4773345421577515, "grad_norm": 0.10220917921876188, "learning_rate": 2.63346956818201e-06, "loss": 0.8532, "step": 9877 }, { "epoch": 4.477787851314597, "grad_norm": 0.09690762174001893, "learning_rate": 2.6289542057006978e-06, "loss": 0.8402, "step": 9878 }, { "epoch": 4.478241160471441, "grad_norm": 0.09130974840282878, "learning_rate": 2.624442586039462e-06, "loss": 0.8396, "step": 9879 }, { "epoch": 4.478694469628286, "grad_norm": 0.10212495680719759, "learning_rate": 2.619934709650136e-06, "loss": 0.8462, "step": 9880 }, { "epoch": 4.479147778785132, "grad_norm": 0.09698468965358502, "learning_rate": 2.615430576984217e-06, "loss": 0.8522, "step": 9881 }, { "epoch": 4.479601087941976, "grad_norm": 0.086270683408742, "learning_rate": 2.610930188492793e-06, "loss": 0.8534, "step": 9882 }, { "epoch": 4.480054397098821, "grad_norm": 0.13516961956447665, "learning_rate": 2.6064335446265963e-06, "loss": 0.858, "step": 9883 }, { "epoch": 4.480507706255667, "grad_norm": 0.09716536694040769, "learning_rate": 2.601940645835974e-06, "loss": 0.8597, "step": 9884 }, { "epoch": 4.480961015412511, "grad_norm": 0.08416996369115035, "learning_rate": 2.597451492570913e-06, "loss": 0.8522, "step": 9885 }, { "epoch": 4.481414324569356, "grad_norm": 0.10467763504432315, "learning_rate": 2.592966085280999e-06, "loss": 0.8744, "step": 9886 }, { "epoch": 4.481867633726202, "grad_norm": 0.0913359581555134, "learning_rate": 2.5884844244154782e-06, "loss": 0.861, "step": 9887 }, { "epoch": 4.482320942883046, "grad_norm": 0.08902607024167221, "learning_rate": 2.584006510423196e-06, "loss": 0.8781, "step": 9888 }, { "epoch": 4.482774252039891, "grad_norm": 0.08545980871469581, "learning_rate": 2.57953234375262e-06, "loss": 0.8448, "step": 9889 }, { "epoch": 4.4832275611967365, "grad_norm": 0.09074576136895415, "learning_rate": 2.575061924851854e-06, "loss": 0.8613, "step": 9890 }, { "epoch": 4.483680870353581, "grad_norm": 0.11049792945962843, "learning_rate": 2.5705952541686285e-06, "loss": 0.8611, "step": 9891 }, { "epoch": 4.484134179510426, "grad_norm": 0.10374092658908003, "learning_rate": 2.5661323321502974e-06, "loss": 0.8832, "step": 9892 }, { "epoch": 4.484587488667271, "grad_norm": 0.10470161825312851, "learning_rate": 2.561673159243818e-06, "loss": 0.8662, "step": 9893 }, { "epoch": 4.485040797824116, "grad_norm": 0.09868204955145479, "learning_rate": 2.557217735895807e-06, "loss": 0.8714, "step": 9894 }, { "epoch": 4.485494106980961, "grad_norm": 0.091389212637639, "learning_rate": 2.5527660625524763e-06, "loss": 0.8682, "step": 9895 }, { "epoch": 4.485947416137806, "grad_norm": 0.09403046669028392, "learning_rate": 2.5483181396596914e-06, "loss": 0.8612, "step": 9896 }, { "epoch": 4.486400725294651, "grad_norm": 0.09295118669632577, "learning_rate": 2.5438739676629e-06, "loss": 0.8591, "step": 9897 }, { "epoch": 4.486854034451496, "grad_norm": 0.09612049079107882, "learning_rate": 2.539433547007222e-06, "loss": 0.8521, "step": 9898 }, { "epoch": 4.487307343608341, "grad_norm": 0.09124633155527934, "learning_rate": 2.534996878137359e-06, "loss": 0.8492, "step": 9899 }, { "epoch": 4.487760652765186, "grad_norm": 0.09444878346326233, "learning_rate": 2.5305639614976763e-06, "loss": 0.8435, "step": 9900 }, { "epoch": 4.488213961922031, "grad_norm": 0.09369261654700178, "learning_rate": 2.526134797532129e-06, "loss": 0.8497, "step": 9901 }, { "epoch": 4.488667271078876, "grad_norm": 0.09182813055426814, "learning_rate": 2.521709386684319e-06, "loss": 0.8682, "step": 9902 }, { "epoch": 4.489120580235721, "grad_norm": 0.09295029718742555, "learning_rate": 2.5172877293974595e-06, "loss": 0.8546, "step": 9903 }, { "epoch": 4.489573889392566, "grad_norm": 0.0881049400763621, "learning_rate": 2.512869826114388e-06, "loss": 0.8637, "step": 9904 }, { "epoch": 4.490027198549411, "grad_norm": 0.09666240659718033, "learning_rate": 2.5084556772775814e-06, "loss": 0.8658, "step": 9905 }, { "epoch": 4.4904805077062555, "grad_norm": 0.09799390410225377, "learning_rate": 2.5040452833291173e-06, "loss": 0.8723, "step": 9906 }, { "epoch": 4.490933816863101, "grad_norm": 0.09677587612851023, "learning_rate": 2.499638644710731e-06, "loss": 0.8809, "step": 9907 }, { "epoch": 4.491387126019946, "grad_norm": 0.09469674932783308, "learning_rate": 2.4952357618637367e-06, "loss": 0.8606, "step": 9908 }, { "epoch": 4.49184043517679, "grad_norm": 0.09026139241666141, "learning_rate": 2.490836635229106e-06, "loss": 0.8467, "step": 9909 }, { "epoch": 4.492293744333636, "grad_norm": 0.09608141435963435, "learning_rate": 2.486441265247419e-06, "loss": 0.8544, "step": 9910 }, { "epoch": 4.49274705349048, "grad_norm": 0.08835866793107541, "learning_rate": 2.482049652358898e-06, "loss": 0.871, "step": 9911 }, { "epoch": 4.493200362647325, "grad_norm": 0.09384829781616197, "learning_rate": 2.477661797003359e-06, "loss": 0.8575, "step": 9912 }, { "epoch": 4.493653671804171, "grad_norm": 0.09412320182175023, "learning_rate": 2.4732776996202778e-06, "loss": 0.8463, "step": 9913 }, { "epoch": 4.494106980961015, "grad_norm": 0.10869650878505681, "learning_rate": 2.468897360648712e-06, "loss": 0.8553, "step": 9914 }, { "epoch": 4.49456029011786, "grad_norm": 0.09537393102872385, "learning_rate": 2.4645207805273863e-06, "loss": 0.8684, "step": 9915 }, { "epoch": 4.495013599274706, "grad_norm": 0.08643480611068434, "learning_rate": 2.460147959694612e-06, "loss": 0.8752, "step": 9916 }, { "epoch": 4.49546690843155, "grad_norm": 0.08745585491739184, "learning_rate": 2.4557788985883548e-06, "loss": 0.8525, "step": 9917 }, { "epoch": 4.495920217588395, "grad_norm": 0.0959486386950686, "learning_rate": 2.4514135976461794e-06, "loss": 0.8505, "step": 9918 }, { "epoch": 4.4963735267452405, "grad_norm": 0.10100671487114553, "learning_rate": 2.447052057305279e-06, "loss": 0.8652, "step": 9919 }, { "epoch": 4.496826835902085, "grad_norm": 0.08504505514048064, "learning_rate": 2.442694278002491e-06, "loss": 0.8694, "step": 9920 }, { "epoch": 4.49728014505893, "grad_norm": 0.09543964269934767, "learning_rate": 2.438340260174239e-06, "loss": 0.8517, "step": 9921 }, { "epoch": 4.497733454215775, "grad_norm": 0.09572817545821964, "learning_rate": 2.43399000425661e-06, "loss": 0.8686, "step": 9922 }, { "epoch": 4.49818676337262, "grad_norm": 0.08866545211690545, "learning_rate": 2.4296435106852823e-06, "loss": 0.8615, "step": 9923 }, { "epoch": 4.498640072529465, "grad_norm": 0.0866491316544971, "learning_rate": 2.4253007798955784e-06, "loss": 0.8589, "step": 9924 }, { "epoch": 4.49909338168631, "grad_norm": 0.09587632316596524, "learning_rate": 2.4209618123224266e-06, "loss": 0.8348, "step": 9925 }, { "epoch": 4.499546690843155, "grad_norm": 0.09603532226773209, "learning_rate": 2.416626608400403e-06, "loss": 0.8753, "step": 9926 }, { "epoch": 4.5, "grad_norm": 0.08992806107144233, "learning_rate": 2.4122951685636674e-06, "loss": 0.8645, "step": 9927 }, { "epoch": 4.500453309156845, "grad_norm": 0.09104356410255758, "learning_rate": 2.4079674932460463e-06, "loss": 0.845, "step": 9928 }, { "epoch": 4.50090661831369, "grad_norm": 0.09568643891992576, "learning_rate": 2.4036435828809525e-06, "loss": 0.8529, "step": 9929 }, { "epoch": 4.501359927470535, "grad_norm": 0.09337038710458347, "learning_rate": 2.399323437901457e-06, "loss": 0.8799, "step": 9930 }, { "epoch": 4.50181323662738, "grad_norm": 0.09246855404604477, "learning_rate": 2.3950070587402195e-06, "loss": 0.8551, "step": 9931 }, { "epoch": 4.502266545784225, "grad_norm": 0.09333024606969682, "learning_rate": 2.3906944458295467e-06, "loss": 0.8377, "step": 9932 }, { "epoch": 4.50271985494107, "grad_norm": 0.08856359895620843, "learning_rate": 2.3863855996013596e-06, "loss": 0.8678, "step": 9933 }, { "epoch": 4.503173164097915, "grad_norm": 0.08984919319188935, "learning_rate": 2.382080520487193e-06, "loss": 0.8415, "step": 9934 }, { "epoch": 4.5036264732547595, "grad_norm": 0.10759497780168473, "learning_rate": 2.3777792089182272e-06, "loss": 0.8497, "step": 9935 }, { "epoch": 4.504079782411605, "grad_norm": 0.09177803189275212, "learning_rate": 2.3734816653252324e-06, "loss": 0.8543, "step": 9936 }, { "epoch": 4.50453309156845, "grad_norm": 0.09256211977823034, "learning_rate": 2.3691878901386424e-06, "loss": 0.8413, "step": 9937 }, { "epoch": 4.504986400725294, "grad_norm": 0.08910620754482523, "learning_rate": 2.3648978837884683e-06, "loss": 0.8578, "step": 9938 }, { "epoch": 4.50543970988214, "grad_norm": 0.09285606223680866, "learning_rate": 2.3606116467043892e-06, "loss": 0.847, "step": 9939 }, { "epoch": 4.505893019038984, "grad_norm": 0.09498543629576794, "learning_rate": 2.356329179315666e-06, "loss": 0.8611, "step": 9940 }, { "epoch": 4.506346328195829, "grad_norm": 0.1024025056486236, "learning_rate": 2.3520504820512134e-06, "loss": 0.8537, "step": 9941 }, { "epoch": 4.506799637352675, "grad_norm": 0.09668901432024933, "learning_rate": 2.3477755553395464e-06, "loss": 0.847, "step": 9942 }, { "epoch": 4.507252946509519, "grad_norm": 0.09909065593428633, "learning_rate": 2.3435043996088202e-06, "loss": 0.8362, "step": 9943 }, { "epoch": 4.507706255666364, "grad_norm": 0.11090588664770572, "learning_rate": 2.3392370152867995e-06, "loss": 0.8447, "step": 9944 }, { "epoch": 4.50815956482321, "grad_norm": 0.10118775541915277, "learning_rate": 2.334973402800871e-06, "loss": 0.8425, "step": 9945 }, { "epoch": 4.508612873980054, "grad_norm": 0.08994266451504437, "learning_rate": 2.3307135625780532e-06, "loss": 0.8602, "step": 9946 }, { "epoch": 4.509066183136899, "grad_norm": 0.08355942047635014, "learning_rate": 2.3264574950449827e-06, "loss": 0.842, "step": 9947 }, { "epoch": 4.5095194922937445, "grad_norm": 0.08918165677133595, "learning_rate": 2.3222052006279138e-06, "loss": 0.8715, "step": 9948 }, { "epoch": 4.509972801450589, "grad_norm": 0.08529393024316442, "learning_rate": 2.31795667975272e-06, "loss": 0.8519, "step": 9949 }, { "epoch": 4.510426110607434, "grad_norm": 0.09471859972149724, "learning_rate": 2.3137119328449175e-06, "loss": 0.8721, "step": 9950 }, { "epoch": 4.510879419764279, "grad_norm": 0.09385411234033919, "learning_rate": 2.3094709603296163e-06, "loss": 0.8414, "step": 9951 }, { "epoch": 4.511332728921124, "grad_norm": 0.09060961897853108, "learning_rate": 2.3052337626315734e-06, "loss": 0.8626, "step": 9952 }, { "epoch": 4.511786038077969, "grad_norm": 0.08988474266028963, "learning_rate": 2.301000340175148e-06, "loss": 0.8503, "step": 9953 }, { "epoch": 4.512239347234814, "grad_norm": 0.11719209443495736, "learning_rate": 2.2967706933843425e-06, "loss": 0.8569, "step": 9954 }, { "epoch": 4.512692656391659, "grad_norm": 0.09136732342815328, "learning_rate": 2.2925448226827473e-06, "loss": 0.8373, "step": 9955 }, { "epoch": 4.513145965548504, "grad_norm": 0.0908556276830977, "learning_rate": 2.2883227284936195e-06, "loss": 0.8497, "step": 9956 }, { "epoch": 4.513599274705349, "grad_norm": 0.09466763982366468, "learning_rate": 2.2841044112397936e-06, "loss": 0.8626, "step": 9957 }, { "epoch": 4.514052583862194, "grad_norm": 0.08727694116885013, "learning_rate": 2.2798898713437634e-06, "loss": 0.8422, "step": 9958 }, { "epoch": 4.514505893019039, "grad_norm": 0.08699802081633767, "learning_rate": 2.275679109227622e-06, "loss": 0.8561, "step": 9959 }, { "epoch": 4.514959202175884, "grad_norm": 0.08760163650280263, "learning_rate": 2.2714721253130814e-06, "loss": 0.8566, "step": 9960 }, { "epoch": 4.515412511332729, "grad_norm": 0.0831144893555214, "learning_rate": 2.2672689200214924e-06, "loss": 0.8764, "step": 9961 }, { "epoch": 4.515865820489574, "grad_norm": 0.08675983906366062, "learning_rate": 2.263069493773813e-06, "loss": 0.8599, "step": 9962 }, { "epoch": 4.516319129646419, "grad_norm": 0.08698924138084026, "learning_rate": 2.2588738469906303e-06, "loss": 0.8535, "step": 9963 }, { "epoch": 4.5167724388032635, "grad_norm": 0.08971310041935335, "learning_rate": 2.2546819800921505e-06, "loss": 0.838, "step": 9964 }, { "epoch": 4.517225747960109, "grad_norm": 0.1054884326272803, "learning_rate": 2.2504938934982113e-06, "loss": 0.8497, "step": 9965 }, { "epoch": 4.517679057116954, "grad_norm": 0.08688767090743625, "learning_rate": 2.2463095876282415e-06, "loss": 0.8557, "step": 9966 }, { "epoch": 4.518132366273798, "grad_norm": 0.08647185991539887, "learning_rate": 2.2421290629013327e-06, "loss": 0.8382, "step": 9967 }, { "epoch": 4.518585675430644, "grad_norm": 0.1215642324301731, "learning_rate": 2.237952319736154e-06, "loss": 0.8537, "step": 9968 }, { "epoch": 4.519038984587489, "grad_norm": 0.10213479968781866, "learning_rate": 2.233779358551038e-06, "loss": 0.8809, "step": 9969 }, { "epoch": 4.519492293744333, "grad_norm": 0.09865996918530962, "learning_rate": 2.229610179763908e-06, "loss": 0.8358, "step": 9970 }, { "epoch": 4.519945602901179, "grad_norm": 0.09150301714369372, "learning_rate": 2.2254447837923322e-06, "loss": 0.8475, "step": 9971 }, { "epoch": 4.520398912058024, "grad_norm": 0.1051984843142184, "learning_rate": 2.22128317105347e-06, "loss": 0.8542, "step": 9972 }, { "epoch": 4.520852221214868, "grad_norm": 0.09587756334259054, "learning_rate": 2.2171253419641304e-06, "loss": 0.8521, "step": 9973 }, { "epoch": 4.521305530371714, "grad_norm": 0.09638633114021006, "learning_rate": 2.212971296940736e-06, "loss": 0.8575, "step": 9974 }, { "epoch": 4.521758839528559, "grad_norm": 0.09318272163464963, "learning_rate": 2.208821036399309e-06, "loss": 0.8876, "step": 9975 }, { "epoch": 4.522212148685403, "grad_norm": 0.08879226504097133, "learning_rate": 2.204674560755531e-06, "loss": 0.8669, "step": 9976 }, { "epoch": 4.5226654578422485, "grad_norm": 0.1052145685132351, "learning_rate": 2.2005318704246648e-06, "loss": 0.8615, "step": 9977 }, { "epoch": 4.523118766999094, "grad_norm": 0.10634789938206603, "learning_rate": 2.196392965821632e-06, "loss": 0.8637, "step": 9978 }, { "epoch": 4.523572076155938, "grad_norm": 0.08288722567366846, "learning_rate": 2.1922578473609367e-06, "loss": 0.8433, "step": 9979 }, { "epoch": 4.524025385312783, "grad_norm": 0.08679611448613803, "learning_rate": 2.188126515456741e-06, "loss": 0.8471, "step": 9980 }, { "epoch": 4.524478694469629, "grad_norm": 0.09442166604677114, "learning_rate": 2.183998970522798e-06, "loss": 0.8636, "step": 9981 }, { "epoch": 4.524932003626473, "grad_norm": 0.0927174157241285, "learning_rate": 2.179875212972502e-06, "loss": 0.8636, "step": 9982 }, { "epoch": 4.525385312783318, "grad_norm": 0.08371483705479638, "learning_rate": 2.1757552432188466e-06, "loss": 0.8596, "step": 9983 }, { "epoch": 4.525838621940164, "grad_norm": 0.08952303288146139, "learning_rate": 2.17163906167448e-06, "loss": 0.8332, "step": 9984 }, { "epoch": 4.526291931097008, "grad_norm": 0.08808130104040439, "learning_rate": 2.167526668751623e-06, "loss": 0.8536, "step": 9985 }, { "epoch": 4.526745240253853, "grad_norm": 0.0932044013746318, "learning_rate": 2.1634180648621684e-06, "loss": 0.8548, "step": 9986 }, { "epoch": 4.5271985494106985, "grad_norm": 0.08879657688042748, "learning_rate": 2.159313250417583e-06, "loss": 0.8542, "step": 9987 }, { "epoch": 4.527651858567543, "grad_norm": 0.08839936791411411, "learning_rate": 2.1552122258289953e-06, "loss": 0.8385, "step": 9988 }, { "epoch": 4.528105167724388, "grad_norm": 0.08801926254362433, "learning_rate": 2.1511149915071215e-06, "loss": 0.8507, "step": 9989 }, { "epoch": 4.5285584768812335, "grad_norm": 0.08874390628897406, "learning_rate": 2.1470215478623226e-06, "loss": 0.8612, "step": 9990 }, { "epoch": 4.529011786038078, "grad_norm": 0.08181295104347748, "learning_rate": 2.142931895304563e-06, "loss": 0.8684, "step": 9991 }, { "epoch": 4.529465095194923, "grad_norm": 0.09117087565204406, "learning_rate": 2.1388460342434314e-06, "loss": 0.8174, "step": 9992 }, { "epoch": 4.529918404351768, "grad_norm": 0.09536637671104295, "learning_rate": 2.1347639650881426e-06, "loss": 0.8449, "step": 9993 }, { "epoch": 4.530371713508613, "grad_norm": 0.08574773148419682, "learning_rate": 2.1306856882475246e-06, "loss": 0.8584, "step": 9994 }, { "epoch": 4.530825022665458, "grad_norm": 0.08748878813887043, "learning_rate": 2.1266112041300336e-06, "loss": 0.8662, "step": 9995 }, { "epoch": 4.531278331822302, "grad_norm": 0.09162737152609456, "learning_rate": 2.1225405131437338e-06, "loss": 0.8456, "step": 9996 }, { "epoch": 4.531731640979148, "grad_norm": 0.1046608898728702, "learning_rate": 2.1184736156963304e-06, "loss": 0.8684, "step": 9997 }, { "epoch": 4.532184950135993, "grad_norm": 0.09497950067611681, "learning_rate": 2.1144105121951154e-06, "loss": 0.8439, "step": 9998 }, { "epoch": 4.532638259292837, "grad_norm": 0.09666101363613044, "learning_rate": 2.1103512030470386e-06, "loss": 0.8621, "step": 9999 }, { "epoch": 4.533091568449683, "grad_norm": 0.09711722282693266, "learning_rate": 2.106295688658646e-06, "loss": 0.8488, "step": 10000 }, { "epoch": 4.533544877606528, "grad_norm": 0.08729333434659382, "learning_rate": 2.102243969436102e-06, "loss": 0.848, "step": 10001 }, { "epoch": 4.533998186763372, "grad_norm": 0.08423367374174692, "learning_rate": 2.098196045785206e-06, "loss": 0.8504, "step": 10002 }, { "epoch": 4.534451495920218, "grad_norm": 0.0948082207878607, "learning_rate": 2.0941519181113714e-06, "loss": 0.8514, "step": 10003 }, { "epoch": 4.534904805077063, "grad_norm": 0.09267509094849999, "learning_rate": 2.0901115868196166e-06, "loss": 0.8396, "step": 10004 }, { "epoch": 4.535358114233907, "grad_norm": 0.09100993406029587, "learning_rate": 2.0860750523146087e-06, "loss": 0.8422, "step": 10005 }, { "epoch": 4.5358114233907525, "grad_norm": 0.08721043185057882, "learning_rate": 2.082042315000612e-06, "loss": 0.8394, "step": 10006 }, { "epoch": 4.536264732547598, "grad_norm": 0.10310248664001152, "learning_rate": 2.078013375281507e-06, "loss": 0.8604, "step": 10007 }, { "epoch": 4.536718041704442, "grad_norm": 0.10123246635499342, "learning_rate": 2.0739882335608243e-06, "loss": 0.8407, "step": 10008 }, { "epoch": 4.537171350861287, "grad_norm": 0.08994879252690524, "learning_rate": 2.0699668902416725e-06, "loss": 0.8586, "step": 10009 }, { "epoch": 4.537624660018133, "grad_norm": 0.09033664999777427, "learning_rate": 2.065949345726819e-06, "loss": 0.8626, "step": 10010 }, { "epoch": 4.538077969174977, "grad_norm": 0.09175349004721876, "learning_rate": 2.0619356004186165e-06, "loss": 0.849, "step": 10011 }, { "epoch": 4.538531278331822, "grad_norm": 0.09668073970427295, "learning_rate": 2.0579256547190686e-06, "loss": 0.8507, "step": 10012 }, { "epoch": 4.538984587488668, "grad_norm": 0.09605379655310729, "learning_rate": 2.0539195090297735e-06, "loss": 0.8299, "step": 10013 }, { "epoch": 4.539437896645512, "grad_norm": 0.09898948404113457, "learning_rate": 2.049917163751962e-06, "loss": 0.8589, "step": 10014 }, { "epoch": 4.539891205802357, "grad_norm": 0.08449537492662608, "learning_rate": 2.0459186192864776e-06, "loss": 0.8631, "step": 10015 }, { "epoch": 4.5403445149592025, "grad_norm": 0.08892992519225416, "learning_rate": 2.041923876033791e-06, "loss": 0.8559, "step": 10016 }, { "epoch": 4.540797824116047, "grad_norm": 0.08716083029207239, "learning_rate": 2.037932934393987e-06, "loss": 0.8584, "step": 10017 }, { "epoch": 4.541251133272892, "grad_norm": 0.09815506997304142, "learning_rate": 2.033945794766763e-06, "loss": 0.8545, "step": 10018 }, { "epoch": 4.541704442429737, "grad_norm": 0.09608227115812298, "learning_rate": 2.0299624575514486e-06, "loss": 0.8454, "step": 10019 }, { "epoch": 4.542157751586582, "grad_norm": 0.0810676374555769, "learning_rate": 2.0259829231469875e-06, "loss": 0.8529, "step": 10020 }, { "epoch": 4.542611060743427, "grad_norm": 0.09429113060246071, "learning_rate": 2.0220071919519403e-06, "loss": 0.8478, "step": 10021 }, { "epoch": 4.5430643699002715, "grad_norm": 0.09541133295996454, "learning_rate": 2.0180352643644906e-06, "loss": 0.854, "step": 10022 }, { "epoch": 4.543517679057117, "grad_norm": 0.09878392461958695, "learning_rate": 2.0140671407824362e-06, "loss": 0.8588, "step": 10023 }, { "epoch": 4.543970988213962, "grad_norm": 0.09896209069979557, "learning_rate": 2.0101028216031883e-06, "loss": 0.846, "step": 10024 }, { "epoch": 4.544424297370806, "grad_norm": 0.10766380490447991, "learning_rate": 2.0061423072238018e-06, "loss": 0.8676, "step": 10025 }, { "epoch": 4.544877606527652, "grad_norm": 0.08763399857434481, "learning_rate": 2.00218559804092e-06, "loss": 0.8582, "step": 10026 }, { "epoch": 4.545330915684497, "grad_norm": 0.08605705601052795, "learning_rate": 1.9982326944508257e-06, "loss": 0.8621, "step": 10027 }, { "epoch": 4.545784224841341, "grad_norm": 0.09771019211573952, "learning_rate": 1.9942835968494113e-06, "loss": 0.8555, "step": 10028 }, { "epoch": 4.546237533998187, "grad_norm": 0.098994017180664, "learning_rate": 1.9903383056321913e-06, "loss": 0.8329, "step": 10029 }, { "epoch": 4.546690843155032, "grad_norm": 0.08300346128114218, "learning_rate": 1.9863968211942987e-06, "loss": 0.8481, "step": 10030 }, { "epoch": 4.547144152311876, "grad_norm": 0.09147442481897661, "learning_rate": 1.982459143930484e-06, "loss": 0.8565, "step": 10031 }, { "epoch": 4.547597461468722, "grad_norm": 0.09723039897449261, "learning_rate": 1.9785252742351213e-06, "loss": 0.8325, "step": 10032 }, { "epoch": 4.548050770625567, "grad_norm": 0.08746890677234832, "learning_rate": 1.974595212502188e-06, "loss": 0.8519, "step": 10033 }, { "epoch": 4.548504079782411, "grad_norm": 0.0942570750133531, "learning_rate": 1.970668959125308e-06, "loss": 0.846, "step": 10034 }, { "epoch": 4.5489573889392565, "grad_norm": 0.08749974771803762, "learning_rate": 1.9667465144976904e-06, "loss": 0.8651, "step": 10035 }, { "epoch": 4.549410698096102, "grad_norm": 0.08654193299348799, "learning_rate": 1.96282787901219e-06, "loss": 0.8727, "step": 10036 }, { "epoch": 4.549864007252946, "grad_norm": 0.08609272126673374, "learning_rate": 1.9589130530612623e-06, "loss": 0.8656, "step": 10037 }, { "epoch": 4.550317316409791, "grad_norm": 0.08822739912684695, "learning_rate": 1.9550020370370014e-06, "loss": 0.8379, "step": 10038 }, { "epoch": 4.550770625566637, "grad_norm": 0.08045521843600137, "learning_rate": 1.9510948313310906e-06, "loss": 0.855, "step": 10039 }, { "epoch": 4.551223934723481, "grad_norm": 0.08936604544437218, "learning_rate": 1.947191436334861e-06, "loss": 0.8739, "step": 10040 }, { "epoch": 4.551677243880326, "grad_norm": 0.08191426043471312, "learning_rate": 1.9432918524392397e-06, "loss": 0.8379, "step": 10041 }, { "epoch": 4.552130553037172, "grad_norm": 0.10046705940835544, "learning_rate": 1.939396080034799e-06, "loss": 0.848, "step": 10042 }, { "epoch": 4.552583862194016, "grad_norm": 0.09197975275096781, "learning_rate": 1.9355041195116843e-06, "loss": 0.8426, "step": 10043 }, { "epoch": 4.553037171350861, "grad_norm": 0.08649895844694015, "learning_rate": 1.931615971259713e-06, "loss": 0.8426, "step": 10044 }, { "epoch": 4.5534904805077066, "grad_norm": 0.0811151696066973, "learning_rate": 1.9277316356682707e-06, "loss": 0.8479, "step": 10045 }, { "epoch": 4.553943789664551, "grad_norm": 0.09160743040303453, "learning_rate": 1.923851113126407e-06, "loss": 0.8516, "step": 10046 }, { "epoch": 4.554397098821396, "grad_norm": 0.09671221710650431, "learning_rate": 1.9199744040227574e-06, "loss": 0.8731, "step": 10047 }, { "epoch": 4.5548504079782415, "grad_norm": 0.08623192626812172, "learning_rate": 1.91610150874558e-06, "loss": 0.8536, "step": 10048 }, { "epoch": 4.555303717135086, "grad_norm": 0.0986984442199136, "learning_rate": 1.9122324276827654e-06, "loss": 0.849, "step": 10049 }, { "epoch": 4.555757026291931, "grad_norm": 0.0987865478280844, "learning_rate": 1.9083671612218113e-06, "loss": 0.8348, "step": 10050 }, { "epoch": 4.556210335448776, "grad_norm": 0.07597161001056688, "learning_rate": 1.90450570974984e-06, "loss": 0.8516, "step": 10051 }, { "epoch": 4.556663644605621, "grad_norm": 0.08254336160866085, "learning_rate": 1.9006480736535726e-06, "loss": 0.8534, "step": 10052 }, { "epoch": 4.557116953762466, "grad_norm": 0.08562130208300668, "learning_rate": 1.8967942533193806e-06, "loss": 0.8581, "step": 10053 }, { "epoch": 4.557570262919311, "grad_norm": 0.09401949511519495, "learning_rate": 1.8929442491332218e-06, "loss": 0.8516, "step": 10054 }, { "epoch": 4.558023572076156, "grad_norm": 0.08915024427272739, "learning_rate": 1.8890980614806987e-06, "loss": 0.8358, "step": 10055 }, { "epoch": 4.558476881233001, "grad_norm": 0.08455998279956016, "learning_rate": 1.885255690747001e-06, "loss": 0.8417, "step": 10056 }, { "epoch": 4.558930190389846, "grad_norm": 0.07884297759907953, "learning_rate": 1.8814171373169721e-06, "loss": 0.8645, "step": 10057 }, { "epoch": 4.559383499546691, "grad_norm": 0.09181474996538126, "learning_rate": 1.877582401575042e-06, "loss": 0.8502, "step": 10058 }, { "epoch": 4.559836808703536, "grad_norm": 0.09481354838487428, "learning_rate": 1.8737514839052729e-06, "loss": 0.8492, "step": 10059 }, { "epoch": 4.560290117860381, "grad_norm": 0.09632425569048796, "learning_rate": 1.8699243846913439e-06, "loss": 0.8451, "step": 10060 }, { "epoch": 4.560743427017226, "grad_norm": 0.08566952458605781, "learning_rate": 1.8661011043165534e-06, "loss": 0.8476, "step": 10061 }, { "epoch": 4.561196736174071, "grad_norm": 0.09014172874216969, "learning_rate": 1.8622816431638125e-06, "loss": 0.8556, "step": 10062 }, { "epoch": 4.561650045330916, "grad_norm": 0.09091758258950092, "learning_rate": 1.8584660016156419e-06, "loss": 0.8829, "step": 10063 }, { "epoch": 4.5621033544877605, "grad_norm": 0.08250126649811937, "learning_rate": 1.854654180054203e-06, "loss": 0.8554, "step": 10064 }, { "epoch": 4.562556663644606, "grad_norm": 0.08290545206589592, "learning_rate": 1.8508461788612476e-06, "loss": 0.85, "step": 10065 }, { "epoch": 4.563009972801451, "grad_norm": 0.08758652329012856, "learning_rate": 1.8470419984181731e-06, "loss": 0.8595, "step": 10066 }, { "epoch": 4.563463281958295, "grad_norm": 0.08670210337555638, "learning_rate": 1.8432416391059638e-06, "loss": 0.8437, "step": 10067 }, { "epoch": 4.563916591115141, "grad_norm": 0.08965439341837557, "learning_rate": 1.8394451013052528e-06, "loss": 0.835, "step": 10068 }, { "epoch": 4.564369900271986, "grad_norm": 0.0849515404269043, "learning_rate": 1.8356523853962605e-06, "loss": 0.8457, "step": 10069 }, { "epoch": 4.56482320942883, "grad_norm": 0.09177284768935207, "learning_rate": 1.8318634917588517e-06, "loss": 0.8474, "step": 10070 }, { "epoch": 4.565276518585676, "grad_norm": 0.0880819873486341, "learning_rate": 1.8280784207724832e-06, "loss": 0.8865, "step": 10071 }, { "epoch": 4.565729827742521, "grad_norm": 0.08474404411627372, "learning_rate": 1.8242971728162517e-06, "loss": 0.8723, "step": 10072 }, { "epoch": 4.566183136899365, "grad_norm": 0.09073404449517328, "learning_rate": 1.8205197482688498e-06, "loss": 0.8456, "step": 10073 }, { "epoch": 4.5666364460562106, "grad_norm": 0.08358540785382085, "learning_rate": 1.8167461475086013e-06, "loss": 0.8528, "step": 10074 }, { "epoch": 4.567089755213055, "grad_norm": 0.09108436083524668, "learning_rate": 1.8129763709134485e-06, "loss": 0.865, "step": 10075 }, { "epoch": 4.5675430643699, "grad_norm": 0.09487157429627974, "learning_rate": 1.8092104188609383e-06, "loss": 0.8566, "step": 10076 }, { "epoch": 4.5679963735267455, "grad_norm": 0.0896281896892985, "learning_rate": 1.805448291728249e-06, "loss": 0.8684, "step": 10077 }, { "epoch": 4.56844968268359, "grad_norm": 0.0805207056496783, "learning_rate": 1.8016899898921635e-06, "loss": 0.8689, "step": 10078 }, { "epoch": 4.568902991840435, "grad_norm": 0.08078270999477344, "learning_rate": 1.7979355137290878e-06, "loss": 0.8567, "step": 10079 }, { "epoch": 4.56935630099728, "grad_norm": 0.08404218474833304, "learning_rate": 1.7941848636150494e-06, "loss": 0.8461, "step": 10080 }, { "epoch": 4.569809610154125, "grad_norm": 0.08680068599145771, "learning_rate": 1.790438039925677e-06, "loss": 0.8711, "step": 10081 }, { "epoch": 4.57026291931097, "grad_norm": 0.0849247108642712, "learning_rate": 1.7866950430362262e-06, "loss": 0.8571, "step": 10082 }, { "epoch": 4.570716228467815, "grad_norm": 0.0911398503411376, "learning_rate": 1.7829558733215835e-06, "loss": 0.8468, "step": 10083 }, { "epoch": 4.57116953762466, "grad_norm": 0.08068895171265865, "learning_rate": 1.7792205311562184e-06, "loss": 0.8389, "step": 10084 }, { "epoch": 4.571622846781505, "grad_norm": 0.08775265432596205, "learning_rate": 1.7754890169142492e-06, "loss": 0.8475, "step": 10085 }, { "epoch": 4.57207615593835, "grad_norm": 0.08511024262202724, "learning_rate": 1.771761330969395e-06, "loss": 0.8465, "step": 10086 }, { "epoch": 4.572529465095195, "grad_norm": 0.16991907997346517, "learning_rate": 1.7680374736949968e-06, "loss": 0.842, "step": 10087 }, { "epoch": 4.57298277425204, "grad_norm": 0.0865733527388611, "learning_rate": 1.7643174454640056e-06, "loss": 0.8497, "step": 10088 }, { "epoch": 4.573436083408885, "grad_norm": 0.08493591274617879, "learning_rate": 1.760601246648994e-06, "loss": 0.8368, "step": 10089 }, { "epoch": 4.57388939256573, "grad_norm": 0.09432449467838346, "learning_rate": 1.7568888776221538e-06, "loss": 0.8554, "step": 10090 }, { "epoch": 4.574342701722575, "grad_norm": 0.08771871175188142, "learning_rate": 1.7531803387552805e-06, "loss": 0.8423, "step": 10091 }, { "epoch": 4.57479601087942, "grad_norm": 0.08434088989473915, "learning_rate": 1.7494756304198057e-06, "loss": 0.8641, "step": 10092 }, { "epoch": 4.5752493200362645, "grad_norm": 0.08373335018987697, "learning_rate": 1.7457747529867618e-06, "loss": 0.8601, "step": 10093 }, { "epoch": 4.57570262919311, "grad_norm": 0.08794850383075574, "learning_rate": 1.7420777068268035e-06, "loss": 0.8781, "step": 10094 }, { "epoch": 4.576155938349955, "grad_norm": 0.08368563664395856, "learning_rate": 1.738384492310199e-06, "loss": 0.8599, "step": 10095 }, { "epoch": 4.576609247506799, "grad_norm": 0.07894593581179006, "learning_rate": 1.7346951098068432e-06, "loss": 0.8659, "step": 10096 }, { "epoch": 4.577062556663645, "grad_norm": 0.08783714545670218, "learning_rate": 1.7310095596862232e-06, "loss": 0.8502, "step": 10097 }, { "epoch": 4.577515865820489, "grad_norm": 0.0976900677073141, "learning_rate": 1.7273278423174745e-06, "loss": 0.8455, "step": 10098 }, { "epoch": 4.577969174977334, "grad_norm": 0.086406729896526, "learning_rate": 1.72364995806932e-06, "loss": 0.8557, "step": 10099 }, { "epoch": 4.57842248413418, "grad_norm": 0.08134342011742009, "learning_rate": 1.7199759073101185e-06, "loss": 0.8598, "step": 10100 }, { "epoch": 4.578875793291024, "grad_norm": 0.09055474095891457, "learning_rate": 1.7163056904078246e-06, "loss": 0.8751, "step": 10101 }, { "epoch": 4.579329102447869, "grad_norm": 0.08960067633896408, "learning_rate": 1.7126393077300375e-06, "loss": 0.8427, "step": 10102 }, { "epoch": 4.5797824116047146, "grad_norm": 0.08864174208363182, "learning_rate": 1.7089767596439432e-06, "loss": 0.8423, "step": 10103 }, { "epoch": 4.580235720761559, "grad_norm": 0.09371150738067711, "learning_rate": 1.70531804651636e-06, "loss": 0.86, "step": 10104 }, { "epoch": 4.580689029918404, "grad_norm": 0.09923982126415586, "learning_rate": 1.7016631687137275e-06, "loss": 0.8388, "step": 10105 }, { "epoch": 4.5811423390752495, "grad_norm": 0.08314817414545482, "learning_rate": 1.6980121266020776e-06, "loss": 0.8685, "step": 10106 }, { "epoch": 4.581595648232094, "grad_norm": 0.08252084303937449, "learning_rate": 1.694364920547087e-06, "loss": 0.8637, "step": 10107 }, { "epoch": 4.582048957388939, "grad_norm": 0.09293267728311086, "learning_rate": 1.6907215509140184e-06, "loss": 0.8346, "step": 10108 }, { "epoch": 4.582502266545784, "grad_norm": 0.08962752502203557, "learning_rate": 1.687082018067785e-06, "loss": 0.8423, "step": 10109 }, { "epoch": 4.582955575702629, "grad_norm": 0.08069570775579914, "learning_rate": 1.6834463223728814e-06, "loss": 0.8498, "step": 10110 }, { "epoch": 4.583408884859474, "grad_norm": 0.08588481616825189, "learning_rate": 1.6798144641934432e-06, "loss": 0.8579, "step": 10111 }, { "epoch": 4.583862194016319, "grad_norm": 0.08723838507735372, "learning_rate": 1.6761864438932017e-06, "loss": 0.8556, "step": 10112 }, { "epoch": 4.584315503173164, "grad_norm": 0.08709585809332979, "learning_rate": 1.6725622618355286e-06, "loss": 0.8619, "step": 10113 }, { "epoch": 4.584768812330009, "grad_norm": 0.08625549600891659, "learning_rate": 1.6689419183833822e-06, "loss": 0.857, "step": 10114 }, { "epoch": 4.585222121486854, "grad_norm": 0.07928119922505866, "learning_rate": 1.6653254138993524e-06, "loss": 0.8468, "step": 10115 }, { "epoch": 4.585675430643699, "grad_norm": 0.08037077851899313, "learning_rate": 1.661712748745652e-06, "loss": 0.858, "step": 10116 }, { "epoch": 4.586128739800544, "grad_norm": 0.0837954529469153, "learning_rate": 1.6581039232840845e-06, "loss": 0.852, "step": 10117 }, { "epoch": 4.586582048957389, "grad_norm": 0.07981697526189449, "learning_rate": 1.6544989378761078e-06, "loss": 0.836, "step": 10118 }, { "epoch": 4.587035358114234, "grad_norm": 0.08253990572974215, "learning_rate": 1.6508977928827485e-06, "loss": 0.871, "step": 10119 }, { "epoch": 4.587488667271079, "grad_norm": 0.16650704870293173, "learning_rate": 1.6473004886646826e-06, "loss": 0.8891, "step": 10120 }, { "epoch": 4.587941976427924, "grad_norm": 0.09382258710736553, "learning_rate": 1.6437070255821863e-06, "loss": 0.8628, "step": 10121 }, { "epoch": 4.5883952855847685, "grad_norm": 0.09057227807446606, "learning_rate": 1.6401174039951673e-06, "loss": 0.861, "step": 10122 }, { "epoch": 4.588848594741614, "grad_norm": 0.08727470198047381, "learning_rate": 1.6365316242631158e-06, "loss": 0.8416, "step": 10123 }, { "epoch": 4.589301903898459, "grad_norm": 0.10115273962782441, "learning_rate": 1.6329496867451799e-06, "loss": 0.8583, "step": 10124 }, { "epoch": 4.589755213055303, "grad_norm": 0.13619021960920705, "learning_rate": 1.6293715918000863e-06, "loss": 0.8652, "step": 10125 }, { "epoch": 4.590208522212149, "grad_norm": 0.08488891751173376, "learning_rate": 1.6257973397862015e-06, "loss": 0.8676, "step": 10126 }, { "epoch": 4.590661831368994, "grad_norm": 0.08785483767797686, "learning_rate": 1.622226931061488e-06, "loss": 0.8528, "step": 10127 }, { "epoch": 4.591115140525838, "grad_norm": 0.09143006587668524, "learning_rate": 1.61866036598354e-06, "loss": 0.8551, "step": 10128 }, { "epoch": 4.591568449682684, "grad_norm": 0.08863896594443875, "learning_rate": 1.6150976449095602e-06, "loss": 0.8607, "step": 10129 }, { "epoch": 4.592021758839529, "grad_norm": 0.08837053328255794, "learning_rate": 1.6115387681963568e-06, "loss": 0.8432, "step": 10130 }, { "epoch": 4.592475067996373, "grad_norm": 0.08222512663126724, "learning_rate": 1.6079837362003692e-06, "loss": 0.871, "step": 10131 }, { "epoch": 4.5929283771532186, "grad_norm": 0.10021980374667976, "learning_rate": 1.6044325492776414e-06, "loss": 0.8597, "step": 10132 }, { "epoch": 4.593381686310064, "grad_norm": 0.08631536707418888, "learning_rate": 1.6008852077838399e-06, "loss": 0.8427, "step": 10133 }, { "epoch": 4.593834995466908, "grad_norm": 0.08986238623325982, "learning_rate": 1.597341712074232e-06, "loss": 0.8644, "step": 10134 }, { "epoch": 4.5942883046237535, "grad_norm": 0.08343185216139067, "learning_rate": 1.5938020625037199e-06, "loss": 0.8433, "step": 10135 }, { "epoch": 4.594741613780599, "grad_norm": 0.0800531218459978, "learning_rate": 1.5902662594268025e-06, "loss": 0.8374, "step": 10136 }, { "epoch": 4.595194922937443, "grad_norm": 0.08088676928654276, "learning_rate": 1.5867343031976056e-06, "loss": 0.8515, "step": 10137 }, { "epoch": 4.595648232094288, "grad_norm": 0.08964816472144767, "learning_rate": 1.583206194169864e-06, "loss": 0.8567, "step": 10138 }, { "epoch": 4.596101541251134, "grad_norm": 0.09513886294366695, "learning_rate": 1.579681932696926e-06, "loss": 0.8573, "step": 10139 }, { "epoch": 4.596554850407978, "grad_norm": 0.09336993567957788, "learning_rate": 1.5761615191317535e-06, "loss": 0.8582, "step": 10140 }, { "epoch": 4.597008159564823, "grad_norm": 0.07908583759358426, "learning_rate": 1.5726449538269361e-06, "loss": 0.8554, "step": 10141 }, { "epoch": 4.597461468721669, "grad_norm": 0.08232588773787851, "learning_rate": 1.569132237134654e-06, "loss": 0.8474, "step": 10142 }, { "epoch": 4.597914777878513, "grad_norm": 0.08539993728002826, "learning_rate": 1.5656233694067324e-06, "loss": 0.8537, "step": 10143 }, { "epoch": 4.598368087035358, "grad_norm": 0.08664278306774448, "learning_rate": 1.5621183509945881e-06, "loss": 0.8441, "step": 10144 }, { "epoch": 4.5988213961922035, "grad_norm": 0.08087459996033926, "learning_rate": 1.5586171822492514e-06, "loss": 0.8536, "step": 10145 }, { "epoch": 4.599274705349048, "grad_norm": 0.08851788747425218, "learning_rate": 1.5551198635213838e-06, "loss": 0.8445, "step": 10146 }, { "epoch": 4.599728014505893, "grad_norm": 0.10297628608157389, "learning_rate": 1.5516263951612476e-06, "loss": 0.8652, "step": 10147 }, { "epoch": 4.6001813236627385, "grad_norm": 0.08869810213845455, "learning_rate": 1.5481367775187316e-06, "loss": 0.8654, "step": 10148 }, { "epoch": 4.600634632819583, "grad_norm": 0.08502556436242542, "learning_rate": 1.5446510109433166e-06, "loss": 0.8564, "step": 10149 }, { "epoch": 4.601087941976428, "grad_norm": 0.08263295475507819, "learning_rate": 1.5411690957841274e-06, "loss": 0.8723, "step": 10150 }, { "epoch": 4.601541251133273, "grad_norm": 0.08070726384899608, "learning_rate": 1.5376910323898763e-06, "loss": 0.8563, "step": 10151 }, { "epoch": 4.601994560290118, "grad_norm": 0.08545237146160854, "learning_rate": 1.5342168211089115e-06, "loss": 0.8514, "step": 10152 }, { "epoch": 4.602447869446963, "grad_norm": 0.09199236240200255, "learning_rate": 1.5307464622891766e-06, "loss": 0.8399, "step": 10153 }, { "epoch": 4.602901178603807, "grad_norm": 0.08195092597209687, "learning_rate": 1.5272799562782515e-06, "loss": 0.855, "step": 10154 }, { "epoch": 4.603354487760653, "grad_norm": 0.08106032907885387, "learning_rate": 1.5238173034233027e-06, "loss": 0.8487, "step": 10155 }, { "epoch": 4.603807796917498, "grad_norm": 0.0887777504501776, "learning_rate": 1.5203585040711289e-06, "loss": 0.8622, "step": 10156 }, { "epoch": 4.604261106074342, "grad_norm": 0.09418860367853135, "learning_rate": 1.5169035585681412e-06, "loss": 0.8684, "step": 10157 }, { "epoch": 4.604714415231188, "grad_norm": 0.08395346350774018, "learning_rate": 1.5134524672603657e-06, "loss": 0.8502, "step": 10158 }, { "epoch": 4.605167724388033, "grad_norm": 0.08034364660655895, "learning_rate": 1.5100052304934277e-06, "loss": 0.8634, "step": 10159 }, { "epoch": 4.605621033544877, "grad_norm": 0.08830218909001233, "learning_rate": 1.5065618486125933e-06, "loss": 0.8446, "step": 10160 }, { "epoch": 4.606074342701723, "grad_norm": 0.08581780632367071, "learning_rate": 1.50312232196272e-06, "loss": 0.8417, "step": 10161 }, { "epoch": 4.606527651858568, "grad_norm": 0.08266443695663443, "learning_rate": 1.499686650888279e-06, "loss": 0.8573, "step": 10162 }, { "epoch": 4.606980961015412, "grad_norm": 0.09144709464677653, "learning_rate": 1.4962548357333774e-06, "loss": 0.8692, "step": 10163 }, { "epoch": 4.6074342701722575, "grad_norm": 0.08151706915226163, "learning_rate": 1.4928268768417087e-06, "loss": 0.8699, "step": 10164 }, { "epoch": 4.607887579329103, "grad_norm": 0.08346174879283105, "learning_rate": 1.4894027745566031e-06, "loss": 0.8527, "step": 10165 }, { "epoch": 4.608340888485947, "grad_norm": 0.08458220605429377, "learning_rate": 1.4859825292209861e-06, "loss": 0.8551, "step": 10166 }, { "epoch": 4.608794197642792, "grad_norm": 0.07980752440597422, "learning_rate": 1.4825661411774105e-06, "loss": 0.8498, "step": 10167 }, { "epoch": 4.609247506799638, "grad_norm": 0.07800363559019871, "learning_rate": 1.4791536107680383e-06, "loss": 0.8306, "step": 10168 }, { "epoch": 4.609700815956482, "grad_norm": 0.0905519732834813, "learning_rate": 1.4757449383346401e-06, "loss": 0.8534, "step": 10169 }, { "epoch": 4.610154125113327, "grad_norm": 0.08212595016420685, "learning_rate": 1.4723401242186143e-06, "loss": 0.8498, "step": 10170 }, { "epoch": 4.610607434270173, "grad_norm": 0.0968354399954973, "learning_rate": 1.4689391687609456e-06, "loss": 0.8801, "step": 10171 }, { "epoch": 4.611060743427017, "grad_norm": 0.087611438940547, "learning_rate": 1.4655420723022685e-06, "loss": 0.8605, "step": 10172 }, { "epoch": 4.611514052583862, "grad_norm": 0.08168145242564682, "learning_rate": 1.462148835182795e-06, "loss": 0.862, "step": 10173 }, { "epoch": 4.6119673617407075, "grad_norm": 0.08386673410581318, "learning_rate": 1.4587594577423826e-06, "loss": 0.8588, "step": 10174 }, { "epoch": 4.612420670897552, "grad_norm": 0.077573066379191, "learning_rate": 1.4553739403204791e-06, "loss": 0.8602, "step": 10175 }, { "epoch": 4.612873980054397, "grad_norm": 0.08141711237522148, "learning_rate": 1.451992283256165e-06, "loss": 0.8705, "step": 10176 }, { "epoch": 4.613327289211242, "grad_norm": 0.07732765351464824, "learning_rate": 1.4486144868881024e-06, "loss": 0.8492, "step": 10177 }, { "epoch": 4.613780598368087, "grad_norm": 0.08537843127040168, "learning_rate": 1.445240551554603e-06, "loss": 0.8482, "step": 10178 }, { "epoch": 4.614233907524932, "grad_norm": 0.08392581438968359, "learning_rate": 1.4418704775935698e-06, "loss": 0.867, "step": 10179 }, { "epoch": 4.6146872166817765, "grad_norm": 0.08241949649494777, "learning_rate": 1.438504265342533e-06, "loss": 0.8467, "step": 10180 }, { "epoch": 4.615140525838622, "grad_norm": 0.08305159170139247, "learning_rate": 1.4351419151386181e-06, "loss": 0.8509, "step": 10181 }, { "epoch": 4.615593834995467, "grad_norm": 0.086917350158416, "learning_rate": 1.431783427318587e-06, "loss": 0.8554, "step": 10182 }, { "epoch": 4.616047144152311, "grad_norm": 0.08142217555927868, "learning_rate": 1.428428802218793e-06, "loss": 0.8721, "step": 10183 }, { "epoch": 4.616500453309157, "grad_norm": 0.08544635403017638, "learning_rate": 1.4250780401752163e-06, "loss": 0.8571, "step": 10184 }, { "epoch": 4.616953762466002, "grad_norm": 0.08619153292109362, "learning_rate": 1.421731141523437e-06, "loss": 0.8719, "step": 10185 }, { "epoch": 4.617407071622846, "grad_norm": 0.0838216050079352, "learning_rate": 1.4183881065986716e-06, "loss": 0.8486, "step": 10186 }, { "epoch": 4.617860380779692, "grad_norm": 0.0836652145125662, "learning_rate": 1.4150489357357232e-06, "loss": 0.8535, "step": 10187 }, { "epoch": 4.618313689936537, "grad_norm": 0.0883383338373107, "learning_rate": 1.4117136292690225e-06, "loss": 0.8656, "step": 10188 }, { "epoch": 4.618766999093381, "grad_norm": 0.08096257986165727, "learning_rate": 1.408382187532613e-06, "loss": 0.8424, "step": 10189 }, { "epoch": 4.619220308250227, "grad_norm": 0.08738304097868256, "learning_rate": 1.4050546108601393e-06, "loss": 0.8605, "step": 10190 }, { "epoch": 4.619673617407072, "grad_norm": 0.08106371695053786, "learning_rate": 1.4017308995848811e-06, "loss": 0.842, "step": 10191 }, { "epoch": 4.620126926563916, "grad_norm": 0.08290911657435057, "learning_rate": 1.39841105403971e-06, "loss": 0.8546, "step": 10192 }, { "epoch": 4.6205802357207615, "grad_norm": 0.08224514214183769, "learning_rate": 1.3950950745571202e-06, "loss": 0.8412, "step": 10193 }, { "epoch": 4.621033544877607, "grad_norm": 0.08785460568663624, "learning_rate": 1.3917829614692102e-06, "loss": 0.8598, "step": 10194 }, { "epoch": 4.621486854034451, "grad_norm": 0.08691394722645641, "learning_rate": 1.3884747151077193e-06, "loss": 0.8568, "step": 10195 }, { "epoch": 4.621940163191296, "grad_norm": 0.09073682503418257, "learning_rate": 1.3851703358039515e-06, "loss": 0.8506, "step": 10196 }, { "epoch": 4.622393472348142, "grad_norm": 0.09102296604466163, "learning_rate": 1.3818698238888638e-06, "loss": 0.8711, "step": 10197 }, { "epoch": 4.622846781504986, "grad_norm": 0.08609006182825715, "learning_rate": 1.3785731796930058e-06, "loss": 0.8572, "step": 10198 }, { "epoch": 4.623300090661831, "grad_norm": 0.08179966745174759, "learning_rate": 1.3752804035465573e-06, "loss": 0.8524, "step": 10199 }, { "epoch": 4.623753399818677, "grad_norm": 0.08132688069403068, "learning_rate": 1.3719914957792857e-06, "loss": 0.839, "step": 10200 }, { "epoch": 4.624206708975521, "grad_norm": 0.08738395991123449, "learning_rate": 1.3687064567205943e-06, "loss": 0.8518, "step": 10201 }, { "epoch": 4.624660018132366, "grad_norm": 0.07921385066877787, "learning_rate": 1.365425286699491e-06, "loss": 0.8497, "step": 10202 }, { "epoch": 4.6251133272892115, "grad_norm": 0.07992884446481473, "learning_rate": 1.3621479860445796e-06, "loss": 0.8592, "step": 10203 }, { "epoch": 4.625566636446056, "grad_norm": 0.08720265290974383, "learning_rate": 1.358874555084113e-06, "loss": 0.8425, "step": 10204 }, { "epoch": 4.626019945602901, "grad_norm": 0.0840894496459717, "learning_rate": 1.3556049941459138e-06, "loss": 0.8478, "step": 10205 }, { "epoch": 4.6264732547597465, "grad_norm": 0.08739064694104758, "learning_rate": 1.3523393035574573e-06, "loss": 0.8431, "step": 10206 }, { "epoch": 4.626926563916591, "grad_norm": 0.08193686696049078, "learning_rate": 1.3490774836457977e-06, "loss": 0.8432, "step": 10207 }, { "epoch": 4.627379873073436, "grad_norm": 0.43625266955677194, "learning_rate": 1.3458195347376202e-06, "loss": 0.8409, "step": 10208 }, { "epoch": 4.627833182230281, "grad_norm": 0.08329803294979264, "learning_rate": 1.3425654571592196e-06, "loss": 0.853, "step": 10209 }, { "epoch": 4.628286491387126, "grad_norm": 0.07921872414221334, "learning_rate": 1.3393152512365037e-06, "loss": 0.8377, "step": 10210 }, { "epoch": 4.628739800543971, "grad_norm": 0.08375447197563149, "learning_rate": 1.3360689172949814e-06, "loss": 0.8543, "step": 10211 }, { "epoch": 4.629193109700816, "grad_norm": 0.08902933382291336, "learning_rate": 1.3328264556597969e-06, "loss": 0.8356, "step": 10212 }, { "epoch": 4.629646418857661, "grad_norm": 0.09855572760926141, "learning_rate": 1.3295878666556816e-06, "loss": 0.8807, "step": 10213 }, { "epoch": 4.630099728014506, "grad_norm": 0.09311249527101044, "learning_rate": 1.3263531506069893e-06, "loss": 0.8404, "step": 10214 }, { "epoch": 4.630553037171351, "grad_norm": 0.08928518603513245, "learning_rate": 1.3231223078376876e-06, "loss": 0.8867, "step": 10215 }, { "epoch": 4.631006346328196, "grad_norm": 0.08330135935910606, "learning_rate": 1.3198953386713576e-06, "loss": 0.879, "step": 10216 }, { "epoch": 4.631459655485041, "grad_norm": 0.10309709250964556, "learning_rate": 1.3166722434311896e-06, "loss": 0.8796, "step": 10217 }, { "epoch": 4.631912964641886, "grad_norm": 0.08723878183028032, "learning_rate": 1.3134530224399788e-06, "loss": 0.8405, "step": 10218 }, { "epoch": 4.632366273798731, "grad_norm": 0.09334395755053922, "learning_rate": 1.3102376760201518e-06, "loss": 0.8419, "step": 10219 }, { "epoch": 4.632819582955576, "grad_norm": 0.08419453116713094, "learning_rate": 1.307026204493722e-06, "loss": 0.8624, "step": 10220 }, { "epoch": 4.633272892112421, "grad_norm": 0.084389703523477, "learning_rate": 1.303818608182339e-06, "loss": 0.8496, "step": 10221 }, { "epoch": 4.6337262012692655, "grad_norm": 0.08045157404617335, "learning_rate": 1.3006148874072477e-06, "loss": 0.8431, "step": 10222 }, { "epoch": 4.634179510426111, "grad_norm": 0.08492911369993257, "learning_rate": 1.2974150424893117e-06, "loss": 0.8623, "step": 10223 }, { "epoch": 4.634632819582956, "grad_norm": 0.08072136224078866, "learning_rate": 1.294219073748999e-06, "loss": 0.8478, "step": 10224 }, { "epoch": 4.6350861287398, "grad_norm": 0.0807305148706993, "learning_rate": 1.2910269815064047e-06, "loss": 0.8336, "step": 10225 }, { "epoch": 4.635539437896646, "grad_norm": 0.08182469784674984, "learning_rate": 1.2878387660812197e-06, "loss": 0.8414, "step": 10226 }, { "epoch": 4.635992747053491, "grad_norm": 0.08900804865765982, "learning_rate": 1.2846544277927575e-06, "loss": 0.8725, "step": 10227 }, { "epoch": 4.636446056210335, "grad_norm": 0.08863144521141249, "learning_rate": 1.2814739669599407e-06, "loss": 0.8423, "step": 10228 }, { "epoch": 4.636899365367181, "grad_norm": 0.0816133159397302, "learning_rate": 1.278297383901288e-06, "loss": 0.8432, "step": 10229 }, { "epoch": 4.637352674524025, "grad_norm": 0.08061435891744646, "learning_rate": 1.2751246789349624e-06, "loss": 0.8464, "step": 10230 }, { "epoch": 4.63780598368087, "grad_norm": 0.09026993285419208, "learning_rate": 1.2719558523787012e-06, "loss": 0.8346, "step": 10231 }, { "epoch": 4.6382592928377155, "grad_norm": 0.07823426972469903, "learning_rate": 1.2687909045498903e-06, "loss": 0.8516, "step": 10232 }, { "epoch": 4.63871260199456, "grad_norm": 0.07976643520740555, "learning_rate": 1.2656298357654939e-06, "loss": 0.8689, "step": 10233 }, { "epoch": 4.639165911151405, "grad_norm": 0.08762724184918971, "learning_rate": 1.2624726463421122e-06, "loss": 0.8415, "step": 10234 }, { "epoch": 4.6396192203082505, "grad_norm": 0.08601800782931537, "learning_rate": 1.2593193365959366e-06, "loss": 0.8708, "step": 10235 }, { "epoch": 4.640072529465095, "grad_norm": 0.08774483912042835, "learning_rate": 1.2561699068427902e-06, "loss": 0.8711, "step": 10236 }, { "epoch": 4.64052583862194, "grad_norm": 0.08255004535358199, "learning_rate": 1.2530243573980916e-06, "loss": 0.8521, "step": 10237 }, { "epoch": 4.640979147778785, "grad_norm": 0.0811276853474464, "learning_rate": 1.2498826885768821e-06, "loss": 0.837, "step": 10238 }, { "epoch": 4.64143245693563, "grad_norm": 0.08433216716340668, "learning_rate": 1.2467449006937993e-06, "loss": 0.8509, "step": 10239 }, { "epoch": 4.641885766092475, "grad_norm": 0.08030387089161442, "learning_rate": 1.2436109940631157e-06, "loss": 0.867, "step": 10240 }, { "epoch": 4.64233907524932, "grad_norm": 0.08361523104960206, "learning_rate": 1.2404809689986874e-06, "loss": 0.8527, "step": 10241 }, { "epoch": 4.642792384406165, "grad_norm": 0.08021052375059869, "learning_rate": 1.2373548258140056e-06, "loss": 0.8503, "step": 10242 }, { "epoch": 4.64324569356301, "grad_norm": 0.07997501898236963, "learning_rate": 1.234232564822162e-06, "loss": 0.8552, "step": 10243 }, { "epoch": 4.643699002719855, "grad_norm": 0.09441314915190116, "learning_rate": 1.2311141863358534e-06, "loss": 0.8637, "step": 10244 }, { "epoch": 4.6441523118767, "grad_norm": 0.08438666285250385, "learning_rate": 1.2279996906673985e-06, "loss": 0.8472, "step": 10245 }, { "epoch": 4.644605621033545, "grad_norm": 0.08161321318792106, "learning_rate": 1.2248890781287216e-06, "loss": 0.8644, "step": 10246 }, { "epoch": 4.64505893019039, "grad_norm": 0.0804762556344347, "learning_rate": 1.2217823490313685e-06, "loss": 0.8648, "step": 10247 }, { "epoch": 4.645512239347235, "grad_norm": 0.08705647578662999, "learning_rate": 1.218679503686473e-06, "loss": 0.8552, "step": 10248 }, { "epoch": 4.64596554850408, "grad_norm": 0.08102731304107434, "learning_rate": 1.2155805424048085e-06, "loss": 0.8659, "step": 10249 }, { "epoch": 4.646418857660925, "grad_norm": 0.08621473980959787, "learning_rate": 1.212485465496731e-06, "loss": 0.8529, "step": 10250 }, { "epoch": 4.6468721668177695, "grad_norm": 0.08035771754127566, "learning_rate": 1.209394273272233e-06, "loss": 0.8259, "step": 10251 }, { "epoch": 4.647325475974615, "grad_norm": 0.08139225420847718, "learning_rate": 1.2063069660408978e-06, "loss": 0.8513, "step": 10252 }, { "epoch": 4.64777878513146, "grad_norm": 0.09565437538719479, "learning_rate": 1.2032235441119399e-06, "loss": 0.8718, "step": 10253 }, { "epoch": 4.648232094288304, "grad_norm": 0.0805245505096966, "learning_rate": 1.2001440077941618e-06, "loss": 0.8568, "step": 10254 }, { "epoch": 4.64868540344515, "grad_norm": 0.08238639346356212, "learning_rate": 1.1970683573959917e-06, "loss": 0.85, "step": 10255 }, { "epoch": 4.649138712601994, "grad_norm": 0.10497588946310893, "learning_rate": 1.1939965932254638e-06, "loss": 0.8782, "step": 10256 }, { "epoch": 4.649592021758839, "grad_norm": 0.09353816793736226, "learning_rate": 1.1909287155902295e-06, "loss": 0.8565, "step": 10257 }, { "epoch": 4.650045330915685, "grad_norm": 0.08895079652944589, "learning_rate": 1.1878647247975406e-06, "loss": 0.8548, "step": 10258 }, { "epoch": 4.650498640072529, "grad_norm": 0.08379417328029756, "learning_rate": 1.184804621154263e-06, "loss": 0.8492, "step": 10259 }, { "epoch": 4.650951949229374, "grad_norm": 0.10428936506276955, "learning_rate": 1.1817484049668804e-06, "loss": 0.8318, "step": 10260 }, { "epoch": 4.6514052583862195, "grad_norm": 0.08466151348197709, "learning_rate": 1.1786960765414768e-06, "loss": 0.848, "step": 10261 }, { "epoch": 4.651858567543064, "grad_norm": 0.07812052474808723, "learning_rate": 1.1756476361837632e-06, "loss": 0.8361, "step": 10262 }, { "epoch": 4.652311876699909, "grad_norm": 0.08241980530222418, "learning_rate": 1.1726030841990332e-06, "loss": 0.8501, "step": 10263 }, { "epoch": 4.6527651858567545, "grad_norm": 0.09527513924536729, "learning_rate": 1.1695624208922207e-06, "loss": 0.8423, "step": 10264 }, { "epoch": 4.653218495013599, "grad_norm": 0.09324580837494272, "learning_rate": 1.1665256465678465e-06, "loss": 0.8616, "step": 10265 }, { "epoch": 4.653671804170444, "grad_norm": 0.08515676444305936, "learning_rate": 1.163492761530063e-06, "loss": 0.8607, "step": 10266 }, { "epoch": 4.654125113327289, "grad_norm": 0.08235281776592451, "learning_rate": 1.160463766082618e-06, "loss": 0.863, "step": 10267 }, { "epoch": 4.654578422484134, "grad_norm": 0.08597626642659562, "learning_rate": 1.1574386605288734e-06, "loss": 0.8534, "step": 10268 }, { "epoch": 4.655031731640979, "grad_norm": 0.08768399886473213, "learning_rate": 1.154417445171805e-06, "loss": 0.842, "step": 10269 }, { "epoch": 4.655485040797824, "grad_norm": 0.08074265977706853, "learning_rate": 1.151400120313988e-06, "loss": 0.8522, "step": 10270 }, { "epoch": 4.655938349954669, "grad_norm": 0.08491216766583784, "learning_rate": 1.1483866862576298e-06, "loss": 0.8618, "step": 10271 }, { "epoch": 4.656391659111514, "grad_norm": 0.0749059905723375, "learning_rate": 1.1453771433045292e-06, "loss": 0.8441, "step": 10272 }, { "epoch": 4.656844968268359, "grad_norm": 0.0763796867693737, "learning_rate": 1.142371491756098e-06, "loss": 0.8429, "step": 10273 }, { "epoch": 4.657298277425204, "grad_norm": 0.07661993034501992, "learning_rate": 1.1393697319133578e-06, "loss": 0.8342, "step": 10274 }, { "epoch": 4.657751586582049, "grad_norm": 0.08052394712520733, "learning_rate": 1.1363718640769483e-06, "loss": 0.8527, "step": 10275 }, { "epoch": 4.658204895738894, "grad_norm": 0.09110354534252649, "learning_rate": 1.1333778885471181e-06, "loss": 0.8291, "step": 10276 }, { "epoch": 4.658658204895739, "grad_norm": 0.08332523595898515, "learning_rate": 1.130387805623716e-06, "loss": 0.8746, "step": 10277 }, { "epoch": 4.659111514052584, "grad_norm": 0.08535711805455207, "learning_rate": 1.1274016156062139e-06, "loss": 0.8368, "step": 10278 }, { "epoch": 4.659564823209429, "grad_norm": 0.07461025715100163, "learning_rate": 1.1244193187936836e-06, "loss": 0.8572, "step": 10279 }, { "epoch": 4.6600181323662735, "grad_norm": 0.08183931266326715, "learning_rate": 1.1214409154848106e-06, "loss": 0.8465, "step": 10280 }, { "epoch": 4.660471441523119, "grad_norm": 0.0799183899008404, "learning_rate": 1.1184664059778938e-06, "loss": 0.8742, "step": 10281 }, { "epoch": 4.660924750679964, "grad_norm": 0.07807190828579443, "learning_rate": 1.115495790570833e-06, "loss": 0.8366, "step": 10282 }, { "epoch": 4.661378059836808, "grad_norm": 0.08313514939242321, "learning_rate": 1.1125290695611545e-06, "loss": 0.8503, "step": 10283 }, { "epoch": 4.661831368993654, "grad_norm": 0.0783356615616068, "learning_rate": 1.1095662432459764e-06, "loss": 0.8607, "step": 10284 }, { "epoch": 4.662284678150499, "grad_norm": 0.08627322183070979, "learning_rate": 1.10660731192203e-06, "loss": 0.8588, "step": 10285 }, { "epoch": 4.662737987307343, "grad_norm": 0.07670561508243476, "learning_rate": 1.1036522758856739e-06, "loss": 0.8759, "step": 10286 }, { "epoch": 4.663191296464189, "grad_norm": 0.08399874262006352, "learning_rate": 1.100701135432849e-06, "loss": 0.8362, "step": 10287 }, { "epoch": 4.663644605621034, "grad_norm": 0.07916803887202191, "learning_rate": 1.0977538908591368e-06, "loss": 0.8547, "step": 10288 }, { "epoch": 4.664097914777878, "grad_norm": 0.0766271583244182, "learning_rate": 1.0948105424596966e-06, "loss": 0.8234, "step": 10289 }, { "epoch": 4.6645512239347235, "grad_norm": 0.07783397611699584, "learning_rate": 1.0918710905293283e-06, "loss": 0.8555, "step": 10290 }, { "epoch": 4.665004533091569, "grad_norm": 0.08880362929959891, "learning_rate": 1.0889355353624142e-06, "loss": 0.8579, "step": 10291 }, { "epoch": 4.665457842248413, "grad_norm": 0.07769592747550845, "learning_rate": 1.086003877252968e-06, "loss": 0.8382, "step": 10292 }, { "epoch": 4.6659111514052585, "grad_norm": 0.08807223048671711, "learning_rate": 1.0830761164945946e-06, "loss": 0.8695, "step": 10293 }, { "epoch": 4.666364460562104, "grad_norm": 0.07796563338483531, "learning_rate": 1.0801522533805263e-06, "loss": 0.8571, "step": 10294 }, { "epoch": 4.666817769718948, "grad_norm": 0.0811888113701953, "learning_rate": 1.0772322882035868e-06, "loss": 0.855, "step": 10295 }, { "epoch": 4.667271078875793, "grad_norm": 0.07826003821550036, "learning_rate": 1.0743162212562352e-06, "loss": 0.8593, "step": 10296 }, { "epoch": 4.667724388032639, "grad_norm": 0.08773480267274417, "learning_rate": 1.0714040528305047e-06, "loss": 0.8492, "step": 10297 }, { "epoch": 4.668177697189483, "grad_norm": 0.0793023755438468, "learning_rate": 1.0684957832180732e-06, "loss": 0.8598, "step": 10298 }, { "epoch": 4.668631006346328, "grad_norm": 0.07866815191546359, "learning_rate": 1.0655914127102096e-06, "loss": 0.8418, "step": 10299 }, { "epoch": 4.669084315503174, "grad_norm": 0.08836238957939886, "learning_rate": 1.0626909415977838e-06, "loss": 0.8521, "step": 10300 }, { "epoch": 4.669537624660018, "grad_norm": 0.08158824368010331, "learning_rate": 1.0597943701713009e-06, "loss": 0.838, "step": 10301 }, { "epoch": 4.669990933816863, "grad_norm": 0.08074154456932851, "learning_rate": 1.0569016987208536e-06, "loss": 0.8566, "step": 10302 }, { "epoch": 4.6704442429737085, "grad_norm": 0.08118375997279777, "learning_rate": 1.0540129275361521e-06, "loss": 0.8681, "step": 10303 }, { "epoch": 4.670897552130553, "grad_norm": 0.07779431153967861, "learning_rate": 1.0511280569065164e-06, "loss": 0.8602, "step": 10304 }, { "epoch": 4.671350861287398, "grad_norm": 0.08951279121371812, "learning_rate": 1.048247087120875e-06, "loss": 0.8661, "step": 10305 }, { "epoch": 4.6718041704442435, "grad_norm": 0.0753297551955938, "learning_rate": 1.0453700184677617e-06, "loss": 0.8473, "step": 10306 }, { "epoch": 4.672257479601088, "grad_norm": 0.08716117397763107, "learning_rate": 1.0424968512353327e-06, "loss": 0.8552, "step": 10307 }, { "epoch": 4.672710788757933, "grad_norm": 0.08435222882386134, "learning_rate": 1.0396275857113315e-06, "loss": 0.8636, "step": 10308 }, { "epoch": 4.6731640979147775, "grad_norm": 0.07944267225946433, "learning_rate": 1.0367622221831363e-06, "loss": 0.8533, "step": 10309 }, { "epoch": 4.673617407071623, "grad_norm": 0.07529363690405662, "learning_rate": 1.033900760937714e-06, "loss": 0.861, "step": 10310 }, { "epoch": 4.674070716228468, "grad_norm": 0.08406281017368683, "learning_rate": 1.0310432022616478e-06, "loss": 0.8549, "step": 10311 }, { "epoch": 4.674524025385312, "grad_norm": 0.0821212252467358, "learning_rate": 1.0281895464411317e-06, "loss": 0.8459, "step": 10312 }, { "epoch": 4.674977334542158, "grad_norm": 0.08054727091034117, "learning_rate": 1.025339793761968e-06, "loss": 0.8561, "step": 10313 }, { "epoch": 4.675430643699003, "grad_norm": 0.07775868533122104, "learning_rate": 1.0224939445095727e-06, "loss": 0.8501, "step": 10314 }, { "epoch": 4.675883952855847, "grad_norm": 0.08314379693498357, "learning_rate": 1.019651998968958e-06, "loss": 0.852, "step": 10315 }, { "epoch": 4.676337262012693, "grad_norm": 0.08848947967985878, "learning_rate": 1.0168139574247583e-06, "loss": 0.8597, "step": 10316 }, { "epoch": 4.676790571169538, "grad_norm": 0.08431696060701657, "learning_rate": 1.0139798201612084e-06, "loss": 0.8592, "step": 10317 }, { "epoch": 4.677243880326382, "grad_norm": 0.08457523650437959, "learning_rate": 1.0111495874621568e-06, "loss": 0.8616, "step": 10318 }, { "epoch": 4.6776971894832275, "grad_norm": 0.07728027188520409, "learning_rate": 1.0083232596110616e-06, "loss": 0.8666, "step": 10319 }, { "epoch": 4.678150498640073, "grad_norm": 0.08374508376246877, "learning_rate": 1.005500836890989e-06, "loss": 0.8579, "step": 10320 }, { "epoch": 4.678603807796917, "grad_norm": 0.08462321338740461, "learning_rate": 1.0026823195846025e-06, "loss": 0.8506, "step": 10321 }, { "epoch": 4.6790571169537625, "grad_norm": 0.07984100999770798, "learning_rate": 9.998677079742003e-07, "loss": 0.8539, "step": 10322 }, { "epoch": 4.679510426110608, "grad_norm": 0.07942336234790515, "learning_rate": 9.970570023416637e-07, "loss": 0.8472, "step": 10323 }, { "epoch": 4.679963735267452, "grad_norm": 0.08853021385847323, "learning_rate": 9.942502029684964e-07, "loss": 0.8444, "step": 10324 }, { "epoch": 4.680417044424297, "grad_norm": 0.07886197705796609, "learning_rate": 9.914473101358114e-07, "loss": 0.8322, "step": 10325 }, { "epoch": 4.680870353581143, "grad_norm": 0.08430922202367204, "learning_rate": 9.886483241243128e-07, "loss": 0.8616, "step": 10326 }, { "epoch": 4.681323662737987, "grad_norm": 0.07646282333527174, "learning_rate": 9.858532452143454e-07, "loss": 0.8468, "step": 10327 }, { "epoch": 4.681776971894832, "grad_norm": 0.0770964666470925, "learning_rate": 9.830620736858277e-07, "loss": 0.8665, "step": 10328 }, { "epoch": 4.682230281051678, "grad_norm": 0.07289597439631229, "learning_rate": 9.802748098183178e-07, "loss": 0.8567, "step": 10329 }, { "epoch": 4.682683590208522, "grad_norm": 0.08787613515572393, "learning_rate": 9.774914538909664e-07, "loss": 0.8529, "step": 10330 }, { "epoch": 4.683136899365367, "grad_norm": 0.08025542842879523, "learning_rate": 9.747120061825277e-07, "loss": 0.8597, "step": 10331 }, { "epoch": 4.6835902085222125, "grad_norm": 0.08496671277187973, "learning_rate": 9.719364669713705e-07, "loss": 0.8565, "step": 10332 }, { "epoch": 4.684043517679057, "grad_norm": 0.07210923644843098, "learning_rate": 9.691648365354812e-07, "loss": 0.8453, "step": 10333 }, { "epoch": 4.684496826835902, "grad_norm": 0.07677899547319109, "learning_rate": 9.663971151524375e-07, "loss": 0.8422, "step": 10334 }, { "epoch": 4.684950135992747, "grad_norm": 0.07531899041184287, "learning_rate": 9.636333030994493e-07, "loss": 0.8597, "step": 10335 }, { "epoch": 4.685403445149592, "grad_norm": 0.07816918756879536, "learning_rate": 9.608734006533039e-07, "loss": 0.8467, "step": 10336 }, { "epoch": 4.685856754306437, "grad_norm": 0.0817867802081224, "learning_rate": 9.581174080904244e-07, "loss": 0.846, "step": 10337 }, { "epoch": 4.6863100634632815, "grad_norm": 0.07507899236612056, "learning_rate": 9.553653256868257e-07, "loss": 0.8443, "step": 10338 }, { "epoch": 4.686763372620127, "grad_norm": 0.07284349194058688, "learning_rate": 9.52617153718145e-07, "loss": 0.8421, "step": 10339 }, { "epoch": 4.687216681776972, "grad_norm": 0.07753840701367398, "learning_rate": 9.498728924596112e-07, "loss": 0.8393, "step": 10340 }, { "epoch": 4.687669990933816, "grad_norm": 0.07936036936169073, "learning_rate": 9.471325421860756e-07, "loss": 0.8527, "step": 10341 }, { "epoch": 4.688123300090662, "grad_norm": 0.08084397013896552, "learning_rate": 9.443961031719895e-07, "loss": 0.8348, "step": 10342 }, { "epoch": 4.688576609247507, "grad_norm": 0.09211124730011015, "learning_rate": 9.416635756914094e-07, "loss": 0.8885, "step": 10343 }, { "epoch": 4.689029918404351, "grad_norm": 0.07787928289199046, "learning_rate": 9.389349600180231e-07, "loss": 0.8426, "step": 10344 }, { "epoch": 4.689483227561197, "grad_norm": 0.08185835824844158, "learning_rate": 9.362102564250919e-07, "loss": 0.8396, "step": 10345 }, { "epoch": 4.689936536718042, "grad_norm": 0.07871298176268914, "learning_rate": 9.334894651855131e-07, "loss": 0.8339, "step": 10346 }, { "epoch": 4.690389845874886, "grad_norm": 0.0772912937546811, "learning_rate": 9.307725865717754e-07, "loss": 0.8637, "step": 10347 }, { "epoch": 4.6908431550317315, "grad_norm": 0.08654827277046648, "learning_rate": 9.2805962085599e-07, "loss": 0.8606, "step": 10348 }, { "epoch": 4.691296464188577, "grad_norm": 0.07265748410102534, "learning_rate": 9.253505683098596e-07, "loss": 0.8494, "step": 10349 }, { "epoch": 4.691749773345421, "grad_norm": 0.07779190504053135, "learning_rate": 9.226454292047138e-07, "loss": 0.8427, "step": 10350 }, { "epoch": 4.6922030825022665, "grad_norm": 0.07576050226007174, "learning_rate": 9.199442038114692e-07, "loss": 0.8643, "step": 10351 }, { "epoch": 4.692656391659112, "grad_norm": 0.08459599029463494, "learning_rate": 9.172468924006695e-07, "loss": 0.855, "step": 10352 }, { "epoch": 4.693109700815956, "grad_norm": 0.07749264897049125, "learning_rate": 9.145534952424495e-07, "loss": 0.8489, "step": 10353 }, { "epoch": 4.693563009972801, "grad_norm": 0.07476215471437203, "learning_rate": 9.118640126065759e-07, "loss": 0.8465, "step": 10354 }, { "epoch": 4.694016319129647, "grad_norm": 0.08021360855037181, "learning_rate": 9.09178444762393e-07, "loss": 0.8568, "step": 10355 }, { "epoch": 4.694469628286491, "grad_norm": 0.08842023167565115, "learning_rate": 9.064967919788769e-07, "loss": 0.8708, "step": 10356 }, { "epoch": 4.694922937443336, "grad_norm": 0.07238185276840521, "learning_rate": 9.038190545246039e-07, "loss": 0.8418, "step": 10357 }, { "epoch": 4.695376246600182, "grad_norm": 0.08080358175318819, "learning_rate": 9.011452326677506e-07, "loss": 0.864, "step": 10358 }, { "epoch": 4.695829555757026, "grad_norm": 0.0861168965746262, "learning_rate": 8.984753266761115e-07, "loss": 0.873, "step": 10359 }, { "epoch": 4.696282864913871, "grad_norm": 0.0776767138691433, "learning_rate": 8.958093368170861e-07, "loss": 0.8762, "step": 10360 }, { "epoch": 4.6967361740707165, "grad_norm": 0.08432072132897643, "learning_rate": 8.931472633576787e-07, "loss": 0.8755, "step": 10361 }, { "epoch": 4.697189483227561, "grad_norm": 0.07812483656609057, "learning_rate": 8.904891065645072e-07, "loss": 0.838, "step": 10362 }, { "epoch": 4.697642792384406, "grad_norm": 0.07701477876608301, "learning_rate": 8.878348667037939e-07, "loss": 0.8533, "step": 10363 }, { "epoch": 4.6980961015412515, "grad_norm": 0.08038449448098034, "learning_rate": 8.851845440413664e-07, "loss": 0.8772, "step": 10364 }, { "epoch": 4.698549410698096, "grad_norm": 0.07625133474569426, "learning_rate": 8.825381388426657e-07, "loss": 0.8731, "step": 10365 }, { "epoch": 4.699002719854941, "grad_norm": 0.07947829984948473, "learning_rate": 8.798956513727331e-07, "loss": 0.8612, "step": 10366 }, { "epoch": 4.699456029011786, "grad_norm": 0.08000081493408694, "learning_rate": 8.77257081896219e-07, "loss": 0.856, "step": 10367 }, { "epoch": 4.699909338168631, "grad_norm": 0.07166944055760277, "learning_rate": 8.746224306773921e-07, "loss": 0.86, "step": 10368 }, { "epoch": 4.700362647325476, "grad_norm": 0.07647669007584529, "learning_rate": 8.719916979801169e-07, "loss": 0.8598, "step": 10369 }, { "epoch": 4.700815956482321, "grad_norm": 0.08169377260351328, "learning_rate": 8.693648840678626e-07, "loss": 0.8461, "step": 10370 }, { "epoch": 4.701269265639166, "grad_norm": 0.0773019697430656, "learning_rate": 8.66741989203721e-07, "loss": 0.8643, "step": 10371 }, { "epoch": 4.701722574796011, "grad_norm": 0.07780502086328502, "learning_rate": 8.641230136503843e-07, "loss": 0.8391, "step": 10372 }, { "epoch": 4.702175883952856, "grad_norm": 0.07764785921934488, "learning_rate": 8.615079576701402e-07, "loss": 0.8509, "step": 10373 }, { "epoch": 4.702629193109701, "grad_norm": 0.07714723027642348, "learning_rate": 8.588968215249038e-07, "loss": 0.8506, "step": 10374 }, { "epoch": 4.703082502266546, "grad_norm": 0.07701686407952643, "learning_rate": 8.562896054761771e-07, "loss": 0.8414, "step": 10375 }, { "epoch": 4.703535811423391, "grad_norm": 0.08360077395732916, "learning_rate": 8.536863097850934e-07, "loss": 0.8658, "step": 10376 }, { "epoch": 4.7039891205802356, "grad_norm": 0.07784350357781399, "learning_rate": 8.51086934712373e-07, "loss": 0.8344, "step": 10377 }, { "epoch": 4.704442429737081, "grad_norm": 0.07478338996218156, "learning_rate": 8.484914805183542e-07, "loss": 0.8552, "step": 10378 }, { "epoch": 4.704895738893926, "grad_norm": 0.08261557029496815, "learning_rate": 8.458999474629759e-07, "loss": 0.8596, "step": 10379 }, { "epoch": 4.7053490480507705, "grad_norm": 0.07612804172833412, "learning_rate": 8.433123358057904e-07, "loss": 0.8519, "step": 10380 }, { "epoch": 4.705802357207616, "grad_norm": 0.0839137857785578, "learning_rate": 8.407286458059549e-07, "loss": 0.8502, "step": 10381 }, { "epoch": 4.706255666364461, "grad_norm": 0.0743750892974289, "learning_rate": 8.381488777222314e-07, "loss": 0.8498, "step": 10382 }, { "epoch": 4.706708975521305, "grad_norm": 0.08169721860473868, "learning_rate": 8.355730318129951e-07, "loss": 0.8437, "step": 10383 }, { "epoch": 4.707162284678151, "grad_norm": 0.07861479815690094, "learning_rate": 8.330011083362178e-07, "loss": 0.8554, "step": 10384 }, { "epoch": 4.707615593834996, "grad_norm": 0.08021063888425484, "learning_rate": 8.304331075494931e-07, "loss": 0.8412, "step": 10385 }, { "epoch": 4.70806890299184, "grad_norm": 0.07755659888558893, "learning_rate": 8.278690297100067e-07, "loss": 0.8741, "step": 10386 }, { "epoch": 4.708522212148686, "grad_norm": 0.07831612751803646, "learning_rate": 8.253088750745664e-07, "loss": 0.8505, "step": 10387 }, { "epoch": 4.70897552130553, "grad_norm": 0.0815953303757934, "learning_rate": 8.22752643899567e-07, "loss": 0.851, "step": 10388 }, { "epoch": 4.709428830462375, "grad_norm": 0.0745850088373066, "learning_rate": 8.20200336441035e-07, "loss": 0.8498, "step": 10389 }, { "epoch": 4.7098821396192205, "grad_norm": 0.08471206359832521, "learning_rate": 8.176519529545835e-07, "loss": 0.8582, "step": 10390 }, { "epoch": 4.710335448776065, "grad_norm": 0.08079468867560835, "learning_rate": 8.151074936954484e-07, "loss": 0.861, "step": 10391 }, { "epoch": 4.71078875793291, "grad_norm": 0.07779165314259051, "learning_rate": 8.12566958918457e-07, "loss": 0.859, "step": 10392 }, { "epoch": 4.7112420670897555, "grad_norm": 0.08837494571731042, "learning_rate": 8.100303488780547e-07, "loss": 0.8547, "step": 10393 }, { "epoch": 4.7116953762466, "grad_norm": 0.07655619859536501, "learning_rate": 8.074976638282873e-07, "loss": 0.8464, "step": 10394 }, { "epoch": 4.712148685403445, "grad_norm": 0.07715297339560906, "learning_rate": 8.049689040228181e-07, "loss": 0.8459, "step": 10395 }, { "epoch": 4.71260199456029, "grad_norm": 0.07691209868145878, "learning_rate": 8.024440697149027e-07, "loss": 0.8496, "step": 10396 }, { "epoch": 4.713055303717135, "grad_norm": 0.08216816556259131, "learning_rate": 7.999231611574143e-07, "loss": 0.8675, "step": 10397 }, { "epoch": 4.71350861287398, "grad_norm": 0.07553661033843397, "learning_rate": 7.974061786028309e-07, "loss": 0.8455, "step": 10398 }, { "epoch": 4.713961922030825, "grad_norm": 0.07264205923340905, "learning_rate": 7.948931223032264e-07, "loss": 0.853, "step": 10399 }, { "epoch": 4.71441523118767, "grad_norm": 0.07856685808520712, "learning_rate": 7.923839925103061e-07, "loss": 0.8683, "step": 10400 }, { "epoch": 4.714868540344515, "grad_norm": 0.07584155136926353, "learning_rate": 7.89878789475349e-07, "loss": 0.8322, "step": 10401 }, { "epoch": 4.71532184950136, "grad_norm": 0.07625016069232818, "learning_rate": 7.873775134492745e-07, "loss": 0.8482, "step": 10402 }, { "epoch": 4.715775158658205, "grad_norm": 0.07980612754747769, "learning_rate": 7.848801646825798e-07, "loss": 0.8497, "step": 10403 }, { "epoch": 4.71622846781505, "grad_norm": 0.07805585002192222, "learning_rate": 7.82386743425394e-07, "loss": 0.8669, "step": 10404 }, { "epoch": 4.716681776971895, "grad_norm": 0.07923332854826072, "learning_rate": 7.798972499274282e-07, "loss": 0.8783, "step": 10405 }, { "epoch": 4.7171350861287396, "grad_norm": 0.07879236523001906, "learning_rate": 7.774116844380253e-07, "loss": 0.8495, "step": 10406 }, { "epoch": 4.717588395285585, "grad_norm": 0.07894063173017604, "learning_rate": 7.749300472061106e-07, "loss": 0.8286, "step": 10407 }, { "epoch": 4.71804170444243, "grad_norm": 0.0779215122352862, "learning_rate": 7.724523384802318e-07, "loss": 0.838, "step": 10408 }, { "epoch": 4.7184950135992745, "grad_norm": 0.08197970814409854, "learning_rate": 7.699785585085373e-07, "loss": 0.8524, "step": 10409 }, { "epoch": 4.71894832275612, "grad_norm": 0.07928573653492316, "learning_rate": 7.675087075387932e-07, "loss": 0.8635, "step": 10410 }, { "epoch": 4.719401631912965, "grad_norm": 0.07960223080518192, "learning_rate": 7.65042785818344e-07, "loss": 0.8348, "step": 10411 }, { "epoch": 4.719854941069809, "grad_norm": 0.07526280709445834, "learning_rate": 7.625807935941742e-07, "loss": 0.8677, "step": 10412 }, { "epoch": 4.720308250226655, "grad_norm": 0.07678997329675195, "learning_rate": 7.601227311128556e-07, "loss": 0.8441, "step": 10413 }, { "epoch": 4.720761559383499, "grad_norm": 0.07869386205056188, "learning_rate": 7.576685986205689e-07, "loss": 0.8561, "step": 10414 }, { "epoch": 4.721214868540344, "grad_norm": 0.07517599012094191, "learning_rate": 7.552183963631043e-07, "loss": 0.8482, "step": 10415 }, { "epoch": 4.72166817769719, "grad_norm": 0.08125465374642349, "learning_rate": 7.527721245858522e-07, "loss": 0.8481, "step": 10416 }, { "epoch": 4.722121486854034, "grad_norm": 0.07951432617465318, "learning_rate": 7.503297835338163e-07, "loss": 0.857, "step": 10417 }, { "epoch": 4.722574796010879, "grad_norm": 0.07397957579981453, "learning_rate": 7.478913734516058e-07, "loss": 0.8638, "step": 10418 }, { "epoch": 4.7230281051677245, "grad_norm": 0.07554964705891283, "learning_rate": 7.454568945834384e-07, "loss": 0.8467, "step": 10419 }, { "epoch": 4.723481414324569, "grad_norm": 0.08475357142684534, "learning_rate": 7.430263471731236e-07, "loss": 0.8268, "step": 10420 }, { "epoch": 4.723934723481414, "grad_norm": 0.13189940041426523, "learning_rate": 7.405997314641022e-07, "loss": 0.8618, "step": 10421 }, { "epoch": 4.7243880326382595, "grad_norm": 0.07900584048521118, "learning_rate": 7.381770476993932e-07, "loss": 0.8615, "step": 10422 }, { "epoch": 4.724841341795104, "grad_norm": 0.08085104603336489, "learning_rate": 7.357582961216425e-07, "loss": 0.8543, "step": 10423 }, { "epoch": 4.725294650951949, "grad_norm": 0.0845319849008638, "learning_rate": 7.333434769730963e-07, "loss": 0.855, "step": 10424 }, { "epoch": 4.725747960108794, "grad_norm": 0.07652646418073943, "learning_rate": 7.30932590495601e-07, "loss": 0.8474, "step": 10425 }, { "epoch": 4.726201269265639, "grad_norm": 0.07704463705651397, "learning_rate": 7.28525636930617e-07, "loss": 0.8436, "step": 10426 }, { "epoch": 4.726654578422484, "grad_norm": 0.07412313006772382, "learning_rate": 7.261226165192093e-07, "loss": 0.8739, "step": 10427 }, { "epoch": 4.727107887579329, "grad_norm": 0.08171732351136199, "learning_rate": 7.237235295020428e-07, "loss": 0.8218, "step": 10428 }, { "epoch": 4.727561196736174, "grad_norm": 0.0767930271535892, "learning_rate": 7.213283761193968e-07, "loss": 0.8616, "step": 10429 }, { "epoch": 4.728014505893019, "grad_norm": 0.10101333846777838, "learning_rate": 7.189371566111503e-07, "loss": 0.8479, "step": 10430 }, { "epoch": 4.728467815049864, "grad_norm": 0.07300941713915256, "learning_rate": 7.165498712167917e-07, "loss": 0.856, "step": 10431 }, { "epoch": 4.728921124206709, "grad_norm": 0.07451946196881841, "learning_rate": 7.141665201754189e-07, "loss": 0.8617, "step": 10432 }, { "epoch": 4.729374433363554, "grad_norm": 0.07712922651897647, "learning_rate": 7.117871037257207e-07, "loss": 0.8492, "step": 10433 }, { "epoch": 4.729827742520399, "grad_norm": 0.0826278666270488, "learning_rate": 7.094116221060177e-07, "loss": 0.8495, "step": 10434 }, { "epoch": 4.7302810516772436, "grad_norm": 0.08368881456479722, "learning_rate": 7.070400755542084e-07, "loss": 0.8341, "step": 10435 }, { "epoch": 4.730734360834089, "grad_norm": 0.07739848499238282, "learning_rate": 7.046724643078229e-07, "loss": 0.8565, "step": 10436 }, { "epoch": 4.731187669990934, "grad_norm": 0.07499072803629842, "learning_rate": 7.023087886039693e-07, "loss": 0.8453, "step": 10437 }, { "epoch": 4.7316409791477785, "grad_norm": 0.08909062801557299, "learning_rate": 6.999490486793869e-07, "loss": 0.8538, "step": 10438 }, { "epoch": 4.732094288304624, "grad_norm": 0.09046542127478084, "learning_rate": 6.975932447704115e-07, "loss": 0.8334, "step": 10439 }, { "epoch": 4.732547597461469, "grad_norm": 0.07925448923085325, "learning_rate": 6.95241377112974e-07, "loss": 0.8486, "step": 10440 }, { "epoch": 4.733000906618313, "grad_norm": 0.08618513240496478, "learning_rate": 6.928934459426329e-07, "loss": 0.8739, "step": 10441 }, { "epoch": 4.733454215775159, "grad_norm": 0.07354443500359552, "learning_rate": 6.905494514945288e-07, "loss": 0.867, "step": 10442 }, { "epoch": 4.733907524932004, "grad_norm": 0.07898059927503022, "learning_rate": 6.882093940034295e-07, "loss": 0.8809, "step": 10443 }, { "epoch": 4.734360834088848, "grad_norm": 0.0756769855620378, "learning_rate": 6.858732737036944e-07, "loss": 0.8678, "step": 10444 }, { "epoch": 4.734814143245694, "grad_norm": 0.07934903587965995, "learning_rate": 6.83541090829296e-07, "loss": 0.8458, "step": 10445 }, { "epoch": 4.735267452402539, "grad_norm": 0.08223824701039745, "learning_rate": 6.81212845613799e-07, "loss": 0.8387, "step": 10446 }, { "epoch": 4.735720761559383, "grad_norm": 0.07535503541508864, "learning_rate": 6.788885382903987e-07, "loss": 0.8374, "step": 10447 }, { "epoch": 4.7361740707162285, "grad_norm": 0.08326748315579839, "learning_rate": 6.76568169091869e-07, "loss": 0.8421, "step": 10448 }, { "epoch": 4.736627379873074, "grad_norm": 0.0848115702761386, "learning_rate": 6.742517382506064e-07, "loss": 0.8532, "step": 10449 }, { "epoch": 4.737080689029918, "grad_norm": 0.07649352786383894, "learning_rate": 6.719392459986074e-07, "loss": 0.8472, "step": 10450 }, { "epoch": 4.7375339981867635, "grad_norm": 0.07534884283139387, "learning_rate": 6.696306925674823e-07, "loss": 0.8327, "step": 10451 }, { "epoch": 4.737987307343609, "grad_norm": 0.07485747987005162, "learning_rate": 6.673260781884239e-07, "loss": 0.8338, "step": 10452 }, { "epoch": 4.738440616500453, "grad_norm": 0.08120245483293825, "learning_rate": 6.650254030922654e-07, "loss": 0.8505, "step": 10453 }, { "epoch": 4.738893925657298, "grad_norm": 0.07234179197060245, "learning_rate": 6.627286675094136e-07, "loss": 0.8573, "step": 10454 }, { "epoch": 4.739347234814144, "grad_norm": 0.08036760819389613, "learning_rate": 6.604358716698889e-07, "loss": 0.8638, "step": 10455 }, { "epoch": 4.739800543970988, "grad_norm": 0.08301478338703364, "learning_rate": 6.581470158033343e-07, "loss": 0.8615, "step": 10456 }, { "epoch": 4.740253853127833, "grad_norm": 0.0893914419713817, "learning_rate": 6.558621001389798e-07, "loss": 0.8832, "step": 10457 }, { "epoch": 4.740707162284679, "grad_norm": 0.07861888514698764, "learning_rate": 6.535811249056645e-07, "loss": 0.8405, "step": 10458 }, { "epoch": 4.741160471441523, "grad_norm": 0.06966403511768293, "learning_rate": 6.513040903318368e-07, "loss": 0.8319, "step": 10459 }, { "epoch": 4.741613780598368, "grad_norm": 0.07500415936716984, "learning_rate": 6.490309966455499e-07, "loss": 0.8592, "step": 10460 }, { "epoch": 4.7420670897552135, "grad_norm": 0.07132183186226346, "learning_rate": 6.467618440744528e-07, "loss": 0.8691, "step": 10461 }, { "epoch": 4.742520398912058, "grad_norm": 0.07555084784091289, "learning_rate": 6.444966328458214e-07, "loss": 0.8527, "step": 10462 }, { "epoch": 4.742973708068903, "grad_norm": 0.07569301285141769, "learning_rate": 6.422353631865097e-07, "loss": 0.8422, "step": 10463 }, { "epoch": 4.743427017225748, "grad_norm": 0.08026104512926306, "learning_rate": 6.399780353230078e-07, "loss": 0.8548, "step": 10464 }, { "epoch": 4.743880326382593, "grad_norm": 0.07883793746013695, "learning_rate": 6.377246494813705e-07, "loss": 0.8519, "step": 10465 }, { "epoch": 4.744333635539438, "grad_norm": 0.07601300065054585, "learning_rate": 6.354752058873015e-07, "loss": 0.8524, "step": 10466 }, { "epoch": 4.7447869446962825, "grad_norm": 0.07799654782702917, "learning_rate": 6.332297047660741e-07, "loss": 0.8393, "step": 10467 }, { "epoch": 4.745240253853128, "grad_norm": 0.07641461477756234, "learning_rate": 6.309881463425926e-07, "loss": 0.8644, "step": 10468 }, { "epoch": 4.745693563009973, "grad_norm": 0.07904918141246249, "learning_rate": 6.287505308413533e-07, "loss": 0.8295, "step": 10469 }, { "epoch": 4.746146872166817, "grad_norm": 0.07315304142918497, "learning_rate": 6.265168584864523e-07, "loss": 0.8516, "step": 10470 }, { "epoch": 4.746600181323663, "grad_norm": 0.07344575482879821, "learning_rate": 6.242871295016084e-07, "loss": 0.843, "step": 10471 }, { "epoch": 4.747053490480508, "grad_norm": 0.07752381832299547, "learning_rate": 6.22061344110132e-07, "loss": 0.8731, "step": 10472 }, { "epoch": 4.747506799637352, "grad_norm": 0.08038145951249226, "learning_rate": 6.19839502534938e-07, "loss": 0.8671, "step": 10473 }, { "epoch": 4.747960108794198, "grad_norm": 0.07974451577006385, "learning_rate": 6.17621604998555e-07, "loss": 0.8687, "step": 10474 }, { "epoch": 4.748413417951043, "grad_norm": 0.07680883762713935, "learning_rate": 6.154076517231167e-07, "loss": 0.8575, "step": 10475 }, { "epoch": 4.748866727107887, "grad_norm": 0.07331722706189299, "learning_rate": 6.131976429303432e-07, "loss": 0.8392, "step": 10476 }, { "epoch": 4.7493200362647325, "grad_norm": 0.07199634192291988, "learning_rate": 6.109915788415865e-07, "loss": 0.866, "step": 10477 }, { "epoch": 4.749773345421578, "grad_norm": 0.07401310126589669, "learning_rate": 6.08789459677781e-07, "loss": 0.8514, "step": 10478 }, { "epoch": 4.750226654578422, "grad_norm": 0.07897650425170653, "learning_rate": 6.065912856594835e-07, "loss": 0.8635, "step": 10479 }, { "epoch": 4.7506799637352675, "grad_norm": 0.07590127064876985, "learning_rate": 6.043970570068469e-07, "loss": 0.8415, "step": 10480 }, { "epoch": 4.751133272892113, "grad_norm": 0.0736002523997185, "learning_rate": 6.022067739396198e-07, "loss": 0.8464, "step": 10481 }, { "epoch": 4.751586582048957, "grad_norm": 0.08007762324982046, "learning_rate": 6.000204366771778e-07, "loss": 0.8589, "step": 10482 }, { "epoch": 4.752039891205802, "grad_norm": 0.07421216031284801, "learning_rate": 5.978380454384836e-07, "loss": 0.8647, "step": 10483 }, { "epoch": 4.752493200362648, "grad_norm": 0.07809310401445349, "learning_rate": 5.956596004421045e-07, "loss": 0.84, "step": 10484 }, { "epoch": 4.752946509519492, "grad_norm": 0.07745303019247549, "learning_rate": 5.93485101906226e-07, "loss": 0.8559, "step": 10485 }, { "epoch": 4.753399818676337, "grad_norm": 0.07544454681719287, "learning_rate": 5.913145500486294e-07, "loss": 0.8757, "step": 10486 }, { "epoch": 4.753853127833183, "grad_norm": 0.07732400930510222, "learning_rate": 5.89147945086701e-07, "loss": 0.851, "step": 10487 }, { "epoch": 4.754306436990027, "grad_norm": 0.08110728389671787, "learning_rate": 5.869852872374315e-07, "loss": 0.8427, "step": 10488 }, { "epoch": 4.754759746146872, "grad_norm": 0.07186318353694464, "learning_rate": 5.848265767174166e-07, "loss": 0.8488, "step": 10489 }, { "epoch": 4.7552130553037175, "grad_norm": 0.07711232757881586, "learning_rate": 5.826718137428611e-07, "loss": 0.858, "step": 10490 }, { "epoch": 4.755666364460562, "grad_norm": 0.07741993373649399, "learning_rate": 5.805209985295657e-07, "loss": 0.8784, "step": 10491 }, { "epoch": 4.756119673617407, "grad_norm": 0.0810916789955847, "learning_rate": 5.783741312929448e-07, "loss": 0.8625, "step": 10492 }, { "epoch": 4.756572982774252, "grad_norm": 0.07667043494787465, "learning_rate": 5.76231212248013e-07, "loss": 0.8558, "step": 10493 }, { "epoch": 4.757026291931097, "grad_norm": 0.07457640886694726, "learning_rate": 5.740922416093941e-07, "loss": 0.8652, "step": 10494 }, { "epoch": 4.757479601087942, "grad_norm": 0.07435531532746703, "learning_rate": 5.719572195913037e-07, "loss": 0.8533, "step": 10495 }, { "epoch": 4.7579329102447865, "grad_norm": 0.08153963138170127, "learning_rate": 5.698261464075749e-07, "loss": 0.8639, "step": 10496 }, { "epoch": 4.758386219401632, "grad_norm": 0.07528055371913497, "learning_rate": 5.676990222716416e-07, "loss": 0.8408, "step": 10497 }, { "epoch": 4.758839528558477, "grad_norm": 0.0755676004819849, "learning_rate": 5.655758473965378e-07, "loss": 0.8625, "step": 10498 }, { "epoch": 4.759292837715321, "grad_norm": 0.07825481668258227, "learning_rate": 5.634566219949111e-07, "loss": 0.8442, "step": 10499 }, { "epoch": 4.759746146872167, "grad_norm": 0.08271937025463008, "learning_rate": 5.613413462790051e-07, "loss": 0.846, "step": 10500 }, { "epoch": 4.760199456029012, "grad_norm": 0.0739857765127675, "learning_rate": 5.592300204606727e-07, "loss": 0.8439, "step": 10501 }, { "epoch": 4.760652765185856, "grad_norm": 0.07554211977250898, "learning_rate": 5.571226447513667e-07, "loss": 0.8546, "step": 10502 }, { "epoch": 4.761106074342702, "grad_norm": 0.07337667804344974, "learning_rate": 5.550192193621495e-07, "loss": 0.8576, "step": 10503 }, { "epoch": 4.761559383499547, "grad_norm": 0.07257076633808926, "learning_rate": 5.529197445036838e-07, "loss": 0.8308, "step": 10504 }, { "epoch": 4.762012692656391, "grad_norm": 0.07786856115842997, "learning_rate": 5.508242203862368e-07, "loss": 0.8517, "step": 10505 }, { "epoch": 4.7624660018132365, "grad_norm": 0.0760108962624538, "learning_rate": 5.487326472196852e-07, "loss": 0.8336, "step": 10506 }, { "epoch": 4.762919310970082, "grad_norm": 0.07492875293579361, "learning_rate": 5.466450252135014e-07, "loss": 0.8453, "step": 10507 }, { "epoch": 4.763372620126926, "grad_norm": 0.07851574937866686, "learning_rate": 5.445613545767714e-07, "loss": 0.8466, "step": 10508 }, { "epoch": 4.7638259292837715, "grad_norm": 0.07084715336444697, "learning_rate": 5.424816355181817e-07, "loss": 0.8528, "step": 10509 }, { "epoch": 4.764279238440617, "grad_norm": 0.07660642286730424, "learning_rate": 5.40405868246019e-07, "loss": 0.8598, "step": 10510 }, { "epoch": 4.764732547597461, "grad_norm": 0.1511640215787473, "learning_rate": 5.383340529681702e-07, "loss": 0.8933, "step": 10511 }, { "epoch": 4.765185856754306, "grad_norm": 0.07670433569521544, "learning_rate": 5.362661898921495e-07, "loss": 0.8621, "step": 10512 }, { "epoch": 4.765639165911152, "grad_norm": 0.07267787512757802, "learning_rate": 5.342022792250489e-07, "loss": 0.8424, "step": 10513 }, { "epoch": 4.766092475067996, "grad_norm": 0.0795237721359174, "learning_rate": 5.321423211735788e-07, "loss": 0.8562, "step": 10514 }, { "epoch": 4.766545784224841, "grad_norm": 0.07434860526629525, "learning_rate": 5.300863159440495e-07, "loss": 0.8513, "step": 10515 }, { "epoch": 4.766999093381687, "grad_norm": 0.07697098915974397, "learning_rate": 5.280342637423764e-07, "loss": 0.8525, "step": 10516 }, { "epoch": 4.767452402538531, "grad_norm": 0.07383080471304142, "learning_rate": 5.25986164774075e-07, "loss": 0.8514, "step": 10517 }, { "epoch": 4.767905711695376, "grad_norm": 0.0769177382082394, "learning_rate": 5.239420192442746e-07, "loss": 0.8467, "step": 10518 }, { "epoch": 4.7683590208522215, "grad_norm": 0.08504503982818824, "learning_rate": 5.219018273576959e-07, "loss": 0.8723, "step": 10519 }, { "epoch": 4.768812330009066, "grad_norm": 0.08137292319230614, "learning_rate": 5.198655893186733e-07, "loss": 0.8651, "step": 10520 }, { "epoch": 4.769265639165911, "grad_norm": 0.07571850737013659, "learning_rate": 5.178333053311458e-07, "loss": 0.8578, "step": 10521 }, { "epoch": 4.7697189483227564, "grad_norm": 0.07894891692802779, "learning_rate": 5.158049755986483e-07, "loss": 0.8558, "step": 10522 }, { "epoch": 4.770172257479601, "grad_norm": 0.07514841957140501, "learning_rate": 5.137806003243162e-07, "loss": 0.8728, "step": 10523 }, { "epoch": 4.770625566636446, "grad_norm": 0.07894543285598445, "learning_rate": 5.117601797109162e-07, "loss": 0.8493, "step": 10524 }, { "epoch": 4.771078875793291, "grad_norm": 0.0832231692542738, "learning_rate": 5.097437139607797e-07, "loss": 0.8653, "step": 10525 }, { "epoch": 4.771532184950136, "grad_norm": 0.08027250242223505, "learning_rate": 5.077312032758741e-07, "loss": 0.8493, "step": 10526 }, { "epoch": 4.771985494106981, "grad_norm": 0.07630609514761255, "learning_rate": 5.057226478577537e-07, "loss": 0.8566, "step": 10527 }, { "epoch": 4.772438803263826, "grad_norm": 0.08757662788640748, "learning_rate": 5.037180479075776e-07, "loss": 0.8583, "step": 10528 }, { "epoch": 4.772892112420671, "grad_norm": 0.0781156069664937, "learning_rate": 5.01717403626123e-07, "loss": 0.8536, "step": 10529 }, { "epoch": 4.773345421577516, "grad_norm": 0.08022692579934573, "learning_rate": 4.997207152137451e-07, "loss": 0.8579, "step": 10530 }, { "epoch": 4.773798730734361, "grad_norm": 0.07762131949467059, "learning_rate": 4.977279828704351e-07, "loss": 0.8568, "step": 10531 }, { "epoch": 4.774252039891206, "grad_norm": 0.0794565002500699, "learning_rate": 4.957392067957578e-07, "loss": 0.8673, "step": 10532 }, { "epoch": 4.774705349048051, "grad_norm": 0.08137413392051468, "learning_rate": 4.937543871889006e-07, "loss": 0.8648, "step": 10533 }, { "epoch": 4.775158658204896, "grad_norm": 0.0790527056174317, "learning_rate": 4.917735242486465e-07, "loss": 0.8443, "step": 10534 }, { "epoch": 4.7756119673617405, "grad_norm": 0.0741559974550898, "learning_rate": 4.897966181733926e-07, "loss": 0.8579, "step": 10535 }, { "epoch": 4.776065276518586, "grad_norm": 0.07152527341430225, "learning_rate": 4.878236691611227e-07, "loss": 0.8518, "step": 10536 }, { "epoch": 4.776518585675431, "grad_norm": 0.07334600395009123, "learning_rate": 4.858546774094341e-07, "loss": 0.8457, "step": 10537 }, { "epoch": 4.7769718948322755, "grad_norm": 0.07746425760699038, "learning_rate": 4.838896431155338e-07, "loss": 0.8864, "step": 10538 }, { "epoch": 4.777425203989121, "grad_norm": 0.07343907609666422, "learning_rate": 4.819285664762152e-07, "loss": 0.8704, "step": 10539 }, { "epoch": 4.777878513145966, "grad_norm": 0.07615175601094629, "learning_rate": 4.799714476878992e-07, "loss": 0.8495, "step": 10540 }, { "epoch": 4.77833182230281, "grad_norm": 0.07255633398499954, "learning_rate": 4.780182869465888e-07, "loss": 0.8693, "step": 10541 }, { "epoch": 4.778785131459656, "grad_norm": 0.07489185899722266, "learning_rate": 4.760690844478966e-07, "loss": 0.8575, "step": 10542 }, { "epoch": 4.779238440616501, "grad_norm": 0.07290444811438955, "learning_rate": 4.7412384038704405e-07, "loss": 0.8524, "step": 10543 }, { "epoch": 4.779691749773345, "grad_norm": 0.07666524288325594, "learning_rate": 4.721825549588577e-07, "loss": 0.8428, "step": 10544 }, { "epoch": 4.780145058930191, "grad_norm": 0.07598220299015357, "learning_rate": 4.702452283577508e-07, "loss": 0.8613, "step": 10545 }, { "epoch": 4.780598368087035, "grad_norm": 0.07398317084756213, "learning_rate": 4.683118607777681e-07, "loss": 0.8545, "step": 10546 }, { "epoch": 4.78105167724388, "grad_norm": 0.08128229590766368, "learning_rate": 4.663824524125282e-07, "loss": 0.8568, "step": 10547 }, { "epoch": 4.7815049864007255, "grad_norm": 0.07795099808103735, "learning_rate": 4.6445700345527645e-07, "loss": 0.8606, "step": 10548 }, { "epoch": 4.78195829555757, "grad_norm": 0.0771730531069197, "learning_rate": 4.6253551409884524e-07, "loss": 0.8663, "step": 10549 }, { "epoch": 4.782411604714415, "grad_norm": 0.07118765762938079, "learning_rate": 4.606179845356851e-07, "loss": 0.8349, "step": 10550 }, { "epoch": 4.7828649138712604, "grad_norm": 0.07082770634996625, "learning_rate": 4.587044149578379e-07, "loss": 0.8599, "step": 10551 }, { "epoch": 4.783318223028105, "grad_norm": 0.0835200730001397, "learning_rate": 4.5679480555695044e-07, "loss": 0.886, "step": 10552 }, { "epoch": 4.78377153218495, "grad_norm": 0.07188384237363893, "learning_rate": 4.548891565242786e-07, "loss": 0.8488, "step": 10553 }, { "epoch": 4.784224841341795, "grad_norm": 0.07650680548208624, "learning_rate": 4.529874680506785e-07, "loss": 0.8567, "step": 10554 }, { "epoch": 4.78467815049864, "grad_norm": 0.07520304567961059, "learning_rate": 4.5108974032661125e-07, "loss": 0.8294, "step": 10555 }, { "epoch": 4.785131459655485, "grad_norm": 0.07873729385159414, "learning_rate": 4.4919597354213807e-07, "loss": 0.8595, "step": 10556 }, { "epoch": 4.78558476881233, "grad_norm": 0.07162361489410261, "learning_rate": 4.4730616788692504e-07, "loss": 0.8686, "step": 10557 }, { "epoch": 4.786038077969175, "grad_norm": 0.07989593712074143, "learning_rate": 4.45420323550243e-07, "loss": 0.846, "step": 10558 }, { "epoch": 4.78649138712602, "grad_norm": 0.07772931159681588, "learning_rate": 4.435384407209675e-07, "loss": 0.8489, "step": 10559 }, { "epoch": 4.786944696282865, "grad_norm": 0.07940202666470482, "learning_rate": 4.4166051958757006e-07, "loss": 0.8603, "step": 10560 }, { "epoch": 4.78739800543971, "grad_norm": 0.07977336696451057, "learning_rate": 4.3978656033812683e-07, "loss": 0.8646, "step": 10561 }, { "epoch": 4.787851314596555, "grad_norm": 0.07474356539026387, "learning_rate": 4.379165631603277e-07, "loss": 0.8465, "step": 10562 }, { "epoch": 4.7883046237534, "grad_norm": 0.07103931783238158, "learning_rate": 4.3605052824145844e-07, "loss": 0.8417, "step": 10563 }, { "epoch": 4.7887579329102445, "grad_norm": 0.07819683456662062, "learning_rate": 4.341884557684006e-07, "loss": 0.8547, "step": 10564 }, { "epoch": 4.78921124206709, "grad_norm": 0.07590302494534702, "learning_rate": 4.32330345927654e-07, "loss": 0.861, "step": 10565 }, { "epoch": 4.789664551223935, "grad_norm": 0.08067512837932632, "learning_rate": 4.3047619890530966e-07, "loss": 0.8566, "step": 10566 }, { "epoch": 4.7901178603807795, "grad_norm": 0.07323186030394178, "learning_rate": 4.2862601488706355e-07, "loss": 0.8584, "step": 10567 }, { "epoch": 4.790571169537625, "grad_norm": 0.07168888057160597, "learning_rate": 4.267797940582252e-07, "loss": 0.8519, "step": 10568 }, { "epoch": 4.79102447869447, "grad_norm": 0.07466380881614089, "learning_rate": 4.249375366036912e-07, "loss": 0.8671, "step": 10569 }, { "epoch": 4.791477787851314, "grad_norm": 0.07425397913844196, "learning_rate": 4.2309924270797166e-07, "loss": 0.8687, "step": 10570 }, { "epoch": 4.79193109700816, "grad_norm": 0.07508024237260279, "learning_rate": 4.212649125551771e-07, "loss": 0.8537, "step": 10571 }, { "epoch": 4.792384406165004, "grad_norm": 0.0760413654069483, "learning_rate": 4.194345463290228e-07, "loss": 0.8483, "step": 10572 }, { "epoch": 4.792837715321849, "grad_norm": 0.0805172968939755, "learning_rate": 4.1760814421282437e-07, "loss": 0.8486, "step": 10573 }, { "epoch": 4.793291024478695, "grad_norm": 0.07876850346932358, "learning_rate": 4.1578570638949765e-07, "loss": 0.8694, "step": 10574 }, { "epoch": 4.793744333635539, "grad_norm": 0.07144081061099838, "learning_rate": 4.139672330415723e-07, "loss": 0.827, "step": 10575 }, { "epoch": 4.794197642792384, "grad_norm": 0.08303288043840508, "learning_rate": 4.121527243511647e-07, "loss": 0.8534, "step": 10576 }, { "epoch": 4.7946509519492295, "grad_norm": 0.07746520522336597, "learning_rate": 4.1034218050001406e-07, "loss": 0.8537, "step": 10577 }, { "epoch": 4.795104261106074, "grad_norm": 0.08035024202485781, "learning_rate": 4.0853560166944196e-07, "loss": 0.8498, "step": 10578 }, { "epoch": 4.795557570262919, "grad_norm": 0.07912796108578765, "learning_rate": 4.067329880403881e-07, "loss": 0.8598, "step": 10579 }, { "epoch": 4.7960108794197644, "grad_norm": 0.07773976075116856, "learning_rate": 4.0493433979338805e-07, "loss": 0.8613, "step": 10580 }, { "epoch": 4.796464188576609, "grad_norm": 0.07825401756661789, "learning_rate": 4.031396571085822e-07, "loss": 0.869, "step": 10581 }, { "epoch": 4.796917497733454, "grad_norm": 0.0773965919536164, "learning_rate": 4.0134894016571114e-07, "loss": 0.845, "step": 10582 }, { "epoch": 4.797370806890299, "grad_norm": 0.07465401746772568, "learning_rate": 3.9956218914412484e-07, "loss": 0.8574, "step": 10583 }, { "epoch": 4.797824116047144, "grad_norm": 0.07970021908177319, "learning_rate": 3.977794042227645e-07, "loss": 0.8437, "step": 10584 }, { "epoch": 4.798277425203989, "grad_norm": 0.08634194852114035, "learning_rate": 3.960005855801896e-07, "loss": 0.8538, "step": 10585 }, { "epoch": 4.798730734360834, "grad_norm": 0.07350104294157818, "learning_rate": 3.942257333945465e-07, "loss": 0.8707, "step": 10586 }, { "epoch": 4.799184043517679, "grad_norm": 0.08220137971627617, "learning_rate": 3.924548478435952e-07, "loss": 0.8617, "step": 10587 }, { "epoch": 4.799637352674524, "grad_norm": 0.07572609455653412, "learning_rate": 3.906879291046961e-07, "loss": 0.8376, "step": 10588 }, { "epoch": 4.800090661831369, "grad_norm": 0.07807282391825306, "learning_rate": 3.889249773548143e-07, "loss": 0.8617, "step": 10589 }, { "epoch": 4.800543970988214, "grad_norm": 0.07850968569978664, "learning_rate": 3.871659927705063e-07, "loss": 0.8519, "step": 10590 }, { "epoch": 4.800997280145059, "grad_norm": 0.07347468170866071, "learning_rate": 3.854109755279467e-07, "loss": 0.8554, "step": 10591 }, { "epoch": 4.801450589301904, "grad_norm": 0.07050128711719728, "learning_rate": 3.8365992580290166e-07, "loss": 0.8535, "step": 10592 }, { "epoch": 4.8019038984587485, "grad_norm": 0.07136514806256974, "learning_rate": 3.8191284377074646e-07, "loss": 0.8498, "step": 10593 }, { "epoch": 4.802357207615594, "grad_norm": 0.075558338851183, "learning_rate": 3.801697296064566e-07, "loss": 0.8712, "step": 10594 }, { "epoch": 4.802810516772439, "grad_norm": 0.06804881187732856, "learning_rate": 3.784305834846036e-07, "loss": 0.8447, "step": 10595 }, { "epoch": 4.8032638259292835, "grad_norm": 0.07205776610498552, "learning_rate": 3.76695405579377e-07, "loss": 0.8462, "step": 10596 }, { "epoch": 4.803717135086129, "grad_norm": 0.07671585628715291, "learning_rate": 3.749641960645534e-07, "loss": 0.8611, "step": 10597 }, { "epoch": 4.804170444242974, "grad_norm": 0.0762441015634757, "learning_rate": 3.732369551135273e-07, "loss": 0.8694, "step": 10598 }, { "epoch": 4.804623753399818, "grad_norm": 0.07406659519511337, "learning_rate": 3.71513682899276e-07, "loss": 0.8517, "step": 10599 }, { "epoch": 4.805077062556664, "grad_norm": 0.07744322307540814, "learning_rate": 3.697943795943992e-07, "loss": 0.8516, "step": 10600 }, { "epoch": 4.805530371713509, "grad_norm": 0.08077076030860791, "learning_rate": 3.6807904537108363e-07, "loss": 0.8415, "step": 10601 }, { "epoch": 4.805983680870353, "grad_norm": 0.073349962749365, "learning_rate": 3.6636768040112957e-07, "loss": 0.8655, "step": 10602 }, { "epoch": 4.806436990027199, "grad_norm": 0.07776605266207415, "learning_rate": 3.6466028485592887e-07, "loss": 0.8399, "step": 10603 }, { "epoch": 4.806890299184044, "grad_norm": 0.08157033467350866, "learning_rate": 3.629568589064913e-07, "loss": 0.8553, "step": 10604 }, { "epoch": 4.807343608340888, "grad_norm": 0.07695257776204369, "learning_rate": 3.6125740272341393e-07, "loss": 0.8421, "step": 10605 }, { "epoch": 4.8077969174977335, "grad_norm": 0.07785761415156066, "learning_rate": 3.595619164769026e-07, "loss": 0.8361, "step": 10606 }, { "epoch": 4.808250226654579, "grad_norm": 0.0756377392213603, "learning_rate": 3.578704003367683e-07, "loss": 0.8607, "step": 10607 }, { "epoch": 4.808703535811423, "grad_norm": 0.07249537657988868, "learning_rate": 3.5618285447241773e-07, "loss": 0.8525, "step": 10608 }, { "epoch": 4.8091568449682685, "grad_norm": 0.07111079283916046, "learning_rate": 3.544992790528712e-07, "loss": 0.8586, "step": 10609 }, { "epoch": 4.809610154125114, "grad_norm": 0.07071904986922418, "learning_rate": 3.528196742467316e-07, "loss": 0.848, "step": 10610 }, { "epoch": 4.810063463281958, "grad_norm": 0.07378072269014667, "learning_rate": 3.511440402222244e-07, "loss": 0.8365, "step": 10611 }, { "epoch": 4.810516772438803, "grad_norm": 0.0729439734318188, "learning_rate": 3.4947237714716643e-07, "loss": 0.8428, "step": 10612 }, { "epoch": 4.810970081595649, "grad_norm": 0.1009101703150573, "learning_rate": 3.478046851889794e-07, "loss": 0.838, "step": 10613 }, { "epoch": 4.811423390752493, "grad_norm": 0.07726375621212661, "learning_rate": 3.4614096451468957e-07, "loss": 0.8565, "step": 10614 }, { "epoch": 4.811876699909338, "grad_norm": 0.06913378955560652, "learning_rate": 3.4448121529092383e-07, "loss": 0.8514, "step": 10615 }, { "epoch": 4.812330009066184, "grad_norm": 0.08076261677125816, "learning_rate": 3.428254376839091e-07, "loss": 0.8647, "step": 10616 }, { "epoch": 4.812783318223028, "grad_norm": 0.07585617110104936, "learning_rate": 3.411736318594816e-07, "loss": 0.8622, "step": 10617 }, { "epoch": 4.813236627379873, "grad_norm": 0.07112830338869926, "learning_rate": 3.395257979830646e-07, "loss": 0.823, "step": 10618 }, { "epoch": 4.8136899365367185, "grad_norm": 0.07023564688005764, "learning_rate": 3.3788193621970387e-07, "loss": 0.8398, "step": 10619 }, { "epoch": 4.814143245693563, "grad_norm": 0.07321940298621722, "learning_rate": 3.3624204673402325e-07, "loss": 0.8707, "step": 10620 }, { "epoch": 4.814596554850408, "grad_norm": 0.0791274006014912, "learning_rate": 3.3460612969027806e-07, "loss": 0.8592, "step": 10621 }, { "epoch": 4.815049864007253, "grad_norm": 0.08176791907383746, "learning_rate": 3.329741852523016e-07, "loss": 0.8695, "step": 10622 }, { "epoch": 4.815503173164098, "grad_norm": 0.07283313477737398, "learning_rate": 3.31346213583541e-07, "loss": 0.8557, "step": 10623 }, { "epoch": 4.815956482320943, "grad_norm": 0.0757155323168614, "learning_rate": 3.297222148470436e-07, "loss": 0.8436, "step": 10624 }, { "epoch": 4.8164097914777875, "grad_norm": 0.10626098747611726, "learning_rate": 3.2810218920544814e-07, "loss": 0.8507, "step": 10625 }, { "epoch": 4.816863100634633, "grad_norm": 0.0804927543551598, "learning_rate": 3.264861368210159e-07, "loss": 0.8501, "step": 10626 }, { "epoch": 4.817316409791478, "grad_norm": 0.07709050699196426, "learning_rate": 3.2487405785559536e-07, "loss": 0.8419, "step": 10627 }, { "epoch": 4.817769718948322, "grad_norm": 0.07305036106450842, "learning_rate": 3.232659524706394e-07, "loss": 0.8536, "step": 10628 }, { "epoch": 4.818223028105168, "grad_norm": 0.06865484307403136, "learning_rate": 3.21661820827206e-07, "loss": 0.8551, "step": 10629 }, { "epoch": 4.818676337262013, "grad_norm": 0.07399136717368213, "learning_rate": 3.2006166308595766e-07, "loss": 0.842, "step": 10630 }, { "epoch": 4.819129646418857, "grad_norm": 0.0768567177197009, "learning_rate": 3.1846547940714844e-07, "loss": 0.8456, "step": 10631 }, { "epoch": 4.819582955575703, "grad_norm": 0.07304538025416549, "learning_rate": 3.168732699506416e-07, "loss": 0.8481, "step": 10632 }, { "epoch": 4.820036264732548, "grad_norm": 0.07968180501872246, "learning_rate": 3.152850348759051e-07, "loss": 0.8566, "step": 10633 }, { "epoch": 4.820489573889392, "grad_norm": 0.07125220781966457, "learning_rate": 3.1370077434200285e-07, "loss": 0.8536, "step": 10634 }, { "epoch": 4.8209428830462375, "grad_norm": 0.0730712364534785, "learning_rate": 3.121204885076079e-07, "loss": 0.8453, "step": 10635 }, { "epoch": 4.821396192203083, "grad_norm": 0.07211460708568221, "learning_rate": 3.105441775309803e-07, "loss": 0.8553, "step": 10636 }, { "epoch": 4.821849501359927, "grad_norm": 0.07183730005728334, "learning_rate": 3.089718415700027e-07, "loss": 0.8803, "step": 10637 }, { "epoch": 4.8223028105167725, "grad_norm": 0.0695076983676156, "learning_rate": 3.0740348078214465e-07, "loss": 0.8588, "step": 10638 }, { "epoch": 4.822756119673618, "grad_norm": 0.07128581199071626, "learning_rate": 3.058390953244805e-07, "loss": 0.8488, "step": 10639 }, { "epoch": 4.823209428830462, "grad_norm": 0.08217905512709742, "learning_rate": 3.042786853536894e-07, "loss": 0.8604, "step": 10640 }, { "epoch": 4.823662737987307, "grad_norm": 0.07392238877954502, "learning_rate": 3.027222510260552e-07, "loss": 0.8509, "step": 10641 }, { "epoch": 4.824116047144153, "grad_norm": 0.07452317186071598, "learning_rate": 3.0116979249745327e-07, "loss": 0.8549, "step": 10642 }, { "epoch": 4.824569356300997, "grad_norm": 0.07269909478080938, "learning_rate": 2.99621309923368e-07, "loss": 0.8557, "step": 10643 }, { "epoch": 4.825022665457842, "grad_norm": 0.07305827368741998, "learning_rate": 2.980768034588888e-07, "loss": 0.8702, "step": 10644 }, { "epoch": 4.825475974614688, "grad_norm": 0.07630754711921747, "learning_rate": 2.9653627325869626e-07, "loss": 0.8589, "step": 10645 }, { "epoch": 4.825929283771532, "grad_norm": 0.0708990351630464, "learning_rate": 2.949997194770848e-07, "loss": 0.8513, "step": 10646 }, { "epoch": 4.826382592928377, "grad_norm": 0.07240778450517246, "learning_rate": 2.9346714226794024e-07, "loss": 0.8576, "step": 10647 }, { "epoch": 4.8268359020852225, "grad_norm": 0.06946274272726563, "learning_rate": 2.9193854178475756e-07, "loss": 0.8417, "step": 10648 }, { "epoch": 4.827289211242067, "grad_norm": 0.083641469641881, "learning_rate": 2.9041391818063204e-07, "loss": 0.8508, "step": 10649 }, { "epoch": 4.827742520398912, "grad_norm": 0.07726465132188642, "learning_rate": 2.888932716082549e-07, "loss": 0.8342, "step": 10650 }, { "epoch": 4.8281958295557565, "grad_norm": 0.07320169240931247, "learning_rate": 2.8737660221992647e-07, "loss": 0.847, "step": 10651 }, { "epoch": 4.828649138712602, "grad_norm": 0.0673666162077393, "learning_rate": 2.858639101675431e-07, "loss": 0.8413, "step": 10652 }, { "epoch": 4.829102447869447, "grad_norm": 0.07084912198162854, "learning_rate": 2.8435519560260584e-07, "loss": 0.8714, "step": 10653 }, { "epoch": 4.8295557570262915, "grad_norm": 0.07858547271437243, "learning_rate": 2.8285045867622043e-07, "loss": 0.851, "step": 10654 }, { "epoch": 4.830009066183137, "grad_norm": 0.079466737667044, "learning_rate": 2.813496995390841e-07, "loss": 0.8558, "step": 10655 }, { "epoch": 4.830462375339982, "grad_norm": 0.07529649332141913, "learning_rate": 2.798529183415122e-07, "loss": 0.856, "step": 10656 }, { "epoch": 4.830915684496826, "grad_norm": 0.07400382069625212, "learning_rate": 2.783601152334026e-07, "loss": 0.8545, "step": 10657 }, { "epoch": 4.831368993653672, "grad_norm": 0.06924018259479138, "learning_rate": 2.7687129036427116e-07, "loss": 0.8572, "step": 10658 }, { "epoch": 4.831822302810517, "grad_norm": 0.0762140605537139, "learning_rate": 2.7538644388321655e-07, "loss": 0.8397, "step": 10659 }, { "epoch": 4.832275611967361, "grad_norm": 0.08266526690279613, "learning_rate": 2.7390557593896417e-07, "loss": 0.8545, "step": 10660 }, { "epoch": 4.832728921124207, "grad_norm": 0.07059973303728835, "learning_rate": 2.724286866798176e-07, "loss": 0.8546, "step": 10661 }, { "epoch": 4.833182230281052, "grad_norm": 0.07247434651489526, "learning_rate": 2.709557762536985e-07, "loss": 0.862, "step": 10662 }, { "epoch": 4.833635539437896, "grad_norm": 0.07590481229895543, "learning_rate": 2.694868448081156e-07, "loss": 0.8534, "step": 10663 }, { "epoch": 4.8340888485947415, "grad_norm": 0.07158351262276888, "learning_rate": 2.680218924901956e-07, "loss": 0.861, "step": 10664 }, { "epoch": 4.834542157751587, "grad_norm": 0.07349852651626096, "learning_rate": 2.665609194466523e-07, "loss": 0.871, "step": 10665 }, { "epoch": 4.834995466908431, "grad_norm": 0.08023785676990007, "learning_rate": 2.651039258238042e-07, "loss": 0.8482, "step": 10666 }, { "epoch": 4.8354487760652765, "grad_norm": 0.07113523782983747, "learning_rate": 2.6365091176757896e-07, "loss": 0.8465, "step": 10667 }, { "epoch": 4.835902085222122, "grad_norm": 0.07660718745448673, "learning_rate": 2.622018774234958e-07, "loss": 0.8645, "step": 10668 }, { "epoch": 4.836355394378966, "grad_norm": 0.0778871041358519, "learning_rate": 2.6075682293668305e-07, "loss": 0.8615, "step": 10669 }, { "epoch": 4.836808703535811, "grad_norm": 0.07164928095019073, "learning_rate": 2.593157484518649e-07, "loss": 0.8412, "step": 10670 }, { "epoch": 4.837262012692657, "grad_norm": 0.07217912096789375, "learning_rate": 2.5787865411337485e-07, "loss": 0.8538, "step": 10671 }, { "epoch": 4.837715321849501, "grad_norm": 0.07993374806933226, "learning_rate": 2.5644554006513336e-07, "loss": 0.8562, "step": 10672 }, { "epoch": 4.838168631006346, "grad_norm": 0.07661848110338534, "learning_rate": 2.5501640645067436e-07, "loss": 0.8559, "step": 10673 }, { "epoch": 4.838621940163192, "grad_norm": 0.07083216365925703, "learning_rate": 2.535912534131324e-07, "loss": 0.8458, "step": 10674 }, { "epoch": 4.839075249320036, "grad_norm": 0.06996104339695314, "learning_rate": 2.521700810952421e-07, "loss": 0.858, "step": 10675 }, { "epoch": 4.839528558476881, "grad_norm": 0.0706021759976011, "learning_rate": 2.5075288963932966e-07, "loss": 0.8567, "step": 10676 }, { "epoch": 4.8399818676337265, "grad_norm": 0.0737314903076706, "learning_rate": 2.4933967918733926e-07, "loss": 0.8559, "step": 10677 }, { "epoch": 4.840435176790571, "grad_norm": 0.07627172502809222, "learning_rate": 2.479304498808066e-07, "loss": 0.8552, "step": 10678 }, { "epoch": 4.840888485947416, "grad_norm": 0.07753788600652443, "learning_rate": 2.465252018608633e-07, "loss": 0.8331, "step": 10679 }, { "epoch": 4.841341795104261, "grad_norm": 0.07201655863189586, "learning_rate": 2.451239352682588e-07, "loss": 0.8591, "step": 10680 }, { "epoch": 4.841795104261106, "grad_norm": 0.07235717034737604, "learning_rate": 2.4372665024332996e-07, "loss": 0.8454, "step": 10681 }, { "epoch": 4.842248413417951, "grad_norm": 0.07216359039211634, "learning_rate": 2.423333469260136e-07, "loss": 0.8465, "step": 10682 }, { "epoch": 4.842701722574796, "grad_norm": 0.07514034228594421, "learning_rate": 2.409440254558604e-07, "loss": 0.8445, "step": 10683 }, { "epoch": 4.843155031731641, "grad_norm": 0.07133814850068525, "learning_rate": 2.395586859720167e-07, "loss": 0.8477, "step": 10684 }, { "epoch": 4.843608340888486, "grad_norm": 0.07390365796908496, "learning_rate": 2.3817732861322051e-07, "loss": 0.8627, "step": 10685 }, { "epoch": 4.844061650045331, "grad_norm": 0.07175729429883618, "learning_rate": 2.3679995351782337e-07, "loss": 0.8511, "step": 10686 }, { "epoch": 4.844514959202176, "grad_norm": 0.07023921985412834, "learning_rate": 2.3542656082377268e-07, "loss": 0.848, "step": 10687 }, { "epoch": 4.844968268359021, "grad_norm": 0.07157324801912039, "learning_rate": 2.3405715066861623e-07, "loss": 0.8415, "step": 10688 }, { "epoch": 4.845421577515866, "grad_norm": 0.07506583809344339, "learning_rate": 2.326917231895065e-07, "loss": 0.8669, "step": 10689 }, { "epoch": 4.845874886672711, "grad_norm": 0.07366659749313301, "learning_rate": 2.3133027852319634e-07, "loss": 0.8749, "step": 10690 }, { "epoch": 4.846328195829556, "grad_norm": 0.07085322624549961, "learning_rate": 2.299728168060389e-07, "loss": 0.8544, "step": 10691 }, { "epoch": 4.846781504986401, "grad_norm": 0.07163161911448132, "learning_rate": 2.286193381739832e-07, "loss": 0.8401, "step": 10692 }, { "epoch": 4.8472348141432455, "grad_norm": 0.0707708762797177, "learning_rate": 2.2726984276258745e-07, "loss": 0.851, "step": 10693 }, { "epoch": 4.847688123300091, "grad_norm": 0.07434283358870114, "learning_rate": 2.259243307070058e-07, "loss": 0.8539, "step": 10694 }, { "epoch": 4.848141432456936, "grad_norm": 0.07076093972822571, "learning_rate": 2.2458280214199712e-07, "loss": 0.8507, "step": 10695 }, { "epoch": 4.8485947416137805, "grad_norm": 0.07336958451814526, "learning_rate": 2.2324525720191615e-07, "loss": 0.8552, "step": 10696 }, { "epoch": 4.849048050770626, "grad_norm": 0.07410594612203467, "learning_rate": 2.219116960207268e-07, "loss": 0.8618, "step": 10697 }, { "epoch": 4.849501359927471, "grad_norm": 0.06890300461308638, "learning_rate": 2.2058211873198897e-07, "loss": 0.8513, "step": 10698 }, { "epoch": 4.849954669084315, "grad_norm": 0.07915171053486733, "learning_rate": 2.1925652546885835e-07, "loss": 0.856, "step": 10699 }, { "epoch": 4.850407978241161, "grad_norm": 0.08244794191182789, "learning_rate": 2.179349163640998e-07, "loss": 0.8551, "step": 10700 }, { "epoch": 4.850861287398006, "grad_norm": 0.07262602560953242, "learning_rate": 2.166172915500786e-07, "loss": 0.8351, "step": 10701 }, { "epoch": 4.85131459655485, "grad_norm": 0.07375762711241912, "learning_rate": 2.153036511587514e-07, "loss": 0.8672, "step": 10702 }, { "epoch": 4.851767905711696, "grad_norm": 0.072299238150799, "learning_rate": 2.139939953216974e-07, "loss": 0.8554, "step": 10703 }, { "epoch": 4.85222121486854, "grad_norm": 0.07485344088517866, "learning_rate": 2.1268832417006501e-07, "loss": 0.8433, "step": 10704 }, { "epoch": 4.852674524025385, "grad_norm": 0.07692378318815267, "learning_rate": 2.113866378346341e-07, "loss": 0.8536, "step": 10705 }, { "epoch": 4.8531278331822305, "grad_norm": 0.07297818510502732, "learning_rate": 2.1008893644577145e-07, "loss": 0.8629, "step": 10706 }, { "epoch": 4.853581142339075, "grad_norm": 0.08302379024089969, "learning_rate": 2.0879522013343534e-07, "loss": 0.8541, "step": 10707 }, { "epoch": 4.85403445149592, "grad_norm": 0.07803191467137612, "learning_rate": 2.0750548902720657e-07, "loss": 0.8608, "step": 10708 }, { "epoch": 4.854487760652765, "grad_norm": 0.07303432155364714, "learning_rate": 2.062197432562485e-07, "loss": 0.8507, "step": 10709 }, { "epoch": 4.85494106980961, "grad_norm": 0.0720657172391939, "learning_rate": 2.0493798294933808e-07, "loss": 0.8503, "step": 10710 }, { "epoch": 4.855394378966455, "grad_norm": 0.07437182711838772, "learning_rate": 2.0366020823484377e-07, "loss": 0.8603, "step": 10711 }, { "epoch": 4.8558476881233, "grad_norm": 0.07279360293771354, "learning_rate": 2.0238641924073432e-07, "loss": 0.8692, "step": 10712 }, { "epoch": 4.856300997280145, "grad_norm": 0.07106302301534272, "learning_rate": 2.011166160945921e-07, "loss": 0.8518, "step": 10713 }, { "epoch": 4.85675430643699, "grad_norm": 0.0808991473703073, "learning_rate": 1.9985079892359095e-07, "loss": 0.8482, "step": 10714 }, { "epoch": 4.857207615593835, "grad_norm": 0.06933994864260502, "learning_rate": 1.985889678544961e-07, "loss": 0.847, "step": 10715 }, { "epoch": 4.85766092475068, "grad_norm": 0.07412468859651401, "learning_rate": 1.9733112301369094e-07, "loss": 0.8534, "step": 10716 }, { "epoch": 4.858114233907525, "grad_norm": 0.07632311332178879, "learning_rate": 1.960772645271547e-07, "loss": 0.8517, "step": 10717 }, { "epoch": 4.85856754306437, "grad_norm": 0.07024256796405541, "learning_rate": 1.9482739252046246e-07, "loss": 0.8501, "step": 10718 }, { "epoch": 4.859020852221215, "grad_norm": 0.07315079399653511, "learning_rate": 1.9358150711878965e-07, "loss": 0.8553, "step": 10719 }, { "epoch": 4.85947416137806, "grad_norm": 0.075475693189387, "learning_rate": 1.9233960844691647e-07, "loss": 0.8465, "step": 10720 }, { "epoch": 4.859927470534905, "grad_norm": 0.06929270134030718, "learning_rate": 1.9110169662922784e-07, "loss": 0.8471, "step": 10721 }, { "epoch": 4.8603807796917495, "grad_norm": 0.07196233164573536, "learning_rate": 1.8986777178969573e-07, "loss": 0.87, "step": 10722 }, { "epoch": 4.860834088848595, "grad_norm": 0.0761974107053024, "learning_rate": 1.8863783405191015e-07, "loss": 0.8514, "step": 10723 }, { "epoch": 4.86128739800544, "grad_norm": 0.07715236852190413, "learning_rate": 1.8741188353904815e-07, "loss": 0.88, "step": 10724 }, { "epoch": 4.8617407071622845, "grad_norm": 0.07215071630267395, "learning_rate": 1.861899203738915e-07, "loss": 0.8453, "step": 10725 }, { "epoch": 4.86219401631913, "grad_norm": 0.07350771422066332, "learning_rate": 1.8497194467882674e-07, "loss": 0.8551, "step": 10726 }, { "epoch": 4.862647325475975, "grad_norm": 0.07176095350706178, "learning_rate": 1.837579565758363e-07, "loss": 0.844, "step": 10727 }, { "epoch": 4.863100634632819, "grad_norm": 0.08198506168102558, "learning_rate": 1.825479561865029e-07, "loss": 0.8648, "step": 10728 }, { "epoch": 4.863553943789665, "grad_norm": 0.07428456173927125, "learning_rate": 1.8134194363201407e-07, "loss": 0.8656, "step": 10729 }, { "epoch": 4.864007252946509, "grad_norm": 0.08163228547138762, "learning_rate": 1.801399190331532e-07, "loss": 0.8552, "step": 10730 }, { "epoch": 4.864460562103354, "grad_norm": 0.07202102924447643, "learning_rate": 1.7894188251030843e-07, "loss": 0.861, "step": 10731 }, { "epoch": 4.8649138712602, "grad_norm": 0.0733916190853858, "learning_rate": 1.777478341834682e-07, "loss": 0.8679, "step": 10732 }, { "epoch": 4.865367180417044, "grad_norm": 0.07489245077291605, "learning_rate": 1.7655777417221243e-07, "loss": 0.8648, "step": 10733 }, { "epoch": 4.865820489573889, "grad_norm": 0.0717453619722988, "learning_rate": 1.753717025957391e-07, "loss": 0.8597, "step": 10734 }, { "epoch": 4.8662737987307345, "grad_norm": 0.06870521111242898, "learning_rate": 1.741896195728332e-07, "loss": 0.8626, "step": 10735 }, { "epoch": 4.866727107887579, "grad_norm": 0.07787869504243577, "learning_rate": 1.7301152522187558e-07, "loss": 0.8601, "step": 10736 }, { "epoch": 4.867180417044424, "grad_norm": 0.0727364990884962, "learning_rate": 1.7183741966086965e-07, "loss": 0.87, "step": 10737 }, { "epoch": 4.867633726201269, "grad_norm": 0.0742387389334784, "learning_rate": 1.7066730300739686e-07, "loss": 0.8417, "step": 10738 }, { "epoch": 4.868087035358114, "grad_norm": 0.06803881897548318, "learning_rate": 1.695011753786524e-07, "loss": 0.875, "step": 10739 }, { "epoch": 4.868540344514959, "grad_norm": 0.07605041604558359, "learning_rate": 1.6833903689142283e-07, "loss": 0.8624, "step": 10740 }, { "epoch": 4.868993653671804, "grad_norm": 0.07132342302872954, "learning_rate": 1.671808876620995e-07, "loss": 0.8545, "step": 10741 }, { "epoch": 4.869446962828649, "grad_norm": 0.07176085954483293, "learning_rate": 1.660267278066785e-07, "loss": 0.8495, "step": 10742 }, { "epoch": 4.869900271985494, "grad_norm": 0.07128929299253885, "learning_rate": 1.6487655744075183e-07, "loss": 0.8617, "step": 10743 }, { "epoch": 4.870353581142339, "grad_norm": 0.07490505558390963, "learning_rate": 1.6373037667951176e-07, "loss": 0.86, "step": 10744 }, { "epoch": 4.870806890299184, "grad_norm": 0.07822107359495459, "learning_rate": 1.6258818563774647e-07, "loss": 0.855, "step": 10745 }, { "epoch": 4.871260199456029, "grad_norm": 0.07767774921353122, "learning_rate": 1.6144998442986226e-07, "loss": 0.8494, "step": 10746 }, { "epoch": 4.871713508612874, "grad_norm": 0.07216211055719658, "learning_rate": 1.6031577316983904e-07, "loss": 0.8445, "step": 10747 }, { "epoch": 4.872166817769719, "grad_norm": 0.07633521591662966, "learning_rate": 1.5918555197127928e-07, "loss": 0.8395, "step": 10748 }, { "epoch": 4.872620126926564, "grad_norm": 0.07622888103312149, "learning_rate": 1.5805932094737686e-07, "loss": 0.8566, "step": 10749 }, { "epoch": 4.873073436083409, "grad_norm": 0.07352089070407476, "learning_rate": 1.5693708021092602e-07, "loss": 0.862, "step": 10750 }, { "epoch": 4.8735267452402535, "grad_norm": 0.06905761966654005, "learning_rate": 1.5581882987432574e-07, "loss": 0.8541, "step": 10751 }, { "epoch": 4.873980054397099, "grad_norm": 0.07734810408521509, "learning_rate": 1.547045700495664e-07, "loss": 0.8477, "step": 10752 }, { "epoch": 4.874433363553944, "grad_norm": 0.06942633572042792, "learning_rate": 1.5359430084825211e-07, "loss": 0.8669, "step": 10753 }, { "epoch": 4.8748866727107885, "grad_norm": 0.07054863529725326, "learning_rate": 1.5248802238156945e-07, "loss": 0.8563, "step": 10754 }, { "epoch": 4.875339981867634, "grad_norm": 0.07115085150740884, "learning_rate": 1.5138573476032315e-07, "loss": 0.8462, "step": 10755 }, { "epoch": 4.875793291024479, "grad_norm": 0.07107688359643907, "learning_rate": 1.502874380949093e-07, "loss": 0.8491, "step": 10756 }, { "epoch": 4.876246600181323, "grad_norm": 0.06653094239132214, "learning_rate": 1.4919313249532441e-07, "loss": 0.8452, "step": 10757 }, { "epoch": 4.876699909338169, "grad_norm": 0.072300299547095, "learning_rate": 1.4810281807116523e-07, "loss": 0.8403, "step": 10758 }, { "epoch": 4.877153218495014, "grad_norm": 0.0747090630444509, "learning_rate": 1.470164949316333e-07, "loss": 0.8645, "step": 10759 }, { "epoch": 4.877606527651858, "grad_norm": 0.07245168357398835, "learning_rate": 1.4593416318552155e-07, "loss": 0.8437, "step": 10760 }, { "epoch": 4.878059836808704, "grad_norm": 0.10715225626533686, "learning_rate": 1.4485582294123667e-07, "loss": 0.8589, "step": 10761 }, { "epoch": 4.878513145965549, "grad_norm": 0.06973129274357574, "learning_rate": 1.4378147430677226e-07, "loss": 0.8567, "step": 10762 }, { "epoch": 4.878966455122393, "grad_norm": 0.07190850211479011, "learning_rate": 1.4271111738972665e-07, "loss": 0.8656, "step": 10763 }, { "epoch": 4.8794197642792385, "grad_norm": 0.07010577181656762, "learning_rate": 1.41644752297303e-07, "loss": 0.8507, "step": 10764 }, { "epoch": 4.879873073436084, "grad_norm": 0.07421681782693203, "learning_rate": 1.4058237913629592e-07, "loss": 0.8392, "step": 10765 }, { "epoch": 4.880326382592928, "grad_norm": 0.06786534354850841, "learning_rate": 1.3952399801311357e-07, "loss": 0.8579, "step": 10766 }, { "epoch": 4.880779691749773, "grad_norm": 0.07278937221747114, "learning_rate": 1.3846960903374673e-07, "loss": 0.8421, "step": 10767 }, { "epoch": 4.881233000906619, "grad_norm": 0.07446683234077713, "learning_rate": 1.3741921230379985e-07, "loss": 0.8741, "step": 10768 }, { "epoch": 4.881686310063463, "grad_norm": 0.07258863961547853, "learning_rate": 1.363728079284732e-07, "loss": 0.8766, "step": 10769 }, { "epoch": 4.882139619220308, "grad_norm": 0.07314324701401362, "learning_rate": 1.3533039601256736e-07, "loss": 0.8544, "step": 10770 }, { "epoch": 4.882592928377154, "grad_norm": 0.07598097664296614, "learning_rate": 1.3429197666047888e-07, "loss": 0.87, "step": 10771 }, { "epoch": 4.883046237533998, "grad_norm": 0.06940645875609563, "learning_rate": 1.3325754997621788e-07, "loss": 0.8628, "step": 10772 }, { "epoch": 4.883499546690843, "grad_norm": 0.07463652942037988, "learning_rate": 1.3222711606337258e-07, "loss": 0.8415, "step": 10773 }, { "epoch": 4.883952855847689, "grad_norm": 0.06904922201712146, "learning_rate": 1.312006750251582e-07, "loss": 0.8559, "step": 10774 }, { "epoch": 4.884406165004533, "grad_norm": 0.07045310627735407, "learning_rate": 1.3017822696436368e-07, "loss": 0.8359, "step": 10775 }, { "epoch": 4.884859474161378, "grad_norm": 0.06916058277536816, "learning_rate": 1.291597719833959e-07, "loss": 0.8578, "step": 10776 }, { "epoch": 4.8853127833182235, "grad_norm": 0.07619750101344733, "learning_rate": 1.281453101842578e-07, "loss": 0.8613, "step": 10777 }, { "epoch": 4.885766092475068, "grad_norm": 0.07842434085057706, "learning_rate": 1.271348416685436e-07, "loss": 0.8336, "step": 10778 }, { "epoch": 4.886219401631913, "grad_norm": 0.07137842791426044, "learning_rate": 1.261283665374613e-07, "loss": 0.8448, "step": 10779 }, { "epoch": 4.886672710788758, "grad_norm": 0.08205872239657327, "learning_rate": 1.2512588489181021e-07, "loss": 0.8564, "step": 10780 }, { "epoch": 4.887126019945603, "grad_norm": 0.07281835039164415, "learning_rate": 1.2412739683199448e-07, "loss": 0.856, "step": 10781 }, { "epoch": 4.887579329102448, "grad_norm": 0.07146858162216431, "learning_rate": 1.2313290245800968e-07, "loss": 0.8442, "step": 10782 }, { "epoch": 4.8880326382592925, "grad_norm": 0.06744461454940753, "learning_rate": 1.221424018694606e-07, "loss": 0.8489, "step": 10783 }, { "epoch": 4.888485947416138, "grad_norm": 0.07375554471411389, "learning_rate": 1.2115589516554782e-07, "loss": 0.8448, "step": 10784 }, { "epoch": 4.888939256572983, "grad_norm": 0.0717683796219891, "learning_rate": 1.201733824450768e-07, "loss": 0.8642, "step": 10785 }, { "epoch": 4.889392565729827, "grad_norm": 0.07434705372229741, "learning_rate": 1.1919486380644441e-07, "loss": 0.847, "step": 10786 }, { "epoch": 4.889845874886673, "grad_norm": 0.07248890347765288, "learning_rate": 1.1822033934765665e-07, "loss": 0.8635, "step": 10787 }, { "epoch": 4.890299184043518, "grad_norm": 0.07105306932936893, "learning_rate": 1.1724980916631102e-07, "loss": 0.8546, "step": 10788 }, { "epoch": 4.890752493200362, "grad_norm": 0.07234318293532246, "learning_rate": 1.1628327335960976e-07, "loss": 0.8378, "step": 10789 }, { "epoch": 4.891205802357208, "grad_norm": 0.07390802290436875, "learning_rate": 1.1532073202435545e-07, "loss": 0.8547, "step": 10790 }, { "epoch": 4.891659111514053, "grad_norm": 0.07297352554981362, "learning_rate": 1.1436218525694653e-07, "loss": 0.8563, "step": 10791 }, { "epoch": 4.892112420670897, "grad_norm": 0.07459613365070893, "learning_rate": 1.1340763315338622e-07, "loss": 0.8868, "step": 10792 }, { "epoch": 4.8925657298277425, "grad_norm": 0.06908258425321039, "learning_rate": 1.1245707580927801e-07, "loss": 0.86, "step": 10793 }, { "epoch": 4.893019038984588, "grad_norm": 0.07345747687536357, "learning_rate": 1.1151051331982133e-07, "loss": 0.846, "step": 10794 }, { "epoch": 4.893472348141432, "grad_norm": 0.0737724804412717, "learning_rate": 1.105679457798159e-07, "loss": 0.8519, "step": 10795 }, { "epoch": 4.893925657298277, "grad_norm": 0.07004231212954568, "learning_rate": 1.0962937328366618e-07, "loss": 0.8546, "step": 10796 }, { "epoch": 4.894378966455123, "grad_norm": 0.07898874238530214, "learning_rate": 1.0869479592536813e-07, "loss": 0.8385, "step": 10797 }, { "epoch": 4.894832275611967, "grad_norm": 0.0719455306084566, "learning_rate": 1.0776421379852242e-07, "loss": 0.8528, "step": 10798 }, { "epoch": 4.895285584768812, "grad_norm": 0.06977613267398343, "learning_rate": 1.0683762699633448e-07, "loss": 0.8476, "step": 10799 }, { "epoch": 4.895738893925658, "grad_norm": 0.07489617841395209, "learning_rate": 1.0591503561160121e-07, "loss": 0.8721, "step": 10800 }, { "epoch": 4.896192203082502, "grad_norm": 0.06887357878827279, "learning_rate": 1.0499643973672424e-07, "loss": 0.8433, "step": 10801 }, { "epoch": 4.896645512239347, "grad_norm": 0.07401814588849298, "learning_rate": 1.040818394637011e-07, "loss": 0.8418, "step": 10802 }, { "epoch": 4.897098821396193, "grad_norm": 0.07277283167882766, "learning_rate": 1.0317123488413406e-07, "loss": 0.8447, "step": 10803 }, { "epoch": 4.897552130553037, "grad_norm": 0.07327557804979581, "learning_rate": 1.0226462608922571e-07, "loss": 0.858, "step": 10804 }, { "epoch": 4.898005439709882, "grad_norm": 0.07242870192026421, "learning_rate": 1.0136201316977012e-07, "loss": 0.8268, "step": 10805 }, { "epoch": 4.898458748866727, "grad_norm": 0.0784881962964586, "learning_rate": 1.0046339621617052e-07, "loss": 0.8613, "step": 10806 }, { "epoch": 4.898912058023572, "grad_norm": 0.07705861982587607, "learning_rate": 9.956877531842158e-08, "loss": 0.8498, "step": 10807 }, { "epoch": 4.899365367180417, "grad_norm": 0.07384306251632183, "learning_rate": 9.867815056612717e-08, "loss": 0.8398, "step": 10808 }, { "epoch": 4.8998186763372615, "grad_norm": 0.07046839918475759, "learning_rate": 9.779152204848264e-08, "loss": 0.8458, "step": 10809 }, { "epoch": 4.900271985494107, "grad_norm": 0.07804020987396698, "learning_rate": 9.690888985428804e-08, "loss": 0.8574, "step": 10810 }, { "epoch": 4.900725294650952, "grad_norm": 0.07365520682994448, "learning_rate": 9.603025407193933e-08, "loss": 0.8635, "step": 10811 }, { "epoch": 4.9011786038077965, "grad_norm": 0.07173201530823223, "learning_rate": 9.515561478943725e-08, "loss": 0.8354, "step": 10812 }, { "epoch": 4.901631912964642, "grad_norm": 0.07037525154721833, "learning_rate": 9.428497209438281e-08, "loss": 0.8586, "step": 10813 }, { "epoch": 4.902085222121487, "grad_norm": 0.07144999016652055, "learning_rate": 9.341832607396406e-08, "loss": 0.8573, "step": 10814 }, { "epoch": 4.902538531278331, "grad_norm": 0.07210140936399108, "learning_rate": 9.255567681498268e-08, "loss": 0.8747, "step": 10815 }, { "epoch": 4.902991840435177, "grad_norm": 0.08906644127521304, "learning_rate": 9.16970244038362e-08, "loss": 0.8466, "step": 10816 }, { "epoch": 4.903445149592022, "grad_norm": 0.07313125833432993, "learning_rate": 9.084236892652697e-08, "loss": 0.8472, "step": 10817 }, { "epoch": 4.903898458748866, "grad_norm": 0.0766043924200107, "learning_rate": 8.999171046863986e-08, "loss": 0.8686, "step": 10818 }, { "epoch": 4.904351767905712, "grad_norm": 0.07017618147199202, "learning_rate": 8.91450491153778e-08, "loss": 0.8419, "step": 10819 }, { "epoch": 4.904805077062557, "grad_norm": 0.07070123938127158, "learning_rate": 8.830238495153521e-08, "loss": 0.844, "step": 10820 }, { "epoch": 4.905258386219401, "grad_norm": 0.07627637633554185, "learning_rate": 8.746371806150677e-08, "loss": 0.8448, "step": 10821 }, { "epoch": 4.9057116953762465, "grad_norm": 0.07488811152597905, "learning_rate": 8.662904852928755e-08, "loss": 0.8701, "step": 10822 }, { "epoch": 4.906165004533092, "grad_norm": 0.07998272970800134, "learning_rate": 8.579837643847289e-08, "loss": 0.8693, "step": 10823 }, { "epoch": 4.906618313689936, "grad_norm": 0.07744334892342782, "learning_rate": 8.4971701872254e-08, "loss": 0.8542, "step": 10824 }, { "epoch": 4.9070716228467814, "grad_norm": 0.0758454182209573, "learning_rate": 8.414902491343136e-08, "loss": 0.8601, "step": 10825 }, { "epoch": 4.907524932003627, "grad_norm": 0.07838678787294966, "learning_rate": 8.333034564439236e-08, "loss": 0.8658, "step": 10826 }, { "epoch": 4.907978241160471, "grad_norm": 0.10820303237801611, "learning_rate": 8.25156641471292e-08, "loss": 0.8618, "step": 10827 }, { "epoch": 4.908431550317316, "grad_norm": 0.07204971391322237, "learning_rate": 8.170498050324327e-08, "loss": 0.8552, "step": 10828 }, { "epoch": 4.908884859474162, "grad_norm": 0.06845406577688544, "learning_rate": 8.089829479391852e-08, "loss": 0.8432, "step": 10829 }, { "epoch": 4.909338168631006, "grad_norm": 0.07258328587785126, "learning_rate": 8.009560709994812e-08, "loss": 0.8511, "step": 10830 }, { "epoch": 4.909791477787851, "grad_norm": 0.07111702746532299, "learning_rate": 7.929691750172553e-08, "loss": 0.8506, "step": 10831 }, { "epoch": 4.910244786944697, "grad_norm": 0.07834831839978451, "learning_rate": 7.850222607924452e-08, "loss": 0.8576, "step": 10832 }, { "epoch": 4.910698096101541, "grad_norm": 0.07095864957168942, "learning_rate": 7.771153291209033e-08, "loss": 0.8607, "step": 10833 }, { "epoch": 4.911151405258386, "grad_norm": 0.07348475439115128, "learning_rate": 7.692483807945739e-08, "loss": 0.8425, "step": 10834 }, { "epoch": 4.9116047144152315, "grad_norm": 0.07103893574851348, "learning_rate": 7.614214166013156e-08, "loss": 0.8378, "step": 10835 }, { "epoch": 4.912058023572076, "grad_norm": 0.070858337746872, "learning_rate": 7.536344373250348e-08, "loss": 0.8583, "step": 10836 }, { "epoch": 4.912511332728921, "grad_norm": 0.0754664923150713, "learning_rate": 7.458874437456409e-08, "loss": 0.8729, "step": 10837 }, { "epoch": 4.912964641885766, "grad_norm": 0.07241461988415891, "learning_rate": 7.381804366390022e-08, "loss": 0.8636, "step": 10838 }, { "epoch": 4.913417951042611, "grad_norm": 0.07030853136247872, "learning_rate": 7.305134167770344e-08, "loss": 0.8582, "step": 10839 }, { "epoch": 4.913871260199456, "grad_norm": 0.07212520710036109, "learning_rate": 7.228863849275235e-08, "loss": 0.8383, "step": 10840 }, { "epoch": 4.914324569356301, "grad_norm": 0.0739839591924685, "learning_rate": 7.152993418544363e-08, "loss": 0.8513, "step": 10841 }, { "epoch": 4.914777878513146, "grad_norm": 0.07897066354767326, "learning_rate": 7.077522883175647e-08, "loss": 0.8604, "step": 10842 }, { "epoch": 4.915231187669991, "grad_norm": 0.07086820123032714, "learning_rate": 7.002452250728375e-08, "loss": 0.8576, "step": 10843 }, { "epoch": 4.915684496826836, "grad_norm": 0.06975599212651659, "learning_rate": 6.927781528720534e-08, "loss": 0.8406, "step": 10844 }, { "epoch": 4.916137805983681, "grad_norm": 0.07191673270486008, "learning_rate": 6.853510724630585e-08, "loss": 0.8486, "step": 10845 }, { "epoch": 4.916591115140526, "grad_norm": 0.07287307781157328, "learning_rate": 6.779639845897467e-08, "loss": 0.8527, "step": 10846 }, { "epoch": 4.917044424297371, "grad_norm": 0.07200035406526217, "learning_rate": 6.706168899919264e-08, "loss": 0.8529, "step": 10847 }, { "epoch": 4.917497733454216, "grad_norm": 0.07542311936253747, "learning_rate": 6.633097894054086e-08, "loss": 0.8646, "step": 10848 }, { "epoch": 4.917951042611061, "grad_norm": 0.06694144309288673, "learning_rate": 6.560426835620526e-08, "loss": 0.8479, "step": 10849 }, { "epoch": 4.918404351767906, "grad_norm": 0.07831617526823789, "learning_rate": 6.488155731896761e-08, "loss": 0.8626, "step": 10850 }, { "epoch": 4.9188576609247505, "grad_norm": 0.0691587715013022, "learning_rate": 6.416284590121002e-08, "loss": 0.876, "step": 10851 }, { "epoch": 4.919310970081596, "grad_norm": 0.0711611846418456, "learning_rate": 6.344813417491491e-08, "loss": 0.8477, "step": 10852 }, { "epoch": 4.919764279238441, "grad_norm": 0.06986667055899655, "learning_rate": 6.273742221165613e-08, "loss": 0.8536, "step": 10853 }, { "epoch": 4.9202175883952854, "grad_norm": 0.06929084131851548, "learning_rate": 6.203071008261674e-08, "loss": 0.842, "step": 10854 }, { "epoch": 4.920670897552131, "grad_norm": 0.07148714362746861, "learning_rate": 6.132799785858012e-08, "loss": 0.8674, "step": 10855 }, { "epoch": 4.921124206708976, "grad_norm": 0.08399171410669189, "learning_rate": 6.062928560992554e-08, "loss": 0.8638, "step": 10856 }, { "epoch": 4.92157751586582, "grad_norm": 0.075344659074703, "learning_rate": 5.993457340662367e-08, "loss": 0.8549, "step": 10857 }, { "epoch": 4.922030825022666, "grad_norm": 0.07443982956436951, "learning_rate": 5.9243861318254435e-08, "loss": 0.859, "step": 10858 }, { "epoch": 4.922484134179511, "grad_norm": 0.07199097046057348, "learning_rate": 5.855714941399804e-08, "loss": 0.8586, "step": 10859 }, { "epoch": 4.922937443336355, "grad_norm": 0.06900854237462915, "learning_rate": 5.787443776263058e-08, "loss": 0.8549, "step": 10860 }, { "epoch": 4.923390752493201, "grad_norm": 0.06808001265965444, "learning_rate": 5.7195726432524024e-08, "loss": 0.845, "step": 10861 }, { "epoch": 4.923844061650045, "grad_norm": 0.0701287537381092, "learning_rate": 5.652101549165512e-08, "loss": 0.8618, "step": 10862 }, { "epoch": 4.92429737080689, "grad_norm": 0.07092596361305217, "learning_rate": 5.585030500759647e-08, "loss": 0.843, "step": 10863 }, { "epoch": 4.9247506799637355, "grad_norm": 0.072320935714157, "learning_rate": 5.518359504752546e-08, "loss": 0.8475, "step": 10864 }, { "epoch": 4.92520398912058, "grad_norm": 0.08003384409891365, "learning_rate": 5.452088567821534e-08, "loss": 0.8568, "step": 10865 }, { "epoch": 4.925657298277425, "grad_norm": 0.07939673617603105, "learning_rate": 5.38621769660308e-08, "loss": 0.8651, "step": 10866 }, { "epoch": 4.92611060743427, "grad_norm": 0.06940372659913556, "learning_rate": 5.320746897695461e-08, "loss": 0.8525, "step": 10867 }, { "epoch": 4.926563916591115, "grad_norm": 0.06984473302130734, "learning_rate": 5.255676177654767e-08, "loss": 0.831, "step": 10868 }, { "epoch": 4.92701722574796, "grad_norm": 0.07317351624538221, "learning_rate": 5.191005542998895e-08, "loss": 0.84, "step": 10869 }, { "epoch": 4.927470534904805, "grad_norm": 0.07008862901470622, "learning_rate": 5.126735000203553e-08, "loss": 0.8471, "step": 10870 }, { "epoch": 4.92792384406165, "grad_norm": 0.07591477116568622, "learning_rate": 5.062864555707148e-08, "loss": 0.8523, "step": 10871 }, { "epoch": 4.928377153218495, "grad_norm": 0.07275357302762979, "learning_rate": 4.999394215905451e-08, "loss": 0.8376, "step": 10872 }, { "epoch": 4.92883046237534, "grad_norm": 0.0736676238331784, "learning_rate": 4.9363239871556004e-08, "loss": 0.8606, "step": 10873 }, { "epoch": 4.929283771532185, "grad_norm": 0.073526509806377, "learning_rate": 4.873653875773876e-08, "loss": 0.8418, "step": 10874 }, { "epoch": 4.92973708068903, "grad_norm": 0.07272413327009557, "learning_rate": 4.8113838880374796e-08, "loss": 0.8514, "step": 10875 }, { "epoch": 4.930190389845875, "grad_norm": 0.06914376963492898, "learning_rate": 4.749514030182756e-08, "loss": 0.8393, "step": 10876 }, { "epoch": 4.93064369900272, "grad_norm": 0.06818478268072875, "learning_rate": 4.688044308406081e-08, "loss": 0.8495, "step": 10877 }, { "epoch": 4.931097008159565, "grad_norm": 0.07466355211550574, "learning_rate": 4.6269747288634205e-08, "loss": 0.8426, "step": 10878 }, { "epoch": 4.93155031731641, "grad_norm": 0.07704846456672652, "learning_rate": 4.5663052976716583e-08, "loss": 0.8515, "step": 10879 }, { "epoch": 4.9320036264732545, "grad_norm": 0.06918626936930124, "learning_rate": 4.506036020906379e-08, "loss": 0.8681, "step": 10880 }, { "epoch": 4.9324569356301, "grad_norm": 0.07971039200814443, "learning_rate": 4.446166904604532e-08, "loss": 0.8567, "step": 10881 }, { "epoch": 4.932910244786945, "grad_norm": 0.06890065204788386, "learning_rate": 4.386697954761765e-08, "loss": 0.8561, "step": 10882 }, { "epoch": 4.9333635539437894, "grad_norm": 0.07081104404828469, "learning_rate": 4.327629177333759e-08, "loss": 0.8666, "step": 10883 }, { "epoch": 4.933816863100635, "grad_norm": 0.0706927532457192, "learning_rate": 4.268960578237114e-08, "loss": 0.8545, "step": 10884 }, { "epoch": 4.934270172257479, "grad_norm": 0.07983683435035693, "learning_rate": 4.21069216334713e-08, "loss": 0.8567, "step": 10885 }, { "epoch": 4.934723481414324, "grad_norm": 0.0700104068286981, "learning_rate": 4.152823938500028e-08, "loss": 0.8574, "step": 10886 }, { "epoch": 4.93517679057117, "grad_norm": 0.07045846830926496, "learning_rate": 4.095355909491172e-08, "loss": 0.8496, "step": 10887 }, { "epoch": 4.935630099728014, "grad_norm": 0.07041497703979514, "learning_rate": 4.0382880820759586e-08, "loss": 0.8509, "step": 10888 }, { "epoch": 4.936083408884859, "grad_norm": 0.07179957747327485, "learning_rate": 3.981620461969815e-08, "loss": 0.858, "step": 10889 }, { "epoch": 4.936536718041705, "grad_norm": 0.0681967141162593, "learning_rate": 3.925353054848646e-08, "loss": 0.8514, "step": 10890 }, { "epoch": 4.936990027198549, "grad_norm": 0.07094936838651296, "learning_rate": 3.869485866347944e-08, "loss": 0.8557, "step": 10891 }, { "epoch": 4.937443336355394, "grad_norm": 0.07295521184686665, "learning_rate": 3.814018902062344e-08, "loss": 0.852, "step": 10892 }, { "epoch": 4.9378966455122395, "grad_norm": 0.0670310043136111, "learning_rate": 3.7589521675474025e-08, "loss": 0.8441, "step": 10893 }, { "epoch": 4.938349954669084, "grad_norm": 0.0665121188771586, "learning_rate": 3.704285668318264e-08, "loss": 0.8313, "step": 10894 }, { "epoch": 4.938803263825929, "grad_norm": 0.06865024858938337, "learning_rate": 3.650019409849659e-08, "loss": 0.8521, "step": 10895 }, { "epoch": 4.939256572982774, "grad_norm": 0.07375538425349819, "learning_rate": 3.596153397576796e-08, "loss": 0.8425, "step": 10896 }, { "epoch": 4.939709882139619, "grad_norm": 0.07197844176995212, "learning_rate": 3.5426876368940265e-08, "loss": 0.864, "step": 10897 }, { "epoch": 4.940163191296464, "grad_norm": 0.07236009985357937, "learning_rate": 3.489622133156623e-08, "loss": 0.8401, "step": 10898 }, { "epoch": 4.940616500453309, "grad_norm": 0.07162462238172404, "learning_rate": 3.436956891679444e-08, "loss": 0.8841, "step": 10899 }, { "epoch": 4.941069809610154, "grad_norm": 0.07378940352550453, "learning_rate": 3.3846919177364936e-08, "loss": 0.8398, "step": 10900 }, { "epoch": 4.941523118766999, "grad_norm": 0.07333982672461968, "learning_rate": 3.332827216562251e-08, "loss": 0.8788, "step": 10901 }, { "epoch": 4.941976427923844, "grad_norm": 0.06797946747112323, "learning_rate": 3.281362793351672e-08, "loss": 0.8386, "step": 10902 }, { "epoch": 4.942429737080689, "grad_norm": 0.07036765357216754, "learning_rate": 3.2302986532584125e-08, "loss": 0.8549, "step": 10903 }, { "epoch": 4.942883046237534, "grad_norm": 0.07211749992116465, "learning_rate": 3.1796348013974906e-08, "loss": 0.8607, "step": 10904 }, { "epoch": 4.943336355394379, "grad_norm": 0.0717130568504481, "learning_rate": 3.129371242842183e-08, "loss": 0.8612, "step": 10905 }, { "epoch": 4.943789664551224, "grad_norm": 0.06582284992730286, "learning_rate": 3.079507982627128e-08, "loss": 0.8505, "step": 10906 }, { "epoch": 4.944242973708069, "grad_norm": 0.07075824988871308, "learning_rate": 3.030045025746109e-08, "loss": 0.8507, "step": 10907 }, { "epoch": 4.944696282864914, "grad_norm": 0.0722252625284295, "learning_rate": 2.980982377152941e-08, "loss": 0.8643, "step": 10908 }, { "epoch": 4.9451495920217585, "grad_norm": 0.07366007736370704, "learning_rate": 2.9323200417610276e-08, "loss": 0.8633, "step": 10909 }, { "epoch": 4.945602901178604, "grad_norm": 0.06943183314422842, "learning_rate": 2.8840580244451355e-08, "loss": 0.8441, "step": 10910 }, { "epoch": 4.946056210335449, "grad_norm": 0.07100499589990819, "learning_rate": 2.8361963300373994e-08, "loss": 0.842, "step": 10911 }, { "epoch": 4.9465095194922934, "grad_norm": 0.06842459578266459, "learning_rate": 2.788734963332651e-08, "loss": 0.8316, "step": 10912 }, { "epoch": 4.946962828649139, "grad_norm": 0.07117544776874243, "learning_rate": 2.7416739290830886e-08, "loss": 0.8501, "step": 10913 }, { "epoch": 4.947416137805984, "grad_norm": 0.07107969385623414, "learning_rate": 2.6950132320031632e-08, "loss": 0.8609, "step": 10914 }, { "epoch": 4.947869446962828, "grad_norm": 0.0718865489541239, "learning_rate": 2.6487528767651372e-08, "loss": 0.8566, "step": 10915 }, { "epoch": 4.948322756119674, "grad_norm": 0.07072636502864371, "learning_rate": 2.6028928680026377e-08, "loss": 0.8431, "step": 10916 }, { "epoch": 4.948776065276519, "grad_norm": 0.06800997273592006, "learning_rate": 2.5574332103088795e-08, "loss": 0.8629, "step": 10917 }, { "epoch": 4.949229374433363, "grad_norm": 0.07331771282624823, "learning_rate": 2.5123739082362208e-08, "loss": 0.851, "step": 10918 }, { "epoch": 4.949682683590209, "grad_norm": 0.06763363770242352, "learning_rate": 2.4677149662974965e-08, "loss": 0.8403, "step": 10919 }, { "epoch": 4.950135992747054, "grad_norm": 0.07756585958177022, "learning_rate": 2.4234563889660166e-08, "loss": 0.8456, "step": 10920 }, { "epoch": 4.950589301903898, "grad_norm": 0.06847554091979027, "learning_rate": 2.3795981806737923e-08, "loss": 0.8501, "step": 10921 }, { "epoch": 4.9510426110607435, "grad_norm": 0.06837190642142807, "learning_rate": 2.336140345813309e-08, "loss": 0.8555, "step": 10922 }, { "epoch": 4.951495920217589, "grad_norm": 0.07195215206288685, "learning_rate": 2.293082888737974e-08, "loss": 0.8383, "step": 10923 }, { "epoch": 4.951949229374433, "grad_norm": 0.06859014555321469, "learning_rate": 2.250425813759005e-08, "loss": 0.8557, "step": 10924 }, { "epoch": 4.952402538531278, "grad_norm": 0.07011853890939518, "learning_rate": 2.2081691251489845e-08, "loss": 0.8666, "step": 10925 }, { "epoch": 4.952855847688124, "grad_norm": 0.07138397597076719, "learning_rate": 2.1663128271405266e-08, "loss": 0.85, "step": 10926 }, { "epoch": 4.953309156844968, "grad_norm": 0.07472103648273275, "learning_rate": 2.124856923924945e-08, "loss": 0.8778, "step": 10927 }, { "epoch": 4.953762466001813, "grad_norm": 0.06907065592535915, "learning_rate": 2.0838014196544743e-08, "loss": 0.8543, "step": 10928 }, { "epoch": 4.954215775158659, "grad_norm": 0.07327934623537491, "learning_rate": 2.043146318440936e-08, "loss": 0.8462, "step": 10929 }, { "epoch": 4.954669084315503, "grad_norm": 0.07765686186871218, "learning_rate": 2.0028916243561846e-08, "loss": 0.8523, "step": 10930 }, { "epoch": 4.955122393472348, "grad_norm": 0.07203794858977176, "learning_rate": 1.9630373414316618e-08, "loss": 0.8326, "step": 10931 }, { "epoch": 4.955575702629194, "grad_norm": 0.07295911793550625, "learning_rate": 1.923583473658841e-08, "loss": 0.8566, "step": 10932 }, { "epoch": 4.956029011786038, "grad_norm": 0.07594818434069098, "learning_rate": 1.8845300249896726e-08, "loss": 0.8637, "step": 10933 }, { "epoch": 4.956482320942883, "grad_norm": 0.0692817521828546, "learning_rate": 1.8458769993348058e-08, "loss": 0.8728, "step": 10934 }, { "epoch": 4.9569356300997285, "grad_norm": 0.07027617881138326, "learning_rate": 1.8076244005653665e-08, "loss": 0.8453, "step": 10935 }, { "epoch": 4.957388939256573, "grad_norm": 0.06725870818846198, "learning_rate": 1.7697722325134005e-08, "loss": 0.8617, "step": 10936 }, { "epoch": 4.957842248413418, "grad_norm": 0.07308098529831712, "learning_rate": 1.7323204989692087e-08, "loss": 0.8519, "step": 10937 }, { "epoch": 4.958295557570263, "grad_norm": 0.07170143626176319, "learning_rate": 1.6952692036835693e-08, "loss": 0.8438, "step": 10938 }, { "epoch": 4.958748866727108, "grad_norm": 0.17071528537600203, "learning_rate": 1.6586183503677355e-08, "loss": 0.8664, "step": 10939 }, { "epoch": 4.959202175883953, "grad_norm": 0.07315612368093548, "learning_rate": 1.6223679426921047e-08, "loss": 0.8521, "step": 10940 }, { "epoch": 4.9596554850407975, "grad_norm": 0.0745230507624495, "learning_rate": 1.586517984287106e-08, "loss": 0.8435, "step": 10941 }, { "epoch": 4.960108794197643, "grad_norm": 0.06821473240109936, "learning_rate": 1.5510684787436448e-08, "loss": 0.8694, "step": 10942 }, { "epoch": 4.960562103354488, "grad_norm": 0.07303275620453534, "learning_rate": 1.5160194296117703e-08, "loss": 0.8536, "step": 10943 }, { "epoch": 4.961015412511332, "grad_norm": 0.06823166050286568, "learning_rate": 1.4813708404024519e-08, "loss": 0.8431, "step": 10944 }, { "epoch": 4.961468721668178, "grad_norm": 0.07044496689429201, "learning_rate": 1.4471227145844701e-08, "loss": 0.8561, "step": 10945 }, { "epoch": 4.961922030825023, "grad_norm": 0.0733905097223399, "learning_rate": 1.4132750555893027e-08, "loss": 0.8572, "step": 10946 }, { "epoch": 4.962375339981867, "grad_norm": 0.07516949279206996, "learning_rate": 1.3798278668057941e-08, "loss": 0.875, "step": 10947 }, { "epoch": 4.962828649138713, "grad_norm": 0.07472774415340931, "learning_rate": 1.3467811515845974e-08, "loss": 0.8427, "step": 10948 }, { "epoch": 4.963281958295558, "grad_norm": 0.073696942089791, "learning_rate": 1.3141349132350655e-08, "loss": 0.8493, "step": 10949 }, { "epoch": 4.963735267452402, "grad_norm": 0.07752289909920977, "learning_rate": 1.2818891550265833e-08, "loss": 0.8777, "step": 10950 }, { "epoch": 4.9641885766092475, "grad_norm": 0.0697600272617098, "learning_rate": 1.2500438801890113e-08, "loss": 0.8477, "step": 10951 }, { "epoch": 4.964641885766093, "grad_norm": 0.06884816045468711, "learning_rate": 1.2185990919117985e-08, "loss": 0.8512, "step": 10952 }, { "epoch": 4.965095194922937, "grad_norm": 0.07364041066962744, "learning_rate": 1.1875547933439813e-08, "loss": 0.8775, "step": 10953 }, { "epoch": 4.965548504079782, "grad_norm": 0.07283935367029004, "learning_rate": 1.1569109875950724e-08, "loss": 0.8497, "step": 10954 }, { "epoch": 4.966001813236628, "grad_norm": 0.07089980181035943, "learning_rate": 1.1266676777337282e-08, "loss": 0.8426, "step": 10955 }, { "epoch": 4.966455122393472, "grad_norm": 0.06720426714028835, "learning_rate": 1.0968248667890813e-08, "loss": 0.8491, "step": 10956 }, { "epoch": 4.966908431550317, "grad_norm": 0.07032717443374295, "learning_rate": 1.067382557749852e-08, "loss": 0.8602, "step": 10957 }, { "epoch": 4.967361740707163, "grad_norm": 0.07093863768127648, "learning_rate": 1.0383407535652368e-08, "loss": 0.8434, "step": 10958 }, { "epoch": 4.967815049864007, "grad_norm": 0.07185390669365396, "learning_rate": 1.0096994571431318e-08, "loss": 0.8767, "step": 10959 }, { "epoch": 4.968268359020852, "grad_norm": 0.06824684950363652, "learning_rate": 9.814586713527974e-09, "loss": 0.8545, "step": 10960 }, { "epoch": 4.968721668177698, "grad_norm": 0.07306374426788341, "learning_rate": 9.536183990217495e-09, "loss": 0.8556, "step": 10961 }, { "epoch": 4.969174977334542, "grad_norm": 0.06767379309027823, "learning_rate": 9.261786429393127e-09, "loss": 0.8371, "step": 10962 }, { "epoch": 4.969628286491387, "grad_norm": 0.07024495702540379, "learning_rate": 8.991394058530667e-09, "loss": 0.8655, "step": 10963 }, { "epoch": 4.970081595648232, "grad_norm": 0.07080115717606093, "learning_rate": 8.725006904710675e-09, "loss": 0.8669, "step": 10964 }, { "epoch": 4.970534904805077, "grad_norm": 0.06987029294926282, "learning_rate": 8.462624994614032e-09, "loss": 0.8661, "step": 10965 }, { "epoch": 4.970988213961922, "grad_norm": 0.07154209857697337, "learning_rate": 8.204248354517497e-09, "loss": 0.8345, "step": 10966 }, { "epoch": 4.9714415231187665, "grad_norm": 0.06749566971971352, "learning_rate": 7.949877010298146e-09, "loss": 0.8478, "step": 10967 }, { "epoch": 4.971894832275612, "grad_norm": 0.07833982170790801, "learning_rate": 7.699510987437819e-09, "loss": 0.8646, "step": 10968 }, { "epoch": 4.972348141432457, "grad_norm": 0.06907471954970962, "learning_rate": 7.453150311000912e-09, "loss": 0.8364, "step": 10969 }, { "epoch": 4.9728014505893015, "grad_norm": 0.06815883979922442, "learning_rate": 7.210795005669901e-09, "loss": 0.8468, "step": 10970 }, { "epoch": 4.973254759746147, "grad_norm": 0.06946795100494477, "learning_rate": 6.972445095714264e-09, "loss": 0.8352, "step": 10971 }, { "epoch": 4.973708068902992, "grad_norm": 0.07312348230538714, "learning_rate": 6.7381006050037945e-09, "loss": 0.8452, "step": 10972 }, { "epoch": 4.974161378059836, "grad_norm": 0.06935052081417592, "learning_rate": 6.5077615570130485e-09, "loss": 0.8602, "step": 10973 }, { "epoch": 4.974614687216682, "grad_norm": 0.07415610472792299, "learning_rate": 6.2814279748080185e-09, "loss": 0.8412, "step": 10974 }, { "epoch": 4.975067996373527, "grad_norm": 0.07141189585797755, "learning_rate": 6.05909988105502e-09, "loss": 0.8693, "step": 10975 }, { "epoch": 4.975521305530371, "grad_norm": 0.07438683450144334, "learning_rate": 5.840777298025125e-09, "loss": 0.8505, "step": 10976 }, { "epoch": 4.975974614687217, "grad_norm": 0.06875372457519163, "learning_rate": 5.626460247585286e-09, "loss": 0.8602, "step": 10977 }, { "epoch": 4.976427923844062, "grad_norm": 0.0705144078254744, "learning_rate": 5.416148751193895e-09, "loss": 0.8396, "step": 10978 }, { "epoch": 4.976881233000906, "grad_norm": 0.07090595807844496, "learning_rate": 5.209842829914102e-09, "loss": 0.8572, "step": 10979 }, { "epoch": 4.9773345421577515, "grad_norm": 0.06924647407922745, "learning_rate": 5.007542504413821e-09, "loss": 0.8193, "step": 10980 }, { "epoch": 4.977787851314597, "grad_norm": 0.06866013116550396, "learning_rate": 4.809247794952399e-09, "loss": 0.8641, "step": 10981 }, { "epoch": 4.978241160471441, "grad_norm": 0.07376646206100122, "learning_rate": 4.614958721385065e-09, "loss": 0.8394, "step": 10982 }, { "epoch": 4.978694469628286, "grad_norm": 0.07077341360153261, "learning_rate": 4.424675303176251e-09, "loss": 0.8434, "step": 10983 }, { "epoch": 4.979147778785132, "grad_norm": 0.06844292264727711, "learning_rate": 4.2383975593818236e-09, "loss": 0.8437, "step": 10984 }, { "epoch": 4.979601087941976, "grad_norm": 0.06840698928578334, "learning_rate": 4.056125508657971e-09, "loss": 0.8363, "step": 10985 }, { "epoch": 4.980054397098821, "grad_norm": 0.07465238335063269, "learning_rate": 3.877859169256759e-09, "loss": 0.8665, "step": 10986 }, { "epoch": 4.980507706255667, "grad_norm": 0.06846263025309236, "learning_rate": 3.7035985590350153e-09, "loss": 0.847, "step": 10987 }, { "epoch": 4.980961015412511, "grad_norm": 0.07079274498097517, "learning_rate": 3.533343695445446e-09, "loss": 0.8719, "step": 10988 }, { "epoch": 4.981414324569356, "grad_norm": 0.071621426260187, "learning_rate": 3.367094595536635e-09, "loss": 0.8647, "step": 10989 }, { "epoch": 4.981867633726202, "grad_norm": 0.06854323455846983, "learning_rate": 3.204851275961929e-09, "loss": 0.8532, "step": 10990 }, { "epoch": 4.982320942883046, "grad_norm": 0.0682164133095486, "learning_rate": 3.046613752970551e-09, "loss": 0.8625, "step": 10991 }, { "epoch": 4.982774252039891, "grad_norm": 0.06743744821191022, "learning_rate": 2.8923820424120453e-09, "loss": 0.8548, "step": 10992 }, { "epoch": 4.9832275611967365, "grad_norm": 0.07142289102109214, "learning_rate": 2.7421561597273938e-09, "loss": 0.8641, "step": 10993 }, { "epoch": 4.983680870353581, "grad_norm": 0.06626468793761803, "learning_rate": 2.5959361199667797e-09, "loss": 0.8551, "step": 10994 }, { "epoch": 4.984134179510426, "grad_norm": 0.07394875704569635, "learning_rate": 2.4537219377718245e-09, "loss": 0.858, "step": 10995 }, { "epoch": 4.984587488667271, "grad_norm": 0.07018766103106938, "learning_rate": 2.31551362738891e-09, "loss": 0.8672, "step": 10996 }, { "epoch": 4.985040797824116, "grad_norm": 0.07388543527871509, "learning_rate": 2.1813112026602966e-09, "loss": 0.8606, "step": 10997 }, { "epoch": 4.985494106980961, "grad_norm": 0.07071371721099061, "learning_rate": 2.051114677024124e-09, "loss": 0.866, "step": 10998 }, { "epoch": 4.985947416137806, "grad_norm": 0.06817219011943554, "learning_rate": 1.9249240635188515e-09, "loss": 0.8579, "step": 10999 }, { "epoch": 4.986400725294651, "grad_norm": 0.06689515259173237, "learning_rate": 1.8027393747832577e-09, "loss": 0.8541, "step": 11000 }, { "epoch": 4.986854034451496, "grad_norm": 0.06772621166173957, "learning_rate": 1.6845606230564416e-09, "loss": 0.8495, "step": 11001 }, { "epoch": 4.987307343608341, "grad_norm": 0.07363016046676793, "learning_rate": 1.570387820177821e-09, "loss": 0.8628, "step": 11002 }, { "epoch": 4.987760652765186, "grad_norm": 0.07420404298964406, "learning_rate": 1.4602209775738118e-09, "loss": 0.8604, "step": 11003 }, { "epoch": 4.988213961922031, "grad_norm": 0.06855230755391765, "learning_rate": 1.3540601062844716e-09, "loss": 0.8578, "step": 11004 }, { "epoch": 4.988667271078876, "grad_norm": 0.06594972831314524, "learning_rate": 1.2519052169368551e-09, "loss": 0.8464, "step": 11005 }, { "epoch": 4.989120580235721, "grad_norm": 0.07016782703040234, "learning_rate": 1.1537563197672187e-09, "loss": 0.8585, "step": 11006 }, { "epoch": 4.989573889392566, "grad_norm": 0.06870143757708885, "learning_rate": 1.0596134246032563e-09, "loss": 0.8694, "step": 11007 }, { "epoch": 4.990027198549411, "grad_norm": 0.06945659551505583, "learning_rate": 9.694765408729822e-10, "loss": 0.8476, "step": 11008 }, { "epoch": 4.9904805077062555, "grad_norm": 0.07085237075514067, "learning_rate": 8.8334567760473e-10, "loss": 0.8513, "step": 11009 }, { "epoch": 4.990933816863101, "grad_norm": 0.06947831498854283, "learning_rate": 8.012208434271529e-10, "loss": 0.8513, "step": 11010 }, { "epoch": 4.991387126019946, "grad_norm": 0.07448731948227764, "learning_rate": 7.231020465603422e-10, "loss": 0.8416, "step": 11011 }, { "epoch": 4.99184043517679, "grad_norm": 0.07653630064208565, "learning_rate": 6.489892948291499e-10, "loss": 0.8645, "step": 11012 }, { "epoch": 4.992293744333636, "grad_norm": 0.07147019257161563, "learning_rate": 5.788825956587474e-10, "loss": 0.8493, "step": 11013 }, { "epoch": 4.992747053490481, "grad_norm": 0.07342770399716086, "learning_rate": 5.127819560701852e-10, "loss": 0.857, "step": 11014 }, { "epoch": 4.993200362647325, "grad_norm": 0.06857083419157299, "learning_rate": 4.5068738268483345e-10, "loss": 0.8531, "step": 11015 }, { "epoch": 4.993653671804171, "grad_norm": 0.06764336104981886, "learning_rate": 3.925988817155002e-10, "loss": 0.867, "step": 11016 }, { "epoch": 4.994106980961016, "grad_norm": 0.07232417431685507, "learning_rate": 3.385164589886358e-10, "loss": 0.8549, "step": 11017 }, { "epoch": 4.99456029011786, "grad_norm": 0.07451797142429244, "learning_rate": 2.8844011991324696e-10, "loss": 0.8657, "step": 11018 }, { "epoch": 4.995013599274706, "grad_norm": 0.06923607058604082, "learning_rate": 2.423698695075416e-10, "loss": 0.839, "step": 11019 }, { "epoch": 4.99546690843155, "grad_norm": 0.06838955725282134, "learning_rate": 2.003057123856067e-10, "loss": 0.8797, "step": 11020 }, { "epoch": 4.995920217588395, "grad_norm": 0.07507104471899696, "learning_rate": 1.6224765276184885e-10, "loss": 0.864, "step": 11021 }, { "epoch": 4.9963735267452405, "grad_norm": 0.07052376584620196, "learning_rate": 1.2819569444655344e-10, "loss": 0.8659, "step": 11022 }, { "epoch": 4.996826835902085, "grad_norm": 0.07306984353468565, "learning_rate": 9.814984084588475e-11, "loss": 0.8671, "step": 11023 }, { "epoch": 4.99728014505893, "grad_norm": 0.06788362638713523, "learning_rate": 7.211009497520849e-11, "loss": 0.8671, "step": 11024 }, { "epoch": 4.997733454215775, "grad_norm": 0.06853298362723625, "learning_rate": 5.007645944132833e-11, "loss": 0.8466, "step": 11025 }, { "epoch": 4.99818676337262, "grad_norm": 0.07474201003510449, "learning_rate": 3.204893645136764e-11, "loss": 0.8577, "step": 11026 }, { "epoch": 4.998640072529465, "grad_norm": 0.06812490779696442, "learning_rate": 1.8027527808328616e-11, "loss": 0.866, "step": 11027 }, { "epoch": 4.99909338168631, "grad_norm": 0.06921788922338136, "learning_rate": 8.012234915533157e-12, "loss": 0.8519, "step": 11028 }, { "epoch": 4.999546690843155, "grad_norm": 0.07168051716379939, "learning_rate": 2.0030587810637713e-12, "loss": 0.8543, "step": 11029 }, { "epoch": 5.0, "grad_norm": 0.07249081317989815, "learning_rate": 0.0, "loss": 0.8464, "step": 11030 }, { "epoch": 5.0, "step": 11030, "total_flos": 1.8504084910768128e+17, "train_loss": 0.0797391855489743, "train_runtime": 13114.3231, "train_samples_per_second": 430.543, "train_steps_per_second": 0.841 } ], "logging_steps": 1, "max_steps": 11030, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8504084910768128e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }