{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34919, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 19.461179384912874, "learning_rate": 9.541984732824428e-09, "loss": 0.4135, "step": 1 }, { "epoch": 0.0, "grad_norm": 27.40173483598227, "learning_rate": 1.9083969465648856e-08, "loss": 1.4675, "step": 2 }, { "epoch": 0.0, "grad_norm": 35.85346407574126, "learning_rate": 2.8625954198473286e-08, "loss": 0.798, "step": 3 }, { "epoch": 0.0, "grad_norm": 34.01998045008578, "learning_rate": 3.816793893129771e-08, "loss": 1.2956, "step": 4 }, { "epoch": 0.0, "grad_norm": 11.710114819805039, "learning_rate": 4.7709923664122145e-08, "loss": 0.2335, "step": 5 }, { "epoch": 0.0, "grad_norm": 18.434795426965877, "learning_rate": 5.725190839694657e-08, "loss": 0.4186, "step": 6 }, { "epoch": 0.0, "grad_norm": 17.458934885858095, "learning_rate": 6.6793893129771e-08, "loss": 0.7398, "step": 7 }, { "epoch": 0.0, "grad_norm": 9.10852025832883, "learning_rate": 7.633587786259542e-08, "loss": 0.3402, "step": 8 }, { "epoch": 0.0, "grad_norm": 26.36156016339031, "learning_rate": 8.587786259541986e-08, "loss": 1.189, "step": 9 }, { "epoch": 0.0, "grad_norm": 28.961689867486474, "learning_rate": 9.541984732824429e-08, "loss": 1.1181, "step": 10 }, { "epoch": 0.0, "grad_norm": 13.968224022710718, "learning_rate": 1.0496183206106872e-07, "loss": 0.7535, "step": 11 }, { "epoch": 0.0, "grad_norm": 26.742228297058425, "learning_rate": 1.1450381679389314e-07, "loss": 0.6667, "step": 12 }, { "epoch": 0.0, "grad_norm": 20.53679091006648, "learning_rate": 1.2404580152671756e-07, "loss": 0.6877, "step": 13 }, { "epoch": 0.0, "grad_norm": 29.646203287854004, "learning_rate": 1.33587786259542e-07, "loss": 1.2676, "step": 14 }, { "epoch": 0.0, "grad_norm": 15.71642879388327, "learning_rate": 1.431297709923664e-07, "loss": 0.5889, "step": 15 }, { "epoch": 0.0, "grad_norm": 22.185070667786206, "learning_rate": 1.5267175572519085e-07, "loss": 0.6627, "step": 16 }, { "epoch": 0.0, "grad_norm": 25.934188006258175, "learning_rate": 1.6221374045801526e-07, "loss": 0.8706, "step": 17 }, { "epoch": 0.0, "grad_norm": 49.87216079011416, "learning_rate": 1.7175572519083973e-07, "loss": 1.3748, "step": 18 }, { "epoch": 0.0, "grad_norm": 37.11629129627371, "learning_rate": 1.8129770992366411e-07, "loss": 0.8949, "step": 19 }, { "epoch": 0.0, "grad_norm": 13.46772185116516, "learning_rate": 1.9083969465648858e-07, "loss": 0.6412, "step": 20 }, { "epoch": 0.0, "grad_norm": 21.221006113374248, "learning_rate": 2.00381679389313e-07, "loss": 0.8523, "step": 21 }, { "epoch": 0.0, "grad_norm": 17.911998525069986, "learning_rate": 2.0992366412213743e-07, "loss": 0.6389, "step": 22 }, { "epoch": 0.0, "grad_norm": 21.78318482484325, "learning_rate": 2.1946564885496185e-07, "loss": 0.926, "step": 23 }, { "epoch": 0.0, "grad_norm": 25.529462810143727, "learning_rate": 2.2900763358778629e-07, "loss": 0.718, "step": 24 }, { "epoch": 0.0, "grad_norm": 20.824752179671346, "learning_rate": 2.385496183206107e-07, "loss": 0.6338, "step": 25 }, { "epoch": 0.0, "grad_norm": 23.892707050380718, "learning_rate": 2.480916030534351e-07, "loss": 1.2316, "step": 26 }, { "epoch": 0.0, "grad_norm": 16.386305771762686, "learning_rate": 2.5763358778625955e-07, "loss": 0.4449, "step": 27 }, { "epoch": 0.0, "grad_norm": 16.61122007699281, "learning_rate": 2.67175572519084e-07, "loss": 0.7832, "step": 28 }, { "epoch": 0.0, "grad_norm": 4.85709221997833, "learning_rate": 2.7671755725190843e-07, "loss": 0.3819, "step": 29 }, { "epoch": 0.0, "grad_norm": 10.248203957605911, "learning_rate": 2.862595419847328e-07, "loss": 0.1432, "step": 30 }, { "epoch": 0.0, "grad_norm": 22.310357340879467, "learning_rate": 2.958015267175573e-07, "loss": 0.7677, "step": 31 }, { "epoch": 0.0, "grad_norm": 23.274030182953517, "learning_rate": 3.053435114503817e-07, "loss": 0.6585, "step": 32 }, { "epoch": 0.0, "grad_norm": 30.510342454716916, "learning_rate": 3.148854961832061e-07, "loss": 0.7281, "step": 33 }, { "epoch": 0.0, "grad_norm": 29.643455197313898, "learning_rate": 3.244274809160305e-07, "loss": 0.9699, "step": 34 }, { "epoch": 0.0, "grad_norm": 4.554130088238115, "learning_rate": 3.33969465648855e-07, "loss": 0.4279, "step": 35 }, { "epoch": 0.0, "grad_norm": 16.40878909853423, "learning_rate": 3.4351145038167945e-07, "loss": 0.4446, "step": 36 }, { "epoch": 0.0, "grad_norm": 10.149297124269209, "learning_rate": 3.530534351145039e-07, "loss": 0.3739, "step": 37 }, { "epoch": 0.0, "grad_norm": 45.08849449488672, "learning_rate": 3.6259541984732823e-07, "loss": 0.3913, "step": 38 }, { "epoch": 0.0, "grad_norm": 16.972340915151825, "learning_rate": 3.721374045801527e-07, "loss": 0.4422, "step": 39 }, { "epoch": 0.0, "grad_norm": 16.154823461599307, "learning_rate": 3.8167938931297716e-07, "loss": 0.5238, "step": 40 }, { "epoch": 0.0, "grad_norm": 13.919665228665703, "learning_rate": 3.912213740458016e-07, "loss": 0.5284, "step": 41 }, { "epoch": 0.0, "grad_norm": 16.5442614053701, "learning_rate": 4.00763358778626e-07, "loss": 0.8258, "step": 42 }, { "epoch": 0.0, "grad_norm": 17.677781741077023, "learning_rate": 4.103053435114504e-07, "loss": 0.6064, "step": 43 }, { "epoch": 0.0, "grad_norm": 13.369616129007284, "learning_rate": 4.1984732824427486e-07, "loss": 0.507, "step": 44 }, { "epoch": 0.0, "grad_norm": 10.764102291056332, "learning_rate": 4.293893129770993e-07, "loss": 0.3782, "step": 45 }, { "epoch": 0.0, "grad_norm": 13.690470612645248, "learning_rate": 4.389312977099237e-07, "loss": 0.2553, "step": 46 }, { "epoch": 0.0, "grad_norm": 12.951376434583816, "learning_rate": 4.4847328244274813e-07, "loss": 0.962, "step": 47 }, { "epoch": 0.0, "grad_norm": 15.289319487971772, "learning_rate": 4.5801526717557257e-07, "loss": 0.73, "step": 48 }, { "epoch": 0.0, "grad_norm": 12.832349632180364, "learning_rate": 4.67557251908397e-07, "loss": 0.49, "step": 49 }, { "epoch": 0.0, "grad_norm": 29.890265614671694, "learning_rate": 4.770992366412214e-07, "loss": 0.5535, "step": 50 }, { "epoch": 0.0, "grad_norm": 14.893173015782342, "learning_rate": 4.866412213740458e-07, "loss": 0.6596, "step": 51 }, { "epoch": 0.0, "grad_norm": 12.838507331766124, "learning_rate": 4.961832061068702e-07, "loss": 0.2895, "step": 52 }, { "epoch": 0.0, "grad_norm": 9.007433047081518, "learning_rate": 5.057251908396947e-07, "loss": 0.4208, "step": 53 }, { "epoch": 0.0, "grad_norm": 3.057199055418722, "learning_rate": 5.152671755725191e-07, "loss": 0.0882, "step": 54 }, { "epoch": 0.0, "grad_norm": 13.768213450903449, "learning_rate": 5.248091603053435e-07, "loss": 0.7915, "step": 55 }, { "epoch": 0.0, "grad_norm": 16.728979100956934, "learning_rate": 5.34351145038168e-07, "loss": 0.4498, "step": 56 }, { "epoch": 0.0, "grad_norm": 10.452917168209048, "learning_rate": 5.438931297709924e-07, "loss": 0.3562, "step": 57 }, { "epoch": 0.0, "grad_norm": 13.423128897701044, "learning_rate": 5.534351145038169e-07, "loss": 0.2803, "step": 58 }, { "epoch": 0.0, "grad_norm": 8.718454007282462, "learning_rate": 5.629770992366412e-07, "loss": 0.5016, "step": 59 }, { "epoch": 0.0, "grad_norm": 16.220671493986316, "learning_rate": 5.725190839694656e-07, "loss": 0.7688, "step": 60 }, { "epoch": 0.0, "grad_norm": 8.281490912621944, "learning_rate": 5.820610687022901e-07, "loss": 0.657, "step": 61 }, { "epoch": 0.0, "grad_norm": 11.402106883459563, "learning_rate": 5.916030534351146e-07, "loss": 0.6175, "step": 62 }, { "epoch": 0.0, "grad_norm": 15.497929588599993, "learning_rate": 6.011450381679391e-07, "loss": 0.6824, "step": 63 }, { "epoch": 0.0, "grad_norm": 6.704703049651699, "learning_rate": 6.106870229007634e-07, "loss": 0.2249, "step": 64 }, { "epoch": 0.0, "grad_norm": 11.652049435291651, "learning_rate": 6.202290076335878e-07, "loss": 0.3392, "step": 65 }, { "epoch": 0.0, "grad_norm": 9.267762939411163, "learning_rate": 6.297709923664122e-07, "loss": 0.6022, "step": 66 }, { "epoch": 0.0, "grad_norm": 10.02026935557992, "learning_rate": 6.393129770992366e-07, "loss": 0.8172, "step": 67 }, { "epoch": 0.0, "grad_norm": 14.572258426105646, "learning_rate": 6.48854961832061e-07, "loss": 0.7418, "step": 68 }, { "epoch": 0.0, "grad_norm": 6.719049810773072, "learning_rate": 6.583969465648855e-07, "loss": 0.6507, "step": 69 }, { "epoch": 0.0, "grad_norm": 7.0590107831006605, "learning_rate": 6.6793893129771e-07, "loss": 0.5126, "step": 70 }, { "epoch": 0.0, "grad_norm": 22.62870777160956, "learning_rate": 6.774809160305345e-07, "loss": 0.3423, "step": 71 }, { "epoch": 0.0, "grad_norm": 16.718864540349465, "learning_rate": 6.870229007633589e-07, "loss": 0.6875, "step": 72 }, { "epoch": 0.0, "grad_norm": 15.561099560090314, "learning_rate": 6.965648854961833e-07, "loss": 0.4547, "step": 73 }, { "epoch": 0.0, "grad_norm": 8.974051739425576, "learning_rate": 7.061068702290078e-07, "loss": 0.8457, "step": 74 }, { "epoch": 0.0, "grad_norm": 4.770154207803899, "learning_rate": 7.15648854961832e-07, "loss": 0.2566, "step": 75 }, { "epoch": 0.0, "grad_norm": 7.341142361899362, "learning_rate": 7.251908396946565e-07, "loss": 0.4309, "step": 76 }, { "epoch": 0.0, "grad_norm": 13.495977296892663, "learning_rate": 7.34732824427481e-07, "loss": 0.3442, "step": 77 }, { "epoch": 0.0, "grad_norm": 10.835453256404575, "learning_rate": 7.442748091603054e-07, "loss": 0.205, "step": 78 }, { "epoch": 0.0, "grad_norm": 8.278862029487863, "learning_rate": 7.538167938931299e-07, "loss": 0.5472, "step": 79 }, { "epoch": 0.0, "grad_norm": 17.298781375190867, "learning_rate": 7.633587786259543e-07, "loss": 0.7141, "step": 80 }, { "epoch": 0.0, "grad_norm": 12.655217032016536, "learning_rate": 7.729007633587788e-07, "loss": 0.5763, "step": 81 }, { "epoch": 0.0, "grad_norm": 13.212207735378591, "learning_rate": 7.824427480916032e-07, "loss": 0.3343, "step": 82 }, { "epoch": 0.0, "grad_norm": 6.717501431157279, "learning_rate": 7.919847328244274e-07, "loss": 0.383, "step": 83 }, { "epoch": 0.0, "grad_norm": 9.18340308779644, "learning_rate": 8.01526717557252e-07, "loss": 0.4636, "step": 84 }, { "epoch": 0.0, "grad_norm": 11.626838784766612, "learning_rate": 8.110687022900764e-07, "loss": 0.4651, "step": 85 }, { "epoch": 0.0, "grad_norm": 10.05450700125113, "learning_rate": 8.206106870229009e-07, "loss": 0.5706, "step": 86 }, { "epoch": 0.0, "grad_norm": 18.939584950911488, "learning_rate": 8.301526717557253e-07, "loss": 0.5415, "step": 87 }, { "epoch": 0.0, "grad_norm": 15.299056149958725, "learning_rate": 8.396946564885497e-07, "loss": 0.9979, "step": 88 }, { "epoch": 0.0, "grad_norm": 14.80114168970095, "learning_rate": 8.492366412213742e-07, "loss": 0.4835, "step": 89 }, { "epoch": 0.0, "grad_norm": 6.570045275249116, "learning_rate": 8.587786259541986e-07, "loss": 0.1498, "step": 90 }, { "epoch": 0.0, "grad_norm": 8.156965787033196, "learning_rate": 8.68320610687023e-07, "loss": 0.7094, "step": 91 }, { "epoch": 0.0, "grad_norm": 13.598561805526016, "learning_rate": 8.778625954198474e-07, "loss": 0.7781, "step": 92 }, { "epoch": 0.0, "grad_norm": 7.337056132575648, "learning_rate": 8.874045801526718e-07, "loss": 0.2046, "step": 93 }, { "epoch": 0.0, "grad_norm": 6.69303151895285, "learning_rate": 8.969465648854963e-07, "loss": 0.3091, "step": 94 }, { "epoch": 0.0, "grad_norm": 5.996828592109914, "learning_rate": 9.064885496183207e-07, "loss": 0.7086, "step": 95 }, { "epoch": 0.0, "grad_norm": 6.005717414480776, "learning_rate": 9.160305343511451e-07, "loss": 0.3392, "step": 96 }, { "epoch": 0.0, "grad_norm": 6.577557935760423, "learning_rate": 9.255725190839696e-07, "loss": 0.3042, "step": 97 }, { "epoch": 0.0, "grad_norm": 5.62529851333289, "learning_rate": 9.35114503816794e-07, "loss": 0.1053, "step": 98 }, { "epoch": 0.0, "grad_norm": 3.1079915290002744, "learning_rate": 9.446564885496185e-07, "loss": 0.2476, "step": 99 }, { "epoch": 0.0, "grad_norm": 9.492365429627377, "learning_rate": 9.54198473282443e-07, "loss": 0.4156, "step": 100 }, { "epoch": 0.0, "grad_norm": 30.337254183495954, "learning_rate": 9.637404580152671e-07, "loss": 0.7147, "step": 101 }, { "epoch": 0.0, "grad_norm": 9.139381363214389, "learning_rate": 9.732824427480916e-07, "loss": 0.4541, "step": 102 }, { "epoch": 0.0, "grad_norm": 6.595051713788448, "learning_rate": 9.82824427480916e-07, "loss": 0.3729, "step": 103 }, { "epoch": 0.0, "grad_norm": 8.090398853216445, "learning_rate": 9.923664122137404e-07, "loss": 0.6013, "step": 104 }, { "epoch": 0.0, "grad_norm": 8.295012586513947, "learning_rate": 1.0019083969465649e-06, "loss": 0.4782, "step": 105 }, { "epoch": 0.0, "grad_norm": 6.537500849007137, "learning_rate": 1.0114503816793893e-06, "loss": 0.5183, "step": 106 }, { "epoch": 0.0, "grad_norm": 10.36283377189244, "learning_rate": 1.0209923664122138e-06, "loss": 0.4518, "step": 107 }, { "epoch": 0.0, "grad_norm": 14.862133841018577, "learning_rate": 1.0305343511450382e-06, "loss": 0.331, "step": 108 }, { "epoch": 0.0, "grad_norm": 11.39983197807836, "learning_rate": 1.0400763358778626e-06, "loss": 0.7702, "step": 109 }, { "epoch": 0.0, "grad_norm": 7.342348719887, "learning_rate": 1.049618320610687e-06, "loss": 0.2767, "step": 110 }, { "epoch": 0.0, "grad_norm": 8.629615875394475, "learning_rate": 1.0591603053435115e-06, "loss": 0.2088, "step": 111 }, { "epoch": 0.0, "grad_norm": 12.319188039453577, "learning_rate": 1.068702290076336e-06, "loss": 0.8217, "step": 112 }, { "epoch": 0.0, "grad_norm": 6.942091822646396, "learning_rate": 1.0782442748091604e-06, "loss": 0.1793, "step": 113 }, { "epoch": 0.0, "grad_norm": 7.322819267337869, "learning_rate": 1.0877862595419848e-06, "loss": 0.574, "step": 114 }, { "epoch": 0.0, "grad_norm": 11.859515806224175, "learning_rate": 1.0973282442748093e-06, "loss": 0.6202, "step": 115 }, { "epoch": 0.0, "grad_norm": 13.476917029983403, "learning_rate": 1.1068702290076337e-06, "loss": 0.6757, "step": 116 }, { "epoch": 0.0, "grad_norm": 10.489055060854676, "learning_rate": 1.1164122137404582e-06, "loss": 0.3365, "step": 117 }, { "epoch": 0.0, "grad_norm": 8.425255488022435, "learning_rate": 1.1259541984732824e-06, "loss": 0.3025, "step": 118 }, { "epoch": 0.0, "grad_norm": 13.942262391843991, "learning_rate": 1.1354961832061068e-06, "loss": 0.5635, "step": 119 }, { "epoch": 0.0, "grad_norm": 10.499165501811849, "learning_rate": 1.1450381679389313e-06, "loss": 0.8242, "step": 120 }, { "epoch": 0.0, "grad_norm": 4.679454638462124, "learning_rate": 1.1545801526717557e-06, "loss": 0.1634, "step": 121 }, { "epoch": 0.0, "grad_norm": 6.1586469447388685, "learning_rate": 1.1641221374045801e-06, "loss": 0.5359, "step": 122 }, { "epoch": 0.0, "grad_norm": 16.079714208122375, "learning_rate": 1.1736641221374046e-06, "loss": 1.0868, "step": 123 }, { "epoch": 0.0, "grad_norm": 11.904120467571106, "learning_rate": 1.1832061068702292e-06, "loss": 0.4149, "step": 124 }, { "epoch": 0.0, "grad_norm": 7.11880960414406, "learning_rate": 1.1927480916030537e-06, "loss": 0.0899, "step": 125 }, { "epoch": 0.0, "grad_norm": 5.524298960886062, "learning_rate": 1.2022900763358781e-06, "loss": 0.3039, "step": 126 }, { "epoch": 0.0, "grad_norm": 6.326640918158072, "learning_rate": 1.2118320610687023e-06, "loss": 0.5305, "step": 127 }, { "epoch": 0.0, "grad_norm": 11.841298758052929, "learning_rate": 1.2213740458015268e-06, "loss": 0.6823, "step": 128 }, { "epoch": 0.0, "grad_norm": 7.865332709645718, "learning_rate": 1.2309160305343512e-06, "loss": 0.5019, "step": 129 }, { "epoch": 0.0, "grad_norm": 4.455738625738411, "learning_rate": 1.2404580152671757e-06, "loss": 0.2804, "step": 130 }, { "epoch": 0.0, "grad_norm": 18.816983623329858, "learning_rate": 1.25e-06, "loss": 0.5206, "step": 131 }, { "epoch": 0.0, "grad_norm": 5.4427934071099235, "learning_rate": 1.2595419847328243e-06, "loss": 0.4138, "step": 132 }, { "epoch": 0.0, "grad_norm": 6.321771894095947, "learning_rate": 1.269083969465649e-06, "loss": 0.5054, "step": 133 }, { "epoch": 0.0, "grad_norm": 3.85074970575292, "learning_rate": 1.2786259541984732e-06, "loss": 0.188, "step": 134 }, { "epoch": 0.0, "grad_norm": 7.441980759277731, "learning_rate": 1.2881679389312979e-06, "loss": 0.6552, "step": 135 }, { "epoch": 0.0, "grad_norm": 5.257905004177898, "learning_rate": 1.297709923664122e-06, "loss": 0.3111, "step": 136 }, { "epoch": 0.0, "grad_norm": 5.534274637442045, "learning_rate": 1.3072519083969467e-06, "loss": 0.5844, "step": 137 }, { "epoch": 0.0, "grad_norm": 7.268507322161942, "learning_rate": 1.316793893129771e-06, "loss": 0.2098, "step": 138 }, { "epoch": 0.0, "grad_norm": 8.327566417977781, "learning_rate": 1.3263358778625956e-06, "loss": 0.4362, "step": 139 }, { "epoch": 0.0, "grad_norm": 7.882770579805998, "learning_rate": 1.33587786259542e-06, "loss": 0.2899, "step": 140 }, { "epoch": 0.0, "grad_norm": 8.982899844928887, "learning_rate": 1.3454198473282443e-06, "loss": 0.5584, "step": 141 }, { "epoch": 0.0, "grad_norm": 7.714634218871394, "learning_rate": 1.354961832061069e-06, "loss": 0.4142, "step": 142 }, { "epoch": 0.0, "grad_norm": 13.310444059695536, "learning_rate": 1.3645038167938932e-06, "loss": 0.4826, "step": 143 }, { "epoch": 0.0, "grad_norm": 14.313638033869951, "learning_rate": 1.3740458015267178e-06, "loss": 0.6142, "step": 144 }, { "epoch": 0.0, "grad_norm": 19.987602778511057, "learning_rate": 1.383587786259542e-06, "loss": 0.4687, "step": 145 }, { "epoch": 0.0, "grad_norm": 9.943820119211349, "learning_rate": 1.3931297709923667e-06, "loss": 0.4156, "step": 146 }, { "epoch": 0.0, "grad_norm": 7.990445690209404, "learning_rate": 1.402671755725191e-06, "loss": 0.3068, "step": 147 }, { "epoch": 0.0, "grad_norm": 6.073874195789443, "learning_rate": 1.4122137404580156e-06, "loss": 0.1898, "step": 148 }, { "epoch": 0.0, "grad_norm": 7.500305805329668, "learning_rate": 1.4217557251908398e-06, "loss": 0.7386, "step": 149 }, { "epoch": 0.0, "grad_norm": 8.901385025952367, "learning_rate": 1.431297709923664e-06, "loss": 0.4294, "step": 150 }, { "epoch": 0.0, "grad_norm": 8.843560853694514, "learning_rate": 1.4408396946564887e-06, "loss": 0.8194, "step": 151 }, { "epoch": 0.0, "grad_norm": 8.578981784077765, "learning_rate": 1.450381679389313e-06, "loss": 0.2544, "step": 152 }, { "epoch": 0.0, "grad_norm": 6.690940542321487, "learning_rate": 1.4599236641221376e-06, "loss": 0.6735, "step": 153 }, { "epoch": 0.0, "grad_norm": 2.807680217532988, "learning_rate": 1.469465648854962e-06, "loss": 0.1931, "step": 154 }, { "epoch": 0.0, "grad_norm": 5.0675035875079075, "learning_rate": 1.4790076335877864e-06, "loss": 0.6736, "step": 155 }, { "epoch": 0.0, "grad_norm": 11.290805647751315, "learning_rate": 1.4885496183206109e-06, "loss": 0.4744, "step": 156 }, { "epoch": 0.0, "grad_norm": 12.139982582092358, "learning_rate": 1.4980916030534353e-06, "loss": 0.73, "step": 157 }, { "epoch": 0.0, "grad_norm": 11.525381129948904, "learning_rate": 1.5076335877862598e-06, "loss": 0.5174, "step": 158 }, { "epoch": 0.0, "grad_norm": 6.148570891778875, "learning_rate": 1.517175572519084e-06, "loss": 0.6588, "step": 159 }, { "epoch": 0.0, "grad_norm": 6.669440582470577, "learning_rate": 1.5267175572519086e-06, "loss": 0.1843, "step": 160 }, { "epoch": 0.0, "grad_norm": 4.540391947258408, "learning_rate": 1.5362595419847329e-06, "loss": 0.0977, "step": 161 }, { "epoch": 0.0, "grad_norm": 21.905175513282945, "learning_rate": 1.5458015267175575e-06, "loss": 0.2583, "step": 162 }, { "epoch": 0.0, "grad_norm": 5.827183788071506, "learning_rate": 1.5553435114503817e-06, "loss": 0.6262, "step": 163 }, { "epoch": 0.0, "grad_norm": 12.217274452345961, "learning_rate": 1.5648854961832064e-06, "loss": 0.6146, "step": 164 }, { "epoch": 0.0, "grad_norm": 5.13828811169445, "learning_rate": 1.5744274809160306e-06, "loss": 0.4177, "step": 165 }, { "epoch": 0.0, "grad_norm": 16.712238690909132, "learning_rate": 1.5839694656488549e-06, "loss": 0.2592, "step": 166 }, { "epoch": 0.0, "grad_norm": 8.509735198422911, "learning_rate": 1.5935114503816795e-06, "loss": 0.9257, "step": 167 }, { "epoch": 0.0, "grad_norm": 12.678594912347977, "learning_rate": 1.603053435114504e-06, "loss": 0.2473, "step": 168 }, { "epoch": 0.0, "grad_norm": 12.805964892049573, "learning_rate": 1.6125954198473284e-06, "loss": 0.5479, "step": 169 }, { "epoch": 0.0, "grad_norm": 9.025065908429106, "learning_rate": 1.6221374045801528e-06, "loss": 0.5433, "step": 170 }, { "epoch": 0.0, "grad_norm": 7.559452609373045, "learning_rate": 1.6316793893129773e-06, "loss": 0.4386, "step": 171 }, { "epoch": 0.0, "grad_norm": 4.648292757233111, "learning_rate": 1.6412213740458017e-06, "loss": 0.2735, "step": 172 }, { "epoch": 0.0, "grad_norm": 15.00863557829282, "learning_rate": 1.6507633587786261e-06, "loss": 1.0064, "step": 173 }, { "epoch": 0.0, "grad_norm": 7.981778732387166, "learning_rate": 1.6603053435114506e-06, "loss": 0.4496, "step": 174 }, { "epoch": 0.01, "grad_norm": 7.562262854522844, "learning_rate": 1.6698473282442748e-06, "loss": 0.5142, "step": 175 }, { "epoch": 0.01, "grad_norm": 8.02518872666, "learning_rate": 1.6793893129770995e-06, "loss": 0.2208, "step": 176 }, { "epoch": 0.01, "grad_norm": 6.646002461385027, "learning_rate": 1.6889312977099237e-06, "loss": 0.4303, "step": 177 }, { "epoch": 0.01, "grad_norm": 14.727004407328618, "learning_rate": 1.6984732824427483e-06, "loss": 0.6025, "step": 178 }, { "epoch": 0.01, "grad_norm": 12.79208835979479, "learning_rate": 1.7080152671755726e-06, "loss": 0.5098, "step": 179 }, { "epoch": 0.01, "grad_norm": 10.177560368852916, "learning_rate": 1.7175572519083972e-06, "loss": 0.5403, "step": 180 }, { "epoch": 0.01, "grad_norm": 5.936253587501199, "learning_rate": 1.7270992366412214e-06, "loss": 0.2745, "step": 181 }, { "epoch": 0.01, "grad_norm": 6.704116783485999, "learning_rate": 1.736641221374046e-06, "loss": 0.4868, "step": 182 }, { "epoch": 0.01, "grad_norm": 7.678922752885029, "learning_rate": 1.7461832061068703e-06, "loss": 0.6133, "step": 183 }, { "epoch": 0.01, "grad_norm": 4.297342809007025, "learning_rate": 1.7557251908396948e-06, "loss": 0.3143, "step": 184 }, { "epoch": 0.01, "grad_norm": 9.916383007277476, "learning_rate": 1.7652671755725192e-06, "loss": 0.6086, "step": 185 }, { "epoch": 0.01, "grad_norm": 5.814128606554231, "learning_rate": 1.7748091603053436e-06, "loss": 0.5477, "step": 186 }, { "epoch": 0.01, "grad_norm": 8.766590629605664, "learning_rate": 1.784351145038168e-06, "loss": 0.6918, "step": 187 }, { "epoch": 0.01, "grad_norm": 5.772339299584916, "learning_rate": 1.7938931297709925e-06, "loss": 0.2571, "step": 188 }, { "epoch": 0.01, "grad_norm": 14.10927825083465, "learning_rate": 1.803435114503817e-06, "loss": 0.1511, "step": 189 }, { "epoch": 0.01, "grad_norm": 8.938646016306267, "learning_rate": 1.8129770992366414e-06, "loss": 0.3436, "step": 190 }, { "epoch": 0.01, "grad_norm": 10.217383803753918, "learning_rate": 1.8225190839694658e-06, "loss": 0.5396, "step": 191 }, { "epoch": 0.01, "grad_norm": 8.538473292175473, "learning_rate": 1.8320610687022903e-06, "loss": 0.5867, "step": 192 }, { "epoch": 0.01, "grad_norm": 5.194783348728046, "learning_rate": 1.8416030534351145e-06, "loss": 0.3013, "step": 193 }, { "epoch": 0.01, "grad_norm": 10.229380173536478, "learning_rate": 1.8511450381679392e-06, "loss": 0.3913, "step": 194 }, { "epoch": 0.01, "grad_norm": 6.891654873998932, "learning_rate": 1.8606870229007634e-06, "loss": 0.6917, "step": 195 }, { "epoch": 0.01, "grad_norm": 16.557871679286848, "learning_rate": 1.870229007633588e-06, "loss": 0.7315, "step": 196 }, { "epoch": 0.01, "grad_norm": 5.829890947516121, "learning_rate": 1.8797709923664123e-06, "loss": 0.0932, "step": 197 }, { "epoch": 0.01, "grad_norm": 6.812948089701964, "learning_rate": 1.889312977099237e-06, "loss": 0.3199, "step": 198 }, { "epoch": 0.01, "grad_norm": 16.694843405245933, "learning_rate": 1.8988549618320611e-06, "loss": 0.5089, "step": 199 }, { "epoch": 0.01, "grad_norm": 13.374600128185785, "learning_rate": 1.908396946564886e-06, "loss": 0.5667, "step": 200 }, { "epoch": 0.01, "grad_norm": 4.7603338917464635, "learning_rate": 1.9179389312977102e-06, "loss": 0.3522, "step": 201 }, { "epoch": 0.01, "grad_norm": 13.387445737869468, "learning_rate": 1.9274809160305343e-06, "loss": 0.4846, "step": 202 }, { "epoch": 0.01, "grad_norm": 6.655394091670447, "learning_rate": 1.937022900763359e-06, "loss": 0.1724, "step": 203 }, { "epoch": 0.01, "grad_norm": 10.056731424608907, "learning_rate": 1.946564885496183e-06, "loss": 0.6234, "step": 204 }, { "epoch": 0.01, "grad_norm": 11.939712399288954, "learning_rate": 1.956106870229008e-06, "loss": 0.5313, "step": 205 }, { "epoch": 0.01, "grad_norm": 8.585208018364925, "learning_rate": 1.965648854961832e-06, "loss": 0.327, "step": 206 }, { "epoch": 0.01, "grad_norm": 6.724733075088026, "learning_rate": 1.975190839694657e-06, "loss": 0.4, "step": 207 }, { "epoch": 0.01, "grad_norm": 7.859860839388365, "learning_rate": 1.984732824427481e-06, "loss": 0.296, "step": 208 }, { "epoch": 0.01, "grad_norm": 7.129630341009166, "learning_rate": 1.9942748091603058e-06, "loss": 0.2878, "step": 209 }, { "epoch": 0.01, "grad_norm": 13.892961028877263, "learning_rate": 2.0038167938931298e-06, "loss": 0.3679, "step": 210 }, { "epoch": 0.01, "grad_norm": 11.31856496523806, "learning_rate": 2.013358778625954e-06, "loss": 0.3448, "step": 211 }, { "epoch": 0.01, "grad_norm": 8.052726440277743, "learning_rate": 2.0229007633587786e-06, "loss": 0.2327, "step": 212 }, { "epoch": 0.01, "grad_norm": 5.790686128708501, "learning_rate": 2.032442748091603e-06, "loss": 0.1064, "step": 213 }, { "epoch": 0.01, "grad_norm": 13.103482873943644, "learning_rate": 2.0419847328244275e-06, "loss": 0.442, "step": 214 }, { "epoch": 0.01, "grad_norm": 6.624435112820175, "learning_rate": 2.051526717557252e-06, "loss": 0.2277, "step": 215 }, { "epoch": 0.01, "grad_norm": 11.76078106991673, "learning_rate": 2.0610687022900764e-06, "loss": 0.2329, "step": 216 }, { "epoch": 0.01, "grad_norm": 17.303009847430403, "learning_rate": 2.070610687022901e-06, "loss": 0.5925, "step": 217 }, { "epoch": 0.01, "grad_norm": 6.445420734768302, "learning_rate": 2.0801526717557253e-06, "loss": 0.4006, "step": 218 }, { "epoch": 0.01, "grad_norm": 8.665136630152688, "learning_rate": 2.0896946564885497e-06, "loss": 0.2364, "step": 219 }, { "epoch": 0.01, "grad_norm": 4.119468216510575, "learning_rate": 2.099236641221374e-06, "loss": 0.3081, "step": 220 }, { "epoch": 0.01, "grad_norm": 3.7067661974486072, "learning_rate": 2.1087786259541986e-06, "loss": 0.2343, "step": 221 }, { "epoch": 0.01, "grad_norm": 4.013868251924948, "learning_rate": 2.118320610687023e-06, "loss": 0.2081, "step": 222 }, { "epoch": 0.01, "grad_norm": 10.085263868746623, "learning_rate": 2.1278625954198475e-06, "loss": 0.5842, "step": 223 }, { "epoch": 0.01, "grad_norm": 7.808145392838687, "learning_rate": 2.137404580152672e-06, "loss": 0.5001, "step": 224 }, { "epoch": 0.01, "grad_norm": 8.566458295323972, "learning_rate": 2.1469465648854964e-06, "loss": 0.5007, "step": 225 }, { "epoch": 0.01, "grad_norm": 9.277834228399724, "learning_rate": 2.156488549618321e-06, "loss": 0.7321, "step": 226 }, { "epoch": 0.01, "grad_norm": 25.390800423823084, "learning_rate": 2.1660305343511452e-06, "loss": 0.3018, "step": 227 }, { "epoch": 0.01, "grad_norm": 6.144210816774936, "learning_rate": 2.1755725190839697e-06, "loss": 0.4682, "step": 228 }, { "epoch": 0.01, "grad_norm": 9.77046257134391, "learning_rate": 2.185114503816794e-06, "loss": 0.5446, "step": 229 }, { "epoch": 0.01, "grad_norm": 9.351622056253547, "learning_rate": 2.1946564885496186e-06, "loss": 0.473, "step": 230 }, { "epoch": 0.01, "grad_norm": 12.801715461695242, "learning_rate": 2.204198473282443e-06, "loss": 0.4284, "step": 231 }, { "epoch": 0.01, "grad_norm": 5.007391306382616, "learning_rate": 2.2137404580152674e-06, "loss": 0.5145, "step": 232 }, { "epoch": 0.01, "grad_norm": 5.32100656158209, "learning_rate": 2.223282442748092e-06, "loss": 0.4312, "step": 233 }, { "epoch": 0.01, "grad_norm": 7.582407010970664, "learning_rate": 2.2328244274809163e-06, "loss": 0.2825, "step": 234 }, { "epoch": 0.01, "grad_norm": 14.511989109401128, "learning_rate": 2.2423664122137408e-06, "loss": 0.5003, "step": 235 }, { "epoch": 0.01, "grad_norm": 8.664475110396182, "learning_rate": 2.2519083969465648e-06, "loss": 0.2663, "step": 236 }, { "epoch": 0.01, "grad_norm": 11.456897400556356, "learning_rate": 2.2614503816793896e-06, "loss": 0.4265, "step": 237 }, { "epoch": 0.01, "grad_norm": 6.246849295871935, "learning_rate": 2.2709923664122137e-06, "loss": 0.4576, "step": 238 }, { "epoch": 0.01, "grad_norm": 9.22341989460279, "learning_rate": 2.2805343511450385e-06, "loss": 0.4203, "step": 239 }, { "epoch": 0.01, "grad_norm": 7.773235928016242, "learning_rate": 2.2900763358778625e-06, "loss": 0.46, "step": 240 }, { "epoch": 0.01, "grad_norm": 5.791679501233414, "learning_rate": 2.2996183206106874e-06, "loss": 0.5495, "step": 241 }, { "epoch": 0.01, "grad_norm": 5.164616218924862, "learning_rate": 2.3091603053435114e-06, "loss": 0.1131, "step": 242 }, { "epoch": 0.01, "grad_norm": 7.809497256197851, "learning_rate": 2.3187022900763363e-06, "loss": 0.3466, "step": 243 }, { "epoch": 0.01, "grad_norm": 10.319395915471105, "learning_rate": 2.3282442748091603e-06, "loss": 0.5107, "step": 244 }, { "epoch": 0.01, "grad_norm": 11.553237566142883, "learning_rate": 2.3377862595419847e-06, "loss": 0.3743, "step": 245 }, { "epoch": 0.01, "grad_norm": 8.946046917347779, "learning_rate": 2.347328244274809e-06, "loss": 0.2813, "step": 246 }, { "epoch": 0.01, "grad_norm": 8.451108794288787, "learning_rate": 2.3568702290076336e-06, "loss": 0.5428, "step": 247 }, { "epoch": 0.01, "grad_norm": 6.570778992338893, "learning_rate": 2.3664122137404585e-06, "loss": 0.2076, "step": 248 }, { "epoch": 0.01, "grad_norm": 8.101496520550851, "learning_rate": 2.3759541984732825e-06, "loss": 0.4525, "step": 249 }, { "epoch": 0.01, "grad_norm": 10.326964528134898, "learning_rate": 2.3854961832061074e-06, "loss": 0.4428, "step": 250 }, { "epoch": 0.01, "grad_norm": 7.291808107003389, "learning_rate": 2.3950381679389314e-06, "loss": 0.5462, "step": 251 }, { "epoch": 0.01, "grad_norm": 12.487110587798492, "learning_rate": 2.4045801526717562e-06, "loss": 0.682, "step": 252 }, { "epoch": 0.01, "grad_norm": 10.490096553507232, "learning_rate": 2.4141221374045802e-06, "loss": 0.6414, "step": 253 }, { "epoch": 0.01, "grad_norm": 9.1738131553263, "learning_rate": 2.4236641221374047e-06, "loss": 0.7733, "step": 254 }, { "epoch": 0.01, "grad_norm": 28.54354618501743, "learning_rate": 2.433206106870229e-06, "loss": 0.6308, "step": 255 }, { "epoch": 0.01, "grad_norm": 12.474865018615073, "learning_rate": 2.4427480916030536e-06, "loss": 0.3026, "step": 256 }, { "epoch": 0.01, "grad_norm": 13.196644220630981, "learning_rate": 2.452290076335878e-06, "loss": 0.3815, "step": 257 }, { "epoch": 0.01, "grad_norm": 7.5399202323357075, "learning_rate": 2.4618320610687024e-06, "loss": 0.3289, "step": 258 }, { "epoch": 0.01, "grad_norm": 13.628337766250002, "learning_rate": 2.471374045801527e-06, "loss": 0.3473, "step": 259 }, { "epoch": 0.01, "grad_norm": 11.336620433802457, "learning_rate": 2.4809160305343513e-06, "loss": 0.9127, "step": 260 }, { "epoch": 0.01, "grad_norm": 8.775055516167896, "learning_rate": 2.4904580152671758e-06, "loss": 0.3617, "step": 261 }, { "epoch": 0.01, "grad_norm": 11.111225387197514, "learning_rate": 2.5e-06, "loss": 0.3659, "step": 262 }, { "epoch": 0.01, "grad_norm": 8.362684275787815, "learning_rate": 2.5095419847328246e-06, "loss": 0.4468, "step": 263 }, { "epoch": 0.01, "grad_norm": 7.058451884901859, "learning_rate": 2.5190839694656487e-06, "loss": 0.3273, "step": 264 }, { "epoch": 0.01, "grad_norm": 6.158359727643774, "learning_rate": 2.5286259541984735e-06, "loss": 0.4528, "step": 265 }, { "epoch": 0.01, "grad_norm": 5.707373303992646, "learning_rate": 2.538167938931298e-06, "loss": 0.3078, "step": 266 }, { "epoch": 0.01, "grad_norm": 4.304447672786978, "learning_rate": 2.5477099236641224e-06, "loss": 0.1659, "step": 267 }, { "epoch": 0.01, "grad_norm": 5.331850153603892, "learning_rate": 2.5572519083969464e-06, "loss": 0.2319, "step": 268 }, { "epoch": 0.01, "grad_norm": 9.424632717433635, "learning_rate": 2.5667938931297713e-06, "loss": 0.4431, "step": 269 }, { "epoch": 0.01, "grad_norm": 13.642616432480219, "learning_rate": 2.5763358778625957e-06, "loss": 0.1478, "step": 270 }, { "epoch": 0.01, "grad_norm": 6.881689112041465, "learning_rate": 2.58587786259542e-06, "loss": 0.4386, "step": 271 }, { "epoch": 0.01, "grad_norm": 5.023741195302719, "learning_rate": 2.595419847328244e-06, "loss": 0.2515, "step": 272 }, { "epoch": 0.01, "grad_norm": 8.82423625807526, "learning_rate": 2.6049618320610686e-06, "loss": 0.8651, "step": 273 }, { "epoch": 0.01, "grad_norm": 12.504110194613338, "learning_rate": 2.6145038167938935e-06, "loss": 0.6303, "step": 274 }, { "epoch": 0.01, "grad_norm": 6.278031544727943, "learning_rate": 2.624045801526718e-06, "loss": 0.5402, "step": 275 }, { "epoch": 0.01, "grad_norm": 11.591062496482508, "learning_rate": 2.633587786259542e-06, "loss": 0.4222, "step": 276 }, { "epoch": 0.01, "grad_norm": 62.20670771875493, "learning_rate": 2.6431297709923664e-06, "loss": 0.6725, "step": 277 }, { "epoch": 0.01, "grad_norm": 7.3220748164239335, "learning_rate": 2.6526717557251912e-06, "loss": 0.3947, "step": 278 }, { "epoch": 0.01, "grad_norm": 11.249448805127718, "learning_rate": 2.6622137404580157e-06, "loss": 0.5091, "step": 279 }, { "epoch": 0.01, "grad_norm": 4.989842970217764, "learning_rate": 2.67175572519084e-06, "loss": 0.2716, "step": 280 }, { "epoch": 0.01, "grad_norm": 5.734718000667528, "learning_rate": 2.681297709923664e-06, "loss": 0.2922, "step": 281 }, { "epoch": 0.01, "grad_norm": 4.854399534978511, "learning_rate": 2.6908396946564886e-06, "loss": 0.4904, "step": 282 }, { "epoch": 0.01, "grad_norm": 9.390299507377645, "learning_rate": 2.7003816793893134e-06, "loss": 0.3279, "step": 283 }, { "epoch": 0.01, "grad_norm": 16.255477862296644, "learning_rate": 2.709923664122138e-06, "loss": 0.8624, "step": 284 }, { "epoch": 0.01, "grad_norm": 15.496336596321536, "learning_rate": 2.719465648854962e-06, "loss": 0.8259, "step": 285 }, { "epoch": 0.01, "grad_norm": 7.970132146910048, "learning_rate": 2.7290076335877863e-06, "loss": 0.6483, "step": 286 }, { "epoch": 0.01, "grad_norm": 9.483961973042476, "learning_rate": 2.738549618320611e-06, "loss": 0.444, "step": 287 }, { "epoch": 0.01, "grad_norm": 9.905787797127186, "learning_rate": 2.7480916030534356e-06, "loss": 0.4776, "step": 288 }, { "epoch": 0.01, "grad_norm": 5.205894122834622, "learning_rate": 2.7576335877862596e-06, "loss": 0.1948, "step": 289 }, { "epoch": 0.01, "grad_norm": 6.46493211207551, "learning_rate": 2.767175572519084e-06, "loss": 0.2886, "step": 290 }, { "epoch": 0.01, "grad_norm": 6.202790758174683, "learning_rate": 2.7767175572519085e-06, "loss": 0.3608, "step": 291 }, { "epoch": 0.01, "grad_norm": 8.55016050355405, "learning_rate": 2.7862595419847334e-06, "loss": 0.5746, "step": 292 }, { "epoch": 0.01, "grad_norm": 8.371491736467151, "learning_rate": 2.7958015267175574e-06, "loss": 0.4912, "step": 293 }, { "epoch": 0.01, "grad_norm": 13.510389639249704, "learning_rate": 2.805343511450382e-06, "loss": 0.6147, "step": 294 }, { "epoch": 0.01, "grad_norm": 5.5656683824273845, "learning_rate": 2.8148854961832063e-06, "loss": 0.5481, "step": 295 }, { "epoch": 0.01, "grad_norm": 6.316886548599491, "learning_rate": 2.824427480916031e-06, "loss": 0.1301, "step": 296 }, { "epoch": 0.01, "grad_norm": 12.172533911130705, "learning_rate": 2.833969465648855e-06, "loss": 0.6444, "step": 297 }, { "epoch": 0.01, "grad_norm": 16.98231297305059, "learning_rate": 2.8435114503816796e-06, "loss": 0.2971, "step": 298 }, { "epoch": 0.01, "grad_norm": 6.54306239730091, "learning_rate": 2.853053435114504e-06, "loss": 0.178, "step": 299 }, { "epoch": 0.01, "grad_norm": 8.580030997338135, "learning_rate": 2.862595419847328e-06, "loss": 0.4388, "step": 300 }, { "epoch": 0.01, "grad_norm": 8.610268769715795, "learning_rate": 2.872137404580153e-06, "loss": 0.3938, "step": 301 }, { "epoch": 0.01, "grad_norm": 4.350533409957521, "learning_rate": 2.8816793893129774e-06, "loss": 0.1335, "step": 302 }, { "epoch": 0.01, "grad_norm": 5.496114658947657, "learning_rate": 2.891221374045802e-06, "loss": 0.302, "step": 303 }, { "epoch": 0.01, "grad_norm": 10.28302635917635, "learning_rate": 2.900763358778626e-06, "loss": 0.2788, "step": 304 }, { "epoch": 0.01, "grad_norm": 9.005975699764493, "learning_rate": 2.9103053435114507e-06, "loss": 0.4355, "step": 305 }, { "epoch": 0.01, "grad_norm": 11.242667457668606, "learning_rate": 2.919847328244275e-06, "loss": 0.4546, "step": 306 }, { "epoch": 0.01, "grad_norm": 4.594785573582432, "learning_rate": 2.9293893129770996e-06, "loss": 0.096, "step": 307 }, { "epoch": 0.01, "grad_norm": 13.023047777190346, "learning_rate": 2.938931297709924e-06, "loss": 0.3545, "step": 308 }, { "epoch": 0.01, "grad_norm": 4.721981937553245, "learning_rate": 2.948473282442748e-06, "loss": 0.127, "step": 309 }, { "epoch": 0.01, "grad_norm": 15.250785025731037, "learning_rate": 2.958015267175573e-06, "loss": 0.7549, "step": 310 }, { "epoch": 0.01, "grad_norm": 31.56953815510257, "learning_rate": 2.9675572519083973e-06, "loss": 0.8759, "step": 311 }, { "epoch": 0.01, "grad_norm": 8.926940655690215, "learning_rate": 2.9770992366412218e-06, "loss": 0.671, "step": 312 }, { "epoch": 0.01, "grad_norm": 7.921073557023465, "learning_rate": 2.9866412213740458e-06, "loss": 0.6232, "step": 313 }, { "epoch": 0.01, "grad_norm": 5.281028246315191, "learning_rate": 2.9961832061068706e-06, "loss": 0.4361, "step": 314 }, { "epoch": 0.01, "grad_norm": 11.289426086273025, "learning_rate": 3.005725190839695e-06, "loss": 0.3363, "step": 315 }, { "epoch": 0.01, "grad_norm": 6.889258874251736, "learning_rate": 3.0152671755725195e-06, "loss": 0.5807, "step": 316 }, { "epoch": 0.01, "grad_norm": 3.3222919084068807, "learning_rate": 3.0248091603053435e-06, "loss": 0.1972, "step": 317 }, { "epoch": 0.01, "grad_norm": 6.55734086466405, "learning_rate": 3.034351145038168e-06, "loss": 0.4885, "step": 318 }, { "epoch": 0.01, "grad_norm": 6.808555370929554, "learning_rate": 3.043893129770993e-06, "loss": 0.6575, "step": 319 }, { "epoch": 0.01, "grad_norm": 7.655311682676503, "learning_rate": 3.0534351145038173e-06, "loss": 0.4846, "step": 320 }, { "epoch": 0.01, "grad_norm": 6.818529662882669, "learning_rate": 3.0629770992366413e-06, "loss": 0.5876, "step": 321 }, { "epoch": 0.01, "grad_norm": 8.559969897172707, "learning_rate": 3.0725190839694657e-06, "loss": 0.5201, "step": 322 }, { "epoch": 0.01, "grad_norm": 8.083894952619094, "learning_rate": 3.0820610687022906e-06, "loss": 0.251, "step": 323 }, { "epoch": 0.01, "grad_norm": 7.232689943426928, "learning_rate": 3.091603053435115e-06, "loss": 0.8014, "step": 324 }, { "epoch": 0.01, "grad_norm": 7.835890947623901, "learning_rate": 3.101145038167939e-06, "loss": 0.604, "step": 325 }, { "epoch": 0.01, "grad_norm": 14.285694590963685, "learning_rate": 3.1106870229007635e-06, "loss": 0.4092, "step": 326 }, { "epoch": 0.01, "grad_norm": 6.5656972090442025, "learning_rate": 3.120229007633588e-06, "loss": 0.4082, "step": 327 }, { "epoch": 0.01, "grad_norm": 21.856024855560477, "learning_rate": 3.129770992366413e-06, "loss": 0.4777, "step": 328 }, { "epoch": 0.01, "grad_norm": 14.658086730779555, "learning_rate": 3.139312977099237e-06, "loss": 0.6115, "step": 329 }, { "epoch": 0.01, "grad_norm": 7.819551200280992, "learning_rate": 3.1488549618320612e-06, "loss": 0.1918, "step": 330 }, { "epoch": 0.01, "grad_norm": 11.678251472913212, "learning_rate": 3.1583969465648857e-06, "loss": 0.4083, "step": 331 }, { "epoch": 0.01, "grad_norm": 4.766715653485669, "learning_rate": 3.1679389312977097e-06, "loss": 0.5719, "step": 332 }, { "epoch": 0.01, "grad_norm": 7.423901026407218, "learning_rate": 3.1774809160305346e-06, "loss": 0.1648, "step": 333 }, { "epoch": 0.01, "grad_norm": 9.765241984676422, "learning_rate": 3.187022900763359e-06, "loss": 0.3681, "step": 334 }, { "epoch": 0.01, "grad_norm": 4.340259892575183, "learning_rate": 3.1965648854961834e-06, "loss": 0.2225, "step": 335 }, { "epoch": 0.01, "grad_norm": 6.406460642259143, "learning_rate": 3.206106870229008e-06, "loss": 0.4357, "step": 336 }, { "epoch": 0.01, "grad_norm": 9.431465291068037, "learning_rate": 3.2156488549618327e-06, "loss": 0.6791, "step": 337 }, { "epoch": 0.01, "grad_norm": 10.861578101752693, "learning_rate": 3.2251908396946568e-06, "loss": 0.7174, "step": 338 }, { "epoch": 0.01, "grad_norm": 5.270900539929686, "learning_rate": 3.234732824427481e-06, "loss": 0.4115, "step": 339 }, { "epoch": 0.01, "grad_norm": 14.33334054318328, "learning_rate": 3.2442748091603056e-06, "loss": 0.2904, "step": 340 }, { "epoch": 0.01, "grad_norm": 4.875178162669271, "learning_rate": 3.2538167938931297e-06, "loss": 0.3414, "step": 341 }, { "epoch": 0.01, "grad_norm": 4.970142193454503, "learning_rate": 3.2633587786259545e-06, "loss": 0.2696, "step": 342 }, { "epoch": 0.01, "grad_norm": 5.889940338415491, "learning_rate": 3.272900763358779e-06, "loss": 0.5144, "step": 343 }, { "epoch": 0.01, "grad_norm": 8.167511630299591, "learning_rate": 3.2824427480916034e-06, "loss": 0.2489, "step": 344 }, { "epoch": 0.01, "grad_norm": 7.660235772700032, "learning_rate": 3.2919847328244274e-06, "loss": 0.33, "step": 345 }, { "epoch": 0.01, "grad_norm": 11.60613422161442, "learning_rate": 3.3015267175572523e-06, "loss": 0.4818, "step": 346 }, { "epoch": 0.01, "grad_norm": 7.180292297061254, "learning_rate": 3.3110687022900767e-06, "loss": 0.3942, "step": 347 }, { "epoch": 0.01, "grad_norm": 9.608162755408749, "learning_rate": 3.320610687022901e-06, "loss": 0.3956, "step": 348 }, { "epoch": 0.01, "grad_norm": 18.63662049989822, "learning_rate": 3.330152671755725e-06, "loss": 0.4006, "step": 349 }, { "epoch": 0.01, "grad_norm": 9.171652380144057, "learning_rate": 3.3396946564885496e-06, "loss": 0.2754, "step": 350 }, { "epoch": 0.01, "grad_norm": 6.917702375180708, "learning_rate": 3.3492366412213745e-06, "loss": 0.823, "step": 351 }, { "epoch": 0.01, "grad_norm": 15.752922120062035, "learning_rate": 3.358778625954199e-06, "loss": 0.5142, "step": 352 }, { "epoch": 0.01, "grad_norm": 7.012587945560032, "learning_rate": 3.368320610687023e-06, "loss": 0.3255, "step": 353 }, { "epoch": 0.01, "grad_norm": 11.976762883151721, "learning_rate": 3.3778625954198474e-06, "loss": 0.3232, "step": 354 }, { "epoch": 0.01, "grad_norm": 7.012260802842237, "learning_rate": 3.3874045801526722e-06, "loss": 0.5301, "step": 355 }, { "epoch": 0.01, "grad_norm": 8.444922340266361, "learning_rate": 3.3969465648854967e-06, "loss": 0.9746, "step": 356 }, { "epoch": 0.01, "grad_norm": 6.744612875278448, "learning_rate": 3.4064885496183207e-06, "loss": 0.5573, "step": 357 }, { "epoch": 0.01, "grad_norm": 2.6613344224625455, "learning_rate": 3.416030534351145e-06, "loss": 0.3383, "step": 358 }, { "epoch": 0.01, "grad_norm": 20.803612584225977, "learning_rate": 3.4255725190839696e-06, "loss": 0.2707, "step": 359 }, { "epoch": 0.01, "grad_norm": 15.660622266047625, "learning_rate": 3.4351145038167944e-06, "loss": 0.2154, "step": 360 }, { "epoch": 0.01, "grad_norm": 6.73198376041523, "learning_rate": 3.4446564885496185e-06, "loss": 0.4907, "step": 361 }, { "epoch": 0.01, "grad_norm": 5.7732482796556885, "learning_rate": 3.454198473282443e-06, "loss": 0.7616, "step": 362 }, { "epoch": 0.01, "grad_norm": 9.096982243861389, "learning_rate": 3.4637404580152673e-06, "loss": 0.712, "step": 363 }, { "epoch": 0.01, "grad_norm": 15.27124853814374, "learning_rate": 3.473282442748092e-06, "loss": 0.7203, "step": 364 }, { "epoch": 0.01, "grad_norm": 8.603213977180395, "learning_rate": 3.4828244274809166e-06, "loss": 0.5007, "step": 365 }, { "epoch": 0.01, "grad_norm": 8.161499029827755, "learning_rate": 3.4923664122137406e-06, "loss": 0.5073, "step": 366 }, { "epoch": 0.01, "grad_norm": 8.42664158389829, "learning_rate": 3.501908396946565e-06, "loss": 0.5109, "step": 367 }, { "epoch": 0.01, "grad_norm": 14.804825223015408, "learning_rate": 3.5114503816793895e-06, "loss": 0.5061, "step": 368 }, { "epoch": 0.01, "grad_norm": 7.02065187063129, "learning_rate": 3.5209923664122144e-06, "loss": 0.0981, "step": 369 }, { "epoch": 0.01, "grad_norm": 9.660691360232882, "learning_rate": 3.5305343511450384e-06, "loss": 0.4522, "step": 370 }, { "epoch": 0.01, "grad_norm": 6.2060835770865745, "learning_rate": 3.540076335877863e-06, "loss": 0.2519, "step": 371 }, { "epoch": 0.01, "grad_norm": 9.108629722926622, "learning_rate": 3.5496183206106873e-06, "loss": 0.5065, "step": 372 }, { "epoch": 0.01, "grad_norm": 11.205413456939766, "learning_rate": 3.559160305343512e-06, "loss": 0.3242, "step": 373 }, { "epoch": 0.01, "grad_norm": 6.095675355016247, "learning_rate": 3.568702290076336e-06, "loss": 0.3093, "step": 374 }, { "epoch": 0.01, "grad_norm": 7.335113316592221, "learning_rate": 3.5782442748091606e-06, "loss": 0.4143, "step": 375 }, { "epoch": 0.01, "grad_norm": 8.858716290995034, "learning_rate": 3.587786259541985e-06, "loss": 0.3514, "step": 376 }, { "epoch": 0.01, "grad_norm": 7.661908981654902, "learning_rate": 3.597328244274809e-06, "loss": 0.3463, "step": 377 }, { "epoch": 0.01, "grad_norm": 8.34143131342016, "learning_rate": 3.606870229007634e-06, "loss": 0.5566, "step": 378 }, { "epoch": 0.01, "grad_norm": 8.223395771469423, "learning_rate": 3.6164122137404584e-06, "loss": 0.3968, "step": 379 }, { "epoch": 0.01, "grad_norm": 10.726393662893067, "learning_rate": 3.625954198473283e-06, "loss": 0.7081, "step": 380 }, { "epoch": 0.01, "grad_norm": 10.885257662595935, "learning_rate": 3.635496183206107e-06, "loss": 0.5707, "step": 381 }, { "epoch": 0.01, "grad_norm": 7.9272171757778285, "learning_rate": 3.6450381679389317e-06, "loss": 0.5674, "step": 382 }, { "epoch": 0.01, "grad_norm": 4.027624259230025, "learning_rate": 3.654580152671756e-06, "loss": 0.2218, "step": 383 }, { "epoch": 0.01, "grad_norm": 9.959734723645045, "learning_rate": 3.6641221374045806e-06, "loss": 0.7639, "step": 384 }, { "epoch": 0.01, "grad_norm": 7.39928466071521, "learning_rate": 3.6736641221374046e-06, "loss": 0.6534, "step": 385 }, { "epoch": 0.01, "grad_norm": 7.002295662603, "learning_rate": 3.683206106870229e-06, "loss": 0.5799, "step": 386 }, { "epoch": 0.01, "grad_norm": 31.715244479090323, "learning_rate": 3.692748091603054e-06, "loss": 0.4894, "step": 387 }, { "epoch": 0.01, "grad_norm": 3.019755287297791, "learning_rate": 3.7022900763358783e-06, "loss": 0.1358, "step": 388 }, { "epoch": 0.01, "grad_norm": 6.667092389183108, "learning_rate": 3.7118320610687023e-06, "loss": 0.4287, "step": 389 }, { "epoch": 0.01, "grad_norm": 7.39898982498948, "learning_rate": 3.7213740458015268e-06, "loss": 0.5812, "step": 390 }, { "epoch": 0.01, "grad_norm": 5.172715743955274, "learning_rate": 3.7309160305343516e-06, "loss": 0.3848, "step": 391 }, { "epoch": 0.01, "grad_norm": 10.284147045723765, "learning_rate": 3.740458015267176e-06, "loss": 0.385, "step": 392 }, { "epoch": 0.01, "grad_norm": 11.125172988746783, "learning_rate": 3.7500000000000005e-06, "loss": 0.4465, "step": 393 }, { "epoch": 0.01, "grad_norm": 8.392303923951845, "learning_rate": 3.7595419847328245e-06, "loss": 0.4349, "step": 394 }, { "epoch": 0.01, "grad_norm": 9.365554399959322, "learning_rate": 3.769083969465649e-06, "loss": 0.7342, "step": 395 }, { "epoch": 0.01, "grad_norm": 11.278010783009549, "learning_rate": 3.778625954198474e-06, "loss": 0.3145, "step": 396 }, { "epoch": 0.01, "grad_norm": 5.585804736286944, "learning_rate": 3.7881679389312983e-06, "loss": 0.3955, "step": 397 }, { "epoch": 0.01, "grad_norm": 11.8628838013765, "learning_rate": 3.7977099236641223e-06, "loss": 0.459, "step": 398 }, { "epoch": 0.01, "grad_norm": 8.920179536663406, "learning_rate": 3.8072519083969467e-06, "loss": 0.2759, "step": 399 }, { "epoch": 0.01, "grad_norm": 9.407241211432979, "learning_rate": 3.816793893129772e-06, "loss": 0.5528, "step": 400 }, { "epoch": 0.01, "grad_norm": 8.667240588429781, "learning_rate": 3.826335877862596e-06, "loss": 0.608, "step": 401 }, { "epoch": 0.01, "grad_norm": 12.35708438755507, "learning_rate": 3.8358778625954205e-06, "loss": 0.5279, "step": 402 }, { "epoch": 0.01, "grad_norm": 7.308341514846705, "learning_rate": 3.845419847328244e-06, "loss": 0.4725, "step": 403 }, { "epoch": 0.01, "grad_norm": 7.972344997522466, "learning_rate": 3.8549618320610685e-06, "loss": 0.7313, "step": 404 }, { "epoch": 0.01, "grad_norm": 9.533566402929694, "learning_rate": 3.864503816793894e-06, "loss": 0.5899, "step": 405 }, { "epoch": 0.01, "grad_norm": 10.061418558140776, "learning_rate": 3.874045801526718e-06, "loss": 0.3698, "step": 406 }, { "epoch": 0.01, "grad_norm": 30.270640779334826, "learning_rate": 3.883587786259542e-06, "loss": 0.5983, "step": 407 }, { "epoch": 0.01, "grad_norm": 15.457270715115191, "learning_rate": 3.893129770992366e-06, "loss": 0.474, "step": 408 }, { "epoch": 0.01, "grad_norm": 7.446422866369547, "learning_rate": 3.9026717557251916e-06, "loss": 0.4419, "step": 409 }, { "epoch": 0.01, "grad_norm": 5.739634001889578, "learning_rate": 3.912213740458016e-06, "loss": 0.3505, "step": 410 }, { "epoch": 0.01, "grad_norm": 7.548251008825576, "learning_rate": 3.9217557251908404e-06, "loss": 0.7808, "step": 411 }, { "epoch": 0.01, "grad_norm": 8.51032236172282, "learning_rate": 3.931297709923664e-06, "loss": 0.6612, "step": 412 }, { "epoch": 0.01, "grad_norm": 6.140538494092885, "learning_rate": 3.9408396946564885e-06, "loss": 0.3488, "step": 413 }, { "epoch": 0.01, "grad_norm": 11.083186576464401, "learning_rate": 3.950381679389314e-06, "loss": 0.8731, "step": 414 }, { "epoch": 0.01, "grad_norm": 8.763640915280588, "learning_rate": 3.959923664122138e-06, "loss": 0.6886, "step": 415 }, { "epoch": 0.01, "grad_norm": 10.651211260476792, "learning_rate": 3.969465648854962e-06, "loss": 0.4007, "step": 416 }, { "epoch": 0.01, "grad_norm": 7.882030073943378, "learning_rate": 3.979007633587786e-06, "loss": 0.4137, "step": 417 }, { "epoch": 0.01, "grad_norm": 8.894220252080398, "learning_rate": 3.9885496183206115e-06, "loss": 0.5427, "step": 418 }, { "epoch": 0.01, "grad_norm": 7.20859530051903, "learning_rate": 3.998091603053436e-06, "loss": 0.3569, "step": 419 }, { "epoch": 0.01, "grad_norm": 3.7649278747317014, "learning_rate": 4.0076335877862595e-06, "loss": 0.1589, "step": 420 }, { "epoch": 0.01, "grad_norm": 7.247642331049849, "learning_rate": 4.017175572519084e-06, "loss": 0.2732, "step": 421 }, { "epoch": 0.01, "grad_norm": 5.709392750946829, "learning_rate": 4.026717557251908e-06, "loss": 0.2477, "step": 422 }, { "epoch": 0.01, "grad_norm": 5.243165244778922, "learning_rate": 4.036259541984734e-06, "loss": 0.5734, "step": 423 }, { "epoch": 0.01, "grad_norm": 7.6518437376523325, "learning_rate": 4.045801526717557e-06, "loss": 0.5097, "step": 424 }, { "epoch": 0.01, "grad_norm": 5.077548560657371, "learning_rate": 4.055343511450382e-06, "loss": 0.1793, "step": 425 }, { "epoch": 0.01, "grad_norm": 7.228958540970245, "learning_rate": 4.064885496183206e-06, "loss": 0.4745, "step": 426 }, { "epoch": 0.01, "grad_norm": 7.260208834752534, "learning_rate": 4.0744274809160315e-06, "loss": 0.5121, "step": 427 }, { "epoch": 0.01, "grad_norm": 10.329407766671846, "learning_rate": 4.083969465648855e-06, "loss": 0.4543, "step": 428 }, { "epoch": 0.01, "grad_norm": 7.58044649726521, "learning_rate": 4.0935114503816795e-06, "loss": 0.2754, "step": 429 }, { "epoch": 0.01, "grad_norm": 5.696860439263096, "learning_rate": 4.103053435114504e-06, "loss": 0.3317, "step": 430 }, { "epoch": 0.01, "grad_norm": 6.15582595371692, "learning_rate": 4.112595419847328e-06, "loss": 0.2081, "step": 431 }, { "epoch": 0.01, "grad_norm": 3.201143703371644, "learning_rate": 4.122137404580153e-06, "loss": 0.0927, "step": 432 }, { "epoch": 0.01, "grad_norm": 8.403632227923374, "learning_rate": 4.131679389312977e-06, "loss": 0.4062, "step": 433 }, { "epoch": 0.01, "grad_norm": 11.0232331968715, "learning_rate": 4.141221374045802e-06, "loss": 0.4291, "step": 434 }, { "epoch": 0.01, "grad_norm": 9.198209994784545, "learning_rate": 4.150763358778626e-06, "loss": 0.1563, "step": 435 }, { "epoch": 0.01, "grad_norm": 7.973096013965503, "learning_rate": 4.1603053435114506e-06, "loss": 0.52, "step": 436 }, { "epoch": 0.01, "grad_norm": 13.609805392654508, "learning_rate": 4.169847328244275e-06, "loss": 0.5353, "step": 437 }, { "epoch": 0.01, "grad_norm": 7.188598018201126, "learning_rate": 4.1793893129770995e-06, "loss": 0.4103, "step": 438 }, { "epoch": 0.01, "grad_norm": 6.450077510338444, "learning_rate": 4.188931297709924e-06, "loss": 0.4398, "step": 439 }, { "epoch": 0.01, "grad_norm": 8.305614861057238, "learning_rate": 4.198473282442748e-06, "loss": 0.3963, "step": 440 }, { "epoch": 0.01, "grad_norm": 20.708448055366592, "learning_rate": 4.208015267175573e-06, "loss": 0.6686, "step": 441 }, { "epoch": 0.01, "grad_norm": 5.098860441313117, "learning_rate": 4.217557251908397e-06, "loss": 0.3339, "step": 442 }, { "epoch": 0.01, "grad_norm": 8.878131152925974, "learning_rate": 4.227099236641222e-06, "loss": 0.3338, "step": 443 }, { "epoch": 0.01, "grad_norm": 4.506491045913426, "learning_rate": 4.236641221374046e-06, "loss": 0.2331, "step": 444 }, { "epoch": 0.01, "grad_norm": 9.59358786778335, "learning_rate": 4.2461832061068705e-06, "loss": 0.2357, "step": 445 }, { "epoch": 0.01, "grad_norm": 12.14459010588776, "learning_rate": 4.255725190839695e-06, "loss": 0.4302, "step": 446 }, { "epoch": 0.01, "grad_norm": 8.795935138478862, "learning_rate": 4.265267175572519e-06, "loss": 0.3199, "step": 447 }, { "epoch": 0.01, "grad_norm": 8.570904025421605, "learning_rate": 4.274809160305344e-06, "loss": 0.5359, "step": 448 }, { "epoch": 0.01, "grad_norm": 7.731584265104603, "learning_rate": 4.284351145038168e-06, "loss": 0.6554, "step": 449 }, { "epoch": 0.01, "grad_norm": 5.974058257389453, "learning_rate": 4.293893129770993e-06, "loss": 0.2233, "step": 450 }, { "epoch": 0.01, "grad_norm": 13.193958671506252, "learning_rate": 4.303435114503817e-06, "loss": 0.7336, "step": 451 }, { "epoch": 0.01, "grad_norm": 9.161002790624059, "learning_rate": 4.312977099236642e-06, "loss": 0.5249, "step": 452 }, { "epoch": 0.01, "grad_norm": 7.906774691936829, "learning_rate": 4.322519083969466e-06, "loss": 0.2853, "step": 453 }, { "epoch": 0.01, "grad_norm": 15.700668947583454, "learning_rate": 4.3320610687022905e-06, "loss": 0.5186, "step": 454 }, { "epoch": 0.01, "grad_norm": 6.512102963577875, "learning_rate": 4.341603053435115e-06, "loss": 0.3613, "step": 455 }, { "epoch": 0.01, "grad_norm": 5.272327673652137, "learning_rate": 4.351145038167939e-06, "loss": 0.3857, "step": 456 }, { "epoch": 0.01, "grad_norm": 9.798575465150035, "learning_rate": 4.360687022900764e-06, "loss": 0.5318, "step": 457 }, { "epoch": 0.01, "grad_norm": 9.06067086538087, "learning_rate": 4.370229007633588e-06, "loss": 0.5443, "step": 458 }, { "epoch": 0.01, "grad_norm": 4.66565721811156, "learning_rate": 4.379770992366413e-06, "loss": 0.4781, "step": 459 }, { "epoch": 0.01, "grad_norm": 6.293616537972796, "learning_rate": 4.389312977099237e-06, "loss": 0.3733, "step": 460 }, { "epoch": 0.01, "grad_norm": 3.265046191597086, "learning_rate": 4.3988549618320616e-06, "loss": 0.1322, "step": 461 }, { "epoch": 0.01, "grad_norm": 10.342573208437996, "learning_rate": 4.408396946564886e-06, "loss": 0.7432, "step": 462 }, { "epoch": 0.01, "grad_norm": 17.54623227118482, "learning_rate": 4.41793893129771e-06, "loss": 0.3464, "step": 463 }, { "epoch": 0.01, "grad_norm": 5.340106033994075, "learning_rate": 4.427480916030535e-06, "loss": 0.3645, "step": 464 }, { "epoch": 0.01, "grad_norm": 43.727697458918946, "learning_rate": 4.437022900763359e-06, "loss": 0.4288, "step": 465 }, { "epoch": 0.01, "grad_norm": 7.160679191976291, "learning_rate": 4.446564885496184e-06, "loss": 0.5386, "step": 466 }, { "epoch": 0.01, "grad_norm": 3.234941497432938, "learning_rate": 4.456106870229008e-06, "loss": 0.3853, "step": 467 }, { "epoch": 0.01, "grad_norm": 2.1407094228761916, "learning_rate": 4.465648854961833e-06, "loss": 0.1032, "step": 468 }, { "epoch": 0.01, "grad_norm": 16.64195929537273, "learning_rate": 4.475190839694657e-06, "loss": 0.7037, "step": 469 }, { "epoch": 0.01, "grad_norm": 3.09029222168327, "learning_rate": 4.4847328244274815e-06, "loss": 0.2064, "step": 470 }, { "epoch": 0.01, "grad_norm": 6.600906249163193, "learning_rate": 4.494274809160306e-06, "loss": 0.2257, "step": 471 }, { "epoch": 0.01, "grad_norm": 14.213704045944128, "learning_rate": 4.5038167938931296e-06, "loss": 0.3648, "step": 472 }, { "epoch": 0.01, "grad_norm": 9.192354022898586, "learning_rate": 4.513358778625955e-06, "loss": 0.5148, "step": 473 }, { "epoch": 0.01, "grad_norm": 9.602828014584325, "learning_rate": 4.522900763358779e-06, "loss": 1.0047, "step": 474 }, { "epoch": 0.01, "grad_norm": 19.839406897385796, "learning_rate": 4.532442748091604e-06, "loss": 0.4393, "step": 475 }, { "epoch": 0.01, "grad_norm": 8.075051171108182, "learning_rate": 4.541984732824427e-06, "loss": 0.1005, "step": 476 }, { "epoch": 0.01, "grad_norm": 5.888397888591298, "learning_rate": 4.551526717557253e-06, "loss": 0.4769, "step": 477 }, { "epoch": 0.01, "grad_norm": 5.109368980474163, "learning_rate": 4.561068702290077e-06, "loss": 0.3142, "step": 478 }, { "epoch": 0.01, "grad_norm": 8.00972600637338, "learning_rate": 4.5706106870229015e-06, "loss": 0.3723, "step": 479 }, { "epoch": 0.01, "grad_norm": 9.584109603844194, "learning_rate": 4.580152671755725e-06, "loss": 0.7101, "step": 480 }, { "epoch": 0.01, "grad_norm": 5.480002809649116, "learning_rate": 4.5896946564885495e-06, "loss": 0.3498, "step": 481 }, { "epoch": 0.01, "grad_norm": 11.143038751708186, "learning_rate": 4.599236641221375e-06, "loss": 0.5735, "step": 482 }, { "epoch": 0.01, "grad_norm": 14.415877892115715, "learning_rate": 4.608778625954199e-06, "loss": 0.4314, "step": 483 }, { "epoch": 0.01, "grad_norm": 6.0877484821153045, "learning_rate": 4.618320610687023e-06, "loss": 0.373, "step": 484 }, { "epoch": 0.01, "grad_norm": 13.01542540691426, "learning_rate": 4.627862595419847e-06, "loss": 0.9354, "step": 485 }, { "epoch": 0.01, "grad_norm": 4.758295394870925, "learning_rate": 4.6374045801526726e-06, "loss": 0.4004, "step": 486 }, { "epoch": 0.01, "grad_norm": 9.062511575625688, "learning_rate": 4.646946564885497e-06, "loss": 0.3922, "step": 487 }, { "epoch": 0.01, "grad_norm": 8.476573581952, "learning_rate": 4.656488549618321e-06, "loss": 0.5348, "step": 488 }, { "epoch": 0.01, "grad_norm": 6.599974582363083, "learning_rate": 4.666030534351145e-06, "loss": 0.2559, "step": 489 }, { "epoch": 0.01, "grad_norm": 8.980728762904155, "learning_rate": 4.6755725190839695e-06, "loss": 0.4768, "step": 490 }, { "epoch": 0.01, "grad_norm": 14.410496878068678, "learning_rate": 4.685114503816795e-06, "loss": 0.4219, "step": 491 }, { "epoch": 0.01, "grad_norm": 20.20818103326516, "learning_rate": 4.694656488549618e-06, "loss": 0.7699, "step": 492 }, { "epoch": 0.01, "grad_norm": 7.444074721984707, "learning_rate": 4.704198473282443e-06, "loss": 0.3385, "step": 493 }, { "epoch": 0.01, "grad_norm": 8.463914116905057, "learning_rate": 4.713740458015267e-06, "loss": 0.3354, "step": 494 }, { "epoch": 0.01, "grad_norm": 7.315830222294027, "learning_rate": 4.7232824427480925e-06, "loss": 0.4787, "step": 495 }, { "epoch": 0.01, "grad_norm": 16.19630454853043, "learning_rate": 4.732824427480917e-06, "loss": 0.2547, "step": 496 }, { "epoch": 0.01, "grad_norm": 5.2879752051541145, "learning_rate": 4.7423664122137405e-06, "loss": 0.1407, "step": 497 }, { "epoch": 0.01, "grad_norm": 12.46503134896827, "learning_rate": 4.751908396946565e-06, "loss": 0.5309, "step": 498 }, { "epoch": 0.01, "grad_norm": 6.572922413899548, "learning_rate": 4.761450381679389e-06, "loss": 0.6354, "step": 499 }, { "epoch": 0.01, "grad_norm": 11.30949479962216, "learning_rate": 4.770992366412215e-06, "loss": 0.533, "step": 500 }, { "epoch": 0.01, "grad_norm": 6.74029335829628, "learning_rate": 4.780534351145038e-06, "loss": 0.3329, "step": 501 }, { "epoch": 0.01, "grad_norm": 6.637762911487289, "learning_rate": 4.790076335877863e-06, "loss": 0.2975, "step": 502 }, { "epoch": 0.01, "grad_norm": 8.074913699985098, "learning_rate": 4.799618320610687e-06, "loss": 0.4874, "step": 503 }, { "epoch": 0.01, "grad_norm": 6.298403078863844, "learning_rate": 4.8091603053435125e-06, "loss": 0.331, "step": 504 }, { "epoch": 0.01, "grad_norm": 12.11157769627507, "learning_rate": 4.818702290076336e-06, "loss": 0.2325, "step": 505 }, { "epoch": 0.01, "grad_norm": 4.929381577177945, "learning_rate": 4.8282442748091605e-06, "loss": 0.1729, "step": 506 }, { "epoch": 0.01, "grad_norm": 6.573168048998778, "learning_rate": 4.837786259541985e-06, "loss": 0.2626, "step": 507 }, { "epoch": 0.01, "grad_norm": 8.479905161147336, "learning_rate": 4.847328244274809e-06, "loss": 0.3656, "step": 508 }, { "epoch": 0.01, "grad_norm": 11.456459257438409, "learning_rate": 4.856870229007634e-06, "loss": 0.3298, "step": 509 }, { "epoch": 0.01, "grad_norm": 8.80846068964773, "learning_rate": 4.866412213740458e-06, "loss": 0.5102, "step": 510 }, { "epoch": 0.01, "grad_norm": 8.845052778173889, "learning_rate": 4.875954198473283e-06, "loss": 0.444, "step": 511 }, { "epoch": 0.01, "grad_norm": 9.274393393943486, "learning_rate": 4.885496183206107e-06, "loss": 0.3946, "step": 512 }, { "epoch": 0.01, "grad_norm": 10.43001777040279, "learning_rate": 4.8950381679389316e-06, "loss": 0.4156, "step": 513 }, { "epoch": 0.01, "grad_norm": 15.20170186704571, "learning_rate": 4.904580152671756e-06, "loss": 0.4374, "step": 514 }, { "epoch": 0.01, "grad_norm": 8.954220423500818, "learning_rate": 4.9141221374045805e-06, "loss": 0.5529, "step": 515 }, { "epoch": 0.01, "grad_norm": 8.213412433608225, "learning_rate": 4.923664122137405e-06, "loss": 0.4487, "step": 516 }, { "epoch": 0.01, "grad_norm": 12.16681226007451, "learning_rate": 4.933206106870229e-06, "loss": 0.6855, "step": 517 }, { "epoch": 0.01, "grad_norm": 8.413343138479977, "learning_rate": 4.942748091603054e-06, "loss": 0.3038, "step": 518 }, { "epoch": 0.01, "grad_norm": 7.739514918343698, "learning_rate": 4.952290076335878e-06, "loss": 0.3852, "step": 519 }, { "epoch": 0.01, "grad_norm": 13.382878487641971, "learning_rate": 4.961832061068703e-06, "loss": 0.5145, "step": 520 }, { "epoch": 0.01, "grad_norm": 9.217241066755845, "learning_rate": 4.971374045801527e-06, "loss": 0.6197, "step": 521 }, { "epoch": 0.01, "grad_norm": 7.364053166652878, "learning_rate": 4.9809160305343515e-06, "loss": 0.2052, "step": 522 }, { "epoch": 0.01, "grad_norm": 7.17338197122036, "learning_rate": 4.990458015267176e-06, "loss": 0.6402, "step": 523 }, { "epoch": 0.02, "grad_norm": 6.108620638507035, "learning_rate": 5e-06, "loss": 0.2173, "step": 524 }, { "epoch": 0.02, "grad_norm": 28.163916032174843, "learning_rate": 5.009541984732825e-06, "loss": 0.5188, "step": 525 }, { "epoch": 0.02, "grad_norm": 13.829283429777618, "learning_rate": 5.019083969465649e-06, "loss": 0.3976, "step": 526 }, { "epoch": 0.02, "grad_norm": 16.082143149868013, "learning_rate": 5.028625954198474e-06, "loss": 0.4306, "step": 527 }, { "epoch": 0.02, "grad_norm": 8.696627151625629, "learning_rate": 5.038167938931297e-06, "loss": 0.5913, "step": 528 }, { "epoch": 0.02, "grad_norm": 11.486620292975863, "learning_rate": 5.047709923664123e-06, "loss": 0.4242, "step": 529 }, { "epoch": 0.02, "grad_norm": 17.798023431311787, "learning_rate": 5.057251908396947e-06, "loss": 0.6515, "step": 530 }, { "epoch": 0.02, "grad_norm": 9.82748619635638, "learning_rate": 5.0667938931297715e-06, "loss": 0.5419, "step": 531 }, { "epoch": 0.02, "grad_norm": 13.608175266435659, "learning_rate": 5.076335877862596e-06, "loss": 0.4504, "step": 532 }, { "epoch": 0.02, "grad_norm": 8.106759092275661, "learning_rate": 5.0858778625954195e-06, "loss": 0.3116, "step": 533 }, { "epoch": 0.02, "grad_norm": 8.661799996236619, "learning_rate": 5.095419847328245e-06, "loss": 0.4542, "step": 534 }, { "epoch": 0.02, "grad_norm": 4.546567296893412, "learning_rate": 5.104961832061069e-06, "loss": 0.1772, "step": 535 }, { "epoch": 0.02, "grad_norm": 11.97908358780068, "learning_rate": 5.114503816793893e-06, "loss": 0.7087, "step": 536 }, { "epoch": 0.02, "grad_norm": 10.709951187008304, "learning_rate": 5.124045801526718e-06, "loss": 0.346, "step": 537 }, { "epoch": 0.02, "grad_norm": 2.9835631179935187, "learning_rate": 5.1335877862595426e-06, "loss": 0.071, "step": 538 }, { "epoch": 0.02, "grad_norm": 11.053315396296263, "learning_rate": 5.143129770992367e-06, "loss": 0.4155, "step": 539 }, { "epoch": 0.02, "grad_norm": 9.77080038643971, "learning_rate": 5.1526717557251914e-06, "loss": 0.6031, "step": 540 }, { "epoch": 0.02, "grad_norm": 10.539435672020963, "learning_rate": 5.162213740458015e-06, "loss": 0.1437, "step": 541 }, { "epoch": 0.02, "grad_norm": 10.761759735417076, "learning_rate": 5.17175572519084e-06, "loss": 0.6652, "step": 542 }, { "epoch": 0.02, "grad_norm": 9.946287863969614, "learning_rate": 5.181297709923665e-06, "loss": 0.9077, "step": 543 }, { "epoch": 0.02, "grad_norm": 9.109661693216491, "learning_rate": 5.190839694656488e-06, "loss": 0.3019, "step": 544 }, { "epoch": 0.02, "grad_norm": 5.868988614010032, "learning_rate": 5.200381679389314e-06, "loss": 0.2626, "step": 545 }, { "epoch": 0.02, "grad_norm": 6.1548273989216264, "learning_rate": 5.209923664122137e-06, "loss": 0.5112, "step": 546 }, { "epoch": 0.02, "grad_norm": 11.100049274781977, "learning_rate": 5.2194656488549625e-06, "loss": 0.2351, "step": 547 }, { "epoch": 0.02, "grad_norm": 9.999281142146177, "learning_rate": 5.229007633587787e-06, "loss": 0.3553, "step": 548 }, { "epoch": 0.02, "grad_norm": 5.537349721277317, "learning_rate": 5.2385496183206106e-06, "loss": 0.1014, "step": 549 }, { "epoch": 0.02, "grad_norm": 10.445322839907291, "learning_rate": 5.248091603053436e-06, "loss": 0.3918, "step": 550 }, { "epoch": 0.02, "grad_norm": 9.407628563522744, "learning_rate": 5.2576335877862594e-06, "loss": 0.4601, "step": 551 }, { "epoch": 0.02, "grad_norm": 8.334327148940924, "learning_rate": 5.267175572519084e-06, "loss": 0.4168, "step": 552 }, { "epoch": 0.02, "grad_norm": 4.907681468925668, "learning_rate": 5.276717557251909e-06, "loss": 0.329, "step": 553 }, { "epoch": 0.02, "grad_norm": 5.6058737365951234, "learning_rate": 5.286259541984733e-06, "loss": 0.5268, "step": 554 }, { "epoch": 0.02, "grad_norm": 5.971022807289664, "learning_rate": 5.295801526717558e-06, "loss": 0.6778, "step": 555 }, { "epoch": 0.02, "grad_norm": 7.834880723367896, "learning_rate": 5.3053435114503825e-06, "loss": 0.4805, "step": 556 }, { "epoch": 0.02, "grad_norm": 8.518934310296217, "learning_rate": 5.314885496183206e-06, "loss": 0.3648, "step": 557 }, { "epoch": 0.02, "grad_norm": 11.38465199370456, "learning_rate": 5.324427480916031e-06, "loss": 0.3863, "step": 558 }, { "epoch": 0.02, "grad_norm": 6.91267469023285, "learning_rate": 5.333969465648855e-06, "loss": 0.4943, "step": 559 }, { "epoch": 0.02, "grad_norm": 13.648655811609833, "learning_rate": 5.34351145038168e-06, "loss": 0.4884, "step": 560 }, { "epoch": 0.02, "grad_norm": 7.077883482595675, "learning_rate": 5.353053435114505e-06, "loss": 0.4337, "step": 561 }, { "epoch": 0.02, "grad_norm": 12.040287893661672, "learning_rate": 5.362595419847328e-06, "loss": 0.4506, "step": 562 }, { "epoch": 0.02, "grad_norm": 8.422565155909568, "learning_rate": 5.3721374045801536e-06, "loss": 0.4088, "step": 563 }, { "epoch": 0.02, "grad_norm": 6.767918010945527, "learning_rate": 5.381679389312977e-06, "loss": 0.3615, "step": 564 }, { "epoch": 0.02, "grad_norm": 5.771552494376449, "learning_rate": 5.391221374045802e-06, "loss": 0.3219, "step": 565 }, { "epoch": 0.02, "grad_norm": 5.97922942054417, "learning_rate": 5.400763358778627e-06, "loss": 0.3895, "step": 566 }, { "epoch": 0.02, "grad_norm": 7.10463440847768, "learning_rate": 5.4103053435114505e-06, "loss": 0.3779, "step": 567 }, { "epoch": 0.02, "grad_norm": 12.628861507167782, "learning_rate": 5.419847328244276e-06, "loss": 0.6611, "step": 568 }, { "epoch": 0.02, "grad_norm": 11.388993092708919, "learning_rate": 5.429389312977099e-06, "loss": 0.6646, "step": 569 }, { "epoch": 0.02, "grad_norm": 13.281244111901268, "learning_rate": 5.438931297709924e-06, "loss": 0.5042, "step": 570 }, { "epoch": 0.02, "grad_norm": 12.051454537005876, "learning_rate": 5.448473282442749e-06, "loss": 0.9462, "step": 571 }, { "epoch": 0.02, "grad_norm": 7.736318633033497, "learning_rate": 5.458015267175573e-06, "loss": 0.4486, "step": 572 }, { "epoch": 0.02, "grad_norm": 9.205617259567044, "learning_rate": 5.467557251908397e-06, "loss": 0.5291, "step": 573 }, { "epoch": 0.02, "grad_norm": 4.479478082994336, "learning_rate": 5.477099236641222e-06, "loss": 0.5525, "step": 574 }, { "epoch": 0.02, "grad_norm": 4.911161236425687, "learning_rate": 5.486641221374046e-06, "loss": 0.1572, "step": 575 }, { "epoch": 0.02, "grad_norm": 8.239739887150439, "learning_rate": 5.496183206106871e-06, "loss": 0.4919, "step": 576 }, { "epoch": 0.02, "grad_norm": 12.84122755986127, "learning_rate": 5.505725190839695e-06, "loss": 0.529, "step": 577 }, { "epoch": 0.02, "grad_norm": 6.438482931704722, "learning_rate": 5.515267175572519e-06, "loss": 0.2353, "step": 578 }, { "epoch": 0.02, "grad_norm": 5.25257074903059, "learning_rate": 5.524809160305345e-06, "loss": 0.5939, "step": 579 }, { "epoch": 0.02, "grad_norm": 4.007549463158409, "learning_rate": 5.534351145038168e-06, "loss": 0.2964, "step": 580 }, { "epoch": 0.02, "grad_norm": 10.010115895167257, "learning_rate": 5.543893129770993e-06, "loss": 0.6337, "step": 581 }, { "epoch": 0.02, "grad_norm": 5.453583859263459, "learning_rate": 5.553435114503817e-06, "loss": 0.243, "step": 582 }, { "epoch": 0.02, "grad_norm": 9.12393284135859, "learning_rate": 5.5629770992366415e-06, "loss": 0.3628, "step": 583 }, { "epoch": 0.02, "grad_norm": 6.983562106419178, "learning_rate": 5.572519083969467e-06, "loss": 0.6443, "step": 584 }, { "epoch": 0.02, "grad_norm": 5.074057631544608, "learning_rate": 5.58206106870229e-06, "loss": 0.3105, "step": 585 }, { "epoch": 0.02, "grad_norm": 5.125554403696391, "learning_rate": 5.591603053435115e-06, "loss": 0.1935, "step": 586 }, { "epoch": 0.02, "grad_norm": 6.51393050519622, "learning_rate": 5.601145038167939e-06, "loss": 0.2155, "step": 587 }, { "epoch": 0.02, "grad_norm": 5.61765751046165, "learning_rate": 5.610687022900764e-06, "loss": 0.3364, "step": 588 }, { "epoch": 0.02, "grad_norm": 5.030577288844621, "learning_rate": 5.620229007633589e-06, "loss": 0.2934, "step": 589 }, { "epoch": 0.02, "grad_norm": 9.827562810347274, "learning_rate": 5.6297709923664126e-06, "loss": 0.4583, "step": 590 }, { "epoch": 0.02, "grad_norm": 11.101505802876861, "learning_rate": 5.639312977099237e-06, "loss": 0.7355, "step": 591 }, { "epoch": 0.02, "grad_norm": 5.995963487235291, "learning_rate": 5.648854961832062e-06, "loss": 0.2722, "step": 592 }, { "epoch": 0.02, "grad_norm": 10.584334000766257, "learning_rate": 5.658396946564886e-06, "loss": 0.3492, "step": 593 }, { "epoch": 0.02, "grad_norm": 9.980737062195733, "learning_rate": 5.66793893129771e-06, "loss": 0.4244, "step": 594 }, { "epoch": 0.02, "grad_norm": 8.249145405751552, "learning_rate": 5.677480916030535e-06, "loss": 0.46, "step": 595 }, { "epoch": 0.02, "grad_norm": 5.375587697391439, "learning_rate": 5.687022900763359e-06, "loss": 0.2786, "step": 596 }, { "epoch": 0.02, "grad_norm": 19.2366738204577, "learning_rate": 5.6965648854961845e-06, "loss": 0.7313, "step": 597 }, { "epoch": 0.02, "grad_norm": 13.458780798938367, "learning_rate": 5.706106870229008e-06, "loss": 0.5137, "step": 598 }, { "epoch": 0.02, "grad_norm": 6.852539201670941, "learning_rate": 5.7156488549618325e-06, "loss": 0.2615, "step": 599 }, { "epoch": 0.02, "grad_norm": 10.743674435441344, "learning_rate": 5.725190839694656e-06, "loss": 0.2967, "step": 600 }, { "epoch": 0.02, "grad_norm": 6.600675805489341, "learning_rate": 5.734732824427481e-06, "loss": 0.3975, "step": 601 }, { "epoch": 0.02, "grad_norm": 2.3447494410392045, "learning_rate": 5.744274809160306e-06, "loss": 0.2472, "step": 602 }, { "epoch": 0.02, "grad_norm": 5.798635868265851, "learning_rate": 5.75381679389313e-06, "loss": 0.2541, "step": 603 }, { "epoch": 0.02, "grad_norm": 4.718139255232808, "learning_rate": 5.763358778625955e-06, "loss": 0.3601, "step": 604 }, { "epoch": 0.02, "grad_norm": 9.834111996441006, "learning_rate": 5.772900763358778e-06, "loss": 0.4016, "step": 605 }, { "epoch": 0.02, "grad_norm": 5.520715628274908, "learning_rate": 5.782442748091604e-06, "loss": 0.3716, "step": 606 }, { "epoch": 0.02, "grad_norm": 15.333005604489601, "learning_rate": 5.791984732824428e-06, "loss": 0.7792, "step": 607 }, { "epoch": 0.02, "grad_norm": 6.861249732686248, "learning_rate": 5.801526717557252e-06, "loss": 0.6362, "step": 608 }, { "epoch": 0.02, "grad_norm": 12.03387826723009, "learning_rate": 5.811068702290077e-06, "loss": 0.3999, "step": 609 }, { "epoch": 0.02, "grad_norm": 8.136543524765015, "learning_rate": 5.820610687022901e-06, "loss": 0.4725, "step": 610 }, { "epoch": 0.02, "grad_norm": 9.789040190236697, "learning_rate": 5.830152671755726e-06, "loss": 0.5527, "step": 611 }, { "epoch": 0.02, "grad_norm": 20.160521062141978, "learning_rate": 5.83969465648855e-06, "loss": 0.3806, "step": 612 }, { "epoch": 0.02, "grad_norm": 13.42109697460569, "learning_rate": 5.849236641221374e-06, "loss": 0.3016, "step": 613 }, { "epoch": 0.02, "grad_norm": 11.315674057258683, "learning_rate": 5.858778625954199e-06, "loss": 0.5712, "step": 614 }, { "epoch": 0.02, "grad_norm": 12.400986522993614, "learning_rate": 5.8683206106870236e-06, "loss": 0.2982, "step": 615 }, { "epoch": 0.02, "grad_norm": 8.0813431075796, "learning_rate": 5.877862595419848e-06, "loss": 0.5489, "step": 616 }, { "epoch": 0.02, "grad_norm": 6.013879775504631, "learning_rate": 5.8874045801526724e-06, "loss": 0.4186, "step": 617 }, { "epoch": 0.02, "grad_norm": 7.77118154023444, "learning_rate": 5.896946564885496e-06, "loss": 0.3674, "step": 618 }, { "epoch": 0.02, "grad_norm": 12.840708574369817, "learning_rate": 5.906488549618321e-06, "loss": 0.7397, "step": 619 }, { "epoch": 0.02, "grad_norm": 6.211786252934693, "learning_rate": 5.916030534351146e-06, "loss": 0.439, "step": 620 }, { "epoch": 0.02, "grad_norm": 9.079977768942216, "learning_rate": 5.925572519083969e-06, "loss": 0.5369, "step": 621 }, { "epoch": 0.02, "grad_norm": 6.414067703969798, "learning_rate": 5.935114503816795e-06, "loss": 0.3119, "step": 622 }, { "epoch": 0.02, "grad_norm": 9.108282687880239, "learning_rate": 5.944656488549618e-06, "loss": 0.4972, "step": 623 }, { "epoch": 0.02, "grad_norm": 12.702298239517262, "learning_rate": 5.9541984732824435e-06, "loss": 0.376, "step": 624 }, { "epoch": 0.02, "grad_norm": 5.3882992357393835, "learning_rate": 5.963740458015268e-06, "loss": 0.6495, "step": 625 }, { "epoch": 0.02, "grad_norm": 5.389930790968192, "learning_rate": 5.9732824427480916e-06, "loss": 0.4376, "step": 626 }, { "epoch": 0.02, "grad_norm": 7.9383639068249705, "learning_rate": 5.982824427480917e-06, "loss": 0.4671, "step": 627 }, { "epoch": 0.02, "grad_norm": 7.324452795739696, "learning_rate": 5.992366412213741e-06, "loss": 0.1847, "step": 628 }, { "epoch": 0.02, "grad_norm": 10.214951488736318, "learning_rate": 6.001908396946565e-06, "loss": 0.4677, "step": 629 }, { "epoch": 0.02, "grad_norm": 9.647046193493603, "learning_rate": 6.01145038167939e-06, "loss": 0.6166, "step": 630 }, { "epoch": 0.02, "grad_norm": 8.828811915857141, "learning_rate": 6.020992366412214e-06, "loss": 0.4162, "step": 631 }, { "epoch": 0.02, "grad_norm": 11.897480816618252, "learning_rate": 6.030534351145039e-06, "loss": 0.5901, "step": 632 }, { "epoch": 0.02, "grad_norm": 11.169996495805565, "learning_rate": 6.0400763358778635e-06, "loss": 0.7002, "step": 633 }, { "epoch": 0.02, "grad_norm": 9.145121141885879, "learning_rate": 6.049618320610687e-06, "loss": 0.5619, "step": 634 }, { "epoch": 0.02, "grad_norm": 6.893613690492463, "learning_rate": 6.059160305343512e-06, "loss": 0.3481, "step": 635 }, { "epoch": 0.02, "grad_norm": 9.360774118738833, "learning_rate": 6.068702290076336e-06, "loss": 0.2811, "step": 636 }, { "epoch": 0.02, "grad_norm": 7.074955009344358, "learning_rate": 6.07824427480916e-06, "loss": 0.5372, "step": 637 }, { "epoch": 0.02, "grad_norm": 8.909140348739603, "learning_rate": 6.087786259541986e-06, "loss": 0.5627, "step": 638 }, { "epoch": 0.02, "grad_norm": 11.109944518094112, "learning_rate": 6.097328244274809e-06, "loss": 0.6051, "step": 639 }, { "epoch": 0.02, "grad_norm": 7.932849753357906, "learning_rate": 6.1068702290076346e-06, "loss": 0.7568, "step": 640 }, { "epoch": 0.02, "grad_norm": 8.786321295615705, "learning_rate": 6.116412213740458e-06, "loss": 0.3117, "step": 641 }, { "epoch": 0.02, "grad_norm": 17.866835056526767, "learning_rate": 6.125954198473283e-06, "loss": 0.9985, "step": 642 }, { "epoch": 0.02, "grad_norm": 4.604276727710146, "learning_rate": 6.135496183206108e-06, "loss": 0.2996, "step": 643 }, { "epoch": 0.02, "grad_norm": 7.2679537250880575, "learning_rate": 6.1450381679389315e-06, "loss": 0.1833, "step": 644 }, { "epoch": 0.02, "grad_norm": 13.558020207861732, "learning_rate": 6.154580152671757e-06, "loss": 0.9996, "step": 645 }, { "epoch": 0.02, "grad_norm": 7.706617728042755, "learning_rate": 6.164122137404581e-06, "loss": 0.6181, "step": 646 }, { "epoch": 0.02, "grad_norm": 10.054712729729903, "learning_rate": 6.173664122137405e-06, "loss": 0.3785, "step": 647 }, { "epoch": 0.02, "grad_norm": 6.174330558474043, "learning_rate": 6.18320610687023e-06, "loss": 0.608, "step": 648 }, { "epoch": 0.02, "grad_norm": 5.673689076595978, "learning_rate": 6.192748091603054e-06, "loss": 0.3634, "step": 649 }, { "epoch": 0.02, "grad_norm": 8.402570975905348, "learning_rate": 6.202290076335878e-06, "loss": 0.5191, "step": 650 }, { "epoch": 0.02, "grad_norm": 9.242268656280014, "learning_rate": 6.211832061068703e-06, "loss": 0.8073, "step": 651 }, { "epoch": 0.02, "grad_norm": 8.602423047112607, "learning_rate": 6.221374045801527e-06, "loss": 0.7331, "step": 652 }, { "epoch": 0.02, "grad_norm": 8.661470292007225, "learning_rate": 6.230916030534352e-06, "loss": 0.7894, "step": 653 }, { "epoch": 0.02, "grad_norm": 4.951905205646655, "learning_rate": 6.240458015267176e-06, "loss": 0.4527, "step": 654 }, { "epoch": 0.02, "grad_norm": 11.842892094846006, "learning_rate": 6.25e-06, "loss": 0.3919, "step": 655 }, { "epoch": 0.02, "grad_norm": 5.538390082475048, "learning_rate": 6.259541984732826e-06, "loss": 0.4216, "step": 656 }, { "epoch": 0.02, "grad_norm": 8.130056421757445, "learning_rate": 6.269083969465649e-06, "loss": 0.7309, "step": 657 }, { "epoch": 0.02, "grad_norm": 5.52141835214053, "learning_rate": 6.278625954198474e-06, "loss": 0.5517, "step": 658 }, { "epoch": 0.02, "grad_norm": 6.822173154276871, "learning_rate": 6.288167938931298e-06, "loss": 0.2917, "step": 659 }, { "epoch": 0.02, "grad_norm": 11.126166893271847, "learning_rate": 6.2977099236641225e-06, "loss": 0.3596, "step": 660 }, { "epoch": 0.02, "grad_norm": 9.533899958206293, "learning_rate": 6.307251908396948e-06, "loss": 0.3155, "step": 661 }, { "epoch": 0.02, "grad_norm": 8.328203669707877, "learning_rate": 6.316793893129771e-06, "loss": 0.367, "step": 662 }, { "epoch": 0.02, "grad_norm": 6.544695143043782, "learning_rate": 6.326335877862596e-06, "loss": 0.5095, "step": 663 }, { "epoch": 0.02, "grad_norm": 10.401115548386091, "learning_rate": 6.335877862595419e-06, "loss": 0.5379, "step": 664 }, { "epoch": 0.02, "grad_norm": 10.976388407239572, "learning_rate": 6.345419847328245e-06, "loss": 0.4841, "step": 665 }, { "epoch": 0.02, "grad_norm": 9.535638843093029, "learning_rate": 6.354961832061069e-06, "loss": 0.5782, "step": 666 }, { "epoch": 0.02, "grad_norm": 4.216178562476311, "learning_rate": 6.3645038167938936e-06, "loss": 0.2713, "step": 667 }, { "epoch": 0.02, "grad_norm": 8.651083165083966, "learning_rate": 6.374045801526718e-06, "loss": 0.5269, "step": 668 }, { "epoch": 0.02, "grad_norm": 6.530291546305458, "learning_rate": 6.383587786259543e-06, "loss": 0.5318, "step": 669 }, { "epoch": 0.02, "grad_norm": 6.203040028717975, "learning_rate": 6.393129770992367e-06, "loss": 0.2563, "step": 670 }, { "epoch": 0.02, "grad_norm": 9.973420295778187, "learning_rate": 6.402671755725191e-06, "loss": 0.6013, "step": 671 }, { "epoch": 0.02, "grad_norm": 2.336237285348778, "learning_rate": 6.412213740458016e-06, "loss": 0.1724, "step": 672 }, { "epoch": 0.02, "grad_norm": 2.7373471918524834, "learning_rate": 6.42175572519084e-06, "loss": 0.3017, "step": 673 }, { "epoch": 0.02, "grad_norm": 12.427908414622948, "learning_rate": 6.4312977099236655e-06, "loss": 0.4982, "step": 674 }, { "epoch": 0.02, "grad_norm": 10.516099032052177, "learning_rate": 6.440839694656489e-06, "loss": 0.2024, "step": 675 }, { "epoch": 0.02, "grad_norm": 11.365887355844338, "learning_rate": 6.4503816793893135e-06, "loss": 0.6654, "step": 676 }, { "epoch": 0.02, "grad_norm": 5.979881651482315, "learning_rate": 6.459923664122137e-06, "loss": 0.3454, "step": 677 }, { "epoch": 0.02, "grad_norm": 9.617548051857353, "learning_rate": 6.469465648854962e-06, "loss": 0.3892, "step": 678 }, { "epoch": 0.02, "grad_norm": 7.237331536000918, "learning_rate": 6.479007633587787e-06, "loss": 0.4442, "step": 679 }, { "epoch": 0.02, "grad_norm": 7.43689074545326, "learning_rate": 6.488549618320611e-06, "loss": 0.4498, "step": 680 }, { "epoch": 0.02, "grad_norm": 5.270092503751016, "learning_rate": 6.498091603053436e-06, "loss": 0.7408, "step": 681 }, { "epoch": 0.02, "grad_norm": 6.1418896797232225, "learning_rate": 6.507633587786259e-06, "loss": 0.2144, "step": 682 }, { "epoch": 0.02, "grad_norm": 9.125078645132117, "learning_rate": 6.517175572519085e-06, "loss": 0.4143, "step": 683 }, { "epoch": 0.02, "grad_norm": 7.467492735954662, "learning_rate": 6.526717557251909e-06, "loss": 0.2874, "step": 684 }, { "epoch": 0.02, "grad_norm": 7.153134329920159, "learning_rate": 6.536259541984733e-06, "loss": 0.4084, "step": 685 }, { "epoch": 0.02, "grad_norm": 6.8905355919121565, "learning_rate": 6.545801526717558e-06, "loss": 0.6271, "step": 686 }, { "epoch": 0.02, "grad_norm": 4.759045270492463, "learning_rate": 6.555343511450382e-06, "loss": 0.151, "step": 687 }, { "epoch": 0.02, "grad_norm": 4.8250313634297815, "learning_rate": 6.564885496183207e-06, "loss": 0.4284, "step": 688 }, { "epoch": 0.02, "grad_norm": 8.23228966730103, "learning_rate": 6.574427480916031e-06, "loss": 0.5175, "step": 689 }, { "epoch": 0.02, "grad_norm": 5.353072083006224, "learning_rate": 6.583969465648855e-06, "loss": 0.3836, "step": 690 }, { "epoch": 0.02, "grad_norm": 5.816721869847073, "learning_rate": 6.59351145038168e-06, "loss": 0.2583, "step": 691 }, { "epoch": 0.02, "grad_norm": 14.920184326260511, "learning_rate": 6.6030534351145046e-06, "loss": 1.0299, "step": 692 }, { "epoch": 0.02, "grad_norm": 9.646670580478482, "learning_rate": 6.612595419847328e-06, "loss": 0.52, "step": 693 }, { "epoch": 0.02, "grad_norm": 16.58531948956066, "learning_rate": 6.6221374045801534e-06, "loss": 0.3226, "step": 694 }, { "epoch": 0.02, "grad_norm": 8.32098360175878, "learning_rate": 6.631679389312977e-06, "loss": 0.3513, "step": 695 }, { "epoch": 0.02, "grad_norm": 6.95596043068861, "learning_rate": 6.641221374045802e-06, "loss": 0.4227, "step": 696 }, { "epoch": 0.02, "grad_norm": 7.092846581828893, "learning_rate": 6.650763358778627e-06, "loss": 0.4854, "step": 697 }, { "epoch": 0.02, "grad_norm": 10.930898363445817, "learning_rate": 6.66030534351145e-06, "loss": 0.3622, "step": 698 }, { "epoch": 0.02, "grad_norm": 9.155215885235773, "learning_rate": 6.669847328244276e-06, "loss": 0.5915, "step": 699 }, { "epoch": 0.02, "grad_norm": 4.188893726766565, "learning_rate": 6.679389312977099e-06, "loss": 0.2879, "step": 700 }, { "epoch": 0.02, "grad_norm": 9.004092133893325, "learning_rate": 6.6889312977099245e-06, "loss": 0.7346, "step": 701 }, { "epoch": 0.02, "grad_norm": 8.632432775647207, "learning_rate": 6.698473282442749e-06, "loss": 0.5568, "step": 702 }, { "epoch": 0.02, "grad_norm": 5.665880845316677, "learning_rate": 6.7080152671755726e-06, "loss": 0.2472, "step": 703 }, { "epoch": 0.02, "grad_norm": 9.801736455949012, "learning_rate": 6.717557251908398e-06, "loss": 0.4803, "step": 704 }, { "epoch": 0.02, "grad_norm": 11.030300629047384, "learning_rate": 6.727099236641222e-06, "loss": 0.2232, "step": 705 }, { "epoch": 0.02, "grad_norm": 7.895951752458245, "learning_rate": 6.736641221374046e-06, "loss": 0.4453, "step": 706 }, { "epoch": 0.02, "grad_norm": 7.241162800644958, "learning_rate": 6.746183206106871e-06, "loss": 0.1846, "step": 707 }, { "epoch": 0.02, "grad_norm": 6.533821804249787, "learning_rate": 6.755725190839695e-06, "loss": 0.4954, "step": 708 }, { "epoch": 0.02, "grad_norm": 7.541527411367748, "learning_rate": 6.76526717557252e-06, "loss": 0.6172, "step": 709 }, { "epoch": 0.02, "grad_norm": 4.20120047715159, "learning_rate": 6.7748091603053445e-06, "loss": 0.1384, "step": 710 }, { "epoch": 0.02, "grad_norm": 14.440324449249227, "learning_rate": 6.784351145038168e-06, "loss": 0.7403, "step": 711 }, { "epoch": 0.02, "grad_norm": 8.92124260633648, "learning_rate": 6.793893129770993e-06, "loss": 0.5548, "step": 712 }, { "epoch": 0.02, "grad_norm": 4.06881299651287, "learning_rate": 6.803435114503817e-06, "loss": 0.3185, "step": 713 }, { "epoch": 0.02, "grad_norm": 5.9244394914465355, "learning_rate": 6.812977099236641e-06, "loss": 0.0849, "step": 714 }, { "epoch": 0.02, "grad_norm": 11.8341208092946, "learning_rate": 6.822519083969467e-06, "loss": 0.2809, "step": 715 }, { "epoch": 0.02, "grad_norm": 6.429342490079085, "learning_rate": 6.83206106870229e-06, "loss": 0.3882, "step": 716 }, { "epoch": 0.02, "grad_norm": 7.076575103229697, "learning_rate": 6.8416030534351156e-06, "loss": 0.4647, "step": 717 }, { "epoch": 0.02, "grad_norm": 8.437598164305266, "learning_rate": 6.851145038167939e-06, "loss": 0.6136, "step": 718 }, { "epoch": 0.02, "grad_norm": 8.144026091573172, "learning_rate": 6.860687022900764e-06, "loss": 0.2853, "step": 719 }, { "epoch": 0.02, "grad_norm": 5.533186406555664, "learning_rate": 6.870229007633589e-06, "loss": 0.4798, "step": 720 }, { "epoch": 0.02, "grad_norm": 8.507100841788631, "learning_rate": 6.8797709923664125e-06, "loss": 0.7353, "step": 721 }, { "epoch": 0.02, "grad_norm": 6.74499410867964, "learning_rate": 6.889312977099237e-06, "loss": 0.3598, "step": 722 }, { "epoch": 0.02, "grad_norm": 11.682676411298203, "learning_rate": 6.898854961832062e-06, "loss": 0.4565, "step": 723 }, { "epoch": 0.02, "grad_norm": 8.300618535859261, "learning_rate": 6.908396946564886e-06, "loss": 0.3692, "step": 724 }, { "epoch": 0.02, "grad_norm": 10.059164880630156, "learning_rate": 6.917938931297711e-06, "loss": 0.2757, "step": 725 }, { "epoch": 0.02, "grad_norm": 7.241059809089652, "learning_rate": 6.927480916030535e-06, "loss": 0.631, "step": 726 }, { "epoch": 0.02, "grad_norm": 4.682281921286053, "learning_rate": 6.937022900763359e-06, "loss": 0.4043, "step": 727 }, { "epoch": 0.02, "grad_norm": 8.836385187735717, "learning_rate": 6.946564885496184e-06, "loss": 0.4309, "step": 728 }, { "epoch": 0.02, "grad_norm": 15.696648593540335, "learning_rate": 6.956106870229008e-06, "loss": 0.5021, "step": 729 }, { "epoch": 0.02, "grad_norm": 10.301151574211632, "learning_rate": 6.965648854961833e-06, "loss": 0.5705, "step": 730 }, { "epoch": 0.02, "grad_norm": 14.63596239622362, "learning_rate": 6.975190839694657e-06, "loss": 0.3844, "step": 731 }, { "epoch": 0.02, "grad_norm": 12.173874971081451, "learning_rate": 6.984732824427481e-06, "loss": 0.317, "step": 732 }, { "epoch": 0.02, "grad_norm": 4.967544937352893, "learning_rate": 6.994274809160307e-06, "loss": 0.2921, "step": 733 }, { "epoch": 0.02, "grad_norm": 11.396666401616425, "learning_rate": 7.00381679389313e-06, "loss": 0.3126, "step": 734 }, { "epoch": 0.02, "grad_norm": 8.142866883953262, "learning_rate": 7.013358778625955e-06, "loss": 0.5668, "step": 735 }, { "epoch": 0.02, "grad_norm": 7.509232147674112, "learning_rate": 7.022900763358779e-06, "loss": 0.4456, "step": 736 }, { "epoch": 0.02, "grad_norm": 12.871824298785645, "learning_rate": 7.0324427480916035e-06, "loss": 0.5701, "step": 737 }, { "epoch": 0.02, "grad_norm": 8.38521863494245, "learning_rate": 7.041984732824429e-06, "loss": 0.729, "step": 738 }, { "epoch": 0.02, "grad_norm": 7.578767100394622, "learning_rate": 7.051526717557252e-06, "loss": 0.3461, "step": 739 }, { "epoch": 0.02, "grad_norm": 8.542146815008946, "learning_rate": 7.061068702290077e-06, "loss": 0.7417, "step": 740 }, { "epoch": 0.02, "grad_norm": 12.336069039917252, "learning_rate": 7.070610687022902e-06, "loss": 0.4754, "step": 741 }, { "epoch": 0.02, "grad_norm": 6.692172120776018, "learning_rate": 7.080152671755726e-06, "loss": 0.2441, "step": 742 }, { "epoch": 0.02, "grad_norm": 8.954881532397698, "learning_rate": 7.08969465648855e-06, "loss": 0.6112, "step": 743 }, { "epoch": 0.02, "grad_norm": 6.032501999957073, "learning_rate": 7.0992366412213746e-06, "loss": 0.3292, "step": 744 }, { "epoch": 0.02, "grad_norm": 10.319854703072583, "learning_rate": 7.108778625954199e-06, "loss": 0.5981, "step": 745 }, { "epoch": 0.02, "grad_norm": 8.191836890168885, "learning_rate": 7.118320610687024e-06, "loss": 0.3164, "step": 746 }, { "epoch": 0.02, "grad_norm": 2.482845567371411, "learning_rate": 7.127862595419848e-06, "loss": 0.2786, "step": 747 }, { "epoch": 0.02, "grad_norm": 8.666002089133276, "learning_rate": 7.137404580152672e-06, "loss": 0.7066, "step": 748 }, { "epoch": 0.02, "grad_norm": 9.798095237753405, "learning_rate": 7.146946564885496e-06, "loss": 0.408, "step": 749 }, { "epoch": 0.02, "grad_norm": 9.053870676898846, "learning_rate": 7.156488549618321e-06, "loss": 0.5135, "step": 750 }, { "epoch": 0.02, "grad_norm": 8.454162815996186, "learning_rate": 7.166030534351146e-06, "loss": 0.6732, "step": 751 }, { "epoch": 0.02, "grad_norm": 9.460127653305074, "learning_rate": 7.17557251908397e-06, "loss": 0.3101, "step": 752 }, { "epoch": 0.02, "grad_norm": 7.56503373253885, "learning_rate": 7.1851145038167945e-06, "loss": 0.232, "step": 753 }, { "epoch": 0.02, "grad_norm": 5.3879189374853675, "learning_rate": 7.194656488549618e-06, "loss": 0.6035, "step": 754 }, { "epoch": 0.02, "grad_norm": 9.771837281879257, "learning_rate": 7.204198473282443e-06, "loss": 0.7916, "step": 755 }, { "epoch": 0.02, "grad_norm": 5.974457812748454, "learning_rate": 7.213740458015268e-06, "loss": 0.3856, "step": 756 }, { "epoch": 0.02, "grad_norm": 8.138196733125042, "learning_rate": 7.223282442748092e-06, "loss": 0.1842, "step": 757 }, { "epoch": 0.02, "grad_norm": 5.711577748783746, "learning_rate": 7.232824427480917e-06, "loss": 0.5261, "step": 758 }, { "epoch": 0.02, "grad_norm": 7.080216593473539, "learning_rate": 7.242366412213742e-06, "loss": 0.3862, "step": 759 }, { "epoch": 0.02, "grad_norm": 7.033306550099992, "learning_rate": 7.251908396946566e-06, "loss": 0.7505, "step": 760 }, { "epoch": 0.02, "grad_norm": 8.125548417583076, "learning_rate": 7.26145038167939e-06, "loss": 0.5149, "step": 761 }, { "epoch": 0.02, "grad_norm": 5.48884815261223, "learning_rate": 7.270992366412214e-06, "loss": 0.3093, "step": 762 }, { "epoch": 0.02, "grad_norm": 3.7585642611881678, "learning_rate": 7.280534351145039e-06, "loss": 0.1582, "step": 763 }, { "epoch": 0.02, "grad_norm": 4.1562952598459395, "learning_rate": 7.290076335877863e-06, "loss": 0.2726, "step": 764 }, { "epoch": 0.02, "grad_norm": 6.563294862111673, "learning_rate": 7.299618320610688e-06, "loss": 0.4347, "step": 765 }, { "epoch": 0.02, "grad_norm": 5.798976465873768, "learning_rate": 7.309160305343512e-06, "loss": 0.2679, "step": 766 }, { "epoch": 0.02, "grad_norm": 15.101508948107325, "learning_rate": 7.318702290076336e-06, "loss": 0.724, "step": 767 }, { "epoch": 0.02, "grad_norm": 5.247615545199293, "learning_rate": 7.328244274809161e-06, "loss": 0.4349, "step": 768 }, { "epoch": 0.02, "grad_norm": 9.126623505693935, "learning_rate": 7.3377862595419856e-06, "loss": 0.8263, "step": 769 }, { "epoch": 0.02, "grad_norm": 8.99407965590763, "learning_rate": 7.347328244274809e-06, "loss": 0.3304, "step": 770 }, { "epoch": 0.02, "grad_norm": 5.259754428787866, "learning_rate": 7.3568702290076344e-06, "loss": 0.3875, "step": 771 }, { "epoch": 0.02, "grad_norm": 7.276518172378302, "learning_rate": 7.366412213740458e-06, "loss": 0.628, "step": 772 }, { "epoch": 0.02, "grad_norm": 9.111907135943206, "learning_rate": 7.375954198473283e-06, "loss": 0.5237, "step": 773 }, { "epoch": 0.02, "grad_norm": 18.910317959061537, "learning_rate": 7.385496183206108e-06, "loss": 0.6733, "step": 774 }, { "epoch": 0.02, "grad_norm": 26.68846178666826, "learning_rate": 7.395038167938931e-06, "loss": 0.3674, "step": 775 }, { "epoch": 0.02, "grad_norm": 8.75108450571957, "learning_rate": 7.404580152671757e-06, "loss": 0.7886, "step": 776 }, { "epoch": 0.02, "grad_norm": 6.531516767830589, "learning_rate": 7.414122137404581e-06, "loss": 0.3659, "step": 777 }, { "epoch": 0.02, "grad_norm": 5.938167012544533, "learning_rate": 7.423664122137405e-06, "loss": 0.7215, "step": 778 }, { "epoch": 0.02, "grad_norm": 16.33969900425968, "learning_rate": 7.43320610687023e-06, "loss": 0.4347, "step": 779 }, { "epoch": 0.02, "grad_norm": 8.29932121810419, "learning_rate": 7.4427480916030536e-06, "loss": 0.5956, "step": 780 }, { "epoch": 0.02, "grad_norm": 4.142398440049027, "learning_rate": 7.452290076335879e-06, "loss": 0.2751, "step": 781 }, { "epoch": 0.02, "grad_norm": 9.703277144414077, "learning_rate": 7.461832061068703e-06, "loss": 0.3609, "step": 782 }, { "epoch": 0.02, "grad_norm": 9.294073269319853, "learning_rate": 7.471374045801527e-06, "loss": 0.1247, "step": 783 }, { "epoch": 0.02, "grad_norm": 10.191477929210846, "learning_rate": 7.480916030534352e-06, "loss": 0.3823, "step": 784 }, { "epoch": 0.02, "grad_norm": 3.203307616447826, "learning_rate": 7.490458015267176e-06, "loss": 0.2641, "step": 785 }, { "epoch": 0.02, "grad_norm": 7.773071341780649, "learning_rate": 7.500000000000001e-06, "loss": 0.854, "step": 786 }, { "epoch": 0.02, "grad_norm": 8.792772220565816, "learning_rate": 7.5095419847328255e-06, "loss": 0.6972, "step": 787 }, { "epoch": 0.02, "grad_norm": 11.644924194298854, "learning_rate": 7.519083969465649e-06, "loss": 0.3976, "step": 788 }, { "epoch": 0.02, "grad_norm": 16.952105429090626, "learning_rate": 7.528625954198474e-06, "loss": 0.1111, "step": 789 }, { "epoch": 0.02, "grad_norm": 7.485177206159998, "learning_rate": 7.538167938931298e-06, "loss": 0.4986, "step": 790 }, { "epoch": 0.02, "grad_norm": 6.79329578386785, "learning_rate": 7.547709923664122e-06, "loss": 0.2844, "step": 791 }, { "epoch": 0.02, "grad_norm": 11.345688415192091, "learning_rate": 7.557251908396948e-06, "loss": 0.6404, "step": 792 }, { "epoch": 0.02, "grad_norm": 6.653217860801628, "learning_rate": 7.566793893129771e-06, "loss": 0.7288, "step": 793 }, { "epoch": 0.02, "grad_norm": 3.4198428176988065, "learning_rate": 7.5763358778625966e-06, "loss": 0.2193, "step": 794 }, { "epoch": 0.02, "grad_norm": 10.676596963861154, "learning_rate": 7.58587786259542e-06, "loss": 0.3435, "step": 795 }, { "epoch": 0.02, "grad_norm": 8.809891443179115, "learning_rate": 7.595419847328245e-06, "loss": 0.6691, "step": 796 }, { "epoch": 0.02, "grad_norm": 5.7581864006555765, "learning_rate": 7.60496183206107e-06, "loss": 0.4583, "step": 797 }, { "epoch": 0.02, "grad_norm": 12.834961494771445, "learning_rate": 7.6145038167938935e-06, "loss": 0.8899, "step": 798 }, { "epoch": 0.02, "grad_norm": 8.937757581720351, "learning_rate": 7.624045801526718e-06, "loss": 0.3812, "step": 799 }, { "epoch": 0.02, "grad_norm": 9.462572909317421, "learning_rate": 7.633587786259543e-06, "loss": 0.4062, "step": 800 }, { "epoch": 0.02, "grad_norm": 9.39370803874397, "learning_rate": 7.643129770992368e-06, "loss": 0.3013, "step": 801 }, { "epoch": 0.02, "grad_norm": 10.808604337951085, "learning_rate": 7.652671755725192e-06, "loss": 0.6391, "step": 802 }, { "epoch": 0.02, "grad_norm": 7.629318452745681, "learning_rate": 7.662213740458015e-06, "loss": 0.3092, "step": 803 }, { "epoch": 0.02, "grad_norm": 10.6142516578813, "learning_rate": 7.671755725190841e-06, "loss": 0.6114, "step": 804 }, { "epoch": 0.02, "grad_norm": 10.366184027947291, "learning_rate": 7.681297709923665e-06, "loss": 0.479, "step": 805 }, { "epoch": 0.02, "grad_norm": 11.930736123434357, "learning_rate": 7.690839694656488e-06, "loss": 0.4328, "step": 806 }, { "epoch": 0.02, "grad_norm": 4.01772909293409, "learning_rate": 7.700381679389314e-06, "loss": 0.2939, "step": 807 }, { "epoch": 0.02, "grad_norm": 8.370321020079636, "learning_rate": 7.709923664122137e-06, "loss": 0.5597, "step": 808 }, { "epoch": 0.02, "grad_norm": 6.303442093415043, "learning_rate": 7.719465648854963e-06, "loss": 0.7671, "step": 809 }, { "epoch": 0.02, "grad_norm": 8.599379259817235, "learning_rate": 7.729007633587788e-06, "loss": 0.5284, "step": 810 }, { "epoch": 0.02, "grad_norm": 7.238475354346028, "learning_rate": 7.73854961832061e-06, "loss": 0.6223, "step": 811 }, { "epoch": 0.02, "grad_norm": 9.56000301696219, "learning_rate": 7.748091603053436e-06, "loss": 0.3493, "step": 812 }, { "epoch": 0.02, "grad_norm": 9.662934097862754, "learning_rate": 7.75763358778626e-06, "loss": 0.4103, "step": 813 }, { "epoch": 0.02, "grad_norm": 13.03955779326307, "learning_rate": 7.767175572519084e-06, "loss": 0.449, "step": 814 }, { "epoch": 0.02, "grad_norm": 6.913706831755056, "learning_rate": 7.77671755725191e-06, "loss": 0.5042, "step": 815 }, { "epoch": 0.02, "grad_norm": 4.28719396138436, "learning_rate": 7.786259541984733e-06, "loss": 0.1891, "step": 816 }, { "epoch": 0.02, "grad_norm": 5.819690153912735, "learning_rate": 7.795801526717559e-06, "loss": 0.2422, "step": 817 }, { "epoch": 0.02, "grad_norm": 5.262978633036322, "learning_rate": 7.805343511450383e-06, "loss": 0.4033, "step": 818 }, { "epoch": 0.02, "grad_norm": 9.7384379374679, "learning_rate": 7.814885496183206e-06, "loss": 0.4445, "step": 819 }, { "epoch": 0.02, "grad_norm": 7.9234919656556455, "learning_rate": 7.824427480916032e-06, "loss": 0.6979, "step": 820 }, { "epoch": 0.02, "grad_norm": 9.151103790078125, "learning_rate": 7.833969465648855e-06, "loss": 0.541, "step": 821 }, { "epoch": 0.02, "grad_norm": 7.6252097429288614, "learning_rate": 7.843511450381681e-06, "loss": 0.6453, "step": 822 }, { "epoch": 0.02, "grad_norm": 7.367093260598209, "learning_rate": 7.853053435114505e-06, "loss": 0.6524, "step": 823 }, { "epoch": 0.02, "grad_norm": 9.583475631541976, "learning_rate": 7.862595419847328e-06, "loss": 0.4511, "step": 824 }, { "epoch": 0.02, "grad_norm": 7.70814987857843, "learning_rate": 7.872137404580154e-06, "loss": 0.2978, "step": 825 }, { "epoch": 0.02, "grad_norm": 6.2582487223358, "learning_rate": 7.881679389312977e-06, "loss": 0.506, "step": 826 }, { "epoch": 0.02, "grad_norm": 12.35270115274562, "learning_rate": 7.891221374045801e-06, "loss": 0.9232, "step": 827 }, { "epoch": 0.02, "grad_norm": 8.41059247137825, "learning_rate": 7.900763358778627e-06, "loss": 0.2259, "step": 828 }, { "epoch": 0.02, "grad_norm": 6.4369556974646835, "learning_rate": 7.91030534351145e-06, "loss": 0.4842, "step": 829 }, { "epoch": 0.02, "grad_norm": 9.720193148605196, "learning_rate": 7.919847328244276e-06, "loss": 0.359, "step": 830 }, { "epoch": 0.02, "grad_norm": 4.7464626592736305, "learning_rate": 7.929389312977099e-06, "loss": 0.524, "step": 831 }, { "epoch": 0.02, "grad_norm": 8.48786632438333, "learning_rate": 7.938931297709924e-06, "loss": 0.3313, "step": 832 }, { "epoch": 0.02, "grad_norm": 13.176153282343988, "learning_rate": 7.94847328244275e-06, "loss": 0.4372, "step": 833 }, { "epoch": 0.02, "grad_norm": 7.439637045415804, "learning_rate": 7.958015267175572e-06, "loss": 0.3652, "step": 834 }, { "epoch": 0.02, "grad_norm": 6.630332851713994, "learning_rate": 7.967557251908397e-06, "loss": 0.6687, "step": 835 }, { "epoch": 0.02, "grad_norm": 8.69287930810518, "learning_rate": 7.977099236641223e-06, "loss": 0.6353, "step": 836 }, { "epoch": 0.02, "grad_norm": 8.506354761424513, "learning_rate": 7.986641221374046e-06, "loss": 0.5033, "step": 837 }, { "epoch": 0.02, "grad_norm": 6.800839454011796, "learning_rate": 7.996183206106872e-06, "loss": 0.2441, "step": 838 }, { "epoch": 0.02, "grad_norm": 7.007023693586348, "learning_rate": 8.005725190839695e-06, "loss": 0.3124, "step": 839 }, { "epoch": 0.02, "grad_norm": 6.472606080896209, "learning_rate": 8.015267175572519e-06, "loss": 0.3972, "step": 840 }, { "epoch": 0.02, "grad_norm": 7.267976474686098, "learning_rate": 8.024809160305345e-06, "loss": 0.5126, "step": 841 }, { "epoch": 0.02, "grad_norm": 6.287102018164188, "learning_rate": 8.034351145038168e-06, "loss": 0.6703, "step": 842 }, { "epoch": 0.02, "grad_norm": 7.772315046846658, "learning_rate": 8.043893129770992e-06, "loss": 0.5236, "step": 843 }, { "epoch": 0.02, "grad_norm": 9.247434724328304, "learning_rate": 8.053435114503817e-06, "loss": 0.5657, "step": 844 }, { "epoch": 0.02, "grad_norm": 4.496262534757465, "learning_rate": 8.062977099236641e-06, "loss": 0.3393, "step": 845 }, { "epoch": 0.02, "grad_norm": 6.568218028906801, "learning_rate": 8.072519083969467e-06, "loss": 0.4228, "step": 846 }, { "epoch": 0.02, "grad_norm": 5.693561719174459, "learning_rate": 8.08206106870229e-06, "loss": 0.546, "step": 847 }, { "epoch": 0.02, "grad_norm": 13.354685070590612, "learning_rate": 8.091603053435115e-06, "loss": 0.6543, "step": 848 }, { "epoch": 0.02, "grad_norm": 9.651727796451114, "learning_rate": 8.101145038167939e-06, "loss": 0.7035, "step": 849 }, { "epoch": 0.02, "grad_norm": 6.167933381204502, "learning_rate": 8.110687022900763e-06, "loss": 0.4672, "step": 850 }, { "epoch": 0.02, "grad_norm": 4.985809741509792, "learning_rate": 8.12022900763359e-06, "loss": 0.437, "step": 851 }, { "epoch": 0.02, "grad_norm": 8.527229898962256, "learning_rate": 8.129770992366412e-06, "loss": 0.4513, "step": 852 }, { "epoch": 0.02, "grad_norm": 8.578830266146518, "learning_rate": 8.139312977099237e-06, "loss": 0.5053, "step": 853 }, { "epoch": 0.02, "grad_norm": 9.931465523975726, "learning_rate": 8.148854961832063e-06, "loss": 0.4068, "step": 854 }, { "epoch": 0.02, "grad_norm": 7.6796820049712515, "learning_rate": 8.158396946564886e-06, "loss": 0.2323, "step": 855 }, { "epoch": 0.02, "grad_norm": 18.70898070486184, "learning_rate": 8.16793893129771e-06, "loss": 1.0582, "step": 856 }, { "epoch": 0.02, "grad_norm": 6.70499847480364, "learning_rate": 8.177480916030535e-06, "loss": 0.4582, "step": 857 }, { "epoch": 0.02, "grad_norm": 10.318399764643882, "learning_rate": 8.187022900763359e-06, "loss": 0.5939, "step": 858 }, { "epoch": 0.02, "grad_norm": 6.510274615963319, "learning_rate": 8.196564885496185e-06, "loss": 0.6937, "step": 859 }, { "epoch": 0.02, "grad_norm": 9.269749255857976, "learning_rate": 8.206106870229008e-06, "loss": 0.6037, "step": 860 }, { "epoch": 0.02, "grad_norm": 8.485693644695306, "learning_rate": 8.215648854961832e-06, "loss": 0.5439, "step": 861 }, { "epoch": 0.02, "grad_norm": 6.277791147958627, "learning_rate": 8.225190839694657e-06, "loss": 0.3388, "step": 862 }, { "epoch": 0.02, "grad_norm": 6.058504734824196, "learning_rate": 8.234732824427481e-06, "loss": 0.5028, "step": 863 }, { "epoch": 0.02, "grad_norm": 7.342848864519416, "learning_rate": 8.244274809160306e-06, "loss": 0.3618, "step": 864 }, { "epoch": 0.02, "grad_norm": 4.447889844588706, "learning_rate": 8.25381679389313e-06, "loss": 0.4017, "step": 865 }, { "epoch": 0.02, "grad_norm": 4.1279831416484445, "learning_rate": 8.263358778625955e-06, "loss": 0.2371, "step": 866 }, { "epoch": 0.02, "grad_norm": 44.254607903602526, "learning_rate": 8.272900763358779e-06, "loss": 1.0117, "step": 867 }, { "epoch": 0.02, "grad_norm": 5.358749508849919, "learning_rate": 8.282442748091603e-06, "loss": 0.2176, "step": 868 }, { "epoch": 0.02, "grad_norm": 3.557940560282355, "learning_rate": 8.291984732824428e-06, "loss": 0.1975, "step": 869 }, { "epoch": 0.02, "grad_norm": 6.677091716536935, "learning_rate": 8.301526717557252e-06, "loss": 0.2705, "step": 870 }, { "epoch": 0.02, "grad_norm": 9.905629424383536, "learning_rate": 8.311068702290077e-06, "loss": 0.4675, "step": 871 }, { "epoch": 0.02, "grad_norm": 13.649738496504218, "learning_rate": 8.320610687022901e-06, "loss": 0.6197, "step": 872 }, { "epoch": 0.03, "grad_norm": 9.412213333040224, "learning_rate": 8.330152671755726e-06, "loss": 0.5147, "step": 873 }, { "epoch": 0.03, "grad_norm": 9.293285542460538, "learning_rate": 8.33969465648855e-06, "loss": 0.4559, "step": 874 }, { "epoch": 0.03, "grad_norm": 6.775120579959138, "learning_rate": 8.349236641221374e-06, "loss": 0.3989, "step": 875 }, { "epoch": 0.03, "grad_norm": 6.3293276927254, "learning_rate": 8.358778625954199e-06, "loss": 0.1975, "step": 876 }, { "epoch": 0.03, "grad_norm": 8.043603087464106, "learning_rate": 8.368320610687023e-06, "loss": 0.4773, "step": 877 }, { "epoch": 0.03, "grad_norm": 9.906583762337547, "learning_rate": 8.377862595419848e-06, "loss": 0.8986, "step": 878 }, { "epoch": 0.03, "grad_norm": 6.345405588075435, "learning_rate": 8.387404580152672e-06, "loss": 0.3021, "step": 879 }, { "epoch": 0.03, "grad_norm": 10.241865861557715, "learning_rate": 8.396946564885497e-06, "loss": 0.4421, "step": 880 }, { "epoch": 0.03, "grad_norm": 7.611248122634945, "learning_rate": 8.406488549618321e-06, "loss": 0.3238, "step": 881 }, { "epoch": 0.03, "grad_norm": 3.979212419469876, "learning_rate": 8.416030534351146e-06, "loss": 0.18, "step": 882 }, { "epoch": 0.03, "grad_norm": 9.979198564599223, "learning_rate": 8.42557251908397e-06, "loss": 0.6052, "step": 883 }, { "epoch": 0.03, "grad_norm": 9.277452610381053, "learning_rate": 8.435114503816794e-06, "loss": 0.4539, "step": 884 }, { "epoch": 0.03, "grad_norm": 9.940422348168157, "learning_rate": 8.444656488549619e-06, "loss": 0.3858, "step": 885 }, { "epoch": 0.03, "grad_norm": 13.053245259307035, "learning_rate": 8.454198473282443e-06, "loss": 0.2979, "step": 886 }, { "epoch": 0.03, "grad_norm": 7.316923515254781, "learning_rate": 8.463740458015268e-06, "loss": 0.8218, "step": 887 }, { "epoch": 0.03, "grad_norm": 7.854532194527758, "learning_rate": 8.473282442748092e-06, "loss": 0.2774, "step": 888 }, { "epoch": 0.03, "grad_norm": 8.251004822545253, "learning_rate": 8.482824427480917e-06, "loss": 0.3104, "step": 889 }, { "epoch": 0.03, "grad_norm": 8.356976159555998, "learning_rate": 8.492366412213741e-06, "loss": 0.4531, "step": 890 }, { "epoch": 0.03, "grad_norm": 16.159787172672306, "learning_rate": 8.501908396946565e-06, "loss": 0.7471, "step": 891 }, { "epoch": 0.03, "grad_norm": 7.718708918535508, "learning_rate": 8.51145038167939e-06, "loss": 0.4615, "step": 892 }, { "epoch": 0.03, "grad_norm": 10.541873995183316, "learning_rate": 8.520992366412214e-06, "loss": 0.4765, "step": 893 }, { "epoch": 0.03, "grad_norm": 7.073133346392244, "learning_rate": 8.530534351145039e-06, "loss": 0.64, "step": 894 }, { "epoch": 0.03, "grad_norm": 7.284040972146651, "learning_rate": 8.540076335877863e-06, "loss": 0.5114, "step": 895 }, { "epoch": 0.03, "grad_norm": 9.591208055957093, "learning_rate": 8.549618320610688e-06, "loss": 0.3104, "step": 896 }, { "epoch": 0.03, "grad_norm": 4.654841702901184, "learning_rate": 8.559160305343512e-06, "loss": 0.3612, "step": 897 }, { "epoch": 0.03, "grad_norm": 11.666378253823218, "learning_rate": 8.568702290076337e-06, "loss": 0.342, "step": 898 }, { "epoch": 0.03, "grad_norm": 9.237992251111928, "learning_rate": 8.578244274809161e-06, "loss": 0.5052, "step": 899 }, { "epoch": 0.03, "grad_norm": 13.205984747154975, "learning_rate": 8.587786259541985e-06, "loss": 0.4225, "step": 900 }, { "epoch": 0.03, "grad_norm": 8.339670391201723, "learning_rate": 8.59732824427481e-06, "loss": 0.5575, "step": 901 }, { "epoch": 0.03, "grad_norm": 7.054851610338554, "learning_rate": 8.606870229007634e-06, "loss": 0.4812, "step": 902 }, { "epoch": 0.03, "grad_norm": 9.228898698233229, "learning_rate": 8.616412213740459e-06, "loss": 0.6728, "step": 903 }, { "epoch": 0.03, "grad_norm": 12.918691775814079, "learning_rate": 8.625954198473283e-06, "loss": 0.5779, "step": 904 }, { "epoch": 0.03, "grad_norm": 6.757471611818557, "learning_rate": 8.635496183206108e-06, "loss": 0.4248, "step": 905 }, { "epoch": 0.03, "grad_norm": 8.62144219921948, "learning_rate": 8.645038167938932e-06, "loss": 0.4144, "step": 906 }, { "epoch": 0.03, "grad_norm": 6.776217443447439, "learning_rate": 8.654580152671757e-06, "loss": 0.5845, "step": 907 }, { "epoch": 0.03, "grad_norm": 8.81620164328721, "learning_rate": 8.664122137404581e-06, "loss": 0.6728, "step": 908 }, { "epoch": 0.03, "grad_norm": 9.521888012277785, "learning_rate": 8.673664122137405e-06, "loss": 0.5839, "step": 909 }, { "epoch": 0.03, "grad_norm": 6.234932590111039, "learning_rate": 8.68320610687023e-06, "loss": 0.2263, "step": 910 }, { "epoch": 0.03, "grad_norm": 4.634201124740345, "learning_rate": 8.692748091603054e-06, "loss": 0.3761, "step": 911 }, { "epoch": 0.03, "grad_norm": 11.260531179932128, "learning_rate": 8.702290076335879e-06, "loss": 0.5852, "step": 912 }, { "epoch": 0.03, "grad_norm": 9.749608643331285, "learning_rate": 8.711832061068703e-06, "loss": 0.5885, "step": 913 }, { "epoch": 0.03, "grad_norm": 4.130358048267464, "learning_rate": 8.721374045801528e-06, "loss": 0.3831, "step": 914 }, { "epoch": 0.03, "grad_norm": 8.05560119952136, "learning_rate": 8.730916030534352e-06, "loss": 0.3652, "step": 915 }, { "epoch": 0.03, "grad_norm": 3.2360794625669795, "learning_rate": 8.740458015267176e-06, "loss": 0.3653, "step": 916 }, { "epoch": 0.03, "grad_norm": 5.452653443679854, "learning_rate": 8.750000000000001e-06, "loss": 0.5945, "step": 917 }, { "epoch": 0.03, "grad_norm": 7.77739120461257, "learning_rate": 8.759541984732825e-06, "loss": 0.1868, "step": 918 }, { "epoch": 0.03, "grad_norm": 13.318800890219821, "learning_rate": 8.76908396946565e-06, "loss": 0.4632, "step": 919 }, { "epoch": 0.03, "grad_norm": 6.406214420871093, "learning_rate": 8.778625954198474e-06, "loss": 0.7549, "step": 920 }, { "epoch": 0.03, "grad_norm": 13.117396668248348, "learning_rate": 8.788167938931299e-06, "loss": 0.659, "step": 921 }, { "epoch": 0.03, "grad_norm": 7.845516082000344, "learning_rate": 8.797709923664123e-06, "loss": 0.6056, "step": 922 }, { "epoch": 0.03, "grad_norm": 5.277445173100734, "learning_rate": 8.807251908396948e-06, "loss": 0.4597, "step": 923 }, { "epoch": 0.03, "grad_norm": 8.870842403074555, "learning_rate": 8.816793893129772e-06, "loss": 0.3975, "step": 924 }, { "epoch": 0.03, "grad_norm": 8.939540336705216, "learning_rate": 8.826335877862596e-06, "loss": 0.3494, "step": 925 }, { "epoch": 0.03, "grad_norm": 6.856813791161948, "learning_rate": 8.83587786259542e-06, "loss": 0.7914, "step": 926 }, { "epoch": 0.03, "grad_norm": 4.086688379325459, "learning_rate": 8.845419847328245e-06, "loss": 0.3011, "step": 927 }, { "epoch": 0.03, "grad_norm": 6.99680800548543, "learning_rate": 8.85496183206107e-06, "loss": 0.488, "step": 928 }, { "epoch": 0.03, "grad_norm": 7.8487939309003885, "learning_rate": 8.864503816793894e-06, "loss": 0.3171, "step": 929 }, { "epoch": 0.03, "grad_norm": 14.586999218083015, "learning_rate": 8.874045801526719e-06, "loss": 0.426, "step": 930 }, { "epoch": 0.03, "grad_norm": 7.183497168434654, "learning_rate": 8.883587786259543e-06, "loss": 0.8901, "step": 931 }, { "epoch": 0.03, "grad_norm": 14.549724426806032, "learning_rate": 8.893129770992368e-06, "loss": 0.5144, "step": 932 }, { "epoch": 0.03, "grad_norm": 3.9911522465463145, "learning_rate": 8.902671755725192e-06, "loss": 0.4058, "step": 933 }, { "epoch": 0.03, "grad_norm": 6.886729897648557, "learning_rate": 8.912213740458016e-06, "loss": 0.5364, "step": 934 }, { "epoch": 0.03, "grad_norm": 6.241457327102384, "learning_rate": 8.92175572519084e-06, "loss": 0.4675, "step": 935 }, { "epoch": 0.03, "grad_norm": 13.28093390593517, "learning_rate": 8.931297709923665e-06, "loss": 0.7255, "step": 936 }, { "epoch": 0.03, "grad_norm": 6.415399707047416, "learning_rate": 8.94083969465649e-06, "loss": 0.2076, "step": 937 }, { "epoch": 0.03, "grad_norm": 7.864659135128972, "learning_rate": 8.950381679389314e-06, "loss": 0.2714, "step": 938 }, { "epoch": 0.03, "grad_norm": 7.579740057239902, "learning_rate": 8.959923664122137e-06, "loss": 0.5843, "step": 939 }, { "epoch": 0.03, "grad_norm": 3.433013172561082, "learning_rate": 8.969465648854963e-06, "loss": 0.2467, "step": 940 }, { "epoch": 0.03, "grad_norm": 3.605964932339431, "learning_rate": 8.979007633587787e-06, "loss": 0.4649, "step": 941 }, { "epoch": 0.03, "grad_norm": 8.441388456298489, "learning_rate": 8.988549618320612e-06, "loss": 0.5887, "step": 942 }, { "epoch": 0.03, "grad_norm": 4.344480878417557, "learning_rate": 8.998091603053436e-06, "loss": 0.3171, "step": 943 }, { "epoch": 0.03, "grad_norm": 5.778214931252389, "learning_rate": 9.007633587786259e-06, "loss": 0.532, "step": 944 }, { "epoch": 0.03, "grad_norm": 8.97947057458995, "learning_rate": 9.017175572519085e-06, "loss": 0.5361, "step": 945 }, { "epoch": 0.03, "grad_norm": 7.3584693950833895, "learning_rate": 9.02671755725191e-06, "loss": 0.2738, "step": 946 }, { "epoch": 0.03, "grad_norm": 10.802744792132476, "learning_rate": 9.036259541984732e-06, "loss": 0.4963, "step": 947 }, { "epoch": 0.03, "grad_norm": 4.914385640931984, "learning_rate": 9.045801526717559e-06, "loss": 0.3522, "step": 948 }, { "epoch": 0.03, "grad_norm": 6.194959478915424, "learning_rate": 9.055343511450383e-06, "loss": 0.4994, "step": 949 }, { "epoch": 0.03, "grad_norm": 6.029987420469102, "learning_rate": 9.064885496183207e-06, "loss": 0.384, "step": 950 }, { "epoch": 0.03, "grad_norm": 5.667482971785062, "learning_rate": 9.074427480916032e-06, "loss": 0.3537, "step": 951 }, { "epoch": 0.03, "grad_norm": 6.576325666568385, "learning_rate": 9.083969465648855e-06, "loss": 0.3229, "step": 952 }, { "epoch": 0.03, "grad_norm": 8.59517338842489, "learning_rate": 9.09351145038168e-06, "loss": 0.5997, "step": 953 }, { "epoch": 0.03, "grad_norm": 11.949893609375383, "learning_rate": 9.103053435114505e-06, "loss": 0.389, "step": 954 }, { "epoch": 0.03, "grad_norm": 7.849824354308423, "learning_rate": 9.112595419847328e-06, "loss": 0.48, "step": 955 }, { "epoch": 0.03, "grad_norm": 6.683715890540886, "learning_rate": 9.122137404580154e-06, "loss": 0.3934, "step": 956 }, { "epoch": 0.03, "grad_norm": 5.1705814829972905, "learning_rate": 9.131679389312977e-06, "loss": 0.4527, "step": 957 }, { "epoch": 0.03, "grad_norm": 8.896289060356017, "learning_rate": 9.141221374045803e-06, "loss": 0.4717, "step": 958 }, { "epoch": 0.03, "grad_norm": 14.243287630431164, "learning_rate": 9.150763358778627e-06, "loss": 0.635, "step": 959 }, { "epoch": 0.03, "grad_norm": 8.10605228391775, "learning_rate": 9.16030534351145e-06, "loss": 0.4333, "step": 960 }, { "epoch": 0.03, "grad_norm": 4.483956161238784, "learning_rate": 9.169847328244276e-06, "loss": 0.3146, "step": 961 }, { "epoch": 0.03, "grad_norm": 7.173735732499574, "learning_rate": 9.179389312977099e-06, "loss": 0.3151, "step": 962 }, { "epoch": 0.03, "grad_norm": 8.531795721800037, "learning_rate": 9.188931297709925e-06, "loss": 0.5082, "step": 963 }, { "epoch": 0.03, "grad_norm": 4.635601471492393, "learning_rate": 9.19847328244275e-06, "loss": 0.2982, "step": 964 }, { "epoch": 0.03, "grad_norm": 3.2382031328927945, "learning_rate": 9.208015267175572e-06, "loss": 0.2887, "step": 965 }, { "epoch": 0.03, "grad_norm": 5.845833391700285, "learning_rate": 9.217557251908398e-06, "loss": 0.5116, "step": 966 }, { "epoch": 0.03, "grad_norm": 5.39294878864642, "learning_rate": 9.227099236641223e-06, "loss": 0.5177, "step": 967 }, { "epoch": 0.03, "grad_norm": 9.23801398183025, "learning_rate": 9.236641221374046e-06, "loss": 1.0989, "step": 968 }, { "epoch": 0.03, "grad_norm": 9.118767268875013, "learning_rate": 9.246183206106872e-06, "loss": 0.9024, "step": 969 }, { "epoch": 0.03, "grad_norm": 12.085936079660145, "learning_rate": 9.255725190839695e-06, "loss": 0.6005, "step": 970 }, { "epoch": 0.03, "grad_norm": 6.236479057590636, "learning_rate": 9.26526717557252e-06, "loss": 0.5087, "step": 971 }, { "epoch": 0.03, "grad_norm": 8.809286302632193, "learning_rate": 9.274809160305345e-06, "loss": 0.388, "step": 972 }, { "epoch": 0.03, "grad_norm": 10.055338899960876, "learning_rate": 9.284351145038168e-06, "loss": 0.5893, "step": 973 }, { "epoch": 0.03, "grad_norm": 6.543094189708236, "learning_rate": 9.293893129770994e-06, "loss": 0.4446, "step": 974 }, { "epoch": 0.03, "grad_norm": 12.73961904941313, "learning_rate": 9.303435114503817e-06, "loss": 0.4089, "step": 975 }, { "epoch": 0.03, "grad_norm": 8.569744525960758, "learning_rate": 9.312977099236641e-06, "loss": 0.2975, "step": 976 }, { "epoch": 0.03, "grad_norm": 9.841987185597967, "learning_rate": 9.322519083969467e-06, "loss": 0.4619, "step": 977 }, { "epoch": 0.03, "grad_norm": 7.606856789125592, "learning_rate": 9.33206106870229e-06, "loss": 0.461, "step": 978 }, { "epoch": 0.03, "grad_norm": 7.908913325864225, "learning_rate": 9.341603053435116e-06, "loss": 0.3271, "step": 979 }, { "epoch": 0.03, "grad_norm": 5.06765168234462, "learning_rate": 9.351145038167939e-06, "loss": 0.3401, "step": 980 }, { "epoch": 0.03, "grad_norm": 7.6742343731946105, "learning_rate": 9.360687022900763e-06, "loss": 0.6069, "step": 981 }, { "epoch": 0.03, "grad_norm": 14.270427852169828, "learning_rate": 9.37022900763359e-06, "loss": 0.4368, "step": 982 }, { "epoch": 0.03, "grad_norm": 6.948233471862945, "learning_rate": 9.379770992366412e-06, "loss": 0.248, "step": 983 }, { "epoch": 0.03, "grad_norm": 7.352586729594622, "learning_rate": 9.389312977099237e-06, "loss": 0.6342, "step": 984 }, { "epoch": 0.03, "grad_norm": 9.420991163409377, "learning_rate": 9.398854961832063e-06, "loss": 0.6254, "step": 985 }, { "epoch": 0.03, "grad_norm": 7.421744383616276, "learning_rate": 9.408396946564886e-06, "loss": 0.4135, "step": 986 }, { "epoch": 0.03, "grad_norm": 5.993231810444339, "learning_rate": 9.417938931297712e-06, "loss": 0.5106, "step": 987 }, { "epoch": 0.03, "grad_norm": 7.025850112901638, "learning_rate": 9.427480916030534e-06, "loss": 0.6125, "step": 988 }, { "epoch": 0.03, "grad_norm": 15.557843718191414, "learning_rate": 9.437022900763359e-06, "loss": 0.4026, "step": 989 }, { "epoch": 0.03, "grad_norm": 5.756936906053705, "learning_rate": 9.446564885496185e-06, "loss": 0.2794, "step": 990 }, { "epoch": 0.03, "grad_norm": 6.409179203905874, "learning_rate": 9.456106870229008e-06, "loss": 0.5753, "step": 991 }, { "epoch": 0.03, "grad_norm": 9.467776207532472, "learning_rate": 9.465648854961834e-06, "loss": 0.4246, "step": 992 }, { "epoch": 0.03, "grad_norm": 13.015034857991596, "learning_rate": 9.475190839694657e-06, "loss": 0.6431, "step": 993 }, { "epoch": 0.03, "grad_norm": 3.9279992210140735, "learning_rate": 9.484732824427481e-06, "loss": 0.1055, "step": 994 }, { "epoch": 0.03, "grad_norm": 12.04206975946398, "learning_rate": 9.494274809160307e-06, "loss": 0.574, "step": 995 }, { "epoch": 0.03, "grad_norm": 9.070241370895603, "learning_rate": 9.50381679389313e-06, "loss": 0.6047, "step": 996 }, { "epoch": 0.03, "grad_norm": 5.217967996998468, "learning_rate": 9.513358778625954e-06, "loss": 0.5854, "step": 997 }, { "epoch": 0.03, "grad_norm": 7.97103125138211, "learning_rate": 9.522900763358779e-06, "loss": 0.3847, "step": 998 }, { "epoch": 0.03, "grad_norm": 4.232920435546335, "learning_rate": 9.532442748091603e-06, "loss": 0.3096, "step": 999 }, { "epoch": 0.03, "grad_norm": 10.114642933055423, "learning_rate": 9.54198473282443e-06, "loss": 0.5593, "step": 1000 }, { "epoch": 0.03, "grad_norm": 8.104056258703926, "learning_rate": 9.551526717557252e-06, "loss": 0.1912, "step": 1001 }, { "epoch": 0.03, "grad_norm": 4.105001464731975, "learning_rate": 9.561068702290077e-06, "loss": 0.2936, "step": 1002 }, { "epoch": 0.03, "grad_norm": 5.007969513096501, "learning_rate": 9.570610687022903e-06, "loss": 0.3564, "step": 1003 }, { "epoch": 0.03, "grad_norm": 9.674992417916572, "learning_rate": 9.580152671755725e-06, "loss": 0.4625, "step": 1004 }, { "epoch": 0.03, "grad_norm": 8.498496764155776, "learning_rate": 9.58969465648855e-06, "loss": 0.2435, "step": 1005 }, { "epoch": 0.03, "grad_norm": 9.473267940946092, "learning_rate": 9.599236641221374e-06, "loss": 0.586, "step": 1006 }, { "epoch": 0.03, "grad_norm": 5.313422582962463, "learning_rate": 9.608778625954199e-06, "loss": 0.7724, "step": 1007 }, { "epoch": 0.03, "grad_norm": 5.770354029539323, "learning_rate": 9.618320610687025e-06, "loss": 0.3537, "step": 1008 }, { "epoch": 0.03, "grad_norm": 6.571574485681168, "learning_rate": 9.627862595419848e-06, "loss": 0.4758, "step": 1009 }, { "epoch": 0.03, "grad_norm": 10.444085242360032, "learning_rate": 9.637404580152672e-06, "loss": 0.8654, "step": 1010 }, { "epoch": 0.03, "grad_norm": 11.810722580655142, "learning_rate": 9.646946564885497e-06, "loss": 0.4723, "step": 1011 }, { "epoch": 0.03, "grad_norm": 14.980706586280192, "learning_rate": 9.656488549618321e-06, "loss": 1.0686, "step": 1012 }, { "epoch": 0.03, "grad_norm": 8.251253206263698, "learning_rate": 9.666030534351145e-06, "loss": 0.5714, "step": 1013 }, { "epoch": 0.03, "grad_norm": 6.924253619077289, "learning_rate": 9.67557251908397e-06, "loss": 0.7074, "step": 1014 }, { "epoch": 0.03, "grad_norm": 4.964050085468276, "learning_rate": 9.685114503816794e-06, "loss": 0.1806, "step": 1015 }, { "epoch": 0.03, "grad_norm": 8.994299884922777, "learning_rate": 9.694656488549619e-06, "loss": 0.4109, "step": 1016 }, { "epoch": 0.03, "grad_norm": 9.220766422415657, "learning_rate": 9.704198473282443e-06, "loss": 0.4955, "step": 1017 }, { "epoch": 0.03, "grad_norm": 9.592254522114006, "learning_rate": 9.713740458015268e-06, "loss": 0.4533, "step": 1018 }, { "epoch": 0.03, "grad_norm": 6.899328604112904, "learning_rate": 9.723282442748092e-06, "loss": 0.5092, "step": 1019 }, { "epoch": 0.03, "grad_norm": 12.469000562860726, "learning_rate": 9.732824427480917e-06, "loss": 0.6262, "step": 1020 }, { "epoch": 0.03, "grad_norm": 10.064874878213185, "learning_rate": 9.742366412213743e-06, "loss": 0.4182, "step": 1021 }, { "epoch": 0.03, "grad_norm": 5.427523526176305, "learning_rate": 9.751908396946565e-06, "loss": 0.2715, "step": 1022 }, { "epoch": 0.03, "grad_norm": 10.673962680704951, "learning_rate": 9.76145038167939e-06, "loss": 1.3771, "step": 1023 }, { "epoch": 0.03, "grad_norm": 6.525873261694389, "learning_rate": 9.770992366412214e-06, "loss": 0.3858, "step": 1024 }, { "epoch": 0.03, "grad_norm": 10.548065254018262, "learning_rate": 9.780534351145039e-06, "loss": 0.5172, "step": 1025 }, { "epoch": 0.03, "grad_norm": 8.20526746110548, "learning_rate": 9.790076335877863e-06, "loss": 0.7194, "step": 1026 }, { "epoch": 0.03, "grad_norm": 9.284522921251481, "learning_rate": 9.799618320610688e-06, "loss": 0.9044, "step": 1027 }, { "epoch": 0.03, "grad_norm": 7.0648055575341475, "learning_rate": 9.809160305343512e-06, "loss": 0.3544, "step": 1028 }, { "epoch": 0.03, "grad_norm": 3.4086244688850957, "learning_rate": 9.818702290076336e-06, "loss": 0.3367, "step": 1029 }, { "epoch": 0.03, "grad_norm": 9.916469272818079, "learning_rate": 9.828244274809161e-06, "loss": 0.4172, "step": 1030 }, { "epoch": 0.03, "grad_norm": 6.96518256732054, "learning_rate": 9.837786259541985e-06, "loss": 0.4067, "step": 1031 }, { "epoch": 0.03, "grad_norm": 12.343487777821965, "learning_rate": 9.84732824427481e-06, "loss": 0.4041, "step": 1032 }, { "epoch": 0.03, "grad_norm": 5.852599190935074, "learning_rate": 9.856870229007634e-06, "loss": 0.6033, "step": 1033 }, { "epoch": 0.03, "grad_norm": 6.623323912297873, "learning_rate": 9.866412213740459e-06, "loss": 0.4961, "step": 1034 }, { "epoch": 0.03, "grad_norm": 7.883874101486114, "learning_rate": 9.875954198473283e-06, "loss": 0.3635, "step": 1035 }, { "epoch": 0.03, "grad_norm": 5.968942689032218, "learning_rate": 9.885496183206108e-06, "loss": 0.3288, "step": 1036 }, { "epoch": 0.03, "grad_norm": 8.050281522104928, "learning_rate": 9.895038167938932e-06, "loss": 0.4971, "step": 1037 }, { "epoch": 0.03, "grad_norm": 8.362831841286757, "learning_rate": 9.904580152671756e-06, "loss": 0.5257, "step": 1038 }, { "epoch": 0.03, "grad_norm": 7.099913251373742, "learning_rate": 9.914122137404581e-06, "loss": 0.3207, "step": 1039 }, { "epoch": 0.03, "grad_norm": 7.215451894924783, "learning_rate": 9.923664122137405e-06, "loss": 0.4231, "step": 1040 }, { "epoch": 0.03, "grad_norm": 9.878770422312824, "learning_rate": 9.93320610687023e-06, "loss": 0.7172, "step": 1041 }, { "epoch": 0.03, "grad_norm": 6.21051027759979, "learning_rate": 9.942748091603054e-06, "loss": 0.4064, "step": 1042 }, { "epoch": 0.03, "grad_norm": 6.58423140794257, "learning_rate": 9.952290076335879e-06, "loss": 0.4942, "step": 1043 }, { "epoch": 0.03, "grad_norm": 6.8045393076898035, "learning_rate": 9.961832061068703e-06, "loss": 0.4005, "step": 1044 }, { "epoch": 0.03, "grad_norm": 7.038828110258943, "learning_rate": 9.971374045801527e-06, "loss": 0.5076, "step": 1045 }, { "epoch": 0.03, "grad_norm": 4.5673899639302835, "learning_rate": 9.980916030534352e-06, "loss": 0.2335, "step": 1046 }, { "epoch": 0.03, "grad_norm": 10.224367609964032, "learning_rate": 9.990458015267176e-06, "loss": 0.2048, "step": 1047 }, { "epoch": 0.03, "grad_norm": 8.756891969595324, "learning_rate": 1e-05, "loss": 0.4415, "step": 1048 }, { "epoch": 0.03, "grad_norm": 16.898527796799897, "learning_rate": 9.99999997849281e-06, "loss": 0.6561, "step": 1049 }, { "epoch": 0.03, "grad_norm": 6.112485362154356, "learning_rate": 9.999999913971232e-06, "loss": 0.46, "step": 1050 }, { "epoch": 0.03, "grad_norm": 8.558128856183632, "learning_rate": 9.999999806435273e-06, "loss": 0.6311, "step": 1051 }, { "epoch": 0.03, "grad_norm": 4.5843255038407555, "learning_rate": 9.99999965588493e-06, "loss": 0.1981, "step": 1052 }, { "epoch": 0.03, "grad_norm": 5.049922389198905, "learning_rate": 9.999999462320205e-06, "loss": 0.3901, "step": 1053 }, { "epoch": 0.03, "grad_norm": 6.2348886149230305, "learning_rate": 9.999999225741102e-06, "loss": 0.5251, "step": 1054 }, { "epoch": 0.03, "grad_norm": 8.475496314715835, "learning_rate": 9.99999894614762e-06, "loss": 0.7946, "step": 1055 }, { "epoch": 0.03, "grad_norm": 17.528077190331928, "learning_rate": 9.999998623539762e-06, "loss": 0.4566, "step": 1056 }, { "epoch": 0.03, "grad_norm": 7.3330324898768255, "learning_rate": 9.999998257917533e-06, "loss": 0.565, "step": 1057 }, { "epoch": 0.03, "grad_norm": 5.509392542819234, "learning_rate": 9.999997849280933e-06, "loss": 0.2176, "step": 1058 }, { "epoch": 0.03, "grad_norm": 18.279281745005225, "learning_rate": 9.99999739762997e-06, "loss": 0.6828, "step": 1059 }, { "epoch": 0.03, "grad_norm": 6.861567083986935, "learning_rate": 9.999996902964643e-06, "loss": 0.3958, "step": 1060 }, { "epoch": 0.03, "grad_norm": 8.110871810808753, "learning_rate": 9.999996365284959e-06, "loss": 0.9578, "step": 1061 }, { "epoch": 0.03, "grad_norm": 10.608661700668168, "learning_rate": 9.99999578459092e-06, "loss": 0.878, "step": 1062 }, { "epoch": 0.03, "grad_norm": 9.337751089496512, "learning_rate": 9.999995160882535e-06, "loss": 0.5069, "step": 1063 }, { "epoch": 0.03, "grad_norm": 9.226706430215359, "learning_rate": 9.999994494159805e-06, "loss": 0.5751, "step": 1064 }, { "epoch": 0.03, "grad_norm": 4.249213314175657, "learning_rate": 9.999993784422742e-06, "loss": 0.2712, "step": 1065 }, { "epoch": 0.03, "grad_norm": 8.654430211890352, "learning_rate": 9.999993031671345e-06, "loss": 0.4718, "step": 1066 }, { "epoch": 0.03, "grad_norm": 6.673663987175085, "learning_rate": 9.999992235905624e-06, "loss": 0.1361, "step": 1067 }, { "epoch": 0.03, "grad_norm": 8.199059225405172, "learning_rate": 9.999991397125584e-06, "loss": 0.5575, "step": 1068 }, { "epoch": 0.03, "grad_norm": 3.597253070651568, "learning_rate": 9.999990515331235e-06, "loss": 0.3995, "step": 1069 }, { "epoch": 0.03, "grad_norm": 15.42440019242009, "learning_rate": 9.999989590522586e-06, "loss": 0.4877, "step": 1070 }, { "epoch": 0.03, "grad_norm": 12.900848514536536, "learning_rate": 9.999988622699639e-06, "loss": 0.7031, "step": 1071 }, { "epoch": 0.03, "grad_norm": 9.539472491477158, "learning_rate": 9.999987611862405e-06, "loss": 0.642, "step": 1072 }, { "epoch": 0.03, "grad_norm": 7.221145430605595, "learning_rate": 9.999986558010895e-06, "loss": 0.5836, "step": 1073 }, { "epoch": 0.03, "grad_norm": 11.150296201321169, "learning_rate": 9.999985461145114e-06, "loss": 0.4529, "step": 1074 }, { "epoch": 0.03, "grad_norm": 6.343423712847113, "learning_rate": 9.999984321265076e-06, "loss": 0.3993, "step": 1075 }, { "epoch": 0.03, "grad_norm": 7.361679396695695, "learning_rate": 9.999983138370788e-06, "loss": 0.3914, "step": 1076 }, { "epoch": 0.03, "grad_norm": 9.175351577384026, "learning_rate": 9.99998191246226e-06, "loss": 0.6279, "step": 1077 }, { "epoch": 0.03, "grad_norm": 6.208425337687892, "learning_rate": 9.999980643539503e-06, "loss": 0.4719, "step": 1078 }, { "epoch": 0.03, "grad_norm": 12.514916027243064, "learning_rate": 9.99997933160253e-06, "loss": 0.5603, "step": 1079 }, { "epoch": 0.03, "grad_norm": 10.783002473235323, "learning_rate": 9.999977976651348e-06, "loss": 0.1794, "step": 1080 }, { "epoch": 0.03, "grad_norm": 4.421491329950315, "learning_rate": 9.999976578685973e-06, "loss": 0.4634, "step": 1081 }, { "epoch": 0.03, "grad_norm": 5.5491529058830125, "learning_rate": 9.999975137706414e-06, "loss": 0.4973, "step": 1082 }, { "epoch": 0.03, "grad_norm": 5.811253075519879, "learning_rate": 9.999973653712685e-06, "loss": 0.405, "step": 1083 }, { "epoch": 0.03, "grad_norm": 16.821960510229324, "learning_rate": 9.999972126704798e-06, "loss": 0.6452, "step": 1084 }, { "epoch": 0.03, "grad_norm": 11.487044292787969, "learning_rate": 9.999970556682766e-06, "loss": 0.6308, "step": 1085 }, { "epoch": 0.03, "grad_norm": 6.741340309923119, "learning_rate": 9.999968943646603e-06, "loss": 0.5653, "step": 1086 }, { "epoch": 0.03, "grad_norm": 6.111735714675898, "learning_rate": 9.999967287596324e-06, "loss": 0.5518, "step": 1087 }, { "epoch": 0.03, "grad_norm": 9.74522116506497, "learning_rate": 9.999965588531941e-06, "loss": 0.6879, "step": 1088 }, { "epoch": 0.03, "grad_norm": 10.512608178996192, "learning_rate": 9.99996384645347e-06, "loss": 0.5432, "step": 1089 }, { "epoch": 0.03, "grad_norm": 4.234674084225299, "learning_rate": 9.999962061360925e-06, "loss": 0.2984, "step": 1090 }, { "epoch": 0.03, "grad_norm": 7.564948324063552, "learning_rate": 9.999960233254322e-06, "loss": 0.2808, "step": 1091 }, { "epoch": 0.03, "grad_norm": 7.065129558565879, "learning_rate": 9.999958362133677e-06, "loss": 0.6693, "step": 1092 }, { "epoch": 0.03, "grad_norm": 9.17888689161664, "learning_rate": 9.999956447999008e-06, "loss": 0.438, "step": 1093 }, { "epoch": 0.03, "grad_norm": 12.26848375572755, "learning_rate": 9.999954490850326e-06, "loss": 0.8184, "step": 1094 }, { "epoch": 0.03, "grad_norm": 9.813844527927623, "learning_rate": 9.999952490687653e-06, "loss": 0.5449, "step": 1095 }, { "epoch": 0.03, "grad_norm": 4.706152616306396, "learning_rate": 9.999950447511005e-06, "loss": 0.2702, "step": 1096 }, { "epoch": 0.03, "grad_norm": 6.098270793398461, "learning_rate": 9.999948361320398e-06, "loss": 0.4134, "step": 1097 }, { "epoch": 0.03, "grad_norm": 10.378863523005531, "learning_rate": 9.999946232115848e-06, "loss": 0.8375, "step": 1098 }, { "epoch": 0.03, "grad_norm": 7.00332903039223, "learning_rate": 9.99994405989738e-06, "loss": 0.3426, "step": 1099 }, { "epoch": 0.03, "grad_norm": 5.2267391705268755, "learning_rate": 9.999941844665008e-06, "loss": 0.3884, "step": 1100 }, { "epoch": 0.03, "grad_norm": 11.636995177172672, "learning_rate": 9.99993958641875e-06, "loss": 1.043, "step": 1101 }, { "epoch": 0.03, "grad_norm": 6.4191401057573065, "learning_rate": 9.99993728515863e-06, "loss": 0.2776, "step": 1102 }, { "epoch": 0.03, "grad_norm": 15.328731357662981, "learning_rate": 9.999934940884664e-06, "loss": 0.9713, "step": 1103 }, { "epoch": 0.03, "grad_norm": 6.907207094734336, "learning_rate": 9.999932553596873e-06, "loss": 0.2612, "step": 1104 }, { "epoch": 0.03, "grad_norm": 10.243920733161922, "learning_rate": 9.999930123295279e-06, "loss": 0.7503, "step": 1105 }, { "epoch": 0.03, "grad_norm": 7.147633791909192, "learning_rate": 9.9999276499799e-06, "loss": 0.31, "step": 1106 }, { "epoch": 0.03, "grad_norm": 5.903860506973655, "learning_rate": 9.999925133650761e-06, "loss": 0.4882, "step": 1107 }, { "epoch": 0.03, "grad_norm": 5.607117094860648, "learning_rate": 9.999922574307882e-06, "loss": 0.3692, "step": 1108 }, { "epoch": 0.03, "grad_norm": 13.273922754750092, "learning_rate": 9.999919971951282e-06, "loss": 0.9882, "step": 1109 }, { "epoch": 0.03, "grad_norm": 9.323457761154035, "learning_rate": 9.999917326580987e-06, "loss": 0.6859, "step": 1110 }, { "epoch": 0.03, "grad_norm": 8.96683053666969, "learning_rate": 9.999914638197019e-06, "loss": 0.4578, "step": 1111 }, { "epoch": 0.03, "grad_norm": 8.160969388265169, "learning_rate": 9.999911906799403e-06, "loss": 0.5519, "step": 1112 }, { "epoch": 0.03, "grad_norm": 8.46847167736623, "learning_rate": 9.999909132388157e-06, "loss": 0.4642, "step": 1113 }, { "epoch": 0.03, "grad_norm": 10.110217382129624, "learning_rate": 9.99990631496331e-06, "loss": 0.4235, "step": 1114 }, { "epoch": 0.03, "grad_norm": 6.532994781045947, "learning_rate": 9.999903454524885e-06, "loss": 0.3191, "step": 1115 }, { "epoch": 0.03, "grad_norm": 10.342913360412812, "learning_rate": 9.999900551072906e-06, "loss": 0.6936, "step": 1116 }, { "epoch": 0.03, "grad_norm": 9.572026263569606, "learning_rate": 9.999897604607399e-06, "loss": 0.3292, "step": 1117 }, { "epoch": 0.03, "grad_norm": 10.046463645000896, "learning_rate": 9.999894615128387e-06, "loss": 0.5988, "step": 1118 }, { "epoch": 0.03, "grad_norm": 12.421333118831273, "learning_rate": 9.999891582635896e-06, "loss": 0.8358, "step": 1119 }, { "epoch": 0.03, "grad_norm": 10.26093936627962, "learning_rate": 9.999888507129955e-06, "loss": 0.6508, "step": 1120 }, { "epoch": 0.03, "grad_norm": 7.945133293478937, "learning_rate": 9.999885388610591e-06, "loss": 0.5913, "step": 1121 }, { "epoch": 0.03, "grad_norm": 10.415867299243926, "learning_rate": 9.999882227077827e-06, "loss": 0.7667, "step": 1122 }, { "epoch": 0.03, "grad_norm": 7.743132532608589, "learning_rate": 9.99987902253169e-06, "loss": 0.6855, "step": 1123 }, { "epoch": 0.03, "grad_norm": 9.116356296126206, "learning_rate": 9.99987577497221e-06, "loss": 0.6097, "step": 1124 }, { "epoch": 0.03, "grad_norm": 8.717199481655706, "learning_rate": 9.999872484399415e-06, "loss": 0.9114, "step": 1125 }, { "epoch": 0.03, "grad_norm": 16.806944656060335, "learning_rate": 9.999869150813333e-06, "loss": 0.5381, "step": 1126 }, { "epoch": 0.03, "grad_norm": 6.917898374831731, "learning_rate": 9.999865774213992e-06, "loss": 0.5552, "step": 1127 }, { "epoch": 0.03, "grad_norm": 7.091037281973949, "learning_rate": 9.999862354601422e-06, "loss": 0.8044, "step": 1128 }, { "epoch": 0.03, "grad_norm": 17.805568805490616, "learning_rate": 9.99985889197565e-06, "loss": 0.6131, "step": 1129 }, { "epoch": 0.03, "grad_norm": 11.548934154130222, "learning_rate": 9.999855386336707e-06, "loss": 0.7422, "step": 1130 }, { "epoch": 0.03, "grad_norm": 4.7077055262159, "learning_rate": 9.999851837684627e-06, "loss": 0.7369, "step": 1131 }, { "epoch": 0.03, "grad_norm": 8.97565361979432, "learning_rate": 9.999848246019435e-06, "loss": 0.5247, "step": 1132 }, { "epoch": 0.03, "grad_norm": 10.050784001681755, "learning_rate": 9.999844611341165e-06, "loss": 0.6388, "step": 1133 }, { "epoch": 0.03, "grad_norm": 7.89493426502734, "learning_rate": 9.999840933649847e-06, "loss": 0.324, "step": 1134 }, { "epoch": 0.03, "grad_norm": 8.026626294970239, "learning_rate": 9.999837212945513e-06, "loss": 0.3989, "step": 1135 }, { "epoch": 0.03, "grad_norm": 7.536038154817279, "learning_rate": 9.999833449228195e-06, "loss": 0.4596, "step": 1136 }, { "epoch": 0.03, "grad_norm": 8.518730395288221, "learning_rate": 9.999829642497924e-06, "loss": 0.3194, "step": 1137 }, { "epoch": 0.03, "grad_norm": 3.0151267158867476, "learning_rate": 9.999825792754738e-06, "loss": 0.2121, "step": 1138 }, { "epoch": 0.03, "grad_norm": 8.472426232416126, "learning_rate": 9.999821899998662e-06, "loss": 0.5738, "step": 1139 }, { "epoch": 0.03, "grad_norm": 6.064146231289722, "learning_rate": 9.999817964229737e-06, "loss": 0.3881, "step": 1140 }, { "epoch": 0.03, "grad_norm": 4.492169083267275, "learning_rate": 9.999813985447993e-06, "loss": 0.1566, "step": 1141 }, { "epoch": 0.03, "grad_norm": 7.905689868007948, "learning_rate": 9.999809963653463e-06, "loss": 0.7308, "step": 1142 }, { "epoch": 0.03, "grad_norm": 4.871067368253163, "learning_rate": 9.999805898846185e-06, "loss": 0.3433, "step": 1143 }, { "epoch": 0.03, "grad_norm": 3.8026152495626024, "learning_rate": 9.999801791026192e-06, "loss": 0.3085, "step": 1144 }, { "epoch": 0.03, "grad_norm": 5.162760837421825, "learning_rate": 9.999797640193521e-06, "loss": 0.2895, "step": 1145 }, { "epoch": 0.03, "grad_norm": 5.632188992640066, "learning_rate": 9.999793446348206e-06, "loss": 0.4211, "step": 1146 }, { "epoch": 0.03, "grad_norm": 8.05952179779867, "learning_rate": 9.999789209490283e-06, "loss": 0.8857, "step": 1147 }, { "epoch": 0.03, "grad_norm": 7.780999972451425, "learning_rate": 9.99978492961979e-06, "loss": 0.608, "step": 1148 }, { "epoch": 0.03, "grad_norm": 5.695109506346311, "learning_rate": 9.99978060673676e-06, "loss": 0.2718, "step": 1149 }, { "epoch": 0.03, "grad_norm": 6.139043939319183, "learning_rate": 9.999776240841235e-06, "loss": 0.4431, "step": 1150 }, { "epoch": 0.03, "grad_norm": 9.071694386281205, "learning_rate": 9.999771831933251e-06, "loss": 0.6041, "step": 1151 }, { "epoch": 0.03, "grad_norm": 10.326431574188467, "learning_rate": 9.999767380012845e-06, "loss": 0.7838, "step": 1152 }, { "epoch": 0.03, "grad_norm": 7.361685226252367, "learning_rate": 9.999762885080056e-06, "loss": 0.6119, "step": 1153 }, { "epoch": 0.03, "grad_norm": 6.49147024220681, "learning_rate": 9.999758347134922e-06, "loss": 0.2444, "step": 1154 }, { "epoch": 0.03, "grad_norm": 2.896581883502981, "learning_rate": 9.999753766177483e-06, "loss": 0.1213, "step": 1155 }, { "epoch": 0.03, "grad_norm": 5.973232603238498, "learning_rate": 9.999749142207778e-06, "loss": 0.6296, "step": 1156 }, { "epoch": 0.03, "grad_norm": 10.357689077321298, "learning_rate": 9.999744475225847e-06, "loss": 0.7613, "step": 1157 }, { "epoch": 0.03, "grad_norm": 7.301535186292, "learning_rate": 9.999739765231728e-06, "loss": 0.3233, "step": 1158 }, { "epoch": 0.03, "grad_norm": 7.2224723499543915, "learning_rate": 9.999735012225466e-06, "loss": 0.6521, "step": 1159 }, { "epoch": 0.03, "grad_norm": 6.4877258467272245, "learning_rate": 9.999730216207097e-06, "loss": 0.3472, "step": 1160 }, { "epoch": 0.03, "grad_norm": 13.499031314710951, "learning_rate": 9.999725377176667e-06, "loss": 0.8642, "step": 1161 }, { "epoch": 0.03, "grad_norm": 10.842666615830305, "learning_rate": 9.999720495134214e-06, "loss": 0.6188, "step": 1162 }, { "epoch": 0.03, "grad_norm": 11.088361436135589, "learning_rate": 9.99971557007978e-06, "loss": 0.6294, "step": 1163 }, { "epoch": 0.03, "grad_norm": 5.973654005640741, "learning_rate": 9.99971060201341e-06, "loss": 0.4062, "step": 1164 }, { "epoch": 0.03, "grad_norm": 22.92346793310467, "learning_rate": 9.999705590935145e-06, "loss": 0.4317, "step": 1165 }, { "epoch": 0.03, "grad_norm": 3.770844858197608, "learning_rate": 9.999700536845027e-06, "loss": 0.2503, "step": 1166 }, { "epoch": 0.03, "grad_norm": 7.829526612062453, "learning_rate": 9.999695439743102e-06, "loss": 0.2892, "step": 1167 }, { "epoch": 0.03, "grad_norm": 6.07522435343083, "learning_rate": 9.999690299629414e-06, "loss": 0.5162, "step": 1168 }, { "epoch": 0.03, "grad_norm": 6.358712014170519, "learning_rate": 9.999685116504004e-06, "loss": 0.3961, "step": 1169 }, { "epoch": 0.03, "grad_norm": 8.277242129729558, "learning_rate": 9.999679890366918e-06, "loss": 0.2117, "step": 1170 }, { "epoch": 0.03, "grad_norm": 8.231211943310328, "learning_rate": 9.999674621218204e-06, "loss": 0.5426, "step": 1171 }, { "epoch": 0.03, "grad_norm": 9.403317016385468, "learning_rate": 9.999669309057903e-06, "loss": 0.5534, "step": 1172 }, { "epoch": 0.03, "grad_norm": 11.494098807804136, "learning_rate": 9.999663953886063e-06, "loss": 0.5117, "step": 1173 }, { "epoch": 0.03, "grad_norm": 8.425271674510363, "learning_rate": 9.99965855570273e-06, "loss": 0.365, "step": 1174 }, { "epoch": 0.03, "grad_norm": 7.452259453311637, "learning_rate": 9.99965311450795e-06, "loss": 0.6622, "step": 1175 }, { "epoch": 0.03, "grad_norm": 5.273267774323086, "learning_rate": 9.999647630301768e-06, "loss": 0.4868, "step": 1176 }, { "epoch": 0.03, "grad_norm": 9.075129265823504, "learning_rate": 9.999642103084235e-06, "loss": 0.5825, "step": 1177 }, { "epoch": 0.03, "grad_norm": 4.751363106946674, "learning_rate": 9.999636532855396e-06, "loss": 0.3668, "step": 1178 }, { "epoch": 0.03, "grad_norm": 5.1190582117391425, "learning_rate": 9.999630919615299e-06, "loss": 0.3494, "step": 1179 }, { "epoch": 0.03, "grad_norm": 6.085955638148416, "learning_rate": 9.999625263363994e-06, "loss": 0.5322, "step": 1180 }, { "epoch": 0.03, "grad_norm": 8.782492987469514, "learning_rate": 9.999619564101527e-06, "loss": 0.3677, "step": 1181 }, { "epoch": 0.03, "grad_norm": 6.822561131500868, "learning_rate": 9.99961382182795e-06, "loss": 0.6388, "step": 1182 }, { "epoch": 0.03, "grad_norm": 7.086964323599676, "learning_rate": 9.999608036543309e-06, "loss": 0.4391, "step": 1183 }, { "epoch": 0.03, "grad_norm": 15.116605588767843, "learning_rate": 9.999602208247657e-06, "loss": 0.6166, "step": 1184 }, { "epoch": 0.03, "grad_norm": 8.041219082830272, "learning_rate": 9.999596336941042e-06, "loss": 0.2699, "step": 1185 }, { "epoch": 0.03, "grad_norm": 2.9738953823680103, "learning_rate": 9.999590422623515e-06, "loss": 0.2296, "step": 1186 }, { "epoch": 0.03, "grad_norm": 9.39044184196706, "learning_rate": 9.99958446529513e-06, "loss": 0.9147, "step": 1187 }, { "epoch": 0.03, "grad_norm": 10.764371846073487, "learning_rate": 9.999578464955932e-06, "loss": 0.4177, "step": 1188 }, { "epoch": 0.03, "grad_norm": 2.0113818877312446, "learning_rate": 9.999572421605976e-06, "loss": 0.3836, "step": 1189 }, { "epoch": 0.03, "grad_norm": 6.278019696003331, "learning_rate": 9.999566335245317e-06, "loss": 0.4968, "step": 1190 }, { "epoch": 0.03, "grad_norm": 9.82341640221982, "learning_rate": 9.999560205874001e-06, "loss": 0.5344, "step": 1191 }, { "epoch": 0.03, "grad_norm": 5.229652460742797, "learning_rate": 9.999554033492087e-06, "loss": 0.5369, "step": 1192 }, { "epoch": 0.03, "grad_norm": 8.184030278109777, "learning_rate": 9.999547818099625e-06, "loss": 0.5196, "step": 1193 }, { "epoch": 0.03, "grad_norm": 15.733943202165339, "learning_rate": 9.999541559696666e-06, "loss": 0.7947, "step": 1194 }, { "epoch": 0.03, "grad_norm": 5.371481253777228, "learning_rate": 9.999535258283268e-06, "loss": 0.3517, "step": 1195 }, { "epoch": 0.03, "grad_norm": 4.81137250102587, "learning_rate": 9.999528913859484e-06, "loss": 0.4251, "step": 1196 }, { "epoch": 0.03, "grad_norm": 8.301183469115415, "learning_rate": 9.999522526425368e-06, "loss": 0.5504, "step": 1197 }, { "epoch": 0.03, "grad_norm": 3.2582649996162067, "learning_rate": 9.999516095980976e-06, "loss": 0.2897, "step": 1198 }, { "epoch": 0.03, "grad_norm": 5.1000078294731335, "learning_rate": 9.999509622526362e-06, "loss": 0.7419, "step": 1199 }, { "epoch": 0.03, "grad_norm": 6.754546435734248, "learning_rate": 9.999503106061582e-06, "loss": 0.5512, "step": 1200 }, { "epoch": 0.03, "grad_norm": 10.292510570773649, "learning_rate": 9.999496546586694e-06, "loss": 0.7746, "step": 1201 }, { "epoch": 0.03, "grad_norm": 5.875376912464883, "learning_rate": 9.99948994410175e-06, "loss": 0.368, "step": 1202 }, { "epoch": 0.03, "grad_norm": 6.6738786391260305, "learning_rate": 9.999483298606812e-06, "loss": 0.2592, "step": 1203 }, { "epoch": 0.03, "grad_norm": 5.253515156650799, "learning_rate": 9.999476610101932e-06, "loss": 0.4132, "step": 1204 }, { "epoch": 0.03, "grad_norm": 10.562391562723473, "learning_rate": 9.999469878587174e-06, "loss": 0.5704, "step": 1205 }, { "epoch": 0.03, "grad_norm": 7.079075225305366, "learning_rate": 9.999463104062589e-06, "loss": 0.5029, "step": 1206 }, { "epoch": 0.03, "grad_norm": 9.133758991884907, "learning_rate": 9.99945628652824e-06, "loss": 0.4254, "step": 1207 }, { "epoch": 0.03, "grad_norm": 9.910389032513145, "learning_rate": 9.999449425984184e-06, "loss": 0.6485, "step": 1208 }, { "epoch": 0.03, "grad_norm": 9.84645852261726, "learning_rate": 9.999442522430478e-06, "loss": 0.5301, "step": 1209 }, { "epoch": 0.03, "grad_norm": 8.498998582977418, "learning_rate": 9.999435575867188e-06, "loss": 0.5216, "step": 1210 }, { "epoch": 0.03, "grad_norm": 10.651565551309707, "learning_rate": 9.999428586294366e-06, "loss": 0.5777, "step": 1211 }, { "epoch": 0.03, "grad_norm": 4.192959813065277, "learning_rate": 9.999421553712077e-06, "loss": 0.4992, "step": 1212 }, { "epoch": 0.03, "grad_norm": 14.064631186164535, "learning_rate": 9.999414478120377e-06, "loss": 0.5866, "step": 1213 }, { "epoch": 0.03, "grad_norm": 9.499551210343414, "learning_rate": 9.999407359519333e-06, "loss": 0.6327, "step": 1214 }, { "epoch": 0.03, "grad_norm": 2.767763722508817, "learning_rate": 9.999400197909002e-06, "loss": 0.4234, "step": 1215 }, { "epoch": 0.03, "grad_norm": 9.67832965579017, "learning_rate": 9.999392993289445e-06, "loss": 0.8164, "step": 1216 }, { "epoch": 0.03, "grad_norm": 8.31090995094078, "learning_rate": 9.999385745660729e-06, "loss": 0.7905, "step": 1217 }, { "epoch": 0.03, "grad_norm": 5.388913399792452, "learning_rate": 9.99937845502291e-06, "loss": 0.5363, "step": 1218 }, { "epoch": 0.03, "grad_norm": 8.05507454002363, "learning_rate": 9.999371121376054e-06, "loss": 0.4602, "step": 1219 }, { "epoch": 0.03, "grad_norm": 7.03632500496534, "learning_rate": 9.999363744720225e-06, "loss": 0.5041, "step": 1220 }, { "epoch": 0.03, "grad_norm": 4.271820766771662, "learning_rate": 9.999356325055484e-06, "loss": 0.2164, "step": 1221 }, { "epoch": 0.03, "grad_norm": 7.469627807618702, "learning_rate": 9.999348862381898e-06, "loss": 0.4282, "step": 1222 }, { "epoch": 0.04, "grad_norm": 8.460235143870127, "learning_rate": 9.999341356699528e-06, "loss": 0.6941, "step": 1223 }, { "epoch": 0.04, "grad_norm": 3.4665362021001433, "learning_rate": 9.99933380800844e-06, "loss": 0.3346, "step": 1224 }, { "epoch": 0.04, "grad_norm": 5.321659136172561, "learning_rate": 9.9993262163087e-06, "loss": 0.5104, "step": 1225 }, { "epoch": 0.04, "grad_norm": 12.887798799441955, "learning_rate": 9.99931858160037e-06, "loss": 0.6088, "step": 1226 }, { "epoch": 0.04, "grad_norm": 13.418053115068657, "learning_rate": 9.999310903883518e-06, "loss": 0.4554, "step": 1227 }, { "epoch": 0.04, "grad_norm": 5.496586347139251, "learning_rate": 9.999303183158211e-06, "loss": 0.3892, "step": 1228 }, { "epoch": 0.04, "grad_norm": 7.078212257504244, "learning_rate": 9.999295419424514e-06, "loss": 0.3906, "step": 1229 }, { "epoch": 0.04, "grad_norm": 10.91536762186389, "learning_rate": 9.999287612682494e-06, "loss": 1.187, "step": 1230 }, { "epoch": 0.04, "grad_norm": 3.4769541316239345, "learning_rate": 9.999279762932217e-06, "loss": 0.2239, "step": 1231 }, { "epoch": 0.04, "grad_norm": 9.021192236640474, "learning_rate": 9.999271870173754e-06, "loss": 0.4494, "step": 1232 }, { "epoch": 0.04, "grad_norm": 12.491898161762201, "learning_rate": 9.999263934407169e-06, "loss": 0.507, "step": 1233 }, { "epoch": 0.04, "grad_norm": 7.5859732661400985, "learning_rate": 9.999255955632532e-06, "loss": 0.392, "step": 1234 }, { "epoch": 0.04, "grad_norm": 7.6742194608237355, "learning_rate": 9.999247933849912e-06, "loss": 0.5244, "step": 1235 }, { "epoch": 0.04, "grad_norm": 9.532960581883758, "learning_rate": 9.999239869059376e-06, "loss": 0.4138, "step": 1236 }, { "epoch": 0.04, "grad_norm": 4.292970416109492, "learning_rate": 9.999231761260997e-06, "loss": 0.2795, "step": 1237 }, { "epoch": 0.04, "grad_norm": 5.253331807438069, "learning_rate": 9.999223610454843e-06, "loss": 0.6593, "step": 1238 }, { "epoch": 0.04, "grad_norm": 8.38415675837172, "learning_rate": 9.999215416640982e-06, "loss": 0.6822, "step": 1239 }, { "epoch": 0.04, "grad_norm": 5.907912610690671, "learning_rate": 9.999207179819487e-06, "loss": 0.5317, "step": 1240 }, { "epoch": 0.04, "grad_norm": 6.29296382475269, "learning_rate": 9.999198899990428e-06, "loss": 0.4141, "step": 1241 }, { "epoch": 0.04, "grad_norm": 3.9827731334466283, "learning_rate": 9.999190577153876e-06, "loss": 0.3181, "step": 1242 }, { "epoch": 0.04, "grad_norm": 5.612198695397799, "learning_rate": 9.999182211309904e-06, "loss": 0.5456, "step": 1243 }, { "epoch": 0.04, "grad_norm": 11.110198557785015, "learning_rate": 9.999173802458582e-06, "loss": 0.4453, "step": 1244 }, { "epoch": 0.04, "grad_norm": 6.79339601775434, "learning_rate": 9.999165350599985e-06, "loss": 0.6424, "step": 1245 }, { "epoch": 0.04, "grad_norm": 7.457282803959986, "learning_rate": 9.999156855734182e-06, "loss": 0.6465, "step": 1246 }, { "epoch": 0.04, "grad_norm": 6.172722944124567, "learning_rate": 9.99914831786125e-06, "loss": 0.5596, "step": 1247 }, { "epoch": 0.04, "grad_norm": 7.244804230566441, "learning_rate": 9.999139736981259e-06, "loss": 0.6821, "step": 1248 }, { "epoch": 0.04, "grad_norm": 7.772062584515576, "learning_rate": 9.999131113094284e-06, "loss": 0.4498, "step": 1249 }, { "epoch": 0.04, "grad_norm": 13.872602650875647, "learning_rate": 9.9991224462004e-06, "loss": 0.684, "step": 1250 }, { "epoch": 0.04, "grad_norm": 6.559730017851687, "learning_rate": 9.999113736299682e-06, "loss": 0.5874, "step": 1251 }, { "epoch": 0.04, "grad_norm": 8.183765986776404, "learning_rate": 9.999104983392202e-06, "loss": 0.8834, "step": 1252 }, { "epoch": 0.04, "grad_norm": 9.118022236533557, "learning_rate": 9.999096187478039e-06, "loss": 0.4176, "step": 1253 }, { "epoch": 0.04, "grad_norm": 8.304219531477811, "learning_rate": 9.999087348557268e-06, "loss": 1.1772, "step": 1254 }, { "epoch": 0.04, "grad_norm": 8.052769370584732, "learning_rate": 9.999078466629963e-06, "loss": 0.5575, "step": 1255 }, { "epoch": 0.04, "grad_norm": 14.044924998478654, "learning_rate": 9.9990695416962e-06, "loss": 0.4287, "step": 1256 }, { "epoch": 0.04, "grad_norm": 9.834898683989003, "learning_rate": 9.999060573756059e-06, "loss": 0.8359, "step": 1257 }, { "epoch": 0.04, "grad_norm": 5.043681832270043, "learning_rate": 9.999051562809614e-06, "loss": 0.3986, "step": 1258 }, { "epoch": 0.04, "grad_norm": 5.163233287910017, "learning_rate": 9.999042508856946e-06, "loss": 0.6338, "step": 1259 }, { "epoch": 0.04, "grad_norm": 6.3677637506444125, "learning_rate": 9.99903341189813e-06, "loss": 0.3541, "step": 1260 }, { "epoch": 0.04, "grad_norm": 6.1084836421790305, "learning_rate": 9.999024271933244e-06, "loss": 0.3824, "step": 1261 }, { "epoch": 0.04, "grad_norm": 14.812001151525509, "learning_rate": 9.999015088962367e-06, "loss": 0.602, "step": 1262 }, { "epoch": 0.04, "grad_norm": 4.910475957957869, "learning_rate": 9.99900586298558e-06, "loss": 0.7755, "step": 1263 }, { "epoch": 0.04, "grad_norm": 5.359731211933049, "learning_rate": 9.998996594002962e-06, "loss": 0.1975, "step": 1264 }, { "epoch": 0.04, "grad_norm": 10.165731188984363, "learning_rate": 9.998987282014592e-06, "loss": 0.415, "step": 1265 }, { "epoch": 0.04, "grad_norm": 9.001252087281744, "learning_rate": 9.998977927020548e-06, "loss": 0.3893, "step": 1266 }, { "epoch": 0.04, "grad_norm": 8.394020515905817, "learning_rate": 9.998968529020913e-06, "loss": 0.6019, "step": 1267 }, { "epoch": 0.04, "grad_norm": 9.88625717743543, "learning_rate": 9.998959088015768e-06, "loss": 0.7162, "step": 1268 }, { "epoch": 0.04, "grad_norm": 7.949978368957649, "learning_rate": 9.998949604005192e-06, "loss": 0.2802, "step": 1269 }, { "epoch": 0.04, "grad_norm": 3.998647103400316, "learning_rate": 9.998940076989269e-06, "loss": 0.1915, "step": 1270 }, { "epoch": 0.04, "grad_norm": 4.4437370525445115, "learning_rate": 9.99893050696808e-06, "loss": 0.3305, "step": 1271 }, { "epoch": 0.04, "grad_norm": 11.847709054448762, "learning_rate": 9.998920893941706e-06, "loss": 0.6095, "step": 1272 }, { "epoch": 0.04, "grad_norm": 3.905538845177238, "learning_rate": 9.998911237910234e-06, "loss": 0.3827, "step": 1273 }, { "epoch": 0.04, "grad_norm": 8.173310133769894, "learning_rate": 9.998901538873743e-06, "loss": 0.5622, "step": 1274 }, { "epoch": 0.04, "grad_norm": 4.534916308723241, "learning_rate": 9.998891796832315e-06, "loss": 0.4784, "step": 1275 }, { "epoch": 0.04, "grad_norm": 4.632500263261453, "learning_rate": 9.998882011786039e-06, "loss": 0.3746, "step": 1276 }, { "epoch": 0.04, "grad_norm": 8.687666280788044, "learning_rate": 9.998872183734996e-06, "loss": 0.4844, "step": 1277 }, { "epoch": 0.04, "grad_norm": 9.382003939917068, "learning_rate": 9.99886231267927e-06, "loss": 0.7194, "step": 1278 }, { "epoch": 0.04, "grad_norm": 6.216566958237187, "learning_rate": 9.998852398618949e-06, "loss": 0.5173, "step": 1279 }, { "epoch": 0.04, "grad_norm": 8.303025719074569, "learning_rate": 9.998842441554115e-06, "loss": 0.3193, "step": 1280 }, { "epoch": 0.04, "grad_norm": 14.982528747805993, "learning_rate": 9.998832441484854e-06, "loss": 0.7126, "step": 1281 }, { "epoch": 0.04, "grad_norm": 14.002273102556392, "learning_rate": 9.998822398411254e-06, "loss": 1.0166, "step": 1282 }, { "epoch": 0.04, "grad_norm": 7.518328777129475, "learning_rate": 9.998812312333402e-06, "loss": 0.7334, "step": 1283 }, { "epoch": 0.04, "grad_norm": 4.749767009140649, "learning_rate": 9.99880218325138e-06, "loss": 0.2379, "step": 1284 }, { "epoch": 0.04, "grad_norm": 10.83264338912809, "learning_rate": 9.99879201116528e-06, "loss": 0.5397, "step": 1285 }, { "epoch": 0.04, "grad_norm": 8.314043231634228, "learning_rate": 9.998781796075187e-06, "loss": 0.8392, "step": 1286 }, { "epoch": 0.04, "grad_norm": 9.143640786885692, "learning_rate": 9.998771537981191e-06, "loss": 0.3512, "step": 1287 }, { "epoch": 0.04, "grad_norm": 8.06593249687024, "learning_rate": 9.998761236883379e-06, "loss": 0.244, "step": 1288 }, { "epoch": 0.04, "grad_norm": 4.2841010241898445, "learning_rate": 9.998750892781839e-06, "loss": 0.4579, "step": 1289 }, { "epoch": 0.04, "grad_norm": 6.957850669826388, "learning_rate": 9.99874050567666e-06, "loss": 0.5079, "step": 1290 }, { "epoch": 0.04, "grad_norm": 14.495489076519803, "learning_rate": 9.998730075567931e-06, "loss": 0.9478, "step": 1291 }, { "epoch": 0.04, "grad_norm": 7.6536676839849935, "learning_rate": 9.998719602455745e-06, "loss": 0.4903, "step": 1292 }, { "epoch": 0.04, "grad_norm": 9.475271012750806, "learning_rate": 9.998709086340189e-06, "loss": 1.1004, "step": 1293 }, { "epoch": 0.04, "grad_norm": 4.615347584398584, "learning_rate": 9.998698527221353e-06, "loss": 0.608, "step": 1294 }, { "epoch": 0.04, "grad_norm": 8.55464424575252, "learning_rate": 9.99868792509933e-06, "loss": 0.3454, "step": 1295 }, { "epoch": 0.04, "grad_norm": 9.495849104083296, "learning_rate": 9.99867727997421e-06, "loss": 0.6198, "step": 1296 }, { "epoch": 0.04, "grad_norm": 8.502357436490437, "learning_rate": 9.998666591846084e-06, "loss": 0.615, "step": 1297 }, { "epoch": 0.04, "grad_norm": 10.768863716787042, "learning_rate": 9.998655860715045e-06, "loss": 0.6782, "step": 1298 }, { "epoch": 0.04, "grad_norm": 3.2345852253728293, "learning_rate": 9.998645086581186e-06, "loss": 0.1632, "step": 1299 }, { "epoch": 0.04, "grad_norm": 8.98791338695426, "learning_rate": 9.9986342694446e-06, "loss": 0.6663, "step": 1300 }, { "epoch": 0.04, "grad_norm": 8.74632848277255, "learning_rate": 9.998623409305377e-06, "loss": 1.0394, "step": 1301 }, { "epoch": 0.04, "grad_norm": 7.036276618499203, "learning_rate": 9.998612506163612e-06, "loss": 0.4178, "step": 1302 }, { "epoch": 0.04, "grad_norm": 7.9014720124790205, "learning_rate": 9.998601560019398e-06, "loss": 0.4749, "step": 1303 }, { "epoch": 0.04, "grad_norm": 6.742730075756534, "learning_rate": 9.998590570872832e-06, "loss": 0.3743, "step": 1304 }, { "epoch": 0.04, "grad_norm": 10.362890645246122, "learning_rate": 9.998579538724009e-06, "loss": 0.4687, "step": 1305 }, { "epoch": 0.04, "grad_norm": 11.570640626281826, "learning_rate": 9.998568463573018e-06, "loss": 0.598, "step": 1306 }, { "epoch": 0.04, "grad_norm": 7.596172872443361, "learning_rate": 9.99855734541996e-06, "loss": 0.8525, "step": 1307 }, { "epoch": 0.04, "grad_norm": 9.200293942607003, "learning_rate": 9.998546184264928e-06, "loss": 0.3751, "step": 1308 }, { "epoch": 0.04, "grad_norm": 8.758841053492283, "learning_rate": 9.998534980108017e-06, "loss": 0.6736, "step": 1309 }, { "epoch": 0.04, "grad_norm": 8.775858191016452, "learning_rate": 9.998523732949328e-06, "loss": 0.6412, "step": 1310 }, { "epoch": 0.04, "grad_norm": 6.553159624334793, "learning_rate": 9.998512442788952e-06, "loss": 0.575, "step": 1311 }, { "epoch": 0.04, "grad_norm": 5.535162883315759, "learning_rate": 9.99850110962699e-06, "loss": 0.4751, "step": 1312 }, { "epoch": 0.04, "grad_norm": 5.081242094702395, "learning_rate": 9.99848973346354e-06, "loss": 0.4794, "step": 1313 }, { "epoch": 0.04, "grad_norm": 8.25103637659184, "learning_rate": 9.998478314298696e-06, "loss": 0.6619, "step": 1314 }, { "epoch": 0.04, "grad_norm": 4.480029136954652, "learning_rate": 9.998466852132558e-06, "loss": 0.1972, "step": 1315 }, { "epoch": 0.04, "grad_norm": 11.254102933595632, "learning_rate": 9.998455346965227e-06, "loss": 0.6489, "step": 1316 }, { "epoch": 0.04, "grad_norm": 6.618822420496702, "learning_rate": 9.9984437987968e-06, "loss": 0.8906, "step": 1317 }, { "epoch": 0.04, "grad_norm": 7.2120412062463775, "learning_rate": 9.998432207627376e-06, "loss": 0.5012, "step": 1318 }, { "epoch": 0.04, "grad_norm": 8.658899555899989, "learning_rate": 9.998420573457054e-06, "loss": 0.6925, "step": 1319 }, { "epoch": 0.04, "grad_norm": 7.986096698545269, "learning_rate": 9.998408896285935e-06, "loss": 0.4304, "step": 1320 }, { "epoch": 0.04, "grad_norm": 8.031725442669575, "learning_rate": 9.99839717611412e-06, "loss": 0.5179, "step": 1321 }, { "epoch": 0.04, "grad_norm": 6.94072497217281, "learning_rate": 9.99838541294171e-06, "loss": 0.5881, "step": 1322 }, { "epoch": 0.04, "grad_norm": 4.3376855299349515, "learning_rate": 9.998373606768808e-06, "loss": 0.3147, "step": 1323 }, { "epoch": 0.04, "grad_norm": 13.737318537227768, "learning_rate": 9.998361757595512e-06, "loss": 0.6543, "step": 1324 }, { "epoch": 0.04, "grad_norm": 6.421486464578463, "learning_rate": 9.998349865421924e-06, "loss": 0.3899, "step": 1325 }, { "epoch": 0.04, "grad_norm": 5.792895490886657, "learning_rate": 9.998337930248147e-06, "loss": 0.4819, "step": 1326 }, { "epoch": 0.04, "grad_norm": 12.44085433430449, "learning_rate": 9.998325952074286e-06, "loss": 0.7593, "step": 1327 }, { "epoch": 0.04, "grad_norm": 6.484173139624393, "learning_rate": 9.998313930900442e-06, "loss": 0.5913, "step": 1328 }, { "epoch": 0.04, "grad_norm": 8.080115834812748, "learning_rate": 9.998301866726719e-06, "loss": 0.553, "step": 1329 }, { "epoch": 0.04, "grad_norm": 5.594824831541188, "learning_rate": 9.99828975955322e-06, "loss": 0.3137, "step": 1330 }, { "epoch": 0.04, "grad_norm": 9.006088210908823, "learning_rate": 9.998277609380047e-06, "loss": 0.5997, "step": 1331 }, { "epoch": 0.04, "grad_norm": 9.041834980559743, "learning_rate": 9.998265416207311e-06, "loss": 0.4722, "step": 1332 }, { "epoch": 0.04, "grad_norm": 5.098761684949559, "learning_rate": 9.99825318003511e-06, "loss": 0.4162, "step": 1333 }, { "epoch": 0.04, "grad_norm": 9.21583686965372, "learning_rate": 9.998240900863554e-06, "loss": 0.7931, "step": 1334 }, { "epoch": 0.04, "grad_norm": 10.444807361380354, "learning_rate": 9.998228578692747e-06, "loss": 0.7472, "step": 1335 }, { "epoch": 0.04, "grad_norm": 16.15015808594987, "learning_rate": 9.998216213522793e-06, "loss": 0.4614, "step": 1336 }, { "epoch": 0.04, "grad_norm": 3.9062030331649122, "learning_rate": 9.998203805353804e-06, "loss": 0.2845, "step": 1337 }, { "epoch": 0.04, "grad_norm": 10.457172608016826, "learning_rate": 9.99819135418588e-06, "loss": 0.2757, "step": 1338 }, { "epoch": 0.04, "grad_norm": 10.797696249349343, "learning_rate": 9.998178860019131e-06, "loss": 0.7784, "step": 1339 }, { "epoch": 0.04, "grad_norm": 6.115856508172199, "learning_rate": 9.998166322853666e-06, "loss": 0.6445, "step": 1340 }, { "epoch": 0.04, "grad_norm": 15.688164807097369, "learning_rate": 9.998153742689591e-06, "loss": 0.7304, "step": 1341 }, { "epoch": 0.04, "grad_norm": 7.5805092118665645, "learning_rate": 9.998141119527017e-06, "loss": 0.442, "step": 1342 }, { "epoch": 0.04, "grad_norm": 6.389269323586136, "learning_rate": 9.998128453366047e-06, "loss": 0.7024, "step": 1343 }, { "epoch": 0.04, "grad_norm": 6.372490407608014, "learning_rate": 9.998115744206796e-06, "loss": 0.4506, "step": 1344 }, { "epoch": 0.04, "grad_norm": 7.430863119951712, "learning_rate": 9.998102992049368e-06, "loss": 0.4278, "step": 1345 }, { "epoch": 0.04, "grad_norm": 6.365615219985123, "learning_rate": 9.998090196893877e-06, "loss": 0.461, "step": 1346 }, { "epoch": 0.04, "grad_norm": 6.816462388121714, "learning_rate": 9.99807735874043e-06, "loss": 0.2923, "step": 1347 }, { "epoch": 0.04, "grad_norm": 7.488785974947512, "learning_rate": 9.99806447758914e-06, "loss": 0.4173, "step": 1348 }, { "epoch": 0.04, "grad_norm": 6.935563581871421, "learning_rate": 9.998051553440117e-06, "loss": 0.3498, "step": 1349 }, { "epoch": 0.04, "grad_norm": 13.77121415912569, "learning_rate": 9.998038586293472e-06, "loss": 0.4971, "step": 1350 }, { "epoch": 0.04, "grad_norm": 4.028359522773186, "learning_rate": 9.998025576149315e-06, "loss": 0.4158, "step": 1351 }, { "epoch": 0.04, "grad_norm": 5.876615342803038, "learning_rate": 9.99801252300776e-06, "loss": 0.4047, "step": 1352 }, { "epoch": 0.04, "grad_norm": 6.618868671682567, "learning_rate": 9.99799942686892e-06, "loss": 0.4162, "step": 1353 }, { "epoch": 0.04, "grad_norm": 9.231959096318803, "learning_rate": 9.997986287732904e-06, "loss": 0.7392, "step": 1354 }, { "epoch": 0.04, "grad_norm": 5.178421303024685, "learning_rate": 9.997973105599829e-06, "loss": 0.2292, "step": 1355 }, { "epoch": 0.04, "grad_norm": 13.81017936440901, "learning_rate": 9.997959880469806e-06, "loss": 0.3359, "step": 1356 }, { "epoch": 0.04, "grad_norm": 9.772734033529835, "learning_rate": 9.99794661234295e-06, "loss": 0.4503, "step": 1357 }, { "epoch": 0.04, "grad_norm": 5.546981165769878, "learning_rate": 9.997933301219373e-06, "loss": 0.4171, "step": 1358 }, { "epoch": 0.04, "grad_norm": 6.05628605567112, "learning_rate": 9.997919947099194e-06, "loss": 0.5636, "step": 1359 }, { "epoch": 0.04, "grad_norm": 7.722782085746101, "learning_rate": 9.997906549982524e-06, "loss": 0.5471, "step": 1360 }, { "epoch": 0.04, "grad_norm": 7.705145766199453, "learning_rate": 9.99789310986948e-06, "loss": 0.3504, "step": 1361 }, { "epoch": 0.04, "grad_norm": 10.840353925916613, "learning_rate": 9.997879626760175e-06, "loss": 0.4888, "step": 1362 }, { "epoch": 0.04, "grad_norm": 8.29640446422404, "learning_rate": 9.997866100654728e-06, "loss": 0.6044, "step": 1363 }, { "epoch": 0.04, "grad_norm": 9.103626144972495, "learning_rate": 9.997852531553254e-06, "loss": 0.5912, "step": 1364 }, { "epoch": 0.04, "grad_norm": 7.2005203072316055, "learning_rate": 9.997838919455869e-06, "loss": 0.4731, "step": 1365 }, { "epoch": 0.04, "grad_norm": 4.405046393315785, "learning_rate": 9.997825264362692e-06, "loss": 0.4209, "step": 1366 }, { "epoch": 0.04, "grad_norm": 5.306310031942191, "learning_rate": 9.99781156627384e-06, "loss": 0.375, "step": 1367 }, { "epoch": 0.04, "grad_norm": 9.11301251345089, "learning_rate": 9.99779782518943e-06, "loss": 0.8772, "step": 1368 }, { "epoch": 0.04, "grad_norm": 8.125275123046315, "learning_rate": 9.997784041109581e-06, "loss": 0.8606, "step": 1369 }, { "epoch": 0.04, "grad_norm": 4.9350682559795835, "learning_rate": 9.997770214034412e-06, "loss": 0.2848, "step": 1370 }, { "epoch": 0.04, "grad_norm": 6.653136478842406, "learning_rate": 9.99775634396404e-06, "loss": 0.3585, "step": 1371 }, { "epoch": 0.04, "grad_norm": 9.024215175920885, "learning_rate": 9.997742430898585e-06, "loss": 0.3797, "step": 1372 }, { "epoch": 0.04, "grad_norm": 9.51286749512135, "learning_rate": 9.997728474838168e-06, "loss": 0.8755, "step": 1373 }, { "epoch": 0.04, "grad_norm": 11.746010529172892, "learning_rate": 9.997714475782908e-06, "loss": 0.9771, "step": 1374 }, { "epoch": 0.04, "grad_norm": 8.076352720069385, "learning_rate": 9.997700433732927e-06, "loss": 0.3976, "step": 1375 }, { "epoch": 0.04, "grad_norm": 10.50014468502087, "learning_rate": 9.997686348688344e-06, "loss": 1.2096, "step": 1376 }, { "epoch": 0.04, "grad_norm": 7.68945448926042, "learning_rate": 9.99767222064928e-06, "loss": 0.606, "step": 1377 }, { "epoch": 0.04, "grad_norm": 6.330048897360015, "learning_rate": 9.997658049615857e-06, "loss": 0.3494, "step": 1378 }, { "epoch": 0.04, "grad_norm": 4.875186720964646, "learning_rate": 9.997643835588197e-06, "loss": 0.6668, "step": 1379 }, { "epoch": 0.04, "grad_norm": 6.482928158309202, "learning_rate": 9.997629578566424e-06, "loss": 0.2298, "step": 1380 }, { "epoch": 0.04, "grad_norm": 5.867631024334952, "learning_rate": 9.997615278550657e-06, "loss": 0.4549, "step": 1381 }, { "epoch": 0.04, "grad_norm": 9.151753698419268, "learning_rate": 9.997600935541023e-06, "loss": 0.6497, "step": 1382 }, { "epoch": 0.04, "grad_norm": 8.204950736186778, "learning_rate": 9.997586549537641e-06, "loss": 0.8537, "step": 1383 }, { "epoch": 0.04, "grad_norm": 7.027556436828441, "learning_rate": 9.99757212054064e-06, "loss": 0.278, "step": 1384 }, { "epoch": 0.04, "grad_norm": 3.0635391535397916, "learning_rate": 9.99755764855014e-06, "loss": 0.1144, "step": 1385 }, { "epoch": 0.04, "grad_norm": 9.284874307976226, "learning_rate": 9.997543133566267e-06, "loss": 0.4618, "step": 1386 }, { "epoch": 0.04, "grad_norm": 5.964736547799555, "learning_rate": 9.997528575589146e-06, "loss": 0.2241, "step": 1387 }, { "epoch": 0.04, "grad_norm": 5.431053198441285, "learning_rate": 9.997513974618902e-06, "loss": 0.6677, "step": 1388 }, { "epoch": 0.04, "grad_norm": 9.7711486331737, "learning_rate": 9.99749933065566e-06, "loss": 0.4071, "step": 1389 }, { "epoch": 0.04, "grad_norm": 5.4917423507422525, "learning_rate": 9.997484643699546e-06, "loss": 0.2986, "step": 1390 }, { "epoch": 0.04, "grad_norm": 7.896253817860663, "learning_rate": 9.997469913750689e-06, "loss": 0.5384, "step": 1391 }, { "epoch": 0.04, "grad_norm": 8.008228659632142, "learning_rate": 9.997455140809211e-06, "loss": 0.4116, "step": 1392 }, { "epoch": 0.04, "grad_norm": 9.498851305128603, "learning_rate": 9.997440324875245e-06, "loss": 0.564, "step": 1393 }, { "epoch": 0.04, "grad_norm": 5.691575403259092, "learning_rate": 9.997425465948913e-06, "loss": 0.4773, "step": 1394 }, { "epoch": 0.04, "grad_norm": 5.1407165519191285, "learning_rate": 9.997410564030345e-06, "loss": 0.4518, "step": 1395 }, { "epoch": 0.04, "grad_norm": 5.748878411080033, "learning_rate": 9.99739561911967e-06, "loss": 0.1759, "step": 1396 }, { "epoch": 0.04, "grad_norm": 8.79435506879554, "learning_rate": 9.997380631217015e-06, "loss": 1.1212, "step": 1397 }, { "epoch": 0.04, "grad_norm": 4.881327752777455, "learning_rate": 9.99736560032251e-06, "loss": 0.4804, "step": 1398 }, { "epoch": 0.04, "grad_norm": 6.754017870340832, "learning_rate": 9.997350526436284e-06, "loss": 0.9012, "step": 1399 }, { "epoch": 0.04, "grad_norm": 6.720272583655578, "learning_rate": 9.997335409558466e-06, "loss": 0.6059, "step": 1400 }, { "epoch": 0.04, "grad_norm": 8.672586827661352, "learning_rate": 9.997320249689188e-06, "loss": 0.7532, "step": 1401 }, { "epoch": 0.04, "grad_norm": 7.37805461664167, "learning_rate": 9.99730504682858e-06, "loss": 0.2166, "step": 1402 }, { "epoch": 0.04, "grad_norm": 6.848959068230612, "learning_rate": 9.99728980097677e-06, "loss": 0.5422, "step": 1403 }, { "epoch": 0.04, "grad_norm": 9.797698988630021, "learning_rate": 9.997274512133893e-06, "loss": 0.5012, "step": 1404 }, { "epoch": 0.04, "grad_norm": 5.626560799964706, "learning_rate": 9.997259180300076e-06, "loss": 0.4302, "step": 1405 }, { "epoch": 0.04, "grad_norm": 4.303560085089477, "learning_rate": 9.997243805475457e-06, "loss": 0.183, "step": 1406 }, { "epoch": 0.04, "grad_norm": 6.866600037532363, "learning_rate": 9.997228387660161e-06, "loss": 0.4229, "step": 1407 }, { "epoch": 0.04, "grad_norm": 10.453608525929162, "learning_rate": 9.997212926854328e-06, "loss": 0.4009, "step": 1408 }, { "epoch": 0.04, "grad_norm": 5.341018915799629, "learning_rate": 9.997197423058085e-06, "loss": 0.5289, "step": 1409 }, { "epoch": 0.04, "grad_norm": 5.491263473289281, "learning_rate": 9.997181876271569e-06, "loss": 0.3348, "step": 1410 }, { "epoch": 0.04, "grad_norm": 6.608441298165278, "learning_rate": 9.997166286494912e-06, "loss": 0.6082, "step": 1411 }, { "epoch": 0.04, "grad_norm": 9.068235594910929, "learning_rate": 9.997150653728248e-06, "loss": 0.3661, "step": 1412 }, { "epoch": 0.04, "grad_norm": 4.504315717467676, "learning_rate": 9.997134977971714e-06, "loss": 0.284, "step": 1413 }, { "epoch": 0.04, "grad_norm": 4.855405701207205, "learning_rate": 9.997119259225441e-06, "loss": 0.5792, "step": 1414 }, { "epoch": 0.04, "grad_norm": 9.150891086567617, "learning_rate": 9.997103497489568e-06, "loss": 0.7109, "step": 1415 }, { "epoch": 0.04, "grad_norm": 9.758531017130842, "learning_rate": 9.99708769276423e-06, "loss": 0.4801, "step": 1416 }, { "epoch": 0.04, "grad_norm": 4.796452258241969, "learning_rate": 9.997071845049559e-06, "loss": 0.3078, "step": 1417 }, { "epoch": 0.04, "grad_norm": 5.343797917736777, "learning_rate": 9.997055954345694e-06, "loss": 0.266, "step": 1418 }, { "epoch": 0.04, "grad_norm": 5.620316526588098, "learning_rate": 9.997040020652774e-06, "loss": 0.601, "step": 1419 }, { "epoch": 0.04, "grad_norm": 5.964513503489869, "learning_rate": 9.997024043970934e-06, "loss": 0.4568, "step": 1420 }, { "epoch": 0.04, "grad_norm": 3.316175688692127, "learning_rate": 9.99700802430031e-06, "loss": 0.3227, "step": 1421 }, { "epoch": 0.04, "grad_norm": 10.686187306594029, "learning_rate": 9.996991961641041e-06, "loss": 0.5972, "step": 1422 }, { "epoch": 0.04, "grad_norm": 12.331236472028145, "learning_rate": 9.996975855993268e-06, "loss": 0.7089, "step": 1423 }, { "epoch": 0.04, "grad_norm": 7.90795614658173, "learning_rate": 9.996959707357123e-06, "loss": 0.5574, "step": 1424 }, { "epoch": 0.04, "grad_norm": 6.00922261172336, "learning_rate": 9.99694351573275e-06, "loss": 0.662, "step": 1425 }, { "epoch": 0.04, "grad_norm": 7.047046870738741, "learning_rate": 9.99692728112029e-06, "loss": 0.6279, "step": 1426 }, { "epoch": 0.04, "grad_norm": 4.661492232158587, "learning_rate": 9.996911003519878e-06, "loss": 0.3474, "step": 1427 }, { "epoch": 0.04, "grad_norm": 5.465760552899373, "learning_rate": 9.996894682931657e-06, "loss": 0.5718, "step": 1428 }, { "epoch": 0.04, "grad_norm": 8.708075712734322, "learning_rate": 9.996878319355764e-06, "loss": 0.5254, "step": 1429 }, { "epoch": 0.04, "grad_norm": 6.359849092024718, "learning_rate": 9.996861912792344e-06, "loss": 0.5956, "step": 1430 }, { "epoch": 0.04, "grad_norm": 4.147926255960203, "learning_rate": 9.996845463241535e-06, "loss": 0.4059, "step": 1431 }, { "epoch": 0.04, "grad_norm": 6.390729516254284, "learning_rate": 9.99682897070348e-06, "loss": 0.2837, "step": 1432 }, { "epoch": 0.04, "grad_norm": 12.464503432756368, "learning_rate": 9.99681243517832e-06, "loss": 0.3573, "step": 1433 }, { "epoch": 0.04, "grad_norm": 5.357840938498204, "learning_rate": 9.996795856666199e-06, "loss": 0.32, "step": 1434 }, { "epoch": 0.04, "grad_norm": 8.810866346509973, "learning_rate": 9.996779235167258e-06, "loss": 0.6914, "step": 1435 }, { "epoch": 0.04, "grad_norm": 10.72172465073059, "learning_rate": 9.996762570681641e-06, "loss": 0.5761, "step": 1436 }, { "epoch": 0.04, "grad_norm": 3.868233433317858, "learning_rate": 9.99674586320949e-06, "loss": 0.3175, "step": 1437 }, { "epoch": 0.04, "grad_norm": 11.723639930300317, "learning_rate": 9.996729112750949e-06, "loss": 0.7621, "step": 1438 }, { "epoch": 0.04, "grad_norm": 9.774138866845453, "learning_rate": 9.996712319306164e-06, "loss": 0.6057, "step": 1439 }, { "epoch": 0.04, "grad_norm": 4.18380278021507, "learning_rate": 9.996695482875279e-06, "loss": 0.6467, "step": 1440 }, { "epoch": 0.04, "grad_norm": 13.720644322605313, "learning_rate": 9.996678603458437e-06, "loss": 0.5346, "step": 1441 }, { "epoch": 0.04, "grad_norm": 5.566425652721119, "learning_rate": 9.996661681055783e-06, "loss": 0.3747, "step": 1442 }, { "epoch": 0.04, "grad_norm": 6.917388012816748, "learning_rate": 9.996644715667463e-06, "loss": 0.37, "step": 1443 }, { "epoch": 0.04, "grad_norm": 2.899586863522236, "learning_rate": 9.996627707293626e-06, "loss": 0.4153, "step": 1444 }, { "epoch": 0.04, "grad_norm": 10.256701860347848, "learning_rate": 9.996610655934416e-06, "loss": 0.5108, "step": 1445 }, { "epoch": 0.04, "grad_norm": 4.0041314782784, "learning_rate": 9.996593561589977e-06, "loss": 0.3122, "step": 1446 }, { "epoch": 0.04, "grad_norm": 8.847632051481119, "learning_rate": 9.996576424260463e-06, "loss": 0.8483, "step": 1447 }, { "epoch": 0.04, "grad_norm": 8.651470531204215, "learning_rate": 9.996559243946015e-06, "loss": 0.4807, "step": 1448 }, { "epoch": 0.04, "grad_norm": 9.318120194969342, "learning_rate": 9.996542020646782e-06, "loss": 0.5259, "step": 1449 }, { "epoch": 0.04, "grad_norm": 4.178732390325278, "learning_rate": 9.996524754362915e-06, "loss": 0.1928, "step": 1450 }, { "epoch": 0.04, "grad_norm": 7.78237790431961, "learning_rate": 9.996507445094559e-06, "loss": 0.5515, "step": 1451 }, { "epoch": 0.04, "grad_norm": 6.477208748338721, "learning_rate": 9.996490092841866e-06, "loss": 0.539, "step": 1452 }, { "epoch": 0.04, "grad_norm": 5.33863744386185, "learning_rate": 9.996472697604983e-06, "loss": 0.3972, "step": 1453 }, { "epoch": 0.04, "grad_norm": 8.69093836521516, "learning_rate": 9.996455259384062e-06, "loss": 0.5562, "step": 1454 }, { "epoch": 0.04, "grad_norm": 22.530463216856266, "learning_rate": 9.996437778179251e-06, "loss": 0.5397, "step": 1455 }, { "epoch": 0.04, "grad_norm": 4.011372433342573, "learning_rate": 9.9964202539907e-06, "loss": 0.5895, "step": 1456 }, { "epoch": 0.04, "grad_norm": 15.109457684421832, "learning_rate": 9.996402686818561e-06, "loss": 0.7163, "step": 1457 }, { "epoch": 0.04, "grad_norm": 4.555690020357451, "learning_rate": 9.996385076662987e-06, "loss": 0.2953, "step": 1458 }, { "epoch": 0.04, "grad_norm": 4.925563338539597, "learning_rate": 9.996367423524125e-06, "loss": 0.1966, "step": 1459 }, { "epoch": 0.04, "grad_norm": 9.420304959943179, "learning_rate": 9.996349727402129e-06, "loss": 0.8142, "step": 1460 }, { "epoch": 0.04, "grad_norm": 9.971029852017015, "learning_rate": 9.996331988297153e-06, "loss": 0.5572, "step": 1461 }, { "epoch": 0.04, "grad_norm": 5.476878767907921, "learning_rate": 9.996314206209347e-06, "loss": 0.4497, "step": 1462 }, { "epoch": 0.04, "grad_norm": 5.226115619616973, "learning_rate": 9.996296381138867e-06, "loss": 0.3857, "step": 1463 }, { "epoch": 0.04, "grad_norm": 5.624206571781726, "learning_rate": 9.996278513085862e-06, "loss": 0.2549, "step": 1464 }, { "epoch": 0.04, "grad_norm": 7.221511972993358, "learning_rate": 9.996260602050489e-06, "loss": 0.7239, "step": 1465 }, { "epoch": 0.04, "grad_norm": 7.449210364462539, "learning_rate": 9.9962426480329e-06, "loss": 0.6115, "step": 1466 }, { "epoch": 0.04, "grad_norm": 9.602203670538621, "learning_rate": 9.996224651033252e-06, "loss": 0.7596, "step": 1467 }, { "epoch": 0.04, "grad_norm": 5.675759826745744, "learning_rate": 9.996206611051698e-06, "loss": 0.6976, "step": 1468 }, { "epoch": 0.04, "grad_norm": 4.8803592469919375, "learning_rate": 9.996188528088395e-06, "loss": 0.5289, "step": 1469 }, { "epoch": 0.04, "grad_norm": 7.116920575057705, "learning_rate": 9.996170402143495e-06, "loss": 0.6123, "step": 1470 }, { "epoch": 0.04, "grad_norm": 5.68502519023216, "learning_rate": 9.996152233217157e-06, "loss": 0.2677, "step": 1471 }, { "epoch": 0.04, "grad_norm": 12.195206303839754, "learning_rate": 9.996134021309538e-06, "loss": 0.7498, "step": 1472 }, { "epoch": 0.04, "grad_norm": 8.963736485269063, "learning_rate": 9.996115766420791e-06, "loss": 0.1872, "step": 1473 }, { "epoch": 0.04, "grad_norm": 8.04375958920065, "learning_rate": 9.996097468551077e-06, "loss": 0.5931, "step": 1474 }, { "epoch": 0.04, "grad_norm": 36.91133897856362, "learning_rate": 9.99607912770055e-06, "loss": 0.6454, "step": 1475 }, { "epoch": 0.04, "grad_norm": 8.612010850012748, "learning_rate": 9.99606074386937e-06, "loss": 0.906, "step": 1476 }, { "epoch": 0.04, "grad_norm": 5.62061185275542, "learning_rate": 9.996042317057695e-06, "loss": 0.6105, "step": 1477 }, { "epoch": 0.04, "grad_norm": 7.109446330289055, "learning_rate": 9.996023847265683e-06, "loss": 0.643, "step": 1478 }, { "epoch": 0.04, "grad_norm": 13.672958383414683, "learning_rate": 9.996005334493493e-06, "loss": 0.6381, "step": 1479 }, { "epoch": 0.04, "grad_norm": 6.201108436560353, "learning_rate": 9.995986778741284e-06, "loss": 0.7298, "step": 1480 }, { "epoch": 0.04, "grad_norm": 8.310440209821502, "learning_rate": 9.995968180009216e-06, "loss": 0.5111, "step": 1481 }, { "epoch": 0.04, "grad_norm": 10.386397294733053, "learning_rate": 9.995949538297449e-06, "loss": 0.6196, "step": 1482 }, { "epoch": 0.04, "grad_norm": 6.8534372824764995, "learning_rate": 9.995930853606142e-06, "loss": 0.3753, "step": 1483 }, { "epoch": 0.04, "grad_norm": 12.446193857827817, "learning_rate": 9.995912125935459e-06, "loss": 0.6637, "step": 1484 }, { "epoch": 0.04, "grad_norm": 6.001202621733788, "learning_rate": 9.995893355285557e-06, "loss": 0.4885, "step": 1485 }, { "epoch": 0.04, "grad_norm": 5.076493925627688, "learning_rate": 9.9958745416566e-06, "loss": 0.3364, "step": 1486 }, { "epoch": 0.04, "grad_norm": 22.684125833263575, "learning_rate": 9.99585568504875e-06, "loss": 0.8161, "step": 1487 }, { "epoch": 0.04, "grad_norm": 6.088367804488786, "learning_rate": 9.99583678546217e-06, "loss": 0.931, "step": 1488 }, { "epoch": 0.04, "grad_norm": 6.8988162117124014, "learning_rate": 9.995817842897019e-06, "loss": 0.6676, "step": 1489 }, { "epoch": 0.04, "grad_norm": 13.017924056496975, "learning_rate": 9.995798857353464e-06, "loss": 0.6242, "step": 1490 }, { "epoch": 0.04, "grad_norm": 4.6919115796485595, "learning_rate": 9.995779828831665e-06, "loss": 0.5595, "step": 1491 }, { "epoch": 0.04, "grad_norm": 11.443612721743259, "learning_rate": 9.995760757331788e-06, "loss": 0.5147, "step": 1492 }, { "epoch": 0.04, "grad_norm": 5.575703798012424, "learning_rate": 9.995741642853995e-06, "loss": 0.6747, "step": 1493 }, { "epoch": 0.04, "grad_norm": 5.500979596378757, "learning_rate": 9.995722485398453e-06, "loss": 0.5856, "step": 1494 }, { "epoch": 0.04, "grad_norm": 7.622280699070319, "learning_rate": 9.995703284965327e-06, "loss": 0.6991, "step": 1495 }, { "epoch": 0.04, "grad_norm": 11.426548451806624, "learning_rate": 9.995684041554779e-06, "loss": 0.9979, "step": 1496 }, { "epoch": 0.04, "grad_norm": 12.690305503011565, "learning_rate": 9.995664755166976e-06, "loss": 1.0873, "step": 1497 }, { "epoch": 0.04, "grad_norm": 8.445984989179117, "learning_rate": 9.995645425802085e-06, "loss": 0.3815, "step": 1498 }, { "epoch": 0.04, "grad_norm": 4.872145428348962, "learning_rate": 9.995626053460272e-06, "loss": 0.3169, "step": 1499 }, { "epoch": 0.04, "grad_norm": 7.624833777445053, "learning_rate": 9.995606638141701e-06, "loss": 0.7333, "step": 1500 }, { "epoch": 0.04, "grad_norm": 10.063814160321682, "learning_rate": 9.995587179846542e-06, "loss": 0.4753, "step": 1501 }, { "epoch": 0.04, "grad_norm": 7.6364547488734456, "learning_rate": 9.995567678574963e-06, "loss": 0.4782, "step": 1502 }, { "epoch": 0.04, "grad_norm": 7.803320779357136, "learning_rate": 9.995548134327129e-06, "loss": 0.3009, "step": 1503 }, { "epoch": 0.04, "grad_norm": 5.731854667090932, "learning_rate": 9.99552854710321e-06, "loss": 0.3668, "step": 1504 }, { "epoch": 0.04, "grad_norm": 6.308456485542281, "learning_rate": 9.995508916903373e-06, "loss": 0.7412, "step": 1505 }, { "epoch": 0.04, "grad_norm": 9.73839524038279, "learning_rate": 9.995489243727789e-06, "loss": 0.7357, "step": 1506 }, { "epoch": 0.04, "grad_norm": 4.620054152241526, "learning_rate": 9.995469527576627e-06, "loss": 0.238, "step": 1507 }, { "epoch": 0.04, "grad_norm": 9.920953758904842, "learning_rate": 9.995449768450053e-06, "loss": 0.8734, "step": 1508 }, { "epoch": 0.04, "grad_norm": 11.28122514632174, "learning_rate": 9.995429966348243e-06, "loss": 0.5628, "step": 1509 }, { "epoch": 0.04, "grad_norm": 7.925176771340475, "learning_rate": 9.99541012127136e-06, "loss": 0.7695, "step": 1510 }, { "epoch": 0.04, "grad_norm": 7.927701413841924, "learning_rate": 9.995390233219582e-06, "loss": 0.5802, "step": 1511 }, { "epoch": 0.04, "grad_norm": 6.30673630713603, "learning_rate": 9.995370302193075e-06, "loss": 0.2167, "step": 1512 }, { "epoch": 0.04, "grad_norm": 3.941792192268255, "learning_rate": 9.995350328192013e-06, "loss": 0.5443, "step": 1513 }, { "epoch": 0.04, "grad_norm": 8.997814018957659, "learning_rate": 9.995330311216569e-06, "loss": 0.7601, "step": 1514 }, { "epoch": 0.04, "grad_norm": 5.673753663721811, "learning_rate": 9.995310251266911e-06, "loss": 0.539, "step": 1515 }, { "epoch": 0.04, "grad_norm": 4.287360348665349, "learning_rate": 9.995290148343214e-06, "loss": 0.2266, "step": 1516 }, { "epoch": 0.04, "grad_norm": 9.0643459150159, "learning_rate": 9.995270002445652e-06, "loss": 0.4684, "step": 1517 }, { "epoch": 0.04, "grad_norm": 10.135838300214527, "learning_rate": 9.995249813574397e-06, "loss": 0.6063, "step": 1518 }, { "epoch": 0.04, "grad_norm": 7.457744326500683, "learning_rate": 9.995229581729622e-06, "loss": 0.5729, "step": 1519 }, { "epoch": 0.04, "grad_norm": 7.231302881995386, "learning_rate": 9.995209306911504e-06, "loss": 0.707, "step": 1520 }, { "epoch": 0.04, "grad_norm": 12.752879397311153, "learning_rate": 9.995188989120213e-06, "loss": 0.7584, "step": 1521 }, { "epoch": 0.04, "grad_norm": 11.38484369612106, "learning_rate": 9.995168628355926e-06, "loss": 0.6035, "step": 1522 }, { "epoch": 0.04, "grad_norm": 9.121408565732732, "learning_rate": 9.995148224618821e-06, "loss": 0.8788, "step": 1523 }, { "epoch": 0.04, "grad_norm": 5.653101091982768, "learning_rate": 9.99512777790907e-06, "loss": 0.5349, "step": 1524 }, { "epoch": 0.04, "grad_norm": 9.339787079795922, "learning_rate": 9.995107288226848e-06, "loss": 0.6159, "step": 1525 }, { "epoch": 0.04, "grad_norm": 9.98573430080969, "learning_rate": 9.995086755572334e-06, "loss": 0.7979, "step": 1526 }, { "epoch": 0.04, "grad_norm": 10.967306275736115, "learning_rate": 9.995066179945704e-06, "loss": 0.6612, "step": 1527 }, { "epoch": 0.04, "grad_norm": 8.86228052897878, "learning_rate": 9.995045561347135e-06, "loss": 0.6381, "step": 1528 }, { "epoch": 0.04, "grad_norm": 10.26735334783444, "learning_rate": 9.995024899776804e-06, "loss": 0.5248, "step": 1529 }, { "epoch": 0.04, "grad_norm": 9.886287804913655, "learning_rate": 9.995004195234888e-06, "loss": 0.5877, "step": 1530 }, { "epoch": 0.04, "grad_norm": 6.42565936404871, "learning_rate": 9.994983447721565e-06, "loss": 0.7036, "step": 1531 }, { "epoch": 0.04, "grad_norm": 6.0547659395120625, "learning_rate": 9.994962657237015e-06, "loss": 0.474, "step": 1532 }, { "epoch": 0.04, "grad_norm": 9.22858284739075, "learning_rate": 9.994941823781417e-06, "loss": 0.5935, "step": 1533 }, { "epoch": 0.04, "grad_norm": 7.2976814494194615, "learning_rate": 9.99492094735495e-06, "loss": 0.5393, "step": 1534 }, { "epoch": 0.04, "grad_norm": 7.31047825700832, "learning_rate": 9.994900027957793e-06, "loss": 0.2064, "step": 1535 }, { "epoch": 0.04, "grad_norm": 3.5303530271219348, "learning_rate": 9.994879065590124e-06, "loss": 0.184, "step": 1536 }, { "epoch": 0.04, "grad_norm": 6.409061429050019, "learning_rate": 9.994858060252127e-06, "loss": 0.3308, "step": 1537 }, { "epoch": 0.04, "grad_norm": 13.447647592387414, "learning_rate": 9.99483701194398e-06, "loss": 0.2672, "step": 1538 }, { "epoch": 0.04, "grad_norm": 8.18951801735596, "learning_rate": 9.994815920665866e-06, "loss": 0.9223, "step": 1539 }, { "epoch": 0.04, "grad_norm": 5.923921055478314, "learning_rate": 9.994794786417967e-06, "loss": 0.3681, "step": 1540 }, { "epoch": 0.04, "grad_norm": 6.8415378844964385, "learning_rate": 9.99477360920046e-06, "loss": 0.2118, "step": 1541 }, { "epoch": 0.04, "grad_norm": 6.874215722866456, "learning_rate": 9.994752389013533e-06, "loss": 0.6189, "step": 1542 }, { "epoch": 0.04, "grad_norm": 12.527669723356764, "learning_rate": 9.994731125857367e-06, "loss": 0.7923, "step": 1543 }, { "epoch": 0.04, "grad_norm": 6.090680880263313, "learning_rate": 9.994709819732143e-06, "loss": 0.2854, "step": 1544 }, { "epoch": 0.04, "grad_norm": 4.604914844649241, "learning_rate": 9.994688470638045e-06, "loss": 0.2708, "step": 1545 }, { "epoch": 0.04, "grad_norm": 10.455606619313489, "learning_rate": 9.994667078575258e-06, "loss": 0.4836, "step": 1546 }, { "epoch": 0.04, "grad_norm": 11.533192788813107, "learning_rate": 9.994645643543963e-06, "loss": 0.4653, "step": 1547 }, { "epoch": 0.04, "grad_norm": 2.966645548956071, "learning_rate": 9.99462416554435e-06, "loss": 0.1605, "step": 1548 }, { "epoch": 0.04, "grad_norm": 5.433389973881533, "learning_rate": 9.994602644576597e-06, "loss": 0.6786, "step": 1549 }, { "epoch": 0.04, "grad_norm": 10.731162422013881, "learning_rate": 9.994581080640893e-06, "loss": 0.4542, "step": 1550 }, { "epoch": 0.04, "grad_norm": 9.899562756679584, "learning_rate": 9.994559473737423e-06, "loss": 0.526, "step": 1551 }, { "epoch": 0.04, "grad_norm": 5.629701514521764, "learning_rate": 9.994537823866374e-06, "loss": 0.4652, "step": 1552 }, { "epoch": 0.04, "grad_norm": 8.291813227701669, "learning_rate": 9.994516131027931e-06, "loss": 0.4115, "step": 1553 }, { "epoch": 0.04, "grad_norm": 9.727807047323214, "learning_rate": 9.994494395222279e-06, "loss": 0.7277, "step": 1554 }, { "epoch": 0.04, "grad_norm": 6.285023936223811, "learning_rate": 9.994472616449609e-06, "loss": 0.5741, "step": 1555 }, { "epoch": 0.04, "grad_norm": 4.488963129289435, "learning_rate": 9.994450794710103e-06, "loss": 0.2581, "step": 1556 }, { "epoch": 0.04, "grad_norm": 5.749217892405812, "learning_rate": 9.994428930003955e-06, "loss": 0.2944, "step": 1557 }, { "epoch": 0.04, "grad_norm": 7.684844543085937, "learning_rate": 9.994407022331347e-06, "loss": 0.8133, "step": 1558 }, { "epoch": 0.04, "grad_norm": 7.45230821016561, "learning_rate": 9.994385071692472e-06, "loss": 0.1598, "step": 1559 }, { "epoch": 0.04, "grad_norm": 7.985513326045129, "learning_rate": 9.994363078087516e-06, "loss": 0.6959, "step": 1560 }, { "epoch": 0.04, "grad_norm": 8.982865447231546, "learning_rate": 9.99434104151667e-06, "loss": 0.3889, "step": 1561 }, { "epoch": 0.04, "grad_norm": 6.534374530049712, "learning_rate": 9.994318961980124e-06, "loss": 0.2803, "step": 1562 }, { "epoch": 0.04, "grad_norm": 12.853043064530546, "learning_rate": 9.994296839478066e-06, "loss": 0.729, "step": 1563 }, { "epoch": 0.04, "grad_norm": 4.159670419039622, "learning_rate": 9.994274674010687e-06, "loss": 0.176, "step": 1564 }, { "epoch": 0.04, "grad_norm": 1.5112783495292519, "learning_rate": 9.994252465578179e-06, "loss": 0.1184, "step": 1565 }, { "epoch": 0.04, "grad_norm": 4.173375520635133, "learning_rate": 9.99423021418073e-06, "loss": 0.3881, "step": 1566 }, { "epoch": 0.04, "grad_norm": 10.088786602575391, "learning_rate": 9.994207919818533e-06, "loss": 0.4074, "step": 1567 }, { "epoch": 0.04, "grad_norm": 9.537498752098951, "learning_rate": 9.994185582491782e-06, "loss": 0.4772, "step": 1568 }, { "epoch": 0.04, "grad_norm": 6.169945137484835, "learning_rate": 9.994163202200666e-06, "loss": 0.534, "step": 1569 }, { "epoch": 0.04, "grad_norm": 6.36310971979042, "learning_rate": 9.99414077894538e-06, "loss": 0.5193, "step": 1570 }, { "epoch": 0.04, "grad_norm": 4.569295447231908, "learning_rate": 9.994118312726115e-06, "loss": 0.2859, "step": 1571 }, { "epoch": 0.05, "grad_norm": 5.320250590632193, "learning_rate": 9.994095803543067e-06, "loss": 0.5134, "step": 1572 }, { "epoch": 0.05, "grad_norm": 5.126236301265218, "learning_rate": 9.994073251396425e-06, "loss": 0.4339, "step": 1573 }, { "epoch": 0.05, "grad_norm": 5.896531601748601, "learning_rate": 9.994050656286387e-06, "loss": 0.2571, "step": 1574 }, { "epoch": 0.05, "grad_norm": 10.572759036437846, "learning_rate": 9.994028018213146e-06, "loss": 0.6228, "step": 1575 }, { "epoch": 0.05, "grad_norm": 6.733416212472005, "learning_rate": 9.994005337176897e-06, "loss": 0.1654, "step": 1576 }, { "epoch": 0.05, "grad_norm": 6.465941109542205, "learning_rate": 9.993982613177835e-06, "loss": 0.5046, "step": 1577 }, { "epoch": 0.05, "grad_norm": 2.8339487463974433, "learning_rate": 9.993959846216156e-06, "loss": 0.1867, "step": 1578 }, { "epoch": 0.05, "grad_norm": 7.8237068667779255, "learning_rate": 9.993937036292054e-06, "loss": 0.4038, "step": 1579 }, { "epoch": 0.05, "grad_norm": 3.794939277675776, "learning_rate": 9.993914183405728e-06, "loss": 0.579, "step": 1580 }, { "epoch": 0.05, "grad_norm": 11.588521184438829, "learning_rate": 9.993891287557372e-06, "loss": 0.5615, "step": 1581 }, { "epoch": 0.05, "grad_norm": 8.5035701434713, "learning_rate": 9.993868348747185e-06, "loss": 0.4015, "step": 1582 }, { "epoch": 0.05, "grad_norm": 3.8301302893168123, "learning_rate": 9.993845366975361e-06, "loss": 0.3182, "step": 1583 }, { "epoch": 0.05, "grad_norm": 5.119003882068161, "learning_rate": 9.993822342242103e-06, "loss": 0.2789, "step": 1584 }, { "epoch": 0.05, "grad_norm": 12.602053496130539, "learning_rate": 9.993799274547605e-06, "loss": 0.7321, "step": 1585 }, { "epoch": 0.05, "grad_norm": 9.408097703504234, "learning_rate": 9.993776163892068e-06, "loss": 0.5081, "step": 1586 }, { "epoch": 0.05, "grad_norm": 6.154673805424954, "learning_rate": 9.993753010275688e-06, "loss": 0.5148, "step": 1587 }, { "epoch": 0.05, "grad_norm": 6.521453263264831, "learning_rate": 9.993729813698665e-06, "loss": 0.448, "step": 1588 }, { "epoch": 0.05, "grad_norm": 7.859536686896895, "learning_rate": 9.993706574161202e-06, "loss": 0.7004, "step": 1589 }, { "epoch": 0.05, "grad_norm": 10.60472119473407, "learning_rate": 9.993683291663494e-06, "loss": 0.7095, "step": 1590 }, { "epoch": 0.05, "grad_norm": 7.593956085045952, "learning_rate": 9.993659966205745e-06, "loss": 0.4695, "step": 1591 }, { "epoch": 0.05, "grad_norm": 6.460257147285828, "learning_rate": 9.993636597788151e-06, "loss": 0.3669, "step": 1592 }, { "epoch": 0.05, "grad_norm": 8.088118313438105, "learning_rate": 9.993613186410919e-06, "loss": 0.4992, "step": 1593 }, { "epoch": 0.05, "grad_norm": 6.4746272712563755, "learning_rate": 9.993589732074247e-06, "loss": 0.5185, "step": 1594 }, { "epoch": 0.05, "grad_norm": 6.0204841310108055, "learning_rate": 9.993566234778338e-06, "loss": 0.4406, "step": 1595 }, { "epoch": 0.05, "grad_norm": 6.755717716898951, "learning_rate": 9.993542694523393e-06, "loss": 0.2695, "step": 1596 }, { "epoch": 0.05, "grad_norm": 5.8585109226157215, "learning_rate": 9.993519111309615e-06, "loss": 0.7696, "step": 1597 }, { "epoch": 0.05, "grad_norm": 10.324123421097914, "learning_rate": 9.993495485137206e-06, "loss": 0.506, "step": 1598 }, { "epoch": 0.05, "grad_norm": 6.181006829673205, "learning_rate": 9.99347181600637e-06, "loss": 0.5365, "step": 1599 }, { "epoch": 0.05, "grad_norm": 7.490072673857576, "learning_rate": 9.993448103917313e-06, "loss": 0.5069, "step": 1600 }, { "epoch": 0.05, "grad_norm": 3.717411217004521, "learning_rate": 9.993424348870235e-06, "loss": 0.2934, "step": 1601 }, { "epoch": 0.05, "grad_norm": 5.134929992436282, "learning_rate": 9.993400550865343e-06, "loss": 0.3896, "step": 1602 }, { "epoch": 0.05, "grad_norm": 8.17334443806718, "learning_rate": 9.99337670990284e-06, "loss": 0.3357, "step": 1603 }, { "epoch": 0.05, "grad_norm": 6.836126392479522, "learning_rate": 9.993352825982934e-06, "loss": 0.715, "step": 1604 }, { "epoch": 0.05, "grad_norm": 6.101326834099705, "learning_rate": 9.993328899105825e-06, "loss": 0.5834, "step": 1605 }, { "epoch": 0.05, "grad_norm": 8.228757849731133, "learning_rate": 9.993304929271726e-06, "loss": 0.6308, "step": 1606 }, { "epoch": 0.05, "grad_norm": 7.87768993152349, "learning_rate": 9.993280916480836e-06, "loss": 0.5484, "step": 1607 }, { "epoch": 0.05, "grad_norm": 3.3603155904106656, "learning_rate": 9.993256860733367e-06, "loss": 0.2371, "step": 1608 }, { "epoch": 0.05, "grad_norm": 6.645929206308083, "learning_rate": 9.993232762029524e-06, "loss": 0.5795, "step": 1609 }, { "epoch": 0.05, "grad_norm": 16.243470039024647, "learning_rate": 9.993208620369514e-06, "loss": 0.6435, "step": 1610 }, { "epoch": 0.05, "grad_norm": 7.217089044956901, "learning_rate": 9.993184435753544e-06, "loss": 0.2353, "step": 1611 }, { "epoch": 0.05, "grad_norm": 12.391106267869144, "learning_rate": 9.993160208181825e-06, "loss": 0.6752, "step": 1612 }, { "epoch": 0.05, "grad_norm": 8.238858415726343, "learning_rate": 9.993135937654563e-06, "loss": 0.7079, "step": 1613 }, { "epoch": 0.05, "grad_norm": 7.524222137954229, "learning_rate": 9.993111624171968e-06, "loss": 0.5527, "step": 1614 }, { "epoch": 0.05, "grad_norm": 6.3075612454459185, "learning_rate": 9.993087267734247e-06, "loss": 0.5832, "step": 1615 }, { "epoch": 0.05, "grad_norm": 7.40780359383116, "learning_rate": 9.993062868341611e-06, "loss": 0.5971, "step": 1616 }, { "epoch": 0.05, "grad_norm": 5.964436595238531, "learning_rate": 9.99303842599427e-06, "loss": 0.6565, "step": 1617 }, { "epoch": 0.05, "grad_norm": 7.799067064881512, "learning_rate": 9.993013940692436e-06, "loss": 0.3625, "step": 1618 }, { "epoch": 0.05, "grad_norm": 2.9106832551931983, "learning_rate": 9.992989412436316e-06, "loss": 0.0966, "step": 1619 }, { "epoch": 0.05, "grad_norm": 6.516465459906077, "learning_rate": 9.992964841226122e-06, "loss": 0.3707, "step": 1620 }, { "epoch": 0.05, "grad_norm": 6.693559236773442, "learning_rate": 9.992940227062067e-06, "loss": 0.862, "step": 1621 }, { "epoch": 0.05, "grad_norm": 7.094163252434869, "learning_rate": 9.992915569944364e-06, "loss": 0.5597, "step": 1622 }, { "epoch": 0.05, "grad_norm": 14.28503387874422, "learning_rate": 9.992890869873221e-06, "loss": 0.6986, "step": 1623 }, { "epoch": 0.05, "grad_norm": 9.781801445867075, "learning_rate": 9.992866126848853e-06, "loss": 0.6127, "step": 1624 }, { "epoch": 0.05, "grad_norm": 8.297162239576759, "learning_rate": 9.992841340871471e-06, "loss": 0.4749, "step": 1625 }, { "epoch": 0.05, "grad_norm": 18.209917338569635, "learning_rate": 9.992816511941293e-06, "loss": 0.4163, "step": 1626 }, { "epoch": 0.05, "grad_norm": 13.458061349362438, "learning_rate": 9.992791640058526e-06, "loss": 0.8405, "step": 1627 }, { "epoch": 0.05, "grad_norm": 10.660390457817892, "learning_rate": 9.99276672522339e-06, "loss": 0.7942, "step": 1628 }, { "epoch": 0.05, "grad_norm": 7.6755201455471225, "learning_rate": 9.992741767436095e-06, "loss": 0.5558, "step": 1629 }, { "epoch": 0.05, "grad_norm": 9.01998394027938, "learning_rate": 9.992716766696857e-06, "loss": 0.5555, "step": 1630 }, { "epoch": 0.05, "grad_norm": 6.132020741275215, "learning_rate": 9.992691723005891e-06, "loss": 0.4585, "step": 1631 }, { "epoch": 0.05, "grad_norm": 5.689884859539183, "learning_rate": 9.992666636363416e-06, "loss": 0.5322, "step": 1632 }, { "epoch": 0.05, "grad_norm": 17.83573578608937, "learning_rate": 9.992641506769642e-06, "loss": 0.5594, "step": 1633 }, { "epoch": 0.05, "grad_norm": 8.880909094284345, "learning_rate": 9.992616334224786e-06, "loss": 1.0355, "step": 1634 }, { "epoch": 0.05, "grad_norm": 8.354378206867862, "learning_rate": 9.992591118729071e-06, "loss": 0.39, "step": 1635 }, { "epoch": 0.05, "grad_norm": 6.107127446957477, "learning_rate": 9.992565860282707e-06, "loss": 0.4453, "step": 1636 }, { "epoch": 0.05, "grad_norm": 4.288299965257299, "learning_rate": 9.992540558885912e-06, "loss": 0.4412, "step": 1637 }, { "epoch": 0.05, "grad_norm": 9.318399595942592, "learning_rate": 9.992515214538908e-06, "loss": 0.6614, "step": 1638 }, { "epoch": 0.05, "grad_norm": 5.683463917720814, "learning_rate": 9.992489827241908e-06, "loss": 0.5079, "step": 1639 }, { "epoch": 0.05, "grad_norm": 8.680902789143426, "learning_rate": 9.992464396995135e-06, "loss": 0.3985, "step": 1640 }, { "epoch": 0.05, "grad_norm": 3.344022721906974, "learning_rate": 9.992438923798805e-06, "loss": 0.123, "step": 1641 }, { "epoch": 0.05, "grad_norm": 9.182126816026916, "learning_rate": 9.992413407653137e-06, "loss": 0.681, "step": 1642 }, { "epoch": 0.05, "grad_norm": 4.436210552436815, "learning_rate": 9.99238784855835e-06, "loss": 0.2667, "step": 1643 }, { "epoch": 0.05, "grad_norm": 9.86727438893726, "learning_rate": 9.992362246514666e-06, "loss": 0.5121, "step": 1644 }, { "epoch": 0.05, "grad_norm": 4.429503529544763, "learning_rate": 9.992336601522305e-06, "loss": 0.5437, "step": 1645 }, { "epoch": 0.05, "grad_norm": 8.926605467022691, "learning_rate": 9.992310913581487e-06, "loss": 0.3131, "step": 1646 }, { "epoch": 0.05, "grad_norm": 4.822615062711394, "learning_rate": 9.992285182692431e-06, "loss": 0.265, "step": 1647 }, { "epoch": 0.05, "grad_norm": 8.317060725241872, "learning_rate": 9.992259408855362e-06, "loss": 0.6606, "step": 1648 }, { "epoch": 0.05, "grad_norm": 9.612670234194663, "learning_rate": 9.9922335920705e-06, "loss": 0.6579, "step": 1649 }, { "epoch": 0.05, "grad_norm": 5.232714944900026, "learning_rate": 9.992207732338066e-06, "loss": 0.4847, "step": 1650 }, { "epoch": 0.05, "grad_norm": 11.394036597133022, "learning_rate": 9.992181829658283e-06, "loss": 0.8528, "step": 1651 }, { "epoch": 0.05, "grad_norm": 10.981000791706153, "learning_rate": 9.992155884031376e-06, "loss": 0.7584, "step": 1652 }, { "epoch": 0.05, "grad_norm": 3.131443990001543, "learning_rate": 9.992129895457565e-06, "loss": 0.4248, "step": 1653 }, { "epoch": 0.05, "grad_norm": 11.187337266814653, "learning_rate": 9.992103863937077e-06, "loss": 1.0553, "step": 1654 }, { "epoch": 0.05, "grad_norm": 5.974814404913071, "learning_rate": 9.992077789470132e-06, "loss": 0.3895, "step": 1655 }, { "epoch": 0.05, "grad_norm": 5.60401852020754, "learning_rate": 9.992051672056958e-06, "loss": 0.2187, "step": 1656 }, { "epoch": 0.05, "grad_norm": 6.403812372669729, "learning_rate": 9.992025511697779e-06, "loss": 0.6819, "step": 1657 }, { "epoch": 0.05, "grad_norm": 5.810455701096665, "learning_rate": 9.991999308392816e-06, "loss": 0.5171, "step": 1658 }, { "epoch": 0.05, "grad_norm": 8.824734251003262, "learning_rate": 9.991973062142299e-06, "loss": 0.787, "step": 1659 }, { "epoch": 0.05, "grad_norm": 8.167867870610898, "learning_rate": 9.991946772946454e-06, "loss": 0.4888, "step": 1660 }, { "epoch": 0.05, "grad_norm": 9.779089558062651, "learning_rate": 9.991920440805505e-06, "loss": 0.2993, "step": 1661 }, { "epoch": 0.05, "grad_norm": 5.338750251478843, "learning_rate": 9.991894065719679e-06, "loss": 0.4584, "step": 1662 }, { "epoch": 0.05, "grad_norm": 9.919874671482741, "learning_rate": 9.991867647689201e-06, "loss": 0.9292, "step": 1663 }, { "epoch": 0.05, "grad_norm": 5.80320714708566, "learning_rate": 9.991841186714302e-06, "loss": 0.6305, "step": 1664 }, { "epoch": 0.05, "grad_norm": 4.43671196669866, "learning_rate": 9.991814682795207e-06, "loss": 0.2703, "step": 1665 }, { "epoch": 0.05, "grad_norm": 7.982230837304484, "learning_rate": 9.991788135932146e-06, "loss": 0.652, "step": 1666 }, { "epoch": 0.05, "grad_norm": 7.783345197005559, "learning_rate": 9.991761546125345e-06, "loss": 0.633, "step": 1667 }, { "epoch": 0.05, "grad_norm": 7.394690268570471, "learning_rate": 9.991734913375035e-06, "loss": 0.1846, "step": 1668 }, { "epoch": 0.05, "grad_norm": 6.997309303860769, "learning_rate": 9.991708237681443e-06, "loss": 0.385, "step": 1669 }, { "epoch": 0.05, "grad_norm": 1.8771636439941788, "learning_rate": 9.991681519044801e-06, "loss": 0.0778, "step": 1670 }, { "epoch": 0.05, "grad_norm": 3.332335696337438, "learning_rate": 9.991654757465336e-06, "loss": 0.2361, "step": 1671 }, { "epoch": 0.05, "grad_norm": 7.295796904880536, "learning_rate": 9.991627952943282e-06, "loss": 0.3465, "step": 1672 }, { "epoch": 0.05, "grad_norm": 8.482477808483988, "learning_rate": 9.991601105478865e-06, "loss": 0.7126, "step": 1673 }, { "epoch": 0.05, "grad_norm": 4.483591642643483, "learning_rate": 9.99157421507232e-06, "loss": 0.2215, "step": 1674 }, { "epoch": 0.05, "grad_norm": 7.677986957953332, "learning_rate": 9.991547281723876e-06, "loss": 0.4665, "step": 1675 }, { "epoch": 0.05, "grad_norm": 12.855222455670132, "learning_rate": 9.991520305433766e-06, "loss": 0.5062, "step": 1676 }, { "epoch": 0.05, "grad_norm": 5.12616203671684, "learning_rate": 9.99149328620222e-06, "loss": 0.5104, "step": 1677 }, { "epoch": 0.05, "grad_norm": 4.774170714660906, "learning_rate": 9.991466224029472e-06, "loss": 0.7733, "step": 1678 }, { "epoch": 0.05, "grad_norm": 7.816174733909696, "learning_rate": 9.991439118915756e-06, "loss": 0.5091, "step": 1679 }, { "epoch": 0.05, "grad_norm": 6.48772393576979, "learning_rate": 9.991411970861303e-06, "loss": 0.4127, "step": 1680 }, { "epoch": 0.05, "grad_norm": 8.826465876140398, "learning_rate": 9.991384779866347e-06, "loss": 0.8507, "step": 1681 }, { "epoch": 0.05, "grad_norm": 7.5191612251693565, "learning_rate": 9.991357545931122e-06, "loss": 0.6411, "step": 1682 }, { "epoch": 0.05, "grad_norm": 9.38662599032793, "learning_rate": 9.991330269055863e-06, "loss": 0.7547, "step": 1683 }, { "epoch": 0.05, "grad_norm": 5.299327698838516, "learning_rate": 9.991302949240804e-06, "loss": 0.6419, "step": 1684 }, { "epoch": 0.05, "grad_norm": 3.6904194476633374, "learning_rate": 9.99127558648618e-06, "loss": 0.1575, "step": 1685 }, { "epoch": 0.05, "grad_norm": 8.354842566896576, "learning_rate": 9.991248180792228e-06, "loss": 0.4542, "step": 1686 }, { "epoch": 0.05, "grad_norm": 10.728505846925728, "learning_rate": 9.99122073215918e-06, "loss": 0.8996, "step": 1687 }, { "epoch": 0.05, "grad_norm": 4.431316473156958, "learning_rate": 9.991193240587279e-06, "loss": 0.2329, "step": 1688 }, { "epoch": 0.05, "grad_norm": 7.5849076268324165, "learning_rate": 9.991165706076752e-06, "loss": 0.8076, "step": 1689 }, { "epoch": 0.05, "grad_norm": 8.206643358203172, "learning_rate": 9.991138128627844e-06, "loss": 0.8771, "step": 1690 }, { "epoch": 0.05, "grad_norm": 6.590389896140766, "learning_rate": 9.991110508240789e-06, "loss": 0.8296, "step": 1691 }, { "epoch": 0.05, "grad_norm": 9.275268965160425, "learning_rate": 9.991082844915825e-06, "loss": 1.0761, "step": 1692 }, { "epoch": 0.05, "grad_norm": 3.076499722887929, "learning_rate": 9.991055138653188e-06, "loss": 0.511, "step": 1693 }, { "epoch": 0.05, "grad_norm": 7.466333549028213, "learning_rate": 9.99102738945312e-06, "loss": 0.6321, "step": 1694 }, { "epoch": 0.05, "grad_norm": 8.852412641730377, "learning_rate": 9.990999597315858e-06, "loss": 0.5425, "step": 1695 }, { "epoch": 0.05, "grad_norm": 6.422811175418484, "learning_rate": 9.99097176224164e-06, "loss": 0.3358, "step": 1696 }, { "epoch": 0.05, "grad_norm": 9.430337120251806, "learning_rate": 9.990943884230706e-06, "loss": 0.4739, "step": 1697 }, { "epoch": 0.05, "grad_norm": 3.927951072660074, "learning_rate": 9.990915963283297e-06, "loss": 0.2313, "step": 1698 }, { "epoch": 0.05, "grad_norm": 12.346516359566412, "learning_rate": 9.990887999399652e-06, "loss": 0.4235, "step": 1699 }, { "epoch": 0.05, "grad_norm": 12.25976134660691, "learning_rate": 9.990859992580012e-06, "loss": 0.9796, "step": 1700 }, { "epoch": 0.05, "grad_norm": 13.577253763673403, "learning_rate": 9.990831942824619e-06, "loss": 0.6844, "step": 1701 }, { "epoch": 0.05, "grad_norm": 3.93127542799836, "learning_rate": 9.990803850133712e-06, "loss": 0.2889, "step": 1702 }, { "epoch": 0.05, "grad_norm": 6.653283976191508, "learning_rate": 9.990775714507535e-06, "loss": 0.3751, "step": 1703 }, { "epoch": 0.05, "grad_norm": 6.669860464235044, "learning_rate": 9.990747535946327e-06, "loss": 0.6984, "step": 1704 }, { "epoch": 0.05, "grad_norm": 5.499457722720308, "learning_rate": 9.990719314450334e-06, "loss": 0.4754, "step": 1705 }, { "epoch": 0.05, "grad_norm": 25.681828896112886, "learning_rate": 9.990691050019799e-06, "loss": 1.012, "step": 1706 }, { "epoch": 0.05, "grad_norm": 5.5983039603674625, "learning_rate": 9.990662742654961e-06, "loss": 0.5745, "step": 1707 }, { "epoch": 0.05, "grad_norm": 10.59868999519553, "learning_rate": 9.990634392356066e-06, "loss": 0.5328, "step": 1708 }, { "epoch": 0.05, "grad_norm": 5.652131325105191, "learning_rate": 9.99060599912336e-06, "loss": 0.1931, "step": 1709 }, { "epoch": 0.05, "grad_norm": 4.228475930818309, "learning_rate": 9.990577562957083e-06, "loss": 0.5694, "step": 1710 }, { "epoch": 0.05, "grad_norm": 12.437396025103242, "learning_rate": 9.99054908385748e-06, "loss": 1.0659, "step": 1711 }, { "epoch": 0.05, "grad_norm": 2.619242917196097, "learning_rate": 9.9905205618248e-06, "loss": 0.2692, "step": 1712 }, { "epoch": 0.05, "grad_norm": 7.863390766864953, "learning_rate": 9.990491996859287e-06, "loss": 0.4881, "step": 1713 }, { "epoch": 0.05, "grad_norm": 6.300277510086485, "learning_rate": 9.990463388961184e-06, "loss": 0.3605, "step": 1714 }, { "epoch": 0.05, "grad_norm": 5.153336875358309, "learning_rate": 9.99043473813074e-06, "loss": 0.5195, "step": 1715 }, { "epoch": 0.05, "grad_norm": 8.9513748349031, "learning_rate": 9.990406044368199e-06, "loss": 0.5747, "step": 1716 }, { "epoch": 0.05, "grad_norm": 6.353770868033347, "learning_rate": 9.99037730767381e-06, "loss": 0.253, "step": 1717 }, { "epoch": 0.05, "grad_norm": 5.205608290820557, "learning_rate": 9.99034852804782e-06, "loss": 0.8042, "step": 1718 }, { "epoch": 0.05, "grad_norm": 6.174518492300058, "learning_rate": 9.990319705490476e-06, "loss": 0.3896, "step": 1719 }, { "epoch": 0.05, "grad_norm": 7.4269974869898086, "learning_rate": 9.990290840002024e-06, "loss": 0.6735, "step": 1720 }, { "epoch": 0.05, "grad_norm": 7.313649144315876, "learning_rate": 9.990261931582717e-06, "loss": 0.5377, "step": 1721 }, { "epoch": 0.05, "grad_norm": 10.753205642020887, "learning_rate": 9.990232980232799e-06, "loss": 0.6754, "step": 1722 }, { "epoch": 0.05, "grad_norm": 8.56297089333016, "learning_rate": 9.990203985952522e-06, "loss": 0.6044, "step": 1723 }, { "epoch": 0.05, "grad_norm": 12.336921444886674, "learning_rate": 9.990174948742135e-06, "loss": 0.6219, "step": 1724 }, { "epoch": 0.05, "grad_norm": 10.555552799380651, "learning_rate": 9.990145868601887e-06, "loss": 0.6488, "step": 1725 }, { "epoch": 0.05, "grad_norm": 7.285230473608152, "learning_rate": 9.990116745532029e-06, "loss": 0.4051, "step": 1726 }, { "epoch": 0.05, "grad_norm": 7.246577408681609, "learning_rate": 9.99008757953281e-06, "loss": 0.2687, "step": 1727 }, { "epoch": 0.05, "grad_norm": 4.163119182413661, "learning_rate": 9.990058370604483e-06, "loss": 0.443, "step": 1728 }, { "epoch": 0.05, "grad_norm": 7.642962451204217, "learning_rate": 9.990029118747296e-06, "loss": 0.7442, "step": 1729 }, { "epoch": 0.05, "grad_norm": 7.448185721563857, "learning_rate": 9.989999823961504e-06, "loss": 0.6407, "step": 1730 }, { "epoch": 0.05, "grad_norm": 7.188627370240452, "learning_rate": 9.98997048624736e-06, "loss": 0.5904, "step": 1731 }, { "epoch": 0.05, "grad_norm": 10.359915740800895, "learning_rate": 9.989941105605113e-06, "loss": 0.5706, "step": 1732 }, { "epoch": 0.05, "grad_norm": 11.368468746588121, "learning_rate": 9.989911682035016e-06, "loss": 0.4065, "step": 1733 }, { "epoch": 0.05, "grad_norm": 6.795200746783491, "learning_rate": 9.989882215537323e-06, "loss": 0.5849, "step": 1734 }, { "epoch": 0.05, "grad_norm": 7.985342276970035, "learning_rate": 9.989852706112289e-06, "loss": 0.2801, "step": 1735 }, { "epoch": 0.05, "grad_norm": 10.404939998616854, "learning_rate": 9.989823153760165e-06, "loss": 0.563, "step": 1736 }, { "epoch": 0.05, "grad_norm": 7.581058524309058, "learning_rate": 9.989793558481209e-06, "loss": 0.5922, "step": 1737 }, { "epoch": 0.05, "grad_norm": 13.19803318051536, "learning_rate": 9.989763920275672e-06, "loss": 0.8736, "step": 1738 }, { "epoch": 0.05, "grad_norm": 8.870390863762868, "learning_rate": 9.989734239143811e-06, "loss": 0.3984, "step": 1739 }, { "epoch": 0.05, "grad_norm": 10.495273797987672, "learning_rate": 9.98970451508588e-06, "loss": 0.5, "step": 1740 }, { "epoch": 0.05, "grad_norm": 12.246578906607816, "learning_rate": 9.989674748102136e-06, "loss": 0.5168, "step": 1741 }, { "epoch": 0.05, "grad_norm": 8.037044053026618, "learning_rate": 9.989644938192832e-06, "loss": 0.5517, "step": 1742 }, { "epoch": 0.05, "grad_norm": 6.568608048172858, "learning_rate": 9.98961508535823e-06, "loss": 0.5885, "step": 1743 }, { "epoch": 0.05, "grad_norm": 6.488273900789787, "learning_rate": 9.989585189598583e-06, "loss": 0.4182, "step": 1744 }, { "epoch": 0.05, "grad_norm": 8.681705216295153, "learning_rate": 9.98955525091415e-06, "loss": 0.7206, "step": 1745 }, { "epoch": 0.05, "grad_norm": 6.156782601646477, "learning_rate": 9.989525269305184e-06, "loss": 0.6683, "step": 1746 }, { "epoch": 0.05, "grad_norm": 5.683144253394322, "learning_rate": 9.98949524477195e-06, "loss": 0.5243, "step": 1747 }, { "epoch": 0.05, "grad_norm": 5.563303428662477, "learning_rate": 9.989465177314702e-06, "loss": 0.4726, "step": 1748 }, { "epoch": 0.05, "grad_norm": 5.964287452762765, "learning_rate": 9.9894350669337e-06, "loss": 0.452, "step": 1749 }, { "epoch": 0.05, "grad_norm": 10.45340590391789, "learning_rate": 9.989404913629199e-06, "loss": 0.4219, "step": 1750 }, { "epoch": 0.05, "grad_norm": 8.70272256410435, "learning_rate": 9.989374717401465e-06, "loss": 0.7368, "step": 1751 }, { "epoch": 0.05, "grad_norm": 12.072162464184316, "learning_rate": 9.989344478250753e-06, "loss": 0.3059, "step": 1752 }, { "epoch": 0.05, "grad_norm": 4.8215938701248255, "learning_rate": 9.989314196177326e-06, "loss": 0.4672, "step": 1753 }, { "epoch": 0.05, "grad_norm": 6.684426180593219, "learning_rate": 9.989283871181443e-06, "loss": 0.7596, "step": 1754 }, { "epoch": 0.05, "grad_norm": 6.686883951493656, "learning_rate": 9.989253503263366e-06, "loss": 0.7985, "step": 1755 }, { "epoch": 0.05, "grad_norm": 7.994121179619949, "learning_rate": 9.989223092423352e-06, "loss": 0.7134, "step": 1756 }, { "epoch": 0.05, "grad_norm": 5.818463455503714, "learning_rate": 9.989192638661669e-06, "loss": 0.4294, "step": 1757 }, { "epoch": 0.05, "grad_norm": 9.060528718371412, "learning_rate": 9.989162141978575e-06, "loss": 0.4754, "step": 1758 }, { "epoch": 0.05, "grad_norm": 15.100205961932222, "learning_rate": 9.989131602374335e-06, "loss": 0.8827, "step": 1759 }, { "epoch": 0.05, "grad_norm": 8.92928026927811, "learning_rate": 9.989101019849207e-06, "loss": 0.4077, "step": 1760 }, { "epoch": 0.05, "grad_norm": 13.810713432385548, "learning_rate": 9.98907039440346e-06, "loss": 0.4309, "step": 1761 }, { "epoch": 0.05, "grad_norm": 6.878030213218945, "learning_rate": 9.989039726037354e-06, "loss": 0.5211, "step": 1762 }, { "epoch": 0.05, "grad_norm": 6.811422140263806, "learning_rate": 9.989009014751153e-06, "loss": 0.5221, "step": 1763 }, { "epoch": 0.05, "grad_norm": 9.115475426023135, "learning_rate": 9.98897826054512e-06, "loss": 0.8561, "step": 1764 }, { "epoch": 0.05, "grad_norm": 14.013964296273956, "learning_rate": 9.988947463419525e-06, "loss": 0.7672, "step": 1765 }, { "epoch": 0.05, "grad_norm": 5.395123404363363, "learning_rate": 9.988916623374627e-06, "loss": 0.5391, "step": 1766 }, { "epoch": 0.05, "grad_norm": 10.135361303091353, "learning_rate": 9.988885740410694e-06, "loss": 0.5668, "step": 1767 }, { "epoch": 0.05, "grad_norm": 5.734157339845307, "learning_rate": 9.988854814527991e-06, "loss": 0.4321, "step": 1768 }, { "epoch": 0.05, "grad_norm": 5.395044610349043, "learning_rate": 9.988823845726785e-06, "loss": 0.5186, "step": 1769 }, { "epoch": 0.05, "grad_norm": 12.098920480792371, "learning_rate": 9.988792834007342e-06, "loss": 1.5726, "step": 1770 }, { "epoch": 0.05, "grad_norm": 5.4602185213983985, "learning_rate": 9.988761779369926e-06, "loss": 0.4059, "step": 1771 }, { "epoch": 0.05, "grad_norm": 4.90085969020219, "learning_rate": 9.988730681814809e-06, "loss": 0.5724, "step": 1772 }, { "epoch": 0.05, "grad_norm": 8.570382937730427, "learning_rate": 9.988699541342257e-06, "loss": 0.8844, "step": 1773 }, { "epoch": 0.05, "grad_norm": 11.961676197924323, "learning_rate": 9.988668357952534e-06, "loss": 0.4409, "step": 1774 }, { "epoch": 0.05, "grad_norm": 10.5041869761901, "learning_rate": 9.988637131645912e-06, "loss": 0.5226, "step": 1775 }, { "epoch": 0.05, "grad_norm": 9.115114160974757, "learning_rate": 9.988605862422662e-06, "loss": 0.2767, "step": 1776 }, { "epoch": 0.05, "grad_norm": 8.355030335440317, "learning_rate": 9.988574550283047e-06, "loss": 0.9017, "step": 1777 }, { "epoch": 0.05, "grad_norm": 7.060249408876833, "learning_rate": 9.98854319522734e-06, "loss": 0.4471, "step": 1778 }, { "epoch": 0.05, "grad_norm": 9.265734512685452, "learning_rate": 9.98851179725581e-06, "loss": 0.5281, "step": 1779 }, { "epoch": 0.05, "grad_norm": 11.804447557900234, "learning_rate": 9.988480356368727e-06, "loss": 0.992, "step": 1780 }, { "epoch": 0.05, "grad_norm": 5.59712045238898, "learning_rate": 9.988448872566363e-06, "loss": 0.5744, "step": 1781 }, { "epoch": 0.05, "grad_norm": 8.288598021527863, "learning_rate": 9.988417345848986e-06, "loss": 0.4205, "step": 1782 }, { "epoch": 0.05, "grad_norm": 18.574794794851368, "learning_rate": 9.988385776216871e-06, "loss": 0.5411, "step": 1783 }, { "epoch": 0.05, "grad_norm": 9.888440220783053, "learning_rate": 9.988354163670286e-06, "loss": 0.5606, "step": 1784 }, { "epoch": 0.05, "grad_norm": 5.772677834878687, "learning_rate": 9.988322508209503e-06, "loss": 0.3285, "step": 1785 }, { "epoch": 0.05, "grad_norm": 5.423488541750032, "learning_rate": 9.988290809834797e-06, "loss": 0.447, "step": 1786 }, { "epoch": 0.05, "grad_norm": 13.491063727764084, "learning_rate": 9.98825906854644e-06, "loss": 0.4389, "step": 1787 }, { "epoch": 0.05, "grad_norm": 9.87106563562602, "learning_rate": 9.988227284344702e-06, "loss": 0.933, "step": 1788 }, { "epoch": 0.05, "grad_norm": 12.125004404598105, "learning_rate": 9.98819545722986e-06, "loss": 0.9887, "step": 1789 }, { "epoch": 0.05, "grad_norm": 9.286231608180678, "learning_rate": 9.988163587202188e-06, "loss": 0.4305, "step": 1790 }, { "epoch": 0.05, "grad_norm": 4.2532492166131775, "learning_rate": 9.988131674261958e-06, "loss": 0.342, "step": 1791 }, { "epoch": 0.05, "grad_norm": 7.758868219585197, "learning_rate": 9.988099718409445e-06, "loss": 0.3004, "step": 1792 }, { "epoch": 0.05, "grad_norm": 7.651726020622113, "learning_rate": 9.988067719644925e-06, "loss": 0.6325, "step": 1793 }, { "epoch": 0.05, "grad_norm": 8.110577150591581, "learning_rate": 9.988035677968672e-06, "loss": 0.6316, "step": 1794 }, { "epoch": 0.05, "grad_norm": 5.825185816997709, "learning_rate": 9.988003593380962e-06, "loss": 0.3213, "step": 1795 }, { "epoch": 0.05, "grad_norm": 12.509884321377752, "learning_rate": 9.987971465882072e-06, "loss": 0.8407, "step": 1796 }, { "epoch": 0.05, "grad_norm": 4.367333261312749, "learning_rate": 9.987939295472276e-06, "loss": 0.4138, "step": 1797 }, { "epoch": 0.05, "grad_norm": 5.988635267330304, "learning_rate": 9.987907082151852e-06, "loss": 0.3184, "step": 1798 }, { "epoch": 0.05, "grad_norm": 10.559674850139743, "learning_rate": 9.98787482592108e-06, "loss": 0.5735, "step": 1799 }, { "epoch": 0.05, "grad_norm": 8.751598103223865, "learning_rate": 9.987842526780235e-06, "loss": 0.3943, "step": 1800 }, { "epoch": 0.05, "grad_norm": 7.625850004752448, "learning_rate": 9.987810184729594e-06, "loss": 0.4614, "step": 1801 }, { "epoch": 0.05, "grad_norm": 6.9859454205049385, "learning_rate": 9.987777799769435e-06, "loss": 0.3645, "step": 1802 }, { "epoch": 0.05, "grad_norm": 9.992989848644514, "learning_rate": 9.98774537190004e-06, "loss": 0.8208, "step": 1803 }, { "epoch": 0.05, "grad_norm": 8.336251536266364, "learning_rate": 9.987712901121683e-06, "loss": 0.6578, "step": 1804 }, { "epoch": 0.05, "grad_norm": 7.994863500282733, "learning_rate": 9.987680387434647e-06, "loss": 0.548, "step": 1805 }, { "epoch": 0.05, "grad_norm": 6.260981563358795, "learning_rate": 9.987647830839211e-06, "loss": 0.6229, "step": 1806 }, { "epoch": 0.05, "grad_norm": 11.211681211040695, "learning_rate": 9.987615231335655e-06, "loss": 1.1742, "step": 1807 }, { "epoch": 0.05, "grad_norm": 20.461316816346823, "learning_rate": 9.98758258892426e-06, "loss": 0.5974, "step": 1808 }, { "epoch": 0.05, "grad_norm": 8.682447487375475, "learning_rate": 9.987549903605305e-06, "loss": 0.4764, "step": 1809 }, { "epoch": 0.05, "grad_norm": 5.112206330760508, "learning_rate": 9.987517175379072e-06, "loss": 0.5409, "step": 1810 }, { "epoch": 0.05, "grad_norm": 10.030790039165133, "learning_rate": 9.987484404245842e-06, "loss": 0.6279, "step": 1811 }, { "epoch": 0.05, "grad_norm": 12.9360535038754, "learning_rate": 9.9874515902059e-06, "loss": 0.497, "step": 1812 }, { "epoch": 0.05, "grad_norm": 9.052163326025578, "learning_rate": 9.987418733259525e-06, "loss": 0.4624, "step": 1813 }, { "epoch": 0.05, "grad_norm": 5.832765383546704, "learning_rate": 9.987385833407e-06, "loss": 0.6287, "step": 1814 }, { "epoch": 0.05, "grad_norm": 7.10315235835864, "learning_rate": 9.987352890648609e-06, "loss": 0.6165, "step": 1815 }, { "epoch": 0.05, "grad_norm": 4.054728954301728, "learning_rate": 9.987319904984636e-06, "loss": 0.4698, "step": 1816 }, { "epoch": 0.05, "grad_norm": 2.4755902231995695, "learning_rate": 9.987286876415365e-06, "loss": 0.1625, "step": 1817 }, { "epoch": 0.05, "grad_norm": 5.9384967971103215, "learning_rate": 9.987253804941078e-06, "loss": 0.5327, "step": 1818 }, { "epoch": 0.05, "grad_norm": 9.286638025302175, "learning_rate": 9.987220690562058e-06, "loss": 0.9522, "step": 1819 }, { "epoch": 0.05, "grad_norm": 9.625845141276578, "learning_rate": 9.987187533278595e-06, "loss": 0.3627, "step": 1820 }, { "epoch": 0.05, "grad_norm": 3.1299212805992105, "learning_rate": 9.987154333090972e-06, "loss": 0.2721, "step": 1821 }, { "epoch": 0.05, "grad_norm": 5.408135586940604, "learning_rate": 9.987121089999474e-06, "loss": 0.4304, "step": 1822 }, { "epoch": 0.05, "grad_norm": 3.9264575584466046, "learning_rate": 9.987087804004386e-06, "loss": 0.4185, "step": 1823 }, { "epoch": 0.05, "grad_norm": 8.747518896108117, "learning_rate": 9.987054475105997e-06, "loss": 0.5711, "step": 1824 }, { "epoch": 0.05, "grad_norm": 7.3747544328571575, "learning_rate": 9.987021103304592e-06, "loss": 0.4657, "step": 1825 }, { "epoch": 0.05, "grad_norm": 3.16632959596364, "learning_rate": 9.986987688600458e-06, "loss": 0.5175, "step": 1826 }, { "epoch": 0.05, "grad_norm": 5.518726201076322, "learning_rate": 9.986954230993884e-06, "loss": 0.5981, "step": 1827 }, { "epoch": 0.05, "grad_norm": 3.6992731946344484, "learning_rate": 9.986920730485156e-06, "loss": 0.3605, "step": 1828 }, { "epoch": 0.05, "grad_norm": 9.4335806078454, "learning_rate": 9.986887187074564e-06, "loss": 0.5111, "step": 1829 }, { "epoch": 0.05, "grad_norm": 6.94842452710062, "learning_rate": 9.986853600762393e-06, "loss": 0.3471, "step": 1830 }, { "epoch": 0.05, "grad_norm": 6.4856405654754266, "learning_rate": 9.986819971548936e-06, "loss": 0.3948, "step": 1831 }, { "epoch": 0.05, "grad_norm": 3.9189762177949086, "learning_rate": 9.98678629943448e-06, "loss": 0.4705, "step": 1832 }, { "epoch": 0.05, "grad_norm": 8.322916488424656, "learning_rate": 9.986752584419318e-06, "loss": 0.5814, "step": 1833 }, { "epoch": 0.05, "grad_norm": 6.047202167975185, "learning_rate": 9.986718826503735e-06, "loss": 0.8923, "step": 1834 }, { "epoch": 0.05, "grad_norm": 14.299120817300835, "learning_rate": 9.986685025688024e-06, "loss": 0.5625, "step": 1835 }, { "epoch": 0.05, "grad_norm": 5.88163115252421, "learning_rate": 9.986651181972477e-06, "loss": 0.389, "step": 1836 }, { "epoch": 0.05, "grad_norm": 7.700384832649101, "learning_rate": 9.986617295357384e-06, "loss": 0.5943, "step": 1837 }, { "epoch": 0.05, "grad_norm": 4.34551170356148, "learning_rate": 9.986583365843034e-06, "loss": 0.2511, "step": 1838 }, { "epoch": 0.05, "grad_norm": 6.929888080502352, "learning_rate": 9.986549393429723e-06, "loss": 0.4885, "step": 1839 }, { "epoch": 0.05, "grad_norm": 8.642246087371753, "learning_rate": 9.986515378117741e-06, "loss": 0.3866, "step": 1840 }, { "epoch": 0.05, "grad_norm": 9.894177418077538, "learning_rate": 9.986481319907381e-06, "loss": 1.0066, "step": 1841 }, { "epoch": 0.05, "grad_norm": 8.253402066301495, "learning_rate": 9.986447218798937e-06, "loss": 0.4065, "step": 1842 }, { "epoch": 0.05, "grad_norm": 8.688227780821332, "learning_rate": 9.986413074792703e-06, "loss": 0.6648, "step": 1843 }, { "epoch": 0.05, "grad_norm": 7.373994403105502, "learning_rate": 9.98637888788897e-06, "loss": 0.564, "step": 1844 }, { "epoch": 0.05, "grad_norm": 3.288967031511398, "learning_rate": 9.986344658088032e-06, "loss": 0.2987, "step": 1845 }, { "epoch": 0.05, "grad_norm": 8.682046015299607, "learning_rate": 9.986310385390186e-06, "loss": 0.4618, "step": 1846 }, { "epoch": 0.05, "grad_norm": 7.6269082355033015, "learning_rate": 9.986276069795724e-06, "loss": 0.584, "step": 1847 }, { "epoch": 0.05, "grad_norm": 7.888750514606332, "learning_rate": 9.986241711304945e-06, "loss": 0.4786, "step": 1848 }, { "epoch": 0.05, "grad_norm": 5.815861048102418, "learning_rate": 9.986207309918141e-06, "loss": 0.8297, "step": 1849 }, { "epoch": 0.05, "grad_norm": 6.235302466525587, "learning_rate": 9.986172865635611e-06, "loss": 0.3943, "step": 1850 }, { "epoch": 0.05, "grad_norm": 7.573117172616211, "learning_rate": 9.986138378457648e-06, "loss": 0.5412, "step": 1851 }, { "epoch": 0.05, "grad_norm": 8.189449398550309, "learning_rate": 9.986103848384552e-06, "loss": 1.001, "step": 1852 }, { "epoch": 0.05, "grad_norm": 8.617557631815332, "learning_rate": 9.986069275416618e-06, "loss": 0.706, "step": 1853 }, { "epoch": 0.05, "grad_norm": 7.412098245524418, "learning_rate": 9.986034659554144e-06, "loss": 0.4812, "step": 1854 }, { "epoch": 0.05, "grad_norm": 9.85296451475412, "learning_rate": 9.986000000797426e-06, "loss": 0.5186, "step": 1855 }, { "epoch": 0.05, "grad_norm": 7.7338960749763865, "learning_rate": 9.985965299146766e-06, "loss": 0.5678, "step": 1856 }, { "epoch": 0.05, "grad_norm": 6.872143065119105, "learning_rate": 9.985930554602462e-06, "loss": 0.7676, "step": 1857 }, { "epoch": 0.05, "grad_norm": 7.005474538365722, "learning_rate": 9.985895767164808e-06, "loss": 0.5186, "step": 1858 }, { "epoch": 0.05, "grad_norm": 4.213247454744848, "learning_rate": 9.985860936834107e-06, "loss": 0.3079, "step": 1859 }, { "epoch": 0.05, "grad_norm": 8.455469620722317, "learning_rate": 9.98582606361066e-06, "loss": 0.6677, "step": 1860 }, { "epoch": 0.05, "grad_norm": 5.383661255823592, "learning_rate": 9.985791147494762e-06, "loss": 0.5659, "step": 1861 }, { "epoch": 0.05, "grad_norm": 3.719970951479634, "learning_rate": 9.985756188486721e-06, "loss": 0.4497, "step": 1862 }, { "epoch": 0.05, "grad_norm": 7.792573464677287, "learning_rate": 9.98572118658683e-06, "loss": 0.4873, "step": 1863 }, { "epoch": 0.05, "grad_norm": 6.358172666220452, "learning_rate": 9.985686141795396e-06, "loss": 0.562, "step": 1864 }, { "epoch": 0.05, "grad_norm": 9.934245410324761, "learning_rate": 9.985651054112716e-06, "loss": 0.7233, "step": 1865 }, { "epoch": 0.05, "grad_norm": 7.819922743315677, "learning_rate": 9.985615923539095e-06, "loss": 0.8411, "step": 1866 }, { "epoch": 0.05, "grad_norm": 12.381181964562172, "learning_rate": 9.985580750074832e-06, "loss": 0.7027, "step": 1867 }, { "epoch": 0.05, "grad_norm": 8.485569091846463, "learning_rate": 9.985545533720235e-06, "loss": 0.3458, "step": 1868 }, { "epoch": 0.05, "grad_norm": 7.565783835961098, "learning_rate": 9.9855102744756e-06, "loss": 0.6648, "step": 1869 }, { "epoch": 0.05, "grad_norm": 10.375067423406085, "learning_rate": 9.985474972341236e-06, "loss": 0.6598, "step": 1870 }, { "epoch": 0.05, "grad_norm": 2.5808202930815285, "learning_rate": 9.985439627317445e-06, "loss": 0.332, "step": 1871 }, { "epoch": 0.05, "grad_norm": 5.319121195193248, "learning_rate": 9.98540423940453e-06, "loss": 0.3021, "step": 1872 }, { "epoch": 0.05, "grad_norm": 7.870457928820812, "learning_rate": 9.985368808602797e-06, "loss": 0.4296, "step": 1873 }, { "epoch": 0.05, "grad_norm": 9.364804687825874, "learning_rate": 9.98533333491255e-06, "loss": 0.4446, "step": 1874 }, { "epoch": 0.05, "grad_norm": 6.361760097567454, "learning_rate": 9.985297818334091e-06, "loss": 0.6996, "step": 1875 }, { "epoch": 0.05, "grad_norm": 5.086585968914005, "learning_rate": 9.985262258867731e-06, "loss": 0.392, "step": 1876 }, { "epoch": 0.05, "grad_norm": 5.776315263529306, "learning_rate": 9.985226656513775e-06, "loss": 0.3437, "step": 1877 }, { "epoch": 0.05, "grad_norm": 5.91582024479287, "learning_rate": 9.985191011272525e-06, "loss": 0.3373, "step": 1878 }, { "epoch": 0.05, "grad_norm": 6.7370515890004885, "learning_rate": 9.985155323144293e-06, "loss": 0.7487, "step": 1879 }, { "epoch": 0.05, "grad_norm": 11.49049847377307, "learning_rate": 9.985119592129384e-06, "loss": 0.4684, "step": 1880 }, { "epoch": 0.05, "grad_norm": 9.763483456592391, "learning_rate": 9.985083818228103e-06, "loss": 0.4278, "step": 1881 }, { "epoch": 0.05, "grad_norm": 4.990896449054879, "learning_rate": 9.98504800144076e-06, "loss": 0.5007, "step": 1882 }, { "epoch": 0.05, "grad_norm": 5.955482296538556, "learning_rate": 9.985012141767663e-06, "loss": 0.3338, "step": 1883 }, { "epoch": 0.05, "grad_norm": 11.359088542532357, "learning_rate": 9.98497623920912e-06, "loss": 0.7706, "step": 1884 }, { "epoch": 0.05, "grad_norm": 7.982186273131855, "learning_rate": 9.98494029376544e-06, "loss": 0.208, "step": 1885 }, { "epoch": 0.05, "grad_norm": 8.215558378777285, "learning_rate": 9.984904305436934e-06, "loss": 0.5421, "step": 1886 }, { "epoch": 0.05, "grad_norm": 8.135023419347064, "learning_rate": 9.984868274223909e-06, "loss": 0.6878, "step": 1887 }, { "epoch": 0.05, "grad_norm": 8.054197664656753, "learning_rate": 9.984832200126676e-06, "loss": 0.8061, "step": 1888 }, { "epoch": 0.05, "grad_norm": 10.374191896151185, "learning_rate": 9.984796083145546e-06, "loss": 1.2281, "step": 1889 }, { "epoch": 0.05, "grad_norm": 11.265926362344013, "learning_rate": 9.984759923280826e-06, "loss": 1.1318, "step": 1890 }, { "epoch": 0.05, "grad_norm": 6.463801089564225, "learning_rate": 9.984723720532834e-06, "loss": 0.7013, "step": 1891 }, { "epoch": 0.05, "grad_norm": 4.574577441269774, "learning_rate": 9.984687474901875e-06, "loss": 0.3952, "step": 1892 }, { "epoch": 0.05, "grad_norm": 7.969306417748553, "learning_rate": 9.984651186388266e-06, "loss": 0.2468, "step": 1893 }, { "epoch": 0.05, "grad_norm": 6.992070850273047, "learning_rate": 9.984614854992313e-06, "loss": 0.6175, "step": 1894 }, { "epoch": 0.05, "grad_norm": 8.278983615745222, "learning_rate": 9.984578480714336e-06, "loss": 0.7489, "step": 1895 }, { "epoch": 0.05, "grad_norm": 9.042012174365667, "learning_rate": 9.984542063554641e-06, "loss": 0.8188, "step": 1896 }, { "epoch": 0.05, "grad_norm": 10.157592309132845, "learning_rate": 9.984505603513546e-06, "loss": 0.6065, "step": 1897 }, { "epoch": 0.05, "grad_norm": 3.513513702513517, "learning_rate": 9.984469100591362e-06, "loss": 0.2136, "step": 1898 }, { "epoch": 0.05, "grad_norm": 5.572553571714797, "learning_rate": 9.984432554788403e-06, "loss": 0.5072, "step": 1899 }, { "epoch": 0.05, "grad_norm": 7.942111020167436, "learning_rate": 9.984395966104987e-06, "loss": 0.6963, "step": 1900 }, { "epoch": 0.05, "grad_norm": 6.539556632747466, "learning_rate": 9.984359334541424e-06, "loss": 0.6156, "step": 1901 }, { "epoch": 0.05, "grad_norm": 6.18202830661302, "learning_rate": 9.984322660098033e-06, "loss": 0.6501, "step": 1902 }, { "epoch": 0.05, "grad_norm": 7.982235496814869, "learning_rate": 9.984285942775129e-06, "loss": 0.5594, "step": 1903 }, { "epoch": 0.05, "grad_norm": 8.024165372138937, "learning_rate": 9.984249182573025e-06, "loss": 0.5, "step": 1904 }, { "epoch": 0.05, "grad_norm": 9.073646162789245, "learning_rate": 9.984212379492037e-06, "loss": 0.3943, "step": 1905 }, { "epoch": 0.05, "grad_norm": 6.10615653803038, "learning_rate": 9.984175533532486e-06, "loss": 0.5295, "step": 1906 }, { "epoch": 0.05, "grad_norm": 8.262615730525482, "learning_rate": 9.984138644694686e-06, "loss": 0.3949, "step": 1907 }, { "epoch": 0.05, "grad_norm": 7.171356115914183, "learning_rate": 9.984101712978956e-06, "loss": 0.4699, "step": 1908 }, { "epoch": 0.05, "grad_norm": 6.0863509031633045, "learning_rate": 9.98406473838561e-06, "loss": 0.7195, "step": 1909 }, { "epoch": 0.05, "grad_norm": 9.262272915134998, "learning_rate": 9.984027720914971e-06, "loss": 0.3538, "step": 1910 }, { "epoch": 0.05, "grad_norm": 6.448832638398309, "learning_rate": 9.983990660567355e-06, "loss": 0.7081, "step": 1911 }, { "epoch": 0.05, "grad_norm": 7.279821460389512, "learning_rate": 9.983953557343081e-06, "loss": 0.5063, "step": 1912 }, { "epoch": 0.05, "grad_norm": 11.723560373383554, "learning_rate": 9.983916411242466e-06, "loss": 0.5207, "step": 1913 }, { "epoch": 0.05, "grad_norm": 5.1306509172359105, "learning_rate": 9.983879222265835e-06, "loss": 0.3349, "step": 1914 }, { "epoch": 0.05, "grad_norm": 8.712736948039487, "learning_rate": 9.983841990413504e-06, "loss": 0.5465, "step": 1915 }, { "epoch": 0.05, "grad_norm": 6.245440803834265, "learning_rate": 9.983804715685791e-06, "loss": 0.6617, "step": 1916 }, { "epoch": 0.05, "grad_norm": 7.002571859648148, "learning_rate": 9.983767398083022e-06, "loss": 0.4639, "step": 1917 }, { "epoch": 0.05, "grad_norm": 4.3034501694632725, "learning_rate": 9.983730037605516e-06, "loss": 0.6044, "step": 1918 }, { "epoch": 0.05, "grad_norm": 7.23198691844179, "learning_rate": 9.983692634253594e-06, "loss": 0.5883, "step": 1919 }, { "epoch": 0.05, "grad_norm": 4.803554568815702, "learning_rate": 9.983655188027576e-06, "loss": 0.295, "step": 1920 }, { "epoch": 0.06, "grad_norm": 9.63416497975708, "learning_rate": 9.983617698927788e-06, "loss": 0.8236, "step": 1921 }, { "epoch": 0.06, "grad_norm": 7.325893265611343, "learning_rate": 9.98358016695455e-06, "loss": 0.7288, "step": 1922 }, { "epoch": 0.06, "grad_norm": 5.865459324768619, "learning_rate": 9.983542592108187e-06, "loss": 0.3801, "step": 1923 }, { "epoch": 0.06, "grad_norm": 2.9377925808258354, "learning_rate": 9.983504974389018e-06, "loss": 0.3596, "step": 1924 }, { "epoch": 0.06, "grad_norm": 14.488944475838322, "learning_rate": 9.98346731379737e-06, "loss": 0.4857, "step": 1925 }, { "epoch": 0.06, "grad_norm": 7.348122905897352, "learning_rate": 9.983429610333565e-06, "loss": 0.4881, "step": 1926 }, { "epoch": 0.06, "grad_norm": 9.672427751710558, "learning_rate": 9.983391863997932e-06, "loss": 0.6146, "step": 1927 }, { "epoch": 0.06, "grad_norm": 7.370720737158658, "learning_rate": 9.983354074790791e-06, "loss": 0.5129, "step": 1928 }, { "epoch": 0.06, "grad_norm": 3.2774398480445144, "learning_rate": 9.983316242712468e-06, "loss": 0.4007, "step": 1929 }, { "epoch": 0.06, "grad_norm": 10.797429690210494, "learning_rate": 9.983278367763289e-06, "loss": 0.6972, "step": 1930 }, { "epoch": 0.06, "grad_norm": 4.976625355274176, "learning_rate": 9.983240449943579e-06, "loss": 0.5085, "step": 1931 }, { "epoch": 0.06, "grad_norm": 11.273198157589837, "learning_rate": 9.983202489253666e-06, "loss": 0.9992, "step": 1932 }, { "epoch": 0.06, "grad_norm": 14.695223072353826, "learning_rate": 9.983164485693875e-06, "loss": 0.9971, "step": 1933 }, { "epoch": 0.06, "grad_norm": 7.004005308030501, "learning_rate": 9.983126439264534e-06, "loss": 0.4156, "step": 1934 }, { "epoch": 0.06, "grad_norm": 12.035703632371044, "learning_rate": 9.98308834996597e-06, "loss": 0.4663, "step": 1935 }, { "epoch": 0.06, "grad_norm": 11.853876781885825, "learning_rate": 9.98305021779851e-06, "loss": 0.5401, "step": 1936 }, { "epoch": 0.06, "grad_norm": 8.854380442712635, "learning_rate": 9.983012042762482e-06, "loss": 0.3785, "step": 1937 }, { "epoch": 0.06, "grad_norm": 15.912678856455061, "learning_rate": 9.982973824858215e-06, "loss": 0.5862, "step": 1938 }, { "epoch": 0.06, "grad_norm": 9.80794015861788, "learning_rate": 9.982935564086038e-06, "loss": 0.7034, "step": 1939 }, { "epoch": 0.06, "grad_norm": 5.6133429006908795, "learning_rate": 9.982897260446281e-06, "loss": 0.4511, "step": 1940 }, { "epoch": 0.06, "grad_norm": 7.861449375520681, "learning_rate": 9.98285891393927e-06, "loss": 0.488, "step": 1941 }, { "epoch": 0.06, "grad_norm": 8.078338782545606, "learning_rate": 9.98282052456534e-06, "loss": 0.2817, "step": 1942 }, { "epoch": 0.06, "grad_norm": 8.344343071479964, "learning_rate": 9.982782092324817e-06, "loss": 0.5456, "step": 1943 }, { "epoch": 0.06, "grad_norm": 5.312762848578467, "learning_rate": 9.982743617218034e-06, "loss": 0.8071, "step": 1944 }, { "epoch": 0.06, "grad_norm": 5.802944327208823, "learning_rate": 9.98270509924532e-06, "loss": 0.4765, "step": 1945 }, { "epoch": 0.06, "grad_norm": 8.457452396187813, "learning_rate": 9.982666538407008e-06, "loss": 0.4234, "step": 1946 }, { "epoch": 0.06, "grad_norm": 7.440253765741651, "learning_rate": 9.982627934703428e-06, "loss": 0.6405, "step": 1947 }, { "epoch": 0.06, "grad_norm": 5.383728923769639, "learning_rate": 9.982589288134916e-06, "loss": 0.5799, "step": 1948 }, { "epoch": 0.06, "grad_norm": 5.496212695691176, "learning_rate": 9.9825505987018e-06, "loss": 0.3548, "step": 1949 }, { "epoch": 0.06, "grad_norm": 8.401493925127257, "learning_rate": 9.982511866404414e-06, "loss": 0.9345, "step": 1950 }, { "epoch": 0.06, "grad_norm": 10.858663895191945, "learning_rate": 9.982473091243095e-06, "loss": 0.5527, "step": 1951 }, { "epoch": 0.06, "grad_norm": 8.566263582961135, "learning_rate": 9.98243427321817e-06, "loss": 0.376, "step": 1952 }, { "epoch": 0.06, "grad_norm": 6.442521701372076, "learning_rate": 9.982395412329978e-06, "loss": 0.4853, "step": 1953 }, { "epoch": 0.06, "grad_norm": 6.0571438439771965, "learning_rate": 9.982356508578852e-06, "loss": 0.482, "step": 1954 }, { "epoch": 0.06, "grad_norm": 8.939973022198348, "learning_rate": 9.982317561965126e-06, "loss": 0.4422, "step": 1955 }, { "epoch": 0.06, "grad_norm": 6.5030333703628065, "learning_rate": 9.982278572489134e-06, "loss": 0.6421, "step": 1956 }, { "epoch": 0.06, "grad_norm": 4.782798466479585, "learning_rate": 9.982239540151215e-06, "loss": 0.3684, "step": 1957 }, { "epoch": 0.06, "grad_norm": 4.810936388918186, "learning_rate": 9.982200464951702e-06, "loss": 0.2995, "step": 1958 }, { "epoch": 0.06, "grad_norm": 11.47596584402475, "learning_rate": 9.982161346890931e-06, "loss": 0.7951, "step": 1959 }, { "epoch": 0.06, "grad_norm": 7.3355061624649425, "learning_rate": 9.98212218596924e-06, "loss": 0.58, "step": 1960 }, { "epoch": 0.06, "grad_norm": 6.830186964634481, "learning_rate": 9.982082982186967e-06, "loss": 0.6458, "step": 1961 }, { "epoch": 0.06, "grad_norm": 3.083000001846101, "learning_rate": 9.982043735544444e-06, "loss": 0.4144, "step": 1962 }, { "epoch": 0.06, "grad_norm": 8.060244281654487, "learning_rate": 9.982004446042015e-06, "loss": 0.551, "step": 1963 }, { "epoch": 0.06, "grad_norm": 3.7145409686285924, "learning_rate": 9.981965113680014e-06, "loss": 0.2812, "step": 1964 }, { "epoch": 0.06, "grad_norm": 7.924912451612489, "learning_rate": 9.98192573845878e-06, "loss": 0.5423, "step": 1965 }, { "epoch": 0.06, "grad_norm": 5.998722894173724, "learning_rate": 9.981886320378654e-06, "loss": 0.567, "step": 1966 }, { "epoch": 0.06, "grad_norm": 7.098641209132295, "learning_rate": 9.981846859439971e-06, "loss": 0.6107, "step": 1967 }, { "epoch": 0.06, "grad_norm": 7.309991129023104, "learning_rate": 9.981807355643075e-06, "loss": 0.6262, "step": 1968 }, { "epoch": 0.06, "grad_norm": 5.098994030566106, "learning_rate": 9.981767808988303e-06, "loss": 0.3547, "step": 1969 }, { "epoch": 0.06, "grad_norm": 6.027901386960508, "learning_rate": 9.981728219475997e-06, "loss": 0.2045, "step": 1970 }, { "epoch": 0.06, "grad_norm": 6.840684317326835, "learning_rate": 9.981688587106495e-06, "loss": 0.859, "step": 1971 }, { "epoch": 0.06, "grad_norm": 10.322015891757877, "learning_rate": 9.98164891188014e-06, "loss": 0.8069, "step": 1972 }, { "epoch": 0.06, "grad_norm": 12.356166341109605, "learning_rate": 9.981609193797272e-06, "loss": 0.5378, "step": 1973 }, { "epoch": 0.06, "grad_norm": 5.600689850618933, "learning_rate": 9.981569432858234e-06, "loss": 0.3422, "step": 1974 }, { "epoch": 0.06, "grad_norm": 5.710036763809422, "learning_rate": 9.981529629063367e-06, "loss": 0.4244, "step": 1975 }, { "epoch": 0.06, "grad_norm": 6.419602709090799, "learning_rate": 9.981489782413015e-06, "loss": 0.7768, "step": 1976 }, { "epoch": 0.06, "grad_norm": 5.704557641253637, "learning_rate": 9.98144989290752e-06, "loss": 0.3508, "step": 1977 }, { "epoch": 0.06, "grad_norm": 10.970908536748636, "learning_rate": 9.981409960547223e-06, "loss": 0.5702, "step": 1978 }, { "epoch": 0.06, "grad_norm": 3.57431028488601, "learning_rate": 9.98136998533247e-06, "loss": 0.2611, "step": 1979 }, { "epoch": 0.06, "grad_norm": 12.274235787463908, "learning_rate": 9.981329967263604e-06, "loss": 0.6312, "step": 1980 }, { "epoch": 0.06, "grad_norm": 3.801907191268038, "learning_rate": 9.981289906340971e-06, "loss": 0.335, "step": 1981 }, { "epoch": 0.06, "grad_norm": 6.9749599551346835, "learning_rate": 9.981249802564915e-06, "loss": 0.4321, "step": 1982 }, { "epoch": 0.06, "grad_norm": 5.676398877054517, "learning_rate": 9.981209655935777e-06, "loss": 0.4532, "step": 1983 }, { "epoch": 0.06, "grad_norm": 13.547537007595439, "learning_rate": 9.981169466453909e-06, "loss": 0.7009, "step": 1984 }, { "epoch": 0.06, "grad_norm": 5.656087799275682, "learning_rate": 9.98112923411965e-06, "loss": 0.5178, "step": 1985 }, { "epoch": 0.06, "grad_norm": 9.567665799122008, "learning_rate": 9.981088958933352e-06, "loss": 0.8575, "step": 1986 }, { "epoch": 0.06, "grad_norm": 5.667534757018732, "learning_rate": 9.98104864089536e-06, "loss": 0.5478, "step": 1987 }, { "epoch": 0.06, "grad_norm": 4.980178860474296, "learning_rate": 9.981008280006018e-06, "loss": 0.3085, "step": 1988 }, { "epoch": 0.06, "grad_norm": 9.711409074745076, "learning_rate": 9.980967876265675e-06, "loss": 0.7063, "step": 1989 }, { "epoch": 0.06, "grad_norm": 9.483739086872387, "learning_rate": 9.980927429674678e-06, "loss": 0.7555, "step": 1990 }, { "epoch": 0.06, "grad_norm": 5.134029575088847, "learning_rate": 9.980886940233378e-06, "loss": 0.2747, "step": 1991 }, { "epoch": 0.06, "grad_norm": 7.946608896184181, "learning_rate": 9.980846407942118e-06, "loss": 0.4039, "step": 1992 }, { "epoch": 0.06, "grad_norm": 8.255923138948225, "learning_rate": 9.980805832801252e-06, "loss": 0.4434, "step": 1993 }, { "epoch": 0.06, "grad_norm": 7.202156779656185, "learning_rate": 9.980765214811125e-06, "loss": 0.3217, "step": 1994 }, { "epoch": 0.06, "grad_norm": 7.73779239872391, "learning_rate": 9.98072455397209e-06, "loss": 0.8215, "step": 1995 }, { "epoch": 0.06, "grad_norm": 6.281195113668197, "learning_rate": 9.980683850284493e-06, "loss": 0.6801, "step": 1996 }, { "epoch": 0.06, "grad_norm": 4.4439389967933245, "learning_rate": 9.980643103748686e-06, "loss": 0.2517, "step": 1997 }, { "epoch": 0.06, "grad_norm": 7.50900878267261, "learning_rate": 9.98060231436502e-06, "loss": 0.6124, "step": 1998 }, { "epoch": 0.06, "grad_norm": 6.766906969222699, "learning_rate": 9.980561482133848e-06, "loss": 0.3266, "step": 1999 }, { "epoch": 0.06, "grad_norm": 4.140614606736293, "learning_rate": 9.980520607055515e-06, "loss": 0.4429, "step": 2000 }, { "epoch": 0.06, "grad_norm": 5.330698012904037, "learning_rate": 9.98047968913038e-06, "loss": 0.3817, "step": 2001 }, { "epoch": 0.06, "grad_norm": 9.419416774343008, "learning_rate": 9.980438728358787e-06, "loss": 0.5258, "step": 2002 }, { "epoch": 0.06, "grad_norm": 9.383585749517861, "learning_rate": 9.980397724741097e-06, "loss": 1.1559, "step": 2003 }, { "epoch": 0.06, "grad_norm": 8.568911358783847, "learning_rate": 9.980356678277657e-06, "loss": 0.8081, "step": 2004 }, { "epoch": 0.06, "grad_norm": 8.691559645767136, "learning_rate": 9.980315588968821e-06, "loss": 0.6789, "step": 2005 }, { "epoch": 0.06, "grad_norm": 8.082576270553313, "learning_rate": 9.980274456814945e-06, "loss": 0.3917, "step": 2006 }, { "epoch": 0.06, "grad_norm": 9.26330372647659, "learning_rate": 9.98023328181638e-06, "loss": 0.6643, "step": 2007 }, { "epoch": 0.06, "grad_norm": 6.926374122816879, "learning_rate": 9.980192063973481e-06, "loss": 0.3328, "step": 2008 }, { "epoch": 0.06, "grad_norm": 10.080084842294555, "learning_rate": 9.980150803286602e-06, "loss": 0.6481, "step": 2009 }, { "epoch": 0.06, "grad_norm": 4.243379035657643, "learning_rate": 9.9801094997561e-06, "loss": 0.3025, "step": 2010 }, { "epoch": 0.06, "grad_norm": 8.572598495852036, "learning_rate": 9.980068153382329e-06, "loss": 0.3814, "step": 2011 }, { "epoch": 0.06, "grad_norm": 6.152594047177897, "learning_rate": 9.980026764165644e-06, "loss": 0.3716, "step": 2012 }, { "epoch": 0.06, "grad_norm": 7.102808474777433, "learning_rate": 9.979985332106402e-06, "loss": 0.139, "step": 2013 }, { "epoch": 0.06, "grad_norm": 12.078858505025375, "learning_rate": 9.979943857204963e-06, "loss": 0.8674, "step": 2014 }, { "epoch": 0.06, "grad_norm": 7.01675147717371, "learning_rate": 9.979902339461675e-06, "loss": 0.7698, "step": 2015 }, { "epoch": 0.06, "grad_norm": 7.262016007124464, "learning_rate": 9.979860778876905e-06, "loss": 0.4068, "step": 2016 }, { "epoch": 0.06, "grad_norm": 8.68405373916972, "learning_rate": 9.979819175451002e-06, "loss": 0.6423, "step": 2017 }, { "epoch": 0.06, "grad_norm": 7.729146774005911, "learning_rate": 9.97977752918433e-06, "loss": 0.7649, "step": 2018 }, { "epoch": 0.06, "grad_norm": 6.916652250466393, "learning_rate": 9.979735840077243e-06, "loss": 0.2856, "step": 2019 }, { "epoch": 0.06, "grad_norm": 8.145907103325618, "learning_rate": 9.979694108130104e-06, "loss": 0.399, "step": 2020 }, { "epoch": 0.06, "grad_norm": 6.616896081337586, "learning_rate": 9.979652333343268e-06, "loss": 0.4644, "step": 2021 }, { "epoch": 0.06, "grad_norm": 5.858781871021231, "learning_rate": 9.979610515717097e-06, "loss": 0.3594, "step": 2022 }, { "epoch": 0.06, "grad_norm": 9.508884090572838, "learning_rate": 9.97956865525195e-06, "loss": 0.3639, "step": 2023 }, { "epoch": 0.06, "grad_norm": 3.835760574766812, "learning_rate": 9.979526751948186e-06, "loss": 0.1645, "step": 2024 }, { "epoch": 0.06, "grad_norm": 4.214275417246027, "learning_rate": 9.979484805806166e-06, "loss": 0.4789, "step": 2025 }, { "epoch": 0.06, "grad_norm": 6.42824266592143, "learning_rate": 9.979442816826253e-06, "loss": 0.2961, "step": 2026 }, { "epoch": 0.06, "grad_norm": 9.530392167307225, "learning_rate": 9.979400785008805e-06, "loss": 0.6701, "step": 2027 }, { "epoch": 0.06, "grad_norm": 9.02257620263733, "learning_rate": 9.979358710354186e-06, "loss": 0.3857, "step": 2028 }, { "epoch": 0.06, "grad_norm": 10.08890372227135, "learning_rate": 9.979316592862758e-06, "loss": 0.4536, "step": 2029 }, { "epoch": 0.06, "grad_norm": 6.402702205135651, "learning_rate": 9.979274432534881e-06, "loss": 0.4367, "step": 2030 }, { "epoch": 0.06, "grad_norm": 4.006597680583871, "learning_rate": 9.979232229370918e-06, "loss": 0.5738, "step": 2031 }, { "epoch": 0.06, "grad_norm": 8.133319996473707, "learning_rate": 9.979189983371236e-06, "loss": 0.2754, "step": 2032 }, { "epoch": 0.06, "grad_norm": 3.9701526596159393, "learning_rate": 9.979147694536194e-06, "loss": 0.3247, "step": 2033 }, { "epoch": 0.06, "grad_norm": 6.6227243221736, "learning_rate": 9.979105362866158e-06, "loss": 0.7405, "step": 2034 }, { "epoch": 0.06, "grad_norm": 5.99750204540248, "learning_rate": 9.979062988361492e-06, "loss": 0.2866, "step": 2035 }, { "epoch": 0.06, "grad_norm": 5.867076847338414, "learning_rate": 9.979020571022559e-06, "loss": 0.5341, "step": 2036 }, { "epoch": 0.06, "grad_norm": 5.616878814067758, "learning_rate": 9.978978110849725e-06, "loss": 0.375, "step": 2037 }, { "epoch": 0.06, "grad_norm": 5.328000869871189, "learning_rate": 9.978935607843356e-06, "loss": 0.3699, "step": 2038 }, { "epoch": 0.06, "grad_norm": 2.421360699590178, "learning_rate": 9.978893062003817e-06, "loss": 0.2841, "step": 2039 }, { "epoch": 0.06, "grad_norm": 5.400292769195642, "learning_rate": 9.978850473331474e-06, "loss": 0.507, "step": 2040 }, { "epoch": 0.06, "grad_norm": 7.451584886776041, "learning_rate": 9.978807841826692e-06, "loss": 0.667, "step": 2041 }, { "epoch": 0.06, "grad_norm": 4.487598337355167, "learning_rate": 9.978765167489841e-06, "loss": 0.3697, "step": 2042 }, { "epoch": 0.06, "grad_norm": 9.20145586353295, "learning_rate": 9.978722450321287e-06, "loss": 0.5529, "step": 2043 }, { "epoch": 0.06, "grad_norm": 5.465551432773681, "learning_rate": 9.978679690321395e-06, "loss": 0.6928, "step": 2044 }, { "epoch": 0.06, "grad_norm": 5.968338158264738, "learning_rate": 9.978636887490536e-06, "loss": 0.55, "step": 2045 }, { "epoch": 0.06, "grad_norm": 10.801893908839425, "learning_rate": 9.978594041829075e-06, "loss": 0.79, "step": 2046 }, { "epoch": 0.06, "grad_norm": 8.026293639020002, "learning_rate": 9.978551153337384e-06, "loss": 0.498, "step": 2047 }, { "epoch": 0.06, "grad_norm": 7.939723326717486, "learning_rate": 9.97850822201583e-06, "loss": 0.929, "step": 2048 }, { "epoch": 0.06, "grad_norm": 16.311383556293066, "learning_rate": 9.978465247864782e-06, "loss": 0.5673, "step": 2049 }, { "epoch": 0.06, "grad_norm": 6.746467372353147, "learning_rate": 9.97842223088461e-06, "loss": 0.5802, "step": 2050 }, { "epoch": 0.06, "grad_norm": 6.562875137506092, "learning_rate": 9.978379171075687e-06, "loss": 0.2869, "step": 2051 }, { "epoch": 0.06, "grad_norm": 5.8049347569416, "learning_rate": 9.97833606843838e-06, "loss": 0.5646, "step": 2052 }, { "epoch": 0.06, "grad_norm": 7.913740296503245, "learning_rate": 9.978292922973058e-06, "loss": 0.7758, "step": 2053 }, { "epoch": 0.06, "grad_norm": 4.400575595833572, "learning_rate": 9.978249734680098e-06, "loss": 0.4578, "step": 2054 }, { "epoch": 0.06, "grad_norm": 4.875367664264448, "learning_rate": 9.978206503559868e-06, "loss": 0.5019, "step": 2055 }, { "epoch": 0.06, "grad_norm": 7.943709940475433, "learning_rate": 9.978163229612738e-06, "loss": 0.4335, "step": 2056 }, { "epoch": 0.06, "grad_norm": 8.591655795080914, "learning_rate": 9.978119912839087e-06, "loss": 0.5604, "step": 2057 }, { "epoch": 0.06, "grad_norm": 7.548126053825444, "learning_rate": 9.97807655323928e-06, "loss": 0.4935, "step": 2058 }, { "epoch": 0.06, "grad_norm": 6.164298939007934, "learning_rate": 9.978033150813695e-06, "loss": 0.3954, "step": 2059 }, { "epoch": 0.06, "grad_norm": 4.191563712625904, "learning_rate": 9.977989705562702e-06, "loss": 0.2683, "step": 2060 }, { "epoch": 0.06, "grad_norm": 4.4853740204046, "learning_rate": 9.977946217486677e-06, "loss": 0.2641, "step": 2061 }, { "epoch": 0.06, "grad_norm": 9.2103227793641, "learning_rate": 9.977902686585995e-06, "loss": 0.4136, "step": 2062 }, { "epoch": 0.06, "grad_norm": 6.1301456557326635, "learning_rate": 9.977859112861027e-06, "loss": 0.7171, "step": 2063 }, { "epoch": 0.06, "grad_norm": 6.299812862099409, "learning_rate": 9.977815496312151e-06, "loss": 0.5314, "step": 2064 }, { "epoch": 0.06, "grad_norm": 13.968658788327767, "learning_rate": 9.977771836939742e-06, "loss": 0.7902, "step": 2065 }, { "epoch": 0.06, "grad_norm": 7.565505920002445, "learning_rate": 9.977728134744173e-06, "loss": 0.7113, "step": 2066 }, { "epoch": 0.06, "grad_norm": 6.970226730096038, "learning_rate": 9.977684389725823e-06, "loss": 0.5377, "step": 2067 }, { "epoch": 0.06, "grad_norm": 5.280418872329603, "learning_rate": 9.977640601885067e-06, "loss": 0.476, "step": 2068 }, { "epoch": 0.06, "grad_norm": 5.880694612230298, "learning_rate": 9.977596771222282e-06, "loss": 0.2178, "step": 2069 }, { "epoch": 0.06, "grad_norm": 4.4963893710338345, "learning_rate": 9.977552897737843e-06, "loss": 0.6728, "step": 2070 }, { "epoch": 0.06, "grad_norm": 9.420152699717045, "learning_rate": 9.977508981432132e-06, "loss": 0.3339, "step": 2071 }, { "epoch": 0.06, "grad_norm": 6.715990076308788, "learning_rate": 9.977465022305522e-06, "loss": 0.4767, "step": 2072 }, { "epoch": 0.06, "grad_norm": 6.998932280172381, "learning_rate": 9.977421020358394e-06, "loss": 0.7378, "step": 2073 }, { "epoch": 0.06, "grad_norm": 4.264669360194017, "learning_rate": 9.977376975591127e-06, "loss": 0.5676, "step": 2074 }, { "epoch": 0.06, "grad_norm": 10.729648310149642, "learning_rate": 9.977332888004096e-06, "loss": 0.5146, "step": 2075 }, { "epoch": 0.06, "grad_norm": 5.5733047302704595, "learning_rate": 9.977288757597685e-06, "loss": 0.5884, "step": 2076 }, { "epoch": 0.06, "grad_norm": 8.691312817703562, "learning_rate": 9.977244584372271e-06, "loss": 0.5648, "step": 2077 }, { "epoch": 0.06, "grad_norm": 4.029826422992768, "learning_rate": 9.977200368328235e-06, "loss": 0.2603, "step": 2078 }, { "epoch": 0.06, "grad_norm": 8.319854351199359, "learning_rate": 9.977156109465957e-06, "loss": 0.34, "step": 2079 }, { "epoch": 0.06, "grad_norm": 9.32089060215984, "learning_rate": 9.977111807785818e-06, "loss": 0.7109, "step": 2080 }, { "epoch": 0.06, "grad_norm": 4.3069248927405, "learning_rate": 9.977067463288197e-06, "loss": 0.741, "step": 2081 }, { "epoch": 0.06, "grad_norm": 5.429614213407727, "learning_rate": 9.97702307597348e-06, "loss": 0.5099, "step": 2082 }, { "epoch": 0.06, "grad_norm": 10.331944115889671, "learning_rate": 9.976978645842045e-06, "loss": 0.9479, "step": 2083 }, { "epoch": 0.06, "grad_norm": 4.638018508673515, "learning_rate": 9.976934172894275e-06, "loss": 0.7997, "step": 2084 }, { "epoch": 0.06, "grad_norm": 8.028084483625394, "learning_rate": 9.976889657130553e-06, "loss": 0.6891, "step": 2085 }, { "epoch": 0.06, "grad_norm": 6.05399268316678, "learning_rate": 9.976845098551264e-06, "loss": 0.3518, "step": 2086 }, { "epoch": 0.06, "grad_norm": 4.979249094303411, "learning_rate": 9.976800497156789e-06, "loss": 0.7407, "step": 2087 }, { "epoch": 0.06, "grad_norm": 7.598460490752637, "learning_rate": 9.97675585294751e-06, "loss": 1.1561, "step": 2088 }, { "epoch": 0.06, "grad_norm": 5.1852134353130515, "learning_rate": 9.976711165923814e-06, "loss": 0.572, "step": 2089 }, { "epoch": 0.06, "grad_norm": 6.036514159090626, "learning_rate": 9.976666436086086e-06, "loss": 0.3902, "step": 2090 }, { "epoch": 0.06, "grad_norm": 11.941546648003161, "learning_rate": 9.97662166343471e-06, "loss": 0.7688, "step": 2091 }, { "epoch": 0.06, "grad_norm": 4.222254336106717, "learning_rate": 9.976576847970067e-06, "loss": 0.5036, "step": 2092 }, { "epoch": 0.06, "grad_norm": 8.122809481608671, "learning_rate": 9.976531989692549e-06, "loss": 0.8402, "step": 2093 }, { "epoch": 0.06, "grad_norm": 7.078187618020739, "learning_rate": 9.976487088602538e-06, "loss": 0.8323, "step": 2094 }, { "epoch": 0.06, "grad_norm": 6.174726691742105, "learning_rate": 9.97644214470042e-06, "loss": 0.3449, "step": 2095 }, { "epoch": 0.06, "grad_norm": 5.024203845669174, "learning_rate": 9.976397157986586e-06, "loss": 0.6256, "step": 2096 }, { "epoch": 0.06, "grad_norm": 8.71693516373052, "learning_rate": 9.976352128461417e-06, "loss": 0.4664, "step": 2097 }, { "epoch": 0.06, "grad_norm": 6.220316972230263, "learning_rate": 9.976307056125305e-06, "loss": 0.4088, "step": 2098 }, { "epoch": 0.06, "grad_norm": 5.468877387606735, "learning_rate": 9.976261940978635e-06, "loss": 0.6919, "step": 2099 }, { "epoch": 0.06, "grad_norm": 7.610588212006958, "learning_rate": 9.976216783021798e-06, "loss": 0.3888, "step": 2100 }, { "epoch": 0.06, "grad_norm": 10.602342569863982, "learning_rate": 9.97617158225518e-06, "loss": 0.8621, "step": 2101 }, { "epoch": 0.06, "grad_norm": 8.121075782921142, "learning_rate": 9.97612633867917e-06, "loss": 1.0184, "step": 2102 }, { "epoch": 0.06, "grad_norm": 5.1670044844947345, "learning_rate": 9.976081052294158e-06, "loss": 0.4774, "step": 2103 }, { "epoch": 0.06, "grad_norm": 8.202994093077505, "learning_rate": 9.976035723100533e-06, "loss": 0.3641, "step": 2104 }, { "epoch": 0.06, "grad_norm": 7.928300587803876, "learning_rate": 9.975990351098685e-06, "loss": 0.57, "step": 2105 }, { "epoch": 0.06, "grad_norm": 8.306352105815533, "learning_rate": 9.975944936289006e-06, "loss": 0.6065, "step": 2106 }, { "epoch": 0.06, "grad_norm": 8.34289169183061, "learning_rate": 9.975899478671886e-06, "loss": 0.4937, "step": 2107 }, { "epoch": 0.06, "grad_norm": 7.190544644552931, "learning_rate": 9.975853978247714e-06, "loss": 0.5175, "step": 2108 }, { "epoch": 0.06, "grad_norm": 6.053401648049117, "learning_rate": 9.975808435016883e-06, "loss": 0.3668, "step": 2109 }, { "epoch": 0.06, "grad_norm": 7.75593720941963, "learning_rate": 9.975762848979784e-06, "loss": 0.6821, "step": 2110 }, { "epoch": 0.06, "grad_norm": 8.032062118377455, "learning_rate": 9.97571722013681e-06, "loss": 0.9952, "step": 2111 }, { "epoch": 0.06, "grad_norm": 8.870770265836724, "learning_rate": 9.975671548488354e-06, "loss": 0.6128, "step": 2112 }, { "epoch": 0.06, "grad_norm": 5.544564596062165, "learning_rate": 9.975625834034808e-06, "loss": 0.5755, "step": 2113 }, { "epoch": 0.06, "grad_norm": 7.412836775438648, "learning_rate": 9.975580076776567e-06, "loss": 0.4413, "step": 2114 }, { "epoch": 0.06, "grad_norm": 7.289277199284761, "learning_rate": 9.975534276714022e-06, "loss": 0.3094, "step": 2115 }, { "epoch": 0.06, "grad_norm": 7.532358048472321, "learning_rate": 9.975488433847569e-06, "loss": 0.6086, "step": 2116 }, { "epoch": 0.06, "grad_norm": 7.497182761995353, "learning_rate": 9.9754425481776e-06, "loss": 0.6339, "step": 2117 }, { "epoch": 0.06, "grad_norm": 10.603285647343707, "learning_rate": 9.975396619704512e-06, "loss": 0.5543, "step": 2118 }, { "epoch": 0.06, "grad_norm": 6.725644145307684, "learning_rate": 9.9753506484287e-06, "loss": 0.6823, "step": 2119 }, { "epoch": 0.06, "grad_norm": 5.249829334936467, "learning_rate": 9.975304634350559e-06, "loss": 0.2885, "step": 2120 }, { "epoch": 0.06, "grad_norm": 2.995242459190179, "learning_rate": 9.975258577470483e-06, "loss": 0.3296, "step": 2121 }, { "epoch": 0.06, "grad_norm": 4.851269011289465, "learning_rate": 9.975212477788873e-06, "loss": 0.464, "step": 2122 }, { "epoch": 0.06, "grad_norm": 4.273225908079722, "learning_rate": 9.975166335306121e-06, "loss": 0.3774, "step": 2123 }, { "epoch": 0.06, "grad_norm": 5.121225688523736, "learning_rate": 9.975120150022625e-06, "loss": 0.7232, "step": 2124 }, { "epoch": 0.06, "grad_norm": 7.816107535004158, "learning_rate": 9.975073921938785e-06, "loss": 0.5789, "step": 2125 }, { "epoch": 0.06, "grad_norm": 7.245928903848455, "learning_rate": 9.975027651054994e-06, "loss": 0.508, "step": 2126 }, { "epoch": 0.06, "grad_norm": 9.126380815936038, "learning_rate": 9.974981337371654e-06, "loss": 1.1203, "step": 2127 }, { "epoch": 0.06, "grad_norm": 6.558336772053393, "learning_rate": 9.974934980889162e-06, "loss": 0.4054, "step": 2128 }, { "epoch": 0.06, "grad_norm": 6.182126708000556, "learning_rate": 9.974888581607916e-06, "loss": 0.541, "step": 2129 }, { "epoch": 0.06, "grad_norm": 9.729717187329246, "learning_rate": 9.974842139528318e-06, "loss": 0.8999, "step": 2130 }, { "epoch": 0.06, "grad_norm": 4.488861976019334, "learning_rate": 9.974795654650765e-06, "loss": 0.3502, "step": 2131 }, { "epoch": 0.06, "grad_norm": 4.3824692318016325, "learning_rate": 9.974749126975655e-06, "loss": 0.3151, "step": 2132 }, { "epoch": 0.06, "grad_norm": 6.50178950692018, "learning_rate": 9.974702556503392e-06, "loss": 0.7308, "step": 2133 }, { "epoch": 0.06, "grad_norm": 16.569197542124403, "learning_rate": 9.974655943234377e-06, "loss": 0.967, "step": 2134 }, { "epoch": 0.06, "grad_norm": 5.5102387407370506, "learning_rate": 9.97460928716901e-06, "loss": 0.7704, "step": 2135 }, { "epoch": 0.06, "grad_norm": 6.4592628753482995, "learning_rate": 9.97456258830769e-06, "loss": 0.4295, "step": 2136 }, { "epoch": 0.06, "grad_norm": 7.442883955047152, "learning_rate": 9.97451584665082e-06, "loss": 0.8195, "step": 2137 }, { "epoch": 0.06, "grad_norm": 4.034978783278717, "learning_rate": 9.974469062198804e-06, "loss": 0.1762, "step": 2138 }, { "epoch": 0.06, "grad_norm": 9.350022268013777, "learning_rate": 9.974422234952043e-06, "loss": 0.2552, "step": 2139 }, { "epoch": 0.06, "grad_norm": 11.66146382168017, "learning_rate": 9.97437536491094e-06, "loss": 0.6458, "step": 2140 }, { "epoch": 0.06, "grad_norm": 6.053279905055421, "learning_rate": 9.974328452075896e-06, "loss": 1.0837, "step": 2141 }, { "epoch": 0.06, "grad_norm": 6.383042927835063, "learning_rate": 9.97428149644732e-06, "loss": 0.5366, "step": 2142 }, { "epoch": 0.06, "grad_norm": 3.120510614989236, "learning_rate": 9.974234498025611e-06, "loss": 0.2957, "step": 2143 }, { "epoch": 0.06, "grad_norm": 12.418056433728994, "learning_rate": 9.974187456811175e-06, "loss": 0.4046, "step": 2144 }, { "epoch": 0.06, "grad_norm": 5.47377598462119, "learning_rate": 9.974140372804417e-06, "loss": 0.5015, "step": 2145 }, { "epoch": 0.06, "grad_norm": 11.617615025547883, "learning_rate": 9.974093246005743e-06, "loss": 0.6332, "step": 2146 }, { "epoch": 0.06, "grad_norm": 8.606827429942665, "learning_rate": 9.974046076415556e-06, "loss": 0.6631, "step": 2147 }, { "epoch": 0.06, "grad_norm": 7.963814197880883, "learning_rate": 9.973998864034261e-06, "loss": 1.0085, "step": 2148 }, { "epoch": 0.06, "grad_norm": 6.664085969514652, "learning_rate": 9.97395160886227e-06, "loss": 0.7067, "step": 2149 }, { "epoch": 0.06, "grad_norm": 6.6664012458735105, "learning_rate": 9.973904310899984e-06, "loss": 0.5818, "step": 2150 }, { "epoch": 0.06, "grad_norm": 6.065498632955236, "learning_rate": 9.973856970147813e-06, "loss": 0.7726, "step": 2151 }, { "epoch": 0.06, "grad_norm": 7.671959653181444, "learning_rate": 9.97380958660616e-06, "loss": 0.6737, "step": 2152 }, { "epoch": 0.06, "grad_norm": 7.560757901383629, "learning_rate": 9.973762160275437e-06, "loss": 0.4327, "step": 2153 }, { "epoch": 0.06, "grad_norm": 6.241767076602259, "learning_rate": 9.973714691156051e-06, "loss": 0.4722, "step": 2154 }, { "epoch": 0.06, "grad_norm": 7.080277441968141, "learning_rate": 9.97366717924841e-06, "loss": 0.9509, "step": 2155 }, { "epoch": 0.06, "grad_norm": 10.639038505420697, "learning_rate": 9.97361962455292e-06, "loss": 0.599, "step": 2156 }, { "epoch": 0.06, "grad_norm": 7.628178856271596, "learning_rate": 9.973572027069998e-06, "loss": 0.6196, "step": 2157 }, { "epoch": 0.06, "grad_norm": 5.631645092245011, "learning_rate": 9.973524386800044e-06, "loss": 0.2248, "step": 2158 }, { "epoch": 0.06, "grad_norm": 2.2847109890642012, "learning_rate": 9.973476703743473e-06, "loss": 0.2549, "step": 2159 }, { "epoch": 0.06, "grad_norm": 8.352943099325095, "learning_rate": 9.973428977900695e-06, "loss": 0.6621, "step": 2160 }, { "epoch": 0.06, "grad_norm": 10.073857123816786, "learning_rate": 9.973381209272119e-06, "loss": 0.7379, "step": 2161 }, { "epoch": 0.06, "grad_norm": 7.972781668633951, "learning_rate": 9.973333397858157e-06, "loss": 0.7017, "step": 2162 }, { "epoch": 0.06, "grad_norm": 6.173536168852094, "learning_rate": 9.97328554365922e-06, "loss": 0.3956, "step": 2163 }, { "epoch": 0.06, "grad_norm": 7.316992724488141, "learning_rate": 9.973237646675719e-06, "loss": 0.579, "step": 2164 }, { "epoch": 0.06, "grad_norm": 5.63755558434804, "learning_rate": 9.973189706908068e-06, "loss": 0.6694, "step": 2165 }, { "epoch": 0.06, "grad_norm": 9.80921706175493, "learning_rate": 9.97314172435668e-06, "loss": 0.8813, "step": 2166 }, { "epoch": 0.06, "grad_norm": 8.298390567957837, "learning_rate": 9.973093699021963e-06, "loss": 0.4571, "step": 2167 }, { "epoch": 0.06, "grad_norm": 6.702517039509635, "learning_rate": 9.973045630904335e-06, "loss": 0.416, "step": 2168 }, { "epoch": 0.06, "grad_norm": 14.423818604008368, "learning_rate": 9.972997520004207e-06, "loss": 0.9968, "step": 2169 }, { "epoch": 0.06, "grad_norm": 6.697985833158458, "learning_rate": 9.972949366321993e-06, "loss": 0.5118, "step": 2170 }, { "epoch": 0.06, "grad_norm": 8.462418614141253, "learning_rate": 9.972901169858109e-06, "loss": 0.4887, "step": 2171 }, { "epoch": 0.06, "grad_norm": 7.840491480505785, "learning_rate": 9.972852930612969e-06, "loss": 0.8148, "step": 2172 }, { "epoch": 0.06, "grad_norm": 6.090924512722, "learning_rate": 9.972804648586987e-06, "loss": 0.5714, "step": 2173 }, { "epoch": 0.06, "grad_norm": 5.898932631976249, "learning_rate": 9.972756323780579e-06, "loss": 0.7905, "step": 2174 }, { "epoch": 0.06, "grad_norm": 4.482411130672036, "learning_rate": 9.972707956194158e-06, "loss": 0.3266, "step": 2175 }, { "epoch": 0.06, "grad_norm": 8.69326250806092, "learning_rate": 9.972659545828144e-06, "loss": 0.457, "step": 2176 }, { "epoch": 0.06, "grad_norm": 5.979083557969086, "learning_rate": 9.972611092682955e-06, "loss": 0.3359, "step": 2177 }, { "epoch": 0.06, "grad_norm": 7.319522517843714, "learning_rate": 9.972562596759004e-06, "loss": 0.6333, "step": 2178 }, { "epoch": 0.06, "grad_norm": 6.58903805115551, "learning_rate": 9.972514058056707e-06, "loss": 0.5039, "step": 2179 }, { "epoch": 0.06, "grad_norm": 7.404616775384228, "learning_rate": 9.972465476576485e-06, "loss": 0.4688, "step": 2180 }, { "epoch": 0.06, "grad_norm": 12.17286264822249, "learning_rate": 9.972416852318755e-06, "loss": 0.9024, "step": 2181 }, { "epoch": 0.06, "grad_norm": 4.368071846998137, "learning_rate": 9.972368185283934e-06, "loss": 0.5289, "step": 2182 }, { "epoch": 0.06, "grad_norm": 12.602334705397071, "learning_rate": 9.972319475472443e-06, "loss": 0.5895, "step": 2183 }, { "epoch": 0.06, "grad_norm": 4.831500456759346, "learning_rate": 9.972270722884701e-06, "loss": 0.4018, "step": 2184 }, { "epoch": 0.06, "grad_norm": 8.65918067871033, "learning_rate": 9.972221927521123e-06, "loss": 0.7485, "step": 2185 }, { "epoch": 0.06, "grad_norm": 5.166262897998687, "learning_rate": 9.972173089382134e-06, "loss": 0.5501, "step": 2186 }, { "epoch": 0.06, "grad_norm": 4.2433472202045825, "learning_rate": 9.972124208468152e-06, "loss": 0.4352, "step": 2187 }, { "epoch": 0.06, "grad_norm": 7.743891492879482, "learning_rate": 9.972075284779596e-06, "loss": 0.5274, "step": 2188 }, { "epoch": 0.06, "grad_norm": 5.702171601334823, "learning_rate": 9.972026318316889e-06, "loss": 0.6663, "step": 2189 }, { "epoch": 0.06, "grad_norm": 8.262016417279042, "learning_rate": 9.971977309080455e-06, "loss": 0.5214, "step": 2190 }, { "epoch": 0.06, "grad_norm": 8.00152096595108, "learning_rate": 9.971928257070708e-06, "loss": 0.5439, "step": 2191 }, { "epoch": 0.06, "grad_norm": 6.939822658724493, "learning_rate": 9.971879162288077e-06, "loss": 0.5873, "step": 2192 }, { "epoch": 0.06, "grad_norm": 9.14758427108059, "learning_rate": 9.97183002473298e-06, "loss": 0.6225, "step": 2193 }, { "epoch": 0.06, "grad_norm": 5.627908166820731, "learning_rate": 9.971780844405843e-06, "loss": 0.5086, "step": 2194 }, { "epoch": 0.06, "grad_norm": 5.53169731443504, "learning_rate": 9.971731621307086e-06, "loss": 0.7741, "step": 2195 }, { "epoch": 0.06, "grad_norm": 4.525504475076428, "learning_rate": 9.971682355437136e-06, "loss": 0.3549, "step": 2196 }, { "epoch": 0.06, "grad_norm": 5.087173428938092, "learning_rate": 9.971633046796414e-06, "loss": 0.4956, "step": 2197 }, { "epoch": 0.06, "grad_norm": 4.869345760500037, "learning_rate": 9.971583695385343e-06, "loss": 0.9211, "step": 2198 }, { "epoch": 0.06, "grad_norm": 4.479610157975211, "learning_rate": 9.971534301204353e-06, "loss": 0.8495, "step": 2199 }, { "epoch": 0.06, "grad_norm": 4.978286037486468, "learning_rate": 9.971484864253861e-06, "loss": 0.4247, "step": 2200 }, { "epoch": 0.06, "grad_norm": 6.1611068853224875, "learning_rate": 9.971435384534301e-06, "loss": 1.0275, "step": 2201 }, { "epoch": 0.06, "grad_norm": 6.311433909821671, "learning_rate": 9.971385862046093e-06, "loss": 0.4351, "step": 2202 }, { "epoch": 0.06, "grad_norm": 9.03805739502854, "learning_rate": 9.971336296789664e-06, "loss": 0.4489, "step": 2203 }, { "epoch": 0.06, "grad_norm": 5.1349429465953, "learning_rate": 9.971286688765443e-06, "loss": 0.3437, "step": 2204 }, { "epoch": 0.06, "grad_norm": 11.124324478042405, "learning_rate": 9.971237037973854e-06, "loss": 0.3431, "step": 2205 }, { "epoch": 0.06, "grad_norm": 6.019992184427961, "learning_rate": 9.971187344415322e-06, "loss": 0.4534, "step": 2206 }, { "epoch": 0.06, "grad_norm": 6.302649034904112, "learning_rate": 9.97113760809028e-06, "loss": 0.4887, "step": 2207 }, { "epoch": 0.06, "grad_norm": 9.886047509300171, "learning_rate": 9.971087828999153e-06, "loss": 0.6729, "step": 2208 }, { "epoch": 0.06, "grad_norm": 5.5824442577867615, "learning_rate": 9.971038007142369e-06, "loss": 0.4972, "step": 2209 }, { "epoch": 0.06, "grad_norm": 6.751091409680571, "learning_rate": 9.970988142520356e-06, "loss": 0.6634, "step": 2210 }, { "epoch": 0.06, "grad_norm": 9.04133343998134, "learning_rate": 9.970938235133548e-06, "loss": 0.5137, "step": 2211 }, { "epoch": 0.06, "grad_norm": 5.829763391839671, "learning_rate": 9.970888284982367e-06, "loss": 0.5736, "step": 2212 }, { "epoch": 0.06, "grad_norm": 6.996721862507403, "learning_rate": 9.970838292067247e-06, "loss": 0.4076, "step": 2213 }, { "epoch": 0.06, "grad_norm": 4.409692244874214, "learning_rate": 9.970788256388616e-06, "loss": 0.6934, "step": 2214 }, { "epoch": 0.06, "grad_norm": 4.841213817505548, "learning_rate": 9.970738177946906e-06, "loss": 0.3621, "step": 2215 }, { "epoch": 0.06, "grad_norm": 9.181326524222127, "learning_rate": 9.970688056742547e-06, "loss": 0.7761, "step": 2216 }, { "epoch": 0.06, "grad_norm": 4.240185203600992, "learning_rate": 9.970637892775974e-06, "loss": 0.5984, "step": 2217 }, { "epoch": 0.06, "grad_norm": 6.52495961962369, "learning_rate": 9.970587686047612e-06, "loss": 0.5215, "step": 2218 }, { "epoch": 0.06, "grad_norm": 6.544041204667886, "learning_rate": 9.970537436557896e-06, "loss": 0.988, "step": 2219 }, { "epoch": 0.06, "grad_norm": 4.376183703916554, "learning_rate": 9.970487144307262e-06, "loss": 0.3682, "step": 2220 }, { "epoch": 0.06, "grad_norm": 3.439351640858006, "learning_rate": 9.970436809296136e-06, "loss": 0.3623, "step": 2221 }, { "epoch": 0.06, "grad_norm": 5.690980454162805, "learning_rate": 9.970386431524954e-06, "loss": 0.6895, "step": 2222 }, { "epoch": 0.06, "grad_norm": 4.760431880912703, "learning_rate": 9.97033601099415e-06, "loss": 0.6477, "step": 2223 }, { "epoch": 0.06, "grad_norm": 6.749202787157644, "learning_rate": 9.970285547704157e-06, "loss": 0.5271, "step": 2224 }, { "epoch": 0.06, "grad_norm": 6.31448136761972, "learning_rate": 9.97023504165541e-06, "loss": 0.7712, "step": 2225 }, { "epoch": 0.06, "grad_norm": 7.976490225917519, "learning_rate": 9.970184492848342e-06, "loss": 0.8345, "step": 2226 }, { "epoch": 0.06, "grad_norm": 6.317841941718479, "learning_rate": 9.970133901283391e-06, "loss": 0.5248, "step": 2227 }, { "epoch": 0.06, "grad_norm": 7.396069124430029, "learning_rate": 9.970083266960988e-06, "loss": 0.2885, "step": 2228 }, { "epoch": 0.06, "grad_norm": 4.551813439137944, "learning_rate": 9.970032589881571e-06, "loss": 0.4635, "step": 2229 }, { "epoch": 0.06, "grad_norm": 8.137726602753956, "learning_rate": 9.969981870045575e-06, "loss": 0.5989, "step": 2230 }, { "epoch": 0.06, "grad_norm": 6.135045280507071, "learning_rate": 9.969931107453438e-06, "loss": 0.8072, "step": 2231 }, { "epoch": 0.06, "grad_norm": 6.54725969955097, "learning_rate": 9.969880302105596e-06, "loss": 0.4677, "step": 2232 }, { "epoch": 0.06, "grad_norm": 7.8777632331026375, "learning_rate": 9.969829454002484e-06, "loss": 0.5222, "step": 2233 }, { "epoch": 0.06, "grad_norm": 5.96879985049536, "learning_rate": 9.969778563144543e-06, "loss": 0.4028, "step": 2234 }, { "epoch": 0.06, "grad_norm": 28.802026529008934, "learning_rate": 9.969727629532206e-06, "loss": 0.3491, "step": 2235 }, { "epoch": 0.06, "grad_norm": 5.227494639937117, "learning_rate": 9.969676653165916e-06, "loss": 0.2862, "step": 2236 }, { "epoch": 0.06, "grad_norm": 7.738952638403541, "learning_rate": 9.969625634046111e-06, "loss": 0.2678, "step": 2237 }, { "epoch": 0.06, "grad_norm": 9.82336475460057, "learning_rate": 9.969574572173227e-06, "loss": 0.7006, "step": 2238 }, { "epoch": 0.06, "grad_norm": 5.176159838379688, "learning_rate": 9.969523467547704e-06, "loss": 0.7001, "step": 2239 }, { "epoch": 0.06, "grad_norm": 4.817322383233267, "learning_rate": 9.969472320169984e-06, "loss": 0.3883, "step": 2240 }, { "epoch": 0.06, "grad_norm": 5.478728689038699, "learning_rate": 9.969421130040503e-06, "loss": 0.9157, "step": 2241 }, { "epoch": 0.06, "grad_norm": 11.03307148103111, "learning_rate": 9.969369897159708e-06, "loss": 0.4571, "step": 2242 }, { "epoch": 0.06, "grad_norm": 7.205741853821153, "learning_rate": 9.969318621528032e-06, "loss": 0.5349, "step": 2243 }, { "epoch": 0.06, "grad_norm": 9.779036103785517, "learning_rate": 9.969267303145922e-06, "loss": 0.8156, "step": 2244 }, { "epoch": 0.06, "grad_norm": 8.904106123032212, "learning_rate": 9.969215942013817e-06, "loss": 0.9949, "step": 2245 }, { "epoch": 0.06, "grad_norm": 7.8583263812501265, "learning_rate": 9.969164538132157e-06, "loss": 0.6276, "step": 2246 }, { "epoch": 0.06, "grad_norm": 6.720476398895148, "learning_rate": 9.969113091501388e-06, "loss": 0.6486, "step": 2247 }, { "epoch": 0.06, "grad_norm": 8.264021546681944, "learning_rate": 9.96906160212195e-06, "loss": 0.8833, "step": 2248 }, { "epoch": 0.06, "grad_norm": 10.984479531975055, "learning_rate": 9.969010069994288e-06, "loss": 0.4198, "step": 2249 }, { "epoch": 0.06, "grad_norm": 5.962630885065183, "learning_rate": 9.968958495118843e-06, "loss": 0.5608, "step": 2250 }, { "epoch": 0.06, "grad_norm": 6.597490235177259, "learning_rate": 9.96890687749606e-06, "loss": 0.7283, "step": 2251 }, { "epoch": 0.06, "grad_norm": 7.671994831835266, "learning_rate": 9.968855217126385e-06, "loss": 0.5439, "step": 2252 }, { "epoch": 0.06, "grad_norm": 8.011523530349502, "learning_rate": 9.968803514010258e-06, "loss": 0.5151, "step": 2253 }, { "epoch": 0.06, "grad_norm": 4.338587293197966, "learning_rate": 9.968751768148128e-06, "loss": 0.4217, "step": 2254 }, { "epoch": 0.06, "grad_norm": 5.660922107259706, "learning_rate": 9.968699979540436e-06, "loss": 0.4843, "step": 2255 }, { "epoch": 0.06, "grad_norm": 8.46757121873565, "learning_rate": 9.968648148187633e-06, "loss": 0.41, "step": 2256 }, { "epoch": 0.06, "grad_norm": 11.154150500812484, "learning_rate": 9.968596274090159e-06, "loss": 0.3829, "step": 2257 }, { "epoch": 0.06, "grad_norm": 4.9027694699676605, "learning_rate": 9.968544357248461e-06, "loss": 0.267, "step": 2258 }, { "epoch": 0.06, "grad_norm": 4.8368902053722165, "learning_rate": 9.968492397662993e-06, "loss": 0.5006, "step": 2259 }, { "epoch": 0.06, "grad_norm": 8.749729697275832, "learning_rate": 9.968440395334194e-06, "loss": 0.5992, "step": 2260 }, { "epoch": 0.06, "grad_norm": 6.14081780905057, "learning_rate": 9.968388350262513e-06, "loss": 0.6041, "step": 2261 }, { "epoch": 0.06, "grad_norm": 8.318630969129567, "learning_rate": 9.9683362624484e-06, "loss": 0.5537, "step": 2262 }, { "epoch": 0.06, "grad_norm": 8.704618699050625, "learning_rate": 9.9682841318923e-06, "loss": 0.4728, "step": 2263 }, { "epoch": 0.06, "grad_norm": 7.6905578486344774, "learning_rate": 9.968231958594666e-06, "loss": 0.8983, "step": 2264 }, { "epoch": 0.06, "grad_norm": 8.02561806648691, "learning_rate": 9.968179742555944e-06, "loss": 0.6495, "step": 2265 }, { "epoch": 0.06, "grad_norm": 4.891993273349108, "learning_rate": 9.968127483776582e-06, "loss": 0.3778, "step": 2266 }, { "epoch": 0.06, "grad_norm": 4.675197402335939, "learning_rate": 9.968075182257032e-06, "loss": 0.4264, "step": 2267 }, { "epoch": 0.06, "grad_norm": 8.105523343189642, "learning_rate": 9.968022837997742e-06, "loss": 0.6018, "step": 2268 }, { "epoch": 0.06, "grad_norm": 6.293860232042287, "learning_rate": 9.967970450999162e-06, "loss": 0.799, "step": 2269 }, { "epoch": 0.07, "grad_norm": 10.085282024474258, "learning_rate": 9.967918021261746e-06, "loss": 0.8504, "step": 2270 }, { "epoch": 0.07, "grad_norm": 7.311805692136055, "learning_rate": 9.967865548785944e-06, "loss": 0.5056, "step": 2271 }, { "epoch": 0.07, "grad_norm": 8.200906682479316, "learning_rate": 9.967813033572205e-06, "loss": 0.5651, "step": 2272 }, { "epoch": 0.07, "grad_norm": 5.142915348828647, "learning_rate": 9.96776047562098e-06, "loss": 0.7185, "step": 2273 }, { "epoch": 0.07, "grad_norm": 3.1794708328097028, "learning_rate": 9.967707874932725e-06, "loss": 0.2972, "step": 2274 }, { "epoch": 0.07, "grad_norm": 8.003299926609712, "learning_rate": 9.96765523150789e-06, "loss": 0.978, "step": 2275 }, { "epoch": 0.07, "grad_norm": 9.189823713137267, "learning_rate": 9.96760254534693e-06, "loss": 0.5727, "step": 2276 }, { "epoch": 0.07, "grad_norm": 3.557730443357503, "learning_rate": 9.967549816450297e-06, "loss": 0.1587, "step": 2277 }, { "epoch": 0.07, "grad_norm": 8.49740819124443, "learning_rate": 9.967497044818445e-06, "loss": 0.7623, "step": 2278 }, { "epoch": 0.07, "grad_norm": 5.088384974034682, "learning_rate": 9.967444230451826e-06, "loss": 0.3867, "step": 2279 }, { "epoch": 0.07, "grad_norm": 9.226634336187061, "learning_rate": 9.967391373350896e-06, "loss": 0.6582, "step": 2280 }, { "epoch": 0.07, "grad_norm": 14.632226576838784, "learning_rate": 9.967338473516111e-06, "loss": 0.5628, "step": 2281 }, { "epoch": 0.07, "grad_norm": 9.419541001850721, "learning_rate": 9.967285530947923e-06, "loss": 1.0354, "step": 2282 }, { "epoch": 0.07, "grad_norm": 5.435131434068262, "learning_rate": 9.96723254564679e-06, "loss": 0.4999, "step": 2283 }, { "epoch": 0.07, "grad_norm": 6.93895087664509, "learning_rate": 9.967179517613168e-06, "loss": 0.5527, "step": 2284 }, { "epoch": 0.07, "grad_norm": 5.2995659398528465, "learning_rate": 9.967126446847511e-06, "loss": 0.5188, "step": 2285 }, { "epoch": 0.07, "grad_norm": 6.460349557825936, "learning_rate": 9.967073333350276e-06, "loss": 0.4926, "step": 2286 }, { "epoch": 0.07, "grad_norm": 4.7253953233474615, "learning_rate": 9.967020177121922e-06, "loss": 0.5493, "step": 2287 }, { "epoch": 0.07, "grad_norm": 10.657677697443654, "learning_rate": 9.966966978162904e-06, "loss": 0.532, "step": 2288 }, { "epoch": 0.07, "grad_norm": 5.975400963900442, "learning_rate": 9.966913736473682e-06, "loss": 0.445, "step": 2289 }, { "epoch": 0.07, "grad_norm": 8.462056685630868, "learning_rate": 9.966860452054711e-06, "loss": 0.456, "step": 2290 }, { "epoch": 0.07, "grad_norm": 5.291709969811465, "learning_rate": 9.966807124906453e-06, "loss": 0.2789, "step": 2291 }, { "epoch": 0.07, "grad_norm": 4.54268329802696, "learning_rate": 9.966753755029364e-06, "loss": 0.3897, "step": 2292 }, { "epoch": 0.07, "grad_norm": 5.798855199137127, "learning_rate": 9.966700342423903e-06, "loss": 0.7964, "step": 2293 }, { "epoch": 0.07, "grad_norm": 6.5107257088108925, "learning_rate": 9.966646887090533e-06, "loss": 0.5243, "step": 2294 }, { "epoch": 0.07, "grad_norm": 4.95788901071935, "learning_rate": 9.966593389029708e-06, "loss": 0.6091, "step": 2295 }, { "epoch": 0.07, "grad_norm": 4.988518311222908, "learning_rate": 9.966539848241892e-06, "loss": 0.3438, "step": 2296 }, { "epoch": 0.07, "grad_norm": 11.95917544351693, "learning_rate": 9.966486264727547e-06, "loss": 0.576, "step": 2297 }, { "epoch": 0.07, "grad_norm": 8.184417377258798, "learning_rate": 9.966432638487131e-06, "loss": 0.5639, "step": 2298 }, { "epoch": 0.07, "grad_norm": 4.628383120822038, "learning_rate": 9.966378969521108e-06, "loss": 0.5702, "step": 2299 }, { "epoch": 0.07, "grad_norm": 4.172482449887201, "learning_rate": 9.966325257829936e-06, "loss": 0.2935, "step": 2300 }, { "epoch": 0.07, "grad_norm": 10.898118449911022, "learning_rate": 9.966271503414082e-06, "loss": 1.0335, "step": 2301 }, { "epoch": 0.07, "grad_norm": 7.629606387683538, "learning_rate": 9.966217706274004e-06, "loss": 0.6706, "step": 2302 }, { "epoch": 0.07, "grad_norm": 7.935333256589004, "learning_rate": 9.966163866410166e-06, "loss": 0.4294, "step": 2303 }, { "epoch": 0.07, "grad_norm": 7.874358923113183, "learning_rate": 9.966109983823033e-06, "loss": 0.9555, "step": 2304 }, { "epoch": 0.07, "grad_norm": 3.393620727774678, "learning_rate": 9.966056058513065e-06, "loss": 0.3006, "step": 2305 }, { "epoch": 0.07, "grad_norm": 8.107689301912426, "learning_rate": 9.966002090480732e-06, "loss": 0.8547, "step": 2306 }, { "epoch": 0.07, "grad_norm": 13.423214864410932, "learning_rate": 9.96594807972649e-06, "loss": 0.8596, "step": 2307 }, { "epoch": 0.07, "grad_norm": 11.392444773965615, "learning_rate": 9.96589402625081e-06, "loss": 0.6173, "step": 2308 }, { "epoch": 0.07, "grad_norm": 8.118412105350695, "learning_rate": 9.965839930054157e-06, "loss": 0.2646, "step": 2309 }, { "epoch": 0.07, "grad_norm": 6.9124161134013615, "learning_rate": 9.965785791136993e-06, "loss": 0.3518, "step": 2310 }, { "epoch": 0.07, "grad_norm": 8.277432695516683, "learning_rate": 9.965731609499784e-06, "loss": 0.9123, "step": 2311 }, { "epoch": 0.07, "grad_norm": 6.768929007321502, "learning_rate": 9.965677385143e-06, "loss": 0.5723, "step": 2312 }, { "epoch": 0.07, "grad_norm": 10.818927596390388, "learning_rate": 9.965623118067102e-06, "loss": 0.766, "step": 2313 }, { "epoch": 0.07, "grad_norm": 6.319504581300355, "learning_rate": 9.965568808272561e-06, "loss": 0.2946, "step": 2314 }, { "epoch": 0.07, "grad_norm": 15.090084918309666, "learning_rate": 9.965514455759845e-06, "loss": 0.5709, "step": 2315 }, { "epoch": 0.07, "grad_norm": 5.235859082624299, "learning_rate": 9.965460060529417e-06, "loss": 0.4165, "step": 2316 }, { "epoch": 0.07, "grad_norm": 5.001228849084546, "learning_rate": 9.965405622581747e-06, "loss": 0.322, "step": 2317 }, { "epoch": 0.07, "grad_norm": 2.438472138241569, "learning_rate": 9.965351141917306e-06, "loss": 0.1503, "step": 2318 }, { "epoch": 0.07, "grad_norm": 5.840247262135889, "learning_rate": 9.965296618536557e-06, "loss": 0.3357, "step": 2319 }, { "epoch": 0.07, "grad_norm": 7.684655632488772, "learning_rate": 9.965242052439976e-06, "loss": 0.67, "step": 2320 }, { "epoch": 0.07, "grad_norm": 3.4315194036342875, "learning_rate": 9.965187443628028e-06, "loss": 0.1708, "step": 2321 }, { "epoch": 0.07, "grad_norm": 7.864610812579183, "learning_rate": 9.965132792101183e-06, "loss": 0.7436, "step": 2322 }, { "epoch": 0.07, "grad_norm": 4.21998860579123, "learning_rate": 9.965078097859912e-06, "loss": 0.3122, "step": 2323 }, { "epoch": 0.07, "grad_norm": 7.24190062249957, "learning_rate": 9.965023360904686e-06, "loss": 0.7078, "step": 2324 }, { "epoch": 0.07, "grad_norm": 6.443625267442175, "learning_rate": 9.964968581235975e-06, "loss": 0.6327, "step": 2325 }, { "epoch": 0.07, "grad_norm": 5.738849899961306, "learning_rate": 9.964913758854252e-06, "loss": 0.4393, "step": 2326 }, { "epoch": 0.07, "grad_norm": 16.624742090045995, "learning_rate": 9.964858893759986e-06, "loss": 0.6557, "step": 2327 }, { "epoch": 0.07, "grad_norm": 5.288035080260985, "learning_rate": 9.96480398595365e-06, "loss": 0.3474, "step": 2328 }, { "epoch": 0.07, "grad_norm": 5.9391905937642395, "learning_rate": 9.964749035435717e-06, "loss": 0.5495, "step": 2329 }, { "epoch": 0.07, "grad_norm": 6.268539093516396, "learning_rate": 9.964694042206659e-06, "loss": 0.3717, "step": 2330 }, { "epoch": 0.07, "grad_norm": 12.201190008690702, "learning_rate": 9.964639006266949e-06, "loss": 0.8022, "step": 2331 }, { "epoch": 0.07, "grad_norm": 7.805859421426181, "learning_rate": 9.964583927617063e-06, "loss": 0.8288, "step": 2332 }, { "epoch": 0.07, "grad_norm": 10.507486127017854, "learning_rate": 9.96452880625747e-06, "loss": 0.9502, "step": 2333 }, { "epoch": 0.07, "grad_norm": 8.884367042669435, "learning_rate": 9.964473642188648e-06, "loss": 0.7934, "step": 2334 }, { "epoch": 0.07, "grad_norm": 9.031835464455419, "learning_rate": 9.96441843541107e-06, "loss": 1.0816, "step": 2335 }, { "epoch": 0.07, "grad_norm": 9.128760150985787, "learning_rate": 9.964363185925212e-06, "loss": 0.5005, "step": 2336 }, { "epoch": 0.07, "grad_norm": 17.37850510624499, "learning_rate": 9.964307893731549e-06, "loss": 1.0377, "step": 2337 }, { "epoch": 0.07, "grad_norm": 12.584091306851084, "learning_rate": 9.964252558830555e-06, "loss": 0.811, "step": 2338 }, { "epoch": 0.07, "grad_norm": 5.259067561320397, "learning_rate": 9.96419718122271e-06, "loss": 0.1023, "step": 2339 }, { "epoch": 0.07, "grad_norm": 3.5621742969666155, "learning_rate": 9.964141760908486e-06, "loss": 0.3152, "step": 2340 }, { "epoch": 0.07, "grad_norm": 6.032573811616608, "learning_rate": 9.964086297888362e-06, "loss": 0.5689, "step": 2341 }, { "epoch": 0.07, "grad_norm": 7.360100258890693, "learning_rate": 9.964030792162816e-06, "loss": 0.5812, "step": 2342 }, { "epoch": 0.07, "grad_norm": 7.2474682432270425, "learning_rate": 9.963975243732323e-06, "loss": 0.3695, "step": 2343 }, { "epoch": 0.07, "grad_norm": 8.210873212480996, "learning_rate": 9.963919652597363e-06, "loss": 0.4566, "step": 2344 }, { "epoch": 0.07, "grad_norm": 7.883358289048721, "learning_rate": 9.963864018758411e-06, "loss": 0.9131, "step": 2345 }, { "epoch": 0.07, "grad_norm": 7.064443607761594, "learning_rate": 9.96380834221595e-06, "loss": 0.4479, "step": 2346 }, { "epoch": 0.07, "grad_norm": 6.788521379650554, "learning_rate": 9.963752622970457e-06, "loss": 0.4564, "step": 2347 }, { "epoch": 0.07, "grad_norm": 8.48276623952003, "learning_rate": 9.963696861022411e-06, "loss": 0.5847, "step": 2348 }, { "epoch": 0.07, "grad_norm": 7.303281140729298, "learning_rate": 9.963641056372293e-06, "loss": 0.483, "step": 2349 }, { "epoch": 0.07, "grad_norm": 5.60325501222505, "learning_rate": 9.96358520902058e-06, "loss": 0.4989, "step": 2350 }, { "epoch": 0.07, "grad_norm": 9.404115757926236, "learning_rate": 9.963529318967757e-06, "loss": 0.5633, "step": 2351 }, { "epoch": 0.07, "grad_norm": 5.961300420149935, "learning_rate": 9.963473386214301e-06, "loss": 0.2683, "step": 2352 }, { "epoch": 0.07, "grad_norm": 4.170337293600485, "learning_rate": 9.963417410760694e-06, "loss": 0.4734, "step": 2353 }, { "epoch": 0.07, "grad_norm": 13.15782406195683, "learning_rate": 9.96336139260742e-06, "loss": 0.5212, "step": 2354 }, { "epoch": 0.07, "grad_norm": 7.928458162674524, "learning_rate": 9.963305331754957e-06, "loss": 0.67, "step": 2355 }, { "epoch": 0.07, "grad_norm": 6.463009558534703, "learning_rate": 9.96324922820379e-06, "loss": 0.4751, "step": 2356 }, { "epoch": 0.07, "grad_norm": 6.259727537423129, "learning_rate": 9.963193081954402e-06, "loss": 0.5747, "step": 2357 }, { "epoch": 0.07, "grad_norm": 4.102989021366539, "learning_rate": 9.963136893007275e-06, "loss": 0.528, "step": 2358 }, { "epoch": 0.07, "grad_norm": 7.928349424320059, "learning_rate": 9.963080661362891e-06, "loss": 0.6119, "step": 2359 }, { "epoch": 0.07, "grad_norm": 11.252292484250622, "learning_rate": 9.963024387021735e-06, "loss": 0.6235, "step": 2360 }, { "epoch": 0.07, "grad_norm": 6.64400326236229, "learning_rate": 9.962968069984293e-06, "loss": 0.4774, "step": 2361 }, { "epoch": 0.07, "grad_norm": 12.475401363911056, "learning_rate": 9.962911710251046e-06, "loss": 0.5342, "step": 2362 }, { "epoch": 0.07, "grad_norm": 4.969238928965088, "learning_rate": 9.962855307822484e-06, "loss": 0.6119, "step": 2363 }, { "epoch": 0.07, "grad_norm": 6.58413400086768, "learning_rate": 9.962798862699086e-06, "loss": 0.7156, "step": 2364 }, { "epoch": 0.07, "grad_norm": 7.446671128773982, "learning_rate": 9.96274237488134e-06, "loss": 0.5568, "step": 2365 }, { "epoch": 0.07, "grad_norm": 4.245547290159893, "learning_rate": 9.962685844369733e-06, "loss": 0.2192, "step": 2366 }, { "epoch": 0.07, "grad_norm": 8.746921270152018, "learning_rate": 9.96262927116475e-06, "loss": 0.4288, "step": 2367 }, { "epoch": 0.07, "grad_norm": 5.287193069387434, "learning_rate": 9.96257265526688e-06, "loss": 0.3446, "step": 2368 }, { "epoch": 0.07, "grad_norm": 5.033762760454046, "learning_rate": 9.962515996676607e-06, "loss": 0.6597, "step": 2369 }, { "epoch": 0.07, "grad_norm": 11.578173103181113, "learning_rate": 9.96245929539442e-06, "loss": 0.8364, "step": 2370 }, { "epoch": 0.07, "grad_norm": 6.552063190407005, "learning_rate": 9.962402551420807e-06, "loss": 0.4471, "step": 2371 }, { "epoch": 0.07, "grad_norm": 4.971626097121122, "learning_rate": 9.962345764756257e-06, "loss": 0.3478, "step": 2372 }, { "epoch": 0.07, "grad_norm": 3.824192471750001, "learning_rate": 9.962288935401257e-06, "loss": 0.5078, "step": 2373 }, { "epoch": 0.07, "grad_norm": 7.594554779986341, "learning_rate": 9.962232063356296e-06, "loss": 0.521, "step": 2374 }, { "epoch": 0.07, "grad_norm": 6.389989135744891, "learning_rate": 9.962175148621862e-06, "loss": 0.1577, "step": 2375 }, { "epoch": 0.07, "grad_norm": 7.606875249887585, "learning_rate": 9.962118191198448e-06, "loss": 0.5417, "step": 2376 }, { "epoch": 0.07, "grad_norm": 10.436618607819677, "learning_rate": 9.96206119108654e-06, "loss": 0.9259, "step": 2377 }, { "epoch": 0.07, "grad_norm": 4.2726539018924115, "learning_rate": 9.962004148286632e-06, "loss": 0.5242, "step": 2378 }, { "epoch": 0.07, "grad_norm": 5.753783722587447, "learning_rate": 9.961947062799215e-06, "loss": 0.3807, "step": 2379 }, { "epoch": 0.07, "grad_norm": 5.022098606071309, "learning_rate": 9.961889934624775e-06, "loss": 0.4659, "step": 2380 }, { "epoch": 0.07, "grad_norm": 6.93536832209527, "learning_rate": 9.96183276376381e-06, "loss": 0.4213, "step": 2381 }, { "epoch": 0.07, "grad_norm": 8.025543085183319, "learning_rate": 9.961775550216807e-06, "loss": 0.5233, "step": 2382 }, { "epoch": 0.07, "grad_norm": 8.044446352773456, "learning_rate": 9.961718293984259e-06, "loss": 0.6617, "step": 2383 }, { "epoch": 0.07, "grad_norm": 5.372404780513717, "learning_rate": 9.961660995066661e-06, "loss": 0.7104, "step": 2384 }, { "epoch": 0.07, "grad_norm": 7.491498197444192, "learning_rate": 9.961603653464504e-06, "loss": 0.6186, "step": 2385 }, { "epoch": 0.07, "grad_norm": 9.175760359271397, "learning_rate": 9.961546269178282e-06, "loss": 0.2568, "step": 2386 }, { "epoch": 0.07, "grad_norm": 5.714479050175583, "learning_rate": 9.961488842208486e-06, "loss": 0.3943, "step": 2387 }, { "epoch": 0.07, "grad_norm": 4.921759370929096, "learning_rate": 9.961431372555615e-06, "loss": 0.3447, "step": 2388 }, { "epoch": 0.07, "grad_norm": 13.4957265094527, "learning_rate": 9.96137386022016e-06, "loss": 0.3121, "step": 2389 }, { "epoch": 0.07, "grad_norm": 17.60102978207947, "learning_rate": 9.961316305202615e-06, "loss": 0.5291, "step": 2390 }, { "epoch": 0.07, "grad_norm": 7.348325075315198, "learning_rate": 9.961258707503479e-06, "loss": 0.6271, "step": 2391 }, { "epoch": 0.07, "grad_norm": 9.605923561600596, "learning_rate": 9.961201067123244e-06, "loss": 0.5497, "step": 2392 }, { "epoch": 0.07, "grad_norm": 9.401215990023017, "learning_rate": 9.961143384062409e-06, "loss": 0.7064, "step": 2393 }, { "epoch": 0.07, "grad_norm": 7.035097688540272, "learning_rate": 9.961085658321465e-06, "loss": 0.6869, "step": 2394 }, { "epoch": 0.07, "grad_norm": 3.077178770912754, "learning_rate": 9.961027889900914e-06, "loss": 0.2336, "step": 2395 }, { "epoch": 0.07, "grad_norm": 8.492879016448846, "learning_rate": 9.96097007880125e-06, "loss": 0.6713, "step": 2396 }, { "epoch": 0.07, "grad_norm": 10.02554648757726, "learning_rate": 9.960912225022972e-06, "loss": 0.5626, "step": 2397 }, { "epoch": 0.07, "grad_norm": 5.2359799330132315, "learning_rate": 9.960854328566576e-06, "loss": 0.7659, "step": 2398 }, { "epoch": 0.07, "grad_norm": 7.091899815397718, "learning_rate": 9.960796389432561e-06, "loss": 0.3808, "step": 2399 }, { "epoch": 0.07, "grad_norm": 7.708740477935715, "learning_rate": 9.960738407621426e-06, "loss": 0.2509, "step": 2400 }, { "epoch": 0.07, "grad_norm": 8.36277425227897, "learning_rate": 9.96068038313367e-06, "loss": 0.5533, "step": 2401 }, { "epoch": 0.07, "grad_norm": 8.201058961363987, "learning_rate": 9.96062231596979e-06, "loss": 0.6418, "step": 2402 }, { "epoch": 0.07, "grad_norm": 5.663190261503518, "learning_rate": 9.960564206130288e-06, "loss": 0.3584, "step": 2403 }, { "epoch": 0.07, "grad_norm": 3.3373885918356945, "learning_rate": 9.960506053615663e-06, "loss": 0.1726, "step": 2404 }, { "epoch": 0.07, "grad_norm": 9.759642796624279, "learning_rate": 9.960447858426413e-06, "loss": 0.4758, "step": 2405 }, { "epoch": 0.07, "grad_norm": 5.932927750358924, "learning_rate": 9.960389620563044e-06, "loss": 0.3994, "step": 2406 }, { "epoch": 0.07, "grad_norm": 6.47631923830696, "learning_rate": 9.960331340026052e-06, "loss": 0.6719, "step": 2407 }, { "epoch": 0.07, "grad_norm": 6.613213544973311, "learning_rate": 9.960273016815941e-06, "loss": 0.3452, "step": 2408 }, { "epoch": 0.07, "grad_norm": 6.319279721899742, "learning_rate": 9.960214650933212e-06, "loss": 0.3151, "step": 2409 }, { "epoch": 0.07, "grad_norm": 6.759872986768406, "learning_rate": 9.960156242378367e-06, "loss": 0.3555, "step": 2410 }, { "epoch": 0.07, "grad_norm": 6.74052502412539, "learning_rate": 9.960097791151908e-06, "loss": 0.5978, "step": 2411 }, { "epoch": 0.07, "grad_norm": 9.953551518330729, "learning_rate": 9.960039297254339e-06, "loss": 0.7415, "step": 2412 }, { "epoch": 0.07, "grad_norm": 3.392880546517788, "learning_rate": 9.959980760686161e-06, "loss": 0.3506, "step": 2413 }, { "epoch": 0.07, "grad_norm": 11.10851953058998, "learning_rate": 9.95992218144788e-06, "loss": 0.8175, "step": 2414 }, { "epoch": 0.07, "grad_norm": 12.25256632178445, "learning_rate": 9.95986355954e-06, "loss": 0.4687, "step": 2415 }, { "epoch": 0.07, "grad_norm": 8.841220075981923, "learning_rate": 9.959804894963025e-06, "loss": 0.591, "step": 2416 }, { "epoch": 0.07, "grad_norm": 11.387584892819113, "learning_rate": 9.959746187717458e-06, "loss": 0.4496, "step": 2417 }, { "epoch": 0.07, "grad_norm": 7.850751814102319, "learning_rate": 9.959687437803805e-06, "loss": 0.5908, "step": 2418 }, { "epoch": 0.07, "grad_norm": 9.831261745283383, "learning_rate": 9.959628645222572e-06, "loss": 0.5965, "step": 2419 }, { "epoch": 0.07, "grad_norm": 9.872764720714802, "learning_rate": 9.959569809974265e-06, "loss": 0.781, "step": 2420 }, { "epoch": 0.07, "grad_norm": 8.010463964143238, "learning_rate": 9.95951093205939e-06, "loss": 1.1261, "step": 2421 }, { "epoch": 0.07, "grad_norm": 13.92826776976973, "learning_rate": 9.95945201147845e-06, "loss": 0.6483, "step": 2422 }, { "epoch": 0.07, "grad_norm": 17.067303355207127, "learning_rate": 9.959393048231958e-06, "loss": 0.7068, "step": 2423 }, { "epoch": 0.07, "grad_norm": 10.028346416897444, "learning_rate": 9.959334042320418e-06, "loss": 0.6421, "step": 2424 }, { "epoch": 0.07, "grad_norm": 7.420923176014003, "learning_rate": 9.959274993744336e-06, "loss": 0.6031, "step": 2425 }, { "epoch": 0.07, "grad_norm": 6.271903072459713, "learning_rate": 9.959215902504224e-06, "loss": 0.5664, "step": 2426 }, { "epoch": 0.07, "grad_norm": 5.867032268720866, "learning_rate": 9.959156768600585e-06, "loss": 0.1975, "step": 2427 }, { "epoch": 0.07, "grad_norm": 10.288848573492649, "learning_rate": 9.959097592033935e-06, "loss": 0.6273, "step": 2428 }, { "epoch": 0.07, "grad_norm": 4.959783032271005, "learning_rate": 9.959038372804777e-06, "loss": 0.3284, "step": 2429 }, { "epoch": 0.07, "grad_norm": 10.065093186122866, "learning_rate": 9.958979110913623e-06, "loss": 0.6053, "step": 2430 }, { "epoch": 0.07, "grad_norm": 5.594887900094516, "learning_rate": 9.95891980636098e-06, "loss": 0.4887, "step": 2431 }, { "epoch": 0.07, "grad_norm": 4.306490068982937, "learning_rate": 9.958860459147363e-06, "loss": 0.4435, "step": 2432 }, { "epoch": 0.07, "grad_norm": 5.44956631116058, "learning_rate": 9.958801069273281e-06, "loss": 0.3322, "step": 2433 }, { "epoch": 0.07, "grad_norm": 8.380447722517347, "learning_rate": 9.958741636739241e-06, "loss": 0.6371, "step": 2434 }, { "epoch": 0.07, "grad_norm": 9.830990614624945, "learning_rate": 9.958682161545759e-06, "loss": 0.6842, "step": 2435 }, { "epoch": 0.07, "grad_norm": 5.576126338953192, "learning_rate": 9.958622643693344e-06, "loss": 0.4586, "step": 2436 }, { "epoch": 0.07, "grad_norm": 7.994522842377457, "learning_rate": 9.958563083182508e-06, "loss": 0.5695, "step": 2437 }, { "epoch": 0.07, "grad_norm": 3.1760384993862143, "learning_rate": 9.958503480013767e-06, "loss": 0.1626, "step": 2438 }, { "epoch": 0.07, "grad_norm": 5.526146468577837, "learning_rate": 9.95844383418763e-06, "loss": 0.3523, "step": 2439 }, { "epoch": 0.07, "grad_norm": 4.726471753470937, "learning_rate": 9.958384145704611e-06, "loss": 0.5174, "step": 2440 }, { "epoch": 0.07, "grad_norm": 8.145214990690416, "learning_rate": 9.958324414565224e-06, "loss": 0.5904, "step": 2441 }, { "epoch": 0.07, "grad_norm": 6.884898810214431, "learning_rate": 9.958264640769981e-06, "loss": 0.494, "step": 2442 }, { "epoch": 0.07, "grad_norm": 7.110713690175888, "learning_rate": 9.958204824319398e-06, "loss": 0.5707, "step": 2443 }, { "epoch": 0.07, "grad_norm": 8.896637021879238, "learning_rate": 9.958144965213992e-06, "loss": 0.5365, "step": 2444 }, { "epoch": 0.07, "grad_norm": 2.6644441661228306, "learning_rate": 9.958085063454273e-06, "loss": 0.4311, "step": 2445 }, { "epoch": 0.07, "grad_norm": 9.241024818065926, "learning_rate": 9.958025119040759e-06, "loss": 0.4962, "step": 2446 }, { "epoch": 0.07, "grad_norm": 9.386739272826608, "learning_rate": 9.957965131973965e-06, "loss": 0.4201, "step": 2447 }, { "epoch": 0.07, "grad_norm": 7.349200105283567, "learning_rate": 9.957905102254409e-06, "loss": 0.6784, "step": 2448 }, { "epoch": 0.07, "grad_norm": 9.419399258841347, "learning_rate": 9.957845029882605e-06, "loss": 0.833, "step": 2449 }, { "epoch": 0.07, "grad_norm": 6.603969663295002, "learning_rate": 9.95778491485907e-06, "loss": 0.5302, "step": 2450 }, { "epoch": 0.07, "grad_norm": 6.652609460869845, "learning_rate": 9.957724757184322e-06, "loss": 0.4077, "step": 2451 }, { "epoch": 0.07, "grad_norm": 9.033666373256793, "learning_rate": 9.957664556858878e-06, "loss": 0.4127, "step": 2452 }, { "epoch": 0.07, "grad_norm": 5.167405701151967, "learning_rate": 9.957604313883258e-06, "loss": 0.2775, "step": 2453 }, { "epoch": 0.07, "grad_norm": 7.215563644736472, "learning_rate": 9.957544028257976e-06, "loss": 0.899, "step": 2454 }, { "epoch": 0.07, "grad_norm": 5.35322101816445, "learning_rate": 9.957483699983555e-06, "loss": 0.2313, "step": 2455 }, { "epoch": 0.07, "grad_norm": 5.780950198261393, "learning_rate": 9.957423329060511e-06, "loss": 0.3183, "step": 2456 }, { "epoch": 0.07, "grad_norm": 4.509952295139856, "learning_rate": 9.957362915489366e-06, "loss": 0.389, "step": 2457 }, { "epoch": 0.07, "grad_norm": 11.352398142606559, "learning_rate": 9.957302459270636e-06, "loss": 0.6421, "step": 2458 }, { "epoch": 0.07, "grad_norm": 3.6471826980869064, "learning_rate": 9.957241960404844e-06, "loss": 0.4136, "step": 2459 }, { "epoch": 0.07, "grad_norm": 6.888700151507124, "learning_rate": 9.95718141889251e-06, "loss": 0.3505, "step": 2460 }, { "epoch": 0.07, "grad_norm": 5.42942917024014, "learning_rate": 9.957120834734156e-06, "loss": 0.7001, "step": 2461 }, { "epoch": 0.07, "grad_norm": 10.29794912567878, "learning_rate": 9.9570602079303e-06, "loss": 0.636, "step": 2462 }, { "epoch": 0.07, "grad_norm": 23.6169885039582, "learning_rate": 9.956999538481467e-06, "loss": 0.3413, "step": 2463 }, { "epoch": 0.07, "grad_norm": 9.106911170641025, "learning_rate": 9.956938826388176e-06, "loss": 0.6501, "step": 2464 }, { "epoch": 0.07, "grad_norm": 7.84121602378671, "learning_rate": 9.95687807165095e-06, "loss": 0.7003, "step": 2465 }, { "epoch": 0.07, "grad_norm": 2.78086408873795, "learning_rate": 9.956817274270314e-06, "loss": 0.2732, "step": 2466 }, { "epoch": 0.07, "grad_norm": 10.203485970296864, "learning_rate": 9.956756434246788e-06, "loss": 0.7925, "step": 2467 }, { "epoch": 0.07, "grad_norm": 5.277639023590955, "learning_rate": 9.956695551580896e-06, "loss": 0.4456, "step": 2468 }, { "epoch": 0.07, "grad_norm": 3.029686750831924, "learning_rate": 9.956634626273164e-06, "loss": 0.2734, "step": 2469 }, { "epoch": 0.07, "grad_norm": 8.06588768573773, "learning_rate": 9.956573658324115e-06, "loss": 0.7103, "step": 2470 }, { "epoch": 0.07, "grad_norm": 4.58155766972323, "learning_rate": 9.956512647734272e-06, "loss": 0.4935, "step": 2471 }, { "epoch": 0.07, "grad_norm": 8.982180650843356, "learning_rate": 9.956451594504161e-06, "loss": 0.7383, "step": 2472 }, { "epoch": 0.07, "grad_norm": 3.580570368164401, "learning_rate": 9.956390498634308e-06, "loss": 0.3772, "step": 2473 }, { "epoch": 0.07, "grad_norm": 6.387142622294649, "learning_rate": 9.956329360125237e-06, "loss": 0.6893, "step": 2474 }, { "epoch": 0.07, "grad_norm": 4.168544254058462, "learning_rate": 9.956268178977475e-06, "loss": 0.7068, "step": 2475 }, { "epoch": 0.07, "grad_norm": 5.120745870382845, "learning_rate": 9.956206955191548e-06, "loss": 0.4556, "step": 2476 }, { "epoch": 0.07, "grad_norm": 4.591117286124565, "learning_rate": 9.956145688767983e-06, "loss": 0.6609, "step": 2477 }, { "epoch": 0.07, "grad_norm": 10.645899603849555, "learning_rate": 9.956084379707307e-06, "loss": 0.7132, "step": 2478 }, { "epoch": 0.07, "grad_norm": 6.213485367917742, "learning_rate": 9.956023028010047e-06, "loss": 0.5361, "step": 2479 }, { "epoch": 0.07, "grad_norm": 7.705611440866474, "learning_rate": 9.955961633676731e-06, "loss": 0.6795, "step": 2480 }, { "epoch": 0.07, "grad_norm": 9.9037354945296, "learning_rate": 9.955900196707887e-06, "loss": 0.9884, "step": 2481 }, { "epoch": 0.07, "grad_norm": 7.761959200831706, "learning_rate": 9.955838717104044e-06, "loss": 0.4776, "step": 2482 }, { "epoch": 0.07, "grad_norm": 5.63185422579179, "learning_rate": 9.95577719486573e-06, "loss": 0.5942, "step": 2483 }, { "epoch": 0.07, "grad_norm": 3.4871438148757528, "learning_rate": 9.955715629993477e-06, "loss": 0.2605, "step": 2484 }, { "epoch": 0.07, "grad_norm": 12.528176822683628, "learning_rate": 9.955654022487811e-06, "loss": 0.2788, "step": 2485 }, { "epoch": 0.07, "grad_norm": 8.708872789043212, "learning_rate": 9.955592372349263e-06, "loss": 0.5785, "step": 2486 }, { "epoch": 0.07, "grad_norm": 12.45329188829737, "learning_rate": 9.955530679578366e-06, "loss": 0.5644, "step": 2487 }, { "epoch": 0.07, "grad_norm": 6.6397015097755725, "learning_rate": 9.955468944175646e-06, "loss": 0.345, "step": 2488 }, { "epoch": 0.07, "grad_norm": 4.617151792746406, "learning_rate": 9.95540716614164e-06, "loss": 0.4857, "step": 2489 }, { "epoch": 0.07, "grad_norm": 7.86413360387717, "learning_rate": 9.955345345476874e-06, "loss": 0.3302, "step": 2490 }, { "epoch": 0.07, "grad_norm": 8.044007436668263, "learning_rate": 9.955283482181884e-06, "loss": 0.4809, "step": 2491 }, { "epoch": 0.07, "grad_norm": 12.646363416409956, "learning_rate": 9.955221576257198e-06, "loss": 0.9576, "step": 2492 }, { "epoch": 0.07, "grad_norm": 5.473940669308614, "learning_rate": 9.955159627703353e-06, "loss": 0.5114, "step": 2493 }, { "epoch": 0.07, "grad_norm": 9.428870392472883, "learning_rate": 9.955097636520878e-06, "loss": 0.9067, "step": 2494 }, { "epoch": 0.07, "grad_norm": 3.7600456153354958, "learning_rate": 9.95503560271031e-06, "loss": 0.3591, "step": 2495 }, { "epoch": 0.07, "grad_norm": 13.384621840255356, "learning_rate": 9.954973526272178e-06, "loss": 0.9494, "step": 2496 }, { "epoch": 0.07, "grad_norm": 7.158801461020335, "learning_rate": 9.954911407207022e-06, "loss": 0.4596, "step": 2497 }, { "epoch": 0.07, "grad_norm": 6.746466347499853, "learning_rate": 9.954849245515372e-06, "loss": 0.6353, "step": 2498 }, { "epoch": 0.07, "grad_norm": 9.470892131962467, "learning_rate": 9.954787041197763e-06, "loss": 0.9363, "step": 2499 }, { "epoch": 0.07, "grad_norm": 5.756577999946688, "learning_rate": 9.954724794254731e-06, "loss": 0.4969, "step": 2500 }, { "epoch": 0.07, "grad_norm": 6.479747472245902, "learning_rate": 9.954662504686813e-06, "loss": 0.3779, "step": 2501 }, { "epoch": 0.07, "grad_norm": 5.880871334716232, "learning_rate": 9.954600172494544e-06, "loss": 0.8347, "step": 2502 }, { "epoch": 0.07, "grad_norm": 9.847586910332042, "learning_rate": 9.954537797678459e-06, "loss": 0.5928, "step": 2503 }, { "epoch": 0.07, "grad_norm": 3.482300007945455, "learning_rate": 9.954475380239095e-06, "loss": 0.396, "step": 2504 }, { "epoch": 0.07, "grad_norm": 3.374421317327929, "learning_rate": 9.954412920176989e-06, "loss": 0.1983, "step": 2505 }, { "epoch": 0.07, "grad_norm": 3.9757534170797517, "learning_rate": 9.95435041749268e-06, "loss": 0.1559, "step": 2506 }, { "epoch": 0.07, "grad_norm": 5.303914751222197, "learning_rate": 9.954287872186704e-06, "loss": 0.6064, "step": 2507 }, { "epoch": 0.07, "grad_norm": 6.61980969066152, "learning_rate": 9.954225284259597e-06, "loss": 0.4442, "step": 2508 }, { "epoch": 0.07, "grad_norm": 8.201176468261247, "learning_rate": 9.954162653711904e-06, "loss": 0.8249, "step": 2509 }, { "epoch": 0.07, "grad_norm": 8.388608096927225, "learning_rate": 9.954099980544156e-06, "loss": 0.8112, "step": 2510 }, { "epoch": 0.07, "grad_norm": 6.848589227010887, "learning_rate": 9.954037264756898e-06, "loss": 0.4062, "step": 2511 }, { "epoch": 0.07, "grad_norm": 4.690383177031658, "learning_rate": 9.953974506350665e-06, "loss": 0.3659, "step": 2512 }, { "epoch": 0.07, "grad_norm": 6.498032125279135, "learning_rate": 9.953911705326002e-06, "loss": 0.3081, "step": 2513 }, { "epoch": 0.07, "grad_norm": 4.469852498242933, "learning_rate": 9.953848861683445e-06, "loss": 0.2905, "step": 2514 }, { "epoch": 0.07, "grad_norm": 5.870807409759459, "learning_rate": 9.953785975423537e-06, "loss": 0.4109, "step": 2515 }, { "epoch": 0.07, "grad_norm": 8.045694446072872, "learning_rate": 9.953723046546817e-06, "loss": 0.6714, "step": 2516 }, { "epoch": 0.07, "grad_norm": 9.949213382163844, "learning_rate": 9.953660075053827e-06, "loss": 0.6487, "step": 2517 }, { "epoch": 0.07, "grad_norm": 6.067233056922282, "learning_rate": 9.953597060945112e-06, "loss": 0.3323, "step": 2518 }, { "epoch": 0.07, "grad_norm": 8.62524546398332, "learning_rate": 9.953534004221211e-06, "loss": 0.6198, "step": 2519 }, { "epoch": 0.07, "grad_norm": 6.148556040453634, "learning_rate": 9.953470904882665e-06, "loss": 0.5229, "step": 2520 }, { "epoch": 0.07, "grad_norm": 4.5354113445446425, "learning_rate": 9.95340776293002e-06, "loss": 0.3772, "step": 2521 }, { "epoch": 0.07, "grad_norm": 9.097375520639202, "learning_rate": 9.953344578363817e-06, "loss": 0.7751, "step": 2522 }, { "epoch": 0.07, "grad_norm": 6.9957097053017625, "learning_rate": 9.9532813511846e-06, "loss": 0.4101, "step": 2523 }, { "epoch": 0.07, "grad_norm": 4.4585095427204475, "learning_rate": 9.953218081392915e-06, "loss": 0.5473, "step": 2524 }, { "epoch": 0.07, "grad_norm": 10.49505862266448, "learning_rate": 9.953154768989304e-06, "loss": 0.6812, "step": 2525 }, { "epoch": 0.07, "grad_norm": 3.965324485460666, "learning_rate": 9.953091413974312e-06, "loss": 0.2968, "step": 2526 }, { "epoch": 0.07, "grad_norm": 2.5506302933473255, "learning_rate": 9.953028016348486e-06, "loss": 0.1755, "step": 2527 }, { "epoch": 0.07, "grad_norm": 6.031359063778203, "learning_rate": 9.952964576112369e-06, "loss": 0.55, "step": 2528 }, { "epoch": 0.07, "grad_norm": 10.73282475500647, "learning_rate": 9.952901093266507e-06, "loss": 0.9708, "step": 2529 }, { "epoch": 0.07, "grad_norm": 6.499087306281301, "learning_rate": 9.952837567811447e-06, "loss": 0.4443, "step": 2530 }, { "epoch": 0.07, "grad_norm": 10.156214458696828, "learning_rate": 9.952773999747734e-06, "loss": 0.5667, "step": 2531 }, { "epoch": 0.07, "grad_norm": 21.65716088431191, "learning_rate": 9.952710389075919e-06, "loss": 0.6045, "step": 2532 }, { "epoch": 0.07, "grad_norm": 7.214252144062328, "learning_rate": 9.952646735796543e-06, "loss": 0.9501, "step": 2533 }, { "epoch": 0.07, "grad_norm": 8.97729855699412, "learning_rate": 9.952583039910158e-06, "loss": 0.7913, "step": 2534 }, { "epoch": 0.07, "grad_norm": 11.119515342167668, "learning_rate": 9.952519301417312e-06, "loss": 0.725, "step": 2535 }, { "epoch": 0.07, "grad_norm": 10.745370000874258, "learning_rate": 9.952455520318552e-06, "loss": 0.3988, "step": 2536 }, { "epoch": 0.07, "grad_norm": 5.180079495655842, "learning_rate": 9.952391696614426e-06, "loss": 0.7109, "step": 2537 }, { "epoch": 0.07, "grad_norm": 5.48915317980635, "learning_rate": 9.952327830305483e-06, "loss": 0.4867, "step": 2538 }, { "epoch": 0.07, "grad_norm": 4.768457202746924, "learning_rate": 9.952263921392276e-06, "loss": 0.2462, "step": 2539 }, { "epoch": 0.07, "grad_norm": 9.958214385361062, "learning_rate": 9.95219996987535e-06, "loss": 0.7125, "step": 2540 }, { "epoch": 0.07, "grad_norm": 5.274130090514446, "learning_rate": 9.95213597575526e-06, "loss": 0.4662, "step": 2541 }, { "epoch": 0.07, "grad_norm": 5.011644275138689, "learning_rate": 9.952071939032553e-06, "loss": 0.324, "step": 2542 }, { "epoch": 0.07, "grad_norm": 6.8167811249665835, "learning_rate": 9.95200785970778e-06, "loss": 1.0912, "step": 2543 }, { "epoch": 0.07, "grad_norm": 7.5710717165977375, "learning_rate": 9.951943737781494e-06, "loss": 0.7303, "step": 2544 }, { "epoch": 0.07, "grad_norm": 4.712764524882547, "learning_rate": 9.951879573254244e-06, "loss": 0.5215, "step": 2545 }, { "epoch": 0.07, "grad_norm": 8.678178932536108, "learning_rate": 9.951815366126584e-06, "loss": 0.8002, "step": 2546 }, { "epoch": 0.07, "grad_norm": 5.442584280641503, "learning_rate": 9.951751116399067e-06, "loss": 0.5591, "step": 2547 }, { "epoch": 0.07, "grad_norm": 9.07852544902603, "learning_rate": 9.951686824072246e-06, "loss": 0.6726, "step": 2548 }, { "epoch": 0.07, "grad_norm": 3.155783666020599, "learning_rate": 9.95162248914667e-06, "loss": 0.3131, "step": 2549 }, { "epoch": 0.07, "grad_norm": 7.529972142121152, "learning_rate": 9.951558111622898e-06, "loss": 0.3642, "step": 2550 }, { "epoch": 0.07, "grad_norm": 9.06855040527106, "learning_rate": 9.95149369150148e-06, "loss": 0.7364, "step": 2551 }, { "epoch": 0.07, "grad_norm": 7.136918923797126, "learning_rate": 9.951429228782971e-06, "loss": 0.5085, "step": 2552 }, { "epoch": 0.07, "grad_norm": 11.360657631348259, "learning_rate": 9.951364723467927e-06, "loss": 1.1095, "step": 2553 }, { "epoch": 0.07, "grad_norm": 4.284522622623788, "learning_rate": 9.951300175556902e-06, "loss": 0.5064, "step": 2554 }, { "epoch": 0.07, "grad_norm": 6.819389918830809, "learning_rate": 9.95123558505045e-06, "loss": 0.6421, "step": 2555 }, { "epoch": 0.07, "grad_norm": 8.771635935494773, "learning_rate": 9.95117095194913e-06, "loss": 0.4055, "step": 2556 }, { "epoch": 0.07, "grad_norm": 6.498267676520823, "learning_rate": 9.951106276253494e-06, "loss": 0.5129, "step": 2557 }, { "epoch": 0.07, "grad_norm": 9.193200658883292, "learning_rate": 9.951041557964101e-06, "loss": 0.707, "step": 2558 }, { "epoch": 0.07, "grad_norm": 6.412258995836607, "learning_rate": 9.950976797081506e-06, "loss": 0.8133, "step": 2559 }, { "epoch": 0.07, "grad_norm": 4.730573481338316, "learning_rate": 9.950911993606268e-06, "loss": 0.4547, "step": 2560 }, { "epoch": 0.07, "grad_norm": 11.424427670891085, "learning_rate": 9.950847147538945e-06, "loss": 0.7569, "step": 2561 }, { "epoch": 0.07, "grad_norm": 6.217608284737804, "learning_rate": 9.950782258880091e-06, "loss": 0.5348, "step": 2562 }, { "epoch": 0.07, "grad_norm": 7.648139238630306, "learning_rate": 9.950717327630268e-06, "loss": 0.2938, "step": 2563 }, { "epoch": 0.07, "grad_norm": 2.3420957194866463, "learning_rate": 9.950652353790034e-06, "loss": 0.1746, "step": 2564 }, { "epoch": 0.07, "grad_norm": 8.976645792150562, "learning_rate": 9.950587337359947e-06, "loss": 0.7233, "step": 2565 }, { "epoch": 0.07, "grad_norm": 6.674723247262861, "learning_rate": 9.950522278340567e-06, "loss": 0.42, "step": 2566 }, { "epoch": 0.07, "grad_norm": 7.955308535746792, "learning_rate": 9.950457176732452e-06, "loss": 0.5711, "step": 2567 }, { "epoch": 0.07, "grad_norm": 7.870288771744463, "learning_rate": 9.950392032536165e-06, "loss": 0.6678, "step": 2568 }, { "epoch": 0.07, "grad_norm": 9.709413760291733, "learning_rate": 9.950326845752264e-06, "loss": 0.7659, "step": 2569 }, { "epoch": 0.07, "grad_norm": 8.652475904570599, "learning_rate": 9.95026161638131e-06, "loss": 0.5751, "step": 2570 }, { "epoch": 0.07, "grad_norm": 2.8444975823995446, "learning_rate": 9.950196344423867e-06, "loss": 0.1841, "step": 2571 }, { "epoch": 0.07, "grad_norm": 9.352463094867414, "learning_rate": 9.950131029880493e-06, "loss": 0.7645, "step": 2572 }, { "epoch": 0.07, "grad_norm": 8.213783881454704, "learning_rate": 9.95006567275175e-06, "loss": 0.9131, "step": 2573 }, { "epoch": 0.07, "grad_norm": 7.974806574565517, "learning_rate": 9.950000273038202e-06, "loss": 0.6869, "step": 2574 }, { "epoch": 0.07, "grad_norm": 7.075297281334735, "learning_rate": 9.949934830740413e-06, "loss": 0.5048, "step": 2575 }, { "epoch": 0.07, "grad_norm": 7.67350579233138, "learning_rate": 9.949869345858943e-06, "loss": 0.1451, "step": 2576 }, { "epoch": 0.07, "grad_norm": 6.640414281755582, "learning_rate": 9.949803818394356e-06, "loss": 0.7679, "step": 2577 }, { "epoch": 0.07, "grad_norm": 4.668653723606237, "learning_rate": 9.949738248347215e-06, "loss": 0.1882, "step": 2578 }, { "epoch": 0.07, "grad_norm": 6.844992259253172, "learning_rate": 9.949672635718089e-06, "loss": 0.6066, "step": 2579 }, { "epoch": 0.07, "grad_norm": 6.879589283017041, "learning_rate": 9.949606980507535e-06, "loss": 0.4268, "step": 2580 }, { "epoch": 0.07, "grad_norm": 7.215890524477819, "learning_rate": 9.949541282716124e-06, "loss": 0.4469, "step": 2581 }, { "epoch": 0.07, "grad_norm": 6.4470713302147855, "learning_rate": 9.949475542344419e-06, "loss": 0.6428, "step": 2582 }, { "epoch": 0.07, "grad_norm": 4.7667925295629825, "learning_rate": 9.949409759392982e-06, "loss": 0.9452, "step": 2583 }, { "epoch": 0.07, "grad_norm": 5.49517272302886, "learning_rate": 9.949343933862384e-06, "loss": 0.4225, "step": 2584 }, { "epoch": 0.07, "grad_norm": 6.110302164500861, "learning_rate": 9.94927806575319e-06, "loss": 0.9192, "step": 2585 }, { "epoch": 0.07, "grad_norm": 8.37352119029555, "learning_rate": 9.949212155065967e-06, "loss": 0.6727, "step": 2586 }, { "epoch": 0.07, "grad_norm": 8.265632730346988, "learning_rate": 9.94914620180128e-06, "loss": 0.4816, "step": 2587 }, { "epoch": 0.07, "grad_norm": 7.544865879815835, "learning_rate": 9.949080205959698e-06, "loss": 0.4265, "step": 2588 }, { "epoch": 0.07, "grad_norm": 9.168936471531579, "learning_rate": 9.949014167541788e-06, "loss": 1.0161, "step": 2589 }, { "epoch": 0.07, "grad_norm": 4.052367147330698, "learning_rate": 9.948948086548117e-06, "loss": 0.3059, "step": 2590 }, { "epoch": 0.07, "grad_norm": 6.487567749845692, "learning_rate": 9.948881962979257e-06, "loss": 0.6993, "step": 2591 }, { "epoch": 0.07, "grad_norm": 4.890981252936587, "learning_rate": 9.948815796835776e-06, "loss": 0.5964, "step": 2592 }, { "epoch": 0.07, "grad_norm": 4.202064351972163, "learning_rate": 9.948749588118239e-06, "loss": 0.3403, "step": 2593 }, { "epoch": 0.07, "grad_norm": 11.548558423463781, "learning_rate": 9.94868333682722e-06, "loss": 0.795, "step": 2594 }, { "epoch": 0.07, "grad_norm": 11.712426190871442, "learning_rate": 9.948617042963288e-06, "loss": 0.8067, "step": 2595 }, { "epoch": 0.07, "grad_norm": 8.3851455044391, "learning_rate": 9.948550706527012e-06, "loss": 0.6541, "step": 2596 }, { "epoch": 0.07, "grad_norm": 6.238360438504124, "learning_rate": 9.948484327518963e-06, "loss": 0.4799, "step": 2597 }, { "epoch": 0.07, "grad_norm": 8.072303016884272, "learning_rate": 9.948417905939714e-06, "loss": 0.3584, "step": 2598 }, { "epoch": 0.07, "grad_norm": 4.7321694653526425, "learning_rate": 9.948351441789836e-06, "loss": 0.6481, "step": 2599 }, { "epoch": 0.07, "grad_norm": 6.5546452335636305, "learning_rate": 9.948284935069897e-06, "loss": 0.4228, "step": 2600 }, { "epoch": 0.07, "grad_norm": 8.76547404276557, "learning_rate": 9.948218385780473e-06, "loss": 0.4964, "step": 2601 }, { "epoch": 0.07, "grad_norm": 7.026706295565209, "learning_rate": 9.948151793922137e-06, "loss": 0.6344, "step": 2602 }, { "epoch": 0.07, "grad_norm": 5.393124340984372, "learning_rate": 9.948085159495458e-06, "loss": 0.2542, "step": 2603 }, { "epoch": 0.07, "grad_norm": 3.5851138629681163, "learning_rate": 9.948018482501013e-06, "loss": 0.2591, "step": 2604 }, { "epoch": 0.07, "grad_norm": 5.749471038868826, "learning_rate": 9.947951762939374e-06, "loss": 0.3507, "step": 2605 }, { "epoch": 0.07, "grad_norm": 7.692594933282017, "learning_rate": 9.947885000811114e-06, "loss": 0.5178, "step": 2606 }, { "epoch": 0.07, "grad_norm": 7.782350393414192, "learning_rate": 9.947818196116812e-06, "loss": 0.9329, "step": 2607 }, { "epoch": 0.07, "grad_norm": 8.68173965380458, "learning_rate": 9.947751348857036e-06, "loss": 0.6067, "step": 2608 }, { "epoch": 0.07, "grad_norm": 3.686563114180113, "learning_rate": 9.947684459032364e-06, "loss": 0.4111, "step": 2609 }, { "epoch": 0.07, "grad_norm": 7.41923803766531, "learning_rate": 9.947617526643375e-06, "loss": 0.4445, "step": 2610 }, { "epoch": 0.07, "grad_norm": 7.339479024208854, "learning_rate": 9.947550551690639e-06, "loss": 0.6546, "step": 2611 }, { "epoch": 0.07, "grad_norm": 2.437517722383286, "learning_rate": 9.947483534174736e-06, "loss": 0.1459, "step": 2612 }, { "epoch": 0.07, "grad_norm": 9.533283304425552, "learning_rate": 9.947416474096239e-06, "loss": 0.8991, "step": 2613 }, { "epoch": 0.07, "grad_norm": 6.495983716958408, "learning_rate": 9.947349371455731e-06, "loss": 0.6929, "step": 2614 }, { "epoch": 0.07, "grad_norm": 5.439656827389483, "learning_rate": 9.947282226253782e-06, "loss": 0.2752, "step": 2615 }, { "epoch": 0.07, "grad_norm": 6.6223910970867195, "learning_rate": 9.947215038490974e-06, "loss": 0.6738, "step": 2616 }, { "epoch": 0.07, "grad_norm": 8.301052643771115, "learning_rate": 9.947147808167886e-06, "loss": 0.7603, "step": 2617 }, { "epoch": 0.07, "grad_norm": 5.754724137549253, "learning_rate": 9.947080535285092e-06, "loss": 0.6123, "step": 2618 }, { "epoch": 0.08, "grad_norm": 6.397672241802122, "learning_rate": 9.947013219843175e-06, "loss": 0.8004, "step": 2619 }, { "epoch": 0.08, "grad_norm": 4.7657836887641905, "learning_rate": 9.946945861842711e-06, "loss": 0.5638, "step": 2620 }, { "epoch": 0.08, "grad_norm": 5.798847037850178, "learning_rate": 9.946878461284281e-06, "loss": 0.3464, "step": 2621 }, { "epoch": 0.08, "grad_norm": 5.752837020239067, "learning_rate": 9.946811018168467e-06, "loss": 0.298, "step": 2622 }, { "epoch": 0.08, "grad_norm": 4.412355670243501, "learning_rate": 9.946743532495845e-06, "loss": 0.5238, "step": 2623 }, { "epoch": 0.08, "grad_norm": 7.9550082332201635, "learning_rate": 9.946676004266998e-06, "loss": 0.4875, "step": 2624 }, { "epoch": 0.08, "grad_norm": 6.690022358308161, "learning_rate": 9.946608433482505e-06, "loss": 0.8795, "step": 2625 }, { "epoch": 0.08, "grad_norm": 5.002932356223668, "learning_rate": 9.94654082014295e-06, "loss": 0.4243, "step": 2626 }, { "epoch": 0.08, "grad_norm": 11.66004779707729, "learning_rate": 9.946473164248913e-06, "loss": 0.889, "step": 2627 }, { "epoch": 0.08, "grad_norm": 5.656677314255239, "learning_rate": 9.946405465800977e-06, "loss": 0.3646, "step": 2628 }, { "epoch": 0.08, "grad_norm": 6.520279884211098, "learning_rate": 9.946337724799723e-06, "loss": 0.2757, "step": 2629 }, { "epoch": 0.08, "grad_norm": 8.588244252388499, "learning_rate": 9.946269941245734e-06, "loss": 1.0126, "step": 2630 }, { "epoch": 0.08, "grad_norm": 6.461771199729892, "learning_rate": 9.946202115139594e-06, "loss": 0.3395, "step": 2631 }, { "epoch": 0.08, "grad_norm": 7.410569874707337, "learning_rate": 9.946134246481888e-06, "loss": 0.1937, "step": 2632 }, { "epoch": 0.08, "grad_norm": 6.406714520244264, "learning_rate": 9.946066335273196e-06, "loss": 0.6345, "step": 2633 }, { "epoch": 0.08, "grad_norm": 7.558227340925209, "learning_rate": 9.945998381514106e-06, "loss": 0.6225, "step": 2634 }, { "epoch": 0.08, "grad_norm": 11.112830074115438, "learning_rate": 9.945930385205198e-06, "loss": 0.5987, "step": 2635 }, { "epoch": 0.08, "grad_norm": 9.877402677256892, "learning_rate": 9.945862346347064e-06, "loss": 0.3668, "step": 2636 }, { "epoch": 0.08, "grad_norm": 4.4825072436213, "learning_rate": 9.945794264940282e-06, "loss": 0.2947, "step": 2637 }, { "epoch": 0.08, "grad_norm": 5.9685478775393515, "learning_rate": 9.94572614098544e-06, "loss": 0.5688, "step": 2638 }, { "epoch": 0.08, "grad_norm": 4.588834546109668, "learning_rate": 9.94565797448313e-06, "loss": 0.087, "step": 2639 }, { "epoch": 0.08, "grad_norm": 16.414868922806836, "learning_rate": 9.94558976543393e-06, "loss": 0.8407, "step": 2640 }, { "epoch": 0.08, "grad_norm": 7.112801130788591, "learning_rate": 9.94552151383843e-06, "loss": 0.5432, "step": 2641 }, { "epoch": 0.08, "grad_norm": 8.990139380828918, "learning_rate": 9.945453219697217e-06, "loss": 0.4487, "step": 2642 }, { "epoch": 0.08, "grad_norm": 8.089911239099374, "learning_rate": 9.94538488301088e-06, "loss": 0.3231, "step": 2643 }, { "epoch": 0.08, "grad_norm": 7.909887992171953, "learning_rate": 9.945316503780004e-06, "loss": 0.7462, "step": 2644 }, { "epoch": 0.08, "grad_norm": 6.389663810542412, "learning_rate": 9.94524808200518e-06, "loss": 0.2254, "step": 2645 }, { "epoch": 0.08, "grad_norm": 6.58835312243088, "learning_rate": 9.945179617686998e-06, "loss": 0.4729, "step": 2646 }, { "epoch": 0.08, "grad_norm": 3.7174936945335206, "learning_rate": 9.94511111082604e-06, "loss": 0.3327, "step": 2647 }, { "epoch": 0.08, "grad_norm": 5.19536605463175, "learning_rate": 9.945042561422904e-06, "loss": 0.7756, "step": 2648 }, { "epoch": 0.08, "grad_norm": 4.9255347314991935, "learning_rate": 9.944973969478173e-06, "loss": 0.206, "step": 2649 }, { "epoch": 0.08, "grad_norm": 2.4791237375939668, "learning_rate": 9.944905334992443e-06, "loss": 0.2426, "step": 2650 }, { "epoch": 0.08, "grad_norm": 8.637029413634064, "learning_rate": 9.944836657966298e-06, "loss": 0.6297, "step": 2651 }, { "epoch": 0.08, "grad_norm": 15.745040112586969, "learning_rate": 9.944767938400333e-06, "loss": 0.6666, "step": 2652 }, { "epoch": 0.08, "grad_norm": 7.495560921819984, "learning_rate": 9.944699176295138e-06, "loss": 0.4242, "step": 2653 }, { "epoch": 0.08, "grad_norm": 9.02785589200647, "learning_rate": 9.944630371651306e-06, "loss": 0.5185, "step": 2654 }, { "epoch": 0.08, "grad_norm": 7.703484207661044, "learning_rate": 9.944561524469428e-06, "loss": 0.808, "step": 2655 }, { "epoch": 0.08, "grad_norm": 11.403938790886642, "learning_rate": 9.944492634750093e-06, "loss": 0.3802, "step": 2656 }, { "epoch": 0.08, "grad_norm": 5.636725813662306, "learning_rate": 9.944423702493899e-06, "loss": 0.5051, "step": 2657 }, { "epoch": 0.08, "grad_norm": 6.523580766578, "learning_rate": 9.944354727701438e-06, "loss": 0.6299, "step": 2658 }, { "epoch": 0.08, "grad_norm": 6.65600958018347, "learning_rate": 9.9442857103733e-06, "loss": 0.3001, "step": 2659 }, { "epoch": 0.08, "grad_norm": 4.8217234223646415, "learning_rate": 9.94421665051008e-06, "loss": 0.3861, "step": 2660 }, { "epoch": 0.08, "grad_norm": 6.650232379839939, "learning_rate": 9.944147548112374e-06, "loss": 0.6544, "step": 2661 }, { "epoch": 0.08, "grad_norm": 7.981389870609166, "learning_rate": 9.944078403180776e-06, "loss": 0.5081, "step": 2662 }, { "epoch": 0.08, "grad_norm": 9.82817274121453, "learning_rate": 9.94400921571588e-06, "loss": 0.4294, "step": 2663 }, { "epoch": 0.08, "grad_norm": 7.720795086352305, "learning_rate": 9.94393998571828e-06, "loss": 1.0015, "step": 2664 }, { "epoch": 0.08, "grad_norm": 3.9111844443962442, "learning_rate": 9.943870713188575e-06, "loss": 0.4608, "step": 2665 }, { "epoch": 0.08, "grad_norm": 5.777953656690429, "learning_rate": 9.943801398127358e-06, "loss": 0.4677, "step": 2666 }, { "epoch": 0.08, "grad_norm": 10.782405890822028, "learning_rate": 9.943732040535226e-06, "loss": 0.8966, "step": 2667 }, { "epoch": 0.08, "grad_norm": 5.792988834359148, "learning_rate": 9.943662640412777e-06, "loss": 0.6169, "step": 2668 }, { "epoch": 0.08, "grad_norm": 7.544758944291449, "learning_rate": 9.943593197760606e-06, "loss": 0.5894, "step": 2669 }, { "epoch": 0.08, "grad_norm": 6.772902840496851, "learning_rate": 9.94352371257931e-06, "loss": 0.7246, "step": 2670 }, { "epoch": 0.08, "grad_norm": 6.794661132862397, "learning_rate": 9.94345418486949e-06, "loss": 0.4903, "step": 2671 }, { "epoch": 0.08, "grad_norm": 5.093520364090229, "learning_rate": 9.943384614631743e-06, "loss": 0.3553, "step": 2672 }, { "epoch": 0.08, "grad_norm": 7.610241474425911, "learning_rate": 9.943315001866665e-06, "loss": 0.6125, "step": 2673 }, { "epoch": 0.08, "grad_norm": 5.765548746902178, "learning_rate": 9.943245346574858e-06, "loss": 0.437, "step": 2674 }, { "epoch": 0.08, "grad_norm": 4.0400467047966, "learning_rate": 9.94317564875692e-06, "loss": 0.8033, "step": 2675 }, { "epoch": 0.08, "grad_norm": 5.660835135890284, "learning_rate": 9.94310590841345e-06, "loss": 0.6282, "step": 2676 }, { "epoch": 0.08, "grad_norm": 6.46998718888372, "learning_rate": 9.94303612554505e-06, "loss": 0.3965, "step": 2677 }, { "epoch": 0.08, "grad_norm": 9.991445792723427, "learning_rate": 9.942966300152317e-06, "loss": 0.4358, "step": 2678 }, { "epoch": 0.08, "grad_norm": 4.807414798943689, "learning_rate": 9.942896432235853e-06, "loss": 0.2975, "step": 2679 }, { "epoch": 0.08, "grad_norm": 5.051736008283459, "learning_rate": 9.94282652179626e-06, "loss": 0.752, "step": 2680 }, { "epoch": 0.08, "grad_norm": 7.269999706787775, "learning_rate": 9.94275656883414e-06, "loss": 0.4448, "step": 2681 }, { "epoch": 0.08, "grad_norm": 10.158877317546771, "learning_rate": 9.942686573350094e-06, "loss": 0.5343, "step": 2682 }, { "epoch": 0.08, "grad_norm": 7.368329053766952, "learning_rate": 9.942616535344722e-06, "loss": 0.635, "step": 2683 }, { "epoch": 0.08, "grad_norm": 5.042448198679363, "learning_rate": 9.94254645481863e-06, "loss": 0.6735, "step": 2684 }, { "epoch": 0.08, "grad_norm": 7.864124781564513, "learning_rate": 9.942476331772419e-06, "loss": 0.5021, "step": 2685 }, { "epoch": 0.08, "grad_norm": 10.351235622787119, "learning_rate": 9.942406166206693e-06, "loss": 0.9353, "step": 2686 }, { "epoch": 0.08, "grad_norm": 8.177676298829486, "learning_rate": 9.942335958122054e-06, "loss": 0.8057, "step": 2687 }, { "epoch": 0.08, "grad_norm": 6.843494985349368, "learning_rate": 9.942265707519108e-06, "loss": 0.5055, "step": 2688 }, { "epoch": 0.08, "grad_norm": 12.00746701301334, "learning_rate": 9.94219541439846e-06, "loss": 0.7879, "step": 2689 }, { "epoch": 0.08, "grad_norm": 5.012086422189597, "learning_rate": 9.94212507876071e-06, "loss": 0.3951, "step": 2690 }, { "epoch": 0.08, "grad_norm": 4.514720390550144, "learning_rate": 9.94205470060647e-06, "loss": 0.3456, "step": 2691 }, { "epoch": 0.08, "grad_norm": 3.824500785613107, "learning_rate": 9.94198427993634e-06, "loss": 0.2344, "step": 2692 }, { "epoch": 0.08, "grad_norm": 6.278492374409155, "learning_rate": 9.94191381675093e-06, "loss": 0.2282, "step": 2693 }, { "epoch": 0.08, "grad_norm": 9.4125847760723, "learning_rate": 9.941843311050842e-06, "loss": 0.5983, "step": 2694 }, { "epoch": 0.08, "grad_norm": 6.053428115305746, "learning_rate": 9.941772762836683e-06, "loss": 0.3902, "step": 2695 }, { "epoch": 0.08, "grad_norm": 12.232364026437937, "learning_rate": 9.941702172109064e-06, "loss": 0.7582, "step": 2696 }, { "epoch": 0.08, "grad_norm": 4.382218155706409, "learning_rate": 9.941631538868588e-06, "loss": 0.2835, "step": 2697 }, { "epoch": 0.08, "grad_norm": 12.392111305480672, "learning_rate": 9.941560863115865e-06, "loss": 0.6567, "step": 2698 }, { "epoch": 0.08, "grad_norm": 7.249233830186838, "learning_rate": 9.941490144851503e-06, "loss": 0.5067, "step": 2699 }, { "epoch": 0.08, "grad_norm": 5.2878114919693635, "learning_rate": 9.94141938407611e-06, "loss": 0.2753, "step": 2700 }, { "epoch": 0.08, "grad_norm": 31.456717409647652, "learning_rate": 9.941348580790292e-06, "loss": 0.3764, "step": 2701 }, { "epoch": 0.08, "grad_norm": 3.3662037131395497, "learning_rate": 9.941277734994662e-06, "loss": 0.3259, "step": 2702 }, { "epoch": 0.08, "grad_norm": 8.069495544738693, "learning_rate": 9.941206846689828e-06, "loss": 0.57, "step": 2703 }, { "epoch": 0.08, "grad_norm": 8.035709434386712, "learning_rate": 9.941135915876402e-06, "loss": 0.3687, "step": 2704 }, { "epoch": 0.08, "grad_norm": 6.225020118282547, "learning_rate": 9.941064942554988e-06, "loss": 0.6005, "step": 2705 }, { "epoch": 0.08, "grad_norm": 3.765149953168609, "learning_rate": 9.940993926726204e-06, "loss": 0.178, "step": 2706 }, { "epoch": 0.08, "grad_norm": 5.510220135343179, "learning_rate": 9.940922868390654e-06, "loss": 0.3481, "step": 2707 }, { "epoch": 0.08, "grad_norm": 5.613002656037222, "learning_rate": 9.940851767548955e-06, "loss": 0.3338, "step": 2708 }, { "epoch": 0.08, "grad_norm": 6.038334811290561, "learning_rate": 9.940780624201718e-06, "loss": 0.2914, "step": 2709 }, { "epoch": 0.08, "grad_norm": 9.015432266475969, "learning_rate": 9.940709438349551e-06, "loss": 0.7152, "step": 2710 }, { "epoch": 0.08, "grad_norm": 6.552325508562583, "learning_rate": 9.940638209993073e-06, "loss": 0.279, "step": 2711 }, { "epoch": 0.08, "grad_norm": 9.017962491554535, "learning_rate": 9.940566939132889e-06, "loss": 0.8372, "step": 2712 }, { "epoch": 0.08, "grad_norm": 12.436890093547285, "learning_rate": 9.940495625769617e-06, "loss": 0.5343, "step": 2713 }, { "epoch": 0.08, "grad_norm": 10.527533353609122, "learning_rate": 9.94042426990387e-06, "loss": 1.1019, "step": 2714 }, { "epoch": 0.08, "grad_norm": 8.678345172137243, "learning_rate": 9.940352871536261e-06, "loss": 0.5354, "step": 2715 }, { "epoch": 0.08, "grad_norm": 7.675757581368566, "learning_rate": 9.940281430667404e-06, "loss": 0.4797, "step": 2716 }, { "epoch": 0.08, "grad_norm": 6.518703913320016, "learning_rate": 9.940209947297914e-06, "loss": 0.483, "step": 2717 }, { "epoch": 0.08, "grad_norm": 8.755552219684995, "learning_rate": 9.940138421428408e-06, "loss": 0.8087, "step": 2718 }, { "epoch": 0.08, "grad_norm": 13.402677829262148, "learning_rate": 9.940066853059495e-06, "loss": 0.7233, "step": 2719 }, { "epoch": 0.08, "grad_norm": 8.604124597134087, "learning_rate": 9.9399952421918e-06, "loss": 0.691, "step": 2720 }, { "epoch": 0.08, "grad_norm": 8.774518022043436, "learning_rate": 9.93992358882593e-06, "loss": 0.5121, "step": 2721 }, { "epoch": 0.08, "grad_norm": 7.577709823718862, "learning_rate": 9.939851892962508e-06, "loss": 0.3831, "step": 2722 }, { "epoch": 0.08, "grad_norm": 8.823915379254226, "learning_rate": 9.939780154602149e-06, "loss": 0.7723, "step": 2723 }, { "epoch": 0.08, "grad_norm": 8.241250918721153, "learning_rate": 9.939708373745469e-06, "loss": 0.8778, "step": 2724 }, { "epoch": 0.08, "grad_norm": 7.126152346935092, "learning_rate": 9.939636550393085e-06, "loss": 0.5173, "step": 2725 }, { "epoch": 0.08, "grad_norm": 4.392987304735151, "learning_rate": 9.939564684545618e-06, "loss": 0.4219, "step": 2726 }, { "epoch": 0.08, "grad_norm": 11.459181616623148, "learning_rate": 9.939492776203681e-06, "loss": 0.9948, "step": 2727 }, { "epoch": 0.08, "grad_norm": 14.343726928958331, "learning_rate": 9.939420825367897e-06, "loss": 0.7335, "step": 2728 }, { "epoch": 0.08, "grad_norm": 7.204500675199002, "learning_rate": 9.939348832038886e-06, "loss": 0.3953, "step": 2729 }, { "epoch": 0.08, "grad_norm": 8.57535150991939, "learning_rate": 9.939276796217263e-06, "loss": 1.2008, "step": 2730 }, { "epoch": 0.08, "grad_norm": 6.43649876780377, "learning_rate": 9.93920471790365e-06, "loss": 0.2487, "step": 2731 }, { "epoch": 0.08, "grad_norm": 9.94051847859512, "learning_rate": 9.939132597098668e-06, "loss": 0.7593, "step": 2732 }, { "epoch": 0.08, "grad_norm": 9.836338122922424, "learning_rate": 9.939060433802937e-06, "loss": 0.7253, "step": 2733 }, { "epoch": 0.08, "grad_norm": 14.488106815772191, "learning_rate": 9.938988228017075e-06, "loss": 0.8279, "step": 2734 }, { "epoch": 0.08, "grad_norm": 7.101815801452815, "learning_rate": 9.938915979741706e-06, "loss": 0.471, "step": 2735 }, { "epoch": 0.08, "grad_norm": 6.518458767874568, "learning_rate": 9.938843688977451e-06, "loss": 0.3124, "step": 2736 }, { "epoch": 0.08, "grad_norm": 3.9061163307207756, "learning_rate": 9.938771355724931e-06, "loss": 0.3357, "step": 2737 }, { "epoch": 0.08, "grad_norm": 5.2989379106531365, "learning_rate": 9.938698979984772e-06, "loss": 0.54, "step": 2738 }, { "epoch": 0.08, "grad_norm": 3.4965647460101135, "learning_rate": 9.938626561757591e-06, "loss": 0.1, "step": 2739 }, { "epoch": 0.08, "grad_norm": 6.827313267973384, "learning_rate": 9.938554101044016e-06, "loss": 0.2889, "step": 2740 }, { "epoch": 0.08, "grad_norm": 3.288934637198469, "learning_rate": 9.938481597844666e-06, "loss": 0.126, "step": 2741 }, { "epoch": 0.08, "grad_norm": 10.1858937688112, "learning_rate": 9.938409052160168e-06, "loss": 0.7873, "step": 2742 }, { "epoch": 0.08, "grad_norm": 6.3727020161014085, "learning_rate": 9.938336463991145e-06, "loss": 0.4827, "step": 2743 }, { "epoch": 0.08, "grad_norm": 8.789706139626972, "learning_rate": 9.93826383333822e-06, "loss": 0.5089, "step": 2744 }, { "epoch": 0.08, "grad_norm": 6.502580515602457, "learning_rate": 9.938191160202021e-06, "loss": 0.4472, "step": 2745 }, { "epoch": 0.08, "grad_norm": 5.376778951388074, "learning_rate": 9.93811844458317e-06, "loss": 0.3609, "step": 2746 }, { "epoch": 0.08, "grad_norm": 11.336189882398966, "learning_rate": 9.938045686482296e-06, "loss": 0.6972, "step": 2747 }, { "epoch": 0.08, "grad_norm": 8.790597739179091, "learning_rate": 9.937972885900022e-06, "loss": 0.4992, "step": 2748 }, { "epoch": 0.08, "grad_norm": 11.519715368145427, "learning_rate": 9.937900042836974e-06, "loss": 0.4845, "step": 2749 }, { "epoch": 0.08, "grad_norm": 13.718764667894174, "learning_rate": 9.937827157293782e-06, "loss": 1.1427, "step": 2750 }, { "epoch": 0.08, "grad_norm": 5.043769613272763, "learning_rate": 9.937754229271069e-06, "loss": 0.2991, "step": 2751 }, { "epoch": 0.08, "grad_norm": 5.9578559279641095, "learning_rate": 9.937681258769464e-06, "loss": 0.6336, "step": 2752 }, { "epoch": 0.08, "grad_norm": 5.800840435304127, "learning_rate": 9.937608245789598e-06, "loss": 0.6174, "step": 2753 }, { "epoch": 0.08, "grad_norm": 7.046314919284171, "learning_rate": 9.937535190332095e-06, "loss": 0.5585, "step": 2754 }, { "epoch": 0.08, "grad_norm": 12.358862633580127, "learning_rate": 9.937462092397583e-06, "loss": 0.788, "step": 2755 }, { "epoch": 0.08, "grad_norm": 5.066335284957411, "learning_rate": 9.937388951986694e-06, "loss": 0.3999, "step": 2756 }, { "epoch": 0.08, "grad_norm": 5.014579640244361, "learning_rate": 9.937315769100055e-06, "loss": 0.2263, "step": 2757 }, { "epoch": 0.08, "grad_norm": 9.556763916966675, "learning_rate": 9.937242543738296e-06, "loss": 0.5692, "step": 2758 }, { "epoch": 0.08, "grad_norm": 11.858618590265257, "learning_rate": 9.937169275902049e-06, "loss": 0.8192, "step": 2759 }, { "epoch": 0.08, "grad_norm": 8.615400528500082, "learning_rate": 9.93709596559194e-06, "loss": 0.5331, "step": 2760 }, { "epoch": 0.08, "grad_norm": 15.109292188888887, "learning_rate": 9.937022612808605e-06, "loss": 0.5678, "step": 2761 }, { "epoch": 0.08, "grad_norm": 4.446954353573479, "learning_rate": 9.936949217552671e-06, "loss": 0.2071, "step": 2762 }, { "epoch": 0.08, "grad_norm": 4.348415366749389, "learning_rate": 9.93687577982477e-06, "loss": 0.545, "step": 2763 }, { "epoch": 0.08, "grad_norm": 9.277180714027965, "learning_rate": 9.936802299625537e-06, "loss": 0.5112, "step": 2764 }, { "epoch": 0.08, "grad_norm": 8.911641841197733, "learning_rate": 9.9367287769556e-06, "loss": 0.4168, "step": 2765 }, { "epoch": 0.08, "grad_norm": 5.470480770344012, "learning_rate": 9.936655211815592e-06, "loss": 0.6308, "step": 2766 }, { "epoch": 0.08, "grad_norm": 5.60261681077034, "learning_rate": 9.936581604206149e-06, "loss": 0.6972, "step": 2767 }, { "epoch": 0.08, "grad_norm": 11.311247951285221, "learning_rate": 9.936507954127901e-06, "loss": 0.4029, "step": 2768 }, { "epoch": 0.08, "grad_norm": 4.767116825926468, "learning_rate": 9.936434261581484e-06, "loss": 0.3846, "step": 2769 }, { "epoch": 0.08, "grad_norm": 10.123610601345057, "learning_rate": 9.93636052656753e-06, "loss": 0.6619, "step": 2770 }, { "epoch": 0.08, "grad_norm": 4.829095786065175, "learning_rate": 9.936286749086674e-06, "loss": 0.2286, "step": 2771 }, { "epoch": 0.08, "grad_norm": 9.217476449703035, "learning_rate": 9.936212929139553e-06, "loss": 0.8582, "step": 2772 }, { "epoch": 0.08, "grad_norm": 7.655033871347152, "learning_rate": 9.936139066726797e-06, "loss": 0.608, "step": 2773 }, { "epoch": 0.08, "grad_norm": 8.533343606684381, "learning_rate": 9.936065161849045e-06, "loss": 0.7264, "step": 2774 }, { "epoch": 0.08, "grad_norm": 9.276356187157916, "learning_rate": 9.935991214506933e-06, "loss": 0.2759, "step": 2775 }, { "epoch": 0.08, "grad_norm": 11.225564571192693, "learning_rate": 9.935917224701097e-06, "loss": 0.7078, "step": 2776 }, { "epoch": 0.08, "grad_norm": 6.131482683563106, "learning_rate": 9.93584319243217e-06, "loss": 0.5549, "step": 2777 }, { "epoch": 0.08, "grad_norm": 5.0705549890402635, "learning_rate": 9.935769117700796e-06, "loss": 0.3547, "step": 2778 }, { "epoch": 0.08, "grad_norm": 4.9705627781328605, "learning_rate": 9.935695000507605e-06, "loss": 0.6612, "step": 2779 }, { "epoch": 0.08, "grad_norm": 7.16615742345313, "learning_rate": 9.935620840853237e-06, "loss": 0.3167, "step": 2780 }, { "epoch": 0.08, "grad_norm": 8.487105293815468, "learning_rate": 9.935546638738332e-06, "loss": 0.7374, "step": 2781 }, { "epoch": 0.08, "grad_norm": 3.804743740911566, "learning_rate": 9.935472394163528e-06, "loss": 0.4153, "step": 2782 }, { "epoch": 0.08, "grad_norm": 8.188010338173411, "learning_rate": 9.935398107129462e-06, "loss": 0.8085, "step": 2783 }, { "epoch": 0.08, "grad_norm": 3.7396162114871943, "learning_rate": 9.935323777636772e-06, "loss": 0.4073, "step": 2784 }, { "epoch": 0.08, "grad_norm": 5.000491285029857, "learning_rate": 9.9352494056861e-06, "loss": 0.4775, "step": 2785 }, { "epoch": 0.08, "grad_norm": 7.2150483652292765, "learning_rate": 9.935174991278086e-06, "loss": 0.419, "step": 2786 }, { "epoch": 0.08, "grad_norm": 11.846112742814771, "learning_rate": 9.935100534413369e-06, "loss": 0.5885, "step": 2787 }, { "epoch": 0.08, "grad_norm": 5.76590779755885, "learning_rate": 9.935026035092589e-06, "loss": 0.4439, "step": 2788 }, { "epoch": 0.08, "grad_norm": 7.394739405004069, "learning_rate": 9.934951493316388e-06, "loss": 0.4215, "step": 2789 }, { "epoch": 0.08, "grad_norm": 5.460722781259109, "learning_rate": 9.934876909085407e-06, "loss": 0.6562, "step": 2790 }, { "epoch": 0.08, "grad_norm": 6.7207834759459475, "learning_rate": 9.934802282400287e-06, "loss": 0.5048, "step": 2791 }, { "epoch": 0.08, "grad_norm": 8.619722493212027, "learning_rate": 9.934727613261671e-06, "loss": 0.505, "step": 2792 }, { "epoch": 0.08, "grad_norm": 6.666002606061489, "learning_rate": 9.934652901670201e-06, "loss": 0.4993, "step": 2793 }, { "epoch": 0.08, "grad_norm": 5.358735182581269, "learning_rate": 9.934578147626519e-06, "loss": 0.3696, "step": 2794 }, { "epoch": 0.08, "grad_norm": 4.605717025419311, "learning_rate": 9.93450335113127e-06, "loss": 0.488, "step": 2795 }, { "epoch": 0.08, "grad_norm": 4.481060198564461, "learning_rate": 9.934428512185095e-06, "loss": 0.4376, "step": 2796 }, { "epoch": 0.08, "grad_norm": 6.759479647817052, "learning_rate": 9.93435363078864e-06, "loss": 0.354, "step": 2797 }, { "epoch": 0.08, "grad_norm": 6.709500781844887, "learning_rate": 9.934278706942546e-06, "loss": 0.365, "step": 2798 }, { "epoch": 0.08, "grad_norm": 8.502037140816503, "learning_rate": 9.934203740647463e-06, "loss": 0.6744, "step": 2799 }, { "epoch": 0.08, "grad_norm": 4.451083856560198, "learning_rate": 9.934128731904031e-06, "loss": 0.428, "step": 2800 }, { "epoch": 0.08, "grad_norm": 8.573360933740938, "learning_rate": 9.934053680712899e-06, "loss": 0.4836, "step": 2801 }, { "epoch": 0.08, "grad_norm": 6.623138742082563, "learning_rate": 9.933978587074709e-06, "loss": 0.6632, "step": 2802 }, { "epoch": 0.08, "grad_norm": 7.00743090080736, "learning_rate": 9.933903450990107e-06, "loss": 0.4452, "step": 2803 }, { "epoch": 0.08, "grad_norm": 9.887949697730745, "learning_rate": 9.933828272459743e-06, "loss": 0.9016, "step": 2804 }, { "epoch": 0.08, "grad_norm": 7.410347364471775, "learning_rate": 9.933753051484263e-06, "loss": 0.8038, "step": 2805 }, { "epoch": 0.08, "grad_norm": 8.506898885245953, "learning_rate": 9.93367778806431e-06, "loss": 0.5058, "step": 2806 }, { "epoch": 0.08, "grad_norm": 6.340015984885103, "learning_rate": 9.933602482200537e-06, "loss": 0.4186, "step": 2807 }, { "epoch": 0.08, "grad_norm": 8.898466383139445, "learning_rate": 9.933527133893587e-06, "loss": 0.8245, "step": 2808 }, { "epoch": 0.08, "grad_norm": 13.068207511424262, "learning_rate": 9.933451743144114e-06, "loss": 0.6775, "step": 2809 }, { "epoch": 0.08, "grad_norm": 6.197838401834557, "learning_rate": 9.93337630995276e-06, "loss": 0.6256, "step": 2810 }, { "epoch": 0.08, "grad_norm": 4.641031677716641, "learning_rate": 9.933300834320179e-06, "loss": 0.4483, "step": 2811 }, { "epoch": 0.08, "grad_norm": 6.896774869260234, "learning_rate": 9.933225316247017e-06, "loss": 0.8547, "step": 2812 }, { "epoch": 0.08, "grad_norm": 7.5611006017358555, "learning_rate": 9.933149755733924e-06, "loss": 0.6255, "step": 2813 }, { "epoch": 0.08, "grad_norm": 3.2967597977456564, "learning_rate": 9.933074152781552e-06, "loss": 0.3328, "step": 2814 }, { "epoch": 0.08, "grad_norm": 4.639662928670013, "learning_rate": 9.932998507390553e-06, "loss": 0.4167, "step": 2815 }, { "epoch": 0.08, "grad_norm": 8.110625007149066, "learning_rate": 9.932922819561573e-06, "loss": 0.3705, "step": 2816 }, { "epoch": 0.08, "grad_norm": 10.588170565613007, "learning_rate": 9.932847089295263e-06, "loss": 0.8324, "step": 2817 }, { "epoch": 0.08, "grad_norm": 3.9487433897363413, "learning_rate": 9.93277131659228e-06, "loss": 0.3134, "step": 2818 }, { "epoch": 0.08, "grad_norm": 5.949675073486956, "learning_rate": 9.932695501453273e-06, "loss": 0.8046, "step": 2819 }, { "epoch": 0.08, "grad_norm": 2.8071265812013384, "learning_rate": 9.932619643878892e-06, "loss": 0.3113, "step": 2820 }, { "epoch": 0.08, "grad_norm": 4.00892198460736, "learning_rate": 9.932543743869792e-06, "loss": 0.4595, "step": 2821 }, { "epoch": 0.08, "grad_norm": 9.216981672959319, "learning_rate": 9.932467801426627e-06, "loss": 0.6018, "step": 2822 }, { "epoch": 0.08, "grad_norm": 6.818052286593808, "learning_rate": 9.932391816550046e-06, "loss": 0.439, "step": 2823 }, { "epoch": 0.08, "grad_norm": 6.455788863812538, "learning_rate": 9.932315789240707e-06, "loss": 0.4643, "step": 2824 }, { "epoch": 0.08, "grad_norm": 8.69116205108136, "learning_rate": 9.932239719499263e-06, "loss": 0.6494, "step": 2825 }, { "epoch": 0.08, "grad_norm": 7.821932477202643, "learning_rate": 9.932163607326369e-06, "loss": 1.2025, "step": 2826 }, { "epoch": 0.08, "grad_norm": 4.067432929894563, "learning_rate": 9.932087452722677e-06, "loss": 0.271, "step": 2827 }, { "epoch": 0.08, "grad_norm": 7.211800933756414, "learning_rate": 9.932011255688844e-06, "loss": 0.4256, "step": 2828 }, { "epoch": 0.08, "grad_norm": 7.100440848458788, "learning_rate": 9.931935016225524e-06, "loss": 0.1889, "step": 2829 }, { "epoch": 0.08, "grad_norm": 9.586324694834051, "learning_rate": 9.931858734333378e-06, "loss": 0.7607, "step": 2830 }, { "epoch": 0.08, "grad_norm": 9.769504307614788, "learning_rate": 9.931782410013055e-06, "loss": 0.5855, "step": 2831 }, { "epoch": 0.08, "grad_norm": 5.725392720441428, "learning_rate": 9.931706043265216e-06, "loss": 0.5068, "step": 2832 }, { "epoch": 0.08, "grad_norm": 3.632601299094239, "learning_rate": 9.931629634090519e-06, "loss": 0.3359, "step": 2833 }, { "epoch": 0.08, "grad_norm": 6.2636379220801786, "learning_rate": 9.931553182489618e-06, "loss": 0.65, "step": 2834 }, { "epoch": 0.08, "grad_norm": 4.882152909551463, "learning_rate": 9.931476688463173e-06, "loss": 0.5937, "step": 2835 }, { "epoch": 0.08, "grad_norm": 4.905302551535515, "learning_rate": 9.93140015201184e-06, "loss": 0.413, "step": 2836 }, { "epoch": 0.08, "grad_norm": 7.540958968480511, "learning_rate": 9.93132357313628e-06, "loss": 0.7301, "step": 2837 }, { "epoch": 0.08, "grad_norm": 3.8112553300129024, "learning_rate": 9.93124695183715e-06, "loss": 0.2681, "step": 2838 }, { "epoch": 0.08, "grad_norm": 5.87049404794638, "learning_rate": 9.931170288115108e-06, "loss": 0.2332, "step": 2839 }, { "epoch": 0.08, "grad_norm": 7.23157448662904, "learning_rate": 9.931093581970817e-06, "loss": 0.4899, "step": 2840 }, { "epoch": 0.08, "grad_norm": 4.175688126099282, "learning_rate": 9.931016833404935e-06, "loss": 0.3086, "step": 2841 }, { "epoch": 0.08, "grad_norm": 4.031933334749352, "learning_rate": 9.930940042418122e-06, "loss": 0.5143, "step": 2842 }, { "epoch": 0.08, "grad_norm": 6.096454978564515, "learning_rate": 9.930863209011038e-06, "loss": 0.3482, "step": 2843 }, { "epoch": 0.08, "grad_norm": 4.536888035052501, "learning_rate": 9.930786333184345e-06, "loss": 0.4045, "step": 2844 }, { "epoch": 0.08, "grad_norm": 31.570723325401694, "learning_rate": 9.930709414938706e-06, "loss": 0.4005, "step": 2845 }, { "epoch": 0.08, "grad_norm": 7.399881320723395, "learning_rate": 9.93063245427478e-06, "loss": 0.3897, "step": 2846 }, { "epoch": 0.08, "grad_norm": 2.7227921527236694, "learning_rate": 9.93055545119323e-06, "loss": 0.4529, "step": 2847 }, { "epoch": 0.08, "grad_norm": 8.193696392707427, "learning_rate": 9.930478405694718e-06, "loss": 0.4976, "step": 2848 }, { "epoch": 0.08, "grad_norm": 9.947559567311716, "learning_rate": 9.93040131777991e-06, "loss": 0.4419, "step": 2849 }, { "epoch": 0.08, "grad_norm": 6.833001863379708, "learning_rate": 9.930324187449463e-06, "loss": 0.6804, "step": 2850 }, { "epoch": 0.08, "grad_norm": 6.503507914674051, "learning_rate": 9.930247014704045e-06, "loss": 0.7733, "step": 2851 }, { "epoch": 0.08, "grad_norm": 5.613961727385599, "learning_rate": 9.93016979954432e-06, "loss": 0.4329, "step": 2852 }, { "epoch": 0.08, "grad_norm": 8.544120832988952, "learning_rate": 9.93009254197095e-06, "loss": 0.8036, "step": 2853 }, { "epoch": 0.08, "grad_norm": 9.459433224174028, "learning_rate": 9.930015241984602e-06, "loss": 0.9515, "step": 2854 }, { "epoch": 0.08, "grad_norm": 9.418884612540932, "learning_rate": 9.92993789958594e-06, "loss": 0.8399, "step": 2855 }, { "epoch": 0.08, "grad_norm": 3.990462645332563, "learning_rate": 9.929860514775629e-06, "loss": 0.5372, "step": 2856 }, { "epoch": 0.08, "grad_norm": 7.494452141199416, "learning_rate": 9.929783087554334e-06, "loss": 0.3937, "step": 2857 }, { "epoch": 0.08, "grad_norm": 5.288213123542634, "learning_rate": 9.929705617922723e-06, "loss": 0.3758, "step": 2858 }, { "epoch": 0.08, "grad_norm": 4.636706666360229, "learning_rate": 9.929628105881462e-06, "loss": 0.572, "step": 2859 }, { "epoch": 0.08, "grad_norm": 5.018660105355041, "learning_rate": 9.929550551431216e-06, "loss": 0.4191, "step": 2860 }, { "epoch": 0.08, "grad_norm": 9.38019560213441, "learning_rate": 9.929472954572654e-06, "loss": 0.4903, "step": 2861 }, { "epoch": 0.08, "grad_norm": 10.895747420857868, "learning_rate": 9.929395315306442e-06, "loss": 0.7187, "step": 2862 }, { "epoch": 0.08, "grad_norm": 5.515303864464469, "learning_rate": 9.92931763363325e-06, "loss": 0.7775, "step": 2863 }, { "epoch": 0.08, "grad_norm": 4.026665794222992, "learning_rate": 9.929239909553746e-06, "loss": 0.2024, "step": 2864 }, { "epoch": 0.08, "grad_norm": 2.6372628053341076, "learning_rate": 9.929162143068599e-06, "loss": 0.1488, "step": 2865 }, { "epoch": 0.08, "grad_norm": 8.876910635297788, "learning_rate": 9.929084334178475e-06, "loss": 0.3583, "step": 2866 }, { "epoch": 0.08, "grad_norm": 6.228814649895197, "learning_rate": 9.929006482884045e-06, "loss": 0.4663, "step": 2867 }, { "epoch": 0.08, "grad_norm": 5.607561078497895, "learning_rate": 9.92892858918598e-06, "loss": 0.5962, "step": 2868 }, { "epoch": 0.08, "grad_norm": 5.174768658318385, "learning_rate": 9.92885065308495e-06, "loss": 0.3931, "step": 2869 }, { "epoch": 0.08, "grad_norm": 3.944568284348762, "learning_rate": 9.928772674581624e-06, "loss": 0.4069, "step": 2870 }, { "epoch": 0.08, "grad_norm": 6.429095661647238, "learning_rate": 9.928694653676673e-06, "loss": 0.7298, "step": 2871 }, { "epoch": 0.08, "grad_norm": 11.640494900174824, "learning_rate": 9.928616590370769e-06, "loss": 0.7348, "step": 2872 }, { "epoch": 0.08, "grad_norm": 6.4185853129119055, "learning_rate": 9.928538484664582e-06, "loss": 0.4483, "step": 2873 }, { "epoch": 0.08, "grad_norm": 5.330356298493599, "learning_rate": 9.928460336558788e-06, "loss": 0.3696, "step": 2874 }, { "epoch": 0.08, "grad_norm": 8.324243954531292, "learning_rate": 9.928382146054054e-06, "loss": 0.586, "step": 2875 }, { "epoch": 0.08, "grad_norm": 10.624398786701176, "learning_rate": 9.928303913151058e-06, "loss": 1.0766, "step": 2876 }, { "epoch": 0.08, "grad_norm": 7.836723387112832, "learning_rate": 9.928225637850467e-06, "loss": 0.5143, "step": 2877 }, { "epoch": 0.08, "grad_norm": 6.454257930293429, "learning_rate": 9.92814732015296e-06, "loss": 0.4751, "step": 2878 }, { "epoch": 0.08, "grad_norm": 9.984609777230377, "learning_rate": 9.928068960059208e-06, "loss": 0.7865, "step": 2879 }, { "epoch": 0.08, "grad_norm": 3.7786889348725596, "learning_rate": 9.927990557569885e-06, "loss": 0.4696, "step": 2880 }, { "epoch": 0.08, "grad_norm": 6.201872115360855, "learning_rate": 9.927912112685667e-06, "loss": 0.4417, "step": 2881 }, { "epoch": 0.08, "grad_norm": 7.613312628707302, "learning_rate": 9.927833625407226e-06, "loss": 0.8027, "step": 2882 }, { "epoch": 0.08, "grad_norm": 6.212004141715905, "learning_rate": 9.92775509573524e-06, "loss": 0.2851, "step": 2883 }, { "epoch": 0.08, "grad_norm": 10.558754612427268, "learning_rate": 9.927676523670384e-06, "loss": 0.6985, "step": 2884 }, { "epoch": 0.08, "grad_norm": 3.3673725475612604, "learning_rate": 9.927597909213332e-06, "loss": 0.3155, "step": 2885 }, { "epoch": 0.08, "grad_norm": 9.873390646061333, "learning_rate": 9.927519252364764e-06, "loss": 0.6016, "step": 2886 }, { "epoch": 0.08, "grad_norm": 12.131622393639503, "learning_rate": 9.927440553125354e-06, "loss": 0.9014, "step": 2887 }, { "epoch": 0.08, "grad_norm": 5.578751515485083, "learning_rate": 9.927361811495777e-06, "loss": 0.4522, "step": 2888 }, { "epoch": 0.08, "grad_norm": 6.314945049575372, "learning_rate": 9.927283027476718e-06, "loss": 0.4923, "step": 2889 }, { "epoch": 0.08, "grad_norm": 6.979622574524098, "learning_rate": 9.927204201068845e-06, "loss": 0.6837, "step": 2890 }, { "epoch": 0.08, "grad_norm": 8.026025460581886, "learning_rate": 9.927125332272843e-06, "loss": 0.6216, "step": 2891 }, { "epoch": 0.08, "grad_norm": 6.08193120953771, "learning_rate": 9.92704642108939e-06, "loss": 0.9133, "step": 2892 }, { "epoch": 0.08, "grad_norm": 11.201239497704824, "learning_rate": 9.92696746751916e-06, "loss": 0.4436, "step": 2893 }, { "epoch": 0.08, "grad_norm": 5.785582367673101, "learning_rate": 9.926888471562837e-06, "loss": 0.2662, "step": 2894 }, { "epoch": 0.08, "grad_norm": 7.515475138797922, "learning_rate": 9.9268094332211e-06, "loss": 0.2804, "step": 2895 }, { "epoch": 0.08, "grad_norm": 4.326917679612143, "learning_rate": 9.926730352494628e-06, "loss": 0.4757, "step": 2896 }, { "epoch": 0.08, "grad_norm": 6.25842388847792, "learning_rate": 9.926651229384098e-06, "loss": 0.6197, "step": 2897 }, { "epoch": 0.08, "grad_norm": 6.54374346801082, "learning_rate": 9.926572063890199e-06, "loss": 0.4434, "step": 2898 }, { "epoch": 0.08, "grad_norm": 4.775578944075643, "learning_rate": 9.926492856013604e-06, "loss": 0.1989, "step": 2899 }, { "epoch": 0.08, "grad_norm": 6.6715598110104395, "learning_rate": 9.926413605754998e-06, "loss": 0.9154, "step": 2900 }, { "epoch": 0.08, "grad_norm": 5.982816249141878, "learning_rate": 9.926334313115063e-06, "loss": 0.2637, "step": 2901 }, { "epoch": 0.08, "grad_norm": 5.298838293766735, "learning_rate": 9.92625497809448e-06, "loss": 0.2696, "step": 2902 }, { "epoch": 0.08, "grad_norm": 7.47741981302653, "learning_rate": 9.926175600693933e-06, "loss": 0.5896, "step": 2903 }, { "epoch": 0.08, "grad_norm": 7.226417993698767, "learning_rate": 9.926096180914102e-06, "loss": 0.3097, "step": 2904 }, { "epoch": 0.08, "grad_norm": 5.297081957349576, "learning_rate": 9.926016718755674e-06, "loss": 0.2911, "step": 2905 }, { "epoch": 0.08, "grad_norm": 5.310760684547762, "learning_rate": 9.925937214219329e-06, "loss": 0.3813, "step": 2906 }, { "epoch": 0.08, "grad_norm": 1.6589755538371775, "learning_rate": 9.925857667305753e-06, "loss": 0.1637, "step": 2907 }, { "epoch": 0.08, "grad_norm": 9.896595386810766, "learning_rate": 9.92577807801563e-06, "loss": 0.5892, "step": 2908 }, { "epoch": 0.08, "grad_norm": 4.960716326731877, "learning_rate": 9.925698446349646e-06, "loss": 0.3807, "step": 2909 }, { "epoch": 0.08, "grad_norm": 9.063763024022618, "learning_rate": 9.925618772308483e-06, "loss": 0.798, "step": 2910 }, { "epoch": 0.08, "grad_norm": 13.599990254286471, "learning_rate": 9.925539055892828e-06, "loss": 0.7479, "step": 2911 }, { "epoch": 0.08, "grad_norm": 4.840215562481095, "learning_rate": 9.925459297103369e-06, "loss": 0.3201, "step": 2912 }, { "epoch": 0.08, "grad_norm": 9.828266573372959, "learning_rate": 9.925379495940788e-06, "loss": 1.0889, "step": 2913 }, { "epoch": 0.08, "grad_norm": 10.831526140940165, "learning_rate": 9.925299652405776e-06, "loss": 0.808, "step": 2914 }, { "epoch": 0.08, "grad_norm": 4.952693547371574, "learning_rate": 9.925219766499014e-06, "loss": 0.5105, "step": 2915 }, { "epoch": 0.08, "grad_norm": 6.421634938748859, "learning_rate": 9.925139838221197e-06, "loss": 0.4259, "step": 2916 }, { "epoch": 0.08, "grad_norm": 7.721370113328461, "learning_rate": 9.925059867573005e-06, "loss": 0.5879, "step": 2917 }, { "epoch": 0.08, "grad_norm": 5.912736711340448, "learning_rate": 9.92497985455513e-06, "loss": 0.696, "step": 2918 }, { "epoch": 0.08, "grad_norm": 5.577814024049251, "learning_rate": 9.92489979916826e-06, "loss": 0.4702, "step": 2919 }, { "epoch": 0.08, "grad_norm": 4.362956199730407, "learning_rate": 9.924819701413085e-06, "loss": 0.3677, "step": 2920 }, { "epoch": 0.08, "grad_norm": 5.680280358017438, "learning_rate": 9.924739561290289e-06, "loss": 0.4183, "step": 2921 }, { "epoch": 0.08, "grad_norm": 7.19995318768436, "learning_rate": 9.924659378800567e-06, "loss": 0.3699, "step": 2922 }, { "epoch": 0.08, "grad_norm": 6.910487605334478, "learning_rate": 9.924579153944607e-06, "loss": 0.429, "step": 2923 }, { "epoch": 0.08, "grad_norm": 7.062732658942595, "learning_rate": 9.9244988867231e-06, "loss": 0.5164, "step": 2924 }, { "epoch": 0.08, "grad_norm": 4.152518239127655, "learning_rate": 9.924418577136732e-06, "loss": 0.3654, "step": 2925 }, { "epoch": 0.08, "grad_norm": 9.664968363339606, "learning_rate": 9.924338225186199e-06, "loss": 1.0995, "step": 2926 }, { "epoch": 0.08, "grad_norm": 8.898621193025518, "learning_rate": 9.924257830872192e-06, "loss": 0.5063, "step": 2927 }, { "epoch": 0.08, "grad_norm": 5.426452990612392, "learning_rate": 9.9241773941954e-06, "loss": 0.3627, "step": 2928 }, { "epoch": 0.08, "grad_norm": 7.408343860145302, "learning_rate": 9.924096915156515e-06, "loss": 0.3316, "step": 2929 }, { "epoch": 0.08, "grad_norm": 8.726944220927544, "learning_rate": 9.924016393756233e-06, "loss": 0.4065, "step": 2930 }, { "epoch": 0.08, "grad_norm": 5.626368462146642, "learning_rate": 9.923935829995241e-06, "loss": 0.4114, "step": 2931 }, { "epoch": 0.08, "grad_norm": 10.774594937032617, "learning_rate": 9.923855223874239e-06, "loss": 0.6504, "step": 2932 }, { "epoch": 0.08, "grad_norm": 8.072641544371102, "learning_rate": 9.923774575393915e-06, "loss": 0.1848, "step": 2933 }, { "epoch": 0.08, "grad_norm": 6.492406738104963, "learning_rate": 9.923693884554964e-06, "loss": 0.5622, "step": 2934 }, { "epoch": 0.08, "grad_norm": 5.444995274327157, "learning_rate": 9.92361315135808e-06, "loss": 0.6892, "step": 2935 }, { "epoch": 0.08, "grad_norm": 13.832110435175153, "learning_rate": 9.92353237580396e-06, "loss": 0.8685, "step": 2936 }, { "epoch": 0.08, "grad_norm": 6.679088971722678, "learning_rate": 9.923451557893297e-06, "loss": 0.3567, "step": 2937 }, { "epoch": 0.08, "grad_norm": 4.851285327604838, "learning_rate": 9.923370697626786e-06, "loss": 0.3888, "step": 2938 }, { "epoch": 0.08, "grad_norm": 5.90896487535104, "learning_rate": 9.923289795005123e-06, "loss": 0.3113, "step": 2939 }, { "epoch": 0.08, "grad_norm": 5.9927649387617254, "learning_rate": 9.923208850029005e-06, "loss": 0.5494, "step": 2940 }, { "epoch": 0.08, "grad_norm": 3.7947469328384074, "learning_rate": 9.923127862699125e-06, "loss": 0.2162, "step": 2941 }, { "epoch": 0.08, "grad_norm": 11.286056889305607, "learning_rate": 9.923046833016184e-06, "loss": 0.6882, "step": 2942 }, { "epoch": 0.08, "grad_norm": 6.125399595966463, "learning_rate": 9.922965760980877e-06, "loss": 0.5231, "step": 2943 }, { "epoch": 0.08, "grad_norm": 5.459662161635045, "learning_rate": 9.922884646593902e-06, "loss": 0.7445, "step": 2944 }, { "epoch": 0.08, "grad_norm": 6.499772728101173, "learning_rate": 9.922803489855956e-06, "loss": 0.4389, "step": 2945 }, { "epoch": 0.08, "grad_norm": 10.340457265397866, "learning_rate": 9.922722290767738e-06, "loss": 0.6192, "step": 2946 }, { "epoch": 0.08, "grad_norm": 4.988162954148051, "learning_rate": 9.922641049329946e-06, "loss": 0.4891, "step": 2947 }, { "epoch": 0.08, "grad_norm": 5.834142488036201, "learning_rate": 9.922559765543279e-06, "loss": 0.5544, "step": 2948 }, { "epoch": 0.08, "grad_norm": 9.34902396998126, "learning_rate": 9.922478439408436e-06, "loss": 1.0769, "step": 2949 }, { "epoch": 0.08, "grad_norm": 4.330058660880716, "learning_rate": 9.92239707092612e-06, "loss": 0.4094, "step": 2950 }, { "epoch": 0.08, "grad_norm": 6.196078470798307, "learning_rate": 9.922315660097024e-06, "loss": 0.8246, "step": 2951 }, { "epoch": 0.08, "grad_norm": 10.400835982982207, "learning_rate": 9.922234206921854e-06, "loss": 0.869, "step": 2952 }, { "epoch": 0.08, "grad_norm": 6.580574431346901, "learning_rate": 9.922152711401309e-06, "loss": 0.5858, "step": 2953 }, { "epoch": 0.08, "grad_norm": 5.600403691114462, "learning_rate": 9.922071173536092e-06, "loss": 0.5825, "step": 2954 }, { "epoch": 0.08, "grad_norm": 3.994971452403331, "learning_rate": 9.921989593326902e-06, "loss": 0.2353, "step": 2955 }, { "epoch": 0.08, "grad_norm": 7.648427244341232, "learning_rate": 9.92190797077444e-06, "loss": 0.4072, "step": 2956 }, { "epoch": 0.08, "grad_norm": 4.900585038428787, "learning_rate": 9.921826305879411e-06, "loss": 0.6475, "step": 2957 }, { "epoch": 0.08, "grad_norm": 8.894650263737692, "learning_rate": 9.921744598642515e-06, "loss": 0.5274, "step": 2958 }, { "epoch": 0.08, "grad_norm": 7.307035605271158, "learning_rate": 9.921662849064457e-06, "loss": 0.5334, "step": 2959 }, { "epoch": 0.08, "grad_norm": 10.920195312569867, "learning_rate": 9.92158105714594e-06, "loss": 0.9733, "step": 2960 }, { "epoch": 0.08, "grad_norm": 5.229481519159286, "learning_rate": 9.921499222887667e-06, "loss": 0.352, "step": 2961 }, { "epoch": 0.08, "grad_norm": 4.110625131266881, "learning_rate": 9.921417346290341e-06, "loss": 0.5787, "step": 2962 }, { "epoch": 0.08, "grad_norm": 6.043600334199991, "learning_rate": 9.92133542735467e-06, "loss": 0.5865, "step": 2963 }, { "epoch": 0.08, "grad_norm": 8.046275766523138, "learning_rate": 9.921253466081354e-06, "loss": 0.4906, "step": 2964 }, { "epoch": 0.08, "grad_norm": 6.832142222110109, "learning_rate": 9.9211714624711e-06, "loss": 0.4081, "step": 2965 }, { "epoch": 0.08, "grad_norm": 5.92118497265604, "learning_rate": 9.921089416524617e-06, "loss": 0.3447, "step": 2966 }, { "epoch": 0.08, "grad_norm": 8.575933235477933, "learning_rate": 9.921007328242604e-06, "loss": 0.6722, "step": 2967 }, { "epoch": 0.08, "grad_norm": 6.147432043903009, "learning_rate": 9.920925197625772e-06, "loss": 0.4063, "step": 2968 }, { "epoch": 0.09, "grad_norm": 4.822727408379291, "learning_rate": 9.920843024674826e-06, "loss": 0.2129, "step": 2969 }, { "epoch": 0.09, "grad_norm": 3.725261917122845, "learning_rate": 9.920760809390475e-06, "loss": 0.306, "step": 2970 }, { "epoch": 0.09, "grad_norm": 5.749972550699809, "learning_rate": 9.920678551773424e-06, "loss": 0.4764, "step": 2971 }, { "epoch": 0.09, "grad_norm": 5.914995853038891, "learning_rate": 9.920596251824381e-06, "loss": 0.6015, "step": 2972 }, { "epoch": 0.09, "grad_norm": 9.411625336919439, "learning_rate": 9.920513909544053e-06, "loss": 0.5328, "step": 2973 }, { "epoch": 0.09, "grad_norm": 5.583158620786286, "learning_rate": 9.920431524933152e-06, "loss": 0.2905, "step": 2974 }, { "epoch": 0.09, "grad_norm": 6.5361780803121095, "learning_rate": 9.920349097992384e-06, "loss": 0.3997, "step": 2975 }, { "epoch": 0.09, "grad_norm": 7.785382931208913, "learning_rate": 9.920266628722457e-06, "loss": 0.2575, "step": 2976 }, { "epoch": 0.09, "grad_norm": 6.960075693266982, "learning_rate": 9.920184117124082e-06, "loss": 0.9003, "step": 2977 }, { "epoch": 0.09, "grad_norm": 5.921106776832055, "learning_rate": 9.920101563197971e-06, "loss": 0.4927, "step": 2978 }, { "epoch": 0.09, "grad_norm": 13.539502030517038, "learning_rate": 9.920018966944829e-06, "loss": 0.6228, "step": 2979 }, { "epoch": 0.09, "grad_norm": 7.539190451371311, "learning_rate": 9.919936328365371e-06, "loss": 0.552, "step": 2980 }, { "epoch": 0.09, "grad_norm": 7.841816880520091, "learning_rate": 9.919853647460307e-06, "loss": 0.586, "step": 2981 }, { "epoch": 0.09, "grad_norm": 5.621208014794798, "learning_rate": 9.919770924230347e-06, "loss": 0.3264, "step": 2982 }, { "epoch": 0.09, "grad_norm": 8.928441238544066, "learning_rate": 9.919688158676205e-06, "loss": 0.7423, "step": 2983 }, { "epoch": 0.09, "grad_norm": 7.871703290410579, "learning_rate": 9.919605350798591e-06, "loss": 0.5082, "step": 2984 }, { "epoch": 0.09, "grad_norm": 9.98204870688972, "learning_rate": 9.919522500598217e-06, "loss": 0.5933, "step": 2985 }, { "epoch": 0.09, "grad_norm": 8.822790482004164, "learning_rate": 9.919439608075797e-06, "loss": 0.5383, "step": 2986 }, { "epoch": 0.09, "grad_norm": 7.13403871322664, "learning_rate": 9.919356673232044e-06, "loss": 0.8931, "step": 2987 }, { "epoch": 0.09, "grad_norm": 6.178589849972216, "learning_rate": 9.91927369606767e-06, "loss": 0.669, "step": 2988 }, { "epoch": 0.09, "grad_norm": 21.33531073505556, "learning_rate": 9.919190676583392e-06, "loss": 0.4522, "step": 2989 }, { "epoch": 0.09, "grad_norm": 6.413033294810205, "learning_rate": 9.919107614779922e-06, "loss": 0.6534, "step": 2990 }, { "epoch": 0.09, "grad_norm": 10.481834319812911, "learning_rate": 9.919024510657973e-06, "loss": 0.7026, "step": 2991 }, { "epoch": 0.09, "grad_norm": 6.411976111355054, "learning_rate": 9.918941364218265e-06, "loss": 0.431, "step": 2992 }, { "epoch": 0.09, "grad_norm": 6.765893944026644, "learning_rate": 9.918858175461507e-06, "loss": 0.4727, "step": 2993 }, { "epoch": 0.09, "grad_norm": 4.446720376345663, "learning_rate": 9.918774944388418e-06, "loss": 0.4322, "step": 2994 }, { "epoch": 0.09, "grad_norm": 5.804745618649891, "learning_rate": 9.918691670999715e-06, "loss": 0.5081, "step": 2995 }, { "epoch": 0.09, "grad_norm": 12.671541898702626, "learning_rate": 9.918608355296114e-06, "loss": 0.9719, "step": 2996 }, { "epoch": 0.09, "grad_norm": 3.6210785076392558, "learning_rate": 9.918524997278329e-06, "loss": 0.1915, "step": 2997 }, { "epoch": 0.09, "grad_norm": 8.53795854669133, "learning_rate": 9.91844159694708e-06, "loss": 0.4008, "step": 2998 }, { "epoch": 0.09, "grad_norm": 6.2966223729953406, "learning_rate": 9.918358154303082e-06, "loss": 0.6701, "step": 2999 }, { "epoch": 0.09, "grad_norm": 11.542025403301686, "learning_rate": 9.918274669347056e-06, "loss": 0.698, "step": 3000 }, { "epoch": 0.09, "grad_norm": 6.554342449504262, "learning_rate": 9.918191142079719e-06, "loss": 0.3914, "step": 3001 }, { "epoch": 0.09, "grad_norm": 6.197792855530089, "learning_rate": 9.918107572501786e-06, "loss": 0.9864, "step": 3002 }, { "epoch": 0.09, "grad_norm": 7.535385832346874, "learning_rate": 9.918023960613982e-06, "loss": 0.7669, "step": 3003 }, { "epoch": 0.09, "grad_norm": 6.397602925816132, "learning_rate": 9.917940306417025e-06, "loss": 0.3451, "step": 3004 }, { "epoch": 0.09, "grad_norm": 5.261847887387324, "learning_rate": 9.917856609911629e-06, "loss": 0.8931, "step": 3005 }, { "epoch": 0.09, "grad_norm": 6.007901909092198, "learning_rate": 9.91777287109852e-06, "loss": 0.4329, "step": 3006 }, { "epoch": 0.09, "grad_norm": 5.190343697841328, "learning_rate": 9.917689089978418e-06, "loss": 0.6788, "step": 3007 }, { "epoch": 0.09, "grad_norm": 6.1280164492114935, "learning_rate": 9.91760526655204e-06, "loss": 0.4534, "step": 3008 }, { "epoch": 0.09, "grad_norm": 7.155260575705821, "learning_rate": 9.91752140082011e-06, "loss": 0.4381, "step": 3009 }, { "epoch": 0.09, "grad_norm": 8.106061784122438, "learning_rate": 9.917437492783349e-06, "loss": 0.7279, "step": 3010 }, { "epoch": 0.09, "grad_norm": 5.596212984993243, "learning_rate": 9.917353542442479e-06, "loss": 0.4402, "step": 3011 }, { "epoch": 0.09, "grad_norm": 9.835273992991345, "learning_rate": 9.917269549798221e-06, "loss": 1.4056, "step": 3012 }, { "epoch": 0.09, "grad_norm": 7.594172524836073, "learning_rate": 9.917185514851299e-06, "loss": 0.508, "step": 3013 }, { "epoch": 0.09, "grad_norm": 5.535099263459849, "learning_rate": 9.917101437602435e-06, "loss": 0.514, "step": 3014 }, { "epoch": 0.09, "grad_norm": 8.368559453274981, "learning_rate": 9.917017318052354e-06, "loss": 0.6768, "step": 3015 }, { "epoch": 0.09, "grad_norm": 11.737147099041408, "learning_rate": 9.916933156201776e-06, "loss": 0.6341, "step": 3016 }, { "epoch": 0.09, "grad_norm": 8.178166609684848, "learning_rate": 9.91684895205143e-06, "loss": 0.9113, "step": 3017 }, { "epoch": 0.09, "grad_norm": 2.2042216148690454, "learning_rate": 9.916764705602036e-06, "loss": 0.23, "step": 3018 }, { "epoch": 0.09, "grad_norm": 7.46792138263531, "learning_rate": 9.916680416854322e-06, "loss": 0.7279, "step": 3019 }, { "epoch": 0.09, "grad_norm": 8.66602927089302, "learning_rate": 9.916596085809011e-06, "loss": 0.5537, "step": 3020 }, { "epoch": 0.09, "grad_norm": 9.160709271685876, "learning_rate": 9.91651171246683e-06, "loss": 1.0097, "step": 3021 }, { "epoch": 0.09, "grad_norm": 9.279877840586376, "learning_rate": 9.916427296828503e-06, "loss": 0.6124, "step": 3022 }, { "epoch": 0.09, "grad_norm": 8.018866941174648, "learning_rate": 9.916342838894758e-06, "loss": 0.8079, "step": 3023 }, { "epoch": 0.09, "grad_norm": 6.253388058735422, "learning_rate": 9.91625833866632e-06, "loss": 0.5378, "step": 3024 }, { "epoch": 0.09, "grad_norm": 3.581970597047964, "learning_rate": 9.916173796143917e-06, "loss": 0.4529, "step": 3025 }, { "epoch": 0.09, "grad_norm": 10.415696905135697, "learning_rate": 9.916089211328278e-06, "loss": 0.4875, "step": 3026 }, { "epoch": 0.09, "grad_norm": 6.120677590601454, "learning_rate": 9.916004584220127e-06, "loss": 0.7273, "step": 3027 }, { "epoch": 0.09, "grad_norm": 6.737570940256421, "learning_rate": 9.915919914820194e-06, "loss": 0.5465, "step": 3028 }, { "epoch": 0.09, "grad_norm": 5.381164010454116, "learning_rate": 9.915835203129207e-06, "loss": 0.4299, "step": 3029 }, { "epoch": 0.09, "grad_norm": 6.56239536746944, "learning_rate": 9.915750449147896e-06, "loss": 0.5597, "step": 3030 }, { "epoch": 0.09, "grad_norm": 8.581651917982834, "learning_rate": 9.915665652876988e-06, "loss": 0.9073, "step": 3031 }, { "epoch": 0.09, "grad_norm": 7.07647726319692, "learning_rate": 9.915580814317215e-06, "loss": 1.0148, "step": 3032 }, { "epoch": 0.09, "grad_norm": 9.249917674342658, "learning_rate": 9.915495933469305e-06, "loss": 0.6569, "step": 3033 }, { "epoch": 0.09, "grad_norm": 13.285905807333496, "learning_rate": 9.915411010333988e-06, "loss": 0.521, "step": 3034 }, { "epoch": 0.09, "grad_norm": 8.437712151014859, "learning_rate": 9.915326044911995e-06, "loss": 0.889, "step": 3035 }, { "epoch": 0.09, "grad_norm": 19.57927045949553, "learning_rate": 9.915241037204059e-06, "loss": 0.5151, "step": 3036 }, { "epoch": 0.09, "grad_norm": 7.295889548958915, "learning_rate": 9.915155987210908e-06, "loss": 0.4174, "step": 3037 }, { "epoch": 0.09, "grad_norm": 6.150274329541186, "learning_rate": 9.915070894933276e-06, "loss": 0.2414, "step": 3038 }, { "epoch": 0.09, "grad_norm": 6.802485673658713, "learning_rate": 9.914985760371893e-06, "loss": 0.7656, "step": 3039 }, { "epoch": 0.09, "grad_norm": 7.417174446925396, "learning_rate": 9.914900583527493e-06, "loss": 0.5976, "step": 3040 }, { "epoch": 0.09, "grad_norm": 5.424938292965452, "learning_rate": 9.914815364400809e-06, "loss": 0.8745, "step": 3041 }, { "epoch": 0.09, "grad_norm": 6.850876324698965, "learning_rate": 9.914730102992575e-06, "loss": 0.4067, "step": 3042 }, { "epoch": 0.09, "grad_norm": 7.520578255030597, "learning_rate": 9.914644799303522e-06, "loss": 0.3353, "step": 3043 }, { "epoch": 0.09, "grad_norm": 6.383389058214318, "learning_rate": 9.914559453334384e-06, "loss": 0.3754, "step": 3044 }, { "epoch": 0.09, "grad_norm": 6.18421124226565, "learning_rate": 9.914474065085897e-06, "loss": 0.4788, "step": 3045 }, { "epoch": 0.09, "grad_norm": 4.6592792667452825, "learning_rate": 9.914388634558793e-06, "loss": 0.5605, "step": 3046 }, { "epoch": 0.09, "grad_norm": 22.321835400170933, "learning_rate": 9.91430316175381e-06, "loss": 1.1807, "step": 3047 }, { "epoch": 0.09, "grad_norm": 9.294913820767025, "learning_rate": 9.914217646671682e-06, "loss": 0.7054, "step": 3048 }, { "epoch": 0.09, "grad_norm": 6.406168123047658, "learning_rate": 9.914132089313144e-06, "loss": 0.8981, "step": 3049 }, { "epoch": 0.09, "grad_norm": 5.728716228001844, "learning_rate": 9.914046489678935e-06, "loss": 0.4931, "step": 3050 }, { "epoch": 0.09, "grad_norm": 11.438890049581133, "learning_rate": 9.913960847769785e-06, "loss": 0.6915, "step": 3051 }, { "epoch": 0.09, "grad_norm": 6.397677813135055, "learning_rate": 9.91387516358644e-06, "loss": 0.5129, "step": 3052 }, { "epoch": 0.09, "grad_norm": 8.624854819140152, "learning_rate": 9.913789437129628e-06, "loss": 0.5443, "step": 3053 }, { "epoch": 0.09, "grad_norm": 5.312571177286278, "learning_rate": 9.913703668400092e-06, "loss": 0.3005, "step": 3054 }, { "epoch": 0.09, "grad_norm": 5.225316741687948, "learning_rate": 9.913617857398568e-06, "loss": 0.6377, "step": 3055 }, { "epoch": 0.09, "grad_norm": 11.101238119821083, "learning_rate": 9.913532004125794e-06, "loss": 0.8377, "step": 3056 }, { "epoch": 0.09, "grad_norm": 6.377960415276807, "learning_rate": 9.91344610858251e-06, "loss": 0.4294, "step": 3057 }, { "epoch": 0.09, "grad_norm": 3.5368817412559386, "learning_rate": 9.913360170769453e-06, "loss": 0.5286, "step": 3058 }, { "epoch": 0.09, "grad_norm": 5.00054995373336, "learning_rate": 9.913274190687365e-06, "loss": 0.6626, "step": 3059 }, { "epoch": 0.09, "grad_norm": 5.437381019605966, "learning_rate": 9.913188168336982e-06, "loss": 0.5197, "step": 3060 }, { "epoch": 0.09, "grad_norm": 8.44970871011342, "learning_rate": 9.913102103719048e-06, "loss": 0.581, "step": 3061 }, { "epoch": 0.09, "grad_norm": 4.163971805121602, "learning_rate": 9.913015996834301e-06, "loss": 0.2729, "step": 3062 }, { "epoch": 0.09, "grad_norm": 7.853962849233104, "learning_rate": 9.912929847683481e-06, "loss": 0.7505, "step": 3063 }, { "epoch": 0.09, "grad_norm": 3.8613928951992142, "learning_rate": 9.912843656267333e-06, "loss": 0.3937, "step": 3064 }, { "epoch": 0.09, "grad_norm": 4.540532174576014, "learning_rate": 9.912757422586594e-06, "loss": 0.334, "step": 3065 }, { "epoch": 0.09, "grad_norm": 5.072251547175846, "learning_rate": 9.912671146642008e-06, "loss": 0.5003, "step": 3066 }, { "epoch": 0.09, "grad_norm": 10.822036418542616, "learning_rate": 9.912584828434318e-06, "loss": 0.5656, "step": 3067 }, { "epoch": 0.09, "grad_norm": 2.3287203878709906, "learning_rate": 9.912498467964265e-06, "loss": 0.3843, "step": 3068 }, { "epoch": 0.09, "grad_norm": 6.5279903399802715, "learning_rate": 9.91241206523259e-06, "loss": 0.6364, "step": 3069 }, { "epoch": 0.09, "grad_norm": 7.292068205177038, "learning_rate": 9.912325620240042e-06, "loss": 0.6567, "step": 3070 }, { "epoch": 0.09, "grad_norm": 8.126994079031935, "learning_rate": 9.91223913298736e-06, "loss": 0.7729, "step": 3071 }, { "epoch": 0.09, "grad_norm": 7.877490482403264, "learning_rate": 9.912152603475291e-06, "loss": 0.5007, "step": 3072 }, { "epoch": 0.09, "grad_norm": 10.472479540235067, "learning_rate": 9.912066031704577e-06, "loss": 0.6897, "step": 3073 }, { "epoch": 0.09, "grad_norm": 4.9869272997941945, "learning_rate": 9.911979417675965e-06, "loss": 0.5983, "step": 3074 }, { "epoch": 0.09, "grad_norm": 5.32986126574234, "learning_rate": 9.911892761390197e-06, "loss": 0.7631, "step": 3075 }, { "epoch": 0.09, "grad_norm": 10.358878053278987, "learning_rate": 9.911806062848022e-06, "loss": 0.8147, "step": 3076 }, { "epoch": 0.09, "grad_norm": 9.183222598904232, "learning_rate": 9.911719322050182e-06, "loss": 0.3798, "step": 3077 }, { "epoch": 0.09, "grad_norm": 9.285317887763721, "learning_rate": 9.911632538997426e-06, "loss": 0.511, "step": 3078 }, { "epoch": 0.09, "grad_norm": 12.325367898460373, "learning_rate": 9.911545713690503e-06, "loss": 0.7153, "step": 3079 }, { "epoch": 0.09, "grad_norm": 9.074914886663969, "learning_rate": 9.911458846130152e-06, "loss": 0.6065, "step": 3080 }, { "epoch": 0.09, "grad_norm": 10.306893599143367, "learning_rate": 9.911371936317129e-06, "loss": 0.4547, "step": 3081 }, { "epoch": 0.09, "grad_norm": 6.934629447844958, "learning_rate": 9.911284984252177e-06, "loss": 0.419, "step": 3082 }, { "epoch": 0.09, "grad_norm": 4.5491114618646415, "learning_rate": 9.911197989936046e-06, "loss": 0.4823, "step": 3083 }, { "epoch": 0.09, "grad_norm": 6.132352542029652, "learning_rate": 9.91111095336948e-06, "loss": 0.2698, "step": 3084 }, { "epoch": 0.09, "grad_norm": 9.077180114001346, "learning_rate": 9.911023874553234e-06, "loss": 0.5871, "step": 3085 }, { "epoch": 0.09, "grad_norm": 6.501503110365464, "learning_rate": 9.910936753488054e-06, "loss": 0.4742, "step": 3086 }, { "epoch": 0.09, "grad_norm": 3.9271438131894727, "learning_rate": 9.91084959017469e-06, "loss": 0.1781, "step": 3087 }, { "epoch": 0.09, "grad_norm": 9.232753759801879, "learning_rate": 9.91076238461389e-06, "loss": 0.8718, "step": 3088 }, { "epoch": 0.09, "grad_norm": 9.797279605085143, "learning_rate": 9.910675136806407e-06, "loss": 0.2925, "step": 3089 }, { "epoch": 0.09, "grad_norm": 4.861817950978024, "learning_rate": 9.91058784675299e-06, "loss": 0.4059, "step": 3090 }, { "epoch": 0.09, "grad_norm": 3.49324937758375, "learning_rate": 9.910500514454391e-06, "loss": 0.3123, "step": 3091 }, { "epoch": 0.09, "grad_norm": 4.894257046872696, "learning_rate": 9.91041313991136e-06, "loss": 0.4513, "step": 3092 }, { "epoch": 0.09, "grad_norm": 7.684991008428154, "learning_rate": 9.91032572312465e-06, "loss": 0.6611, "step": 3093 }, { "epoch": 0.09, "grad_norm": 6.880773894065703, "learning_rate": 9.910238264095011e-06, "loss": 0.4125, "step": 3094 }, { "epoch": 0.09, "grad_norm": 7.684606053934229, "learning_rate": 9.910150762823199e-06, "loss": 0.4731, "step": 3095 }, { "epoch": 0.09, "grad_norm": 8.071491196000013, "learning_rate": 9.910063219309961e-06, "loss": 0.6139, "step": 3096 }, { "epoch": 0.09, "grad_norm": 7.813515436938508, "learning_rate": 9.909975633556057e-06, "loss": 1.044, "step": 3097 }, { "epoch": 0.09, "grad_norm": 9.501193925220065, "learning_rate": 9.909888005562235e-06, "loss": 0.6573, "step": 3098 }, { "epoch": 0.09, "grad_norm": 6.259040931018079, "learning_rate": 9.909800335329254e-06, "loss": 0.4262, "step": 3099 }, { "epoch": 0.09, "grad_norm": 9.290188168528397, "learning_rate": 9.909712622857864e-06, "loss": 0.6803, "step": 3100 }, { "epoch": 0.09, "grad_norm": 11.437637411657482, "learning_rate": 9.90962486814882e-06, "loss": 0.9315, "step": 3101 }, { "epoch": 0.09, "grad_norm": 5.271581614255752, "learning_rate": 9.909537071202877e-06, "loss": 0.6991, "step": 3102 }, { "epoch": 0.09, "grad_norm": 8.07943694272645, "learning_rate": 9.90944923202079e-06, "loss": 0.8246, "step": 3103 }, { "epoch": 0.09, "grad_norm": 5.589801353651023, "learning_rate": 9.909361350603319e-06, "loss": 0.3008, "step": 3104 }, { "epoch": 0.09, "grad_norm": 8.126844284843308, "learning_rate": 9.909273426951216e-06, "loss": 0.4447, "step": 3105 }, { "epoch": 0.09, "grad_norm": 7.941339720500344, "learning_rate": 9.909185461065238e-06, "loss": 0.5551, "step": 3106 }, { "epoch": 0.09, "grad_norm": 8.240876875533566, "learning_rate": 9.909097452946142e-06, "loss": 0.7578, "step": 3107 }, { "epoch": 0.09, "grad_norm": 8.38308240415109, "learning_rate": 9.909009402594684e-06, "loss": 0.7355, "step": 3108 }, { "epoch": 0.09, "grad_norm": 11.254791002540395, "learning_rate": 9.908921310011623e-06, "loss": 0.5824, "step": 3109 }, { "epoch": 0.09, "grad_norm": 10.830802113142983, "learning_rate": 9.908833175197716e-06, "loss": 0.7738, "step": 3110 }, { "epoch": 0.09, "grad_norm": 11.209252547867978, "learning_rate": 9.908744998153723e-06, "loss": 0.5413, "step": 3111 }, { "epoch": 0.09, "grad_norm": 4.948910626075975, "learning_rate": 9.908656778880399e-06, "loss": 0.3806, "step": 3112 }, { "epoch": 0.09, "grad_norm": 4.55764240186806, "learning_rate": 9.908568517378508e-06, "loss": 0.331, "step": 3113 }, { "epoch": 0.09, "grad_norm": 10.93901034145074, "learning_rate": 9.908480213648803e-06, "loss": 0.572, "step": 3114 }, { "epoch": 0.09, "grad_norm": 6.699458224184185, "learning_rate": 9.908391867692049e-06, "loss": 0.5176, "step": 3115 }, { "epoch": 0.09, "grad_norm": 8.355468179311282, "learning_rate": 9.908303479509005e-06, "loss": 0.8945, "step": 3116 }, { "epoch": 0.09, "grad_norm": 4.022958878872369, "learning_rate": 9.908215049100429e-06, "loss": 0.123, "step": 3117 }, { "epoch": 0.09, "grad_norm": 7.060465359250221, "learning_rate": 9.908126576467084e-06, "loss": 0.6366, "step": 3118 }, { "epoch": 0.09, "grad_norm": 7.6783995067322035, "learning_rate": 9.908038061609729e-06, "loss": 0.568, "step": 3119 }, { "epoch": 0.09, "grad_norm": 9.997250178869988, "learning_rate": 9.907949504529128e-06, "loss": 0.4408, "step": 3120 }, { "epoch": 0.09, "grad_norm": 6.767209860730183, "learning_rate": 9.907860905226041e-06, "loss": 0.6912, "step": 3121 }, { "epoch": 0.09, "grad_norm": 11.24790460357801, "learning_rate": 9.907772263701232e-06, "loss": 0.6397, "step": 3122 }, { "epoch": 0.09, "grad_norm": 5.011311229808461, "learning_rate": 9.907683579955461e-06, "loss": 0.5822, "step": 3123 }, { "epoch": 0.09, "grad_norm": 5.187108358283539, "learning_rate": 9.907594853989493e-06, "loss": 0.7571, "step": 3124 }, { "epoch": 0.09, "grad_norm": 8.25528004772593, "learning_rate": 9.907506085804093e-06, "loss": 0.6191, "step": 3125 }, { "epoch": 0.09, "grad_norm": 10.826892248663055, "learning_rate": 9.90741727540002e-06, "loss": 0.5032, "step": 3126 }, { "epoch": 0.09, "grad_norm": 10.094354900741383, "learning_rate": 9.90732842277804e-06, "loss": 0.5286, "step": 3127 }, { "epoch": 0.09, "grad_norm": 13.130448581909082, "learning_rate": 9.90723952793892e-06, "loss": 0.5506, "step": 3128 }, { "epoch": 0.09, "grad_norm": 9.065821387587592, "learning_rate": 9.90715059088342e-06, "loss": 0.5122, "step": 3129 }, { "epoch": 0.09, "grad_norm": 6.427975951726019, "learning_rate": 9.907061611612308e-06, "loss": 0.5405, "step": 3130 }, { "epoch": 0.09, "grad_norm": 7.842181166642317, "learning_rate": 9.906972590126352e-06, "loss": 0.8943, "step": 3131 }, { "epoch": 0.09, "grad_norm": 6.858145983869079, "learning_rate": 9.906883526426311e-06, "loss": 0.468, "step": 3132 }, { "epoch": 0.09, "grad_norm": 10.88200354598944, "learning_rate": 9.906794420512957e-06, "loss": 0.3788, "step": 3133 }, { "epoch": 0.09, "grad_norm": 4.854324512889557, "learning_rate": 9.906705272387056e-06, "loss": 0.327, "step": 3134 }, { "epoch": 0.09, "grad_norm": 10.235400199501607, "learning_rate": 9.906616082049373e-06, "loss": 0.6473, "step": 3135 }, { "epoch": 0.09, "grad_norm": 3.6802076227248786, "learning_rate": 9.906526849500675e-06, "loss": 0.3431, "step": 3136 }, { "epoch": 0.09, "grad_norm": 8.25530193929621, "learning_rate": 9.90643757474173e-06, "loss": 0.592, "step": 3137 }, { "epoch": 0.09, "grad_norm": 6.85537397965789, "learning_rate": 9.906348257773308e-06, "loss": 0.6126, "step": 3138 }, { "epoch": 0.09, "grad_norm": 7.0638342078162575, "learning_rate": 9.906258898596175e-06, "loss": 0.4177, "step": 3139 }, { "epoch": 0.09, "grad_norm": 11.146657201437453, "learning_rate": 9.906169497211102e-06, "loss": 0.6281, "step": 3140 }, { "epoch": 0.09, "grad_norm": 5.401340747327825, "learning_rate": 9.906080053618856e-06, "loss": 0.5705, "step": 3141 }, { "epoch": 0.09, "grad_norm": 7.585901419399585, "learning_rate": 9.905990567820207e-06, "loss": 0.2565, "step": 3142 }, { "epoch": 0.09, "grad_norm": 7.194854158430453, "learning_rate": 9.905901039815926e-06, "loss": 0.9427, "step": 3143 }, { "epoch": 0.09, "grad_norm": 8.549647688717723, "learning_rate": 9.905811469606783e-06, "loss": 0.6291, "step": 3144 }, { "epoch": 0.09, "grad_norm": 24.2682225985695, "learning_rate": 9.905721857193546e-06, "loss": 0.3023, "step": 3145 }, { "epoch": 0.09, "grad_norm": 5.219444959589835, "learning_rate": 9.90563220257699e-06, "loss": 0.5278, "step": 3146 }, { "epoch": 0.09, "grad_norm": 6.25462181863026, "learning_rate": 9.905542505757884e-06, "loss": 0.415, "step": 3147 }, { "epoch": 0.09, "grad_norm": 11.63811733194177, "learning_rate": 9.905452766736998e-06, "loss": 0.8074, "step": 3148 }, { "epoch": 0.09, "grad_norm": 4.915620590178882, "learning_rate": 9.905362985515109e-06, "loss": 0.436, "step": 3149 }, { "epoch": 0.09, "grad_norm": 10.558663659070728, "learning_rate": 9.905273162092983e-06, "loss": 0.5324, "step": 3150 }, { "epoch": 0.09, "grad_norm": 11.394600047733256, "learning_rate": 9.905183296471398e-06, "loss": 0.3067, "step": 3151 }, { "epoch": 0.09, "grad_norm": 11.708821314804974, "learning_rate": 9.905093388651122e-06, "loss": 0.5446, "step": 3152 }, { "epoch": 0.09, "grad_norm": 3.92349609367708, "learning_rate": 9.905003438632934e-06, "loss": 0.7544, "step": 3153 }, { "epoch": 0.09, "grad_norm": 7.982580770913637, "learning_rate": 9.904913446417605e-06, "loss": 0.6208, "step": 3154 }, { "epoch": 0.09, "grad_norm": 4.689610679995252, "learning_rate": 9.904823412005909e-06, "loss": 0.6491, "step": 3155 }, { "epoch": 0.09, "grad_norm": 8.10628519749114, "learning_rate": 9.904733335398622e-06, "loss": 0.6507, "step": 3156 }, { "epoch": 0.09, "grad_norm": 7.548388343229314, "learning_rate": 9.904643216596517e-06, "loss": 0.7544, "step": 3157 }, { "epoch": 0.09, "grad_norm": 10.456945887072202, "learning_rate": 9.904553055600372e-06, "loss": 0.9717, "step": 3158 }, { "epoch": 0.09, "grad_norm": 5.391256101955514, "learning_rate": 9.904462852410958e-06, "loss": 0.6863, "step": 3159 }, { "epoch": 0.09, "grad_norm": 10.614777977194198, "learning_rate": 9.904372607029056e-06, "loss": 0.6735, "step": 3160 }, { "epoch": 0.09, "grad_norm": 6.447594792267143, "learning_rate": 9.90428231945544e-06, "loss": 0.2938, "step": 3161 }, { "epoch": 0.09, "grad_norm": 3.8809553344719783, "learning_rate": 9.904191989690888e-06, "loss": 0.584, "step": 3162 }, { "epoch": 0.09, "grad_norm": 5.817452484579428, "learning_rate": 9.904101617736175e-06, "loss": 0.4476, "step": 3163 }, { "epoch": 0.09, "grad_norm": 7.883066239446107, "learning_rate": 9.904011203592079e-06, "loss": 0.587, "step": 3164 }, { "epoch": 0.09, "grad_norm": 11.155403767608943, "learning_rate": 9.90392074725938e-06, "loss": 0.8342, "step": 3165 }, { "epoch": 0.09, "grad_norm": 3.499608750273543, "learning_rate": 9.903830248738852e-06, "loss": 0.199, "step": 3166 }, { "epoch": 0.09, "grad_norm": 5.091591875158608, "learning_rate": 9.90373970803128e-06, "loss": 0.382, "step": 3167 }, { "epoch": 0.09, "grad_norm": 7.637748442010101, "learning_rate": 9.903649125137436e-06, "loss": 0.7037, "step": 3168 }, { "epoch": 0.09, "grad_norm": 7.701654444722451, "learning_rate": 9.903558500058104e-06, "loss": 0.9011, "step": 3169 }, { "epoch": 0.09, "grad_norm": 3.7717983247537714, "learning_rate": 9.90346783279406e-06, "loss": 0.5151, "step": 3170 }, { "epoch": 0.09, "grad_norm": 4.646970167897897, "learning_rate": 9.903377123346088e-06, "loss": 0.5738, "step": 3171 }, { "epoch": 0.09, "grad_norm": 6.5155473151024035, "learning_rate": 9.903286371714966e-06, "loss": 0.3538, "step": 3172 }, { "epoch": 0.09, "grad_norm": 6.343455641358026, "learning_rate": 9.903195577901474e-06, "loss": 1.0657, "step": 3173 }, { "epoch": 0.09, "grad_norm": 6.359484172511925, "learning_rate": 9.903104741906395e-06, "loss": 0.3661, "step": 3174 }, { "epoch": 0.09, "grad_norm": 6.690254820516634, "learning_rate": 9.903013863730508e-06, "loss": 0.4682, "step": 3175 }, { "epoch": 0.09, "grad_norm": 8.259198003078309, "learning_rate": 9.902922943374598e-06, "loss": 0.8296, "step": 3176 }, { "epoch": 0.09, "grad_norm": 2.7885417505510555, "learning_rate": 9.902831980839448e-06, "loss": 0.3529, "step": 3177 }, { "epoch": 0.09, "grad_norm": 8.540865143332, "learning_rate": 9.902740976125832e-06, "loss": 0.3148, "step": 3178 }, { "epoch": 0.09, "grad_norm": 4.78066534319829, "learning_rate": 9.902649929234545e-06, "loss": 0.7716, "step": 3179 }, { "epoch": 0.09, "grad_norm": 7.617709004082936, "learning_rate": 9.90255884016636e-06, "loss": 0.392, "step": 3180 }, { "epoch": 0.09, "grad_norm": 7.71286867766012, "learning_rate": 9.902467708922067e-06, "loss": 0.5963, "step": 3181 }, { "epoch": 0.09, "grad_norm": 9.307030985632158, "learning_rate": 9.902376535502448e-06, "loss": 1.0228, "step": 3182 }, { "epoch": 0.09, "grad_norm": 7.353631112659695, "learning_rate": 9.902285319908287e-06, "loss": 0.5635, "step": 3183 }, { "epoch": 0.09, "grad_norm": 4.284092314650703, "learning_rate": 9.902194062140368e-06, "loss": 0.5105, "step": 3184 }, { "epoch": 0.09, "grad_norm": 1.9854774866732214, "learning_rate": 9.902102762199479e-06, "loss": 0.3393, "step": 3185 }, { "epoch": 0.09, "grad_norm": 10.80338992852493, "learning_rate": 9.902011420086401e-06, "loss": 0.5527, "step": 3186 }, { "epoch": 0.09, "grad_norm": 3.866734549458174, "learning_rate": 9.901920035801923e-06, "loss": 0.496, "step": 3187 }, { "epoch": 0.09, "grad_norm": 6.801250163328776, "learning_rate": 9.901828609346833e-06, "loss": 0.5691, "step": 3188 }, { "epoch": 0.09, "grad_norm": 10.542170898279688, "learning_rate": 9.901737140721912e-06, "loss": 0.8057, "step": 3189 }, { "epoch": 0.09, "grad_norm": 4.746144587240591, "learning_rate": 9.901645629927952e-06, "loss": 0.2428, "step": 3190 }, { "epoch": 0.09, "grad_norm": 1.4526643945958642, "learning_rate": 9.901554076965736e-06, "loss": 0.2708, "step": 3191 }, { "epoch": 0.09, "grad_norm": 4.12592903425843, "learning_rate": 9.901462481836057e-06, "loss": 0.5152, "step": 3192 }, { "epoch": 0.09, "grad_norm": 6.0853949759189385, "learning_rate": 9.901370844539697e-06, "loss": 0.351, "step": 3193 }, { "epoch": 0.09, "grad_norm": 11.169762386297993, "learning_rate": 9.901279165077447e-06, "loss": 0.6955, "step": 3194 }, { "epoch": 0.09, "grad_norm": 4.992668642071607, "learning_rate": 9.901187443450097e-06, "loss": 0.3742, "step": 3195 }, { "epoch": 0.09, "grad_norm": 7.5684248989099085, "learning_rate": 9.901095679658434e-06, "loss": 0.321, "step": 3196 }, { "epoch": 0.09, "grad_norm": 3.506320355639349, "learning_rate": 9.90100387370325e-06, "loss": 0.268, "step": 3197 }, { "epoch": 0.09, "grad_norm": 5.427832504763579, "learning_rate": 9.900912025585331e-06, "loss": 0.6454, "step": 3198 }, { "epoch": 0.09, "grad_norm": 3.2755490050355673, "learning_rate": 9.900820135305471e-06, "loss": 0.144, "step": 3199 }, { "epoch": 0.09, "grad_norm": 4.607254575090901, "learning_rate": 9.900728202864458e-06, "loss": 0.4486, "step": 3200 }, { "epoch": 0.09, "grad_norm": 7.4818804890321955, "learning_rate": 9.900636228263084e-06, "loss": 0.5191, "step": 3201 }, { "epoch": 0.09, "grad_norm": 10.390484939434314, "learning_rate": 9.90054421150214e-06, "loss": 0.8464, "step": 3202 }, { "epoch": 0.09, "grad_norm": 6.164736364007274, "learning_rate": 9.900452152582416e-06, "loss": 0.5646, "step": 3203 }, { "epoch": 0.09, "grad_norm": 5.7012197159906055, "learning_rate": 9.900360051504706e-06, "loss": 0.3661, "step": 3204 }, { "epoch": 0.09, "grad_norm": 10.008922601695375, "learning_rate": 9.900267908269803e-06, "loss": 0.7635, "step": 3205 }, { "epoch": 0.09, "grad_norm": 6.750962365388117, "learning_rate": 9.900175722878497e-06, "loss": 0.3778, "step": 3206 }, { "epoch": 0.09, "grad_norm": 48.55927384705567, "learning_rate": 9.900083495331583e-06, "loss": 0.9462, "step": 3207 }, { "epoch": 0.09, "grad_norm": 3.3097932300926907, "learning_rate": 9.899991225629855e-06, "loss": 0.1839, "step": 3208 }, { "epoch": 0.09, "grad_norm": 7.922669982145924, "learning_rate": 9.899898913774105e-06, "loss": 0.5238, "step": 3209 }, { "epoch": 0.09, "grad_norm": 6.976295115781761, "learning_rate": 9.899806559765128e-06, "loss": 0.5396, "step": 3210 }, { "epoch": 0.09, "grad_norm": 6.8941718075345495, "learning_rate": 9.89971416360372e-06, "loss": 0.3636, "step": 3211 }, { "epoch": 0.09, "grad_norm": 6.257675417042741, "learning_rate": 9.899621725290672e-06, "loss": 0.401, "step": 3212 }, { "epoch": 0.09, "grad_norm": 5.251479031354324, "learning_rate": 9.899529244826783e-06, "loss": 0.5222, "step": 3213 }, { "epoch": 0.09, "grad_norm": 3.6541435172820473, "learning_rate": 9.899436722212846e-06, "loss": 0.3498, "step": 3214 }, { "epoch": 0.09, "grad_norm": 7.132386043372294, "learning_rate": 9.89934415744966e-06, "loss": 0.6255, "step": 3215 }, { "epoch": 0.09, "grad_norm": 7.078595884714473, "learning_rate": 9.899251550538017e-06, "loss": 0.2784, "step": 3216 }, { "epoch": 0.09, "grad_norm": 13.04101321174813, "learning_rate": 9.899158901478718e-06, "loss": 0.5124, "step": 3217 }, { "epoch": 0.09, "grad_norm": 15.391526063209046, "learning_rate": 9.899066210272556e-06, "loss": 0.8671, "step": 3218 }, { "epoch": 0.09, "grad_norm": 8.751494579736104, "learning_rate": 9.898973476920333e-06, "loss": 0.4828, "step": 3219 }, { "epoch": 0.09, "grad_norm": 8.76660373819874, "learning_rate": 9.898880701422843e-06, "loss": 0.8724, "step": 3220 }, { "epoch": 0.09, "grad_norm": 6.238947479846302, "learning_rate": 9.898787883780887e-06, "loss": 0.5855, "step": 3221 }, { "epoch": 0.09, "grad_norm": 8.301803188471451, "learning_rate": 9.898695023995261e-06, "loss": 0.5649, "step": 3222 }, { "epoch": 0.09, "grad_norm": 3.957316713119147, "learning_rate": 9.898602122066764e-06, "loss": 0.3564, "step": 3223 }, { "epoch": 0.09, "grad_norm": 12.533468226119476, "learning_rate": 9.898509177996198e-06, "loss": 0.1775, "step": 3224 }, { "epoch": 0.09, "grad_norm": 6.013996448783473, "learning_rate": 9.898416191784358e-06, "loss": 0.5565, "step": 3225 }, { "epoch": 0.09, "grad_norm": 10.889298871381863, "learning_rate": 9.898323163432049e-06, "loss": 0.6772, "step": 3226 }, { "epoch": 0.09, "grad_norm": 6.865506188775806, "learning_rate": 9.898230092940068e-06, "loss": 0.5856, "step": 3227 }, { "epoch": 0.09, "grad_norm": 8.202204073119889, "learning_rate": 9.898136980309217e-06, "loss": 0.7078, "step": 3228 }, { "epoch": 0.09, "grad_norm": 12.566248775654012, "learning_rate": 9.898043825540296e-06, "loss": 0.6684, "step": 3229 }, { "epoch": 0.09, "grad_norm": 5.075946046943798, "learning_rate": 9.897950628634108e-06, "loss": 0.3345, "step": 3230 }, { "epoch": 0.09, "grad_norm": 6.884884404449669, "learning_rate": 9.897857389591452e-06, "loss": 0.3592, "step": 3231 }, { "epoch": 0.09, "grad_norm": 5.496609509743265, "learning_rate": 9.897764108413133e-06, "loss": 0.4507, "step": 3232 }, { "epoch": 0.09, "grad_norm": 7.9234234201362215, "learning_rate": 9.897670785099953e-06, "loss": 0.8153, "step": 3233 }, { "epoch": 0.09, "grad_norm": 5.91109901266047, "learning_rate": 9.897577419652713e-06, "loss": 0.7301, "step": 3234 }, { "epoch": 0.09, "grad_norm": 9.439086780665262, "learning_rate": 9.897484012072218e-06, "loss": 0.6871, "step": 3235 }, { "epoch": 0.09, "grad_norm": 6.236869102786409, "learning_rate": 9.897390562359272e-06, "loss": 0.5361, "step": 3236 }, { "epoch": 0.09, "grad_norm": 4.0621112344031225, "learning_rate": 9.897297070514676e-06, "loss": 0.4155, "step": 3237 }, { "epoch": 0.09, "grad_norm": 5.802239786318155, "learning_rate": 9.897203536539238e-06, "loss": 0.3303, "step": 3238 }, { "epoch": 0.09, "grad_norm": 8.31927088275348, "learning_rate": 9.89710996043376e-06, "loss": 0.5698, "step": 3239 }, { "epoch": 0.09, "grad_norm": 6.240589152057898, "learning_rate": 9.897016342199048e-06, "loss": 0.7501, "step": 3240 }, { "epoch": 0.09, "grad_norm": 2.7578599239518757, "learning_rate": 9.896922681835907e-06, "loss": 0.2623, "step": 3241 }, { "epoch": 0.09, "grad_norm": 4.863262081491467, "learning_rate": 9.896828979345145e-06, "loss": 0.6131, "step": 3242 }, { "epoch": 0.09, "grad_norm": 5.838240693700715, "learning_rate": 9.896735234727565e-06, "loss": 0.5719, "step": 3243 }, { "epoch": 0.09, "grad_norm": 13.730977164306944, "learning_rate": 9.896641447983974e-06, "loss": 0.7437, "step": 3244 }, { "epoch": 0.09, "grad_norm": 5.621629849782779, "learning_rate": 9.896547619115181e-06, "loss": 0.7202, "step": 3245 }, { "epoch": 0.09, "grad_norm": 5.0778930141571825, "learning_rate": 9.89645374812199e-06, "loss": 0.5162, "step": 3246 }, { "epoch": 0.09, "grad_norm": 11.482590641832546, "learning_rate": 9.896359835005212e-06, "loss": 0.7703, "step": 3247 }, { "epoch": 0.09, "grad_norm": 12.401387297027261, "learning_rate": 9.896265879765653e-06, "loss": 0.5226, "step": 3248 }, { "epoch": 0.09, "grad_norm": 5.294888644393833, "learning_rate": 9.896171882404119e-06, "loss": 0.5, "step": 3249 }, { "epoch": 0.09, "grad_norm": 4.786995844776843, "learning_rate": 9.896077842921423e-06, "loss": 0.5696, "step": 3250 }, { "epoch": 0.09, "grad_norm": 6.1711044796296095, "learning_rate": 9.895983761318373e-06, "loss": 0.6841, "step": 3251 }, { "epoch": 0.09, "grad_norm": 5.40425028779161, "learning_rate": 9.895889637595777e-06, "loss": 0.4424, "step": 3252 }, { "epoch": 0.09, "grad_norm": 8.654420735121331, "learning_rate": 9.895795471754445e-06, "loss": 0.4673, "step": 3253 }, { "epoch": 0.09, "grad_norm": 7.733802111437618, "learning_rate": 9.895701263795186e-06, "loss": 0.3959, "step": 3254 }, { "epoch": 0.09, "grad_norm": 3.587885391987414, "learning_rate": 9.895607013718813e-06, "loss": 0.1939, "step": 3255 }, { "epoch": 0.09, "grad_norm": 4.306856581621281, "learning_rate": 9.895512721526137e-06, "loss": 0.4992, "step": 3256 }, { "epoch": 0.09, "grad_norm": 7.18680184332144, "learning_rate": 9.895418387217965e-06, "loss": 0.5343, "step": 3257 }, { "epoch": 0.09, "grad_norm": 6.981607934421381, "learning_rate": 9.895324010795114e-06, "loss": 0.46, "step": 3258 }, { "epoch": 0.09, "grad_norm": 5.323334136935758, "learning_rate": 9.89522959225839e-06, "loss": 0.3676, "step": 3259 }, { "epoch": 0.09, "grad_norm": 6.565031961913796, "learning_rate": 9.895135131608612e-06, "loss": 0.646, "step": 3260 }, { "epoch": 0.09, "grad_norm": 8.645211132288823, "learning_rate": 9.895040628846588e-06, "loss": 0.5621, "step": 3261 }, { "epoch": 0.09, "grad_norm": 4.140472783583692, "learning_rate": 9.894946083973132e-06, "loss": 0.2121, "step": 3262 }, { "epoch": 0.09, "grad_norm": 4.898809148144709, "learning_rate": 9.894851496989057e-06, "loss": 0.6287, "step": 3263 }, { "epoch": 0.09, "grad_norm": 5.181882413362613, "learning_rate": 9.894756867895177e-06, "loss": 0.6565, "step": 3264 }, { "epoch": 0.09, "grad_norm": 9.177036425282639, "learning_rate": 9.894662196692309e-06, "loss": 0.4579, "step": 3265 }, { "epoch": 0.09, "grad_norm": 7.022965259397093, "learning_rate": 9.89456748338126e-06, "loss": 0.8158, "step": 3266 }, { "epoch": 0.09, "grad_norm": 6.8473591106247635, "learning_rate": 9.894472727962854e-06, "loss": 0.5813, "step": 3267 }, { "epoch": 0.09, "grad_norm": 4.683359631818647, "learning_rate": 9.8943779304379e-06, "loss": 0.4186, "step": 3268 }, { "epoch": 0.09, "grad_norm": 6.125715603189032, "learning_rate": 9.894283090807215e-06, "loss": 0.5233, "step": 3269 }, { "epoch": 0.09, "grad_norm": 4.509042608391853, "learning_rate": 9.894188209071616e-06, "loss": 0.4048, "step": 3270 }, { "epoch": 0.09, "grad_norm": 8.281457977562336, "learning_rate": 9.894093285231918e-06, "loss": 0.7366, "step": 3271 }, { "epoch": 0.09, "grad_norm": 12.20148436474534, "learning_rate": 9.893998319288937e-06, "loss": 0.7036, "step": 3272 }, { "epoch": 0.09, "grad_norm": 8.191766457194015, "learning_rate": 9.893903311243493e-06, "loss": 0.8854, "step": 3273 }, { "epoch": 0.09, "grad_norm": 5.72350513815064, "learning_rate": 9.8938082610964e-06, "loss": 0.4201, "step": 3274 }, { "epoch": 0.09, "grad_norm": 7.268251793303489, "learning_rate": 9.893713168848478e-06, "loss": 0.3034, "step": 3275 }, { "epoch": 0.09, "grad_norm": 9.813662903510606, "learning_rate": 9.893618034500544e-06, "loss": 0.3816, "step": 3276 }, { "epoch": 0.09, "grad_norm": 7.53753488510435, "learning_rate": 9.893522858053415e-06, "loss": 0.5198, "step": 3277 }, { "epoch": 0.09, "grad_norm": 6.893867300770027, "learning_rate": 9.893427639507913e-06, "loss": 0.4729, "step": 3278 }, { "epoch": 0.09, "grad_norm": 7.376496114757349, "learning_rate": 9.893332378864856e-06, "loss": 0.2709, "step": 3279 }, { "epoch": 0.09, "grad_norm": 4.984770134358144, "learning_rate": 9.89323707612506e-06, "loss": 0.1867, "step": 3280 }, { "epoch": 0.09, "grad_norm": 18.219686493924357, "learning_rate": 9.89314173128935e-06, "loss": 0.6509, "step": 3281 }, { "epoch": 0.09, "grad_norm": 9.424933700935451, "learning_rate": 9.893046344358545e-06, "loss": 0.5566, "step": 3282 }, { "epoch": 0.09, "grad_norm": 3.7279005867243997, "learning_rate": 9.892950915333464e-06, "loss": 0.3443, "step": 3283 }, { "epoch": 0.09, "grad_norm": 9.400874835759293, "learning_rate": 9.892855444214928e-06, "loss": 0.5118, "step": 3284 }, { "epoch": 0.09, "grad_norm": 5.5743910315768845, "learning_rate": 9.892759931003759e-06, "loss": 0.8829, "step": 3285 }, { "epoch": 0.09, "grad_norm": 8.033088918848403, "learning_rate": 9.89266437570078e-06, "loss": 0.3496, "step": 3286 }, { "epoch": 0.09, "grad_norm": 6.4856634491523995, "learning_rate": 9.892568778306811e-06, "loss": 0.565, "step": 3287 }, { "epoch": 0.09, "grad_norm": 6.821614913421019, "learning_rate": 9.892473138822676e-06, "loss": 0.5481, "step": 3288 }, { "epoch": 0.09, "grad_norm": 8.793370634679027, "learning_rate": 9.892377457249196e-06, "loss": 0.6474, "step": 3289 }, { "epoch": 0.09, "grad_norm": 7.725150323534793, "learning_rate": 9.892281733587195e-06, "loss": 0.553, "step": 3290 }, { "epoch": 0.09, "grad_norm": 8.51420942667344, "learning_rate": 9.892185967837498e-06, "loss": 0.3991, "step": 3291 }, { "epoch": 0.09, "grad_norm": 8.189326424759228, "learning_rate": 9.892090160000926e-06, "loss": 0.8893, "step": 3292 }, { "epoch": 0.09, "grad_norm": 5.484174588880781, "learning_rate": 9.891994310078304e-06, "loss": 0.3241, "step": 3293 }, { "epoch": 0.09, "grad_norm": 9.345999354467976, "learning_rate": 9.891898418070459e-06, "loss": 0.6682, "step": 3294 }, { "epoch": 0.09, "grad_norm": 5.391081815520154, "learning_rate": 9.891802483978213e-06, "loss": 0.5148, "step": 3295 }, { "epoch": 0.09, "grad_norm": 8.695054974988164, "learning_rate": 9.891706507802395e-06, "loss": 0.4514, "step": 3296 }, { "epoch": 0.09, "grad_norm": 6.9771948311038035, "learning_rate": 9.891610489543826e-06, "loss": 0.693, "step": 3297 }, { "epoch": 0.09, "grad_norm": 5.713371999951759, "learning_rate": 9.891514429203334e-06, "loss": 0.7354, "step": 3298 }, { "epoch": 0.09, "grad_norm": 7.6121775892412336, "learning_rate": 9.891418326781746e-06, "loss": 0.4409, "step": 3299 }, { "epoch": 0.09, "grad_norm": 12.613150597418715, "learning_rate": 9.891322182279887e-06, "loss": 0.5353, "step": 3300 }, { "epoch": 0.09, "grad_norm": 5.534542073894858, "learning_rate": 9.891225995698588e-06, "loss": 0.7221, "step": 3301 }, { "epoch": 0.09, "grad_norm": 13.720978992444687, "learning_rate": 9.891129767038675e-06, "loss": 0.456, "step": 3302 }, { "epoch": 0.09, "grad_norm": 6.724403734965675, "learning_rate": 9.89103349630097e-06, "loss": 0.3478, "step": 3303 }, { "epoch": 0.09, "grad_norm": 5.936894958186254, "learning_rate": 9.89093718348631e-06, "loss": 0.5839, "step": 3304 }, { "epoch": 0.09, "grad_norm": 4.823641377234572, "learning_rate": 9.890840828595518e-06, "loss": 0.3273, "step": 3305 }, { "epoch": 0.09, "grad_norm": 7.4449769591928625, "learning_rate": 9.890744431629425e-06, "loss": 0.7753, "step": 3306 }, { "epoch": 0.09, "grad_norm": 5.213092364126855, "learning_rate": 9.89064799258886e-06, "loss": 0.6535, "step": 3307 }, { "epoch": 0.09, "grad_norm": 5.909584153870185, "learning_rate": 9.890551511474652e-06, "loss": 0.7199, "step": 3308 }, { "epoch": 0.09, "grad_norm": 8.746721034792046, "learning_rate": 9.890454988287631e-06, "loss": 0.8476, "step": 3309 }, { "epoch": 0.09, "grad_norm": 5.437127856969921, "learning_rate": 9.89035842302863e-06, "loss": 0.7258, "step": 3310 }, { "epoch": 0.09, "grad_norm": 6.132230811262343, "learning_rate": 9.890261815698477e-06, "loss": 0.7491, "step": 3311 }, { "epoch": 0.09, "grad_norm": 5.174734103241017, "learning_rate": 9.890165166298002e-06, "loss": 0.4431, "step": 3312 }, { "epoch": 0.09, "grad_norm": 4.444197127626896, "learning_rate": 9.89006847482804e-06, "loss": 0.3893, "step": 3313 }, { "epoch": 0.09, "grad_norm": 7.793451998840279, "learning_rate": 9.88997174128942e-06, "loss": 0.9808, "step": 3314 }, { "epoch": 0.09, "grad_norm": 6.817914228679213, "learning_rate": 9.889874965682975e-06, "loss": 0.643, "step": 3315 }, { "epoch": 0.09, "grad_norm": 9.947494806794994, "learning_rate": 9.889778148009537e-06, "loss": 0.9124, "step": 3316 }, { "epoch": 0.09, "grad_norm": 6.479078662270559, "learning_rate": 9.889681288269941e-06, "loss": 0.4896, "step": 3317 }, { "epoch": 0.1, "grad_norm": 7.419750672544503, "learning_rate": 9.889584386465019e-06, "loss": 0.5311, "step": 3318 }, { "epoch": 0.1, "grad_norm": 12.020750026755797, "learning_rate": 9.889487442595605e-06, "loss": 0.745, "step": 3319 }, { "epoch": 0.1, "grad_norm": 5.708968923040797, "learning_rate": 9.889390456662531e-06, "loss": 0.568, "step": 3320 }, { "epoch": 0.1, "grad_norm": 7.200991465598052, "learning_rate": 9.889293428666633e-06, "loss": 0.4471, "step": 3321 }, { "epoch": 0.1, "grad_norm": 6.639565273324586, "learning_rate": 9.889196358608746e-06, "loss": 0.7216, "step": 3322 }, { "epoch": 0.1, "grad_norm": 7.767174057765664, "learning_rate": 9.889099246489707e-06, "loss": 0.9865, "step": 3323 }, { "epoch": 0.1, "grad_norm": 6.755710093957274, "learning_rate": 9.889002092310345e-06, "loss": 0.8774, "step": 3324 }, { "epoch": 0.1, "grad_norm": 10.663101991427721, "learning_rate": 9.888904896071503e-06, "loss": 0.6166, "step": 3325 }, { "epoch": 0.1, "grad_norm": 5.467005097082095, "learning_rate": 9.888807657774013e-06, "loss": 0.5113, "step": 3326 }, { "epoch": 0.1, "grad_norm": 7.086461629272872, "learning_rate": 9.888710377418713e-06, "loss": 0.2863, "step": 3327 }, { "epoch": 0.1, "grad_norm": 9.070647109477921, "learning_rate": 9.88861305500644e-06, "loss": 0.7719, "step": 3328 }, { "epoch": 0.1, "grad_norm": 4.952223783027811, "learning_rate": 9.888515690538028e-06, "loss": 0.4795, "step": 3329 }, { "epoch": 0.1, "grad_norm": 12.546866030945191, "learning_rate": 9.88841828401432e-06, "loss": 0.6541, "step": 3330 }, { "epoch": 0.1, "grad_norm": 12.552397200378667, "learning_rate": 9.888320835436151e-06, "loss": 0.6012, "step": 3331 }, { "epoch": 0.1, "grad_norm": 5.172584646351913, "learning_rate": 9.888223344804359e-06, "loss": 0.4477, "step": 3332 }, { "epoch": 0.1, "grad_norm": 7.317678101681926, "learning_rate": 9.888125812119783e-06, "loss": 0.4179, "step": 3333 }, { "epoch": 0.1, "grad_norm": 4.936717683647416, "learning_rate": 9.888028237383264e-06, "loss": 0.4491, "step": 3334 }, { "epoch": 0.1, "grad_norm": 6.801234318400167, "learning_rate": 9.88793062059564e-06, "loss": 0.9465, "step": 3335 }, { "epoch": 0.1, "grad_norm": 5.905777241188519, "learning_rate": 9.88783296175775e-06, "loss": 0.5384, "step": 3336 }, { "epoch": 0.1, "grad_norm": 8.41138984644421, "learning_rate": 9.887735260870436e-06, "loss": 0.7255, "step": 3337 }, { "epoch": 0.1, "grad_norm": 8.356328562422128, "learning_rate": 9.887637517934536e-06, "loss": 0.6522, "step": 3338 }, { "epoch": 0.1, "grad_norm": 5.6064358292383, "learning_rate": 9.887539732950892e-06, "loss": 0.5083, "step": 3339 }, { "epoch": 0.1, "grad_norm": 4.20587748766939, "learning_rate": 9.887441905920346e-06, "loss": 0.2816, "step": 3340 }, { "epoch": 0.1, "grad_norm": 5.420262473461915, "learning_rate": 9.887344036843739e-06, "loss": 0.1988, "step": 3341 }, { "epoch": 0.1, "grad_norm": 3.4962615074101997, "learning_rate": 9.887246125721914e-06, "loss": 0.4762, "step": 3342 }, { "epoch": 0.1, "grad_norm": 5.678505586960478, "learning_rate": 9.887148172555709e-06, "loss": 0.3452, "step": 3343 }, { "epoch": 0.1, "grad_norm": 4.165980028160449, "learning_rate": 9.887050177345974e-06, "loss": 0.1964, "step": 3344 }, { "epoch": 0.1, "grad_norm": 9.453029865385185, "learning_rate": 9.886952140093546e-06, "loss": 0.8108, "step": 3345 }, { "epoch": 0.1, "grad_norm": 2.3620429459736676, "learning_rate": 9.886854060799272e-06, "loss": 0.2954, "step": 3346 }, { "epoch": 0.1, "grad_norm": 3.3563989808672603, "learning_rate": 9.886755939463993e-06, "loss": 0.2217, "step": 3347 }, { "epoch": 0.1, "grad_norm": 9.845844348756556, "learning_rate": 9.886657776088553e-06, "loss": 0.5629, "step": 3348 }, { "epoch": 0.1, "grad_norm": 5.674463653426536, "learning_rate": 9.886559570673799e-06, "loss": 0.5313, "step": 3349 }, { "epoch": 0.1, "grad_norm": 4.617332030227202, "learning_rate": 9.886461323220576e-06, "loss": 0.2837, "step": 3350 }, { "epoch": 0.1, "grad_norm": 4.876491269633703, "learning_rate": 9.886363033729724e-06, "loss": 0.5754, "step": 3351 }, { "epoch": 0.1, "grad_norm": 5.093551865919236, "learning_rate": 9.886264702202096e-06, "loss": 0.6419, "step": 3352 }, { "epoch": 0.1, "grad_norm": 9.19408372886626, "learning_rate": 9.886166328638533e-06, "loss": 0.6844, "step": 3353 }, { "epoch": 0.1, "grad_norm": 7.599525453157566, "learning_rate": 9.886067913039883e-06, "loss": 0.2431, "step": 3354 }, { "epoch": 0.1, "grad_norm": 4.460676249054279, "learning_rate": 9.885969455406993e-06, "loss": 0.5018, "step": 3355 }, { "epoch": 0.1, "grad_norm": 8.759472760224178, "learning_rate": 9.885870955740708e-06, "loss": 0.6831, "step": 3356 }, { "epoch": 0.1, "grad_norm": 9.317909717604703, "learning_rate": 9.88577241404188e-06, "loss": 0.7946, "step": 3357 }, { "epoch": 0.1, "grad_norm": 5.76051472125719, "learning_rate": 9.885673830311348e-06, "loss": 0.3221, "step": 3358 }, { "epoch": 0.1, "grad_norm": 5.844283298902615, "learning_rate": 9.88557520454997e-06, "loss": 0.3681, "step": 3359 }, { "epoch": 0.1, "grad_norm": 11.904624607747682, "learning_rate": 9.885476536758588e-06, "loss": 0.8727, "step": 3360 }, { "epoch": 0.1, "grad_norm": 7.57857576583013, "learning_rate": 9.885377826938053e-06, "loss": 0.4691, "step": 3361 }, { "epoch": 0.1, "grad_norm": 6.544391534321918, "learning_rate": 9.885279075089216e-06, "loss": 0.532, "step": 3362 }, { "epoch": 0.1, "grad_norm": 13.689842824876559, "learning_rate": 9.885180281212923e-06, "loss": 0.7076, "step": 3363 }, { "epoch": 0.1, "grad_norm": 8.437572535450295, "learning_rate": 9.885081445310026e-06, "loss": 0.6715, "step": 3364 }, { "epoch": 0.1, "grad_norm": 7.808532592472877, "learning_rate": 9.884982567381374e-06, "loss": 0.5144, "step": 3365 }, { "epoch": 0.1, "grad_norm": 7.3434407919860885, "learning_rate": 9.884883647427818e-06, "loss": 0.593, "step": 3366 }, { "epoch": 0.1, "grad_norm": 3.8351932899176884, "learning_rate": 9.88478468545021e-06, "loss": 0.3068, "step": 3367 }, { "epoch": 0.1, "grad_norm": 4.604243431774707, "learning_rate": 9.884685681449401e-06, "loss": 0.3431, "step": 3368 }, { "epoch": 0.1, "grad_norm": 5.237368237424434, "learning_rate": 9.884586635426243e-06, "loss": 0.3867, "step": 3369 }, { "epoch": 0.1, "grad_norm": 4.363420366410547, "learning_rate": 9.884487547381587e-06, "loss": 0.4105, "step": 3370 }, { "epoch": 0.1, "grad_norm": 6.435176096583758, "learning_rate": 9.884388417316286e-06, "loss": 0.4385, "step": 3371 }, { "epoch": 0.1, "grad_norm": 8.383579073338455, "learning_rate": 9.884289245231193e-06, "loss": 0.8987, "step": 3372 }, { "epoch": 0.1, "grad_norm": 4.114199920688862, "learning_rate": 9.884190031127162e-06, "loss": 0.6317, "step": 3373 }, { "epoch": 0.1, "grad_norm": 5.545344009804329, "learning_rate": 9.884090775005043e-06, "loss": 0.3827, "step": 3374 }, { "epoch": 0.1, "grad_norm": 5.618265358939816, "learning_rate": 9.883991476865694e-06, "loss": 0.5232, "step": 3375 }, { "epoch": 0.1, "grad_norm": 7.606226400873191, "learning_rate": 9.88389213670997e-06, "loss": 0.5637, "step": 3376 }, { "epoch": 0.1, "grad_norm": 3.5878564690722476, "learning_rate": 9.88379275453872e-06, "loss": 0.4564, "step": 3377 }, { "epoch": 0.1, "grad_norm": 4.294904555302732, "learning_rate": 9.883693330352805e-06, "loss": 0.4562, "step": 3378 }, { "epoch": 0.1, "grad_norm": 5.804221626162108, "learning_rate": 9.883593864153074e-06, "loss": 0.2311, "step": 3379 }, { "epoch": 0.1, "grad_norm": 6.870259922391189, "learning_rate": 9.883494355940388e-06, "loss": 0.4597, "step": 3380 }, { "epoch": 0.1, "grad_norm": 10.719958445767384, "learning_rate": 9.883394805715601e-06, "loss": 1.0555, "step": 3381 }, { "epoch": 0.1, "grad_norm": 3.3255989983597294, "learning_rate": 9.88329521347957e-06, "loss": 0.3345, "step": 3382 }, { "epoch": 0.1, "grad_norm": 9.092826088445728, "learning_rate": 9.88319557923315e-06, "loss": 0.5644, "step": 3383 }, { "epoch": 0.1, "grad_norm": 5.4862125285414525, "learning_rate": 9.883095902977202e-06, "loss": 0.8857, "step": 3384 }, { "epoch": 0.1, "grad_norm": 6.004572556502562, "learning_rate": 9.88299618471258e-06, "loss": 0.6661, "step": 3385 }, { "epoch": 0.1, "grad_norm": 4.899415960343837, "learning_rate": 9.882896424440145e-06, "loss": 0.5124, "step": 3386 }, { "epoch": 0.1, "grad_norm": 4.13481011094212, "learning_rate": 9.88279662216075e-06, "loss": 0.6367, "step": 3387 }, { "epoch": 0.1, "grad_norm": 4.181475362135635, "learning_rate": 9.88269677787526e-06, "loss": 0.3176, "step": 3388 }, { "epoch": 0.1, "grad_norm": 6.860989356751743, "learning_rate": 9.882596891584529e-06, "loss": 0.5496, "step": 3389 }, { "epoch": 0.1, "grad_norm": 4.785037068031098, "learning_rate": 9.882496963289418e-06, "loss": 0.2788, "step": 3390 }, { "epoch": 0.1, "grad_norm": 3.243306897425819, "learning_rate": 9.882396992990788e-06, "loss": 0.3852, "step": 3391 }, { "epoch": 0.1, "grad_norm": 8.135802907400048, "learning_rate": 9.882296980689496e-06, "loss": 0.6219, "step": 3392 }, { "epoch": 0.1, "grad_norm": 1.4000198213843031, "learning_rate": 9.882196926386406e-06, "loss": 0.0956, "step": 3393 }, { "epoch": 0.1, "grad_norm": 5.4405973535631755, "learning_rate": 9.882096830082377e-06, "loss": 0.3661, "step": 3394 }, { "epoch": 0.1, "grad_norm": 4.62648929639811, "learning_rate": 9.881996691778268e-06, "loss": 0.394, "step": 3395 }, { "epoch": 0.1, "grad_norm": 9.372992745728261, "learning_rate": 9.881896511474945e-06, "loss": 0.3752, "step": 3396 }, { "epoch": 0.1, "grad_norm": 4.2305559059527535, "learning_rate": 9.881796289173265e-06, "loss": 0.5655, "step": 3397 }, { "epoch": 0.1, "grad_norm": 5.66857132971836, "learning_rate": 9.881696024874095e-06, "loss": 0.5924, "step": 3398 }, { "epoch": 0.1, "grad_norm": 3.486121078027778, "learning_rate": 9.881595718578293e-06, "loss": 0.5193, "step": 3399 }, { "epoch": 0.1, "grad_norm": 6.236078091849986, "learning_rate": 9.881495370286724e-06, "loss": 0.869, "step": 3400 }, { "epoch": 0.1, "grad_norm": 7.678655576403271, "learning_rate": 9.881394980000254e-06, "loss": 0.9309, "step": 3401 }, { "epoch": 0.1, "grad_norm": 9.169307836869443, "learning_rate": 9.881294547719742e-06, "loss": 0.5187, "step": 3402 }, { "epoch": 0.1, "grad_norm": 7.242166101450823, "learning_rate": 9.881194073446054e-06, "loss": 0.6026, "step": 3403 }, { "epoch": 0.1, "grad_norm": 8.300113828866658, "learning_rate": 9.881093557180055e-06, "loss": 0.6529, "step": 3404 }, { "epoch": 0.1, "grad_norm": 5.761374873874889, "learning_rate": 9.88099299892261e-06, "loss": 0.3202, "step": 3405 }, { "epoch": 0.1, "grad_norm": 8.067539857731049, "learning_rate": 9.880892398674582e-06, "loss": 0.4345, "step": 3406 }, { "epoch": 0.1, "grad_norm": 4.2139865968832355, "learning_rate": 9.880791756436837e-06, "loss": 0.2739, "step": 3407 }, { "epoch": 0.1, "grad_norm": 4.477280617413436, "learning_rate": 9.880691072210242e-06, "loss": 0.2327, "step": 3408 }, { "epoch": 0.1, "grad_norm": 4.092928796767236, "learning_rate": 9.880590345995662e-06, "loss": 0.1495, "step": 3409 }, { "epoch": 0.1, "grad_norm": 7.822370565141512, "learning_rate": 9.880489577793965e-06, "loss": 0.3289, "step": 3410 }, { "epoch": 0.1, "grad_norm": 8.096916728057579, "learning_rate": 9.880388767606017e-06, "loss": 0.9301, "step": 3411 }, { "epoch": 0.1, "grad_norm": 6.579734261083207, "learning_rate": 9.880287915432686e-06, "loss": 0.608, "step": 3412 }, { "epoch": 0.1, "grad_norm": 6.348497803289209, "learning_rate": 9.880187021274839e-06, "loss": 0.3303, "step": 3413 }, { "epoch": 0.1, "grad_norm": 4.946600121134473, "learning_rate": 9.880086085133342e-06, "loss": 0.6379, "step": 3414 }, { "epoch": 0.1, "grad_norm": 5.234572449561264, "learning_rate": 9.879985107009066e-06, "loss": 0.8357, "step": 3415 }, { "epoch": 0.1, "grad_norm": 9.218225548260683, "learning_rate": 9.87988408690288e-06, "loss": 0.4601, "step": 3416 }, { "epoch": 0.1, "grad_norm": 9.794234946781144, "learning_rate": 9.879783024815651e-06, "loss": 1.4922, "step": 3417 }, { "epoch": 0.1, "grad_norm": 10.365451808307158, "learning_rate": 9.87968192074825e-06, "loss": 0.4814, "step": 3418 }, { "epoch": 0.1, "grad_norm": 13.474328251162222, "learning_rate": 9.879580774701546e-06, "loss": 0.4466, "step": 3419 }, { "epoch": 0.1, "grad_norm": 10.368726925079809, "learning_rate": 9.87947958667641e-06, "loss": 0.587, "step": 3420 }, { "epoch": 0.1, "grad_norm": 9.710145336442395, "learning_rate": 9.879378356673711e-06, "loss": 0.2597, "step": 3421 }, { "epoch": 0.1, "grad_norm": 8.228118952116173, "learning_rate": 9.87927708469432e-06, "loss": 0.4149, "step": 3422 }, { "epoch": 0.1, "grad_norm": 11.169391147887245, "learning_rate": 9.879175770739111e-06, "loss": 0.3347, "step": 3423 }, { "epoch": 0.1, "grad_norm": 2.7544225546945182, "learning_rate": 9.879074414808953e-06, "loss": 0.3141, "step": 3424 }, { "epoch": 0.1, "grad_norm": 4.718386099217255, "learning_rate": 9.878973016904719e-06, "loss": 0.4287, "step": 3425 }, { "epoch": 0.1, "grad_norm": 5.065896249012175, "learning_rate": 9.878871577027279e-06, "loss": 0.3959, "step": 3426 }, { "epoch": 0.1, "grad_norm": 5.017540729245255, "learning_rate": 9.878770095177508e-06, "loss": 0.3394, "step": 3427 }, { "epoch": 0.1, "grad_norm": 7.416027938113214, "learning_rate": 9.87866857135628e-06, "loss": 0.6496, "step": 3428 }, { "epoch": 0.1, "grad_norm": 4.865507347139803, "learning_rate": 9.878567005564466e-06, "loss": 0.6492, "step": 3429 }, { "epoch": 0.1, "grad_norm": 20.06297230387401, "learning_rate": 9.878465397802942e-06, "loss": 0.5182, "step": 3430 }, { "epoch": 0.1, "grad_norm": 7.09477764197889, "learning_rate": 9.87836374807258e-06, "loss": 0.632, "step": 3431 }, { "epoch": 0.1, "grad_norm": 5.152172534055776, "learning_rate": 9.878262056374255e-06, "loss": 0.4105, "step": 3432 }, { "epoch": 0.1, "grad_norm": 6.560122540655249, "learning_rate": 9.87816032270884e-06, "loss": 0.734, "step": 3433 }, { "epoch": 0.1, "grad_norm": 4.388191607705559, "learning_rate": 9.878058547077216e-06, "loss": 0.447, "step": 3434 }, { "epoch": 0.1, "grad_norm": 8.159220070684773, "learning_rate": 9.877956729480253e-06, "loss": 0.3583, "step": 3435 }, { "epoch": 0.1, "grad_norm": 9.354905017919055, "learning_rate": 9.877854869918829e-06, "loss": 0.6559, "step": 3436 }, { "epoch": 0.1, "grad_norm": 8.273453291895045, "learning_rate": 9.877752968393821e-06, "loss": 0.8322, "step": 3437 }, { "epoch": 0.1, "grad_norm": 4.418695310823455, "learning_rate": 9.877651024906104e-06, "loss": 0.2179, "step": 3438 }, { "epoch": 0.1, "grad_norm": 19.496195140736248, "learning_rate": 9.877549039456555e-06, "loss": 0.6356, "step": 3439 }, { "epoch": 0.1, "grad_norm": 7.696500022387091, "learning_rate": 9.877447012046054e-06, "loss": 0.5888, "step": 3440 }, { "epoch": 0.1, "grad_norm": 5.300813948910637, "learning_rate": 9.877344942675476e-06, "loss": 0.8324, "step": 3441 }, { "epoch": 0.1, "grad_norm": 7.176552836425973, "learning_rate": 9.8772428313457e-06, "loss": 0.5397, "step": 3442 }, { "epoch": 0.1, "grad_norm": 10.314209212641021, "learning_rate": 9.877140678057604e-06, "loss": 1.006, "step": 3443 }, { "epoch": 0.1, "grad_norm": 4.001838321258544, "learning_rate": 9.87703848281207e-06, "loss": 0.3628, "step": 3444 }, { "epoch": 0.1, "grad_norm": 7.251183972201937, "learning_rate": 9.87693624560997e-06, "loss": 0.549, "step": 3445 }, { "epoch": 0.1, "grad_norm": 8.631739650706745, "learning_rate": 9.876833966452191e-06, "loss": 0.6096, "step": 3446 }, { "epoch": 0.1, "grad_norm": 4.031391289179326, "learning_rate": 9.876731645339608e-06, "loss": 0.6183, "step": 3447 }, { "epoch": 0.1, "grad_norm": 6.190731245243172, "learning_rate": 9.876629282273105e-06, "loss": 0.4866, "step": 3448 }, { "epoch": 0.1, "grad_norm": 10.52109769482736, "learning_rate": 9.876526877253559e-06, "loss": 0.6959, "step": 3449 }, { "epoch": 0.1, "grad_norm": 10.32811433500123, "learning_rate": 9.876424430281853e-06, "loss": 0.4621, "step": 3450 }, { "epoch": 0.1, "grad_norm": 7.320733220112991, "learning_rate": 9.876321941358868e-06, "loss": 0.4968, "step": 3451 }, { "epoch": 0.1, "grad_norm": 4.8440352817324674, "learning_rate": 9.876219410485487e-06, "loss": 0.7018, "step": 3452 }, { "epoch": 0.1, "grad_norm": 7.6719172645538976, "learning_rate": 9.87611683766259e-06, "loss": 0.6772, "step": 3453 }, { "epoch": 0.1, "grad_norm": 5.130175442920986, "learning_rate": 9.87601422289106e-06, "loss": 0.4285, "step": 3454 }, { "epoch": 0.1, "grad_norm": 13.645814797580545, "learning_rate": 9.87591156617178e-06, "loss": 0.9064, "step": 3455 }, { "epoch": 0.1, "grad_norm": 6.7889715085445275, "learning_rate": 9.875808867505633e-06, "loss": 0.7313, "step": 3456 }, { "epoch": 0.1, "grad_norm": 6.091477464311238, "learning_rate": 9.875706126893503e-06, "loss": 0.5319, "step": 3457 }, { "epoch": 0.1, "grad_norm": 5.138429120548837, "learning_rate": 9.875603344336273e-06, "loss": 0.5806, "step": 3458 }, { "epoch": 0.1, "grad_norm": 7.73044469366046, "learning_rate": 9.875500519834828e-06, "loss": 0.3961, "step": 3459 }, { "epoch": 0.1, "grad_norm": 4.170832471564805, "learning_rate": 9.875397653390054e-06, "loss": 0.3536, "step": 3460 }, { "epoch": 0.1, "grad_norm": 4.759963778510241, "learning_rate": 9.875294745002832e-06, "loss": 0.709, "step": 3461 }, { "epoch": 0.1, "grad_norm": 9.420288053510275, "learning_rate": 9.87519179467405e-06, "loss": 0.5263, "step": 3462 }, { "epoch": 0.1, "grad_norm": 7.239379206645237, "learning_rate": 9.875088802404592e-06, "loss": 0.5586, "step": 3463 }, { "epoch": 0.1, "grad_norm": 4.56731735290247, "learning_rate": 9.874985768195349e-06, "loss": 0.3105, "step": 3464 }, { "epoch": 0.1, "grad_norm": 7.311158277223538, "learning_rate": 9.8748826920472e-06, "loss": 0.3938, "step": 3465 }, { "epoch": 0.1, "grad_norm": 6.399410634918203, "learning_rate": 9.874779573961038e-06, "loss": 0.6481, "step": 3466 }, { "epoch": 0.1, "grad_norm": 4.971550925750467, "learning_rate": 9.874676413937745e-06, "loss": 0.2857, "step": 3467 }, { "epoch": 0.1, "grad_norm": 10.464294214119661, "learning_rate": 9.874573211978213e-06, "loss": 0.3682, "step": 3468 }, { "epoch": 0.1, "grad_norm": 3.8236390591128195, "learning_rate": 9.874469968083327e-06, "loss": 0.2281, "step": 3469 }, { "epoch": 0.1, "grad_norm": 9.253406979774391, "learning_rate": 9.874366682253977e-06, "loss": 0.4728, "step": 3470 }, { "epoch": 0.1, "grad_norm": 5.741714686436081, "learning_rate": 9.874263354491049e-06, "loss": 0.6869, "step": 3471 }, { "epoch": 0.1, "grad_norm": 4.155592902305271, "learning_rate": 9.874159984795435e-06, "loss": 0.8542, "step": 3472 }, { "epoch": 0.1, "grad_norm": 7.880311779108864, "learning_rate": 9.87405657316802e-06, "loss": 0.6357, "step": 3473 }, { "epoch": 0.1, "grad_norm": 6.366705940618096, "learning_rate": 9.8739531196097e-06, "loss": 0.6542, "step": 3474 }, { "epoch": 0.1, "grad_norm": 4.100701363400056, "learning_rate": 9.87384962412136e-06, "loss": 0.2825, "step": 3475 }, { "epoch": 0.1, "grad_norm": 5.261993242451568, "learning_rate": 9.873746086703893e-06, "loss": 0.4892, "step": 3476 }, { "epoch": 0.1, "grad_norm": 7.368757515499822, "learning_rate": 9.873642507358185e-06, "loss": 0.4714, "step": 3477 }, { "epoch": 0.1, "grad_norm": 8.370198823670696, "learning_rate": 9.873538886085134e-06, "loss": 0.3067, "step": 3478 }, { "epoch": 0.1, "grad_norm": 5.489165754077342, "learning_rate": 9.873435222885626e-06, "loss": 0.2746, "step": 3479 }, { "epoch": 0.1, "grad_norm": 7.287096844051224, "learning_rate": 9.873331517760557e-06, "loss": 0.6402, "step": 3480 }, { "epoch": 0.1, "grad_norm": 6.203234693196325, "learning_rate": 9.873227770710814e-06, "loss": 0.409, "step": 3481 }, { "epoch": 0.1, "grad_norm": 20.473486525569975, "learning_rate": 9.873123981737293e-06, "loss": 0.6659, "step": 3482 }, { "epoch": 0.1, "grad_norm": 7.192437590619006, "learning_rate": 9.87302015084089e-06, "loss": 0.5542, "step": 3483 }, { "epoch": 0.1, "grad_norm": 4.815932894895061, "learning_rate": 9.87291627802249e-06, "loss": 0.5002, "step": 3484 }, { "epoch": 0.1, "grad_norm": 8.7975150327779, "learning_rate": 9.872812363282994e-06, "loss": 0.7777, "step": 3485 }, { "epoch": 0.1, "grad_norm": 3.7971605107215773, "learning_rate": 9.872708406623292e-06, "loss": 0.499, "step": 3486 }, { "epoch": 0.1, "grad_norm": 6.201413281164816, "learning_rate": 9.87260440804428e-06, "loss": 0.5737, "step": 3487 }, { "epoch": 0.1, "grad_norm": 6.067934098088904, "learning_rate": 9.872500367546853e-06, "loss": 0.4987, "step": 3488 }, { "epoch": 0.1, "grad_norm": 10.145593210834518, "learning_rate": 9.872396285131905e-06, "loss": 0.4338, "step": 3489 }, { "epoch": 0.1, "grad_norm": 9.332646128922933, "learning_rate": 9.87229216080033e-06, "loss": 0.7176, "step": 3490 }, { "epoch": 0.1, "grad_norm": 4.330844782029424, "learning_rate": 9.872187994553027e-06, "loss": 0.6928, "step": 3491 }, { "epoch": 0.1, "grad_norm": 4.481666012531392, "learning_rate": 9.872083786390889e-06, "loss": 0.747, "step": 3492 }, { "epoch": 0.1, "grad_norm": 5.207295210058059, "learning_rate": 9.871979536314815e-06, "loss": 0.3999, "step": 3493 }, { "epoch": 0.1, "grad_norm": 6.757826294636842, "learning_rate": 9.871875244325701e-06, "loss": 0.4785, "step": 3494 }, { "epoch": 0.1, "grad_norm": 4.470769705886983, "learning_rate": 9.871770910424445e-06, "loss": 0.1753, "step": 3495 }, { "epoch": 0.1, "grad_norm": 4.653738432569368, "learning_rate": 9.871666534611944e-06, "loss": 0.4972, "step": 3496 }, { "epoch": 0.1, "grad_norm": 6.06688908843443, "learning_rate": 9.871562116889093e-06, "loss": 0.4541, "step": 3497 }, { "epoch": 0.1, "grad_norm": 7.3777317871373596, "learning_rate": 9.871457657256794e-06, "loss": 0.7543, "step": 3498 }, { "epoch": 0.1, "grad_norm": 3.2386655682976393, "learning_rate": 9.871353155715947e-06, "loss": 0.4299, "step": 3499 }, { "epoch": 0.1, "grad_norm": 8.219783297371624, "learning_rate": 9.871248612267445e-06, "loss": 0.9973, "step": 3500 }, { "epoch": 0.1, "grad_norm": 4.686695106066446, "learning_rate": 9.871144026912191e-06, "loss": 0.2437, "step": 3501 }, { "epoch": 0.1, "grad_norm": 6.246351016092143, "learning_rate": 9.871039399651087e-06, "loss": 0.5095, "step": 3502 }, { "epoch": 0.1, "grad_norm": 8.443897224006806, "learning_rate": 9.87093473048503e-06, "loss": 0.6375, "step": 3503 }, { "epoch": 0.1, "grad_norm": 7.717315725901372, "learning_rate": 9.870830019414921e-06, "loss": 0.6999, "step": 3504 }, { "epoch": 0.1, "grad_norm": 3.883701215080949, "learning_rate": 9.870725266441663e-06, "loss": 0.4999, "step": 3505 }, { "epoch": 0.1, "grad_norm": 6.364567054773502, "learning_rate": 9.870620471566152e-06, "loss": 0.7404, "step": 3506 }, { "epoch": 0.1, "grad_norm": 4.849665813140685, "learning_rate": 9.870515634789295e-06, "loss": 0.2362, "step": 3507 }, { "epoch": 0.1, "grad_norm": 9.2003653619614, "learning_rate": 9.87041075611199e-06, "loss": 0.5591, "step": 3508 }, { "epoch": 0.1, "grad_norm": 7.669801637752803, "learning_rate": 9.870305835535142e-06, "loss": 0.2517, "step": 3509 }, { "epoch": 0.1, "grad_norm": 4.556215504331261, "learning_rate": 9.870200873059652e-06, "loss": 0.261, "step": 3510 }, { "epoch": 0.1, "grad_norm": 6.322017029596627, "learning_rate": 9.870095868686425e-06, "loss": 0.7094, "step": 3511 }, { "epoch": 0.1, "grad_norm": 14.532984082519784, "learning_rate": 9.86999082241636e-06, "loss": 0.5837, "step": 3512 }, { "epoch": 0.1, "grad_norm": 7.018478255136384, "learning_rate": 9.869885734250365e-06, "loss": 0.378, "step": 3513 }, { "epoch": 0.1, "grad_norm": 5.203925497892525, "learning_rate": 9.869780604189345e-06, "loss": 0.4613, "step": 3514 }, { "epoch": 0.1, "grad_norm": 21.99829892604536, "learning_rate": 9.869675432234199e-06, "loss": 1.2866, "step": 3515 }, { "epoch": 0.1, "grad_norm": 7.746023019072401, "learning_rate": 9.869570218385835e-06, "loss": 0.4244, "step": 3516 }, { "epoch": 0.1, "grad_norm": 5.849423861865571, "learning_rate": 9.86946496264516e-06, "loss": 0.5698, "step": 3517 }, { "epoch": 0.1, "grad_norm": 7.758933671068218, "learning_rate": 9.869359665013077e-06, "loss": 0.5372, "step": 3518 }, { "epoch": 0.1, "grad_norm": 5.4605202358553075, "learning_rate": 9.869254325490492e-06, "loss": 0.4671, "step": 3519 }, { "epoch": 0.1, "grad_norm": 3.4053432062526703, "learning_rate": 9.869148944078311e-06, "loss": 0.5125, "step": 3520 }, { "epoch": 0.1, "grad_norm": 7.11777457905144, "learning_rate": 9.869043520777441e-06, "loss": 0.5701, "step": 3521 }, { "epoch": 0.1, "grad_norm": 11.87097332596146, "learning_rate": 9.868938055588789e-06, "loss": 0.842, "step": 3522 }, { "epoch": 0.1, "grad_norm": 4.864048124464281, "learning_rate": 9.868832548513263e-06, "loss": 0.5237, "step": 3523 }, { "epoch": 0.1, "grad_norm": 5.945155137895685, "learning_rate": 9.86872699955177e-06, "loss": 0.6338, "step": 3524 }, { "epoch": 0.1, "grad_norm": 11.072703252095215, "learning_rate": 9.868621408705216e-06, "loss": 0.7084, "step": 3525 }, { "epoch": 0.1, "grad_norm": 7.289868538172208, "learning_rate": 9.868515775974515e-06, "loss": 0.3379, "step": 3526 }, { "epoch": 0.1, "grad_norm": 8.058567359574154, "learning_rate": 9.86841010136057e-06, "loss": 0.5678, "step": 3527 }, { "epoch": 0.1, "grad_norm": 4.309244309090242, "learning_rate": 9.868304384864293e-06, "loss": 0.5502, "step": 3528 }, { "epoch": 0.1, "grad_norm": 4.0292016969017315, "learning_rate": 9.868198626486592e-06, "loss": 0.2363, "step": 3529 }, { "epoch": 0.1, "grad_norm": 5.347476396120685, "learning_rate": 9.86809282622838e-06, "loss": 0.4294, "step": 3530 }, { "epoch": 0.1, "grad_norm": 11.776427073571195, "learning_rate": 9.867986984090562e-06, "loss": 0.7461, "step": 3531 }, { "epoch": 0.1, "grad_norm": 8.178526991868026, "learning_rate": 9.867881100074053e-06, "loss": 0.6284, "step": 3532 }, { "epoch": 0.1, "grad_norm": 7.76298241582862, "learning_rate": 9.867775174179761e-06, "loss": 1.1282, "step": 3533 }, { "epoch": 0.1, "grad_norm": 4.3910383253933505, "learning_rate": 9.8676692064086e-06, "loss": 0.5122, "step": 3534 }, { "epoch": 0.1, "grad_norm": 4.115858467912491, "learning_rate": 9.867563196761478e-06, "loss": 0.2396, "step": 3535 }, { "epoch": 0.1, "grad_norm": 9.673423532196445, "learning_rate": 9.86745714523931e-06, "loss": 0.6398, "step": 3536 }, { "epoch": 0.1, "grad_norm": 9.798638484732457, "learning_rate": 9.867351051843008e-06, "loss": 0.8289, "step": 3537 }, { "epoch": 0.1, "grad_norm": 4.747769534799737, "learning_rate": 9.867244916573485e-06, "loss": 0.4479, "step": 3538 }, { "epoch": 0.1, "grad_norm": 5.385940772205816, "learning_rate": 9.867138739431651e-06, "loss": 0.4665, "step": 3539 }, { "epoch": 0.1, "grad_norm": 6.619648373361276, "learning_rate": 9.867032520418423e-06, "loss": 0.7762, "step": 3540 }, { "epoch": 0.1, "grad_norm": 6.895328464831022, "learning_rate": 9.866926259534715e-06, "loss": 0.783, "step": 3541 }, { "epoch": 0.1, "grad_norm": 6.751665333779356, "learning_rate": 9.866819956781436e-06, "loss": 0.6737, "step": 3542 }, { "epoch": 0.1, "grad_norm": 6.9644255627574525, "learning_rate": 9.866713612159507e-06, "loss": 0.9546, "step": 3543 }, { "epoch": 0.1, "grad_norm": 5.540622851892562, "learning_rate": 9.86660722566984e-06, "loss": 0.4789, "step": 3544 }, { "epoch": 0.1, "grad_norm": 4.594860254140617, "learning_rate": 9.86650079731335e-06, "loss": 0.331, "step": 3545 }, { "epoch": 0.1, "grad_norm": 7.075933997304109, "learning_rate": 9.86639432709095e-06, "loss": 0.4654, "step": 3546 }, { "epoch": 0.1, "grad_norm": 6.466003055867948, "learning_rate": 9.866287815003562e-06, "loss": 0.4119, "step": 3547 }, { "epoch": 0.1, "grad_norm": 5.48634068401742, "learning_rate": 9.866181261052099e-06, "loss": 0.8057, "step": 3548 }, { "epoch": 0.1, "grad_norm": 4.513669392392356, "learning_rate": 9.866074665237477e-06, "loss": 0.3906, "step": 3549 }, { "epoch": 0.1, "grad_norm": 11.881646795735568, "learning_rate": 9.865968027560613e-06, "loss": 0.5262, "step": 3550 }, { "epoch": 0.1, "grad_norm": 6.406805395846593, "learning_rate": 9.865861348022426e-06, "loss": 0.7503, "step": 3551 }, { "epoch": 0.1, "grad_norm": 3.510058289448732, "learning_rate": 9.865754626623833e-06, "loss": 0.4643, "step": 3552 }, { "epoch": 0.1, "grad_norm": 7.816160458384811, "learning_rate": 9.865647863365751e-06, "loss": 0.3893, "step": 3553 }, { "epoch": 0.1, "grad_norm": 11.337205033844592, "learning_rate": 9.8655410582491e-06, "loss": 0.3424, "step": 3554 }, { "epoch": 0.1, "grad_norm": 5.86151873219214, "learning_rate": 9.8654342112748e-06, "loss": 0.338, "step": 3555 }, { "epoch": 0.1, "grad_norm": 8.092321557030036, "learning_rate": 9.865327322443767e-06, "loss": 0.7503, "step": 3556 }, { "epoch": 0.1, "grad_norm": 2.2000140823650476, "learning_rate": 9.865220391756922e-06, "loss": 0.1934, "step": 3557 }, { "epoch": 0.1, "grad_norm": 7.019925980462985, "learning_rate": 9.865113419215185e-06, "loss": 0.5051, "step": 3558 }, { "epoch": 0.1, "grad_norm": 9.127347879262741, "learning_rate": 9.865006404819478e-06, "loss": 0.709, "step": 3559 }, { "epoch": 0.1, "grad_norm": 5.2372329425325495, "learning_rate": 9.864899348570717e-06, "loss": 0.5455, "step": 3560 }, { "epoch": 0.1, "grad_norm": 4.315830893166287, "learning_rate": 9.864792250469828e-06, "loss": 0.3651, "step": 3561 }, { "epoch": 0.1, "grad_norm": 4.724629257431553, "learning_rate": 9.86468511051773e-06, "loss": 0.4117, "step": 3562 }, { "epoch": 0.1, "grad_norm": 7.639595258786857, "learning_rate": 9.864577928715344e-06, "loss": 0.2698, "step": 3563 }, { "epoch": 0.1, "grad_norm": 2.3992002311943343, "learning_rate": 9.864470705063593e-06, "loss": 0.2454, "step": 3564 }, { "epoch": 0.1, "grad_norm": 5.983376601478027, "learning_rate": 9.8643634395634e-06, "loss": 0.5627, "step": 3565 }, { "epoch": 0.1, "grad_norm": 7.841336551518789, "learning_rate": 9.864256132215686e-06, "loss": 0.3022, "step": 3566 }, { "epoch": 0.1, "grad_norm": 2.5061010540013178, "learning_rate": 9.864148783021377e-06, "loss": 0.2093, "step": 3567 }, { "epoch": 0.1, "grad_norm": 5.497775148008871, "learning_rate": 9.864041391981394e-06, "loss": 0.4086, "step": 3568 }, { "epoch": 0.1, "grad_norm": 8.945719587233363, "learning_rate": 9.863933959096664e-06, "loss": 0.2268, "step": 3569 }, { "epoch": 0.1, "grad_norm": 4.279300997865711, "learning_rate": 9.863826484368106e-06, "loss": 0.3889, "step": 3570 }, { "epoch": 0.1, "grad_norm": 7.2769568888718865, "learning_rate": 9.86371896779665e-06, "loss": 0.3505, "step": 3571 }, { "epoch": 0.1, "grad_norm": 3.3343585384859917, "learning_rate": 9.863611409383217e-06, "loss": 0.2789, "step": 3572 }, { "epoch": 0.1, "grad_norm": 4.446748149721959, "learning_rate": 9.863503809128734e-06, "loss": 0.5111, "step": 3573 }, { "epoch": 0.1, "grad_norm": 6.943526528622651, "learning_rate": 9.863396167034127e-06, "loss": 0.5348, "step": 3574 }, { "epoch": 0.1, "grad_norm": 3.9096800431271963, "learning_rate": 9.863288483100322e-06, "loss": 0.537, "step": 3575 }, { "epoch": 0.1, "grad_norm": 6.757653736167926, "learning_rate": 9.863180757328244e-06, "loss": 0.94, "step": 3576 }, { "epoch": 0.1, "grad_norm": 5.567241533752195, "learning_rate": 9.86307298971882e-06, "loss": 0.5735, "step": 3577 }, { "epoch": 0.1, "grad_norm": 5.628962879835821, "learning_rate": 9.86296518027298e-06, "loss": 0.4717, "step": 3578 }, { "epoch": 0.1, "grad_norm": 7.924148925425479, "learning_rate": 9.862857328991648e-06, "loss": 0.4703, "step": 3579 }, { "epoch": 0.1, "grad_norm": 9.52143744980424, "learning_rate": 9.862749435875754e-06, "loss": 0.8108, "step": 3580 }, { "epoch": 0.1, "grad_norm": 4.219464615943904, "learning_rate": 9.862641500926225e-06, "loss": 0.234, "step": 3581 }, { "epoch": 0.1, "grad_norm": 5.632349934413824, "learning_rate": 9.862533524143989e-06, "loss": 0.3364, "step": 3582 }, { "epoch": 0.1, "grad_norm": 12.0209495146551, "learning_rate": 9.862425505529975e-06, "loss": 0.7027, "step": 3583 }, { "epoch": 0.1, "grad_norm": 4.505119114104453, "learning_rate": 9.862317445085114e-06, "loss": 0.4778, "step": 3584 }, { "epoch": 0.1, "grad_norm": 4.283617687558504, "learning_rate": 9.862209342810336e-06, "loss": 0.488, "step": 3585 }, { "epoch": 0.1, "grad_norm": 7.913561067954497, "learning_rate": 9.862101198706568e-06, "loss": 0.6149, "step": 3586 }, { "epoch": 0.1, "grad_norm": 6.887978458574661, "learning_rate": 9.861993012774745e-06, "loss": 0.5516, "step": 3587 }, { "epoch": 0.1, "grad_norm": 7.309168325619793, "learning_rate": 9.861884785015791e-06, "loss": 0.478, "step": 3588 }, { "epoch": 0.1, "grad_norm": 5.031914068905951, "learning_rate": 9.861776515430643e-06, "loss": 0.5599, "step": 3589 }, { "epoch": 0.1, "grad_norm": 3.77241353739151, "learning_rate": 9.86166820402023e-06, "loss": 0.3455, "step": 3590 }, { "epoch": 0.1, "grad_norm": 3.944547159726113, "learning_rate": 9.861559850785483e-06, "loss": 0.3149, "step": 3591 }, { "epoch": 0.1, "grad_norm": 9.194120966767976, "learning_rate": 9.861451455727335e-06, "loss": 0.7802, "step": 3592 }, { "epoch": 0.1, "grad_norm": 5.220757012850862, "learning_rate": 9.861343018846721e-06, "loss": 0.586, "step": 3593 }, { "epoch": 0.1, "grad_norm": 9.25748656137734, "learning_rate": 9.861234540144568e-06, "loss": 0.889, "step": 3594 }, { "epoch": 0.1, "grad_norm": 8.70331588344025, "learning_rate": 9.861126019621815e-06, "loss": 0.7094, "step": 3595 }, { "epoch": 0.1, "grad_norm": 7.484022223773461, "learning_rate": 9.861017457279394e-06, "loss": 0.5146, "step": 3596 }, { "epoch": 0.1, "grad_norm": 3.768026828496227, "learning_rate": 9.860908853118238e-06, "loss": 0.6875, "step": 3597 }, { "epoch": 0.1, "grad_norm": 6.68464107545635, "learning_rate": 9.86080020713928e-06, "loss": 0.7545, "step": 3598 }, { "epoch": 0.1, "grad_norm": 4.9761413204357945, "learning_rate": 9.860691519343456e-06, "loss": 0.5328, "step": 3599 }, { "epoch": 0.1, "grad_norm": 7.3831229331332455, "learning_rate": 9.860582789731705e-06, "loss": 0.493, "step": 3600 }, { "epoch": 0.1, "grad_norm": 11.163221216771184, "learning_rate": 9.860474018304955e-06, "loss": 0.3137, "step": 3601 }, { "epoch": 0.1, "grad_norm": 22.37030065192071, "learning_rate": 9.860365205064148e-06, "loss": 0.9984, "step": 3602 }, { "epoch": 0.1, "grad_norm": 7.096170961774074, "learning_rate": 9.860256350010215e-06, "loss": 0.376, "step": 3603 }, { "epoch": 0.1, "grad_norm": 3.5211755973693153, "learning_rate": 9.860147453144098e-06, "loss": 0.2155, "step": 3604 }, { "epoch": 0.1, "grad_norm": 8.018329127052974, "learning_rate": 9.86003851446673e-06, "loss": 0.6701, "step": 3605 }, { "epoch": 0.1, "grad_norm": 6.902765283844061, "learning_rate": 9.859929533979046e-06, "loss": 0.7247, "step": 3606 }, { "epoch": 0.1, "grad_norm": 3.240810681047544, "learning_rate": 9.85982051168199e-06, "loss": 0.2386, "step": 3607 }, { "epoch": 0.1, "grad_norm": 7.75056412397234, "learning_rate": 9.859711447576497e-06, "loss": 0.462, "step": 3608 }, { "epoch": 0.1, "grad_norm": 7.727752994973271, "learning_rate": 9.859602341663502e-06, "loss": 0.8774, "step": 3609 }, { "epoch": 0.1, "grad_norm": 4.5024874355642845, "learning_rate": 9.859493193943948e-06, "loss": 0.3891, "step": 3610 }, { "epoch": 0.1, "grad_norm": 7.298246136400201, "learning_rate": 9.859384004418771e-06, "loss": 0.4618, "step": 3611 }, { "epoch": 0.1, "grad_norm": 6.71322290942771, "learning_rate": 9.859274773088914e-06, "loss": 0.5828, "step": 3612 }, { "epoch": 0.1, "grad_norm": 7.212912242618397, "learning_rate": 9.859165499955312e-06, "loss": 0.5331, "step": 3613 }, { "epoch": 0.1, "grad_norm": 6.301239697024734, "learning_rate": 9.859056185018909e-06, "loss": 0.4448, "step": 3614 }, { "epoch": 0.1, "grad_norm": 3.0464347521294854, "learning_rate": 9.858946828280643e-06, "loss": 0.3023, "step": 3615 }, { "epoch": 0.1, "grad_norm": 8.293206619825863, "learning_rate": 9.858837429741457e-06, "loss": 0.4667, "step": 3616 }, { "epoch": 0.1, "grad_norm": 3.8682382716625643, "learning_rate": 9.85872798940229e-06, "loss": 0.2362, "step": 3617 }, { "epoch": 0.1, "grad_norm": 7.254500143444606, "learning_rate": 9.858618507264083e-06, "loss": 0.543, "step": 3618 }, { "epoch": 0.1, "grad_norm": 3.350500156301823, "learning_rate": 9.858508983327781e-06, "loss": 0.494, "step": 3619 }, { "epoch": 0.1, "grad_norm": 7.311823626154438, "learning_rate": 9.858399417594323e-06, "loss": 0.7547, "step": 3620 }, { "epoch": 0.1, "grad_norm": 8.863524311308854, "learning_rate": 9.858289810064657e-06, "loss": 0.2973, "step": 3621 }, { "epoch": 0.1, "grad_norm": 8.025288072210728, "learning_rate": 9.858180160739716e-06, "loss": 0.5257, "step": 3622 }, { "epoch": 0.1, "grad_norm": 8.678855382275934, "learning_rate": 9.858070469620453e-06, "loss": 0.3659, "step": 3623 }, { "epoch": 0.1, "grad_norm": 7.764883270761843, "learning_rate": 9.857960736707807e-06, "loss": 0.6481, "step": 3624 }, { "epoch": 0.1, "grad_norm": 14.685999562343095, "learning_rate": 9.857850962002724e-06, "loss": 0.8727, "step": 3625 }, { "epoch": 0.1, "grad_norm": 5.398772608821629, "learning_rate": 9.857741145506146e-06, "loss": 0.2993, "step": 3626 }, { "epoch": 0.1, "grad_norm": 3.729574845350217, "learning_rate": 9.857631287219018e-06, "loss": 0.2939, "step": 3627 }, { "epoch": 0.1, "grad_norm": 5.412092590856665, "learning_rate": 9.857521387142288e-06, "loss": 0.4317, "step": 3628 }, { "epoch": 0.1, "grad_norm": 4.37535578779668, "learning_rate": 9.857411445276899e-06, "loss": 0.5008, "step": 3629 }, { "epoch": 0.1, "grad_norm": 4.952980785496651, "learning_rate": 9.857301461623797e-06, "loss": 0.2625, "step": 3630 }, { "epoch": 0.1, "grad_norm": 5.011855660974573, "learning_rate": 9.857191436183929e-06, "loss": 0.5926, "step": 3631 }, { "epoch": 0.1, "grad_norm": 5.452253168143203, "learning_rate": 9.857081368958242e-06, "loss": 0.446, "step": 3632 }, { "epoch": 0.1, "grad_norm": 9.863260751835135, "learning_rate": 9.85697125994768e-06, "loss": 0.7583, "step": 3633 }, { "epoch": 0.1, "grad_norm": 5.045505373266828, "learning_rate": 9.856861109153193e-06, "loss": 0.4986, "step": 3634 }, { "epoch": 0.1, "grad_norm": 5.132111430372946, "learning_rate": 9.856750916575728e-06, "loss": 0.4199, "step": 3635 }, { "epoch": 0.1, "grad_norm": 11.566163178698005, "learning_rate": 9.856640682216234e-06, "loss": 0.8619, "step": 3636 }, { "epoch": 0.1, "grad_norm": 5.1304281378065575, "learning_rate": 9.856530406075657e-06, "loss": 0.3777, "step": 3637 }, { "epoch": 0.1, "grad_norm": 5.718241100910924, "learning_rate": 9.856420088154946e-06, "loss": 0.7445, "step": 3638 }, { "epoch": 0.1, "grad_norm": 3.304571091064844, "learning_rate": 9.856309728455052e-06, "loss": 0.2372, "step": 3639 }, { "epoch": 0.1, "grad_norm": 5.927591187006189, "learning_rate": 9.856199326976925e-06, "loss": 0.3132, "step": 3640 }, { "epoch": 0.1, "grad_norm": 6.748752478711966, "learning_rate": 9.856088883721511e-06, "loss": 0.8028, "step": 3641 }, { "epoch": 0.1, "grad_norm": 8.55056409703848, "learning_rate": 9.855978398689762e-06, "loss": 0.395, "step": 3642 }, { "epoch": 0.1, "grad_norm": 5.557557685679653, "learning_rate": 9.85586787188263e-06, "loss": 0.6432, "step": 3643 }, { "epoch": 0.1, "grad_norm": 2.7569348719185385, "learning_rate": 9.855757303301064e-06, "loss": 0.7322, "step": 3644 }, { "epoch": 0.1, "grad_norm": 14.651469738245465, "learning_rate": 9.855646692946014e-06, "loss": 0.8892, "step": 3645 }, { "epoch": 0.1, "grad_norm": 4.059952568096589, "learning_rate": 9.855536040818436e-06, "loss": 0.4339, "step": 3646 }, { "epoch": 0.1, "grad_norm": 6.6321451774785025, "learning_rate": 9.855425346919278e-06, "loss": 1.0857, "step": 3647 }, { "epoch": 0.1, "grad_norm": 10.952425728976886, "learning_rate": 9.855314611249494e-06, "loss": 0.616, "step": 3648 }, { "epoch": 0.1, "grad_norm": 3.675130496660012, "learning_rate": 9.855203833810036e-06, "loss": 0.1962, "step": 3649 }, { "epoch": 0.1, "grad_norm": 7.793890372253422, "learning_rate": 9.855093014601858e-06, "loss": 0.6132, "step": 3650 }, { "epoch": 0.1, "grad_norm": 7.001870041783927, "learning_rate": 9.85498215362591e-06, "loss": 0.3668, "step": 3651 }, { "epoch": 0.1, "grad_norm": 8.04910336352224, "learning_rate": 9.854871250883151e-06, "loss": 0.4114, "step": 3652 }, { "epoch": 0.1, "grad_norm": 8.279194155607263, "learning_rate": 9.85476030637453e-06, "loss": 0.4142, "step": 3653 }, { "epoch": 0.1, "grad_norm": 5.361313138569105, "learning_rate": 9.854649320101006e-06, "loss": 0.417, "step": 3654 }, { "epoch": 0.1, "grad_norm": 7.767058948165945, "learning_rate": 9.85453829206353e-06, "loss": 0.4232, "step": 3655 }, { "epoch": 0.1, "grad_norm": 4.8730820771132715, "learning_rate": 9.85442722226306e-06, "loss": 0.7998, "step": 3656 }, { "epoch": 0.1, "grad_norm": 6.076950862026118, "learning_rate": 9.854316110700552e-06, "loss": 0.2769, "step": 3657 }, { "epoch": 0.1, "grad_norm": 5.119786797718774, "learning_rate": 9.854204957376956e-06, "loss": 0.3845, "step": 3658 }, { "epoch": 0.1, "grad_norm": 3.9169108368807803, "learning_rate": 9.854093762293235e-06, "loss": 0.3587, "step": 3659 }, { "epoch": 0.1, "grad_norm": 5.832109720324347, "learning_rate": 9.853982525450343e-06, "loss": 0.5376, "step": 3660 }, { "epoch": 0.1, "grad_norm": 6.269339813447041, "learning_rate": 9.853871246849238e-06, "loss": 0.6013, "step": 3661 }, { "epoch": 0.1, "grad_norm": 3.8515800491149865, "learning_rate": 9.853759926490873e-06, "loss": 0.3078, "step": 3662 }, { "epoch": 0.1, "grad_norm": 5.151038478108477, "learning_rate": 9.853648564376213e-06, "loss": 0.336, "step": 3663 }, { "epoch": 0.1, "grad_norm": 6.29067301535249, "learning_rate": 9.853537160506209e-06, "loss": 0.5759, "step": 3664 }, { "epoch": 0.1, "grad_norm": 7.398834266214957, "learning_rate": 9.853425714881824e-06, "loss": 0.5363, "step": 3665 }, { "epoch": 0.1, "grad_norm": 7.293882945074103, "learning_rate": 9.853314227504014e-06, "loss": 0.4608, "step": 3666 }, { "epoch": 0.11, "grad_norm": 4.884641796008309, "learning_rate": 9.853202698373738e-06, "loss": 0.5561, "step": 3667 }, { "epoch": 0.11, "grad_norm": 8.967635454314559, "learning_rate": 9.85309112749196e-06, "loss": 0.4175, "step": 3668 }, { "epoch": 0.11, "grad_norm": 7.541706281807337, "learning_rate": 9.852979514859634e-06, "loss": 0.4574, "step": 3669 }, { "epoch": 0.11, "grad_norm": 3.546174707053785, "learning_rate": 9.852867860477723e-06, "loss": 0.5028, "step": 3670 }, { "epoch": 0.11, "grad_norm": 19.303619589883528, "learning_rate": 9.852756164347188e-06, "loss": 0.411, "step": 3671 }, { "epoch": 0.11, "grad_norm": 4.414294589532403, "learning_rate": 9.85264442646899e-06, "loss": 0.1858, "step": 3672 }, { "epoch": 0.11, "grad_norm": 5.643559376823086, "learning_rate": 9.852532646844088e-06, "loss": 0.5603, "step": 3673 }, { "epoch": 0.11, "grad_norm": 7.450993839001799, "learning_rate": 9.852420825473445e-06, "loss": 0.5791, "step": 3674 }, { "epoch": 0.11, "grad_norm": 4.243784595061968, "learning_rate": 9.852308962358025e-06, "loss": 0.4824, "step": 3675 }, { "epoch": 0.11, "grad_norm": 6.146551053655545, "learning_rate": 9.852197057498787e-06, "loss": 0.639, "step": 3676 }, { "epoch": 0.11, "grad_norm": 14.326639312628089, "learning_rate": 9.852085110896697e-06, "loss": 1.1902, "step": 3677 }, { "epoch": 0.11, "grad_norm": 10.982776682924596, "learning_rate": 9.851973122552714e-06, "loss": 0.4693, "step": 3678 }, { "epoch": 0.11, "grad_norm": 6.280551283201894, "learning_rate": 9.851861092467803e-06, "loss": 0.6382, "step": 3679 }, { "epoch": 0.11, "grad_norm": 4.593161616629073, "learning_rate": 9.85174902064293e-06, "loss": 0.3576, "step": 3680 }, { "epoch": 0.11, "grad_norm": 13.322653753188355, "learning_rate": 9.851636907079056e-06, "loss": 0.7173, "step": 3681 }, { "epoch": 0.11, "grad_norm": 11.110518384594576, "learning_rate": 9.851524751777148e-06, "loss": 0.4454, "step": 3682 }, { "epoch": 0.11, "grad_norm": 4.924776463349249, "learning_rate": 9.85141255473817e-06, "loss": 0.4252, "step": 3683 }, { "epoch": 0.11, "grad_norm": 5.365694463032504, "learning_rate": 9.851300315963088e-06, "loss": 0.4495, "step": 3684 }, { "epoch": 0.11, "grad_norm": 5.070259881591497, "learning_rate": 9.851188035452866e-06, "loss": 0.2668, "step": 3685 }, { "epoch": 0.11, "grad_norm": 6.561224204892255, "learning_rate": 9.851075713208472e-06, "loss": 0.5214, "step": 3686 }, { "epoch": 0.11, "grad_norm": 5.113830070679977, "learning_rate": 9.850963349230869e-06, "loss": 0.869, "step": 3687 }, { "epoch": 0.11, "grad_norm": 14.00936807823308, "learning_rate": 9.850850943521026e-06, "loss": 0.4704, "step": 3688 }, { "epoch": 0.11, "grad_norm": 4.090419171359002, "learning_rate": 9.850738496079909e-06, "loss": 0.4613, "step": 3689 }, { "epoch": 0.11, "grad_norm": 4.096456986786171, "learning_rate": 9.850626006908485e-06, "loss": 0.3541, "step": 3690 }, { "epoch": 0.11, "grad_norm": 3.280067212503572, "learning_rate": 9.850513476007726e-06, "loss": 0.7446, "step": 3691 }, { "epoch": 0.11, "grad_norm": 11.736501650861358, "learning_rate": 9.850400903378593e-06, "loss": 0.9731, "step": 3692 }, { "epoch": 0.11, "grad_norm": 6.146397253339701, "learning_rate": 9.850288289022062e-06, "loss": 0.5989, "step": 3693 }, { "epoch": 0.11, "grad_norm": 3.9019320617345565, "learning_rate": 9.850175632939096e-06, "loss": 0.3342, "step": 3694 }, { "epoch": 0.11, "grad_norm": 14.327019002944827, "learning_rate": 9.850062935130666e-06, "loss": 0.8674, "step": 3695 }, { "epoch": 0.11, "grad_norm": 15.398441711456172, "learning_rate": 9.849950195597741e-06, "loss": 0.9631, "step": 3696 }, { "epoch": 0.11, "grad_norm": 8.430640590126453, "learning_rate": 9.849837414341294e-06, "loss": 0.6574, "step": 3697 }, { "epoch": 0.11, "grad_norm": 4.103970763409079, "learning_rate": 9.849724591362292e-06, "loss": 0.2059, "step": 3698 }, { "epoch": 0.11, "grad_norm": 8.777006053667348, "learning_rate": 9.849611726661705e-06, "loss": 0.7081, "step": 3699 }, { "epoch": 0.11, "grad_norm": 9.255258972567542, "learning_rate": 9.849498820240508e-06, "loss": 0.7603, "step": 3700 }, { "epoch": 0.11, "grad_norm": 6.017272089599119, "learning_rate": 9.849385872099667e-06, "loss": 0.763, "step": 3701 }, { "epoch": 0.11, "grad_norm": 7.167794855903277, "learning_rate": 9.849272882240159e-06, "loss": 0.8454, "step": 3702 }, { "epoch": 0.11, "grad_norm": 7.032389162819511, "learning_rate": 9.849159850662951e-06, "loss": 0.4903, "step": 3703 }, { "epoch": 0.11, "grad_norm": 5.617299849403983, "learning_rate": 9.84904677736902e-06, "loss": 0.3283, "step": 3704 }, { "epoch": 0.11, "grad_norm": 9.23458722640408, "learning_rate": 9.848933662359334e-06, "loss": 0.6094, "step": 3705 }, { "epoch": 0.11, "grad_norm": 4.300904926181409, "learning_rate": 9.84882050563487e-06, "loss": 0.1797, "step": 3706 }, { "epoch": 0.11, "grad_norm": 6.113888334410705, "learning_rate": 9.8487073071966e-06, "loss": 0.4924, "step": 3707 }, { "epoch": 0.11, "grad_norm": 3.3015750810750846, "learning_rate": 9.8485940670455e-06, "loss": 0.3062, "step": 3708 }, { "epoch": 0.11, "grad_norm": 10.187506740076376, "learning_rate": 9.848480785182541e-06, "loss": 0.5976, "step": 3709 }, { "epoch": 0.11, "grad_norm": 4.519662325976954, "learning_rate": 9.848367461608699e-06, "loss": 0.7293, "step": 3710 }, { "epoch": 0.11, "grad_norm": 6.427964490667799, "learning_rate": 9.848254096324947e-06, "loss": 0.486, "step": 3711 }, { "epoch": 0.11, "grad_norm": 7.574192403164191, "learning_rate": 9.848140689332264e-06, "loss": 0.836, "step": 3712 }, { "epoch": 0.11, "grad_norm": 7.395623933935012, "learning_rate": 9.848027240631622e-06, "loss": 0.7851, "step": 3713 }, { "epoch": 0.11, "grad_norm": 4.056004055072894, "learning_rate": 9.847913750224e-06, "loss": 0.7449, "step": 3714 }, { "epoch": 0.11, "grad_norm": 5.068025422900353, "learning_rate": 9.847800218110371e-06, "loss": 0.3646, "step": 3715 }, { "epoch": 0.11, "grad_norm": 5.681038424305029, "learning_rate": 9.847686644291716e-06, "loss": 0.5161, "step": 3716 }, { "epoch": 0.11, "grad_norm": 4.6152020879868045, "learning_rate": 9.847573028769008e-06, "loss": 0.5784, "step": 3717 }, { "epoch": 0.11, "grad_norm": 5.465675318002683, "learning_rate": 9.847459371543226e-06, "loss": 0.5269, "step": 3718 }, { "epoch": 0.11, "grad_norm": 7.930128624583677, "learning_rate": 9.847345672615348e-06, "loss": 0.6251, "step": 3719 }, { "epoch": 0.11, "grad_norm": 8.282390184903619, "learning_rate": 9.847231931986351e-06, "loss": 0.6481, "step": 3720 }, { "epoch": 0.11, "grad_norm": 4.866951411661337, "learning_rate": 9.847118149657217e-06, "loss": 0.4306, "step": 3721 }, { "epoch": 0.11, "grad_norm": 3.784824746558251, "learning_rate": 9.847004325628921e-06, "loss": 0.4646, "step": 3722 }, { "epoch": 0.11, "grad_norm": 9.759144431745687, "learning_rate": 9.846890459902446e-06, "loss": 0.5495, "step": 3723 }, { "epoch": 0.11, "grad_norm": 13.839434982408935, "learning_rate": 9.846776552478766e-06, "loss": 0.7533, "step": 3724 }, { "epoch": 0.11, "grad_norm": 7.280906472677238, "learning_rate": 9.846662603358864e-06, "loss": 0.3249, "step": 3725 }, { "epoch": 0.11, "grad_norm": 4.466624014328151, "learning_rate": 9.846548612543722e-06, "loss": 0.3139, "step": 3726 }, { "epoch": 0.11, "grad_norm": 6.287444974078341, "learning_rate": 9.846434580034317e-06, "loss": 0.644, "step": 3727 }, { "epoch": 0.11, "grad_norm": 8.214084504903308, "learning_rate": 9.846320505831634e-06, "loss": 0.7894, "step": 3728 }, { "epoch": 0.11, "grad_norm": 9.312487813442374, "learning_rate": 9.846206389936652e-06, "loss": 0.5737, "step": 3729 }, { "epoch": 0.11, "grad_norm": 14.290193591184662, "learning_rate": 9.846092232350353e-06, "loss": 0.3383, "step": 3730 }, { "epoch": 0.11, "grad_norm": 3.433223804251441, "learning_rate": 9.84597803307372e-06, "loss": 0.399, "step": 3731 }, { "epoch": 0.11, "grad_norm": 3.2434388101473854, "learning_rate": 9.845863792107733e-06, "loss": 0.5941, "step": 3732 }, { "epoch": 0.11, "grad_norm": 4.485607761978719, "learning_rate": 9.845749509453376e-06, "loss": 0.3803, "step": 3733 }, { "epoch": 0.11, "grad_norm": 8.989963604489532, "learning_rate": 9.845635185111634e-06, "loss": 0.6435, "step": 3734 }, { "epoch": 0.11, "grad_norm": 7.4576546841524864, "learning_rate": 9.84552081908349e-06, "loss": 0.6759, "step": 3735 }, { "epoch": 0.11, "grad_norm": 6.221658074636766, "learning_rate": 9.845406411369925e-06, "loss": 0.6823, "step": 3736 }, { "epoch": 0.11, "grad_norm": 4.822883711579389, "learning_rate": 9.845291961971926e-06, "loss": 0.2419, "step": 3737 }, { "epoch": 0.11, "grad_norm": 9.962763648092405, "learning_rate": 9.845177470890476e-06, "loss": 0.7584, "step": 3738 }, { "epoch": 0.11, "grad_norm": 6.530537835677684, "learning_rate": 9.845062938126561e-06, "loss": 0.8427, "step": 3739 }, { "epoch": 0.11, "grad_norm": 5.191066113675052, "learning_rate": 9.844948363681168e-06, "loss": 0.4163, "step": 3740 }, { "epoch": 0.11, "grad_norm": 7.856229664632841, "learning_rate": 9.844833747555278e-06, "loss": 0.5484, "step": 3741 }, { "epoch": 0.11, "grad_norm": 17.404398794028822, "learning_rate": 9.844719089749883e-06, "loss": 0.6182, "step": 3742 }, { "epoch": 0.11, "grad_norm": 10.299034528944622, "learning_rate": 9.844604390265963e-06, "loss": 0.6151, "step": 3743 }, { "epoch": 0.11, "grad_norm": 3.5904397065083513, "learning_rate": 9.844489649104509e-06, "loss": 0.3672, "step": 3744 }, { "epoch": 0.11, "grad_norm": 7.982186512082506, "learning_rate": 9.844374866266508e-06, "loss": 0.4403, "step": 3745 }, { "epoch": 0.11, "grad_norm": 3.967826104947107, "learning_rate": 9.844260041752946e-06, "loss": 0.8296, "step": 3746 }, { "epoch": 0.11, "grad_norm": 4.1905553119278425, "learning_rate": 9.844145175564811e-06, "loss": 0.3937, "step": 3747 }, { "epoch": 0.11, "grad_norm": 10.137403919973513, "learning_rate": 9.84403026770309e-06, "loss": 0.7677, "step": 3748 }, { "epoch": 0.11, "grad_norm": 10.559758027945747, "learning_rate": 9.843915318168777e-06, "loss": 1.2783, "step": 3749 }, { "epoch": 0.11, "grad_norm": 3.3911829194951046, "learning_rate": 9.843800326962853e-06, "loss": 0.1505, "step": 3750 }, { "epoch": 0.11, "grad_norm": 9.535190505638615, "learning_rate": 9.843685294086313e-06, "loss": 0.9789, "step": 3751 }, { "epoch": 0.11, "grad_norm": 5.081539004131789, "learning_rate": 9.843570219540145e-06, "loss": 0.5864, "step": 3752 }, { "epoch": 0.11, "grad_norm": 9.162433294076864, "learning_rate": 9.84345510332534e-06, "loss": 0.5833, "step": 3753 }, { "epoch": 0.11, "grad_norm": 6.200267373749413, "learning_rate": 9.843339945442883e-06, "loss": 0.7762, "step": 3754 }, { "epoch": 0.11, "grad_norm": 9.136503467412814, "learning_rate": 9.843224745893771e-06, "loss": 0.6382, "step": 3755 }, { "epoch": 0.11, "grad_norm": 7.1529743410362485, "learning_rate": 9.843109504678993e-06, "loss": 0.523, "step": 3756 }, { "epoch": 0.11, "grad_norm": 5.867564061013848, "learning_rate": 9.84299422179954e-06, "loss": 0.2109, "step": 3757 }, { "epoch": 0.11, "grad_norm": 9.27945252554746, "learning_rate": 9.842878897256406e-06, "loss": 0.7004, "step": 3758 }, { "epoch": 0.11, "grad_norm": 3.0647851048133226, "learning_rate": 9.842763531050578e-06, "loss": 0.2254, "step": 3759 }, { "epoch": 0.11, "grad_norm": 7.487202470628242, "learning_rate": 9.842648123183052e-06, "loss": 0.5658, "step": 3760 }, { "epoch": 0.11, "grad_norm": 6.36707816000827, "learning_rate": 9.842532673654822e-06, "loss": 0.5907, "step": 3761 }, { "epoch": 0.11, "grad_norm": 7.500397639859633, "learning_rate": 9.84241718246688e-06, "loss": 0.402, "step": 3762 }, { "epoch": 0.11, "grad_norm": 11.80158969063381, "learning_rate": 9.842301649620218e-06, "loss": 0.8933, "step": 3763 }, { "epoch": 0.11, "grad_norm": 8.021373687641786, "learning_rate": 9.842186075115831e-06, "loss": 0.7495, "step": 3764 }, { "epoch": 0.11, "grad_norm": 6.377670177670025, "learning_rate": 9.842070458954714e-06, "loss": 0.6507, "step": 3765 }, { "epoch": 0.11, "grad_norm": 7.290599861216137, "learning_rate": 9.84195480113786e-06, "loss": 0.5293, "step": 3766 }, { "epoch": 0.11, "grad_norm": 19.902389367587563, "learning_rate": 9.841839101666266e-06, "loss": 0.626, "step": 3767 }, { "epoch": 0.11, "grad_norm": 3.9649596249038805, "learning_rate": 9.841723360540925e-06, "loss": 0.5573, "step": 3768 }, { "epoch": 0.11, "grad_norm": 10.047593205293254, "learning_rate": 9.841607577762835e-06, "loss": 0.378, "step": 3769 }, { "epoch": 0.11, "grad_norm": 6.093473614024636, "learning_rate": 9.841491753332991e-06, "loss": 0.4267, "step": 3770 }, { "epoch": 0.11, "grad_norm": 3.4655249785014437, "learning_rate": 9.84137588725239e-06, "loss": 0.1756, "step": 3771 }, { "epoch": 0.11, "grad_norm": 3.4720535356024333, "learning_rate": 9.841259979522029e-06, "loss": 0.3419, "step": 3772 }, { "epoch": 0.11, "grad_norm": 4.593810152451487, "learning_rate": 9.841144030142902e-06, "loss": 0.3359, "step": 3773 }, { "epoch": 0.11, "grad_norm": 4.951258455080028, "learning_rate": 9.841028039116011e-06, "loss": 0.3379, "step": 3774 }, { "epoch": 0.11, "grad_norm": 6.442133152871506, "learning_rate": 9.840912006442352e-06, "loss": 0.2593, "step": 3775 }, { "epoch": 0.11, "grad_norm": 10.987241281494788, "learning_rate": 9.840795932122921e-06, "loss": 0.4834, "step": 3776 }, { "epoch": 0.11, "grad_norm": 13.34469193790102, "learning_rate": 9.84067981615872e-06, "loss": 0.836, "step": 3777 }, { "epoch": 0.11, "grad_norm": 5.945170336914698, "learning_rate": 9.840563658550746e-06, "loss": 0.6637, "step": 3778 }, { "epoch": 0.11, "grad_norm": 6.25569797180222, "learning_rate": 9.8404474593e-06, "loss": 0.8277, "step": 3779 }, { "epoch": 0.11, "grad_norm": 11.265207819945555, "learning_rate": 9.840331218407479e-06, "loss": 1.0479, "step": 3780 }, { "epoch": 0.11, "grad_norm": 3.8742823705355365, "learning_rate": 9.840214935874185e-06, "loss": 0.595, "step": 3781 }, { "epoch": 0.11, "grad_norm": 3.612267990317788, "learning_rate": 9.84009861170112e-06, "loss": 0.165, "step": 3782 }, { "epoch": 0.11, "grad_norm": 10.379936973953741, "learning_rate": 9.83998224588928e-06, "loss": 0.3497, "step": 3783 }, { "epoch": 0.11, "grad_norm": 6.391155752512589, "learning_rate": 9.839865838439669e-06, "loss": 0.7045, "step": 3784 }, { "epoch": 0.11, "grad_norm": 5.12159357606967, "learning_rate": 9.83974938935329e-06, "loss": 0.3486, "step": 3785 }, { "epoch": 0.11, "grad_norm": 8.777331906944335, "learning_rate": 9.83963289863114e-06, "loss": 0.5156, "step": 3786 }, { "epoch": 0.11, "grad_norm": 8.413747004099115, "learning_rate": 9.839516366274226e-06, "loss": 0.8202, "step": 3787 }, { "epoch": 0.11, "grad_norm": 4.749616256070119, "learning_rate": 9.839399792283548e-06, "loss": 0.4785, "step": 3788 }, { "epoch": 0.11, "grad_norm": 5.887276342723832, "learning_rate": 9.83928317666011e-06, "loss": 0.5058, "step": 3789 }, { "epoch": 0.11, "grad_norm": 8.240620425255221, "learning_rate": 9.839166519404914e-06, "loss": 0.4711, "step": 3790 }, { "epoch": 0.11, "grad_norm": 5.112765479248454, "learning_rate": 9.839049820518964e-06, "loss": 0.8579, "step": 3791 }, { "epoch": 0.11, "grad_norm": 5.0275155655341575, "learning_rate": 9.838933080003263e-06, "loss": 0.6589, "step": 3792 }, { "epoch": 0.11, "grad_norm": 10.740664753578098, "learning_rate": 9.83881629785882e-06, "loss": 0.4751, "step": 3793 }, { "epoch": 0.11, "grad_norm": 11.211738967159581, "learning_rate": 9.838699474086633e-06, "loss": 0.535, "step": 3794 }, { "epoch": 0.11, "grad_norm": 9.05328163377535, "learning_rate": 9.838582608687712e-06, "loss": 0.46, "step": 3795 }, { "epoch": 0.11, "grad_norm": 9.484950040324726, "learning_rate": 9.838465701663059e-06, "loss": 1.0061, "step": 3796 }, { "epoch": 0.11, "grad_norm": 6.2972845128934845, "learning_rate": 9.838348753013683e-06, "loss": 0.8626, "step": 3797 }, { "epoch": 0.11, "grad_norm": 8.184778358101106, "learning_rate": 9.838231762740587e-06, "loss": 0.4181, "step": 3798 }, { "epoch": 0.11, "grad_norm": 4.514334259884512, "learning_rate": 9.838114730844779e-06, "loss": 0.387, "step": 3799 }, { "epoch": 0.11, "grad_norm": 8.61749925507713, "learning_rate": 9.837997657327267e-06, "loss": 0.6922, "step": 3800 }, { "epoch": 0.11, "grad_norm": 9.49265868975832, "learning_rate": 9.837880542189055e-06, "loss": 0.8485, "step": 3801 }, { "epoch": 0.11, "grad_norm": 8.364414642520522, "learning_rate": 9.837763385431153e-06, "loss": 0.4645, "step": 3802 }, { "epoch": 0.11, "grad_norm": 3.621749110827584, "learning_rate": 9.83764618705457e-06, "loss": 0.7475, "step": 3803 }, { "epoch": 0.11, "grad_norm": 7.873013321491078, "learning_rate": 9.837528947060312e-06, "loss": 0.6604, "step": 3804 }, { "epoch": 0.11, "grad_norm": 10.287777112934311, "learning_rate": 9.837411665449385e-06, "loss": 0.7611, "step": 3805 }, { "epoch": 0.11, "grad_norm": 14.151466757303167, "learning_rate": 9.837294342222803e-06, "loss": 0.3901, "step": 3806 }, { "epoch": 0.11, "grad_norm": 10.600360394595096, "learning_rate": 9.837176977381573e-06, "loss": 0.8074, "step": 3807 }, { "epoch": 0.11, "grad_norm": 4.684032378285968, "learning_rate": 9.837059570926706e-06, "loss": 0.5686, "step": 3808 }, { "epoch": 0.11, "grad_norm": 5.872726081533442, "learning_rate": 9.83694212285921e-06, "loss": 0.3198, "step": 3809 }, { "epoch": 0.11, "grad_norm": 6.351180587115965, "learning_rate": 9.836824633180097e-06, "loss": 0.4965, "step": 3810 }, { "epoch": 0.11, "grad_norm": 7.1084791688020195, "learning_rate": 9.836707101890377e-06, "loss": 0.6341, "step": 3811 }, { "epoch": 0.11, "grad_norm": 6.940236728874147, "learning_rate": 9.836589528991063e-06, "loss": 0.8735, "step": 3812 }, { "epoch": 0.11, "grad_norm": 6.569792918595311, "learning_rate": 9.836471914483162e-06, "loss": 0.7128, "step": 3813 }, { "epoch": 0.11, "grad_norm": 10.503448646596016, "learning_rate": 9.83635425836769e-06, "loss": 0.7773, "step": 3814 }, { "epoch": 0.11, "grad_norm": 4.0195738379592125, "learning_rate": 9.836236560645657e-06, "loss": 0.402, "step": 3815 }, { "epoch": 0.11, "grad_norm": 7.974298796724291, "learning_rate": 9.836118821318079e-06, "loss": 0.5439, "step": 3816 }, { "epoch": 0.11, "grad_norm": 4.362337425285008, "learning_rate": 9.836001040385963e-06, "loss": 0.7095, "step": 3817 }, { "epoch": 0.11, "grad_norm": 7.381929052063126, "learning_rate": 9.835883217850327e-06, "loss": 0.6429, "step": 3818 }, { "epoch": 0.11, "grad_norm": 5.954264293997697, "learning_rate": 9.835765353712183e-06, "loss": 0.2966, "step": 3819 }, { "epoch": 0.11, "grad_norm": 8.089719674684533, "learning_rate": 9.835647447972546e-06, "loss": 0.3806, "step": 3820 }, { "epoch": 0.11, "grad_norm": 11.259713112410072, "learning_rate": 9.835529500632425e-06, "loss": 0.7163, "step": 3821 }, { "epoch": 0.11, "grad_norm": 4.144748290164852, "learning_rate": 9.835411511692843e-06, "loss": 0.5841, "step": 3822 }, { "epoch": 0.11, "grad_norm": 10.640956520596152, "learning_rate": 9.83529348115481e-06, "loss": 0.6664, "step": 3823 }, { "epoch": 0.11, "grad_norm": 4.719839715499081, "learning_rate": 9.835175409019344e-06, "loss": 0.5877, "step": 3824 }, { "epoch": 0.11, "grad_norm": 7.137474049322249, "learning_rate": 9.835057295287458e-06, "loss": 0.4964, "step": 3825 }, { "epoch": 0.11, "grad_norm": 5.045086097776555, "learning_rate": 9.83493913996017e-06, "loss": 0.4603, "step": 3826 }, { "epoch": 0.11, "grad_norm": 8.327010489889542, "learning_rate": 9.834820943038495e-06, "loss": 0.4794, "step": 3827 }, { "epoch": 0.11, "grad_norm": 4.509946744312581, "learning_rate": 9.834702704523452e-06, "loss": 0.6189, "step": 3828 }, { "epoch": 0.11, "grad_norm": 10.618300715703588, "learning_rate": 9.834584424416057e-06, "loss": 1.2681, "step": 3829 }, { "epoch": 0.11, "grad_norm": 10.626814608730713, "learning_rate": 9.834466102717326e-06, "loss": 0.5284, "step": 3830 }, { "epoch": 0.11, "grad_norm": 6.314142749921151, "learning_rate": 9.834347739428279e-06, "loss": 0.6348, "step": 3831 }, { "epoch": 0.11, "grad_norm": 5.520037563721376, "learning_rate": 9.834229334549932e-06, "loss": 0.5925, "step": 3832 }, { "epoch": 0.11, "grad_norm": 8.48682482004448, "learning_rate": 9.834110888083308e-06, "loss": 0.7573, "step": 3833 }, { "epoch": 0.11, "grad_norm": 7.113719911995982, "learning_rate": 9.833992400029421e-06, "loss": 0.7885, "step": 3834 }, { "epoch": 0.11, "grad_norm": 6.725250647883354, "learning_rate": 9.833873870389295e-06, "loss": 0.8347, "step": 3835 }, { "epoch": 0.11, "grad_norm": 7.352517822980761, "learning_rate": 9.833755299163945e-06, "loss": 0.3745, "step": 3836 }, { "epoch": 0.11, "grad_norm": 4.700600158641998, "learning_rate": 9.833636686354392e-06, "loss": 0.6936, "step": 3837 }, { "epoch": 0.11, "grad_norm": 8.489411546962268, "learning_rate": 9.833518031961663e-06, "loss": 0.8516, "step": 3838 }, { "epoch": 0.11, "grad_norm": 5.320580205470016, "learning_rate": 9.83339933598677e-06, "loss": 0.4333, "step": 3839 }, { "epoch": 0.11, "grad_norm": 3.9614230428296184, "learning_rate": 9.833280598430737e-06, "loss": 0.2426, "step": 3840 }, { "epoch": 0.11, "grad_norm": 9.029845184570553, "learning_rate": 9.833161819294586e-06, "loss": 0.3558, "step": 3841 }, { "epoch": 0.11, "grad_norm": 4.385846452953719, "learning_rate": 9.833042998579341e-06, "loss": 0.4268, "step": 3842 }, { "epoch": 0.11, "grad_norm": 3.7369983349174483, "learning_rate": 9.83292413628602e-06, "loss": 0.4029, "step": 3843 }, { "epoch": 0.11, "grad_norm": 5.284586805974961, "learning_rate": 9.83280523241565e-06, "loss": 0.4078, "step": 3844 }, { "epoch": 0.11, "grad_norm": 8.433527541154684, "learning_rate": 9.83268628696925e-06, "loss": 0.7248, "step": 3845 }, { "epoch": 0.11, "grad_norm": 3.9436166121086433, "learning_rate": 9.832567299947846e-06, "loss": 0.3887, "step": 3846 }, { "epoch": 0.11, "grad_norm": 9.040827124742918, "learning_rate": 9.83244827135246e-06, "loss": 1.0155, "step": 3847 }, { "epoch": 0.11, "grad_norm": 4.731294794513091, "learning_rate": 9.832329201184115e-06, "loss": 0.1918, "step": 3848 }, { "epoch": 0.11, "grad_norm": 9.937375625695744, "learning_rate": 9.83221008944384e-06, "loss": 0.8082, "step": 3849 }, { "epoch": 0.11, "grad_norm": 6.177844519029769, "learning_rate": 9.832090936132653e-06, "loss": 0.5803, "step": 3850 }, { "epoch": 0.11, "grad_norm": 7.075402298263063, "learning_rate": 9.831971741251584e-06, "loss": 0.7514, "step": 3851 }, { "epoch": 0.11, "grad_norm": 7.873973476777039, "learning_rate": 9.831852504801659e-06, "loss": 0.6005, "step": 3852 }, { "epoch": 0.11, "grad_norm": 6.931629068037703, "learning_rate": 9.8317332267839e-06, "loss": 0.528, "step": 3853 }, { "epoch": 0.11, "grad_norm": 6.428077282911604, "learning_rate": 9.831613907199334e-06, "loss": 0.5534, "step": 3854 }, { "epoch": 0.11, "grad_norm": 5.894806124254954, "learning_rate": 9.831494546048989e-06, "loss": 0.6089, "step": 3855 }, { "epoch": 0.11, "grad_norm": 4.18197629063333, "learning_rate": 9.831375143333892e-06, "loss": 0.1421, "step": 3856 }, { "epoch": 0.11, "grad_norm": 7.006463472787667, "learning_rate": 9.83125569905507e-06, "loss": 0.7138, "step": 3857 }, { "epoch": 0.11, "grad_norm": 6.276933771785267, "learning_rate": 9.83113621321355e-06, "loss": 0.5826, "step": 3858 }, { "epoch": 0.11, "grad_norm": 6.732553647320175, "learning_rate": 9.83101668581036e-06, "loss": 0.595, "step": 3859 }, { "epoch": 0.11, "grad_norm": 4.563197670464904, "learning_rate": 9.830897116846528e-06, "loss": 0.3272, "step": 3860 }, { "epoch": 0.11, "grad_norm": 9.220035890025292, "learning_rate": 9.830777506323082e-06, "loss": 0.8173, "step": 3861 }, { "epoch": 0.11, "grad_norm": 6.090680019077311, "learning_rate": 9.830657854241053e-06, "loss": 0.7633, "step": 3862 }, { "epoch": 0.11, "grad_norm": 9.823242138958271, "learning_rate": 9.830538160601468e-06, "loss": 0.5624, "step": 3863 }, { "epoch": 0.11, "grad_norm": 3.833611492071615, "learning_rate": 9.830418425405358e-06, "loss": 0.3396, "step": 3864 }, { "epoch": 0.11, "grad_norm": 2.693320159958579, "learning_rate": 9.830298648653753e-06, "loss": 0.2846, "step": 3865 }, { "epoch": 0.11, "grad_norm": 6.426671819223156, "learning_rate": 9.830178830347685e-06, "loss": 0.8107, "step": 3866 }, { "epoch": 0.11, "grad_norm": 6.522223847385589, "learning_rate": 9.830058970488182e-06, "loss": 0.7868, "step": 3867 }, { "epoch": 0.11, "grad_norm": 8.798566042503566, "learning_rate": 9.829939069076278e-06, "loss": 0.9088, "step": 3868 }, { "epoch": 0.11, "grad_norm": 6.4301455069255145, "learning_rate": 9.829819126113e-06, "loss": 0.5885, "step": 3869 }, { "epoch": 0.11, "grad_norm": 5.083499071931759, "learning_rate": 9.829699141599384e-06, "loss": 0.7786, "step": 3870 }, { "epoch": 0.11, "grad_norm": 11.044178030530663, "learning_rate": 9.829579115536462e-06, "loss": 0.6532, "step": 3871 }, { "epoch": 0.11, "grad_norm": 4.5142858558925525, "learning_rate": 9.829459047925266e-06, "loss": 0.2207, "step": 3872 }, { "epoch": 0.11, "grad_norm": 7.199148342307805, "learning_rate": 9.829338938766825e-06, "loss": 0.6523, "step": 3873 }, { "epoch": 0.11, "grad_norm": 9.096538732217782, "learning_rate": 9.82921878806218e-06, "loss": 0.8568, "step": 3874 }, { "epoch": 0.11, "grad_norm": 11.430103849805302, "learning_rate": 9.829098595812357e-06, "loss": 0.7134, "step": 3875 }, { "epoch": 0.11, "grad_norm": 7.082073516635414, "learning_rate": 9.828978362018395e-06, "loss": 0.2701, "step": 3876 }, { "epoch": 0.11, "grad_norm": 6.375117880535438, "learning_rate": 9.828858086681327e-06, "loss": 0.5247, "step": 3877 }, { "epoch": 0.11, "grad_norm": 2.8004392790216075, "learning_rate": 9.828737769802186e-06, "loss": 0.3108, "step": 3878 }, { "epoch": 0.11, "grad_norm": 10.391122814672597, "learning_rate": 9.82861741138201e-06, "loss": 1.0663, "step": 3879 }, { "epoch": 0.11, "grad_norm": 6.115404904575953, "learning_rate": 9.828497011421832e-06, "loss": 0.4424, "step": 3880 }, { "epoch": 0.11, "grad_norm": 3.2548351999686678, "learning_rate": 9.828376569922688e-06, "loss": 0.3438, "step": 3881 }, { "epoch": 0.11, "grad_norm": 6.383244139178482, "learning_rate": 9.828256086885618e-06, "loss": 0.6629, "step": 3882 }, { "epoch": 0.11, "grad_norm": 2.3240389201373612, "learning_rate": 9.828135562311652e-06, "loss": 0.1223, "step": 3883 }, { "epoch": 0.11, "grad_norm": 11.60830815009685, "learning_rate": 9.82801499620183e-06, "loss": 0.888, "step": 3884 }, { "epoch": 0.11, "grad_norm": 6.540445452693993, "learning_rate": 9.827894388557191e-06, "loss": 0.4935, "step": 3885 }, { "epoch": 0.11, "grad_norm": 5.476309907886364, "learning_rate": 9.827773739378772e-06, "loss": 0.5079, "step": 3886 }, { "epoch": 0.11, "grad_norm": 6.713059681552513, "learning_rate": 9.827653048667608e-06, "loss": 0.4589, "step": 3887 }, { "epoch": 0.11, "grad_norm": 6.714483174867036, "learning_rate": 9.827532316424739e-06, "loss": 0.9998, "step": 3888 }, { "epoch": 0.11, "grad_norm": 7.0974337050085206, "learning_rate": 9.827411542651206e-06, "loss": 0.7811, "step": 3889 }, { "epoch": 0.11, "grad_norm": 3.5144752148134035, "learning_rate": 9.827290727348045e-06, "loss": 0.4188, "step": 3890 }, { "epoch": 0.11, "grad_norm": 5.380863540010865, "learning_rate": 9.827169870516295e-06, "loss": 0.4878, "step": 3891 }, { "epoch": 0.11, "grad_norm": 5.712524652740906, "learning_rate": 9.827048972156999e-06, "loss": 0.7382, "step": 3892 }, { "epoch": 0.11, "grad_norm": 5.1819148503341195, "learning_rate": 9.826928032271192e-06, "loss": 0.5503, "step": 3893 }, { "epoch": 0.11, "grad_norm": 7.537649198102469, "learning_rate": 9.82680705085992e-06, "loss": 0.3701, "step": 3894 }, { "epoch": 0.11, "grad_norm": 9.307197238732712, "learning_rate": 9.82668602792422e-06, "loss": 0.7214, "step": 3895 }, { "epoch": 0.11, "grad_norm": 6.118877581784944, "learning_rate": 9.826564963465136e-06, "loss": 0.375, "step": 3896 }, { "epoch": 0.11, "grad_norm": 7.960119023799008, "learning_rate": 9.826443857483706e-06, "loss": 0.9648, "step": 3897 }, { "epoch": 0.11, "grad_norm": 3.889616624920904, "learning_rate": 9.826322709980975e-06, "loss": 0.3106, "step": 3898 }, { "epoch": 0.11, "grad_norm": 8.10632802062477, "learning_rate": 9.826201520957984e-06, "loss": 0.8567, "step": 3899 }, { "epoch": 0.11, "grad_norm": 8.203892205101628, "learning_rate": 9.826080290415774e-06, "loss": 0.7026, "step": 3900 }, { "epoch": 0.11, "grad_norm": 10.976133725082951, "learning_rate": 9.82595901835539e-06, "loss": 0.68, "step": 3901 }, { "epoch": 0.11, "grad_norm": 5.883807053914402, "learning_rate": 9.825837704777877e-06, "loss": 0.7423, "step": 3902 }, { "epoch": 0.11, "grad_norm": 10.31387689819607, "learning_rate": 9.825716349684273e-06, "loss": 0.4122, "step": 3903 }, { "epoch": 0.11, "grad_norm": 5.566102800630956, "learning_rate": 9.825594953075628e-06, "loss": 0.5661, "step": 3904 }, { "epoch": 0.11, "grad_norm": 13.68703794352677, "learning_rate": 9.825473514952983e-06, "loss": 0.5693, "step": 3905 }, { "epoch": 0.11, "grad_norm": 6.819437187106365, "learning_rate": 9.825352035317382e-06, "loss": 0.4597, "step": 3906 }, { "epoch": 0.11, "grad_norm": 7.936571502446125, "learning_rate": 9.825230514169874e-06, "loss": 0.6545, "step": 3907 }, { "epoch": 0.11, "grad_norm": 5.519353238722596, "learning_rate": 9.825108951511499e-06, "loss": 0.3912, "step": 3908 }, { "epoch": 0.11, "grad_norm": 6.471535456143753, "learning_rate": 9.824987347343308e-06, "loss": 0.8242, "step": 3909 }, { "epoch": 0.11, "grad_norm": 4.833051273183683, "learning_rate": 9.824865701666344e-06, "loss": 0.4722, "step": 3910 }, { "epoch": 0.11, "grad_norm": 4.689745810072444, "learning_rate": 9.824744014481656e-06, "loss": 0.5261, "step": 3911 }, { "epoch": 0.11, "grad_norm": 9.737744821156761, "learning_rate": 9.824622285790288e-06, "loss": 0.7936, "step": 3912 }, { "epoch": 0.11, "grad_norm": 6.427474131107388, "learning_rate": 9.824500515593289e-06, "loss": 0.354, "step": 3913 }, { "epoch": 0.11, "grad_norm": 26.08219535059579, "learning_rate": 9.824378703891706e-06, "loss": 0.7729, "step": 3914 }, { "epoch": 0.11, "grad_norm": 6.307914730412483, "learning_rate": 9.824256850686586e-06, "loss": 0.5852, "step": 3915 }, { "epoch": 0.11, "grad_norm": 6.154744578995307, "learning_rate": 9.824134955978978e-06, "loss": 0.6012, "step": 3916 }, { "epoch": 0.11, "grad_norm": 3.587055505736273, "learning_rate": 9.824013019769934e-06, "loss": 0.3579, "step": 3917 }, { "epoch": 0.11, "grad_norm": 8.664478673820307, "learning_rate": 9.823891042060496e-06, "loss": 0.4318, "step": 3918 }, { "epoch": 0.11, "grad_norm": 3.5580013715881824, "learning_rate": 9.82376902285172e-06, "loss": 0.6854, "step": 3919 }, { "epoch": 0.11, "grad_norm": 12.279425099611851, "learning_rate": 9.823646962144653e-06, "loss": 0.5038, "step": 3920 }, { "epoch": 0.11, "grad_norm": 5.341186109143314, "learning_rate": 9.823524859940343e-06, "loss": 0.5464, "step": 3921 }, { "epoch": 0.11, "grad_norm": 6.193768726247431, "learning_rate": 9.823402716239844e-06, "loss": 0.5322, "step": 3922 }, { "epoch": 0.11, "grad_norm": 14.701413256641475, "learning_rate": 9.823280531044206e-06, "loss": 0.8894, "step": 3923 }, { "epoch": 0.11, "grad_norm": 9.419052840074407, "learning_rate": 9.823158304354479e-06, "loss": 0.9861, "step": 3924 }, { "epoch": 0.11, "grad_norm": 4.399559243841317, "learning_rate": 9.823036036171714e-06, "loss": 0.446, "step": 3925 }, { "epoch": 0.11, "grad_norm": 11.572932923468919, "learning_rate": 9.822913726496965e-06, "loss": 0.5028, "step": 3926 }, { "epoch": 0.11, "grad_norm": 6.493079609636528, "learning_rate": 9.822791375331282e-06, "loss": 0.647, "step": 3927 }, { "epoch": 0.11, "grad_norm": 3.8184181930811896, "learning_rate": 9.822668982675718e-06, "loss": 0.4176, "step": 3928 }, { "epoch": 0.11, "grad_norm": 5.582732917993542, "learning_rate": 9.822546548531329e-06, "loss": 0.6641, "step": 3929 }, { "epoch": 0.11, "grad_norm": 4.756238955912256, "learning_rate": 9.822424072899164e-06, "loss": 0.6965, "step": 3930 }, { "epoch": 0.11, "grad_norm": 4.53008749946821, "learning_rate": 9.822301555780279e-06, "loss": 0.2451, "step": 3931 }, { "epoch": 0.11, "grad_norm": 2.8773361955844416, "learning_rate": 9.822178997175726e-06, "loss": 0.379, "step": 3932 }, { "epoch": 0.11, "grad_norm": 4.6695488079137295, "learning_rate": 9.822056397086562e-06, "loss": 0.7201, "step": 3933 }, { "epoch": 0.11, "grad_norm": 9.034693078135708, "learning_rate": 9.821933755513841e-06, "loss": 1.3282, "step": 3934 }, { "epoch": 0.11, "grad_norm": 6.2029289003215125, "learning_rate": 9.821811072458618e-06, "loss": 0.4753, "step": 3935 }, { "epoch": 0.11, "grad_norm": 5.501524453869883, "learning_rate": 9.821688347921948e-06, "loss": 0.323, "step": 3936 }, { "epoch": 0.11, "grad_norm": 6.758943002328695, "learning_rate": 9.821565581904884e-06, "loss": 0.3467, "step": 3937 }, { "epoch": 0.11, "grad_norm": 4.5899756359874875, "learning_rate": 9.821442774408487e-06, "loss": 0.2906, "step": 3938 }, { "epoch": 0.11, "grad_norm": 15.196574447518257, "learning_rate": 9.821319925433813e-06, "loss": 0.7293, "step": 3939 }, { "epoch": 0.11, "grad_norm": 5.30734842386741, "learning_rate": 9.821197034981914e-06, "loss": 0.4279, "step": 3940 }, { "epoch": 0.11, "grad_norm": 5.733572797247087, "learning_rate": 9.821074103053852e-06, "loss": 0.4452, "step": 3941 }, { "epoch": 0.11, "grad_norm": 4.178556028857051, "learning_rate": 9.820951129650684e-06, "loss": 0.7466, "step": 3942 }, { "epoch": 0.11, "grad_norm": 6.969251606084657, "learning_rate": 9.820828114773467e-06, "loss": 0.5856, "step": 3943 }, { "epoch": 0.11, "grad_norm": 10.846125713505447, "learning_rate": 9.820705058423258e-06, "loss": 0.4619, "step": 3944 }, { "epoch": 0.11, "grad_norm": 10.067039176751766, "learning_rate": 9.820581960601116e-06, "loss": 0.9747, "step": 3945 }, { "epoch": 0.11, "grad_norm": 9.30289583548603, "learning_rate": 9.820458821308103e-06, "loss": 0.4534, "step": 3946 }, { "epoch": 0.11, "grad_norm": 3.5984718483442117, "learning_rate": 9.820335640545275e-06, "loss": 0.1447, "step": 3947 }, { "epoch": 0.11, "grad_norm": 5.503419073458599, "learning_rate": 9.820212418313695e-06, "loss": 0.4423, "step": 3948 }, { "epoch": 0.11, "grad_norm": 87.5463430404501, "learning_rate": 9.820089154614419e-06, "loss": 0.7097, "step": 3949 }, { "epoch": 0.11, "grad_norm": 7.055983110757972, "learning_rate": 9.81996584944851e-06, "loss": 0.7619, "step": 3950 }, { "epoch": 0.11, "grad_norm": 8.902673424796708, "learning_rate": 9.819842502817028e-06, "loss": 0.9728, "step": 3951 }, { "epoch": 0.11, "grad_norm": 3.526173309768752, "learning_rate": 9.819719114721035e-06, "loss": 0.2575, "step": 3952 }, { "epoch": 0.11, "grad_norm": 5.219880672567127, "learning_rate": 9.819595685161591e-06, "loss": 0.4673, "step": 3953 }, { "epoch": 0.11, "grad_norm": 5.931098378115662, "learning_rate": 9.81947221413976e-06, "loss": 0.6848, "step": 3954 }, { "epoch": 0.11, "grad_norm": 6.941031270372074, "learning_rate": 9.819348701656604e-06, "loss": 0.5993, "step": 3955 }, { "epoch": 0.11, "grad_norm": 4.920775178613576, "learning_rate": 9.819225147713182e-06, "loss": 0.6439, "step": 3956 }, { "epoch": 0.11, "grad_norm": 7.216033292232608, "learning_rate": 9.819101552310559e-06, "loss": 0.4783, "step": 3957 }, { "epoch": 0.11, "grad_norm": 5.097504946045396, "learning_rate": 9.818977915449802e-06, "loss": 0.6085, "step": 3958 }, { "epoch": 0.11, "grad_norm": 6.917681799547978, "learning_rate": 9.818854237131967e-06, "loss": 0.4402, "step": 3959 }, { "epoch": 0.11, "grad_norm": 9.30247439209987, "learning_rate": 9.818730517358126e-06, "loss": 0.9599, "step": 3960 }, { "epoch": 0.11, "grad_norm": 4.209376454512381, "learning_rate": 9.818606756129337e-06, "loss": 0.1923, "step": 3961 }, { "epoch": 0.11, "grad_norm": 5.725643151817368, "learning_rate": 9.818482953446669e-06, "loss": 0.3269, "step": 3962 }, { "epoch": 0.11, "grad_norm": 6.582033495803587, "learning_rate": 9.818359109311186e-06, "loss": 0.465, "step": 3963 }, { "epoch": 0.11, "grad_norm": 5.707875653410162, "learning_rate": 9.81823522372395e-06, "loss": 0.1707, "step": 3964 }, { "epoch": 0.11, "grad_norm": 10.164414349153585, "learning_rate": 9.818111296686033e-06, "loss": 0.4393, "step": 3965 }, { "epoch": 0.11, "grad_norm": 5.9191813117419905, "learning_rate": 9.817987328198495e-06, "loss": 0.9314, "step": 3966 }, { "epoch": 0.11, "grad_norm": 9.150017918636934, "learning_rate": 9.817863318262406e-06, "loss": 0.537, "step": 3967 }, { "epoch": 0.11, "grad_norm": 6.200465422511827, "learning_rate": 9.817739266878834e-06, "loss": 0.3136, "step": 3968 }, { "epoch": 0.11, "grad_norm": 6.561685420752284, "learning_rate": 9.817615174048842e-06, "loss": 0.4765, "step": 3969 }, { "epoch": 0.11, "grad_norm": 6.974532734522737, "learning_rate": 9.8174910397735e-06, "loss": 0.8088, "step": 3970 }, { "epoch": 0.11, "grad_norm": 6.1423913487601265, "learning_rate": 9.817366864053877e-06, "loss": 0.3592, "step": 3971 }, { "epoch": 0.11, "grad_norm": 7.807399519128103, "learning_rate": 9.817242646891039e-06, "loss": 0.8074, "step": 3972 }, { "epoch": 0.11, "grad_norm": 6.1686981137150685, "learning_rate": 9.817118388286055e-06, "loss": 0.5994, "step": 3973 }, { "epoch": 0.11, "grad_norm": 5.382134825371146, "learning_rate": 9.816994088239996e-06, "loss": 0.5006, "step": 3974 }, { "epoch": 0.11, "grad_norm": 7.250021638509094, "learning_rate": 9.81686974675393e-06, "loss": 0.3851, "step": 3975 }, { "epoch": 0.11, "grad_norm": 4.732954662022833, "learning_rate": 9.816745363828925e-06, "loss": 0.919, "step": 3976 }, { "epoch": 0.11, "grad_norm": 5.745479008863429, "learning_rate": 9.816620939466054e-06, "loss": 0.4329, "step": 3977 }, { "epoch": 0.11, "grad_norm": 4.506260146765852, "learning_rate": 9.816496473666386e-06, "loss": 0.3968, "step": 3978 }, { "epoch": 0.11, "grad_norm": 3.54573739919795, "learning_rate": 9.816371966430991e-06, "loss": 0.3552, "step": 3979 }, { "epoch": 0.11, "grad_norm": 4.372258008277003, "learning_rate": 9.816247417760942e-06, "loss": 0.5977, "step": 3980 }, { "epoch": 0.11, "grad_norm": 4.690436803815131, "learning_rate": 9.81612282765731e-06, "loss": 0.6522, "step": 3981 }, { "epoch": 0.11, "grad_norm": 6.739112338353454, "learning_rate": 9.815998196121167e-06, "loss": 0.6028, "step": 3982 }, { "epoch": 0.11, "grad_norm": 7.299377371742483, "learning_rate": 9.81587352315358e-06, "loss": 0.3638, "step": 3983 }, { "epoch": 0.11, "grad_norm": 2.984705182728965, "learning_rate": 9.81574880875563e-06, "loss": 0.4629, "step": 3984 }, { "epoch": 0.11, "grad_norm": 4.773908925853376, "learning_rate": 9.815624052928385e-06, "loss": 0.3923, "step": 3985 }, { "epoch": 0.11, "grad_norm": 9.482229502304882, "learning_rate": 9.81549925567292e-06, "loss": 0.7429, "step": 3986 }, { "epoch": 0.11, "grad_norm": 4.8228827846756825, "learning_rate": 9.815374416990306e-06, "loss": 0.3171, "step": 3987 }, { "epoch": 0.11, "grad_norm": 4.589189977956685, "learning_rate": 9.81524953688162e-06, "loss": 0.5035, "step": 3988 }, { "epoch": 0.11, "grad_norm": 10.59739833789324, "learning_rate": 9.815124615347934e-06, "loss": 0.6873, "step": 3989 }, { "epoch": 0.11, "grad_norm": 4.549813910231584, "learning_rate": 9.814999652390325e-06, "loss": 0.4358, "step": 3990 }, { "epoch": 0.11, "grad_norm": 8.117261634759814, "learning_rate": 9.814874648009866e-06, "loss": 0.8704, "step": 3991 }, { "epoch": 0.11, "grad_norm": 5.330912109972868, "learning_rate": 9.814749602207634e-06, "loss": 0.553, "step": 3992 }, { "epoch": 0.11, "grad_norm": 4.820870439831166, "learning_rate": 9.814624514984704e-06, "loss": 0.3766, "step": 3993 }, { "epoch": 0.11, "grad_norm": 16.92144442333663, "learning_rate": 9.81449938634215e-06, "loss": 0.6037, "step": 3994 }, { "epoch": 0.11, "grad_norm": 8.548308703805487, "learning_rate": 9.814374216281053e-06, "loss": 0.7487, "step": 3995 }, { "epoch": 0.11, "grad_norm": 8.290656969200754, "learning_rate": 9.814249004802486e-06, "loss": 0.8291, "step": 3996 }, { "epoch": 0.11, "grad_norm": 5.778346306795722, "learning_rate": 9.814123751907528e-06, "loss": 0.3254, "step": 3997 }, { "epoch": 0.11, "grad_norm": 4.703426820315688, "learning_rate": 9.813998457597257e-06, "loss": 0.5496, "step": 3998 }, { "epoch": 0.11, "grad_norm": 4.218933271030019, "learning_rate": 9.81387312187275e-06, "loss": 0.3967, "step": 3999 }, { "epoch": 0.11, "grad_norm": 4.193668277700348, "learning_rate": 9.813747744735083e-06, "loss": 0.1568, "step": 4000 }, { "epoch": 0.11, "grad_norm": 6.495223270892627, "learning_rate": 9.81362232618534e-06, "loss": 0.688, "step": 4001 }, { "epoch": 0.11, "grad_norm": 4.753764768703409, "learning_rate": 9.813496866224594e-06, "loss": 0.3975, "step": 4002 }, { "epoch": 0.11, "grad_norm": 7.221720955236214, "learning_rate": 9.813371364853927e-06, "loss": 0.5909, "step": 4003 }, { "epoch": 0.11, "grad_norm": 7.703075911731112, "learning_rate": 9.813245822074421e-06, "loss": 0.3818, "step": 4004 }, { "epoch": 0.11, "grad_norm": 5.035522921094411, "learning_rate": 9.813120237887152e-06, "loss": 0.2197, "step": 4005 }, { "epoch": 0.11, "grad_norm": 9.762306442979517, "learning_rate": 9.812994612293203e-06, "loss": 0.7226, "step": 4006 }, { "epoch": 0.11, "grad_norm": 4.126113914596199, "learning_rate": 9.812868945293654e-06, "loss": 0.405, "step": 4007 }, { "epoch": 0.11, "grad_norm": 11.258100306096772, "learning_rate": 9.812743236889585e-06, "loss": 0.4013, "step": 4008 }, { "epoch": 0.11, "grad_norm": 6.948743661355046, "learning_rate": 9.812617487082081e-06, "loss": 0.3503, "step": 4009 }, { "epoch": 0.11, "grad_norm": 3.352175783186836, "learning_rate": 9.81249169587222e-06, "loss": 0.5148, "step": 4010 }, { "epoch": 0.11, "grad_norm": 9.993908743584647, "learning_rate": 9.812365863261086e-06, "loss": 0.6149, "step": 4011 }, { "epoch": 0.11, "grad_norm": 3.3204311215023874, "learning_rate": 9.81223998924976e-06, "loss": 0.2152, "step": 4012 }, { "epoch": 0.11, "grad_norm": 7.927673790722058, "learning_rate": 9.812114073839325e-06, "loss": 0.5553, "step": 4013 }, { "epoch": 0.11, "grad_norm": 3.631421058552043, "learning_rate": 9.811988117030867e-06, "loss": 0.3411, "step": 4014 }, { "epoch": 0.11, "grad_norm": 3.893197430977816, "learning_rate": 9.811862118825466e-06, "loss": 0.662, "step": 4015 }, { "epoch": 0.12, "grad_norm": 6.142440954474973, "learning_rate": 9.81173607922421e-06, "loss": 0.2457, "step": 4016 }, { "epoch": 0.12, "grad_norm": 2.662641554637951, "learning_rate": 9.811609998228178e-06, "loss": 0.21, "step": 4017 }, { "epoch": 0.12, "grad_norm": 6.2982906520326365, "learning_rate": 9.811483875838458e-06, "loss": 0.3974, "step": 4018 }, { "epoch": 0.12, "grad_norm": 6.546575789795242, "learning_rate": 9.811357712056138e-06, "loss": 0.7063, "step": 4019 }, { "epoch": 0.12, "grad_norm": 4.549126451077779, "learning_rate": 9.811231506882297e-06, "loss": 0.1976, "step": 4020 }, { "epoch": 0.12, "grad_norm": 9.628473522065287, "learning_rate": 9.811105260318024e-06, "loss": 0.5447, "step": 4021 }, { "epoch": 0.12, "grad_norm": 7.6842553959769315, "learning_rate": 9.810978972364406e-06, "loss": 0.4886, "step": 4022 }, { "epoch": 0.12, "grad_norm": 8.089151261630908, "learning_rate": 9.810852643022527e-06, "loss": 0.6179, "step": 4023 }, { "epoch": 0.12, "grad_norm": 7.693515749850694, "learning_rate": 9.810726272293476e-06, "loss": 0.4556, "step": 4024 }, { "epoch": 0.12, "grad_norm": 6.1495342512472595, "learning_rate": 9.810599860178337e-06, "loss": 1.1628, "step": 4025 }, { "epoch": 0.12, "grad_norm": 6.307761802970424, "learning_rate": 9.810473406678202e-06, "loss": 0.5788, "step": 4026 }, { "epoch": 0.12, "grad_norm": 6.605890893333685, "learning_rate": 9.810346911794158e-06, "loss": 0.5535, "step": 4027 }, { "epoch": 0.12, "grad_norm": 10.6352435828661, "learning_rate": 9.810220375527288e-06, "loss": 0.6797, "step": 4028 }, { "epoch": 0.12, "grad_norm": 9.472730095958937, "learning_rate": 9.810093797878685e-06, "loss": 0.7655, "step": 4029 }, { "epoch": 0.12, "grad_norm": 8.295720826527207, "learning_rate": 9.80996717884944e-06, "loss": 0.724, "step": 4030 }, { "epoch": 0.12, "grad_norm": 10.080700638353834, "learning_rate": 9.809840518440639e-06, "loss": 0.6937, "step": 4031 }, { "epoch": 0.12, "grad_norm": 6.035103313127462, "learning_rate": 9.80971381665337e-06, "loss": 0.605, "step": 4032 }, { "epoch": 0.12, "grad_norm": 11.747268196866509, "learning_rate": 9.809587073488727e-06, "loss": 0.7039, "step": 4033 }, { "epoch": 0.12, "grad_norm": 11.663410677010626, "learning_rate": 9.809460288947797e-06, "loss": 1.2658, "step": 4034 }, { "epoch": 0.12, "grad_norm": 8.141763530637848, "learning_rate": 9.809333463031673e-06, "loss": 0.9159, "step": 4035 }, { "epoch": 0.12, "grad_norm": 3.365025864228245, "learning_rate": 9.809206595741447e-06, "loss": 0.3818, "step": 4036 }, { "epoch": 0.12, "grad_norm": 6.415573481126675, "learning_rate": 9.809079687078207e-06, "loss": 0.7317, "step": 4037 }, { "epoch": 0.12, "grad_norm": 2.9168328215183723, "learning_rate": 9.808952737043047e-06, "loss": 0.2675, "step": 4038 }, { "epoch": 0.12, "grad_norm": 9.400313065773826, "learning_rate": 9.808825745637059e-06, "loss": 0.6963, "step": 4039 }, { "epoch": 0.12, "grad_norm": 10.135395317930742, "learning_rate": 9.808698712861335e-06, "loss": 0.4679, "step": 4040 }, { "epoch": 0.12, "grad_norm": 5.627678339473946, "learning_rate": 9.808571638716968e-06, "loss": 0.3607, "step": 4041 }, { "epoch": 0.12, "grad_norm": 9.713785847858027, "learning_rate": 9.808444523205051e-06, "loss": 0.5893, "step": 4042 }, { "epoch": 0.12, "grad_norm": 10.647761479481298, "learning_rate": 9.808317366326678e-06, "loss": 1.1353, "step": 4043 }, { "epoch": 0.12, "grad_norm": 4.079491353515446, "learning_rate": 9.808190168082943e-06, "loss": 0.3467, "step": 4044 }, { "epoch": 0.12, "grad_norm": 6.251213795099984, "learning_rate": 9.80806292847494e-06, "loss": 0.4248, "step": 4045 }, { "epoch": 0.12, "grad_norm": 4.011429402020787, "learning_rate": 9.807935647503763e-06, "loss": 0.2916, "step": 4046 }, { "epoch": 0.12, "grad_norm": 6.623787931012224, "learning_rate": 9.807808325170507e-06, "loss": 0.4908, "step": 4047 }, { "epoch": 0.12, "grad_norm": 5.030222130263663, "learning_rate": 9.80768096147627e-06, "loss": 0.4207, "step": 4048 }, { "epoch": 0.12, "grad_norm": 4.766158877626442, "learning_rate": 9.807553556422144e-06, "loss": 0.4386, "step": 4049 }, { "epoch": 0.12, "grad_norm": 10.167645162386094, "learning_rate": 9.807426110009225e-06, "loss": 0.6808, "step": 4050 }, { "epoch": 0.12, "grad_norm": 4.341099870602823, "learning_rate": 9.807298622238614e-06, "loss": 0.1368, "step": 4051 }, { "epoch": 0.12, "grad_norm": 4.1738697669344385, "learning_rate": 9.807171093111405e-06, "loss": 0.3504, "step": 4052 }, { "epoch": 0.12, "grad_norm": 9.510182394851824, "learning_rate": 9.807043522628694e-06, "loss": 0.3837, "step": 4053 }, { "epoch": 0.12, "grad_norm": 8.321893647445545, "learning_rate": 9.806915910791579e-06, "loss": 0.793, "step": 4054 }, { "epoch": 0.12, "grad_norm": 6.742777952151763, "learning_rate": 9.806788257601159e-06, "loss": 0.4652, "step": 4055 }, { "epoch": 0.12, "grad_norm": 4.217920235872227, "learning_rate": 9.80666056305853e-06, "loss": 0.4757, "step": 4056 }, { "epoch": 0.12, "grad_norm": 4.399996005403265, "learning_rate": 9.806532827164794e-06, "loss": 0.4632, "step": 4057 }, { "epoch": 0.12, "grad_norm": 6.186393369106743, "learning_rate": 9.806405049921047e-06, "loss": 0.667, "step": 4058 }, { "epoch": 0.12, "grad_norm": 3.2088607420645245, "learning_rate": 9.80627723132839e-06, "loss": 0.4666, "step": 4059 }, { "epoch": 0.12, "grad_norm": 7.286847529311684, "learning_rate": 9.806149371387921e-06, "loss": 0.4645, "step": 4060 }, { "epoch": 0.12, "grad_norm": 3.0318679966135003, "learning_rate": 9.80602147010074e-06, "loss": 0.313, "step": 4061 }, { "epoch": 0.12, "grad_norm": 3.9285514812458486, "learning_rate": 9.80589352746795e-06, "loss": 0.4498, "step": 4062 }, { "epoch": 0.12, "grad_norm": 6.467393299781232, "learning_rate": 9.805765543490647e-06, "loss": 0.3996, "step": 4063 }, { "epoch": 0.12, "grad_norm": 6.409107073573494, "learning_rate": 9.805637518169937e-06, "loss": 0.6578, "step": 4064 }, { "epoch": 0.12, "grad_norm": 3.9287897680966126, "learning_rate": 9.805509451506917e-06, "loss": 0.1513, "step": 4065 }, { "epoch": 0.12, "grad_norm": 10.031163295478091, "learning_rate": 9.805381343502691e-06, "loss": 0.5621, "step": 4066 }, { "epoch": 0.12, "grad_norm": 6.486739644381394, "learning_rate": 9.805253194158363e-06, "loss": 0.7262, "step": 4067 }, { "epoch": 0.12, "grad_norm": 6.197097232621511, "learning_rate": 9.805125003475033e-06, "loss": 0.5164, "step": 4068 }, { "epoch": 0.12, "grad_norm": 9.201296146973087, "learning_rate": 9.804996771453803e-06, "loss": 0.5505, "step": 4069 }, { "epoch": 0.12, "grad_norm": 7.424136232938941, "learning_rate": 9.804868498095779e-06, "loss": 0.6847, "step": 4070 }, { "epoch": 0.12, "grad_norm": 6.673576834761867, "learning_rate": 9.804740183402061e-06, "loss": 0.4784, "step": 4071 }, { "epoch": 0.12, "grad_norm": 7.576264583672424, "learning_rate": 9.804611827373756e-06, "loss": 0.8258, "step": 4072 }, { "epoch": 0.12, "grad_norm": 9.6479793053685, "learning_rate": 9.804483430011967e-06, "loss": 0.6664, "step": 4073 }, { "epoch": 0.12, "grad_norm": 9.388823274487235, "learning_rate": 9.804354991317798e-06, "loss": 0.7242, "step": 4074 }, { "epoch": 0.12, "grad_norm": 7.806590660609122, "learning_rate": 9.804226511292354e-06, "loss": 0.7296, "step": 4075 }, { "epoch": 0.12, "grad_norm": 5.974068194717524, "learning_rate": 9.804097989936741e-06, "loss": 0.5828, "step": 4076 }, { "epoch": 0.12, "grad_norm": 6.106570446465452, "learning_rate": 9.803969427252067e-06, "loss": 0.6647, "step": 4077 }, { "epoch": 0.12, "grad_norm": 8.53146853883182, "learning_rate": 9.803840823239433e-06, "loss": 0.915, "step": 4078 }, { "epoch": 0.12, "grad_norm": 7.080137594039039, "learning_rate": 9.803712177899948e-06, "loss": 0.7591, "step": 4079 }, { "epoch": 0.12, "grad_norm": 2.4917837312133138, "learning_rate": 9.803583491234719e-06, "loss": 0.1318, "step": 4080 }, { "epoch": 0.12, "grad_norm": 9.301852161800042, "learning_rate": 9.803454763244852e-06, "loss": 0.5753, "step": 4081 }, { "epoch": 0.12, "grad_norm": 10.496866803224252, "learning_rate": 9.803325993931455e-06, "loss": 0.9177, "step": 4082 }, { "epoch": 0.12, "grad_norm": 5.662290224816931, "learning_rate": 9.803197183295639e-06, "loss": 0.4773, "step": 4083 }, { "epoch": 0.12, "grad_norm": 3.2474085673205164, "learning_rate": 9.803068331338506e-06, "loss": 0.4704, "step": 4084 }, { "epoch": 0.12, "grad_norm": 12.982110846328773, "learning_rate": 9.802939438061168e-06, "loss": 0.7002, "step": 4085 }, { "epoch": 0.12, "grad_norm": 6.637220304239146, "learning_rate": 9.802810503464735e-06, "loss": 0.3786, "step": 4086 }, { "epoch": 0.12, "grad_norm": 7.211517277613117, "learning_rate": 9.802681527550314e-06, "loss": 0.3718, "step": 4087 }, { "epoch": 0.12, "grad_norm": 10.242615016508422, "learning_rate": 9.802552510319013e-06, "loss": 0.751, "step": 4088 }, { "epoch": 0.12, "grad_norm": 10.783214622288291, "learning_rate": 9.802423451771945e-06, "loss": 0.7833, "step": 4089 }, { "epoch": 0.12, "grad_norm": 11.39549538473324, "learning_rate": 9.80229435191022e-06, "loss": 0.859, "step": 4090 }, { "epoch": 0.12, "grad_norm": 8.554794185443496, "learning_rate": 9.802165210734948e-06, "loss": 0.5485, "step": 4091 }, { "epoch": 0.12, "grad_norm": 3.4975679155944723, "learning_rate": 9.80203602824724e-06, "loss": 0.336, "step": 4092 }, { "epoch": 0.12, "grad_norm": 5.617003415291923, "learning_rate": 9.801906804448206e-06, "loss": 0.5084, "step": 4093 }, { "epoch": 0.12, "grad_norm": 4.599880513422327, "learning_rate": 9.80177753933896e-06, "loss": 0.4302, "step": 4094 }, { "epoch": 0.12, "grad_norm": 5.486431898994082, "learning_rate": 9.801648232920614e-06, "loss": 0.6392, "step": 4095 }, { "epoch": 0.12, "grad_norm": 11.775172844203361, "learning_rate": 9.801518885194277e-06, "loss": 0.4903, "step": 4096 }, { "epoch": 0.12, "grad_norm": 8.510945061319102, "learning_rate": 9.801389496161066e-06, "loss": 0.7081, "step": 4097 }, { "epoch": 0.12, "grad_norm": 6.905408126128187, "learning_rate": 9.801260065822091e-06, "loss": 0.9882, "step": 4098 }, { "epoch": 0.12, "grad_norm": 8.790925149848244, "learning_rate": 9.801130594178468e-06, "loss": 0.6056, "step": 4099 }, { "epoch": 0.12, "grad_norm": 5.788958632223352, "learning_rate": 9.80100108123131e-06, "loss": 0.4962, "step": 4100 }, { "epoch": 0.12, "grad_norm": 2.8225306614172507, "learning_rate": 9.80087152698173e-06, "loss": 0.2808, "step": 4101 }, { "epoch": 0.12, "grad_norm": 7.479968340597623, "learning_rate": 9.800741931430844e-06, "loss": 0.5755, "step": 4102 }, { "epoch": 0.12, "grad_norm": 5.762623488950395, "learning_rate": 9.800612294579765e-06, "loss": 0.4482, "step": 4103 }, { "epoch": 0.12, "grad_norm": 5.700847474056364, "learning_rate": 9.80048261642961e-06, "loss": 0.4288, "step": 4104 }, { "epoch": 0.12, "grad_norm": 8.523334226323533, "learning_rate": 9.800352896981495e-06, "loss": 0.6634, "step": 4105 }, { "epoch": 0.12, "grad_norm": 4.606247283644891, "learning_rate": 9.800223136236535e-06, "loss": 0.3722, "step": 4106 }, { "epoch": 0.12, "grad_norm": 4.678617326392756, "learning_rate": 9.800093334195845e-06, "loss": 0.513, "step": 4107 }, { "epoch": 0.12, "grad_norm": 5.78876322643241, "learning_rate": 9.799963490860544e-06, "loss": 0.372, "step": 4108 }, { "epoch": 0.12, "grad_norm": 14.330933673094261, "learning_rate": 9.799833606231747e-06, "loss": 0.2381, "step": 4109 }, { "epoch": 0.12, "grad_norm": 5.665691062680962, "learning_rate": 9.799703680310575e-06, "loss": 0.7992, "step": 4110 }, { "epoch": 0.12, "grad_norm": 6.166393830303665, "learning_rate": 9.79957371309814e-06, "loss": 0.6604, "step": 4111 }, { "epoch": 0.12, "grad_norm": 4.303268780380503, "learning_rate": 9.799443704595566e-06, "loss": 0.1846, "step": 4112 }, { "epoch": 0.12, "grad_norm": 6.042440478876509, "learning_rate": 9.799313654803968e-06, "loss": 0.6295, "step": 4113 }, { "epoch": 0.12, "grad_norm": 3.2624086118738336, "learning_rate": 9.799183563724463e-06, "loss": 0.1391, "step": 4114 }, { "epoch": 0.12, "grad_norm": 14.464001924936623, "learning_rate": 9.799053431358176e-06, "loss": 0.776, "step": 4115 }, { "epoch": 0.12, "grad_norm": 7.079701936395997, "learning_rate": 9.798923257706222e-06, "loss": 0.6772, "step": 4116 }, { "epoch": 0.12, "grad_norm": 5.396337517511787, "learning_rate": 9.798793042769723e-06, "loss": 0.7523, "step": 4117 }, { "epoch": 0.12, "grad_norm": 4.974022377227647, "learning_rate": 9.798662786549797e-06, "loss": 0.7908, "step": 4118 }, { "epoch": 0.12, "grad_norm": 8.988436210586343, "learning_rate": 9.798532489047566e-06, "loss": 0.6324, "step": 4119 }, { "epoch": 0.12, "grad_norm": 2.7277679620472526, "learning_rate": 9.798402150264151e-06, "loss": 0.3032, "step": 4120 }, { "epoch": 0.12, "grad_norm": 15.877487588456242, "learning_rate": 9.798271770200674e-06, "loss": 0.6981, "step": 4121 }, { "epoch": 0.12, "grad_norm": 3.6968496403989235, "learning_rate": 9.798141348858255e-06, "loss": 0.3331, "step": 4122 }, { "epoch": 0.12, "grad_norm": 10.024703744901126, "learning_rate": 9.798010886238017e-06, "loss": 0.4972, "step": 4123 }, { "epoch": 0.12, "grad_norm": 8.81112865843381, "learning_rate": 9.79788038234108e-06, "loss": 0.7644, "step": 4124 }, { "epoch": 0.12, "grad_norm": 6.5087143729890355, "learning_rate": 9.797749837168571e-06, "loss": 0.9525, "step": 4125 }, { "epoch": 0.12, "grad_norm": 8.055629553034793, "learning_rate": 9.797619250721611e-06, "loss": 0.7614, "step": 4126 }, { "epoch": 0.12, "grad_norm": 5.26608985184722, "learning_rate": 9.797488623001323e-06, "loss": 0.5158, "step": 4127 }, { "epoch": 0.12, "grad_norm": 4.337337123660067, "learning_rate": 9.79735795400883e-06, "loss": 0.4214, "step": 4128 }, { "epoch": 0.12, "grad_norm": 5.012707964228872, "learning_rate": 9.797227243745258e-06, "loss": 0.5058, "step": 4129 }, { "epoch": 0.12, "grad_norm": 4.7507839057963865, "learning_rate": 9.79709649221173e-06, "loss": 0.4908, "step": 4130 }, { "epoch": 0.12, "grad_norm": 6.148808973401421, "learning_rate": 9.796965699409372e-06, "loss": 0.341, "step": 4131 }, { "epoch": 0.12, "grad_norm": 3.5388125906111356, "learning_rate": 9.79683486533931e-06, "loss": 0.227, "step": 4132 }, { "epoch": 0.12, "grad_norm": 13.12047186351873, "learning_rate": 9.796703990002666e-06, "loss": 0.6276, "step": 4133 }, { "epoch": 0.12, "grad_norm": 6.025375588767705, "learning_rate": 9.79657307340057e-06, "loss": 0.2146, "step": 4134 }, { "epoch": 0.12, "grad_norm": 6.373396035910789, "learning_rate": 9.796442115534145e-06, "loss": 0.272, "step": 4135 }, { "epoch": 0.12, "grad_norm": 9.75167851793917, "learning_rate": 9.79631111640452e-06, "loss": 0.8944, "step": 4136 }, { "epoch": 0.12, "grad_norm": 5.968408584816294, "learning_rate": 9.796180076012821e-06, "loss": 0.9298, "step": 4137 }, { "epoch": 0.12, "grad_norm": 8.996223982126304, "learning_rate": 9.796048994360175e-06, "loss": 0.6434, "step": 4138 }, { "epoch": 0.12, "grad_norm": 4.126557662943699, "learning_rate": 9.79591787144771e-06, "loss": 0.4498, "step": 4139 }, { "epoch": 0.12, "grad_norm": 6.302342088499804, "learning_rate": 9.795786707276555e-06, "loss": 0.3519, "step": 4140 }, { "epoch": 0.12, "grad_norm": 4.529945896196112, "learning_rate": 9.795655501847836e-06, "loss": 0.8375, "step": 4141 }, { "epoch": 0.12, "grad_norm": 4.147200692789438, "learning_rate": 9.795524255162682e-06, "loss": 0.4514, "step": 4142 }, { "epoch": 0.12, "grad_norm": 7.381371412966376, "learning_rate": 9.795392967222226e-06, "loss": 0.5543, "step": 4143 }, { "epoch": 0.12, "grad_norm": 7.533911935000438, "learning_rate": 9.795261638027594e-06, "loss": 0.4442, "step": 4144 }, { "epoch": 0.12, "grad_norm": 6.139448970553715, "learning_rate": 9.795130267579916e-06, "loss": 0.2775, "step": 4145 }, { "epoch": 0.12, "grad_norm": 7.304819295581587, "learning_rate": 9.794998855880323e-06, "loss": 0.492, "step": 4146 }, { "epoch": 0.12, "grad_norm": 5.884633343536376, "learning_rate": 9.794867402929946e-06, "loss": 0.429, "step": 4147 }, { "epoch": 0.12, "grad_norm": 6.982709852562313, "learning_rate": 9.794735908729913e-06, "loss": 0.5901, "step": 4148 }, { "epoch": 0.12, "grad_norm": 6.963843564261047, "learning_rate": 9.794604373281359e-06, "loss": 0.4949, "step": 4149 }, { "epoch": 0.12, "grad_norm": 7.225056061312696, "learning_rate": 9.794472796585414e-06, "loss": 0.1674, "step": 4150 }, { "epoch": 0.12, "grad_norm": 7.6569217309357915, "learning_rate": 9.79434117864321e-06, "loss": 0.6756, "step": 4151 }, { "epoch": 0.12, "grad_norm": 3.694989954479226, "learning_rate": 9.794209519455877e-06, "loss": 0.2121, "step": 4152 }, { "epoch": 0.12, "grad_norm": 10.040235493926353, "learning_rate": 9.794077819024553e-06, "loss": 0.7316, "step": 4153 }, { "epoch": 0.12, "grad_norm": 5.869578924332, "learning_rate": 9.793946077350365e-06, "loss": 0.5641, "step": 4154 }, { "epoch": 0.12, "grad_norm": 6.041055130975837, "learning_rate": 9.793814294434449e-06, "loss": 0.5622, "step": 4155 }, { "epoch": 0.12, "grad_norm": 4.025943131452153, "learning_rate": 9.79368247027794e-06, "loss": 0.5132, "step": 4156 }, { "epoch": 0.12, "grad_norm": 4.658675873957115, "learning_rate": 9.793550604881971e-06, "loss": 0.54, "step": 4157 }, { "epoch": 0.12, "grad_norm": 4.278768892505604, "learning_rate": 9.793418698247676e-06, "loss": 0.3592, "step": 4158 }, { "epoch": 0.12, "grad_norm": 6.463885519229652, "learning_rate": 9.793286750376191e-06, "loss": 0.5202, "step": 4159 }, { "epoch": 0.12, "grad_norm": 12.761731603433228, "learning_rate": 9.793154761268651e-06, "loss": 0.6321, "step": 4160 }, { "epoch": 0.12, "grad_norm": 11.312473023103193, "learning_rate": 9.793022730926188e-06, "loss": 1.0153, "step": 4161 }, { "epoch": 0.12, "grad_norm": 2.5830424257782454, "learning_rate": 9.792890659349943e-06, "loss": 0.2244, "step": 4162 }, { "epoch": 0.12, "grad_norm": 5.951494288936202, "learning_rate": 9.792758546541047e-06, "loss": 0.452, "step": 4163 }, { "epoch": 0.12, "grad_norm": 7.573738134047157, "learning_rate": 9.792626392500642e-06, "loss": 0.4261, "step": 4164 }, { "epoch": 0.12, "grad_norm": 5.518560865537486, "learning_rate": 9.792494197229862e-06, "loss": 0.3792, "step": 4165 }, { "epoch": 0.12, "grad_norm": 4.770920109966076, "learning_rate": 9.792361960729844e-06, "loss": 0.7707, "step": 4166 }, { "epoch": 0.12, "grad_norm": 7.4873727039348985, "learning_rate": 9.792229683001726e-06, "loss": 0.4681, "step": 4167 }, { "epoch": 0.12, "grad_norm": 5.75137445359531, "learning_rate": 9.792097364046644e-06, "loss": 0.3465, "step": 4168 }, { "epoch": 0.12, "grad_norm": 5.683889018251118, "learning_rate": 9.791965003865743e-06, "loss": 0.4072, "step": 4169 }, { "epoch": 0.12, "grad_norm": 5.809797971531926, "learning_rate": 9.791832602460154e-06, "loss": 0.3775, "step": 4170 }, { "epoch": 0.12, "grad_norm": 9.731086579642986, "learning_rate": 9.791700159831019e-06, "loss": 0.588, "step": 4171 }, { "epoch": 0.12, "grad_norm": 9.477422488093598, "learning_rate": 9.791567675979478e-06, "loss": 0.638, "step": 4172 }, { "epoch": 0.12, "grad_norm": 7.221783714757428, "learning_rate": 9.79143515090667e-06, "loss": 0.6089, "step": 4173 }, { "epoch": 0.12, "grad_norm": 6.9167974720118375, "learning_rate": 9.791302584613735e-06, "loss": 0.4604, "step": 4174 }, { "epoch": 0.12, "grad_norm": 10.126715149837688, "learning_rate": 9.791169977101816e-06, "loss": 0.6678, "step": 4175 }, { "epoch": 0.12, "grad_norm": 5.093541989438091, "learning_rate": 9.791037328372049e-06, "loss": 0.5699, "step": 4176 }, { "epoch": 0.12, "grad_norm": 6.952691396844788, "learning_rate": 9.790904638425577e-06, "loss": 0.4986, "step": 4177 }, { "epoch": 0.12, "grad_norm": 7.657438474003842, "learning_rate": 9.790771907263546e-06, "loss": 0.5051, "step": 4178 }, { "epoch": 0.12, "grad_norm": 9.838218176097396, "learning_rate": 9.790639134887092e-06, "loss": 0.6185, "step": 4179 }, { "epoch": 0.12, "grad_norm": 5.609945879180474, "learning_rate": 9.79050632129736e-06, "loss": 0.5303, "step": 4180 }, { "epoch": 0.12, "grad_norm": 10.114850125524777, "learning_rate": 9.790373466495491e-06, "loss": 1.1787, "step": 4181 }, { "epoch": 0.12, "grad_norm": 6.096799116967546, "learning_rate": 9.790240570482629e-06, "loss": 0.5631, "step": 4182 }, { "epoch": 0.12, "grad_norm": 5.99391648714669, "learning_rate": 9.790107633259918e-06, "loss": 0.4818, "step": 4183 }, { "epoch": 0.12, "grad_norm": 11.774828914039261, "learning_rate": 9.7899746548285e-06, "loss": 0.6616, "step": 4184 }, { "epoch": 0.12, "grad_norm": 2.6691853331231634, "learning_rate": 9.789841635189519e-06, "loss": 0.245, "step": 4185 }, { "epoch": 0.12, "grad_norm": 6.523444005540356, "learning_rate": 9.789708574344121e-06, "loss": 0.6452, "step": 4186 }, { "epoch": 0.12, "grad_norm": 4.832290531439818, "learning_rate": 9.78957547229345e-06, "loss": 0.1951, "step": 4187 }, { "epoch": 0.12, "grad_norm": 10.362045841365243, "learning_rate": 9.789442329038652e-06, "loss": 0.6296, "step": 4188 }, { "epoch": 0.12, "grad_norm": 5.672679245101917, "learning_rate": 9.78930914458087e-06, "loss": 0.4663, "step": 4189 }, { "epoch": 0.12, "grad_norm": 5.420036377943078, "learning_rate": 9.789175918921253e-06, "loss": 0.6444, "step": 4190 }, { "epoch": 0.12, "grad_norm": 6.215937414512118, "learning_rate": 9.789042652060942e-06, "loss": 0.4128, "step": 4191 }, { "epoch": 0.12, "grad_norm": 6.578403639158764, "learning_rate": 9.788909344001088e-06, "loss": 0.3399, "step": 4192 }, { "epoch": 0.12, "grad_norm": 5.096259218740464, "learning_rate": 9.788775994742837e-06, "loss": 0.536, "step": 4193 }, { "epoch": 0.12, "grad_norm": 5.5364968151096114, "learning_rate": 9.788642604287336e-06, "loss": 0.6693, "step": 4194 }, { "epoch": 0.12, "grad_norm": 9.943258284633936, "learning_rate": 9.788509172635732e-06, "loss": 0.6158, "step": 4195 }, { "epoch": 0.12, "grad_norm": 6.620426398659926, "learning_rate": 9.788375699789174e-06, "loss": 0.3566, "step": 4196 }, { "epoch": 0.12, "grad_norm": 7.426257957476064, "learning_rate": 9.788242185748808e-06, "loss": 0.7796, "step": 4197 }, { "epoch": 0.12, "grad_norm": 5.54425442539246, "learning_rate": 9.788108630515785e-06, "loss": 0.5382, "step": 4198 }, { "epoch": 0.12, "grad_norm": 4.5609699191920825, "learning_rate": 9.787975034091252e-06, "loss": 0.5852, "step": 4199 }, { "epoch": 0.12, "grad_norm": 7.317673377409466, "learning_rate": 9.78784139647636e-06, "loss": 0.7363, "step": 4200 }, { "epoch": 0.12, "grad_norm": 12.604289299654686, "learning_rate": 9.787707717672256e-06, "loss": 0.3889, "step": 4201 }, { "epoch": 0.12, "grad_norm": 8.357547053901564, "learning_rate": 9.787573997680095e-06, "loss": 1.0723, "step": 4202 }, { "epoch": 0.12, "grad_norm": 5.452033865556889, "learning_rate": 9.787440236501023e-06, "loss": 0.8662, "step": 4203 }, { "epoch": 0.12, "grad_norm": 11.719579804605145, "learning_rate": 9.787306434136192e-06, "loss": 0.8501, "step": 4204 }, { "epoch": 0.12, "grad_norm": 13.515568410060357, "learning_rate": 9.787172590586753e-06, "loss": 1.0296, "step": 4205 }, { "epoch": 0.12, "grad_norm": 4.120126561933812, "learning_rate": 9.787038705853858e-06, "loss": 0.1813, "step": 4206 }, { "epoch": 0.12, "grad_norm": 8.730104333837454, "learning_rate": 9.78690477993866e-06, "loss": 0.4936, "step": 4207 }, { "epoch": 0.12, "grad_norm": 7.433666299053981, "learning_rate": 9.786770812842307e-06, "loss": 0.9062, "step": 4208 }, { "epoch": 0.12, "grad_norm": 2.8484569622134503, "learning_rate": 9.786636804565955e-06, "loss": 0.2599, "step": 4209 }, { "epoch": 0.12, "grad_norm": 6.325968132259581, "learning_rate": 9.786502755110756e-06, "loss": 0.5346, "step": 4210 }, { "epoch": 0.12, "grad_norm": 6.049493385415371, "learning_rate": 9.786368664477863e-06, "loss": 0.4175, "step": 4211 }, { "epoch": 0.12, "grad_norm": 5.6859667042157565, "learning_rate": 9.78623453266843e-06, "loss": 0.1576, "step": 4212 }, { "epoch": 0.12, "grad_norm": 10.863228049862991, "learning_rate": 9.786100359683611e-06, "loss": 0.4789, "step": 4213 }, { "epoch": 0.12, "grad_norm": 7.104652932567243, "learning_rate": 9.785966145524558e-06, "loss": 0.6459, "step": 4214 }, { "epoch": 0.12, "grad_norm": 6.189691714126818, "learning_rate": 9.78583189019243e-06, "loss": 0.457, "step": 4215 }, { "epoch": 0.12, "grad_norm": 7.210345408225205, "learning_rate": 9.785697593688376e-06, "loss": 0.9049, "step": 4216 }, { "epoch": 0.12, "grad_norm": 5.826604077085207, "learning_rate": 9.785563256013559e-06, "loss": 0.3686, "step": 4217 }, { "epoch": 0.12, "grad_norm": 4.2218153942991465, "learning_rate": 9.785428877169127e-06, "loss": 0.3426, "step": 4218 }, { "epoch": 0.12, "grad_norm": 9.382008082125925, "learning_rate": 9.785294457156243e-06, "loss": 0.8645, "step": 4219 }, { "epoch": 0.12, "grad_norm": 3.1721829795711245, "learning_rate": 9.785159995976056e-06, "loss": 0.3648, "step": 4220 }, { "epoch": 0.12, "grad_norm": 6.152010547152681, "learning_rate": 9.78502549362973e-06, "loss": 0.3585, "step": 4221 }, { "epoch": 0.12, "grad_norm": 9.22954690005011, "learning_rate": 9.784890950118418e-06, "loss": 0.5086, "step": 4222 }, { "epoch": 0.12, "grad_norm": 14.723431792243433, "learning_rate": 9.784756365443278e-06, "loss": 0.4259, "step": 4223 }, { "epoch": 0.12, "grad_norm": 6.57420460636569, "learning_rate": 9.784621739605468e-06, "loss": 0.4564, "step": 4224 }, { "epoch": 0.12, "grad_norm": 4.657533199992589, "learning_rate": 9.784487072606145e-06, "loss": 0.6301, "step": 4225 }, { "epoch": 0.12, "grad_norm": 4.870385896327342, "learning_rate": 9.78435236444647e-06, "loss": 0.5267, "step": 4226 }, { "epoch": 0.12, "grad_norm": 10.098778573387794, "learning_rate": 9.784217615127603e-06, "loss": 0.6168, "step": 4227 }, { "epoch": 0.12, "grad_norm": 4.452609437243363, "learning_rate": 9.784082824650699e-06, "loss": 0.4534, "step": 4228 }, { "epoch": 0.12, "grad_norm": 5.648865574065712, "learning_rate": 9.783947993016917e-06, "loss": 0.6295, "step": 4229 }, { "epoch": 0.12, "grad_norm": 2.510030294630661, "learning_rate": 9.783813120227423e-06, "loss": 0.1819, "step": 4230 }, { "epoch": 0.12, "grad_norm": 7.749014822577127, "learning_rate": 9.78367820628337e-06, "loss": 0.6237, "step": 4231 }, { "epoch": 0.12, "grad_norm": 3.5284491715285897, "learning_rate": 9.783543251185928e-06, "loss": 0.2639, "step": 4232 }, { "epoch": 0.12, "grad_norm": 7.128862839658069, "learning_rate": 9.783408254936248e-06, "loss": 0.5576, "step": 4233 }, { "epoch": 0.12, "grad_norm": 5.982912447620807, "learning_rate": 9.783273217535496e-06, "loss": 0.4361, "step": 4234 }, { "epoch": 0.12, "grad_norm": 20.20939082364356, "learning_rate": 9.783138138984836e-06, "loss": 0.6035, "step": 4235 }, { "epoch": 0.12, "grad_norm": 3.8319384829057057, "learning_rate": 9.783003019285425e-06, "loss": 0.3833, "step": 4236 }, { "epoch": 0.12, "grad_norm": 7.766542990631648, "learning_rate": 9.782867858438428e-06, "loss": 0.6584, "step": 4237 }, { "epoch": 0.12, "grad_norm": 5.697811337275635, "learning_rate": 9.78273265644501e-06, "loss": 0.6889, "step": 4238 }, { "epoch": 0.12, "grad_norm": 7.343973034657525, "learning_rate": 9.78259741330633e-06, "loss": 0.6205, "step": 4239 }, { "epoch": 0.12, "grad_norm": 4.933413009171985, "learning_rate": 9.782462129023553e-06, "loss": 0.3996, "step": 4240 }, { "epoch": 0.12, "grad_norm": 2.4552318005956937, "learning_rate": 9.782326803597844e-06, "loss": 0.1971, "step": 4241 }, { "epoch": 0.12, "grad_norm": 6.028518018473028, "learning_rate": 9.782191437030366e-06, "loss": 0.5544, "step": 4242 }, { "epoch": 0.12, "grad_norm": 6.965000152971276, "learning_rate": 9.782056029322284e-06, "loss": 0.8726, "step": 4243 }, { "epoch": 0.12, "grad_norm": 6.917942936664415, "learning_rate": 9.781920580474762e-06, "loss": 0.3129, "step": 4244 }, { "epoch": 0.12, "grad_norm": 10.016366154568372, "learning_rate": 9.781785090488966e-06, "loss": 0.6662, "step": 4245 }, { "epoch": 0.12, "grad_norm": 8.880730404241387, "learning_rate": 9.781649559366063e-06, "loss": 0.9409, "step": 4246 }, { "epoch": 0.12, "grad_norm": 13.146279224182999, "learning_rate": 9.781513987107216e-06, "loss": 0.5948, "step": 4247 }, { "epoch": 0.12, "grad_norm": 7.083855875226014, "learning_rate": 9.781378373713595e-06, "loss": 0.5195, "step": 4248 }, { "epoch": 0.12, "grad_norm": 9.586369263019897, "learning_rate": 9.781242719186363e-06, "loss": 0.4429, "step": 4249 }, { "epoch": 0.12, "grad_norm": 15.707516487935004, "learning_rate": 9.781107023526688e-06, "loss": 0.476, "step": 4250 }, { "epoch": 0.12, "grad_norm": 5.57856308528429, "learning_rate": 9.780971286735739e-06, "loss": 0.692, "step": 4251 }, { "epoch": 0.12, "grad_norm": 6.691869573814506, "learning_rate": 9.780835508814684e-06, "loss": 0.4712, "step": 4252 }, { "epoch": 0.12, "grad_norm": 5.98708750154018, "learning_rate": 9.780699689764687e-06, "loss": 0.6814, "step": 4253 }, { "epoch": 0.12, "grad_norm": 4.970619090031301, "learning_rate": 9.78056382958692e-06, "loss": 0.457, "step": 4254 }, { "epoch": 0.12, "grad_norm": 5.456915950444543, "learning_rate": 9.780427928282552e-06, "loss": 0.2391, "step": 4255 }, { "epoch": 0.12, "grad_norm": 7.831727620848458, "learning_rate": 9.78029198585275e-06, "loss": 0.4166, "step": 4256 }, { "epoch": 0.12, "grad_norm": 4.250410200406469, "learning_rate": 9.780156002298684e-06, "loss": 0.295, "step": 4257 }, { "epoch": 0.12, "grad_norm": 7.47914039231753, "learning_rate": 9.780019977621526e-06, "loss": 0.8489, "step": 4258 }, { "epoch": 0.12, "grad_norm": 6.128154137137015, "learning_rate": 9.779883911822446e-06, "loss": 0.5985, "step": 4259 }, { "epoch": 0.12, "grad_norm": 6.963017727626732, "learning_rate": 9.77974780490261e-06, "loss": 0.318, "step": 4260 }, { "epoch": 0.12, "grad_norm": 6.985902828232045, "learning_rate": 9.779611656863194e-06, "loss": 0.4966, "step": 4261 }, { "epoch": 0.12, "grad_norm": 16.336460099693177, "learning_rate": 9.779475467705368e-06, "loss": 0.6282, "step": 4262 }, { "epoch": 0.12, "grad_norm": 6.758381761157104, "learning_rate": 9.7793392374303e-06, "loss": 0.4315, "step": 4263 }, { "epoch": 0.12, "grad_norm": 5.786443697146617, "learning_rate": 9.779202966039167e-06, "loss": 0.4561, "step": 4264 }, { "epoch": 0.12, "grad_norm": 4.086203396899887, "learning_rate": 9.77906665353314e-06, "loss": 0.698, "step": 4265 }, { "epoch": 0.12, "grad_norm": 6.596185062819651, "learning_rate": 9.77893029991339e-06, "loss": 0.858, "step": 4266 }, { "epoch": 0.12, "grad_norm": 3.901161602843628, "learning_rate": 9.778793905181093e-06, "loss": 0.3711, "step": 4267 }, { "epoch": 0.12, "grad_norm": 6.555097273883932, "learning_rate": 9.778657469337418e-06, "loss": 0.3271, "step": 4268 }, { "epoch": 0.12, "grad_norm": 4.97308551526716, "learning_rate": 9.778520992383541e-06, "loss": 0.6395, "step": 4269 }, { "epoch": 0.12, "grad_norm": 17.77210428121204, "learning_rate": 9.778384474320636e-06, "loss": 0.439, "step": 4270 }, { "epoch": 0.12, "grad_norm": 6.111968014585974, "learning_rate": 9.77824791514988e-06, "loss": 0.5562, "step": 4271 }, { "epoch": 0.12, "grad_norm": 2.7814412426294304, "learning_rate": 9.778111314872445e-06, "loss": 0.4035, "step": 4272 }, { "epoch": 0.12, "grad_norm": 8.917828365362212, "learning_rate": 9.777974673489506e-06, "loss": 0.6424, "step": 4273 }, { "epoch": 0.12, "grad_norm": 8.797759152254296, "learning_rate": 9.777837991002239e-06, "loss": 0.5524, "step": 4274 }, { "epoch": 0.12, "grad_norm": 4.956827097868007, "learning_rate": 9.77770126741182e-06, "loss": 0.9735, "step": 4275 }, { "epoch": 0.12, "grad_norm": 12.029143230722394, "learning_rate": 9.777564502719425e-06, "loss": 0.8046, "step": 4276 }, { "epoch": 0.12, "grad_norm": 7.568111639090579, "learning_rate": 9.777427696926232e-06, "loss": 0.5593, "step": 4277 }, { "epoch": 0.12, "grad_norm": 5.220693763070894, "learning_rate": 9.777290850033415e-06, "loss": 0.4409, "step": 4278 }, { "epoch": 0.12, "grad_norm": 5.029054695890877, "learning_rate": 9.777153962042154e-06, "loss": 0.5705, "step": 4279 }, { "epoch": 0.12, "grad_norm": 4.353944492907966, "learning_rate": 9.777017032953627e-06, "loss": 0.5953, "step": 4280 }, { "epoch": 0.12, "grad_norm": 9.226926481478905, "learning_rate": 9.776880062769007e-06, "loss": 0.755, "step": 4281 }, { "epoch": 0.12, "grad_norm": 6.91427291291323, "learning_rate": 9.77674305148948e-06, "loss": 0.6172, "step": 4282 }, { "epoch": 0.12, "grad_norm": 7.237352454702539, "learning_rate": 9.776605999116218e-06, "loss": 0.4792, "step": 4283 }, { "epoch": 0.12, "grad_norm": 7.806421096918051, "learning_rate": 9.776468905650403e-06, "loss": 0.6318, "step": 4284 }, { "epoch": 0.12, "grad_norm": 7.446922680521665, "learning_rate": 9.776331771093214e-06, "loss": 0.7524, "step": 4285 }, { "epoch": 0.12, "grad_norm": 6.033681902994104, "learning_rate": 9.776194595445834e-06, "loss": 0.5774, "step": 4286 }, { "epoch": 0.12, "grad_norm": 5.30185445155351, "learning_rate": 9.776057378709435e-06, "loss": 0.6632, "step": 4287 }, { "epoch": 0.12, "grad_norm": 5.495581195630257, "learning_rate": 9.775920120885205e-06, "loss": 0.494, "step": 4288 }, { "epoch": 0.12, "grad_norm": 7.941469476526006, "learning_rate": 9.775782821974324e-06, "loss": 0.3484, "step": 4289 }, { "epoch": 0.12, "grad_norm": 6.591039597705025, "learning_rate": 9.77564548197797e-06, "loss": 0.4475, "step": 4290 }, { "epoch": 0.12, "grad_norm": 11.526428206981265, "learning_rate": 9.775508100897325e-06, "loss": 0.5381, "step": 4291 }, { "epoch": 0.12, "grad_norm": 6.656892843948701, "learning_rate": 9.775370678733571e-06, "loss": 0.5645, "step": 4292 }, { "epoch": 0.12, "grad_norm": 6.811758088476161, "learning_rate": 9.775233215487893e-06, "loss": 0.4635, "step": 4293 }, { "epoch": 0.12, "grad_norm": 5.575759514361008, "learning_rate": 9.775095711161472e-06, "loss": 0.5398, "step": 4294 }, { "epoch": 0.12, "grad_norm": 18.857735727741645, "learning_rate": 9.774958165755488e-06, "loss": 0.7572, "step": 4295 }, { "epoch": 0.12, "grad_norm": 7.8929913588925675, "learning_rate": 9.774820579271128e-06, "loss": 0.8288, "step": 4296 }, { "epoch": 0.12, "grad_norm": 8.28785609059974, "learning_rate": 9.774682951709574e-06, "loss": 0.398, "step": 4297 }, { "epoch": 0.12, "grad_norm": 10.63803593398306, "learning_rate": 9.774545283072012e-06, "loss": 0.7146, "step": 4298 }, { "epoch": 0.12, "grad_norm": 3.303588828847417, "learning_rate": 9.774407573359624e-06, "loss": 0.2872, "step": 4299 }, { "epoch": 0.12, "grad_norm": 11.40862159634176, "learning_rate": 9.774269822573594e-06, "loss": 0.4512, "step": 4300 }, { "epoch": 0.12, "grad_norm": 5.959164376980123, "learning_rate": 9.774132030715111e-06, "loss": 0.5633, "step": 4301 }, { "epoch": 0.12, "grad_norm": 10.270754387624988, "learning_rate": 9.773994197785356e-06, "loss": 1.0267, "step": 4302 }, { "epoch": 0.12, "grad_norm": 7.4652577619259635, "learning_rate": 9.773856323785518e-06, "loss": 0.8946, "step": 4303 }, { "epoch": 0.12, "grad_norm": 9.622907485401448, "learning_rate": 9.773718408716782e-06, "loss": 0.6983, "step": 4304 }, { "epoch": 0.12, "grad_norm": 6.570048468656313, "learning_rate": 9.773580452580332e-06, "loss": 0.6443, "step": 4305 }, { "epoch": 0.12, "grad_norm": 9.521432692167425, "learning_rate": 9.773442455377359e-06, "loss": 1.1189, "step": 4306 }, { "epoch": 0.12, "grad_norm": 8.56616577979858, "learning_rate": 9.773304417109048e-06, "loss": 0.8142, "step": 4307 }, { "epoch": 0.12, "grad_norm": 7.252880313531117, "learning_rate": 9.773166337776587e-06, "loss": 0.5261, "step": 4308 }, { "epoch": 0.12, "grad_norm": 6.300879480645084, "learning_rate": 9.773028217381163e-06, "loss": 0.6192, "step": 4309 }, { "epoch": 0.12, "grad_norm": 7.153754186394063, "learning_rate": 9.772890055923965e-06, "loss": 0.5832, "step": 4310 }, { "epoch": 0.12, "grad_norm": 5.2224287669727705, "learning_rate": 9.772751853406182e-06, "loss": 0.5052, "step": 4311 }, { "epoch": 0.12, "grad_norm": 4.560461869356329, "learning_rate": 9.772613609829003e-06, "loss": 0.193, "step": 4312 }, { "epoch": 0.12, "grad_norm": 7.9208486520404024, "learning_rate": 9.772475325193616e-06, "loss": 0.4438, "step": 4313 }, { "epoch": 0.12, "grad_norm": 5.207709826977526, "learning_rate": 9.77233699950121e-06, "loss": 0.6205, "step": 4314 }, { "epoch": 0.12, "grad_norm": 6.51201568091283, "learning_rate": 9.772198632752979e-06, "loss": 0.5812, "step": 4315 }, { "epoch": 0.12, "grad_norm": 8.442764391861061, "learning_rate": 9.772060224950109e-06, "loss": 0.843, "step": 4316 }, { "epoch": 0.12, "grad_norm": 7.032230427522024, "learning_rate": 9.771921776093793e-06, "loss": 0.7723, "step": 4317 }, { "epoch": 0.12, "grad_norm": 10.646605436889093, "learning_rate": 9.771783286185223e-06, "loss": 0.7384, "step": 4318 }, { "epoch": 0.12, "grad_norm": 3.2824173077105856, "learning_rate": 9.771644755225587e-06, "loss": 0.361, "step": 4319 }, { "epoch": 0.12, "grad_norm": 2.736056666281654, "learning_rate": 9.771506183216079e-06, "loss": 0.3331, "step": 4320 }, { "epoch": 0.12, "grad_norm": 4.314468653155765, "learning_rate": 9.77136757015789e-06, "loss": 0.6349, "step": 4321 }, { "epoch": 0.12, "grad_norm": 9.73022695858886, "learning_rate": 9.771228916052215e-06, "loss": 1.0588, "step": 4322 }, { "epoch": 0.12, "grad_norm": 6.341294889375784, "learning_rate": 9.771090220900243e-06, "loss": 0.495, "step": 4323 }, { "epoch": 0.12, "grad_norm": 4.115562972612977, "learning_rate": 9.77095148470317e-06, "loss": 0.6391, "step": 4324 }, { "epoch": 0.12, "grad_norm": 5.043724943001866, "learning_rate": 9.770812707462191e-06, "loss": 0.5926, "step": 4325 }, { "epoch": 0.12, "grad_norm": 8.782208047646028, "learning_rate": 9.770673889178495e-06, "loss": 0.9571, "step": 4326 }, { "epoch": 0.12, "grad_norm": 10.38552800268389, "learning_rate": 9.770535029853281e-06, "loss": 0.5633, "step": 4327 }, { "epoch": 0.12, "grad_norm": 7.535065566842711, "learning_rate": 9.770396129487739e-06, "loss": 0.5979, "step": 4328 }, { "epoch": 0.12, "grad_norm": 4.231220999185863, "learning_rate": 9.770257188083067e-06, "loss": 0.51, "step": 4329 }, { "epoch": 0.12, "grad_norm": 9.32275691117524, "learning_rate": 9.770118205640463e-06, "loss": 0.4071, "step": 4330 }, { "epoch": 0.12, "grad_norm": 5.715433044714183, "learning_rate": 9.769979182161116e-06, "loss": 0.4639, "step": 4331 }, { "epoch": 0.12, "grad_norm": 3.538355405118444, "learning_rate": 9.769840117646226e-06, "loss": 0.782, "step": 4332 }, { "epoch": 0.12, "grad_norm": 6.857211316993114, "learning_rate": 9.769701012096987e-06, "loss": 0.9031, "step": 4333 }, { "epoch": 0.12, "grad_norm": 5.585973736838866, "learning_rate": 9.7695618655146e-06, "loss": 0.5753, "step": 4334 }, { "epoch": 0.12, "grad_norm": 4.798199796860108, "learning_rate": 9.76942267790026e-06, "loss": 0.5309, "step": 4335 }, { "epoch": 0.12, "grad_norm": 5.110016388838852, "learning_rate": 9.769283449255164e-06, "loss": 0.5174, "step": 4336 }, { "epoch": 0.12, "grad_norm": 6.213985208784118, "learning_rate": 9.769144179580507e-06, "loss": 0.7263, "step": 4337 }, { "epoch": 0.12, "grad_norm": 4.536966677132398, "learning_rate": 9.769004868877492e-06, "loss": 0.8443, "step": 4338 }, { "epoch": 0.12, "grad_norm": 5.82620584015724, "learning_rate": 9.768865517147314e-06, "loss": 0.2074, "step": 4339 }, { "epoch": 0.12, "grad_norm": 9.688453159817328, "learning_rate": 9.768726124391174e-06, "loss": 0.4856, "step": 4340 }, { "epoch": 0.12, "grad_norm": 4.584188878520408, "learning_rate": 9.768586690610272e-06, "loss": 0.7043, "step": 4341 }, { "epoch": 0.12, "grad_norm": 4.071975134132847, "learning_rate": 9.768447215805804e-06, "loss": 0.6384, "step": 4342 }, { "epoch": 0.12, "grad_norm": 6.872360346968604, "learning_rate": 9.768307699978971e-06, "loss": 0.4144, "step": 4343 }, { "epoch": 0.12, "grad_norm": 4.998464682416589, "learning_rate": 9.768168143130973e-06, "loss": 0.389, "step": 4344 }, { "epoch": 0.12, "grad_norm": 8.214322380611968, "learning_rate": 9.768028545263015e-06, "loss": 0.6822, "step": 4345 }, { "epoch": 0.12, "grad_norm": 8.409784871365117, "learning_rate": 9.767888906376293e-06, "loss": 0.6468, "step": 4346 }, { "epoch": 0.12, "grad_norm": 6.510946007126372, "learning_rate": 9.76774922647201e-06, "loss": 0.4581, "step": 4347 }, { "epoch": 0.12, "grad_norm": 3.837226045254774, "learning_rate": 9.767609505551365e-06, "loss": 0.3214, "step": 4348 }, { "epoch": 0.12, "grad_norm": 4.526755925905745, "learning_rate": 9.767469743615565e-06, "loss": 0.4389, "step": 4349 }, { "epoch": 0.12, "grad_norm": 9.240487646140574, "learning_rate": 9.767329940665809e-06, "loss": 0.8973, "step": 4350 }, { "epoch": 0.12, "grad_norm": 5.945897678845971, "learning_rate": 9.7671900967033e-06, "loss": 0.6282, "step": 4351 }, { "epoch": 0.12, "grad_norm": 6.311771426315707, "learning_rate": 9.767050211729243e-06, "loss": 0.6798, "step": 4352 }, { "epoch": 0.12, "grad_norm": 6.24934536366532, "learning_rate": 9.766910285744839e-06, "loss": 0.5506, "step": 4353 }, { "epoch": 0.12, "grad_norm": 16.31615471366332, "learning_rate": 9.766770318751292e-06, "loss": 0.7503, "step": 4354 }, { "epoch": 0.12, "grad_norm": 11.166121189591948, "learning_rate": 9.766630310749808e-06, "loss": 0.962, "step": 4355 }, { "epoch": 0.12, "grad_norm": 4.735177201899876, "learning_rate": 9.76649026174159e-06, "loss": 0.3369, "step": 4356 }, { "epoch": 0.12, "grad_norm": 9.459643728309896, "learning_rate": 9.766350171727843e-06, "loss": 0.3338, "step": 4357 }, { "epoch": 0.12, "grad_norm": 9.397397663181325, "learning_rate": 9.766210040709771e-06, "loss": 0.9057, "step": 4358 }, { "epoch": 0.12, "grad_norm": 8.29257429768742, "learning_rate": 9.766069868688584e-06, "loss": 0.3854, "step": 4359 }, { "epoch": 0.12, "grad_norm": 5.384429378755213, "learning_rate": 9.765929655665482e-06, "loss": 0.5276, "step": 4360 }, { "epoch": 0.12, "grad_norm": 3.7911728635211213, "learning_rate": 9.765789401641675e-06, "loss": 0.6019, "step": 4361 }, { "epoch": 0.12, "grad_norm": 10.069379646414324, "learning_rate": 9.765649106618369e-06, "loss": 0.6512, "step": 4362 }, { "epoch": 0.12, "grad_norm": 5.265997853647758, "learning_rate": 9.76550877059677e-06, "loss": 0.6267, "step": 4363 }, { "epoch": 0.12, "grad_norm": 6.324224027898894, "learning_rate": 9.765368393578084e-06, "loss": 0.7289, "step": 4364 }, { "epoch": 0.13, "grad_norm": 5.968461673822836, "learning_rate": 9.765227975563526e-06, "loss": 0.5021, "step": 4365 }, { "epoch": 0.13, "grad_norm": 6.911766093305275, "learning_rate": 9.765087516554293e-06, "loss": 0.6982, "step": 4366 }, { "epoch": 0.13, "grad_norm": 13.551549894647446, "learning_rate": 9.7649470165516e-06, "loss": 0.6002, "step": 4367 }, { "epoch": 0.13, "grad_norm": 5.417201685769977, "learning_rate": 9.764806475556655e-06, "loss": 0.4235, "step": 4368 }, { "epoch": 0.13, "grad_norm": 6.969505029669004, "learning_rate": 9.764665893570666e-06, "loss": 0.5185, "step": 4369 }, { "epoch": 0.13, "grad_norm": 4.327970811872371, "learning_rate": 9.764525270594844e-06, "loss": 0.3924, "step": 4370 }, { "epoch": 0.13, "grad_norm": 4.297363392210401, "learning_rate": 9.764384606630398e-06, "loss": 0.5505, "step": 4371 }, { "epoch": 0.13, "grad_norm": 6.473605632509128, "learning_rate": 9.764243901678536e-06, "loss": 0.588, "step": 4372 }, { "epoch": 0.13, "grad_norm": 3.982207767255683, "learning_rate": 9.764103155740472e-06, "loss": 0.5321, "step": 4373 }, { "epoch": 0.13, "grad_norm": 6.7246046949863905, "learning_rate": 9.763962368817415e-06, "loss": 0.5206, "step": 4374 }, { "epoch": 0.13, "grad_norm": 4.424697783758807, "learning_rate": 9.763821540910575e-06, "loss": 0.4061, "step": 4375 }, { "epoch": 0.13, "grad_norm": 12.319132533682973, "learning_rate": 9.763680672021164e-06, "loss": 0.9824, "step": 4376 }, { "epoch": 0.13, "grad_norm": 6.324024444856153, "learning_rate": 9.763539762150397e-06, "loss": 0.3014, "step": 4377 }, { "epoch": 0.13, "grad_norm": 3.251057929467167, "learning_rate": 9.763398811299485e-06, "loss": 0.3197, "step": 4378 }, { "epoch": 0.13, "grad_norm": 6.193678227845049, "learning_rate": 9.763257819469637e-06, "loss": 0.6882, "step": 4379 }, { "epoch": 0.13, "grad_norm": 5.017503143142301, "learning_rate": 9.763116786662068e-06, "loss": 0.26, "step": 4380 }, { "epoch": 0.13, "grad_norm": 8.864757806575332, "learning_rate": 9.762975712877993e-06, "loss": 0.6085, "step": 4381 }, { "epoch": 0.13, "grad_norm": 4.724202523724873, "learning_rate": 9.762834598118625e-06, "loss": 0.3762, "step": 4382 }, { "epoch": 0.13, "grad_norm": 4.834415276328189, "learning_rate": 9.762693442385178e-06, "loss": 0.2813, "step": 4383 }, { "epoch": 0.13, "grad_norm": 3.832994890130486, "learning_rate": 9.762552245678863e-06, "loss": 0.15, "step": 4384 }, { "epoch": 0.13, "grad_norm": 4.4283283173505525, "learning_rate": 9.762411008000899e-06, "loss": 0.1883, "step": 4385 }, { "epoch": 0.13, "grad_norm": 6.998111333824333, "learning_rate": 9.7622697293525e-06, "loss": 0.7026, "step": 4386 }, { "epoch": 0.13, "grad_norm": 8.304044625061318, "learning_rate": 9.762128409734879e-06, "loss": 0.4705, "step": 4387 }, { "epoch": 0.13, "grad_norm": 8.446369172085845, "learning_rate": 9.761987049149255e-06, "loss": 0.4967, "step": 4388 }, { "epoch": 0.13, "grad_norm": 9.935465382473687, "learning_rate": 9.761845647596843e-06, "loss": 0.9553, "step": 4389 }, { "epoch": 0.13, "grad_norm": 5.276533063151828, "learning_rate": 9.76170420507886e-06, "loss": 0.5934, "step": 4390 }, { "epoch": 0.13, "grad_norm": 5.058689755740148, "learning_rate": 9.76156272159652e-06, "loss": 0.3799, "step": 4391 }, { "epoch": 0.13, "grad_norm": 8.020486974462688, "learning_rate": 9.761421197151042e-06, "loss": 0.791, "step": 4392 }, { "epoch": 0.13, "grad_norm": 6.989912121047084, "learning_rate": 9.761279631743644e-06, "loss": 0.5594, "step": 4393 }, { "epoch": 0.13, "grad_norm": 10.817109112343273, "learning_rate": 9.761138025375545e-06, "loss": 0.6128, "step": 4394 }, { "epoch": 0.13, "grad_norm": 4.9536982063244475, "learning_rate": 9.76099637804796e-06, "loss": 0.668, "step": 4395 }, { "epoch": 0.13, "grad_norm": 3.7797583167706663, "learning_rate": 9.76085468976211e-06, "loss": 0.2255, "step": 4396 }, { "epoch": 0.13, "grad_norm": 8.16835112294697, "learning_rate": 9.760712960519213e-06, "loss": 0.5462, "step": 4397 }, { "epoch": 0.13, "grad_norm": 17.12867363279192, "learning_rate": 9.760571190320489e-06, "loss": 0.7749, "step": 4398 }, { "epoch": 0.13, "grad_norm": 6.085166715847295, "learning_rate": 9.760429379167158e-06, "loss": 0.7852, "step": 4399 }, { "epoch": 0.13, "grad_norm": 4.879189329586066, "learning_rate": 9.760287527060437e-06, "loss": 0.6303, "step": 4400 }, { "epoch": 0.13, "grad_norm": 4.852986739449627, "learning_rate": 9.76014563400155e-06, "loss": 0.3058, "step": 4401 }, { "epoch": 0.13, "grad_norm": 8.868054425717366, "learning_rate": 9.760003699991717e-06, "loss": 0.8546, "step": 4402 }, { "epoch": 0.13, "grad_norm": 10.566563654095624, "learning_rate": 9.759861725032159e-06, "loss": 0.8021, "step": 4403 }, { "epoch": 0.13, "grad_norm": 9.45513501196479, "learning_rate": 9.759719709124094e-06, "loss": 0.159, "step": 4404 }, { "epoch": 0.13, "grad_norm": 7.690859052994175, "learning_rate": 9.759577652268745e-06, "loss": 0.9353, "step": 4405 }, { "epoch": 0.13, "grad_norm": 6.737886650583644, "learning_rate": 9.75943555446734e-06, "loss": 0.6083, "step": 4406 }, { "epoch": 0.13, "grad_norm": 8.168069131928526, "learning_rate": 9.759293415721095e-06, "loss": 0.7519, "step": 4407 }, { "epoch": 0.13, "grad_norm": 6.545966547390139, "learning_rate": 9.759151236031234e-06, "loss": 0.8296, "step": 4408 }, { "epoch": 0.13, "grad_norm": 4.242468274907737, "learning_rate": 9.759009015398982e-06, "loss": 0.2886, "step": 4409 }, { "epoch": 0.13, "grad_norm": 5.805474085542898, "learning_rate": 9.75886675382556e-06, "loss": 0.7203, "step": 4410 }, { "epoch": 0.13, "grad_norm": 6.583058951088272, "learning_rate": 9.758724451312194e-06, "loss": 0.7383, "step": 4411 }, { "epoch": 0.13, "grad_norm": 3.9523945612298323, "learning_rate": 9.758582107860106e-06, "loss": 0.3864, "step": 4412 }, { "epoch": 0.13, "grad_norm": 6.058751077779772, "learning_rate": 9.758439723470525e-06, "loss": 0.6159, "step": 4413 }, { "epoch": 0.13, "grad_norm": 4.2452716470212195, "learning_rate": 9.75829729814467e-06, "loss": 0.4812, "step": 4414 }, { "epoch": 0.13, "grad_norm": 10.02124477554305, "learning_rate": 9.75815483188377e-06, "loss": 0.6443, "step": 4415 }, { "epoch": 0.13, "grad_norm": 3.164126360213483, "learning_rate": 9.758012324689052e-06, "loss": 0.6077, "step": 4416 }, { "epoch": 0.13, "grad_norm": 3.6411416992780987, "learning_rate": 9.757869776561738e-06, "loss": 0.4148, "step": 4417 }, { "epoch": 0.13, "grad_norm": 4.793758112814752, "learning_rate": 9.757727187503056e-06, "loss": 0.3892, "step": 4418 }, { "epoch": 0.13, "grad_norm": 7.879371761703522, "learning_rate": 9.757584557514232e-06, "loss": 0.7989, "step": 4419 }, { "epoch": 0.13, "grad_norm": 6.480420922395465, "learning_rate": 9.757441886596495e-06, "loss": 0.9819, "step": 4420 }, { "epoch": 0.13, "grad_norm": 4.320400685519233, "learning_rate": 9.757299174751072e-06, "loss": 0.2894, "step": 4421 }, { "epoch": 0.13, "grad_norm": 5.959806722476913, "learning_rate": 9.757156421979188e-06, "loss": 0.608, "step": 4422 }, { "epoch": 0.13, "grad_norm": 5.3968539530791535, "learning_rate": 9.757013628282074e-06, "loss": 0.7383, "step": 4423 }, { "epoch": 0.13, "grad_norm": 6.635782356505535, "learning_rate": 9.756870793660957e-06, "loss": 0.5935, "step": 4424 }, { "epoch": 0.13, "grad_norm": 5.938446130158621, "learning_rate": 9.756727918117068e-06, "loss": 0.4877, "step": 4425 }, { "epoch": 0.13, "grad_norm": 8.11818779224097, "learning_rate": 9.756585001651633e-06, "loss": 0.5819, "step": 4426 }, { "epoch": 0.13, "grad_norm": 3.8414074962738853, "learning_rate": 9.756442044265884e-06, "loss": 0.1807, "step": 4427 }, { "epoch": 0.13, "grad_norm": 4.1763979221831695, "learning_rate": 9.756299045961047e-06, "loss": 0.3724, "step": 4428 }, { "epoch": 0.13, "grad_norm": 7.635323899049469, "learning_rate": 9.756156006738358e-06, "loss": 0.8046, "step": 4429 }, { "epoch": 0.13, "grad_norm": 5.374710208266625, "learning_rate": 9.756012926599042e-06, "loss": 0.3913, "step": 4430 }, { "epoch": 0.13, "grad_norm": 9.953020080300856, "learning_rate": 9.755869805544333e-06, "loss": 0.4819, "step": 4431 }, { "epoch": 0.13, "grad_norm": 8.883209085460383, "learning_rate": 9.755726643575462e-06, "loss": 0.3244, "step": 4432 }, { "epoch": 0.13, "grad_norm": 4.067869394054981, "learning_rate": 9.755583440693662e-06, "loss": 0.3013, "step": 4433 }, { "epoch": 0.13, "grad_norm": 12.173412945683074, "learning_rate": 9.755440196900162e-06, "loss": 0.5132, "step": 4434 }, { "epoch": 0.13, "grad_norm": 9.162529988779742, "learning_rate": 9.755296912196193e-06, "loss": 1.1465, "step": 4435 }, { "epoch": 0.13, "grad_norm": 8.740788106586452, "learning_rate": 9.755153586582993e-06, "loss": 0.6951, "step": 4436 }, { "epoch": 0.13, "grad_norm": 5.009042930900364, "learning_rate": 9.755010220061792e-06, "loss": 0.5916, "step": 4437 }, { "epoch": 0.13, "grad_norm": 7.500827552915522, "learning_rate": 9.754866812633822e-06, "loss": 0.5933, "step": 4438 }, { "epoch": 0.13, "grad_norm": 7.697900513118853, "learning_rate": 9.754723364300319e-06, "loss": 0.4918, "step": 4439 }, { "epoch": 0.13, "grad_norm": 8.732606056982517, "learning_rate": 9.754579875062517e-06, "loss": 0.8639, "step": 4440 }, { "epoch": 0.13, "grad_norm": 2.7607605821869057, "learning_rate": 9.754436344921648e-06, "loss": 0.1908, "step": 4441 }, { "epoch": 0.13, "grad_norm": 7.666342953053577, "learning_rate": 9.75429277387895e-06, "loss": 0.9026, "step": 4442 }, { "epoch": 0.13, "grad_norm": 6.265719824475337, "learning_rate": 9.754149161935657e-06, "loss": 0.3856, "step": 4443 }, { "epoch": 0.13, "grad_norm": 9.332387285877783, "learning_rate": 9.754005509093002e-06, "loss": 0.4245, "step": 4444 }, { "epoch": 0.13, "grad_norm": 9.44652540119681, "learning_rate": 9.753861815352226e-06, "loss": 0.8292, "step": 4445 }, { "epoch": 0.13, "grad_norm": 3.4330714569555534, "learning_rate": 9.75371808071456e-06, "loss": 0.7525, "step": 4446 }, { "epoch": 0.13, "grad_norm": 8.459790321980215, "learning_rate": 9.753574305181243e-06, "loss": 1.0241, "step": 4447 }, { "epoch": 0.13, "grad_norm": 8.941528052837375, "learning_rate": 9.75343048875351e-06, "loss": 0.6569, "step": 4448 }, { "epoch": 0.13, "grad_norm": 4.5387072268198505, "learning_rate": 9.753286631432603e-06, "loss": 0.5738, "step": 4449 }, { "epoch": 0.13, "grad_norm": 9.56563561584422, "learning_rate": 9.753142733219754e-06, "loss": 0.8884, "step": 4450 }, { "epoch": 0.13, "grad_norm": 5.356822393005719, "learning_rate": 9.752998794116205e-06, "loss": 0.4041, "step": 4451 }, { "epoch": 0.13, "grad_norm": 4.6169834769624405, "learning_rate": 9.75285481412319e-06, "loss": 0.9873, "step": 4452 }, { "epoch": 0.13, "grad_norm": 3.3803798747606413, "learning_rate": 9.752710793241954e-06, "loss": 0.5372, "step": 4453 }, { "epoch": 0.13, "grad_norm": 6.543549305225584, "learning_rate": 9.752566731473729e-06, "loss": 0.5429, "step": 4454 }, { "epoch": 0.13, "grad_norm": 7.755403388554831, "learning_rate": 9.752422628819757e-06, "loss": 0.4854, "step": 4455 }, { "epoch": 0.13, "grad_norm": 8.323849208439352, "learning_rate": 9.75227848528128e-06, "loss": 0.498, "step": 4456 }, { "epoch": 0.13, "grad_norm": 3.568410988921414, "learning_rate": 9.752134300859537e-06, "loss": 0.2683, "step": 4457 }, { "epoch": 0.13, "grad_norm": 6.515479911817143, "learning_rate": 9.751990075555766e-06, "loss": 0.4341, "step": 4458 }, { "epoch": 0.13, "grad_norm": 7.349985275772813, "learning_rate": 9.751845809371211e-06, "loss": 0.8943, "step": 4459 }, { "epoch": 0.13, "grad_norm": 3.9548786480367535, "learning_rate": 9.751701502307112e-06, "loss": 0.2044, "step": 4460 }, { "epoch": 0.13, "grad_norm": 5.6195231583599465, "learning_rate": 9.751557154364708e-06, "loss": 0.2491, "step": 4461 }, { "epoch": 0.13, "grad_norm": 10.241090903511301, "learning_rate": 9.751412765545243e-06, "loss": 0.7408, "step": 4462 }, { "epoch": 0.13, "grad_norm": 9.387151447027405, "learning_rate": 9.75126833584996e-06, "loss": 0.5827, "step": 4463 }, { "epoch": 0.13, "grad_norm": 4.830715509140025, "learning_rate": 9.7511238652801e-06, "loss": 0.4986, "step": 4464 }, { "epoch": 0.13, "grad_norm": 2.3674681846470844, "learning_rate": 9.750979353836907e-06, "loss": 0.1097, "step": 4465 }, { "epoch": 0.13, "grad_norm": 6.203123078237856, "learning_rate": 9.750834801521623e-06, "loss": 0.4671, "step": 4466 }, { "epoch": 0.13, "grad_norm": 4.172476592961066, "learning_rate": 9.75069020833549e-06, "loss": 0.8196, "step": 4467 }, { "epoch": 0.13, "grad_norm": 4.945284923217444, "learning_rate": 9.750545574279758e-06, "loss": 0.5618, "step": 4468 }, { "epoch": 0.13, "grad_norm": 5.918358254283079, "learning_rate": 9.750400899355667e-06, "loss": 0.4918, "step": 4469 }, { "epoch": 0.13, "grad_norm": 6.420483812064434, "learning_rate": 9.750256183564459e-06, "loss": 0.8339, "step": 4470 }, { "epoch": 0.13, "grad_norm": 4.049779313750539, "learning_rate": 9.750111426907386e-06, "loss": 0.3319, "step": 4471 }, { "epoch": 0.13, "grad_norm": 4.094816898775087, "learning_rate": 9.749966629385685e-06, "loss": 0.3666, "step": 4472 }, { "epoch": 0.13, "grad_norm": 5.363966995931384, "learning_rate": 9.749821791000608e-06, "loss": 0.8085, "step": 4473 }, { "epoch": 0.13, "grad_norm": 10.938703721429912, "learning_rate": 9.749676911753398e-06, "loss": 0.8306, "step": 4474 }, { "epoch": 0.13, "grad_norm": 10.26702016700762, "learning_rate": 9.749531991645304e-06, "loss": 0.7854, "step": 4475 }, { "epoch": 0.13, "grad_norm": 11.138764770484322, "learning_rate": 9.74938703067757e-06, "loss": 0.9173, "step": 4476 }, { "epoch": 0.13, "grad_norm": 3.741986677479437, "learning_rate": 9.749242028851443e-06, "loss": 0.3278, "step": 4477 }, { "epoch": 0.13, "grad_norm": 11.954190714082744, "learning_rate": 9.749096986168174e-06, "loss": 0.7764, "step": 4478 }, { "epoch": 0.13, "grad_norm": 5.10000133140397, "learning_rate": 9.748951902629007e-06, "loss": 0.5162, "step": 4479 }, { "epoch": 0.13, "grad_norm": 8.907113016712007, "learning_rate": 9.748806778235192e-06, "loss": 0.5135, "step": 4480 }, { "epoch": 0.13, "grad_norm": 4.617556148986956, "learning_rate": 9.748661612987975e-06, "loss": 0.6747, "step": 4481 }, { "epoch": 0.13, "grad_norm": 7.254796973344649, "learning_rate": 9.748516406888609e-06, "loss": 0.884, "step": 4482 }, { "epoch": 0.13, "grad_norm": 4.715875116165301, "learning_rate": 9.74837115993834e-06, "loss": 0.4004, "step": 4483 }, { "epoch": 0.13, "grad_norm": 1.5843156075490579, "learning_rate": 9.748225872138419e-06, "loss": 0.2101, "step": 4484 }, { "epoch": 0.13, "grad_norm": 4.611204424390119, "learning_rate": 9.748080543490094e-06, "loss": 0.5331, "step": 4485 }, { "epoch": 0.13, "grad_norm": 4.539820281181272, "learning_rate": 9.747935173994619e-06, "loss": 0.3489, "step": 4486 }, { "epoch": 0.13, "grad_norm": 10.048642775714097, "learning_rate": 9.747789763653241e-06, "loss": 0.369, "step": 4487 }, { "epoch": 0.13, "grad_norm": 8.195283291512833, "learning_rate": 9.747644312467212e-06, "loss": 0.8193, "step": 4488 }, { "epoch": 0.13, "grad_norm": 7.5842594133205425, "learning_rate": 9.747498820437785e-06, "loss": 0.3041, "step": 4489 }, { "epoch": 0.13, "grad_norm": 8.92827957548253, "learning_rate": 9.747353287566209e-06, "loss": 0.3082, "step": 4490 }, { "epoch": 0.13, "grad_norm": 6.219485292143904, "learning_rate": 9.747207713853738e-06, "loss": 0.7919, "step": 4491 }, { "epoch": 0.13, "grad_norm": 8.722708292360423, "learning_rate": 9.747062099301623e-06, "loss": 0.7291, "step": 4492 }, { "epoch": 0.13, "grad_norm": 6.5732673595798286, "learning_rate": 9.746916443911116e-06, "loss": 0.4002, "step": 4493 }, { "epoch": 0.13, "grad_norm": 10.398650822790723, "learning_rate": 9.746770747683475e-06, "loss": 0.7139, "step": 4494 }, { "epoch": 0.13, "grad_norm": 4.6740811347628215, "learning_rate": 9.746625010619946e-06, "loss": 0.2714, "step": 4495 }, { "epoch": 0.13, "grad_norm": 7.483112204781387, "learning_rate": 9.746479232721787e-06, "loss": 0.3917, "step": 4496 }, { "epoch": 0.13, "grad_norm": 5.167795860299263, "learning_rate": 9.746333413990252e-06, "loss": 0.4526, "step": 4497 }, { "epoch": 0.13, "grad_norm": 5.947089291919728, "learning_rate": 9.746187554426596e-06, "loss": 0.3839, "step": 4498 }, { "epoch": 0.13, "grad_norm": 7.709170926612823, "learning_rate": 9.74604165403207e-06, "loss": 0.6345, "step": 4499 }, { "epoch": 0.13, "grad_norm": 8.090703560280886, "learning_rate": 9.745895712807936e-06, "loss": 0.3448, "step": 4500 }, { "epoch": 0.13, "grad_norm": 8.862454856251825, "learning_rate": 9.745749730755442e-06, "loss": 0.3766, "step": 4501 }, { "epoch": 0.13, "grad_norm": 6.244675695399278, "learning_rate": 9.745603707875851e-06, "loss": 0.4917, "step": 4502 }, { "epoch": 0.13, "grad_norm": 5.558541850573551, "learning_rate": 9.745457644170412e-06, "loss": 0.493, "step": 4503 }, { "epoch": 0.13, "grad_norm": 10.846285564790303, "learning_rate": 9.745311539640388e-06, "loss": 0.3005, "step": 4504 }, { "epoch": 0.13, "grad_norm": 9.197005797430487, "learning_rate": 9.74516539428703e-06, "loss": 0.5753, "step": 4505 }, { "epoch": 0.13, "grad_norm": 5.934398845124512, "learning_rate": 9.745019208111602e-06, "loss": 0.3756, "step": 4506 }, { "epoch": 0.13, "grad_norm": 7.453085861263236, "learning_rate": 9.744872981115355e-06, "loss": 0.4522, "step": 4507 }, { "epoch": 0.13, "grad_norm": 9.088231513006068, "learning_rate": 9.74472671329955e-06, "loss": 0.7787, "step": 4508 }, { "epoch": 0.13, "grad_norm": 10.909085492652936, "learning_rate": 9.744580404665447e-06, "loss": 0.7943, "step": 4509 }, { "epoch": 0.13, "grad_norm": 6.781014258158897, "learning_rate": 9.744434055214303e-06, "loss": 0.6539, "step": 4510 }, { "epoch": 0.13, "grad_norm": 9.141557433995082, "learning_rate": 9.744287664947376e-06, "loss": 0.5472, "step": 4511 }, { "epoch": 0.13, "grad_norm": 6.385550613201539, "learning_rate": 9.744141233865926e-06, "loss": 0.4006, "step": 4512 }, { "epoch": 0.13, "grad_norm": 4.726216806427852, "learning_rate": 9.743994761971214e-06, "loss": 0.355, "step": 4513 }, { "epoch": 0.13, "grad_norm": 21.136263761909014, "learning_rate": 9.743848249264498e-06, "loss": 0.4584, "step": 4514 }, { "epoch": 0.13, "grad_norm": 5.794088511366673, "learning_rate": 9.743701695747041e-06, "loss": 0.3756, "step": 4515 }, { "epoch": 0.13, "grad_norm": 3.8060901727302916, "learning_rate": 9.743555101420101e-06, "loss": 0.4172, "step": 4516 }, { "epoch": 0.13, "grad_norm": 9.030979878434968, "learning_rate": 9.743408466284943e-06, "loss": 0.8354, "step": 4517 }, { "epoch": 0.13, "grad_norm": 11.829483982159987, "learning_rate": 9.743261790342825e-06, "loss": 0.8681, "step": 4518 }, { "epoch": 0.13, "grad_norm": 5.845869771237055, "learning_rate": 9.743115073595007e-06, "loss": 0.5571, "step": 4519 }, { "epoch": 0.13, "grad_norm": 8.332448212983138, "learning_rate": 9.742968316042757e-06, "loss": 0.4289, "step": 4520 }, { "epoch": 0.13, "grad_norm": 6.1693672203777785, "learning_rate": 9.742821517687334e-06, "loss": 0.7089, "step": 4521 }, { "epoch": 0.13, "grad_norm": 6.462336470639665, "learning_rate": 9.74267467853e-06, "loss": 0.8894, "step": 4522 }, { "epoch": 0.13, "grad_norm": 5.112402552676281, "learning_rate": 9.742527798572021e-06, "loss": 0.4363, "step": 4523 }, { "epoch": 0.13, "grad_norm": 4.662706392030803, "learning_rate": 9.742380877814659e-06, "loss": 0.3622, "step": 4524 }, { "epoch": 0.13, "grad_norm": 9.12957737699728, "learning_rate": 9.742233916259178e-06, "loss": 0.6648, "step": 4525 }, { "epoch": 0.13, "grad_norm": 8.05926129252407, "learning_rate": 9.742086913906843e-06, "loss": 0.8763, "step": 4526 }, { "epoch": 0.13, "grad_norm": 2.6523553008107514, "learning_rate": 9.741939870758918e-06, "loss": 0.195, "step": 4527 }, { "epoch": 0.13, "grad_norm": 5.230049464062975, "learning_rate": 9.741792786816669e-06, "loss": 0.6289, "step": 4528 }, { "epoch": 0.13, "grad_norm": 5.079027747094779, "learning_rate": 9.741645662081358e-06, "loss": 0.2778, "step": 4529 }, { "epoch": 0.13, "grad_norm": 5.815246876390367, "learning_rate": 9.741498496554255e-06, "loss": 0.6845, "step": 4530 }, { "epoch": 0.13, "grad_norm": 4.266794316920452, "learning_rate": 9.741351290236624e-06, "loss": 0.3328, "step": 4531 }, { "epoch": 0.13, "grad_norm": 4.8280807542471, "learning_rate": 9.741204043129732e-06, "loss": 0.6148, "step": 4532 }, { "epoch": 0.13, "grad_norm": 16.362015562995026, "learning_rate": 9.741056755234844e-06, "loss": 0.9709, "step": 4533 }, { "epoch": 0.13, "grad_norm": 7.171511439331523, "learning_rate": 9.740909426553229e-06, "loss": 0.5638, "step": 4534 }, { "epoch": 0.13, "grad_norm": 8.306392691988973, "learning_rate": 9.740762057086154e-06, "loss": 0.4432, "step": 4535 }, { "epoch": 0.13, "grad_norm": 4.477840061532871, "learning_rate": 9.740614646834887e-06, "loss": 0.4254, "step": 4536 }, { "epoch": 0.13, "grad_norm": 6.196794791960159, "learning_rate": 9.740467195800698e-06, "loss": 0.5901, "step": 4537 }, { "epoch": 0.13, "grad_norm": 4.272647401065002, "learning_rate": 9.74031970398485e-06, "loss": 0.5933, "step": 4538 }, { "epoch": 0.13, "grad_norm": 9.80408267401175, "learning_rate": 9.740172171388616e-06, "loss": 1.0517, "step": 4539 }, { "epoch": 0.13, "grad_norm": 5.303055434931203, "learning_rate": 9.740024598013264e-06, "loss": 0.3524, "step": 4540 }, { "epoch": 0.13, "grad_norm": 6.509903698866679, "learning_rate": 9.739876983860065e-06, "loss": 0.7057, "step": 4541 }, { "epoch": 0.13, "grad_norm": 5.360218815785185, "learning_rate": 9.73972932893029e-06, "loss": 0.4197, "step": 4542 }, { "epoch": 0.13, "grad_norm": 6.405052556962065, "learning_rate": 9.739581633225203e-06, "loss": 0.3018, "step": 4543 }, { "epoch": 0.13, "grad_norm": 7.54540811818976, "learning_rate": 9.739433896746082e-06, "loss": 0.6776, "step": 4544 }, { "epoch": 0.13, "grad_norm": 9.69387420633752, "learning_rate": 9.739286119494194e-06, "loss": 0.7207, "step": 4545 }, { "epoch": 0.13, "grad_norm": 3.3720330336537607, "learning_rate": 9.73913830147081e-06, "loss": 0.4992, "step": 4546 }, { "epoch": 0.13, "grad_norm": 7.755849415088984, "learning_rate": 9.738990442677203e-06, "loss": 0.6066, "step": 4547 }, { "epoch": 0.13, "grad_norm": 5.841750797458597, "learning_rate": 9.738842543114643e-06, "loss": 0.5747, "step": 4548 }, { "epoch": 0.13, "grad_norm": 9.829952197497104, "learning_rate": 9.738694602784408e-06, "loss": 0.7901, "step": 4549 }, { "epoch": 0.13, "grad_norm": 5.895805367806725, "learning_rate": 9.738546621687764e-06, "loss": 0.2969, "step": 4550 }, { "epoch": 0.13, "grad_norm": 7.905635342433997, "learning_rate": 9.738398599825988e-06, "loss": 0.9864, "step": 4551 }, { "epoch": 0.13, "grad_norm": 6.4091035023755545, "learning_rate": 9.738250537200351e-06, "loss": 0.4142, "step": 4552 }, { "epoch": 0.13, "grad_norm": 5.837891605156324, "learning_rate": 9.738102433812128e-06, "loss": 0.386, "step": 4553 }, { "epoch": 0.13, "grad_norm": 6.511057178778816, "learning_rate": 9.737954289662594e-06, "loss": 0.6571, "step": 4554 }, { "epoch": 0.13, "grad_norm": 3.54547198942646, "learning_rate": 9.737806104753024e-06, "loss": 0.1716, "step": 4555 }, { "epoch": 0.13, "grad_norm": 4.1358278520127465, "learning_rate": 9.73765787908469e-06, "loss": 0.4112, "step": 4556 }, { "epoch": 0.13, "grad_norm": 3.87325684501995, "learning_rate": 9.737509612658869e-06, "loss": 0.4837, "step": 4557 }, { "epoch": 0.13, "grad_norm": 3.141934231074023, "learning_rate": 9.737361305476835e-06, "loss": 0.4229, "step": 4558 }, { "epoch": 0.13, "grad_norm": 5.243798907919175, "learning_rate": 9.737212957539866e-06, "loss": 0.483, "step": 4559 }, { "epoch": 0.13, "grad_norm": 8.093867415235598, "learning_rate": 9.737064568849237e-06, "loss": 0.542, "step": 4560 }, { "epoch": 0.13, "grad_norm": 3.9460249153738833, "learning_rate": 9.736916139406223e-06, "loss": 0.6133, "step": 4561 }, { "epoch": 0.13, "grad_norm": 3.869606818006754, "learning_rate": 9.736767669212105e-06, "loss": 0.3215, "step": 4562 }, { "epoch": 0.13, "grad_norm": 10.997792889509775, "learning_rate": 9.736619158268157e-06, "loss": 0.8143, "step": 4563 }, { "epoch": 0.13, "grad_norm": 8.926865446324221, "learning_rate": 9.736470606575657e-06, "loss": 0.7207, "step": 4564 }, { "epoch": 0.13, "grad_norm": 3.897137931757493, "learning_rate": 9.736322014135884e-06, "loss": 0.3303, "step": 4565 }, { "epoch": 0.13, "grad_norm": 10.834951387515508, "learning_rate": 9.736173380950116e-06, "loss": 0.4935, "step": 4566 }, { "epoch": 0.13, "grad_norm": 4.294062828206646, "learning_rate": 9.736024707019633e-06, "loss": 0.4886, "step": 4567 }, { "epoch": 0.13, "grad_norm": 9.664891989904163, "learning_rate": 9.735875992345711e-06, "loss": 0.9892, "step": 4568 }, { "epoch": 0.13, "grad_norm": 3.8976267734192134, "learning_rate": 9.735727236929631e-06, "loss": 0.2361, "step": 4569 }, { "epoch": 0.13, "grad_norm": 4.003540260521273, "learning_rate": 9.735578440772673e-06, "loss": 0.2734, "step": 4570 }, { "epoch": 0.13, "grad_norm": 7.229265686881414, "learning_rate": 9.735429603876117e-06, "loss": 1.0827, "step": 4571 }, { "epoch": 0.13, "grad_norm": 5.698928041214348, "learning_rate": 9.735280726241241e-06, "loss": 0.6686, "step": 4572 }, { "epoch": 0.13, "grad_norm": 7.163347680758094, "learning_rate": 9.735131807869331e-06, "loss": 0.6733, "step": 4573 }, { "epoch": 0.13, "grad_norm": 6.3601397305259875, "learning_rate": 9.734982848761663e-06, "loss": 0.3325, "step": 4574 }, { "epoch": 0.13, "grad_norm": 2.0414902568690216, "learning_rate": 9.734833848919522e-06, "loss": 0.1292, "step": 4575 }, { "epoch": 0.13, "grad_norm": 6.529598556610187, "learning_rate": 9.734684808344189e-06, "loss": 0.7118, "step": 4576 }, { "epoch": 0.13, "grad_norm": 6.987101216955792, "learning_rate": 9.734535727036943e-06, "loss": 0.2697, "step": 4577 }, { "epoch": 0.13, "grad_norm": 6.754108803195491, "learning_rate": 9.734386604999069e-06, "loss": 0.68, "step": 4578 }, { "epoch": 0.13, "grad_norm": 7.158515837995899, "learning_rate": 9.734237442231852e-06, "loss": 0.8298, "step": 4579 }, { "epoch": 0.13, "grad_norm": 7.340024384554239, "learning_rate": 9.734088238736573e-06, "loss": 0.6494, "step": 4580 }, { "epoch": 0.13, "grad_norm": 9.775028090180527, "learning_rate": 9.733938994514515e-06, "loss": 0.5833, "step": 4581 }, { "epoch": 0.13, "grad_norm": 8.01269590760205, "learning_rate": 9.733789709566962e-06, "loss": 0.7329, "step": 4582 }, { "epoch": 0.13, "grad_norm": 7.436638149045499, "learning_rate": 9.7336403838952e-06, "loss": 0.8607, "step": 4583 }, { "epoch": 0.13, "grad_norm": 5.7549846438841845, "learning_rate": 9.733491017500512e-06, "loss": 0.5564, "step": 4584 }, { "epoch": 0.13, "grad_norm": 9.586933559908106, "learning_rate": 9.733341610384184e-06, "loss": 0.658, "step": 4585 }, { "epoch": 0.13, "grad_norm": 2.3968760598589407, "learning_rate": 9.733192162547501e-06, "loss": 0.2533, "step": 4586 }, { "epoch": 0.13, "grad_norm": 6.511687774676066, "learning_rate": 9.733042673991748e-06, "loss": 0.417, "step": 4587 }, { "epoch": 0.13, "grad_norm": 6.169137043197984, "learning_rate": 9.732893144718212e-06, "loss": 0.4727, "step": 4588 }, { "epoch": 0.13, "grad_norm": 5.110145790685422, "learning_rate": 9.732743574728179e-06, "loss": 0.3706, "step": 4589 }, { "epoch": 0.13, "grad_norm": 9.737047541907867, "learning_rate": 9.732593964022935e-06, "loss": 0.5799, "step": 4590 }, { "epoch": 0.13, "grad_norm": 4.958452698796141, "learning_rate": 9.732444312603771e-06, "loss": 0.5603, "step": 4591 }, { "epoch": 0.13, "grad_norm": 5.012187552286492, "learning_rate": 9.732294620471968e-06, "loss": 0.3345, "step": 4592 }, { "epoch": 0.13, "grad_norm": 4.279265061943164, "learning_rate": 9.732144887628819e-06, "loss": 0.5089, "step": 4593 }, { "epoch": 0.13, "grad_norm": 7.92887024825708, "learning_rate": 9.731995114075608e-06, "loss": 0.5551, "step": 4594 }, { "epoch": 0.13, "grad_norm": 9.328770319663384, "learning_rate": 9.731845299813627e-06, "loss": 0.9312, "step": 4595 }, { "epoch": 0.13, "grad_norm": 7.81558416905771, "learning_rate": 9.731695444844162e-06, "loss": 0.4723, "step": 4596 }, { "epoch": 0.13, "grad_norm": 6.098054275376251, "learning_rate": 9.731545549168507e-06, "loss": 0.5113, "step": 4597 }, { "epoch": 0.13, "grad_norm": 3.754549064735257, "learning_rate": 9.731395612787945e-06, "loss": 0.428, "step": 4598 }, { "epoch": 0.13, "grad_norm": 5.763763871681929, "learning_rate": 9.73124563570377e-06, "loss": 0.5745, "step": 4599 }, { "epoch": 0.13, "grad_norm": 10.938614270808133, "learning_rate": 9.731095617917273e-06, "loss": 0.5617, "step": 4600 }, { "epoch": 0.13, "grad_norm": 6.270914104697663, "learning_rate": 9.73094555942974e-06, "loss": 0.247, "step": 4601 }, { "epoch": 0.13, "grad_norm": 4.1172316836568665, "learning_rate": 9.730795460242467e-06, "loss": 0.1581, "step": 4602 }, { "epoch": 0.13, "grad_norm": 11.099115326139703, "learning_rate": 9.730645320356742e-06, "loss": 0.7197, "step": 4603 }, { "epoch": 0.13, "grad_norm": 10.420220056359137, "learning_rate": 9.730495139773857e-06, "loss": 0.5822, "step": 4604 }, { "epoch": 0.13, "grad_norm": 7.859024313047783, "learning_rate": 9.730344918495107e-06, "loss": 0.4027, "step": 4605 }, { "epoch": 0.13, "grad_norm": 2.4867180865326146, "learning_rate": 9.73019465652178e-06, "loss": 0.4538, "step": 4606 }, { "epoch": 0.13, "grad_norm": 9.179522521788774, "learning_rate": 9.730044353855173e-06, "loss": 0.3713, "step": 4607 }, { "epoch": 0.13, "grad_norm": 3.7021311138435027, "learning_rate": 9.729894010496575e-06, "loss": 0.4304, "step": 4608 }, { "epoch": 0.13, "grad_norm": 5.433684972878991, "learning_rate": 9.729743626447282e-06, "loss": 0.509, "step": 4609 }, { "epoch": 0.13, "grad_norm": 6.119719506237354, "learning_rate": 9.729593201708587e-06, "loss": 0.4246, "step": 4610 }, { "epoch": 0.13, "grad_norm": 5.791995151270107, "learning_rate": 9.729442736281784e-06, "loss": 0.5135, "step": 4611 }, { "epoch": 0.13, "grad_norm": 7.36006533873622, "learning_rate": 9.729292230168166e-06, "loss": 0.5817, "step": 4612 }, { "epoch": 0.13, "grad_norm": 3.5761454741140453, "learning_rate": 9.72914168336903e-06, "loss": 0.3927, "step": 4613 }, { "epoch": 0.13, "grad_norm": 4.285295009539789, "learning_rate": 9.728991095885671e-06, "loss": 0.4676, "step": 4614 }, { "epoch": 0.13, "grad_norm": 4.169932509683453, "learning_rate": 9.728840467719385e-06, "loss": 0.3219, "step": 4615 }, { "epoch": 0.13, "grad_norm": 6.6657925827485505, "learning_rate": 9.728689798871464e-06, "loss": 0.6785, "step": 4616 }, { "epoch": 0.13, "grad_norm": 5.315846365345415, "learning_rate": 9.72853908934321e-06, "loss": 0.5285, "step": 4617 }, { "epoch": 0.13, "grad_norm": 6.50628116537906, "learning_rate": 9.728388339135912e-06, "loss": 0.3524, "step": 4618 }, { "epoch": 0.13, "grad_norm": 5.573702279686711, "learning_rate": 9.728237548250875e-06, "loss": 0.5768, "step": 4619 }, { "epoch": 0.13, "grad_norm": 7.324275455509655, "learning_rate": 9.72808671668939e-06, "loss": 0.7505, "step": 4620 }, { "epoch": 0.13, "grad_norm": 2.9441940045664388, "learning_rate": 9.727935844452759e-06, "loss": 0.4317, "step": 4621 }, { "epoch": 0.13, "grad_norm": 4.766321050122856, "learning_rate": 9.727784931542278e-06, "loss": 0.5742, "step": 4622 }, { "epoch": 0.13, "grad_norm": 4.182754332658451, "learning_rate": 9.727633977959245e-06, "loss": 0.4487, "step": 4623 }, { "epoch": 0.13, "grad_norm": 6.259063786098891, "learning_rate": 9.72748298370496e-06, "loss": 0.7958, "step": 4624 }, { "epoch": 0.13, "grad_norm": 4.502626129620454, "learning_rate": 9.727331948780719e-06, "loss": 0.4466, "step": 4625 }, { "epoch": 0.13, "grad_norm": 9.05479556159903, "learning_rate": 9.727180873187823e-06, "loss": 0.8373, "step": 4626 }, { "epoch": 0.13, "grad_norm": 7.806480438520654, "learning_rate": 9.727029756927572e-06, "loss": 0.9676, "step": 4627 }, { "epoch": 0.13, "grad_norm": 7.4848376551205655, "learning_rate": 9.726878600001268e-06, "loss": 0.3404, "step": 4628 }, { "epoch": 0.13, "grad_norm": 4.052538116482913, "learning_rate": 9.72672740241021e-06, "loss": 0.6111, "step": 4629 }, { "epoch": 0.13, "grad_norm": 4.2334041256629895, "learning_rate": 9.726576164155695e-06, "loss": 0.208, "step": 4630 }, { "epoch": 0.13, "grad_norm": 7.700569425310673, "learning_rate": 9.72642488523903e-06, "loss": 0.5694, "step": 4631 }, { "epoch": 0.13, "grad_norm": 6.4494726453373, "learning_rate": 9.726273565661512e-06, "loss": 0.4735, "step": 4632 }, { "epoch": 0.13, "grad_norm": 9.097415565461606, "learning_rate": 9.726122205424445e-06, "loss": 0.4907, "step": 4633 }, { "epoch": 0.13, "grad_norm": 7.037644424447174, "learning_rate": 9.72597080452913e-06, "loss": 0.4298, "step": 4634 }, { "epoch": 0.13, "grad_norm": 5.348836340870415, "learning_rate": 9.72581936297687e-06, "loss": 0.376, "step": 4635 }, { "epoch": 0.13, "grad_norm": 4.143327788164863, "learning_rate": 9.725667880768969e-06, "loss": 0.4417, "step": 4636 }, { "epoch": 0.13, "grad_norm": 6.955849925804007, "learning_rate": 9.725516357906729e-06, "loss": 0.6697, "step": 4637 }, { "epoch": 0.13, "grad_norm": 14.039794223505519, "learning_rate": 9.725364794391452e-06, "loss": 1.0927, "step": 4638 }, { "epoch": 0.13, "grad_norm": 4.756878339891335, "learning_rate": 9.725213190224445e-06, "loss": 0.4774, "step": 4639 }, { "epoch": 0.13, "grad_norm": 10.940546799407219, "learning_rate": 9.72506154540701e-06, "loss": 1.0517, "step": 4640 }, { "epoch": 0.13, "grad_norm": 5.624195931503991, "learning_rate": 9.724909859940452e-06, "loss": 0.2509, "step": 4641 }, { "epoch": 0.13, "grad_norm": 7.486511244152759, "learning_rate": 9.724758133826078e-06, "loss": 0.4727, "step": 4642 }, { "epoch": 0.13, "grad_norm": 7.40623338914771, "learning_rate": 9.724606367065188e-06, "loss": 0.6537, "step": 4643 }, { "epoch": 0.13, "grad_norm": 5.846079234658705, "learning_rate": 9.724454559659094e-06, "loss": 0.7629, "step": 4644 }, { "epoch": 0.13, "grad_norm": 6.259830678045362, "learning_rate": 9.7243027116091e-06, "loss": 0.752, "step": 4645 }, { "epoch": 0.13, "grad_norm": 6.548494088709594, "learning_rate": 9.724150822916509e-06, "loss": 0.4928, "step": 4646 }, { "epoch": 0.13, "grad_norm": 13.077251664948506, "learning_rate": 9.72399889358263e-06, "loss": 0.6787, "step": 4647 }, { "epoch": 0.13, "grad_norm": 6.883014585059166, "learning_rate": 9.723846923608772e-06, "loss": 0.3865, "step": 4648 }, { "epoch": 0.13, "grad_norm": 8.628712256043848, "learning_rate": 9.72369491299624e-06, "loss": 0.6523, "step": 4649 }, { "epoch": 0.13, "grad_norm": 3.644144804358501, "learning_rate": 9.723542861746342e-06, "loss": 0.3798, "step": 4650 }, { "epoch": 0.13, "grad_norm": 8.896587819273735, "learning_rate": 9.723390769860385e-06, "loss": 0.5802, "step": 4651 }, { "epoch": 0.13, "grad_norm": 7.6555273843858105, "learning_rate": 9.72323863733968e-06, "loss": 0.7234, "step": 4652 }, { "epoch": 0.13, "grad_norm": 2.4506759932256306, "learning_rate": 9.723086464185536e-06, "loss": 0.1424, "step": 4653 }, { "epoch": 0.13, "grad_norm": 4.755773198093076, "learning_rate": 9.72293425039926e-06, "loss": 0.6568, "step": 4654 }, { "epoch": 0.13, "grad_norm": 2.8119527072348176, "learning_rate": 9.72278199598216e-06, "loss": 0.6078, "step": 4655 }, { "epoch": 0.13, "grad_norm": 10.916781000213213, "learning_rate": 9.72262970093555e-06, "loss": 0.5318, "step": 4656 }, { "epoch": 0.13, "grad_norm": 4.847240888212905, "learning_rate": 9.722477365260737e-06, "loss": 0.3763, "step": 4657 }, { "epoch": 0.13, "grad_norm": 6.55545363194315, "learning_rate": 9.722324988959032e-06, "loss": 0.7265, "step": 4658 }, { "epoch": 0.13, "grad_norm": 6.825092820874293, "learning_rate": 9.72217257203175e-06, "loss": 0.7397, "step": 4659 }, { "epoch": 0.13, "grad_norm": 8.547839680404886, "learning_rate": 9.722020114480195e-06, "loss": 0.7469, "step": 4660 }, { "epoch": 0.13, "grad_norm": 8.627066654894534, "learning_rate": 9.721867616305684e-06, "loss": 1.162, "step": 4661 }, { "epoch": 0.13, "grad_norm": 5.086683274304904, "learning_rate": 9.721715077509528e-06, "loss": 0.5924, "step": 4662 }, { "epoch": 0.13, "grad_norm": 4.402575320065806, "learning_rate": 9.721562498093038e-06, "loss": 0.3667, "step": 4663 }, { "epoch": 0.13, "grad_norm": 8.187624049520956, "learning_rate": 9.721409878057526e-06, "loss": 0.9269, "step": 4664 }, { "epoch": 0.13, "grad_norm": 6.441034883489672, "learning_rate": 9.721257217404308e-06, "loss": 0.4959, "step": 4665 }, { "epoch": 0.13, "grad_norm": 4.835760948397132, "learning_rate": 9.721104516134693e-06, "loss": 0.5424, "step": 4666 }, { "epoch": 0.13, "grad_norm": 8.295723183202549, "learning_rate": 9.72095177425e-06, "loss": 0.5991, "step": 4667 }, { "epoch": 0.13, "grad_norm": 5.96916391026012, "learning_rate": 9.720798991751538e-06, "loss": 0.8477, "step": 4668 }, { "epoch": 0.13, "grad_norm": 5.538235795023805, "learning_rate": 9.720646168640623e-06, "loss": 0.6882, "step": 4669 }, { "epoch": 0.13, "grad_norm": 7.538061773264158, "learning_rate": 9.720493304918573e-06, "loss": 0.6002, "step": 4670 }, { "epoch": 0.13, "grad_norm": 12.571384154278453, "learning_rate": 9.720340400586699e-06, "loss": 0.4723, "step": 4671 }, { "epoch": 0.13, "grad_norm": 7.321151017699916, "learning_rate": 9.720187455646317e-06, "loss": 0.6315, "step": 4672 }, { "epoch": 0.13, "grad_norm": 5.939447505186835, "learning_rate": 9.720034470098744e-06, "loss": 0.5216, "step": 4673 }, { "epoch": 0.13, "grad_norm": 3.7776977310083777, "learning_rate": 9.719881443945295e-06, "loss": 0.6487, "step": 4674 }, { "epoch": 0.13, "grad_norm": 5.820037917246315, "learning_rate": 9.719728377187288e-06, "loss": 0.4588, "step": 4675 }, { "epoch": 0.13, "grad_norm": 4.49736912963691, "learning_rate": 9.719575269826038e-06, "loss": 0.5112, "step": 4676 }, { "epoch": 0.13, "grad_norm": 7.160699385684241, "learning_rate": 9.719422121862863e-06, "loss": 0.8578, "step": 4677 }, { "epoch": 0.13, "grad_norm": 7.9357692303348975, "learning_rate": 9.719268933299082e-06, "loss": 0.8309, "step": 4678 }, { "epoch": 0.13, "grad_norm": 10.90138236563095, "learning_rate": 9.719115704136012e-06, "loss": 0.8374, "step": 4679 }, { "epoch": 0.13, "grad_norm": 3.542641957951202, "learning_rate": 9.718962434374967e-06, "loss": 0.3419, "step": 4680 }, { "epoch": 0.13, "grad_norm": 8.455679854678129, "learning_rate": 9.718809124017272e-06, "loss": 0.6919, "step": 4681 }, { "epoch": 0.13, "grad_norm": 10.473318623274055, "learning_rate": 9.718655773064243e-06, "loss": 0.9077, "step": 4682 }, { "epoch": 0.13, "grad_norm": 4.246712703565088, "learning_rate": 9.7185023815172e-06, "loss": 0.599, "step": 4683 }, { "epoch": 0.13, "grad_norm": 13.549047592868959, "learning_rate": 9.71834894937746e-06, "loss": 0.4698, "step": 4684 }, { "epoch": 0.13, "grad_norm": 7.290216090920298, "learning_rate": 9.718195476646345e-06, "loss": 0.3377, "step": 4685 }, { "epoch": 0.13, "grad_norm": 4.315160179657139, "learning_rate": 9.718041963325175e-06, "loss": 0.411, "step": 4686 }, { "epoch": 0.13, "grad_norm": 10.232197630380679, "learning_rate": 9.717888409415272e-06, "loss": 0.9837, "step": 4687 }, { "epoch": 0.13, "grad_norm": 3.214383765647607, "learning_rate": 9.717734814917955e-06, "loss": 0.2755, "step": 4688 }, { "epoch": 0.13, "grad_norm": 7.027284580594425, "learning_rate": 9.717581179834547e-06, "loss": 0.5289, "step": 4689 }, { "epoch": 0.13, "grad_norm": 8.28250550163678, "learning_rate": 9.71742750416637e-06, "loss": 0.7023, "step": 4690 }, { "epoch": 0.13, "grad_norm": 3.275921154787464, "learning_rate": 9.717273787914744e-06, "loss": 0.1739, "step": 4691 }, { "epoch": 0.13, "grad_norm": 2.2855883849968763, "learning_rate": 9.717120031080993e-06, "loss": 0.3034, "step": 4692 }, { "epoch": 0.13, "grad_norm": 8.997941099538366, "learning_rate": 9.716966233666438e-06, "loss": 0.5852, "step": 4693 }, { "epoch": 0.13, "grad_norm": 6.351624210522402, "learning_rate": 9.716812395672404e-06, "loss": 0.6367, "step": 4694 }, { "epoch": 0.13, "grad_norm": 4.298084296937525, "learning_rate": 9.716658517100212e-06, "loss": 0.5158, "step": 4695 }, { "epoch": 0.13, "grad_norm": 8.03899766842101, "learning_rate": 9.716504597951191e-06, "loss": 0.6808, "step": 4696 }, { "epoch": 0.13, "grad_norm": 7.027697127544976, "learning_rate": 9.71635063822666e-06, "loss": 0.2939, "step": 4697 }, { "epoch": 0.13, "grad_norm": 6.616953515754126, "learning_rate": 9.716196637927944e-06, "loss": 0.6911, "step": 4698 }, { "epoch": 0.13, "grad_norm": 4.784059135629691, "learning_rate": 9.71604259705637e-06, "loss": 0.4774, "step": 4699 }, { "epoch": 0.13, "grad_norm": 6.286376720826836, "learning_rate": 9.715888515613262e-06, "loss": 0.8225, "step": 4700 }, { "epoch": 0.13, "grad_norm": 4.0818747750549305, "learning_rate": 9.715734393599945e-06, "loss": 0.3647, "step": 4701 }, { "epoch": 0.13, "grad_norm": 5.0761920243113225, "learning_rate": 9.715580231017746e-06, "loss": 0.4817, "step": 4702 }, { "epoch": 0.13, "grad_norm": 6.738274987259318, "learning_rate": 9.715426027867992e-06, "loss": 0.7644, "step": 4703 }, { "epoch": 0.13, "grad_norm": 6.636168692315794, "learning_rate": 9.715271784152007e-06, "loss": 0.293, "step": 4704 }, { "epoch": 0.13, "grad_norm": 8.261807806017504, "learning_rate": 9.715117499871121e-06, "loss": 0.5151, "step": 4705 }, { "epoch": 0.13, "grad_norm": 5.356978967975334, "learning_rate": 9.714963175026658e-06, "loss": 0.3495, "step": 4706 }, { "epoch": 0.13, "grad_norm": 7.122036300780916, "learning_rate": 9.714808809619948e-06, "loss": 0.3192, "step": 4707 }, { "epoch": 0.13, "grad_norm": 6.369584083033885, "learning_rate": 9.714654403652319e-06, "loss": 0.4575, "step": 4708 }, { "epoch": 0.13, "grad_norm": 6.87027102733984, "learning_rate": 9.714499957125098e-06, "loss": 0.5223, "step": 4709 }, { "epoch": 0.13, "grad_norm": 7.0512378999132554, "learning_rate": 9.714345470039615e-06, "loss": 0.4842, "step": 4710 }, { "epoch": 0.13, "grad_norm": 5.788128529110842, "learning_rate": 9.714190942397197e-06, "loss": 0.3185, "step": 4711 }, { "epoch": 0.13, "grad_norm": 8.70574287202749, "learning_rate": 9.714036374199175e-06, "loss": 0.7861, "step": 4712 }, { "epoch": 0.13, "grad_norm": 3.3369257086376733, "learning_rate": 9.71388176544688e-06, "loss": 0.332, "step": 4713 }, { "epoch": 0.13, "grad_norm": 6.126657534161148, "learning_rate": 9.713727116141638e-06, "loss": 0.6834, "step": 4714 }, { "epoch": 0.14, "grad_norm": 9.934289857608121, "learning_rate": 9.713572426284784e-06, "loss": 0.7851, "step": 4715 }, { "epoch": 0.14, "grad_norm": 8.277819055913746, "learning_rate": 9.713417695877647e-06, "loss": 0.6116, "step": 4716 }, { "epoch": 0.14, "grad_norm": 4.646398324638647, "learning_rate": 9.713262924921555e-06, "loss": 0.3269, "step": 4717 }, { "epoch": 0.14, "grad_norm": 6.267230544017351, "learning_rate": 9.713108113417846e-06, "loss": 0.634, "step": 4718 }, { "epoch": 0.14, "grad_norm": 8.90590562656583, "learning_rate": 9.712953261367847e-06, "loss": 0.8981, "step": 4719 }, { "epoch": 0.14, "grad_norm": 4.42419873973559, "learning_rate": 9.712798368772891e-06, "loss": 0.7357, "step": 4720 }, { "epoch": 0.14, "grad_norm": 6.18751302390221, "learning_rate": 9.71264343563431e-06, "loss": 0.3848, "step": 4721 }, { "epoch": 0.14, "grad_norm": 5.453986769298904, "learning_rate": 9.712488461953439e-06, "loss": 0.3779, "step": 4722 }, { "epoch": 0.14, "grad_norm": 5.839829950730979, "learning_rate": 9.71233344773161e-06, "loss": 0.3935, "step": 4723 }, { "epoch": 0.14, "grad_norm": 4.653741762623757, "learning_rate": 9.712178392970158e-06, "loss": 1.0422, "step": 4724 }, { "epoch": 0.14, "grad_norm": 6.751967885004297, "learning_rate": 9.712023297670413e-06, "loss": 0.6346, "step": 4725 }, { "epoch": 0.14, "grad_norm": 4.901245431856547, "learning_rate": 9.711868161833712e-06, "loss": 0.4149, "step": 4726 }, { "epoch": 0.14, "grad_norm": 8.029915546623954, "learning_rate": 9.71171298546139e-06, "loss": 0.5974, "step": 4727 }, { "epoch": 0.14, "grad_norm": 17.98962095161338, "learning_rate": 9.71155776855478e-06, "loss": 0.3229, "step": 4728 }, { "epoch": 0.14, "grad_norm": 5.964460659153503, "learning_rate": 9.71140251111522e-06, "loss": 0.7192, "step": 4729 }, { "epoch": 0.14, "grad_norm": 5.835306095145721, "learning_rate": 9.711247213144045e-06, "loss": 0.6701, "step": 4730 }, { "epoch": 0.14, "grad_norm": 7.645009941508763, "learning_rate": 9.711091874642589e-06, "loss": 0.202, "step": 4731 }, { "epoch": 0.14, "grad_norm": 6.976451091154478, "learning_rate": 9.71093649561219e-06, "loss": 0.4778, "step": 4732 }, { "epoch": 0.14, "grad_norm": 6.13173538759831, "learning_rate": 9.710781076054184e-06, "loss": 0.7067, "step": 4733 }, { "epoch": 0.14, "grad_norm": 6.199256440152044, "learning_rate": 9.710625615969908e-06, "loss": 0.4972, "step": 4734 }, { "epoch": 0.14, "grad_norm": 13.620361098377508, "learning_rate": 9.710470115360701e-06, "loss": 1.2393, "step": 4735 }, { "epoch": 0.14, "grad_norm": 8.082665235563123, "learning_rate": 9.7103145742279e-06, "loss": 0.7652, "step": 4736 }, { "epoch": 0.14, "grad_norm": 18.380017678913337, "learning_rate": 9.710158992572843e-06, "loss": 0.4651, "step": 4737 }, { "epoch": 0.14, "grad_norm": 6.592535006416085, "learning_rate": 9.710003370396866e-06, "loss": 0.3385, "step": 4738 }, { "epoch": 0.14, "grad_norm": 3.0136049551218256, "learning_rate": 9.70984770770131e-06, "loss": 0.1143, "step": 4739 }, { "epoch": 0.14, "grad_norm": 4.062431628312123, "learning_rate": 9.709692004487516e-06, "loss": 0.4747, "step": 4740 }, { "epoch": 0.14, "grad_norm": 2.9456853655856583, "learning_rate": 9.70953626075682e-06, "loss": 0.3685, "step": 4741 }, { "epoch": 0.14, "grad_norm": 5.303929270515824, "learning_rate": 9.709380476510564e-06, "loss": 0.5704, "step": 4742 }, { "epoch": 0.14, "grad_norm": 6.501624747933824, "learning_rate": 9.709224651750087e-06, "loss": 0.787, "step": 4743 }, { "epoch": 0.14, "grad_norm": 5.823315965567974, "learning_rate": 9.70906878647673e-06, "loss": 0.5105, "step": 4744 }, { "epoch": 0.14, "grad_norm": 10.489967139658994, "learning_rate": 9.708912880691835e-06, "loss": 0.5302, "step": 4745 }, { "epoch": 0.14, "grad_norm": 8.972309801706325, "learning_rate": 9.708756934396741e-06, "loss": 0.449, "step": 4746 }, { "epoch": 0.14, "grad_norm": 7.010911101259028, "learning_rate": 9.708600947592792e-06, "loss": 0.522, "step": 4747 }, { "epoch": 0.14, "grad_norm": 2.9583385001840377, "learning_rate": 9.70844492028133e-06, "loss": 0.1827, "step": 4748 }, { "epoch": 0.14, "grad_norm": 5.295297753568745, "learning_rate": 9.708288852463694e-06, "loss": 0.3697, "step": 4749 }, { "epoch": 0.14, "grad_norm": 5.219201211211498, "learning_rate": 9.708132744141228e-06, "loss": 0.9625, "step": 4750 }, { "epoch": 0.14, "grad_norm": 10.42070712247365, "learning_rate": 9.707976595315277e-06, "loss": 0.6703, "step": 4751 }, { "epoch": 0.14, "grad_norm": 4.904525089157354, "learning_rate": 9.707820405987185e-06, "loss": 0.3845, "step": 4752 }, { "epoch": 0.14, "grad_norm": 2.7145437390437532, "learning_rate": 9.707664176158291e-06, "loss": 0.3016, "step": 4753 }, { "epoch": 0.14, "grad_norm": 4.971543156778456, "learning_rate": 9.707507905829942e-06, "loss": 0.5975, "step": 4754 }, { "epoch": 0.14, "grad_norm": 12.809376554542553, "learning_rate": 9.707351595003483e-06, "loss": 0.5962, "step": 4755 }, { "epoch": 0.14, "grad_norm": 8.211742096741544, "learning_rate": 9.707195243680256e-06, "loss": 0.4074, "step": 4756 }, { "epoch": 0.14, "grad_norm": 6.685890360552594, "learning_rate": 9.70703885186161e-06, "loss": 0.5961, "step": 4757 }, { "epoch": 0.14, "grad_norm": 10.900200857709152, "learning_rate": 9.706882419548886e-06, "loss": 0.7072, "step": 4758 }, { "epoch": 0.14, "grad_norm": 4.844634830503758, "learning_rate": 9.706725946743435e-06, "loss": 0.7241, "step": 4759 }, { "epoch": 0.14, "grad_norm": 5.30751935066796, "learning_rate": 9.706569433446598e-06, "loss": 0.4824, "step": 4760 }, { "epoch": 0.14, "grad_norm": 7.757836102683129, "learning_rate": 9.706412879659725e-06, "loss": 0.7672, "step": 4761 }, { "epoch": 0.14, "grad_norm": 7.427756201897328, "learning_rate": 9.706256285384162e-06, "loss": 0.4513, "step": 4762 }, { "epoch": 0.14, "grad_norm": 8.601419695397219, "learning_rate": 9.706099650621252e-06, "loss": 0.6725, "step": 4763 }, { "epoch": 0.14, "grad_norm": 8.933164118355059, "learning_rate": 9.705942975372348e-06, "loss": 0.4096, "step": 4764 }, { "epoch": 0.14, "grad_norm": 6.670775794218508, "learning_rate": 9.705786259638798e-06, "loss": 0.4584, "step": 4765 }, { "epoch": 0.14, "grad_norm": 7.805942804844895, "learning_rate": 9.705629503421947e-06, "loss": 0.6228, "step": 4766 }, { "epoch": 0.14, "grad_norm": 3.7339928563166622, "learning_rate": 9.705472706723145e-06, "loss": 0.3052, "step": 4767 }, { "epoch": 0.14, "grad_norm": 5.567244403044032, "learning_rate": 9.70531586954374e-06, "loss": 0.5432, "step": 4768 }, { "epoch": 0.14, "grad_norm": 7.475214502835575, "learning_rate": 9.705158991885082e-06, "loss": 0.4964, "step": 4769 }, { "epoch": 0.14, "grad_norm": 4.887296305555757, "learning_rate": 9.705002073748521e-06, "loss": 0.5765, "step": 4770 }, { "epoch": 0.14, "grad_norm": 3.044609675328475, "learning_rate": 9.704845115135407e-06, "loss": 0.2254, "step": 4771 }, { "epoch": 0.14, "grad_norm": 4.325294670915455, "learning_rate": 9.704688116047088e-06, "loss": 0.59, "step": 4772 }, { "epoch": 0.14, "grad_norm": 10.972570552742512, "learning_rate": 9.704531076484917e-06, "loss": 0.6495, "step": 4773 }, { "epoch": 0.14, "grad_norm": 4.380927266901408, "learning_rate": 9.704373996450244e-06, "loss": 0.2823, "step": 4774 }, { "epoch": 0.14, "grad_norm": 14.466564433590667, "learning_rate": 9.704216875944421e-06, "loss": 0.8337, "step": 4775 }, { "epoch": 0.14, "grad_norm": 4.293131664680243, "learning_rate": 9.704059714968802e-06, "loss": 0.2898, "step": 4776 }, { "epoch": 0.14, "grad_norm": 8.28438085716268, "learning_rate": 9.703902513524731e-06, "loss": 0.5732, "step": 4777 }, { "epoch": 0.14, "grad_norm": 5.809320626253197, "learning_rate": 9.70374527161357e-06, "loss": 0.9079, "step": 4778 }, { "epoch": 0.14, "grad_norm": 10.16169264285527, "learning_rate": 9.703587989236666e-06, "loss": 0.763, "step": 4779 }, { "epoch": 0.14, "grad_norm": 8.33486797189294, "learning_rate": 9.703430666395375e-06, "loss": 0.4137, "step": 4780 }, { "epoch": 0.14, "grad_norm": 2.430234608274484, "learning_rate": 9.703273303091049e-06, "loss": 0.4067, "step": 4781 }, { "epoch": 0.14, "grad_norm": 3.545895033582326, "learning_rate": 9.703115899325041e-06, "loss": 0.2041, "step": 4782 }, { "epoch": 0.14, "grad_norm": 5.090175146525741, "learning_rate": 9.702958455098705e-06, "loss": 0.5949, "step": 4783 }, { "epoch": 0.14, "grad_norm": 4.272400334391613, "learning_rate": 9.702800970413399e-06, "loss": 0.4579, "step": 4784 }, { "epoch": 0.14, "grad_norm": 6.300303734920421, "learning_rate": 9.702643445270474e-06, "loss": 0.7664, "step": 4785 }, { "epoch": 0.14, "grad_norm": 8.760933611688488, "learning_rate": 9.702485879671286e-06, "loss": 0.5988, "step": 4786 }, { "epoch": 0.14, "grad_norm": 7.602181890644527, "learning_rate": 9.70232827361719e-06, "loss": 0.4637, "step": 4787 }, { "epoch": 0.14, "grad_norm": 4.521273844948458, "learning_rate": 9.702170627109545e-06, "loss": 0.3816, "step": 4788 }, { "epoch": 0.14, "grad_norm": 5.240822172237446, "learning_rate": 9.702012940149704e-06, "loss": 0.3341, "step": 4789 }, { "epoch": 0.14, "grad_norm": 6.6259723885395845, "learning_rate": 9.701855212739026e-06, "loss": 0.8069, "step": 4790 }, { "epoch": 0.14, "grad_norm": 12.177153279038626, "learning_rate": 9.701697444878864e-06, "loss": 0.5634, "step": 4791 }, { "epoch": 0.14, "grad_norm": 6.468671678446729, "learning_rate": 9.70153963657058e-06, "loss": 0.3264, "step": 4792 }, { "epoch": 0.14, "grad_norm": 3.9563919258875284, "learning_rate": 9.701381787815528e-06, "loss": 0.2484, "step": 4793 }, { "epoch": 0.14, "grad_norm": 5.079230367226127, "learning_rate": 9.701223898615068e-06, "loss": 0.6345, "step": 4794 }, { "epoch": 0.14, "grad_norm": 5.04882484303115, "learning_rate": 9.701065968970557e-06, "loss": 0.4846, "step": 4795 }, { "epoch": 0.14, "grad_norm": 8.011069507229244, "learning_rate": 9.700907998883356e-06, "loss": 0.4802, "step": 4796 }, { "epoch": 0.14, "grad_norm": 6.388160510168348, "learning_rate": 9.70074998835482e-06, "loss": 0.5344, "step": 4797 }, { "epoch": 0.14, "grad_norm": 7.999514684261529, "learning_rate": 9.700591937386311e-06, "loss": 0.2231, "step": 4798 }, { "epoch": 0.14, "grad_norm": 4.844748166510362, "learning_rate": 9.700433845979189e-06, "loss": 0.5799, "step": 4799 }, { "epoch": 0.14, "grad_norm": 8.317175589937595, "learning_rate": 9.700275714134814e-06, "loss": 0.8872, "step": 4800 }, { "epoch": 0.14, "grad_norm": 6.040303585147155, "learning_rate": 9.700117541854544e-06, "loss": 0.3822, "step": 4801 }, { "epoch": 0.14, "grad_norm": 23.433098809582166, "learning_rate": 9.699959329139744e-06, "loss": 0.3889, "step": 4802 }, { "epoch": 0.14, "grad_norm": 6.113787879303941, "learning_rate": 9.69980107599177e-06, "loss": 0.3328, "step": 4803 }, { "epoch": 0.14, "grad_norm": 6.791597233360801, "learning_rate": 9.699642782411986e-06, "loss": 0.6988, "step": 4804 }, { "epoch": 0.14, "grad_norm": 5.088415523697832, "learning_rate": 9.699484448401757e-06, "loss": 0.4945, "step": 4805 }, { "epoch": 0.14, "grad_norm": 6.136286554590953, "learning_rate": 9.699326073962439e-06, "loss": 0.5742, "step": 4806 }, { "epoch": 0.14, "grad_norm": 4.70984261158678, "learning_rate": 9.699167659095398e-06, "loss": 0.3593, "step": 4807 }, { "epoch": 0.14, "grad_norm": 5.152733511133377, "learning_rate": 9.699009203801996e-06, "loss": 0.3601, "step": 4808 }, { "epoch": 0.14, "grad_norm": 8.668556912010647, "learning_rate": 9.698850708083595e-06, "loss": 0.4916, "step": 4809 }, { "epoch": 0.14, "grad_norm": 2.5020933208747973, "learning_rate": 9.698692171941563e-06, "loss": 0.3608, "step": 4810 }, { "epoch": 0.14, "grad_norm": 6.798714364756752, "learning_rate": 9.69853359537726e-06, "loss": 0.4446, "step": 4811 }, { "epoch": 0.14, "grad_norm": 6.466714068694582, "learning_rate": 9.69837497839205e-06, "loss": 0.5029, "step": 4812 }, { "epoch": 0.14, "grad_norm": 4.302275076865829, "learning_rate": 9.6982163209873e-06, "loss": 0.4413, "step": 4813 }, { "epoch": 0.14, "grad_norm": 5.629051487231622, "learning_rate": 9.69805762316437e-06, "loss": 0.2764, "step": 4814 }, { "epoch": 0.14, "grad_norm": 10.633595479127495, "learning_rate": 9.697898884924633e-06, "loss": 0.6909, "step": 4815 }, { "epoch": 0.14, "grad_norm": 4.357331702899356, "learning_rate": 9.697740106269446e-06, "loss": 0.4898, "step": 4816 }, { "epoch": 0.14, "grad_norm": 7.274252857107396, "learning_rate": 9.697581287200183e-06, "loss": 0.5153, "step": 4817 }, { "epoch": 0.14, "grad_norm": 5.414904156062981, "learning_rate": 9.697422427718204e-06, "loss": 0.2266, "step": 4818 }, { "epoch": 0.14, "grad_norm": 8.61615874996592, "learning_rate": 9.697263527824881e-06, "loss": 0.5722, "step": 4819 }, { "epoch": 0.14, "grad_norm": 9.826277663848645, "learning_rate": 9.697104587521575e-06, "loss": 1.3373, "step": 4820 }, { "epoch": 0.14, "grad_norm": 9.015715653147193, "learning_rate": 9.696945606809658e-06, "loss": 0.3265, "step": 4821 }, { "epoch": 0.14, "grad_norm": 6.986269017738152, "learning_rate": 9.696786585690498e-06, "loss": 0.7509, "step": 4822 }, { "epoch": 0.14, "grad_norm": 5.553957991842627, "learning_rate": 9.69662752416546e-06, "loss": 0.4281, "step": 4823 }, { "epoch": 0.14, "grad_norm": 7.762700840905692, "learning_rate": 9.696468422235912e-06, "loss": 0.4935, "step": 4824 }, { "epoch": 0.14, "grad_norm": 8.547202736883964, "learning_rate": 9.696309279903227e-06, "loss": 0.4431, "step": 4825 }, { "epoch": 0.14, "grad_norm": 4.912104230085526, "learning_rate": 9.696150097168769e-06, "loss": 0.6079, "step": 4826 }, { "epoch": 0.14, "grad_norm": 6.090576284418278, "learning_rate": 9.69599087403391e-06, "loss": 0.4672, "step": 4827 }, { "epoch": 0.14, "grad_norm": 4.662539541777388, "learning_rate": 9.695831610500023e-06, "loss": 0.4406, "step": 4828 }, { "epoch": 0.14, "grad_norm": 5.501725229696797, "learning_rate": 9.695672306568474e-06, "loss": 0.622, "step": 4829 }, { "epoch": 0.14, "grad_norm": 6.436126710648452, "learning_rate": 9.695512962240633e-06, "loss": 0.5755, "step": 4830 }, { "epoch": 0.14, "grad_norm": 7.178526542702185, "learning_rate": 9.69535357751787e-06, "loss": 0.6037, "step": 4831 }, { "epoch": 0.14, "grad_norm": 5.194674161544957, "learning_rate": 9.695194152401561e-06, "loss": 0.1928, "step": 4832 }, { "epoch": 0.14, "grad_norm": 6.977763278740939, "learning_rate": 9.695034686893075e-06, "loss": 0.8949, "step": 4833 }, { "epoch": 0.14, "grad_norm": 10.437792379886899, "learning_rate": 9.694875180993781e-06, "loss": 0.7021, "step": 4834 }, { "epoch": 0.14, "grad_norm": 2.626060249013699, "learning_rate": 9.694715634705057e-06, "loss": 0.2724, "step": 4835 }, { "epoch": 0.14, "grad_norm": 10.777425601837825, "learning_rate": 9.69455604802827e-06, "loss": 0.6682, "step": 4836 }, { "epoch": 0.14, "grad_norm": 11.931220195367354, "learning_rate": 9.694396420964798e-06, "loss": 0.8213, "step": 4837 }, { "epoch": 0.14, "grad_norm": 6.109183696278537, "learning_rate": 9.694236753516008e-06, "loss": 0.3272, "step": 4838 }, { "epoch": 0.14, "grad_norm": 6.3241818799834375, "learning_rate": 9.694077045683282e-06, "loss": 0.7999, "step": 4839 }, { "epoch": 0.14, "grad_norm": 3.3051191187873665, "learning_rate": 9.693917297467985e-06, "loss": 0.4371, "step": 4840 }, { "epoch": 0.14, "grad_norm": 6.233783626951428, "learning_rate": 9.693757508871497e-06, "loss": 0.5653, "step": 4841 }, { "epoch": 0.14, "grad_norm": 10.541023496123382, "learning_rate": 9.69359767989519e-06, "loss": 1.0014, "step": 4842 }, { "epoch": 0.14, "grad_norm": 6.232948120814412, "learning_rate": 9.693437810540441e-06, "loss": 0.2938, "step": 4843 }, { "epoch": 0.14, "grad_norm": 4.360788533185776, "learning_rate": 9.693277900808624e-06, "loss": 0.678, "step": 4844 }, { "epoch": 0.14, "grad_norm": 4.456542835003627, "learning_rate": 9.693117950701117e-06, "loss": 0.4577, "step": 4845 }, { "epoch": 0.14, "grad_norm": 5.71951722381877, "learning_rate": 9.692957960219291e-06, "loss": 0.4147, "step": 4846 }, { "epoch": 0.14, "grad_norm": 6.065621113370567, "learning_rate": 9.692797929364528e-06, "loss": 0.4728, "step": 4847 }, { "epoch": 0.14, "grad_norm": 6.929019177538429, "learning_rate": 9.6926378581382e-06, "loss": 0.5668, "step": 4848 }, { "epoch": 0.14, "grad_norm": 7.298305591786932, "learning_rate": 9.692477746541688e-06, "loss": 0.7321, "step": 4849 }, { "epoch": 0.14, "grad_norm": 7.668525850460259, "learning_rate": 9.692317594576367e-06, "loss": 0.3591, "step": 4850 }, { "epoch": 0.14, "grad_norm": 3.1443752084245578, "learning_rate": 9.692157402243616e-06, "loss": 0.5445, "step": 4851 }, { "epoch": 0.14, "grad_norm": 3.8336737143007276, "learning_rate": 9.691997169544812e-06, "loss": 0.135, "step": 4852 }, { "epoch": 0.14, "grad_norm": 3.359068319826854, "learning_rate": 9.691836896481334e-06, "loss": 0.3794, "step": 4853 }, { "epoch": 0.14, "grad_norm": 6.717737733746341, "learning_rate": 9.691676583054562e-06, "loss": 0.3919, "step": 4854 }, { "epoch": 0.14, "grad_norm": 5.5387664414499485, "learning_rate": 9.691516229265872e-06, "loss": 0.9114, "step": 4855 }, { "epoch": 0.14, "grad_norm": 6.874229804159199, "learning_rate": 9.691355835116648e-06, "loss": 0.3815, "step": 4856 }, { "epoch": 0.14, "grad_norm": 4.226612704220373, "learning_rate": 9.691195400608264e-06, "loss": 0.244, "step": 4857 }, { "epoch": 0.14, "grad_norm": 5.226473546661052, "learning_rate": 9.691034925742105e-06, "loss": 0.8887, "step": 4858 }, { "epoch": 0.14, "grad_norm": 4.620100828862262, "learning_rate": 9.69087441051955e-06, "loss": 0.2774, "step": 4859 }, { "epoch": 0.14, "grad_norm": 4.048422145777582, "learning_rate": 9.690713854941983e-06, "loss": 0.3753, "step": 4860 }, { "epoch": 0.14, "grad_norm": 4.961154626615362, "learning_rate": 9.69055325901078e-06, "loss": 0.6023, "step": 4861 }, { "epoch": 0.14, "grad_norm": 6.8131632350782745, "learning_rate": 9.690392622727323e-06, "loss": 0.4279, "step": 4862 }, { "epoch": 0.14, "grad_norm": 5.55882055767513, "learning_rate": 9.690231946092999e-06, "loss": 0.2872, "step": 4863 }, { "epoch": 0.14, "grad_norm": 7.267511496152601, "learning_rate": 9.690071229109183e-06, "loss": 0.5523, "step": 4864 }, { "epoch": 0.14, "grad_norm": 3.3888894310848308, "learning_rate": 9.689910471777266e-06, "loss": 0.4448, "step": 4865 }, { "epoch": 0.14, "grad_norm": 7.888038861084206, "learning_rate": 9.689749674098624e-06, "loss": 0.4449, "step": 4866 }, { "epoch": 0.14, "grad_norm": 4.221185475704336, "learning_rate": 9.689588836074643e-06, "loss": 0.3467, "step": 4867 }, { "epoch": 0.14, "grad_norm": 7.4462129222541655, "learning_rate": 9.689427957706709e-06, "loss": 0.3662, "step": 4868 }, { "epoch": 0.14, "grad_norm": 4.992685785633314, "learning_rate": 9.689267038996201e-06, "loss": 0.2739, "step": 4869 }, { "epoch": 0.14, "grad_norm": 4.528024512019045, "learning_rate": 9.689106079944508e-06, "loss": 0.4519, "step": 4870 }, { "epoch": 0.14, "grad_norm": 8.489386271118418, "learning_rate": 9.688945080553012e-06, "loss": 0.4706, "step": 4871 }, { "epoch": 0.14, "grad_norm": 5.016263736921827, "learning_rate": 9.688784040823098e-06, "loss": 0.365, "step": 4872 }, { "epoch": 0.14, "grad_norm": 4.44703485412012, "learning_rate": 9.688622960756154e-06, "loss": 0.5057, "step": 4873 }, { "epoch": 0.14, "grad_norm": 8.179018126227334, "learning_rate": 9.688461840353563e-06, "loss": 0.6349, "step": 4874 }, { "epoch": 0.14, "grad_norm": 3.442074765970998, "learning_rate": 9.68830067961671e-06, "loss": 0.6318, "step": 4875 }, { "epoch": 0.14, "grad_norm": 5.718381359121405, "learning_rate": 9.688139478546986e-06, "loss": 0.512, "step": 4876 }, { "epoch": 0.14, "grad_norm": 5.675478208450128, "learning_rate": 9.687978237145773e-06, "loss": 0.4694, "step": 4877 }, { "epoch": 0.14, "grad_norm": 4.134337549479689, "learning_rate": 9.687816955414465e-06, "loss": 0.4001, "step": 4878 }, { "epoch": 0.14, "grad_norm": 4.217094605232951, "learning_rate": 9.68765563335444e-06, "loss": 0.4999, "step": 4879 }, { "epoch": 0.14, "grad_norm": 13.801994367588666, "learning_rate": 9.687494270967092e-06, "loss": 0.8418, "step": 4880 }, { "epoch": 0.14, "grad_norm": 9.747422488956895, "learning_rate": 9.68733286825381e-06, "loss": 0.6916, "step": 4881 }, { "epoch": 0.14, "grad_norm": 3.5878656061470346, "learning_rate": 9.68717142521598e-06, "loss": 0.3247, "step": 4882 }, { "epoch": 0.14, "grad_norm": 8.232973851787328, "learning_rate": 9.68700994185499e-06, "loss": 0.978, "step": 4883 }, { "epoch": 0.14, "grad_norm": 2.2779780399345806, "learning_rate": 9.686848418172229e-06, "loss": 0.3266, "step": 4884 }, { "epoch": 0.14, "grad_norm": 8.12563256955768, "learning_rate": 9.686686854169091e-06, "loss": 0.8183, "step": 4885 }, { "epoch": 0.14, "grad_norm": 7.503854841764138, "learning_rate": 9.686525249846962e-06, "loss": 0.6343, "step": 4886 }, { "epoch": 0.14, "grad_norm": 5.42365268729872, "learning_rate": 9.686363605207234e-06, "loss": 0.2375, "step": 4887 }, { "epoch": 0.14, "grad_norm": 5.018334675821111, "learning_rate": 9.686201920251295e-06, "loss": 0.3279, "step": 4888 }, { "epoch": 0.14, "grad_norm": 4.520709148875315, "learning_rate": 9.68604019498054e-06, "loss": 0.7371, "step": 4889 }, { "epoch": 0.14, "grad_norm": 3.8793869412193684, "learning_rate": 9.685878429396358e-06, "loss": 0.373, "step": 4890 }, { "epoch": 0.14, "grad_norm": 3.7092597782844035, "learning_rate": 9.68571662350014e-06, "loss": 0.8245, "step": 4891 }, { "epoch": 0.14, "grad_norm": 9.589806557017997, "learning_rate": 9.685554777293277e-06, "loss": 0.8777, "step": 4892 }, { "epoch": 0.14, "grad_norm": 4.164561916268895, "learning_rate": 9.685392890777166e-06, "loss": 0.3815, "step": 4893 }, { "epoch": 0.14, "grad_norm": 6.9806143858094, "learning_rate": 9.685230963953196e-06, "loss": 0.3499, "step": 4894 }, { "epoch": 0.14, "grad_norm": 8.253325456801411, "learning_rate": 9.68506899682276e-06, "loss": 0.9652, "step": 4895 }, { "epoch": 0.14, "grad_norm": 8.276701458957575, "learning_rate": 9.684906989387253e-06, "loss": 0.8447, "step": 4896 }, { "epoch": 0.14, "grad_norm": 2.69415941866525, "learning_rate": 9.68474494164807e-06, "loss": 0.4278, "step": 4897 }, { "epoch": 0.14, "grad_norm": 4.77213230800831, "learning_rate": 9.6845828536066e-06, "loss": 0.4544, "step": 4898 }, { "epoch": 0.14, "grad_norm": 9.25630704976801, "learning_rate": 9.684420725264243e-06, "loss": 0.8035, "step": 4899 }, { "epoch": 0.14, "grad_norm": 6.516100237230528, "learning_rate": 9.684258556622389e-06, "loss": 0.7791, "step": 4900 }, { "epoch": 0.14, "grad_norm": 4.891725093464463, "learning_rate": 9.684096347682437e-06, "loss": 0.8189, "step": 4901 }, { "epoch": 0.14, "grad_norm": 4.681322095695233, "learning_rate": 9.68393409844578e-06, "loss": 0.7872, "step": 4902 }, { "epoch": 0.14, "grad_norm": 10.493101260572884, "learning_rate": 9.683771808913813e-06, "loss": 0.5474, "step": 4903 }, { "epoch": 0.14, "grad_norm": 6.837556169966095, "learning_rate": 9.683609479087937e-06, "loss": 0.1866, "step": 4904 }, { "epoch": 0.14, "grad_norm": 10.843889658795357, "learning_rate": 9.683447108969543e-06, "loss": 0.496, "step": 4905 }, { "epoch": 0.14, "grad_norm": 5.16760487908178, "learning_rate": 9.683284698560032e-06, "loss": 0.6777, "step": 4906 }, { "epoch": 0.14, "grad_norm": 5.398021058113941, "learning_rate": 9.683122247860798e-06, "loss": 0.2297, "step": 4907 }, { "epoch": 0.14, "grad_norm": 6.371749217394759, "learning_rate": 9.68295975687324e-06, "loss": 0.4058, "step": 4908 }, { "epoch": 0.14, "grad_norm": 8.878826659584709, "learning_rate": 9.682797225598757e-06, "loss": 0.8989, "step": 4909 }, { "epoch": 0.14, "grad_norm": 5.4829321373415585, "learning_rate": 9.682634654038744e-06, "loss": 0.5283, "step": 4910 }, { "epoch": 0.14, "grad_norm": 5.048996234507827, "learning_rate": 9.682472042194603e-06, "loss": 0.4363, "step": 4911 }, { "epoch": 0.14, "grad_norm": 5.195149538166103, "learning_rate": 9.68230939006773e-06, "loss": 0.5391, "step": 4912 }, { "epoch": 0.14, "grad_norm": 3.6236387196113693, "learning_rate": 9.682146697659528e-06, "loss": 0.2543, "step": 4913 }, { "epoch": 0.14, "grad_norm": 8.639103953910084, "learning_rate": 9.681983964971394e-06, "loss": 0.3594, "step": 4914 }, { "epoch": 0.14, "grad_norm": 4.097111028394483, "learning_rate": 9.681821192004727e-06, "loss": 0.6414, "step": 4915 }, { "epoch": 0.14, "grad_norm": 5.673161888729676, "learning_rate": 9.681658378760929e-06, "loss": 0.4706, "step": 4916 }, { "epoch": 0.14, "grad_norm": 5.254439429701131, "learning_rate": 9.681495525241402e-06, "loss": 0.3956, "step": 4917 }, { "epoch": 0.14, "grad_norm": 5.618429542712823, "learning_rate": 9.681332631447544e-06, "loss": 0.434, "step": 4918 }, { "epoch": 0.14, "grad_norm": 3.536335144743909, "learning_rate": 9.681169697380758e-06, "loss": 0.121, "step": 4919 }, { "epoch": 0.14, "grad_norm": 3.7831405138182603, "learning_rate": 9.681006723042446e-06, "loss": 0.2373, "step": 4920 }, { "epoch": 0.14, "grad_norm": 8.549055696508354, "learning_rate": 9.68084370843401e-06, "loss": 0.8415, "step": 4921 }, { "epoch": 0.14, "grad_norm": 7.220838368526522, "learning_rate": 9.680680653556852e-06, "loss": 0.966, "step": 4922 }, { "epoch": 0.14, "grad_norm": 4.440624158135696, "learning_rate": 9.680517558412372e-06, "loss": 0.4576, "step": 4923 }, { "epoch": 0.14, "grad_norm": 7.268313724614126, "learning_rate": 9.680354423001978e-06, "loss": 0.6356, "step": 4924 }, { "epoch": 0.14, "grad_norm": 9.564591372306591, "learning_rate": 9.680191247327071e-06, "loss": 0.4831, "step": 4925 }, { "epoch": 0.14, "grad_norm": 3.749162644039816, "learning_rate": 9.680028031389054e-06, "loss": 0.3083, "step": 4926 }, { "epoch": 0.14, "grad_norm": 6.239745004774584, "learning_rate": 9.679864775189332e-06, "loss": 0.5516, "step": 4927 }, { "epoch": 0.14, "grad_norm": 4.611498068974444, "learning_rate": 9.679701478729311e-06, "loss": 0.331, "step": 4928 }, { "epoch": 0.14, "grad_norm": 9.77765509017883, "learning_rate": 9.679538142010395e-06, "loss": 0.6416, "step": 4929 }, { "epoch": 0.14, "grad_norm": 6.067062391572417, "learning_rate": 9.679374765033985e-06, "loss": 0.5545, "step": 4930 }, { "epoch": 0.14, "grad_norm": 7.866522206849694, "learning_rate": 9.679211347801492e-06, "loss": 0.6946, "step": 4931 }, { "epoch": 0.14, "grad_norm": 7.834365245811498, "learning_rate": 9.67904789031432e-06, "loss": 0.8198, "step": 4932 }, { "epoch": 0.14, "grad_norm": 4.468381826532936, "learning_rate": 9.678884392573875e-06, "loss": 0.2442, "step": 4933 }, { "epoch": 0.14, "grad_norm": 8.251378254927378, "learning_rate": 9.678720854581565e-06, "loss": 0.1504, "step": 4934 }, { "epoch": 0.14, "grad_norm": 7.832402201022776, "learning_rate": 9.678557276338793e-06, "loss": 0.4277, "step": 4935 }, { "epoch": 0.14, "grad_norm": 6.982712310939509, "learning_rate": 9.678393657846969e-06, "loss": 0.6496, "step": 4936 }, { "epoch": 0.14, "grad_norm": 4.65814597936482, "learning_rate": 9.678229999107502e-06, "loss": 0.5173, "step": 4937 }, { "epoch": 0.14, "grad_norm": 6.561586370902986, "learning_rate": 9.678066300121798e-06, "loss": 0.6494, "step": 4938 }, { "epoch": 0.14, "grad_norm": 6.901545065507379, "learning_rate": 9.677902560891265e-06, "loss": 0.5002, "step": 4939 }, { "epoch": 0.14, "grad_norm": 7.60026139261565, "learning_rate": 9.677738781417314e-06, "loss": 0.4948, "step": 4940 }, { "epoch": 0.14, "grad_norm": 5.736189843479788, "learning_rate": 9.67757496170135e-06, "loss": 0.6795, "step": 4941 }, { "epoch": 0.14, "grad_norm": 6.228469460891363, "learning_rate": 9.677411101744784e-06, "loss": 0.6629, "step": 4942 }, { "epoch": 0.14, "grad_norm": 6.8123636494561595, "learning_rate": 9.67724720154903e-06, "loss": 0.6148, "step": 4943 }, { "epoch": 0.14, "grad_norm": 3.84919929352371, "learning_rate": 9.67708326111549e-06, "loss": 0.3468, "step": 4944 }, { "epoch": 0.14, "grad_norm": 6.398505949928144, "learning_rate": 9.676919280445581e-06, "loss": 0.5925, "step": 4945 }, { "epoch": 0.14, "grad_norm": 4.538413626798415, "learning_rate": 9.67675525954071e-06, "loss": 0.5662, "step": 4946 }, { "epoch": 0.14, "grad_norm": 12.77184652983813, "learning_rate": 9.67659119840229e-06, "loss": 0.5658, "step": 4947 }, { "epoch": 0.14, "grad_norm": 12.549612238391445, "learning_rate": 9.67642709703173e-06, "loss": 0.6368, "step": 4948 }, { "epoch": 0.14, "grad_norm": 8.732881339827875, "learning_rate": 9.676262955430447e-06, "loss": 0.7095, "step": 4949 }, { "epoch": 0.14, "grad_norm": 5.476225969148218, "learning_rate": 9.676098773599848e-06, "loss": 0.539, "step": 4950 }, { "epoch": 0.14, "grad_norm": 11.811439749148022, "learning_rate": 9.675934551541346e-06, "loss": 0.4905, "step": 4951 }, { "epoch": 0.14, "grad_norm": 4.949521507872339, "learning_rate": 9.675770289256354e-06, "loss": 0.7421, "step": 4952 }, { "epoch": 0.14, "grad_norm": 7.295923338428762, "learning_rate": 9.675605986746288e-06, "loss": 0.5646, "step": 4953 }, { "epoch": 0.14, "grad_norm": 7.17368076172069, "learning_rate": 9.67544164401256e-06, "loss": 0.7597, "step": 4954 }, { "epoch": 0.14, "grad_norm": 10.414865862000429, "learning_rate": 9.67527726105658e-06, "loss": 0.5735, "step": 4955 }, { "epoch": 0.14, "grad_norm": 6.862612227614852, "learning_rate": 9.675112837879768e-06, "loss": 0.7697, "step": 4956 }, { "epoch": 0.14, "grad_norm": 8.093554107769833, "learning_rate": 9.674948374483535e-06, "loss": 0.6351, "step": 4957 }, { "epoch": 0.14, "grad_norm": 7.86689110721313, "learning_rate": 9.674783870869296e-06, "loss": 0.4021, "step": 4958 }, { "epoch": 0.14, "grad_norm": 8.098096174038805, "learning_rate": 9.674619327038468e-06, "loss": 0.6767, "step": 4959 }, { "epoch": 0.14, "grad_norm": 10.332593358114428, "learning_rate": 9.674454742992466e-06, "loss": 0.686, "step": 4960 }, { "epoch": 0.14, "grad_norm": 5.644397266738118, "learning_rate": 9.674290118732704e-06, "loss": 0.3691, "step": 4961 }, { "epoch": 0.14, "grad_norm": 5.436080615820597, "learning_rate": 9.6741254542606e-06, "loss": 0.5669, "step": 4962 }, { "epoch": 0.14, "grad_norm": 5.296029667650202, "learning_rate": 9.67396074957757e-06, "loss": 0.2608, "step": 4963 }, { "epoch": 0.14, "grad_norm": 8.160810401448268, "learning_rate": 9.673796004685032e-06, "loss": 0.5418, "step": 4964 }, { "epoch": 0.14, "grad_norm": 6.769950558750938, "learning_rate": 9.673631219584401e-06, "loss": 0.5323, "step": 4965 }, { "epoch": 0.14, "grad_norm": 6.1870065357450565, "learning_rate": 9.673466394277098e-06, "loss": 0.7225, "step": 4966 }, { "epoch": 0.14, "grad_norm": 6.710226001259278, "learning_rate": 9.673301528764538e-06, "loss": 0.2588, "step": 4967 }, { "epoch": 0.14, "grad_norm": 10.07053447801315, "learning_rate": 9.673136623048141e-06, "loss": 0.6801, "step": 4968 }, { "epoch": 0.14, "grad_norm": 5.697922682773499, "learning_rate": 9.672971677129323e-06, "loss": 0.3865, "step": 4969 }, { "epoch": 0.14, "grad_norm": 3.93709253291848, "learning_rate": 9.672806691009507e-06, "loss": 0.3869, "step": 4970 }, { "epoch": 0.14, "grad_norm": 7.427235356113015, "learning_rate": 9.67264166469011e-06, "loss": 0.6936, "step": 4971 }, { "epoch": 0.14, "grad_norm": 9.16064316486401, "learning_rate": 9.672476598172552e-06, "loss": 0.4697, "step": 4972 }, { "epoch": 0.14, "grad_norm": 10.390691541874572, "learning_rate": 9.672311491458252e-06, "loss": 0.5069, "step": 4973 }, { "epoch": 0.14, "grad_norm": 5.5222570730773075, "learning_rate": 9.672146344548631e-06, "loss": 0.4841, "step": 4974 }, { "epoch": 0.14, "grad_norm": 11.342803495238371, "learning_rate": 9.671981157445113e-06, "loss": 1.1848, "step": 4975 }, { "epoch": 0.14, "grad_norm": 3.3466353668136675, "learning_rate": 9.671815930149116e-06, "loss": 0.1062, "step": 4976 }, { "epoch": 0.14, "grad_norm": 8.734335802021, "learning_rate": 9.67165066266206e-06, "loss": 1.0737, "step": 4977 }, { "epoch": 0.14, "grad_norm": 11.170546147203664, "learning_rate": 9.67148535498537e-06, "loss": 0.5993, "step": 4978 }, { "epoch": 0.14, "grad_norm": 5.779041507670237, "learning_rate": 9.671320007120465e-06, "loss": 0.5339, "step": 4979 }, { "epoch": 0.14, "grad_norm": 8.767919012433552, "learning_rate": 9.67115461906877e-06, "loss": 0.7657, "step": 4980 }, { "epoch": 0.14, "grad_norm": 5.976082217796091, "learning_rate": 9.670989190831708e-06, "loss": 0.9617, "step": 4981 }, { "epoch": 0.14, "grad_norm": 6.10480451234949, "learning_rate": 9.6708237224107e-06, "loss": 0.4874, "step": 4982 }, { "epoch": 0.14, "grad_norm": 10.309251215741735, "learning_rate": 9.670658213807171e-06, "loss": 0.9493, "step": 4983 }, { "epoch": 0.14, "grad_norm": 3.5569358841386203, "learning_rate": 9.670492665022545e-06, "loss": 0.2157, "step": 4984 }, { "epoch": 0.14, "grad_norm": 4.661003604922715, "learning_rate": 9.670327076058243e-06, "loss": 0.5341, "step": 4985 }, { "epoch": 0.14, "grad_norm": 5.904222372470221, "learning_rate": 9.670161446915694e-06, "loss": 0.6776, "step": 4986 }, { "epoch": 0.14, "grad_norm": 3.936662494055345, "learning_rate": 9.669995777596322e-06, "loss": 0.7046, "step": 4987 }, { "epoch": 0.14, "grad_norm": 8.559271433476322, "learning_rate": 9.66983006810155e-06, "loss": 0.7398, "step": 4988 }, { "epoch": 0.14, "grad_norm": 6.452137943176724, "learning_rate": 9.669664318432805e-06, "loss": 0.5702, "step": 4989 }, { "epoch": 0.14, "grad_norm": 4.167903595748511, "learning_rate": 9.669498528591513e-06, "loss": 0.6936, "step": 4990 }, { "epoch": 0.14, "grad_norm": 4.281602998089133, "learning_rate": 9.6693326985791e-06, "loss": 0.2522, "step": 4991 }, { "epoch": 0.14, "grad_norm": 4.032396700506201, "learning_rate": 9.669166828396993e-06, "loss": 0.468, "step": 4992 }, { "epoch": 0.14, "grad_norm": 7.7603465202967445, "learning_rate": 9.669000918046618e-06, "loss": 0.3148, "step": 4993 }, { "epoch": 0.14, "grad_norm": 5.236340782779134, "learning_rate": 9.668834967529404e-06, "loss": 0.7826, "step": 4994 }, { "epoch": 0.14, "grad_norm": 3.7299602062055994, "learning_rate": 9.668668976846778e-06, "loss": 0.5176, "step": 4995 }, { "epoch": 0.14, "grad_norm": 11.960342442257227, "learning_rate": 9.668502946000166e-06, "loss": 0.5185, "step": 4996 }, { "epoch": 0.14, "grad_norm": 21.18405240346875, "learning_rate": 9.668336874991e-06, "loss": 0.4969, "step": 4997 }, { "epoch": 0.14, "grad_norm": 4.221031320955012, "learning_rate": 9.668170763820703e-06, "loss": 0.4033, "step": 4998 }, { "epoch": 0.14, "grad_norm": 4.003541183576197, "learning_rate": 9.66800461249071e-06, "loss": 0.5108, "step": 4999 }, { "epoch": 0.14, "grad_norm": 3.64129578464375, "learning_rate": 9.667838421002449e-06, "loss": 0.1149, "step": 5000 }, { "epoch": 0.14, "grad_norm": 7.349265588023325, "learning_rate": 9.667672189357348e-06, "loss": 0.4774, "step": 5001 }, { "epoch": 0.14, "grad_norm": 9.077917940335752, "learning_rate": 9.667505917556838e-06, "loss": 0.925, "step": 5002 }, { "epoch": 0.14, "grad_norm": 3.2174365132077143, "learning_rate": 9.667339605602348e-06, "loss": 0.2029, "step": 5003 }, { "epoch": 0.14, "grad_norm": 2.66253844488622, "learning_rate": 9.667173253495312e-06, "loss": 0.1765, "step": 5004 }, { "epoch": 0.14, "grad_norm": 4.265667478468667, "learning_rate": 9.667006861237158e-06, "loss": 0.3226, "step": 5005 }, { "epoch": 0.14, "grad_norm": 4.280718805257484, "learning_rate": 9.666840428829318e-06, "loss": 0.6352, "step": 5006 }, { "epoch": 0.14, "grad_norm": 5.44239052265962, "learning_rate": 9.666673956273226e-06, "loss": 0.6498, "step": 5007 }, { "epoch": 0.14, "grad_norm": 9.266895946803956, "learning_rate": 9.666507443570313e-06, "loss": 0.6464, "step": 5008 }, { "epoch": 0.14, "grad_norm": 6.923124798992278, "learning_rate": 9.666340890722007e-06, "loss": 0.6429, "step": 5009 }, { "epoch": 0.14, "grad_norm": 4.863900043385921, "learning_rate": 9.66617429772975e-06, "loss": 0.6982, "step": 5010 }, { "epoch": 0.14, "grad_norm": 8.09823743222357, "learning_rate": 9.666007664594967e-06, "loss": 0.9954, "step": 5011 }, { "epoch": 0.14, "grad_norm": 12.790058710054486, "learning_rate": 9.665840991319095e-06, "loss": 0.5421, "step": 5012 }, { "epoch": 0.14, "grad_norm": 5.590195704055768, "learning_rate": 9.665674277903567e-06, "loss": 0.8017, "step": 5013 }, { "epoch": 0.14, "grad_norm": 4.186973709512245, "learning_rate": 9.665507524349817e-06, "loss": 0.3886, "step": 5014 }, { "epoch": 0.14, "grad_norm": 3.310738058955738, "learning_rate": 9.665340730659284e-06, "loss": 0.2104, "step": 5015 }, { "epoch": 0.14, "grad_norm": 7.501738029004382, "learning_rate": 9.665173896833395e-06, "loss": 0.5211, "step": 5016 }, { "epoch": 0.14, "grad_norm": 10.506870156615435, "learning_rate": 9.665007022873592e-06, "loss": 0.7411, "step": 5017 }, { "epoch": 0.14, "grad_norm": 7.135914256619962, "learning_rate": 9.664840108781306e-06, "loss": 0.3782, "step": 5018 }, { "epoch": 0.14, "grad_norm": 6.51916338303867, "learning_rate": 9.664673154557975e-06, "loss": 0.51, "step": 5019 }, { "epoch": 0.14, "grad_norm": 5.753851512847987, "learning_rate": 9.664506160205037e-06, "loss": 0.2772, "step": 5020 }, { "epoch": 0.14, "grad_norm": 5.514543940690817, "learning_rate": 9.664339125723925e-06, "loss": 0.9312, "step": 5021 }, { "epoch": 0.14, "grad_norm": 14.097402433466023, "learning_rate": 9.664172051116078e-06, "loss": 0.6781, "step": 5022 }, { "epoch": 0.14, "grad_norm": 6.076675281375562, "learning_rate": 9.664004936382933e-06, "loss": 0.2647, "step": 5023 }, { "epoch": 0.14, "grad_norm": 6.923063292449183, "learning_rate": 9.663837781525928e-06, "loss": 0.3591, "step": 5024 }, { "epoch": 0.14, "grad_norm": 6.461511625637668, "learning_rate": 9.663670586546502e-06, "loss": 0.5817, "step": 5025 }, { "epoch": 0.14, "grad_norm": 7.173974787342103, "learning_rate": 9.663503351446092e-06, "loss": 0.4382, "step": 5026 }, { "epoch": 0.14, "grad_norm": 9.620269950758974, "learning_rate": 9.663336076226136e-06, "loss": 0.2203, "step": 5027 }, { "epoch": 0.14, "grad_norm": 9.165057584391246, "learning_rate": 9.663168760888074e-06, "loss": 0.6291, "step": 5028 }, { "epoch": 0.14, "grad_norm": 8.647200663961426, "learning_rate": 9.663001405433345e-06, "loss": 0.6054, "step": 5029 }, { "epoch": 0.14, "grad_norm": 8.562099586647252, "learning_rate": 9.66283400986339e-06, "loss": 0.5814, "step": 5030 }, { "epoch": 0.14, "grad_norm": 6.403533657395079, "learning_rate": 9.662666574179647e-06, "loss": 0.6236, "step": 5031 }, { "epoch": 0.14, "grad_norm": 8.51538488766986, "learning_rate": 9.66249909838356e-06, "loss": 1.0046, "step": 5032 }, { "epoch": 0.14, "grad_norm": 11.727136810085023, "learning_rate": 9.662331582476564e-06, "loss": 0.5956, "step": 5033 }, { "epoch": 0.14, "grad_norm": 5.348891433946249, "learning_rate": 9.662164026460105e-06, "loss": 0.8107, "step": 5034 }, { "epoch": 0.14, "grad_norm": 4.193814711775001, "learning_rate": 9.661996430335623e-06, "loss": 0.4883, "step": 5035 }, { "epoch": 0.14, "grad_norm": 4.457102327841545, "learning_rate": 9.66182879410456e-06, "loss": 0.3417, "step": 5036 }, { "epoch": 0.14, "grad_norm": 4.001488229940431, "learning_rate": 9.661661117768357e-06, "loss": 0.2805, "step": 5037 }, { "epoch": 0.14, "grad_norm": 4.134802586133956, "learning_rate": 9.661493401328457e-06, "loss": 0.2451, "step": 5038 }, { "epoch": 0.14, "grad_norm": 8.955682359317802, "learning_rate": 9.661325644786303e-06, "loss": 0.7821, "step": 5039 }, { "epoch": 0.14, "grad_norm": 7.1791418488778875, "learning_rate": 9.66115784814334e-06, "loss": 0.4305, "step": 5040 }, { "epoch": 0.14, "grad_norm": 4.00782784799236, "learning_rate": 9.66099001140101e-06, "loss": 0.5006, "step": 5041 }, { "epoch": 0.14, "grad_norm": 9.764223434579707, "learning_rate": 9.660822134560756e-06, "loss": 1.1564, "step": 5042 }, { "epoch": 0.14, "grad_norm": 4.719572993376322, "learning_rate": 9.660654217624023e-06, "loss": 0.6175, "step": 5043 }, { "epoch": 0.14, "grad_norm": 2.62310693236085, "learning_rate": 9.660486260592255e-06, "loss": 0.3202, "step": 5044 }, { "epoch": 0.14, "grad_norm": 8.089072330226456, "learning_rate": 9.660318263466898e-06, "loss": 0.5729, "step": 5045 }, { "epoch": 0.14, "grad_norm": 8.773845496673594, "learning_rate": 9.660150226249396e-06, "loss": 0.7384, "step": 5046 }, { "epoch": 0.14, "grad_norm": 5.798715181108435, "learning_rate": 9.659982148941197e-06, "loss": 0.4889, "step": 5047 }, { "epoch": 0.14, "grad_norm": 5.8014797830476805, "learning_rate": 9.659814031543744e-06, "loss": 0.3583, "step": 5048 }, { "epoch": 0.14, "grad_norm": 13.445543374756411, "learning_rate": 9.659645874058487e-06, "loss": 0.9438, "step": 5049 }, { "epoch": 0.14, "grad_norm": 6.121478664465393, "learning_rate": 9.659477676486868e-06, "loss": 0.2055, "step": 5050 }, { "epoch": 0.14, "grad_norm": 6.1005630358392375, "learning_rate": 9.659309438830337e-06, "loss": 0.5189, "step": 5051 }, { "epoch": 0.14, "grad_norm": 4.47202463812686, "learning_rate": 9.65914116109034e-06, "loss": 0.4863, "step": 5052 }, { "epoch": 0.14, "grad_norm": 5.200479012214349, "learning_rate": 9.658972843268327e-06, "loss": 0.3773, "step": 5053 }, { "epoch": 0.14, "grad_norm": 9.062904506732075, "learning_rate": 9.658804485365741e-06, "loss": 0.7914, "step": 5054 }, { "epoch": 0.14, "grad_norm": 6.962484442592478, "learning_rate": 9.658636087384035e-06, "loss": 0.8978, "step": 5055 }, { "epoch": 0.14, "grad_norm": 6.267219359631471, "learning_rate": 9.658467649324657e-06, "loss": 0.5698, "step": 5056 }, { "epoch": 0.14, "grad_norm": 5.691189376270223, "learning_rate": 9.658299171189056e-06, "loss": 0.2597, "step": 5057 }, { "epoch": 0.14, "grad_norm": 5.743883860691715, "learning_rate": 9.658130652978679e-06, "loss": 0.4066, "step": 5058 }, { "epoch": 0.14, "grad_norm": 7.178936907663037, "learning_rate": 9.657962094694979e-06, "loss": 0.8227, "step": 5059 }, { "epoch": 0.14, "grad_norm": 8.267162043074997, "learning_rate": 9.657793496339403e-06, "loss": 0.308, "step": 5060 }, { "epoch": 0.14, "grad_norm": 5.70603772339918, "learning_rate": 9.657624857913405e-06, "loss": 0.2673, "step": 5061 }, { "epoch": 0.14, "grad_norm": 5.646178663139677, "learning_rate": 9.657456179418432e-06, "loss": 0.4486, "step": 5062 }, { "epoch": 0.14, "grad_norm": 10.95007486361339, "learning_rate": 9.657287460855938e-06, "loss": 0.7701, "step": 5063 }, { "epoch": 0.15, "grad_norm": 5.124442465624538, "learning_rate": 9.657118702227373e-06, "loss": 0.8405, "step": 5064 }, { "epoch": 0.15, "grad_norm": 7.47086700060255, "learning_rate": 9.65694990353419e-06, "loss": 0.695, "step": 5065 }, { "epoch": 0.15, "grad_norm": 2.227456753157897, "learning_rate": 9.656781064777841e-06, "loss": 0.3668, "step": 5066 }, { "epoch": 0.15, "grad_norm": 6.5357918894774025, "learning_rate": 9.656612185959774e-06, "loss": 0.8971, "step": 5067 }, { "epoch": 0.15, "grad_norm": 5.227754374316687, "learning_rate": 9.65644326708145e-06, "loss": 0.6554, "step": 5068 }, { "epoch": 0.15, "grad_norm": 5.030827356439949, "learning_rate": 9.656274308144316e-06, "loss": 0.9183, "step": 5069 }, { "epoch": 0.15, "grad_norm": 4.765724056027399, "learning_rate": 9.656105309149829e-06, "loss": 0.5253, "step": 5070 }, { "epoch": 0.15, "grad_norm": 7.918547222910271, "learning_rate": 9.655936270099439e-06, "loss": 0.7688, "step": 5071 }, { "epoch": 0.15, "grad_norm": 9.027942196905613, "learning_rate": 9.655767190994603e-06, "loss": 0.8808, "step": 5072 }, { "epoch": 0.15, "grad_norm": 7.911328671070702, "learning_rate": 9.655598071836775e-06, "loss": 0.5744, "step": 5073 }, { "epoch": 0.15, "grad_norm": 5.157368093700061, "learning_rate": 9.655428912627411e-06, "loss": 0.3735, "step": 5074 }, { "epoch": 0.15, "grad_norm": 4.688143571542994, "learning_rate": 9.655259713367965e-06, "loss": 0.4259, "step": 5075 }, { "epoch": 0.15, "grad_norm": 4.506822447586122, "learning_rate": 9.655090474059893e-06, "loss": 0.2434, "step": 5076 }, { "epoch": 0.15, "grad_norm": 4.648416009420517, "learning_rate": 9.65492119470465e-06, "loss": 0.617, "step": 5077 }, { "epoch": 0.15, "grad_norm": 5.070838724581199, "learning_rate": 9.654751875303694e-06, "loss": 0.385, "step": 5078 }, { "epoch": 0.15, "grad_norm": 3.8731171740578585, "learning_rate": 9.654582515858479e-06, "loss": 0.1127, "step": 5079 }, { "epoch": 0.15, "grad_norm": 4.97355270995345, "learning_rate": 9.654413116370466e-06, "loss": 0.3991, "step": 5080 }, { "epoch": 0.15, "grad_norm": 4.844095131668382, "learning_rate": 9.654243676841108e-06, "loss": 0.8027, "step": 5081 }, { "epoch": 0.15, "grad_norm": 6.6659275678378505, "learning_rate": 9.654074197271866e-06, "loss": 0.378, "step": 5082 }, { "epoch": 0.15, "grad_norm": 3.0409752482316703, "learning_rate": 9.653904677664197e-06, "loss": 0.4148, "step": 5083 }, { "epoch": 0.15, "grad_norm": 7.199097241655936, "learning_rate": 9.653735118019558e-06, "loss": 0.9046, "step": 5084 }, { "epoch": 0.15, "grad_norm": 4.364483864803252, "learning_rate": 9.65356551833941e-06, "loss": 0.3511, "step": 5085 }, { "epoch": 0.15, "grad_norm": 7.366030265395842, "learning_rate": 9.653395878625209e-06, "loss": 0.4304, "step": 5086 }, { "epoch": 0.15, "grad_norm": 5.503267531360666, "learning_rate": 9.653226198878419e-06, "loss": 0.4015, "step": 5087 }, { "epoch": 0.15, "grad_norm": 4.36642842528602, "learning_rate": 9.653056479100494e-06, "loss": 0.5827, "step": 5088 }, { "epoch": 0.15, "grad_norm": 6.231837126563513, "learning_rate": 9.652886719292898e-06, "loss": 0.4918, "step": 5089 }, { "epoch": 0.15, "grad_norm": 7.013286311217992, "learning_rate": 9.65271691945709e-06, "loss": 0.574, "step": 5090 }, { "epoch": 0.15, "grad_norm": 3.048437686140015, "learning_rate": 9.652547079594532e-06, "loss": 0.3773, "step": 5091 }, { "epoch": 0.15, "grad_norm": 7.598103974396331, "learning_rate": 9.652377199706684e-06, "loss": 0.7225, "step": 5092 }, { "epoch": 0.15, "grad_norm": 4.19925400830038, "learning_rate": 9.652207279795008e-06, "loss": 0.4048, "step": 5093 }, { "epoch": 0.15, "grad_norm": 8.63296577697284, "learning_rate": 9.652037319860965e-06, "loss": 0.6277, "step": 5094 }, { "epoch": 0.15, "grad_norm": 5.823684719069432, "learning_rate": 9.651867319906019e-06, "loss": 0.5701, "step": 5095 }, { "epoch": 0.15, "grad_norm": 6.426464584795544, "learning_rate": 9.65169727993163e-06, "loss": 0.7985, "step": 5096 }, { "epoch": 0.15, "grad_norm": 6.764546508814147, "learning_rate": 9.651527199939264e-06, "loss": 0.6089, "step": 5097 }, { "epoch": 0.15, "grad_norm": 9.427699121978709, "learning_rate": 9.651357079930379e-06, "loss": 0.6992, "step": 5098 }, { "epoch": 0.15, "grad_norm": 9.100231993516179, "learning_rate": 9.651186919906443e-06, "loss": 0.5386, "step": 5099 }, { "epoch": 0.15, "grad_norm": 10.47837237058414, "learning_rate": 9.65101671986892e-06, "loss": 0.5231, "step": 5100 }, { "epoch": 0.15, "grad_norm": 10.546200246066208, "learning_rate": 9.650846479819272e-06, "loss": 0.5944, "step": 5101 }, { "epoch": 0.15, "grad_norm": 9.217594449309743, "learning_rate": 9.650676199758963e-06, "loss": 0.8654, "step": 5102 }, { "epoch": 0.15, "grad_norm": 5.116797465828106, "learning_rate": 9.65050587968946e-06, "loss": 0.549, "step": 5103 }, { "epoch": 0.15, "grad_norm": 7.572658620125784, "learning_rate": 9.650335519612228e-06, "loss": 0.5769, "step": 5104 }, { "epoch": 0.15, "grad_norm": 3.9743996004800093, "learning_rate": 9.650165119528733e-06, "loss": 0.2067, "step": 5105 }, { "epoch": 0.15, "grad_norm": 4.790150853212579, "learning_rate": 9.649994679440439e-06, "loss": 0.6533, "step": 5106 }, { "epoch": 0.15, "grad_norm": 5.791763766821828, "learning_rate": 9.649824199348812e-06, "loss": 0.5216, "step": 5107 }, { "epoch": 0.15, "grad_norm": 4.482989585678866, "learning_rate": 9.649653679255322e-06, "loss": 0.3831, "step": 5108 }, { "epoch": 0.15, "grad_norm": 9.73612918631387, "learning_rate": 9.649483119161433e-06, "loss": 1.0179, "step": 5109 }, { "epoch": 0.15, "grad_norm": 5.307634975821157, "learning_rate": 9.649312519068613e-06, "loss": 0.6662, "step": 5110 }, { "epoch": 0.15, "grad_norm": 5.108485013111119, "learning_rate": 9.64914187897833e-06, "loss": 0.4377, "step": 5111 }, { "epoch": 0.15, "grad_norm": 8.066446447387735, "learning_rate": 9.648971198892051e-06, "loss": 0.3633, "step": 5112 }, { "epoch": 0.15, "grad_norm": 6.189887000610129, "learning_rate": 9.648800478811247e-06, "loss": 0.4283, "step": 5113 }, { "epoch": 0.15, "grad_norm": 5.666599142382855, "learning_rate": 9.648629718737385e-06, "loss": 0.5907, "step": 5114 }, { "epoch": 0.15, "grad_norm": 3.8445914983340557, "learning_rate": 9.648458918671932e-06, "loss": 0.4367, "step": 5115 }, { "epoch": 0.15, "grad_norm": 9.176929907034037, "learning_rate": 9.64828807861636e-06, "loss": 0.7445, "step": 5116 }, { "epoch": 0.15, "grad_norm": 7.055951416077591, "learning_rate": 9.648117198572138e-06, "loss": 0.5232, "step": 5117 }, { "epoch": 0.15, "grad_norm": 9.706285487181963, "learning_rate": 9.647946278540736e-06, "loss": 0.5922, "step": 5118 }, { "epoch": 0.15, "grad_norm": 5.3912859083077, "learning_rate": 9.647775318523624e-06, "loss": 0.2928, "step": 5119 }, { "epoch": 0.15, "grad_norm": 6.158815923871808, "learning_rate": 9.647604318522274e-06, "loss": 0.8943, "step": 5120 }, { "epoch": 0.15, "grad_norm": 6.241129531293634, "learning_rate": 9.647433278538156e-06, "loss": 0.7313, "step": 5121 }, { "epoch": 0.15, "grad_norm": 7.1914547862125, "learning_rate": 9.647262198572742e-06, "loss": 0.6668, "step": 5122 }, { "epoch": 0.15, "grad_norm": 6.7931464480760715, "learning_rate": 9.647091078627504e-06, "loss": 0.5478, "step": 5123 }, { "epoch": 0.15, "grad_norm": 4.876017684350557, "learning_rate": 9.646919918703913e-06, "loss": 0.4644, "step": 5124 }, { "epoch": 0.15, "grad_norm": 7.616177129004011, "learning_rate": 9.64674871880344e-06, "loss": 0.8875, "step": 5125 }, { "epoch": 0.15, "grad_norm": 9.64976666519235, "learning_rate": 9.646577478927564e-06, "loss": 0.4678, "step": 5126 }, { "epoch": 0.15, "grad_norm": 4.034611269106846, "learning_rate": 9.646406199077752e-06, "loss": 0.4006, "step": 5127 }, { "epoch": 0.15, "grad_norm": 6.101482786471131, "learning_rate": 9.646234879255479e-06, "loss": 0.6376, "step": 5128 }, { "epoch": 0.15, "grad_norm": 5.3187197234998775, "learning_rate": 9.64606351946222e-06, "loss": 0.5368, "step": 5129 }, { "epoch": 0.15, "grad_norm": 5.642384558474085, "learning_rate": 9.645892119699447e-06, "loss": 0.5632, "step": 5130 }, { "epoch": 0.15, "grad_norm": 9.82625466215001, "learning_rate": 9.645720679968637e-06, "loss": 0.3438, "step": 5131 }, { "epoch": 0.15, "grad_norm": 4.511515400585129, "learning_rate": 9.645549200271264e-06, "loss": 0.3686, "step": 5132 }, { "epoch": 0.15, "grad_norm": 8.054810634970531, "learning_rate": 9.645377680608804e-06, "loss": 0.522, "step": 5133 }, { "epoch": 0.15, "grad_norm": 8.388379185358804, "learning_rate": 9.645206120982732e-06, "loss": 0.605, "step": 5134 }, { "epoch": 0.15, "grad_norm": 7.456402647721372, "learning_rate": 9.645034521394524e-06, "loss": 0.8408, "step": 5135 }, { "epoch": 0.15, "grad_norm": 6.021448937682072, "learning_rate": 9.644862881845653e-06, "loss": 0.7616, "step": 5136 }, { "epoch": 0.15, "grad_norm": 4.218180808770499, "learning_rate": 9.644691202337602e-06, "loss": 0.41, "step": 5137 }, { "epoch": 0.15, "grad_norm": 17.482695416187912, "learning_rate": 9.644519482871844e-06, "loss": 0.9854, "step": 5138 }, { "epoch": 0.15, "grad_norm": 6.702166081910695, "learning_rate": 9.644347723449855e-06, "loss": 0.3738, "step": 5139 }, { "epoch": 0.15, "grad_norm": 1.4259749294052733, "learning_rate": 9.644175924073114e-06, "loss": 0.0829, "step": 5140 }, { "epoch": 0.15, "grad_norm": 11.394000271524483, "learning_rate": 9.644004084743102e-06, "loss": 0.8259, "step": 5141 }, { "epoch": 0.15, "grad_norm": 6.7009650175965865, "learning_rate": 9.643832205461292e-06, "loss": 0.6419, "step": 5142 }, { "epoch": 0.15, "grad_norm": 5.982354044490785, "learning_rate": 9.643660286229166e-06, "loss": 0.5475, "step": 5143 }, { "epoch": 0.15, "grad_norm": 4.405804415418458, "learning_rate": 9.643488327048202e-06, "loss": 0.3744, "step": 5144 }, { "epoch": 0.15, "grad_norm": 5.144874184778211, "learning_rate": 9.643316327919881e-06, "loss": 0.6927, "step": 5145 }, { "epoch": 0.15, "grad_norm": 6.611217485457172, "learning_rate": 9.64314428884568e-06, "loss": 0.5176, "step": 5146 }, { "epoch": 0.15, "grad_norm": 12.09795872202696, "learning_rate": 9.642972209827082e-06, "loss": 0.9785, "step": 5147 }, { "epoch": 0.15, "grad_norm": 4.040496512580635, "learning_rate": 9.642800090865565e-06, "loss": 0.4866, "step": 5148 }, { "epoch": 0.15, "grad_norm": 9.603484492923531, "learning_rate": 9.64262793196261e-06, "loss": 0.7818, "step": 5149 }, { "epoch": 0.15, "grad_norm": 7.55085756724115, "learning_rate": 9.642455733119699e-06, "loss": 0.2544, "step": 5150 }, { "epoch": 0.15, "grad_norm": 4.066063682352044, "learning_rate": 9.642283494338312e-06, "loss": 0.4162, "step": 5151 }, { "epoch": 0.15, "grad_norm": 8.309066005784812, "learning_rate": 9.642111215619933e-06, "loss": 0.3909, "step": 5152 }, { "epoch": 0.15, "grad_norm": 7.453616527927765, "learning_rate": 9.641938896966042e-06, "loss": 0.6069, "step": 5153 }, { "epoch": 0.15, "grad_norm": 2.644040939879762, "learning_rate": 9.641766538378122e-06, "loss": 0.1341, "step": 5154 }, { "epoch": 0.15, "grad_norm": 7.16362412504023, "learning_rate": 9.641594139857657e-06, "loss": 0.5914, "step": 5155 }, { "epoch": 0.15, "grad_norm": 7.3594773724799865, "learning_rate": 9.641421701406128e-06, "loss": 0.7704, "step": 5156 }, { "epoch": 0.15, "grad_norm": 10.526554498475509, "learning_rate": 9.64124922302502e-06, "loss": 0.529, "step": 5157 }, { "epoch": 0.15, "grad_norm": 5.319545472095675, "learning_rate": 9.641076704715816e-06, "loss": 0.5262, "step": 5158 }, { "epoch": 0.15, "grad_norm": 14.456950111439765, "learning_rate": 9.640904146480002e-06, "loss": 0.5589, "step": 5159 }, { "epoch": 0.15, "grad_norm": 4.218050199323903, "learning_rate": 9.640731548319059e-06, "loss": 0.4897, "step": 5160 }, { "epoch": 0.15, "grad_norm": 11.202603507573604, "learning_rate": 9.640558910234474e-06, "loss": 1.0034, "step": 5161 }, { "epoch": 0.15, "grad_norm": 7.508506337951843, "learning_rate": 9.640386232227733e-06, "loss": 0.4592, "step": 5162 }, { "epoch": 0.15, "grad_norm": 3.7896021822390193, "learning_rate": 9.64021351430032e-06, "loss": 0.3558, "step": 5163 }, { "epoch": 0.15, "grad_norm": 7.831451135778723, "learning_rate": 9.640040756453723e-06, "loss": 0.6441, "step": 5164 }, { "epoch": 0.15, "grad_norm": 4.082762175467738, "learning_rate": 9.639867958689425e-06, "loss": 0.4238, "step": 5165 }, { "epoch": 0.15, "grad_norm": 6.669760590107091, "learning_rate": 9.639695121008914e-06, "loss": 0.4368, "step": 5166 }, { "epoch": 0.15, "grad_norm": 3.7181667383377413, "learning_rate": 9.639522243413679e-06, "loss": 0.4401, "step": 5167 }, { "epoch": 0.15, "grad_norm": 5.3128912388966905, "learning_rate": 9.639349325905203e-06, "loss": 0.6439, "step": 5168 }, { "epoch": 0.15, "grad_norm": 5.781452979571197, "learning_rate": 9.639176368484979e-06, "loss": 0.5779, "step": 5169 }, { "epoch": 0.15, "grad_norm": 4.773717893865577, "learning_rate": 9.63900337115449e-06, "loss": 0.676, "step": 5170 }, { "epoch": 0.15, "grad_norm": 4.374477328005513, "learning_rate": 9.638830333915225e-06, "loss": 0.3101, "step": 5171 }, { "epoch": 0.15, "grad_norm": 11.297142437187102, "learning_rate": 9.638657256768675e-06, "loss": 0.5813, "step": 5172 }, { "epoch": 0.15, "grad_norm": 5.165442208842398, "learning_rate": 9.638484139716328e-06, "loss": 0.6507, "step": 5173 }, { "epoch": 0.15, "grad_norm": 9.687705450032876, "learning_rate": 9.638310982759673e-06, "loss": 0.6991, "step": 5174 }, { "epoch": 0.15, "grad_norm": 6.438996113372687, "learning_rate": 9.638137785900199e-06, "loss": 0.7159, "step": 5175 }, { "epoch": 0.15, "grad_norm": 17.29898226616434, "learning_rate": 9.637964549139398e-06, "loss": 0.4656, "step": 5176 }, { "epoch": 0.15, "grad_norm": 13.43783526335032, "learning_rate": 9.637791272478757e-06, "loss": 0.8113, "step": 5177 }, { "epoch": 0.15, "grad_norm": 5.244363847524518, "learning_rate": 9.637617955919772e-06, "loss": 0.3697, "step": 5178 }, { "epoch": 0.15, "grad_norm": 5.341081945916653, "learning_rate": 9.637444599463928e-06, "loss": 0.6162, "step": 5179 }, { "epoch": 0.15, "grad_norm": 9.29323910692336, "learning_rate": 9.63727120311272e-06, "loss": 0.4284, "step": 5180 }, { "epoch": 0.15, "grad_norm": 10.565750615056666, "learning_rate": 9.637097766867638e-06, "loss": 1.0465, "step": 5181 }, { "epoch": 0.15, "grad_norm": 5.00861201100761, "learning_rate": 9.636924290730176e-06, "loss": 0.5251, "step": 5182 }, { "epoch": 0.15, "grad_norm": 9.533696495377162, "learning_rate": 9.636750774701825e-06, "loss": 0.5199, "step": 5183 }, { "epoch": 0.15, "grad_norm": 7.343046049102228, "learning_rate": 9.636577218784076e-06, "loss": 0.6773, "step": 5184 }, { "epoch": 0.15, "grad_norm": 10.29794421744412, "learning_rate": 9.636403622978426e-06, "loss": 0.9964, "step": 5185 }, { "epoch": 0.15, "grad_norm": 5.421065740077291, "learning_rate": 9.636229987286367e-06, "loss": 0.7047, "step": 5186 }, { "epoch": 0.15, "grad_norm": 11.332123486137897, "learning_rate": 9.636056311709392e-06, "loss": 0.466, "step": 5187 }, { "epoch": 0.15, "grad_norm": 5.197450011665724, "learning_rate": 9.635882596248993e-06, "loss": 0.2443, "step": 5188 }, { "epoch": 0.15, "grad_norm": 2.4903587040999158, "learning_rate": 9.63570884090667e-06, "loss": 0.1495, "step": 5189 }, { "epoch": 0.15, "grad_norm": 6.159986843181778, "learning_rate": 9.635535045683912e-06, "loss": 0.4627, "step": 5190 }, { "epoch": 0.15, "grad_norm": 5.8583489498764125, "learning_rate": 9.635361210582219e-06, "loss": 0.6034, "step": 5191 }, { "epoch": 0.15, "grad_norm": 6.776119805446723, "learning_rate": 9.635187335603082e-06, "loss": 0.7585, "step": 5192 }, { "epoch": 0.15, "grad_norm": 7.7473531325416385, "learning_rate": 9.635013420748001e-06, "loss": 0.3806, "step": 5193 }, { "epoch": 0.15, "grad_norm": 10.163977491417665, "learning_rate": 9.634839466018469e-06, "loss": 0.7785, "step": 5194 }, { "epoch": 0.15, "grad_norm": 7.309546436584252, "learning_rate": 9.634665471415986e-06, "loss": 0.4345, "step": 5195 }, { "epoch": 0.15, "grad_norm": 2.9444615478038876, "learning_rate": 9.634491436942043e-06, "loss": 0.151, "step": 5196 }, { "epoch": 0.15, "grad_norm": 4.384278132265145, "learning_rate": 9.634317362598142e-06, "loss": 0.3303, "step": 5197 }, { "epoch": 0.15, "grad_norm": 5.395432824351635, "learning_rate": 9.63414324838578e-06, "loss": 0.5115, "step": 5198 }, { "epoch": 0.15, "grad_norm": 4.997931720201604, "learning_rate": 9.633969094306454e-06, "loss": 0.5203, "step": 5199 }, { "epoch": 0.15, "grad_norm": 3.6394683080939756, "learning_rate": 9.633794900361663e-06, "loss": 0.3481, "step": 5200 }, { "epoch": 0.15, "grad_norm": 4.459028968136906, "learning_rate": 9.633620666552905e-06, "loss": 0.5625, "step": 5201 }, { "epoch": 0.15, "grad_norm": 4.656743631502838, "learning_rate": 9.633446392881678e-06, "loss": 0.2705, "step": 5202 }, { "epoch": 0.15, "grad_norm": 7.366312438192718, "learning_rate": 9.633272079349482e-06, "loss": 0.6152, "step": 5203 }, { "epoch": 0.15, "grad_norm": 5.620724473054141, "learning_rate": 9.633097725957818e-06, "loss": 0.6547, "step": 5204 }, { "epoch": 0.15, "grad_norm": 2.677865835534097, "learning_rate": 9.632923332708184e-06, "loss": 0.3071, "step": 5205 }, { "epoch": 0.15, "grad_norm": 6.973521903875792, "learning_rate": 9.63274889960208e-06, "loss": 0.7284, "step": 5206 }, { "epoch": 0.15, "grad_norm": 6.793325404960242, "learning_rate": 9.632574426641011e-06, "loss": 0.6406, "step": 5207 }, { "epoch": 0.15, "grad_norm": 3.6463703450301415, "learning_rate": 9.63239991382647e-06, "loss": 0.4228, "step": 5208 }, { "epoch": 0.15, "grad_norm": 2.5047857730502865, "learning_rate": 9.632225361159965e-06, "loss": 0.3276, "step": 5209 }, { "epoch": 0.15, "grad_norm": 4.14664893378568, "learning_rate": 9.632050768642996e-06, "loss": 0.4087, "step": 5210 }, { "epoch": 0.15, "grad_norm": 10.588205602703974, "learning_rate": 9.631876136277065e-06, "loss": 1.0441, "step": 5211 }, { "epoch": 0.15, "grad_norm": 8.643505119997902, "learning_rate": 9.631701464063674e-06, "loss": 0.5214, "step": 5212 }, { "epoch": 0.15, "grad_norm": 6.546185659748319, "learning_rate": 9.631526752004323e-06, "loss": 0.4076, "step": 5213 }, { "epoch": 0.15, "grad_norm": 5.385903233794572, "learning_rate": 9.63135200010052e-06, "loss": 0.3941, "step": 5214 }, { "epoch": 0.15, "grad_norm": 6.633741477729379, "learning_rate": 9.631177208353766e-06, "loss": 0.2996, "step": 5215 }, { "epoch": 0.15, "grad_norm": 2.895031369719586, "learning_rate": 9.631002376765564e-06, "loss": 0.2346, "step": 5216 }, { "epoch": 0.15, "grad_norm": 7.733595683591536, "learning_rate": 9.630827505337418e-06, "loss": 0.8556, "step": 5217 }, { "epoch": 0.15, "grad_norm": 5.450469411530559, "learning_rate": 9.630652594070835e-06, "loss": 0.5812, "step": 5218 }, { "epoch": 0.15, "grad_norm": 5.506934302909827, "learning_rate": 9.630477642967315e-06, "loss": 0.2966, "step": 5219 }, { "epoch": 0.15, "grad_norm": 19.48188630020427, "learning_rate": 9.630302652028369e-06, "loss": 0.9442, "step": 5220 }, { "epoch": 0.15, "grad_norm": 9.783752316325625, "learning_rate": 9.630127621255497e-06, "loss": 0.9192, "step": 5221 }, { "epoch": 0.15, "grad_norm": 10.209954352356197, "learning_rate": 9.629952550650208e-06, "loss": 0.6446, "step": 5222 }, { "epoch": 0.15, "grad_norm": 13.459540526552646, "learning_rate": 9.629777440214008e-06, "loss": 1.1444, "step": 5223 }, { "epoch": 0.15, "grad_norm": 5.703168309713549, "learning_rate": 9.629602289948403e-06, "loss": 0.636, "step": 5224 }, { "epoch": 0.15, "grad_norm": 6.35394298805977, "learning_rate": 9.629427099854898e-06, "loss": 0.5909, "step": 5225 }, { "epoch": 0.15, "grad_norm": 5.09812463238233, "learning_rate": 9.629251869935e-06, "loss": 0.241, "step": 5226 }, { "epoch": 0.15, "grad_norm": 6.411915948456118, "learning_rate": 9.629076600190223e-06, "loss": 0.4362, "step": 5227 }, { "epoch": 0.15, "grad_norm": 10.767334025892689, "learning_rate": 9.628901290622067e-06, "loss": 0.714, "step": 5228 }, { "epoch": 0.15, "grad_norm": 7.650911455147873, "learning_rate": 9.628725941232044e-06, "loss": 0.4572, "step": 5229 }, { "epoch": 0.15, "grad_norm": 9.52888209211742, "learning_rate": 9.628550552021661e-06, "loss": 0.3694, "step": 5230 }, { "epoch": 0.15, "grad_norm": 14.670972119907912, "learning_rate": 9.628375122992427e-06, "loss": 0.8061, "step": 5231 }, { "epoch": 0.15, "grad_norm": 7.949134769980227, "learning_rate": 9.628199654145852e-06, "loss": 0.3298, "step": 5232 }, { "epoch": 0.15, "grad_norm": 7.324611447805824, "learning_rate": 9.628024145483446e-06, "loss": 0.8153, "step": 5233 }, { "epoch": 0.15, "grad_norm": 5.769578565482293, "learning_rate": 9.627848597006717e-06, "loss": 0.8308, "step": 5234 }, { "epoch": 0.15, "grad_norm": 4.66784713778235, "learning_rate": 9.627673008717177e-06, "loss": 0.3372, "step": 5235 }, { "epoch": 0.15, "grad_norm": 7.047739859752012, "learning_rate": 9.627497380616336e-06, "loss": 0.6238, "step": 5236 }, { "epoch": 0.15, "grad_norm": 5.95868733383718, "learning_rate": 9.627321712705705e-06, "loss": 0.2274, "step": 5237 }, { "epoch": 0.15, "grad_norm": 10.851867979074106, "learning_rate": 9.627146004986794e-06, "loss": 0.4083, "step": 5238 }, { "epoch": 0.15, "grad_norm": 3.0546381719562574, "learning_rate": 9.626970257461117e-06, "loss": 0.1952, "step": 5239 }, { "epoch": 0.15, "grad_norm": 4.668743678935111, "learning_rate": 9.626794470130183e-06, "loss": 0.2853, "step": 5240 }, { "epoch": 0.15, "grad_norm": 6.1204508801092485, "learning_rate": 9.626618642995507e-06, "loss": 0.5675, "step": 5241 }, { "epoch": 0.15, "grad_norm": 6.436891841864543, "learning_rate": 9.6264427760586e-06, "loss": 0.6379, "step": 5242 }, { "epoch": 0.15, "grad_norm": 12.625816243290023, "learning_rate": 9.626266869320976e-06, "loss": 0.4845, "step": 5243 }, { "epoch": 0.15, "grad_norm": 5.685312458265413, "learning_rate": 9.626090922784147e-06, "loss": 0.9456, "step": 5244 }, { "epoch": 0.15, "grad_norm": 6.607448504532689, "learning_rate": 9.625914936449627e-06, "loss": 0.5624, "step": 5245 }, { "epoch": 0.15, "grad_norm": 7.942355825804468, "learning_rate": 9.625738910318932e-06, "loss": 0.8131, "step": 5246 }, { "epoch": 0.15, "grad_norm": 4.281657373579866, "learning_rate": 9.625562844393573e-06, "loss": 0.3999, "step": 5247 }, { "epoch": 0.15, "grad_norm": 4.923050173987106, "learning_rate": 9.625386738675068e-06, "loss": 0.3322, "step": 5248 }, { "epoch": 0.15, "grad_norm": 7.177671595707596, "learning_rate": 9.625210593164928e-06, "loss": 0.5983, "step": 5249 }, { "epoch": 0.15, "grad_norm": 6.9777440077712996, "learning_rate": 9.625034407864673e-06, "loss": 0.5056, "step": 5250 }, { "epoch": 0.15, "grad_norm": 8.173320460078612, "learning_rate": 9.624858182775815e-06, "loss": 0.8359, "step": 5251 }, { "epoch": 0.15, "grad_norm": 5.101042973702111, "learning_rate": 9.624681917899873e-06, "loss": 0.4913, "step": 5252 }, { "epoch": 0.15, "grad_norm": 6.238373661969127, "learning_rate": 9.624505613238362e-06, "loss": 0.5152, "step": 5253 }, { "epoch": 0.15, "grad_norm": 5.127409205904568, "learning_rate": 9.624329268792795e-06, "loss": 0.874, "step": 5254 }, { "epoch": 0.15, "grad_norm": 6.599719125377573, "learning_rate": 9.624152884564696e-06, "loss": 0.8555, "step": 5255 }, { "epoch": 0.15, "grad_norm": 6.947698502973501, "learning_rate": 9.62397646055558e-06, "loss": 0.6198, "step": 5256 }, { "epoch": 0.15, "grad_norm": 9.146406174593633, "learning_rate": 9.623799996766961e-06, "loss": 1.1167, "step": 5257 }, { "epoch": 0.15, "grad_norm": 8.0976863994856, "learning_rate": 9.623623493200361e-06, "loss": 0.5299, "step": 5258 }, { "epoch": 0.15, "grad_norm": 5.43422563116153, "learning_rate": 9.623446949857298e-06, "loss": 0.318, "step": 5259 }, { "epoch": 0.15, "grad_norm": 6.469208779398398, "learning_rate": 9.623270366739289e-06, "loss": 0.4107, "step": 5260 }, { "epoch": 0.15, "grad_norm": 8.282071976084307, "learning_rate": 9.623093743847856e-06, "loss": 0.6062, "step": 5261 }, { "epoch": 0.15, "grad_norm": 6.480818763740404, "learning_rate": 9.622917081184515e-06, "loss": 0.5284, "step": 5262 }, { "epoch": 0.15, "grad_norm": 7.783139286712789, "learning_rate": 9.622740378750787e-06, "loss": 0.7535, "step": 5263 }, { "epoch": 0.15, "grad_norm": 4.390451366751674, "learning_rate": 9.622563636548194e-06, "loss": 0.448, "step": 5264 }, { "epoch": 0.15, "grad_norm": 8.112362467536045, "learning_rate": 9.622386854578255e-06, "loss": 0.7009, "step": 5265 }, { "epoch": 0.15, "grad_norm": 9.352491697518284, "learning_rate": 9.622210032842491e-06, "loss": 0.5678, "step": 5266 }, { "epoch": 0.15, "grad_norm": 3.5079121394969652, "learning_rate": 9.622033171342425e-06, "loss": 0.3151, "step": 5267 }, { "epoch": 0.15, "grad_norm": 6.958311465413647, "learning_rate": 9.621856270079574e-06, "loss": 0.6594, "step": 5268 }, { "epoch": 0.15, "grad_norm": 5.557650241408306, "learning_rate": 9.621679329055463e-06, "loss": 0.5391, "step": 5269 }, { "epoch": 0.15, "grad_norm": 11.941006130594216, "learning_rate": 9.621502348271614e-06, "loss": 0.3573, "step": 5270 }, { "epoch": 0.15, "grad_norm": 4.218247023374825, "learning_rate": 9.62132532772955e-06, "loss": 0.5441, "step": 5271 }, { "epoch": 0.15, "grad_norm": 2.8252792477789606, "learning_rate": 9.621148267430793e-06, "loss": 0.3963, "step": 5272 }, { "epoch": 0.15, "grad_norm": 3.1833863734941126, "learning_rate": 9.620971167376865e-06, "loss": 0.5431, "step": 5273 }, { "epoch": 0.15, "grad_norm": 5.206001689342734, "learning_rate": 9.620794027569294e-06, "loss": 0.3669, "step": 5274 }, { "epoch": 0.15, "grad_norm": 4.263491701451087, "learning_rate": 9.620616848009598e-06, "loss": 0.2474, "step": 5275 }, { "epoch": 0.15, "grad_norm": 2.824596638282164, "learning_rate": 9.620439628699306e-06, "loss": 0.3111, "step": 5276 }, { "epoch": 0.15, "grad_norm": 11.494955532125685, "learning_rate": 9.62026236963994e-06, "loss": 1.1929, "step": 5277 }, { "epoch": 0.15, "grad_norm": 9.439481918698325, "learning_rate": 9.620085070833025e-06, "loss": 0.5604, "step": 5278 }, { "epoch": 0.15, "grad_norm": 5.264436536744266, "learning_rate": 9.619907732280086e-06, "loss": 0.4805, "step": 5279 }, { "epoch": 0.15, "grad_norm": 5.494767430931508, "learning_rate": 9.619730353982652e-06, "loss": 0.4716, "step": 5280 }, { "epoch": 0.15, "grad_norm": 4.543004174393674, "learning_rate": 9.619552935942247e-06, "loss": 0.5135, "step": 5281 }, { "epoch": 0.15, "grad_norm": 11.891595026418402, "learning_rate": 9.619375478160395e-06, "loss": 0.8508, "step": 5282 }, { "epoch": 0.15, "grad_norm": 7.620257763395194, "learning_rate": 9.619197980638626e-06, "loss": 0.6508, "step": 5283 }, { "epoch": 0.15, "grad_norm": 9.860955693508163, "learning_rate": 9.619020443378465e-06, "loss": 0.6546, "step": 5284 }, { "epoch": 0.15, "grad_norm": 1.869965708999585, "learning_rate": 9.61884286638144e-06, "loss": 0.2261, "step": 5285 }, { "epoch": 0.15, "grad_norm": 11.792291703300195, "learning_rate": 9.618665249649077e-06, "loss": 0.6481, "step": 5286 }, { "epoch": 0.15, "grad_norm": 2.9489961249596717, "learning_rate": 9.618487593182906e-06, "loss": 0.3254, "step": 5287 }, { "epoch": 0.15, "grad_norm": 7.298287559215317, "learning_rate": 9.618309896984454e-06, "loss": 0.8669, "step": 5288 }, { "epoch": 0.15, "grad_norm": 7.637796389358827, "learning_rate": 9.618132161055254e-06, "loss": 0.4027, "step": 5289 }, { "epoch": 0.15, "grad_norm": 5.42175750165371, "learning_rate": 9.617954385396829e-06, "loss": 0.3738, "step": 5290 }, { "epoch": 0.15, "grad_norm": 6.186461236245118, "learning_rate": 9.617776570010712e-06, "loss": 0.4692, "step": 5291 }, { "epoch": 0.15, "grad_norm": 4.7019503320969465, "learning_rate": 9.61759871489843e-06, "loss": 0.2355, "step": 5292 }, { "epoch": 0.15, "grad_norm": 7.140181446281, "learning_rate": 9.617420820061518e-06, "loss": 0.4587, "step": 5293 }, { "epoch": 0.15, "grad_norm": 6.069492947549271, "learning_rate": 9.6172428855015e-06, "loss": 0.5539, "step": 5294 }, { "epoch": 0.15, "grad_norm": 5.171220582431609, "learning_rate": 9.617064911219913e-06, "loss": 0.9763, "step": 5295 }, { "epoch": 0.15, "grad_norm": 4.930285482516954, "learning_rate": 9.616886897218283e-06, "loss": 0.4716, "step": 5296 }, { "epoch": 0.15, "grad_norm": 3.790795690808449, "learning_rate": 9.616708843498144e-06, "loss": 0.4346, "step": 5297 }, { "epoch": 0.15, "grad_norm": 4.434831905146899, "learning_rate": 9.616530750061026e-06, "loss": 0.2555, "step": 5298 }, { "epoch": 0.15, "grad_norm": 7.992869298646396, "learning_rate": 9.616352616908465e-06, "loss": 0.4553, "step": 5299 }, { "epoch": 0.15, "grad_norm": 6.060295726749175, "learning_rate": 9.616174444041988e-06, "loss": 0.3523, "step": 5300 }, { "epoch": 0.15, "grad_norm": 11.140602914325497, "learning_rate": 9.615996231463132e-06, "loss": 0.4974, "step": 5301 }, { "epoch": 0.15, "grad_norm": 9.27152767938723, "learning_rate": 9.615817979173428e-06, "loss": 0.6289, "step": 5302 }, { "epoch": 0.15, "grad_norm": 2.6627372286781026, "learning_rate": 9.615639687174411e-06, "loss": 0.1734, "step": 5303 }, { "epoch": 0.15, "grad_norm": 7.145439611622728, "learning_rate": 9.615461355467613e-06, "loss": 0.6589, "step": 5304 }, { "epoch": 0.15, "grad_norm": 8.881632945471017, "learning_rate": 9.61528298405457e-06, "loss": 0.5485, "step": 5305 }, { "epoch": 0.15, "grad_norm": 12.82240162157744, "learning_rate": 9.615104572936817e-06, "loss": 0.4955, "step": 5306 }, { "epoch": 0.15, "grad_norm": 5.756382592545236, "learning_rate": 9.614926122115886e-06, "loss": 0.6834, "step": 5307 }, { "epoch": 0.15, "grad_norm": 6.596958121205704, "learning_rate": 9.614747631593314e-06, "loss": 0.5281, "step": 5308 }, { "epoch": 0.15, "grad_norm": 8.270881928141804, "learning_rate": 9.614569101370636e-06, "loss": 0.7008, "step": 5309 }, { "epoch": 0.15, "grad_norm": 4.532484136428467, "learning_rate": 9.61439053144939e-06, "loss": 0.3722, "step": 5310 }, { "epoch": 0.15, "grad_norm": 11.819617170921344, "learning_rate": 9.614211921831108e-06, "loss": 0.9885, "step": 5311 }, { "epoch": 0.15, "grad_norm": 6.3114771628037305, "learning_rate": 9.614033272517331e-06, "loss": 0.7715, "step": 5312 }, { "epoch": 0.15, "grad_norm": 8.441937332358131, "learning_rate": 9.613854583509592e-06, "loss": 0.6868, "step": 5313 }, { "epoch": 0.15, "grad_norm": 5.344179058138905, "learning_rate": 9.613675854809432e-06, "loss": 0.5506, "step": 5314 }, { "epoch": 0.15, "grad_norm": 9.213006768334587, "learning_rate": 9.613497086418384e-06, "loss": 0.4539, "step": 5315 }, { "epoch": 0.15, "grad_norm": 8.086766356220368, "learning_rate": 9.61331827833799e-06, "loss": 0.7695, "step": 5316 }, { "epoch": 0.15, "grad_norm": 5.376807130826238, "learning_rate": 9.61313943056979e-06, "loss": 0.4716, "step": 5317 }, { "epoch": 0.15, "grad_norm": 6.828614939695231, "learning_rate": 9.612960543115315e-06, "loss": 0.5481, "step": 5318 }, { "epoch": 0.15, "grad_norm": 4.950273408949146, "learning_rate": 9.61278161597611e-06, "loss": 0.4639, "step": 5319 }, { "epoch": 0.15, "grad_norm": 4.063599012345581, "learning_rate": 9.612602649153713e-06, "loss": 0.4275, "step": 5320 }, { "epoch": 0.15, "grad_norm": 6.878439701685337, "learning_rate": 9.612423642649663e-06, "loss": 0.437, "step": 5321 }, { "epoch": 0.15, "grad_norm": 6.553094354284081, "learning_rate": 9.6122445964655e-06, "loss": 0.7564, "step": 5322 }, { "epoch": 0.15, "grad_norm": 10.249147844945275, "learning_rate": 9.612065510602767e-06, "loss": 0.5936, "step": 5323 }, { "epoch": 0.15, "grad_norm": 3.8046311642590305, "learning_rate": 9.611886385063e-06, "loss": 0.1274, "step": 5324 }, { "epoch": 0.15, "grad_norm": 9.508284720837903, "learning_rate": 9.611707219847741e-06, "loss": 0.6966, "step": 5325 }, { "epoch": 0.15, "grad_norm": 7.31147015484542, "learning_rate": 9.611528014958535e-06, "loss": 0.5263, "step": 5326 }, { "epoch": 0.15, "grad_norm": 6.749924023518539, "learning_rate": 9.61134877039692e-06, "loss": 0.4749, "step": 5327 }, { "epoch": 0.15, "grad_norm": 8.560336124960054, "learning_rate": 9.611169486164439e-06, "loss": 0.6955, "step": 5328 }, { "epoch": 0.15, "grad_norm": 9.156443857477793, "learning_rate": 9.610990162262635e-06, "loss": 0.5222, "step": 5329 }, { "epoch": 0.15, "grad_norm": 6.0540817646847795, "learning_rate": 9.610810798693052e-06, "loss": 0.5501, "step": 5330 }, { "epoch": 0.15, "grad_norm": 5.877042577638952, "learning_rate": 9.610631395457229e-06, "loss": 0.5426, "step": 5331 }, { "epoch": 0.15, "grad_norm": 10.117138718888794, "learning_rate": 9.610451952556713e-06, "loss": 0.6769, "step": 5332 }, { "epoch": 0.15, "grad_norm": 4.660527843615213, "learning_rate": 9.610272469993046e-06, "loss": 0.2338, "step": 5333 }, { "epoch": 0.15, "grad_norm": 8.128480136597167, "learning_rate": 9.610092947767773e-06, "loss": 0.6496, "step": 5334 }, { "epoch": 0.15, "grad_norm": 7.48601206672872, "learning_rate": 9.609913385882437e-06, "loss": 0.3618, "step": 5335 }, { "epoch": 0.15, "grad_norm": 8.591568104585306, "learning_rate": 9.609733784338586e-06, "loss": 0.6882, "step": 5336 }, { "epoch": 0.15, "grad_norm": 8.097300100706311, "learning_rate": 9.609554143137761e-06, "loss": 0.6125, "step": 5337 }, { "epoch": 0.15, "grad_norm": 8.218932755327584, "learning_rate": 9.60937446228151e-06, "loss": 0.5624, "step": 5338 }, { "epoch": 0.15, "grad_norm": 7.105830489015984, "learning_rate": 9.609194741771377e-06, "loss": 0.6233, "step": 5339 }, { "epoch": 0.15, "grad_norm": 7.278816225699527, "learning_rate": 9.60901498160891e-06, "loss": 0.2854, "step": 5340 }, { "epoch": 0.15, "grad_norm": 7.779721029423503, "learning_rate": 9.608835181795655e-06, "loss": 0.7278, "step": 5341 }, { "epoch": 0.15, "grad_norm": 6.135116591398844, "learning_rate": 9.608655342333157e-06, "loss": 0.2456, "step": 5342 }, { "epoch": 0.15, "grad_norm": 3.072243779535831, "learning_rate": 9.608475463222965e-06, "loss": 0.236, "step": 5343 }, { "epoch": 0.15, "grad_norm": 13.18285942688715, "learning_rate": 9.608295544466628e-06, "loss": 0.8631, "step": 5344 }, { "epoch": 0.15, "grad_norm": 14.353728370268172, "learning_rate": 9.608115586065689e-06, "loss": 0.3048, "step": 5345 }, { "epoch": 0.15, "grad_norm": 6.081319739166306, "learning_rate": 9.6079355880217e-06, "loss": 0.2312, "step": 5346 }, { "epoch": 0.15, "grad_norm": 6.1516752721532875, "learning_rate": 9.607755550336208e-06, "loss": 0.6175, "step": 5347 }, { "epoch": 0.15, "grad_norm": 7.541783607658152, "learning_rate": 9.607575473010765e-06, "loss": 0.5809, "step": 5348 }, { "epoch": 0.15, "grad_norm": 5.784189971651914, "learning_rate": 9.607395356046913e-06, "loss": 0.4648, "step": 5349 }, { "epoch": 0.15, "grad_norm": 9.989612238540174, "learning_rate": 9.60721519944621e-06, "loss": 0.515, "step": 5350 }, { "epoch": 0.15, "grad_norm": 6.707338942599961, "learning_rate": 9.6070350032102e-06, "loss": 0.4794, "step": 5351 }, { "epoch": 0.15, "grad_norm": 7.2031110320710905, "learning_rate": 9.606854767340435e-06, "loss": 0.5415, "step": 5352 }, { "epoch": 0.15, "grad_norm": 9.460132416570621, "learning_rate": 9.606674491838466e-06, "loss": 0.6957, "step": 5353 }, { "epoch": 0.15, "grad_norm": 7.591441364762617, "learning_rate": 9.606494176705844e-06, "loss": 1.1218, "step": 5354 }, { "epoch": 0.15, "grad_norm": 4.231094440885726, "learning_rate": 9.60631382194412e-06, "loss": 0.4876, "step": 5355 }, { "epoch": 0.15, "grad_norm": 6.327595723941689, "learning_rate": 9.606133427554845e-06, "loss": 0.5074, "step": 5356 }, { "epoch": 0.15, "grad_norm": 7.46128216491607, "learning_rate": 9.60595299353957e-06, "loss": 0.611, "step": 5357 }, { "epoch": 0.15, "grad_norm": 6.661149491782922, "learning_rate": 9.60577251989985e-06, "loss": 0.4195, "step": 5358 }, { "epoch": 0.15, "grad_norm": 10.27991845688596, "learning_rate": 9.605592006637235e-06, "loss": 0.7243, "step": 5359 }, { "epoch": 0.15, "grad_norm": 6.094654113994171, "learning_rate": 9.60541145375328e-06, "loss": 0.4623, "step": 5360 }, { "epoch": 0.15, "grad_norm": 10.451221423886235, "learning_rate": 9.605230861249538e-06, "loss": 0.5531, "step": 5361 }, { "epoch": 0.15, "grad_norm": 4.098012757770643, "learning_rate": 9.60505022912756e-06, "loss": 0.4438, "step": 5362 }, { "epoch": 0.15, "grad_norm": 9.313337659604983, "learning_rate": 9.604869557388903e-06, "loss": 0.5584, "step": 5363 }, { "epoch": 0.15, "grad_norm": 7.779801996158146, "learning_rate": 9.604688846035119e-06, "loss": 1.0185, "step": 5364 }, { "epoch": 0.15, "grad_norm": 10.940274829376325, "learning_rate": 9.604508095067766e-06, "loss": 0.7375, "step": 5365 }, { "epoch": 0.15, "grad_norm": 9.768044622120954, "learning_rate": 9.604327304488395e-06, "loss": 0.8652, "step": 5366 }, { "epoch": 0.15, "grad_norm": 5.565736364913624, "learning_rate": 9.604146474298563e-06, "loss": 0.5396, "step": 5367 }, { "epoch": 0.15, "grad_norm": 8.360392342810977, "learning_rate": 9.603965604499826e-06, "loss": 0.6606, "step": 5368 }, { "epoch": 0.15, "grad_norm": 6.286291120739438, "learning_rate": 9.60378469509374e-06, "loss": 0.708, "step": 5369 }, { "epoch": 0.15, "grad_norm": 4.896663216671249, "learning_rate": 9.60360374608186e-06, "loss": 0.6025, "step": 5370 }, { "epoch": 0.15, "grad_norm": 5.105999219285462, "learning_rate": 9.603422757465746e-06, "loss": 0.3784, "step": 5371 }, { "epoch": 0.15, "grad_norm": 6.502160226875895, "learning_rate": 9.603241729246951e-06, "loss": 0.3531, "step": 5372 }, { "epoch": 0.15, "grad_norm": 8.335218146478077, "learning_rate": 9.603060661427034e-06, "loss": 0.564, "step": 5373 }, { "epoch": 0.15, "grad_norm": 6.173043596720282, "learning_rate": 9.602879554007556e-06, "loss": 0.6748, "step": 5374 }, { "epoch": 0.15, "grad_norm": 6.200364043715269, "learning_rate": 9.60269840699007e-06, "loss": 0.4934, "step": 5375 }, { "epoch": 0.15, "grad_norm": 4.576251014223796, "learning_rate": 9.602517220376134e-06, "loss": 0.3293, "step": 5376 }, { "epoch": 0.15, "grad_norm": 3.733263113576273, "learning_rate": 9.602335994167312e-06, "loss": 0.4017, "step": 5377 }, { "epoch": 0.15, "grad_norm": 9.946397073465446, "learning_rate": 9.602154728365158e-06, "loss": 0.6138, "step": 5378 }, { "epoch": 0.15, "grad_norm": 8.716810331990228, "learning_rate": 9.601973422971235e-06, "loss": 0.8236, "step": 5379 }, { "epoch": 0.15, "grad_norm": 3.8346767490234077, "learning_rate": 9.601792077987103e-06, "loss": 0.3536, "step": 5380 }, { "epoch": 0.15, "grad_norm": 7.930412792992604, "learning_rate": 9.601610693414315e-06, "loss": 0.4908, "step": 5381 }, { "epoch": 0.15, "grad_norm": 6.086682177367543, "learning_rate": 9.60142926925444e-06, "loss": 0.5264, "step": 5382 }, { "epoch": 0.15, "grad_norm": 11.165897247984105, "learning_rate": 9.601247805509038e-06, "loss": 0.5118, "step": 5383 }, { "epoch": 0.15, "grad_norm": 6.761723051725415, "learning_rate": 9.601066302179665e-06, "loss": 0.4107, "step": 5384 }, { "epoch": 0.15, "grad_norm": 5.490858022904776, "learning_rate": 9.600884759267885e-06, "loss": 0.8989, "step": 5385 }, { "epoch": 0.15, "grad_norm": 5.015159799268097, "learning_rate": 9.600703176775259e-06, "loss": 0.5038, "step": 5386 }, { "epoch": 0.15, "grad_norm": 5.595846752833308, "learning_rate": 9.600521554703352e-06, "loss": 0.2371, "step": 5387 }, { "epoch": 0.15, "grad_norm": 7.840056746725122, "learning_rate": 9.600339893053723e-06, "loss": 1.0637, "step": 5388 }, { "epoch": 0.15, "grad_norm": 6.607902344755645, "learning_rate": 9.600158191827938e-06, "loss": 0.8058, "step": 5389 }, { "epoch": 0.15, "grad_norm": 11.466457127349527, "learning_rate": 9.599976451027557e-06, "loss": 0.8058, "step": 5390 }, { "epoch": 0.15, "grad_norm": 9.160355932991457, "learning_rate": 9.599794670654146e-06, "loss": 0.5148, "step": 5391 }, { "epoch": 0.15, "grad_norm": 7.527021115113907, "learning_rate": 9.599612850709267e-06, "loss": 0.5757, "step": 5392 }, { "epoch": 0.15, "grad_norm": 6.970335296819332, "learning_rate": 9.599430991194486e-06, "loss": 0.6175, "step": 5393 }, { "epoch": 0.15, "grad_norm": 6.6369118755008385, "learning_rate": 9.599249092111364e-06, "loss": 0.47, "step": 5394 }, { "epoch": 0.15, "grad_norm": 6.178861541142558, "learning_rate": 9.59906715346147e-06, "loss": 0.5188, "step": 5395 }, { "epoch": 0.15, "grad_norm": 2.2823064657056786, "learning_rate": 9.598885175246368e-06, "loss": 0.2389, "step": 5396 }, { "epoch": 0.15, "grad_norm": 4.422877194765577, "learning_rate": 9.598703157467623e-06, "loss": 0.3434, "step": 5397 }, { "epoch": 0.15, "grad_norm": 4.275773966591541, "learning_rate": 9.5985211001268e-06, "loss": 0.7657, "step": 5398 }, { "epoch": 0.15, "grad_norm": 5.790642608928721, "learning_rate": 9.598339003225467e-06, "loss": 0.3515, "step": 5399 }, { "epoch": 0.15, "grad_norm": 7.24276954920909, "learning_rate": 9.59815686676519e-06, "loss": 0.7807, "step": 5400 }, { "epoch": 0.15, "grad_norm": 4.184612588212645, "learning_rate": 9.597974690747534e-06, "loss": 0.381, "step": 5401 }, { "epoch": 0.15, "grad_norm": 5.0494479302523345, "learning_rate": 9.597792475174068e-06, "loss": 0.2983, "step": 5402 }, { "epoch": 0.15, "grad_norm": 7.409649547572859, "learning_rate": 9.597610220046361e-06, "loss": 0.6802, "step": 5403 }, { "epoch": 0.15, "grad_norm": 8.281205087216064, "learning_rate": 9.597427925365978e-06, "loss": 0.7347, "step": 5404 }, { "epoch": 0.15, "grad_norm": 11.468288568262334, "learning_rate": 9.59724559113449e-06, "loss": 1.0982, "step": 5405 }, { "epoch": 0.15, "grad_norm": 6.790099561913615, "learning_rate": 9.597063217353464e-06, "loss": 0.5299, "step": 5406 }, { "epoch": 0.15, "grad_norm": 8.965705275018314, "learning_rate": 9.596880804024468e-06, "loss": 0.6625, "step": 5407 }, { "epoch": 0.15, "grad_norm": 5.252748314980898, "learning_rate": 9.596698351149074e-06, "loss": 0.461, "step": 5408 }, { "epoch": 0.15, "grad_norm": 3.903373208258993, "learning_rate": 9.596515858728847e-06, "loss": 0.5257, "step": 5409 }, { "epoch": 0.15, "grad_norm": 5.768085399297065, "learning_rate": 9.596333326765364e-06, "loss": 0.5329, "step": 5410 }, { "epoch": 0.15, "grad_norm": 8.807077674063196, "learning_rate": 9.59615075526019e-06, "loss": 0.5266, "step": 5411 }, { "epoch": 0.15, "grad_norm": 7.80160214035687, "learning_rate": 9.595968144214896e-06, "loss": 0.6158, "step": 5412 }, { "epoch": 0.16, "grad_norm": 9.324338825093523, "learning_rate": 9.595785493631053e-06, "loss": 0.7055, "step": 5413 }, { "epoch": 0.16, "grad_norm": 6.3093554529999185, "learning_rate": 9.595602803510235e-06, "loss": 0.8141, "step": 5414 }, { "epoch": 0.16, "grad_norm": 5.270543684809945, "learning_rate": 9.595420073854012e-06, "loss": 0.4283, "step": 5415 }, { "epoch": 0.16, "grad_norm": 3.04512865931611, "learning_rate": 9.595237304663955e-06, "loss": 0.6267, "step": 5416 }, { "epoch": 0.16, "grad_norm": 9.878043562816792, "learning_rate": 9.595054495941638e-06, "loss": 0.615, "step": 5417 }, { "epoch": 0.16, "grad_norm": 3.5072567505888106, "learning_rate": 9.594871647688631e-06, "loss": 0.4306, "step": 5418 }, { "epoch": 0.16, "grad_norm": 9.601189229738372, "learning_rate": 9.594688759906512e-06, "loss": 0.4755, "step": 5419 }, { "epoch": 0.16, "grad_norm": 6.794384940902208, "learning_rate": 9.59450583259685e-06, "loss": 0.367, "step": 5420 }, { "epoch": 0.16, "grad_norm": 9.096083299106125, "learning_rate": 9.59432286576122e-06, "loss": 0.6267, "step": 5421 }, { "epoch": 0.16, "grad_norm": 4.370036988614667, "learning_rate": 9.594139859401195e-06, "loss": 0.4771, "step": 5422 }, { "epoch": 0.16, "grad_norm": 7.851195308772369, "learning_rate": 9.59395681351835e-06, "loss": 0.5344, "step": 5423 }, { "epoch": 0.16, "grad_norm": 7.156729536356337, "learning_rate": 9.593773728114262e-06, "loss": 0.5696, "step": 5424 }, { "epoch": 0.16, "grad_norm": 11.263191796330114, "learning_rate": 9.593590603190503e-06, "loss": 1.1734, "step": 5425 }, { "epoch": 0.16, "grad_norm": 5.456371008021628, "learning_rate": 9.593407438748651e-06, "loss": 0.3709, "step": 5426 }, { "epoch": 0.16, "grad_norm": 6.23148165992475, "learning_rate": 9.59322423479028e-06, "loss": 0.6725, "step": 5427 }, { "epoch": 0.16, "grad_norm": 5.651221047175172, "learning_rate": 9.593040991316967e-06, "loss": 0.6171, "step": 5428 }, { "epoch": 0.16, "grad_norm": 9.676631170959583, "learning_rate": 9.592857708330286e-06, "loss": 0.8976, "step": 5429 }, { "epoch": 0.16, "grad_norm": 6.1200001609403305, "learning_rate": 9.592674385831818e-06, "loss": 0.5749, "step": 5430 }, { "epoch": 0.16, "grad_norm": 6.26091031515352, "learning_rate": 9.592491023823134e-06, "loss": 0.9716, "step": 5431 }, { "epoch": 0.16, "grad_norm": 8.2449561219352, "learning_rate": 9.592307622305818e-06, "loss": 0.5926, "step": 5432 }, { "epoch": 0.16, "grad_norm": 3.841972839638751, "learning_rate": 9.592124181281444e-06, "loss": 0.2703, "step": 5433 }, { "epoch": 0.16, "grad_norm": 8.713366524774932, "learning_rate": 9.591940700751592e-06, "loss": 0.4731, "step": 5434 }, { "epoch": 0.16, "grad_norm": 5.449097093981092, "learning_rate": 9.59175718071784e-06, "loss": 0.6397, "step": 5435 }, { "epoch": 0.16, "grad_norm": 11.401978244131088, "learning_rate": 9.591573621181764e-06, "loss": 0.478, "step": 5436 }, { "epoch": 0.16, "grad_norm": 7.408071237682954, "learning_rate": 9.591390022144947e-06, "loss": 0.8667, "step": 5437 }, { "epoch": 0.16, "grad_norm": 4.365582220568322, "learning_rate": 9.591206383608966e-06, "loss": 0.4575, "step": 5438 }, { "epoch": 0.16, "grad_norm": 7.087905627287615, "learning_rate": 9.591022705575403e-06, "loss": 0.79, "step": 5439 }, { "epoch": 0.16, "grad_norm": 8.490004440504155, "learning_rate": 9.590838988045836e-06, "loss": 0.6483, "step": 5440 }, { "epoch": 0.16, "grad_norm": 8.324752611488252, "learning_rate": 9.590655231021848e-06, "loss": 0.5363, "step": 5441 }, { "epoch": 0.16, "grad_norm": 6.141164236683081, "learning_rate": 9.590471434505017e-06, "loss": 0.9395, "step": 5442 }, { "epoch": 0.16, "grad_norm": 5.254427428080971, "learning_rate": 9.590287598496926e-06, "loss": 0.5617, "step": 5443 }, { "epoch": 0.16, "grad_norm": 5.450537606067856, "learning_rate": 9.590103722999155e-06, "loss": 0.5499, "step": 5444 }, { "epoch": 0.16, "grad_norm": 9.315683473584592, "learning_rate": 9.589919808013287e-06, "loss": 0.5389, "step": 5445 }, { "epoch": 0.16, "grad_norm": 8.366028720603047, "learning_rate": 9.589735853540904e-06, "loss": 0.2688, "step": 5446 }, { "epoch": 0.16, "grad_norm": 5.409581387272259, "learning_rate": 9.589551859583588e-06, "loss": 0.4741, "step": 5447 }, { "epoch": 0.16, "grad_norm": 5.4681240922510925, "learning_rate": 9.589367826142924e-06, "loss": 0.8502, "step": 5448 }, { "epoch": 0.16, "grad_norm": 6.647221921431998, "learning_rate": 9.589183753220494e-06, "loss": 0.9146, "step": 5449 }, { "epoch": 0.16, "grad_norm": 5.531752396521018, "learning_rate": 9.58899964081788e-06, "loss": 0.6462, "step": 5450 }, { "epoch": 0.16, "grad_norm": 7.130741315881525, "learning_rate": 9.588815488936667e-06, "loss": 0.6696, "step": 5451 }, { "epoch": 0.16, "grad_norm": 7.414032368252044, "learning_rate": 9.588631297578439e-06, "loss": 0.7281, "step": 5452 }, { "epoch": 0.16, "grad_norm": 5.361515474085991, "learning_rate": 9.588447066744781e-06, "loss": 0.3305, "step": 5453 }, { "epoch": 0.16, "grad_norm": 5.279588968577386, "learning_rate": 9.58826279643728e-06, "loss": 0.6572, "step": 5454 }, { "epoch": 0.16, "grad_norm": 9.825398708339119, "learning_rate": 9.588078486657517e-06, "loss": 0.43, "step": 5455 }, { "epoch": 0.16, "grad_norm": 5.256318785662992, "learning_rate": 9.58789413740708e-06, "loss": 0.3279, "step": 5456 }, { "epoch": 0.16, "grad_norm": 5.615543979236496, "learning_rate": 9.587709748687554e-06, "loss": 0.3517, "step": 5457 }, { "epoch": 0.16, "grad_norm": 9.44175765646591, "learning_rate": 9.587525320500527e-06, "loss": 0.8418, "step": 5458 }, { "epoch": 0.16, "grad_norm": 5.805030946082298, "learning_rate": 9.587340852847584e-06, "loss": 0.2312, "step": 5459 }, { "epoch": 0.16, "grad_norm": 7.633760516389156, "learning_rate": 9.587156345730312e-06, "loss": 1.0011, "step": 5460 }, { "epoch": 0.16, "grad_norm": 11.453906273313342, "learning_rate": 9.586971799150299e-06, "loss": 0.8824, "step": 5461 }, { "epoch": 0.16, "grad_norm": 3.9817250583662926, "learning_rate": 9.586787213109135e-06, "loss": 0.5842, "step": 5462 }, { "epoch": 0.16, "grad_norm": 6.9213175323096285, "learning_rate": 9.586602587608402e-06, "loss": 0.8797, "step": 5463 }, { "epoch": 0.16, "grad_norm": 7.634518794775687, "learning_rate": 9.586417922649693e-06, "loss": 0.4112, "step": 5464 }, { "epoch": 0.16, "grad_norm": 5.106247250802504, "learning_rate": 9.586233218234595e-06, "loss": 0.5342, "step": 5465 }, { "epoch": 0.16, "grad_norm": 6.96206344249457, "learning_rate": 9.586048474364697e-06, "loss": 0.2805, "step": 5466 }, { "epoch": 0.16, "grad_norm": 5.353670648934883, "learning_rate": 9.585863691041587e-06, "loss": 0.686, "step": 5467 }, { "epoch": 0.16, "grad_norm": 6.079247535270527, "learning_rate": 9.58567886826686e-06, "loss": 1.093, "step": 5468 }, { "epoch": 0.16, "grad_norm": 10.426602368054914, "learning_rate": 9.5854940060421e-06, "loss": 0.7717, "step": 5469 }, { "epoch": 0.16, "grad_norm": 5.953372579717904, "learning_rate": 9.5853091043689e-06, "loss": 0.6121, "step": 5470 }, { "epoch": 0.16, "grad_norm": 4.812064213033368, "learning_rate": 9.585124163248851e-06, "loss": 0.3301, "step": 5471 }, { "epoch": 0.16, "grad_norm": 4.6446439763049625, "learning_rate": 9.584939182683543e-06, "loss": 0.2901, "step": 5472 }, { "epoch": 0.16, "grad_norm": 8.08686860109414, "learning_rate": 9.584754162674567e-06, "loss": 0.9745, "step": 5473 }, { "epoch": 0.16, "grad_norm": 3.63112832684627, "learning_rate": 9.584569103223516e-06, "loss": 0.5885, "step": 5474 }, { "epoch": 0.16, "grad_norm": 10.372696586358645, "learning_rate": 9.584384004331981e-06, "loss": 0.9955, "step": 5475 }, { "epoch": 0.16, "grad_norm": 5.515752688218565, "learning_rate": 9.584198866001557e-06, "loss": 0.413, "step": 5476 }, { "epoch": 0.16, "grad_norm": 5.693377130654132, "learning_rate": 9.584013688233832e-06, "loss": 0.4616, "step": 5477 }, { "epoch": 0.16, "grad_norm": 4.647006697799095, "learning_rate": 9.583828471030403e-06, "loss": 0.3989, "step": 5478 }, { "epoch": 0.16, "grad_norm": 7.7233035598244655, "learning_rate": 9.583643214392862e-06, "loss": 1.0861, "step": 5479 }, { "epoch": 0.16, "grad_norm": 3.889581808543847, "learning_rate": 9.583457918322802e-06, "loss": 0.2297, "step": 5480 }, { "epoch": 0.16, "grad_norm": 6.804932827474345, "learning_rate": 9.58327258282182e-06, "loss": 0.4974, "step": 5481 }, { "epoch": 0.16, "grad_norm": 6.9385344275384435, "learning_rate": 9.583087207891504e-06, "loss": 0.2981, "step": 5482 }, { "epoch": 0.16, "grad_norm": 7.4649658831910966, "learning_rate": 9.582901793533457e-06, "loss": 0.6541, "step": 5483 }, { "epoch": 0.16, "grad_norm": 5.9207284667974065, "learning_rate": 9.582716339749267e-06, "loss": 0.6245, "step": 5484 }, { "epoch": 0.16, "grad_norm": 5.669452833117224, "learning_rate": 9.582530846540534e-06, "loss": 0.555, "step": 5485 }, { "epoch": 0.16, "grad_norm": 5.685871729584379, "learning_rate": 9.582345313908855e-06, "loss": 0.8025, "step": 5486 }, { "epoch": 0.16, "grad_norm": 8.686505816539885, "learning_rate": 9.582159741855818e-06, "loss": 0.5418, "step": 5487 }, { "epoch": 0.16, "grad_norm": 4.620744035762073, "learning_rate": 9.581974130383029e-06, "loss": 0.6105, "step": 5488 }, { "epoch": 0.16, "grad_norm": 5.356599606262553, "learning_rate": 9.581788479492081e-06, "loss": 0.4598, "step": 5489 }, { "epoch": 0.16, "grad_norm": 5.139829069627728, "learning_rate": 9.581602789184568e-06, "loss": 0.5703, "step": 5490 }, { "epoch": 0.16, "grad_norm": 8.567504311463935, "learning_rate": 9.581417059462093e-06, "loss": 0.5604, "step": 5491 }, { "epoch": 0.16, "grad_norm": 7.438712013287379, "learning_rate": 9.581231290326249e-06, "loss": 0.741, "step": 5492 }, { "epoch": 0.16, "grad_norm": 6.192500704232809, "learning_rate": 9.581045481778637e-06, "loss": 0.8745, "step": 5493 }, { "epoch": 0.16, "grad_norm": 8.07837185215042, "learning_rate": 9.580859633820856e-06, "loss": 0.2609, "step": 5494 }, { "epoch": 0.16, "grad_norm": 9.429449045220895, "learning_rate": 9.580673746454502e-06, "loss": 0.8243, "step": 5495 }, { "epoch": 0.16, "grad_norm": 7.597490433401027, "learning_rate": 9.580487819681176e-06, "loss": 0.4742, "step": 5496 }, { "epoch": 0.16, "grad_norm": 3.5059610251138866, "learning_rate": 9.580301853502478e-06, "loss": 0.2636, "step": 5497 }, { "epoch": 0.16, "grad_norm": 4.549987356985783, "learning_rate": 9.580115847920007e-06, "loss": 0.3898, "step": 5498 }, { "epoch": 0.16, "grad_norm": 6.632353821835621, "learning_rate": 9.579929802935362e-06, "loss": 0.5225, "step": 5499 }, { "epoch": 0.16, "grad_norm": 8.54566882895967, "learning_rate": 9.579743718550145e-06, "loss": 0.6629, "step": 5500 }, { "epoch": 0.16, "grad_norm": 10.061459884435434, "learning_rate": 9.579557594765957e-06, "loss": 0.9656, "step": 5501 }, { "epoch": 0.16, "grad_norm": 10.51668486560833, "learning_rate": 9.5793714315844e-06, "loss": 0.7463, "step": 5502 }, { "epoch": 0.16, "grad_norm": 9.286826362669911, "learning_rate": 9.579185229007074e-06, "loss": 0.4623, "step": 5503 }, { "epoch": 0.16, "grad_norm": 5.6915927665273065, "learning_rate": 9.578998987035581e-06, "loss": 0.5492, "step": 5504 }, { "epoch": 0.16, "grad_norm": 4.62055270848068, "learning_rate": 9.578812705671524e-06, "loss": 0.5865, "step": 5505 }, { "epoch": 0.16, "grad_norm": 8.269236073296062, "learning_rate": 9.578626384916504e-06, "loss": 0.6326, "step": 5506 }, { "epoch": 0.16, "grad_norm": 8.859077354423274, "learning_rate": 9.578440024772125e-06, "loss": 0.6146, "step": 5507 }, { "epoch": 0.16, "grad_norm": 5.0258065150565905, "learning_rate": 9.57825362523999e-06, "loss": 0.5036, "step": 5508 }, { "epoch": 0.16, "grad_norm": 5.456801041680922, "learning_rate": 9.578067186321704e-06, "loss": 0.2851, "step": 5509 }, { "epoch": 0.16, "grad_norm": 7.266209404525347, "learning_rate": 9.57788070801887e-06, "loss": 0.5093, "step": 5510 }, { "epoch": 0.16, "grad_norm": 4.4094967074396525, "learning_rate": 9.57769419033309e-06, "loss": 0.6204, "step": 5511 }, { "epoch": 0.16, "grad_norm": 6.092198257456359, "learning_rate": 9.57750763326597e-06, "loss": 0.4813, "step": 5512 }, { "epoch": 0.16, "grad_norm": 3.744653578092622, "learning_rate": 9.577321036819118e-06, "loss": 0.3796, "step": 5513 }, { "epoch": 0.16, "grad_norm": 3.6202386959687884, "learning_rate": 9.577134400994135e-06, "loss": 0.3119, "step": 5514 }, { "epoch": 0.16, "grad_norm": 5.748362764213315, "learning_rate": 9.576947725792629e-06, "loss": 0.7082, "step": 5515 }, { "epoch": 0.16, "grad_norm": 6.727743543556503, "learning_rate": 9.576761011216206e-06, "loss": 0.7898, "step": 5516 }, { "epoch": 0.16, "grad_norm": 5.050087204737326, "learning_rate": 9.57657425726647e-06, "loss": 0.5183, "step": 5517 }, { "epoch": 0.16, "grad_norm": 6.524093211220907, "learning_rate": 9.576387463945029e-06, "loss": 0.445, "step": 5518 }, { "epoch": 0.16, "grad_norm": 6.683385135643305, "learning_rate": 9.576200631253491e-06, "loss": 1.0139, "step": 5519 }, { "epoch": 0.16, "grad_norm": 3.6975617448862703, "learning_rate": 9.576013759193464e-06, "loss": 0.3948, "step": 5520 }, { "epoch": 0.16, "grad_norm": 7.877992939248835, "learning_rate": 9.575826847766552e-06, "loss": 0.4829, "step": 5521 }, { "epoch": 0.16, "grad_norm": 5.616054542684197, "learning_rate": 9.575639896974364e-06, "loss": 0.4489, "step": 5522 }, { "epoch": 0.16, "grad_norm": 4.7100206688105475, "learning_rate": 9.575452906818512e-06, "loss": 0.124, "step": 5523 }, { "epoch": 0.16, "grad_norm": 2.887939360248778, "learning_rate": 9.5752658773006e-06, "loss": 0.268, "step": 5524 }, { "epoch": 0.16, "grad_norm": 4.683330067139072, "learning_rate": 9.57507880842224e-06, "loss": 0.2723, "step": 5525 }, { "epoch": 0.16, "grad_norm": 6.450673411205867, "learning_rate": 9.57489170018504e-06, "loss": 0.5133, "step": 5526 }, { "epoch": 0.16, "grad_norm": 3.8936862775018484, "learning_rate": 9.57470455259061e-06, "loss": 0.2725, "step": 5527 }, { "epoch": 0.16, "grad_norm": 5.940831745368844, "learning_rate": 9.574517365640558e-06, "loss": 0.6713, "step": 5528 }, { "epoch": 0.16, "grad_norm": 2.0095539074180975, "learning_rate": 9.5743301393365e-06, "loss": 0.206, "step": 5529 }, { "epoch": 0.16, "grad_norm": 9.305190373051115, "learning_rate": 9.574142873680042e-06, "loss": 0.4348, "step": 5530 }, { "epoch": 0.16, "grad_norm": 5.829428969238013, "learning_rate": 9.573955568672793e-06, "loss": 0.5746, "step": 5531 }, { "epoch": 0.16, "grad_norm": 3.2220842998316885, "learning_rate": 9.573768224316372e-06, "loss": 0.276, "step": 5532 }, { "epoch": 0.16, "grad_norm": 6.0236504859723174, "learning_rate": 9.573580840612381e-06, "loss": 0.244, "step": 5533 }, { "epoch": 0.16, "grad_norm": 10.004072170825461, "learning_rate": 9.573393417562442e-06, "loss": 0.415, "step": 5534 }, { "epoch": 0.16, "grad_norm": 9.110742168658355, "learning_rate": 9.573205955168159e-06, "loss": 0.4688, "step": 5535 }, { "epoch": 0.16, "grad_norm": 4.258901306366681, "learning_rate": 9.57301845343115e-06, "loss": 0.5926, "step": 5536 }, { "epoch": 0.16, "grad_norm": 2.8920832280797097, "learning_rate": 9.572830912353026e-06, "loss": 0.2727, "step": 5537 }, { "epoch": 0.16, "grad_norm": 6.34872196582629, "learning_rate": 9.5726433319354e-06, "loss": 0.6376, "step": 5538 }, { "epoch": 0.16, "grad_norm": 6.710923608980912, "learning_rate": 9.572455712179888e-06, "loss": 0.4634, "step": 5539 }, { "epoch": 0.16, "grad_norm": 9.492412548772506, "learning_rate": 9.572268053088102e-06, "loss": 0.5997, "step": 5540 }, { "epoch": 0.16, "grad_norm": 5.78258433021969, "learning_rate": 9.572080354661656e-06, "loss": 0.443, "step": 5541 }, { "epoch": 0.16, "grad_norm": 3.282779373517455, "learning_rate": 9.571892616902165e-06, "loss": 0.3706, "step": 5542 }, { "epoch": 0.16, "grad_norm": 4.7668686541640986, "learning_rate": 9.571704839811247e-06, "loss": 0.3395, "step": 5543 }, { "epoch": 0.16, "grad_norm": 9.03803797976539, "learning_rate": 9.571517023390515e-06, "loss": 0.469, "step": 5544 }, { "epoch": 0.16, "grad_norm": 7.086145164533866, "learning_rate": 9.571329167641583e-06, "loss": 0.6828, "step": 5545 }, { "epoch": 0.16, "grad_norm": 8.753414741141432, "learning_rate": 9.571141272566069e-06, "loss": 1.0585, "step": 5546 }, { "epoch": 0.16, "grad_norm": 3.8928979722152937, "learning_rate": 9.570953338165592e-06, "loss": 0.3363, "step": 5547 }, { "epoch": 0.16, "grad_norm": 5.319325449348252, "learning_rate": 9.570765364441762e-06, "loss": 0.6007, "step": 5548 }, { "epoch": 0.16, "grad_norm": 10.10371355961273, "learning_rate": 9.570577351396204e-06, "loss": 0.9736, "step": 5549 }, { "epoch": 0.16, "grad_norm": 4.4451974273965975, "learning_rate": 9.57038929903053e-06, "loss": 0.3012, "step": 5550 }, { "epoch": 0.16, "grad_norm": 5.377179613203502, "learning_rate": 9.570201207346362e-06, "loss": 0.5334, "step": 5551 }, { "epoch": 0.16, "grad_norm": 7.067919626546993, "learning_rate": 9.570013076345313e-06, "loss": 0.2753, "step": 5552 }, { "epoch": 0.16, "grad_norm": 6.842055123933102, "learning_rate": 9.569824906029006e-06, "loss": 0.4783, "step": 5553 }, { "epoch": 0.16, "grad_norm": 3.8844944709211133, "learning_rate": 9.569636696399057e-06, "loss": 0.516, "step": 5554 }, { "epoch": 0.16, "grad_norm": 7.790930245697413, "learning_rate": 9.569448447457086e-06, "loss": 0.5554, "step": 5555 }, { "epoch": 0.16, "grad_norm": 9.456125334972459, "learning_rate": 9.569260159204712e-06, "loss": 0.6118, "step": 5556 }, { "epoch": 0.16, "grad_norm": 7.603413402576726, "learning_rate": 9.569071831643557e-06, "loss": 0.3301, "step": 5557 }, { "epoch": 0.16, "grad_norm": 4.362835976634687, "learning_rate": 9.568883464775238e-06, "loss": 0.3807, "step": 5558 }, { "epoch": 0.16, "grad_norm": 4.345787654440363, "learning_rate": 9.568695058601378e-06, "loss": 0.4975, "step": 5559 }, { "epoch": 0.16, "grad_norm": 6.452833580105694, "learning_rate": 9.568506613123597e-06, "loss": 0.5098, "step": 5560 }, { "epoch": 0.16, "grad_norm": 3.867503205330722, "learning_rate": 9.568318128343517e-06, "loss": 0.2136, "step": 5561 }, { "epoch": 0.16, "grad_norm": 11.282143808393378, "learning_rate": 9.568129604262757e-06, "loss": 0.6518, "step": 5562 }, { "epoch": 0.16, "grad_norm": 7.7021602312754425, "learning_rate": 9.567941040882942e-06, "loss": 0.7649, "step": 5563 }, { "epoch": 0.16, "grad_norm": 6.849930711381862, "learning_rate": 9.56775243820569e-06, "loss": 0.5791, "step": 5564 }, { "epoch": 0.16, "grad_norm": 6.599215877076666, "learning_rate": 9.56756379623263e-06, "loss": 0.5962, "step": 5565 }, { "epoch": 0.16, "grad_norm": 7.4289794327752485, "learning_rate": 9.567375114965379e-06, "loss": 0.7628, "step": 5566 }, { "epoch": 0.16, "grad_norm": 6.2352554349726095, "learning_rate": 9.567186394405561e-06, "loss": 0.5237, "step": 5567 }, { "epoch": 0.16, "grad_norm": 5.80091729309643, "learning_rate": 9.566997634554802e-06, "loss": 0.4225, "step": 5568 }, { "epoch": 0.16, "grad_norm": 3.2063661320049786, "learning_rate": 9.566808835414725e-06, "loss": 0.4167, "step": 5569 }, { "epoch": 0.16, "grad_norm": 6.301240340249849, "learning_rate": 9.566619996986952e-06, "loss": 0.5237, "step": 5570 }, { "epoch": 0.16, "grad_norm": 5.138309641430918, "learning_rate": 9.566431119273112e-06, "loss": 0.5769, "step": 5571 }, { "epoch": 0.16, "grad_norm": 6.024570046908477, "learning_rate": 9.566242202274825e-06, "loss": 0.8892, "step": 5572 }, { "epoch": 0.16, "grad_norm": 3.5173747137839535, "learning_rate": 9.56605324599372e-06, "loss": 0.2078, "step": 5573 }, { "epoch": 0.16, "grad_norm": 2.8787731207160543, "learning_rate": 9.565864250431421e-06, "loss": 0.2809, "step": 5574 }, { "epoch": 0.16, "grad_norm": 6.791197727450861, "learning_rate": 9.565675215589554e-06, "loss": 0.3389, "step": 5575 }, { "epoch": 0.16, "grad_norm": 4.71321202876526, "learning_rate": 9.565486141469746e-06, "loss": 0.2938, "step": 5576 }, { "epoch": 0.16, "grad_norm": 5.721108955843381, "learning_rate": 9.565297028073621e-06, "loss": 0.4272, "step": 5577 }, { "epoch": 0.16, "grad_norm": 4.481516601633895, "learning_rate": 9.56510787540281e-06, "loss": 0.5612, "step": 5578 }, { "epoch": 0.16, "grad_norm": 4.769587935877056, "learning_rate": 9.564918683458937e-06, "loss": 0.414, "step": 5579 }, { "epoch": 0.16, "grad_norm": 4.090121867315918, "learning_rate": 9.56472945224363e-06, "loss": 0.0924, "step": 5580 }, { "epoch": 0.16, "grad_norm": 6.209925462333803, "learning_rate": 9.564540181758518e-06, "loss": 0.6767, "step": 5581 }, { "epoch": 0.16, "grad_norm": 10.086208490562145, "learning_rate": 9.564350872005228e-06, "loss": 0.9678, "step": 5582 }, { "epoch": 0.16, "grad_norm": 4.399237930887416, "learning_rate": 9.56416152298539e-06, "loss": 0.5134, "step": 5583 }, { "epoch": 0.16, "grad_norm": 4.9121546111858425, "learning_rate": 9.563972134700633e-06, "loss": 0.5732, "step": 5584 }, { "epoch": 0.16, "grad_norm": 4.853086689775105, "learning_rate": 9.563782707152585e-06, "loss": 0.1475, "step": 5585 }, { "epoch": 0.16, "grad_norm": 9.22039989642697, "learning_rate": 9.563593240342877e-06, "loss": 0.3273, "step": 5586 }, { "epoch": 0.16, "grad_norm": 7.150943037678667, "learning_rate": 9.563403734273137e-06, "loss": 0.8539, "step": 5587 }, { "epoch": 0.16, "grad_norm": 5.148357778577398, "learning_rate": 9.563214188944997e-06, "loss": 0.4372, "step": 5588 }, { "epoch": 0.16, "grad_norm": 6.055830167188018, "learning_rate": 9.563024604360087e-06, "loss": 0.4926, "step": 5589 }, { "epoch": 0.16, "grad_norm": 7.522378912395197, "learning_rate": 9.56283498052004e-06, "loss": 0.6364, "step": 5590 }, { "epoch": 0.16, "grad_norm": 6.001320931941703, "learning_rate": 9.562645317426483e-06, "loss": 0.3653, "step": 5591 }, { "epoch": 0.16, "grad_norm": 6.711306756089205, "learning_rate": 9.562455615081051e-06, "loss": 0.6258, "step": 5592 }, { "epoch": 0.16, "grad_norm": 8.183430133358419, "learning_rate": 9.562265873485375e-06, "loss": 0.8236, "step": 5593 }, { "epoch": 0.16, "grad_norm": 6.228894532892509, "learning_rate": 9.562076092641088e-06, "loss": 0.4867, "step": 5594 }, { "epoch": 0.16, "grad_norm": 3.867898494211086, "learning_rate": 9.561886272549821e-06, "loss": 0.2386, "step": 5595 }, { "epoch": 0.16, "grad_norm": 8.0880180888667, "learning_rate": 9.56169641321321e-06, "loss": 0.6311, "step": 5596 }, { "epoch": 0.16, "grad_norm": 4.136574141988952, "learning_rate": 9.561506514632885e-06, "loss": 0.263, "step": 5597 }, { "epoch": 0.16, "grad_norm": 2.9767564725375384, "learning_rate": 9.56131657681048e-06, "loss": 0.4962, "step": 5598 }, { "epoch": 0.16, "grad_norm": 6.070706898409685, "learning_rate": 9.561126599747631e-06, "loss": 0.3922, "step": 5599 }, { "epoch": 0.16, "grad_norm": 8.347491279225052, "learning_rate": 9.560936583445973e-06, "loss": 0.5615, "step": 5600 }, { "epoch": 0.16, "grad_norm": 9.67685094471532, "learning_rate": 9.560746527907137e-06, "loss": 0.7467, "step": 5601 }, { "epoch": 0.16, "grad_norm": 6.104909059689193, "learning_rate": 9.560556433132762e-06, "loss": 0.8264, "step": 5602 }, { "epoch": 0.16, "grad_norm": 4.240610634000054, "learning_rate": 9.56036629912448e-06, "loss": 0.3071, "step": 5603 }, { "epoch": 0.16, "grad_norm": 10.004951586284488, "learning_rate": 9.560176125883929e-06, "loss": 0.3913, "step": 5604 }, { "epoch": 0.16, "grad_norm": 7.095302260710068, "learning_rate": 9.559985913412743e-06, "loss": 0.4915, "step": 5605 }, { "epoch": 0.16, "grad_norm": 12.332903053321457, "learning_rate": 9.559795661712563e-06, "loss": 0.8111, "step": 5606 }, { "epoch": 0.16, "grad_norm": 4.212989745833175, "learning_rate": 9.55960537078502e-06, "loss": 0.6876, "step": 5607 }, { "epoch": 0.16, "grad_norm": 8.376998093658445, "learning_rate": 9.559415040631752e-06, "loss": 1.1593, "step": 5608 }, { "epoch": 0.16, "grad_norm": 7.925432117606802, "learning_rate": 9.559224671254399e-06, "loss": 0.7741, "step": 5609 }, { "epoch": 0.16, "grad_norm": 17.688986213707608, "learning_rate": 9.5590342626546e-06, "loss": 0.7071, "step": 5610 }, { "epoch": 0.16, "grad_norm": 4.4657314749664865, "learning_rate": 9.558843814833988e-06, "loss": 0.7984, "step": 5611 }, { "epoch": 0.16, "grad_norm": 8.19381659485781, "learning_rate": 9.558653327794202e-06, "loss": 0.7158, "step": 5612 }, { "epoch": 0.16, "grad_norm": 7.240932450498731, "learning_rate": 9.558462801536886e-06, "loss": 0.6076, "step": 5613 }, { "epoch": 0.16, "grad_norm": 11.455805695071387, "learning_rate": 9.558272236063674e-06, "loss": 0.9962, "step": 5614 }, { "epoch": 0.16, "grad_norm": 8.474280490546587, "learning_rate": 9.558081631376208e-06, "loss": 0.579, "step": 5615 }, { "epoch": 0.16, "grad_norm": 5.917023898699374, "learning_rate": 9.557890987476126e-06, "loss": 0.3058, "step": 5616 }, { "epoch": 0.16, "grad_norm": 7.720662486256251, "learning_rate": 9.557700304365069e-06, "loss": 0.9823, "step": 5617 }, { "epoch": 0.16, "grad_norm": 8.303994322993518, "learning_rate": 9.557509582044679e-06, "loss": 0.3685, "step": 5618 }, { "epoch": 0.16, "grad_norm": 5.595465242612138, "learning_rate": 9.557318820516594e-06, "loss": 0.4416, "step": 5619 }, { "epoch": 0.16, "grad_norm": 3.8618206969982642, "learning_rate": 9.557128019782458e-06, "loss": 0.4186, "step": 5620 }, { "epoch": 0.16, "grad_norm": 7.472457795366933, "learning_rate": 9.556937179843908e-06, "loss": 0.676, "step": 5621 }, { "epoch": 0.16, "grad_norm": 7.815532980601772, "learning_rate": 9.55674630070259e-06, "loss": 0.8064, "step": 5622 }, { "epoch": 0.16, "grad_norm": 4.893670982999352, "learning_rate": 9.556555382360147e-06, "loss": 0.7746, "step": 5623 }, { "epoch": 0.16, "grad_norm": 8.928126721887686, "learning_rate": 9.556364424818215e-06, "loss": 0.527, "step": 5624 }, { "epoch": 0.16, "grad_norm": 5.86937980481864, "learning_rate": 9.556173428078444e-06, "loss": 0.6028, "step": 5625 }, { "epoch": 0.16, "grad_norm": 6.987813696191789, "learning_rate": 9.55598239214247e-06, "loss": 0.585, "step": 5626 }, { "epoch": 0.16, "grad_norm": 5.911589454395437, "learning_rate": 9.555791317011945e-06, "loss": 0.9563, "step": 5627 }, { "epoch": 0.16, "grad_norm": 11.088435057699016, "learning_rate": 9.555600202688505e-06, "loss": 0.3401, "step": 5628 }, { "epoch": 0.16, "grad_norm": 5.831930845690475, "learning_rate": 9.555409049173798e-06, "loss": 0.5401, "step": 5629 }, { "epoch": 0.16, "grad_norm": 8.536001929106847, "learning_rate": 9.55521785646947e-06, "loss": 0.7945, "step": 5630 }, { "epoch": 0.16, "grad_norm": 5.5047354686075245, "learning_rate": 9.555026624577162e-06, "loss": 0.4649, "step": 5631 }, { "epoch": 0.16, "grad_norm": 3.8786892710193484, "learning_rate": 9.554835353498519e-06, "loss": 0.5709, "step": 5632 }, { "epoch": 0.16, "grad_norm": 8.442506844628214, "learning_rate": 9.554644043235191e-06, "loss": 0.4744, "step": 5633 }, { "epoch": 0.16, "grad_norm": 7.936418054086858, "learning_rate": 9.55445269378882e-06, "loss": 0.4966, "step": 5634 }, { "epoch": 0.16, "grad_norm": 6.351573273108849, "learning_rate": 9.554261305161054e-06, "loss": 0.4192, "step": 5635 }, { "epoch": 0.16, "grad_norm": 5.405088443226583, "learning_rate": 9.554069877353538e-06, "loss": 0.3655, "step": 5636 }, { "epoch": 0.16, "grad_norm": 8.197265159637823, "learning_rate": 9.553878410367918e-06, "loss": 0.4692, "step": 5637 }, { "epoch": 0.16, "grad_norm": 4.747333480863529, "learning_rate": 9.553686904205845e-06, "loss": 0.5385, "step": 5638 }, { "epoch": 0.16, "grad_norm": 9.291704859177539, "learning_rate": 9.553495358868963e-06, "loss": 0.2413, "step": 5639 }, { "epoch": 0.16, "grad_norm": 8.431695085658465, "learning_rate": 9.553303774358922e-06, "loss": 0.8595, "step": 5640 }, { "epoch": 0.16, "grad_norm": 5.45608287262554, "learning_rate": 9.553112150677367e-06, "loss": 0.4807, "step": 5641 }, { "epoch": 0.16, "grad_norm": 6.325639175264976, "learning_rate": 9.552920487825952e-06, "loss": 0.3965, "step": 5642 }, { "epoch": 0.16, "grad_norm": 7.298954408046719, "learning_rate": 9.552728785806322e-06, "loss": 0.8574, "step": 5643 }, { "epoch": 0.16, "grad_norm": 6.089436104511061, "learning_rate": 9.552537044620125e-06, "loss": 0.253, "step": 5644 }, { "epoch": 0.16, "grad_norm": 5.649423431415093, "learning_rate": 9.552345264269014e-06, "loss": 0.5219, "step": 5645 }, { "epoch": 0.16, "grad_norm": 10.558971289610755, "learning_rate": 9.552153444754637e-06, "loss": 0.6596, "step": 5646 }, { "epoch": 0.16, "grad_norm": 5.878812648496967, "learning_rate": 9.551961586078645e-06, "loss": 0.4756, "step": 5647 }, { "epoch": 0.16, "grad_norm": 7.579708067725119, "learning_rate": 9.551769688242685e-06, "loss": 1.0388, "step": 5648 }, { "epoch": 0.16, "grad_norm": 4.367983887041856, "learning_rate": 9.551577751248412e-06, "loss": 0.2177, "step": 5649 }, { "epoch": 0.16, "grad_norm": 6.047396654599828, "learning_rate": 9.551385775097477e-06, "loss": 0.3399, "step": 5650 }, { "epoch": 0.16, "grad_norm": 4.6226200991389215, "learning_rate": 9.55119375979153e-06, "loss": 0.5291, "step": 5651 }, { "epoch": 0.16, "grad_norm": 12.224158609529635, "learning_rate": 9.551001705332224e-06, "loss": 0.8278, "step": 5652 }, { "epoch": 0.16, "grad_norm": 6.751585315046935, "learning_rate": 9.55080961172121e-06, "loss": 0.7813, "step": 5653 }, { "epoch": 0.16, "grad_norm": 2.8524386379452227, "learning_rate": 9.550617478960143e-06, "loss": 0.3604, "step": 5654 }, { "epoch": 0.16, "grad_norm": 12.343844893851278, "learning_rate": 9.550425307050673e-06, "loss": 0.4631, "step": 5655 }, { "epoch": 0.16, "grad_norm": 5.736765391874369, "learning_rate": 9.550233095994453e-06, "loss": 0.238, "step": 5656 }, { "epoch": 0.16, "grad_norm": 8.60865021855097, "learning_rate": 9.55004084579314e-06, "loss": 0.49, "step": 5657 }, { "epoch": 0.16, "grad_norm": 7.128192119625221, "learning_rate": 9.549848556448384e-06, "loss": 0.63, "step": 5658 }, { "epoch": 0.16, "grad_norm": 6.818049838782787, "learning_rate": 9.54965622796184e-06, "loss": 0.7945, "step": 5659 }, { "epoch": 0.16, "grad_norm": 4.388257783402906, "learning_rate": 9.549463860335167e-06, "loss": 0.5412, "step": 5660 }, { "epoch": 0.16, "grad_norm": 2.5020197696430744, "learning_rate": 9.549271453570015e-06, "loss": 0.1058, "step": 5661 }, { "epoch": 0.16, "grad_norm": 6.6797316878792445, "learning_rate": 9.549079007668041e-06, "loss": 0.4906, "step": 5662 }, { "epoch": 0.16, "grad_norm": 5.6825313453464315, "learning_rate": 9.5488865226309e-06, "loss": 0.8077, "step": 5663 }, { "epoch": 0.16, "grad_norm": 7.902651423952812, "learning_rate": 9.548693998460249e-06, "loss": 0.6305, "step": 5664 }, { "epoch": 0.16, "grad_norm": 8.23878699568561, "learning_rate": 9.548501435157743e-06, "loss": 0.7712, "step": 5665 }, { "epoch": 0.16, "grad_norm": 7.544674064720018, "learning_rate": 9.54830883272504e-06, "loss": 0.4065, "step": 5666 }, { "epoch": 0.16, "grad_norm": 4.254302791892219, "learning_rate": 9.548116191163794e-06, "loss": 0.1899, "step": 5667 }, { "epoch": 0.16, "grad_norm": 5.749119691219562, "learning_rate": 9.547923510475666e-06, "loss": 0.6732, "step": 5668 }, { "epoch": 0.16, "grad_norm": 7.006863907283026, "learning_rate": 9.547730790662312e-06, "loss": 0.7178, "step": 5669 }, { "epoch": 0.16, "grad_norm": 7.286515129109772, "learning_rate": 9.547538031725389e-06, "loss": 0.5565, "step": 5670 }, { "epoch": 0.16, "grad_norm": 4.144413492469559, "learning_rate": 9.547345233666558e-06, "loss": 0.4794, "step": 5671 }, { "epoch": 0.16, "grad_norm": 6.024904203642635, "learning_rate": 9.547152396487474e-06, "loss": 0.3061, "step": 5672 }, { "epoch": 0.16, "grad_norm": 13.33092531076194, "learning_rate": 9.546959520189799e-06, "loss": 0.5425, "step": 5673 }, { "epoch": 0.16, "grad_norm": 3.374523217467703, "learning_rate": 9.54676660477519e-06, "loss": 0.0959, "step": 5674 }, { "epoch": 0.16, "grad_norm": 6.580723373919766, "learning_rate": 9.54657365024531e-06, "loss": 0.3001, "step": 5675 }, { "epoch": 0.16, "grad_norm": 5.437243093801314, "learning_rate": 9.546380656601815e-06, "loss": 0.3891, "step": 5676 }, { "epoch": 0.16, "grad_norm": 4.962977384959618, "learning_rate": 9.546187623846365e-06, "loss": 0.6537, "step": 5677 }, { "epoch": 0.16, "grad_norm": 8.353179346914635, "learning_rate": 9.545994551980626e-06, "loss": 0.6062, "step": 5678 }, { "epoch": 0.16, "grad_norm": 3.0900546844668138, "learning_rate": 9.545801441006254e-06, "loss": 0.3917, "step": 5679 }, { "epoch": 0.16, "grad_norm": 3.9845102025448447, "learning_rate": 9.545608290924913e-06, "loss": 0.5705, "step": 5680 }, { "epoch": 0.16, "grad_norm": 6.189909802890502, "learning_rate": 9.545415101738262e-06, "loss": 0.7122, "step": 5681 }, { "epoch": 0.16, "grad_norm": 7.81883141881569, "learning_rate": 9.545221873447965e-06, "loss": 0.6313, "step": 5682 }, { "epoch": 0.16, "grad_norm": 11.46823168838473, "learning_rate": 9.545028606055685e-06, "loss": 0.7874, "step": 5683 }, { "epoch": 0.16, "grad_norm": 4.361332060986804, "learning_rate": 9.544835299563083e-06, "loss": 0.5009, "step": 5684 }, { "epoch": 0.16, "grad_norm": 5.424311615298541, "learning_rate": 9.54464195397182e-06, "loss": 0.4122, "step": 5685 }, { "epoch": 0.16, "grad_norm": 5.711457819790503, "learning_rate": 9.544448569283564e-06, "loss": 0.6432, "step": 5686 }, { "epoch": 0.16, "grad_norm": 6.966079542373445, "learning_rate": 9.544255145499977e-06, "loss": 0.4912, "step": 5687 }, { "epoch": 0.16, "grad_norm": 5.689210403979796, "learning_rate": 9.544061682622721e-06, "loss": 0.3996, "step": 5688 }, { "epoch": 0.16, "grad_norm": 4.7598288136241536, "learning_rate": 9.543868180653463e-06, "loss": 0.3329, "step": 5689 }, { "epoch": 0.16, "grad_norm": 3.902962636510275, "learning_rate": 9.543674639593866e-06, "loss": 0.3687, "step": 5690 }, { "epoch": 0.16, "grad_norm": 18.700503516940646, "learning_rate": 9.543481059445596e-06, "loss": 0.8128, "step": 5691 }, { "epoch": 0.16, "grad_norm": 4.148172172495424, "learning_rate": 9.543287440210317e-06, "loss": 0.4236, "step": 5692 }, { "epoch": 0.16, "grad_norm": 15.813311589940664, "learning_rate": 9.543093781889696e-06, "loss": 0.4757, "step": 5693 }, { "epoch": 0.16, "grad_norm": 15.03615916577422, "learning_rate": 9.542900084485399e-06, "loss": 0.5221, "step": 5694 }, { "epoch": 0.16, "grad_norm": 6.367132455786804, "learning_rate": 9.542706347999091e-06, "loss": 0.6167, "step": 5695 }, { "epoch": 0.16, "grad_norm": 10.436599052971253, "learning_rate": 9.54251257243244e-06, "loss": 0.5578, "step": 5696 }, { "epoch": 0.16, "grad_norm": 8.404661007137495, "learning_rate": 9.542318757787114e-06, "loss": 0.7362, "step": 5697 }, { "epoch": 0.16, "grad_norm": 5.436546373601586, "learning_rate": 9.542124904064777e-06, "loss": 0.278, "step": 5698 }, { "epoch": 0.16, "grad_norm": 5.287061033672611, "learning_rate": 9.5419310112671e-06, "loss": 0.6825, "step": 5699 }, { "epoch": 0.16, "grad_norm": 4.156400520065785, "learning_rate": 9.541737079395749e-06, "loss": 0.5038, "step": 5700 }, { "epoch": 0.16, "grad_norm": 4.691999742509553, "learning_rate": 9.541543108452394e-06, "loss": 0.3963, "step": 5701 }, { "epoch": 0.16, "grad_norm": 5.235384988847077, "learning_rate": 9.541349098438702e-06, "loss": 0.3533, "step": 5702 }, { "epoch": 0.16, "grad_norm": 6.685186806025278, "learning_rate": 9.541155049356342e-06, "loss": 0.6007, "step": 5703 }, { "epoch": 0.16, "grad_norm": 5.95588291794297, "learning_rate": 9.540960961206986e-06, "loss": 0.6852, "step": 5704 }, { "epoch": 0.16, "grad_norm": 4.332717029442785, "learning_rate": 9.540766833992301e-06, "loss": 0.572, "step": 5705 }, { "epoch": 0.16, "grad_norm": 2.5518750683151263, "learning_rate": 9.540572667713958e-06, "loss": 0.4233, "step": 5706 }, { "epoch": 0.16, "grad_norm": 12.232327227749357, "learning_rate": 9.540378462373628e-06, "loss": 0.2809, "step": 5707 }, { "epoch": 0.16, "grad_norm": 4.354674673181319, "learning_rate": 9.540184217972979e-06, "loss": 0.3595, "step": 5708 }, { "epoch": 0.16, "grad_norm": 2.929103986486399, "learning_rate": 9.539989934513687e-06, "loss": 0.2307, "step": 5709 }, { "epoch": 0.16, "grad_norm": 8.095377519085424, "learning_rate": 9.53979561199742e-06, "loss": 0.5092, "step": 5710 }, { "epoch": 0.16, "grad_norm": 4.354530678215593, "learning_rate": 9.539601250425849e-06, "loss": 0.7821, "step": 5711 }, { "epoch": 0.16, "grad_norm": 7.66840882478978, "learning_rate": 9.539406849800648e-06, "loss": 0.6182, "step": 5712 }, { "epoch": 0.16, "grad_norm": 10.019551047738725, "learning_rate": 9.539212410123488e-06, "loss": 0.8836, "step": 5713 }, { "epoch": 0.16, "grad_norm": 18.772477613174882, "learning_rate": 9.539017931396044e-06, "loss": 0.5149, "step": 5714 }, { "epoch": 0.16, "grad_norm": 5.7976037567316, "learning_rate": 9.538823413619987e-06, "loss": 0.5764, "step": 5715 }, { "epoch": 0.16, "grad_norm": 5.346930956923941, "learning_rate": 9.53862885679699e-06, "loss": 0.2738, "step": 5716 }, { "epoch": 0.16, "grad_norm": 9.90382451830688, "learning_rate": 9.538434260928728e-06, "loss": 0.4852, "step": 5717 }, { "epoch": 0.16, "grad_norm": 8.150120737784583, "learning_rate": 9.538239626016877e-06, "loss": 0.404, "step": 5718 }, { "epoch": 0.16, "grad_norm": 6.768824325252079, "learning_rate": 9.538044952063106e-06, "loss": 0.672, "step": 5719 }, { "epoch": 0.16, "grad_norm": 7.886115449273645, "learning_rate": 9.537850239069095e-06, "loss": 0.7689, "step": 5720 }, { "epoch": 0.16, "grad_norm": 4.1165245144866, "learning_rate": 9.537655487036518e-06, "loss": 0.2518, "step": 5721 }, { "epoch": 0.16, "grad_norm": 8.493400816936274, "learning_rate": 9.537460695967049e-06, "loss": 0.5535, "step": 5722 }, { "epoch": 0.16, "grad_norm": 6.276020473164801, "learning_rate": 9.537265865862363e-06, "loss": 0.6686, "step": 5723 }, { "epoch": 0.16, "grad_norm": 7.627302541311814, "learning_rate": 9.537070996724139e-06, "loss": 0.885, "step": 5724 }, { "epoch": 0.16, "grad_norm": 5.001093601793283, "learning_rate": 9.53687608855405e-06, "loss": 0.46, "step": 5725 }, { "epoch": 0.16, "grad_norm": 7.367801321111546, "learning_rate": 9.536681141353776e-06, "loss": 0.4085, "step": 5726 }, { "epoch": 0.16, "grad_norm": 5.5888753354687895, "learning_rate": 9.536486155124992e-06, "loss": 0.5331, "step": 5727 }, { "epoch": 0.16, "grad_norm": 8.546040579671148, "learning_rate": 9.536291129869376e-06, "loss": 0.6186, "step": 5728 }, { "epoch": 0.16, "grad_norm": 6.4173749020921855, "learning_rate": 9.536096065588606e-06, "loss": 0.4108, "step": 5729 }, { "epoch": 0.16, "grad_norm": 9.600537312888637, "learning_rate": 9.535900962284361e-06, "loss": 0.2468, "step": 5730 }, { "epoch": 0.16, "grad_norm": 4.035556001609502, "learning_rate": 9.535705819958318e-06, "loss": 0.3008, "step": 5731 }, { "epoch": 0.16, "grad_norm": 7.886394401482289, "learning_rate": 9.535510638612155e-06, "loss": 0.2742, "step": 5732 }, { "epoch": 0.16, "grad_norm": 7.8352750306098295, "learning_rate": 9.535315418247555e-06, "loss": 0.5899, "step": 5733 }, { "epoch": 0.16, "grad_norm": 6.613711654618256, "learning_rate": 9.535120158866194e-06, "loss": 0.4263, "step": 5734 }, { "epoch": 0.16, "grad_norm": 8.835559017261257, "learning_rate": 9.534924860469753e-06, "loss": 0.4314, "step": 5735 }, { "epoch": 0.16, "grad_norm": 9.811397369051722, "learning_rate": 9.534729523059909e-06, "loss": 0.7732, "step": 5736 }, { "epoch": 0.16, "grad_norm": 4.466949833901864, "learning_rate": 9.534534146638349e-06, "loss": 0.2281, "step": 5737 }, { "epoch": 0.16, "grad_norm": 8.44162626743221, "learning_rate": 9.534338731206747e-06, "loss": 0.5262, "step": 5738 }, { "epoch": 0.16, "grad_norm": 10.096547781276993, "learning_rate": 9.534143276766787e-06, "loss": 0.727, "step": 5739 }, { "epoch": 0.16, "grad_norm": 6.294075279580831, "learning_rate": 9.533947783320153e-06, "loss": 0.4476, "step": 5740 }, { "epoch": 0.16, "grad_norm": 4.02953204431607, "learning_rate": 9.533752250868525e-06, "loss": 0.1603, "step": 5741 }, { "epoch": 0.16, "grad_norm": 4.694336496174166, "learning_rate": 9.533556679413581e-06, "loss": 0.6584, "step": 5742 }, { "epoch": 0.16, "grad_norm": 7.611879159951446, "learning_rate": 9.53336106895701e-06, "loss": 0.6593, "step": 5743 }, { "epoch": 0.16, "grad_norm": 11.694068042338612, "learning_rate": 9.53316541950049e-06, "loss": 0.7831, "step": 5744 }, { "epoch": 0.16, "grad_norm": 6.079591392790793, "learning_rate": 9.532969731045707e-06, "loss": 0.901, "step": 5745 }, { "epoch": 0.16, "grad_norm": 6.065500637628926, "learning_rate": 9.532774003594346e-06, "loss": 0.5232, "step": 5746 }, { "epoch": 0.16, "grad_norm": 7.668412431346146, "learning_rate": 9.532578237148084e-06, "loss": 0.5597, "step": 5747 }, { "epoch": 0.16, "grad_norm": 3.294023265758775, "learning_rate": 9.532382431708612e-06, "loss": 0.2938, "step": 5748 }, { "epoch": 0.16, "grad_norm": 6.171092464235845, "learning_rate": 9.53218658727761e-06, "loss": 0.3831, "step": 5749 }, { "epoch": 0.16, "grad_norm": 5.0150062916222735, "learning_rate": 9.531990703856766e-06, "loss": 0.5919, "step": 5750 }, { "epoch": 0.16, "grad_norm": 2.2533208129702382, "learning_rate": 9.531794781447765e-06, "loss": 0.1038, "step": 5751 }, { "epoch": 0.16, "grad_norm": 10.315800179210644, "learning_rate": 9.53159882005229e-06, "loss": 0.4899, "step": 5752 }, { "epoch": 0.16, "grad_norm": 6.684584079948387, "learning_rate": 9.53140281967203e-06, "loss": 0.3422, "step": 5753 }, { "epoch": 0.16, "grad_norm": 4.794246239075023, "learning_rate": 9.531206780308668e-06, "loss": 0.2017, "step": 5754 }, { "epoch": 0.16, "grad_norm": 7.641269336827612, "learning_rate": 9.531010701963893e-06, "loss": 0.5301, "step": 5755 }, { "epoch": 0.16, "grad_norm": 9.625633317993207, "learning_rate": 9.53081458463939e-06, "loss": 1.0277, "step": 5756 }, { "epoch": 0.16, "grad_norm": 6.593828824552916, "learning_rate": 9.530618428336846e-06, "loss": 1.3423, "step": 5757 }, { "epoch": 0.16, "grad_norm": 5.374900373378345, "learning_rate": 9.530422233057952e-06, "loss": 0.185, "step": 5758 }, { "epoch": 0.16, "grad_norm": 5.163584838567684, "learning_rate": 9.530225998804393e-06, "loss": 0.7221, "step": 5759 }, { "epoch": 0.16, "grad_norm": 6.464719687017672, "learning_rate": 9.530029725577855e-06, "loss": 0.4557, "step": 5760 }, { "epoch": 0.16, "grad_norm": 12.926086274620666, "learning_rate": 9.52983341338003e-06, "loss": 0.8482, "step": 5761 }, { "epoch": 0.17, "grad_norm": 8.63405051351673, "learning_rate": 9.529637062212607e-06, "loss": 0.9509, "step": 5762 }, { "epoch": 0.17, "grad_norm": 5.227719895833714, "learning_rate": 9.529440672077272e-06, "loss": 0.5017, "step": 5763 }, { "epoch": 0.17, "grad_norm": 5.567416515024174, "learning_rate": 9.52924424297572e-06, "loss": 0.4329, "step": 5764 }, { "epoch": 0.17, "grad_norm": 5.570561300448716, "learning_rate": 9.529047774909635e-06, "loss": 0.4337, "step": 5765 }, { "epoch": 0.17, "grad_norm": 7.246962930623729, "learning_rate": 9.52885126788071e-06, "loss": 0.7106, "step": 5766 }, { "epoch": 0.17, "grad_norm": 8.42945993026133, "learning_rate": 9.528654721890634e-06, "loss": 0.7228, "step": 5767 }, { "epoch": 0.17, "grad_norm": 7.05747868101256, "learning_rate": 9.528458136941101e-06, "loss": 0.3432, "step": 5768 }, { "epoch": 0.17, "grad_norm": 4.052938565545099, "learning_rate": 9.528261513033799e-06, "loss": 0.4749, "step": 5769 }, { "epoch": 0.17, "grad_norm": 5.740896772784529, "learning_rate": 9.528064850170423e-06, "loss": 0.5755, "step": 5770 }, { "epoch": 0.17, "grad_norm": 6.318001832824712, "learning_rate": 9.527868148352659e-06, "loss": 0.4303, "step": 5771 }, { "epoch": 0.17, "grad_norm": 7.070222396697966, "learning_rate": 9.527671407582204e-06, "loss": 0.4509, "step": 5772 }, { "epoch": 0.17, "grad_norm": 10.19190397030491, "learning_rate": 9.527474627860752e-06, "loss": 0.6587, "step": 5773 }, { "epoch": 0.17, "grad_norm": 7.683838197032222, "learning_rate": 9.52727780918999e-06, "loss": 0.6723, "step": 5774 }, { "epoch": 0.17, "grad_norm": 6.97049099510426, "learning_rate": 9.527080951571615e-06, "loss": 0.7882, "step": 5775 }, { "epoch": 0.17, "grad_norm": 6.002034001499107, "learning_rate": 9.52688405500732e-06, "loss": 0.4175, "step": 5776 }, { "epoch": 0.17, "grad_norm": 6.470009059280585, "learning_rate": 9.5266871194988e-06, "loss": 0.5006, "step": 5777 }, { "epoch": 0.17, "grad_norm": 6.770086953521782, "learning_rate": 9.526490145047747e-06, "loss": 0.4436, "step": 5778 }, { "epoch": 0.17, "grad_norm": 8.683450029276063, "learning_rate": 9.526293131655857e-06, "loss": 0.6452, "step": 5779 }, { "epoch": 0.17, "grad_norm": 7.387207692877681, "learning_rate": 9.526096079324824e-06, "loss": 0.3289, "step": 5780 }, { "epoch": 0.17, "grad_norm": 7.025174546703563, "learning_rate": 9.525898988056344e-06, "loss": 0.5321, "step": 5781 }, { "epoch": 0.17, "grad_norm": 6.950122450703509, "learning_rate": 9.525701857852113e-06, "loss": 0.7927, "step": 5782 }, { "epoch": 0.17, "grad_norm": 3.803461586218769, "learning_rate": 9.525504688713826e-06, "loss": 0.2325, "step": 5783 }, { "epoch": 0.17, "grad_norm": 6.470407614507449, "learning_rate": 9.525307480643178e-06, "loss": 0.699, "step": 5784 }, { "epoch": 0.17, "grad_norm": 4.542619057051939, "learning_rate": 9.525110233641867e-06, "loss": 0.3722, "step": 5785 }, { "epoch": 0.17, "grad_norm": 6.132041736933537, "learning_rate": 9.524912947711592e-06, "loss": 0.5758, "step": 5786 }, { "epoch": 0.17, "grad_norm": 5.380952289422955, "learning_rate": 9.524715622854046e-06, "loss": 0.3904, "step": 5787 }, { "epoch": 0.17, "grad_norm": 7.945329604228916, "learning_rate": 9.52451825907093e-06, "loss": 0.8704, "step": 5788 }, { "epoch": 0.17, "grad_norm": 9.767079139783322, "learning_rate": 9.52432085636394e-06, "loss": 0.4094, "step": 5789 }, { "epoch": 0.17, "grad_norm": 8.915472134824988, "learning_rate": 9.524123414734774e-06, "loss": 0.6246, "step": 5790 }, { "epoch": 0.17, "grad_norm": 4.810451492683541, "learning_rate": 9.523925934185132e-06, "loss": 0.4324, "step": 5791 }, { "epoch": 0.17, "grad_norm": 7.195205008400737, "learning_rate": 9.523728414716713e-06, "loss": 0.4441, "step": 5792 }, { "epoch": 0.17, "grad_norm": 6.133600230560358, "learning_rate": 9.523530856331214e-06, "loss": 0.6966, "step": 5793 }, { "epoch": 0.17, "grad_norm": 8.646807316224644, "learning_rate": 9.523333259030337e-06, "loss": 0.6284, "step": 5794 }, { "epoch": 0.17, "grad_norm": 8.816531726787643, "learning_rate": 9.523135622815778e-06, "loss": 0.7903, "step": 5795 }, { "epoch": 0.17, "grad_norm": 7.977219422850696, "learning_rate": 9.522937947689243e-06, "loss": 0.6439, "step": 5796 }, { "epoch": 0.17, "grad_norm": 11.947834275129816, "learning_rate": 9.52274023365243e-06, "loss": 0.9589, "step": 5797 }, { "epoch": 0.17, "grad_norm": 8.135375572683671, "learning_rate": 9.522542480707038e-06, "loss": 0.4894, "step": 5798 }, { "epoch": 0.17, "grad_norm": 3.906334319158315, "learning_rate": 9.52234468885477e-06, "loss": 0.4048, "step": 5799 }, { "epoch": 0.17, "grad_norm": 36.105404142350636, "learning_rate": 9.52214685809733e-06, "loss": 0.6823, "step": 5800 }, { "epoch": 0.17, "grad_norm": 6.7530631426617695, "learning_rate": 9.521948988436413e-06, "loss": 0.6717, "step": 5801 }, { "epoch": 0.17, "grad_norm": 5.128811419177579, "learning_rate": 9.521751079873728e-06, "loss": 0.6549, "step": 5802 }, { "epoch": 0.17, "grad_norm": 3.578440190055142, "learning_rate": 9.521553132410977e-06, "loss": 0.1548, "step": 5803 }, { "epoch": 0.17, "grad_norm": 9.848733082735105, "learning_rate": 9.52135514604986e-06, "loss": 0.7291, "step": 5804 }, { "epoch": 0.17, "grad_norm": 12.083887223450786, "learning_rate": 9.521157120792079e-06, "loss": 0.4274, "step": 5805 }, { "epoch": 0.17, "grad_norm": 4.288915329372906, "learning_rate": 9.520959056639342e-06, "loss": 0.5047, "step": 5806 }, { "epoch": 0.17, "grad_norm": 4.543699669854357, "learning_rate": 9.52076095359335e-06, "loss": 0.3051, "step": 5807 }, { "epoch": 0.17, "grad_norm": 6.106853189028288, "learning_rate": 9.52056281165581e-06, "loss": 0.7123, "step": 5808 }, { "epoch": 0.17, "grad_norm": 5.491505131446534, "learning_rate": 9.520364630828422e-06, "loss": 0.3372, "step": 5809 }, { "epoch": 0.17, "grad_norm": 9.250035466306585, "learning_rate": 9.520166411112895e-06, "loss": 0.3985, "step": 5810 }, { "epoch": 0.17, "grad_norm": 4.965974644426072, "learning_rate": 9.519968152510933e-06, "loss": 0.7037, "step": 5811 }, { "epoch": 0.17, "grad_norm": 6.063350893064758, "learning_rate": 9.51976985502424e-06, "loss": 0.3086, "step": 5812 }, { "epoch": 0.17, "grad_norm": 11.27965840870807, "learning_rate": 9.519571518654527e-06, "loss": 0.998, "step": 5813 }, { "epoch": 0.17, "grad_norm": 7.855614735267859, "learning_rate": 9.519373143403495e-06, "loss": 0.9959, "step": 5814 }, { "epoch": 0.17, "grad_norm": 6.364873547552746, "learning_rate": 9.519174729272851e-06, "loss": 0.3482, "step": 5815 }, { "epoch": 0.17, "grad_norm": 4.126947607857248, "learning_rate": 9.518976276264305e-06, "loss": 0.196, "step": 5816 }, { "epoch": 0.17, "grad_norm": 5.022934053386446, "learning_rate": 9.51877778437956e-06, "loss": 0.4, "step": 5817 }, { "epoch": 0.17, "grad_norm": 7.45947548487358, "learning_rate": 9.518579253620328e-06, "loss": 0.227, "step": 5818 }, { "epoch": 0.17, "grad_norm": 6.869290287056703, "learning_rate": 9.518380683988316e-06, "loss": 0.5906, "step": 5819 }, { "epoch": 0.17, "grad_norm": 5.003496854593373, "learning_rate": 9.518182075485229e-06, "loss": 0.3237, "step": 5820 }, { "epoch": 0.17, "grad_norm": 6.256245811072289, "learning_rate": 9.517983428112779e-06, "loss": 0.5473, "step": 5821 }, { "epoch": 0.17, "grad_norm": 6.212010743123902, "learning_rate": 9.517784741872674e-06, "loss": 0.4261, "step": 5822 }, { "epoch": 0.17, "grad_norm": 6.1787360962424405, "learning_rate": 9.517586016766623e-06, "loss": 0.556, "step": 5823 }, { "epoch": 0.17, "grad_norm": 14.250541542872563, "learning_rate": 9.517387252796333e-06, "loss": 0.7255, "step": 5824 }, { "epoch": 0.17, "grad_norm": 2.9714522661781344, "learning_rate": 9.51718844996352e-06, "loss": 0.1423, "step": 5825 }, { "epoch": 0.17, "grad_norm": 7.818987906638254, "learning_rate": 9.516989608269891e-06, "loss": 0.5174, "step": 5826 }, { "epoch": 0.17, "grad_norm": 4.8258348483912865, "learning_rate": 9.516790727717155e-06, "loss": 0.3672, "step": 5827 }, { "epoch": 0.17, "grad_norm": 10.032173375683382, "learning_rate": 9.516591808307024e-06, "loss": 1.0297, "step": 5828 }, { "epoch": 0.17, "grad_norm": 4.491115568226331, "learning_rate": 9.516392850041213e-06, "loss": 0.4208, "step": 5829 }, { "epoch": 0.17, "grad_norm": 4.707485775351241, "learning_rate": 9.516193852921427e-06, "loss": 0.4158, "step": 5830 }, { "epoch": 0.17, "grad_norm": 7.851742627559102, "learning_rate": 9.515994816949383e-06, "loss": 0.7287, "step": 5831 }, { "epoch": 0.17, "grad_norm": 3.8531401152055196, "learning_rate": 9.515795742126791e-06, "loss": 0.4003, "step": 5832 }, { "epoch": 0.17, "grad_norm": 6.3166906968651935, "learning_rate": 9.515596628455365e-06, "loss": 0.8527, "step": 5833 }, { "epoch": 0.17, "grad_norm": 3.2421887593094554, "learning_rate": 9.515397475936816e-06, "loss": 0.2527, "step": 5834 }, { "epoch": 0.17, "grad_norm": 5.072979805910481, "learning_rate": 9.51519828457286e-06, "loss": 0.5225, "step": 5835 }, { "epoch": 0.17, "grad_norm": 6.583573284685469, "learning_rate": 9.514999054365207e-06, "loss": 0.2658, "step": 5836 }, { "epoch": 0.17, "grad_norm": 6.653621387374572, "learning_rate": 9.514799785315573e-06, "loss": 0.3755, "step": 5837 }, { "epoch": 0.17, "grad_norm": 4.590943783363746, "learning_rate": 9.514600477425676e-06, "loss": 0.4854, "step": 5838 }, { "epoch": 0.17, "grad_norm": 10.116603336628293, "learning_rate": 9.514401130697224e-06, "loss": 0.9394, "step": 5839 }, { "epoch": 0.17, "grad_norm": 12.234236617785541, "learning_rate": 9.514201745131935e-06, "loss": 0.6866, "step": 5840 }, { "epoch": 0.17, "grad_norm": 5.421170807326651, "learning_rate": 9.514002320731525e-06, "loss": 0.4695, "step": 5841 }, { "epoch": 0.17, "grad_norm": 2.3641221560195516, "learning_rate": 9.513802857497706e-06, "loss": 0.2175, "step": 5842 }, { "epoch": 0.17, "grad_norm": 6.676620082486392, "learning_rate": 9.513603355432202e-06, "loss": 0.5217, "step": 5843 }, { "epoch": 0.17, "grad_norm": 6.883818294199532, "learning_rate": 9.51340381453672e-06, "loss": 0.397, "step": 5844 }, { "epoch": 0.17, "grad_norm": 10.586437030562983, "learning_rate": 9.513204234812983e-06, "loss": 0.7068, "step": 5845 }, { "epoch": 0.17, "grad_norm": 6.331215807717906, "learning_rate": 9.513004616262703e-06, "loss": 0.5431, "step": 5846 }, { "epoch": 0.17, "grad_norm": 8.12308409183137, "learning_rate": 9.512804958887603e-06, "loss": 0.5999, "step": 5847 }, { "epoch": 0.17, "grad_norm": 6.738263417095967, "learning_rate": 9.512605262689394e-06, "loss": 0.3403, "step": 5848 }, { "epoch": 0.17, "grad_norm": 4.0697434040322875, "learning_rate": 9.5124055276698e-06, "loss": 0.5178, "step": 5849 }, { "epoch": 0.17, "grad_norm": 4.901938566812019, "learning_rate": 9.512205753830536e-06, "loss": 0.667, "step": 5850 }, { "epoch": 0.17, "grad_norm": 4.258035132510917, "learning_rate": 9.512005941173324e-06, "loss": 0.3682, "step": 5851 }, { "epoch": 0.17, "grad_norm": 6.069806759066323, "learning_rate": 9.511806089699878e-06, "loss": 0.5472, "step": 5852 }, { "epoch": 0.17, "grad_norm": 6.17257230658377, "learning_rate": 9.511606199411919e-06, "loss": 0.4293, "step": 5853 }, { "epoch": 0.17, "grad_norm": 7.678263504089477, "learning_rate": 9.511406270311168e-06, "loss": 0.6024, "step": 5854 }, { "epoch": 0.17, "grad_norm": 5.1429918400215575, "learning_rate": 9.511206302399345e-06, "loss": 0.6459, "step": 5855 }, { "epoch": 0.17, "grad_norm": 10.435727271566819, "learning_rate": 9.511006295678169e-06, "loss": 0.46, "step": 5856 }, { "epoch": 0.17, "grad_norm": 7.746541943763476, "learning_rate": 9.510806250149363e-06, "loss": 0.5143, "step": 5857 }, { "epoch": 0.17, "grad_norm": 5.4509805654635235, "learning_rate": 9.510606165814644e-06, "loss": 0.5116, "step": 5858 }, { "epoch": 0.17, "grad_norm": 5.2257465586663505, "learning_rate": 9.510406042675737e-06, "loss": 0.8727, "step": 5859 }, { "epoch": 0.17, "grad_norm": 7.18614688243924, "learning_rate": 9.51020588073436e-06, "loss": 0.5067, "step": 5860 }, { "epoch": 0.17, "grad_norm": 9.427578188446255, "learning_rate": 9.510005679992238e-06, "loss": 0.4222, "step": 5861 }, { "epoch": 0.17, "grad_norm": 5.8049600570579365, "learning_rate": 9.509805440451094e-06, "loss": 0.4655, "step": 5862 }, { "epoch": 0.17, "grad_norm": 8.652707583525574, "learning_rate": 9.509605162112648e-06, "loss": 1.1888, "step": 5863 }, { "epoch": 0.17, "grad_norm": 10.085478236632941, "learning_rate": 9.509404844978625e-06, "loss": 0.6397, "step": 5864 }, { "epoch": 0.17, "grad_norm": 5.777457276751277, "learning_rate": 9.509204489050745e-06, "loss": 0.6269, "step": 5865 }, { "epoch": 0.17, "grad_norm": 5.573317692193225, "learning_rate": 9.509004094330736e-06, "loss": 0.7157, "step": 5866 }, { "epoch": 0.17, "grad_norm": 2.9869930507577798, "learning_rate": 9.508803660820319e-06, "loss": 0.2225, "step": 5867 }, { "epoch": 0.17, "grad_norm": 8.807656519525285, "learning_rate": 9.50860318852122e-06, "loss": 0.3753, "step": 5868 }, { "epoch": 0.17, "grad_norm": 7.23418187322367, "learning_rate": 9.508402677435163e-06, "loss": 0.3442, "step": 5869 }, { "epoch": 0.17, "grad_norm": 8.029704023063244, "learning_rate": 9.508202127563872e-06, "loss": 0.5943, "step": 5870 }, { "epoch": 0.17, "grad_norm": 9.23798662486061, "learning_rate": 9.508001538909074e-06, "loss": 0.3744, "step": 5871 }, { "epoch": 0.17, "grad_norm": 3.233799500796159, "learning_rate": 9.507800911472492e-06, "loss": 0.3139, "step": 5872 }, { "epoch": 0.17, "grad_norm": 4.636413178502207, "learning_rate": 9.507600245255854e-06, "loss": 0.3573, "step": 5873 }, { "epoch": 0.17, "grad_norm": 8.434227372326585, "learning_rate": 9.507399540260889e-06, "loss": 0.8352, "step": 5874 }, { "epoch": 0.17, "grad_norm": 5.029618277048208, "learning_rate": 9.507198796489318e-06, "loss": 0.4085, "step": 5875 }, { "epoch": 0.17, "grad_norm": 4.727546450761436, "learning_rate": 9.506998013942872e-06, "loss": 0.7357, "step": 5876 }, { "epoch": 0.17, "grad_norm": 5.236641828565903, "learning_rate": 9.506797192623277e-06, "loss": 0.4814, "step": 5877 }, { "epoch": 0.17, "grad_norm": 11.26127127698848, "learning_rate": 9.506596332532259e-06, "loss": 0.9465, "step": 5878 }, { "epoch": 0.17, "grad_norm": 5.756348712418524, "learning_rate": 9.506395433671549e-06, "loss": 0.848, "step": 5879 }, { "epoch": 0.17, "grad_norm": 8.21906810768574, "learning_rate": 9.506194496042873e-06, "loss": 0.3366, "step": 5880 }, { "epoch": 0.17, "grad_norm": 9.08327969409842, "learning_rate": 9.505993519647963e-06, "loss": 0.7887, "step": 5881 }, { "epoch": 0.17, "grad_norm": 3.471699877973989, "learning_rate": 9.505792504488542e-06, "loss": 0.3756, "step": 5882 }, { "epoch": 0.17, "grad_norm": 7.785011699768173, "learning_rate": 9.505591450566345e-06, "loss": 0.5633, "step": 5883 }, { "epoch": 0.17, "grad_norm": 4.581513176367551, "learning_rate": 9.505390357883099e-06, "loss": 0.4251, "step": 5884 }, { "epoch": 0.17, "grad_norm": 5.060762413319859, "learning_rate": 9.505189226440535e-06, "loss": 0.7179, "step": 5885 }, { "epoch": 0.17, "grad_norm": 4.445499034112198, "learning_rate": 9.504988056240382e-06, "loss": 0.3332, "step": 5886 }, { "epoch": 0.17, "grad_norm": 3.9965276249542043, "learning_rate": 9.504786847284373e-06, "loss": 0.4583, "step": 5887 }, { "epoch": 0.17, "grad_norm": 9.103407827449892, "learning_rate": 9.504585599574235e-06, "loss": 0.6342, "step": 5888 }, { "epoch": 0.17, "grad_norm": 6.04281286569959, "learning_rate": 9.504384313111703e-06, "loss": 0.5188, "step": 5889 }, { "epoch": 0.17, "grad_norm": 7.045317517902942, "learning_rate": 9.504182987898508e-06, "loss": 0.3299, "step": 5890 }, { "epoch": 0.17, "grad_norm": 9.627421780379683, "learning_rate": 9.50398162393638e-06, "loss": 0.7133, "step": 5891 }, { "epoch": 0.17, "grad_norm": 5.685799417813431, "learning_rate": 9.503780221227053e-06, "loss": 0.417, "step": 5892 }, { "epoch": 0.17, "grad_norm": 9.60665353834573, "learning_rate": 9.50357877977226e-06, "loss": 0.8652, "step": 5893 }, { "epoch": 0.17, "grad_norm": 6.440749968344068, "learning_rate": 9.503377299573732e-06, "loss": 0.3783, "step": 5894 }, { "epoch": 0.17, "grad_norm": 8.473661567979473, "learning_rate": 9.503175780633205e-06, "loss": 0.8069, "step": 5895 }, { "epoch": 0.17, "grad_norm": 17.18819065440301, "learning_rate": 9.50297422295241e-06, "loss": 0.9254, "step": 5896 }, { "epoch": 0.17, "grad_norm": 9.995374372683479, "learning_rate": 9.502772626533083e-06, "loss": 1.0837, "step": 5897 }, { "epoch": 0.17, "grad_norm": 9.712044662740535, "learning_rate": 9.502570991376957e-06, "loss": 1.0129, "step": 5898 }, { "epoch": 0.17, "grad_norm": 8.217593963893563, "learning_rate": 9.502369317485768e-06, "loss": 0.3746, "step": 5899 }, { "epoch": 0.17, "grad_norm": 6.124765858748676, "learning_rate": 9.50216760486125e-06, "loss": 0.5952, "step": 5900 }, { "epoch": 0.17, "grad_norm": 2.433382199615427, "learning_rate": 9.501965853505137e-06, "loss": 0.2135, "step": 5901 }, { "epoch": 0.17, "grad_norm": 5.202010439157186, "learning_rate": 9.501764063419167e-06, "loss": 0.4654, "step": 5902 }, { "epoch": 0.17, "grad_norm": 3.8956128974333644, "learning_rate": 9.501562234605075e-06, "loss": 0.2588, "step": 5903 }, { "epoch": 0.17, "grad_norm": 3.724709231913907, "learning_rate": 9.501360367064597e-06, "loss": 0.3355, "step": 5904 }, { "epoch": 0.17, "grad_norm": 5.492159934266372, "learning_rate": 9.50115846079947e-06, "loss": 0.636, "step": 5905 }, { "epoch": 0.17, "grad_norm": 5.049190391662752, "learning_rate": 9.500956515811432e-06, "loss": 0.5089, "step": 5906 }, { "epoch": 0.17, "grad_norm": 5.8711033336893665, "learning_rate": 9.50075453210222e-06, "loss": 1.0871, "step": 5907 }, { "epoch": 0.17, "grad_norm": 4.854816420568031, "learning_rate": 9.500552509673569e-06, "loss": 0.3363, "step": 5908 }, { "epoch": 0.17, "grad_norm": 8.676901492945511, "learning_rate": 9.500350448527218e-06, "loss": 0.8166, "step": 5909 }, { "epoch": 0.17, "grad_norm": 6.527412930115705, "learning_rate": 9.500148348664908e-06, "loss": 0.6108, "step": 5910 }, { "epoch": 0.17, "grad_norm": 4.306413861660348, "learning_rate": 9.499946210088375e-06, "loss": 0.432, "step": 5911 }, { "epoch": 0.17, "grad_norm": 9.085406536355276, "learning_rate": 9.499744032799359e-06, "loss": 0.4538, "step": 5912 }, { "epoch": 0.17, "grad_norm": 6.858538809101197, "learning_rate": 9.499541816799599e-06, "loss": 0.4139, "step": 5913 }, { "epoch": 0.17, "grad_norm": 4.819813385631377, "learning_rate": 9.499339562090834e-06, "loss": 0.1716, "step": 5914 }, { "epoch": 0.17, "grad_norm": 5.953372659813203, "learning_rate": 9.499137268674807e-06, "loss": 0.5358, "step": 5915 }, { "epoch": 0.17, "grad_norm": 4.647425283290031, "learning_rate": 9.498934936553255e-06, "loss": 0.5763, "step": 5916 }, { "epoch": 0.17, "grad_norm": 5.927437839286701, "learning_rate": 9.49873256572792e-06, "loss": 0.6788, "step": 5917 }, { "epoch": 0.17, "grad_norm": 3.353047075459362, "learning_rate": 9.49853015620054e-06, "loss": 0.7016, "step": 5918 }, { "epoch": 0.17, "grad_norm": 5.2075720217921235, "learning_rate": 9.498327707972862e-06, "loss": 0.8011, "step": 5919 }, { "epoch": 0.17, "grad_norm": 7.482514661882274, "learning_rate": 9.498125221046624e-06, "loss": 0.58, "step": 5920 }, { "epoch": 0.17, "grad_norm": 6.341034632235843, "learning_rate": 9.497922695423566e-06, "loss": 0.4548, "step": 5921 }, { "epoch": 0.17, "grad_norm": 4.915264449625927, "learning_rate": 9.497720131105437e-06, "loss": 0.544, "step": 5922 }, { "epoch": 0.17, "grad_norm": 6.433398894073118, "learning_rate": 9.497517528093973e-06, "loss": 0.4958, "step": 5923 }, { "epoch": 0.17, "grad_norm": 7.477361080367611, "learning_rate": 9.49731488639092e-06, "loss": 0.607, "step": 5924 }, { "epoch": 0.17, "grad_norm": 6.345312743479319, "learning_rate": 9.49711220599802e-06, "loss": 0.5222, "step": 5925 }, { "epoch": 0.17, "grad_norm": 5.713815947364676, "learning_rate": 9.496909486917019e-06, "loss": 0.3282, "step": 5926 }, { "epoch": 0.17, "grad_norm": 4.987651883826647, "learning_rate": 9.496706729149659e-06, "loss": 0.541, "step": 5927 }, { "epoch": 0.17, "grad_norm": 5.874231714375646, "learning_rate": 9.496503932697683e-06, "loss": 0.4398, "step": 5928 }, { "epoch": 0.17, "grad_norm": 8.235207908243252, "learning_rate": 9.496301097562837e-06, "loss": 0.8234, "step": 5929 }, { "epoch": 0.17, "grad_norm": 3.4395411586813704, "learning_rate": 9.496098223746868e-06, "loss": 0.391, "step": 5930 }, { "epoch": 0.17, "grad_norm": 2.8056692248840323, "learning_rate": 9.49589531125152e-06, "loss": 0.318, "step": 5931 }, { "epoch": 0.17, "grad_norm": 3.6260683524269326, "learning_rate": 9.495692360078537e-06, "loss": 0.4537, "step": 5932 }, { "epoch": 0.17, "grad_norm": 6.6260692885115535, "learning_rate": 9.495489370229667e-06, "loss": 0.4264, "step": 5933 }, { "epoch": 0.17, "grad_norm": 7.772040528182429, "learning_rate": 9.495286341706654e-06, "loss": 0.5263, "step": 5934 }, { "epoch": 0.17, "grad_norm": 4.2269490007113, "learning_rate": 9.495083274511247e-06, "loss": 0.5174, "step": 5935 }, { "epoch": 0.17, "grad_norm": 4.303884774080223, "learning_rate": 9.494880168645193e-06, "loss": 0.2827, "step": 5936 }, { "epoch": 0.17, "grad_norm": 9.240922443039256, "learning_rate": 9.494677024110237e-06, "loss": 0.4361, "step": 5937 }, { "epoch": 0.17, "grad_norm": 7.445184472375265, "learning_rate": 9.494473840908128e-06, "loss": 0.5391, "step": 5938 }, { "epoch": 0.17, "grad_norm": 8.895628635692798, "learning_rate": 9.494270619040615e-06, "loss": 0.8178, "step": 5939 }, { "epoch": 0.17, "grad_norm": 8.186824974596247, "learning_rate": 9.494067358509445e-06, "loss": 0.8898, "step": 5940 }, { "epoch": 0.17, "grad_norm": 6.41195328075406, "learning_rate": 9.493864059316366e-06, "loss": 0.4373, "step": 5941 }, { "epoch": 0.17, "grad_norm": 7.003463092832257, "learning_rate": 9.493660721463128e-06, "loss": 0.3371, "step": 5942 }, { "epoch": 0.17, "grad_norm": 4.7863115434134444, "learning_rate": 9.49345734495148e-06, "loss": 0.5389, "step": 5943 }, { "epoch": 0.17, "grad_norm": 5.0539114354853405, "learning_rate": 9.493253929783171e-06, "loss": 0.4768, "step": 5944 }, { "epoch": 0.17, "grad_norm": 6.420342218363226, "learning_rate": 9.493050475959953e-06, "loss": 0.3201, "step": 5945 }, { "epoch": 0.17, "grad_norm": 4.6133701403110585, "learning_rate": 9.492846983483574e-06, "loss": 0.4782, "step": 5946 }, { "epoch": 0.17, "grad_norm": 4.937628249422205, "learning_rate": 9.492643452355788e-06, "loss": 0.3563, "step": 5947 }, { "epoch": 0.17, "grad_norm": 8.805176986180635, "learning_rate": 9.49243988257834e-06, "loss": 0.6908, "step": 5948 }, { "epoch": 0.17, "grad_norm": 3.9250632772994174, "learning_rate": 9.492236274152986e-06, "loss": 0.3925, "step": 5949 }, { "epoch": 0.17, "grad_norm": 4.103680919166868, "learning_rate": 9.492032627081478e-06, "loss": 0.254, "step": 5950 }, { "epoch": 0.17, "grad_norm": 6.347167874842535, "learning_rate": 9.491828941365564e-06, "loss": 0.5747, "step": 5951 }, { "epoch": 0.17, "grad_norm": 7.772773384594255, "learning_rate": 9.491625217007001e-06, "loss": 0.5407, "step": 5952 }, { "epoch": 0.17, "grad_norm": 3.673338058858649, "learning_rate": 9.491421454007538e-06, "loss": 0.2088, "step": 5953 }, { "epoch": 0.17, "grad_norm": 9.276913641345615, "learning_rate": 9.49121765236893e-06, "loss": 0.3341, "step": 5954 }, { "epoch": 0.17, "grad_norm": 9.18014696635201, "learning_rate": 9.491013812092928e-06, "loss": 0.2302, "step": 5955 }, { "epoch": 0.17, "grad_norm": 8.510423159898377, "learning_rate": 9.490809933181289e-06, "loss": 0.7123, "step": 5956 }, { "epoch": 0.17, "grad_norm": 10.063893902827362, "learning_rate": 9.490606015635764e-06, "loss": 0.8372, "step": 5957 }, { "epoch": 0.17, "grad_norm": 3.1790866705919543, "learning_rate": 9.490402059458108e-06, "loss": 0.4598, "step": 5958 }, { "epoch": 0.17, "grad_norm": 7.451630576477273, "learning_rate": 9.490198064650077e-06, "loss": 0.5721, "step": 5959 }, { "epoch": 0.17, "grad_norm": 6.051422853481299, "learning_rate": 9.489994031213422e-06, "loss": 0.8665, "step": 5960 }, { "epoch": 0.17, "grad_norm": 4.068411941664868, "learning_rate": 9.489789959149906e-06, "loss": 0.536, "step": 5961 }, { "epoch": 0.17, "grad_norm": 6.271959028455661, "learning_rate": 9.489585848461277e-06, "loss": 0.4784, "step": 5962 }, { "epoch": 0.17, "grad_norm": 9.415073299355603, "learning_rate": 9.489381699149295e-06, "loss": 0.8762, "step": 5963 }, { "epoch": 0.17, "grad_norm": 4.261089596693347, "learning_rate": 9.489177511215713e-06, "loss": 0.4109, "step": 5964 }, { "epoch": 0.17, "grad_norm": 7.46274615829076, "learning_rate": 9.488973284662291e-06, "loss": 0.6615, "step": 5965 }, { "epoch": 0.17, "grad_norm": 6.949524399381154, "learning_rate": 9.488769019490784e-06, "loss": 0.6963, "step": 5966 }, { "epoch": 0.17, "grad_norm": 5.474518877602227, "learning_rate": 9.48856471570295e-06, "loss": 0.4875, "step": 5967 }, { "epoch": 0.17, "grad_norm": 4.466428193563405, "learning_rate": 9.488360373300547e-06, "loss": 0.3918, "step": 5968 }, { "epoch": 0.17, "grad_norm": 4.474809659607387, "learning_rate": 9.48815599228533e-06, "loss": 0.4328, "step": 5969 }, { "epoch": 0.17, "grad_norm": 5.027142951233457, "learning_rate": 9.487951572659062e-06, "loss": 0.4074, "step": 5970 }, { "epoch": 0.17, "grad_norm": 6.018696899813628, "learning_rate": 9.487747114423498e-06, "loss": 0.3383, "step": 5971 }, { "epoch": 0.17, "grad_norm": 10.556413149433995, "learning_rate": 9.487542617580399e-06, "loss": 1.0086, "step": 5972 }, { "epoch": 0.17, "grad_norm": 6.701819944161797, "learning_rate": 9.487338082131524e-06, "loss": 0.5067, "step": 5973 }, { "epoch": 0.17, "grad_norm": 5.292495987953247, "learning_rate": 9.48713350807863e-06, "loss": 0.6631, "step": 5974 }, { "epoch": 0.17, "grad_norm": 2.9416425592300985, "learning_rate": 9.48692889542348e-06, "loss": 0.3883, "step": 5975 }, { "epoch": 0.17, "grad_norm": 7.148123445543216, "learning_rate": 9.486724244167832e-06, "loss": 0.6365, "step": 5976 }, { "epoch": 0.17, "grad_norm": 5.8329489127148255, "learning_rate": 9.48651955431345e-06, "loss": 0.4351, "step": 5977 }, { "epoch": 0.17, "grad_norm": 5.2933635043638425, "learning_rate": 9.486314825862092e-06, "loss": 0.4748, "step": 5978 }, { "epoch": 0.17, "grad_norm": 5.142494163305877, "learning_rate": 9.486110058815518e-06, "loss": 0.7982, "step": 5979 }, { "epoch": 0.17, "grad_norm": 7.03802462260391, "learning_rate": 9.485905253175492e-06, "loss": 0.6992, "step": 5980 }, { "epoch": 0.17, "grad_norm": 4.453968864928195, "learning_rate": 9.485700408943776e-06, "loss": 0.5355, "step": 5981 }, { "epoch": 0.17, "grad_norm": 5.081387102968343, "learning_rate": 9.485495526122132e-06, "loss": 0.8074, "step": 5982 }, { "epoch": 0.17, "grad_norm": 4.439531344904909, "learning_rate": 9.485290604712323e-06, "loss": 0.5139, "step": 5983 }, { "epoch": 0.17, "grad_norm": 8.640923548364304, "learning_rate": 9.48508564471611e-06, "loss": 0.6496, "step": 5984 }, { "epoch": 0.17, "grad_norm": 6.3860721552470325, "learning_rate": 9.484880646135257e-06, "loss": 0.5775, "step": 5985 }, { "epoch": 0.17, "grad_norm": 4.297422062121067, "learning_rate": 9.48467560897153e-06, "loss": 0.5365, "step": 5986 }, { "epoch": 0.17, "grad_norm": 4.394029376723964, "learning_rate": 9.484470533226688e-06, "loss": 0.4408, "step": 5987 }, { "epoch": 0.17, "grad_norm": 7.441259027261255, "learning_rate": 9.484265418902501e-06, "loss": 0.6428, "step": 5988 }, { "epoch": 0.17, "grad_norm": 4.866215016307649, "learning_rate": 9.48406026600073e-06, "loss": 0.5425, "step": 5989 }, { "epoch": 0.17, "grad_norm": 4.517493128853129, "learning_rate": 9.483855074523139e-06, "loss": 0.4663, "step": 5990 }, { "epoch": 0.17, "grad_norm": 9.326070332398777, "learning_rate": 9.483649844471497e-06, "loss": 0.8113, "step": 5991 }, { "epoch": 0.17, "grad_norm": 3.7034959245167243, "learning_rate": 9.483444575847565e-06, "loss": 0.2168, "step": 5992 }, { "epoch": 0.17, "grad_norm": 4.592326456195535, "learning_rate": 9.483239268653114e-06, "loss": 0.5241, "step": 5993 }, { "epoch": 0.17, "grad_norm": 5.289608317262719, "learning_rate": 9.483033922889905e-06, "loss": 0.2882, "step": 5994 }, { "epoch": 0.17, "grad_norm": 4.856082794570852, "learning_rate": 9.48282853855971e-06, "loss": 0.5799, "step": 5995 }, { "epoch": 0.17, "grad_norm": 8.953600140225094, "learning_rate": 9.482623115664291e-06, "loss": 0.4968, "step": 5996 }, { "epoch": 0.17, "grad_norm": 6.66462359911612, "learning_rate": 9.482417654205418e-06, "loss": 0.7326, "step": 5997 }, { "epoch": 0.17, "grad_norm": 7.843018395093586, "learning_rate": 9.482212154184858e-06, "loss": 0.6724, "step": 5998 }, { "epoch": 0.17, "grad_norm": 5.177667192147717, "learning_rate": 9.482006615604377e-06, "loss": 0.4994, "step": 5999 }, { "epoch": 0.17, "grad_norm": 10.53389844786866, "learning_rate": 9.481801038465747e-06, "loss": 0.6319, "step": 6000 }, { "epoch": 0.17, "grad_norm": 5.265978430594065, "learning_rate": 9.481595422770733e-06, "loss": 0.7513, "step": 6001 }, { "epoch": 0.17, "grad_norm": 9.762121637219199, "learning_rate": 9.481389768521106e-06, "loss": 0.6709, "step": 6002 }, { "epoch": 0.17, "grad_norm": 3.2443957794000218, "learning_rate": 9.481184075718635e-06, "loss": 0.3296, "step": 6003 }, { "epoch": 0.17, "grad_norm": 4.650120201403072, "learning_rate": 9.480978344365088e-06, "loss": 0.6333, "step": 6004 }, { "epoch": 0.17, "grad_norm": 2.9808860476678585, "learning_rate": 9.480772574462238e-06, "loss": 0.3271, "step": 6005 }, { "epoch": 0.17, "grad_norm": 8.459313685036783, "learning_rate": 9.480566766011851e-06, "loss": 0.5085, "step": 6006 }, { "epoch": 0.17, "grad_norm": 4.094686619431643, "learning_rate": 9.480360919015702e-06, "loss": 0.2893, "step": 6007 }, { "epoch": 0.17, "grad_norm": 5.047562685195116, "learning_rate": 9.480155033475558e-06, "loss": 0.5092, "step": 6008 }, { "epoch": 0.17, "grad_norm": 6.692812547478438, "learning_rate": 9.479949109393192e-06, "loss": 0.4333, "step": 6009 }, { "epoch": 0.17, "grad_norm": 9.103080969932192, "learning_rate": 9.479743146770375e-06, "loss": 0.4458, "step": 6010 }, { "epoch": 0.17, "grad_norm": 5.441795406405704, "learning_rate": 9.47953714560888e-06, "loss": 0.6763, "step": 6011 }, { "epoch": 0.17, "grad_norm": 6.295230016407078, "learning_rate": 9.479331105910478e-06, "loss": 0.8209, "step": 6012 }, { "epoch": 0.17, "grad_norm": 8.048355707024385, "learning_rate": 9.479125027676943e-06, "loss": 0.4769, "step": 6013 }, { "epoch": 0.17, "grad_norm": 6.613176195151809, "learning_rate": 9.478918910910047e-06, "loss": 0.5299, "step": 6014 }, { "epoch": 0.17, "grad_norm": 8.607554276090552, "learning_rate": 9.47871275561156e-06, "loss": 0.6192, "step": 6015 }, { "epoch": 0.17, "grad_norm": 7.590504082085314, "learning_rate": 9.478506561783263e-06, "loss": 0.7266, "step": 6016 }, { "epoch": 0.17, "grad_norm": 11.254821930759954, "learning_rate": 9.478300329426924e-06, "loss": 0.659, "step": 6017 }, { "epoch": 0.17, "grad_norm": 4.086733096911947, "learning_rate": 9.478094058544317e-06, "loss": 0.4834, "step": 6018 }, { "epoch": 0.17, "grad_norm": 7.735076502573933, "learning_rate": 9.47788774913722e-06, "loss": 0.4152, "step": 6019 }, { "epoch": 0.17, "grad_norm": 8.355073709944659, "learning_rate": 9.477681401207405e-06, "loss": 0.3883, "step": 6020 }, { "epoch": 0.17, "grad_norm": 3.9132199170467206, "learning_rate": 9.477475014756647e-06, "loss": 0.2826, "step": 6021 }, { "epoch": 0.17, "grad_norm": 30.538239942994718, "learning_rate": 9.477268589786726e-06, "loss": 0.3805, "step": 6022 }, { "epoch": 0.17, "grad_norm": 7.926750623928381, "learning_rate": 9.477062126299413e-06, "loss": 0.6312, "step": 6023 }, { "epoch": 0.17, "grad_norm": 8.868490276337576, "learning_rate": 9.476855624296483e-06, "loss": 0.5319, "step": 6024 }, { "epoch": 0.17, "grad_norm": 2.446279602641624, "learning_rate": 9.47664908377972e-06, "loss": 0.115, "step": 6025 }, { "epoch": 0.17, "grad_norm": 6.2080308554610095, "learning_rate": 9.476442504750893e-06, "loss": 0.4836, "step": 6026 }, { "epoch": 0.17, "grad_norm": 13.696556335126147, "learning_rate": 9.476235887211784e-06, "loss": 0.8251, "step": 6027 }, { "epoch": 0.17, "grad_norm": 9.927907812752553, "learning_rate": 9.476029231164168e-06, "loss": 0.9789, "step": 6028 }, { "epoch": 0.17, "grad_norm": 10.98634244790733, "learning_rate": 9.475822536609823e-06, "loss": 0.7957, "step": 6029 }, { "epoch": 0.17, "grad_norm": 9.59423794577252, "learning_rate": 9.475615803550528e-06, "loss": 0.5609, "step": 6030 }, { "epoch": 0.17, "grad_norm": 16.410120146088996, "learning_rate": 9.475409031988062e-06, "loss": 0.8469, "step": 6031 }, { "epoch": 0.17, "grad_norm": 6.451583345637756, "learning_rate": 9.475202221924202e-06, "loss": 0.628, "step": 6032 }, { "epoch": 0.17, "grad_norm": 8.968031585311055, "learning_rate": 9.47499537336073e-06, "loss": 1.104, "step": 6033 }, { "epoch": 0.17, "grad_norm": 12.562304291220803, "learning_rate": 9.474788486299423e-06, "loss": 0.9678, "step": 6034 }, { "epoch": 0.17, "grad_norm": 11.156993210770903, "learning_rate": 9.474581560742063e-06, "loss": 0.8364, "step": 6035 }, { "epoch": 0.17, "grad_norm": 9.57394063881058, "learning_rate": 9.474374596690426e-06, "loss": 0.5377, "step": 6036 }, { "epoch": 0.17, "grad_norm": 5.954789256955873, "learning_rate": 9.474167594146297e-06, "loss": 0.3971, "step": 6037 }, { "epoch": 0.17, "grad_norm": 9.17730536536875, "learning_rate": 9.473960553111454e-06, "loss": 0.7851, "step": 6038 }, { "epoch": 0.17, "grad_norm": 7.439210646912298, "learning_rate": 9.47375347358768e-06, "loss": 0.7728, "step": 6039 }, { "epoch": 0.17, "grad_norm": 6.980473668332067, "learning_rate": 9.473546355576755e-06, "loss": 0.5586, "step": 6040 }, { "epoch": 0.17, "grad_norm": 5.380006366002737, "learning_rate": 9.473339199080462e-06, "loss": 0.7957, "step": 6041 }, { "epoch": 0.17, "grad_norm": 4.238633382909156, "learning_rate": 9.473132004100583e-06, "loss": 0.5236, "step": 6042 }, { "epoch": 0.17, "grad_norm": 8.439697996584922, "learning_rate": 9.4729247706389e-06, "loss": 0.8804, "step": 6043 }, { "epoch": 0.17, "grad_norm": 12.223027956056992, "learning_rate": 9.472717498697195e-06, "loss": 0.9135, "step": 6044 }, { "epoch": 0.17, "grad_norm": 9.506824351158194, "learning_rate": 9.472510188277254e-06, "loss": 0.572, "step": 6045 }, { "epoch": 0.17, "grad_norm": 5.4129957989560635, "learning_rate": 9.472302839380856e-06, "loss": 0.2552, "step": 6046 }, { "epoch": 0.17, "grad_norm": 8.134395742078517, "learning_rate": 9.472095452009788e-06, "loss": 0.5614, "step": 6047 }, { "epoch": 0.17, "grad_norm": 6.363843767700054, "learning_rate": 9.471888026165834e-06, "loss": 0.6214, "step": 6048 }, { "epoch": 0.17, "grad_norm": 10.431053683637513, "learning_rate": 9.471680561850777e-06, "loss": 0.777, "step": 6049 }, { "epoch": 0.17, "grad_norm": 10.003003813686709, "learning_rate": 9.471473059066404e-06, "loss": 0.8586, "step": 6050 }, { "epoch": 0.17, "grad_norm": 7.025397531115202, "learning_rate": 9.471265517814498e-06, "loss": 0.412, "step": 6051 }, { "epoch": 0.17, "grad_norm": 5.057469588455582, "learning_rate": 9.471057938096847e-06, "loss": 0.4043, "step": 6052 }, { "epoch": 0.17, "grad_norm": 7.138413969670039, "learning_rate": 9.470850319915233e-06, "loss": 0.4505, "step": 6053 }, { "epoch": 0.17, "grad_norm": 4.154859123687861, "learning_rate": 9.470642663271445e-06, "loss": 0.6267, "step": 6054 }, { "epoch": 0.17, "grad_norm": 9.105863283595157, "learning_rate": 9.47043496816727e-06, "loss": 0.5963, "step": 6055 }, { "epoch": 0.17, "grad_norm": 12.989237218108721, "learning_rate": 9.470227234604491e-06, "loss": 0.6951, "step": 6056 }, { "epoch": 0.17, "grad_norm": 4.0558336582083125, "learning_rate": 9.470019462584899e-06, "loss": 0.294, "step": 6057 }, { "epoch": 0.17, "grad_norm": 10.611773886850152, "learning_rate": 9.469811652110278e-06, "loss": 0.9522, "step": 6058 }, { "epoch": 0.17, "grad_norm": 3.809482380806357, "learning_rate": 9.469603803182419e-06, "loss": 0.3985, "step": 6059 }, { "epoch": 0.17, "grad_norm": 3.772191254537517, "learning_rate": 9.46939591580311e-06, "loss": 0.3222, "step": 6060 }, { "epoch": 0.17, "grad_norm": 8.428008894355763, "learning_rate": 9.469187989974136e-06, "loss": 0.5535, "step": 6061 }, { "epoch": 0.17, "grad_norm": 5.119716875287384, "learning_rate": 9.46898002569729e-06, "loss": 0.4216, "step": 6062 }, { "epoch": 0.17, "grad_norm": 6.178833913385707, "learning_rate": 9.468772022974357e-06, "loss": 0.3658, "step": 6063 }, { "epoch": 0.17, "grad_norm": 9.814569728784937, "learning_rate": 9.46856398180713e-06, "loss": 0.5153, "step": 6064 }, { "epoch": 0.17, "grad_norm": 8.154071151441679, "learning_rate": 9.468355902197397e-06, "loss": 0.6385, "step": 6065 }, { "epoch": 0.17, "grad_norm": 4.990159412276367, "learning_rate": 9.468147784146948e-06, "loss": 0.5501, "step": 6066 }, { "epoch": 0.17, "grad_norm": 6.340057237830048, "learning_rate": 9.467939627657575e-06, "loss": 0.6697, "step": 6067 }, { "epoch": 0.17, "grad_norm": 4.959316727813298, "learning_rate": 9.467731432731064e-06, "loss": 0.5239, "step": 6068 }, { "epoch": 0.17, "grad_norm": 5.351345442108133, "learning_rate": 9.467523199369213e-06, "loss": 0.552, "step": 6069 }, { "epoch": 0.17, "grad_norm": 4.927999555095429, "learning_rate": 9.467314927573809e-06, "loss": 0.5288, "step": 6070 }, { "epoch": 0.17, "grad_norm": 3.4477293226778105, "learning_rate": 9.467106617346644e-06, "loss": 0.5124, "step": 6071 }, { "epoch": 0.17, "grad_norm": 8.153302796582949, "learning_rate": 9.466898268689512e-06, "loss": 0.5605, "step": 6072 }, { "epoch": 0.17, "grad_norm": 4.844813962185342, "learning_rate": 9.466689881604203e-06, "loss": 0.2221, "step": 6073 }, { "epoch": 0.17, "grad_norm": 8.43453249139445, "learning_rate": 9.466481456092512e-06, "loss": 0.7083, "step": 6074 }, { "epoch": 0.17, "grad_norm": 4.190238768709606, "learning_rate": 9.46627299215623e-06, "loss": 0.3963, "step": 6075 }, { "epoch": 0.17, "grad_norm": 8.373080474878641, "learning_rate": 9.466064489797151e-06, "loss": 0.6758, "step": 6076 }, { "epoch": 0.17, "grad_norm": 5.881465276331392, "learning_rate": 9.465855949017072e-06, "loss": 0.7114, "step": 6077 }, { "epoch": 0.17, "grad_norm": 4.42383249042624, "learning_rate": 9.465647369817781e-06, "loss": 0.5871, "step": 6078 }, { "epoch": 0.17, "grad_norm": 5.70827963959253, "learning_rate": 9.465438752201076e-06, "loss": 0.6373, "step": 6079 }, { "epoch": 0.17, "grad_norm": 9.23421032753911, "learning_rate": 9.465230096168752e-06, "loss": 0.7179, "step": 6080 }, { "epoch": 0.17, "grad_norm": 6.605646330681421, "learning_rate": 9.465021401722603e-06, "loss": 0.7304, "step": 6081 }, { "epoch": 0.17, "grad_norm": 5.547020579832008, "learning_rate": 9.464812668864424e-06, "loss": 0.3994, "step": 6082 }, { "epoch": 0.17, "grad_norm": 7.700391799074443, "learning_rate": 9.464603897596012e-06, "loss": 0.5728, "step": 6083 }, { "epoch": 0.17, "grad_norm": 2.7139952287259534, "learning_rate": 9.464395087919162e-06, "loss": 0.3761, "step": 6084 }, { "epoch": 0.17, "grad_norm": 11.98005361618246, "learning_rate": 9.464186239835671e-06, "loss": 0.7965, "step": 6085 }, { "epoch": 0.17, "grad_norm": 3.7291234331803795, "learning_rate": 9.463977353347336e-06, "loss": 0.2521, "step": 6086 }, { "epoch": 0.17, "grad_norm": 3.7459331871842303, "learning_rate": 9.463768428455953e-06, "loss": 0.3085, "step": 6087 }, { "epoch": 0.17, "grad_norm": 5.459518925080354, "learning_rate": 9.463559465163318e-06, "loss": 0.5401, "step": 6088 }, { "epoch": 0.17, "grad_norm": 12.879717490343994, "learning_rate": 9.463350463471234e-06, "loss": 0.1622, "step": 6089 }, { "epoch": 0.17, "grad_norm": 3.3931925848010946, "learning_rate": 9.463141423381494e-06, "loss": 0.3149, "step": 6090 }, { "epoch": 0.17, "grad_norm": 4.285870585543314, "learning_rate": 9.462932344895897e-06, "loss": 0.54, "step": 6091 }, { "epoch": 0.17, "grad_norm": 5.541192079839854, "learning_rate": 9.462723228016243e-06, "loss": 0.3635, "step": 6092 }, { "epoch": 0.17, "grad_norm": 6.837411427316459, "learning_rate": 9.462514072744331e-06, "loss": 0.4765, "step": 6093 }, { "epoch": 0.17, "grad_norm": 6.125515721466039, "learning_rate": 9.462304879081959e-06, "loss": 0.7736, "step": 6094 }, { "epoch": 0.17, "grad_norm": 4.517954110193256, "learning_rate": 9.462095647030928e-06, "loss": 0.2307, "step": 6095 }, { "epoch": 0.17, "grad_norm": 4.246970864309419, "learning_rate": 9.461886376593037e-06, "loss": 0.2846, "step": 6096 }, { "epoch": 0.17, "grad_norm": 6.050730262378984, "learning_rate": 9.461677067770088e-06, "loss": 0.5333, "step": 6097 }, { "epoch": 0.17, "grad_norm": 6.049120582159434, "learning_rate": 9.46146772056388e-06, "loss": 0.7305, "step": 6098 }, { "epoch": 0.17, "grad_norm": 7.919291840692363, "learning_rate": 9.461258334976213e-06, "loss": 0.959, "step": 6099 }, { "epoch": 0.17, "grad_norm": 6.962835290803971, "learning_rate": 9.461048911008893e-06, "loss": 0.7527, "step": 6100 }, { "epoch": 0.17, "grad_norm": 4.071097446543191, "learning_rate": 9.460839448663716e-06, "loss": 0.3136, "step": 6101 }, { "epoch": 0.17, "grad_norm": 8.741430445832012, "learning_rate": 9.460629947942488e-06, "loss": 0.716, "step": 6102 }, { "epoch": 0.17, "grad_norm": 5.804323906376335, "learning_rate": 9.460420408847008e-06, "loss": 0.575, "step": 6103 }, { "epoch": 0.17, "grad_norm": 4.378026542128281, "learning_rate": 9.460210831379082e-06, "loss": 0.4136, "step": 6104 }, { "epoch": 0.17, "grad_norm": 7.989440266813285, "learning_rate": 9.46000121554051e-06, "loss": 0.6251, "step": 6105 }, { "epoch": 0.17, "grad_norm": 6.3998990095634385, "learning_rate": 9.459791561333099e-06, "loss": 0.5689, "step": 6106 }, { "epoch": 0.17, "grad_norm": 6.351987048009788, "learning_rate": 9.459581868758648e-06, "loss": 0.5218, "step": 6107 }, { "epoch": 0.17, "grad_norm": 7.322279430751856, "learning_rate": 9.459372137818964e-06, "loss": 0.5367, "step": 6108 }, { "epoch": 0.17, "grad_norm": 5.22808877187924, "learning_rate": 9.45916236851585e-06, "loss": 0.4428, "step": 6109 }, { "epoch": 0.17, "grad_norm": 2.6861601195247213, "learning_rate": 9.458952560851112e-06, "loss": 0.2055, "step": 6110 }, { "epoch": 0.18, "grad_norm": 6.515348945459753, "learning_rate": 9.458742714826554e-06, "loss": 0.4012, "step": 6111 }, { "epoch": 0.18, "grad_norm": 9.629650924392804, "learning_rate": 9.45853283044398e-06, "loss": 0.6476, "step": 6112 }, { "epoch": 0.18, "grad_norm": 4.533039239618912, "learning_rate": 9.458322907705198e-06, "loss": 0.4708, "step": 6113 }, { "epoch": 0.18, "grad_norm": 37.60999503296158, "learning_rate": 9.458112946612014e-06, "loss": 0.2989, "step": 6114 }, { "epoch": 0.18, "grad_norm": 6.672677163851816, "learning_rate": 9.457902947166231e-06, "loss": 0.5216, "step": 6115 }, { "epoch": 0.18, "grad_norm": 5.536574026665184, "learning_rate": 9.45769290936966e-06, "loss": 0.4886, "step": 6116 }, { "epoch": 0.18, "grad_norm": 7.7131266013257305, "learning_rate": 9.457482833224106e-06, "loss": 0.451, "step": 6117 }, { "epoch": 0.18, "grad_norm": 10.793225920764948, "learning_rate": 9.457272718731374e-06, "loss": 0.5961, "step": 6118 }, { "epoch": 0.18, "grad_norm": 5.952633053935946, "learning_rate": 9.457062565893275e-06, "loss": 0.6838, "step": 6119 }, { "epoch": 0.18, "grad_norm": 5.975500753000956, "learning_rate": 9.456852374711616e-06, "loss": 0.4771, "step": 6120 }, { "epoch": 0.18, "grad_norm": 8.391124859630686, "learning_rate": 9.456642145188203e-06, "loss": 0.4257, "step": 6121 }, { "epoch": 0.18, "grad_norm": 7.534407749049475, "learning_rate": 9.456431877324846e-06, "loss": 0.8061, "step": 6122 }, { "epoch": 0.18, "grad_norm": 8.893471582254188, "learning_rate": 9.456221571123356e-06, "loss": 0.6898, "step": 6123 }, { "epoch": 0.18, "grad_norm": 4.7679604732936, "learning_rate": 9.456011226585539e-06, "loss": 0.9283, "step": 6124 }, { "epoch": 0.18, "grad_norm": 7.909826502579138, "learning_rate": 9.455800843713207e-06, "loss": 0.557, "step": 6125 }, { "epoch": 0.18, "grad_norm": 4.438528398435622, "learning_rate": 9.45559042250817e-06, "loss": 0.3654, "step": 6126 }, { "epoch": 0.18, "grad_norm": 8.577156498612146, "learning_rate": 9.455379962972236e-06, "loss": 0.6359, "step": 6127 }, { "epoch": 0.18, "grad_norm": 5.433618278091543, "learning_rate": 9.455169465107217e-06, "loss": 0.6659, "step": 6128 }, { "epoch": 0.18, "grad_norm": 2.8604088670402446, "learning_rate": 9.454958928914923e-06, "loss": 0.3144, "step": 6129 }, { "epoch": 0.18, "grad_norm": 8.132203943238652, "learning_rate": 9.454748354397166e-06, "loss": 0.6722, "step": 6130 }, { "epoch": 0.18, "grad_norm": 4.125972488672176, "learning_rate": 9.454537741555758e-06, "loss": 0.2153, "step": 6131 }, { "epoch": 0.18, "grad_norm": 6.5406692700225175, "learning_rate": 9.45432709039251e-06, "loss": 0.5368, "step": 6132 }, { "epoch": 0.18, "grad_norm": 3.1749380946694883, "learning_rate": 9.454116400909234e-06, "loss": 0.3082, "step": 6133 }, { "epoch": 0.18, "grad_norm": 7.520175309775584, "learning_rate": 9.453905673107743e-06, "loss": 0.4924, "step": 6134 }, { "epoch": 0.18, "grad_norm": 7.415808678051018, "learning_rate": 9.453694906989852e-06, "loss": 0.7606, "step": 6135 }, { "epoch": 0.18, "grad_norm": 6.63411423992322, "learning_rate": 9.45348410255737e-06, "loss": 0.3964, "step": 6136 }, { "epoch": 0.18, "grad_norm": 10.149895848834484, "learning_rate": 9.453273259812113e-06, "loss": 0.8586, "step": 6137 }, { "epoch": 0.18, "grad_norm": 4.425780926777625, "learning_rate": 9.453062378755894e-06, "loss": 0.3455, "step": 6138 }, { "epoch": 0.18, "grad_norm": 4.204094587047784, "learning_rate": 9.452851459390528e-06, "loss": 0.7176, "step": 6139 }, { "epoch": 0.18, "grad_norm": 8.06382663664962, "learning_rate": 9.45264050171783e-06, "loss": 0.6228, "step": 6140 }, { "epoch": 0.18, "grad_norm": 6.313428980389142, "learning_rate": 9.452429505739612e-06, "loss": 0.5609, "step": 6141 }, { "epoch": 0.18, "grad_norm": 6.440957742579369, "learning_rate": 9.452218471457693e-06, "loss": 0.7044, "step": 6142 }, { "epoch": 0.18, "grad_norm": 5.09469595708533, "learning_rate": 9.452007398873886e-06, "loss": 0.5463, "step": 6143 }, { "epoch": 0.18, "grad_norm": 7.4328627354718755, "learning_rate": 9.451796287990007e-06, "loss": 0.2667, "step": 6144 }, { "epoch": 0.18, "grad_norm": 6.156342094236009, "learning_rate": 9.451585138807873e-06, "loss": 0.2466, "step": 6145 }, { "epoch": 0.18, "grad_norm": 5.9768721356934025, "learning_rate": 9.451373951329302e-06, "loss": 0.6503, "step": 6146 }, { "epoch": 0.18, "grad_norm": 6.660646768444707, "learning_rate": 9.451162725556106e-06, "loss": 0.8219, "step": 6147 }, { "epoch": 0.18, "grad_norm": 9.655561107765106, "learning_rate": 9.450951461490107e-06, "loss": 0.765, "step": 6148 }, { "epoch": 0.18, "grad_norm": 5.414718147860821, "learning_rate": 9.45074015913312e-06, "loss": 0.3227, "step": 6149 }, { "epoch": 0.18, "grad_norm": 8.465986159778566, "learning_rate": 9.450528818486963e-06, "loss": 0.381, "step": 6150 }, { "epoch": 0.18, "grad_norm": 4.389947207661325, "learning_rate": 9.450317439553455e-06, "loss": 0.3949, "step": 6151 }, { "epoch": 0.18, "grad_norm": 5.188825139306002, "learning_rate": 9.450106022334413e-06, "loss": 0.6725, "step": 6152 }, { "epoch": 0.18, "grad_norm": 8.614340234822075, "learning_rate": 9.44989456683166e-06, "loss": 0.5771, "step": 6153 }, { "epoch": 0.18, "grad_norm": 6.403406693837439, "learning_rate": 9.449683073047007e-06, "loss": 0.6707, "step": 6154 }, { "epoch": 0.18, "grad_norm": 4.722522515989944, "learning_rate": 9.449471540982282e-06, "loss": 0.5346, "step": 6155 }, { "epoch": 0.18, "grad_norm": 6.526102401154461, "learning_rate": 9.4492599706393e-06, "loss": 0.5175, "step": 6156 }, { "epoch": 0.18, "grad_norm": 5.609105690784461, "learning_rate": 9.449048362019883e-06, "loss": 0.5468, "step": 6157 }, { "epoch": 0.18, "grad_norm": 8.488175470050308, "learning_rate": 9.44883671512585e-06, "loss": 0.9392, "step": 6158 }, { "epoch": 0.18, "grad_norm": 4.563436124733902, "learning_rate": 9.448625029959021e-06, "loss": 0.3821, "step": 6159 }, { "epoch": 0.18, "grad_norm": 5.693355857363836, "learning_rate": 9.448413306521223e-06, "loss": 0.8815, "step": 6160 }, { "epoch": 0.18, "grad_norm": 3.5643782767184287, "learning_rate": 9.448201544814268e-06, "loss": 0.3189, "step": 6161 }, { "epoch": 0.18, "grad_norm": 11.152615679149731, "learning_rate": 9.447989744839986e-06, "loss": 0.6358, "step": 6162 }, { "epoch": 0.18, "grad_norm": 12.249434321830293, "learning_rate": 9.447777906600195e-06, "loss": 0.5168, "step": 6163 }, { "epoch": 0.18, "grad_norm": 7.914602940926627, "learning_rate": 9.447566030096718e-06, "loss": 0.5864, "step": 6164 }, { "epoch": 0.18, "grad_norm": 2.389223897408388, "learning_rate": 9.447354115331379e-06, "loss": 0.2545, "step": 6165 }, { "epoch": 0.18, "grad_norm": 9.708400207638475, "learning_rate": 9.447142162305998e-06, "loss": 0.7156, "step": 6166 }, { "epoch": 0.18, "grad_norm": 4.620742487838509, "learning_rate": 9.446930171022404e-06, "loss": 0.248, "step": 6167 }, { "epoch": 0.18, "grad_norm": 4.32365985901019, "learning_rate": 9.446718141482413e-06, "loss": 0.5546, "step": 6168 }, { "epoch": 0.18, "grad_norm": 4.1137426394707095, "learning_rate": 9.446506073687856e-06, "loss": 0.4519, "step": 6169 }, { "epoch": 0.18, "grad_norm": 4.758924633015292, "learning_rate": 9.446293967640555e-06, "loss": 0.3533, "step": 6170 }, { "epoch": 0.18, "grad_norm": 6.116211171388508, "learning_rate": 9.446081823342334e-06, "loss": 0.3799, "step": 6171 }, { "epoch": 0.18, "grad_norm": 9.240188137302408, "learning_rate": 9.445869640795018e-06, "loss": 0.7375, "step": 6172 }, { "epoch": 0.18, "grad_norm": 9.092076413029435, "learning_rate": 9.445657420000434e-06, "loss": 0.8626, "step": 6173 }, { "epoch": 0.18, "grad_norm": 8.657014007454796, "learning_rate": 9.445445160960405e-06, "loss": 0.6463, "step": 6174 }, { "epoch": 0.18, "grad_norm": 7.135950273669834, "learning_rate": 9.445232863676758e-06, "loss": 0.6486, "step": 6175 }, { "epoch": 0.18, "grad_norm": 12.390575817727303, "learning_rate": 9.445020528151323e-06, "loss": 0.4001, "step": 6176 }, { "epoch": 0.18, "grad_norm": 2.2046391309464917, "learning_rate": 9.44480815438592e-06, "loss": 0.2476, "step": 6177 }, { "epoch": 0.18, "grad_norm": 8.438527588612217, "learning_rate": 9.444595742382383e-06, "loss": 0.4309, "step": 6178 }, { "epoch": 0.18, "grad_norm": 3.148542208385772, "learning_rate": 9.444383292142533e-06, "loss": 0.1765, "step": 6179 }, { "epoch": 0.18, "grad_norm": 3.6285631163354934, "learning_rate": 9.444170803668204e-06, "loss": 0.5246, "step": 6180 }, { "epoch": 0.18, "grad_norm": 6.154702936188476, "learning_rate": 9.443958276961218e-06, "loss": 0.1654, "step": 6181 }, { "epoch": 0.18, "grad_norm": 5.2784769576663635, "learning_rate": 9.443745712023409e-06, "loss": 0.3332, "step": 6182 }, { "epoch": 0.18, "grad_norm": 6.149949776048972, "learning_rate": 9.443533108856599e-06, "loss": 0.3357, "step": 6183 }, { "epoch": 0.18, "grad_norm": 11.297305867907042, "learning_rate": 9.443320467462625e-06, "loss": 0.9264, "step": 6184 }, { "epoch": 0.18, "grad_norm": 10.365954396922747, "learning_rate": 9.443107787843308e-06, "loss": 0.6037, "step": 6185 }, { "epoch": 0.18, "grad_norm": 6.879978700704651, "learning_rate": 9.442895070000484e-06, "loss": 0.5198, "step": 6186 }, { "epoch": 0.18, "grad_norm": 6.93242273892237, "learning_rate": 9.44268231393598e-06, "loss": 0.4034, "step": 6187 }, { "epoch": 0.18, "grad_norm": 10.730123297008726, "learning_rate": 9.442469519651628e-06, "loss": 0.2966, "step": 6188 }, { "epoch": 0.18, "grad_norm": 5.990642084725165, "learning_rate": 9.442256687149259e-06, "loss": 0.6501, "step": 6189 }, { "epoch": 0.18, "grad_norm": 5.003818198508086, "learning_rate": 9.442043816430699e-06, "loss": 0.3321, "step": 6190 }, { "epoch": 0.18, "grad_norm": 5.376572268099394, "learning_rate": 9.441830907497785e-06, "loss": 0.3744, "step": 6191 }, { "epoch": 0.18, "grad_norm": 9.99987206377207, "learning_rate": 9.441617960352347e-06, "loss": 1.0796, "step": 6192 }, { "epoch": 0.18, "grad_norm": 8.577394242419937, "learning_rate": 9.441404974996215e-06, "loss": 0.4573, "step": 6193 }, { "epoch": 0.18, "grad_norm": 5.090360227364954, "learning_rate": 9.441191951431224e-06, "loss": 0.3856, "step": 6194 }, { "epoch": 0.18, "grad_norm": 7.357191405751204, "learning_rate": 9.440978889659204e-06, "loss": 0.8544, "step": 6195 }, { "epoch": 0.18, "grad_norm": 6.476915816918627, "learning_rate": 9.440765789681992e-06, "loss": 0.4229, "step": 6196 }, { "epoch": 0.18, "grad_norm": 9.649402746188715, "learning_rate": 9.440552651501418e-06, "loss": 1.048, "step": 6197 }, { "epoch": 0.18, "grad_norm": 7.729017525302495, "learning_rate": 9.440339475119315e-06, "loss": 0.3428, "step": 6198 }, { "epoch": 0.18, "grad_norm": 6.6612467390450805, "learning_rate": 9.440126260537518e-06, "loss": 0.6426, "step": 6199 }, { "epoch": 0.18, "grad_norm": 6.899219300152672, "learning_rate": 9.439913007757863e-06, "loss": 0.6462, "step": 6200 }, { "epoch": 0.18, "grad_norm": 18.971867262779472, "learning_rate": 9.439699716782183e-06, "loss": 0.7895, "step": 6201 }, { "epoch": 0.18, "grad_norm": 3.409230616770162, "learning_rate": 9.43948638761231e-06, "loss": 0.4554, "step": 6202 }, { "epoch": 0.18, "grad_norm": 24.79382990099892, "learning_rate": 9.439273020250085e-06, "loss": 0.4223, "step": 6203 }, { "epoch": 0.18, "grad_norm": 7.197237600119284, "learning_rate": 9.439059614697341e-06, "loss": 0.4855, "step": 6204 }, { "epoch": 0.18, "grad_norm": 4.053741932830641, "learning_rate": 9.438846170955914e-06, "loss": 0.5148, "step": 6205 }, { "epoch": 0.18, "grad_norm": 4.668755551989827, "learning_rate": 9.438632689027639e-06, "loss": 0.8484, "step": 6206 }, { "epoch": 0.18, "grad_norm": 6.429391365137982, "learning_rate": 9.438419168914353e-06, "loss": 0.5904, "step": 6207 }, { "epoch": 0.18, "grad_norm": 4.731718621169724, "learning_rate": 9.438205610617892e-06, "loss": 0.7016, "step": 6208 }, { "epoch": 0.18, "grad_norm": 7.403176615620838, "learning_rate": 9.437992014140097e-06, "loss": 0.4761, "step": 6209 }, { "epoch": 0.18, "grad_norm": 4.101680267096494, "learning_rate": 9.437778379482803e-06, "loss": 0.3273, "step": 6210 }, { "epoch": 0.18, "grad_norm": 8.02825064258254, "learning_rate": 9.437564706647847e-06, "loss": 0.6674, "step": 6211 }, { "epoch": 0.18, "grad_norm": 5.427994937689156, "learning_rate": 9.437350995637069e-06, "loss": 0.7205, "step": 6212 }, { "epoch": 0.18, "grad_norm": 17.236627011395832, "learning_rate": 9.437137246452306e-06, "loss": 0.3509, "step": 6213 }, { "epoch": 0.18, "grad_norm": 10.533800489794226, "learning_rate": 9.436923459095397e-06, "loss": 0.4276, "step": 6214 }, { "epoch": 0.18, "grad_norm": 5.295401309099629, "learning_rate": 9.436709633568183e-06, "loss": 0.271, "step": 6215 }, { "epoch": 0.18, "grad_norm": 5.982669995798322, "learning_rate": 9.436495769872503e-06, "loss": 0.1613, "step": 6216 }, { "epoch": 0.18, "grad_norm": 5.281714424614757, "learning_rate": 9.436281868010194e-06, "loss": 0.5935, "step": 6217 }, { "epoch": 0.18, "grad_norm": 5.320521525619522, "learning_rate": 9.436067927983099e-06, "loss": 0.4415, "step": 6218 }, { "epoch": 0.18, "grad_norm": 6.042956361468192, "learning_rate": 9.435853949793059e-06, "loss": 0.5788, "step": 6219 }, { "epoch": 0.18, "grad_norm": 4.558889345131907, "learning_rate": 9.435639933441912e-06, "loss": 0.3489, "step": 6220 }, { "epoch": 0.18, "grad_norm": 7.64531409417375, "learning_rate": 9.435425878931503e-06, "loss": 0.6154, "step": 6221 }, { "epoch": 0.18, "grad_norm": 6.472553553943528, "learning_rate": 9.435211786263669e-06, "loss": 0.724, "step": 6222 }, { "epoch": 0.18, "grad_norm": 9.075647748208418, "learning_rate": 9.434997655440255e-06, "loss": 0.7127, "step": 6223 }, { "epoch": 0.18, "grad_norm": 9.699677473782206, "learning_rate": 9.434783486463102e-06, "loss": 0.4807, "step": 6224 }, { "epoch": 0.18, "grad_norm": 11.878830623166447, "learning_rate": 9.434569279334054e-06, "loss": 0.5265, "step": 6225 }, { "epoch": 0.18, "grad_norm": 14.07905418047751, "learning_rate": 9.43435503405495e-06, "loss": 0.7883, "step": 6226 }, { "epoch": 0.18, "grad_norm": 7.449816723303696, "learning_rate": 9.434140750627638e-06, "loss": 0.5599, "step": 6227 }, { "epoch": 0.18, "grad_norm": 9.845259002796896, "learning_rate": 9.433926429053957e-06, "loss": 0.1635, "step": 6228 }, { "epoch": 0.18, "grad_norm": 3.9883740570299664, "learning_rate": 9.433712069335754e-06, "loss": 0.6569, "step": 6229 }, { "epoch": 0.18, "grad_norm": 7.095800229538735, "learning_rate": 9.433497671474871e-06, "loss": 0.4191, "step": 6230 }, { "epoch": 0.18, "grad_norm": 3.109632586827258, "learning_rate": 9.433283235473155e-06, "loss": 0.3617, "step": 6231 }, { "epoch": 0.18, "grad_norm": 4.923383063932764, "learning_rate": 9.43306876133245e-06, "loss": 0.2198, "step": 6232 }, { "epoch": 0.18, "grad_norm": 4.2750709940497655, "learning_rate": 9.432854249054597e-06, "loss": 0.5341, "step": 6233 }, { "epoch": 0.18, "grad_norm": 9.995436008364742, "learning_rate": 9.432639698641445e-06, "loss": 0.7797, "step": 6234 }, { "epoch": 0.18, "grad_norm": 6.270034227962245, "learning_rate": 9.432425110094843e-06, "loss": 0.7945, "step": 6235 }, { "epoch": 0.18, "grad_norm": 7.794666596746211, "learning_rate": 9.432210483416631e-06, "loss": 0.521, "step": 6236 }, { "epoch": 0.18, "grad_norm": 7.281686335902571, "learning_rate": 9.431995818608656e-06, "loss": 0.6934, "step": 6237 }, { "epoch": 0.18, "grad_norm": 5.498387533851366, "learning_rate": 9.43178111567277e-06, "loss": 0.3025, "step": 6238 }, { "epoch": 0.18, "grad_norm": 5.480265672745444, "learning_rate": 9.431566374610817e-06, "loss": 0.5941, "step": 6239 }, { "epoch": 0.18, "grad_norm": 6.074560537259069, "learning_rate": 9.431351595424643e-06, "loss": 0.5267, "step": 6240 }, { "epoch": 0.18, "grad_norm": 5.382649235124941, "learning_rate": 9.431136778116094e-06, "loss": 0.4434, "step": 6241 }, { "epoch": 0.18, "grad_norm": 8.756008401351739, "learning_rate": 9.430921922687024e-06, "loss": 0.5125, "step": 6242 }, { "epoch": 0.18, "grad_norm": 10.95121036615984, "learning_rate": 9.430707029139279e-06, "loss": 0.7052, "step": 6243 }, { "epoch": 0.18, "grad_norm": 4.6064735720659025, "learning_rate": 9.430492097474705e-06, "loss": 0.4315, "step": 6244 }, { "epoch": 0.18, "grad_norm": 5.670689188264824, "learning_rate": 9.430277127695154e-06, "loss": 0.626, "step": 6245 }, { "epoch": 0.18, "grad_norm": 7.179146996410461, "learning_rate": 9.430062119802474e-06, "loss": 0.7316, "step": 6246 }, { "epoch": 0.18, "grad_norm": 5.9443328090338, "learning_rate": 9.429847073798516e-06, "loss": 0.5129, "step": 6247 }, { "epoch": 0.18, "grad_norm": 8.729670204321621, "learning_rate": 9.429631989685127e-06, "loss": 0.6177, "step": 6248 }, { "epoch": 0.18, "grad_norm": 5.632060261309162, "learning_rate": 9.42941686746416e-06, "loss": 0.7444, "step": 6249 }, { "epoch": 0.18, "grad_norm": 3.747409394615011, "learning_rate": 9.429201707137466e-06, "loss": 0.2207, "step": 6250 }, { "epoch": 0.18, "grad_norm": 8.999553457414503, "learning_rate": 9.428986508706894e-06, "loss": 0.8199, "step": 6251 }, { "epoch": 0.18, "grad_norm": 5.879222549598243, "learning_rate": 9.428771272174296e-06, "loss": 0.689, "step": 6252 }, { "epoch": 0.18, "grad_norm": 3.389056919879461, "learning_rate": 9.428555997541525e-06, "loss": 0.3787, "step": 6253 }, { "epoch": 0.18, "grad_norm": 4.149847218562202, "learning_rate": 9.428340684810432e-06, "loss": 0.3265, "step": 6254 }, { "epoch": 0.18, "grad_norm": 8.373188278324745, "learning_rate": 9.428125333982869e-06, "loss": 0.603, "step": 6255 }, { "epoch": 0.18, "grad_norm": 7.214119090485614, "learning_rate": 9.42790994506069e-06, "loss": 0.4822, "step": 6256 }, { "epoch": 0.18, "grad_norm": 6.363743062177603, "learning_rate": 9.427694518045745e-06, "loss": 0.1978, "step": 6257 }, { "epoch": 0.18, "grad_norm": 3.3211314548962516, "learning_rate": 9.427479052939891e-06, "loss": 0.3918, "step": 6258 }, { "epoch": 0.18, "grad_norm": 4.497719001166614, "learning_rate": 9.427263549744978e-06, "loss": 0.3125, "step": 6259 }, { "epoch": 0.18, "grad_norm": 4.650226152724658, "learning_rate": 9.427048008462865e-06, "loss": 0.3417, "step": 6260 }, { "epoch": 0.18, "grad_norm": 10.879637945967096, "learning_rate": 9.426832429095399e-06, "loss": 0.4579, "step": 6261 }, { "epoch": 0.18, "grad_norm": 4.22637623149074, "learning_rate": 9.426616811644442e-06, "loss": 0.3784, "step": 6262 }, { "epoch": 0.18, "grad_norm": 8.495796342226338, "learning_rate": 9.426401156111844e-06, "loss": 0.4711, "step": 6263 }, { "epoch": 0.18, "grad_norm": 9.543726525218501, "learning_rate": 9.426185462499463e-06, "loss": 0.5303, "step": 6264 }, { "epoch": 0.18, "grad_norm": 3.0045988675840376, "learning_rate": 9.425969730809152e-06, "loss": 0.2516, "step": 6265 }, { "epoch": 0.18, "grad_norm": 6.943476190670823, "learning_rate": 9.425753961042767e-06, "loss": 0.5775, "step": 6266 }, { "epoch": 0.18, "grad_norm": 5.287398106019336, "learning_rate": 9.425538153202169e-06, "loss": 0.5823, "step": 6267 }, { "epoch": 0.18, "grad_norm": 7.053527567694329, "learning_rate": 9.425322307289209e-06, "loss": 0.593, "step": 6268 }, { "epoch": 0.18, "grad_norm": 5.115273713277048, "learning_rate": 9.425106423305746e-06, "loss": 0.4443, "step": 6269 }, { "epoch": 0.18, "grad_norm": 5.006098413732237, "learning_rate": 9.424890501253638e-06, "loss": 0.4495, "step": 6270 }, { "epoch": 0.18, "grad_norm": 3.3148174096939313, "learning_rate": 9.424674541134742e-06, "loss": 0.4206, "step": 6271 }, { "epoch": 0.18, "grad_norm": 6.819941245130791, "learning_rate": 9.424458542950916e-06, "loss": 0.7659, "step": 6272 }, { "epoch": 0.18, "grad_norm": 8.204693511687536, "learning_rate": 9.424242506704018e-06, "loss": 0.5659, "step": 6273 }, { "epoch": 0.18, "grad_norm": 5.958610310374567, "learning_rate": 9.424026432395906e-06, "loss": 0.6303, "step": 6274 }, { "epoch": 0.18, "grad_norm": 5.6958790354742765, "learning_rate": 9.423810320028436e-06, "loss": 0.5151, "step": 6275 }, { "epoch": 0.18, "grad_norm": 8.447741474133423, "learning_rate": 9.423594169603476e-06, "loss": 0.235, "step": 6276 }, { "epoch": 0.18, "grad_norm": 9.016083545362047, "learning_rate": 9.423377981122876e-06, "loss": 0.7809, "step": 6277 }, { "epoch": 0.18, "grad_norm": 5.605137576644555, "learning_rate": 9.423161754588503e-06, "loss": 0.4538, "step": 6278 }, { "epoch": 0.18, "grad_norm": 4.38739372624642, "learning_rate": 9.422945490002211e-06, "loss": 0.3797, "step": 6279 }, { "epoch": 0.18, "grad_norm": 4.4825998175379524, "learning_rate": 9.422729187365866e-06, "loss": 0.5861, "step": 6280 }, { "epoch": 0.18, "grad_norm": 5.437662494906088, "learning_rate": 9.422512846681325e-06, "loss": 0.6101, "step": 6281 }, { "epoch": 0.18, "grad_norm": 3.7122495560348754, "learning_rate": 9.422296467950452e-06, "loss": 0.1192, "step": 6282 }, { "epoch": 0.18, "grad_norm": 9.305383510485834, "learning_rate": 9.422080051175104e-06, "loss": 0.7349, "step": 6283 }, { "epoch": 0.18, "grad_norm": 5.888680539354115, "learning_rate": 9.421863596357149e-06, "loss": 0.3748, "step": 6284 }, { "epoch": 0.18, "grad_norm": 6.553494259568163, "learning_rate": 9.421647103498444e-06, "loss": 0.2936, "step": 6285 }, { "epoch": 0.18, "grad_norm": 3.396754722957482, "learning_rate": 9.421430572600853e-06, "loss": 0.4162, "step": 6286 }, { "epoch": 0.18, "grad_norm": 14.983720942155852, "learning_rate": 9.42121400366624e-06, "loss": 0.5711, "step": 6287 }, { "epoch": 0.18, "grad_norm": 5.7095747338045815, "learning_rate": 9.420997396696468e-06, "loss": 0.2362, "step": 6288 }, { "epoch": 0.18, "grad_norm": 5.3022782215606785, "learning_rate": 9.420780751693399e-06, "loss": 0.5168, "step": 6289 }, { "epoch": 0.18, "grad_norm": 8.170807048386374, "learning_rate": 9.420564068658897e-06, "loss": 0.6896, "step": 6290 }, { "epoch": 0.18, "grad_norm": 7.669237541985699, "learning_rate": 9.420347347594827e-06, "loss": 0.5206, "step": 6291 }, { "epoch": 0.18, "grad_norm": 7.782589839192513, "learning_rate": 9.420130588503054e-06, "loss": 0.8974, "step": 6292 }, { "epoch": 0.18, "grad_norm": 6.741387948623521, "learning_rate": 9.41991379138544e-06, "loss": 0.8083, "step": 6293 }, { "epoch": 0.18, "grad_norm": 5.591938748493557, "learning_rate": 9.419696956243854e-06, "loss": 0.4449, "step": 6294 }, { "epoch": 0.18, "grad_norm": 6.732453463762199, "learning_rate": 9.419480083080156e-06, "loss": 0.4569, "step": 6295 }, { "epoch": 0.18, "grad_norm": 6.297346452428908, "learning_rate": 9.419263171896218e-06, "loss": 0.5033, "step": 6296 }, { "epoch": 0.18, "grad_norm": 3.577199158138401, "learning_rate": 9.419046222693901e-06, "loss": 0.2214, "step": 6297 }, { "epoch": 0.18, "grad_norm": 7.834356572571162, "learning_rate": 9.418829235475076e-06, "loss": 0.7065, "step": 6298 }, { "epoch": 0.18, "grad_norm": 5.393054138418313, "learning_rate": 9.418612210241605e-06, "loss": 0.5136, "step": 6299 }, { "epoch": 0.18, "grad_norm": 4.871711820227201, "learning_rate": 9.418395146995359e-06, "loss": 0.2587, "step": 6300 }, { "epoch": 0.18, "grad_norm": 5.8140425737342465, "learning_rate": 9.4181780457382e-06, "loss": 0.5586, "step": 6301 }, { "epoch": 0.18, "grad_norm": 6.6489501468592715, "learning_rate": 9.417960906472003e-06, "loss": 0.4793, "step": 6302 }, { "epoch": 0.18, "grad_norm": 7.712354535627581, "learning_rate": 9.41774372919863e-06, "loss": 0.5653, "step": 6303 }, { "epoch": 0.18, "grad_norm": 8.614755323569739, "learning_rate": 9.417526513919953e-06, "loss": 0.9314, "step": 6304 }, { "epoch": 0.18, "grad_norm": 8.495488287738429, "learning_rate": 9.417309260637838e-06, "loss": 0.7739, "step": 6305 }, { "epoch": 0.18, "grad_norm": 1.962113350847679, "learning_rate": 9.417091969354158e-06, "loss": 0.0597, "step": 6306 }, { "epoch": 0.18, "grad_norm": 9.245240920348856, "learning_rate": 9.416874640070777e-06, "loss": 0.6195, "step": 6307 }, { "epoch": 0.18, "grad_norm": 8.469523602163667, "learning_rate": 9.416657272789568e-06, "loss": 0.8786, "step": 6308 }, { "epoch": 0.18, "grad_norm": 3.853272590378433, "learning_rate": 9.4164398675124e-06, "loss": 0.5231, "step": 6309 }, { "epoch": 0.18, "grad_norm": 5.029995590117021, "learning_rate": 9.416222424241144e-06, "loss": 0.3261, "step": 6310 }, { "epoch": 0.18, "grad_norm": 7.2059364376351045, "learning_rate": 9.41600494297767e-06, "loss": 0.6643, "step": 6311 }, { "epoch": 0.18, "grad_norm": 6.715459150748904, "learning_rate": 9.41578742372385e-06, "loss": 0.8385, "step": 6312 }, { "epoch": 0.18, "grad_norm": 5.748976948159557, "learning_rate": 9.415569866481552e-06, "loss": 0.7217, "step": 6313 }, { "epoch": 0.18, "grad_norm": 8.092817628393886, "learning_rate": 9.415352271252654e-06, "loss": 0.9685, "step": 6314 }, { "epoch": 0.18, "grad_norm": 4.665693563377826, "learning_rate": 9.415134638039021e-06, "loss": 0.3047, "step": 6315 }, { "epoch": 0.18, "grad_norm": 4.126184264603857, "learning_rate": 9.41491696684253e-06, "loss": 0.4368, "step": 6316 }, { "epoch": 0.18, "grad_norm": 7.9186014488154335, "learning_rate": 9.41469925766505e-06, "loss": 1.0035, "step": 6317 }, { "epoch": 0.18, "grad_norm": 9.56445212789713, "learning_rate": 9.414481510508457e-06, "loss": 1.0353, "step": 6318 }, { "epoch": 0.18, "grad_norm": 7.170404054809925, "learning_rate": 9.414263725374624e-06, "loss": 0.5781, "step": 6319 }, { "epoch": 0.18, "grad_norm": 6.359896514278574, "learning_rate": 9.414045902265421e-06, "loss": 0.4525, "step": 6320 }, { "epoch": 0.18, "grad_norm": 5.088056366137414, "learning_rate": 9.413828041182728e-06, "loss": 0.3997, "step": 6321 }, { "epoch": 0.18, "grad_norm": 10.29202290483425, "learning_rate": 9.413610142128413e-06, "loss": 0.699, "step": 6322 }, { "epoch": 0.18, "grad_norm": 6.297519508867685, "learning_rate": 9.413392205104354e-06, "loss": 0.4524, "step": 6323 }, { "epoch": 0.18, "grad_norm": 3.7063635655930782, "learning_rate": 9.413174230112428e-06, "loss": 0.2636, "step": 6324 }, { "epoch": 0.18, "grad_norm": 6.976855093532816, "learning_rate": 9.412956217154505e-06, "loss": 0.6071, "step": 6325 }, { "epoch": 0.18, "grad_norm": 7.57200422566372, "learning_rate": 9.412738166232462e-06, "loss": 0.7239, "step": 6326 }, { "epoch": 0.18, "grad_norm": 9.914250942450364, "learning_rate": 9.412520077348177e-06, "loss": 0.6753, "step": 6327 }, { "epoch": 0.18, "grad_norm": 4.304854263426301, "learning_rate": 9.412301950503525e-06, "loss": 0.2486, "step": 6328 }, { "epoch": 0.18, "grad_norm": 7.436673895853594, "learning_rate": 9.412083785700384e-06, "loss": 0.3533, "step": 6329 }, { "epoch": 0.18, "grad_norm": 3.313630019281871, "learning_rate": 9.411865582940627e-06, "loss": 0.0903, "step": 6330 }, { "epoch": 0.18, "grad_norm": 3.5623590541865404, "learning_rate": 9.411647342226136e-06, "loss": 0.3999, "step": 6331 }, { "epoch": 0.18, "grad_norm": 2.894774350808539, "learning_rate": 9.411429063558783e-06, "loss": 0.3315, "step": 6332 }, { "epoch": 0.18, "grad_norm": 1.9350142992462236, "learning_rate": 9.411210746940451e-06, "loss": 0.138, "step": 6333 }, { "epoch": 0.18, "grad_norm": 3.1433671016605262, "learning_rate": 9.410992392373017e-06, "loss": 0.3473, "step": 6334 }, { "epoch": 0.18, "grad_norm": 5.141133291977451, "learning_rate": 9.410773999858356e-06, "loss": 0.4809, "step": 6335 }, { "epoch": 0.18, "grad_norm": 7.998935986332942, "learning_rate": 9.410555569398352e-06, "loss": 0.4821, "step": 6336 }, { "epoch": 0.18, "grad_norm": 4.589778610741228, "learning_rate": 9.410337100994879e-06, "loss": 0.3639, "step": 6337 }, { "epoch": 0.18, "grad_norm": 8.036876088275891, "learning_rate": 9.41011859464982e-06, "loss": 0.7343, "step": 6338 }, { "epoch": 0.18, "grad_norm": 4.732307712942812, "learning_rate": 9.409900050365055e-06, "loss": 0.5365, "step": 6339 }, { "epoch": 0.18, "grad_norm": 6.549122972958693, "learning_rate": 9.40968146814246e-06, "loss": 0.5115, "step": 6340 }, { "epoch": 0.18, "grad_norm": 7.280978545666682, "learning_rate": 9.409462847983921e-06, "loss": 0.5425, "step": 6341 }, { "epoch": 0.18, "grad_norm": 2.8755996120288123, "learning_rate": 9.409244189891316e-06, "loss": 0.161, "step": 6342 }, { "epoch": 0.18, "grad_norm": 8.54506450773815, "learning_rate": 9.409025493866523e-06, "loss": 0.5175, "step": 6343 }, { "epoch": 0.18, "grad_norm": 7.4117232431949205, "learning_rate": 9.40880675991143e-06, "loss": 0.4813, "step": 6344 }, { "epoch": 0.18, "grad_norm": 4.070772727090869, "learning_rate": 9.408587988027913e-06, "loss": 0.4378, "step": 6345 }, { "epoch": 0.18, "grad_norm": 9.665822244352718, "learning_rate": 9.408369178217858e-06, "loss": 0.9326, "step": 6346 }, { "epoch": 0.18, "grad_norm": 8.67119968293576, "learning_rate": 9.408150330483144e-06, "loss": 0.7347, "step": 6347 }, { "epoch": 0.18, "grad_norm": 3.073452845633537, "learning_rate": 9.407931444825658e-06, "loss": 0.1879, "step": 6348 }, { "epoch": 0.18, "grad_norm": 5.871435829293357, "learning_rate": 9.407712521247278e-06, "loss": 0.6235, "step": 6349 }, { "epoch": 0.18, "grad_norm": 12.269822839516573, "learning_rate": 9.40749355974989e-06, "loss": 0.8825, "step": 6350 }, { "epoch": 0.18, "grad_norm": 8.304745376973768, "learning_rate": 9.407274560335379e-06, "loss": 0.6679, "step": 6351 }, { "epoch": 0.18, "grad_norm": 7.466452528516853, "learning_rate": 9.407055523005627e-06, "loss": 0.456, "step": 6352 }, { "epoch": 0.18, "grad_norm": 9.392643310168378, "learning_rate": 9.406836447762519e-06, "loss": 0.6165, "step": 6353 }, { "epoch": 0.18, "grad_norm": 9.988460272123431, "learning_rate": 9.406617334607939e-06, "loss": 0.7108, "step": 6354 }, { "epoch": 0.18, "grad_norm": 7.06155419557288, "learning_rate": 9.406398183543774e-06, "loss": 0.3835, "step": 6355 }, { "epoch": 0.18, "grad_norm": 5.1141300298356365, "learning_rate": 9.406178994571907e-06, "loss": 0.5884, "step": 6356 }, { "epoch": 0.18, "grad_norm": 3.7800047770853484, "learning_rate": 9.405959767694224e-06, "loss": 0.2236, "step": 6357 }, { "epoch": 0.18, "grad_norm": 7.375840737218406, "learning_rate": 9.405740502912612e-06, "loss": 0.7658, "step": 6358 }, { "epoch": 0.18, "grad_norm": 5.031249336574345, "learning_rate": 9.405521200228957e-06, "loss": 0.3999, "step": 6359 }, { "epoch": 0.18, "grad_norm": 5.20518819323996, "learning_rate": 9.405301859645147e-06, "loss": 0.4381, "step": 6360 }, { "epoch": 0.18, "grad_norm": 14.181617357481601, "learning_rate": 9.405082481163066e-06, "loss": 0.3588, "step": 6361 }, { "epoch": 0.18, "grad_norm": 6.029019116132038, "learning_rate": 9.404863064784603e-06, "loss": 0.4763, "step": 6362 }, { "epoch": 0.18, "grad_norm": 5.548108976409108, "learning_rate": 9.404643610511645e-06, "loss": 0.6719, "step": 6363 }, { "epoch": 0.18, "grad_norm": 5.198275760740728, "learning_rate": 9.404424118346081e-06, "loss": 0.5283, "step": 6364 }, { "epoch": 0.18, "grad_norm": 7.775823332439752, "learning_rate": 9.404204588289798e-06, "loss": 0.6674, "step": 6365 }, { "epoch": 0.18, "grad_norm": 4.679053516780582, "learning_rate": 9.403985020344685e-06, "loss": 0.5533, "step": 6366 }, { "epoch": 0.18, "grad_norm": 7.224557431403731, "learning_rate": 9.403765414512633e-06, "loss": 0.6686, "step": 6367 }, { "epoch": 0.18, "grad_norm": 8.34684164653985, "learning_rate": 9.403545770795527e-06, "loss": 0.9995, "step": 6368 }, { "epoch": 0.18, "grad_norm": 8.84165967628809, "learning_rate": 9.40332608919526e-06, "loss": 0.8417, "step": 6369 }, { "epoch": 0.18, "grad_norm": 8.934800333824482, "learning_rate": 9.40310636971372e-06, "loss": 0.5993, "step": 6370 }, { "epoch": 0.18, "grad_norm": 4.95993363443706, "learning_rate": 9.402886612352799e-06, "loss": 0.4212, "step": 6371 }, { "epoch": 0.18, "grad_norm": 12.259576129649105, "learning_rate": 9.402666817114386e-06, "loss": 0.7156, "step": 6372 }, { "epoch": 0.18, "grad_norm": 8.570054223368688, "learning_rate": 9.402446984000373e-06, "loss": 0.4649, "step": 6373 }, { "epoch": 0.18, "grad_norm": 10.004856694542532, "learning_rate": 9.40222711301265e-06, "loss": 0.8998, "step": 6374 }, { "epoch": 0.18, "grad_norm": 4.687098167997998, "learning_rate": 9.402007204153109e-06, "loss": 0.3209, "step": 6375 }, { "epoch": 0.18, "grad_norm": 5.8557459057546595, "learning_rate": 9.401787257423643e-06, "loss": 0.3245, "step": 6376 }, { "epoch": 0.18, "grad_norm": 5.428830569040728, "learning_rate": 9.401567272826142e-06, "loss": 0.695, "step": 6377 }, { "epoch": 0.18, "grad_norm": 8.39307906138601, "learning_rate": 9.401347250362503e-06, "loss": 0.2918, "step": 6378 }, { "epoch": 0.18, "grad_norm": 4.806800191711564, "learning_rate": 9.401127190034611e-06, "loss": 0.4194, "step": 6379 }, { "epoch": 0.18, "grad_norm": 3.980480639558804, "learning_rate": 9.400907091844366e-06, "loss": 0.617, "step": 6380 }, { "epoch": 0.18, "grad_norm": 6.408262020585707, "learning_rate": 9.400686955793658e-06, "loss": 0.5453, "step": 6381 }, { "epoch": 0.18, "grad_norm": 5.212950813686341, "learning_rate": 9.400466781884383e-06, "loss": 0.4663, "step": 6382 }, { "epoch": 0.18, "grad_norm": 4.473151113794483, "learning_rate": 9.400246570118433e-06, "loss": 0.288, "step": 6383 }, { "epoch": 0.18, "grad_norm": 7.880869297933653, "learning_rate": 9.400026320497707e-06, "loss": 0.6757, "step": 6384 }, { "epoch": 0.18, "grad_norm": 7.576962725987305, "learning_rate": 9.399806033024093e-06, "loss": 0.3798, "step": 6385 }, { "epoch": 0.18, "grad_norm": 12.334803639655453, "learning_rate": 9.39958570769949e-06, "loss": 0.4823, "step": 6386 }, { "epoch": 0.18, "grad_norm": 4.649344168323757, "learning_rate": 9.399365344525792e-06, "loss": 0.5304, "step": 6387 }, { "epoch": 0.18, "grad_norm": 11.953516895127951, "learning_rate": 9.399144943504897e-06, "loss": 0.3304, "step": 6388 }, { "epoch": 0.18, "grad_norm": 6.365250256481414, "learning_rate": 9.398924504638697e-06, "loss": 0.6279, "step": 6389 }, { "epoch": 0.18, "grad_norm": 8.298082626589709, "learning_rate": 9.398704027929095e-06, "loss": 0.386, "step": 6390 }, { "epoch": 0.18, "grad_norm": 2.4938433177955224, "learning_rate": 9.398483513377982e-06, "loss": 0.2529, "step": 6391 }, { "epoch": 0.18, "grad_norm": 7.50167932782916, "learning_rate": 9.398262960987256e-06, "loss": 0.5672, "step": 6392 }, { "epoch": 0.18, "grad_norm": 5.76142035277359, "learning_rate": 9.398042370758817e-06, "loss": 0.7472, "step": 6393 }, { "epoch": 0.18, "grad_norm": 7.123720288379543, "learning_rate": 9.39782174269456e-06, "loss": 0.6538, "step": 6394 }, { "epoch": 0.18, "grad_norm": 6.341667961123815, "learning_rate": 9.397601076796385e-06, "loss": 0.5308, "step": 6395 }, { "epoch": 0.18, "grad_norm": 3.934369220077959, "learning_rate": 9.397380373066188e-06, "loss": 0.567, "step": 6396 }, { "epoch": 0.18, "grad_norm": 8.549305348871462, "learning_rate": 9.39715963150587e-06, "loss": 0.703, "step": 6397 }, { "epoch": 0.18, "grad_norm": 4.65970734115223, "learning_rate": 9.396938852117329e-06, "loss": 0.6149, "step": 6398 }, { "epoch": 0.18, "grad_norm": 2.697204014587583, "learning_rate": 9.396718034902464e-06, "loss": 0.4714, "step": 6399 }, { "epoch": 0.18, "grad_norm": 7.4122466264363425, "learning_rate": 9.396497179863175e-06, "loss": 0.2103, "step": 6400 }, { "epoch": 0.18, "grad_norm": 8.264767117320368, "learning_rate": 9.396276287001362e-06, "loss": 0.6245, "step": 6401 }, { "epoch": 0.18, "grad_norm": 5.327399554046361, "learning_rate": 9.396055356318927e-06, "loss": 0.5534, "step": 6402 }, { "epoch": 0.18, "grad_norm": 5.789731500556935, "learning_rate": 9.395834387817767e-06, "loss": 0.4355, "step": 6403 }, { "epoch": 0.18, "grad_norm": 5.53137379308417, "learning_rate": 9.395613381499786e-06, "loss": 0.429, "step": 6404 }, { "epoch": 0.18, "grad_norm": 8.305384121347862, "learning_rate": 9.395392337366885e-06, "loss": 0.4481, "step": 6405 }, { "epoch": 0.18, "grad_norm": 7.667792638387693, "learning_rate": 9.395171255420963e-06, "loss": 0.4489, "step": 6406 }, { "epoch": 0.18, "grad_norm": 3.7936792621585007, "learning_rate": 9.394950135663925e-06, "loss": 0.5048, "step": 6407 }, { "epoch": 0.18, "grad_norm": 8.628376562422755, "learning_rate": 9.394728978097672e-06, "loss": 0.8239, "step": 6408 }, { "epoch": 0.18, "grad_norm": 4.1662367249239285, "learning_rate": 9.394507782724106e-06, "loss": 0.1233, "step": 6409 }, { "epoch": 0.18, "grad_norm": 7.505204810715925, "learning_rate": 9.394286549545131e-06, "loss": 0.8317, "step": 6410 }, { "epoch": 0.18, "grad_norm": 4.343527108930003, "learning_rate": 9.394065278562651e-06, "loss": 0.3192, "step": 6411 }, { "epoch": 0.18, "grad_norm": 9.581452779496576, "learning_rate": 9.393843969778566e-06, "loss": 0.4957, "step": 6412 }, { "epoch": 0.18, "grad_norm": 5.392770931633889, "learning_rate": 9.393622623194785e-06, "loss": 0.4468, "step": 6413 }, { "epoch": 0.18, "grad_norm": 5.794173317992336, "learning_rate": 9.393401238813208e-06, "loss": 0.3513, "step": 6414 }, { "epoch": 0.18, "grad_norm": 4.678300400416336, "learning_rate": 9.393179816635741e-06, "loss": 0.6941, "step": 6415 }, { "epoch": 0.18, "grad_norm": 4.772681792979687, "learning_rate": 9.39295835666429e-06, "loss": 0.2488, "step": 6416 }, { "epoch": 0.18, "grad_norm": 5.484423167134014, "learning_rate": 9.392736858900759e-06, "loss": 0.5102, "step": 6417 }, { "epoch": 0.18, "grad_norm": 3.819252725278483, "learning_rate": 9.392515323347051e-06, "loss": 0.2465, "step": 6418 }, { "epoch": 0.18, "grad_norm": 6.9902141151870065, "learning_rate": 9.392293750005078e-06, "loss": 0.3688, "step": 6419 }, { "epoch": 0.18, "grad_norm": 4.507757467532291, "learning_rate": 9.392072138876742e-06, "loss": 0.676, "step": 6420 }, { "epoch": 0.18, "grad_norm": 7.977518082337909, "learning_rate": 9.391850489963949e-06, "loss": 0.6018, "step": 6421 }, { "epoch": 0.18, "grad_norm": 5.952557474152876, "learning_rate": 9.391628803268606e-06, "loss": 0.4655, "step": 6422 }, { "epoch": 0.18, "grad_norm": 3.9291085941126433, "learning_rate": 9.391407078792623e-06, "loss": 0.4112, "step": 6423 }, { "epoch": 0.18, "grad_norm": 3.6166983288083654, "learning_rate": 9.391185316537905e-06, "loss": 0.3296, "step": 6424 }, { "epoch": 0.18, "grad_norm": 5.470718029673333, "learning_rate": 9.390963516506362e-06, "loss": 0.3054, "step": 6425 }, { "epoch": 0.18, "grad_norm": 2.5406704550192436, "learning_rate": 9.390741678699898e-06, "loss": 0.4278, "step": 6426 }, { "epoch": 0.18, "grad_norm": 5.264142017211177, "learning_rate": 9.390519803120424e-06, "loss": 0.8196, "step": 6427 }, { "epoch": 0.18, "grad_norm": 6.363705896641723, "learning_rate": 9.390297889769851e-06, "loss": 0.5576, "step": 6428 }, { "epoch": 0.18, "grad_norm": 9.822763651481626, "learning_rate": 9.390075938650084e-06, "loss": 0.5663, "step": 6429 }, { "epoch": 0.18, "grad_norm": 6.816885979939886, "learning_rate": 9.389853949763035e-06, "loss": 0.6656, "step": 6430 }, { "epoch": 0.18, "grad_norm": 5.788669505917805, "learning_rate": 9.389631923110611e-06, "loss": 0.3525, "step": 6431 }, { "epoch": 0.18, "grad_norm": 5.027639432213239, "learning_rate": 9.389409858694727e-06, "loss": 0.6061, "step": 6432 }, { "epoch": 0.18, "grad_norm": 5.850437723395436, "learning_rate": 9.389187756517288e-06, "loss": 0.4308, "step": 6433 }, { "epoch": 0.18, "grad_norm": 8.252594973188973, "learning_rate": 9.388965616580209e-06, "loss": 0.5018, "step": 6434 }, { "epoch": 0.18, "grad_norm": 5.5072751046841955, "learning_rate": 9.3887434388854e-06, "loss": 0.7271, "step": 6435 }, { "epoch": 0.18, "grad_norm": 5.725957528876508, "learning_rate": 9.388521223434768e-06, "loss": 0.9334, "step": 6436 }, { "epoch": 0.18, "grad_norm": 5.307482043724214, "learning_rate": 9.388298970230231e-06, "loss": 0.4596, "step": 6437 }, { "epoch": 0.18, "grad_norm": 9.204989440348307, "learning_rate": 9.388076679273696e-06, "loss": 0.5897, "step": 6438 }, { "epoch": 0.18, "grad_norm": 3.0282439240997996, "learning_rate": 9.38785435056708e-06, "loss": 0.3676, "step": 6439 }, { "epoch": 0.18, "grad_norm": 10.192140984798234, "learning_rate": 9.387631984112293e-06, "loss": 0.6973, "step": 6440 }, { "epoch": 0.18, "grad_norm": 6.1176841992145174, "learning_rate": 9.387409579911247e-06, "loss": 0.5744, "step": 6441 }, { "epoch": 0.18, "grad_norm": 5.254373953610331, "learning_rate": 9.387187137965858e-06, "loss": 0.657, "step": 6442 }, { "epoch": 0.18, "grad_norm": 7.480821215739561, "learning_rate": 9.386964658278036e-06, "loss": 0.7791, "step": 6443 }, { "epoch": 0.18, "grad_norm": 10.080909332554848, "learning_rate": 9.3867421408497e-06, "loss": 0.4529, "step": 6444 }, { "epoch": 0.18, "grad_norm": 7.753773539678228, "learning_rate": 9.386519585682759e-06, "loss": 0.6188, "step": 6445 }, { "epoch": 0.18, "grad_norm": 2.8878647693584414, "learning_rate": 9.386296992779131e-06, "loss": 0.264, "step": 6446 }, { "epoch": 0.18, "grad_norm": 4.982152700536549, "learning_rate": 9.386074362140731e-06, "loss": 0.8686, "step": 6447 }, { "epoch": 0.18, "grad_norm": 6.328223936461354, "learning_rate": 9.385851693769473e-06, "loss": 0.6755, "step": 6448 }, { "epoch": 0.18, "grad_norm": 6.496365668225134, "learning_rate": 9.385628987667271e-06, "loss": 0.6329, "step": 6449 }, { "epoch": 0.18, "grad_norm": 8.000487372334591, "learning_rate": 9.385406243836043e-06, "loss": 0.8102, "step": 6450 }, { "epoch": 0.18, "grad_norm": 5.78608192389278, "learning_rate": 9.385183462277707e-06, "loss": 0.1883, "step": 6451 }, { "epoch": 0.18, "grad_norm": 4.954044340958591, "learning_rate": 9.384960642994177e-06, "loss": 0.3109, "step": 6452 }, { "epoch": 0.18, "grad_norm": 3.3973456020794925, "learning_rate": 9.38473778598737e-06, "loss": 0.3354, "step": 6453 }, { "epoch": 0.18, "grad_norm": 5.405436019037521, "learning_rate": 9.384514891259204e-06, "loss": 0.2997, "step": 6454 }, { "epoch": 0.18, "grad_norm": 11.191497599976216, "learning_rate": 9.384291958811596e-06, "loss": 0.7313, "step": 6455 }, { "epoch": 0.18, "grad_norm": 4.7670743645640705, "learning_rate": 9.384068988646464e-06, "loss": 0.4797, "step": 6456 }, { "epoch": 0.18, "grad_norm": 5.864071766667879, "learning_rate": 9.383845980765727e-06, "loss": 0.3697, "step": 6457 }, { "epoch": 0.18, "grad_norm": 6.524762778519116, "learning_rate": 9.3836229351713e-06, "loss": 0.7013, "step": 6458 }, { "epoch": 0.18, "grad_norm": 6.704230655280233, "learning_rate": 9.383399851865109e-06, "loss": 0.6506, "step": 6459 }, { "epoch": 0.18, "grad_norm": 7.380968103670406, "learning_rate": 9.383176730849066e-06, "loss": 0.6711, "step": 6460 }, { "epoch": 0.19, "grad_norm": 8.345625677269433, "learning_rate": 9.382953572125095e-06, "loss": 0.5509, "step": 6461 }, { "epoch": 0.19, "grad_norm": 8.725372643150596, "learning_rate": 9.382730375695112e-06, "loss": 0.7578, "step": 6462 }, { "epoch": 0.19, "grad_norm": 8.390982309473033, "learning_rate": 9.382507141561042e-06, "loss": 0.6621, "step": 6463 }, { "epoch": 0.19, "grad_norm": 4.47894431456835, "learning_rate": 9.382283869724799e-06, "loss": 0.4481, "step": 6464 }, { "epoch": 0.19, "grad_norm": 7.5915959446694465, "learning_rate": 9.38206056018831e-06, "loss": 0.6196, "step": 6465 }, { "epoch": 0.19, "grad_norm": 7.500001335143924, "learning_rate": 9.381837212953494e-06, "loss": 0.5982, "step": 6466 }, { "epoch": 0.19, "grad_norm": 12.752793997011972, "learning_rate": 9.38161382802227e-06, "loss": 0.3305, "step": 6467 }, { "epoch": 0.19, "grad_norm": 6.406250260515906, "learning_rate": 9.381390405396563e-06, "loss": 0.5159, "step": 6468 }, { "epoch": 0.19, "grad_norm": 5.311806734514708, "learning_rate": 9.381166945078293e-06, "loss": 0.6702, "step": 6469 }, { "epoch": 0.19, "grad_norm": 8.183729162438746, "learning_rate": 9.380943447069381e-06, "loss": 0.5128, "step": 6470 }, { "epoch": 0.19, "grad_norm": 7.185536721821959, "learning_rate": 9.380719911371756e-06, "loss": 0.392, "step": 6471 }, { "epoch": 0.19, "grad_norm": 9.297351959559798, "learning_rate": 9.380496337987335e-06, "loss": 0.5189, "step": 6472 }, { "epoch": 0.19, "grad_norm": 4.0021527577060825, "learning_rate": 9.380272726918043e-06, "loss": 0.1895, "step": 6473 }, { "epoch": 0.19, "grad_norm": 6.795829289113737, "learning_rate": 9.380049078165805e-06, "loss": 0.3764, "step": 6474 }, { "epoch": 0.19, "grad_norm": 7.55001756148159, "learning_rate": 9.379825391732544e-06, "loss": 0.4133, "step": 6475 }, { "epoch": 0.19, "grad_norm": 6.325892075666365, "learning_rate": 9.379601667620183e-06, "loss": 0.5867, "step": 6476 }, { "epoch": 0.19, "grad_norm": 2.79958337042808, "learning_rate": 9.379377905830651e-06, "loss": 0.31, "step": 6477 }, { "epoch": 0.19, "grad_norm": 5.337657288281858, "learning_rate": 9.379154106365868e-06, "loss": 0.2334, "step": 6478 }, { "epoch": 0.19, "grad_norm": 5.297361098408678, "learning_rate": 9.378930269227762e-06, "loss": 0.5446, "step": 6479 }, { "epoch": 0.19, "grad_norm": 6.3044449429845555, "learning_rate": 9.378706394418258e-06, "loss": 0.7196, "step": 6480 }, { "epoch": 0.19, "grad_norm": 5.260412630388077, "learning_rate": 9.378482481939281e-06, "loss": 0.1249, "step": 6481 }, { "epoch": 0.19, "grad_norm": 7.594604506903041, "learning_rate": 9.37825853179276e-06, "loss": 0.9364, "step": 6482 }, { "epoch": 0.19, "grad_norm": 5.167453281706304, "learning_rate": 9.378034543980619e-06, "loss": 0.6179, "step": 6483 }, { "epoch": 0.19, "grad_norm": 7.717186587994575, "learning_rate": 9.377810518504786e-06, "loss": 0.9078, "step": 6484 }, { "epoch": 0.19, "grad_norm": 7.89603524130249, "learning_rate": 9.377586455367187e-06, "loss": 0.6883, "step": 6485 }, { "epoch": 0.19, "grad_norm": 4.980546922257224, "learning_rate": 9.377362354569752e-06, "loss": 0.4234, "step": 6486 }, { "epoch": 0.19, "grad_norm": 5.035683473340104, "learning_rate": 9.377138216114408e-06, "loss": 0.2883, "step": 6487 }, { "epoch": 0.19, "grad_norm": 3.890757884012132, "learning_rate": 9.376914040003082e-06, "loss": 0.6962, "step": 6488 }, { "epoch": 0.19, "grad_norm": 4.2318163809648155, "learning_rate": 9.376689826237703e-06, "loss": 0.2985, "step": 6489 }, { "epoch": 0.19, "grad_norm": 5.667145914493654, "learning_rate": 9.376465574820202e-06, "loss": 0.4262, "step": 6490 }, { "epoch": 0.19, "grad_norm": 6.580855647749656, "learning_rate": 9.376241285752505e-06, "loss": 0.6608, "step": 6491 }, { "epoch": 0.19, "grad_norm": 10.637407844404263, "learning_rate": 9.376016959036543e-06, "loss": 1.1476, "step": 6492 }, { "epoch": 0.19, "grad_norm": 10.455733950159761, "learning_rate": 9.375792594674247e-06, "loss": 0.8499, "step": 6493 }, { "epoch": 0.19, "grad_norm": 9.049051606028495, "learning_rate": 9.375568192667545e-06, "loss": 0.5878, "step": 6494 }, { "epoch": 0.19, "grad_norm": 4.811852200988357, "learning_rate": 9.37534375301837e-06, "loss": 0.5768, "step": 6495 }, { "epoch": 0.19, "grad_norm": 7.4480072620635065, "learning_rate": 9.37511927572865e-06, "loss": 0.4758, "step": 6496 }, { "epoch": 0.19, "grad_norm": 7.043673919564721, "learning_rate": 9.374894760800317e-06, "loss": 0.8881, "step": 6497 }, { "epoch": 0.19, "grad_norm": 6.332296094115259, "learning_rate": 9.374670208235303e-06, "loss": 0.6079, "step": 6498 }, { "epoch": 0.19, "grad_norm": 4.330595832954409, "learning_rate": 9.374445618035541e-06, "loss": 0.3714, "step": 6499 }, { "epoch": 0.19, "grad_norm": 4.036616088081487, "learning_rate": 9.374220990202963e-06, "loss": 0.347, "step": 6500 }, { "epoch": 0.19, "grad_norm": 4.972752644338006, "learning_rate": 9.373996324739499e-06, "loss": 0.5503, "step": 6501 }, { "epoch": 0.19, "grad_norm": 7.283825332935496, "learning_rate": 9.373771621647083e-06, "loss": 0.6467, "step": 6502 }, { "epoch": 0.19, "grad_norm": 5.88502159030425, "learning_rate": 9.373546880927648e-06, "loss": 0.8186, "step": 6503 }, { "epoch": 0.19, "grad_norm": 7.678775488717849, "learning_rate": 9.37332210258313e-06, "loss": 0.7117, "step": 6504 }, { "epoch": 0.19, "grad_norm": 6.431524892554058, "learning_rate": 9.373097286615456e-06, "loss": 0.4195, "step": 6505 }, { "epoch": 0.19, "grad_norm": 6.250940175867578, "learning_rate": 9.372872433026568e-06, "loss": 0.6455, "step": 6506 }, { "epoch": 0.19, "grad_norm": 8.884170951740847, "learning_rate": 9.372647541818397e-06, "loss": 0.752, "step": 6507 }, { "epoch": 0.19, "grad_norm": 3.6755355120414372, "learning_rate": 9.372422612992876e-06, "loss": 0.365, "step": 6508 }, { "epoch": 0.19, "grad_norm": 4.836159696389095, "learning_rate": 9.372197646551941e-06, "loss": 0.2672, "step": 6509 }, { "epoch": 0.19, "grad_norm": 7.032674551871363, "learning_rate": 9.37197264249753e-06, "loss": 0.8885, "step": 6510 }, { "epoch": 0.19, "grad_norm": 3.139689007106908, "learning_rate": 9.371747600831575e-06, "loss": 0.1656, "step": 6511 }, { "epoch": 0.19, "grad_norm": 7.7383855112707405, "learning_rate": 9.371522521556014e-06, "loss": 0.6254, "step": 6512 }, { "epoch": 0.19, "grad_norm": 5.425241925518984, "learning_rate": 9.371297404672784e-06, "loss": 0.6428, "step": 6513 }, { "epoch": 0.19, "grad_norm": 4.878875756760193, "learning_rate": 9.37107225018382e-06, "loss": 0.2955, "step": 6514 }, { "epoch": 0.19, "grad_norm": 5.72258059090344, "learning_rate": 9.37084705809106e-06, "loss": 0.3356, "step": 6515 }, { "epoch": 0.19, "grad_norm": 7.266025753659158, "learning_rate": 9.37062182839644e-06, "loss": 0.5454, "step": 6516 }, { "epoch": 0.19, "grad_norm": 6.066631836144925, "learning_rate": 9.3703965611019e-06, "loss": 0.6555, "step": 6517 }, { "epoch": 0.19, "grad_norm": 8.962710164477686, "learning_rate": 9.370171256209375e-06, "loss": 0.6972, "step": 6518 }, { "epoch": 0.19, "grad_norm": 6.569432874440423, "learning_rate": 9.369945913720805e-06, "loss": 0.8092, "step": 6519 }, { "epoch": 0.19, "grad_norm": 5.928921053158302, "learning_rate": 9.369720533638128e-06, "loss": 0.6873, "step": 6520 }, { "epoch": 0.19, "grad_norm": 11.244267338232083, "learning_rate": 9.369495115963285e-06, "loss": 1.0258, "step": 6521 }, { "epoch": 0.19, "grad_norm": 5.708336526169081, "learning_rate": 9.369269660698213e-06, "loss": 0.4459, "step": 6522 }, { "epoch": 0.19, "grad_norm": 3.8291692749667634, "learning_rate": 9.36904416784485e-06, "loss": 0.6951, "step": 6523 }, { "epoch": 0.19, "grad_norm": 7.637724967677532, "learning_rate": 9.368818637405139e-06, "loss": 0.5473, "step": 6524 }, { "epoch": 0.19, "grad_norm": 4.240392990343576, "learning_rate": 9.36859306938102e-06, "loss": 0.2856, "step": 6525 }, { "epoch": 0.19, "grad_norm": 4.628177015590521, "learning_rate": 9.368367463774433e-06, "loss": 0.3911, "step": 6526 }, { "epoch": 0.19, "grad_norm": 6.256553647106651, "learning_rate": 9.368141820587318e-06, "loss": 0.8506, "step": 6527 }, { "epoch": 0.19, "grad_norm": 8.918722420792152, "learning_rate": 9.367916139821615e-06, "loss": 0.5931, "step": 6528 }, { "epoch": 0.19, "grad_norm": 10.713796466374578, "learning_rate": 9.367690421479269e-06, "loss": 0.6163, "step": 6529 }, { "epoch": 0.19, "grad_norm": 6.888388065194842, "learning_rate": 9.36746466556222e-06, "loss": 0.7893, "step": 6530 }, { "epoch": 0.19, "grad_norm": 5.657873942735017, "learning_rate": 9.36723887207241e-06, "loss": 0.5242, "step": 6531 }, { "epoch": 0.19, "grad_norm": 8.268029250884485, "learning_rate": 9.367013041011783e-06, "loss": 1.0961, "step": 6532 }, { "epoch": 0.19, "grad_norm": 5.804262045546495, "learning_rate": 9.366787172382278e-06, "loss": 0.644, "step": 6533 }, { "epoch": 0.19, "grad_norm": 7.398354293210334, "learning_rate": 9.366561266185843e-06, "loss": 0.7394, "step": 6534 }, { "epoch": 0.19, "grad_norm": 4.900820844366572, "learning_rate": 9.366335322424417e-06, "loss": 0.2165, "step": 6535 }, { "epoch": 0.19, "grad_norm": 4.098832913345685, "learning_rate": 9.366109341099947e-06, "loss": 0.1484, "step": 6536 }, { "epoch": 0.19, "grad_norm": 9.3005454118829, "learning_rate": 9.365883322214377e-06, "loss": 0.5604, "step": 6537 }, { "epoch": 0.19, "grad_norm": 7.487875229439934, "learning_rate": 9.365657265769648e-06, "loss": 0.4255, "step": 6538 }, { "epoch": 0.19, "grad_norm": 6.331491963921046, "learning_rate": 9.365431171767709e-06, "loss": 0.7954, "step": 6539 }, { "epoch": 0.19, "grad_norm": 6.232557409445182, "learning_rate": 9.3652050402105e-06, "loss": 0.2815, "step": 6540 }, { "epoch": 0.19, "grad_norm": 5.947011216207741, "learning_rate": 9.364978871099973e-06, "loss": 0.5461, "step": 6541 }, { "epoch": 0.19, "grad_norm": 3.9496073998533165, "learning_rate": 9.36475266443807e-06, "loss": 0.3349, "step": 6542 }, { "epoch": 0.19, "grad_norm": 5.2057817337819525, "learning_rate": 9.364526420226736e-06, "loss": 0.2245, "step": 6543 }, { "epoch": 0.19, "grad_norm": 3.9425499034715497, "learning_rate": 9.364300138467918e-06, "loss": 0.4695, "step": 6544 }, { "epoch": 0.19, "grad_norm": 5.863389656543844, "learning_rate": 9.364073819163564e-06, "loss": 0.9047, "step": 6545 }, { "epoch": 0.19, "grad_norm": 5.859223346019196, "learning_rate": 9.363847462315621e-06, "loss": 0.5694, "step": 6546 }, { "epoch": 0.19, "grad_norm": 5.124263477900847, "learning_rate": 9.363621067926034e-06, "loss": 0.3017, "step": 6547 }, { "epoch": 0.19, "grad_norm": 4.393628270944157, "learning_rate": 9.363394635996753e-06, "loss": 0.2928, "step": 6548 }, { "epoch": 0.19, "grad_norm": 10.021069646151918, "learning_rate": 9.363168166529725e-06, "loss": 0.6646, "step": 6549 }, { "epoch": 0.19, "grad_norm": 7.432981512984092, "learning_rate": 9.362941659526899e-06, "loss": 0.6144, "step": 6550 }, { "epoch": 0.19, "grad_norm": 2.9904685399056254, "learning_rate": 9.362715114990222e-06, "loss": 0.1725, "step": 6551 }, { "epoch": 0.19, "grad_norm": 4.022376681725991, "learning_rate": 9.362488532921646e-06, "loss": 0.4344, "step": 6552 }, { "epoch": 0.19, "grad_norm": 6.599827754056104, "learning_rate": 9.362261913323116e-06, "loss": 0.7195, "step": 6553 }, { "epoch": 0.19, "grad_norm": 10.455566668484824, "learning_rate": 9.362035256196584e-06, "loss": 1.011, "step": 6554 }, { "epoch": 0.19, "grad_norm": 5.036277391667412, "learning_rate": 9.361808561544002e-06, "loss": 0.7944, "step": 6555 }, { "epoch": 0.19, "grad_norm": 4.5128789821842075, "learning_rate": 9.361581829367316e-06, "loss": 0.6031, "step": 6556 }, { "epoch": 0.19, "grad_norm": 9.97343683829971, "learning_rate": 9.36135505966848e-06, "loss": 0.5313, "step": 6557 }, { "epoch": 0.19, "grad_norm": 4.734444998547866, "learning_rate": 9.361128252449441e-06, "loss": 0.7246, "step": 6558 }, { "epoch": 0.19, "grad_norm": 5.0295976093338135, "learning_rate": 9.360901407712155e-06, "loss": 0.2522, "step": 6559 }, { "epoch": 0.19, "grad_norm": 11.882448931968932, "learning_rate": 9.36067452545857e-06, "loss": 0.8179, "step": 6560 }, { "epoch": 0.19, "grad_norm": 3.5953028765387556, "learning_rate": 9.36044760569064e-06, "loss": 0.5939, "step": 6561 }, { "epoch": 0.19, "grad_norm": 9.42469535355516, "learning_rate": 9.360220648410315e-06, "loss": 0.69, "step": 6562 }, { "epoch": 0.19, "grad_norm": 6.233750314416107, "learning_rate": 9.35999365361955e-06, "loss": 0.518, "step": 6563 }, { "epoch": 0.19, "grad_norm": 1.4062562359565647, "learning_rate": 9.359766621320295e-06, "loss": 0.077, "step": 6564 }, { "epoch": 0.19, "grad_norm": 5.781118113070176, "learning_rate": 9.359539551514505e-06, "loss": 0.6274, "step": 6565 }, { "epoch": 0.19, "grad_norm": 5.5811286554249175, "learning_rate": 9.359312444204132e-06, "loss": 0.788, "step": 6566 }, { "epoch": 0.19, "grad_norm": 3.6584621486418363, "learning_rate": 9.359085299391131e-06, "loss": 0.435, "step": 6567 }, { "epoch": 0.19, "grad_norm": 7.170757530767825, "learning_rate": 9.358858117077458e-06, "loss": 0.6894, "step": 6568 }, { "epoch": 0.19, "grad_norm": 5.7363331125489045, "learning_rate": 9.358630897265063e-06, "loss": 0.2513, "step": 6569 }, { "epoch": 0.19, "grad_norm": 5.249890507964102, "learning_rate": 9.358403639955904e-06, "loss": 1.041, "step": 6570 }, { "epoch": 0.19, "grad_norm": 13.543857119514666, "learning_rate": 9.358176345151935e-06, "loss": 0.5131, "step": 6571 }, { "epoch": 0.19, "grad_norm": 3.637362086247693, "learning_rate": 9.357949012855112e-06, "loss": 0.3718, "step": 6572 }, { "epoch": 0.19, "grad_norm": 3.4476112775983516, "learning_rate": 9.357721643067389e-06, "loss": 0.2228, "step": 6573 }, { "epoch": 0.19, "grad_norm": 14.787640663954814, "learning_rate": 9.357494235790723e-06, "loss": 0.9223, "step": 6574 }, { "epoch": 0.19, "grad_norm": 6.009825053152099, "learning_rate": 9.357266791027073e-06, "loss": 0.3043, "step": 6575 }, { "epoch": 0.19, "grad_norm": 10.097093152517473, "learning_rate": 9.35703930877839e-06, "loss": 0.3487, "step": 6576 }, { "epoch": 0.19, "grad_norm": 7.15520204758619, "learning_rate": 9.356811789046639e-06, "loss": 0.5122, "step": 6577 }, { "epoch": 0.19, "grad_norm": 5.950585932692031, "learning_rate": 9.356584231833769e-06, "loss": 0.3891, "step": 6578 }, { "epoch": 0.19, "grad_norm": 8.252278909064644, "learning_rate": 9.35635663714174e-06, "loss": 0.7401, "step": 6579 }, { "epoch": 0.19, "grad_norm": 9.695875718620146, "learning_rate": 9.356129004972515e-06, "loss": 0.7059, "step": 6580 }, { "epoch": 0.19, "grad_norm": 3.9996417004329325, "learning_rate": 9.355901335328046e-06, "loss": 0.5569, "step": 6581 }, { "epoch": 0.19, "grad_norm": 2.846768673901286, "learning_rate": 9.355673628210295e-06, "loss": 0.3781, "step": 6582 }, { "epoch": 0.19, "grad_norm": 5.123629898595703, "learning_rate": 9.35544588362122e-06, "loss": 0.7139, "step": 6583 }, { "epoch": 0.19, "grad_norm": 2.954335474258695, "learning_rate": 9.35521810156278e-06, "loss": 0.131, "step": 6584 }, { "epoch": 0.19, "grad_norm": 8.471222351218229, "learning_rate": 9.354990282036935e-06, "loss": 0.7684, "step": 6585 }, { "epoch": 0.19, "grad_norm": 5.830434019641364, "learning_rate": 9.354762425045644e-06, "loss": 0.4491, "step": 6586 }, { "epoch": 0.19, "grad_norm": 3.799068790451607, "learning_rate": 9.35453453059087e-06, "loss": 0.3776, "step": 6587 }, { "epoch": 0.19, "grad_norm": 8.061571940155423, "learning_rate": 9.35430659867457e-06, "loss": 0.4827, "step": 6588 }, { "epoch": 0.19, "grad_norm": 8.109782361215933, "learning_rate": 9.354078629298707e-06, "loss": 0.6255, "step": 6589 }, { "epoch": 0.19, "grad_norm": 8.328874192884195, "learning_rate": 9.353850622465241e-06, "loss": 0.605, "step": 6590 }, { "epoch": 0.19, "grad_norm": 4.194495350624933, "learning_rate": 9.353622578176136e-06, "loss": 0.4993, "step": 6591 }, { "epoch": 0.19, "grad_norm": 7.53507335057593, "learning_rate": 9.35339449643335e-06, "loss": 0.6278, "step": 6592 }, { "epoch": 0.19, "grad_norm": 7.500099690092667, "learning_rate": 9.353166377238848e-06, "loss": 0.8148, "step": 6593 }, { "epoch": 0.19, "grad_norm": 6.470041450179042, "learning_rate": 9.352938220594592e-06, "loss": 0.5444, "step": 6594 }, { "epoch": 0.19, "grad_norm": 3.4287517200761872, "learning_rate": 9.352710026502545e-06, "loss": 0.494, "step": 6595 }, { "epoch": 0.19, "grad_norm": 2.3865834399137977, "learning_rate": 9.352481794964667e-06, "loss": 0.1391, "step": 6596 }, { "epoch": 0.19, "grad_norm": 5.796662072232345, "learning_rate": 9.352253525982927e-06, "loss": 0.5234, "step": 6597 }, { "epoch": 0.19, "grad_norm": 7.603267466711211, "learning_rate": 9.352025219559283e-06, "loss": 0.4315, "step": 6598 }, { "epoch": 0.19, "grad_norm": 7.228553885250676, "learning_rate": 9.351796875695703e-06, "loss": 0.5338, "step": 6599 }, { "epoch": 0.19, "grad_norm": 10.897735069909281, "learning_rate": 9.35156849439415e-06, "loss": 0.4384, "step": 6600 }, { "epoch": 0.19, "grad_norm": 8.832382930753944, "learning_rate": 9.351340075656589e-06, "loss": 0.915, "step": 6601 }, { "epoch": 0.19, "grad_norm": 3.224495990314732, "learning_rate": 9.351111619484987e-06, "loss": 0.2244, "step": 6602 }, { "epoch": 0.19, "grad_norm": 6.576758453429831, "learning_rate": 9.350883125881306e-06, "loss": 0.2776, "step": 6603 }, { "epoch": 0.19, "grad_norm": 4.1699812230481, "learning_rate": 9.350654594847513e-06, "loss": 0.518, "step": 6604 }, { "epoch": 0.19, "grad_norm": 6.228333263293436, "learning_rate": 9.350426026385575e-06, "loss": 0.8028, "step": 6605 }, { "epoch": 0.19, "grad_norm": 8.224116788710244, "learning_rate": 9.350197420497456e-06, "loss": 0.7416, "step": 6606 }, { "epoch": 0.19, "grad_norm": 5.645464482757307, "learning_rate": 9.349968777185126e-06, "loss": 0.5347, "step": 6607 }, { "epoch": 0.19, "grad_norm": 4.60304513565466, "learning_rate": 9.349740096450548e-06, "loss": 0.4057, "step": 6608 }, { "epoch": 0.19, "grad_norm": 5.80159392652725, "learning_rate": 9.349511378295693e-06, "loss": 0.4772, "step": 6609 }, { "epoch": 0.19, "grad_norm": 5.2440365118564065, "learning_rate": 9.349282622722527e-06, "loss": 0.4989, "step": 6610 }, { "epoch": 0.19, "grad_norm": 8.813083277669108, "learning_rate": 9.349053829733019e-06, "loss": 0.3331, "step": 6611 }, { "epoch": 0.19, "grad_norm": 11.304107672235919, "learning_rate": 9.348824999329136e-06, "loss": 0.6, "step": 6612 }, { "epoch": 0.19, "grad_norm": 11.3351478433075, "learning_rate": 9.348596131512846e-06, "loss": 0.6995, "step": 6613 }, { "epoch": 0.19, "grad_norm": 7.375528930834895, "learning_rate": 9.348367226286119e-06, "loss": 0.7078, "step": 6614 }, { "epoch": 0.19, "grad_norm": 3.327197738206062, "learning_rate": 9.348138283650926e-06, "loss": 0.1701, "step": 6615 }, { "epoch": 0.19, "grad_norm": 5.628396937077095, "learning_rate": 9.347909303609231e-06, "loss": 0.4929, "step": 6616 }, { "epoch": 0.19, "grad_norm": 7.627972476845563, "learning_rate": 9.34768028616301e-06, "loss": 0.4479, "step": 6617 }, { "epoch": 0.19, "grad_norm": 6.172144978086927, "learning_rate": 9.347451231314231e-06, "loss": 0.5938, "step": 6618 }, { "epoch": 0.19, "grad_norm": 7.391918367603927, "learning_rate": 9.347222139064862e-06, "loss": 0.6757, "step": 6619 }, { "epoch": 0.19, "grad_norm": 3.176248720117016, "learning_rate": 9.34699300941688e-06, "loss": 0.3695, "step": 6620 }, { "epoch": 0.19, "grad_norm": 6.974691005411758, "learning_rate": 9.34676384237225e-06, "loss": 0.4332, "step": 6621 }, { "epoch": 0.19, "grad_norm": 10.243119139967872, "learning_rate": 9.346534637932946e-06, "loss": 1.3438, "step": 6622 }, { "epoch": 0.19, "grad_norm": 6.73833545607726, "learning_rate": 9.34630539610094e-06, "loss": 0.5907, "step": 6623 }, { "epoch": 0.19, "grad_norm": 6.490960180731398, "learning_rate": 9.346076116878205e-06, "loss": 0.2875, "step": 6624 }, { "epoch": 0.19, "grad_norm": 4.4240085590272855, "learning_rate": 9.345846800266709e-06, "loss": 0.6048, "step": 6625 }, { "epoch": 0.19, "grad_norm": 5.1353212493430105, "learning_rate": 9.345617446268431e-06, "loss": 0.521, "step": 6626 }, { "epoch": 0.19, "grad_norm": 4.595920179877441, "learning_rate": 9.34538805488534e-06, "loss": 0.5229, "step": 6627 }, { "epoch": 0.19, "grad_norm": 4.072884243236895, "learning_rate": 9.345158626119411e-06, "loss": 0.6129, "step": 6628 }, { "epoch": 0.19, "grad_norm": 4.486855860731892, "learning_rate": 9.344929159972616e-06, "loss": 0.33, "step": 6629 }, { "epoch": 0.19, "grad_norm": 5.1075610285925235, "learning_rate": 9.344699656446932e-06, "loss": 0.2195, "step": 6630 }, { "epoch": 0.19, "grad_norm": 5.085828226207652, "learning_rate": 9.344470115544332e-06, "loss": 0.4072, "step": 6631 }, { "epoch": 0.19, "grad_norm": 16.86094521041664, "learning_rate": 9.344240537266789e-06, "loss": 0.7253, "step": 6632 }, { "epoch": 0.19, "grad_norm": 5.608192212888035, "learning_rate": 9.34401092161628e-06, "loss": 0.6233, "step": 6633 }, { "epoch": 0.19, "grad_norm": 6.708673843682204, "learning_rate": 9.343781268594781e-06, "loss": 0.4779, "step": 6634 }, { "epoch": 0.19, "grad_norm": 3.4921121269131086, "learning_rate": 9.343551578204265e-06, "loss": 0.2619, "step": 6635 }, { "epoch": 0.19, "grad_norm": 5.911607643516075, "learning_rate": 9.343321850446712e-06, "loss": 0.6526, "step": 6636 }, { "epoch": 0.19, "grad_norm": 7.228363142696054, "learning_rate": 9.343092085324094e-06, "loss": 0.6104, "step": 6637 }, { "epoch": 0.19, "grad_norm": 3.0786669878468675, "learning_rate": 9.34286228283839e-06, "loss": 0.2848, "step": 6638 }, { "epoch": 0.19, "grad_norm": 6.101378649336304, "learning_rate": 9.342632442991576e-06, "loss": 0.4201, "step": 6639 }, { "epoch": 0.19, "grad_norm": 7.123734478911001, "learning_rate": 9.34240256578563e-06, "loss": 0.4112, "step": 6640 }, { "epoch": 0.19, "grad_norm": 5.100304628137881, "learning_rate": 9.342172651222528e-06, "loss": 0.3303, "step": 6641 }, { "epoch": 0.19, "grad_norm": 7.4741848759538, "learning_rate": 9.341942699304251e-06, "loss": 1.3358, "step": 6642 }, { "epoch": 0.19, "grad_norm": 6.081129179927549, "learning_rate": 9.341712710032775e-06, "loss": 0.3226, "step": 6643 }, { "epoch": 0.19, "grad_norm": 4.216274594875046, "learning_rate": 9.341482683410081e-06, "loss": 0.2777, "step": 6644 }, { "epoch": 0.19, "grad_norm": 12.614545818412726, "learning_rate": 9.341252619438144e-06, "loss": 0.5402, "step": 6645 }, { "epoch": 0.19, "grad_norm": 5.978208068493859, "learning_rate": 9.341022518118945e-06, "loss": 0.3096, "step": 6646 }, { "epoch": 0.19, "grad_norm": 4.480869637790913, "learning_rate": 9.340792379454464e-06, "loss": 0.6987, "step": 6647 }, { "epoch": 0.19, "grad_norm": 6.2302943285979255, "learning_rate": 9.340562203446681e-06, "loss": 0.4519, "step": 6648 }, { "epoch": 0.19, "grad_norm": 15.378151919444663, "learning_rate": 9.340331990097577e-06, "loss": 0.6377, "step": 6649 }, { "epoch": 0.19, "grad_norm": 5.525320355375402, "learning_rate": 9.340101739409129e-06, "loss": 0.2963, "step": 6650 }, { "epoch": 0.19, "grad_norm": 3.6071054168009895, "learning_rate": 9.339871451383321e-06, "loss": 0.3381, "step": 6651 }, { "epoch": 0.19, "grad_norm": 6.9678337966026165, "learning_rate": 9.339641126022134e-06, "loss": 0.7764, "step": 6652 }, { "epoch": 0.19, "grad_norm": 6.195438263471702, "learning_rate": 9.339410763327547e-06, "loss": 0.2817, "step": 6653 }, { "epoch": 0.19, "grad_norm": 5.171705168272277, "learning_rate": 9.339180363301546e-06, "loss": 0.403, "step": 6654 }, { "epoch": 0.19, "grad_norm": 5.1006098700496345, "learning_rate": 9.338949925946108e-06, "loss": 0.3232, "step": 6655 }, { "epoch": 0.19, "grad_norm": 8.152717614660435, "learning_rate": 9.33871945126322e-06, "loss": 0.5968, "step": 6656 }, { "epoch": 0.19, "grad_norm": 11.712940617337452, "learning_rate": 9.338488939254861e-06, "loss": 0.7823, "step": 6657 }, { "epoch": 0.19, "grad_norm": 6.346140068323789, "learning_rate": 9.338258389923017e-06, "loss": 0.5073, "step": 6658 }, { "epoch": 0.19, "grad_norm": 7.252991552411363, "learning_rate": 9.338027803269672e-06, "loss": 0.3672, "step": 6659 }, { "epoch": 0.19, "grad_norm": 3.2518692326535814, "learning_rate": 9.337797179296804e-06, "loss": 0.3064, "step": 6660 }, { "epoch": 0.19, "grad_norm": 9.659204099063341, "learning_rate": 9.337566518006403e-06, "loss": 0.5311, "step": 6661 }, { "epoch": 0.19, "grad_norm": 4.904972274499708, "learning_rate": 9.337335819400451e-06, "loss": 0.3366, "step": 6662 }, { "epoch": 0.19, "grad_norm": 4.724516118214598, "learning_rate": 9.337105083480932e-06, "loss": 0.4644, "step": 6663 }, { "epoch": 0.19, "grad_norm": 8.890740150818846, "learning_rate": 9.336874310249835e-06, "loss": 0.9272, "step": 6664 }, { "epoch": 0.19, "grad_norm": 5.265177903027105, "learning_rate": 9.33664349970914e-06, "loss": 0.2706, "step": 6665 }, { "epoch": 0.19, "grad_norm": 4.4928795057602695, "learning_rate": 9.336412651860836e-06, "loss": 0.247, "step": 6666 }, { "epoch": 0.19, "grad_norm": 8.94582416786846, "learning_rate": 9.336181766706907e-06, "loss": 0.5229, "step": 6667 }, { "epoch": 0.19, "grad_norm": 7.468774707705438, "learning_rate": 9.33595084424934e-06, "loss": 0.5046, "step": 6668 }, { "epoch": 0.19, "grad_norm": 7.229179246728452, "learning_rate": 9.335719884490121e-06, "loss": 0.8081, "step": 6669 }, { "epoch": 0.19, "grad_norm": 5.17625451569313, "learning_rate": 9.33548888743124e-06, "loss": 0.3507, "step": 6670 }, { "epoch": 0.19, "grad_norm": 8.069047974655074, "learning_rate": 9.33525785307468e-06, "loss": 1.0274, "step": 6671 }, { "epoch": 0.19, "grad_norm": 3.765060350813347, "learning_rate": 9.335026781422431e-06, "loss": 0.4917, "step": 6672 }, { "epoch": 0.19, "grad_norm": 5.526666067251458, "learning_rate": 9.33479567247648e-06, "loss": 0.3506, "step": 6673 }, { "epoch": 0.19, "grad_norm": 4.817064127423335, "learning_rate": 9.334564526238816e-06, "loss": 0.5811, "step": 6674 }, { "epoch": 0.19, "grad_norm": 4.823802556297504, "learning_rate": 9.334333342711428e-06, "loss": 0.7281, "step": 6675 }, { "epoch": 0.19, "grad_norm": 5.905641393236041, "learning_rate": 9.334102121896303e-06, "loss": 0.525, "step": 6676 }, { "epoch": 0.19, "grad_norm": 5.117375366023818, "learning_rate": 9.333870863795431e-06, "loss": 0.5408, "step": 6677 }, { "epoch": 0.19, "grad_norm": 3.8860565081427674, "learning_rate": 9.333639568410802e-06, "loss": 0.1961, "step": 6678 }, { "epoch": 0.19, "grad_norm": 3.3272315603609868, "learning_rate": 9.333408235744404e-06, "loss": 0.2809, "step": 6679 }, { "epoch": 0.19, "grad_norm": 8.995815257881336, "learning_rate": 9.33317686579823e-06, "loss": 0.7459, "step": 6680 }, { "epoch": 0.19, "grad_norm": 4.631897474117831, "learning_rate": 9.332945458574268e-06, "loss": 0.1447, "step": 6681 }, { "epoch": 0.19, "grad_norm": 4.236470537970829, "learning_rate": 9.332714014074509e-06, "loss": 0.4344, "step": 6682 }, { "epoch": 0.19, "grad_norm": 4.339575318860025, "learning_rate": 9.332482532300945e-06, "loss": 0.4957, "step": 6683 }, { "epoch": 0.19, "grad_norm": 8.268217260797694, "learning_rate": 9.33225101325557e-06, "loss": 0.359, "step": 6684 }, { "epoch": 0.19, "grad_norm": 9.560949262152636, "learning_rate": 9.332019456940372e-06, "loss": 0.4877, "step": 6685 }, { "epoch": 0.19, "grad_norm": 7.1945302664385045, "learning_rate": 9.331787863357343e-06, "loss": 0.9258, "step": 6686 }, { "epoch": 0.19, "grad_norm": 6.955568857082097, "learning_rate": 9.331556232508476e-06, "loss": 0.4284, "step": 6687 }, { "epoch": 0.19, "grad_norm": 6.785551192539355, "learning_rate": 9.331324564395763e-06, "loss": 0.5925, "step": 6688 }, { "epoch": 0.19, "grad_norm": 4.575248569760736, "learning_rate": 9.3310928590212e-06, "loss": 0.1856, "step": 6689 }, { "epoch": 0.19, "grad_norm": 5.0945552821266, "learning_rate": 9.330861116386778e-06, "loss": 0.2493, "step": 6690 }, { "epoch": 0.19, "grad_norm": 4.288545448808699, "learning_rate": 9.33062933649449e-06, "loss": 0.3266, "step": 6691 }, { "epoch": 0.19, "grad_norm": 7.467201487369851, "learning_rate": 9.330397519346332e-06, "loss": 0.3651, "step": 6692 }, { "epoch": 0.19, "grad_norm": 4.513198517392222, "learning_rate": 9.330165664944298e-06, "loss": 0.4545, "step": 6693 }, { "epoch": 0.19, "grad_norm": 4.880391782761612, "learning_rate": 9.329933773290381e-06, "loss": 0.6576, "step": 6694 }, { "epoch": 0.19, "grad_norm": 7.052926824960563, "learning_rate": 9.329701844386577e-06, "loss": 0.6004, "step": 6695 }, { "epoch": 0.19, "grad_norm": 8.220363560082761, "learning_rate": 9.329469878234881e-06, "loss": 0.5789, "step": 6696 }, { "epoch": 0.19, "grad_norm": 5.349102707957049, "learning_rate": 9.32923787483729e-06, "loss": 0.5566, "step": 6697 }, { "epoch": 0.19, "grad_norm": 7.140952806022754, "learning_rate": 9.329005834195797e-06, "loss": 0.5961, "step": 6698 }, { "epoch": 0.19, "grad_norm": 8.664327275157747, "learning_rate": 9.328773756312399e-06, "loss": 0.7934, "step": 6699 }, { "epoch": 0.19, "grad_norm": 5.186970511381137, "learning_rate": 9.328541641189095e-06, "loss": 0.5114, "step": 6700 }, { "epoch": 0.19, "grad_norm": 7.1708165137717845, "learning_rate": 9.32830948882788e-06, "loss": 0.5066, "step": 6701 }, { "epoch": 0.19, "grad_norm": 5.790982071005764, "learning_rate": 9.328077299230752e-06, "loss": 0.4407, "step": 6702 }, { "epoch": 0.19, "grad_norm": 8.678172311458122, "learning_rate": 9.327845072399706e-06, "loss": 0.5262, "step": 6703 }, { "epoch": 0.19, "grad_norm": 11.150496701009768, "learning_rate": 9.327612808336743e-06, "loss": 0.384, "step": 6704 }, { "epoch": 0.19, "grad_norm": 6.417530641560653, "learning_rate": 9.32738050704386e-06, "loss": 0.7716, "step": 6705 }, { "epoch": 0.19, "grad_norm": 6.565034794595383, "learning_rate": 9.327148168523055e-06, "loss": 0.6224, "step": 6706 }, { "epoch": 0.19, "grad_norm": 3.1709207875292655, "learning_rate": 9.326915792776326e-06, "loss": 0.1354, "step": 6707 }, { "epoch": 0.19, "grad_norm": 4.905148425019591, "learning_rate": 9.326683379805674e-06, "loss": 0.8109, "step": 6708 }, { "epoch": 0.19, "grad_norm": 9.776508144711027, "learning_rate": 9.326450929613097e-06, "loss": 1.0857, "step": 6709 }, { "epoch": 0.19, "grad_norm": 3.966299877652617, "learning_rate": 9.326218442200597e-06, "loss": 0.5797, "step": 6710 }, { "epoch": 0.19, "grad_norm": 5.773702137912433, "learning_rate": 9.325985917570172e-06, "loss": 0.3906, "step": 6711 }, { "epoch": 0.19, "grad_norm": 6.117529926484073, "learning_rate": 9.325753355723822e-06, "loss": 0.6087, "step": 6712 }, { "epoch": 0.19, "grad_norm": 3.6950410577870025, "learning_rate": 9.325520756663548e-06, "loss": 0.5087, "step": 6713 }, { "epoch": 0.19, "grad_norm": 5.614725520421968, "learning_rate": 9.325288120391353e-06, "loss": 0.4905, "step": 6714 }, { "epoch": 0.19, "grad_norm": 5.586325038457996, "learning_rate": 9.325055446909235e-06, "loss": 0.5518, "step": 6715 }, { "epoch": 0.19, "grad_norm": 4.549792294424995, "learning_rate": 9.3248227362192e-06, "loss": 0.5189, "step": 6716 }, { "epoch": 0.19, "grad_norm": 4.2634145577790745, "learning_rate": 9.324589988323246e-06, "loss": 0.3883, "step": 6717 }, { "epoch": 0.19, "grad_norm": 6.493323895717136, "learning_rate": 9.324357203223376e-06, "loss": 0.6765, "step": 6718 }, { "epoch": 0.19, "grad_norm": 6.760921402610762, "learning_rate": 9.324124380921596e-06, "loss": 0.3849, "step": 6719 }, { "epoch": 0.19, "grad_norm": 5.1374610045500795, "learning_rate": 9.323891521419904e-06, "loss": 0.7139, "step": 6720 }, { "epoch": 0.19, "grad_norm": 7.626698039108165, "learning_rate": 9.323658624720306e-06, "loss": 0.7383, "step": 6721 }, { "epoch": 0.19, "grad_norm": 11.60378207671476, "learning_rate": 9.323425690824806e-06, "loss": 0.4541, "step": 6722 }, { "epoch": 0.19, "grad_norm": 4.411781329235847, "learning_rate": 9.323192719735405e-06, "loss": 0.4164, "step": 6723 }, { "epoch": 0.19, "grad_norm": 6.248503429525758, "learning_rate": 9.322959711454111e-06, "loss": 0.3407, "step": 6724 }, { "epoch": 0.19, "grad_norm": 6.020706408613687, "learning_rate": 9.322726665982927e-06, "loss": 0.6485, "step": 6725 }, { "epoch": 0.19, "grad_norm": 7.310698458470545, "learning_rate": 9.322493583323857e-06, "loss": 0.4783, "step": 6726 }, { "epoch": 0.19, "grad_norm": 7.618436270834623, "learning_rate": 9.322260463478908e-06, "loss": 0.3338, "step": 6727 }, { "epoch": 0.19, "grad_norm": 4.021065792552047, "learning_rate": 9.322027306450083e-06, "loss": 0.276, "step": 6728 }, { "epoch": 0.19, "grad_norm": 3.4005345155792512, "learning_rate": 9.32179411223939e-06, "loss": 0.2627, "step": 6729 }, { "epoch": 0.19, "grad_norm": 4.030368974799772, "learning_rate": 9.321560880848835e-06, "loss": 0.6392, "step": 6730 }, { "epoch": 0.19, "grad_norm": 3.7319058670916565, "learning_rate": 9.321327612280424e-06, "loss": 0.5986, "step": 6731 }, { "epoch": 0.19, "grad_norm": 5.715315115543901, "learning_rate": 9.321094306536163e-06, "loss": 0.326, "step": 6732 }, { "epoch": 0.19, "grad_norm": 11.124934335579121, "learning_rate": 9.320860963618059e-06, "loss": 0.5817, "step": 6733 }, { "epoch": 0.19, "grad_norm": 9.725990860774903, "learning_rate": 9.32062758352812e-06, "loss": 0.3853, "step": 6734 }, { "epoch": 0.19, "grad_norm": 8.725225580135758, "learning_rate": 9.320394166268354e-06, "loss": 0.4113, "step": 6735 }, { "epoch": 0.19, "grad_norm": 5.1904260586917, "learning_rate": 9.32016071184077e-06, "loss": 0.575, "step": 6736 }, { "epoch": 0.19, "grad_norm": 5.945461877858654, "learning_rate": 9.319927220247375e-06, "loss": 0.4927, "step": 6737 }, { "epoch": 0.19, "grad_norm": 8.504646321634505, "learning_rate": 9.319693691490177e-06, "loss": 0.7602, "step": 6738 }, { "epoch": 0.19, "grad_norm": 11.50526423834289, "learning_rate": 9.319460125571188e-06, "loss": 0.4481, "step": 6739 }, { "epoch": 0.19, "grad_norm": 4.7909785757953305, "learning_rate": 9.319226522492412e-06, "loss": 0.4418, "step": 6740 }, { "epoch": 0.19, "grad_norm": 6.032872312891965, "learning_rate": 9.318992882255865e-06, "loss": 0.2794, "step": 6741 }, { "epoch": 0.19, "grad_norm": 6.347388140371503, "learning_rate": 9.318759204863554e-06, "loss": 0.461, "step": 6742 }, { "epoch": 0.19, "grad_norm": 15.289722925423812, "learning_rate": 9.318525490317487e-06, "loss": 0.8738, "step": 6743 }, { "epoch": 0.19, "grad_norm": 7.335945755016489, "learning_rate": 9.31829173861968e-06, "loss": 0.561, "step": 6744 }, { "epoch": 0.19, "grad_norm": 4.952961482769535, "learning_rate": 9.318057949772139e-06, "loss": 0.4826, "step": 6745 }, { "epoch": 0.19, "grad_norm": 6.021865817796945, "learning_rate": 9.317824123776877e-06, "loss": 0.4376, "step": 6746 }, { "epoch": 0.19, "grad_norm": 8.233215597857894, "learning_rate": 9.317590260635905e-06, "loss": 0.2525, "step": 6747 }, { "epoch": 0.19, "grad_norm": 8.52257390109691, "learning_rate": 9.317356360351237e-06, "loss": 0.4322, "step": 6748 }, { "epoch": 0.19, "grad_norm": 8.910285758114593, "learning_rate": 9.317122422924883e-06, "loss": 0.8682, "step": 6749 }, { "epoch": 0.19, "grad_norm": 8.159971535483436, "learning_rate": 9.316888448358858e-06, "loss": 0.8556, "step": 6750 }, { "epoch": 0.19, "grad_norm": 12.61211712410136, "learning_rate": 9.316654436655172e-06, "loss": 0.6099, "step": 6751 }, { "epoch": 0.19, "grad_norm": 2.659235723593618, "learning_rate": 9.316420387815838e-06, "loss": 0.2742, "step": 6752 }, { "epoch": 0.19, "grad_norm": 5.241121231997372, "learning_rate": 9.316186301842873e-06, "loss": 0.7914, "step": 6753 }, { "epoch": 0.19, "grad_norm": 12.231353737494022, "learning_rate": 9.315952178738288e-06, "loss": 0.988, "step": 6754 }, { "epoch": 0.19, "grad_norm": 6.227922393979269, "learning_rate": 9.315718018504096e-06, "loss": 0.6913, "step": 6755 }, { "epoch": 0.19, "grad_norm": 4.340699586165676, "learning_rate": 9.315483821142316e-06, "loss": 0.2588, "step": 6756 }, { "epoch": 0.19, "grad_norm": 7.4108156706133625, "learning_rate": 9.31524958665496e-06, "loss": 0.6578, "step": 6757 }, { "epoch": 0.19, "grad_norm": 10.562543699636858, "learning_rate": 9.315015315044041e-06, "loss": 0.9609, "step": 6758 }, { "epoch": 0.19, "grad_norm": 4.523312067852384, "learning_rate": 9.314781006311577e-06, "loss": 0.5333, "step": 6759 }, { "epoch": 0.19, "grad_norm": 10.093400222811999, "learning_rate": 9.314546660459585e-06, "loss": 0.7069, "step": 6760 }, { "epoch": 0.19, "grad_norm": 4.369525372374885, "learning_rate": 9.314312277490077e-06, "loss": 0.4675, "step": 6761 }, { "epoch": 0.19, "grad_norm": 5.215079472961247, "learning_rate": 9.314077857405072e-06, "loss": 0.8142, "step": 6762 }, { "epoch": 0.19, "grad_norm": 5.088536923997505, "learning_rate": 9.313843400206587e-06, "loss": 0.5017, "step": 6763 }, { "epoch": 0.19, "grad_norm": 3.617867918674967, "learning_rate": 9.31360890589664e-06, "loss": 0.5843, "step": 6764 }, { "epoch": 0.19, "grad_norm": 4.31065058097312, "learning_rate": 9.313374374477244e-06, "loss": 0.2978, "step": 6765 }, { "epoch": 0.19, "grad_norm": 2.622126777936012, "learning_rate": 9.31313980595042e-06, "loss": 0.3517, "step": 6766 }, { "epoch": 0.19, "grad_norm": 5.231961733074409, "learning_rate": 9.312905200318187e-06, "loss": 0.5934, "step": 6767 }, { "epoch": 0.19, "grad_norm": 4.579784834525984, "learning_rate": 9.312670557582561e-06, "loss": 0.6357, "step": 6768 }, { "epoch": 0.19, "grad_norm": 7.966929747681486, "learning_rate": 9.31243587774556e-06, "loss": 0.9895, "step": 6769 }, { "epoch": 0.19, "grad_norm": 8.498780331339395, "learning_rate": 9.312201160809205e-06, "loss": 0.4247, "step": 6770 }, { "epoch": 0.19, "grad_norm": 7.663028503412748, "learning_rate": 9.311966406775513e-06, "loss": 0.7217, "step": 6771 }, { "epoch": 0.19, "grad_norm": 3.775923579331701, "learning_rate": 9.311731615646507e-06, "loss": 0.4046, "step": 6772 }, { "epoch": 0.19, "grad_norm": 10.176478128128865, "learning_rate": 9.311496787424204e-06, "loss": 0.7069, "step": 6773 }, { "epoch": 0.19, "grad_norm": 5.870972246954649, "learning_rate": 9.311261922110624e-06, "loss": 0.8661, "step": 6774 }, { "epoch": 0.19, "grad_norm": 2.3175019475934877, "learning_rate": 9.311027019707789e-06, "loss": 0.2971, "step": 6775 }, { "epoch": 0.19, "grad_norm": 5.966482121083964, "learning_rate": 9.310792080217719e-06, "loss": 0.9744, "step": 6776 }, { "epoch": 0.19, "grad_norm": 4.723986467518106, "learning_rate": 9.310557103642437e-06, "loss": 0.5415, "step": 6777 }, { "epoch": 0.19, "grad_norm": 4.598090683628966, "learning_rate": 9.31032208998396e-06, "loss": 0.4095, "step": 6778 }, { "epoch": 0.19, "grad_norm": 6.716287134899607, "learning_rate": 9.310087039244316e-06, "loss": 0.7349, "step": 6779 }, { "epoch": 0.19, "grad_norm": 3.730908544689889, "learning_rate": 9.309851951425523e-06, "loss": 0.2609, "step": 6780 }, { "epoch": 0.19, "grad_norm": 5.966403919522714, "learning_rate": 9.309616826529604e-06, "loss": 0.359, "step": 6781 }, { "epoch": 0.19, "grad_norm": 7.04508279479672, "learning_rate": 9.309381664558582e-06, "loss": 0.7001, "step": 6782 }, { "epoch": 0.19, "grad_norm": 7.583595676565314, "learning_rate": 9.30914646551448e-06, "loss": 0.7757, "step": 6783 }, { "epoch": 0.19, "grad_norm": 6.3465081593944825, "learning_rate": 9.30891122939932e-06, "loss": 0.3104, "step": 6784 }, { "epoch": 0.19, "grad_norm": 5.548144224854861, "learning_rate": 9.30867595621513e-06, "loss": 0.196, "step": 6785 }, { "epoch": 0.19, "grad_norm": 8.491343185158327, "learning_rate": 9.308440645963929e-06, "loss": 0.5858, "step": 6786 }, { "epoch": 0.19, "grad_norm": 11.794898790973955, "learning_rate": 9.308205298647746e-06, "loss": 0.9874, "step": 6787 }, { "epoch": 0.19, "grad_norm": 4.520332377522261, "learning_rate": 9.307969914268601e-06, "loss": 0.3934, "step": 6788 }, { "epoch": 0.19, "grad_norm": 3.9012908584327657, "learning_rate": 9.307734492828521e-06, "loss": 0.3064, "step": 6789 }, { "epoch": 0.19, "grad_norm": 6.034433028951104, "learning_rate": 9.307499034329535e-06, "loss": 0.7133, "step": 6790 }, { "epoch": 0.19, "grad_norm": 5.941033366062258, "learning_rate": 9.307263538773662e-06, "loss": 0.6179, "step": 6791 }, { "epoch": 0.19, "grad_norm": 10.080360436731606, "learning_rate": 9.307028006162933e-06, "loss": 0.388, "step": 6792 }, { "epoch": 0.19, "grad_norm": 6.782066964014903, "learning_rate": 9.30679243649937e-06, "loss": 0.749, "step": 6793 }, { "epoch": 0.19, "grad_norm": 4.880518650650454, "learning_rate": 9.306556829785003e-06, "loss": 0.5207, "step": 6794 }, { "epoch": 0.19, "grad_norm": 6.983920054798568, "learning_rate": 9.306321186021859e-06, "loss": 0.719, "step": 6795 }, { "epoch": 0.19, "grad_norm": 7.104248646760011, "learning_rate": 9.306085505211962e-06, "loss": 0.5645, "step": 6796 }, { "epoch": 0.19, "grad_norm": 5.927084450702082, "learning_rate": 9.305849787357344e-06, "loss": 0.3681, "step": 6797 }, { "epoch": 0.19, "grad_norm": 3.5294988054056518, "learning_rate": 9.305614032460028e-06, "loss": 0.4034, "step": 6798 }, { "epoch": 0.19, "grad_norm": 8.613202249938873, "learning_rate": 9.305378240522046e-06, "loss": 0.9634, "step": 6799 }, { "epoch": 0.19, "grad_norm": 7.203401705046553, "learning_rate": 9.305142411545425e-06, "loss": 0.4728, "step": 6800 }, { "epoch": 0.19, "grad_norm": 9.049427680365376, "learning_rate": 9.304906545532192e-06, "loss": 1.277, "step": 6801 }, { "epoch": 0.19, "grad_norm": 9.009031850132898, "learning_rate": 9.304670642484381e-06, "loss": 0.5516, "step": 6802 }, { "epoch": 0.19, "grad_norm": 7.23424739189561, "learning_rate": 9.304434702404018e-06, "loss": 0.3649, "step": 6803 }, { "epoch": 0.19, "grad_norm": 6.350228895732675, "learning_rate": 9.304198725293133e-06, "loss": 0.7728, "step": 6804 }, { "epoch": 0.19, "grad_norm": 8.706672424890893, "learning_rate": 9.303962711153755e-06, "loss": 0.5054, "step": 6805 }, { "epoch": 0.19, "grad_norm": 10.175999990151363, "learning_rate": 9.303726659987916e-06, "loss": 0.5613, "step": 6806 }, { "epoch": 0.19, "grad_norm": 5.58224600077854, "learning_rate": 9.303490571797647e-06, "loss": 0.2581, "step": 6807 }, { "epoch": 0.19, "grad_norm": 7.064914223217066, "learning_rate": 9.303254446584978e-06, "loss": 0.5372, "step": 6808 }, { "epoch": 0.19, "grad_norm": 6.128589784853208, "learning_rate": 9.303018284351942e-06, "loss": 0.4988, "step": 6809 }, { "epoch": 0.2, "grad_norm": 6.037275516293541, "learning_rate": 9.302782085100568e-06, "loss": 0.2467, "step": 6810 }, { "epoch": 0.2, "grad_norm": 5.079445617323832, "learning_rate": 9.302545848832891e-06, "loss": 0.3404, "step": 6811 }, { "epoch": 0.2, "grad_norm": 10.346413629557658, "learning_rate": 9.302309575550941e-06, "loss": 0.8349, "step": 6812 }, { "epoch": 0.2, "grad_norm": 6.58267264934039, "learning_rate": 9.302073265256751e-06, "loss": 0.5591, "step": 6813 }, { "epoch": 0.2, "grad_norm": 8.177216922778364, "learning_rate": 9.301836917952354e-06, "loss": 0.7144, "step": 6814 }, { "epoch": 0.2, "grad_norm": 6.024969042306723, "learning_rate": 9.301600533639785e-06, "loss": 0.3629, "step": 6815 }, { "epoch": 0.2, "grad_norm": 6.788505364528227, "learning_rate": 9.301364112321077e-06, "loss": 0.1905, "step": 6816 }, { "epoch": 0.2, "grad_norm": 3.6388118141608596, "learning_rate": 9.30112765399826e-06, "loss": 0.524, "step": 6817 }, { "epoch": 0.2, "grad_norm": 7.151013653268051, "learning_rate": 9.300891158673372e-06, "loss": 0.5242, "step": 6818 }, { "epoch": 0.2, "grad_norm": 5.33290959204724, "learning_rate": 9.30065462634845e-06, "loss": 0.5901, "step": 6819 }, { "epoch": 0.2, "grad_norm": 6.260724974629278, "learning_rate": 9.300418057025525e-06, "loss": 0.3041, "step": 6820 }, { "epoch": 0.2, "grad_norm": 6.38553343806653, "learning_rate": 9.300181450706632e-06, "loss": 0.3617, "step": 6821 }, { "epoch": 0.2, "grad_norm": 4.142257857903307, "learning_rate": 9.299944807393805e-06, "loss": 0.1855, "step": 6822 }, { "epoch": 0.2, "grad_norm": 5.767670100712723, "learning_rate": 9.299708127089085e-06, "loss": 0.6844, "step": 6823 }, { "epoch": 0.2, "grad_norm": 8.41799401370194, "learning_rate": 9.299471409794505e-06, "loss": 0.9721, "step": 6824 }, { "epoch": 0.2, "grad_norm": 7.766117870877495, "learning_rate": 9.299234655512102e-06, "loss": 0.7149, "step": 6825 }, { "epoch": 0.2, "grad_norm": 2.9249535320536038, "learning_rate": 9.298997864243913e-06, "loss": 0.2736, "step": 6826 }, { "epoch": 0.2, "grad_norm": 8.705277291515072, "learning_rate": 9.298761035991973e-06, "loss": 1.6902, "step": 6827 }, { "epoch": 0.2, "grad_norm": 3.4702743411381527, "learning_rate": 9.298524170758323e-06, "loss": 0.3448, "step": 6828 }, { "epoch": 0.2, "grad_norm": 5.753309686401471, "learning_rate": 9.298287268544996e-06, "loss": 0.3855, "step": 6829 }, { "epoch": 0.2, "grad_norm": 3.632163008189094, "learning_rate": 9.298050329354037e-06, "loss": 0.5726, "step": 6830 }, { "epoch": 0.2, "grad_norm": 7.029614270997466, "learning_rate": 9.297813353187477e-06, "loss": 0.3049, "step": 6831 }, { "epoch": 0.2, "grad_norm": 7.428084945731204, "learning_rate": 9.297576340047358e-06, "loss": 0.7788, "step": 6832 }, { "epoch": 0.2, "grad_norm": 7.763485742413014, "learning_rate": 9.29733928993572e-06, "loss": 0.6239, "step": 6833 }, { "epoch": 0.2, "grad_norm": 9.33153886801441, "learning_rate": 9.2971022028546e-06, "loss": 0.6788, "step": 6834 }, { "epoch": 0.2, "grad_norm": 4.834326110474274, "learning_rate": 9.296865078806041e-06, "loss": 0.5899, "step": 6835 }, { "epoch": 0.2, "grad_norm": 8.448618591975311, "learning_rate": 9.29662791779208e-06, "loss": 1.1771, "step": 6836 }, { "epoch": 0.2, "grad_norm": 8.067132698211338, "learning_rate": 9.296390719814755e-06, "loss": 0.5186, "step": 6837 }, { "epoch": 0.2, "grad_norm": 4.947974261567717, "learning_rate": 9.296153484876114e-06, "loss": 0.3688, "step": 6838 }, { "epoch": 0.2, "grad_norm": 6.2109659062341755, "learning_rate": 9.29591621297819e-06, "loss": 0.418, "step": 6839 }, { "epoch": 0.2, "grad_norm": 8.455236259421538, "learning_rate": 9.295678904123032e-06, "loss": 1.2591, "step": 6840 }, { "epoch": 0.2, "grad_norm": 4.843545060282997, "learning_rate": 9.295441558312673e-06, "loss": 0.3444, "step": 6841 }, { "epoch": 0.2, "grad_norm": 3.113941653864517, "learning_rate": 9.295204175549162e-06, "loss": 0.3916, "step": 6842 }, { "epoch": 0.2, "grad_norm": 5.291458015633629, "learning_rate": 9.294966755834536e-06, "loss": 0.3487, "step": 6843 }, { "epoch": 0.2, "grad_norm": 4.511574482778675, "learning_rate": 9.294729299170841e-06, "loss": 0.6443, "step": 6844 }, { "epoch": 0.2, "grad_norm": 10.383110093788208, "learning_rate": 9.294491805560118e-06, "loss": 0.8044, "step": 6845 }, { "epoch": 0.2, "grad_norm": 4.410473470032259, "learning_rate": 9.294254275004413e-06, "loss": 0.2887, "step": 6846 }, { "epoch": 0.2, "grad_norm": 3.532318173107221, "learning_rate": 9.294016707505763e-06, "loss": 0.1673, "step": 6847 }, { "epoch": 0.2, "grad_norm": 4.656280363867151, "learning_rate": 9.29377910306622e-06, "loss": 0.5692, "step": 6848 }, { "epoch": 0.2, "grad_norm": 7.31663810171172, "learning_rate": 9.293541461687823e-06, "loss": 0.414, "step": 6849 }, { "epoch": 0.2, "grad_norm": 4.149024591372586, "learning_rate": 9.293303783372615e-06, "loss": 0.458, "step": 6850 }, { "epoch": 0.2, "grad_norm": 8.558927305782623, "learning_rate": 9.293066068122646e-06, "loss": 1.1626, "step": 6851 }, { "epoch": 0.2, "grad_norm": 7.549623450937539, "learning_rate": 9.292828315939957e-06, "loss": 0.4314, "step": 6852 }, { "epoch": 0.2, "grad_norm": 3.997864779158331, "learning_rate": 9.292590526826594e-06, "loss": 0.5525, "step": 6853 }, { "epoch": 0.2, "grad_norm": 3.6072116990918945, "learning_rate": 9.292352700784606e-06, "loss": 0.3547, "step": 6854 }, { "epoch": 0.2, "grad_norm": 6.647996074218601, "learning_rate": 9.292114837816033e-06, "loss": 0.3387, "step": 6855 }, { "epoch": 0.2, "grad_norm": 2.144133762008795, "learning_rate": 9.291876937922928e-06, "loss": 0.1048, "step": 6856 }, { "epoch": 0.2, "grad_norm": 5.385929794018276, "learning_rate": 9.29163900110733e-06, "loss": 0.4622, "step": 6857 }, { "epoch": 0.2, "grad_norm": 8.828692662715723, "learning_rate": 9.291401027371292e-06, "loss": 0.6788, "step": 6858 }, { "epoch": 0.2, "grad_norm": 3.9853139331988245, "learning_rate": 9.29116301671686e-06, "loss": 0.3, "step": 6859 }, { "epoch": 0.2, "grad_norm": 8.176239540316189, "learning_rate": 9.29092496914608e-06, "loss": 0.4754, "step": 6860 }, { "epoch": 0.2, "grad_norm": 7.53143994103712, "learning_rate": 9.290686884661001e-06, "loss": 0.7097, "step": 6861 }, { "epoch": 0.2, "grad_norm": 5.326410549119224, "learning_rate": 9.29044876326367e-06, "loss": 0.4779, "step": 6862 }, { "epoch": 0.2, "grad_norm": 7.7499291201395835, "learning_rate": 9.290210604956139e-06, "loss": 0.4898, "step": 6863 }, { "epoch": 0.2, "grad_norm": 5.495691692772334, "learning_rate": 9.289972409740453e-06, "loss": 0.436, "step": 6864 }, { "epoch": 0.2, "grad_norm": 7.956759898110026, "learning_rate": 9.289734177618662e-06, "loss": 0.2605, "step": 6865 }, { "epoch": 0.2, "grad_norm": 7.345883128631585, "learning_rate": 9.289495908592816e-06, "loss": 0.2522, "step": 6866 }, { "epoch": 0.2, "grad_norm": 7.830957383810622, "learning_rate": 9.289257602664965e-06, "loss": 0.6751, "step": 6867 }, { "epoch": 0.2, "grad_norm": 4.855770675589237, "learning_rate": 9.289019259837159e-06, "loss": 0.7319, "step": 6868 }, { "epoch": 0.2, "grad_norm": 4.106948963173261, "learning_rate": 9.28878088011145e-06, "loss": 0.2784, "step": 6869 }, { "epoch": 0.2, "grad_norm": 5.188486694808535, "learning_rate": 9.288542463489885e-06, "loss": 0.5171, "step": 6870 }, { "epoch": 0.2, "grad_norm": 8.035817432044325, "learning_rate": 9.288304009974519e-06, "loss": 0.9436, "step": 6871 }, { "epoch": 0.2, "grad_norm": 6.892817038300104, "learning_rate": 9.2880655195674e-06, "loss": 0.3466, "step": 6872 }, { "epoch": 0.2, "grad_norm": 8.117100147001253, "learning_rate": 9.287826992270584e-06, "loss": 0.4241, "step": 6873 }, { "epoch": 0.2, "grad_norm": 3.706364980782831, "learning_rate": 9.287588428086116e-06, "loss": 0.416, "step": 6874 }, { "epoch": 0.2, "grad_norm": 3.544432517499664, "learning_rate": 9.287349827016055e-06, "loss": 0.3835, "step": 6875 }, { "epoch": 0.2, "grad_norm": 7.664033008950331, "learning_rate": 9.287111189062453e-06, "loss": 0.5192, "step": 6876 }, { "epoch": 0.2, "grad_norm": 13.105820074929527, "learning_rate": 9.286872514227359e-06, "loss": 0.8236, "step": 6877 }, { "epoch": 0.2, "grad_norm": 4.047565887273042, "learning_rate": 9.286633802512828e-06, "loss": 0.213, "step": 6878 }, { "epoch": 0.2, "grad_norm": 5.968650777101892, "learning_rate": 9.286395053920916e-06, "loss": 0.4293, "step": 6879 }, { "epoch": 0.2, "grad_norm": 4.031142566047761, "learning_rate": 9.286156268453675e-06, "loss": 0.3519, "step": 6880 }, { "epoch": 0.2, "grad_norm": 6.2312738645779575, "learning_rate": 9.285917446113158e-06, "loss": 0.7873, "step": 6881 }, { "epoch": 0.2, "grad_norm": 5.444780320989585, "learning_rate": 9.285678586901421e-06, "loss": 0.586, "step": 6882 }, { "epoch": 0.2, "grad_norm": 7.121818233596851, "learning_rate": 9.28543969082052e-06, "loss": 0.488, "step": 6883 }, { "epoch": 0.2, "grad_norm": 4.170553562462003, "learning_rate": 9.285200757872508e-06, "loss": 0.2804, "step": 6884 }, { "epoch": 0.2, "grad_norm": 2.8282240434027224, "learning_rate": 9.284961788059442e-06, "loss": 0.474, "step": 6885 }, { "epoch": 0.2, "grad_norm": 5.777135322943073, "learning_rate": 9.284722781383377e-06, "loss": 0.7881, "step": 6886 }, { "epoch": 0.2, "grad_norm": 7.937207254132662, "learning_rate": 9.284483737846371e-06, "loss": 0.6458, "step": 6887 }, { "epoch": 0.2, "grad_norm": 6.9928977903896135, "learning_rate": 9.284244657450475e-06, "loss": 0.6539, "step": 6888 }, { "epoch": 0.2, "grad_norm": 3.655561887757506, "learning_rate": 9.284005540197754e-06, "loss": 0.6196, "step": 6889 }, { "epoch": 0.2, "grad_norm": 4.201762833080485, "learning_rate": 9.283766386090258e-06, "loss": 0.6894, "step": 6890 }, { "epoch": 0.2, "grad_norm": 6.668509534595747, "learning_rate": 9.283527195130049e-06, "loss": 0.686, "step": 6891 }, { "epoch": 0.2, "grad_norm": 3.7029361596225217, "learning_rate": 9.283287967319184e-06, "loss": 0.3798, "step": 6892 }, { "epoch": 0.2, "grad_norm": 6.7979307560526765, "learning_rate": 9.283048702659717e-06, "loss": 0.4012, "step": 6893 }, { "epoch": 0.2, "grad_norm": 5.440481398964437, "learning_rate": 9.282809401153711e-06, "loss": 0.5115, "step": 6894 }, { "epoch": 0.2, "grad_norm": 6.107307689198308, "learning_rate": 9.282570062803222e-06, "loss": 0.9743, "step": 6895 }, { "epoch": 0.2, "grad_norm": 5.628785364360552, "learning_rate": 9.282330687610311e-06, "loss": 0.4212, "step": 6896 }, { "epoch": 0.2, "grad_norm": 6.656963149000975, "learning_rate": 9.282091275577037e-06, "loss": 0.6913, "step": 6897 }, { "epoch": 0.2, "grad_norm": 4.068826239321335, "learning_rate": 9.281851826705457e-06, "loss": 0.3236, "step": 6898 }, { "epoch": 0.2, "grad_norm": 4.860479223265995, "learning_rate": 9.281612340997635e-06, "loss": 0.1732, "step": 6899 }, { "epoch": 0.2, "grad_norm": 19.554747584056713, "learning_rate": 9.281372818455627e-06, "loss": 0.5624, "step": 6900 }, { "epoch": 0.2, "grad_norm": 11.274321819471613, "learning_rate": 9.281133259081497e-06, "loss": 0.4497, "step": 6901 }, { "epoch": 0.2, "grad_norm": 2.9483563103183945, "learning_rate": 9.280893662877304e-06, "loss": 0.2611, "step": 6902 }, { "epoch": 0.2, "grad_norm": 7.514949712730983, "learning_rate": 9.280654029845111e-06, "loss": 0.479, "step": 6903 }, { "epoch": 0.2, "grad_norm": 7.240169371525872, "learning_rate": 9.280414359986978e-06, "loss": 0.8506, "step": 6904 }, { "epoch": 0.2, "grad_norm": 26.03391635972949, "learning_rate": 9.280174653304967e-06, "loss": 0.1813, "step": 6905 }, { "epoch": 0.2, "grad_norm": 8.299427738914114, "learning_rate": 9.27993490980114e-06, "loss": 0.7647, "step": 6906 }, { "epoch": 0.2, "grad_norm": 3.8271406504091168, "learning_rate": 9.279695129477561e-06, "loss": 0.0722, "step": 6907 }, { "epoch": 0.2, "grad_norm": 7.116779470629207, "learning_rate": 9.279455312336291e-06, "loss": 0.5973, "step": 6908 }, { "epoch": 0.2, "grad_norm": 9.919367243521172, "learning_rate": 9.279215458379393e-06, "loss": 0.4009, "step": 6909 }, { "epoch": 0.2, "grad_norm": 5.414722044653983, "learning_rate": 9.278975567608935e-06, "loss": 0.5251, "step": 6910 }, { "epoch": 0.2, "grad_norm": 5.336029498757947, "learning_rate": 9.278735640026972e-06, "loss": 0.4612, "step": 6911 }, { "epoch": 0.2, "grad_norm": 7.971054222714067, "learning_rate": 9.278495675635577e-06, "loss": 0.2835, "step": 6912 }, { "epoch": 0.2, "grad_norm": 3.9937081921318898, "learning_rate": 9.278255674436809e-06, "loss": 0.3902, "step": 6913 }, { "epoch": 0.2, "grad_norm": 6.607639382458622, "learning_rate": 9.278015636432734e-06, "loss": 0.6609, "step": 6914 }, { "epoch": 0.2, "grad_norm": 4.7999118300128405, "learning_rate": 9.277775561625417e-06, "loss": 0.3029, "step": 6915 }, { "epoch": 0.2, "grad_norm": 12.684378930669467, "learning_rate": 9.277535450016924e-06, "loss": 1.1378, "step": 6916 }, { "epoch": 0.2, "grad_norm": 12.780702761101479, "learning_rate": 9.27729530160932e-06, "loss": 1.0188, "step": 6917 }, { "epoch": 0.2, "grad_norm": 8.076473812561712, "learning_rate": 9.277055116404672e-06, "loss": 0.6364, "step": 6918 }, { "epoch": 0.2, "grad_norm": 5.9544757902476615, "learning_rate": 9.276814894405044e-06, "loss": 0.5613, "step": 6919 }, { "epoch": 0.2, "grad_norm": 5.047853215609011, "learning_rate": 9.276574635612505e-06, "loss": 0.5067, "step": 6920 }, { "epoch": 0.2, "grad_norm": 5.849354142621493, "learning_rate": 9.276334340029121e-06, "loss": 1.0171, "step": 6921 }, { "epoch": 0.2, "grad_norm": 7.895942663745971, "learning_rate": 9.276094007656959e-06, "loss": 0.9628, "step": 6922 }, { "epoch": 0.2, "grad_norm": 3.459242429320971, "learning_rate": 9.275853638498086e-06, "loss": 0.3825, "step": 6923 }, { "epoch": 0.2, "grad_norm": 14.819308748173164, "learning_rate": 9.275613232554571e-06, "loss": 0.9028, "step": 6924 }, { "epoch": 0.2, "grad_norm": 7.1533829055495275, "learning_rate": 9.275372789828483e-06, "loss": 0.9175, "step": 6925 }, { "epoch": 0.2, "grad_norm": 6.131387999495221, "learning_rate": 9.275132310321889e-06, "loss": 0.7372, "step": 6926 }, { "epoch": 0.2, "grad_norm": 5.090421841421455, "learning_rate": 9.274891794036857e-06, "loss": 0.7764, "step": 6927 }, { "epoch": 0.2, "grad_norm": 4.147842177726977, "learning_rate": 9.274651240975458e-06, "loss": 0.5739, "step": 6928 }, { "epoch": 0.2, "grad_norm": 6.581142793454225, "learning_rate": 9.27441065113976e-06, "loss": 0.6681, "step": 6929 }, { "epoch": 0.2, "grad_norm": 5.232797458976755, "learning_rate": 9.274170024531834e-06, "loss": 0.3684, "step": 6930 }, { "epoch": 0.2, "grad_norm": 5.936595767585051, "learning_rate": 9.273929361153748e-06, "loss": 0.3333, "step": 6931 }, { "epoch": 0.2, "grad_norm": 9.712022838875187, "learning_rate": 9.273688661007576e-06, "loss": 0.8611, "step": 6932 }, { "epoch": 0.2, "grad_norm": 9.907052473904795, "learning_rate": 9.273447924095387e-06, "loss": 0.5002, "step": 6933 }, { "epoch": 0.2, "grad_norm": 6.959768965712247, "learning_rate": 9.27320715041925e-06, "loss": 0.7749, "step": 6934 }, { "epoch": 0.2, "grad_norm": 5.160907468324388, "learning_rate": 9.272966339981237e-06, "loss": 0.3719, "step": 6935 }, { "epoch": 0.2, "grad_norm": 4.036929897532172, "learning_rate": 9.272725492783423e-06, "loss": 0.4511, "step": 6936 }, { "epoch": 0.2, "grad_norm": 3.0640065516334585, "learning_rate": 9.272484608827878e-06, "loss": 0.2789, "step": 6937 }, { "epoch": 0.2, "grad_norm": 5.066946831703733, "learning_rate": 9.272243688116673e-06, "loss": 0.4146, "step": 6938 }, { "epoch": 0.2, "grad_norm": 7.031418387198108, "learning_rate": 9.272002730651881e-06, "loss": 0.368, "step": 6939 }, { "epoch": 0.2, "grad_norm": 9.674964522264672, "learning_rate": 9.271761736435576e-06, "loss": 0.551, "step": 6940 }, { "epoch": 0.2, "grad_norm": 6.603366149456214, "learning_rate": 9.271520705469829e-06, "loss": 0.4428, "step": 6941 }, { "epoch": 0.2, "grad_norm": 5.104609141990469, "learning_rate": 9.271279637756718e-06, "loss": 0.5232, "step": 6942 }, { "epoch": 0.2, "grad_norm": 6.827531243179112, "learning_rate": 9.271038533298314e-06, "loss": 0.4969, "step": 6943 }, { "epoch": 0.2, "grad_norm": 8.154676380577481, "learning_rate": 9.27079739209669e-06, "loss": 0.5782, "step": 6944 }, { "epoch": 0.2, "grad_norm": 6.626509800300901, "learning_rate": 9.270556214153922e-06, "loss": 0.4714, "step": 6945 }, { "epoch": 0.2, "grad_norm": 13.980022072617729, "learning_rate": 9.270314999472086e-06, "loss": 0.6596, "step": 6946 }, { "epoch": 0.2, "grad_norm": 13.553295826995287, "learning_rate": 9.270073748053255e-06, "loss": 0.9567, "step": 6947 }, { "epoch": 0.2, "grad_norm": 11.894430706933434, "learning_rate": 9.269832459899503e-06, "loss": 0.659, "step": 6948 }, { "epoch": 0.2, "grad_norm": 8.775275291841352, "learning_rate": 9.269591135012912e-06, "loss": 0.5604, "step": 6949 }, { "epoch": 0.2, "grad_norm": 5.848457806424826, "learning_rate": 9.269349773395552e-06, "loss": 0.3918, "step": 6950 }, { "epoch": 0.2, "grad_norm": 4.903361983009176, "learning_rate": 9.269108375049502e-06, "loss": 0.3747, "step": 6951 }, { "epoch": 0.2, "grad_norm": 4.532263885719179, "learning_rate": 9.268866939976838e-06, "loss": 0.4458, "step": 6952 }, { "epoch": 0.2, "grad_norm": 38.34945728261157, "learning_rate": 9.268625468179638e-06, "loss": 0.9446, "step": 6953 }, { "epoch": 0.2, "grad_norm": 7.54197100733058, "learning_rate": 9.268383959659978e-06, "loss": 0.1336, "step": 6954 }, { "epoch": 0.2, "grad_norm": 3.810719308710663, "learning_rate": 9.268142414419936e-06, "loss": 0.6624, "step": 6955 }, { "epoch": 0.2, "grad_norm": 6.0260603323877895, "learning_rate": 9.267900832461592e-06, "loss": 0.8457, "step": 6956 }, { "epoch": 0.2, "grad_norm": 7.525203002523388, "learning_rate": 9.267659213787022e-06, "loss": 0.4363, "step": 6957 }, { "epoch": 0.2, "grad_norm": 4.142751644228809, "learning_rate": 9.267417558398304e-06, "loss": 0.1962, "step": 6958 }, { "epoch": 0.2, "grad_norm": 7.542054589623528, "learning_rate": 9.267175866297519e-06, "loss": 0.8342, "step": 6959 }, { "epoch": 0.2, "grad_norm": 3.4923204571633817, "learning_rate": 9.266934137486745e-06, "loss": 0.4986, "step": 6960 }, { "epoch": 0.2, "grad_norm": 22.45992278618249, "learning_rate": 9.266692371968063e-06, "loss": 0.6676, "step": 6961 }, { "epoch": 0.2, "grad_norm": 6.999464014514148, "learning_rate": 9.26645056974355e-06, "loss": 0.6393, "step": 6962 }, { "epoch": 0.2, "grad_norm": 7.400991705865211, "learning_rate": 9.26620873081529e-06, "loss": 0.3961, "step": 6963 }, { "epoch": 0.2, "grad_norm": 4.261967330941666, "learning_rate": 9.26596685518536e-06, "loss": 0.442, "step": 6964 }, { "epoch": 0.2, "grad_norm": 3.9108917733543365, "learning_rate": 9.265724942855842e-06, "loss": 0.3154, "step": 6965 }, { "epoch": 0.2, "grad_norm": 4.978474918766255, "learning_rate": 9.26548299382882e-06, "loss": 0.6793, "step": 6966 }, { "epoch": 0.2, "grad_norm": 6.676937746393923, "learning_rate": 9.26524100810637e-06, "loss": 0.4585, "step": 6967 }, { "epoch": 0.2, "grad_norm": 6.376373068349664, "learning_rate": 9.26499898569058e-06, "loss": 0.4175, "step": 6968 }, { "epoch": 0.2, "grad_norm": 8.576109824915019, "learning_rate": 9.264756926583527e-06, "loss": 0.4245, "step": 6969 }, { "epoch": 0.2, "grad_norm": 5.349657106552188, "learning_rate": 9.264514830787295e-06, "loss": 0.5962, "step": 6970 }, { "epoch": 0.2, "grad_norm": 4.469088028080715, "learning_rate": 9.264272698303967e-06, "loss": 0.5629, "step": 6971 }, { "epoch": 0.2, "grad_norm": 7.823123819086422, "learning_rate": 9.264030529135628e-06, "loss": 0.7597, "step": 6972 }, { "epoch": 0.2, "grad_norm": 5.085378331914134, "learning_rate": 9.263788323284358e-06, "loss": 0.5101, "step": 6973 }, { "epoch": 0.2, "grad_norm": 3.6344706043057524, "learning_rate": 9.263546080752241e-06, "loss": 0.3987, "step": 6974 }, { "epoch": 0.2, "grad_norm": 8.799399520153816, "learning_rate": 9.263303801541364e-06, "loss": 0.8061, "step": 6975 }, { "epoch": 0.2, "grad_norm": 5.200394793342359, "learning_rate": 9.263061485653807e-06, "loss": 0.5001, "step": 6976 }, { "epoch": 0.2, "grad_norm": 12.087633385065317, "learning_rate": 9.262819133091659e-06, "loss": 0.5822, "step": 6977 }, { "epoch": 0.2, "grad_norm": 6.170727896020342, "learning_rate": 9.262576743857002e-06, "loss": 0.5251, "step": 6978 }, { "epoch": 0.2, "grad_norm": 8.2052399733539, "learning_rate": 9.26233431795192e-06, "loss": 0.5481, "step": 6979 }, { "epoch": 0.2, "grad_norm": 8.827119429040868, "learning_rate": 9.262091855378503e-06, "loss": 0.6821, "step": 6980 }, { "epoch": 0.2, "grad_norm": 6.700615117318667, "learning_rate": 9.261849356138835e-06, "loss": 0.5747, "step": 6981 }, { "epoch": 0.2, "grad_norm": 4.86170547878312, "learning_rate": 9.261606820235001e-06, "loss": 0.2832, "step": 6982 }, { "epoch": 0.2, "grad_norm": 3.464256830404039, "learning_rate": 9.261364247669087e-06, "loss": 0.3686, "step": 6983 }, { "epoch": 0.2, "grad_norm": 5.19800019690824, "learning_rate": 9.261121638443182e-06, "loss": 0.3507, "step": 6984 }, { "epoch": 0.2, "grad_norm": 6.96570931371196, "learning_rate": 9.260878992559374e-06, "loss": 0.5028, "step": 6985 }, { "epoch": 0.2, "grad_norm": 3.037506175721723, "learning_rate": 9.260636310019747e-06, "loss": 0.3306, "step": 6986 }, { "epoch": 0.2, "grad_norm": 6.685550227207566, "learning_rate": 9.26039359082639e-06, "loss": 0.6361, "step": 6987 }, { "epoch": 0.2, "grad_norm": 5.045123525644101, "learning_rate": 9.260150834981391e-06, "loss": 0.3462, "step": 6988 }, { "epoch": 0.2, "grad_norm": 5.075452835099864, "learning_rate": 9.25990804248684e-06, "loss": 0.6572, "step": 6989 }, { "epoch": 0.2, "grad_norm": 4.9391385027629156, "learning_rate": 9.259665213344826e-06, "loss": 0.6531, "step": 6990 }, { "epoch": 0.2, "grad_norm": 7.864312715905999, "learning_rate": 9.259422347557434e-06, "loss": 0.6207, "step": 6991 }, { "epoch": 0.2, "grad_norm": 7.593078583598946, "learning_rate": 9.259179445126758e-06, "loss": 0.5796, "step": 6992 }, { "epoch": 0.2, "grad_norm": 9.427854649021752, "learning_rate": 9.258936506054883e-06, "loss": 0.7485, "step": 6993 }, { "epoch": 0.2, "grad_norm": 9.487656253538212, "learning_rate": 9.258693530343904e-06, "loss": 0.4388, "step": 6994 }, { "epoch": 0.2, "grad_norm": 2.0825548927015007, "learning_rate": 9.258450517995909e-06, "loss": 0.0944, "step": 6995 }, { "epoch": 0.2, "grad_norm": 7.752223957046933, "learning_rate": 9.258207469012988e-06, "loss": 0.9406, "step": 6996 }, { "epoch": 0.2, "grad_norm": 3.984697725278264, "learning_rate": 9.25796438339723e-06, "loss": 0.4408, "step": 6997 }, { "epoch": 0.2, "grad_norm": 4.458200981576141, "learning_rate": 9.257721261150732e-06, "loss": 0.4571, "step": 6998 }, { "epoch": 0.2, "grad_norm": 4.674977296376306, "learning_rate": 9.257478102275582e-06, "loss": 0.3645, "step": 6999 }, { "epoch": 0.2, "grad_norm": 7.368501483327682, "learning_rate": 9.257234906773869e-06, "loss": 0.4026, "step": 7000 }, { "epoch": 0.2, "grad_norm": 5.525559725825148, "learning_rate": 9.25699167464769e-06, "loss": 0.5056, "step": 7001 }, { "epoch": 0.2, "grad_norm": 2.8611457844724684, "learning_rate": 9.256748405899136e-06, "loss": 0.3197, "step": 7002 }, { "epoch": 0.2, "grad_norm": 6.965568055527248, "learning_rate": 9.2565051005303e-06, "loss": 0.4007, "step": 7003 }, { "epoch": 0.2, "grad_norm": 3.646114385761841, "learning_rate": 9.256261758543274e-06, "loss": 0.6955, "step": 7004 }, { "epoch": 0.2, "grad_norm": 7.596672092428554, "learning_rate": 9.256018379940154e-06, "loss": 0.7247, "step": 7005 }, { "epoch": 0.2, "grad_norm": 4.73842686667605, "learning_rate": 9.25577496472303e-06, "loss": 0.2384, "step": 7006 }, { "epoch": 0.2, "grad_norm": 5.583934718335195, "learning_rate": 9.255531512893999e-06, "loss": 0.4988, "step": 7007 }, { "epoch": 0.2, "grad_norm": 8.12339009694692, "learning_rate": 9.255288024455154e-06, "loss": 0.528, "step": 7008 }, { "epoch": 0.2, "grad_norm": 11.721810310040853, "learning_rate": 9.255044499408588e-06, "loss": 1.0459, "step": 7009 }, { "epoch": 0.2, "grad_norm": 7.2500319644617965, "learning_rate": 9.254800937756401e-06, "loss": 0.4815, "step": 7010 }, { "epoch": 0.2, "grad_norm": 7.202986246291035, "learning_rate": 9.254557339500684e-06, "loss": 0.679, "step": 7011 }, { "epoch": 0.2, "grad_norm": 5.885566948066338, "learning_rate": 9.254313704643534e-06, "loss": 0.4906, "step": 7012 }, { "epoch": 0.2, "grad_norm": 2.3719408008560214, "learning_rate": 9.254070033187049e-06, "loss": 0.2026, "step": 7013 }, { "epoch": 0.2, "grad_norm": 3.099585789649039, "learning_rate": 9.25382632513332e-06, "loss": 0.2297, "step": 7014 }, { "epoch": 0.2, "grad_norm": 6.786388542263382, "learning_rate": 9.253582580484448e-06, "loss": 0.4925, "step": 7015 }, { "epoch": 0.2, "grad_norm": 3.579356352309737, "learning_rate": 9.253338799242529e-06, "loss": 0.5578, "step": 7016 }, { "epoch": 0.2, "grad_norm": 9.33237686251754, "learning_rate": 9.25309498140966e-06, "loss": 0.4824, "step": 7017 }, { "epoch": 0.2, "grad_norm": 9.80712376452675, "learning_rate": 9.25285112698794e-06, "loss": 0.6466, "step": 7018 }, { "epoch": 0.2, "grad_norm": 6.174185289082053, "learning_rate": 9.252607235979463e-06, "loss": 0.4402, "step": 7019 }, { "epoch": 0.2, "grad_norm": 4.445158032245943, "learning_rate": 9.252363308386333e-06, "loss": 0.6515, "step": 7020 }, { "epoch": 0.2, "grad_norm": 6.47265956512532, "learning_rate": 9.252119344210642e-06, "loss": 0.5277, "step": 7021 }, { "epoch": 0.2, "grad_norm": 3.1112559394580495, "learning_rate": 9.251875343454491e-06, "loss": 0.4765, "step": 7022 }, { "epoch": 0.2, "grad_norm": 7.170541876263981, "learning_rate": 9.251631306119982e-06, "loss": 0.489, "step": 7023 }, { "epoch": 0.2, "grad_norm": 8.828029071658014, "learning_rate": 9.251387232209212e-06, "loss": 0.8682, "step": 7024 }, { "epoch": 0.2, "grad_norm": 6.208779551537668, "learning_rate": 9.25114312172428e-06, "loss": 0.8802, "step": 7025 }, { "epoch": 0.2, "grad_norm": 7.123418599024367, "learning_rate": 9.250898974667288e-06, "loss": 0.6381, "step": 7026 }, { "epoch": 0.2, "grad_norm": 6.665648867478988, "learning_rate": 9.250654791040336e-06, "loss": 0.9726, "step": 7027 }, { "epoch": 0.2, "grad_norm": 8.80331548309346, "learning_rate": 9.250410570845523e-06, "loss": 0.657, "step": 7028 }, { "epoch": 0.2, "grad_norm": 4.765665348069784, "learning_rate": 9.250166314084952e-06, "loss": 0.387, "step": 7029 }, { "epoch": 0.2, "grad_norm": 9.997930932569702, "learning_rate": 9.249922020760725e-06, "loss": 0.5602, "step": 7030 }, { "epoch": 0.2, "grad_norm": 6.19940050672331, "learning_rate": 9.249677690874938e-06, "loss": 0.431, "step": 7031 }, { "epoch": 0.2, "grad_norm": 7.669622459619228, "learning_rate": 9.2494333244297e-06, "loss": 0.2116, "step": 7032 }, { "epoch": 0.2, "grad_norm": 7.415952869598844, "learning_rate": 9.24918892142711e-06, "loss": 0.5679, "step": 7033 }, { "epoch": 0.2, "grad_norm": 8.314058716989454, "learning_rate": 9.24894448186927e-06, "loss": 0.7265, "step": 7034 }, { "epoch": 0.2, "grad_norm": 4.381311741066019, "learning_rate": 9.248700005758286e-06, "loss": 0.4426, "step": 7035 }, { "epoch": 0.2, "grad_norm": 6.48136501081461, "learning_rate": 9.248455493096258e-06, "loss": 0.8335, "step": 7036 }, { "epoch": 0.2, "grad_norm": 5.206944710312138, "learning_rate": 9.24821094388529e-06, "loss": 0.5301, "step": 7037 }, { "epoch": 0.2, "grad_norm": 8.29871090909218, "learning_rate": 9.247966358127486e-06, "loss": 0.3337, "step": 7038 }, { "epoch": 0.2, "grad_norm": 4.158305068802318, "learning_rate": 9.24772173582495e-06, "loss": 0.1722, "step": 7039 }, { "epoch": 0.2, "grad_norm": 7.3622750239818915, "learning_rate": 9.247477076979787e-06, "loss": 0.3132, "step": 7040 }, { "epoch": 0.2, "grad_norm": 7.79107560416559, "learning_rate": 9.247232381594102e-06, "loss": 0.4309, "step": 7041 }, { "epoch": 0.2, "grad_norm": 5.5957633715943045, "learning_rate": 9.24698764967e-06, "loss": 0.9189, "step": 7042 }, { "epoch": 0.2, "grad_norm": 8.811759322403049, "learning_rate": 9.246742881209587e-06, "loss": 0.2793, "step": 7043 }, { "epoch": 0.2, "grad_norm": 5.779023520143737, "learning_rate": 9.246498076214967e-06, "loss": 0.402, "step": 7044 }, { "epoch": 0.2, "grad_norm": 5.046968441502673, "learning_rate": 9.246253234688247e-06, "loss": 0.4492, "step": 7045 }, { "epoch": 0.2, "grad_norm": 6.662260037622287, "learning_rate": 9.246008356631533e-06, "loss": 0.5732, "step": 7046 }, { "epoch": 0.2, "grad_norm": 5.5105215782557835, "learning_rate": 9.245763442046933e-06, "loss": 0.5352, "step": 7047 }, { "epoch": 0.2, "grad_norm": 5.111841335631196, "learning_rate": 9.245518490936551e-06, "loss": 0.4127, "step": 7048 }, { "epoch": 0.2, "grad_norm": 12.009479275483251, "learning_rate": 9.245273503302497e-06, "loss": 0.5973, "step": 7049 }, { "epoch": 0.2, "grad_norm": 7.714474625267522, "learning_rate": 9.245028479146877e-06, "loss": 0.5199, "step": 7050 }, { "epoch": 0.2, "grad_norm": 5.22791549892184, "learning_rate": 9.244783418471801e-06, "loss": 0.5368, "step": 7051 }, { "epoch": 0.2, "grad_norm": 12.486130649455655, "learning_rate": 9.244538321279377e-06, "loss": 0.485, "step": 7052 }, { "epoch": 0.2, "grad_norm": 5.492441879610864, "learning_rate": 9.244293187571709e-06, "loss": 0.7128, "step": 7053 }, { "epoch": 0.2, "grad_norm": 5.770148097737545, "learning_rate": 9.244048017350912e-06, "loss": 0.9514, "step": 7054 }, { "epoch": 0.2, "grad_norm": 5.3806308261637295, "learning_rate": 9.24380281061909e-06, "loss": 0.284, "step": 7055 }, { "epoch": 0.2, "grad_norm": 11.89429712926404, "learning_rate": 9.243557567378357e-06, "loss": 0.6238, "step": 7056 }, { "epoch": 0.2, "grad_norm": 6.592160652577952, "learning_rate": 9.243312287630821e-06, "loss": 0.8385, "step": 7057 }, { "epoch": 0.2, "grad_norm": 6.766991950737073, "learning_rate": 9.24306697137859e-06, "loss": 0.5728, "step": 7058 }, { "epoch": 0.2, "grad_norm": 5.271718017556366, "learning_rate": 9.242821618623779e-06, "loss": 0.4053, "step": 7059 }, { "epoch": 0.2, "grad_norm": 4.7465391601535165, "learning_rate": 9.242576229368494e-06, "loss": 0.4953, "step": 7060 }, { "epoch": 0.2, "grad_norm": 7.971255159225338, "learning_rate": 9.242330803614848e-06, "loss": 0.5744, "step": 7061 }, { "epoch": 0.2, "grad_norm": 7.578980073175082, "learning_rate": 9.242085341364952e-06, "loss": 0.5023, "step": 7062 }, { "epoch": 0.2, "grad_norm": 5.87489140186988, "learning_rate": 9.241839842620919e-06, "loss": 0.5695, "step": 7063 }, { "epoch": 0.2, "grad_norm": 2.8070695585147205, "learning_rate": 9.24159430738486e-06, "loss": 0.1756, "step": 7064 }, { "epoch": 0.2, "grad_norm": 5.604887248325052, "learning_rate": 9.241348735658888e-06, "loss": 0.6158, "step": 7065 }, { "epoch": 0.2, "grad_norm": 9.824488029050622, "learning_rate": 9.241103127445113e-06, "loss": 0.5876, "step": 7066 }, { "epoch": 0.2, "grad_norm": 7.33731862352521, "learning_rate": 9.240857482745653e-06, "loss": 0.9953, "step": 7067 }, { "epoch": 0.2, "grad_norm": 6.483431239561546, "learning_rate": 9.240611801562615e-06, "loss": 0.4103, "step": 7068 }, { "epoch": 0.2, "grad_norm": 9.396280220270372, "learning_rate": 9.240366083898118e-06, "loss": 0.8141, "step": 7069 }, { "epoch": 0.2, "grad_norm": 1.633275929078022, "learning_rate": 9.240120329754273e-06, "loss": 0.0847, "step": 7070 }, { "epoch": 0.2, "grad_norm": 6.119727687636534, "learning_rate": 9.239874539133195e-06, "loss": 0.5435, "step": 7071 }, { "epoch": 0.2, "grad_norm": 6.0177230545118325, "learning_rate": 9.239628712036999e-06, "loss": 0.7471, "step": 7072 }, { "epoch": 0.2, "grad_norm": 9.095386371007832, "learning_rate": 9.239382848467796e-06, "loss": 0.8146, "step": 7073 }, { "epoch": 0.2, "grad_norm": 6.7920339598149315, "learning_rate": 9.239136948427708e-06, "loss": 0.45, "step": 7074 }, { "epoch": 0.2, "grad_norm": 6.286070117694095, "learning_rate": 9.238891011918846e-06, "loss": 0.6021, "step": 7075 }, { "epoch": 0.2, "grad_norm": 4.157572543026881, "learning_rate": 9.238645038943325e-06, "loss": 0.4271, "step": 7076 }, { "epoch": 0.2, "grad_norm": 4.327019560626651, "learning_rate": 9.238399029503264e-06, "loss": 0.5216, "step": 7077 }, { "epoch": 0.2, "grad_norm": 8.628039722788012, "learning_rate": 9.238152983600779e-06, "loss": 0.3937, "step": 7078 }, { "epoch": 0.2, "grad_norm": 9.705045844730854, "learning_rate": 9.237906901237984e-06, "loss": 0.7095, "step": 7079 }, { "epoch": 0.2, "grad_norm": 2.948717511763376, "learning_rate": 9.237660782416997e-06, "loss": 0.2281, "step": 7080 }, { "epoch": 0.2, "grad_norm": 4.574438856971493, "learning_rate": 9.237414627139937e-06, "loss": 0.7218, "step": 7081 }, { "epoch": 0.2, "grad_norm": 7.666731841045281, "learning_rate": 9.237168435408922e-06, "loss": 0.2959, "step": 7082 }, { "epoch": 0.2, "grad_norm": 7.108103342871048, "learning_rate": 9.236922207226067e-06, "loss": 0.6778, "step": 7083 }, { "epoch": 0.2, "grad_norm": 10.825264336792232, "learning_rate": 9.236675942593492e-06, "loss": 0.2545, "step": 7084 }, { "epoch": 0.2, "grad_norm": 6.344506918563757, "learning_rate": 9.236429641513316e-06, "loss": 0.4414, "step": 7085 }, { "epoch": 0.2, "grad_norm": 4.4443362861956714, "learning_rate": 9.236183303987658e-06, "loss": 0.4005, "step": 7086 }, { "epoch": 0.2, "grad_norm": 5.779697910948135, "learning_rate": 9.235936930018636e-06, "loss": 0.166, "step": 7087 }, { "epoch": 0.2, "grad_norm": 3.3599458431239015, "learning_rate": 9.235690519608368e-06, "loss": 0.3851, "step": 7088 }, { "epoch": 0.2, "grad_norm": 5.2146499883537345, "learning_rate": 9.23544407275898e-06, "loss": 0.5262, "step": 7089 }, { "epoch": 0.2, "grad_norm": 5.48169540631253, "learning_rate": 9.235197589472586e-06, "loss": 0.6675, "step": 7090 }, { "epoch": 0.2, "grad_norm": 5.503710448854843, "learning_rate": 9.234951069751308e-06, "loss": 0.5495, "step": 7091 }, { "epoch": 0.2, "grad_norm": 12.027427165085049, "learning_rate": 9.234704513597268e-06, "loss": 0.5344, "step": 7092 }, { "epoch": 0.2, "grad_norm": 5.773766328896332, "learning_rate": 9.234457921012586e-06, "loss": 0.5288, "step": 7093 }, { "epoch": 0.2, "grad_norm": 7.099641009106793, "learning_rate": 9.234211291999385e-06, "loss": 0.6991, "step": 7094 }, { "epoch": 0.2, "grad_norm": 6.309371040573502, "learning_rate": 9.233964626559787e-06, "loss": 0.6183, "step": 7095 }, { "epoch": 0.2, "grad_norm": 8.155855717391656, "learning_rate": 9.233717924695908e-06, "loss": 0.5656, "step": 7096 }, { "epoch": 0.2, "grad_norm": 7.2319413905532794, "learning_rate": 9.23347118640988e-06, "loss": 0.5037, "step": 7097 }, { "epoch": 0.2, "grad_norm": 4.174002815557278, "learning_rate": 9.233224411703815e-06, "loss": 0.7647, "step": 7098 }, { "epoch": 0.2, "grad_norm": 1.8959191105549495, "learning_rate": 9.232977600579846e-06, "loss": 0.1446, "step": 7099 }, { "epoch": 0.2, "grad_norm": 4.2110747985537955, "learning_rate": 9.23273075304009e-06, "loss": 0.2807, "step": 7100 }, { "epoch": 0.2, "grad_norm": 8.078665562146675, "learning_rate": 9.232483869086675e-06, "loss": 0.8005, "step": 7101 }, { "epoch": 0.2, "grad_norm": 6.954128304435311, "learning_rate": 9.23223694872172e-06, "loss": 0.4614, "step": 7102 }, { "epoch": 0.2, "grad_norm": 2.8427650087232266, "learning_rate": 9.231989991947353e-06, "loss": 0.3813, "step": 7103 }, { "epoch": 0.2, "grad_norm": 12.33948050915596, "learning_rate": 9.231742998765696e-06, "loss": 0.8039, "step": 7104 }, { "epoch": 0.2, "grad_norm": 6.835753694515516, "learning_rate": 9.231495969178874e-06, "loss": 0.5989, "step": 7105 }, { "epoch": 0.2, "grad_norm": 4.987224852350781, "learning_rate": 9.231248903189014e-06, "loss": 0.1316, "step": 7106 }, { "epoch": 0.2, "grad_norm": 6.83370998166405, "learning_rate": 9.231001800798243e-06, "loss": 0.5097, "step": 7107 }, { "epoch": 0.2, "grad_norm": 10.418608603979704, "learning_rate": 9.230754662008682e-06, "loss": 0.7338, "step": 7108 }, { "epoch": 0.2, "grad_norm": 7.1495911714740705, "learning_rate": 9.230507486822459e-06, "loss": 0.4314, "step": 7109 }, { "epoch": 0.2, "grad_norm": 5.504593881357238, "learning_rate": 9.230260275241702e-06, "loss": 0.5079, "step": 7110 }, { "epoch": 0.2, "grad_norm": 4.74218785193252, "learning_rate": 9.230013027268539e-06, "loss": 0.5621, "step": 7111 }, { "epoch": 0.2, "grad_norm": 6.591381714356855, "learning_rate": 9.229765742905092e-06, "loss": 0.8703, "step": 7112 }, { "epoch": 0.2, "grad_norm": 5.33190770268095, "learning_rate": 9.229518422153492e-06, "loss": 0.265, "step": 7113 }, { "epoch": 0.2, "grad_norm": 9.672669854111037, "learning_rate": 9.229271065015863e-06, "loss": 0.67, "step": 7114 }, { "epoch": 0.2, "grad_norm": 7.801344880289753, "learning_rate": 9.22902367149434e-06, "loss": 0.3937, "step": 7115 }, { "epoch": 0.2, "grad_norm": 8.593383337348403, "learning_rate": 9.228776241591044e-06, "loss": 0.3936, "step": 7116 }, { "epoch": 0.2, "grad_norm": 4.547758875142567, "learning_rate": 9.228528775308108e-06, "loss": 0.2445, "step": 7117 }, { "epoch": 0.2, "grad_norm": 5.047700749448956, "learning_rate": 9.22828127264766e-06, "loss": 0.4733, "step": 7118 }, { "epoch": 0.2, "grad_norm": 5.425441481386039, "learning_rate": 9.228033733611828e-06, "loss": 0.5516, "step": 7119 }, { "epoch": 0.2, "grad_norm": 6.659877415522662, "learning_rate": 9.227786158202742e-06, "loss": 0.6312, "step": 7120 }, { "epoch": 0.2, "grad_norm": 5.51273337149174, "learning_rate": 9.227538546422535e-06, "loss": 0.526, "step": 7121 }, { "epoch": 0.2, "grad_norm": 4.2869137521658764, "learning_rate": 9.227290898273331e-06, "loss": 0.4819, "step": 7122 }, { "epoch": 0.2, "grad_norm": 3.0513082872048614, "learning_rate": 9.227043213757266e-06, "loss": 0.4781, "step": 7123 }, { "epoch": 0.2, "grad_norm": 3.9600676535115906, "learning_rate": 9.226795492876465e-06, "loss": 0.135, "step": 7124 }, { "epoch": 0.2, "grad_norm": 6.418952666800328, "learning_rate": 9.226547735633067e-06, "loss": 0.3304, "step": 7125 }, { "epoch": 0.2, "grad_norm": 9.425295890393043, "learning_rate": 9.226299942029197e-06, "loss": 0.6063, "step": 7126 }, { "epoch": 0.2, "grad_norm": 7.115279141792331, "learning_rate": 9.22605211206699e-06, "loss": 0.4432, "step": 7127 }, { "epoch": 0.2, "grad_norm": 5.419032778285829, "learning_rate": 9.225804245748578e-06, "loss": 0.6836, "step": 7128 }, { "epoch": 0.2, "grad_norm": 5.0845221499934885, "learning_rate": 9.225556343076089e-06, "loss": 0.3357, "step": 7129 }, { "epoch": 0.2, "grad_norm": 6.148815022260672, "learning_rate": 9.225308404051663e-06, "loss": 0.2049, "step": 7130 }, { "epoch": 0.2, "grad_norm": 11.588417163422951, "learning_rate": 9.225060428677426e-06, "loss": 0.5645, "step": 7131 }, { "epoch": 0.2, "grad_norm": 2.6406058813001136, "learning_rate": 9.224812416955517e-06, "loss": 0.2544, "step": 7132 }, { "epoch": 0.2, "grad_norm": 5.716434823090298, "learning_rate": 9.224564368888064e-06, "loss": 0.5713, "step": 7133 }, { "epoch": 0.2, "grad_norm": 4.902190576113326, "learning_rate": 9.224316284477206e-06, "loss": 0.3198, "step": 7134 }, { "epoch": 0.2, "grad_norm": 6.651129817353593, "learning_rate": 9.224068163725075e-06, "loss": 0.2638, "step": 7135 }, { "epoch": 0.2, "grad_norm": 5.320464861644529, "learning_rate": 9.223820006633804e-06, "loss": 0.3771, "step": 7136 }, { "epoch": 0.2, "grad_norm": 5.121235325396321, "learning_rate": 9.223571813205531e-06, "loss": 0.3064, "step": 7137 }, { "epoch": 0.2, "grad_norm": 7.861042793734915, "learning_rate": 9.22332358344239e-06, "loss": 0.3823, "step": 7138 }, { "epoch": 0.2, "grad_norm": 6.780938734654548, "learning_rate": 9.223075317346514e-06, "loss": 0.4242, "step": 7139 }, { "epoch": 0.2, "grad_norm": 8.590409284972958, "learning_rate": 9.222827014920045e-06, "loss": 0.4942, "step": 7140 }, { "epoch": 0.2, "grad_norm": 7.800835408198016, "learning_rate": 9.222578676165111e-06, "loss": 0.701, "step": 7141 }, { "epoch": 0.2, "grad_norm": 6.09635877276386, "learning_rate": 9.222330301083855e-06, "loss": 0.6571, "step": 7142 }, { "epoch": 0.2, "grad_norm": 5.946220178278748, "learning_rate": 9.22208188967841e-06, "loss": 0.5963, "step": 7143 }, { "epoch": 0.2, "grad_norm": 5.247707297749968, "learning_rate": 9.221833441950915e-06, "loss": 0.4822, "step": 7144 }, { "epoch": 0.2, "grad_norm": 9.700543636150144, "learning_rate": 9.221584957903507e-06, "loss": 0.8526, "step": 7145 }, { "epoch": 0.2, "grad_norm": 9.055211839478119, "learning_rate": 9.221336437538323e-06, "loss": 0.5312, "step": 7146 }, { "epoch": 0.2, "grad_norm": 5.214742846709309, "learning_rate": 9.2210878808575e-06, "loss": 0.3034, "step": 7147 }, { "epoch": 0.2, "grad_norm": 9.557142963690456, "learning_rate": 9.22083928786318e-06, "loss": 0.431, "step": 7148 }, { "epoch": 0.2, "grad_norm": 4.818317365211955, "learning_rate": 9.220590658557499e-06, "loss": 0.3851, "step": 7149 }, { "epoch": 0.2, "grad_norm": 8.265779028685063, "learning_rate": 9.220341992942595e-06, "loss": 0.7522, "step": 7150 }, { "epoch": 0.2, "grad_norm": 10.5105271020134, "learning_rate": 9.22009329102061e-06, "loss": 0.5862, "step": 7151 }, { "epoch": 0.2, "grad_norm": 3.912542234442666, "learning_rate": 9.21984455279368e-06, "loss": 0.5517, "step": 7152 }, { "epoch": 0.2, "grad_norm": 5.981462053548001, "learning_rate": 9.219595778263949e-06, "loss": 0.4983, "step": 7153 }, { "epoch": 0.2, "grad_norm": 7.100496554136733, "learning_rate": 9.219346967433552e-06, "loss": 0.2562, "step": 7154 }, { "epoch": 0.2, "grad_norm": 6.233231671502165, "learning_rate": 9.219098120304634e-06, "loss": 0.3065, "step": 7155 }, { "epoch": 0.2, "grad_norm": 3.332534224369509, "learning_rate": 9.218849236879335e-06, "loss": 0.4313, "step": 7156 }, { "epoch": 0.2, "grad_norm": 2.349953782357595, "learning_rate": 9.218600317159796e-06, "loss": 0.2046, "step": 7157 }, { "epoch": 0.2, "grad_norm": 6.865433713099976, "learning_rate": 9.218351361148156e-06, "loss": 0.5572, "step": 7158 }, { "epoch": 0.21, "grad_norm": 2.170006339200161, "learning_rate": 9.21810236884656e-06, "loss": 0.1923, "step": 7159 }, { "epoch": 0.21, "grad_norm": 4.027635240042357, "learning_rate": 9.21785334025715e-06, "loss": 0.2996, "step": 7160 }, { "epoch": 0.21, "grad_norm": 8.603037653104456, "learning_rate": 9.217604275382064e-06, "loss": 0.7513, "step": 7161 }, { "epoch": 0.21, "grad_norm": 7.383614116120335, "learning_rate": 9.21735517422345e-06, "loss": 0.9655, "step": 7162 }, { "epoch": 0.21, "grad_norm": 5.886223400717708, "learning_rate": 9.217106036783448e-06, "loss": 0.749, "step": 7163 }, { "epoch": 0.21, "grad_norm": 5.125169332544619, "learning_rate": 9.2168568630642e-06, "loss": 0.4676, "step": 7164 }, { "epoch": 0.21, "grad_norm": 6.533982430865602, "learning_rate": 9.216607653067855e-06, "loss": 0.7424, "step": 7165 }, { "epoch": 0.21, "grad_norm": 4.399107914136236, "learning_rate": 9.21635840679655e-06, "loss": 0.2594, "step": 7166 }, { "epoch": 0.21, "grad_norm": 3.8609340468560664, "learning_rate": 9.216109124252436e-06, "loss": 0.513, "step": 7167 }, { "epoch": 0.21, "grad_norm": 5.17579654331467, "learning_rate": 9.215859805437652e-06, "loss": 0.251, "step": 7168 }, { "epoch": 0.21, "grad_norm": 4.062875026118758, "learning_rate": 9.215610450354348e-06, "loss": 0.5636, "step": 7169 }, { "epoch": 0.21, "grad_norm": 6.693229110781496, "learning_rate": 9.215361059004664e-06, "loss": 0.7475, "step": 7170 }, { "epoch": 0.21, "grad_norm": 11.942756094866857, "learning_rate": 9.215111631390747e-06, "loss": 0.984, "step": 7171 }, { "epoch": 0.21, "grad_norm": 3.7224692650955076, "learning_rate": 9.214862167514746e-06, "loss": 0.2703, "step": 7172 }, { "epoch": 0.21, "grad_norm": 8.02787323850598, "learning_rate": 9.214612667378803e-06, "loss": 0.528, "step": 7173 }, { "epoch": 0.21, "grad_norm": 7.88191137843728, "learning_rate": 9.214363130985066e-06, "loss": 0.9015, "step": 7174 }, { "epoch": 0.21, "grad_norm": 6.8346720020034235, "learning_rate": 9.214113558335685e-06, "loss": 0.6297, "step": 7175 }, { "epoch": 0.21, "grad_norm": 6.150844466375014, "learning_rate": 9.2138639494328e-06, "loss": 0.6834, "step": 7176 }, { "epoch": 0.21, "grad_norm": 5.388470868920635, "learning_rate": 9.213614304278564e-06, "loss": 0.5269, "step": 7177 }, { "epoch": 0.21, "grad_norm": 5.816517333743431, "learning_rate": 9.213364622875123e-06, "loss": 0.1914, "step": 7178 }, { "epoch": 0.21, "grad_norm": 8.852715251366517, "learning_rate": 9.213114905224626e-06, "loss": 0.7194, "step": 7179 }, { "epoch": 0.21, "grad_norm": 11.271893369756482, "learning_rate": 9.212865151329218e-06, "loss": 0.918, "step": 7180 }, { "epoch": 0.21, "grad_norm": 6.540927197236516, "learning_rate": 9.21261536119105e-06, "loss": 0.3931, "step": 7181 }, { "epoch": 0.21, "grad_norm": 4.344609648901442, "learning_rate": 9.21236553481227e-06, "loss": 0.3576, "step": 7182 }, { "epoch": 0.21, "grad_norm": 7.070631022154433, "learning_rate": 9.21211567219503e-06, "loss": 0.8966, "step": 7183 }, { "epoch": 0.21, "grad_norm": 3.216425380320963, "learning_rate": 9.211865773341477e-06, "loss": 0.4053, "step": 7184 }, { "epoch": 0.21, "grad_norm": 5.976360123143743, "learning_rate": 9.211615838253762e-06, "loss": 0.663, "step": 7185 }, { "epoch": 0.21, "grad_norm": 6.660320344844329, "learning_rate": 9.211365866934033e-06, "loss": 0.4602, "step": 7186 }, { "epoch": 0.21, "grad_norm": 10.023043974884125, "learning_rate": 9.211115859384443e-06, "loss": 0.3954, "step": 7187 }, { "epoch": 0.21, "grad_norm": 5.252935224800716, "learning_rate": 9.210865815607142e-06, "loss": 0.6017, "step": 7188 }, { "epoch": 0.21, "grad_norm": 7.298672832401856, "learning_rate": 9.21061573560428e-06, "loss": 0.7514, "step": 7189 }, { "epoch": 0.21, "grad_norm": 3.5235959374781016, "learning_rate": 9.21036561937801e-06, "loss": 0.2962, "step": 7190 }, { "epoch": 0.21, "grad_norm": 8.942370494814465, "learning_rate": 9.210115466930481e-06, "loss": 1.0096, "step": 7191 }, { "epoch": 0.21, "grad_norm": 9.635096617186035, "learning_rate": 9.20986527826385e-06, "loss": 0.7, "step": 7192 }, { "epoch": 0.21, "grad_norm": 8.779438958805766, "learning_rate": 9.209615053380264e-06, "loss": 0.959, "step": 7193 }, { "epoch": 0.21, "grad_norm": 3.895166312793889, "learning_rate": 9.209364792281878e-06, "loss": 0.4653, "step": 7194 }, { "epoch": 0.21, "grad_norm": 7.298557193781582, "learning_rate": 9.209114494970846e-06, "loss": 0.7562, "step": 7195 }, { "epoch": 0.21, "grad_norm": 8.046449520380602, "learning_rate": 9.20886416144932e-06, "loss": 0.5309, "step": 7196 }, { "epoch": 0.21, "grad_norm": 4.76212377288672, "learning_rate": 9.208613791719454e-06, "loss": 0.6679, "step": 7197 }, { "epoch": 0.21, "grad_norm": 5.893191155549734, "learning_rate": 9.208363385783402e-06, "loss": 0.7117, "step": 7198 }, { "epoch": 0.21, "grad_norm": 4.775524575942767, "learning_rate": 9.208112943643316e-06, "loss": 0.413, "step": 7199 }, { "epoch": 0.21, "grad_norm": 7.572334043259147, "learning_rate": 9.207862465301356e-06, "loss": 0.4828, "step": 7200 }, { "epoch": 0.21, "grad_norm": 4.265403266705085, "learning_rate": 9.20761195075967e-06, "loss": 0.2003, "step": 7201 }, { "epoch": 0.21, "grad_norm": 6.4154085147861455, "learning_rate": 9.207361400020418e-06, "loss": 0.788, "step": 7202 }, { "epoch": 0.21, "grad_norm": 4.946715482837653, "learning_rate": 9.207110813085753e-06, "loss": 0.6046, "step": 7203 }, { "epoch": 0.21, "grad_norm": 3.0184265721659718, "learning_rate": 9.206860189957834e-06, "loss": 0.4436, "step": 7204 }, { "epoch": 0.21, "grad_norm": 4.934696174011473, "learning_rate": 9.206609530638813e-06, "loss": 1.0451, "step": 7205 }, { "epoch": 0.21, "grad_norm": 5.069678597580292, "learning_rate": 9.206358835130846e-06, "loss": 0.3177, "step": 7206 }, { "epoch": 0.21, "grad_norm": 3.5693192383757792, "learning_rate": 9.206108103436096e-06, "loss": 0.4842, "step": 7207 }, { "epoch": 0.21, "grad_norm": 3.263872251175123, "learning_rate": 9.205857335556713e-06, "loss": 0.5745, "step": 7208 }, { "epoch": 0.21, "grad_norm": 5.609400077062461, "learning_rate": 9.205606531494857e-06, "loss": 0.3219, "step": 7209 }, { "epoch": 0.21, "grad_norm": 29.2974592226905, "learning_rate": 9.205355691252689e-06, "loss": 0.5578, "step": 7210 }, { "epoch": 0.21, "grad_norm": 8.62390522297531, "learning_rate": 9.20510481483236e-06, "loss": 0.7214, "step": 7211 }, { "epoch": 0.21, "grad_norm": 6.09476392086922, "learning_rate": 9.204853902236035e-06, "loss": 0.5731, "step": 7212 }, { "epoch": 0.21, "grad_norm": 7.377250521974772, "learning_rate": 9.204602953465866e-06, "loss": 0.3623, "step": 7213 }, { "epoch": 0.21, "grad_norm": 8.874230687284262, "learning_rate": 9.204351968524018e-06, "loss": 0.4688, "step": 7214 }, { "epoch": 0.21, "grad_norm": 4.509449203842948, "learning_rate": 9.204100947412649e-06, "loss": 0.6418, "step": 7215 }, { "epoch": 0.21, "grad_norm": 8.55632553791728, "learning_rate": 9.203849890133914e-06, "loss": 0.8854, "step": 7216 }, { "epoch": 0.21, "grad_norm": 5.025037306082488, "learning_rate": 9.203598796689978e-06, "loss": 0.5412, "step": 7217 }, { "epoch": 0.21, "grad_norm": 6.577754973019054, "learning_rate": 9.203347667082999e-06, "loss": 0.5675, "step": 7218 }, { "epoch": 0.21, "grad_norm": 6.763082224660643, "learning_rate": 9.203096501315136e-06, "loss": 0.7397, "step": 7219 }, { "epoch": 0.21, "grad_norm": 8.12995090765401, "learning_rate": 9.202845299388555e-06, "loss": 0.4838, "step": 7220 }, { "epoch": 0.21, "grad_norm": 6.698649871072143, "learning_rate": 9.20259406130541e-06, "loss": 0.4047, "step": 7221 }, { "epoch": 0.21, "grad_norm": 4.83250026539083, "learning_rate": 9.202342787067866e-06, "loss": 0.4365, "step": 7222 }, { "epoch": 0.21, "grad_norm": 3.636311738770954, "learning_rate": 9.202091476678084e-06, "loss": 0.3135, "step": 7223 }, { "epoch": 0.21, "grad_norm": 4.54772217713739, "learning_rate": 9.201840130138228e-06, "loss": 0.5853, "step": 7224 }, { "epoch": 0.21, "grad_norm": 5.781112339339716, "learning_rate": 9.201588747450459e-06, "loss": 0.663, "step": 7225 }, { "epoch": 0.21, "grad_norm": 6.606558558134157, "learning_rate": 9.201337328616937e-06, "loss": 0.8296, "step": 7226 }, { "epoch": 0.21, "grad_norm": 9.319712105508046, "learning_rate": 9.201085873639829e-06, "loss": 0.9307, "step": 7227 }, { "epoch": 0.21, "grad_norm": 5.460944747374259, "learning_rate": 9.200834382521297e-06, "loss": 0.4247, "step": 7228 }, { "epoch": 0.21, "grad_norm": 4.097013468551936, "learning_rate": 9.200582855263502e-06, "loss": 0.2297, "step": 7229 }, { "epoch": 0.21, "grad_norm": 5.541160713369681, "learning_rate": 9.200331291868611e-06, "loss": 0.5932, "step": 7230 }, { "epoch": 0.21, "grad_norm": 3.051653435715043, "learning_rate": 9.200079692338788e-06, "loss": 0.1842, "step": 7231 }, { "epoch": 0.21, "grad_norm": 8.636499009600028, "learning_rate": 9.199828056676195e-06, "loss": 0.6316, "step": 7232 }, { "epoch": 0.21, "grad_norm": 9.160149118384085, "learning_rate": 9.199576384883e-06, "loss": 0.9088, "step": 7233 }, { "epoch": 0.21, "grad_norm": 6.014084497353574, "learning_rate": 9.199324676961365e-06, "loss": 0.3285, "step": 7234 }, { "epoch": 0.21, "grad_norm": 8.595324729213118, "learning_rate": 9.199072932913458e-06, "loss": 0.369, "step": 7235 }, { "epoch": 0.21, "grad_norm": 7.9100078050789495, "learning_rate": 9.198821152741441e-06, "loss": 0.8258, "step": 7236 }, { "epoch": 0.21, "grad_norm": 9.76572167920894, "learning_rate": 9.198569336447484e-06, "loss": 0.6443, "step": 7237 }, { "epoch": 0.21, "grad_norm": 12.847599649928057, "learning_rate": 9.198317484033753e-06, "loss": 1.2748, "step": 7238 }, { "epoch": 0.21, "grad_norm": 9.99999752044647, "learning_rate": 9.198065595502414e-06, "loss": 0.9619, "step": 7239 }, { "epoch": 0.21, "grad_norm": 5.608551942590058, "learning_rate": 9.197813670855631e-06, "loss": 0.7047, "step": 7240 }, { "epoch": 0.21, "grad_norm": 4.303371332081086, "learning_rate": 9.197561710095577e-06, "loss": 0.2481, "step": 7241 }, { "epoch": 0.21, "grad_norm": 12.854606216969355, "learning_rate": 9.197309713224414e-06, "loss": 0.6461, "step": 7242 }, { "epoch": 0.21, "grad_norm": 2.429786263080764, "learning_rate": 9.197057680244312e-06, "loss": 0.2235, "step": 7243 }, { "epoch": 0.21, "grad_norm": 10.455049528627644, "learning_rate": 9.196805611157441e-06, "loss": 0.6188, "step": 7244 }, { "epoch": 0.21, "grad_norm": 3.003242538406679, "learning_rate": 9.196553505965969e-06, "loss": 0.6558, "step": 7245 }, { "epoch": 0.21, "grad_norm": 6.6140678104746105, "learning_rate": 9.196301364672063e-06, "loss": 0.7511, "step": 7246 }, { "epoch": 0.21, "grad_norm": 5.873149986364139, "learning_rate": 9.19604918727789e-06, "loss": 0.3785, "step": 7247 }, { "epoch": 0.21, "grad_norm": 6.605425870046731, "learning_rate": 9.195796973785626e-06, "loss": 0.6205, "step": 7248 }, { "epoch": 0.21, "grad_norm": 4.2788347267189515, "learning_rate": 9.195544724197436e-06, "loss": 0.2162, "step": 7249 }, { "epoch": 0.21, "grad_norm": 5.670491052351246, "learning_rate": 9.195292438515492e-06, "loss": 0.3493, "step": 7250 }, { "epoch": 0.21, "grad_norm": 6.64628521410395, "learning_rate": 9.195040116741962e-06, "loss": 0.528, "step": 7251 }, { "epoch": 0.21, "grad_norm": 3.840267200735165, "learning_rate": 9.19478775887902e-06, "loss": 0.4194, "step": 7252 }, { "epoch": 0.21, "grad_norm": 10.507304330329504, "learning_rate": 9.194535364928837e-06, "loss": 0.6429, "step": 7253 }, { "epoch": 0.21, "grad_norm": 11.31460863556711, "learning_rate": 9.19428293489358e-06, "loss": 1.2895, "step": 7254 }, { "epoch": 0.21, "grad_norm": 6.404530810905427, "learning_rate": 9.194030468775423e-06, "loss": 0.4226, "step": 7255 }, { "epoch": 0.21, "grad_norm": 6.058825687024632, "learning_rate": 9.19377796657654e-06, "loss": 0.6269, "step": 7256 }, { "epoch": 0.21, "grad_norm": 8.184215789679609, "learning_rate": 9.1935254282991e-06, "loss": 0.5369, "step": 7257 }, { "epoch": 0.21, "grad_norm": 7.39249988348209, "learning_rate": 9.193272853945279e-06, "loss": 0.521, "step": 7258 }, { "epoch": 0.21, "grad_norm": 6.3268480496019555, "learning_rate": 9.193020243517246e-06, "loss": 0.6076, "step": 7259 }, { "epoch": 0.21, "grad_norm": 4.139007367536979, "learning_rate": 9.192767597017177e-06, "loss": 0.4304, "step": 7260 }, { "epoch": 0.21, "grad_norm": 13.474464001018596, "learning_rate": 9.192514914447243e-06, "loss": 0.7456, "step": 7261 }, { "epoch": 0.21, "grad_norm": 7.985435221353842, "learning_rate": 9.192262195809622e-06, "loss": 0.6812, "step": 7262 }, { "epoch": 0.21, "grad_norm": 5.642557167233491, "learning_rate": 9.192009441106484e-06, "loss": 0.5089, "step": 7263 }, { "epoch": 0.21, "grad_norm": 7.206830509357118, "learning_rate": 9.191756650340007e-06, "loss": 0.8764, "step": 7264 }, { "epoch": 0.21, "grad_norm": 5.105440123637587, "learning_rate": 9.19150382351236e-06, "loss": 0.7369, "step": 7265 }, { "epoch": 0.21, "grad_norm": 7.902200196904994, "learning_rate": 9.191250960625723e-06, "loss": 0.3798, "step": 7266 }, { "epoch": 0.21, "grad_norm": 5.1278164498200445, "learning_rate": 9.190998061682273e-06, "loss": 0.4156, "step": 7267 }, { "epoch": 0.21, "grad_norm": 4.47116042531857, "learning_rate": 9.19074512668418e-06, "loss": 0.3435, "step": 7268 }, { "epoch": 0.21, "grad_norm": 4.7847569637701355, "learning_rate": 9.190492155633623e-06, "loss": 0.5087, "step": 7269 }, { "epoch": 0.21, "grad_norm": 8.481842787174568, "learning_rate": 9.190239148532778e-06, "loss": 0.4842, "step": 7270 }, { "epoch": 0.21, "grad_norm": 7.010945141972831, "learning_rate": 9.189986105383821e-06, "loss": 0.7036, "step": 7271 }, { "epoch": 0.21, "grad_norm": 3.5562201748420232, "learning_rate": 9.18973302618893e-06, "loss": 0.5861, "step": 7272 }, { "epoch": 0.21, "grad_norm": 4.212800273890257, "learning_rate": 9.18947991095028e-06, "loss": 0.4623, "step": 7273 }, { "epoch": 0.21, "grad_norm": 5.862204069627317, "learning_rate": 9.189226759670052e-06, "loss": 0.5414, "step": 7274 }, { "epoch": 0.21, "grad_norm": 6.763786048377154, "learning_rate": 9.18897357235042e-06, "loss": 0.4497, "step": 7275 }, { "epoch": 0.21, "grad_norm": 7.551480169654526, "learning_rate": 9.188720348993567e-06, "loss": 0.4947, "step": 7276 }, { "epoch": 0.21, "grad_norm": 6.356449550920868, "learning_rate": 9.188467089601668e-06, "loss": 0.4807, "step": 7277 }, { "epoch": 0.21, "grad_norm": 8.589216666514819, "learning_rate": 9.1882137941769e-06, "loss": 0.7146, "step": 7278 }, { "epoch": 0.21, "grad_norm": 7.800984112084137, "learning_rate": 9.187960462721447e-06, "loss": 0.6539, "step": 7279 }, { "epoch": 0.21, "grad_norm": 5.964406135459427, "learning_rate": 9.187707095237485e-06, "loss": 0.5142, "step": 7280 }, { "epoch": 0.21, "grad_norm": 3.209348206375736, "learning_rate": 9.187453691727194e-06, "loss": 0.3683, "step": 7281 }, { "epoch": 0.21, "grad_norm": 5.860796092317483, "learning_rate": 9.187200252192755e-06, "loss": 0.5941, "step": 7282 }, { "epoch": 0.21, "grad_norm": 4.373357164654309, "learning_rate": 9.186946776636347e-06, "loss": 0.774, "step": 7283 }, { "epoch": 0.21, "grad_norm": 6.498221447511311, "learning_rate": 9.186693265060154e-06, "loss": 0.63, "step": 7284 }, { "epoch": 0.21, "grad_norm": 6.567417954134605, "learning_rate": 9.186439717466352e-06, "loss": 0.7619, "step": 7285 }, { "epoch": 0.21, "grad_norm": 4.682338110410898, "learning_rate": 9.186186133857123e-06, "loss": 0.617, "step": 7286 }, { "epoch": 0.21, "grad_norm": 2.545447680944779, "learning_rate": 9.185932514234651e-06, "loss": 0.4002, "step": 7287 }, { "epoch": 0.21, "grad_norm": 6.433399227608656, "learning_rate": 9.185678858601119e-06, "loss": 0.7156, "step": 7288 }, { "epoch": 0.21, "grad_norm": 7.5925898038485125, "learning_rate": 9.185425166958705e-06, "loss": 0.5233, "step": 7289 }, { "epoch": 0.21, "grad_norm": 8.021194753719648, "learning_rate": 9.185171439309596e-06, "loss": 0.4688, "step": 7290 }, { "epoch": 0.21, "grad_norm": 3.546000301376132, "learning_rate": 9.184917675655972e-06, "loss": 0.5115, "step": 7291 }, { "epoch": 0.21, "grad_norm": 3.5688408754873846, "learning_rate": 9.184663876000015e-06, "loss": 0.4477, "step": 7292 }, { "epoch": 0.21, "grad_norm": 9.970266388268035, "learning_rate": 9.18441004034391e-06, "loss": 0.2846, "step": 7293 }, { "epoch": 0.21, "grad_norm": 6.443325480880465, "learning_rate": 9.184156168689841e-06, "loss": 0.5147, "step": 7294 }, { "epoch": 0.21, "grad_norm": 8.30611421059324, "learning_rate": 9.183902261039992e-06, "loss": 0.428, "step": 7295 }, { "epoch": 0.21, "grad_norm": 9.973260606442782, "learning_rate": 9.183648317396545e-06, "loss": 0.4329, "step": 7296 }, { "epoch": 0.21, "grad_norm": 6.931992827942798, "learning_rate": 9.183394337761689e-06, "loss": 0.3127, "step": 7297 }, { "epoch": 0.21, "grad_norm": 6.497701678725554, "learning_rate": 9.183140322137605e-06, "loss": 0.3593, "step": 7298 }, { "epoch": 0.21, "grad_norm": 4.827628668794492, "learning_rate": 9.18288627052648e-06, "loss": 0.3803, "step": 7299 }, { "epoch": 0.21, "grad_norm": 8.257764457050406, "learning_rate": 9.182632182930502e-06, "loss": 0.6023, "step": 7300 }, { "epoch": 0.21, "grad_norm": 5.849108355787167, "learning_rate": 9.182378059351851e-06, "loss": 0.4346, "step": 7301 }, { "epoch": 0.21, "grad_norm": 5.816043470967955, "learning_rate": 9.182123899792718e-06, "loss": 0.8597, "step": 7302 }, { "epoch": 0.21, "grad_norm": 7.146173704473042, "learning_rate": 9.181869704255288e-06, "loss": 0.4802, "step": 7303 }, { "epoch": 0.21, "grad_norm": 3.76742836598702, "learning_rate": 9.181615472741749e-06, "loss": 0.398, "step": 7304 }, { "epoch": 0.21, "grad_norm": 3.709968842018264, "learning_rate": 9.181361205254284e-06, "loss": 0.2769, "step": 7305 }, { "epoch": 0.21, "grad_norm": 6.210572545107513, "learning_rate": 9.181106901795087e-06, "loss": 0.4638, "step": 7306 }, { "epoch": 0.21, "grad_norm": 3.1955578623537706, "learning_rate": 9.180852562366338e-06, "loss": 0.6727, "step": 7307 }, { "epoch": 0.21, "grad_norm": 3.36152367297349, "learning_rate": 9.180598186970234e-06, "loss": 0.4491, "step": 7308 }, { "epoch": 0.21, "grad_norm": 14.312669511482541, "learning_rate": 9.180343775608956e-06, "loss": 0.9453, "step": 7309 }, { "epoch": 0.21, "grad_norm": 8.811368179686855, "learning_rate": 9.180089328284694e-06, "loss": 0.5796, "step": 7310 }, { "epoch": 0.21, "grad_norm": 3.5807741511438045, "learning_rate": 9.179834844999639e-06, "loss": 0.4379, "step": 7311 }, { "epoch": 0.21, "grad_norm": 4.275130778537569, "learning_rate": 9.17958032575598e-06, "loss": 0.4721, "step": 7312 }, { "epoch": 0.21, "grad_norm": 5.590907349554497, "learning_rate": 9.179325770555906e-06, "loss": 0.7214, "step": 7313 }, { "epoch": 0.21, "grad_norm": 6.85050296746798, "learning_rate": 9.179071179401606e-06, "loss": 0.9032, "step": 7314 }, { "epoch": 0.21, "grad_norm": 3.9709862994138043, "learning_rate": 9.178816552295271e-06, "loss": 0.156, "step": 7315 }, { "epoch": 0.21, "grad_norm": 8.433334113360987, "learning_rate": 9.178561889239093e-06, "loss": 0.4208, "step": 7316 }, { "epoch": 0.21, "grad_norm": 7.340892998945882, "learning_rate": 9.178307190235262e-06, "loss": 0.3445, "step": 7317 }, { "epoch": 0.21, "grad_norm": 7.041908899538665, "learning_rate": 9.178052455285969e-06, "loss": 0.5486, "step": 7318 }, { "epoch": 0.21, "grad_norm": 6.687385201582947, "learning_rate": 9.177797684393403e-06, "loss": 0.3021, "step": 7319 }, { "epoch": 0.21, "grad_norm": 5.881710359593575, "learning_rate": 9.177542877559759e-06, "loss": 0.8061, "step": 7320 }, { "epoch": 0.21, "grad_norm": 2.7759475310338813, "learning_rate": 9.177288034787228e-06, "loss": 0.3867, "step": 7321 }, { "epoch": 0.21, "grad_norm": 6.759759029461262, "learning_rate": 9.177033156078002e-06, "loss": 0.5429, "step": 7322 }, { "epoch": 0.21, "grad_norm": 11.45013097163426, "learning_rate": 9.176778241434275e-06, "loss": 1.0841, "step": 7323 }, { "epoch": 0.21, "grad_norm": 6.133462936994672, "learning_rate": 9.176523290858236e-06, "loss": 0.4191, "step": 7324 }, { "epoch": 0.21, "grad_norm": 6.0146789995950884, "learning_rate": 9.176268304352085e-06, "loss": 0.84, "step": 7325 }, { "epoch": 0.21, "grad_norm": 3.7939213068336417, "learning_rate": 9.17601328191801e-06, "loss": 0.3602, "step": 7326 }, { "epoch": 0.21, "grad_norm": 6.269568935768259, "learning_rate": 9.175758223558208e-06, "loss": 0.3981, "step": 7327 }, { "epoch": 0.21, "grad_norm": 6.927041358151333, "learning_rate": 9.175503129274872e-06, "loss": 0.6383, "step": 7328 }, { "epoch": 0.21, "grad_norm": 4.565840713031781, "learning_rate": 9.175247999070196e-06, "loss": 0.3118, "step": 7329 }, { "epoch": 0.21, "grad_norm": 5.463056619772864, "learning_rate": 9.174992832946377e-06, "loss": 0.3032, "step": 7330 }, { "epoch": 0.21, "grad_norm": 4.3498647860635655, "learning_rate": 9.174737630905606e-06, "loss": 0.142, "step": 7331 }, { "epoch": 0.21, "grad_norm": 3.9743026277611797, "learning_rate": 9.174482392950084e-06, "loss": 0.2974, "step": 7332 }, { "epoch": 0.21, "grad_norm": 8.066556752719752, "learning_rate": 9.174227119082002e-06, "loss": 0.6767, "step": 7333 }, { "epoch": 0.21, "grad_norm": 6.091896518627393, "learning_rate": 9.173971809303558e-06, "loss": 0.6026, "step": 7334 }, { "epoch": 0.21, "grad_norm": 9.31114263691063, "learning_rate": 9.17371646361695e-06, "loss": 0.9154, "step": 7335 }, { "epoch": 0.21, "grad_norm": 7.23358835512037, "learning_rate": 9.173461082024373e-06, "loss": 0.785, "step": 7336 }, { "epoch": 0.21, "grad_norm": 7.98103325294839, "learning_rate": 9.173205664528022e-06, "loss": 1.0203, "step": 7337 }, { "epoch": 0.21, "grad_norm": 7.913652776597839, "learning_rate": 9.1729502111301e-06, "loss": 0.6196, "step": 7338 }, { "epoch": 0.21, "grad_norm": 5.071045597725225, "learning_rate": 9.172694721832797e-06, "loss": 0.3147, "step": 7339 }, { "epoch": 0.21, "grad_norm": 9.05007219233092, "learning_rate": 9.172439196638318e-06, "loss": 0.5242, "step": 7340 }, { "epoch": 0.21, "grad_norm": 9.25644109040598, "learning_rate": 9.172183635548858e-06, "loss": 0.5982, "step": 7341 }, { "epoch": 0.21, "grad_norm": 8.390319477677831, "learning_rate": 9.171928038566616e-06, "loss": 0.5324, "step": 7342 }, { "epoch": 0.21, "grad_norm": 4.0408867971720275, "learning_rate": 9.17167240569379e-06, "loss": 0.465, "step": 7343 }, { "epoch": 0.21, "grad_norm": 10.86876837546236, "learning_rate": 9.17141673693258e-06, "loss": 0.7443, "step": 7344 }, { "epoch": 0.21, "grad_norm": 6.394255896100283, "learning_rate": 9.171161032285185e-06, "loss": 0.8017, "step": 7345 }, { "epoch": 0.21, "grad_norm": 4.148420315866745, "learning_rate": 9.170905291753806e-06, "loss": 0.396, "step": 7346 }, { "epoch": 0.21, "grad_norm": 6.79815026958511, "learning_rate": 9.170649515340641e-06, "loss": 0.619, "step": 7347 }, { "epoch": 0.21, "grad_norm": 5.7340817077637665, "learning_rate": 9.170393703047892e-06, "loss": 0.6775, "step": 7348 }, { "epoch": 0.21, "grad_norm": 4.151114941392593, "learning_rate": 9.170137854877762e-06, "loss": 0.2785, "step": 7349 }, { "epoch": 0.21, "grad_norm": 4.168983629076962, "learning_rate": 9.169881970832447e-06, "loss": 0.2755, "step": 7350 }, { "epoch": 0.21, "grad_norm": 6.629417889732225, "learning_rate": 9.169626050914152e-06, "loss": 0.3309, "step": 7351 }, { "epoch": 0.21, "grad_norm": 4.880330179566807, "learning_rate": 9.169370095125077e-06, "loss": 0.6254, "step": 7352 }, { "epoch": 0.21, "grad_norm": 7.106539049535522, "learning_rate": 9.169114103467424e-06, "loss": 0.6405, "step": 7353 }, { "epoch": 0.21, "grad_norm": 9.441474380847087, "learning_rate": 9.168858075943397e-06, "loss": 0.548, "step": 7354 }, { "epoch": 0.21, "grad_norm": 4.186748835816281, "learning_rate": 9.168602012555194e-06, "loss": 0.1805, "step": 7355 }, { "epoch": 0.21, "grad_norm": 7.4298787538639655, "learning_rate": 9.168345913305025e-06, "loss": 0.8286, "step": 7356 }, { "epoch": 0.21, "grad_norm": 2.365086463442499, "learning_rate": 9.168089778195087e-06, "loss": 0.2623, "step": 7357 }, { "epoch": 0.21, "grad_norm": 4.414140494678743, "learning_rate": 9.167833607227587e-06, "loss": 0.3643, "step": 7358 }, { "epoch": 0.21, "grad_norm": 5.837088189312859, "learning_rate": 9.167577400404726e-06, "loss": 0.5383, "step": 7359 }, { "epoch": 0.21, "grad_norm": 6.008895359745122, "learning_rate": 9.167321157728712e-06, "loss": 0.5192, "step": 7360 }, { "epoch": 0.21, "grad_norm": 3.987158876135888, "learning_rate": 9.167064879201746e-06, "loss": 0.3672, "step": 7361 }, { "epoch": 0.21, "grad_norm": 4.23639276155342, "learning_rate": 9.166808564826034e-06, "loss": 0.5097, "step": 7362 }, { "epoch": 0.21, "grad_norm": 8.588963288706958, "learning_rate": 9.166552214603781e-06, "loss": 0.6368, "step": 7363 }, { "epoch": 0.21, "grad_norm": 4.725856053546571, "learning_rate": 9.166295828537192e-06, "loss": 0.2888, "step": 7364 }, { "epoch": 0.21, "grad_norm": 2.7869511153714583, "learning_rate": 9.166039406628473e-06, "loss": 0.1999, "step": 7365 }, { "epoch": 0.21, "grad_norm": 2.758657698752993, "learning_rate": 9.165782948879832e-06, "loss": 0.2619, "step": 7366 }, { "epoch": 0.21, "grad_norm": 3.631543107932541, "learning_rate": 9.16552645529347e-06, "loss": 0.4368, "step": 7367 }, { "epoch": 0.21, "grad_norm": 4.986942622463368, "learning_rate": 9.165269925871598e-06, "loss": 0.3659, "step": 7368 }, { "epoch": 0.21, "grad_norm": 6.975012663689606, "learning_rate": 9.165013360616422e-06, "loss": 0.6681, "step": 7369 }, { "epoch": 0.21, "grad_norm": 4.772251212747841, "learning_rate": 9.16475675953015e-06, "loss": 0.4773, "step": 7370 }, { "epoch": 0.21, "grad_norm": 7.6573501107954645, "learning_rate": 9.164500122614985e-06, "loss": 0.8625, "step": 7371 }, { "epoch": 0.21, "grad_norm": 7.521980120721102, "learning_rate": 9.164243449873141e-06, "loss": 0.3622, "step": 7372 }, { "epoch": 0.21, "grad_norm": 4.930935950970118, "learning_rate": 9.163986741306822e-06, "loss": 0.6358, "step": 7373 }, { "epoch": 0.21, "grad_norm": 6.205395760960004, "learning_rate": 9.163729996918239e-06, "loss": 0.4255, "step": 7374 }, { "epoch": 0.21, "grad_norm": 5.746089185846462, "learning_rate": 9.163473216709597e-06, "loss": 0.7148, "step": 7375 }, { "epoch": 0.21, "grad_norm": 4.1314146854511735, "learning_rate": 9.163216400683111e-06, "loss": 0.2354, "step": 7376 }, { "epoch": 0.21, "grad_norm": 4.705664877214007, "learning_rate": 9.162959548840985e-06, "loss": 0.3498, "step": 7377 }, { "epoch": 0.21, "grad_norm": 6.887283864584941, "learning_rate": 9.16270266118543e-06, "loss": 0.3838, "step": 7378 }, { "epoch": 0.21, "grad_norm": 4.301744787952048, "learning_rate": 9.162445737718657e-06, "loss": 0.5332, "step": 7379 }, { "epoch": 0.21, "grad_norm": 5.912483518549265, "learning_rate": 9.162188778442876e-06, "loss": 0.7605, "step": 7380 }, { "epoch": 0.21, "grad_norm": 6.678532907740605, "learning_rate": 9.161931783360299e-06, "loss": 0.5436, "step": 7381 }, { "epoch": 0.21, "grad_norm": 5.1997535711364495, "learning_rate": 9.161674752473133e-06, "loss": 0.498, "step": 7382 }, { "epoch": 0.21, "grad_norm": 6.402129210996577, "learning_rate": 9.161417685783592e-06, "loss": 0.3958, "step": 7383 }, { "epoch": 0.21, "grad_norm": 10.227819495179633, "learning_rate": 9.161160583293888e-06, "loss": 0.53, "step": 7384 }, { "epoch": 0.21, "grad_norm": 6.948633865179232, "learning_rate": 9.16090344500623e-06, "loss": 0.3364, "step": 7385 }, { "epoch": 0.21, "grad_norm": 8.408204486062608, "learning_rate": 9.160646270922833e-06, "loss": 0.3956, "step": 7386 }, { "epoch": 0.21, "grad_norm": 8.922246120236661, "learning_rate": 9.16038906104591e-06, "loss": 0.5054, "step": 7387 }, { "epoch": 0.21, "grad_norm": 4.535578587307585, "learning_rate": 9.160131815377671e-06, "loss": 0.636, "step": 7388 }, { "epoch": 0.21, "grad_norm": 8.728705133756826, "learning_rate": 9.15987453392033e-06, "loss": 0.5135, "step": 7389 }, { "epoch": 0.21, "grad_norm": 8.144530693804313, "learning_rate": 9.1596172166761e-06, "loss": 0.7305, "step": 7390 }, { "epoch": 0.21, "grad_norm": 4.780872604335606, "learning_rate": 9.159359863647196e-06, "loss": 0.2249, "step": 7391 }, { "epoch": 0.21, "grad_norm": 10.985934845391405, "learning_rate": 9.15910247483583e-06, "loss": 0.6447, "step": 7392 }, { "epoch": 0.21, "grad_norm": 3.451635255128827, "learning_rate": 9.158845050244218e-06, "loss": 0.2259, "step": 7393 }, { "epoch": 0.21, "grad_norm": 9.28155034154067, "learning_rate": 9.158587589874575e-06, "loss": 0.737, "step": 7394 }, { "epoch": 0.21, "grad_norm": 12.453375570918576, "learning_rate": 9.158330093729114e-06, "loss": 0.7627, "step": 7395 }, { "epoch": 0.21, "grad_norm": 3.5418284809321703, "learning_rate": 9.158072561810052e-06, "loss": 0.2666, "step": 7396 }, { "epoch": 0.21, "grad_norm": 5.849349455241158, "learning_rate": 9.157814994119603e-06, "loss": 0.691, "step": 7397 }, { "epoch": 0.21, "grad_norm": 2.5929765122479336, "learning_rate": 9.157557390659984e-06, "loss": 0.3116, "step": 7398 }, { "epoch": 0.21, "grad_norm": 4.783733745035984, "learning_rate": 9.15729975143341e-06, "loss": 0.553, "step": 7399 }, { "epoch": 0.21, "grad_norm": 4.736548651840927, "learning_rate": 9.1570420764421e-06, "loss": 0.3239, "step": 7400 }, { "epoch": 0.21, "grad_norm": 5.547291570790407, "learning_rate": 9.156784365688267e-06, "loss": 0.1389, "step": 7401 }, { "epoch": 0.21, "grad_norm": 5.628388803961969, "learning_rate": 9.15652661917413e-06, "loss": 0.6896, "step": 7402 }, { "epoch": 0.21, "grad_norm": 10.685188712781764, "learning_rate": 9.156268836901906e-06, "loss": 0.4748, "step": 7403 }, { "epoch": 0.21, "grad_norm": 3.6456333387107187, "learning_rate": 9.156011018873814e-06, "loss": 0.5403, "step": 7404 }, { "epoch": 0.21, "grad_norm": 6.588282157602047, "learning_rate": 9.15575316509207e-06, "loss": 0.3827, "step": 7405 }, { "epoch": 0.21, "grad_norm": 5.5013184701047235, "learning_rate": 9.155495275558895e-06, "loss": 0.8853, "step": 7406 }, { "epoch": 0.21, "grad_norm": 8.31792049499657, "learning_rate": 9.155237350276504e-06, "loss": 0.7101, "step": 7407 }, { "epoch": 0.21, "grad_norm": 6.202071669771219, "learning_rate": 9.154979389247119e-06, "loss": 0.3368, "step": 7408 }, { "epoch": 0.21, "grad_norm": 7.7753969109592935, "learning_rate": 9.154721392472956e-06, "loss": 0.6682, "step": 7409 }, { "epoch": 0.21, "grad_norm": 5.764393186403001, "learning_rate": 9.154463359956237e-06, "loss": 0.3548, "step": 7410 }, { "epoch": 0.21, "grad_norm": 4.299720099676505, "learning_rate": 9.154205291699182e-06, "loss": 0.3953, "step": 7411 }, { "epoch": 0.21, "grad_norm": 5.291400702493447, "learning_rate": 9.15394718770401e-06, "loss": 0.5968, "step": 7412 }, { "epoch": 0.21, "grad_norm": 3.6477850222330863, "learning_rate": 9.153689047972941e-06, "loss": 0.1806, "step": 7413 }, { "epoch": 0.21, "grad_norm": 8.089908586699982, "learning_rate": 9.153430872508198e-06, "loss": 1.2015, "step": 7414 }, { "epoch": 0.21, "grad_norm": 5.421833225056329, "learning_rate": 9.153172661312e-06, "loss": 0.6414, "step": 7415 }, { "epoch": 0.21, "grad_norm": 9.63869740213185, "learning_rate": 9.152914414386568e-06, "loss": 0.4034, "step": 7416 }, { "epoch": 0.21, "grad_norm": 9.41377155281733, "learning_rate": 9.152656131734126e-06, "loss": 0.845, "step": 7417 }, { "epoch": 0.21, "grad_norm": 7.577652938140102, "learning_rate": 9.152397813356895e-06, "loss": 0.2968, "step": 7418 }, { "epoch": 0.21, "grad_norm": 5.204743211001991, "learning_rate": 9.152139459257095e-06, "loss": 0.194, "step": 7419 }, { "epoch": 0.21, "grad_norm": 6.13593573282448, "learning_rate": 9.151881069436952e-06, "loss": 0.7076, "step": 7420 }, { "epoch": 0.21, "grad_norm": 6.417847421512751, "learning_rate": 9.151622643898686e-06, "loss": 0.7649, "step": 7421 }, { "epoch": 0.21, "grad_norm": 7.728043494313422, "learning_rate": 9.151364182644522e-06, "loss": 0.2045, "step": 7422 }, { "epoch": 0.21, "grad_norm": 5.8452685010080545, "learning_rate": 9.151105685676684e-06, "loss": 0.6531, "step": 7423 }, { "epoch": 0.21, "grad_norm": 3.6643618073047843, "learning_rate": 9.150847152997394e-06, "loss": 0.3135, "step": 7424 }, { "epoch": 0.21, "grad_norm": 6.405588441510501, "learning_rate": 9.150588584608876e-06, "loss": 0.3859, "step": 7425 }, { "epoch": 0.21, "grad_norm": 7.201814213643286, "learning_rate": 9.150329980513356e-06, "loss": 0.6876, "step": 7426 }, { "epoch": 0.21, "grad_norm": 6.060894389735547, "learning_rate": 9.15007134071306e-06, "loss": 0.195, "step": 7427 }, { "epoch": 0.21, "grad_norm": 7.2277673984967485, "learning_rate": 9.149812665210211e-06, "loss": 0.5437, "step": 7428 }, { "epoch": 0.21, "grad_norm": 5.031872918087433, "learning_rate": 9.149553954007033e-06, "loss": 0.5879, "step": 7429 }, { "epoch": 0.21, "grad_norm": 6.349234779687837, "learning_rate": 9.149295207105754e-06, "loss": 0.5412, "step": 7430 }, { "epoch": 0.21, "grad_norm": 2.8804182329085766, "learning_rate": 9.1490364245086e-06, "loss": 0.2291, "step": 7431 }, { "epoch": 0.21, "grad_norm": 8.974077403643342, "learning_rate": 9.148777606217796e-06, "loss": 0.6439, "step": 7432 }, { "epoch": 0.21, "grad_norm": 5.348553645627824, "learning_rate": 9.148518752235567e-06, "loss": 0.5341, "step": 7433 }, { "epoch": 0.21, "grad_norm": 5.677532264621154, "learning_rate": 9.148259862564145e-06, "loss": 0.4189, "step": 7434 }, { "epoch": 0.21, "grad_norm": 3.7843099460948206, "learning_rate": 9.148000937205752e-06, "loss": 0.2788, "step": 7435 }, { "epoch": 0.21, "grad_norm": 6.3063913002478715, "learning_rate": 9.147741976162618e-06, "loss": 0.3402, "step": 7436 }, { "epoch": 0.21, "grad_norm": 4.0774618610818, "learning_rate": 9.147482979436971e-06, "loss": 0.4568, "step": 7437 }, { "epoch": 0.21, "grad_norm": 4.2466219211199245, "learning_rate": 9.147223947031039e-06, "loss": 0.4439, "step": 7438 }, { "epoch": 0.21, "grad_norm": 5.371150922991164, "learning_rate": 9.146964878947049e-06, "loss": 0.7189, "step": 7439 }, { "epoch": 0.21, "grad_norm": 6.736030817591106, "learning_rate": 9.146705775187232e-06, "loss": 0.2409, "step": 7440 }, { "epoch": 0.21, "grad_norm": 10.4697004655403, "learning_rate": 9.146446635753815e-06, "loss": 0.3053, "step": 7441 }, { "epoch": 0.21, "grad_norm": 2.983978165211704, "learning_rate": 9.146187460649028e-06, "loss": 0.1556, "step": 7442 }, { "epoch": 0.21, "grad_norm": 6.494102847340778, "learning_rate": 9.1459282498751e-06, "loss": 0.4052, "step": 7443 }, { "epoch": 0.21, "grad_norm": 9.419700055216675, "learning_rate": 9.145669003434263e-06, "loss": 0.8036, "step": 7444 }, { "epoch": 0.21, "grad_norm": 4.730148619601317, "learning_rate": 9.145409721328747e-06, "loss": 0.5962, "step": 7445 }, { "epoch": 0.21, "grad_norm": 8.640688187159475, "learning_rate": 9.145150403560779e-06, "loss": 0.3666, "step": 7446 }, { "epoch": 0.21, "grad_norm": 5.286017258256905, "learning_rate": 9.144891050132594e-06, "loss": 0.2983, "step": 7447 }, { "epoch": 0.21, "grad_norm": 2.8106245781692687, "learning_rate": 9.144631661046422e-06, "loss": 0.2364, "step": 7448 }, { "epoch": 0.21, "grad_norm": 8.219282473876657, "learning_rate": 9.144372236304491e-06, "loss": 1.1334, "step": 7449 }, { "epoch": 0.21, "grad_norm": 2.985949555297342, "learning_rate": 9.14411277590904e-06, "loss": 0.3248, "step": 7450 }, { "epoch": 0.21, "grad_norm": 6.526689424712131, "learning_rate": 9.143853279862293e-06, "loss": 0.4901, "step": 7451 }, { "epoch": 0.21, "grad_norm": 4.937068703078934, "learning_rate": 9.14359374816649e-06, "loss": 0.4828, "step": 7452 }, { "epoch": 0.21, "grad_norm": 5.683078640277071, "learning_rate": 9.143334180823859e-06, "loss": 0.4529, "step": 7453 }, { "epoch": 0.21, "grad_norm": 8.368382301828015, "learning_rate": 9.143074577836635e-06, "loss": 0.776, "step": 7454 }, { "epoch": 0.21, "grad_norm": 3.1349177338485372, "learning_rate": 9.142814939207049e-06, "loss": 0.5533, "step": 7455 }, { "epoch": 0.21, "grad_norm": 3.5156116061485307, "learning_rate": 9.142555264937337e-06, "loss": 0.2037, "step": 7456 }, { "epoch": 0.21, "grad_norm": 2.7724068674348934, "learning_rate": 9.142295555029731e-06, "loss": 0.2352, "step": 7457 }, { "epoch": 0.21, "grad_norm": 3.994164591299964, "learning_rate": 9.142035809486468e-06, "loss": 0.2758, "step": 7458 }, { "epoch": 0.21, "grad_norm": 7.267720959968491, "learning_rate": 9.14177602830978e-06, "loss": 0.6381, "step": 7459 }, { "epoch": 0.21, "grad_norm": 4.058183283972608, "learning_rate": 9.141516211501902e-06, "loss": 0.531, "step": 7460 }, { "epoch": 0.21, "grad_norm": 4.522095370772093, "learning_rate": 9.141256359065072e-06, "loss": 0.4854, "step": 7461 }, { "epoch": 0.21, "grad_norm": 4.09584492668511, "learning_rate": 9.140996471001522e-06, "loss": 0.6363, "step": 7462 }, { "epoch": 0.21, "grad_norm": 6.010842857478458, "learning_rate": 9.140736547313489e-06, "loss": 0.7991, "step": 7463 }, { "epoch": 0.21, "grad_norm": 4.726348872125423, "learning_rate": 9.14047658800321e-06, "loss": 0.5536, "step": 7464 }, { "epoch": 0.21, "grad_norm": 3.634870147432125, "learning_rate": 9.14021659307292e-06, "loss": 0.3316, "step": 7465 }, { "epoch": 0.21, "grad_norm": 9.057309788836966, "learning_rate": 9.139956562524857e-06, "loss": 0.5261, "step": 7466 }, { "epoch": 0.21, "grad_norm": 6.952641982542647, "learning_rate": 9.139696496361257e-06, "loss": 1.0051, "step": 7467 }, { "epoch": 0.21, "grad_norm": 5.975154416630736, "learning_rate": 9.139436394584358e-06, "loss": 0.6393, "step": 7468 }, { "epoch": 0.21, "grad_norm": 5.715712026994091, "learning_rate": 9.139176257196398e-06, "loss": 0.4108, "step": 7469 }, { "epoch": 0.21, "grad_norm": 8.116710894840795, "learning_rate": 9.138916084199614e-06, "loss": 0.5838, "step": 7470 }, { "epoch": 0.21, "grad_norm": 3.614336740105586, "learning_rate": 9.138655875596243e-06, "loss": 0.2696, "step": 7471 }, { "epoch": 0.21, "grad_norm": 8.979165545564538, "learning_rate": 9.138395631388527e-06, "loss": 0.7357, "step": 7472 }, { "epoch": 0.21, "grad_norm": 4.845329962203498, "learning_rate": 9.138135351578701e-06, "loss": 0.4621, "step": 7473 }, { "epoch": 0.21, "grad_norm": 5.799027364667257, "learning_rate": 9.137875036169008e-06, "loss": 0.4971, "step": 7474 }, { "epoch": 0.21, "grad_norm": 11.210967018008832, "learning_rate": 9.137614685161684e-06, "loss": 0.7403, "step": 7475 }, { "epoch": 0.21, "grad_norm": 3.7864502071973805, "learning_rate": 9.13735429855897e-06, "loss": 0.2122, "step": 7476 }, { "epoch": 0.21, "grad_norm": 9.059813212980805, "learning_rate": 9.13709387636311e-06, "loss": 0.494, "step": 7477 }, { "epoch": 0.21, "grad_norm": 4.184853842175871, "learning_rate": 9.136833418576338e-06, "loss": 0.1349, "step": 7478 }, { "epoch": 0.21, "grad_norm": 5.992459884082772, "learning_rate": 9.136572925200897e-06, "loss": 0.426, "step": 7479 }, { "epoch": 0.21, "grad_norm": 6.523006292470809, "learning_rate": 9.136312396239028e-06, "loss": 0.4436, "step": 7480 }, { "epoch": 0.21, "grad_norm": 6.824513333474375, "learning_rate": 9.136051831692975e-06, "loss": 0.9156, "step": 7481 }, { "epoch": 0.21, "grad_norm": 6.281487911140768, "learning_rate": 9.135791231564976e-06, "loss": 0.4077, "step": 7482 }, { "epoch": 0.21, "grad_norm": 5.355505166183857, "learning_rate": 9.135530595857275e-06, "loss": 0.6579, "step": 7483 }, { "epoch": 0.21, "grad_norm": 10.526313145285828, "learning_rate": 9.135269924572112e-06, "loss": 0.6843, "step": 7484 }, { "epoch": 0.21, "grad_norm": 9.999191632976553, "learning_rate": 9.135009217711733e-06, "loss": 0.694, "step": 7485 }, { "epoch": 0.21, "grad_norm": 7.132041663877748, "learning_rate": 9.134748475278376e-06, "loss": 0.4567, "step": 7486 }, { "epoch": 0.21, "grad_norm": 7.244513178358243, "learning_rate": 9.13448769727429e-06, "loss": 0.3433, "step": 7487 }, { "epoch": 0.21, "grad_norm": 7.963020089738878, "learning_rate": 9.134226883701714e-06, "loss": 0.3167, "step": 7488 }, { "epoch": 0.21, "grad_norm": 1.8541431229057423, "learning_rate": 9.133966034562893e-06, "loss": 0.2768, "step": 7489 }, { "epoch": 0.21, "grad_norm": 4.152554525514605, "learning_rate": 9.13370514986007e-06, "loss": 0.2673, "step": 7490 }, { "epoch": 0.21, "grad_norm": 2.745451112655478, "learning_rate": 9.133444229595494e-06, "loss": 0.2747, "step": 7491 }, { "epoch": 0.21, "grad_norm": 8.339415492334231, "learning_rate": 9.133183273771404e-06, "loss": 0.594, "step": 7492 }, { "epoch": 0.21, "grad_norm": 10.533239340865794, "learning_rate": 9.132922282390046e-06, "loss": 0.9447, "step": 7493 }, { "epoch": 0.21, "grad_norm": 4.349899878391038, "learning_rate": 9.132661255453668e-06, "loss": 0.4046, "step": 7494 }, { "epoch": 0.21, "grad_norm": 5.598864556052111, "learning_rate": 9.132400192964515e-06, "loss": 0.3718, "step": 7495 }, { "epoch": 0.21, "grad_norm": 6.527093175967292, "learning_rate": 9.13213909492483e-06, "loss": 0.6141, "step": 7496 }, { "epoch": 0.21, "grad_norm": 5.51413112218265, "learning_rate": 9.13187796133686e-06, "loss": 0.6986, "step": 7497 }, { "epoch": 0.21, "grad_norm": 9.141847655858593, "learning_rate": 9.131616792202856e-06, "loss": 0.8011, "step": 7498 }, { "epoch": 0.21, "grad_norm": 9.006471532443861, "learning_rate": 9.13135558752506e-06, "loss": 0.7619, "step": 7499 }, { "epoch": 0.21, "grad_norm": 6.9465803913584665, "learning_rate": 9.131094347305721e-06, "loss": 0.553, "step": 7500 }, { "epoch": 0.21, "grad_norm": 4.755394031231582, "learning_rate": 9.130833071547085e-06, "loss": 0.4876, "step": 7501 }, { "epoch": 0.21, "grad_norm": 7.188934581060437, "learning_rate": 9.1305717602514e-06, "loss": 0.6259, "step": 7502 }, { "epoch": 0.21, "grad_norm": 5.157044875403338, "learning_rate": 9.130310413420917e-06, "loss": 0.321, "step": 7503 }, { "epoch": 0.21, "grad_norm": 8.865421257057895, "learning_rate": 9.130049031057882e-06, "loss": 0.5377, "step": 7504 }, { "epoch": 0.21, "grad_norm": 4.219381271038952, "learning_rate": 9.129787613164541e-06, "loss": 0.4906, "step": 7505 }, { "epoch": 0.21, "grad_norm": 5.8757541253653605, "learning_rate": 9.129526159743147e-06, "loss": 0.1872, "step": 7506 }, { "epoch": 0.21, "grad_norm": 4.911465296600465, "learning_rate": 9.12926467079595e-06, "loss": 0.6782, "step": 7507 }, { "epoch": 0.22, "grad_norm": 7.137240419494362, "learning_rate": 9.129003146325194e-06, "loss": 0.4348, "step": 7508 }, { "epoch": 0.22, "grad_norm": 2.49802029426278, "learning_rate": 9.128741586333135e-06, "loss": 0.1827, "step": 7509 }, { "epoch": 0.22, "grad_norm": 3.473622002208105, "learning_rate": 9.12847999082202e-06, "loss": 0.3849, "step": 7510 }, { "epoch": 0.22, "grad_norm": 7.971683695197601, "learning_rate": 9.128218359794098e-06, "loss": 0.7108, "step": 7511 }, { "epoch": 0.22, "grad_norm": 5.26700146170314, "learning_rate": 9.127956693251623e-06, "loss": 0.7473, "step": 7512 }, { "epoch": 0.22, "grad_norm": 8.16714007763608, "learning_rate": 9.127694991196845e-06, "loss": 0.7398, "step": 7513 }, { "epoch": 0.22, "grad_norm": 3.8196466095720663, "learning_rate": 9.127433253632014e-06, "loss": 0.2236, "step": 7514 }, { "epoch": 0.22, "grad_norm": 8.171990767583425, "learning_rate": 9.127171480559384e-06, "loss": 0.5489, "step": 7515 }, { "epoch": 0.22, "grad_norm": 12.583700785208626, "learning_rate": 9.126909671981207e-06, "loss": 0.772, "step": 7516 }, { "epoch": 0.22, "grad_norm": 10.112434596355923, "learning_rate": 9.126647827899734e-06, "loss": 0.7575, "step": 7517 }, { "epoch": 0.22, "grad_norm": 4.613527244579206, "learning_rate": 9.126385948317215e-06, "loss": 0.1995, "step": 7518 }, { "epoch": 0.22, "grad_norm": 8.405653297773215, "learning_rate": 9.126124033235909e-06, "loss": 0.5608, "step": 7519 }, { "epoch": 0.22, "grad_norm": 7.021764978311688, "learning_rate": 9.125862082658064e-06, "loss": 1.282, "step": 7520 }, { "epoch": 0.22, "grad_norm": 24.452556527936974, "learning_rate": 9.125600096585937e-06, "loss": 0.4516, "step": 7521 }, { "epoch": 0.22, "grad_norm": 2.0014525666968965, "learning_rate": 9.125338075021779e-06, "loss": 0.0934, "step": 7522 }, { "epoch": 0.22, "grad_norm": 8.877931030259909, "learning_rate": 9.125076017967845e-06, "loss": 0.3778, "step": 7523 }, { "epoch": 0.22, "grad_norm": 6.477279420937421, "learning_rate": 9.124813925426392e-06, "loss": 0.9475, "step": 7524 }, { "epoch": 0.22, "grad_norm": 8.62583676024843, "learning_rate": 9.12455179739967e-06, "loss": 0.8042, "step": 7525 }, { "epoch": 0.22, "grad_norm": 5.329312949997631, "learning_rate": 9.124289633889939e-06, "loss": 0.445, "step": 7526 }, { "epoch": 0.22, "grad_norm": 7.453687170807602, "learning_rate": 9.124027434899451e-06, "loss": 0.3274, "step": 7527 }, { "epoch": 0.22, "grad_norm": 12.967553067412231, "learning_rate": 9.123765200430464e-06, "loss": 0.6674, "step": 7528 }, { "epoch": 0.22, "grad_norm": 9.083687999928978, "learning_rate": 9.12350293048523e-06, "loss": 0.5497, "step": 7529 }, { "epoch": 0.22, "grad_norm": 6.139324836825047, "learning_rate": 9.12324062506601e-06, "loss": 0.7505, "step": 7530 }, { "epoch": 0.22, "grad_norm": 5.890736100745328, "learning_rate": 9.122978284175058e-06, "loss": 0.3884, "step": 7531 }, { "epoch": 0.22, "grad_norm": 4.24843102833801, "learning_rate": 9.122715907814632e-06, "loss": 0.1927, "step": 7532 }, { "epoch": 0.22, "grad_norm": 4.61457546856327, "learning_rate": 9.122453495986989e-06, "loss": 0.4885, "step": 7533 }, { "epoch": 0.22, "grad_norm": 6.474933193848964, "learning_rate": 9.122191048694385e-06, "loss": 0.628, "step": 7534 }, { "epoch": 0.22, "grad_norm": 4.351854297435845, "learning_rate": 9.12192856593908e-06, "loss": 0.339, "step": 7535 }, { "epoch": 0.22, "grad_norm": 5.587148297738447, "learning_rate": 9.12166604772333e-06, "loss": 0.4283, "step": 7536 }, { "epoch": 0.22, "grad_norm": 7.740389125219381, "learning_rate": 9.121403494049394e-06, "loss": 0.3704, "step": 7537 }, { "epoch": 0.22, "grad_norm": 5.518518828824172, "learning_rate": 9.121140904919534e-06, "loss": 0.7458, "step": 7538 }, { "epoch": 0.22, "grad_norm": 5.824177896009674, "learning_rate": 9.120878280336004e-06, "loss": 0.1919, "step": 7539 }, { "epoch": 0.22, "grad_norm": 8.327634185012258, "learning_rate": 9.120615620301065e-06, "loss": 0.58, "step": 7540 }, { "epoch": 0.22, "grad_norm": 5.4566766054314995, "learning_rate": 9.120352924816978e-06, "loss": 0.5323, "step": 7541 }, { "epoch": 0.22, "grad_norm": 4.806168937247528, "learning_rate": 9.120090193886003e-06, "loss": 0.644, "step": 7542 }, { "epoch": 0.22, "grad_norm": 9.668847271085355, "learning_rate": 9.119827427510397e-06, "loss": 0.5419, "step": 7543 }, { "epoch": 0.22, "grad_norm": 7.500116665250655, "learning_rate": 9.119564625692425e-06, "loss": 0.2625, "step": 7544 }, { "epoch": 0.22, "grad_norm": 4.0864629462328965, "learning_rate": 9.119301788434344e-06, "loss": 0.3747, "step": 7545 }, { "epoch": 0.22, "grad_norm": 8.14293993582738, "learning_rate": 9.11903891573842e-06, "loss": 0.2252, "step": 7546 }, { "epoch": 0.22, "grad_norm": 7.010283613902584, "learning_rate": 9.118776007606909e-06, "loss": 0.4751, "step": 7547 }, { "epoch": 0.22, "grad_norm": 8.847417387780105, "learning_rate": 9.118513064042075e-06, "loss": 0.768, "step": 7548 }, { "epoch": 0.22, "grad_norm": 11.205411244122294, "learning_rate": 9.11825008504618e-06, "loss": 0.5677, "step": 7549 }, { "epoch": 0.22, "grad_norm": 6.567106827960372, "learning_rate": 9.117987070621488e-06, "loss": 0.6276, "step": 7550 }, { "epoch": 0.22, "grad_norm": 3.849321065216106, "learning_rate": 9.11772402077026e-06, "loss": 0.5315, "step": 7551 }, { "epoch": 0.22, "grad_norm": 7.029870537967155, "learning_rate": 9.117460935494757e-06, "loss": 0.7523, "step": 7552 }, { "epoch": 0.22, "grad_norm": 5.753228235017844, "learning_rate": 9.117197814797247e-06, "loss": 0.4269, "step": 7553 }, { "epoch": 0.22, "grad_norm": 4.497710890816571, "learning_rate": 9.11693465867999e-06, "loss": 0.5012, "step": 7554 }, { "epoch": 0.22, "grad_norm": 5.844265430587213, "learning_rate": 9.116671467145253e-06, "loss": 0.4664, "step": 7555 }, { "epoch": 0.22, "grad_norm": 7.914521365028968, "learning_rate": 9.116408240195296e-06, "loss": 0.6197, "step": 7556 }, { "epoch": 0.22, "grad_norm": 4.752664546495909, "learning_rate": 9.116144977832386e-06, "loss": 0.7142, "step": 7557 }, { "epoch": 0.22, "grad_norm": 4.817497383516845, "learning_rate": 9.115881680058789e-06, "loss": 0.6537, "step": 7558 }, { "epoch": 0.22, "grad_norm": 3.8687432213571356, "learning_rate": 9.115618346876767e-06, "loss": 0.2686, "step": 7559 }, { "epoch": 0.22, "grad_norm": 6.997468286178699, "learning_rate": 9.115354978288588e-06, "loss": 0.4915, "step": 7560 }, { "epoch": 0.22, "grad_norm": 6.003247415385649, "learning_rate": 9.115091574296514e-06, "loss": 0.404, "step": 7561 }, { "epoch": 0.22, "grad_norm": 8.572203880395369, "learning_rate": 9.114828134902817e-06, "loss": 0.5419, "step": 7562 }, { "epoch": 0.22, "grad_norm": 10.772245721549291, "learning_rate": 9.114564660109761e-06, "loss": 0.8109, "step": 7563 }, { "epoch": 0.22, "grad_norm": 5.106645794287141, "learning_rate": 9.11430114991961e-06, "loss": 0.7484, "step": 7564 }, { "epoch": 0.22, "grad_norm": 3.628363430130252, "learning_rate": 9.114037604334634e-06, "loss": 0.4622, "step": 7565 }, { "epoch": 0.22, "grad_norm": 2.3855955537133515, "learning_rate": 9.113774023357097e-06, "loss": 0.2197, "step": 7566 }, { "epoch": 0.22, "grad_norm": 7.402963319480681, "learning_rate": 9.11351040698927e-06, "loss": 0.4876, "step": 7567 }, { "epoch": 0.22, "grad_norm": 7.114757336508215, "learning_rate": 9.113246755233419e-06, "loss": 0.66, "step": 7568 }, { "epoch": 0.22, "grad_norm": 4.468473786040596, "learning_rate": 9.112983068091812e-06, "loss": 0.3232, "step": 7569 }, { "epoch": 0.22, "grad_norm": 11.290336985732703, "learning_rate": 9.11271934556672e-06, "loss": 0.8937, "step": 7570 }, { "epoch": 0.22, "grad_norm": 5.138859848707928, "learning_rate": 9.112455587660408e-06, "loss": 0.3597, "step": 7571 }, { "epoch": 0.22, "grad_norm": 3.9444359136020544, "learning_rate": 9.112191794375148e-06, "loss": 0.4458, "step": 7572 }, { "epoch": 0.22, "grad_norm": 4.828024162548066, "learning_rate": 9.111927965713207e-06, "loss": 0.448, "step": 7573 }, { "epoch": 0.22, "grad_norm": 4.189416873659447, "learning_rate": 9.111664101676859e-06, "loss": 0.2417, "step": 7574 }, { "epoch": 0.22, "grad_norm": 6.082388512733828, "learning_rate": 9.111400202268368e-06, "loss": 0.5162, "step": 7575 }, { "epoch": 0.22, "grad_norm": 8.525171930363676, "learning_rate": 9.111136267490008e-06, "loss": 0.6163, "step": 7576 }, { "epoch": 0.22, "grad_norm": 6.204202515152898, "learning_rate": 9.11087229734405e-06, "loss": 0.6634, "step": 7577 }, { "epoch": 0.22, "grad_norm": 5.738811263295628, "learning_rate": 9.110608291832761e-06, "loss": 0.6392, "step": 7578 }, { "epoch": 0.22, "grad_norm": 3.98935409298976, "learning_rate": 9.110344250958417e-06, "loss": 0.3491, "step": 7579 }, { "epoch": 0.22, "grad_norm": 8.233228571092717, "learning_rate": 9.110080174723288e-06, "loss": 0.7054, "step": 7580 }, { "epoch": 0.22, "grad_norm": 6.880223284081139, "learning_rate": 9.109816063129644e-06, "loss": 0.9671, "step": 7581 }, { "epoch": 0.22, "grad_norm": 11.512108606235765, "learning_rate": 9.10955191617976e-06, "loss": 0.4026, "step": 7582 }, { "epoch": 0.22, "grad_norm": 8.09119212850769, "learning_rate": 9.109287733875904e-06, "loss": 0.5253, "step": 7583 }, { "epoch": 0.22, "grad_norm": 5.0553289416226725, "learning_rate": 9.109023516220354e-06, "loss": 0.5683, "step": 7584 }, { "epoch": 0.22, "grad_norm": 9.316465263259767, "learning_rate": 9.10875926321538e-06, "loss": 1.0373, "step": 7585 }, { "epoch": 0.22, "grad_norm": 9.011488152155039, "learning_rate": 9.108494974863256e-06, "loss": 0.8555, "step": 7586 }, { "epoch": 0.22, "grad_norm": 6.411663763833766, "learning_rate": 9.108230651166255e-06, "loss": 0.5313, "step": 7587 }, { "epoch": 0.22, "grad_norm": 6.717380614786807, "learning_rate": 9.107966292126653e-06, "loss": 0.8464, "step": 7588 }, { "epoch": 0.22, "grad_norm": 7.0883682943570685, "learning_rate": 9.107701897746721e-06, "loss": 0.6131, "step": 7589 }, { "epoch": 0.22, "grad_norm": 6.993894980174737, "learning_rate": 9.107437468028738e-06, "loss": 0.6687, "step": 7590 }, { "epoch": 0.22, "grad_norm": 4.13847240419164, "learning_rate": 9.107173002974972e-06, "loss": 0.4118, "step": 7591 }, { "epoch": 0.22, "grad_norm": 5.870926276563915, "learning_rate": 9.106908502587705e-06, "loss": 0.5588, "step": 7592 }, { "epoch": 0.22, "grad_norm": 7.01736707234642, "learning_rate": 9.106643966869211e-06, "loss": 0.6817, "step": 7593 }, { "epoch": 0.22, "grad_norm": 4.7941281533823075, "learning_rate": 9.106379395821761e-06, "loss": 0.3488, "step": 7594 }, { "epoch": 0.22, "grad_norm": 5.624629283980799, "learning_rate": 9.106114789447638e-06, "loss": 0.4792, "step": 7595 }, { "epoch": 0.22, "grad_norm": 5.836868211918115, "learning_rate": 9.105850147749114e-06, "loss": 0.4222, "step": 7596 }, { "epoch": 0.22, "grad_norm": 5.85499657374281, "learning_rate": 9.105585470728466e-06, "loss": 0.637, "step": 7597 }, { "epoch": 0.22, "grad_norm": 9.936232791933243, "learning_rate": 9.105320758387972e-06, "loss": 0.8009, "step": 7598 }, { "epoch": 0.22, "grad_norm": 3.5586791378961373, "learning_rate": 9.105056010729908e-06, "loss": 0.4239, "step": 7599 }, { "epoch": 0.22, "grad_norm": 4.3369387664812855, "learning_rate": 9.104791227756555e-06, "loss": 0.2229, "step": 7600 }, { "epoch": 0.22, "grad_norm": 12.333526626996752, "learning_rate": 9.104526409470187e-06, "loss": 1.2454, "step": 7601 }, { "epoch": 0.22, "grad_norm": 5.144487779067238, "learning_rate": 9.104261555873083e-06, "loss": 0.4111, "step": 7602 }, { "epoch": 0.22, "grad_norm": 6.217625655318342, "learning_rate": 9.103996666967524e-06, "loss": 1.1015, "step": 7603 }, { "epoch": 0.22, "grad_norm": 7.6895586994375655, "learning_rate": 9.103731742755785e-06, "loss": 0.5406, "step": 7604 }, { "epoch": 0.22, "grad_norm": 10.139867910092855, "learning_rate": 9.103466783240149e-06, "loss": 0.6492, "step": 7605 }, { "epoch": 0.22, "grad_norm": 9.605219939780282, "learning_rate": 9.103201788422891e-06, "loss": 0.7395, "step": 7606 }, { "epoch": 0.22, "grad_norm": 5.315387008694693, "learning_rate": 9.102936758306296e-06, "loss": 0.579, "step": 7607 }, { "epoch": 0.22, "grad_norm": 5.470354814755292, "learning_rate": 9.10267169289264e-06, "loss": 0.6813, "step": 7608 }, { "epoch": 0.22, "grad_norm": 9.277214020474544, "learning_rate": 9.102406592184204e-06, "loss": 0.5758, "step": 7609 }, { "epoch": 0.22, "grad_norm": 4.79211706726037, "learning_rate": 9.10214145618327e-06, "loss": 0.6983, "step": 7610 }, { "epoch": 0.22, "grad_norm": 5.985263170911296, "learning_rate": 9.101876284892118e-06, "loss": 0.6522, "step": 7611 }, { "epoch": 0.22, "grad_norm": 5.608790162607733, "learning_rate": 9.101611078313029e-06, "loss": 0.5631, "step": 7612 }, { "epoch": 0.22, "grad_norm": 2.424650066729362, "learning_rate": 9.101345836448285e-06, "loss": 0.1062, "step": 7613 }, { "epoch": 0.22, "grad_norm": 5.703371936207431, "learning_rate": 9.101080559300168e-06, "loss": 0.4632, "step": 7614 }, { "epoch": 0.22, "grad_norm": 4.736954090400859, "learning_rate": 9.10081524687096e-06, "loss": 0.2687, "step": 7615 }, { "epoch": 0.22, "grad_norm": 7.864455293198368, "learning_rate": 9.100549899162942e-06, "loss": 0.8204, "step": 7616 }, { "epoch": 0.22, "grad_norm": 9.308374297468252, "learning_rate": 9.1002845161784e-06, "loss": 0.5086, "step": 7617 }, { "epoch": 0.22, "grad_norm": 4.194728249442378, "learning_rate": 9.100019097919614e-06, "loss": 0.3514, "step": 7618 }, { "epoch": 0.22, "grad_norm": 7.274166082978777, "learning_rate": 9.099753644388868e-06, "loss": 0.6222, "step": 7619 }, { "epoch": 0.22, "grad_norm": 6.666082722202968, "learning_rate": 9.099488155588447e-06, "loss": 0.61, "step": 7620 }, { "epoch": 0.22, "grad_norm": 8.02463918451138, "learning_rate": 9.099222631520634e-06, "loss": 0.5675, "step": 7621 }, { "epoch": 0.22, "grad_norm": 11.866413264480036, "learning_rate": 9.098957072187713e-06, "loss": 0.8561, "step": 7622 }, { "epoch": 0.22, "grad_norm": 4.237413393108952, "learning_rate": 9.09869147759197e-06, "loss": 0.4406, "step": 7623 }, { "epoch": 0.22, "grad_norm": 8.444175453112445, "learning_rate": 9.098425847735689e-06, "loss": 0.8249, "step": 7624 }, { "epoch": 0.22, "grad_norm": 7.006442783504025, "learning_rate": 9.098160182621155e-06, "loss": 0.4685, "step": 7625 }, { "epoch": 0.22, "grad_norm": 6.291619808030947, "learning_rate": 9.097894482250651e-06, "loss": 0.3116, "step": 7626 }, { "epoch": 0.22, "grad_norm": 7.546810426297565, "learning_rate": 9.097628746626467e-06, "loss": 0.5549, "step": 7627 }, { "epoch": 0.22, "grad_norm": 6.586801058264348, "learning_rate": 9.09736297575089e-06, "loss": 0.7232, "step": 7628 }, { "epoch": 0.22, "grad_norm": 3.6791577615914557, "learning_rate": 9.097097169626201e-06, "loss": 0.4954, "step": 7629 }, { "epoch": 0.22, "grad_norm": 4.56387851739965, "learning_rate": 9.09683132825469e-06, "loss": 0.4184, "step": 7630 }, { "epoch": 0.22, "grad_norm": 7.900157646827471, "learning_rate": 9.096565451638643e-06, "loss": 0.6905, "step": 7631 }, { "epoch": 0.22, "grad_norm": 4.849175275899968, "learning_rate": 9.096299539780348e-06, "loss": 0.5682, "step": 7632 }, { "epoch": 0.22, "grad_norm": 9.482396631177778, "learning_rate": 9.096033592682093e-06, "loss": 1.1372, "step": 7633 }, { "epoch": 0.22, "grad_norm": 4.339881107090567, "learning_rate": 9.095767610346165e-06, "loss": 0.4468, "step": 7634 }, { "epoch": 0.22, "grad_norm": 5.006365110143122, "learning_rate": 9.095501592774853e-06, "loss": 0.5374, "step": 7635 }, { "epoch": 0.22, "grad_norm": 3.505371059742391, "learning_rate": 9.095235539970444e-06, "loss": 0.1487, "step": 7636 }, { "epoch": 0.22, "grad_norm": 3.6096723689984986, "learning_rate": 9.094969451935229e-06, "loss": 0.5742, "step": 7637 }, { "epoch": 0.22, "grad_norm": 5.4465632705648535, "learning_rate": 9.094703328671495e-06, "loss": 0.5951, "step": 7638 }, { "epoch": 0.22, "grad_norm": 6.96996758605338, "learning_rate": 9.094437170181533e-06, "loss": 0.5295, "step": 7639 }, { "epoch": 0.22, "grad_norm": 9.36762722023672, "learning_rate": 9.094170976467631e-06, "loss": 0.4445, "step": 7640 }, { "epoch": 0.22, "grad_norm": 10.92239522718251, "learning_rate": 9.093904747532082e-06, "loss": 0.7477, "step": 7641 }, { "epoch": 0.22, "grad_norm": 5.274608024388505, "learning_rate": 9.093638483377174e-06, "loss": 0.4729, "step": 7642 }, { "epoch": 0.22, "grad_norm": 8.810622860803457, "learning_rate": 9.093372184005197e-06, "loss": 0.7547, "step": 7643 }, { "epoch": 0.22, "grad_norm": 3.998860435284506, "learning_rate": 9.093105849418444e-06, "loss": 0.6749, "step": 7644 }, { "epoch": 0.22, "grad_norm": 5.266180988242594, "learning_rate": 9.092839479619205e-06, "loss": 0.5698, "step": 7645 }, { "epoch": 0.22, "grad_norm": 5.483703485225965, "learning_rate": 9.092573074609772e-06, "loss": 0.2339, "step": 7646 }, { "epoch": 0.22, "grad_norm": 40.836874630532684, "learning_rate": 9.092306634392437e-06, "loss": 0.5986, "step": 7647 }, { "epoch": 0.22, "grad_norm": 4.566358215236023, "learning_rate": 9.092040158969492e-06, "loss": 0.4273, "step": 7648 }, { "epoch": 0.22, "grad_norm": 7.571313372607045, "learning_rate": 9.091773648343229e-06, "loss": 0.4969, "step": 7649 }, { "epoch": 0.22, "grad_norm": 5.903759769461498, "learning_rate": 9.091507102515942e-06, "loss": 0.422, "step": 7650 }, { "epoch": 0.22, "grad_norm": 4.663924069802363, "learning_rate": 9.091240521489922e-06, "loss": 0.4871, "step": 7651 }, { "epoch": 0.22, "grad_norm": 6.83671703144619, "learning_rate": 9.090973905267465e-06, "loss": 0.2214, "step": 7652 }, { "epoch": 0.22, "grad_norm": 5.346044371322665, "learning_rate": 9.09070725385086e-06, "loss": 0.5693, "step": 7653 }, { "epoch": 0.22, "grad_norm": 9.944680552695798, "learning_rate": 9.090440567242406e-06, "loss": 0.6989, "step": 7654 }, { "epoch": 0.22, "grad_norm": 5.515486502056742, "learning_rate": 9.090173845444397e-06, "loss": 0.4709, "step": 7655 }, { "epoch": 0.22, "grad_norm": 4.228799534664569, "learning_rate": 9.089907088459125e-06, "loss": 0.6496, "step": 7656 }, { "epoch": 0.22, "grad_norm": 4.085105913486437, "learning_rate": 9.089640296288884e-06, "loss": 0.5946, "step": 7657 }, { "epoch": 0.22, "grad_norm": 7.519996878339242, "learning_rate": 9.089373468935974e-06, "loss": 0.4889, "step": 7658 }, { "epoch": 0.22, "grad_norm": 4.735830807089726, "learning_rate": 9.089106606402685e-06, "loss": 0.2305, "step": 7659 }, { "epoch": 0.22, "grad_norm": 9.747293145449667, "learning_rate": 9.088839708691317e-06, "loss": 0.5636, "step": 7660 }, { "epoch": 0.22, "grad_norm": 4.835749362126882, "learning_rate": 9.088572775804163e-06, "loss": 0.2705, "step": 7661 }, { "epoch": 0.22, "grad_norm": 4.9629044365568715, "learning_rate": 9.088305807743523e-06, "loss": 0.6093, "step": 7662 }, { "epoch": 0.22, "grad_norm": 9.66643483333359, "learning_rate": 9.088038804511689e-06, "loss": 0.815, "step": 7663 }, { "epoch": 0.22, "grad_norm": 8.73265394475375, "learning_rate": 9.087771766110962e-06, "loss": 0.8955, "step": 7664 }, { "epoch": 0.22, "grad_norm": 10.711722603258096, "learning_rate": 9.087504692543638e-06, "loss": 0.7708, "step": 7665 }, { "epoch": 0.22, "grad_norm": 5.936959573344331, "learning_rate": 9.087237583812014e-06, "loss": 0.661, "step": 7666 }, { "epoch": 0.22, "grad_norm": 4.600053653196823, "learning_rate": 9.086970439918388e-06, "loss": 0.278, "step": 7667 }, { "epoch": 0.22, "grad_norm": 3.330992667616988, "learning_rate": 9.086703260865057e-06, "loss": 0.3106, "step": 7668 }, { "epoch": 0.22, "grad_norm": 4.762073281462476, "learning_rate": 9.086436046654324e-06, "loss": 0.3571, "step": 7669 }, { "epoch": 0.22, "grad_norm": 10.582935135526611, "learning_rate": 9.086168797288483e-06, "loss": 0.6215, "step": 7670 }, { "epoch": 0.22, "grad_norm": 4.579455706308343, "learning_rate": 9.085901512769834e-06, "loss": 0.4084, "step": 7671 }, { "epoch": 0.22, "grad_norm": 3.9599218025578544, "learning_rate": 9.08563419310068e-06, "loss": 0.5164, "step": 7672 }, { "epoch": 0.22, "grad_norm": 6.504898792860569, "learning_rate": 9.085366838283317e-06, "loss": 0.5312, "step": 7673 }, { "epoch": 0.22, "grad_norm": 4.015970326578085, "learning_rate": 9.085099448320046e-06, "loss": 0.3361, "step": 7674 }, { "epoch": 0.22, "grad_norm": 2.8031234205362723, "learning_rate": 9.084832023213167e-06, "loss": 0.3944, "step": 7675 }, { "epoch": 0.22, "grad_norm": 7.8564164829140095, "learning_rate": 9.08456456296498e-06, "loss": 0.5281, "step": 7676 }, { "epoch": 0.22, "grad_norm": 3.4656796484612307, "learning_rate": 9.08429706757779e-06, "loss": 0.3787, "step": 7677 }, { "epoch": 0.22, "grad_norm": 4.874072769211941, "learning_rate": 9.084029537053894e-06, "loss": 0.3869, "step": 7678 }, { "epoch": 0.22, "grad_norm": 5.3979792531531015, "learning_rate": 9.083761971395594e-06, "loss": 0.3118, "step": 7679 }, { "epoch": 0.22, "grad_norm": 3.128889638639972, "learning_rate": 9.083494370605192e-06, "loss": 0.5109, "step": 7680 }, { "epoch": 0.22, "grad_norm": 11.755845584027881, "learning_rate": 9.083226734684992e-06, "loss": 0.5695, "step": 7681 }, { "epoch": 0.22, "grad_norm": 7.031288791125634, "learning_rate": 9.082959063637296e-06, "loss": 0.2514, "step": 7682 }, { "epoch": 0.22, "grad_norm": 3.3896853643596847, "learning_rate": 9.082691357464403e-06, "loss": 0.458, "step": 7683 }, { "epoch": 0.22, "grad_norm": 6.337411918253757, "learning_rate": 9.082423616168621e-06, "loss": 0.4825, "step": 7684 }, { "epoch": 0.22, "grad_norm": 6.538877277189302, "learning_rate": 9.082155839752251e-06, "loss": 0.5062, "step": 7685 }, { "epoch": 0.22, "grad_norm": 6.639754797073367, "learning_rate": 9.081888028217597e-06, "loss": 0.5415, "step": 7686 }, { "epoch": 0.22, "grad_norm": 10.120428866621017, "learning_rate": 9.081620181566963e-06, "loss": 0.8667, "step": 7687 }, { "epoch": 0.22, "grad_norm": 8.510489190740833, "learning_rate": 9.081352299802653e-06, "loss": 0.542, "step": 7688 }, { "epoch": 0.22, "grad_norm": 6.2341168954908355, "learning_rate": 9.08108438292697e-06, "loss": 0.5504, "step": 7689 }, { "epoch": 0.22, "grad_norm": 6.059804297393702, "learning_rate": 9.080816430942223e-06, "loss": 0.4396, "step": 7690 }, { "epoch": 0.22, "grad_norm": 4.518935841053631, "learning_rate": 9.080548443850714e-06, "loss": 0.295, "step": 7691 }, { "epoch": 0.22, "grad_norm": 4.465784088995544, "learning_rate": 9.080280421654748e-06, "loss": 0.5502, "step": 7692 }, { "epoch": 0.22, "grad_norm": 2.5689498628703498, "learning_rate": 9.080012364356632e-06, "loss": 0.2817, "step": 7693 }, { "epoch": 0.22, "grad_norm": 3.68906188473662, "learning_rate": 9.079744271958673e-06, "loss": 0.7327, "step": 7694 }, { "epoch": 0.22, "grad_norm": 9.831521131265324, "learning_rate": 9.079476144463176e-06, "loss": 0.6703, "step": 7695 }, { "epoch": 0.22, "grad_norm": 2.642514709478633, "learning_rate": 9.07920798187245e-06, "loss": 0.228, "step": 7696 }, { "epoch": 0.22, "grad_norm": 3.620100162816979, "learning_rate": 9.078939784188797e-06, "loss": 0.2816, "step": 7697 }, { "epoch": 0.22, "grad_norm": 3.945243487368249, "learning_rate": 9.078671551414528e-06, "loss": 0.4097, "step": 7698 }, { "epoch": 0.22, "grad_norm": 3.9321461564314126, "learning_rate": 9.078403283551949e-06, "loss": 0.3872, "step": 7699 }, { "epoch": 0.22, "grad_norm": 9.791192507254229, "learning_rate": 9.07813498060337e-06, "loss": 0.5177, "step": 7700 }, { "epoch": 0.22, "grad_norm": 6.691955686371191, "learning_rate": 9.077866642571098e-06, "loss": 0.7481, "step": 7701 }, { "epoch": 0.22, "grad_norm": 6.6940128663038845, "learning_rate": 9.07759826945744e-06, "loss": 0.4918, "step": 7702 }, { "epoch": 0.22, "grad_norm": 3.725534517225835, "learning_rate": 9.077329861264708e-06, "loss": 0.4955, "step": 7703 }, { "epoch": 0.22, "grad_norm": 7.8946050294111645, "learning_rate": 9.077061417995208e-06, "loss": 0.6241, "step": 7704 }, { "epoch": 0.22, "grad_norm": 2.4637955796173694, "learning_rate": 9.07679293965125e-06, "loss": 0.143, "step": 7705 }, { "epoch": 0.22, "grad_norm": 5.398847506420514, "learning_rate": 9.076524426235147e-06, "loss": 0.4178, "step": 7706 }, { "epoch": 0.22, "grad_norm": 3.072492199477373, "learning_rate": 9.076255877749205e-06, "loss": 0.1444, "step": 7707 }, { "epoch": 0.22, "grad_norm": 6.447687365799922, "learning_rate": 9.075987294195734e-06, "loss": 0.6813, "step": 7708 }, { "epoch": 0.22, "grad_norm": 2.8638520966292047, "learning_rate": 9.075718675577047e-06, "loss": 0.35, "step": 7709 }, { "epoch": 0.22, "grad_norm": 4.433170570645931, "learning_rate": 9.075450021895454e-06, "loss": 0.2543, "step": 7710 }, { "epoch": 0.22, "grad_norm": 10.60626885362432, "learning_rate": 9.075181333153266e-06, "loss": 0.9373, "step": 7711 }, { "epoch": 0.22, "grad_norm": 3.811752683844616, "learning_rate": 9.074912609352795e-06, "loss": 0.3887, "step": 7712 }, { "epoch": 0.22, "grad_norm": 5.337909898049671, "learning_rate": 9.074643850496354e-06, "loss": 0.3294, "step": 7713 }, { "epoch": 0.22, "grad_norm": 7.7878573756625835, "learning_rate": 9.074375056586252e-06, "loss": 0.7074, "step": 7714 }, { "epoch": 0.22, "grad_norm": 2.9005716697625026, "learning_rate": 9.074106227624804e-06, "loss": 0.2122, "step": 7715 }, { "epoch": 0.22, "grad_norm": 7.369249267747877, "learning_rate": 9.073837363614321e-06, "loss": 0.4363, "step": 7716 }, { "epoch": 0.22, "grad_norm": 3.3033739369691255, "learning_rate": 9.073568464557117e-06, "loss": 0.5536, "step": 7717 }, { "epoch": 0.22, "grad_norm": 7.729758563003231, "learning_rate": 9.073299530455505e-06, "loss": 0.4613, "step": 7718 }, { "epoch": 0.22, "grad_norm": 3.247367673179243, "learning_rate": 9.073030561311798e-06, "loss": 0.3352, "step": 7719 }, { "epoch": 0.22, "grad_norm": 7.223553665220553, "learning_rate": 9.072761557128312e-06, "loss": 0.6866, "step": 7720 }, { "epoch": 0.22, "grad_norm": 5.231118444613885, "learning_rate": 9.072492517907358e-06, "loss": 0.3879, "step": 7721 }, { "epoch": 0.22, "grad_norm": 5.439559283496176, "learning_rate": 9.072223443651254e-06, "loss": 0.3557, "step": 7722 }, { "epoch": 0.22, "grad_norm": 9.933445901254235, "learning_rate": 9.071954334362312e-06, "loss": 0.5131, "step": 7723 }, { "epoch": 0.22, "grad_norm": 7.562498108414342, "learning_rate": 9.071685190042849e-06, "loss": 0.8378, "step": 7724 }, { "epoch": 0.22, "grad_norm": 5.860538255298329, "learning_rate": 9.071416010695178e-06, "loss": 0.7296, "step": 7725 }, { "epoch": 0.22, "grad_norm": 4.681658118147269, "learning_rate": 9.07114679632162e-06, "loss": 0.3877, "step": 7726 }, { "epoch": 0.22, "grad_norm": 6.199030065038676, "learning_rate": 9.070877546924483e-06, "loss": 0.4544, "step": 7727 }, { "epoch": 0.22, "grad_norm": 5.267126395686708, "learning_rate": 9.070608262506091e-06, "loss": 0.3429, "step": 7728 }, { "epoch": 0.22, "grad_norm": 7.290875143051244, "learning_rate": 9.070338943068754e-06, "loss": 0.5817, "step": 7729 }, { "epoch": 0.22, "grad_norm": 8.061234604058786, "learning_rate": 9.070069588614794e-06, "loss": 0.7607, "step": 7730 }, { "epoch": 0.22, "grad_norm": 11.245787107229658, "learning_rate": 9.069800199146529e-06, "loss": 0.8993, "step": 7731 }, { "epoch": 0.22, "grad_norm": 6.490522627925433, "learning_rate": 9.069530774666271e-06, "loss": 0.79, "step": 7732 }, { "epoch": 0.22, "grad_norm": 5.438723021019293, "learning_rate": 9.069261315176341e-06, "loss": 0.481, "step": 7733 }, { "epoch": 0.22, "grad_norm": 5.374473545995593, "learning_rate": 9.068991820679056e-06, "loss": 0.9317, "step": 7734 }, { "epoch": 0.22, "grad_norm": 6.316303353777888, "learning_rate": 9.068722291176737e-06, "loss": 0.7383, "step": 7735 }, { "epoch": 0.22, "grad_norm": 5.038598043638857, "learning_rate": 9.068452726671702e-06, "loss": 0.4521, "step": 7736 }, { "epoch": 0.22, "grad_norm": 7.276802309035721, "learning_rate": 9.068183127166267e-06, "loss": 0.422, "step": 7737 }, { "epoch": 0.22, "grad_norm": 4.411156972464694, "learning_rate": 9.067913492662754e-06, "loss": 0.1967, "step": 7738 }, { "epoch": 0.22, "grad_norm": 6.472509940778128, "learning_rate": 9.067643823163483e-06, "loss": 0.4404, "step": 7739 }, { "epoch": 0.22, "grad_norm": 2.8173177357897976, "learning_rate": 9.067374118670772e-06, "loss": 0.3282, "step": 7740 }, { "epoch": 0.22, "grad_norm": 6.638896474829489, "learning_rate": 9.067104379186943e-06, "loss": 0.512, "step": 7741 }, { "epoch": 0.22, "grad_norm": 7.86943178260767, "learning_rate": 9.066834604714315e-06, "loss": 0.5386, "step": 7742 }, { "epoch": 0.22, "grad_norm": 3.410964642782379, "learning_rate": 9.066564795255211e-06, "loss": 0.3324, "step": 7743 }, { "epoch": 0.22, "grad_norm": 2.747638327697665, "learning_rate": 9.06629495081195e-06, "loss": 0.3233, "step": 7744 }, { "epoch": 0.22, "grad_norm": 5.451724946610352, "learning_rate": 9.066025071386852e-06, "loss": 0.2715, "step": 7745 }, { "epoch": 0.22, "grad_norm": 7.3079698011863234, "learning_rate": 9.065755156982243e-06, "loss": 0.6347, "step": 7746 }, { "epoch": 0.22, "grad_norm": 9.369208059889301, "learning_rate": 9.065485207600444e-06, "loss": 0.7231, "step": 7747 }, { "epoch": 0.22, "grad_norm": 7.11130385170775, "learning_rate": 9.065215223243775e-06, "loss": 0.6025, "step": 7748 }, { "epoch": 0.22, "grad_norm": 3.4719645752154364, "learning_rate": 9.064945203914561e-06, "loss": 0.4192, "step": 7749 }, { "epoch": 0.22, "grad_norm": 8.95567607651297, "learning_rate": 9.064675149615122e-06, "loss": 0.7092, "step": 7750 }, { "epoch": 0.22, "grad_norm": 8.08919217125669, "learning_rate": 9.064405060347785e-06, "loss": 0.6407, "step": 7751 }, { "epoch": 0.22, "grad_norm": 1.7901953562896509, "learning_rate": 9.06413493611487e-06, "loss": 0.1567, "step": 7752 }, { "epoch": 0.22, "grad_norm": 5.165069989656179, "learning_rate": 9.063864776918704e-06, "loss": 0.432, "step": 7753 }, { "epoch": 0.22, "grad_norm": 11.399072181021346, "learning_rate": 9.063594582761608e-06, "loss": 0.7057, "step": 7754 }, { "epoch": 0.22, "grad_norm": 9.514791918838217, "learning_rate": 9.06332435364591e-06, "loss": 0.5806, "step": 7755 }, { "epoch": 0.22, "grad_norm": 2.5674423147033956, "learning_rate": 9.063054089573933e-06, "loss": 0.2444, "step": 7756 }, { "epoch": 0.22, "grad_norm": 5.284529892034822, "learning_rate": 9.062783790548e-06, "loss": 0.5391, "step": 7757 }, { "epoch": 0.22, "grad_norm": 6.698506576007515, "learning_rate": 9.06251345657044e-06, "loss": 0.4891, "step": 7758 }, { "epoch": 0.22, "grad_norm": 5.931932127531489, "learning_rate": 9.062243087643577e-06, "loss": 0.5442, "step": 7759 }, { "epoch": 0.22, "grad_norm": 4.269487106779104, "learning_rate": 9.061972683769736e-06, "loss": 0.6547, "step": 7760 }, { "epoch": 0.22, "grad_norm": 8.292286324150261, "learning_rate": 9.061702244951243e-06, "loss": 0.4539, "step": 7761 }, { "epoch": 0.22, "grad_norm": 4.289714826162099, "learning_rate": 9.061431771190426e-06, "loss": 0.4785, "step": 7762 }, { "epoch": 0.22, "grad_norm": 3.919732135310843, "learning_rate": 9.061161262489612e-06, "loss": 0.5455, "step": 7763 }, { "epoch": 0.22, "grad_norm": 2.056744202471772, "learning_rate": 9.06089071885113e-06, "loss": 0.2867, "step": 7764 }, { "epoch": 0.22, "grad_norm": 11.027626112357556, "learning_rate": 9.060620140277303e-06, "loss": 0.3637, "step": 7765 }, { "epoch": 0.22, "grad_norm": 4.91092153334806, "learning_rate": 9.060349526770462e-06, "loss": 0.4454, "step": 7766 }, { "epoch": 0.22, "grad_norm": 8.04400791089671, "learning_rate": 9.060078878332932e-06, "loss": 0.7515, "step": 7767 }, { "epoch": 0.22, "grad_norm": 7.032437677741247, "learning_rate": 9.059808194967045e-06, "loss": 0.5536, "step": 7768 }, { "epoch": 0.22, "grad_norm": 8.257067456374932, "learning_rate": 9.059537476675126e-06, "loss": 0.6851, "step": 7769 }, { "epoch": 0.22, "grad_norm": 6.772532753539559, "learning_rate": 9.059266723459507e-06, "loss": 0.337, "step": 7770 }, { "epoch": 0.22, "grad_norm": 8.764961883975603, "learning_rate": 9.058995935322517e-06, "loss": 0.6608, "step": 7771 }, { "epoch": 0.22, "grad_norm": 8.088410549407051, "learning_rate": 9.058725112266484e-06, "loss": 0.8892, "step": 7772 }, { "epoch": 0.22, "grad_norm": 4.1368423169694, "learning_rate": 9.05845425429374e-06, "loss": 0.2276, "step": 7773 }, { "epoch": 0.22, "grad_norm": 6.536611847512275, "learning_rate": 9.058183361406611e-06, "loss": 0.7206, "step": 7774 }, { "epoch": 0.22, "grad_norm": 8.517044425046768, "learning_rate": 9.057912433607433e-06, "loss": 0.8686, "step": 7775 }, { "epoch": 0.22, "grad_norm": 8.464638757269956, "learning_rate": 9.057641470898531e-06, "loss": 0.8094, "step": 7776 }, { "epoch": 0.22, "grad_norm": 9.741147154788052, "learning_rate": 9.057370473282241e-06, "loss": 1.2807, "step": 7777 }, { "epoch": 0.22, "grad_norm": 8.301552124131833, "learning_rate": 9.057099440760893e-06, "loss": 0.574, "step": 7778 }, { "epoch": 0.22, "grad_norm": 4.782359960532731, "learning_rate": 9.056828373336816e-06, "loss": 0.2328, "step": 7779 }, { "epoch": 0.22, "grad_norm": 6.856504843469307, "learning_rate": 9.056557271012345e-06, "loss": 0.8169, "step": 7780 }, { "epoch": 0.22, "grad_norm": 12.681784032835964, "learning_rate": 9.056286133789811e-06, "loss": 0.4359, "step": 7781 }, { "epoch": 0.22, "grad_norm": 7.753065302909973, "learning_rate": 9.056014961671548e-06, "loss": 0.4773, "step": 7782 }, { "epoch": 0.22, "grad_norm": 4.666061126499901, "learning_rate": 9.055743754659887e-06, "loss": 0.5073, "step": 7783 }, { "epoch": 0.22, "grad_norm": 7.2561003243241124, "learning_rate": 9.055472512757161e-06, "loss": 0.9653, "step": 7784 }, { "epoch": 0.22, "grad_norm": 4.964397275284709, "learning_rate": 9.055201235965705e-06, "loss": 0.6333, "step": 7785 }, { "epoch": 0.22, "grad_norm": 7.3410374278309725, "learning_rate": 9.054929924287852e-06, "loss": 0.3085, "step": 7786 }, { "epoch": 0.22, "grad_norm": 4.216401188048483, "learning_rate": 9.054658577725937e-06, "loss": 0.3196, "step": 7787 }, { "epoch": 0.22, "grad_norm": 4.177549439234141, "learning_rate": 9.054387196282293e-06, "loss": 0.3261, "step": 7788 }, { "epoch": 0.22, "grad_norm": 6.056244483842887, "learning_rate": 9.054115779959254e-06, "loss": 0.3706, "step": 7789 }, { "epoch": 0.22, "grad_norm": 8.689987457714095, "learning_rate": 9.053844328759155e-06, "loss": 0.4408, "step": 7790 }, { "epoch": 0.22, "grad_norm": 6.367460954094859, "learning_rate": 9.053572842684335e-06, "loss": 0.5078, "step": 7791 }, { "epoch": 0.22, "grad_norm": 4.525901452787324, "learning_rate": 9.053301321737126e-06, "loss": 0.8702, "step": 7792 }, { "epoch": 0.22, "grad_norm": 3.840678499747757, "learning_rate": 9.053029765919865e-06, "loss": 0.3911, "step": 7793 }, { "epoch": 0.22, "grad_norm": 5.9662808408965065, "learning_rate": 9.052758175234889e-06, "loss": 0.655, "step": 7794 }, { "epoch": 0.22, "grad_norm": 5.33284436408336, "learning_rate": 9.052486549684532e-06, "loss": 0.6922, "step": 7795 }, { "epoch": 0.22, "grad_norm": 8.748138938260965, "learning_rate": 9.05221488927113e-06, "loss": 0.654, "step": 7796 }, { "epoch": 0.22, "grad_norm": 6.743803429590786, "learning_rate": 9.051943193997026e-06, "loss": 0.3392, "step": 7797 }, { "epoch": 0.22, "grad_norm": 5.7790640332552, "learning_rate": 9.051671463864553e-06, "loss": 0.8525, "step": 7798 }, { "epoch": 0.22, "grad_norm": 6.672354937763489, "learning_rate": 9.05139969887605e-06, "loss": 0.727, "step": 7799 }, { "epoch": 0.22, "grad_norm": 5.2449174529381075, "learning_rate": 9.051127899033851e-06, "loss": 0.6003, "step": 7800 }, { "epoch": 0.22, "grad_norm": 7.272130600880138, "learning_rate": 9.050856064340299e-06, "loss": 0.538, "step": 7801 }, { "epoch": 0.22, "grad_norm": 11.522760010539924, "learning_rate": 9.05058419479773e-06, "loss": 0.9667, "step": 7802 }, { "epoch": 0.22, "grad_norm": 6.833011371502039, "learning_rate": 9.050312290408487e-06, "loss": 0.4327, "step": 7803 }, { "epoch": 0.22, "grad_norm": 6.207015040516319, "learning_rate": 9.050040351174904e-06, "loss": 0.5462, "step": 7804 }, { "epoch": 0.22, "grad_norm": 7.155638522880785, "learning_rate": 9.049768377099322e-06, "loss": 0.666, "step": 7805 }, { "epoch": 0.22, "grad_norm": 6.9332410781787, "learning_rate": 9.049496368184081e-06, "loss": 0.8679, "step": 7806 }, { "epoch": 0.22, "grad_norm": 10.67028189032026, "learning_rate": 9.049224324431522e-06, "loss": 0.9852, "step": 7807 }, { "epoch": 0.22, "grad_norm": 5.510300267894518, "learning_rate": 9.048952245843987e-06, "loss": 0.5606, "step": 7808 }, { "epoch": 0.22, "grad_norm": 9.014305401625757, "learning_rate": 9.048680132423812e-06, "loss": 0.598, "step": 7809 }, { "epoch": 0.22, "grad_norm": 6.382272723193534, "learning_rate": 9.04840798417334e-06, "loss": 0.8623, "step": 7810 }, { "epoch": 0.22, "grad_norm": 9.953074791989732, "learning_rate": 9.048135801094913e-06, "loss": 1.132, "step": 7811 }, { "epoch": 0.22, "grad_norm": 4.409441394157349, "learning_rate": 9.047863583190872e-06, "loss": 0.5141, "step": 7812 }, { "epoch": 0.22, "grad_norm": 8.432761974229296, "learning_rate": 9.04759133046356e-06, "loss": 0.4299, "step": 7813 }, { "epoch": 0.22, "grad_norm": 4.610205339546317, "learning_rate": 9.047319042915317e-06, "loss": 0.5323, "step": 7814 }, { "epoch": 0.22, "grad_norm": 4.589744222657614, "learning_rate": 9.047046720548486e-06, "loss": 0.6308, "step": 7815 }, { "epoch": 0.22, "grad_norm": 10.326785602373102, "learning_rate": 9.046774363365413e-06, "loss": 1.0669, "step": 7816 }, { "epoch": 0.22, "grad_norm": 11.045379666960512, "learning_rate": 9.046501971368436e-06, "loss": 0.6355, "step": 7817 }, { "epoch": 0.22, "grad_norm": 7.340855908811859, "learning_rate": 9.046229544559902e-06, "loss": 0.6579, "step": 7818 }, { "epoch": 0.22, "grad_norm": 6.526375589760603, "learning_rate": 9.045957082942152e-06, "loss": 0.5429, "step": 7819 }, { "epoch": 0.22, "grad_norm": 7.091968934565029, "learning_rate": 9.045684586517534e-06, "loss": 0.6975, "step": 7820 }, { "epoch": 0.22, "grad_norm": 5.983997860969325, "learning_rate": 9.045412055288388e-06, "loss": 0.5559, "step": 7821 }, { "epoch": 0.22, "grad_norm": 10.067412983679569, "learning_rate": 9.04513948925706e-06, "loss": 0.3514, "step": 7822 }, { "epoch": 0.22, "grad_norm": 7.60046256439564, "learning_rate": 9.044866888425895e-06, "loss": 0.4684, "step": 7823 }, { "epoch": 0.22, "grad_norm": 8.004643582683942, "learning_rate": 9.044594252797237e-06, "loss": 0.6686, "step": 7824 }, { "epoch": 0.22, "grad_norm": 5.412868087269056, "learning_rate": 9.044321582373434e-06, "loss": 0.5377, "step": 7825 }, { "epoch": 0.22, "grad_norm": 6.771060795019932, "learning_rate": 9.04404887715683e-06, "loss": 0.5086, "step": 7826 }, { "epoch": 0.22, "grad_norm": 3.326694449714532, "learning_rate": 9.043776137149773e-06, "loss": 0.598, "step": 7827 }, { "epoch": 0.22, "grad_norm": 6.450649534832452, "learning_rate": 9.043503362354605e-06, "loss": 0.7741, "step": 7828 }, { "epoch": 0.22, "grad_norm": 8.825194610092508, "learning_rate": 9.043230552773678e-06, "loss": 0.5439, "step": 7829 }, { "epoch": 0.22, "grad_norm": 10.98407064480162, "learning_rate": 9.042957708409335e-06, "loss": 0.301, "step": 7830 }, { "epoch": 0.22, "grad_norm": 5.507373851867347, "learning_rate": 9.042684829263924e-06, "loss": 0.6309, "step": 7831 }, { "epoch": 0.22, "grad_norm": 3.916936797437772, "learning_rate": 9.042411915339795e-06, "loss": 0.1967, "step": 7832 }, { "epoch": 0.22, "grad_norm": 7.774827075764736, "learning_rate": 9.042138966639293e-06, "loss": 0.7162, "step": 7833 }, { "epoch": 0.22, "grad_norm": 3.600839852675369, "learning_rate": 9.041865983164767e-06, "loss": 0.3891, "step": 7834 }, { "epoch": 0.22, "grad_norm": 7.790735308083891, "learning_rate": 9.041592964918566e-06, "loss": 0.6568, "step": 7835 }, { "epoch": 0.22, "grad_norm": 6.193080273862387, "learning_rate": 9.041319911903038e-06, "loss": 0.6255, "step": 7836 }, { "epoch": 0.22, "grad_norm": 9.226646015974687, "learning_rate": 9.041046824120533e-06, "loss": 0.2184, "step": 7837 }, { "epoch": 0.22, "grad_norm": 2.9279479495396292, "learning_rate": 9.040773701573398e-06, "loss": 0.1079, "step": 7838 }, { "epoch": 0.22, "grad_norm": 7.872439922007825, "learning_rate": 9.040500544263986e-06, "loss": 0.5707, "step": 7839 }, { "epoch": 0.22, "grad_norm": 6.395409805075795, "learning_rate": 9.040227352194645e-06, "loss": 0.2089, "step": 7840 }, { "epoch": 0.22, "grad_norm": 7.631603758264493, "learning_rate": 9.039954125367726e-06, "loss": 0.843, "step": 7841 }, { "epoch": 0.22, "grad_norm": 4.49626476184669, "learning_rate": 9.039680863785578e-06, "loss": 0.7872, "step": 7842 }, { "epoch": 0.22, "grad_norm": 7.707437851587516, "learning_rate": 9.039407567450554e-06, "loss": 0.3943, "step": 7843 }, { "epoch": 0.22, "grad_norm": 5.707053140732743, "learning_rate": 9.039134236365004e-06, "loss": 0.4137, "step": 7844 }, { "epoch": 0.22, "grad_norm": 5.447077679871044, "learning_rate": 9.038860870531279e-06, "loss": 0.4427, "step": 7845 }, { "epoch": 0.22, "grad_norm": 4.2209528505375635, "learning_rate": 9.038587469951729e-06, "loss": 0.5935, "step": 7846 }, { "epoch": 0.22, "grad_norm": 7.346329937912613, "learning_rate": 9.038314034628712e-06, "loss": 0.5239, "step": 7847 }, { "epoch": 0.22, "grad_norm": 6.247158014738254, "learning_rate": 9.038040564564574e-06, "loss": 0.6523, "step": 7848 }, { "epoch": 0.22, "grad_norm": 5.799019470861026, "learning_rate": 9.037767059761673e-06, "loss": 0.3725, "step": 7849 }, { "epoch": 0.22, "grad_norm": 7.2328823189240605, "learning_rate": 9.037493520222356e-06, "loss": 0.418, "step": 7850 }, { "epoch": 0.22, "grad_norm": 6.2152073416338665, "learning_rate": 9.037219945948981e-06, "loss": 0.5554, "step": 7851 }, { "epoch": 0.22, "grad_norm": 3.5830233166271603, "learning_rate": 9.0369463369439e-06, "loss": 0.5809, "step": 7852 }, { "epoch": 0.22, "grad_norm": 11.863826916266147, "learning_rate": 9.036672693209465e-06, "loss": 0.8317, "step": 7853 }, { "epoch": 0.22, "grad_norm": 6.374230132646259, "learning_rate": 9.036399014748034e-06, "loss": 0.7959, "step": 7854 }, { "epoch": 0.22, "grad_norm": 3.2212579826401138, "learning_rate": 9.036125301561957e-06, "loss": 0.4862, "step": 7855 }, { "epoch": 0.22, "grad_norm": 5.945677897670021, "learning_rate": 9.035851553653593e-06, "loss": 1.0324, "step": 7856 }, { "epoch": 0.23, "grad_norm": 8.646993707821549, "learning_rate": 9.035577771025295e-06, "loss": 0.8559, "step": 7857 }, { "epoch": 0.23, "grad_norm": 10.347930155750992, "learning_rate": 9.035303953679418e-06, "loss": 0.736, "step": 7858 }, { "epoch": 0.23, "grad_norm": 4.0356335279218944, "learning_rate": 9.035030101618317e-06, "loss": 0.2573, "step": 7859 }, { "epoch": 0.23, "grad_norm": 3.90256582855871, "learning_rate": 9.034756214844349e-06, "loss": 0.299, "step": 7860 }, { "epoch": 0.23, "grad_norm": 3.3591925283155857, "learning_rate": 9.03448229335987e-06, "loss": 0.2382, "step": 7861 }, { "epoch": 0.23, "grad_norm": 5.482904242429908, "learning_rate": 9.034208337167237e-06, "loss": 0.3622, "step": 7862 }, { "epoch": 0.23, "grad_norm": 7.583282666159363, "learning_rate": 9.033934346268807e-06, "loss": 0.8323, "step": 7863 }, { "epoch": 0.23, "grad_norm": 4.238233434170078, "learning_rate": 9.033660320666935e-06, "loss": 0.3924, "step": 7864 }, { "epoch": 0.23, "grad_norm": 5.710643004040113, "learning_rate": 9.033386260363981e-06, "loss": 0.6534, "step": 7865 }, { "epoch": 0.23, "grad_norm": 9.75300825612002, "learning_rate": 9.033112165362301e-06, "loss": 0.8134, "step": 7866 }, { "epoch": 0.23, "grad_norm": 2.7632744061852006, "learning_rate": 9.032838035664256e-06, "loss": 0.2925, "step": 7867 }, { "epoch": 0.23, "grad_norm": 10.699968616047405, "learning_rate": 9.032563871272197e-06, "loss": 0.6628, "step": 7868 }, { "epoch": 0.23, "grad_norm": 7.689707431361992, "learning_rate": 9.032289672188492e-06, "loss": 0.5836, "step": 7869 }, { "epoch": 0.23, "grad_norm": 5.796628879967245, "learning_rate": 9.032015438415494e-06, "loss": 0.7662, "step": 7870 }, { "epoch": 0.23, "grad_norm": 8.483561355609067, "learning_rate": 9.031741169955564e-06, "loss": 0.7297, "step": 7871 }, { "epoch": 0.23, "grad_norm": 6.43245099033821, "learning_rate": 9.03146686681106e-06, "loss": 0.5708, "step": 7872 }, { "epoch": 0.23, "grad_norm": 5.037799292445861, "learning_rate": 9.031192528984343e-06, "loss": 0.7494, "step": 7873 }, { "epoch": 0.23, "grad_norm": 5.422873633154484, "learning_rate": 9.030918156477773e-06, "loss": 0.6756, "step": 7874 }, { "epoch": 0.23, "grad_norm": 7.128348784924607, "learning_rate": 9.030643749293711e-06, "loss": 0.6316, "step": 7875 }, { "epoch": 0.23, "grad_norm": 6.97403130316266, "learning_rate": 9.030369307434517e-06, "loss": 0.6726, "step": 7876 }, { "epoch": 0.23, "grad_norm": 3.3537656038812065, "learning_rate": 9.030094830902552e-06, "loss": 0.237, "step": 7877 }, { "epoch": 0.23, "grad_norm": 7.905603676413673, "learning_rate": 9.029820319700178e-06, "loss": 0.5393, "step": 7878 }, { "epoch": 0.23, "grad_norm": 3.397622056667518, "learning_rate": 9.029545773829755e-06, "loss": 0.2645, "step": 7879 }, { "epoch": 0.23, "grad_norm": 7.6373601073394255, "learning_rate": 9.029271193293646e-06, "loss": 0.694, "step": 7880 }, { "epoch": 0.23, "grad_norm": 8.933336688748009, "learning_rate": 9.028996578094213e-06, "loss": 0.5781, "step": 7881 }, { "epoch": 0.23, "grad_norm": 3.5989640599751307, "learning_rate": 9.02872192823382e-06, "loss": 0.5043, "step": 7882 }, { "epoch": 0.23, "grad_norm": 6.4462183460274325, "learning_rate": 9.028447243714827e-06, "loss": 0.429, "step": 7883 }, { "epoch": 0.23, "grad_norm": 6.068479911852041, "learning_rate": 9.028172524539597e-06, "loss": 0.5338, "step": 7884 }, { "epoch": 0.23, "grad_norm": 3.388775527644069, "learning_rate": 9.027897770710497e-06, "loss": 0.3635, "step": 7885 }, { "epoch": 0.23, "grad_norm": 3.4952495557346692, "learning_rate": 9.027622982229887e-06, "loss": 0.3419, "step": 7886 }, { "epoch": 0.23, "grad_norm": 4.708780304051996, "learning_rate": 9.027348159100134e-06, "loss": 0.3313, "step": 7887 }, { "epoch": 0.23, "grad_norm": 3.7218447077853245, "learning_rate": 9.027073301323599e-06, "loss": 0.6239, "step": 7888 }, { "epoch": 0.23, "grad_norm": 7.062855770636288, "learning_rate": 9.026798408902649e-06, "loss": 0.6949, "step": 7889 }, { "epoch": 0.23, "grad_norm": 8.040137334950701, "learning_rate": 9.026523481839646e-06, "loss": 1.1323, "step": 7890 }, { "epoch": 0.23, "grad_norm": 3.4294706028369246, "learning_rate": 9.02624852013696e-06, "loss": 0.1602, "step": 7891 }, { "epoch": 0.23, "grad_norm": 9.559043159723444, "learning_rate": 9.025973523796952e-06, "loss": 0.6952, "step": 7892 }, { "epoch": 0.23, "grad_norm": 6.241400219060505, "learning_rate": 9.02569849282199e-06, "loss": 0.4367, "step": 7893 }, { "epoch": 0.23, "grad_norm": 5.918613632870883, "learning_rate": 9.025423427214441e-06, "loss": 0.4524, "step": 7894 }, { "epoch": 0.23, "grad_norm": 7.099485288687365, "learning_rate": 9.025148326976668e-06, "loss": 0.6345, "step": 7895 }, { "epoch": 0.23, "grad_norm": 7.7791428374742955, "learning_rate": 9.024873192111042e-06, "loss": 0.5386, "step": 7896 }, { "epoch": 0.23, "grad_norm": 10.051429013054202, "learning_rate": 9.024598022619923e-06, "loss": 0.3287, "step": 7897 }, { "epoch": 0.23, "grad_norm": 7.123401696813214, "learning_rate": 9.024322818505687e-06, "loss": 0.3549, "step": 7898 }, { "epoch": 0.23, "grad_norm": 5.809633101731416, "learning_rate": 9.024047579770695e-06, "loss": 0.513, "step": 7899 }, { "epoch": 0.23, "grad_norm": 5.636064963383972, "learning_rate": 9.023772306417318e-06, "loss": 0.5834, "step": 7900 }, { "epoch": 0.23, "grad_norm": 5.800717131905741, "learning_rate": 9.023496998447923e-06, "loss": 0.5438, "step": 7901 }, { "epoch": 0.23, "grad_norm": 10.246441479278612, "learning_rate": 9.02322165586488e-06, "loss": 0.9544, "step": 7902 }, { "epoch": 0.23, "grad_norm": 5.678040550330341, "learning_rate": 9.022946278670555e-06, "loss": 0.4608, "step": 7903 }, { "epoch": 0.23, "grad_norm": 6.845987938195228, "learning_rate": 9.022670866867319e-06, "loss": 0.8171, "step": 7904 }, { "epoch": 0.23, "grad_norm": 7.183470716143667, "learning_rate": 9.02239542045754e-06, "loss": 0.6097, "step": 7905 }, { "epoch": 0.23, "grad_norm": 3.8746464783393524, "learning_rate": 9.022119939443588e-06, "loss": 0.1288, "step": 7906 }, { "epoch": 0.23, "grad_norm": 2.6670876607773875, "learning_rate": 9.021844423827835e-06, "loss": 0.1825, "step": 7907 }, { "epoch": 0.23, "grad_norm": 10.073704801468228, "learning_rate": 9.021568873612649e-06, "loss": 0.7277, "step": 7908 }, { "epoch": 0.23, "grad_norm": 9.89534970891966, "learning_rate": 9.0212932888004e-06, "loss": 0.8802, "step": 7909 }, { "epoch": 0.23, "grad_norm": 9.442723881069933, "learning_rate": 9.021017669393462e-06, "loss": 0.7233, "step": 7910 }, { "epoch": 0.23, "grad_norm": 16.462217479416303, "learning_rate": 9.020742015394202e-06, "loss": 0.8874, "step": 7911 }, { "epoch": 0.23, "grad_norm": 7.196227770319284, "learning_rate": 9.020466326804996e-06, "loss": 0.8249, "step": 7912 }, { "epoch": 0.23, "grad_norm": 2.042810816887414, "learning_rate": 9.02019060362821e-06, "loss": 0.1526, "step": 7913 }, { "epoch": 0.23, "grad_norm": 6.239380206419545, "learning_rate": 9.019914845866222e-06, "loss": 0.553, "step": 7914 }, { "epoch": 0.23, "grad_norm": 8.375998024758996, "learning_rate": 9.0196390535214e-06, "loss": 0.8639, "step": 7915 }, { "epoch": 0.23, "grad_norm": 5.472209511928783, "learning_rate": 9.019363226596119e-06, "loss": 0.3052, "step": 7916 }, { "epoch": 0.23, "grad_norm": 5.148627224148531, "learning_rate": 9.019087365092751e-06, "loss": 0.5998, "step": 7917 }, { "epoch": 0.23, "grad_norm": 5.6169885167728975, "learning_rate": 9.018811469013669e-06, "loss": 0.4888, "step": 7918 }, { "epoch": 0.23, "grad_norm": 5.440822024998496, "learning_rate": 9.018535538361245e-06, "loss": 0.8357, "step": 7919 }, { "epoch": 0.23, "grad_norm": 6.221275736805553, "learning_rate": 9.018259573137858e-06, "loss": 0.5991, "step": 7920 }, { "epoch": 0.23, "grad_norm": 5.894908976349594, "learning_rate": 9.017983573345876e-06, "loss": 0.6788, "step": 7921 }, { "epoch": 0.23, "grad_norm": 7.849375254074203, "learning_rate": 9.017707538987677e-06, "loss": 0.7782, "step": 7922 }, { "epoch": 0.23, "grad_norm": 12.334233500304713, "learning_rate": 9.017431470065634e-06, "loss": 0.8715, "step": 7923 }, { "epoch": 0.23, "grad_norm": 7.394737921888148, "learning_rate": 9.017155366582125e-06, "loss": 0.6433, "step": 7924 }, { "epoch": 0.23, "grad_norm": 6.240462273628479, "learning_rate": 9.01687922853952e-06, "loss": 0.6647, "step": 7925 }, { "epoch": 0.23, "grad_norm": 4.248122389549721, "learning_rate": 9.016603055940198e-06, "loss": 0.7125, "step": 7926 }, { "epoch": 0.23, "grad_norm": 6.036503455646467, "learning_rate": 9.016326848786536e-06, "loss": 0.3943, "step": 7927 }, { "epoch": 0.23, "grad_norm": 3.3489284879898045, "learning_rate": 9.016050607080909e-06, "loss": 0.2116, "step": 7928 }, { "epoch": 0.23, "grad_norm": 7.265121688637504, "learning_rate": 9.01577433082569e-06, "loss": 0.3126, "step": 7929 }, { "epoch": 0.23, "grad_norm": 4.330580032297948, "learning_rate": 9.015498020023262e-06, "loss": 0.3492, "step": 7930 }, { "epoch": 0.23, "grad_norm": 5.951599286026562, "learning_rate": 9.015221674675999e-06, "loss": 0.4251, "step": 7931 }, { "epoch": 0.23, "grad_norm": 6.9789270919749935, "learning_rate": 9.014945294786277e-06, "loss": 0.3863, "step": 7932 }, { "epoch": 0.23, "grad_norm": 5.470753199311731, "learning_rate": 9.014668880356475e-06, "loss": 0.8734, "step": 7933 }, { "epoch": 0.23, "grad_norm": 6.477039830341996, "learning_rate": 9.01439243138897e-06, "loss": 0.2923, "step": 7934 }, { "epoch": 0.23, "grad_norm": 3.3733220167655, "learning_rate": 9.014115947886141e-06, "loss": 0.4092, "step": 7935 }, { "epoch": 0.23, "grad_norm": 2.739430185154224, "learning_rate": 9.01383942985037e-06, "loss": 0.4536, "step": 7936 }, { "epoch": 0.23, "grad_norm": 7.925688237772061, "learning_rate": 9.01356287728403e-06, "loss": 0.7823, "step": 7937 }, { "epoch": 0.23, "grad_norm": 3.376446113893053, "learning_rate": 9.013286290189502e-06, "loss": 0.4678, "step": 7938 }, { "epoch": 0.23, "grad_norm": 4.631488578715589, "learning_rate": 9.013009668569168e-06, "loss": 0.3699, "step": 7939 }, { "epoch": 0.23, "grad_norm": 5.015050409506747, "learning_rate": 9.012733012425405e-06, "loss": 0.5514, "step": 7940 }, { "epoch": 0.23, "grad_norm": 5.178597636226993, "learning_rate": 9.012456321760595e-06, "loss": 0.5358, "step": 7941 }, { "epoch": 0.23, "grad_norm": 6.968003100410937, "learning_rate": 9.012179596577117e-06, "loss": 0.5876, "step": 7942 }, { "epoch": 0.23, "grad_norm": 7.450808118987937, "learning_rate": 9.01190283687735e-06, "loss": 0.7748, "step": 7943 }, { "epoch": 0.23, "grad_norm": 5.94055105421187, "learning_rate": 9.011626042663679e-06, "loss": 0.4163, "step": 7944 }, { "epoch": 0.23, "grad_norm": 5.162386854654479, "learning_rate": 9.011349213938483e-06, "loss": 0.2715, "step": 7945 }, { "epoch": 0.23, "grad_norm": 5.221575468959201, "learning_rate": 9.011072350704143e-06, "loss": 0.4097, "step": 7946 }, { "epoch": 0.23, "grad_norm": 4.295758313813728, "learning_rate": 9.010795452963042e-06, "loss": 0.3381, "step": 7947 }, { "epoch": 0.23, "grad_norm": 10.880008815593515, "learning_rate": 9.01051852071756e-06, "loss": 0.5087, "step": 7948 }, { "epoch": 0.23, "grad_norm": 12.812486750898257, "learning_rate": 9.010241553970082e-06, "loss": 0.6483, "step": 7949 }, { "epoch": 0.23, "grad_norm": 5.055742911242869, "learning_rate": 9.00996455272299e-06, "loss": 0.35, "step": 7950 }, { "epoch": 0.23, "grad_norm": 9.35404151430638, "learning_rate": 9.009687516978667e-06, "loss": 0.5093, "step": 7951 }, { "epoch": 0.23, "grad_norm": 4.657820468756287, "learning_rate": 9.009410446739494e-06, "loss": 0.4077, "step": 7952 }, { "epoch": 0.23, "grad_norm": 4.475366056377654, "learning_rate": 9.009133342007859e-06, "loss": 0.4146, "step": 7953 }, { "epoch": 0.23, "grad_norm": 8.840994577513827, "learning_rate": 9.008856202786141e-06, "loss": 0.6194, "step": 7954 }, { "epoch": 0.23, "grad_norm": 9.720134722321525, "learning_rate": 9.008579029076728e-06, "loss": 0.5637, "step": 7955 }, { "epoch": 0.23, "grad_norm": 6.662913394434405, "learning_rate": 9.008301820882004e-06, "loss": 0.4505, "step": 7956 }, { "epoch": 0.23, "grad_norm": 3.4050199054914807, "learning_rate": 9.008024578204353e-06, "loss": 0.4031, "step": 7957 }, { "epoch": 0.23, "grad_norm": 4.959310045399844, "learning_rate": 9.007747301046159e-06, "loss": 0.6823, "step": 7958 }, { "epoch": 0.23, "grad_norm": 3.041563167291715, "learning_rate": 9.007469989409807e-06, "loss": 0.3514, "step": 7959 }, { "epoch": 0.23, "grad_norm": 4.295242554340378, "learning_rate": 9.007192643297687e-06, "loss": 0.5363, "step": 7960 }, { "epoch": 0.23, "grad_norm": 7.753829625133043, "learning_rate": 9.00691526271218e-06, "loss": 0.3022, "step": 7961 }, { "epoch": 0.23, "grad_norm": 6.5598014005976735, "learning_rate": 9.006637847655675e-06, "loss": 0.7487, "step": 7962 }, { "epoch": 0.23, "grad_norm": 7.139773361872563, "learning_rate": 9.006360398130559e-06, "loss": 0.7204, "step": 7963 }, { "epoch": 0.23, "grad_norm": 5.752509979080238, "learning_rate": 9.006082914139216e-06, "loss": 0.4069, "step": 7964 }, { "epoch": 0.23, "grad_norm": 6.411041254463323, "learning_rate": 9.005805395684035e-06, "loss": 0.8783, "step": 7965 }, { "epoch": 0.23, "grad_norm": 8.551124367205777, "learning_rate": 9.005527842767405e-06, "loss": 0.4068, "step": 7966 }, { "epoch": 0.23, "grad_norm": 4.651648891519344, "learning_rate": 9.005250255391711e-06, "loss": 0.4231, "step": 7967 }, { "epoch": 0.23, "grad_norm": 4.44028790970981, "learning_rate": 9.004972633559341e-06, "loss": 0.2926, "step": 7968 }, { "epoch": 0.23, "grad_norm": 8.083383409272914, "learning_rate": 9.004694977272686e-06, "loss": 0.3894, "step": 7969 }, { "epoch": 0.23, "grad_norm": 2.42361663215346, "learning_rate": 9.004417286534134e-06, "loss": 0.3117, "step": 7970 }, { "epoch": 0.23, "grad_norm": 2.0914976901562126, "learning_rate": 9.004139561346072e-06, "loss": 0.1696, "step": 7971 }, { "epoch": 0.23, "grad_norm": 5.527689720082885, "learning_rate": 9.00386180171089e-06, "loss": 0.6219, "step": 7972 }, { "epoch": 0.23, "grad_norm": 9.034588945640362, "learning_rate": 9.003584007630978e-06, "loss": 0.3519, "step": 7973 }, { "epoch": 0.23, "grad_norm": 3.8069853374578604, "learning_rate": 9.003306179108727e-06, "loss": 0.3341, "step": 7974 }, { "epoch": 0.23, "grad_norm": 2.7766875445491928, "learning_rate": 9.003028316146525e-06, "loss": 0.3366, "step": 7975 }, { "epoch": 0.23, "grad_norm": 5.908589883551113, "learning_rate": 9.002750418746765e-06, "loss": 0.3939, "step": 7976 }, { "epoch": 0.23, "grad_norm": 9.180170262405428, "learning_rate": 9.002472486911834e-06, "loss": 1.4996, "step": 7977 }, { "epoch": 0.23, "grad_norm": 5.536173294304277, "learning_rate": 9.002194520644124e-06, "loss": 0.5479, "step": 7978 }, { "epoch": 0.23, "grad_norm": 3.6396355983138178, "learning_rate": 9.001916519946028e-06, "loss": 0.2522, "step": 7979 }, { "epoch": 0.23, "grad_norm": 7.469542904188298, "learning_rate": 9.001638484819939e-06, "loss": 0.5317, "step": 7980 }, { "epoch": 0.23, "grad_norm": 8.64032252301364, "learning_rate": 9.001360415268245e-06, "loss": 0.4976, "step": 7981 }, { "epoch": 0.23, "grad_norm": 5.2166626462017, "learning_rate": 9.00108231129334e-06, "loss": 0.5462, "step": 7982 }, { "epoch": 0.23, "grad_norm": 5.222132894104833, "learning_rate": 9.000804172897617e-06, "loss": 0.3916, "step": 7983 }, { "epoch": 0.23, "grad_norm": 6.402027431864244, "learning_rate": 9.000526000083467e-06, "loss": 1.0025, "step": 7984 }, { "epoch": 0.23, "grad_norm": 8.882371339604306, "learning_rate": 9.000247792853285e-06, "loss": 0.2435, "step": 7985 }, { "epoch": 0.23, "grad_norm": 5.213511801934871, "learning_rate": 8.999969551209464e-06, "loss": 0.6674, "step": 7986 }, { "epoch": 0.23, "grad_norm": 3.4755519126776524, "learning_rate": 8.999691275154399e-06, "loss": 0.2579, "step": 7987 }, { "epoch": 0.23, "grad_norm": 6.646305123287645, "learning_rate": 8.99941296469048e-06, "loss": 0.5752, "step": 7988 }, { "epoch": 0.23, "grad_norm": 7.011523332768235, "learning_rate": 8.999134619820106e-06, "loss": 0.2409, "step": 7989 }, { "epoch": 0.23, "grad_norm": 7.44981480310658, "learning_rate": 8.998856240545669e-06, "loss": 0.4101, "step": 7990 }, { "epoch": 0.23, "grad_norm": 10.226758892367952, "learning_rate": 8.998577826869561e-06, "loss": 0.7897, "step": 7991 }, { "epoch": 0.23, "grad_norm": 7.1179237026831945, "learning_rate": 8.998299378794183e-06, "loss": 0.2413, "step": 7992 }, { "epoch": 0.23, "grad_norm": 6.461635122956049, "learning_rate": 8.998020896321927e-06, "loss": 0.6888, "step": 7993 }, { "epoch": 0.23, "grad_norm": 7.713407142250853, "learning_rate": 8.99774237945519e-06, "loss": 0.7194, "step": 7994 }, { "epoch": 0.23, "grad_norm": 4.720423269124408, "learning_rate": 8.997463828196366e-06, "loss": 0.3005, "step": 7995 }, { "epoch": 0.23, "grad_norm": 5.646838963134484, "learning_rate": 8.997185242547854e-06, "loss": 0.6424, "step": 7996 }, { "epoch": 0.23, "grad_norm": 6.592715069152635, "learning_rate": 8.99690662251205e-06, "loss": 0.3352, "step": 7997 }, { "epoch": 0.23, "grad_norm": 3.1068538048220176, "learning_rate": 8.99662796809135e-06, "loss": 0.2146, "step": 7998 }, { "epoch": 0.23, "grad_norm": 4.952018156841066, "learning_rate": 8.996349279288152e-06, "loss": 0.3927, "step": 7999 }, { "epoch": 0.23, "grad_norm": 7.842534795604268, "learning_rate": 8.996070556104853e-06, "loss": 0.4311, "step": 8000 }, { "epoch": 0.23, "grad_norm": 4.781944399005881, "learning_rate": 8.99579179854385e-06, "loss": 0.6636, "step": 8001 }, { "epoch": 0.23, "grad_norm": 4.888415315779265, "learning_rate": 8.995513006607544e-06, "loss": 0.6492, "step": 8002 }, { "epoch": 0.23, "grad_norm": 5.4120736040071025, "learning_rate": 8.995234180298332e-06, "loss": 0.5761, "step": 8003 }, { "epoch": 0.23, "grad_norm": 5.250039554628593, "learning_rate": 8.994955319618607e-06, "loss": 0.9872, "step": 8004 }, { "epoch": 0.23, "grad_norm": 6.577220066193109, "learning_rate": 8.994676424570779e-06, "loss": 0.5957, "step": 8005 }, { "epoch": 0.23, "grad_norm": 6.123859260364593, "learning_rate": 8.99439749515724e-06, "loss": 0.5335, "step": 8006 }, { "epoch": 0.23, "grad_norm": 9.971478414703727, "learning_rate": 8.99411853138039e-06, "loss": 0.7674, "step": 8007 }, { "epoch": 0.23, "grad_norm": 5.700779011681373, "learning_rate": 8.993839533242631e-06, "loss": 0.6633, "step": 8008 }, { "epoch": 0.23, "grad_norm": 14.545861689331307, "learning_rate": 8.993560500746361e-06, "loss": 1.2111, "step": 8009 }, { "epoch": 0.23, "grad_norm": 3.7110787133904855, "learning_rate": 8.993281433893983e-06, "loss": 0.5697, "step": 8010 }, { "epoch": 0.23, "grad_norm": 6.5263189656212655, "learning_rate": 8.993002332687896e-06, "loss": 0.5094, "step": 8011 }, { "epoch": 0.23, "grad_norm": 5.297791888344795, "learning_rate": 8.992723197130502e-06, "loss": 0.556, "step": 8012 }, { "epoch": 0.23, "grad_norm": 9.932316513843926, "learning_rate": 8.992444027224202e-06, "loss": 0.522, "step": 8013 }, { "epoch": 0.23, "grad_norm": 4.826968255096108, "learning_rate": 8.992164822971397e-06, "loss": 0.7094, "step": 8014 }, { "epoch": 0.23, "grad_norm": 4.049083033706382, "learning_rate": 8.991885584374492e-06, "loss": 0.2792, "step": 8015 }, { "epoch": 0.23, "grad_norm": 4.17868178202101, "learning_rate": 8.991606311435883e-06, "loss": 0.2588, "step": 8016 }, { "epoch": 0.23, "grad_norm": 4.997002752315915, "learning_rate": 8.991327004157978e-06, "loss": 0.5188, "step": 8017 }, { "epoch": 0.23, "grad_norm": 5.057136378734402, "learning_rate": 8.991047662543179e-06, "loss": 0.4971, "step": 8018 }, { "epoch": 0.23, "grad_norm": 7.931446079629017, "learning_rate": 8.990768286593887e-06, "loss": 0.681, "step": 8019 }, { "epoch": 0.23, "grad_norm": 2.1731594047642524, "learning_rate": 8.990488876312506e-06, "loss": 0.2944, "step": 8020 }, { "epoch": 0.23, "grad_norm": 5.486767977678785, "learning_rate": 8.990209431701441e-06, "loss": 0.539, "step": 8021 }, { "epoch": 0.23, "grad_norm": 4.735599447611633, "learning_rate": 8.989929952763099e-06, "loss": 0.4181, "step": 8022 }, { "epoch": 0.23, "grad_norm": 8.03348798035786, "learning_rate": 8.989650439499878e-06, "loss": 0.9352, "step": 8023 }, { "epoch": 0.23, "grad_norm": 9.802510807933263, "learning_rate": 8.989370891914186e-06, "loss": 0.61, "step": 8024 }, { "epoch": 0.23, "grad_norm": 9.87683870604949, "learning_rate": 8.989091310008426e-06, "loss": 1.1546, "step": 8025 }, { "epoch": 0.23, "grad_norm": 8.130519224809092, "learning_rate": 8.988811693785007e-06, "loss": 0.5166, "step": 8026 }, { "epoch": 0.23, "grad_norm": 6.225408909094563, "learning_rate": 8.988532043246329e-06, "loss": 0.4225, "step": 8027 }, { "epoch": 0.23, "grad_norm": 7.596507635300173, "learning_rate": 8.988252358394804e-06, "loss": 0.6847, "step": 8028 }, { "epoch": 0.23, "grad_norm": 7.041069464404941, "learning_rate": 8.987972639232834e-06, "loss": 0.9167, "step": 8029 }, { "epoch": 0.23, "grad_norm": 7.114255868758314, "learning_rate": 8.987692885762826e-06, "loss": 0.3847, "step": 8030 }, { "epoch": 0.23, "grad_norm": 7.994639687020346, "learning_rate": 8.987413097987187e-06, "loss": 0.814, "step": 8031 }, { "epoch": 0.23, "grad_norm": 3.5573413039282116, "learning_rate": 8.987133275908326e-06, "loss": 0.4079, "step": 8032 }, { "epoch": 0.23, "grad_norm": 4.384400540724064, "learning_rate": 8.986853419528644e-06, "loss": 0.8217, "step": 8033 }, { "epoch": 0.23, "grad_norm": 5.920586277180626, "learning_rate": 8.986573528850558e-06, "loss": 0.5702, "step": 8034 }, { "epoch": 0.23, "grad_norm": 5.7209413426097155, "learning_rate": 8.986293603876468e-06, "loss": 0.4545, "step": 8035 }, { "epoch": 0.23, "grad_norm": 7.752210547915441, "learning_rate": 8.986013644608784e-06, "loss": 1.0102, "step": 8036 }, { "epoch": 0.23, "grad_norm": 7.7080581684659455, "learning_rate": 8.985733651049918e-06, "loss": 0.6417, "step": 8037 }, { "epoch": 0.23, "grad_norm": 2.556107152850887, "learning_rate": 8.985453623202274e-06, "loss": 0.1907, "step": 8038 }, { "epoch": 0.23, "grad_norm": 6.02375489819912, "learning_rate": 8.985173561068264e-06, "loss": 0.7728, "step": 8039 }, { "epoch": 0.23, "grad_norm": 6.259079403689448, "learning_rate": 8.984893464650298e-06, "loss": 0.4293, "step": 8040 }, { "epoch": 0.23, "grad_norm": 6.518286401606852, "learning_rate": 8.984613333950782e-06, "loss": 0.5907, "step": 8041 }, { "epoch": 0.23, "grad_norm": 4.046380119553114, "learning_rate": 8.984333168972128e-06, "loss": 0.5812, "step": 8042 }, { "epoch": 0.23, "grad_norm": 5.793614213292582, "learning_rate": 8.984052969716748e-06, "loss": 0.484, "step": 8043 }, { "epoch": 0.23, "grad_norm": 7.7742352713669165, "learning_rate": 8.98377273618705e-06, "loss": 0.8415, "step": 8044 }, { "epoch": 0.23, "grad_norm": 6.15285293651838, "learning_rate": 8.983492468385445e-06, "loss": 0.2141, "step": 8045 }, { "epoch": 0.23, "grad_norm": 4.5421380081341125, "learning_rate": 8.983212166314345e-06, "loss": 0.58, "step": 8046 }, { "epoch": 0.23, "grad_norm": 4.920876562180057, "learning_rate": 8.982931829976163e-06, "loss": 0.4552, "step": 8047 }, { "epoch": 0.23, "grad_norm": 8.841518026091832, "learning_rate": 8.982651459373307e-06, "loss": 0.6014, "step": 8048 }, { "epoch": 0.23, "grad_norm": 8.072954068661257, "learning_rate": 8.98237105450819e-06, "loss": 0.5168, "step": 8049 }, { "epoch": 0.23, "grad_norm": 7.281411107781763, "learning_rate": 8.982090615383227e-06, "loss": 0.2419, "step": 8050 }, { "epoch": 0.23, "grad_norm": 3.5359773978335185, "learning_rate": 8.981810142000828e-06, "loss": 0.5052, "step": 8051 }, { "epoch": 0.23, "grad_norm": 3.2554428987550015, "learning_rate": 8.981529634363406e-06, "loss": 0.1757, "step": 8052 }, { "epoch": 0.23, "grad_norm": 6.176757426506905, "learning_rate": 8.981249092473376e-06, "loss": 0.5488, "step": 8053 }, { "epoch": 0.23, "grad_norm": 5.734564525995908, "learning_rate": 8.98096851633315e-06, "loss": 0.4472, "step": 8054 }, { "epoch": 0.23, "grad_norm": 5.709599329023521, "learning_rate": 8.98068790594514e-06, "loss": 0.4179, "step": 8055 }, { "epoch": 0.23, "grad_norm": 5.626758046395002, "learning_rate": 8.980407261311765e-06, "loss": 0.8137, "step": 8056 }, { "epoch": 0.23, "grad_norm": 7.606651116281986, "learning_rate": 8.980126582435435e-06, "loss": 0.4411, "step": 8057 }, { "epoch": 0.23, "grad_norm": 7.808920261413155, "learning_rate": 8.979845869318567e-06, "loss": 0.3784, "step": 8058 }, { "epoch": 0.23, "grad_norm": 10.420351800745795, "learning_rate": 8.979565121963573e-06, "loss": 0.3485, "step": 8059 }, { "epoch": 0.23, "grad_norm": 4.975035428635088, "learning_rate": 8.979284340372872e-06, "loss": 0.5056, "step": 8060 }, { "epoch": 0.23, "grad_norm": 8.669637937938647, "learning_rate": 8.979003524548878e-06, "loss": 0.3978, "step": 8061 }, { "epoch": 0.23, "grad_norm": 5.650275600091966, "learning_rate": 8.978722674494004e-06, "loss": 0.6377, "step": 8062 }, { "epoch": 0.23, "grad_norm": 10.121625926765285, "learning_rate": 8.978441790210671e-06, "loss": 0.9884, "step": 8063 }, { "epoch": 0.23, "grad_norm": 9.106069131807871, "learning_rate": 8.978160871701293e-06, "loss": 0.8471, "step": 8064 }, { "epoch": 0.23, "grad_norm": 12.854700029004697, "learning_rate": 8.977879918968286e-06, "loss": 0.2986, "step": 8065 }, { "epoch": 0.23, "grad_norm": 5.319207153666249, "learning_rate": 8.977598932014069e-06, "loss": 0.2074, "step": 8066 }, { "epoch": 0.23, "grad_norm": 9.230195650442784, "learning_rate": 8.977317910841057e-06, "loss": 0.898, "step": 8067 }, { "epoch": 0.23, "grad_norm": 9.535977551299432, "learning_rate": 8.977036855451668e-06, "loss": 0.9265, "step": 8068 }, { "epoch": 0.23, "grad_norm": 9.396556536013449, "learning_rate": 8.976755765848324e-06, "loss": 0.6458, "step": 8069 }, { "epoch": 0.23, "grad_norm": 4.308825378555425, "learning_rate": 8.976474642033437e-06, "loss": 0.4713, "step": 8070 }, { "epoch": 0.23, "grad_norm": 6.735780396705554, "learning_rate": 8.976193484009429e-06, "loss": 0.935, "step": 8071 }, { "epoch": 0.23, "grad_norm": 3.16830948621179, "learning_rate": 8.975912291778716e-06, "loss": 0.1788, "step": 8072 }, { "epoch": 0.23, "grad_norm": 7.875846453997396, "learning_rate": 8.975631065343722e-06, "loss": 0.8771, "step": 8073 }, { "epoch": 0.23, "grad_norm": 5.168798816824008, "learning_rate": 8.975349804706863e-06, "loss": 0.8694, "step": 8074 }, { "epoch": 0.23, "grad_norm": 2.383328904023312, "learning_rate": 8.975068509870557e-06, "loss": 0.4575, "step": 8075 }, { "epoch": 0.23, "grad_norm": 6.83937305549762, "learning_rate": 8.974787180837228e-06, "loss": 0.3564, "step": 8076 }, { "epoch": 0.23, "grad_norm": 6.011298588179392, "learning_rate": 8.974505817609293e-06, "loss": 0.5117, "step": 8077 }, { "epoch": 0.23, "grad_norm": 4.206859546157501, "learning_rate": 8.974224420189174e-06, "loss": 0.2523, "step": 8078 }, { "epoch": 0.23, "grad_norm": 6.307420405347958, "learning_rate": 8.973942988579292e-06, "loss": 0.5309, "step": 8079 }, { "epoch": 0.23, "grad_norm": 7.322238664635949, "learning_rate": 8.973661522782069e-06, "loss": 0.3741, "step": 8080 }, { "epoch": 0.23, "grad_norm": 6.930564162614872, "learning_rate": 8.973380022799924e-06, "loss": 0.5691, "step": 8081 }, { "epoch": 0.23, "grad_norm": 5.358303975113355, "learning_rate": 8.973098488635279e-06, "loss": 0.3771, "step": 8082 }, { "epoch": 0.23, "grad_norm": 5.672164447219102, "learning_rate": 8.972816920290559e-06, "loss": 0.5275, "step": 8083 }, { "epoch": 0.23, "grad_norm": 8.669737076342074, "learning_rate": 8.972535317768183e-06, "loss": 0.4975, "step": 8084 }, { "epoch": 0.23, "grad_norm": 5.7458805586672455, "learning_rate": 8.972253681070574e-06, "loss": 0.2542, "step": 8085 }, { "epoch": 0.23, "grad_norm": 9.302992556663602, "learning_rate": 8.971972010200157e-06, "loss": 0.4287, "step": 8086 }, { "epoch": 0.23, "grad_norm": 8.815236633898213, "learning_rate": 8.971690305159352e-06, "loss": 0.4869, "step": 8087 }, { "epoch": 0.23, "grad_norm": 6.034576170860731, "learning_rate": 8.971408565950585e-06, "loss": 0.4273, "step": 8088 }, { "epoch": 0.23, "grad_norm": 7.017167531377716, "learning_rate": 8.971126792576282e-06, "loss": 0.4551, "step": 8089 }, { "epoch": 0.23, "grad_norm": 8.18576479862694, "learning_rate": 8.97084498503886e-06, "loss": 0.7501, "step": 8090 }, { "epoch": 0.23, "grad_norm": 8.059320813662842, "learning_rate": 8.970563143340749e-06, "loss": 0.803, "step": 8091 }, { "epoch": 0.23, "grad_norm": 4.086457812001064, "learning_rate": 8.970281267484373e-06, "loss": 0.3614, "step": 8092 }, { "epoch": 0.23, "grad_norm": 6.210560529293045, "learning_rate": 8.969999357472157e-06, "loss": 0.3108, "step": 8093 }, { "epoch": 0.23, "grad_norm": 4.850907433555557, "learning_rate": 8.969717413306523e-06, "loss": 0.443, "step": 8094 }, { "epoch": 0.23, "grad_norm": 5.837146128316866, "learning_rate": 8.969435434989899e-06, "loss": 0.2899, "step": 8095 }, { "epoch": 0.23, "grad_norm": 4.723148064763358, "learning_rate": 8.969153422524712e-06, "loss": 0.4801, "step": 8096 }, { "epoch": 0.23, "grad_norm": 10.294597599637607, "learning_rate": 8.968871375913387e-06, "loss": 0.3215, "step": 8097 }, { "epoch": 0.23, "grad_norm": 4.62641802520011, "learning_rate": 8.968589295158348e-06, "loss": 0.5651, "step": 8098 }, { "epoch": 0.23, "grad_norm": 4.293475495160671, "learning_rate": 8.968307180262026e-06, "loss": 0.3181, "step": 8099 }, { "epoch": 0.23, "grad_norm": 7.681794134721059, "learning_rate": 8.968025031226845e-06, "loss": 0.6805, "step": 8100 }, { "epoch": 0.23, "grad_norm": 7.627304479342979, "learning_rate": 8.967742848055233e-06, "loss": 1.1252, "step": 8101 }, { "epoch": 0.23, "grad_norm": 5.665624321644527, "learning_rate": 8.967460630749619e-06, "loss": 0.9675, "step": 8102 }, { "epoch": 0.23, "grad_norm": 6.1452109027051955, "learning_rate": 8.967178379312429e-06, "loss": 0.3069, "step": 8103 }, { "epoch": 0.23, "grad_norm": 5.27551929213867, "learning_rate": 8.966896093746091e-06, "loss": 0.1793, "step": 8104 }, { "epoch": 0.23, "grad_norm": 3.46909138142321, "learning_rate": 8.966613774053034e-06, "loss": 0.3492, "step": 8105 }, { "epoch": 0.23, "grad_norm": 5.230659783359704, "learning_rate": 8.96633142023569e-06, "loss": 0.3624, "step": 8106 }, { "epoch": 0.23, "grad_norm": 4.0283718184443975, "learning_rate": 8.966049032296483e-06, "loss": 0.3404, "step": 8107 }, { "epoch": 0.23, "grad_norm": 9.588889465710192, "learning_rate": 8.965766610237845e-06, "loss": 0.8143, "step": 8108 }, { "epoch": 0.23, "grad_norm": 4.162030667259375, "learning_rate": 8.965484154062205e-06, "loss": 0.4322, "step": 8109 }, { "epoch": 0.23, "grad_norm": 3.4954203889555435, "learning_rate": 8.965201663771992e-06, "loss": 0.4968, "step": 8110 }, { "epoch": 0.23, "grad_norm": 2.4768243284237816, "learning_rate": 8.964919139369639e-06, "loss": 0.1497, "step": 8111 }, { "epoch": 0.23, "grad_norm": 5.470849097530124, "learning_rate": 8.964636580857574e-06, "loss": 0.4125, "step": 8112 }, { "epoch": 0.23, "grad_norm": 6.568607358536148, "learning_rate": 8.964353988238228e-06, "loss": 0.4432, "step": 8113 }, { "epoch": 0.23, "grad_norm": 15.145957645429071, "learning_rate": 8.964071361514035e-06, "loss": 0.8852, "step": 8114 }, { "epoch": 0.23, "grad_norm": 5.33277417271698, "learning_rate": 8.963788700687422e-06, "loss": 0.6064, "step": 8115 }, { "epoch": 0.23, "grad_norm": 6.410218921380582, "learning_rate": 8.963506005760823e-06, "loss": 0.6525, "step": 8116 }, { "epoch": 0.23, "grad_norm": 3.7701341848499625, "learning_rate": 8.96322327673667e-06, "loss": 0.4248, "step": 8117 }, { "epoch": 0.23, "grad_norm": 3.812094526147639, "learning_rate": 8.962940513617396e-06, "loss": 0.4628, "step": 8118 }, { "epoch": 0.23, "grad_norm": 7.39281207058411, "learning_rate": 8.962657716405432e-06, "loss": 0.6326, "step": 8119 }, { "epoch": 0.23, "grad_norm": 3.0602487347056972, "learning_rate": 8.96237488510321e-06, "loss": 0.4353, "step": 8120 }, { "epoch": 0.23, "grad_norm": 3.812873822005095, "learning_rate": 8.962092019713169e-06, "loss": 0.3191, "step": 8121 }, { "epoch": 0.23, "grad_norm": 5.891983482695073, "learning_rate": 8.961809120237734e-06, "loss": 0.7405, "step": 8122 }, { "epoch": 0.23, "grad_norm": 4.745832220964354, "learning_rate": 8.961526186679346e-06, "loss": 0.577, "step": 8123 }, { "epoch": 0.23, "grad_norm": 8.508968278268728, "learning_rate": 8.961243219040434e-06, "loss": 0.6214, "step": 8124 }, { "epoch": 0.23, "grad_norm": 6.044990462744472, "learning_rate": 8.960960217323435e-06, "loss": 0.1577, "step": 8125 }, { "epoch": 0.23, "grad_norm": 6.202498743071731, "learning_rate": 8.960677181530783e-06, "loss": 0.5748, "step": 8126 }, { "epoch": 0.23, "grad_norm": 4.239409350200221, "learning_rate": 8.960394111664913e-06, "loss": 0.4027, "step": 8127 }, { "epoch": 0.23, "grad_norm": 8.224493536385916, "learning_rate": 8.960111007728258e-06, "loss": 0.418, "step": 8128 }, { "epoch": 0.23, "grad_norm": 4.161473856273564, "learning_rate": 8.959827869723258e-06, "loss": 0.4662, "step": 8129 }, { "epoch": 0.23, "grad_norm": 8.142255096359927, "learning_rate": 8.959544697652344e-06, "loss": 0.6327, "step": 8130 }, { "epoch": 0.23, "grad_norm": 4.588605088637461, "learning_rate": 8.959261491517958e-06, "loss": 0.7695, "step": 8131 }, { "epoch": 0.23, "grad_norm": 3.147583156938533, "learning_rate": 8.958978251322529e-06, "loss": 0.5046, "step": 8132 }, { "epoch": 0.23, "grad_norm": 10.087446151646398, "learning_rate": 8.9586949770685e-06, "loss": 0.8763, "step": 8133 }, { "epoch": 0.23, "grad_norm": 8.885316628313715, "learning_rate": 8.958411668758306e-06, "loss": 0.7369, "step": 8134 }, { "epoch": 0.23, "grad_norm": 5.025112673696636, "learning_rate": 8.958128326394381e-06, "loss": 0.3572, "step": 8135 }, { "epoch": 0.23, "grad_norm": 5.015025545703637, "learning_rate": 8.957844949979168e-06, "loss": 0.4417, "step": 8136 }, { "epoch": 0.23, "grad_norm": 3.4078908695267995, "learning_rate": 8.957561539515104e-06, "loss": 0.3273, "step": 8137 }, { "epoch": 0.23, "grad_norm": 12.014166456971651, "learning_rate": 8.957278095004621e-06, "loss": 0.6952, "step": 8138 }, { "epoch": 0.23, "grad_norm": 4.311788251440746, "learning_rate": 8.956994616450163e-06, "loss": 0.7183, "step": 8139 }, { "epoch": 0.23, "grad_norm": 6.517190392986745, "learning_rate": 8.956711103854168e-06, "loss": 0.6024, "step": 8140 }, { "epoch": 0.23, "grad_norm": 7.389945033208404, "learning_rate": 8.956427557219074e-06, "loss": 0.9278, "step": 8141 }, { "epoch": 0.23, "grad_norm": 12.449548147803304, "learning_rate": 8.956143976547322e-06, "loss": 0.1998, "step": 8142 }, { "epoch": 0.23, "grad_norm": 8.520380382732633, "learning_rate": 8.95586036184135e-06, "loss": 0.6976, "step": 8143 }, { "epoch": 0.23, "grad_norm": 4.56546566872519, "learning_rate": 8.955576713103598e-06, "loss": 0.8104, "step": 8144 }, { "epoch": 0.23, "grad_norm": 5.4671174146548704, "learning_rate": 8.955293030336507e-06, "loss": 0.6627, "step": 8145 }, { "epoch": 0.23, "grad_norm": 5.4868902975621525, "learning_rate": 8.955009313542518e-06, "loss": 0.8745, "step": 8146 }, { "epoch": 0.23, "grad_norm": 6.045649993381048, "learning_rate": 8.95472556272407e-06, "loss": 0.4236, "step": 8147 }, { "epoch": 0.23, "grad_norm": 5.104270741965153, "learning_rate": 8.954441777883604e-06, "loss": 0.6379, "step": 8148 }, { "epoch": 0.23, "grad_norm": 11.218029455991292, "learning_rate": 8.954157959023561e-06, "loss": 0.2253, "step": 8149 }, { "epoch": 0.23, "grad_norm": 2.6529838452571655, "learning_rate": 8.953874106146387e-06, "loss": 0.2001, "step": 8150 }, { "epoch": 0.23, "grad_norm": 11.303162404009813, "learning_rate": 8.95359021925452e-06, "loss": 0.6524, "step": 8151 }, { "epoch": 0.23, "grad_norm": 5.984995518477953, "learning_rate": 8.953306298350402e-06, "loss": 0.1835, "step": 8152 }, { "epoch": 0.23, "grad_norm": 9.164552329160486, "learning_rate": 8.953022343436477e-06, "loss": 0.3447, "step": 8153 }, { "epoch": 0.23, "grad_norm": 3.1700780128482555, "learning_rate": 8.95273835451519e-06, "loss": 0.3933, "step": 8154 }, { "epoch": 0.23, "grad_norm": 10.480681859031147, "learning_rate": 8.952454331588977e-06, "loss": 0.6004, "step": 8155 }, { "epoch": 0.23, "grad_norm": 5.918241246562584, "learning_rate": 8.95217027466029e-06, "loss": 0.6151, "step": 8156 }, { "epoch": 0.23, "grad_norm": 8.438652638012288, "learning_rate": 8.951886183731566e-06, "loss": 0.7707, "step": 8157 }, { "epoch": 0.23, "grad_norm": 7.02825205998887, "learning_rate": 8.951602058805254e-06, "loss": 0.2017, "step": 8158 }, { "epoch": 0.23, "grad_norm": 11.134749523098339, "learning_rate": 8.951317899883795e-06, "loss": 0.6484, "step": 8159 }, { "epoch": 0.23, "grad_norm": 3.975771602379788, "learning_rate": 8.951033706969634e-06, "loss": 0.4673, "step": 8160 }, { "epoch": 0.23, "grad_norm": 13.111150500854404, "learning_rate": 8.950749480065218e-06, "loss": 0.8849, "step": 8161 }, { "epoch": 0.23, "grad_norm": 8.485855394899332, "learning_rate": 8.95046521917299e-06, "loss": 0.6311, "step": 8162 }, { "epoch": 0.23, "grad_norm": 8.052493013830746, "learning_rate": 8.950180924295394e-06, "loss": 0.6381, "step": 8163 }, { "epoch": 0.23, "grad_norm": 5.055728292250631, "learning_rate": 8.949896595434881e-06, "loss": 0.6021, "step": 8164 }, { "epoch": 0.23, "grad_norm": 9.595840822703632, "learning_rate": 8.949612232593893e-06, "loss": 0.8554, "step": 8165 }, { "epoch": 0.23, "grad_norm": 6.8476556929192585, "learning_rate": 8.949327835774876e-06, "loss": 0.4107, "step": 8166 }, { "epoch": 0.23, "grad_norm": 7.848672241856135, "learning_rate": 8.94904340498028e-06, "loss": 0.7124, "step": 8167 }, { "epoch": 0.23, "grad_norm": 6.198010274570217, "learning_rate": 8.948758940212548e-06, "loss": 0.8125, "step": 8168 }, { "epoch": 0.23, "grad_norm": 5.8651377504502475, "learning_rate": 8.948474441474129e-06, "loss": 0.4352, "step": 8169 }, { "epoch": 0.23, "grad_norm": 4.536937905797882, "learning_rate": 8.948189908767473e-06, "loss": 0.4161, "step": 8170 }, { "epoch": 0.23, "grad_norm": 9.530327524038935, "learning_rate": 8.947905342095023e-06, "loss": 1.0174, "step": 8171 }, { "epoch": 0.23, "grad_norm": 6.537084173103217, "learning_rate": 8.947620741459229e-06, "loss": 0.6227, "step": 8172 }, { "epoch": 0.23, "grad_norm": 6.901262096027537, "learning_rate": 8.94733610686254e-06, "loss": 1.1466, "step": 8173 }, { "epoch": 0.23, "grad_norm": 5.399190538277746, "learning_rate": 8.947051438307406e-06, "loss": 0.4595, "step": 8174 }, { "epoch": 0.23, "grad_norm": 6.201142001342904, "learning_rate": 8.946766735796273e-06, "loss": 0.8478, "step": 8175 }, { "epoch": 0.23, "grad_norm": 2.1770898010455952, "learning_rate": 8.946481999331593e-06, "loss": 0.1133, "step": 8176 }, { "epoch": 0.23, "grad_norm": 6.142277269622015, "learning_rate": 8.946197228915813e-06, "loss": 0.5687, "step": 8177 }, { "epoch": 0.23, "grad_norm": 8.825277817975655, "learning_rate": 8.945912424551387e-06, "loss": 0.4864, "step": 8178 }, { "epoch": 0.23, "grad_norm": 5.103040448975793, "learning_rate": 8.94562758624076e-06, "loss": 0.7629, "step": 8179 }, { "epoch": 0.23, "grad_norm": 7.2157616973656316, "learning_rate": 8.945342713986384e-06, "loss": 0.6705, "step": 8180 }, { "epoch": 0.23, "grad_norm": 24.84249026385571, "learning_rate": 8.94505780779071e-06, "loss": 0.5841, "step": 8181 }, { "epoch": 0.23, "grad_norm": 9.669156532149684, "learning_rate": 8.94477286765619e-06, "loss": 0.8529, "step": 8182 }, { "epoch": 0.23, "grad_norm": 6.4892571879114636, "learning_rate": 8.944487893585277e-06, "loss": 0.7689, "step": 8183 }, { "epoch": 0.23, "grad_norm": 3.5671727504751125, "learning_rate": 8.944202885580418e-06, "loss": 0.3296, "step": 8184 }, { "epoch": 0.23, "grad_norm": 7.257665495107451, "learning_rate": 8.943917843644067e-06, "loss": 0.7584, "step": 8185 }, { "epoch": 0.23, "grad_norm": 7.4296092962888505, "learning_rate": 8.943632767778678e-06, "loss": 0.2805, "step": 8186 }, { "epoch": 0.23, "grad_norm": 4.684324485908531, "learning_rate": 8.943347657986701e-06, "loss": 0.6663, "step": 8187 }, { "epoch": 0.23, "grad_norm": 6.165775037004335, "learning_rate": 8.943062514270589e-06, "loss": 0.8114, "step": 8188 }, { "epoch": 0.23, "grad_norm": 2.1896122679778216, "learning_rate": 8.942777336632795e-06, "loss": 0.1776, "step": 8189 }, { "epoch": 0.23, "grad_norm": 4.075047789919471, "learning_rate": 8.942492125075774e-06, "loss": 0.621, "step": 8190 }, { "epoch": 0.23, "grad_norm": 7.4540356093759526, "learning_rate": 8.94220687960198e-06, "loss": 0.6707, "step": 8191 }, { "epoch": 0.23, "grad_norm": 6.311240684364895, "learning_rate": 8.941921600213864e-06, "loss": 0.4783, "step": 8192 }, { "epoch": 0.23, "grad_norm": 3.938608800596209, "learning_rate": 8.941636286913882e-06, "loss": 0.1805, "step": 8193 }, { "epoch": 0.23, "grad_norm": 3.4343839220064742, "learning_rate": 8.94135093970449e-06, "loss": 0.6513, "step": 8194 }, { "epoch": 0.23, "grad_norm": 4.850058723369486, "learning_rate": 8.94106555858814e-06, "loss": 0.4042, "step": 8195 }, { "epoch": 0.23, "grad_norm": 5.209576521959052, "learning_rate": 8.940780143567286e-06, "loss": 0.7059, "step": 8196 }, { "epoch": 0.23, "grad_norm": 10.589224689981421, "learning_rate": 8.940494694644389e-06, "loss": 0.6517, "step": 8197 }, { "epoch": 0.23, "grad_norm": 8.17759437340976, "learning_rate": 8.940209211821899e-06, "loss": 0.604, "step": 8198 }, { "epoch": 0.23, "grad_norm": 7.433278560721736, "learning_rate": 8.939923695102276e-06, "loss": 0.5779, "step": 8199 }, { "epoch": 0.23, "grad_norm": 7.19355048226914, "learning_rate": 8.939638144487975e-06, "loss": 0.4096, "step": 8200 }, { "epoch": 0.23, "grad_norm": 7.307609030986527, "learning_rate": 8.939352559981448e-06, "loss": 0.341, "step": 8201 }, { "epoch": 0.23, "grad_norm": 8.91760467040091, "learning_rate": 8.939066941585159e-06, "loss": 0.8051, "step": 8202 }, { "epoch": 0.23, "grad_norm": 6.393942608360667, "learning_rate": 8.938781289301563e-06, "loss": 0.432, "step": 8203 }, { "epoch": 0.23, "grad_norm": 8.079444526614598, "learning_rate": 8.938495603133116e-06, "loss": 0.494, "step": 8204 }, { "epoch": 0.23, "grad_norm": 5.142181485502052, "learning_rate": 8.938209883082275e-06, "loss": 0.6351, "step": 8205 }, { "epoch": 0.24, "grad_norm": 3.436412292400226, "learning_rate": 8.9379241291515e-06, "loss": 0.5446, "step": 8206 }, { "epoch": 0.24, "grad_norm": 9.501848693461964, "learning_rate": 8.937638341343249e-06, "loss": 0.745, "step": 8207 }, { "epoch": 0.24, "grad_norm": 4.255372802593962, "learning_rate": 8.937352519659979e-06, "loss": 0.6007, "step": 8208 }, { "epoch": 0.24, "grad_norm": 7.578730419397266, "learning_rate": 8.937066664104151e-06, "loss": 0.2661, "step": 8209 }, { "epoch": 0.24, "grad_norm": 4.683342959551348, "learning_rate": 8.936780774678221e-06, "loss": 0.3827, "step": 8210 }, { "epoch": 0.24, "grad_norm": 8.122590426631154, "learning_rate": 8.936494851384654e-06, "loss": 0.854, "step": 8211 }, { "epoch": 0.24, "grad_norm": 9.605935822649952, "learning_rate": 8.936208894225906e-06, "loss": 0.4596, "step": 8212 }, { "epoch": 0.24, "grad_norm": 5.864295703978333, "learning_rate": 8.935922903204436e-06, "loss": 0.9851, "step": 8213 }, { "epoch": 0.24, "grad_norm": 4.328645365037259, "learning_rate": 8.935636878322707e-06, "loss": 0.3363, "step": 8214 }, { "epoch": 0.24, "grad_norm": 2.4199966283648626, "learning_rate": 8.935350819583177e-06, "loss": 0.2904, "step": 8215 }, { "epoch": 0.24, "grad_norm": 6.556886506434601, "learning_rate": 8.935064726988309e-06, "loss": 0.4688, "step": 8216 }, { "epoch": 0.24, "grad_norm": 6.389899662704601, "learning_rate": 8.934778600540565e-06, "loss": 0.7567, "step": 8217 }, { "epoch": 0.24, "grad_norm": 8.161521874046354, "learning_rate": 8.934492440242403e-06, "loss": 0.7683, "step": 8218 }, { "epoch": 0.24, "grad_norm": 5.489098669248406, "learning_rate": 8.93420624609629e-06, "loss": 0.6843, "step": 8219 }, { "epoch": 0.24, "grad_norm": 6.033966401469072, "learning_rate": 8.933920018104683e-06, "loss": 0.6581, "step": 8220 }, { "epoch": 0.24, "grad_norm": 4.767122277357849, "learning_rate": 8.933633756270045e-06, "loss": 0.6025, "step": 8221 }, { "epoch": 0.24, "grad_norm": 4.181177176852055, "learning_rate": 8.933347460594843e-06, "loss": 0.3539, "step": 8222 }, { "epoch": 0.24, "grad_norm": 5.008809459077269, "learning_rate": 8.933061131081536e-06, "loss": 0.5004, "step": 8223 }, { "epoch": 0.24, "grad_norm": 11.137718645512056, "learning_rate": 8.932774767732589e-06, "loss": 0.7145, "step": 8224 }, { "epoch": 0.24, "grad_norm": 2.7749857167572296, "learning_rate": 8.932488370550463e-06, "loss": 0.0787, "step": 8225 }, { "epoch": 0.24, "grad_norm": 5.36627580431677, "learning_rate": 8.932201939537626e-06, "loss": 0.4697, "step": 8226 }, { "epoch": 0.24, "grad_norm": 5.674440775638265, "learning_rate": 8.931915474696538e-06, "loss": 0.4539, "step": 8227 }, { "epoch": 0.24, "grad_norm": 5.18787028818027, "learning_rate": 8.931628976029668e-06, "loss": 0.6575, "step": 8228 }, { "epoch": 0.24, "grad_norm": 4.748664417223647, "learning_rate": 8.931342443539474e-06, "loss": 0.7187, "step": 8229 }, { "epoch": 0.24, "grad_norm": 8.62053125133993, "learning_rate": 8.931055877228427e-06, "loss": 0.2225, "step": 8230 }, { "epoch": 0.24, "grad_norm": 6.430684231412942, "learning_rate": 8.930769277098991e-06, "loss": 0.2602, "step": 8231 }, { "epoch": 0.24, "grad_norm": 13.032159384779087, "learning_rate": 8.93048264315363e-06, "loss": 0.6671, "step": 8232 }, { "epoch": 0.24, "grad_norm": 3.9210019298685896, "learning_rate": 8.930195975394811e-06, "loss": 0.3056, "step": 8233 }, { "epoch": 0.24, "grad_norm": 3.6107921543028496, "learning_rate": 8.929909273825e-06, "loss": 0.4748, "step": 8234 }, { "epoch": 0.24, "grad_norm": 4.26552729791669, "learning_rate": 8.929622538446663e-06, "loss": 0.5969, "step": 8235 }, { "epoch": 0.24, "grad_norm": 3.6607193651064414, "learning_rate": 8.929335769262267e-06, "loss": 0.5306, "step": 8236 }, { "epoch": 0.24, "grad_norm": 5.1180721071626385, "learning_rate": 8.929048966274281e-06, "loss": 0.718, "step": 8237 }, { "epoch": 0.24, "grad_norm": 4.5992128756086945, "learning_rate": 8.928762129485167e-06, "loss": 0.6468, "step": 8238 }, { "epoch": 0.24, "grad_norm": 4.751852427214123, "learning_rate": 8.928475258897398e-06, "loss": 0.7694, "step": 8239 }, { "epoch": 0.24, "grad_norm": 9.632242821696464, "learning_rate": 8.92818835451344e-06, "loss": 0.4514, "step": 8240 }, { "epoch": 0.24, "grad_norm": 7.234568648908282, "learning_rate": 8.927901416335763e-06, "loss": 0.3689, "step": 8241 }, { "epoch": 0.24, "grad_norm": 5.182596853908906, "learning_rate": 8.927614444366832e-06, "loss": 0.3693, "step": 8242 }, { "epoch": 0.24, "grad_norm": 8.775152512662295, "learning_rate": 8.927327438609117e-06, "loss": 0.6494, "step": 8243 }, { "epoch": 0.24, "grad_norm": 5.08031461019332, "learning_rate": 8.92704039906509e-06, "loss": 0.44, "step": 8244 }, { "epoch": 0.24, "grad_norm": 11.357887328065786, "learning_rate": 8.926753325737215e-06, "loss": 0.4094, "step": 8245 }, { "epoch": 0.24, "grad_norm": 4.152957386454975, "learning_rate": 8.926466218627966e-06, "loss": 0.6536, "step": 8246 }, { "epoch": 0.24, "grad_norm": 5.976671404874803, "learning_rate": 8.926179077739812e-06, "loss": 0.6123, "step": 8247 }, { "epoch": 0.24, "grad_norm": 3.422309429862381, "learning_rate": 8.925891903075221e-06, "loss": 0.2836, "step": 8248 }, { "epoch": 0.24, "grad_norm": 6.023201508605623, "learning_rate": 8.925604694636667e-06, "loss": 0.7271, "step": 8249 }, { "epoch": 0.24, "grad_norm": 4.9633008238678, "learning_rate": 8.925317452426618e-06, "loss": 0.2932, "step": 8250 }, { "epoch": 0.24, "grad_norm": 8.292087013925311, "learning_rate": 8.925030176447548e-06, "loss": 0.8663, "step": 8251 }, { "epoch": 0.24, "grad_norm": 9.70378260449641, "learning_rate": 8.924742866701926e-06, "loss": 0.624, "step": 8252 }, { "epoch": 0.24, "grad_norm": 6.411106631943879, "learning_rate": 8.924455523192224e-06, "loss": 0.2517, "step": 8253 }, { "epoch": 0.24, "grad_norm": 5.238493753627393, "learning_rate": 8.924168145920913e-06, "loss": 0.4144, "step": 8254 }, { "epoch": 0.24, "grad_norm": 4.928371537935241, "learning_rate": 8.923880734890469e-06, "loss": 0.467, "step": 8255 }, { "epoch": 0.24, "grad_norm": 4.782145048649915, "learning_rate": 8.923593290103361e-06, "loss": 0.5695, "step": 8256 }, { "epoch": 0.24, "grad_norm": 6.745357789149795, "learning_rate": 8.923305811562062e-06, "loss": 0.6968, "step": 8257 }, { "epoch": 0.24, "grad_norm": 6.208992323580847, "learning_rate": 8.923018299269048e-06, "loss": 0.5475, "step": 8258 }, { "epoch": 0.24, "grad_norm": 7.985257751279549, "learning_rate": 8.92273075322679e-06, "loss": 0.667, "step": 8259 }, { "epoch": 0.24, "grad_norm": 5.035813767359753, "learning_rate": 8.922443173437762e-06, "loss": 0.3235, "step": 8260 }, { "epoch": 0.24, "grad_norm": 7.964773584705044, "learning_rate": 8.922155559904437e-06, "loss": 0.3876, "step": 8261 }, { "epoch": 0.24, "grad_norm": 5.727164409901252, "learning_rate": 8.921867912629293e-06, "loss": 0.7318, "step": 8262 }, { "epoch": 0.24, "grad_norm": 5.8389031204468, "learning_rate": 8.921580231614801e-06, "loss": 0.6141, "step": 8263 }, { "epoch": 0.24, "grad_norm": 5.136928760021219, "learning_rate": 8.921292516863438e-06, "loss": 0.3373, "step": 8264 }, { "epoch": 0.24, "grad_norm": 12.558929391412951, "learning_rate": 8.921004768377677e-06, "loss": 0.852, "step": 8265 }, { "epoch": 0.24, "grad_norm": 6.881261020689688, "learning_rate": 8.920716986159996e-06, "loss": 0.6724, "step": 8266 }, { "epoch": 0.24, "grad_norm": 6.643876516119033, "learning_rate": 8.920429170212868e-06, "loss": 0.6781, "step": 8267 }, { "epoch": 0.24, "grad_norm": 11.041297724096063, "learning_rate": 8.920141320538772e-06, "loss": 0.4723, "step": 8268 }, { "epoch": 0.24, "grad_norm": 6.362778523762237, "learning_rate": 8.919853437140183e-06, "loss": 0.3155, "step": 8269 }, { "epoch": 0.24, "grad_norm": 6.741553249120256, "learning_rate": 8.919565520019575e-06, "loss": 0.6864, "step": 8270 }, { "epoch": 0.24, "grad_norm": 5.010387502520643, "learning_rate": 8.91927756917943e-06, "loss": 0.5228, "step": 8271 }, { "epoch": 0.24, "grad_norm": 9.552015653070248, "learning_rate": 8.918989584622222e-06, "loss": 0.384, "step": 8272 }, { "epoch": 0.24, "grad_norm": 8.635846159914212, "learning_rate": 8.91870156635043e-06, "loss": 0.8602, "step": 8273 }, { "epoch": 0.24, "grad_norm": 3.519942383457189, "learning_rate": 8.918413514366529e-06, "loss": 0.2547, "step": 8274 }, { "epoch": 0.24, "grad_norm": 5.840354789904501, "learning_rate": 8.918125428672999e-06, "loss": 0.5279, "step": 8275 }, { "epoch": 0.24, "grad_norm": 3.8997363906193456, "learning_rate": 8.91783730927232e-06, "loss": 0.498, "step": 8276 }, { "epoch": 0.24, "grad_norm": 7.459766108244245, "learning_rate": 8.917549156166968e-06, "loss": 0.4952, "step": 8277 }, { "epoch": 0.24, "grad_norm": 6.861758398067222, "learning_rate": 8.917260969359422e-06, "loss": 0.7537, "step": 8278 }, { "epoch": 0.24, "grad_norm": 7.571251841433564, "learning_rate": 8.916972748852164e-06, "loss": 0.9288, "step": 8279 }, { "epoch": 0.24, "grad_norm": 5.988445939140344, "learning_rate": 8.916684494647671e-06, "loss": 0.6287, "step": 8280 }, { "epoch": 0.24, "grad_norm": 6.225705881197508, "learning_rate": 8.916396206748424e-06, "loss": 0.8518, "step": 8281 }, { "epoch": 0.24, "grad_norm": 5.1123249275805165, "learning_rate": 8.916107885156902e-06, "loss": 0.4848, "step": 8282 }, { "epoch": 0.24, "grad_norm": 8.206645914769942, "learning_rate": 8.915819529875588e-06, "loss": 0.6604, "step": 8283 }, { "epoch": 0.24, "grad_norm": 3.7499372635997714, "learning_rate": 8.915531140906957e-06, "loss": 0.1939, "step": 8284 }, { "epoch": 0.24, "grad_norm": 2.5239045977464554, "learning_rate": 8.915242718253496e-06, "loss": 0.1886, "step": 8285 }, { "epoch": 0.24, "grad_norm": 10.579762837721038, "learning_rate": 8.914954261917682e-06, "loss": 0.0971, "step": 8286 }, { "epoch": 0.24, "grad_norm": 5.231570047412175, "learning_rate": 8.914665771902003e-06, "loss": 0.7208, "step": 8287 }, { "epoch": 0.24, "grad_norm": 5.883903533961663, "learning_rate": 8.914377248208932e-06, "loss": 0.5946, "step": 8288 }, { "epoch": 0.24, "grad_norm": 5.372596646979944, "learning_rate": 8.914088690840958e-06, "loss": 0.47, "step": 8289 }, { "epoch": 0.24, "grad_norm": 4.499980608580452, "learning_rate": 8.913800099800559e-06, "loss": 0.2571, "step": 8290 }, { "epoch": 0.24, "grad_norm": 4.953642135242788, "learning_rate": 8.913511475090221e-06, "loss": 0.7095, "step": 8291 }, { "epoch": 0.24, "grad_norm": 6.203699657044532, "learning_rate": 8.913222816712425e-06, "loss": 0.7822, "step": 8292 }, { "epoch": 0.24, "grad_norm": 4.497878581255831, "learning_rate": 8.912934124669654e-06, "loss": 0.4921, "step": 8293 }, { "epoch": 0.24, "grad_norm": 3.253612711146381, "learning_rate": 8.912645398964395e-06, "loss": 0.2913, "step": 8294 }, { "epoch": 0.24, "grad_norm": 10.032506846186273, "learning_rate": 8.912356639599126e-06, "loss": 0.9025, "step": 8295 }, { "epoch": 0.24, "grad_norm": 3.6772479739986563, "learning_rate": 8.912067846576338e-06, "loss": 0.4267, "step": 8296 }, { "epoch": 0.24, "grad_norm": 4.330675440959858, "learning_rate": 8.91177901989851e-06, "loss": 0.6487, "step": 8297 }, { "epoch": 0.24, "grad_norm": 4.508725026026875, "learning_rate": 8.91149015956813e-06, "loss": 0.3906, "step": 8298 }, { "epoch": 0.24, "grad_norm": 5.350916847207416, "learning_rate": 8.91120126558768e-06, "loss": 0.4737, "step": 8299 }, { "epoch": 0.24, "grad_norm": 6.956071070917284, "learning_rate": 8.910912337959647e-06, "loss": 0.5715, "step": 8300 }, { "epoch": 0.24, "grad_norm": 7.666139404852601, "learning_rate": 8.910623376686518e-06, "loss": 0.7831, "step": 8301 }, { "epoch": 0.24, "grad_norm": 6.393684233236798, "learning_rate": 8.910334381770778e-06, "loss": 0.662, "step": 8302 }, { "epoch": 0.24, "grad_norm": 9.156859914626507, "learning_rate": 8.910045353214912e-06, "loss": 0.8535, "step": 8303 }, { "epoch": 0.24, "grad_norm": 7.320245471757397, "learning_rate": 8.909756291021407e-06, "loss": 0.8612, "step": 8304 }, { "epoch": 0.24, "grad_norm": 7.665235532686563, "learning_rate": 8.90946719519275e-06, "loss": 0.41, "step": 8305 }, { "epoch": 0.24, "grad_norm": 5.203581368992004, "learning_rate": 8.909178065731428e-06, "loss": 0.355, "step": 8306 }, { "epoch": 0.24, "grad_norm": 4.943364439154977, "learning_rate": 8.90888890263993e-06, "loss": 0.4236, "step": 8307 }, { "epoch": 0.24, "grad_norm": 6.192536471724958, "learning_rate": 8.90859970592074e-06, "loss": 0.7422, "step": 8308 }, { "epoch": 0.24, "grad_norm": 8.166141387693644, "learning_rate": 8.90831047557635e-06, "loss": 0.2144, "step": 8309 }, { "epoch": 0.24, "grad_norm": 4.159437090541401, "learning_rate": 8.908021211609243e-06, "loss": 0.4617, "step": 8310 }, { "epoch": 0.24, "grad_norm": 4.941075346350034, "learning_rate": 8.907731914021913e-06, "loss": 0.3351, "step": 8311 }, { "epoch": 0.24, "grad_norm": 3.3933199179695817, "learning_rate": 8.907442582816846e-06, "loss": 0.3675, "step": 8312 }, { "epoch": 0.24, "grad_norm": 5.824533619035301, "learning_rate": 8.907153217996531e-06, "loss": 0.376, "step": 8313 }, { "epoch": 0.24, "grad_norm": 6.324143049365494, "learning_rate": 8.906863819563459e-06, "loss": 0.3524, "step": 8314 }, { "epoch": 0.24, "grad_norm": 4.186010935766824, "learning_rate": 8.906574387520119e-06, "loss": 0.4028, "step": 8315 }, { "epoch": 0.24, "grad_norm": 5.675257323224601, "learning_rate": 8.906284921869e-06, "loss": 0.4148, "step": 8316 }, { "epoch": 0.24, "grad_norm": 6.248733964484727, "learning_rate": 8.905995422612592e-06, "loss": 0.5644, "step": 8317 }, { "epoch": 0.24, "grad_norm": 9.456699118125435, "learning_rate": 8.905705889753386e-06, "loss": 0.7638, "step": 8318 }, { "epoch": 0.24, "grad_norm": 6.073994780210702, "learning_rate": 8.905416323293873e-06, "loss": 0.579, "step": 8319 }, { "epoch": 0.24, "grad_norm": 2.8883011627129944, "learning_rate": 8.905126723236546e-06, "loss": 0.3395, "step": 8320 }, { "epoch": 0.24, "grad_norm": 4.130821022234413, "learning_rate": 8.904837089583893e-06, "loss": 0.4727, "step": 8321 }, { "epoch": 0.24, "grad_norm": 8.858348645699138, "learning_rate": 8.90454742233841e-06, "loss": 0.7708, "step": 8322 }, { "epoch": 0.24, "grad_norm": 7.117886589546643, "learning_rate": 8.904257721502583e-06, "loss": 0.696, "step": 8323 }, { "epoch": 0.24, "grad_norm": 7.882428071976014, "learning_rate": 8.90396798707891e-06, "loss": 0.9381, "step": 8324 }, { "epoch": 0.24, "grad_norm": 10.379404259199028, "learning_rate": 8.903678219069878e-06, "loss": 0.7925, "step": 8325 }, { "epoch": 0.24, "grad_norm": 3.016261018874415, "learning_rate": 8.903388417477985e-06, "loss": 0.3313, "step": 8326 }, { "epoch": 0.24, "grad_norm": 7.540206458707971, "learning_rate": 8.903098582305723e-06, "loss": 0.9211, "step": 8327 }, { "epoch": 0.24, "grad_norm": 5.037875013378274, "learning_rate": 8.902808713555584e-06, "loss": 0.5579, "step": 8328 }, { "epoch": 0.24, "grad_norm": 9.62491102301355, "learning_rate": 8.902518811230061e-06, "loss": 0.7629, "step": 8329 }, { "epoch": 0.24, "grad_norm": 7.2858778363524905, "learning_rate": 8.902228875331648e-06, "loss": 0.7367, "step": 8330 }, { "epoch": 0.24, "grad_norm": 3.187168178864462, "learning_rate": 8.901938905862844e-06, "loss": 0.2609, "step": 8331 }, { "epoch": 0.24, "grad_norm": 5.372456325745785, "learning_rate": 8.901648902826136e-06, "loss": 0.6167, "step": 8332 }, { "epoch": 0.24, "grad_norm": 5.713743342477295, "learning_rate": 8.901358866224026e-06, "loss": 0.7414, "step": 8333 }, { "epoch": 0.24, "grad_norm": 3.922641268946385, "learning_rate": 8.901068796059005e-06, "loss": 0.3076, "step": 8334 }, { "epoch": 0.24, "grad_norm": 7.136070685566865, "learning_rate": 8.90077869233357e-06, "loss": 0.7457, "step": 8335 }, { "epoch": 0.24, "grad_norm": 5.125105042660416, "learning_rate": 8.900488555050215e-06, "loss": 0.8281, "step": 8336 }, { "epoch": 0.24, "grad_norm": 9.098142683784127, "learning_rate": 8.900198384211437e-06, "loss": 0.891, "step": 8337 }, { "epoch": 0.24, "grad_norm": 7.1815055064670466, "learning_rate": 8.899908179819734e-06, "loss": 0.5315, "step": 8338 }, { "epoch": 0.24, "grad_norm": 7.480044838470052, "learning_rate": 8.899617941877602e-06, "loss": 0.7013, "step": 8339 }, { "epoch": 0.24, "grad_norm": 4.9999337668800425, "learning_rate": 8.899327670387535e-06, "loss": 0.2302, "step": 8340 }, { "epoch": 0.24, "grad_norm": 8.060587160764515, "learning_rate": 8.899037365352031e-06, "loss": 0.5334, "step": 8341 }, { "epoch": 0.24, "grad_norm": 5.591655061864205, "learning_rate": 8.89874702677359e-06, "loss": 0.6039, "step": 8342 }, { "epoch": 0.24, "grad_norm": 4.121961948729992, "learning_rate": 8.898456654654708e-06, "loss": 0.4585, "step": 8343 }, { "epoch": 0.24, "grad_norm": 16.513184741576904, "learning_rate": 8.898166248997884e-06, "loss": 0.5955, "step": 8344 }, { "epoch": 0.24, "grad_norm": 6.929165068913144, "learning_rate": 8.897875809805615e-06, "loss": 0.6323, "step": 8345 }, { "epoch": 0.24, "grad_norm": 9.605183600637373, "learning_rate": 8.897585337080402e-06, "loss": 0.8502, "step": 8346 }, { "epoch": 0.24, "grad_norm": 4.677687636362925, "learning_rate": 8.89729483082474e-06, "loss": 0.5594, "step": 8347 }, { "epoch": 0.24, "grad_norm": 4.163152140718032, "learning_rate": 8.897004291041133e-06, "loss": 0.3415, "step": 8348 }, { "epoch": 0.24, "grad_norm": 6.43706762843262, "learning_rate": 8.896713717732077e-06, "loss": 0.3349, "step": 8349 }, { "epoch": 0.24, "grad_norm": 4.988232999970066, "learning_rate": 8.89642311090007e-06, "loss": 0.4894, "step": 8350 }, { "epoch": 0.24, "grad_norm": 9.15799792794685, "learning_rate": 8.896132470547617e-06, "loss": 0.4318, "step": 8351 }, { "epoch": 0.24, "grad_norm": 4.672394076188164, "learning_rate": 8.895841796677217e-06, "loss": 0.4943, "step": 8352 }, { "epoch": 0.24, "grad_norm": 4.662104849139966, "learning_rate": 8.895551089291368e-06, "loss": 0.3326, "step": 8353 }, { "epoch": 0.24, "grad_norm": 6.505240015503022, "learning_rate": 8.895260348392574e-06, "loss": 0.5322, "step": 8354 }, { "epoch": 0.24, "grad_norm": 6.262559021692579, "learning_rate": 8.894969573983334e-06, "loss": 0.5426, "step": 8355 }, { "epoch": 0.24, "grad_norm": 7.603402082769876, "learning_rate": 8.89467876606615e-06, "loss": 0.5106, "step": 8356 }, { "epoch": 0.24, "grad_norm": 9.165822048000539, "learning_rate": 8.894387924643524e-06, "loss": 0.6349, "step": 8357 }, { "epoch": 0.24, "grad_norm": 5.531012147433871, "learning_rate": 8.89409704971796e-06, "loss": 0.6603, "step": 8358 }, { "epoch": 0.24, "grad_norm": 8.042856757564, "learning_rate": 8.893806141291956e-06, "loss": 0.5756, "step": 8359 }, { "epoch": 0.24, "grad_norm": 7.728383959462542, "learning_rate": 8.893515199368018e-06, "loss": 0.8443, "step": 8360 }, { "epoch": 0.24, "grad_norm": 7.09920624607715, "learning_rate": 8.893224223948649e-06, "loss": 0.4896, "step": 8361 }, { "epoch": 0.24, "grad_norm": 3.9582138930253046, "learning_rate": 8.89293321503635e-06, "loss": 0.3882, "step": 8362 }, { "epoch": 0.24, "grad_norm": 11.096441667712066, "learning_rate": 8.892642172633628e-06, "loss": 1.1632, "step": 8363 }, { "epoch": 0.24, "grad_norm": 8.005389484315755, "learning_rate": 8.892351096742983e-06, "loss": 0.8719, "step": 8364 }, { "epoch": 0.24, "grad_norm": 7.179189770602885, "learning_rate": 8.892059987366922e-06, "loss": 1.4143, "step": 8365 }, { "epoch": 0.24, "grad_norm": 3.319911501736183, "learning_rate": 8.891768844507946e-06, "loss": 0.226, "step": 8366 }, { "epoch": 0.24, "grad_norm": 8.494386951403602, "learning_rate": 8.891477668168564e-06, "loss": 0.8776, "step": 8367 }, { "epoch": 0.24, "grad_norm": 4.123682736544608, "learning_rate": 8.891186458351277e-06, "loss": 0.6436, "step": 8368 }, { "epoch": 0.24, "grad_norm": 5.761087384303319, "learning_rate": 8.890895215058592e-06, "loss": 0.2903, "step": 8369 }, { "epoch": 0.24, "grad_norm": 7.397830346077538, "learning_rate": 8.890603938293016e-06, "loss": 0.3983, "step": 8370 }, { "epoch": 0.24, "grad_norm": 4.9010387857113145, "learning_rate": 8.890312628057053e-06, "loss": 0.669, "step": 8371 }, { "epoch": 0.24, "grad_norm": 7.008896759766298, "learning_rate": 8.890021284353208e-06, "loss": 0.625, "step": 8372 }, { "epoch": 0.24, "grad_norm": 4.922162341252822, "learning_rate": 8.88972990718399e-06, "loss": 0.5536, "step": 8373 }, { "epoch": 0.24, "grad_norm": 9.2645530659586, "learning_rate": 8.889438496551905e-06, "loss": 0.3881, "step": 8374 }, { "epoch": 0.24, "grad_norm": 6.125046340611378, "learning_rate": 8.889147052459459e-06, "loss": 0.409, "step": 8375 }, { "epoch": 0.24, "grad_norm": 4.398465795070235, "learning_rate": 8.88885557490916e-06, "loss": 0.5068, "step": 8376 }, { "epoch": 0.24, "grad_norm": 10.414683364252449, "learning_rate": 8.888564063903516e-06, "loss": 0.6235, "step": 8377 }, { "epoch": 0.24, "grad_norm": 4.991104366662721, "learning_rate": 8.888272519445031e-06, "loss": 0.3762, "step": 8378 }, { "epoch": 0.24, "grad_norm": 5.678608662019663, "learning_rate": 8.887980941536219e-06, "loss": 0.7516, "step": 8379 }, { "epoch": 0.24, "grad_norm": 4.340498221669592, "learning_rate": 8.887689330179585e-06, "loss": 0.5925, "step": 8380 }, { "epoch": 0.24, "grad_norm": 5.270679707857869, "learning_rate": 8.887397685377638e-06, "loss": 0.3741, "step": 8381 }, { "epoch": 0.24, "grad_norm": 6.731113360903346, "learning_rate": 8.887106007132888e-06, "loss": 0.2869, "step": 8382 }, { "epoch": 0.24, "grad_norm": 10.269625832001333, "learning_rate": 8.886814295447842e-06, "loss": 0.9516, "step": 8383 }, { "epoch": 0.24, "grad_norm": 7.374985840347404, "learning_rate": 8.886522550325014e-06, "loss": 1.1542, "step": 8384 }, { "epoch": 0.24, "grad_norm": 14.94299266496361, "learning_rate": 8.886230771766911e-06, "loss": 0.7617, "step": 8385 }, { "epoch": 0.24, "grad_norm": 6.207205864170912, "learning_rate": 8.885938959776041e-06, "loss": 0.8228, "step": 8386 }, { "epoch": 0.24, "grad_norm": 3.7439102316878556, "learning_rate": 8.885647114354917e-06, "loss": 0.3815, "step": 8387 }, { "epoch": 0.24, "grad_norm": 5.016422386600935, "learning_rate": 8.885355235506049e-06, "loss": 0.2346, "step": 8388 }, { "epoch": 0.24, "grad_norm": 6.861865171659467, "learning_rate": 8.885063323231948e-06, "loss": 0.3578, "step": 8389 }, { "epoch": 0.24, "grad_norm": 5.9057450255228625, "learning_rate": 8.884771377535128e-06, "loss": 0.4678, "step": 8390 }, { "epoch": 0.24, "grad_norm": 6.202586037693687, "learning_rate": 8.884479398418095e-06, "loss": 0.6346, "step": 8391 }, { "epoch": 0.24, "grad_norm": 3.6866615441328703, "learning_rate": 8.884187385883367e-06, "loss": 0.5699, "step": 8392 }, { "epoch": 0.24, "grad_norm": 7.320339955997987, "learning_rate": 8.883895339933452e-06, "loss": 0.8232, "step": 8393 }, { "epoch": 0.24, "grad_norm": 3.948451419469152, "learning_rate": 8.883603260570862e-06, "loss": 0.4567, "step": 8394 }, { "epoch": 0.24, "grad_norm": 8.014617975641208, "learning_rate": 8.883311147798113e-06, "loss": 0.7319, "step": 8395 }, { "epoch": 0.24, "grad_norm": 3.486895278404006, "learning_rate": 8.883019001617716e-06, "loss": 0.4089, "step": 8396 }, { "epoch": 0.24, "grad_norm": 6.809857022021084, "learning_rate": 8.882726822032185e-06, "loss": 0.8182, "step": 8397 }, { "epoch": 0.24, "grad_norm": 8.2508697340311, "learning_rate": 8.882434609044033e-06, "loss": 0.8186, "step": 8398 }, { "epoch": 0.24, "grad_norm": 8.371300349887983, "learning_rate": 8.882142362655773e-06, "loss": 0.7946, "step": 8399 }, { "epoch": 0.24, "grad_norm": 5.007926451625903, "learning_rate": 8.881850082869921e-06, "loss": 0.3615, "step": 8400 }, { "epoch": 0.24, "grad_norm": 6.664867500081313, "learning_rate": 8.88155776968899e-06, "loss": 1.0553, "step": 8401 }, { "epoch": 0.24, "grad_norm": 7.262232556564644, "learning_rate": 8.881265423115497e-06, "loss": 0.6747, "step": 8402 }, { "epoch": 0.24, "grad_norm": 5.397315387616524, "learning_rate": 8.880973043151954e-06, "loss": 0.4466, "step": 8403 }, { "epoch": 0.24, "grad_norm": 4.343421868753744, "learning_rate": 8.88068062980088e-06, "loss": 0.2807, "step": 8404 }, { "epoch": 0.24, "grad_norm": 9.451704539617372, "learning_rate": 8.880388183064785e-06, "loss": 0.2782, "step": 8405 }, { "epoch": 0.24, "grad_norm": 6.565317511859168, "learning_rate": 8.880095702946191e-06, "loss": 0.2352, "step": 8406 }, { "epoch": 0.24, "grad_norm": 3.1029238847942535, "learning_rate": 8.87980318944761e-06, "loss": 0.3966, "step": 8407 }, { "epoch": 0.24, "grad_norm": 6.451215780249097, "learning_rate": 8.87951064257156e-06, "loss": 0.6251, "step": 8408 }, { "epoch": 0.24, "grad_norm": 9.03652802512845, "learning_rate": 8.879218062320558e-06, "loss": 0.4252, "step": 8409 }, { "epoch": 0.24, "grad_norm": 5.201788378343437, "learning_rate": 8.87892544869712e-06, "loss": 0.695, "step": 8410 }, { "epoch": 0.24, "grad_norm": 4.708455559598483, "learning_rate": 8.878632801703765e-06, "loss": 0.4687, "step": 8411 }, { "epoch": 0.24, "grad_norm": 7.219445232254161, "learning_rate": 8.878340121343008e-06, "loss": 0.8672, "step": 8412 }, { "epoch": 0.24, "grad_norm": 2.440672313998417, "learning_rate": 8.878047407617371e-06, "loss": 0.1787, "step": 8413 }, { "epoch": 0.24, "grad_norm": 5.238953663155978, "learning_rate": 8.877754660529368e-06, "loss": 0.599, "step": 8414 }, { "epoch": 0.24, "grad_norm": 6.568017765251137, "learning_rate": 8.877461880081518e-06, "loss": 0.482, "step": 8415 }, { "epoch": 0.24, "grad_norm": 7.065053056381363, "learning_rate": 8.877169066276343e-06, "loss": 0.5793, "step": 8416 }, { "epoch": 0.24, "grad_norm": 8.275836427487189, "learning_rate": 8.876876219116359e-06, "loss": 0.4951, "step": 8417 }, { "epoch": 0.24, "grad_norm": 5.3562397586185995, "learning_rate": 8.876583338604086e-06, "loss": 0.6787, "step": 8418 }, { "epoch": 0.24, "grad_norm": 7.313509912118097, "learning_rate": 8.876290424742042e-06, "loss": 0.8012, "step": 8419 }, { "epoch": 0.24, "grad_norm": 7.5164099142654255, "learning_rate": 8.875997477532752e-06, "loss": 0.6346, "step": 8420 }, { "epoch": 0.24, "grad_norm": 6.73019707962258, "learning_rate": 8.875704496978733e-06, "loss": 0.2969, "step": 8421 }, { "epoch": 0.24, "grad_norm": 8.85001431803434, "learning_rate": 8.875411483082504e-06, "loss": 1.0332, "step": 8422 }, { "epoch": 0.24, "grad_norm": 2.438916247306617, "learning_rate": 8.875118435846587e-06, "loss": 0.2215, "step": 8423 }, { "epoch": 0.24, "grad_norm": 5.950829811894664, "learning_rate": 8.874825355273503e-06, "loss": 0.1809, "step": 8424 }, { "epoch": 0.24, "grad_norm": 5.442084823984767, "learning_rate": 8.874532241365772e-06, "loss": 0.4308, "step": 8425 }, { "epoch": 0.24, "grad_norm": 9.966760610267407, "learning_rate": 8.87423909412592e-06, "loss": 0.7381, "step": 8426 }, { "epoch": 0.24, "grad_norm": 7.221310677427808, "learning_rate": 8.873945913556465e-06, "loss": 0.242, "step": 8427 }, { "epoch": 0.24, "grad_norm": 2.427483125975682, "learning_rate": 8.873652699659931e-06, "loss": 0.1557, "step": 8428 }, { "epoch": 0.24, "grad_norm": 3.8013541092770393, "learning_rate": 8.873359452438839e-06, "loss": 0.3178, "step": 8429 }, { "epoch": 0.24, "grad_norm": 5.200739591528224, "learning_rate": 8.873066171895713e-06, "loss": 0.6578, "step": 8430 }, { "epoch": 0.24, "grad_norm": 6.830624190665627, "learning_rate": 8.872772858033074e-06, "loss": 0.4969, "step": 8431 }, { "epoch": 0.24, "grad_norm": 6.837904362690606, "learning_rate": 8.872479510853449e-06, "loss": 0.9817, "step": 8432 }, { "epoch": 0.24, "grad_norm": 5.75619848603135, "learning_rate": 8.87218613035936e-06, "loss": 0.5031, "step": 8433 }, { "epoch": 0.24, "grad_norm": 4.675625268665637, "learning_rate": 8.871892716553328e-06, "loss": 0.3688, "step": 8434 }, { "epoch": 0.24, "grad_norm": 4.319953503446784, "learning_rate": 8.871599269437881e-06, "loss": 0.479, "step": 8435 }, { "epoch": 0.24, "grad_norm": 6.040993010573882, "learning_rate": 8.871305789015542e-06, "loss": 0.4023, "step": 8436 }, { "epoch": 0.24, "grad_norm": 6.6578100104641775, "learning_rate": 8.871012275288837e-06, "loss": 0.4715, "step": 8437 }, { "epoch": 0.24, "grad_norm": 2.2618236879767073, "learning_rate": 8.870718728260289e-06, "loss": 0.1496, "step": 8438 }, { "epoch": 0.24, "grad_norm": 4.758625681370277, "learning_rate": 8.870425147932425e-06, "loss": 0.6814, "step": 8439 }, { "epoch": 0.24, "grad_norm": 9.936674743550101, "learning_rate": 8.870131534307768e-06, "loss": 0.797, "step": 8440 }, { "epoch": 0.24, "grad_norm": 4.625336003341428, "learning_rate": 8.869837887388848e-06, "loss": 0.2779, "step": 8441 }, { "epoch": 0.24, "grad_norm": 4.4149094975830545, "learning_rate": 8.869544207178188e-06, "loss": 0.3429, "step": 8442 }, { "epoch": 0.24, "grad_norm": 15.430304761684138, "learning_rate": 8.869250493678317e-06, "loss": 0.6251, "step": 8443 }, { "epoch": 0.24, "grad_norm": 8.496693192305028, "learning_rate": 8.868956746891758e-06, "loss": 0.8328, "step": 8444 }, { "epoch": 0.24, "grad_norm": 7.863511015353538, "learning_rate": 8.868662966821043e-06, "loss": 0.5728, "step": 8445 }, { "epoch": 0.24, "grad_norm": 12.848661871083753, "learning_rate": 8.868369153468694e-06, "loss": 0.5619, "step": 8446 }, { "epoch": 0.24, "grad_norm": 4.269698046386666, "learning_rate": 8.868075306837245e-06, "loss": 0.4859, "step": 8447 }, { "epoch": 0.24, "grad_norm": 4.8074165843247325, "learning_rate": 8.867781426929218e-06, "loss": 0.4334, "step": 8448 }, { "epoch": 0.24, "grad_norm": 4.392274322791559, "learning_rate": 8.867487513747143e-06, "loss": 0.2619, "step": 8449 }, { "epoch": 0.24, "grad_norm": 7.565852848546997, "learning_rate": 8.867193567293552e-06, "loss": 0.535, "step": 8450 }, { "epoch": 0.24, "grad_norm": 10.559353015399228, "learning_rate": 8.866899587570969e-06, "loss": 0.8416, "step": 8451 }, { "epoch": 0.24, "grad_norm": 4.022503820432908, "learning_rate": 8.866605574581924e-06, "loss": 0.3411, "step": 8452 }, { "epoch": 0.24, "grad_norm": 4.737274767471159, "learning_rate": 8.866311528328949e-06, "loss": 0.3902, "step": 8453 }, { "epoch": 0.24, "grad_norm": 8.273075430838919, "learning_rate": 8.866017448814572e-06, "loss": 0.7763, "step": 8454 }, { "epoch": 0.24, "grad_norm": 3.381903969071885, "learning_rate": 8.865723336041323e-06, "loss": 0.307, "step": 8455 }, { "epoch": 0.24, "grad_norm": 7.8435019647660615, "learning_rate": 8.865429190011729e-06, "loss": 0.4223, "step": 8456 }, { "epoch": 0.24, "grad_norm": 8.453303477999716, "learning_rate": 8.865135010728328e-06, "loss": 0.5642, "step": 8457 }, { "epoch": 0.24, "grad_norm": 3.929463799178452, "learning_rate": 8.864840798193644e-06, "loss": 0.5143, "step": 8458 }, { "epoch": 0.24, "grad_norm": 5.882555546917788, "learning_rate": 8.864546552410211e-06, "loss": 0.4864, "step": 8459 }, { "epoch": 0.24, "grad_norm": 3.985540271975274, "learning_rate": 8.86425227338056e-06, "loss": 0.3511, "step": 8460 }, { "epoch": 0.24, "grad_norm": 4.225234536991497, "learning_rate": 8.863957961107222e-06, "loss": 0.3943, "step": 8461 }, { "epoch": 0.24, "grad_norm": 7.463905966245101, "learning_rate": 8.86366361559273e-06, "loss": 0.4314, "step": 8462 }, { "epoch": 0.24, "grad_norm": 2.5610345045876195, "learning_rate": 8.863369236839613e-06, "loss": 0.0919, "step": 8463 }, { "epoch": 0.24, "grad_norm": 5.281738777791878, "learning_rate": 8.86307482485041e-06, "loss": 0.3686, "step": 8464 }, { "epoch": 0.24, "grad_norm": 3.4541773508701965, "learning_rate": 8.862780379627647e-06, "loss": 0.2208, "step": 8465 }, { "epoch": 0.24, "grad_norm": 5.585010456973594, "learning_rate": 8.862485901173861e-06, "loss": 0.4324, "step": 8466 }, { "epoch": 0.24, "grad_norm": 6.707169030917721, "learning_rate": 8.862191389491585e-06, "loss": 0.4598, "step": 8467 }, { "epoch": 0.24, "grad_norm": 6.50204589630891, "learning_rate": 8.861896844583351e-06, "loss": 0.3315, "step": 8468 }, { "epoch": 0.24, "grad_norm": 4.310053864162396, "learning_rate": 8.861602266451695e-06, "loss": 0.6165, "step": 8469 }, { "epoch": 0.24, "grad_norm": 8.624576779361867, "learning_rate": 8.86130765509915e-06, "loss": 0.646, "step": 8470 }, { "epoch": 0.24, "grad_norm": 5.476970358121491, "learning_rate": 8.86101301052825e-06, "loss": 0.4584, "step": 8471 }, { "epoch": 0.24, "grad_norm": 7.616241490233948, "learning_rate": 8.860718332741532e-06, "loss": 0.4846, "step": 8472 }, { "epoch": 0.24, "grad_norm": 7.469926557882036, "learning_rate": 8.860423621741528e-06, "loss": 0.7086, "step": 8473 }, { "epoch": 0.24, "grad_norm": 5.550810543361524, "learning_rate": 8.860128877530775e-06, "loss": 0.3649, "step": 8474 }, { "epoch": 0.24, "grad_norm": 9.902340282913594, "learning_rate": 8.859834100111807e-06, "loss": 0.5395, "step": 8475 }, { "epoch": 0.24, "grad_norm": 6.090660720649986, "learning_rate": 8.859539289487164e-06, "loss": 0.7471, "step": 8476 }, { "epoch": 0.24, "grad_norm": 5.015869467413365, "learning_rate": 8.85924444565938e-06, "loss": 0.5713, "step": 8477 }, { "epoch": 0.24, "grad_norm": 5.946335031648402, "learning_rate": 8.85894956863099e-06, "loss": 0.2638, "step": 8478 }, { "epoch": 0.24, "grad_norm": 18.645589316137073, "learning_rate": 8.858654658404532e-06, "loss": 0.6449, "step": 8479 }, { "epoch": 0.24, "grad_norm": 4.572592090161132, "learning_rate": 8.858359714982543e-06, "loss": 0.7997, "step": 8480 }, { "epoch": 0.24, "grad_norm": 5.888522392640443, "learning_rate": 8.85806473836756e-06, "loss": 0.5777, "step": 8481 }, { "epoch": 0.24, "grad_norm": 4.608830513557399, "learning_rate": 8.857769728562122e-06, "loss": 0.4267, "step": 8482 }, { "epoch": 0.24, "grad_norm": 6.695749168444656, "learning_rate": 8.857474685568766e-06, "loss": 0.7886, "step": 8483 }, { "epoch": 0.24, "grad_norm": 7.393915133595166, "learning_rate": 8.85717960939003e-06, "loss": 0.6458, "step": 8484 }, { "epoch": 0.24, "grad_norm": 5.793640468183433, "learning_rate": 8.856884500028453e-06, "loss": 0.6239, "step": 8485 }, { "epoch": 0.24, "grad_norm": 6.0867917755294, "learning_rate": 8.856589357486572e-06, "loss": 0.7869, "step": 8486 }, { "epoch": 0.24, "grad_norm": 6.265730478821088, "learning_rate": 8.856294181766928e-06, "loss": 1.0026, "step": 8487 }, { "epoch": 0.24, "grad_norm": 8.763843185284136, "learning_rate": 8.855998972872062e-06, "loss": 0.5479, "step": 8488 }, { "epoch": 0.24, "grad_norm": 5.727045306868973, "learning_rate": 8.85570373080451e-06, "loss": 0.4141, "step": 8489 }, { "epoch": 0.24, "grad_norm": 4.302643887974845, "learning_rate": 8.855408455566815e-06, "loss": 0.7477, "step": 8490 }, { "epoch": 0.24, "grad_norm": 6.4211258277226415, "learning_rate": 8.855113147161514e-06, "loss": 0.5998, "step": 8491 }, { "epoch": 0.24, "grad_norm": 5.879212553339518, "learning_rate": 8.854817805591149e-06, "loss": 1.2223, "step": 8492 }, { "epoch": 0.24, "grad_norm": 5.426317840693405, "learning_rate": 8.85452243085826e-06, "loss": 0.2531, "step": 8493 }, { "epoch": 0.24, "grad_norm": 9.384308529776249, "learning_rate": 8.854227022965392e-06, "loss": 0.3528, "step": 8494 }, { "epoch": 0.24, "grad_norm": 7.565918709286225, "learning_rate": 8.853931581915082e-06, "loss": 0.6355, "step": 8495 }, { "epoch": 0.24, "grad_norm": 3.2380370633558817, "learning_rate": 8.853636107709875e-06, "loss": 0.4348, "step": 8496 }, { "epoch": 0.24, "grad_norm": 7.93489776968528, "learning_rate": 8.85334060035231e-06, "loss": 0.5769, "step": 8497 }, { "epoch": 0.24, "grad_norm": 7.949092929603534, "learning_rate": 8.853045059844929e-06, "loss": 0.6551, "step": 8498 }, { "epoch": 0.24, "grad_norm": 4.6471158240380825, "learning_rate": 8.852749486190276e-06, "loss": 0.5309, "step": 8499 }, { "epoch": 0.24, "grad_norm": 6.9828124207867734, "learning_rate": 8.852453879390893e-06, "loss": 0.5558, "step": 8500 }, { "epoch": 0.24, "grad_norm": 3.8344118460056174, "learning_rate": 8.852158239449326e-06, "loss": 0.5562, "step": 8501 }, { "epoch": 0.24, "grad_norm": 9.088974003314256, "learning_rate": 8.851862566368114e-06, "loss": 0.3999, "step": 8502 }, { "epoch": 0.24, "grad_norm": 4.576706389838022, "learning_rate": 8.851566860149803e-06, "loss": 0.214, "step": 8503 }, { "epoch": 0.24, "grad_norm": 11.134691281983006, "learning_rate": 8.851271120796937e-06, "loss": 0.7656, "step": 8504 }, { "epoch": 0.24, "grad_norm": 8.091257072267664, "learning_rate": 8.85097534831206e-06, "loss": 0.6942, "step": 8505 }, { "epoch": 0.24, "grad_norm": 7.41254630629178, "learning_rate": 8.850679542697715e-06, "loss": 0.2877, "step": 8506 }, { "epoch": 0.24, "grad_norm": 7.9301459419511655, "learning_rate": 8.850383703956449e-06, "loss": 0.4766, "step": 8507 }, { "epoch": 0.24, "grad_norm": 5.996229974149847, "learning_rate": 8.850087832090806e-06, "loss": 0.6122, "step": 8508 }, { "epoch": 0.24, "grad_norm": 6.275528156804858, "learning_rate": 8.849791927103333e-06, "loss": 0.3015, "step": 8509 }, { "epoch": 0.24, "grad_norm": 4.92604977800725, "learning_rate": 8.84949598899657e-06, "loss": 0.8271, "step": 8510 }, { "epoch": 0.24, "grad_norm": 7.63896963211697, "learning_rate": 8.84920001777307e-06, "loss": 0.6712, "step": 8511 }, { "epoch": 0.24, "grad_norm": 4.997802155955724, "learning_rate": 8.848904013435376e-06, "loss": 0.898, "step": 8512 }, { "epoch": 0.24, "grad_norm": 4.989718356392778, "learning_rate": 8.848607975986034e-06, "loss": 0.2986, "step": 8513 }, { "epoch": 0.24, "grad_norm": 5.644579655528821, "learning_rate": 8.848311905427591e-06, "loss": 0.4397, "step": 8514 }, { "epoch": 0.24, "grad_norm": 8.254655738089271, "learning_rate": 8.848015801762595e-06, "loss": 0.482, "step": 8515 }, { "epoch": 0.24, "grad_norm": 3.7353610249318465, "learning_rate": 8.847719664993592e-06, "loss": 0.372, "step": 8516 }, { "epoch": 0.24, "grad_norm": 9.647735644307371, "learning_rate": 8.847423495123132e-06, "loss": 0.7371, "step": 8517 }, { "epoch": 0.24, "grad_norm": 2.2526990675734915, "learning_rate": 8.847127292153762e-06, "loss": 0.1504, "step": 8518 }, { "epoch": 0.24, "grad_norm": 4.489163171825721, "learning_rate": 8.846831056088028e-06, "loss": 0.6449, "step": 8519 }, { "epoch": 0.24, "grad_norm": 8.447469853925694, "learning_rate": 8.846534786928479e-06, "loss": 0.2969, "step": 8520 }, { "epoch": 0.24, "grad_norm": 5.504533936315461, "learning_rate": 8.846238484677667e-06, "loss": 0.6239, "step": 8521 }, { "epoch": 0.24, "grad_norm": 8.760155315938341, "learning_rate": 8.845942149338139e-06, "loss": 0.3198, "step": 8522 }, { "epoch": 0.24, "grad_norm": 6.35705850386031, "learning_rate": 8.845645780912443e-06, "loss": 0.4675, "step": 8523 }, { "epoch": 0.24, "grad_norm": 4.946801466163362, "learning_rate": 8.845349379403129e-06, "loss": 0.6929, "step": 8524 }, { "epoch": 0.24, "grad_norm": 7.446307248953201, "learning_rate": 8.845052944812748e-06, "loss": 0.4458, "step": 8525 }, { "epoch": 0.24, "grad_norm": 8.19880926046519, "learning_rate": 8.84475647714385e-06, "loss": 0.4967, "step": 8526 }, { "epoch": 0.24, "grad_norm": 4.548283886322114, "learning_rate": 8.844459976398988e-06, "loss": 0.4276, "step": 8527 }, { "epoch": 0.24, "grad_norm": 5.3833934099528005, "learning_rate": 8.844163442580706e-06, "loss": 0.4759, "step": 8528 }, { "epoch": 0.24, "grad_norm": 6.5271820833650604, "learning_rate": 8.843866875691562e-06, "loss": 0.879, "step": 8529 }, { "epoch": 0.24, "grad_norm": 5.9566024675985165, "learning_rate": 8.843570275734104e-06, "loss": 0.4535, "step": 8530 }, { "epoch": 0.24, "grad_norm": 4.979734957624171, "learning_rate": 8.843273642710886e-06, "loss": 0.4527, "step": 8531 }, { "epoch": 0.24, "grad_norm": 7.240849475237479, "learning_rate": 8.842976976624456e-06, "loss": 1.019, "step": 8532 }, { "epoch": 0.24, "grad_norm": 7.252090744574389, "learning_rate": 8.842680277477368e-06, "loss": 0.3534, "step": 8533 }, { "epoch": 0.24, "grad_norm": 7.7972139884936364, "learning_rate": 8.842383545272175e-06, "loss": 0.3652, "step": 8534 }, { "epoch": 0.24, "grad_norm": 5.807487048866654, "learning_rate": 8.84208678001143e-06, "loss": 0.4346, "step": 8535 }, { "epoch": 0.24, "grad_norm": 12.495921728506893, "learning_rate": 8.841789981697685e-06, "loss": 0.6077, "step": 8536 }, { "epoch": 0.24, "grad_norm": 3.4707176195313187, "learning_rate": 8.841493150333494e-06, "loss": 0.5863, "step": 8537 }, { "epoch": 0.24, "grad_norm": 6.4110123959321665, "learning_rate": 8.84119628592141e-06, "loss": 0.7509, "step": 8538 }, { "epoch": 0.24, "grad_norm": 6.824283313500395, "learning_rate": 8.840899388463988e-06, "loss": 0.4221, "step": 8539 }, { "epoch": 0.24, "grad_norm": 4.752246476042807, "learning_rate": 8.84060245796378e-06, "loss": 0.6636, "step": 8540 }, { "epoch": 0.24, "grad_norm": 10.649092432313562, "learning_rate": 8.840305494423344e-06, "loss": 0.5977, "step": 8541 }, { "epoch": 0.24, "grad_norm": 4.253720394169375, "learning_rate": 8.840008497845231e-06, "loss": 0.1959, "step": 8542 }, { "epoch": 0.24, "grad_norm": 5.872067307941399, "learning_rate": 8.839711468231998e-06, "loss": 0.6364, "step": 8543 }, { "epoch": 0.24, "grad_norm": 8.093004288067394, "learning_rate": 8.839414405586201e-06, "loss": 0.4015, "step": 8544 }, { "epoch": 0.24, "grad_norm": 4.372604749852357, "learning_rate": 8.839117309910395e-06, "loss": 0.6314, "step": 8545 }, { "epoch": 0.24, "grad_norm": 6.6733441845821195, "learning_rate": 8.838820181207133e-06, "loss": 0.9465, "step": 8546 }, { "epoch": 0.24, "grad_norm": 9.045917730466309, "learning_rate": 8.838523019478976e-06, "loss": 0.4791, "step": 8547 }, { "epoch": 0.24, "grad_norm": 7.660347352083026, "learning_rate": 8.838225824728477e-06, "loss": 0.5445, "step": 8548 }, { "epoch": 0.24, "grad_norm": 12.312763191934389, "learning_rate": 8.837928596958193e-06, "loss": 0.8156, "step": 8549 }, { "epoch": 0.24, "grad_norm": 5.920638909079435, "learning_rate": 8.837631336170681e-06, "loss": 0.3722, "step": 8550 }, { "epoch": 0.24, "grad_norm": 5.0215387864431, "learning_rate": 8.837334042368502e-06, "loss": 0.4117, "step": 8551 }, { "epoch": 0.24, "grad_norm": 8.899353033832087, "learning_rate": 8.83703671555421e-06, "loss": 0.666, "step": 8552 }, { "epoch": 0.24, "grad_norm": 4.955208613895742, "learning_rate": 8.836739355730364e-06, "loss": 0.7377, "step": 8553 }, { "epoch": 0.24, "grad_norm": 4.144849112471225, "learning_rate": 8.83644196289952e-06, "loss": 0.4459, "step": 8554 }, { "epoch": 0.24, "grad_norm": 5.971717176842156, "learning_rate": 8.836144537064239e-06, "loss": 0.1327, "step": 8555 }, { "epoch": 0.25, "grad_norm": 5.133495267980855, "learning_rate": 8.835847078227079e-06, "loss": 0.4962, "step": 8556 }, { "epoch": 0.25, "grad_norm": 4.8816235857245935, "learning_rate": 8.835549586390599e-06, "loss": 0.5713, "step": 8557 }, { "epoch": 0.25, "grad_norm": 2.495052126280695, "learning_rate": 8.835252061557357e-06, "loss": 0.3194, "step": 8558 }, { "epoch": 0.25, "grad_norm": 3.532749094134676, "learning_rate": 8.834954503729916e-06, "loss": 0.3773, "step": 8559 }, { "epoch": 0.25, "grad_norm": 3.7723311861868103, "learning_rate": 8.834656912910831e-06, "loss": 0.3301, "step": 8560 }, { "epoch": 0.25, "grad_norm": 3.8789864000719776, "learning_rate": 8.834359289102666e-06, "loss": 0.7877, "step": 8561 }, { "epoch": 0.25, "grad_norm": 3.56875098741187, "learning_rate": 8.83406163230798e-06, "loss": 0.675, "step": 8562 }, { "epoch": 0.25, "grad_norm": 5.509960172478102, "learning_rate": 8.833763942529334e-06, "loss": 0.5014, "step": 8563 }, { "epoch": 0.25, "grad_norm": 9.38648893191667, "learning_rate": 8.83346621976929e-06, "loss": 0.6617, "step": 8564 }, { "epoch": 0.25, "grad_norm": 6.453378815543208, "learning_rate": 8.833168464030405e-06, "loss": 0.6082, "step": 8565 }, { "epoch": 0.25, "grad_norm": 9.20323898685246, "learning_rate": 8.832870675315246e-06, "loss": 0.554, "step": 8566 }, { "epoch": 0.25, "grad_norm": 3.791762878259276, "learning_rate": 8.832572853626371e-06, "loss": 0.4264, "step": 8567 }, { "epoch": 0.25, "grad_norm": 5.244189543824443, "learning_rate": 8.832274998966345e-06, "loss": 0.5052, "step": 8568 }, { "epoch": 0.25, "grad_norm": 6.4193332960534395, "learning_rate": 8.831977111337729e-06, "loss": 0.4836, "step": 8569 }, { "epoch": 0.25, "grad_norm": 3.538714907589074, "learning_rate": 8.831679190743085e-06, "loss": 0.2181, "step": 8570 }, { "epoch": 0.25, "grad_norm": 4.887795406692377, "learning_rate": 8.831381237184978e-06, "loss": 0.2644, "step": 8571 }, { "epoch": 0.25, "grad_norm": 9.253681429720098, "learning_rate": 8.831083250665968e-06, "loss": 1.0529, "step": 8572 }, { "epoch": 0.25, "grad_norm": 5.168412170071308, "learning_rate": 8.830785231188621e-06, "loss": 0.2192, "step": 8573 }, { "epoch": 0.25, "grad_norm": 3.18800922140944, "learning_rate": 8.8304871787555e-06, "loss": 0.3058, "step": 8574 }, { "epoch": 0.25, "grad_norm": 6.182830358580199, "learning_rate": 8.83018909336917e-06, "loss": 0.6783, "step": 8575 }, { "epoch": 0.25, "grad_norm": 7.866368422538996, "learning_rate": 8.829890975032196e-06, "loss": 0.9987, "step": 8576 }, { "epoch": 0.25, "grad_norm": 9.934634581297694, "learning_rate": 8.82959282374714e-06, "loss": 0.5453, "step": 8577 }, { "epoch": 0.25, "grad_norm": 8.772727224212517, "learning_rate": 8.829294639516569e-06, "loss": 0.4418, "step": 8578 }, { "epoch": 0.25, "grad_norm": 6.040601842592138, "learning_rate": 8.828996422343049e-06, "loss": 0.4329, "step": 8579 }, { "epoch": 0.25, "grad_norm": 5.8561154670692295, "learning_rate": 8.82869817222914e-06, "loss": 0.5699, "step": 8580 }, { "epoch": 0.25, "grad_norm": 5.784659457721697, "learning_rate": 8.828399889177418e-06, "loss": 0.4823, "step": 8581 }, { "epoch": 0.25, "grad_norm": 6.0272526514730345, "learning_rate": 8.828101573190437e-06, "loss": 0.6482, "step": 8582 }, { "epoch": 0.25, "grad_norm": 3.417943708055586, "learning_rate": 8.827803224270774e-06, "loss": 0.3522, "step": 8583 }, { "epoch": 0.25, "grad_norm": 6.779258048722006, "learning_rate": 8.827504842420989e-06, "loss": 0.6918, "step": 8584 }, { "epoch": 0.25, "grad_norm": 5.884957903871061, "learning_rate": 8.827206427643652e-06, "loss": 0.6897, "step": 8585 }, { "epoch": 0.25, "grad_norm": 6.5753951399973545, "learning_rate": 8.826907979941328e-06, "loss": 0.616, "step": 8586 }, { "epoch": 0.25, "grad_norm": 8.673892004838237, "learning_rate": 8.826609499316587e-06, "loss": 0.6243, "step": 8587 }, { "epoch": 0.25, "grad_norm": 7.4249029467883325, "learning_rate": 8.826310985771995e-06, "loss": 0.5309, "step": 8588 }, { "epoch": 0.25, "grad_norm": 6.090405646846994, "learning_rate": 8.826012439310122e-06, "loss": 0.6186, "step": 8589 }, { "epoch": 0.25, "grad_norm": 6.34591630299915, "learning_rate": 8.825713859933535e-06, "loss": 0.5746, "step": 8590 }, { "epoch": 0.25, "grad_norm": 5.326333334825248, "learning_rate": 8.825415247644801e-06, "loss": 0.6729, "step": 8591 }, { "epoch": 0.25, "grad_norm": 4.526865923255038, "learning_rate": 8.825116602446492e-06, "loss": 0.547, "step": 8592 }, { "epoch": 0.25, "grad_norm": 6.423677623049315, "learning_rate": 8.824817924341176e-06, "loss": 0.3949, "step": 8593 }, { "epoch": 0.25, "grad_norm": 3.741672424674471, "learning_rate": 8.824519213331421e-06, "loss": 0.7057, "step": 8594 }, { "epoch": 0.25, "grad_norm": 6.965717767890946, "learning_rate": 8.824220469419799e-06, "loss": 0.425, "step": 8595 }, { "epoch": 0.25, "grad_norm": 4.816847460014559, "learning_rate": 8.82392169260888e-06, "loss": 0.6052, "step": 8596 }, { "epoch": 0.25, "grad_norm": 5.285361117546089, "learning_rate": 8.82362288290123e-06, "loss": 0.4305, "step": 8597 }, { "epoch": 0.25, "grad_norm": 3.913482166142913, "learning_rate": 8.823324040299427e-06, "loss": 0.3554, "step": 8598 }, { "epoch": 0.25, "grad_norm": 5.948458547231776, "learning_rate": 8.823025164806037e-06, "loss": 0.4257, "step": 8599 }, { "epoch": 0.25, "grad_norm": 15.942595740586095, "learning_rate": 8.82272625642363e-06, "loss": 1.1375, "step": 8600 }, { "epoch": 0.25, "grad_norm": 4.66938654225582, "learning_rate": 8.822427315154782e-06, "loss": 0.3855, "step": 8601 }, { "epoch": 0.25, "grad_norm": 9.951451159330693, "learning_rate": 8.822128341002063e-06, "loss": 0.4356, "step": 8602 }, { "epoch": 0.25, "grad_norm": 14.446216577654798, "learning_rate": 8.821829333968042e-06, "loss": 0.6373, "step": 8603 }, { "epoch": 0.25, "grad_norm": 9.472109057165923, "learning_rate": 8.821530294055296e-06, "loss": 0.6422, "step": 8604 }, { "epoch": 0.25, "grad_norm": 13.330372449818457, "learning_rate": 8.821231221266394e-06, "loss": 0.3477, "step": 8605 }, { "epoch": 0.25, "grad_norm": 5.609448445723908, "learning_rate": 8.82093211560391e-06, "loss": 0.5071, "step": 8606 }, { "epoch": 0.25, "grad_norm": 3.6361165434159926, "learning_rate": 8.82063297707042e-06, "loss": 0.281, "step": 8607 }, { "epoch": 0.25, "grad_norm": 3.6741694971669094, "learning_rate": 8.820333805668493e-06, "loss": 0.239, "step": 8608 }, { "epoch": 0.25, "grad_norm": 7.966791129104328, "learning_rate": 8.820034601400704e-06, "loss": 0.4108, "step": 8609 }, { "epoch": 0.25, "grad_norm": 3.8212475090779927, "learning_rate": 8.81973536426963e-06, "loss": 0.2988, "step": 8610 }, { "epoch": 0.25, "grad_norm": 7.106261659476747, "learning_rate": 8.819436094277841e-06, "loss": 0.4865, "step": 8611 }, { "epoch": 0.25, "grad_norm": 4.601215653875531, "learning_rate": 8.819136791427914e-06, "loss": 0.7259, "step": 8612 }, { "epoch": 0.25, "grad_norm": 3.0318729114544936, "learning_rate": 8.818837455722422e-06, "loss": 0.4506, "step": 8613 }, { "epoch": 0.25, "grad_norm": 4.122419388158146, "learning_rate": 8.818538087163943e-06, "loss": 0.4039, "step": 8614 }, { "epoch": 0.25, "grad_norm": 5.930113444151828, "learning_rate": 8.818238685755052e-06, "loss": 0.6485, "step": 8615 }, { "epoch": 0.25, "grad_norm": 5.519046770283681, "learning_rate": 8.817939251498321e-06, "loss": 0.5483, "step": 8616 }, { "epoch": 0.25, "grad_norm": 4.787761816529986, "learning_rate": 8.817639784396333e-06, "loss": 0.3604, "step": 8617 }, { "epoch": 0.25, "grad_norm": 4.663519107212185, "learning_rate": 8.817340284451657e-06, "loss": 0.2924, "step": 8618 }, { "epoch": 0.25, "grad_norm": 6.664797671910779, "learning_rate": 8.817040751666871e-06, "loss": 0.5286, "step": 8619 }, { "epoch": 0.25, "grad_norm": 5.717772728637339, "learning_rate": 8.816741186044556e-06, "loss": 0.4533, "step": 8620 }, { "epoch": 0.25, "grad_norm": 5.7421674870771575, "learning_rate": 8.816441587587285e-06, "loss": 0.2923, "step": 8621 }, { "epoch": 0.25, "grad_norm": 4.955870580401215, "learning_rate": 8.816141956297638e-06, "loss": 0.6002, "step": 8622 }, { "epoch": 0.25, "grad_norm": 4.406599801970652, "learning_rate": 8.815842292178192e-06, "loss": 0.6667, "step": 8623 }, { "epoch": 0.25, "grad_norm": 7.8480319307749, "learning_rate": 8.815542595231523e-06, "loss": 0.7037, "step": 8624 }, { "epoch": 0.25, "grad_norm": 4.944602975673186, "learning_rate": 8.815242865460211e-06, "loss": 0.5057, "step": 8625 }, { "epoch": 0.25, "grad_norm": 4.756328282357434, "learning_rate": 8.814943102866836e-06, "loss": 0.5422, "step": 8626 }, { "epoch": 0.25, "grad_norm": 8.52612542026018, "learning_rate": 8.814643307453976e-06, "loss": 0.4977, "step": 8627 }, { "epoch": 0.25, "grad_norm": 5.207941615315044, "learning_rate": 8.814343479224207e-06, "loss": 0.4087, "step": 8628 }, { "epoch": 0.25, "grad_norm": 14.734960097410024, "learning_rate": 8.81404361818011e-06, "loss": 0.9993, "step": 8629 }, { "epoch": 0.25, "grad_norm": 6.112960972790387, "learning_rate": 8.813743724324268e-06, "loss": 0.153, "step": 8630 }, { "epoch": 0.25, "grad_norm": 3.1318204455324077, "learning_rate": 8.813443797659256e-06, "loss": 0.3892, "step": 8631 }, { "epoch": 0.25, "grad_norm": 6.702730038262045, "learning_rate": 8.813143838187658e-06, "loss": 0.7577, "step": 8632 }, { "epoch": 0.25, "grad_norm": 2.843176920230778, "learning_rate": 8.81284384591205e-06, "loss": 0.4431, "step": 8633 }, { "epoch": 0.25, "grad_norm": 7.179069484142223, "learning_rate": 8.81254382083502e-06, "loss": 0.6726, "step": 8634 }, { "epoch": 0.25, "grad_norm": 3.5324488604948137, "learning_rate": 8.812243762959143e-06, "loss": 0.3977, "step": 8635 }, { "epoch": 0.25, "grad_norm": 6.331035820519377, "learning_rate": 8.811943672287003e-06, "loss": 0.3035, "step": 8636 }, { "epoch": 0.25, "grad_norm": 7.535441328463083, "learning_rate": 8.811643548821181e-06, "loss": 0.8636, "step": 8637 }, { "epoch": 0.25, "grad_norm": 6.2903719279307495, "learning_rate": 8.811343392564257e-06, "loss": 0.2967, "step": 8638 }, { "epoch": 0.25, "grad_norm": 9.029813975702435, "learning_rate": 8.811043203518817e-06, "loss": 0.6358, "step": 8639 }, { "epoch": 0.25, "grad_norm": 6.3460058326177435, "learning_rate": 8.810742981687442e-06, "loss": 0.931, "step": 8640 }, { "epoch": 0.25, "grad_norm": 6.690489198962625, "learning_rate": 8.810442727072712e-06, "loss": 0.5569, "step": 8641 }, { "epoch": 0.25, "grad_norm": 3.4383816022038127, "learning_rate": 8.810142439677214e-06, "loss": 0.268, "step": 8642 }, { "epoch": 0.25, "grad_norm": 5.097760827172044, "learning_rate": 8.80984211950353e-06, "loss": 0.4051, "step": 8643 }, { "epoch": 0.25, "grad_norm": 4.049600427249161, "learning_rate": 8.809541766554241e-06, "loss": 0.5609, "step": 8644 }, { "epoch": 0.25, "grad_norm": 5.7353177244181, "learning_rate": 8.809241380831935e-06, "loss": 0.5554, "step": 8645 }, { "epoch": 0.25, "grad_norm": 5.631455214272132, "learning_rate": 8.808940962339194e-06, "loss": 0.3523, "step": 8646 }, { "epoch": 0.25, "grad_norm": 6.633084780223274, "learning_rate": 8.808640511078603e-06, "loss": 0.3981, "step": 8647 }, { "epoch": 0.25, "grad_norm": 6.760822732620019, "learning_rate": 8.808340027052748e-06, "loss": 0.4555, "step": 8648 }, { "epoch": 0.25, "grad_norm": 4.648084713667784, "learning_rate": 8.80803951026421e-06, "loss": 0.4399, "step": 8649 }, { "epoch": 0.25, "grad_norm": 5.050385756914034, "learning_rate": 8.80773896071558e-06, "loss": 0.2171, "step": 8650 }, { "epoch": 0.25, "grad_norm": 15.770172461399408, "learning_rate": 8.807438378409439e-06, "loss": 0.9315, "step": 8651 }, { "epoch": 0.25, "grad_norm": 3.990574609831149, "learning_rate": 8.807137763348374e-06, "loss": 0.3823, "step": 8652 }, { "epoch": 0.25, "grad_norm": 4.24182880493928, "learning_rate": 8.806837115534972e-06, "loss": 0.649, "step": 8653 }, { "epoch": 0.25, "grad_norm": 8.751555277340072, "learning_rate": 8.806536434971818e-06, "loss": 0.7629, "step": 8654 }, { "epoch": 0.25, "grad_norm": 5.9195995136064115, "learning_rate": 8.806235721661501e-06, "loss": 0.7171, "step": 8655 }, { "epoch": 0.25, "grad_norm": 4.617811183471038, "learning_rate": 8.805934975606607e-06, "loss": 0.352, "step": 8656 }, { "epoch": 0.25, "grad_norm": 5.04096813985045, "learning_rate": 8.805634196809722e-06, "loss": 0.2628, "step": 8657 }, { "epoch": 0.25, "grad_norm": 7.464118000005452, "learning_rate": 8.805333385273435e-06, "loss": 0.6273, "step": 8658 }, { "epoch": 0.25, "grad_norm": 5.997725373486329, "learning_rate": 8.805032541000333e-06, "loss": 0.5613, "step": 8659 }, { "epoch": 0.25, "grad_norm": 4.394382715046186, "learning_rate": 8.804731663993005e-06, "loss": 0.2006, "step": 8660 }, { "epoch": 0.25, "grad_norm": 3.9938411535089844, "learning_rate": 8.804430754254039e-06, "loss": 0.3368, "step": 8661 }, { "epoch": 0.25, "grad_norm": 4.711854608190321, "learning_rate": 8.804129811786023e-06, "loss": 0.4945, "step": 8662 }, { "epoch": 0.25, "grad_norm": 2.6745605580178853, "learning_rate": 8.803828836591547e-06, "loss": 0.2699, "step": 8663 }, { "epoch": 0.25, "grad_norm": 6.634921004997249, "learning_rate": 8.8035278286732e-06, "loss": 0.8755, "step": 8664 }, { "epoch": 0.25, "grad_norm": 1.5103152201280938, "learning_rate": 8.803226788033572e-06, "loss": 0.1634, "step": 8665 }, { "epoch": 0.25, "grad_norm": 5.957964794614514, "learning_rate": 8.80292571467525e-06, "loss": 0.7916, "step": 8666 }, { "epoch": 0.25, "grad_norm": 4.931028156584615, "learning_rate": 8.802624608600828e-06, "loss": 0.6387, "step": 8667 }, { "epoch": 0.25, "grad_norm": 5.8265100445563816, "learning_rate": 8.802323469812894e-06, "loss": 0.4668, "step": 8668 }, { "epoch": 0.25, "grad_norm": 9.296443127771012, "learning_rate": 8.802022298314042e-06, "loss": 0.6665, "step": 8669 }, { "epoch": 0.25, "grad_norm": 6.352377713710455, "learning_rate": 8.801721094106856e-06, "loss": 0.7191, "step": 8670 }, { "epoch": 0.25, "grad_norm": 6.922383309121632, "learning_rate": 8.801419857193933e-06, "loss": 0.4887, "step": 8671 }, { "epoch": 0.25, "grad_norm": 5.28900207304702, "learning_rate": 8.801118587577865e-06, "loss": 0.1741, "step": 8672 }, { "epoch": 0.25, "grad_norm": 4.274289842926709, "learning_rate": 8.80081728526124e-06, "loss": 0.6134, "step": 8673 }, { "epoch": 0.25, "grad_norm": 8.354159487549193, "learning_rate": 8.800515950246654e-06, "loss": 0.8259, "step": 8674 }, { "epoch": 0.25, "grad_norm": 3.2340422749419107, "learning_rate": 8.800214582536695e-06, "loss": 0.4583, "step": 8675 }, { "epoch": 0.25, "grad_norm": 4.643861354966272, "learning_rate": 8.799913182133957e-06, "loss": 0.1838, "step": 8676 }, { "epoch": 0.25, "grad_norm": 6.450104050203637, "learning_rate": 8.799611749041037e-06, "loss": 0.3863, "step": 8677 }, { "epoch": 0.25, "grad_norm": 6.609885121673168, "learning_rate": 8.799310283260521e-06, "loss": 0.5671, "step": 8678 }, { "epoch": 0.25, "grad_norm": 3.3323134630370244, "learning_rate": 8.79900878479501e-06, "loss": 0.3804, "step": 8679 }, { "epoch": 0.25, "grad_norm": 2.663425671485059, "learning_rate": 8.798707253647093e-06, "loss": 0.1935, "step": 8680 }, { "epoch": 0.25, "grad_norm": 7.97048282108522, "learning_rate": 8.798405689819364e-06, "loss": 0.5368, "step": 8681 }, { "epoch": 0.25, "grad_norm": 6.142289263751518, "learning_rate": 8.798104093314419e-06, "loss": 0.8616, "step": 8682 }, { "epoch": 0.25, "grad_norm": 4.135874372979799, "learning_rate": 8.797802464134852e-06, "loss": 0.4113, "step": 8683 }, { "epoch": 0.25, "grad_norm": 6.838221472521717, "learning_rate": 8.797500802283258e-06, "loss": 0.6652, "step": 8684 }, { "epoch": 0.25, "grad_norm": 8.488110192546284, "learning_rate": 8.797199107762234e-06, "loss": 0.6469, "step": 8685 }, { "epoch": 0.25, "grad_norm": 5.991010368739341, "learning_rate": 8.796897380574372e-06, "loss": 0.3952, "step": 8686 }, { "epoch": 0.25, "grad_norm": 9.511854003947244, "learning_rate": 8.796595620722269e-06, "loss": 0.8216, "step": 8687 }, { "epoch": 0.25, "grad_norm": 5.155365607970919, "learning_rate": 8.796293828208523e-06, "loss": 0.5152, "step": 8688 }, { "epoch": 0.25, "grad_norm": 2.5362241873219595, "learning_rate": 8.795992003035727e-06, "loss": 0.4748, "step": 8689 }, { "epoch": 0.25, "grad_norm": 5.408501701888636, "learning_rate": 8.79569014520648e-06, "loss": 0.5119, "step": 8690 }, { "epoch": 0.25, "grad_norm": 3.9715060479143167, "learning_rate": 8.795388254723376e-06, "loss": 0.9009, "step": 8691 }, { "epoch": 0.25, "grad_norm": 5.198135136948397, "learning_rate": 8.795086331589016e-06, "loss": 0.638, "step": 8692 }, { "epoch": 0.25, "grad_norm": 6.992266401985864, "learning_rate": 8.794784375805997e-06, "loss": 0.5934, "step": 8693 }, { "epoch": 0.25, "grad_norm": 3.8513087098111085, "learning_rate": 8.794482387376913e-06, "loss": 0.2612, "step": 8694 }, { "epoch": 0.25, "grad_norm": 9.665638775820309, "learning_rate": 8.794180366304365e-06, "loss": 0.9335, "step": 8695 }, { "epoch": 0.25, "grad_norm": 5.898791978302311, "learning_rate": 8.79387831259095e-06, "loss": 0.6995, "step": 8696 }, { "epoch": 0.25, "grad_norm": 6.33189810837044, "learning_rate": 8.793576226239267e-06, "loss": 0.6992, "step": 8697 }, { "epoch": 0.25, "grad_norm": 5.335012084782792, "learning_rate": 8.793274107251915e-06, "loss": 0.5743, "step": 8698 }, { "epoch": 0.25, "grad_norm": 6.795834060410072, "learning_rate": 8.792971955631493e-06, "loss": 0.8213, "step": 8699 }, { "epoch": 0.25, "grad_norm": 7.312340433058881, "learning_rate": 8.7926697713806e-06, "loss": 0.6411, "step": 8700 }, { "epoch": 0.25, "grad_norm": 6.439850683554476, "learning_rate": 8.792367554501837e-06, "loss": 0.5458, "step": 8701 }, { "epoch": 0.25, "grad_norm": 4.169620433337484, "learning_rate": 8.792065304997802e-06, "loss": 0.5324, "step": 8702 }, { "epoch": 0.25, "grad_norm": 8.0480526849357, "learning_rate": 8.791763022871096e-06, "loss": 0.4622, "step": 8703 }, { "epoch": 0.25, "grad_norm": 5.198077941512734, "learning_rate": 8.79146070812432e-06, "loss": 0.6066, "step": 8704 }, { "epoch": 0.25, "grad_norm": 6.381127610186473, "learning_rate": 8.791158360760072e-06, "loss": 0.2535, "step": 8705 }, { "epoch": 0.25, "grad_norm": 8.102273996244344, "learning_rate": 8.790855980780958e-06, "loss": 0.8117, "step": 8706 }, { "epoch": 0.25, "grad_norm": 8.016578900127092, "learning_rate": 8.790553568189577e-06, "loss": 0.4798, "step": 8707 }, { "epoch": 0.25, "grad_norm": 8.308984170698219, "learning_rate": 8.790251122988529e-06, "loss": 0.7427, "step": 8708 }, { "epoch": 0.25, "grad_norm": 15.040132622235536, "learning_rate": 8.789948645180419e-06, "loss": 0.9253, "step": 8709 }, { "epoch": 0.25, "grad_norm": 4.988805685055144, "learning_rate": 8.789646134767845e-06, "loss": 0.6176, "step": 8710 }, { "epoch": 0.25, "grad_norm": 4.843378385009181, "learning_rate": 8.789343591753414e-06, "loss": 0.2966, "step": 8711 }, { "epoch": 0.25, "grad_norm": 7.856428439618302, "learning_rate": 8.789041016139725e-06, "loss": 0.5036, "step": 8712 }, { "epoch": 0.25, "grad_norm": 4.022621472020003, "learning_rate": 8.788738407929384e-06, "loss": 0.3027, "step": 8713 }, { "epoch": 0.25, "grad_norm": 10.476074025303687, "learning_rate": 8.788435767124992e-06, "loss": 0.9565, "step": 8714 }, { "epoch": 0.25, "grad_norm": 8.237928286172448, "learning_rate": 8.788133093729152e-06, "loss": 0.6987, "step": 8715 }, { "epoch": 0.25, "grad_norm": 7.989390013211467, "learning_rate": 8.787830387744474e-06, "loss": 0.6381, "step": 8716 }, { "epoch": 0.25, "grad_norm": 7.478633645561788, "learning_rate": 8.787527649173554e-06, "loss": 0.719, "step": 8717 }, { "epoch": 0.25, "grad_norm": 5.09594716620492, "learning_rate": 8.787224878019e-06, "loss": 0.9091, "step": 8718 }, { "epoch": 0.25, "grad_norm": 10.374826728568875, "learning_rate": 8.786922074283418e-06, "loss": 0.7041, "step": 8719 }, { "epoch": 0.25, "grad_norm": 8.564325848006657, "learning_rate": 8.786619237969412e-06, "loss": 0.6884, "step": 8720 }, { "epoch": 0.25, "grad_norm": 3.9020347434686493, "learning_rate": 8.786316369079587e-06, "loss": 0.1957, "step": 8721 }, { "epoch": 0.25, "grad_norm": 4.342444299089182, "learning_rate": 8.786013467616545e-06, "loss": 0.3765, "step": 8722 }, { "epoch": 0.25, "grad_norm": 5.200520318849016, "learning_rate": 8.785710533582898e-06, "loss": 0.2311, "step": 8723 }, { "epoch": 0.25, "grad_norm": 7.797380388962423, "learning_rate": 8.78540756698125e-06, "loss": 0.4687, "step": 8724 }, { "epoch": 0.25, "grad_norm": 4.574091726309938, "learning_rate": 8.785104567814205e-06, "loss": 0.3301, "step": 8725 }, { "epoch": 0.25, "grad_norm": 10.166475002379094, "learning_rate": 8.784801536084373e-06, "loss": 0.9196, "step": 8726 }, { "epoch": 0.25, "grad_norm": 7.0440322656948515, "learning_rate": 8.784498471794358e-06, "loss": 0.3607, "step": 8727 }, { "epoch": 0.25, "grad_norm": 5.417445855462036, "learning_rate": 8.78419537494677e-06, "loss": 0.3666, "step": 8728 }, { "epoch": 0.25, "grad_norm": 7.098609369050056, "learning_rate": 8.783892245544215e-06, "loss": 0.5044, "step": 8729 }, { "epoch": 0.25, "grad_norm": 6.884249343452571, "learning_rate": 8.7835890835893e-06, "loss": 0.8263, "step": 8730 }, { "epoch": 0.25, "grad_norm": 4.706099776546554, "learning_rate": 8.783285889084633e-06, "loss": 0.4934, "step": 8731 }, { "epoch": 0.25, "grad_norm": 4.631150576390437, "learning_rate": 8.782982662032826e-06, "loss": 0.3531, "step": 8732 }, { "epoch": 0.25, "grad_norm": 3.501677281525356, "learning_rate": 8.782679402436484e-06, "loss": 0.2987, "step": 8733 }, { "epoch": 0.25, "grad_norm": 6.849628031857255, "learning_rate": 8.782376110298217e-06, "loss": 0.5201, "step": 8734 }, { "epoch": 0.25, "grad_norm": 7.678067941729225, "learning_rate": 8.782072785620633e-06, "loss": 0.6041, "step": 8735 }, { "epoch": 0.25, "grad_norm": 5.4221319747842305, "learning_rate": 8.781769428406342e-06, "loss": 0.5035, "step": 8736 }, { "epoch": 0.25, "grad_norm": 4.016292531381628, "learning_rate": 8.781466038657956e-06, "loss": 0.3333, "step": 8737 }, { "epoch": 0.25, "grad_norm": 9.108097988018715, "learning_rate": 8.781162616378082e-06, "loss": 1.1058, "step": 8738 }, { "epoch": 0.25, "grad_norm": 6.524335897565848, "learning_rate": 8.780859161569333e-06, "loss": 0.6675, "step": 8739 }, { "epoch": 0.25, "grad_norm": 3.4703502915757993, "learning_rate": 8.780555674234315e-06, "loss": 0.5382, "step": 8740 }, { "epoch": 0.25, "grad_norm": 9.696829702484813, "learning_rate": 8.780252154375646e-06, "loss": 0.8114, "step": 8741 }, { "epoch": 0.25, "grad_norm": 5.995881654051154, "learning_rate": 8.77994860199593e-06, "loss": 0.8021, "step": 8742 }, { "epoch": 0.25, "grad_norm": 5.069595309702392, "learning_rate": 8.779645017097785e-06, "loss": 0.5726, "step": 8743 }, { "epoch": 0.25, "grad_norm": 6.023108308853498, "learning_rate": 8.779341399683816e-06, "loss": 0.2776, "step": 8744 }, { "epoch": 0.25, "grad_norm": 3.020041833423613, "learning_rate": 8.779037749756637e-06, "loss": 0.3965, "step": 8745 }, { "epoch": 0.25, "grad_norm": 3.8236468221580515, "learning_rate": 8.778734067318866e-06, "loss": 0.3911, "step": 8746 }, { "epoch": 0.25, "grad_norm": 4.596559690323796, "learning_rate": 8.778430352373108e-06, "loss": 0.2103, "step": 8747 }, { "epoch": 0.25, "grad_norm": 5.550369709176031, "learning_rate": 8.77812660492198e-06, "loss": 0.135, "step": 8748 }, { "epoch": 0.25, "grad_norm": 2.931769157911129, "learning_rate": 8.777822824968094e-06, "loss": 0.3039, "step": 8749 }, { "epoch": 0.25, "grad_norm": 5.684952217646068, "learning_rate": 8.777519012514064e-06, "loss": 0.4685, "step": 8750 }, { "epoch": 0.25, "grad_norm": 4.587987672210534, "learning_rate": 8.777215167562503e-06, "loss": 0.1532, "step": 8751 }, { "epoch": 0.25, "grad_norm": 5.007538834115109, "learning_rate": 8.776911290116024e-06, "loss": 0.7978, "step": 8752 }, { "epoch": 0.25, "grad_norm": 10.565050392806997, "learning_rate": 8.776607380177243e-06, "loss": 0.5277, "step": 8753 }, { "epoch": 0.25, "grad_norm": 7.090447518358034, "learning_rate": 8.776303437748773e-06, "loss": 0.4519, "step": 8754 }, { "epoch": 0.25, "grad_norm": 4.6372940490726595, "learning_rate": 8.775999462833228e-06, "loss": 0.4764, "step": 8755 }, { "epoch": 0.25, "grad_norm": 3.2831727343621533, "learning_rate": 8.775695455433227e-06, "loss": 0.1265, "step": 8756 }, { "epoch": 0.25, "grad_norm": 3.2940554743196886, "learning_rate": 8.775391415551382e-06, "loss": 0.4073, "step": 8757 }, { "epoch": 0.25, "grad_norm": 5.990362533526753, "learning_rate": 8.77508734319031e-06, "loss": 0.7557, "step": 8758 }, { "epoch": 0.25, "grad_norm": 4.694444065106518, "learning_rate": 8.774783238352625e-06, "loss": 0.3639, "step": 8759 }, { "epoch": 0.25, "grad_norm": 3.579364478640499, "learning_rate": 8.774479101040945e-06, "loss": 0.4765, "step": 8760 }, { "epoch": 0.25, "grad_norm": 8.206775485179046, "learning_rate": 8.774174931257887e-06, "loss": 0.8173, "step": 8761 }, { "epoch": 0.25, "grad_norm": 5.167525430292858, "learning_rate": 8.773870729006067e-06, "loss": 0.4473, "step": 8762 }, { "epoch": 0.25, "grad_norm": 9.907521452070753, "learning_rate": 8.773566494288101e-06, "loss": 0.6064, "step": 8763 }, { "epoch": 0.25, "grad_norm": 9.320171499025124, "learning_rate": 8.773262227106607e-06, "loss": 0.8931, "step": 8764 }, { "epoch": 0.25, "grad_norm": 2.6369513854079627, "learning_rate": 8.772957927464204e-06, "loss": 0.2088, "step": 8765 }, { "epoch": 0.25, "grad_norm": 7.981504248958423, "learning_rate": 8.772653595363505e-06, "loss": 0.5935, "step": 8766 }, { "epoch": 0.25, "grad_norm": 3.846389794448319, "learning_rate": 8.772349230807135e-06, "loss": 0.2976, "step": 8767 }, { "epoch": 0.25, "grad_norm": 2.624881401107615, "learning_rate": 8.772044833797707e-06, "loss": 0.1798, "step": 8768 }, { "epoch": 0.25, "grad_norm": 4.9746734770773635, "learning_rate": 8.771740404337842e-06, "loss": 0.5448, "step": 8769 }, { "epoch": 0.25, "grad_norm": 8.089454483105058, "learning_rate": 8.77143594243016e-06, "loss": 0.4973, "step": 8770 }, { "epoch": 0.25, "grad_norm": 7.506371779649042, "learning_rate": 8.77113144807728e-06, "loss": 0.2241, "step": 8771 }, { "epoch": 0.25, "grad_norm": 6.368392811530483, "learning_rate": 8.770826921281816e-06, "loss": 0.731, "step": 8772 }, { "epoch": 0.25, "grad_norm": 7.759999561408119, "learning_rate": 8.770522362046397e-06, "loss": 0.733, "step": 8773 }, { "epoch": 0.25, "grad_norm": 3.515174362123068, "learning_rate": 8.770217770373636e-06, "loss": 0.4838, "step": 8774 }, { "epoch": 0.25, "grad_norm": 6.948170300324277, "learning_rate": 8.769913146266155e-06, "loss": 0.523, "step": 8775 }, { "epoch": 0.25, "grad_norm": 2.929795591267697, "learning_rate": 8.769608489726578e-06, "loss": 0.3645, "step": 8776 }, { "epoch": 0.25, "grad_norm": 6.873312413559659, "learning_rate": 8.76930380075752e-06, "loss": 0.2853, "step": 8777 }, { "epoch": 0.25, "grad_norm": 8.80896390488204, "learning_rate": 8.768999079361608e-06, "loss": 0.6973, "step": 8778 }, { "epoch": 0.25, "grad_norm": 5.882458396274897, "learning_rate": 8.76869432554146e-06, "loss": 0.2893, "step": 8779 }, { "epoch": 0.25, "grad_norm": 9.365500660154611, "learning_rate": 8.768389539299698e-06, "loss": 0.6911, "step": 8780 }, { "epoch": 0.25, "grad_norm": 12.139074592324071, "learning_rate": 8.768084720638946e-06, "loss": 0.3864, "step": 8781 }, { "epoch": 0.25, "grad_norm": 7.890867928268534, "learning_rate": 8.767779869561825e-06, "loss": 0.7554, "step": 8782 }, { "epoch": 0.25, "grad_norm": 7.812600341152501, "learning_rate": 8.767474986070959e-06, "loss": 0.5554, "step": 8783 }, { "epoch": 0.25, "grad_norm": 2.4275211354130146, "learning_rate": 8.767170070168966e-06, "loss": 0.1144, "step": 8784 }, { "epoch": 0.25, "grad_norm": 3.9804650663259604, "learning_rate": 8.766865121858476e-06, "loss": 0.5396, "step": 8785 }, { "epoch": 0.25, "grad_norm": 14.940950258564758, "learning_rate": 8.766560141142107e-06, "loss": 1.0105, "step": 8786 }, { "epoch": 0.25, "grad_norm": 8.757513090489237, "learning_rate": 8.766255128022485e-06, "loss": 0.5735, "step": 8787 }, { "epoch": 0.25, "grad_norm": 3.1052443321280108, "learning_rate": 8.765950082502236e-06, "loss": 0.249, "step": 8788 }, { "epoch": 0.25, "grad_norm": 6.402010598862422, "learning_rate": 8.76564500458398e-06, "loss": 0.6821, "step": 8789 }, { "epoch": 0.25, "grad_norm": 4.294029625417768, "learning_rate": 8.765339894270344e-06, "loss": 0.2647, "step": 8790 }, { "epoch": 0.25, "grad_norm": 3.7641804880515237, "learning_rate": 8.765034751563953e-06, "loss": 0.2771, "step": 8791 }, { "epoch": 0.25, "grad_norm": 6.914307877663824, "learning_rate": 8.764729576467432e-06, "loss": 0.8141, "step": 8792 }, { "epoch": 0.25, "grad_norm": 12.422605864898452, "learning_rate": 8.764424368983406e-06, "loss": 0.1208, "step": 8793 }, { "epoch": 0.25, "grad_norm": 8.59916022880287, "learning_rate": 8.7641191291145e-06, "loss": 0.7443, "step": 8794 }, { "epoch": 0.25, "grad_norm": 6.5138685388344335, "learning_rate": 8.763813856863338e-06, "loss": 0.3187, "step": 8795 }, { "epoch": 0.25, "grad_norm": 5.516831565920189, "learning_rate": 8.763508552232554e-06, "loss": 0.7773, "step": 8796 }, { "epoch": 0.25, "grad_norm": 4.930419142123994, "learning_rate": 8.763203215224766e-06, "loss": 0.5206, "step": 8797 }, { "epoch": 0.25, "grad_norm": 10.18914210653014, "learning_rate": 8.762897845842606e-06, "loss": 0.9311, "step": 8798 }, { "epoch": 0.25, "grad_norm": 4.264571985634356, "learning_rate": 8.762592444088696e-06, "loss": 0.4385, "step": 8799 }, { "epoch": 0.25, "grad_norm": 7.166480335504307, "learning_rate": 8.762287009965668e-06, "loss": 0.8725, "step": 8800 }, { "epoch": 0.25, "grad_norm": 5.344495576299778, "learning_rate": 8.761981543476151e-06, "loss": 0.4758, "step": 8801 }, { "epoch": 0.25, "grad_norm": 7.8335954811655055, "learning_rate": 8.761676044622767e-06, "loss": 0.8686, "step": 8802 }, { "epoch": 0.25, "grad_norm": 7.154206962968147, "learning_rate": 8.761370513408147e-06, "loss": 0.3559, "step": 8803 }, { "epoch": 0.25, "grad_norm": 8.085553352703151, "learning_rate": 8.76106494983492e-06, "loss": 0.6119, "step": 8804 }, { "epoch": 0.25, "grad_norm": 16.045427206931254, "learning_rate": 8.760759353905714e-06, "loss": 0.7302, "step": 8805 }, { "epoch": 0.25, "grad_norm": 7.001332020046731, "learning_rate": 8.76045372562316e-06, "loss": 0.5251, "step": 8806 }, { "epoch": 0.25, "grad_norm": 5.4253659401803835, "learning_rate": 8.760148064989884e-06, "loss": 0.3999, "step": 8807 }, { "epoch": 0.25, "grad_norm": 10.694790233179605, "learning_rate": 8.759842372008517e-06, "loss": 0.4745, "step": 8808 }, { "epoch": 0.25, "grad_norm": 7.606226714325041, "learning_rate": 8.75953664668169e-06, "loss": 0.4827, "step": 8809 }, { "epoch": 0.25, "grad_norm": 8.541803783773844, "learning_rate": 8.759230889012032e-06, "loss": 1.0809, "step": 8810 }, { "epoch": 0.25, "grad_norm": 4.700443022727651, "learning_rate": 8.758925099002173e-06, "loss": 0.6439, "step": 8811 }, { "epoch": 0.25, "grad_norm": 8.149581609412705, "learning_rate": 8.758619276654743e-06, "loss": 0.4765, "step": 8812 }, { "epoch": 0.25, "grad_norm": 3.589518651676565, "learning_rate": 8.758313421972376e-06, "loss": 0.4606, "step": 8813 }, { "epoch": 0.25, "grad_norm": 6.476794764422903, "learning_rate": 8.7580075349577e-06, "loss": 0.3676, "step": 8814 }, { "epoch": 0.25, "grad_norm": 7.368312260355221, "learning_rate": 8.75770161561335e-06, "loss": 0.7051, "step": 8815 }, { "epoch": 0.25, "grad_norm": 5.684000720368352, "learning_rate": 8.757395663941954e-06, "loss": 0.437, "step": 8816 }, { "epoch": 0.25, "grad_norm": 6.739105970250006, "learning_rate": 8.757089679946147e-06, "loss": 0.7535, "step": 8817 }, { "epoch": 0.25, "grad_norm": 14.408554587900204, "learning_rate": 8.756783663628558e-06, "loss": 0.962, "step": 8818 }, { "epoch": 0.25, "grad_norm": 9.353738963339703, "learning_rate": 8.756477614991822e-06, "loss": 0.5105, "step": 8819 }, { "epoch": 0.25, "grad_norm": 8.4827421243404, "learning_rate": 8.756171534038574e-06, "loss": 1.11, "step": 8820 }, { "epoch": 0.25, "grad_norm": 3.6175056465911557, "learning_rate": 8.755865420771443e-06, "loss": 0.3243, "step": 8821 }, { "epoch": 0.25, "grad_norm": 5.448866091018979, "learning_rate": 8.755559275193064e-06, "loss": 0.4823, "step": 8822 }, { "epoch": 0.25, "grad_norm": 4.7269826150618455, "learning_rate": 8.755253097306073e-06, "loss": 0.6233, "step": 8823 }, { "epoch": 0.25, "grad_norm": 8.281038188024922, "learning_rate": 8.754946887113099e-06, "loss": 0.4967, "step": 8824 }, { "epoch": 0.25, "grad_norm": 11.015422413363193, "learning_rate": 8.754640644616781e-06, "loss": 0.8848, "step": 8825 }, { "epoch": 0.25, "grad_norm": 3.9944195563705716, "learning_rate": 8.754334369819753e-06, "loss": 0.3571, "step": 8826 }, { "epoch": 0.25, "grad_norm": 6.735535771640032, "learning_rate": 8.754028062724646e-06, "loss": 0.5464, "step": 8827 }, { "epoch": 0.25, "grad_norm": 5.3368953219378765, "learning_rate": 8.753721723334098e-06, "loss": 0.3359, "step": 8828 }, { "epoch": 0.25, "grad_norm": 9.183531027141186, "learning_rate": 8.753415351650745e-06, "loss": 0.6431, "step": 8829 }, { "epoch": 0.25, "grad_norm": 2.4153478959459065, "learning_rate": 8.753108947677222e-06, "loss": 0.2245, "step": 8830 }, { "epoch": 0.25, "grad_norm": 6.3815632488246, "learning_rate": 8.752802511416165e-06, "loss": 0.3608, "step": 8831 }, { "epoch": 0.25, "grad_norm": 7.853661039420211, "learning_rate": 8.752496042870211e-06, "loss": 0.4614, "step": 8832 }, { "epoch": 0.25, "grad_norm": 12.004446795204132, "learning_rate": 8.752189542041993e-06, "loss": 0.761, "step": 8833 }, { "epoch": 0.25, "grad_norm": 4.399894892997728, "learning_rate": 8.751883008934153e-06, "loss": 0.3957, "step": 8834 }, { "epoch": 0.25, "grad_norm": 5.539243557960981, "learning_rate": 8.751576443549323e-06, "loss": 0.3413, "step": 8835 }, { "epoch": 0.25, "grad_norm": 4.399885654047388, "learning_rate": 8.751269845890145e-06, "loss": 0.4482, "step": 8836 }, { "epoch": 0.25, "grad_norm": 3.2571646871886895, "learning_rate": 8.750963215959253e-06, "loss": 0.3045, "step": 8837 }, { "epoch": 0.25, "grad_norm": 5.102484627027216, "learning_rate": 8.750656553759287e-06, "loss": 0.1084, "step": 8838 }, { "epoch": 0.25, "grad_norm": 5.4448736334114844, "learning_rate": 8.750349859292884e-06, "loss": 0.5978, "step": 8839 }, { "epoch": 0.25, "grad_norm": 6.138074272310486, "learning_rate": 8.750043132562683e-06, "loss": 0.6415, "step": 8840 }, { "epoch": 0.25, "grad_norm": 7.89382636968861, "learning_rate": 8.749736373571322e-06, "loss": 0.6429, "step": 8841 }, { "epoch": 0.25, "grad_norm": 8.622329298427287, "learning_rate": 8.74942958232144e-06, "loss": 0.5007, "step": 8842 }, { "epoch": 0.25, "grad_norm": 7.339196989221248, "learning_rate": 8.74912275881568e-06, "loss": 0.4125, "step": 8843 }, { "epoch": 0.25, "grad_norm": 7.540459601259726, "learning_rate": 8.748815903056675e-06, "loss": 0.5748, "step": 8844 }, { "epoch": 0.25, "grad_norm": 3.261268136883122, "learning_rate": 8.74850901504707e-06, "loss": 0.2317, "step": 8845 }, { "epoch": 0.25, "grad_norm": 10.550529287720005, "learning_rate": 8.748202094789505e-06, "loss": 0.6816, "step": 8846 }, { "epoch": 0.25, "grad_norm": 9.713452726200696, "learning_rate": 8.747895142286618e-06, "loss": 0.5481, "step": 8847 }, { "epoch": 0.25, "grad_norm": 5.7136030538687, "learning_rate": 8.74758815754105e-06, "loss": 0.6297, "step": 8848 }, { "epoch": 0.25, "grad_norm": 8.545934761396492, "learning_rate": 8.747281140555441e-06, "loss": 0.8859, "step": 8849 }, { "epoch": 0.25, "grad_norm": 6.483344306380817, "learning_rate": 8.746974091332437e-06, "loss": 0.3139, "step": 8850 }, { "epoch": 0.25, "grad_norm": 3.959363304789311, "learning_rate": 8.746667009874671e-06, "loss": 0.704, "step": 8851 }, { "epoch": 0.25, "grad_norm": 7.171682716734055, "learning_rate": 8.746359896184795e-06, "loss": 0.4965, "step": 8852 }, { "epoch": 0.25, "grad_norm": 4.033577062530547, "learning_rate": 8.746052750265445e-06, "loss": 0.393, "step": 8853 }, { "epoch": 0.25, "grad_norm": 3.7347207966717417, "learning_rate": 8.745745572119264e-06, "loss": 0.4086, "step": 8854 }, { "epoch": 0.25, "grad_norm": 4.2637940553990985, "learning_rate": 8.745438361748895e-06, "loss": 0.5412, "step": 8855 }, { "epoch": 0.25, "grad_norm": 6.543339651042876, "learning_rate": 8.74513111915698e-06, "loss": 0.8434, "step": 8856 }, { "epoch": 0.25, "grad_norm": 5.656529667299967, "learning_rate": 8.744823844346165e-06, "loss": 0.7569, "step": 8857 }, { "epoch": 0.25, "grad_norm": 8.633292151197011, "learning_rate": 8.74451653731909e-06, "loss": 1.0613, "step": 8858 }, { "epoch": 0.25, "grad_norm": 8.490027917247653, "learning_rate": 8.7442091980784e-06, "loss": 0.6474, "step": 8859 }, { "epoch": 0.25, "grad_norm": 4.349843478401329, "learning_rate": 8.743901826626742e-06, "loss": 0.5108, "step": 8860 }, { "epoch": 0.25, "grad_norm": 4.437382709941035, "learning_rate": 8.743594422966755e-06, "loss": 0.511, "step": 8861 }, { "epoch": 0.25, "grad_norm": 6.319643341213492, "learning_rate": 8.743286987101087e-06, "loss": 0.5376, "step": 8862 }, { "epoch": 0.25, "grad_norm": 5.477477559087653, "learning_rate": 8.742979519032381e-06, "loss": 0.6176, "step": 8863 }, { "epoch": 0.25, "grad_norm": 3.09462422243526, "learning_rate": 8.742672018763283e-06, "loss": 0.1481, "step": 8864 }, { "epoch": 0.25, "grad_norm": 7.39787243595578, "learning_rate": 8.742364486296439e-06, "loss": 0.873, "step": 8865 }, { "epoch": 0.25, "grad_norm": 6.11250782909555, "learning_rate": 8.742056921634493e-06, "loss": 0.4149, "step": 8866 }, { "epoch": 0.25, "grad_norm": 7.89339384844604, "learning_rate": 8.741749324780095e-06, "loss": 0.8359, "step": 8867 }, { "epoch": 0.25, "grad_norm": 6.920311607004204, "learning_rate": 8.741441695735886e-06, "loss": 0.7455, "step": 8868 }, { "epoch": 0.25, "grad_norm": 5.517873073560175, "learning_rate": 8.741134034504515e-06, "loss": 0.6004, "step": 8869 }, { "epoch": 0.25, "grad_norm": 3.7158008710274912, "learning_rate": 8.74082634108863e-06, "loss": 0.5907, "step": 8870 }, { "epoch": 0.25, "grad_norm": 2.9263775654542377, "learning_rate": 8.740518615490873e-06, "loss": 0.4222, "step": 8871 }, { "epoch": 0.25, "grad_norm": 9.338727079395776, "learning_rate": 8.7402108577139e-06, "loss": 0.6426, "step": 8872 }, { "epoch": 0.25, "grad_norm": 5.965744899785782, "learning_rate": 8.73990306776035e-06, "loss": 0.6011, "step": 8873 }, { "epoch": 0.25, "grad_norm": 4.523064237225737, "learning_rate": 8.739595245632875e-06, "loss": 0.2631, "step": 8874 }, { "epoch": 0.25, "grad_norm": 2.920074418079842, "learning_rate": 8.739287391334123e-06, "loss": 0.281, "step": 8875 }, { "epoch": 0.25, "grad_norm": 4.280936153073875, "learning_rate": 8.73897950486674e-06, "loss": 0.1829, "step": 8876 }, { "epoch": 0.25, "grad_norm": 5.9775245041865475, "learning_rate": 8.738671586233379e-06, "loss": 0.3145, "step": 8877 }, { "epoch": 0.25, "grad_norm": 4.556477216332298, "learning_rate": 8.738363635436687e-06, "loss": 0.6758, "step": 8878 }, { "epoch": 0.25, "grad_norm": 3.915740728837247, "learning_rate": 8.738055652479312e-06, "loss": 0.688, "step": 8879 }, { "epoch": 0.25, "grad_norm": 10.308575895061981, "learning_rate": 8.737747637363903e-06, "loss": 0.9278, "step": 8880 }, { "epoch": 0.25, "grad_norm": 8.395255915346194, "learning_rate": 8.737439590093113e-06, "loss": 0.9522, "step": 8881 }, { "epoch": 0.25, "grad_norm": 7.069739134075168, "learning_rate": 8.73713151066959e-06, "loss": 0.3018, "step": 8882 }, { "epoch": 0.25, "grad_norm": 6.625941551526423, "learning_rate": 8.736823399095984e-06, "loss": 0.1913, "step": 8883 }, { "epoch": 0.25, "grad_norm": 4.592958148516459, "learning_rate": 8.736515255374948e-06, "loss": 0.3238, "step": 8884 }, { "epoch": 0.25, "grad_norm": 6.1810766459997195, "learning_rate": 8.736207079509131e-06, "loss": 0.3133, "step": 8885 }, { "epoch": 0.25, "grad_norm": 5.663726312591584, "learning_rate": 8.735898871501183e-06, "loss": 0.6065, "step": 8886 }, { "epoch": 0.25, "grad_norm": 6.005952346206212, "learning_rate": 8.735590631353758e-06, "loss": 0.3967, "step": 8887 }, { "epoch": 0.25, "grad_norm": 9.921384159472485, "learning_rate": 8.735282359069508e-06, "loss": 0.9654, "step": 8888 }, { "epoch": 0.25, "grad_norm": 6.006542572612403, "learning_rate": 8.734974054651082e-06, "loss": 0.7004, "step": 8889 }, { "epoch": 0.25, "grad_norm": 4.924920898615555, "learning_rate": 8.734665718101135e-06, "loss": 0.5832, "step": 8890 }, { "epoch": 0.25, "grad_norm": 8.932098356922591, "learning_rate": 8.734357349422317e-06, "loss": 0.9662, "step": 8891 }, { "epoch": 0.25, "grad_norm": 5.048687470651685, "learning_rate": 8.734048948617284e-06, "loss": 0.5405, "step": 8892 }, { "epoch": 0.25, "grad_norm": 6.60645404598, "learning_rate": 8.733740515688687e-06, "loss": 0.6958, "step": 8893 }, { "epoch": 0.25, "grad_norm": 6.694245474862882, "learning_rate": 8.73343205063918e-06, "loss": 0.7228, "step": 8894 }, { "epoch": 0.25, "grad_norm": 12.16602440449595, "learning_rate": 8.733123553471417e-06, "loss": 0.6566, "step": 8895 }, { "epoch": 0.25, "grad_norm": 5.198870090455977, "learning_rate": 8.732815024188052e-06, "loss": 0.6756, "step": 8896 }, { "epoch": 0.25, "grad_norm": 3.341833688520929, "learning_rate": 8.73250646279174e-06, "loss": 0.6136, "step": 8897 }, { "epoch": 0.25, "grad_norm": 7.454856431526555, "learning_rate": 8.732197869285132e-06, "loss": 0.5431, "step": 8898 }, { "epoch": 0.25, "grad_norm": 5.613357256731141, "learning_rate": 8.731889243670888e-06, "loss": 0.5181, "step": 8899 }, { "epoch": 0.25, "grad_norm": 6.62830630157435, "learning_rate": 8.731580585951658e-06, "loss": 0.5002, "step": 8900 }, { "epoch": 0.25, "grad_norm": 4.989681803050874, "learning_rate": 8.7312718961301e-06, "loss": 0.3983, "step": 8901 }, { "epoch": 0.25, "grad_norm": 4.584301372371082, "learning_rate": 8.73096317420887e-06, "loss": 0.5132, "step": 8902 }, { "epoch": 0.25, "grad_norm": 8.596085553545409, "learning_rate": 8.730654420190623e-06, "loss": 0.5264, "step": 8903 }, { "epoch": 0.25, "grad_norm": 4.310308493905013, "learning_rate": 8.730345634078015e-06, "loss": 0.3983, "step": 8904 }, { "epoch": 0.26, "grad_norm": 7.973582519073629, "learning_rate": 8.730036815873703e-06, "loss": 0.5307, "step": 8905 }, { "epoch": 0.26, "grad_norm": 3.5941149319001022, "learning_rate": 8.729727965580344e-06, "loss": 0.3377, "step": 8906 }, { "epoch": 0.26, "grad_norm": 14.229357977382056, "learning_rate": 8.729419083200592e-06, "loss": 0.7571, "step": 8907 }, { "epoch": 0.26, "grad_norm": 3.0350691538604035, "learning_rate": 8.72911016873711e-06, "loss": 0.1366, "step": 8908 }, { "epoch": 0.26, "grad_norm": 6.943227585884479, "learning_rate": 8.72880122219255e-06, "loss": 0.6798, "step": 8909 }, { "epoch": 0.26, "grad_norm": 5.896535038611945, "learning_rate": 8.728492243569575e-06, "loss": 0.4441, "step": 8910 }, { "epoch": 0.26, "grad_norm": 5.371583340612341, "learning_rate": 8.728183232870839e-06, "loss": 0.4058, "step": 8911 }, { "epoch": 0.26, "grad_norm": 6.631816775794637, "learning_rate": 8.727874190099002e-06, "loss": 0.4582, "step": 8912 }, { "epoch": 0.26, "grad_norm": 9.144045484856912, "learning_rate": 8.72756511525672e-06, "loss": 0.6928, "step": 8913 }, { "epoch": 0.26, "grad_norm": 7.413980433308312, "learning_rate": 8.727256008346656e-06, "loss": 0.7798, "step": 8914 }, { "epoch": 0.26, "grad_norm": 13.903302992686497, "learning_rate": 8.726946869371468e-06, "loss": 0.2925, "step": 8915 }, { "epoch": 0.26, "grad_norm": 10.805806287544016, "learning_rate": 8.726637698333815e-06, "loss": 0.7854, "step": 8916 }, { "epoch": 0.26, "grad_norm": 11.44202021094039, "learning_rate": 8.726328495236356e-06, "loss": 0.7364, "step": 8917 }, { "epoch": 0.26, "grad_norm": 5.249529862924706, "learning_rate": 8.726019260081752e-06, "loss": 0.6048, "step": 8918 }, { "epoch": 0.26, "grad_norm": 4.821820683151746, "learning_rate": 8.725709992872662e-06, "loss": 0.603, "step": 8919 }, { "epoch": 0.26, "grad_norm": 4.286154005789624, "learning_rate": 8.72540069361175e-06, "loss": 0.7916, "step": 8920 }, { "epoch": 0.26, "grad_norm": 5.687581030299859, "learning_rate": 8.725091362301673e-06, "loss": 0.6458, "step": 8921 }, { "epoch": 0.26, "grad_norm": 6.8361295313429595, "learning_rate": 8.724781998945094e-06, "loss": 0.5641, "step": 8922 }, { "epoch": 0.26, "grad_norm": 8.379385582789238, "learning_rate": 8.724472603544674e-06, "loss": 0.4085, "step": 8923 }, { "epoch": 0.26, "grad_norm": 8.61583515806136, "learning_rate": 8.724163176103073e-06, "loss": 0.7068, "step": 8924 }, { "epoch": 0.26, "grad_norm": 6.190628108756042, "learning_rate": 8.723853716622956e-06, "loss": 0.3948, "step": 8925 }, { "epoch": 0.26, "grad_norm": 3.075849670005731, "learning_rate": 8.723544225106986e-06, "loss": 0.186, "step": 8926 }, { "epoch": 0.26, "grad_norm": 9.338382569954346, "learning_rate": 8.723234701557821e-06, "loss": 0.9656, "step": 8927 }, { "epoch": 0.26, "grad_norm": 6.226561581026438, "learning_rate": 8.722925145978127e-06, "loss": 0.5207, "step": 8928 }, { "epoch": 0.26, "grad_norm": 4.150043787208183, "learning_rate": 8.722615558370566e-06, "loss": 0.3521, "step": 8929 }, { "epoch": 0.26, "grad_norm": 8.172275963714684, "learning_rate": 8.722305938737803e-06, "loss": 0.6446, "step": 8930 }, { "epoch": 0.26, "grad_norm": 5.9857444690238575, "learning_rate": 8.7219962870825e-06, "loss": 0.5087, "step": 8931 }, { "epoch": 0.26, "grad_norm": 9.15174411139519, "learning_rate": 8.72168660340732e-06, "loss": 0.5014, "step": 8932 }, { "epoch": 0.26, "grad_norm": 7.0184190449598765, "learning_rate": 8.72137688771493e-06, "loss": 0.9963, "step": 8933 }, { "epoch": 0.26, "grad_norm": 4.8438869087805445, "learning_rate": 8.72106714000799e-06, "loss": 0.4872, "step": 8934 }, { "epoch": 0.26, "grad_norm": 6.70731327839171, "learning_rate": 8.720757360289171e-06, "loss": 0.5355, "step": 8935 }, { "epoch": 0.26, "grad_norm": 7.3758858617445675, "learning_rate": 8.720447548561133e-06, "loss": 0.4572, "step": 8936 }, { "epoch": 0.26, "grad_norm": 8.799257054178232, "learning_rate": 8.720137704826542e-06, "loss": 0.7515, "step": 8937 }, { "epoch": 0.26, "grad_norm": 4.934070313452501, "learning_rate": 8.719827829088066e-06, "loss": 0.3867, "step": 8938 }, { "epoch": 0.26, "grad_norm": 7.709427056602628, "learning_rate": 8.719517921348369e-06, "loss": 0.6144, "step": 8939 }, { "epoch": 0.26, "grad_norm": 6.177258656818948, "learning_rate": 8.719207981610115e-06, "loss": 0.5077, "step": 8940 }, { "epoch": 0.26, "grad_norm": 7.333508951800808, "learning_rate": 8.718898009875974e-06, "loss": 0.546, "step": 8941 }, { "epoch": 0.26, "grad_norm": 3.5049790321082077, "learning_rate": 8.71858800614861e-06, "loss": 0.3879, "step": 8942 }, { "epoch": 0.26, "grad_norm": 6.406756869477311, "learning_rate": 8.718277970430695e-06, "loss": 0.7019, "step": 8943 }, { "epoch": 0.26, "grad_norm": 5.000594914329984, "learning_rate": 8.717967902724889e-06, "loss": 0.6729, "step": 8944 }, { "epoch": 0.26, "grad_norm": 11.194952672165758, "learning_rate": 8.717657803033865e-06, "loss": 1.2072, "step": 8945 }, { "epoch": 0.26, "grad_norm": 2.830801398166679, "learning_rate": 8.717347671360288e-06, "loss": 0.1545, "step": 8946 }, { "epoch": 0.26, "grad_norm": 5.706824829629685, "learning_rate": 8.717037507706826e-06, "loss": 0.7263, "step": 8947 }, { "epoch": 0.26, "grad_norm": 7.781731280414472, "learning_rate": 8.716727312076148e-06, "loss": 0.7178, "step": 8948 }, { "epoch": 0.26, "grad_norm": 4.164575999592056, "learning_rate": 8.716417084470923e-06, "loss": 0.5137, "step": 8949 }, { "epoch": 0.26, "grad_norm": 4.888249609553536, "learning_rate": 8.71610682489382e-06, "loss": 0.5045, "step": 8950 }, { "epoch": 0.26, "grad_norm": 6.845243386845093, "learning_rate": 8.715796533347507e-06, "loss": 0.5261, "step": 8951 }, { "epoch": 0.26, "grad_norm": 7.016039523853951, "learning_rate": 8.715486209834654e-06, "loss": 1.0481, "step": 8952 }, { "epoch": 0.26, "grad_norm": 9.36456766151981, "learning_rate": 8.71517585435793e-06, "loss": 0.7252, "step": 8953 }, { "epoch": 0.26, "grad_norm": 4.142224359222424, "learning_rate": 8.714865466920007e-06, "loss": 0.3139, "step": 8954 }, { "epoch": 0.26, "grad_norm": 19.61806331428067, "learning_rate": 8.714555047523552e-06, "loss": 0.5532, "step": 8955 }, { "epoch": 0.26, "grad_norm": 9.548055073279219, "learning_rate": 8.714244596171237e-06, "loss": 0.3112, "step": 8956 }, { "epoch": 0.26, "grad_norm": 4.0256111865672075, "learning_rate": 8.713934112865734e-06, "loss": 0.4917, "step": 8957 }, { "epoch": 0.26, "grad_norm": 6.633754847503035, "learning_rate": 8.713623597609712e-06, "loss": 0.394, "step": 8958 }, { "epoch": 0.26, "grad_norm": 3.0198035074251663, "learning_rate": 8.713313050405845e-06, "loss": 0.4382, "step": 8959 }, { "epoch": 0.26, "grad_norm": 10.197501860232885, "learning_rate": 8.713002471256804e-06, "loss": 1.0865, "step": 8960 }, { "epoch": 0.26, "grad_norm": 5.659125276950739, "learning_rate": 8.712691860165259e-06, "loss": 0.6434, "step": 8961 }, { "epoch": 0.26, "grad_norm": 7.672750574558939, "learning_rate": 8.71238121713388e-06, "loss": 0.7477, "step": 8962 }, { "epoch": 0.26, "grad_norm": 3.8991192238819656, "learning_rate": 8.712070542165348e-06, "loss": 0.4222, "step": 8963 }, { "epoch": 0.26, "grad_norm": 5.690848067225408, "learning_rate": 8.711759835262326e-06, "loss": 0.6321, "step": 8964 }, { "epoch": 0.26, "grad_norm": 8.899374144761925, "learning_rate": 8.711449096427493e-06, "loss": 0.6762, "step": 8965 }, { "epoch": 0.26, "grad_norm": 4.253334224131819, "learning_rate": 8.71113832566352e-06, "loss": 0.2204, "step": 8966 }, { "epoch": 0.26, "grad_norm": 4.7015550343944525, "learning_rate": 8.71082752297308e-06, "loss": 0.3423, "step": 8967 }, { "epoch": 0.26, "grad_norm": 7.390054370231027, "learning_rate": 8.710516688358847e-06, "loss": 0.3628, "step": 8968 }, { "epoch": 0.26, "grad_norm": 7.673122576490782, "learning_rate": 8.710205821823497e-06, "loss": 0.7554, "step": 8969 }, { "epoch": 0.26, "grad_norm": 7.319257759964415, "learning_rate": 8.709894923369704e-06, "loss": 0.6633, "step": 8970 }, { "epoch": 0.26, "grad_norm": 7.567600705422197, "learning_rate": 8.709583993000142e-06, "loss": 0.7848, "step": 8971 }, { "epoch": 0.26, "grad_norm": 5.937934076604316, "learning_rate": 8.709273030717483e-06, "loss": 0.4021, "step": 8972 }, { "epoch": 0.26, "grad_norm": 5.718803697344382, "learning_rate": 8.708962036524406e-06, "loss": 0.7265, "step": 8973 }, { "epoch": 0.26, "grad_norm": 6.533154004743407, "learning_rate": 8.708651010423587e-06, "loss": 1.0325, "step": 8974 }, { "epoch": 0.26, "grad_norm": 6.0719827246816, "learning_rate": 8.708339952417698e-06, "loss": 0.3092, "step": 8975 }, { "epoch": 0.26, "grad_norm": 5.930299187071522, "learning_rate": 8.708028862509419e-06, "loss": 0.3701, "step": 8976 }, { "epoch": 0.26, "grad_norm": 6.464305198342, "learning_rate": 8.707717740701423e-06, "loss": 0.4547, "step": 8977 }, { "epoch": 0.26, "grad_norm": 5.1295844949355285, "learning_rate": 8.707406586996386e-06, "loss": 0.5657, "step": 8978 }, { "epoch": 0.26, "grad_norm": 8.382899416342278, "learning_rate": 8.707095401396989e-06, "loss": 0.4076, "step": 8979 }, { "epoch": 0.26, "grad_norm": 8.572768799135018, "learning_rate": 8.706784183905905e-06, "loss": 0.7915, "step": 8980 }, { "epoch": 0.26, "grad_norm": 6.385576039783983, "learning_rate": 8.706472934525815e-06, "loss": 0.5328, "step": 8981 }, { "epoch": 0.26, "grad_norm": 4.445567252798873, "learning_rate": 8.706161653259393e-06, "loss": 0.4403, "step": 8982 }, { "epoch": 0.26, "grad_norm": 5.6538595975938595, "learning_rate": 8.70585034010932e-06, "loss": 0.5151, "step": 8983 }, { "epoch": 0.26, "grad_norm": 4.876488678387794, "learning_rate": 8.705538995078273e-06, "loss": 0.4066, "step": 8984 }, { "epoch": 0.26, "grad_norm": 6.2582407601219385, "learning_rate": 8.705227618168927e-06, "loss": 0.4599, "step": 8985 }, { "epoch": 0.26, "grad_norm": 9.021754939433615, "learning_rate": 8.704916209383969e-06, "loss": 0.8939, "step": 8986 }, { "epoch": 0.26, "grad_norm": 9.983235898654238, "learning_rate": 8.704604768726068e-06, "loss": 0.5152, "step": 8987 }, { "epoch": 0.26, "grad_norm": 4.883287623368372, "learning_rate": 8.704293296197913e-06, "loss": 0.3518, "step": 8988 }, { "epoch": 0.26, "grad_norm": 8.927119462345567, "learning_rate": 8.703981791802175e-06, "loss": 1.0049, "step": 8989 }, { "epoch": 0.26, "grad_norm": 2.898569787803276, "learning_rate": 8.703670255541538e-06, "loss": 0.2866, "step": 8990 }, { "epoch": 0.26, "grad_norm": 5.479189080989038, "learning_rate": 8.703358687418684e-06, "loss": 0.4385, "step": 8991 }, { "epoch": 0.26, "grad_norm": 6.092337674532692, "learning_rate": 8.703047087436292e-06, "loss": 0.5166, "step": 8992 }, { "epoch": 0.26, "grad_norm": 6.894435252961288, "learning_rate": 8.70273545559704e-06, "loss": 0.323, "step": 8993 }, { "epoch": 0.26, "grad_norm": 7.996408848594283, "learning_rate": 8.702423791903609e-06, "loss": 1.1012, "step": 8994 }, { "epoch": 0.26, "grad_norm": 6.897175657272456, "learning_rate": 8.702112096358685e-06, "loss": 0.6873, "step": 8995 }, { "epoch": 0.26, "grad_norm": 6.882076297664108, "learning_rate": 8.701800368964945e-06, "loss": 1.2152, "step": 8996 }, { "epoch": 0.26, "grad_norm": 7.55060101573726, "learning_rate": 8.701488609725073e-06, "loss": 0.8518, "step": 8997 }, { "epoch": 0.26, "grad_norm": 4.489519418259999, "learning_rate": 8.70117681864175e-06, "loss": 0.6941, "step": 8998 }, { "epoch": 0.26, "grad_norm": 7.859716267694068, "learning_rate": 8.700864995717656e-06, "loss": 0.5071, "step": 8999 }, { "epoch": 0.26, "grad_norm": 7.909805704520444, "learning_rate": 8.700553140955479e-06, "loss": 0.4378, "step": 9000 }, { "epoch": 0.26, "grad_norm": 3.4048687470412045, "learning_rate": 8.7002412543579e-06, "loss": 0.3494, "step": 9001 }, { "epoch": 0.26, "grad_norm": 6.515147495839799, "learning_rate": 8.699929335927599e-06, "loss": 0.7379, "step": 9002 }, { "epoch": 0.26, "grad_norm": 11.274738323955978, "learning_rate": 8.69961738566726e-06, "loss": 0.4754, "step": 9003 }, { "epoch": 0.26, "grad_norm": 2.2013339885868195, "learning_rate": 8.699305403579571e-06, "loss": 0.3612, "step": 9004 }, { "epoch": 0.26, "grad_norm": 5.056210130833764, "learning_rate": 8.698993389667213e-06, "loss": 0.5933, "step": 9005 }, { "epoch": 0.26, "grad_norm": 8.659698460463126, "learning_rate": 8.69868134393287e-06, "loss": 0.749, "step": 9006 }, { "epoch": 0.26, "grad_norm": 7.224349422269296, "learning_rate": 8.698369266379224e-06, "loss": 0.3622, "step": 9007 }, { "epoch": 0.26, "grad_norm": 2.762738148966325, "learning_rate": 8.698057157008966e-06, "loss": 0.1043, "step": 9008 }, { "epoch": 0.26, "grad_norm": 10.510008946132753, "learning_rate": 8.697745015824776e-06, "loss": 0.8607, "step": 9009 }, { "epoch": 0.26, "grad_norm": 3.0648014607308776, "learning_rate": 8.697432842829341e-06, "loss": 0.2018, "step": 9010 }, { "epoch": 0.26, "grad_norm": 6.480115406044801, "learning_rate": 8.697120638025346e-06, "loss": 0.4441, "step": 9011 }, { "epoch": 0.26, "grad_norm": 4.051282360603255, "learning_rate": 8.696808401415478e-06, "loss": 0.5779, "step": 9012 }, { "epoch": 0.26, "grad_norm": 4.016111173774143, "learning_rate": 8.696496133002424e-06, "loss": 0.7344, "step": 9013 }, { "epoch": 0.26, "grad_norm": 9.628870792594451, "learning_rate": 8.696183832788867e-06, "loss": 0.5959, "step": 9014 }, { "epoch": 0.26, "grad_norm": 6.595197690440277, "learning_rate": 8.695871500777495e-06, "loss": 0.5224, "step": 9015 }, { "epoch": 0.26, "grad_norm": 5.145101760362102, "learning_rate": 8.695559136970997e-06, "loss": 0.7182, "step": 9016 }, { "epoch": 0.26, "grad_norm": 8.819049836055681, "learning_rate": 8.695246741372059e-06, "loss": 0.5272, "step": 9017 }, { "epoch": 0.26, "grad_norm": 5.288531233054867, "learning_rate": 8.694934313983367e-06, "loss": 0.711, "step": 9018 }, { "epoch": 0.26, "grad_norm": 7.900071877744085, "learning_rate": 8.69462185480761e-06, "loss": 0.8081, "step": 9019 }, { "epoch": 0.26, "grad_norm": 4.159604862548073, "learning_rate": 8.694309363847477e-06, "loss": 0.6313, "step": 9020 }, { "epoch": 0.26, "grad_norm": 2.496505798811379, "learning_rate": 8.693996841105655e-06, "loss": 0.71, "step": 9021 }, { "epoch": 0.26, "grad_norm": 4.0143368209645915, "learning_rate": 8.693684286584833e-06, "loss": 0.2869, "step": 9022 }, { "epoch": 0.26, "grad_norm": 2.520569804632693, "learning_rate": 8.6933717002877e-06, "loss": 0.406, "step": 9023 }, { "epoch": 0.26, "grad_norm": 2.781615683271422, "learning_rate": 8.693059082216944e-06, "loss": 0.1806, "step": 9024 }, { "epoch": 0.26, "grad_norm": 6.4136691791778055, "learning_rate": 8.692746432375256e-06, "loss": 0.5129, "step": 9025 }, { "epoch": 0.26, "grad_norm": 4.975513245834922, "learning_rate": 8.692433750765325e-06, "loss": 0.489, "step": 9026 }, { "epoch": 0.26, "grad_norm": 5.615417626085922, "learning_rate": 8.692121037389842e-06, "loss": 0.8528, "step": 9027 }, { "epoch": 0.26, "grad_norm": 5.615324854909622, "learning_rate": 8.691808292251497e-06, "loss": 0.3489, "step": 9028 }, { "epoch": 0.26, "grad_norm": 5.003129361285966, "learning_rate": 8.69149551535298e-06, "loss": 0.3039, "step": 9029 }, { "epoch": 0.26, "grad_norm": 14.42895519172489, "learning_rate": 8.691182706696979e-06, "loss": 0.6558, "step": 9030 }, { "epoch": 0.26, "grad_norm": 6.350778453899964, "learning_rate": 8.69086986628619e-06, "loss": 0.6147, "step": 9031 }, { "epoch": 0.26, "grad_norm": 10.85001897854178, "learning_rate": 8.690556994123299e-06, "loss": 0.8763, "step": 9032 }, { "epoch": 0.26, "grad_norm": 4.843396180058662, "learning_rate": 8.690244090211002e-06, "loss": 0.6797, "step": 9033 }, { "epoch": 0.26, "grad_norm": 6.413830212790532, "learning_rate": 8.68993115455199e-06, "loss": 0.629, "step": 9034 }, { "epoch": 0.26, "grad_norm": 7.163674080905193, "learning_rate": 8.689618187148953e-06, "loss": 0.6897, "step": 9035 }, { "epoch": 0.26, "grad_norm": 2.732209301656168, "learning_rate": 8.689305188004589e-06, "loss": 0.2936, "step": 9036 }, { "epoch": 0.26, "grad_norm": 9.459881951238717, "learning_rate": 8.688992157121584e-06, "loss": 0.5892, "step": 9037 }, { "epoch": 0.26, "grad_norm": 6.109935451859628, "learning_rate": 8.688679094502634e-06, "loss": 0.7429, "step": 9038 }, { "epoch": 0.26, "grad_norm": 8.678933619903226, "learning_rate": 8.688366000150431e-06, "loss": 0.9354, "step": 9039 }, { "epoch": 0.26, "grad_norm": 7.200482229137352, "learning_rate": 8.68805287406767e-06, "loss": 0.7122, "step": 9040 }, { "epoch": 0.26, "grad_norm": 6.270141039513825, "learning_rate": 8.687739716257044e-06, "loss": 0.4483, "step": 9041 }, { "epoch": 0.26, "grad_norm": 6.844374049053139, "learning_rate": 8.687426526721248e-06, "loss": 0.8398, "step": 9042 }, { "epoch": 0.26, "grad_norm": 7.333469288461606, "learning_rate": 8.687113305462975e-06, "loss": 0.4894, "step": 9043 }, { "epoch": 0.26, "grad_norm": 3.089249873327475, "learning_rate": 8.68680005248492e-06, "loss": 0.2745, "step": 9044 }, { "epoch": 0.26, "grad_norm": 4.327405559913092, "learning_rate": 8.686486767789778e-06, "loss": 0.2317, "step": 9045 }, { "epoch": 0.26, "grad_norm": 9.10733522377621, "learning_rate": 8.686173451380247e-06, "loss": 0.574, "step": 9046 }, { "epoch": 0.26, "grad_norm": 7.785538682669665, "learning_rate": 8.685860103259017e-06, "loss": 0.7012, "step": 9047 }, { "epoch": 0.26, "grad_norm": 6.1206053324129925, "learning_rate": 8.685546723428786e-06, "loss": 0.7916, "step": 9048 }, { "epoch": 0.26, "grad_norm": 9.557338343756646, "learning_rate": 8.685233311892252e-06, "loss": 0.3694, "step": 9049 }, { "epoch": 0.26, "grad_norm": 4.454421915294597, "learning_rate": 8.684919868652107e-06, "loss": 0.4478, "step": 9050 }, { "epoch": 0.26, "grad_norm": 7.419598714203044, "learning_rate": 8.684606393711052e-06, "loss": 0.4082, "step": 9051 }, { "epoch": 0.26, "grad_norm": 3.1391090036444216, "learning_rate": 8.684292887071782e-06, "loss": 0.4843, "step": 9052 }, { "epoch": 0.26, "grad_norm": 4.77628532406589, "learning_rate": 8.683979348736993e-06, "loss": 0.5228, "step": 9053 }, { "epoch": 0.26, "grad_norm": 5.427671384771905, "learning_rate": 8.683665778709383e-06, "loss": 0.2853, "step": 9054 }, { "epoch": 0.26, "grad_norm": 7.14765180428563, "learning_rate": 8.683352176991652e-06, "loss": 0.5216, "step": 9055 }, { "epoch": 0.26, "grad_norm": 4.642414146290955, "learning_rate": 8.683038543586493e-06, "loss": 0.2285, "step": 9056 }, { "epoch": 0.26, "grad_norm": 6.267154573493388, "learning_rate": 8.68272487849661e-06, "loss": 0.4992, "step": 9057 }, { "epoch": 0.26, "grad_norm": 3.4401141370368453, "learning_rate": 8.682411181724695e-06, "loss": 0.3309, "step": 9058 }, { "epoch": 0.26, "grad_norm": 5.2764753167854614, "learning_rate": 8.682097453273452e-06, "loss": 0.2891, "step": 9059 }, { "epoch": 0.26, "grad_norm": 4.66114703227587, "learning_rate": 8.681783693145578e-06, "loss": 0.3663, "step": 9060 }, { "epoch": 0.26, "grad_norm": 3.362338280600001, "learning_rate": 8.681469901343771e-06, "loss": 0.2158, "step": 9061 }, { "epoch": 0.26, "grad_norm": 6.111564419531488, "learning_rate": 8.681156077870731e-06, "loss": 0.5678, "step": 9062 }, { "epoch": 0.26, "grad_norm": 5.916266812885573, "learning_rate": 8.68084222272916e-06, "loss": 0.5124, "step": 9063 }, { "epoch": 0.26, "grad_norm": 4.642598512802967, "learning_rate": 8.680528335921757e-06, "loss": 0.2895, "step": 9064 }, { "epoch": 0.26, "grad_norm": 3.554013735457515, "learning_rate": 8.680214417451222e-06, "loss": 0.4512, "step": 9065 }, { "epoch": 0.26, "grad_norm": 5.096533196341822, "learning_rate": 8.679900467320255e-06, "loss": 0.5333, "step": 9066 }, { "epoch": 0.26, "grad_norm": 4.32546318914536, "learning_rate": 8.679586485531556e-06, "loss": 0.1688, "step": 9067 }, { "epoch": 0.26, "grad_norm": 2.349797571878707, "learning_rate": 8.67927247208783e-06, "loss": 0.0997, "step": 9068 }, { "epoch": 0.26, "grad_norm": 9.874595730389323, "learning_rate": 8.678958426991773e-06, "loss": 0.9524, "step": 9069 }, { "epoch": 0.26, "grad_norm": 5.9112990661765945, "learning_rate": 8.678644350246091e-06, "loss": 0.959, "step": 9070 }, { "epoch": 0.26, "grad_norm": 3.7771330245056767, "learning_rate": 8.678330241853485e-06, "loss": 0.2428, "step": 9071 }, { "epoch": 0.26, "grad_norm": 6.4918406700283535, "learning_rate": 8.678016101816656e-06, "loss": 0.6292, "step": 9072 }, { "epoch": 0.26, "grad_norm": 5.576931139133202, "learning_rate": 8.677701930138309e-06, "loss": 0.3604, "step": 9073 }, { "epoch": 0.26, "grad_norm": 16.31739979085051, "learning_rate": 8.677387726821144e-06, "loss": 0.3152, "step": 9074 }, { "epoch": 0.26, "grad_norm": 5.343621235684789, "learning_rate": 8.677073491867865e-06, "loss": 0.198, "step": 9075 }, { "epoch": 0.26, "grad_norm": 4.511422178090822, "learning_rate": 8.676759225281175e-06, "loss": 0.2872, "step": 9076 }, { "epoch": 0.26, "grad_norm": 7.284036479728199, "learning_rate": 8.676444927063779e-06, "loss": 0.2127, "step": 9077 }, { "epoch": 0.26, "grad_norm": 6.6443859624929225, "learning_rate": 8.67613059721838e-06, "loss": 0.6081, "step": 9078 }, { "epoch": 0.26, "grad_norm": 7.797546204934262, "learning_rate": 8.675816235747682e-06, "loss": 0.4328, "step": 9079 }, { "epoch": 0.26, "grad_norm": 6.731406989431931, "learning_rate": 8.67550184265439e-06, "loss": 0.5781, "step": 9080 }, { "epoch": 0.26, "grad_norm": 11.842127062236509, "learning_rate": 8.675187417941207e-06, "loss": 0.3116, "step": 9081 }, { "epoch": 0.26, "grad_norm": 4.233552465749672, "learning_rate": 8.674872961610839e-06, "loss": 0.6615, "step": 9082 }, { "epoch": 0.26, "grad_norm": 8.199806108741063, "learning_rate": 8.674558473665991e-06, "loss": 0.409, "step": 9083 }, { "epoch": 0.26, "grad_norm": 7.452521789580351, "learning_rate": 8.67424395410937e-06, "loss": 0.5825, "step": 9084 }, { "epoch": 0.26, "grad_norm": 10.874256152440795, "learning_rate": 8.673929402943681e-06, "loss": 0.3754, "step": 9085 }, { "epoch": 0.26, "grad_norm": 6.5386492052807075, "learning_rate": 8.67361482017163e-06, "loss": 0.5099, "step": 9086 }, { "epoch": 0.26, "grad_norm": 4.833003150689494, "learning_rate": 8.673300205795922e-06, "loss": 0.7576, "step": 9087 }, { "epoch": 0.26, "grad_norm": 5.031970594467278, "learning_rate": 8.672985559819264e-06, "loss": 0.3529, "step": 9088 }, { "epoch": 0.26, "grad_norm": 2.9734758608133136, "learning_rate": 8.672670882244363e-06, "loss": 0.2603, "step": 9089 }, { "epoch": 0.26, "grad_norm": 8.06201282774071, "learning_rate": 8.672356173073929e-06, "loss": 0.6305, "step": 9090 }, { "epoch": 0.26, "grad_norm": 6.1378835134854075, "learning_rate": 8.672041432310666e-06, "loss": 0.5785, "step": 9091 }, { "epoch": 0.26, "grad_norm": 9.79419726413697, "learning_rate": 8.671726659957282e-06, "loss": 0.4532, "step": 9092 }, { "epoch": 0.26, "grad_norm": 5.399645922673122, "learning_rate": 8.671411856016487e-06, "loss": 0.5207, "step": 9093 }, { "epoch": 0.26, "grad_norm": 4.122942093277154, "learning_rate": 8.671097020490987e-06, "loss": 0.2999, "step": 9094 }, { "epoch": 0.26, "grad_norm": 5.857239926759988, "learning_rate": 8.67078215338349e-06, "loss": 0.3529, "step": 9095 }, { "epoch": 0.26, "grad_norm": 4.406059342175622, "learning_rate": 8.670467254696708e-06, "loss": 0.5668, "step": 9096 }, { "epoch": 0.26, "grad_norm": 5.392914570527002, "learning_rate": 8.670152324433347e-06, "loss": 0.2488, "step": 9097 }, { "epoch": 0.26, "grad_norm": 3.9919485479758228, "learning_rate": 8.669837362596118e-06, "loss": 0.1582, "step": 9098 }, { "epoch": 0.26, "grad_norm": 4.570181063856897, "learning_rate": 8.66952236918773e-06, "loss": 0.3108, "step": 9099 }, { "epoch": 0.26, "grad_norm": 3.5276729899001356, "learning_rate": 8.669207344210892e-06, "loss": 0.7273, "step": 9100 }, { "epoch": 0.26, "grad_norm": 6.410347721232015, "learning_rate": 8.668892287668317e-06, "loss": 0.3773, "step": 9101 }, { "epoch": 0.26, "grad_norm": 8.16166387484581, "learning_rate": 8.668577199562711e-06, "loss": 0.6039, "step": 9102 }, { "epoch": 0.26, "grad_norm": 6.288879316043345, "learning_rate": 8.66826207989679e-06, "loss": 0.9386, "step": 9103 }, { "epoch": 0.26, "grad_norm": 5.149404733013843, "learning_rate": 8.66794692867326e-06, "loss": 0.3443, "step": 9104 }, { "epoch": 0.26, "grad_norm": 7.6017962240452865, "learning_rate": 8.667631745894833e-06, "loss": 0.3673, "step": 9105 }, { "epoch": 0.26, "grad_norm": 4.765201437215813, "learning_rate": 8.667316531564223e-06, "loss": 0.4028, "step": 9106 }, { "epoch": 0.26, "grad_norm": 6.1237305863293265, "learning_rate": 8.667001285684141e-06, "loss": 0.3533, "step": 9107 }, { "epoch": 0.26, "grad_norm": 6.013751602759393, "learning_rate": 8.666686008257299e-06, "loss": 0.4184, "step": 9108 }, { "epoch": 0.26, "grad_norm": 5.75374473039484, "learning_rate": 8.666370699286407e-06, "loss": 1.0421, "step": 9109 }, { "epoch": 0.26, "grad_norm": 8.154660300191004, "learning_rate": 8.66605535877418e-06, "loss": 0.8448, "step": 9110 }, { "epoch": 0.26, "grad_norm": 5.664690580074163, "learning_rate": 8.66573998672333e-06, "loss": 0.86, "step": 9111 }, { "epoch": 0.26, "grad_norm": 4.366257488332771, "learning_rate": 8.66542458313657e-06, "loss": 0.4036, "step": 9112 }, { "epoch": 0.26, "grad_norm": 6.402654504129461, "learning_rate": 8.665109148016615e-06, "loss": 0.3577, "step": 9113 }, { "epoch": 0.26, "grad_norm": 2.9150083045039636, "learning_rate": 8.664793681366176e-06, "loss": 0.0547, "step": 9114 }, { "epoch": 0.26, "grad_norm": 6.385265089251171, "learning_rate": 8.664478183187968e-06, "loss": 0.7308, "step": 9115 }, { "epoch": 0.26, "grad_norm": 5.859635085373511, "learning_rate": 8.664162653484704e-06, "loss": 0.3185, "step": 9116 }, { "epoch": 0.26, "grad_norm": 11.341775685575179, "learning_rate": 8.663847092259102e-06, "loss": 0.3871, "step": 9117 }, { "epoch": 0.26, "grad_norm": 7.837783185354142, "learning_rate": 8.663531499513875e-06, "loss": 0.3908, "step": 9118 }, { "epoch": 0.26, "grad_norm": 3.3785944800337955, "learning_rate": 8.663215875251736e-06, "loss": 0.358, "step": 9119 }, { "epoch": 0.26, "grad_norm": 3.9576586181749205, "learning_rate": 8.662900219475403e-06, "loss": 0.547, "step": 9120 }, { "epoch": 0.26, "grad_norm": 2.4210696388759976, "learning_rate": 8.66258453218759e-06, "loss": 0.3043, "step": 9121 }, { "epoch": 0.26, "grad_norm": 4.938158981192805, "learning_rate": 8.662268813391012e-06, "loss": 0.2099, "step": 9122 }, { "epoch": 0.26, "grad_norm": 3.2815816439193792, "learning_rate": 8.661953063088387e-06, "loss": 0.1788, "step": 9123 }, { "epoch": 0.26, "grad_norm": 4.55440984962587, "learning_rate": 8.661637281282432e-06, "loss": 0.7802, "step": 9124 }, { "epoch": 0.26, "grad_norm": 6.343584218231454, "learning_rate": 8.661321467975861e-06, "loss": 0.6122, "step": 9125 }, { "epoch": 0.26, "grad_norm": 6.987482800402824, "learning_rate": 8.661005623171392e-06, "loss": 0.4912, "step": 9126 }, { "epoch": 0.26, "grad_norm": 6.295326780907555, "learning_rate": 8.660689746871745e-06, "loss": 0.7444, "step": 9127 }, { "epoch": 0.26, "grad_norm": 4.0165344425587595, "learning_rate": 8.660373839079632e-06, "loss": 0.3807, "step": 9128 }, { "epoch": 0.26, "grad_norm": 5.2081942221819295, "learning_rate": 8.660057899797773e-06, "loss": 0.7328, "step": 9129 }, { "epoch": 0.26, "grad_norm": 3.621005784563966, "learning_rate": 8.659741929028889e-06, "loss": 0.4315, "step": 9130 }, { "epoch": 0.26, "grad_norm": 5.059299257782381, "learning_rate": 8.659425926775694e-06, "loss": 0.1979, "step": 9131 }, { "epoch": 0.26, "grad_norm": 5.672598464265913, "learning_rate": 8.659109893040908e-06, "loss": 0.3945, "step": 9132 }, { "epoch": 0.26, "grad_norm": 15.047705353100778, "learning_rate": 8.658793827827253e-06, "loss": 0.4637, "step": 9133 }, { "epoch": 0.26, "grad_norm": 11.135353071544436, "learning_rate": 8.658477731137443e-06, "loss": 0.8368, "step": 9134 }, { "epoch": 0.26, "grad_norm": 4.005663885129314, "learning_rate": 8.6581616029742e-06, "loss": 0.7738, "step": 9135 }, { "epoch": 0.26, "grad_norm": 4.000930201136384, "learning_rate": 8.657845443340243e-06, "loss": 0.8932, "step": 9136 }, { "epoch": 0.26, "grad_norm": 3.9727543581510845, "learning_rate": 8.657529252238291e-06, "loss": 0.341, "step": 9137 }, { "epoch": 0.26, "grad_norm": 6.093380021821389, "learning_rate": 8.657213029671066e-06, "loss": 0.9387, "step": 9138 }, { "epoch": 0.26, "grad_norm": 5.383292189264685, "learning_rate": 8.656896775641288e-06, "loss": 0.5431, "step": 9139 }, { "epoch": 0.26, "grad_norm": 2.100469829874315, "learning_rate": 8.656580490151678e-06, "loss": 0.1568, "step": 9140 }, { "epoch": 0.26, "grad_norm": 6.516257221094254, "learning_rate": 8.656264173204955e-06, "loss": 0.3793, "step": 9141 }, { "epoch": 0.26, "grad_norm": 10.151451597096754, "learning_rate": 8.655947824803841e-06, "loss": 0.6004, "step": 9142 }, { "epoch": 0.26, "grad_norm": 13.372388281691615, "learning_rate": 8.655631444951058e-06, "loss": 0.4699, "step": 9143 }, { "epoch": 0.26, "grad_norm": 6.674100625244107, "learning_rate": 8.65531503364933e-06, "loss": 0.6028, "step": 9144 }, { "epoch": 0.26, "grad_norm": 12.043144944913042, "learning_rate": 8.654998590901374e-06, "loss": 0.5227, "step": 9145 }, { "epoch": 0.26, "grad_norm": 4.044943271140145, "learning_rate": 8.654682116709916e-06, "loss": 0.4371, "step": 9146 }, { "epoch": 0.26, "grad_norm": 4.316269042997446, "learning_rate": 8.65436561107768e-06, "loss": 0.6314, "step": 9147 }, { "epoch": 0.26, "grad_norm": 2.325013167590109, "learning_rate": 8.654049074007384e-06, "loss": 0.0842, "step": 9148 }, { "epoch": 0.26, "grad_norm": 9.042022510563827, "learning_rate": 8.653732505501756e-06, "loss": 0.9597, "step": 9149 }, { "epoch": 0.26, "grad_norm": 5.737150139208326, "learning_rate": 8.653415905563514e-06, "loss": 0.5127, "step": 9150 }, { "epoch": 0.26, "grad_norm": 2.9539899316298457, "learning_rate": 8.653099274195388e-06, "loss": 0.2237, "step": 9151 }, { "epoch": 0.26, "grad_norm": 5.898726096261448, "learning_rate": 8.652782611400094e-06, "loss": 0.4022, "step": 9152 }, { "epoch": 0.26, "grad_norm": 4.695613451562311, "learning_rate": 8.652465917180365e-06, "loss": 0.5338, "step": 9153 }, { "epoch": 0.26, "grad_norm": 6.296258863117583, "learning_rate": 8.65214919153892e-06, "loss": 0.5032, "step": 9154 }, { "epoch": 0.26, "grad_norm": 6.006881621907544, "learning_rate": 8.651832434478486e-06, "loss": 0.649, "step": 9155 }, { "epoch": 0.26, "grad_norm": 7.456120750669182, "learning_rate": 8.651515646001788e-06, "loss": 1.0223, "step": 9156 }, { "epoch": 0.26, "grad_norm": 4.992397059189748, "learning_rate": 8.65119882611155e-06, "loss": 0.3193, "step": 9157 }, { "epoch": 0.26, "grad_norm": 5.605956053071293, "learning_rate": 8.650881974810497e-06, "loss": 0.5812, "step": 9158 }, { "epoch": 0.26, "grad_norm": 4.58325668039669, "learning_rate": 8.650565092101356e-06, "loss": 0.3478, "step": 9159 }, { "epoch": 0.26, "grad_norm": 2.748467972300784, "learning_rate": 8.650248177986852e-06, "loss": 0.1723, "step": 9160 }, { "epoch": 0.26, "grad_norm": 3.944609505747309, "learning_rate": 8.649931232469715e-06, "loss": 0.4978, "step": 9161 }, { "epoch": 0.26, "grad_norm": 8.128552173856992, "learning_rate": 8.649614255552667e-06, "loss": 1.301, "step": 9162 }, { "epoch": 0.26, "grad_norm": 4.817849492934816, "learning_rate": 8.649297247238437e-06, "loss": 0.7026, "step": 9163 }, { "epoch": 0.26, "grad_norm": 9.580953009395113, "learning_rate": 8.648980207529753e-06, "loss": 0.699, "step": 9164 }, { "epoch": 0.26, "grad_norm": 8.325266621303843, "learning_rate": 8.64866313642934e-06, "loss": 0.5341, "step": 9165 }, { "epoch": 0.26, "grad_norm": 4.730028429651613, "learning_rate": 8.64834603393993e-06, "loss": 0.2308, "step": 9166 }, { "epoch": 0.26, "grad_norm": 4.876285896902548, "learning_rate": 8.648028900064246e-06, "loss": 0.4125, "step": 9167 }, { "epoch": 0.26, "grad_norm": 6.948028685559411, "learning_rate": 8.647711734805019e-06, "loss": 0.8185, "step": 9168 }, { "epoch": 0.26, "grad_norm": 6.2019037154275924, "learning_rate": 8.647394538164976e-06, "loss": 0.5041, "step": 9169 }, { "epoch": 0.26, "grad_norm": 9.092239306837065, "learning_rate": 8.647077310146849e-06, "loss": 0.7792, "step": 9170 }, { "epoch": 0.26, "grad_norm": 4.568612667790708, "learning_rate": 8.646760050753364e-06, "loss": 0.43, "step": 9171 }, { "epoch": 0.26, "grad_norm": 4.624667903214982, "learning_rate": 8.646442759987251e-06, "loss": 0.3669, "step": 9172 }, { "epoch": 0.26, "grad_norm": 3.861658756531731, "learning_rate": 8.646125437851241e-06, "loss": 0.6253, "step": 9173 }, { "epoch": 0.26, "grad_norm": 8.333389479130114, "learning_rate": 8.645808084348062e-06, "loss": 0.7504, "step": 9174 }, { "epoch": 0.26, "grad_norm": 7.104028858925221, "learning_rate": 8.645490699480446e-06, "loss": 0.4057, "step": 9175 }, { "epoch": 0.26, "grad_norm": 6.212345640233586, "learning_rate": 8.64517328325112e-06, "loss": 0.468, "step": 9176 }, { "epoch": 0.26, "grad_norm": 7.42059668524709, "learning_rate": 8.644855835662819e-06, "loss": 0.8587, "step": 9177 }, { "epoch": 0.26, "grad_norm": 8.347979812884633, "learning_rate": 8.64453835671827e-06, "loss": 0.8724, "step": 9178 }, { "epoch": 0.26, "grad_norm": 9.025003880212008, "learning_rate": 8.644220846420208e-06, "loss": 0.466, "step": 9179 }, { "epoch": 0.26, "grad_norm": 2.4210531624625933, "learning_rate": 8.643903304771362e-06, "loss": 0.2658, "step": 9180 }, { "epoch": 0.26, "grad_norm": 5.13230510276593, "learning_rate": 8.643585731774466e-06, "loss": 0.4046, "step": 9181 }, { "epoch": 0.26, "grad_norm": 7.848998301066141, "learning_rate": 8.64326812743225e-06, "loss": 0.8924, "step": 9182 }, { "epoch": 0.26, "grad_norm": 6.965767465934054, "learning_rate": 8.642950491747446e-06, "loss": 0.5264, "step": 9183 }, { "epoch": 0.26, "grad_norm": 6.445224238860645, "learning_rate": 8.642632824722787e-06, "loss": 0.3776, "step": 9184 }, { "epoch": 0.26, "grad_norm": 3.6192609146310892, "learning_rate": 8.642315126361007e-06, "loss": 0.5408, "step": 9185 }, { "epoch": 0.26, "grad_norm": 6.771105654204157, "learning_rate": 8.641997396664838e-06, "loss": 0.7009, "step": 9186 }, { "epoch": 0.26, "grad_norm": 7.669117387273869, "learning_rate": 8.641679635637015e-06, "loss": 0.5591, "step": 9187 }, { "epoch": 0.26, "grad_norm": 5.237443394685093, "learning_rate": 8.64136184328027e-06, "loss": 0.2326, "step": 9188 }, { "epoch": 0.26, "grad_norm": 3.956237472345606, "learning_rate": 8.641044019597338e-06, "loss": 0.801, "step": 9189 }, { "epoch": 0.26, "grad_norm": 7.605397902493269, "learning_rate": 8.640726164590951e-06, "loss": 1.0752, "step": 9190 }, { "epoch": 0.26, "grad_norm": 7.47197561160402, "learning_rate": 8.640408278263845e-06, "loss": 0.7809, "step": 9191 }, { "epoch": 0.26, "grad_norm": 5.876269183132634, "learning_rate": 8.640090360618756e-06, "loss": 0.7086, "step": 9192 }, { "epoch": 0.26, "grad_norm": 6.448113183235064, "learning_rate": 8.639772411658419e-06, "loss": 0.6278, "step": 9193 }, { "epoch": 0.26, "grad_norm": 6.476110279230495, "learning_rate": 8.639454431385566e-06, "loss": 0.6046, "step": 9194 }, { "epoch": 0.26, "grad_norm": 2.992150607765592, "learning_rate": 8.639136419802936e-06, "loss": 0.2403, "step": 9195 }, { "epoch": 0.26, "grad_norm": 5.7386258452256405, "learning_rate": 8.63881837691326e-06, "loss": 0.7256, "step": 9196 }, { "epoch": 0.26, "grad_norm": 8.748491865705564, "learning_rate": 8.638500302719281e-06, "loss": 0.7618, "step": 9197 }, { "epoch": 0.26, "grad_norm": 7.1546014611060444, "learning_rate": 8.638182197223731e-06, "loss": 0.2703, "step": 9198 }, { "epoch": 0.26, "grad_norm": 2.011608824458008, "learning_rate": 8.637864060429349e-06, "loss": 0.0725, "step": 9199 }, { "epoch": 0.26, "grad_norm": 6.122261680865108, "learning_rate": 8.637545892338867e-06, "loss": 0.8071, "step": 9200 }, { "epoch": 0.26, "grad_norm": 4.0902481534986475, "learning_rate": 8.637227692955028e-06, "loss": 0.4268, "step": 9201 }, { "epoch": 0.26, "grad_norm": 17.080350488287248, "learning_rate": 8.636909462280567e-06, "loss": 0.6039, "step": 9202 }, { "epoch": 0.26, "grad_norm": 9.14924984134228, "learning_rate": 8.63659120031822e-06, "loss": 0.9936, "step": 9203 }, { "epoch": 0.26, "grad_norm": 4.043312994265576, "learning_rate": 8.636272907070728e-06, "loss": 0.3353, "step": 9204 }, { "epoch": 0.26, "grad_norm": 10.856007993962233, "learning_rate": 8.635954582540829e-06, "loss": 0.9124, "step": 9205 }, { "epoch": 0.26, "grad_norm": 3.5668157904339264, "learning_rate": 8.63563622673126e-06, "loss": 0.6556, "step": 9206 }, { "epoch": 0.26, "grad_norm": 7.489243996209873, "learning_rate": 8.635317839644757e-06, "loss": 0.4634, "step": 9207 }, { "epoch": 0.26, "grad_norm": 5.333352262741511, "learning_rate": 8.634999421284064e-06, "loss": 0.6276, "step": 9208 }, { "epoch": 0.26, "grad_norm": 5.092672784090436, "learning_rate": 8.634680971651918e-06, "loss": 0.561, "step": 9209 }, { "epoch": 0.26, "grad_norm": 8.951387939242627, "learning_rate": 8.634362490751063e-06, "loss": 0.5352, "step": 9210 }, { "epoch": 0.26, "grad_norm": 4.629512647351743, "learning_rate": 8.634043978584232e-06, "loss": 0.5389, "step": 9211 }, { "epoch": 0.26, "grad_norm": 7.289030019804292, "learning_rate": 8.633725435154166e-06, "loss": 0.5597, "step": 9212 }, { "epoch": 0.26, "grad_norm": 5.222417216797574, "learning_rate": 8.633406860463611e-06, "loss": 0.5452, "step": 9213 }, { "epoch": 0.26, "grad_norm": 10.366339619395305, "learning_rate": 8.633088254515302e-06, "loss": 0.6652, "step": 9214 }, { "epoch": 0.26, "grad_norm": 13.506416491356553, "learning_rate": 8.632769617311984e-06, "loss": 0.5795, "step": 9215 }, { "epoch": 0.26, "grad_norm": 3.6155596338955864, "learning_rate": 8.632450948856395e-06, "loss": 0.4755, "step": 9216 }, { "epoch": 0.26, "grad_norm": 5.731873426569226, "learning_rate": 8.632132249151278e-06, "loss": 0.7472, "step": 9217 }, { "epoch": 0.26, "grad_norm": 10.124538081428934, "learning_rate": 8.631813518199373e-06, "loss": 0.2599, "step": 9218 }, { "epoch": 0.26, "grad_norm": 3.6289311501559274, "learning_rate": 8.631494756003425e-06, "loss": 0.2235, "step": 9219 }, { "epoch": 0.26, "grad_norm": 7.138629542693611, "learning_rate": 8.631175962566174e-06, "loss": 0.5984, "step": 9220 }, { "epoch": 0.26, "grad_norm": 6.240653411693588, "learning_rate": 8.630857137890365e-06, "loss": 0.3276, "step": 9221 }, { "epoch": 0.26, "grad_norm": 7.533497328647062, "learning_rate": 8.630538281978737e-06, "loss": 0.3926, "step": 9222 }, { "epoch": 0.26, "grad_norm": 10.55132013531484, "learning_rate": 8.630219394834037e-06, "loss": 0.7352, "step": 9223 }, { "epoch": 0.26, "grad_norm": 5.223491228188473, "learning_rate": 8.629900476459004e-06, "loss": 0.4043, "step": 9224 }, { "epoch": 0.26, "grad_norm": 6.170605106327487, "learning_rate": 8.629581526856386e-06, "loss": 0.4137, "step": 9225 }, { "epoch": 0.26, "grad_norm": 3.0706602486874486, "learning_rate": 8.629262546028925e-06, "loss": 0.2858, "step": 9226 }, { "epoch": 0.26, "grad_norm": 5.405740416220108, "learning_rate": 8.628943533979364e-06, "loss": 0.2382, "step": 9227 }, { "epoch": 0.26, "grad_norm": 10.049113782079283, "learning_rate": 8.628624490710452e-06, "loss": 0.4058, "step": 9228 }, { "epoch": 0.26, "grad_norm": 8.084573499840076, "learning_rate": 8.628305416224926e-06, "loss": 0.7223, "step": 9229 }, { "epoch": 0.26, "grad_norm": 6.605282898763607, "learning_rate": 8.627986310525537e-06, "loss": 0.7245, "step": 9230 }, { "epoch": 0.26, "grad_norm": 4.708061213211269, "learning_rate": 8.627667173615029e-06, "loss": 0.6703, "step": 9231 }, { "epoch": 0.26, "grad_norm": 3.966204359992407, "learning_rate": 8.627348005496145e-06, "loss": 0.2481, "step": 9232 }, { "epoch": 0.26, "grad_norm": 4.037361258966296, "learning_rate": 8.627028806171635e-06, "loss": 0.415, "step": 9233 }, { "epoch": 0.26, "grad_norm": 8.780424683364652, "learning_rate": 8.626709575644241e-06, "loss": 0.4651, "step": 9234 }, { "epoch": 0.26, "grad_norm": 7.446588685057984, "learning_rate": 8.626390313916712e-06, "loss": 0.5261, "step": 9235 }, { "epoch": 0.26, "grad_norm": 4.150633781939641, "learning_rate": 8.626071020991793e-06, "loss": 0.2072, "step": 9236 }, { "epoch": 0.26, "grad_norm": 3.091266849316106, "learning_rate": 8.625751696872233e-06, "loss": 0.3131, "step": 9237 }, { "epoch": 0.26, "grad_norm": 4.728066701586354, "learning_rate": 8.625432341560776e-06, "loss": 0.5171, "step": 9238 }, { "epoch": 0.26, "grad_norm": 3.821672692796692, "learning_rate": 8.625112955060171e-06, "loss": 0.3026, "step": 9239 }, { "epoch": 0.26, "grad_norm": 3.4835788176396822, "learning_rate": 8.624793537373167e-06, "loss": 0.4339, "step": 9240 }, { "epoch": 0.26, "grad_norm": 5.017882222441158, "learning_rate": 8.62447408850251e-06, "loss": 0.4621, "step": 9241 }, { "epoch": 0.26, "grad_norm": 2.6223744704365406, "learning_rate": 8.624154608450948e-06, "loss": 0.2866, "step": 9242 }, { "epoch": 0.26, "grad_norm": 8.741726042403723, "learning_rate": 8.62383509722123e-06, "loss": 0.4895, "step": 9243 }, { "epoch": 0.26, "grad_norm": 10.06114263442426, "learning_rate": 8.623515554816104e-06, "loss": 0.5585, "step": 9244 }, { "epoch": 0.26, "grad_norm": 7.992123779758815, "learning_rate": 8.623195981238322e-06, "loss": 0.3249, "step": 9245 }, { "epoch": 0.26, "grad_norm": 8.036555100305666, "learning_rate": 8.62287637649063e-06, "loss": 0.7997, "step": 9246 }, { "epoch": 0.26, "grad_norm": 3.9870472642036, "learning_rate": 8.622556740575779e-06, "loss": 0.6254, "step": 9247 }, { "epoch": 0.26, "grad_norm": 6.9631124367500865, "learning_rate": 8.622237073496518e-06, "loss": 0.5632, "step": 9248 }, { "epoch": 0.26, "grad_norm": 7.763185543926457, "learning_rate": 8.621917375255597e-06, "loss": 0.3591, "step": 9249 }, { "epoch": 0.26, "grad_norm": 26.157532764185696, "learning_rate": 8.621597645855767e-06, "loss": 0.7684, "step": 9250 }, { "epoch": 0.26, "grad_norm": 6.50032633182339, "learning_rate": 8.621277885299779e-06, "loss": 0.8163, "step": 9251 }, { "epoch": 0.26, "grad_norm": 3.650188181219708, "learning_rate": 8.620958093590382e-06, "loss": 0.5623, "step": 9252 }, { "epoch": 0.26, "grad_norm": 5.218522529441576, "learning_rate": 8.62063827073033e-06, "loss": 0.6438, "step": 9253 }, { "epoch": 0.27, "grad_norm": 4.144638074528262, "learning_rate": 8.62031841672237e-06, "loss": 0.3894, "step": 9254 }, { "epoch": 0.27, "grad_norm": 5.095757820066875, "learning_rate": 8.61999853156926e-06, "loss": 0.2503, "step": 9255 }, { "epoch": 0.27, "grad_norm": 5.210108335031284, "learning_rate": 8.619678615273745e-06, "loss": 0.5492, "step": 9256 }, { "epoch": 0.27, "grad_norm": 6.341143932511961, "learning_rate": 8.619358667838583e-06, "loss": 0.5936, "step": 9257 }, { "epoch": 0.27, "grad_norm": 5.333849429948649, "learning_rate": 8.619038689266524e-06, "loss": 0.3529, "step": 9258 }, { "epoch": 0.27, "grad_norm": 5.143455224843608, "learning_rate": 8.618718679560319e-06, "loss": 0.4358, "step": 9259 }, { "epoch": 0.27, "grad_norm": 4.575259903784654, "learning_rate": 8.618398638722724e-06, "loss": 0.6139, "step": 9260 }, { "epoch": 0.27, "grad_norm": 3.731440041205629, "learning_rate": 8.61807856675649e-06, "loss": 0.4827, "step": 9261 }, { "epoch": 0.27, "grad_norm": 3.5250837282287715, "learning_rate": 8.617758463664373e-06, "loss": 0.241, "step": 9262 }, { "epoch": 0.27, "grad_norm": 4.39514177616661, "learning_rate": 8.617438329449125e-06, "loss": 0.4548, "step": 9263 }, { "epoch": 0.27, "grad_norm": 3.4869831057643315, "learning_rate": 8.6171181641135e-06, "loss": 0.312, "step": 9264 }, { "epoch": 0.27, "grad_norm": 5.519024803430746, "learning_rate": 8.616797967660253e-06, "loss": 0.3059, "step": 9265 }, { "epoch": 0.27, "grad_norm": 5.514724506645917, "learning_rate": 8.616477740092137e-06, "loss": 0.4682, "step": 9266 }, { "epoch": 0.27, "grad_norm": 9.654706220458971, "learning_rate": 8.61615748141191e-06, "loss": 0.8198, "step": 9267 }, { "epoch": 0.27, "grad_norm": 6.562806912466075, "learning_rate": 8.615837191622325e-06, "loss": 0.3643, "step": 9268 }, { "epoch": 0.27, "grad_norm": 3.869647790519773, "learning_rate": 8.615516870726137e-06, "loss": 0.7885, "step": 9269 }, { "epoch": 0.27, "grad_norm": 5.6055034569918565, "learning_rate": 8.615196518726103e-06, "loss": 0.4831, "step": 9270 }, { "epoch": 0.27, "grad_norm": 6.865428781809135, "learning_rate": 8.614876135624978e-06, "loss": 0.729, "step": 9271 }, { "epoch": 0.27, "grad_norm": 6.001728683034032, "learning_rate": 8.61455572142552e-06, "loss": 0.622, "step": 9272 }, { "epoch": 0.27, "grad_norm": 5.633025886207991, "learning_rate": 8.614235276130485e-06, "loss": 0.696, "step": 9273 }, { "epoch": 0.27, "grad_norm": 5.791940321277637, "learning_rate": 8.613914799742626e-06, "loss": 0.4728, "step": 9274 }, { "epoch": 0.27, "grad_norm": 7.059485813468394, "learning_rate": 8.613594292264707e-06, "loss": 0.7583, "step": 9275 }, { "epoch": 0.27, "grad_norm": 5.526415333771541, "learning_rate": 8.613273753699477e-06, "loss": 0.7273, "step": 9276 }, { "epoch": 0.27, "grad_norm": 9.630262645336526, "learning_rate": 8.612953184049699e-06, "loss": 0.485, "step": 9277 }, { "epoch": 0.27, "grad_norm": 9.53287039568655, "learning_rate": 8.61263258331813e-06, "loss": 0.6612, "step": 9278 }, { "epoch": 0.27, "grad_norm": 7.936740208103098, "learning_rate": 8.612311951507528e-06, "loss": 0.4986, "step": 9279 }, { "epoch": 0.27, "grad_norm": 6.463048403287609, "learning_rate": 8.611991288620651e-06, "loss": 0.6427, "step": 9280 }, { "epoch": 0.27, "grad_norm": 3.1591692350942813, "learning_rate": 8.611670594660257e-06, "loss": 0.4414, "step": 9281 }, { "epoch": 0.27, "grad_norm": 7.01708109470677, "learning_rate": 8.611349869629107e-06, "loss": 0.4261, "step": 9282 }, { "epoch": 0.27, "grad_norm": 6.082625892299024, "learning_rate": 8.611029113529957e-06, "loss": 0.7385, "step": 9283 }, { "epoch": 0.27, "grad_norm": 8.849686103038596, "learning_rate": 8.61070832636557e-06, "loss": 0.6376, "step": 9284 }, { "epoch": 0.27, "grad_norm": 5.69210829950648, "learning_rate": 8.610387508138703e-06, "loss": 0.4796, "step": 9285 }, { "epoch": 0.27, "grad_norm": 13.617931807415756, "learning_rate": 8.610066658852114e-06, "loss": 0.5958, "step": 9286 }, { "epoch": 0.27, "grad_norm": 3.8913250335507685, "learning_rate": 8.60974577850857e-06, "loss": 0.2718, "step": 9287 }, { "epoch": 0.27, "grad_norm": 4.702974336054878, "learning_rate": 8.609424867110825e-06, "loss": 0.2295, "step": 9288 }, { "epoch": 0.27, "grad_norm": 3.1872786650760356, "learning_rate": 8.609103924661643e-06, "loss": 0.1948, "step": 9289 }, { "epoch": 0.27, "grad_norm": 5.56551879233153, "learning_rate": 8.608782951163785e-06, "loss": 0.5933, "step": 9290 }, { "epoch": 0.27, "grad_norm": 8.087787980342835, "learning_rate": 8.60846194662001e-06, "loss": 0.7866, "step": 9291 }, { "epoch": 0.27, "grad_norm": 8.001698909610601, "learning_rate": 8.608140911033084e-06, "loss": 0.4474, "step": 9292 }, { "epoch": 0.27, "grad_norm": 4.921456067860492, "learning_rate": 8.607819844405766e-06, "loss": 0.6635, "step": 9293 }, { "epoch": 0.27, "grad_norm": 9.10655244575051, "learning_rate": 8.607498746740814e-06, "loss": 0.8859, "step": 9294 }, { "epoch": 0.27, "grad_norm": 7.641613730521462, "learning_rate": 8.607177618040997e-06, "loss": 0.5404, "step": 9295 }, { "epoch": 0.27, "grad_norm": 1.9290142352012516, "learning_rate": 8.606856458309075e-06, "loss": 0.1337, "step": 9296 }, { "epoch": 0.27, "grad_norm": 6.295740370839367, "learning_rate": 8.606535267547812e-06, "loss": 0.6505, "step": 9297 }, { "epoch": 0.27, "grad_norm": 7.898763376337223, "learning_rate": 8.606214045759968e-06, "loss": 0.4218, "step": 9298 }, { "epoch": 0.27, "grad_norm": 4.220932346599831, "learning_rate": 8.60589279294831e-06, "loss": 0.4427, "step": 9299 }, { "epoch": 0.27, "grad_norm": 3.8574870507251013, "learning_rate": 8.6055715091156e-06, "loss": 0.1552, "step": 9300 }, { "epoch": 0.27, "grad_norm": 9.002211246112875, "learning_rate": 8.605250194264601e-06, "loss": 0.7165, "step": 9301 }, { "epoch": 0.27, "grad_norm": 3.201874316675432, "learning_rate": 8.60492884839808e-06, "loss": 0.3914, "step": 9302 }, { "epoch": 0.27, "grad_norm": 7.786599951299329, "learning_rate": 8.6046074715188e-06, "loss": 0.4951, "step": 9303 }, { "epoch": 0.27, "grad_norm": 9.35364184928665, "learning_rate": 8.604286063629525e-06, "loss": 0.7192, "step": 9304 }, { "epoch": 0.27, "grad_norm": 4.4283643492283815, "learning_rate": 8.60396462473302e-06, "loss": 0.6398, "step": 9305 }, { "epoch": 0.27, "grad_norm": 10.438945510270376, "learning_rate": 8.603643154832054e-06, "loss": 0.6767, "step": 9306 }, { "epoch": 0.27, "grad_norm": 7.8765717406276385, "learning_rate": 8.603321653929387e-06, "loss": 0.5073, "step": 9307 }, { "epoch": 0.27, "grad_norm": 4.678291252611977, "learning_rate": 8.603000122027788e-06, "loss": 0.4441, "step": 9308 }, { "epoch": 0.27, "grad_norm": 4.570582742118812, "learning_rate": 8.602678559130023e-06, "loss": 0.406, "step": 9309 }, { "epoch": 0.27, "grad_norm": 5.133968321841818, "learning_rate": 8.602356965238859e-06, "loss": 0.3972, "step": 9310 }, { "epoch": 0.27, "grad_norm": 6.5795041629683295, "learning_rate": 8.602035340357058e-06, "loss": 0.5578, "step": 9311 }, { "epoch": 0.27, "grad_norm": 3.0055644960871777, "learning_rate": 8.601713684487394e-06, "loss": 0.4622, "step": 9312 }, { "epoch": 0.27, "grad_norm": 8.824433086438335, "learning_rate": 8.60139199763263e-06, "loss": 0.5512, "step": 9313 }, { "epoch": 0.27, "grad_norm": 3.4553229125035827, "learning_rate": 8.601070279795532e-06, "loss": 0.2957, "step": 9314 }, { "epoch": 0.27, "grad_norm": 9.744866804971265, "learning_rate": 8.600748530978871e-06, "loss": 0.5255, "step": 9315 }, { "epoch": 0.27, "grad_norm": 5.754989449551852, "learning_rate": 8.600426751185414e-06, "loss": 0.5653, "step": 9316 }, { "epoch": 0.27, "grad_norm": 5.225248984286244, "learning_rate": 8.60010494041793e-06, "loss": 0.9511, "step": 9317 }, { "epoch": 0.27, "grad_norm": 10.478621744861059, "learning_rate": 8.599783098679185e-06, "loss": 0.8161, "step": 9318 }, { "epoch": 0.27, "grad_norm": 5.915514184525447, "learning_rate": 8.599461225971949e-06, "loss": 0.457, "step": 9319 }, { "epoch": 0.27, "grad_norm": 10.112744201804855, "learning_rate": 8.599139322298991e-06, "loss": 0.4447, "step": 9320 }, { "epoch": 0.27, "grad_norm": 6.088973613870416, "learning_rate": 8.598817387663081e-06, "loss": 0.3092, "step": 9321 }, { "epoch": 0.27, "grad_norm": 6.408045447356261, "learning_rate": 8.59849542206699e-06, "loss": 0.9066, "step": 9322 }, { "epoch": 0.27, "grad_norm": 7.215054296737312, "learning_rate": 8.598173425513484e-06, "loss": 0.5458, "step": 9323 }, { "epoch": 0.27, "grad_norm": 6.452071226225856, "learning_rate": 8.597851398005334e-06, "loss": 0.3257, "step": 9324 }, { "epoch": 0.27, "grad_norm": 5.303757958299436, "learning_rate": 8.597529339545315e-06, "loss": 0.6515, "step": 9325 }, { "epoch": 0.27, "grad_norm": 8.029861537979489, "learning_rate": 8.59720725013619e-06, "loss": 1.0255, "step": 9326 }, { "epoch": 0.27, "grad_norm": 12.287569473085725, "learning_rate": 8.596885129780736e-06, "loss": 0.8276, "step": 9327 }, { "epoch": 0.27, "grad_norm": 8.1997269340732, "learning_rate": 8.596562978481721e-06, "loss": 0.7037, "step": 9328 }, { "epoch": 0.27, "grad_norm": 6.05361050779233, "learning_rate": 8.59624079624192e-06, "loss": 0.3201, "step": 9329 }, { "epoch": 0.27, "grad_norm": 6.108737141513893, "learning_rate": 8.5959185830641e-06, "loss": 0.6902, "step": 9330 }, { "epoch": 0.27, "grad_norm": 5.5544449636569295, "learning_rate": 8.595596338951035e-06, "loss": 0.5845, "step": 9331 }, { "epoch": 0.27, "grad_norm": 9.816575971442415, "learning_rate": 8.5952740639055e-06, "loss": 0.5893, "step": 9332 }, { "epoch": 0.27, "grad_norm": 6.327017661331533, "learning_rate": 8.594951757930263e-06, "loss": 0.7608, "step": 9333 }, { "epoch": 0.27, "grad_norm": 3.9934428531220694, "learning_rate": 8.594629421028098e-06, "loss": 0.2317, "step": 9334 }, { "epoch": 0.27, "grad_norm": 7.8558166821412145, "learning_rate": 8.59430705320178e-06, "loss": 0.9226, "step": 9335 }, { "epoch": 0.27, "grad_norm": 7.765394917385813, "learning_rate": 8.59398465445408e-06, "loss": 0.4843, "step": 9336 }, { "epoch": 0.27, "grad_norm": 3.649763915840156, "learning_rate": 8.593662224787773e-06, "loss": 0.2148, "step": 9337 }, { "epoch": 0.27, "grad_norm": 3.7544031360144112, "learning_rate": 8.593339764205632e-06, "loss": 0.4678, "step": 9338 }, { "epoch": 0.27, "grad_norm": 6.227052906209206, "learning_rate": 8.593017272710432e-06, "loss": 0.4842, "step": 9339 }, { "epoch": 0.27, "grad_norm": 3.141632981123979, "learning_rate": 8.592694750304946e-06, "loss": 0.4574, "step": 9340 }, { "epoch": 0.27, "grad_norm": 5.576934687453106, "learning_rate": 8.59237219699195e-06, "loss": 0.6129, "step": 9341 }, { "epoch": 0.27, "grad_norm": 6.842605982925894, "learning_rate": 8.592049612774218e-06, "loss": 0.7347, "step": 9342 }, { "epoch": 0.27, "grad_norm": 3.902354991694268, "learning_rate": 8.591726997654525e-06, "loss": 0.3327, "step": 9343 }, { "epoch": 0.27, "grad_norm": 12.188839569811137, "learning_rate": 8.591404351635648e-06, "loss": 0.9049, "step": 9344 }, { "epoch": 0.27, "grad_norm": 7.68431717027496, "learning_rate": 8.59108167472036e-06, "loss": 0.7036, "step": 9345 }, { "epoch": 0.27, "grad_norm": 7.3133458357466194, "learning_rate": 8.590758966911441e-06, "loss": 0.8723, "step": 9346 }, { "epoch": 0.27, "grad_norm": 5.9975537240538825, "learning_rate": 8.590436228211662e-06, "loss": 0.8172, "step": 9347 }, { "epoch": 0.27, "grad_norm": 3.507663137403747, "learning_rate": 8.590113458623804e-06, "loss": 0.4051, "step": 9348 }, { "epoch": 0.27, "grad_norm": 3.8009627201868534, "learning_rate": 8.58979065815064e-06, "loss": 0.5509, "step": 9349 }, { "epoch": 0.27, "grad_norm": 5.641443251332748, "learning_rate": 8.589467826794951e-06, "loss": 0.4761, "step": 9350 }, { "epoch": 0.27, "grad_norm": 8.274409970917642, "learning_rate": 8.589144964559513e-06, "loss": 0.6468, "step": 9351 }, { "epoch": 0.27, "grad_norm": 3.305583679033279, "learning_rate": 8.5888220714471e-06, "loss": 0.2248, "step": 9352 }, { "epoch": 0.27, "grad_norm": 5.829313019048458, "learning_rate": 8.588499147460494e-06, "loss": 0.3579, "step": 9353 }, { "epoch": 0.27, "grad_norm": 5.660728241403438, "learning_rate": 8.588176192602471e-06, "loss": 0.7387, "step": 9354 }, { "epoch": 0.27, "grad_norm": 4.621509317268373, "learning_rate": 8.587853206875811e-06, "loss": 0.6421, "step": 9355 }, { "epoch": 0.27, "grad_norm": 5.668993846018559, "learning_rate": 8.587530190283291e-06, "loss": 0.397, "step": 9356 }, { "epoch": 0.27, "grad_norm": 3.8485504815278713, "learning_rate": 8.587207142827692e-06, "loss": 0.433, "step": 9357 }, { "epoch": 0.27, "grad_norm": 7.351507756255393, "learning_rate": 8.58688406451179e-06, "loss": 0.7671, "step": 9358 }, { "epoch": 0.27, "grad_norm": 17.62831969144359, "learning_rate": 8.586560955338365e-06, "loss": 0.6623, "step": 9359 }, { "epoch": 0.27, "grad_norm": 9.726985863966428, "learning_rate": 8.586237815310198e-06, "loss": 0.9311, "step": 9360 }, { "epoch": 0.27, "grad_norm": 5.111728429515501, "learning_rate": 8.585914644430071e-06, "loss": 0.472, "step": 9361 }, { "epoch": 0.27, "grad_norm": 13.57986646224197, "learning_rate": 8.58559144270076e-06, "loss": 0.7167, "step": 9362 }, { "epoch": 0.27, "grad_norm": 8.690753025838895, "learning_rate": 8.585268210125049e-06, "loss": 0.5385, "step": 9363 }, { "epoch": 0.27, "grad_norm": 6.773653935513095, "learning_rate": 8.584944946705715e-06, "loss": 1.1148, "step": 9364 }, { "epoch": 0.27, "grad_norm": 6.042288684614281, "learning_rate": 8.584621652445542e-06, "loss": 0.4818, "step": 9365 }, { "epoch": 0.27, "grad_norm": 7.286150613376258, "learning_rate": 8.584298327347309e-06, "loss": 0.6663, "step": 9366 }, { "epoch": 0.27, "grad_norm": 4.746645420492142, "learning_rate": 8.5839749714138e-06, "loss": 0.3778, "step": 9367 }, { "epoch": 0.27, "grad_norm": 10.775887125762221, "learning_rate": 8.583651584647795e-06, "loss": 0.6496, "step": 9368 }, { "epoch": 0.27, "grad_norm": 4.509837655953891, "learning_rate": 8.583328167052078e-06, "loss": 0.1958, "step": 9369 }, { "epoch": 0.27, "grad_norm": 6.505283537512191, "learning_rate": 8.583004718629429e-06, "loss": 0.4956, "step": 9370 }, { "epoch": 0.27, "grad_norm": 6.093429987403076, "learning_rate": 8.58268123938263e-06, "loss": 0.4101, "step": 9371 }, { "epoch": 0.27, "grad_norm": 9.124260284540949, "learning_rate": 8.582357729314468e-06, "loss": 1.1461, "step": 9372 }, { "epoch": 0.27, "grad_norm": 8.375768882415695, "learning_rate": 8.582034188427722e-06, "loss": 0.908, "step": 9373 }, { "epoch": 0.27, "grad_norm": 7.620430812421745, "learning_rate": 8.581710616725179e-06, "loss": 0.5129, "step": 9374 }, { "epoch": 0.27, "grad_norm": 4.4318596897982445, "learning_rate": 8.581387014209618e-06, "loss": 0.5771, "step": 9375 }, { "epoch": 0.27, "grad_norm": 6.47321952067948, "learning_rate": 8.581063380883827e-06, "loss": 0.4996, "step": 9376 }, { "epoch": 0.27, "grad_norm": 10.330991407828579, "learning_rate": 8.580739716750588e-06, "loss": 0.6199, "step": 9377 }, { "epoch": 0.27, "grad_norm": 7.87438048090571, "learning_rate": 8.580416021812685e-06, "loss": 0.6764, "step": 9378 }, { "epoch": 0.27, "grad_norm": 6.579921449928674, "learning_rate": 8.580092296072905e-06, "loss": 0.3526, "step": 9379 }, { "epoch": 0.27, "grad_norm": 7.149018611113751, "learning_rate": 8.579768539534032e-06, "loss": 0.4995, "step": 9380 }, { "epoch": 0.27, "grad_norm": 4.520970200372712, "learning_rate": 8.57944475219885e-06, "loss": 0.209, "step": 9381 }, { "epoch": 0.27, "grad_norm": 3.5714558913684598, "learning_rate": 8.579120934070147e-06, "loss": 0.2723, "step": 9382 }, { "epoch": 0.27, "grad_norm": 10.66920699228571, "learning_rate": 8.578797085150708e-06, "loss": 0.9722, "step": 9383 }, { "epoch": 0.27, "grad_norm": 7.535503215326162, "learning_rate": 8.578473205443317e-06, "loss": 0.4608, "step": 9384 }, { "epoch": 0.27, "grad_norm": 5.162959386415721, "learning_rate": 8.57814929495076e-06, "loss": 0.5583, "step": 9385 }, { "epoch": 0.27, "grad_norm": 6.046083502015862, "learning_rate": 8.577825353675827e-06, "loss": 0.6006, "step": 9386 }, { "epoch": 0.27, "grad_norm": 5.1166870338374215, "learning_rate": 8.577501381621302e-06, "loss": 0.3445, "step": 9387 }, { "epoch": 0.27, "grad_norm": 8.944388769172635, "learning_rate": 8.577177378789973e-06, "loss": 0.6746, "step": 9388 }, { "epoch": 0.27, "grad_norm": 4.3805793676123574, "learning_rate": 8.576853345184629e-06, "loss": 0.2545, "step": 9389 }, { "epoch": 0.27, "grad_norm": 6.003900928780376, "learning_rate": 8.576529280808055e-06, "loss": 0.2996, "step": 9390 }, { "epoch": 0.27, "grad_norm": 7.7462162194739665, "learning_rate": 8.57620518566304e-06, "loss": 0.3363, "step": 9391 }, { "epoch": 0.27, "grad_norm": 8.282066679217447, "learning_rate": 8.575881059752374e-06, "loss": 1.1933, "step": 9392 }, { "epoch": 0.27, "grad_norm": 3.8058763561792, "learning_rate": 8.575556903078842e-06, "loss": 0.3919, "step": 9393 }, { "epoch": 0.27, "grad_norm": 7.995485999226724, "learning_rate": 8.575232715645233e-06, "loss": 0.8804, "step": 9394 }, { "epoch": 0.27, "grad_norm": 5.863917591348589, "learning_rate": 8.574908497454337e-06, "loss": 0.6061, "step": 9395 }, { "epoch": 0.27, "grad_norm": 7.356400554869735, "learning_rate": 8.574584248508945e-06, "loss": 0.4231, "step": 9396 }, { "epoch": 0.27, "grad_norm": 10.10616203085808, "learning_rate": 8.574259968811845e-06, "loss": 0.9747, "step": 9397 }, { "epoch": 0.27, "grad_norm": 6.5539121476129925, "learning_rate": 8.573935658365826e-06, "loss": 0.7652, "step": 9398 }, { "epoch": 0.27, "grad_norm": 13.77721569152103, "learning_rate": 8.573611317173678e-06, "loss": 1.0762, "step": 9399 }, { "epoch": 0.27, "grad_norm": 12.784857280532847, "learning_rate": 8.573286945238193e-06, "loss": 0.6868, "step": 9400 }, { "epoch": 0.27, "grad_norm": 4.2816131326415565, "learning_rate": 8.572962542562159e-06, "loss": 0.5668, "step": 9401 }, { "epoch": 0.27, "grad_norm": 6.030090299597616, "learning_rate": 8.572638109148368e-06, "loss": 0.8692, "step": 9402 }, { "epoch": 0.27, "grad_norm": 8.04265144392912, "learning_rate": 8.572313644999612e-06, "loss": 0.4711, "step": 9403 }, { "epoch": 0.27, "grad_norm": 4.84707721753214, "learning_rate": 8.571989150118682e-06, "loss": 0.3726, "step": 9404 }, { "epoch": 0.27, "grad_norm": 6.450550443242266, "learning_rate": 8.571664624508369e-06, "loss": 0.3866, "step": 9405 }, { "epoch": 0.27, "grad_norm": 3.2557444789593317, "learning_rate": 8.571340068171465e-06, "loss": 0.4773, "step": 9406 }, { "epoch": 0.27, "grad_norm": 3.4435420601819735, "learning_rate": 8.571015481110761e-06, "loss": 0.3447, "step": 9407 }, { "epoch": 0.27, "grad_norm": 8.537296569768696, "learning_rate": 8.570690863329051e-06, "loss": 0.4088, "step": 9408 }, { "epoch": 0.27, "grad_norm": 7.051888520007133, "learning_rate": 8.570366214829128e-06, "loss": 0.6272, "step": 9409 }, { "epoch": 0.27, "grad_norm": 4.978991863209607, "learning_rate": 8.570041535613783e-06, "loss": 0.7761, "step": 9410 }, { "epoch": 0.27, "grad_norm": 4.2723720414858954, "learning_rate": 8.56971682568581e-06, "loss": 0.2383, "step": 9411 }, { "epoch": 0.27, "grad_norm": 5.838127899708266, "learning_rate": 8.569392085048004e-06, "loss": 0.443, "step": 9412 }, { "epoch": 0.27, "grad_norm": 4.080189792388688, "learning_rate": 8.569067313703157e-06, "loss": 0.1001, "step": 9413 }, { "epoch": 0.27, "grad_norm": 8.469205724412236, "learning_rate": 8.568742511654063e-06, "loss": 0.4637, "step": 9414 }, { "epoch": 0.27, "grad_norm": 3.2230066097427277, "learning_rate": 8.568417678903516e-06, "loss": 0.506, "step": 9415 }, { "epoch": 0.27, "grad_norm": 5.352930982718105, "learning_rate": 8.568092815454312e-06, "loss": 0.4392, "step": 9416 }, { "epoch": 0.27, "grad_norm": 4.045146027842031, "learning_rate": 8.567767921309245e-06, "loss": 0.1915, "step": 9417 }, { "epoch": 0.27, "grad_norm": 4.917928996997297, "learning_rate": 8.56744299647111e-06, "loss": 0.4657, "step": 9418 }, { "epoch": 0.27, "grad_norm": 5.0939307619563285, "learning_rate": 8.567118040942702e-06, "loss": 0.554, "step": 9419 }, { "epoch": 0.27, "grad_norm": 4.8432332163126945, "learning_rate": 8.566793054726816e-06, "loss": 0.6616, "step": 9420 }, { "epoch": 0.27, "grad_norm": 3.967792755988766, "learning_rate": 8.566468037826248e-06, "loss": 0.5146, "step": 9421 }, { "epoch": 0.27, "grad_norm": 6.27192386594856, "learning_rate": 8.566142990243795e-06, "loss": 0.6002, "step": 9422 }, { "epoch": 0.27, "grad_norm": 8.605075988256692, "learning_rate": 8.565817911982253e-06, "loss": 0.6791, "step": 9423 }, { "epoch": 0.27, "grad_norm": 4.150996767548709, "learning_rate": 8.56549280304442e-06, "loss": 0.5173, "step": 9424 }, { "epoch": 0.27, "grad_norm": 7.987746032841847, "learning_rate": 8.56516766343309e-06, "loss": 0.4334, "step": 9425 }, { "epoch": 0.27, "grad_norm": 7.381498609421141, "learning_rate": 8.56484249315106e-06, "loss": 0.9453, "step": 9426 }, { "epoch": 0.27, "grad_norm": 8.57334405352079, "learning_rate": 8.564517292201131e-06, "loss": 0.3673, "step": 9427 }, { "epoch": 0.27, "grad_norm": 5.733341342158971, "learning_rate": 8.564192060586098e-06, "loss": 0.4525, "step": 9428 }, { "epoch": 0.27, "grad_norm": 6.0468519737731805, "learning_rate": 8.563866798308759e-06, "loss": 0.4124, "step": 9429 }, { "epoch": 0.27, "grad_norm": 10.201226424367722, "learning_rate": 8.563541505371916e-06, "loss": 0.7709, "step": 9430 }, { "epoch": 0.27, "grad_norm": 8.208829222544164, "learning_rate": 8.563216181778361e-06, "loss": 0.6558, "step": 9431 }, { "epoch": 0.27, "grad_norm": 7.103456821570606, "learning_rate": 8.562890827530896e-06, "loss": 0.4467, "step": 9432 }, { "epoch": 0.27, "grad_norm": 3.661299552386794, "learning_rate": 8.562565442632321e-06, "loss": 0.5686, "step": 9433 }, { "epoch": 0.27, "grad_norm": 4.1115084719574675, "learning_rate": 8.562240027085434e-06, "loss": 0.2674, "step": 9434 }, { "epoch": 0.27, "grad_norm": 5.418905494861259, "learning_rate": 8.561914580893034e-06, "loss": 0.712, "step": 9435 }, { "epoch": 0.27, "grad_norm": 7.784412890219156, "learning_rate": 8.56158910405792e-06, "loss": 0.8132, "step": 9436 }, { "epoch": 0.27, "grad_norm": 3.0821251800496214, "learning_rate": 8.561263596582897e-06, "loss": 0.4959, "step": 9437 }, { "epoch": 0.27, "grad_norm": 5.41774920381096, "learning_rate": 8.560938058470759e-06, "loss": 0.5057, "step": 9438 }, { "epoch": 0.27, "grad_norm": 6.160627154806278, "learning_rate": 8.560612489724311e-06, "loss": 0.337, "step": 9439 }, { "epoch": 0.27, "grad_norm": 8.108605026706257, "learning_rate": 8.560286890346351e-06, "loss": 0.3559, "step": 9440 }, { "epoch": 0.27, "grad_norm": 7.9829899431997, "learning_rate": 8.559961260339682e-06, "loss": 0.7133, "step": 9441 }, { "epoch": 0.27, "grad_norm": 7.426631262790158, "learning_rate": 8.559635599707103e-06, "loss": 0.4119, "step": 9442 }, { "epoch": 0.27, "grad_norm": 6.894127334131475, "learning_rate": 8.559309908451419e-06, "loss": 0.3053, "step": 9443 }, { "epoch": 0.27, "grad_norm": 12.42763300423748, "learning_rate": 8.558984186575429e-06, "loss": 0.778, "step": 9444 }, { "epoch": 0.27, "grad_norm": 5.03595811897055, "learning_rate": 8.558658434081937e-06, "loss": 0.3216, "step": 9445 }, { "epoch": 0.27, "grad_norm": 5.4244573857942555, "learning_rate": 8.558332650973744e-06, "loss": 0.4518, "step": 9446 }, { "epoch": 0.27, "grad_norm": 4.325577189127944, "learning_rate": 8.558006837253654e-06, "loss": 0.2966, "step": 9447 }, { "epoch": 0.27, "grad_norm": 6.469330618780457, "learning_rate": 8.557680992924469e-06, "loss": 0.3775, "step": 9448 }, { "epoch": 0.27, "grad_norm": 7.516496572099517, "learning_rate": 8.557355117988993e-06, "loss": 0.6545, "step": 9449 }, { "epoch": 0.27, "grad_norm": 7.555212434600751, "learning_rate": 8.557029212450028e-06, "loss": 0.91, "step": 9450 }, { "epoch": 0.27, "grad_norm": 4.50884818041842, "learning_rate": 8.55670327631038e-06, "loss": 0.496, "step": 9451 }, { "epoch": 0.27, "grad_norm": 5.430494992156126, "learning_rate": 8.55637730957285e-06, "loss": 0.4632, "step": 9452 }, { "epoch": 0.27, "grad_norm": 6.131476170434835, "learning_rate": 8.556051312240243e-06, "loss": 0.556, "step": 9453 }, { "epoch": 0.27, "grad_norm": 6.073473054797986, "learning_rate": 8.555725284315367e-06, "loss": 0.4865, "step": 9454 }, { "epoch": 0.27, "grad_norm": 10.968252839127004, "learning_rate": 8.555399225801022e-06, "loss": 0.2332, "step": 9455 }, { "epoch": 0.27, "grad_norm": 10.659352496952941, "learning_rate": 8.555073136700016e-06, "loss": 0.8801, "step": 9456 }, { "epoch": 0.27, "grad_norm": 2.4300491946935305, "learning_rate": 8.554747017015155e-06, "loss": 0.4589, "step": 9457 }, { "epoch": 0.27, "grad_norm": 8.68840439294729, "learning_rate": 8.554420866749242e-06, "loss": 0.7998, "step": 9458 }, { "epoch": 0.27, "grad_norm": 3.963552420656283, "learning_rate": 8.554094685905084e-06, "loss": 0.6519, "step": 9459 }, { "epoch": 0.27, "grad_norm": 5.082404531247428, "learning_rate": 8.553768474485487e-06, "loss": 0.7484, "step": 9460 }, { "epoch": 0.27, "grad_norm": 8.454661795485956, "learning_rate": 8.553442232493256e-06, "loss": 0.6836, "step": 9461 }, { "epoch": 0.27, "grad_norm": 3.9966966578800878, "learning_rate": 8.5531159599312e-06, "loss": 0.622, "step": 9462 }, { "epoch": 0.27, "grad_norm": 8.301556748011796, "learning_rate": 8.552789656802124e-06, "loss": 0.4409, "step": 9463 }, { "epoch": 0.27, "grad_norm": 3.346076939995059, "learning_rate": 8.552463323108838e-06, "loss": 0.5361, "step": 9464 }, { "epoch": 0.27, "grad_norm": 7.519503538402671, "learning_rate": 8.552136958854147e-06, "loss": 0.5712, "step": 9465 }, { "epoch": 0.27, "grad_norm": 8.574428519287629, "learning_rate": 8.551810564040858e-06, "loss": 0.3757, "step": 9466 }, { "epoch": 0.27, "grad_norm": 7.924035012974187, "learning_rate": 8.551484138671781e-06, "loss": 0.6102, "step": 9467 }, { "epoch": 0.27, "grad_norm": 5.458226023291344, "learning_rate": 8.551157682749722e-06, "loss": 0.2771, "step": 9468 }, { "epoch": 0.27, "grad_norm": 6.150077203715724, "learning_rate": 8.550831196277491e-06, "loss": 0.401, "step": 9469 }, { "epoch": 0.27, "grad_norm": 5.202246537383712, "learning_rate": 8.550504679257899e-06, "loss": 0.5475, "step": 9470 }, { "epoch": 0.27, "grad_norm": 6.368935411803461, "learning_rate": 8.55017813169375e-06, "loss": 0.6127, "step": 9471 }, { "epoch": 0.27, "grad_norm": 5.063608048294329, "learning_rate": 8.549851553587854e-06, "loss": 0.5965, "step": 9472 }, { "epoch": 0.27, "grad_norm": 5.528078107273778, "learning_rate": 8.549524944943025e-06, "loss": 0.3964, "step": 9473 }, { "epoch": 0.27, "grad_norm": 7.424918167228449, "learning_rate": 8.549198305762069e-06, "loss": 0.6896, "step": 9474 }, { "epoch": 0.27, "grad_norm": 5.8338586252846705, "learning_rate": 8.548871636047797e-06, "loss": 0.5062, "step": 9475 }, { "epoch": 0.27, "grad_norm": 7.13066966364238, "learning_rate": 8.548544935803018e-06, "loss": 0.5909, "step": 9476 }, { "epoch": 0.27, "grad_norm": 5.0148276767808895, "learning_rate": 8.548218205030545e-06, "loss": 0.329, "step": 9477 }, { "epoch": 0.27, "grad_norm": 4.259471521102572, "learning_rate": 8.547891443733189e-06, "loss": 0.5321, "step": 9478 }, { "epoch": 0.27, "grad_norm": 6.968138526809624, "learning_rate": 8.547564651913757e-06, "loss": 0.1821, "step": 9479 }, { "epoch": 0.27, "grad_norm": 3.77251696366022, "learning_rate": 8.547237829575066e-06, "loss": 0.311, "step": 9480 }, { "epoch": 0.27, "grad_norm": 4.0478572168989455, "learning_rate": 8.546910976719922e-06, "loss": 0.3328, "step": 9481 }, { "epoch": 0.27, "grad_norm": 4.725280764478061, "learning_rate": 8.546584093351142e-06, "loss": 0.648, "step": 9482 }, { "epoch": 0.27, "grad_norm": 3.6236908620513697, "learning_rate": 8.546257179471534e-06, "loss": 0.3438, "step": 9483 }, { "epoch": 0.27, "grad_norm": 4.245620771383794, "learning_rate": 8.545930235083912e-06, "loss": 0.2056, "step": 9484 }, { "epoch": 0.27, "grad_norm": 4.048401033037753, "learning_rate": 8.54560326019109e-06, "loss": 0.232, "step": 9485 }, { "epoch": 0.27, "grad_norm": 5.186035592995286, "learning_rate": 8.54527625479588e-06, "loss": 0.56, "step": 9486 }, { "epoch": 0.27, "grad_norm": 6.492324478779257, "learning_rate": 8.544949218901094e-06, "loss": 0.4277, "step": 9487 }, { "epoch": 0.27, "grad_norm": 9.67556233903426, "learning_rate": 8.544622152509546e-06, "loss": 0.8426, "step": 9488 }, { "epoch": 0.27, "grad_norm": 7.982466437857322, "learning_rate": 8.54429505562405e-06, "loss": 0.8858, "step": 9489 }, { "epoch": 0.27, "grad_norm": 3.674203564504803, "learning_rate": 8.54396792824742e-06, "loss": 0.5525, "step": 9490 }, { "epoch": 0.27, "grad_norm": 6.898956901376967, "learning_rate": 8.543640770382471e-06, "loss": 0.5123, "step": 9491 }, { "epoch": 0.27, "grad_norm": 5.760544355286754, "learning_rate": 8.543313582032016e-06, "loss": 0.5723, "step": 9492 }, { "epoch": 0.27, "grad_norm": 8.068474057042097, "learning_rate": 8.54298636319887e-06, "loss": 0.4496, "step": 9493 }, { "epoch": 0.27, "grad_norm": 4.961615232295667, "learning_rate": 8.54265911388585e-06, "loss": 0.4637, "step": 9494 }, { "epoch": 0.27, "grad_norm": 13.539130261885031, "learning_rate": 8.54233183409577e-06, "loss": 1.137, "step": 9495 }, { "epoch": 0.27, "grad_norm": 4.917426118009642, "learning_rate": 8.542004523831444e-06, "loss": 0.4586, "step": 9496 }, { "epoch": 0.27, "grad_norm": 6.133992658642436, "learning_rate": 8.541677183095689e-06, "loss": 0.9027, "step": 9497 }, { "epoch": 0.27, "grad_norm": 7.767468914734685, "learning_rate": 8.541349811891322e-06, "loss": 0.6413, "step": 9498 }, { "epoch": 0.27, "grad_norm": 4.692834285805669, "learning_rate": 8.54102241022116e-06, "loss": 0.3627, "step": 9499 }, { "epoch": 0.27, "grad_norm": 4.218093538425562, "learning_rate": 8.540694978088016e-06, "loss": 0.4434, "step": 9500 }, { "epoch": 0.27, "grad_norm": 3.2527525506279695, "learning_rate": 8.54036751549471e-06, "loss": 0.1632, "step": 9501 }, { "epoch": 0.27, "grad_norm": 3.7872363222199916, "learning_rate": 8.540040022444058e-06, "loss": 0.4209, "step": 9502 }, { "epoch": 0.27, "grad_norm": 5.798786763203995, "learning_rate": 8.539712498938878e-06, "loss": 0.4536, "step": 9503 }, { "epoch": 0.27, "grad_norm": 4.631900073513636, "learning_rate": 8.539384944981987e-06, "loss": 0.2646, "step": 9504 }, { "epoch": 0.27, "grad_norm": 5.335656176812576, "learning_rate": 8.539057360576203e-06, "loss": 0.2811, "step": 9505 }, { "epoch": 0.27, "grad_norm": 2.3564845942271497, "learning_rate": 8.538729745724343e-06, "loss": 0.1252, "step": 9506 }, { "epoch": 0.27, "grad_norm": 6.503262727993846, "learning_rate": 8.538402100429229e-06, "loss": 0.6189, "step": 9507 }, { "epoch": 0.27, "grad_norm": 10.463183844856433, "learning_rate": 8.538074424693676e-06, "loss": 0.4953, "step": 9508 }, { "epoch": 0.27, "grad_norm": 8.85793673565637, "learning_rate": 8.537746718520504e-06, "loss": 0.6642, "step": 9509 }, { "epoch": 0.27, "grad_norm": 5.484102639067659, "learning_rate": 8.537418981912532e-06, "loss": 0.434, "step": 9510 }, { "epoch": 0.27, "grad_norm": 5.489417255164709, "learning_rate": 8.537091214872582e-06, "loss": 0.6371, "step": 9511 }, { "epoch": 0.27, "grad_norm": 9.107449624160225, "learning_rate": 8.53676341740347e-06, "loss": 0.7906, "step": 9512 }, { "epoch": 0.27, "grad_norm": 10.38852279163098, "learning_rate": 8.53643558950802e-06, "loss": 1.0345, "step": 9513 }, { "epoch": 0.27, "grad_norm": 10.985304292571538, "learning_rate": 8.536107731189049e-06, "loss": 0.7193, "step": 9514 }, { "epoch": 0.27, "grad_norm": 8.026265716561111, "learning_rate": 8.535779842449379e-06, "loss": 0.6194, "step": 9515 }, { "epoch": 0.27, "grad_norm": 6.121526706479328, "learning_rate": 8.535451923291829e-06, "loss": 0.7178, "step": 9516 }, { "epoch": 0.27, "grad_norm": 7.9451768051458025, "learning_rate": 8.535123973719222e-06, "loss": 0.429, "step": 9517 }, { "epoch": 0.27, "grad_norm": 5.598668413566383, "learning_rate": 8.53479599373438e-06, "loss": 0.6039, "step": 9518 }, { "epoch": 0.27, "grad_norm": 7.554608791379186, "learning_rate": 8.53446798334012e-06, "loss": 0.93, "step": 9519 }, { "epoch": 0.27, "grad_norm": 7.433385400062301, "learning_rate": 8.53413994253927e-06, "loss": 0.6789, "step": 9520 }, { "epoch": 0.27, "grad_norm": 7.813950091253141, "learning_rate": 8.533811871334649e-06, "loss": 0.6375, "step": 9521 }, { "epoch": 0.27, "grad_norm": 6.912843344578948, "learning_rate": 8.533483769729079e-06, "loss": 0.4893, "step": 9522 }, { "epoch": 0.27, "grad_norm": 5.11214360344954, "learning_rate": 8.533155637725384e-06, "loss": 0.696, "step": 9523 }, { "epoch": 0.27, "grad_norm": 5.104156535651085, "learning_rate": 8.532827475326383e-06, "loss": 0.4712, "step": 9524 }, { "epoch": 0.27, "grad_norm": 5.345905070171664, "learning_rate": 8.532499282534904e-06, "loss": 0.3865, "step": 9525 }, { "epoch": 0.27, "grad_norm": 4.918643435495648, "learning_rate": 8.53217105935377e-06, "loss": 0.5165, "step": 9526 }, { "epoch": 0.27, "grad_norm": 5.364181654129239, "learning_rate": 8.531842805785802e-06, "loss": 0.419, "step": 9527 }, { "epoch": 0.27, "grad_norm": 5.358439906095232, "learning_rate": 8.531514521833825e-06, "loss": 0.6483, "step": 9528 }, { "epoch": 0.27, "grad_norm": 4.01281473702032, "learning_rate": 8.531186207500665e-06, "loss": 0.5114, "step": 9529 }, { "epoch": 0.27, "grad_norm": 7.802747452859816, "learning_rate": 8.530857862789143e-06, "loss": 0.9309, "step": 9530 }, { "epoch": 0.27, "grad_norm": 4.147154758793526, "learning_rate": 8.530529487702088e-06, "loss": 0.5715, "step": 9531 }, { "epoch": 0.27, "grad_norm": 8.925589995557651, "learning_rate": 8.53020108224232e-06, "loss": 0.6718, "step": 9532 }, { "epoch": 0.27, "grad_norm": 3.3570502110425844, "learning_rate": 8.529872646412668e-06, "loss": 0.405, "step": 9533 }, { "epoch": 0.27, "grad_norm": 3.300765539730087, "learning_rate": 8.529544180215957e-06, "loss": 0.3751, "step": 9534 }, { "epoch": 0.27, "grad_norm": 7.112071538265005, "learning_rate": 8.529215683655009e-06, "loss": 0.4134, "step": 9535 }, { "epoch": 0.27, "grad_norm": 4.427925337958874, "learning_rate": 8.528887156732654e-06, "loss": 0.6368, "step": 9536 }, { "epoch": 0.27, "grad_norm": 3.0733423159142905, "learning_rate": 8.528558599451719e-06, "loss": 0.165, "step": 9537 }, { "epoch": 0.27, "grad_norm": 3.0647215278909217, "learning_rate": 8.528230011815027e-06, "loss": 0.3104, "step": 9538 }, { "epoch": 0.27, "grad_norm": 2.8324777606316633, "learning_rate": 8.527901393825409e-06, "loss": 0.2447, "step": 9539 }, { "epoch": 0.27, "grad_norm": 8.807714691620593, "learning_rate": 8.527572745485687e-06, "loss": 0.5363, "step": 9540 }, { "epoch": 0.27, "grad_norm": 8.541516397286394, "learning_rate": 8.527244066798693e-06, "loss": 0.7181, "step": 9541 }, { "epoch": 0.27, "grad_norm": 7.997961499849581, "learning_rate": 8.526915357767251e-06, "loss": 0.8038, "step": 9542 }, { "epoch": 0.27, "grad_norm": 2.7487745589078147, "learning_rate": 8.52658661839419e-06, "loss": 0.3912, "step": 9543 }, { "epoch": 0.27, "grad_norm": 6.892643103181117, "learning_rate": 8.52625784868234e-06, "loss": 0.4837, "step": 9544 }, { "epoch": 0.27, "grad_norm": 3.2357502469157984, "learning_rate": 8.525929048634528e-06, "loss": 0.5797, "step": 9545 }, { "epoch": 0.27, "grad_norm": 5.2388974822212, "learning_rate": 8.525600218253582e-06, "loss": 0.5266, "step": 9546 }, { "epoch": 0.27, "grad_norm": 3.5631268602281714, "learning_rate": 8.525271357542331e-06, "loss": 0.3184, "step": 9547 }, { "epoch": 0.27, "grad_norm": 9.345281877261836, "learning_rate": 8.524942466503606e-06, "loss": 1.0526, "step": 9548 }, { "epoch": 0.27, "grad_norm": 6.563461015362482, "learning_rate": 8.524613545140234e-06, "loss": 0.4301, "step": 9549 }, { "epoch": 0.27, "grad_norm": 3.020438093878693, "learning_rate": 8.524284593455046e-06, "loss": 0.2917, "step": 9550 }, { "epoch": 0.27, "grad_norm": 5.444933359528229, "learning_rate": 8.523955611450869e-06, "loss": 0.5948, "step": 9551 }, { "epoch": 0.27, "grad_norm": 6.030389635456902, "learning_rate": 8.52362659913054e-06, "loss": 0.546, "step": 9552 }, { "epoch": 0.27, "grad_norm": 5.949020773185285, "learning_rate": 8.523297556496882e-06, "loss": 0.5364, "step": 9553 }, { "epoch": 0.27, "grad_norm": 5.2827073518611956, "learning_rate": 8.522968483552731e-06, "loss": 0.4762, "step": 9554 }, { "epoch": 0.27, "grad_norm": 9.423160800530587, "learning_rate": 8.522639380300914e-06, "loss": 0.4874, "step": 9555 }, { "epoch": 0.27, "grad_norm": 4.544065780895558, "learning_rate": 8.522310246744266e-06, "loss": 0.1878, "step": 9556 }, { "epoch": 0.27, "grad_norm": 7.130787155550478, "learning_rate": 8.521981082885615e-06, "loss": 0.6421, "step": 9557 }, { "epoch": 0.27, "grad_norm": 7.991749323582201, "learning_rate": 8.521651888727796e-06, "loss": 0.7416, "step": 9558 }, { "epoch": 0.27, "grad_norm": 7.193410151828322, "learning_rate": 8.521322664273637e-06, "loss": 0.2764, "step": 9559 }, { "epoch": 0.27, "grad_norm": 6.435805051310814, "learning_rate": 8.520993409525976e-06, "loss": 0.4878, "step": 9560 }, { "epoch": 0.27, "grad_norm": 8.725684467564149, "learning_rate": 8.520664124487639e-06, "loss": 0.5589, "step": 9561 }, { "epoch": 0.27, "grad_norm": 6.237385952505089, "learning_rate": 8.520334809161464e-06, "loss": 0.465, "step": 9562 }, { "epoch": 0.27, "grad_norm": 5.547062679930369, "learning_rate": 8.52000546355028e-06, "loss": 0.2969, "step": 9563 }, { "epoch": 0.27, "grad_norm": 5.699715878281974, "learning_rate": 8.519676087656926e-06, "loss": 0.9174, "step": 9564 }, { "epoch": 0.27, "grad_norm": 7.372886160810499, "learning_rate": 8.519346681484228e-06, "loss": 0.5569, "step": 9565 }, { "epoch": 0.27, "grad_norm": 4.631837584663949, "learning_rate": 8.519017245035026e-06, "loss": 0.5885, "step": 9566 }, { "epoch": 0.27, "grad_norm": 7.729667510251056, "learning_rate": 8.518687778312151e-06, "loss": 0.4685, "step": 9567 }, { "epoch": 0.27, "grad_norm": 9.933017079441475, "learning_rate": 8.518358281318439e-06, "loss": 0.7075, "step": 9568 }, { "epoch": 0.27, "grad_norm": 4.810102831798082, "learning_rate": 8.518028754056724e-06, "loss": 0.4988, "step": 9569 }, { "epoch": 0.27, "grad_norm": 1.9963579917437815, "learning_rate": 8.51769919652984e-06, "loss": 0.1962, "step": 9570 }, { "epoch": 0.27, "grad_norm": 6.157000268442664, "learning_rate": 8.517369608740625e-06, "loss": 0.4694, "step": 9571 }, { "epoch": 0.27, "grad_norm": 5.134353197438065, "learning_rate": 8.51703999069191e-06, "loss": 0.5772, "step": 9572 }, { "epoch": 0.27, "grad_norm": 5.431470487660334, "learning_rate": 8.516710342386533e-06, "loss": 0.5463, "step": 9573 }, { "epoch": 0.27, "grad_norm": 3.340767466091729, "learning_rate": 8.516380663827331e-06, "loss": 0.2463, "step": 9574 }, { "epoch": 0.27, "grad_norm": 2.3005265255190386, "learning_rate": 8.516050955017139e-06, "loss": 0.1782, "step": 9575 }, { "epoch": 0.27, "grad_norm": 7.869862409501351, "learning_rate": 8.515721215958793e-06, "loss": 0.6066, "step": 9576 }, { "epoch": 0.27, "grad_norm": 9.416164103462718, "learning_rate": 8.51539144665513e-06, "loss": 0.4632, "step": 9577 }, { "epoch": 0.27, "grad_norm": 5.927177692147175, "learning_rate": 8.515061647108988e-06, "loss": 0.3702, "step": 9578 }, { "epoch": 0.27, "grad_norm": 3.1277174387845252, "learning_rate": 8.514731817323204e-06, "loss": 0.359, "step": 9579 }, { "epoch": 0.27, "grad_norm": 5.283345094701791, "learning_rate": 8.514401957300613e-06, "loss": 0.5868, "step": 9580 }, { "epoch": 0.27, "grad_norm": 6.2873840366840765, "learning_rate": 8.514072067044058e-06, "loss": 0.4703, "step": 9581 }, { "epoch": 0.27, "grad_norm": 3.0120259188940133, "learning_rate": 8.513742146556369e-06, "loss": 0.3717, "step": 9582 }, { "epoch": 0.27, "grad_norm": 6.381288082595756, "learning_rate": 8.513412195840394e-06, "loss": 0.5058, "step": 9583 }, { "epoch": 0.27, "grad_norm": 5.949486208618809, "learning_rate": 8.513082214898963e-06, "loss": 0.6843, "step": 9584 }, { "epoch": 0.27, "grad_norm": 8.513516618293579, "learning_rate": 8.512752203734917e-06, "loss": 0.7672, "step": 9585 }, { "epoch": 0.27, "grad_norm": 5.866535094620883, "learning_rate": 8.5124221623511e-06, "loss": 0.4058, "step": 9586 }, { "epoch": 0.27, "grad_norm": 3.28027158409491, "learning_rate": 8.512092090750345e-06, "loss": 0.3921, "step": 9587 }, { "epoch": 0.27, "grad_norm": 7.619042680233535, "learning_rate": 8.511761988935496e-06, "loss": 0.3053, "step": 9588 }, { "epoch": 0.27, "grad_norm": 9.0362064791861, "learning_rate": 8.51143185690939e-06, "loss": 0.7734, "step": 9589 }, { "epoch": 0.27, "grad_norm": 5.817033087038397, "learning_rate": 8.511101694674868e-06, "loss": 0.496, "step": 9590 }, { "epoch": 0.27, "grad_norm": 7.642205884151305, "learning_rate": 8.510771502234771e-06, "loss": 1.0177, "step": 9591 }, { "epoch": 0.27, "grad_norm": 4.215432027892035, "learning_rate": 8.510441279591938e-06, "loss": 0.3547, "step": 9592 }, { "epoch": 0.27, "grad_norm": 4.352974736125255, "learning_rate": 8.51011102674921e-06, "loss": 0.3928, "step": 9593 }, { "epoch": 0.27, "grad_norm": 8.686726268096397, "learning_rate": 8.509780743709432e-06, "loss": 0.7943, "step": 9594 }, { "epoch": 0.27, "grad_norm": 4.4700621165796255, "learning_rate": 8.50945043047544e-06, "loss": 0.6174, "step": 9595 }, { "epoch": 0.27, "grad_norm": 10.196375719076139, "learning_rate": 8.50912008705008e-06, "loss": 0.1003, "step": 9596 }, { "epoch": 0.27, "grad_norm": 4.671357524880685, "learning_rate": 8.508789713436192e-06, "loss": 0.768, "step": 9597 }, { "epoch": 0.27, "grad_norm": 4.391286291096807, "learning_rate": 8.508459309636617e-06, "loss": 0.5281, "step": 9598 }, { "epoch": 0.27, "grad_norm": 5.555545681573782, "learning_rate": 8.508128875654198e-06, "loss": 0.6717, "step": 9599 }, { "epoch": 0.27, "grad_norm": 4.6420672195347095, "learning_rate": 8.507798411491778e-06, "loss": 0.632, "step": 9600 }, { "epoch": 0.27, "grad_norm": 10.73214089856267, "learning_rate": 8.5074679171522e-06, "loss": 0.9048, "step": 9601 }, { "epoch": 0.27, "grad_norm": 6.3810237399796845, "learning_rate": 8.507137392638309e-06, "loss": 0.4057, "step": 9602 }, { "epoch": 0.28, "grad_norm": 4.598059365126981, "learning_rate": 8.506806837952948e-06, "loss": 0.7079, "step": 9603 }, { "epoch": 0.28, "grad_norm": 6.949050086442163, "learning_rate": 8.506476253098957e-06, "loss": 0.8338, "step": 9604 }, { "epoch": 0.28, "grad_norm": 6.375106399246611, "learning_rate": 8.506145638079184e-06, "loss": 0.4183, "step": 9605 }, { "epoch": 0.28, "grad_norm": 7.883158801877435, "learning_rate": 8.50581499289647e-06, "loss": 0.4574, "step": 9606 }, { "epoch": 0.28, "grad_norm": 9.228272566243628, "learning_rate": 8.505484317553663e-06, "loss": 0.5354, "step": 9607 }, { "epoch": 0.28, "grad_norm": 7.198255767416136, "learning_rate": 8.505153612053605e-06, "loss": 0.399, "step": 9608 }, { "epoch": 0.28, "grad_norm": 3.871687118984223, "learning_rate": 8.504822876399141e-06, "loss": 0.65, "step": 9609 }, { "epoch": 0.28, "grad_norm": 4.491762263861927, "learning_rate": 8.504492110593119e-06, "loss": 0.5043, "step": 9610 }, { "epoch": 0.28, "grad_norm": 4.24192553531552, "learning_rate": 8.504161314638383e-06, "loss": 0.6262, "step": 9611 }, { "epoch": 0.28, "grad_norm": 6.022414153925457, "learning_rate": 8.503830488537777e-06, "loss": 0.181, "step": 9612 }, { "epoch": 0.28, "grad_norm": 6.147241536680044, "learning_rate": 8.50349963229415e-06, "loss": 0.6644, "step": 9613 }, { "epoch": 0.28, "grad_norm": 10.698954283338068, "learning_rate": 8.503168745910347e-06, "loss": 0.7577, "step": 9614 }, { "epoch": 0.28, "grad_norm": 6.6592225077447145, "learning_rate": 8.502837829389214e-06, "loss": 0.7864, "step": 9615 }, { "epoch": 0.28, "grad_norm": 4.661483307102015, "learning_rate": 8.5025068827336e-06, "loss": 0.3025, "step": 9616 }, { "epoch": 0.28, "grad_norm": 4.930360533509386, "learning_rate": 8.502175905946348e-06, "loss": 0.6947, "step": 9617 }, { "epoch": 0.28, "grad_norm": 3.620116528906401, "learning_rate": 8.50184489903031e-06, "loss": 0.5781, "step": 9618 }, { "epoch": 0.28, "grad_norm": 8.439074164588916, "learning_rate": 8.50151386198833e-06, "loss": 0.9766, "step": 9619 }, { "epoch": 0.28, "grad_norm": 6.310926467874532, "learning_rate": 8.50118279482326e-06, "loss": 0.6896, "step": 9620 }, { "epoch": 0.28, "grad_norm": 5.586775411436659, "learning_rate": 8.500851697537944e-06, "loss": 0.4441, "step": 9621 }, { "epoch": 0.28, "grad_norm": 8.426813379843445, "learning_rate": 8.500520570135231e-06, "loss": 0.312, "step": 9622 }, { "epoch": 0.28, "grad_norm": 6.841896015029376, "learning_rate": 8.500189412617972e-06, "loss": 0.3797, "step": 9623 }, { "epoch": 0.28, "grad_norm": 9.607847262878112, "learning_rate": 8.499858224989013e-06, "loss": 0.7222, "step": 9624 }, { "epoch": 0.28, "grad_norm": 6.752010046178567, "learning_rate": 8.499527007251204e-06, "loss": 0.7019, "step": 9625 }, { "epoch": 0.28, "grad_norm": 4.733843380128139, "learning_rate": 8.499195759407397e-06, "loss": 0.3554, "step": 9626 }, { "epoch": 0.28, "grad_norm": 8.023003169917232, "learning_rate": 8.498864481460438e-06, "loss": 0.383, "step": 9627 }, { "epoch": 0.28, "grad_norm": 4.384769159328336, "learning_rate": 8.49853317341318e-06, "loss": 0.4882, "step": 9628 }, { "epoch": 0.28, "grad_norm": 8.152925040153237, "learning_rate": 8.498201835268472e-06, "loss": 0.497, "step": 9629 }, { "epoch": 0.28, "grad_norm": 8.140311031913281, "learning_rate": 8.497870467029163e-06, "loss": 0.6705, "step": 9630 }, { "epoch": 0.28, "grad_norm": 7.787096760751321, "learning_rate": 8.497539068698105e-06, "loss": 0.6091, "step": 9631 }, { "epoch": 0.28, "grad_norm": 8.16237909948942, "learning_rate": 8.49720764027815e-06, "loss": 0.4325, "step": 9632 }, { "epoch": 0.28, "grad_norm": 2.831074278054119, "learning_rate": 8.496876181772148e-06, "loss": 0.539, "step": 9633 }, { "epoch": 0.28, "grad_norm": 3.9740653000713975, "learning_rate": 8.49654469318295e-06, "loss": 0.3988, "step": 9634 }, { "epoch": 0.28, "grad_norm": 5.584386906453714, "learning_rate": 8.496213174513411e-06, "loss": 0.6856, "step": 9635 }, { "epoch": 0.28, "grad_norm": 6.607613836167347, "learning_rate": 8.495881625766378e-06, "loss": 0.4148, "step": 9636 }, { "epoch": 0.28, "grad_norm": 7.359811757728039, "learning_rate": 8.495550046944708e-06, "loss": 0.8694, "step": 9637 }, { "epoch": 0.28, "grad_norm": 5.687779681696499, "learning_rate": 8.49521843805125e-06, "loss": 0.3038, "step": 9638 }, { "epoch": 0.28, "grad_norm": 4.311370425247066, "learning_rate": 8.494886799088857e-06, "loss": 0.2531, "step": 9639 }, { "epoch": 0.28, "grad_norm": 4.1012519064878195, "learning_rate": 8.494555130060385e-06, "loss": 0.4731, "step": 9640 }, { "epoch": 0.28, "grad_norm": 2.996853529422608, "learning_rate": 8.494223430968685e-06, "loss": 0.275, "step": 9641 }, { "epoch": 0.28, "grad_norm": 4.4613488537115025, "learning_rate": 8.49389170181661e-06, "loss": 0.3308, "step": 9642 }, { "epoch": 0.28, "grad_norm": 5.418381370668645, "learning_rate": 8.493559942607014e-06, "loss": 0.2781, "step": 9643 }, { "epoch": 0.28, "grad_norm": 3.2962266184990545, "learning_rate": 8.493228153342754e-06, "loss": 0.4875, "step": 9644 }, { "epoch": 0.28, "grad_norm": 3.637730901147221, "learning_rate": 8.492896334026682e-06, "loss": 0.3618, "step": 9645 }, { "epoch": 0.28, "grad_norm": 14.689293544913344, "learning_rate": 8.492564484661653e-06, "loss": 0.8169, "step": 9646 }, { "epoch": 0.28, "grad_norm": 6.89439320196426, "learning_rate": 8.49223260525052e-06, "loss": 0.8868, "step": 9647 }, { "epoch": 0.28, "grad_norm": 6.2567909349847755, "learning_rate": 8.491900695796142e-06, "loss": 0.5956, "step": 9648 }, { "epoch": 0.28, "grad_norm": 6.915336431178918, "learning_rate": 8.491568756301373e-06, "loss": 0.4915, "step": 9649 }, { "epoch": 0.28, "grad_norm": 5.261431011440421, "learning_rate": 8.491236786769066e-06, "loss": 0.4385, "step": 9650 }, { "epoch": 0.28, "grad_norm": 6.868893494985854, "learning_rate": 8.490904787202078e-06, "loss": 0.6812, "step": 9651 }, { "epoch": 0.28, "grad_norm": 6.424936831568256, "learning_rate": 8.490572757603268e-06, "loss": 0.3144, "step": 9652 }, { "epoch": 0.28, "grad_norm": 3.05650152797901, "learning_rate": 8.490240697975489e-06, "loss": 0.5272, "step": 9653 }, { "epoch": 0.28, "grad_norm": 5.654464186859663, "learning_rate": 8.4899086083216e-06, "loss": 0.3479, "step": 9654 }, { "epoch": 0.28, "grad_norm": 3.6098575352060993, "learning_rate": 8.489576488644458e-06, "loss": 0.3313, "step": 9655 }, { "epoch": 0.28, "grad_norm": 10.812068335234985, "learning_rate": 8.489244338946916e-06, "loss": 0.5913, "step": 9656 }, { "epoch": 0.28, "grad_norm": 9.790858561399185, "learning_rate": 8.488912159231837e-06, "loss": 0.7987, "step": 9657 }, { "epoch": 0.28, "grad_norm": 8.547250687119904, "learning_rate": 8.488579949502075e-06, "loss": 0.4059, "step": 9658 }, { "epoch": 0.28, "grad_norm": 3.231366974799597, "learning_rate": 8.488247709760489e-06, "loss": 0.1933, "step": 9659 }, { "epoch": 0.28, "grad_norm": 4.547702360091461, "learning_rate": 8.487915440009938e-06, "loss": 0.284, "step": 9660 }, { "epoch": 0.28, "grad_norm": 4.400442990847773, "learning_rate": 8.48758314025328e-06, "loss": 1.0583, "step": 9661 }, { "epoch": 0.28, "grad_norm": 6.700473216411849, "learning_rate": 8.487250810493375e-06, "loss": 0.4456, "step": 9662 }, { "epoch": 0.28, "grad_norm": 5.92889101411272, "learning_rate": 8.48691845073308e-06, "loss": 0.414, "step": 9663 }, { "epoch": 0.28, "grad_norm": 4.860205159311439, "learning_rate": 8.486586060975253e-06, "loss": 0.629, "step": 9664 }, { "epoch": 0.28, "grad_norm": 6.407080173200987, "learning_rate": 8.486253641222757e-06, "loss": 0.4332, "step": 9665 }, { "epoch": 0.28, "grad_norm": 8.383098103261045, "learning_rate": 8.485921191478449e-06, "loss": 0.5237, "step": 9666 }, { "epoch": 0.28, "grad_norm": 21.121253014245013, "learning_rate": 8.48558871174519e-06, "loss": 0.6133, "step": 9667 }, { "epoch": 0.28, "grad_norm": 8.822391018452464, "learning_rate": 8.485256202025843e-06, "loss": 1.0731, "step": 9668 }, { "epoch": 0.28, "grad_norm": 8.102490805079567, "learning_rate": 8.484923662323263e-06, "loss": 0.4, "step": 9669 }, { "epoch": 0.28, "grad_norm": 5.181758736918914, "learning_rate": 8.484591092640316e-06, "loss": 0.7263, "step": 9670 }, { "epoch": 0.28, "grad_norm": 2.3488665363337318, "learning_rate": 8.48425849297986e-06, "loss": 0.1291, "step": 9671 }, { "epoch": 0.28, "grad_norm": 12.385980839735895, "learning_rate": 8.483925863344757e-06, "loss": 0.8704, "step": 9672 }, { "epoch": 0.28, "grad_norm": 10.946360111133618, "learning_rate": 8.48359320373787e-06, "loss": 0.608, "step": 9673 }, { "epoch": 0.28, "grad_norm": 3.9752250775606215, "learning_rate": 8.483260514162058e-06, "loss": 0.2362, "step": 9674 }, { "epoch": 0.28, "grad_norm": 8.272808566156092, "learning_rate": 8.482927794620183e-06, "loss": 0.6305, "step": 9675 }, { "epoch": 0.28, "grad_norm": 9.695402207985461, "learning_rate": 8.482595045115113e-06, "loss": 0.3241, "step": 9676 }, { "epoch": 0.28, "grad_norm": 4.873501571816265, "learning_rate": 8.482262265649706e-06, "loss": 0.5158, "step": 9677 }, { "epoch": 0.28, "grad_norm": 4.110109270875457, "learning_rate": 8.481929456226823e-06, "loss": 1.0163, "step": 9678 }, { "epoch": 0.28, "grad_norm": 6.443782333455702, "learning_rate": 8.48159661684933e-06, "loss": 0.6233, "step": 9679 }, { "epoch": 0.28, "grad_norm": 10.351443929892865, "learning_rate": 8.481263747520091e-06, "loss": 0.9988, "step": 9680 }, { "epoch": 0.28, "grad_norm": 7.826692095974624, "learning_rate": 8.480930848241969e-06, "loss": 0.398, "step": 9681 }, { "epoch": 0.28, "grad_norm": 8.509913889968152, "learning_rate": 8.480597919017827e-06, "loss": 0.5348, "step": 9682 }, { "epoch": 0.28, "grad_norm": 16.640948311257294, "learning_rate": 8.48026495985053e-06, "loss": 0.5256, "step": 9683 }, { "epoch": 0.28, "grad_norm": 5.5897213797833585, "learning_rate": 8.479931970742942e-06, "loss": 0.2599, "step": 9684 }, { "epoch": 0.28, "grad_norm": 3.9567512947910006, "learning_rate": 8.479598951697927e-06, "loss": 0.4404, "step": 9685 }, { "epoch": 0.28, "grad_norm": 6.337348489248167, "learning_rate": 8.479265902718351e-06, "loss": 0.5903, "step": 9686 }, { "epoch": 0.28, "grad_norm": 7.772080499488366, "learning_rate": 8.47893282380708e-06, "loss": 0.5456, "step": 9687 }, { "epoch": 0.28, "grad_norm": 10.981851692194512, "learning_rate": 8.478599714966976e-06, "loss": 0.5882, "step": 9688 }, { "epoch": 0.28, "grad_norm": 10.136245322518652, "learning_rate": 8.478266576200908e-06, "loss": 0.5499, "step": 9689 }, { "epoch": 0.28, "grad_norm": 5.008580046346265, "learning_rate": 8.477933407511743e-06, "loss": 0.5842, "step": 9690 }, { "epoch": 0.28, "grad_norm": 5.956288856147443, "learning_rate": 8.477600208902342e-06, "loss": 0.3283, "step": 9691 }, { "epoch": 0.28, "grad_norm": 4.229330457450287, "learning_rate": 8.477266980375575e-06, "loss": 0.3376, "step": 9692 }, { "epoch": 0.28, "grad_norm": 5.852107370267695, "learning_rate": 8.476933721934308e-06, "loss": 0.5866, "step": 9693 }, { "epoch": 0.28, "grad_norm": 7.56682412604067, "learning_rate": 8.47660043358141e-06, "loss": 0.3307, "step": 9694 }, { "epoch": 0.28, "grad_norm": 7.788809784641561, "learning_rate": 8.476267115319746e-06, "loss": 1.1019, "step": 9695 }, { "epoch": 0.28, "grad_norm": 4.934155164173808, "learning_rate": 8.475933767152185e-06, "loss": 0.4488, "step": 9696 }, { "epoch": 0.28, "grad_norm": 5.032048464139915, "learning_rate": 8.475600389081592e-06, "loss": 0.3451, "step": 9697 }, { "epoch": 0.28, "grad_norm": 5.2170936560038035, "learning_rate": 8.475266981110836e-06, "loss": 0.6501, "step": 9698 }, { "epoch": 0.28, "grad_norm": 6.619272887528533, "learning_rate": 8.474933543242788e-06, "loss": 0.4998, "step": 9699 }, { "epoch": 0.28, "grad_norm": 8.0652024227501, "learning_rate": 8.474600075480314e-06, "loss": 0.7799, "step": 9700 }, { "epoch": 0.28, "grad_norm": 8.270814705034189, "learning_rate": 8.474266577826283e-06, "loss": 0.3376, "step": 9701 }, { "epoch": 0.28, "grad_norm": 7.669011469256541, "learning_rate": 8.473933050283564e-06, "loss": 0.7488, "step": 9702 }, { "epoch": 0.28, "grad_norm": 9.189318081292202, "learning_rate": 8.473599492855028e-06, "loss": 0.4355, "step": 9703 }, { "epoch": 0.28, "grad_norm": 6.3922887375898485, "learning_rate": 8.473265905543544e-06, "loss": 0.6875, "step": 9704 }, { "epoch": 0.28, "grad_norm": 6.348738226597057, "learning_rate": 8.472932288351978e-06, "loss": 0.6184, "step": 9705 }, { "epoch": 0.28, "grad_norm": 5.373177640842752, "learning_rate": 8.472598641283205e-06, "loss": 0.6198, "step": 9706 }, { "epoch": 0.28, "grad_norm": 4.829597223993767, "learning_rate": 8.472264964340094e-06, "loss": 0.1773, "step": 9707 }, { "epoch": 0.28, "grad_norm": 8.656882913635572, "learning_rate": 8.471931257525515e-06, "loss": 0.6346, "step": 9708 }, { "epoch": 0.28, "grad_norm": 64.75466859401776, "learning_rate": 8.471597520842337e-06, "loss": 0.724, "step": 9709 }, { "epoch": 0.28, "grad_norm": 10.3176045586677, "learning_rate": 8.471263754293434e-06, "loss": 0.6522, "step": 9710 }, { "epoch": 0.28, "grad_norm": 7.4102270337124025, "learning_rate": 8.470929957881677e-06, "loss": 0.7343, "step": 9711 }, { "epoch": 0.28, "grad_norm": 4.827266332817708, "learning_rate": 8.470596131609937e-06, "loss": 0.5432, "step": 9712 }, { "epoch": 0.28, "grad_norm": 3.8101332775844656, "learning_rate": 8.470262275481085e-06, "loss": 0.3629, "step": 9713 }, { "epoch": 0.28, "grad_norm": 11.024419555608628, "learning_rate": 8.469928389497994e-06, "loss": 0.9992, "step": 9714 }, { "epoch": 0.28, "grad_norm": 22.196709325409227, "learning_rate": 8.469594473663536e-06, "loss": 0.6342, "step": 9715 }, { "epoch": 0.28, "grad_norm": 5.5208443647550345, "learning_rate": 8.469260527980582e-06, "loss": 0.4819, "step": 9716 }, { "epoch": 0.28, "grad_norm": 3.377052451284139, "learning_rate": 8.46892655245201e-06, "loss": 0.2106, "step": 9717 }, { "epoch": 0.28, "grad_norm": 6.853737567429065, "learning_rate": 8.46859254708069e-06, "loss": 0.4236, "step": 9718 }, { "epoch": 0.28, "grad_norm": 6.884388772376679, "learning_rate": 8.468258511869494e-06, "loss": 0.4795, "step": 9719 }, { "epoch": 0.28, "grad_norm": 4.957866240654067, "learning_rate": 8.467924446821299e-06, "loss": 0.2728, "step": 9720 }, { "epoch": 0.28, "grad_norm": 5.9140556869568375, "learning_rate": 8.467590351938975e-06, "loss": 0.6363, "step": 9721 }, { "epoch": 0.28, "grad_norm": 3.712537593201673, "learning_rate": 8.467256227225398e-06, "loss": 0.4927, "step": 9722 }, { "epoch": 0.28, "grad_norm": 3.48138131488119, "learning_rate": 8.466922072683444e-06, "loss": 0.4154, "step": 9723 }, { "epoch": 0.28, "grad_norm": 8.763683410052025, "learning_rate": 8.466587888315985e-06, "loss": 0.6606, "step": 9724 }, { "epoch": 0.28, "grad_norm": 7.564366890484453, "learning_rate": 8.466253674125898e-06, "loss": 0.5482, "step": 9725 }, { "epoch": 0.28, "grad_norm": 10.758688852910481, "learning_rate": 8.465919430116056e-06, "loss": 0.519, "step": 9726 }, { "epoch": 0.28, "grad_norm": 3.4881763677432223, "learning_rate": 8.465585156289337e-06, "loss": 0.4348, "step": 9727 }, { "epoch": 0.28, "grad_norm": 7.670732401429528, "learning_rate": 8.465250852648616e-06, "loss": 0.8106, "step": 9728 }, { "epoch": 0.28, "grad_norm": 6.268117584854017, "learning_rate": 8.464916519196768e-06, "loss": 0.4812, "step": 9729 }, { "epoch": 0.28, "grad_norm": 9.271237402407609, "learning_rate": 8.464582155936671e-06, "loss": 0.6934, "step": 9730 }, { "epoch": 0.28, "grad_norm": 5.738242962672484, "learning_rate": 8.464247762871198e-06, "loss": 0.4495, "step": 9731 }, { "epoch": 0.28, "grad_norm": 5.488867634029823, "learning_rate": 8.463913340003229e-06, "loss": 0.7875, "step": 9732 }, { "epoch": 0.28, "grad_norm": 5.566029832393353, "learning_rate": 8.46357888733564e-06, "loss": 0.4087, "step": 9733 }, { "epoch": 0.28, "grad_norm": 14.681487553194234, "learning_rate": 8.46324440487131e-06, "loss": 0.7067, "step": 9734 }, { "epoch": 0.28, "grad_norm": 4.611530588722961, "learning_rate": 8.462909892613113e-06, "loss": 0.3657, "step": 9735 }, { "epoch": 0.28, "grad_norm": 6.8333640524321035, "learning_rate": 8.462575350563929e-06, "loss": 0.5277, "step": 9736 }, { "epoch": 0.28, "grad_norm": 5.906990650704965, "learning_rate": 8.462240778726636e-06, "loss": 0.3901, "step": 9737 }, { "epoch": 0.28, "grad_norm": 5.954530084551763, "learning_rate": 8.46190617710411e-06, "loss": 0.5476, "step": 9738 }, { "epoch": 0.28, "grad_norm": 4.737625315677083, "learning_rate": 8.461571545699234e-06, "loss": 0.4523, "step": 9739 }, { "epoch": 0.28, "grad_norm": 8.748851591769327, "learning_rate": 8.461236884514882e-06, "loss": 1.2374, "step": 9740 }, { "epoch": 0.28, "grad_norm": 5.927693671349131, "learning_rate": 8.460902193553936e-06, "loss": 0.3195, "step": 9741 }, { "epoch": 0.28, "grad_norm": 5.6204466298154205, "learning_rate": 8.460567472819275e-06, "loss": 0.1843, "step": 9742 }, { "epoch": 0.28, "grad_norm": 8.543567638223015, "learning_rate": 8.460232722313776e-06, "loss": 0.8209, "step": 9743 }, { "epoch": 0.28, "grad_norm": 14.440054531258623, "learning_rate": 8.459897942040325e-06, "loss": 0.5811, "step": 9744 }, { "epoch": 0.28, "grad_norm": 4.996110356879652, "learning_rate": 8.459563132001796e-06, "loss": 0.3497, "step": 9745 }, { "epoch": 0.28, "grad_norm": 9.210565742322245, "learning_rate": 8.459228292201071e-06, "loss": 0.8777, "step": 9746 }, { "epoch": 0.28, "grad_norm": 9.316132828960782, "learning_rate": 8.45889342264103e-06, "loss": 0.3776, "step": 9747 }, { "epoch": 0.28, "grad_norm": 4.672184732944184, "learning_rate": 8.458558523324555e-06, "loss": 0.4565, "step": 9748 }, { "epoch": 0.28, "grad_norm": 11.195629384253943, "learning_rate": 8.458223594254529e-06, "loss": 0.6682, "step": 9749 }, { "epoch": 0.28, "grad_norm": 6.605383711706826, "learning_rate": 8.45788863543383e-06, "loss": 0.9813, "step": 9750 }, { "epoch": 0.28, "grad_norm": 6.360023934061373, "learning_rate": 8.45755364686534e-06, "loss": 0.7193, "step": 9751 }, { "epoch": 0.28, "grad_norm": 7.756780365571109, "learning_rate": 8.457218628551942e-06, "loss": 0.5676, "step": 9752 }, { "epoch": 0.28, "grad_norm": 6.236166025140386, "learning_rate": 8.456883580496518e-06, "loss": 0.7218, "step": 9753 }, { "epoch": 0.28, "grad_norm": 4.603650899913716, "learning_rate": 8.45654850270195e-06, "loss": 0.6532, "step": 9754 }, { "epoch": 0.28, "grad_norm": 5.466209606315382, "learning_rate": 8.456213395171122e-06, "loss": 0.5808, "step": 9755 }, { "epoch": 0.28, "grad_norm": 7.24713712927989, "learning_rate": 8.455878257906916e-06, "loss": 0.4395, "step": 9756 }, { "epoch": 0.28, "grad_norm": 3.541284802363774, "learning_rate": 8.455543090912212e-06, "loss": 0.1905, "step": 9757 }, { "epoch": 0.28, "grad_norm": 2.513442028240302, "learning_rate": 8.455207894189898e-06, "loss": 0.1243, "step": 9758 }, { "epoch": 0.28, "grad_norm": 7.0253687018571265, "learning_rate": 8.454872667742857e-06, "loss": 0.7115, "step": 9759 }, { "epoch": 0.28, "grad_norm": 6.6337167507651635, "learning_rate": 8.45453741157397e-06, "loss": 0.8645, "step": 9760 }, { "epoch": 0.28, "grad_norm": 6.6480958090992415, "learning_rate": 8.454202125686123e-06, "loss": 0.8223, "step": 9761 }, { "epoch": 0.28, "grad_norm": 3.699835285824871, "learning_rate": 8.453866810082201e-06, "loss": 0.3363, "step": 9762 }, { "epoch": 0.28, "grad_norm": 6.940311102916152, "learning_rate": 8.453531464765089e-06, "loss": 0.2974, "step": 9763 }, { "epoch": 0.28, "grad_norm": 5.972502761974311, "learning_rate": 8.45319608973767e-06, "loss": 0.7731, "step": 9764 }, { "epoch": 0.28, "grad_norm": 4.675582970770799, "learning_rate": 8.45286068500283e-06, "loss": 0.5535, "step": 9765 }, { "epoch": 0.28, "grad_norm": 8.4772837529772, "learning_rate": 8.452525250563455e-06, "loss": 1.0864, "step": 9766 }, { "epoch": 0.28, "grad_norm": 5.262048542474456, "learning_rate": 8.45218978642243e-06, "loss": 0.2639, "step": 9767 }, { "epoch": 0.28, "grad_norm": 5.715321998636272, "learning_rate": 8.451854292582641e-06, "loss": 0.4331, "step": 9768 }, { "epoch": 0.28, "grad_norm": 8.847775840372096, "learning_rate": 8.451518769046976e-06, "loss": 0.7853, "step": 9769 }, { "epoch": 0.28, "grad_norm": 5.69135836840971, "learning_rate": 8.45118321581832e-06, "loss": 0.4995, "step": 9770 }, { "epoch": 0.28, "grad_norm": 4.350758503960423, "learning_rate": 8.450847632899558e-06, "loss": 0.3014, "step": 9771 }, { "epoch": 0.28, "grad_norm": 3.6523700570367756, "learning_rate": 8.45051202029358e-06, "loss": 0.415, "step": 9772 }, { "epoch": 0.28, "grad_norm": 4.642396479588958, "learning_rate": 8.45017637800327e-06, "loss": 0.2993, "step": 9773 }, { "epoch": 0.28, "grad_norm": 9.190152933552175, "learning_rate": 8.44984070603152e-06, "loss": 0.6603, "step": 9774 }, { "epoch": 0.28, "grad_norm": 10.371471908198059, "learning_rate": 8.449505004381213e-06, "loss": 0.3867, "step": 9775 }, { "epoch": 0.28, "grad_norm": 4.548029880742575, "learning_rate": 8.44916927305524e-06, "loss": 0.4155, "step": 9776 }, { "epoch": 0.28, "grad_norm": 5.130644597385418, "learning_rate": 8.448833512056489e-06, "loss": 0.2996, "step": 9777 }, { "epoch": 0.28, "grad_norm": 7.056028743413955, "learning_rate": 8.448497721387848e-06, "loss": 0.2483, "step": 9778 }, { "epoch": 0.28, "grad_norm": 6.152081235098769, "learning_rate": 8.448161901052206e-06, "loss": 0.4589, "step": 9779 }, { "epoch": 0.28, "grad_norm": 4.286643369195098, "learning_rate": 8.44782605105245e-06, "loss": 0.5256, "step": 9780 }, { "epoch": 0.28, "grad_norm": 5.528157549687331, "learning_rate": 8.447490171391473e-06, "loss": 0.4768, "step": 9781 }, { "epoch": 0.28, "grad_norm": 10.66993193021046, "learning_rate": 8.447154262072162e-06, "loss": 0.5431, "step": 9782 }, { "epoch": 0.28, "grad_norm": 4.985599594596644, "learning_rate": 8.446818323097405e-06, "loss": 0.121, "step": 9783 }, { "epoch": 0.28, "grad_norm": 8.686851861501488, "learning_rate": 8.446482354470097e-06, "loss": 0.7847, "step": 9784 }, { "epoch": 0.28, "grad_norm": 7.471645161194081, "learning_rate": 8.446146356193124e-06, "loss": 0.7439, "step": 9785 }, { "epoch": 0.28, "grad_norm": 8.797452402899404, "learning_rate": 8.44581032826938e-06, "loss": 0.3828, "step": 9786 }, { "epoch": 0.28, "grad_norm": 8.941600419148973, "learning_rate": 8.445474270701751e-06, "loss": 0.8268, "step": 9787 }, { "epoch": 0.28, "grad_norm": 8.271245822821847, "learning_rate": 8.445138183493136e-06, "loss": 1.0383, "step": 9788 }, { "epoch": 0.28, "grad_norm": 9.471426708218786, "learning_rate": 8.444802066646417e-06, "loss": 0.4013, "step": 9789 }, { "epoch": 0.28, "grad_norm": 7.681101857953603, "learning_rate": 8.44446592016449e-06, "loss": 0.5732, "step": 9790 }, { "epoch": 0.28, "grad_norm": 3.072915388635057, "learning_rate": 8.444129744050248e-06, "loss": 0.2828, "step": 9791 }, { "epoch": 0.28, "grad_norm": 7.130903074098761, "learning_rate": 8.443793538306583e-06, "loss": 0.3129, "step": 9792 }, { "epoch": 0.28, "grad_norm": 6.410998040995866, "learning_rate": 8.443457302936386e-06, "loss": 0.8466, "step": 9793 }, { "epoch": 0.28, "grad_norm": 6.87185076630792, "learning_rate": 8.443121037942549e-06, "loss": 0.372, "step": 9794 }, { "epoch": 0.28, "grad_norm": 7.173446715757598, "learning_rate": 8.442784743327965e-06, "loss": 0.821, "step": 9795 }, { "epoch": 0.28, "grad_norm": 3.4855636827131384, "learning_rate": 8.44244841909553e-06, "loss": 0.2541, "step": 9796 }, { "epoch": 0.28, "grad_norm": 6.844933707931209, "learning_rate": 8.442112065248133e-06, "loss": 0.0419, "step": 9797 }, { "epoch": 0.28, "grad_norm": 6.821296052922332, "learning_rate": 8.44177568178867e-06, "loss": 0.423, "step": 9798 }, { "epoch": 0.28, "grad_norm": 3.415535441042046, "learning_rate": 8.441439268720036e-06, "loss": 0.2609, "step": 9799 }, { "epoch": 0.28, "grad_norm": 4.760452214726694, "learning_rate": 8.441102826045121e-06, "loss": 0.4158, "step": 9800 }, { "epoch": 0.28, "grad_norm": 7.278621149902709, "learning_rate": 8.440766353766823e-06, "loss": 0.2066, "step": 9801 }, { "epoch": 0.28, "grad_norm": 10.033642920177785, "learning_rate": 8.440429851888037e-06, "loss": 0.5271, "step": 9802 }, { "epoch": 0.28, "grad_norm": 3.8295570030686577, "learning_rate": 8.440093320411656e-06, "loss": 0.1718, "step": 9803 }, { "epoch": 0.28, "grad_norm": 9.645898696535143, "learning_rate": 8.439756759340576e-06, "loss": 0.8916, "step": 9804 }, { "epoch": 0.28, "grad_norm": 6.094927238799873, "learning_rate": 8.439420168677693e-06, "loss": 0.7405, "step": 9805 }, { "epoch": 0.28, "grad_norm": 5.8331122856038355, "learning_rate": 8.439083548425899e-06, "loss": 0.385, "step": 9806 }, { "epoch": 0.28, "grad_norm": 6.904424641895957, "learning_rate": 8.438746898588092e-06, "loss": 0.645, "step": 9807 }, { "epoch": 0.28, "grad_norm": 6.838957759132686, "learning_rate": 8.438410219167172e-06, "loss": 0.3466, "step": 9808 }, { "epoch": 0.28, "grad_norm": 5.617065067575184, "learning_rate": 8.438073510166032e-06, "loss": 0.4532, "step": 9809 }, { "epoch": 0.28, "grad_norm": 7.488033603001862, "learning_rate": 8.437736771587567e-06, "loss": 0.6418, "step": 9810 }, { "epoch": 0.28, "grad_norm": 6.349999529921146, "learning_rate": 8.437400003434678e-06, "loss": 0.2044, "step": 9811 }, { "epoch": 0.28, "grad_norm": 6.070138661216401, "learning_rate": 8.437063205710259e-06, "loss": 0.471, "step": 9812 }, { "epoch": 0.28, "grad_norm": 5.921511515631861, "learning_rate": 8.436726378417208e-06, "loss": 0.6205, "step": 9813 }, { "epoch": 0.28, "grad_norm": 6.4270367819049765, "learning_rate": 8.436389521558424e-06, "loss": 0.4044, "step": 9814 }, { "epoch": 0.28, "grad_norm": 4.6368873134911786, "learning_rate": 8.436052635136802e-06, "loss": 0.268, "step": 9815 }, { "epoch": 0.28, "grad_norm": 3.119104317087719, "learning_rate": 8.435715719155244e-06, "loss": 0.2987, "step": 9816 }, { "epoch": 0.28, "grad_norm": 7.693255743145639, "learning_rate": 8.435378773616646e-06, "loss": 0.4537, "step": 9817 }, { "epoch": 0.28, "grad_norm": 4.603491801288058, "learning_rate": 8.43504179852391e-06, "loss": 0.6937, "step": 9818 }, { "epoch": 0.28, "grad_norm": 4.846675924299302, "learning_rate": 8.434704793879929e-06, "loss": 0.0897, "step": 9819 }, { "epoch": 0.28, "grad_norm": 14.98379798708992, "learning_rate": 8.434367759687606e-06, "loss": 1.0535, "step": 9820 }, { "epoch": 0.28, "grad_norm": 4.723553719764364, "learning_rate": 8.434030695949842e-06, "loss": 0.6106, "step": 9821 }, { "epoch": 0.28, "grad_norm": 3.826757879787264, "learning_rate": 8.433693602669532e-06, "loss": 0.2182, "step": 9822 }, { "epoch": 0.28, "grad_norm": 6.069368620436942, "learning_rate": 8.43335647984958e-06, "loss": 0.3885, "step": 9823 }, { "epoch": 0.28, "grad_norm": 5.308369680227328, "learning_rate": 8.433019327492886e-06, "loss": 0.3219, "step": 9824 }, { "epoch": 0.28, "grad_norm": 6.5936373298745385, "learning_rate": 8.432682145602349e-06, "loss": 0.2807, "step": 9825 }, { "epoch": 0.28, "grad_norm": 4.857071309003465, "learning_rate": 8.432344934180868e-06, "loss": 0.3391, "step": 9826 }, { "epoch": 0.28, "grad_norm": 5.81738223872647, "learning_rate": 8.432007693231348e-06, "loss": 0.476, "step": 9827 }, { "epoch": 0.28, "grad_norm": 5.590671608625564, "learning_rate": 8.431670422756687e-06, "loss": 0.724, "step": 9828 }, { "epoch": 0.28, "grad_norm": 7.36844472988885, "learning_rate": 8.43133312275979e-06, "loss": 0.7789, "step": 9829 }, { "epoch": 0.28, "grad_norm": 3.3918972177587934, "learning_rate": 8.430995793243556e-06, "loss": 0.4217, "step": 9830 }, { "epoch": 0.28, "grad_norm": 3.384676237076137, "learning_rate": 8.430658434210887e-06, "loss": 0.6044, "step": 9831 }, { "epoch": 0.28, "grad_norm": 6.777004934850897, "learning_rate": 8.430321045664685e-06, "loss": 0.6201, "step": 9832 }, { "epoch": 0.28, "grad_norm": 4.30897532733503, "learning_rate": 8.429983627607855e-06, "loss": 0.7479, "step": 9833 }, { "epoch": 0.28, "grad_norm": 4.405188628800833, "learning_rate": 8.429646180043298e-06, "loss": 0.7346, "step": 9834 }, { "epoch": 0.28, "grad_norm": 6.063732218624883, "learning_rate": 8.429308702973918e-06, "loss": 0.7144, "step": 9835 }, { "epoch": 0.28, "grad_norm": 5.184943522897075, "learning_rate": 8.428971196402616e-06, "loss": 0.5235, "step": 9836 }, { "epoch": 0.28, "grad_norm": 6.693221737259352, "learning_rate": 8.428633660332298e-06, "loss": 0.4537, "step": 9837 }, { "epoch": 0.28, "grad_norm": 4.466105712473156, "learning_rate": 8.428296094765864e-06, "loss": 0.3749, "step": 9838 }, { "epoch": 0.28, "grad_norm": 3.546369407345794, "learning_rate": 8.427958499706223e-06, "loss": 0.5081, "step": 9839 }, { "epoch": 0.28, "grad_norm": 6.760641857407268, "learning_rate": 8.427620875156278e-06, "loss": 0.8664, "step": 9840 }, { "epoch": 0.28, "grad_norm": 6.924616148187829, "learning_rate": 8.427283221118932e-06, "loss": 0.7362, "step": 9841 }, { "epoch": 0.28, "grad_norm": 2.9413984584831803, "learning_rate": 8.426945537597089e-06, "loss": 0.5085, "step": 9842 }, { "epoch": 0.28, "grad_norm": 4.147342357732274, "learning_rate": 8.426607824593656e-06, "loss": 0.4515, "step": 9843 }, { "epoch": 0.28, "grad_norm": 10.150832764988, "learning_rate": 8.426270082111539e-06, "loss": 0.7521, "step": 9844 }, { "epoch": 0.28, "grad_norm": 5.069271927195925, "learning_rate": 8.425932310153642e-06, "loss": 0.4318, "step": 9845 }, { "epoch": 0.28, "grad_norm": 3.975099425601882, "learning_rate": 8.42559450872287e-06, "loss": 0.5341, "step": 9846 }, { "epoch": 0.28, "grad_norm": 12.10730482556544, "learning_rate": 8.42525667782213e-06, "loss": 0.8962, "step": 9847 }, { "epoch": 0.28, "grad_norm": 5.581471526564967, "learning_rate": 8.42491881745433e-06, "loss": 0.4519, "step": 9848 }, { "epoch": 0.28, "grad_norm": 3.0347751491091284, "learning_rate": 8.424580927622375e-06, "loss": 0.299, "step": 9849 }, { "epoch": 0.28, "grad_norm": 3.8749474552652834, "learning_rate": 8.424243008329172e-06, "loss": 0.4908, "step": 9850 }, { "epoch": 0.28, "grad_norm": 11.568858609753631, "learning_rate": 8.423905059577625e-06, "loss": 0.5341, "step": 9851 }, { "epoch": 0.28, "grad_norm": 5.7859113716856205, "learning_rate": 8.423567081370649e-06, "loss": 0.6505, "step": 9852 }, { "epoch": 0.28, "grad_norm": 9.031583100086175, "learning_rate": 8.423229073711145e-06, "loss": 0.5812, "step": 9853 }, { "epoch": 0.28, "grad_norm": 5.083485728725865, "learning_rate": 8.42289103660202e-06, "loss": 0.2756, "step": 9854 }, { "epoch": 0.28, "grad_norm": 6.442573103928499, "learning_rate": 8.422552970046189e-06, "loss": 0.6561, "step": 9855 }, { "epoch": 0.28, "grad_norm": 4.907135974560323, "learning_rate": 8.422214874046555e-06, "loss": 0.7973, "step": 9856 }, { "epoch": 0.28, "grad_norm": 5.879553673666036, "learning_rate": 8.421876748606026e-06, "loss": 0.3158, "step": 9857 }, { "epoch": 0.28, "grad_norm": 3.139024753827809, "learning_rate": 8.421538593727514e-06, "loss": 0.2105, "step": 9858 }, { "epoch": 0.28, "grad_norm": 5.598674268983253, "learning_rate": 8.421200409413927e-06, "loss": 0.483, "step": 9859 }, { "epoch": 0.28, "grad_norm": 2.404310807436404, "learning_rate": 8.420862195668175e-06, "loss": 0.1684, "step": 9860 }, { "epoch": 0.28, "grad_norm": 4.49326710871975, "learning_rate": 8.420523952493165e-06, "loss": 0.4598, "step": 9861 }, { "epoch": 0.28, "grad_norm": 8.600749879125445, "learning_rate": 8.420185679891808e-06, "loss": 0.9149, "step": 9862 }, { "epoch": 0.28, "grad_norm": 6.139866478738727, "learning_rate": 8.419847377867016e-06, "loss": 0.374, "step": 9863 }, { "epoch": 0.28, "grad_norm": 12.385201035470756, "learning_rate": 8.4195090464217e-06, "loss": 0.7991, "step": 9864 }, { "epoch": 0.28, "grad_norm": 3.8033786535502214, "learning_rate": 8.419170685558766e-06, "loss": 0.2624, "step": 9865 }, { "epoch": 0.28, "grad_norm": 5.00056053834286, "learning_rate": 8.418832295281127e-06, "loss": 0.8546, "step": 9866 }, { "epoch": 0.28, "grad_norm": 3.494452849032496, "learning_rate": 8.418493875591696e-06, "loss": 0.3662, "step": 9867 }, { "epoch": 0.28, "grad_norm": 9.040693262912544, "learning_rate": 8.418155426493385e-06, "loss": 0.4988, "step": 9868 }, { "epoch": 0.28, "grad_norm": 6.3659724474522505, "learning_rate": 8.4178169479891e-06, "loss": 0.4065, "step": 9869 }, { "epoch": 0.28, "grad_norm": 5.753901111939746, "learning_rate": 8.417478440081758e-06, "loss": 1.0188, "step": 9870 }, { "epoch": 0.28, "grad_norm": 7.637244726931497, "learning_rate": 8.41713990277427e-06, "loss": 0.4574, "step": 9871 }, { "epoch": 0.28, "grad_norm": 10.893266442356934, "learning_rate": 8.41680133606955e-06, "loss": 0.9151, "step": 9872 }, { "epoch": 0.28, "grad_norm": 9.899809226692934, "learning_rate": 8.416462739970506e-06, "loss": 0.8502, "step": 9873 }, { "epoch": 0.28, "grad_norm": 6.454136840705426, "learning_rate": 8.416124114480055e-06, "loss": 0.5627, "step": 9874 }, { "epoch": 0.28, "grad_norm": 19.39775717837053, "learning_rate": 8.415785459601108e-06, "loss": 1.0432, "step": 9875 }, { "epoch": 0.28, "grad_norm": 5.9497415133836125, "learning_rate": 8.41544677533658e-06, "loss": 0.6842, "step": 9876 }, { "epoch": 0.28, "grad_norm": 8.734260571980768, "learning_rate": 8.415108061689383e-06, "loss": 0.7279, "step": 9877 }, { "epoch": 0.28, "grad_norm": 4.322548037825218, "learning_rate": 8.414769318662432e-06, "loss": 0.2858, "step": 9878 }, { "epoch": 0.28, "grad_norm": 3.288912228302436, "learning_rate": 8.414430546258643e-06, "loss": 0.3356, "step": 9879 }, { "epoch": 0.28, "grad_norm": 5.0531948662754065, "learning_rate": 8.414091744480925e-06, "loss": 0.4308, "step": 9880 }, { "epoch": 0.28, "grad_norm": 4.698591956291479, "learning_rate": 8.413752913332199e-06, "loss": 0.4954, "step": 9881 }, { "epoch": 0.28, "grad_norm": 4.472915095539406, "learning_rate": 8.413414052815375e-06, "loss": 0.1785, "step": 9882 }, { "epoch": 0.28, "grad_norm": 6.464716755060487, "learning_rate": 8.413075162933373e-06, "loss": 0.3202, "step": 9883 }, { "epoch": 0.28, "grad_norm": 8.943821038924368, "learning_rate": 8.412736243689104e-06, "loss": 0.7866, "step": 9884 }, { "epoch": 0.28, "grad_norm": 4.3656997739795695, "learning_rate": 8.412397295085484e-06, "loss": 0.5875, "step": 9885 }, { "epoch": 0.28, "grad_norm": 6.092006844842964, "learning_rate": 8.412058317125431e-06, "loss": 0.3473, "step": 9886 }, { "epoch": 0.28, "grad_norm": 5.328117213984642, "learning_rate": 8.411719309811862e-06, "loss": 0.3161, "step": 9887 }, { "epoch": 0.28, "grad_norm": 14.969680151978501, "learning_rate": 8.411380273147689e-06, "loss": 0.6059, "step": 9888 }, { "epoch": 0.28, "grad_norm": 6.17598848037436, "learning_rate": 8.411041207135832e-06, "loss": 0.3485, "step": 9889 }, { "epoch": 0.28, "grad_norm": 4.3441759079079265, "learning_rate": 8.41070211177921e-06, "loss": 0.581, "step": 9890 }, { "epoch": 0.28, "grad_norm": 8.910326376180949, "learning_rate": 8.410362987080735e-06, "loss": 0.909, "step": 9891 }, { "epoch": 0.28, "grad_norm": 3.920118660541908, "learning_rate": 8.410023833043331e-06, "loss": 0.6146, "step": 9892 }, { "epoch": 0.28, "grad_norm": 9.529798853733848, "learning_rate": 8.409684649669906e-06, "loss": 0.5657, "step": 9893 }, { "epoch": 0.28, "grad_norm": 4.81621279486536, "learning_rate": 8.409345436963388e-06, "loss": 0.5, "step": 9894 }, { "epoch": 0.28, "grad_norm": 5.129639642116315, "learning_rate": 8.409006194926691e-06, "loss": 0.2937, "step": 9895 }, { "epoch": 0.28, "grad_norm": 7.4108520888944796, "learning_rate": 8.408666923562731e-06, "loss": 0.7303, "step": 9896 }, { "epoch": 0.28, "grad_norm": 4.717211769448439, "learning_rate": 8.40832762287443e-06, "loss": 0.2329, "step": 9897 }, { "epoch": 0.28, "grad_norm": 4.867794090912581, "learning_rate": 8.407988292864708e-06, "loss": 0.8807, "step": 9898 }, { "epoch": 0.28, "grad_norm": 7.957393257846565, "learning_rate": 8.40764893353648e-06, "loss": 0.4128, "step": 9899 }, { "epoch": 0.28, "grad_norm": 3.682211073335637, "learning_rate": 8.40730954489267e-06, "loss": 0.2599, "step": 9900 }, { "epoch": 0.28, "grad_norm": 3.9953614400134607, "learning_rate": 8.406970126936194e-06, "loss": 0.5681, "step": 9901 }, { "epoch": 0.28, "grad_norm": 4.270722218750549, "learning_rate": 8.406630679669974e-06, "loss": 0.6196, "step": 9902 }, { "epoch": 0.28, "grad_norm": 5.024779904228393, "learning_rate": 8.406291203096928e-06, "loss": 0.6662, "step": 9903 }, { "epoch": 0.28, "grad_norm": 5.915890933838846, "learning_rate": 8.405951697219981e-06, "loss": 0.6599, "step": 9904 }, { "epoch": 0.28, "grad_norm": 8.365226850699978, "learning_rate": 8.405612162042047e-06, "loss": 0.6023, "step": 9905 }, { "epoch": 0.28, "grad_norm": 4.767456603230289, "learning_rate": 8.405272597566055e-06, "loss": 0.8767, "step": 9906 }, { "epoch": 0.28, "grad_norm": 3.2312128577689374, "learning_rate": 8.40493300379492e-06, "loss": 0.3754, "step": 9907 }, { "epoch": 0.28, "grad_norm": 3.780048400125003, "learning_rate": 8.404593380731566e-06, "loss": 0.4586, "step": 9908 }, { "epoch": 0.28, "grad_norm": 5.672884260186151, "learning_rate": 8.404253728378915e-06, "loss": 0.3308, "step": 9909 }, { "epoch": 0.28, "grad_norm": 4.137073217087545, "learning_rate": 8.403914046739887e-06, "loss": 0.2956, "step": 9910 }, { "epoch": 0.28, "grad_norm": 5.531582536776933, "learning_rate": 8.403574335817405e-06, "loss": 0.6431, "step": 9911 }, { "epoch": 0.28, "grad_norm": 9.985632875392483, "learning_rate": 8.403234595614394e-06, "loss": 0.4386, "step": 9912 }, { "epoch": 0.28, "grad_norm": 5.875642457288281, "learning_rate": 8.402894826133774e-06, "loss": 0.2902, "step": 9913 }, { "epoch": 0.28, "grad_norm": 2.3008578017344283, "learning_rate": 8.40255502737847e-06, "loss": 0.2379, "step": 9914 }, { "epoch": 0.28, "grad_norm": 5.023706882782495, "learning_rate": 8.402215199351403e-06, "loss": 0.9133, "step": 9915 }, { "epoch": 0.28, "grad_norm": 9.93846298446245, "learning_rate": 8.401875342055497e-06, "loss": 0.719, "step": 9916 }, { "epoch": 0.28, "grad_norm": 3.8549766651639725, "learning_rate": 8.401535455493677e-06, "loss": 0.5892, "step": 9917 }, { "epoch": 0.28, "grad_norm": 7.227476517894156, "learning_rate": 8.401195539668865e-06, "loss": 0.9476, "step": 9918 }, { "epoch": 0.28, "grad_norm": 6.772064386596851, "learning_rate": 8.400855594583988e-06, "loss": 0.73, "step": 9919 }, { "epoch": 0.28, "grad_norm": 5.676559069396457, "learning_rate": 8.400515620241967e-06, "loss": 0.3441, "step": 9920 }, { "epoch": 0.28, "grad_norm": 8.377372747198718, "learning_rate": 8.400175616645731e-06, "loss": 0.6786, "step": 9921 }, { "epoch": 0.28, "grad_norm": 4.927065144264031, "learning_rate": 8.399835583798202e-06, "loss": 0.5395, "step": 9922 }, { "epoch": 0.28, "grad_norm": 4.217309946188501, "learning_rate": 8.399495521702306e-06, "loss": 0.6154, "step": 9923 }, { "epoch": 0.28, "grad_norm": 6.3085756850795915, "learning_rate": 8.399155430360968e-06, "loss": 0.8285, "step": 9924 }, { "epoch": 0.28, "grad_norm": 6.703818545441842, "learning_rate": 8.398815309777117e-06, "loss": 0.509, "step": 9925 }, { "epoch": 0.28, "grad_norm": 7.469867447132805, "learning_rate": 8.398475159953674e-06, "loss": 0.8957, "step": 9926 }, { "epoch": 0.28, "grad_norm": 15.316533269844387, "learning_rate": 8.398134980893568e-06, "loss": 0.9051, "step": 9927 }, { "epoch": 0.28, "grad_norm": 5.294428392559178, "learning_rate": 8.397794772599725e-06, "loss": 0.6524, "step": 9928 }, { "epoch": 0.28, "grad_norm": 8.511894710940062, "learning_rate": 8.397454535075073e-06, "loss": 0.9364, "step": 9929 }, { "epoch": 0.28, "grad_norm": 4.700360927521259, "learning_rate": 8.397114268322538e-06, "loss": 0.3824, "step": 9930 }, { "epoch": 0.28, "grad_norm": 5.7294489935467015, "learning_rate": 8.396773972345045e-06, "loss": 0.469, "step": 9931 }, { "epoch": 0.28, "grad_norm": 3.7726795074953063, "learning_rate": 8.396433647145525e-06, "loss": 0.3324, "step": 9932 }, { "epoch": 0.28, "grad_norm": 2.3016215993385507, "learning_rate": 8.396093292726904e-06, "loss": 0.2133, "step": 9933 }, { "epoch": 0.28, "grad_norm": 9.826538200427226, "learning_rate": 8.395752909092113e-06, "loss": 1.0614, "step": 9934 }, { "epoch": 0.28, "grad_norm": 2.6080413196850682, "learning_rate": 8.395412496244075e-06, "loss": 0.156, "step": 9935 }, { "epoch": 0.28, "grad_norm": 6.306336519311335, "learning_rate": 8.395072054185723e-06, "loss": 0.5045, "step": 9936 }, { "epoch": 0.28, "grad_norm": 2.1692424160186308, "learning_rate": 8.394731582919983e-06, "loss": 0.1208, "step": 9937 }, { "epoch": 0.28, "grad_norm": 6.705656129507183, "learning_rate": 8.394391082449785e-06, "loss": 0.6205, "step": 9938 }, { "epoch": 0.28, "grad_norm": 8.245127539541796, "learning_rate": 8.39405055277806e-06, "loss": 1.0222, "step": 9939 }, { "epoch": 0.28, "grad_norm": 8.855568581091177, "learning_rate": 8.393709993907735e-06, "loss": 0.6519, "step": 9940 }, { "epoch": 0.28, "grad_norm": 8.884772923991287, "learning_rate": 8.39336940584174e-06, "loss": 0.5134, "step": 9941 }, { "epoch": 0.28, "grad_norm": 7.105650343207633, "learning_rate": 8.393028788583006e-06, "loss": 0.8911, "step": 9942 }, { "epoch": 0.28, "grad_norm": 3.733615909821162, "learning_rate": 8.392688142134464e-06, "loss": 0.418, "step": 9943 }, { "epoch": 0.28, "grad_norm": 4.938721035018172, "learning_rate": 8.392347466499045e-06, "loss": 0.2841, "step": 9944 }, { "epoch": 0.28, "grad_norm": 5.468734566803, "learning_rate": 8.392006761679677e-06, "loss": 0.7274, "step": 9945 }, { "epoch": 0.28, "grad_norm": 3.5244065705069394, "learning_rate": 8.39166602767929e-06, "loss": 0.2537, "step": 9946 }, { "epoch": 0.28, "grad_norm": 6.693184976390076, "learning_rate": 8.39132526450082e-06, "loss": 0.539, "step": 9947 }, { "epoch": 0.28, "grad_norm": 5.205124479448846, "learning_rate": 8.390984472147196e-06, "loss": 0.5183, "step": 9948 }, { "epoch": 0.28, "grad_norm": 12.76051644837688, "learning_rate": 8.390643650621352e-06, "loss": 0.8852, "step": 9949 }, { "epoch": 0.28, "grad_norm": 7.182703358167668, "learning_rate": 8.390302799926215e-06, "loss": 0.3555, "step": 9950 }, { "epoch": 0.28, "grad_norm": 11.490598069471433, "learning_rate": 8.389961920064724e-06, "loss": 0.7658, "step": 9951 }, { "epoch": 0.29, "grad_norm": 6.2865477278789434, "learning_rate": 8.389621011039807e-06, "loss": 0.3196, "step": 9952 }, { "epoch": 0.29, "grad_norm": 4.708778607853973, "learning_rate": 8.389280072854396e-06, "loss": 0.7488, "step": 9953 }, { "epoch": 0.29, "grad_norm": 6.284601029362691, "learning_rate": 8.388939105511425e-06, "loss": 0.5545, "step": 9954 }, { "epoch": 0.29, "grad_norm": 5.917134382897462, "learning_rate": 8.38859810901383e-06, "loss": 0.7696, "step": 9955 }, { "epoch": 0.29, "grad_norm": 4.8329437800428305, "learning_rate": 8.388257083364542e-06, "loss": 0.3632, "step": 9956 }, { "epoch": 0.29, "grad_norm": 4.265462753507998, "learning_rate": 8.387916028566496e-06, "loss": 0.2194, "step": 9957 }, { "epoch": 0.29, "grad_norm": 9.353105537249004, "learning_rate": 8.387574944622625e-06, "loss": 0.9177, "step": 9958 }, { "epoch": 0.29, "grad_norm": 7.232488035949664, "learning_rate": 8.387233831535865e-06, "loss": 0.6399, "step": 9959 }, { "epoch": 0.29, "grad_norm": 6.159694269693404, "learning_rate": 8.386892689309149e-06, "loss": 0.5903, "step": 9960 }, { "epoch": 0.29, "grad_norm": 8.881285362522238, "learning_rate": 8.386551517945412e-06, "loss": 0.5221, "step": 9961 }, { "epoch": 0.29, "grad_norm": 2.9902904263555836, "learning_rate": 8.386210317447588e-06, "loss": 0.3919, "step": 9962 }, { "epoch": 0.29, "grad_norm": 7.39008405129172, "learning_rate": 8.385869087818615e-06, "loss": 0.5008, "step": 9963 }, { "epoch": 0.29, "grad_norm": 5.956541147551519, "learning_rate": 8.385527829061426e-06, "loss": 0.3507, "step": 9964 }, { "epoch": 0.29, "grad_norm": 6.568098459497789, "learning_rate": 8.38518654117896e-06, "loss": 0.5481, "step": 9965 }, { "epoch": 0.29, "grad_norm": 3.7048364217583623, "learning_rate": 8.38484522417415e-06, "loss": 0.7212, "step": 9966 }, { "epoch": 0.29, "grad_norm": 8.09391042189519, "learning_rate": 8.384503878049932e-06, "loss": 0.528, "step": 9967 }, { "epoch": 0.29, "grad_norm": 6.1718413146826006, "learning_rate": 8.384162502809245e-06, "loss": 0.2911, "step": 9968 }, { "epoch": 0.29, "grad_norm": 8.538238122625033, "learning_rate": 8.383821098455025e-06, "loss": 0.7765, "step": 9969 }, { "epoch": 0.29, "grad_norm": 8.465902011489003, "learning_rate": 8.383479664990208e-06, "loss": 0.7273, "step": 9970 }, { "epoch": 0.29, "grad_norm": 4.147369764604486, "learning_rate": 8.383138202417732e-06, "loss": 0.1593, "step": 9971 }, { "epoch": 0.29, "grad_norm": 3.1834002476960945, "learning_rate": 8.382796710740534e-06, "loss": 0.3127, "step": 9972 }, { "epoch": 0.29, "grad_norm": 5.795958413100598, "learning_rate": 8.382455189961553e-06, "loss": 0.2481, "step": 9973 }, { "epoch": 0.29, "grad_norm": 4.856635103212597, "learning_rate": 8.382113640083726e-06, "loss": 0.5211, "step": 9974 }, { "epoch": 0.29, "grad_norm": 3.174551900646367, "learning_rate": 8.381772061109993e-06, "loss": 0.1571, "step": 9975 }, { "epoch": 0.29, "grad_norm": 5.536621158343163, "learning_rate": 8.381430453043291e-06, "loss": 0.3102, "step": 9976 }, { "epoch": 0.29, "grad_norm": 4.723081457591813, "learning_rate": 8.38108881588656e-06, "loss": 0.4411, "step": 9977 }, { "epoch": 0.29, "grad_norm": 8.52569552965468, "learning_rate": 8.380747149642737e-06, "loss": 0.8187, "step": 9978 }, { "epoch": 0.29, "grad_norm": 5.926134897340101, "learning_rate": 8.380405454314762e-06, "loss": 0.2891, "step": 9979 }, { "epoch": 0.29, "grad_norm": 8.442435170546867, "learning_rate": 8.380063729905575e-06, "loss": 0.631, "step": 9980 }, { "epoch": 0.29, "grad_norm": 4.767110449244235, "learning_rate": 8.379721976418117e-06, "loss": 0.4432, "step": 9981 }, { "epoch": 0.29, "grad_norm": 6.873244460253134, "learning_rate": 8.379380193855326e-06, "loss": 0.3661, "step": 9982 }, { "epoch": 0.29, "grad_norm": 6.331665442676565, "learning_rate": 8.379038382220144e-06, "loss": 0.653, "step": 9983 }, { "epoch": 0.29, "grad_norm": 3.294133533536646, "learning_rate": 8.378696541515513e-06, "loss": 0.4758, "step": 9984 }, { "epoch": 0.29, "grad_norm": 7.187625983419481, "learning_rate": 8.378354671744368e-06, "loss": 0.243, "step": 9985 }, { "epoch": 0.29, "grad_norm": 8.858267148028713, "learning_rate": 8.378012772909654e-06, "loss": 0.6641, "step": 9986 }, { "epoch": 0.29, "grad_norm": 4.1396124987381935, "learning_rate": 8.377670845014314e-06, "loss": 0.5006, "step": 9987 }, { "epoch": 0.29, "grad_norm": 6.63326428136236, "learning_rate": 8.377328888061287e-06, "loss": 0.4727, "step": 9988 }, { "epoch": 0.29, "grad_norm": 5.236857813246847, "learning_rate": 8.376986902053514e-06, "loss": 0.87, "step": 9989 }, { "epoch": 0.29, "grad_norm": 7.650902573940074, "learning_rate": 8.37664488699394e-06, "loss": 0.5705, "step": 9990 }, { "epoch": 0.29, "grad_norm": 3.815536946648322, "learning_rate": 8.376302842885505e-06, "loss": 0.5606, "step": 9991 }, { "epoch": 0.29, "grad_norm": 2.7076235256741397, "learning_rate": 8.375960769731153e-06, "loss": 0.2415, "step": 9992 }, { "epoch": 0.29, "grad_norm": 4.004913917835572, "learning_rate": 8.375618667533826e-06, "loss": 0.2356, "step": 9993 }, { "epoch": 0.29, "grad_norm": 7.330216294411207, "learning_rate": 8.375276536296466e-06, "loss": 0.6687, "step": 9994 }, { "epoch": 0.29, "grad_norm": 5.784820279006115, "learning_rate": 8.37493437602202e-06, "loss": 0.3385, "step": 9995 }, { "epoch": 0.29, "grad_norm": 2.5194357565592522, "learning_rate": 8.374592186713427e-06, "loss": 0.1854, "step": 9996 }, { "epoch": 0.29, "grad_norm": 7.139279295079399, "learning_rate": 8.374249968373633e-06, "loss": 0.5802, "step": 9997 }, { "epoch": 0.29, "grad_norm": 4.9637980226467056, "learning_rate": 8.373907721005584e-06, "loss": 0.6259, "step": 9998 }, { "epoch": 0.29, "grad_norm": 8.303033529459027, "learning_rate": 8.373565444612219e-06, "loss": 0.8514, "step": 9999 }, { "epoch": 0.29, "grad_norm": 9.744111801780756, "learning_rate": 8.37322313919649e-06, "loss": 0.9974, "step": 10000 }, { "epoch": 0.29, "grad_norm": 9.801210615256283, "learning_rate": 8.372880804761334e-06, "loss": 0.678, "step": 10001 }, { "epoch": 0.29, "grad_norm": 4.080643880797223, "learning_rate": 8.3725384413097e-06, "loss": 0.3391, "step": 10002 }, { "epoch": 0.29, "grad_norm": 6.21136830113404, "learning_rate": 8.372196048844534e-06, "loss": 0.6591, "step": 10003 }, { "epoch": 0.29, "grad_norm": 5.08009150062217, "learning_rate": 8.371853627368783e-06, "loss": 0.575, "step": 10004 }, { "epoch": 0.29, "grad_norm": 5.605583758619822, "learning_rate": 8.371511176885387e-06, "loss": 0.9156, "step": 10005 }, { "epoch": 0.29, "grad_norm": 10.54567303625509, "learning_rate": 8.371168697397296e-06, "loss": 0.967, "step": 10006 }, { "epoch": 0.29, "grad_norm": 8.221348050904359, "learning_rate": 8.370826188907455e-06, "loss": 0.6241, "step": 10007 }, { "epoch": 0.29, "grad_norm": 3.472594047135186, "learning_rate": 8.370483651418815e-06, "loss": 0.3113, "step": 10008 }, { "epoch": 0.29, "grad_norm": 4.586488992780758, "learning_rate": 8.370141084934316e-06, "loss": 0.3637, "step": 10009 }, { "epoch": 0.29, "grad_norm": 4.404600714037966, "learning_rate": 8.369798489456908e-06, "loss": 0.5355, "step": 10010 }, { "epoch": 0.29, "grad_norm": 3.5587485789546403, "learning_rate": 8.36945586498954e-06, "loss": 0.4426, "step": 10011 }, { "epoch": 0.29, "grad_norm": 6.04740164185956, "learning_rate": 8.369113211535159e-06, "loss": 0.5023, "step": 10012 }, { "epoch": 0.29, "grad_norm": 7.475276282317514, "learning_rate": 8.368770529096708e-06, "loss": 0.8757, "step": 10013 }, { "epoch": 0.29, "grad_norm": 3.790627885708886, "learning_rate": 8.368427817677143e-06, "loss": 0.5486, "step": 10014 }, { "epoch": 0.29, "grad_norm": 6.417472908429456, "learning_rate": 8.368085077279406e-06, "loss": 0.7971, "step": 10015 }, { "epoch": 0.29, "grad_norm": 4.737036105067674, "learning_rate": 8.367742307906449e-06, "loss": 0.472, "step": 10016 }, { "epoch": 0.29, "grad_norm": 3.2786321278660076, "learning_rate": 8.367399509561219e-06, "loss": 0.2916, "step": 10017 }, { "epoch": 0.29, "grad_norm": 5.590349708973655, "learning_rate": 8.367056682246663e-06, "loss": 0.632, "step": 10018 }, { "epoch": 0.29, "grad_norm": 6.283925260122425, "learning_rate": 8.366713825965737e-06, "loss": 0.7927, "step": 10019 }, { "epoch": 0.29, "grad_norm": 6.688918489128061, "learning_rate": 8.366370940721384e-06, "loss": 0.5317, "step": 10020 }, { "epoch": 0.29, "grad_norm": 5.902511220660404, "learning_rate": 8.366028026516557e-06, "loss": 0.306, "step": 10021 }, { "epoch": 0.29, "grad_norm": 4.670807579348388, "learning_rate": 8.365685083354204e-06, "loss": 0.314, "step": 10022 }, { "epoch": 0.29, "grad_norm": 7.203877771724835, "learning_rate": 8.36534211123728e-06, "loss": 0.8034, "step": 10023 }, { "epoch": 0.29, "grad_norm": 6.68953859149326, "learning_rate": 8.36499911016873e-06, "loss": 0.3431, "step": 10024 }, { "epoch": 0.29, "grad_norm": 2.8863958196204273, "learning_rate": 8.364656080151507e-06, "loss": 0.2595, "step": 10025 }, { "epoch": 0.29, "grad_norm": 6.2246770935875535, "learning_rate": 8.364313021188562e-06, "loss": 0.3223, "step": 10026 }, { "epoch": 0.29, "grad_norm": 9.600675884931736, "learning_rate": 8.363969933282848e-06, "loss": 0.5353, "step": 10027 }, { "epoch": 0.29, "grad_norm": 7.324996508184128, "learning_rate": 8.363626816437311e-06, "loss": 0.6534, "step": 10028 }, { "epoch": 0.29, "grad_norm": 5.720931173971109, "learning_rate": 8.36328367065491e-06, "loss": 0.3418, "step": 10029 }, { "epoch": 0.29, "grad_norm": 6.24849866001029, "learning_rate": 8.362940495938592e-06, "loss": 0.5371, "step": 10030 }, { "epoch": 0.29, "grad_norm": 6.25842044082277, "learning_rate": 8.362597292291313e-06, "loss": 0.5643, "step": 10031 }, { "epoch": 0.29, "grad_norm": 5.098916832583228, "learning_rate": 8.362254059716021e-06, "loss": 0.4936, "step": 10032 }, { "epoch": 0.29, "grad_norm": 7.5086196957708395, "learning_rate": 8.361910798215673e-06, "loss": 0.5959, "step": 10033 }, { "epoch": 0.29, "grad_norm": 5.624860422733887, "learning_rate": 8.36156750779322e-06, "loss": 0.7414, "step": 10034 }, { "epoch": 0.29, "grad_norm": 6.248044509147919, "learning_rate": 8.361224188451615e-06, "loss": 0.4551, "step": 10035 }, { "epoch": 0.29, "grad_norm": 5.618252246097647, "learning_rate": 8.360880840193812e-06, "loss": 0.376, "step": 10036 }, { "epoch": 0.29, "grad_norm": 4.51412326432498, "learning_rate": 8.360537463022765e-06, "loss": 0.3896, "step": 10037 }, { "epoch": 0.29, "grad_norm": 5.087753323333331, "learning_rate": 8.36019405694143e-06, "loss": 0.7056, "step": 10038 }, { "epoch": 0.29, "grad_norm": 9.598251256016091, "learning_rate": 8.359850621952757e-06, "loss": 1.0308, "step": 10039 }, { "epoch": 0.29, "grad_norm": 12.795411786649034, "learning_rate": 8.359507158059703e-06, "loss": 0.8965, "step": 10040 }, { "epoch": 0.29, "grad_norm": 5.8648811206005576, "learning_rate": 8.359163665265225e-06, "loss": 0.8344, "step": 10041 }, { "epoch": 0.29, "grad_norm": 9.836199622869346, "learning_rate": 8.358820143572273e-06, "loss": 0.3806, "step": 10042 }, { "epoch": 0.29, "grad_norm": 7.5834067903531075, "learning_rate": 8.358476592983805e-06, "loss": 0.507, "step": 10043 }, { "epoch": 0.29, "grad_norm": 6.558109740437008, "learning_rate": 8.358133013502777e-06, "loss": 0.4194, "step": 10044 }, { "epoch": 0.29, "grad_norm": 5.728624376005423, "learning_rate": 8.357789405132143e-06, "loss": 0.9404, "step": 10045 }, { "epoch": 0.29, "grad_norm": 5.286466787046492, "learning_rate": 8.357445767874861e-06, "loss": 0.2997, "step": 10046 }, { "epoch": 0.29, "grad_norm": 8.595860003751897, "learning_rate": 8.357102101733887e-06, "loss": 0.7414, "step": 10047 }, { "epoch": 0.29, "grad_norm": 6.058064676840631, "learning_rate": 8.356758406712176e-06, "loss": 0.59, "step": 10048 }, { "epoch": 0.29, "grad_norm": 8.805774151174289, "learning_rate": 8.356414682812686e-06, "loss": 0.8782, "step": 10049 }, { "epoch": 0.29, "grad_norm": 4.444596204385311, "learning_rate": 8.356070930038375e-06, "loss": 0.3659, "step": 10050 }, { "epoch": 0.29, "grad_norm": 8.057217663994523, "learning_rate": 8.355727148392198e-06, "loss": 0.6168, "step": 10051 }, { "epoch": 0.29, "grad_norm": 4.053219891616031, "learning_rate": 8.355383337877114e-06, "loss": 0.245, "step": 10052 }, { "epoch": 0.29, "grad_norm": 5.974106866330882, "learning_rate": 8.355039498496078e-06, "loss": 0.2569, "step": 10053 }, { "epoch": 0.29, "grad_norm": 4.960689508442341, "learning_rate": 8.354695630252052e-06, "loss": 0.5322, "step": 10054 }, { "epoch": 0.29, "grad_norm": 3.945544608058678, "learning_rate": 8.354351733147994e-06, "loss": 0.2711, "step": 10055 }, { "epoch": 0.29, "grad_norm": 4.9844383307472535, "learning_rate": 8.35400780718686e-06, "loss": 0.5508, "step": 10056 }, { "epoch": 0.29, "grad_norm": 3.9676578550899224, "learning_rate": 8.353663852371608e-06, "loss": 0.3623, "step": 10057 }, { "epoch": 0.29, "grad_norm": 5.419601447453785, "learning_rate": 8.3533198687052e-06, "loss": 0.5212, "step": 10058 }, { "epoch": 0.29, "grad_norm": 5.709706644783286, "learning_rate": 8.352975856190594e-06, "loss": 0.5007, "step": 10059 }, { "epoch": 0.29, "grad_norm": 5.077057899450002, "learning_rate": 8.352631814830752e-06, "loss": 0.2692, "step": 10060 }, { "epoch": 0.29, "grad_norm": 3.6708237137309427, "learning_rate": 8.352287744628628e-06, "loss": 0.1833, "step": 10061 }, { "epoch": 0.29, "grad_norm": 7.901671520082273, "learning_rate": 8.351943645587186e-06, "loss": 0.6813, "step": 10062 }, { "epoch": 0.29, "grad_norm": 3.9858700210081284, "learning_rate": 8.351599517709385e-06, "loss": 0.3619, "step": 10063 }, { "epoch": 0.29, "grad_norm": 8.860897195825615, "learning_rate": 8.351255360998188e-06, "loss": 0.381, "step": 10064 }, { "epoch": 0.29, "grad_norm": 7.359418897234642, "learning_rate": 8.350911175456553e-06, "loss": 0.7293, "step": 10065 }, { "epoch": 0.29, "grad_norm": 8.684885852966216, "learning_rate": 8.350566961087441e-06, "loss": 0.8251, "step": 10066 }, { "epoch": 0.29, "grad_norm": 5.022827965759986, "learning_rate": 8.350222717893815e-06, "loss": 0.4443, "step": 10067 }, { "epoch": 0.29, "grad_norm": 6.56126581121434, "learning_rate": 8.349878445878634e-06, "loss": 0.3897, "step": 10068 }, { "epoch": 0.29, "grad_norm": 2.706798042501681, "learning_rate": 8.349534145044861e-06, "loss": 0.3307, "step": 10069 }, { "epoch": 0.29, "grad_norm": 5.061900727394703, "learning_rate": 8.34918981539546e-06, "loss": 0.3409, "step": 10070 }, { "epoch": 0.29, "grad_norm": 3.6402349958962676, "learning_rate": 8.34884545693339e-06, "loss": 0.5094, "step": 10071 }, { "epoch": 0.29, "grad_norm": 4.370354174789286, "learning_rate": 8.348501069661617e-06, "loss": 0.5863, "step": 10072 }, { "epoch": 0.29, "grad_norm": 7.0669393849680775, "learning_rate": 8.3481566535831e-06, "loss": 0.5519, "step": 10073 }, { "epoch": 0.29, "grad_norm": 5.508581229598592, "learning_rate": 8.347812208700803e-06, "loss": 0.379, "step": 10074 }, { "epoch": 0.29, "grad_norm": 5.040214655255977, "learning_rate": 8.347467735017689e-06, "loss": 0.2544, "step": 10075 }, { "epoch": 0.29, "grad_norm": 4.978205099856308, "learning_rate": 8.347123232536724e-06, "loss": 0.6259, "step": 10076 }, { "epoch": 0.29, "grad_norm": 5.696685165113337, "learning_rate": 8.346778701260868e-06, "loss": 0.5558, "step": 10077 }, { "epoch": 0.29, "grad_norm": 6.977761296977013, "learning_rate": 8.346434141193089e-06, "loss": 0.8669, "step": 10078 }, { "epoch": 0.29, "grad_norm": 3.9265909603011924, "learning_rate": 8.346089552336349e-06, "loss": 0.2971, "step": 10079 }, { "epoch": 0.29, "grad_norm": 16.664011279607315, "learning_rate": 8.345744934693611e-06, "loss": 0.9047, "step": 10080 }, { "epoch": 0.29, "grad_norm": 2.542539322979418, "learning_rate": 8.345400288267843e-06, "loss": 0.2752, "step": 10081 }, { "epoch": 0.29, "grad_norm": 6.0412328849374815, "learning_rate": 8.345055613062008e-06, "loss": 0.4228, "step": 10082 }, { "epoch": 0.29, "grad_norm": 7.513508901273857, "learning_rate": 8.34471090907907e-06, "loss": 0.6619, "step": 10083 }, { "epoch": 0.29, "grad_norm": 6.194952493720984, "learning_rate": 8.344366176321998e-06, "loss": 0.4435, "step": 10084 }, { "epoch": 0.29, "grad_norm": 4.690733925751041, "learning_rate": 8.344021414793753e-06, "loss": 0.4291, "step": 10085 }, { "epoch": 0.29, "grad_norm": 5.594889775094679, "learning_rate": 8.343676624497306e-06, "loss": 0.9673, "step": 10086 }, { "epoch": 0.29, "grad_norm": 4.8405741955508805, "learning_rate": 8.343331805435618e-06, "loss": 0.464, "step": 10087 }, { "epoch": 0.29, "grad_norm": 8.027536921717708, "learning_rate": 8.34298695761166e-06, "loss": 0.3856, "step": 10088 }, { "epoch": 0.29, "grad_norm": 4.503098268978878, "learning_rate": 8.342642081028395e-06, "loss": 0.2377, "step": 10089 }, { "epoch": 0.29, "grad_norm": 7.163958832667762, "learning_rate": 8.342297175688794e-06, "loss": 0.3291, "step": 10090 }, { "epoch": 0.29, "grad_norm": 6.08621939910949, "learning_rate": 8.34195224159582e-06, "loss": 0.608, "step": 10091 }, { "epoch": 0.29, "grad_norm": 5.0538593067743465, "learning_rate": 8.341607278752445e-06, "loss": 0.6676, "step": 10092 }, { "epoch": 0.29, "grad_norm": 3.4937073304056376, "learning_rate": 8.341262287161632e-06, "loss": 0.1771, "step": 10093 }, { "epoch": 0.29, "grad_norm": 8.118811642219693, "learning_rate": 8.340917266826352e-06, "loss": 0.7642, "step": 10094 }, { "epoch": 0.29, "grad_norm": 9.038913682719825, "learning_rate": 8.340572217749571e-06, "loss": 0.8182, "step": 10095 }, { "epoch": 0.29, "grad_norm": 2.7724739550062867, "learning_rate": 8.340227139934258e-06, "loss": 0.4632, "step": 10096 }, { "epoch": 0.29, "grad_norm": 7.730326693146667, "learning_rate": 8.339882033383385e-06, "loss": 0.5968, "step": 10097 }, { "epoch": 0.29, "grad_norm": 6.253934232326335, "learning_rate": 8.339536898099915e-06, "loss": 0.294, "step": 10098 }, { "epoch": 0.29, "grad_norm": 7.286407412354926, "learning_rate": 8.339191734086823e-06, "loss": 0.3039, "step": 10099 }, { "epoch": 0.29, "grad_norm": 7.50381404536093, "learning_rate": 8.338846541347072e-06, "loss": 0.4168, "step": 10100 }, { "epoch": 0.29, "grad_norm": 8.4148332531479, "learning_rate": 8.338501319883638e-06, "loss": 0.6793, "step": 10101 }, { "epoch": 0.29, "grad_norm": 8.105005752024883, "learning_rate": 8.338156069699487e-06, "loss": 0.6404, "step": 10102 }, { "epoch": 0.29, "grad_norm": 5.618315391153531, "learning_rate": 8.337810790797592e-06, "loss": 0.6013, "step": 10103 }, { "epoch": 0.29, "grad_norm": 5.840948075716628, "learning_rate": 8.337465483180919e-06, "loss": 0.3915, "step": 10104 }, { "epoch": 0.29, "grad_norm": 3.351307465780198, "learning_rate": 8.337120146852445e-06, "loss": 0.5002, "step": 10105 }, { "epoch": 0.29, "grad_norm": 3.0074292917255514, "learning_rate": 8.336774781815134e-06, "loss": 0.3654, "step": 10106 }, { "epoch": 0.29, "grad_norm": 7.284680520241256, "learning_rate": 8.336429388071961e-06, "loss": 0.6366, "step": 10107 }, { "epoch": 0.29, "grad_norm": 7.551411594025181, "learning_rate": 8.336083965625898e-06, "loss": 0.7316, "step": 10108 }, { "epoch": 0.29, "grad_norm": 4.3997614546615385, "learning_rate": 8.335738514479914e-06, "loss": 0.4227, "step": 10109 }, { "epoch": 0.29, "grad_norm": 5.609170468992695, "learning_rate": 8.335393034636982e-06, "loss": 0.8836, "step": 10110 }, { "epoch": 0.29, "grad_norm": 6.417104986833887, "learning_rate": 8.335047526100077e-06, "loss": 0.7821, "step": 10111 }, { "epoch": 0.29, "grad_norm": 4.833432733675601, "learning_rate": 8.334701988872166e-06, "loss": 0.5192, "step": 10112 }, { "epoch": 0.29, "grad_norm": 5.693056578133402, "learning_rate": 8.334356422956226e-06, "loss": 0.2696, "step": 10113 }, { "epoch": 0.29, "grad_norm": 6.452526110921806, "learning_rate": 8.334010828355228e-06, "loss": 0.6044, "step": 10114 }, { "epoch": 0.29, "grad_norm": 7.243085457811312, "learning_rate": 8.333665205072145e-06, "loss": 0.525, "step": 10115 }, { "epoch": 0.29, "grad_norm": 4.504510195521248, "learning_rate": 8.33331955310995e-06, "loss": 0.2801, "step": 10116 }, { "epoch": 0.29, "grad_norm": 4.229999505644965, "learning_rate": 8.33297387247162e-06, "loss": 0.2906, "step": 10117 }, { "epoch": 0.29, "grad_norm": 9.832473641871145, "learning_rate": 8.332628163160123e-06, "loss": 1.0028, "step": 10118 }, { "epoch": 0.29, "grad_norm": 4.623614902837916, "learning_rate": 8.332282425178438e-06, "loss": 0.4349, "step": 10119 }, { "epoch": 0.29, "grad_norm": 8.30786703542761, "learning_rate": 8.331936658529538e-06, "loss": 0.3859, "step": 10120 }, { "epoch": 0.29, "grad_norm": 3.101340989819547, "learning_rate": 8.331590863216395e-06, "loss": 0.311, "step": 10121 }, { "epoch": 0.29, "grad_norm": 12.357341729319188, "learning_rate": 8.331245039241988e-06, "loss": 0.4889, "step": 10122 }, { "epoch": 0.29, "grad_norm": 3.350946045138659, "learning_rate": 8.330899186609289e-06, "loss": 0.2191, "step": 10123 }, { "epoch": 0.29, "grad_norm": 4.844228683937424, "learning_rate": 8.330553305321275e-06, "loss": 0.3832, "step": 10124 }, { "epoch": 0.29, "grad_norm": 10.01765675996384, "learning_rate": 8.33020739538092e-06, "loss": 0.9327, "step": 10125 }, { "epoch": 0.29, "grad_norm": 5.595210518715875, "learning_rate": 8.329861456791202e-06, "loss": 0.8296, "step": 10126 }, { "epoch": 0.29, "grad_norm": 9.63065187056231, "learning_rate": 8.329515489555096e-06, "loss": 0.7097, "step": 10127 }, { "epoch": 0.29, "grad_norm": 3.547581404866943, "learning_rate": 8.329169493675577e-06, "loss": 0.6152, "step": 10128 }, { "epoch": 0.29, "grad_norm": 7.958415791615279, "learning_rate": 8.328823469155622e-06, "loss": 0.5085, "step": 10129 }, { "epoch": 0.29, "grad_norm": 4.753675168391572, "learning_rate": 8.328477415998211e-06, "loss": 0.5537, "step": 10130 }, { "epoch": 0.29, "grad_norm": 4.128368274067036, "learning_rate": 8.328131334206318e-06, "loss": 0.4713, "step": 10131 }, { "epoch": 0.29, "grad_norm": 9.418713698794978, "learning_rate": 8.32778522378292e-06, "loss": 0.4464, "step": 10132 }, { "epoch": 0.29, "grad_norm": 5.401730539159463, "learning_rate": 8.327439084730995e-06, "loss": 0.5132, "step": 10133 }, { "epoch": 0.29, "grad_norm": 3.508911674261922, "learning_rate": 8.327092917053524e-06, "loss": 0.5421, "step": 10134 }, { "epoch": 0.29, "grad_norm": 6.147328530022637, "learning_rate": 8.326746720753478e-06, "loss": 0.4719, "step": 10135 }, { "epoch": 0.29, "grad_norm": 4.075131834273012, "learning_rate": 8.326400495833841e-06, "loss": 0.4652, "step": 10136 }, { "epoch": 0.29, "grad_norm": 4.230438711353852, "learning_rate": 8.326054242297592e-06, "loss": 0.55, "step": 10137 }, { "epoch": 0.29, "grad_norm": 5.75197617116563, "learning_rate": 8.325707960147707e-06, "loss": 0.3719, "step": 10138 }, { "epoch": 0.29, "grad_norm": 3.3659228718172103, "learning_rate": 8.325361649387167e-06, "loss": 0.2736, "step": 10139 }, { "epoch": 0.29, "grad_norm": 6.476212697999426, "learning_rate": 8.325015310018948e-06, "loss": 0.4826, "step": 10140 }, { "epoch": 0.29, "grad_norm": 3.530586669870477, "learning_rate": 8.324668942046033e-06, "loss": 0.3264, "step": 10141 }, { "epoch": 0.29, "grad_norm": 10.337582319639926, "learning_rate": 8.324322545471401e-06, "loss": 0.8898, "step": 10142 }, { "epoch": 0.29, "grad_norm": 7.5237415604830025, "learning_rate": 8.32397612029803e-06, "loss": 0.5451, "step": 10143 }, { "epoch": 0.29, "grad_norm": 8.4033188371585, "learning_rate": 8.3236296665289e-06, "loss": 0.7123, "step": 10144 }, { "epoch": 0.29, "grad_norm": 6.03095845520911, "learning_rate": 8.323283184166996e-06, "loss": 0.4444, "step": 10145 }, { "epoch": 0.29, "grad_norm": 4.244669825515622, "learning_rate": 8.322936673215296e-06, "loss": 0.6415, "step": 10146 }, { "epoch": 0.29, "grad_norm": 10.914532945524755, "learning_rate": 8.32259013367678e-06, "loss": 0.8673, "step": 10147 }, { "epoch": 0.29, "grad_norm": 4.853292037142786, "learning_rate": 8.322243565554431e-06, "loss": 0.5019, "step": 10148 }, { "epoch": 0.29, "grad_norm": 4.774360729784961, "learning_rate": 8.321896968851228e-06, "loss": 0.9665, "step": 10149 }, { "epoch": 0.29, "grad_norm": 3.6995925150549436, "learning_rate": 8.321550343570157e-06, "loss": 0.3688, "step": 10150 }, { "epoch": 0.29, "grad_norm": 7.9702130339712225, "learning_rate": 8.321203689714195e-06, "loss": 0.7721, "step": 10151 }, { "epoch": 0.29, "grad_norm": 3.9027139757280156, "learning_rate": 8.320857007286327e-06, "loss": 0.544, "step": 10152 }, { "epoch": 0.29, "grad_norm": 4.209232530115702, "learning_rate": 8.320510296289535e-06, "loss": 0.4683, "step": 10153 }, { "epoch": 0.29, "grad_norm": 9.584703187202386, "learning_rate": 8.320163556726802e-06, "loss": 1.0981, "step": 10154 }, { "epoch": 0.29, "grad_norm": 9.297719503562094, "learning_rate": 8.319816788601111e-06, "loss": 0.6326, "step": 10155 }, { "epoch": 0.29, "grad_norm": 5.902099240765565, "learning_rate": 8.319469991915445e-06, "loss": 0.5206, "step": 10156 }, { "epoch": 0.29, "grad_norm": 2.80175391935209, "learning_rate": 8.319123166672786e-06, "loss": 0.4119, "step": 10157 }, { "epoch": 0.29, "grad_norm": 4.885377439112314, "learning_rate": 8.318776312876122e-06, "loss": 0.1463, "step": 10158 }, { "epoch": 0.29, "grad_norm": 3.3009461491493224, "learning_rate": 8.318429430528432e-06, "loss": 0.2443, "step": 10159 }, { "epoch": 0.29, "grad_norm": 7.102069832417942, "learning_rate": 8.318082519632702e-06, "loss": 0.3773, "step": 10160 }, { "epoch": 0.29, "grad_norm": 3.93765322069235, "learning_rate": 8.317735580191916e-06, "loss": 1.0464, "step": 10161 }, { "epoch": 0.29, "grad_norm": 8.884679968742411, "learning_rate": 8.317388612209061e-06, "loss": 0.5738, "step": 10162 }, { "epoch": 0.29, "grad_norm": 3.3010909618012554, "learning_rate": 8.31704161568712e-06, "loss": 0.59, "step": 10163 }, { "epoch": 0.29, "grad_norm": 4.71644050720927, "learning_rate": 8.316694590629077e-06, "loss": 0.4725, "step": 10164 }, { "epoch": 0.29, "grad_norm": 6.72901168345575, "learning_rate": 8.31634753703792e-06, "loss": 0.7119, "step": 10165 }, { "epoch": 0.29, "grad_norm": 5.781251072239131, "learning_rate": 8.316000454916633e-06, "loss": 0.6434, "step": 10166 }, { "epoch": 0.29, "grad_norm": 11.629424206952548, "learning_rate": 8.315653344268204e-06, "loss": 0.9948, "step": 10167 }, { "epoch": 0.29, "grad_norm": 4.366194528554424, "learning_rate": 8.315306205095614e-06, "loss": 0.2176, "step": 10168 }, { "epoch": 0.29, "grad_norm": 3.833051415456612, "learning_rate": 8.314959037401855e-06, "loss": 0.6002, "step": 10169 }, { "epoch": 0.29, "grad_norm": 4.5029642090956035, "learning_rate": 8.314611841189912e-06, "loss": 0.5465, "step": 10170 }, { "epoch": 0.29, "grad_norm": 9.111639614852383, "learning_rate": 8.31426461646277e-06, "loss": 0.791, "step": 10171 }, { "epoch": 0.29, "grad_norm": 5.620937915203283, "learning_rate": 8.31391736322342e-06, "loss": 0.5484, "step": 10172 }, { "epoch": 0.29, "grad_norm": 5.9607283129302955, "learning_rate": 8.313570081474845e-06, "loss": 0.5159, "step": 10173 }, { "epoch": 0.29, "grad_norm": 5.023950933200111, "learning_rate": 8.313222771220035e-06, "loss": 0.3221, "step": 10174 }, { "epoch": 0.29, "grad_norm": 7.214256770819158, "learning_rate": 8.312875432461978e-06, "loss": 0.894, "step": 10175 }, { "epoch": 0.29, "grad_norm": 2.6752752674935416, "learning_rate": 8.31252806520366e-06, "loss": 0.0831, "step": 10176 }, { "epoch": 0.29, "grad_norm": 5.569555843800356, "learning_rate": 8.312180669448072e-06, "loss": 0.8383, "step": 10177 }, { "epoch": 0.29, "grad_norm": 3.337054134975774, "learning_rate": 8.311833245198202e-06, "loss": 0.2173, "step": 10178 }, { "epoch": 0.29, "grad_norm": 5.649127754467189, "learning_rate": 8.311485792457036e-06, "loss": 0.5439, "step": 10179 }, { "epoch": 0.29, "grad_norm": 9.876422876144488, "learning_rate": 8.311138311227569e-06, "loss": 0.6005, "step": 10180 }, { "epoch": 0.29, "grad_norm": 8.019767776323528, "learning_rate": 8.310790801512785e-06, "loss": 0.8393, "step": 10181 }, { "epoch": 0.29, "grad_norm": 11.371436430715459, "learning_rate": 8.310443263315676e-06, "loss": 0.384, "step": 10182 }, { "epoch": 0.29, "grad_norm": 6.3136723251262366, "learning_rate": 8.310095696639229e-06, "loss": 0.516, "step": 10183 }, { "epoch": 0.29, "grad_norm": 7.714596020547926, "learning_rate": 8.309748101486438e-06, "loss": 0.4243, "step": 10184 }, { "epoch": 0.29, "grad_norm": 3.5003243023625976, "learning_rate": 8.30940047786029e-06, "loss": 0.3978, "step": 10185 }, { "epoch": 0.29, "grad_norm": 7.455346630079234, "learning_rate": 8.309052825763779e-06, "loss": 0.6159, "step": 10186 }, { "epoch": 0.29, "grad_norm": 10.211649536430576, "learning_rate": 8.308705145199895e-06, "loss": 0.7604, "step": 10187 }, { "epoch": 0.29, "grad_norm": 7.701351294288645, "learning_rate": 8.308357436171626e-06, "loss": 0.6783, "step": 10188 }, { "epoch": 0.29, "grad_norm": 5.6490219046042585, "learning_rate": 8.308009698681966e-06, "loss": 0.6011, "step": 10189 }, { "epoch": 0.29, "grad_norm": 6.894995239897947, "learning_rate": 8.307661932733906e-06, "loss": 0.4968, "step": 10190 }, { "epoch": 0.29, "grad_norm": 8.374961682132346, "learning_rate": 8.307314138330437e-06, "loss": 1.0556, "step": 10191 }, { "epoch": 0.29, "grad_norm": 6.116601986590735, "learning_rate": 8.306966315474552e-06, "loss": 0.5548, "step": 10192 }, { "epoch": 0.29, "grad_norm": 2.057939023564986, "learning_rate": 8.306618464169243e-06, "loss": 0.1353, "step": 10193 }, { "epoch": 0.29, "grad_norm": 5.385523054103861, "learning_rate": 8.306270584417502e-06, "loss": 0.5715, "step": 10194 }, { "epoch": 0.29, "grad_norm": 3.2555543680011665, "learning_rate": 8.305922676222322e-06, "loss": 0.1649, "step": 10195 }, { "epoch": 0.29, "grad_norm": 4.897677811358989, "learning_rate": 8.305574739586697e-06, "loss": 0.6309, "step": 10196 }, { "epoch": 0.29, "grad_norm": 5.50880289033758, "learning_rate": 8.30522677451362e-06, "loss": 0.5648, "step": 10197 }, { "epoch": 0.29, "grad_norm": 12.974662591152345, "learning_rate": 8.304878781006084e-06, "loss": 0.7622, "step": 10198 }, { "epoch": 0.29, "grad_norm": 5.872655116501446, "learning_rate": 8.304530759067082e-06, "loss": 0.6895, "step": 10199 }, { "epoch": 0.29, "grad_norm": 12.0792242144639, "learning_rate": 8.304182708699609e-06, "loss": 0.5789, "step": 10200 }, { "epoch": 0.29, "grad_norm": 4.900903327548873, "learning_rate": 8.303834629906659e-06, "loss": 0.3467, "step": 10201 }, { "epoch": 0.29, "grad_norm": 3.8168150137233225, "learning_rate": 8.303486522691226e-06, "loss": 0.6111, "step": 10202 }, { "epoch": 0.29, "grad_norm": 3.396522491048167, "learning_rate": 8.303138387056305e-06, "loss": 0.2605, "step": 10203 }, { "epoch": 0.29, "grad_norm": 3.1704797165767338, "learning_rate": 8.302790223004893e-06, "loss": 0.2271, "step": 10204 }, { "epoch": 0.29, "grad_norm": 10.774391181898686, "learning_rate": 8.302442030539982e-06, "loss": 0.5983, "step": 10205 }, { "epoch": 0.29, "grad_norm": 7.426840701070157, "learning_rate": 8.302093809664568e-06, "loss": 0.8825, "step": 10206 }, { "epoch": 0.29, "grad_norm": 4.882715233406215, "learning_rate": 8.301745560381648e-06, "loss": 0.3802, "step": 10207 }, { "epoch": 0.29, "grad_norm": 5.519568786709432, "learning_rate": 8.301397282694219e-06, "loss": 0.583, "step": 10208 }, { "epoch": 0.29, "grad_norm": 9.615933544922648, "learning_rate": 8.301048976605273e-06, "loss": 0.7984, "step": 10209 }, { "epoch": 0.29, "grad_norm": 5.890322811561507, "learning_rate": 8.300700642117812e-06, "loss": 0.6686, "step": 10210 }, { "epoch": 0.29, "grad_norm": 6.313502166058873, "learning_rate": 8.300352279234828e-06, "loss": 0.4239, "step": 10211 }, { "epoch": 0.29, "grad_norm": 8.901244165487993, "learning_rate": 8.300003887959321e-06, "loss": 0.5561, "step": 10212 }, { "epoch": 0.29, "grad_norm": 9.149369867441825, "learning_rate": 8.299655468294286e-06, "loss": 0.6435, "step": 10213 }, { "epoch": 0.29, "grad_norm": 5.46219154551998, "learning_rate": 8.29930702024272e-06, "loss": 0.5735, "step": 10214 }, { "epoch": 0.29, "grad_norm": 7.244590122062264, "learning_rate": 8.298958543807624e-06, "loss": 0.6013, "step": 10215 }, { "epoch": 0.29, "grad_norm": 10.70103505467123, "learning_rate": 8.298610038991992e-06, "loss": 0.6095, "step": 10216 }, { "epoch": 0.29, "grad_norm": 5.334067155104174, "learning_rate": 8.298261505798826e-06, "loss": 0.4737, "step": 10217 }, { "epoch": 0.29, "grad_norm": 5.361549848130371, "learning_rate": 8.29791294423112e-06, "loss": 0.4012, "step": 10218 }, { "epoch": 0.29, "grad_norm": 4.602929603588637, "learning_rate": 8.297564354291877e-06, "loss": 0.5242, "step": 10219 }, { "epoch": 0.29, "grad_norm": 6.9148759783630664, "learning_rate": 8.297215735984092e-06, "loss": 0.5401, "step": 10220 }, { "epoch": 0.29, "grad_norm": 5.707568175367768, "learning_rate": 8.296867089310768e-06, "loss": 0.8942, "step": 10221 }, { "epoch": 0.29, "grad_norm": 6.444392727045105, "learning_rate": 8.2965184142749e-06, "loss": 0.6194, "step": 10222 }, { "epoch": 0.29, "grad_norm": 8.294566319989643, "learning_rate": 8.296169710879493e-06, "loss": 0.3795, "step": 10223 }, { "epoch": 0.29, "grad_norm": 8.08293545782638, "learning_rate": 8.295820979127543e-06, "loss": 0.7792, "step": 10224 }, { "epoch": 0.29, "grad_norm": 5.255826214955432, "learning_rate": 8.29547221902205e-06, "loss": 0.9058, "step": 10225 }, { "epoch": 0.29, "grad_norm": 5.464118849453139, "learning_rate": 8.295123430566015e-06, "loss": 0.4555, "step": 10226 }, { "epoch": 0.29, "grad_norm": 7.373480688810523, "learning_rate": 8.29477461376244e-06, "loss": 0.2604, "step": 10227 }, { "epoch": 0.29, "grad_norm": 7.978764602717096, "learning_rate": 8.294425768614323e-06, "loss": 0.7341, "step": 10228 }, { "epoch": 0.29, "grad_norm": 6.158780618649172, "learning_rate": 8.294076895124668e-06, "loss": 0.637, "step": 10229 }, { "epoch": 0.29, "grad_norm": 4.862562256637596, "learning_rate": 8.293727993296474e-06, "loss": 0.4415, "step": 10230 }, { "epoch": 0.29, "grad_norm": 6.537802389091587, "learning_rate": 8.293379063132745e-06, "loss": 0.4907, "step": 10231 }, { "epoch": 0.29, "grad_norm": 3.758887567416232, "learning_rate": 8.293030104636481e-06, "loss": 0.3087, "step": 10232 }, { "epoch": 0.29, "grad_norm": 5.237736274652094, "learning_rate": 8.292681117810684e-06, "loss": 0.4292, "step": 10233 }, { "epoch": 0.29, "grad_norm": 5.088087292639138, "learning_rate": 8.292332102658356e-06, "loss": 0.4108, "step": 10234 }, { "epoch": 0.29, "grad_norm": 4.554490466297276, "learning_rate": 8.291983059182502e-06, "loss": 0.4182, "step": 10235 }, { "epoch": 0.29, "grad_norm": 10.205372024027527, "learning_rate": 8.291633987386123e-06, "loss": 0.7169, "step": 10236 }, { "epoch": 0.29, "grad_norm": 3.4912273956124307, "learning_rate": 8.291284887272219e-06, "loss": 0.2801, "step": 10237 }, { "epoch": 0.29, "grad_norm": 4.667390054879997, "learning_rate": 8.2909357588438e-06, "loss": 0.2608, "step": 10238 }, { "epoch": 0.29, "grad_norm": 6.676161307135199, "learning_rate": 8.290586602103865e-06, "loss": 0.3184, "step": 10239 }, { "epoch": 0.29, "grad_norm": 4.445867681445093, "learning_rate": 8.290237417055418e-06, "loss": 0.3012, "step": 10240 }, { "epoch": 0.29, "grad_norm": 6.816472776246815, "learning_rate": 8.289888203701463e-06, "loss": 0.6336, "step": 10241 }, { "epoch": 0.29, "grad_norm": 5.395485464196505, "learning_rate": 8.289538962045005e-06, "loss": 0.3554, "step": 10242 }, { "epoch": 0.29, "grad_norm": 8.344441245945186, "learning_rate": 8.28918969208905e-06, "loss": 0.6989, "step": 10243 }, { "epoch": 0.29, "grad_norm": 5.437327108155735, "learning_rate": 8.2888403938366e-06, "loss": 0.4353, "step": 10244 }, { "epoch": 0.29, "grad_norm": 3.615259501216879, "learning_rate": 8.288491067290661e-06, "loss": 0.2204, "step": 10245 }, { "epoch": 0.29, "grad_norm": 4.497192460678256, "learning_rate": 8.288141712454238e-06, "loss": 0.2911, "step": 10246 }, { "epoch": 0.29, "grad_norm": 3.13698465590757, "learning_rate": 8.287792329330337e-06, "loss": 0.4171, "step": 10247 }, { "epoch": 0.29, "grad_norm": 4.427852889848741, "learning_rate": 8.287442917921964e-06, "loss": 0.4175, "step": 10248 }, { "epoch": 0.29, "grad_norm": 4.216317754314745, "learning_rate": 8.287093478232124e-06, "loss": 0.9257, "step": 10249 }, { "epoch": 0.29, "grad_norm": 4.135444307915517, "learning_rate": 8.286744010263822e-06, "loss": 0.4275, "step": 10250 }, { "epoch": 0.29, "grad_norm": 6.479653351424478, "learning_rate": 8.286394514020068e-06, "loss": 0.5126, "step": 10251 }, { "epoch": 0.29, "grad_norm": 2.683453557601797, "learning_rate": 8.286044989503866e-06, "loss": 0.234, "step": 10252 }, { "epoch": 0.29, "grad_norm": 9.01984987485, "learning_rate": 8.285695436718223e-06, "loss": 0.6175, "step": 10253 }, { "epoch": 0.29, "grad_norm": 4.254744602118581, "learning_rate": 8.285345855666147e-06, "loss": 0.1184, "step": 10254 }, { "epoch": 0.29, "grad_norm": 1.8068719499285832, "learning_rate": 8.284996246350646e-06, "loss": 0.1286, "step": 10255 }, { "epoch": 0.29, "grad_norm": 3.662260348450498, "learning_rate": 8.284646608774725e-06, "loss": 0.4962, "step": 10256 }, { "epoch": 0.29, "grad_norm": 4.218563047434596, "learning_rate": 8.284296942941394e-06, "loss": 0.3491, "step": 10257 }, { "epoch": 0.29, "grad_norm": 6.1215462970748025, "learning_rate": 8.283947248853661e-06, "loss": 0.3814, "step": 10258 }, { "epoch": 0.29, "grad_norm": 8.79289554004535, "learning_rate": 8.283597526514535e-06, "loss": 0.5905, "step": 10259 }, { "epoch": 0.29, "grad_norm": 6.778773686281268, "learning_rate": 8.283247775927024e-06, "loss": 0.8661, "step": 10260 }, { "epoch": 0.29, "grad_norm": 5.204653220520463, "learning_rate": 8.282897997094134e-06, "loss": 0.2301, "step": 10261 }, { "epoch": 0.29, "grad_norm": 6.687516613520726, "learning_rate": 8.282548190018879e-06, "loss": 0.3441, "step": 10262 }, { "epoch": 0.29, "grad_norm": 8.7696000875987, "learning_rate": 8.282198354704267e-06, "loss": 0.7857, "step": 10263 }, { "epoch": 0.29, "grad_norm": 5.543632139653991, "learning_rate": 8.281848491153305e-06, "loss": 0.7117, "step": 10264 }, { "epoch": 0.29, "grad_norm": 7.2302680005728295, "learning_rate": 8.281498599369004e-06, "loss": 0.7661, "step": 10265 }, { "epoch": 0.29, "grad_norm": 5.2628225005724, "learning_rate": 8.281148679354377e-06, "loss": 0.666, "step": 10266 }, { "epoch": 0.29, "grad_norm": 6.399625861139628, "learning_rate": 8.28079873111243e-06, "loss": 0.5873, "step": 10267 }, { "epoch": 0.29, "grad_norm": 7.883274091319212, "learning_rate": 8.280448754646175e-06, "loss": 0.5023, "step": 10268 }, { "epoch": 0.29, "grad_norm": 5.341562816227302, "learning_rate": 8.280098749958625e-06, "loss": 0.5396, "step": 10269 }, { "epoch": 0.29, "grad_norm": 11.363326569929391, "learning_rate": 8.279748717052788e-06, "loss": 1.123, "step": 10270 }, { "epoch": 0.29, "grad_norm": 5.038909625905995, "learning_rate": 8.279398655931676e-06, "loss": 0.5162, "step": 10271 }, { "epoch": 0.29, "grad_norm": 4.435707858686264, "learning_rate": 8.279048566598302e-06, "loss": 0.4405, "step": 10272 }, { "epoch": 0.29, "grad_norm": 7.240196571632155, "learning_rate": 8.278698449055678e-06, "loss": 0.79, "step": 10273 }, { "epoch": 0.29, "grad_norm": 9.672075950494738, "learning_rate": 8.278348303306813e-06, "loss": 0.5243, "step": 10274 }, { "epoch": 0.29, "grad_norm": 5.972273420760046, "learning_rate": 8.277998129354724e-06, "loss": 0.7109, "step": 10275 }, { "epoch": 0.29, "grad_norm": 5.1955173079197134, "learning_rate": 8.277647927202417e-06, "loss": 0.5858, "step": 10276 }, { "epoch": 0.29, "grad_norm": 6.364726596183456, "learning_rate": 8.277297696852911e-06, "loss": 0.5225, "step": 10277 }, { "epoch": 0.29, "grad_norm": 4.879776497553618, "learning_rate": 8.276947438309215e-06, "loss": 0.6046, "step": 10278 }, { "epoch": 0.29, "grad_norm": 8.500730987980804, "learning_rate": 8.276597151574345e-06, "loss": 1.2504, "step": 10279 }, { "epoch": 0.29, "grad_norm": 6.899475296083543, "learning_rate": 8.276246836651313e-06, "loss": 0.2867, "step": 10280 }, { "epoch": 0.29, "grad_norm": 6.271315314272327, "learning_rate": 8.275896493543132e-06, "loss": 0.7725, "step": 10281 }, { "epoch": 0.29, "grad_norm": 3.549888078846082, "learning_rate": 8.275546122252817e-06, "loss": 0.1627, "step": 10282 }, { "epoch": 0.29, "grad_norm": 11.712230323447157, "learning_rate": 8.275195722783381e-06, "loss": 0.4168, "step": 10283 }, { "epoch": 0.29, "grad_norm": 4.5978058537051645, "learning_rate": 8.274845295137842e-06, "loss": 0.6806, "step": 10284 }, { "epoch": 0.29, "grad_norm": 4.143305965009933, "learning_rate": 8.27449483931921e-06, "loss": 0.2506, "step": 10285 }, { "epoch": 0.29, "grad_norm": 2.768797827107355, "learning_rate": 8.274144355330502e-06, "loss": 0.5994, "step": 10286 }, { "epoch": 0.29, "grad_norm": 4.427940010508704, "learning_rate": 8.273793843174734e-06, "loss": 0.4526, "step": 10287 }, { "epoch": 0.29, "grad_norm": 8.188840101221244, "learning_rate": 8.273443302854921e-06, "loss": 0.6539, "step": 10288 }, { "epoch": 0.29, "grad_norm": 4.843266912181218, "learning_rate": 8.273092734374079e-06, "loss": 0.4148, "step": 10289 }, { "epoch": 0.29, "grad_norm": 3.6362528927694164, "learning_rate": 8.27274213773522e-06, "loss": 0.4001, "step": 10290 }, { "epoch": 0.29, "grad_norm": 10.29656412352873, "learning_rate": 8.272391512941366e-06, "loss": 0.7277, "step": 10291 }, { "epoch": 0.29, "grad_norm": 4.852188831633071, "learning_rate": 8.272040859995532e-06, "loss": 0.5627, "step": 10292 }, { "epoch": 0.29, "grad_norm": 6.851023323305581, "learning_rate": 8.271690178900731e-06, "loss": 0.6717, "step": 10293 }, { "epoch": 0.29, "grad_norm": 5.041908146721003, "learning_rate": 8.271339469659982e-06, "loss": 0.3911, "step": 10294 }, { "epoch": 0.29, "grad_norm": 4.237971984549254, "learning_rate": 8.270988732276303e-06, "loss": 0.3933, "step": 10295 }, { "epoch": 0.29, "grad_norm": 3.2992469968857985, "learning_rate": 8.27063796675271e-06, "loss": 0.4295, "step": 10296 }, { "epoch": 0.29, "grad_norm": 6.824900409125574, "learning_rate": 8.270287173092223e-06, "loss": 0.7778, "step": 10297 }, { "epoch": 0.29, "grad_norm": 3.680775022061387, "learning_rate": 8.269936351297859e-06, "loss": 0.3081, "step": 10298 }, { "epoch": 0.29, "grad_norm": 4.80059401889653, "learning_rate": 8.26958550137263e-06, "loss": 0.3934, "step": 10299 }, { "epoch": 0.29, "grad_norm": 7.1109375965618815, "learning_rate": 8.269234623319564e-06, "loss": 0.6237, "step": 10300 }, { "epoch": 0.29, "grad_norm": 5.377764589689353, "learning_rate": 8.268883717141673e-06, "loss": 0.2662, "step": 10301 }, { "epoch": 0.3, "grad_norm": 4.283110805895977, "learning_rate": 8.26853278284198e-06, "loss": 0.3759, "step": 10302 }, { "epoch": 0.3, "grad_norm": 7.0550555263094195, "learning_rate": 8.268181820423502e-06, "loss": 0.4254, "step": 10303 }, { "epoch": 0.3, "grad_norm": 7.691155223711971, "learning_rate": 8.267830829889256e-06, "loss": 0.525, "step": 10304 }, { "epoch": 0.3, "grad_norm": 6.632802219630026, "learning_rate": 8.267479811242264e-06, "loss": 1.008, "step": 10305 }, { "epoch": 0.3, "grad_norm": 6.042478831281343, "learning_rate": 8.267128764485545e-06, "loss": 0.5993, "step": 10306 }, { "epoch": 0.3, "grad_norm": 8.900321834617309, "learning_rate": 8.26677768962212e-06, "loss": 0.5087, "step": 10307 }, { "epoch": 0.3, "grad_norm": 6.622807553826362, "learning_rate": 8.26642658665501e-06, "loss": 0.6845, "step": 10308 }, { "epoch": 0.3, "grad_norm": 4.866418278682808, "learning_rate": 8.266075455587233e-06, "loss": 0.3957, "step": 10309 }, { "epoch": 0.3, "grad_norm": 2.5216556080207795, "learning_rate": 8.265724296421813e-06, "loss": 0.137, "step": 10310 }, { "epoch": 0.3, "grad_norm": 3.7143508792757074, "learning_rate": 8.265373109161767e-06, "loss": 0.4218, "step": 10311 }, { "epoch": 0.3, "grad_norm": 4.847959301472415, "learning_rate": 8.265021893810119e-06, "loss": 0.7518, "step": 10312 }, { "epoch": 0.3, "grad_norm": 6.618477471552114, "learning_rate": 8.264670650369888e-06, "loss": 0.6629, "step": 10313 }, { "epoch": 0.3, "grad_norm": 8.939402344459221, "learning_rate": 8.264319378844098e-06, "loss": 0.8573, "step": 10314 }, { "epoch": 0.3, "grad_norm": 6.536938685869136, "learning_rate": 8.263968079235773e-06, "loss": 0.5006, "step": 10315 }, { "epoch": 0.3, "grad_norm": 7.1901554675530495, "learning_rate": 8.26361675154793e-06, "loss": 0.9028, "step": 10316 }, { "epoch": 0.3, "grad_norm": 2.7406951669129613, "learning_rate": 8.263265395783594e-06, "loss": 0.2487, "step": 10317 }, { "epoch": 0.3, "grad_norm": 5.423994216326497, "learning_rate": 8.262914011945789e-06, "loss": 0.3435, "step": 10318 }, { "epoch": 0.3, "grad_norm": 7.6012641558070735, "learning_rate": 8.262562600037534e-06, "loss": 0.4918, "step": 10319 }, { "epoch": 0.3, "grad_norm": 5.616750899153973, "learning_rate": 8.262211160061856e-06, "loss": 0.4771, "step": 10320 }, { "epoch": 0.3, "grad_norm": 5.962082259221409, "learning_rate": 8.261859692021777e-06, "loss": 0.7853, "step": 10321 }, { "epoch": 0.3, "grad_norm": 5.125714973261933, "learning_rate": 8.26150819592032e-06, "loss": 0.3527, "step": 10322 }, { "epoch": 0.3, "grad_norm": 7.853130643260429, "learning_rate": 8.261156671760512e-06, "loss": 0.2833, "step": 10323 }, { "epoch": 0.3, "grad_norm": 7.539336615604422, "learning_rate": 8.260805119545373e-06, "loss": 0.6606, "step": 10324 }, { "epoch": 0.3, "grad_norm": 6.487694242358869, "learning_rate": 8.260453539277929e-06, "loss": 0.6482, "step": 10325 }, { "epoch": 0.3, "grad_norm": 5.809449451578905, "learning_rate": 8.260101930961204e-06, "loss": 0.3947, "step": 10326 }, { "epoch": 0.3, "grad_norm": 5.636380445197987, "learning_rate": 8.259750294598225e-06, "loss": 0.5653, "step": 10327 }, { "epoch": 0.3, "grad_norm": 5.470710795246319, "learning_rate": 8.259398630192013e-06, "loss": 0.7113, "step": 10328 }, { "epoch": 0.3, "grad_norm": 6.702465033776133, "learning_rate": 8.259046937745598e-06, "loss": 0.2179, "step": 10329 }, { "epoch": 0.3, "grad_norm": 6.37360793229753, "learning_rate": 8.258695217262005e-06, "loss": 0.3388, "step": 10330 }, { "epoch": 0.3, "grad_norm": 2.8940572620835034, "learning_rate": 8.258343468744255e-06, "loss": 0.1156, "step": 10331 }, { "epoch": 0.3, "grad_norm": 4.974279673583564, "learning_rate": 8.257991692195379e-06, "loss": 0.5351, "step": 10332 }, { "epoch": 0.3, "grad_norm": 4.839274892558626, "learning_rate": 8.257639887618401e-06, "loss": 0.5533, "step": 10333 }, { "epoch": 0.3, "grad_norm": 8.189534873563487, "learning_rate": 8.257288055016349e-06, "loss": 0.1601, "step": 10334 }, { "epoch": 0.3, "grad_norm": 8.805096160692845, "learning_rate": 8.256936194392248e-06, "loss": 0.2322, "step": 10335 }, { "epoch": 0.3, "grad_norm": 10.740007502519484, "learning_rate": 8.256584305749127e-06, "loss": 0.5734, "step": 10336 }, { "epoch": 0.3, "grad_norm": 5.499727935997845, "learning_rate": 8.25623238909001e-06, "loss": 0.2765, "step": 10337 }, { "epoch": 0.3, "grad_norm": 6.3271980266037895, "learning_rate": 8.255880444417928e-06, "loss": 0.6609, "step": 10338 }, { "epoch": 0.3, "grad_norm": 4.7256899193748225, "learning_rate": 8.255528471735909e-06, "loss": 0.4931, "step": 10339 }, { "epoch": 0.3, "grad_norm": 5.44162961743737, "learning_rate": 8.255176471046977e-06, "loss": 0.5305, "step": 10340 }, { "epoch": 0.3, "grad_norm": 4.165199224829411, "learning_rate": 8.254824442354163e-06, "loss": 0.6023, "step": 10341 }, { "epoch": 0.3, "grad_norm": 6.044805522532285, "learning_rate": 8.254472385660496e-06, "loss": 0.7619, "step": 10342 }, { "epoch": 0.3, "grad_norm": 4.43217906816352, "learning_rate": 8.254120300969003e-06, "loss": 0.4323, "step": 10343 }, { "epoch": 0.3, "grad_norm": 6.918097608455021, "learning_rate": 8.253768188282713e-06, "loss": 0.6112, "step": 10344 }, { "epoch": 0.3, "grad_norm": 11.042493412567724, "learning_rate": 8.25341604760466e-06, "loss": 0.4974, "step": 10345 }, { "epoch": 0.3, "grad_norm": 6.70492451305979, "learning_rate": 8.253063878937864e-06, "loss": 1.0041, "step": 10346 }, { "epoch": 0.3, "grad_norm": 6.830647367122642, "learning_rate": 8.252711682285364e-06, "loss": 0.7163, "step": 10347 }, { "epoch": 0.3, "grad_norm": 6.442541537108391, "learning_rate": 8.252359457650184e-06, "loss": 0.4762, "step": 10348 }, { "epoch": 0.3, "grad_norm": 4.63646629822469, "learning_rate": 8.252007205035356e-06, "loss": 0.3748, "step": 10349 }, { "epoch": 0.3, "grad_norm": 13.486035506646251, "learning_rate": 8.25165492444391e-06, "loss": 0.6948, "step": 10350 }, { "epoch": 0.3, "grad_norm": 5.724472143167579, "learning_rate": 8.251302615878878e-06, "loss": 0.5252, "step": 10351 }, { "epoch": 0.3, "grad_norm": 4.410643558255041, "learning_rate": 8.250950279343288e-06, "loss": 0.5478, "step": 10352 }, { "epoch": 0.3, "grad_norm": 6.701291062382755, "learning_rate": 8.250597914840175e-06, "loss": 0.5863, "step": 10353 }, { "epoch": 0.3, "grad_norm": 2.6774153571547483, "learning_rate": 8.250245522372568e-06, "loss": 0.2141, "step": 10354 }, { "epoch": 0.3, "grad_norm": 4.001254749193981, "learning_rate": 8.2498931019435e-06, "loss": 0.1733, "step": 10355 }, { "epoch": 0.3, "grad_norm": 4.082249393748801, "learning_rate": 8.249540653556001e-06, "loss": 0.6199, "step": 10356 }, { "epoch": 0.3, "grad_norm": 4.7410066947592036, "learning_rate": 8.2491881772131e-06, "loss": 0.3292, "step": 10357 }, { "epoch": 0.3, "grad_norm": 5.177920953647485, "learning_rate": 8.24883567291784e-06, "loss": 0.3726, "step": 10358 }, { "epoch": 0.3, "grad_norm": 4.062138878837795, "learning_rate": 8.248483140673242e-06, "loss": 0.5648, "step": 10359 }, { "epoch": 0.3, "grad_norm": 8.7132558158203, "learning_rate": 8.248130580482345e-06, "loss": 0.5602, "step": 10360 }, { "epoch": 0.3, "grad_norm": 12.543165900939103, "learning_rate": 8.247777992348181e-06, "loss": 0.5391, "step": 10361 }, { "epoch": 0.3, "grad_norm": 4.137525672487208, "learning_rate": 8.24742537627378e-06, "loss": 0.3637, "step": 10362 }, { "epoch": 0.3, "grad_norm": 5.638179047958406, "learning_rate": 8.247072732262183e-06, "loss": 0.8305, "step": 10363 }, { "epoch": 0.3, "grad_norm": 4.84061229351979, "learning_rate": 8.246720060316415e-06, "loss": 0.3283, "step": 10364 }, { "epoch": 0.3, "grad_norm": 5.761634748460754, "learning_rate": 8.246367360439515e-06, "loss": 0.4611, "step": 10365 }, { "epoch": 0.3, "grad_norm": 7.496828553094447, "learning_rate": 8.246014632634518e-06, "loss": 0.7966, "step": 10366 }, { "epoch": 0.3, "grad_norm": 4.749639648019759, "learning_rate": 8.245661876904457e-06, "loss": 0.6303, "step": 10367 }, { "epoch": 0.3, "grad_norm": 3.599329092505012, "learning_rate": 8.245309093252364e-06, "loss": 0.3143, "step": 10368 }, { "epoch": 0.3, "grad_norm": 4.663978154172168, "learning_rate": 8.244956281681277e-06, "loss": 0.4229, "step": 10369 }, { "epoch": 0.3, "grad_norm": 5.219071763792565, "learning_rate": 8.244603442194232e-06, "loss": 0.2548, "step": 10370 }, { "epoch": 0.3, "grad_norm": 5.871797784659992, "learning_rate": 8.244250574794262e-06, "loss": 0.4929, "step": 10371 }, { "epoch": 0.3, "grad_norm": 5.770116860249246, "learning_rate": 8.243897679484405e-06, "loss": 0.8981, "step": 10372 }, { "epoch": 0.3, "grad_norm": 4.594424762604233, "learning_rate": 8.243544756267694e-06, "loss": 0.6806, "step": 10373 }, { "epoch": 0.3, "grad_norm": 3.4328099594141537, "learning_rate": 8.243191805147168e-06, "loss": 0.2191, "step": 10374 }, { "epoch": 0.3, "grad_norm": 6.565200177501558, "learning_rate": 8.242838826125862e-06, "loss": 0.3541, "step": 10375 }, { "epoch": 0.3, "grad_norm": 3.3707384118923036, "learning_rate": 8.242485819206812e-06, "loss": 0.264, "step": 10376 }, { "epoch": 0.3, "grad_norm": 5.199319129164016, "learning_rate": 8.242132784393058e-06, "loss": 0.5484, "step": 10377 }, { "epoch": 0.3, "grad_norm": 4.34479378433682, "learning_rate": 8.241779721687633e-06, "loss": 0.5229, "step": 10378 }, { "epoch": 0.3, "grad_norm": 9.130725370841546, "learning_rate": 8.241426631093576e-06, "loss": 0.5824, "step": 10379 }, { "epoch": 0.3, "grad_norm": 4.141874045790238, "learning_rate": 8.241073512613924e-06, "loss": 0.3698, "step": 10380 }, { "epoch": 0.3, "grad_norm": 6.540070341006098, "learning_rate": 8.240720366251717e-06, "loss": 0.3285, "step": 10381 }, { "epoch": 0.3, "grad_norm": 3.766553297418518, "learning_rate": 8.240367192009992e-06, "loss": 0.6971, "step": 10382 }, { "epoch": 0.3, "grad_norm": 6.168522950605832, "learning_rate": 8.240013989891786e-06, "loss": 0.3056, "step": 10383 }, { "epoch": 0.3, "grad_norm": 13.295806554952442, "learning_rate": 8.239660759900139e-06, "loss": 0.6859, "step": 10384 }, { "epoch": 0.3, "grad_norm": 6.030761738516871, "learning_rate": 8.23930750203809e-06, "loss": 0.3415, "step": 10385 }, { "epoch": 0.3, "grad_norm": 2.6117151696822183, "learning_rate": 8.238954216308677e-06, "loss": 0.1849, "step": 10386 }, { "epoch": 0.3, "grad_norm": 7.513525528817506, "learning_rate": 8.23860090271494e-06, "loss": 1.1051, "step": 10387 }, { "epoch": 0.3, "grad_norm": 11.741617337392405, "learning_rate": 8.238247561259918e-06, "loss": 0.5828, "step": 10388 }, { "epoch": 0.3, "grad_norm": 5.9815031087768915, "learning_rate": 8.237894191946651e-06, "loss": 0.3996, "step": 10389 }, { "epoch": 0.3, "grad_norm": 8.69940922583833, "learning_rate": 8.237540794778177e-06, "loss": 0.7029, "step": 10390 }, { "epoch": 0.3, "grad_norm": 7.74518035412711, "learning_rate": 8.23718736975754e-06, "loss": 0.3242, "step": 10391 }, { "epoch": 0.3, "grad_norm": 11.32106461010845, "learning_rate": 8.236833916887779e-06, "loss": 0.7602, "step": 10392 }, { "epoch": 0.3, "grad_norm": 6.368402395598571, "learning_rate": 8.236480436171934e-06, "loss": 0.6221, "step": 10393 }, { "epoch": 0.3, "grad_norm": 7.789126595833164, "learning_rate": 8.236126927613045e-06, "loss": 0.4672, "step": 10394 }, { "epoch": 0.3, "grad_norm": 5.84986606346763, "learning_rate": 8.235773391214155e-06, "loss": 0.6326, "step": 10395 }, { "epoch": 0.3, "grad_norm": 3.8803272852063357, "learning_rate": 8.235419826978307e-06, "loss": 0.2473, "step": 10396 }, { "epoch": 0.3, "grad_norm": 6.238459116980398, "learning_rate": 8.235066234908537e-06, "loss": 0.4989, "step": 10397 }, { "epoch": 0.3, "grad_norm": 4.709407627572359, "learning_rate": 8.234712615007892e-06, "loss": 0.2575, "step": 10398 }, { "epoch": 0.3, "grad_norm": 4.715556396632509, "learning_rate": 8.234358967279415e-06, "loss": 0.2259, "step": 10399 }, { "epoch": 0.3, "grad_norm": 8.003628384787573, "learning_rate": 8.234005291726144e-06, "loss": 1.2329, "step": 10400 }, { "epoch": 0.3, "grad_norm": 10.109118875504773, "learning_rate": 8.233651588351123e-06, "loss": 1.0192, "step": 10401 }, { "epoch": 0.3, "grad_norm": 5.853234738422522, "learning_rate": 8.233297857157396e-06, "loss": 0.6413, "step": 10402 }, { "epoch": 0.3, "grad_norm": 8.988721986763316, "learning_rate": 8.232944098148007e-06, "loss": 0.5362, "step": 10403 }, { "epoch": 0.3, "grad_norm": 3.9512839270791207, "learning_rate": 8.232590311325997e-06, "loss": 0.4427, "step": 10404 }, { "epoch": 0.3, "grad_norm": 6.890674167033789, "learning_rate": 8.23223649669441e-06, "loss": 0.8259, "step": 10405 }, { "epoch": 0.3, "grad_norm": 10.601481719190671, "learning_rate": 8.231882654256291e-06, "loss": 0.7848, "step": 10406 }, { "epoch": 0.3, "grad_norm": 9.462118010577429, "learning_rate": 8.231528784014683e-06, "loss": 0.4616, "step": 10407 }, { "epoch": 0.3, "grad_norm": 2.066110932388986, "learning_rate": 8.23117488597263e-06, "loss": 0.1389, "step": 10408 }, { "epoch": 0.3, "grad_norm": 5.573959761593067, "learning_rate": 8.23082096013318e-06, "loss": 0.5202, "step": 10409 }, { "epoch": 0.3, "grad_norm": 8.356082331958952, "learning_rate": 8.230467006499374e-06, "loss": 1.0879, "step": 10410 }, { "epoch": 0.3, "grad_norm": 7.38652967092997, "learning_rate": 8.230113025074257e-06, "loss": 0.7484, "step": 10411 }, { "epoch": 0.3, "grad_norm": 9.324789859970958, "learning_rate": 8.229759015860875e-06, "loss": 0.8473, "step": 10412 }, { "epoch": 0.3, "grad_norm": 3.619821466922447, "learning_rate": 8.229404978862276e-06, "loss": 0.5056, "step": 10413 }, { "epoch": 0.3, "grad_norm": 6.148119635890348, "learning_rate": 8.2290509140815e-06, "loss": 0.5303, "step": 10414 }, { "epoch": 0.3, "grad_norm": 10.889731897323886, "learning_rate": 8.2286968215216e-06, "loss": 0.5466, "step": 10415 }, { "epoch": 0.3, "grad_norm": 5.511559822716462, "learning_rate": 8.228342701185617e-06, "loss": 0.5022, "step": 10416 }, { "epoch": 0.3, "grad_norm": 3.7631878385696833, "learning_rate": 8.2279885530766e-06, "loss": 0.4021, "step": 10417 }, { "epoch": 0.3, "grad_norm": 4.795100178870388, "learning_rate": 8.227634377197595e-06, "loss": 0.5953, "step": 10418 }, { "epoch": 0.3, "grad_norm": 4.918986220546592, "learning_rate": 8.227280173551649e-06, "loss": 0.5206, "step": 10419 }, { "epoch": 0.3, "grad_norm": 3.6543149147424345, "learning_rate": 8.226925942141807e-06, "loss": 0.5552, "step": 10420 }, { "epoch": 0.3, "grad_norm": 7.868631149012527, "learning_rate": 8.226571682971121e-06, "loss": 0.2422, "step": 10421 }, { "epoch": 0.3, "grad_norm": 9.08375802636727, "learning_rate": 8.226217396042636e-06, "loss": 0.7549, "step": 10422 }, { "epoch": 0.3, "grad_norm": 6.233544908679553, "learning_rate": 8.225863081359397e-06, "loss": 0.1934, "step": 10423 }, { "epoch": 0.3, "grad_norm": 7.34999098485277, "learning_rate": 8.22550873892446e-06, "loss": 0.8088, "step": 10424 }, { "epoch": 0.3, "grad_norm": 6.8804732990477255, "learning_rate": 8.225154368740863e-06, "loss": 0.5768, "step": 10425 }, { "epoch": 0.3, "grad_norm": 4.152388535668051, "learning_rate": 8.224799970811664e-06, "loss": 0.304, "step": 10426 }, { "epoch": 0.3, "grad_norm": 4.972334498494037, "learning_rate": 8.224445545139907e-06, "loss": 0.6704, "step": 10427 }, { "epoch": 0.3, "grad_norm": 3.9604356286031237, "learning_rate": 8.22409109172864e-06, "loss": 0.4587, "step": 10428 }, { "epoch": 0.3, "grad_norm": 7.34262280843015, "learning_rate": 8.223736610580917e-06, "loss": 0.6243, "step": 10429 }, { "epoch": 0.3, "grad_norm": 5.932736865433823, "learning_rate": 8.223382101699784e-06, "loss": 0.7154, "step": 10430 }, { "epoch": 0.3, "grad_norm": 3.2680334491056806, "learning_rate": 8.223027565088291e-06, "loss": 0.5832, "step": 10431 }, { "epoch": 0.3, "grad_norm": 14.932630184058908, "learning_rate": 8.222673000749488e-06, "loss": 0.6383, "step": 10432 }, { "epoch": 0.3, "grad_norm": 5.107409714635081, "learning_rate": 8.222318408686428e-06, "loss": 0.3823, "step": 10433 }, { "epoch": 0.3, "grad_norm": 5.833429862540703, "learning_rate": 8.22196378890216e-06, "loss": 0.5994, "step": 10434 }, { "epoch": 0.3, "grad_norm": 1.8458796502524466, "learning_rate": 8.221609141399732e-06, "loss": 0.1181, "step": 10435 }, { "epoch": 0.3, "grad_norm": 4.359481126594138, "learning_rate": 8.221254466182199e-06, "loss": 0.3735, "step": 10436 }, { "epoch": 0.3, "grad_norm": 5.360723564929216, "learning_rate": 8.220899763252609e-06, "loss": 0.3971, "step": 10437 }, { "epoch": 0.3, "grad_norm": 4.798871932491559, "learning_rate": 8.220545032614014e-06, "loss": 0.2809, "step": 10438 }, { "epoch": 0.3, "grad_norm": 8.856818070697448, "learning_rate": 8.220190274269469e-06, "loss": 0.7447, "step": 10439 }, { "epoch": 0.3, "grad_norm": 3.5259884483269346, "learning_rate": 8.219835488222021e-06, "loss": 0.4246, "step": 10440 }, { "epoch": 0.3, "grad_norm": 6.821481226601589, "learning_rate": 8.219480674474726e-06, "loss": 0.3533, "step": 10441 }, { "epoch": 0.3, "grad_norm": 3.7011083592422724, "learning_rate": 8.219125833030636e-06, "loss": 0.3059, "step": 10442 }, { "epoch": 0.3, "grad_norm": 5.743034497547508, "learning_rate": 8.218770963892802e-06, "loss": 0.6017, "step": 10443 }, { "epoch": 0.3, "grad_norm": 4.471743934571358, "learning_rate": 8.218416067064277e-06, "loss": 0.6661, "step": 10444 }, { "epoch": 0.3, "grad_norm": 3.497836772800741, "learning_rate": 8.218061142548117e-06, "loss": 0.1317, "step": 10445 }, { "epoch": 0.3, "grad_norm": 4.555852201283283, "learning_rate": 8.217706190347372e-06, "loss": 0.4304, "step": 10446 }, { "epoch": 0.3, "grad_norm": 7.450493625954305, "learning_rate": 8.217351210465095e-06, "loss": 0.8349, "step": 10447 }, { "epoch": 0.3, "grad_norm": 6.17076011921656, "learning_rate": 8.216996202904344e-06, "loss": 0.4405, "step": 10448 }, { "epoch": 0.3, "grad_norm": 10.7293819270098, "learning_rate": 8.216641167668172e-06, "loss": 0.2105, "step": 10449 }, { "epoch": 0.3, "grad_norm": 4.680280950296911, "learning_rate": 8.216286104759628e-06, "loss": 0.5953, "step": 10450 }, { "epoch": 0.3, "grad_norm": 2.649719535633076, "learning_rate": 8.215931014181775e-06, "loss": 0.3497, "step": 10451 }, { "epoch": 0.3, "grad_norm": 3.8477751582792172, "learning_rate": 8.21557589593766e-06, "loss": 0.3443, "step": 10452 }, { "epoch": 0.3, "grad_norm": 4.773810688920577, "learning_rate": 8.215220750030343e-06, "loss": 0.4359, "step": 10453 }, { "epoch": 0.3, "grad_norm": 9.717227926996724, "learning_rate": 8.214865576462878e-06, "loss": 1.1382, "step": 10454 }, { "epoch": 0.3, "grad_norm": 6.390590527728312, "learning_rate": 8.214510375238318e-06, "loss": 0.7168, "step": 10455 }, { "epoch": 0.3, "grad_norm": 8.125128994431256, "learning_rate": 8.214155146359723e-06, "loss": 0.5531, "step": 10456 }, { "epoch": 0.3, "grad_norm": 4.423060147148393, "learning_rate": 8.213799889830147e-06, "loss": 0.3821, "step": 10457 }, { "epoch": 0.3, "grad_norm": 10.9963064928632, "learning_rate": 8.213444605652647e-06, "loss": 1.4894, "step": 10458 }, { "epoch": 0.3, "grad_norm": 7.130016301097667, "learning_rate": 8.213089293830277e-06, "loss": 0.5788, "step": 10459 }, { "epoch": 0.3, "grad_norm": 3.592376247390259, "learning_rate": 8.212733954366096e-06, "loss": 0.2748, "step": 10460 }, { "epoch": 0.3, "grad_norm": 4.029660436414382, "learning_rate": 8.212378587263161e-06, "loss": 0.4173, "step": 10461 }, { "epoch": 0.3, "grad_norm": 4.363682413400073, "learning_rate": 8.212023192524527e-06, "loss": 0.5153, "step": 10462 }, { "epoch": 0.3, "grad_norm": 6.480866956289641, "learning_rate": 8.211667770153255e-06, "loss": 0.7958, "step": 10463 }, { "epoch": 0.3, "grad_norm": 7.5210841410936, "learning_rate": 8.211312320152401e-06, "loss": 0.9072, "step": 10464 }, { "epoch": 0.3, "grad_norm": 5.429238455886467, "learning_rate": 8.21095684252502e-06, "loss": 0.6799, "step": 10465 }, { "epoch": 0.3, "grad_norm": 5.020055032557634, "learning_rate": 8.210601337274173e-06, "loss": 0.2395, "step": 10466 }, { "epoch": 0.3, "grad_norm": 4.406717235060546, "learning_rate": 8.210245804402919e-06, "loss": 0.2777, "step": 10467 }, { "epoch": 0.3, "grad_norm": 5.268704156461223, "learning_rate": 8.209890243914316e-06, "loss": 0.5509, "step": 10468 }, { "epoch": 0.3, "grad_norm": 7.351716934835944, "learning_rate": 8.209534655811422e-06, "loss": 0.8088, "step": 10469 }, { "epoch": 0.3, "grad_norm": 3.766696413451582, "learning_rate": 8.209179040097296e-06, "loss": 0.3, "step": 10470 }, { "epoch": 0.3, "grad_norm": 5.93493295691143, "learning_rate": 8.208823396775e-06, "loss": 0.5011, "step": 10471 }, { "epoch": 0.3, "grad_norm": 5.075009608142195, "learning_rate": 8.208467725847587e-06, "loss": 0.2302, "step": 10472 }, { "epoch": 0.3, "grad_norm": 5.015787947910409, "learning_rate": 8.208112027318125e-06, "loss": 0.5335, "step": 10473 }, { "epoch": 0.3, "grad_norm": 5.537097512422533, "learning_rate": 8.207756301189667e-06, "loss": 0.3441, "step": 10474 }, { "epoch": 0.3, "grad_norm": 4.463155111743614, "learning_rate": 8.20740054746528e-06, "loss": 0.5657, "step": 10475 }, { "epoch": 0.3, "grad_norm": 5.442725904031771, "learning_rate": 8.20704476614802e-06, "loss": 0.1228, "step": 10476 }, { "epoch": 0.3, "grad_norm": 7.418424008798803, "learning_rate": 8.206688957240947e-06, "loss": 0.8764, "step": 10477 }, { "epoch": 0.3, "grad_norm": 6.034204144975419, "learning_rate": 8.206333120747125e-06, "loss": 0.7104, "step": 10478 }, { "epoch": 0.3, "grad_norm": 4.754366072747515, "learning_rate": 8.205977256669614e-06, "loss": 0.4017, "step": 10479 }, { "epoch": 0.3, "grad_norm": 7.424839881172345, "learning_rate": 8.205621365011475e-06, "loss": 0.803, "step": 10480 }, { "epoch": 0.3, "grad_norm": 6.660307171585485, "learning_rate": 8.20526544577577e-06, "loss": 0.6452, "step": 10481 }, { "epoch": 0.3, "grad_norm": 8.425575589856502, "learning_rate": 8.20490949896556e-06, "loss": 0.5997, "step": 10482 }, { "epoch": 0.3, "grad_norm": 4.255736014410789, "learning_rate": 8.204553524583912e-06, "loss": 0.4201, "step": 10483 }, { "epoch": 0.3, "grad_norm": 3.9433354929200353, "learning_rate": 8.20419752263388e-06, "loss": 0.5257, "step": 10484 }, { "epoch": 0.3, "grad_norm": 5.3584415523740425, "learning_rate": 8.203841493118534e-06, "loss": 0.5447, "step": 10485 }, { "epoch": 0.3, "grad_norm": 4.573346817999516, "learning_rate": 8.203485436040934e-06, "loss": 0.2771, "step": 10486 }, { "epoch": 0.3, "grad_norm": 4.286876364525727, "learning_rate": 8.203129351404143e-06, "loss": 0.5015, "step": 10487 }, { "epoch": 0.3, "grad_norm": 4.532058591058198, "learning_rate": 8.202773239211223e-06, "loss": 0.7398, "step": 10488 }, { "epoch": 0.3, "grad_norm": 5.537803689796712, "learning_rate": 8.20241709946524e-06, "loss": 0.478, "step": 10489 }, { "epoch": 0.3, "grad_norm": 5.085629220927722, "learning_rate": 8.202060932169258e-06, "loss": 0.222, "step": 10490 }, { "epoch": 0.3, "grad_norm": 4.537808453503101, "learning_rate": 8.201704737326338e-06, "loss": 0.9407, "step": 10491 }, { "epoch": 0.3, "grad_norm": 3.0428170985382708, "learning_rate": 8.201348514939549e-06, "loss": 0.6645, "step": 10492 }, { "epoch": 0.3, "grad_norm": 3.4322642593948935, "learning_rate": 8.200992265011952e-06, "loss": 0.4361, "step": 10493 }, { "epoch": 0.3, "grad_norm": 7.664936447477301, "learning_rate": 8.200635987546612e-06, "loss": 0.8541, "step": 10494 }, { "epoch": 0.3, "grad_norm": 3.607430599018282, "learning_rate": 8.200279682546595e-06, "loss": 0.3052, "step": 10495 }, { "epoch": 0.3, "grad_norm": 6.396402149399938, "learning_rate": 8.199923350014966e-06, "loss": 0.6254, "step": 10496 }, { "epoch": 0.3, "grad_norm": 6.0836351615894415, "learning_rate": 8.19956698995479e-06, "loss": 0.8006, "step": 10497 }, { "epoch": 0.3, "grad_norm": 6.008617967202896, "learning_rate": 8.199210602369134e-06, "loss": 0.6961, "step": 10498 }, { "epoch": 0.3, "grad_norm": 6.523069962980879, "learning_rate": 8.198854187261063e-06, "loss": 0.6833, "step": 10499 }, { "epoch": 0.3, "grad_norm": 5.269369723797313, "learning_rate": 8.198497744633641e-06, "loss": 0.4903, "step": 10500 }, { "epoch": 0.3, "grad_norm": 5.2115878910316065, "learning_rate": 8.19814127448994e-06, "loss": 0.6221, "step": 10501 }, { "epoch": 0.3, "grad_norm": 4.38660768417564, "learning_rate": 8.197784776833023e-06, "loss": 0.4813, "step": 10502 }, { "epoch": 0.3, "grad_norm": 7.625993882739807, "learning_rate": 8.197428251665956e-06, "loss": 0.4029, "step": 10503 }, { "epoch": 0.3, "grad_norm": 10.347012930054115, "learning_rate": 8.197071698991807e-06, "loss": 0.7362, "step": 10504 }, { "epoch": 0.3, "grad_norm": 9.981327266310446, "learning_rate": 8.196715118813644e-06, "loss": 0.7345, "step": 10505 }, { "epoch": 0.3, "grad_norm": 5.89155170628872, "learning_rate": 8.196358511134534e-06, "loss": 0.9286, "step": 10506 }, { "epoch": 0.3, "grad_norm": 3.5598714400234495, "learning_rate": 8.196001875957547e-06, "loss": 0.4608, "step": 10507 }, { "epoch": 0.3, "grad_norm": 6.751421460843884, "learning_rate": 8.195645213285749e-06, "loss": 0.603, "step": 10508 }, { "epoch": 0.3, "grad_norm": 7.724652153504335, "learning_rate": 8.195288523122207e-06, "loss": 0.4237, "step": 10509 }, { "epoch": 0.3, "grad_norm": 8.793248406415358, "learning_rate": 8.194931805469992e-06, "loss": 0.4296, "step": 10510 }, { "epoch": 0.3, "grad_norm": 12.300075361749785, "learning_rate": 8.194575060332172e-06, "loss": 0.5967, "step": 10511 }, { "epoch": 0.3, "grad_norm": 6.838845712294927, "learning_rate": 8.194218287711818e-06, "loss": 0.4286, "step": 10512 }, { "epoch": 0.3, "grad_norm": 7.141676009478814, "learning_rate": 8.193861487611997e-06, "loss": 0.5974, "step": 10513 }, { "epoch": 0.3, "grad_norm": 3.900682494771692, "learning_rate": 8.193504660035775e-06, "loss": 0.5382, "step": 10514 }, { "epoch": 0.3, "grad_norm": 7.427319780164396, "learning_rate": 8.19314780498623e-06, "loss": 0.5203, "step": 10515 }, { "epoch": 0.3, "grad_norm": 10.452577402685467, "learning_rate": 8.192790922466425e-06, "loss": 0.8506, "step": 10516 }, { "epoch": 0.3, "grad_norm": 6.005772515886132, "learning_rate": 8.192434012479434e-06, "loss": 0.5239, "step": 10517 }, { "epoch": 0.3, "grad_norm": 4.8150434832327695, "learning_rate": 8.192077075028326e-06, "loss": 0.5762, "step": 10518 }, { "epoch": 0.3, "grad_norm": 8.040228667352405, "learning_rate": 8.191720110116173e-06, "loss": 0.4521, "step": 10519 }, { "epoch": 0.3, "grad_norm": 8.116636167569467, "learning_rate": 8.191363117746043e-06, "loss": 0.8247, "step": 10520 }, { "epoch": 0.3, "grad_norm": 4.461914770502413, "learning_rate": 8.19100609792101e-06, "loss": 0.357, "step": 10521 }, { "epoch": 0.3, "grad_norm": 4.28399725987109, "learning_rate": 8.190649050644144e-06, "loss": 0.6095, "step": 10522 }, { "epoch": 0.3, "grad_norm": 11.309352753093728, "learning_rate": 8.190291975918518e-06, "loss": 0.5812, "step": 10523 }, { "epoch": 0.3, "grad_norm": 7.825661825643783, "learning_rate": 8.1899348737472e-06, "loss": 0.6501, "step": 10524 }, { "epoch": 0.3, "grad_norm": 5.279740247550442, "learning_rate": 8.18957774413327e-06, "loss": 0.8241, "step": 10525 }, { "epoch": 0.3, "grad_norm": 5.416377751886042, "learning_rate": 8.189220587079792e-06, "loss": 0.4083, "step": 10526 }, { "epoch": 0.3, "grad_norm": 4.243695505660327, "learning_rate": 8.188863402589841e-06, "loss": 0.1564, "step": 10527 }, { "epoch": 0.3, "grad_norm": 7.569628043869407, "learning_rate": 8.188506190666494e-06, "loss": 0.8034, "step": 10528 }, { "epoch": 0.3, "grad_norm": 2.65235784018458, "learning_rate": 8.188148951312818e-06, "loss": 0.2947, "step": 10529 }, { "epoch": 0.3, "grad_norm": 4.678181044370892, "learning_rate": 8.18779168453189e-06, "loss": 0.5489, "step": 10530 }, { "epoch": 0.3, "grad_norm": 3.887200115817277, "learning_rate": 8.187434390326782e-06, "loss": 0.1657, "step": 10531 }, { "epoch": 0.3, "grad_norm": 17.64946725922065, "learning_rate": 8.187077068700571e-06, "loss": 1.0674, "step": 10532 }, { "epoch": 0.3, "grad_norm": 3.966901694208425, "learning_rate": 8.186719719656327e-06, "loss": 0.4977, "step": 10533 }, { "epoch": 0.3, "grad_norm": 3.354686019605746, "learning_rate": 8.186362343197124e-06, "loss": 0.2931, "step": 10534 }, { "epoch": 0.3, "grad_norm": 3.21833763443949, "learning_rate": 8.18600493932604e-06, "loss": 0.2191, "step": 10535 }, { "epoch": 0.3, "grad_norm": 3.471933313218247, "learning_rate": 8.185647508046149e-06, "loss": 0.2168, "step": 10536 }, { "epoch": 0.3, "grad_norm": 3.9126317346351063, "learning_rate": 8.185290049360522e-06, "loss": 0.279, "step": 10537 }, { "epoch": 0.3, "grad_norm": 4.627743551644933, "learning_rate": 8.184932563272237e-06, "loss": 0.6281, "step": 10538 }, { "epoch": 0.3, "grad_norm": 6.624156538334549, "learning_rate": 8.18457504978437e-06, "loss": 0.6947, "step": 10539 }, { "epoch": 0.3, "grad_norm": 5.3162828328804785, "learning_rate": 8.184217508899997e-06, "loss": 0.313, "step": 10540 }, { "epoch": 0.3, "grad_norm": 4.328466904559392, "learning_rate": 8.183859940622191e-06, "loss": 0.5589, "step": 10541 }, { "epoch": 0.3, "grad_norm": 8.648160714645508, "learning_rate": 8.183502344954032e-06, "loss": 0.4619, "step": 10542 }, { "epoch": 0.3, "grad_norm": 7.10810535537875, "learning_rate": 8.183144721898593e-06, "loss": 0.6008, "step": 10543 }, { "epoch": 0.3, "grad_norm": 5.200891765825917, "learning_rate": 8.18278707145895e-06, "loss": 0.4958, "step": 10544 }, { "epoch": 0.3, "grad_norm": 3.4357308083157627, "learning_rate": 8.182429393638185e-06, "loss": 0.3899, "step": 10545 }, { "epoch": 0.3, "grad_norm": 10.287271701509052, "learning_rate": 8.18207168843937e-06, "loss": 0.6689, "step": 10546 }, { "epoch": 0.3, "grad_norm": 9.819616567735906, "learning_rate": 8.181713955865584e-06, "loss": 0.5992, "step": 10547 }, { "epoch": 0.3, "grad_norm": 8.230398995240122, "learning_rate": 8.181356195919905e-06, "loss": 0.6065, "step": 10548 }, { "epoch": 0.3, "grad_norm": 5.3815310521118205, "learning_rate": 8.180998408605412e-06, "loss": 0.283, "step": 10549 }, { "epoch": 0.3, "grad_norm": 2.796952422365089, "learning_rate": 8.180640593925178e-06, "loss": 0.3422, "step": 10550 }, { "epoch": 0.3, "grad_norm": 7.136255375560581, "learning_rate": 8.180282751882287e-06, "loss": 0.4297, "step": 10551 }, { "epoch": 0.3, "grad_norm": 4.703176251002288, "learning_rate": 8.179924882479814e-06, "loss": 0.6681, "step": 10552 }, { "epoch": 0.3, "grad_norm": 8.07792288492318, "learning_rate": 8.179566985720839e-06, "loss": 0.4934, "step": 10553 }, { "epoch": 0.3, "grad_norm": 9.403985039125107, "learning_rate": 8.17920906160844e-06, "loss": 0.7287, "step": 10554 }, { "epoch": 0.3, "grad_norm": 3.245790653331748, "learning_rate": 8.1788511101457e-06, "loss": 0.1833, "step": 10555 }, { "epoch": 0.3, "grad_norm": 5.0964011097732485, "learning_rate": 8.178493131335691e-06, "loss": 0.513, "step": 10556 }, { "epoch": 0.3, "grad_norm": 5.966034755952107, "learning_rate": 8.1781351251815e-06, "loss": 0.5267, "step": 10557 }, { "epoch": 0.3, "grad_norm": 3.4674841143210515, "learning_rate": 8.177777091686202e-06, "loss": 0.104, "step": 10558 }, { "epoch": 0.3, "grad_norm": 4.536085166721988, "learning_rate": 8.17741903085288e-06, "loss": 0.3948, "step": 10559 }, { "epoch": 0.3, "grad_norm": 4.587522892284184, "learning_rate": 8.177060942684614e-06, "loss": 0.6994, "step": 10560 }, { "epoch": 0.3, "grad_norm": 9.805858332420373, "learning_rate": 8.176702827184481e-06, "loss": 0.9123, "step": 10561 }, { "epoch": 0.3, "grad_norm": 4.945789958931848, "learning_rate": 8.176344684355567e-06, "loss": 0.7036, "step": 10562 }, { "epoch": 0.3, "grad_norm": 8.348305933531597, "learning_rate": 8.17598651420095e-06, "loss": 0.6303, "step": 10563 }, { "epoch": 0.3, "grad_norm": 6.4866117733001305, "learning_rate": 8.175628316723713e-06, "loss": 0.2638, "step": 10564 }, { "epoch": 0.3, "grad_norm": 6.973454243278532, "learning_rate": 8.175270091926936e-06, "loss": 0.6939, "step": 10565 }, { "epoch": 0.3, "grad_norm": 3.498926764517388, "learning_rate": 8.174911839813701e-06, "loss": 0.1738, "step": 10566 }, { "epoch": 0.3, "grad_norm": 5.121382855737452, "learning_rate": 8.17455356038709e-06, "loss": 0.7755, "step": 10567 }, { "epoch": 0.3, "grad_norm": 9.201018527912483, "learning_rate": 8.174195253650188e-06, "loss": 0.662, "step": 10568 }, { "epoch": 0.3, "grad_norm": 6.834802256460549, "learning_rate": 8.173836919606073e-06, "loss": 0.4995, "step": 10569 }, { "epoch": 0.3, "grad_norm": 5.860487199131283, "learning_rate": 8.17347855825783e-06, "loss": 0.7126, "step": 10570 }, { "epoch": 0.3, "grad_norm": 5.453800279838876, "learning_rate": 8.173120169608542e-06, "loss": 0.3055, "step": 10571 }, { "epoch": 0.3, "grad_norm": 5.908695622874446, "learning_rate": 8.172761753661291e-06, "loss": 0.4893, "step": 10572 }, { "epoch": 0.3, "grad_norm": 6.920283080688977, "learning_rate": 8.172403310419162e-06, "loss": 0.9568, "step": 10573 }, { "epoch": 0.3, "grad_norm": 4.103364327068605, "learning_rate": 8.172044839885239e-06, "loss": 0.0771, "step": 10574 }, { "epoch": 0.3, "grad_norm": 6.457979569949491, "learning_rate": 8.171686342062602e-06, "loss": 0.4522, "step": 10575 }, { "epoch": 0.3, "grad_norm": 3.5593774730214944, "learning_rate": 8.17132781695434e-06, "loss": 0.5761, "step": 10576 }, { "epoch": 0.3, "grad_norm": 6.6740767622692285, "learning_rate": 8.170969264563535e-06, "loss": 0.3963, "step": 10577 }, { "epoch": 0.3, "grad_norm": 3.062184161813012, "learning_rate": 8.170610684893272e-06, "loss": 0.2423, "step": 10578 }, { "epoch": 0.3, "grad_norm": 4.174260746728365, "learning_rate": 8.170252077946634e-06, "loss": 0.6662, "step": 10579 }, { "epoch": 0.3, "grad_norm": 6.94193085087236, "learning_rate": 8.169893443726709e-06, "loss": 0.3976, "step": 10580 }, { "epoch": 0.3, "grad_norm": 3.7662086509361736, "learning_rate": 8.169534782236582e-06, "loss": 0.6441, "step": 10581 }, { "epoch": 0.3, "grad_norm": 7.6934443495997415, "learning_rate": 8.169176093479336e-06, "loss": 0.8999, "step": 10582 }, { "epoch": 0.3, "grad_norm": 11.335570021180148, "learning_rate": 8.168817377458058e-06, "loss": 0.7365, "step": 10583 }, { "epoch": 0.3, "grad_norm": 2.0426115376520415, "learning_rate": 8.168458634175836e-06, "loss": 0.1994, "step": 10584 }, { "epoch": 0.3, "grad_norm": 6.942757890833966, "learning_rate": 8.168099863635753e-06, "loss": 1.0224, "step": 10585 }, { "epoch": 0.3, "grad_norm": 5.257724075367379, "learning_rate": 8.167741065840898e-06, "loss": 0.5421, "step": 10586 }, { "epoch": 0.3, "grad_norm": 3.2633282008502515, "learning_rate": 8.167382240794355e-06, "loss": 0.4313, "step": 10587 }, { "epoch": 0.3, "grad_norm": 7.980657439911687, "learning_rate": 8.167023388499213e-06, "loss": 1.1905, "step": 10588 }, { "epoch": 0.3, "grad_norm": 4.683156913384872, "learning_rate": 8.16666450895856e-06, "loss": 0.4971, "step": 10589 }, { "epoch": 0.3, "grad_norm": 6.358278822092326, "learning_rate": 8.166305602175482e-06, "loss": 0.5697, "step": 10590 }, { "epoch": 0.3, "grad_norm": 4.504199280681038, "learning_rate": 8.165946668153067e-06, "loss": 0.4151, "step": 10591 }, { "epoch": 0.3, "grad_norm": 4.705403381228013, "learning_rate": 8.1655877068944e-06, "loss": 0.6421, "step": 10592 }, { "epoch": 0.3, "grad_norm": 6.618448616925205, "learning_rate": 8.165228718402574e-06, "loss": 0.3144, "step": 10593 }, { "epoch": 0.3, "grad_norm": 6.702173712392465, "learning_rate": 8.164869702680675e-06, "loss": 0.3405, "step": 10594 }, { "epoch": 0.3, "grad_norm": 5.457006696073742, "learning_rate": 8.164510659731791e-06, "loss": 0.4532, "step": 10595 }, { "epoch": 0.3, "grad_norm": 3.382919371504739, "learning_rate": 8.16415158955901e-06, "loss": 0.0792, "step": 10596 }, { "epoch": 0.3, "grad_norm": 5.557160890489024, "learning_rate": 8.163792492165425e-06, "loss": 0.5341, "step": 10597 }, { "epoch": 0.3, "grad_norm": 4.437331290463136, "learning_rate": 8.163433367554122e-06, "loss": 0.3865, "step": 10598 }, { "epoch": 0.3, "grad_norm": 3.273077678706194, "learning_rate": 8.16307421572819e-06, "loss": 0.5685, "step": 10599 }, { "epoch": 0.3, "grad_norm": 1.7345965819532863, "learning_rate": 8.162715036690723e-06, "loss": 0.1641, "step": 10600 }, { "epoch": 0.3, "grad_norm": 6.579929964974068, "learning_rate": 8.162355830444805e-06, "loss": 0.3945, "step": 10601 }, { "epoch": 0.3, "grad_norm": 5.552888281075013, "learning_rate": 8.16199659699353e-06, "loss": 0.3181, "step": 10602 }, { "epoch": 0.3, "grad_norm": 4.754185488922991, "learning_rate": 8.161637336339988e-06, "loss": 0.3482, "step": 10603 }, { "epoch": 0.3, "grad_norm": 8.039018725395579, "learning_rate": 8.161278048487269e-06, "loss": 0.7147, "step": 10604 }, { "epoch": 0.3, "grad_norm": 3.8084800977133053, "learning_rate": 8.160918733438463e-06, "loss": 0.4341, "step": 10605 }, { "epoch": 0.3, "grad_norm": 6.744273370297533, "learning_rate": 8.160559391196663e-06, "loss": 0.8134, "step": 10606 }, { "epoch": 0.3, "grad_norm": 5.717315452209369, "learning_rate": 8.16020002176496e-06, "loss": 0.606, "step": 10607 }, { "epoch": 0.3, "grad_norm": 6.8981110958931495, "learning_rate": 8.159840625146447e-06, "loss": 1.0856, "step": 10608 }, { "epoch": 0.3, "grad_norm": 4.005834347829471, "learning_rate": 8.159481201344212e-06, "loss": 0.2447, "step": 10609 }, { "epoch": 0.3, "grad_norm": 6.618358017622367, "learning_rate": 8.15912175036135e-06, "loss": 0.5932, "step": 10610 }, { "epoch": 0.3, "grad_norm": 8.978204508932933, "learning_rate": 8.158762272200953e-06, "loss": 0.5135, "step": 10611 }, { "epoch": 0.3, "grad_norm": 3.773529557078592, "learning_rate": 8.158402766866112e-06, "loss": 0.4807, "step": 10612 }, { "epoch": 0.3, "grad_norm": 5.241480226743796, "learning_rate": 8.15804323435992e-06, "loss": 0.555, "step": 10613 }, { "epoch": 0.3, "grad_norm": 4.508753501482503, "learning_rate": 8.157683674685473e-06, "loss": 0.3995, "step": 10614 }, { "epoch": 0.3, "grad_norm": 5.783076683344737, "learning_rate": 8.157324087845861e-06, "loss": 0.932, "step": 10615 }, { "epoch": 0.3, "grad_norm": 7.000713720720717, "learning_rate": 8.15696447384418e-06, "loss": 0.4222, "step": 10616 }, { "epoch": 0.3, "grad_norm": 9.775590032931177, "learning_rate": 8.156604832683521e-06, "loss": 0.6594, "step": 10617 }, { "epoch": 0.3, "grad_norm": 4.761474903550486, "learning_rate": 8.15624516436698e-06, "loss": 0.4559, "step": 10618 }, { "epoch": 0.3, "grad_norm": 10.452056009004576, "learning_rate": 8.15588546889765e-06, "loss": 0.5804, "step": 10619 }, { "epoch": 0.3, "grad_norm": 4.134290002019788, "learning_rate": 8.155525746278626e-06, "loss": 0.253, "step": 10620 }, { "epoch": 0.3, "grad_norm": 4.8909378682933955, "learning_rate": 8.155165996513e-06, "loss": 0.3318, "step": 10621 }, { "epoch": 0.3, "grad_norm": 4.034540001932211, "learning_rate": 8.154806219603873e-06, "loss": 0.3027, "step": 10622 }, { "epoch": 0.3, "grad_norm": 6.577154382421218, "learning_rate": 8.154446415554335e-06, "loss": 0.5322, "step": 10623 }, { "epoch": 0.3, "grad_norm": 5.159275716176384, "learning_rate": 8.154086584367484e-06, "loss": 0.3399, "step": 10624 }, { "epoch": 0.3, "grad_norm": 3.2301890979839136, "learning_rate": 8.153726726046414e-06, "loss": 0.2587, "step": 10625 }, { "epoch": 0.3, "grad_norm": 4.575697010913327, "learning_rate": 8.15336684059422e-06, "loss": 0.2697, "step": 10626 }, { "epoch": 0.3, "grad_norm": 6.081875896491033, "learning_rate": 8.153006928014001e-06, "loss": 0.5556, "step": 10627 }, { "epoch": 0.3, "grad_norm": 4.873862451712365, "learning_rate": 8.15264698830885e-06, "loss": 0.2931, "step": 10628 }, { "epoch": 0.3, "grad_norm": 6.525863817556443, "learning_rate": 8.152287021481866e-06, "loss": 0.2388, "step": 10629 }, { "epoch": 0.3, "grad_norm": 6.307996068460985, "learning_rate": 8.151927027536146e-06, "loss": 0.6559, "step": 10630 }, { "epoch": 0.3, "grad_norm": 5.527209600431492, "learning_rate": 8.151567006474784e-06, "loss": 0.8406, "step": 10631 }, { "epoch": 0.3, "grad_norm": 5.966695741586155, "learning_rate": 8.15120695830088e-06, "loss": 0.3926, "step": 10632 }, { "epoch": 0.3, "grad_norm": 6.468804916664725, "learning_rate": 8.150846883017532e-06, "loss": 0.2659, "step": 10633 }, { "epoch": 0.3, "grad_norm": 5.075997244284793, "learning_rate": 8.150486780627834e-06, "loss": 0.7296, "step": 10634 }, { "epoch": 0.3, "grad_norm": 7.61097684758822, "learning_rate": 8.150126651134887e-06, "loss": 1.0128, "step": 10635 }, { "epoch": 0.3, "grad_norm": 7.036951932011381, "learning_rate": 8.149766494541787e-06, "loss": 0.3037, "step": 10636 }, { "epoch": 0.3, "grad_norm": 6.538906446483933, "learning_rate": 8.149406310851634e-06, "loss": 0.4885, "step": 10637 }, { "epoch": 0.3, "grad_norm": 7.681569176445149, "learning_rate": 8.149046100067527e-06, "loss": 0.9557, "step": 10638 }, { "epoch": 0.3, "grad_norm": 7.153548751151936, "learning_rate": 8.148685862192565e-06, "loss": 0.8751, "step": 10639 }, { "epoch": 0.3, "grad_norm": 5.03832922043266, "learning_rate": 8.148325597229844e-06, "loss": 0.7495, "step": 10640 }, { "epoch": 0.3, "grad_norm": 4.346289461849227, "learning_rate": 8.147965305182467e-06, "loss": 0.3267, "step": 10641 }, { "epoch": 0.3, "grad_norm": 4.898370331664188, "learning_rate": 8.147604986053531e-06, "loss": 0.4416, "step": 10642 }, { "epoch": 0.3, "grad_norm": 4.39554598473257, "learning_rate": 8.14724463984614e-06, "loss": 0.5103, "step": 10643 }, { "epoch": 0.3, "grad_norm": 6.939673487103337, "learning_rate": 8.146884266563386e-06, "loss": 0.4962, "step": 10644 }, { "epoch": 0.3, "grad_norm": 8.371500565206745, "learning_rate": 8.146523866208378e-06, "loss": 0.7386, "step": 10645 }, { "epoch": 0.3, "grad_norm": 9.084862629886391, "learning_rate": 8.14616343878421e-06, "loss": 0.6462, "step": 10646 }, { "epoch": 0.3, "grad_norm": 9.836798644602974, "learning_rate": 8.145802984293987e-06, "loss": 0.4267, "step": 10647 }, { "epoch": 0.3, "grad_norm": 5.646643811524552, "learning_rate": 8.145442502740808e-06, "loss": 0.4307, "step": 10648 }, { "epoch": 0.3, "grad_norm": 8.67552831178871, "learning_rate": 8.145081994127773e-06, "loss": 1.1283, "step": 10649 }, { "epoch": 0.3, "grad_norm": 6.555329756251458, "learning_rate": 8.144721458457986e-06, "loss": 0.524, "step": 10650 }, { "epoch": 0.31, "grad_norm": 4.927964575943639, "learning_rate": 8.144360895734548e-06, "loss": 0.4013, "step": 10651 }, { "epoch": 0.31, "grad_norm": 3.70213246625083, "learning_rate": 8.144000305960559e-06, "loss": 0.5919, "step": 10652 }, { "epoch": 0.31, "grad_norm": 7.435508782038948, "learning_rate": 8.143639689139124e-06, "loss": 0.6862, "step": 10653 }, { "epoch": 0.31, "grad_norm": 7.357974881016544, "learning_rate": 8.143279045273343e-06, "loss": 0.6487, "step": 10654 }, { "epoch": 0.31, "grad_norm": 12.571979798580182, "learning_rate": 8.14291837436632e-06, "loss": 0.7457, "step": 10655 }, { "epoch": 0.31, "grad_norm": 3.592679519441919, "learning_rate": 8.142557676421157e-06, "loss": 0.3107, "step": 10656 }, { "epoch": 0.31, "grad_norm": 8.495048679663627, "learning_rate": 8.142196951440958e-06, "loss": 0.8701, "step": 10657 }, { "epoch": 0.31, "grad_norm": 8.42288586949535, "learning_rate": 8.141836199428823e-06, "loss": 0.7739, "step": 10658 }, { "epoch": 0.31, "grad_norm": 6.448605744533573, "learning_rate": 8.141475420387859e-06, "loss": 0.4303, "step": 10659 }, { "epoch": 0.31, "grad_norm": 5.3689526760582735, "learning_rate": 8.141114614321171e-06, "loss": 0.4981, "step": 10660 }, { "epoch": 0.31, "grad_norm": 7.000682082323645, "learning_rate": 8.140753781231857e-06, "loss": 0.2585, "step": 10661 }, { "epoch": 0.31, "grad_norm": 3.959472355176336, "learning_rate": 8.140392921123027e-06, "loss": 0.4756, "step": 10662 }, { "epoch": 0.31, "grad_norm": 6.530555031045928, "learning_rate": 8.140032033997784e-06, "loss": 0.2524, "step": 10663 }, { "epoch": 0.31, "grad_norm": 4.6398686778459854, "learning_rate": 8.139671119859231e-06, "loss": 0.6453, "step": 10664 }, { "epoch": 0.31, "grad_norm": 6.968555943819256, "learning_rate": 8.139310178710473e-06, "loss": 0.2409, "step": 10665 }, { "epoch": 0.31, "grad_norm": 5.492396170286311, "learning_rate": 8.138949210554618e-06, "loss": 0.8527, "step": 10666 }, { "epoch": 0.31, "grad_norm": 4.599614040105119, "learning_rate": 8.138588215394769e-06, "loss": 0.4179, "step": 10667 }, { "epoch": 0.31, "grad_norm": 11.124874500038262, "learning_rate": 8.138227193234031e-06, "loss": 1.0222, "step": 10668 }, { "epoch": 0.31, "grad_norm": 4.460878602288781, "learning_rate": 8.137866144075511e-06, "loss": 0.7168, "step": 10669 }, { "epoch": 0.31, "grad_norm": 4.9351359635226455, "learning_rate": 8.137505067922315e-06, "loss": 0.4063, "step": 10670 }, { "epoch": 0.31, "grad_norm": 8.67485531293274, "learning_rate": 8.13714396477755e-06, "loss": 0.7941, "step": 10671 }, { "epoch": 0.31, "grad_norm": 7.2560744324307915, "learning_rate": 8.136782834644322e-06, "loss": 0.3087, "step": 10672 }, { "epoch": 0.31, "grad_norm": 6.395384268470992, "learning_rate": 8.136421677525737e-06, "loss": 0.6243, "step": 10673 }, { "epoch": 0.31, "grad_norm": 6.071631211016653, "learning_rate": 8.136060493424902e-06, "loss": 0.7226, "step": 10674 }, { "epoch": 0.31, "grad_norm": 4.548833366315679, "learning_rate": 8.135699282344924e-06, "loss": 0.5187, "step": 10675 }, { "epoch": 0.31, "grad_norm": 3.494715208912337, "learning_rate": 8.135338044288912e-06, "loss": 0.2261, "step": 10676 }, { "epoch": 0.31, "grad_norm": 3.931846950554049, "learning_rate": 8.134976779259973e-06, "loss": 0.3623, "step": 10677 }, { "epoch": 0.31, "grad_norm": 8.349192583712915, "learning_rate": 8.134615487261215e-06, "loss": 0.9892, "step": 10678 }, { "epoch": 0.31, "grad_norm": 4.6203455831241795, "learning_rate": 8.134254168295745e-06, "loss": 0.5344, "step": 10679 }, { "epoch": 0.31, "grad_norm": 7.8829607916532805, "learning_rate": 8.133892822366674e-06, "loss": 0.6902, "step": 10680 }, { "epoch": 0.31, "grad_norm": 5.804612827975922, "learning_rate": 8.133531449477108e-06, "loss": 0.5411, "step": 10681 }, { "epoch": 0.31, "grad_norm": 4.558143469269488, "learning_rate": 8.133170049630156e-06, "loss": 0.2404, "step": 10682 }, { "epoch": 0.31, "grad_norm": 6.810884406628236, "learning_rate": 8.132808622828928e-06, "loss": 0.5722, "step": 10683 }, { "epoch": 0.31, "grad_norm": 5.3003532669805375, "learning_rate": 8.132447169076533e-06, "loss": 0.4709, "step": 10684 }, { "epoch": 0.31, "grad_norm": 9.594217394648272, "learning_rate": 8.132085688376082e-06, "loss": 0.2807, "step": 10685 }, { "epoch": 0.31, "grad_norm": 7.383127809280356, "learning_rate": 8.131724180730682e-06, "loss": 0.6152, "step": 10686 }, { "epoch": 0.31, "grad_norm": 5.314933062622642, "learning_rate": 8.131362646143445e-06, "loss": 0.7809, "step": 10687 }, { "epoch": 0.31, "grad_norm": 7.88100888324339, "learning_rate": 8.131001084617482e-06, "loss": 0.7042, "step": 10688 }, { "epoch": 0.31, "grad_norm": 6.4029195176344516, "learning_rate": 8.130639496155901e-06, "loss": 0.3681, "step": 10689 }, { "epoch": 0.31, "grad_norm": 6.10624540518175, "learning_rate": 8.130277880761814e-06, "loss": 0.4617, "step": 10690 }, { "epoch": 0.31, "grad_norm": 7.13593276633249, "learning_rate": 8.129916238438332e-06, "loss": 0.6, "step": 10691 }, { "epoch": 0.31, "grad_norm": 5.579154210816264, "learning_rate": 8.129554569188565e-06, "loss": 0.1561, "step": 10692 }, { "epoch": 0.31, "grad_norm": 7.327916221654067, "learning_rate": 8.129192873015625e-06, "loss": 0.97, "step": 10693 }, { "epoch": 0.31, "grad_norm": 4.589497992296005, "learning_rate": 8.128831149922625e-06, "loss": 0.7229, "step": 10694 }, { "epoch": 0.31, "grad_norm": 4.324542673551193, "learning_rate": 8.128469399912674e-06, "loss": 0.5143, "step": 10695 }, { "epoch": 0.31, "grad_norm": 5.288677252258097, "learning_rate": 8.12810762298889e-06, "loss": 0.3174, "step": 10696 }, { "epoch": 0.31, "grad_norm": 5.419666731044072, "learning_rate": 8.127745819154379e-06, "loss": 0.476, "step": 10697 }, { "epoch": 0.31, "grad_norm": 4.399583115118723, "learning_rate": 8.127383988412256e-06, "loss": 0.4252, "step": 10698 }, { "epoch": 0.31, "grad_norm": 4.002938383399618, "learning_rate": 8.127022130765635e-06, "loss": 0.4961, "step": 10699 }, { "epoch": 0.31, "grad_norm": 4.088232150986698, "learning_rate": 8.126660246217624e-06, "loss": 0.3101, "step": 10700 }, { "epoch": 0.31, "grad_norm": 4.7879082687437515, "learning_rate": 8.126298334771343e-06, "loss": 0.3416, "step": 10701 }, { "epoch": 0.31, "grad_norm": 2.2993371832543374, "learning_rate": 8.1259363964299e-06, "loss": 0.3399, "step": 10702 }, { "epoch": 0.31, "grad_norm": 8.179660567537145, "learning_rate": 8.125574431196412e-06, "loss": 0.73, "step": 10703 }, { "epoch": 0.31, "grad_norm": 10.00490116175416, "learning_rate": 8.125212439073993e-06, "loss": 0.6429, "step": 10704 }, { "epoch": 0.31, "grad_norm": 6.142392823740212, "learning_rate": 8.124850420065755e-06, "loss": 0.6231, "step": 10705 }, { "epoch": 0.31, "grad_norm": 3.7442871447147876, "learning_rate": 8.124488374174814e-06, "loss": 0.458, "step": 10706 }, { "epoch": 0.31, "grad_norm": 10.28429578752361, "learning_rate": 8.124126301404284e-06, "loss": 0.507, "step": 10707 }, { "epoch": 0.31, "grad_norm": 6.308232290583315, "learning_rate": 8.12376420175728e-06, "loss": 0.9169, "step": 10708 }, { "epoch": 0.31, "grad_norm": 3.8945437998363808, "learning_rate": 8.123402075236919e-06, "loss": 0.4779, "step": 10709 }, { "epoch": 0.31, "grad_norm": 10.45487019520783, "learning_rate": 8.123039921846312e-06, "loss": 0.841, "step": 10710 }, { "epoch": 0.31, "grad_norm": 3.6026525440785164, "learning_rate": 8.122677741588578e-06, "loss": 0.3029, "step": 10711 }, { "epoch": 0.31, "grad_norm": 5.118377034779072, "learning_rate": 8.122315534466832e-06, "loss": 0.8062, "step": 10712 }, { "epoch": 0.31, "grad_norm": 8.349870701319857, "learning_rate": 8.121953300484192e-06, "loss": 0.6068, "step": 10713 }, { "epoch": 0.31, "grad_norm": 8.504567545952984, "learning_rate": 8.121591039643768e-06, "loss": 0.5236, "step": 10714 }, { "epoch": 0.31, "grad_norm": 8.566271375993782, "learning_rate": 8.121228751948684e-06, "loss": 0.6907, "step": 10715 }, { "epoch": 0.31, "grad_norm": 6.286632755602047, "learning_rate": 8.120866437402052e-06, "loss": 1.1234, "step": 10716 }, { "epoch": 0.31, "grad_norm": 5.4102410595577926, "learning_rate": 8.120504096006989e-06, "loss": 0.6391, "step": 10717 }, { "epoch": 0.31, "grad_norm": 7.4664508680539425, "learning_rate": 8.120141727766615e-06, "loss": 0.5133, "step": 10718 }, { "epoch": 0.31, "grad_norm": 3.557067677868289, "learning_rate": 8.119779332684046e-06, "loss": 0.4888, "step": 10719 }, { "epoch": 0.31, "grad_norm": 5.696714001178607, "learning_rate": 8.1194169107624e-06, "loss": 0.6211, "step": 10720 }, { "epoch": 0.31, "grad_norm": 5.364350481133958, "learning_rate": 8.119054462004791e-06, "loss": 0.5128, "step": 10721 }, { "epoch": 0.31, "grad_norm": 8.438676540149384, "learning_rate": 8.118691986414344e-06, "loss": 0.4545, "step": 10722 }, { "epoch": 0.31, "grad_norm": 4.645886351050512, "learning_rate": 8.118329483994175e-06, "loss": 0.439, "step": 10723 }, { "epoch": 0.31, "grad_norm": 4.332464143194146, "learning_rate": 8.117966954747398e-06, "loss": 0.3308, "step": 10724 }, { "epoch": 0.31, "grad_norm": 4.5131362075571415, "learning_rate": 8.117604398677137e-06, "loss": 0.6698, "step": 10725 }, { "epoch": 0.31, "grad_norm": 6.370970930065201, "learning_rate": 8.117241815786508e-06, "loss": 0.7383, "step": 10726 }, { "epoch": 0.31, "grad_norm": 6.2538995403227595, "learning_rate": 8.116879206078631e-06, "loss": 0.3491, "step": 10727 }, { "epoch": 0.31, "grad_norm": 2.584751560218459, "learning_rate": 8.116516569556627e-06, "loss": 0.1813, "step": 10728 }, { "epoch": 0.31, "grad_norm": 7.992648770472374, "learning_rate": 8.116153906223615e-06, "loss": 0.9797, "step": 10729 }, { "epoch": 0.31, "grad_norm": 4.182362650470394, "learning_rate": 8.115791216082714e-06, "loss": 0.5671, "step": 10730 }, { "epoch": 0.31, "grad_norm": 4.353042926000924, "learning_rate": 8.115428499137046e-06, "loss": 0.2024, "step": 10731 }, { "epoch": 0.31, "grad_norm": 3.8069104666115416, "learning_rate": 8.11506575538973e-06, "loss": 0.2601, "step": 10732 }, { "epoch": 0.31, "grad_norm": 7.565754844184728, "learning_rate": 8.114702984843886e-06, "loss": 0.5958, "step": 10733 }, { "epoch": 0.31, "grad_norm": 7.707367570192857, "learning_rate": 8.114340187502636e-06, "loss": 0.4793, "step": 10734 }, { "epoch": 0.31, "grad_norm": 8.741420245144475, "learning_rate": 8.113977363369099e-06, "loss": 0.5698, "step": 10735 }, { "epoch": 0.31, "grad_norm": 5.919129049536162, "learning_rate": 8.1136145124464e-06, "loss": 0.4734, "step": 10736 }, { "epoch": 0.31, "grad_norm": 7.8693578277781056, "learning_rate": 8.11325163473766e-06, "loss": 0.3833, "step": 10737 }, { "epoch": 0.31, "grad_norm": 5.008339221885295, "learning_rate": 8.112888730245998e-06, "loss": 0.3519, "step": 10738 }, { "epoch": 0.31, "grad_norm": 4.988886928605164, "learning_rate": 8.112525798974538e-06, "loss": 0.6802, "step": 10739 }, { "epoch": 0.31, "grad_norm": 2.378360303548985, "learning_rate": 8.112162840926402e-06, "loss": 0.3661, "step": 10740 }, { "epoch": 0.31, "grad_norm": 6.171344318994263, "learning_rate": 8.111799856104709e-06, "loss": 0.5768, "step": 10741 }, { "epoch": 0.31, "grad_norm": 6.744043724585774, "learning_rate": 8.111436844512587e-06, "loss": 1.0315, "step": 10742 }, { "epoch": 0.31, "grad_norm": 2.024448804837856, "learning_rate": 8.111073806153157e-06, "loss": 0.3806, "step": 10743 }, { "epoch": 0.31, "grad_norm": 6.4413003166745995, "learning_rate": 8.110710741029545e-06, "loss": 0.6558, "step": 10744 }, { "epoch": 0.31, "grad_norm": 5.12563008412701, "learning_rate": 8.110347649144868e-06, "loss": 0.3666, "step": 10745 }, { "epoch": 0.31, "grad_norm": 6.299981574379755, "learning_rate": 8.109984530502256e-06, "loss": 0.4753, "step": 10746 }, { "epoch": 0.31, "grad_norm": 13.069740478986157, "learning_rate": 8.109621385104827e-06, "loss": 0.8585, "step": 10747 }, { "epoch": 0.31, "grad_norm": 3.8227005654079265, "learning_rate": 8.109258212955708e-06, "loss": 0.2544, "step": 10748 }, { "epoch": 0.31, "grad_norm": 2.622386232804899, "learning_rate": 8.108895014058025e-06, "loss": 0.176, "step": 10749 }, { "epoch": 0.31, "grad_norm": 4.059795095365574, "learning_rate": 8.108531788414901e-06, "loss": 0.5338, "step": 10750 }, { "epoch": 0.31, "grad_norm": 5.668313371525407, "learning_rate": 8.10816853602946e-06, "loss": 0.2338, "step": 10751 }, { "epoch": 0.31, "grad_norm": 4.074914421165853, "learning_rate": 8.107805256904828e-06, "loss": 0.3542, "step": 10752 }, { "epoch": 0.31, "grad_norm": 5.608737792479372, "learning_rate": 8.107441951044131e-06, "loss": 0.575, "step": 10753 }, { "epoch": 0.31, "grad_norm": 2.9619914572062602, "learning_rate": 8.107078618450492e-06, "loss": 0.4899, "step": 10754 }, { "epoch": 0.31, "grad_norm": 4.632319509383806, "learning_rate": 8.10671525912704e-06, "loss": 0.4948, "step": 10755 }, { "epoch": 0.31, "grad_norm": 4.114790680244151, "learning_rate": 8.106351873076898e-06, "loss": 0.3774, "step": 10756 }, { "epoch": 0.31, "grad_norm": 5.372581492285678, "learning_rate": 8.105988460303193e-06, "loss": 0.5895, "step": 10757 }, { "epoch": 0.31, "grad_norm": 4.030393228490667, "learning_rate": 8.105625020809052e-06, "loss": 0.3109, "step": 10758 }, { "epoch": 0.31, "grad_norm": 8.231478795236802, "learning_rate": 8.1052615545976e-06, "loss": 0.5583, "step": 10759 }, { "epoch": 0.31, "grad_norm": 3.1261898445891334, "learning_rate": 8.104898061671968e-06, "loss": 0.5389, "step": 10760 }, { "epoch": 0.31, "grad_norm": 4.915427401985452, "learning_rate": 8.104534542035278e-06, "loss": 0.6431, "step": 10761 }, { "epoch": 0.31, "grad_norm": 5.598094105815744, "learning_rate": 8.10417099569066e-06, "loss": 0.5279, "step": 10762 }, { "epoch": 0.31, "grad_norm": 4.785022145163255, "learning_rate": 8.103807422641244e-06, "loss": 0.3539, "step": 10763 }, { "epoch": 0.31, "grad_norm": 19.221234962649508, "learning_rate": 8.103443822890152e-06, "loss": 0.351, "step": 10764 }, { "epoch": 0.31, "grad_norm": 6.001893380560115, "learning_rate": 8.103080196440514e-06, "loss": 0.5918, "step": 10765 }, { "epoch": 0.31, "grad_norm": 5.307877717559343, "learning_rate": 8.102716543295462e-06, "loss": 0.2308, "step": 10766 }, { "epoch": 0.31, "grad_norm": 12.950251243245576, "learning_rate": 8.10235286345812e-06, "loss": 0.5863, "step": 10767 }, { "epoch": 0.31, "grad_norm": 3.6676215676804222, "learning_rate": 8.101989156931618e-06, "loss": 0.3649, "step": 10768 }, { "epoch": 0.31, "grad_norm": 3.979689352381814, "learning_rate": 8.101625423719084e-06, "loss": 0.3174, "step": 10769 }, { "epoch": 0.31, "grad_norm": 12.312059191253535, "learning_rate": 8.10126166382365e-06, "loss": 0.4565, "step": 10770 }, { "epoch": 0.31, "grad_norm": 6.692310385741443, "learning_rate": 8.100897877248443e-06, "loss": 0.5081, "step": 10771 }, { "epoch": 0.31, "grad_norm": 6.252144750716928, "learning_rate": 8.100534063996596e-06, "loss": 0.6196, "step": 10772 }, { "epoch": 0.31, "grad_norm": 5.320562863725899, "learning_rate": 8.100170224071233e-06, "loss": 0.5244, "step": 10773 }, { "epoch": 0.31, "grad_norm": 7.989287445942246, "learning_rate": 8.09980635747549e-06, "loss": 0.3482, "step": 10774 }, { "epoch": 0.31, "grad_norm": 11.455721489285947, "learning_rate": 8.099442464212492e-06, "loss": 0.4944, "step": 10775 }, { "epoch": 0.31, "grad_norm": 2.494755441380083, "learning_rate": 8.099078544285372e-06, "loss": 0.3244, "step": 10776 }, { "epoch": 0.31, "grad_norm": 4.614270032432562, "learning_rate": 8.098714597697264e-06, "loss": 0.2915, "step": 10777 }, { "epoch": 0.31, "grad_norm": 9.885432418526634, "learning_rate": 8.098350624451291e-06, "loss": 0.6375, "step": 10778 }, { "epoch": 0.31, "grad_norm": 6.1644738740561555, "learning_rate": 8.097986624550593e-06, "loss": 0.6088, "step": 10779 }, { "epoch": 0.31, "grad_norm": 4.924112156654338, "learning_rate": 8.097622597998297e-06, "loss": 0.7069, "step": 10780 }, { "epoch": 0.31, "grad_norm": 2.3043186991812883, "learning_rate": 8.097258544797536e-06, "loss": 0.1815, "step": 10781 }, { "epoch": 0.31, "grad_norm": 20.306461011659668, "learning_rate": 8.096894464951439e-06, "loss": 0.3901, "step": 10782 }, { "epoch": 0.31, "grad_norm": 9.083569940640965, "learning_rate": 8.096530358463139e-06, "loss": 0.4539, "step": 10783 }, { "epoch": 0.31, "grad_norm": 2.7181984791458182, "learning_rate": 8.096166225335773e-06, "loss": 0.3686, "step": 10784 }, { "epoch": 0.31, "grad_norm": 4.646384726791133, "learning_rate": 8.095802065572468e-06, "loss": 0.5242, "step": 10785 }, { "epoch": 0.31, "grad_norm": 6.2670357654647235, "learning_rate": 8.09543787917636e-06, "loss": 0.4021, "step": 10786 }, { "epoch": 0.31, "grad_norm": 2.876203430243075, "learning_rate": 8.09507366615058e-06, "loss": 0.1654, "step": 10787 }, { "epoch": 0.31, "grad_norm": 6.41068921613668, "learning_rate": 8.094709426498264e-06, "loss": 0.418, "step": 10788 }, { "epoch": 0.31, "grad_norm": 3.8927124122773153, "learning_rate": 8.094345160222543e-06, "loss": 0.4499, "step": 10789 }, { "epoch": 0.31, "grad_norm": 6.104599278800888, "learning_rate": 8.09398086732655e-06, "loss": 0.5522, "step": 10790 }, { "epoch": 0.31, "grad_norm": 5.352216382697392, "learning_rate": 8.093616547813423e-06, "loss": 0.6879, "step": 10791 }, { "epoch": 0.31, "grad_norm": 6.64728232186211, "learning_rate": 8.093252201686291e-06, "loss": 0.8535, "step": 10792 }, { "epoch": 0.31, "grad_norm": 9.347472454070248, "learning_rate": 8.092887828948292e-06, "loss": 0.5063, "step": 10793 }, { "epoch": 0.31, "grad_norm": 5.654782057322299, "learning_rate": 8.092523429602562e-06, "loss": 0.7111, "step": 10794 }, { "epoch": 0.31, "grad_norm": 6.063537037038377, "learning_rate": 8.092159003652233e-06, "loss": 0.4641, "step": 10795 }, { "epoch": 0.31, "grad_norm": 8.579199940742143, "learning_rate": 8.09179455110044e-06, "loss": 1.1454, "step": 10796 }, { "epoch": 0.31, "grad_norm": 6.831458214381383, "learning_rate": 8.09143007195032e-06, "loss": 0.6768, "step": 10797 }, { "epoch": 0.31, "grad_norm": 4.44012942787062, "learning_rate": 8.091065566205005e-06, "loss": 0.5107, "step": 10798 }, { "epoch": 0.31, "grad_norm": 8.750755495425675, "learning_rate": 8.090701033867636e-06, "loss": 0.6593, "step": 10799 }, { "epoch": 0.31, "grad_norm": 7.150893959766717, "learning_rate": 8.090336474941346e-06, "loss": 0.7245, "step": 10800 }, { "epoch": 0.31, "grad_norm": 8.652707032441224, "learning_rate": 8.089971889429272e-06, "loss": 0.4302, "step": 10801 }, { "epoch": 0.31, "grad_norm": 5.44398979420935, "learning_rate": 8.08960727733455e-06, "loss": 0.7927, "step": 10802 }, { "epoch": 0.31, "grad_norm": 7.014757744214833, "learning_rate": 8.089242638660317e-06, "loss": 0.769, "step": 10803 }, { "epoch": 0.31, "grad_norm": 9.584797412878524, "learning_rate": 8.08887797340971e-06, "loss": 0.3389, "step": 10804 }, { "epoch": 0.31, "grad_norm": 3.7266109631344118, "learning_rate": 8.088513281585866e-06, "loss": 0.4653, "step": 10805 }, { "epoch": 0.31, "grad_norm": 6.478245202516702, "learning_rate": 8.088148563191923e-06, "loss": 0.2616, "step": 10806 }, { "epoch": 0.31, "grad_norm": 4.114162803438436, "learning_rate": 8.08778381823102e-06, "loss": 0.499, "step": 10807 }, { "epoch": 0.31, "grad_norm": 4.9317989996228215, "learning_rate": 8.087419046706288e-06, "loss": 0.4413, "step": 10808 }, { "epoch": 0.31, "grad_norm": 9.178314755653416, "learning_rate": 8.087054248620874e-06, "loss": 0.6684, "step": 10809 }, { "epoch": 0.31, "grad_norm": 9.968834090401357, "learning_rate": 8.08668942397791e-06, "loss": 0.7674, "step": 10810 }, { "epoch": 0.31, "grad_norm": 6.061427935952196, "learning_rate": 8.08632457278054e-06, "loss": 0.797, "step": 10811 }, { "epoch": 0.31, "grad_norm": 7.769853756877234, "learning_rate": 8.085959695031897e-06, "loss": 0.3748, "step": 10812 }, { "epoch": 0.31, "grad_norm": 8.903241743699478, "learning_rate": 8.085594790735122e-06, "loss": 0.7905, "step": 10813 }, { "epoch": 0.31, "grad_norm": 4.465065003634222, "learning_rate": 8.085229859893358e-06, "loss": 0.5269, "step": 10814 }, { "epoch": 0.31, "grad_norm": 6.0539966213694525, "learning_rate": 8.084864902509739e-06, "loss": 0.4455, "step": 10815 }, { "epoch": 0.31, "grad_norm": 5.95047534838988, "learning_rate": 8.08449991858741e-06, "loss": 0.5776, "step": 10816 }, { "epoch": 0.31, "grad_norm": 2.429808904920351, "learning_rate": 8.084134908129504e-06, "loss": 0.5256, "step": 10817 }, { "epoch": 0.31, "grad_norm": 7.518223303455744, "learning_rate": 8.083769871139168e-06, "loss": 0.6012, "step": 10818 }, { "epoch": 0.31, "grad_norm": 4.317691000737586, "learning_rate": 8.083404807619537e-06, "loss": 0.2926, "step": 10819 }, { "epoch": 0.31, "grad_norm": 5.0165890631688015, "learning_rate": 8.083039717573757e-06, "loss": 0.478, "step": 10820 }, { "epoch": 0.31, "grad_norm": 4.696314090781846, "learning_rate": 8.082674601004966e-06, "loss": 0.9204, "step": 10821 }, { "epoch": 0.31, "grad_norm": 5.540333030482334, "learning_rate": 8.082309457916301e-06, "loss": 0.4601, "step": 10822 }, { "epoch": 0.31, "grad_norm": 4.234911867010581, "learning_rate": 8.08194428831091e-06, "loss": 0.6056, "step": 10823 }, { "epoch": 0.31, "grad_norm": 3.9679918541247834, "learning_rate": 8.081579092191932e-06, "loss": 0.3862, "step": 10824 }, { "epoch": 0.31, "grad_norm": 4.991181608056803, "learning_rate": 8.081213869562508e-06, "loss": 0.2892, "step": 10825 }, { "epoch": 0.31, "grad_norm": 7.871143547260281, "learning_rate": 8.08084862042578e-06, "loss": 0.4562, "step": 10826 }, { "epoch": 0.31, "grad_norm": 3.3006756556848953, "learning_rate": 8.08048334478489e-06, "loss": 0.2303, "step": 10827 }, { "epoch": 0.31, "grad_norm": 8.714970689068789, "learning_rate": 8.080118042642984e-06, "loss": 0.7493, "step": 10828 }, { "epoch": 0.31, "grad_norm": 4.077627831331621, "learning_rate": 8.0797527140032e-06, "loss": 0.4075, "step": 10829 }, { "epoch": 0.31, "grad_norm": 4.994118426008421, "learning_rate": 8.079387358868682e-06, "loss": 0.6248, "step": 10830 }, { "epoch": 0.31, "grad_norm": 7.859475470748221, "learning_rate": 8.079021977242573e-06, "loss": 0.9577, "step": 10831 }, { "epoch": 0.31, "grad_norm": 4.20620856923619, "learning_rate": 8.078656569128018e-06, "loss": 0.2335, "step": 10832 }, { "epoch": 0.31, "grad_norm": 9.021677824904145, "learning_rate": 8.07829113452816e-06, "loss": 0.7812, "step": 10833 }, { "epoch": 0.31, "grad_norm": 6.040725538243014, "learning_rate": 8.077925673446141e-06, "loss": 0.5596, "step": 10834 }, { "epoch": 0.31, "grad_norm": 5.91313657983062, "learning_rate": 8.077560185885108e-06, "loss": 0.7319, "step": 10835 }, { "epoch": 0.31, "grad_norm": 7.169415819000177, "learning_rate": 8.077194671848204e-06, "loss": 0.4897, "step": 10836 }, { "epoch": 0.31, "grad_norm": 5.894764465208153, "learning_rate": 8.076829131338572e-06, "loss": 0.646, "step": 10837 }, { "epoch": 0.31, "grad_norm": 4.266465631730802, "learning_rate": 8.076463564359359e-06, "loss": 0.6349, "step": 10838 }, { "epoch": 0.31, "grad_norm": 11.549096499027431, "learning_rate": 8.076097970913707e-06, "loss": 1.1142, "step": 10839 }, { "epoch": 0.31, "grad_norm": 7.006024084608979, "learning_rate": 8.075732351004763e-06, "loss": 0.8972, "step": 10840 }, { "epoch": 0.31, "grad_norm": 3.46378500329802, "learning_rate": 8.075366704635675e-06, "loss": 0.2907, "step": 10841 }, { "epoch": 0.31, "grad_norm": 8.174141039037327, "learning_rate": 8.075001031809581e-06, "loss": 0.6612, "step": 10842 }, { "epoch": 0.31, "grad_norm": 4.448986124416191, "learning_rate": 8.074635332529636e-06, "loss": 0.438, "step": 10843 }, { "epoch": 0.31, "grad_norm": 5.661860386511348, "learning_rate": 8.07426960679898e-06, "loss": 0.3498, "step": 10844 }, { "epoch": 0.31, "grad_norm": 3.3855599221461974, "learning_rate": 8.073903854620762e-06, "loss": 0.4041, "step": 10845 }, { "epoch": 0.31, "grad_norm": 6.2009120039667325, "learning_rate": 8.073538075998126e-06, "loss": 0.7294, "step": 10846 }, { "epoch": 0.31, "grad_norm": 3.227242331202937, "learning_rate": 8.073172270934223e-06, "loss": 0.2753, "step": 10847 }, { "epoch": 0.31, "grad_norm": 6.403826185253907, "learning_rate": 8.072806439432195e-06, "loss": 0.6859, "step": 10848 }, { "epoch": 0.31, "grad_norm": 7.556358807382251, "learning_rate": 8.072440581495193e-06, "loss": 0.3913, "step": 10849 }, { "epoch": 0.31, "grad_norm": 9.237267004231203, "learning_rate": 8.072074697126361e-06, "loss": 0.6119, "step": 10850 }, { "epoch": 0.31, "grad_norm": 3.488055402358393, "learning_rate": 8.07170878632885e-06, "loss": 0.2824, "step": 10851 }, { "epoch": 0.31, "grad_norm": 6.599590336483433, "learning_rate": 8.071342849105806e-06, "loss": 0.6939, "step": 10852 }, { "epoch": 0.31, "grad_norm": 3.2844909284550314, "learning_rate": 8.07097688546038e-06, "loss": 0.2045, "step": 10853 }, { "epoch": 0.31, "grad_norm": 6.937475221606845, "learning_rate": 8.070610895395714e-06, "loss": 0.5032, "step": 10854 }, { "epoch": 0.31, "grad_norm": 8.42125590637462, "learning_rate": 8.070244878914964e-06, "loss": 0.5318, "step": 10855 }, { "epoch": 0.31, "grad_norm": 8.673187541435171, "learning_rate": 8.069878836021274e-06, "loss": 1.0828, "step": 10856 }, { "epoch": 0.31, "grad_norm": 3.093388391804966, "learning_rate": 8.069512766717794e-06, "loss": 0.3581, "step": 10857 }, { "epoch": 0.31, "grad_norm": 3.803064834507496, "learning_rate": 8.069146671007672e-06, "loss": 0.301, "step": 10858 }, { "epoch": 0.31, "grad_norm": 3.568127821418207, "learning_rate": 8.068780548894062e-06, "loss": 0.7697, "step": 10859 }, { "epoch": 0.31, "grad_norm": 9.153304006923625, "learning_rate": 8.06841440038011e-06, "loss": 0.6963, "step": 10860 }, { "epoch": 0.31, "grad_norm": 11.068369283649835, "learning_rate": 8.068048225468965e-06, "loss": 0.6169, "step": 10861 }, { "epoch": 0.31, "grad_norm": 3.1172639842198886, "learning_rate": 8.067682024163781e-06, "loss": 0.3253, "step": 10862 }, { "epoch": 0.31, "grad_norm": 7.95246965447506, "learning_rate": 8.067315796467705e-06, "loss": 0.5117, "step": 10863 }, { "epoch": 0.31, "grad_norm": 9.472565640674892, "learning_rate": 8.06694954238389e-06, "loss": 0.598, "step": 10864 }, { "epoch": 0.31, "grad_norm": 9.19158222047871, "learning_rate": 8.066583261915484e-06, "loss": 0.7352, "step": 10865 }, { "epoch": 0.31, "grad_norm": 3.7739453339289226, "learning_rate": 8.066216955065641e-06, "loss": 0.1625, "step": 10866 }, { "epoch": 0.31, "grad_norm": 6.980343263049986, "learning_rate": 8.06585062183751e-06, "loss": 1.0496, "step": 10867 }, { "epoch": 0.31, "grad_norm": 12.22713648999614, "learning_rate": 8.065484262234245e-06, "loss": 0.7381, "step": 10868 }, { "epoch": 0.31, "grad_norm": 7.163461574698208, "learning_rate": 8.065117876258997e-06, "loss": 0.3984, "step": 10869 }, { "epoch": 0.31, "grad_norm": 4.639018387551037, "learning_rate": 8.064751463914914e-06, "loss": 0.7154, "step": 10870 }, { "epoch": 0.31, "grad_norm": 7.403931202433354, "learning_rate": 8.064385025205155e-06, "loss": 0.624, "step": 10871 }, { "epoch": 0.31, "grad_norm": 5.17248725148359, "learning_rate": 8.064018560132866e-06, "loss": 0.3484, "step": 10872 }, { "epoch": 0.31, "grad_norm": 5.5169700300969815, "learning_rate": 8.063652068701205e-06, "loss": 0.4554, "step": 10873 }, { "epoch": 0.31, "grad_norm": 6.676781201680822, "learning_rate": 8.06328555091332e-06, "loss": 0.4782, "step": 10874 }, { "epoch": 0.31, "grad_norm": 5.849448500826763, "learning_rate": 8.06291900677237e-06, "loss": 0.6617, "step": 10875 }, { "epoch": 0.31, "grad_norm": 3.3923624482939077, "learning_rate": 8.062552436281502e-06, "loss": 0.1733, "step": 10876 }, { "epoch": 0.31, "grad_norm": 4.866304394006185, "learning_rate": 8.062185839443874e-06, "loss": 0.4276, "step": 10877 }, { "epoch": 0.31, "grad_norm": 7.1997579951146395, "learning_rate": 8.061819216262638e-06, "loss": 0.4957, "step": 10878 }, { "epoch": 0.31, "grad_norm": 4.487657335971302, "learning_rate": 8.061452566740949e-06, "loss": 0.3866, "step": 10879 }, { "epoch": 0.31, "grad_norm": 10.686948996277167, "learning_rate": 8.06108589088196e-06, "loss": 0.7522, "step": 10880 }, { "epoch": 0.31, "grad_norm": 2.098954365602692, "learning_rate": 8.060719188688824e-06, "loss": 0.3154, "step": 10881 }, { "epoch": 0.31, "grad_norm": 7.746589771727342, "learning_rate": 8.0603524601647e-06, "loss": 0.7067, "step": 10882 }, { "epoch": 0.31, "grad_norm": 4.874087908591086, "learning_rate": 8.059985705312739e-06, "loss": 0.4093, "step": 10883 }, { "epoch": 0.31, "grad_norm": 4.215048897234476, "learning_rate": 8.059618924136097e-06, "loss": 0.8145, "step": 10884 }, { "epoch": 0.31, "grad_norm": 5.117164017718068, "learning_rate": 8.059252116637932e-06, "loss": 0.5727, "step": 10885 }, { "epoch": 0.31, "grad_norm": 4.123144946429554, "learning_rate": 8.058885282821396e-06, "loss": 0.445, "step": 10886 }, { "epoch": 0.31, "grad_norm": 5.072971957282054, "learning_rate": 8.05851842268965e-06, "loss": 0.65, "step": 10887 }, { "epoch": 0.31, "grad_norm": 5.089509193274945, "learning_rate": 8.058151536245843e-06, "loss": 0.2416, "step": 10888 }, { "epoch": 0.31, "grad_norm": 7.812441650172724, "learning_rate": 8.057784623493137e-06, "loss": 0.4973, "step": 10889 }, { "epoch": 0.31, "grad_norm": 9.002941392683436, "learning_rate": 8.057417684434685e-06, "loss": 0.9636, "step": 10890 }, { "epoch": 0.31, "grad_norm": 5.481585975363139, "learning_rate": 8.057050719073644e-06, "loss": 0.6544, "step": 10891 }, { "epoch": 0.31, "grad_norm": 3.8094889053451535, "learning_rate": 8.056683727413174e-06, "loss": 0.3727, "step": 10892 }, { "epoch": 0.31, "grad_norm": 8.134422648550808, "learning_rate": 8.056316709456431e-06, "loss": 0.9352, "step": 10893 }, { "epoch": 0.31, "grad_norm": 4.168319701673651, "learning_rate": 8.05594966520657e-06, "loss": 0.3889, "step": 10894 }, { "epoch": 0.31, "grad_norm": 5.255515970148678, "learning_rate": 8.05558259466675e-06, "loss": 0.9154, "step": 10895 }, { "epoch": 0.31, "grad_norm": 4.516907427354544, "learning_rate": 8.055215497840132e-06, "loss": 0.3311, "step": 10896 }, { "epoch": 0.31, "grad_norm": 7.572539420266122, "learning_rate": 8.054848374729869e-06, "loss": 0.9855, "step": 10897 }, { "epoch": 0.31, "grad_norm": 8.526574722681133, "learning_rate": 8.054481225339121e-06, "loss": 0.8401, "step": 10898 }, { "epoch": 0.31, "grad_norm": 6.984563496412114, "learning_rate": 8.054114049671048e-06, "loss": 0.5686, "step": 10899 }, { "epoch": 0.31, "grad_norm": 8.75714713162967, "learning_rate": 8.053746847728808e-06, "loss": 0.9051, "step": 10900 }, { "epoch": 0.31, "grad_norm": 5.257915977604745, "learning_rate": 8.05337961951556e-06, "loss": 0.4955, "step": 10901 }, { "epoch": 0.31, "grad_norm": 10.131958442406983, "learning_rate": 8.053012365034463e-06, "loss": 0.6551, "step": 10902 }, { "epoch": 0.31, "grad_norm": 6.41711787912161, "learning_rate": 8.052645084288677e-06, "loss": 0.5139, "step": 10903 }, { "epoch": 0.31, "grad_norm": 3.3364584022511643, "learning_rate": 8.052277777281359e-06, "loss": 0.4413, "step": 10904 }, { "epoch": 0.31, "grad_norm": 5.74628336662943, "learning_rate": 8.051910444015674e-06, "loss": 0.3006, "step": 10905 }, { "epoch": 0.31, "grad_norm": 5.584707271380196, "learning_rate": 8.051543084494777e-06, "loss": 0.3252, "step": 10906 }, { "epoch": 0.31, "grad_norm": 7.586057243619969, "learning_rate": 8.05117569872183e-06, "loss": 0.9776, "step": 10907 }, { "epoch": 0.31, "grad_norm": 6.505588220072175, "learning_rate": 8.050808286699996e-06, "loss": 0.2778, "step": 10908 }, { "epoch": 0.31, "grad_norm": 2.5230332278083214, "learning_rate": 8.050440848432433e-06, "loss": 0.2465, "step": 10909 }, { "epoch": 0.31, "grad_norm": 5.6918407580651635, "learning_rate": 8.050073383922304e-06, "loss": 0.394, "step": 10910 }, { "epoch": 0.31, "grad_norm": 7.188922509113659, "learning_rate": 8.049705893172767e-06, "loss": 0.4115, "step": 10911 }, { "epoch": 0.31, "grad_norm": 4.793663863660285, "learning_rate": 8.049338376186988e-06, "loss": 0.8615, "step": 10912 }, { "epoch": 0.31, "grad_norm": 4.549762084525668, "learning_rate": 8.048970832968126e-06, "loss": 0.6512, "step": 10913 }, { "epoch": 0.31, "grad_norm": 2.0016647949998774, "learning_rate": 8.048603263519344e-06, "loss": 0.2545, "step": 10914 }, { "epoch": 0.31, "grad_norm": 6.795067943769444, "learning_rate": 8.048235667843803e-06, "loss": 0.8057, "step": 10915 }, { "epoch": 0.31, "grad_norm": 6.0351569610891564, "learning_rate": 8.047868045944665e-06, "loss": 0.593, "step": 10916 }, { "epoch": 0.31, "grad_norm": 4.285158308614621, "learning_rate": 8.047500397825092e-06, "loss": 0.4161, "step": 10917 }, { "epoch": 0.31, "grad_norm": 3.6603397579866828, "learning_rate": 8.04713272348825e-06, "loss": 0.6046, "step": 10918 }, { "epoch": 0.31, "grad_norm": 5.169727720989519, "learning_rate": 8.046765022937301e-06, "loss": 0.6414, "step": 10919 }, { "epoch": 0.31, "grad_norm": 3.6534378126674665, "learning_rate": 8.046397296175407e-06, "loss": 0.3251, "step": 10920 }, { "epoch": 0.31, "grad_norm": 7.723791846455065, "learning_rate": 8.046029543205732e-06, "loss": 0.6296, "step": 10921 }, { "epoch": 0.31, "grad_norm": 7.116277877348293, "learning_rate": 8.04566176403144e-06, "loss": 0.4383, "step": 10922 }, { "epoch": 0.31, "grad_norm": 3.1697119492305563, "learning_rate": 8.045293958655696e-06, "loss": 0.5264, "step": 10923 }, { "epoch": 0.31, "grad_norm": 5.820531239335552, "learning_rate": 8.04492612708166e-06, "loss": 0.604, "step": 10924 }, { "epoch": 0.31, "grad_norm": 6.775471629375779, "learning_rate": 8.044558269312502e-06, "loss": 0.4657, "step": 10925 }, { "epoch": 0.31, "grad_norm": 3.679986702646197, "learning_rate": 8.044190385351384e-06, "loss": 0.7099, "step": 10926 }, { "epoch": 0.31, "grad_norm": 5.249580275696704, "learning_rate": 8.04382247520147e-06, "loss": 0.9482, "step": 10927 }, { "epoch": 0.31, "grad_norm": 6.9915059871040945, "learning_rate": 8.043454538865926e-06, "loss": 0.4921, "step": 10928 }, { "epoch": 0.31, "grad_norm": 5.205757506158076, "learning_rate": 8.043086576347918e-06, "loss": 0.304, "step": 10929 }, { "epoch": 0.31, "grad_norm": 6.464727800602951, "learning_rate": 8.042718587650611e-06, "loss": 0.7272, "step": 10930 }, { "epoch": 0.31, "grad_norm": 8.242770350670583, "learning_rate": 8.04235057277717e-06, "loss": 0.6987, "step": 10931 }, { "epoch": 0.31, "grad_norm": 7.2650114723319765, "learning_rate": 8.041982531730761e-06, "loss": 0.5953, "step": 10932 }, { "epoch": 0.31, "grad_norm": 4.413925384822465, "learning_rate": 8.041614464514552e-06, "loss": 0.4558, "step": 10933 }, { "epoch": 0.31, "grad_norm": 6.544882824040467, "learning_rate": 8.041246371131708e-06, "loss": 0.5858, "step": 10934 }, { "epoch": 0.31, "grad_norm": 8.50146494190544, "learning_rate": 8.040878251585395e-06, "loss": 0.8796, "step": 10935 }, { "epoch": 0.31, "grad_norm": 6.402359800103338, "learning_rate": 8.040510105878782e-06, "loss": 0.4154, "step": 10936 }, { "epoch": 0.31, "grad_norm": 5.635393881854559, "learning_rate": 8.040141934015035e-06, "loss": 0.6304, "step": 10937 }, { "epoch": 0.31, "grad_norm": 5.953535251048207, "learning_rate": 8.039773735997321e-06, "loss": 0.6926, "step": 10938 }, { "epoch": 0.31, "grad_norm": 18.43099767823324, "learning_rate": 8.039405511828808e-06, "loss": 0.3802, "step": 10939 }, { "epoch": 0.31, "grad_norm": 7.1032512742518925, "learning_rate": 8.03903726151266e-06, "loss": 0.5354, "step": 10940 }, { "epoch": 0.31, "grad_norm": 5.217731330725958, "learning_rate": 8.038668985052054e-06, "loss": 0.3382, "step": 10941 }, { "epoch": 0.31, "grad_norm": 6.132081356322589, "learning_rate": 8.03830068245015e-06, "loss": 0.509, "step": 10942 }, { "epoch": 0.31, "grad_norm": 4.085807987881175, "learning_rate": 8.03793235371012e-06, "loss": 0.4312, "step": 10943 }, { "epoch": 0.31, "grad_norm": 3.999548886610429, "learning_rate": 8.037563998835132e-06, "loss": 0.5375, "step": 10944 }, { "epoch": 0.31, "grad_norm": 3.1573112138986095, "learning_rate": 8.037195617828355e-06, "loss": 0.3318, "step": 10945 }, { "epoch": 0.31, "grad_norm": 5.296230130620891, "learning_rate": 8.036827210692959e-06, "loss": 0.7412, "step": 10946 }, { "epoch": 0.31, "grad_norm": 4.884363474377722, "learning_rate": 8.03645877743211e-06, "loss": 0.2576, "step": 10947 }, { "epoch": 0.31, "grad_norm": 8.703939129265944, "learning_rate": 8.036090318048981e-06, "loss": 0.5756, "step": 10948 }, { "epoch": 0.31, "grad_norm": 6.306931447317186, "learning_rate": 8.035721832546742e-06, "loss": 0.3573, "step": 10949 }, { "epoch": 0.31, "grad_norm": 5.948570732145712, "learning_rate": 8.03535332092856e-06, "loss": 0.4992, "step": 10950 }, { "epoch": 0.31, "grad_norm": 15.597545093166092, "learning_rate": 8.034984783197608e-06, "loss": 0.1749, "step": 10951 }, { "epoch": 0.31, "grad_norm": 6.646404955286044, "learning_rate": 8.034616219357057e-06, "loss": 0.4859, "step": 10952 }, { "epoch": 0.31, "grad_norm": 3.9264745147598883, "learning_rate": 8.034247629410075e-06, "loss": 0.6106, "step": 10953 }, { "epoch": 0.31, "grad_norm": 12.961418782151645, "learning_rate": 8.033879013359833e-06, "loss": 0.919, "step": 10954 }, { "epoch": 0.31, "grad_norm": 5.296562703246455, "learning_rate": 8.033510371209505e-06, "loss": 0.3608, "step": 10955 }, { "epoch": 0.31, "grad_norm": 4.176449642804427, "learning_rate": 8.03314170296226e-06, "loss": 0.4492, "step": 10956 }, { "epoch": 0.31, "grad_norm": 8.721833917622869, "learning_rate": 8.03277300862127e-06, "loss": 0.9752, "step": 10957 }, { "epoch": 0.31, "grad_norm": 4.582289380333635, "learning_rate": 8.032404288189708e-06, "loss": 0.3446, "step": 10958 }, { "epoch": 0.31, "grad_norm": 13.595935829953897, "learning_rate": 8.032035541670745e-06, "loss": 0.6508, "step": 10959 }, { "epoch": 0.31, "grad_norm": 2.8560801215540303, "learning_rate": 8.031666769067554e-06, "loss": 0.2695, "step": 10960 }, { "epoch": 0.31, "grad_norm": 3.722035791621643, "learning_rate": 8.031297970383307e-06, "loss": 0.237, "step": 10961 }, { "epoch": 0.31, "grad_norm": 5.496477971748622, "learning_rate": 8.030929145621176e-06, "loss": 0.628, "step": 10962 }, { "epoch": 0.31, "grad_norm": 3.6613257625160744, "learning_rate": 8.030560294784336e-06, "loss": 0.5319, "step": 10963 }, { "epoch": 0.31, "grad_norm": 4.212081866920531, "learning_rate": 8.03019141787596e-06, "loss": 0.8721, "step": 10964 }, { "epoch": 0.31, "grad_norm": 5.667776218880158, "learning_rate": 8.029822514899217e-06, "loss": 0.4746, "step": 10965 }, { "epoch": 0.31, "grad_norm": 5.164876622649486, "learning_rate": 8.029453585857285e-06, "loss": 0.5459, "step": 10966 }, { "epoch": 0.31, "grad_norm": 7.240851714264948, "learning_rate": 8.029084630753337e-06, "loss": 1.2904, "step": 10967 }, { "epoch": 0.31, "grad_norm": 4.393282645969771, "learning_rate": 8.028715649590547e-06, "loss": 0.4834, "step": 10968 }, { "epoch": 0.31, "grad_norm": 4.099665292010496, "learning_rate": 8.02834664237209e-06, "loss": 0.23, "step": 10969 }, { "epoch": 0.31, "grad_norm": 7.614718243351402, "learning_rate": 8.027977609101139e-06, "loss": 0.6651, "step": 10970 }, { "epoch": 0.31, "grad_norm": 5.105093839345219, "learning_rate": 8.027608549780869e-06, "loss": 0.2848, "step": 10971 }, { "epoch": 0.31, "grad_norm": 5.462107040621931, "learning_rate": 8.027239464414457e-06, "loss": 0.3211, "step": 10972 }, { "epoch": 0.31, "grad_norm": 6.285376792058838, "learning_rate": 8.026870353005074e-06, "loss": 0.5042, "step": 10973 }, { "epoch": 0.31, "grad_norm": 7.810808349567248, "learning_rate": 8.0265012155559e-06, "loss": 0.5812, "step": 10974 }, { "epoch": 0.31, "grad_norm": 11.513695854181455, "learning_rate": 8.026132052070109e-06, "loss": 0.8957, "step": 10975 }, { "epoch": 0.31, "grad_norm": 5.825734402444024, "learning_rate": 8.025762862550875e-06, "loss": 0.4984, "step": 10976 }, { "epoch": 0.31, "grad_norm": 3.9118774467200974, "learning_rate": 8.025393647001375e-06, "loss": 0.3815, "step": 10977 }, { "epoch": 0.31, "grad_norm": 7.472506420430105, "learning_rate": 8.025024405424788e-06, "loss": 0.5412, "step": 10978 }, { "epoch": 0.31, "grad_norm": 4.1058552684369625, "learning_rate": 8.024655137824288e-06, "loss": 0.5838, "step": 10979 }, { "epoch": 0.31, "grad_norm": 5.2867502767694745, "learning_rate": 8.024285844203052e-06, "loss": 0.6008, "step": 10980 }, { "epoch": 0.31, "grad_norm": 2.2003042682569034, "learning_rate": 8.023916524564257e-06, "loss": 0.3121, "step": 10981 }, { "epoch": 0.31, "grad_norm": 4.955539511978701, "learning_rate": 8.023547178911082e-06, "loss": 0.6832, "step": 10982 }, { "epoch": 0.31, "grad_norm": 10.535867465478201, "learning_rate": 8.0231778072467e-06, "loss": 1.0309, "step": 10983 }, { "epoch": 0.31, "grad_norm": 3.4431196054640365, "learning_rate": 8.022808409574293e-06, "loss": 0.2071, "step": 10984 }, { "epoch": 0.31, "grad_norm": 14.159585117792393, "learning_rate": 8.022438985897038e-06, "loss": 0.3927, "step": 10985 }, { "epoch": 0.31, "grad_norm": 4.895201616586891, "learning_rate": 8.022069536218113e-06, "loss": 0.5533, "step": 10986 }, { "epoch": 0.31, "grad_norm": 4.835172478677422, "learning_rate": 8.021700060540695e-06, "loss": 0.2953, "step": 10987 }, { "epoch": 0.31, "grad_norm": 6.780684768395056, "learning_rate": 8.021330558867964e-06, "loss": 0.4942, "step": 10988 }, { "epoch": 0.31, "grad_norm": 8.3800779292596, "learning_rate": 8.020961031203098e-06, "loss": 0.5466, "step": 10989 }, { "epoch": 0.31, "grad_norm": 8.474328150042604, "learning_rate": 8.020591477549276e-06, "loss": 0.8137, "step": 10990 }, { "epoch": 0.31, "grad_norm": 4.06290744792315, "learning_rate": 8.020221897909677e-06, "loss": 0.3003, "step": 10991 }, { "epoch": 0.31, "grad_norm": 7.6627332991015145, "learning_rate": 8.019852292287482e-06, "loss": 0.6358, "step": 10992 }, { "epoch": 0.31, "grad_norm": 4.684005299256517, "learning_rate": 8.019482660685867e-06, "loss": 0.267, "step": 10993 }, { "epoch": 0.31, "grad_norm": 4.462180637504217, "learning_rate": 8.019113003108016e-06, "loss": 0.6845, "step": 10994 }, { "epoch": 0.31, "grad_norm": 7.244365706126406, "learning_rate": 8.018743319557109e-06, "loss": 0.7247, "step": 10995 }, { "epoch": 0.31, "grad_norm": 2.777613013798427, "learning_rate": 8.018373610036322e-06, "loss": 0.2615, "step": 10996 }, { "epoch": 0.31, "grad_norm": 5.594802842608151, "learning_rate": 8.018003874548841e-06, "loss": 0.7163, "step": 10997 }, { "epoch": 0.31, "grad_norm": 3.8539617157476234, "learning_rate": 8.017634113097844e-06, "loss": 0.3611, "step": 10998 }, { "epoch": 0.31, "grad_norm": 5.471310348613682, "learning_rate": 8.017264325686511e-06, "loss": 0.4685, "step": 10999 }, { "epoch": 0.32, "grad_norm": 8.47948414601067, "learning_rate": 8.016894512318025e-06, "loss": 0.6088, "step": 11000 }, { "epoch": 0.32, "grad_norm": 6.240988834722574, "learning_rate": 8.016524672995567e-06, "loss": 0.7789, "step": 11001 }, { "epoch": 0.32, "grad_norm": 5.9789987223716805, "learning_rate": 8.016154807722318e-06, "loss": 0.3178, "step": 11002 }, { "epoch": 0.32, "grad_norm": 4.412050203734615, "learning_rate": 8.015784916501462e-06, "loss": 0.2009, "step": 11003 }, { "epoch": 0.32, "grad_norm": 6.316641780163432, "learning_rate": 8.01541499933618e-06, "loss": 0.6664, "step": 11004 }, { "epoch": 0.32, "grad_norm": 6.800424915950148, "learning_rate": 8.01504505622965e-06, "loss": 0.6396, "step": 11005 }, { "epoch": 0.32, "grad_norm": 7.542708990522599, "learning_rate": 8.014675087185062e-06, "loss": 0.8357, "step": 11006 }, { "epoch": 0.32, "grad_norm": 3.5769003392318006, "learning_rate": 8.014305092205597e-06, "loss": 0.2998, "step": 11007 }, { "epoch": 0.32, "grad_norm": 8.581201331770902, "learning_rate": 8.013935071294433e-06, "loss": 0.6892, "step": 11008 }, { "epoch": 0.32, "grad_norm": 7.378753805933402, "learning_rate": 8.01356502445476e-06, "loss": 0.1675, "step": 11009 }, { "epoch": 0.32, "grad_norm": 12.10210987328283, "learning_rate": 8.013194951689755e-06, "loss": 0.423, "step": 11010 }, { "epoch": 0.32, "grad_norm": 5.461001394441067, "learning_rate": 8.012824853002605e-06, "loss": 0.493, "step": 11011 }, { "epoch": 0.32, "grad_norm": 5.754233667571485, "learning_rate": 8.012454728396495e-06, "loss": 0.471, "step": 11012 }, { "epoch": 0.32, "grad_norm": 4.6998201843115295, "learning_rate": 8.012084577874608e-06, "loss": 0.1409, "step": 11013 }, { "epoch": 0.32, "grad_norm": 5.728381067224549, "learning_rate": 8.011714401440127e-06, "loss": 0.2232, "step": 11014 }, { "epoch": 0.32, "grad_norm": 12.760534497190614, "learning_rate": 8.011344199096239e-06, "loss": 0.7373, "step": 11015 }, { "epoch": 0.32, "grad_norm": 3.8389284861194413, "learning_rate": 8.010973970846128e-06, "loss": 0.1121, "step": 11016 }, { "epoch": 0.32, "grad_norm": 4.931742437841373, "learning_rate": 8.010603716692977e-06, "loss": 0.4021, "step": 11017 }, { "epoch": 0.32, "grad_norm": 4.203132629387607, "learning_rate": 8.010233436639974e-06, "loss": 0.2627, "step": 11018 }, { "epoch": 0.32, "grad_norm": 4.034492667068245, "learning_rate": 8.009863130690302e-06, "loss": 0.225, "step": 11019 }, { "epoch": 0.32, "grad_norm": 6.280854853257087, "learning_rate": 8.009492798847152e-06, "loss": 0.4367, "step": 11020 }, { "epoch": 0.32, "grad_norm": 3.0984303299767526, "learning_rate": 8.009122441113703e-06, "loss": 0.2721, "step": 11021 }, { "epoch": 0.32, "grad_norm": 8.712395105592499, "learning_rate": 8.008752057493143e-06, "loss": 0.5098, "step": 11022 }, { "epoch": 0.32, "grad_norm": 5.510821836559456, "learning_rate": 8.00838164798866e-06, "loss": 0.3468, "step": 11023 }, { "epoch": 0.32, "grad_norm": 6.366520347045606, "learning_rate": 8.008011212603442e-06, "loss": 0.7496, "step": 11024 }, { "epoch": 0.32, "grad_norm": 5.542282051202646, "learning_rate": 8.007640751340673e-06, "loss": 0.636, "step": 11025 }, { "epoch": 0.32, "grad_norm": 3.750362950880379, "learning_rate": 8.007270264203542e-06, "loss": 0.2682, "step": 11026 }, { "epoch": 0.32, "grad_norm": 6.761166096563598, "learning_rate": 8.006899751195233e-06, "loss": 0.5958, "step": 11027 }, { "epoch": 0.32, "grad_norm": 4.171439158786394, "learning_rate": 8.006529212318937e-06, "loss": 0.3515, "step": 11028 }, { "epoch": 0.32, "grad_norm": 4.306700331085913, "learning_rate": 8.00615864757784e-06, "loss": 0.798, "step": 11029 }, { "epoch": 0.32, "grad_norm": 3.7550603737784063, "learning_rate": 8.00578805697513e-06, "loss": 0.4887, "step": 11030 }, { "epoch": 0.32, "grad_norm": 5.494114284133555, "learning_rate": 8.005417440513995e-06, "loss": 0.2691, "step": 11031 }, { "epoch": 0.32, "grad_norm": 10.789933773717847, "learning_rate": 8.005046798197625e-06, "loss": 0.4098, "step": 11032 }, { "epoch": 0.32, "grad_norm": 4.167522717593037, "learning_rate": 8.004676130029206e-06, "loss": 0.4203, "step": 11033 }, { "epoch": 0.32, "grad_norm": 4.616392838566257, "learning_rate": 8.00430543601193e-06, "loss": 0.5089, "step": 11034 }, { "epoch": 0.32, "grad_norm": 8.638822702138619, "learning_rate": 8.003934716148984e-06, "loss": 0.688, "step": 11035 }, { "epoch": 0.32, "grad_norm": 8.674118439132934, "learning_rate": 8.003563970443557e-06, "loss": 0.7, "step": 11036 }, { "epoch": 0.32, "grad_norm": 7.593046399082639, "learning_rate": 8.003193198898837e-06, "loss": 0.4524, "step": 11037 }, { "epoch": 0.32, "grad_norm": 13.223011024482618, "learning_rate": 8.002822401518018e-06, "loss": 1.0068, "step": 11038 }, { "epoch": 0.32, "grad_norm": 3.8597640486236156, "learning_rate": 8.002451578304287e-06, "loss": 0.6871, "step": 11039 }, { "epoch": 0.32, "grad_norm": 3.7352910534314745, "learning_rate": 8.002080729260835e-06, "loss": 0.4704, "step": 11040 }, { "epoch": 0.32, "grad_norm": 7.300325970361122, "learning_rate": 8.001709854390851e-06, "loss": 0.703, "step": 11041 }, { "epoch": 0.32, "grad_norm": 5.262849070372854, "learning_rate": 8.001338953697528e-06, "loss": 0.6822, "step": 11042 }, { "epoch": 0.32, "grad_norm": 4.42864100559795, "learning_rate": 8.000968027184055e-06, "loss": 0.3973, "step": 11043 }, { "epoch": 0.32, "grad_norm": 5.937291673719866, "learning_rate": 8.000597074853624e-06, "loss": 0.6306, "step": 11044 }, { "epoch": 0.32, "grad_norm": 2.458048793252562, "learning_rate": 8.000226096709426e-06, "loss": 0.2259, "step": 11045 }, { "epoch": 0.32, "grad_norm": 7.880237850748278, "learning_rate": 7.999855092754651e-06, "loss": 0.5882, "step": 11046 }, { "epoch": 0.32, "grad_norm": 4.948979998989237, "learning_rate": 7.999484062992493e-06, "loss": 0.4881, "step": 11047 }, { "epoch": 0.32, "grad_norm": 3.4951303699774536, "learning_rate": 7.999113007426143e-06, "loss": 0.4075, "step": 11048 }, { "epoch": 0.32, "grad_norm": 3.6490958393805495, "learning_rate": 7.998741926058793e-06, "loss": 0.1961, "step": 11049 }, { "epoch": 0.32, "grad_norm": 7.239976794484922, "learning_rate": 7.998370818893634e-06, "loss": 0.7499, "step": 11050 }, { "epoch": 0.32, "grad_norm": 8.170066151905607, "learning_rate": 7.997999685933861e-06, "loss": 0.5647, "step": 11051 }, { "epoch": 0.32, "grad_norm": 7.434189516490002, "learning_rate": 7.997628527182667e-06, "loss": 0.7252, "step": 11052 }, { "epoch": 0.32, "grad_norm": 4.366561776580674, "learning_rate": 7.997257342643243e-06, "loss": 0.3716, "step": 11053 }, { "epoch": 0.32, "grad_norm": 10.206468113322384, "learning_rate": 7.996886132318782e-06, "loss": 0.384, "step": 11054 }, { "epoch": 0.32, "grad_norm": 5.640312883612825, "learning_rate": 7.99651489621248e-06, "loss": 0.7719, "step": 11055 }, { "epoch": 0.32, "grad_norm": 5.452495024834925, "learning_rate": 7.996143634327528e-06, "loss": 0.4842, "step": 11056 }, { "epoch": 0.32, "grad_norm": 7.43852617894047, "learning_rate": 7.995772346667123e-06, "loss": 0.6252, "step": 11057 }, { "epoch": 0.32, "grad_norm": 1.6549638756903693, "learning_rate": 7.995401033234457e-06, "loss": 0.0889, "step": 11058 }, { "epoch": 0.32, "grad_norm": 5.352205335335998, "learning_rate": 7.995029694032723e-06, "loss": 0.5085, "step": 11059 }, { "epoch": 0.32, "grad_norm": 12.458053207225047, "learning_rate": 7.994658329065119e-06, "loss": 0.5608, "step": 11060 }, { "epoch": 0.32, "grad_norm": 7.766963114285837, "learning_rate": 7.99428693833484e-06, "loss": 0.5781, "step": 11061 }, { "epoch": 0.32, "grad_norm": 4.459751325525737, "learning_rate": 7.993915521845075e-06, "loss": 0.4939, "step": 11062 }, { "epoch": 0.32, "grad_norm": 4.012465009209959, "learning_rate": 7.993544079599024e-06, "loss": 0.5272, "step": 11063 }, { "epoch": 0.32, "grad_norm": 4.133494301066306, "learning_rate": 7.993172611599884e-06, "loss": 0.5031, "step": 11064 }, { "epoch": 0.32, "grad_norm": 5.879517583649953, "learning_rate": 7.992801117850845e-06, "loss": 0.7643, "step": 11065 }, { "epoch": 0.32, "grad_norm": 3.0336275313483645, "learning_rate": 7.992429598355112e-06, "loss": 0.4954, "step": 11066 }, { "epoch": 0.32, "grad_norm": 6.552807798699112, "learning_rate": 7.99205805311587e-06, "loss": 0.3827, "step": 11067 }, { "epoch": 0.32, "grad_norm": 5.037439697131866, "learning_rate": 7.991686482136326e-06, "loss": 0.2022, "step": 11068 }, { "epoch": 0.32, "grad_norm": 6.690323990986544, "learning_rate": 7.991314885419668e-06, "loss": 0.5464, "step": 11069 }, { "epoch": 0.32, "grad_norm": 6.930584115186349, "learning_rate": 7.990943262969098e-06, "loss": 0.714, "step": 11070 }, { "epoch": 0.32, "grad_norm": 1.3945372605394615, "learning_rate": 7.99057161478781e-06, "loss": 0.0938, "step": 11071 }, { "epoch": 0.32, "grad_norm": 8.071116375099871, "learning_rate": 7.990199940879003e-06, "loss": 0.8093, "step": 11072 }, { "epoch": 0.32, "grad_norm": 6.072880696298673, "learning_rate": 7.989828241245874e-06, "loss": 0.4472, "step": 11073 }, { "epoch": 0.32, "grad_norm": 6.6608627884263365, "learning_rate": 7.989456515891622e-06, "loss": 0.6338, "step": 11074 }, { "epoch": 0.32, "grad_norm": 9.674816417677356, "learning_rate": 7.989084764819442e-06, "loss": 0.3907, "step": 11075 }, { "epoch": 0.32, "grad_norm": 5.0650024822262925, "learning_rate": 7.988712988032535e-06, "loss": 0.3104, "step": 11076 }, { "epoch": 0.32, "grad_norm": 7.70928535424768, "learning_rate": 7.988341185534098e-06, "loss": 0.4919, "step": 11077 }, { "epoch": 0.32, "grad_norm": 6.611562091964231, "learning_rate": 7.98796935732733e-06, "loss": 0.6545, "step": 11078 }, { "epoch": 0.32, "grad_norm": 7.620099603597711, "learning_rate": 7.98759750341543e-06, "loss": 0.8342, "step": 11079 }, { "epoch": 0.32, "grad_norm": 6.355910161028642, "learning_rate": 7.987225623801594e-06, "loss": 0.6023, "step": 11080 }, { "epoch": 0.32, "grad_norm": 7.540996212602085, "learning_rate": 7.986853718489026e-06, "loss": 0.6971, "step": 11081 }, { "epoch": 0.32, "grad_norm": 11.474331030533614, "learning_rate": 7.986481787480922e-06, "loss": 0.5334, "step": 11082 }, { "epoch": 0.32, "grad_norm": 5.258837346357378, "learning_rate": 7.986109830780483e-06, "loss": 0.5741, "step": 11083 }, { "epoch": 0.32, "grad_norm": 4.317640751155239, "learning_rate": 7.985737848390909e-06, "loss": 0.403, "step": 11084 }, { "epoch": 0.32, "grad_norm": 5.13322114987014, "learning_rate": 7.9853658403154e-06, "loss": 0.6325, "step": 11085 }, { "epoch": 0.32, "grad_norm": 3.2623829422335957, "learning_rate": 7.984993806557156e-06, "loss": 0.2876, "step": 11086 }, { "epoch": 0.32, "grad_norm": 7.478933630016214, "learning_rate": 7.984621747119379e-06, "loss": 0.2523, "step": 11087 }, { "epoch": 0.32, "grad_norm": 5.5539232632339814, "learning_rate": 7.984249662005267e-06, "loss": 0.5812, "step": 11088 }, { "epoch": 0.32, "grad_norm": 11.699155126535576, "learning_rate": 7.983877551218024e-06, "loss": 0.6899, "step": 11089 }, { "epoch": 0.32, "grad_norm": 6.187109270185538, "learning_rate": 7.983505414760848e-06, "loss": 0.3227, "step": 11090 }, { "epoch": 0.32, "grad_norm": 4.258055485782715, "learning_rate": 7.983133252636945e-06, "loss": 0.3858, "step": 11091 }, { "epoch": 0.32, "grad_norm": 4.469814840573924, "learning_rate": 7.98276106484951e-06, "loss": 0.4707, "step": 11092 }, { "epoch": 0.32, "grad_norm": 6.179908873864567, "learning_rate": 7.982388851401752e-06, "loss": 0.348, "step": 11093 }, { "epoch": 0.32, "grad_norm": 8.166844236298882, "learning_rate": 7.98201661229687e-06, "loss": 1.0373, "step": 11094 }, { "epoch": 0.32, "grad_norm": 4.997776967818407, "learning_rate": 7.981644347538064e-06, "loss": 0.7352, "step": 11095 }, { "epoch": 0.32, "grad_norm": 6.606333616768728, "learning_rate": 7.981272057128541e-06, "loss": 0.3183, "step": 11096 }, { "epoch": 0.32, "grad_norm": 2.709611584938806, "learning_rate": 7.9808997410715e-06, "loss": 0.4172, "step": 11097 }, { "epoch": 0.32, "grad_norm": 4.409396623954496, "learning_rate": 7.980527399370146e-06, "loss": 0.783, "step": 11098 }, { "epoch": 0.32, "grad_norm": 5.067186658719793, "learning_rate": 7.980155032027682e-06, "loss": 0.6221, "step": 11099 }, { "epoch": 0.32, "grad_norm": 4.177787534314007, "learning_rate": 7.97978263904731e-06, "loss": 0.4826, "step": 11100 }, { "epoch": 0.32, "grad_norm": 12.725803378363029, "learning_rate": 7.979410220432238e-06, "loss": 0.5604, "step": 11101 }, { "epoch": 0.32, "grad_norm": 7.220957166780591, "learning_rate": 7.979037776185664e-06, "loss": 0.3929, "step": 11102 }, { "epoch": 0.32, "grad_norm": 6.74242672103535, "learning_rate": 7.978665306310795e-06, "loss": 0.6559, "step": 11103 }, { "epoch": 0.32, "grad_norm": 6.2700296649504885, "learning_rate": 7.978292810810836e-06, "loss": 0.5952, "step": 11104 }, { "epoch": 0.32, "grad_norm": 7.611119941681553, "learning_rate": 7.97792028968899e-06, "loss": 0.6241, "step": 11105 }, { "epoch": 0.32, "grad_norm": 6.630721446696922, "learning_rate": 7.977547742948461e-06, "loss": 0.3553, "step": 11106 }, { "epoch": 0.32, "grad_norm": 2.8180625796263157, "learning_rate": 7.977175170592458e-06, "loss": 0.2047, "step": 11107 }, { "epoch": 0.32, "grad_norm": 12.43746534179766, "learning_rate": 7.976802572624183e-06, "loss": 0.5515, "step": 11108 }, { "epoch": 0.32, "grad_norm": 6.264080056907845, "learning_rate": 7.97642994904684e-06, "loss": 0.849, "step": 11109 }, { "epoch": 0.32, "grad_norm": 7.749220655079348, "learning_rate": 7.97605729986364e-06, "loss": 0.8122, "step": 11110 }, { "epoch": 0.32, "grad_norm": 5.150773764323797, "learning_rate": 7.975684625077782e-06, "loss": 0.8895, "step": 11111 }, { "epoch": 0.32, "grad_norm": 5.682098338762897, "learning_rate": 7.97531192469248e-06, "loss": 0.5052, "step": 11112 }, { "epoch": 0.32, "grad_norm": 4.601008798028285, "learning_rate": 7.974939198710931e-06, "loss": 0.4627, "step": 11113 }, { "epoch": 0.32, "grad_norm": 3.887059427790913, "learning_rate": 7.974566447136348e-06, "loss": 0.6162, "step": 11114 }, { "epoch": 0.32, "grad_norm": 3.25690205977316, "learning_rate": 7.974193669971938e-06, "loss": 0.2291, "step": 11115 }, { "epoch": 0.32, "grad_norm": 8.80914101941148, "learning_rate": 7.973820867220903e-06, "loss": 1.1246, "step": 11116 }, { "epoch": 0.32, "grad_norm": 6.184480951682497, "learning_rate": 7.973448038886456e-06, "loss": 0.4432, "step": 11117 }, { "epoch": 0.32, "grad_norm": 5.950140457739413, "learning_rate": 7.973075184971799e-06, "loss": 0.4222, "step": 11118 }, { "epoch": 0.32, "grad_norm": 3.62369579662828, "learning_rate": 7.972702305480145e-06, "loss": 0.6465, "step": 11119 }, { "epoch": 0.32, "grad_norm": 4.882257854240679, "learning_rate": 7.972329400414696e-06, "loss": 0.597, "step": 11120 }, { "epoch": 0.32, "grad_norm": 5.052131301285113, "learning_rate": 7.971956469778666e-06, "loss": 0.1733, "step": 11121 }, { "epoch": 0.32, "grad_norm": 7.353430937219844, "learning_rate": 7.971583513575258e-06, "loss": 0.6035, "step": 11122 }, { "epoch": 0.32, "grad_norm": 8.47624969608434, "learning_rate": 7.971210531807684e-06, "loss": 0.5206, "step": 11123 }, { "epoch": 0.32, "grad_norm": 7.162975898630767, "learning_rate": 7.970837524479151e-06, "loss": 1.0013, "step": 11124 }, { "epoch": 0.32, "grad_norm": 4.796687467307735, "learning_rate": 7.970464491592872e-06, "loss": 0.3367, "step": 11125 }, { "epoch": 0.32, "grad_norm": 6.791191829472288, "learning_rate": 7.970091433152048e-06, "loss": 0.5501, "step": 11126 }, { "epoch": 0.32, "grad_norm": 6.21557060513254, "learning_rate": 7.969718349159897e-06, "loss": 0.3031, "step": 11127 }, { "epoch": 0.32, "grad_norm": 8.161406542222764, "learning_rate": 7.969345239619622e-06, "loss": 0.7504, "step": 11128 }, { "epoch": 0.32, "grad_norm": 5.633838068153346, "learning_rate": 7.968972104534434e-06, "loss": 0.3428, "step": 11129 }, { "epoch": 0.32, "grad_norm": 5.5173832411656, "learning_rate": 7.968598943907548e-06, "loss": 0.3227, "step": 11130 }, { "epoch": 0.32, "grad_norm": 6.552388421148921, "learning_rate": 7.968225757742169e-06, "loss": 0.7067, "step": 11131 }, { "epoch": 0.32, "grad_norm": 4.374562023583401, "learning_rate": 7.96785254604151e-06, "loss": 0.4149, "step": 11132 }, { "epoch": 0.32, "grad_norm": 3.231768566851112, "learning_rate": 7.96747930880878e-06, "loss": 0.176, "step": 11133 }, { "epoch": 0.32, "grad_norm": 6.662135678577417, "learning_rate": 7.967106046047193e-06, "loss": 0.2204, "step": 11134 }, { "epoch": 0.32, "grad_norm": 4.703887041271596, "learning_rate": 7.966732757759956e-06, "loss": 0.3053, "step": 11135 }, { "epoch": 0.32, "grad_norm": 4.246837084153935, "learning_rate": 7.966359443950283e-06, "loss": 0.4797, "step": 11136 }, { "epoch": 0.32, "grad_norm": 5.142947521626589, "learning_rate": 7.965986104621384e-06, "loss": 0.3085, "step": 11137 }, { "epoch": 0.32, "grad_norm": 6.729275784232746, "learning_rate": 7.965612739776474e-06, "loss": 1.1984, "step": 11138 }, { "epoch": 0.32, "grad_norm": 6.880417198073788, "learning_rate": 7.965239349418761e-06, "loss": 0.6322, "step": 11139 }, { "epoch": 0.32, "grad_norm": 5.595297785752675, "learning_rate": 7.96486593355146e-06, "loss": 0.6674, "step": 11140 }, { "epoch": 0.32, "grad_norm": 5.021852993872424, "learning_rate": 7.96449249217778e-06, "loss": 0.5268, "step": 11141 }, { "epoch": 0.32, "grad_norm": 2.774975653704818, "learning_rate": 7.96411902530094e-06, "loss": 0.1125, "step": 11142 }, { "epoch": 0.32, "grad_norm": 6.500137327650879, "learning_rate": 7.963745532924146e-06, "loss": 0.4268, "step": 11143 }, { "epoch": 0.32, "grad_norm": 5.547252588449512, "learning_rate": 7.963372015050616e-06, "loss": 0.7563, "step": 11144 }, { "epoch": 0.32, "grad_norm": 2.9904000942123057, "learning_rate": 7.96299847168356e-06, "loss": 0.2753, "step": 11145 }, { "epoch": 0.32, "grad_norm": 6.437681102982938, "learning_rate": 7.962624902826192e-06, "loss": 0.4082, "step": 11146 }, { "epoch": 0.32, "grad_norm": 3.813245669126443, "learning_rate": 7.96225130848173e-06, "loss": 0.4709, "step": 11147 }, { "epoch": 0.32, "grad_norm": 4.42974245282892, "learning_rate": 7.961877688653381e-06, "loss": 0.4383, "step": 11148 }, { "epoch": 0.32, "grad_norm": 8.04537369139355, "learning_rate": 7.961504043344366e-06, "loss": 1.0073, "step": 11149 }, { "epoch": 0.32, "grad_norm": 4.286295236085717, "learning_rate": 7.961130372557895e-06, "loss": 0.2918, "step": 11150 }, { "epoch": 0.32, "grad_norm": 3.8033641730575773, "learning_rate": 7.960756676297183e-06, "loss": 0.577, "step": 11151 }, { "epoch": 0.32, "grad_norm": 13.604166306422153, "learning_rate": 7.960382954565447e-06, "loss": 0.6336, "step": 11152 }, { "epoch": 0.32, "grad_norm": 7.095417515713225, "learning_rate": 7.960009207365901e-06, "loss": 0.6895, "step": 11153 }, { "epoch": 0.32, "grad_norm": 8.918418923042555, "learning_rate": 7.95963543470176e-06, "loss": 0.8328, "step": 11154 }, { "epoch": 0.32, "grad_norm": 6.818436721891928, "learning_rate": 7.95926163657624e-06, "loss": 0.5993, "step": 11155 }, { "epoch": 0.32, "grad_norm": 8.761935730967277, "learning_rate": 7.958887812992555e-06, "loss": 0.5837, "step": 11156 }, { "epoch": 0.32, "grad_norm": 7.556944643058915, "learning_rate": 7.958513963953925e-06, "loss": 0.3874, "step": 11157 }, { "epoch": 0.32, "grad_norm": 6.732242255364264, "learning_rate": 7.958140089463563e-06, "loss": 0.4975, "step": 11158 }, { "epoch": 0.32, "grad_norm": 6.768739472413616, "learning_rate": 7.957766189524685e-06, "loss": 0.7552, "step": 11159 }, { "epoch": 0.32, "grad_norm": 4.262202528252934, "learning_rate": 7.957392264140509e-06, "loss": 0.7735, "step": 11160 }, { "epoch": 0.32, "grad_norm": 7.65918143069437, "learning_rate": 7.957018313314251e-06, "loss": 0.298, "step": 11161 }, { "epoch": 0.32, "grad_norm": 3.5986473217779076, "learning_rate": 7.95664433704913e-06, "loss": 0.5405, "step": 11162 }, { "epoch": 0.32, "grad_norm": 9.265858433373792, "learning_rate": 7.95627033534836e-06, "loss": 0.7198, "step": 11163 }, { "epoch": 0.32, "grad_norm": 7.573454497144927, "learning_rate": 7.955896308215161e-06, "loss": 0.9001, "step": 11164 }, { "epoch": 0.32, "grad_norm": 2.095039369217769, "learning_rate": 7.95552225565275e-06, "loss": 0.1271, "step": 11165 }, { "epoch": 0.32, "grad_norm": 4.497596602237345, "learning_rate": 7.955148177664345e-06, "loss": 0.411, "step": 11166 }, { "epoch": 0.32, "grad_norm": 14.490450114860131, "learning_rate": 7.954774074253164e-06, "loss": 0.6508, "step": 11167 }, { "epoch": 0.32, "grad_norm": 5.463829595387046, "learning_rate": 7.954399945422426e-06, "loss": 0.4191, "step": 11168 }, { "epoch": 0.32, "grad_norm": 6.27212651332429, "learning_rate": 7.954025791175348e-06, "loss": 0.4695, "step": 11169 }, { "epoch": 0.32, "grad_norm": 6.094041210943506, "learning_rate": 7.95365161151515e-06, "loss": 0.1825, "step": 11170 }, { "epoch": 0.32, "grad_norm": 6.098667097644321, "learning_rate": 7.953277406445052e-06, "loss": 0.4189, "step": 11171 }, { "epoch": 0.32, "grad_norm": 7.867575400069659, "learning_rate": 7.952903175968272e-06, "loss": 0.5323, "step": 11172 }, { "epoch": 0.32, "grad_norm": 9.874099110903897, "learning_rate": 7.952528920088027e-06, "loss": 0.5677, "step": 11173 }, { "epoch": 0.32, "grad_norm": 4.116531319780675, "learning_rate": 7.952154638807543e-06, "loss": 0.391, "step": 11174 }, { "epoch": 0.32, "grad_norm": 6.054682302165107, "learning_rate": 7.951780332130034e-06, "loss": 0.5967, "step": 11175 }, { "epoch": 0.32, "grad_norm": 9.83520931732893, "learning_rate": 7.951406000058723e-06, "loss": 0.6164, "step": 11176 }, { "epoch": 0.32, "grad_norm": 4.573085028721462, "learning_rate": 7.95103164259683e-06, "loss": 0.5405, "step": 11177 }, { "epoch": 0.32, "grad_norm": 6.890735651282593, "learning_rate": 7.950657259747574e-06, "loss": 0.359, "step": 11178 }, { "epoch": 0.32, "grad_norm": 7.983052362529523, "learning_rate": 7.950282851514175e-06, "loss": 0.7844, "step": 11179 }, { "epoch": 0.32, "grad_norm": 4.838430473875904, "learning_rate": 7.94990841789986e-06, "loss": 0.4785, "step": 11180 }, { "epoch": 0.32, "grad_norm": 4.298689469523881, "learning_rate": 7.949533958907842e-06, "loss": 0.4911, "step": 11181 }, { "epoch": 0.32, "grad_norm": 4.9263512979588535, "learning_rate": 7.94915947454135e-06, "loss": 0.3224, "step": 11182 }, { "epoch": 0.32, "grad_norm": 5.947130845019367, "learning_rate": 7.948784964803599e-06, "loss": 0.2985, "step": 11183 }, { "epoch": 0.32, "grad_norm": 6.597536852734766, "learning_rate": 7.948410429697818e-06, "loss": 0.644, "step": 11184 }, { "epoch": 0.32, "grad_norm": 5.243307298198327, "learning_rate": 7.948035869227221e-06, "loss": 0.4136, "step": 11185 }, { "epoch": 0.32, "grad_norm": 3.837324649147067, "learning_rate": 7.947661283395035e-06, "loss": 0.5646, "step": 11186 }, { "epoch": 0.32, "grad_norm": 6.64742758193207, "learning_rate": 7.947286672204484e-06, "loss": 0.4902, "step": 11187 }, { "epoch": 0.32, "grad_norm": 6.46370695785506, "learning_rate": 7.946912035658787e-06, "loss": 0.5301, "step": 11188 }, { "epoch": 0.32, "grad_norm": 4.932778720782347, "learning_rate": 7.946537373761171e-06, "loss": 0.4347, "step": 11189 }, { "epoch": 0.32, "grad_norm": 3.5333847075152303, "learning_rate": 7.946162686514855e-06, "loss": 0.5118, "step": 11190 }, { "epoch": 0.32, "grad_norm": 6.050476637893917, "learning_rate": 7.945787973923064e-06, "loss": 0.5906, "step": 11191 }, { "epoch": 0.32, "grad_norm": 8.672122133874733, "learning_rate": 7.945413235989021e-06, "loss": 0.6476, "step": 11192 }, { "epoch": 0.32, "grad_norm": 6.649888773396691, "learning_rate": 7.945038472715952e-06, "loss": 0.6144, "step": 11193 }, { "epoch": 0.32, "grad_norm": 9.423357086268652, "learning_rate": 7.94466368410708e-06, "loss": 0.58, "step": 11194 }, { "epoch": 0.32, "grad_norm": 7.86330417226074, "learning_rate": 7.944288870165627e-06, "loss": 0.6501, "step": 11195 }, { "epoch": 0.32, "grad_norm": 5.887940683090702, "learning_rate": 7.94391403089482e-06, "loss": 0.4549, "step": 11196 }, { "epoch": 0.32, "grad_norm": 7.224031836244172, "learning_rate": 7.943539166297885e-06, "loss": 0.7864, "step": 11197 }, { "epoch": 0.32, "grad_norm": 9.31244930791179, "learning_rate": 7.943164276378042e-06, "loss": 0.6705, "step": 11198 }, { "epoch": 0.32, "grad_norm": 5.905578131027988, "learning_rate": 7.94278936113852e-06, "loss": 0.4122, "step": 11199 }, { "epoch": 0.32, "grad_norm": 4.274852455671567, "learning_rate": 7.942414420582545e-06, "loss": 0.4874, "step": 11200 }, { "epoch": 0.32, "grad_norm": 6.230255065888968, "learning_rate": 7.94203945471334e-06, "loss": 0.4845, "step": 11201 }, { "epoch": 0.32, "grad_norm": 5.740511196713326, "learning_rate": 7.941664463534132e-06, "loss": 0.5351, "step": 11202 }, { "epoch": 0.32, "grad_norm": 5.7448419987769785, "learning_rate": 7.941289447048146e-06, "loss": 0.6316, "step": 11203 }, { "epoch": 0.32, "grad_norm": 4.2281768874588535, "learning_rate": 7.940914405258611e-06, "loss": 0.4812, "step": 11204 }, { "epoch": 0.32, "grad_norm": 4.480533403164603, "learning_rate": 7.940539338168749e-06, "loss": 0.5717, "step": 11205 }, { "epoch": 0.32, "grad_norm": 5.892832375346398, "learning_rate": 7.94016424578179e-06, "loss": 0.3486, "step": 11206 }, { "epoch": 0.32, "grad_norm": 5.350650548153201, "learning_rate": 7.939789128100961e-06, "loss": 0.5323, "step": 11207 }, { "epoch": 0.32, "grad_norm": 7.493411349040906, "learning_rate": 7.939413985129487e-06, "loss": 0.5938, "step": 11208 }, { "epoch": 0.32, "grad_norm": 3.6997944787655936, "learning_rate": 7.939038816870597e-06, "loss": 0.1832, "step": 11209 }, { "epoch": 0.32, "grad_norm": 4.4051353451550055, "learning_rate": 7.938663623327519e-06, "loss": 0.4801, "step": 11210 }, { "epoch": 0.32, "grad_norm": 5.133725066327629, "learning_rate": 7.938288404503476e-06, "loss": 0.4843, "step": 11211 }, { "epoch": 0.32, "grad_norm": 10.20753376742385, "learning_rate": 7.937913160401702e-06, "loss": 0.5019, "step": 11212 }, { "epoch": 0.32, "grad_norm": 9.721676450275703, "learning_rate": 7.937537891025424e-06, "loss": 0.8214, "step": 11213 }, { "epoch": 0.32, "grad_norm": 8.35608073414778, "learning_rate": 7.937162596377867e-06, "loss": 0.5181, "step": 11214 }, { "epoch": 0.32, "grad_norm": 4.585980208813783, "learning_rate": 7.936787276462263e-06, "loss": 0.4195, "step": 11215 }, { "epoch": 0.32, "grad_norm": 4.634606462744617, "learning_rate": 7.93641193128184e-06, "loss": 0.693, "step": 11216 }, { "epoch": 0.32, "grad_norm": 4.298689968691616, "learning_rate": 7.936036560839827e-06, "loss": 0.4282, "step": 11217 }, { "epoch": 0.32, "grad_norm": 5.1929031177388785, "learning_rate": 7.935661165139452e-06, "loss": 0.5065, "step": 11218 }, { "epoch": 0.32, "grad_norm": 6.063560943581611, "learning_rate": 7.935285744183944e-06, "loss": 0.6501, "step": 11219 }, { "epoch": 0.32, "grad_norm": 5.499331303607259, "learning_rate": 7.934910297976536e-06, "loss": 0.5843, "step": 11220 }, { "epoch": 0.32, "grad_norm": 4.391275296636527, "learning_rate": 7.934534826520455e-06, "loss": 0.5518, "step": 11221 }, { "epoch": 0.32, "grad_norm": 8.145950010851077, "learning_rate": 7.934159329818933e-06, "loss": 0.472, "step": 11222 }, { "epoch": 0.32, "grad_norm": 8.1986610110223, "learning_rate": 7.9337838078752e-06, "loss": 0.4131, "step": 11223 }, { "epoch": 0.32, "grad_norm": 3.1889491433496144, "learning_rate": 7.933408260692485e-06, "loss": 0.2877, "step": 11224 }, { "epoch": 0.32, "grad_norm": 7.429947424352187, "learning_rate": 7.933032688274022e-06, "loss": 0.4917, "step": 11225 }, { "epoch": 0.32, "grad_norm": 12.569263212935931, "learning_rate": 7.932657090623039e-06, "loss": 0.798, "step": 11226 }, { "epoch": 0.32, "grad_norm": 3.1869090598351697, "learning_rate": 7.932281467742766e-06, "loss": 0.4787, "step": 11227 }, { "epoch": 0.32, "grad_norm": 3.565310992122642, "learning_rate": 7.931905819636439e-06, "loss": 0.5932, "step": 11228 }, { "epoch": 0.32, "grad_norm": 6.8236707392007965, "learning_rate": 7.931530146307285e-06, "loss": 0.7085, "step": 11229 }, { "epoch": 0.32, "grad_norm": 4.392055292083731, "learning_rate": 7.93115444775854e-06, "loss": 0.6377, "step": 11230 }, { "epoch": 0.32, "grad_norm": 7.349218661759527, "learning_rate": 7.930778723993434e-06, "loss": 0.3741, "step": 11231 }, { "epoch": 0.32, "grad_norm": 3.9500975041487396, "learning_rate": 7.930402975015199e-06, "loss": 0.3206, "step": 11232 }, { "epoch": 0.32, "grad_norm": 3.6716144225815883, "learning_rate": 7.930027200827069e-06, "loss": 0.248, "step": 11233 }, { "epoch": 0.32, "grad_norm": 6.021270203717852, "learning_rate": 7.929651401432274e-06, "loss": 0.3646, "step": 11234 }, { "epoch": 0.32, "grad_norm": 9.755016809074588, "learning_rate": 7.92927557683405e-06, "loss": 0.8804, "step": 11235 }, { "epoch": 0.32, "grad_norm": 5.95231931345678, "learning_rate": 7.928899727035628e-06, "loss": 0.3948, "step": 11236 }, { "epoch": 0.32, "grad_norm": 6.852942159277526, "learning_rate": 7.928523852040242e-06, "loss": 0.5314, "step": 11237 }, { "epoch": 0.32, "grad_norm": 2.8498183468599874, "learning_rate": 7.928147951851126e-06, "loss": 0.4831, "step": 11238 }, { "epoch": 0.32, "grad_norm": 5.137243857576695, "learning_rate": 7.927772026471515e-06, "loss": 0.5946, "step": 11239 }, { "epoch": 0.32, "grad_norm": 9.391612935582435, "learning_rate": 7.92739607590464e-06, "loss": 0.551, "step": 11240 }, { "epoch": 0.32, "grad_norm": 6.079152312162663, "learning_rate": 7.92702010015374e-06, "loss": 0.7983, "step": 11241 }, { "epoch": 0.32, "grad_norm": 7.082951329720159, "learning_rate": 7.926644099222043e-06, "loss": 0.7536, "step": 11242 }, { "epoch": 0.32, "grad_norm": 7.879037881488275, "learning_rate": 7.926268073112789e-06, "loss": 0.3967, "step": 11243 }, { "epoch": 0.32, "grad_norm": 6.368263574953773, "learning_rate": 7.925892021829211e-06, "loss": 0.5763, "step": 11244 }, { "epoch": 0.32, "grad_norm": 3.0890922934716696, "learning_rate": 7.925515945374543e-06, "loss": 0.4469, "step": 11245 }, { "epoch": 0.32, "grad_norm": 2.879448331110657, "learning_rate": 7.925139843752022e-06, "loss": 0.3118, "step": 11246 }, { "epoch": 0.32, "grad_norm": 7.144455297701462, "learning_rate": 7.924763716964883e-06, "loss": 0.4446, "step": 11247 }, { "epoch": 0.32, "grad_norm": 3.911498396761845, "learning_rate": 7.924387565016364e-06, "loss": 0.3802, "step": 11248 }, { "epoch": 0.32, "grad_norm": 3.613978686682305, "learning_rate": 7.924011387909696e-06, "loss": 0.1163, "step": 11249 }, { "epoch": 0.32, "grad_norm": 7.078148915500953, "learning_rate": 7.92363518564812e-06, "loss": 0.6069, "step": 11250 }, { "epoch": 0.32, "grad_norm": 7.161436508607842, "learning_rate": 7.923258958234869e-06, "loss": 0.9007, "step": 11251 }, { "epoch": 0.32, "grad_norm": 8.92367519515628, "learning_rate": 7.922882705673183e-06, "loss": 0.7345, "step": 11252 }, { "epoch": 0.32, "grad_norm": 5.144697854277771, "learning_rate": 7.922506427966294e-06, "loss": 0.6842, "step": 11253 }, { "epoch": 0.32, "grad_norm": 5.463175259193017, "learning_rate": 7.922130125117444e-06, "loss": 0.481, "step": 11254 }, { "epoch": 0.32, "grad_norm": 5.419752689504667, "learning_rate": 7.921753797129868e-06, "loss": 0.3586, "step": 11255 }, { "epoch": 0.32, "grad_norm": 7.199947160950794, "learning_rate": 7.921377444006804e-06, "loss": 0.7441, "step": 11256 }, { "epoch": 0.32, "grad_norm": 3.929405217431477, "learning_rate": 7.92100106575149e-06, "loss": 0.631, "step": 11257 }, { "epoch": 0.32, "grad_norm": 5.102634147962728, "learning_rate": 7.920624662367162e-06, "loss": 0.5614, "step": 11258 }, { "epoch": 0.32, "grad_norm": 5.727935876133088, "learning_rate": 7.920248233857061e-06, "loss": 0.7827, "step": 11259 }, { "epoch": 0.32, "grad_norm": 8.32872382152874, "learning_rate": 7.919871780224424e-06, "loss": 0.7557, "step": 11260 }, { "epoch": 0.32, "grad_norm": 4.514048839164178, "learning_rate": 7.919495301472488e-06, "loss": 0.1328, "step": 11261 }, { "epoch": 0.32, "grad_norm": 7.643004002197847, "learning_rate": 7.919118797604495e-06, "loss": 0.4082, "step": 11262 }, { "epoch": 0.32, "grad_norm": 5.549884205546707, "learning_rate": 7.918742268623681e-06, "loss": 0.6767, "step": 11263 }, { "epoch": 0.32, "grad_norm": 5.670741701004743, "learning_rate": 7.918365714533289e-06, "loss": 1.0145, "step": 11264 }, { "epoch": 0.32, "grad_norm": 6.440279462449077, "learning_rate": 7.917989135336554e-06, "loss": 0.5862, "step": 11265 }, { "epoch": 0.32, "grad_norm": 4.505087579273864, "learning_rate": 7.917612531036718e-06, "loss": 0.3379, "step": 11266 }, { "epoch": 0.32, "grad_norm": 6.480874571415397, "learning_rate": 7.917235901637021e-06, "loss": 0.7617, "step": 11267 }, { "epoch": 0.32, "grad_norm": 7.130595034879812, "learning_rate": 7.916859247140702e-06, "loss": 0.6895, "step": 11268 }, { "epoch": 0.32, "grad_norm": 5.199408409505632, "learning_rate": 7.916482567551003e-06, "loss": 0.4033, "step": 11269 }, { "epoch": 0.32, "grad_norm": 6.861741650480277, "learning_rate": 7.916105862871163e-06, "loss": 0.4452, "step": 11270 }, { "epoch": 0.32, "grad_norm": 5.259751029123593, "learning_rate": 7.915729133104425e-06, "loss": 0.1705, "step": 11271 }, { "epoch": 0.32, "grad_norm": 7.791737731228729, "learning_rate": 7.915352378254025e-06, "loss": 0.7696, "step": 11272 }, { "epoch": 0.32, "grad_norm": 9.471190890202884, "learning_rate": 7.91497559832321e-06, "loss": 0.6455, "step": 11273 }, { "epoch": 0.32, "grad_norm": 4.41680215531787, "learning_rate": 7.914598793315217e-06, "loss": 0.4032, "step": 11274 }, { "epoch": 0.32, "grad_norm": 4.931599290126898, "learning_rate": 7.914221963233291e-06, "loss": 0.3595, "step": 11275 }, { "epoch": 0.32, "grad_norm": 7.263747040251216, "learning_rate": 7.913845108080672e-06, "loss": 0.5655, "step": 11276 }, { "epoch": 0.32, "grad_norm": 5.027139109705083, "learning_rate": 7.913468227860601e-06, "loss": 0.4183, "step": 11277 }, { "epoch": 0.32, "grad_norm": 2.721285810326952, "learning_rate": 7.913091322576322e-06, "loss": 0.3656, "step": 11278 }, { "epoch": 0.32, "grad_norm": 3.2490986160922994, "learning_rate": 7.91271439223108e-06, "loss": 0.3056, "step": 11279 }, { "epoch": 0.32, "grad_norm": 6.073523184150053, "learning_rate": 7.91233743682811e-06, "loss": 0.5041, "step": 11280 }, { "epoch": 0.32, "grad_norm": 6.644112494727218, "learning_rate": 7.911960456370663e-06, "loss": 0.762, "step": 11281 }, { "epoch": 0.32, "grad_norm": 12.542546305853236, "learning_rate": 7.911583450861978e-06, "loss": 1.0054, "step": 11282 }, { "epoch": 0.32, "grad_norm": 7.185714931238823, "learning_rate": 7.911206420305298e-06, "loss": 0.4485, "step": 11283 }, { "epoch": 0.32, "grad_norm": 6.051217188439853, "learning_rate": 7.910829364703869e-06, "loss": 0.5684, "step": 11284 }, { "epoch": 0.32, "grad_norm": 4.935161205661852, "learning_rate": 7.910452284060932e-06, "loss": 0.332, "step": 11285 }, { "epoch": 0.32, "grad_norm": 6.6040696659445475, "learning_rate": 7.910075178379732e-06, "loss": 0.8401, "step": 11286 }, { "epoch": 0.32, "grad_norm": 6.977533082846021, "learning_rate": 7.909698047663516e-06, "loss": 0.5681, "step": 11287 }, { "epoch": 0.32, "grad_norm": 5.719525102294986, "learning_rate": 7.909320891915524e-06, "loss": 0.4805, "step": 11288 }, { "epoch": 0.32, "grad_norm": 4.471056256361173, "learning_rate": 7.908943711139004e-06, "loss": 0.3974, "step": 11289 }, { "epoch": 0.32, "grad_norm": 8.626661389431328, "learning_rate": 7.908566505337198e-06, "loss": 0.7044, "step": 11290 }, { "epoch": 0.32, "grad_norm": 3.9249249943169353, "learning_rate": 7.908189274513354e-06, "loss": 0.3093, "step": 11291 }, { "epoch": 0.32, "grad_norm": 4.094974713183383, "learning_rate": 7.907812018670714e-06, "loss": 0.3916, "step": 11292 }, { "epoch": 0.32, "grad_norm": 4.669224449674811, "learning_rate": 7.907434737812526e-06, "loss": 0.3246, "step": 11293 }, { "epoch": 0.32, "grad_norm": 3.555283850646386, "learning_rate": 7.907057431942034e-06, "loss": 0.2192, "step": 11294 }, { "epoch": 0.32, "grad_norm": 7.542414513983213, "learning_rate": 7.906680101062485e-06, "loss": 0.5861, "step": 11295 }, { "epoch": 0.32, "grad_norm": 7.525530467552051, "learning_rate": 7.906302745177125e-06, "loss": 0.4412, "step": 11296 }, { "epoch": 0.32, "grad_norm": 3.9260467591340817, "learning_rate": 7.905925364289203e-06, "loss": 0.468, "step": 11297 }, { "epoch": 0.32, "grad_norm": 11.008778580452741, "learning_rate": 7.90554795840196e-06, "loss": 0.4971, "step": 11298 }, { "epoch": 0.32, "grad_norm": 3.297825861484191, "learning_rate": 7.905170527518646e-06, "loss": 0.1792, "step": 11299 }, { "epoch": 0.32, "grad_norm": 3.4353628277343153, "learning_rate": 7.90479307164251e-06, "loss": 0.3809, "step": 11300 }, { "epoch": 0.32, "grad_norm": 5.86815748057232, "learning_rate": 7.904415590776795e-06, "loss": 0.9888, "step": 11301 }, { "epoch": 0.32, "grad_norm": 4.613712455371445, "learning_rate": 7.904038084924748e-06, "loss": 0.5889, "step": 11302 }, { "epoch": 0.32, "grad_norm": 3.377497243647153, "learning_rate": 7.903660554089622e-06, "loss": 0.274, "step": 11303 }, { "epoch": 0.32, "grad_norm": 7.337960936123416, "learning_rate": 7.903282998274661e-06, "loss": 0.6377, "step": 11304 }, { "epoch": 0.32, "grad_norm": 7.3381126356008535, "learning_rate": 7.902905417483113e-06, "loss": 0.7002, "step": 11305 }, { "epoch": 0.32, "grad_norm": 6.422086614755889, "learning_rate": 7.902527811718228e-06, "loss": 0.6948, "step": 11306 }, { "epoch": 0.32, "grad_norm": 3.9774063803770034, "learning_rate": 7.902150180983255e-06, "loss": 0.4383, "step": 11307 }, { "epoch": 0.32, "grad_norm": 7.053796857711688, "learning_rate": 7.901772525281439e-06, "loss": 0.6018, "step": 11308 }, { "epoch": 0.32, "grad_norm": 4.596536790139316, "learning_rate": 7.901394844616032e-06, "loss": 0.6172, "step": 11309 }, { "epoch": 0.32, "grad_norm": 5.476016987447534, "learning_rate": 7.901017138990281e-06, "loss": 0.6358, "step": 11310 }, { "epoch": 0.32, "grad_norm": 6.8757690693110565, "learning_rate": 7.900639408407439e-06, "loss": 0.5515, "step": 11311 }, { "epoch": 0.32, "grad_norm": 5.939670206284506, "learning_rate": 7.900261652870754e-06, "loss": 0.5539, "step": 11312 }, { "epoch": 0.32, "grad_norm": 3.920111666324786, "learning_rate": 7.899883872383472e-06, "loss": 0.4653, "step": 11313 }, { "epoch": 0.32, "grad_norm": 4.78646491392373, "learning_rate": 7.899506066948848e-06, "loss": 0.827, "step": 11314 }, { "epoch": 0.32, "grad_norm": 5.739382290881732, "learning_rate": 7.899128236570131e-06, "loss": 0.5286, "step": 11315 }, { "epoch": 0.32, "grad_norm": 5.791631831148983, "learning_rate": 7.898750381250568e-06, "loss": 1.062, "step": 11316 }, { "epoch": 0.32, "grad_norm": 4.254948287855831, "learning_rate": 7.898372500993414e-06, "loss": 0.498, "step": 11317 }, { "epoch": 0.32, "grad_norm": 5.521722119842885, "learning_rate": 7.89799459580192e-06, "loss": 0.6881, "step": 11318 }, { "epoch": 0.32, "grad_norm": 8.847889339248232, "learning_rate": 7.897616665679332e-06, "loss": 0.4736, "step": 11319 }, { "epoch": 0.32, "grad_norm": 5.273828743443736, "learning_rate": 7.897238710628906e-06, "loss": 0.3924, "step": 11320 }, { "epoch": 0.32, "grad_norm": 5.54110720924847, "learning_rate": 7.896860730653893e-06, "loss": 0.8453, "step": 11321 }, { "epoch": 0.32, "grad_norm": 7.428772301368417, "learning_rate": 7.896482725757544e-06, "loss": 0.7352, "step": 11322 }, { "epoch": 0.32, "grad_norm": 5.021076816443033, "learning_rate": 7.896104695943108e-06, "loss": 0.7091, "step": 11323 }, { "epoch": 0.32, "grad_norm": 3.2468181586774425, "learning_rate": 7.895726641213842e-06, "loss": 0.256, "step": 11324 }, { "epoch": 0.32, "grad_norm": 6.18864619110023, "learning_rate": 7.895348561572997e-06, "loss": 0.7826, "step": 11325 }, { "epoch": 0.32, "grad_norm": 6.7442559421025425, "learning_rate": 7.894970457023822e-06, "loss": 0.4262, "step": 11326 }, { "epoch": 0.32, "grad_norm": 2.9557076115879553, "learning_rate": 7.894592327569574e-06, "loss": 0.289, "step": 11327 }, { "epoch": 0.32, "grad_norm": 11.538952330397189, "learning_rate": 7.894214173213506e-06, "loss": 0.6256, "step": 11328 }, { "epoch": 0.32, "grad_norm": 5.060350409124111, "learning_rate": 7.893835993958869e-06, "loss": 0.7378, "step": 11329 }, { "epoch": 0.32, "grad_norm": 4.6660219610018725, "learning_rate": 7.893457789808916e-06, "loss": 0.2901, "step": 11330 }, { "epoch": 0.32, "grad_norm": 5.890450553435887, "learning_rate": 7.893079560766904e-06, "loss": 0.4668, "step": 11331 }, { "epoch": 0.32, "grad_norm": 4.92632071122221, "learning_rate": 7.892701306836083e-06, "loss": 0.2331, "step": 11332 }, { "epoch": 0.32, "grad_norm": 9.81766602416834, "learning_rate": 7.89232302801971e-06, "loss": 0.5878, "step": 11333 }, { "epoch": 0.32, "grad_norm": 2.3912185605261094, "learning_rate": 7.891944724321036e-06, "loss": 0.2362, "step": 11334 }, { "epoch": 0.32, "grad_norm": 5.388513743177482, "learning_rate": 7.89156639574332e-06, "loss": 0.2233, "step": 11335 }, { "epoch": 0.32, "grad_norm": 6.8643181787740755, "learning_rate": 7.891188042289813e-06, "loss": 0.7849, "step": 11336 }, { "epoch": 0.32, "grad_norm": 7.451120166924123, "learning_rate": 7.890809663963773e-06, "loss": 0.6436, "step": 11337 }, { "epoch": 0.32, "grad_norm": 6.328182191943525, "learning_rate": 7.89043126076845e-06, "loss": 0.4312, "step": 11338 }, { "epoch": 0.32, "grad_norm": 11.445544514566544, "learning_rate": 7.890052832707106e-06, "loss": 0.5624, "step": 11339 }, { "epoch": 0.32, "grad_norm": 8.176152176696805, "learning_rate": 7.889674379782993e-06, "loss": 0.7278, "step": 11340 }, { "epoch": 0.32, "grad_norm": 4.881691765912253, "learning_rate": 7.889295901999365e-06, "loss": 0.5286, "step": 11341 }, { "epoch": 0.32, "grad_norm": 6.547814440639785, "learning_rate": 7.888917399359482e-06, "loss": 0.322, "step": 11342 }, { "epoch": 0.32, "grad_norm": 7.007974442021696, "learning_rate": 7.888538871866596e-06, "loss": 0.5339, "step": 11343 }, { "epoch": 0.32, "grad_norm": 6.425427125025516, "learning_rate": 7.888160319523968e-06, "loss": 0.659, "step": 11344 }, { "epoch": 0.32, "grad_norm": 4.330371039793837, "learning_rate": 7.887781742334853e-06, "loss": 0.4916, "step": 11345 }, { "epoch": 0.32, "grad_norm": 5.224169355740537, "learning_rate": 7.887403140302505e-06, "loss": 0.426, "step": 11346 }, { "epoch": 0.32, "grad_norm": 4.123781515546237, "learning_rate": 7.887024513430183e-06, "loss": 0.6251, "step": 11347 }, { "epoch": 0.32, "grad_norm": 9.14571401971546, "learning_rate": 7.886645861721147e-06, "loss": 0.594, "step": 11348 }, { "epoch": 0.33, "grad_norm": 2.7416805005135183, "learning_rate": 7.88626718517865e-06, "loss": 0.2426, "step": 11349 }, { "epoch": 0.33, "grad_norm": 6.16137050545643, "learning_rate": 7.885888483805954e-06, "loss": 0.7482, "step": 11350 }, { "epoch": 0.33, "grad_norm": 12.985363864082407, "learning_rate": 7.885509757606313e-06, "loss": 0.6401, "step": 11351 }, { "epoch": 0.33, "grad_norm": 5.921417158238431, "learning_rate": 7.885131006582988e-06, "loss": 0.6377, "step": 11352 }, { "epoch": 0.33, "grad_norm": 7.726109999625837, "learning_rate": 7.884752230739234e-06, "loss": 0.3569, "step": 11353 }, { "epoch": 0.33, "grad_norm": 4.935851534160955, "learning_rate": 7.884373430078314e-06, "loss": 0.6961, "step": 11354 }, { "epoch": 0.33, "grad_norm": 4.155777294841052, "learning_rate": 7.883994604603483e-06, "loss": 0.2381, "step": 11355 }, { "epoch": 0.33, "grad_norm": 5.140189062493231, "learning_rate": 7.883615754318001e-06, "loss": 0.3827, "step": 11356 }, { "epoch": 0.33, "grad_norm": 5.645821246407534, "learning_rate": 7.883236879225128e-06, "loss": 0.3079, "step": 11357 }, { "epoch": 0.33, "grad_norm": 6.248042830155576, "learning_rate": 7.882857979328126e-06, "loss": 0.9769, "step": 11358 }, { "epoch": 0.33, "grad_norm": 3.9341695869761946, "learning_rate": 7.88247905463025e-06, "loss": 0.2807, "step": 11359 }, { "epoch": 0.33, "grad_norm": 3.10942158113026, "learning_rate": 7.88210010513476e-06, "loss": 0.3786, "step": 11360 }, { "epoch": 0.33, "grad_norm": 4.764722494748811, "learning_rate": 7.88172113084492e-06, "loss": 0.5148, "step": 11361 }, { "epoch": 0.33, "grad_norm": 5.400401838680718, "learning_rate": 7.881342131763986e-06, "loss": 0.2818, "step": 11362 }, { "epoch": 0.33, "grad_norm": 10.372996125252431, "learning_rate": 7.880963107895221e-06, "loss": 0.9377, "step": 11363 }, { "epoch": 0.33, "grad_norm": 5.391109234718785, "learning_rate": 7.880584059241886e-06, "loss": 0.8825, "step": 11364 }, { "epoch": 0.33, "grad_norm": 11.121489839028943, "learning_rate": 7.880204985807241e-06, "loss": 0.3756, "step": 11365 }, { "epoch": 0.33, "grad_norm": 7.231694822881528, "learning_rate": 7.879825887594545e-06, "loss": 0.4067, "step": 11366 }, { "epoch": 0.33, "grad_norm": 7.182111362096365, "learning_rate": 7.879446764607064e-06, "loss": 0.8856, "step": 11367 }, { "epoch": 0.33, "grad_norm": 8.292398340604352, "learning_rate": 7.879067616848054e-06, "loss": 0.4588, "step": 11368 }, { "epoch": 0.33, "grad_norm": 3.6170576167011785, "learning_rate": 7.878688444320784e-06, "loss": 0.4688, "step": 11369 }, { "epoch": 0.33, "grad_norm": 9.205632177522725, "learning_rate": 7.878309247028508e-06, "loss": 0.5504, "step": 11370 }, { "epoch": 0.33, "grad_norm": 4.628046424296578, "learning_rate": 7.877930024974494e-06, "loss": 0.5567, "step": 11371 }, { "epoch": 0.33, "grad_norm": 9.926484462503947, "learning_rate": 7.877550778162002e-06, "loss": 0.5182, "step": 11372 }, { "epoch": 0.33, "grad_norm": 3.842825398463422, "learning_rate": 7.877171506594294e-06, "loss": 0.6049, "step": 11373 }, { "epoch": 0.33, "grad_norm": 3.6017036989548012, "learning_rate": 7.876792210274635e-06, "loss": 0.0919, "step": 11374 }, { "epoch": 0.33, "grad_norm": 4.645421769710778, "learning_rate": 7.876412889206287e-06, "loss": 0.3332, "step": 11375 }, { "epoch": 0.33, "grad_norm": 3.9371331967453407, "learning_rate": 7.87603354339251e-06, "loss": 0.4169, "step": 11376 }, { "epoch": 0.33, "grad_norm": 5.223923180697353, "learning_rate": 7.875654172836574e-06, "loss": 0.9875, "step": 11377 }, { "epoch": 0.33, "grad_norm": 4.282747368802679, "learning_rate": 7.875274777541737e-06, "loss": 0.5514, "step": 11378 }, { "epoch": 0.33, "grad_norm": 3.3501621691040055, "learning_rate": 7.874895357511268e-06, "loss": 0.4738, "step": 11379 }, { "epoch": 0.33, "grad_norm": 4.526952718414939, "learning_rate": 7.874515912748426e-06, "loss": 0.4733, "step": 11380 }, { "epoch": 0.33, "grad_norm": 6.509223336271946, "learning_rate": 7.874136443256476e-06, "loss": 0.6976, "step": 11381 }, { "epoch": 0.33, "grad_norm": 7.797292633185684, "learning_rate": 7.873756949038688e-06, "loss": 0.8909, "step": 11382 }, { "epoch": 0.33, "grad_norm": 7.962251743776274, "learning_rate": 7.87337743009832e-06, "loss": 0.3954, "step": 11383 }, { "epoch": 0.33, "grad_norm": 4.557013518064974, "learning_rate": 7.87299788643864e-06, "loss": 0.4306, "step": 11384 }, { "epoch": 0.33, "grad_norm": 4.311069500054332, "learning_rate": 7.872618318062914e-06, "loss": 0.3581, "step": 11385 }, { "epoch": 0.33, "grad_norm": 9.485035604632307, "learning_rate": 7.872238724974406e-06, "loss": 0.726, "step": 11386 }, { "epoch": 0.33, "grad_norm": 4.48693123509744, "learning_rate": 7.871859107176381e-06, "loss": 0.2019, "step": 11387 }, { "epoch": 0.33, "grad_norm": 3.145825459720956, "learning_rate": 7.871479464672107e-06, "loss": 0.7325, "step": 11388 }, { "epoch": 0.33, "grad_norm": 10.141646370492612, "learning_rate": 7.871099797464847e-06, "loss": 0.5309, "step": 11389 }, { "epoch": 0.33, "grad_norm": 6.299966398785312, "learning_rate": 7.87072010555787e-06, "loss": 0.4525, "step": 11390 }, { "epoch": 0.33, "grad_norm": 8.125218784614452, "learning_rate": 7.870340388954442e-06, "loss": 0.4622, "step": 11391 }, { "epoch": 0.33, "grad_norm": 4.787279802243468, "learning_rate": 7.869960647657829e-06, "loss": 0.3876, "step": 11392 }, { "epoch": 0.33, "grad_norm": 7.43474271297056, "learning_rate": 7.869580881671296e-06, "loss": 0.4276, "step": 11393 }, { "epoch": 0.33, "grad_norm": 7.3483242966280615, "learning_rate": 7.869201090998113e-06, "loss": 0.4229, "step": 11394 }, { "epoch": 0.33, "grad_norm": 4.871621477285299, "learning_rate": 7.868821275641547e-06, "loss": 0.3071, "step": 11395 }, { "epoch": 0.33, "grad_norm": 3.8028639510247215, "learning_rate": 7.868441435604865e-06, "loss": 0.4158, "step": 11396 }, { "epoch": 0.33, "grad_norm": 4.645404755991114, "learning_rate": 7.868061570891333e-06, "loss": 0.6692, "step": 11397 }, { "epoch": 0.33, "grad_norm": 4.974009938663053, "learning_rate": 7.867681681504223e-06, "loss": 0.7612, "step": 11398 }, { "epoch": 0.33, "grad_norm": 12.250076449408692, "learning_rate": 7.867301767446798e-06, "loss": 0.6715, "step": 11399 }, { "epoch": 0.33, "grad_norm": 10.911744752947019, "learning_rate": 7.86692182872233e-06, "loss": 0.7714, "step": 11400 }, { "epoch": 0.33, "grad_norm": 3.1573708121057025, "learning_rate": 7.866541865334086e-06, "loss": 0.3529, "step": 11401 }, { "epoch": 0.33, "grad_norm": 4.445406143462538, "learning_rate": 7.866161877285335e-06, "loss": 0.3447, "step": 11402 }, { "epoch": 0.33, "grad_norm": 4.117694490549237, "learning_rate": 7.865781864579346e-06, "loss": 0.409, "step": 11403 }, { "epoch": 0.33, "grad_norm": 5.118224573241683, "learning_rate": 7.86540182721939e-06, "loss": 0.4278, "step": 11404 }, { "epoch": 0.33, "grad_norm": 6.843099763541322, "learning_rate": 7.865021765208734e-06, "loss": 0.4053, "step": 11405 }, { "epoch": 0.33, "grad_norm": 8.064836799214318, "learning_rate": 7.864641678550648e-06, "loss": 0.7749, "step": 11406 }, { "epoch": 0.33, "grad_norm": 3.682577047480789, "learning_rate": 7.864261567248404e-06, "loss": 0.3052, "step": 11407 }, { "epoch": 0.33, "grad_norm": 7.768117677300117, "learning_rate": 7.86388143130527e-06, "loss": 0.4268, "step": 11408 }, { "epoch": 0.33, "grad_norm": 9.331516179799893, "learning_rate": 7.863501270724515e-06, "loss": 0.6735, "step": 11409 }, { "epoch": 0.33, "grad_norm": 2.3475985273986413, "learning_rate": 7.863121085509413e-06, "loss": 0.2334, "step": 11410 }, { "epoch": 0.33, "grad_norm": 6.255057539275008, "learning_rate": 7.862740875663231e-06, "loss": 0.4865, "step": 11411 }, { "epoch": 0.33, "grad_norm": 8.920370158738981, "learning_rate": 7.862360641189243e-06, "loss": 0.7368, "step": 11412 }, { "epoch": 0.33, "grad_norm": 5.128944693172163, "learning_rate": 7.861980382090719e-06, "loss": 0.5843, "step": 11413 }, { "epoch": 0.33, "grad_norm": 6.813893656835466, "learning_rate": 7.86160009837093e-06, "loss": 0.4061, "step": 11414 }, { "epoch": 0.33, "grad_norm": 9.682377303744943, "learning_rate": 7.861219790033146e-06, "loss": 0.8162, "step": 11415 }, { "epoch": 0.33, "grad_norm": 4.133766915027488, "learning_rate": 7.860839457080643e-06, "loss": 0.5203, "step": 11416 }, { "epoch": 0.33, "grad_norm": 2.9994994977838396, "learning_rate": 7.860459099516688e-06, "loss": 0.2124, "step": 11417 }, { "epoch": 0.33, "grad_norm": 5.520128351460821, "learning_rate": 7.860078717344557e-06, "loss": 0.4452, "step": 11418 }, { "epoch": 0.33, "grad_norm": 4.983494982867215, "learning_rate": 7.85969831056752e-06, "loss": 0.972, "step": 11419 }, { "epoch": 0.33, "grad_norm": 3.2037458236851672, "learning_rate": 7.859317879188852e-06, "loss": 0.3808, "step": 11420 }, { "epoch": 0.33, "grad_norm": 3.384399676034847, "learning_rate": 7.858937423211823e-06, "loss": 0.3615, "step": 11421 }, { "epoch": 0.33, "grad_norm": 5.4274814432846945, "learning_rate": 7.858556942639708e-06, "loss": 0.5825, "step": 11422 }, { "epoch": 0.33, "grad_norm": 6.376980268295028, "learning_rate": 7.85817643747578e-06, "loss": 0.5808, "step": 11423 }, { "epoch": 0.33, "grad_norm": 2.702294348832871, "learning_rate": 7.857795907723311e-06, "loss": 0.1678, "step": 11424 }, { "epoch": 0.33, "grad_norm": 10.777251721245353, "learning_rate": 7.857415353385576e-06, "loss": 1.3086, "step": 11425 }, { "epoch": 0.33, "grad_norm": 3.456137158998953, "learning_rate": 7.857034774465848e-06, "loss": 0.3896, "step": 11426 }, { "epoch": 0.33, "grad_norm": 5.565644393450166, "learning_rate": 7.856654170967403e-06, "loss": 0.4429, "step": 11427 }, { "epoch": 0.33, "grad_norm": 6.198870912923803, "learning_rate": 7.856273542893513e-06, "loss": 0.5475, "step": 11428 }, { "epoch": 0.33, "grad_norm": 6.3057087918379295, "learning_rate": 7.855892890247454e-06, "loss": 0.2934, "step": 11429 }, { "epoch": 0.33, "grad_norm": 3.045145062090306, "learning_rate": 7.855512213032499e-06, "loss": 0.3021, "step": 11430 }, { "epoch": 0.33, "grad_norm": 10.66923398672676, "learning_rate": 7.855131511251926e-06, "loss": 0.9258, "step": 11431 }, { "epoch": 0.33, "grad_norm": 8.901759785894393, "learning_rate": 7.854750784909006e-06, "loss": 0.7904, "step": 11432 }, { "epoch": 0.33, "grad_norm": 3.1457614553631466, "learning_rate": 7.854370034007016e-06, "loss": 0.5023, "step": 11433 }, { "epoch": 0.33, "grad_norm": 2.464386173583823, "learning_rate": 7.853989258549234e-06, "loss": 0.245, "step": 11434 }, { "epoch": 0.33, "grad_norm": 4.906471879157604, "learning_rate": 7.853608458538932e-06, "loss": 0.2329, "step": 11435 }, { "epoch": 0.33, "grad_norm": 5.416943244719858, "learning_rate": 7.85322763397939e-06, "loss": 0.907, "step": 11436 }, { "epoch": 0.33, "grad_norm": 4.051971554950992, "learning_rate": 7.852846784873881e-06, "loss": 0.3725, "step": 11437 }, { "epoch": 0.33, "grad_norm": 5.713726818477496, "learning_rate": 7.852465911225682e-06, "loss": 0.4837, "step": 11438 }, { "epoch": 0.33, "grad_norm": 4.897327059953554, "learning_rate": 7.852085013038071e-06, "loss": 0.399, "step": 11439 }, { "epoch": 0.33, "grad_norm": 10.647849611775161, "learning_rate": 7.851704090314323e-06, "loss": 0.8933, "step": 11440 }, { "epoch": 0.33, "grad_norm": 7.87182213983964, "learning_rate": 7.851323143057715e-06, "loss": 0.7883, "step": 11441 }, { "epoch": 0.33, "grad_norm": 7.431359232363605, "learning_rate": 7.850942171271525e-06, "loss": 0.5511, "step": 11442 }, { "epoch": 0.33, "grad_norm": 11.01244508554231, "learning_rate": 7.850561174959032e-06, "loss": 0.31, "step": 11443 }, { "epoch": 0.33, "grad_norm": 7.03682728291193, "learning_rate": 7.850180154123513e-06, "loss": 0.7262, "step": 11444 }, { "epoch": 0.33, "grad_norm": 4.266354872193891, "learning_rate": 7.84979910876824e-06, "loss": 0.422, "step": 11445 }, { "epoch": 0.33, "grad_norm": 5.37911135878768, "learning_rate": 7.8494180388965e-06, "loss": 0.2782, "step": 11446 }, { "epoch": 0.33, "grad_norm": 7.8860743931573225, "learning_rate": 7.849036944511568e-06, "loss": 0.6095, "step": 11447 }, { "epoch": 0.33, "grad_norm": 4.961854144137953, "learning_rate": 7.848655825616722e-06, "loss": 0.4355, "step": 11448 }, { "epoch": 0.33, "grad_norm": 7.82791064305831, "learning_rate": 7.84827468221524e-06, "loss": 0.4148, "step": 11449 }, { "epoch": 0.33, "grad_norm": 6.03890647538584, "learning_rate": 7.8478935143104e-06, "loss": 0.3684, "step": 11450 }, { "epoch": 0.33, "grad_norm": 6.788727325137486, "learning_rate": 7.847512321905487e-06, "loss": 0.8372, "step": 11451 }, { "epoch": 0.33, "grad_norm": 6.229381349196193, "learning_rate": 7.847131105003772e-06, "loss": 0.6288, "step": 11452 }, { "epoch": 0.33, "grad_norm": 8.235910739323275, "learning_rate": 7.84674986360854e-06, "loss": 0.6188, "step": 11453 }, { "epoch": 0.33, "grad_norm": 4.588481424941489, "learning_rate": 7.846368597723069e-06, "loss": 0.5816, "step": 11454 }, { "epoch": 0.33, "grad_norm": 6.372689146195704, "learning_rate": 7.84598730735064e-06, "loss": 0.4452, "step": 11455 }, { "epoch": 0.33, "grad_norm": 5.666609997559468, "learning_rate": 7.845605992494532e-06, "loss": 0.8386, "step": 11456 }, { "epoch": 0.33, "grad_norm": 8.057758563535973, "learning_rate": 7.845224653158027e-06, "loss": 0.7269, "step": 11457 }, { "epoch": 0.33, "grad_norm": 8.708034041766178, "learning_rate": 7.844843289344402e-06, "loss": 1.1069, "step": 11458 }, { "epoch": 0.33, "grad_norm": 6.75052294648084, "learning_rate": 7.844461901056943e-06, "loss": 0.9399, "step": 11459 }, { "epoch": 0.33, "grad_norm": 4.42268442354863, "learning_rate": 7.84408048829893e-06, "loss": 0.2591, "step": 11460 }, { "epoch": 0.33, "grad_norm": 6.459595915731204, "learning_rate": 7.843699051073639e-06, "loss": 0.8012, "step": 11461 }, { "epoch": 0.33, "grad_norm": 4.589162157461845, "learning_rate": 7.843317589384356e-06, "loss": 0.3598, "step": 11462 }, { "epoch": 0.33, "grad_norm": 3.4313315446124006, "learning_rate": 7.842936103234364e-06, "loss": 0.3825, "step": 11463 }, { "epoch": 0.33, "grad_norm": 7.746720635406142, "learning_rate": 7.842554592626941e-06, "loss": 0.5934, "step": 11464 }, { "epoch": 0.33, "grad_norm": 5.1121730317485214, "learning_rate": 7.842173057565373e-06, "loss": 0.4004, "step": 11465 }, { "epoch": 0.33, "grad_norm": 5.793410054822978, "learning_rate": 7.841791498052936e-06, "loss": 0.7963, "step": 11466 }, { "epoch": 0.33, "grad_norm": 4.231395390997638, "learning_rate": 7.841409914092919e-06, "loss": 0.348, "step": 11467 }, { "epoch": 0.33, "grad_norm": 5.1895387618780875, "learning_rate": 7.841028305688604e-06, "loss": 0.3091, "step": 11468 }, { "epoch": 0.33, "grad_norm": 5.856185411160173, "learning_rate": 7.84064667284327e-06, "loss": 0.6794, "step": 11469 }, { "epoch": 0.33, "grad_norm": 8.55385754883487, "learning_rate": 7.840265015560203e-06, "loss": 0.864, "step": 11470 }, { "epoch": 0.33, "grad_norm": 5.7838878902555395, "learning_rate": 7.839883333842686e-06, "loss": 0.9227, "step": 11471 }, { "epoch": 0.33, "grad_norm": 8.544102527681888, "learning_rate": 7.839501627694002e-06, "loss": 0.4715, "step": 11472 }, { "epoch": 0.33, "grad_norm": 4.4438113072883665, "learning_rate": 7.839119897117436e-06, "loss": 0.4142, "step": 11473 }, { "epoch": 0.33, "grad_norm": 4.37104450978762, "learning_rate": 7.83873814211627e-06, "loss": 0.1398, "step": 11474 }, { "epoch": 0.33, "grad_norm": 6.964676491809156, "learning_rate": 7.838356362693788e-06, "loss": 0.2207, "step": 11475 }, { "epoch": 0.33, "grad_norm": 10.303929784766558, "learning_rate": 7.837974558853278e-06, "loss": 0.5831, "step": 11476 }, { "epoch": 0.33, "grad_norm": 6.4393707075736915, "learning_rate": 7.83759273059802e-06, "loss": 0.2206, "step": 11477 }, { "epoch": 0.33, "grad_norm": 5.4456757652799945, "learning_rate": 7.837210877931304e-06, "loss": 0.4681, "step": 11478 }, { "epoch": 0.33, "grad_norm": 9.144638875648159, "learning_rate": 7.836829000856412e-06, "loss": 0.7089, "step": 11479 }, { "epoch": 0.33, "grad_norm": 6.5893252021176405, "learning_rate": 7.836447099376628e-06, "loss": 0.6178, "step": 11480 }, { "epoch": 0.33, "grad_norm": 5.292238552499374, "learning_rate": 7.836065173495238e-06, "loss": 0.4298, "step": 11481 }, { "epoch": 0.33, "grad_norm": 6.022395854095386, "learning_rate": 7.83568322321553e-06, "loss": 0.7078, "step": 11482 }, { "epoch": 0.33, "grad_norm": 10.96709844865853, "learning_rate": 7.835301248540787e-06, "loss": 0.5744, "step": 11483 }, { "epoch": 0.33, "grad_norm": 9.274299407986145, "learning_rate": 7.834919249474297e-06, "loss": 0.3944, "step": 11484 }, { "epoch": 0.33, "grad_norm": 7.31832572169796, "learning_rate": 7.834537226019348e-06, "loss": 0.3744, "step": 11485 }, { "epoch": 0.33, "grad_norm": 4.740219839205025, "learning_rate": 7.834155178179222e-06, "loss": 0.4753, "step": 11486 }, { "epoch": 0.33, "grad_norm": 6.630618897517134, "learning_rate": 7.833773105957207e-06, "loss": 0.5714, "step": 11487 }, { "epoch": 0.33, "grad_norm": 4.881383042714276, "learning_rate": 7.833391009356593e-06, "loss": 0.4407, "step": 11488 }, { "epoch": 0.33, "grad_norm": 4.515861399445388, "learning_rate": 7.833008888380664e-06, "loss": 0.4252, "step": 11489 }, { "epoch": 0.33, "grad_norm": 6.928694351746357, "learning_rate": 7.83262674303271e-06, "loss": 0.2047, "step": 11490 }, { "epoch": 0.33, "grad_norm": 13.236926052485515, "learning_rate": 7.832244573316015e-06, "loss": 0.5286, "step": 11491 }, { "epoch": 0.33, "grad_norm": 3.0742911847704546, "learning_rate": 7.831862379233867e-06, "loss": 0.2626, "step": 11492 }, { "epoch": 0.33, "grad_norm": 6.330446434428901, "learning_rate": 7.831480160789559e-06, "loss": 0.5177, "step": 11493 }, { "epoch": 0.33, "grad_norm": 5.475314445697741, "learning_rate": 7.831097917986375e-06, "loss": 0.7795, "step": 11494 }, { "epoch": 0.33, "grad_norm": 3.2020951639195543, "learning_rate": 7.830715650827603e-06, "loss": 0.3783, "step": 11495 }, { "epoch": 0.33, "grad_norm": 5.44200178120257, "learning_rate": 7.830333359316533e-06, "loss": 0.1854, "step": 11496 }, { "epoch": 0.33, "grad_norm": 5.868999318279956, "learning_rate": 7.829951043456453e-06, "loss": 0.2271, "step": 11497 }, { "epoch": 0.33, "grad_norm": 3.7430305566623296, "learning_rate": 7.829568703250656e-06, "loss": 0.3114, "step": 11498 }, { "epoch": 0.33, "grad_norm": 4.962389731923669, "learning_rate": 7.829186338702424e-06, "loss": 0.6069, "step": 11499 }, { "epoch": 0.33, "grad_norm": 2.031908354073302, "learning_rate": 7.82880394981505e-06, "loss": 0.1335, "step": 11500 }, { "epoch": 0.33, "grad_norm": 3.445020390797633, "learning_rate": 7.828421536591828e-06, "loss": 0.2251, "step": 11501 }, { "epoch": 0.33, "grad_norm": 8.942274398954714, "learning_rate": 7.828039099036041e-06, "loss": 0.3035, "step": 11502 }, { "epoch": 0.33, "grad_norm": 5.738449894015519, "learning_rate": 7.827656637150983e-06, "loss": 0.3305, "step": 11503 }, { "epoch": 0.33, "grad_norm": 6.527765339192474, "learning_rate": 7.827274150939942e-06, "loss": 0.5595, "step": 11504 }, { "epoch": 0.33, "grad_norm": 9.09012741347887, "learning_rate": 7.82689164040621e-06, "loss": 0.6556, "step": 11505 }, { "epoch": 0.33, "grad_norm": 5.09583670344934, "learning_rate": 7.826509105553077e-06, "loss": 0.6454, "step": 11506 }, { "epoch": 0.33, "grad_norm": 8.46632446238294, "learning_rate": 7.826126546383835e-06, "loss": 0.3528, "step": 11507 }, { "epoch": 0.33, "grad_norm": 5.303419026106681, "learning_rate": 7.825743962901774e-06, "loss": 0.3447, "step": 11508 }, { "epoch": 0.33, "grad_norm": 7.553920134869822, "learning_rate": 7.825361355110184e-06, "loss": 0.5695, "step": 11509 }, { "epoch": 0.33, "grad_norm": 12.15435635053944, "learning_rate": 7.82497872301236e-06, "loss": 0.7587, "step": 11510 }, { "epoch": 0.33, "grad_norm": 12.850470276773084, "learning_rate": 7.824596066611591e-06, "loss": 0.4071, "step": 11511 }, { "epoch": 0.33, "grad_norm": 8.558896441136563, "learning_rate": 7.82421338591117e-06, "loss": 0.3913, "step": 11512 }, { "epoch": 0.33, "grad_norm": 1.5287937588795875, "learning_rate": 7.823830680914389e-06, "loss": 0.1008, "step": 11513 }, { "epoch": 0.33, "grad_norm": 11.430773148307441, "learning_rate": 7.82344795162454e-06, "loss": 1.1953, "step": 11514 }, { "epoch": 0.33, "grad_norm": 2.4117273684571012, "learning_rate": 7.823065198044916e-06, "loss": 0.3031, "step": 11515 }, { "epoch": 0.33, "grad_norm": 5.416995532494009, "learning_rate": 7.822682420178811e-06, "loss": 0.5108, "step": 11516 }, { "epoch": 0.33, "grad_norm": 10.609880617928017, "learning_rate": 7.822299618029515e-06, "loss": 0.837, "step": 11517 }, { "epoch": 0.33, "grad_norm": 4.210519631907529, "learning_rate": 7.821916791600324e-06, "loss": 0.5001, "step": 11518 }, { "epoch": 0.33, "grad_norm": 7.308320636223522, "learning_rate": 7.82153394089453e-06, "loss": 0.9913, "step": 11519 }, { "epoch": 0.33, "grad_norm": 8.258913542391385, "learning_rate": 7.821151065915426e-06, "loss": 0.6763, "step": 11520 }, { "epoch": 0.33, "grad_norm": 7.46603039183359, "learning_rate": 7.820768166666308e-06, "loss": 0.6668, "step": 11521 }, { "epoch": 0.33, "grad_norm": 3.2926039125964826, "learning_rate": 7.820385243150468e-06, "loss": 0.4283, "step": 11522 }, { "epoch": 0.33, "grad_norm": 7.555227581865535, "learning_rate": 7.820002295371202e-06, "loss": 0.497, "step": 11523 }, { "epoch": 0.33, "grad_norm": 6.2672833842644105, "learning_rate": 7.819619323331803e-06, "loss": 0.7015, "step": 11524 }, { "epoch": 0.33, "grad_norm": 6.792044209789089, "learning_rate": 7.819236327035565e-06, "loss": 0.2303, "step": 11525 }, { "epoch": 0.33, "grad_norm": 4.812343557713194, "learning_rate": 7.818853306485786e-06, "loss": 0.7053, "step": 11526 }, { "epoch": 0.33, "grad_norm": 5.621278294548706, "learning_rate": 7.818470261685756e-06, "loss": 0.5495, "step": 11527 }, { "epoch": 0.33, "grad_norm": 5.704553378222972, "learning_rate": 7.818087192638775e-06, "loss": 0.5705, "step": 11528 }, { "epoch": 0.33, "grad_norm": 8.190233429503749, "learning_rate": 7.817704099348138e-06, "loss": 0.9215, "step": 11529 }, { "epoch": 0.33, "grad_norm": 5.464133924803386, "learning_rate": 7.817320981817138e-06, "loss": 0.7062, "step": 11530 }, { "epoch": 0.33, "grad_norm": 6.168303409941204, "learning_rate": 7.816937840049073e-06, "loss": 0.259, "step": 11531 }, { "epoch": 0.33, "grad_norm": 3.4487845988955974, "learning_rate": 7.816554674047239e-06, "loss": 0.3167, "step": 11532 }, { "epoch": 0.33, "grad_norm": 5.495891944200604, "learning_rate": 7.816171483814932e-06, "loss": 0.4677, "step": 11533 }, { "epoch": 0.33, "grad_norm": 8.659495602514376, "learning_rate": 7.815788269355448e-06, "loss": 0.7203, "step": 11534 }, { "epoch": 0.33, "grad_norm": 7.736335767873655, "learning_rate": 7.815405030672085e-06, "loss": 0.5501, "step": 11535 }, { "epoch": 0.33, "grad_norm": 5.0691028439234636, "learning_rate": 7.815021767768137e-06, "loss": 0.7157, "step": 11536 }, { "epoch": 0.33, "grad_norm": 10.747132207128107, "learning_rate": 7.814638480646905e-06, "loss": 0.6732, "step": 11537 }, { "epoch": 0.33, "grad_norm": 5.840622661788089, "learning_rate": 7.814255169311685e-06, "loss": 0.7393, "step": 11538 }, { "epoch": 0.33, "grad_norm": 5.866632508951509, "learning_rate": 7.813871833765774e-06, "loss": 0.2434, "step": 11539 }, { "epoch": 0.33, "grad_norm": 2.9832029587657085, "learning_rate": 7.81348847401247e-06, "loss": 0.3315, "step": 11540 }, { "epoch": 0.33, "grad_norm": 9.801522887879763, "learning_rate": 7.813105090055071e-06, "loss": 0.6307, "step": 11541 }, { "epoch": 0.33, "grad_norm": 6.239790168437587, "learning_rate": 7.812721681896874e-06, "loss": 0.6646, "step": 11542 }, { "epoch": 0.33, "grad_norm": 3.7858553497684135, "learning_rate": 7.81233824954118e-06, "loss": 0.5134, "step": 11543 }, { "epoch": 0.33, "grad_norm": 5.397042299824052, "learning_rate": 7.811954792991286e-06, "loss": 0.4171, "step": 11544 }, { "epoch": 0.33, "grad_norm": 3.748350925248792, "learning_rate": 7.811571312250492e-06, "loss": 0.3827, "step": 11545 }, { "epoch": 0.33, "grad_norm": 4.198386659019778, "learning_rate": 7.811187807322096e-06, "loss": 0.2689, "step": 11546 }, { "epoch": 0.33, "grad_norm": 3.7697830037065514, "learning_rate": 7.810804278209397e-06, "loss": 0.3793, "step": 11547 }, { "epoch": 0.33, "grad_norm": 4.5779678005271025, "learning_rate": 7.810420724915693e-06, "loss": 0.5299, "step": 11548 }, { "epoch": 0.33, "grad_norm": 3.4604390218075034, "learning_rate": 7.810037147444287e-06, "loss": 0.1983, "step": 11549 }, { "epoch": 0.33, "grad_norm": 10.452569556197579, "learning_rate": 7.809653545798479e-06, "loss": 0.7486, "step": 11550 }, { "epoch": 0.33, "grad_norm": 7.276101383591306, "learning_rate": 7.809269919981566e-06, "loss": 0.6937, "step": 11551 }, { "epoch": 0.33, "grad_norm": 5.900611567724904, "learning_rate": 7.808886269996848e-06, "loss": 0.7656, "step": 11552 }, { "epoch": 0.33, "grad_norm": 4.822816862827222, "learning_rate": 7.808502595847631e-06, "loss": 0.4748, "step": 11553 }, { "epoch": 0.33, "grad_norm": 4.595378690811666, "learning_rate": 7.80811889753721e-06, "loss": 0.4712, "step": 11554 }, { "epoch": 0.33, "grad_norm": 5.819837758205106, "learning_rate": 7.807735175068888e-06, "loss": 0.5444, "step": 11555 }, { "epoch": 0.33, "grad_norm": 5.734710974556647, "learning_rate": 7.807351428445967e-06, "loss": 0.3393, "step": 11556 }, { "epoch": 0.33, "grad_norm": 7.258427161724539, "learning_rate": 7.806967657671745e-06, "loss": 0.8409, "step": 11557 }, { "epoch": 0.33, "grad_norm": 6.17934242182073, "learning_rate": 7.806583862749528e-06, "loss": 0.6284, "step": 11558 }, { "epoch": 0.33, "grad_norm": 2.7446060871939486, "learning_rate": 7.806200043682614e-06, "loss": 0.3101, "step": 11559 }, { "epoch": 0.33, "grad_norm": 4.57165259904374, "learning_rate": 7.805816200474307e-06, "loss": 0.4792, "step": 11560 }, { "epoch": 0.33, "grad_norm": 5.997271553195142, "learning_rate": 7.80543233312791e-06, "loss": 0.4687, "step": 11561 }, { "epoch": 0.33, "grad_norm": 6.969743119025452, "learning_rate": 7.805048441646723e-06, "loss": 0.4979, "step": 11562 }, { "epoch": 0.33, "grad_norm": 3.8058387847409185, "learning_rate": 7.804664526034048e-06, "loss": 0.321, "step": 11563 }, { "epoch": 0.33, "grad_norm": 5.838235139811473, "learning_rate": 7.804280586293192e-06, "loss": 0.2608, "step": 11564 }, { "epoch": 0.33, "grad_norm": 4.38446171988156, "learning_rate": 7.803896622427454e-06, "loss": 0.2598, "step": 11565 }, { "epoch": 0.33, "grad_norm": 6.068652305248831, "learning_rate": 7.80351263444014e-06, "loss": 0.6695, "step": 11566 }, { "epoch": 0.33, "grad_norm": 5.384944499091962, "learning_rate": 7.80312862233455e-06, "loss": 0.629, "step": 11567 }, { "epoch": 0.33, "grad_norm": 5.94271806007902, "learning_rate": 7.80274458611399e-06, "loss": 0.7635, "step": 11568 }, { "epoch": 0.33, "grad_norm": 8.528896021000381, "learning_rate": 7.802360525781766e-06, "loss": 0.3261, "step": 11569 }, { "epoch": 0.33, "grad_norm": 4.077180102562291, "learning_rate": 7.801976441341178e-06, "loss": 0.3545, "step": 11570 }, { "epoch": 0.33, "grad_norm": 5.734874984190113, "learning_rate": 7.80159233279553e-06, "loss": 0.4082, "step": 11571 }, { "epoch": 0.33, "grad_norm": 6.2004003618551184, "learning_rate": 7.80120820014813e-06, "loss": 0.5657, "step": 11572 }, { "epoch": 0.33, "grad_norm": 3.696190501905901, "learning_rate": 7.800824043402277e-06, "loss": 0.3135, "step": 11573 }, { "epoch": 0.33, "grad_norm": 8.295271896403765, "learning_rate": 7.800439862561285e-06, "loss": 0.4896, "step": 11574 }, { "epoch": 0.33, "grad_norm": 6.175324762738634, "learning_rate": 7.800055657628449e-06, "loss": 0.6547, "step": 11575 }, { "epoch": 0.33, "grad_norm": 3.205806558740308, "learning_rate": 7.799671428607082e-06, "loss": 0.4738, "step": 11576 }, { "epoch": 0.33, "grad_norm": 6.53846369728208, "learning_rate": 7.799287175500484e-06, "loss": 0.2867, "step": 11577 }, { "epoch": 0.33, "grad_norm": 4.095536213585824, "learning_rate": 7.798902898311966e-06, "loss": 0.7343, "step": 11578 }, { "epoch": 0.33, "grad_norm": 4.3455891729266325, "learning_rate": 7.798518597044831e-06, "loss": 0.6671, "step": 11579 }, { "epoch": 0.33, "grad_norm": 7.58956706062504, "learning_rate": 7.798134271702383e-06, "loss": 0.6783, "step": 11580 }, { "epoch": 0.33, "grad_norm": 5.891761649884549, "learning_rate": 7.797749922287929e-06, "loss": 0.7145, "step": 11581 }, { "epoch": 0.33, "grad_norm": 3.9847024222085503, "learning_rate": 7.797365548804781e-06, "loss": 0.2212, "step": 11582 }, { "epoch": 0.33, "grad_norm": 9.988383460090676, "learning_rate": 7.796981151256238e-06, "loss": 0.6175, "step": 11583 }, { "epoch": 0.33, "grad_norm": 8.348832257786986, "learning_rate": 7.796596729645614e-06, "loss": 1.039, "step": 11584 }, { "epoch": 0.33, "grad_norm": 5.257274243037637, "learning_rate": 7.79621228397621e-06, "loss": 0.2629, "step": 11585 }, { "epoch": 0.33, "grad_norm": 6.093781104375244, "learning_rate": 7.795827814251337e-06, "loss": 0.8414, "step": 11586 }, { "epoch": 0.33, "grad_norm": 10.609833562769742, "learning_rate": 7.795443320474302e-06, "loss": 0.4536, "step": 11587 }, { "epoch": 0.33, "grad_norm": 5.870385691369503, "learning_rate": 7.79505880264841e-06, "loss": 0.2837, "step": 11588 }, { "epoch": 0.33, "grad_norm": 10.46993683877018, "learning_rate": 7.794674260776973e-06, "loss": 0.7689, "step": 11589 }, { "epoch": 0.33, "grad_norm": 5.952631652093908, "learning_rate": 7.794289694863298e-06, "loss": 0.4903, "step": 11590 }, { "epoch": 0.33, "grad_norm": 5.999990383776271, "learning_rate": 7.79390510491069e-06, "loss": 0.6153, "step": 11591 }, { "epoch": 0.33, "grad_norm": 4.453758328558762, "learning_rate": 7.793520490922463e-06, "loss": 0.6023, "step": 11592 }, { "epoch": 0.33, "grad_norm": 1.9211779470092976, "learning_rate": 7.793135852901922e-06, "loss": 0.1242, "step": 11593 }, { "epoch": 0.33, "grad_norm": 7.514896539926437, "learning_rate": 7.79275119085238e-06, "loss": 0.9236, "step": 11594 }, { "epoch": 0.33, "grad_norm": 6.737915489110651, "learning_rate": 7.79236650477714e-06, "loss": 0.445, "step": 11595 }, { "epoch": 0.33, "grad_norm": 6.177477184517509, "learning_rate": 7.791981794679517e-06, "loss": 0.5504, "step": 11596 }, { "epoch": 0.33, "grad_norm": 9.198272617547739, "learning_rate": 7.791597060562817e-06, "loss": 0.8245, "step": 11597 }, { "epoch": 0.33, "grad_norm": 5.7181146821206115, "learning_rate": 7.791212302430351e-06, "loss": 0.8289, "step": 11598 }, { "epoch": 0.33, "grad_norm": 6.550226487182427, "learning_rate": 7.79082752028543e-06, "loss": 0.6264, "step": 11599 }, { "epoch": 0.33, "grad_norm": 5.117303931389516, "learning_rate": 7.790442714131363e-06, "loss": 0.5574, "step": 11600 }, { "epoch": 0.33, "grad_norm": 6.923283211984731, "learning_rate": 7.79005788397146e-06, "loss": 0.2722, "step": 11601 }, { "epoch": 0.33, "grad_norm": 4.078770933480399, "learning_rate": 7.789673029809035e-06, "loss": 0.4191, "step": 11602 }, { "epoch": 0.33, "grad_norm": 6.395599481074512, "learning_rate": 7.789288151647394e-06, "loss": 0.7274, "step": 11603 }, { "epoch": 0.33, "grad_norm": 3.8690321404982377, "learning_rate": 7.788903249489852e-06, "loss": 0.2685, "step": 11604 }, { "epoch": 0.33, "grad_norm": 6.3350896115125925, "learning_rate": 7.788518323339718e-06, "loss": 0.351, "step": 11605 }, { "epoch": 0.33, "grad_norm": 14.117283899414055, "learning_rate": 7.788133373200304e-06, "loss": 0.5391, "step": 11606 }, { "epoch": 0.33, "grad_norm": 4.462924681685905, "learning_rate": 7.78774839907492e-06, "loss": 0.57, "step": 11607 }, { "epoch": 0.33, "grad_norm": 2.554290985693343, "learning_rate": 7.787363400966883e-06, "loss": 0.3148, "step": 11608 }, { "epoch": 0.33, "grad_norm": 11.901321151868482, "learning_rate": 7.786978378879503e-06, "loss": 0.8912, "step": 11609 }, { "epoch": 0.33, "grad_norm": 3.978842915185011, "learning_rate": 7.786593332816088e-06, "loss": 0.2926, "step": 11610 }, { "epoch": 0.33, "grad_norm": 5.3296557601927494, "learning_rate": 7.786208262779954e-06, "loss": 0.4402, "step": 11611 }, { "epoch": 0.33, "grad_norm": 7.184690440497005, "learning_rate": 7.785823168774415e-06, "loss": 0.5386, "step": 11612 }, { "epoch": 0.33, "grad_norm": 4.1767154858245945, "learning_rate": 7.78543805080278e-06, "loss": 0.6945, "step": 11613 }, { "epoch": 0.33, "grad_norm": 5.406801316920371, "learning_rate": 7.785052908868366e-06, "loss": 0.4471, "step": 11614 }, { "epoch": 0.33, "grad_norm": 9.33539552272612, "learning_rate": 7.784667742974484e-06, "loss": 1.0861, "step": 11615 }, { "epoch": 0.33, "grad_norm": 2.2131015040764463, "learning_rate": 7.78428255312445e-06, "loss": 0.3284, "step": 11616 }, { "epoch": 0.33, "grad_norm": 3.0708029745939953, "learning_rate": 7.783897339321573e-06, "loss": 0.4432, "step": 11617 }, { "epoch": 0.33, "grad_norm": 11.089623943863407, "learning_rate": 7.783512101569173e-06, "loss": 1.0982, "step": 11618 }, { "epoch": 0.33, "grad_norm": 9.25009400087999, "learning_rate": 7.78312683987056e-06, "loss": 0.6909, "step": 11619 }, { "epoch": 0.33, "grad_norm": 4.9161751576887545, "learning_rate": 7.78274155422905e-06, "loss": 0.3695, "step": 11620 }, { "epoch": 0.33, "grad_norm": 5.616905449376907, "learning_rate": 7.782356244647955e-06, "loss": 0.199, "step": 11621 }, { "epoch": 0.33, "grad_norm": 6.7259604509151085, "learning_rate": 7.781970911130592e-06, "loss": 0.4216, "step": 11622 }, { "epoch": 0.33, "grad_norm": 7.766948195772769, "learning_rate": 7.781585553680278e-06, "loss": 0.2489, "step": 11623 }, { "epoch": 0.33, "grad_norm": 4.367420197157939, "learning_rate": 7.781200172300324e-06, "loss": 0.2997, "step": 11624 }, { "epoch": 0.33, "grad_norm": 7.467188332703906, "learning_rate": 7.780814766994046e-06, "loss": 0.6419, "step": 11625 }, { "epoch": 0.33, "grad_norm": 7.565261147599882, "learning_rate": 7.780429337764766e-06, "loss": 1.0985, "step": 11626 }, { "epoch": 0.33, "grad_norm": 7.719147953786498, "learning_rate": 7.780043884615792e-06, "loss": 0.7606, "step": 11627 }, { "epoch": 0.33, "grad_norm": 8.340474918254356, "learning_rate": 7.77965840755044e-06, "loss": 0.4346, "step": 11628 }, { "epoch": 0.33, "grad_norm": 8.30986038402877, "learning_rate": 7.779272906572034e-06, "loss": 0.5976, "step": 11629 }, { "epoch": 0.33, "grad_norm": 8.614840508665617, "learning_rate": 7.778887381683882e-06, "loss": 1.0428, "step": 11630 }, { "epoch": 0.33, "grad_norm": 7.6093499341861275, "learning_rate": 7.778501832889304e-06, "loss": 0.2945, "step": 11631 }, { "epoch": 0.33, "grad_norm": 5.817138789708431, "learning_rate": 7.778116260191618e-06, "loss": 0.5234, "step": 11632 }, { "epoch": 0.33, "grad_norm": 6.586490920086421, "learning_rate": 7.77773066359414e-06, "loss": 0.584, "step": 11633 }, { "epoch": 0.33, "grad_norm": 4.9435641556446015, "learning_rate": 7.777345043100186e-06, "loss": 0.5707, "step": 11634 }, { "epoch": 0.33, "grad_norm": 3.9313575729911006, "learning_rate": 7.776959398713075e-06, "loss": 0.3709, "step": 11635 }, { "epoch": 0.33, "grad_norm": 3.824593296743857, "learning_rate": 7.776573730436125e-06, "loss": 0.3823, "step": 11636 }, { "epoch": 0.33, "grad_norm": 6.2281001357038255, "learning_rate": 7.776188038272652e-06, "loss": 0.3962, "step": 11637 }, { "epoch": 0.33, "grad_norm": 6.806700346368652, "learning_rate": 7.775802322225976e-06, "loss": 0.657, "step": 11638 }, { "epoch": 0.33, "grad_norm": 5.09397597484517, "learning_rate": 7.775416582299414e-06, "loss": 0.5712, "step": 11639 }, { "epoch": 0.33, "grad_norm": 6.471979486864398, "learning_rate": 7.775030818496285e-06, "loss": 0.2148, "step": 11640 }, { "epoch": 0.33, "grad_norm": 3.8272778877778704, "learning_rate": 7.774645030819907e-06, "loss": 0.2928, "step": 11641 }, { "epoch": 0.33, "grad_norm": 6.872411344587065, "learning_rate": 7.774259219273602e-06, "loss": 0.7763, "step": 11642 }, { "epoch": 0.33, "grad_norm": 6.656501657805962, "learning_rate": 7.773873383860683e-06, "loss": 0.756, "step": 11643 }, { "epoch": 0.33, "grad_norm": 5.46668339509653, "learning_rate": 7.773487524584472e-06, "loss": 0.3846, "step": 11644 }, { "epoch": 0.33, "grad_norm": 5.280687358613071, "learning_rate": 7.773101641448293e-06, "loss": 0.4846, "step": 11645 }, { "epoch": 0.33, "grad_norm": 6.826569473846289, "learning_rate": 7.77271573445546e-06, "loss": 0.3418, "step": 11646 }, { "epoch": 0.33, "grad_norm": 5.562022670882082, "learning_rate": 7.772329803609296e-06, "loss": 0.4937, "step": 11647 }, { "epoch": 0.33, "grad_norm": 6.340630802698991, "learning_rate": 7.77194384891312e-06, "loss": 0.5064, "step": 11648 }, { "epoch": 0.33, "grad_norm": 5.339945437084715, "learning_rate": 7.771557870370251e-06, "loss": 0.6222, "step": 11649 }, { "epoch": 0.33, "grad_norm": 5.157852039572736, "learning_rate": 7.77117186798401e-06, "loss": 0.5739, "step": 11650 }, { "epoch": 0.33, "grad_norm": 7.34461976440187, "learning_rate": 7.770785841757722e-06, "loss": 0.7076, "step": 11651 }, { "epoch": 0.33, "grad_norm": 10.176953127811277, "learning_rate": 7.770399791694702e-06, "loss": 0.7144, "step": 11652 }, { "epoch": 0.33, "grad_norm": 7.741085462923602, "learning_rate": 7.770013717798275e-06, "loss": 0.789, "step": 11653 }, { "epoch": 0.33, "grad_norm": 5.629819712487887, "learning_rate": 7.769627620071761e-06, "loss": 0.9083, "step": 11654 }, { "epoch": 0.33, "grad_norm": 8.419551606731968, "learning_rate": 7.76924149851848e-06, "loss": 0.7049, "step": 11655 }, { "epoch": 0.33, "grad_norm": 5.017465176615924, "learning_rate": 7.768855353141757e-06, "loss": 0.9614, "step": 11656 }, { "epoch": 0.33, "grad_norm": 6.386047738665913, "learning_rate": 7.768469183944913e-06, "loss": 0.6796, "step": 11657 }, { "epoch": 0.33, "grad_norm": 6.156925299925666, "learning_rate": 7.768082990931267e-06, "loss": 0.8448, "step": 11658 }, { "epoch": 0.33, "grad_norm": 5.355868223777141, "learning_rate": 7.767696774104145e-06, "loss": 0.567, "step": 11659 }, { "epoch": 0.33, "grad_norm": 3.664669710449385, "learning_rate": 7.76731053346687e-06, "loss": 0.4625, "step": 11660 }, { "epoch": 0.33, "grad_norm": 13.960981628314073, "learning_rate": 7.766924269022762e-06, "loss": 1.0818, "step": 11661 }, { "epoch": 0.33, "grad_norm": 8.418645915610728, "learning_rate": 7.766537980775144e-06, "loss": 1.3987, "step": 11662 }, { "epoch": 0.33, "grad_norm": 3.2005603106024405, "learning_rate": 7.766151668727341e-06, "loss": 0.3394, "step": 11663 }, { "epoch": 0.33, "grad_norm": 5.421012842850284, "learning_rate": 7.765765332882675e-06, "loss": 0.3856, "step": 11664 }, { "epoch": 0.33, "grad_norm": 7.944200706151571, "learning_rate": 7.765378973244473e-06, "loss": 0.545, "step": 11665 }, { "epoch": 0.33, "grad_norm": 3.2956767141855785, "learning_rate": 7.764992589816054e-06, "loss": 0.2552, "step": 11666 }, { "epoch": 0.33, "grad_norm": 2.8966999861143807, "learning_rate": 7.764606182600745e-06, "loss": 0.3034, "step": 11667 }, { "epoch": 0.33, "grad_norm": 7.896892392705823, "learning_rate": 7.76421975160187e-06, "loss": 0.9812, "step": 11668 }, { "epoch": 0.33, "grad_norm": 3.2277525307473534, "learning_rate": 7.76383329682275e-06, "loss": 0.3229, "step": 11669 }, { "epoch": 0.33, "grad_norm": 5.7827019182798525, "learning_rate": 7.763446818266716e-06, "loss": 0.609, "step": 11670 }, { "epoch": 0.33, "grad_norm": 6.067232153111982, "learning_rate": 7.763060315937088e-06, "loss": 0.3547, "step": 11671 }, { "epoch": 0.33, "grad_norm": 6.297781223561497, "learning_rate": 7.762673789837192e-06, "loss": 0.7363, "step": 11672 }, { "epoch": 0.33, "grad_norm": 4.811830300637437, "learning_rate": 7.762287239970353e-06, "loss": 0.565, "step": 11673 }, { "epoch": 0.33, "grad_norm": 5.038751565965633, "learning_rate": 7.761900666339897e-06, "loss": 0.5318, "step": 11674 }, { "epoch": 0.33, "grad_norm": 7.533858294749038, "learning_rate": 7.76151406894915e-06, "loss": 1.0744, "step": 11675 }, { "epoch": 0.33, "grad_norm": 5.3693696407184355, "learning_rate": 7.761127447801438e-06, "loss": 0.5177, "step": 11676 }, { "epoch": 0.33, "grad_norm": 2.2333631558585934, "learning_rate": 7.760740802900086e-06, "loss": 0.3133, "step": 11677 }, { "epoch": 0.33, "grad_norm": 11.557018238401467, "learning_rate": 7.760354134248422e-06, "loss": 0.6264, "step": 11678 }, { "epoch": 0.33, "grad_norm": 8.010868155691016, "learning_rate": 7.75996744184977e-06, "loss": 0.5255, "step": 11679 }, { "epoch": 0.33, "grad_norm": 5.05745304163178, "learning_rate": 7.759580725707456e-06, "loss": 0.3786, "step": 11680 }, { "epoch": 0.33, "grad_norm": 6.162309114389841, "learning_rate": 7.759193985824813e-06, "loss": 0.4776, "step": 11681 }, { "epoch": 0.33, "grad_norm": 2.776497176673315, "learning_rate": 7.75880722220516e-06, "loss": 0.1666, "step": 11682 }, { "epoch": 0.33, "grad_norm": 9.14104467227354, "learning_rate": 7.758420434851831e-06, "loss": 1.2859, "step": 11683 }, { "epoch": 0.33, "grad_norm": 5.106495153549018, "learning_rate": 7.75803362376815e-06, "loss": 0.2548, "step": 11684 }, { "epoch": 0.33, "grad_norm": 4.645378016354044, "learning_rate": 7.757646788957445e-06, "loss": 0.603, "step": 11685 }, { "epoch": 0.33, "grad_norm": 4.132484610517939, "learning_rate": 7.757259930423045e-06, "loss": 0.5809, "step": 11686 }, { "epoch": 0.33, "grad_norm": 5.939061170425807, "learning_rate": 7.756873048168275e-06, "loss": 0.4743, "step": 11687 }, { "epoch": 0.33, "grad_norm": 4.510370041369107, "learning_rate": 7.756486142196467e-06, "loss": 0.6919, "step": 11688 }, { "epoch": 0.33, "grad_norm": 5.443217724290547, "learning_rate": 7.756099212510948e-06, "loss": 0.614, "step": 11689 }, { "epoch": 0.33, "grad_norm": 2.907366138371435, "learning_rate": 7.755712259115046e-06, "loss": 0.2025, "step": 11690 }, { "epoch": 0.33, "grad_norm": 10.753619981773689, "learning_rate": 7.755325282012092e-06, "loss": 0.6617, "step": 11691 }, { "epoch": 0.33, "grad_norm": 3.8327856225409915, "learning_rate": 7.754938281205413e-06, "loss": 0.2598, "step": 11692 }, { "epoch": 0.33, "grad_norm": 30.924915304970003, "learning_rate": 7.75455125669834e-06, "loss": 0.6842, "step": 11693 }, { "epoch": 0.33, "grad_norm": 6.110968808723292, "learning_rate": 7.754164208494201e-06, "loss": 0.5863, "step": 11694 }, { "epoch": 0.33, "grad_norm": 5.931100468413805, "learning_rate": 7.753777136596328e-06, "loss": 0.7761, "step": 11695 }, { "epoch": 0.33, "grad_norm": 6.184645408512966, "learning_rate": 7.753390041008047e-06, "loss": 0.4654, "step": 11696 }, { "epoch": 0.33, "grad_norm": 6.25663740576336, "learning_rate": 7.75300292173269e-06, "loss": 0.2879, "step": 11697 }, { "epoch": 0.34, "grad_norm": 6.8183294430559, "learning_rate": 7.752615778773591e-06, "loss": 0.5218, "step": 11698 }, { "epoch": 0.34, "grad_norm": 1.5484429081226683, "learning_rate": 7.752228612134077e-06, "loss": 0.1, "step": 11699 }, { "epoch": 0.34, "grad_norm": 4.617297692444343, "learning_rate": 7.751841421817476e-06, "loss": 0.4639, "step": 11700 }, { "epoch": 0.34, "grad_norm": 7.916633753122541, "learning_rate": 7.751454207827124e-06, "loss": 0.5923, "step": 11701 }, { "epoch": 0.34, "grad_norm": 5.687071983245398, "learning_rate": 7.75106697016635e-06, "loss": 0.5333, "step": 11702 }, { "epoch": 0.34, "grad_norm": 10.973606523409487, "learning_rate": 7.750679708838486e-06, "loss": 0.9581, "step": 11703 }, { "epoch": 0.34, "grad_norm": 4.440013173831263, "learning_rate": 7.750292423846863e-06, "loss": 0.3845, "step": 11704 }, { "epoch": 0.34, "grad_norm": 6.247817764779908, "learning_rate": 7.749905115194813e-06, "loss": 0.5388, "step": 11705 }, { "epoch": 0.34, "grad_norm": 6.964253433208617, "learning_rate": 7.749517782885667e-06, "loss": 0.3898, "step": 11706 }, { "epoch": 0.34, "grad_norm": 5.805980143111987, "learning_rate": 7.749130426922758e-06, "loss": 0.374, "step": 11707 }, { "epoch": 0.34, "grad_norm": 4.976373546133198, "learning_rate": 7.74874304730942e-06, "loss": 0.9207, "step": 11708 }, { "epoch": 0.34, "grad_norm": 8.619576061737018, "learning_rate": 7.748355644048983e-06, "loss": 0.6517, "step": 11709 }, { "epoch": 0.34, "grad_norm": 3.334961303889644, "learning_rate": 7.74796821714478e-06, "loss": 0.1418, "step": 11710 }, { "epoch": 0.34, "grad_norm": 4.0383758551552145, "learning_rate": 7.747580766600147e-06, "loss": 0.4564, "step": 11711 }, { "epoch": 0.34, "grad_norm": 5.800667172732717, "learning_rate": 7.747193292418413e-06, "loss": 0.3812, "step": 11712 }, { "epoch": 0.34, "grad_norm": 6.032648388918425, "learning_rate": 7.746805794602914e-06, "loss": 0.3264, "step": 11713 }, { "epoch": 0.34, "grad_norm": 5.761783922764349, "learning_rate": 7.746418273156982e-06, "loss": 0.2092, "step": 11714 }, { "epoch": 0.34, "grad_norm": 3.345880471432332, "learning_rate": 7.746030728083953e-06, "loss": 0.8458, "step": 11715 }, { "epoch": 0.34, "grad_norm": 7.282116359528049, "learning_rate": 7.745643159387159e-06, "loss": 0.3737, "step": 11716 }, { "epoch": 0.34, "grad_norm": 4.4355655537073, "learning_rate": 7.745255567069936e-06, "loss": 0.4221, "step": 11717 }, { "epoch": 0.34, "grad_norm": 5.369917729029904, "learning_rate": 7.744867951135618e-06, "loss": 0.3902, "step": 11718 }, { "epoch": 0.34, "grad_norm": 5.702968856555251, "learning_rate": 7.744480311587536e-06, "loss": 0.3792, "step": 11719 }, { "epoch": 0.34, "grad_norm": 4.421150768267599, "learning_rate": 7.74409264842903e-06, "loss": 0.5388, "step": 11720 }, { "epoch": 0.34, "grad_norm": 3.980818234301169, "learning_rate": 7.743704961663432e-06, "loss": 0.4069, "step": 11721 }, { "epoch": 0.34, "grad_norm": 6.2340640035898325, "learning_rate": 7.743317251294078e-06, "loss": 0.6685, "step": 11722 }, { "epoch": 0.34, "grad_norm": 4.270849947393093, "learning_rate": 7.742929517324303e-06, "loss": 0.5691, "step": 11723 }, { "epoch": 0.34, "grad_norm": 8.8310179164922, "learning_rate": 7.742541759757445e-06, "loss": 0.523, "step": 11724 }, { "epoch": 0.34, "grad_norm": 8.65361883614793, "learning_rate": 7.742153978596835e-06, "loss": 0.6235, "step": 11725 }, { "epoch": 0.34, "grad_norm": 4.837546627795812, "learning_rate": 7.741766173845814e-06, "loss": 0.4885, "step": 11726 }, { "epoch": 0.34, "grad_norm": 7.239967112808984, "learning_rate": 7.741378345507717e-06, "loss": 0.6991, "step": 11727 }, { "epoch": 0.34, "grad_norm": 4.375206996925452, "learning_rate": 7.740990493585879e-06, "loss": 0.4086, "step": 11728 }, { "epoch": 0.34, "grad_norm": 4.057993972122393, "learning_rate": 7.740602618083635e-06, "loss": 0.2554, "step": 11729 }, { "epoch": 0.34, "grad_norm": 6.882427919275718, "learning_rate": 7.740214719004326e-06, "loss": 0.6653, "step": 11730 }, { "epoch": 0.34, "grad_norm": 10.643145964188157, "learning_rate": 7.739826796351288e-06, "loss": 0.5731, "step": 11731 }, { "epoch": 0.34, "grad_norm": 6.289380632966802, "learning_rate": 7.739438850127856e-06, "loss": 0.7247, "step": 11732 }, { "epoch": 0.34, "grad_norm": 4.544935015768833, "learning_rate": 7.739050880337368e-06, "loss": 0.1232, "step": 11733 }, { "epoch": 0.34, "grad_norm": 4.8556085199691905, "learning_rate": 7.738662886983165e-06, "loss": 0.5623, "step": 11734 }, { "epoch": 0.34, "grad_norm": 7.654405083539959, "learning_rate": 7.73827487006858e-06, "loss": 0.705, "step": 11735 }, { "epoch": 0.34, "grad_norm": 5.940508792870369, "learning_rate": 7.737886829596956e-06, "loss": 0.5227, "step": 11736 }, { "epoch": 0.34, "grad_norm": 5.7719397557234755, "learning_rate": 7.737498765571627e-06, "loss": 1.0491, "step": 11737 }, { "epoch": 0.34, "grad_norm": 6.278396641582792, "learning_rate": 7.737110677995933e-06, "loss": 0.4806, "step": 11738 }, { "epoch": 0.34, "grad_norm": 7.119601798066449, "learning_rate": 7.736722566873214e-06, "loss": 0.6568, "step": 11739 }, { "epoch": 0.34, "grad_norm": 8.098846892707387, "learning_rate": 7.736334432206807e-06, "loss": 0.3101, "step": 11740 }, { "epoch": 0.34, "grad_norm": 1.4434984240833832, "learning_rate": 7.735946274000052e-06, "loss": 0.0916, "step": 11741 }, { "epoch": 0.34, "grad_norm": 6.086660868560826, "learning_rate": 7.735558092256289e-06, "loss": 0.5152, "step": 11742 }, { "epoch": 0.34, "grad_norm": 6.30690562806987, "learning_rate": 7.735169886978854e-06, "loss": 0.8044, "step": 11743 }, { "epoch": 0.34, "grad_norm": 7.3368913479304485, "learning_rate": 7.73478165817109e-06, "loss": 0.4655, "step": 11744 }, { "epoch": 0.34, "grad_norm": 8.870606422896719, "learning_rate": 7.734393405836337e-06, "loss": 1.1768, "step": 11745 }, { "epoch": 0.34, "grad_norm": 4.992747530619838, "learning_rate": 7.734005129977934e-06, "loss": 0.7324, "step": 11746 }, { "epoch": 0.34, "grad_norm": 5.8799499903226025, "learning_rate": 7.733616830599221e-06, "loss": 0.3473, "step": 11747 }, { "epoch": 0.34, "grad_norm": 6.465773888372174, "learning_rate": 7.73322850770354e-06, "loss": 0.5431, "step": 11748 }, { "epoch": 0.34, "grad_norm": 7.410249941750868, "learning_rate": 7.732840161294229e-06, "loss": 0.7206, "step": 11749 }, { "epoch": 0.34, "grad_norm": 38.07930896219173, "learning_rate": 7.732451791374633e-06, "loss": 0.3407, "step": 11750 }, { "epoch": 0.34, "grad_norm": 7.10259502026415, "learning_rate": 7.732063397948088e-06, "loss": 0.3458, "step": 11751 }, { "epoch": 0.34, "grad_norm": 4.33929781222465, "learning_rate": 7.73167498101794e-06, "loss": 0.1633, "step": 11752 }, { "epoch": 0.34, "grad_norm": 4.4302854050349065, "learning_rate": 7.731286540587528e-06, "loss": 0.3078, "step": 11753 }, { "epoch": 0.34, "grad_norm": 4.716703691213666, "learning_rate": 7.730898076660192e-06, "loss": 0.3241, "step": 11754 }, { "epoch": 0.34, "grad_norm": 8.388271520295797, "learning_rate": 7.730509589239279e-06, "loss": 0.8387, "step": 11755 }, { "epoch": 0.34, "grad_norm": 9.26149911350575, "learning_rate": 7.730121078328128e-06, "loss": 0.8562, "step": 11756 }, { "epoch": 0.34, "grad_norm": 8.83501669000799, "learning_rate": 7.72973254393008e-06, "loss": 0.9299, "step": 11757 }, { "epoch": 0.34, "grad_norm": 7.7529502913545665, "learning_rate": 7.72934398604848e-06, "loss": 0.6488, "step": 11758 }, { "epoch": 0.34, "grad_norm": 5.757486528255787, "learning_rate": 7.728955404686669e-06, "loss": 0.5286, "step": 11759 }, { "epoch": 0.34, "grad_norm": 10.201710250936722, "learning_rate": 7.728566799847992e-06, "loss": 0.9434, "step": 11760 }, { "epoch": 0.34, "grad_norm": 4.122572979602069, "learning_rate": 7.72817817153579e-06, "loss": 0.3067, "step": 11761 }, { "epoch": 0.34, "grad_norm": 3.683399555350351, "learning_rate": 7.727789519753405e-06, "loss": 0.4044, "step": 11762 }, { "epoch": 0.34, "grad_norm": 3.430656635071152, "learning_rate": 7.727400844504186e-06, "loss": 0.1413, "step": 11763 }, { "epoch": 0.34, "grad_norm": 6.221206793164641, "learning_rate": 7.727012145791472e-06, "loss": 0.7399, "step": 11764 }, { "epoch": 0.34, "grad_norm": 10.43177025962093, "learning_rate": 7.726623423618608e-06, "loss": 0.9533, "step": 11765 }, { "epoch": 0.34, "grad_norm": 6.922013051053507, "learning_rate": 7.726234677988939e-06, "loss": 0.3328, "step": 11766 }, { "epoch": 0.34, "grad_norm": 5.269386600576179, "learning_rate": 7.725845908905808e-06, "loss": 0.3442, "step": 11767 }, { "epoch": 0.34, "grad_norm": 4.892159169546657, "learning_rate": 7.72545711637256e-06, "loss": 0.4179, "step": 11768 }, { "epoch": 0.34, "grad_norm": 7.148542026109944, "learning_rate": 7.725068300392543e-06, "loss": 0.5193, "step": 11769 }, { "epoch": 0.34, "grad_norm": 10.789284716948067, "learning_rate": 7.724679460969094e-06, "loss": 0.7863, "step": 11770 }, { "epoch": 0.34, "grad_norm": 5.8875790519833595, "learning_rate": 7.724290598105566e-06, "loss": 0.4927, "step": 11771 }, { "epoch": 0.34, "grad_norm": 5.861092847139769, "learning_rate": 7.723901711805301e-06, "loss": 0.4564, "step": 11772 }, { "epoch": 0.34, "grad_norm": 3.249722248826666, "learning_rate": 7.723512802071645e-06, "loss": 0.1862, "step": 11773 }, { "epoch": 0.34, "grad_norm": 6.797113138170649, "learning_rate": 7.723123868907944e-06, "loss": 0.6379, "step": 11774 }, { "epoch": 0.34, "grad_norm": 4.135637150931527, "learning_rate": 7.722734912317542e-06, "loss": 0.3391, "step": 11775 }, { "epoch": 0.34, "grad_norm": 2.6455719786192127, "learning_rate": 7.722345932303788e-06, "loss": 0.3656, "step": 11776 }, { "epoch": 0.34, "grad_norm": 5.380811255598136, "learning_rate": 7.721956928870028e-06, "loss": 0.4487, "step": 11777 }, { "epoch": 0.34, "grad_norm": 8.016068712115127, "learning_rate": 7.721567902019606e-06, "loss": 0.6864, "step": 11778 }, { "epoch": 0.34, "grad_norm": 6.79312293309289, "learning_rate": 7.721178851755872e-06, "loss": 0.6774, "step": 11779 }, { "epoch": 0.34, "grad_norm": 6.023970643431726, "learning_rate": 7.72078977808217e-06, "loss": 0.3728, "step": 11780 }, { "epoch": 0.34, "grad_norm": 5.7531204878392055, "learning_rate": 7.72040068100185e-06, "loss": 0.2716, "step": 11781 }, { "epoch": 0.34, "grad_norm": 11.582594447731692, "learning_rate": 7.720011560518256e-06, "loss": 0.5985, "step": 11782 }, { "epoch": 0.34, "grad_norm": 8.311521085304713, "learning_rate": 7.719622416634739e-06, "loss": 0.4227, "step": 11783 }, { "epoch": 0.34, "grad_norm": 3.4370336389635208, "learning_rate": 7.719233249354645e-06, "loss": 0.352, "step": 11784 }, { "epoch": 0.34, "grad_norm": 8.707223307364075, "learning_rate": 7.718844058681323e-06, "loss": 0.5813, "step": 11785 }, { "epoch": 0.34, "grad_norm": 9.262600937575197, "learning_rate": 7.718454844618119e-06, "loss": 0.3525, "step": 11786 }, { "epoch": 0.34, "grad_norm": 5.917581496457139, "learning_rate": 7.718065607168381e-06, "loss": 0.7878, "step": 11787 }, { "epoch": 0.34, "grad_norm": 4.645685974771997, "learning_rate": 7.717676346335462e-06, "loss": 0.6578, "step": 11788 }, { "epoch": 0.34, "grad_norm": 8.275184799850678, "learning_rate": 7.717287062122707e-06, "loss": 0.3895, "step": 11789 }, { "epoch": 0.34, "grad_norm": 5.644011519108729, "learning_rate": 7.716897754533465e-06, "loss": 0.6621, "step": 11790 }, { "epoch": 0.34, "grad_norm": 6.821467595641441, "learning_rate": 7.716508423571087e-06, "loss": 0.482, "step": 11791 }, { "epoch": 0.34, "grad_norm": 5.674590246389293, "learning_rate": 7.716119069238924e-06, "loss": 0.6289, "step": 11792 }, { "epoch": 0.34, "grad_norm": 7.644401132044455, "learning_rate": 7.71572969154032e-06, "loss": 0.6766, "step": 11793 }, { "epoch": 0.34, "grad_norm": 4.451342791581305, "learning_rate": 7.715340290478628e-06, "loss": 0.5174, "step": 11794 }, { "epoch": 0.34, "grad_norm": 3.8188381353468612, "learning_rate": 7.714950866057199e-06, "loss": 0.3008, "step": 11795 }, { "epoch": 0.34, "grad_norm": 2.648619428929528, "learning_rate": 7.71456141827938e-06, "loss": 0.2857, "step": 11796 }, { "epoch": 0.34, "grad_norm": 6.033128534796325, "learning_rate": 7.714171947148525e-06, "loss": 0.3528, "step": 11797 }, { "epoch": 0.34, "grad_norm": 7.898085891036353, "learning_rate": 7.713782452667983e-06, "loss": 0.4901, "step": 11798 }, { "epoch": 0.34, "grad_norm": 6.470748887388171, "learning_rate": 7.713392934841103e-06, "loss": 0.5757, "step": 11799 }, { "epoch": 0.34, "grad_norm": 3.360783020186487, "learning_rate": 7.713003393671241e-06, "loss": 0.2785, "step": 11800 }, { "epoch": 0.34, "grad_norm": 2.6703320783206532, "learning_rate": 7.712613829161743e-06, "loss": 0.3479, "step": 11801 }, { "epoch": 0.34, "grad_norm": 2.154603924143564, "learning_rate": 7.712224241315963e-06, "loss": 0.1019, "step": 11802 }, { "epoch": 0.34, "grad_norm": 4.641391729342739, "learning_rate": 7.71183463013725e-06, "loss": 0.483, "step": 11803 }, { "epoch": 0.34, "grad_norm": 5.145540200763388, "learning_rate": 7.711444995628958e-06, "loss": 0.4348, "step": 11804 }, { "epoch": 0.34, "grad_norm": 3.56425904464136, "learning_rate": 7.711055337794439e-06, "loss": 0.2743, "step": 11805 }, { "epoch": 0.34, "grad_norm": 11.193234471090324, "learning_rate": 7.710665656637044e-06, "loss": 0.6704, "step": 11806 }, { "epoch": 0.34, "grad_norm": 7.829096385511105, "learning_rate": 7.710275952160127e-06, "loss": 0.6866, "step": 11807 }, { "epoch": 0.34, "grad_norm": 12.794107326480193, "learning_rate": 7.70988622436704e-06, "loss": 0.779, "step": 11808 }, { "epoch": 0.34, "grad_norm": 7.7948485896470405, "learning_rate": 7.709496473261135e-06, "loss": 0.8306, "step": 11809 }, { "epoch": 0.34, "grad_norm": 6.256377018602321, "learning_rate": 7.709106698845765e-06, "loss": 0.5229, "step": 11810 }, { "epoch": 0.34, "grad_norm": 5.352677055185049, "learning_rate": 7.708716901124283e-06, "loss": 0.4707, "step": 11811 }, { "epoch": 0.34, "grad_norm": 4.843580452197379, "learning_rate": 7.708327080100044e-06, "loss": 0.1942, "step": 11812 }, { "epoch": 0.34, "grad_norm": 5.032571370300472, "learning_rate": 7.7079372357764e-06, "loss": 0.4133, "step": 11813 }, { "epoch": 0.34, "grad_norm": 3.495765986076119, "learning_rate": 7.707547368156706e-06, "loss": 0.4176, "step": 11814 }, { "epoch": 0.34, "grad_norm": 6.612877893737006, "learning_rate": 7.707157477244316e-06, "loss": 0.7873, "step": 11815 }, { "epoch": 0.34, "grad_norm": 4.479374400208407, "learning_rate": 7.706767563042584e-06, "loss": 0.5274, "step": 11816 }, { "epoch": 0.34, "grad_norm": 5.010213862364036, "learning_rate": 7.706377625554862e-06, "loss": 0.5465, "step": 11817 }, { "epoch": 0.34, "grad_norm": 24.036636046596108, "learning_rate": 7.705987664784506e-06, "loss": 0.9555, "step": 11818 }, { "epoch": 0.34, "grad_norm": 9.124877092761734, "learning_rate": 7.705597680734874e-06, "loss": 0.6687, "step": 11819 }, { "epoch": 0.34, "grad_norm": 6.103214387097479, "learning_rate": 7.705207673409316e-06, "loss": 0.3693, "step": 11820 }, { "epoch": 0.34, "grad_norm": 6.679612222883121, "learning_rate": 7.70481764281119e-06, "loss": 0.9213, "step": 11821 }, { "epoch": 0.34, "grad_norm": 5.162491228959805, "learning_rate": 7.70442758894385e-06, "loss": 0.4322, "step": 11822 }, { "epoch": 0.34, "grad_norm": 6.987279710385128, "learning_rate": 7.704037511810655e-06, "loss": 0.4711, "step": 11823 }, { "epoch": 0.34, "grad_norm": 4.010237883839218, "learning_rate": 7.703647411414957e-06, "loss": 0.3299, "step": 11824 }, { "epoch": 0.34, "grad_norm": 5.546388975529682, "learning_rate": 7.703257287760113e-06, "loss": 1.0028, "step": 11825 }, { "epoch": 0.34, "grad_norm": 7.487728124337131, "learning_rate": 7.702867140849479e-06, "loss": 0.7593, "step": 11826 }, { "epoch": 0.34, "grad_norm": 3.2776621685455636, "learning_rate": 7.702476970686412e-06, "loss": 0.386, "step": 11827 }, { "epoch": 0.34, "grad_norm": 7.477882707164141, "learning_rate": 7.702086777274269e-06, "loss": 0.6103, "step": 11828 }, { "epoch": 0.34, "grad_norm": 8.608454660285062, "learning_rate": 7.701696560616405e-06, "loss": 0.9255, "step": 11829 }, { "epoch": 0.34, "grad_norm": 7.74083216686023, "learning_rate": 7.70130632071618e-06, "loss": 0.5612, "step": 11830 }, { "epoch": 0.34, "grad_norm": 5.724918728047502, "learning_rate": 7.700916057576948e-06, "loss": 0.6825, "step": 11831 }, { "epoch": 0.34, "grad_norm": 7.159604814588178, "learning_rate": 7.700525771202067e-06, "loss": 0.6662, "step": 11832 }, { "epoch": 0.34, "grad_norm": 9.65663040053809, "learning_rate": 7.700135461594896e-06, "loss": 0.8051, "step": 11833 }, { "epoch": 0.34, "grad_norm": 7.356618344687379, "learning_rate": 7.699745128758793e-06, "loss": 0.6088, "step": 11834 }, { "epoch": 0.34, "grad_norm": 4.053524754677444, "learning_rate": 7.699354772697114e-06, "loss": 0.414, "step": 11835 }, { "epoch": 0.34, "grad_norm": 7.51957437081792, "learning_rate": 7.698964393413218e-06, "loss": 0.9432, "step": 11836 }, { "epoch": 0.34, "grad_norm": 8.967142898174792, "learning_rate": 7.698573990910462e-06, "loss": 0.6937, "step": 11837 }, { "epoch": 0.34, "grad_norm": 4.867819657764657, "learning_rate": 7.69818356519221e-06, "loss": 0.5505, "step": 11838 }, { "epoch": 0.34, "grad_norm": 6.218636171458924, "learning_rate": 7.697793116261815e-06, "loss": 0.4622, "step": 11839 }, { "epoch": 0.34, "grad_norm": 4.639323222156492, "learning_rate": 7.69740264412264e-06, "loss": 0.4757, "step": 11840 }, { "epoch": 0.34, "grad_norm": 5.185135911651907, "learning_rate": 7.69701214877804e-06, "loss": 0.5793, "step": 11841 }, { "epoch": 0.34, "grad_norm": 4.764985138508696, "learning_rate": 7.696621630231376e-06, "loss": 0.5459, "step": 11842 }, { "epoch": 0.34, "grad_norm": 6.694447518273682, "learning_rate": 7.69623108848601e-06, "loss": 0.4722, "step": 11843 }, { "epoch": 0.34, "grad_norm": 6.074417356329088, "learning_rate": 7.695840523545298e-06, "loss": 0.7417, "step": 11844 }, { "epoch": 0.34, "grad_norm": 4.004896088108178, "learning_rate": 7.695449935412602e-06, "loss": 0.368, "step": 11845 }, { "epoch": 0.34, "grad_norm": 4.644403377277373, "learning_rate": 7.695059324091284e-06, "loss": 0.5613, "step": 11846 }, { "epoch": 0.34, "grad_norm": 6.519547048175887, "learning_rate": 7.694668689584701e-06, "loss": 0.7601, "step": 11847 }, { "epoch": 0.34, "grad_norm": 3.962029782383075, "learning_rate": 7.694278031896217e-06, "loss": 0.1728, "step": 11848 }, { "epoch": 0.34, "grad_norm": 6.6563708513777815, "learning_rate": 7.693887351029188e-06, "loss": 0.6169, "step": 11849 }, { "epoch": 0.34, "grad_norm": 5.115857063487389, "learning_rate": 7.69349664698698e-06, "loss": 0.5713, "step": 11850 }, { "epoch": 0.34, "grad_norm": 9.8335830985424, "learning_rate": 7.693105919772952e-06, "loss": 0.3835, "step": 11851 }, { "epoch": 0.34, "grad_norm": 7.072936187702219, "learning_rate": 7.692715169390463e-06, "loss": 0.7446, "step": 11852 }, { "epoch": 0.34, "grad_norm": 3.4531475083666168, "learning_rate": 7.692324395842879e-06, "loss": 0.4196, "step": 11853 }, { "epoch": 0.34, "grad_norm": 6.481342682134376, "learning_rate": 7.691933599133558e-06, "loss": 0.6206, "step": 11854 }, { "epoch": 0.34, "grad_norm": 3.436414477872228, "learning_rate": 7.691542779265865e-06, "loss": 0.4189, "step": 11855 }, { "epoch": 0.34, "grad_norm": 4.308401011827594, "learning_rate": 7.69115193624316e-06, "loss": 0.4323, "step": 11856 }, { "epoch": 0.34, "grad_norm": 8.65629632710203, "learning_rate": 7.69076107006881e-06, "loss": 0.8538, "step": 11857 }, { "epoch": 0.34, "grad_norm": 8.900703282375975, "learning_rate": 7.690370180746168e-06, "loss": 1.0189, "step": 11858 }, { "epoch": 0.34, "grad_norm": 4.819129539019985, "learning_rate": 7.689979268278606e-06, "loss": 0.2665, "step": 11859 }, { "epoch": 0.34, "grad_norm": 5.618041672576152, "learning_rate": 7.689588332669483e-06, "loss": 0.6777, "step": 11860 }, { "epoch": 0.34, "grad_norm": 7.332663946234062, "learning_rate": 7.689197373922162e-06, "loss": 0.3973, "step": 11861 }, { "epoch": 0.34, "grad_norm": 10.980801995593522, "learning_rate": 7.688806392040007e-06, "loss": 0.4278, "step": 11862 }, { "epoch": 0.34, "grad_norm": 5.325735680708486, "learning_rate": 7.688415387026382e-06, "loss": 0.377, "step": 11863 }, { "epoch": 0.34, "grad_norm": 5.215172415102342, "learning_rate": 7.688024358884652e-06, "loss": 0.5404, "step": 11864 }, { "epoch": 0.34, "grad_norm": 4.209362945879221, "learning_rate": 7.687633307618178e-06, "loss": 0.2251, "step": 11865 }, { "epoch": 0.34, "grad_norm": 2.449780048501014, "learning_rate": 7.687242233230325e-06, "loss": 0.2268, "step": 11866 }, { "epoch": 0.34, "grad_norm": 12.700906932917828, "learning_rate": 7.68685113572446e-06, "loss": 0.4544, "step": 11867 }, { "epoch": 0.34, "grad_norm": 6.186379071051353, "learning_rate": 7.686460015103943e-06, "loss": 0.3869, "step": 11868 }, { "epoch": 0.34, "grad_norm": 3.6417099805361253, "learning_rate": 7.68606887137214e-06, "loss": 0.3342, "step": 11869 }, { "epoch": 0.34, "grad_norm": 8.423767443411162, "learning_rate": 7.68567770453242e-06, "loss": 0.2945, "step": 11870 }, { "epoch": 0.34, "grad_norm": 5.010614239239923, "learning_rate": 7.685286514588146e-06, "loss": 0.5861, "step": 11871 }, { "epoch": 0.34, "grad_norm": 6.703683398625623, "learning_rate": 7.684895301542684e-06, "loss": 0.3276, "step": 11872 }, { "epoch": 0.34, "grad_norm": 7.283833286954221, "learning_rate": 7.684504065399395e-06, "loss": 0.7271, "step": 11873 }, { "epoch": 0.34, "grad_norm": 10.9334254059365, "learning_rate": 7.684112806161649e-06, "loss": 0.3205, "step": 11874 }, { "epoch": 0.34, "grad_norm": 5.807346520499821, "learning_rate": 7.683721523832811e-06, "loss": 0.7739, "step": 11875 }, { "epoch": 0.34, "grad_norm": 1.69486272796165, "learning_rate": 7.683330218416248e-06, "loss": 0.1619, "step": 11876 }, { "epoch": 0.34, "grad_norm": 3.9610711852016265, "learning_rate": 7.682938889915323e-06, "loss": 0.4644, "step": 11877 }, { "epoch": 0.34, "grad_norm": 8.331488125281934, "learning_rate": 7.682547538333407e-06, "loss": 0.343, "step": 11878 }, { "epoch": 0.34, "grad_norm": 6.322799966438001, "learning_rate": 7.682156163673864e-06, "loss": 0.6364, "step": 11879 }, { "epoch": 0.34, "grad_norm": 5.844491472989788, "learning_rate": 7.681764765940062e-06, "loss": 0.5688, "step": 11880 }, { "epoch": 0.34, "grad_norm": 4.870439499247734, "learning_rate": 7.68137334513537e-06, "loss": 0.7416, "step": 11881 }, { "epoch": 0.34, "grad_norm": 5.329626056514914, "learning_rate": 7.680981901263152e-06, "loss": 0.6126, "step": 11882 }, { "epoch": 0.34, "grad_norm": 10.572816088468976, "learning_rate": 7.680590434326776e-06, "loss": 0.8743, "step": 11883 }, { "epoch": 0.34, "grad_norm": 4.0087730044973755, "learning_rate": 7.68019894432961e-06, "loss": 0.3335, "step": 11884 }, { "epoch": 0.34, "grad_norm": 5.920072819803187, "learning_rate": 7.679807431275024e-06, "loss": 0.2921, "step": 11885 }, { "epoch": 0.34, "grad_norm": 5.07795902861285, "learning_rate": 7.679415895166383e-06, "loss": 0.5055, "step": 11886 }, { "epoch": 0.34, "grad_norm": 5.052470314961715, "learning_rate": 7.67902433600706e-06, "loss": 0.3574, "step": 11887 }, { "epoch": 0.34, "grad_norm": 20.02898271557009, "learning_rate": 7.678632753800418e-06, "loss": 0.4042, "step": 11888 }, { "epoch": 0.34, "grad_norm": 6.510213896581897, "learning_rate": 7.678241148549827e-06, "loss": 0.4732, "step": 11889 }, { "epoch": 0.34, "grad_norm": 5.091377126865936, "learning_rate": 7.67784952025866e-06, "loss": 0.6311, "step": 11890 }, { "epoch": 0.34, "grad_norm": 8.449965812117359, "learning_rate": 7.677457868930282e-06, "loss": 0.4005, "step": 11891 }, { "epoch": 0.34, "grad_norm": 6.661035098265304, "learning_rate": 7.677066194568064e-06, "loss": 0.6505, "step": 11892 }, { "epoch": 0.34, "grad_norm": 7.72614863476237, "learning_rate": 7.676674497175373e-06, "loss": 0.8927, "step": 11893 }, { "epoch": 0.34, "grad_norm": 8.965563377469095, "learning_rate": 7.676282776755583e-06, "loss": 0.7775, "step": 11894 }, { "epoch": 0.34, "grad_norm": 3.8955542656535553, "learning_rate": 7.675891033312062e-06, "loss": 0.9376, "step": 11895 }, { "epoch": 0.34, "grad_norm": 3.7382544952041052, "learning_rate": 7.675499266848179e-06, "loss": 0.3589, "step": 11896 }, { "epoch": 0.34, "grad_norm": 1.9703928057575073, "learning_rate": 7.675107477367304e-06, "loss": 0.2237, "step": 11897 }, { "epoch": 0.34, "grad_norm": 2.0895284824899267, "learning_rate": 7.674715664872812e-06, "loss": 0.0591, "step": 11898 }, { "epoch": 0.34, "grad_norm": 5.473717705700396, "learning_rate": 7.674323829368068e-06, "loss": 0.2434, "step": 11899 }, { "epoch": 0.34, "grad_norm": 4.304283496985801, "learning_rate": 7.673931970856446e-06, "loss": 0.5664, "step": 11900 }, { "epoch": 0.34, "grad_norm": 6.951801577002585, "learning_rate": 7.673540089341318e-06, "loss": 0.5866, "step": 11901 }, { "epoch": 0.34, "grad_norm": 4.487279821748298, "learning_rate": 7.67314818482605e-06, "loss": 0.4651, "step": 11902 }, { "epoch": 0.34, "grad_norm": 4.576774658411751, "learning_rate": 7.672756257314022e-06, "loss": 0.6008, "step": 11903 }, { "epoch": 0.34, "grad_norm": 6.181609268470531, "learning_rate": 7.6723643068086e-06, "loss": 0.6155, "step": 11904 }, { "epoch": 0.34, "grad_norm": 2.979772543931263, "learning_rate": 7.671972333313154e-06, "loss": 0.0872, "step": 11905 }, { "epoch": 0.34, "grad_norm": 3.685081399978703, "learning_rate": 7.671580336831059e-06, "loss": 0.4359, "step": 11906 }, { "epoch": 0.34, "grad_norm": 4.847440261394029, "learning_rate": 7.67118831736569e-06, "loss": 0.4986, "step": 11907 }, { "epoch": 0.34, "grad_norm": 7.419531329692107, "learning_rate": 7.670796274920415e-06, "loss": 0.4754, "step": 11908 }, { "epoch": 0.34, "grad_norm": 4.568734338318664, "learning_rate": 7.67040420949861e-06, "loss": 0.8599, "step": 11909 }, { "epoch": 0.34, "grad_norm": 7.544076214984841, "learning_rate": 7.670012121103645e-06, "loss": 0.8019, "step": 11910 }, { "epoch": 0.34, "grad_norm": 5.190731237198181, "learning_rate": 7.669620009738895e-06, "loss": 0.258, "step": 11911 }, { "epoch": 0.34, "grad_norm": 5.172365701221644, "learning_rate": 7.669227875407733e-06, "loss": 0.573, "step": 11912 }, { "epoch": 0.34, "grad_norm": 6.958616955244021, "learning_rate": 7.668835718113532e-06, "loss": 0.4547, "step": 11913 }, { "epoch": 0.34, "grad_norm": 6.745994863133267, "learning_rate": 7.668443537859667e-06, "loss": 0.3145, "step": 11914 }, { "epoch": 0.34, "grad_norm": 6.188330854639197, "learning_rate": 7.668051334649508e-06, "loss": 0.8139, "step": 11915 }, { "epoch": 0.34, "grad_norm": 8.955623444538354, "learning_rate": 7.667659108486434e-06, "loss": 0.7447, "step": 11916 }, { "epoch": 0.34, "grad_norm": 8.405558391043058, "learning_rate": 7.667266859373818e-06, "loss": 0.8815, "step": 11917 }, { "epoch": 0.34, "grad_norm": 9.258426230645819, "learning_rate": 7.66687458731503e-06, "loss": 0.9409, "step": 11918 }, { "epoch": 0.34, "grad_norm": 6.508248011200323, "learning_rate": 7.666482292313451e-06, "loss": 0.5403, "step": 11919 }, { "epoch": 0.34, "grad_norm": 3.429542938174964, "learning_rate": 7.666089974372454e-06, "loss": 0.1172, "step": 11920 }, { "epoch": 0.34, "grad_norm": 7.5639733069507145, "learning_rate": 7.665697633495411e-06, "loss": 0.3371, "step": 11921 }, { "epoch": 0.34, "grad_norm": 5.523268895361361, "learning_rate": 7.665305269685702e-06, "loss": 0.1921, "step": 11922 }, { "epoch": 0.34, "grad_norm": 11.787204199080058, "learning_rate": 7.664912882946697e-06, "loss": 0.7895, "step": 11923 }, { "epoch": 0.34, "grad_norm": 8.41645947443691, "learning_rate": 7.664520473281776e-06, "loss": 0.8617, "step": 11924 }, { "epoch": 0.34, "grad_norm": 6.437444445916617, "learning_rate": 7.664128040694314e-06, "loss": 0.6066, "step": 11925 }, { "epoch": 0.34, "grad_norm": 5.298043023794094, "learning_rate": 7.663735585187685e-06, "loss": 0.7288, "step": 11926 }, { "epoch": 0.34, "grad_norm": 5.885642415896215, "learning_rate": 7.663343106765268e-06, "loss": 0.4812, "step": 11927 }, { "epoch": 0.34, "grad_norm": 9.216914003811802, "learning_rate": 7.662950605430437e-06, "loss": 0.5641, "step": 11928 }, { "epoch": 0.34, "grad_norm": 4.000224375392217, "learning_rate": 7.66255808118657e-06, "loss": 0.3132, "step": 11929 }, { "epoch": 0.34, "grad_norm": 3.2943158997278954, "learning_rate": 7.662165534037045e-06, "loss": 0.5325, "step": 11930 }, { "epoch": 0.34, "grad_norm": 6.190139977963127, "learning_rate": 7.661772963985236e-06, "loss": 0.6042, "step": 11931 }, { "epoch": 0.34, "grad_norm": 1.9685358733337766, "learning_rate": 7.661380371034523e-06, "loss": 0.3523, "step": 11932 }, { "epoch": 0.34, "grad_norm": 6.527881100244975, "learning_rate": 7.660987755188283e-06, "loss": 0.2804, "step": 11933 }, { "epoch": 0.34, "grad_norm": 9.5469258957925, "learning_rate": 7.660595116449892e-06, "loss": 0.4879, "step": 11934 }, { "epoch": 0.34, "grad_norm": 3.02819148838048, "learning_rate": 7.660202454822729e-06, "loss": 0.1796, "step": 11935 }, { "epoch": 0.34, "grad_norm": 7.996192324002349, "learning_rate": 7.659809770310172e-06, "loss": 0.4509, "step": 11936 }, { "epoch": 0.34, "grad_norm": 7.382664334864102, "learning_rate": 7.659417062915598e-06, "loss": 0.3961, "step": 11937 }, { "epoch": 0.34, "grad_norm": 3.95737403823396, "learning_rate": 7.659024332642388e-06, "loss": 0.4468, "step": 11938 }, { "epoch": 0.34, "grad_norm": 2.61728201168844, "learning_rate": 7.658631579493918e-06, "loss": 0.3438, "step": 11939 }, { "epoch": 0.34, "grad_norm": 2.8367773446576923, "learning_rate": 7.658238803473568e-06, "loss": 0.3232, "step": 11940 }, { "epoch": 0.34, "grad_norm": 3.307725992995145, "learning_rate": 7.657846004584717e-06, "loss": 0.6716, "step": 11941 }, { "epoch": 0.34, "grad_norm": 5.7276362184055145, "learning_rate": 7.657453182830744e-06, "loss": 0.5569, "step": 11942 }, { "epoch": 0.34, "grad_norm": 6.316577236713386, "learning_rate": 7.657060338215028e-06, "loss": 0.6475, "step": 11943 }, { "epoch": 0.34, "grad_norm": 7.125748745576784, "learning_rate": 7.65666747074095e-06, "loss": 0.6117, "step": 11944 }, { "epoch": 0.34, "grad_norm": 4.521132914846644, "learning_rate": 7.656274580411887e-06, "loss": 0.695, "step": 11945 }, { "epoch": 0.34, "grad_norm": 8.632662534489993, "learning_rate": 7.655881667231223e-06, "loss": 0.7035, "step": 11946 }, { "epoch": 0.34, "grad_norm": 3.10476991479742, "learning_rate": 7.655488731202336e-06, "loss": 0.3761, "step": 11947 }, { "epoch": 0.34, "grad_norm": 4.355954978727728, "learning_rate": 7.655095772328604e-06, "loss": 0.5884, "step": 11948 }, { "epoch": 0.34, "grad_norm": 3.261386767641578, "learning_rate": 7.654702790613413e-06, "loss": 0.1743, "step": 11949 }, { "epoch": 0.34, "grad_norm": 10.30416186237853, "learning_rate": 7.65430978606014e-06, "loss": 0.6561, "step": 11950 }, { "epoch": 0.34, "grad_norm": 6.426886651869645, "learning_rate": 7.653916758672164e-06, "loss": 0.6274, "step": 11951 }, { "epoch": 0.34, "grad_norm": 5.486625101125935, "learning_rate": 7.653523708452872e-06, "loss": 0.5719, "step": 11952 }, { "epoch": 0.34, "grad_norm": 5.569772274149328, "learning_rate": 7.653130635405641e-06, "loss": 0.7176, "step": 11953 }, { "epoch": 0.34, "grad_norm": 6.717471262760684, "learning_rate": 7.652737539533855e-06, "loss": 0.4953, "step": 11954 }, { "epoch": 0.34, "grad_norm": 9.843663581589913, "learning_rate": 7.652344420840893e-06, "loss": 0.6476, "step": 11955 }, { "epoch": 0.34, "grad_norm": 4.240120934120272, "learning_rate": 7.65195127933014e-06, "loss": 0.3854, "step": 11956 }, { "epoch": 0.34, "grad_norm": 7.573454937876432, "learning_rate": 7.651558115004975e-06, "loss": 0.7549, "step": 11957 }, { "epoch": 0.34, "grad_norm": 4.690308505162127, "learning_rate": 7.651164927868783e-06, "loss": 0.4275, "step": 11958 }, { "epoch": 0.34, "grad_norm": 9.012755573685878, "learning_rate": 7.650771717924943e-06, "loss": 0.711, "step": 11959 }, { "epoch": 0.34, "grad_norm": 4.852767229617662, "learning_rate": 7.650378485176844e-06, "loss": 0.3913, "step": 11960 }, { "epoch": 0.34, "grad_norm": 4.2919142506611285, "learning_rate": 7.649985229627864e-06, "loss": 0.1794, "step": 11961 }, { "epoch": 0.34, "grad_norm": 4.7266184653759895, "learning_rate": 7.649591951281387e-06, "loss": 0.3846, "step": 11962 }, { "epoch": 0.34, "grad_norm": 7.826225735636349, "learning_rate": 7.649198650140797e-06, "loss": 0.7793, "step": 11963 }, { "epoch": 0.34, "grad_norm": 6.42395709735563, "learning_rate": 7.648805326209477e-06, "loss": 0.5081, "step": 11964 }, { "epoch": 0.34, "grad_norm": 3.4261600019679586, "learning_rate": 7.64841197949081e-06, "loss": 0.189, "step": 11965 }, { "epoch": 0.34, "grad_norm": 6.352849700862349, "learning_rate": 7.648018609988183e-06, "loss": 1.0117, "step": 11966 }, { "epoch": 0.34, "grad_norm": 5.853560102902839, "learning_rate": 7.647625217704975e-06, "loss": 0.4978, "step": 11967 }, { "epoch": 0.34, "grad_norm": 6.467018890947857, "learning_rate": 7.647231802644575e-06, "loss": 0.4471, "step": 11968 }, { "epoch": 0.34, "grad_norm": 3.7477905440230095, "learning_rate": 7.646838364810367e-06, "loss": 0.3643, "step": 11969 }, { "epoch": 0.34, "grad_norm": 5.987964241012796, "learning_rate": 7.646444904205732e-06, "loss": 0.6954, "step": 11970 }, { "epoch": 0.34, "grad_norm": 5.710436234086349, "learning_rate": 7.64605142083406e-06, "loss": 0.5632, "step": 11971 }, { "epoch": 0.34, "grad_norm": 3.4966923206601646, "learning_rate": 7.645657914698731e-06, "loss": 0.586, "step": 11972 }, { "epoch": 0.34, "grad_norm": 6.870051666576331, "learning_rate": 7.645264385803132e-06, "loss": 0.6917, "step": 11973 }, { "epoch": 0.34, "grad_norm": 12.242767534470143, "learning_rate": 7.644870834150653e-06, "loss": 0.8038, "step": 11974 }, { "epoch": 0.34, "grad_norm": 3.345634000991637, "learning_rate": 7.644477259744674e-06, "loss": 0.2287, "step": 11975 }, { "epoch": 0.34, "grad_norm": 8.406135927014844, "learning_rate": 7.64408366258858e-06, "loss": 0.7305, "step": 11976 }, { "epoch": 0.34, "grad_norm": 3.7307025296921315, "learning_rate": 7.643690042685763e-06, "loss": 0.3134, "step": 11977 }, { "epoch": 0.34, "grad_norm": 4.207726551287252, "learning_rate": 7.643296400039606e-06, "loss": 0.4342, "step": 11978 }, { "epoch": 0.34, "grad_norm": 11.010277715051007, "learning_rate": 7.642902734653494e-06, "loss": 0.9423, "step": 11979 }, { "epoch": 0.34, "grad_norm": 4.738956739765516, "learning_rate": 7.642509046530814e-06, "loss": 0.2948, "step": 11980 }, { "epoch": 0.34, "grad_norm": 5.814768604847272, "learning_rate": 7.642115335674958e-06, "loss": 0.6133, "step": 11981 }, { "epoch": 0.34, "grad_norm": 6.720420948990354, "learning_rate": 7.641721602089307e-06, "loss": 0.4294, "step": 11982 }, { "epoch": 0.34, "grad_norm": 8.351831244488455, "learning_rate": 7.64132784577725e-06, "loss": 0.8238, "step": 11983 }, { "epoch": 0.34, "grad_norm": 2.9208709093108407, "learning_rate": 7.640934066742174e-06, "loss": 0.3601, "step": 11984 }, { "epoch": 0.34, "grad_norm": 5.877345286628617, "learning_rate": 7.640540264987468e-06, "loss": 0.3086, "step": 11985 }, { "epoch": 0.34, "grad_norm": 6.519444286207557, "learning_rate": 7.64014644051652e-06, "loss": 0.5789, "step": 11986 }, { "epoch": 0.34, "grad_norm": 5.37754140698724, "learning_rate": 7.639752593332717e-06, "loss": 0.5391, "step": 11987 }, { "epoch": 0.34, "grad_norm": 3.5314562914521384, "learning_rate": 7.639358723439448e-06, "loss": 0.4708, "step": 11988 }, { "epoch": 0.34, "grad_norm": 3.7957101298722282, "learning_rate": 7.6389648308401e-06, "loss": 0.5538, "step": 11989 }, { "epoch": 0.34, "grad_norm": 4.688875886533486, "learning_rate": 7.638570915538063e-06, "loss": 0.4252, "step": 11990 }, { "epoch": 0.34, "grad_norm": 10.038293665081774, "learning_rate": 7.638176977536725e-06, "loss": 0.4809, "step": 11991 }, { "epoch": 0.34, "grad_norm": 6.530966027032014, "learning_rate": 7.637783016839476e-06, "loss": 0.4237, "step": 11992 }, { "epoch": 0.34, "grad_norm": 5.355936576726998, "learning_rate": 7.637389033449703e-06, "loss": 0.5126, "step": 11993 }, { "epoch": 0.34, "grad_norm": 5.956515890873417, "learning_rate": 7.636995027370797e-06, "loss": 0.6771, "step": 11994 }, { "epoch": 0.34, "grad_norm": 7.712576339319141, "learning_rate": 7.636600998606148e-06, "loss": 0.5636, "step": 11995 }, { "epoch": 0.34, "grad_norm": 4.15227622607962, "learning_rate": 7.636206947159146e-06, "loss": 0.4289, "step": 11996 }, { "epoch": 0.34, "grad_norm": 4.608493940556555, "learning_rate": 7.63581287303318e-06, "loss": 0.3659, "step": 11997 }, { "epoch": 0.34, "grad_norm": 4.797796850437971, "learning_rate": 7.63541877623164e-06, "loss": 0.5907, "step": 11998 }, { "epoch": 0.34, "grad_norm": 3.8330859678039206, "learning_rate": 7.635024656757915e-06, "loss": 0.4609, "step": 11999 }, { "epoch": 0.34, "grad_norm": 4.695814210615286, "learning_rate": 7.6346305146154e-06, "loss": 0.5643, "step": 12000 }, { "epoch": 0.34, "grad_norm": 10.446700375552668, "learning_rate": 7.634236349807481e-06, "loss": 0.9935, "step": 12001 }, { "epoch": 0.34, "grad_norm": 4.268949701153624, "learning_rate": 7.633842162337552e-06, "loss": 0.7895, "step": 12002 }, { "epoch": 0.34, "grad_norm": 7.194057558115246, "learning_rate": 7.633447952209004e-06, "loss": 0.6531, "step": 12003 }, { "epoch": 0.34, "grad_norm": 7.519378042270393, "learning_rate": 7.633053719425226e-06, "loss": 0.6057, "step": 12004 }, { "epoch": 0.34, "grad_norm": 4.506140625323384, "learning_rate": 7.63265946398961e-06, "loss": 0.8009, "step": 12005 }, { "epoch": 0.34, "grad_norm": 1.601474317588584, "learning_rate": 7.632265185905552e-06, "loss": 0.1138, "step": 12006 }, { "epoch": 0.34, "grad_norm": 6.1645252358691005, "learning_rate": 7.63187088517644e-06, "loss": 0.4359, "step": 12007 }, { "epoch": 0.34, "grad_norm": 6.8059618311777745, "learning_rate": 7.631476561805667e-06, "loss": 0.5854, "step": 12008 }, { "epoch": 0.34, "grad_norm": 10.491615580908336, "learning_rate": 7.631082215796622e-06, "loss": 0.4547, "step": 12009 }, { "epoch": 0.34, "grad_norm": 8.806126014156126, "learning_rate": 7.630687847152703e-06, "loss": 0.4443, "step": 12010 }, { "epoch": 0.34, "grad_norm": 6.291589605941593, "learning_rate": 7.6302934558773e-06, "loss": 0.5283, "step": 12011 }, { "epoch": 0.34, "grad_norm": 4.69860891696118, "learning_rate": 7.629899041973806e-06, "loss": 0.3458, "step": 12012 }, { "epoch": 0.34, "grad_norm": 3.9269996991683187, "learning_rate": 7.629504605445616e-06, "loss": 0.428, "step": 12013 }, { "epoch": 0.34, "grad_norm": 6.784254190923494, "learning_rate": 7.629110146296119e-06, "loss": 0.5805, "step": 12014 }, { "epoch": 0.34, "grad_norm": 9.200947139540467, "learning_rate": 7.628715664528714e-06, "loss": 0.801, "step": 12015 }, { "epoch": 0.34, "grad_norm": 4.26742778312484, "learning_rate": 7.62832116014679e-06, "loss": 0.2976, "step": 12016 }, { "epoch": 0.34, "grad_norm": 3.5999332474771673, "learning_rate": 7.627926633153741e-06, "loss": 0.5133, "step": 12017 }, { "epoch": 0.34, "grad_norm": 2.553686523320797, "learning_rate": 7.627532083552967e-06, "loss": 0.2046, "step": 12018 }, { "epoch": 0.34, "grad_norm": 9.006738418370475, "learning_rate": 7.627137511347855e-06, "loss": 0.7296, "step": 12019 }, { "epoch": 0.34, "grad_norm": 5.106648688936253, "learning_rate": 7.6267429165418025e-06, "loss": 0.8641, "step": 12020 }, { "epoch": 0.34, "grad_norm": 2.6189475447043304, "learning_rate": 7.626348299138205e-06, "loss": 0.241, "step": 12021 }, { "epoch": 0.34, "grad_norm": 3.0128628429159185, "learning_rate": 7.625953659140457e-06, "loss": 0.3352, "step": 12022 }, { "epoch": 0.34, "grad_norm": 8.964748114447675, "learning_rate": 7.6255589965519515e-06, "loss": 0.7552, "step": 12023 }, { "epoch": 0.34, "grad_norm": 7.42051404822666, "learning_rate": 7.625164311376085e-06, "loss": 0.6241, "step": 12024 }, { "epoch": 0.34, "grad_norm": 4.959495274720439, "learning_rate": 7.624769603616255e-06, "loss": 0.4948, "step": 12025 }, { "epoch": 0.34, "grad_norm": 6.084260839362105, "learning_rate": 7.624374873275854e-06, "loss": 0.4065, "step": 12026 }, { "epoch": 0.34, "grad_norm": 7.901442200565474, "learning_rate": 7.623980120358279e-06, "loss": 0.3724, "step": 12027 }, { "epoch": 0.34, "grad_norm": 4.380984382359492, "learning_rate": 7.6235853448669276e-06, "loss": 0.333, "step": 12028 }, { "epoch": 0.34, "grad_norm": 7.3026385855788085, "learning_rate": 7.623190546805192e-06, "loss": 0.2079, "step": 12029 }, { "epoch": 0.34, "grad_norm": 6.816477742954232, "learning_rate": 7.622795726176473e-06, "loss": 0.5793, "step": 12030 }, { "epoch": 0.34, "grad_norm": 7.11779179609029, "learning_rate": 7.622400882984165e-06, "loss": 0.4961, "step": 12031 }, { "epoch": 0.34, "grad_norm": 5.865094742460962, "learning_rate": 7.6220060172316645e-06, "loss": 0.4223, "step": 12032 }, { "epoch": 0.34, "grad_norm": 6.012377984757857, "learning_rate": 7.6216111289223695e-06, "loss": 0.6484, "step": 12033 }, { "epoch": 0.34, "grad_norm": 4.712197155922638, "learning_rate": 7.621216218059677e-06, "loss": 0.3355, "step": 12034 }, { "epoch": 0.34, "grad_norm": 8.404890212794914, "learning_rate": 7.620821284646985e-06, "loss": 0.6348, "step": 12035 }, { "epoch": 0.34, "grad_norm": 3.158942518809569, "learning_rate": 7.620426328687687e-06, "loss": 0.2533, "step": 12036 }, { "epoch": 0.34, "grad_norm": 2.80084856237077, "learning_rate": 7.620031350185186e-06, "loss": 0.102, "step": 12037 }, { "epoch": 0.34, "grad_norm": 12.173264058174686, "learning_rate": 7.619636349142878e-06, "loss": 0.7022, "step": 12038 }, { "epoch": 0.34, "grad_norm": 8.974675364454074, "learning_rate": 7.619241325564162e-06, "loss": 0.5378, "step": 12039 }, { "epoch": 0.34, "grad_norm": 6.602072647517826, "learning_rate": 7.618846279452433e-06, "loss": 0.5974, "step": 12040 }, { "epoch": 0.34, "grad_norm": 15.060116021265394, "learning_rate": 7.618451210811093e-06, "loss": 0.7297, "step": 12041 }, { "epoch": 0.34, "grad_norm": 12.187716594629842, "learning_rate": 7.61805611964354e-06, "loss": 0.4351, "step": 12042 }, { "epoch": 0.34, "grad_norm": 5.721497881309479, "learning_rate": 7.617661005953172e-06, "loss": 0.3594, "step": 12043 }, { "epoch": 0.34, "grad_norm": 10.816732713725953, "learning_rate": 7.617265869743389e-06, "loss": 0.4048, "step": 12044 }, { "epoch": 0.34, "grad_norm": 9.073449248760719, "learning_rate": 7.6168707110175895e-06, "loss": 0.6038, "step": 12045 }, { "epoch": 0.34, "grad_norm": 6.124924873358421, "learning_rate": 7.616475529779175e-06, "loss": 0.6318, "step": 12046 }, { "epoch": 0.34, "grad_norm": 3.989671843952121, "learning_rate": 7.616080326031541e-06, "loss": 0.3774, "step": 12047 }, { "epoch": 0.35, "grad_norm": 10.016215243034656, "learning_rate": 7.615685099778093e-06, "loss": 0.6749, "step": 12048 }, { "epoch": 0.35, "grad_norm": 4.680334680095514, "learning_rate": 7.6152898510222265e-06, "loss": 0.3583, "step": 12049 }, { "epoch": 0.35, "grad_norm": 2.0102018989393855, "learning_rate": 7.614894579767342e-06, "loss": 0.1276, "step": 12050 }, { "epoch": 0.35, "grad_norm": 6.324236694838269, "learning_rate": 7.614499286016843e-06, "loss": 0.4692, "step": 12051 }, { "epoch": 0.35, "grad_norm": 7.243998312458619, "learning_rate": 7.61410396977413e-06, "loss": 0.5582, "step": 12052 }, { "epoch": 0.35, "grad_norm": 5.648237338285464, "learning_rate": 7.6137086310426e-06, "loss": 0.4313, "step": 12053 }, { "epoch": 0.35, "grad_norm": 5.760919527129778, "learning_rate": 7.613313269825657e-06, "loss": 0.8199, "step": 12054 }, { "epoch": 0.35, "grad_norm": 6.5915127615276665, "learning_rate": 7.612917886126703e-06, "loss": 0.6739, "step": 12055 }, { "epoch": 0.35, "grad_norm": 6.3514200830387555, "learning_rate": 7.612522479949137e-06, "loss": 0.6402, "step": 12056 }, { "epoch": 0.35, "grad_norm": 7.476482905140583, "learning_rate": 7.612127051296362e-06, "loss": 0.2255, "step": 12057 }, { "epoch": 0.35, "grad_norm": 7.897782144798732, "learning_rate": 7.611731600171779e-06, "loss": 0.6209, "step": 12058 }, { "epoch": 0.35, "grad_norm": 14.116177926541196, "learning_rate": 7.6113361265787914e-06, "loss": 0.4664, "step": 12059 }, { "epoch": 0.35, "grad_norm": 9.139954527693808, "learning_rate": 7.6109406305208e-06, "loss": 0.4869, "step": 12060 }, { "epoch": 0.35, "grad_norm": 5.644158015071573, "learning_rate": 7.610545112001208e-06, "loss": 0.5985, "step": 12061 }, { "epoch": 0.35, "grad_norm": 4.411417724110201, "learning_rate": 7.610149571023419e-06, "loss": 0.265, "step": 12062 }, { "epoch": 0.35, "grad_norm": 5.342698869444562, "learning_rate": 7.609754007590833e-06, "loss": 0.7327, "step": 12063 }, { "epoch": 0.35, "grad_norm": 7.130352603037039, "learning_rate": 7.609358421706856e-06, "loss": 0.6619, "step": 12064 }, { "epoch": 0.35, "grad_norm": 4.969083738814821, "learning_rate": 7.608962813374889e-06, "loss": 0.4176, "step": 12065 }, { "epoch": 0.35, "grad_norm": 2.231113921752249, "learning_rate": 7.608567182598336e-06, "loss": 0.1952, "step": 12066 }, { "epoch": 0.35, "grad_norm": 6.1150482451562365, "learning_rate": 7.6081715293806005e-06, "loss": 0.3496, "step": 12067 }, { "epoch": 0.35, "grad_norm": 6.402234265623131, "learning_rate": 7.607775853725088e-06, "loss": 0.4191, "step": 12068 }, { "epoch": 0.35, "grad_norm": 6.037466887073262, "learning_rate": 7.607380155635199e-06, "loss": 0.3733, "step": 12069 }, { "epoch": 0.35, "grad_norm": 2.9862765101281625, "learning_rate": 7.60698443511434e-06, "loss": 0.1874, "step": 12070 }, { "epoch": 0.35, "grad_norm": 4.506692466229359, "learning_rate": 7.6065886921659145e-06, "loss": 0.5344, "step": 12071 }, { "epoch": 0.35, "grad_norm": 3.0512317319992777, "learning_rate": 7.60619292679333e-06, "loss": 0.1558, "step": 12072 }, { "epoch": 0.35, "grad_norm": 10.642432956892112, "learning_rate": 7.6057971389999864e-06, "loss": 0.4718, "step": 12073 }, { "epoch": 0.35, "grad_norm": 8.97962191697003, "learning_rate": 7.605401328789289e-06, "loss": 0.6476, "step": 12074 }, { "epoch": 0.35, "grad_norm": 10.795648132486713, "learning_rate": 7.605005496164648e-06, "loss": 0.37, "step": 12075 }, { "epoch": 0.35, "grad_norm": 7.008479840368492, "learning_rate": 7.604609641129464e-06, "loss": 0.6192, "step": 12076 }, { "epoch": 0.35, "grad_norm": 9.293812428452666, "learning_rate": 7.604213763687145e-06, "loss": 0.8467, "step": 12077 }, { "epoch": 0.35, "grad_norm": 11.388683430955808, "learning_rate": 7.603817863841095e-06, "loss": 0.7057, "step": 12078 }, { "epoch": 0.35, "grad_norm": 3.4029055283801157, "learning_rate": 7.603421941594719e-06, "loss": 0.3101, "step": 12079 }, { "epoch": 0.35, "grad_norm": 4.062021403364763, "learning_rate": 7.603025996951428e-06, "loss": 0.3488, "step": 12080 }, { "epoch": 0.35, "grad_norm": 8.075861069442654, "learning_rate": 7.602630029914621e-06, "loss": 0.7373, "step": 12081 }, { "epoch": 0.35, "grad_norm": 4.782368198857865, "learning_rate": 7.602234040487713e-06, "loss": 0.2648, "step": 12082 }, { "epoch": 0.35, "grad_norm": 7.640738519554674, "learning_rate": 7.601838028674101e-06, "loss": 0.5338, "step": 12083 }, { "epoch": 0.35, "grad_norm": 7.1575740605452305, "learning_rate": 7.601441994477199e-06, "loss": 0.6792, "step": 12084 }, { "epoch": 0.35, "grad_norm": 6.138070106443449, "learning_rate": 7.601045937900412e-06, "loss": 0.3663, "step": 12085 }, { "epoch": 0.35, "grad_norm": 6.176642090689689, "learning_rate": 7.600649858947147e-06, "loss": 0.6043, "step": 12086 }, { "epoch": 0.35, "grad_norm": 2.3315695556402978, "learning_rate": 7.6002537576208105e-06, "loss": 0.206, "step": 12087 }, { "epoch": 0.35, "grad_norm": 6.787237802292128, "learning_rate": 7.599857633924812e-06, "loss": 0.7463, "step": 12088 }, { "epoch": 0.35, "grad_norm": 7.66693372537735, "learning_rate": 7.599461487862559e-06, "loss": 0.4135, "step": 12089 }, { "epoch": 0.35, "grad_norm": 3.7890658663705246, "learning_rate": 7.599065319437458e-06, "loss": 0.5038, "step": 12090 }, { "epoch": 0.35, "grad_norm": 4.143123578209316, "learning_rate": 7.598669128652917e-06, "loss": 0.5756, "step": 12091 }, { "epoch": 0.35, "grad_norm": 6.396116550731545, "learning_rate": 7.598272915512347e-06, "loss": 0.5404, "step": 12092 }, { "epoch": 0.35, "grad_norm": 5.371572066770199, "learning_rate": 7.597876680019154e-06, "loss": 0.2143, "step": 12093 }, { "epoch": 0.35, "grad_norm": 5.157217224031736, "learning_rate": 7.597480422176749e-06, "loss": 0.6418, "step": 12094 }, { "epoch": 0.35, "grad_norm": 4.894571631433206, "learning_rate": 7.59708414198854e-06, "loss": 0.4379, "step": 12095 }, { "epoch": 0.35, "grad_norm": 2.985296855970579, "learning_rate": 7.596687839457935e-06, "loss": 0.6041, "step": 12096 }, { "epoch": 0.35, "grad_norm": 3.97188705454512, "learning_rate": 7.596291514588343e-06, "loss": 0.3867, "step": 12097 }, { "epoch": 0.35, "grad_norm": 6.591355200770244, "learning_rate": 7.595895167383176e-06, "loss": 0.2616, "step": 12098 }, { "epoch": 0.35, "grad_norm": 10.71749754948162, "learning_rate": 7.595498797845841e-06, "loss": 0.7296, "step": 12099 }, { "epoch": 0.35, "grad_norm": 5.893780174461298, "learning_rate": 7.5951024059797525e-06, "loss": 0.6296, "step": 12100 }, { "epoch": 0.35, "grad_norm": 5.899737365178926, "learning_rate": 7.594705991788316e-06, "loss": 0.5726, "step": 12101 }, { "epoch": 0.35, "grad_norm": 6.054367667333834, "learning_rate": 7.594309555274944e-06, "loss": 0.2891, "step": 12102 }, { "epoch": 0.35, "grad_norm": 4.860972028589434, "learning_rate": 7.593913096443044e-06, "loss": 0.5569, "step": 12103 }, { "epoch": 0.35, "grad_norm": 6.757216446008339, "learning_rate": 7.5935166152960304e-06, "loss": 0.5545, "step": 12104 }, { "epoch": 0.35, "grad_norm": 5.741065506083483, "learning_rate": 7.593120111837314e-06, "loss": 0.4309, "step": 12105 }, { "epoch": 0.35, "grad_norm": 9.338074047822035, "learning_rate": 7.592723586070303e-06, "loss": 0.3598, "step": 12106 }, { "epoch": 0.35, "grad_norm": 7.419529016049963, "learning_rate": 7.59232703799841e-06, "loss": 0.8047, "step": 12107 }, { "epoch": 0.35, "grad_norm": 7.056671235424371, "learning_rate": 7.591930467625048e-06, "loss": 0.6825, "step": 12108 }, { "epoch": 0.35, "grad_norm": 4.106625873891513, "learning_rate": 7.591533874953627e-06, "loss": 0.4883, "step": 12109 }, { "epoch": 0.35, "grad_norm": 5.433030725264808, "learning_rate": 7.591137259987558e-06, "loss": 0.2484, "step": 12110 }, { "epoch": 0.35, "grad_norm": 3.6291213120707937, "learning_rate": 7.590740622730255e-06, "loss": 0.5071, "step": 12111 }, { "epoch": 0.35, "grad_norm": 6.363032870658444, "learning_rate": 7.5903439631851295e-06, "loss": 0.4861, "step": 12112 }, { "epoch": 0.35, "grad_norm": 7.832980905545359, "learning_rate": 7.5899472813555944e-06, "loss": 0.4568, "step": 12113 }, { "epoch": 0.35, "grad_norm": 5.38228676585757, "learning_rate": 7.589550577245059e-06, "loss": 0.5521, "step": 12114 }, { "epoch": 0.35, "grad_norm": 5.374809350469469, "learning_rate": 7.589153850856942e-06, "loss": 0.3349, "step": 12115 }, { "epoch": 0.35, "grad_norm": 4.191269245734939, "learning_rate": 7.588757102194651e-06, "loss": 0.1042, "step": 12116 }, { "epoch": 0.35, "grad_norm": 5.878365060222953, "learning_rate": 7.588360331261603e-06, "loss": 0.2853, "step": 12117 }, { "epoch": 0.35, "grad_norm": 4.5091857272983535, "learning_rate": 7.58796353806121e-06, "loss": 0.2715, "step": 12118 }, { "epoch": 0.35, "grad_norm": 3.5353476793927388, "learning_rate": 7.587566722596884e-06, "loss": 0.5204, "step": 12119 }, { "epoch": 0.35, "grad_norm": 3.1191406441093164, "learning_rate": 7.5871698848720395e-06, "loss": 0.1515, "step": 12120 }, { "epoch": 0.35, "grad_norm": 3.656273458682715, "learning_rate": 7.586773024890093e-06, "loss": 0.3825, "step": 12121 }, { "epoch": 0.35, "grad_norm": 4.387490942529964, "learning_rate": 7.586376142654456e-06, "loss": 0.6813, "step": 12122 }, { "epoch": 0.35, "grad_norm": 4.535746428394032, "learning_rate": 7.585979238168543e-06, "loss": 0.7468, "step": 12123 }, { "epoch": 0.35, "grad_norm": 4.758432507636656, "learning_rate": 7.5855823114357685e-06, "loss": 0.5885, "step": 12124 }, { "epoch": 0.35, "grad_norm": 5.194609217260134, "learning_rate": 7.585185362459548e-06, "loss": 0.1609, "step": 12125 }, { "epoch": 0.35, "grad_norm": 3.3619681086556588, "learning_rate": 7.584788391243296e-06, "loss": 0.26, "step": 12126 }, { "epoch": 0.35, "grad_norm": 9.573205278685991, "learning_rate": 7.584391397790429e-06, "loss": 0.5845, "step": 12127 }, { "epoch": 0.35, "grad_norm": 5.502106393257571, "learning_rate": 7.58399438210436e-06, "loss": 0.2318, "step": 12128 }, { "epoch": 0.35, "grad_norm": 7.075860745536345, "learning_rate": 7.5835973441885066e-06, "loss": 0.6313, "step": 12129 }, { "epoch": 0.35, "grad_norm": 4.3855326705896935, "learning_rate": 7.583200284046281e-06, "loss": 0.4714, "step": 12130 }, { "epoch": 0.35, "grad_norm": 4.485236321116448, "learning_rate": 7.582803201681104e-06, "loss": 0.2969, "step": 12131 }, { "epoch": 0.35, "grad_norm": 137.89597172818304, "learning_rate": 7.582406097096387e-06, "loss": 0.81, "step": 12132 }, { "epoch": 0.35, "grad_norm": 7.755791069427403, "learning_rate": 7.582008970295549e-06, "loss": 0.8209, "step": 12133 }, { "epoch": 0.35, "grad_norm": 5.8960431424429744, "learning_rate": 7.581611821282007e-06, "loss": 0.8355, "step": 12134 }, { "epoch": 0.35, "grad_norm": 5.382723736965442, "learning_rate": 7.581214650059175e-06, "loss": 0.7064, "step": 12135 }, { "epoch": 0.35, "grad_norm": 9.134115760823086, "learning_rate": 7.580817456630471e-06, "loss": 0.7043, "step": 12136 }, { "epoch": 0.35, "grad_norm": 6.126545652654824, "learning_rate": 7.580420240999313e-06, "loss": 0.5253, "step": 12137 }, { "epoch": 0.35, "grad_norm": 6.753466740213461, "learning_rate": 7.580023003169117e-06, "loss": 0.8259, "step": 12138 }, { "epoch": 0.35, "grad_norm": 6.8392774346375145, "learning_rate": 7.579625743143302e-06, "loss": 0.4762, "step": 12139 }, { "epoch": 0.35, "grad_norm": 7.988192367991925, "learning_rate": 7.579228460925282e-06, "loss": 0.5483, "step": 12140 }, { "epoch": 0.35, "grad_norm": 8.899785422835192, "learning_rate": 7.578831156518478e-06, "loss": 0.4419, "step": 12141 }, { "epoch": 0.35, "grad_norm": 8.177930320722774, "learning_rate": 7.578433829926308e-06, "loss": 0.4454, "step": 12142 }, { "epoch": 0.35, "grad_norm": 3.843910182933657, "learning_rate": 7.578036481152189e-06, "loss": 0.3766, "step": 12143 }, { "epoch": 0.35, "grad_norm": 11.397161777110954, "learning_rate": 7.57763911019954e-06, "loss": 0.5414, "step": 12144 }, { "epoch": 0.35, "grad_norm": 7.270578266978352, "learning_rate": 7.577241717071777e-06, "loss": 0.4554, "step": 12145 }, { "epoch": 0.35, "grad_norm": 7.83194217773929, "learning_rate": 7.576844301772323e-06, "loss": 0.6677, "step": 12146 }, { "epoch": 0.35, "grad_norm": 2.3716364432728203, "learning_rate": 7.5764468643045945e-06, "loss": 0.3399, "step": 12147 }, { "epoch": 0.35, "grad_norm": 5.355369472348642, "learning_rate": 7.576049404672008e-06, "loss": 0.2693, "step": 12148 }, { "epoch": 0.35, "grad_norm": 10.822029104300457, "learning_rate": 7.575651922877989e-06, "loss": 0.6071, "step": 12149 }, { "epoch": 0.35, "grad_norm": 7.344280726957699, "learning_rate": 7.5752544189259524e-06, "loss": 0.7314, "step": 12150 }, { "epoch": 0.35, "grad_norm": 4.143685329156996, "learning_rate": 7.574856892819318e-06, "loss": 0.4769, "step": 12151 }, { "epoch": 0.35, "grad_norm": 9.238996920779487, "learning_rate": 7.574459344561509e-06, "loss": 0.4567, "step": 12152 }, { "epoch": 0.35, "grad_norm": 4.195602563246548, "learning_rate": 7.574061774155942e-06, "loss": 0.1679, "step": 12153 }, { "epoch": 0.35, "grad_norm": 8.189368930113647, "learning_rate": 7.573664181606039e-06, "loss": 0.6339, "step": 12154 }, { "epoch": 0.35, "grad_norm": 5.89925200036567, "learning_rate": 7.573266566915218e-06, "loss": 0.3173, "step": 12155 }, { "epoch": 0.35, "grad_norm": 2.907678888644724, "learning_rate": 7.572868930086905e-06, "loss": 0.3062, "step": 12156 }, { "epoch": 0.35, "grad_norm": 4.58594553230271, "learning_rate": 7.572471271124514e-06, "loss": 0.3847, "step": 12157 }, { "epoch": 0.35, "grad_norm": 8.57268934230454, "learning_rate": 7.57207359003147e-06, "loss": 0.7879, "step": 12158 }, { "epoch": 0.35, "grad_norm": 5.536755338246232, "learning_rate": 7.5716758868111965e-06, "loss": 0.1594, "step": 12159 }, { "epoch": 0.35, "grad_norm": 6.442313792535673, "learning_rate": 7.5712781614671096e-06, "loss": 0.2661, "step": 12160 }, { "epoch": 0.35, "grad_norm": 3.57612105645449, "learning_rate": 7.570880414002633e-06, "loss": 0.3844, "step": 12161 }, { "epoch": 0.35, "grad_norm": 5.968642308726624, "learning_rate": 7.570482644421189e-06, "loss": 0.3954, "step": 12162 }, { "epoch": 0.35, "grad_norm": 6.780902906403705, "learning_rate": 7.570084852726201e-06, "loss": 0.5919, "step": 12163 }, { "epoch": 0.35, "grad_norm": 6.424284063148947, "learning_rate": 7.569687038921088e-06, "loss": 0.5429, "step": 12164 }, { "epoch": 0.35, "grad_norm": 8.026225377347897, "learning_rate": 7.569289203009274e-06, "loss": 0.4382, "step": 12165 }, { "epoch": 0.35, "grad_norm": 5.741444099728191, "learning_rate": 7.56889134499418e-06, "loss": 0.3682, "step": 12166 }, { "epoch": 0.35, "grad_norm": 6.223774859119606, "learning_rate": 7.568493464879232e-06, "loss": 0.6056, "step": 12167 }, { "epoch": 0.35, "grad_norm": 3.0833190238895525, "learning_rate": 7.56809556266785e-06, "loss": 0.2388, "step": 12168 }, { "epoch": 0.35, "grad_norm": 7.9741554028265424, "learning_rate": 7.567697638363458e-06, "loss": 0.6632, "step": 12169 }, { "epoch": 0.35, "grad_norm": 6.426605264533932, "learning_rate": 7.567299691969481e-06, "loss": 0.3437, "step": 12170 }, { "epoch": 0.35, "grad_norm": 4.183137785126718, "learning_rate": 7.5669017234893385e-06, "loss": 0.4268, "step": 12171 }, { "epoch": 0.35, "grad_norm": 4.911863820667215, "learning_rate": 7.566503732926458e-06, "loss": 0.6459, "step": 12172 }, { "epoch": 0.35, "grad_norm": 17.081842853685757, "learning_rate": 7.56610572028426e-06, "loss": 0.5109, "step": 12173 }, { "epoch": 0.35, "grad_norm": 6.009672157371559, "learning_rate": 7.565707685566173e-06, "loss": 0.5347, "step": 12174 }, { "epoch": 0.35, "grad_norm": 4.641162931045251, "learning_rate": 7.565309628775617e-06, "loss": 0.5031, "step": 12175 }, { "epoch": 0.35, "grad_norm": 5.065471012850755, "learning_rate": 7.564911549916018e-06, "loss": 0.2975, "step": 12176 }, { "epoch": 0.35, "grad_norm": 8.642408742160784, "learning_rate": 7.564513448990802e-06, "loss": 0.5872, "step": 12177 }, { "epoch": 0.35, "grad_norm": 7.504919346141559, "learning_rate": 7.5641153260033916e-06, "loss": 0.4803, "step": 12178 }, { "epoch": 0.35, "grad_norm": 3.7383911690650735, "learning_rate": 7.563717180957214e-06, "loss": 0.4318, "step": 12179 }, { "epoch": 0.35, "grad_norm": 3.803134812880734, "learning_rate": 7.5633190138556925e-06, "loss": 0.5047, "step": 12180 }, { "epoch": 0.35, "grad_norm": 6.429890922582292, "learning_rate": 7.5629208247022515e-06, "loss": 0.8388, "step": 12181 }, { "epoch": 0.35, "grad_norm": 6.59870442476773, "learning_rate": 7.56252261350032e-06, "loss": 0.4794, "step": 12182 }, { "epoch": 0.35, "grad_norm": 9.104661196292126, "learning_rate": 7.562124380253322e-06, "loss": 0.624, "step": 12183 }, { "epoch": 0.35, "grad_norm": 3.7046351190578064, "learning_rate": 7.561726124964682e-06, "loss": 0.4446, "step": 12184 }, { "epoch": 0.35, "grad_norm": 4.670104057941596, "learning_rate": 7.561327847637828e-06, "loss": 0.5606, "step": 12185 }, { "epoch": 0.35, "grad_norm": 6.780388491705303, "learning_rate": 7.560929548276188e-06, "loss": 0.6434, "step": 12186 }, { "epoch": 0.35, "grad_norm": 9.217340290454146, "learning_rate": 7.560531226883185e-06, "loss": 0.7896, "step": 12187 }, { "epoch": 0.35, "grad_norm": 5.558326012388762, "learning_rate": 7.560132883462246e-06, "loss": 0.372, "step": 12188 }, { "epoch": 0.35, "grad_norm": 6.198708833564302, "learning_rate": 7.559734518016799e-06, "loss": 0.4562, "step": 12189 }, { "epoch": 0.35, "grad_norm": 4.275268552944784, "learning_rate": 7.559336130550272e-06, "loss": 0.5568, "step": 12190 }, { "epoch": 0.35, "grad_norm": 5.841247104192629, "learning_rate": 7.558937721066091e-06, "loss": 0.6907, "step": 12191 }, { "epoch": 0.35, "grad_norm": 5.820190919975968, "learning_rate": 7.558539289567684e-06, "loss": 0.5051, "step": 12192 }, { "epoch": 0.35, "grad_norm": 2.7815955301022317, "learning_rate": 7.558140836058481e-06, "loss": 0.3158, "step": 12193 }, { "epoch": 0.35, "grad_norm": 7.569869745965661, "learning_rate": 7.557742360541905e-06, "loss": 0.7364, "step": 12194 }, { "epoch": 0.35, "grad_norm": 6.013315842568503, "learning_rate": 7.557343863021385e-06, "loss": 0.4206, "step": 12195 }, { "epoch": 0.35, "grad_norm": 6.448319869820677, "learning_rate": 7.556945343500353e-06, "loss": 0.408, "step": 12196 }, { "epoch": 0.35, "grad_norm": 9.01974514806326, "learning_rate": 7.556546801982234e-06, "loss": 0.8071, "step": 12197 }, { "epoch": 0.35, "grad_norm": 6.094073243004311, "learning_rate": 7.556148238470457e-06, "loss": 0.5496, "step": 12198 }, { "epoch": 0.35, "grad_norm": 6.300389031100126, "learning_rate": 7.555749652968453e-06, "loss": 0.3418, "step": 12199 }, { "epoch": 0.35, "grad_norm": 7.825186842372647, "learning_rate": 7.555351045479648e-06, "loss": 0.6924, "step": 12200 }, { "epoch": 0.35, "grad_norm": 2.2166574846044065, "learning_rate": 7.554952416007472e-06, "loss": 0.107, "step": 12201 }, { "epoch": 0.35, "grad_norm": 7.7633694418294965, "learning_rate": 7.554553764555357e-06, "loss": 0.7022, "step": 12202 }, { "epoch": 0.35, "grad_norm": 7.020131726020787, "learning_rate": 7.55415509112673e-06, "loss": 0.6224, "step": 12203 }, { "epoch": 0.35, "grad_norm": 3.345185871747716, "learning_rate": 7.55375639572502e-06, "loss": 0.371, "step": 12204 }, { "epoch": 0.35, "grad_norm": 3.2011332948884004, "learning_rate": 7.55335767835366e-06, "loss": 0.5221, "step": 12205 }, { "epoch": 0.35, "grad_norm": 6.959820556081282, "learning_rate": 7.552958939016077e-06, "loss": 0.3754, "step": 12206 }, { "epoch": 0.35, "grad_norm": 5.420641712122054, "learning_rate": 7.552560177715702e-06, "loss": 0.8077, "step": 12207 }, { "epoch": 0.35, "grad_norm": 9.841255186707757, "learning_rate": 7.552161394455968e-06, "loss": 0.818, "step": 12208 }, { "epoch": 0.35, "grad_norm": 4.682605858750308, "learning_rate": 7.551762589240303e-06, "loss": 0.6182, "step": 12209 }, { "epoch": 0.35, "grad_norm": 9.680360144149946, "learning_rate": 7.551363762072138e-06, "loss": 0.912, "step": 12210 }, { "epoch": 0.35, "grad_norm": 6.04035446342667, "learning_rate": 7.550964912954904e-06, "loss": 0.6801, "step": 12211 }, { "epoch": 0.35, "grad_norm": 10.754708257076256, "learning_rate": 7.550566041892034e-06, "loss": 1.0086, "step": 12212 }, { "epoch": 0.35, "grad_norm": 3.159580362137364, "learning_rate": 7.5501671488869595e-06, "loss": 0.5552, "step": 12213 }, { "epoch": 0.35, "grad_norm": 7.506496794468523, "learning_rate": 7.549768233943109e-06, "loss": 0.9005, "step": 12214 }, { "epoch": 0.35, "grad_norm": 7.857421632255154, "learning_rate": 7.549369297063916e-06, "loss": 0.6855, "step": 12215 }, { "epoch": 0.35, "grad_norm": 6.860626454123382, "learning_rate": 7.548970338252815e-06, "loss": 0.3655, "step": 12216 }, { "epoch": 0.35, "grad_norm": 6.712333098881534, "learning_rate": 7.548571357513234e-06, "loss": 0.6735, "step": 12217 }, { "epoch": 0.35, "grad_norm": 3.783729559922246, "learning_rate": 7.548172354848609e-06, "loss": 0.4102, "step": 12218 }, { "epoch": 0.35, "grad_norm": 5.783246906891645, "learning_rate": 7.54777333026237e-06, "loss": 0.2869, "step": 12219 }, { "epoch": 0.35, "grad_norm": 9.45855728310008, "learning_rate": 7.547374283757952e-06, "loss": 0.7485, "step": 12220 }, { "epoch": 0.35, "grad_norm": 8.900882428602936, "learning_rate": 7.5469752153387855e-06, "loss": 0.7599, "step": 12221 }, { "epoch": 0.35, "grad_norm": 5.962412440458807, "learning_rate": 7.546576125008305e-06, "loss": 0.8611, "step": 12222 }, { "epoch": 0.35, "grad_norm": 7.049525132655906, "learning_rate": 7.546177012769944e-06, "loss": 0.6617, "step": 12223 }, { "epoch": 0.35, "grad_norm": 4.559736120134586, "learning_rate": 7.545777878627134e-06, "loss": 0.4757, "step": 12224 }, { "epoch": 0.35, "grad_norm": 2.2582336349814964, "learning_rate": 7.545378722583312e-06, "loss": 0.2165, "step": 12225 }, { "epoch": 0.35, "grad_norm": 3.3578825008290236, "learning_rate": 7.5449795446419105e-06, "loss": 0.3678, "step": 12226 }, { "epoch": 0.35, "grad_norm": 10.11157249846549, "learning_rate": 7.544580344806362e-06, "loss": 0.5545, "step": 12227 }, { "epoch": 0.35, "grad_norm": 5.7090340715582, "learning_rate": 7.544181123080103e-06, "loss": 0.443, "step": 12228 }, { "epoch": 0.35, "grad_norm": 4.336013944484663, "learning_rate": 7.5437818794665675e-06, "loss": 0.2337, "step": 12229 }, { "epoch": 0.35, "grad_norm": 5.030015497809438, "learning_rate": 7.54338261396919e-06, "loss": 0.2996, "step": 12230 }, { "epoch": 0.35, "grad_norm": 5.889750043814532, "learning_rate": 7.542983326591402e-06, "loss": 0.5072, "step": 12231 }, { "epoch": 0.35, "grad_norm": 8.501635618479074, "learning_rate": 7.5425840173366434e-06, "loss": 0.4826, "step": 12232 }, { "epoch": 0.35, "grad_norm": 6.606561986508996, "learning_rate": 7.542184686208349e-06, "loss": 0.5565, "step": 12233 }, { "epoch": 0.35, "grad_norm": 5.366208804879012, "learning_rate": 7.541785333209952e-06, "loss": 0.7559, "step": 12234 }, { "epoch": 0.35, "grad_norm": 8.127484807282173, "learning_rate": 7.541385958344887e-06, "loss": 0.5467, "step": 12235 }, { "epoch": 0.35, "grad_norm": 5.893951286365731, "learning_rate": 7.5409865616165936e-06, "loss": 0.5619, "step": 12236 }, { "epoch": 0.35, "grad_norm": 8.366516627781282, "learning_rate": 7.540587143028505e-06, "loss": 0.7271, "step": 12237 }, { "epoch": 0.35, "grad_norm": 7.704233828702833, "learning_rate": 7.540187702584058e-06, "loss": 0.7201, "step": 12238 }, { "epoch": 0.35, "grad_norm": 3.7064904802345, "learning_rate": 7.539788240286688e-06, "loss": 0.2904, "step": 12239 }, { "epoch": 0.35, "grad_norm": 4.417058768351478, "learning_rate": 7.539388756139834e-06, "loss": 0.2871, "step": 12240 }, { "epoch": 0.35, "grad_norm": 6.214172096811078, "learning_rate": 7.53898925014693e-06, "loss": 0.5136, "step": 12241 }, { "epoch": 0.35, "grad_norm": 4.38631746489724, "learning_rate": 7.538589722311413e-06, "loss": 0.3002, "step": 12242 }, { "epoch": 0.35, "grad_norm": 3.631972152153672, "learning_rate": 7.5381901726367244e-06, "loss": 0.2296, "step": 12243 }, { "epoch": 0.35, "grad_norm": 4.570327576180396, "learning_rate": 7.537790601126297e-06, "loss": 0.2826, "step": 12244 }, { "epoch": 0.35, "grad_norm": 4.356532602434022, "learning_rate": 7.537391007783569e-06, "loss": 0.4866, "step": 12245 }, { "epoch": 0.35, "grad_norm": 12.641733844723598, "learning_rate": 7.536991392611979e-06, "loss": 0.5451, "step": 12246 }, { "epoch": 0.35, "grad_norm": 4.738643295910066, "learning_rate": 7.5365917556149635e-06, "loss": 0.3741, "step": 12247 }, { "epoch": 0.35, "grad_norm": 5.258403274425658, "learning_rate": 7.536192096795961e-06, "loss": 0.6719, "step": 12248 }, { "epoch": 0.35, "grad_norm": 7.896022770872975, "learning_rate": 7.535792416158412e-06, "loss": 0.6689, "step": 12249 }, { "epoch": 0.35, "grad_norm": 5.586711973907155, "learning_rate": 7.535392713705753e-06, "loss": 0.3599, "step": 12250 }, { "epoch": 0.35, "grad_norm": 5.924476394373206, "learning_rate": 7.534992989441421e-06, "loss": 0.3097, "step": 12251 }, { "epoch": 0.35, "grad_norm": 7.717687031685664, "learning_rate": 7.534593243368857e-06, "loss": 0.384, "step": 12252 }, { "epoch": 0.35, "grad_norm": 4.910228452553741, "learning_rate": 7.5341934754915e-06, "loss": 0.4277, "step": 12253 }, { "epoch": 0.35, "grad_norm": 4.406615438254909, "learning_rate": 7.533793685812789e-06, "loss": 0.2067, "step": 12254 }, { "epoch": 0.35, "grad_norm": 11.580798624200836, "learning_rate": 7.533393874336161e-06, "loss": 0.3314, "step": 12255 }, { "epoch": 0.35, "grad_norm": 5.300413407091166, "learning_rate": 7.532994041065059e-06, "loss": 0.5154, "step": 12256 }, { "epoch": 0.35, "grad_norm": 3.5293993362390252, "learning_rate": 7.532594186002921e-06, "loss": 0.1491, "step": 12257 }, { "epoch": 0.35, "grad_norm": 5.891259278165268, "learning_rate": 7.532194309153187e-06, "loss": 0.5829, "step": 12258 }, { "epoch": 0.35, "grad_norm": 5.651163923168698, "learning_rate": 7.531794410519295e-06, "loss": 0.5981, "step": 12259 }, { "epoch": 0.35, "grad_norm": 8.646907129975938, "learning_rate": 7.531394490104691e-06, "loss": 0.2679, "step": 12260 }, { "epoch": 0.35, "grad_norm": 5.767758272007762, "learning_rate": 7.53099454791281e-06, "loss": 0.3255, "step": 12261 }, { "epoch": 0.35, "grad_norm": 4.314503577486952, "learning_rate": 7.530594583947095e-06, "loss": 0.5803, "step": 12262 }, { "epoch": 0.35, "grad_norm": 3.7171517792533173, "learning_rate": 7.5301945982109845e-06, "loss": 0.3762, "step": 12263 }, { "epoch": 0.35, "grad_norm": 8.210341180549404, "learning_rate": 7.529794590707924e-06, "loss": 0.7067, "step": 12264 }, { "epoch": 0.35, "grad_norm": 2.9725086711888613, "learning_rate": 7.52939456144135e-06, "loss": 0.1273, "step": 12265 }, { "epoch": 0.35, "grad_norm": 5.56444905455987, "learning_rate": 7.528994510414709e-06, "loss": 0.6642, "step": 12266 }, { "epoch": 0.35, "grad_norm": 3.297435405523674, "learning_rate": 7.528594437631437e-06, "loss": 0.2669, "step": 12267 }, { "epoch": 0.35, "grad_norm": 4.655349849252268, "learning_rate": 7.528194343094979e-06, "loss": 0.4142, "step": 12268 }, { "epoch": 0.35, "grad_norm": 4.565076283530954, "learning_rate": 7.527794226808775e-06, "loss": 0.1589, "step": 12269 }, { "epoch": 0.35, "grad_norm": 4.039911354338797, "learning_rate": 7.527394088776272e-06, "loss": 0.1825, "step": 12270 }, { "epoch": 0.35, "grad_norm": 8.953361148615862, "learning_rate": 7.526993929000907e-06, "loss": 0.7278, "step": 12271 }, { "epoch": 0.35, "grad_norm": 6.136022925310441, "learning_rate": 7.526593747486122e-06, "loss": 0.1879, "step": 12272 }, { "epoch": 0.35, "grad_norm": 5.844914916914623, "learning_rate": 7.526193544235365e-06, "loss": 0.2666, "step": 12273 }, { "epoch": 0.35, "grad_norm": 4.646251517093669, "learning_rate": 7.5257933192520745e-06, "loss": 0.4319, "step": 12274 }, { "epoch": 0.35, "grad_norm": 12.604251997897919, "learning_rate": 7.525393072539697e-06, "loss": 0.9302, "step": 12275 }, { "epoch": 0.35, "grad_norm": 3.325125260252373, "learning_rate": 7.524992804101672e-06, "loss": 0.3752, "step": 12276 }, { "epoch": 0.35, "grad_norm": 4.495151769093571, "learning_rate": 7.524592513941447e-06, "loss": 0.4646, "step": 12277 }, { "epoch": 0.35, "grad_norm": 4.739982884372118, "learning_rate": 7.52419220206246e-06, "loss": 0.4563, "step": 12278 }, { "epoch": 0.35, "grad_norm": 4.959269614202656, "learning_rate": 7.52379186846816e-06, "loss": 0.6483, "step": 12279 }, { "epoch": 0.35, "grad_norm": 7.995179929635028, "learning_rate": 7.52339151316199e-06, "loss": 0.8551, "step": 12280 }, { "epoch": 0.35, "grad_norm": 4.38544212509099, "learning_rate": 7.522991136147393e-06, "loss": 0.5359, "step": 12281 }, { "epoch": 0.35, "grad_norm": 5.671215113565531, "learning_rate": 7.522590737427813e-06, "loss": 0.3763, "step": 12282 }, { "epoch": 0.35, "grad_norm": 8.690609931041132, "learning_rate": 7.522190317006697e-06, "loss": 0.6193, "step": 12283 }, { "epoch": 0.35, "grad_norm": 3.9256571128538718, "learning_rate": 7.5217898748874864e-06, "loss": 0.4295, "step": 12284 }, { "epoch": 0.35, "grad_norm": 5.929519148335289, "learning_rate": 7.5213894110736295e-06, "loss": 0.3162, "step": 12285 }, { "epoch": 0.35, "grad_norm": 8.714832095587653, "learning_rate": 7.520988925568568e-06, "loss": 0.4201, "step": 12286 }, { "epoch": 0.35, "grad_norm": 5.516933080726829, "learning_rate": 7.520588418375753e-06, "loss": 0.5115, "step": 12287 }, { "epoch": 0.35, "grad_norm": 3.696460214983575, "learning_rate": 7.520187889498622e-06, "loss": 0.4607, "step": 12288 }, { "epoch": 0.35, "grad_norm": 3.7063584033622394, "learning_rate": 7.5197873389406276e-06, "loss": 0.2736, "step": 12289 }, { "epoch": 0.35, "grad_norm": 4.424641151748561, "learning_rate": 7.5193867667052125e-06, "loss": 0.3337, "step": 12290 }, { "epoch": 0.35, "grad_norm": 3.0285734570887164, "learning_rate": 7.518986172795823e-06, "loss": 0.2003, "step": 12291 }, { "epoch": 0.35, "grad_norm": 8.891887662286926, "learning_rate": 7.518585557215905e-06, "loss": 1.1333, "step": 12292 }, { "epoch": 0.35, "grad_norm": 5.586821458139409, "learning_rate": 7.518184919968905e-06, "loss": 0.4512, "step": 12293 }, { "epoch": 0.35, "grad_norm": 6.552854879666923, "learning_rate": 7.5177842610582714e-06, "loss": 0.5961, "step": 12294 }, { "epoch": 0.35, "grad_norm": 6.244151793449945, "learning_rate": 7.517383580487449e-06, "loss": 0.3997, "step": 12295 }, { "epoch": 0.35, "grad_norm": 8.159875641230489, "learning_rate": 7.516982878259888e-06, "loss": 0.7138, "step": 12296 }, { "epoch": 0.35, "grad_norm": 5.224376591995824, "learning_rate": 7.516582154379031e-06, "loss": 0.4469, "step": 12297 }, { "epoch": 0.35, "grad_norm": 9.006361408590239, "learning_rate": 7.516181408848327e-06, "loss": 0.5541, "step": 12298 }, { "epoch": 0.35, "grad_norm": 5.23307472134689, "learning_rate": 7.515780641671225e-06, "loss": 0.1866, "step": 12299 }, { "epoch": 0.35, "grad_norm": 11.74729234867889, "learning_rate": 7.515379852851173e-06, "loss": 0.7138, "step": 12300 }, { "epoch": 0.35, "grad_norm": 6.569391247258615, "learning_rate": 7.514979042391617e-06, "loss": 0.4927, "step": 12301 }, { "epoch": 0.35, "grad_norm": 4.956267602415443, "learning_rate": 7.514578210296005e-06, "loss": 0.4907, "step": 12302 }, { "epoch": 0.35, "grad_norm": 14.110145184522231, "learning_rate": 7.514177356567787e-06, "loss": 0.4264, "step": 12303 }, { "epoch": 0.35, "grad_norm": 7.349560650520293, "learning_rate": 7.513776481210411e-06, "loss": 0.5247, "step": 12304 }, { "epoch": 0.35, "grad_norm": 8.10614684428254, "learning_rate": 7.5133755842273245e-06, "loss": 0.6469, "step": 12305 }, { "epoch": 0.35, "grad_norm": 9.541951959556616, "learning_rate": 7.512974665621977e-06, "loss": 0.6244, "step": 12306 }, { "epoch": 0.35, "grad_norm": 5.702375681524194, "learning_rate": 7.51257372539782e-06, "loss": 0.5246, "step": 12307 }, { "epoch": 0.35, "grad_norm": 4.788013436696153, "learning_rate": 7.5121727635583e-06, "loss": 0.2137, "step": 12308 }, { "epoch": 0.35, "grad_norm": 3.8470889607603223, "learning_rate": 7.511771780106867e-06, "loss": 0.2119, "step": 12309 }, { "epoch": 0.35, "grad_norm": 4.911406316174979, "learning_rate": 7.51137077504697e-06, "loss": 0.333, "step": 12310 }, { "epoch": 0.35, "grad_norm": 5.439389963656522, "learning_rate": 7.51096974838206e-06, "loss": 0.5905, "step": 12311 }, { "epoch": 0.35, "grad_norm": 4.960835132894387, "learning_rate": 7.510568700115586e-06, "loss": 0.6374, "step": 12312 }, { "epoch": 0.35, "grad_norm": 5.801414172646863, "learning_rate": 7.510167630250997e-06, "loss": 0.3252, "step": 12313 }, { "epoch": 0.35, "grad_norm": 6.283280419279488, "learning_rate": 7.509766538791747e-06, "loss": 0.4965, "step": 12314 }, { "epoch": 0.35, "grad_norm": 13.13101436325495, "learning_rate": 7.509365425741284e-06, "loss": 0.7992, "step": 12315 }, { "epoch": 0.35, "grad_norm": 8.539204532597287, "learning_rate": 7.508964291103058e-06, "loss": 0.6793, "step": 12316 }, { "epoch": 0.35, "grad_norm": 6.792751947138239, "learning_rate": 7.5085631348805245e-06, "loss": 0.5903, "step": 12317 }, { "epoch": 0.35, "grad_norm": 5.909528881794795, "learning_rate": 7.508161957077128e-06, "loss": 0.4967, "step": 12318 }, { "epoch": 0.35, "grad_norm": 8.174994491435305, "learning_rate": 7.507760757696323e-06, "loss": 0.9963, "step": 12319 }, { "epoch": 0.35, "grad_norm": 5.032237838534087, "learning_rate": 7.507359536741563e-06, "loss": 0.6181, "step": 12320 }, { "epoch": 0.35, "grad_norm": 5.399445369282656, "learning_rate": 7.506958294216296e-06, "loss": 0.593, "step": 12321 }, { "epoch": 0.35, "grad_norm": 9.07163772298987, "learning_rate": 7.506557030123976e-06, "loss": 0.6375, "step": 12322 }, { "epoch": 0.35, "grad_norm": 5.045855628840327, "learning_rate": 7.506155744468053e-06, "loss": 0.3539, "step": 12323 }, { "epoch": 0.35, "grad_norm": 6.945637441098967, "learning_rate": 7.505754437251984e-06, "loss": 0.6134, "step": 12324 }, { "epoch": 0.35, "grad_norm": 4.845875919556172, "learning_rate": 7.505353108479216e-06, "loss": 0.6702, "step": 12325 }, { "epoch": 0.35, "grad_norm": 4.792214630418317, "learning_rate": 7.5049517581532035e-06, "loss": 0.4265, "step": 12326 }, { "epoch": 0.35, "grad_norm": 7.5866583531752605, "learning_rate": 7.5045503862774e-06, "loss": 0.5588, "step": 12327 }, { "epoch": 0.35, "grad_norm": 3.3674737582922405, "learning_rate": 7.504148992855258e-06, "loss": 0.2887, "step": 12328 }, { "epoch": 0.35, "grad_norm": 3.189878062844511, "learning_rate": 7.50374757789023e-06, "loss": 0.2664, "step": 12329 }, { "epoch": 0.35, "grad_norm": 12.828735634398852, "learning_rate": 7.503346141385771e-06, "loss": 0.8456, "step": 12330 }, { "epoch": 0.35, "grad_norm": 8.60410342682019, "learning_rate": 7.502944683345332e-06, "loss": 0.7948, "step": 12331 }, { "epoch": 0.35, "grad_norm": 10.838187690220504, "learning_rate": 7.5025432037723686e-06, "loss": 0.7463, "step": 12332 }, { "epoch": 0.35, "grad_norm": 1.980765198259401, "learning_rate": 7.502141702670334e-06, "loss": 0.2138, "step": 12333 }, { "epoch": 0.35, "grad_norm": 8.339032214522337, "learning_rate": 7.501740180042684e-06, "loss": 0.7589, "step": 12334 }, { "epoch": 0.35, "grad_norm": 5.01782499176027, "learning_rate": 7.5013386358928694e-06, "loss": 0.5574, "step": 12335 }, { "epoch": 0.35, "grad_norm": 8.274066386036969, "learning_rate": 7.5009370702243465e-06, "loss": 0.5249, "step": 12336 }, { "epoch": 0.35, "grad_norm": 1.8753019089821652, "learning_rate": 7.50053548304057e-06, "loss": 0.1845, "step": 12337 }, { "epoch": 0.35, "grad_norm": 6.355109093822928, "learning_rate": 7.500133874344995e-06, "loss": 0.5617, "step": 12338 }, { "epoch": 0.35, "grad_norm": 5.4783073399299145, "learning_rate": 7.499732244141076e-06, "loss": 0.3548, "step": 12339 }, { "epoch": 0.35, "grad_norm": 7.41597508481438, "learning_rate": 7.499330592432269e-06, "loss": 0.5779, "step": 12340 }, { "epoch": 0.35, "grad_norm": 5.875901863190115, "learning_rate": 7.4989289192220275e-06, "loss": 0.4587, "step": 12341 }, { "epoch": 0.35, "grad_norm": 7.684787302641553, "learning_rate": 7.4985272245138084e-06, "loss": 0.5074, "step": 12342 }, { "epoch": 0.35, "grad_norm": 14.760646404595368, "learning_rate": 7.498125508311067e-06, "loss": 0.7369, "step": 12343 }, { "epoch": 0.35, "grad_norm": 8.054853968560447, "learning_rate": 7.4977237706172616e-06, "loss": 0.7063, "step": 12344 }, { "epoch": 0.35, "grad_norm": 7.901929677617036, "learning_rate": 7.497322011435844e-06, "loss": 0.3467, "step": 12345 }, { "epoch": 0.35, "grad_norm": 5.990185936731022, "learning_rate": 7.4969202307702715e-06, "loss": 0.5462, "step": 12346 }, { "epoch": 0.35, "grad_norm": 5.017060451746326, "learning_rate": 7.496518428624005e-06, "loss": 0.4606, "step": 12347 }, { "epoch": 0.35, "grad_norm": 5.443653351969268, "learning_rate": 7.496116605000496e-06, "loss": 0.7476, "step": 12348 }, { "epoch": 0.35, "grad_norm": 8.429076277782013, "learning_rate": 7.495714759903202e-06, "loss": 0.7838, "step": 12349 }, { "epoch": 0.35, "grad_norm": 7.556878577908179, "learning_rate": 7.495312893335583e-06, "loss": 0.3873, "step": 12350 }, { "epoch": 0.35, "grad_norm": 4.758643517792129, "learning_rate": 7.494911005301093e-06, "loss": 0.457, "step": 12351 }, { "epoch": 0.35, "grad_norm": 1.830353869790796, "learning_rate": 7.494509095803193e-06, "loss": 0.1561, "step": 12352 }, { "epoch": 0.35, "grad_norm": 18.714336403514977, "learning_rate": 7.4941071648453365e-06, "loss": 0.4297, "step": 12353 }, { "epoch": 0.35, "grad_norm": 10.347296668573543, "learning_rate": 7.493705212430983e-06, "loss": 0.7791, "step": 12354 }, { "epoch": 0.35, "grad_norm": 5.096676880747455, "learning_rate": 7.493303238563592e-06, "loss": 0.4117, "step": 12355 }, { "epoch": 0.35, "grad_norm": 7.3891817582997605, "learning_rate": 7.492901243246619e-06, "loss": 0.5893, "step": 12356 }, { "epoch": 0.35, "grad_norm": 9.168494880314944, "learning_rate": 7.492499226483524e-06, "loss": 0.609, "step": 12357 }, { "epoch": 0.35, "grad_norm": 4.521563627730779, "learning_rate": 7.492097188277765e-06, "loss": 0.5069, "step": 12358 }, { "epoch": 0.35, "grad_norm": 5.861403945590076, "learning_rate": 7.491695128632801e-06, "loss": 0.4266, "step": 12359 }, { "epoch": 0.35, "grad_norm": 4.879928151993195, "learning_rate": 7.491293047552091e-06, "loss": 0.4962, "step": 12360 }, { "epoch": 0.35, "grad_norm": 5.361318519458608, "learning_rate": 7.490890945039093e-06, "loss": 0.254, "step": 12361 }, { "epoch": 0.35, "grad_norm": 8.372559262940467, "learning_rate": 7.490488821097266e-06, "loss": 0.4801, "step": 12362 }, { "epoch": 0.35, "grad_norm": 6.746951721801181, "learning_rate": 7.49008667573007e-06, "loss": 0.2463, "step": 12363 }, { "epoch": 0.35, "grad_norm": 9.10836142500454, "learning_rate": 7.489684508940968e-06, "loss": 0.5345, "step": 12364 }, { "epoch": 0.35, "grad_norm": 6.806495610228911, "learning_rate": 7.489282320733414e-06, "loss": 0.4574, "step": 12365 }, { "epoch": 0.35, "grad_norm": 11.23262126959877, "learning_rate": 7.4888801111108715e-06, "loss": 0.9875, "step": 12366 }, { "epoch": 0.35, "grad_norm": 6.3323250478258, "learning_rate": 7.4884778800768e-06, "loss": 0.8483, "step": 12367 }, { "epoch": 0.35, "grad_norm": 3.5802446109168327, "learning_rate": 7.48807562763466e-06, "loss": 0.2906, "step": 12368 }, { "epoch": 0.35, "grad_norm": 6.256712017853548, "learning_rate": 7.487673353787911e-06, "loss": 0.3902, "step": 12369 }, { "epoch": 0.35, "grad_norm": 7.661715989196472, "learning_rate": 7.487271058540014e-06, "loss": 0.5363, "step": 12370 }, { "epoch": 0.35, "grad_norm": 6.339370493638563, "learning_rate": 7.486868741894431e-06, "loss": 0.8834, "step": 12371 }, { "epoch": 0.35, "grad_norm": 11.080311203039432, "learning_rate": 7.486466403854623e-06, "loss": 0.424, "step": 12372 }, { "epoch": 0.35, "grad_norm": 8.641739895791453, "learning_rate": 7.48606404442405e-06, "loss": 0.8337, "step": 12373 }, { "epoch": 0.35, "grad_norm": 6.797451056261765, "learning_rate": 7.485661663606174e-06, "loss": 0.6315, "step": 12374 }, { "epoch": 0.35, "grad_norm": 5.169633085560217, "learning_rate": 7.485259261404457e-06, "loss": 0.8271, "step": 12375 }, { "epoch": 0.35, "grad_norm": 5.564681686147259, "learning_rate": 7.484856837822361e-06, "loss": 0.545, "step": 12376 }, { "epoch": 0.35, "grad_norm": 7.98054534979911, "learning_rate": 7.4844543928633475e-06, "loss": 0.745, "step": 12377 }, { "epoch": 0.35, "grad_norm": 7.67754519629086, "learning_rate": 7.484051926530878e-06, "loss": 0.6616, "step": 12378 }, { "epoch": 0.35, "grad_norm": 6.283184000495452, "learning_rate": 7.483649438828415e-06, "loss": 0.3997, "step": 12379 }, { "epoch": 0.35, "grad_norm": 7.197552807486006, "learning_rate": 7.483246929759423e-06, "loss": 0.5659, "step": 12380 }, { "epoch": 0.35, "grad_norm": 3.238229325525871, "learning_rate": 7.482844399327363e-06, "loss": 0.3822, "step": 12381 }, { "epoch": 0.35, "grad_norm": 6.542230966449563, "learning_rate": 7.482441847535699e-06, "loss": 0.2548, "step": 12382 }, { "epoch": 0.35, "grad_norm": 5.922380491236898, "learning_rate": 7.4820392743878924e-06, "loss": 1.0757, "step": 12383 }, { "epoch": 0.35, "grad_norm": 7.766079772282117, "learning_rate": 7.481636679887409e-06, "loss": 0.2247, "step": 12384 }, { "epoch": 0.35, "grad_norm": 6.2816267896914635, "learning_rate": 7.4812340640377094e-06, "loss": 0.4098, "step": 12385 }, { "epoch": 0.35, "grad_norm": 4.640541101589949, "learning_rate": 7.480831426842259e-06, "loss": 0.5493, "step": 12386 }, { "epoch": 0.35, "grad_norm": 4.486085146949255, "learning_rate": 7.480428768304521e-06, "loss": 0.9758, "step": 12387 }, { "epoch": 0.35, "grad_norm": 5.627091675853042, "learning_rate": 7.480026088427959e-06, "loss": 0.5921, "step": 12388 }, { "epoch": 0.35, "grad_norm": 6.52869847300159, "learning_rate": 7.47962338721604e-06, "loss": 0.4961, "step": 12389 }, { "epoch": 0.35, "grad_norm": 2.1754905328397762, "learning_rate": 7.479220664672224e-06, "loss": 0.1934, "step": 12390 }, { "epoch": 0.35, "grad_norm": 3.890356889548549, "learning_rate": 7.47881792079998e-06, "loss": 0.3987, "step": 12391 }, { "epoch": 0.35, "grad_norm": 8.801761374519938, "learning_rate": 7.478415155602769e-06, "loss": 0.4076, "step": 12392 }, { "epoch": 0.35, "grad_norm": 4.183422494725899, "learning_rate": 7.4780123690840575e-06, "loss": 0.3721, "step": 12393 }, { "epoch": 0.35, "grad_norm": 5.898824110687185, "learning_rate": 7.477609561247312e-06, "loss": 0.5897, "step": 12394 }, { "epoch": 0.35, "grad_norm": 4.601736742227796, "learning_rate": 7.477206732095996e-06, "loss": 0.5968, "step": 12395 }, { "epoch": 0.35, "grad_norm": 7.250825834923296, "learning_rate": 7.476803881633576e-06, "loss": 0.2419, "step": 12396 }, { "epoch": 0.36, "grad_norm": 5.560269069130043, "learning_rate": 7.476401009863516e-06, "loss": 0.3789, "step": 12397 }, { "epoch": 0.36, "grad_norm": 5.349770662258657, "learning_rate": 7.4759981167892835e-06, "loss": 0.4056, "step": 12398 }, { "epoch": 0.36, "grad_norm": 5.578052958389231, "learning_rate": 7.475595202414344e-06, "loss": 0.498, "step": 12399 }, { "epoch": 0.36, "grad_norm": 5.247460909330278, "learning_rate": 7.475192266742164e-06, "loss": 0.3147, "step": 12400 }, { "epoch": 0.36, "grad_norm": 9.334074618215995, "learning_rate": 7.47478930977621e-06, "loss": 1.072, "step": 12401 }, { "epoch": 0.36, "grad_norm": 7.3419052079497416, "learning_rate": 7.474386331519948e-06, "loss": 0.3325, "step": 12402 }, { "epoch": 0.36, "grad_norm": 3.741723368299335, "learning_rate": 7.473983331976844e-06, "loss": 0.4098, "step": 12403 }, { "epoch": 0.36, "grad_norm": 5.554688015064586, "learning_rate": 7.473580311150368e-06, "loss": 0.7981, "step": 12404 }, { "epoch": 0.36, "grad_norm": 9.594634521664132, "learning_rate": 7.473177269043983e-06, "loss": 0.5542, "step": 12405 }, { "epoch": 0.36, "grad_norm": 3.5990715763513843, "learning_rate": 7.472774205661159e-06, "loss": 0.3509, "step": 12406 }, { "epoch": 0.36, "grad_norm": 3.6553784660315354, "learning_rate": 7.472371121005364e-06, "loss": 0.5276, "step": 12407 }, { "epoch": 0.36, "grad_norm": 6.837627267437659, "learning_rate": 7.471968015080064e-06, "loss": 0.6192, "step": 12408 }, { "epoch": 0.36, "grad_norm": 4.067363835138239, "learning_rate": 7.471564887888728e-06, "loss": 0.2834, "step": 12409 }, { "epoch": 0.36, "grad_norm": 6.240302344296795, "learning_rate": 7.4711617394348235e-06, "loss": 0.6323, "step": 12410 }, { "epoch": 0.36, "grad_norm": 4.263991521717753, "learning_rate": 7.470758569721819e-06, "loss": 0.3153, "step": 12411 }, { "epoch": 0.36, "grad_norm": 3.6175159939594925, "learning_rate": 7.470355378753181e-06, "loss": 0.2861, "step": 12412 }, { "epoch": 0.36, "grad_norm": 4.545502288307534, "learning_rate": 7.469952166532381e-06, "loss": 0.3502, "step": 12413 }, { "epoch": 0.36, "grad_norm": 3.1632026728553173, "learning_rate": 7.469548933062888e-06, "loss": 0.2136, "step": 12414 }, { "epoch": 0.36, "grad_norm": 5.97065197264797, "learning_rate": 7.469145678348169e-06, "loss": 0.5584, "step": 12415 }, { "epoch": 0.36, "grad_norm": 5.764728714249723, "learning_rate": 7.468742402391692e-06, "loss": 0.41, "step": 12416 }, { "epoch": 0.36, "grad_norm": 4.744259376971312, "learning_rate": 7.468339105196929e-06, "loss": 0.3523, "step": 12417 }, { "epoch": 0.36, "grad_norm": 5.285378371799038, "learning_rate": 7.467935786767349e-06, "loss": 0.5829, "step": 12418 }, { "epoch": 0.36, "grad_norm": 3.609244934644987, "learning_rate": 7.467532447106422e-06, "loss": 0.2625, "step": 12419 }, { "epoch": 0.36, "grad_norm": 7.293316612671974, "learning_rate": 7.467129086217614e-06, "loss": 0.5465, "step": 12420 }, { "epoch": 0.36, "grad_norm": 6.7054646283656725, "learning_rate": 7.466725704104402e-06, "loss": 0.8001, "step": 12421 }, { "epoch": 0.36, "grad_norm": 35.1385631507649, "learning_rate": 7.46632230077025e-06, "loss": 0.6712, "step": 12422 }, { "epoch": 0.36, "grad_norm": 6.355074391383592, "learning_rate": 7.465918876218632e-06, "loss": 0.296, "step": 12423 }, { "epoch": 0.36, "grad_norm": 13.278666740960121, "learning_rate": 7.465515430453016e-06, "loss": 0.7684, "step": 12424 }, { "epoch": 0.36, "grad_norm": 5.679055620853785, "learning_rate": 7.465111963476877e-06, "loss": 0.6642, "step": 12425 }, { "epoch": 0.36, "grad_norm": 7.336098406147269, "learning_rate": 7.464708475293682e-06, "loss": 0.5414, "step": 12426 }, { "epoch": 0.36, "grad_norm": 8.754335310369337, "learning_rate": 7.464304965906904e-06, "loss": 0.7121, "step": 12427 }, { "epoch": 0.36, "grad_norm": 2.3286937044621516, "learning_rate": 7.4639014353200125e-06, "loss": 0.286, "step": 12428 }, { "epoch": 0.36, "grad_norm": 9.19024181325941, "learning_rate": 7.46349788353648e-06, "loss": 1.0149, "step": 12429 }, { "epoch": 0.36, "grad_norm": 4.518712502900401, "learning_rate": 7.46309431055978e-06, "loss": 0.4237, "step": 12430 }, { "epoch": 0.36, "grad_norm": 6.828103491009458, "learning_rate": 7.462690716393383e-06, "loss": 0.4081, "step": 12431 }, { "epoch": 0.36, "grad_norm": 4.164760237704273, "learning_rate": 7.4622871010407594e-06, "loss": 0.2466, "step": 12432 }, { "epoch": 0.36, "grad_norm": 3.7424237648557983, "learning_rate": 7.461883464505384e-06, "loss": 0.412, "step": 12433 }, { "epoch": 0.36, "grad_norm": 7.274013213780473, "learning_rate": 7.4614798067907276e-06, "loss": 0.3821, "step": 12434 }, { "epoch": 0.36, "grad_norm": 7.231308091308639, "learning_rate": 7.461076127900266e-06, "loss": 0.6953, "step": 12435 }, { "epoch": 0.36, "grad_norm": 8.792531108090728, "learning_rate": 7.460672427837466e-06, "loss": 0.4626, "step": 12436 }, { "epoch": 0.36, "grad_norm": 9.949199219689373, "learning_rate": 7.460268706605807e-06, "loss": 0.3867, "step": 12437 }, { "epoch": 0.36, "grad_norm": 6.0622835808891695, "learning_rate": 7.459864964208759e-06, "loss": 0.7977, "step": 12438 }, { "epoch": 0.36, "grad_norm": 10.552582955213968, "learning_rate": 7.459461200649794e-06, "loss": 0.8671, "step": 12439 }, { "epoch": 0.36, "grad_norm": 4.422354358800139, "learning_rate": 7.459057415932387e-06, "loss": 0.3188, "step": 12440 }, { "epoch": 0.36, "grad_norm": 5.48686605106662, "learning_rate": 7.458653610060013e-06, "loss": 0.6141, "step": 12441 }, { "epoch": 0.36, "grad_norm": 6.240678053292755, "learning_rate": 7.4582497830361445e-06, "loss": 0.5352, "step": 12442 }, { "epoch": 0.36, "grad_norm": 5.0458667799169, "learning_rate": 7.457845934864254e-06, "loss": 0.5602, "step": 12443 }, { "epoch": 0.36, "grad_norm": 5.426763436034967, "learning_rate": 7.45744206554782e-06, "loss": 0.3594, "step": 12444 }, { "epoch": 0.36, "grad_norm": 8.53934994115184, "learning_rate": 7.457038175090313e-06, "loss": 0.6521, "step": 12445 }, { "epoch": 0.36, "grad_norm": 6.403706822693743, "learning_rate": 7.4566342634952095e-06, "loss": 1.0431, "step": 12446 }, { "epoch": 0.36, "grad_norm": 8.486770769346293, "learning_rate": 7.456230330765983e-06, "loss": 0.8757, "step": 12447 }, { "epoch": 0.36, "grad_norm": 9.959929962065154, "learning_rate": 7.455826376906112e-06, "loss": 1.0265, "step": 12448 }, { "epoch": 0.36, "grad_norm": 4.301902964261528, "learning_rate": 7.455422401919067e-06, "loss": 0.2873, "step": 12449 }, { "epoch": 0.36, "grad_norm": 8.871360354866576, "learning_rate": 7.455018405808326e-06, "loss": 0.3118, "step": 12450 }, { "epoch": 0.36, "grad_norm": 6.641488950290978, "learning_rate": 7.454614388577364e-06, "loss": 0.3004, "step": 12451 }, { "epoch": 0.36, "grad_norm": 4.328098118006545, "learning_rate": 7.454210350229656e-06, "loss": 0.5068, "step": 12452 }, { "epoch": 0.36, "grad_norm": 5.786069067752771, "learning_rate": 7.4538062907686784e-06, "loss": 0.5823, "step": 12453 }, { "epoch": 0.36, "grad_norm": 5.492490627122815, "learning_rate": 7.453402210197907e-06, "loss": 0.1809, "step": 12454 }, { "epoch": 0.36, "grad_norm": 4.985871236917314, "learning_rate": 7.45299810852082e-06, "loss": 0.3947, "step": 12455 }, { "epoch": 0.36, "grad_norm": 6.581580316961321, "learning_rate": 7.452593985740892e-06, "loss": 0.6727, "step": 12456 }, { "epoch": 0.36, "grad_norm": 9.926097926372947, "learning_rate": 7.452189841861599e-06, "loss": 0.5116, "step": 12457 }, { "epoch": 0.36, "grad_norm": 5.013995324740153, "learning_rate": 7.4517856768864205e-06, "loss": 0.5744, "step": 12458 }, { "epoch": 0.36, "grad_norm": 8.283132317583764, "learning_rate": 7.45138149081883e-06, "loss": 1.0558, "step": 12459 }, { "epoch": 0.36, "grad_norm": 11.911301549967485, "learning_rate": 7.450977283662306e-06, "loss": 0.7704, "step": 12460 }, { "epoch": 0.36, "grad_norm": 5.83230827271494, "learning_rate": 7.450573055420327e-06, "loss": 0.6246, "step": 12461 }, { "epoch": 0.36, "grad_norm": 4.72194911815304, "learning_rate": 7.4501688060963695e-06, "loss": 0.371, "step": 12462 }, { "epoch": 0.36, "grad_norm": 2.4281708377944837, "learning_rate": 7.449764535693912e-06, "loss": 0.1864, "step": 12463 }, { "epoch": 0.36, "grad_norm": 8.271263521346844, "learning_rate": 7.449360244216432e-06, "loss": 0.7645, "step": 12464 }, { "epoch": 0.36, "grad_norm": 3.927747729263101, "learning_rate": 7.448955931667407e-06, "loss": 0.382, "step": 12465 }, { "epoch": 0.36, "grad_norm": 5.861742238278965, "learning_rate": 7.4485515980503156e-06, "loss": 0.9234, "step": 12466 }, { "epoch": 0.36, "grad_norm": 6.766675871888212, "learning_rate": 7.448147243368635e-06, "loss": 0.6467, "step": 12467 }, { "epoch": 0.36, "grad_norm": 5.241545409036719, "learning_rate": 7.447742867625847e-06, "loss": 0.5905, "step": 12468 }, { "epoch": 0.36, "grad_norm": 4.31661711993313, "learning_rate": 7.447338470825427e-06, "loss": 0.3096, "step": 12469 }, { "epoch": 0.36, "grad_norm": 5.695961160441648, "learning_rate": 7.4469340529708565e-06, "loss": 0.4587, "step": 12470 }, { "epoch": 0.36, "grad_norm": 3.8277063880455504, "learning_rate": 7.446529614065614e-06, "loss": 0.2383, "step": 12471 }, { "epoch": 0.36, "grad_norm": 4.2285981978531995, "learning_rate": 7.446125154113177e-06, "loss": 0.556, "step": 12472 }, { "epoch": 0.36, "grad_norm": 4.893045137391718, "learning_rate": 7.445720673117028e-06, "loss": 0.2345, "step": 12473 }, { "epoch": 0.36, "grad_norm": 7.681784637443596, "learning_rate": 7.445316171080644e-06, "loss": 0.5905, "step": 12474 }, { "epoch": 0.36, "grad_norm": 3.8789473394117278, "learning_rate": 7.444911648007508e-06, "loss": 0.1997, "step": 12475 }, { "epoch": 0.36, "grad_norm": 3.303119621164206, "learning_rate": 7.444507103901096e-06, "loss": 0.3496, "step": 12476 }, { "epoch": 0.36, "grad_norm": 4.610826399578647, "learning_rate": 7.44410253876489e-06, "loss": 0.6111, "step": 12477 }, { "epoch": 0.36, "grad_norm": 4.598153008887335, "learning_rate": 7.443697952602372e-06, "loss": 0.6359, "step": 12478 }, { "epoch": 0.36, "grad_norm": 9.89917721652709, "learning_rate": 7.443293345417022e-06, "loss": 0.1856, "step": 12479 }, { "epoch": 0.36, "grad_norm": 5.756842149726292, "learning_rate": 7.442888717212319e-06, "loss": 0.7135, "step": 12480 }, { "epoch": 0.36, "grad_norm": 7.550570323699711, "learning_rate": 7.442484067991745e-06, "loss": 0.6717, "step": 12481 }, { "epoch": 0.36, "grad_norm": 3.3410242504060714, "learning_rate": 7.442079397758782e-06, "loss": 0.3951, "step": 12482 }, { "epoch": 0.36, "grad_norm": 4.513095794190195, "learning_rate": 7.44167470651691e-06, "loss": 0.3597, "step": 12483 }, { "epoch": 0.36, "grad_norm": 7.1672851236504584, "learning_rate": 7.441269994269612e-06, "loss": 0.5606, "step": 12484 }, { "epoch": 0.36, "grad_norm": 8.462961759605047, "learning_rate": 7.440865261020368e-06, "loss": 0.6514, "step": 12485 }, { "epoch": 0.36, "grad_norm": 7.682538361979625, "learning_rate": 7.440460506772661e-06, "loss": 0.5018, "step": 12486 }, { "epoch": 0.36, "grad_norm": 3.9262824198863893, "learning_rate": 7.440055731529973e-06, "loss": 0.2414, "step": 12487 }, { "epoch": 0.36, "grad_norm": 7.2481910158759275, "learning_rate": 7.439650935295786e-06, "loss": 0.6453, "step": 12488 }, { "epoch": 0.36, "grad_norm": 6.436386086097088, "learning_rate": 7.439246118073582e-06, "loss": 0.3242, "step": 12489 }, { "epoch": 0.36, "grad_norm": 3.047074551038702, "learning_rate": 7.438841279866844e-06, "loss": 0.6856, "step": 12490 }, { "epoch": 0.36, "grad_norm": 1.976512566225979, "learning_rate": 7.438436420679055e-06, "loss": 0.1773, "step": 12491 }, { "epoch": 0.36, "grad_norm": 5.888811414087036, "learning_rate": 7.438031540513699e-06, "loss": 0.3278, "step": 12492 }, { "epoch": 0.36, "grad_norm": 7.86397470978156, "learning_rate": 7.437626639374256e-06, "loss": 0.6486, "step": 12493 }, { "epoch": 0.36, "grad_norm": 6.574196156436314, "learning_rate": 7.4372217172642095e-06, "loss": 0.6742, "step": 12494 }, { "epoch": 0.36, "grad_norm": 4.393450496444619, "learning_rate": 7.4368167741870475e-06, "loss": 0.3925, "step": 12495 }, { "epoch": 0.36, "grad_norm": 3.824858318771579, "learning_rate": 7.43641181014625e-06, "loss": 0.4521, "step": 12496 }, { "epoch": 0.36, "grad_norm": 6.556872689029736, "learning_rate": 7.436006825145301e-06, "loss": 0.5135, "step": 12497 }, { "epoch": 0.36, "grad_norm": 7.702416563439558, "learning_rate": 7.435601819187687e-06, "loss": 0.6358, "step": 12498 }, { "epoch": 0.36, "grad_norm": 4.009910198119381, "learning_rate": 7.43519679227689e-06, "loss": 0.3005, "step": 12499 }, { "epoch": 0.36, "grad_norm": 4.666060360054952, "learning_rate": 7.434791744416393e-06, "loss": 0.3394, "step": 12500 }, { "epoch": 0.36, "grad_norm": 8.384807253347047, "learning_rate": 7.434386675609684e-06, "loss": 0.3767, "step": 12501 }, { "epoch": 0.36, "grad_norm": 6.263841084929663, "learning_rate": 7.433981585860245e-06, "loss": 0.4027, "step": 12502 }, { "epoch": 0.36, "grad_norm": 4.10428635024147, "learning_rate": 7.433576475171562e-06, "loss": 0.7998, "step": 12503 }, { "epoch": 0.36, "grad_norm": 5.315213935113562, "learning_rate": 7.433171343547122e-06, "loss": 0.3214, "step": 12504 }, { "epoch": 0.36, "grad_norm": 5.030091821750088, "learning_rate": 7.432766190990408e-06, "loss": 0.3119, "step": 12505 }, { "epoch": 0.36, "grad_norm": 6.629756713333278, "learning_rate": 7.432361017504906e-06, "loss": 0.4471, "step": 12506 }, { "epoch": 0.36, "grad_norm": 6.074208741011147, "learning_rate": 7.4319558230941e-06, "loss": 0.684, "step": 12507 }, { "epoch": 0.36, "grad_norm": 4.006749121747915, "learning_rate": 7.431550607761479e-06, "loss": 0.5993, "step": 12508 }, { "epoch": 0.36, "grad_norm": 4.981425820936767, "learning_rate": 7.431145371510527e-06, "loss": 0.4863, "step": 12509 }, { "epoch": 0.36, "grad_norm": 3.175861071419847, "learning_rate": 7.43074011434473e-06, "loss": 0.3464, "step": 12510 }, { "epoch": 0.36, "grad_norm": 2.8225893039614505, "learning_rate": 7.430334836267576e-06, "loss": 0.1706, "step": 12511 }, { "epoch": 0.36, "grad_norm": 6.900981706082868, "learning_rate": 7.429929537282552e-06, "loss": 0.7204, "step": 12512 }, { "epoch": 0.36, "grad_norm": 4.5209110563895365, "learning_rate": 7.429524217393142e-06, "loss": 0.4977, "step": 12513 }, { "epoch": 0.36, "grad_norm": 6.769181371805381, "learning_rate": 7.429118876602833e-06, "loss": 0.3957, "step": 12514 }, { "epoch": 0.36, "grad_norm": 3.1228174408020672, "learning_rate": 7.428713514915116e-06, "loss": 0.2788, "step": 12515 }, { "epoch": 0.36, "grad_norm": 5.695512598025964, "learning_rate": 7.4283081323334734e-06, "loss": 0.4726, "step": 12516 }, { "epoch": 0.36, "grad_norm": 4.66115710885337, "learning_rate": 7.4279027288613955e-06, "loss": 0.2575, "step": 12517 }, { "epoch": 0.36, "grad_norm": 7.227010319211652, "learning_rate": 7.42749730450237e-06, "loss": 1.1571, "step": 12518 }, { "epoch": 0.36, "grad_norm": 7.164816647171471, "learning_rate": 7.427091859259883e-06, "loss": 0.7997, "step": 12519 }, { "epoch": 0.36, "grad_norm": 6.377049771054105, "learning_rate": 7.426686393137425e-06, "loss": 0.527, "step": 12520 }, { "epoch": 0.36, "grad_norm": 4.927544032596388, "learning_rate": 7.426280906138481e-06, "loss": 0.2859, "step": 12521 }, { "epoch": 0.36, "grad_norm": 8.315016974949671, "learning_rate": 7.425875398266543e-06, "loss": 0.7669, "step": 12522 }, { "epoch": 0.36, "grad_norm": 10.790482700358602, "learning_rate": 7.425469869525097e-06, "loss": 0.9848, "step": 12523 }, { "epoch": 0.36, "grad_norm": 4.400406933520184, "learning_rate": 7.4250643199176316e-06, "loss": 0.6513, "step": 12524 }, { "epoch": 0.36, "grad_norm": 7.462688843655595, "learning_rate": 7.424658749447637e-06, "loss": 0.4269, "step": 12525 }, { "epoch": 0.36, "grad_norm": 2.806860822880918, "learning_rate": 7.4242531581186015e-06, "loss": 0.2297, "step": 12526 }, { "epoch": 0.36, "grad_norm": 4.263528693010337, "learning_rate": 7.423847545934014e-06, "loss": 0.3242, "step": 12527 }, { "epoch": 0.36, "grad_norm": 3.358788328175687, "learning_rate": 7.423441912897366e-06, "loss": 0.2315, "step": 12528 }, { "epoch": 0.36, "grad_norm": 6.154912232021902, "learning_rate": 7.423036259012145e-06, "loss": 0.5812, "step": 12529 }, { "epoch": 0.36, "grad_norm": 8.168923656482802, "learning_rate": 7.42263058428184e-06, "loss": 0.5907, "step": 12530 }, { "epoch": 0.36, "grad_norm": 5.983294078127876, "learning_rate": 7.422224888709944e-06, "loss": 0.4208, "step": 12531 }, { "epoch": 0.36, "grad_norm": 3.0181591474537126, "learning_rate": 7.4218191722999466e-06, "loss": 0.2578, "step": 12532 }, { "epoch": 0.36, "grad_norm": 5.92231950123334, "learning_rate": 7.4214134350553345e-06, "loss": 0.4301, "step": 12533 }, { "epoch": 0.36, "grad_norm": 6.283088415045623, "learning_rate": 7.421007676979601e-06, "loss": 0.6941, "step": 12534 }, { "epoch": 0.36, "grad_norm": 5.906161716346328, "learning_rate": 7.4206018980762385e-06, "loss": 0.8352, "step": 12535 }, { "epoch": 0.36, "grad_norm": 2.403533946202589, "learning_rate": 7.420196098348734e-06, "loss": 0.0808, "step": 12536 }, { "epoch": 0.36, "grad_norm": 8.866912380437372, "learning_rate": 7.4197902778005815e-06, "loss": 0.6292, "step": 12537 }, { "epoch": 0.36, "grad_norm": 8.129731722507444, "learning_rate": 7.419384436435271e-06, "loss": 0.6235, "step": 12538 }, { "epoch": 0.36, "grad_norm": 2.890187926369663, "learning_rate": 7.4189785742562936e-06, "loss": 0.2815, "step": 12539 }, { "epoch": 0.36, "grad_norm": 6.598247928411942, "learning_rate": 7.418572691267142e-06, "loss": 0.4428, "step": 12540 }, { "epoch": 0.36, "grad_norm": 3.5543595047423993, "learning_rate": 7.4181667874713055e-06, "loss": 0.6583, "step": 12541 }, { "epoch": 0.36, "grad_norm": 7.497859331445088, "learning_rate": 7.4177608628722805e-06, "loss": 0.7429, "step": 12542 }, { "epoch": 0.36, "grad_norm": 6.616124019263019, "learning_rate": 7.417354917473555e-06, "loss": 0.2896, "step": 12543 }, { "epoch": 0.36, "grad_norm": 4.014021824340716, "learning_rate": 7.416948951278623e-06, "loss": 0.3609, "step": 12544 }, { "epoch": 0.36, "grad_norm": 4.596403432551424, "learning_rate": 7.416542964290978e-06, "loss": 0.2964, "step": 12545 }, { "epoch": 0.36, "grad_norm": 5.302872517228301, "learning_rate": 7.41613695651411e-06, "loss": 0.6969, "step": 12546 }, { "epoch": 0.36, "grad_norm": 4.9780411133101, "learning_rate": 7.415730927951512e-06, "loss": 0.4134, "step": 12547 }, { "epoch": 0.36, "grad_norm": 5.227344859232433, "learning_rate": 7.41532487860668e-06, "loss": 0.5624, "step": 12548 }, { "epoch": 0.36, "grad_norm": 9.456253618540957, "learning_rate": 7.414918808483107e-06, "loss": 0.8891, "step": 12549 }, { "epoch": 0.36, "grad_norm": 7.186504129165358, "learning_rate": 7.414512717584283e-06, "loss": 0.6391, "step": 12550 }, { "epoch": 0.36, "grad_norm": 6.556513389918749, "learning_rate": 7.414106605913701e-06, "loss": 0.6406, "step": 12551 }, { "epoch": 0.36, "grad_norm": 3.42584795653101, "learning_rate": 7.41370047347486e-06, "loss": 0.1985, "step": 12552 }, { "epoch": 0.36, "grad_norm": 4.941201862435587, "learning_rate": 7.413294320271252e-06, "loss": 0.5843, "step": 12553 }, { "epoch": 0.36, "grad_norm": 4.736686771617711, "learning_rate": 7.412888146306367e-06, "loss": 0.3695, "step": 12554 }, { "epoch": 0.36, "grad_norm": 4.5398179572952655, "learning_rate": 7.412481951583704e-06, "loss": 0.2715, "step": 12555 }, { "epoch": 0.36, "grad_norm": 4.240942995521673, "learning_rate": 7.412075736106757e-06, "loss": 0.3895, "step": 12556 }, { "epoch": 0.36, "grad_norm": 4.392013466018781, "learning_rate": 7.411669499879019e-06, "loss": 0.5409, "step": 12557 }, { "epoch": 0.36, "grad_norm": 6.022769123998527, "learning_rate": 7.411263242903983e-06, "loss": 0.2843, "step": 12558 }, { "epoch": 0.36, "grad_norm": 4.452056542427355, "learning_rate": 7.410856965185148e-06, "loss": 0.4332, "step": 12559 }, { "epoch": 0.36, "grad_norm": 4.922661830437008, "learning_rate": 7.410450666726007e-06, "loss": 0.5876, "step": 12560 }, { "epoch": 0.36, "grad_norm": 2.3388273805165443, "learning_rate": 7.410044347530056e-06, "loss": 0.2774, "step": 12561 }, { "epoch": 0.36, "grad_norm": 8.35066411009998, "learning_rate": 7.40963800760079e-06, "loss": 0.9396, "step": 12562 }, { "epoch": 0.36, "grad_norm": 4.7828566401182355, "learning_rate": 7.409231646941705e-06, "loss": 0.6814, "step": 12563 }, { "epoch": 0.36, "grad_norm": 3.077203680503208, "learning_rate": 7.408825265556296e-06, "loss": 0.508, "step": 12564 }, { "epoch": 0.36, "grad_norm": 6.8361996322508976, "learning_rate": 7.4084188634480605e-06, "loss": 0.7739, "step": 12565 }, { "epoch": 0.36, "grad_norm": 2.186247698770122, "learning_rate": 7.408012440620494e-06, "loss": 0.2205, "step": 12566 }, { "epoch": 0.36, "grad_norm": 5.689322441635244, "learning_rate": 7.407605997077093e-06, "loss": 0.5579, "step": 12567 }, { "epoch": 0.36, "grad_norm": 4.4618920475764625, "learning_rate": 7.407199532821353e-06, "loss": 0.392, "step": 12568 }, { "epoch": 0.36, "grad_norm": 8.125016608588043, "learning_rate": 7.406793047856773e-06, "loss": 0.5364, "step": 12569 }, { "epoch": 0.36, "grad_norm": 6.176504267739651, "learning_rate": 7.406386542186848e-06, "loss": 0.4143, "step": 12570 }, { "epoch": 0.36, "grad_norm": 5.72930272258695, "learning_rate": 7.405980015815078e-06, "loss": 0.5065, "step": 12571 }, { "epoch": 0.36, "grad_norm": 9.434970036607154, "learning_rate": 7.405573468744957e-06, "loss": 0.4202, "step": 12572 }, { "epoch": 0.36, "grad_norm": 5.498907804416563, "learning_rate": 7.405166900979982e-06, "loss": 0.5228, "step": 12573 }, { "epoch": 0.36, "grad_norm": 7.133850991400002, "learning_rate": 7.404760312523653e-06, "loss": 0.9081, "step": 12574 }, { "epoch": 0.36, "grad_norm": 10.117270450914768, "learning_rate": 7.4043537033794675e-06, "loss": 0.6237, "step": 12575 }, { "epoch": 0.36, "grad_norm": 3.631065769085285, "learning_rate": 7.403947073550923e-06, "loss": 0.3352, "step": 12576 }, { "epoch": 0.36, "grad_norm": 7.460100312721861, "learning_rate": 7.4035404230415175e-06, "loss": 0.3541, "step": 12577 }, { "epoch": 0.36, "grad_norm": 6.2403000137148394, "learning_rate": 7.403133751854751e-06, "loss": 0.2204, "step": 12578 }, { "epoch": 0.36, "grad_norm": 4.990436133320487, "learning_rate": 7.4027270599941205e-06, "loss": 0.4414, "step": 12579 }, { "epoch": 0.36, "grad_norm": 5.334947262389805, "learning_rate": 7.402320347463123e-06, "loss": 0.6684, "step": 12580 }, { "epoch": 0.36, "grad_norm": 6.38380049082559, "learning_rate": 7.401913614265262e-06, "loss": 0.4317, "step": 12581 }, { "epoch": 0.36, "grad_norm": 3.296026907218433, "learning_rate": 7.401506860404032e-06, "loss": 0.2817, "step": 12582 }, { "epoch": 0.36, "grad_norm": 8.401766946556496, "learning_rate": 7.401100085882935e-06, "loss": 1.0058, "step": 12583 }, { "epoch": 0.36, "grad_norm": 6.284477353697033, "learning_rate": 7.4006932907054685e-06, "loss": 0.708, "step": 12584 }, { "epoch": 0.36, "grad_norm": 4.666433501662968, "learning_rate": 7.400286474875133e-06, "loss": 0.5978, "step": 12585 }, { "epoch": 0.36, "grad_norm": 6.487262680328865, "learning_rate": 7.399879638395431e-06, "loss": 0.8098, "step": 12586 }, { "epoch": 0.36, "grad_norm": 8.561306668401873, "learning_rate": 7.399472781269858e-06, "loss": 0.567, "step": 12587 }, { "epoch": 0.36, "grad_norm": 7.393415831417167, "learning_rate": 7.399065903501917e-06, "loss": 0.9061, "step": 12588 }, { "epoch": 0.36, "grad_norm": 6.836191994425673, "learning_rate": 7.398659005095107e-06, "loss": 0.6396, "step": 12589 }, { "epoch": 0.36, "grad_norm": 5.542920489433802, "learning_rate": 7.3982520860529286e-06, "loss": 0.521, "step": 12590 }, { "epoch": 0.36, "grad_norm": 7.837627437839216, "learning_rate": 7.3978451463788836e-06, "loss": 0.8018, "step": 12591 }, { "epoch": 0.36, "grad_norm": 11.231293447980443, "learning_rate": 7.397438186076471e-06, "loss": 0.9758, "step": 12592 }, { "epoch": 0.36, "grad_norm": 6.243876585569771, "learning_rate": 7.397031205149193e-06, "loss": 0.5027, "step": 12593 }, { "epoch": 0.36, "grad_norm": 7.038738992689546, "learning_rate": 7.396624203600551e-06, "loss": 0.442, "step": 12594 }, { "epoch": 0.36, "grad_norm": 5.422761124636994, "learning_rate": 7.396217181434046e-06, "loss": 0.5579, "step": 12595 }, { "epoch": 0.36, "grad_norm": 7.418255696464297, "learning_rate": 7.39581013865318e-06, "loss": 0.6636, "step": 12596 }, { "epoch": 0.36, "grad_norm": 3.7636330112099743, "learning_rate": 7.3954030752614534e-06, "loss": 0.5648, "step": 12597 }, { "epoch": 0.36, "grad_norm": 7.576947055780197, "learning_rate": 7.39499599126237e-06, "loss": 0.4894, "step": 12598 }, { "epoch": 0.36, "grad_norm": 9.731906339038682, "learning_rate": 7.39458888665943e-06, "loss": 0.5254, "step": 12599 }, { "epoch": 0.36, "grad_norm": 2.242045753597452, "learning_rate": 7.394181761456137e-06, "loss": 0.2948, "step": 12600 }, { "epoch": 0.36, "grad_norm": 3.95962749540203, "learning_rate": 7.393774615655994e-06, "loss": 0.373, "step": 12601 }, { "epoch": 0.36, "grad_norm": 5.672225478932418, "learning_rate": 7.393367449262503e-06, "loss": 0.3744, "step": 12602 }, { "epoch": 0.36, "grad_norm": 3.991831604981668, "learning_rate": 7.392960262279165e-06, "loss": 0.509, "step": 12603 }, { "epoch": 0.36, "grad_norm": 6.892964629797129, "learning_rate": 7.3925530547094845e-06, "loss": 0.4371, "step": 12604 }, { "epoch": 0.36, "grad_norm": 5.895934082716084, "learning_rate": 7.392145826556966e-06, "loss": 0.6276, "step": 12605 }, { "epoch": 0.36, "grad_norm": 3.834060333945318, "learning_rate": 7.391738577825111e-06, "loss": 0.3448, "step": 12606 }, { "epoch": 0.36, "grad_norm": 3.4596945638294208, "learning_rate": 7.391331308517423e-06, "loss": 0.5242, "step": 12607 }, { "epoch": 0.36, "grad_norm": 6.042165336637291, "learning_rate": 7.390924018637406e-06, "loss": 0.4415, "step": 12608 }, { "epoch": 0.36, "grad_norm": 6.509695268463089, "learning_rate": 7.390516708188565e-06, "loss": 0.4637, "step": 12609 }, { "epoch": 0.36, "grad_norm": 7.84077354585396, "learning_rate": 7.390109377174403e-06, "loss": 0.7404, "step": 12610 }, { "epoch": 0.36, "grad_norm": 11.069357088436453, "learning_rate": 7.3897020255984235e-06, "loss": 0.5515, "step": 12611 }, { "epoch": 0.36, "grad_norm": 3.871700728145231, "learning_rate": 7.389294653464132e-06, "loss": 0.4262, "step": 12612 }, { "epoch": 0.36, "grad_norm": 5.398008426115534, "learning_rate": 7.388887260775033e-06, "loss": 0.271, "step": 12613 }, { "epoch": 0.36, "grad_norm": 8.39749955018597, "learning_rate": 7.388479847534631e-06, "loss": 0.4195, "step": 12614 }, { "epoch": 0.36, "grad_norm": 5.008965060570106, "learning_rate": 7.38807241374643e-06, "loss": 0.5666, "step": 12615 }, { "epoch": 0.36, "grad_norm": 12.368888810252544, "learning_rate": 7.387664959413938e-06, "loss": 0.7254, "step": 12616 }, { "epoch": 0.36, "grad_norm": 4.0839120282880526, "learning_rate": 7.387257484540655e-06, "loss": 0.516, "step": 12617 }, { "epoch": 0.36, "grad_norm": 6.578480473063111, "learning_rate": 7.3868499891300925e-06, "loss": 0.5233, "step": 12618 }, { "epoch": 0.36, "grad_norm": 3.956949878997005, "learning_rate": 7.386442473185754e-06, "loss": 0.5732, "step": 12619 }, { "epoch": 0.36, "grad_norm": 5.658480968244124, "learning_rate": 7.386034936711144e-06, "loss": 0.4235, "step": 12620 }, { "epoch": 0.36, "grad_norm": 6.936936982824214, "learning_rate": 7.385627379709767e-06, "loss": 0.5952, "step": 12621 }, { "epoch": 0.36, "grad_norm": 6.98357535271377, "learning_rate": 7.385219802185134e-06, "loss": 0.4033, "step": 12622 }, { "epoch": 0.36, "grad_norm": 4.179419535665879, "learning_rate": 7.384812204140749e-06, "loss": 0.6445, "step": 12623 }, { "epoch": 0.36, "grad_norm": 6.999370751027506, "learning_rate": 7.384404585580116e-06, "loss": 0.3016, "step": 12624 }, { "epoch": 0.36, "grad_norm": 11.966599154126946, "learning_rate": 7.383996946506744e-06, "loss": 0.6674, "step": 12625 }, { "epoch": 0.36, "grad_norm": 8.53210109615388, "learning_rate": 7.383589286924142e-06, "loss": 0.4329, "step": 12626 }, { "epoch": 0.36, "grad_norm": 5.585665609505146, "learning_rate": 7.383181606835814e-06, "loss": 0.2451, "step": 12627 }, { "epoch": 0.36, "grad_norm": 7.179007945314446, "learning_rate": 7.3827739062452676e-06, "loss": 0.5087, "step": 12628 }, { "epoch": 0.36, "grad_norm": 6.822180633059189, "learning_rate": 7.38236618515601e-06, "loss": 0.2627, "step": 12629 }, { "epoch": 0.36, "grad_norm": 6.13537831664176, "learning_rate": 7.3819584435715515e-06, "loss": 0.6603, "step": 12630 }, { "epoch": 0.36, "grad_norm": 7.471337544549158, "learning_rate": 7.3815506814953964e-06, "loss": 0.3367, "step": 12631 }, { "epoch": 0.36, "grad_norm": 8.800881593067, "learning_rate": 7.381142898931054e-06, "loss": 0.8924, "step": 12632 }, { "epoch": 0.36, "grad_norm": 9.200912753727698, "learning_rate": 7.380735095882033e-06, "loss": 0.71, "step": 12633 }, { "epoch": 0.36, "grad_norm": 3.7238529839541776, "learning_rate": 7.380327272351841e-06, "loss": 0.6793, "step": 12634 }, { "epoch": 0.36, "grad_norm": 11.672244309962215, "learning_rate": 7.379919428343984e-06, "loss": 0.6932, "step": 12635 }, { "epoch": 0.36, "grad_norm": 7.149242918573746, "learning_rate": 7.379511563861977e-06, "loss": 0.7601, "step": 12636 }, { "epoch": 0.36, "grad_norm": 6.896528764061691, "learning_rate": 7.379103678909324e-06, "loss": 0.5915, "step": 12637 }, { "epoch": 0.36, "grad_norm": 8.656131261568934, "learning_rate": 7.378695773489533e-06, "loss": 0.3229, "step": 12638 }, { "epoch": 0.36, "grad_norm": 6.696444154267119, "learning_rate": 7.378287847606117e-06, "loss": 0.8243, "step": 12639 }, { "epoch": 0.36, "grad_norm": 2.329796977959958, "learning_rate": 7.3778799012625836e-06, "loss": 0.2102, "step": 12640 }, { "epoch": 0.36, "grad_norm": 5.520371942013855, "learning_rate": 7.377471934462441e-06, "loss": 0.2981, "step": 12641 }, { "epoch": 0.36, "grad_norm": 5.015286894718714, "learning_rate": 7.3770639472092e-06, "loss": 0.667, "step": 12642 }, { "epoch": 0.36, "grad_norm": 5.18889502645906, "learning_rate": 7.376655939506372e-06, "loss": 0.4142, "step": 12643 }, { "epoch": 0.36, "grad_norm": 6.238223463159873, "learning_rate": 7.3762479113574645e-06, "loss": 0.5848, "step": 12644 }, { "epoch": 0.36, "grad_norm": 3.8787675200794127, "learning_rate": 7.3758398627659886e-06, "loss": 0.308, "step": 12645 }, { "epoch": 0.36, "grad_norm": 4.914907216062668, "learning_rate": 7.375431793735457e-06, "loss": 0.654, "step": 12646 }, { "epoch": 0.36, "grad_norm": 4.541047074403865, "learning_rate": 7.375023704269376e-06, "loss": 0.597, "step": 12647 }, { "epoch": 0.36, "grad_norm": 8.736545299932404, "learning_rate": 7.37461559437126e-06, "loss": 0.8982, "step": 12648 }, { "epoch": 0.36, "grad_norm": 8.862422573682194, "learning_rate": 7.374207464044616e-06, "loss": 0.7764, "step": 12649 }, { "epoch": 0.36, "grad_norm": 6.767638578599037, "learning_rate": 7.373799313292959e-06, "loss": 0.7552, "step": 12650 }, { "epoch": 0.36, "grad_norm": 2.9851423046082837, "learning_rate": 7.373391142119798e-06, "loss": 0.3623, "step": 12651 }, { "epoch": 0.36, "grad_norm": 7.144652818716518, "learning_rate": 7.3729829505286466e-06, "loss": 0.5224, "step": 12652 }, { "epoch": 0.36, "grad_norm": 7.0629277732728335, "learning_rate": 7.372574738523015e-06, "loss": 0.3923, "step": 12653 }, { "epoch": 0.36, "grad_norm": 8.324879599035649, "learning_rate": 7.372166506106413e-06, "loss": 0.4685, "step": 12654 }, { "epoch": 0.36, "grad_norm": 7.712301858245549, "learning_rate": 7.371758253282356e-06, "loss": 0.529, "step": 12655 }, { "epoch": 0.36, "grad_norm": 3.4379043688053486, "learning_rate": 7.3713499800543555e-06, "loss": 0.2783, "step": 12656 }, { "epoch": 0.36, "grad_norm": 5.565151568436865, "learning_rate": 7.370941686425923e-06, "loss": 0.5141, "step": 12657 }, { "epoch": 0.36, "grad_norm": 7.201793026188216, "learning_rate": 7.370533372400569e-06, "loss": 0.4369, "step": 12658 }, { "epoch": 0.36, "grad_norm": 4.667204394467978, "learning_rate": 7.370125037981809e-06, "loss": 0.3708, "step": 12659 }, { "epoch": 0.36, "grad_norm": 4.832336366806733, "learning_rate": 7.369716683173157e-06, "loss": 0.4831, "step": 12660 }, { "epoch": 0.36, "grad_norm": 5.921503704579779, "learning_rate": 7.369308307978123e-06, "loss": 0.5844, "step": 12661 }, { "epoch": 0.36, "grad_norm": 9.239882838839911, "learning_rate": 7.36889991240022e-06, "loss": 0.6108, "step": 12662 }, { "epoch": 0.36, "grad_norm": 10.30309037447738, "learning_rate": 7.3684914964429655e-06, "loss": 0.8289, "step": 12663 }, { "epoch": 0.36, "grad_norm": 4.7312540021549365, "learning_rate": 7.368083060109868e-06, "loss": 0.5319, "step": 12664 }, { "epoch": 0.36, "grad_norm": 7.71331088905509, "learning_rate": 7.367674603404445e-06, "loss": 0.6261, "step": 12665 }, { "epoch": 0.36, "grad_norm": 3.377350041994459, "learning_rate": 7.367266126330208e-06, "loss": 0.4149, "step": 12666 }, { "epoch": 0.36, "grad_norm": 5.061873974181219, "learning_rate": 7.366857628890673e-06, "loss": 0.4538, "step": 12667 }, { "epoch": 0.36, "grad_norm": 7.285594446334658, "learning_rate": 7.366449111089351e-06, "loss": 0.4075, "step": 12668 }, { "epoch": 0.36, "grad_norm": 5.092726575392616, "learning_rate": 7.36604057292976e-06, "loss": 0.0704, "step": 12669 }, { "epoch": 0.36, "grad_norm": 10.57032052975143, "learning_rate": 7.3656320144154135e-06, "loss": 0.6947, "step": 12670 }, { "epoch": 0.36, "grad_norm": 4.663513457987137, "learning_rate": 7.365223435549827e-06, "loss": 0.4099, "step": 12671 }, { "epoch": 0.36, "grad_norm": 7.840213844827792, "learning_rate": 7.364814836336512e-06, "loss": 0.6632, "step": 12672 }, { "epoch": 0.36, "grad_norm": 3.706573361576338, "learning_rate": 7.364406216778989e-06, "loss": 0.2963, "step": 12673 }, { "epoch": 0.36, "grad_norm": 2.829948553958923, "learning_rate": 7.363997576880768e-06, "loss": 0.5527, "step": 12674 }, { "epoch": 0.36, "grad_norm": 8.525659259352299, "learning_rate": 7.363588916645368e-06, "loss": 0.9423, "step": 12675 }, { "epoch": 0.36, "grad_norm": 11.848424628118833, "learning_rate": 7.363180236076304e-06, "loss": 0.8576, "step": 12676 }, { "epoch": 0.36, "grad_norm": 4.0664520700824935, "learning_rate": 7.36277153517709e-06, "loss": 0.3633, "step": 12677 }, { "epoch": 0.36, "grad_norm": 3.614892911525187, "learning_rate": 7.362362813951245e-06, "loss": 0.4885, "step": 12678 }, { "epoch": 0.36, "grad_norm": 3.3739499825239503, "learning_rate": 7.361954072402282e-06, "loss": 0.4941, "step": 12679 }, { "epoch": 0.36, "grad_norm": 7.146174505187636, "learning_rate": 7.36154531053372e-06, "loss": 0.5864, "step": 12680 }, { "epoch": 0.36, "grad_norm": 3.4605176683935825, "learning_rate": 7.361136528349074e-06, "loss": 0.2666, "step": 12681 }, { "epoch": 0.36, "grad_norm": 7.158337151286211, "learning_rate": 7.360727725851862e-06, "loss": 0.6696, "step": 12682 }, { "epoch": 0.36, "grad_norm": 7.500588838985814, "learning_rate": 7.360318903045599e-06, "loss": 0.6087, "step": 12683 }, { "epoch": 0.36, "grad_norm": 5.039374952461765, "learning_rate": 7.359910059933804e-06, "loss": 0.2435, "step": 12684 }, { "epoch": 0.36, "grad_norm": 5.644748269103363, "learning_rate": 7.359501196519991e-06, "loss": 0.8629, "step": 12685 }, { "epoch": 0.36, "grad_norm": 6.488552466780726, "learning_rate": 7.359092312807682e-06, "loss": 0.6666, "step": 12686 }, { "epoch": 0.36, "grad_norm": 3.7237903832752184, "learning_rate": 7.358683408800392e-06, "loss": 0.5435, "step": 12687 }, { "epoch": 0.36, "grad_norm": 4.958213647097786, "learning_rate": 7.35827448450164e-06, "loss": 0.4844, "step": 12688 }, { "epoch": 0.36, "grad_norm": 5.539531607193293, "learning_rate": 7.35786553991494e-06, "loss": 0.357, "step": 12689 }, { "epoch": 0.36, "grad_norm": 5.682207684430969, "learning_rate": 7.357456575043816e-06, "loss": 0.6533, "step": 12690 }, { "epoch": 0.36, "grad_norm": 8.970797102436114, "learning_rate": 7.3570475898917815e-06, "loss": 0.3874, "step": 12691 }, { "epoch": 0.36, "grad_norm": 9.536642981505045, "learning_rate": 7.3566385844623565e-06, "loss": 1.0167, "step": 12692 }, { "epoch": 0.36, "grad_norm": 3.644830901981462, "learning_rate": 7.3562295587590625e-06, "loss": 0.6266, "step": 12693 }, { "epoch": 0.36, "grad_norm": 3.793470198291362, "learning_rate": 7.355820512785413e-06, "loss": 0.5838, "step": 12694 }, { "epoch": 0.36, "grad_norm": 4.180070121578521, "learning_rate": 7.355411446544931e-06, "loss": 0.2697, "step": 12695 }, { "epoch": 0.36, "grad_norm": 4.935162630812274, "learning_rate": 7.3550023600411344e-06, "loss": 0.5407, "step": 12696 }, { "epoch": 0.36, "grad_norm": 4.987248348876215, "learning_rate": 7.354593253277542e-06, "loss": 0.4441, "step": 12697 }, { "epoch": 0.36, "grad_norm": 5.713934908895701, "learning_rate": 7.354184126257674e-06, "loss": 0.6258, "step": 12698 }, { "epoch": 0.36, "grad_norm": 5.446481806296124, "learning_rate": 7.353774978985049e-06, "loss": 0.4739, "step": 12699 }, { "epoch": 0.36, "grad_norm": 10.318825625460766, "learning_rate": 7.353365811463189e-06, "loss": 0.6649, "step": 12700 }, { "epoch": 0.36, "grad_norm": 8.99656913849568, "learning_rate": 7.352956623695612e-06, "loss": 0.9972, "step": 12701 }, { "epoch": 0.36, "grad_norm": 7.394033859621155, "learning_rate": 7.352547415685839e-06, "loss": 0.5152, "step": 12702 }, { "epoch": 0.36, "grad_norm": 5.867633868633856, "learning_rate": 7.352138187437391e-06, "loss": 0.3865, "step": 12703 }, { "epoch": 0.36, "grad_norm": 3.3849200583840973, "learning_rate": 7.351728938953786e-06, "loss": 0.7468, "step": 12704 }, { "epoch": 0.36, "grad_norm": 6.258878276615877, "learning_rate": 7.351319670238548e-06, "loss": 0.5163, "step": 12705 }, { "epoch": 0.36, "grad_norm": 4.151174615751572, "learning_rate": 7.350910381295196e-06, "loss": 0.5987, "step": 12706 }, { "epoch": 0.36, "grad_norm": 3.9029633695489814, "learning_rate": 7.3505010721272504e-06, "loss": 0.4276, "step": 12707 }, { "epoch": 0.36, "grad_norm": 3.8277724123140358, "learning_rate": 7.350091742738234e-06, "loss": 0.6576, "step": 12708 }, { "epoch": 0.36, "grad_norm": 18.356499733296, "learning_rate": 7.349682393131669e-06, "loss": 1.1722, "step": 12709 }, { "epoch": 0.36, "grad_norm": 3.8538206649671256, "learning_rate": 7.349273023311074e-06, "loss": 0.5581, "step": 12710 }, { "epoch": 0.36, "grad_norm": 4.074345909467719, "learning_rate": 7.348863633279974e-06, "loss": 0.3804, "step": 12711 }, { "epoch": 0.36, "grad_norm": 5.994946378097424, "learning_rate": 7.348454223041888e-06, "loss": 0.3833, "step": 12712 }, { "epoch": 0.36, "grad_norm": 5.227294961784027, "learning_rate": 7.348044792600339e-06, "loss": 0.5888, "step": 12713 }, { "epoch": 0.36, "grad_norm": 5.7099504570223, "learning_rate": 7.347635341958852e-06, "loss": 0.5521, "step": 12714 }, { "epoch": 0.36, "grad_norm": 3.7610724697134623, "learning_rate": 7.347225871120946e-06, "loss": 0.3803, "step": 12715 }, { "epoch": 0.36, "grad_norm": 5.761029073526965, "learning_rate": 7.346816380090144e-06, "loss": 0.5922, "step": 12716 }, { "epoch": 0.36, "grad_norm": 9.221943137402885, "learning_rate": 7.346406868869971e-06, "loss": 0.6903, "step": 12717 }, { "epoch": 0.36, "grad_norm": 6.1336170227674325, "learning_rate": 7.345997337463948e-06, "loss": 0.6234, "step": 12718 }, { "epoch": 0.36, "grad_norm": 8.128809930614784, "learning_rate": 7.345587785875599e-06, "loss": 0.45, "step": 12719 }, { "epoch": 0.36, "grad_norm": 4.023604483981753, "learning_rate": 7.345178214108446e-06, "loss": 0.4807, "step": 12720 }, { "epoch": 0.36, "grad_norm": 7.949996107928205, "learning_rate": 7.3447686221660145e-06, "loss": 0.0933, "step": 12721 }, { "epoch": 0.36, "grad_norm": 7.422242791950952, "learning_rate": 7.344359010051827e-06, "loss": 0.2975, "step": 12722 }, { "epoch": 0.36, "grad_norm": 5.605222860120282, "learning_rate": 7.343949377769408e-06, "loss": 0.6246, "step": 12723 }, { "epoch": 0.36, "grad_norm": 6.780951954844534, "learning_rate": 7.343539725322281e-06, "loss": 0.2919, "step": 12724 }, { "epoch": 0.36, "grad_norm": 4.8488337510682555, "learning_rate": 7.343130052713969e-06, "loss": 0.3848, "step": 12725 }, { "epoch": 0.36, "grad_norm": 4.9272913119379265, "learning_rate": 7.342720359947999e-06, "loss": 0.4329, "step": 12726 }, { "epoch": 0.36, "grad_norm": 4.542321432085786, "learning_rate": 7.342310647027895e-06, "loss": 0.3911, "step": 12727 }, { "epoch": 0.36, "grad_norm": 3.986432939017759, "learning_rate": 7.34190091395718e-06, "loss": 0.3987, "step": 12728 }, { "epoch": 0.36, "grad_norm": 3.756035588852502, "learning_rate": 7.341491160739378e-06, "loss": 0.2875, "step": 12729 }, { "epoch": 0.36, "grad_norm": 11.291197472615586, "learning_rate": 7.3410813873780175e-06, "loss": 0.5903, "step": 12730 }, { "epoch": 0.36, "grad_norm": 4.606167986901864, "learning_rate": 7.340671593876623e-06, "loss": 0.4692, "step": 12731 }, { "epoch": 0.36, "grad_norm": 5.483999668765458, "learning_rate": 7.3402617802387164e-06, "loss": 0.5059, "step": 12732 }, { "epoch": 0.36, "grad_norm": 5.996387427279825, "learning_rate": 7.339851946467829e-06, "loss": 0.5527, "step": 12733 }, { "epoch": 0.36, "grad_norm": 5.5160077964500775, "learning_rate": 7.339442092567482e-06, "loss": 0.1795, "step": 12734 }, { "epoch": 0.36, "grad_norm": 6.234118654722058, "learning_rate": 7.3390322185412025e-06, "loss": 0.5646, "step": 12735 }, { "epoch": 0.36, "grad_norm": 4.675375861087946, "learning_rate": 7.3386223243925155e-06, "loss": 0.5331, "step": 12736 }, { "epoch": 0.36, "grad_norm": 6.84828655782424, "learning_rate": 7.338212410124951e-06, "loss": 0.7049, "step": 12737 }, { "epoch": 0.36, "grad_norm": 5.756258916692631, "learning_rate": 7.337802475742032e-06, "loss": 0.5341, "step": 12738 }, { "epoch": 0.36, "grad_norm": 7.344566851698928, "learning_rate": 7.337392521247286e-06, "loss": 0.325, "step": 12739 }, { "epoch": 0.36, "grad_norm": 4.467682110629509, "learning_rate": 7.33698254664424e-06, "loss": 0.3279, "step": 12740 }, { "epoch": 0.36, "grad_norm": 5.397667836861342, "learning_rate": 7.336572551936421e-06, "loss": 0.3686, "step": 12741 }, { "epoch": 0.36, "grad_norm": 7.729625993294619, "learning_rate": 7.336162537127355e-06, "loss": 0.648, "step": 12742 }, { "epoch": 0.36, "grad_norm": 12.029719426283293, "learning_rate": 7.335752502220571e-06, "loss": 0.6016, "step": 12743 }, { "epoch": 0.36, "grad_norm": 5.814408183925246, "learning_rate": 7.335342447219597e-06, "loss": 0.5267, "step": 12744 }, { "epoch": 0.36, "grad_norm": 4.873198579861646, "learning_rate": 7.334932372127959e-06, "loss": 0.3566, "step": 12745 }, { "epoch": 0.37, "grad_norm": 4.399637983341408, "learning_rate": 7.334522276949184e-06, "loss": 0.3824, "step": 12746 }, { "epoch": 0.37, "grad_norm": 4.580903209555598, "learning_rate": 7.3341121616868024e-06, "loss": 0.5167, "step": 12747 }, { "epoch": 0.37, "grad_norm": 9.112011274463086, "learning_rate": 7.33370202634434e-06, "loss": 0.5245, "step": 12748 }, { "epoch": 0.37, "grad_norm": 6.4830324736497795, "learning_rate": 7.333291870925328e-06, "loss": 0.4217, "step": 12749 }, { "epoch": 0.37, "grad_norm": 9.691896880529749, "learning_rate": 7.332881695433293e-06, "loss": 0.6905, "step": 12750 }, { "epoch": 0.37, "grad_norm": 6.633662264861733, "learning_rate": 7.332471499871763e-06, "loss": 0.6948, "step": 12751 }, { "epoch": 0.37, "grad_norm": 7.534032094106829, "learning_rate": 7.3320612842442684e-06, "loss": 0.7576, "step": 12752 }, { "epoch": 0.37, "grad_norm": 6.4793133018884745, "learning_rate": 7.331651048554338e-06, "loss": 0.587, "step": 12753 }, { "epoch": 0.37, "grad_norm": 3.8318464602090594, "learning_rate": 7.3312407928055e-06, "loss": 0.4206, "step": 12754 }, { "epoch": 0.37, "grad_norm": 8.625863018258139, "learning_rate": 7.3308305170012865e-06, "loss": 0.7141, "step": 12755 }, { "epoch": 0.37, "grad_norm": 7.03205121031896, "learning_rate": 7.330420221145223e-06, "loss": 0.4998, "step": 12756 }, { "epoch": 0.37, "grad_norm": 19.896526890461473, "learning_rate": 7.330009905240841e-06, "loss": 0.3105, "step": 12757 }, { "epoch": 0.37, "grad_norm": 5.874379206385092, "learning_rate": 7.329599569291672e-06, "loss": 0.7203, "step": 12758 }, { "epoch": 0.37, "grad_norm": 9.271864541526478, "learning_rate": 7.3291892133012445e-06, "loss": 0.7047, "step": 12759 }, { "epoch": 0.37, "grad_norm": 4.92984027897852, "learning_rate": 7.328778837273089e-06, "loss": 0.3715, "step": 12760 }, { "epoch": 0.37, "grad_norm": 8.812590037217351, "learning_rate": 7.328368441210736e-06, "loss": 0.6029, "step": 12761 }, { "epoch": 0.37, "grad_norm": 4.834871337898706, "learning_rate": 7.327958025117715e-06, "loss": 0.4405, "step": 12762 }, { "epoch": 0.37, "grad_norm": 3.3195953312679327, "learning_rate": 7.3275475889975575e-06, "loss": 0.483, "step": 12763 }, { "epoch": 0.37, "grad_norm": 4.345283265137224, "learning_rate": 7.327137132853797e-06, "loss": 0.3079, "step": 12764 }, { "epoch": 0.37, "grad_norm": 4.58522876936293, "learning_rate": 7.32672665668996e-06, "loss": 0.1099, "step": 12765 }, { "epoch": 0.37, "grad_norm": 9.190243784895245, "learning_rate": 7.32631616050958e-06, "loss": 0.7647, "step": 12766 }, { "epoch": 0.37, "grad_norm": 5.181665839613076, "learning_rate": 7.32590564431619e-06, "loss": 0.4929, "step": 12767 }, { "epoch": 0.37, "grad_norm": 13.218623311763626, "learning_rate": 7.3254951081133195e-06, "loss": 0.4084, "step": 12768 }, { "epoch": 0.37, "grad_norm": 4.158452217808157, "learning_rate": 7.3250845519045e-06, "loss": 0.3688, "step": 12769 }, { "epoch": 0.37, "grad_norm": 4.71031865599058, "learning_rate": 7.3246739756932665e-06, "loss": 0.6911, "step": 12770 }, { "epoch": 0.37, "grad_norm": 4.930992256071826, "learning_rate": 7.324263379483147e-06, "loss": 0.2303, "step": 12771 }, { "epoch": 0.37, "grad_norm": 8.145182324190129, "learning_rate": 7.323852763277677e-06, "loss": 0.7141, "step": 12772 }, { "epoch": 0.37, "grad_norm": 3.850996645202602, "learning_rate": 7.323442127080386e-06, "loss": 0.4062, "step": 12773 }, { "epoch": 0.37, "grad_norm": 6.314729391296894, "learning_rate": 7.32303147089481e-06, "loss": 0.3485, "step": 12774 }, { "epoch": 0.37, "grad_norm": 5.334004042095745, "learning_rate": 7.32262079472448e-06, "loss": 0.8459, "step": 12775 }, { "epoch": 0.37, "grad_norm": 10.541677050277562, "learning_rate": 7.32221009857293e-06, "loss": 0.7563, "step": 12776 }, { "epoch": 0.37, "grad_norm": 5.836990179797681, "learning_rate": 7.321799382443692e-06, "loss": 0.2744, "step": 12777 }, { "epoch": 0.37, "grad_norm": 7.041301680215573, "learning_rate": 7.321388646340299e-06, "loss": 0.3229, "step": 12778 }, { "epoch": 0.37, "grad_norm": 6.7196540756540335, "learning_rate": 7.320977890266287e-06, "loss": 0.5534, "step": 12779 }, { "epoch": 0.37, "grad_norm": 10.49455228900128, "learning_rate": 7.320567114225185e-06, "loss": 0.4203, "step": 12780 }, { "epoch": 0.37, "grad_norm": 10.734561568725287, "learning_rate": 7.320156318220533e-06, "loss": 0.5861, "step": 12781 }, { "epoch": 0.37, "grad_norm": 4.826880112432538, "learning_rate": 7.3197455022558595e-06, "loss": 0.6229, "step": 12782 }, { "epoch": 0.37, "grad_norm": 5.380596618329351, "learning_rate": 7.319334666334702e-06, "loss": 0.6909, "step": 12783 }, { "epoch": 0.37, "grad_norm": 6.340904349259747, "learning_rate": 7.318923810460595e-06, "loss": 0.3996, "step": 12784 }, { "epoch": 0.37, "grad_norm": 6.315943619034936, "learning_rate": 7.31851293463707e-06, "loss": 0.3987, "step": 12785 }, { "epoch": 0.37, "grad_norm": 3.5683934669974895, "learning_rate": 7.318102038867665e-06, "loss": 0.337, "step": 12786 }, { "epoch": 0.37, "grad_norm": 4.81781418417729, "learning_rate": 7.317691123155914e-06, "loss": 0.1307, "step": 12787 }, { "epoch": 0.37, "grad_norm": 5.77648316861057, "learning_rate": 7.317280187505351e-06, "loss": 0.6162, "step": 12788 }, { "epoch": 0.37, "grad_norm": 5.175000816382007, "learning_rate": 7.316869231919511e-06, "loss": 0.791, "step": 12789 }, { "epoch": 0.37, "grad_norm": 4.392841363450973, "learning_rate": 7.31645825640193e-06, "loss": 0.481, "step": 12790 }, { "epoch": 0.37, "grad_norm": 4.4427170509291605, "learning_rate": 7.316047260956145e-06, "loss": 0.1529, "step": 12791 }, { "epoch": 0.37, "grad_norm": 6.044870522424243, "learning_rate": 7.315636245585691e-06, "loss": 0.5393, "step": 12792 }, { "epoch": 0.37, "grad_norm": 5.771565011069524, "learning_rate": 7.315225210294102e-06, "loss": 0.3153, "step": 12793 }, { "epoch": 0.37, "grad_norm": 5.2460107633456285, "learning_rate": 7.314814155084917e-06, "loss": 0.6112, "step": 12794 }, { "epoch": 0.37, "grad_norm": 8.457659113529232, "learning_rate": 7.314403079961671e-06, "loss": 0.4169, "step": 12795 }, { "epoch": 0.37, "grad_norm": 4.209290077808485, "learning_rate": 7.3139919849278995e-06, "loss": 0.4404, "step": 12796 }, { "epoch": 0.37, "grad_norm": 5.409061913379794, "learning_rate": 7.3135808699871395e-06, "loss": 0.7282, "step": 12797 }, { "epoch": 0.37, "grad_norm": 6.702569542836725, "learning_rate": 7.313169735142929e-06, "loss": 0.9443, "step": 12798 }, { "epoch": 0.37, "grad_norm": 4.54368921472748, "learning_rate": 7.312758580398802e-06, "loss": 0.3783, "step": 12799 }, { "epoch": 0.37, "grad_norm": 4.8904656332454515, "learning_rate": 7.3123474057583e-06, "loss": 0.4972, "step": 12800 }, { "epoch": 0.37, "grad_norm": 5.578829167902845, "learning_rate": 7.311936211224957e-06, "loss": 0.2889, "step": 12801 }, { "epoch": 0.37, "grad_norm": 4.541176439944798, "learning_rate": 7.311524996802312e-06, "loss": 0.3716, "step": 12802 }, { "epoch": 0.37, "grad_norm": 4.8742394465382946, "learning_rate": 7.311113762493902e-06, "loss": 0.4299, "step": 12803 }, { "epoch": 0.37, "grad_norm": 5.769242782458, "learning_rate": 7.310702508303264e-06, "loss": 0.2944, "step": 12804 }, { "epoch": 0.37, "grad_norm": 4.306726045691263, "learning_rate": 7.310291234233938e-06, "loss": 0.3599, "step": 12805 }, { "epoch": 0.37, "grad_norm": 7.841693319774686, "learning_rate": 7.30987994028946e-06, "loss": 0.797, "step": 12806 }, { "epoch": 0.37, "grad_norm": 6.912151783869398, "learning_rate": 7.309468626473369e-06, "loss": 0.8217, "step": 12807 }, { "epoch": 0.37, "grad_norm": 3.7459916626959147, "learning_rate": 7.309057292789206e-06, "loss": 0.4876, "step": 12808 }, { "epoch": 0.37, "grad_norm": 5.092032874476205, "learning_rate": 7.308645939240504e-06, "loss": 0.7874, "step": 12809 }, { "epoch": 0.37, "grad_norm": 6.814344523890996, "learning_rate": 7.308234565830808e-06, "loss": 0.9377, "step": 12810 }, { "epoch": 0.37, "grad_norm": 5.8191406708880145, "learning_rate": 7.307823172563654e-06, "loss": 0.4954, "step": 12811 }, { "epoch": 0.37, "grad_norm": 3.547565376211619, "learning_rate": 7.3074117594425795e-06, "loss": 0.5658, "step": 12812 }, { "epoch": 0.37, "grad_norm": 7.1536989954160495, "learning_rate": 7.3070003264711276e-06, "loss": 0.4154, "step": 12813 }, { "epoch": 0.37, "grad_norm": 12.119680004427881, "learning_rate": 7.306588873652835e-06, "loss": 0.9866, "step": 12814 }, { "epoch": 0.37, "grad_norm": 5.647359531474366, "learning_rate": 7.306177400991243e-06, "loss": 0.4329, "step": 12815 }, { "epoch": 0.37, "grad_norm": 4.395291655132011, "learning_rate": 7.305765908489889e-06, "loss": 0.301, "step": 12816 }, { "epoch": 0.37, "grad_norm": 3.776829776294319, "learning_rate": 7.305354396152316e-06, "loss": 0.2915, "step": 12817 }, { "epoch": 0.37, "grad_norm": 5.090534903968394, "learning_rate": 7.304942863982064e-06, "loss": 0.6317, "step": 12818 }, { "epoch": 0.37, "grad_norm": 4.375719610705333, "learning_rate": 7.304531311982671e-06, "loss": 0.5174, "step": 12819 }, { "epoch": 0.37, "grad_norm": 4.794066759608282, "learning_rate": 7.304119740157679e-06, "loss": 0.4801, "step": 12820 }, { "epoch": 0.37, "grad_norm": 3.7796091665324933, "learning_rate": 7.30370814851063e-06, "loss": 0.4578, "step": 12821 }, { "epoch": 0.37, "grad_norm": 2.6823230272453564, "learning_rate": 7.303296537045062e-06, "loss": 0.3825, "step": 12822 }, { "epoch": 0.37, "grad_norm": 3.6253399525107644, "learning_rate": 7.3028849057645165e-06, "loss": 0.2215, "step": 12823 }, { "epoch": 0.37, "grad_norm": 4.635606794708576, "learning_rate": 7.302473254672537e-06, "loss": 0.409, "step": 12824 }, { "epoch": 0.37, "grad_norm": 5.326339713439775, "learning_rate": 7.3020615837726635e-06, "loss": 0.4187, "step": 12825 }, { "epoch": 0.37, "grad_norm": 8.298103083582813, "learning_rate": 7.301649893068438e-06, "loss": 1.1291, "step": 12826 }, { "epoch": 0.37, "grad_norm": 3.6078496905458994, "learning_rate": 7.301238182563402e-06, "loss": 0.4923, "step": 12827 }, { "epoch": 0.37, "grad_norm": 3.5747362106209883, "learning_rate": 7.300826452261097e-06, "loss": 0.6163, "step": 12828 }, { "epoch": 0.37, "grad_norm": 4.1842839146358894, "learning_rate": 7.300414702165065e-06, "loss": 0.1859, "step": 12829 }, { "epoch": 0.37, "grad_norm": 5.691659894225232, "learning_rate": 7.300002932278848e-06, "loss": 0.7024, "step": 12830 }, { "epoch": 0.37, "grad_norm": 4.738583950553721, "learning_rate": 7.299591142605989e-06, "loss": 0.4105, "step": 12831 }, { "epoch": 0.37, "grad_norm": 3.180894889572381, "learning_rate": 7.299179333150031e-06, "loss": 0.2493, "step": 12832 }, { "epoch": 0.37, "grad_norm": 3.4972633153029413, "learning_rate": 7.298767503914517e-06, "loss": 0.3554, "step": 12833 }, { "epoch": 0.37, "grad_norm": 6.191150089584543, "learning_rate": 7.2983556549029885e-06, "loss": 0.6833, "step": 12834 }, { "epoch": 0.37, "grad_norm": 2.0158278784560473, "learning_rate": 7.29794378611899e-06, "loss": 0.1839, "step": 12835 }, { "epoch": 0.37, "grad_norm": 9.25062891394403, "learning_rate": 7.297531897566064e-06, "loss": 0.5179, "step": 12836 }, { "epoch": 0.37, "grad_norm": 5.121698292912761, "learning_rate": 7.297119989247752e-06, "loss": 0.7774, "step": 12837 }, { "epoch": 0.37, "grad_norm": 9.767466525588466, "learning_rate": 7.296708061167602e-06, "loss": 0.9428, "step": 12838 }, { "epoch": 0.37, "grad_norm": 6.666775257497764, "learning_rate": 7.296296113329153e-06, "loss": 0.9137, "step": 12839 }, { "epoch": 0.37, "grad_norm": 3.7722507292828764, "learning_rate": 7.295884145735952e-06, "loss": 0.253, "step": 12840 }, { "epoch": 0.37, "grad_norm": 7.048151074092583, "learning_rate": 7.295472158391544e-06, "loss": 0.542, "step": 12841 }, { "epoch": 0.37, "grad_norm": 5.116361618371562, "learning_rate": 7.29506015129947e-06, "loss": 0.5368, "step": 12842 }, { "epoch": 0.37, "grad_norm": 8.926893169168517, "learning_rate": 7.294648124463277e-06, "loss": 0.3845, "step": 12843 }, { "epoch": 0.37, "grad_norm": 2.5384594010297405, "learning_rate": 7.294236077886507e-06, "loss": 0.2212, "step": 12844 }, { "epoch": 0.37, "grad_norm": 9.814961750268857, "learning_rate": 7.29382401157271e-06, "loss": 0.5632, "step": 12845 }, { "epoch": 0.37, "grad_norm": 7.57890407057714, "learning_rate": 7.293411925525424e-06, "loss": 0.6351, "step": 12846 }, { "epoch": 0.37, "grad_norm": 4.129171545190685, "learning_rate": 7.292999819748199e-06, "loss": 0.4968, "step": 12847 }, { "epoch": 0.37, "grad_norm": 1.8067134038339172, "learning_rate": 7.29258769424458e-06, "loss": 0.1116, "step": 12848 }, { "epoch": 0.37, "grad_norm": 6.856346487167901, "learning_rate": 7.292175549018109e-06, "loss": 0.3439, "step": 12849 }, { "epoch": 0.37, "grad_norm": 4.838490393140756, "learning_rate": 7.2917633840723345e-06, "loss": 0.3946, "step": 12850 }, { "epoch": 0.37, "grad_norm": 9.104826169310662, "learning_rate": 7.291351199410803e-06, "loss": 0.5609, "step": 12851 }, { "epoch": 0.37, "grad_norm": 3.1101631430861194, "learning_rate": 7.2909389950370594e-06, "loss": 0.4425, "step": 12852 }, { "epoch": 0.37, "grad_norm": 7.722882311401294, "learning_rate": 7.290526770954648e-06, "loss": 0.5759, "step": 12853 }, { "epoch": 0.37, "grad_norm": 4.33329637523932, "learning_rate": 7.2901145271671185e-06, "loss": 0.1535, "step": 12854 }, { "epoch": 0.37, "grad_norm": 3.4544852499073424, "learning_rate": 7.2897022636780136e-06, "loss": 0.4851, "step": 12855 }, { "epoch": 0.37, "grad_norm": 5.447299457936009, "learning_rate": 7.289289980490883e-06, "loss": 0.4167, "step": 12856 }, { "epoch": 0.37, "grad_norm": 5.539631500933766, "learning_rate": 7.2888776776092725e-06, "loss": 0.6958, "step": 12857 }, { "epoch": 0.37, "grad_norm": 4.643484448404108, "learning_rate": 7.288465355036731e-06, "loss": 0.3174, "step": 12858 }, { "epoch": 0.37, "grad_norm": 6.529913258991454, "learning_rate": 7.288053012776802e-06, "loss": 0.6703, "step": 12859 }, { "epoch": 0.37, "grad_norm": 3.416904793946619, "learning_rate": 7.287640650833034e-06, "loss": 0.4961, "step": 12860 }, { "epoch": 0.37, "grad_norm": 5.837204148435089, "learning_rate": 7.287228269208977e-06, "loss": 0.3905, "step": 12861 }, { "epoch": 0.37, "grad_norm": 7.75909978629695, "learning_rate": 7.286815867908177e-06, "loss": 0.5831, "step": 12862 }, { "epoch": 0.37, "grad_norm": 9.399439376995133, "learning_rate": 7.2864034469341785e-06, "loss": 0.4674, "step": 12863 }, { "epoch": 0.37, "grad_norm": 3.896489820739459, "learning_rate": 7.285991006290536e-06, "loss": 0.3265, "step": 12864 }, { "epoch": 0.37, "grad_norm": 6.9601501636091, "learning_rate": 7.285578545980793e-06, "loss": 0.5861, "step": 12865 }, { "epoch": 0.37, "grad_norm": 7.277202610975355, "learning_rate": 7.285166066008499e-06, "loss": 1.0757, "step": 12866 }, { "epoch": 0.37, "grad_norm": 6.305799043559728, "learning_rate": 7.284753566377203e-06, "loss": 0.5208, "step": 12867 }, { "epoch": 0.37, "grad_norm": 5.9686209180365335, "learning_rate": 7.284341047090454e-06, "loss": 0.7675, "step": 12868 }, { "epoch": 0.37, "grad_norm": 11.214576178547317, "learning_rate": 7.283928508151799e-06, "loss": 0.7444, "step": 12869 }, { "epoch": 0.37, "grad_norm": 4.547544895388806, "learning_rate": 7.283515949564789e-06, "loss": 0.3323, "step": 12870 }, { "epoch": 0.37, "grad_norm": 4.314354815556762, "learning_rate": 7.2831033713329714e-06, "loss": 0.4163, "step": 12871 }, { "epoch": 0.37, "grad_norm": 3.74020294893374, "learning_rate": 7.2826907734598974e-06, "loss": 0.632, "step": 12872 }, { "epoch": 0.37, "grad_norm": 7.508904511646612, "learning_rate": 7.282278155949116e-06, "loss": 0.6873, "step": 12873 }, { "epoch": 0.37, "grad_norm": 7.785464053426177, "learning_rate": 7.281865518804177e-06, "loss": 0.4666, "step": 12874 }, { "epoch": 0.37, "grad_norm": 8.999503227934827, "learning_rate": 7.281452862028629e-06, "loss": 0.5791, "step": 12875 }, { "epoch": 0.37, "grad_norm": 7.207879373739251, "learning_rate": 7.281040185626022e-06, "loss": 0.6584, "step": 12876 }, { "epoch": 0.37, "grad_norm": 7.553985720903001, "learning_rate": 7.28062748959991e-06, "loss": 0.7503, "step": 12877 }, { "epoch": 0.37, "grad_norm": 5.219556689018399, "learning_rate": 7.280214773953839e-06, "loss": 0.6089, "step": 12878 }, { "epoch": 0.37, "grad_norm": 5.806308771499135, "learning_rate": 7.27980203869136e-06, "loss": 0.3401, "step": 12879 }, { "epoch": 0.37, "grad_norm": 5.676457427165672, "learning_rate": 7.279389283816025e-06, "loss": 0.5087, "step": 12880 }, { "epoch": 0.37, "grad_norm": 10.729857670034663, "learning_rate": 7.278976509331386e-06, "loss": 0.9145, "step": 12881 }, { "epoch": 0.37, "grad_norm": 2.452097975417904, "learning_rate": 7.278563715240992e-06, "loss": 0.2028, "step": 12882 }, { "epoch": 0.37, "grad_norm": 5.788161317023288, "learning_rate": 7.278150901548395e-06, "loss": 0.6381, "step": 12883 }, { "epoch": 0.37, "grad_norm": 4.636783615468445, "learning_rate": 7.277738068257145e-06, "loss": 0.694, "step": 12884 }, { "epoch": 0.37, "grad_norm": 9.355710339064924, "learning_rate": 7.2773252153707974e-06, "loss": 0.8796, "step": 12885 }, { "epoch": 0.37, "grad_norm": 11.26372877185647, "learning_rate": 7.2769123428929005e-06, "loss": 0.5094, "step": 12886 }, { "epoch": 0.37, "grad_norm": 10.766819592817885, "learning_rate": 7.276499450827006e-06, "loss": 0.4811, "step": 12887 }, { "epoch": 0.37, "grad_norm": 9.319075854902605, "learning_rate": 7.276086539176667e-06, "loss": 0.2797, "step": 12888 }, { "epoch": 0.37, "grad_norm": 2.90509665584463, "learning_rate": 7.275673607945436e-06, "loss": 0.3277, "step": 12889 }, { "epoch": 0.37, "grad_norm": 4.901876602104497, "learning_rate": 7.275260657136866e-06, "loss": 0.4475, "step": 12890 }, { "epoch": 0.37, "grad_norm": 10.559979787070263, "learning_rate": 7.274847686754507e-06, "loss": 0.4282, "step": 12891 }, { "epoch": 0.37, "grad_norm": 6.414760091670725, "learning_rate": 7.2744346968019156e-06, "loss": 0.9016, "step": 12892 }, { "epoch": 0.37, "grad_norm": 3.8139644999339715, "learning_rate": 7.274021687282642e-06, "loss": 0.3996, "step": 12893 }, { "epoch": 0.37, "grad_norm": 4.806740571819345, "learning_rate": 7.2736086582002386e-06, "loss": 0.2628, "step": 12894 }, { "epoch": 0.37, "grad_norm": 12.532630749211984, "learning_rate": 7.273195609558261e-06, "loss": 0.5624, "step": 12895 }, { "epoch": 0.37, "grad_norm": 5.450360490968129, "learning_rate": 7.27278254136026e-06, "loss": 0.4411, "step": 12896 }, { "epoch": 0.37, "grad_norm": 9.954125083132837, "learning_rate": 7.272369453609791e-06, "loss": 0.6385, "step": 12897 }, { "epoch": 0.37, "grad_norm": 9.213965930015418, "learning_rate": 7.2719563463104095e-06, "loss": 0.7482, "step": 12898 }, { "epoch": 0.37, "grad_norm": 5.1531813304406295, "learning_rate": 7.271543219465666e-06, "loss": 0.5399, "step": 12899 }, { "epoch": 0.37, "grad_norm": 2.3399616651166304, "learning_rate": 7.2711300730791144e-06, "loss": 0.2494, "step": 12900 }, { "epoch": 0.37, "grad_norm": 13.712371125219843, "learning_rate": 7.270716907154312e-06, "loss": 0.629, "step": 12901 }, { "epoch": 0.37, "grad_norm": 6.593695617740788, "learning_rate": 7.270303721694813e-06, "loss": 0.4959, "step": 12902 }, { "epoch": 0.37, "grad_norm": 6.698338540309234, "learning_rate": 7.269890516704168e-06, "loss": 0.7249, "step": 12903 }, { "epoch": 0.37, "grad_norm": 3.3252398382894452, "learning_rate": 7.2694772921859346e-06, "loss": 0.3657, "step": 12904 }, { "epoch": 0.37, "grad_norm": 5.601819361034057, "learning_rate": 7.269064048143668e-06, "loss": 0.9899, "step": 12905 }, { "epoch": 0.37, "grad_norm": 5.214193490737575, "learning_rate": 7.268650784580923e-06, "loss": 0.6075, "step": 12906 }, { "epoch": 0.37, "grad_norm": 5.601378497688881, "learning_rate": 7.268237501501254e-06, "loss": 0.7526, "step": 12907 }, { "epoch": 0.37, "grad_norm": 5.67688389232409, "learning_rate": 7.267824198908218e-06, "loss": 0.3814, "step": 12908 }, { "epoch": 0.37, "grad_norm": 6.413186760428337, "learning_rate": 7.26741087680537e-06, "loss": 0.7965, "step": 12909 }, { "epoch": 0.37, "grad_norm": 6.21609793235585, "learning_rate": 7.266997535196264e-06, "loss": 0.3742, "step": 12910 }, { "epoch": 0.37, "grad_norm": 5.115288907838612, "learning_rate": 7.266584174084458e-06, "loss": 0.2226, "step": 12911 }, { "epoch": 0.37, "grad_norm": 4.000642009711462, "learning_rate": 7.266170793473509e-06, "loss": 0.4979, "step": 12912 }, { "epoch": 0.37, "grad_norm": 5.893189537284821, "learning_rate": 7.265757393366968e-06, "loss": 0.5307, "step": 12913 }, { "epoch": 0.37, "grad_norm": 5.954877860660803, "learning_rate": 7.265343973768398e-06, "loss": 0.5274, "step": 12914 }, { "epoch": 0.37, "grad_norm": 6.521891665471658, "learning_rate": 7.264930534681351e-06, "loss": 0.6402, "step": 12915 }, { "epoch": 0.37, "grad_norm": 6.278854444707043, "learning_rate": 7.264517076109386e-06, "loss": 0.2334, "step": 12916 }, { "epoch": 0.37, "grad_norm": 16.69157014994428, "learning_rate": 7.26410359805606e-06, "loss": 0.4498, "step": 12917 }, { "epoch": 0.37, "grad_norm": 6.114389415397602, "learning_rate": 7.26369010052493e-06, "loss": 0.7017, "step": 12918 }, { "epoch": 0.37, "grad_norm": 4.092063964284157, "learning_rate": 7.263276583519552e-06, "loss": 0.5752, "step": 12919 }, { "epoch": 0.37, "grad_norm": 9.104150179943343, "learning_rate": 7.262863047043485e-06, "loss": 0.7241, "step": 12920 }, { "epoch": 0.37, "grad_norm": 4.428262848145354, "learning_rate": 7.262449491100283e-06, "loss": 0.6479, "step": 12921 }, { "epoch": 0.37, "grad_norm": 8.866014767705698, "learning_rate": 7.262035915693509e-06, "loss": 0.3925, "step": 12922 }, { "epoch": 0.37, "grad_norm": 10.86118544162596, "learning_rate": 7.261622320826719e-06, "loss": 0.7633, "step": 12923 }, { "epoch": 0.37, "grad_norm": 4.69706497874154, "learning_rate": 7.26120870650347e-06, "loss": 0.5867, "step": 12924 }, { "epoch": 0.37, "grad_norm": 4.215059912992238, "learning_rate": 7.260795072727322e-06, "loss": 0.5684, "step": 12925 }, { "epoch": 0.37, "grad_norm": 2.946032651087113, "learning_rate": 7.260381419501829e-06, "loss": 0.2658, "step": 12926 }, { "epoch": 0.37, "grad_norm": 5.500189799588482, "learning_rate": 7.259967746830556e-06, "loss": 0.5595, "step": 12927 }, { "epoch": 0.37, "grad_norm": 6.664870826917491, "learning_rate": 7.259554054717057e-06, "loss": 0.6366, "step": 12928 }, { "epoch": 0.37, "grad_norm": 2.5339043440102698, "learning_rate": 7.259140343164894e-06, "loss": 0.3032, "step": 12929 }, { "epoch": 0.37, "grad_norm": 5.267992296863506, "learning_rate": 7.258726612177624e-06, "loss": 0.5991, "step": 12930 }, { "epoch": 0.37, "grad_norm": 5.640237092181867, "learning_rate": 7.258312861758807e-06, "loss": 0.831, "step": 12931 }, { "epoch": 0.37, "grad_norm": 1.7008543517488999, "learning_rate": 7.257899091912003e-06, "loss": 0.1123, "step": 12932 }, { "epoch": 0.37, "grad_norm": 5.526143923097096, "learning_rate": 7.257485302640771e-06, "loss": 0.7103, "step": 12933 }, { "epoch": 0.37, "grad_norm": 7.191239288231922, "learning_rate": 7.257071493948671e-06, "loss": 0.5367, "step": 12934 }, { "epoch": 0.37, "grad_norm": 6.719369158936981, "learning_rate": 7.256657665839263e-06, "loss": 0.5367, "step": 12935 }, { "epoch": 0.37, "grad_norm": 4.403622865381995, "learning_rate": 7.256243818316106e-06, "loss": 0.3881, "step": 12936 }, { "epoch": 0.37, "grad_norm": 4.667530604273901, "learning_rate": 7.25582995138276e-06, "loss": 0.5022, "step": 12937 }, { "epoch": 0.37, "grad_norm": 4.346567379167574, "learning_rate": 7.25541606504279e-06, "loss": 0.3006, "step": 12938 }, { "epoch": 0.37, "grad_norm": 3.149283359388151, "learning_rate": 7.255002159299751e-06, "loss": 0.3098, "step": 12939 }, { "epoch": 0.37, "grad_norm": 3.618622826695329, "learning_rate": 7.254588234157207e-06, "loss": 0.284, "step": 12940 }, { "epoch": 0.37, "grad_norm": 6.820631161790705, "learning_rate": 7.254174289618719e-06, "loss": 0.4027, "step": 12941 }, { "epoch": 0.37, "grad_norm": 9.795574168794609, "learning_rate": 7.253760325687846e-06, "loss": 0.45, "step": 12942 }, { "epoch": 0.37, "grad_norm": 12.105876353203543, "learning_rate": 7.25334634236815e-06, "loss": 0.5167, "step": 12943 }, { "epoch": 0.37, "grad_norm": 5.562085639864929, "learning_rate": 7.252932339663192e-06, "loss": 0.5543, "step": 12944 }, { "epoch": 0.37, "grad_norm": 5.01141603883213, "learning_rate": 7.252518317576535e-06, "loss": 0.2879, "step": 12945 }, { "epoch": 0.37, "grad_norm": 6.336122484246653, "learning_rate": 7.2521042761117425e-06, "loss": 0.3042, "step": 12946 }, { "epoch": 0.37, "grad_norm": 3.451971387841828, "learning_rate": 7.251690215272372e-06, "loss": 0.2342, "step": 12947 }, { "epoch": 0.37, "grad_norm": 1.5436386851089383, "learning_rate": 7.251276135061988e-06, "loss": 0.2218, "step": 12948 }, { "epoch": 0.37, "grad_norm": 4.258298401711079, "learning_rate": 7.250862035484154e-06, "loss": 0.3587, "step": 12949 }, { "epoch": 0.37, "grad_norm": 3.807254058749559, "learning_rate": 7.25044791654243e-06, "loss": 0.6827, "step": 12950 }, { "epoch": 0.37, "grad_norm": 5.840759409689168, "learning_rate": 7.25003377824038e-06, "loss": 0.4754, "step": 12951 }, { "epoch": 0.37, "grad_norm": 3.707955248271506, "learning_rate": 7.2496196205815665e-06, "loss": 0.451, "step": 12952 }, { "epoch": 0.37, "grad_norm": 6.538559031730598, "learning_rate": 7.249205443569551e-06, "loss": 0.4455, "step": 12953 }, { "epoch": 0.37, "grad_norm": 5.198090830033919, "learning_rate": 7.248791247207898e-06, "loss": 0.7951, "step": 12954 }, { "epoch": 0.37, "grad_norm": 7.556253864549228, "learning_rate": 7.248377031500173e-06, "loss": 0.6348, "step": 12955 }, { "epoch": 0.37, "grad_norm": 6.975919209005892, "learning_rate": 7.247962796449936e-06, "loss": 0.5988, "step": 12956 }, { "epoch": 0.37, "grad_norm": 2.405925035207804, "learning_rate": 7.2475485420607515e-06, "loss": 0.2333, "step": 12957 }, { "epoch": 0.37, "grad_norm": 10.114899589447496, "learning_rate": 7.247134268336185e-06, "loss": 0.2574, "step": 12958 }, { "epoch": 0.37, "grad_norm": 5.29177511910066, "learning_rate": 7.246719975279798e-06, "loss": 0.3886, "step": 12959 }, { "epoch": 0.37, "grad_norm": 4.625455241093861, "learning_rate": 7.246305662895155e-06, "loss": 0.3147, "step": 12960 }, { "epoch": 0.37, "grad_norm": 5.671977249779157, "learning_rate": 7.2458913311858225e-06, "loss": 0.3262, "step": 12961 }, { "epoch": 0.37, "grad_norm": 11.127133368488133, "learning_rate": 7.245476980155362e-06, "loss": 0.7236, "step": 12962 }, { "epoch": 0.37, "grad_norm": 9.433970189136199, "learning_rate": 7.245062609807341e-06, "loss": 0.3663, "step": 12963 }, { "epoch": 0.37, "grad_norm": 7.273729099283642, "learning_rate": 7.2446482201453204e-06, "loss": 0.8531, "step": 12964 }, { "epoch": 0.37, "grad_norm": 6.246692850606969, "learning_rate": 7.244233811172871e-06, "loss": 0.3642, "step": 12965 }, { "epoch": 0.37, "grad_norm": 5.088584129237019, "learning_rate": 7.243819382893551e-06, "loss": 0.7485, "step": 12966 }, { "epoch": 0.37, "grad_norm": 10.572952876788491, "learning_rate": 7.243404935310931e-06, "loss": 0.9009, "step": 12967 }, { "epoch": 0.37, "grad_norm": 18.19239389967085, "learning_rate": 7.242990468428575e-06, "loss": 0.8192, "step": 12968 }, { "epoch": 0.37, "grad_norm": 3.8777655454128137, "learning_rate": 7.242575982250048e-06, "loss": 0.2279, "step": 12969 }, { "epoch": 0.37, "grad_norm": 15.080583820576901, "learning_rate": 7.242161476778913e-06, "loss": 0.3709, "step": 12970 }, { "epoch": 0.37, "grad_norm": 3.0550428413096844, "learning_rate": 7.241746952018741e-06, "loss": 0.1197, "step": 12971 }, { "epoch": 0.37, "grad_norm": 6.299340042485063, "learning_rate": 7.241332407973098e-06, "loss": 0.7478, "step": 12972 }, { "epoch": 0.37, "grad_norm": 8.401051101316645, "learning_rate": 7.240917844645545e-06, "loss": 0.6312, "step": 12973 }, { "epoch": 0.37, "grad_norm": 5.877930539294775, "learning_rate": 7.240503262039653e-06, "loss": 0.795, "step": 12974 }, { "epoch": 0.37, "grad_norm": 10.934869329982591, "learning_rate": 7.240088660158987e-06, "loss": 0.7952, "step": 12975 }, { "epoch": 0.37, "grad_norm": 10.028477460896834, "learning_rate": 7.239674039007115e-06, "loss": 0.4722, "step": 12976 }, { "epoch": 0.37, "grad_norm": 4.827355308043508, "learning_rate": 7.239259398587603e-06, "loss": 0.2698, "step": 12977 }, { "epoch": 0.37, "grad_norm": 5.980084267982589, "learning_rate": 7.2388447389040174e-06, "loss": 0.572, "step": 12978 }, { "epoch": 0.37, "grad_norm": 7.283072722360395, "learning_rate": 7.238430059959925e-06, "loss": 0.3328, "step": 12979 }, { "epoch": 0.37, "grad_norm": 3.2694023954643483, "learning_rate": 7.238015361758897e-06, "loss": 0.2515, "step": 12980 }, { "epoch": 0.37, "grad_norm": 4.877636367792034, "learning_rate": 7.237600644304497e-06, "loss": 0.2932, "step": 12981 }, { "epoch": 0.37, "grad_norm": 5.261071746626987, "learning_rate": 7.2371859076002945e-06, "loss": 0.31, "step": 12982 }, { "epoch": 0.37, "grad_norm": 8.6686968504054, "learning_rate": 7.236771151649856e-06, "loss": 0.8243, "step": 12983 }, { "epoch": 0.37, "grad_norm": 7.745985037098012, "learning_rate": 7.236356376456752e-06, "loss": 0.7082, "step": 12984 }, { "epoch": 0.37, "grad_norm": 6.995360778546564, "learning_rate": 7.235941582024549e-06, "loss": 0.524, "step": 12985 }, { "epoch": 0.37, "grad_norm": 5.836325840623717, "learning_rate": 7.235526768356816e-06, "loss": 0.2847, "step": 12986 }, { "epoch": 0.37, "grad_norm": 5.027530740789047, "learning_rate": 7.23511193545712e-06, "loss": 0.1921, "step": 12987 }, { "epoch": 0.37, "grad_norm": 8.988710316111687, "learning_rate": 7.234697083329032e-06, "loss": 0.4007, "step": 12988 }, { "epoch": 0.37, "grad_norm": 3.8701196208968343, "learning_rate": 7.234282211976123e-06, "loss": 0.5654, "step": 12989 }, { "epoch": 0.37, "grad_norm": 5.946125551388566, "learning_rate": 7.233867321401955e-06, "loss": 0.3138, "step": 12990 }, { "epoch": 0.37, "grad_norm": 3.1719691257886415, "learning_rate": 7.233452411610103e-06, "loss": 0.5389, "step": 12991 }, { "epoch": 0.37, "grad_norm": 4.652845277185928, "learning_rate": 7.233037482604135e-06, "loss": 0.4377, "step": 12992 }, { "epoch": 0.37, "grad_norm": 5.977533398721994, "learning_rate": 7.232622534387622e-06, "loss": 0.2555, "step": 12993 }, { "epoch": 0.37, "grad_norm": 2.5053450486965536, "learning_rate": 7.2322075669641315e-06, "loss": 0.2496, "step": 12994 }, { "epoch": 0.37, "grad_norm": 8.292059468944444, "learning_rate": 7.231792580337232e-06, "loss": 0.6946, "step": 12995 }, { "epoch": 0.37, "grad_norm": 5.846318501545643, "learning_rate": 7.2313775745104975e-06, "loss": 0.4271, "step": 12996 }, { "epoch": 0.37, "grad_norm": 4.092931767584401, "learning_rate": 7.2309625494874966e-06, "loss": 0.6521, "step": 12997 }, { "epoch": 0.37, "grad_norm": 4.579394909750474, "learning_rate": 7.230547505271799e-06, "loss": 0.442, "step": 12998 }, { "epoch": 0.37, "grad_norm": 4.991271512524727, "learning_rate": 7.230132441866977e-06, "loss": 0.2288, "step": 12999 }, { "epoch": 0.37, "grad_norm": 7.839581327888366, "learning_rate": 7.229717359276599e-06, "loss": 0.3814, "step": 13000 }, { "epoch": 0.37, "grad_norm": 5.345199098474115, "learning_rate": 7.2293022575042374e-06, "loss": 0.5818, "step": 13001 }, { "epoch": 0.37, "grad_norm": 4.833061336668788, "learning_rate": 7.228887136553462e-06, "loss": 0.5868, "step": 13002 }, { "epoch": 0.37, "grad_norm": 5.239495531361463, "learning_rate": 7.228471996427846e-06, "loss": 0.5435, "step": 13003 }, { "epoch": 0.37, "grad_norm": 4.619465248362835, "learning_rate": 7.228056837130959e-06, "loss": 0.2886, "step": 13004 }, { "epoch": 0.37, "grad_norm": 6.305888462194518, "learning_rate": 7.227641658666374e-06, "loss": 0.3304, "step": 13005 }, { "epoch": 0.37, "grad_norm": 4.972747682028996, "learning_rate": 7.227226461037663e-06, "loss": 0.7681, "step": 13006 }, { "epoch": 0.37, "grad_norm": 5.785321364020188, "learning_rate": 7.2268112442483955e-06, "loss": 0.3942, "step": 13007 }, { "epoch": 0.37, "grad_norm": 8.00340603800835, "learning_rate": 7.226396008302146e-06, "loss": 0.7252, "step": 13008 }, { "epoch": 0.37, "grad_norm": 2.926328457640524, "learning_rate": 7.2259807532024865e-06, "loss": 0.1533, "step": 13009 }, { "epoch": 0.37, "grad_norm": 6.918102054183972, "learning_rate": 7.225565478952987e-06, "loss": 0.4884, "step": 13010 }, { "epoch": 0.37, "grad_norm": 7.4635039863425, "learning_rate": 7.2251501855572205e-06, "loss": 0.6016, "step": 13011 }, { "epoch": 0.37, "grad_norm": 5.3554803025937, "learning_rate": 7.224734873018764e-06, "loss": 0.5595, "step": 13012 }, { "epoch": 0.37, "grad_norm": 4.839195127951461, "learning_rate": 7.224319541341185e-06, "loss": 0.4165, "step": 13013 }, { "epoch": 0.37, "grad_norm": 5.176873261035672, "learning_rate": 7.2239041905280595e-06, "loss": 0.2731, "step": 13014 }, { "epoch": 0.37, "grad_norm": 5.162548310651868, "learning_rate": 7.22348882058296e-06, "loss": 0.5665, "step": 13015 }, { "epoch": 0.37, "grad_norm": 6.353690566273328, "learning_rate": 7.223073431509462e-06, "loss": 0.6526, "step": 13016 }, { "epoch": 0.37, "grad_norm": 6.629680815330802, "learning_rate": 7.222658023311136e-06, "loss": 0.5571, "step": 13017 }, { "epoch": 0.37, "grad_norm": 2.083226789293227, "learning_rate": 7.222242595991554e-06, "loss": 0.3014, "step": 13018 }, { "epoch": 0.37, "grad_norm": 8.280431627826852, "learning_rate": 7.221827149554294e-06, "loss": 0.7825, "step": 13019 }, { "epoch": 0.37, "grad_norm": 7.233315276930578, "learning_rate": 7.22141168400293e-06, "loss": 0.7986, "step": 13020 }, { "epoch": 0.37, "grad_norm": 10.881923532428678, "learning_rate": 7.2209961993410335e-06, "loss": 0.8315, "step": 13021 }, { "epoch": 0.37, "grad_norm": 6.805498514996011, "learning_rate": 7.22058069557218e-06, "loss": 0.5545, "step": 13022 }, { "epoch": 0.37, "grad_norm": 6.704214296553061, "learning_rate": 7.220165172699947e-06, "loss": 0.3527, "step": 13023 }, { "epoch": 0.37, "grad_norm": 3.5830095758483154, "learning_rate": 7.219749630727903e-06, "loss": 0.3449, "step": 13024 }, { "epoch": 0.37, "grad_norm": 3.0214859531548885, "learning_rate": 7.219334069659627e-06, "loss": 0.2228, "step": 13025 }, { "epoch": 0.37, "grad_norm": 14.177319339952534, "learning_rate": 7.218918489498694e-06, "loss": 0.6363, "step": 13026 }, { "epoch": 0.37, "grad_norm": 3.4427736923469587, "learning_rate": 7.218502890248678e-06, "loss": 0.4923, "step": 13027 }, { "epoch": 0.37, "grad_norm": 7.207443961548525, "learning_rate": 7.2180872719131545e-06, "loss": 0.6727, "step": 13028 }, { "epoch": 0.37, "grad_norm": 9.098742553271865, "learning_rate": 7.2176716344956995e-06, "loss": 0.3956, "step": 13029 }, { "epoch": 0.37, "grad_norm": 5.323895115017691, "learning_rate": 7.217255977999889e-06, "loss": 0.5529, "step": 13030 }, { "epoch": 0.37, "grad_norm": 4.750157855573071, "learning_rate": 7.2168403024292974e-06, "loss": 0.365, "step": 13031 }, { "epoch": 0.37, "grad_norm": 8.707135302134551, "learning_rate": 7.216424607787502e-06, "loss": 0.7022, "step": 13032 }, { "epoch": 0.37, "grad_norm": 8.961047862802568, "learning_rate": 7.216008894078079e-06, "loss": 0.7599, "step": 13033 }, { "epoch": 0.37, "grad_norm": 4.995995252889511, "learning_rate": 7.215593161304605e-06, "loss": 0.5705, "step": 13034 }, { "epoch": 0.37, "grad_norm": 6.206117076545175, "learning_rate": 7.2151774094706535e-06, "loss": 0.7312, "step": 13035 }, { "epoch": 0.37, "grad_norm": 3.8542103979705544, "learning_rate": 7.214761638579806e-06, "loss": 0.2738, "step": 13036 }, { "epoch": 0.37, "grad_norm": 5.67604437137436, "learning_rate": 7.2143458486356355e-06, "loss": 0.2486, "step": 13037 }, { "epoch": 0.37, "grad_norm": 6.274097982675743, "learning_rate": 7.21393003964172e-06, "loss": 0.6216, "step": 13038 }, { "epoch": 0.37, "grad_norm": 3.478337689774592, "learning_rate": 7.213514211601638e-06, "loss": 0.4325, "step": 13039 }, { "epoch": 0.37, "grad_norm": 6.268241050802983, "learning_rate": 7.213098364518964e-06, "loss": 0.7453, "step": 13040 }, { "epoch": 0.37, "grad_norm": 6.849268948046986, "learning_rate": 7.212682498397279e-06, "loss": 0.3352, "step": 13041 }, { "epoch": 0.37, "grad_norm": 4.380163959848424, "learning_rate": 7.212266613240159e-06, "loss": 0.512, "step": 13042 }, { "epoch": 0.37, "grad_norm": 4.1385544548281965, "learning_rate": 7.211850709051181e-06, "loss": 0.4486, "step": 13043 }, { "epoch": 0.37, "grad_norm": 9.282762860131882, "learning_rate": 7.211434785833923e-06, "loss": 0.6744, "step": 13044 }, { "epoch": 0.37, "grad_norm": 4.4823979927706645, "learning_rate": 7.211018843591963e-06, "loss": 0.4177, "step": 13045 }, { "epoch": 0.37, "grad_norm": 7.123296902208909, "learning_rate": 7.2106028823288835e-06, "loss": 0.457, "step": 13046 }, { "epoch": 0.37, "grad_norm": 4.180969711853661, "learning_rate": 7.210186902048257e-06, "loss": 0.1584, "step": 13047 }, { "epoch": 0.37, "grad_norm": 3.2125964183661777, "learning_rate": 7.209770902753666e-06, "loss": 0.3302, "step": 13048 }, { "epoch": 0.37, "grad_norm": 3.554961921243838, "learning_rate": 7.209354884448687e-06, "loss": 0.2698, "step": 13049 }, { "epoch": 0.37, "grad_norm": 7.557525827437526, "learning_rate": 7.208938847136901e-06, "loss": 0.5082, "step": 13050 }, { "epoch": 0.37, "grad_norm": 9.932453145536668, "learning_rate": 7.208522790821886e-06, "loss": 0.7728, "step": 13051 }, { "epoch": 0.37, "grad_norm": 4.868344630161139, "learning_rate": 7.208106715507221e-06, "loss": 0.4279, "step": 13052 }, { "epoch": 0.37, "grad_norm": 7.465117524864171, "learning_rate": 7.207690621196485e-06, "loss": 0.5177, "step": 13053 }, { "epoch": 0.37, "grad_norm": 9.823840251934689, "learning_rate": 7.2072745078932585e-06, "loss": 0.5254, "step": 13054 }, { "epoch": 0.37, "grad_norm": 5.637949914170526, "learning_rate": 7.206858375601122e-06, "loss": 0.6178, "step": 13055 }, { "epoch": 0.37, "grad_norm": 4.459995564492761, "learning_rate": 7.2064422243236555e-06, "loss": 0.2256, "step": 13056 }, { "epoch": 0.37, "grad_norm": 7.67669619477402, "learning_rate": 7.206026054064436e-06, "loss": 0.7541, "step": 13057 }, { "epoch": 0.37, "grad_norm": 9.384267956164168, "learning_rate": 7.205609864827047e-06, "loss": 0.5421, "step": 13058 }, { "epoch": 0.37, "grad_norm": 4.743157402286798, "learning_rate": 7.205193656615068e-06, "loss": 0.4207, "step": 13059 }, { "epoch": 0.37, "grad_norm": 6.124194189509768, "learning_rate": 7.204777429432079e-06, "loss": 0.4542, "step": 13060 }, { "epoch": 0.37, "grad_norm": 7.017318181426655, "learning_rate": 7.204361183281661e-06, "loss": 0.8995, "step": 13061 }, { "epoch": 0.37, "grad_norm": 7.071656966546273, "learning_rate": 7.203944918167396e-06, "loss": 0.6023, "step": 13062 }, { "epoch": 0.37, "grad_norm": 6.622775838082031, "learning_rate": 7.2035286340928645e-06, "loss": 0.7003, "step": 13063 }, { "epoch": 0.37, "grad_norm": 5.632342357300388, "learning_rate": 7.203112331061647e-06, "loss": 0.7485, "step": 13064 }, { "epoch": 0.37, "grad_norm": 2.601810936048609, "learning_rate": 7.2026960090773255e-06, "loss": 0.3718, "step": 13065 }, { "epoch": 0.37, "grad_norm": 3.639898088438108, "learning_rate": 7.2022796681434814e-06, "loss": 0.2791, "step": 13066 }, { "epoch": 0.37, "grad_norm": 8.592176813568834, "learning_rate": 7.2018633082636965e-06, "loss": 0.6559, "step": 13067 }, { "epoch": 0.37, "grad_norm": 4.935896842574149, "learning_rate": 7.201446929441552e-06, "loss": 0.5637, "step": 13068 }, { "epoch": 0.37, "grad_norm": 6.775486408523193, "learning_rate": 7.201030531680631e-06, "loss": 0.4657, "step": 13069 }, { "epoch": 0.37, "grad_norm": 3.8186537060872854, "learning_rate": 7.200614114984515e-06, "loss": 0.4497, "step": 13070 }, { "epoch": 0.37, "grad_norm": 4.076738991650745, "learning_rate": 7.200197679356787e-06, "loss": 0.7147, "step": 13071 }, { "epoch": 0.37, "grad_norm": 5.4522309540454374, "learning_rate": 7.199781224801029e-06, "loss": 0.2721, "step": 13072 }, { "epoch": 0.37, "grad_norm": 8.540398132906608, "learning_rate": 7.199364751320825e-06, "loss": 0.8503, "step": 13073 }, { "epoch": 0.37, "grad_norm": 4.1634754676064025, "learning_rate": 7.198948258919757e-06, "loss": 0.1493, "step": 13074 }, { "epoch": 0.37, "grad_norm": 4.758457609893098, "learning_rate": 7.198531747601407e-06, "loss": 0.4913, "step": 13075 }, { "epoch": 0.37, "grad_norm": 6.2675104895413805, "learning_rate": 7.198115217369359e-06, "loss": 0.3913, "step": 13076 }, { "epoch": 0.37, "grad_norm": 4.241168579480298, "learning_rate": 7.197698668227196e-06, "loss": 0.4418, "step": 13077 }, { "epoch": 0.37, "grad_norm": 7.074777615829775, "learning_rate": 7.1972821001785024e-06, "loss": 0.6166, "step": 13078 }, { "epoch": 0.37, "grad_norm": 8.160151429126971, "learning_rate": 7.19686551322686e-06, "loss": 0.8576, "step": 13079 }, { "epoch": 0.37, "grad_norm": 6.727602019938079, "learning_rate": 7.196448907375857e-06, "loss": 0.5403, "step": 13080 }, { "epoch": 0.37, "grad_norm": 5.248807839867346, "learning_rate": 7.196032282629073e-06, "loss": 0.5505, "step": 13081 }, { "epoch": 0.37, "grad_norm": 5.447748632617383, "learning_rate": 7.195615638990095e-06, "loss": 0.4315, "step": 13082 }, { "epoch": 0.37, "grad_norm": 5.290110767336974, "learning_rate": 7.195198976462505e-06, "loss": 0.3399, "step": 13083 }, { "epoch": 0.37, "grad_norm": 5.34836938643595, "learning_rate": 7.194782295049888e-06, "loss": 0.7367, "step": 13084 }, { "epoch": 0.37, "grad_norm": 8.06403838865286, "learning_rate": 7.194365594755827e-06, "loss": 0.9082, "step": 13085 }, { "epoch": 0.37, "grad_norm": 5.977485056961763, "learning_rate": 7.1939488755839125e-06, "loss": 0.5338, "step": 13086 }, { "epoch": 0.37, "grad_norm": 4.186717942499907, "learning_rate": 7.193532137537725e-06, "loss": 0.2366, "step": 13087 }, { "epoch": 0.37, "grad_norm": 5.222853823043946, "learning_rate": 7.19311538062085e-06, "loss": 0.4575, "step": 13088 }, { "epoch": 0.37, "grad_norm": 2.048676731563698, "learning_rate": 7.192698604836874e-06, "loss": 0.0942, "step": 13089 }, { "epoch": 0.37, "grad_norm": 5.120336131744748, "learning_rate": 7.192281810189382e-06, "loss": 0.6415, "step": 13090 }, { "epoch": 0.37, "grad_norm": 11.285387205174999, "learning_rate": 7.19186499668196e-06, "loss": 0.7806, "step": 13091 }, { "epoch": 0.37, "grad_norm": 4.4896298765290945, "learning_rate": 7.191448164318191e-06, "loss": 0.6192, "step": 13092 }, { "epoch": 0.37, "grad_norm": 3.558528912117929, "learning_rate": 7.191031313101665e-06, "loss": 0.5549, "step": 13093 }, { "epoch": 0.37, "grad_norm": 6.434035841315809, "learning_rate": 7.190614443035966e-06, "loss": 0.521, "step": 13094 }, { "epoch": 0.38, "grad_norm": 3.074641682131938, "learning_rate": 7.19019755412468e-06, "loss": 0.282, "step": 13095 }, { "epoch": 0.38, "grad_norm": 4.1568696270031715, "learning_rate": 7.189780646371396e-06, "loss": 0.3901, "step": 13096 }, { "epoch": 0.38, "grad_norm": 4.124248638503636, "learning_rate": 7.189363719779698e-06, "loss": 0.7218, "step": 13097 }, { "epoch": 0.38, "grad_norm": 9.93383539155687, "learning_rate": 7.1889467743531725e-06, "loss": 0.4896, "step": 13098 }, { "epoch": 0.38, "grad_norm": 2.1124734092483486, "learning_rate": 7.188529810095408e-06, "loss": 0.1237, "step": 13099 }, { "epoch": 0.38, "grad_norm": 6.437906900527311, "learning_rate": 7.188112827009992e-06, "loss": 0.5034, "step": 13100 }, { "epoch": 0.38, "grad_norm": 6.663766691165157, "learning_rate": 7.187695825100509e-06, "loss": 0.5379, "step": 13101 }, { "epoch": 0.38, "grad_norm": 3.424314909308724, "learning_rate": 7.187278804370548e-06, "loss": 0.2111, "step": 13102 }, { "epoch": 0.38, "grad_norm": 4.888117479504633, "learning_rate": 7.186861764823698e-06, "loss": 0.7004, "step": 13103 }, { "epoch": 0.38, "grad_norm": 5.576310212512068, "learning_rate": 7.186444706463545e-06, "loss": 0.3692, "step": 13104 }, { "epoch": 0.38, "grad_norm": 5.24418537255772, "learning_rate": 7.186027629293678e-06, "loss": 0.3879, "step": 13105 }, { "epoch": 0.38, "grad_norm": 5.108158281675981, "learning_rate": 7.185610533317684e-06, "loss": 0.3153, "step": 13106 }, { "epoch": 0.38, "grad_norm": 5.708407299462844, "learning_rate": 7.1851934185391524e-06, "loss": 0.8471, "step": 13107 }, { "epoch": 0.38, "grad_norm": 7.564946338541404, "learning_rate": 7.18477628496167e-06, "loss": 0.4001, "step": 13108 }, { "epoch": 0.38, "grad_norm": 3.9603859782681274, "learning_rate": 7.1843591325888266e-06, "loss": 0.4414, "step": 13109 }, { "epoch": 0.38, "grad_norm": 7.817672909006825, "learning_rate": 7.18394196142421e-06, "loss": 0.7219, "step": 13110 }, { "epoch": 0.38, "grad_norm": 4.214548349855999, "learning_rate": 7.1835247714714095e-06, "loss": 0.4731, "step": 13111 }, { "epoch": 0.38, "grad_norm": 5.489202814347517, "learning_rate": 7.183107562734014e-06, "loss": 0.4036, "step": 13112 }, { "epoch": 0.38, "grad_norm": 5.682835269407231, "learning_rate": 7.182690335215614e-06, "loss": 0.504, "step": 13113 }, { "epoch": 0.38, "grad_norm": 3.276027774427561, "learning_rate": 7.182273088919798e-06, "loss": 0.2821, "step": 13114 }, { "epoch": 0.38, "grad_norm": 7.144932689202869, "learning_rate": 7.1818558238501544e-06, "loss": 0.6755, "step": 13115 }, { "epoch": 0.38, "grad_norm": 5.074704658671449, "learning_rate": 7.181438540010274e-06, "loss": 0.5465, "step": 13116 }, { "epoch": 0.38, "grad_norm": 5.824946050435018, "learning_rate": 7.181021237403747e-06, "loss": 0.6886, "step": 13117 }, { "epoch": 0.38, "grad_norm": 3.787461939705455, "learning_rate": 7.180603916034161e-06, "loss": 0.107, "step": 13118 }, { "epoch": 0.38, "grad_norm": 4.856704468738177, "learning_rate": 7.180186575905109e-06, "loss": 0.2319, "step": 13119 }, { "epoch": 0.38, "grad_norm": 5.521178521455142, "learning_rate": 7.179769217020181e-06, "loss": 0.7211, "step": 13120 }, { "epoch": 0.38, "grad_norm": 7.121672003657958, "learning_rate": 7.179351839382966e-06, "loss": 0.807, "step": 13121 }, { "epoch": 0.38, "grad_norm": 9.801312380128682, "learning_rate": 7.178934442997056e-06, "loss": 0.8643, "step": 13122 }, { "epoch": 0.38, "grad_norm": 9.6305668573124, "learning_rate": 7.178517027866041e-06, "loss": 0.537, "step": 13123 }, { "epoch": 0.38, "grad_norm": 4.306374276503643, "learning_rate": 7.178099593993514e-06, "loss": 0.5654, "step": 13124 }, { "epoch": 0.38, "grad_norm": 3.857900731749104, "learning_rate": 7.177682141383062e-06, "loss": 0.1319, "step": 13125 }, { "epoch": 0.38, "grad_norm": 8.177848427002028, "learning_rate": 7.17726467003828e-06, "loss": 0.4387, "step": 13126 }, { "epoch": 0.38, "grad_norm": 6.172061849735405, "learning_rate": 7.176847179962757e-06, "loss": 0.5603, "step": 13127 }, { "epoch": 0.38, "grad_norm": 4.244132423305179, "learning_rate": 7.176429671160085e-06, "loss": 0.443, "step": 13128 }, { "epoch": 0.38, "grad_norm": 8.496253085341404, "learning_rate": 7.176012143633858e-06, "loss": 0.6391, "step": 13129 }, { "epoch": 0.38, "grad_norm": 6.0789802949752, "learning_rate": 7.175594597387668e-06, "loss": 0.4421, "step": 13130 }, { "epoch": 0.38, "grad_norm": 8.406968323601735, "learning_rate": 7.175177032425102e-06, "loss": 0.5969, "step": 13131 }, { "epoch": 0.38, "grad_norm": 6.856658745320513, "learning_rate": 7.174759448749759e-06, "loss": 0.7344, "step": 13132 }, { "epoch": 0.38, "grad_norm": 6.640514238219202, "learning_rate": 7.174341846365226e-06, "loss": 0.263, "step": 13133 }, { "epoch": 0.38, "grad_norm": 6.591581883068085, "learning_rate": 7.1739242252750995e-06, "loss": 0.5595, "step": 13134 }, { "epoch": 0.38, "grad_norm": 3.700169571006479, "learning_rate": 7.173506585482968e-06, "loss": 0.2459, "step": 13135 }, { "epoch": 0.38, "grad_norm": 10.822195656300167, "learning_rate": 7.1730889269924285e-06, "loss": 0.3995, "step": 13136 }, { "epoch": 0.38, "grad_norm": 4.168936533951067, "learning_rate": 7.1726712498070735e-06, "loss": 0.6073, "step": 13137 }, { "epoch": 0.38, "grad_norm": 5.651285511387067, "learning_rate": 7.172253553930495e-06, "loss": 0.7784, "step": 13138 }, { "epoch": 0.38, "grad_norm": 4.420003002735344, "learning_rate": 7.171835839366286e-06, "loss": 0.402, "step": 13139 }, { "epoch": 0.38, "grad_norm": 8.834457637803828, "learning_rate": 7.171418106118041e-06, "loss": 0.5579, "step": 13140 }, { "epoch": 0.38, "grad_norm": 3.8263679364587118, "learning_rate": 7.171000354189352e-06, "loss": 0.2925, "step": 13141 }, { "epoch": 0.38, "grad_norm": 7.081632507174796, "learning_rate": 7.170582583583816e-06, "loss": 0.875, "step": 13142 }, { "epoch": 0.38, "grad_norm": 4.806884337830193, "learning_rate": 7.170164794305024e-06, "loss": 0.291, "step": 13143 }, { "epoch": 0.38, "grad_norm": 6.965299564664301, "learning_rate": 7.169746986356571e-06, "loss": 0.8368, "step": 13144 }, { "epoch": 0.38, "grad_norm": 6.396438454033638, "learning_rate": 7.169329159742053e-06, "loss": 0.6702, "step": 13145 }, { "epoch": 0.38, "grad_norm": 3.1860217425184585, "learning_rate": 7.168911314465064e-06, "loss": 0.4807, "step": 13146 }, { "epoch": 0.38, "grad_norm": 9.741978597963241, "learning_rate": 7.168493450529198e-06, "loss": 0.8887, "step": 13147 }, { "epoch": 0.38, "grad_norm": 4.487157722532147, "learning_rate": 7.168075567938048e-06, "loss": 0.4168, "step": 13148 }, { "epoch": 0.38, "grad_norm": 8.21994932293643, "learning_rate": 7.167657666695213e-06, "loss": 0.7867, "step": 13149 }, { "epoch": 0.38, "grad_norm": 6.143440513538304, "learning_rate": 7.167239746804285e-06, "loss": 0.7112, "step": 13150 }, { "epoch": 0.38, "grad_norm": 6.954017839006786, "learning_rate": 7.166821808268859e-06, "loss": 0.5063, "step": 13151 }, { "epoch": 0.38, "grad_norm": 4.149778820945956, "learning_rate": 7.166403851092533e-06, "loss": 0.435, "step": 13152 }, { "epoch": 0.38, "grad_norm": 5.950884820558409, "learning_rate": 7.1659858752789e-06, "loss": 0.463, "step": 13153 }, { "epoch": 0.38, "grad_norm": 9.986579186073763, "learning_rate": 7.16556788083156e-06, "loss": 0.6739, "step": 13154 }, { "epoch": 0.38, "grad_norm": 14.285563095178277, "learning_rate": 7.165149867754104e-06, "loss": 0.6741, "step": 13155 }, { "epoch": 0.38, "grad_norm": 5.492499525775985, "learning_rate": 7.164731836050132e-06, "loss": 0.6009, "step": 13156 }, { "epoch": 0.38, "grad_norm": 6.215358863836779, "learning_rate": 7.164313785723238e-06, "loss": 0.2928, "step": 13157 }, { "epoch": 0.38, "grad_norm": 3.305541476029153, "learning_rate": 7.163895716777018e-06, "loss": 0.4523, "step": 13158 }, { "epoch": 0.38, "grad_norm": 7.085730821987383, "learning_rate": 7.163477629215069e-06, "loss": 0.6983, "step": 13159 }, { "epoch": 0.38, "grad_norm": 4.12613644984216, "learning_rate": 7.16305952304099e-06, "loss": 0.5876, "step": 13160 }, { "epoch": 0.38, "grad_norm": 4.326209956361033, "learning_rate": 7.162641398258375e-06, "loss": 0.6514, "step": 13161 }, { "epoch": 0.38, "grad_norm": 8.110287124405362, "learning_rate": 7.162223254870824e-06, "loss": 0.2673, "step": 13162 }, { "epoch": 0.38, "grad_norm": 2.9631333273734612, "learning_rate": 7.161805092881931e-06, "loss": 0.1806, "step": 13163 }, { "epoch": 0.38, "grad_norm": 5.629796759153694, "learning_rate": 7.1613869122952966e-06, "loss": 0.4285, "step": 13164 }, { "epoch": 0.38, "grad_norm": 7.509473031523693, "learning_rate": 7.160968713114515e-06, "loss": 0.5098, "step": 13165 }, { "epoch": 0.38, "grad_norm": 6.036160737920256, "learning_rate": 7.160550495343186e-06, "loss": 0.5921, "step": 13166 }, { "epoch": 0.38, "grad_norm": 4.719306837157246, "learning_rate": 7.160132258984907e-06, "loss": 0.2785, "step": 13167 }, { "epoch": 0.38, "grad_norm": 6.157597660555599, "learning_rate": 7.159714004043276e-06, "loss": 0.6803, "step": 13168 }, { "epoch": 0.38, "grad_norm": 5.789080250412965, "learning_rate": 7.1592957305218915e-06, "loss": 0.5138, "step": 13169 }, { "epoch": 0.38, "grad_norm": 7.919171295159311, "learning_rate": 7.158877438424353e-06, "loss": 0.7187, "step": 13170 }, { "epoch": 0.38, "grad_norm": 7.507134445645119, "learning_rate": 7.1584591277542555e-06, "loss": 0.5872, "step": 13171 }, { "epoch": 0.38, "grad_norm": 10.36663278735677, "learning_rate": 7.158040798515203e-06, "loss": 0.707, "step": 13172 }, { "epoch": 0.38, "grad_norm": 3.7242188511490935, "learning_rate": 7.157622450710788e-06, "loss": 0.3484, "step": 13173 }, { "epoch": 0.38, "grad_norm": 5.313522240516535, "learning_rate": 7.157204084344614e-06, "loss": 0.3827, "step": 13174 }, { "epoch": 0.38, "grad_norm": 4.331141993729672, "learning_rate": 7.156785699420279e-06, "loss": 0.2715, "step": 13175 }, { "epoch": 0.38, "grad_norm": 6.14225269905058, "learning_rate": 7.156367295941381e-06, "loss": 0.5452, "step": 13176 }, { "epoch": 0.38, "grad_norm": 4.51191819315753, "learning_rate": 7.155948873911523e-06, "loss": 0.2888, "step": 13177 }, { "epoch": 0.38, "grad_norm": 4.327351580211733, "learning_rate": 7.1555304333343e-06, "loss": 0.4458, "step": 13178 }, { "epoch": 0.38, "grad_norm": 7.971162199051584, "learning_rate": 7.155111974213316e-06, "loss": 0.4682, "step": 13179 }, { "epoch": 0.38, "grad_norm": 5.831752599254041, "learning_rate": 7.154693496552168e-06, "loss": 0.5154, "step": 13180 }, { "epoch": 0.38, "grad_norm": 9.010492936768888, "learning_rate": 7.154275000354457e-06, "loss": 0.4351, "step": 13181 }, { "epoch": 0.38, "grad_norm": 3.612615493063404, "learning_rate": 7.153856485623784e-06, "loss": 0.6105, "step": 13182 }, { "epoch": 0.38, "grad_norm": 15.03537482689749, "learning_rate": 7.153437952363748e-06, "loss": 0.6578, "step": 13183 }, { "epoch": 0.38, "grad_norm": 1.8468515607634193, "learning_rate": 7.153019400577951e-06, "loss": 0.0828, "step": 13184 }, { "epoch": 0.38, "grad_norm": 5.850878687019351, "learning_rate": 7.152600830269992e-06, "loss": 0.403, "step": 13185 }, { "epoch": 0.38, "grad_norm": 3.3771165109958283, "learning_rate": 7.152182241443475e-06, "loss": 0.1576, "step": 13186 }, { "epoch": 0.38, "grad_norm": 10.048371246598757, "learning_rate": 7.151763634101998e-06, "loss": 0.4201, "step": 13187 }, { "epoch": 0.38, "grad_norm": 8.743236652653675, "learning_rate": 7.151345008249164e-06, "loss": 0.7828, "step": 13188 }, { "epoch": 0.38, "grad_norm": 4.657068551843183, "learning_rate": 7.150926363888573e-06, "loss": 0.5132, "step": 13189 }, { "epoch": 0.38, "grad_norm": 2.776455486341278, "learning_rate": 7.150507701023828e-06, "loss": 0.1865, "step": 13190 }, { "epoch": 0.38, "grad_norm": 4.63310665335394, "learning_rate": 7.1500890196585305e-06, "loss": 0.3692, "step": 13191 }, { "epoch": 0.38, "grad_norm": 6.348047013212647, "learning_rate": 7.14967031979628e-06, "loss": 1.1756, "step": 13192 }, { "epoch": 0.38, "grad_norm": 4.126403396468523, "learning_rate": 7.149251601440682e-06, "loss": 0.4268, "step": 13193 }, { "epoch": 0.38, "grad_norm": 5.643563474696828, "learning_rate": 7.148832864595338e-06, "loss": 0.3388, "step": 13194 }, { "epoch": 0.38, "grad_norm": 5.19653342679153, "learning_rate": 7.148414109263847e-06, "loss": 0.563, "step": 13195 }, { "epoch": 0.38, "grad_norm": 1.8330102838205904, "learning_rate": 7.147995335449816e-06, "loss": 0.0968, "step": 13196 }, { "epoch": 0.38, "grad_norm": 9.333232331297099, "learning_rate": 7.147576543156844e-06, "loss": 0.6696, "step": 13197 }, { "epoch": 0.38, "grad_norm": 7.893676288710613, "learning_rate": 7.147157732388537e-06, "loss": 1.0446, "step": 13198 }, { "epoch": 0.38, "grad_norm": 5.81610709215868, "learning_rate": 7.146738903148496e-06, "loss": 0.2181, "step": 13199 }, { "epoch": 0.38, "grad_norm": 5.908066081830036, "learning_rate": 7.146320055440323e-06, "loss": 0.5958, "step": 13200 }, { "epoch": 0.38, "grad_norm": 5.175894797177251, "learning_rate": 7.145901189267623e-06, "loss": 0.4801, "step": 13201 }, { "epoch": 0.38, "grad_norm": 5.040340196541797, "learning_rate": 7.1454823046340006e-06, "loss": 0.5098, "step": 13202 }, { "epoch": 0.38, "grad_norm": 3.3155956738008285, "learning_rate": 7.1450634015430575e-06, "loss": 0.1385, "step": 13203 }, { "epoch": 0.38, "grad_norm": 6.7889042562604605, "learning_rate": 7.1446444799984e-06, "loss": 0.9389, "step": 13204 }, { "epoch": 0.38, "grad_norm": 6.932900218395791, "learning_rate": 7.144225540003628e-06, "loss": 0.3649, "step": 13205 }, { "epoch": 0.38, "grad_norm": 3.901499491753033, "learning_rate": 7.143806581562347e-06, "loss": 0.6665, "step": 13206 }, { "epoch": 0.38, "grad_norm": 6.181346877894292, "learning_rate": 7.143387604678162e-06, "loss": 0.7823, "step": 13207 }, { "epoch": 0.38, "grad_norm": 3.767211105165135, "learning_rate": 7.14296860935468e-06, "loss": 0.6079, "step": 13208 }, { "epoch": 0.38, "grad_norm": 4.489394857369869, "learning_rate": 7.142549595595501e-06, "loss": 0.4305, "step": 13209 }, { "epoch": 0.38, "grad_norm": 5.149819913511691, "learning_rate": 7.14213056340423e-06, "loss": 0.4152, "step": 13210 }, { "epoch": 0.38, "grad_norm": 7.586667875257417, "learning_rate": 7.141711512784477e-06, "loss": 0.8936, "step": 13211 }, { "epoch": 0.38, "grad_norm": 3.324085882918188, "learning_rate": 7.141292443739841e-06, "loss": 0.4422, "step": 13212 }, { "epoch": 0.38, "grad_norm": 8.631984315389502, "learning_rate": 7.1408733562739305e-06, "loss": 0.7544, "step": 13213 }, { "epoch": 0.38, "grad_norm": 9.879440082020261, "learning_rate": 7.14045425039035e-06, "loss": 0.8065, "step": 13214 }, { "epoch": 0.38, "grad_norm": 6.34517052441597, "learning_rate": 7.140035126092706e-06, "loss": 0.4472, "step": 13215 }, { "epoch": 0.38, "grad_norm": 9.806854519223698, "learning_rate": 7.139615983384602e-06, "loss": 0.7349, "step": 13216 }, { "epoch": 0.38, "grad_norm": 4.51714463056914, "learning_rate": 7.139196822269646e-06, "loss": 0.2596, "step": 13217 }, { "epoch": 0.38, "grad_norm": 8.762041743290741, "learning_rate": 7.138777642751443e-06, "loss": 0.6207, "step": 13218 }, { "epoch": 0.38, "grad_norm": 7.0932030824065455, "learning_rate": 7.138358444833599e-06, "loss": 0.321, "step": 13219 }, { "epoch": 0.38, "grad_norm": 5.373803249467099, "learning_rate": 7.13793922851972e-06, "loss": 0.724, "step": 13220 }, { "epoch": 0.38, "grad_norm": 10.825858227052203, "learning_rate": 7.137519993813416e-06, "loss": 0.5328, "step": 13221 }, { "epoch": 0.38, "grad_norm": 7.273561142670555, "learning_rate": 7.137100740718288e-06, "loss": 1.0012, "step": 13222 }, { "epoch": 0.38, "grad_norm": 9.055593002634561, "learning_rate": 7.136681469237945e-06, "loss": 0.5694, "step": 13223 }, { "epoch": 0.38, "grad_norm": 6.065752277876482, "learning_rate": 7.1362621793759944e-06, "loss": 0.6277, "step": 13224 }, { "epoch": 0.38, "grad_norm": 11.006136634671734, "learning_rate": 7.135842871136045e-06, "loss": 1.063, "step": 13225 }, { "epoch": 0.38, "grad_norm": 3.91806696486289, "learning_rate": 7.135423544521701e-06, "loss": 0.4749, "step": 13226 }, { "epoch": 0.38, "grad_norm": 5.068013709009864, "learning_rate": 7.1350041995365725e-06, "loss": 0.6682, "step": 13227 }, { "epoch": 0.38, "grad_norm": 4.199221588843518, "learning_rate": 7.134584836184265e-06, "loss": 0.4463, "step": 13228 }, { "epoch": 0.38, "grad_norm": 7.012510836179796, "learning_rate": 7.134165454468387e-06, "loss": 0.6397, "step": 13229 }, { "epoch": 0.38, "grad_norm": 5.612451798671712, "learning_rate": 7.133746054392545e-06, "loss": 0.3349, "step": 13230 }, { "epoch": 0.38, "grad_norm": 3.268895222955392, "learning_rate": 7.133326635960352e-06, "loss": 0.4597, "step": 13231 }, { "epoch": 0.38, "grad_norm": 8.412685044224602, "learning_rate": 7.13290719917541e-06, "loss": 0.2407, "step": 13232 }, { "epoch": 0.38, "grad_norm": 7.702921030930568, "learning_rate": 7.132487744041329e-06, "loss": 0.937, "step": 13233 }, { "epoch": 0.38, "grad_norm": 4.490277277279819, "learning_rate": 7.13206827056172e-06, "loss": 0.3319, "step": 13234 }, { "epoch": 0.38, "grad_norm": 7.602638976041439, "learning_rate": 7.13164877874019e-06, "loss": 0.8035, "step": 13235 }, { "epoch": 0.38, "grad_norm": 3.8049789881032092, "learning_rate": 7.1312292685803475e-06, "loss": 0.2462, "step": 13236 }, { "epoch": 0.38, "grad_norm": 5.474652924706378, "learning_rate": 7.1308097400858025e-06, "loss": 0.4334, "step": 13237 }, { "epoch": 0.38, "grad_norm": 4.435749944609813, "learning_rate": 7.1303901932601635e-06, "loss": 0.5445, "step": 13238 }, { "epoch": 0.38, "grad_norm": 7.620307103574186, "learning_rate": 7.129970628107041e-06, "loss": 0.5991, "step": 13239 }, { "epoch": 0.38, "grad_norm": 6.333942367315603, "learning_rate": 7.129551044630042e-06, "loss": 0.5097, "step": 13240 }, { "epoch": 0.38, "grad_norm": 8.12937187536729, "learning_rate": 7.1291314428327795e-06, "loss": 0.7754, "step": 13241 }, { "epoch": 0.38, "grad_norm": 5.008705095318805, "learning_rate": 7.128711822718859e-06, "loss": 0.4165, "step": 13242 }, { "epoch": 0.38, "grad_norm": 6.57448841614007, "learning_rate": 7.128292184291893e-06, "loss": 0.4153, "step": 13243 }, { "epoch": 0.38, "grad_norm": 3.284660202114721, "learning_rate": 7.127872527555493e-06, "loss": 0.3021, "step": 13244 }, { "epoch": 0.38, "grad_norm": 6.445912318207072, "learning_rate": 7.127452852513266e-06, "loss": 0.4505, "step": 13245 }, { "epoch": 0.38, "grad_norm": 5.011747721397288, "learning_rate": 7.127033159168825e-06, "loss": 0.4587, "step": 13246 }, { "epoch": 0.38, "grad_norm": 4.870600451321037, "learning_rate": 7.126613447525779e-06, "loss": 0.8197, "step": 13247 }, { "epoch": 0.38, "grad_norm": 7.841892098386668, "learning_rate": 7.1261937175877405e-06, "loss": 0.4588, "step": 13248 }, { "epoch": 0.38, "grad_norm": 2.990893270803039, "learning_rate": 7.125773969358318e-06, "loss": 0.2682, "step": 13249 }, { "epoch": 0.38, "grad_norm": 3.63799646142302, "learning_rate": 7.125354202841124e-06, "loss": 0.4445, "step": 13250 }, { "epoch": 0.38, "grad_norm": 3.263293442396274, "learning_rate": 7.12493441803977e-06, "loss": 0.2249, "step": 13251 }, { "epoch": 0.38, "grad_norm": 9.746109014892856, "learning_rate": 7.124514614957867e-06, "loss": 0.7294, "step": 13252 }, { "epoch": 0.38, "grad_norm": 6.861341468283354, "learning_rate": 7.124094793599025e-06, "loss": 0.6026, "step": 13253 }, { "epoch": 0.38, "grad_norm": 10.016351777605419, "learning_rate": 7.123674953966858e-06, "loss": 0.432, "step": 13254 }, { "epoch": 0.38, "grad_norm": 5.8294881499801745, "learning_rate": 7.123255096064978e-06, "loss": 0.5952, "step": 13255 }, { "epoch": 0.38, "grad_norm": 6.973387710287555, "learning_rate": 7.122835219896994e-06, "loss": 0.7534, "step": 13256 }, { "epoch": 0.38, "grad_norm": 3.841322496737424, "learning_rate": 7.1224153254665205e-06, "loss": 0.396, "step": 13257 }, { "epoch": 0.38, "grad_norm": 4.5865521254671995, "learning_rate": 7.121995412777169e-06, "loss": 0.3795, "step": 13258 }, { "epoch": 0.38, "grad_norm": 5.071009748154231, "learning_rate": 7.121575481832552e-06, "loss": 0.4206, "step": 13259 }, { "epoch": 0.38, "grad_norm": 6.391906908171419, "learning_rate": 7.121155532636283e-06, "loss": 0.5317, "step": 13260 }, { "epoch": 0.38, "grad_norm": 7.414393587427573, "learning_rate": 7.120735565191975e-06, "loss": 0.4542, "step": 13261 }, { "epoch": 0.38, "grad_norm": 5.451089736062681, "learning_rate": 7.1203155795032385e-06, "loss": 1.0053, "step": 13262 }, { "epoch": 0.38, "grad_norm": 6.2773629682587355, "learning_rate": 7.119895575573689e-06, "loss": 0.4621, "step": 13263 }, { "epoch": 0.38, "grad_norm": 8.127959314026715, "learning_rate": 7.119475553406938e-06, "loss": 0.3715, "step": 13264 }, { "epoch": 0.38, "grad_norm": 7.731104211195923, "learning_rate": 7.119055513006602e-06, "loss": 0.6463, "step": 13265 }, { "epoch": 0.38, "grad_norm": 2.9320574114084925, "learning_rate": 7.118635454376289e-06, "loss": 0.2078, "step": 13266 }, { "epoch": 0.38, "grad_norm": 3.326433029271284, "learning_rate": 7.1182153775196175e-06, "loss": 0.2841, "step": 13267 }, { "epoch": 0.38, "grad_norm": 4.748793047102108, "learning_rate": 7.117795282440201e-06, "loss": 0.3735, "step": 13268 }, { "epoch": 0.38, "grad_norm": 10.304378525134139, "learning_rate": 7.117375169141652e-06, "loss": 0.9627, "step": 13269 }, { "epoch": 0.38, "grad_norm": 4.69854603405126, "learning_rate": 7.1169550376275846e-06, "loss": 0.2285, "step": 13270 }, { "epoch": 0.38, "grad_norm": 9.378650920781308, "learning_rate": 7.116534887901614e-06, "loss": 0.6738, "step": 13271 }, { "epoch": 0.38, "grad_norm": 2.836457154975018, "learning_rate": 7.116114719967354e-06, "loss": 0.356, "step": 13272 }, { "epoch": 0.38, "grad_norm": 8.396140049584616, "learning_rate": 7.115694533828419e-06, "loss": 0.4271, "step": 13273 }, { "epoch": 0.38, "grad_norm": 4.969507987308733, "learning_rate": 7.115274329488425e-06, "loss": 0.4211, "step": 13274 }, { "epoch": 0.38, "grad_norm": 5.773143735026989, "learning_rate": 7.114854106950987e-06, "loss": 0.5156, "step": 13275 }, { "epoch": 0.38, "grad_norm": 4.550877752631571, "learning_rate": 7.1144338662197185e-06, "loss": 0.3539, "step": 13276 }, { "epoch": 0.38, "grad_norm": 3.4808031600096134, "learning_rate": 7.114013607298236e-06, "loss": 0.2448, "step": 13277 }, { "epoch": 0.38, "grad_norm": 6.387654479187442, "learning_rate": 7.1135933301901565e-06, "loss": 0.4566, "step": 13278 }, { "epoch": 0.38, "grad_norm": 7.30930271512512, "learning_rate": 7.113173034899091e-06, "loss": 0.415, "step": 13279 }, { "epoch": 0.38, "grad_norm": 7.533163183817016, "learning_rate": 7.1127527214286595e-06, "loss": 0.5773, "step": 13280 }, { "epoch": 0.38, "grad_norm": 3.7247379082764414, "learning_rate": 7.112332389782477e-06, "loss": 0.3871, "step": 13281 }, { "epoch": 0.38, "grad_norm": 8.343748514125279, "learning_rate": 7.111912039964159e-06, "loss": 0.4998, "step": 13282 }, { "epoch": 0.38, "grad_norm": 2.624624895361346, "learning_rate": 7.11149167197732e-06, "loss": 0.162, "step": 13283 }, { "epoch": 0.38, "grad_norm": 8.295711514779065, "learning_rate": 7.11107128582558e-06, "loss": 0.4159, "step": 13284 }, { "epoch": 0.38, "grad_norm": 5.911351498438063, "learning_rate": 7.110650881512553e-06, "loss": 0.6403, "step": 13285 }, { "epoch": 0.38, "grad_norm": 7.753868398886282, "learning_rate": 7.110230459041856e-06, "loss": 0.6952, "step": 13286 }, { "epoch": 0.38, "grad_norm": 10.787310591324472, "learning_rate": 7.109810018417106e-06, "loss": 0.7449, "step": 13287 }, { "epoch": 0.38, "grad_norm": 8.521292328227874, "learning_rate": 7.109389559641921e-06, "loss": 0.6319, "step": 13288 }, { "epoch": 0.38, "grad_norm": 9.635135664407919, "learning_rate": 7.1089690827199165e-06, "loss": 0.5118, "step": 13289 }, { "epoch": 0.38, "grad_norm": 8.163196514141456, "learning_rate": 7.1085485876547115e-06, "loss": 0.4374, "step": 13290 }, { "epoch": 0.38, "grad_norm": 3.0062770739186813, "learning_rate": 7.108128074449922e-06, "loss": 0.3268, "step": 13291 }, { "epoch": 0.38, "grad_norm": 11.283470421375732, "learning_rate": 7.107707543109166e-06, "loss": 0.4634, "step": 13292 }, { "epoch": 0.38, "grad_norm": 4.387761495682413, "learning_rate": 7.107286993636062e-06, "loss": 0.5093, "step": 13293 }, { "epoch": 0.38, "grad_norm": 4.1313733946685245, "learning_rate": 7.1068664260342265e-06, "loss": 0.5735, "step": 13294 }, { "epoch": 0.38, "grad_norm": 4.451363626779707, "learning_rate": 7.106445840307281e-06, "loss": 0.2332, "step": 13295 }, { "epoch": 0.38, "grad_norm": 8.169866428100457, "learning_rate": 7.106025236458839e-06, "loss": 0.5386, "step": 13296 }, { "epoch": 0.38, "grad_norm": 5.247054000082241, "learning_rate": 7.105604614492522e-06, "loss": 0.4472, "step": 13297 }, { "epoch": 0.38, "grad_norm": 6.991950311313989, "learning_rate": 7.105183974411949e-06, "loss": 0.5818, "step": 13298 }, { "epoch": 0.38, "grad_norm": 9.62455312508929, "learning_rate": 7.104763316220734e-06, "loss": 0.456, "step": 13299 }, { "epoch": 0.38, "grad_norm": 8.291049535747145, "learning_rate": 7.104342639922501e-06, "loss": 0.3557, "step": 13300 }, { "epoch": 0.38, "grad_norm": 7.331580414932052, "learning_rate": 7.103921945520869e-06, "loss": 0.4808, "step": 13301 }, { "epoch": 0.38, "grad_norm": 7.415261046122457, "learning_rate": 7.103501233019454e-06, "loss": 0.647, "step": 13302 }, { "epoch": 0.38, "grad_norm": 10.926845920093987, "learning_rate": 7.103080502421877e-06, "loss": 0.6012, "step": 13303 }, { "epoch": 0.38, "grad_norm": 4.771523899034906, "learning_rate": 7.1026597537317575e-06, "loss": 0.5059, "step": 13304 }, { "epoch": 0.38, "grad_norm": 3.8201984786353984, "learning_rate": 7.1022389869527165e-06, "loss": 0.356, "step": 13305 }, { "epoch": 0.38, "grad_norm": 2.6108589292948383, "learning_rate": 7.101818202088371e-06, "loss": 0.4612, "step": 13306 }, { "epoch": 0.38, "grad_norm": 8.928619935040183, "learning_rate": 7.10139739914234e-06, "loss": 0.9266, "step": 13307 }, { "epoch": 0.38, "grad_norm": 6.687546988348905, "learning_rate": 7.100976578118249e-06, "loss": 0.4215, "step": 13308 }, { "epoch": 0.38, "grad_norm": 5.843535965044458, "learning_rate": 7.100555739019715e-06, "loss": 0.6299, "step": 13309 }, { "epoch": 0.38, "grad_norm": 6.541199912490523, "learning_rate": 7.100134881850358e-06, "loss": 0.8941, "step": 13310 }, { "epoch": 0.38, "grad_norm": 4.698815230931343, "learning_rate": 7.099714006613798e-06, "loss": 0.5039, "step": 13311 }, { "epoch": 0.38, "grad_norm": 4.388841857470378, "learning_rate": 7.099293113313659e-06, "loss": 0.2616, "step": 13312 }, { "epoch": 0.38, "grad_norm": 3.521920294076913, "learning_rate": 7.098872201953559e-06, "loss": 0.494, "step": 13313 }, { "epoch": 0.38, "grad_norm": 9.622566064026538, "learning_rate": 7.098451272537119e-06, "loss": 0.4896, "step": 13314 }, { "epoch": 0.38, "grad_norm": 6.143331770763036, "learning_rate": 7.098030325067964e-06, "loss": 0.681, "step": 13315 }, { "epoch": 0.38, "grad_norm": 8.324820773741438, "learning_rate": 7.0976093595497085e-06, "loss": 0.637, "step": 13316 }, { "epoch": 0.38, "grad_norm": 8.90386823964204, "learning_rate": 7.09718837598598e-06, "loss": 0.4593, "step": 13317 }, { "epoch": 0.38, "grad_norm": 13.732781744191296, "learning_rate": 7.096767374380397e-06, "loss": 0.7007, "step": 13318 }, { "epoch": 0.38, "grad_norm": 4.3433116719127085, "learning_rate": 7.0963463547365845e-06, "loss": 0.3022, "step": 13319 }, { "epoch": 0.38, "grad_norm": 6.034429275529374, "learning_rate": 7.095925317058161e-06, "loss": 0.5725, "step": 13320 }, { "epoch": 0.38, "grad_norm": 4.837239178075213, "learning_rate": 7.095504261348749e-06, "loss": 0.5225, "step": 13321 }, { "epoch": 0.38, "grad_norm": 5.084798377661929, "learning_rate": 7.095083187611974e-06, "loss": 0.6569, "step": 13322 }, { "epoch": 0.38, "grad_norm": 9.353346218726521, "learning_rate": 7.094662095851456e-06, "loss": 0.5789, "step": 13323 }, { "epoch": 0.38, "grad_norm": 6.15191164467193, "learning_rate": 7.094240986070816e-06, "loss": 0.6274, "step": 13324 }, { "epoch": 0.38, "grad_norm": 9.346972672064695, "learning_rate": 7.093819858273681e-06, "loss": 0.3752, "step": 13325 }, { "epoch": 0.38, "grad_norm": 2.5659437067572135, "learning_rate": 7.093398712463669e-06, "loss": 0.2092, "step": 13326 }, { "epoch": 0.38, "grad_norm": 6.108079192359535, "learning_rate": 7.092977548644407e-06, "loss": 0.6216, "step": 13327 }, { "epoch": 0.38, "grad_norm": 4.9045068353322785, "learning_rate": 7.092556366819519e-06, "loss": 0.3559, "step": 13328 }, { "epoch": 0.38, "grad_norm": 3.6758776012606935, "learning_rate": 7.092135166992623e-06, "loss": 0.3552, "step": 13329 }, { "epoch": 0.38, "grad_norm": 9.148775662916382, "learning_rate": 7.091713949167347e-06, "loss": 0.7468, "step": 13330 }, { "epoch": 0.38, "grad_norm": 10.170428663989023, "learning_rate": 7.091292713347313e-06, "loss": 0.3762, "step": 13331 }, { "epoch": 0.38, "grad_norm": 9.427460186979703, "learning_rate": 7.090871459536146e-06, "loss": 1.0573, "step": 13332 }, { "epoch": 0.38, "grad_norm": 6.148730337689644, "learning_rate": 7.0904501877374675e-06, "loss": 0.6404, "step": 13333 }, { "epoch": 0.38, "grad_norm": 5.987021077858233, "learning_rate": 7.090028897954906e-06, "loss": 0.4959, "step": 13334 }, { "epoch": 0.38, "grad_norm": 3.756810996615125, "learning_rate": 7.089607590192082e-06, "loss": 0.3371, "step": 13335 }, { "epoch": 0.38, "grad_norm": 4.537272325168557, "learning_rate": 7.089186264452621e-06, "loss": 0.1751, "step": 13336 }, { "epoch": 0.38, "grad_norm": 4.468384681121108, "learning_rate": 7.088764920740148e-06, "loss": 0.3793, "step": 13337 }, { "epoch": 0.38, "grad_norm": 3.1667413368121657, "learning_rate": 7.088343559058287e-06, "loss": 0.282, "step": 13338 }, { "epoch": 0.38, "grad_norm": 6.345777817013866, "learning_rate": 7.087922179410665e-06, "loss": 0.5325, "step": 13339 }, { "epoch": 0.38, "grad_norm": 7.731584172593713, "learning_rate": 7.087500781800903e-06, "loss": 0.5256, "step": 13340 }, { "epoch": 0.38, "grad_norm": 5.151997378884709, "learning_rate": 7.087079366232631e-06, "loss": 0.3705, "step": 13341 }, { "epoch": 0.38, "grad_norm": 2.1196560984257724, "learning_rate": 7.086657932709472e-06, "loss": 0.211, "step": 13342 }, { "epoch": 0.38, "grad_norm": 5.009862590245513, "learning_rate": 7.086236481235051e-06, "loss": 0.3561, "step": 13343 }, { "epoch": 0.38, "grad_norm": 8.424459397038763, "learning_rate": 7.085815011812995e-06, "loss": 0.7368, "step": 13344 }, { "epoch": 0.38, "grad_norm": 3.819582972856146, "learning_rate": 7.08539352444693e-06, "loss": 0.1808, "step": 13345 }, { "epoch": 0.38, "grad_norm": 7.620818508071898, "learning_rate": 7.084972019140479e-06, "loss": 0.7399, "step": 13346 }, { "epoch": 0.38, "grad_norm": 6.41310434003327, "learning_rate": 7.084550495897273e-06, "loss": 0.2017, "step": 13347 }, { "epoch": 0.38, "grad_norm": 6.768587587051229, "learning_rate": 7.084128954720934e-06, "loss": 0.5632, "step": 13348 }, { "epoch": 0.38, "grad_norm": 8.63308066387442, "learning_rate": 7.083707395615089e-06, "loss": 0.8095, "step": 13349 }, { "epoch": 0.38, "grad_norm": 4.962703720830904, "learning_rate": 7.083285818583367e-06, "loss": 0.4687, "step": 13350 }, { "epoch": 0.38, "grad_norm": 3.9724239255175995, "learning_rate": 7.082864223629395e-06, "loss": 0.1483, "step": 13351 }, { "epoch": 0.38, "grad_norm": 7.086772427898628, "learning_rate": 7.082442610756798e-06, "loss": 0.4829, "step": 13352 }, { "epoch": 0.38, "grad_norm": 8.224406279495652, "learning_rate": 7.082020979969203e-06, "loss": 0.598, "step": 13353 }, { "epoch": 0.38, "grad_norm": 7.193284336002172, "learning_rate": 7.081599331270238e-06, "loss": 0.6378, "step": 13354 }, { "epoch": 0.38, "grad_norm": 9.521880951276422, "learning_rate": 7.0811776646635295e-06, "loss": 0.7095, "step": 13355 }, { "epoch": 0.38, "grad_norm": 2.25433513454875, "learning_rate": 7.080755980152708e-06, "loss": 0.2673, "step": 13356 }, { "epoch": 0.38, "grad_norm": 7.214450678834517, "learning_rate": 7.0803342777413964e-06, "loss": 0.4577, "step": 13357 }, { "epoch": 0.38, "grad_norm": 9.82640062982212, "learning_rate": 7.079912557433227e-06, "loss": 0.5991, "step": 13358 }, { "epoch": 0.38, "grad_norm": 4.260195340079176, "learning_rate": 7.079490819231825e-06, "loss": 0.5483, "step": 13359 }, { "epoch": 0.38, "grad_norm": 2.841626537763068, "learning_rate": 7.07906906314082e-06, "loss": 0.3065, "step": 13360 }, { "epoch": 0.38, "grad_norm": 3.928533911835515, "learning_rate": 7.078647289163839e-06, "loss": 0.3761, "step": 13361 }, { "epoch": 0.38, "grad_norm": 8.098390640691413, "learning_rate": 7.0782254973045125e-06, "loss": 0.6893, "step": 13362 }, { "epoch": 0.38, "grad_norm": 8.072559852508165, "learning_rate": 7.077803687566468e-06, "loss": 0.8085, "step": 13363 }, { "epoch": 0.38, "grad_norm": 6.540385742477303, "learning_rate": 7.077381859953333e-06, "loss": 0.8228, "step": 13364 }, { "epoch": 0.38, "grad_norm": 5.25495642621298, "learning_rate": 7.076960014468738e-06, "loss": 0.4834, "step": 13365 }, { "epoch": 0.38, "grad_norm": 2.2881286200312654, "learning_rate": 7.076538151116312e-06, "loss": 0.2916, "step": 13366 }, { "epoch": 0.38, "grad_norm": 7.263694982653484, "learning_rate": 7.076116269899683e-06, "loss": 0.3747, "step": 13367 }, { "epoch": 0.38, "grad_norm": 8.587326256828195, "learning_rate": 7.075694370822482e-06, "loss": 0.7118, "step": 13368 }, { "epoch": 0.38, "grad_norm": 5.748473794153249, "learning_rate": 7.07527245388834e-06, "loss": 0.2846, "step": 13369 }, { "epoch": 0.38, "grad_norm": 5.526549524190099, "learning_rate": 7.074850519100882e-06, "loss": 0.3257, "step": 13370 }, { "epoch": 0.38, "grad_norm": 2.91660241101648, "learning_rate": 7.0744285664637404e-06, "loss": 0.387, "step": 13371 }, { "epoch": 0.38, "grad_norm": 9.410619588919918, "learning_rate": 7.074006595980547e-06, "loss": 0.5867, "step": 13372 }, { "epoch": 0.38, "grad_norm": 2.641138444450232, "learning_rate": 7.0735846076549285e-06, "loss": 0.4418, "step": 13373 }, { "epoch": 0.38, "grad_norm": 7.563574817036514, "learning_rate": 7.073162601490517e-06, "loss": 0.6953, "step": 13374 }, { "epoch": 0.38, "grad_norm": 6.5001955002948355, "learning_rate": 7.072740577490944e-06, "loss": 0.7533, "step": 13375 }, { "epoch": 0.38, "grad_norm": 6.742319399713748, "learning_rate": 7.072318535659839e-06, "loss": 0.4592, "step": 13376 }, { "epoch": 0.38, "grad_norm": 7.842304598094986, "learning_rate": 7.071896476000832e-06, "loss": 0.4343, "step": 13377 }, { "epoch": 0.38, "grad_norm": 7.083784034826297, "learning_rate": 7.0714743985175546e-06, "loss": 0.4428, "step": 13378 }, { "epoch": 0.38, "grad_norm": 4.682617085668664, "learning_rate": 7.071052303213639e-06, "loss": 0.3736, "step": 13379 }, { "epoch": 0.38, "grad_norm": 7.039145889381023, "learning_rate": 7.0706301900927155e-06, "loss": 0.8504, "step": 13380 }, { "epoch": 0.38, "grad_norm": 6.19455078368247, "learning_rate": 7.070208059158413e-06, "loss": 0.5895, "step": 13381 }, { "epoch": 0.38, "grad_norm": 5.404425053925526, "learning_rate": 7.069785910414368e-06, "loss": 0.4935, "step": 13382 }, { "epoch": 0.38, "grad_norm": 4.2606706458063055, "learning_rate": 7.069363743864209e-06, "loss": 0.3221, "step": 13383 }, { "epoch": 0.38, "grad_norm": 9.838718763419866, "learning_rate": 7.068941559511569e-06, "loss": 0.6001, "step": 13384 }, { "epoch": 0.38, "grad_norm": 7.338682495960687, "learning_rate": 7.068519357360077e-06, "loss": 0.2592, "step": 13385 }, { "epoch": 0.38, "grad_norm": 11.88212560390111, "learning_rate": 7.068097137413371e-06, "loss": 0.6152, "step": 13386 }, { "epoch": 0.38, "grad_norm": 6.515021902297847, "learning_rate": 7.067674899675079e-06, "loss": 0.4422, "step": 13387 }, { "epoch": 0.38, "grad_norm": 3.3486056678541734, "learning_rate": 7.067252644148833e-06, "loss": 0.2805, "step": 13388 }, { "epoch": 0.38, "grad_norm": 8.523251595292306, "learning_rate": 7.066830370838269e-06, "loss": 0.5649, "step": 13389 }, { "epoch": 0.38, "grad_norm": 4.107657098859657, "learning_rate": 7.066408079747016e-06, "loss": 0.661, "step": 13390 }, { "epoch": 0.38, "grad_norm": 6.810095476441897, "learning_rate": 7.065985770878709e-06, "loss": 0.5887, "step": 13391 }, { "epoch": 0.38, "grad_norm": 5.07775109895273, "learning_rate": 7.0655634442369826e-06, "loss": 0.1519, "step": 13392 }, { "epoch": 0.38, "grad_norm": 6.046720833008287, "learning_rate": 7.065141099825466e-06, "loss": 0.3194, "step": 13393 }, { "epoch": 0.38, "grad_norm": 21.507726301248443, "learning_rate": 7.0647187376477955e-06, "loss": 0.7953, "step": 13394 }, { "epoch": 0.38, "grad_norm": 5.275836766121907, "learning_rate": 7.064296357707604e-06, "loss": 0.7791, "step": 13395 }, { "epoch": 0.38, "grad_norm": 4.010333957892647, "learning_rate": 7.063873960008526e-06, "loss": 0.2119, "step": 13396 }, { "epoch": 0.38, "grad_norm": 4.10280458080984, "learning_rate": 7.063451544554194e-06, "loss": 0.3105, "step": 13397 }, { "epoch": 0.38, "grad_norm": 7.838612488757265, "learning_rate": 7.063029111348241e-06, "loss": 0.7383, "step": 13398 }, { "epoch": 0.38, "grad_norm": 8.219627304538369, "learning_rate": 7.062606660394303e-06, "loss": 0.4758, "step": 13399 }, { "epoch": 0.38, "grad_norm": 6.733919769973861, "learning_rate": 7.062184191696015e-06, "loss": 0.5316, "step": 13400 }, { "epoch": 0.38, "grad_norm": 4.756414348955929, "learning_rate": 7.0617617052570095e-06, "loss": 0.5735, "step": 13401 }, { "epoch": 0.38, "grad_norm": 3.8310724394632762, "learning_rate": 7.061339201080924e-06, "loss": 0.4421, "step": 13402 }, { "epoch": 0.38, "grad_norm": 7.814992217232703, "learning_rate": 7.060916679171388e-06, "loss": 0.5881, "step": 13403 }, { "epoch": 0.38, "grad_norm": 5.150144163910134, "learning_rate": 7.0604941395320405e-06, "loss": 0.6425, "step": 13404 }, { "epoch": 0.38, "grad_norm": 8.270273037500463, "learning_rate": 7.060071582166516e-06, "loss": 0.9964, "step": 13405 }, { "epoch": 0.38, "grad_norm": 4.487087054366058, "learning_rate": 7.0596490070784485e-06, "loss": 0.588, "step": 13406 }, { "epoch": 0.38, "grad_norm": 3.6419519944805714, "learning_rate": 7.059226414271475e-06, "loss": 0.5453, "step": 13407 }, { "epoch": 0.38, "grad_norm": 7.9386105699147205, "learning_rate": 7.05880380374923e-06, "loss": 0.5988, "step": 13408 }, { "epoch": 0.38, "grad_norm": 4.414392779661622, "learning_rate": 7.058381175515351e-06, "loss": 0.3078, "step": 13409 }, { "epoch": 0.38, "grad_norm": 4.871834216090907, "learning_rate": 7.05795852957347e-06, "loss": 0.3508, "step": 13410 }, { "epoch": 0.38, "grad_norm": 7.955977492299183, "learning_rate": 7.0575358659272255e-06, "loss": 0.7387, "step": 13411 }, { "epoch": 0.38, "grad_norm": 5.1018000455409265, "learning_rate": 7.057113184580253e-06, "loss": 0.4897, "step": 13412 }, { "epoch": 0.38, "grad_norm": 9.6547382245365, "learning_rate": 7.05669048553619e-06, "loss": 0.5035, "step": 13413 }, { "epoch": 0.38, "grad_norm": 7.314431774887482, "learning_rate": 7.056267768798671e-06, "loss": 0.6635, "step": 13414 }, { "epoch": 0.38, "grad_norm": 6.356087530829772, "learning_rate": 7.055845034371335e-06, "loss": 0.4652, "step": 13415 }, { "epoch": 0.38, "grad_norm": 4.980336912317566, "learning_rate": 7.055422282257817e-06, "loss": 0.6755, "step": 13416 }, { "epoch": 0.38, "grad_norm": 7.518527004014092, "learning_rate": 7.054999512461753e-06, "loss": 0.5236, "step": 13417 }, { "epoch": 0.38, "grad_norm": 4.396255927842487, "learning_rate": 7.054576724986782e-06, "loss": 0.4124, "step": 13418 }, { "epoch": 0.38, "grad_norm": 7.039530917383928, "learning_rate": 7.054153919836542e-06, "loss": 0.4086, "step": 13419 }, { "epoch": 0.38, "grad_norm": 8.487950591990696, "learning_rate": 7.053731097014667e-06, "loss": 0.4266, "step": 13420 }, { "epoch": 0.38, "grad_norm": 6.715417079626904, "learning_rate": 7.0533082565247955e-06, "loss": 0.3749, "step": 13421 }, { "epoch": 0.38, "grad_norm": 7.070470717540845, "learning_rate": 7.0528853983705655e-06, "loss": 0.9891, "step": 13422 }, { "epoch": 0.38, "grad_norm": 4.95811557558742, "learning_rate": 7.052462522555616e-06, "loss": 0.1923, "step": 13423 }, { "epoch": 0.38, "grad_norm": 16.067731280029662, "learning_rate": 7.0520396290835844e-06, "loss": 0.6838, "step": 13424 }, { "epoch": 0.38, "grad_norm": 6.760865825981089, "learning_rate": 7.0516167179581085e-06, "loss": 0.3292, "step": 13425 }, { "epoch": 0.38, "grad_norm": 4.7871748422903195, "learning_rate": 7.051193789182828e-06, "loss": 0.9054, "step": 13426 }, { "epoch": 0.38, "grad_norm": 4.033110196795653, "learning_rate": 7.050770842761379e-06, "loss": 0.3504, "step": 13427 }, { "epoch": 0.38, "grad_norm": 4.545620643658743, "learning_rate": 7.050347878697399e-06, "loss": 0.898, "step": 13428 }, { "epoch": 0.38, "grad_norm": 5.928047606787983, "learning_rate": 7.049924896994531e-06, "loss": 0.5864, "step": 13429 }, { "epoch": 0.38, "grad_norm": 6.294714696719633, "learning_rate": 7.049501897656411e-06, "loss": 0.6921, "step": 13430 }, { "epoch": 0.38, "grad_norm": 4.987616641848551, "learning_rate": 7.049078880686677e-06, "loss": 0.3679, "step": 13431 }, { "epoch": 0.38, "grad_norm": 3.9751648910678647, "learning_rate": 7.0486558460889724e-06, "loss": 0.4202, "step": 13432 }, { "epoch": 0.38, "grad_norm": 4.952036596625189, "learning_rate": 7.048232793866932e-06, "loss": 0.3307, "step": 13433 }, { "epoch": 0.38, "grad_norm": 5.539536190896388, "learning_rate": 7.047809724024196e-06, "loss": 0.3072, "step": 13434 }, { "epoch": 0.38, "grad_norm": 7.2069650869252975, "learning_rate": 7.047386636564406e-06, "loss": 0.1991, "step": 13435 }, { "epoch": 0.38, "grad_norm": 4.629238197238687, "learning_rate": 7.0469635314912024e-06, "loss": 0.9208, "step": 13436 }, { "epoch": 0.38, "grad_norm": 14.13564058415558, "learning_rate": 7.046540408808222e-06, "loss": 0.5346, "step": 13437 }, { "epoch": 0.38, "grad_norm": 7.1189645334660545, "learning_rate": 7.046117268519107e-06, "loss": 0.6927, "step": 13438 }, { "epoch": 0.38, "grad_norm": 8.638471172846591, "learning_rate": 7.045694110627497e-06, "loss": 0.5796, "step": 13439 }, { "epoch": 0.38, "grad_norm": 5.077908578809219, "learning_rate": 7.0452709351370315e-06, "loss": 0.5519, "step": 13440 }, { "epoch": 0.38, "grad_norm": 7.068906528348692, "learning_rate": 7.044847742051353e-06, "loss": 0.4974, "step": 13441 }, { "epoch": 0.38, "grad_norm": 5.010326510257341, "learning_rate": 7.0444245313741e-06, "loss": 0.2947, "step": 13442 }, { "epoch": 0.38, "grad_norm": 8.64855311866949, "learning_rate": 7.044001303108916e-06, "loss": 0.7953, "step": 13443 }, { "epoch": 0.39, "grad_norm": 8.148205732814809, "learning_rate": 7.04357805725944e-06, "loss": 0.432, "step": 13444 }, { "epoch": 0.39, "grad_norm": 3.6216375607197113, "learning_rate": 7.0431547938293134e-06, "loss": 0.2349, "step": 13445 }, { "epoch": 0.39, "grad_norm": 5.261877452606452, "learning_rate": 7.042731512822179e-06, "loss": 0.4419, "step": 13446 }, { "epoch": 0.39, "grad_norm": 5.943212230915543, "learning_rate": 7.042308214241674e-06, "loss": 0.5181, "step": 13447 }, { "epoch": 0.39, "grad_norm": 4.4423066822639, "learning_rate": 7.041884898091444e-06, "loss": 0.3408, "step": 13448 }, { "epoch": 0.39, "grad_norm": 8.022335820026319, "learning_rate": 7.041461564375132e-06, "loss": 0.6469, "step": 13449 }, { "epoch": 0.39, "grad_norm": 8.611481839748508, "learning_rate": 7.041038213096374e-06, "loss": 0.6465, "step": 13450 }, { "epoch": 0.39, "grad_norm": 4.311584195738197, "learning_rate": 7.040614844258818e-06, "loss": 0.4404, "step": 13451 }, { "epoch": 0.39, "grad_norm": 5.560019100588641, "learning_rate": 7.040191457866102e-06, "loss": 0.477, "step": 13452 }, { "epoch": 0.39, "grad_norm": 6.463700465962519, "learning_rate": 7.039768053921872e-06, "loss": 0.713, "step": 13453 }, { "epoch": 0.39, "grad_norm": 5.406412265525412, "learning_rate": 7.039344632429767e-06, "loss": 0.2173, "step": 13454 }, { "epoch": 0.39, "grad_norm": 10.237731569533327, "learning_rate": 7.038921193393431e-06, "loss": 0.6844, "step": 13455 }, { "epoch": 0.39, "grad_norm": 6.313213647972118, "learning_rate": 7.038497736816508e-06, "loss": 0.2144, "step": 13456 }, { "epoch": 0.39, "grad_norm": 3.7961094716388915, "learning_rate": 7.03807426270264e-06, "loss": 0.3126, "step": 13457 }, { "epoch": 0.39, "grad_norm": 6.83971202294316, "learning_rate": 7.0376507710554694e-06, "loss": 0.7022, "step": 13458 }, { "epoch": 0.39, "grad_norm": 8.968775732555093, "learning_rate": 7.0372272618786415e-06, "loss": 1.019, "step": 13459 }, { "epoch": 0.39, "grad_norm": 3.859330922234141, "learning_rate": 7.036803735175796e-06, "loss": 0.3107, "step": 13460 }, { "epoch": 0.39, "grad_norm": 4.37810139401244, "learning_rate": 7.0363801909505815e-06, "loss": 0.6038, "step": 13461 }, { "epoch": 0.39, "grad_norm": 6.62068759268867, "learning_rate": 7.035956629206637e-06, "loss": 0.3052, "step": 13462 }, { "epoch": 0.39, "grad_norm": 4.342272884686571, "learning_rate": 7.0355330499476085e-06, "loss": 0.2402, "step": 13463 }, { "epoch": 0.39, "grad_norm": 5.023524353320661, "learning_rate": 7.03510945317714e-06, "loss": 0.4166, "step": 13464 }, { "epoch": 0.39, "grad_norm": 2.717990856952461, "learning_rate": 7.034685838898876e-06, "loss": 0.4511, "step": 13465 }, { "epoch": 0.39, "grad_norm": 8.256838045396316, "learning_rate": 7.03426220711646e-06, "loss": 0.6675, "step": 13466 }, { "epoch": 0.39, "grad_norm": 7.6759332928146895, "learning_rate": 7.033838557833537e-06, "loss": 0.3991, "step": 13467 }, { "epoch": 0.39, "grad_norm": 8.436197703136251, "learning_rate": 7.0334148910537495e-06, "loss": 0.3012, "step": 13468 }, { "epoch": 0.39, "grad_norm": 3.0440708265473626, "learning_rate": 7.032991206780746e-06, "loss": 0.2273, "step": 13469 }, { "epoch": 0.39, "grad_norm": 4.636607090827572, "learning_rate": 7.032567505018168e-06, "loss": 0.4366, "step": 13470 }, { "epoch": 0.39, "grad_norm": 5.724197336424714, "learning_rate": 7.032143785769664e-06, "loss": 0.3546, "step": 13471 }, { "epoch": 0.39, "grad_norm": 4.625866035621174, "learning_rate": 7.031720049038875e-06, "loss": 0.7702, "step": 13472 }, { "epoch": 0.39, "grad_norm": 7.028769704135423, "learning_rate": 7.03129629482945e-06, "loss": 0.685, "step": 13473 }, { "epoch": 0.39, "grad_norm": 13.188390765004614, "learning_rate": 7.030872523145033e-06, "loss": 0.2685, "step": 13474 }, { "epoch": 0.39, "grad_norm": 5.011432012143813, "learning_rate": 7.030448733989268e-06, "loss": 0.3028, "step": 13475 }, { "epoch": 0.39, "grad_norm": 10.216805882669634, "learning_rate": 7.030024927365805e-06, "loss": 0.3517, "step": 13476 }, { "epoch": 0.39, "grad_norm": 6.812585987633309, "learning_rate": 7.029601103278287e-06, "loss": 0.2597, "step": 13477 }, { "epoch": 0.39, "grad_norm": 4.629795475143864, "learning_rate": 7.029177261730359e-06, "loss": 0.8655, "step": 13478 }, { "epoch": 0.39, "grad_norm": 5.424251508277325, "learning_rate": 7.02875340272567e-06, "loss": 0.6663, "step": 13479 }, { "epoch": 0.39, "grad_norm": 8.014862143222784, "learning_rate": 7.0283295262678656e-06, "loss": 0.7438, "step": 13480 }, { "epoch": 0.39, "grad_norm": 6.214270660620184, "learning_rate": 7.02790563236059e-06, "loss": 0.4237, "step": 13481 }, { "epoch": 0.39, "grad_norm": 4.19467621423294, "learning_rate": 7.027481721007494e-06, "loss": 0.3309, "step": 13482 }, { "epoch": 0.39, "grad_norm": 15.024464556758259, "learning_rate": 7.027057792212222e-06, "loss": 0.3857, "step": 13483 }, { "epoch": 0.39, "grad_norm": 10.576787100498086, "learning_rate": 7.026633845978421e-06, "loss": 0.5872, "step": 13484 }, { "epoch": 0.39, "grad_norm": 7.206860316454047, "learning_rate": 7.0262098823097394e-06, "loss": 0.7295, "step": 13485 }, { "epoch": 0.39, "grad_norm": 7.478048465909404, "learning_rate": 7.025785901209823e-06, "loss": 0.5766, "step": 13486 }, { "epoch": 0.39, "grad_norm": 4.889486259512139, "learning_rate": 7.02536190268232e-06, "loss": 0.574, "step": 13487 }, { "epoch": 0.39, "grad_norm": 9.288888786879705, "learning_rate": 7.0249378867308775e-06, "loss": 0.4121, "step": 13488 }, { "epoch": 0.39, "grad_norm": 9.854713752727339, "learning_rate": 7.024513853359145e-06, "loss": 0.6484, "step": 13489 }, { "epoch": 0.39, "grad_norm": 6.040874530120086, "learning_rate": 7.0240898025707675e-06, "loss": 0.2981, "step": 13490 }, { "epoch": 0.39, "grad_norm": 12.992079119020941, "learning_rate": 7.023665734369395e-06, "loss": 0.7056, "step": 13491 }, { "epoch": 0.39, "grad_norm": 8.281987686409694, "learning_rate": 7.023241648758675e-06, "loss": 0.614, "step": 13492 }, { "epoch": 0.39, "grad_norm": 3.192168556788287, "learning_rate": 7.022817545742258e-06, "loss": 0.468, "step": 13493 }, { "epoch": 0.39, "grad_norm": 5.351557421159771, "learning_rate": 7.022393425323789e-06, "loss": 0.3792, "step": 13494 }, { "epoch": 0.39, "grad_norm": 8.961379687998694, "learning_rate": 7.021969287506919e-06, "loss": 0.6092, "step": 13495 }, { "epoch": 0.39, "grad_norm": 7.200858332546394, "learning_rate": 7.0215451322952954e-06, "loss": 0.5028, "step": 13496 }, { "epoch": 0.39, "grad_norm": 8.220808779381265, "learning_rate": 7.021120959692569e-06, "loss": 0.4426, "step": 13497 }, { "epoch": 0.39, "grad_norm": 5.2738017090664995, "learning_rate": 7.020696769702388e-06, "loss": 0.2582, "step": 13498 }, { "epoch": 0.39, "grad_norm": 7.158761529102237, "learning_rate": 7.0202725623284e-06, "loss": 0.4636, "step": 13499 }, { "epoch": 0.39, "grad_norm": 6.553770453555568, "learning_rate": 7.0198483375742574e-06, "loss": 0.6025, "step": 13500 }, { "epoch": 0.39, "grad_norm": 6.061309933608995, "learning_rate": 7.019424095443608e-06, "loss": 0.5984, "step": 13501 }, { "epoch": 0.39, "grad_norm": 7.140145650922104, "learning_rate": 7.018999835940101e-06, "loss": 0.2988, "step": 13502 }, { "epoch": 0.39, "grad_norm": 12.13289636901047, "learning_rate": 7.018575559067389e-06, "loss": 0.8233, "step": 13503 }, { "epoch": 0.39, "grad_norm": 8.043721175420018, "learning_rate": 7.018151264829118e-06, "loss": 0.6807, "step": 13504 }, { "epoch": 0.39, "grad_norm": 4.512444375876182, "learning_rate": 7.017726953228938e-06, "loss": 0.2422, "step": 13505 }, { "epoch": 0.39, "grad_norm": 6.698617126320125, "learning_rate": 7.017302624270506e-06, "loss": 0.8026, "step": 13506 }, { "epoch": 0.39, "grad_norm": 5.953361286270001, "learning_rate": 7.016878277957465e-06, "loss": 0.6009, "step": 13507 }, { "epoch": 0.39, "grad_norm": 4.522449128480048, "learning_rate": 7.01645391429347e-06, "loss": 0.4184, "step": 13508 }, { "epoch": 0.39, "grad_norm": 4.9110548703874874, "learning_rate": 7.016029533282169e-06, "loss": 0.6034, "step": 13509 }, { "epoch": 0.39, "grad_norm": 8.655775670025106, "learning_rate": 7.015605134927216e-06, "loss": 0.6393, "step": 13510 }, { "epoch": 0.39, "grad_norm": 4.1852694591700805, "learning_rate": 7.015180719232258e-06, "loss": 0.4876, "step": 13511 }, { "epoch": 0.39, "grad_norm": 5.921767945944972, "learning_rate": 7.01475628620095e-06, "loss": 0.6637, "step": 13512 }, { "epoch": 0.39, "grad_norm": 10.5275551854347, "learning_rate": 7.014331835836941e-06, "loss": 0.3292, "step": 13513 }, { "epoch": 0.39, "grad_norm": 5.6767132513061584, "learning_rate": 7.013907368143883e-06, "loss": 0.5159, "step": 13514 }, { "epoch": 0.39, "grad_norm": 6.722916947695696, "learning_rate": 7.0134828831254275e-06, "loss": 0.4141, "step": 13515 }, { "epoch": 0.39, "grad_norm": 5.011110311965334, "learning_rate": 7.013058380785227e-06, "loss": 0.7264, "step": 13516 }, { "epoch": 0.39, "grad_norm": 4.855654650786804, "learning_rate": 7.012633861126934e-06, "loss": 0.2469, "step": 13517 }, { "epoch": 0.39, "grad_norm": 4.678639646467529, "learning_rate": 7.0122093241542e-06, "loss": 0.215, "step": 13518 }, { "epoch": 0.39, "grad_norm": 3.9820335389673853, "learning_rate": 7.011784769870675e-06, "loss": 0.132, "step": 13519 }, { "epoch": 0.39, "grad_norm": 5.893203656631212, "learning_rate": 7.0113601982800165e-06, "loss": 0.8661, "step": 13520 }, { "epoch": 0.39, "grad_norm": 4.489028138116494, "learning_rate": 7.010935609385869e-06, "loss": 0.3322, "step": 13521 }, { "epoch": 0.39, "grad_norm": 11.31868625330128, "learning_rate": 7.010511003191893e-06, "loss": 0.6361, "step": 13522 }, { "epoch": 0.39, "grad_norm": 7.00106885106646, "learning_rate": 7.0100863797017395e-06, "loss": 0.6212, "step": 13523 }, { "epoch": 0.39, "grad_norm": 8.179066048727284, "learning_rate": 7.0096617389190594e-06, "loss": 1.0105, "step": 13524 }, { "epoch": 0.39, "grad_norm": 5.310562745463769, "learning_rate": 7.009237080847506e-06, "loss": 0.4366, "step": 13525 }, { "epoch": 0.39, "grad_norm": 8.193223830728504, "learning_rate": 7.008812405490733e-06, "loss": 0.8539, "step": 13526 }, { "epoch": 0.39, "grad_norm": 4.185292316764929, "learning_rate": 7.0083877128523955e-06, "loss": 0.4873, "step": 13527 }, { "epoch": 0.39, "grad_norm": 6.9553230840260785, "learning_rate": 7.007963002936146e-06, "loss": 0.5234, "step": 13528 }, { "epoch": 0.39, "grad_norm": 8.563031145314717, "learning_rate": 7.007538275745636e-06, "loss": 0.9777, "step": 13529 }, { "epoch": 0.39, "grad_norm": 4.878237065281041, "learning_rate": 7.007113531284524e-06, "loss": 0.5072, "step": 13530 }, { "epoch": 0.39, "grad_norm": 5.1563225480958055, "learning_rate": 7.00668876955646e-06, "loss": 0.4238, "step": 13531 }, { "epoch": 0.39, "grad_norm": 6.9945959791437025, "learning_rate": 7.006263990565099e-06, "loss": 0.5246, "step": 13532 }, { "epoch": 0.39, "grad_norm": 13.098200112373336, "learning_rate": 7.005839194314098e-06, "loss": 0.3673, "step": 13533 }, { "epoch": 0.39, "grad_norm": 6.31388144909667, "learning_rate": 7.005414380807107e-06, "loss": 0.5391, "step": 13534 }, { "epoch": 0.39, "grad_norm": 9.91675537668081, "learning_rate": 7.004989550047785e-06, "loss": 0.6844, "step": 13535 }, { "epoch": 0.39, "grad_norm": 6.150242541733094, "learning_rate": 7.0045647020397825e-06, "loss": 0.5781, "step": 13536 }, { "epoch": 0.39, "grad_norm": 6.079123603750203, "learning_rate": 7.004139836786759e-06, "loss": 0.3181, "step": 13537 }, { "epoch": 0.39, "grad_norm": 6.1096865679424255, "learning_rate": 7.0037149542923655e-06, "loss": 0.6987, "step": 13538 }, { "epoch": 0.39, "grad_norm": 7.045571454048651, "learning_rate": 7.003290054560259e-06, "loss": 0.2606, "step": 13539 }, { "epoch": 0.39, "grad_norm": 4.44940988935554, "learning_rate": 7.0028651375940974e-06, "loss": 0.5419, "step": 13540 }, { "epoch": 0.39, "grad_norm": 8.996532407957615, "learning_rate": 7.002440203397531e-06, "loss": 0.7294, "step": 13541 }, { "epoch": 0.39, "grad_norm": 20.320813036088452, "learning_rate": 7.002015251974218e-06, "loss": 0.1335, "step": 13542 }, { "epoch": 0.39, "grad_norm": 2.465585080247887, "learning_rate": 7.001590283327815e-06, "loss": 0.3469, "step": 13543 }, { "epoch": 0.39, "grad_norm": 8.886574088795104, "learning_rate": 7.001165297461978e-06, "loss": 0.2444, "step": 13544 }, { "epoch": 0.39, "grad_norm": 6.840457803731423, "learning_rate": 7.000740294380361e-06, "loss": 0.5523, "step": 13545 }, { "epoch": 0.39, "grad_norm": 3.790008552662401, "learning_rate": 7.000315274086622e-06, "loss": 0.4847, "step": 13546 }, { "epoch": 0.39, "grad_norm": 6.085075523045712, "learning_rate": 6.9998902365844166e-06, "loss": 0.3174, "step": 13547 }, { "epoch": 0.39, "grad_norm": 8.96230996753759, "learning_rate": 6.999465181877403e-06, "loss": 0.5796, "step": 13548 }, { "epoch": 0.39, "grad_norm": 10.232467823103795, "learning_rate": 6.999040109969235e-06, "loss": 0.7801, "step": 13549 }, { "epoch": 0.39, "grad_norm": 8.914272081885201, "learning_rate": 6.998615020863574e-06, "loss": 0.6019, "step": 13550 }, { "epoch": 0.39, "grad_norm": 7.206250371213375, "learning_rate": 6.998189914564071e-06, "loss": 0.639, "step": 13551 }, { "epoch": 0.39, "grad_norm": 8.681946275992566, "learning_rate": 6.997764791074387e-06, "loss": 0.4196, "step": 13552 }, { "epoch": 0.39, "grad_norm": 4.899553235764993, "learning_rate": 6.997339650398179e-06, "loss": 0.8979, "step": 13553 }, { "epoch": 0.39, "grad_norm": 6.6599304518920235, "learning_rate": 6.996914492539103e-06, "loss": 0.3278, "step": 13554 }, { "epoch": 0.39, "grad_norm": 3.197320955108828, "learning_rate": 6.996489317500818e-06, "loss": 0.5963, "step": 13555 }, { "epoch": 0.39, "grad_norm": 7.3727691638510295, "learning_rate": 6.996064125286981e-06, "loss": 0.7202, "step": 13556 }, { "epoch": 0.39, "grad_norm": 48.10211376117795, "learning_rate": 6.995638915901252e-06, "loss": 0.3406, "step": 13557 }, { "epoch": 0.39, "grad_norm": 13.002398508061228, "learning_rate": 6.995213689347285e-06, "loss": 0.7533, "step": 13558 }, { "epoch": 0.39, "grad_norm": 8.866234197921148, "learning_rate": 6.9947884456287405e-06, "loss": 0.2375, "step": 13559 }, { "epoch": 0.39, "grad_norm": 6.403046192970687, "learning_rate": 6.9943631847492775e-06, "loss": 0.7313, "step": 13560 }, { "epoch": 0.39, "grad_norm": 4.112389245904345, "learning_rate": 6.993937906712553e-06, "loss": 0.2535, "step": 13561 }, { "epoch": 0.39, "grad_norm": 6.079633961845986, "learning_rate": 6.993512611522225e-06, "loss": 0.3658, "step": 13562 }, { "epoch": 0.39, "grad_norm": 6.467252438189937, "learning_rate": 6.993087299181955e-06, "loss": 0.3682, "step": 13563 }, { "epoch": 0.39, "grad_norm": 4.189568193896139, "learning_rate": 6.9926619696954e-06, "loss": 0.5, "step": 13564 }, { "epoch": 0.39, "grad_norm": 6.767008721423938, "learning_rate": 6.9922366230662195e-06, "loss": 0.2353, "step": 13565 }, { "epoch": 0.39, "grad_norm": 41.408708008557674, "learning_rate": 6.991811259298073e-06, "loss": 0.8054, "step": 13566 }, { "epoch": 0.39, "grad_norm": 6.164158732118752, "learning_rate": 6.9913858783946186e-06, "loss": 0.4128, "step": 13567 }, { "epoch": 0.39, "grad_norm": 6.092508355824873, "learning_rate": 6.9909604803595175e-06, "loss": 0.5374, "step": 13568 }, { "epoch": 0.39, "grad_norm": 6.48890233842263, "learning_rate": 6.990535065196428e-06, "loss": 0.5749, "step": 13569 }, { "epoch": 0.39, "grad_norm": 8.173406103763359, "learning_rate": 6.99010963290901e-06, "loss": 1.0915, "step": 13570 }, { "epoch": 0.39, "grad_norm": 5.435359445809013, "learning_rate": 6.989684183500924e-06, "loss": 0.5622, "step": 13571 }, { "epoch": 0.39, "grad_norm": 5.114494675167638, "learning_rate": 6.989258716975831e-06, "loss": 0.6936, "step": 13572 }, { "epoch": 0.39, "grad_norm": 5.493155208326198, "learning_rate": 6.988833233337389e-06, "loss": 0.4131, "step": 13573 }, { "epoch": 0.39, "grad_norm": 6.016775680075715, "learning_rate": 6.988407732589261e-06, "loss": 0.6183, "step": 13574 }, { "epoch": 0.39, "grad_norm": 6.763665635769484, "learning_rate": 6.987982214735105e-06, "loss": 0.2032, "step": 13575 }, { "epoch": 0.39, "grad_norm": 7.546691386830491, "learning_rate": 6.987556679778582e-06, "loss": 0.9363, "step": 13576 }, { "epoch": 0.39, "grad_norm": 5.497498463573648, "learning_rate": 6.9871311277233555e-06, "loss": 0.6116, "step": 13577 }, { "epoch": 0.39, "grad_norm": 4.236134997371736, "learning_rate": 6.9867055585730834e-06, "loss": 0.3386, "step": 13578 }, { "epoch": 0.39, "grad_norm": 7.876802661990069, "learning_rate": 6.986279972331426e-06, "loss": 0.4229, "step": 13579 }, { "epoch": 0.39, "grad_norm": 5.373813231993065, "learning_rate": 6.985854369002051e-06, "loss": 0.2711, "step": 13580 }, { "epoch": 0.39, "grad_norm": 4.355881224118973, "learning_rate": 6.985428748588613e-06, "loss": 0.1875, "step": 13581 }, { "epoch": 0.39, "grad_norm": 5.413313380347124, "learning_rate": 6.985003111094776e-06, "loss": 0.5473, "step": 13582 }, { "epoch": 0.39, "grad_norm": 6.235700041040385, "learning_rate": 6.984577456524201e-06, "loss": 0.6359, "step": 13583 }, { "epoch": 0.39, "grad_norm": 6.01510820026699, "learning_rate": 6.984151784880552e-06, "loss": 0.2424, "step": 13584 }, { "epoch": 0.39, "grad_norm": 5.784893228242887, "learning_rate": 6.983726096167489e-06, "loss": 0.2301, "step": 13585 }, { "epoch": 0.39, "grad_norm": 7.132105914546288, "learning_rate": 6.983300390388674e-06, "loss": 0.6786, "step": 13586 }, { "epoch": 0.39, "grad_norm": 6.106061031732502, "learning_rate": 6.98287466754777e-06, "loss": 0.4911, "step": 13587 }, { "epoch": 0.39, "grad_norm": 2.90868232831936, "learning_rate": 6.98244892764844e-06, "loss": 0.2866, "step": 13588 }, { "epoch": 0.39, "grad_norm": 10.483482811796582, "learning_rate": 6.9820231706943445e-06, "loss": 0.9103, "step": 13589 }, { "epoch": 0.39, "grad_norm": 3.80722667709943, "learning_rate": 6.981597396689149e-06, "loss": 0.2789, "step": 13590 }, { "epoch": 0.39, "grad_norm": 4.657660609868431, "learning_rate": 6.981171605636515e-06, "loss": 0.7139, "step": 13591 }, { "epoch": 0.39, "grad_norm": 12.812647376724804, "learning_rate": 6.9807457975401046e-06, "loss": 0.9331, "step": 13592 }, { "epoch": 0.39, "grad_norm": 6.997251243408293, "learning_rate": 6.980319972403584e-06, "loss": 0.8798, "step": 13593 }, { "epoch": 0.39, "grad_norm": 2.5697308493846123, "learning_rate": 6.9798941302306136e-06, "loss": 0.3868, "step": 13594 }, { "epoch": 0.39, "grad_norm": 3.177399245434783, "learning_rate": 6.979468271024857e-06, "loss": 0.4426, "step": 13595 }, { "epoch": 0.39, "grad_norm": 6.0268732902178686, "learning_rate": 6.979042394789979e-06, "loss": 0.3951, "step": 13596 }, { "epoch": 0.39, "grad_norm": 5.931334315867409, "learning_rate": 6.978616501529643e-06, "loss": 0.6503, "step": 13597 }, { "epoch": 0.39, "grad_norm": 8.186732569247017, "learning_rate": 6.978190591247513e-06, "loss": 0.5792, "step": 13598 }, { "epoch": 0.39, "grad_norm": 5.403884237767691, "learning_rate": 6.977764663947253e-06, "loss": 0.8253, "step": 13599 }, { "epoch": 0.39, "grad_norm": 6.610771662904789, "learning_rate": 6.977338719632526e-06, "loss": 0.6693, "step": 13600 }, { "epoch": 0.39, "grad_norm": 6.44867728517504, "learning_rate": 6.9769127583069994e-06, "loss": 0.6604, "step": 13601 }, { "epoch": 0.39, "grad_norm": 7.995025280118652, "learning_rate": 6.976486779974334e-06, "loss": 0.6637, "step": 13602 }, { "epoch": 0.39, "grad_norm": 4.989262471670058, "learning_rate": 6.976060784638195e-06, "loss": 0.7704, "step": 13603 }, { "epoch": 0.39, "grad_norm": 6.576306832551415, "learning_rate": 6.975634772302252e-06, "loss": 0.5599, "step": 13604 }, { "epoch": 0.39, "grad_norm": 5.332784723834875, "learning_rate": 6.975208742970164e-06, "loss": 0.4794, "step": 13605 }, { "epoch": 0.39, "grad_norm": 8.02930795812155, "learning_rate": 6.974782696645599e-06, "loss": 0.4835, "step": 13606 }, { "epoch": 0.39, "grad_norm": 2.62800272000548, "learning_rate": 6.974356633332222e-06, "loss": 0.113, "step": 13607 }, { "epoch": 0.39, "grad_norm": 3.9038160752253983, "learning_rate": 6.973930553033698e-06, "loss": 0.3269, "step": 13608 }, { "epoch": 0.39, "grad_norm": 10.035369122401603, "learning_rate": 6.973504455753692e-06, "loss": 0.867, "step": 13609 }, { "epoch": 0.39, "grad_norm": 9.119849332100282, "learning_rate": 6.9730783414958695e-06, "loss": 0.9354, "step": 13610 }, { "epoch": 0.39, "grad_norm": 4.693688922794547, "learning_rate": 6.972652210263897e-06, "loss": 0.2938, "step": 13611 }, { "epoch": 0.39, "grad_norm": 6.518027176104179, "learning_rate": 6.972226062061441e-06, "loss": 0.6083, "step": 13612 }, { "epoch": 0.39, "grad_norm": 4.201338093908581, "learning_rate": 6.971799896892167e-06, "loss": 0.3319, "step": 13613 }, { "epoch": 0.39, "grad_norm": 7.294372848420213, "learning_rate": 6.971373714759742e-06, "loss": 0.5323, "step": 13614 }, { "epoch": 0.39, "grad_norm": 4.224559965298857, "learning_rate": 6.9709475156678306e-06, "loss": 0.3539, "step": 13615 }, { "epoch": 0.39, "grad_norm": 3.728643448138799, "learning_rate": 6.970521299620101e-06, "loss": 0.393, "step": 13616 }, { "epoch": 0.39, "grad_norm": 6.747374765845893, "learning_rate": 6.970095066620218e-06, "loss": 0.7606, "step": 13617 }, { "epoch": 0.39, "grad_norm": 4.644267749289818, "learning_rate": 6.969668816671852e-06, "loss": 0.416, "step": 13618 }, { "epoch": 0.39, "grad_norm": 5.886216595956538, "learning_rate": 6.969242549778667e-06, "loss": 0.8313, "step": 13619 }, { "epoch": 0.39, "grad_norm": 4.265206340883177, "learning_rate": 6.968816265944329e-06, "loss": 0.8914, "step": 13620 }, { "epoch": 0.39, "grad_norm": 5.0671530168073415, "learning_rate": 6.968389965172508e-06, "loss": 0.4259, "step": 13621 }, { "epoch": 0.39, "grad_norm": 9.151594469152347, "learning_rate": 6.967963647466871e-06, "loss": 0.7333, "step": 13622 }, { "epoch": 0.39, "grad_norm": 5.2771829147169855, "learning_rate": 6.967537312831084e-06, "loss": 0.7956, "step": 13623 }, { "epoch": 0.39, "grad_norm": 2.9707522617065996, "learning_rate": 6.967110961268817e-06, "loss": 0.4664, "step": 13624 }, { "epoch": 0.39, "grad_norm": 4.195184972092797, "learning_rate": 6.9666845927837365e-06, "loss": 0.453, "step": 13625 }, { "epoch": 0.39, "grad_norm": 6.330968900655676, "learning_rate": 6.966258207379511e-06, "loss": 0.5489, "step": 13626 }, { "epoch": 0.39, "grad_norm": 5.456218071808094, "learning_rate": 6.965831805059806e-06, "loss": 0.6155, "step": 13627 }, { "epoch": 0.39, "grad_norm": 3.6763923291123533, "learning_rate": 6.965405385828295e-06, "loss": 0.4254, "step": 13628 }, { "epoch": 0.39, "grad_norm": 9.050428308710364, "learning_rate": 6.964978949688641e-06, "loss": 0.7325, "step": 13629 }, { "epoch": 0.39, "grad_norm": 4.738096279172471, "learning_rate": 6.964552496644516e-06, "loss": 0.4585, "step": 13630 }, { "epoch": 0.39, "grad_norm": 26.7071529004578, "learning_rate": 6.964126026699588e-06, "loss": 0.3385, "step": 13631 }, { "epoch": 0.39, "grad_norm": 4.823279310177423, "learning_rate": 6.963699539857524e-06, "loss": 0.5375, "step": 13632 }, { "epoch": 0.39, "grad_norm": 13.965464297292103, "learning_rate": 6.963273036121997e-06, "loss": 0.5333, "step": 13633 }, { "epoch": 0.39, "grad_norm": 4.171973580899441, "learning_rate": 6.962846515496673e-06, "loss": 0.3591, "step": 13634 }, { "epoch": 0.39, "grad_norm": 4.993378665758191, "learning_rate": 6.962419977985222e-06, "loss": 0.1826, "step": 13635 }, { "epoch": 0.39, "grad_norm": 9.490315369650773, "learning_rate": 6.961993423591312e-06, "loss": 0.8369, "step": 13636 }, { "epoch": 0.39, "grad_norm": 5.176920109816771, "learning_rate": 6.9615668523186165e-06, "loss": 0.6148, "step": 13637 }, { "epoch": 0.39, "grad_norm": 4.600631619710244, "learning_rate": 6.961140264170801e-06, "loss": 0.4916, "step": 13638 }, { "epoch": 0.39, "grad_norm": 5.745605530993721, "learning_rate": 6.960713659151539e-06, "loss": 0.3613, "step": 13639 }, { "epoch": 0.39, "grad_norm": 4.820432664557415, "learning_rate": 6.9602870372644974e-06, "loss": 0.4704, "step": 13640 }, { "epoch": 0.39, "grad_norm": 7.532481365921465, "learning_rate": 6.959860398513349e-06, "loss": 0.7569, "step": 13641 }, { "epoch": 0.39, "grad_norm": 5.532749533113165, "learning_rate": 6.959433742901762e-06, "loss": 0.287, "step": 13642 }, { "epoch": 0.39, "grad_norm": 8.617155515969372, "learning_rate": 6.9590070704334075e-06, "loss": 0.6598, "step": 13643 }, { "epoch": 0.39, "grad_norm": 3.5329191600924084, "learning_rate": 6.958580381111958e-06, "loss": 0.3681, "step": 13644 }, { "epoch": 0.39, "grad_norm": 10.537047219645517, "learning_rate": 6.9581536749410816e-06, "loss": 0.362, "step": 13645 }, { "epoch": 0.39, "grad_norm": 5.225734354277242, "learning_rate": 6.9577269519244496e-06, "loss": 0.4896, "step": 13646 }, { "epoch": 0.39, "grad_norm": 9.453842564610975, "learning_rate": 6.957300212065734e-06, "loss": 0.4753, "step": 13647 }, { "epoch": 0.39, "grad_norm": 9.570543135374654, "learning_rate": 6.956873455368607e-06, "loss": 0.9062, "step": 13648 }, { "epoch": 0.39, "grad_norm": 4.6132796995724, "learning_rate": 6.9564466818367375e-06, "loss": 0.4559, "step": 13649 }, { "epoch": 0.39, "grad_norm": 5.181530470671156, "learning_rate": 6.9560198914737974e-06, "loss": 0.2308, "step": 13650 }, { "epoch": 0.39, "grad_norm": 9.989356432510053, "learning_rate": 6.955593084283461e-06, "loss": 0.7246, "step": 13651 }, { "epoch": 0.39, "grad_norm": 3.841386362939405, "learning_rate": 6.955166260269396e-06, "loss": 0.1972, "step": 13652 }, { "epoch": 0.39, "grad_norm": 7.491045056255574, "learning_rate": 6.9547394194352754e-06, "loss": 0.8216, "step": 13653 }, { "epoch": 0.39, "grad_norm": 5.497281833359956, "learning_rate": 6.954312561784775e-06, "loss": 0.4386, "step": 13654 }, { "epoch": 0.39, "grad_norm": 16.132087746659387, "learning_rate": 6.953885687321563e-06, "loss": 0.7365, "step": 13655 }, { "epoch": 0.39, "grad_norm": 4.680453382685557, "learning_rate": 6.953458796049314e-06, "loss": 0.3761, "step": 13656 }, { "epoch": 0.39, "grad_norm": 3.1938825359577314, "learning_rate": 6.953031887971699e-06, "loss": 0.3754, "step": 13657 }, { "epoch": 0.39, "grad_norm": 7.8242514767202795, "learning_rate": 6.952604963092391e-06, "loss": 0.4932, "step": 13658 }, { "epoch": 0.39, "grad_norm": 7.312181645171796, "learning_rate": 6.952178021415062e-06, "loss": 0.6247, "step": 13659 }, { "epoch": 0.39, "grad_norm": 8.701503965674465, "learning_rate": 6.951751062943387e-06, "loss": 1.1864, "step": 13660 }, { "epoch": 0.39, "grad_norm": 3.4926513773416095, "learning_rate": 6.951324087681037e-06, "loss": 0.2446, "step": 13661 }, { "epoch": 0.39, "grad_norm": 5.793032048420401, "learning_rate": 6.950897095631687e-06, "loss": 0.483, "step": 13662 }, { "epoch": 0.39, "grad_norm": 3.6114042943552156, "learning_rate": 6.950470086799009e-06, "loss": 0.3643, "step": 13663 }, { "epoch": 0.39, "grad_norm": 31.755385054829187, "learning_rate": 6.950043061186677e-06, "loss": 0.8673, "step": 13664 }, { "epoch": 0.39, "grad_norm": 9.522358132985852, "learning_rate": 6.949616018798366e-06, "loss": 0.4975, "step": 13665 }, { "epoch": 0.39, "grad_norm": 4.731073418046974, "learning_rate": 6.949188959637746e-06, "loss": 0.3747, "step": 13666 }, { "epoch": 0.39, "grad_norm": 9.194280704245278, "learning_rate": 6.948761883708495e-06, "loss": 1.6252, "step": 13667 }, { "epoch": 0.39, "grad_norm": 7.747824117325394, "learning_rate": 6.948334791014287e-06, "loss": 0.507, "step": 13668 }, { "epoch": 0.39, "grad_norm": 2.643131838104358, "learning_rate": 6.9479076815587896e-06, "loss": 0.4708, "step": 13669 }, { "epoch": 0.39, "grad_norm": 5.163458946149782, "learning_rate": 6.947480555345686e-06, "loss": 0.3242, "step": 13670 }, { "epoch": 0.39, "grad_norm": 9.269770757809697, "learning_rate": 6.947053412378647e-06, "loss": 0.5464, "step": 13671 }, { "epoch": 0.39, "grad_norm": 10.005102679627933, "learning_rate": 6.946626252661346e-06, "loss": 0.3295, "step": 13672 }, { "epoch": 0.39, "grad_norm": 5.698265429974824, "learning_rate": 6.94619907619746e-06, "loss": 0.4756, "step": 13673 }, { "epoch": 0.39, "grad_norm": 3.592555834522801, "learning_rate": 6.945771882990662e-06, "loss": 0.254, "step": 13674 }, { "epoch": 0.39, "grad_norm": 4.2478855988153885, "learning_rate": 6.945344673044629e-06, "loss": 0.4764, "step": 13675 }, { "epoch": 0.39, "grad_norm": 3.412653302672204, "learning_rate": 6.944917446363035e-06, "loss": 0.5026, "step": 13676 }, { "epoch": 0.39, "grad_norm": 2.9517433829993855, "learning_rate": 6.944490202949554e-06, "loss": 0.2528, "step": 13677 }, { "epoch": 0.39, "grad_norm": 8.860125905126152, "learning_rate": 6.944062942807867e-06, "loss": 0.7636, "step": 13678 }, { "epoch": 0.39, "grad_norm": 4.547965138634743, "learning_rate": 6.943635665941642e-06, "loss": 0.4235, "step": 13679 }, { "epoch": 0.39, "grad_norm": 7.50608203963655, "learning_rate": 6.9432083723545605e-06, "loss": 0.4169, "step": 13680 }, { "epoch": 0.39, "grad_norm": 4.452418917175222, "learning_rate": 6.942781062050298e-06, "loss": 0.6146, "step": 13681 }, { "epoch": 0.39, "grad_norm": 4.448873263669423, "learning_rate": 6.942353735032528e-06, "loss": 0.3133, "step": 13682 }, { "epoch": 0.39, "grad_norm": 6.549908429153636, "learning_rate": 6.941926391304926e-06, "loss": 0.2273, "step": 13683 }, { "epoch": 0.39, "grad_norm": 8.70323844027596, "learning_rate": 6.941499030871173e-06, "loss": 0.7835, "step": 13684 }, { "epoch": 0.39, "grad_norm": 5.758121021550521, "learning_rate": 6.941071653734941e-06, "loss": 0.3418, "step": 13685 }, { "epoch": 0.39, "grad_norm": 6.933420270441993, "learning_rate": 6.940644259899909e-06, "loss": 0.8503, "step": 13686 }, { "epoch": 0.39, "grad_norm": 9.136496369524773, "learning_rate": 6.940216849369754e-06, "loss": 0.6939, "step": 13687 }, { "epoch": 0.39, "grad_norm": 8.658909468318859, "learning_rate": 6.939789422148153e-06, "loss": 0.4854, "step": 13688 }, { "epoch": 0.39, "grad_norm": 3.773480780375013, "learning_rate": 6.939361978238781e-06, "loss": 0.4303, "step": 13689 }, { "epoch": 0.39, "grad_norm": 6.6615289520823335, "learning_rate": 6.9389345176453175e-06, "loss": 0.5658, "step": 13690 }, { "epoch": 0.39, "grad_norm": 6.976197783194934, "learning_rate": 6.938507040371439e-06, "loss": 0.8802, "step": 13691 }, { "epoch": 0.39, "grad_norm": 2.5207354484553504, "learning_rate": 6.938079546420823e-06, "loss": 0.3632, "step": 13692 }, { "epoch": 0.39, "grad_norm": 5.4020365795265555, "learning_rate": 6.937652035797146e-06, "loss": 0.3543, "step": 13693 }, { "epoch": 0.39, "grad_norm": 6.406363993305062, "learning_rate": 6.9372245085040894e-06, "loss": 0.5122, "step": 13694 }, { "epoch": 0.39, "grad_norm": 3.4635857639039123, "learning_rate": 6.936796964545327e-06, "loss": 0.3908, "step": 13695 }, { "epoch": 0.39, "grad_norm": 2.2422677002496436, "learning_rate": 6.93636940392454e-06, "loss": 0.1406, "step": 13696 }, { "epoch": 0.39, "grad_norm": 7.854808959871751, "learning_rate": 6.935941826645405e-06, "loss": 0.429, "step": 13697 }, { "epoch": 0.39, "grad_norm": 5.495731257757708, "learning_rate": 6.935514232711601e-06, "loss": 0.3485, "step": 13698 }, { "epoch": 0.39, "grad_norm": 4.043429966663136, "learning_rate": 6.935086622126807e-06, "loss": 0.7473, "step": 13699 }, { "epoch": 0.39, "grad_norm": 4.740204662077412, "learning_rate": 6.9346589948947e-06, "loss": 0.1581, "step": 13700 }, { "epoch": 0.39, "grad_norm": 6.322022535607958, "learning_rate": 6.934231351018959e-06, "loss": 0.5931, "step": 13701 }, { "epoch": 0.39, "grad_norm": 7.4139326142489494, "learning_rate": 6.933803690503265e-06, "loss": 0.4394, "step": 13702 }, { "epoch": 0.39, "grad_norm": 16.026480993431992, "learning_rate": 6.9333760133512964e-06, "loss": 0.6357, "step": 13703 }, { "epoch": 0.39, "grad_norm": 3.682793668910013, "learning_rate": 6.93294831956673e-06, "loss": 0.1799, "step": 13704 }, { "epoch": 0.39, "grad_norm": 10.079870667195532, "learning_rate": 6.93252060915325e-06, "loss": 0.6076, "step": 13705 }, { "epoch": 0.39, "grad_norm": 4.026783265000409, "learning_rate": 6.932092882114532e-06, "loss": 0.4267, "step": 13706 }, { "epoch": 0.39, "grad_norm": 11.9934423330661, "learning_rate": 6.931665138454256e-06, "loss": 0.9844, "step": 13707 }, { "epoch": 0.39, "grad_norm": 10.697537176179488, "learning_rate": 6.931237378176105e-06, "loss": 0.634, "step": 13708 }, { "epoch": 0.39, "grad_norm": 5.893664114883555, "learning_rate": 6.9308096012837545e-06, "loss": 0.6042, "step": 13709 }, { "epoch": 0.39, "grad_norm": 5.528688430322559, "learning_rate": 6.930381807780885e-06, "loss": 0.4483, "step": 13710 }, { "epoch": 0.39, "grad_norm": 3.0418834783194124, "learning_rate": 6.929953997671182e-06, "loss": 0.2752, "step": 13711 }, { "epoch": 0.39, "grad_norm": 4.942809352773666, "learning_rate": 6.9295261709583205e-06, "loss": 0.5273, "step": 13712 }, { "epoch": 0.39, "grad_norm": 9.526748669922974, "learning_rate": 6.929098327645984e-06, "loss": 0.6356, "step": 13713 }, { "epoch": 0.39, "grad_norm": 7.666115271048239, "learning_rate": 6.92867046773785e-06, "loss": 0.703, "step": 13714 }, { "epoch": 0.39, "grad_norm": 5.299278366696087, "learning_rate": 6.928242591237604e-06, "loss": 0.441, "step": 13715 }, { "epoch": 0.39, "grad_norm": 5.171755878655547, "learning_rate": 6.927814698148923e-06, "loss": 0.4163, "step": 13716 }, { "epoch": 0.39, "grad_norm": 3.220395565673728, "learning_rate": 6.9273867884754894e-06, "loss": 0.4941, "step": 13717 }, { "epoch": 0.39, "grad_norm": 4.623527086117745, "learning_rate": 6.926958862220985e-06, "loss": 0.3396, "step": 13718 }, { "epoch": 0.39, "grad_norm": 5.311026963605259, "learning_rate": 6.926530919389092e-06, "loss": 0.1295, "step": 13719 }, { "epoch": 0.39, "grad_norm": 6.4856905786118215, "learning_rate": 6.926102959983488e-06, "loss": 0.7629, "step": 13720 }, { "epoch": 0.39, "grad_norm": 3.768881691423727, "learning_rate": 6.92567498400786e-06, "loss": 0.4698, "step": 13721 }, { "epoch": 0.39, "grad_norm": 4.884941528032839, "learning_rate": 6.925246991465886e-06, "loss": 0.7407, "step": 13722 }, { "epoch": 0.39, "grad_norm": 13.020606709860528, "learning_rate": 6.924818982361249e-06, "loss": 0.4019, "step": 13723 }, { "epoch": 0.39, "grad_norm": 4.796374490577281, "learning_rate": 6.924390956697631e-06, "loss": 0.6189, "step": 13724 }, { "epoch": 0.39, "grad_norm": 11.370764897635505, "learning_rate": 6.923962914478717e-06, "loss": 0.8632, "step": 13725 }, { "epoch": 0.39, "grad_norm": 3.552213742872447, "learning_rate": 6.923534855708183e-06, "loss": 0.2802, "step": 13726 }, { "epoch": 0.39, "grad_norm": 4.135066551678498, "learning_rate": 6.923106780389717e-06, "loss": 0.6498, "step": 13727 }, { "epoch": 0.39, "grad_norm": 4.631328068331337, "learning_rate": 6.9226786885270015e-06, "loss": 0.3581, "step": 13728 }, { "epoch": 0.39, "grad_norm": 5.06353775031587, "learning_rate": 6.922250580123717e-06, "loss": 0.2848, "step": 13729 }, { "epoch": 0.39, "grad_norm": 4.009210710734881, "learning_rate": 6.921822455183548e-06, "loss": 0.6545, "step": 13730 }, { "epoch": 0.39, "grad_norm": 6.418391932944171, "learning_rate": 6.921394313710176e-06, "loss": 0.3748, "step": 13731 }, { "epoch": 0.39, "grad_norm": 2.4848948960364794, "learning_rate": 6.920966155707287e-06, "loss": 0.2772, "step": 13732 }, { "epoch": 0.39, "grad_norm": 5.4895920678008325, "learning_rate": 6.920537981178561e-06, "loss": 0.5305, "step": 13733 }, { "epoch": 0.39, "grad_norm": 7.3715504483088825, "learning_rate": 6.920109790127684e-06, "loss": 0.6295, "step": 13734 }, { "epoch": 0.39, "grad_norm": 4.715096733177014, "learning_rate": 6.919681582558337e-06, "loss": 0.3099, "step": 13735 }, { "epoch": 0.39, "grad_norm": 6.225823678058661, "learning_rate": 6.919253358474208e-06, "loss": 0.6015, "step": 13736 }, { "epoch": 0.39, "grad_norm": 6.342824332816397, "learning_rate": 6.918825117878978e-06, "loss": 0.3404, "step": 13737 }, { "epoch": 0.39, "grad_norm": 4.914209190811283, "learning_rate": 6.918396860776333e-06, "loss": 0.2871, "step": 13738 }, { "epoch": 0.39, "grad_norm": 4.8020699170546495, "learning_rate": 6.917968587169954e-06, "loss": 0.4041, "step": 13739 }, { "epoch": 0.39, "grad_norm": 4.125577149329668, "learning_rate": 6.917540297063528e-06, "loss": 0.255, "step": 13740 }, { "epoch": 0.39, "grad_norm": 7.449858327453019, "learning_rate": 6.917111990460738e-06, "loss": 0.6738, "step": 13741 }, { "epoch": 0.39, "grad_norm": 5.763071378568683, "learning_rate": 6.9166836673652715e-06, "loss": 0.7023, "step": 13742 }, { "epoch": 0.39, "grad_norm": 6.803467248062489, "learning_rate": 6.91625532778081e-06, "loss": 0.6386, "step": 13743 }, { "epoch": 0.39, "grad_norm": 9.362320272556653, "learning_rate": 6.9158269717110406e-06, "loss": 0.6978, "step": 13744 }, { "epoch": 0.39, "grad_norm": 7.69961031262083, "learning_rate": 6.915398599159648e-06, "loss": 0.4996, "step": 13745 }, { "epoch": 0.39, "grad_norm": 5.902085465864969, "learning_rate": 6.914970210130316e-06, "loss": 0.4406, "step": 13746 }, { "epoch": 0.39, "grad_norm": 5.401761457350491, "learning_rate": 6.914541804626731e-06, "loss": 0.338, "step": 13747 }, { "epoch": 0.39, "grad_norm": 5.219188922995221, "learning_rate": 6.91411338265258e-06, "loss": 0.6529, "step": 13748 }, { "epoch": 0.39, "grad_norm": 5.002060274989588, "learning_rate": 6.913684944211547e-06, "loss": 0.7044, "step": 13749 }, { "epoch": 0.39, "grad_norm": 4.982799268999832, "learning_rate": 6.9132564893073175e-06, "loss": 0.702, "step": 13750 }, { "epoch": 0.39, "grad_norm": 3.284996908816389, "learning_rate": 6.912828017943577e-06, "loss": 0.2664, "step": 13751 }, { "epoch": 0.39, "grad_norm": 3.656730685466223, "learning_rate": 6.912399530124013e-06, "loss": 0.236, "step": 13752 }, { "epoch": 0.39, "grad_norm": 3.94836389362064, "learning_rate": 6.911971025852312e-06, "loss": 0.6486, "step": 13753 }, { "epoch": 0.39, "grad_norm": 5.392110426425727, "learning_rate": 6.911542505132159e-06, "loss": 0.2948, "step": 13754 }, { "epoch": 0.39, "grad_norm": 5.381779985547133, "learning_rate": 6.911113967967243e-06, "loss": 0.2724, "step": 13755 }, { "epoch": 0.39, "grad_norm": 5.917971489531132, "learning_rate": 6.910685414361246e-06, "loss": 0.4305, "step": 13756 }, { "epoch": 0.39, "grad_norm": 10.186471079722061, "learning_rate": 6.9102568443178596e-06, "loss": 0.5667, "step": 13757 }, { "epoch": 0.39, "grad_norm": 11.458595707519263, "learning_rate": 6.90982825784077e-06, "loss": 0.7493, "step": 13758 }, { "epoch": 0.39, "grad_norm": 7.588690371202913, "learning_rate": 6.909399654933659e-06, "loss": 0.5311, "step": 13759 }, { "epoch": 0.39, "grad_norm": 5.878482679145086, "learning_rate": 6.908971035600221e-06, "loss": 0.4829, "step": 13760 }, { "epoch": 0.39, "grad_norm": 4.538136793278095, "learning_rate": 6.908542399844139e-06, "loss": 0.5553, "step": 13761 }, { "epoch": 0.39, "grad_norm": 3.3840394647159306, "learning_rate": 6.908113747669103e-06, "loss": 0.322, "step": 13762 }, { "epoch": 0.39, "grad_norm": 6.5117476748148455, "learning_rate": 6.907685079078798e-06, "loss": 0.3768, "step": 13763 }, { "epoch": 0.39, "grad_norm": 6.743513239963715, "learning_rate": 6.907256394076913e-06, "loss": 0.2905, "step": 13764 }, { "epoch": 0.39, "grad_norm": 4.769014022737742, "learning_rate": 6.906827692667138e-06, "loss": 0.2456, "step": 13765 }, { "epoch": 0.39, "grad_norm": 3.9865778200483573, "learning_rate": 6.906398974853158e-06, "loss": 0.4441, "step": 13766 }, { "epoch": 0.39, "grad_norm": 2.795384185537111, "learning_rate": 6.905970240638662e-06, "loss": 0.177, "step": 13767 }, { "epoch": 0.39, "grad_norm": 10.381188718464472, "learning_rate": 6.9055414900273386e-06, "loss": 1.0456, "step": 13768 }, { "epoch": 0.39, "grad_norm": 5.982812622739802, "learning_rate": 6.9051127230228765e-06, "loss": 0.6231, "step": 13769 }, { "epoch": 0.39, "grad_norm": 11.03713184058057, "learning_rate": 6.904683939628965e-06, "loss": 0.6306, "step": 13770 }, { "epoch": 0.39, "grad_norm": 5.68567787577739, "learning_rate": 6.90425513984929e-06, "loss": 0.4641, "step": 13771 }, { "epoch": 0.39, "grad_norm": 3.7235109162706546, "learning_rate": 6.903826323687546e-06, "loss": 0.3064, "step": 13772 }, { "epoch": 0.39, "grad_norm": 15.36306566232143, "learning_rate": 6.903397491147415e-06, "loss": 0.4164, "step": 13773 }, { "epoch": 0.39, "grad_norm": 5.379156944843296, "learning_rate": 6.902968642232591e-06, "loss": 0.5394, "step": 13774 }, { "epoch": 0.39, "grad_norm": 8.964770560703418, "learning_rate": 6.9025397769467625e-06, "loss": 0.7773, "step": 13775 }, { "epoch": 0.39, "grad_norm": 6.151232150942808, "learning_rate": 6.902110895293619e-06, "loss": 0.6642, "step": 13776 }, { "epoch": 0.39, "grad_norm": 5.949302828038717, "learning_rate": 6.901681997276848e-06, "loss": 0.5864, "step": 13777 }, { "epoch": 0.39, "grad_norm": 8.437054262043509, "learning_rate": 6.901253082900142e-06, "loss": 0.9421, "step": 13778 }, { "epoch": 0.39, "grad_norm": 3.095383819574908, "learning_rate": 6.90082415216719e-06, "loss": 0.3311, "step": 13779 }, { "epoch": 0.39, "grad_norm": 5.073606822739576, "learning_rate": 6.900395205081682e-06, "loss": 0.2398, "step": 13780 }, { "epoch": 0.39, "grad_norm": 5.334788633018878, "learning_rate": 6.899966241647307e-06, "loss": 0.7147, "step": 13781 }, { "epoch": 0.39, "grad_norm": 5.380686391275723, "learning_rate": 6.899537261867758e-06, "loss": 0.429, "step": 13782 }, { "epoch": 0.39, "grad_norm": 3.942950026722975, "learning_rate": 6.8991082657467234e-06, "loss": 0.1633, "step": 13783 }, { "epoch": 0.39, "grad_norm": 4.676453812110777, "learning_rate": 6.898679253287892e-06, "loss": 0.427, "step": 13784 }, { "epoch": 0.39, "grad_norm": 7.180557961978997, "learning_rate": 6.898250224494959e-06, "loss": 0.5839, "step": 13785 }, { "epoch": 0.39, "grad_norm": 6.708172833751869, "learning_rate": 6.897821179371612e-06, "loss": 0.7887, "step": 13786 }, { "epoch": 0.39, "grad_norm": 5.504968782937551, "learning_rate": 6.897392117921543e-06, "loss": 0.4457, "step": 13787 }, { "epoch": 0.39, "grad_norm": 7.595627164178868, "learning_rate": 6.896963040148443e-06, "loss": 0.357, "step": 13788 }, { "epoch": 0.39, "grad_norm": 9.198896126242879, "learning_rate": 6.896533946056006e-06, "loss": 0.4647, "step": 13789 }, { "epoch": 0.39, "grad_norm": 8.043824234199766, "learning_rate": 6.896104835647919e-06, "loss": 1.0295, "step": 13790 }, { "epoch": 0.39, "grad_norm": 4.971645735004868, "learning_rate": 6.895675708927876e-06, "loss": 0.276, "step": 13791 }, { "epoch": 0.39, "grad_norm": 7.264493201918003, "learning_rate": 6.895246565899567e-06, "loss": 0.9467, "step": 13792 }, { "epoch": 0.39, "grad_norm": 8.14549188963346, "learning_rate": 6.8948174065666865e-06, "loss": 0.7165, "step": 13793 }, { "epoch": 0.4, "grad_norm": 6.670824026196207, "learning_rate": 6.894388230932925e-06, "loss": 0.4831, "step": 13794 }, { "epoch": 0.4, "grad_norm": 2.4875298148433926, "learning_rate": 6.893959039001975e-06, "loss": 0.3346, "step": 13795 }, { "epoch": 0.4, "grad_norm": 3.333466644402883, "learning_rate": 6.893529830777528e-06, "loss": 0.4383, "step": 13796 }, { "epoch": 0.4, "grad_norm": 4.257413372966167, "learning_rate": 6.8931006062632765e-06, "loss": 0.1125, "step": 13797 }, { "epoch": 0.4, "grad_norm": 5.698517074426835, "learning_rate": 6.892671365462914e-06, "loss": 0.5759, "step": 13798 }, { "epoch": 0.4, "grad_norm": 7.323667362057477, "learning_rate": 6.892242108380134e-06, "loss": 0.6962, "step": 13799 }, { "epoch": 0.4, "grad_norm": 4.962144143247686, "learning_rate": 6.891812835018626e-06, "loss": 0.4521, "step": 13800 }, { "epoch": 0.4, "grad_norm": 6.801825463478177, "learning_rate": 6.891383545382087e-06, "loss": 0.5001, "step": 13801 }, { "epoch": 0.4, "grad_norm": 9.544845140498797, "learning_rate": 6.890954239474208e-06, "loss": 0.3626, "step": 13802 }, { "epoch": 0.4, "grad_norm": 10.548570499477322, "learning_rate": 6.890524917298681e-06, "loss": 0.3521, "step": 13803 }, { "epoch": 0.4, "grad_norm": 9.552311973927631, "learning_rate": 6.890095578859202e-06, "loss": 0.3902, "step": 13804 }, { "epoch": 0.4, "grad_norm": 7.1900262124932715, "learning_rate": 6.889666224159464e-06, "loss": 0.442, "step": 13805 }, { "epoch": 0.4, "grad_norm": 4.1564950512228265, "learning_rate": 6.88923685320316e-06, "loss": 0.4542, "step": 13806 }, { "epoch": 0.4, "grad_norm": 5.955784721654721, "learning_rate": 6.888807465993983e-06, "loss": 0.5874, "step": 13807 }, { "epoch": 0.4, "grad_norm": 6.73354042351658, "learning_rate": 6.8883780625356274e-06, "loss": 0.3703, "step": 13808 }, { "epoch": 0.4, "grad_norm": 6.9129065975201485, "learning_rate": 6.887948642831789e-06, "loss": 0.8244, "step": 13809 }, { "epoch": 0.4, "grad_norm": 10.937765543302765, "learning_rate": 6.887519206886159e-06, "loss": 0.3893, "step": 13810 }, { "epoch": 0.4, "grad_norm": 5.040542881882654, "learning_rate": 6.887089754702436e-06, "loss": 0.3888, "step": 13811 }, { "epoch": 0.4, "grad_norm": 6.844839140044738, "learning_rate": 6.886660286284312e-06, "loss": 0.7864, "step": 13812 }, { "epoch": 0.4, "grad_norm": 4.257549760743843, "learning_rate": 6.886230801635482e-06, "loss": 0.3166, "step": 13813 }, { "epoch": 0.4, "grad_norm": 4.3515085871043135, "learning_rate": 6.885801300759641e-06, "loss": 0.362, "step": 13814 }, { "epoch": 0.4, "grad_norm": 3.5053674889415927, "learning_rate": 6.8853717836604816e-06, "loss": 0.9637, "step": 13815 }, { "epoch": 0.4, "grad_norm": 14.15139869274191, "learning_rate": 6.884942250341702e-06, "loss": 0.5951, "step": 13816 }, { "epoch": 0.4, "grad_norm": 7.847657404472756, "learning_rate": 6.884512700806996e-06, "loss": 0.3674, "step": 13817 }, { "epoch": 0.4, "grad_norm": 9.950213714681434, "learning_rate": 6.884083135060058e-06, "loss": 0.842, "step": 13818 }, { "epoch": 0.4, "grad_norm": 5.861254744052899, "learning_rate": 6.883653553104588e-06, "loss": 0.3757, "step": 13819 }, { "epoch": 0.4, "grad_norm": 9.287350585885696, "learning_rate": 6.883223954944276e-06, "loss": 1.0739, "step": 13820 }, { "epoch": 0.4, "grad_norm": 7.249208209768781, "learning_rate": 6.88279434058282e-06, "loss": 0.4166, "step": 13821 }, { "epoch": 0.4, "grad_norm": 5.854824628768955, "learning_rate": 6.882364710023919e-06, "loss": 0.3379, "step": 13822 }, { "epoch": 0.4, "grad_norm": 8.963799841777737, "learning_rate": 6.881935063271263e-06, "loss": 0.6407, "step": 13823 }, { "epoch": 0.4, "grad_norm": 12.216064099382859, "learning_rate": 6.881505400328552e-06, "loss": 0.3167, "step": 13824 }, { "epoch": 0.4, "grad_norm": 5.101528851104573, "learning_rate": 6.881075721199482e-06, "loss": 0.432, "step": 13825 }, { "epoch": 0.4, "grad_norm": 5.928387123631037, "learning_rate": 6.880646025887749e-06, "loss": 0.661, "step": 13826 }, { "epoch": 0.4, "grad_norm": 9.869783423964682, "learning_rate": 6.88021631439705e-06, "loss": 0.4052, "step": 13827 }, { "epoch": 0.4, "grad_norm": 4.321075372099913, "learning_rate": 6.879786586731081e-06, "loss": 0.4471, "step": 13828 }, { "epoch": 0.4, "grad_norm": 10.533571389280738, "learning_rate": 6.879356842893541e-06, "loss": 0.5286, "step": 13829 }, { "epoch": 0.4, "grad_norm": 5.6213133704761775, "learning_rate": 6.878927082888125e-06, "loss": 0.4264, "step": 13830 }, { "epoch": 0.4, "grad_norm": 4.8365881363539955, "learning_rate": 6.87849730671853e-06, "loss": 0.1529, "step": 13831 }, { "epoch": 0.4, "grad_norm": 4.7625422278106555, "learning_rate": 6.8780675143884535e-06, "loss": 0.581, "step": 13832 }, { "epoch": 0.4, "grad_norm": 6.644219213444032, "learning_rate": 6.8776377059015945e-06, "loss": 0.6305, "step": 13833 }, { "epoch": 0.4, "grad_norm": 7.668281070763558, "learning_rate": 6.8772078812616486e-06, "loss": 0.5472, "step": 13834 }, { "epoch": 0.4, "grad_norm": 7.80280340018554, "learning_rate": 6.876778040472314e-06, "loss": 0.3499, "step": 13835 }, { "epoch": 0.4, "grad_norm": 7.098973774957316, "learning_rate": 6.876348183537291e-06, "loss": 0.6736, "step": 13836 }, { "epoch": 0.4, "grad_norm": 6.698467281445306, "learning_rate": 6.8759183104602746e-06, "loss": 0.5568, "step": 13837 }, { "epoch": 0.4, "grad_norm": 7.976484995137584, "learning_rate": 6.8754884212449635e-06, "loss": 0.5944, "step": 13838 }, { "epoch": 0.4, "grad_norm": 8.235511542102739, "learning_rate": 6.875058515895058e-06, "loss": 0.9732, "step": 13839 }, { "epoch": 0.4, "grad_norm": 12.8605369886468, "learning_rate": 6.874628594414253e-06, "loss": 0.974, "step": 13840 }, { "epoch": 0.4, "grad_norm": 11.94324315311655, "learning_rate": 6.87419865680625e-06, "loss": 0.9711, "step": 13841 }, { "epoch": 0.4, "grad_norm": 3.3165745417262094, "learning_rate": 6.873768703074745e-06, "loss": 0.208, "step": 13842 }, { "epoch": 0.4, "grad_norm": 7.247924343133208, "learning_rate": 6.87333873322344e-06, "loss": 0.6773, "step": 13843 }, { "epoch": 0.4, "grad_norm": 6.841759100354256, "learning_rate": 6.872908747256033e-06, "loss": 0.6018, "step": 13844 }, { "epoch": 0.4, "grad_norm": 11.737883265598773, "learning_rate": 6.872478745176222e-06, "loss": 0.4301, "step": 13845 }, { "epoch": 0.4, "grad_norm": 5.9355149815848876, "learning_rate": 6.872048726987708e-06, "loss": 0.5932, "step": 13846 }, { "epoch": 0.4, "grad_norm": 7.590685598948956, "learning_rate": 6.871618692694188e-06, "loss": 0.6629, "step": 13847 }, { "epoch": 0.4, "grad_norm": 2.269407495482951, "learning_rate": 6.871188642299362e-06, "loss": 0.2522, "step": 13848 }, { "epoch": 0.4, "grad_norm": 4.562693134557343, "learning_rate": 6.870758575806933e-06, "loss": 0.8465, "step": 13849 }, { "epoch": 0.4, "grad_norm": 6.981315574235576, "learning_rate": 6.870328493220596e-06, "loss": 0.5587, "step": 13850 }, { "epoch": 0.4, "grad_norm": 30.14534035635272, "learning_rate": 6.869898394544054e-06, "loss": 0.2694, "step": 13851 }, { "epoch": 0.4, "grad_norm": 4.4130519554145, "learning_rate": 6.8694682797810074e-06, "loss": 0.5964, "step": 13852 }, { "epoch": 0.4, "grad_norm": 8.858429738905052, "learning_rate": 6.8690381489351546e-06, "loss": 0.5603, "step": 13853 }, { "epoch": 0.4, "grad_norm": 3.8107639800322493, "learning_rate": 6.868608002010196e-06, "loss": 0.2991, "step": 13854 }, { "epoch": 0.4, "grad_norm": 2.6778280184765, "learning_rate": 6.868177839009833e-06, "loss": 0.2728, "step": 13855 }, { "epoch": 0.4, "grad_norm": 5.178283754354307, "learning_rate": 6.867747659937768e-06, "loss": 0.5738, "step": 13856 }, { "epoch": 0.4, "grad_norm": 5.306549599392987, "learning_rate": 6.8673174647976985e-06, "loss": 0.526, "step": 13857 }, { "epoch": 0.4, "grad_norm": 3.6448786530095196, "learning_rate": 6.866887253593325e-06, "loss": 0.2975, "step": 13858 }, { "epoch": 0.4, "grad_norm": 4.563355457112025, "learning_rate": 6.866457026328352e-06, "loss": 0.4666, "step": 13859 }, { "epoch": 0.4, "grad_norm": 3.2928304948665326, "learning_rate": 6.86602678300648e-06, "loss": 0.1316, "step": 13860 }, { "epoch": 0.4, "grad_norm": 4.845090643642081, "learning_rate": 6.865596523631408e-06, "loss": 0.3559, "step": 13861 }, { "epoch": 0.4, "grad_norm": 5.828469642082976, "learning_rate": 6.865166248206838e-06, "loss": 0.6225, "step": 13862 }, { "epoch": 0.4, "grad_norm": 8.507804485278738, "learning_rate": 6.864735956736475e-06, "loss": 0.6445, "step": 13863 }, { "epoch": 0.4, "grad_norm": 6.866391080430927, "learning_rate": 6.864305649224017e-06, "loss": 0.4319, "step": 13864 }, { "epoch": 0.4, "grad_norm": 2.8013075010942297, "learning_rate": 6.863875325673166e-06, "loss": 0.1436, "step": 13865 }, { "epoch": 0.4, "grad_norm": 5.376176217483533, "learning_rate": 6.863444986087626e-06, "loss": 0.1995, "step": 13866 }, { "epoch": 0.4, "grad_norm": 140.9425250661037, "learning_rate": 6.863014630471098e-06, "loss": 0.5905, "step": 13867 }, { "epoch": 0.4, "grad_norm": 5.331083578215588, "learning_rate": 6.862584258827284e-06, "loss": 0.4682, "step": 13868 }, { "epoch": 0.4, "grad_norm": 9.837870655565895, "learning_rate": 6.862153871159888e-06, "loss": 0.5471, "step": 13869 }, { "epoch": 0.4, "grad_norm": 7.568329133004094, "learning_rate": 6.861723467472611e-06, "loss": 0.3682, "step": 13870 }, { "epoch": 0.4, "grad_norm": 8.627179630580084, "learning_rate": 6.861293047769156e-06, "loss": 0.3279, "step": 13871 }, { "epoch": 0.4, "grad_norm": 3.6527072990751224, "learning_rate": 6.860862612053226e-06, "loss": 0.2991, "step": 13872 }, { "epoch": 0.4, "grad_norm": 4.375546611971347, "learning_rate": 6.860432160328525e-06, "loss": 0.2387, "step": 13873 }, { "epoch": 0.4, "grad_norm": 10.231092040019963, "learning_rate": 6.860001692598753e-06, "loss": 0.2429, "step": 13874 }, { "epoch": 0.4, "grad_norm": 6.297650481149131, "learning_rate": 6.859571208867617e-06, "loss": 0.3242, "step": 13875 }, { "epoch": 0.4, "grad_norm": 8.279221311432714, "learning_rate": 6.85914070913882e-06, "loss": 0.8931, "step": 13876 }, { "epoch": 0.4, "grad_norm": 6.974129076219133, "learning_rate": 6.858710193416063e-06, "loss": 0.4751, "step": 13877 }, { "epoch": 0.4, "grad_norm": 4.702250123632777, "learning_rate": 6.858279661703051e-06, "loss": 0.1642, "step": 13878 }, { "epoch": 0.4, "grad_norm": 9.142093064533194, "learning_rate": 6.857849114003487e-06, "loss": 0.9129, "step": 13879 }, { "epoch": 0.4, "grad_norm": 4.02769484964407, "learning_rate": 6.857418550321079e-06, "loss": 0.5187, "step": 13880 }, { "epoch": 0.4, "grad_norm": 3.434195108882946, "learning_rate": 6.856987970659525e-06, "loss": 0.5379, "step": 13881 }, { "epoch": 0.4, "grad_norm": 4.855791592165149, "learning_rate": 6.856557375022533e-06, "loss": 0.2738, "step": 13882 }, { "epoch": 0.4, "grad_norm": 9.381058401124644, "learning_rate": 6.856126763413806e-06, "loss": 0.3537, "step": 13883 }, { "epoch": 0.4, "grad_norm": 6.230317097794349, "learning_rate": 6.85569613583705e-06, "loss": 0.6992, "step": 13884 }, { "epoch": 0.4, "grad_norm": 12.962392549428086, "learning_rate": 6.8552654922959686e-06, "loss": 0.7141, "step": 13885 }, { "epoch": 0.4, "grad_norm": 7.838302970285054, "learning_rate": 6.854834832794266e-06, "loss": 0.8784, "step": 13886 }, { "epoch": 0.4, "grad_norm": 8.356582603026698, "learning_rate": 6.8544041573356475e-06, "loss": 0.6858, "step": 13887 }, { "epoch": 0.4, "grad_norm": 13.026215676303732, "learning_rate": 6.853973465923819e-06, "loss": 0.6501, "step": 13888 }, { "epoch": 0.4, "grad_norm": 7.146940481055239, "learning_rate": 6.853542758562483e-06, "loss": 0.5517, "step": 13889 }, { "epoch": 0.4, "grad_norm": 5.549373096423205, "learning_rate": 6.85311203525535e-06, "loss": 0.6429, "step": 13890 }, { "epoch": 0.4, "grad_norm": 4.4516832125770565, "learning_rate": 6.852681296006121e-06, "loss": 0.3348, "step": 13891 }, { "epoch": 0.4, "grad_norm": 8.457219117789181, "learning_rate": 6.852250540818502e-06, "loss": 0.6206, "step": 13892 }, { "epoch": 0.4, "grad_norm": 8.087562994733743, "learning_rate": 6.851819769696201e-06, "loss": 0.3911, "step": 13893 }, { "epoch": 0.4, "grad_norm": 3.9396935665228843, "learning_rate": 6.851388982642921e-06, "loss": 0.2361, "step": 13894 }, { "epoch": 0.4, "grad_norm": 7.813909052620025, "learning_rate": 6.8509581796623714e-06, "loss": 0.4863, "step": 13895 }, { "epoch": 0.4, "grad_norm": 4.463425859204957, "learning_rate": 6.850527360758257e-06, "loss": 0.5274, "step": 13896 }, { "epoch": 0.4, "grad_norm": 10.291083065788637, "learning_rate": 6.8500965259342814e-06, "loss": 0.4389, "step": 13897 }, { "epoch": 0.4, "grad_norm": 5.643606692297486, "learning_rate": 6.8496656751941534e-06, "loss": 0.2723, "step": 13898 }, { "epoch": 0.4, "grad_norm": 2.931903641787927, "learning_rate": 6.84923480854158e-06, "loss": 0.3528, "step": 13899 }, { "epoch": 0.4, "grad_norm": 7.04747030446108, "learning_rate": 6.848803925980266e-06, "loss": 0.5064, "step": 13900 }, { "epoch": 0.4, "grad_norm": 6.646907285137528, "learning_rate": 6.84837302751392e-06, "loss": 0.5618, "step": 13901 }, { "epoch": 0.4, "grad_norm": 11.005230505373794, "learning_rate": 6.8479421131462494e-06, "loss": 0.4437, "step": 13902 }, { "epoch": 0.4, "grad_norm": 6.753592665338408, "learning_rate": 6.847511182880962e-06, "loss": 0.2808, "step": 13903 }, { "epoch": 0.4, "grad_norm": 7.02426448107498, "learning_rate": 6.847080236721759e-06, "loss": 0.4277, "step": 13904 }, { "epoch": 0.4, "grad_norm": 7.4792494773707725, "learning_rate": 6.846649274672355e-06, "loss": 0.2935, "step": 13905 }, { "epoch": 0.4, "grad_norm": 5.836184495144473, "learning_rate": 6.8462182967364546e-06, "loss": 0.1645, "step": 13906 }, { "epoch": 0.4, "grad_norm": 6.558188775233348, "learning_rate": 6.845787302917763e-06, "loss": 0.4967, "step": 13907 }, { "epoch": 0.4, "grad_norm": 4.636193519674052, "learning_rate": 6.845356293219993e-06, "loss": 0.7483, "step": 13908 }, { "epoch": 0.4, "grad_norm": 4.1159532061308965, "learning_rate": 6.844925267646849e-06, "loss": 0.2739, "step": 13909 }, { "epoch": 0.4, "grad_norm": 3.074740451620815, "learning_rate": 6.844494226202042e-06, "loss": 0.1261, "step": 13910 }, { "epoch": 0.4, "grad_norm": 5.504469702572899, "learning_rate": 6.844063168889277e-06, "loss": 0.476, "step": 13911 }, { "epoch": 0.4, "grad_norm": 7.438942008113334, "learning_rate": 6.8436320957122624e-06, "loss": 0.3758, "step": 13912 }, { "epoch": 0.4, "grad_norm": 6.732618133721033, "learning_rate": 6.8432010066747105e-06, "loss": 0.3965, "step": 13913 }, { "epoch": 0.4, "grad_norm": 4.9964366851814, "learning_rate": 6.842769901780325e-06, "loss": 0.6465, "step": 13914 }, { "epoch": 0.4, "grad_norm": 9.88579683741871, "learning_rate": 6.842338781032818e-06, "loss": 0.5237, "step": 13915 }, { "epoch": 0.4, "grad_norm": 6.590507035264628, "learning_rate": 6.8419076444358966e-06, "loss": 0.5341, "step": 13916 }, { "epoch": 0.4, "grad_norm": 10.184748763711028, "learning_rate": 6.84147649199327e-06, "loss": 0.2442, "step": 13917 }, { "epoch": 0.4, "grad_norm": 6.5505317362954205, "learning_rate": 6.8410453237086485e-06, "loss": 0.438, "step": 13918 }, { "epoch": 0.4, "grad_norm": 4.974406447538245, "learning_rate": 6.840614139585741e-06, "loss": 0.371, "step": 13919 }, { "epoch": 0.4, "grad_norm": 4.835573074833157, "learning_rate": 6.840182939628257e-06, "loss": 0.3016, "step": 13920 }, { "epoch": 0.4, "grad_norm": 5.785088270201381, "learning_rate": 6.839751723839905e-06, "loss": 0.5184, "step": 13921 }, { "epoch": 0.4, "grad_norm": 6.31094522496974, "learning_rate": 6.839320492224396e-06, "loss": 0.4084, "step": 13922 }, { "epoch": 0.4, "grad_norm": 5.058406540635764, "learning_rate": 6.838889244785439e-06, "loss": 0.3639, "step": 13923 }, { "epoch": 0.4, "grad_norm": 10.294247698723701, "learning_rate": 6.838457981526744e-06, "loss": 0.6569, "step": 13924 }, { "epoch": 0.4, "grad_norm": 8.094393082334085, "learning_rate": 6.838026702452021e-06, "loss": 0.7732, "step": 13925 }, { "epoch": 0.4, "grad_norm": 3.12604223037574, "learning_rate": 6.837595407564982e-06, "loss": 0.3002, "step": 13926 }, { "epoch": 0.4, "grad_norm": 5.534661507150506, "learning_rate": 6.837164096869335e-06, "loss": 0.5161, "step": 13927 }, { "epoch": 0.4, "grad_norm": 5.542207758403756, "learning_rate": 6.8367327703687916e-06, "loss": 0.7219, "step": 13928 }, { "epoch": 0.4, "grad_norm": 7.864819864585607, "learning_rate": 6.836301428067063e-06, "loss": 0.5717, "step": 13929 }, { "epoch": 0.4, "grad_norm": 4.294273004787707, "learning_rate": 6.835870069967858e-06, "loss": 0.6877, "step": 13930 }, { "epoch": 0.4, "grad_norm": 1.7294662016990137, "learning_rate": 6.83543869607489e-06, "loss": 0.1407, "step": 13931 }, { "epoch": 0.4, "grad_norm": 9.080219650888752, "learning_rate": 6.835007306391868e-06, "loss": 1.1222, "step": 13932 }, { "epoch": 0.4, "grad_norm": 5.364769403384955, "learning_rate": 6.8345759009225044e-06, "loss": 0.6923, "step": 13933 }, { "epoch": 0.4, "grad_norm": 5.813767284760487, "learning_rate": 6.83414447967051e-06, "loss": 0.6812, "step": 13934 }, { "epoch": 0.4, "grad_norm": 10.371529653695395, "learning_rate": 6.833713042639597e-06, "loss": 0.8047, "step": 13935 }, { "epoch": 0.4, "grad_norm": 7.523147973563255, "learning_rate": 6.833281589833476e-06, "loss": 0.5977, "step": 13936 }, { "epoch": 0.4, "grad_norm": 4.5108887012089784, "learning_rate": 6.83285012125586e-06, "loss": 0.4986, "step": 13937 }, { "epoch": 0.4, "grad_norm": 4.660183109484231, "learning_rate": 6.83241863691046e-06, "loss": 0.8278, "step": 13938 }, { "epoch": 0.4, "grad_norm": 4.040832367876792, "learning_rate": 6.831987136800986e-06, "loss": 0.3377, "step": 13939 }, { "epoch": 0.4, "grad_norm": 4.821145132604201, "learning_rate": 6.831555620931154e-06, "loss": 0.6406, "step": 13940 }, { "epoch": 0.4, "grad_norm": 2.5247251455449313, "learning_rate": 6.8311240893046745e-06, "loss": 0.1156, "step": 13941 }, { "epoch": 0.4, "grad_norm": 10.972174563619603, "learning_rate": 6.830692541925259e-06, "loss": 0.5666, "step": 13942 }, { "epoch": 0.4, "grad_norm": 5.424895882376475, "learning_rate": 6.830260978796622e-06, "loss": 0.5685, "step": 13943 }, { "epoch": 0.4, "grad_norm": 9.753054604953121, "learning_rate": 6.8298293999224755e-06, "loss": 0.5031, "step": 13944 }, { "epoch": 0.4, "grad_norm": 6.596059024280668, "learning_rate": 6.82939780530653e-06, "loss": 0.4809, "step": 13945 }, { "epoch": 0.4, "grad_norm": 7.178131001212136, "learning_rate": 6.828966194952502e-06, "loss": 0.7158, "step": 13946 }, { "epoch": 0.4, "grad_norm": 5.26986700053024, "learning_rate": 6.828534568864104e-06, "loss": 0.5115, "step": 13947 }, { "epoch": 0.4, "grad_norm": 7.368150213290496, "learning_rate": 6.828102927045046e-06, "loss": 0.9709, "step": 13948 }, { "epoch": 0.4, "grad_norm": 8.204714143412302, "learning_rate": 6.827671269499044e-06, "loss": 0.6328, "step": 13949 }, { "epoch": 0.4, "grad_norm": 5.370235549335493, "learning_rate": 6.8272395962298124e-06, "loss": 0.4013, "step": 13950 }, { "epoch": 0.4, "grad_norm": 6.615293697950534, "learning_rate": 6.826807907241062e-06, "loss": 0.7796, "step": 13951 }, { "epoch": 0.4, "grad_norm": 8.084059610544365, "learning_rate": 6.826376202536508e-06, "loss": 0.6713, "step": 13952 }, { "epoch": 0.4, "grad_norm": 6.338406120696188, "learning_rate": 6.8259444821198665e-06, "loss": 0.3143, "step": 13953 }, { "epoch": 0.4, "grad_norm": 6.361153318810211, "learning_rate": 6.825512745994849e-06, "loss": 0.4388, "step": 13954 }, { "epoch": 0.4, "grad_norm": 4.705333964041796, "learning_rate": 6.8250809941651684e-06, "loss": 0.4983, "step": 13955 }, { "epoch": 0.4, "grad_norm": 3.274314289524102, "learning_rate": 6.824649226634542e-06, "loss": 0.285, "step": 13956 }, { "epoch": 0.4, "grad_norm": 9.800338984484947, "learning_rate": 6.824217443406682e-06, "loss": 0.7412, "step": 13957 }, { "epoch": 0.4, "grad_norm": 4.155208342386346, "learning_rate": 6.823785644485305e-06, "loss": 0.4702, "step": 13958 }, { "epoch": 0.4, "grad_norm": 5.765555570024139, "learning_rate": 6.823353829874124e-06, "loss": 0.7989, "step": 13959 }, { "epoch": 0.4, "grad_norm": 2.398441824148051, "learning_rate": 6.822921999576855e-06, "loss": 0.0661, "step": 13960 }, { "epoch": 0.4, "grad_norm": 4.77515826137769, "learning_rate": 6.822490153597212e-06, "loss": 0.6531, "step": 13961 }, { "epoch": 0.4, "grad_norm": 13.625421570021803, "learning_rate": 6.822058291938911e-06, "loss": 0.7974, "step": 13962 }, { "epoch": 0.4, "grad_norm": 8.095841362190551, "learning_rate": 6.821626414605666e-06, "loss": 0.6304, "step": 13963 }, { "epoch": 0.4, "grad_norm": 8.301399864931426, "learning_rate": 6.821194521601194e-06, "loss": 0.2373, "step": 13964 }, { "epoch": 0.4, "grad_norm": 4.9542109264752705, "learning_rate": 6.820762612929209e-06, "loss": 0.3386, "step": 13965 }, { "epoch": 0.4, "grad_norm": 3.8964855681682855, "learning_rate": 6.820330688593428e-06, "loss": 0.3732, "step": 13966 }, { "epoch": 0.4, "grad_norm": 6.894359865300802, "learning_rate": 6.819898748597566e-06, "loss": 0.4655, "step": 13967 }, { "epoch": 0.4, "grad_norm": 4.382569250200542, "learning_rate": 6.81946679294534e-06, "loss": 0.9004, "step": 13968 }, { "epoch": 0.4, "grad_norm": 3.735838567364342, "learning_rate": 6.819034821640465e-06, "loss": 0.3436, "step": 13969 }, { "epoch": 0.4, "grad_norm": 6.228895796007862, "learning_rate": 6.8186028346866576e-06, "loss": 0.6845, "step": 13970 }, { "epoch": 0.4, "grad_norm": 2.3336572521599934, "learning_rate": 6.818170832087633e-06, "loss": 0.5039, "step": 13971 }, { "epoch": 0.4, "grad_norm": 6.883597806540343, "learning_rate": 6.8177388138471075e-06, "loss": 0.5281, "step": 13972 }, { "epoch": 0.4, "grad_norm": 5.824729810023099, "learning_rate": 6.817306779968801e-06, "loss": 0.5553, "step": 13973 }, { "epoch": 0.4, "grad_norm": 7.150520129725553, "learning_rate": 6.816874730456427e-06, "loss": 0.6807, "step": 13974 }, { "epoch": 0.4, "grad_norm": 6.805867562526911, "learning_rate": 6.816442665313703e-06, "loss": 0.7937, "step": 13975 }, { "epoch": 0.4, "grad_norm": 12.409557756755948, "learning_rate": 6.816010584544347e-06, "loss": 0.4409, "step": 13976 }, { "epoch": 0.4, "grad_norm": 7.79371337392563, "learning_rate": 6.815578488152077e-06, "loss": 0.6315, "step": 13977 }, { "epoch": 0.4, "grad_norm": 7.906676239465684, "learning_rate": 6.8151463761406065e-06, "loss": 0.5235, "step": 13978 }, { "epoch": 0.4, "grad_norm": 6.7907657571837365, "learning_rate": 6.814714248513656e-06, "loss": 0.6913, "step": 13979 }, { "epoch": 0.4, "grad_norm": 3.7084776103781487, "learning_rate": 6.814282105274942e-06, "loss": 0.6076, "step": 13980 }, { "epoch": 0.4, "grad_norm": 6.046790819789638, "learning_rate": 6.813849946428182e-06, "loss": 0.3751, "step": 13981 }, { "epoch": 0.4, "grad_norm": 4.744138514273666, "learning_rate": 6.813417771977096e-06, "loss": 0.6201, "step": 13982 }, { "epoch": 0.4, "grad_norm": 9.081108194019832, "learning_rate": 6.8129855819253995e-06, "loss": 0.5047, "step": 13983 }, { "epoch": 0.4, "grad_norm": 6.980354466113824, "learning_rate": 6.8125533762768104e-06, "loss": 0.6606, "step": 13984 }, { "epoch": 0.4, "grad_norm": 7.5111363066614985, "learning_rate": 6.812121155035049e-06, "loss": 0.6735, "step": 13985 }, { "epoch": 0.4, "grad_norm": 3.4950221630479734, "learning_rate": 6.81168891820383e-06, "loss": 0.1928, "step": 13986 }, { "epoch": 0.4, "grad_norm": 10.66240579221369, "learning_rate": 6.811256665786878e-06, "loss": 0.9211, "step": 13987 }, { "epoch": 0.4, "grad_norm": 2.6299501021623177, "learning_rate": 6.810824397787905e-06, "loss": 0.1494, "step": 13988 }, { "epoch": 0.4, "grad_norm": 6.076452343806199, "learning_rate": 6.810392114210633e-06, "loss": 0.5441, "step": 13989 }, { "epoch": 0.4, "grad_norm": 3.9832961173826247, "learning_rate": 6.809959815058782e-06, "loss": 0.6374, "step": 13990 }, { "epoch": 0.4, "grad_norm": 6.273529356097541, "learning_rate": 6.809527500336068e-06, "loss": 0.5088, "step": 13991 }, { "epoch": 0.4, "grad_norm": 2.9173461985316025, "learning_rate": 6.809095170046211e-06, "loss": 0.2958, "step": 13992 }, { "epoch": 0.4, "grad_norm": 3.6961477678472336, "learning_rate": 6.808662824192933e-06, "loss": 0.2831, "step": 13993 }, { "epoch": 0.4, "grad_norm": 15.311056512797736, "learning_rate": 6.808230462779952e-06, "loss": 0.6333, "step": 13994 }, { "epoch": 0.4, "grad_norm": 3.137652380537093, "learning_rate": 6.807798085810986e-06, "loss": 0.3222, "step": 13995 }, { "epoch": 0.4, "grad_norm": 5.226534308797989, "learning_rate": 6.807365693289755e-06, "loss": 0.3943, "step": 13996 }, { "epoch": 0.4, "grad_norm": 7.208522239223694, "learning_rate": 6.806933285219981e-06, "loss": 0.8468, "step": 13997 }, { "epoch": 0.4, "grad_norm": 4.458165765744673, "learning_rate": 6.80650086160538e-06, "loss": 0.1395, "step": 13998 }, { "epoch": 0.4, "grad_norm": 4.119941065700081, "learning_rate": 6.8060684224496745e-06, "loss": 0.1026, "step": 13999 }, { "epoch": 0.4, "grad_norm": 6.6627035600150855, "learning_rate": 6.805635967756588e-06, "loss": 0.7351, "step": 14000 }, { "epoch": 0.4, "grad_norm": 6.1090132662325765, "learning_rate": 6.805203497529835e-06, "loss": 0.741, "step": 14001 }, { "epoch": 0.4, "grad_norm": 4.594401306899022, "learning_rate": 6.804771011773139e-06, "loss": 0.314, "step": 14002 }, { "epoch": 0.4, "grad_norm": 5.80780249659693, "learning_rate": 6.804338510490221e-06, "loss": 0.3914, "step": 14003 }, { "epoch": 0.4, "grad_norm": 5.885633969866725, "learning_rate": 6.803905993684801e-06, "loss": 0.4601, "step": 14004 }, { "epoch": 0.4, "grad_norm": 4.715681695312894, "learning_rate": 6.8034734613605975e-06, "loss": 0.755, "step": 14005 }, { "epoch": 0.4, "grad_norm": 9.498578918980595, "learning_rate": 6.803040913521335e-06, "loss": 1.119, "step": 14006 }, { "epoch": 0.4, "grad_norm": 4.13145700014025, "learning_rate": 6.802608350170734e-06, "loss": 0.1773, "step": 14007 }, { "epoch": 0.4, "grad_norm": 6.375205148874138, "learning_rate": 6.8021757713125155e-06, "loss": 0.4074, "step": 14008 }, { "epoch": 0.4, "grad_norm": 4.247535608152472, "learning_rate": 6.8017431769504e-06, "loss": 0.5051, "step": 14009 }, { "epoch": 0.4, "grad_norm": 4.648900010636809, "learning_rate": 6.80131056708811e-06, "loss": 0.3843, "step": 14010 }, { "epoch": 0.4, "grad_norm": 5.408316355160875, "learning_rate": 6.800877941729369e-06, "loss": 0.4432, "step": 14011 }, { "epoch": 0.4, "grad_norm": 5.2270158191921015, "learning_rate": 6.800445300877895e-06, "loss": 0.7993, "step": 14012 }, { "epoch": 0.4, "grad_norm": 10.283956432373394, "learning_rate": 6.800012644537412e-06, "loss": 0.2235, "step": 14013 }, { "epoch": 0.4, "grad_norm": 11.936032030210907, "learning_rate": 6.7995799727116405e-06, "loss": 1.002, "step": 14014 }, { "epoch": 0.4, "grad_norm": 3.0487294544624084, "learning_rate": 6.799147285404306e-06, "loss": 0.3513, "step": 14015 }, { "epoch": 0.4, "grad_norm": 4.423896475762843, "learning_rate": 6.798714582619128e-06, "loss": 0.4641, "step": 14016 }, { "epoch": 0.4, "grad_norm": 9.98189450585536, "learning_rate": 6.798281864359831e-06, "loss": 0.7303, "step": 14017 }, { "epoch": 0.4, "grad_norm": 5.945191631517139, "learning_rate": 6.797849130630135e-06, "loss": 0.4612, "step": 14018 }, { "epoch": 0.4, "grad_norm": 12.092570994075873, "learning_rate": 6.797416381433766e-06, "loss": 0.6703, "step": 14019 }, { "epoch": 0.4, "grad_norm": 4.084141849088789, "learning_rate": 6.796983616774443e-06, "loss": 0.3411, "step": 14020 }, { "epoch": 0.4, "grad_norm": 5.3063680377317946, "learning_rate": 6.796550836655893e-06, "loss": 0.7793, "step": 14021 }, { "epoch": 0.4, "grad_norm": 9.292018384791302, "learning_rate": 6.796118041081836e-06, "loss": 0.2915, "step": 14022 }, { "epoch": 0.4, "grad_norm": 8.83531271751856, "learning_rate": 6.795685230055997e-06, "loss": 0.5525, "step": 14023 }, { "epoch": 0.4, "grad_norm": 8.100722678641688, "learning_rate": 6.7952524035821e-06, "loss": 0.5619, "step": 14024 }, { "epoch": 0.4, "grad_norm": 12.921817470362507, "learning_rate": 6.794819561663867e-06, "loss": 0.3846, "step": 14025 }, { "epoch": 0.4, "grad_norm": 6.087964955458364, "learning_rate": 6.794386704305021e-06, "loss": 0.9254, "step": 14026 }, { "epoch": 0.4, "grad_norm": 7.665725994105736, "learning_rate": 6.79395383150929e-06, "loss": 0.2779, "step": 14027 }, { "epoch": 0.4, "grad_norm": 9.314452715453964, "learning_rate": 6.793520943280393e-06, "loss": 0.4683, "step": 14028 }, { "epoch": 0.4, "grad_norm": 6.46821190259158, "learning_rate": 6.7930880396220565e-06, "loss": 0.8842, "step": 14029 }, { "epoch": 0.4, "grad_norm": 6.356228886722095, "learning_rate": 6.792655120538004e-06, "loss": 0.6965, "step": 14030 }, { "epoch": 0.4, "grad_norm": 5.048731176157553, "learning_rate": 6.79222218603196e-06, "loss": 0.4905, "step": 14031 }, { "epoch": 0.4, "grad_norm": 7.634431353004638, "learning_rate": 6.7917892361076505e-06, "loss": 0.8993, "step": 14032 }, { "epoch": 0.4, "grad_norm": 3.2445017632573556, "learning_rate": 6.791356270768798e-06, "loss": 0.3651, "step": 14033 }, { "epoch": 0.4, "grad_norm": 3.990290659592941, "learning_rate": 6.7909232900191295e-06, "loss": 0.5913, "step": 14034 }, { "epoch": 0.4, "grad_norm": 2.4013688505150683, "learning_rate": 6.790490293862367e-06, "loss": 0.1394, "step": 14035 }, { "epoch": 0.4, "grad_norm": 5.056622048023765, "learning_rate": 6.790057282302236e-06, "loss": 0.2348, "step": 14036 }, { "epoch": 0.4, "grad_norm": 6.067551857782099, "learning_rate": 6.789624255342464e-06, "loss": 0.6325, "step": 14037 }, { "epoch": 0.4, "grad_norm": 2.9259046011618, "learning_rate": 6.789191212986776e-06, "loss": 0.5533, "step": 14038 }, { "epoch": 0.4, "grad_norm": 7.110757881901851, "learning_rate": 6.788758155238893e-06, "loss": 0.2492, "step": 14039 }, { "epoch": 0.4, "grad_norm": 6.724409407878556, "learning_rate": 6.788325082102546e-06, "loss": 0.6443, "step": 14040 }, { "epoch": 0.4, "grad_norm": 1.4664239345819419, "learning_rate": 6.787891993581459e-06, "loss": 0.1118, "step": 14041 }, { "epoch": 0.4, "grad_norm": 3.111033510099386, "learning_rate": 6.787458889679356e-06, "loss": 0.2432, "step": 14042 }, { "epoch": 0.4, "grad_norm": 5.984200537307321, "learning_rate": 6.787025770399964e-06, "loss": 0.5091, "step": 14043 }, { "epoch": 0.4, "grad_norm": 4.278672298536065, "learning_rate": 6.786592635747011e-06, "loss": 0.59, "step": 14044 }, { "epoch": 0.4, "grad_norm": 4.5497898053255215, "learning_rate": 6.786159485724221e-06, "loss": 0.9599, "step": 14045 }, { "epoch": 0.4, "grad_norm": 6.539901551955132, "learning_rate": 6.78572632033532e-06, "loss": 0.1812, "step": 14046 }, { "epoch": 0.4, "grad_norm": 5.9240938322632495, "learning_rate": 6.785293139584036e-06, "loss": 0.5154, "step": 14047 }, { "epoch": 0.4, "grad_norm": 2.568775865559167, "learning_rate": 6.784859943474095e-06, "loss": 0.2698, "step": 14048 }, { "epoch": 0.4, "grad_norm": 5.335584100997942, "learning_rate": 6.784426732009223e-06, "loss": 0.216, "step": 14049 }, { "epoch": 0.4, "grad_norm": 3.7643463369789134, "learning_rate": 6.7839935051931475e-06, "loss": 0.5096, "step": 14050 }, { "epoch": 0.4, "grad_norm": 5.317978132566647, "learning_rate": 6.783560263029597e-06, "loss": 0.2537, "step": 14051 }, { "epoch": 0.4, "grad_norm": 7.589252380169662, "learning_rate": 6.783127005522296e-06, "loss": 0.5678, "step": 14052 }, { "epoch": 0.4, "grad_norm": 6.160614325618718, "learning_rate": 6.782693732674972e-06, "loss": 0.1536, "step": 14053 }, { "epoch": 0.4, "grad_norm": 6.396048112508185, "learning_rate": 6.782260444491356e-06, "loss": 0.8318, "step": 14054 }, { "epoch": 0.4, "grad_norm": 6.624831215489798, "learning_rate": 6.78182714097517e-06, "loss": 0.416, "step": 14055 }, { "epoch": 0.4, "grad_norm": 8.564528176338776, "learning_rate": 6.781393822130145e-06, "loss": 0.5144, "step": 14056 }, { "epoch": 0.4, "grad_norm": 8.201989900074423, "learning_rate": 6.78096048796001e-06, "loss": 0.4003, "step": 14057 }, { "epoch": 0.4, "grad_norm": 6.743927378615306, "learning_rate": 6.78052713846849e-06, "loss": 0.3917, "step": 14058 }, { "epoch": 0.4, "grad_norm": 6.0838239376167405, "learning_rate": 6.780093773659314e-06, "loss": 0.3049, "step": 14059 }, { "epoch": 0.4, "grad_norm": 5.8081475235906534, "learning_rate": 6.77966039353621e-06, "loss": 0.7126, "step": 14060 }, { "epoch": 0.4, "grad_norm": 4.3639371920199626, "learning_rate": 6.779226998102909e-06, "loss": 0.3447, "step": 14061 }, { "epoch": 0.4, "grad_norm": 6.407471926386159, "learning_rate": 6.7787935873631365e-06, "loss": 0.1494, "step": 14062 }, { "epoch": 0.4, "grad_norm": 12.333013476700389, "learning_rate": 6.77836016132062e-06, "loss": 0.8831, "step": 14063 }, { "epoch": 0.4, "grad_norm": 4.994434907468225, "learning_rate": 6.777926719979091e-06, "loss": 0.4385, "step": 14064 }, { "epoch": 0.4, "grad_norm": 10.701585377688076, "learning_rate": 6.777493263342277e-06, "loss": 0.5556, "step": 14065 }, { "epoch": 0.4, "grad_norm": 7.230587192185627, "learning_rate": 6.777059791413908e-06, "loss": 0.5435, "step": 14066 }, { "epoch": 0.4, "grad_norm": 7.092769202097558, "learning_rate": 6.776626304197712e-06, "loss": 0.3019, "step": 14067 }, { "epoch": 0.4, "grad_norm": 4.198319818844206, "learning_rate": 6.776192801697419e-06, "loss": 0.2513, "step": 14068 }, { "epoch": 0.4, "grad_norm": 5.098375829428305, "learning_rate": 6.775759283916758e-06, "loss": 0.4301, "step": 14069 }, { "epoch": 0.4, "grad_norm": 3.910754787735501, "learning_rate": 6.775325750859459e-06, "loss": 0.3448, "step": 14070 }, { "epoch": 0.4, "grad_norm": 5.4645091589041925, "learning_rate": 6.77489220252925e-06, "loss": 0.5022, "step": 14071 }, { "epoch": 0.4, "grad_norm": 4.012490113861169, "learning_rate": 6.774458638929862e-06, "loss": 0.4554, "step": 14072 }, { "epoch": 0.4, "grad_norm": 6.2037355521266555, "learning_rate": 6.774025060065025e-06, "loss": 0.3604, "step": 14073 }, { "epoch": 0.4, "grad_norm": 5.872906941052526, "learning_rate": 6.773591465938469e-06, "loss": 0.66, "step": 14074 }, { "epoch": 0.4, "grad_norm": 2.934254578220774, "learning_rate": 6.773157856553925e-06, "loss": 0.2887, "step": 14075 }, { "epoch": 0.4, "grad_norm": 3.9641457229797696, "learning_rate": 6.77272423191512e-06, "loss": 0.2649, "step": 14076 }, { "epoch": 0.4, "grad_norm": 10.630727278985365, "learning_rate": 6.772290592025787e-06, "loss": 0.4884, "step": 14077 }, { "epoch": 0.4, "grad_norm": 7.780880363907546, "learning_rate": 6.771856936889659e-06, "loss": 0.3748, "step": 14078 }, { "epoch": 0.4, "grad_norm": 7.334625874386785, "learning_rate": 6.7714232665104614e-06, "loss": 0.8913, "step": 14079 }, { "epoch": 0.4, "grad_norm": 4.773625896126235, "learning_rate": 6.770989580891928e-06, "loss": 0.2238, "step": 14080 }, { "epoch": 0.4, "grad_norm": 11.987796618279377, "learning_rate": 6.7705558800377904e-06, "loss": 0.8662, "step": 14081 }, { "epoch": 0.4, "grad_norm": 6.18579783290396, "learning_rate": 6.770122163951777e-06, "loss": 0.5427, "step": 14082 }, { "epoch": 0.4, "grad_norm": 4.221058898928764, "learning_rate": 6.769688432637621e-06, "loss": 0.1966, "step": 14083 }, { "epoch": 0.4, "grad_norm": 6.7577751379137885, "learning_rate": 6.769254686099054e-06, "loss": 0.2509, "step": 14084 }, { "epoch": 0.4, "grad_norm": 9.23193678318904, "learning_rate": 6.768820924339807e-06, "loss": 0.5817, "step": 14085 }, { "epoch": 0.4, "grad_norm": 6.687651445331765, "learning_rate": 6.768387147363611e-06, "loss": 0.3973, "step": 14086 }, { "epoch": 0.4, "grad_norm": 6.531923323242567, "learning_rate": 6.767953355174197e-06, "loss": 0.5924, "step": 14087 }, { "epoch": 0.4, "grad_norm": 3.980790182355848, "learning_rate": 6.7675195477752984e-06, "loss": 0.5383, "step": 14088 }, { "epoch": 0.4, "grad_norm": 5.815007110027189, "learning_rate": 6.767085725170646e-06, "loss": 0.7301, "step": 14089 }, { "epoch": 0.4, "grad_norm": 8.159741177007064, "learning_rate": 6.7666518873639756e-06, "loss": 0.4984, "step": 14090 }, { "epoch": 0.4, "grad_norm": 5.27693636639751, "learning_rate": 6.766218034359015e-06, "loss": 0.3174, "step": 14091 }, { "epoch": 0.4, "grad_norm": 7.571092720885852, "learning_rate": 6.765784166159497e-06, "loss": 0.2542, "step": 14092 }, { "epoch": 0.4, "grad_norm": 3.1114970111688516, "learning_rate": 6.765350282769156e-06, "loss": 0.3694, "step": 14093 }, { "epoch": 0.4, "grad_norm": 2.9405812680826573, "learning_rate": 6.764916384191725e-06, "loss": 0.1277, "step": 14094 }, { "epoch": 0.4, "grad_norm": 6.300521646580362, "learning_rate": 6.764482470430934e-06, "loss": 0.4694, "step": 14095 }, { "epoch": 0.4, "grad_norm": 7.472361565306573, "learning_rate": 6.764048541490517e-06, "loss": 0.6525, "step": 14096 }, { "epoch": 0.4, "grad_norm": 1.5205965206114962, "learning_rate": 6.763614597374208e-06, "loss": 0.0842, "step": 14097 }, { "epoch": 0.4, "grad_norm": 4.812835830266336, "learning_rate": 6.763180638085742e-06, "loss": 0.3463, "step": 14098 }, { "epoch": 0.4, "grad_norm": 6.217912742188999, "learning_rate": 6.762746663628848e-06, "loss": 0.505, "step": 14099 }, { "epoch": 0.4, "grad_norm": 8.22764657060695, "learning_rate": 6.762312674007263e-06, "loss": 0.6559, "step": 14100 }, { "epoch": 0.4, "grad_norm": 4.394173325267666, "learning_rate": 6.761878669224719e-06, "loss": 0.479, "step": 14101 }, { "epoch": 0.4, "grad_norm": 4.050357160007832, "learning_rate": 6.7614446492849485e-06, "loss": 0.2713, "step": 14102 }, { "epoch": 0.4, "grad_norm": 4.000697015591807, "learning_rate": 6.761010614191687e-06, "loss": 0.3201, "step": 14103 }, { "epoch": 0.4, "grad_norm": 8.942181514862634, "learning_rate": 6.7605765639486675e-06, "loss": 0.9975, "step": 14104 }, { "epoch": 0.4, "grad_norm": 10.858193268730124, "learning_rate": 6.760142498559625e-06, "loss": 0.541, "step": 14105 }, { "epoch": 0.4, "grad_norm": 3.5358205940316174, "learning_rate": 6.759708418028292e-06, "loss": 0.4939, "step": 14106 }, { "epoch": 0.4, "grad_norm": 4.076352724560791, "learning_rate": 6.759274322358406e-06, "loss": 0.5291, "step": 14107 }, { "epoch": 0.4, "grad_norm": 3.6110536709317085, "learning_rate": 6.7588402115537e-06, "loss": 0.4192, "step": 14108 }, { "epoch": 0.4, "grad_norm": 13.19362407788678, "learning_rate": 6.758406085617907e-06, "loss": 0.8184, "step": 14109 }, { "epoch": 0.4, "grad_norm": 4.489684573608218, "learning_rate": 6.757971944554763e-06, "loss": 0.4834, "step": 14110 }, { "epoch": 0.4, "grad_norm": 3.442640146146756, "learning_rate": 6.757537788368002e-06, "loss": 0.4636, "step": 14111 }, { "epoch": 0.4, "grad_norm": 7.271777954240881, "learning_rate": 6.757103617061362e-06, "loss": 0.6558, "step": 14112 }, { "epoch": 0.4, "grad_norm": 11.991018112773384, "learning_rate": 6.756669430638574e-06, "loss": 0.6697, "step": 14113 }, { "epoch": 0.4, "grad_norm": 4.999703303117262, "learning_rate": 6.756235229103377e-06, "loss": 0.5599, "step": 14114 }, { "epoch": 0.4, "grad_norm": 3.463931646357621, "learning_rate": 6.755801012459503e-06, "loss": 0.5526, "step": 14115 }, { "epoch": 0.4, "grad_norm": 6.884528544787695, "learning_rate": 6.755366780710689e-06, "loss": 0.5662, "step": 14116 }, { "epoch": 0.4, "grad_norm": 4.689559687460632, "learning_rate": 6.7549325338606715e-06, "loss": 0.4641, "step": 14117 }, { "epoch": 0.4, "grad_norm": 9.437864789165209, "learning_rate": 6.754498271913186e-06, "loss": 0.6609, "step": 14118 }, { "epoch": 0.4, "grad_norm": 9.052746364746055, "learning_rate": 6.754063994871967e-06, "loss": 0.405, "step": 14119 }, { "epoch": 0.4, "grad_norm": 3.746166240480026, "learning_rate": 6.753629702740752e-06, "loss": 0.6561, "step": 14120 }, { "epoch": 0.4, "grad_norm": 5.613278871454785, "learning_rate": 6.753195395523275e-06, "loss": 0.324, "step": 14121 }, { "epoch": 0.4, "grad_norm": 4.836472193626685, "learning_rate": 6.752761073223275e-06, "loss": 0.4694, "step": 14122 }, { "epoch": 0.4, "grad_norm": 3.619883642697638, "learning_rate": 6.752326735844487e-06, "loss": 0.394, "step": 14123 }, { "epoch": 0.4, "grad_norm": 5.427957909734999, "learning_rate": 6.751892383390648e-06, "loss": 0.2317, "step": 14124 }, { "epoch": 0.4, "grad_norm": 5.173985214987895, "learning_rate": 6.751458015865496e-06, "loss": 0.2628, "step": 14125 }, { "epoch": 0.4, "grad_norm": 4.466396805928582, "learning_rate": 6.751023633272764e-06, "loss": 0.5362, "step": 14126 }, { "epoch": 0.4, "grad_norm": 6.9508253773792115, "learning_rate": 6.750589235616193e-06, "loss": 0.3231, "step": 14127 }, { "epoch": 0.4, "grad_norm": 5.657772133384807, "learning_rate": 6.7501548228995185e-06, "loss": 0.3872, "step": 14128 }, { "epoch": 0.4, "grad_norm": 10.353957334637215, "learning_rate": 6.749720395126476e-06, "loss": 0.7615, "step": 14129 }, { "epoch": 0.4, "grad_norm": 3.6512167431062155, "learning_rate": 6.749285952300804e-06, "loss": 0.5788, "step": 14130 }, { "epoch": 0.4, "grad_norm": 5.004453344759717, "learning_rate": 6.7488514944262425e-06, "loss": 0.6281, "step": 14131 }, { "epoch": 0.4, "grad_norm": 4.292929263015765, "learning_rate": 6.748417021506526e-06, "loss": 0.2964, "step": 14132 }, { "epoch": 0.4, "grad_norm": 9.46117206159865, "learning_rate": 6.747982533545392e-06, "loss": 0.5263, "step": 14133 }, { "epoch": 0.4, "grad_norm": 6.151499430531994, "learning_rate": 6.74754803054658e-06, "loss": 0.376, "step": 14134 }, { "epoch": 0.4, "grad_norm": 8.597893842909178, "learning_rate": 6.747113512513829e-06, "loss": 0.3974, "step": 14135 }, { "epoch": 0.4, "grad_norm": 7.668713697178107, "learning_rate": 6.746678979450875e-06, "loss": 0.685, "step": 14136 }, { "epoch": 0.4, "grad_norm": 12.67520869156622, "learning_rate": 6.746244431361455e-06, "loss": 0.2128, "step": 14137 }, { "epoch": 0.4, "grad_norm": 5.141536226065759, "learning_rate": 6.745809868249311e-06, "loss": 0.5192, "step": 14138 }, { "epoch": 0.4, "grad_norm": 4.825513423523529, "learning_rate": 6.745375290118179e-06, "loss": 0.4345, "step": 14139 }, { "epoch": 0.4, "grad_norm": 4.804406171796583, "learning_rate": 6.744940696971798e-06, "loss": 0.3094, "step": 14140 }, { "epoch": 0.4, "grad_norm": 7.043315622903511, "learning_rate": 6.744506088813907e-06, "loss": 0.4978, "step": 14141 }, { "epoch": 0.4, "grad_norm": 3.312361588375396, "learning_rate": 6.744071465648245e-06, "loss": 0.3761, "step": 14142 }, { "epoch": 0.41, "grad_norm": 4.992288866043764, "learning_rate": 6.7436368274785516e-06, "loss": 0.5161, "step": 14143 }, { "epoch": 0.41, "grad_norm": 7.499192257935809, "learning_rate": 6.743202174308565e-06, "loss": 0.4189, "step": 14144 }, { "epoch": 0.41, "grad_norm": 3.399300792267765, "learning_rate": 6.742767506142025e-06, "loss": 0.1179, "step": 14145 }, { "epoch": 0.41, "grad_norm": 28.518617204419243, "learning_rate": 6.742332822982669e-06, "loss": 0.6107, "step": 14146 }, { "epoch": 0.41, "grad_norm": 4.554722676214937, "learning_rate": 6.7418981248342395e-06, "loss": 0.5869, "step": 14147 }, { "epoch": 0.41, "grad_norm": 6.872546538482988, "learning_rate": 6.741463411700475e-06, "loss": 0.2591, "step": 14148 }, { "epoch": 0.41, "grad_norm": 11.29995031430284, "learning_rate": 6.741028683585115e-06, "loss": 0.3629, "step": 14149 }, { "epoch": 0.41, "grad_norm": 4.0750445427829725, "learning_rate": 6.7405939404919e-06, "loss": 0.4384, "step": 14150 }, { "epoch": 0.41, "grad_norm": 3.15950500605664, "learning_rate": 6.740159182424569e-06, "loss": 0.356, "step": 14151 }, { "epoch": 0.41, "grad_norm": 4.753568588715373, "learning_rate": 6.7397244093868645e-06, "loss": 0.593, "step": 14152 }, { "epoch": 0.41, "grad_norm": 3.903055059388171, "learning_rate": 6.739289621382523e-06, "loss": 0.2071, "step": 14153 }, { "epoch": 0.41, "grad_norm": 5.382163408672133, "learning_rate": 6.738854818415287e-06, "loss": 0.4513, "step": 14154 }, { "epoch": 0.41, "grad_norm": 7.511164493515304, "learning_rate": 6.738420000488899e-06, "loss": 0.4648, "step": 14155 }, { "epoch": 0.41, "grad_norm": 6.662500088747178, "learning_rate": 6.737985167607096e-06, "loss": 0.5638, "step": 14156 }, { "epoch": 0.41, "grad_norm": 2.5343979699238077, "learning_rate": 6.737550319773621e-06, "loss": 0.3373, "step": 14157 }, { "epoch": 0.41, "grad_norm": 2.3239180169967897, "learning_rate": 6.737115456992216e-06, "loss": 0.3218, "step": 14158 }, { "epoch": 0.41, "grad_norm": 8.418769900971744, "learning_rate": 6.736680579266619e-06, "loss": 0.6212, "step": 14159 }, { "epoch": 0.41, "grad_norm": 6.4930708521789136, "learning_rate": 6.736245686600573e-06, "loss": 0.3973, "step": 14160 }, { "epoch": 0.41, "grad_norm": 2.8868615679068133, "learning_rate": 6.735810778997819e-06, "loss": 0.2744, "step": 14161 }, { "epoch": 0.41, "grad_norm": 6.851430371879769, "learning_rate": 6.735375856462098e-06, "loss": 0.6026, "step": 14162 }, { "epoch": 0.41, "grad_norm": 4.394101595749778, "learning_rate": 6.7349409189971535e-06, "loss": 0.2894, "step": 14163 }, { "epoch": 0.41, "grad_norm": 6.0632112616731755, "learning_rate": 6.734505966606726e-06, "loss": 0.4696, "step": 14164 }, { "epoch": 0.41, "grad_norm": 5.249477337751332, "learning_rate": 6.7340709992945565e-06, "loss": 0.2834, "step": 14165 }, { "epoch": 0.41, "grad_norm": 6.0354253122541905, "learning_rate": 6.733636017064388e-06, "loss": 0.827, "step": 14166 }, { "epoch": 0.41, "grad_norm": 5.70874361201982, "learning_rate": 6.733201019919961e-06, "loss": 0.6554, "step": 14167 }, { "epoch": 0.41, "grad_norm": 5.1884766544031145, "learning_rate": 6.7327660078650216e-06, "loss": 0.59, "step": 14168 }, { "epoch": 0.41, "grad_norm": 9.513790786930397, "learning_rate": 6.732330980903307e-06, "loss": 0.8139, "step": 14169 }, { "epoch": 0.41, "grad_norm": 5.681129429946807, "learning_rate": 6.731895939038564e-06, "loss": 0.3477, "step": 14170 }, { "epoch": 0.41, "grad_norm": 5.660466872090785, "learning_rate": 6.731460882274532e-06, "loss": 0.2792, "step": 14171 }, { "epoch": 0.41, "grad_norm": 5.057808715705764, "learning_rate": 6.731025810614958e-06, "loss": 0.7506, "step": 14172 }, { "epoch": 0.41, "grad_norm": 5.199025873494395, "learning_rate": 6.730590724063579e-06, "loss": 0.3639, "step": 14173 }, { "epoch": 0.41, "grad_norm": 5.840883051527804, "learning_rate": 6.730155622624142e-06, "loss": 0.5351, "step": 14174 }, { "epoch": 0.41, "grad_norm": 8.939212341599678, "learning_rate": 6.72972050630039e-06, "loss": 0.77, "step": 14175 }, { "epoch": 0.41, "grad_norm": 6.849733637065216, "learning_rate": 6.7292853750960655e-06, "loss": 0.6416, "step": 14176 }, { "epoch": 0.41, "grad_norm": 6.919330826856864, "learning_rate": 6.72885022901491e-06, "loss": 0.6008, "step": 14177 }, { "epoch": 0.41, "grad_norm": 6.393815267906537, "learning_rate": 6.728415068060671e-06, "loss": 0.4405, "step": 14178 }, { "epoch": 0.41, "grad_norm": 3.0087996807362947, "learning_rate": 6.727979892237089e-06, "loss": 0.2575, "step": 14179 }, { "epoch": 0.41, "grad_norm": 4.195018736714474, "learning_rate": 6.727544701547908e-06, "loss": 0.5666, "step": 14180 }, { "epoch": 0.41, "grad_norm": 8.780520344350192, "learning_rate": 6.727109495996873e-06, "loss": 0.639, "step": 14181 }, { "epoch": 0.41, "grad_norm": 6.114357597001105, "learning_rate": 6.726674275587729e-06, "loss": 0.4309, "step": 14182 }, { "epoch": 0.41, "grad_norm": 8.914729475365897, "learning_rate": 6.726239040324218e-06, "loss": 0.7838, "step": 14183 }, { "epoch": 0.41, "grad_norm": 12.582325943459775, "learning_rate": 6.725803790210085e-06, "loss": 0.8127, "step": 14184 }, { "epoch": 0.41, "grad_norm": 4.418211588775312, "learning_rate": 6.725368525249074e-06, "loss": 0.5564, "step": 14185 }, { "epoch": 0.41, "grad_norm": 2.941689516893708, "learning_rate": 6.724933245444931e-06, "loss": 0.2781, "step": 14186 }, { "epoch": 0.41, "grad_norm": 3.7584662552790777, "learning_rate": 6.724497950801398e-06, "loss": 0.2756, "step": 14187 }, { "epoch": 0.41, "grad_norm": 9.918540382213436, "learning_rate": 6.724062641322224e-06, "loss": 0.4964, "step": 14188 }, { "epoch": 0.41, "grad_norm": 7.42447014661705, "learning_rate": 6.72362731701115e-06, "loss": 0.7615, "step": 14189 }, { "epoch": 0.41, "grad_norm": 11.895118415759423, "learning_rate": 6.723191977871922e-06, "loss": 0.6335, "step": 14190 }, { "epoch": 0.41, "grad_norm": 4.479617476140563, "learning_rate": 6.722756623908285e-06, "loss": 0.6445, "step": 14191 }, { "epoch": 0.41, "grad_norm": 5.116195652475259, "learning_rate": 6.722321255123987e-06, "loss": 0.6392, "step": 14192 }, { "epoch": 0.41, "grad_norm": 3.8247121740019523, "learning_rate": 6.721885871522769e-06, "loss": 0.6614, "step": 14193 }, { "epoch": 0.41, "grad_norm": 4.983634989463632, "learning_rate": 6.7214504731083806e-06, "loss": 0.2208, "step": 14194 }, { "epoch": 0.41, "grad_norm": 4.0051048382837715, "learning_rate": 6.721015059884566e-06, "loss": 0.1974, "step": 14195 }, { "epoch": 0.41, "grad_norm": 5.805143397463348, "learning_rate": 6.720579631855069e-06, "loss": 0.3884, "step": 14196 }, { "epoch": 0.41, "grad_norm": 3.095767971767583, "learning_rate": 6.720144189023638e-06, "loss": 0.1246, "step": 14197 }, { "epoch": 0.41, "grad_norm": 11.989715937796149, "learning_rate": 6.7197087313940175e-06, "loss": 0.9502, "step": 14198 }, { "epoch": 0.41, "grad_norm": 3.476614653806928, "learning_rate": 6.719273258969957e-06, "loss": 0.5479, "step": 14199 }, { "epoch": 0.41, "grad_norm": 6.294888772383294, "learning_rate": 6.7188377717551975e-06, "loss": 0.4324, "step": 14200 }, { "epoch": 0.41, "grad_norm": 4.057870339622932, "learning_rate": 6.7184022697534904e-06, "loss": 0.4027, "step": 14201 }, { "epoch": 0.41, "grad_norm": 11.914883424586426, "learning_rate": 6.71796675296858e-06, "loss": 0.3748, "step": 14202 }, { "epoch": 0.41, "grad_norm": 4.7024686995442595, "learning_rate": 6.717531221404211e-06, "loss": 0.2512, "step": 14203 }, { "epoch": 0.41, "grad_norm": 5.723029614660043, "learning_rate": 6.717095675064133e-06, "loss": 0.287, "step": 14204 }, { "epoch": 0.41, "grad_norm": 7.1940467541267425, "learning_rate": 6.716660113952095e-06, "loss": 0.6271, "step": 14205 }, { "epoch": 0.41, "grad_norm": 16.45050584426961, "learning_rate": 6.7162245380718394e-06, "loss": 0.6514, "step": 14206 }, { "epoch": 0.41, "grad_norm": 2.560753925253815, "learning_rate": 6.7157889474271155e-06, "loss": 0.3577, "step": 14207 }, { "epoch": 0.41, "grad_norm": 6.4630833375431065, "learning_rate": 6.715353342021671e-06, "loss": 0.3764, "step": 14208 }, { "epoch": 0.41, "grad_norm": 6.459859885004219, "learning_rate": 6.714917721859253e-06, "loss": 0.4678, "step": 14209 }, { "epoch": 0.41, "grad_norm": 3.9306978477779393, "learning_rate": 6.714482086943608e-06, "loss": 0.1461, "step": 14210 }, { "epoch": 0.41, "grad_norm": 4.858672051320946, "learning_rate": 6.714046437278484e-06, "loss": 0.3791, "step": 14211 }, { "epoch": 0.41, "grad_norm": 9.74871318835134, "learning_rate": 6.713610772867631e-06, "loss": 1.0352, "step": 14212 }, { "epoch": 0.41, "grad_norm": 7.02776667460426, "learning_rate": 6.7131750937147956e-06, "loss": 0.6439, "step": 14213 }, { "epoch": 0.41, "grad_norm": 5.3431651592898435, "learning_rate": 6.7127393998237256e-06, "loss": 0.2789, "step": 14214 }, { "epoch": 0.41, "grad_norm": 3.426832205355435, "learning_rate": 6.7123036911981685e-06, "loss": 0.5336, "step": 14215 }, { "epoch": 0.41, "grad_norm": 5.494800515792977, "learning_rate": 6.711867967841875e-06, "loss": 0.3939, "step": 14216 }, { "epoch": 0.41, "grad_norm": 7.199894542716511, "learning_rate": 6.711432229758592e-06, "loss": 0.771, "step": 14217 }, { "epoch": 0.41, "grad_norm": 2.3658652310865307, "learning_rate": 6.710996476952068e-06, "loss": 0.1002, "step": 14218 }, { "epoch": 0.41, "grad_norm": 5.652754277714966, "learning_rate": 6.710560709426052e-06, "loss": 0.632, "step": 14219 }, { "epoch": 0.41, "grad_norm": 12.19565125800724, "learning_rate": 6.710124927184291e-06, "loss": 0.9644, "step": 14220 }, { "epoch": 0.41, "grad_norm": 2.976622032711591, "learning_rate": 6.709689130230537e-06, "loss": 0.3018, "step": 14221 }, { "epoch": 0.41, "grad_norm": 3.5577038050999117, "learning_rate": 6.709253318568539e-06, "loss": 0.1729, "step": 14222 }, { "epoch": 0.41, "grad_norm": 5.41399935070382, "learning_rate": 6.708817492202044e-06, "loss": 0.4211, "step": 14223 }, { "epoch": 0.41, "grad_norm": 8.558373229451652, "learning_rate": 6.708381651134803e-06, "loss": 0.5058, "step": 14224 }, { "epoch": 0.41, "grad_norm": 3.6012248048493523, "learning_rate": 6.707945795370565e-06, "loss": 0.771, "step": 14225 }, { "epoch": 0.41, "grad_norm": 5.132303662676761, "learning_rate": 6.70750992491308e-06, "loss": 0.6462, "step": 14226 }, { "epoch": 0.41, "grad_norm": 5.436917810952318, "learning_rate": 6.707074039766097e-06, "loss": 0.3574, "step": 14227 }, { "epoch": 0.41, "grad_norm": 6.043674025901278, "learning_rate": 6.706638139933365e-06, "loss": 0.7677, "step": 14228 }, { "epoch": 0.41, "grad_norm": 5.408712410634206, "learning_rate": 6.706202225418637e-06, "loss": 0.8814, "step": 14229 }, { "epoch": 0.41, "grad_norm": 7.67482118305065, "learning_rate": 6.70576629622566e-06, "loss": 0.5831, "step": 14230 }, { "epoch": 0.41, "grad_norm": 4.745632874517339, "learning_rate": 6.705330352358186e-06, "loss": 0.7426, "step": 14231 }, { "epoch": 0.41, "grad_norm": 5.257371767795737, "learning_rate": 6.704894393819966e-06, "loss": 0.3541, "step": 14232 }, { "epoch": 0.41, "grad_norm": 4.275069460388194, "learning_rate": 6.704458420614749e-06, "loss": 0.3946, "step": 14233 }, { "epoch": 0.41, "grad_norm": 9.412412836043591, "learning_rate": 6.704022432746286e-06, "loss": 0.6644, "step": 14234 }, { "epoch": 0.41, "grad_norm": 4.513354407072994, "learning_rate": 6.703586430218326e-06, "loss": 0.3805, "step": 14235 }, { "epoch": 0.41, "grad_norm": 6.432748022120007, "learning_rate": 6.703150413034623e-06, "loss": 0.4881, "step": 14236 }, { "epoch": 0.41, "grad_norm": 4.131779042129252, "learning_rate": 6.702714381198927e-06, "loss": 0.4941, "step": 14237 }, { "epoch": 0.41, "grad_norm": 6.355640523942019, "learning_rate": 6.702278334714989e-06, "loss": 0.8241, "step": 14238 }, { "epoch": 0.41, "grad_norm": 6.094843917863498, "learning_rate": 6.7018422735865604e-06, "loss": 0.6553, "step": 14239 }, { "epoch": 0.41, "grad_norm": 1.8690131770802745, "learning_rate": 6.7014061978173904e-06, "loss": 0.191, "step": 14240 }, { "epoch": 0.41, "grad_norm": 3.970177251140281, "learning_rate": 6.7009701074112345e-06, "loss": 0.3263, "step": 14241 }, { "epoch": 0.41, "grad_norm": 8.529236720147958, "learning_rate": 6.70053400237184e-06, "loss": 0.7343, "step": 14242 }, { "epoch": 0.41, "grad_norm": 5.3087586860774305, "learning_rate": 6.700097882702964e-06, "loss": 0.3521, "step": 14243 }, { "epoch": 0.41, "grad_norm": 3.2997107310380063, "learning_rate": 6.699661748408351e-06, "loss": 0.1089, "step": 14244 }, { "epoch": 0.41, "grad_norm": 3.3085158517938287, "learning_rate": 6.69922559949176e-06, "loss": 0.3532, "step": 14245 }, { "epoch": 0.41, "grad_norm": 11.213411298821734, "learning_rate": 6.698789435956941e-06, "loss": 0.6446, "step": 14246 }, { "epoch": 0.41, "grad_norm": 7.1896135622677, "learning_rate": 6.698353257807644e-06, "loss": 0.5047, "step": 14247 }, { "epoch": 0.41, "grad_norm": 10.846796756551202, "learning_rate": 6.697917065047623e-06, "loss": 0.6479, "step": 14248 }, { "epoch": 0.41, "grad_norm": 5.383249472721758, "learning_rate": 6.697480857680633e-06, "loss": 0.3632, "step": 14249 }, { "epoch": 0.41, "grad_norm": 7.674469363479558, "learning_rate": 6.697044635710422e-06, "loss": 0.4266, "step": 14250 }, { "epoch": 0.41, "grad_norm": 8.642506510119956, "learning_rate": 6.696608399140745e-06, "loss": 0.9121, "step": 14251 }, { "epoch": 0.41, "grad_norm": 6.043716966288251, "learning_rate": 6.696172147975355e-06, "loss": 0.2894, "step": 14252 }, { "epoch": 0.41, "grad_norm": 13.12186212723008, "learning_rate": 6.6957358822180056e-06, "loss": 0.7833, "step": 14253 }, { "epoch": 0.41, "grad_norm": 3.1119458099663095, "learning_rate": 6.695299601872447e-06, "loss": 0.2842, "step": 14254 }, { "epoch": 0.41, "grad_norm": 6.369571094535701, "learning_rate": 6.6948633069424365e-06, "loss": 0.6341, "step": 14255 }, { "epoch": 0.41, "grad_norm": 3.6043278964956627, "learning_rate": 6.694426997431727e-06, "loss": 0.1848, "step": 14256 }, { "epoch": 0.41, "grad_norm": 4.481526550101963, "learning_rate": 6.693990673344068e-06, "loss": 0.3761, "step": 14257 }, { "epoch": 0.41, "grad_norm": 8.194413680397806, "learning_rate": 6.693554334683216e-06, "loss": 0.9468, "step": 14258 }, { "epoch": 0.41, "grad_norm": 13.177064644821645, "learning_rate": 6.6931179814529265e-06, "loss": 0.5866, "step": 14259 }, { "epoch": 0.41, "grad_norm": 5.416315047760261, "learning_rate": 6.692681613656951e-06, "loss": 0.6842, "step": 14260 }, { "epoch": 0.41, "grad_norm": 6.904804440932646, "learning_rate": 6.692245231299042e-06, "loss": 0.486, "step": 14261 }, { "epoch": 0.41, "grad_norm": 3.8220118076295777, "learning_rate": 6.691808834382958e-06, "loss": 0.5941, "step": 14262 }, { "epoch": 0.41, "grad_norm": 6.992561100190593, "learning_rate": 6.69137242291245e-06, "loss": 0.6712, "step": 14263 }, { "epoch": 0.41, "grad_norm": 6.467197766403009, "learning_rate": 6.690935996891272e-06, "loss": 0.522, "step": 14264 }, { "epoch": 0.41, "grad_norm": 7.799565281124579, "learning_rate": 6.690499556323181e-06, "loss": 0.7094, "step": 14265 }, { "epoch": 0.41, "grad_norm": 6.094654602985353, "learning_rate": 6.690063101211932e-06, "loss": 0.1581, "step": 14266 }, { "epoch": 0.41, "grad_norm": 3.005746503449562, "learning_rate": 6.689626631561277e-06, "loss": 0.6705, "step": 14267 }, { "epoch": 0.41, "grad_norm": 4.416457156244415, "learning_rate": 6.689190147374971e-06, "loss": 0.6737, "step": 14268 }, { "epoch": 0.41, "grad_norm": 4.873679666963151, "learning_rate": 6.6887536486567705e-06, "loss": 0.5225, "step": 14269 }, { "epoch": 0.41, "grad_norm": 9.770851531076193, "learning_rate": 6.6883171354104315e-06, "loss": 0.716, "step": 14270 }, { "epoch": 0.41, "grad_norm": 6.481413438404516, "learning_rate": 6.687880607639706e-06, "loss": 0.6971, "step": 14271 }, { "epoch": 0.41, "grad_norm": 5.309964798022181, "learning_rate": 6.687444065348353e-06, "loss": 0.3812, "step": 14272 }, { "epoch": 0.41, "grad_norm": 5.554111134260322, "learning_rate": 6.687007508540127e-06, "loss": 0.5406, "step": 14273 }, { "epoch": 0.41, "grad_norm": 6.684993238424279, "learning_rate": 6.686570937218782e-06, "loss": 0.5409, "step": 14274 }, { "epoch": 0.41, "grad_norm": 11.797108608737, "learning_rate": 6.686134351388075e-06, "loss": 0.5768, "step": 14275 }, { "epoch": 0.41, "grad_norm": 6.053822904813775, "learning_rate": 6.685697751051763e-06, "loss": 0.415, "step": 14276 }, { "epoch": 0.41, "grad_norm": 8.164983728442825, "learning_rate": 6.685261136213599e-06, "loss": 0.289, "step": 14277 }, { "epoch": 0.41, "grad_norm": 6.545907834521638, "learning_rate": 6.684824506877341e-06, "loss": 0.4892, "step": 14278 }, { "epoch": 0.41, "grad_norm": 2.2537019131227956, "learning_rate": 6.684387863046747e-06, "loss": 0.1106, "step": 14279 }, { "epoch": 0.41, "grad_norm": 5.979569221086348, "learning_rate": 6.6839512047255704e-06, "loss": 0.3834, "step": 14280 }, { "epoch": 0.41, "grad_norm": 2.550269409982369, "learning_rate": 6.683514531917569e-06, "loss": 0.3977, "step": 14281 }, { "epoch": 0.41, "grad_norm": 5.497478167051197, "learning_rate": 6.683077844626499e-06, "loss": 1.0424, "step": 14282 }, { "epoch": 0.41, "grad_norm": 11.220171230049951, "learning_rate": 6.682641142856119e-06, "loss": 0.5819, "step": 14283 }, { "epoch": 0.41, "grad_norm": 2.896484261819499, "learning_rate": 6.682204426610183e-06, "loss": 0.1072, "step": 14284 }, { "epoch": 0.41, "grad_norm": 2.402137065153049, "learning_rate": 6.681767695892448e-06, "loss": 0.2636, "step": 14285 }, { "epoch": 0.41, "grad_norm": 6.779499583948999, "learning_rate": 6.681330950706676e-06, "loss": 0.5423, "step": 14286 }, { "epoch": 0.41, "grad_norm": 5.006501167464094, "learning_rate": 6.6808941910566196e-06, "loss": 0.5552, "step": 14287 }, { "epoch": 0.41, "grad_norm": 6.102086004445682, "learning_rate": 6.680457416946035e-06, "loss": 0.8204, "step": 14288 }, { "epoch": 0.41, "grad_norm": 4.612266073523653, "learning_rate": 6.680020628378686e-06, "loss": 0.4009, "step": 14289 }, { "epoch": 0.41, "grad_norm": 4.714900802608449, "learning_rate": 6.679583825358324e-06, "loss": 0.6149, "step": 14290 }, { "epoch": 0.41, "grad_norm": 6.2995821905102485, "learning_rate": 6.67914700788871e-06, "loss": 0.5369, "step": 14291 }, { "epoch": 0.41, "grad_norm": 7.272469953027093, "learning_rate": 6.6787101759736e-06, "loss": 0.4965, "step": 14292 }, { "epoch": 0.41, "grad_norm": 2.7283025834532064, "learning_rate": 6.678273329616755e-06, "loss": 0.6906, "step": 14293 }, { "epoch": 0.41, "grad_norm": 9.692671349215917, "learning_rate": 6.677836468821929e-06, "loss": 0.5699, "step": 14294 }, { "epoch": 0.41, "grad_norm": 5.435117638129894, "learning_rate": 6.677399593592884e-06, "loss": 0.5138, "step": 14295 }, { "epoch": 0.41, "grad_norm": 6.169919826969725, "learning_rate": 6.676962703933376e-06, "loss": 0.6829, "step": 14296 }, { "epoch": 0.41, "grad_norm": 6.814144181092394, "learning_rate": 6.676525799847165e-06, "loss": 0.7231, "step": 14297 }, { "epoch": 0.41, "grad_norm": 5.129435318027095, "learning_rate": 6.676088881338009e-06, "loss": 0.7273, "step": 14298 }, { "epoch": 0.41, "grad_norm": 6.7351275242638815, "learning_rate": 6.675651948409665e-06, "loss": 0.6063, "step": 14299 }, { "epoch": 0.41, "grad_norm": 4.461994373302597, "learning_rate": 6.6752150010658955e-06, "loss": 0.3968, "step": 14300 }, { "epoch": 0.41, "grad_norm": 16.578608729633448, "learning_rate": 6.6747780393104576e-06, "loss": 0.3059, "step": 14301 }, { "epoch": 0.41, "grad_norm": 22.10693522764523, "learning_rate": 6.674341063147109e-06, "loss": 0.597, "step": 14302 }, { "epoch": 0.41, "grad_norm": 8.256799439045588, "learning_rate": 6.673904072579611e-06, "loss": 0.4567, "step": 14303 }, { "epoch": 0.41, "grad_norm": 3.5269978671563633, "learning_rate": 6.673467067611723e-06, "loss": 0.2342, "step": 14304 }, { "epoch": 0.41, "grad_norm": 8.447027803281555, "learning_rate": 6.673030048247203e-06, "loss": 1.0671, "step": 14305 }, { "epoch": 0.41, "grad_norm": 4.771479428226688, "learning_rate": 6.672593014489812e-06, "loss": 0.5726, "step": 14306 }, { "epoch": 0.41, "grad_norm": 4.602901192770857, "learning_rate": 6.67215596634331e-06, "loss": 0.4958, "step": 14307 }, { "epoch": 0.41, "grad_norm": 5.26850665108575, "learning_rate": 6.671718903811455e-06, "loss": 0.5604, "step": 14308 }, { "epoch": 0.41, "grad_norm": 5.905047824158838, "learning_rate": 6.671281826898008e-06, "loss": 0.8253, "step": 14309 }, { "epoch": 0.41, "grad_norm": 8.245528743439115, "learning_rate": 6.67084473560673e-06, "loss": 0.3004, "step": 14310 }, { "epoch": 0.41, "grad_norm": 5.231152034740071, "learning_rate": 6.670407629941379e-06, "loss": 0.581, "step": 14311 }, { "epoch": 0.41, "grad_norm": 7.43362300063215, "learning_rate": 6.669970509905718e-06, "loss": 0.478, "step": 14312 }, { "epoch": 0.41, "grad_norm": 9.410469401506466, "learning_rate": 6.669533375503506e-06, "loss": 1.009, "step": 14313 }, { "epoch": 0.41, "grad_norm": 3.390859534242931, "learning_rate": 6.669096226738505e-06, "loss": 0.4874, "step": 14314 }, { "epoch": 0.41, "grad_norm": 7.357988263337795, "learning_rate": 6.668659063614473e-06, "loss": 0.9063, "step": 14315 }, { "epoch": 0.41, "grad_norm": 7.301309875650297, "learning_rate": 6.668221886135174e-06, "loss": 0.5467, "step": 14316 }, { "epoch": 0.41, "grad_norm": 4.195728814124817, "learning_rate": 6.6677846943043666e-06, "loss": 0.379, "step": 14317 }, { "epoch": 0.41, "grad_norm": 8.640831722387277, "learning_rate": 6.667347488125812e-06, "loss": 0.8376, "step": 14318 }, { "epoch": 0.41, "grad_norm": 2.2366442594339104, "learning_rate": 6.6669102676032735e-06, "loss": 0.3696, "step": 14319 }, { "epoch": 0.41, "grad_norm": 4.07978726998821, "learning_rate": 6.666473032740511e-06, "loss": 0.2503, "step": 14320 }, { "epoch": 0.41, "grad_norm": 18.184381905918414, "learning_rate": 6.6660357835412855e-06, "loss": 0.799, "step": 14321 }, { "epoch": 0.41, "grad_norm": 5.147628975392296, "learning_rate": 6.66559852000936e-06, "loss": 0.5487, "step": 14322 }, { "epoch": 0.41, "grad_norm": 2.0264948425528524, "learning_rate": 6.665161242148497e-06, "loss": 0.1096, "step": 14323 }, { "epoch": 0.41, "grad_norm": 4.82190565493834, "learning_rate": 6.664723949962455e-06, "loss": 0.3016, "step": 14324 }, { "epoch": 0.41, "grad_norm": 4.357106948476762, "learning_rate": 6.664286643454998e-06, "loss": 0.3816, "step": 14325 }, { "epoch": 0.41, "grad_norm": 6.297071808799177, "learning_rate": 6.663849322629887e-06, "loss": 0.5151, "step": 14326 }, { "epoch": 0.41, "grad_norm": 6.572435868256159, "learning_rate": 6.663411987490886e-06, "loss": 0.3239, "step": 14327 }, { "epoch": 0.41, "grad_norm": 7.623063576377265, "learning_rate": 6.662974638041757e-06, "loss": 0.6252, "step": 14328 }, { "epoch": 0.41, "grad_norm": 6.789923539777557, "learning_rate": 6.662537274286261e-06, "loss": 0.5013, "step": 14329 }, { "epoch": 0.41, "grad_norm": 8.261943379407839, "learning_rate": 6.662099896228163e-06, "loss": 0.8156, "step": 14330 }, { "epoch": 0.41, "grad_norm": 6.168946781131098, "learning_rate": 6.6616625038712235e-06, "loss": 0.4731, "step": 14331 }, { "epoch": 0.41, "grad_norm": 3.5759065238973133, "learning_rate": 6.661225097219206e-06, "loss": 0.2912, "step": 14332 }, { "epoch": 0.41, "grad_norm": 5.393451557765503, "learning_rate": 6.660787676275874e-06, "loss": 0.5174, "step": 14333 }, { "epoch": 0.41, "grad_norm": 5.572973699069905, "learning_rate": 6.660350241044989e-06, "loss": 0.4226, "step": 14334 }, { "epoch": 0.41, "grad_norm": 7.294558433048178, "learning_rate": 6.659912791530315e-06, "loss": 0.3858, "step": 14335 }, { "epoch": 0.41, "grad_norm": 3.1287080794881637, "learning_rate": 6.659475327735617e-06, "loss": 0.1755, "step": 14336 }, { "epoch": 0.41, "grad_norm": 12.266090102398012, "learning_rate": 6.659037849664657e-06, "loss": 0.6474, "step": 14337 }, { "epoch": 0.41, "grad_norm": 7.141886960626577, "learning_rate": 6.658600357321198e-06, "loss": 0.6177, "step": 14338 }, { "epoch": 0.41, "grad_norm": 6.044108859654053, "learning_rate": 6.658162850709004e-06, "loss": 0.7482, "step": 14339 }, { "epoch": 0.41, "grad_norm": 3.2743736876681515, "learning_rate": 6.65772532983184e-06, "loss": 0.2194, "step": 14340 }, { "epoch": 0.41, "grad_norm": 2.664826493331787, "learning_rate": 6.657287794693469e-06, "loss": 0.1934, "step": 14341 }, { "epoch": 0.41, "grad_norm": 4.8641764724569, "learning_rate": 6.6568502452976546e-06, "loss": 0.4825, "step": 14342 }, { "epoch": 0.41, "grad_norm": 3.7940868925682443, "learning_rate": 6.6564126816481615e-06, "loss": 0.3271, "step": 14343 }, { "epoch": 0.41, "grad_norm": 7.112809778852116, "learning_rate": 6.655975103748754e-06, "loss": 0.4988, "step": 14344 }, { "epoch": 0.41, "grad_norm": 7.777104449266907, "learning_rate": 6.655537511603197e-06, "loss": 0.873, "step": 14345 }, { "epoch": 0.41, "grad_norm": 7.768921672369726, "learning_rate": 6.655099905215254e-06, "loss": 0.5362, "step": 14346 }, { "epoch": 0.41, "grad_norm": 12.209850807184484, "learning_rate": 6.654662284588692e-06, "loss": 0.9975, "step": 14347 }, { "epoch": 0.41, "grad_norm": 11.599721496625937, "learning_rate": 6.654224649727273e-06, "loss": 0.7363, "step": 14348 }, { "epoch": 0.41, "grad_norm": 9.401032378948551, "learning_rate": 6.653787000634763e-06, "loss": 0.8266, "step": 14349 }, { "epoch": 0.41, "grad_norm": 3.9891076497422704, "learning_rate": 6.653349337314927e-06, "loss": 0.556, "step": 14350 }, { "epoch": 0.41, "grad_norm": 7.703202964898855, "learning_rate": 6.6529116597715305e-06, "loss": 0.489, "step": 14351 }, { "epoch": 0.41, "grad_norm": 3.292659649986344, "learning_rate": 6.652473968008337e-06, "loss": 0.4771, "step": 14352 }, { "epoch": 0.41, "grad_norm": 2.9011451022452523, "learning_rate": 6.6520362620291155e-06, "loss": 0.4167, "step": 14353 }, { "epoch": 0.41, "grad_norm": 6.4822123034771915, "learning_rate": 6.65159854183763e-06, "loss": 0.6017, "step": 14354 }, { "epoch": 0.41, "grad_norm": 4.495735704365434, "learning_rate": 6.651160807437643e-06, "loss": 0.2649, "step": 14355 }, { "epoch": 0.41, "grad_norm": 2.508427363330419, "learning_rate": 6.650723058832924e-06, "loss": 0.1051, "step": 14356 }, { "epoch": 0.41, "grad_norm": 5.761161047889606, "learning_rate": 6.6502852960272394e-06, "loss": 0.3015, "step": 14357 }, { "epoch": 0.41, "grad_norm": 7.187974599013242, "learning_rate": 6.649847519024351e-06, "loss": 0.8245, "step": 14358 }, { "epoch": 0.41, "grad_norm": 5.2340193442605605, "learning_rate": 6.649409727828027e-06, "loss": 0.4321, "step": 14359 }, { "epoch": 0.41, "grad_norm": 11.139551268807823, "learning_rate": 6.648971922442037e-06, "loss": 0.3784, "step": 14360 }, { "epoch": 0.41, "grad_norm": 7.399853032173417, "learning_rate": 6.648534102870144e-06, "loss": 0.9587, "step": 14361 }, { "epoch": 0.41, "grad_norm": 6.545403217225201, "learning_rate": 6.648096269116114e-06, "loss": 1.0883, "step": 14362 }, { "epoch": 0.41, "grad_norm": 5.04114244693937, "learning_rate": 6.647658421183716e-06, "loss": 0.7617, "step": 14363 }, { "epoch": 0.41, "grad_norm": 8.42178089889016, "learning_rate": 6.6472205590767134e-06, "loss": 0.5777, "step": 14364 }, { "epoch": 0.41, "grad_norm": 4.0879002483052345, "learning_rate": 6.646782682798876e-06, "loss": 0.2215, "step": 14365 }, { "epoch": 0.41, "grad_norm": 4.374182924538504, "learning_rate": 6.64634479235397e-06, "loss": 0.6535, "step": 14366 }, { "epoch": 0.41, "grad_norm": 4.9945258453354215, "learning_rate": 6.6459068877457645e-06, "loss": 0.2624, "step": 14367 }, { "epoch": 0.41, "grad_norm": 5.469598671593365, "learning_rate": 6.645468968978022e-06, "loss": 0.2761, "step": 14368 }, { "epoch": 0.41, "grad_norm": 4.602671050913532, "learning_rate": 6.645031036054512e-06, "loss": 0.6239, "step": 14369 }, { "epoch": 0.41, "grad_norm": 3.475416050162263, "learning_rate": 6.644593088979006e-06, "loss": 0.3009, "step": 14370 }, { "epoch": 0.41, "grad_norm": 4.6828152452574745, "learning_rate": 6.644155127755265e-06, "loss": 0.3915, "step": 14371 }, { "epoch": 0.41, "grad_norm": 6.886429458747662, "learning_rate": 6.643717152387061e-06, "loss": 0.5399, "step": 14372 }, { "epoch": 0.41, "grad_norm": 5.597460491114248, "learning_rate": 6.64327916287816e-06, "loss": 0.5128, "step": 14373 }, { "epoch": 0.41, "grad_norm": 8.059486180722281, "learning_rate": 6.642841159232333e-06, "loss": 0.7705, "step": 14374 }, { "epoch": 0.41, "grad_norm": 6.50720360369994, "learning_rate": 6.642403141453344e-06, "loss": 0.6013, "step": 14375 }, { "epoch": 0.41, "grad_norm": 5.053558223462557, "learning_rate": 6.641965109544962e-06, "loss": 0.4853, "step": 14376 }, { "epoch": 0.41, "grad_norm": 4.470504043032864, "learning_rate": 6.6415270635109585e-06, "loss": 0.6164, "step": 14377 }, { "epoch": 0.41, "grad_norm": 5.672957808277321, "learning_rate": 6.641089003355099e-06, "loss": 0.248, "step": 14378 }, { "epoch": 0.41, "grad_norm": 8.640816381173384, "learning_rate": 6.640650929081152e-06, "loss": 1.2625, "step": 14379 }, { "epoch": 0.41, "grad_norm": 6.55354264528837, "learning_rate": 6.640212840692889e-06, "loss": 0.5788, "step": 14380 }, { "epoch": 0.41, "grad_norm": 6.856302533430949, "learning_rate": 6.6397747381940756e-06, "loss": 0.8133, "step": 14381 }, { "epoch": 0.41, "grad_norm": 4.907774305756708, "learning_rate": 6.639336621588481e-06, "loss": 0.6052, "step": 14382 }, { "epoch": 0.41, "grad_norm": 4.137234548483743, "learning_rate": 6.638898490879877e-06, "loss": 0.5795, "step": 14383 }, { "epoch": 0.41, "grad_norm": 5.146154149154053, "learning_rate": 6.63846034607203e-06, "loss": 0.3528, "step": 14384 }, { "epoch": 0.41, "grad_norm": 6.6990760365402, "learning_rate": 6.63802218716871e-06, "loss": 1.1274, "step": 14385 }, { "epoch": 0.41, "grad_norm": 5.913476146419917, "learning_rate": 6.637584014173688e-06, "loss": 0.435, "step": 14386 }, { "epoch": 0.41, "grad_norm": 8.213782749415516, "learning_rate": 6.6371458270907315e-06, "loss": 0.6331, "step": 14387 }, { "epoch": 0.41, "grad_norm": 5.0622703594352405, "learning_rate": 6.636707625923612e-06, "loss": 0.46, "step": 14388 }, { "epoch": 0.41, "grad_norm": 7.402744091148938, "learning_rate": 6.6362694106760976e-06, "loss": 0.4667, "step": 14389 }, { "epoch": 0.41, "grad_norm": 4.359722875572291, "learning_rate": 6.63583118135196e-06, "loss": 0.4324, "step": 14390 }, { "epoch": 0.41, "grad_norm": 4.210811634341242, "learning_rate": 6.635392937954967e-06, "loss": 0.4963, "step": 14391 }, { "epoch": 0.41, "grad_norm": 6.9540050849709445, "learning_rate": 6.634954680488889e-06, "loss": 0.4106, "step": 14392 }, { "epoch": 0.41, "grad_norm": 6.406062723539313, "learning_rate": 6.634516408957499e-06, "loss": 0.5564, "step": 14393 }, { "epoch": 0.41, "grad_norm": 10.00722590684586, "learning_rate": 6.634078123364565e-06, "loss": 0.4326, "step": 14394 }, { "epoch": 0.41, "grad_norm": 7.830753334413224, "learning_rate": 6.633639823713858e-06, "loss": 0.5065, "step": 14395 }, { "epoch": 0.41, "grad_norm": 5.620410827336556, "learning_rate": 6.633201510009149e-06, "loss": 0.3397, "step": 14396 }, { "epoch": 0.41, "grad_norm": 4.418994949652157, "learning_rate": 6.632763182254208e-06, "loss": 0.5114, "step": 14397 }, { "epoch": 0.41, "grad_norm": 2.63049202827416, "learning_rate": 6.632324840452806e-06, "loss": 0.308, "step": 14398 }, { "epoch": 0.41, "grad_norm": 4.143744103613063, "learning_rate": 6.6318864846087165e-06, "loss": 0.443, "step": 14399 }, { "epoch": 0.41, "grad_norm": 8.00087494833939, "learning_rate": 6.631448114725706e-06, "loss": 0.5814, "step": 14400 }, { "epoch": 0.41, "grad_norm": 7.468754660632862, "learning_rate": 6.631009730807549e-06, "loss": 0.2587, "step": 14401 }, { "epoch": 0.41, "grad_norm": 5.381537786180805, "learning_rate": 6.630571332858016e-06, "loss": 0.385, "step": 14402 }, { "epoch": 0.41, "grad_norm": 4.054841819575451, "learning_rate": 6.630132920880878e-06, "loss": 0.3844, "step": 14403 }, { "epoch": 0.41, "grad_norm": 11.242713475863113, "learning_rate": 6.6296944948799105e-06, "loss": 0.8003, "step": 14404 }, { "epoch": 0.41, "grad_norm": 4.112162177869401, "learning_rate": 6.629256054858879e-06, "loss": 0.1886, "step": 14405 }, { "epoch": 0.41, "grad_norm": 5.569829890463219, "learning_rate": 6.628817600821558e-06, "loss": 0.8379, "step": 14406 }, { "epoch": 0.41, "grad_norm": 9.940161774391512, "learning_rate": 6.628379132771722e-06, "loss": 0.6877, "step": 14407 }, { "epoch": 0.41, "grad_norm": 5.316851191572202, "learning_rate": 6.627940650713139e-06, "loss": 0.9554, "step": 14408 }, { "epoch": 0.41, "grad_norm": 8.558853375298208, "learning_rate": 6.627502154649582e-06, "loss": 0.5424, "step": 14409 }, { "epoch": 0.41, "grad_norm": 2.2115324857560887, "learning_rate": 6.627063644584826e-06, "loss": 0.144, "step": 14410 }, { "epoch": 0.41, "grad_norm": 13.502036929991345, "learning_rate": 6.626625120522642e-06, "loss": 0.2278, "step": 14411 }, { "epoch": 0.41, "grad_norm": 4.755121983067578, "learning_rate": 6.6261865824668016e-06, "loss": 0.337, "step": 14412 }, { "epoch": 0.41, "grad_norm": 10.190495290577045, "learning_rate": 6.625748030421078e-06, "loss": 0.6989, "step": 14413 }, { "epoch": 0.41, "grad_norm": 4.1742547209480465, "learning_rate": 6.625309464389246e-06, "loss": 0.3709, "step": 14414 }, { "epoch": 0.41, "grad_norm": 5.520787748712407, "learning_rate": 6.624870884375074e-06, "loss": 0.3446, "step": 14415 }, { "epoch": 0.41, "grad_norm": 4.9405152976435565, "learning_rate": 6.62443229038234e-06, "loss": 0.4988, "step": 14416 }, { "epoch": 0.41, "grad_norm": 3.7789025389294464, "learning_rate": 6.623993682414814e-06, "loss": 0.3608, "step": 14417 }, { "epoch": 0.41, "grad_norm": 2.7737146588309307, "learning_rate": 6.623555060476271e-06, "loss": 0.2577, "step": 14418 }, { "epoch": 0.41, "grad_norm": 4.739690371412081, "learning_rate": 6.623116424570483e-06, "loss": 0.729, "step": 14419 }, { "epoch": 0.41, "grad_norm": 9.039418680860546, "learning_rate": 6.622677774701225e-06, "loss": 0.5014, "step": 14420 }, { "epoch": 0.41, "grad_norm": 5.912239650454172, "learning_rate": 6.622239110872269e-06, "loss": 0.4529, "step": 14421 }, { "epoch": 0.41, "grad_norm": 7.694690721826669, "learning_rate": 6.621800433087391e-06, "loss": 0.7132, "step": 14422 }, { "epoch": 0.41, "grad_norm": 2.092836080905032, "learning_rate": 6.621361741350363e-06, "loss": 0.106, "step": 14423 }, { "epoch": 0.41, "grad_norm": 7.8822594742596275, "learning_rate": 6.620923035664959e-06, "loss": 0.52, "step": 14424 }, { "epoch": 0.41, "grad_norm": 4.377571739728723, "learning_rate": 6.620484316034953e-06, "loss": 0.6286, "step": 14425 }, { "epoch": 0.41, "grad_norm": 4.075834076597377, "learning_rate": 6.62004558246412e-06, "loss": 0.4853, "step": 14426 }, { "epoch": 0.41, "grad_norm": 7.793432786936203, "learning_rate": 6.619606834956237e-06, "loss": 0.3878, "step": 14427 }, { "epoch": 0.41, "grad_norm": 1.663655670115756, "learning_rate": 6.619168073515075e-06, "loss": 0.1536, "step": 14428 }, { "epoch": 0.41, "grad_norm": 8.290433405370864, "learning_rate": 6.618729298144407e-06, "loss": 0.4413, "step": 14429 }, { "epoch": 0.41, "grad_norm": 3.3547474948615865, "learning_rate": 6.618290508848012e-06, "loss": 0.2444, "step": 14430 }, { "epoch": 0.41, "grad_norm": 5.777231128928401, "learning_rate": 6.617851705629663e-06, "loss": 0.6012, "step": 14431 }, { "epoch": 0.41, "grad_norm": 5.469174962562285, "learning_rate": 6.6174128884931355e-06, "loss": 0.3256, "step": 14432 }, { "epoch": 0.41, "grad_norm": 12.593037246715662, "learning_rate": 6.616974057442202e-06, "loss": 1.0714, "step": 14433 }, { "epoch": 0.41, "grad_norm": 6.173800243011819, "learning_rate": 6.6165352124806416e-06, "loss": 0.8895, "step": 14434 }, { "epoch": 0.41, "grad_norm": 5.93622920842565, "learning_rate": 6.616096353612227e-06, "loss": 0.9736, "step": 14435 }, { "epoch": 0.41, "grad_norm": 2.244301241579202, "learning_rate": 6.6156574808407335e-06, "loss": 0.3237, "step": 14436 }, { "epoch": 0.41, "grad_norm": 2.690693266708122, "learning_rate": 6.615218594169941e-06, "loss": 0.1587, "step": 14437 }, { "epoch": 0.41, "grad_norm": 7.4841436298580595, "learning_rate": 6.614779693603619e-06, "loss": 0.6894, "step": 14438 }, { "epoch": 0.41, "grad_norm": 12.18733888177255, "learning_rate": 6.614340779145546e-06, "loss": 0.6979, "step": 14439 }, { "epoch": 0.41, "grad_norm": 11.481104710822011, "learning_rate": 6.613901850799499e-06, "loss": 0.6948, "step": 14440 }, { "epoch": 0.41, "grad_norm": 7.682467666592694, "learning_rate": 6.6134629085692536e-06, "loss": 0.623, "step": 14441 }, { "epoch": 0.41, "grad_norm": 22.853166802325173, "learning_rate": 6.613023952458584e-06, "loss": 0.5718, "step": 14442 }, { "epoch": 0.41, "grad_norm": 10.65681165182718, "learning_rate": 6.612584982471267e-06, "loss": 0.915, "step": 14443 }, { "epoch": 0.41, "grad_norm": 3.485583074549096, "learning_rate": 6.6121459986110826e-06, "loss": 0.576, "step": 14444 }, { "epoch": 0.41, "grad_norm": 3.325087759824493, "learning_rate": 6.611707000881804e-06, "loss": 0.5038, "step": 14445 }, { "epoch": 0.41, "grad_norm": 6.749895271619104, "learning_rate": 6.611267989287208e-06, "loss": 0.2934, "step": 14446 }, { "epoch": 0.41, "grad_norm": 2.988747903128303, "learning_rate": 6.610828963831071e-06, "loss": 0.2065, "step": 14447 }, { "epoch": 0.41, "grad_norm": 4.379602899186801, "learning_rate": 6.6103899245171735e-06, "loss": 0.2833, "step": 14448 }, { "epoch": 0.41, "grad_norm": 4.17800007920838, "learning_rate": 6.609950871349286e-06, "loss": 0.5718, "step": 14449 }, { "epoch": 0.41, "grad_norm": 8.426859355601623, "learning_rate": 6.6095118043311925e-06, "loss": 0.6404, "step": 14450 }, { "epoch": 0.41, "grad_norm": 6.883387876052835, "learning_rate": 6.609072723466665e-06, "loss": 0.6663, "step": 14451 }, { "epoch": 0.41, "grad_norm": 6.446984202596801, "learning_rate": 6.608633628759485e-06, "loss": 0.812, "step": 14452 }, { "epoch": 0.41, "grad_norm": 6.962184875589152, "learning_rate": 6.608194520213427e-06, "loss": 0.5904, "step": 14453 }, { "epoch": 0.41, "grad_norm": 5.002831277320589, "learning_rate": 6.6077553978322695e-06, "loss": 0.893, "step": 14454 }, { "epoch": 0.41, "grad_norm": 2.957505060934862, "learning_rate": 6.607316261619789e-06, "loss": 0.4559, "step": 14455 }, { "epoch": 0.41, "grad_norm": 6.101515140911355, "learning_rate": 6.606877111579767e-06, "loss": 0.3929, "step": 14456 }, { "epoch": 0.41, "grad_norm": 6.681392304891732, "learning_rate": 6.606437947715978e-06, "loss": 0.4793, "step": 14457 }, { "epoch": 0.41, "grad_norm": 4.190611323960851, "learning_rate": 6.6059987700322e-06, "loss": 0.2775, "step": 14458 }, { "epoch": 0.41, "grad_norm": 5.595780627394674, "learning_rate": 6.605559578532213e-06, "loss": 0.8546, "step": 14459 }, { "epoch": 0.41, "grad_norm": 5.725117083355898, "learning_rate": 6.6051203732197955e-06, "loss": 0.7453, "step": 14460 }, { "epoch": 0.41, "grad_norm": 4.908421855747848, "learning_rate": 6.604681154098725e-06, "loss": 0.5103, "step": 14461 }, { "epoch": 0.41, "grad_norm": 8.782689203992742, "learning_rate": 6.60424192117278e-06, "loss": 0.7184, "step": 14462 }, { "epoch": 0.41, "grad_norm": 5.976112837487054, "learning_rate": 6.603802674445739e-06, "loss": 0.5623, "step": 14463 }, { "epoch": 0.41, "grad_norm": 3.9972375927481503, "learning_rate": 6.603363413921381e-06, "loss": 0.355, "step": 14464 }, { "epoch": 0.41, "grad_norm": 6.235183778087138, "learning_rate": 6.602924139603483e-06, "loss": 0.6171, "step": 14465 }, { "epoch": 0.41, "grad_norm": 3.1304831561372795, "learning_rate": 6.602484851495829e-06, "loss": 0.1541, "step": 14466 }, { "epoch": 0.41, "grad_norm": 4.632052791890871, "learning_rate": 6.602045549602194e-06, "loss": 0.671, "step": 14467 }, { "epoch": 0.41, "grad_norm": 4.058738316741472, "learning_rate": 6.601606233926359e-06, "loss": 0.3211, "step": 14468 }, { "epoch": 0.41, "grad_norm": 2.635504599877144, "learning_rate": 6.6011669044721e-06, "loss": 0.3229, "step": 14469 }, { "epoch": 0.41, "grad_norm": 10.592655541702982, "learning_rate": 6.600727561243202e-06, "loss": 1.119, "step": 14470 }, { "epoch": 0.41, "grad_norm": 5.833184467414329, "learning_rate": 6.6002882042434425e-06, "loss": 0.6493, "step": 14471 }, { "epoch": 0.41, "grad_norm": 22.862401275671893, "learning_rate": 6.599848833476599e-06, "loss": 0.6038, "step": 14472 }, { "epoch": 0.41, "grad_norm": 17.549512085829726, "learning_rate": 6.599409448946454e-06, "loss": 1.17, "step": 14473 }, { "epoch": 0.41, "grad_norm": 5.348754992856029, "learning_rate": 6.598970050656785e-06, "loss": 0.3773, "step": 14474 }, { "epoch": 0.41, "grad_norm": 4.233082169243299, "learning_rate": 6.598530638611373e-06, "loss": 0.2552, "step": 14475 }, { "epoch": 0.41, "grad_norm": 2.8273115727031635, "learning_rate": 6.598091212814001e-06, "loss": 0.2257, "step": 14476 }, { "epoch": 0.41, "grad_norm": 4.078873605822194, "learning_rate": 6.597651773268445e-06, "loss": 0.2651, "step": 14477 }, { "epoch": 0.41, "grad_norm": 5.7867634438444115, "learning_rate": 6.5972123199784895e-06, "loss": 0.2973, "step": 14478 }, { "epoch": 0.41, "grad_norm": 7.262141485770612, "learning_rate": 6.596772852947912e-06, "loss": 0.52, "step": 14479 }, { "epoch": 0.41, "grad_norm": 5.201414405591528, "learning_rate": 6.5963333721804945e-06, "loss": 0.3369, "step": 14480 }, { "epoch": 0.41, "grad_norm": 4.192570818744798, "learning_rate": 6.595893877680019e-06, "loss": 0.3281, "step": 14481 }, { "epoch": 0.41, "grad_norm": 11.378019414221672, "learning_rate": 6.595454369450263e-06, "loss": 1.1573, "step": 14482 }, { "epoch": 0.41, "grad_norm": 5.653148158897422, "learning_rate": 6.595014847495009e-06, "loss": 0.314, "step": 14483 }, { "epoch": 0.41, "grad_norm": 5.5325121113144275, "learning_rate": 6.59457531181804e-06, "loss": 0.6974, "step": 14484 }, { "epoch": 0.41, "grad_norm": 9.43397846582659, "learning_rate": 6.594135762423135e-06, "loss": 0.4681, "step": 14485 }, { "epoch": 0.41, "grad_norm": 10.671813517849984, "learning_rate": 6.593696199314077e-06, "loss": 0.477, "step": 14486 }, { "epoch": 0.41, "grad_norm": 7.094945663986057, "learning_rate": 6.593256622494647e-06, "loss": 0.3999, "step": 14487 }, { "epoch": 0.41, "grad_norm": 5.170145488957118, "learning_rate": 6.5928170319686265e-06, "loss": 0.5218, "step": 14488 }, { "epoch": 0.41, "grad_norm": 3.948089679694693, "learning_rate": 6.592377427739797e-06, "loss": 0.4442, "step": 14489 }, { "epoch": 0.41, "grad_norm": 5.404823511931671, "learning_rate": 6.59193780981194e-06, "loss": 0.3667, "step": 14490 }, { "epoch": 0.41, "grad_norm": 5.108362546695153, "learning_rate": 6.591498178188838e-06, "loss": 0.6066, "step": 14491 }, { "epoch": 0.42, "grad_norm": 10.244883306908282, "learning_rate": 6.591058532874273e-06, "loss": 0.513, "step": 14492 }, { "epoch": 0.42, "grad_norm": 4.377027205419498, "learning_rate": 6.590618873872027e-06, "loss": 0.5089, "step": 14493 }, { "epoch": 0.42, "grad_norm": 3.707273453255688, "learning_rate": 6.590179201185884e-06, "loss": 0.148, "step": 14494 }, { "epoch": 0.42, "grad_norm": 6.777089860101702, "learning_rate": 6.589739514819625e-06, "loss": 0.4579, "step": 14495 }, { "epoch": 0.42, "grad_norm": 3.311267209524937, "learning_rate": 6.589299814777033e-06, "loss": 0.3573, "step": 14496 }, { "epoch": 0.42, "grad_norm": 5.1800399591311255, "learning_rate": 6.588860101061889e-06, "loss": 0.3891, "step": 14497 }, { "epoch": 0.42, "grad_norm": 3.0749061135946696, "learning_rate": 6.588420373677979e-06, "loss": 0.1266, "step": 14498 }, { "epoch": 0.42, "grad_norm": 3.293287494840102, "learning_rate": 6.5879806326290815e-06, "loss": 0.321, "step": 14499 }, { "epoch": 0.42, "grad_norm": 2.769786613108738, "learning_rate": 6.587540877918983e-06, "loss": 0.2858, "step": 14500 }, { "epoch": 0.42, "grad_norm": 4.446950707826583, "learning_rate": 6.587101109551468e-06, "loss": 0.2396, "step": 14501 }, { "epoch": 0.42, "grad_norm": 6.764207017411713, "learning_rate": 6.586661327530315e-06, "loss": 0.599, "step": 14502 }, { "epoch": 0.42, "grad_norm": 4.47201242935487, "learning_rate": 6.5862215318593115e-06, "loss": 0.4808, "step": 14503 }, { "epoch": 0.42, "grad_norm": 5.6016800148526915, "learning_rate": 6.585781722542238e-06, "loss": 0.2403, "step": 14504 }, { "epoch": 0.42, "grad_norm": 5.239644646206856, "learning_rate": 6.585341899582881e-06, "loss": 0.4607, "step": 14505 }, { "epoch": 0.42, "grad_norm": 5.555440816754062, "learning_rate": 6.5849020629850226e-06, "loss": 0.4971, "step": 14506 }, { "epoch": 0.42, "grad_norm": 6.237192841269586, "learning_rate": 6.584462212752445e-06, "loss": 0.6071, "step": 14507 }, { "epoch": 0.42, "grad_norm": 6.286229337763361, "learning_rate": 6.584022348888937e-06, "loss": 0.5452, "step": 14508 }, { "epoch": 0.42, "grad_norm": 5.074912454761405, "learning_rate": 6.583582471398278e-06, "loss": 0.5777, "step": 14509 }, { "epoch": 0.42, "grad_norm": 7.57781459526572, "learning_rate": 6.5831425802842545e-06, "loss": 0.4023, "step": 14510 }, { "epoch": 0.42, "grad_norm": 1.7519983862024109, "learning_rate": 6.58270267555065e-06, "loss": 0.2063, "step": 14511 }, { "epoch": 0.42, "grad_norm": 8.31272342926567, "learning_rate": 6.58226275720125e-06, "loss": 0.6967, "step": 14512 }, { "epoch": 0.42, "grad_norm": 4.060773746818648, "learning_rate": 6.581822825239838e-06, "loss": 0.3548, "step": 14513 }, { "epoch": 0.42, "grad_norm": 7.532269673945566, "learning_rate": 6.5813828796701975e-06, "loss": 0.4866, "step": 14514 }, { "epoch": 0.42, "grad_norm": 9.49221055769769, "learning_rate": 6.580942920496117e-06, "loss": 1.0819, "step": 14515 }, { "epoch": 0.42, "grad_norm": 5.994362010080635, "learning_rate": 6.580502947721377e-06, "loss": 0.4633, "step": 14516 }, { "epoch": 0.42, "grad_norm": 8.34673158900186, "learning_rate": 6.580062961349766e-06, "loss": 0.9292, "step": 14517 }, { "epoch": 0.42, "grad_norm": 4.890474847310609, "learning_rate": 6.579622961385068e-06, "loss": 0.8645, "step": 14518 }, { "epoch": 0.42, "grad_norm": 5.539597328094713, "learning_rate": 6.579182947831069e-06, "loss": 0.5402, "step": 14519 }, { "epoch": 0.42, "grad_norm": 12.25748572998959, "learning_rate": 6.5787429206915525e-06, "loss": 0.5765, "step": 14520 }, { "epoch": 0.42, "grad_norm": 6.94881863055963, "learning_rate": 6.578302879970305e-06, "loss": 0.2017, "step": 14521 }, { "epoch": 0.42, "grad_norm": 3.9569442905161027, "learning_rate": 6.577862825671112e-06, "loss": 0.4174, "step": 14522 }, { "epoch": 0.42, "grad_norm": 3.047790590291793, "learning_rate": 6.57742275779776e-06, "loss": 0.0971, "step": 14523 }, { "epoch": 0.42, "grad_norm": 8.960997071604448, "learning_rate": 6.576982676354034e-06, "loss": 0.7091, "step": 14524 }, { "epoch": 0.42, "grad_norm": 6.947068077283736, "learning_rate": 6.576542581343719e-06, "loss": 0.1234, "step": 14525 }, { "epoch": 0.42, "grad_norm": 4.105057105016837, "learning_rate": 6.576102472770603e-06, "loss": 0.54, "step": 14526 }, { "epoch": 0.42, "grad_norm": 3.7915140302907333, "learning_rate": 6.575662350638472e-06, "loss": 0.4323, "step": 14527 }, { "epoch": 0.42, "grad_norm": 6.011186066761678, "learning_rate": 6.575222214951112e-06, "loss": 0.7709, "step": 14528 }, { "epoch": 0.42, "grad_norm": 4.66756298903312, "learning_rate": 6.574782065712309e-06, "loss": 0.1512, "step": 14529 }, { "epoch": 0.42, "grad_norm": 8.1426073907851, "learning_rate": 6.574341902925849e-06, "loss": 0.3942, "step": 14530 }, { "epoch": 0.42, "grad_norm": 7.2157082032056845, "learning_rate": 6.57390172659552e-06, "loss": 0.707, "step": 14531 }, { "epoch": 0.42, "grad_norm": 5.575691782364652, "learning_rate": 6.573461536725108e-06, "loss": 0.3447, "step": 14532 }, { "epoch": 0.42, "grad_norm": 5.560759004031057, "learning_rate": 6.5730213333184e-06, "loss": 0.2141, "step": 14533 }, { "epoch": 0.42, "grad_norm": 9.02635477311369, "learning_rate": 6.572581116379183e-06, "loss": 0.492, "step": 14534 }, { "epoch": 0.42, "grad_norm": 6.333027840658384, "learning_rate": 6.572140885911246e-06, "loss": 0.4981, "step": 14535 }, { "epoch": 0.42, "grad_norm": 3.6268164342411136, "learning_rate": 6.571700641918373e-06, "loss": 0.2918, "step": 14536 }, { "epoch": 0.42, "grad_norm": 8.829767179087433, "learning_rate": 6.5712603844043536e-06, "loss": 0.6386, "step": 14537 }, { "epoch": 0.42, "grad_norm": 7.562839973311221, "learning_rate": 6.570820113372974e-06, "loss": 0.7187, "step": 14538 }, { "epoch": 0.42, "grad_norm": 5.043512718357854, "learning_rate": 6.570379828828023e-06, "loss": 0.3262, "step": 14539 }, { "epoch": 0.42, "grad_norm": 4.52345299594577, "learning_rate": 6.5699395307732865e-06, "loss": 0.2493, "step": 14540 }, { "epoch": 0.42, "grad_norm": 4.3333989590786235, "learning_rate": 6.5694992192125555e-06, "loss": 0.7952, "step": 14541 }, { "epoch": 0.42, "grad_norm": 3.9811252879876546, "learning_rate": 6.569058894149614e-06, "loss": 0.3767, "step": 14542 }, { "epoch": 0.42, "grad_norm": 5.5743862412971605, "learning_rate": 6.568618555588252e-06, "loss": 0.604, "step": 14543 }, { "epoch": 0.42, "grad_norm": 3.110968138516587, "learning_rate": 6.5681782035322584e-06, "loss": 0.5532, "step": 14544 }, { "epoch": 0.42, "grad_norm": 4.333903672514314, "learning_rate": 6.567737837985422e-06, "loss": 0.4789, "step": 14545 }, { "epoch": 0.42, "grad_norm": 9.731535961381613, "learning_rate": 6.5672974589515285e-06, "loss": 1.0524, "step": 14546 }, { "epoch": 0.42, "grad_norm": 7.845093387901293, "learning_rate": 6.5668570664343675e-06, "loss": 1.0887, "step": 14547 }, { "epoch": 0.42, "grad_norm": 2.23902353491091, "learning_rate": 6.566416660437729e-06, "loss": 0.2363, "step": 14548 }, { "epoch": 0.42, "grad_norm": 9.854846984627594, "learning_rate": 6.5659762409654e-06, "loss": 0.311, "step": 14549 }, { "epoch": 0.42, "grad_norm": 3.621167788113865, "learning_rate": 6.565535808021172e-06, "loss": 0.4664, "step": 14550 }, { "epoch": 0.42, "grad_norm": 6.033412288795465, "learning_rate": 6.565095361608831e-06, "loss": 0.4956, "step": 14551 }, { "epoch": 0.42, "grad_norm": 7.354903691203901, "learning_rate": 6.564654901732168e-06, "loss": 0.6281, "step": 14552 }, { "epoch": 0.42, "grad_norm": 9.343986661331572, "learning_rate": 6.564214428394971e-06, "loss": 0.7569, "step": 14553 }, { "epoch": 0.42, "grad_norm": 8.357599429932089, "learning_rate": 6.563773941601028e-06, "loss": 0.3118, "step": 14554 }, { "epoch": 0.42, "grad_norm": 3.2022444654505198, "learning_rate": 6.5633334413541334e-06, "loss": 0.2225, "step": 14555 }, { "epoch": 0.42, "grad_norm": 3.498379536566388, "learning_rate": 6.562892927658073e-06, "loss": 0.3511, "step": 14556 }, { "epoch": 0.42, "grad_norm": 6.352014879771372, "learning_rate": 6.562452400516635e-06, "loss": 0.5073, "step": 14557 }, { "epoch": 0.42, "grad_norm": 7.632178445209404, "learning_rate": 6.562011859933613e-06, "loss": 0.6049, "step": 14558 }, { "epoch": 0.42, "grad_norm": 7.4753031053616015, "learning_rate": 6.561571305912796e-06, "loss": 0.4882, "step": 14559 }, { "epoch": 0.42, "grad_norm": 5.706204019551364, "learning_rate": 6.561130738457972e-06, "loss": 0.7497, "step": 14560 }, { "epoch": 0.42, "grad_norm": 11.463535682622355, "learning_rate": 6.560690157572933e-06, "loss": 0.2547, "step": 14561 }, { "epoch": 0.42, "grad_norm": 1.5675423644906423, "learning_rate": 6.5602495632614695e-06, "loss": 0.1318, "step": 14562 }, { "epoch": 0.42, "grad_norm": 6.82857890769973, "learning_rate": 6.55980895552737e-06, "loss": 0.5942, "step": 14563 }, { "epoch": 0.42, "grad_norm": 2.890594049236486, "learning_rate": 6.559368334374427e-06, "loss": 0.4632, "step": 14564 }, { "epoch": 0.42, "grad_norm": 8.409919023157965, "learning_rate": 6.5589276998064285e-06, "loss": 0.6528, "step": 14565 }, { "epoch": 0.42, "grad_norm": 8.253313468448026, "learning_rate": 6.558487051827168e-06, "loss": 0.488, "step": 14566 }, { "epoch": 0.42, "grad_norm": 4.871502232803699, "learning_rate": 6.558046390440436e-06, "loss": 0.5183, "step": 14567 }, { "epoch": 0.42, "grad_norm": 7.157872878050865, "learning_rate": 6.557605715650023e-06, "loss": 0.7158, "step": 14568 }, { "epoch": 0.42, "grad_norm": 8.889717256836358, "learning_rate": 6.557165027459718e-06, "loss": 0.8114, "step": 14569 }, { "epoch": 0.42, "grad_norm": 16.40335656219018, "learning_rate": 6.556724325873315e-06, "loss": 0.8555, "step": 14570 }, { "epoch": 0.42, "grad_norm": 6.577735400045481, "learning_rate": 6.556283610894604e-06, "loss": 0.8589, "step": 14571 }, { "epoch": 0.42, "grad_norm": 7.462995091665291, "learning_rate": 6.555842882527378e-06, "loss": 0.9964, "step": 14572 }, { "epoch": 0.42, "grad_norm": 6.1816241560923, "learning_rate": 6.5554021407754244e-06, "loss": 0.2386, "step": 14573 }, { "epoch": 0.42, "grad_norm": 7.684498953236474, "learning_rate": 6.55496138564254e-06, "loss": 1.0265, "step": 14574 }, { "epoch": 0.42, "grad_norm": 9.18844096237391, "learning_rate": 6.5545206171325135e-06, "loss": 0.5433, "step": 14575 }, { "epoch": 0.42, "grad_norm": 5.192092976382726, "learning_rate": 6.554079835249139e-06, "loss": 0.6857, "step": 14576 }, { "epoch": 0.42, "grad_norm": 6.8793557673833785, "learning_rate": 6.553639039996205e-06, "loss": 1.0432, "step": 14577 }, { "epoch": 0.42, "grad_norm": 3.3259031195038773, "learning_rate": 6.553198231377506e-06, "loss": 0.419, "step": 14578 }, { "epoch": 0.42, "grad_norm": 2.6537232779200988, "learning_rate": 6.552757409396835e-06, "loss": 0.2037, "step": 14579 }, { "epoch": 0.42, "grad_norm": 30.563806049547807, "learning_rate": 6.552316574057982e-06, "loss": 0.646, "step": 14580 }, { "epoch": 0.42, "grad_norm": 7.561349261038, "learning_rate": 6.5518757253647415e-06, "loss": 0.5972, "step": 14581 }, { "epoch": 0.42, "grad_norm": 2.7872565056207153, "learning_rate": 6.551434863320905e-06, "loss": 0.2164, "step": 14582 }, { "epoch": 0.42, "grad_norm": 6.780303607770806, "learning_rate": 6.550993987930264e-06, "loss": 0.5915, "step": 14583 }, { "epoch": 0.42, "grad_norm": 3.795838579550397, "learning_rate": 6.550553099196614e-06, "loss": 0.3379, "step": 14584 }, { "epoch": 0.42, "grad_norm": 5.193633119298547, "learning_rate": 6.550112197123747e-06, "loss": 0.4714, "step": 14585 }, { "epoch": 0.42, "grad_norm": 6.520433001269766, "learning_rate": 6.549671281715455e-06, "loss": 0.7396, "step": 14586 }, { "epoch": 0.42, "grad_norm": 5.90913304600664, "learning_rate": 6.549230352975532e-06, "loss": 0.577, "step": 14587 }, { "epoch": 0.42, "grad_norm": 10.15590789218736, "learning_rate": 6.54878941090777e-06, "loss": 0.7794, "step": 14588 }, { "epoch": 0.42, "grad_norm": 5.000907660592367, "learning_rate": 6.548348455515965e-06, "loss": 0.1579, "step": 14589 }, { "epoch": 0.42, "grad_norm": 5.790596947805327, "learning_rate": 6.547907486803907e-06, "loss": 0.8538, "step": 14590 }, { "epoch": 0.42, "grad_norm": 6.231088866940706, "learning_rate": 6.547466504775393e-06, "loss": 0.5905, "step": 14591 }, { "epoch": 0.42, "grad_norm": 3.8736388984693533, "learning_rate": 6.547025509434215e-06, "loss": 0.723, "step": 14592 }, { "epoch": 0.42, "grad_norm": 2.6481836923159214, "learning_rate": 6.546584500784166e-06, "loss": 0.4029, "step": 14593 }, { "epoch": 0.42, "grad_norm": 3.598859124074473, "learning_rate": 6.546143478829042e-06, "loss": 0.3372, "step": 14594 }, { "epoch": 0.42, "grad_norm": 5.590569129865115, "learning_rate": 6.545702443572637e-06, "loss": 0.578, "step": 14595 }, { "epoch": 0.42, "grad_norm": 3.9100849095561805, "learning_rate": 6.545261395018743e-06, "loss": 0.4617, "step": 14596 }, { "epoch": 0.42, "grad_norm": 6.419114904979509, "learning_rate": 6.544820333171154e-06, "loss": 0.4645, "step": 14597 }, { "epoch": 0.42, "grad_norm": 5.553254748853511, "learning_rate": 6.544379258033668e-06, "loss": 0.8325, "step": 14598 }, { "epoch": 0.42, "grad_norm": 7.333852583390587, "learning_rate": 6.543938169610077e-06, "loss": 0.6381, "step": 14599 }, { "epoch": 0.42, "grad_norm": 3.8771153490540624, "learning_rate": 6.543497067904175e-06, "loss": 0.4499, "step": 14600 }, { "epoch": 0.42, "grad_norm": 5.828595835303381, "learning_rate": 6.543055952919759e-06, "loss": 0.8353, "step": 14601 }, { "epoch": 0.42, "grad_norm": 6.619156509609979, "learning_rate": 6.542614824660622e-06, "loss": 0.5977, "step": 14602 }, { "epoch": 0.42, "grad_norm": 2.9417278828253477, "learning_rate": 6.542173683130559e-06, "loss": 0.1516, "step": 14603 }, { "epoch": 0.42, "grad_norm": 5.1635326858184865, "learning_rate": 6.5417325283333666e-06, "loss": 0.2222, "step": 14604 }, { "epoch": 0.42, "grad_norm": 5.934956055810305, "learning_rate": 6.541291360272839e-06, "loss": 0.3263, "step": 14605 }, { "epoch": 0.42, "grad_norm": 11.310554547868016, "learning_rate": 6.540850178952771e-06, "loss": 0.6279, "step": 14606 }, { "epoch": 0.42, "grad_norm": 5.556079137554353, "learning_rate": 6.540408984376959e-06, "loss": 0.3997, "step": 14607 }, { "epoch": 0.42, "grad_norm": 3.573384372064376, "learning_rate": 6.5399677765491984e-06, "loss": 0.4662, "step": 14608 }, { "epoch": 0.42, "grad_norm": 8.425744011964246, "learning_rate": 6.539526555473285e-06, "loss": 0.5032, "step": 14609 }, { "epoch": 0.42, "grad_norm": 3.275198360434645, "learning_rate": 6.539085321153013e-06, "loss": 0.3634, "step": 14610 }, { "epoch": 0.42, "grad_norm": 3.3086058020794455, "learning_rate": 6.538644073592181e-06, "loss": 0.2082, "step": 14611 }, { "epoch": 0.42, "grad_norm": 3.7917101232253154, "learning_rate": 6.538202812794583e-06, "loss": 0.2647, "step": 14612 }, { "epoch": 0.42, "grad_norm": 3.9238156992068527, "learning_rate": 6.537761538764016e-06, "loss": 0.4727, "step": 14613 }, { "epoch": 0.42, "grad_norm": 3.393188000085548, "learning_rate": 6.537320251504273e-06, "loss": 0.3643, "step": 14614 }, { "epoch": 0.42, "grad_norm": 4.086768800621871, "learning_rate": 6.536878951019157e-06, "loss": 0.5074, "step": 14615 }, { "epoch": 0.42, "grad_norm": 5.836799874480407, "learning_rate": 6.5364376373124584e-06, "loss": 0.4622, "step": 14616 }, { "epoch": 0.42, "grad_norm": 9.62194954229052, "learning_rate": 6.535996310387975e-06, "loss": 0.5145, "step": 14617 }, { "epoch": 0.42, "grad_norm": 4.876333250006445, "learning_rate": 6.535554970249506e-06, "loss": 0.492, "step": 14618 }, { "epoch": 0.42, "grad_norm": 11.921140315665328, "learning_rate": 6.535113616900848e-06, "loss": 0.6469, "step": 14619 }, { "epoch": 0.42, "grad_norm": 5.8946404570214215, "learning_rate": 6.534672250345795e-06, "loss": 0.4405, "step": 14620 }, { "epoch": 0.42, "grad_norm": 4.004348432733491, "learning_rate": 6.534230870588145e-06, "loss": 0.2701, "step": 14621 }, { "epoch": 0.42, "grad_norm": 6.641266699808909, "learning_rate": 6.533789477631696e-06, "loss": 0.3393, "step": 14622 }, { "epoch": 0.42, "grad_norm": 8.046877725841467, "learning_rate": 6.533348071480246e-06, "loss": 0.5768, "step": 14623 }, { "epoch": 0.42, "grad_norm": 5.697578513126302, "learning_rate": 6.532906652137589e-06, "loss": 0.3997, "step": 14624 }, { "epoch": 0.42, "grad_norm": 2.9462283706356636, "learning_rate": 6.5324652196075275e-06, "loss": 0.3357, "step": 14625 }, { "epoch": 0.42, "grad_norm": 4.887443409467727, "learning_rate": 6.532023773893856e-06, "loss": 0.5973, "step": 14626 }, { "epoch": 0.42, "grad_norm": 4.789194077244665, "learning_rate": 6.531582315000371e-06, "loss": 0.378, "step": 14627 }, { "epoch": 0.42, "grad_norm": 9.757978842594879, "learning_rate": 6.531140842930873e-06, "loss": 0.7551, "step": 14628 }, { "epoch": 0.42, "grad_norm": 7.440246523700345, "learning_rate": 6.53069935768916e-06, "loss": 0.4314, "step": 14629 }, { "epoch": 0.42, "grad_norm": 5.921928787993686, "learning_rate": 6.5302578592790276e-06, "loss": 0.974, "step": 14630 }, { "epoch": 0.42, "grad_norm": 6.593901411008507, "learning_rate": 6.529816347704274e-06, "loss": 0.3565, "step": 14631 }, { "epoch": 0.42, "grad_norm": 3.9735186305671957, "learning_rate": 6.529374822968701e-06, "loss": 0.3281, "step": 14632 }, { "epoch": 0.42, "grad_norm": 3.005826854467923, "learning_rate": 6.528933285076102e-06, "loss": 0.2443, "step": 14633 }, { "epoch": 0.42, "grad_norm": 2.457091483877334, "learning_rate": 6.52849173403028e-06, "loss": 0.2846, "step": 14634 }, { "epoch": 0.42, "grad_norm": 6.290533161582587, "learning_rate": 6.5280501698350315e-06, "loss": 0.6252, "step": 14635 }, { "epoch": 0.42, "grad_norm": 12.718930704768585, "learning_rate": 6.527608592494157e-06, "loss": 0.9659, "step": 14636 }, { "epoch": 0.42, "grad_norm": 6.378794718770074, "learning_rate": 6.527167002011452e-06, "loss": 0.5862, "step": 14637 }, { "epoch": 0.42, "grad_norm": 6.26691162902165, "learning_rate": 6.526725398390718e-06, "loss": 0.5083, "step": 14638 }, { "epoch": 0.42, "grad_norm": 10.325335704527667, "learning_rate": 6.526283781635754e-06, "loss": 0.5721, "step": 14639 }, { "epoch": 0.42, "grad_norm": 10.641721066838967, "learning_rate": 6.525842151750356e-06, "loss": 0.932, "step": 14640 }, { "epoch": 0.42, "grad_norm": 3.162782386566324, "learning_rate": 6.525400508738329e-06, "loss": 0.1354, "step": 14641 }, { "epoch": 0.42, "grad_norm": 6.533524842601923, "learning_rate": 6.524958852603468e-06, "loss": 0.6411, "step": 14642 }, { "epoch": 0.42, "grad_norm": 11.058876780476032, "learning_rate": 6.524517183349573e-06, "loss": 0.3723, "step": 14643 }, { "epoch": 0.42, "grad_norm": 4.592500970972215, "learning_rate": 6.524075500980446e-06, "loss": 0.7743, "step": 14644 }, { "epoch": 0.42, "grad_norm": 4.926146092164756, "learning_rate": 6.523633805499883e-06, "loss": 0.2989, "step": 14645 }, { "epoch": 0.42, "grad_norm": 5.488044619634651, "learning_rate": 6.523192096911688e-06, "loss": 0.7583, "step": 14646 }, { "epoch": 0.42, "grad_norm": 4.292644707179599, "learning_rate": 6.522750375219657e-06, "loss": 0.2736, "step": 14647 }, { "epoch": 0.42, "grad_norm": 9.488219134149807, "learning_rate": 6.522308640427593e-06, "loss": 0.7675, "step": 14648 }, { "epoch": 0.42, "grad_norm": 3.4317705089672956, "learning_rate": 6.521866892539296e-06, "loss": 0.19, "step": 14649 }, { "epoch": 0.42, "grad_norm": 4.787933639727778, "learning_rate": 6.521425131558564e-06, "loss": 0.4053, "step": 14650 }, { "epoch": 0.42, "grad_norm": 5.303556341321365, "learning_rate": 6.5209833574892e-06, "loss": 0.2961, "step": 14651 }, { "epoch": 0.42, "grad_norm": 3.23862763727646, "learning_rate": 6.5205415703350035e-06, "loss": 0.2075, "step": 14652 }, { "epoch": 0.42, "grad_norm": 4.670982198983052, "learning_rate": 6.520099770099775e-06, "loss": 0.2884, "step": 14653 }, { "epoch": 0.42, "grad_norm": 7.257786975565233, "learning_rate": 6.519657956787315e-06, "loss": 0.5133, "step": 14654 }, { "epoch": 0.42, "grad_norm": 4.569472615594158, "learning_rate": 6.519216130401424e-06, "loss": 0.2489, "step": 14655 }, { "epoch": 0.42, "grad_norm": 5.813420868806592, "learning_rate": 6.518774290945904e-06, "loss": 0.8839, "step": 14656 }, { "epoch": 0.42, "grad_norm": 3.5837897557108196, "learning_rate": 6.518332438424556e-06, "loss": 0.4947, "step": 14657 }, { "epoch": 0.42, "grad_norm": 5.161935503857137, "learning_rate": 6.517890572841181e-06, "loss": 0.3014, "step": 14658 }, { "epoch": 0.42, "grad_norm": 4.732427190068401, "learning_rate": 6.517448694199582e-06, "loss": 0.7103, "step": 14659 }, { "epoch": 0.42, "grad_norm": 7.468876346812294, "learning_rate": 6.517006802503556e-06, "loss": 0.6658, "step": 14660 }, { "epoch": 0.42, "grad_norm": 5.599995839594249, "learning_rate": 6.516564897756908e-06, "loss": 0.4917, "step": 14661 }, { "epoch": 0.42, "grad_norm": 6.016831472716295, "learning_rate": 6.51612297996344e-06, "loss": 0.4151, "step": 14662 }, { "epoch": 0.42, "grad_norm": 3.3063245645923356, "learning_rate": 6.515681049126952e-06, "loss": 0.5848, "step": 14663 }, { "epoch": 0.42, "grad_norm": 3.4099348155836333, "learning_rate": 6.515239105251245e-06, "loss": 0.7579, "step": 14664 }, { "epoch": 0.42, "grad_norm": 5.2169523055309615, "learning_rate": 6.514797148340125e-06, "loss": 0.8755, "step": 14665 }, { "epoch": 0.42, "grad_norm": 5.387814128949165, "learning_rate": 6.514355178397391e-06, "loss": 0.5689, "step": 14666 }, { "epoch": 0.42, "grad_norm": 5.505605615327738, "learning_rate": 6.513913195426846e-06, "loss": 0.5161, "step": 14667 }, { "epoch": 0.42, "grad_norm": 4.248041038070703, "learning_rate": 6.513471199432291e-06, "loss": 0.5037, "step": 14668 }, { "epoch": 0.42, "grad_norm": 7.162775021743702, "learning_rate": 6.513029190417532e-06, "loss": 0.7946, "step": 14669 }, { "epoch": 0.42, "grad_norm": 4.401318543281501, "learning_rate": 6.5125871683863675e-06, "loss": 0.2589, "step": 14670 }, { "epoch": 0.42, "grad_norm": 5.605426386762829, "learning_rate": 6.512145133342601e-06, "loss": 0.3909, "step": 14671 }, { "epoch": 0.42, "grad_norm": 4.214767780626899, "learning_rate": 6.511703085290037e-06, "loss": 0.5395, "step": 14672 }, { "epoch": 0.42, "grad_norm": 4.076533931633537, "learning_rate": 6.5112610242324765e-06, "loss": 0.4616, "step": 14673 }, { "epoch": 0.42, "grad_norm": 4.283134212887556, "learning_rate": 6.5108189501737255e-06, "loss": 0.3236, "step": 14674 }, { "epoch": 0.42, "grad_norm": 8.92359020628173, "learning_rate": 6.510376863117583e-06, "loss": 0.7297, "step": 14675 }, { "epoch": 0.42, "grad_norm": 6.0090889436637, "learning_rate": 6.509934763067857e-06, "loss": 0.3828, "step": 14676 }, { "epoch": 0.42, "grad_norm": 6.623160016762052, "learning_rate": 6.509492650028346e-06, "loss": 0.8589, "step": 14677 }, { "epoch": 0.42, "grad_norm": 7.650836509895337, "learning_rate": 6.509050524002856e-06, "loss": 0.5497, "step": 14678 }, { "epoch": 0.42, "grad_norm": 4.722211717456777, "learning_rate": 6.50860838499519e-06, "loss": 0.4067, "step": 14679 }, { "epoch": 0.42, "grad_norm": 6.779554963865956, "learning_rate": 6.508166233009151e-06, "loss": 0.3156, "step": 14680 }, { "epoch": 0.42, "grad_norm": 6.452529103842733, "learning_rate": 6.507724068048544e-06, "loss": 0.3538, "step": 14681 }, { "epoch": 0.42, "grad_norm": 6.488490845971191, "learning_rate": 6.507281890117174e-06, "loss": 0.4029, "step": 14682 }, { "epoch": 0.42, "grad_norm": 9.741778318473976, "learning_rate": 6.506839699218843e-06, "loss": 0.3553, "step": 14683 }, { "epoch": 0.42, "grad_norm": 10.975927998405327, "learning_rate": 6.506397495357355e-06, "loss": 0.6289, "step": 14684 }, { "epoch": 0.42, "grad_norm": 11.42270689725361, "learning_rate": 6.505955278536515e-06, "loss": 0.7419, "step": 14685 }, { "epoch": 0.42, "grad_norm": 6.17451941901936, "learning_rate": 6.505513048760127e-06, "loss": 0.5458, "step": 14686 }, { "epoch": 0.42, "grad_norm": 8.269512452188424, "learning_rate": 6.505070806031996e-06, "loss": 0.5735, "step": 14687 }, { "epoch": 0.42, "grad_norm": 3.416520497444128, "learning_rate": 6.504628550355925e-06, "loss": 0.5297, "step": 14688 }, { "epoch": 0.42, "grad_norm": 4.054090158198658, "learning_rate": 6.50418628173572e-06, "loss": 0.2114, "step": 14689 }, { "epoch": 0.42, "grad_norm": 6.7011432697127, "learning_rate": 6.503744000175187e-06, "loss": 0.1279, "step": 14690 }, { "epoch": 0.42, "grad_norm": 11.293193380786493, "learning_rate": 6.503301705678129e-06, "loss": 0.5898, "step": 14691 }, { "epoch": 0.42, "grad_norm": 4.3030968581355, "learning_rate": 6.502859398248351e-06, "loss": 0.5271, "step": 14692 }, { "epoch": 0.42, "grad_norm": 3.8397983303857037, "learning_rate": 6.50241707788966e-06, "loss": 0.4198, "step": 14693 }, { "epoch": 0.42, "grad_norm": 3.4681029188752825, "learning_rate": 6.5019747446058586e-06, "loss": 0.2792, "step": 14694 }, { "epoch": 0.42, "grad_norm": 4.691958176594747, "learning_rate": 6.501532398400754e-06, "loss": 0.5406, "step": 14695 }, { "epoch": 0.42, "grad_norm": 9.51161111696353, "learning_rate": 6.50109003927815e-06, "loss": 0.625, "step": 14696 }, { "epoch": 0.42, "grad_norm": 8.795793321145934, "learning_rate": 6.500647667241854e-06, "loss": 0.6175, "step": 14697 }, { "epoch": 0.42, "grad_norm": 6.99037782916768, "learning_rate": 6.500205282295671e-06, "loss": 0.4539, "step": 14698 }, { "epoch": 0.42, "grad_norm": 7.514193773302904, "learning_rate": 6.499762884443407e-06, "loss": 0.7308, "step": 14699 }, { "epoch": 0.42, "grad_norm": 10.451120957115265, "learning_rate": 6.499320473688866e-06, "loss": 0.5467, "step": 14700 }, { "epoch": 0.42, "grad_norm": 3.5792044797830935, "learning_rate": 6.498878050035857e-06, "loss": 0.3379, "step": 14701 }, { "epoch": 0.42, "grad_norm": 5.382944623103527, "learning_rate": 6.498435613488183e-06, "loss": 0.2743, "step": 14702 }, { "epoch": 0.42, "grad_norm": 8.03305155219004, "learning_rate": 6.497993164049655e-06, "loss": 0.6603, "step": 14703 }, { "epoch": 0.42, "grad_norm": 6.968554164717846, "learning_rate": 6.4975507017240735e-06, "loss": 0.3703, "step": 14704 }, { "epoch": 0.42, "grad_norm": 5.347440705507891, "learning_rate": 6.497108226515248e-06, "loss": 0.5904, "step": 14705 }, { "epoch": 0.42, "grad_norm": 6.511821853562877, "learning_rate": 6.496665738426986e-06, "loss": 0.8416, "step": 14706 }, { "epoch": 0.42, "grad_norm": 10.052983730225966, "learning_rate": 6.496223237463093e-06, "loss": 0.6858, "step": 14707 }, { "epoch": 0.42, "grad_norm": 4.712973539000107, "learning_rate": 6.495780723627374e-06, "loss": 0.6393, "step": 14708 }, { "epoch": 0.42, "grad_norm": 3.4175032816426745, "learning_rate": 6.495338196923638e-06, "loss": 0.24, "step": 14709 }, { "epoch": 0.42, "grad_norm": 5.59206620772574, "learning_rate": 6.494895657355693e-06, "loss": 0.4911, "step": 14710 }, { "epoch": 0.42, "grad_norm": 4.712496118518637, "learning_rate": 6.494453104927344e-06, "loss": 0.4706, "step": 14711 }, { "epoch": 0.42, "grad_norm": 8.400911018378942, "learning_rate": 6.494010539642399e-06, "loss": 0.476, "step": 14712 }, { "epoch": 0.42, "grad_norm": 13.173103935860853, "learning_rate": 6.493567961504666e-06, "loss": 0.494, "step": 14713 }, { "epoch": 0.42, "grad_norm": 3.364437653229068, "learning_rate": 6.49312537051795e-06, "loss": 0.243, "step": 14714 }, { "epoch": 0.42, "grad_norm": 5.561516321192769, "learning_rate": 6.49268276668606e-06, "loss": 0.4353, "step": 14715 }, { "epoch": 0.42, "grad_norm": 4.5521195829971886, "learning_rate": 6.492240150012806e-06, "loss": 0.3448, "step": 14716 }, { "epoch": 0.42, "grad_norm": 8.37688481771521, "learning_rate": 6.491797520501994e-06, "loss": 0.4682, "step": 14717 }, { "epoch": 0.42, "grad_norm": 7.917776217422857, "learning_rate": 6.49135487815743e-06, "loss": 0.8724, "step": 14718 }, { "epoch": 0.42, "grad_norm": 2.871896976557049, "learning_rate": 6.490912222982924e-06, "loss": 0.1055, "step": 14719 }, { "epoch": 0.42, "grad_norm": 4.862112200455677, "learning_rate": 6.490469554982284e-06, "loss": 0.2438, "step": 14720 }, { "epoch": 0.42, "grad_norm": 7.636588686252553, "learning_rate": 6.490026874159316e-06, "loss": 0.5807, "step": 14721 }, { "epoch": 0.42, "grad_norm": 7.311076881627452, "learning_rate": 6.489584180517831e-06, "loss": 0.6818, "step": 14722 }, { "epoch": 0.42, "grad_norm": 4.1475145710602375, "learning_rate": 6.48914147406164e-06, "loss": 0.3838, "step": 14723 }, { "epoch": 0.42, "grad_norm": 6.337733944218468, "learning_rate": 6.488698754794545e-06, "loss": 0.3724, "step": 14724 }, { "epoch": 0.42, "grad_norm": 3.1896555848579102, "learning_rate": 6.4882560227203585e-06, "loss": 0.457, "step": 14725 }, { "epoch": 0.42, "grad_norm": 11.486631335268537, "learning_rate": 6.487813277842889e-06, "loss": 0.3722, "step": 14726 }, { "epoch": 0.42, "grad_norm": 5.860836975813938, "learning_rate": 6.487370520165945e-06, "loss": 0.4938, "step": 14727 }, { "epoch": 0.42, "grad_norm": 8.582902727776109, "learning_rate": 6.486927749693336e-06, "loss": 0.5164, "step": 14728 }, { "epoch": 0.42, "grad_norm": 6.137781081590462, "learning_rate": 6.48648496642887e-06, "loss": 0.9153, "step": 14729 }, { "epoch": 0.42, "grad_norm": 8.610828311610685, "learning_rate": 6.486042170376357e-06, "loss": 0.7358, "step": 14730 }, { "epoch": 0.42, "grad_norm": 4.163443886215785, "learning_rate": 6.4855993615396065e-06, "loss": 0.7474, "step": 14731 }, { "epoch": 0.42, "grad_norm": 4.437892842701452, "learning_rate": 6.485156539922427e-06, "loss": 0.6153, "step": 14732 }, { "epoch": 0.42, "grad_norm": 7.72546372734681, "learning_rate": 6.484713705528629e-06, "loss": 0.5926, "step": 14733 }, { "epoch": 0.42, "grad_norm": 7.200797079258668, "learning_rate": 6.484270858362022e-06, "loss": 0.6517, "step": 14734 }, { "epoch": 0.42, "grad_norm": 4.477971279703018, "learning_rate": 6.483827998426415e-06, "loss": 0.3829, "step": 14735 }, { "epoch": 0.42, "grad_norm": 8.671234987027876, "learning_rate": 6.483385125725619e-06, "loss": 0.6753, "step": 14736 }, { "epoch": 0.42, "grad_norm": 7.044014225279846, "learning_rate": 6.482942240263443e-06, "loss": 0.5953, "step": 14737 }, { "epoch": 0.42, "grad_norm": 5.359299506275753, "learning_rate": 6.482499342043697e-06, "loss": 0.4206, "step": 14738 }, { "epoch": 0.42, "grad_norm": 3.8276592983431943, "learning_rate": 6.482056431070191e-06, "loss": 0.6042, "step": 14739 }, { "epoch": 0.42, "grad_norm": 4.527028136158969, "learning_rate": 6.481613507346739e-06, "loss": 0.3458, "step": 14740 }, { "epoch": 0.42, "grad_norm": 4.793608954591497, "learning_rate": 6.481170570877146e-06, "loss": 0.6738, "step": 14741 }, { "epoch": 0.42, "grad_norm": 3.861570282184501, "learning_rate": 6.4807276216652256e-06, "loss": 0.8469, "step": 14742 }, { "epoch": 0.42, "grad_norm": 4.870039421211814, "learning_rate": 6.48028465971479e-06, "loss": 0.6454, "step": 14743 }, { "epoch": 0.42, "grad_norm": 6.842430231354007, "learning_rate": 6.479841685029645e-06, "loss": 0.726, "step": 14744 }, { "epoch": 0.42, "grad_norm": 4.78000160823779, "learning_rate": 6.479398697613605e-06, "loss": 0.621, "step": 14745 }, { "epoch": 0.42, "grad_norm": 3.3702304835414107, "learning_rate": 6.478955697470479e-06, "loss": 0.4951, "step": 14746 }, { "epoch": 0.42, "grad_norm": 7.427934923255663, "learning_rate": 6.47851268460408e-06, "loss": 0.7924, "step": 14747 }, { "epoch": 0.42, "grad_norm": 5.5163241367471265, "learning_rate": 6.478069659018218e-06, "loss": 0.3099, "step": 14748 }, { "epoch": 0.42, "grad_norm": 5.959188382167811, "learning_rate": 6.477626620716706e-06, "loss": 0.4403, "step": 14749 }, { "epoch": 0.42, "grad_norm": 5.1195202118736995, "learning_rate": 6.477183569703354e-06, "loss": 0.3176, "step": 14750 }, { "epoch": 0.42, "grad_norm": 5.909912911175239, "learning_rate": 6.476740505981973e-06, "loss": 0.4031, "step": 14751 }, { "epoch": 0.42, "grad_norm": 6.717148274965797, "learning_rate": 6.476297429556375e-06, "loss": 0.6529, "step": 14752 }, { "epoch": 0.42, "grad_norm": 8.479679613898753, "learning_rate": 6.475854340430373e-06, "loss": 0.4972, "step": 14753 }, { "epoch": 0.42, "grad_norm": 6.393846888847504, "learning_rate": 6.475411238607776e-06, "loss": 0.4133, "step": 14754 }, { "epoch": 0.42, "grad_norm": 3.134914311480607, "learning_rate": 6.4749681240923976e-06, "loss": 0.3256, "step": 14755 }, { "epoch": 0.42, "grad_norm": 6.748846909261223, "learning_rate": 6.474524996888053e-06, "loss": 0.6852, "step": 14756 }, { "epoch": 0.42, "grad_norm": 4.441896356198785, "learning_rate": 6.47408185699855e-06, "loss": 0.5006, "step": 14757 }, { "epoch": 0.42, "grad_norm": 5.820635854456376, "learning_rate": 6.473638704427702e-06, "loss": 0.563, "step": 14758 }, { "epoch": 0.42, "grad_norm": 2.8926615165316747, "learning_rate": 6.4731955391793196e-06, "loss": 0.2288, "step": 14759 }, { "epoch": 0.42, "grad_norm": 7.592360443952913, "learning_rate": 6.4727523612572214e-06, "loss": 0.8545, "step": 14760 }, { "epoch": 0.42, "grad_norm": 5.794176362942712, "learning_rate": 6.472309170665214e-06, "loss": 0.6223, "step": 14761 }, { "epoch": 0.42, "grad_norm": 11.958566142794435, "learning_rate": 6.471865967407111e-06, "loss": 0.879, "step": 14762 }, { "epoch": 0.42, "grad_norm": 3.1552455361306513, "learning_rate": 6.471422751486728e-06, "loss": 0.2827, "step": 14763 }, { "epoch": 0.42, "grad_norm": 3.599448818821627, "learning_rate": 6.470979522907876e-06, "loss": 0.3118, "step": 14764 }, { "epoch": 0.42, "grad_norm": 2.6644102747723215, "learning_rate": 6.470536281674367e-06, "loss": 0.4709, "step": 14765 }, { "epoch": 0.42, "grad_norm": 4.3872406972570985, "learning_rate": 6.470093027790015e-06, "loss": 0.2715, "step": 14766 }, { "epoch": 0.42, "grad_norm": 4.934952718629989, "learning_rate": 6.469649761258636e-06, "loss": 0.704, "step": 14767 }, { "epoch": 0.42, "grad_norm": 7.786099803074233, "learning_rate": 6.46920648208404e-06, "loss": 0.5887, "step": 14768 }, { "epoch": 0.42, "grad_norm": 5.715090096431836, "learning_rate": 6.468763190270042e-06, "loss": 0.5773, "step": 14769 }, { "epoch": 0.42, "grad_norm": 2.85443894045926, "learning_rate": 6.468319885820454e-06, "loss": 0.3122, "step": 14770 }, { "epoch": 0.42, "grad_norm": 6.236184758594212, "learning_rate": 6.467876568739091e-06, "loss": 0.3741, "step": 14771 }, { "epoch": 0.42, "grad_norm": 3.4678614397627054, "learning_rate": 6.467433239029768e-06, "loss": 0.3761, "step": 14772 }, { "epoch": 0.42, "grad_norm": 4.608429634003197, "learning_rate": 6.4669898966962965e-06, "loss": 0.7143, "step": 14773 }, { "epoch": 0.42, "grad_norm": 16.561255282288116, "learning_rate": 6.466546541742491e-06, "loss": 0.5935, "step": 14774 }, { "epoch": 0.42, "grad_norm": 5.733504351386627, "learning_rate": 6.466103174172167e-06, "loss": 0.6822, "step": 14775 }, { "epoch": 0.42, "grad_norm": 3.469011520287438, "learning_rate": 6.465659793989137e-06, "loss": 0.2859, "step": 14776 }, { "epoch": 0.42, "grad_norm": 6.624783998242996, "learning_rate": 6.465216401197218e-06, "loss": 0.3373, "step": 14777 }, { "epoch": 0.42, "grad_norm": 6.623866722056121, "learning_rate": 6.4647729958002216e-06, "loss": 0.851, "step": 14778 }, { "epoch": 0.42, "grad_norm": 4.940672133122594, "learning_rate": 6.4643295778019625e-06, "loss": 0.3863, "step": 14779 }, { "epoch": 0.42, "grad_norm": 4.522095871640871, "learning_rate": 6.463886147206258e-06, "loss": 0.5583, "step": 14780 }, { "epoch": 0.42, "grad_norm": 3.620424803472498, "learning_rate": 6.46344270401692e-06, "loss": 0.292, "step": 14781 }, { "epoch": 0.42, "grad_norm": 4.350377686864853, "learning_rate": 6.462999248237766e-06, "loss": 0.4423, "step": 14782 }, { "epoch": 0.42, "grad_norm": 5.43989634052206, "learning_rate": 6.4625557798726095e-06, "loss": 0.3395, "step": 14783 }, { "epoch": 0.42, "grad_norm": 9.526776436481867, "learning_rate": 6.462112298925265e-06, "loss": 0.3744, "step": 14784 }, { "epoch": 0.42, "grad_norm": 6.29920670192266, "learning_rate": 6.461668805399548e-06, "loss": 0.2259, "step": 14785 }, { "epoch": 0.42, "grad_norm": 8.209637452785028, "learning_rate": 6.461225299299274e-06, "loss": 0.5664, "step": 14786 }, { "epoch": 0.42, "grad_norm": 4.775007972910351, "learning_rate": 6.460781780628259e-06, "loss": 0.6468, "step": 14787 }, { "epoch": 0.42, "grad_norm": 5.990355568459887, "learning_rate": 6.460338249390319e-06, "loss": 0.3667, "step": 14788 }, { "epoch": 0.42, "grad_norm": 5.9579978884014215, "learning_rate": 6.459894705589267e-06, "loss": 0.4919, "step": 14789 }, { "epoch": 0.42, "grad_norm": 3.5050017941724727, "learning_rate": 6.459451149228924e-06, "loss": 0.3363, "step": 14790 }, { "epoch": 0.42, "grad_norm": 7.798419929290464, "learning_rate": 6.459007580313099e-06, "loss": 0.7205, "step": 14791 }, { "epoch": 0.42, "grad_norm": 7.075248083071838, "learning_rate": 6.458563998845613e-06, "loss": 0.3581, "step": 14792 }, { "epoch": 0.42, "grad_norm": 5.6876672573438425, "learning_rate": 6.45812040483028e-06, "loss": 0.6733, "step": 14793 }, { "epoch": 0.42, "grad_norm": 1.4114687826560448, "learning_rate": 6.4576767982709175e-06, "loss": 0.1085, "step": 14794 }, { "epoch": 0.42, "grad_norm": 4.108286202078487, "learning_rate": 6.4572331791713395e-06, "loss": 0.4951, "step": 14795 }, { "epoch": 0.42, "grad_norm": 7.626081327465401, "learning_rate": 6.456789547535365e-06, "loss": 0.7241, "step": 14796 }, { "epoch": 0.42, "grad_norm": 3.6967738288456578, "learning_rate": 6.45634590336681e-06, "loss": 0.4699, "step": 14797 }, { "epoch": 0.42, "grad_norm": 5.137997312213199, "learning_rate": 6.45590224666949e-06, "loss": 0.5387, "step": 14798 }, { "epoch": 0.42, "grad_norm": 3.624296185800386, "learning_rate": 6.455458577447221e-06, "loss": 0.4724, "step": 14799 }, { "epoch": 0.42, "grad_norm": 1.9547408019680181, "learning_rate": 6.455014895703823e-06, "loss": 0.1347, "step": 14800 }, { "epoch": 0.42, "grad_norm": 6.982611038740091, "learning_rate": 6.454571201443109e-06, "loss": 0.4388, "step": 14801 }, { "epoch": 0.42, "grad_norm": 3.9902373027744096, "learning_rate": 6.454127494668899e-06, "loss": 0.5622, "step": 14802 }, { "epoch": 0.42, "grad_norm": 5.221587705898934, "learning_rate": 6.453683775385009e-06, "loss": 0.4763, "step": 14803 }, { "epoch": 0.42, "grad_norm": 8.242084057778015, "learning_rate": 6.453240043595257e-06, "loss": 0.4266, "step": 14804 }, { "epoch": 0.42, "grad_norm": 3.5023865907539156, "learning_rate": 6.45279629930346e-06, "loss": 0.4196, "step": 14805 }, { "epoch": 0.42, "grad_norm": 4.464140321816307, "learning_rate": 6.4523525425134335e-06, "loss": 0.2837, "step": 14806 }, { "epoch": 0.42, "grad_norm": 2.2768407433001125, "learning_rate": 6.451908773229e-06, "loss": 0.2821, "step": 14807 }, { "epoch": 0.42, "grad_norm": 4.907922933332516, "learning_rate": 6.451464991453972e-06, "loss": 0.6185, "step": 14808 }, { "epoch": 0.42, "grad_norm": 9.383816096036353, "learning_rate": 6.451021197192169e-06, "loss": 0.5443, "step": 14809 }, { "epoch": 0.42, "grad_norm": 2.629847637158002, "learning_rate": 6.45057739044741e-06, "loss": 0.17, "step": 14810 }, { "epoch": 0.42, "grad_norm": 10.41486943317383, "learning_rate": 6.450133571223513e-06, "loss": 0.7484, "step": 14811 }, { "epoch": 0.42, "grad_norm": 7.088538923597027, "learning_rate": 6.449689739524294e-06, "loss": 0.3972, "step": 14812 }, { "epoch": 0.42, "grad_norm": 4.322735884536342, "learning_rate": 6.449245895353574e-06, "loss": 0.4702, "step": 14813 }, { "epoch": 0.42, "grad_norm": 7.678616702304327, "learning_rate": 6.448802038715169e-06, "loss": 0.5836, "step": 14814 }, { "epoch": 0.42, "grad_norm": 2.494701349353062, "learning_rate": 6.4483581696129e-06, "loss": 0.3145, "step": 14815 }, { "epoch": 0.42, "grad_norm": 6.958720769394251, "learning_rate": 6.447914288050582e-06, "loss": 0.3429, "step": 14816 }, { "epoch": 0.42, "grad_norm": 11.177328653952628, "learning_rate": 6.447470394032037e-06, "loss": 0.417, "step": 14817 }, { "epoch": 0.42, "grad_norm": 3.961381665368185, "learning_rate": 6.4470264875610826e-06, "loss": 0.5949, "step": 14818 }, { "epoch": 0.42, "grad_norm": 5.87186579599517, "learning_rate": 6.446582568641535e-06, "loss": 0.7953, "step": 14819 }, { "epoch": 0.42, "grad_norm": 8.071378476847062, "learning_rate": 6.446138637277218e-06, "loss": 0.5074, "step": 14820 }, { "epoch": 0.42, "grad_norm": 4.321496135685668, "learning_rate": 6.445694693471947e-06, "loss": 0.3011, "step": 14821 }, { "epoch": 0.42, "grad_norm": 3.976856662984344, "learning_rate": 6.445250737229544e-06, "loss": 0.3821, "step": 14822 }, { "epoch": 0.42, "grad_norm": 6.251433475138289, "learning_rate": 6.444806768553826e-06, "loss": 0.5303, "step": 14823 }, { "epoch": 0.42, "grad_norm": 6.402454647572382, "learning_rate": 6.444362787448614e-06, "loss": 0.9014, "step": 14824 }, { "epoch": 0.42, "grad_norm": 4.193616982637328, "learning_rate": 6.4439187939177265e-06, "loss": 0.3584, "step": 14825 }, { "epoch": 0.42, "grad_norm": 7.402354411014338, "learning_rate": 6.443474787964983e-06, "loss": 0.4627, "step": 14826 }, { "epoch": 0.42, "grad_norm": 6.8787500384311855, "learning_rate": 6.4430307695942056e-06, "loss": 0.5272, "step": 14827 }, { "epoch": 0.42, "grad_norm": 6.2771969144842386, "learning_rate": 6.442586738809208e-06, "loss": 0.5997, "step": 14828 }, { "epoch": 0.42, "grad_norm": 9.675317549114782, "learning_rate": 6.442142695613817e-06, "loss": 0.5085, "step": 14829 }, { "epoch": 0.42, "grad_norm": 6.325636913814883, "learning_rate": 6.44169864001185e-06, "loss": 0.5812, "step": 14830 }, { "epoch": 0.42, "grad_norm": 3.911131197087291, "learning_rate": 6.4412545720071265e-06, "loss": 0.5875, "step": 14831 }, { "epoch": 0.42, "grad_norm": 6.889855062845504, "learning_rate": 6.440810491603467e-06, "loss": 1.0582, "step": 14832 }, { "epoch": 0.42, "grad_norm": 7.736565081389507, "learning_rate": 6.440366398804693e-06, "loss": 0.8536, "step": 14833 }, { "epoch": 0.42, "grad_norm": 5.201616773153184, "learning_rate": 6.439922293614624e-06, "loss": 0.3984, "step": 14834 }, { "epoch": 0.42, "grad_norm": 4.551008934251954, "learning_rate": 6.439478176037082e-06, "loss": 0.624, "step": 14835 }, { "epoch": 0.42, "grad_norm": 4.504687146449133, "learning_rate": 6.439034046075884e-06, "loss": 0.4352, "step": 14836 }, { "epoch": 0.42, "grad_norm": 4.4810861895427045, "learning_rate": 6.438589903734855e-06, "loss": 0.4823, "step": 14837 }, { "epoch": 0.42, "grad_norm": 8.015243565814252, "learning_rate": 6.438145749017813e-06, "loss": 0.718, "step": 14838 }, { "epoch": 0.42, "grad_norm": 4.480678350572904, "learning_rate": 6.437701581928582e-06, "loss": 0.3931, "step": 14839 }, { "epoch": 0.42, "grad_norm": 6.362865155726602, "learning_rate": 6.4372574024709796e-06, "loss": 0.5351, "step": 14840 }, { "epoch": 0.43, "grad_norm": 7.285482527148174, "learning_rate": 6.43681321064883e-06, "loss": 0.4555, "step": 14841 }, { "epoch": 0.43, "grad_norm": 4.501376140302709, "learning_rate": 6.436369006465952e-06, "loss": 0.5154, "step": 14842 }, { "epoch": 0.43, "grad_norm": 8.062972210207558, "learning_rate": 6.435924789926169e-06, "loss": 0.3079, "step": 14843 }, { "epoch": 0.43, "grad_norm": 5.551423306146771, "learning_rate": 6.435480561033302e-06, "loss": 0.4813, "step": 14844 }, { "epoch": 0.43, "grad_norm": 5.049693616694214, "learning_rate": 6.435036319791172e-06, "loss": 0.511, "step": 14845 }, { "epoch": 0.43, "grad_norm": 5.694181269738574, "learning_rate": 6.434592066203601e-06, "loss": 0.1931, "step": 14846 }, { "epoch": 0.43, "grad_norm": 5.589861194682008, "learning_rate": 6.434147800274413e-06, "loss": 0.2258, "step": 14847 }, { "epoch": 0.43, "grad_norm": 4.267899965070283, "learning_rate": 6.4337035220074266e-06, "loss": 0.6008, "step": 14848 }, { "epoch": 0.43, "grad_norm": 3.2376818222921897, "learning_rate": 6.433259231406465e-06, "loss": 0.3222, "step": 14849 }, { "epoch": 0.43, "grad_norm": 5.182671333347853, "learning_rate": 6.43281492847535e-06, "loss": 0.683, "step": 14850 }, { "epoch": 0.43, "grad_norm": 4.750498820765752, "learning_rate": 6.4323706132179065e-06, "loss": 0.606, "step": 14851 }, { "epoch": 0.43, "grad_norm": 5.534939822944119, "learning_rate": 6.431926285637954e-06, "loss": 0.5833, "step": 14852 }, { "epoch": 0.43, "grad_norm": 5.515756059769927, "learning_rate": 6.431481945739315e-06, "loss": 0.4005, "step": 14853 }, { "epoch": 0.43, "grad_norm": 2.9345168326228697, "learning_rate": 6.431037593525815e-06, "loss": 0.229, "step": 14854 }, { "epoch": 0.43, "grad_norm": 4.901168621606676, "learning_rate": 6.430593229001274e-06, "loss": 0.2418, "step": 14855 }, { "epoch": 0.43, "grad_norm": 8.765252770121066, "learning_rate": 6.430148852169515e-06, "loss": 0.574, "step": 14856 }, { "epoch": 0.43, "grad_norm": 7.097477542724769, "learning_rate": 6.429704463034363e-06, "loss": 0.6364, "step": 14857 }, { "epoch": 0.43, "grad_norm": 5.291761107115155, "learning_rate": 6.429260061599638e-06, "loss": 0.311, "step": 14858 }, { "epoch": 0.43, "grad_norm": 3.770681271024876, "learning_rate": 6.428815647869166e-06, "loss": 0.7134, "step": 14859 }, { "epoch": 0.43, "grad_norm": 5.190892913904295, "learning_rate": 6.428371221846768e-06, "loss": 0.2487, "step": 14860 }, { "epoch": 0.43, "grad_norm": 6.741480943913876, "learning_rate": 6.427926783536267e-06, "loss": 0.717, "step": 14861 }, { "epoch": 0.43, "grad_norm": 4.6341480305011205, "learning_rate": 6.4274823329414896e-06, "loss": 0.4163, "step": 14862 }, { "epoch": 0.43, "grad_norm": 5.956797750930915, "learning_rate": 6.427037870066256e-06, "loss": 0.4304, "step": 14863 }, { "epoch": 0.43, "grad_norm": 5.086708373731629, "learning_rate": 6.426593394914393e-06, "loss": 0.3522, "step": 14864 }, { "epoch": 0.43, "grad_norm": 5.750426566845593, "learning_rate": 6.426148907489722e-06, "loss": 0.4771, "step": 14865 }, { "epoch": 0.43, "grad_norm": 5.412039110325752, "learning_rate": 6.4257044077960676e-06, "loss": 0.5427, "step": 14866 }, { "epoch": 0.43, "grad_norm": 7.4407949762263215, "learning_rate": 6.425259895837252e-06, "loss": 0.4491, "step": 14867 }, { "epoch": 0.43, "grad_norm": 4.763428729156286, "learning_rate": 6.424815371617103e-06, "loss": 0.4198, "step": 14868 }, { "epoch": 0.43, "grad_norm": 6.255001413023473, "learning_rate": 6.424370835139442e-06, "loss": 0.5708, "step": 14869 }, { "epoch": 0.43, "grad_norm": 4.831415925131449, "learning_rate": 6.423926286408094e-06, "loss": 0.3808, "step": 14870 }, { "epoch": 0.43, "grad_norm": 15.680776910802832, "learning_rate": 6.423481725426885e-06, "loss": 0.8352, "step": 14871 }, { "epoch": 0.43, "grad_norm": 4.612686571105745, "learning_rate": 6.4230371521996375e-06, "loss": 0.2387, "step": 14872 }, { "epoch": 0.43, "grad_norm": 3.9873216989767686, "learning_rate": 6.422592566730175e-06, "loss": 0.5411, "step": 14873 }, { "epoch": 0.43, "grad_norm": 5.593727793729381, "learning_rate": 6.422147969022326e-06, "loss": 0.4286, "step": 14874 }, { "epoch": 0.43, "grad_norm": 8.878702815050096, "learning_rate": 6.4217033590799115e-06, "loss": 0.8086, "step": 14875 }, { "epoch": 0.43, "grad_norm": 3.584175998647805, "learning_rate": 6.421258736906758e-06, "loss": 0.4046, "step": 14876 }, { "epoch": 0.43, "grad_norm": 5.380612392953572, "learning_rate": 6.42081410250669e-06, "loss": 0.197, "step": 14877 }, { "epoch": 0.43, "grad_norm": 2.775810236823784, "learning_rate": 6.420369455883535e-06, "loss": 0.2542, "step": 14878 }, { "epoch": 0.43, "grad_norm": 7.4825705182658675, "learning_rate": 6.419924797041115e-06, "loss": 0.8121, "step": 14879 }, { "epoch": 0.43, "grad_norm": 2.5400553185435184, "learning_rate": 6.419480125983257e-06, "loss": 0.1704, "step": 14880 }, { "epoch": 0.43, "grad_norm": 3.1105928575948694, "learning_rate": 6.419035442713788e-06, "loss": 0.2131, "step": 14881 }, { "epoch": 0.43, "grad_norm": 7.412002550644495, "learning_rate": 6.418590747236531e-06, "loss": 0.9043, "step": 14882 }, { "epoch": 0.43, "grad_norm": 10.821944307198418, "learning_rate": 6.418146039555311e-06, "loss": 0.5857, "step": 14883 }, { "epoch": 0.43, "grad_norm": 4.112446032694675, "learning_rate": 6.417701319673957e-06, "loss": 0.6389, "step": 14884 }, { "epoch": 0.43, "grad_norm": 4.834943382618101, "learning_rate": 6.417256587596291e-06, "loss": 0.7008, "step": 14885 }, { "epoch": 0.43, "grad_norm": 3.343011988542737, "learning_rate": 6.416811843326142e-06, "loss": 0.2634, "step": 14886 }, { "epoch": 0.43, "grad_norm": 4.832460302672366, "learning_rate": 6.416367086867337e-06, "loss": 0.2585, "step": 14887 }, { "epoch": 0.43, "grad_norm": 8.71286265905972, "learning_rate": 6.4159223182236986e-06, "loss": 0.8085, "step": 14888 }, { "epoch": 0.43, "grad_norm": 9.918206878002986, "learning_rate": 6.415477537399054e-06, "loss": 0.8507, "step": 14889 }, { "epoch": 0.43, "grad_norm": 5.737741090450572, "learning_rate": 6.415032744397232e-06, "loss": 0.2446, "step": 14890 }, { "epoch": 0.43, "grad_norm": 7.006179296049999, "learning_rate": 6.414587939222058e-06, "loss": 0.746, "step": 14891 }, { "epoch": 0.43, "grad_norm": 7.3064983550871085, "learning_rate": 6.414143121877356e-06, "loss": 0.4084, "step": 14892 }, { "epoch": 0.43, "grad_norm": 10.7266730561918, "learning_rate": 6.413698292366957e-06, "loss": 0.5711, "step": 14893 }, { "epoch": 0.43, "grad_norm": 4.217670754739243, "learning_rate": 6.413253450694684e-06, "loss": 0.3353, "step": 14894 }, { "epoch": 0.43, "grad_norm": 5.789904781540279, "learning_rate": 6.412808596864366e-06, "loss": 0.3791, "step": 14895 }, { "epoch": 0.43, "grad_norm": 2.933852792571712, "learning_rate": 6.412363730879829e-06, "loss": 0.4743, "step": 14896 }, { "epoch": 0.43, "grad_norm": 4.300522965411473, "learning_rate": 6.411918852744902e-06, "loss": 0.2949, "step": 14897 }, { "epoch": 0.43, "grad_norm": 4.751668335909747, "learning_rate": 6.411473962463411e-06, "loss": 0.7434, "step": 14898 }, { "epoch": 0.43, "grad_norm": 7.181120836309815, "learning_rate": 6.411029060039182e-06, "loss": 0.7652, "step": 14899 }, { "epoch": 0.43, "grad_norm": 2.9284005351995805, "learning_rate": 6.410584145476044e-06, "loss": 0.2448, "step": 14900 }, { "epoch": 0.43, "grad_norm": 5.097627579765521, "learning_rate": 6.4101392187778266e-06, "loss": 0.4911, "step": 14901 }, { "epoch": 0.43, "grad_norm": 7.698881547376598, "learning_rate": 6.4096942799483515e-06, "loss": 0.5447, "step": 14902 }, { "epoch": 0.43, "grad_norm": 7.197278113593727, "learning_rate": 6.409249328991452e-06, "loss": 0.5548, "step": 14903 }, { "epoch": 0.43, "grad_norm": 7.922755326032526, "learning_rate": 6.408804365910954e-06, "loss": 0.7606, "step": 14904 }, { "epoch": 0.43, "grad_norm": 5.789190787722759, "learning_rate": 6.408359390710686e-06, "loss": 0.5315, "step": 14905 }, { "epoch": 0.43, "grad_norm": 6.287018096536155, "learning_rate": 6.407914403394475e-06, "loss": 0.9451, "step": 14906 }, { "epoch": 0.43, "grad_norm": 6.7285347593614, "learning_rate": 6.407469403966149e-06, "loss": 0.7733, "step": 14907 }, { "epoch": 0.43, "grad_norm": 4.641740170461274, "learning_rate": 6.407024392429538e-06, "loss": 0.5942, "step": 14908 }, { "epoch": 0.43, "grad_norm": 5.230596425419064, "learning_rate": 6.406579368788469e-06, "loss": 0.4017, "step": 14909 }, { "epoch": 0.43, "grad_norm": 9.119935968994094, "learning_rate": 6.406134333046769e-06, "loss": 0.9739, "step": 14910 }, { "epoch": 0.43, "grad_norm": 8.10842307793513, "learning_rate": 6.405689285208272e-06, "loss": 0.5965, "step": 14911 }, { "epoch": 0.43, "grad_norm": 2.2102600650400697, "learning_rate": 6.405244225276802e-06, "loss": 0.3348, "step": 14912 }, { "epoch": 0.43, "grad_norm": 5.83074573164088, "learning_rate": 6.404799153256188e-06, "loss": 0.7097, "step": 14913 }, { "epoch": 0.43, "grad_norm": 3.4686997212383166, "learning_rate": 6.404354069150261e-06, "loss": 0.3216, "step": 14914 }, { "epoch": 0.43, "grad_norm": 7.030939392010918, "learning_rate": 6.403908972962849e-06, "loss": 0.3486, "step": 14915 }, { "epoch": 0.43, "grad_norm": 7.885681205017686, "learning_rate": 6.403463864697779e-06, "loss": 0.5619, "step": 14916 }, { "epoch": 0.43, "grad_norm": 2.8553572396815396, "learning_rate": 6.403018744358883e-06, "loss": 0.3318, "step": 14917 }, { "epoch": 0.43, "grad_norm": 5.336395065326724, "learning_rate": 6.40257361194999e-06, "loss": 0.5398, "step": 14918 }, { "epoch": 0.43, "grad_norm": 2.656641493725505, "learning_rate": 6.402128467474928e-06, "loss": 0.4994, "step": 14919 }, { "epoch": 0.43, "grad_norm": 3.269567546392345, "learning_rate": 6.401683310937529e-06, "loss": 0.4052, "step": 14920 }, { "epoch": 0.43, "grad_norm": 4.5168015422366405, "learning_rate": 6.4012381423416206e-06, "loss": 0.622, "step": 14921 }, { "epoch": 0.43, "grad_norm": 4.185532947682659, "learning_rate": 6.400792961691032e-06, "loss": 0.6364, "step": 14922 }, { "epoch": 0.43, "grad_norm": 4.383434720673839, "learning_rate": 6.4003477689895945e-06, "loss": 0.582, "step": 14923 }, { "epoch": 0.43, "grad_norm": 8.963360273501122, "learning_rate": 6.399902564241138e-06, "loss": 0.3744, "step": 14924 }, { "epoch": 0.43, "grad_norm": 7.69662009036627, "learning_rate": 6.399457347449492e-06, "loss": 0.4893, "step": 14925 }, { "epoch": 0.43, "grad_norm": 11.071497388704952, "learning_rate": 6.399012118618486e-06, "loss": 0.4877, "step": 14926 }, { "epoch": 0.43, "grad_norm": 4.39300532318147, "learning_rate": 6.39856687775195e-06, "loss": 0.6407, "step": 14927 }, { "epoch": 0.43, "grad_norm": 4.381326841804244, "learning_rate": 6.398121624853719e-06, "loss": 0.2999, "step": 14928 }, { "epoch": 0.43, "grad_norm": 6.372367745542703, "learning_rate": 6.397676359927617e-06, "loss": 0.5956, "step": 14929 }, { "epoch": 0.43, "grad_norm": 3.8057505788897257, "learning_rate": 6.397231082977478e-06, "loss": 0.4162, "step": 14930 }, { "epoch": 0.43, "grad_norm": 5.573040608486568, "learning_rate": 6.396785794007133e-06, "loss": 0.7822, "step": 14931 }, { "epoch": 0.43, "grad_norm": 5.4069225796969675, "learning_rate": 6.396340493020411e-06, "loss": 0.5903, "step": 14932 }, { "epoch": 0.43, "grad_norm": 5.966336546277562, "learning_rate": 6.3958951800211436e-06, "loss": 0.6768, "step": 14933 }, { "epoch": 0.43, "grad_norm": 3.3733015378451228, "learning_rate": 6.39544985501316e-06, "loss": 0.3303, "step": 14934 }, { "epoch": 0.43, "grad_norm": 3.0406347684321067, "learning_rate": 6.395004518000296e-06, "loss": 0.244, "step": 14935 }, { "epoch": 0.43, "grad_norm": 7.18950585281449, "learning_rate": 6.394559168986378e-06, "loss": 0.7068, "step": 14936 }, { "epoch": 0.43, "grad_norm": 5.589104113638501, "learning_rate": 6.39411380797524e-06, "loss": 0.8006, "step": 14937 }, { "epoch": 0.43, "grad_norm": 6.707751013871417, "learning_rate": 6.393668434970713e-06, "loss": 0.6482, "step": 14938 }, { "epoch": 0.43, "grad_norm": 5.887254879135254, "learning_rate": 6.393223049976628e-06, "loss": 0.5715, "step": 14939 }, { "epoch": 0.43, "grad_norm": 4.997203569422774, "learning_rate": 6.392777652996815e-06, "loss": 0.5171, "step": 14940 }, { "epoch": 0.43, "grad_norm": 16.3438511803378, "learning_rate": 6.392332244035109e-06, "loss": 0.7554, "step": 14941 }, { "epoch": 0.43, "grad_norm": 2.4509974808985424, "learning_rate": 6.39188682309534e-06, "loss": 0.1424, "step": 14942 }, { "epoch": 0.43, "grad_norm": 3.6063576508690103, "learning_rate": 6.391441390181339e-06, "loss": 0.4372, "step": 14943 }, { "epoch": 0.43, "grad_norm": 8.474558369205447, "learning_rate": 6.39099594529694e-06, "loss": 0.9138, "step": 14944 }, { "epoch": 0.43, "grad_norm": 14.530769848838098, "learning_rate": 6.390550488445973e-06, "loss": 0.6838, "step": 14945 }, { "epoch": 0.43, "grad_norm": 5.137258569469578, "learning_rate": 6.390105019632273e-06, "loss": 0.4918, "step": 14946 }, { "epoch": 0.43, "grad_norm": 3.511271167106016, "learning_rate": 6.389659538859669e-06, "loss": 0.1252, "step": 14947 }, { "epoch": 0.43, "grad_norm": 4.254807361899524, "learning_rate": 6.3892140461319975e-06, "loss": 0.2337, "step": 14948 }, { "epoch": 0.43, "grad_norm": 8.384590692692422, "learning_rate": 6.388768541453086e-06, "loss": 0.7987, "step": 14949 }, { "epoch": 0.43, "grad_norm": 10.163555628154537, "learning_rate": 6.388323024826771e-06, "loss": 0.5143, "step": 14950 }, { "epoch": 0.43, "grad_norm": 6.284523409888846, "learning_rate": 6.387877496256884e-06, "loss": 0.4994, "step": 14951 }, { "epoch": 0.43, "grad_norm": 6.511910676559136, "learning_rate": 6.3874319557472576e-06, "loss": 0.5121, "step": 14952 }, { "epoch": 0.43, "grad_norm": 7.679453911241211, "learning_rate": 6.386986403301725e-06, "loss": 0.8458, "step": 14953 }, { "epoch": 0.43, "grad_norm": 3.948538309372041, "learning_rate": 6.386540838924118e-06, "loss": 0.4572, "step": 14954 }, { "epoch": 0.43, "grad_norm": 7.2929750267782305, "learning_rate": 6.386095262618273e-06, "loss": 0.8216, "step": 14955 }, { "epoch": 0.43, "grad_norm": 3.356333184942907, "learning_rate": 6.385649674388019e-06, "loss": 0.3341, "step": 14956 }, { "epoch": 0.43, "grad_norm": 5.3249307493623, "learning_rate": 6.3852040742371926e-06, "loss": 0.3686, "step": 14957 }, { "epoch": 0.43, "grad_norm": 12.415139153415456, "learning_rate": 6.3847584621696265e-06, "loss": 0.7276, "step": 14958 }, { "epoch": 0.43, "grad_norm": 5.719289535335789, "learning_rate": 6.384312838189153e-06, "loss": 0.4011, "step": 14959 }, { "epoch": 0.43, "grad_norm": 6.59574621275162, "learning_rate": 6.383867202299606e-06, "loss": 0.1656, "step": 14960 }, { "epoch": 0.43, "grad_norm": 7.326898988760791, "learning_rate": 6.383421554504822e-06, "loss": 0.4142, "step": 14961 }, { "epoch": 0.43, "grad_norm": 6.718270076200604, "learning_rate": 6.382975894808631e-06, "loss": 0.3763, "step": 14962 }, { "epoch": 0.43, "grad_norm": 5.639974329396259, "learning_rate": 6.382530223214869e-06, "loss": 0.5077, "step": 14963 }, { "epoch": 0.43, "grad_norm": 8.415866414037943, "learning_rate": 6.3820845397273686e-06, "loss": 0.7574, "step": 14964 }, { "epoch": 0.43, "grad_norm": 6.022869553365767, "learning_rate": 6.381638844349968e-06, "loss": 0.5328, "step": 14965 }, { "epoch": 0.43, "grad_norm": 5.813595658269006, "learning_rate": 6.3811931370864964e-06, "loss": 0.5188, "step": 14966 }, { "epoch": 0.43, "grad_norm": 5.327424615821603, "learning_rate": 6.38074741794079e-06, "loss": 0.4415, "step": 14967 }, { "epoch": 0.43, "grad_norm": 7.69584965259231, "learning_rate": 6.380301686916685e-06, "loss": 0.5748, "step": 14968 }, { "epoch": 0.43, "grad_norm": 8.36093917900843, "learning_rate": 6.379855944018013e-06, "loss": 0.2386, "step": 14969 }, { "epoch": 0.43, "grad_norm": 7.194320162956101, "learning_rate": 6.379410189248612e-06, "loss": 0.3563, "step": 14970 }, { "epoch": 0.43, "grad_norm": 6.237349219019149, "learning_rate": 6.378964422612313e-06, "loss": 0.4106, "step": 14971 }, { "epoch": 0.43, "grad_norm": 2.986630451275171, "learning_rate": 6.378518644112955e-06, "loss": 0.492, "step": 14972 }, { "epoch": 0.43, "grad_norm": 8.854036852315069, "learning_rate": 6.378072853754369e-06, "loss": 1.1149, "step": 14973 }, { "epoch": 0.43, "grad_norm": 14.568367721405892, "learning_rate": 6.3776270515403915e-06, "loss": 0.6188, "step": 14974 }, { "epoch": 0.43, "grad_norm": 2.1343991033888194, "learning_rate": 6.377181237474861e-06, "loss": 0.4868, "step": 14975 }, { "epoch": 0.43, "grad_norm": 5.21201753647636, "learning_rate": 6.376735411561607e-06, "loss": 0.6097, "step": 14976 }, { "epoch": 0.43, "grad_norm": 5.1300336727353395, "learning_rate": 6.376289573804468e-06, "loss": 0.3818, "step": 14977 }, { "epoch": 0.43, "grad_norm": 5.532070379249458, "learning_rate": 6.3758437242072815e-06, "loss": 0.251, "step": 14978 }, { "epoch": 0.43, "grad_norm": 6.404466520332876, "learning_rate": 6.375397862773879e-06, "loss": 0.9552, "step": 14979 }, { "epoch": 0.43, "grad_norm": 7.977098347957013, "learning_rate": 6.374951989508098e-06, "loss": 0.8238, "step": 14980 }, { "epoch": 0.43, "grad_norm": 7.185827442113541, "learning_rate": 6.3745061044137746e-06, "loss": 0.7113, "step": 14981 }, { "epoch": 0.43, "grad_norm": 7.946073572547445, "learning_rate": 6.374060207494746e-06, "loss": 0.735, "step": 14982 }, { "epoch": 0.43, "grad_norm": 4.192081791600331, "learning_rate": 6.373614298754845e-06, "loss": 0.2492, "step": 14983 }, { "epoch": 0.43, "grad_norm": 7.377683183754306, "learning_rate": 6.373168378197907e-06, "loss": 0.3436, "step": 14984 }, { "epoch": 0.43, "grad_norm": 6.9125237600735625, "learning_rate": 6.372722445827775e-06, "loss": 0.5784, "step": 14985 }, { "epoch": 0.43, "grad_norm": 3.5959359196733773, "learning_rate": 6.372276501648278e-06, "loss": 0.2927, "step": 14986 }, { "epoch": 0.43, "grad_norm": 2.2598435331612343, "learning_rate": 6.3718305456632555e-06, "loss": 0.3219, "step": 14987 }, { "epoch": 0.43, "grad_norm": 8.407824003774973, "learning_rate": 6.371384577876545e-06, "loss": 0.5061, "step": 14988 }, { "epoch": 0.43, "grad_norm": 7.094298488569668, "learning_rate": 6.3709385982919815e-06, "loss": 0.4199, "step": 14989 }, { "epoch": 0.43, "grad_norm": 5.523127783221763, "learning_rate": 6.370492606913401e-06, "loss": 0.326, "step": 14990 }, { "epoch": 0.43, "grad_norm": 8.564426734387789, "learning_rate": 6.370046603744642e-06, "loss": 0.5716, "step": 14991 }, { "epoch": 0.43, "grad_norm": 5.957595428265984, "learning_rate": 6.3696005887895406e-06, "loss": 0.6087, "step": 14992 }, { "epoch": 0.43, "grad_norm": 3.6378600135743313, "learning_rate": 6.369154562051933e-06, "loss": 0.2456, "step": 14993 }, { "epoch": 0.43, "grad_norm": 6.947460069956863, "learning_rate": 6.368708523535658e-06, "loss": 0.3888, "step": 14994 }, { "epoch": 0.43, "grad_norm": 3.430337831192199, "learning_rate": 6.368262473244554e-06, "loss": 0.35, "step": 14995 }, { "epoch": 0.43, "grad_norm": 8.785870514121976, "learning_rate": 6.367816411182455e-06, "loss": 0.6213, "step": 14996 }, { "epoch": 0.43, "grad_norm": 6.294178917976145, "learning_rate": 6.367370337353199e-06, "loss": 0.3325, "step": 14997 }, { "epoch": 0.43, "grad_norm": 4.87866034395806, "learning_rate": 6.3669242517606245e-06, "loss": 0.5773, "step": 14998 }, { "epoch": 0.43, "grad_norm": 6.676708677099076, "learning_rate": 6.366478154408571e-06, "loss": 0.7894, "step": 14999 }, { "epoch": 0.43, "grad_norm": 5.580841663797347, "learning_rate": 6.366032045300872e-06, "loss": 0.614, "step": 15000 }, { "epoch": 0.43, "grad_norm": 4.22719469119597, "learning_rate": 6.365585924441367e-06, "loss": 0.5169, "step": 15001 }, { "epoch": 0.43, "grad_norm": 5.445514428924955, "learning_rate": 6.365139791833897e-06, "loss": 0.3514, "step": 15002 }, { "epoch": 0.43, "grad_norm": 3.937421404341762, "learning_rate": 6.364693647482295e-06, "loss": 0.4351, "step": 15003 }, { "epoch": 0.43, "grad_norm": 8.31860838439224, "learning_rate": 6.364247491390403e-06, "loss": 0.4344, "step": 15004 }, { "epoch": 0.43, "grad_norm": 5.840804882759464, "learning_rate": 6.363801323562058e-06, "loss": 0.9144, "step": 15005 }, { "epoch": 0.43, "grad_norm": 7.617746937088549, "learning_rate": 6.363355144001098e-06, "loss": 0.9094, "step": 15006 }, { "epoch": 0.43, "grad_norm": 2.8580543511670404, "learning_rate": 6.362908952711359e-06, "loss": 0.5191, "step": 15007 }, { "epoch": 0.43, "grad_norm": 2.626419909508265, "learning_rate": 6.362462749696685e-06, "loss": 0.4628, "step": 15008 }, { "epoch": 0.43, "grad_norm": 3.8986677510794485, "learning_rate": 6.362016534960911e-06, "loss": 0.4561, "step": 15009 }, { "epoch": 0.43, "grad_norm": 4.694547721032742, "learning_rate": 6.361570308507876e-06, "loss": 0.5311, "step": 15010 }, { "epoch": 0.43, "grad_norm": 10.625141546765105, "learning_rate": 6.36112407034142e-06, "loss": 0.8156, "step": 15011 }, { "epoch": 0.43, "grad_norm": 4.265579782323151, "learning_rate": 6.3606778204653814e-06, "loss": 0.5204, "step": 15012 }, { "epoch": 0.43, "grad_norm": 11.511114637291692, "learning_rate": 6.3602315588835985e-06, "loss": 0.5071, "step": 15013 }, { "epoch": 0.43, "grad_norm": 6.607871892467773, "learning_rate": 6.359785285599911e-06, "loss": 0.4011, "step": 15014 }, { "epoch": 0.43, "grad_norm": 6.138345115575048, "learning_rate": 6.359339000618159e-06, "loss": 0.479, "step": 15015 }, { "epoch": 0.43, "grad_norm": 6.9448833869388125, "learning_rate": 6.358892703942179e-06, "loss": 0.829, "step": 15016 }, { "epoch": 0.43, "grad_norm": 5.960392838339013, "learning_rate": 6.358446395575813e-06, "loss": 0.3378, "step": 15017 }, { "epoch": 0.43, "grad_norm": 6.175981049077454, "learning_rate": 6.358000075522901e-06, "loss": 0.5612, "step": 15018 }, { "epoch": 0.43, "grad_norm": 3.2946395888775704, "learning_rate": 6.35755374378728e-06, "loss": 0.3153, "step": 15019 }, { "epoch": 0.43, "grad_norm": 6.197642904267488, "learning_rate": 6.357107400372791e-06, "loss": 0.474, "step": 15020 }, { "epoch": 0.43, "grad_norm": 5.1287908722682785, "learning_rate": 6.3566610452832754e-06, "loss": 0.8145, "step": 15021 }, { "epoch": 0.43, "grad_norm": 4.08252370683803, "learning_rate": 6.356214678522573e-06, "loss": 0.7166, "step": 15022 }, { "epoch": 0.43, "grad_norm": 7.292607760779108, "learning_rate": 6.355768300094521e-06, "loss": 0.6785, "step": 15023 }, { "epoch": 0.43, "grad_norm": 8.220411937629185, "learning_rate": 6.355321910002962e-06, "loss": 0.2891, "step": 15024 }, { "epoch": 0.43, "grad_norm": 6.625901988543819, "learning_rate": 6.354875508251734e-06, "loss": 0.5168, "step": 15025 }, { "epoch": 0.43, "grad_norm": 2.9189460067773725, "learning_rate": 6.354429094844681e-06, "loss": 0.3395, "step": 15026 }, { "epoch": 0.43, "grad_norm": 5.5246132728246256, "learning_rate": 6.35398266978564e-06, "loss": 0.4789, "step": 15027 }, { "epoch": 0.43, "grad_norm": 7.214497721604084, "learning_rate": 6.353536233078452e-06, "loss": 0.6639, "step": 15028 }, { "epoch": 0.43, "grad_norm": 3.9456384807412572, "learning_rate": 6.3530897847269605e-06, "loss": 0.2547, "step": 15029 }, { "epoch": 0.43, "grad_norm": 9.320548656506267, "learning_rate": 6.352643324735003e-06, "loss": 0.4999, "step": 15030 }, { "epoch": 0.43, "grad_norm": 4.755096988551251, "learning_rate": 6.3521968531064215e-06, "loss": 0.8021, "step": 15031 }, { "epoch": 0.43, "grad_norm": 7.4614700526762725, "learning_rate": 6.351750369845057e-06, "loss": 0.5755, "step": 15032 }, { "epoch": 0.43, "grad_norm": 1.903289313183785, "learning_rate": 6.351303874954751e-06, "loss": 0.151, "step": 15033 }, { "epoch": 0.43, "grad_norm": 13.562297907435498, "learning_rate": 6.350857368439342e-06, "loss": 0.7492, "step": 15034 }, { "epoch": 0.43, "grad_norm": 4.483659959733955, "learning_rate": 6.3504108503026775e-06, "loss": 0.2118, "step": 15035 }, { "epoch": 0.43, "grad_norm": 6.789167080722302, "learning_rate": 6.349964320548591e-06, "loss": 0.6485, "step": 15036 }, { "epoch": 0.43, "grad_norm": 6.843515470483791, "learning_rate": 6.349517779180929e-06, "loss": 0.2425, "step": 15037 }, { "epoch": 0.43, "grad_norm": 7.406425828094013, "learning_rate": 6.349071226203531e-06, "loss": 0.4697, "step": 15038 }, { "epoch": 0.43, "grad_norm": 14.79126476046289, "learning_rate": 6.348624661620242e-06, "loss": 0.7915, "step": 15039 }, { "epoch": 0.43, "grad_norm": 4.6113902971987475, "learning_rate": 6.348178085434899e-06, "loss": 0.3502, "step": 15040 }, { "epoch": 0.43, "grad_norm": 5.480147533832366, "learning_rate": 6.347731497651346e-06, "loss": 0.56, "step": 15041 }, { "epoch": 0.43, "grad_norm": 5.4472501747018915, "learning_rate": 6.347284898273425e-06, "loss": 0.4201, "step": 15042 }, { "epoch": 0.43, "grad_norm": 4.570795173914611, "learning_rate": 6.346838287304977e-06, "loss": 0.4623, "step": 15043 }, { "epoch": 0.43, "grad_norm": 4.158636970875901, "learning_rate": 6.346391664749845e-06, "loss": 0.4217, "step": 15044 }, { "epoch": 0.43, "grad_norm": 6.548831328753317, "learning_rate": 6.345945030611874e-06, "loss": 0.4222, "step": 15045 }, { "epoch": 0.43, "grad_norm": 7.318652963361504, "learning_rate": 6.345498384894901e-06, "loss": 0.3733, "step": 15046 }, { "epoch": 0.43, "grad_norm": 5.022479379743569, "learning_rate": 6.345051727602773e-06, "loss": 0.5439, "step": 15047 }, { "epoch": 0.43, "grad_norm": 9.985748339839946, "learning_rate": 6.344605058739328e-06, "loss": 0.2955, "step": 15048 }, { "epoch": 0.43, "grad_norm": 6.02099294571906, "learning_rate": 6.344158378308414e-06, "loss": 0.3345, "step": 15049 }, { "epoch": 0.43, "grad_norm": 4.263214044808424, "learning_rate": 6.343711686313868e-06, "loss": 0.2546, "step": 15050 }, { "epoch": 0.43, "grad_norm": 8.746010470281336, "learning_rate": 6.343264982759537e-06, "loss": 0.4652, "step": 15051 }, { "epoch": 0.43, "grad_norm": 5.5556335931171414, "learning_rate": 6.342818267649264e-06, "loss": 0.346, "step": 15052 }, { "epoch": 0.43, "grad_norm": 10.55922363750784, "learning_rate": 6.34237154098689e-06, "loss": 1.1414, "step": 15053 }, { "epoch": 0.43, "grad_norm": 6.986340547016843, "learning_rate": 6.341924802776259e-06, "loss": 0.4637, "step": 15054 }, { "epoch": 0.43, "grad_norm": 5.999303737614151, "learning_rate": 6.341478053021214e-06, "loss": 0.708, "step": 15055 }, { "epoch": 0.43, "grad_norm": 3.5615476037919143, "learning_rate": 6.341031291725599e-06, "loss": 0.4077, "step": 15056 }, { "epoch": 0.43, "grad_norm": 8.198511537508749, "learning_rate": 6.340584518893255e-06, "loss": 0.6732, "step": 15057 }, { "epoch": 0.43, "grad_norm": 3.7699532860611487, "learning_rate": 6.340137734528029e-06, "loss": 0.2912, "step": 15058 }, { "epoch": 0.43, "grad_norm": 6.300242921899041, "learning_rate": 6.339690938633763e-06, "loss": 0.5281, "step": 15059 }, { "epoch": 0.43, "grad_norm": 4.470493610050697, "learning_rate": 6.339244131214301e-06, "loss": 0.2553, "step": 15060 }, { "epoch": 0.43, "grad_norm": 4.133555844813934, "learning_rate": 6.3387973122734855e-06, "loss": 0.3579, "step": 15061 }, { "epoch": 0.43, "grad_norm": 3.671728317902159, "learning_rate": 6.338350481815165e-06, "loss": 0.3749, "step": 15062 }, { "epoch": 0.43, "grad_norm": 3.0676055506708373, "learning_rate": 6.337903639843177e-06, "loss": 0.3736, "step": 15063 }, { "epoch": 0.43, "grad_norm": 9.302683732213088, "learning_rate": 6.337456786361369e-06, "loss": 0.9554, "step": 15064 }, { "epoch": 0.43, "grad_norm": 3.4294628165298624, "learning_rate": 6.337009921373585e-06, "loss": 0.1839, "step": 15065 }, { "epoch": 0.43, "grad_norm": 6.373806710852212, "learning_rate": 6.336563044883668e-06, "loss": 0.6798, "step": 15066 }, { "epoch": 0.43, "grad_norm": 3.425528574621544, "learning_rate": 6.336116156895465e-06, "loss": 0.5293, "step": 15067 }, { "epoch": 0.43, "grad_norm": 4.782747620046254, "learning_rate": 6.33566925741282e-06, "loss": 0.5921, "step": 15068 }, { "epoch": 0.43, "grad_norm": 3.8960683347164893, "learning_rate": 6.3352223464395765e-06, "loss": 0.5074, "step": 15069 }, { "epoch": 0.43, "grad_norm": 10.090449782782116, "learning_rate": 6.3347754239795786e-06, "loss": 0.6014, "step": 15070 }, { "epoch": 0.43, "grad_norm": 4.390798354884059, "learning_rate": 6.3343284900366734e-06, "loss": 0.4049, "step": 15071 }, { "epoch": 0.43, "grad_norm": 6.8950076189891165, "learning_rate": 6.333881544614703e-06, "loss": 0.5108, "step": 15072 }, { "epoch": 0.43, "grad_norm": 6.799030321769806, "learning_rate": 6.333434587717515e-06, "loss": 0.7549, "step": 15073 }, { "epoch": 0.43, "grad_norm": 4.641953236495966, "learning_rate": 6.3329876193489524e-06, "loss": 0.7089, "step": 15074 }, { "epoch": 0.43, "grad_norm": 5.459471935684652, "learning_rate": 6.332540639512863e-06, "loss": 0.7331, "step": 15075 }, { "epoch": 0.43, "grad_norm": 6.290578756527219, "learning_rate": 6.332093648213089e-06, "loss": 0.9793, "step": 15076 }, { "epoch": 0.43, "grad_norm": 13.775515515880757, "learning_rate": 6.331646645453478e-06, "loss": 0.5818, "step": 15077 }, { "epoch": 0.43, "grad_norm": 5.020883186641553, "learning_rate": 6.3311996312378745e-06, "loss": 0.1314, "step": 15078 }, { "epoch": 0.43, "grad_norm": 20.636987450818182, "learning_rate": 6.330752605570126e-06, "loss": 0.3735, "step": 15079 }, { "epoch": 0.43, "grad_norm": 13.967672319252245, "learning_rate": 6.330305568454076e-06, "loss": 0.5819, "step": 15080 }, { "epoch": 0.43, "grad_norm": 8.11339497383209, "learning_rate": 6.32985851989357e-06, "loss": 0.4612, "step": 15081 }, { "epoch": 0.43, "grad_norm": 8.963546093871852, "learning_rate": 6.329411459892455e-06, "loss": 0.984, "step": 15082 }, { "epoch": 0.43, "grad_norm": 6.803946138239411, "learning_rate": 6.328964388454577e-06, "loss": 0.5332, "step": 15083 }, { "epoch": 0.43, "grad_norm": 3.853127554268466, "learning_rate": 6.328517305583782e-06, "loss": 0.4015, "step": 15084 }, { "epoch": 0.43, "grad_norm": 4.8644155137101315, "learning_rate": 6.328070211283916e-06, "loss": 0.3683, "step": 15085 }, { "epoch": 0.43, "grad_norm": 6.945877996193173, "learning_rate": 6.327623105558826e-06, "loss": 0.7116, "step": 15086 }, { "epoch": 0.43, "grad_norm": 7.074543364653254, "learning_rate": 6.327175988412358e-06, "loss": 0.7212, "step": 15087 }, { "epoch": 0.43, "grad_norm": 13.436566271610715, "learning_rate": 6.326728859848359e-06, "loss": 0.7346, "step": 15088 }, { "epoch": 0.43, "grad_norm": 6.394570249742671, "learning_rate": 6.326281719870675e-06, "loss": 0.8346, "step": 15089 }, { "epoch": 0.43, "grad_norm": 3.413784565731395, "learning_rate": 6.3258345684831515e-06, "loss": 0.2782, "step": 15090 }, { "epoch": 0.43, "grad_norm": 5.2672118108024835, "learning_rate": 6.325387405689636e-06, "loss": 0.5488, "step": 15091 }, { "epoch": 0.43, "grad_norm": 5.160613000076644, "learning_rate": 6.324940231493977e-06, "loss": 0.4778, "step": 15092 }, { "epoch": 0.43, "grad_norm": 4.866336288843374, "learning_rate": 6.32449304590002e-06, "loss": 0.3946, "step": 15093 }, { "epoch": 0.43, "grad_norm": 6.173258836343406, "learning_rate": 6.324045848911612e-06, "loss": 0.5086, "step": 15094 }, { "epoch": 0.43, "grad_norm": 5.0737468685075635, "learning_rate": 6.3235986405326e-06, "loss": 0.5317, "step": 15095 }, { "epoch": 0.43, "grad_norm": 4.867223822150473, "learning_rate": 6.3231514207668344e-06, "loss": 0.2808, "step": 15096 }, { "epoch": 0.43, "grad_norm": 8.560286744029936, "learning_rate": 6.322704189618158e-06, "loss": 0.3411, "step": 15097 }, { "epoch": 0.43, "grad_norm": 4.2925812786763125, "learning_rate": 6.32225694709042e-06, "loss": 0.1644, "step": 15098 }, { "epoch": 0.43, "grad_norm": 7.961422831497652, "learning_rate": 6.321809693187469e-06, "loss": 0.6019, "step": 15099 }, { "epoch": 0.43, "grad_norm": 5.3249341521870655, "learning_rate": 6.321362427913153e-06, "loss": 0.4638, "step": 15100 }, { "epoch": 0.43, "grad_norm": 5.0473709571368905, "learning_rate": 6.320915151271316e-06, "loss": 0.264, "step": 15101 }, { "epoch": 0.43, "grad_norm": 3.709751011752488, "learning_rate": 6.3204678632658115e-06, "loss": 0.2417, "step": 15102 }, { "epoch": 0.43, "grad_norm": 4.994141006917271, "learning_rate": 6.320020563900484e-06, "loss": 0.1563, "step": 15103 }, { "epoch": 0.43, "grad_norm": 3.22468061270257, "learning_rate": 6.319573253179181e-06, "loss": 0.0898, "step": 15104 }, { "epoch": 0.43, "grad_norm": 2.0559291496518957, "learning_rate": 6.3191259311057516e-06, "loss": 0.1189, "step": 15105 }, { "epoch": 0.43, "grad_norm": 8.891051565439295, "learning_rate": 6.3186785976840455e-06, "loss": 0.6134, "step": 15106 }, { "epoch": 0.43, "grad_norm": 5.35715058099099, "learning_rate": 6.318231252917908e-06, "loss": 0.746, "step": 15107 }, { "epoch": 0.43, "grad_norm": 4.846250712995105, "learning_rate": 6.317783896811191e-06, "loss": 0.4363, "step": 15108 }, { "epoch": 0.43, "grad_norm": 3.3140182974177295, "learning_rate": 6.317336529367741e-06, "loss": 0.3683, "step": 15109 }, { "epoch": 0.43, "grad_norm": 2.5010072109693486, "learning_rate": 6.316889150591408e-06, "loss": 0.297, "step": 15110 }, { "epoch": 0.43, "grad_norm": 6.395781397738089, "learning_rate": 6.316441760486037e-06, "loss": 0.4581, "step": 15111 }, { "epoch": 0.43, "grad_norm": 3.781147947589389, "learning_rate": 6.315994359055482e-06, "loss": 0.3338, "step": 15112 }, { "epoch": 0.43, "grad_norm": 5.057039117711225, "learning_rate": 6.31554694630359e-06, "loss": 0.4732, "step": 15113 }, { "epoch": 0.43, "grad_norm": 4.929205748509791, "learning_rate": 6.315099522234207e-06, "loss": 0.541, "step": 15114 }, { "epoch": 0.43, "grad_norm": 5.616891144852077, "learning_rate": 6.3146520868511876e-06, "loss": 0.416, "step": 15115 }, { "epoch": 0.43, "grad_norm": 4.240079239968536, "learning_rate": 6.314204640158376e-06, "loss": 0.4433, "step": 15116 }, { "epoch": 0.43, "grad_norm": 5.531345948502422, "learning_rate": 6.313757182159624e-06, "loss": 0.7849, "step": 15117 }, { "epoch": 0.43, "grad_norm": 7.11534036059506, "learning_rate": 6.31330971285878e-06, "loss": 0.568, "step": 15118 }, { "epoch": 0.43, "grad_norm": 7.490113927079147, "learning_rate": 6.312862232259698e-06, "loss": 0.6023, "step": 15119 }, { "epoch": 0.43, "grad_norm": 6.962544916032044, "learning_rate": 6.31241474036622e-06, "loss": 0.6083, "step": 15120 }, { "epoch": 0.43, "grad_norm": 2.9945151933955634, "learning_rate": 6.3119672371822005e-06, "loss": 0.3557, "step": 15121 }, { "epoch": 0.43, "grad_norm": 11.040691754948845, "learning_rate": 6.311519722711489e-06, "loss": 0.5878, "step": 15122 }, { "epoch": 0.43, "grad_norm": 7.243852310840519, "learning_rate": 6.311072196957935e-06, "loss": 0.4707, "step": 15123 }, { "epoch": 0.43, "grad_norm": 7.530228224964776, "learning_rate": 6.310624659925386e-06, "loss": 0.3574, "step": 15124 }, { "epoch": 0.43, "grad_norm": 4.891438060121507, "learning_rate": 6.310177111617697e-06, "loss": 0.4379, "step": 15125 }, { "epoch": 0.43, "grad_norm": 3.775991503502896, "learning_rate": 6.309729552038716e-06, "loss": 0.1901, "step": 15126 }, { "epoch": 0.43, "grad_norm": 3.2542777335978017, "learning_rate": 6.309281981192291e-06, "loss": 0.6075, "step": 15127 }, { "epoch": 0.43, "grad_norm": 5.15984427283633, "learning_rate": 6.308834399082275e-06, "loss": 0.3895, "step": 15128 }, { "epoch": 0.43, "grad_norm": 4.9391692273424175, "learning_rate": 6.308386805712517e-06, "loss": 0.3269, "step": 15129 }, { "epoch": 0.43, "grad_norm": 4.246416965735968, "learning_rate": 6.307939201086871e-06, "loss": 0.7509, "step": 15130 }, { "epoch": 0.43, "grad_norm": 3.634271250889714, "learning_rate": 6.307491585209184e-06, "loss": 0.3775, "step": 15131 }, { "epoch": 0.43, "grad_norm": 4.86566206783809, "learning_rate": 6.307043958083305e-06, "loss": 0.3304, "step": 15132 }, { "epoch": 0.43, "grad_norm": 11.950645917729808, "learning_rate": 6.306596319713092e-06, "loss": 0.5026, "step": 15133 }, { "epoch": 0.43, "grad_norm": 3.418518597653663, "learning_rate": 6.306148670102389e-06, "loss": 0.394, "step": 15134 }, { "epoch": 0.43, "grad_norm": 4.8984744908641185, "learning_rate": 6.3057010092550494e-06, "loss": 0.6164, "step": 15135 }, { "epoch": 0.43, "grad_norm": 5.1479180729533605, "learning_rate": 6.3052533371749284e-06, "loss": 0.2057, "step": 15136 }, { "epoch": 0.43, "grad_norm": 7.980474187295474, "learning_rate": 6.304805653865871e-06, "loss": 0.7951, "step": 15137 }, { "epoch": 0.43, "grad_norm": 5.563399360044748, "learning_rate": 6.304357959331731e-06, "loss": 0.7179, "step": 15138 }, { "epoch": 0.43, "grad_norm": 4.913046144988924, "learning_rate": 6.30391025357636e-06, "loss": 0.399, "step": 15139 }, { "epoch": 0.43, "grad_norm": 5.272052814806638, "learning_rate": 6.3034625366036096e-06, "loss": 0.8264, "step": 15140 }, { "epoch": 0.43, "grad_norm": 2.470716152232207, "learning_rate": 6.303014808417331e-06, "loss": 0.118, "step": 15141 }, { "epoch": 0.43, "grad_norm": 3.9987554998380874, "learning_rate": 6.302567069021378e-06, "loss": 0.475, "step": 15142 }, { "epoch": 0.43, "grad_norm": 11.844329900308988, "learning_rate": 6.302119318419602e-06, "loss": 0.2868, "step": 15143 }, { "epoch": 0.43, "grad_norm": 6.092136932499744, "learning_rate": 6.301671556615851e-06, "loss": 0.4507, "step": 15144 }, { "epoch": 0.43, "grad_norm": 2.775695741057335, "learning_rate": 6.3012237836139815e-06, "loss": 0.2472, "step": 15145 }, { "epoch": 0.43, "grad_norm": 3.6140777204945924, "learning_rate": 6.3007759994178445e-06, "loss": 0.3083, "step": 15146 }, { "epoch": 0.43, "grad_norm": 2.581222315124866, "learning_rate": 6.300328204031291e-06, "loss": 0.189, "step": 15147 }, { "epoch": 0.43, "grad_norm": 6.915417209818367, "learning_rate": 6.2998803974581715e-06, "loss": 0.5063, "step": 15148 }, { "epoch": 0.43, "grad_norm": 4.497199829754771, "learning_rate": 6.299432579702344e-06, "loss": 0.3672, "step": 15149 }, { "epoch": 0.43, "grad_norm": 9.35641791958109, "learning_rate": 6.2989847507676585e-06, "loss": 0.891, "step": 15150 }, { "epoch": 0.43, "grad_norm": 7.494810725106792, "learning_rate": 6.298536910657965e-06, "loss": 0.7288, "step": 15151 }, { "epoch": 0.43, "grad_norm": 3.3545013116659708, "learning_rate": 6.2980890593771195e-06, "loss": 0.2873, "step": 15152 }, { "epoch": 0.43, "grad_norm": 4.5662424859278286, "learning_rate": 6.2976411969289745e-06, "loss": 0.4434, "step": 15153 }, { "epoch": 0.43, "grad_norm": 7.041693429689128, "learning_rate": 6.29719332331738e-06, "loss": 0.6762, "step": 15154 }, { "epoch": 0.43, "grad_norm": 3.5449154267896112, "learning_rate": 6.296745438546192e-06, "loss": 0.2995, "step": 15155 }, { "epoch": 0.43, "grad_norm": 1.8170921753328038, "learning_rate": 6.296297542619263e-06, "loss": 0.2425, "step": 15156 }, { "epoch": 0.43, "grad_norm": 4.684597744522428, "learning_rate": 6.295849635540446e-06, "loss": 0.6263, "step": 15157 }, { "epoch": 0.43, "grad_norm": 2.8464916239102123, "learning_rate": 6.295401717313592e-06, "loss": 0.1744, "step": 15158 }, { "epoch": 0.43, "grad_norm": 2.4186905136481944, "learning_rate": 6.294953787942558e-06, "loss": 0.3556, "step": 15159 }, { "epoch": 0.43, "grad_norm": 5.3415476404379625, "learning_rate": 6.294505847431197e-06, "loss": 0.5467, "step": 15160 }, { "epoch": 0.43, "grad_norm": 4.014266648972099, "learning_rate": 6.2940578957833605e-06, "loss": 0.4762, "step": 15161 }, { "epoch": 0.43, "grad_norm": 11.688477888353432, "learning_rate": 6.293609933002904e-06, "loss": 0.5673, "step": 15162 }, { "epoch": 0.43, "grad_norm": 3.502517663201851, "learning_rate": 6.293161959093681e-06, "loss": 0.4414, "step": 15163 }, { "epoch": 0.43, "grad_norm": 12.838885571849715, "learning_rate": 6.292713974059543e-06, "loss": 1.211, "step": 15164 }, { "epoch": 0.43, "grad_norm": 6.679898478427527, "learning_rate": 6.292265977904346e-06, "loss": 0.3346, "step": 15165 }, { "epoch": 0.43, "grad_norm": 4.901488745550683, "learning_rate": 6.291817970631946e-06, "loss": 0.6286, "step": 15166 }, { "epoch": 0.43, "grad_norm": 5.324004227031116, "learning_rate": 6.291369952246193e-06, "loss": 0.6267, "step": 15167 }, { "epoch": 0.43, "grad_norm": 6.190891376832086, "learning_rate": 6.290921922750943e-06, "loss": 0.7029, "step": 15168 }, { "epoch": 0.43, "grad_norm": 2.719856365948407, "learning_rate": 6.290473882150052e-06, "loss": 0.3376, "step": 15169 }, { "epoch": 0.43, "grad_norm": 4.415748247288044, "learning_rate": 6.290025830447374e-06, "loss": 0.2635, "step": 15170 }, { "epoch": 0.43, "grad_norm": 4.922269532875924, "learning_rate": 6.289577767646761e-06, "loss": 0.5401, "step": 15171 }, { "epoch": 0.43, "grad_norm": 4.039747523170768, "learning_rate": 6.289129693752069e-06, "loss": 0.3894, "step": 15172 }, { "epoch": 0.43, "grad_norm": 5.867667512521999, "learning_rate": 6.288681608767153e-06, "loss": 0.5372, "step": 15173 }, { "epoch": 0.43, "grad_norm": 4.97628680413211, "learning_rate": 6.2882335126958696e-06, "loss": 0.4864, "step": 15174 }, { "epoch": 0.43, "grad_norm": 6.645129769452403, "learning_rate": 6.287785405542069e-06, "loss": 0.321, "step": 15175 }, { "epoch": 0.43, "grad_norm": 6.961139851549954, "learning_rate": 6.287337287309612e-06, "loss": 0.6705, "step": 15176 }, { "epoch": 0.43, "grad_norm": 5.275717642180507, "learning_rate": 6.286889158002349e-06, "loss": 0.8569, "step": 15177 }, { "epoch": 0.43, "grad_norm": 6.8082488208445175, "learning_rate": 6.2864410176241384e-06, "loss": 0.8277, "step": 15178 }, { "epoch": 0.43, "grad_norm": 8.156246375306008, "learning_rate": 6.285992866178833e-06, "loss": 0.5108, "step": 15179 }, { "epoch": 0.43, "grad_norm": 5.46242973230066, "learning_rate": 6.28554470367029e-06, "loss": 0.6823, "step": 15180 }, { "epoch": 0.43, "grad_norm": 6.766209812746168, "learning_rate": 6.285096530102362e-06, "loss": 0.2988, "step": 15181 }, { "epoch": 0.43, "grad_norm": 9.746799824122963, "learning_rate": 6.28464834547891e-06, "loss": 0.696, "step": 15182 }, { "epoch": 0.43, "grad_norm": 6.30728893663225, "learning_rate": 6.2842001498037855e-06, "loss": 0.4102, "step": 15183 }, { "epoch": 0.43, "grad_norm": 5.623244371884454, "learning_rate": 6.283751943080845e-06, "loss": 0.5458, "step": 15184 }, { "epoch": 0.43, "grad_norm": 2.4099720988102886, "learning_rate": 6.283303725313943e-06, "loss": 0.1565, "step": 15185 }, { "epoch": 0.43, "grad_norm": 16.16557146406794, "learning_rate": 6.282855496506939e-06, "loss": 0.8061, "step": 15186 }, { "epoch": 0.43, "grad_norm": 2.7343266510094906, "learning_rate": 6.282407256663687e-06, "loss": 0.3846, "step": 15187 }, { "epoch": 0.43, "grad_norm": 4.303433535062058, "learning_rate": 6.2819590057880435e-06, "loss": 0.4646, "step": 15188 }, { "epoch": 0.43, "grad_norm": 4.268629420352146, "learning_rate": 6.2815107438838615e-06, "loss": 0.8543, "step": 15189 }, { "epoch": 0.44, "grad_norm": 3.51408174339036, "learning_rate": 6.281062470955004e-06, "loss": 0.2894, "step": 15190 }, { "epoch": 0.44, "grad_norm": 4.0877182181900995, "learning_rate": 6.2806141870053214e-06, "loss": 0.7417, "step": 15191 }, { "epoch": 0.44, "grad_norm": 12.270531089872968, "learning_rate": 6.280165892038675e-06, "loss": 0.8607, "step": 15192 }, { "epoch": 0.44, "grad_norm": 2.931215096532268, "learning_rate": 6.279717586058917e-06, "loss": 0.3402, "step": 15193 }, { "epoch": 0.44, "grad_norm": 6.915236275388268, "learning_rate": 6.279269269069908e-06, "loss": 0.1649, "step": 15194 }, { "epoch": 0.44, "grad_norm": 5.091904850126972, "learning_rate": 6.278820941075501e-06, "loss": 0.4181, "step": 15195 }, { "epoch": 0.44, "grad_norm": 3.706547342759407, "learning_rate": 6.278372602079555e-06, "loss": 0.3484, "step": 15196 }, { "epoch": 0.44, "grad_norm": 7.262031306311455, "learning_rate": 6.27792425208593e-06, "loss": 0.5125, "step": 15197 }, { "epoch": 0.44, "grad_norm": 5.275760302960323, "learning_rate": 6.2774758910984756e-06, "loss": 0.8105, "step": 15198 }, { "epoch": 0.44, "grad_norm": 2.728199399128094, "learning_rate": 6.277027519121055e-06, "loss": 0.4358, "step": 15199 }, { "epoch": 0.44, "grad_norm": 3.2151365830462812, "learning_rate": 6.276579136157526e-06, "loss": 0.7179, "step": 15200 }, { "epoch": 0.44, "grad_norm": 7.654330390531688, "learning_rate": 6.276130742211741e-06, "loss": 0.2902, "step": 15201 }, { "epoch": 0.44, "grad_norm": 4.519306160770898, "learning_rate": 6.275682337287562e-06, "loss": 0.2025, "step": 15202 }, { "epoch": 0.44, "grad_norm": 5.468273992258137, "learning_rate": 6.275233921388843e-06, "loss": 0.5388, "step": 15203 }, { "epoch": 0.44, "grad_norm": 5.898374504106635, "learning_rate": 6.274785494519448e-06, "loss": 0.3374, "step": 15204 }, { "epoch": 0.44, "grad_norm": 9.088361579109128, "learning_rate": 6.274337056683226e-06, "loss": 0.8753, "step": 15205 }, { "epoch": 0.44, "grad_norm": 3.9177934905965803, "learning_rate": 6.273888607884041e-06, "loss": 0.4576, "step": 15206 }, { "epoch": 0.44, "grad_norm": 2.6295569692889957, "learning_rate": 6.273440148125749e-06, "loss": 0.2215, "step": 15207 }, { "epoch": 0.44, "grad_norm": 5.619109757291879, "learning_rate": 6.272991677412207e-06, "loss": 0.4109, "step": 15208 }, { "epoch": 0.44, "grad_norm": 4.066529476413065, "learning_rate": 6.272543195747276e-06, "loss": 0.4126, "step": 15209 }, { "epoch": 0.44, "grad_norm": 5.208258218859392, "learning_rate": 6.2720947031348125e-06, "loss": 0.5242, "step": 15210 }, { "epoch": 0.44, "grad_norm": 2.8933045211212938, "learning_rate": 6.271646199578674e-06, "loss": 0.3245, "step": 15211 }, { "epoch": 0.44, "grad_norm": 3.9260589349572235, "learning_rate": 6.27119768508272e-06, "loss": 0.5729, "step": 15212 }, { "epoch": 0.44, "grad_norm": 7.845039535238063, "learning_rate": 6.2707491596508085e-06, "loss": 0.5727, "step": 15213 }, { "epoch": 0.44, "grad_norm": 5.575697619150522, "learning_rate": 6.270300623286798e-06, "loss": 0.4371, "step": 15214 }, { "epoch": 0.44, "grad_norm": 7.081456694991128, "learning_rate": 6.2698520759945495e-06, "loss": 0.4869, "step": 15215 }, { "epoch": 0.44, "grad_norm": 4.719588022154232, "learning_rate": 6.269403517777918e-06, "loss": 0.7861, "step": 15216 }, { "epoch": 0.44, "grad_norm": 2.989830144248688, "learning_rate": 6.268954948640766e-06, "loss": 0.5196, "step": 15217 }, { "epoch": 0.44, "grad_norm": 8.909275170244944, "learning_rate": 6.268506368586949e-06, "loss": 0.5396, "step": 15218 }, { "epoch": 0.44, "grad_norm": 10.395774173504536, "learning_rate": 6.26805777762033e-06, "loss": 0.733, "step": 15219 }, { "epoch": 0.44, "grad_norm": 9.121398214939846, "learning_rate": 6.2676091757447654e-06, "loss": 0.6583, "step": 15220 }, { "epoch": 0.44, "grad_norm": 7.151258135189023, "learning_rate": 6.2671605629641155e-06, "loss": 0.3919, "step": 15221 }, { "epoch": 0.44, "grad_norm": 9.806053814162453, "learning_rate": 6.266711939282237e-06, "loss": 0.5283, "step": 15222 }, { "epoch": 0.44, "grad_norm": 6.24776807987616, "learning_rate": 6.2662633047029945e-06, "loss": 0.4172, "step": 15223 }, { "epoch": 0.44, "grad_norm": 5.860158069418587, "learning_rate": 6.265814659230242e-06, "loss": 0.3461, "step": 15224 }, { "epoch": 0.44, "grad_norm": 5.578568726743578, "learning_rate": 6.265366002867843e-06, "loss": 0.4804, "step": 15225 }, { "epoch": 0.44, "grad_norm": 4.192547901314388, "learning_rate": 6.264917335619657e-06, "loss": 0.4981, "step": 15226 }, { "epoch": 0.44, "grad_norm": 5.293668693629687, "learning_rate": 6.264468657489543e-06, "loss": 0.6256, "step": 15227 }, { "epoch": 0.44, "grad_norm": 6.065213353271769, "learning_rate": 6.26401996848136e-06, "loss": 0.4761, "step": 15228 }, { "epoch": 0.44, "grad_norm": 4.678706198387158, "learning_rate": 6.263571268598968e-06, "loss": 0.4452, "step": 15229 }, { "epoch": 0.44, "grad_norm": 6.445905697430141, "learning_rate": 6.263122557846228e-06, "loss": 0.4763, "step": 15230 }, { "epoch": 0.44, "grad_norm": 4.105254758417299, "learning_rate": 6.262673836227002e-06, "loss": 0.4461, "step": 15231 }, { "epoch": 0.44, "grad_norm": 4.6038875826087065, "learning_rate": 6.262225103745148e-06, "loss": 0.7069, "step": 15232 }, { "epoch": 0.44, "grad_norm": 5.534304018173936, "learning_rate": 6.261776360404526e-06, "loss": 0.8271, "step": 15233 }, { "epoch": 0.44, "grad_norm": 5.227957637682365, "learning_rate": 6.261327606208998e-06, "loss": 0.6237, "step": 15234 }, { "epoch": 0.44, "grad_norm": 6.257044217575759, "learning_rate": 6.260878841162423e-06, "loss": 0.273, "step": 15235 }, { "epoch": 0.44, "grad_norm": 8.156002581705879, "learning_rate": 6.260430065268663e-06, "loss": 0.5058, "step": 15236 }, { "epoch": 0.44, "grad_norm": 10.67063018553818, "learning_rate": 6.2599812785315795e-06, "loss": 1.0742, "step": 15237 }, { "epoch": 0.44, "grad_norm": 5.865191266094855, "learning_rate": 6.2595324809550305e-06, "loss": 0.7529, "step": 15238 }, { "epoch": 0.44, "grad_norm": 5.221223302201684, "learning_rate": 6.259083672542879e-06, "loss": 0.4078, "step": 15239 }, { "epoch": 0.44, "grad_norm": 6.391831039377505, "learning_rate": 6.258634853298986e-06, "loss": 0.1241, "step": 15240 }, { "epoch": 0.44, "grad_norm": 3.9278077925351043, "learning_rate": 6.258186023227212e-06, "loss": 0.6248, "step": 15241 }, { "epoch": 0.44, "grad_norm": 4.887163247217137, "learning_rate": 6.257737182331418e-06, "loss": 0.3661, "step": 15242 }, { "epoch": 0.44, "grad_norm": 4.477245178894859, "learning_rate": 6.257288330615468e-06, "loss": 0.484, "step": 15243 }, { "epoch": 0.44, "grad_norm": 7.783724349313364, "learning_rate": 6.25683946808322e-06, "loss": 0.3073, "step": 15244 }, { "epoch": 0.44, "grad_norm": 4.337876884510825, "learning_rate": 6.256390594738536e-06, "loss": 0.6353, "step": 15245 }, { "epoch": 0.44, "grad_norm": 5.407695621086038, "learning_rate": 6.255941710585279e-06, "loss": 0.4125, "step": 15246 }, { "epoch": 0.44, "grad_norm": 5.197133185510152, "learning_rate": 6.255492815627311e-06, "loss": 0.8123, "step": 15247 }, { "epoch": 0.44, "grad_norm": 3.421120477690895, "learning_rate": 6.255043909868493e-06, "loss": 0.5843, "step": 15248 }, { "epoch": 0.44, "grad_norm": 5.739300517218184, "learning_rate": 6.254594993312684e-06, "loss": 0.5072, "step": 15249 }, { "epoch": 0.44, "grad_norm": 4.885502042478517, "learning_rate": 6.254146065963752e-06, "loss": 1.0644, "step": 15250 }, { "epoch": 0.44, "grad_norm": 4.607008866593258, "learning_rate": 6.253697127825554e-06, "loss": 0.5915, "step": 15251 }, { "epoch": 0.44, "grad_norm": 7.235361744629848, "learning_rate": 6.253248178901954e-06, "loss": 0.7505, "step": 15252 }, { "epoch": 0.44, "grad_norm": 6.620706318460589, "learning_rate": 6.252799219196814e-06, "loss": 0.4391, "step": 15253 }, { "epoch": 0.44, "grad_norm": 2.7384558171721087, "learning_rate": 6.252350248713998e-06, "loss": 0.3335, "step": 15254 }, { "epoch": 0.44, "grad_norm": 5.890235058250466, "learning_rate": 6.251901267457366e-06, "loss": 0.7464, "step": 15255 }, { "epoch": 0.44, "grad_norm": 11.42440538257186, "learning_rate": 6.2514522754307815e-06, "loss": 0.647, "step": 15256 }, { "epoch": 0.44, "grad_norm": 4.552523404752009, "learning_rate": 6.251003272638109e-06, "loss": 0.3353, "step": 15257 }, { "epoch": 0.44, "grad_norm": 2.574550045847962, "learning_rate": 6.250554259083208e-06, "loss": 0.346, "step": 15258 }, { "epoch": 0.44, "grad_norm": 7.632214806854639, "learning_rate": 6.250105234769942e-06, "loss": 1.2146, "step": 15259 }, { "epoch": 0.44, "grad_norm": 4.228715725364333, "learning_rate": 6.249656199702174e-06, "loss": 0.1689, "step": 15260 }, { "epoch": 0.44, "grad_norm": 4.376701024296622, "learning_rate": 6.24920715388377e-06, "loss": 0.5167, "step": 15261 }, { "epoch": 0.44, "grad_norm": 1.4950838814083547, "learning_rate": 6.248758097318589e-06, "loss": 0.0938, "step": 15262 }, { "epoch": 0.44, "grad_norm": 5.95949776037446, "learning_rate": 6.2483090300104955e-06, "loss": 0.5706, "step": 15263 }, { "epoch": 0.44, "grad_norm": 3.805890811452441, "learning_rate": 6.247859951963354e-06, "loss": 0.4202, "step": 15264 }, { "epoch": 0.44, "grad_norm": 6.0382604029321865, "learning_rate": 6.247410863181027e-06, "loss": 0.4936, "step": 15265 }, { "epoch": 0.44, "grad_norm": 4.045363921026147, "learning_rate": 6.246961763667377e-06, "loss": 0.564, "step": 15266 }, { "epoch": 0.44, "grad_norm": 7.321522876251772, "learning_rate": 6.246512653426271e-06, "loss": 0.2534, "step": 15267 }, { "epoch": 0.44, "grad_norm": 5.079473979478891, "learning_rate": 6.246063532461568e-06, "loss": 0.4985, "step": 15268 }, { "epoch": 0.44, "grad_norm": 10.986427256497205, "learning_rate": 6.245614400777134e-06, "loss": 0.5591, "step": 15269 }, { "epoch": 0.44, "grad_norm": 7.120600563502261, "learning_rate": 6.245165258376832e-06, "loss": 0.5176, "step": 15270 }, { "epoch": 0.44, "grad_norm": 10.208334137793269, "learning_rate": 6.244716105264529e-06, "loss": 0.6156, "step": 15271 }, { "epoch": 0.44, "grad_norm": 7.160519987649659, "learning_rate": 6.244266941444084e-06, "loss": 0.3823, "step": 15272 }, { "epoch": 0.44, "grad_norm": 5.924665332146882, "learning_rate": 6.243817766919363e-06, "loss": 0.379, "step": 15273 }, { "epoch": 0.44, "grad_norm": 3.4930263254313227, "learning_rate": 6.243368581694234e-06, "loss": 0.4186, "step": 15274 }, { "epoch": 0.44, "grad_norm": 4.67302845268314, "learning_rate": 6.242919385772557e-06, "loss": 0.3614, "step": 15275 }, { "epoch": 0.44, "grad_norm": 5.564014528662816, "learning_rate": 6.242470179158196e-06, "loss": 0.699, "step": 15276 }, { "epoch": 0.44, "grad_norm": 5.663423404811829, "learning_rate": 6.2420209618550194e-06, "loss": 0.2431, "step": 15277 }, { "epoch": 0.44, "grad_norm": 7.203871748274561, "learning_rate": 6.2415717338668895e-06, "loss": 0.9537, "step": 15278 }, { "epoch": 0.44, "grad_norm": 7.142440765370233, "learning_rate": 6.241122495197669e-06, "loss": 0.7648, "step": 15279 }, { "epoch": 0.44, "grad_norm": 9.172495312485493, "learning_rate": 6.240673245851225e-06, "loss": 0.4451, "step": 15280 }, { "epoch": 0.44, "grad_norm": 6.253891991927259, "learning_rate": 6.240223985831421e-06, "loss": 0.4103, "step": 15281 }, { "epoch": 0.44, "grad_norm": 3.7907421361273963, "learning_rate": 6.239774715142124e-06, "loss": 0.4968, "step": 15282 }, { "epoch": 0.44, "grad_norm": 7.391472927107664, "learning_rate": 6.239325433787198e-06, "loss": 0.3349, "step": 15283 }, { "epoch": 0.44, "grad_norm": 6.843930594791865, "learning_rate": 6.238876141770508e-06, "loss": 0.7626, "step": 15284 }, { "epoch": 0.44, "grad_norm": 5.04576419853311, "learning_rate": 6.238426839095918e-06, "loss": 0.7451, "step": 15285 }, { "epoch": 0.44, "grad_norm": 5.931004555128834, "learning_rate": 6.237977525767295e-06, "loss": 0.4841, "step": 15286 }, { "epoch": 0.44, "grad_norm": 4.697282044824147, "learning_rate": 6.237528201788503e-06, "loss": 0.6125, "step": 15287 }, { "epoch": 0.44, "grad_norm": 4.796916998763235, "learning_rate": 6.237078867163408e-06, "loss": 0.5425, "step": 15288 }, { "epoch": 0.44, "grad_norm": 8.122478739486677, "learning_rate": 6.2366295218958775e-06, "loss": 0.2294, "step": 15289 }, { "epoch": 0.44, "grad_norm": 4.453581455327368, "learning_rate": 6.2361801659897746e-06, "loss": 0.4749, "step": 15290 }, { "epoch": 0.44, "grad_norm": 2.4146244172843354, "learning_rate": 6.235730799448967e-06, "loss": 0.257, "step": 15291 }, { "epoch": 0.44, "grad_norm": 5.048700905825855, "learning_rate": 6.235281422277317e-06, "loss": 0.3277, "step": 15292 }, { "epoch": 0.44, "grad_norm": 4.293406442504476, "learning_rate": 6.234832034478695e-06, "loss": 0.4913, "step": 15293 }, { "epoch": 0.44, "grad_norm": 6.606408646193519, "learning_rate": 6.234382636056966e-06, "loss": 0.4558, "step": 15294 }, { "epoch": 0.44, "grad_norm": 5.151808125937576, "learning_rate": 6.233933227015994e-06, "loss": 0.3434, "step": 15295 }, { "epoch": 0.44, "grad_norm": 5.4063739211152, "learning_rate": 6.233483807359643e-06, "loss": 0.811, "step": 15296 }, { "epoch": 0.44, "grad_norm": 6.44378059446544, "learning_rate": 6.233034377091788e-06, "loss": 0.6644, "step": 15297 }, { "epoch": 0.44, "grad_norm": 5.8564576884665795, "learning_rate": 6.232584936216287e-06, "loss": 0.6515, "step": 15298 }, { "epoch": 0.44, "grad_norm": 6.208897592730662, "learning_rate": 6.2321354847370105e-06, "loss": 0.6311, "step": 15299 }, { "epoch": 0.44, "grad_norm": 4.726827919813954, "learning_rate": 6.231686022657824e-06, "loss": 0.6132, "step": 15300 }, { "epoch": 0.44, "grad_norm": 5.4479950436654985, "learning_rate": 6.2312365499825956e-06, "loss": 0.6755, "step": 15301 }, { "epoch": 0.44, "grad_norm": 3.716281215640555, "learning_rate": 6.230787066715188e-06, "loss": 0.3083, "step": 15302 }, { "epoch": 0.44, "grad_norm": 21.3076892468504, "learning_rate": 6.230337572859473e-06, "loss": 0.6974, "step": 15303 }, { "epoch": 0.44, "grad_norm": 4.372785689070629, "learning_rate": 6.229888068419314e-06, "loss": 0.4882, "step": 15304 }, { "epoch": 0.44, "grad_norm": 9.66144883531513, "learning_rate": 6.22943855339858e-06, "loss": 1.0009, "step": 15305 }, { "epoch": 0.44, "grad_norm": 2.525667769823475, "learning_rate": 6.228989027801137e-06, "loss": 0.4486, "step": 15306 }, { "epoch": 0.44, "grad_norm": 5.16289324659714, "learning_rate": 6.228539491630854e-06, "loss": 0.3517, "step": 15307 }, { "epoch": 0.44, "grad_norm": 32.68843987537805, "learning_rate": 6.228089944891597e-06, "loss": 0.727, "step": 15308 }, { "epoch": 0.44, "grad_norm": 7.021952980331719, "learning_rate": 6.227640387587231e-06, "loss": 0.7184, "step": 15309 }, { "epoch": 0.44, "grad_norm": 5.462990250836604, "learning_rate": 6.227190819721628e-06, "loss": 0.2604, "step": 15310 }, { "epoch": 0.44, "grad_norm": 8.802722566104515, "learning_rate": 6.226741241298654e-06, "loss": 0.4489, "step": 15311 }, { "epoch": 0.44, "grad_norm": 3.9510460916996073, "learning_rate": 6.226291652322175e-06, "loss": 0.4589, "step": 15312 }, { "epoch": 0.44, "grad_norm": 8.841511365537496, "learning_rate": 6.225842052796059e-06, "loss": 0.5057, "step": 15313 }, { "epoch": 0.44, "grad_norm": 4.1024671156435275, "learning_rate": 6.225392442724176e-06, "loss": 0.4276, "step": 15314 }, { "epoch": 0.44, "grad_norm": 11.385258083978332, "learning_rate": 6.224942822110392e-06, "loss": 0.312, "step": 15315 }, { "epoch": 0.44, "grad_norm": 4.373817638300496, "learning_rate": 6.224493190958577e-06, "loss": 0.3103, "step": 15316 }, { "epoch": 0.44, "grad_norm": 4.37905577949624, "learning_rate": 6.224043549272596e-06, "loss": 0.2678, "step": 15317 }, { "epoch": 0.44, "grad_norm": 2.41745757583507, "learning_rate": 6.223593897056321e-06, "loss": 0.2059, "step": 15318 }, { "epoch": 0.44, "grad_norm": 5.579510428578804, "learning_rate": 6.223144234313618e-06, "loss": 0.5114, "step": 15319 }, { "epoch": 0.44, "grad_norm": 5.185136486416411, "learning_rate": 6.222694561048354e-06, "loss": 0.2547, "step": 15320 }, { "epoch": 0.44, "grad_norm": 7.916320930427016, "learning_rate": 6.222244877264401e-06, "loss": 0.708, "step": 15321 }, { "epoch": 0.44, "grad_norm": 4.186406633346233, "learning_rate": 6.221795182965625e-06, "loss": 0.3257, "step": 15322 }, { "epoch": 0.44, "grad_norm": 10.783065687235442, "learning_rate": 6.221345478155896e-06, "loss": 0.8872, "step": 15323 }, { "epoch": 0.44, "grad_norm": 6.283209006532354, "learning_rate": 6.220895762839082e-06, "loss": 0.5451, "step": 15324 }, { "epoch": 0.44, "grad_norm": 7.187413621466351, "learning_rate": 6.220446037019052e-06, "loss": 0.4444, "step": 15325 }, { "epoch": 0.44, "grad_norm": 4.766148872973409, "learning_rate": 6.219996300699676e-06, "loss": 0.558, "step": 15326 }, { "epoch": 0.44, "grad_norm": 5.677295963143457, "learning_rate": 6.21954655388482e-06, "loss": 0.7634, "step": 15327 }, { "epoch": 0.44, "grad_norm": 5.387561934042118, "learning_rate": 6.219096796578357e-06, "loss": 0.569, "step": 15328 }, { "epoch": 0.44, "grad_norm": 4.486495284344691, "learning_rate": 6.218647028784153e-06, "loss": 0.7014, "step": 15329 }, { "epoch": 0.44, "grad_norm": 5.204468241562167, "learning_rate": 6.2181972505060785e-06, "loss": 0.4446, "step": 15330 }, { "epoch": 0.44, "grad_norm": 7.269011071486785, "learning_rate": 6.2177474617480045e-06, "loss": 0.8835, "step": 15331 }, { "epoch": 0.44, "grad_norm": 4.753868510363693, "learning_rate": 6.217297662513798e-06, "loss": 0.6188, "step": 15332 }, { "epoch": 0.44, "grad_norm": 6.556240584345803, "learning_rate": 6.21684785280733e-06, "loss": 0.5529, "step": 15333 }, { "epoch": 0.44, "grad_norm": 3.4314915598071147, "learning_rate": 6.216398032632469e-06, "loss": 0.4016, "step": 15334 }, { "epoch": 0.44, "grad_norm": 6.665628336473248, "learning_rate": 6.215948201993088e-06, "loss": 0.555, "step": 15335 }, { "epoch": 0.44, "grad_norm": 4.4460931236683585, "learning_rate": 6.2154983608930506e-06, "loss": 0.3991, "step": 15336 }, { "epoch": 0.44, "grad_norm": 7.516099688792077, "learning_rate": 6.2150485093362334e-06, "loss": 0.4172, "step": 15337 }, { "epoch": 0.44, "grad_norm": 5.575709741686269, "learning_rate": 6.214598647326502e-06, "loss": 0.4508, "step": 15338 }, { "epoch": 0.44, "grad_norm": 4.872978133517529, "learning_rate": 6.214148774867727e-06, "loss": 0.3463, "step": 15339 }, { "epoch": 0.44, "grad_norm": 4.965846959045395, "learning_rate": 6.213698891963782e-06, "loss": 0.4294, "step": 15340 }, { "epoch": 0.44, "grad_norm": 5.116917074498085, "learning_rate": 6.213248998618534e-06, "loss": 0.169, "step": 15341 }, { "epoch": 0.44, "grad_norm": 8.459622070402178, "learning_rate": 6.212799094835854e-06, "loss": 0.724, "step": 15342 }, { "epoch": 0.44, "grad_norm": 7.239039719478037, "learning_rate": 6.212349180619612e-06, "loss": 0.6737, "step": 15343 }, { "epoch": 0.44, "grad_norm": 4.052470871045842, "learning_rate": 6.21189925597368e-06, "loss": 0.2888, "step": 15344 }, { "epoch": 0.44, "grad_norm": 5.178949824259359, "learning_rate": 6.211449320901929e-06, "loss": 0.5423, "step": 15345 }, { "epoch": 0.44, "grad_norm": 6.014312244204026, "learning_rate": 6.210999375408226e-06, "loss": 0.3148, "step": 15346 }, { "epoch": 0.44, "grad_norm": 4.623367923730023, "learning_rate": 6.210549419496447e-06, "loss": 0.6703, "step": 15347 }, { "epoch": 0.44, "grad_norm": 4.654760775416716, "learning_rate": 6.210099453170459e-06, "loss": 0.5172, "step": 15348 }, { "epoch": 0.44, "grad_norm": 7.0392960690646635, "learning_rate": 6.209649476434135e-06, "loss": 0.3208, "step": 15349 }, { "epoch": 0.44, "grad_norm": 10.629962233604601, "learning_rate": 6.209199489291345e-06, "loss": 0.8249, "step": 15350 }, { "epoch": 0.44, "grad_norm": 5.087359345387383, "learning_rate": 6.208749491745962e-06, "loss": 0.4858, "step": 15351 }, { "epoch": 0.44, "grad_norm": 5.374333628665927, "learning_rate": 6.208299483801855e-06, "loss": 0.4949, "step": 15352 }, { "epoch": 0.44, "grad_norm": 7.638166909826638, "learning_rate": 6.207849465462896e-06, "loss": 0.5106, "step": 15353 }, { "epoch": 0.44, "grad_norm": 6.664590919715289, "learning_rate": 6.207399436732957e-06, "loss": 0.6252, "step": 15354 }, { "epoch": 0.44, "grad_norm": 6.984439994629077, "learning_rate": 6.206949397615909e-06, "loss": 0.5028, "step": 15355 }, { "epoch": 0.44, "grad_norm": 7.358605087406, "learning_rate": 6.206499348115625e-06, "loss": 0.5173, "step": 15356 }, { "epoch": 0.44, "grad_norm": 5.178591236771908, "learning_rate": 6.2060492882359745e-06, "loss": 0.4684, "step": 15357 }, { "epoch": 0.44, "grad_norm": 3.1346702299725187, "learning_rate": 6.2055992179808315e-06, "loss": 0.1718, "step": 15358 }, { "epoch": 0.44, "grad_norm": 2.3051858266245127, "learning_rate": 6.2051491373540665e-06, "loss": 0.1025, "step": 15359 }, { "epoch": 0.44, "grad_norm": 7.412264639091452, "learning_rate": 6.204699046359551e-06, "loss": 0.3366, "step": 15360 }, { "epoch": 0.44, "grad_norm": 4.682958080942339, "learning_rate": 6.2042489450011586e-06, "loss": 0.599, "step": 15361 }, { "epoch": 0.44, "grad_norm": 5.754461713551662, "learning_rate": 6.20379883328276e-06, "loss": 0.4081, "step": 15362 }, { "epoch": 0.44, "grad_norm": 9.74734685941408, "learning_rate": 6.203348711208229e-06, "loss": 0.817, "step": 15363 }, { "epoch": 0.44, "grad_norm": 5.5853372958314855, "learning_rate": 6.202898578781436e-06, "loss": 0.2721, "step": 15364 }, { "epoch": 0.44, "grad_norm": 4.449317991307045, "learning_rate": 6.202448436006258e-06, "loss": 0.2013, "step": 15365 }, { "epoch": 0.44, "grad_norm": 5.899477512048091, "learning_rate": 6.201998282886561e-06, "loss": 0.471, "step": 15366 }, { "epoch": 0.44, "grad_norm": 9.92966694683406, "learning_rate": 6.2015481194262215e-06, "loss": 0.733, "step": 15367 }, { "epoch": 0.44, "grad_norm": 8.464155323180833, "learning_rate": 6.201097945629113e-06, "loss": 0.5223, "step": 15368 }, { "epoch": 0.44, "grad_norm": 6.762093554003321, "learning_rate": 6.200647761499106e-06, "loss": 0.5667, "step": 15369 }, { "epoch": 0.44, "grad_norm": 1.9607325898973036, "learning_rate": 6.200197567040072e-06, "loss": 0.1904, "step": 15370 }, { "epoch": 0.44, "grad_norm": 7.614474990289651, "learning_rate": 6.1997473622558876e-06, "loss": 0.8189, "step": 15371 }, { "epoch": 0.44, "grad_norm": 6.9888186048273155, "learning_rate": 6.199297147150425e-06, "loss": 0.3853, "step": 15372 }, { "epoch": 0.44, "grad_norm": 9.149405541417542, "learning_rate": 6.198846921727556e-06, "loss": 0.7793, "step": 15373 }, { "epoch": 0.44, "grad_norm": 8.317053702018347, "learning_rate": 6.198396685991155e-06, "loss": 0.6162, "step": 15374 }, { "epoch": 0.44, "grad_norm": 4.4091101758173075, "learning_rate": 6.197946439945095e-06, "loss": 0.3789, "step": 15375 }, { "epoch": 0.44, "grad_norm": 3.942826830730652, "learning_rate": 6.197496183593249e-06, "loss": 0.4435, "step": 15376 }, { "epoch": 0.44, "grad_norm": 6.88526587001323, "learning_rate": 6.19704591693949e-06, "loss": 0.5892, "step": 15377 }, { "epoch": 0.44, "grad_norm": 5.033408750301681, "learning_rate": 6.196595639987692e-06, "loss": 0.3646, "step": 15378 }, { "epoch": 0.44, "grad_norm": 6.384240615184742, "learning_rate": 6.196145352741731e-06, "loss": 0.8156, "step": 15379 }, { "epoch": 0.44, "grad_norm": 5.159010824270146, "learning_rate": 6.195695055205478e-06, "loss": 0.6266, "step": 15380 }, { "epoch": 0.44, "grad_norm": 5.161040883612465, "learning_rate": 6.195244747382808e-06, "loss": 0.3019, "step": 15381 }, { "epoch": 0.44, "grad_norm": 6.170283824692702, "learning_rate": 6.194794429277593e-06, "loss": 0.6208, "step": 15382 }, { "epoch": 0.44, "grad_norm": 9.933331340964543, "learning_rate": 6.19434410089371e-06, "loss": 0.5404, "step": 15383 }, { "epoch": 0.44, "grad_norm": 9.006558439332531, "learning_rate": 6.1938937622350315e-06, "loss": 0.2254, "step": 15384 }, { "epoch": 0.44, "grad_norm": 3.5598229170807794, "learning_rate": 6.193443413305432e-06, "loss": 0.4069, "step": 15385 }, { "epoch": 0.44, "grad_norm": 7.875887079079388, "learning_rate": 6.1929930541087855e-06, "loss": 0.672, "step": 15386 }, { "epoch": 0.44, "grad_norm": 5.324375746407105, "learning_rate": 6.192542684648966e-06, "loss": 0.343, "step": 15387 }, { "epoch": 0.44, "grad_norm": 2.685017592551849, "learning_rate": 6.192092304929851e-06, "loss": 0.1694, "step": 15388 }, { "epoch": 0.44, "grad_norm": 10.763996859320015, "learning_rate": 6.191641914955311e-06, "loss": 0.7763, "step": 15389 }, { "epoch": 0.44, "grad_norm": 8.294109681069532, "learning_rate": 6.191191514729221e-06, "loss": 0.6113, "step": 15390 }, { "epoch": 0.44, "grad_norm": 5.79548574905019, "learning_rate": 6.190741104255459e-06, "loss": 0.1711, "step": 15391 }, { "epoch": 0.44, "grad_norm": 5.794969847158826, "learning_rate": 6.190290683537898e-06, "loss": 0.3473, "step": 15392 }, { "epoch": 0.44, "grad_norm": 7.8494678341229385, "learning_rate": 6.189840252580413e-06, "loss": 0.8879, "step": 15393 }, { "epoch": 0.44, "grad_norm": 7.260667615044501, "learning_rate": 6.189389811386876e-06, "loss": 0.3702, "step": 15394 }, { "epoch": 0.44, "grad_norm": 10.08126417846061, "learning_rate": 6.188939359961168e-06, "loss": 1.0555, "step": 15395 }, { "epoch": 0.44, "grad_norm": 5.254815821990703, "learning_rate": 6.188488898307159e-06, "loss": 0.6101, "step": 15396 }, { "epoch": 0.44, "grad_norm": 5.896670267486048, "learning_rate": 6.1880384264287276e-06, "loss": 0.5385, "step": 15397 }, { "epoch": 0.44, "grad_norm": 4.784792018185074, "learning_rate": 6.187587944329748e-06, "loss": 0.131, "step": 15398 }, { "epoch": 0.44, "grad_norm": 5.346474471661026, "learning_rate": 6.187137452014096e-06, "loss": 0.7495, "step": 15399 }, { "epoch": 0.44, "grad_norm": 6.3881789471828725, "learning_rate": 6.186686949485645e-06, "loss": 0.5679, "step": 15400 }, { "epoch": 0.44, "grad_norm": 8.263624471998659, "learning_rate": 6.186236436748273e-06, "loss": 0.8353, "step": 15401 }, { "epoch": 0.44, "grad_norm": 4.583802774259192, "learning_rate": 6.1857859138058555e-06, "loss": 0.55, "step": 15402 }, { "epoch": 0.44, "grad_norm": 6.815484119552661, "learning_rate": 6.185335380662266e-06, "loss": 0.7419, "step": 15403 }, { "epoch": 0.44, "grad_norm": 4.243500114803194, "learning_rate": 6.184884837321384e-06, "loss": 0.4096, "step": 15404 }, { "epoch": 0.44, "grad_norm": 3.916949503743806, "learning_rate": 6.184434283787084e-06, "loss": 0.5155, "step": 15405 }, { "epoch": 0.44, "grad_norm": 7.165864208302943, "learning_rate": 6.18398372006324e-06, "loss": 0.7167, "step": 15406 }, { "epoch": 0.44, "grad_norm": 4.0850856031509055, "learning_rate": 6.1835331461537305e-06, "loss": 0.6609, "step": 15407 }, { "epoch": 0.44, "grad_norm": 9.485609498979949, "learning_rate": 6.1830825620624325e-06, "loss": 0.4006, "step": 15408 }, { "epoch": 0.44, "grad_norm": 4.494403378839061, "learning_rate": 6.182631967793219e-06, "loss": 0.5964, "step": 15409 }, { "epoch": 0.44, "grad_norm": 4.326298407432877, "learning_rate": 6.182181363349969e-06, "loss": 0.2615, "step": 15410 }, { "epoch": 0.44, "grad_norm": 5.999989986411322, "learning_rate": 6.181730748736558e-06, "loss": 0.5087, "step": 15411 }, { "epoch": 0.44, "grad_norm": 4.402171364280303, "learning_rate": 6.181280123956863e-06, "loss": 0.3093, "step": 15412 }, { "epoch": 0.44, "grad_norm": 5.064996103998853, "learning_rate": 6.18082948901476e-06, "loss": 0.3804, "step": 15413 }, { "epoch": 0.44, "grad_norm": 5.386784498492591, "learning_rate": 6.180378843914126e-06, "loss": 0.539, "step": 15414 }, { "epoch": 0.44, "grad_norm": 5.23325561396885, "learning_rate": 6.17992818865884e-06, "loss": 0.539, "step": 15415 }, { "epoch": 0.44, "grad_norm": 8.551770135222222, "learning_rate": 6.179477523252777e-06, "loss": 0.4936, "step": 15416 }, { "epoch": 0.44, "grad_norm": 6.273966043708121, "learning_rate": 6.179026847699813e-06, "loss": 0.5561, "step": 15417 }, { "epoch": 0.44, "grad_norm": 5.568377357970348, "learning_rate": 6.178576162003825e-06, "loss": 0.8517, "step": 15418 }, { "epoch": 0.44, "grad_norm": 4.239730348154714, "learning_rate": 6.178125466168694e-06, "loss": 0.5598, "step": 15419 }, { "epoch": 0.44, "grad_norm": 4.722346091359955, "learning_rate": 6.177674760198292e-06, "loss": 0.4645, "step": 15420 }, { "epoch": 0.44, "grad_norm": 4.278078282660571, "learning_rate": 6.1772240440965e-06, "loss": 0.7001, "step": 15421 }, { "epoch": 0.44, "grad_norm": 6.942426899506919, "learning_rate": 6.176773317867196e-06, "loss": 0.3291, "step": 15422 }, { "epoch": 0.44, "grad_norm": 14.150283128581512, "learning_rate": 6.176322581514256e-06, "loss": 0.8062, "step": 15423 }, { "epoch": 0.44, "grad_norm": 6.468740638320709, "learning_rate": 6.1758718350415555e-06, "loss": 0.6017, "step": 15424 }, { "epoch": 0.44, "grad_norm": 1.9793816391159313, "learning_rate": 6.175421078452977e-06, "loss": 0.1746, "step": 15425 }, { "epoch": 0.44, "grad_norm": 5.383270332786623, "learning_rate": 6.174970311752394e-06, "loss": 0.2018, "step": 15426 }, { "epoch": 0.44, "grad_norm": 4.134232448389531, "learning_rate": 6.174519534943686e-06, "loss": 0.3709, "step": 15427 }, { "epoch": 0.44, "grad_norm": 5.772758598829726, "learning_rate": 6.174068748030731e-06, "loss": 0.6163, "step": 15428 }, { "epoch": 0.44, "grad_norm": 5.650642270742274, "learning_rate": 6.1736179510174074e-06, "loss": 0.5204, "step": 15429 }, { "epoch": 0.44, "grad_norm": 7.3492719302641305, "learning_rate": 6.173167143907593e-06, "loss": 0.5939, "step": 15430 }, { "epoch": 0.44, "grad_norm": 5.103817918761393, "learning_rate": 6.172716326705166e-06, "loss": 0.2983, "step": 15431 }, { "epoch": 0.44, "grad_norm": 5.251816072161767, "learning_rate": 6.172265499414006e-06, "loss": 0.4268, "step": 15432 }, { "epoch": 0.44, "grad_norm": 5.558490679838179, "learning_rate": 6.171814662037989e-06, "loss": 0.6743, "step": 15433 }, { "epoch": 0.44, "grad_norm": 5.014819594510925, "learning_rate": 6.171363814580995e-06, "loss": 0.2581, "step": 15434 }, { "epoch": 0.44, "grad_norm": 1.9769865644092002, "learning_rate": 6.170912957046902e-06, "loss": 0.1583, "step": 15435 }, { "epoch": 0.44, "grad_norm": 9.269663556196004, "learning_rate": 6.170462089439589e-06, "loss": 0.7874, "step": 15436 }, { "epoch": 0.44, "grad_norm": 7.360318327944238, "learning_rate": 6.170011211762935e-06, "loss": 0.9569, "step": 15437 }, { "epoch": 0.44, "grad_norm": 9.67025708792721, "learning_rate": 6.169560324020819e-06, "loss": 0.5419, "step": 15438 }, { "epoch": 0.44, "grad_norm": 4.708380731270926, "learning_rate": 6.169109426217119e-06, "loss": 0.6209, "step": 15439 }, { "epoch": 0.44, "grad_norm": 5.875486353770304, "learning_rate": 6.1686585183557144e-06, "loss": 0.4308, "step": 15440 }, { "epoch": 0.44, "grad_norm": 7.741480790265504, "learning_rate": 6.168207600440485e-06, "loss": 0.8937, "step": 15441 }, { "epoch": 0.44, "grad_norm": 11.184242669404789, "learning_rate": 6.16775667247531e-06, "loss": 0.4459, "step": 15442 }, { "epoch": 0.44, "grad_norm": 7.264126399549558, "learning_rate": 6.167305734464067e-06, "loss": 0.3884, "step": 15443 }, { "epoch": 0.44, "grad_norm": 4.6461622295653795, "learning_rate": 6.166854786410637e-06, "loss": 0.4268, "step": 15444 }, { "epoch": 0.44, "grad_norm": 4.395804123382182, "learning_rate": 6.1664038283188984e-06, "loss": 0.2732, "step": 15445 }, { "epoch": 0.44, "grad_norm": 6.1529105176688486, "learning_rate": 6.165952860192732e-06, "loss": 1.0341, "step": 15446 }, { "epoch": 0.44, "grad_norm": 2.7010597100901395, "learning_rate": 6.165501882036017e-06, "loss": 0.4, "step": 15447 }, { "epoch": 0.44, "grad_norm": 8.054958868034191, "learning_rate": 6.165050893852632e-06, "loss": 0.7026, "step": 15448 }, { "epoch": 0.44, "grad_norm": 4.529424760139043, "learning_rate": 6.1645998956464585e-06, "loss": 0.3715, "step": 15449 }, { "epoch": 0.44, "grad_norm": 8.009110270220821, "learning_rate": 6.164148887421375e-06, "loss": 0.5801, "step": 15450 }, { "epoch": 0.44, "grad_norm": 3.0570977499884786, "learning_rate": 6.163697869181261e-06, "loss": 0.2524, "step": 15451 }, { "epoch": 0.44, "grad_norm": 9.040806106773456, "learning_rate": 6.163246840929999e-06, "loss": 0.7587, "step": 15452 }, { "epoch": 0.44, "grad_norm": 4.029029500783709, "learning_rate": 6.162795802671466e-06, "loss": 0.2472, "step": 15453 }, { "epoch": 0.44, "grad_norm": 8.87153051284645, "learning_rate": 6.162344754409545e-06, "loss": 0.3492, "step": 15454 }, { "epoch": 0.44, "grad_norm": 6.237123309057934, "learning_rate": 6.161893696148116e-06, "loss": 0.8831, "step": 15455 }, { "epoch": 0.44, "grad_norm": 4.98158262475241, "learning_rate": 6.161442627891056e-06, "loss": 0.2363, "step": 15456 }, { "epoch": 0.44, "grad_norm": 8.575151828055672, "learning_rate": 6.16099154964225e-06, "loss": 1.2565, "step": 15457 }, { "epoch": 0.44, "grad_norm": 5.741515471908903, "learning_rate": 6.160540461405576e-06, "loss": 0.5107, "step": 15458 }, { "epoch": 0.44, "grad_norm": 2.9911403865632877, "learning_rate": 6.160089363184917e-06, "loss": 0.1797, "step": 15459 }, { "epoch": 0.44, "grad_norm": 7.243474325442503, "learning_rate": 6.15963825498415e-06, "loss": 0.5724, "step": 15460 }, { "epoch": 0.44, "grad_norm": 8.379398955684144, "learning_rate": 6.159187136807157e-06, "loss": 0.4145, "step": 15461 }, { "epoch": 0.44, "grad_norm": 10.370144454700357, "learning_rate": 6.158736008657823e-06, "loss": 0.5758, "step": 15462 }, { "epoch": 0.44, "grad_norm": 4.103232953640883, "learning_rate": 6.158284870540024e-06, "loss": 0.5478, "step": 15463 }, { "epoch": 0.44, "grad_norm": 9.374035900134308, "learning_rate": 6.1578337224576425e-06, "loss": 0.4122, "step": 15464 }, { "epoch": 0.44, "grad_norm": 7.6237705677891485, "learning_rate": 6.15738256441456e-06, "loss": 0.568, "step": 15465 }, { "epoch": 0.44, "grad_norm": 7.196371557637992, "learning_rate": 6.156931396414659e-06, "loss": 0.5586, "step": 15466 }, { "epoch": 0.44, "grad_norm": 4.996352820580323, "learning_rate": 6.15648021846182e-06, "loss": 0.3248, "step": 15467 }, { "epoch": 0.44, "grad_norm": 8.395815304215402, "learning_rate": 6.156029030559922e-06, "loss": 0.8724, "step": 15468 }, { "epoch": 0.44, "grad_norm": 8.416698301526536, "learning_rate": 6.155577832712849e-06, "loss": 0.635, "step": 15469 }, { "epoch": 0.44, "grad_norm": 6.720998556333596, "learning_rate": 6.155126624924484e-06, "loss": 0.6292, "step": 15470 }, { "epoch": 0.44, "grad_norm": 7.689243026006414, "learning_rate": 6.1546754071987046e-06, "loss": 0.7339, "step": 15471 }, { "epoch": 0.44, "grad_norm": 4.4773050860691, "learning_rate": 6.1542241795393965e-06, "loss": 0.4477, "step": 15472 }, { "epoch": 0.44, "grad_norm": 8.017499262134981, "learning_rate": 6.15377294195044e-06, "loss": 0.3113, "step": 15473 }, { "epoch": 0.44, "grad_norm": 6.958911776532012, "learning_rate": 6.153321694435716e-06, "loss": 0.7104, "step": 15474 }, { "epoch": 0.44, "grad_norm": 7.832179742104639, "learning_rate": 6.152870436999107e-06, "loss": 0.404, "step": 15475 }, { "epoch": 0.44, "grad_norm": 5.527597719430346, "learning_rate": 6.1524191696444965e-06, "loss": 0.576, "step": 15476 }, { "epoch": 0.44, "grad_norm": 6.362141319162385, "learning_rate": 6.1519678923757655e-06, "loss": 0.4037, "step": 15477 }, { "epoch": 0.44, "grad_norm": 7.079632764818122, "learning_rate": 6.151516605196796e-06, "loss": 0.5189, "step": 15478 }, { "epoch": 0.44, "grad_norm": 10.642739510183278, "learning_rate": 6.151065308111472e-06, "loss": 0.6697, "step": 15479 }, { "epoch": 0.44, "grad_norm": 8.742200372463154, "learning_rate": 6.150614001123675e-06, "loss": 0.7344, "step": 15480 }, { "epoch": 0.44, "grad_norm": 4.13374925175635, "learning_rate": 6.150162684237287e-06, "loss": 0.3078, "step": 15481 }, { "epoch": 0.44, "grad_norm": 3.19319997033816, "learning_rate": 6.149711357456191e-06, "loss": 0.2335, "step": 15482 }, { "epoch": 0.44, "grad_norm": 6.7713270584462455, "learning_rate": 6.149260020784269e-06, "loss": 0.4503, "step": 15483 }, { "epoch": 0.44, "grad_norm": 9.530648784749113, "learning_rate": 6.148808674225405e-06, "loss": 0.4663, "step": 15484 }, { "epoch": 0.44, "grad_norm": 3.453458364284038, "learning_rate": 6.148357317783482e-06, "loss": 0.3841, "step": 15485 }, { "epoch": 0.44, "grad_norm": 1.6838950044741923, "learning_rate": 6.147905951462382e-06, "loss": 0.1172, "step": 15486 }, { "epoch": 0.44, "grad_norm": 8.456657531797502, "learning_rate": 6.147454575265988e-06, "loss": 0.9207, "step": 15487 }, { "epoch": 0.44, "grad_norm": 7.214721810839299, "learning_rate": 6.147003189198183e-06, "loss": 0.3568, "step": 15488 }, { "epoch": 0.44, "grad_norm": 3.850106387712039, "learning_rate": 6.146551793262853e-06, "loss": 0.3751, "step": 15489 }, { "epoch": 0.44, "grad_norm": 5.93107546518387, "learning_rate": 6.146100387463878e-06, "loss": 0.3141, "step": 15490 }, { "epoch": 0.44, "grad_norm": 6.348735935821247, "learning_rate": 6.145648971805141e-06, "loss": 0.5036, "step": 15491 }, { "epoch": 0.44, "grad_norm": 4.5212078496029715, "learning_rate": 6.145197546290529e-06, "loss": 0.6058, "step": 15492 }, { "epoch": 0.44, "grad_norm": 7.8362832568362455, "learning_rate": 6.144746110923924e-06, "loss": 0.8112, "step": 15493 }, { "epoch": 0.44, "grad_norm": 6.2488727697470825, "learning_rate": 6.1442946657092065e-06, "loss": 0.8404, "step": 15494 }, { "epoch": 0.44, "grad_norm": 6.334147848489346, "learning_rate": 6.143843210650263e-06, "loss": 0.6921, "step": 15495 }, { "epoch": 0.44, "grad_norm": 4.676768008380512, "learning_rate": 6.14339174575098e-06, "loss": 0.6304, "step": 15496 }, { "epoch": 0.44, "grad_norm": 4.427574676474096, "learning_rate": 6.142940271015237e-06, "loss": 0.258, "step": 15497 }, { "epoch": 0.44, "grad_norm": 4.83014853827599, "learning_rate": 6.14248878644692e-06, "loss": 0.2272, "step": 15498 }, { "epoch": 0.44, "grad_norm": 4.617376178429945, "learning_rate": 6.142037292049913e-06, "loss": 0.3669, "step": 15499 }, { "epoch": 0.44, "grad_norm": 4.353607683333683, "learning_rate": 6.141585787828098e-06, "loss": 0.6036, "step": 15500 }, { "epoch": 0.44, "grad_norm": 4.95390968244428, "learning_rate": 6.141134273785362e-06, "loss": 0.4714, "step": 15501 }, { "epoch": 0.44, "grad_norm": 6.154457509067006, "learning_rate": 6.140682749925588e-06, "loss": 0.2912, "step": 15502 }, { "epoch": 0.44, "grad_norm": 7.317506494600668, "learning_rate": 6.140231216252661e-06, "loss": 0.6053, "step": 15503 }, { "epoch": 0.44, "grad_norm": 5.143666118619242, "learning_rate": 6.139779672770466e-06, "loss": 0.6864, "step": 15504 }, { "epoch": 0.44, "grad_norm": 3.706733120827422, "learning_rate": 6.139328119482886e-06, "loss": 0.2078, "step": 15505 }, { "epoch": 0.44, "grad_norm": 7.877401197776924, "learning_rate": 6.138876556393807e-06, "loss": 0.582, "step": 15506 }, { "epoch": 0.44, "grad_norm": 3.85512376534652, "learning_rate": 6.138424983507113e-06, "loss": 0.6796, "step": 15507 }, { "epoch": 0.44, "grad_norm": 5.390090606295926, "learning_rate": 6.1379734008266885e-06, "loss": 0.454, "step": 15508 }, { "epoch": 0.44, "grad_norm": 6.402493152190229, "learning_rate": 6.137521808356419e-06, "loss": 0.4745, "step": 15509 }, { "epoch": 0.44, "grad_norm": 9.576457390291635, "learning_rate": 6.137070206100191e-06, "loss": 0.6446, "step": 15510 }, { "epoch": 0.44, "grad_norm": 5.688166778380847, "learning_rate": 6.136618594061886e-06, "loss": 0.2814, "step": 15511 }, { "epoch": 0.44, "grad_norm": 5.483090459475011, "learning_rate": 6.136166972245393e-06, "loss": 0.3353, "step": 15512 }, { "epoch": 0.44, "grad_norm": 6.798097838094756, "learning_rate": 6.135715340654594e-06, "loss": 0.529, "step": 15513 }, { "epoch": 0.44, "grad_norm": 5.76386504979036, "learning_rate": 6.135263699293376e-06, "loss": 0.3603, "step": 15514 }, { "epoch": 0.44, "grad_norm": 3.2072663122308387, "learning_rate": 6.1348120481656235e-06, "loss": 0.3479, "step": 15515 }, { "epoch": 0.44, "grad_norm": 5.522579013091716, "learning_rate": 6.134360387275225e-06, "loss": 0.432, "step": 15516 }, { "epoch": 0.44, "grad_norm": 2.021358305459887, "learning_rate": 6.133908716626062e-06, "loss": 0.2849, "step": 15517 }, { "epoch": 0.44, "grad_norm": 7.486195321003608, "learning_rate": 6.133457036222021e-06, "loss": 0.5934, "step": 15518 }, { "epoch": 0.44, "grad_norm": 5.128161780895529, "learning_rate": 6.133005346066991e-06, "loss": 0.4494, "step": 15519 }, { "epoch": 0.44, "grad_norm": 9.801136799486876, "learning_rate": 6.132553646164854e-06, "loss": 0.4777, "step": 15520 }, { "epoch": 0.44, "grad_norm": 45.83989090333976, "learning_rate": 6.1321019365194975e-06, "loss": 0.3051, "step": 15521 }, { "epoch": 0.44, "grad_norm": 6.003864553749772, "learning_rate": 6.131650217134808e-06, "loss": 0.5625, "step": 15522 }, { "epoch": 0.44, "grad_norm": 4.600934359774755, "learning_rate": 6.131198488014672e-06, "loss": 0.4244, "step": 15523 }, { "epoch": 0.44, "grad_norm": 6.0097204782540095, "learning_rate": 6.130746749162972e-06, "loss": 0.3579, "step": 15524 }, { "epoch": 0.44, "grad_norm": 4.678236849561955, "learning_rate": 6.130295000583599e-06, "loss": 0.7594, "step": 15525 }, { "epoch": 0.44, "grad_norm": 8.104295848872663, "learning_rate": 6.129843242280437e-06, "loss": 0.6281, "step": 15526 }, { "epoch": 0.44, "grad_norm": 5.528333423021462, "learning_rate": 6.129391474257371e-06, "loss": 0.5779, "step": 15527 }, { "epoch": 0.44, "grad_norm": 6.060455370749156, "learning_rate": 6.128939696518291e-06, "loss": 0.2963, "step": 15528 }, { "epoch": 0.44, "grad_norm": 2.284145111957413, "learning_rate": 6.128487909067082e-06, "loss": 0.2608, "step": 15529 }, { "epoch": 0.44, "grad_norm": 7.1963873276852155, "learning_rate": 6.12803611190763e-06, "loss": 0.2291, "step": 15530 }, { "epoch": 0.44, "grad_norm": 6.421061294869083, "learning_rate": 6.127584305043822e-06, "loss": 0.3929, "step": 15531 }, { "epoch": 0.44, "grad_norm": 10.43940233266409, "learning_rate": 6.127132488479545e-06, "loss": 0.7964, "step": 15532 }, { "epoch": 0.44, "grad_norm": 4.667277852407622, "learning_rate": 6.126680662218687e-06, "loss": 0.4804, "step": 15533 }, { "epoch": 0.44, "grad_norm": 2.4550811961783756, "learning_rate": 6.126228826265132e-06, "loss": 0.2075, "step": 15534 }, { "epoch": 0.44, "grad_norm": 6.809792076667349, "learning_rate": 6.12577698062277e-06, "loss": 0.6852, "step": 15535 }, { "epoch": 0.44, "grad_norm": 5.641491514310952, "learning_rate": 6.1253251252954875e-06, "loss": 0.716, "step": 15536 }, { "epoch": 0.44, "grad_norm": 3.595945864994487, "learning_rate": 6.124873260287172e-06, "loss": 0.4663, "step": 15537 }, { "epoch": 0.44, "grad_norm": 10.336558420643106, "learning_rate": 6.124421385601709e-06, "loss": 0.8698, "step": 15538 }, { "epoch": 0.45, "grad_norm": 3.277587554192836, "learning_rate": 6.123969501242988e-06, "loss": 0.2281, "step": 15539 }, { "epoch": 0.45, "grad_norm": 14.704097346748416, "learning_rate": 6.123517607214896e-06, "loss": 0.6017, "step": 15540 }, { "epoch": 0.45, "grad_norm": 7.139511188476139, "learning_rate": 6.1230657035213205e-06, "loss": 0.965, "step": 15541 }, { "epoch": 0.45, "grad_norm": 11.076808835117024, "learning_rate": 6.122613790166148e-06, "loss": 0.3505, "step": 15542 }, { "epoch": 0.45, "grad_norm": 3.599590431863137, "learning_rate": 6.1221618671532675e-06, "loss": 0.3902, "step": 15543 }, { "epoch": 0.45, "grad_norm": 6.4866828764013444, "learning_rate": 6.121709934486567e-06, "loss": 0.3723, "step": 15544 }, { "epoch": 0.45, "grad_norm": 7.259993013110622, "learning_rate": 6.1212579921699345e-06, "loss": 0.5252, "step": 15545 }, { "epoch": 0.45, "grad_norm": 5.671246033991468, "learning_rate": 6.120806040207258e-06, "loss": 0.364, "step": 15546 }, { "epoch": 0.45, "grad_norm": 12.54083479005466, "learning_rate": 6.120354078602424e-06, "loss": 0.6272, "step": 15547 }, { "epoch": 0.45, "grad_norm": 6.090388461472166, "learning_rate": 6.119902107359321e-06, "loss": 0.4925, "step": 15548 }, { "epoch": 0.45, "grad_norm": 7.291969038733847, "learning_rate": 6.11945012648184e-06, "loss": 0.5863, "step": 15549 }, { "epoch": 0.45, "grad_norm": 4.835192892651033, "learning_rate": 6.118998135973867e-06, "loss": 0.4878, "step": 15550 }, { "epoch": 0.45, "grad_norm": 4.565813037464929, "learning_rate": 6.118546135839289e-06, "loss": 0.6703, "step": 15551 }, { "epoch": 0.45, "grad_norm": 5.4931099823726806, "learning_rate": 6.118094126081997e-06, "loss": 0.8481, "step": 15552 }, { "epoch": 0.45, "grad_norm": 5.481605787050307, "learning_rate": 6.117642106705881e-06, "loss": 0.3263, "step": 15553 }, { "epoch": 0.45, "grad_norm": 6.480725817327062, "learning_rate": 6.1171900777148255e-06, "loss": 0.784, "step": 15554 }, { "epoch": 0.45, "grad_norm": 4.522077207651081, "learning_rate": 6.116738039112722e-06, "loss": 0.4575, "step": 15555 }, { "epoch": 0.45, "grad_norm": 9.249028953926794, "learning_rate": 6.1162859909034585e-06, "loss": 0.5864, "step": 15556 }, { "epoch": 0.45, "grad_norm": 5.626076256256574, "learning_rate": 6.115833933090924e-06, "loss": 0.5851, "step": 15557 }, { "epoch": 0.45, "grad_norm": 3.0851869080740415, "learning_rate": 6.1153818656790076e-06, "loss": 0.3277, "step": 15558 }, { "epoch": 0.45, "grad_norm": 6.832015476459422, "learning_rate": 6.114929788671599e-06, "loss": 0.7355, "step": 15559 }, { "epoch": 0.45, "grad_norm": 5.478031282553569, "learning_rate": 6.114477702072585e-06, "loss": 0.4395, "step": 15560 }, { "epoch": 0.45, "grad_norm": 3.3053695479897205, "learning_rate": 6.114025605885858e-06, "loss": 0.3152, "step": 15561 }, { "epoch": 0.45, "grad_norm": 7.193907875032184, "learning_rate": 6.113573500115305e-06, "loss": 0.4241, "step": 15562 }, { "epoch": 0.45, "grad_norm": 4.479834727065691, "learning_rate": 6.113121384764818e-06, "loss": 0.6441, "step": 15563 }, { "epoch": 0.45, "grad_norm": 9.145266927386906, "learning_rate": 6.1126692598382834e-06, "loss": 0.9933, "step": 15564 }, { "epoch": 0.45, "grad_norm": 4.744533304270251, "learning_rate": 6.112217125339592e-06, "loss": 0.5728, "step": 15565 }, { "epoch": 0.45, "grad_norm": 3.7875195065238385, "learning_rate": 6.111764981272634e-06, "loss": 0.5065, "step": 15566 }, { "epoch": 0.45, "grad_norm": 10.983062882824896, "learning_rate": 6.111312827641299e-06, "loss": 0.532, "step": 15567 }, { "epoch": 0.45, "grad_norm": 4.085270931219455, "learning_rate": 6.110860664449476e-06, "loss": 0.3365, "step": 15568 }, { "epoch": 0.45, "grad_norm": 4.507300600707608, "learning_rate": 6.1104084917010565e-06, "loss": 0.5819, "step": 15569 }, { "epoch": 0.45, "grad_norm": 5.398326690464029, "learning_rate": 6.109956309399928e-06, "loss": 0.4858, "step": 15570 }, { "epoch": 0.45, "grad_norm": 4.972492799672187, "learning_rate": 6.109504117549983e-06, "loss": 0.4172, "step": 15571 }, { "epoch": 0.45, "grad_norm": 8.585213155971823, "learning_rate": 6.10905191615511e-06, "loss": 0.4465, "step": 15572 }, { "epoch": 0.45, "grad_norm": 5.914323889172797, "learning_rate": 6.1085997052192015e-06, "loss": 0.9003, "step": 15573 }, { "epoch": 0.45, "grad_norm": 4.018163657833897, "learning_rate": 6.1081474847461444e-06, "loss": 0.2879, "step": 15574 }, { "epoch": 0.45, "grad_norm": 5.342389340644775, "learning_rate": 6.1076952547398315e-06, "loss": 0.6889, "step": 15575 }, { "epoch": 0.45, "grad_norm": 8.225136657785425, "learning_rate": 6.107243015204153e-06, "loss": 0.6768, "step": 15576 }, { "epoch": 0.45, "grad_norm": 10.79150239286744, "learning_rate": 6.106790766142998e-06, "loss": 0.7551, "step": 15577 }, { "epoch": 0.45, "grad_norm": 3.5672311821099765, "learning_rate": 6.10633850756026e-06, "loss": 0.2746, "step": 15578 }, { "epoch": 0.45, "grad_norm": 3.917509559809566, "learning_rate": 6.105886239459826e-06, "loss": 0.2191, "step": 15579 }, { "epoch": 0.45, "grad_norm": 4.622322183658053, "learning_rate": 6.105433961845591e-06, "loss": 0.2795, "step": 15580 }, { "epoch": 0.45, "grad_norm": 3.9863809360592604, "learning_rate": 6.1049816747214444e-06, "loss": 0.3937, "step": 15581 }, { "epoch": 0.45, "grad_norm": 6.875775102790969, "learning_rate": 6.104529378091274e-06, "loss": 0.4056, "step": 15582 }, { "epoch": 0.45, "grad_norm": 7.2835339087213296, "learning_rate": 6.104077071958975e-06, "loss": 0.6643, "step": 15583 }, { "epoch": 0.45, "grad_norm": 8.582987423441905, "learning_rate": 6.103624756328434e-06, "loss": 0.4541, "step": 15584 }, { "epoch": 0.45, "grad_norm": 1.688670035331901, "learning_rate": 6.103172431203548e-06, "loss": 0.194, "step": 15585 }, { "epoch": 0.45, "grad_norm": 5.235398240922877, "learning_rate": 6.102720096588206e-06, "loss": 0.7052, "step": 15586 }, { "epoch": 0.45, "grad_norm": 7.260010812399303, "learning_rate": 6.102267752486298e-06, "loss": 0.725, "step": 15587 }, { "epoch": 0.45, "grad_norm": 9.860406449860234, "learning_rate": 6.101815398901715e-06, "loss": 0.5242, "step": 15588 }, { "epoch": 0.45, "grad_norm": 6.295816072162198, "learning_rate": 6.1013630358383504e-06, "loss": 0.4652, "step": 15589 }, { "epoch": 0.45, "grad_norm": 5.299648582756031, "learning_rate": 6.100910663300096e-06, "loss": 0.3953, "step": 15590 }, { "epoch": 0.45, "grad_norm": 5.480874272934504, "learning_rate": 6.100458281290842e-06, "loss": 0.5103, "step": 15591 }, { "epoch": 0.45, "grad_norm": 1.8047552219911913, "learning_rate": 6.1000058898144795e-06, "loss": 0.1285, "step": 15592 }, { "epoch": 0.45, "grad_norm": 4.560161566130778, "learning_rate": 6.099553488874904e-06, "loss": 0.3556, "step": 15593 }, { "epoch": 0.45, "grad_norm": 4.720462412570273, "learning_rate": 6.099101078476004e-06, "loss": 0.5027, "step": 15594 }, { "epoch": 0.45, "grad_norm": 4.160619274752624, "learning_rate": 6.098648658621673e-06, "loss": 0.2104, "step": 15595 }, { "epoch": 0.45, "grad_norm": 6.467278170266848, "learning_rate": 6.098196229315804e-06, "loss": 0.9598, "step": 15596 }, { "epoch": 0.45, "grad_norm": 3.613175278217968, "learning_rate": 6.0977437905622875e-06, "loss": 0.199, "step": 15597 }, { "epoch": 0.45, "grad_norm": 3.353872752294483, "learning_rate": 6.097291342365016e-06, "loss": 0.4785, "step": 15598 }, { "epoch": 0.45, "grad_norm": 8.636458484001068, "learning_rate": 6.0968388847278815e-06, "loss": 0.6475, "step": 15599 }, { "epoch": 0.45, "grad_norm": 4.276695724937648, "learning_rate": 6.096386417654777e-06, "loss": 0.4129, "step": 15600 }, { "epoch": 0.45, "grad_norm": 8.902759604221414, "learning_rate": 6.095933941149597e-06, "loss": 0.7084, "step": 15601 }, { "epoch": 0.45, "grad_norm": 8.369119373058636, "learning_rate": 6.09548145521623e-06, "loss": 0.6685, "step": 15602 }, { "epoch": 0.45, "grad_norm": 3.1051361097766144, "learning_rate": 6.0950289598585736e-06, "loss": 0.2455, "step": 15603 }, { "epoch": 0.45, "grad_norm": 3.9429301103361856, "learning_rate": 6.0945764550805154e-06, "loss": 0.3993, "step": 15604 }, { "epoch": 0.45, "grad_norm": 4.518729650681132, "learning_rate": 6.094123940885952e-06, "loss": 0.4235, "step": 15605 }, { "epoch": 0.45, "grad_norm": 7.334819801993091, "learning_rate": 6.0936714172787735e-06, "loss": 0.6575, "step": 15606 }, { "epoch": 0.45, "grad_norm": 9.276651289946898, "learning_rate": 6.093218884262877e-06, "loss": 0.8618, "step": 15607 }, { "epoch": 0.45, "grad_norm": 8.528564253943484, "learning_rate": 6.092766341842152e-06, "loss": 0.5293, "step": 15608 }, { "epoch": 0.45, "grad_norm": 4.3080549136622714, "learning_rate": 6.092313790020491e-06, "loss": 0.298, "step": 15609 }, { "epoch": 0.45, "grad_norm": 6.907737192063984, "learning_rate": 6.091861228801791e-06, "loss": 0.6612, "step": 15610 }, { "epoch": 0.45, "grad_norm": 4.969323095224023, "learning_rate": 6.091408658189943e-06, "loss": 0.4734, "step": 15611 }, { "epoch": 0.45, "grad_norm": 5.011514851324049, "learning_rate": 6.09095607818884e-06, "loss": 0.2983, "step": 15612 }, { "epoch": 0.45, "grad_norm": 5.8220141489924355, "learning_rate": 6.090503488802378e-06, "loss": 0.3642, "step": 15613 }, { "epoch": 0.45, "grad_norm": 6.319252293032719, "learning_rate": 6.090050890034445e-06, "loss": 0.5454, "step": 15614 }, { "epoch": 0.45, "grad_norm": 8.32860004150927, "learning_rate": 6.089598281888941e-06, "loss": 0.6218, "step": 15615 }, { "epoch": 0.45, "grad_norm": 7.6191660966914325, "learning_rate": 6.0891456643697565e-06, "loss": 0.8285, "step": 15616 }, { "epoch": 0.45, "grad_norm": 6.051591162659255, "learning_rate": 6.088693037480786e-06, "loss": 0.3636, "step": 15617 }, { "epoch": 0.45, "grad_norm": 4.722248927520835, "learning_rate": 6.088240401225925e-06, "loss": 0.7227, "step": 15618 }, { "epoch": 0.45, "grad_norm": 5.455429814273626, "learning_rate": 6.0877877556090626e-06, "loss": 0.7791, "step": 15619 }, { "epoch": 0.45, "grad_norm": 7.046581659745623, "learning_rate": 6.0873351006341e-06, "loss": 0.5603, "step": 15620 }, { "epoch": 0.45, "grad_norm": 7.6645064696840155, "learning_rate": 6.086882436304924e-06, "loss": 0.6474, "step": 15621 }, { "epoch": 0.45, "grad_norm": 6.004673091683533, "learning_rate": 6.086429762625434e-06, "loss": 0.536, "step": 15622 }, { "epoch": 0.45, "grad_norm": 5.0229731177511185, "learning_rate": 6.085977079599521e-06, "loss": 0.4199, "step": 15623 }, { "epoch": 0.45, "grad_norm": 3.9368379732683874, "learning_rate": 6.085524387231083e-06, "loss": 0.5041, "step": 15624 }, { "epoch": 0.45, "grad_norm": 4.308612481056647, "learning_rate": 6.08507168552401e-06, "loss": 0.4375, "step": 15625 }, { "epoch": 0.45, "grad_norm": 4.21515519314617, "learning_rate": 6.0846189744822e-06, "loss": 0.4626, "step": 15626 }, { "epoch": 0.45, "grad_norm": 3.777372105678415, "learning_rate": 6.084166254109548e-06, "loss": 0.4503, "step": 15627 }, { "epoch": 0.45, "grad_norm": 6.733999857001534, "learning_rate": 6.0837135244099455e-06, "loss": 0.9059, "step": 15628 }, { "epoch": 0.45, "grad_norm": 4.88753337458462, "learning_rate": 6.083260785387289e-06, "loss": 0.6411, "step": 15629 }, { "epoch": 0.45, "grad_norm": 9.885056843186577, "learning_rate": 6.0828080370454745e-06, "loss": 0.9304, "step": 15630 }, { "epoch": 0.45, "grad_norm": 3.2125412400266664, "learning_rate": 6.082355279388395e-06, "loss": 0.2305, "step": 15631 }, { "epoch": 0.45, "grad_norm": 7.764296788430711, "learning_rate": 6.081902512419946e-06, "loss": 0.5834, "step": 15632 }, { "epoch": 0.45, "grad_norm": 6.986801647576181, "learning_rate": 6.081449736144023e-06, "loss": 0.7243, "step": 15633 }, { "epoch": 0.45, "grad_norm": 5.785875192049693, "learning_rate": 6.08099695056452e-06, "loss": 0.3516, "step": 15634 }, { "epoch": 0.45, "grad_norm": 5.760455866629119, "learning_rate": 6.080544155685335e-06, "loss": 0.7402, "step": 15635 }, { "epoch": 0.45, "grad_norm": 6.935987806041079, "learning_rate": 6.080091351510361e-06, "loss": 0.5752, "step": 15636 }, { "epoch": 0.45, "grad_norm": 5.9893627844853805, "learning_rate": 6.079638538043495e-06, "loss": 0.5183, "step": 15637 }, { "epoch": 0.45, "grad_norm": 5.7802108243092585, "learning_rate": 6.079185715288632e-06, "loss": 0.5053, "step": 15638 }, { "epoch": 0.45, "grad_norm": 6.9405115825071935, "learning_rate": 6.078732883249665e-06, "loss": 0.5456, "step": 15639 }, { "epoch": 0.45, "grad_norm": 4.323267445254601, "learning_rate": 6.078280041930492e-06, "loss": 0.4141, "step": 15640 }, { "epoch": 0.45, "grad_norm": 5.5375830822776315, "learning_rate": 6.0778271913350106e-06, "loss": 0.6588, "step": 15641 }, { "epoch": 0.45, "grad_norm": 3.9646813570788177, "learning_rate": 6.077374331467112e-06, "loss": 0.2933, "step": 15642 }, { "epoch": 0.45, "grad_norm": 8.608847323272297, "learning_rate": 6.076921462330697e-06, "loss": 0.7278, "step": 15643 }, { "epoch": 0.45, "grad_norm": 3.124159413771318, "learning_rate": 6.076468583929659e-06, "loss": 0.4247, "step": 15644 }, { "epoch": 0.45, "grad_norm": 6.465057203566396, "learning_rate": 6.076015696267893e-06, "loss": 0.692, "step": 15645 }, { "epoch": 0.45, "grad_norm": 10.447485253405107, "learning_rate": 6.075562799349297e-06, "loss": 0.333, "step": 15646 }, { "epoch": 0.45, "grad_norm": 7.853552813703995, "learning_rate": 6.0751098931777675e-06, "loss": 0.8619, "step": 15647 }, { "epoch": 0.45, "grad_norm": 8.837786385780298, "learning_rate": 6.074656977757198e-06, "loss": 0.4411, "step": 15648 }, { "epoch": 0.45, "grad_norm": 7.3934141384274685, "learning_rate": 6.074204053091489e-06, "loss": 0.4584, "step": 15649 }, { "epoch": 0.45, "grad_norm": 5.288393144498738, "learning_rate": 6.073751119184533e-06, "loss": 0.435, "step": 15650 }, { "epoch": 0.45, "grad_norm": 6.356439048633897, "learning_rate": 6.07329817604023e-06, "loss": 0.5536, "step": 15651 }, { "epoch": 0.45, "grad_norm": 9.23447321340935, "learning_rate": 6.072845223662474e-06, "loss": 0.6194, "step": 15652 }, { "epoch": 0.45, "grad_norm": 6.071685812413224, "learning_rate": 6.072392262055162e-06, "loss": 0.5728, "step": 15653 }, { "epoch": 0.45, "grad_norm": 4.650961618564054, "learning_rate": 6.0719392912221934e-06, "loss": 0.4513, "step": 15654 }, { "epoch": 0.45, "grad_norm": 37.42142471329268, "learning_rate": 6.07148631116746e-06, "loss": 0.8607, "step": 15655 }, { "epoch": 0.45, "grad_norm": 3.5033309277079483, "learning_rate": 6.071033321894865e-06, "loss": 0.252, "step": 15656 }, { "epoch": 0.45, "grad_norm": 3.320759354983484, "learning_rate": 6.070580323408302e-06, "loss": 0.2006, "step": 15657 }, { "epoch": 0.45, "grad_norm": 5.873118687791021, "learning_rate": 6.070127315711665e-06, "loss": 0.466, "step": 15658 }, { "epoch": 0.45, "grad_norm": 3.251317637442875, "learning_rate": 6.0696742988088566e-06, "loss": 0.413, "step": 15659 }, { "epoch": 0.45, "grad_norm": 5.177472338526579, "learning_rate": 6.069221272703772e-06, "loss": 0.6087, "step": 15660 }, { "epoch": 0.45, "grad_norm": 8.056642637310354, "learning_rate": 6.068768237400309e-06, "loss": 0.9125, "step": 15661 }, { "epoch": 0.45, "grad_norm": 2.1695186539072417, "learning_rate": 6.068315192902363e-06, "loss": 0.2316, "step": 15662 }, { "epoch": 0.45, "grad_norm": 8.064655222893025, "learning_rate": 6.067862139213833e-06, "loss": 0.6419, "step": 15663 }, { "epoch": 0.45, "grad_norm": 6.477761446388963, "learning_rate": 6.067409076338618e-06, "loss": 0.6592, "step": 15664 }, { "epoch": 0.45, "grad_norm": 5.371564654428022, "learning_rate": 6.066956004280613e-06, "loss": 0.5865, "step": 15665 }, { "epoch": 0.45, "grad_norm": 14.912626617994063, "learning_rate": 6.066502923043717e-06, "loss": 0.9, "step": 15666 }, { "epoch": 0.45, "grad_norm": 8.348361737469574, "learning_rate": 6.0660498326318285e-06, "loss": 0.9057, "step": 15667 }, { "epoch": 0.45, "grad_norm": 4.2072560029841295, "learning_rate": 6.065596733048843e-06, "loss": 0.3465, "step": 15668 }, { "epoch": 0.45, "grad_norm": 3.807576361508494, "learning_rate": 6.065143624298662e-06, "loss": 0.3936, "step": 15669 }, { "epoch": 0.45, "grad_norm": 2.6777362002666325, "learning_rate": 6.064690506385179e-06, "loss": 0.3262, "step": 15670 }, { "epoch": 0.45, "grad_norm": 7.443333365746017, "learning_rate": 6.064237379312297e-06, "loss": 0.6895, "step": 15671 }, { "epoch": 0.45, "grad_norm": 11.22127068979065, "learning_rate": 6.063784243083911e-06, "loss": 0.4951, "step": 15672 }, { "epoch": 0.45, "grad_norm": 7.129353180553709, "learning_rate": 6.06333109770392e-06, "loss": 0.6384, "step": 15673 }, { "epoch": 0.45, "grad_norm": 3.8532602000281253, "learning_rate": 6.062877943176222e-06, "loss": 0.5506, "step": 15674 }, { "epoch": 0.45, "grad_norm": 6.250917443650101, "learning_rate": 6.062424779504716e-06, "loss": 0.6483, "step": 15675 }, { "epoch": 0.45, "grad_norm": 5.79559262633088, "learning_rate": 6.061971606693301e-06, "loss": 0.6873, "step": 15676 }, { "epoch": 0.45, "grad_norm": 8.081366532408238, "learning_rate": 6.0615184247458754e-06, "loss": 0.8931, "step": 15677 }, { "epoch": 0.45, "grad_norm": 5.817985981116218, "learning_rate": 6.061065233666338e-06, "loss": 0.3877, "step": 15678 }, { "epoch": 0.45, "grad_norm": 3.8525585101546924, "learning_rate": 6.060612033458585e-06, "loss": 0.2545, "step": 15679 }, { "epoch": 0.45, "grad_norm": 5.8361298147683875, "learning_rate": 6.0601588241265185e-06, "loss": 0.3554, "step": 15680 }, { "epoch": 0.45, "grad_norm": 5.088573329466512, "learning_rate": 6.059705605674036e-06, "loss": 0.6587, "step": 15681 }, { "epoch": 0.45, "grad_norm": 8.29811552439806, "learning_rate": 6.059252378105038e-06, "loss": 0.8142, "step": 15682 }, { "epoch": 0.45, "grad_norm": 3.2592360133147262, "learning_rate": 6.0587991414234196e-06, "loss": 0.3912, "step": 15683 }, { "epoch": 0.45, "grad_norm": 6.887487964654228, "learning_rate": 6.0583458956330845e-06, "loss": 0.4686, "step": 15684 }, { "epoch": 0.45, "grad_norm": 6.237277089265896, "learning_rate": 6.057892640737928e-06, "loss": 0.3679, "step": 15685 }, { "epoch": 0.45, "grad_norm": 5.880535115525975, "learning_rate": 6.057439376741853e-06, "loss": 0.6892, "step": 15686 }, { "epoch": 0.45, "grad_norm": 6.97120171770815, "learning_rate": 6.056986103648758e-06, "loss": 0.4659, "step": 15687 }, { "epoch": 0.45, "grad_norm": 7.442531390497131, "learning_rate": 6.05653282146254e-06, "loss": 0.4411, "step": 15688 }, { "epoch": 0.45, "grad_norm": 2.833336491209028, "learning_rate": 6.0560795301871e-06, "loss": 0.492, "step": 15689 }, { "epoch": 0.45, "grad_norm": 3.6052382241528877, "learning_rate": 6.055626229826339e-06, "loss": 0.3655, "step": 15690 }, { "epoch": 0.45, "grad_norm": 8.825948801448407, "learning_rate": 6.055172920384155e-06, "loss": 0.4232, "step": 15691 }, { "epoch": 0.45, "grad_norm": 6.002934850712559, "learning_rate": 6.054719601864448e-06, "loss": 0.4859, "step": 15692 }, { "epoch": 0.45, "grad_norm": 6.858207168747383, "learning_rate": 6.054266274271119e-06, "loss": 0.6606, "step": 15693 }, { "epoch": 0.45, "grad_norm": 5.068151874741478, "learning_rate": 6.0538129376080665e-06, "loss": 0.6009, "step": 15694 }, { "epoch": 0.45, "grad_norm": 2.3696017590957816, "learning_rate": 6.053359591879191e-06, "loss": 0.3028, "step": 15695 }, { "epoch": 0.45, "grad_norm": 5.215030075344947, "learning_rate": 6.052906237088393e-06, "loss": 0.3203, "step": 15696 }, { "epoch": 0.45, "grad_norm": 6.21029003295305, "learning_rate": 6.052452873239572e-06, "loss": 0.4454, "step": 15697 }, { "epoch": 0.45, "grad_norm": 3.495568569525181, "learning_rate": 6.05199950033663e-06, "loss": 0.5596, "step": 15698 }, { "epoch": 0.45, "grad_norm": 4.701202354443286, "learning_rate": 6.051546118383462e-06, "loss": 0.5636, "step": 15699 }, { "epoch": 0.45, "grad_norm": 6.015910273141525, "learning_rate": 6.051092727383976e-06, "loss": 0.5931, "step": 15700 }, { "epoch": 0.45, "grad_norm": 4.611358810510131, "learning_rate": 6.050639327342067e-06, "loss": 0.6918, "step": 15701 }, { "epoch": 0.45, "grad_norm": 6.966076119807909, "learning_rate": 6.0501859182616375e-06, "loss": 0.3667, "step": 15702 }, { "epoch": 0.45, "grad_norm": 6.54153752857556, "learning_rate": 6.049732500146587e-06, "loss": 0.3647, "step": 15703 }, { "epoch": 0.45, "grad_norm": 2.4134904621956066, "learning_rate": 6.049279073000819e-06, "loss": 0.3468, "step": 15704 }, { "epoch": 0.45, "grad_norm": 5.571386493228779, "learning_rate": 6.04882563682823e-06, "loss": 0.3388, "step": 15705 }, { "epoch": 0.45, "grad_norm": 5.632771908511965, "learning_rate": 6.048372191632725e-06, "loss": 0.4562, "step": 15706 }, { "epoch": 0.45, "grad_norm": 8.171417281860418, "learning_rate": 6.047918737418203e-06, "loss": 0.6882, "step": 15707 }, { "epoch": 0.45, "grad_norm": 2.8478882306587803, "learning_rate": 6.047465274188564e-06, "loss": 0.3935, "step": 15708 }, { "epoch": 0.45, "grad_norm": 5.067959396661813, "learning_rate": 6.047011801947711e-06, "loss": 0.4265, "step": 15709 }, { "epoch": 0.45, "grad_norm": 4.283684810858154, "learning_rate": 6.046558320699544e-06, "loss": 0.6883, "step": 15710 }, { "epoch": 0.45, "grad_norm": 2.9499616280985808, "learning_rate": 6.046104830447966e-06, "loss": 0.1631, "step": 15711 }, { "epoch": 0.45, "grad_norm": 3.882499160250679, "learning_rate": 6.045651331196874e-06, "loss": 0.4657, "step": 15712 }, { "epoch": 0.45, "grad_norm": 4.477558950465305, "learning_rate": 6.045197822950175e-06, "loss": 0.2131, "step": 15713 }, { "epoch": 0.45, "grad_norm": 3.9616667548310547, "learning_rate": 6.044744305711767e-06, "loss": 0.4819, "step": 15714 }, { "epoch": 0.45, "grad_norm": 6.285347432417034, "learning_rate": 6.044290779485554e-06, "loss": 0.5538, "step": 15715 }, { "epoch": 0.45, "grad_norm": 3.9951702404392675, "learning_rate": 6.043837244275432e-06, "loss": 0.456, "step": 15716 }, { "epoch": 0.45, "grad_norm": 4.794679592871684, "learning_rate": 6.04338370008531e-06, "loss": 0.464, "step": 15717 }, { "epoch": 0.45, "grad_norm": 6.880660345191345, "learning_rate": 6.042930146919085e-06, "loss": 0.629, "step": 15718 }, { "epoch": 0.45, "grad_norm": 5.650581470039561, "learning_rate": 6.042476584780661e-06, "loss": 0.6358, "step": 15719 }, { "epoch": 0.45, "grad_norm": 2.9505216032309454, "learning_rate": 6.042023013673939e-06, "loss": 0.523, "step": 15720 }, { "epoch": 0.45, "grad_norm": 6.089756405285364, "learning_rate": 6.041569433602822e-06, "loss": 0.7495, "step": 15721 }, { "epoch": 0.45, "grad_norm": 8.828512969817721, "learning_rate": 6.04111584457121e-06, "loss": 0.697, "step": 15722 }, { "epoch": 0.45, "grad_norm": 17.14617902268972, "learning_rate": 6.040662246583008e-06, "loss": 0.8406, "step": 15723 }, { "epoch": 0.45, "grad_norm": 10.739497710120052, "learning_rate": 6.040208639642115e-06, "loss": 0.5486, "step": 15724 }, { "epoch": 0.45, "grad_norm": 4.730959777799737, "learning_rate": 6.039755023752437e-06, "loss": 0.3962, "step": 15725 }, { "epoch": 0.45, "grad_norm": 7.192009382012668, "learning_rate": 6.039301398917874e-06, "loss": 1.0179, "step": 15726 }, { "epoch": 0.45, "grad_norm": 4.344081070173332, "learning_rate": 6.038847765142329e-06, "loss": 0.3891, "step": 15727 }, { "epoch": 0.45, "grad_norm": 6.87996667576637, "learning_rate": 6.038394122429705e-06, "loss": 0.1703, "step": 15728 }, { "epoch": 0.45, "grad_norm": 7.063130983505908, "learning_rate": 6.037940470783904e-06, "loss": 0.3569, "step": 15729 }, { "epoch": 0.45, "grad_norm": 7.228157156193672, "learning_rate": 6.037486810208828e-06, "loss": 0.3609, "step": 15730 }, { "epoch": 0.45, "grad_norm": 5.775532632680912, "learning_rate": 6.0370331407083825e-06, "loss": 0.2744, "step": 15731 }, { "epoch": 0.45, "grad_norm": 5.424038546048261, "learning_rate": 6.036579462286466e-06, "loss": 0.4224, "step": 15732 }, { "epoch": 0.45, "grad_norm": 4.5126598483064315, "learning_rate": 6.036125774946986e-06, "loss": 0.18, "step": 15733 }, { "epoch": 0.45, "grad_norm": 2.877108277735948, "learning_rate": 6.035672078693844e-06, "loss": 0.1714, "step": 15734 }, { "epoch": 0.45, "grad_norm": 5.802163151500766, "learning_rate": 6.035218373530941e-06, "loss": 0.6125, "step": 15735 }, { "epoch": 0.45, "grad_norm": 6.193168163204968, "learning_rate": 6.034764659462182e-06, "loss": 0.5786, "step": 15736 }, { "epoch": 0.45, "grad_norm": 6.6835388145663845, "learning_rate": 6.034310936491471e-06, "loss": 0.5083, "step": 15737 }, { "epoch": 0.45, "grad_norm": 4.8451550322491785, "learning_rate": 6.033857204622711e-06, "loss": 0.3828, "step": 15738 }, { "epoch": 0.45, "grad_norm": 5.836018389405528, "learning_rate": 6.033403463859803e-06, "loss": 0.4794, "step": 15739 }, { "epoch": 0.45, "grad_norm": 2.184637819832161, "learning_rate": 6.0329497142066515e-06, "loss": 0.177, "step": 15740 }, { "epoch": 0.45, "grad_norm": 8.051700189776376, "learning_rate": 6.032495955667162e-06, "loss": 0.8323, "step": 15741 }, { "epoch": 0.45, "grad_norm": 5.284694518948077, "learning_rate": 6.032042188245237e-06, "loss": 0.5092, "step": 15742 }, { "epoch": 0.45, "grad_norm": 5.505711580899824, "learning_rate": 6.031588411944781e-06, "loss": 0.5225, "step": 15743 }, { "epoch": 0.45, "grad_norm": 8.050844091385777, "learning_rate": 6.031134626769697e-06, "loss": 0.7701, "step": 15744 }, { "epoch": 0.45, "grad_norm": 4.788243134991549, "learning_rate": 6.0306808327238875e-06, "loss": 0.4202, "step": 15745 }, { "epoch": 0.45, "grad_norm": 6.065018592799684, "learning_rate": 6.030227029811258e-06, "loss": 0.9747, "step": 15746 }, { "epoch": 0.45, "grad_norm": 5.441926272724556, "learning_rate": 6.029773218035711e-06, "loss": 0.4325, "step": 15747 }, { "epoch": 0.45, "grad_norm": 5.0104541208793165, "learning_rate": 6.029319397401153e-06, "loss": 0.4337, "step": 15748 }, { "epoch": 0.45, "grad_norm": 5.776758707277757, "learning_rate": 6.028865567911487e-06, "loss": 0.1854, "step": 15749 }, { "epoch": 0.45, "grad_norm": 2.599417641416759, "learning_rate": 6.0284117295706176e-06, "loss": 0.0589, "step": 15750 }, { "epoch": 0.45, "grad_norm": 5.088035795223831, "learning_rate": 6.027957882382449e-06, "loss": 0.3141, "step": 15751 }, { "epoch": 0.45, "grad_norm": 7.427132665640633, "learning_rate": 6.0275040263508846e-06, "loss": 0.51, "step": 15752 }, { "epoch": 0.45, "grad_norm": 7.009437092043879, "learning_rate": 6.027050161479829e-06, "loss": 0.5307, "step": 15753 }, { "epoch": 0.45, "grad_norm": 4.709748100582898, "learning_rate": 6.0265962877731876e-06, "loss": 0.3542, "step": 15754 }, { "epoch": 0.45, "grad_norm": 4.37902224792871, "learning_rate": 6.026142405234866e-06, "loss": 0.2419, "step": 15755 }, { "epoch": 0.45, "grad_norm": 5.673240812438511, "learning_rate": 6.025688513868767e-06, "loss": 0.5233, "step": 15756 }, { "epoch": 0.45, "grad_norm": 7.672886612773131, "learning_rate": 6.025234613678794e-06, "loss": 0.249, "step": 15757 }, { "epoch": 0.45, "grad_norm": 7.966261052402771, "learning_rate": 6.024780704668857e-06, "loss": 0.4551, "step": 15758 }, { "epoch": 0.45, "grad_norm": 6.189731003026831, "learning_rate": 6.024326786842856e-06, "loss": 0.4985, "step": 15759 }, { "epoch": 0.45, "grad_norm": 6.0313946009877935, "learning_rate": 6.0238728602046985e-06, "loss": 0.6281, "step": 15760 }, { "epoch": 0.45, "grad_norm": 7.239498820371892, "learning_rate": 6.023418924758289e-06, "loss": 0.7488, "step": 15761 }, { "epoch": 0.45, "grad_norm": 7.3897143845345985, "learning_rate": 6.0229649805075305e-06, "loss": 0.6543, "step": 15762 }, { "epoch": 0.45, "grad_norm": 6.26527658763797, "learning_rate": 6.022511027456333e-06, "loss": 0.5077, "step": 15763 }, { "epoch": 0.45, "grad_norm": 3.409805035065659, "learning_rate": 6.0220570656085965e-06, "loss": 0.7092, "step": 15764 }, { "epoch": 0.45, "grad_norm": 4.338773882247096, "learning_rate": 6.02160309496823e-06, "loss": 0.4218, "step": 15765 }, { "epoch": 0.45, "grad_norm": 2.7877734435037103, "learning_rate": 6.0211491155391375e-06, "loss": 0.2853, "step": 15766 }, { "epoch": 0.45, "grad_norm": 4.045070201561666, "learning_rate": 6.020695127325225e-06, "loss": 0.6035, "step": 15767 }, { "epoch": 0.45, "grad_norm": 5.9312685541118, "learning_rate": 6.0202411303304e-06, "loss": 0.6513, "step": 15768 }, { "epoch": 0.45, "grad_norm": 2.6675967268491414, "learning_rate": 6.019787124558564e-06, "loss": 0.3252, "step": 15769 }, { "epoch": 0.45, "grad_norm": 5.963916379214444, "learning_rate": 6.019333110013625e-06, "loss": 0.3933, "step": 15770 }, { "epoch": 0.45, "grad_norm": 5.086797380784388, "learning_rate": 6.018879086699488e-06, "loss": 0.4275, "step": 15771 }, { "epoch": 0.45, "grad_norm": 4.48782545519366, "learning_rate": 6.018425054620063e-06, "loss": 0.5205, "step": 15772 }, { "epoch": 0.45, "grad_norm": 4.251403408585852, "learning_rate": 6.017971013779249e-06, "loss": 0.2517, "step": 15773 }, { "epoch": 0.45, "grad_norm": 4.599953958032148, "learning_rate": 6.017516964180957e-06, "loss": 0.3482, "step": 15774 }, { "epoch": 0.45, "grad_norm": 3.8281356499971437, "learning_rate": 6.017062905829092e-06, "loss": 0.1507, "step": 15775 }, { "epoch": 0.45, "grad_norm": 5.1868595279435725, "learning_rate": 6.01660883872756e-06, "loss": 0.4442, "step": 15776 }, { "epoch": 0.45, "grad_norm": 7.890082735370009, "learning_rate": 6.016154762880267e-06, "loss": 0.787, "step": 15777 }, { "epoch": 0.45, "grad_norm": 4.396604654097982, "learning_rate": 6.015700678291121e-06, "loss": 0.2929, "step": 15778 }, { "epoch": 0.45, "grad_norm": 3.8311313890825, "learning_rate": 6.015246584964026e-06, "loss": 0.2567, "step": 15779 }, { "epoch": 0.45, "grad_norm": 6.492547164116999, "learning_rate": 6.0147924829028906e-06, "loss": 0.5205, "step": 15780 }, { "epoch": 0.45, "grad_norm": 3.4853660645187228, "learning_rate": 6.014338372111619e-06, "loss": 0.345, "step": 15781 }, { "epoch": 0.45, "grad_norm": 5.520889687400478, "learning_rate": 6.01388425259412e-06, "loss": 0.6357, "step": 15782 }, { "epoch": 0.45, "grad_norm": 5.896908229619025, "learning_rate": 6.013430124354299e-06, "loss": 0.3926, "step": 15783 }, { "epoch": 0.45, "grad_norm": 1.2919486651943666, "learning_rate": 6.012975987396063e-06, "loss": 0.1849, "step": 15784 }, { "epoch": 0.45, "grad_norm": 5.585942152328154, "learning_rate": 6.0125218417233225e-06, "loss": 0.651, "step": 15785 }, { "epoch": 0.45, "grad_norm": 9.603491196010722, "learning_rate": 6.012067687339978e-06, "loss": 0.6645, "step": 15786 }, { "epoch": 0.45, "grad_norm": 8.634173393864204, "learning_rate": 6.0116135242499405e-06, "loss": 0.7552, "step": 15787 }, { "epoch": 0.45, "grad_norm": 3.3500249577062315, "learning_rate": 6.0111593524571175e-06, "loss": 0.3887, "step": 15788 }, { "epoch": 0.45, "grad_norm": 9.49702607838349, "learning_rate": 6.010705171965413e-06, "loss": 0.6034, "step": 15789 }, { "epoch": 0.45, "grad_norm": 6.14510044534159, "learning_rate": 6.010250982778738e-06, "loss": 0.441, "step": 15790 }, { "epoch": 0.45, "grad_norm": 10.413783282166227, "learning_rate": 6.009796784900999e-06, "loss": 0.6088, "step": 15791 }, { "epoch": 0.45, "grad_norm": 3.133518871857414, "learning_rate": 6.0093425783361004e-06, "loss": 0.295, "step": 15792 }, { "epoch": 0.45, "grad_norm": 4.366538120639345, "learning_rate": 6.008888363087952e-06, "loss": 0.1822, "step": 15793 }, { "epoch": 0.45, "grad_norm": 5.10497453744873, "learning_rate": 6.008434139160463e-06, "loss": 0.3306, "step": 15794 }, { "epoch": 0.45, "grad_norm": 6.0697250180120195, "learning_rate": 6.00797990655754e-06, "loss": 0.4594, "step": 15795 }, { "epoch": 0.45, "grad_norm": 8.851669701434632, "learning_rate": 6.007525665283087e-06, "loss": 0.9592, "step": 15796 }, { "epoch": 0.45, "grad_norm": 6.626036616863535, "learning_rate": 6.007071415341016e-06, "loss": 0.7187, "step": 15797 }, { "epoch": 0.45, "grad_norm": 3.642394736738527, "learning_rate": 6.006617156735234e-06, "loss": 0.3073, "step": 15798 }, { "epoch": 0.45, "grad_norm": 2.728791842067751, "learning_rate": 6.006162889469648e-06, "loss": 0.6087, "step": 15799 }, { "epoch": 0.45, "grad_norm": 7.406690624666423, "learning_rate": 6.005708613548166e-06, "loss": 0.4204, "step": 15800 }, { "epoch": 0.45, "grad_norm": 5.097165887274756, "learning_rate": 6.005254328974696e-06, "loss": 0.6737, "step": 15801 }, { "epoch": 0.45, "grad_norm": 6.4122495702731275, "learning_rate": 6.00480003575315e-06, "loss": 0.5633, "step": 15802 }, { "epoch": 0.45, "grad_norm": 8.021070032631792, "learning_rate": 6.004345733887431e-06, "loss": 0.4523, "step": 15803 }, { "epoch": 0.45, "grad_norm": 4.648688096416152, "learning_rate": 6.0038914233814496e-06, "loss": 0.5605, "step": 15804 }, { "epoch": 0.45, "grad_norm": 6.956667736908203, "learning_rate": 6.003437104239115e-06, "loss": 0.6134, "step": 15805 }, { "epoch": 0.45, "grad_norm": 15.731279661554526, "learning_rate": 6.002982776464331e-06, "loss": 0.4168, "step": 15806 }, { "epoch": 0.45, "grad_norm": 6.514095721545548, "learning_rate": 6.002528440061013e-06, "loss": 0.4372, "step": 15807 }, { "epoch": 0.45, "grad_norm": 4.534856531719074, "learning_rate": 6.002074095033066e-06, "loss": 0.5681, "step": 15808 }, { "epoch": 0.45, "grad_norm": 7.493977990074607, "learning_rate": 6.001619741384398e-06, "loss": 0.6426, "step": 15809 }, { "epoch": 0.45, "grad_norm": 4.435312188481162, "learning_rate": 6.0011653791189185e-06, "loss": 0.4518, "step": 15810 }, { "epoch": 0.45, "grad_norm": 8.74037753025222, "learning_rate": 6.000711008240536e-06, "loss": 0.3888, "step": 15811 }, { "epoch": 0.45, "grad_norm": 7.453065468092547, "learning_rate": 6.000256628753162e-06, "loss": 0.9799, "step": 15812 }, { "epoch": 0.45, "grad_norm": 2.9504180692782436, "learning_rate": 5.999802240660701e-06, "loss": 0.2387, "step": 15813 }, { "epoch": 0.45, "grad_norm": 2.6615429035543428, "learning_rate": 5.999347843967065e-06, "loss": 0.2826, "step": 15814 }, { "epoch": 0.45, "grad_norm": 12.40415264244598, "learning_rate": 5.998893438676164e-06, "loss": 0.4164, "step": 15815 }, { "epoch": 0.45, "grad_norm": 5.200695856925325, "learning_rate": 5.998439024791905e-06, "loss": 0.5128, "step": 15816 }, { "epoch": 0.45, "grad_norm": 7.448949639950951, "learning_rate": 5.997984602318198e-06, "loss": 0.7838, "step": 15817 }, { "epoch": 0.45, "grad_norm": 8.332650620787064, "learning_rate": 5.997530171258953e-06, "loss": 0.5892, "step": 15818 }, { "epoch": 0.45, "grad_norm": 6.399715961918486, "learning_rate": 5.997075731618077e-06, "loss": 0.5467, "step": 15819 }, { "epoch": 0.45, "grad_norm": 4.257411048932096, "learning_rate": 5.996621283399482e-06, "loss": 0.2825, "step": 15820 }, { "epoch": 0.45, "grad_norm": 6.026956087156573, "learning_rate": 5.996166826607077e-06, "loss": 0.5768, "step": 15821 }, { "epoch": 0.45, "grad_norm": 4.843446586704228, "learning_rate": 5.995712361244772e-06, "loss": 0.3976, "step": 15822 }, { "epoch": 0.45, "grad_norm": 6.709864538391641, "learning_rate": 5.995257887316475e-06, "loss": 0.4838, "step": 15823 }, { "epoch": 0.45, "grad_norm": 5.775087649848601, "learning_rate": 5.9948034048260975e-06, "loss": 0.7415, "step": 15824 }, { "epoch": 0.45, "grad_norm": 3.2059856759439698, "learning_rate": 5.994348913777549e-06, "loss": 0.4226, "step": 15825 }, { "epoch": 0.45, "grad_norm": 3.791197342524228, "learning_rate": 5.993894414174739e-06, "loss": 0.3001, "step": 15826 }, { "epoch": 0.45, "grad_norm": 3.7480466841657387, "learning_rate": 5.993439906021578e-06, "loss": 0.2025, "step": 15827 }, { "epoch": 0.45, "grad_norm": 6.675050038068005, "learning_rate": 5.9929853893219745e-06, "loss": 0.7231, "step": 15828 }, { "epoch": 0.45, "grad_norm": 5.48581278938172, "learning_rate": 5.992530864079843e-06, "loss": 0.5608, "step": 15829 }, { "epoch": 0.45, "grad_norm": 6.5189542246471115, "learning_rate": 5.992076330299088e-06, "loss": 0.5268, "step": 15830 }, { "epoch": 0.45, "grad_norm": 4.333186498623998, "learning_rate": 5.991621787983624e-06, "loss": 0.2966, "step": 15831 }, { "epoch": 0.45, "grad_norm": 4.368138300211869, "learning_rate": 5.991167237137358e-06, "loss": 0.5552, "step": 15832 }, { "epoch": 0.45, "grad_norm": 5.936752191934571, "learning_rate": 5.9907126777642035e-06, "loss": 0.3505, "step": 15833 }, { "epoch": 0.45, "grad_norm": 6.545755732234369, "learning_rate": 5.990258109868069e-06, "loss": 0.3464, "step": 15834 }, { "epoch": 0.45, "grad_norm": 6.9698849425267095, "learning_rate": 5.9898035334528685e-06, "loss": 0.6508, "step": 15835 }, { "epoch": 0.45, "grad_norm": 4.998258668944584, "learning_rate": 5.989348948522506e-06, "loss": 0.6531, "step": 15836 }, { "epoch": 0.45, "grad_norm": 4.991987484072621, "learning_rate": 5.9888943550809e-06, "loss": 0.4137, "step": 15837 }, { "epoch": 0.45, "grad_norm": 5.287882776135491, "learning_rate": 5.988439753131955e-06, "loss": 0.6801, "step": 15838 }, { "epoch": 0.45, "grad_norm": 5.209655626927266, "learning_rate": 5.987985142679585e-06, "loss": 0.7408, "step": 15839 }, { "epoch": 0.45, "grad_norm": 5.153328177551085, "learning_rate": 5.987530523727702e-06, "loss": 0.6742, "step": 15840 }, { "epoch": 0.45, "grad_norm": 9.37009291335118, "learning_rate": 5.987075896280213e-06, "loss": 0.999, "step": 15841 }, { "epoch": 0.45, "grad_norm": 13.187411122677956, "learning_rate": 5.986621260341034e-06, "loss": 0.6532, "step": 15842 }, { "epoch": 0.45, "grad_norm": 4.092610324311686, "learning_rate": 5.986166615914073e-06, "loss": 0.2356, "step": 15843 }, { "epoch": 0.45, "grad_norm": 3.002738487545535, "learning_rate": 5.985711963003242e-06, "loss": 0.0668, "step": 15844 }, { "epoch": 0.45, "grad_norm": 3.9713386443621688, "learning_rate": 5.985257301612452e-06, "loss": 0.4627, "step": 15845 }, { "epoch": 0.45, "grad_norm": 6.121409122641537, "learning_rate": 5.984802631745617e-06, "loss": 0.4308, "step": 15846 }, { "epoch": 0.45, "grad_norm": 4.988142353654297, "learning_rate": 5.984347953406642e-06, "loss": 0.3001, "step": 15847 }, { "epoch": 0.45, "grad_norm": 4.594920080733133, "learning_rate": 5.9838932665994456e-06, "loss": 0.5877, "step": 15848 }, { "epoch": 0.45, "grad_norm": 3.1525185878063495, "learning_rate": 5.983438571327935e-06, "loss": 0.4379, "step": 15849 }, { "epoch": 0.45, "grad_norm": 5.158208116892227, "learning_rate": 5.982983867596025e-06, "loss": 0.2634, "step": 15850 }, { "epoch": 0.45, "grad_norm": 5.951967862826925, "learning_rate": 5.982529155407625e-06, "loss": 0.2684, "step": 15851 }, { "epoch": 0.45, "grad_norm": 5.732825566232991, "learning_rate": 5.98207443476665e-06, "loss": 0.5029, "step": 15852 }, { "epoch": 0.45, "grad_norm": 16.679887213840225, "learning_rate": 5.981619705677007e-06, "loss": 0.3892, "step": 15853 }, { "epoch": 0.45, "grad_norm": 3.435190881758867, "learning_rate": 5.981164968142609e-06, "loss": 0.2736, "step": 15854 }, { "epoch": 0.45, "grad_norm": 5.191917928102847, "learning_rate": 5.980710222167372e-06, "loss": 0.2912, "step": 15855 }, { "epoch": 0.45, "grad_norm": 5.417387322634628, "learning_rate": 5.980255467755205e-06, "loss": 0.1652, "step": 15856 }, { "epoch": 0.45, "grad_norm": 5.3716439260123146, "learning_rate": 5.979800704910022e-06, "loss": 0.6898, "step": 15857 }, { "epoch": 0.45, "grad_norm": 8.919861253933759, "learning_rate": 5.979345933635731e-06, "loss": 0.5029, "step": 15858 }, { "epoch": 0.45, "grad_norm": 7.877381464224655, "learning_rate": 5.978891153936252e-06, "loss": 0.6872, "step": 15859 }, { "epoch": 0.45, "grad_norm": 9.282009414789727, "learning_rate": 5.97843636581549e-06, "loss": 0.9853, "step": 15860 }, { "epoch": 0.45, "grad_norm": 9.110950680485878, "learning_rate": 5.9779815692773615e-06, "loss": 0.6158, "step": 15861 }, { "epoch": 0.45, "grad_norm": 8.560624912749626, "learning_rate": 5.977526764325778e-06, "loss": 0.7581, "step": 15862 }, { "epoch": 0.45, "grad_norm": 9.667688181495494, "learning_rate": 5.97707195096465e-06, "loss": 0.5941, "step": 15863 }, { "epoch": 0.45, "grad_norm": 5.289483643842312, "learning_rate": 5.976617129197895e-06, "loss": 0.2838, "step": 15864 }, { "epoch": 0.45, "grad_norm": 6.4654262314891415, "learning_rate": 5.976162299029422e-06, "loss": 0.4798, "step": 15865 }, { "epoch": 0.45, "grad_norm": 2.9540441788073277, "learning_rate": 5.975707460463145e-06, "loss": 0.3343, "step": 15866 }, { "epoch": 0.45, "grad_norm": 5.28262063021838, "learning_rate": 5.975252613502978e-06, "loss": 0.3699, "step": 15867 }, { "epoch": 0.45, "grad_norm": 12.44139797144544, "learning_rate": 5.974797758152831e-06, "loss": 0.8044, "step": 15868 }, { "epoch": 0.45, "grad_norm": 6.266701698543487, "learning_rate": 5.9743428944166204e-06, "loss": 0.5823, "step": 15869 }, { "epoch": 0.45, "grad_norm": 6.870983198793534, "learning_rate": 5.973888022298257e-06, "loss": 0.3964, "step": 15870 }, { "epoch": 0.45, "grad_norm": 5.292045034062692, "learning_rate": 5.973433141801654e-06, "loss": 0.4722, "step": 15871 }, { "epoch": 0.45, "grad_norm": 10.061328274762078, "learning_rate": 5.972978252930727e-06, "loss": 0.8474, "step": 15872 }, { "epoch": 0.45, "grad_norm": 4.446298714436042, "learning_rate": 5.972523355689387e-06, "loss": 0.5248, "step": 15873 }, { "epoch": 0.45, "grad_norm": 2.6147676126857275, "learning_rate": 5.972068450081549e-06, "loss": 0.2832, "step": 15874 }, { "epoch": 0.45, "grad_norm": 5.39121789297816, "learning_rate": 5.971613536111126e-06, "loss": 0.754, "step": 15875 }, { "epoch": 0.45, "grad_norm": 8.989039634862497, "learning_rate": 5.97115861378203e-06, "loss": 0.787, "step": 15876 }, { "epoch": 0.45, "grad_norm": 5.468266013390972, "learning_rate": 5.970703683098177e-06, "loss": 0.3545, "step": 15877 }, { "epoch": 0.45, "grad_norm": 2.1522813569872095, "learning_rate": 5.970248744063479e-06, "loss": 0.2746, "step": 15878 }, { "epoch": 0.45, "grad_norm": 4.514329691499377, "learning_rate": 5.969793796681852e-06, "loss": 0.6305, "step": 15879 }, { "epoch": 0.45, "grad_norm": 4.538131907365483, "learning_rate": 5.9693388409572066e-06, "loss": 0.8126, "step": 15880 }, { "epoch": 0.45, "grad_norm": 3.4344759036818395, "learning_rate": 5.968883876893459e-06, "loss": 0.4202, "step": 15881 }, { "epoch": 0.45, "grad_norm": 6.535337857164509, "learning_rate": 5.968428904494524e-06, "loss": 0.4621, "step": 15882 }, { "epoch": 0.45, "grad_norm": 6.136242921521689, "learning_rate": 5.967973923764313e-06, "loss": 0.4755, "step": 15883 }, { "epoch": 0.45, "grad_norm": 8.502039047707909, "learning_rate": 5.967518934706742e-06, "loss": 0.7478, "step": 15884 }, { "epoch": 0.45, "grad_norm": 6.543209643128473, "learning_rate": 5.967063937325725e-06, "loss": 0.6889, "step": 15885 }, { "epoch": 0.45, "grad_norm": 8.76453609128902, "learning_rate": 5.9666089316251765e-06, "loss": 0.5495, "step": 15886 }, { "epoch": 0.45, "grad_norm": 10.1909543365258, "learning_rate": 5.966153917609009e-06, "loss": 0.4447, "step": 15887 }, { "epoch": 0.45, "grad_norm": 5.32209841964273, "learning_rate": 5.965698895281138e-06, "loss": 0.3111, "step": 15888 }, { "epoch": 0.46, "grad_norm": 4.845363766504496, "learning_rate": 5.9652438646454806e-06, "loss": 0.3616, "step": 15889 }, { "epoch": 0.46, "grad_norm": 3.4284989854561614, "learning_rate": 5.964788825705947e-06, "loss": 0.4826, "step": 15890 }, { "epoch": 0.46, "grad_norm": 6.561047438672269, "learning_rate": 5.964333778466454e-06, "loss": 0.4494, "step": 15891 }, { "epoch": 0.46, "grad_norm": 7.789471002424981, "learning_rate": 5.963878722930918e-06, "loss": 0.9001, "step": 15892 }, { "epoch": 0.46, "grad_norm": 3.500477230370119, "learning_rate": 5.96342365910325e-06, "loss": 0.2903, "step": 15893 }, { "epoch": 0.46, "grad_norm": 7.2955514822531615, "learning_rate": 5.962968586987367e-06, "loss": 0.433, "step": 15894 }, { "epoch": 0.46, "grad_norm": 4.834847248784872, "learning_rate": 5.962513506587185e-06, "loss": 0.2322, "step": 15895 }, { "epoch": 0.46, "grad_norm": 14.782035855135854, "learning_rate": 5.962058417906618e-06, "loss": 0.8209, "step": 15896 }, { "epoch": 0.46, "grad_norm": 3.160869447180159, "learning_rate": 5.961603320949579e-06, "loss": 0.2505, "step": 15897 }, { "epoch": 0.46, "grad_norm": 6.832109558808288, "learning_rate": 5.961148215719986e-06, "loss": 0.4854, "step": 15898 }, { "epoch": 0.46, "grad_norm": 7.685668541067637, "learning_rate": 5.960693102221755e-06, "loss": 0.4442, "step": 15899 }, { "epoch": 0.46, "grad_norm": 6.04691821336707, "learning_rate": 5.960237980458798e-06, "loss": 0.4996, "step": 15900 }, { "epoch": 0.46, "grad_norm": 3.5581166924333236, "learning_rate": 5.959782850435032e-06, "loss": 0.4327, "step": 15901 }, { "epoch": 0.46, "grad_norm": 4.927723537195772, "learning_rate": 5.959327712154371e-06, "loss": 0.4119, "step": 15902 }, { "epoch": 0.46, "grad_norm": 3.942064727431786, "learning_rate": 5.958872565620735e-06, "loss": 0.2438, "step": 15903 }, { "epoch": 0.46, "grad_norm": 3.3263763167785902, "learning_rate": 5.958417410838033e-06, "loss": 0.6157, "step": 15904 }, { "epoch": 0.46, "grad_norm": 6.382944318140927, "learning_rate": 5.9579622478101865e-06, "loss": 0.6804, "step": 15905 }, { "epoch": 0.46, "grad_norm": 9.553995826370512, "learning_rate": 5.957507076541107e-06, "loss": 0.5661, "step": 15906 }, { "epoch": 0.46, "grad_norm": 3.6397225730885023, "learning_rate": 5.957051897034713e-06, "loss": 0.2916, "step": 15907 }, { "epoch": 0.46, "grad_norm": 5.873573292316598, "learning_rate": 5.956596709294919e-06, "loss": 0.7785, "step": 15908 }, { "epoch": 0.46, "grad_norm": 6.935591151558895, "learning_rate": 5.9561415133256416e-06, "loss": 0.6671, "step": 15909 }, { "epoch": 0.46, "grad_norm": 9.358430690604314, "learning_rate": 5.955686309130796e-06, "loss": 1.5854, "step": 15910 }, { "epoch": 0.46, "grad_norm": 4.012295496711479, "learning_rate": 5.955231096714298e-06, "loss": 0.4257, "step": 15911 }, { "epoch": 0.46, "grad_norm": 6.052060961613585, "learning_rate": 5.954775876080065e-06, "loss": 0.3476, "step": 15912 }, { "epoch": 0.46, "grad_norm": 5.914777100298008, "learning_rate": 5.954320647232013e-06, "loss": 0.6864, "step": 15913 }, { "epoch": 0.46, "grad_norm": 25.437016156817986, "learning_rate": 5.953865410174059e-06, "loss": 0.4843, "step": 15914 }, { "epoch": 0.46, "grad_norm": 5.1543987303134715, "learning_rate": 5.953410164910116e-06, "loss": 0.4718, "step": 15915 }, { "epoch": 0.46, "grad_norm": 8.252358012903484, "learning_rate": 5.952954911444105e-06, "loss": 0.5846, "step": 15916 }, { "epoch": 0.46, "grad_norm": 5.070179777362655, "learning_rate": 5.95249964977994e-06, "loss": 0.2548, "step": 15917 }, { "epoch": 0.46, "grad_norm": 6.666368950712977, "learning_rate": 5.952044379921536e-06, "loss": 0.549, "step": 15918 }, { "epoch": 0.46, "grad_norm": 13.266748746940719, "learning_rate": 5.951589101872814e-06, "loss": 0.8384, "step": 15919 }, { "epoch": 0.46, "grad_norm": 2.372795336543315, "learning_rate": 5.951133815637686e-06, "loss": 0.1868, "step": 15920 }, { "epoch": 0.46, "grad_norm": 2.7680346126618014, "learning_rate": 5.950678521220071e-06, "loss": 0.1376, "step": 15921 }, { "epoch": 0.46, "grad_norm": 4.148821566919277, "learning_rate": 5.950223218623888e-06, "loss": 0.3038, "step": 15922 }, { "epoch": 0.46, "grad_norm": 2.106068088814678, "learning_rate": 5.949767907853049e-06, "loss": 0.139, "step": 15923 }, { "epoch": 0.46, "grad_norm": 7.5706661051236805, "learning_rate": 5.949312588911475e-06, "loss": 0.5219, "step": 15924 }, { "epoch": 0.46, "grad_norm": 7.695632230826062, "learning_rate": 5.94885726180308e-06, "loss": 0.3633, "step": 15925 }, { "epoch": 0.46, "grad_norm": 5.428301034553502, "learning_rate": 5.948401926531786e-06, "loss": 0.6577, "step": 15926 }, { "epoch": 0.46, "grad_norm": 2.3379678482393507, "learning_rate": 5.9479465831015046e-06, "loss": 0.2549, "step": 15927 }, { "epoch": 0.46, "grad_norm": 3.6156518696122038, "learning_rate": 5.947491231516156e-06, "loss": 0.3887, "step": 15928 }, { "epoch": 0.46, "grad_norm": 4.604782894662165, "learning_rate": 5.947035871779656e-06, "loss": 0.4299, "step": 15929 }, { "epoch": 0.46, "grad_norm": 5.705127897375215, "learning_rate": 5.946580503895925e-06, "loss": 0.5423, "step": 15930 }, { "epoch": 0.46, "grad_norm": 4.003324319372476, "learning_rate": 5.946125127868876e-06, "loss": 0.3909, "step": 15931 }, { "epoch": 0.46, "grad_norm": 6.941138129779736, "learning_rate": 5.945669743702431e-06, "loss": 0.5844, "step": 15932 }, { "epoch": 0.46, "grad_norm": 4.050632514540511, "learning_rate": 5.9452143514005055e-06, "loss": 0.6811, "step": 15933 }, { "epoch": 0.46, "grad_norm": 4.712824505224773, "learning_rate": 5.944758950967017e-06, "loss": 0.2437, "step": 15934 }, { "epoch": 0.46, "grad_norm": 3.1154706907177805, "learning_rate": 5.9443035424058824e-06, "loss": 0.1466, "step": 15935 }, { "epoch": 0.46, "grad_norm": 8.128653130538526, "learning_rate": 5.943848125721024e-06, "loss": 0.737, "step": 15936 }, { "epoch": 0.46, "grad_norm": 3.4868859280272946, "learning_rate": 5.9433927009163515e-06, "loss": 0.6841, "step": 15937 }, { "epoch": 0.46, "grad_norm": 3.7731556599565415, "learning_rate": 5.942937267995791e-06, "loss": 0.4404, "step": 15938 }, { "epoch": 0.46, "grad_norm": 5.637130331027308, "learning_rate": 5.942481826963258e-06, "loss": 0.8224, "step": 15939 }, { "epoch": 0.46, "grad_norm": 6.9034391180627726, "learning_rate": 5.9420263778226675e-06, "loss": 0.664, "step": 15940 }, { "epoch": 0.46, "grad_norm": 4.2524654586169595, "learning_rate": 5.941570920577941e-06, "loss": 0.2198, "step": 15941 }, { "epoch": 0.46, "grad_norm": 3.567146216148193, "learning_rate": 5.9411154552329955e-06, "loss": 0.3645, "step": 15942 }, { "epoch": 0.46, "grad_norm": 7.091423224436932, "learning_rate": 5.940659981791751e-06, "loss": 0.6812, "step": 15943 }, { "epoch": 0.46, "grad_norm": 5.1606157258560135, "learning_rate": 5.940204500258123e-06, "loss": 0.4984, "step": 15944 }, { "epoch": 0.46, "grad_norm": 2.7786898027043825, "learning_rate": 5.9397490106360326e-06, "loss": 0.3539, "step": 15945 }, { "epoch": 0.46, "grad_norm": 5.864125861240559, "learning_rate": 5.939293512929396e-06, "loss": 0.3499, "step": 15946 }, { "epoch": 0.46, "grad_norm": 4.037057906709974, "learning_rate": 5.938838007142132e-06, "loss": 0.5039, "step": 15947 }, { "epoch": 0.46, "grad_norm": 8.15187130606896, "learning_rate": 5.938382493278162e-06, "loss": 0.5072, "step": 15948 }, { "epoch": 0.46, "grad_norm": 6.535881189244635, "learning_rate": 5.9379269713414034e-06, "loss": 0.6498, "step": 15949 }, { "epoch": 0.46, "grad_norm": 2.437947495048865, "learning_rate": 5.9374714413357736e-06, "loss": 0.209, "step": 15950 }, { "epoch": 0.46, "grad_norm": 8.28586406642853, "learning_rate": 5.937015903265193e-06, "loss": 1.072, "step": 15951 }, { "epoch": 0.46, "grad_norm": 3.352868205935796, "learning_rate": 5.93656035713358e-06, "loss": 0.346, "step": 15952 }, { "epoch": 0.46, "grad_norm": 10.193690004996657, "learning_rate": 5.9361048029448535e-06, "loss": 0.8376, "step": 15953 }, { "epoch": 0.46, "grad_norm": 6.578076650120153, "learning_rate": 5.935649240702931e-06, "loss": 0.2574, "step": 15954 }, { "epoch": 0.46, "grad_norm": 14.907996821066167, "learning_rate": 5.935193670411734e-06, "loss": 0.5657, "step": 15955 }, { "epoch": 0.46, "grad_norm": 5.088439044939273, "learning_rate": 5.934738092075182e-06, "loss": 0.4945, "step": 15956 }, { "epoch": 0.46, "grad_norm": 7.505820368303862, "learning_rate": 5.934282505697193e-06, "loss": 0.518, "step": 15957 }, { "epoch": 0.46, "grad_norm": 3.49621711385438, "learning_rate": 5.933826911281686e-06, "loss": 0.3553, "step": 15958 }, { "epoch": 0.46, "grad_norm": 3.8278459096885147, "learning_rate": 5.933371308832582e-06, "loss": 0.5258, "step": 15959 }, { "epoch": 0.46, "grad_norm": 3.1448407133425786, "learning_rate": 5.932915698353799e-06, "loss": 0.2723, "step": 15960 }, { "epoch": 0.46, "grad_norm": 6.331229496251427, "learning_rate": 5.932460079849257e-06, "loss": 0.7763, "step": 15961 }, { "epoch": 0.46, "grad_norm": 5.13272282770289, "learning_rate": 5.932004453322875e-06, "loss": 0.4657, "step": 15962 }, { "epoch": 0.46, "grad_norm": 6.585583660354029, "learning_rate": 5.9315488187785755e-06, "loss": 0.6392, "step": 15963 }, { "epoch": 0.46, "grad_norm": 14.272577703561442, "learning_rate": 5.931093176220274e-06, "loss": 0.4288, "step": 15964 }, { "epoch": 0.46, "grad_norm": 7.275586328114503, "learning_rate": 5.930637525651894e-06, "loss": 0.3473, "step": 15965 }, { "epoch": 0.46, "grad_norm": 5.711329873274368, "learning_rate": 5.9301818670773536e-06, "loss": 0.5847, "step": 15966 }, { "epoch": 0.46, "grad_norm": 13.686630430425982, "learning_rate": 5.929726200500573e-06, "loss": 0.4855, "step": 15967 }, { "epoch": 0.46, "grad_norm": 6.213328389560113, "learning_rate": 5.929270525925472e-06, "loss": 0.929, "step": 15968 }, { "epoch": 0.46, "grad_norm": 4.254832479561394, "learning_rate": 5.928814843355972e-06, "loss": 0.5233, "step": 15969 }, { "epoch": 0.46, "grad_norm": 3.2876178350470973, "learning_rate": 5.9283591527959915e-06, "loss": 0.3727, "step": 15970 }, { "epoch": 0.46, "grad_norm": 5.358648979457442, "learning_rate": 5.927903454249452e-06, "loss": 0.6631, "step": 15971 }, { "epoch": 0.46, "grad_norm": 5.944623508707538, "learning_rate": 5.927447747720272e-06, "loss": 0.5531, "step": 15972 }, { "epoch": 0.46, "grad_norm": 4.9496739154877085, "learning_rate": 5.926992033212375e-06, "loss": 0.4293, "step": 15973 }, { "epoch": 0.46, "grad_norm": 7.744045554271414, "learning_rate": 5.926536310729679e-06, "loss": 1.0903, "step": 15974 }, { "epoch": 0.46, "grad_norm": 4.505276182983107, "learning_rate": 5.926080580276106e-06, "loss": 0.5206, "step": 15975 }, { "epoch": 0.46, "grad_norm": 7.204923260483287, "learning_rate": 5.925624841855575e-06, "loss": 0.4265, "step": 15976 }, { "epoch": 0.46, "grad_norm": 7.2787226109330865, "learning_rate": 5.925169095472007e-06, "loss": 0.406, "step": 15977 }, { "epoch": 0.46, "grad_norm": 5.83668597016482, "learning_rate": 5.9247133411293235e-06, "loss": 0.6144, "step": 15978 }, { "epoch": 0.46, "grad_norm": 4.672691682095392, "learning_rate": 5.924257578831445e-06, "loss": 0.5068, "step": 15979 }, { "epoch": 0.46, "grad_norm": 10.048103648948016, "learning_rate": 5.9238018085822925e-06, "loss": 0.5565, "step": 15980 }, { "epoch": 0.46, "grad_norm": 11.48954899293685, "learning_rate": 5.923346030385786e-06, "loss": 0.7628, "step": 15981 }, { "epoch": 0.46, "grad_norm": 7.187214190564027, "learning_rate": 5.922890244245848e-06, "loss": 1.053, "step": 15982 }, { "epoch": 0.46, "grad_norm": 4.58833603789701, "learning_rate": 5.9224344501664e-06, "loss": 0.3223, "step": 15983 }, { "epoch": 0.46, "grad_norm": 8.202406788220504, "learning_rate": 5.92197864815136e-06, "loss": 0.4672, "step": 15984 }, { "epoch": 0.46, "grad_norm": 4.5956671989388775, "learning_rate": 5.921522838204652e-06, "loss": 0.3972, "step": 15985 }, { "epoch": 0.46, "grad_norm": 5.668543864648194, "learning_rate": 5.9210670203301965e-06, "loss": 0.301, "step": 15986 }, { "epoch": 0.46, "grad_norm": 4.281054137267432, "learning_rate": 5.920611194531916e-06, "loss": 0.3298, "step": 15987 }, { "epoch": 0.46, "grad_norm": 5.682718446848456, "learning_rate": 5.920155360813729e-06, "loss": 0.2585, "step": 15988 }, { "epoch": 0.46, "grad_norm": 2.589263241064328, "learning_rate": 5.919699519179558e-06, "loss": 0.2569, "step": 15989 }, { "epoch": 0.46, "grad_norm": 5.660469420347159, "learning_rate": 5.919243669633328e-06, "loss": 0.3419, "step": 15990 }, { "epoch": 0.46, "grad_norm": 6.420168016320764, "learning_rate": 5.918787812178956e-06, "loss": 0.5646, "step": 15991 }, { "epoch": 0.46, "grad_norm": 5.226202663014738, "learning_rate": 5.918331946820366e-06, "loss": 0.3382, "step": 15992 }, { "epoch": 0.46, "grad_norm": 5.834116211082575, "learning_rate": 5.917876073561481e-06, "loss": 0.4968, "step": 15993 }, { "epoch": 0.46, "grad_norm": 3.5629775915080923, "learning_rate": 5.917420192406219e-06, "loss": 0.348, "step": 15994 }, { "epoch": 0.46, "grad_norm": 8.257936070725744, "learning_rate": 5.916964303358503e-06, "loss": 0.6942, "step": 15995 }, { "epoch": 0.46, "grad_norm": 4.89362309122018, "learning_rate": 5.916508406422259e-06, "loss": 0.3447, "step": 15996 }, { "epoch": 0.46, "grad_norm": 4.060822595406894, "learning_rate": 5.916052501601404e-06, "loss": 0.3136, "step": 15997 }, { "epoch": 0.46, "grad_norm": 6.79800928231818, "learning_rate": 5.915596588899862e-06, "loss": 0.395, "step": 15998 }, { "epoch": 0.46, "grad_norm": 5.8055265700708265, "learning_rate": 5.915140668321556e-06, "loss": 0.5643, "step": 15999 }, { "epoch": 0.46, "grad_norm": 10.023254821016536, "learning_rate": 5.914684739870409e-06, "loss": 0.5802, "step": 16000 }, { "epoch": 0.46, "grad_norm": 4.498691050574386, "learning_rate": 5.914228803550341e-06, "loss": 0.5397, "step": 16001 }, { "epoch": 0.46, "grad_norm": 2.9292875703591887, "learning_rate": 5.913772859365274e-06, "loss": 0.1801, "step": 16002 }, { "epoch": 0.46, "grad_norm": 4.683606628960217, "learning_rate": 5.913316907319131e-06, "loss": 0.4432, "step": 16003 }, { "epoch": 0.46, "grad_norm": 3.2665091234935466, "learning_rate": 5.912860947415836e-06, "loss": 0.4226, "step": 16004 }, { "epoch": 0.46, "grad_norm": 3.2995291056036447, "learning_rate": 5.912404979659311e-06, "loss": 0.2715, "step": 16005 }, { "epoch": 0.46, "grad_norm": 3.7376671807908424, "learning_rate": 5.911949004053479e-06, "loss": 0.4395, "step": 16006 }, { "epoch": 0.46, "grad_norm": 6.057555000789134, "learning_rate": 5.911493020602261e-06, "loss": 0.6865, "step": 16007 }, { "epoch": 0.46, "grad_norm": 3.9890594171615983, "learning_rate": 5.911037029309581e-06, "loss": 0.491, "step": 16008 }, { "epoch": 0.46, "grad_norm": 3.7250138429730133, "learning_rate": 5.910581030179361e-06, "loss": 0.1917, "step": 16009 }, { "epoch": 0.46, "grad_norm": 8.086168427786383, "learning_rate": 5.910125023215526e-06, "loss": 0.6191, "step": 16010 }, { "epoch": 0.46, "grad_norm": 5.363094985072396, "learning_rate": 5.909669008421996e-06, "loss": 0.6854, "step": 16011 }, { "epoch": 0.46, "grad_norm": 4.632158101327037, "learning_rate": 5.909212985802695e-06, "loss": 0.6036, "step": 16012 }, { "epoch": 0.46, "grad_norm": 16.27726541623445, "learning_rate": 5.908756955361548e-06, "loss": 0.6428, "step": 16013 }, { "epoch": 0.46, "grad_norm": 8.338034683309125, "learning_rate": 5.908300917102476e-06, "loss": 0.3073, "step": 16014 }, { "epoch": 0.46, "grad_norm": 6.476128116116762, "learning_rate": 5.907844871029404e-06, "loss": 0.522, "step": 16015 }, { "epoch": 0.46, "grad_norm": 3.0548727462287597, "learning_rate": 5.907388817146254e-06, "loss": 0.3529, "step": 16016 }, { "epoch": 0.46, "grad_norm": 3.174760830499139, "learning_rate": 5.90693275545695e-06, "loss": 0.1948, "step": 16017 }, { "epoch": 0.46, "grad_norm": 3.56239884216175, "learning_rate": 5.906476685965414e-06, "loss": 0.3389, "step": 16018 }, { "epoch": 0.46, "grad_norm": 8.40885981211744, "learning_rate": 5.906020608675572e-06, "loss": 1.079, "step": 16019 }, { "epoch": 0.46, "grad_norm": 13.918815781875102, "learning_rate": 5.905564523591345e-06, "loss": 0.802, "step": 16020 }, { "epoch": 0.46, "grad_norm": 6.191020850090861, "learning_rate": 5.905108430716658e-06, "loss": 0.5301, "step": 16021 }, { "epoch": 0.46, "grad_norm": 2.373183509440941, "learning_rate": 5.904652330055436e-06, "loss": 0.181, "step": 16022 }, { "epoch": 0.46, "grad_norm": 6.96507405720244, "learning_rate": 5.904196221611601e-06, "loss": 0.5643, "step": 16023 }, { "epoch": 0.46, "grad_norm": 4.2643422863547045, "learning_rate": 5.903740105389078e-06, "loss": 0.5108, "step": 16024 }, { "epoch": 0.46, "grad_norm": 6.231040081746842, "learning_rate": 5.9032839813917876e-06, "loss": 0.8034, "step": 16025 }, { "epoch": 0.46, "grad_norm": 4.578458598093953, "learning_rate": 5.902827849623658e-06, "loss": 0.7546, "step": 16026 }, { "epoch": 0.46, "grad_norm": 8.517784530773428, "learning_rate": 5.902371710088611e-06, "loss": 1.0106, "step": 16027 }, { "epoch": 0.46, "grad_norm": 9.072933163428976, "learning_rate": 5.901915562790571e-06, "loss": 0.6686, "step": 16028 }, { "epoch": 0.46, "grad_norm": 6.743929146268888, "learning_rate": 5.901459407733463e-06, "loss": 0.5408, "step": 16029 }, { "epoch": 0.46, "grad_norm": 4.8295919418137565, "learning_rate": 5.901003244921212e-06, "loss": 0.7397, "step": 16030 }, { "epoch": 0.46, "grad_norm": 7.003384555294647, "learning_rate": 5.900547074357738e-06, "loss": 0.9022, "step": 16031 }, { "epoch": 0.46, "grad_norm": 6.129739854186957, "learning_rate": 5.90009089604697e-06, "loss": 0.4512, "step": 16032 }, { "epoch": 0.46, "grad_norm": 10.825445330756231, "learning_rate": 5.899634709992832e-06, "loss": 0.7091, "step": 16033 }, { "epoch": 0.46, "grad_norm": 12.54451340888316, "learning_rate": 5.8991785161992465e-06, "loss": 0.987, "step": 16034 }, { "epoch": 0.46, "grad_norm": 5.498795594210423, "learning_rate": 5.898722314670139e-06, "loss": 0.7231, "step": 16035 }, { "epoch": 0.46, "grad_norm": 5.794573468366576, "learning_rate": 5.898266105409434e-06, "loss": 0.4096, "step": 16036 }, { "epoch": 0.46, "grad_norm": 8.724286963502829, "learning_rate": 5.897809888421056e-06, "loss": 0.4333, "step": 16037 }, { "epoch": 0.46, "grad_norm": 5.939231218622635, "learning_rate": 5.897353663708929e-06, "loss": 0.416, "step": 16038 }, { "epoch": 0.46, "grad_norm": 5.326596127418191, "learning_rate": 5.89689743127698e-06, "loss": 0.5097, "step": 16039 }, { "epoch": 0.46, "grad_norm": 6.8318645092708525, "learning_rate": 5.8964411911291344e-06, "loss": 0.3853, "step": 16040 }, { "epoch": 0.46, "grad_norm": 5.157512995619865, "learning_rate": 5.895984943269314e-06, "loss": 0.6124, "step": 16041 }, { "epoch": 0.46, "grad_norm": 7.213358595598775, "learning_rate": 5.895528687701445e-06, "loss": 0.4148, "step": 16042 }, { "epoch": 0.46, "grad_norm": 3.2221047039269592, "learning_rate": 5.895072424429453e-06, "loss": 0.2376, "step": 16043 }, { "epoch": 0.46, "grad_norm": 4.479345817827474, "learning_rate": 5.894616153457265e-06, "loss": 0.5535, "step": 16044 }, { "epoch": 0.46, "grad_norm": 9.534320274253483, "learning_rate": 5.894159874788802e-06, "loss": 0.6868, "step": 16045 }, { "epoch": 0.46, "grad_norm": 6.0566340904360505, "learning_rate": 5.893703588427992e-06, "loss": 0.5076, "step": 16046 }, { "epoch": 0.46, "grad_norm": 4.177698763873471, "learning_rate": 5.893247294378762e-06, "loss": 0.6881, "step": 16047 }, { "epoch": 0.46, "grad_norm": 8.502298380954944, "learning_rate": 5.892790992645034e-06, "loss": 0.4289, "step": 16048 }, { "epoch": 0.46, "grad_norm": 4.681678717708502, "learning_rate": 5.892334683230736e-06, "loss": 0.346, "step": 16049 }, { "epoch": 0.46, "grad_norm": 7.54274376050151, "learning_rate": 5.8918783661397926e-06, "loss": 0.8118, "step": 16050 }, { "epoch": 0.46, "grad_norm": 10.487896346376564, "learning_rate": 5.891422041376128e-06, "loss": 0.614, "step": 16051 }, { "epoch": 0.46, "grad_norm": 8.823113536477335, "learning_rate": 5.89096570894367e-06, "loss": 0.57, "step": 16052 }, { "epoch": 0.46, "grad_norm": 6.5920075559503335, "learning_rate": 5.890509368846344e-06, "loss": 0.7845, "step": 16053 }, { "epoch": 0.46, "grad_norm": 6.590162991952327, "learning_rate": 5.890053021088075e-06, "loss": 0.7869, "step": 16054 }, { "epoch": 0.46, "grad_norm": 4.256264158328983, "learning_rate": 5.88959666567279e-06, "loss": 0.1582, "step": 16055 }, { "epoch": 0.46, "grad_norm": 9.918068030793092, "learning_rate": 5.889140302604414e-06, "loss": 0.5139, "step": 16056 }, { "epoch": 0.46, "grad_norm": 6.936141972321867, "learning_rate": 5.8886839318868735e-06, "loss": 0.8633, "step": 16057 }, { "epoch": 0.46, "grad_norm": 5.961693632032699, "learning_rate": 5.888227553524095e-06, "loss": 0.634, "step": 16058 }, { "epoch": 0.46, "grad_norm": 5.468072947416691, "learning_rate": 5.8877711675200044e-06, "loss": 0.5743, "step": 16059 }, { "epoch": 0.46, "grad_norm": 2.694968049527025, "learning_rate": 5.887314773878526e-06, "loss": 0.4568, "step": 16060 }, { "epoch": 0.46, "grad_norm": 5.800163344023364, "learning_rate": 5.88685837260359e-06, "loss": 0.3583, "step": 16061 }, { "epoch": 0.46, "grad_norm": 6.693081033227238, "learning_rate": 5.886401963699119e-06, "loss": 0.6594, "step": 16062 }, { "epoch": 0.46, "grad_norm": 5.567558046268739, "learning_rate": 5.8859455471690415e-06, "loss": 0.4632, "step": 16063 }, { "epoch": 0.46, "grad_norm": 7.8112162641583325, "learning_rate": 5.885489123017285e-06, "loss": 0.5433, "step": 16064 }, { "epoch": 0.46, "grad_norm": 6.7444640169622545, "learning_rate": 5.885032691247773e-06, "loss": 0.3261, "step": 16065 }, { "epoch": 0.46, "grad_norm": 3.414874157129835, "learning_rate": 5.884576251864434e-06, "loss": 0.2977, "step": 16066 }, { "epoch": 0.46, "grad_norm": 8.723405476982295, "learning_rate": 5.884119804871196e-06, "loss": 0.8346, "step": 16067 }, { "epoch": 0.46, "grad_norm": 4.943915388232015, "learning_rate": 5.883663350271982e-06, "loss": 0.8058, "step": 16068 }, { "epoch": 0.46, "grad_norm": 6.379755004555571, "learning_rate": 5.883206888070721e-06, "loss": 0.8307, "step": 16069 }, { "epoch": 0.46, "grad_norm": 8.811420131019846, "learning_rate": 5.882750418271341e-06, "loss": 0.6183, "step": 16070 }, { "epoch": 0.46, "grad_norm": 8.353402637130518, "learning_rate": 5.882293940877767e-06, "loss": 0.2231, "step": 16071 }, { "epoch": 0.46, "grad_norm": 3.973383324029139, "learning_rate": 5.881837455893927e-06, "loss": 0.332, "step": 16072 }, { "epoch": 0.46, "grad_norm": 3.3837143574624684, "learning_rate": 5.8813809633237485e-06, "loss": 0.2487, "step": 16073 }, { "epoch": 0.46, "grad_norm": 3.2415182319103675, "learning_rate": 5.8809244631711585e-06, "loss": 0.3875, "step": 16074 }, { "epoch": 0.46, "grad_norm": 4.299651868167813, "learning_rate": 5.880467955440083e-06, "loss": 0.3061, "step": 16075 }, { "epoch": 0.46, "grad_norm": 6.495211818374498, "learning_rate": 5.88001144013445e-06, "loss": 0.5837, "step": 16076 }, { "epoch": 0.46, "grad_norm": 4.014048701136753, "learning_rate": 5.879554917258187e-06, "loss": 0.5721, "step": 16077 }, { "epoch": 0.46, "grad_norm": 4.286458932807791, "learning_rate": 5.879098386815222e-06, "loss": 0.3865, "step": 16078 }, { "epoch": 0.46, "grad_norm": 1.8115324035587161, "learning_rate": 5.878641848809481e-06, "loss": 0.1187, "step": 16079 }, { "epoch": 0.46, "grad_norm": 3.6593998114105526, "learning_rate": 5.878185303244893e-06, "loss": 0.2979, "step": 16080 }, { "epoch": 0.46, "grad_norm": 4.277625282811732, "learning_rate": 5.8777287501253846e-06, "loss": 0.466, "step": 16081 }, { "epoch": 0.46, "grad_norm": 4.834145505127043, "learning_rate": 5.877272189454883e-06, "loss": 0.2223, "step": 16082 }, { "epoch": 0.46, "grad_norm": 5.85752894503076, "learning_rate": 5.876815621237318e-06, "loss": 0.5519, "step": 16083 }, { "epoch": 0.46, "grad_norm": 9.125529469845079, "learning_rate": 5.8763590454766176e-06, "loss": 0.5515, "step": 16084 }, { "epoch": 0.46, "grad_norm": 5.844382083591953, "learning_rate": 5.875902462176704e-06, "loss": 0.597, "step": 16085 }, { "epoch": 0.46, "grad_norm": 7.477841896599713, "learning_rate": 5.8754458713415116e-06, "loss": 0.5312, "step": 16086 }, { "epoch": 0.46, "grad_norm": 8.744196411254787, "learning_rate": 5.874989272974968e-06, "loss": 0.5442, "step": 16087 }, { "epoch": 0.46, "grad_norm": 5.8355721764054636, "learning_rate": 5.874532667080998e-06, "loss": 0.4945, "step": 16088 }, { "epoch": 0.46, "grad_norm": 10.35854620516596, "learning_rate": 5.87407605366353e-06, "loss": 0.6598, "step": 16089 }, { "epoch": 0.46, "grad_norm": 5.237591293376901, "learning_rate": 5.8736194327264935e-06, "loss": 0.4969, "step": 16090 }, { "epoch": 0.46, "grad_norm": 6.012484773707788, "learning_rate": 5.873162804273819e-06, "loss": 0.304, "step": 16091 }, { "epoch": 0.46, "grad_norm": 5.731054753229848, "learning_rate": 5.87270616830943e-06, "loss": 0.2989, "step": 16092 }, { "epoch": 0.46, "grad_norm": 2.860909711951783, "learning_rate": 5.872249524837258e-06, "loss": 0.1302, "step": 16093 }, { "epoch": 0.46, "grad_norm": 2.9377463927316225, "learning_rate": 5.871792873861231e-06, "loss": 0.4158, "step": 16094 }, { "epoch": 0.46, "grad_norm": 6.625193251184417, "learning_rate": 5.871336215385276e-06, "loss": 0.49, "step": 16095 }, { "epoch": 0.46, "grad_norm": 11.008396585317895, "learning_rate": 5.870879549413324e-06, "loss": 0.387, "step": 16096 }, { "epoch": 0.46, "grad_norm": 7.074212245422102, "learning_rate": 5.8704228759493035e-06, "loss": 0.4973, "step": 16097 }, { "epoch": 0.46, "grad_norm": 5.854665058501221, "learning_rate": 5.869966194997141e-06, "loss": 0.6716, "step": 16098 }, { "epoch": 0.46, "grad_norm": 8.056172275686182, "learning_rate": 5.869509506560765e-06, "loss": 1.1069, "step": 16099 }, { "epoch": 0.46, "grad_norm": 5.414319218085436, "learning_rate": 5.869052810644108e-06, "loss": 0.495, "step": 16100 }, { "epoch": 0.46, "grad_norm": 8.979988739074578, "learning_rate": 5.8685961072510965e-06, "loss": 0.4864, "step": 16101 }, { "epoch": 0.46, "grad_norm": 5.107523404747902, "learning_rate": 5.868139396385657e-06, "loss": 0.2932, "step": 16102 }, { "epoch": 0.46, "grad_norm": 5.175559445792775, "learning_rate": 5.867682678051724e-06, "loss": 0.4026, "step": 16103 }, { "epoch": 0.46, "grad_norm": 4.559913581481008, "learning_rate": 5.867225952253223e-06, "loss": 0.5316, "step": 16104 }, { "epoch": 0.46, "grad_norm": 22.02375178875714, "learning_rate": 5.866769218994084e-06, "loss": 0.6356, "step": 16105 }, { "epoch": 0.46, "grad_norm": 4.31990529464298, "learning_rate": 5.866312478278236e-06, "loss": 0.5473, "step": 16106 }, { "epoch": 0.46, "grad_norm": 5.936525706613781, "learning_rate": 5.865855730109609e-06, "loss": 0.9577, "step": 16107 }, { "epoch": 0.46, "grad_norm": 5.715137946065144, "learning_rate": 5.86539897449213e-06, "loss": 0.4572, "step": 16108 }, { "epoch": 0.46, "grad_norm": 2.46733832740828, "learning_rate": 5.8649422114297314e-06, "loss": 0.2173, "step": 16109 }, { "epoch": 0.46, "grad_norm": 6.14648344405379, "learning_rate": 5.8644854409263415e-06, "loss": 0.8978, "step": 16110 }, { "epoch": 0.46, "grad_norm": 7.956548047832261, "learning_rate": 5.864028662985889e-06, "loss": 0.3588, "step": 16111 }, { "epoch": 0.46, "grad_norm": 7.2502720880424185, "learning_rate": 5.863571877612305e-06, "loss": 0.8133, "step": 16112 }, { "epoch": 0.46, "grad_norm": 5.331061351161914, "learning_rate": 5.863115084809519e-06, "loss": 0.9732, "step": 16113 }, { "epoch": 0.46, "grad_norm": 10.400376366260765, "learning_rate": 5.86265828458146e-06, "loss": 0.2725, "step": 16114 }, { "epoch": 0.46, "grad_norm": 6.496232114454924, "learning_rate": 5.862201476932057e-06, "loss": 0.7046, "step": 16115 }, { "epoch": 0.46, "grad_norm": 3.044073430762184, "learning_rate": 5.861744661865241e-06, "loss": 0.2568, "step": 16116 }, { "epoch": 0.46, "grad_norm": 3.6165760421532354, "learning_rate": 5.8612878393849415e-06, "loss": 0.3854, "step": 16117 }, { "epoch": 0.46, "grad_norm": 8.020596990364753, "learning_rate": 5.860831009495089e-06, "loss": 0.5686, "step": 16118 }, { "epoch": 0.46, "grad_norm": 5.070502937396544, "learning_rate": 5.860374172199614e-06, "loss": 0.3746, "step": 16119 }, { "epoch": 0.46, "grad_norm": 9.597527173563513, "learning_rate": 5.859917327502446e-06, "loss": 0.6181, "step": 16120 }, { "epoch": 0.46, "grad_norm": 3.8223593753473026, "learning_rate": 5.8594604754075154e-06, "loss": 0.5117, "step": 16121 }, { "epoch": 0.46, "grad_norm": 4.001276766143195, "learning_rate": 5.8590036159187515e-06, "loss": 0.4414, "step": 16122 }, { "epoch": 0.46, "grad_norm": 6.622961126440078, "learning_rate": 5.858546749040085e-06, "loss": 0.2578, "step": 16123 }, { "epoch": 0.46, "grad_norm": 2.7091174433341467, "learning_rate": 5.858089874775448e-06, "loss": 0.1529, "step": 16124 }, { "epoch": 0.46, "grad_norm": 5.9798469643802115, "learning_rate": 5.857632993128768e-06, "loss": 0.3559, "step": 16125 }, { "epoch": 0.46, "grad_norm": 11.227621454615274, "learning_rate": 5.857176104103975e-06, "loss": 0.8997, "step": 16126 }, { "epoch": 0.46, "grad_norm": 6.523818172837599, "learning_rate": 5.856719207705005e-06, "loss": 0.8067, "step": 16127 }, { "epoch": 0.46, "grad_norm": 5.70409913823031, "learning_rate": 5.856262303935784e-06, "loss": 0.7317, "step": 16128 }, { "epoch": 0.46, "grad_norm": 6.016389079924532, "learning_rate": 5.855805392800244e-06, "loss": 0.4575, "step": 16129 }, { "epoch": 0.46, "grad_norm": 8.087302331410257, "learning_rate": 5.8553484743023145e-06, "loss": 0.4569, "step": 16130 }, { "epoch": 0.46, "grad_norm": 3.485855985241553, "learning_rate": 5.854891548445929e-06, "loss": 0.3983, "step": 16131 }, { "epoch": 0.46, "grad_norm": 6.319930754686622, "learning_rate": 5.854434615235016e-06, "loss": 0.4538, "step": 16132 }, { "epoch": 0.46, "grad_norm": 6.183043368070685, "learning_rate": 5.853977674673507e-06, "loss": 0.6723, "step": 16133 }, { "epoch": 0.46, "grad_norm": 3.9677531574922438, "learning_rate": 5.853520726765332e-06, "loss": 0.7418, "step": 16134 }, { "epoch": 0.46, "grad_norm": 11.463848802587695, "learning_rate": 5.853063771514426e-06, "loss": 0.6219, "step": 16135 }, { "epoch": 0.46, "grad_norm": 1.9803067654608972, "learning_rate": 5.852606808924714e-06, "loss": 0.116, "step": 16136 }, { "epoch": 0.46, "grad_norm": 5.7092728595752895, "learning_rate": 5.852149839000134e-06, "loss": 0.5819, "step": 16137 }, { "epoch": 0.46, "grad_norm": 3.354253066867354, "learning_rate": 5.851692861744614e-06, "loss": 0.4527, "step": 16138 }, { "epoch": 0.46, "grad_norm": 8.776008100285091, "learning_rate": 5.851235877162083e-06, "loss": 0.6406, "step": 16139 }, { "epoch": 0.46, "grad_norm": 3.8087083535191355, "learning_rate": 5.850778885256474e-06, "loss": 0.5956, "step": 16140 }, { "epoch": 0.46, "grad_norm": 8.914415330782903, "learning_rate": 5.850321886031721e-06, "loss": 0.5563, "step": 16141 }, { "epoch": 0.46, "grad_norm": 4.561115472868676, "learning_rate": 5.849864879491753e-06, "loss": 0.5355, "step": 16142 }, { "epoch": 0.46, "grad_norm": 1.7542873554740168, "learning_rate": 5.8494078656405e-06, "loss": 0.149, "step": 16143 }, { "epoch": 0.46, "grad_norm": 7.989393952341317, "learning_rate": 5.8489508444818984e-06, "loss": 0.7702, "step": 16144 }, { "epoch": 0.46, "grad_norm": 1.5834882267260444, "learning_rate": 5.848493816019877e-06, "loss": 0.1132, "step": 16145 }, { "epoch": 0.46, "grad_norm": 6.070694409388651, "learning_rate": 5.848036780258366e-06, "loss": 0.5589, "step": 16146 }, { "epoch": 0.46, "grad_norm": 6.657665585712306, "learning_rate": 5.8475797372013e-06, "loss": 0.5158, "step": 16147 }, { "epoch": 0.46, "grad_norm": 5.935125578891068, "learning_rate": 5.847122686852611e-06, "loss": 0.6295, "step": 16148 }, { "epoch": 0.46, "grad_norm": 6.1982761909221695, "learning_rate": 5.846665629216228e-06, "loss": 0.3398, "step": 16149 }, { "epoch": 0.46, "grad_norm": 13.822253619775413, "learning_rate": 5.8462085642960865e-06, "loss": 0.489, "step": 16150 }, { "epoch": 0.46, "grad_norm": 3.522606576183117, "learning_rate": 5.845751492096115e-06, "loss": 0.2097, "step": 16151 }, { "epoch": 0.46, "grad_norm": 5.496661386682039, "learning_rate": 5.845294412620249e-06, "loss": 0.6225, "step": 16152 }, { "epoch": 0.46, "grad_norm": 4.324772303987634, "learning_rate": 5.844837325872419e-06, "loss": 0.3732, "step": 16153 }, { "epoch": 0.46, "grad_norm": 3.7014261203696126, "learning_rate": 5.844380231856558e-06, "loss": 0.3004, "step": 16154 }, { "epoch": 0.46, "grad_norm": 4.332022884488091, "learning_rate": 5.843923130576596e-06, "loss": 0.5061, "step": 16155 }, { "epoch": 0.46, "grad_norm": 8.012881874827647, "learning_rate": 5.843466022036469e-06, "loss": 0.7114, "step": 16156 }, { "epoch": 0.46, "grad_norm": 7.7010143553190495, "learning_rate": 5.8430089062401064e-06, "loss": 0.9046, "step": 16157 }, { "epoch": 0.46, "grad_norm": 4.7991603414939075, "learning_rate": 5.842551783191444e-06, "loss": 0.3596, "step": 16158 }, { "epoch": 0.46, "grad_norm": 5.667698438996022, "learning_rate": 5.842094652894409e-06, "loss": 0.4189, "step": 16159 }, { "epoch": 0.46, "grad_norm": 6.009177302194756, "learning_rate": 5.841637515352939e-06, "loss": 0.414, "step": 16160 }, { "epoch": 0.46, "grad_norm": 3.6929868351877033, "learning_rate": 5.841180370570967e-06, "loss": 0.3919, "step": 16161 }, { "epoch": 0.46, "grad_norm": 9.124165326966537, "learning_rate": 5.840723218552422e-06, "loss": 0.3332, "step": 16162 }, { "epoch": 0.46, "grad_norm": 3.8343510969764676, "learning_rate": 5.840266059301238e-06, "loss": 0.3781, "step": 16163 }, { "epoch": 0.46, "grad_norm": 2.8244871589725298, "learning_rate": 5.83980889282135e-06, "loss": 0.2955, "step": 16164 }, { "epoch": 0.46, "grad_norm": 9.104175163166147, "learning_rate": 5.839351719116689e-06, "loss": 1.2974, "step": 16165 }, { "epoch": 0.46, "grad_norm": 6.056067367335228, "learning_rate": 5.8388945381911885e-06, "loss": 0.2858, "step": 16166 }, { "epoch": 0.46, "grad_norm": 5.779754362727645, "learning_rate": 5.838437350048782e-06, "loss": 0.5529, "step": 16167 }, { "epoch": 0.46, "grad_norm": 5.8299753764697435, "learning_rate": 5.837980154693401e-06, "loss": 0.6028, "step": 16168 }, { "epoch": 0.46, "grad_norm": 3.890289659870498, "learning_rate": 5.837522952128981e-06, "loss": 0.4048, "step": 16169 }, { "epoch": 0.46, "grad_norm": 7.231647479871536, "learning_rate": 5.837065742359454e-06, "loss": 1.0011, "step": 16170 }, { "epoch": 0.46, "grad_norm": 7.614591404553548, "learning_rate": 5.836608525388754e-06, "loss": 0.6144, "step": 16171 }, { "epoch": 0.46, "grad_norm": 7.224068536080569, "learning_rate": 5.8361513012208126e-06, "loss": 0.7541, "step": 16172 }, { "epoch": 0.46, "grad_norm": 7.160719895640949, "learning_rate": 5.835694069859564e-06, "loss": 1.018, "step": 16173 }, { "epoch": 0.46, "grad_norm": 3.599652609355924, "learning_rate": 5.835236831308943e-06, "loss": 0.3101, "step": 16174 }, { "epoch": 0.46, "grad_norm": 3.9066792976991085, "learning_rate": 5.8347795855728835e-06, "loss": 0.4193, "step": 16175 }, { "epoch": 0.46, "grad_norm": 7.2396216595044605, "learning_rate": 5.834322332655317e-06, "loss": 0.6477, "step": 16176 }, { "epoch": 0.46, "grad_norm": 3.3600434192764177, "learning_rate": 5.833865072560177e-06, "loss": 0.2001, "step": 16177 }, { "epoch": 0.46, "grad_norm": 6.7063545325558955, "learning_rate": 5.8334078052914015e-06, "loss": 0.3508, "step": 16178 }, { "epoch": 0.46, "grad_norm": 6.160627638560943, "learning_rate": 5.832950530852919e-06, "loss": 0.3424, "step": 16179 }, { "epoch": 0.46, "grad_norm": 4.655264730987505, "learning_rate": 5.832493249248666e-06, "loss": 0.4735, "step": 16180 }, { "epoch": 0.46, "grad_norm": 3.0553072327557405, "learning_rate": 5.832035960482577e-06, "loss": 0.3421, "step": 16181 }, { "epoch": 0.46, "grad_norm": 5.418393383149033, "learning_rate": 5.831578664558585e-06, "loss": 0.5396, "step": 16182 }, { "epoch": 0.46, "grad_norm": 5.46219220025324, "learning_rate": 5.831121361480622e-06, "loss": 0.5337, "step": 16183 }, { "epoch": 0.46, "grad_norm": 3.3465211205698715, "learning_rate": 5.830664051252627e-06, "loss": 0.4254, "step": 16184 }, { "epoch": 0.46, "grad_norm": 5.786742781667352, "learning_rate": 5.830206733878529e-06, "loss": 0.3763, "step": 16185 }, { "epoch": 0.46, "grad_norm": 5.7688774301458885, "learning_rate": 5.829749409362266e-06, "loss": 0.3752, "step": 16186 }, { "epoch": 0.46, "grad_norm": 5.70397909379421, "learning_rate": 5.829292077707771e-06, "loss": 0.2078, "step": 16187 }, { "epoch": 0.46, "grad_norm": 5.18625512180549, "learning_rate": 5.8288347389189795e-06, "loss": 0.4789, "step": 16188 }, { "epoch": 0.46, "grad_norm": 12.096720781577542, "learning_rate": 5.828377392999823e-06, "loss": 0.3273, "step": 16189 }, { "epoch": 0.46, "grad_norm": 5.384355741832829, "learning_rate": 5.827920039954238e-06, "loss": 0.2809, "step": 16190 }, { "epoch": 0.46, "grad_norm": 4.386572926272411, "learning_rate": 5.827462679786159e-06, "loss": 0.4153, "step": 16191 }, { "epoch": 0.46, "grad_norm": 3.2203287678566332, "learning_rate": 5.827005312499521e-06, "loss": 0.282, "step": 16192 }, { "epoch": 0.46, "grad_norm": 4.424451151716674, "learning_rate": 5.826547938098257e-06, "loss": 0.5629, "step": 16193 }, { "epoch": 0.46, "grad_norm": 7.240706439459594, "learning_rate": 5.8260905565863036e-06, "loss": 0.3879, "step": 16194 }, { "epoch": 0.46, "grad_norm": 3.7186741540691415, "learning_rate": 5.825633167967595e-06, "loss": 0.2644, "step": 16195 }, { "epoch": 0.46, "grad_norm": 9.343058378719682, "learning_rate": 5.825175772246066e-06, "loss": 0.3395, "step": 16196 }, { "epoch": 0.46, "grad_norm": 4.41100557864573, "learning_rate": 5.8247183694256505e-06, "loss": 0.5224, "step": 16197 }, { "epoch": 0.46, "grad_norm": 4.409912535130927, "learning_rate": 5.824260959510286e-06, "loss": 0.6296, "step": 16198 }, { "epoch": 0.46, "grad_norm": 6.54145512167956, "learning_rate": 5.823803542503905e-06, "loss": 0.7623, "step": 16199 }, { "epoch": 0.46, "grad_norm": 6.55841925765498, "learning_rate": 5.823346118410442e-06, "loss": 0.6504, "step": 16200 }, { "epoch": 0.46, "grad_norm": 6.846734672024169, "learning_rate": 5.822888687233836e-06, "loss": 0.791, "step": 16201 }, { "epoch": 0.46, "grad_norm": 7.783768411016982, "learning_rate": 5.82243124897802e-06, "loss": 0.4075, "step": 16202 }, { "epoch": 0.46, "grad_norm": 2.861479469436963, "learning_rate": 5.821973803646928e-06, "loss": 0.1805, "step": 16203 }, { "epoch": 0.46, "grad_norm": 6.491124990028264, "learning_rate": 5.821516351244496e-06, "loss": 0.8097, "step": 16204 }, { "epoch": 0.46, "grad_norm": 5.1297484474151895, "learning_rate": 5.8210588917746626e-06, "loss": 0.2754, "step": 16205 }, { "epoch": 0.46, "grad_norm": 6.111345046676172, "learning_rate": 5.820601425241359e-06, "loss": 0.353, "step": 16206 }, { "epoch": 0.46, "grad_norm": 4.82762328568792, "learning_rate": 5.820143951648522e-06, "loss": 0.4543, "step": 16207 }, { "epoch": 0.46, "grad_norm": 4.360252428796797, "learning_rate": 5.819686471000089e-06, "loss": 0.4207, "step": 16208 }, { "epoch": 0.46, "grad_norm": 5.580997357945926, "learning_rate": 5.819228983299992e-06, "loss": 0.6509, "step": 16209 }, { "epoch": 0.46, "grad_norm": 3.174834500902965, "learning_rate": 5.81877148855217e-06, "loss": 0.3441, "step": 16210 }, { "epoch": 0.46, "grad_norm": 8.427109940293931, "learning_rate": 5.818313986760557e-06, "loss": 0.7762, "step": 16211 }, { "epoch": 0.46, "grad_norm": 3.7484605490328478, "learning_rate": 5.817856477929091e-06, "loss": 0.1933, "step": 16212 }, { "epoch": 0.46, "grad_norm": 5.0391745384831514, "learning_rate": 5.817398962061705e-06, "loss": 0.477, "step": 16213 }, { "epoch": 0.46, "grad_norm": 3.1703158149683106, "learning_rate": 5.816941439162338e-06, "loss": 0.4277, "step": 16214 }, { "epoch": 0.46, "grad_norm": 4.655118561575705, "learning_rate": 5.816483909234925e-06, "loss": 0.4466, "step": 16215 }, { "epoch": 0.46, "grad_norm": 4.909804544773366, "learning_rate": 5.816026372283399e-06, "loss": 0.4635, "step": 16216 }, { "epoch": 0.46, "grad_norm": 2.8432699364201612, "learning_rate": 5.815568828311699e-06, "loss": 0.1715, "step": 16217 }, { "epoch": 0.46, "grad_norm": 7.9003008145389675, "learning_rate": 5.815111277323762e-06, "loss": 0.6218, "step": 16218 }, { "epoch": 0.46, "grad_norm": 9.859651395426882, "learning_rate": 5.814653719323522e-06, "loss": 0.7078, "step": 16219 }, { "epoch": 0.46, "grad_norm": 7.73050604488866, "learning_rate": 5.814196154314916e-06, "loss": 0.4308, "step": 16220 }, { "epoch": 0.46, "grad_norm": 4.348434858359036, "learning_rate": 5.813738582301881e-06, "loss": 0.4146, "step": 16221 }, { "epoch": 0.46, "grad_norm": 5.425368796615089, "learning_rate": 5.813281003288354e-06, "loss": 0.3726, "step": 16222 }, { "epoch": 0.46, "grad_norm": 4.72612131124674, "learning_rate": 5.812823417278271e-06, "loss": 0.6215, "step": 16223 }, { "epoch": 0.46, "grad_norm": 5.820447513115266, "learning_rate": 5.812365824275568e-06, "loss": 0.3798, "step": 16224 }, { "epoch": 0.46, "grad_norm": 7.466896305923928, "learning_rate": 5.811908224284181e-06, "loss": 0.6506, "step": 16225 }, { "epoch": 0.46, "grad_norm": 11.164358317978767, "learning_rate": 5.811450617308047e-06, "loss": 0.5763, "step": 16226 }, { "epoch": 0.46, "grad_norm": 12.622622728784325, "learning_rate": 5.8109930033511045e-06, "loss": 0.8692, "step": 16227 }, { "epoch": 0.46, "grad_norm": 5.300860995407543, "learning_rate": 5.8105353824172894e-06, "loss": 0.8942, "step": 16228 }, { "epoch": 0.46, "grad_norm": 6.311317125289482, "learning_rate": 5.810077754510537e-06, "loss": 0.5925, "step": 16229 }, { "epoch": 0.46, "grad_norm": 5.179564576682607, "learning_rate": 5.809620119634786e-06, "loss": 0.3691, "step": 16230 }, { "epoch": 0.46, "grad_norm": 6.727290558264156, "learning_rate": 5.809162477793972e-06, "loss": 0.3927, "step": 16231 }, { "epoch": 0.46, "grad_norm": 4.63917488021128, "learning_rate": 5.808704828992034e-06, "loss": 0.5317, "step": 16232 }, { "epoch": 0.46, "grad_norm": 4.2389553114165865, "learning_rate": 5.808247173232906e-06, "loss": 0.5743, "step": 16233 }, { "epoch": 0.46, "grad_norm": 5.17576496625534, "learning_rate": 5.807789510520529e-06, "loss": 0.2923, "step": 16234 }, { "epoch": 0.46, "grad_norm": 11.647301509121403, "learning_rate": 5.807331840858839e-06, "loss": 0.3515, "step": 16235 }, { "epoch": 0.46, "grad_norm": 6.067501954155769, "learning_rate": 5.80687416425177e-06, "loss": 0.7287, "step": 16236 }, { "epoch": 0.46, "grad_norm": 6.321710646449783, "learning_rate": 5.806416480703263e-06, "loss": 0.3246, "step": 16237 }, { "epoch": 0.47, "grad_norm": 5.448454860072133, "learning_rate": 5.805958790217256e-06, "loss": 0.5963, "step": 16238 }, { "epoch": 0.47, "grad_norm": 7.030559658493291, "learning_rate": 5.805501092797683e-06, "loss": 0.6349, "step": 16239 }, { "epoch": 0.47, "grad_norm": 4.846272703781255, "learning_rate": 5.805043388448483e-06, "loss": 0.2871, "step": 16240 }, { "epoch": 0.47, "grad_norm": 6.023288136175271, "learning_rate": 5.804585677173595e-06, "loss": 0.6066, "step": 16241 }, { "epoch": 0.47, "grad_norm": 6.471162834383606, "learning_rate": 5.804127958976955e-06, "loss": 0.2068, "step": 16242 }, { "epoch": 0.47, "grad_norm": 2.9680673517093816, "learning_rate": 5.8036702338625005e-06, "loss": 0.233, "step": 16243 }, { "epoch": 0.47, "grad_norm": 6.862476699039111, "learning_rate": 5.80321250183417e-06, "loss": 0.3986, "step": 16244 }, { "epoch": 0.47, "grad_norm": 4.569981463580445, "learning_rate": 5.802754762895904e-06, "loss": 0.4741, "step": 16245 }, { "epoch": 0.47, "grad_norm": 4.256302143959452, "learning_rate": 5.802297017051634e-06, "loss": 0.7837, "step": 16246 }, { "epoch": 0.47, "grad_norm": 4.8080884852399475, "learning_rate": 5.801839264305303e-06, "loss": 0.254, "step": 16247 }, { "epoch": 0.47, "grad_norm": 7.232728643089878, "learning_rate": 5.801381504660848e-06, "loss": 0.6102, "step": 16248 }, { "epoch": 0.47, "grad_norm": 4.664053554302691, "learning_rate": 5.800923738122206e-06, "loss": 0.3245, "step": 16249 }, { "epoch": 0.47, "grad_norm": 6.495705579766058, "learning_rate": 5.800465964693315e-06, "loss": 0.277, "step": 16250 }, { "epoch": 0.47, "grad_norm": 4.60515111292153, "learning_rate": 5.800008184378114e-06, "loss": 0.4344, "step": 16251 }, { "epoch": 0.47, "grad_norm": 6.758125730101901, "learning_rate": 5.799550397180543e-06, "loss": 0.3701, "step": 16252 }, { "epoch": 0.47, "grad_norm": 10.464954701711543, "learning_rate": 5.799092603104537e-06, "loss": 0.715, "step": 16253 }, { "epoch": 0.47, "grad_norm": 8.449467740422206, "learning_rate": 5.798634802154035e-06, "loss": 0.7545, "step": 16254 }, { "epoch": 0.47, "grad_norm": 3.921061800051116, "learning_rate": 5.798176994332978e-06, "loss": 0.253, "step": 16255 }, { "epoch": 0.47, "grad_norm": 8.649170662697426, "learning_rate": 5.797719179645302e-06, "loss": 0.3367, "step": 16256 }, { "epoch": 0.47, "grad_norm": 5.330982683642015, "learning_rate": 5.797261358094946e-06, "loss": 0.5088, "step": 16257 }, { "epoch": 0.47, "grad_norm": 6.4240551888409945, "learning_rate": 5.796803529685847e-06, "loss": 0.6044, "step": 16258 }, { "epoch": 0.47, "grad_norm": 4.404627589223952, "learning_rate": 5.796345694421947e-06, "loss": 0.2608, "step": 16259 }, { "epoch": 0.47, "grad_norm": 7.273451791784092, "learning_rate": 5.795887852307183e-06, "loss": 0.3887, "step": 16260 }, { "epoch": 0.47, "grad_norm": 6.4286359480995845, "learning_rate": 5.795430003345492e-06, "loss": 0.2779, "step": 16261 }, { "epoch": 0.47, "grad_norm": 9.918995693267002, "learning_rate": 5.794972147540817e-06, "loss": 0.6306, "step": 16262 }, { "epoch": 0.47, "grad_norm": 9.298157983295628, "learning_rate": 5.794514284897093e-06, "loss": 0.5093, "step": 16263 }, { "epoch": 0.47, "grad_norm": 10.154522652087003, "learning_rate": 5.7940564154182624e-06, "loss": 1.5115, "step": 16264 }, { "epoch": 0.47, "grad_norm": 6.929529508662603, "learning_rate": 5.793598539108261e-06, "loss": 0.5559, "step": 16265 }, { "epoch": 0.47, "grad_norm": 2.01404753029101, "learning_rate": 5.793140655971028e-06, "loss": 0.1064, "step": 16266 }, { "epoch": 0.47, "grad_norm": 4.795383407849109, "learning_rate": 5.792682766010505e-06, "loss": 0.5861, "step": 16267 }, { "epoch": 0.47, "grad_norm": 4.61327807162286, "learning_rate": 5.792224869230629e-06, "loss": 0.3005, "step": 16268 }, { "epoch": 0.47, "grad_norm": 4.568014235422564, "learning_rate": 5.7917669656353405e-06, "loss": 0.6643, "step": 16269 }, { "epoch": 0.47, "grad_norm": 3.6585002071812753, "learning_rate": 5.791309055228578e-06, "loss": 0.6639, "step": 16270 }, { "epoch": 0.47, "grad_norm": 5.0815068178860265, "learning_rate": 5.7908511380142804e-06, "loss": 0.6699, "step": 16271 }, { "epoch": 0.47, "grad_norm": 3.180767953620678, "learning_rate": 5.790393213996391e-06, "loss": 0.2229, "step": 16272 }, { "epoch": 0.47, "grad_norm": 7.448846320617624, "learning_rate": 5.789935283178842e-06, "loss": 0.7698, "step": 16273 }, { "epoch": 0.47, "grad_norm": 5.301355992799978, "learning_rate": 5.789477345565578e-06, "loss": 0.2606, "step": 16274 }, { "epoch": 0.47, "grad_norm": 4.56735756055349, "learning_rate": 5.789019401160539e-06, "loss": 0.1009, "step": 16275 }, { "epoch": 0.47, "grad_norm": 5.279176203300857, "learning_rate": 5.7885614499676625e-06, "loss": 0.3002, "step": 16276 }, { "epoch": 0.47, "grad_norm": 4.96466597171999, "learning_rate": 5.788103491990889e-06, "loss": 0.43, "step": 16277 }, { "epoch": 0.47, "grad_norm": 3.388386000301949, "learning_rate": 5.787645527234158e-06, "loss": 0.3677, "step": 16278 }, { "epoch": 0.47, "grad_norm": 6.001222962674316, "learning_rate": 5.787187555701411e-06, "loss": 0.5307, "step": 16279 }, { "epoch": 0.47, "grad_norm": 7.139278827544782, "learning_rate": 5.786729577396585e-06, "loss": 0.5671, "step": 16280 }, { "epoch": 0.47, "grad_norm": 9.944984352709398, "learning_rate": 5.786271592323621e-06, "loss": 0.6558, "step": 16281 }, { "epoch": 0.47, "grad_norm": 3.0583833351690535, "learning_rate": 5.78581360048646e-06, "loss": 0.8221, "step": 16282 }, { "epoch": 0.47, "grad_norm": 3.0215004524143505, "learning_rate": 5.78535560188904e-06, "loss": 0.1949, "step": 16283 }, { "epoch": 0.47, "grad_norm": 4.137799720626306, "learning_rate": 5.784897596535304e-06, "loss": 0.4548, "step": 16284 }, { "epoch": 0.47, "grad_norm": 4.487296452063308, "learning_rate": 5.784439584429191e-06, "loss": 0.3255, "step": 16285 }, { "epoch": 0.47, "grad_norm": 5.389116557163439, "learning_rate": 5.783981565574639e-06, "loss": 0.5765, "step": 16286 }, { "epoch": 0.47, "grad_norm": 3.656416962552764, "learning_rate": 5.783523539975592e-06, "loss": 0.6103, "step": 16287 }, { "epoch": 0.47, "grad_norm": 4.387808931782726, "learning_rate": 5.783065507635986e-06, "loss": 0.3616, "step": 16288 }, { "epoch": 0.47, "grad_norm": 5.328154354070533, "learning_rate": 5.782607468559767e-06, "loss": 0.6047, "step": 16289 }, { "epoch": 0.47, "grad_norm": 4.560144417299568, "learning_rate": 5.78214942275087e-06, "loss": 0.2226, "step": 16290 }, { "epoch": 0.47, "grad_norm": 4.257289805395163, "learning_rate": 5.781691370213238e-06, "loss": 0.4162, "step": 16291 }, { "epoch": 0.47, "grad_norm": 5.016194961820067, "learning_rate": 5.781233310950812e-06, "loss": 0.5182, "step": 16292 }, { "epoch": 0.47, "grad_norm": 5.851863186168469, "learning_rate": 5.780775244967533e-06, "loss": 0.4821, "step": 16293 }, { "epoch": 0.47, "grad_norm": 9.295943090069311, "learning_rate": 5.780317172267339e-06, "loss": 0.4202, "step": 16294 }, { "epoch": 0.47, "grad_norm": 3.3573798353970705, "learning_rate": 5.779859092854172e-06, "loss": 0.4055, "step": 16295 }, { "epoch": 0.47, "grad_norm": 3.193754977765469, "learning_rate": 5.779401006731976e-06, "loss": 0.3761, "step": 16296 }, { "epoch": 0.47, "grad_norm": 12.51442115508839, "learning_rate": 5.778942913904687e-06, "loss": 1.0486, "step": 16297 }, { "epoch": 0.47, "grad_norm": 10.548357281544428, "learning_rate": 5.778484814376249e-06, "loss": 1.0721, "step": 16298 }, { "epoch": 0.47, "grad_norm": 7.74871326347365, "learning_rate": 5.7780267081506e-06, "loss": 0.783, "step": 16299 }, { "epoch": 0.47, "grad_norm": 2.570130362043163, "learning_rate": 5.777568595231685e-06, "loss": 0.3853, "step": 16300 }, { "epoch": 0.47, "grad_norm": 9.303539958050248, "learning_rate": 5.777110475623442e-06, "loss": 0.6431, "step": 16301 }, { "epoch": 0.47, "grad_norm": 6.8139168552087925, "learning_rate": 5.776652349329814e-06, "loss": 0.4436, "step": 16302 }, { "epoch": 0.47, "grad_norm": 3.898205400487623, "learning_rate": 5.7761942163547425e-06, "loss": 0.3435, "step": 16303 }, { "epoch": 0.47, "grad_norm": 8.18105895633155, "learning_rate": 5.775736076702165e-06, "loss": 0.6066, "step": 16304 }, { "epoch": 0.47, "grad_norm": 3.488047644299618, "learning_rate": 5.775277930376027e-06, "loss": 0.1787, "step": 16305 }, { "epoch": 0.47, "grad_norm": 4.60074625078059, "learning_rate": 5.77481977738027e-06, "loss": 0.5367, "step": 16306 }, { "epoch": 0.47, "grad_norm": 5.069793674627182, "learning_rate": 5.774361617718831e-06, "loss": 0.273, "step": 16307 }, { "epoch": 0.47, "grad_norm": 9.204897284071938, "learning_rate": 5.773903451395655e-06, "loss": 0.6634, "step": 16308 }, { "epoch": 0.47, "grad_norm": 2.0190723643071546, "learning_rate": 5.773445278414684e-06, "loss": 0.0951, "step": 16309 }, { "epoch": 0.47, "grad_norm": 4.980138431141746, "learning_rate": 5.772987098779859e-06, "loss": 0.5882, "step": 16310 }, { "epoch": 0.47, "grad_norm": 3.8028617880645355, "learning_rate": 5.7725289124951205e-06, "loss": 0.2942, "step": 16311 }, { "epoch": 0.47, "grad_norm": 7.478894992948607, "learning_rate": 5.772070719564411e-06, "loss": 0.739, "step": 16312 }, { "epoch": 0.47, "grad_norm": 9.127291248016745, "learning_rate": 5.771612519991672e-06, "loss": 0.6679, "step": 16313 }, { "epoch": 0.47, "grad_norm": 3.8177757656218922, "learning_rate": 5.771154313780845e-06, "loss": 0.2465, "step": 16314 }, { "epoch": 0.47, "grad_norm": 5.867606766472561, "learning_rate": 5.770696100935872e-06, "loss": 0.7083, "step": 16315 }, { "epoch": 0.47, "grad_norm": 5.361573571873347, "learning_rate": 5.770237881460695e-06, "loss": 0.6293, "step": 16316 }, { "epoch": 0.47, "grad_norm": 3.98606815639897, "learning_rate": 5.769779655359258e-06, "loss": 0.502, "step": 16317 }, { "epoch": 0.47, "grad_norm": 10.38313558174852, "learning_rate": 5.7693214226355e-06, "loss": 0.7595, "step": 16318 }, { "epoch": 0.47, "grad_norm": 6.794957524069319, "learning_rate": 5.768863183293366e-06, "loss": 0.3578, "step": 16319 }, { "epoch": 0.47, "grad_norm": 4.3299347161620085, "learning_rate": 5.768404937336796e-06, "loss": 0.7019, "step": 16320 }, { "epoch": 0.47, "grad_norm": 4.950494783892482, "learning_rate": 5.767946684769733e-06, "loss": 0.4991, "step": 16321 }, { "epoch": 0.47, "grad_norm": 3.752557486412102, "learning_rate": 5.767488425596118e-06, "loss": 0.2467, "step": 16322 }, { "epoch": 0.47, "grad_norm": 10.100406683863941, "learning_rate": 5.767030159819897e-06, "loss": 0.6588, "step": 16323 }, { "epoch": 0.47, "grad_norm": 9.879746467333515, "learning_rate": 5.7665718874450076e-06, "loss": 0.3639, "step": 16324 }, { "epoch": 0.47, "grad_norm": 6.70670843486154, "learning_rate": 5.7661136084753955e-06, "loss": 0.2951, "step": 16325 }, { "epoch": 0.47, "grad_norm": 5.650264122789977, "learning_rate": 5.765655322915004e-06, "loss": 0.4409, "step": 16326 }, { "epoch": 0.47, "grad_norm": 7.221051926632575, "learning_rate": 5.765197030767772e-06, "loss": 0.4042, "step": 16327 }, { "epoch": 0.47, "grad_norm": 3.5159376556393735, "learning_rate": 5.764738732037645e-06, "loss": 0.3266, "step": 16328 }, { "epoch": 0.47, "grad_norm": 7.281871392580919, "learning_rate": 5.764280426728564e-06, "loss": 0.6165, "step": 16329 }, { "epoch": 0.47, "grad_norm": 9.770869978217169, "learning_rate": 5.763822114844474e-06, "loss": 0.7416, "step": 16330 }, { "epoch": 0.47, "grad_norm": 7.392012322768554, "learning_rate": 5.763363796389315e-06, "loss": 0.4695, "step": 16331 }, { "epoch": 0.47, "grad_norm": 4.381967337270278, "learning_rate": 5.762905471367032e-06, "loss": 0.4518, "step": 16332 }, { "epoch": 0.47, "grad_norm": 6.194471343038611, "learning_rate": 5.762447139781566e-06, "loss": 0.5883, "step": 16333 }, { "epoch": 0.47, "grad_norm": 5.4331283973394475, "learning_rate": 5.761988801636862e-06, "loss": 0.558, "step": 16334 }, { "epoch": 0.47, "grad_norm": 7.2491667696197855, "learning_rate": 5.761530456936863e-06, "loss": 0.7758, "step": 16335 }, { "epoch": 0.47, "grad_norm": 4.097313952990623, "learning_rate": 5.76107210568551e-06, "loss": 0.4319, "step": 16336 }, { "epoch": 0.47, "grad_norm": 7.99771013866833, "learning_rate": 5.760613747886748e-06, "loss": 0.6838, "step": 16337 }, { "epoch": 0.47, "grad_norm": 3.091329591083572, "learning_rate": 5.760155383544519e-06, "loss": 0.4197, "step": 16338 }, { "epoch": 0.47, "grad_norm": 5.026177923961251, "learning_rate": 5.759697012662768e-06, "loss": 0.4646, "step": 16339 }, { "epoch": 0.47, "grad_norm": 6.377023039313336, "learning_rate": 5.759238635245436e-06, "loss": 0.5493, "step": 16340 }, { "epoch": 0.47, "grad_norm": 7.468944722594019, "learning_rate": 5.758780251296468e-06, "loss": 0.7701, "step": 16341 }, { "epoch": 0.47, "grad_norm": 7.8710866317527515, "learning_rate": 5.7583218608198064e-06, "loss": 0.5155, "step": 16342 }, { "epoch": 0.47, "grad_norm": 6.219498325735615, "learning_rate": 5.7578634638193965e-06, "loss": 0.6817, "step": 16343 }, { "epoch": 0.47, "grad_norm": 3.6182421471204735, "learning_rate": 5.757405060299179e-06, "loss": 0.4308, "step": 16344 }, { "epoch": 0.47, "grad_norm": 5.828651649877378, "learning_rate": 5.7569466502631e-06, "loss": 0.4982, "step": 16345 }, { "epoch": 0.47, "grad_norm": 1.8497692711748246, "learning_rate": 5.7564882337151015e-06, "loss": 0.1971, "step": 16346 }, { "epoch": 0.47, "grad_norm": 6.707123388636191, "learning_rate": 5.756029810659128e-06, "loss": 0.3377, "step": 16347 }, { "epoch": 0.47, "grad_norm": 2.468457192548742, "learning_rate": 5.755571381099123e-06, "loss": 0.3709, "step": 16348 }, { "epoch": 0.47, "grad_norm": 4.786105862806063, "learning_rate": 5.755112945039032e-06, "loss": 0.3544, "step": 16349 }, { "epoch": 0.47, "grad_norm": 5.186341305671659, "learning_rate": 5.754654502482795e-06, "loss": 0.7914, "step": 16350 }, { "epoch": 0.47, "grad_norm": 4.348653865671584, "learning_rate": 5.75419605343436e-06, "loss": 0.174, "step": 16351 }, { "epoch": 0.47, "grad_norm": 1.2582076085936593, "learning_rate": 5.753737597897667e-06, "loss": 0.0815, "step": 16352 }, { "epoch": 0.47, "grad_norm": 4.986081826641447, "learning_rate": 5.7532791358766645e-06, "loss": 0.6426, "step": 16353 }, { "epoch": 0.47, "grad_norm": 5.774803351072066, "learning_rate": 5.752820667375294e-06, "loss": 0.3327, "step": 16354 }, { "epoch": 0.47, "grad_norm": 6.339938366948381, "learning_rate": 5.752362192397499e-06, "loss": 0.6872, "step": 16355 }, { "epoch": 0.47, "grad_norm": 5.436969511967127, "learning_rate": 5.751903710947227e-06, "loss": 0.6168, "step": 16356 }, { "epoch": 0.47, "grad_norm": 7.642621425054633, "learning_rate": 5.751445223028417e-06, "loss": 0.6777, "step": 16357 }, { "epoch": 0.47, "grad_norm": 7.788860750789972, "learning_rate": 5.750986728645017e-06, "loss": 0.6135, "step": 16358 }, { "epoch": 0.47, "grad_norm": 7.798782879674038, "learning_rate": 5.750528227800972e-06, "loss": 0.686, "step": 16359 }, { "epoch": 0.47, "grad_norm": 5.733737754234875, "learning_rate": 5.750069720500224e-06, "loss": 0.8766, "step": 16360 }, { "epoch": 0.47, "grad_norm": 4.859551257895717, "learning_rate": 5.749611206746719e-06, "loss": 0.6768, "step": 16361 }, { "epoch": 0.47, "grad_norm": 7.232246299959227, "learning_rate": 5.7491526865444e-06, "loss": 0.8465, "step": 16362 }, { "epoch": 0.47, "grad_norm": 3.55855527620423, "learning_rate": 5.748694159897215e-06, "loss": 0.3755, "step": 16363 }, { "epoch": 0.47, "grad_norm": 39.54092382946556, "learning_rate": 5.748235626809104e-06, "loss": 0.5966, "step": 16364 }, { "epoch": 0.47, "grad_norm": 4.8247159014457095, "learning_rate": 5.747777087284013e-06, "loss": 0.3155, "step": 16365 }, { "epoch": 0.47, "grad_norm": 10.21941595211506, "learning_rate": 5.747318541325891e-06, "loss": 0.4451, "step": 16366 }, { "epoch": 0.47, "grad_norm": 8.18042565980065, "learning_rate": 5.746859988938678e-06, "loss": 0.6476, "step": 16367 }, { "epoch": 0.47, "grad_norm": 12.01094414423112, "learning_rate": 5.746401430126321e-06, "loss": 0.9861, "step": 16368 }, { "epoch": 0.47, "grad_norm": 8.228667045266683, "learning_rate": 5.745942864892765e-06, "loss": 0.8281, "step": 16369 }, { "epoch": 0.47, "grad_norm": 3.550748080024181, "learning_rate": 5.745484293241953e-06, "loss": 0.7116, "step": 16370 }, { "epoch": 0.47, "grad_norm": 6.567810162675744, "learning_rate": 5.745025715177832e-06, "loss": 0.6174, "step": 16371 }, { "epoch": 0.47, "grad_norm": 5.7393919906522575, "learning_rate": 5.744567130704346e-06, "loss": 0.3937, "step": 16372 }, { "epoch": 0.47, "grad_norm": 4.636348796377044, "learning_rate": 5.744108539825442e-06, "loss": 0.6637, "step": 16373 }, { "epoch": 0.47, "grad_norm": 2.923155804778252, "learning_rate": 5.743649942545062e-06, "loss": 0.2283, "step": 16374 }, { "epoch": 0.47, "grad_norm": 4.197490219277488, "learning_rate": 5.743191338867153e-06, "loss": 0.5496, "step": 16375 }, { "epoch": 0.47, "grad_norm": 5.499977241815818, "learning_rate": 5.742732728795662e-06, "loss": 0.2707, "step": 16376 }, { "epoch": 0.47, "grad_norm": 7.030118961494353, "learning_rate": 5.7422741123345324e-06, "loss": 0.9484, "step": 16377 }, { "epoch": 0.47, "grad_norm": 5.935912632821077, "learning_rate": 5.74181548948771e-06, "loss": 0.9043, "step": 16378 }, { "epoch": 0.47, "grad_norm": 5.380244911958246, "learning_rate": 5.74135686025914e-06, "loss": 0.1843, "step": 16379 }, { "epoch": 0.47, "grad_norm": 8.26517123327093, "learning_rate": 5.740898224652769e-06, "loss": 1.0342, "step": 16380 }, { "epoch": 0.47, "grad_norm": 6.943726022646319, "learning_rate": 5.74043958267254e-06, "loss": 0.8318, "step": 16381 }, { "epoch": 0.47, "grad_norm": 4.632105395437338, "learning_rate": 5.739980934322401e-06, "loss": 0.1046, "step": 16382 }, { "epoch": 0.47, "grad_norm": 4.625439055970254, "learning_rate": 5.7395222796062984e-06, "loss": 0.361, "step": 16383 }, { "epoch": 0.47, "grad_norm": 7.9986115681299506, "learning_rate": 5.739063618528176e-06, "loss": 0.4313, "step": 16384 }, { "epoch": 0.47, "grad_norm": 3.487730728228513, "learning_rate": 5.73860495109198e-06, "loss": 0.2622, "step": 16385 }, { "epoch": 0.47, "grad_norm": 7.979060440418788, "learning_rate": 5.7381462773016575e-06, "loss": 0.3119, "step": 16386 }, { "epoch": 0.47, "grad_norm": 5.528634956434214, "learning_rate": 5.737687597161152e-06, "loss": 0.6561, "step": 16387 }, { "epoch": 0.47, "grad_norm": 6.835597717559968, "learning_rate": 5.737228910674412e-06, "loss": 0.513, "step": 16388 }, { "epoch": 0.47, "grad_norm": 11.147355495627583, "learning_rate": 5.736770217845382e-06, "loss": 0.4488, "step": 16389 }, { "epoch": 0.47, "grad_norm": 4.047934787387049, "learning_rate": 5.7363115186780085e-06, "loss": 0.6661, "step": 16390 }, { "epoch": 0.47, "grad_norm": 3.0124127964995067, "learning_rate": 5.735852813176237e-06, "loss": 0.4743, "step": 16391 }, { "epoch": 0.47, "grad_norm": 7.950478583751631, "learning_rate": 5.7353941013440155e-06, "loss": 0.8968, "step": 16392 }, { "epoch": 0.47, "grad_norm": 7.25045327709852, "learning_rate": 5.734935383185289e-06, "loss": 0.5287, "step": 16393 }, { "epoch": 0.47, "grad_norm": 5.2185191486033276, "learning_rate": 5.7344766587040035e-06, "loss": 0.305, "step": 16394 }, { "epoch": 0.47, "grad_norm": 5.8315919888252, "learning_rate": 5.734017927904106e-06, "loss": 0.4154, "step": 16395 }, { "epoch": 0.47, "grad_norm": 6.842773437500001, "learning_rate": 5.733559190789542e-06, "loss": 0.7053, "step": 16396 }, { "epoch": 0.47, "grad_norm": 8.253095450704095, "learning_rate": 5.73310044736426e-06, "loss": 0.521, "step": 16397 }, { "epoch": 0.47, "grad_norm": 7.015663911503397, "learning_rate": 5.732641697632203e-06, "loss": 0.6384, "step": 16398 }, { "epoch": 0.47, "grad_norm": 5.725876250528363, "learning_rate": 5.7321829415973215e-06, "loss": 0.7288, "step": 16399 }, { "epoch": 0.47, "grad_norm": 3.5729391840604263, "learning_rate": 5.7317241792635605e-06, "loss": 0.3348, "step": 16400 }, { "epoch": 0.47, "grad_norm": 8.45861784608389, "learning_rate": 5.731265410634865e-06, "loss": 0.8663, "step": 16401 }, { "epoch": 0.47, "grad_norm": 3.478458564714396, "learning_rate": 5.730806635715184e-06, "loss": 0.731, "step": 16402 }, { "epoch": 0.47, "grad_norm": 3.7457286350496655, "learning_rate": 5.730347854508465e-06, "loss": 0.2163, "step": 16403 }, { "epoch": 0.47, "grad_norm": 7.474257732778945, "learning_rate": 5.72988906701865e-06, "loss": 0.4441, "step": 16404 }, { "epoch": 0.47, "grad_norm": 3.596367073818564, "learning_rate": 5.729430273249691e-06, "loss": 0.2672, "step": 16405 }, { "epoch": 0.47, "grad_norm": 6.077784158520059, "learning_rate": 5.728971473205533e-06, "loss": 0.6594, "step": 16406 }, { "epoch": 0.47, "grad_norm": 4.056361005583729, "learning_rate": 5.7285126668901235e-06, "loss": 0.7202, "step": 16407 }, { "epoch": 0.47, "grad_norm": 2.419997441156469, "learning_rate": 5.7280538543074085e-06, "loss": 0.35, "step": 16408 }, { "epoch": 0.47, "grad_norm": 3.9504398378716554, "learning_rate": 5.727595035461336e-06, "loss": 0.489, "step": 16409 }, { "epoch": 0.47, "grad_norm": 6.818199433390664, "learning_rate": 5.727136210355855e-06, "loss": 0.636, "step": 16410 }, { "epoch": 0.47, "grad_norm": 5.54592251188867, "learning_rate": 5.7266773789949084e-06, "loss": 0.5564, "step": 16411 }, { "epoch": 0.47, "grad_norm": 4.865533857037066, "learning_rate": 5.726218541382445e-06, "loss": 0.287, "step": 16412 }, { "epoch": 0.47, "grad_norm": 6.6473072853000135, "learning_rate": 5.725759697522416e-06, "loss": 0.6581, "step": 16413 }, { "epoch": 0.47, "grad_norm": 4.9999202721914955, "learning_rate": 5.725300847418763e-06, "loss": 0.4801, "step": 16414 }, { "epoch": 0.47, "grad_norm": 3.3935458351952885, "learning_rate": 5.724841991075437e-06, "loss": 0.514, "step": 16415 }, { "epoch": 0.47, "grad_norm": 2.7712750883433643, "learning_rate": 5.724383128496385e-06, "loss": 0.0902, "step": 16416 }, { "epoch": 0.47, "grad_norm": 5.246795175659702, "learning_rate": 5.723924259685554e-06, "loss": 0.7902, "step": 16417 }, { "epoch": 0.47, "grad_norm": 5.174078941385908, "learning_rate": 5.723465384646891e-06, "loss": 0.4562, "step": 16418 }, { "epoch": 0.47, "grad_norm": 10.264853624135453, "learning_rate": 5.723006503384345e-06, "loss": 0.8485, "step": 16419 }, { "epoch": 0.47, "grad_norm": 4.627147845981163, "learning_rate": 5.722547615901863e-06, "loss": 0.5091, "step": 16420 }, { "epoch": 0.47, "grad_norm": 5.327182496239084, "learning_rate": 5.722088722203394e-06, "loss": 0.3627, "step": 16421 }, { "epoch": 0.47, "grad_norm": 5.9995178187853035, "learning_rate": 5.7216298222928815e-06, "loss": 0.5309, "step": 16422 }, { "epoch": 0.47, "grad_norm": 10.221766688613577, "learning_rate": 5.721170916174279e-06, "loss": 1.0083, "step": 16423 }, { "epoch": 0.47, "grad_norm": 7.077662909710293, "learning_rate": 5.720712003851532e-06, "loss": 0.3775, "step": 16424 }, { "epoch": 0.47, "grad_norm": 6.6684679538391265, "learning_rate": 5.720253085328586e-06, "loss": 0.5369, "step": 16425 }, { "epoch": 0.47, "grad_norm": 4.3740406891980195, "learning_rate": 5.719794160609394e-06, "loss": 0.2447, "step": 16426 }, { "epoch": 0.47, "grad_norm": 4.2409132981040205, "learning_rate": 5.719335229697901e-06, "loss": 0.357, "step": 16427 }, { "epoch": 0.47, "grad_norm": 4.250408405427683, "learning_rate": 5.718876292598056e-06, "loss": 0.2419, "step": 16428 }, { "epoch": 0.47, "grad_norm": 4.102731331086194, "learning_rate": 5.718417349313805e-06, "loss": 0.3593, "step": 16429 }, { "epoch": 0.47, "grad_norm": 8.226238914566908, "learning_rate": 5.717958399849099e-06, "loss": 0.2366, "step": 16430 }, { "epoch": 0.47, "grad_norm": 13.533414953828173, "learning_rate": 5.717499444207885e-06, "loss": 0.5783, "step": 16431 }, { "epoch": 0.47, "grad_norm": 7.92809609666126, "learning_rate": 5.7170404823941115e-06, "loss": 0.8527, "step": 16432 }, { "epoch": 0.47, "grad_norm": 5.657304275640059, "learning_rate": 5.7165815144117276e-06, "loss": 0.516, "step": 16433 }, { "epoch": 0.47, "grad_norm": 6.797489392804382, "learning_rate": 5.716122540264681e-06, "loss": 0.5683, "step": 16434 }, { "epoch": 0.47, "grad_norm": 4.3721032225542, "learning_rate": 5.71566355995692e-06, "loss": 0.5011, "step": 16435 }, { "epoch": 0.47, "grad_norm": 4.688920174675613, "learning_rate": 5.715204573492393e-06, "loss": 0.8085, "step": 16436 }, { "epoch": 0.47, "grad_norm": 5.790709432448548, "learning_rate": 5.714745580875052e-06, "loss": 0.4545, "step": 16437 }, { "epoch": 0.47, "grad_norm": 4.558311342515268, "learning_rate": 5.71428658210884e-06, "loss": 0.5774, "step": 16438 }, { "epoch": 0.47, "grad_norm": 5.483786526922028, "learning_rate": 5.713827577197708e-06, "loss": 0.8271, "step": 16439 }, { "epoch": 0.47, "grad_norm": 5.870280622568866, "learning_rate": 5.713368566145607e-06, "loss": 0.6076, "step": 16440 }, { "epoch": 0.47, "grad_norm": 5.285672294420446, "learning_rate": 5.7129095489564835e-06, "loss": 0.6762, "step": 16441 }, { "epoch": 0.47, "grad_norm": 6.992240828838184, "learning_rate": 5.712450525634287e-06, "loss": 0.4485, "step": 16442 }, { "epoch": 0.47, "grad_norm": 6.3609715456368905, "learning_rate": 5.711991496182966e-06, "loss": 0.3713, "step": 16443 }, { "epoch": 0.47, "grad_norm": 5.194380437090537, "learning_rate": 5.71153246060647e-06, "loss": 0.1298, "step": 16444 }, { "epoch": 0.47, "grad_norm": 3.3697401491891856, "learning_rate": 5.711073418908748e-06, "loss": 0.1819, "step": 16445 }, { "epoch": 0.47, "grad_norm": 5.88804602389126, "learning_rate": 5.710614371093748e-06, "loss": 0.4291, "step": 16446 }, { "epoch": 0.47, "grad_norm": 5.5044869413717326, "learning_rate": 5.710155317165421e-06, "loss": 0.409, "step": 16447 }, { "epoch": 0.47, "grad_norm": 5.9126969125229065, "learning_rate": 5.709696257127715e-06, "loss": 0.3881, "step": 16448 }, { "epoch": 0.47, "grad_norm": 3.5319517164213403, "learning_rate": 5.709237190984579e-06, "loss": 0.4191, "step": 16449 }, { "epoch": 0.47, "grad_norm": 7.19078519626971, "learning_rate": 5.708778118739964e-06, "loss": 1.0033, "step": 16450 }, { "epoch": 0.47, "grad_norm": 4.169581951071114, "learning_rate": 5.708319040397817e-06, "loss": 0.4396, "step": 16451 }, { "epoch": 0.47, "grad_norm": 3.339412549832457, "learning_rate": 5.707859955962089e-06, "loss": 0.2685, "step": 16452 }, { "epoch": 0.47, "grad_norm": 8.469659284924628, "learning_rate": 5.707400865436728e-06, "loss": 0.5596, "step": 16453 }, { "epoch": 0.47, "grad_norm": 5.87248590122575, "learning_rate": 5.706941768825687e-06, "loss": 0.2711, "step": 16454 }, { "epoch": 0.47, "grad_norm": 6.285249793552059, "learning_rate": 5.706482666132911e-06, "loss": 0.2311, "step": 16455 }, { "epoch": 0.47, "grad_norm": 3.3110135179967815, "learning_rate": 5.706023557362351e-06, "loss": 0.5976, "step": 16456 }, { "epoch": 0.47, "grad_norm": 5.722263111762624, "learning_rate": 5.705564442517959e-06, "loss": 0.2579, "step": 16457 }, { "epoch": 0.47, "grad_norm": 4.479013966759387, "learning_rate": 5.705105321603682e-06, "loss": 0.4085, "step": 16458 }, { "epoch": 0.47, "grad_norm": 12.216093140358833, "learning_rate": 5.704646194623471e-06, "loss": 0.647, "step": 16459 }, { "epoch": 0.47, "grad_norm": 7.6667665616728335, "learning_rate": 5.704187061581276e-06, "loss": 0.5123, "step": 16460 }, { "epoch": 0.47, "grad_norm": 5.845614375579227, "learning_rate": 5.703727922481046e-06, "loss": 0.3058, "step": 16461 }, { "epoch": 0.47, "grad_norm": 3.418076030682448, "learning_rate": 5.703268777326732e-06, "loss": 0.3901, "step": 16462 }, { "epoch": 0.47, "grad_norm": 3.8580730883258942, "learning_rate": 5.702809626122281e-06, "loss": 0.3047, "step": 16463 }, { "epoch": 0.47, "grad_norm": 5.118691259845923, "learning_rate": 5.702350468871647e-06, "loss": 0.4303, "step": 16464 }, { "epoch": 0.47, "grad_norm": 2.6325744159030595, "learning_rate": 5.701891305578778e-06, "loss": 0.1275, "step": 16465 }, { "epoch": 0.47, "grad_norm": 5.420185452487347, "learning_rate": 5.701432136247624e-06, "loss": 0.5918, "step": 16466 }, { "epoch": 0.47, "grad_norm": 7.118538486463219, "learning_rate": 5.700972960882138e-06, "loss": 0.1961, "step": 16467 }, { "epoch": 0.47, "grad_norm": 8.391143157701578, "learning_rate": 5.700513779486266e-06, "loss": 1.3154, "step": 16468 }, { "epoch": 0.47, "grad_norm": 5.538921919358275, "learning_rate": 5.700054592063961e-06, "loss": 0.7496, "step": 16469 }, { "epoch": 0.47, "grad_norm": 4.807200449212101, "learning_rate": 5.699595398619172e-06, "loss": 0.617, "step": 16470 }, { "epoch": 0.47, "grad_norm": 7.756258252774996, "learning_rate": 5.699136199155851e-06, "loss": 0.8488, "step": 16471 }, { "epoch": 0.47, "grad_norm": 8.217927956899182, "learning_rate": 5.698676993677944e-06, "loss": 0.853, "step": 16472 }, { "epoch": 0.47, "grad_norm": 6.2895637265085425, "learning_rate": 5.698217782189408e-06, "loss": 0.3107, "step": 16473 }, { "epoch": 0.47, "grad_norm": 7.720879264924116, "learning_rate": 5.697758564694189e-06, "loss": 0.7893, "step": 16474 }, { "epoch": 0.47, "grad_norm": 7.722490709273905, "learning_rate": 5.6972993411962394e-06, "loss": 0.8803, "step": 16475 }, { "epoch": 0.47, "grad_norm": 4.912032491943055, "learning_rate": 5.69684011169951e-06, "loss": 0.5296, "step": 16476 }, { "epoch": 0.47, "grad_norm": 6.283919037790886, "learning_rate": 5.696380876207952e-06, "loss": 0.6939, "step": 16477 }, { "epoch": 0.47, "grad_norm": 9.937515738612733, "learning_rate": 5.695921634725513e-06, "loss": 0.8301, "step": 16478 }, { "epoch": 0.47, "grad_norm": 9.862454182913641, "learning_rate": 5.695462387256146e-06, "loss": 0.6486, "step": 16479 }, { "epoch": 0.47, "grad_norm": 7.076314328239377, "learning_rate": 5.695003133803803e-06, "loss": 0.6235, "step": 16480 }, { "epoch": 0.47, "grad_norm": 3.468912241122848, "learning_rate": 5.694543874372431e-06, "loss": 0.3614, "step": 16481 }, { "epoch": 0.47, "grad_norm": 3.4416860235068607, "learning_rate": 5.694084608965986e-06, "loss": 0.4217, "step": 16482 }, { "epoch": 0.47, "grad_norm": 3.9745373018745602, "learning_rate": 5.693625337588415e-06, "loss": 0.4606, "step": 16483 }, { "epoch": 0.47, "grad_norm": 2.898345277455269, "learning_rate": 5.693166060243674e-06, "loss": 0.1713, "step": 16484 }, { "epoch": 0.47, "grad_norm": 5.499425142763891, "learning_rate": 5.692706776935707e-06, "loss": 0.434, "step": 16485 }, { "epoch": 0.47, "grad_norm": 4.227508636456764, "learning_rate": 5.692247487668469e-06, "loss": 0.491, "step": 16486 }, { "epoch": 0.47, "grad_norm": 4.611039872876728, "learning_rate": 5.691788192445914e-06, "loss": 0.605, "step": 16487 }, { "epoch": 0.47, "grad_norm": 4.020939216142014, "learning_rate": 5.691328891271988e-06, "loss": 0.5386, "step": 16488 }, { "epoch": 0.47, "grad_norm": 4.724312617241695, "learning_rate": 5.690869584150645e-06, "loss": 0.4888, "step": 16489 }, { "epoch": 0.47, "grad_norm": 5.316138020405025, "learning_rate": 5.690410271085837e-06, "loss": 0.5381, "step": 16490 }, { "epoch": 0.47, "grad_norm": 6.010946302256277, "learning_rate": 5.689950952081513e-06, "loss": 0.5755, "step": 16491 }, { "epoch": 0.47, "grad_norm": 6.295100338282382, "learning_rate": 5.689491627141627e-06, "loss": 0.6408, "step": 16492 }, { "epoch": 0.47, "grad_norm": 5.096352877557429, "learning_rate": 5.6890322962701285e-06, "loss": 0.3857, "step": 16493 }, { "epoch": 0.47, "grad_norm": 6.342044995819875, "learning_rate": 5.688572959470971e-06, "loss": 0.9837, "step": 16494 }, { "epoch": 0.47, "grad_norm": 6.437949007541614, "learning_rate": 5.688113616748105e-06, "loss": 0.6541, "step": 16495 }, { "epoch": 0.47, "grad_norm": 2.965581950286238, "learning_rate": 5.6876542681054805e-06, "loss": 0.3747, "step": 16496 }, { "epoch": 0.47, "grad_norm": 5.1493936440988834, "learning_rate": 5.687194913547054e-06, "loss": 0.4849, "step": 16497 }, { "epoch": 0.47, "grad_norm": 6.660432781843923, "learning_rate": 5.686735553076772e-06, "loss": 0.4398, "step": 16498 }, { "epoch": 0.47, "grad_norm": 4.993397812246635, "learning_rate": 5.6862761866985895e-06, "loss": 0.6215, "step": 16499 }, { "epoch": 0.47, "grad_norm": 4.317165283852849, "learning_rate": 5.685816814416457e-06, "loss": 0.3298, "step": 16500 }, { "epoch": 0.47, "grad_norm": 5.3130337166559105, "learning_rate": 5.685357436234327e-06, "loss": 0.2188, "step": 16501 }, { "epoch": 0.47, "grad_norm": 4.4639046282920365, "learning_rate": 5.684898052156151e-06, "loss": 0.2342, "step": 16502 }, { "epoch": 0.47, "grad_norm": 7.197116538668535, "learning_rate": 5.684438662185883e-06, "loss": 0.3971, "step": 16503 }, { "epoch": 0.47, "grad_norm": 8.207793035652964, "learning_rate": 5.683979266327473e-06, "loss": 0.3121, "step": 16504 }, { "epoch": 0.47, "grad_norm": 3.7760409580975445, "learning_rate": 5.683519864584871e-06, "loss": 0.3526, "step": 16505 }, { "epoch": 0.47, "grad_norm": 6.584247376771636, "learning_rate": 5.683060456962034e-06, "loss": 0.6753, "step": 16506 }, { "epoch": 0.47, "grad_norm": 7.325152967763142, "learning_rate": 5.682601043462912e-06, "loss": 0.6853, "step": 16507 }, { "epoch": 0.47, "grad_norm": 5.589197448005659, "learning_rate": 5.682141624091457e-06, "loss": 0.3194, "step": 16508 }, { "epoch": 0.47, "grad_norm": 6.608873713536566, "learning_rate": 5.6816821988516215e-06, "loss": 1.0539, "step": 16509 }, { "epoch": 0.47, "grad_norm": 5.655982711967907, "learning_rate": 5.6812227677473575e-06, "loss": 0.1911, "step": 16510 }, { "epoch": 0.47, "grad_norm": 4.304064198419326, "learning_rate": 5.680763330782619e-06, "loss": 0.4813, "step": 16511 }, { "epoch": 0.47, "grad_norm": 3.109086143073242, "learning_rate": 5.680303887961357e-06, "loss": 0.1788, "step": 16512 }, { "epoch": 0.47, "grad_norm": 4.308294678421794, "learning_rate": 5.679844439287523e-06, "loss": 0.3065, "step": 16513 }, { "epoch": 0.47, "grad_norm": 3.5354057097541514, "learning_rate": 5.679384984765073e-06, "loss": 0.3887, "step": 16514 }, { "epoch": 0.47, "grad_norm": 6.056020951639213, "learning_rate": 5.678925524397957e-06, "loss": 0.5059, "step": 16515 }, { "epoch": 0.47, "grad_norm": 5.221760618097407, "learning_rate": 5.678466058190128e-06, "loss": 0.2657, "step": 16516 }, { "epoch": 0.47, "grad_norm": 5.820247450087412, "learning_rate": 5.67800658614554e-06, "loss": 0.5307, "step": 16517 }, { "epoch": 0.47, "grad_norm": 5.084624418343645, "learning_rate": 5.677547108268144e-06, "loss": 0.5586, "step": 16518 }, { "epoch": 0.47, "grad_norm": 7.530477587079996, "learning_rate": 5.677087624561893e-06, "loss": 0.2566, "step": 16519 }, { "epoch": 0.47, "grad_norm": 1.7740577419768877, "learning_rate": 5.6766281350307415e-06, "loss": 0.2543, "step": 16520 }, { "epoch": 0.47, "grad_norm": 4.159256442907486, "learning_rate": 5.6761686396786405e-06, "loss": 0.4264, "step": 16521 }, { "epoch": 0.47, "grad_norm": 2.5517919620604084, "learning_rate": 5.675709138509544e-06, "loss": 0.3258, "step": 16522 }, { "epoch": 0.47, "grad_norm": 7.351900909775111, "learning_rate": 5.675249631527406e-06, "loss": 0.7679, "step": 16523 }, { "epoch": 0.47, "grad_norm": 5.1445571562767185, "learning_rate": 5.674790118736179e-06, "loss": 0.5623, "step": 16524 }, { "epoch": 0.47, "grad_norm": 5.6522859624454, "learning_rate": 5.674330600139815e-06, "loss": 0.7047, "step": 16525 }, { "epoch": 0.47, "grad_norm": 7.463508522472983, "learning_rate": 5.673871075742266e-06, "loss": 0.4467, "step": 16526 }, { "epoch": 0.47, "grad_norm": 6.428348517716052, "learning_rate": 5.6734115455474895e-06, "loss": 0.6436, "step": 16527 }, { "epoch": 0.47, "grad_norm": 4.58532507997195, "learning_rate": 5.672952009559437e-06, "loss": 0.2659, "step": 16528 }, { "epoch": 0.47, "grad_norm": 5.081400146705925, "learning_rate": 5.672492467782058e-06, "loss": 0.2599, "step": 16529 }, { "epoch": 0.47, "grad_norm": 5.7917749225552635, "learning_rate": 5.672032920219312e-06, "loss": 0.5656, "step": 16530 }, { "epoch": 0.47, "grad_norm": 3.95403849183306, "learning_rate": 5.671573366875148e-06, "loss": 0.2699, "step": 16531 }, { "epoch": 0.47, "grad_norm": 6.29288187564909, "learning_rate": 5.671113807753522e-06, "loss": 0.5386, "step": 16532 }, { "epoch": 0.47, "grad_norm": 6.138111794118345, "learning_rate": 5.670654242858385e-06, "loss": 0.6578, "step": 16533 }, { "epoch": 0.47, "grad_norm": 4.0716732042478485, "learning_rate": 5.670194672193695e-06, "loss": 0.3764, "step": 16534 }, { "epoch": 0.47, "grad_norm": 4.193637790685327, "learning_rate": 5.6697350957634e-06, "loss": 0.5605, "step": 16535 }, { "epoch": 0.47, "grad_norm": 4.876939681114455, "learning_rate": 5.669275513571457e-06, "loss": 0.3106, "step": 16536 }, { "epoch": 0.47, "grad_norm": 2.7921747746361727, "learning_rate": 5.66881592562182e-06, "loss": 0.1782, "step": 16537 }, { "epoch": 0.47, "grad_norm": 5.8882882013794875, "learning_rate": 5.668356331918441e-06, "loss": 0.5371, "step": 16538 }, { "epoch": 0.47, "grad_norm": 5.888043432404651, "learning_rate": 5.667896732465276e-06, "loss": 0.509, "step": 16539 }, { "epoch": 0.47, "grad_norm": 4.600716194085427, "learning_rate": 5.6674371272662765e-06, "loss": 0.2434, "step": 16540 }, { "epoch": 0.47, "grad_norm": 2.939589001122699, "learning_rate": 5.666977516325398e-06, "loss": 0.2528, "step": 16541 }, { "epoch": 0.47, "grad_norm": 4.387696202284627, "learning_rate": 5.666517899646594e-06, "loss": 0.1755, "step": 16542 }, { "epoch": 0.47, "grad_norm": 6.855267296246564, "learning_rate": 5.666058277233819e-06, "loss": 0.615, "step": 16543 }, { "epoch": 0.47, "grad_norm": 7.108235764663068, "learning_rate": 5.6655986490910265e-06, "loss": 0.6339, "step": 16544 }, { "epoch": 0.47, "grad_norm": 6.058126425412844, "learning_rate": 5.665139015222171e-06, "loss": 0.5071, "step": 16545 }, { "epoch": 0.47, "grad_norm": 4.1864627293606915, "learning_rate": 5.664679375631204e-06, "loss": 0.6006, "step": 16546 }, { "epoch": 0.47, "grad_norm": 4.459467283174868, "learning_rate": 5.664219730322084e-06, "loss": 0.3219, "step": 16547 }, { "epoch": 0.47, "grad_norm": 3.7071781270039628, "learning_rate": 5.663760079298764e-06, "loss": 0.3233, "step": 16548 }, { "epoch": 0.47, "grad_norm": 8.345068780925297, "learning_rate": 5.663300422565198e-06, "loss": 0.5113, "step": 16549 }, { "epoch": 0.47, "grad_norm": 3.89120626129067, "learning_rate": 5.662840760125339e-06, "loss": 0.3516, "step": 16550 }, { "epoch": 0.47, "grad_norm": 5.706996053369125, "learning_rate": 5.662381091983144e-06, "loss": 0.4051, "step": 16551 }, { "epoch": 0.47, "grad_norm": 7.128048830072741, "learning_rate": 5.6619214181425645e-06, "loss": 0.6414, "step": 16552 }, { "epoch": 0.47, "grad_norm": 7.807438545974597, "learning_rate": 5.661461738607556e-06, "loss": 0.6982, "step": 16553 }, { "epoch": 0.47, "grad_norm": 7.754865165241695, "learning_rate": 5.661002053382076e-06, "loss": 0.629, "step": 16554 }, { "epoch": 0.47, "grad_norm": 5.2936860784389275, "learning_rate": 5.660542362470075e-06, "loss": 0.7981, "step": 16555 }, { "epoch": 0.47, "grad_norm": 5.9767355900052745, "learning_rate": 5.660082665875509e-06, "loss": 0.6501, "step": 16556 }, { "epoch": 0.47, "grad_norm": 4.899352649823148, "learning_rate": 5.659622963602334e-06, "loss": 0.3592, "step": 16557 }, { "epoch": 0.47, "grad_norm": 3.7742641325432906, "learning_rate": 5.659163255654505e-06, "loss": 0.1149, "step": 16558 }, { "epoch": 0.47, "grad_norm": 3.454948461675406, "learning_rate": 5.6587035420359736e-06, "loss": 0.3934, "step": 16559 }, { "epoch": 0.47, "grad_norm": 5.606511928771894, "learning_rate": 5.658243822750698e-06, "loss": 0.5762, "step": 16560 }, { "epoch": 0.47, "grad_norm": 9.120956622446855, "learning_rate": 5.657784097802632e-06, "loss": 0.7027, "step": 16561 }, { "epoch": 0.47, "grad_norm": 7.971024072827298, "learning_rate": 5.657324367195728e-06, "loss": 0.5236, "step": 16562 }, { "epoch": 0.47, "grad_norm": 3.7616787568657077, "learning_rate": 5.656864630933945e-06, "loss": 0.2896, "step": 16563 }, { "epoch": 0.47, "grad_norm": 15.89080060263562, "learning_rate": 5.656404889021238e-06, "loss": 0.6763, "step": 16564 }, { "epoch": 0.47, "grad_norm": 3.5943446123552976, "learning_rate": 5.655945141461558e-06, "loss": 0.4277, "step": 16565 }, { "epoch": 0.47, "grad_norm": 5.950301614698005, "learning_rate": 5.6554853882588645e-06, "loss": 0.9938, "step": 16566 }, { "epoch": 0.47, "grad_norm": 6.013970164786681, "learning_rate": 5.65502562941711e-06, "loss": 0.2104, "step": 16567 }, { "epoch": 0.47, "grad_norm": 4.461253754584009, "learning_rate": 5.654565864940251e-06, "loss": 0.3385, "step": 16568 }, { "epoch": 0.47, "grad_norm": 7.985040386864577, "learning_rate": 5.654106094832244e-06, "loss": 0.4009, "step": 16569 }, { "epoch": 0.47, "grad_norm": 2.5174312385934483, "learning_rate": 5.653646319097039e-06, "loss": 0.2133, "step": 16570 }, { "epoch": 0.47, "grad_norm": 5.65871822394602, "learning_rate": 5.6531865377385984e-06, "loss": 0.3829, "step": 16571 }, { "epoch": 0.47, "grad_norm": 3.5696508683525248, "learning_rate": 5.652726750760873e-06, "loss": 0.1227, "step": 16572 }, { "epoch": 0.47, "grad_norm": 5.0759287618533575, "learning_rate": 5.65226695816782e-06, "loss": 0.5568, "step": 16573 }, { "epoch": 0.47, "grad_norm": 5.238853474539219, "learning_rate": 5.651807159963396e-06, "loss": 0.5895, "step": 16574 }, { "epoch": 0.47, "grad_norm": 7.562910399847766, "learning_rate": 5.651347356151553e-06, "loss": 0.7338, "step": 16575 }, { "epoch": 0.47, "grad_norm": 4.141272382690111, "learning_rate": 5.65088754673625e-06, "loss": 0.4135, "step": 16576 }, { "epoch": 0.47, "grad_norm": 5.389405264589735, "learning_rate": 5.650427731721442e-06, "loss": 0.6797, "step": 16577 }, { "epoch": 0.47, "grad_norm": 3.0817681842118, "learning_rate": 5.649967911111085e-06, "loss": 0.2469, "step": 16578 }, { "epoch": 0.47, "grad_norm": 5.229608010541894, "learning_rate": 5.6495080849091314e-06, "loss": 0.2259, "step": 16579 }, { "epoch": 0.47, "grad_norm": 7.062212440240411, "learning_rate": 5.649048253119541e-06, "loss": 0.4735, "step": 16580 }, { "epoch": 0.47, "grad_norm": 4.140747701878144, "learning_rate": 5.64858841574627e-06, "loss": 0.2251, "step": 16581 }, { "epoch": 0.47, "grad_norm": 4.388428868491248, "learning_rate": 5.64812857279327e-06, "loss": 0.7646, "step": 16582 }, { "epoch": 0.47, "grad_norm": 6.267971598255928, "learning_rate": 5.647668724264501e-06, "loss": 0.6297, "step": 16583 }, { "epoch": 0.47, "grad_norm": 3.605731388333621, "learning_rate": 5.647208870163918e-06, "loss": 0.093, "step": 16584 }, { "epoch": 0.47, "grad_norm": 6.040346746849396, "learning_rate": 5.646749010495478e-06, "loss": 0.3907, "step": 16585 }, { "epoch": 0.47, "grad_norm": 5.305121200428216, "learning_rate": 5.646289145263134e-06, "loss": 0.4548, "step": 16586 }, { "epoch": 0.48, "grad_norm": 9.602232025864614, "learning_rate": 5.645829274470843e-06, "loss": 0.3634, "step": 16587 }, { "epoch": 0.48, "grad_norm": 8.41869573087129, "learning_rate": 5.645369398122564e-06, "loss": 0.5983, "step": 16588 }, { "epoch": 0.48, "grad_norm": 6.2890527239421425, "learning_rate": 5.644909516222252e-06, "loss": 0.329, "step": 16589 }, { "epoch": 0.48, "grad_norm": 6.411182402791596, "learning_rate": 5.644449628773862e-06, "loss": 0.4894, "step": 16590 }, { "epoch": 0.48, "grad_norm": 3.9057376983396113, "learning_rate": 5.6439897357813525e-06, "loss": 0.3685, "step": 16591 }, { "epoch": 0.48, "grad_norm": 5.77180839958657, "learning_rate": 5.643529837248677e-06, "loss": 0.1787, "step": 16592 }, { "epoch": 0.48, "grad_norm": 4.158967871914021, "learning_rate": 5.643069933179793e-06, "loss": 0.3916, "step": 16593 }, { "epoch": 0.48, "grad_norm": 4.515988820345127, "learning_rate": 5.642610023578659e-06, "loss": 0.4615, "step": 16594 }, { "epoch": 0.48, "grad_norm": 4.82919494723755, "learning_rate": 5.642150108449229e-06, "loss": 0.2847, "step": 16595 }, { "epoch": 0.48, "grad_norm": 8.061374704062157, "learning_rate": 5.641690187795461e-06, "loss": 0.3626, "step": 16596 }, { "epoch": 0.48, "grad_norm": 6.0935111757086595, "learning_rate": 5.641230261621311e-06, "loss": 0.4766, "step": 16597 }, { "epoch": 0.48, "grad_norm": 4.2037331236909745, "learning_rate": 5.640770329930738e-06, "loss": 0.5487, "step": 16598 }, { "epoch": 0.48, "grad_norm": 6.618464251033303, "learning_rate": 5.640310392727695e-06, "loss": 0.861, "step": 16599 }, { "epoch": 0.48, "grad_norm": 5.997787067496749, "learning_rate": 5.639850450016141e-06, "loss": 0.306, "step": 16600 }, { "epoch": 0.48, "grad_norm": 5.145381454622011, "learning_rate": 5.639390501800033e-06, "loss": 0.3296, "step": 16601 }, { "epoch": 0.48, "grad_norm": 2.7614626408249223, "learning_rate": 5.638930548083326e-06, "loss": 0.2104, "step": 16602 }, { "epoch": 0.48, "grad_norm": 5.663888147235693, "learning_rate": 5.638470588869977e-06, "loss": 0.7149, "step": 16603 }, { "epoch": 0.48, "grad_norm": 3.341019906306972, "learning_rate": 5.638010624163948e-06, "loss": 0.1913, "step": 16604 }, { "epoch": 0.48, "grad_norm": 8.055625616697036, "learning_rate": 5.637550653969189e-06, "loss": 0.5423, "step": 16605 }, { "epoch": 0.48, "grad_norm": 8.337342791812391, "learning_rate": 5.637090678289661e-06, "loss": 0.7295, "step": 16606 }, { "epoch": 0.48, "grad_norm": 5.616165174212568, "learning_rate": 5.63663069712932e-06, "loss": 0.5594, "step": 16607 }, { "epoch": 0.48, "grad_norm": 5.397559750161296, "learning_rate": 5.636170710492125e-06, "loss": 0.4929, "step": 16608 }, { "epoch": 0.48, "grad_norm": 5.333741480389396, "learning_rate": 5.635710718382031e-06, "loss": 0.7229, "step": 16609 }, { "epoch": 0.48, "grad_norm": 4.115432799651634, "learning_rate": 5.635250720802995e-06, "loss": 0.5482, "step": 16610 }, { "epoch": 0.48, "grad_norm": 6.810870089353779, "learning_rate": 5.634790717758975e-06, "loss": 0.2842, "step": 16611 }, { "epoch": 0.48, "grad_norm": 5.177341303803934, "learning_rate": 5.63433070925393e-06, "loss": 0.5131, "step": 16612 }, { "epoch": 0.48, "grad_norm": 12.582269021469765, "learning_rate": 5.633870695291815e-06, "loss": 0.5076, "step": 16613 }, { "epoch": 0.48, "grad_norm": 6.574050402511619, "learning_rate": 5.633410675876589e-06, "loss": 0.6091, "step": 16614 }, { "epoch": 0.48, "grad_norm": 5.073671530215217, "learning_rate": 5.63295065101221e-06, "loss": 0.4184, "step": 16615 }, { "epoch": 0.48, "grad_norm": 8.993816582789172, "learning_rate": 5.6324906207026334e-06, "loss": 0.4465, "step": 16616 }, { "epoch": 0.48, "grad_norm": 6.018564670201035, "learning_rate": 5.632030584951817e-06, "loss": 0.3884, "step": 16617 }, { "epoch": 0.48, "grad_norm": 5.296092828035028, "learning_rate": 5.631570543763721e-06, "loss": 0.548, "step": 16618 }, { "epoch": 0.48, "grad_norm": 1.778712833161002, "learning_rate": 5.6311104971423e-06, "loss": 0.1271, "step": 16619 }, { "epoch": 0.48, "grad_norm": 3.7398199504208973, "learning_rate": 5.630650445091513e-06, "loss": 0.5607, "step": 16620 }, { "epoch": 0.48, "grad_norm": 8.292159872546337, "learning_rate": 5.630190387615319e-06, "loss": 0.8686, "step": 16621 }, { "epoch": 0.48, "grad_norm": 4.693038059495656, "learning_rate": 5.629730324717675e-06, "loss": 0.4062, "step": 16622 }, { "epoch": 0.48, "grad_norm": 4.833505095795171, "learning_rate": 5.629270256402537e-06, "loss": 0.3813, "step": 16623 }, { "epoch": 0.48, "grad_norm": 5.8207840491656695, "learning_rate": 5.628810182673865e-06, "loss": 0.4519, "step": 16624 }, { "epoch": 0.48, "grad_norm": 5.514656651894228, "learning_rate": 5.628350103535618e-06, "loss": 0.8323, "step": 16625 }, { "epoch": 0.48, "grad_norm": 8.94443221779166, "learning_rate": 5.627890018991751e-06, "loss": 0.591, "step": 16626 }, { "epoch": 0.48, "grad_norm": 6.224583597312387, "learning_rate": 5.627429929046223e-06, "loss": 0.549, "step": 16627 }, { "epoch": 0.48, "grad_norm": 6.7051445824085105, "learning_rate": 5.626969833702992e-06, "loss": 0.8887, "step": 16628 }, { "epoch": 0.48, "grad_norm": 13.566030297752942, "learning_rate": 5.626509732966018e-06, "loss": 0.5624, "step": 16629 }, { "epoch": 0.48, "grad_norm": 5.354263849032028, "learning_rate": 5.6260496268392574e-06, "loss": 0.3183, "step": 16630 }, { "epoch": 0.48, "grad_norm": 6.608434695909193, "learning_rate": 5.62558951532667e-06, "loss": 0.4595, "step": 16631 }, { "epoch": 0.48, "grad_norm": 7.308043271104211, "learning_rate": 5.625129398432212e-06, "loss": 0.4376, "step": 16632 }, { "epoch": 0.48, "grad_norm": 6.276797562238971, "learning_rate": 5.624669276159843e-06, "loss": 0.5958, "step": 16633 }, { "epoch": 0.48, "grad_norm": 6.0515852136084725, "learning_rate": 5.6242091485135205e-06, "loss": 0.8698, "step": 16634 }, { "epoch": 0.48, "grad_norm": 4.288302828525453, "learning_rate": 5.623749015497206e-06, "loss": 0.3582, "step": 16635 }, { "epoch": 0.48, "grad_norm": 2.255349938144142, "learning_rate": 5.623288877114852e-06, "loss": 0.164, "step": 16636 }, { "epoch": 0.48, "grad_norm": 4.0877481974337115, "learning_rate": 5.6228287333704225e-06, "loss": 0.2265, "step": 16637 }, { "epoch": 0.48, "grad_norm": 7.13445457785058, "learning_rate": 5.622368584267874e-06, "loss": 0.8523, "step": 16638 }, { "epoch": 0.48, "grad_norm": 8.626289216037966, "learning_rate": 5.621908429811166e-06, "loss": 0.8762, "step": 16639 }, { "epoch": 0.48, "grad_norm": 9.201438762281718, "learning_rate": 5.621448270004255e-06, "loss": 0.5081, "step": 16640 }, { "epoch": 0.48, "grad_norm": 5.447906182925152, "learning_rate": 5.6209881048511005e-06, "loss": 0.5816, "step": 16641 }, { "epoch": 0.48, "grad_norm": 10.47230346495271, "learning_rate": 5.620527934355664e-06, "loss": 0.6072, "step": 16642 }, { "epoch": 0.48, "grad_norm": 8.099130263553056, "learning_rate": 5.620067758521901e-06, "loss": 0.9832, "step": 16643 }, { "epoch": 0.48, "grad_norm": 9.390398832063509, "learning_rate": 5.6196075773537695e-06, "loss": 0.4207, "step": 16644 }, { "epoch": 0.48, "grad_norm": 6.8822814875125635, "learning_rate": 5.619147390855233e-06, "loss": 0.6102, "step": 16645 }, { "epoch": 0.48, "grad_norm": 6.287160512355377, "learning_rate": 5.618687199030246e-06, "loss": 0.4201, "step": 16646 }, { "epoch": 0.48, "grad_norm": 7.080279243506634, "learning_rate": 5.61822700188277e-06, "loss": 0.3119, "step": 16647 }, { "epoch": 0.48, "grad_norm": 8.279905620350341, "learning_rate": 5.617766799416764e-06, "loss": 0.6749, "step": 16648 }, { "epoch": 0.48, "grad_norm": 7.2944737144800404, "learning_rate": 5.6173065916361856e-06, "loss": 0.4113, "step": 16649 }, { "epoch": 0.48, "grad_norm": 6.47264814635318, "learning_rate": 5.616846378544994e-06, "loss": 0.6244, "step": 16650 }, { "epoch": 0.48, "grad_norm": 5.955456334781329, "learning_rate": 5.616386160147149e-06, "loss": 0.5286, "step": 16651 }, { "epoch": 0.48, "grad_norm": 6.864164205518829, "learning_rate": 5.615925936446612e-06, "loss": 0.3415, "step": 16652 }, { "epoch": 0.48, "grad_norm": 5.155980883424537, "learning_rate": 5.615465707447336e-06, "loss": 0.7354, "step": 16653 }, { "epoch": 0.48, "grad_norm": 5.876995042148749, "learning_rate": 5.615005473153287e-06, "loss": 0.5326, "step": 16654 }, { "epoch": 0.48, "grad_norm": 3.9348273516421846, "learning_rate": 5.614545233568421e-06, "loss": 0.3527, "step": 16655 }, { "epoch": 0.48, "grad_norm": 2.1886848102100873, "learning_rate": 5.6140849886966985e-06, "loss": 0.1826, "step": 16656 }, { "epoch": 0.48, "grad_norm": 6.896918953745572, "learning_rate": 5.613624738542079e-06, "loss": 0.5099, "step": 16657 }, { "epoch": 0.48, "grad_norm": 10.816956323899952, "learning_rate": 5.61316448310852e-06, "loss": 0.8543, "step": 16658 }, { "epoch": 0.48, "grad_norm": 7.759787193900146, "learning_rate": 5.6127042223999824e-06, "loss": 0.5355, "step": 16659 }, { "epoch": 0.48, "grad_norm": 4.773993576799794, "learning_rate": 5.6122439564204265e-06, "loss": 0.5217, "step": 16660 }, { "epoch": 0.48, "grad_norm": 3.966696861429988, "learning_rate": 5.611783685173811e-06, "loss": 0.4129, "step": 16661 }, { "epoch": 0.48, "grad_norm": 4.717045457185441, "learning_rate": 5.611323408664097e-06, "loss": 0.7391, "step": 16662 }, { "epoch": 0.48, "grad_norm": 7.4916381952793545, "learning_rate": 5.61086312689524e-06, "loss": 0.5385, "step": 16663 }, { "epoch": 0.48, "grad_norm": 5.790246469196433, "learning_rate": 5.610402839871205e-06, "loss": 0.493, "step": 16664 }, { "epoch": 0.48, "grad_norm": 7.624935024797605, "learning_rate": 5.60994254759595e-06, "loss": 0.7962, "step": 16665 }, { "epoch": 0.48, "grad_norm": 5.605337490977359, "learning_rate": 5.6094822500734335e-06, "loss": 0.451, "step": 16666 }, { "epoch": 0.48, "grad_norm": 6.523672718672852, "learning_rate": 5.609021947307615e-06, "loss": 0.4724, "step": 16667 }, { "epoch": 0.48, "grad_norm": 3.55735882164646, "learning_rate": 5.608561639302458e-06, "loss": 0.3844, "step": 16668 }, { "epoch": 0.48, "grad_norm": 6.479828861001739, "learning_rate": 5.608101326061919e-06, "loss": 0.5398, "step": 16669 }, { "epoch": 0.48, "grad_norm": 6.957852588729111, "learning_rate": 5.607641007589959e-06, "loss": 0.5179, "step": 16670 }, { "epoch": 0.48, "grad_norm": 8.0214678492723, "learning_rate": 5.607180683890537e-06, "loss": 0.3071, "step": 16671 }, { "epoch": 0.48, "grad_norm": 2.0390077634750603, "learning_rate": 5.606720354967617e-06, "loss": 0.316, "step": 16672 }, { "epoch": 0.48, "grad_norm": 8.181640974688165, "learning_rate": 5.606260020825155e-06, "loss": 1.2913, "step": 16673 }, { "epoch": 0.48, "grad_norm": 4.989445466092655, "learning_rate": 5.605799681467113e-06, "loss": 0.3442, "step": 16674 }, { "epoch": 0.48, "grad_norm": 10.348135119511156, "learning_rate": 5.605339336897452e-06, "loss": 0.5601, "step": 16675 }, { "epoch": 0.48, "grad_norm": 5.736002844321638, "learning_rate": 5.60487898712013e-06, "loss": 0.5732, "step": 16676 }, { "epoch": 0.48, "grad_norm": 9.898280743906017, "learning_rate": 5.604418632139107e-06, "loss": 0.8854, "step": 16677 }, { "epoch": 0.48, "grad_norm": 3.405177306209598, "learning_rate": 5.6039582719583475e-06, "loss": 0.1764, "step": 16678 }, { "epoch": 0.48, "grad_norm": 24.277781136854664, "learning_rate": 5.603497906581809e-06, "loss": 0.5377, "step": 16679 }, { "epoch": 0.48, "grad_norm": 14.634829225889424, "learning_rate": 5.60303753601345e-06, "loss": 0.6905, "step": 16680 }, { "epoch": 0.48, "grad_norm": 7.4886480250552605, "learning_rate": 5.602577160257235e-06, "loss": 0.8461, "step": 16681 }, { "epoch": 0.48, "grad_norm": 12.559062848147478, "learning_rate": 5.602116779317125e-06, "loss": 0.3459, "step": 16682 }, { "epoch": 0.48, "grad_norm": 9.356942040504752, "learning_rate": 5.6016563931970755e-06, "loss": 0.8185, "step": 16683 }, { "epoch": 0.48, "grad_norm": 3.9949267893804143, "learning_rate": 5.601196001901052e-06, "loss": 0.2373, "step": 16684 }, { "epoch": 0.48, "grad_norm": 6.82019598687662, "learning_rate": 5.600735605433011e-06, "loss": 0.4333, "step": 16685 }, { "epoch": 0.48, "grad_norm": 5.867898666822658, "learning_rate": 5.600275203796917e-06, "loss": 0.6352, "step": 16686 }, { "epoch": 0.48, "grad_norm": 3.5347824657796556, "learning_rate": 5.599814796996728e-06, "loss": 0.4732, "step": 16687 }, { "epoch": 0.48, "grad_norm": 4.144739287797147, "learning_rate": 5.599354385036408e-06, "loss": 0.3896, "step": 16688 }, { "epoch": 0.48, "grad_norm": 6.1110181233045715, "learning_rate": 5.5988939679199164e-06, "loss": 0.5461, "step": 16689 }, { "epoch": 0.48, "grad_norm": 5.801640528476, "learning_rate": 5.598433545651213e-06, "loss": 0.3713, "step": 16690 }, { "epoch": 0.48, "grad_norm": 5.931988657870417, "learning_rate": 5.5979731182342585e-06, "loss": 0.4684, "step": 16691 }, { "epoch": 0.48, "grad_norm": 13.64236358714187, "learning_rate": 5.597512685673016e-06, "loss": 0.6421, "step": 16692 }, { "epoch": 0.48, "grad_norm": 5.011646296987609, "learning_rate": 5.597052247971445e-06, "loss": 0.599, "step": 16693 }, { "epoch": 0.48, "grad_norm": 5.847865894053277, "learning_rate": 5.5965918051335065e-06, "loss": 0.6766, "step": 16694 }, { "epoch": 0.48, "grad_norm": 5.028126119989302, "learning_rate": 5.596131357163164e-06, "loss": 0.41, "step": 16695 }, { "epoch": 0.48, "grad_norm": 5.7109873882990065, "learning_rate": 5.5956709040643745e-06, "loss": 0.336, "step": 16696 }, { "epoch": 0.48, "grad_norm": 6.445438046244117, "learning_rate": 5.595210445841103e-06, "loss": 0.3504, "step": 16697 }, { "epoch": 0.48, "grad_norm": 3.7676014125770703, "learning_rate": 5.5947499824973085e-06, "loss": 0.2909, "step": 16698 }, { "epoch": 0.48, "grad_norm": 5.967107536856932, "learning_rate": 5.594289514036955e-06, "loss": 0.7782, "step": 16699 }, { "epoch": 0.48, "grad_norm": 5.072096030448344, "learning_rate": 5.593829040464e-06, "loss": 0.2868, "step": 16700 }, { "epoch": 0.48, "grad_norm": 5.391224791841326, "learning_rate": 5.593368561782406e-06, "loss": 0.6026, "step": 16701 }, { "epoch": 0.48, "grad_norm": 5.304130423822314, "learning_rate": 5.592908077996138e-06, "loss": 0.2316, "step": 16702 }, { "epoch": 0.48, "grad_norm": 7.385121592952142, "learning_rate": 5.592447589109153e-06, "loss": 1.0034, "step": 16703 }, { "epoch": 0.48, "grad_norm": 5.441692402278843, "learning_rate": 5.591987095125414e-06, "loss": 0.6274, "step": 16704 }, { "epoch": 0.48, "grad_norm": 6.515400432128873, "learning_rate": 5.591526596048885e-06, "loss": 0.5677, "step": 16705 }, { "epoch": 0.48, "grad_norm": 8.101081973680252, "learning_rate": 5.591066091883524e-06, "loss": 0.4218, "step": 16706 }, { "epoch": 0.48, "grad_norm": 5.885968095271039, "learning_rate": 5.590605582633294e-06, "loss": 0.6979, "step": 16707 }, { "epoch": 0.48, "grad_norm": 4.808560356563387, "learning_rate": 5.5901450683021575e-06, "loss": 0.4172, "step": 16708 }, { "epoch": 0.48, "grad_norm": 6.9989578288704966, "learning_rate": 5.589684548894077e-06, "loss": 0.3807, "step": 16709 }, { "epoch": 0.48, "grad_norm": 3.857633962474523, "learning_rate": 5.5892240244130096e-06, "loss": 0.2383, "step": 16710 }, { "epoch": 0.48, "grad_norm": 2.930137111495742, "learning_rate": 5.588763494862922e-06, "loss": 0.6506, "step": 16711 }, { "epoch": 0.48, "grad_norm": 7.492439782884244, "learning_rate": 5.588302960247776e-06, "loss": 0.9313, "step": 16712 }, { "epoch": 0.48, "grad_norm": 4.202649624967475, "learning_rate": 5.587842420571531e-06, "loss": 0.3253, "step": 16713 }, { "epoch": 0.48, "grad_norm": 3.124200642393293, "learning_rate": 5.587381875838149e-06, "loss": 0.3378, "step": 16714 }, { "epoch": 0.48, "grad_norm": 5.762274701726669, "learning_rate": 5.586921326051594e-06, "loss": 0.5621, "step": 16715 }, { "epoch": 0.48, "grad_norm": 5.911421514948209, "learning_rate": 5.586460771215828e-06, "loss": 0.3674, "step": 16716 }, { "epoch": 0.48, "grad_norm": 5.7691115508844035, "learning_rate": 5.586000211334811e-06, "loss": 0.4219, "step": 16717 }, { "epoch": 0.48, "grad_norm": 6.4443080419961865, "learning_rate": 5.585539646412505e-06, "loss": 0.7536, "step": 16718 }, { "epoch": 0.48, "grad_norm": 5.4358988629772025, "learning_rate": 5.585079076452876e-06, "loss": 0.5915, "step": 16719 }, { "epoch": 0.48, "grad_norm": 3.8978621950868018, "learning_rate": 5.584618501459883e-06, "loss": 0.1716, "step": 16720 }, { "epoch": 0.48, "grad_norm": 4.858651601051634, "learning_rate": 5.584157921437488e-06, "loss": 0.4432, "step": 16721 }, { "epoch": 0.48, "grad_norm": 4.331276665367404, "learning_rate": 5.583697336389657e-06, "loss": 0.4221, "step": 16722 }, { "epoch": 0.48, "grad_norm": 7.023475451679825, "learning_rate": 5.583236746320347e-06, "loss": 0.5607, "step": 16723 }, { "epoch": 0.48, "grad_norm": 3.939001705270995, "learning_rate": 5.582776151233524e-06, "loss": 0.5074, "step": 16724 }, { "epoch": 0.48, "grad_norm": 3.1359015097308616, "learning_rate": 5.5823155511331494e-06, "loss": 0.4146, "step": 16725 }, { "epoch": 0.48, "grad_norm": 2.243093965304545, "learning_rate": 5.581854946023186e-06, "loss": 0.2079, "step": 16726 }, { "epoch": 0.48, "grad_norm": 4.655080814909776, "learning_rate": 5.581394335907597e-06, "loss": 0.4353, "step": 16727 }, { "epoch": 0.48, "grad_norm": 4.7903429718511665, "learning_rate": 5.5809337207903424e-06, "loss": 0.4681, "step": 16728 }, { "epoch": 0.48, "grad_norm": 6.340972668256632, "learning_rate": 5.580473100675387e-06, "loss": 0.4677, "step": 16729 }, { "epoch": 0.48, "grad_norm": 3.508379212885142, "learning_rate": 5.580012475566694e-06, "loss": 0.3824, "step": 16730 }, { "epoch": 0.48, "grad_norm": 7.860663245854886, "learning_rate": 5.579551845468224e-06, "loss": 0.4338, "step": 16731 }, { "epoch": 0.48, "grad_norm": 6.224289961616154, "learning_rate": 5.579091210383942e-06, "loss": 0.5832, "step": 16732 }, { "epoch": 0.48, "grad_norm": 6.324248607770006, "learning_rate": 5.578630570317809e-06, "loss": 0.6564, "step": 16733 }, { "epoch": 0.48, "grad_norm": 8.373372617293292, "learning_rate": 5.578169925273788e-06, "loss": 1.0393, "step": 16734 }, { "epoch": 0.48, "grad_norm": 5.909942481895466, "learning_rate": 5.577709275255843e-06, "loss": 0.4096, "step": 16735 }, { "epoch": 0.48, "grad_norm": 3.227860879940829, "learning_rate": 5.577248620267935e-06, "loss": 0.317, "step": 16736 }, { "epoch": 0.48, "grad_norm": 5.84449445093097, "learning_rate": 5.576787960314028e-06, "loss": 0.6016, "step": 16737 }, { "epoch": 0.48, "grad_norm": 5.3964971431066155, "learning_rate": 5.576327295398086e-06, "loss": 0.4915, "step": 16738 }, { "epoch": 0.48, "grad_norm": 5.654136600100535, "learning_rate": 5.575866625524073e-06, "loss": 0.5888, "step": 16739 }, { "epoch": 0.48, "grad_norm": 3.787085357479685, "learning_rate": 5.575405950695948e-06, "loss": 0.7478, "step": 16740 }, { "epoch": 0.48, "grad_norm": 4.9223957573434545, "learning_rate": 5.574945270917678e-06, "loss": 0.3886, "step": 16741 }, { "epoch": 0.48, "grad_norm": 4.639449307795932, "learning_rate": 5.574484586193223e-06, "loss": 0.3054, "step": 16742 }, { "epoch": 0.48, "grad_norm": 3.9680435272759658, "learning_rate": 5.5740238965265485e-06, "loss": 0.4033, "step": 16743 }, { "epoch": 0.48, "grad_norm": 5.226148010149236, "learning_rate": 5.573563201921617e-06, "loss": 0.346, "step": 16744 }, { "epoch": 0.48, "grad_norm": 3.926888047007978, "learning_rate": 5.573102502382391e-06, "loss": 0.6438, "step": 16745 }, { "epoch": 0.48, "grad_norm": 5.535005016858757, "learning_rate": 5.572641797912838e-06, "loss": 0.7868, "step": 16746 }, { "epoch": 0.48, "grad_norm": 1.1291441831675868, "learning_rate": 5.572181088516916e-06, "loss": 0.0682, "step": 16747 }, { "epoch": 0.48, "grad_norm": 5.264007591624714, "learning_rate": 5.57172037419859e-06, "loss": 0.5861, "step": 16748 }, { "epoch": 0.48, "grad_norm": 4.6645002814607786, "learning_rate": 5.571259654961826e-06, "loss": 0.5715, "step": 16749 }, { "epoch": 0.48, "grad_norm": 6.678962605723097, "learning_rate": 5.570798930810584e-06, "loss": 0.4459, "step": 16750 }, { "epoch": 0.48, "grad_norm": 4.221413880415544, "learning_rate": 5.570338201748827e-06, "loss": 0.2444, "step": 16751 }, { "epoch": 0.48, "grad_norm": 14.138260010275808, "learning_rate": 5.569877467780523e-06, "loss": 0.6412, "step": 16752 }, { "epoch": 0.48, "grad_norm": 3.7943907722920716, "learning_rate": 5.569416728909634e-06, "loss": 0.309, "step": 16753 }, { "epoch": 0.48, "grad_norm": 5.118074436345579, "learning_rate": 5.568955985140122e-06, "loss": 0.3125, "step": 16754 }, { "epoch": 0.48, "grad_norm": 7.946624497498031, "learning_rate": 5.568495236475951e-06, "loss": 0.5479, "step": 16755 }, { "epoch": 0.48, "grad_norm": 4.646292144916764, "learning_rate": 5.568034482921088e-06, "loss": 0.2198, "step": 16756 }, { "epoch": 0.48, "grad_norm": 4.3453142519346875, "learning_rate": 5.567573724479492e-06, "loss": 0.2165, "step": 16757 }, { "epoch": 0.48, "grad_norm": 9.516370922599593, "learning_rate": 5.56711296115513e-06, "loss": 1.0401, "step": 16758 }, { "epoch": 0.48, "grad_norm": 6.9646128874560675, "learning_rate": 5.566652192951965e-06, "loss": 0.4523, "step": 16759 }, { "epoch": 0.48, "grad_norm": 6.692630867318022, "learning_rate": 5.56619141987396e-06, "loss": 0.5141, "step": 16760 }, { "epoch": 0.48, "grad_norm": 11.422139927913548, "learning_rate": 5.56573064192508e-06, "loss": 0.5888, "step": 16761 }, { "epoch": 0.48, "grad_norm": 5.041379221558958, "learning_rate": 5.56526985910929e-06, "loss": 0.2632, "step": 16762 }, { "epoch": 0.48, "grad_norm": 6.563103275726287, "learning_rate": 5.564809071430553e-06, "loss": 0.9717, "step": 16763 }, { "epoch": 0.48, "grad_norm": 3.957097526514931, "learning_rate": 5.564348278892833e-06, "loss": 0.5669, "step": 16764 }, { "epoch": 0.48, "grad_norm": 5.6331722869462775, "learning_rate": 5.563887481500093e-06, "loss": 0.3188, "step": 16765 }, { "epoch": 0.48, "grad_norm": 6.780457692071903, "learning_rate": 5.563426679256301e-06, "loss": 0.4174, "step": 16766 }, { "epoch": 0.48, "grad_norm": 5.319769340046505, "learning_rate": 5.562965872165416e-06, "loss": 0.4317, "step": 16767 }, { "epoch": 0.48, "grad_norm": 3.783664752011227, "learning_rate": 5.562505060231405e-06, "loss": 0.3106, "step": 16768 }, { "epoch": 0.48, "grad_norm": 3.608007394871884, "learning_rate": 5.562044243458234e-06, "loss": 0.3046, "step": 16769 }, { "epoch": 0.48, "grad_norm": 9.903501231545285, "learning_rate": 5.561583421849865e-06, "loss": 0.2606, "step": 16770 }, { "epoch": 0.48, "grad_norm": 15.370243329149131, "learning_rate": 5.561122595410262e-06, "loss": 0.7115, "step": 16771 }, { "epoch": 0.48, "grad_norm": 4.079170896796168, "learning_rate": 5.560661764143391e-06, "loss": 0.3685, "step": 16772 }, { "epoch": 0.48, "grad_norm": 5.655146032685715, "learning_rate": 5.560200928053216e-06, "loss": 0.7414, "step": 16773 }, { "epoch": 0.48, "grad_norm": 6.267520912601136, "learning_rate": 5.559740087143701e-06, "loss": 0.4246, "step": 16774 }, { "epoch": 0.48, "grad_norm": 4.38781183878938, "learning_rate": 5.559279241418811e-06, "loss": 0.6223, "step": 16775 }, { "epoch": 0.48, "grad_norm": 6.3948492438327955, "learning_rate": 5.558818390882511e-06, "loss": 0.5354, "step": 16776 }, { "epoch": 0.48, "grad_norm": 3.969455566165246, "learning_rate": 5.558357535538765e-06, "loss": 0.2098, "step": 16777 }, { "epoch": 0.48, "grad_norm": 3.6634289940124596, "learning_rate": 5.557896675391537e-06, "loss": 0.3469, "step": 16778 }, { "epoch": 0.48, "grad_norm": 5.383277751155386, "learning_rate": 5.557435810444794e-06, "loss": 0.1697, "step": 16779 }, { "epoch": 0.48, "grad_norm": 3.3499299810693084, "learning_rate": 5.5569749407025e-06, "loss": 0.3042, "step": 16780 }, { "epoch": 0.48, "grad_norm": 6.180035722376222, "learning_rate": 5.556514066168618e-06, "loss": 0.2983, "step": 16781 }, { "epoch": 0.48, "grad_norm": 5.854258752643901, "learning_rate": 5.556053186847114e-06, "loss": 0.5475, "step": 16782 }, { "epoch": 0.48, "grad_norm": 4.418369696625896, "learning_rate": 5.5555923027419535e-06, "loss": 0.5658, "step": 16783 }, { "epoch": 0.48, "grad_norm": 8.47514344040555, "learning_rate": 5.555131413857099e-06, "loss": 1.3642, "step": 16784 }, { "epoch": 0.48, "grad_norm": 6.722424447176632, "learning_rate": 5.554670520196518e-06, "loss": 0.2368, "step": 16785 }, { "epoch": 0.48, "grad_norm": 7.135978305366708, "learning_rate": 5.554209621764176e-06, "loss": 0.5561, "step": 16786 }, { "epoch": 0.48, "grad_norm": 3.8056158856129114, "learning_rate": 5.553748718564036e-06, "loss": 0.1624, "step": 16787 }, { "epoch": 0.48, "grad_norm": 5.021228452887444, "learning_rate": 5.553287810600065e-06, "loss": 0.3743, "step": 16788 }, { "epoch": 0.48, "grad_norm": 4.044366272510302, "learning_rate": 5.552826897876224e-06, "loss": 0.3025, "step": 16789 }, { "epoch": 0.48, "grad_norm": 7.7219617084619845, "learning_rate": 5.552365980396486e-06, "loss": 0.6409, "step": 16790 }, { "epoch": 0.48, "grad_norm": 5.330896903863513, "learning_rate": 5.5519050581648086e-06, "loss": 0.7839, "step": 16791 }, { "epoch": 0.48, "grad_norm": 7.1418625575129395, "learning_rate": 5.55144413118516e-06, "loss": 0.8405, "step": 16792 }, { "epoch": 0.48, "grad_norm": 6.959168899834824, "learning_rate": 5.550983199461505e-06, "loss": 0.3764, "step": 16793 }, { "epoch": 0.48, "grad_norm": 7.2354026707159536, "learning_rate": 5.550522262997811e-06, "loss": 0.8396, "step": 16794 }, { "epoch": 0.48, "grad_norm": 3.4412454141017457, "learning_rate": 5.550061321798039e-06, "loss": 0.2911, "step": 16795 }, { "epoch": 0.48, "grad_norm": 5.2395437199919686, "learning_rate": 5.549600375866161e-06, "loss": 0.3971, "step": 16796 }, { "epoch": 0.48, "grad_norm": 7.116734646272239, "learning_rate": 5.549139425206136e-06, "loss": 0.5421, "step": 16797 }, { "epoch": 0.48, "grad_norm": 8.450783678540951, "learning_rate": 5.548678469821933e-06, "loss": 0.7191, "step": 16798 }, { "epoch": 0.48, "grad_norm": 5.447537770658992, "learning_rate": 5.548217509717516e-06, "loss": 0.8134, "step": 16799 }, { "epoch": 0.48, "grad_norm": 6.646087733295498, "learning_rate": 5.547756544896852e-06, "loss": 0.6267, "step": 16800 }, { "epoch": 0.48, "grad_norm": 4.788774067841393, "learning_rate": 5.547295575363905e-06, "loss": 0.3647, "step": 16801 }, { "epoch": 0.48, "grad_norm": 7.6014475799059875, "learning_rate": 5.5468346011226415e-06, "loss": 1.0088, "step": 16802 }, { "epoch": 0.48, "grad_norm": 5.218820765580866, "learning_rate": 5.546373622177029e-06, "loss": 0.3749, "step": 16803 }, { "epoch": 0.48, "grad_norm": 3.96480204743501, "learning_rate": 5.54591263853103e-06, "loss": 0.41, "step": 16804 }, { "epoch": 0.48, "grad_norm": 8.066261005710368, "learning_rate": 5.5454516501886125e-06, "loss": 0.8465, "step": 16805 }, { "epoch": 0.48, "grad_norm": 5.534373204442599, "learning_rate": 5.544990657153743e-06, "loss": 0.8947, "step": 16806 }, { "epoch": 0.48, "grad_norm": 8.46553766903307, "learning_rate": 5.5445296594303835e-06, "loss": 0.762, "step": 16807 }, { "epoch": 0.48, "grad_norm": 5.131492503077741, "learning_rate": 5.544068657022503e-06, "loss": 0.3132, "step": 16808 }, { "epoch": 0.48, "grad_norm": 4.2073326746670565, "learning_rate": 5.543607649934066e-06, "loss": 0.221, "step": 16809 }, { "epoch": 0.48, "grad_norm": 4.9242320880470185, "learning_rate": 5.543146638169041e-06, "loss": 0.7243, "step": 16810 }, { "epoch": 0.48, "grad_norm": 2.2714618475101145, "learning_rate": 5.5426856217313915e-06, "loss": 0.1175, "step": 16811 }, { "epoch": 0.48, "grad_norm": 6.384931121669739, "learning_rate": 5.542224600625084e-06, "loss": 0.3786, "step": 16812 }, { "epoch": 0.48, "grad_norm": 6.462461058980764, "learning_rate": 5.5417635748540864e-06, "loss": 0.422, "step": 16813 }, { "epoch": 0.48, "grad_norm": 8.521822516766187, "learning_rate": 5.541302544422363e-06, "loss": 0.6719, "step": 16814 }, { "epoch": 0.48, "grad_norm": 5.328396720334193, "learning_rate": 5.540841509333879e-06, "loss": 0.5376, "step": 16815 }, { "epoch": 0.48, "grad_norm": 2.7610370922422516, "learning_rate": 5.540380469592604e-06, "loss": 0.221, "step": 16816 }, { "epoch": 0.48, "grad_norm": 4.162990611034383, "learning_rate": 5.539919425202502e-06, "loss": 0.5475, "step": 16817 }, { "epoch": 0.48, "grad_norm": 5.355070025558741, "learning_rate": 5.539458376167538e-06, "loss": 0.6007, "step": 16818 }, { "epoch": 0.48, "grad_norm": 3.8030299780579653, "learning_rate": 5.5389973224916815e-06, "loss": 0.4283, "step": 16819 }, { "epoch": 0.48, "grad_norm": 5.125514958259444, "learning_rate": 5.5385362641788974e-06, "loss": 0.4399, "step": 16820 }, { "epoch": 0.48, "grad_norm": 4.385448893642335, "learning_rate": 5.538075201233153e-06, "loss": 0.4077, "step": 16821 }, { "epoch": 0.48, "grad_norm": 6.050729395506906, "learning_rate": 5.537614133658412e-06, "loss": 0.3753, "step": 16822 }, { "epoch": 0.48, "grad_norm": 7.6394132187580235, "learning_rate": 5.537153061458645e-06, "loss": 0.575, "step": 16823 }, { "epoch": 0.48, "grad_norm": 4.528280139948101, "learning_rate": 5.5366919846378145e-06, "loss": 0.5681, "step": 16824 }, { "epoch": 0.48, "grad_norm": 4.099954606014262, "learning_rate": 5.5362309031998875e-06, "loss": 0.652, "step": 16825 }, { "epoch": 0.48, "grad_norm": 4.883676193144348, "learning_rate": 5.535769817148835e-06, "loss": 0.5574, "step": 16826 }, { "epoch": 0.48, "grad_norm": 5.342160797275552, "learning_rate": 5.535308726488619e-06, "loss": 0.5693, "step": 16827 }, { "epoch": 0.48, "grad_norm": 5.962770672618417, "learning_rate": 5.534847631223209e-06, "loss": 0.4341, "step": 16828 }, { "epoch": 0.48, "grad_norm": 4.770757094305751, "learning_rate": 5.534386531356571e-06, "loss": 0.566, "step": 16829 }, { "epoch": 0.48, "grad_norm": 8.240362578210963, "learning_rate": 5.533925426892671e-06, "loss": 0.5501, "step": 16830 }, { "epoch": 0.48, "grad_norm": 4.092067110520001, "learning_rate": 5.533464317835475e-06, "loss": 0.516, "step": 16831 }, { "epoch": 0.48, "grad_norm": 6.5674994181098, "learning_rate": 5.533003204188952e-06, "loss": 0.6078, "step": 16832 }, { "epoch": 0.48, "grad_norm": 4.725912153411519, "learning_rate": 5.5325420859570685e-06, "loss": 0.3986, "step": 16833 }, { "epoch": 0.48, "grad_norm": 5.871158195949001, "learning_rate": 5.532080963143791e-06, "loss": 0.5151, "step": 16834 }, { "epoch": 0.48, "grad_norm": 7.163296757399509, "learning_rate": 5.531619835753086e-06, "loss": 0.7211, "step": 16835 }, { "epoch": 0.48, "grad_norm": 5.168911225872989, "learning_rate": 5.531158703788922e-06, "loss": 0.7826, "step": 16836 }, { "epoch": 0.48, "grad_norm": 4.709990853869717, "learning_rate": 5.530697567255264e-06, "loss": 0.7372, "step": 16837 }, { "epoch": 0.48, "grad_norm": 3.901665675426446, "learning_rate": 5.53023642615608e-06, "loss": 0.2141, "step": 16838 }, { "epoch": 0.48, "grad_norm": 5.630886304078427, "learning_rate": 5.529775280495337e-06, "loss": 0.5113, "step": 16839 }, { "epoch": 0.48, "grad_norm": 2.7891341982903395, "learning_rate": 5.529314130277005e-06, "loss": 0.2925, "step": 16840 }, { "epoch": 0.48, "grad_norm": 8.373965299087539, "learning_rate": 5.528852975505047e-06, "loss": 0.5938, "step": 16841 }, { "epoch": 0.48, "grad_norm": 3.0526420008179436, "learning_rate": 5.52839181618343e-06, "loss": 0.194, "step": 16842 }, { "epoch": 0.48, "grad_norm": 7.858374105316034, "learning_rate": 5.527930652316127e-06, "loss": 0.4254, "step": 16843 }, { "epoch": 0.48, "grad_norm": 2.4096778629799402, "learning_rate": 5.5274694839071e-06, "loss": 0.2147, "step": 16844 }, { "epoch": 0.48, "grad_norm": 4.975230591188825, "learning_rate": 5.5270083109603165e-06, "loss": 0.599, "step": 16845 }, { "epoch": 0.48, "grad_norm": 4.398016440258029, "learning_rate": 5.526547133479747e-06, "loss": 0.7559, "step": 16846 }, { "epoch": 0.48, "grad_norm": 7.559712802740069, "learning_rate": 5.526085951469358e-06, "loss": 0.7234, "step": 16847 }, { "epoch": 0.48, "grad_norm": 4.809784457478815, "learning_rate": 5.525624764933115e-06, "loss": 0.2634, "step": 16848 }, { "epoch": 0.48, "grad_norm": 1.884391882015792, "learning_rate": 5.525163573874987e-06, "loss": 0.1532, "step": 16849 }, { "epoch": 0.48, "grad_norm": 2.944559704326047, "learning_rate": 5.524702378298941e-06, "loss": 0.1191, "step": 16850 }, { "epoch": 0.48, "grad_norm": 4.2436020882944305, "learning_rate": 5.524241178208946e-06, "loss": 0.2597, "step": 16851 }, { "epoch": 0.48, "grad_norm": 3.3240699241665013, "learning_rate": 5.523779973608968e-06, "loss": 0.4293, "step": 16852 }, { "epoch": 0.48, "grad_norm": 4.909950440189699, "learning_rate": 5.523318764502976e-06, "loss": 0.1132, "step": 16853 }, { "epoch": 0.48, "grad_norm": 5.1278068950584546, "learning_rate": 5.522857550894938e-06, "loss": 0.7495, "step": 16854 }, { "epoch": 0.48, "grad_norm": 5.336464427215037, "learning_rate": 5.522396332788819e-06, "loss": 0.2832, "step": 16855 }, { "epoch": 0.48, "grad_norm": 3.9525199698535447, "learning_rate": 5.521935110188589e-06, "loss": 0.4165, "step": 16856 }, { "epoch": 0.48, "grad_norm": 11.327064369798064, "learning_rate": 5.5214738830982175e-06, "loss": 1.2222, "step": 16857 }, { "epoch": 0.48, "grad_norm": 6.202557516204206, "learning_rate": 5.521012651521667e-06, "loss": 0.2583, "step": 16858 }, { "epoch": 0.48, "grad_norm": 3.4769170344594302, "learning_rate": 5.52055141546291e-06, "loss": 0.293, "step": 16859 }, { "epoch": 0.48, "grad_norm": 7.077627539258664, "learning_rate": 5.520090174925915e-06, "loss": 0.6279, "step": 16860 }, { "epoch": 0.48, "grad_norm": 7.027233587295315, "learning_rate": 5.519628929914646e-06, "loss": 0.528, "step": 16861 }, { "epoch": 0.48, "grad_norm": 5.467681562411827, "learning_rate": 5.519167680433075e-06, "loss": 0.5361, "step": 16862 }, { "epoch": 0.48, "grad_norm": 8.977089145174858, "learning_rate": 5.518706426485169e-06, "loss": 0.5665, "step": 16863 }, { "epoch": 0.48, "grad_norm": 6.1259475675699715, "learning_rate": 5.518245168074894e-06, "loss": 0.3548, "step": 16864 }, { "epoch": 0.48, "grad_norm": 6.4112331827039455, "learning_rate": 5.51778390520622e-06, "loss": 0.544, "step": 16865 }, { "epoch": 0.48, "grad_norm": 6.127641244612115, "learning_rate": 5.517322637883114e-06, "loss": 0.6439, "step": 16866 }, { "epoch": 0.48, "grad_norm": 4.042097430853264, "learning_rate": 5.516861366109546e-06, "loss": 0.221, "step": 16867 }, { "epoch": 0.48, "grad_norm": 7.090566315624552, "learning_rate": 5.516400089889482e-06, "loss": 0.4591, "step": 16868 }, { "epoch": 0.48, "grad_norm": 4.5303124608873135, "learning_rate": 5.515938809226893e-06, "loss": 0.6028, "step": 16869 }, { "epoch": 0.48, "grad_norm": 3.6262125996130754, "learning_rate": 5.5154775241257464e-06, "loss": 0.197, "step": 16870 }, { "epoch": 0.48, "grad_norm": 4.736840314404668, "learning_rate": 5.515016234590009e-06, "loss": 0.644, "step": 16871 }, { "epoch": 0.48, "grad_norm": 4.8046263652106544, "learning_rate": 5.514554940623651e-06, "loss": 0.6282, "step": 16872 }, { "epoch": 0.48, "grad_norm": 8.388037426440741, "learning_rate": 5.514093642230639e-06, "loss": 1.0547, "step": 16873 }, { "epoch": 0.48, "grad_norm": 3.6166096631551823, "learning_rate": 5.513632339414944e-06, "loss": 0.336, "step": 16874 }, { "epoch": 0.48, "grad_norm": 3.136460841158993, "learning_rate": 5.513171032180532e-06, "loss": 0.3285, "step": 16875 }, { "epoch": 0.48, "grad_norm": 4.833891995250497, "learning_rate": 5.512709720531374e-06, "loss": 0.2558, "step": 16876 }, { "epoch": 0.48, "grad_norm": 4.124984423290089, "learning_rate": 5.5122484044714375e-06, "loss": 0.3494, "step": 16877 }, { "epoch": 0.48, "grad_norm": 4.978195090318306, "learning_rate": 5.5117870840046905e-06, "loss": 0.2324, "step": 16878 }, { "epoch": 0.48, "grad_norm": 6.8592096191490075, "learning_rate": 5.511325759135102e-06, "loss": 0.7251, "step": 16879 }, { "epoch": 0.48, "grad_norm": 5.569561794035118, "learning_rate": 5.510864429866642e-06, "loss": 0.9106, "step": 16880 }, { "epoch": 0.48, "grad_norm": 5.977449159362109, "learning_rate": 5.510403096203277e-06, "loss": 0.6152, "step": 16881 }, { "epoch": 0.48, "grad_norm": 8.424036035375511, "learning_rate": 5.5099417581489775e-06, "loss": 0.4501, "step": 16882 }, { "epoch": 0.48, "grad_norm": 4.5025155400882255, "learning_rate": 5.509480415707712e-06, "loss": 0.8418, "step": 16883 }, { "epoch": 0.48, "grad_norm": 3.434923316010765, "learning_rate": 5.509019068883449e-06, "loss": 0.3407, "step": 16884 }, { "epoch": 0.48, "grad_norm": 6.606828852964984, "learning_rate": 5.508557717680156e-06, "loss": 0.6987, "step": 16885 }, { "epoch": 0.48, "grad_norm": 12.857257951493777, "learning_rate": 5.508096362101806e-06, "loss": 0.3642, "step": 16886 }, { "epoch": 0.48, "grad_norm": 5.886513608358963, "learning_rate": 5.507635002152365e-06, "loss": 0.751, "step": 16887 }, { "epoch": 0.48, "grad_norm": 3.1030902126855544, "learning_rate": 5.507173637835803e-06, "loss": 0.1323, "step": 16888 }, { "epoch": 0.48, "grad_norm": 4.416060706021967, "learning_rate": 5.506712269156087e-06, "loss": 0.5125, "step": 16889 }, { "epoch": 0.48, "grad_norm": 7.282106995799966, "learning_rate": 5.5062508961171886e-06, "loss": 0.8256, "step": 16890 }, { "epoch": 0.48, "grad_norm": 2.3518012423053896, "learning_rate": 5.505789518723076e-06, "loss": 0.1827, "step": 16891 }, { "epoch": 0.48, "grad_norm": 6.531945716271437, "learning_rate": 5.505328136977719e-06, "loss": 0.5541, "step": 16892 }, { "epoch": 0.48, "grad_norm": 1.8072607429856318, "learning_rate": 5.504866750885086e-06, "loss": 0.1035, "step": 16893 }, { "epoch": 0.48, "grad_norm": 5.695651302019478, "learning_rate": 5.504405360449146e-06, "loss": 0.7954, "step": 16894 }, { "epoch": 0.48, "grad_norm": 5.9952332317171395, "learning_rate": 5.503943965673869e-06, "loss": 0.4194, "step": 16895 }, { "epoch": 0.48, "grad_norm": 5.599825729485454, "learning_rate": 5.503482566563224e-06, "loss": 0.2291, "step": 16896 }, { "epoch": 0.48, "grad_norm": 1.529202386463143, "learning_rate": 5.503021163121183e-06, "loss": 0.1238, "step": 16897 }, { "epoch": 0.48, "grad_norm": 4.192717504392588, "learning_rate": 5.5025597553517095e-06, "loss": 0.4883, "step": 16898 }, { "epoch": 0.48, "grad_norm": 4.275456288174594, "learning_rate": 5.502098343258776e-06, "loss": 0.2631, "step": 16899 }, { "epoch": 0.48, "grad_norm": 4.535676622395286, "learning_rate": 5.501636926846354e-06, "loss": 0.2229, "step": 16900 }, { "epoch": 0.48, "grad_norm": 5.287618330044022, "learning_rate": 5.501175506118412e-06, "loss": 0.4643, "step": 16901 }, { "epoch": 0.48, "grad_norm": 3.431829665507579, "learning_rate": 5.5007140810789175e-06, "loss": 0.4866, "step": 16902 }, { "epoch": 0.48, "grad_norm": 4.2059864386731265, "learning_rate": 5.500252651731841e-06, "loss": 0.3095, "step": 16903 }, { "epoch": 0.48, "grad_norm": 3.7556264312126895, "learning_rate": 5.499791218081154e-06, "loss": 0.2908, "step": 16904 }, { "epoch": 0.48, "grad_norm": 5.100184255899223, "learning_rate": 5.499329780130824e-06, "loss": 0.8075, "step": 16905 }, { "epoch": 0.48, "grad_norm": 4.23360658549707, "learning_rate": 5.498868337884821e-06, "loss": 0.5214, "step": 16906 }, { "epoch": 0.48, "grad_norm": 10.870950142079447, "learning_rate": 5.498406891347115e-06, "loss": 0.4597, "step": 16907 }, { "epoch": 0.48, "grad_norm": 5.521443958222257, "learning_rate": 5.497945440521676e-06, "loss": 0.3448, "step": 16908 }, { "epoch": 0.48, "grad_norm": 4.2805260094831725, "learning_rate": 5.497483985412474e-06, "loss": 0.3624, "step": 16909 }, { "epoch": 0.48, "grad_norm": 3.575574840666586, "learning_rate": 5.4970225260234796e-06, "loss": 0.4793, "step": 16910 }, { "epoch": 0.48, "grad_norm": 9.475263011170686, "learning_rate": 5.4965610623586605e-06, "loss": 0.9782, "step": 16911 }, { "epoch": 0.48, "grad_norm": 8.109295501025302, "learning_rate": 5.496099594421987e-06, "loss": 0.5817, "step": 16912 }, { "epoch": 0.48, "grad_norm": 7.568909506215625, "learning_rate": 5.49563812221743e-06, "loss": 0.6363, "step": 16913 }, { "epoch": 0.48, "grad_norm": 1.7072054776245948, "learning_rate": 5.495176645748961e-06, "loss": 0.1487, "step": 16914 }, { "epoch": 0.48, "grad_norm": 8.63420768959509, "learning_rate": 5.494715165020546e-06, "loss": 0.8226, "step": 16915 }, { "epoch": 0.48, "grad_norm": 3.1265342760694717, "learning_rate": 5.494253680036158e-06, "loss": 0.573, "step": 16916 }, { "epoch": 0.48, "grad_norm": 4.7341262198069325, "learning_rate": 5.493792190799768e-06, "loss": 0.3321, "step": 16917 }, { "epoch": 0.48, "grad_norm": 8.774992807499475, "learning_rate": 5.493330697315343e-06, "loss": 0.6865, "step": 16918 }, { "epoch": 0.48, "grad_norm": 7.802996478017256, "learning_rate": 5.492869199586854e-06, "loss": 0.8528, "step": 16919 }, { "epoch": 0.48, "grad_norm": 4.275561012577739, "learning_rate": 5.4924076976182735e-06, "loss": 0.2234, "step": 16920 }, { "epoch": 0.48, "grad_norm": 3.5015562718756055, "learning_rate": 5.491946191413571e-06, "loss": 0.3872, "step": 16921 }, { "epoch": 0.48, "grad_norm": 4.832692180508484, "learning_rate": 5.491484680976714e-06, "loss": 0.3935, "step": 16922 }, { "epoch": 0.48, "grad_norm": 8.031763973191513, "learning_rate": 5.491023166311677e-06, "loss": 0.8414, "step": 16923 }, { "epoch": 0.48, "grad_norm": 3.675717749538678, "learning_rate": 5.490561647422425e-06, "loss": 0.4767, "step": 16924 }, { "epoch": 0.48, "grad_norm": 7.771621722062031, "learning_rate": 5.490100124312934e-06, "loss": 0.3742, "step": 16925 }, { "epoch": 0.48, "grad_norm": 7.450007044385614, "learning_rate": 5.48963859698717e-06, "loss": 0.3791, "step": 16926 }, { "epoch": 0.48, "grad_norm": 5.197098825007988, "learning_rate": 5.489177065449109e-06, "loss": 0.3052, "step": 16927 }, { "epoch": 0.48, "grad_norm": 4.88620087511805, "learning_rate": 5.488715529702715e-06, "loss": 0.4027, "step": 16928 }, { "epoch": 0.48, "grad_norm": 5.659875477105134, "learning_rate": 5.4882539897519614e-06, "loss": 0.4419, "step": 16929 }, { "epoch": 0.48, "grad_norm": 6.397936418263305, "learning_rate": 5.487792445600819e-06, "loss": 0.3433, "step": 16930 }, { "epoch": 0.48, "grad_norm": 7.989817218432469, "learning_rate": 5.48733089725326e-06, "loss": 0.6119, "step": 16931 }, { "epoch": 0.48, "grad_norm": 3.3185454210348784, "learning_rate": 5.486869344713251e-06, "loss": 0.353, "step": 16932 }, { "epoch": 0.48, "grad_norm": 7.3625791179505615, "learning_rate": 5.4864077879847665e-06, "loss": 0.8488, "step": 16933 }, { "epoch": 0.48, "grad_norm": 6.828734626005555, "learning_rate": 5.485946227071775e-06, "loss": 0.3826, "step": 16934 }, { "epoch": 0.48, "grad_norm": 6.431567189631155, "learning_rate": 5.485484661978249e-06, "loss": 0.4013, "step": 16935 }, { "epoch": 0.49, "grad_norm": 2.6385298322936053, "learning_rate": 5.485023092708157e-06, "loss": 0.4752, "step": 16936 }, { "epoch": 0.49, "grad_norm": 6.156847968324361, "learning_rate": 5.484561519265472e-06, "loss": 0.7353, "step": 16937 }, { "epoch": 0.49, "grad_norm": 5.556310961016663, "learning_rate": 5.484099941654164e-06, "loss": 0.2072, "step": 16938 }, { "epoch": 0.49, "grad_norm": 7.553791517679849, "learning_rate": 5.483638359878203e-06, "loss": 0.59, "step": 16939 }, { "epoch": 0.49, "grad_norm": 4.476333612312522, "learning_rate": 5.48317677394156e-06, "loss": 0.4752, "step": 16940 }, { "epoch": 0.49, "grad_norm": 5.904610905954271, "learning_rate": 5.4827151838482075e-06, "loss": 0.4477, "step": 16941 }, { "epoch": 0.49, "grad_norm": 4.00321665710293, "learning_rate": 5.482253589602116e-06, "loss": 0.2451, "step": 16942 }, { "epoch": 0.49, "grad_norm": 2.673523845872323, "learning_rate": 5.481791991207256e-06, "loss": 0.3365, "step": 16943 }, { "epoch": 0.49, "grad_norm": 10.94972544112513, "learning_rate": 5.481330388667599e-06, "loss": 0.7759, "step": 16944 }, { "epoch": 0.49, "grad_norm": 3.929009289925443, "learning_rate": 5.480868781987117e-06, "loss": 0.6252, "step": 16945 }, { "epoch": 0.49, "grad_norm": 3.6793010857040915, "learning_rate": 5.480407171169778e-06, "loss": 0.3245, "step": 16946 }, { "epoch": 0.49, "grad_norm": 8.872049620315028, "learning_rate": 5.4799455562195555e-06, "loss": 0.6522, "step": 16947 }, { "epoch": 0.49, "grad_norm": 4.695501720404998, "learning_rate": 5.479483937140421e-06, "loss": 0.475, "step": 16948 }, { "epoch": 0.49, "grad_norm": 4.105465236557057, "learning_rate": 5.479022313936345e-06, "loss": 0.3751, "step": 16949 }, { "epoch": 0.49, "grad_norm": 3.448437024329515, "learning_rate": 5.4785606866113e-06, "loss": 0.1807, "step": 16950 }, { "epoch": 0.49, "grad_norm": 4.929782195473956, "learning_rate": 5.478099055169256e-06, "loss": 0.3729, "step": 16951 }, { "epoch": 0.49, "grad_norm": 2.891705883119315, "learning_rate": 5.477637419614185e-06, "loss": 0.538, "step": 16952 }, { "epoch": 0.49, "grad_norm": 7.543473829825663, "learning_rate": 5.477175779950057e-06, "loss": 0.4559, "step": 16953 }, { "epoch": 0.49, "grad_norm": 4.683235975635992, "learning_rate": 5.476714136180845e-06, "loss": 0.4996, "step": 16954 }, { "epoch": 0.49, "grad_norm": 5.526914783452366, "learning_rate": 5.476252488310521e-06, "loss": 0.5946, "step": 16955 }, { "epoch": 0.49, "grad_norm": 4.982306359064907, "learning_rate": 5.475790836343054e-06, "loss": 0.279, "step": 16956 }, { "epoch": 0.49, "grad_norm": 3.710523297132259, "learning_rate": 5.475329180282417e-06, "loss": 0.3651, "step": 16957 }, { "epoch": 0.49, "grad_norm": 4.766213502661636, "learning_rate": 5.474867520132582e-06, "loss": 0.4617, "step": 16958 }, { "epoch": 0.49, "grad_norm": 13.109150208283893, "learning_rate": 5.47440585589752e-06, "loss": 0.5441, "step": 16959 }, { "epoch": 0.49, "grad_norm": 7.326913501664699, "learning_rate": 5.473944187581202e-06, "loss": 0.5194, "step": 16960 }, { "epoch": 0.49, "grad_norm": 5.735034478540386, "learning_rate": 5.473482515187604e-06, "loss": 0.5031, "step": 16961 }, { "epoch": 0.49, "grad_norm": 3.564481518776037, "learning_rate": 5.473020838720691e-06, "loss": 0.1778, "step": 16962 }, { "epoch": 0.49, "grad_norm": 14.952424195388776, "learning_rate": 5.472559158184438e-06, "loss": 1.0243, "step": 16963 }, { "epoch": 0.49, "grad_norm": 6.362172066981652, "learning_rate": 5.472097473582818e-06, "loss": 0.5816, "step": 16964 }, { "epoch": 0.49, "grad_norm": 8.038386339643893, "learning_rate": 5.471635784919801e-06, "loss": 0.5603, "step": 16965 }, { "epoch": 0.49, "grad_norm": 6.333673639524156, "learning_rate": 5.471174092199359e-06, "loss": 0.3506, "step": 16966 }, { "epoch": 0.49, "grad_norm": 4.468730099507016, "learning_rate": 5.470712395425466e-06, "loss": 0.3834, "step": 16967 }, { "epoch": 0.49, "grad_norm": 5.7368030864506725, "learning_rate": 5.4702506946020905e-06, "loss": 0.2845, "step": 16968 }, { "epoch": 0.49, "grad_norm": 8.374012390663268, "learning_rate": 5.4697889897332064e-06, "loss": 0.5165, "step": 16969 }, { "epoch": 0.49, "grad_norm": 10.191537256009273, "learning_rate": 5.469327280822786e-06, "loss": 0.7937, "step": 16970 }, { "epoch": 0.49, "grad_norm": 4.344751942295295, "learning_rate": 5.468865567874802e-06, "loss": 0.2957, "step": 16971 }, { "epoch": 0.49, "grad_norm": 5.622766814391055, "learning_rate": 5.468403850893223e-06, "loss": 0.4224, "step": 16972 }, { "epoch": 0.49, "grad_norm": 6.81556055457401, "learning_rate": 5.467942129882023e-06, "loss": 0.6611, "step": 16973 }, { "epoch": 0.49, "grad_norm": 9.28716503170757, "learning_rate": 5.467480404845176e-06, "loss": 0.6645, "step": 16974 }, { "epoch": 0.49, "grad_norm": 10.648464815772238, "learning_rate": 5.467018675786653e-06, "loss": 1.194, "step": 16975 }, { "epoch": 0.49, "grad_norm": 7.710039977428927, "learning_rate": 5.466556942710424e-06, "loss": 0.5281, "step": 16976 }, { "epoch": 0.49, "grad_norm": 5.607900186545611, "learning_rate": 5.466095205620464e-06, "loss": 0.6694, "step": 16977 }, { "epoch": 0.49, "grad_norm": 6.723873261007857, "learning_rate": 5.465633464520746e-06, "loss": 0.3727, "step": 16978 }, { "epoch": 0.49, "grad_norm": 3.9647540603791054, "learning_rate": 5.465171719415239e-06, "loss": 0.3293, "step": 16979 }, { "epoch": 0.49, "grad_norm": 4.760723507575531, "learning_rate": 5.464709970307917e-06, "loss": 0.3126, "step": 16980 }, { "epoch": 0.49, "grad_norm": 2.2960836837978826, "learning_rate": 5.464248217202751e-06, "loss": 0.1198, "step": 16981 }, { "epoch": 0.49, "grad_norm": 5.14530833522467, "learning_rate": 5.463786460103716e-06, "loss": 0.4047, "step": 16982 }, { "epoch": 0.49, "grad_norm": 3.582712589140199, "learning_rate": 5.463324699014784e-06, "loss": 0.318, "step": 16983 }, { "epoch": 0.49, "grad_norm": 6.985893135723417, "learning_rate": 5.462862933939926e-06, "loss": 1.1542, "step": 16984 }, { "epoch": 0.49, "grad_norm": 3.7786565982482108, "learning_rate": 5.462401164883116e-06, "loss": 0.1506, "step": 16985 }, { "epoch": 0.49, "grad_norm": 4.833078380415617, "learning_rate": 5.461939391848324e-06, "loss": 0.5597, "step": 16986 }, { "epoch": 0.49, "grad_norm": 4.623434033639117, "learning_rate": 5.461477614839525e-06, "loss": 0.4819, "step": 16987 }, { "epoch": 0.49, "grad_norm": 4.262630403599095, "learning_rate": 5.461015833860692e-06, "loss": 0.222, "step": 16988 }, { "epoch": 0.49, "grad_norm": 5.30818943844706, "learning_rate": 5.460554048915795e-06, "loss": 0.8143, "step": 16989 }, { "epoch": 0.49, "grad_norm": 7.627407913227269, "learning_rate": 5.460092260008808e-06, "loss": 0.6023, "step": 16990 }, { "epoch": 0.49, "grad_norm": 4.89016965263654, "learning_rate": 5.459630467143706e-06, "loss": 0.3915, "step": 16991 }, { "epoch": 0.49, "grad_norm": 6.548571382763677, "learning_rate": 5.459168670324459e-06, "loss": 0.7079, "step": 16992 }, { "epoch": 0.49, "grad_norm": 4.984153049035607, "learning_rate": 5.45870686955504e-06, "loss": 0.3913, "step": 16993 }, { "epoch": 0.49, "grad_norm": 4.291299761053892, "learning_rate": 5.458245064839423e-06, "loss": 0.3801, "step": 16994 }, { "epoch": 0.49, "grad_norm": 6.854823364135677, "learning_rate": 5.457783256181578e-06, "loss": 0.4828, "step": 16995 }, { "epoch": 0.49, "grad_norm": 7.402930694902874, "learning_rate": 5.457321443585481e-06, "loss": 0.4686, "step": 16996 }, { "epoch": 0.49, "grad_norm": 8.607444477506062, "learning_rate": 5.456859627055104e-06, "loss": 0.6839, "step": 16997 }, { "epoch": 0.49, "grad_norm": 6.794042378497526, "learning_rate": 5.4563978065944205e-06, "loss": 0.5144, "step": 16998 }, { "epoch": 0.49, "grad_norm": 5.923386755755879, "learning_rate": 5.455935982207402e-06, "loss": 0.5666, "step": 16999 }, { "epoch": 0.49, "grad_norm": 5.972065588895628, "learning_rate": 5.455474153898023e-06, "loss": 0.7344, "step": 17000 }, { "epoch": 0.49, "grad_norm": 5.047800363084321, "learning_rate": 5.455012321670257e-06, "loss": 0.5439, "step": 17001 }, { "epoch": 0.49, "grad_norm": 8.317878961853301, "learning_rate": 5.454550485528074e-06, "loss": 0.8524, "step": 17002 }, { "epoch": 0.49, "grad_norm": 3.186397773306028, "learning_rate": 5.454088645475451e-06, "loss": 0.3192, "step": 17003 }, { "epoch": 0.49, "grad_norm": 4.441636320276472, "learning_rate": 5.453626801516357e-06, "loss": 0.2129, "step": 17004 }, { "epoch": 0.49, "grad_norm": 2.4839304760242658, "learning_rate": 5.453164953654769e-06, "loss": 0.1909, "step": 17005 }, { "epoch": 0.49, "grad_norm": 4.360607574568043, "learning_rate": 5.452703101894658e-06, "loss": 0.4287, "step": 17006 }, { "epoch": 0.49, "grad_norm": 5.2526658419718855, "learning_rate": 5.4522412462399975e-06, "loss": 0.592, "step": 17007 }, { "epoch": 0.49, "grad_norm": 3.2660870749901703, "learning_rate": 5.451779386694763e-06, "loss": 0.3988, "step": 17008 }, { "epoch": 0.49, "grad_norm": 9.04807211642612, "learning_rate": 5.451317523262926e-06, "loss": 0.4561, "step": 17009 }, { "epoch": 0.49, "grad_norm": 7.78416918188077, "learning_rate": 5.450855655948458e-06, "loss": 0.3011, "step": 17010 }, { "epoch": 0.49, "grad_norm": 4.4965868297546745, "learning_rate": 5.450393784755336e-06, "loss": 0.4015, "step": 17011 }, { "epoch": 0.49, "grad_norm": 4.866862866396468, "learning_rate": 5.449931909687531e-06, "loss": 0.2889, "step": 17012 }, { "epoch": 0.49, "grad_norm": 5.313495452965065, "learning_rate": 5.449470030749018e-06, "loss": 0.5586, "step": 17013 }, { "epoch": 0.49, "grad_norm": 6.544591173032227, "learning_rate": 5.449008147943768e-06, "loss": 0.6479, "step": 17014 }, { "epoch": 0.49, "grad_norm": 2.0785560734452746, "learning_rate": 5.448546261275758e-06, "loss": 0.2104, "step": 17015 }, { "epoch": 0.49, "grad_norm": 4.725037913700157, "learning_rate": 5.448084370748959e-06, "loss": 0.3001, "step": 17016 }, { "epoch": 0.49, "grad_norm": 5.1992035714378595, "learning_rate": 5.447622476367345e-06, "loss": 0.7242, "step": 17017 }, { "epoch": 0.49, "grad_norm": 4.8965262254129325, "learning_rate": 5.44716057813489e-06, "loss": 0.5542, "step": 17018 }, { "epoch": 0.49, "grad_norm": 7.542016908159829, "learning_rate": 5.4466986760555685e-06, "loss": 0.7399, "step": 17019 }, { "epoch": 0.49, "grad_norm": 3.9866128359119153, "learning_rate": 5.446236770133352e-06, "loss": 0.1688, "step": 17020 }, { "epoch": 0.49, "grad_norm": 10.596454916445625, "learning_rate": 5.445774860372216e-06, "loss": 0.6012, "step": 17021 }, { "epoch": 0.49, "grad_norm": 4.114154197751759, "learning_rate": 5.445312946776133e-06, "loss": 0.4226, "step": 17022 }, { "epoch": 0.49, "grad_norm": 10.936194467284748, "learning_rate": 5.444851029349079e-06, "loss": 1.1013, "step": 17023 }, { "epoch": 0.49, "grad_norm": 7.256671401083504, "learning_rate": 5.444389108095026e-06, "loss": 0.3593, "step": 17024 }, { "epoch": 0.49, "grad_norm": 7.543805179220315, "learning_rate": 5.443927183017948e-06, "loss": 0.512, "step": 17025 }, { "epoch": 0.49, "grad_norm": 4.722939557711479, "learning_rate": 5.4434652541218194e-06, "loss": 0.6674, "step": 17026 }, { "epoch": 0.49, "grad_norm": 4.047576961245103, "learning_rate": 5.443003321410613e-06, "loss": 0.2939, "step": 17027 }, { "epoch": 0.49, "grad_norm": 5.042014979581208, "learning_rate": 5.442541384888305e-06, "loss": 0.5604, "step": 17028 }, { "epoch": 0.49, "grad_norm": 4.467879924234733, "learning_rate": 5.442079444558867e-06, "loss": 0.2859, "step": 17029 }, { "epoch": 0.49, "grad_norm": 10.551174163563902, "learning_rate": 5.441617500426274e-06, "loss": 0.389, "step": 17030 }, { "epoch": 0.49, "grad_norm": 10.091767957973376, "learning_rate": 5.441155552494499e-06, "loss": 0.8898, "step": 17031 }, { "epoch": 0.49, "grad_norm": 6.243207673890575, "learning_rate": 5.4406936007675185e-06, "loss": 0.7615, "step": 17032 }, { "epoch": 0.49, "grad_norm": 4.551610414335964, "learning_rate": 5.440231645249304e-06, "loss": 0.4164, "step": 17033 }, { "epoch": 0.49, "grad_norm": 2.3298497052078524, "learning_rate": 5.439769685943832e-06, "loss": 0.1357, "step": 17034 }, { "epoch": 0.49, "grad_norm": 6.190679792751901, "learning_rate": 5.439307722855076e-06, "loss": 0.7934, "step": 17035 }, { "epoch": 0.49, "grad_norm": 7.3443647411418596, "learning_rate": 5.438845755987009e-06, "loss": 0.7421, "step": 17036 }, { "epoch": 0.49, "grad_norm": 3.3521553528902244, "learning_rate": 5.438383785343606e-06, "loss": 0.0476, "step": 17037 }, { "epoch": 0.49, "grad_norm": 2.6161207729635176, "learning_rate": 5.437921810928842e-06, "loss": 0.3706, "step": 17038 }, { "epoch": 0.49, "grad_norm": 3.4515077074570786, "learning_rate": 5.437459832746688e-06, "loss": 0.1565, "step": 17039 }, { "epoch": 0.49, "grad_norm": 7.028152190308918, "learning_rate": 5.436997850801123e-06, "loss": 0.2975, "step": 17040 }, { "epoch": 0.49, "grad_norm": 4.4419107679003424, "learning_rate": 5.43653586509612e-06, "loss": 0.4007, "step": 17041 }, { "epoch": 0.49, "grad_norm": 6.003835048513942, "learning_rate": 5.436073875635651e-06, "loss": 0.4268, "step": 17042 }, { "epoch": 0.49, "grad_norm": 3.8803730599002506, "learning_rate": 5.435611882423693e-06, "loss": 0.3685, "step": 17043 }, { "epoch": 0.49, "grad_norm": 6.5328081818452075, "learning_rate": 5.435149885464219e-06, "loss": 0.4738, "step": 17044 }, { "epoch": 0.49, "grad_norm": 11.431314306980328, "learning_rate": 5.434687884761206e-06, "loss": 0.3687, "step": 17045 }, { "epoch": 0.49, "grad_norm": 7.517449234897938, "learning_rate": 5.434225880318625e-06, "loss": 0.3138, "step": 17046 }, { "epoch": 0.49, "grad_norm": 4.373132088952441, "learning_rate": 5.4337638721404505e-06, "loss": 0.4736, "step": 17047 }, { "epoch": 0.49, "grad_norm": 5.045640824055792, "learning_rate": 5.433301860230662e-06, "loss": 0.5513, "step": 17048 }, { "epoch": 0.49, "grad_norm": 2.4353949676103923, "learning_rate": 5.43283984459323e-06, "loss": 0.1621, "step": 17049 }, { "epoch": 0.49, "grad_norm": 2.4330660521889396, "learning_rate": 5.432377825232129e-06, "loss": 0.1843, "step": 17050 }, { "epoch": 0.49, "grad_norm": 4.511033571737796, "learning_rate": 5.431915802151336e-06, "loss": 0.5448, "step": 17051 }, { "epoch": 0.49, "grad_norm": 6.705230204465701, "learning_rate": 5.431453775354825e-06, "loss": 1.119, "step": 17052 }, { "epoch": 0.49, "grad_norm": 4.568656947575126, "learning_rate": 5.430991744846569e-06, "loss": 0.266, "step": 17053 }, { "epoch": 0.49, "grad_norm": 21.898732069404353, "learning_rate": 5.430529710630544e-06, "loss": 0.9218, "step": 17054 }, { "epoch": 0.49, "grad_norm": 7.092783656238336, "learning_rate": 5.430067672710726e-06, "loss": 0.5328, "step": 17055 }, { "epoch": 0.49, "grad_norm": 3.95688356988196, "learning_rate": 5.4296056310910885e-06, "loss": 0.2112, "step": 17056 }, { "epoch": 0.49, "grad_norm": 3.458787099338056, "learning_rate": 5.4291435857756055e-06, "loss": 0.4198, "step": 17057 }, { "epoch": 0.49, "grad_norm": 8.29573669096293, "learning_rate": 5.428681536768254e-06, "loss": 0.9799, "step": 17058 }, { "epoch": 0.49, "grad_norm": 3.7054441891555365, "learning_rate": 5.428219484073008e-06, "loss": 0.4249, "step": 17059 }, { "epoch": 0.49, "grad_norm": 5.012881565553988, "learning_rate": 5.4277574276938426e-06, "loss": 0.2825, "step": 17060 }, { "epoch": 0.49, "grad_norm": 7.487616902045381, "learning_rate": 5.427295367634733e-06, "loss": 0.3412, "step": 17061 }, { "epoch": 0.49, "grad_norm": 6.202336547093955, "learning_rate": 5.4268333038996545e-06, "loss": 0.5214, "step": 17062 }, { "epoch": 0.49, "grad_norm": 7.201266895202195, "learning_rate": 5.426371236492579e-06, "loss": 0.604, "step": 17063 }, { "epoch": 0.49, "grad_norm": 6.622591984571917, "learning_rate": 5.425909165417486e-06, "loss": 0.577, "step": 17064 }, { "epoch": 0.49, "grad_norm": 11.412873401361844, "learning_rate": 5.42544709067835e-06, "loss": 0.3525, "step": 17065 }, { "epoch": 0.49, "grad_norm": 8.022638773325648, "learning_rate": 5.424985012279142e-06, "loss": 0.4799, "step": 17066 }, { "epoch": 0.49, "grad_norm": 7.958019676485205, "learning_rate": 5.424522930223843e-06, "loss": 0.8851, "step": 17067 }, { "epoch": 0.49, "grad_norm": 7.333541527596002, "learning_rate": 5.424060844516426e-06, "loss": 0.8993, "step": 17068 }, { "epoch": 0.49, "grad_norm": 14.693790987737255, "learning_rate": 5.423598755160864e-06, "loss": 0.6324, "step": 17069 }, { "epoch": 0.49, "grad_norm": 8.163586441104815, "learning_rate": 5.423136662161134e-06, "loss": 0.4593, "step": 17070 }, { "epoch": 0.49, "grad_norm": 5.220031238367436, "learning_rate": 5.422674565521212e-06, "loss": 0.2309, "step": 17071 }, { "epoch": 0.49, "grad_norm": 2.3108370510861955, "learning_rate": 5.422212465245072e-06, "loss": 0.1694, "step": 17072 }, { "epoch": 0.49, "grad_norm": 3.5353026301811763, "learning_rate": 5.421750361336691e-06, "loss": 0.3297, "step": 17073 }, { "epoch": 0.49, "grad_norm": 4.1640523370042635, "learning_rate": 5.4212882538000435e-06, "loss": 0.3768, "step": 17074 }, { "epoch": 0.49, "grad_norm": 7.700359784427696, "learning_rate": 5.420826142639105e-06, "loss": 0.5201, "step": 17075 }, { "epoch": 0.49, "grad_norm": 6.569821115958716, "learning_rate": 5.42036402785785e-06, "loss": 0.2418, "step": 17076 }, { "epoch": 0.49, "grad_norm": 7.534338100347738, "learning_rate": 5.419901909460257e-06, "loss": 0.6335, "step": 17077 }, { "epoch": 0.49, "grad_norm": 7.599017102278118, "learning_rate": 5.419439787450298e-06, "loss": 0.668, "step": 17078 }, { "epoch": 0.49, "grad_norm": 2.5393617306962786, "learning_rate": 5.418977661831951e-06, "loss": 0.4789, "step": 17079 }, { "epoch": 0.49, "grad_norm": 6.845803166201297, "learning_rate": 5.4185155326091885e-06, "loss": 0.4064, "step": 17080 }, { "epoch": 0.49, "grad_norm": 5.821806891181676, "learning_rate": 5.4180533997859906e-06, "loss": 0.3476, "step": 17081 }, { "epoch": 0.49, "grad_norm": 7.493415485262086, "learning_rate": 5.417591263366332e-06, "loss": 0.6634, "step": 17082 }, { "epoch": 0.49, "grad_norm": 5.684356900396572, "learning_rate": 5.417129123354184e-06, "loss": 0.2906, "step": 17083 }, { "epoch": 0.49, "grad_norm": 2.3166742218075056, "learning_rate": 5.416666979753527e-06, "loss": 0.2845, "step": 17084 }, { "epoch": 0.49, "grad_norm": 3.959458791784018, "learning_rate": 5.416204832568337e-06, "loss": 0.5946, "step": 17085 }, { "epoch": 0.49, "grad_norm": 6.822687633446155, "learning_rate": 5.415742681802586e-06, "loss": 0.5611, "step": 17086 }, { "epoch": 0.49, "grad_norm": 7.116006361668505, "learning_rate": 5.415280527460253e-06, "loss": 0.4755, "step": 17087 }, { "epoch": 0.49, "grad_norm": 9.023110678562935, "learning_rate": 5.414818369545312e-06, "loss": 0.8893, "step": 17088 }, { "epoch": 0.49, "grad_norm": 9.347053607240476, "learning_rate": 5.41435620806174e-06, "loss": 1.0719, "step": 17089 }, { "epoch": 0.49, "grad_norm": 6.434922970990262, "learning_rate": 5.4138940430135115e-06, "loss": 0.5907, "step": 17090 }, { "epoch": 0.49, "grad_norm": 4.454432165133126, "learning_rate": 5.413431874404605e-06, "loss": 0.3741, "step": 17091 }, { "epoch": 0.49, "grad_norm": 4.006438110743798, "learning_rate": 5.412969702238994e-06, "loss": 0.8849, "step": 17092 }, { "epoch": 0.49, "grad_norm": 1.7721208188261293, "learning_rate": 5.412507526520656e-06, "loss": 0.0877, "step": 17093 }, { "epoch": 0.49, "grad_norm": 4.527858754209465, "learning_rate": 5.412045347253566e-06, "loss": 0.6133, "step": 17094 }, { "epoch": 0.49, "grad_norm": 4.836381291007192, "learning_rate": 5.411583164441701e-06, "loss": 0.681, "step": 17095 }, { "epoch": 0.49, "grad_norm": 6.576120465323813, "learning_rate": 5.411120978089036e-06, "loss": 0.6565, "step": 17096 }, { "epoch": 0.49, "grad_norm": 4.310717753457368, "learning_rate": 5.410658788199549e-06, "loss": 0.422, "step": 17097 }, { "epoch": 0.49, "grad_norm": 7.2910933205577635, "learning_rate": 5.410196594777215e-06, "loss": 0.5723, "step": 17098 }, { "epoch": 0.49, "grad_norm": 3.0388365124619727, "learning_rate": 5.40973439782601e-06, "loss": 0.3417, "step": 17099 }, { "epoch": 0.49, "grad_norm": 6.039469518923605, "learning_rate": 5.40927219734991e-06, "loss": 0.731, "step": 17100 }, { "epoch": 0.49, "grad_norm": 3.61329104860369, "learning_rate": 5.408809993352892e-06, "loss": 0.4687, "step": 17101 }, { "epoch": 0.49, "grad_norm": 4.0614316856108195, "learning_rate": 5.408347785838932e-06, "loss": 0.3473, "step": 17102 }, { "epoch": 0.49, "grad_norm": 4.129648104078147, "learning_rate": 5.4078855748120055e-06, "loss": 0.2257, "step": 17103 }, { "epoch": 0.49, "grad_norm": 2.192879146135589, "learning_rate": 5.4074233602760905e-06, "loss": 0.155, "step": 17104 }, { "epoch": 0.49, "grad_norm": 10.779195794379643, "learning_rate": 5.406961142235162e-06, "loss": 0.3221, "step": 17105 }, { "epoch": 0.49, "grad_norm": 5.905166102664512, "learning_rate": 5.406498920693196e-06, "loss": 0.4792, "step": 17106 }, { "epoch": 0.49, "grad_norm": 5.287582573572831, "learning_rate": 5.406036695654171e-06, "loss": 0.2936, "step": 17107 }, { "epoch": 0.49, "grad_norm": 3.5192968563562776, "learning_rate": 5.405574467122062e-06, "loss": 0.2971, "step": 17108 }, { "epoch": 0.49, "grad_norm": 5.356734312010578, "learning_rate": 5.405112235100847e-06, "loss": 0.4208, "step": 17109 }, { "epoch": 0.49, "grad_norm": 6.10198742587214, "learning_rate": 5.4046499995945e-06, "loss": 0.613, "step": 17110 }, { "epoch": 0.49, "grad_norm": 5.542429300286612, "learning_rate": 5.404187760606999e-06, "loss": 0.4148, "step": 17111 }, { "epoch": 0.49, "grad_norm": 4.717158876668965, "learning_rate": 5.403725518142322e-06, "loss": 0.4258, "step": 17112 }, { "epoch": 0.49, "grad_norm": 9.125692262910455, "learning_rate": 5.403263272204441e-06, "loss": 0.6342, "step": 17113 }, { "epoch": 0.49, "grad_norm": 7.079378198161716, "learning_rate": 5.402801022797337e-06, "loss": 0.3378, "step": 17114 }, { "epoch": 0.49, "grad_norm": 4.252414129916325, "learning_rate": 5.402338769924987e-06, "loss": 0.2722, "step": 17115 }, { "epoch": 0.49, "grad_norm": 9.466525933961297, "learning_rate": 5.401876513591365e-06, "loss": 0.688, "step": 17116 }, { "epoch": 0.49, "grad_norm": 7.364196024363478, "learning_rate": 5.401414253800449e-06, "loss": 0.4804, "step": 17117 }, { "epoch": 0.49, "grad_norm": 4.4614548792333215, "learning_rate": 5.4009519905562145e-06, "loss": 0.4525, "step": 17118 }, { "epoch": 0.49, "grad_norm": 7.1908214026680115, "learning_rate": 5.4004897238626415e-06, "loss": 0.8952, "step": 17119 }, { "epoch": 0.49, "grad_norm": 5.372675526140476, "learning_rate": 5.400027453723703e-06, "loss": 0.3416, "step": 17120 }, { "epoch": 0.49, "grad_norm": 4.697043710649388, "learning_rate": 5.399565180143377e-06, "loss": 0.3693, "step": 17121 }, { "epoch": 0.49, "grad_norm": 1.9514114187017748, "learning_rate": 5.399102903125643e-06, "loss": 0.1229, "step": 17122 }, { "epoch": 0.49, "grad_norm": 7.978641847991762, "learning_rate": 5.398640622674475e-06, "loss": 0.5676, "step": 17123 }, { "epoch": 0.49, "grad_norm": 5.356662564318609, "learning_rate": 5.3981783387938505e-06, "loss": 0.5384, "step": 17124 }, { "epoch": 0.49, "grad_norm": 8.051956261293839, "learning_rate": 5.397716051487748e-06, "loss": 0.5641, "step": 17125 }, { "epoch": 0.49, "grad_norm": 3.2763394623221482, "learning_rate": 5.397253760760142e-06, "loss": 0.1893, "step": 17126 }, { "epoch": 0.49, "grad_norm": 5.838077035520473, "learning_rate": 5.39679146661501e-06, "loss": 0.6813, "step": 17127 }, { "epoch": 0.49, "grad_norm": 5.285672023780983, "learning_rate": 5.396329169056332e-06, "loss": 0.2761, "step": 17128 }, { "epoch": 0.49, "grad_norm": 3.7741800845168703, "learning_rate": 5.395866868088081e-06, "loss": 0.4552, "step": 17129 }, { "epoch": 0.49, "grad_norm": 5.3864094048543025, "learning_rate": 5.3954045637142374e-06, "loss": 0.3045, "step": 17130 }, { "epoch": 0.49, "grad_norm": 4.289144768820515, "learning_rate": 5.394942255938775e-06, "loss": 0.3607, "step": 17131 }, { "epoch": 0.49, "grad_norm": 4.410763316105352, "learning_rate": 5.394479944765676e-06, "loss": 0.5474, "step": 17132 }, { "epoch": 0.49, "grad_norm": 7.279734048491913, "learning_rate": 5.394017630198913e-06, "loss": 0.3042, "step": 17133 }, { "epoch": 0.49, "grad_norm": 9.6898536038043, "learning_rate": 5.393555312242464e-06, "loss": 0.6692, "step": 17134 }, { "epoch": 0.49, "grad_norm": 2.4535799029912493, "learning_rate": 5.393092990900308e-06, "loss": 0.3442, "step": 17135 }, { "epoch": 0.49, "grad_norm": 7.714634527918548, "learning_rate": 5.3926306661764225e-06, "loss": 0.6179, "step": 17136 }, { "epoch": 0.49, "grad_norm": 5.265588324444559, "learning_rate": 5.392168338074783e-06, "loss": 0.4633, "step": 17137 }, { "epoch": 0.49, "grad_norm": 2.401248241703913, "learning_rate": 5.391706006599367e-06, "loss": 0.1874, "step": 17138 }, { "epoch": 0.49, "grad_norm": 2.8129482547856917, "learning_rate": 5.391243671754153e-06, "loss": 0.4841, "step": 17139 }, { "epoch": 0.49, "grad_norm": 6.847446157711024, "learning_rate": 5.390781333543117e-06, "loss": 0.8797, "step": 17140 }, { "epoch": 0.49, "grad_norm": 4.795864682242871, "learning_rate": 5.390318991970238e-06, "loss": 0.4558, "step": 17141 }, { "epoch": 0.49, "grad_norm": 6.500302234372152, "learning_rate": 5.389856647039493e-06, "loss": 0.4343, "step": 17142 }, { "epoch": 0.49, "grad_norm": 3.4152992330512144, "learning_rate": 5.3893942987548595e-06, "loss": 0.4354, "step": 17143 }, { "epoch": 0.49, "grad_norm": 5.03206452594256, "learning_rate": 5.3889319471203135e-06, "loss": 0.7194, "step": 17144 }, { "epoch": 0.49, "grad_norm": 5.551534109045688, "learning_rate": 5.388469592139834e-06, "loss": 0.4659, "step": 17145 }, { "epoch": 0.49, "grad_norm": 6.255295455626586, "learning_rate": 5.3880072338174e-06, "loss": 0.7832, "step": 17146 }, { "epoch": 0.49, "grad_norm": 6.344204891037402, "learning_rate": 5.3875448721569855e-06, "loss": 0.3274, "step": 17147 }, { "epoch": 0.49, "grad_norm": 7.865970036270874, "learning_rate": 5.387082507162571e-06, "loss": 0.5684, "step": 17148 }, { "epoch": 0.49, "grad_norm": 9.640883678755792, "learning_rate": 5.386620138838134e-06, "loss": 0.7338, "step": 17149 }, { "epoch": 0.49, "grad_norm": 6.72213067455674, "learning_rate": 5.386157767187652e-06, "loss": 0.1799, "step": 17150 }, { "epoch": 0.49, "grad_norm": 5.875430842083723, "learning_rate": 5.385695392215101e-06, "loss": 0.6931, "step": 17151 }, { "epoch": 0.49, "grad_norm": 6.338393218768747, "learning_rate": 5.38523301392446e-06, "loss": 0.8934, "step": 17152 }, { "epoch": 0.49, "grad_norm": 8.89174442584982, "learning_rate": 5.384770632319709e-06, "loss": 1.4012, "step": 17153 }, { "epoch": 0.49, "grad_norm": 4.510425570554806, "learning_rate": 5.38430824740482e-06, "loss": 0.3039, "step": 17154 }, { "epoch": 0.49, "grad_norm": 6.598910910885853, "learning_rate": 5.383845859183777e-06, "loss": 0.4878, "step": 17155 }, { "epoch": 0.49, "grad_norm": 5.1974687504345, "learning_rate": 5.383383467660555e-06, "loss": 0.2221, "step": 17156 }, { "epoch": 0.49, "grad_norm": 7.310347933231853, "learning_rate": 5.382921072839131e-06, "loss": 0.9609, "step": 17157 }, { "epoch": 0.49, "grad_norm": 9.09411321484892, "learning_rate": 5.382458674723485e-06, "loss": 0.8645, "step": 17158 }, { "epoch": 0.49, "grad_norm": 5.110525345868123, "learning_rate": 5.381996273317596e-06, "loss": 0.4012, "step": 17159 }, { "epoch": 0.49, "grad_norm": 7.063363216613568, "learning_rate": 5.381533868625438e-06, "loss": 0.7366, "step": 17160 }, { "epoch": 0.49, "grad_norm": 4.4444367898769155, "learning_rate": 5.381071460650991e-06, "loss": 0.3216, "step": 17161 }, { "epoch": 0.49, "grad_norm": 4.3984659034801155, "learning_rate": 5.380609049398233e-06, "loss": 0.3292, "step": 17162 }, { "epoch": 0.49, "grad_norm": 7.976383995577034, "learning_rate": 5.380146634871143e-06, "loss": 0.6375, "step": 17163 }, { "epoch": 0.49, "grad_norm": 6.467465074973517, "learning_rate": 5.379684217073697e-06, "loss": 0.5399, "step": 17164 }, { "epoch": 0.49, "grad_norm": 3.5241952491539887, "learning_rate": 5.3792217960098755e-06, "loss": 0.4589, "step": 17165 }, { "epoch": 0.49, "grad_norm": 6.554627628530385, "learning_rate": 5.378759371683657e-06, "loss": 0.2208, "step": 17166 }, { "epoch": 0.49, "grad_norm": 5.520395890185599, "learning_rate": 5.378296944099016e-06, "loss": 0.3674, "step": 17167 }, { "epoch": 0.49, "grad_norm": 4.087655007342601, "learning_rate": 5.377834513259934e-06, "loss": 0.281, "step": 17168 }, { "epoch": 0.49, "grad_norm": 4.865009634982661, "learning_rate": 5.377372079170389e-06, "loss": 0.4249, "step": 17169 }, { "epoch": 0.49, "grad_norm": 6.313320068874476, "learning_rate": 5.376909641834356e-06, "loss": 0.3275, "step": 17170 }, { "epoch": 0.49, "grad_norm": 2.9853947280393593, "learning_rate": 5.376447201255817e-06, "loss": 0.3476, "step": 17171 }, { "epoch": 0.49, "grad_norm": 1.9375255183108382, "learning_rate": 5.37598475743875e-06, "loss": 0.2471, "step": 17172 }, { "epoch": 0.49, "grad_norm": 6.412816734002127, "learning_rate": 5.375522310387132e-06, "loss": 0.6938, "step": 17173 }, { "epoch": 0.49, "grad_norm": 3.5357591989480217, "learning_rate": 5.375059860104941e-06, "loss": 0.3645, "step": 17174 }, { "epoch": 0.49, "grad_norm": 6.996504106526262, "learning_rate": 5.374597406596156e-06, "loss": 0.4069, "step": 17175 }, { "epoch": 0.49, "grad_norm": 4.909385262520479, "learning_rate": 5.3741349498647575e-06, "loss": 0.542, "step": 17176 }, { "epoch": 0.49, "grad_norm": 4.835021935287944, "learning_rate": 5.3736724899147206e-06, "loss": 0.6786, "step": 17177 }, { "epoch": 0.49, "grad_norm": 4.914100586178049, "learning_rate": 5.373210026750025e-06, "loss": 0.3593, "step": 17178 }, { "epoch": 0.49, "grad_norm": 4.763171255309814, "learning_rate": 5.37274756037465e-06, "loss": 0.5504, "step": 17179 }, { "epoch": 0.49, "grad_norm": 4.334842399253479, "learning_rate": 5.372285090792574e-06, "loss": 0.3138, "step": 17180 }, { "epoch": 0.49, "grad_norm": 6.411402644502534, "learning_rate": 5.3718226180077746e-06, "loss": 0.6793, "step": 17181 }, { "epoch": 0.49, "grad_norm": 2.7887119211788627, "learning_rate": 5.3713601420242315e-06, "loss": 0.3508, "step": 17182 }, { "epoch": 0.49, "grad_norm": 3.922751750440707, "learning_rate": 5.370897662845923e-06, "loss": 0.4177, "step": 17183 }, { "epoch": 0.49, "grad_norm": 2.5794810110222492, "learning_rate": 5.3704351804768275e-06, "loss": 0.2484, "step": 17184 }, { "epoch": 0.49, "grad_norm": 5.645796837870566, "learning_rate": 5.369972694920923e-06, "loss": 0.3556, "step": 17185 }, { "epoch": 0.49, "grad_norm": 6.175605284025565, "learning_rate": 5.36951020618219e-06, "loss": 0.5605, "step": 17186 }, { "epoch": 0.49, "grad_norm": 4.537818909044155, "learning_rate": 5.3690477142646045e-06, "loss": 0.5201, "step": 17187 }, { "epoch": 0.49, "grad_norm": 5.489665682884976, "learning_rate": 5.368585219172148e-06, "loss": 0.7184, "step": 17188 }, { "epoch": 0.49, "grad_norm": 9.460407068958522, "learning_rate": 5.368122720908799e-06, "loss": 0.7713, "step": 17189 }, { "epoch": 0.49, "grad_norm": 10.628193992294355, "learning_rate": 5.367660219478536e-06, "loss": 0.9221, "step": 17190 }, { "epoch": 0.49, "grad_norm": 3.9287410074022464, "learning_rate": 5.367197714885335e-06, "loss": 0.509, "step": 17191 }, { "epoch": 0.49, "grad_norm": 3.583910200718308, "learning_rate": 5.366735207133178e-06, "loss": 0.3098, "step": 17192 }, { "epoch": 0.49, "grad_norm": 5.5892983736341435, "learning_rate": 5.366272696226045e-06, "loss": 0.6636, "step": 17193 }, { "epoch": 0.49, "grad_norm": 3.343360486026494, "learning_rate": 5.365810182167912e-06, "loss": 0.177, "step": 17194 }, { "epoch": 0.49, "grad_norm": 2.9790303370265057, "learning_rate": 5.3653476649627565e-06, "loss": 0.4517, "step": 17195 }, { "epoch": 0.49, "grad_norm": 10.936666707584653, "learning_rate": 5.364885144614564e-06, "loss": 0.8124, "step": 17196 }, { "epoch": 0.49, "grad_norm": 5.702738584204235, "learning_rate": 5.364422621127308e-06, "loss": 0.2511, "step": 17197 }, { "epoch": 0.49, "grad_norm": 2.716665762528657, "learning_rate": 5.363960094504969e-06, "loss": 0.2091, "step": 17198 }, { "epoch": 0.49, "grad_norm": 8.108062637949079, "learning_rate": 5.363497564751526e-06, "loss": 0.782, "step": 17199 }, { "epoch": 0.49, "grad_norm": 3.6076585395644143, "learning_rate": 5.363035031870958e-06, "loss": 0.3011, "step": 17200 }, { "epoch": 0.49, "grad_norm": 7.459607134540302, "learning_rate": 5.362572495867244e-06, "loss": 0.4312, "step": 17201 }, { "epoch": 0.49, "grad_norm": 6.048427688011227, "learning_rate": 5.362109956744365e-06, "loss": 0.4124, "step": 17202 }, { "epoch": 0.49, "grad_norm": 5.708023162638417, "learning_rate": 5.361647414506297e-06, "loss": 0.2508, "step": 17203 }, { "epoch": 0.49, "grad_norm": 11.27171679481717, "learning_rate": 5.361184869157021e-06, "loss": 0.4795, "step": 17204 }, { "epoch": 0.49, "grad_norm": 4.182776277746511, "learning_rate": 5.360722320700516e-06, "loss": 0.5997, "step": 17205 }, { "epoch": 0.49, "grad_norm": 5.709924944703345, "learning_rate": 5.3602597691407636e-06, "loss": 0.4628, "step": 17206 }, { "epoch": 0.49, "grad_norm": 2.612725590817518, "learning_rate": 5.359797214481739e-06, "loss": 0.3724, "step": 17207 }, { "epoch": 0.49, "grad_norm": 4.496086220026546, "learning_rate": 5.359334656727423e-06, "loss": 0.3121, "step": 17208 }, { "epoch": 0.49, "grad_norm": 5.349887691638103, "learning_rate": 5.358872095881795e-06, "loss": 0.4371, "step": 17209 }, { "epoch": 0.49, "grad_norm": 1.5402354403672778, "learning_rate": 5.358409531948836e-06, "loss": 0.0643, "step": 17210 }, { "epoch": 0.49, "grad_norm": 1.6869416019426149, "learning_rate": 5.357946964932521e-06, "loss": 0.1259, "step": 17211 }, { "epoch": 0.49, "grad_norm": 4.351371747258555, "learning_rate": 5.3574843948368346e-06, "loss": 0.273, "step": 17212 }, { "epoch": 0.49, "grad_norm": 7.845941123552854, "learning_rate": 5.357021821665754e-06, "loss": 0.8566, "step": 17213 }, { "epoch": 0.49, "grad_norm": 6.699123655483965, "learning_rate": 5.356559245423258e-06, "loss": 0.8015, "step": 17214 }, { "epoch": 0.49, "grad_norm": 6.370246910448075, "learning_rate": 5.356096666113328e-06, "loss": 0.7267, "step": 17215 }, { "epoch": 0.49, "grad_norm": 8.995932401600731, "learning_rate": 5.355634083739941e-06, "loss": 0.4822, "step": 17216 }, { "epoch": 0.49, "grad_norm": 6.006962154217771, "learning_rate": 5.355171498307078e-06, "loss": 0.7187, "step": 17217 }, { "epoch": 0.49, "grad_norm": 5.452369790671622, "learning_rate": 5.354708909818718e-06, "loss": 0.4472, "step": 17218 }, { "epoch": 0.49, "grad_norm": 4.874941360903464, "learning_rate": 5.354246318278841e-06, "loss": 0.3286, "step": 17219 }, { "epoch": 0.49, "grad_norm": 3.6269672580458017, "learning_rate": 5.353783723691427e-06, "loss": 0.4338, "step": 17220 }, { "epoch": 0.49, "grad_norm": 4.013607542538717, "learning_rate": 5.353321126060454e-06, "loss": 0.4516, "step": 17221 }, { "epoch": 0.49, "grad_norm": 6.836362849490048, "learning_rate": 5.352858525389903e-06, "loss": 0.2887, "step": 17222 }, { "epoch": 0.49, "grad_norm": 5.313759867556466, "learning_rate": 5.352395921683756e-06, "loss": 0.5908, "step": 17223 }, { "epoch": 0.49, "grad_norm": 5.90911575708967, "learning_rate": 5.351933314945988e-06, "loss": 0.4036, "step": 17224 }, { "epoch": 0.49, "grad_norm": 3.8134016627967857, "learning_rate": 5.351470705180582e-06, "loss": 0.6674, "step": 17225 }, { "epoch": 0.49, "grad_norm": 6.469176476467166, "learning_rate": 5.351008092391517e-06, "loss": 0.4807, "step": 17226 }, { "epoch": 0.49, "grad_norm": 2.1853776990715317, "learning_rate": 5.3505454765827714e-06, "loss": 0.1177, "step": 17227 }, { "epoch": 0.49, "grad_norm": 4.451143914972244, "learning_rate": 5.350082857758326e-06, "loss": 0.2844, "step": 17228 }, { "epoch": 0.49, "grad_norm": 3.7536940499784346, "learning_rate": 5.3496202359221625e-06, "loss": 0.3602, "step": 17229 }, { "epoch": 0.49, "grad_norm": 5.924198147882423, "learning_rate": 5.349157611078258e-06, "loss": 0.4095, "step": 17230 }, { "epoch": 0.49, "grad_norm": 5.6236446019199695, "learning_rate": 5.348694983230593e-06, "loss": 0.3683, "step": 17231 }, { "epoch": 0.49, "grad_norm": 8.138289806752935, "learning_rate": 5.348232352383149e-06, "loss": 0.4786, "step": 17232 }, { "epoch": 0.49, "grad_norm": 6.129058894340782, "learning_rate": 5.347769718539907e-06, "loss": 0.4307, "step": 17233 }, { "epoch": 0.49, "grad_norm": 6.631183905096837, "learning_rate": 5.3473070817048426e-06, "loss": 0.3752, "step": 17234 }, { "epoch": 0.49, "grad_norm": 9.43342158632367, "learning_rate": 5.3468444418819374e-06, "loss": 0.6539, "step": 17235 }, { "epoch": 0.49, "grad_norm": 4.871405572401458, "learning_rate": 5.346381799075173e-06, "loss": 0.8034, "step": 17236 }, { "epoch": 0.49, "grad_norm": 5.55472166584737, "learning_rate": 5.345919153288529e-06, "loss": 0.3188, "step": 17237 }, { "epoch": 0.49, "grad_norm": 5.78146574287984, "learning_rate": 5.345456504525984e-06, "loss": 0.7551, "step": 17238 }, { "epoch": 0.49, "grad_norm": 5.049226667659736, "learning_rate": 5.34499385279152e-06, "loss": 0.3959, "step": 17239 }, { "epoch": 0.49, "grad_norm": 4.443384593850137, "learning_rate": 5.344531198089118e-06, "loss": 0.545, "step": 17240 }, { "epoch": 0.49, "grad_norm": 8.200175657949531, "learning_rate": 5.344068540422754e-06, "loss": 0.8018, "step": 17241 }, { "epoch": 0.49, "grad_norm": 3.585311618945468, "learning_rate": 5.3436058797964115e-06, "loss": 0.3653, "step": 17242 }, { "epoch": 0.49, "grad_norm": 3.428129915635541, "learning_rate": 5.343143216214072e-06, "loss": 0.4346, "step": 17243 }, { "epoch": 0.49, "grad_norm": 6.195818097528308, "learning_rate": 5.342680549679709e-06, "loss": 0.6143, "step": 17244 }, { "epoch": 0.49, "grad_norm": 4.3227010405050565, "learning_rate": 5.342217880197311e-06, "loss": 0.3998, "step": 17245 }, { "epoch": 0.49, "grad_norm": 5.781304106588025, "learning_rate": 5.341755207770854e-06, "loss": 0.2239, "step": 17246 }, { "epoch": 0.49, "grad_norm": 6.354618852712146, "learning_rate": 5.34129253240432e-06, "loss": 0.3287, "step": 17247 }, { "epoch": 0.49, "grad_norm": 4.9088722559210245, "learning_rate": 5.340829854101687e-06, "loss": 0.4871, "step": 17248 }, { "epoch": 0.49, "grad_norm": 5.69302937776486, "learning_rate": 5.340367172866936e-06, "loss": 0.8722, "step": 17249 }, { "epoch": 0.49, "grad_norm": 7.347281138667231, "learning_rate": 5.33990448870405e-06, "loss": 0.8787, "step": 17250 }, { "epoch": 0.49, "grad_norm": 9.810330631000088, "learning_rate": 5.339441801617007e-06, "loss": 0.7331, "step": 17251 }, { "epoch": 0.49, "grad_norm": 4.586311169141288, "learning_rate": 5.338979111609785e-06, "loss": 0.1343, "step": 17252 }, { "epoch": 0.49, "grad_norm": 10.250253534670467, "learning_rate": 5.338516418686371e-06, "loss": 1.0268, "step": 17253 }, { "epoch": 0.49, "grad_norm": 5.935458986937752, "learning_rate": 5.3380537228507415e-06, "loss": 0.5524, "step": 17254 }, { "epoch": 0.49, "grad_norm": 5.027285891943192, "learning_rate": 5.337591024106878e-06, "loss": 0.2629, "step": 17255 }, { "epoch": 0.49, "grad_norm": 6.5118070618168, "learning_rate": 5.3371283224587574e-06, "loss": 0.4671, "step": 17256 }, { "epoch": 0.49, "grad_norm": 8.556698246674564, "learning_rate": 5.3366656179103675e-06, "loss": 0.3617, "step": 17257 }, { "epoch": 0.49, "grad_norm": 3.231144051502699, "learning_rate": 5.336202910465682e-06, "loss": 0.1133, "step": 17258 }, { "epoch": 0.49, "grad_norm": 5.493389036191688, "learning_rate": 5.335740200128685e-06, "loss": 0.6834, "step": 17259 }, { "epoch": 0.49, "grad_norm": 4.460857584382789, "learning_rate": 5.335277486903357e-06, "loss": 0.4933, "step": 17260 }, { "epoch": 0.49, "grad_norm": 3.8306762912955525, "learning_rate": 5.334814770793676e-06, "loss": 0.2275, "step": 17261 }, { "epoch": 0.49, "grad_norm": 3.160084859185448, "learning_rate": 5.334352051803626e-06, "loss": 0.1213, "step": 17262 }, { "epoch": 0.49, "grad_norm": 7.3743338203201825, "learning_rate": 5.333889329937187e-06, "loss": 0.6849, "step": 17263 }, { "epoch": 0.49, "grad_norm": 6.628546467303035, "learning_rate": 5.333426605198338e-06, "loss": 0.4199, "step": 17264 }, { "epoch": 0.49, "grad_norm": 7.259886479186423, "learning_rate": 5.332963877591062e-06, "loss": 1.0921, "step": 17265 }, { "epoch": 0.49, "grad_norm": 5.854826685215883, "learning_rate": 5.332501147119339e-06, "loss": 0.7081, "step": 17266 }, { "epoch": 0.49, "grad_norm": 2.7614632559816013, "learning_rate": 5.33203841378715e-06, "loss": 0.2419, "step": 17267 }, { "epoch": 0.49, "grad_norm": 4.1428240712494775, "learning_rate": 5.331575677598474e-06, "loss": 0.6071, "step": 17268 }, { "epoch": 0.49, "grad_norm": 2.37711230256741, "learning_rate": 5.331112938557292e-06, "loss": 0.2218, "step": 17269 }, { "epoch": 0.49, "grad_norm": 6.183855235582896, "learning_rate": 5.33065019666759e-06, "loss": 0.6786, "step": 17270 }, { "epoch": 0.49, "grad_norm": 2.6485923789320505, "learning_rate": 5.330187451933342e-06, "loss": 0.1839, "step": 17271 }, { "epoch": 0.49, "grad_norm": 2.5499166101022483, "learning_rate": 5.329724704358532e-06, "loss": 0.3118, "step": 17272 }, { "epoch": 0.49, "grad_norm": 4.854001032850787, "learning_rate": 5.329261953947143e-06, "loss": 0.6734, "step": 17273 }, { "epoch": 0.49, "grad_norm": 2.554972804759019, "learning_rate": 5.328799200703153e-06, "loss": 0.1046, "step": 17274 }, { "epoch": 0.49, "grad_norm": 4.47468847224019, "learning_rate": 5.328336444630544e-06, "loss": 0.5902, "step": 17275 }, { "epoch": 0.49, "grad_norm": 2.8065773698637746, "learning_rate": 5.3278736857332955e-06, "loss": 0.1841, "step": 17276 }, { "epoch": 0.49, "grad_norm": 6.559043300869004, "learning_rate": 5.327410924015392e-06, "loss": 0.2251, "step": 17277 }, { "epoch": 0.49, "grad_norm": 2.847067804852941, "learning_rate": 5.326948159480811e-06, "loss": 0.1453, "step": 17278 }, { "epoch": 0.49, "grad_norm": 4.695656152929319, "learning_rate": 5.3264853921335366e-06, "loss": 0.4718, "step": 17279 }, { "epoch": 0.49, "grad_norm": 9.808690977962955, "learning_rate": 5.326022621977549e-06, "loss": 0.5008, "step": 17280 }, { "epoch": 0.49, "grad_norm": 3.6047250346398605, "learning_rate": 5.325559849016828e-06, "loss": 0.291, "step": 17281 }, { "epoch": 0.49, "grad_norm": 7.082312484181716, "learning_rate": 5.325097073255355e-06, "loss": 0.284, "step": 17282 }, { "epoch": 0.49, "grad_norm": 3.2546092873276176, "learning_rate": 5.324634294697113e-06, "loss": 0.364, "step": 17283 }, { "epoch": 0.49, "grad_norm": 4.580921998193255, "learning_rate": 5.324171513346083e-06, "loss": 0.5967, "step": 17284 }, { "epoch": 0.5, "grad_norm": 2.1718066980064386, "learning_rate": 5.3237087292062425e-06, "loss": 0.0653, "step": 17285 }, { "epoch": 0.5, "grad_norm": 4.843090381624122, "learning_rate": 5.3232459422815784e-06, "loss": 0.4459, "step": 17286 }, { "epoch": 0.5, "grad_norm": 16.661172164585217, "learning_rate": 5.322783152576069e-06, "loss": 0.7089, "step": 17287 }, { "epoch": 0.5, "grad_norm": 4.4600938844421565, "learning_rate": 5.3223203600936946e-06, "loss": 0.3076, "step": 17288 }, { "epoch": 0.5, "grad_norm": 6.418791315972799, "learning_rate": 5.3218575648384386e-06, "loss": 0.3209, "step": 17289 }, { "epoch": 0.5, "grad_norm": 5.8807011193120795, "learning_rate": 5.321394766814282e-06, "loss": 0.3561, "step": 17290 }, { "epoch": 0.5, "grad_norm": 6.483870704987897, "learning_rate": 5.320931966025205e-06, "loss": 0.8421, "step": 17291 }, { "epoch": 0.5, "grad_norm": 5.705536173265803, "learning_rate": 5.32046916247519e-06, "loss": 0.5602, "step": 17292 }, { "epoch": 0.5, "grad_norm": 3.3120838119756675, "learning_rate": 5.320006356168218e-06, "loss": 0.2812, "step": 17293 }, { "epoch": 0.5, "grad_norm": 6.99577582151831, "learning_rate": 5.3195435471082705e-06, "loss": 0.8088, "step": 17294 }, { "epoch": 0.5, "grad_norm": 2.53584630476682, "learning_rate": 5.319080735299329e-06, "loss": 0.3942, "step": 17295 }, { "epoch": 0.5, "grad_norm": 3.8075071379105605, "learning_rate": 5.318617920745376e-06, "loss": 0.2347, "step": 17296 }, { "epoch": 0.5, "grad_norm": 15.210906402446383, "learning_rate": 5.318155103450393e-06, "loss": 0.7743, "step": 17297 }, { "epoch": 0.5, "grad_norm": 6.13088994111228, "learning_rate": 5.31769228341836e-06, "loss": 0.3261, "step": 17298 }, { "epoch": 0.5, "grad_norm": 9.890078609374, "learning_rate": 5.317229460653259e-06, "loss": 0.5025, "step": 17299 }, { "epoch": 0.5, "grad_norm": 6.659641172118948, "learning_rate": 5.316766635159073e-06, "loss": 0.7802, "step": 17300 }, { "epoch": 0.5, "grad_norm": 9.108554705005998, "learning_rate": 5.316303806939782e-06, "loss": 0.4343, "step": 17301 }, { "epoch": 0.5, "grad_norm": 5.080333569831376, "learning_rate": 5.315840975999367e-06, "loss": 0.5929, "step": 17302 }, { "epoch": 0.5, "grad_norm": 4.479722377400497, "learning_rate": 5.315378142341814e-06, "loss": 0.3987, "step": 17303 }, { "epoch": 0.5, "grad_norm": 5.037823191988075, "learning_rate": 5.314915305971099e-06, "loss": 0.4159, "step": 17304 }, { "epoch": 0.5, "grad_norm": 5.3763727386874995, "learning_rate": 5.314452466891206e-06, "loss": 0.3325, "step": 17305 }, { "epoch": 0.5, "grad_norm": 5.088421568011292, "learning_rate": 5.31398962510612e-06, "loss": 0.3923, "step": 17306 }, { "epoch": 0.5, "grad_norm": 6.195058348398623, "learning_rate": 5.313526780619819e-06, "loss": 0.8473, "step": 17307 }, { "epoch": 0.5, "grad_norm": 8.543976286965941, "learning_rate": 5.313063933436284e-06, "loss": 0.4, "step": 17308 }, { "epoch": 0.5, "grad_norm": 2.357963796808451, "learning_rate": 5.3126010835594995e-06, "loss": 0.3316, "step": 17309 }, { "epoch": 0.5, "grad_norm": 4.841308790736885, "learning_rate": 5.312138230993446e-06, "loss": 0.3742, "step": 17310 }, { "epoch": 0.5, "grad_norm": 5.011966214570738, "learning_rate": 5.311675375742107e-06, "loss": 0.6133, "step": 17311 }, { "epoch": 0.5, "grad_norm": 5.54635344726298, "learning_rate": 5.3112125178094624e-06, "loss": 0.5072, "step": 17312 }, { "epoch": 0.5, "grad_norm": 6.6946581740004785, "learning_rate": 5.310749657199494e-06, "loss": 0.2739, "step": 17313 }, { "epoch": 0.5, "grad_norm": 3.794352694404851, "learning_rate": 5.310286793916186e-06, "loss": 0.1821, "step": 17314 }, { "epoch": 0.5, "grad_norm": 10.871767462505305, "learning_rate": 5.309823927963519e-06, "loss": 1.0886, "step": 17315 }, { "epoch": 0.5, "grad_norm": 6.870219076847506, "learning_rate": 5.309361059345473e-06, "loss": 0.6227, "step": 17316 }, { "epoch": 0.5, "grad_norm": 5.815207927328792, "learning_rate": 5.308898188066034e-06, "loss": 0.3847, "step": 17317 }, { "epoch": 0.5, "grad_norm": 3.1857550182906205, "learning_rate": 5.30843531412918e-06, "loss": 0.3314, "step": 17318 }, { "epoch": 0.5, "grad_norm": 10.527876044636471, "learning_rate": 5.3079724375388965e-06, "loss": 0.4535, "step": 17319 }, { "epoch": 0.5, "grad_norm": 8.964729178700075, "learning_rate": 5.307509558299164e-06, "loss": 0.7524, "step": 17320 }, { "epoch": 0.5, "grad_norm": 7.279799975958949, "learning_rate": 5.307046676413964e-06, "loss": 0.4769, "step": 17321 }, { "epoch": 0.5, "grad_norm": 11.852120131749915, "learning_rate": 5.306583791887278e-06, "loss": 0.5172, "step": 17322 }, { "epoch": 0.5, "grad_norm": 3.4249079253357437, "learning_rate": 5.306120904723091e-06, "loss": 0.4355, "step": 17323 }, { "epoch": 0.5, "grad_norm": 7.509497287753056, "learning_rate": 5.305658014925385e-06, "loss": 0.3349, "step": 17324 }, { "epoch": 0.5, "grad_norm": 4.368600169874686, "learning_rate": 5.305195122498138e-06, "loss": 0.2429, "step": 17325 }, { "epoch": 0.5, "grad_norm": 5.212699169131924, "learning_rate": 5.304732227445335e-06, "loss": 0.4857, "step": 17326 }, { "epoch": 0.5, "grad_norm": 7.91379446496494, "learning_rate": 5.30426932977096e-06, "loss": 0.2498, "step": 17327 }, { "epoch": 0.5, "grad_norm": 5.015607839774845, "learning_rate": 5.303806429478992e-06, "loss": 0.5878, "step": 17328 }, { "epoch": 0.5, "grad_norm": 3.639457302535344, "learning_rate": 5.303343526573415e-06, "loss": 0.3588, "step": 17329 }, { "epoch": 0.5, "grad_norm": 8.000246103792836, "learning_rate": 5.302880621058213e-06, "loss": 0.5039, "step": 17330 }, { "epoch": 0.5, "grad_norm": 9.263848840517953, "learning_rate": 5.302417712937363e-06, "loss": 0.2973, "step": 17331 }, { "epoch": 0.5, "grad_norm": 1.990892026469051, "learning_rate": 5.301954802214853e-06, "loss": 0.0708, "step": 17332 }, { "epoch": 0.5, "grad_norm": 3.7288409930900324, "learning_rate": 5.301491888894662e-06, "loss": 0.2648, "step": 17333 }, { "epoch": 0.5, "grad_norm": 7.927700722137321, "learning_rate": 5.301028972980774e-06, "loss": 0.6993, "step": 17334 }, { "epoch": 0.5, "grad_norm": 4.388716775132256, "learning_rate": 5.30056605447717e-06, "loss": 0.4869, "step": 17335 }, { "epoch": 0.5, "grad_norm": 3.1919753128760653, "learning_rate": 5.300103133387832e-06, "loss": 0.3101, "step": 17336 }, { "epoch": 0.5, "grad_norm": 3.876577825181552, "learning_rate": 5.299640209716746e-06, "loss": 0.384, "step": 17337 }, { "epoch": 0.5, "grad_norm": 4.418017777525888, "learning_rate": 5.299177283467892e-06, "loss": 0.2015, "step": 17338 }, { "epoch": 0.5, "grad_norm": 2.4874443670981394, "learning_rate": 5.298714354645253e-06, "loss": 0.3043, "step": 17339 }, { "epoch": 0.5, "grad_norm": 10.143250488467777, "learning_rate": 5.298251423252809e-06, "loss": 0.5948, "step": 17340 }, { "epoch": 0.5, "grad_norm": 3.8287707971300096, "learning_rate": 5.297788489294548e-06, "loss": 0.2143, "step": 17341 }, { "epoch": 0.5, "grad_norm": 4.789225639315157, "learning_rate": 5.297325552774446e-06, "loss": 0.4215, "step": 17342 }, { "epoch": 0.5, "grad_norm": 8.33629077566782, "learning_rate": 5.29686261369649e-06, "loss": 0.573, "step": 17343 }, { "epoch": 0.5, "grad_norm": 4.4379730778359345, "learning_rate": 5.296399672064663e-06, "loss": 0.4681, "step": 17344 }, { "epoch": 0.5, "grad_norm": 16.30357441852144, "learning_rate": 5.295936727882944e-06, "loss": 0.4218, "step": 17345 }, { "epoch": 0.5, "grad_norm": 5.628937127520381, "learning_rate": 5.295473781155318e-06, "loss": 0.7314, "step": 17346 }, { "epoch": 0.5, "grad_norm": 3.7016464694069118, "learning_rate": 5.29501083188577e-06, "loss": 0.3772, "step": 17347 }, { "epoch": 0.5, "grad_norm": 2.671164658021718, "learning_rate": 5.294547880078277e-06, "loss": 0.3136, "step": 17348 }, { "epoch": 0.5, "grad_norm": 6.974378596418671, "learning_rate": 5.2940849257368265e-06, "loss": 0.6049, "step": 17349 }, { "epoch": 0.5, "grad_norm": 6.135395259420443, "learning_rate": 5.293621968865399e-06, "loss": 0.5924, "step": 17350 }, { "epoch": 0.5, "grad_norm": 3.103416052361891, "learning_rate": 5.2931590094679775e-06, "loss": 0.1397, "step": 17351 }, { "epoch": 0.5, "grad_norm": 7.5314417137975695, "learning_rate": 5.292696047548545e-06, "loss": 0.5286, "step": 17352 }, { "epoch": 0.5, "grad_norm": 6.530014103674152, "learning_rate": 5.292233083111085e-06, "loss": 0.5593, "step": 17353 }, { "epoch": 0.5, "grad_norm": 4.763161094217426, "learning_rate": 5.2917701161595805e-06, "loss": 0.3649, "step": 17354 }, { "epoch": 0.5, "grad_norm": 3.7826585589429444, "learning_rate": 5.291307146698013e-06, "loss": 0.481, "step": 17355 }, { "epoch": 0.5, "grad_norm": 4.435500540715996, "learning_rate": 5.290844174730367e-06, "loss": 0.8841, "step": 17356 }, { "epoch": 0.5, "grad_norm": 7.527693799228056, "learning_rate": 5.290381200260622e-06, "loss": 0.5631, "step": 17357 }, { "epoch": 0.5, "grad_norm": 2.596626123375857, "learning_rate": 5.289918223292766e-06, "loss": 0.3223, "step": 17358 }, { "epoch": 0.5, "grad_norm": 6.26386904190577, "learning_rate": 5.289455243830775e-06, "loss": 0.5825, "step": 17359 }, { "epoch": 0.5, "grad_norm": 4.620752910513824, "learning_rate": 5.28899226187864e-06, "loss": 0.7879, "step": 17360 }, { "epoch": 0.5, "grad_norm": 7.132156258318125, "learning_rate": 5.28852927744034e-06, "loss": 0.6107, "step": 17361 }, { "epoch": 0.5, "grad_norm": 3.999997183679543, "learning_rate": 5.288066290519857e-06, "loss": 0.721, "step": 17362 }, { "epoch": 0.5, "grad_norm": 1.1969046305183368, "learning_rate": 5.287603301121175e-06, "loss": 0.1031, "step": 17363 }, { "epoch": 0.5, "grad_norm": 5.341623161880669, "learning_rate": 5.287140309248279e-06, "loss": 0.4803, "step": 17364 }, { "epoch": 0.5, "grad_norm": 6.736366561642844, "learning_rate": 5.286677314905149e-06, "loss": 0.7239, "step": 17365 }, { "epoch": 0.5, "grad_norm": 4.343525174037788, "learning_rate": 5.286214318095768e-06, "loss": 0.2495, "step": 17366 }, { "epoch": 0.5, "grad_norm": 4.017887012733963, "learning_rate": 5.285751318824122e-06, "loss": 0.3518, "step": 17367 }, { "epoch": 0.5, "grad_norm": 7.868361263353513, "learning_rate": 5.2852883170941925e-06, "loss": 0.7782, "step": 17368 }, { "epoch": 0.5, "grad_norm": 9.039255468209516, "learning_rate": 5.2848253129099625e-06, "loss": 0.4902, "step": 17369 }, { "epoch": 0.5, "grad_norm": 8.078449530511199, "learning_rate": 5.2843623062754145e-06, "loss": 0.612, "step": 17370 }, { "epoch": 0.5, "grad_norm": 6.907778954742649, "learning_rate": 5.2838992971945345e-06, "loss": 0.6382, "step": 17371 }, { "epoch": 0.5, "grad_norm": 9.295010317201106, "learning_rate": 5.283436285671303e-06, "loss": 0.8262, "step": 17372 }, { "epoch": 0.5, "grad_norm": 3.8433252115376115, "learning_rate": 5.282973271709704e-06, "loss": 0.3866, "step": 17373 }, { "epoch": 0.5, "grad_norm": 6.654828864604885, "learning_rate": 5.282510255313722e-06, "loss": 0.5495, "step": 17374 }, { "epoch": 0.5, "grad_norm": 5.582174546235753, "learning_rate": 5.282047236487336e-06, "loss": 0.3121, "step": 17375 }, { "epoch": 0.5, "grad_norm": 6.28681372960322, "learning_rate": 5.2815842152345345e-06, "loss": 0.5123, "step": 17376 }, { "epoch": 0.5, "grad_norm": 6.9081833148615095, "learning_rate": 5.281121191559299e-06, "loss": 0.6139, "step": 17377 }, { "epoch": 0.5, "grad_norm": 4.754285008960484, "learning_rate": 5.2806581654656105e-06, "loss": 0.2216, "step": 17378 }, { "epoch": 0.5, "grad_norm": 5.419660440274124, "learning_rate": 5.2801951369574556e-06, "loss": 0.6319, "step": 17379 }, { "epoch": 0.5, "grad_norm": 4.275881025090117, "learning_rate": 5.279732106038816e-06, "loss": 0.3115, "step": 17380 }, { "epoch": 0.5, "grad_norm": 5.125929445794007, "learning_rate": 5.279269072713678e-06, "loss": 0.4006, "step": 17381 }, { "epoch": 0.5, "grad_norm": 2.1856135681176263, "learning_rate": 5.27880603698602e-06, "loss": 0.1356, "step": 17382 }, { "epoch": 0.5, "grad_norm": 4.7998883105237775, "learning_rate": 5.278342998859828e-06, "loss": 0.5085, "step": 17383 }, { "epoch": 0.5, "grad_norm": 6.215895107685304, "learning_rate": 5.277879958339086e-06, "loss": 0.2237, "step": 17384 }, { "epoch": 0.5, "grad_norm": 4.812983203756389, "learning_rate": 5.277416915427776e-06, "loss": 0.7215, "step": 17385 }, { "epoch": 0.5, "grad_norm": 5.063897587516505, "learning_rate": 5.276953870129884e-06, "loss": 0.5077, "step": 17386 }, { "epoch": 0.5, "grad_norm": 7.923132922577905, "learning_rate": 5.276490822449392e-06, "loss": 0.7863, "step": 17387 }, { "epoch": 0.5, "grad_norm": 8.67437785003657, "learning_rate": 5.2760277723902835e-06, "loss": 0.6162, "step": 17388 }, { "epoch": 0.5, "grad_norm": 5.016974674582929, "learning_rate": 5.275564719956541e-06, "loss": 0.3237, "step": 17389 }, { "epoch": 0.5, "grad_norm": 10.72667274501795, "learning_rate": 5.275101665152149e-06, "loss": 0.9879, "step": 17390 }, { "epoch": 0.5, "grad_norm": 4.234720532722896, "learning_rate": 5.274638607981093e-06, "loss": 0.4668, "step": 17391 }, { "epoch": 0.5, "grad_norm": 4.945483694382687, "learning_rate": 5.274175548447352e-06, "loss": 1.0039, "step": 17392 }, { "epoch": 0.5, "grad_norm": 6.416891275743602, "learning_rate": 5.273712486554914e-06, "loss": 0.6375, "step": 17393 }, { "epoch": 0.5, "grad_norm": 5.60298949384683, "learning_rate": 5.273249422307762e-06, "loss": 0.5875, "step": 17394 }, { "epoch": 0.5, "grad_norm": 6.389990628195324, "learning_rate": 5.272786355709878e-06, "loss": 0.4841, "step": 17395 }, { "epoch": 0.5, "grad_norm": 4.6424909366032345, "learning_rate": 5.272323286765246e-06, "loss": 0.2349, "step": 17396 }, { "epoch": 0.5, "grad_norm": 7.111471215044551, "learning_rate": 5.271860215477851e-06, "loss": 0.8048, "step": 17397 }, { "epoch": 0.5, "grad_norm": 5.6173848845668815, "learning_rate": 5.271397141851678e-06, "loss": 0.5101, "step": 17398 }, { "epoch": 0.5, "grad_norm": 3.1579873336268602, "learning_rate": 5.270934065890706e-06, "loss": 0.409, "step": 17399 }, { "epoch": 0.5, "grad_norm": 6.595918271772353, "learning_rate": 5.270470987598921e-06, "loss": 0.7612, "step": 17400 }, { "epoch": 0.5, "grad_norm": 5.116663456065675, "learning_rate": 5.27000790698031e-06, "loss": 0.6624, "step": 17401 }, { "epoch": 0.5, "grad_norm": 4.4649836534156035, "learning_rate": 5.269544824038853e-06, "loss": 0.4388, "step": 17402 }, { "epoch": 0.5, "grad_norm": 5.134016247113225, "learning_rate": 5.269081738778534e-06, "loss": 0.4411, "step": 17403 }, { "epoch": 0.5, "grad_norm": 7.217993180536784, "learning_rate": 5.268618651203341e-06, "loss": 0.5444, "step": 17404 }, { "epoch": 0.5, "grad_norm": 4.244916484222402, "learning_rate": 5.268155561317252e-06, "loss": 0.299, "step": 17405 }, { "epoch": 0.5, "grad_norm": 8.921177611343948, "learning_rate": 5.267692469124255e-06, "loss": 0.5341, "step": 17406 }, { "epoch": 0.5, "grad_norm": 3.1555209875918475, "learning_rate": 5.267229374628332e-06, "loss": 0.3477, "step": 17407 }, { "epoch": 0.5, "grad_norm": 4.1849492688220415, "learning_rate": 5.266766277833468e-06, "loss": 0.2872, "step": 17408 }, { "epoch": 0.5, "grad_norm": 4.166329917015333, "learning_rate": 5.266303178743646e-06, "loss": 0.3161, "step": 17409 }, { "epoch": 0.5, "grad_norm": 5.30284365265653, "learning_rate": 5.26584007736285e-06, "loss": 0.1347, "step": 17410 }, { "epoch": 0.5, "grad_norm": 4.718978623668566, "learning_rate": 5.265376973695067e-06, "loss": 0.2461, "step": 17411 }, { "epoch": 0.5, "grad_norm": 3.5716892818251766, "learning_rate": 5.264913867744277e-06, "loss": 0.4589, "step": 17412 }, { "epoch": 0.5, "grad_norm": 6.089698383585647, "learning_rate": 5.264450759514466e-06, "loss": 0.6032, "step": 17413 }, { "epoch": 0.5, "grad_norm": 4.667237930868275, "learning_rate": 5.263987649009617e-06, "loss": 0.571, "step": 17414 }, { "epoch": 0.5, "grad_norm": 4.346831840778057, "learning_rate": 5.263524536233715e-06, "loss": 0.5882, "step": 17415 }, { "epoch": 0.5, "grad_norm": 2.1775980543227056, "learning_rate": 5.263061421190745e-06, "loss": 0.185, "step": 17416 }, { "epoch": 0.5, "grad_norm": 5.65891114750587, "learning_rate": 5.262598303884688e-06, "loss": 0.7771, "step": 17417 }, { "epoch": 0.5, "grad_norm": 7.837178490476617, "learning_rate": 5.262135184319531e-06, "loss": 0.9148, "step": 17418 }, { "epoch": 0.5, "grad_norm": 5.650120359168431, "learning_rate": 5.261672062499257e-06, "loss": 0.1996, "step": 17419 }, { "epoch": 0.5, "grad_norm": 4.781728558494915, "learning_rate": 5.2612089384278505e-06, "loss": 0.3514, "step": 17420 }, { "epoch": 0.5, "grad_norm": 5.401803541998511, "learning_rate": 5.260745812109297e-06, "loss": 0.6227, "step": 17421 }, { "epoch": 0.5, "grad_norm": 2.09529688613843, "learning_rate": 5.260282683547578e-06, "loss": 0.2442, "step": 17422 }, { "epoch": 0.5, "grad_norm": 5.412441038378802, "learning_rate": 5.25981955274668e-06, "loss": 0.7502, "step": 17423 }, { "epoch": 0.5, "grad_norm": 5.233546313833763, "learning_rate": 5.259356419710585e-06, "loss": 0.3908, "step": 17424 }, { "epoch": 0.5, "grad_norm": 2.8674302790911783, "learning_rate": 5.25889328444328e-06, "loss": 0.1472, "step": 17425 }, { "epoch": 0.5, "grad_norm": 3.6297852895883893, "learning_rate": 5.258430146948747e-06, "loss": 0.4109, "step": 17426 }, { "epoch": 0.5, "grad_norm": 2.459540585247587, "learning_rate": 5.257967007230972e-06, "loss": 0.4956, "step": 17427 }, { "epoch": 0.5, "grad_norm": 4.335736161789771, "learning_rate": 5.257503865293939e-06, "loss": 0.4073, "step": 17428 }, { "epoch": 0.5, "grad_norm": 4.476020340768449, "learning_rate": 5.257040721141631e-06, "loss": 0.8958, "step": 17429 }, { "epoch": 0.5, "grad_norm": 4.738275815704324, "learning_rate": 5.256577574778034e-06, "loss": 0.5841, "step": 17430 }, { "epoch": 0.5, "grad_norm": 4.783230290427274, "learning_rate": 5.256114426207132e-06, "loss": 0.2732, "step": 17431 }, { "epoch": 0.5, "grad_norm": 4.689308326325504, "learning_rate": 5.255651275432908e-06, "loss": 0.1695, "step": 17432 }, { "epoch": 0.5, "grad_norm": 7.0200678769386675, "learning_rate": 5.255188122459347e-06, "loss": 0.7939, "step": 17433 }, { "epoch": 0.5, "grad_norm": 7.997348167072839, "learning_rate": 5.254724967290435e-06, "loss": 0.7319, "step": 17434 }, { "epoch": 0.5, "grad_norm": 4.117231423073124, "learning_rate": 5.254261809930155e-06, "loss": 0.6562, "step": 17435 }, { "epoch": 0.5, "grad_norm": 4.9441249198872095, "learning_rate": 5.253798650382493e-06, "loss": 0.1185, "step": 17436 }, { "epoch": 0.5, "grad_norm": 4.2807294710131885, "learning_rate": 5.253335488651432e-06, "loss": 0.4633, "step": 17437 }, { "epoch": 0.5, "grad_norm": 2.0100976801262824, "learning_rate": 5.2528723247409574e-06, "loss": 0.2163, "step": 17438 }, { "epoch": 0.5, "grad_norm": 8.076787664514967, "learning_rate": 5.252409158655053e-06, "loss": 0.8147, "step": 17439 }, { "epoch": 0.5, "grad_norm": 4.477959027246014, "learning_rate": 5.251945990397703e-06, "loss": 0.3222, "step": 17440 }, { "epoch": 0.5, "grad_norm": 10.110413487673174, "learning_rate": 5.251482819972893e-06, "loss": 0.5857, "step": 17441 }, { "epoch": 0.5, "grad_norm": 3.8310516380747166, "learning_rate": 5.251019647384607e-06, "loss": 0.174, "step": 17442 }, { "epoch": 0.5, "grad_norm": 3.350091962833123, "learning_rate": 5.2505564726368305e-06, "loss": 0.1999, "step": 17443 }, { "epoch": 0.5, "grad_norm": 11.676805305177002, "learning_rate": 5.250093295733547e-06, "loss": 0.7115, "step": 17444 }, { "epoch": 0.5, "grad_norm": 6.7045237542317, "learning_rate": 5.249630116678744e-06, "loss": 0.5201, "step": 17445 }, { "epoch": 0.5, "grad_norm": 4.358330823752931, "learning_rate": 5.249166935476401e-06, "loss": 0.4194, "step": 17446 }, { "epoch": 0.5, "grad_norm": 4.766984064143156, "learning_rate": 5.248703752130506e-06, "loss": 0.4608, "step": 17447 }, { "epoch": 0.5, "grad_norm": 6.9293686923520195, "learning_rate": 5.248240566645044e-06, "loss": 0.6778, "step": 17448 }, { "epoch": 0.5, "grad_norm": 6.0880483313783715, "learning_rate": 5.247777379023998e-06, "loss": 0.8543, "step": 17449 }, { "epoch": 0.5, "grad_norm": 5.032895642422547, "learning_rate": 5.247314189271354e-06, "loss": 0.5066, "step": 17450 }, { "epoch": 0.5, "grad_norm": 6.564376925951492, "learning_rate": 5.246850997391098e-06, "loss": 0.6891, "step": 17451 }, { "epoch": 0.5, "grad_norm": 7.130686214270199, "learning_rate": 5.246387803387212e-06, "loss": 0.7859, "step": 17452 }, { "epoch": 0.5, "grad_norm": 4.343779707882512, "learning_rate": 5.245924607263681e-06, "loss": 0.3034, "step": 17453 }, { "epoch": 0.5, "grad_norm": 5.318322883786167, "learning_rate": 5.245461409024491e-06, "loss": 0.3587, "step": 17454 }, { "epoch": 0.5, "grad_norm": 7.339421201747462, "learning_rate": 5.244998208673629e-06, "loss": 0.5044, "step": 17455 }, { "epoch": 0.5, "grad_norm": 8.70504755943843, "learning_rate": 5.2445350062150766e-06, "loss": 0.4157, "step": 17456 }, { "epoch": 0.5, "grad_norm": 1.9236713016318443, "learning_rate": 5.244071801652819e-06, "loss": 0.24, "step": 17457 }, { "epoch": 0.5, "grad_norm": 4.739203077998891, "learning_rate": 5.243608594990842e-06, "loss": 0.2156, "step": 17458 }, { "epoch": 0.5, "grad_norm": 6.529088972400708, "learning_rate": 5.2431453862331305e-06, "loss": 0.4285, "step": 17459 }, { "epoch": 0.5, "grad_norm": 5.56072060916904, "learning_rate": 5.242682175383669e-06, "loss": 0.6025, "step": 17460 }, { "epoch": 0.5, "grad_norm": 5.591773722950784, "learning_rate": 5.242218962446443e-06, "loss": 0.8548, "step": 17461 }, { "epoch": 0.5, "grad_norm": 2.6099050862860014, "learning_rate": 5.241755747425437e-06, "loss": 0.2477, "step": 17462 }, { "epoch": 0.5, "grad_norm": 4.028118277367308, "learning_rate": 5.2412925303246355e-06, "loss": 0.4055, "step": 17463 }, { "epoch": 0.5, "grad_norm": 3.313914518815897, "learning_rate": 5.240829311148023e-06, "loss": 0.1937, "step": 17464 }, { "epoch": 0.5, "grad_norm": 4.759943642963842, "learning_rate": 5.240366089899589e-06, "loss": 0.2864, "step": 17465 }, { "epoch": 0.5, "grad_norm": 3.668931649827128, "learning_rate": 5.239902866583312e-06, "loss": 0.3781, "step": 17466 }, { "epoch": 0.5, "grad_norm": 7.289892151483533, "learning_rate": 5.23943964120318e-06, "loss": 0.7135, "step": 17467 }, { "epoch": 0.5, "grad_norm": 6.8503310033084475, "learning_rate": 5.23897641376318e-06, "loss": 0.7307, "step": 17468 }, { "epoch": 0.5, "grad_norm": 6.381265739996886, "learning_rate": 5.238513184267295e-06, "loss": 0.6206, "step": 17469 }, { "epoch": 0.5, "grad_norm": 3.547618939191549, "learning_rate": 5.238049952719511e-06, "loss": 0.4882, "step": 17470 }, { "epoch": 0.5, "grad_norm": 8.507829762450921, "learning_rate": 5.237586719123812e-06, "loss": 0.7502, "step": 17471 }, { "epoch": 0.5, "grad_norm": 4.928389775934815, "learning_rate": 5.2371234834841835e-06, "loss": 0.4306, "step": 17472 }, { "epoch": 0.5, "grad_norm": 5.395663132309097, "learning_rate": 5.236660245804611e-06, "loss": 0.3491, "step": 17473 }, { "epoch": 0.5, "grad_norm": 5.169708489660951, "learning_rate": 5.236197006089078e-06, "loss": 0.6083, "step": 17474 }, { "epoch": 0.5, "grad_norm": 7.852722811010829, "learning_rate": 5.235733764341574e-06, "loss": 0.755, "step": 17475 }, { "epoch": 0.5, "grad_norm": 6.181082007552409, "learning_rate": 5.235270520566079e-06, "loss": 0.5582, "step": 17476 }, { "epoch": 0.5, "grad_norm": 10.669022141099232, "learning_rate": 5.234807274766582e-06, "loss": 0.9807, "step": 17477 }, { "epoch": 0.5, "grad_norm": 3.078092313486429, "learning_rate": 5.234344026947068e-06, "loss": 0.199, "step": 17478 }, { "epoch": 0.5, "grad_norm": 5.79069176937503, "learning_rate": 5.2338807771115195e-06, "loss": 0.3408, "step": 17479 }, { "epoch": 0.5, "grad_norm": 5.163496093158975, "learning_rate": 5.233417525263924e-06, "loss": 0.1488, "step": 17480 }, { "epoch": 0.5, "grad_norm": 4.346105526927548, "learning_rate": 5.232954271408266e-06, "loss": 0.2601, "step": 17481 }, { "epoch": 0.5, "grad_norm": 6.186287770829482, "learning_rate": 5.232491015548532e-06, "loss": 0.3731, "step": 17482 }, { "epoch": 0.5, "grad_norm": 4.027964887255733, "learning_rate": 5.2320277576887045e-06, "loss": 0.5461, "step": 17483 }, { "epoch": 0.5, "grad_norm": 3.5528836378856785, "learning_rate": 5.231564497832772e-06, "loss": 0.5539, "step": 17484 }, { "epoch": 0.5, "grad_norm": 7.244219777735961, "learning_rate": 5.231101235984719e-06, "loss": 0.6893, "step": 17485 }, { "epoch": 0.5, "grad_norm": 4.82898558813912, "learning_rate": 5.23063797214853e-06, "loss": 1.1188, "step": 17486 }, { "epoch": 0.5, "grad_norm": 4.609421655451116, "learning_rate": 5.23017470632819e-06, "loss": 0.3894, "step": 17487 }, { "epoch": 0.5, "grad_norm": 3.010080151520483, "learning_rate": 5.229711438527687e-06, "loss": 0.1663, "step": 17488 }, { "epoch": 0.5, "grad_norm": 11.352117305673746, "learning_rate": 5.229248168751006e-06, "loss": 0.6776, "step": 17489 }, { "epoch": 0.5, "grad_norm": 6.6992536983190085, "learning_rate": 5.228784897002128e-06, "loss": 1.0093, "step": 17490 }, { "epoch": 0.5, "grad_norm": 5.88070977513868, "learning_rate": 5.228321623285043e-06, "loss": 0.343, "step": 17491 }, { "epoch": 0.5, "grad_norm": 2.9978383939243685, "learning_rate": 5.227858347603735e-06, "loss": 0.1883, "step": 17492 }, { "epoch": 0.5, "grad_norm": 12.609699306548213, "learning_rate": 5.22739506996219e-06, "loss": 0.4241, "step": 17493 }, { "epoch": 0.5, "grad_norm": 5.850795150579361, "learning_rate": 5.226931790364392e-06, "loss": 0.2537, "step": 17494 }, { "epoch": 0.5, "grad_norm": 6.2669065311194485, "learning_rate": 5.226468508814331e-06, "loss": 0.6275, "step": 17495 }, { "epoch": 0.5, "grad_norm": 2.7025669161251753, "learning_rate": 5.226005225315987e-06, "loss": 0.3957, "step": 17496 }, { "epoch": 0.5, "grad_norm": 3.9836205160824436, "learning_rate": 5.225541939873348e-06, "loss": 0.3871, "step": 17497 }, { "epoch": 0.5, "grad_norm": 6.6764365154211465, "learning_rate": 5.2250786524903984e-06, "loss": 0.4552, "step": 17498 }, { "epoch": 0.5, "grad_norm": 4.0434875450277366, "learning_rate": 5.224615363171126e-06, "loss": 0.4448, "step": 17499 }, { "epoch": 0.5, "grad_norm": 10.339412044654097, "learning_rate": 5.224152071919515e-06, "loss": 0.4291, "step": 17500 }, { "epoch": 0.5, "grad_norm": 6.3270156453112385, "learning_rate": 5.2236887787395515e-06, "loss": 0.3976, "step": 17501 }, { "epoch": 0.5, "grad_norm": 4.52878542994413, "learning_rate": 5.223225483635223e-06, "loss": 0.2881, "step": 17502 }, { "epoch": 0.5, "grad_norm": 6.999243763537142, "learning_rate": 5.222762186610511e-06, "loss": 1.0486, "step": 17503 }, { "epoch": 0.5, "grad_norm": 8.570328635076255, "learning_rate": 5.222298887669402e-06, "loss": 0.694, "step": 17504 }, { "epoch": 0.5, "grad_norm": 5.358157428308264, "learning_rate": 5.2218355868158865e-06, "loss": 0.3391, "step": 17505 }, { "epoch": 0.5, "grad_norm": 6.107799902605565, "learning_rate": 5.2213722840539445e-06, "loss": 0.777, "step": 17506 }, { "epoch": 0.5, "grad_norm": 2.5266707171287712, "learning_rate": 5.220908979387563e-06, "loss": 0.3414, "step": 17507 }, { "epoch": 0.5, "grad_norm": 8.10458097077153, "learning_rate": 5.2204456728207306e-06, "loss": 0.7445, "step": 17508 }, { "epoch": 0.5, "grad_norm": 7.836078311647633, "learning_rate": 5.219982364357431e-06, "loss": 0.5755, "step": 17509 }, { "epoch": 0.5, "grad_norm": 4.142972921059681, "learning_rate": 5.21951905400165e-06, "loss": 0.5779, "step": 17510 }, { "epoch": 0.5, "grad_norm": 7.452841827323359, "learning_rate": 5.219055741757373e-06, "loss": 0.3964, "step": 17511 }, { "epoch": 0.5, "grad_norm": 4.182941175725238, "learning_rate": 5.218592427628588e-06, "loss": 0.5004, "step": 17512 }, { "epoch": 0.5, "grad_norm": 7.086003884674308, "learning_rate": 5.218129111619278e-06, "loss": 0.7281, "step": 17513 }, { "epoch": 0.5, "grad_norm": 4.943576116192544, "learning_rate": 5.2176657937334305e-06, "loss": 0.6388, "step": 17514 }, { "epoch": 0.5, "grad_norm": 3.6322732668776094, "learning_rate": 5.21720247397503e-06, "loss": 0.3262, "step": 17515 }, { "epoch": 0.5, "grad_norm": 6.704116498981905, "learning_rate": 5.216739152348065e-06, "loss": 0.3932, "step": 17516 }, { "epoch": 0.5, "grad_norm": 5.2898104658588165, "learning_rate": 5.216275828856518e-06, "loss": 0.6705, "step": 17517 }, { "epoch": 0.5, "grad_norm": 3.273084962930316, "learning_rate": 5.215812503504377e-06, "loss": 0.3659, "step": 17518 }, { "epoch": 0.5, "grad_norm": 2.789807089354018, "learning_rate": 5.21534917629563e-06, "loss": 0.2345, "step": 17519 }, { "epoch": 0.5, "grad_norm": 4.2903223740889675, "learning_rate": 5.214885847234258e-06, "loss": 0.542, "step": 17520 }, { "epoch": 0.5, "grad_norm": 10.004414061051055, "learning_rate": 5.2144225163242505e-06, "loss": 0.3721, "step": 17521 }, { "epoch": 0.5, "grad_norm": 3.644262379564758, "learning_rate": 5.213959183569593e-06, "loss": 0.1632, "step": 17522 }, { "epoch": 0.5, "grad_norm": 7.010433527646093, "learning_rate": 5.21349584897427e-06, "loss": 1.1319, "step": 17523 }, { "epoch": 0.5, "grad_norm": 4.089808306317287, "learning_rate": 5.213032512542267e-06, "loss": 0.3179, "step": 17524 }, { "epoch": 0.5, "grad_norm": 8.093078703271015, "learning_rate": 5.212569174277575e-06, "loss": 0.8024, "step": 17525 }, { "epoch": 0.5, "grad_norm": 4.717393642291968, "learning_rate": 5.212105834184175e-06, "loss": 0.2942, "step": 17526 }, { "epoch": 0.5, "grad_norm": 6.1465796022944765, "learning_rate": 5.211642492266054e-06, "loss": 0.6194, "step": 17527 }, { "epoch": 0.5, "grad_norm": 3.334732374007861, "learning_rate": 5.211179148527199e-06, "loss": 0.2267, "step": 17528 }, { "epoch": 0.5, "grad_norm": 7.288533542162027, "learning_rate": 5.210715802971596e-06, "loss": 0.9764, "step": 17529 }, { "epoch": 0.5, "grad_norm": 4.362830962713165, "learning_rate": 5.2102524556032305e-06, "loss": 0.3324, "step": 17530 }, { "epoch": 0.5, "grad_norm": 5.13839942502751, "learning_rate": 5.2097891064260895e-06, "loss": 0.8616, "step": 17531 }, { "epoch": 0.5, "grad_norm": 4.849767822921915, "learning_rate": 5.209325755444159e-06, "loss": 0.5547, "step": 17532 }, { "epoch": 0.5, "grad_norm": 11.453060384154357, "learning_rate": 5.2088624026614235e-06, "loss": 0.5002, "step": 17533 }, { "epoch": 0.5, "grad_norm": 5.265824673542852, "learning_rate": 5.2083990480818716e-06, "loss": 0.4139, "step": 17534 }, { "epoch": 0.5, "grad_norm": 5.0189774619655, "learning_rate": 5.207935691709488e-06, "loss": 0.5445, "step": 17535 }, { "epoch": 0.5, "grad_norm": 6.2039078379437935, "learning_rate": 5.20747233354826e-06, "loss": 0.2791, "step": 17536 }, { "epoch": 0.5, "grad_norm": 5.532399462214318, "learning_rate": 5.207008973602172e-06, "loss": 0.6, "step": 17537 }, { "epoch": 0.5, "grad_norm": 7.777068427829747, "learning_rate": 5.206545611875211e-06, "loss": 0.6681, "step": 17538 }, { "epoch": 0.5, "grad_norm": 7.27670539189774, "learning_rate": 5.2060822483713655e-06, "loss": 0.3702, "step": 17539 }, { "epoch": 0.5, "grad_norm": 5.5754772067571805, "learning_rate": 5.205618883094617e-06, "loss": 0.4647, "step": 17540 }, { "epoch": 0.5, "grad_norm": 12.282791157475733, "learning_rate": 5.205155516048956e-06, "loss": 0.6208, "step": 17541 }, { "epoch": 0.5, "grad_norm": 10.020292482961352, "learning_rate": 5.2046921472383685e-06, "loss": 0.4929, "step": 17542 }, { "epoch": 0.5, "grad_norm": 4.790643676801034, "learning_rate": 5.204228776666838e-06, "loss": 0.2736, "step": 17543 }, { "epoch": 0.5, "grad_norm": 3.4033374408307324, "learning_rate": 5.203765404338353e-06, "loss": 0.4016, "step": 17544 }, { "epoch": 0.5, "grad_norm": 4.839245381285241, "learning_rate": 5.203302030256898e-06, "loss": 0.8015, "step": 17545 }, { "epoch": 0.5, "grad_norm": 8.873071380386587, "learning_rate": 5.202838654426463e-06, "loss": 0.6146, "step": 17546 }, { "epoch": 0.5, "grad_norm": 3.3624254083691305, "learning_rate": 5.202375276851032e-06, "loss": 0.3384, "step": 17547 }, { "epoch": 0.5, "grad_norm": 5.261703978293343, "learning_rate": 5.201911897534588e-06, "loss": 0.7052, "step": 17548 }, { "epoch": 0.5, "grad_norm": 4.60400579640399, "learning_rate": 5.201448516481125e-06, "loss": 0.5822, "step": 17549 }, { "epoch": 0.5, "grad_norm": 3.7733785810273828, "learning_rate": 5.200985133694623e-06, "loss": 0.2965, "step": 17550 }, { "epoch": 0.5, "grad_norm": 3.4582098552001077, "learning_rate": 5.200521749179071e-06, "loss": 0.4547, "step": 17551 }, { "epoch": 0.5, "grad_norm": 4.706738272132148, "learning_rate": 5.200058362938456e-06, "loss": 0.4723, "step": 17552 }, { "epoch": 0.5, "grad_norm": 6.0816994480383135, "learning_rate": 5.199594974976763e-06, "loss": 0.4402, "step": 17553 }, { "epoch": 0.5, "grad_norm": 4.608872596386881, "learning_rate": 5.199131585297977e-06, "loss": 0.2157, "step": 17554 }, { "epoch": 0.5, "grad_norm": 11.726914392283268, "learning_rate": 5.198668193906089e-06, "loss": 0.5173, "step": 17555 }, { "epoch": 0.5, "grad_norm": 5.192121377547445, "learning_rate": 5.198204800805084e-06, "loss": 0.3559, "step": 17556 }, { "epoch": 0.5, "grad_norm": 5.149617964268561, "learning_rate": 5.197741405998943e-06, "loss": 0.5594, "step": 17557 }, { "epoch": 0.5, "grad_norm": 4.773521360169542, "learning_rate": 5.19727800949166e-06, "loss": 0.5107, "step": 17558 }, { "epoch": 0.5, "grad_norm": 9.47441807673491, "learning_rate": 5.196814611287218e-06, "loss": 0.633, "step": 17559 }, { "epoch": 0.5, "grad_norm": 4.874645758253941, "learning_rate": 5.196351211389606e-06, "loss": 0.5867, "step": 17560 }, { "epoch": 0.5, "grad_norm": 3.348765951576416, "learning_rate": 5.195887809802807e-06, "loss": 0.3968, "step": 17561 }, { "epoch": 0.5, "grad_norm": 4.2465051979815565, "learning_rate": 5.195424406530809e-06, "loss": 0.2557, "step": 17562 }, { "epoch": 0.5, "grad_norm": 5.611288185389101, "learning_rate": 5.1949610015776e-06, "loss": 0.5725, "step": 17563 }, { "epoch": 0.5, "grad_norm": 5.322212585744892, "learning_rate": 5.1944975949471645e-06, "loss": 0.4258, "step": 17564 }, { "epoch": 0.5, "grad_norm": 4.432318550264417, "learning_rate": 5.19403418664349e-06, "loss": 0.4033, "step": 17565 }, { "epoch": 0.5, "grad_norm": 3.718787874301411, "learning_rate": 5.193570776670563e-06, "loss": 0.2975, "step": 17566 }, { "epoch": 0.5, "grad_norm": 7.293362411229518, "learning_rate": 5.193107365032372e-06, "loss": 0.6947, "step": 17567 }, { "epoch": 0.5, "grad_norm": 7.69398113681354, "learning_rate": 5.192643951732901e-06, "loss": 0.5855, "step": 17568 }, { "epoch": 0.5, "grad_norm": 3.7943854627746187, "learning_rate": 5.192180536776138e-06, "loss": 0.3965, "step": 17569 }, { "epoch": 0.5, "grad_norm": 3.8238000221634327, "learning_rate": 5.19171712016607e-06, "loss": 0.4228, "step": 17570 }, { "epoch": 0.5, "grad_norm": 7.0578133891106996, "learning_rate": 5.191253701906682e-06, "loss": 0.8379, "step": 17571 }, { "epoch": 0.5, "grad_norm": 2.4737694330208737, "learning_rate": 5.190790282001963e-06, "loss": 0.2479, "step": 17572 }, { "epoch": 0.5, "grad_norm": 21.28702678501646, "learning_rate": 5.190326860455899e-06, "loss": 0.3642, "step": 17573 }, { "epoch": 0.5, "grad_norm": 8.044310551861562, "learning_rate": 5.1898634372724756e-06, "loss": 0.6456, "step": 17574 }, { "epoch": 0.5, "grad_norm": 4.617668217195679, "learning_rate": 5.1894000124556794e-06, "loss": 0.46, "step": 17575 }, { "epoch": 0.5, "grad_norm": 3.1589057249351904, "learning_rate": 5.188936586009501e-06, "loss": 0.2663, "step": 17576 }, { "epoch": 0.5, "grad_norm": 4.272007621040404, "learning_rate": 5.188473157937923e-06, "loss": 0.3953, "step": 17577 }, { "epoch": 0.5, "grad_norm": 7.0410858023819385, "learning_rate": 5.188009728244933e-06, "loss": 0.3763, "step": 17578 }, { "epoch": 0.5, "grad_norm": 2.5983544121336455, "learning_rate": 5.1875462969345205e-06, "loss": 0.1635, "step": 17579 }, { "epoch": 0.5, "grad_norm": 5.9032302079687815, "learning_rate": 5.187082864010669e-06, "loss": 0.3161, "step": 17580 }, { "epoch": 0.5, "grad_norm": 6.504949959072071, "learning_rate": 5.1866194294773646e-06, "loss": 0.4485, "step": 17581 }, { "epoch": 0.5, "grad_norm": 7.16190923947433, "learning_rate": 5.186155993338598e-06, "loss": 0.9633, "step": 17582 }, { "epoch": 0.5, "grad_norm": 8.69633443607443, "learning_rate": 5.1856925555983555e-06, "loss": 0.5391, "step": 17583 }, { "epoch": 0.5, "grad_norm": 5.368839459456621, "learning_rate": 5.185229116260622e-06, "loss": 0.3406, "step": 17584 }, { "epoch": 0.5, "grad_norm": 5.514121653109011, "learning_rate": 5.184765675329384e-06, "loss": 0.2335, "step": 17585 }, { "epoch": 0.5, "grad_norm": 3.618152464978039, "learning_rate": 5.184302232808631e-06, "loss": 0.2636, "step": 17586 }, { "epoch": 0.5, "grad_norm": 4.310714462610766, "learning_rate": 5.183838788702349e-06, "loss": 0.3973, "step": 17587 }, { "epoch": 0.5, "grad_norm": 4.518563340844318, "learning_rate": 5.183375343014523e-06, "loss": 0.4876, "step": 17588 }, { "epoch": 0.5, "grad_norm": 8.954590043439696, "learning_rate": 5.182911895749142e-06, "loss": 0.9569, "step": 17589 }, { "epoch": 0.5, "grad_norm": 2.9275349758414126, "learning_rate": 5.182448446910192e-06, "loss": 0.3387, "step": 17590 }, { "epoch": 0.5, "grad_norm": 7.571312490894681, "learning_rate": 5.181984996501661e-06, "loss": 0.7149, "step": 17591 }, { "epoch": 0.5, "grad_norm": 4.648377644129468, "learning_rate": 5.181521544527537e-06, "loss": 0.3721, "step": 17592 }, { "epoch": 0.5, "grad_norm": 6.930505061385049, "learning_rate": 5.181058090991804e-06, "loss": 0.6397, "step": 17593 }, { "epoch": 0.5, "grad_norm": 5.825896340689116, "learning_rate": 5.1805946358984505e-06, "loss": 0.4345, "step": 17594 }, { "epoch": 0.5, "grad_norm": 5.861961994007591, "learning_rate": 5.180131179251463e-06, "loss": 0.6482, "step": 17595 }, { "epoch": 0.5, "grad_norm": 5.436849270172226, "learning_rate": 5.179667721054832e-06, "loss": 0.5457, "step": 17596 }, { "epoch": 0.5, "grad_norm": 5.60979772478478, "learning_rate": 5.1792042613125395e-06, "loss": 0.8213, "step": 17597 }, { "epoch": 0.5, "grad_norm": 7.199900999978046, "learning_rate": 5.178740800028574e-06, "loss": 0.3711, "step": 17598 }, { "epoch": 0.5, "grad_norm": 11.34190189660456, "learning_rate": 5.178277337206924e-06, "loss": 0.6398, "step": 17599 }, { "epoch": 0.5, "grad_norm": 6.260320820341825, "learning_rate": 5.177813872851576e-06, "loss": 0.2695, "step": 17600 }, { "epoch": 0.5, "grad_norm": 4.9178514292375395, "learning_rate": 5.1773504069665184e-06, "loss": 0.3648, "step": 17601 }, { "epoch": 0.5, "grad_norm": 4.449428402697704, "learning_rate": 5.1768869395557354e-06, "loss": 0.3998, "step": 17602 }, { "epoch": 0.5, "grad_norm": 9.506884238850718, "learning_rate": 5.176423470623217e-06, "loss": 0.5932, "step": 17603 }, { "epoch": 0.5, "grad_norm": 3.4641851338938974, "learning_rate": 5.1759600001729496e-06, "loss": 0.3766, "step": 17604 }, { "epoch": 0.5, "grad_norm": 6.977149622562306, "learning_rate": 5.175496528208919e-06, "loss": 0.6436, "step": 17605 }, { "epoch": 0.5, "grad_norm": 8.29406667764509, "learning_rate": 5.175033054735112e-06, "loss": 0.3648, "step": 17606 }, { "epoch": 0.5, "grad_norm": 7.614987350022547, "learning_rate": 5.17456957975552e-06, "loss": 0.3893, "step": 17607 }, { "epoch": 0.5, "grad_norm": 6.936176620592972, "learning_rate": 5.174106103274126e-06, "loss": 0.7874, "step": 17608 }, { "epoch": 0.5, "grad_norm": 5.076157408778444, "learning_rate": 5.173642625294919e-06, "loss": 0.4038, "step": 17609 }, { "epoch": 0.5, "grad_norm": 7.258521103939356, "learning_rate": 5.173179145821886e-06, "loss": 0.5661, "step": 17610 }, { "epoch": 0.5, "grad_norm": 4.484010475575225, "learning_rate": 5.172715664859014e-06, "loss": 0.5404, "step": 17611 }, { "epoch": 0.5, "grad_norm": 4.573628479541388, "learning_rate": 5.172252182410289e-06, "loss": 0.4283, "step": 17612 }, { "epoch": 0.5, "grad_norm": 3.7898303640141386, "learning_rate": 5.1717886984797015e-06, "loss": 0.176, "step": 17613 }, { "epoch": 0.5, "grad_norm": 3.822933210844713, "learning_rate": 5.171325213071235e-06, "loss": 0.1686, "step": 17614 }, { "epoch": 0.5, "grad_norm": 3.3741616514744464, "learning_rate": 5.17086172618888e-06, "loss": 0.3479, "step": 17615 }, { "epoch": 0.5, "grad_norm": 4.678182624253037, "learning_rate": 5.1703982378366245e-06, "loss": 0.1661, "step": 17616 }, { "epoch": 0.5, "grad_norm": 3.499894753644826, "learning_rate": 5.1699347480184514e-06, "loss": 0.2885, "step": 17617 }, { "epoch": 0.5, "grad_norm": 4.775265607387064, "learning_rate": 5.169471256738352e-06, "loss": 0.5804, "step": 17618 }, { "epoch": 0.5, "grad_norm": 2.9046686956772696, "learning_rate": 5.1690077640003115e-06, "loss": 0.2263, "step": 17619 }, { "epoch": 0.5, "grad_norm": 3.6646898460559583, "learning_rate": 5.168544269808319e-06, "loss": 0.3781, "step": 17620 }, { "epoch": 0.5, "grad_norm": 7.8084391301443805, "learning_rate": 5.1680807741663594e-06, "loss": 0.4667, "step": 17621 }, { "epoch": 0.5, "grad_norm": 3.467617056094031, "learning_rate": 5.167617277078423e-06, "loss": 0.4563, "step": 17622 }, { "epoch": 0.5, "grad_norm": 7.465220714814287, "learning_rate": 5.167153778548494e-06, "loss": 0.4409, "step": 17623 }, { "epoch": 0.5, "grad_norm": 5.110327535609603, "learning_rate": 5.166690278580563e-06, "loss": 0.7231, "step": 17624 }, { "epoch": 0.5, "grad_norm": 2.2764236921042422, "learning_rate": 5.1662267771786145e-06, "loss": 0.2772, "step": 17625 }, { "epoch": 0.5, "grad_norm": 6.953781816984737, "learning_rate": 5.16576327434664e-06, "loss": 0.3327, "step": 17626 }, { "epoch": 0.5, "grad_norm": 4.562965682837633, "learning_rate": 5.1652997700886235e-06, "loss": 0.2359, "step": 17627 }, { "epoch": 0.5, "grad_norm": 6.939171804431772, "learning_rate": 5.164836264408552e-06, "loss": 0.7273, "step": 17628 }, { "epoch": 0.5, "grad_norm": 6.4455468707387915, "learning_rate": 5.164372757310417e-06, "loss": 0.6966, "step": 17629 }, { "epoch": 0.5, "grad_norm": 8.532944954537559, "learning_rate": 5.163909248798203e-06, "loss": 0.8654, "step": 17630 }, { "epoch": 0.5, "grad_norm": 3.889685521199227, "learning_rate": 5.163445738875894e-06, "loss": 0.7037, "step": 17631 }, { "epoch": 0.5, "grad_norm": 7.869061728825454, "learning_rate": 5.1629822275474835e-06, "loss": 0.6816, "step": 17632 }, { "epoch": 0.5, "grad_norm": 4.8496824298200165, "learning_rate": 5.16251871481696e-06, "loss": 0.6354, "step": 17633 }, { "epoch": 0.5, "grad_norm": 5.094132882922537, "learning_rate": 5.162055200688305e-06, "loss": 0.2085, "step": 17634 }, { "epoch": 0.51, "grad_norm": 7.071243073148337, "learning_rate": 5.16159168516551e-06, "loss": 0.7882, "step": 17635 }, { "epoch": 0.51, "grad_norm": 5.691563422782587, "learning_rate": 5.1611281682525625e-06, "loss": 0.8241, "step": 17636 }, { "epoch": 0.51, "grad_norm": 5.679689347005803, "learning_rate": 5.160664649953448e-06, "loss": 0.4675, "step": 17637 }, { "epoch": 0.51, "grad_norm": 8.117204183317192, "learning_rate": 5.160201130272154e-06, "loss": 0.7742, "step": 17638 }, { "epoch": 0.51, "grad_norm": 3.929180893749209, "learning_rate": 5.159737609212672e-06, "loss": 0.3031, "step": 17639 }, { "epoch": 0.51, "grad_norm": 4.285586033900286, "learning_rate": 5.159274086778985e-06, "loss": 0.5395, "step": 17640 }, { "epoch": 0.51, "grad_norm": 9.18852248957262, "learning_rate": 5.158810562975084e-06, "loss": 0.4732, "step": 17641 }, { "epoch": 0.51, "grad_norm": 9.591351088020112, "learning_rate": 5.158347037804954e-06, "loss": 0.7181, "step": 17642 }, { "epoch": 0.51, "grad_norm": 7.298064255444875, "learning_rate": 5.1578835112725856e-06, "loss": 0.2574, "step": 17643 }, { "epoch": 0.51, "grad_norm": 10.860686518008706, "learning_rate": 5.157419983381964e-06, "loss": 0.6274, "step": 17644 }, { "epoch": 0.51, "grad_norm": 3.649447119657278, "learning_rate": 5.156956454137078e-06, "loss": 0.2949, "step": 17645 }, { "epoch": 0.51, "grad_norm": 2.7107407776090726, "learning_rate": 5.156492923541913e-06, "loss": 0.1448, "step": 17646 }, { "epoch": 0.51, "grad_norm": 5.410286427388054, "learning_rate": 5.15602939160046e-06, "loss": 0.6076, "step": 17647 }, { "epoch": 0.51, "grad_norm": 4.361217259912555, "learning_rate": 5.1555658583167055e-06, "loss": 0.5395, "step": 17648 }, { "epoch": 0.51, "grad_norm": 7.158964350298698, "learning_rate": 5.155102323694637e-06, "loss": 0.5829, "step": 17649 }, { "epoch": 0.51, "grad_norm": 12.062211463125845, "learning_rate": 5.154638787738243e-06, "loss": 0.574, "step": 17650 }, { "epoch": 0.51, "grad_norm": 5.986454057224548, "learning_rate": 5.15417525045151e-06, "loss": 0.3446, "step": 17651 }, { "epoch": 0.51, "grad_norm": 7.653920344703517, "learning_rate": 5.153711711838426e-06, "loss": 0.4539, "step": 17652 }, { "epoch": 0.51, "grad_norm": 3.988426019056821, "learning_rate": 5.153248171902981e-06, "loss": 0.1875, "step": 17653 }, { "epoch": 0.51, "grad_norm": 10.264250269888459, "learning_rate": 5.152784630649159e-06, "loss": 0.6815, "step": 17654 }, { "epoch": 0.51, "grad_norm": 9.171927300845079, "learning_rate": 5.1523210880809484e-06, "loss": 0.9618, "step": 17655 }, { "epoch": 0.51, "grad_norm": 79.78523271349228, "learning_rate": 5.15185754420234e-06, "loss": 1.0319, "step": 17656 }, { "epoch": 0.51, "grad_norm": 3.973131629610674, "learning_rate": 5.15139399901732e-06, "loss": 0.6245, "step": 17657 }, { "epoch": 0.51, "grad_norm": 4.498050638347182, "learning_rate": 5.150930452529875e-06, "loss": 0.4386, "step": 17658 }, { "epoch": 0.51, "grad_norm": 4.30246917004265, "learning_rate": 5.150466904743993e-06, "loss": 0.2335, "step": 17659 }, { "epoch": 0.51, "grad_norm": 3.1937307346009276, "learning_rate": 5.150003355663667e-06, "loss": 0.535, "step": 17660 }, { "epoch": 0.51, "grad_norm": 3.465078008359865, "learning_rate": 5.149539805292877e-06, "loss": 0.2562, "step": 17661 }, { "epoch": 0.51, "grad_norm": 6.625330071054199, "learning_rate": 5.149076253635616e-06, "loss": 0.6882, "step": 17662 }, { "epoch": 0.51, "grad_norm": 3.8580619647921712, "learning_rate": 5.148612700695869e-06, "loss": 0.5071, "step": 17663 }, { "epoch": 0.51, "grad_norm": 7.161427985849525, "learning_rate": 5.148149146477624e-06, "loss": 0.3923, "step": 17664 }, { "epoch": 0.51, "grad_norm": 7.25478054153753, "learning_rate": 5.1476855909848735e-06, "loss": 0.8442, "step": 17665 }, { "epoch": 0.51, "grad_norm": 5.405108292700781, "learning_rate": 5.1472220342216005e-06, "loss": 0.4418, "step": 17666 }, { "epoch": 0.51, "grad_norm": 4.983081566097294, "learning_rate": 5.146758476191795e-06, "loss": 0.3821, "step": 17667 }, { "epoch": 0.51, "grad_norm": 2.7848891548821952, "learning_rate": 5.146294916899444e-06, "loss": 0.2217, "step": 17668 }, { "epoch": 0.51, "grad_norm": 7.780100358350047, "learning_rate": 5.145831356348535e-06, "loss": 0.4872, "step": 17669 }, { "epoch": 0.51, "grad_norm": 4.701371255663447, "learning_rate": 5.145367794543058e-06, "loss": 0.4167, "step": 17670 }, { "epoch": 0.51, "grad_norm": 7.194040391033897, "learning_rate": 5.144904231486999e-06, "loss": 0.9349, "step": 17671 }, { "epoch": 0.51, "grad_norm": 8.272026366724903, "learning_rate": 5.144440667184346e-06, "loss": 1.0366, "step": 17672 }, { "epoch": 0.51, "grad_norm": 5.923930110990499, "learning_rate": 5.143977101639089e-06, "loss": 0.4336, "step": 17673 }, { "epoch": 0.51, "grad_norm": 4.874446397305678, "learning_rate": 5.143513534855215e-06, "loss": 0.7135, "step": 17674 }, { "epoch": 0.51, "grad_norm": 4.17176915866734, "learning_rate": 5.143049966836711e-06, "loss": 0.4175, "step": 17675 }, { "epoch": 0.51, "grad_norm": 4.440539272911708, "learning_rate": 5.142586397587566e-06, "loss": 0.4272, "step": 17676 }, { "epoch": 0.51, "grad_norm": 4.465718741797798, "learning_rate": 5.142122827111769e-06, "loss": 0.4001, "step": 17677 }, { "epoch": 0.51, "grad_norm": 3.732125534518046, "learning_rate": 5.141659255413305e-06, "loss": 0.3505, "step": 17678 }, { "epoch": 0.51, "grad_norm": 5.826998399759482, "learning_rate": 5.141195682496164e-06, "loss": 0.5607, "step": 17679 }, { "epoch": 0.51, "grad_norm": 6.164245641426884, "learning_rate": 5.140732108364333e-06, "loss": 0.4916, "step": 17680 }, { "epoch": 0.51, "grad_norm": 4.723711247765945, "learning_rate": 5.1402685330218025e-06, "loss": 0.3589, "step": 17681 }, { "epoch": 0.51, "grad_norm": 3.52503183499293, "learning_rate": 5.13980495647256e-06, "loss": 0.2271, "step": 17682 }, { "epoch": 0.51, "grad_norm": 4.47798562854541, "learning_rate": 5.139341378720591e-06, "loss": 0.5207, "step": 17683 }, { "epoch": 0.51, "grad_norm": 8.069711402450242, "learning_rate": 5.1388777997698865e-06, "loss": 0.5185, "step": 17684 }, { "epoch": 0.51, "grad_norm": 5.858625399642287, "learning_rate": 5.1384142196244325e-06, "loss": 0.5241, "step": 17685 }, { "epoch": 0.51, "grad_norm": 2.897370689640026, "learning_rate": 5.137950638288219e-06, "loss": 0.2871, "step": 17686 }, { "epoch": 0.51, "grad_norm": 6.7047308929966505, "learning_rate": 5.137487055765232e-06, "loss": 0.7007, "step": 17687 }, { "epoch": 0.51, "grad_norm": 7.082318947656239, "learning_rate": 5.1370234720594606e-06, "loss": 0.5648, "step": 17688 }, { "epoch": 0.51, "grad_norm": 4.8190267941034195, "learning_rate": 5.136559887174894e-06, "loss": 0.3809, "step": 17689 }, { "epoch": 0.51, "grad_norm": 3.4307735305117624, "learning_rate": 5.13609630111552e-06, "loss": 0.2445, "step": 17690 }, { "epoch": 0.51, "grad_norm": 3.177682286487993, "learning_rate": 5.135632713885325e-06, "loss": 0.3118, "step": 17691 }, { "epoch": 0.51, "grad_norm": 4.880258658308079, "learning_rate": 5.1351691254882985e-06, "loss": 0.4463, "step": 17692 }, { "epoch": 0.51, "grad_norm": 3.598528546048223, "learning_rate": 5.134705535928431e-06, "loss": 0.2348, "step": 17693 }, { "epoch": 0.51, "grad_norm": 8.16955673565464, "learning_rate": 5.134241945209706e-06, "loss": 0.6642, "step": 17694 }, { "epoch": 0.51, "grad_norm": 6.345185028276188, "learning_rate": 5.133778353336114e-06, "loss": 0.4078, "step": 17695 }, { "epoch": 0.51, "grad_norm": 4.606402896947039, "learning_rate": 5.133314760311643e-06, "loss": 0.6975, "step": 17696 }, { "epoch": 0.51, "grad_norm": 7.392599958944391, "learning_rate": 5.132851166140283e-06, "loss": 0.7082, "step": 17697 }, { "epoch": 0.51, "grad_norm": 3.867044866224175, "learning_rate": 5.1323875708260195e-06, "loss": 0.3102, "step": 17698 }, { "epoch": 0.51, "grad_norm": 4.494918709466492, "learning_rate": 5.131923974372842e-06, "loss": 0.3698, "step": 17699 }, { "epoch": 0.51, "grad_norm": 7.291897107070627, "learning_rate": 5.131460376784741e-06, "loss": 0.5462, "step": 17700 }, { "epoch": 0.51, "grad_norm": 5.672547712970202, "learning_rate": 5.1309967780657014e-06, "loss": 0.1287, "step": 17701 }, { "epoch": 0.51, "grad_norm": 8.559534603551441, "learning_rate": 5.130533178219711e-06, "loss": 0.4917, "step": 17702 }, { "epoch": 0.51, "grad_norm": 2.9083690813722223, "learning_rate": 5.130069577250761e-06, "loss": 0.4108, "step": 17703 }, { "epoch": 0.51, "grad_norm": 5.354497486190498, "learning_rate": 5.1296059751628406e-06, "loss": 0.5965, "step": 17704 }, { "epoch": 0.51, "grad_norm": 7.010357278815093, "learning_rate": 5.129142371959932e-06, "loss": 0.9589, "step": 17705 }, { "epoch": 0.51, "grad_norm": 10.54397995386548, "learning_rate": 5.128678767646029e-06, "loss": 0.7059, "step": 17706 }, { "epoch": 0.51, "grad_norm": 6.415731196725378, "learning_rate": 5.128215162225119e-06, "loss": 0.4038, "step": 17707 }, { "epoch": 0.51, "grad_norm": 8.257732582246918, "learning_rate": 5.127751555701189e-06, "loss": 0.5926, "step": 17708 }, { "epoch": 0.51, "grad_norm": 5.832000793663672, "learning_rate": 5.127287948078229e-06, "loss": 0.4713, "step": 17709 }, { "epoch": 0.51, "grad_norm": 7.811472710779632, "learning_rate": 5.126824339360227e-06, "loss": 0.4341, "step": 17710 }, { "epoch": 0.51, "grad_norm": 7.418432268442718, "learning_rate": 5.126360729551169e-06, "loss": 0.8328, "step": 17711 }, { "epoch": 0.51, "grad_norm": 4.712253190718652, "learning_rate": 5.125897118655046e-06, "loss": 0.3427, "step": 17712 }, { "epoch": 0.51, "grad_norm": 6.518469941806334, "learning_rate": 5.125433506675845e-06, "loss": 0.4892, "step": 17713 }, { "epoch": 0.51, "grad_norm": 5.088751768722358, "learning_rate": 5.1249698936175565e-06, "loss": 0.4352, "step": 17714 }, { "epoch": 0.51, "grad_norm": 5.139432867430221, "learning_rate": 5.124506279484166e-06, "loss": 0.6554, "step": 17715 }, { "epoch": 0.51, "grad_norm": 8.488638886572861, "learning_rate": 5.124042664279663e-06, "loss": 0.4783, "step": 17716 }, { "epoch": 0.51, "grad_norm": 8.722130889036428, "learning_rate": 5.123579048008038e-06, "loss": 0.5861, "step": 17717 }, { "epoch": 0.51, "grad_norm": 9.216526810847897, "learning_rate": 5.123115430673276e-06, "loss": 1.1229, "step": 17718 }, { "epoch": 0.51, "grad_norm": 8.947152320862205, "learning_rate": 5.122651812279369e-06, "loss": 0.7894, "step": 17719 }, { "epoch": 0.51, "grad_norm": 5.041139018083105, "learning_rate": 5.122188192830302e-06, "loss": 0.4417, "step": 17720 }, { "epoch": 0.51, "grad_norm": 4.2405457806197955, "learning_rate": 5.121724572330064e-06, "loss": 0.2609, "step": 17721 }, { "epoch": 0.51, "grad_norm": 6.848136889339543, "learning_rate": 5.121260950782647e-06, "loss": 0.6335, "step": 17722 }, { "epoch": 0.51, "grad_norm": 5.016478682742, "learning_rate": 5.1207973281920366e-06, "loss": 0.4522, "step": 17723 }, { "epoch": 0.51, "grad_norm": 7.1635139613087055, "learning_rate": 5.12033370456222e-06, "loss": 0.6164, "step": 17724 }, { "epoch": 0.51, "grad_norm": 4.373327889385751, "learning_rate": 5.119870079897189e-06, "loss": 0.5146, "step": 17725 }, { "epoch": 0.51, "grad_norm": 5.63319622113703, "learning_rate": 5.119406454200929e-06, "loss": 0.3085, "step": 17726 }, { "epoch": 0.51, "grad_norm": 4.167898733450097, "learning_rate": 5.1189428274774325e-06, "loss": 0.5778, "step": 17727 }, { "epoch": 0.51, "grad_norm": 5.779568731856844, "learning_rate": 5.118479199730683e-06, "loss": 0.5776, "step": 17728 }, { "epoch": 0.51, "grad_norm": 4.693044206607359, "learning_rate": 5.118015570964671e-06, "loss": 0.4273, "step": 17729 }, { "epoch": 0.51, "grad_norm": 5.672443960357691, "learning_rate": 5.117551941183388e-06, "loss": 0.4104, "step": 17730 }, { "epoch": 0.51, "grad_norm": 6.565758032633461, "learning_rate": 5.117088310390818e-06, "loss": 0.5929, "step": 17731 }, { "epoch": 0.51, "grad_norm": 5.188115416640757, "learning_rate": 5.116624678590952e-06, "loss": 0.3067, "step": 17732 }, { "epoch": 0.51, "grad_norm": 8.254423978268422, "learning_rate": 5.11616104578778e-06, "loss": 0.5718, "step": 17733 }, { "epoch": 0.51, "grad_norm": 5.603003110480003, "learning_rate": 5.115697411985287e-06, "loss": 0.3559, "step": 17734 }, { "epoch": 0.51, "grad_norm": 7.477594557495562, "learning_rate": 5.115233777187465e-06, "loss": 0.4498, "step": 17735 }, { "epoch": 0.51, "grad_norm": 5.546712222867424, "learning_rate": 5.1147701413983e-06, "loss": 0.8127, "step": 17736 }, { "epoch": 0.51, "grad_norm": 6.7423037699092445, "learning_rate": 5.114306504621781e-06, "loss": 0.8953, "step": 17737 }, { "epoch": 0.51, "grad_norm": 7.0934198781639966, "learning_rate": 5.113842866861898e-06, "loss": 0.6162, "step": 17738 }, { "epoch": 0.51, "grad_norm": 2.7349628143641906, "learning_rate": 5.113379228122639e-06, "loss": 0.1781, "step": 17739 }, { "epoch": 0.51, "grad_norm": 4.817552663676585, "learning_rate": 5.112915588407993e-06, "loss": 0.6121, "step": 17740 }, { "epoch": 0.51, "grad_norm": 3.564819367287955, "learning_rate": 5.112451947721947e-06, "loss": 0.1302, "step": 17741 }, { "epoch": 0.51, "grad_norm": 7.012196474085299, "learning_rate": 5.11198830606849e-06, "loss": 0.4737, "step": 17742 }, { "epoch": 0.51, "grad_norm": 6.4168654159217215, "learning_rate": 5.111524663451614e-06, "loss": 0.6606, "step": 17743 }, { "epoch": 0.51, "grad_norm": 4.0375278147161575, "learning_rate": 5.111061019875303e-06, "loss": 0.4403, "step": 17744 }, { "epoch": 0.51, "grad_norm": 11.427097864179434, "learning_rate": 5.110597375343549e-06, "loss": 0.9049, "step": 17745 }, { "epoch": 0.51, "grad_norm": 5.886544997720893, "learning_rate": 5.110133729860337e-06, "loss": 0.4125, "step": 17746 }, { "epoch": 0.51, "grad_norm": 3.8533046411372234, "learning_rate": 5.109670083429661e-06, "loss": 0.2834, "step": 17747 }, { "epoch": 0.51, "grad_norm": 4.708626023481474, "learning_rate": 5.1092064360555046e-06, "loss": 0.689, "step": 17748 }, { "epoch": 0.51, "grad_norm": 12.027125615232228, "learning_rate": 5.10874278774186e-06, "loss": 0.3115, "step": 17749 }, { "epoch": 0.51, "grad_norm": 10.561501179255902, "learning_rate": 5.108279138492713e-06, "loss": 0.7658, "step": 17750 }, { "epoch": 0.51, "grad_norm": 4.687415719864076, "learning_rate": 5.107815488312057e-06, "loss": 0.4026, "step": 17751 }, { "epoch": 0.51, "grad_norm": 5.452820209194061, "learning_rate": 5.107351837203874e-06, "loss": 0.3839, "step": 17752 }, { "epoch": 0.51, "grad_norm": 7.678425589155516, "learning_rate": 5.106888185172159e-06, "loss": 0.3481, "step": 17753 }, { "epoch": 0.51, "grad_norm": 4.10834197197574, "learning_rate": 5.1064245322208964e-06, "loss": 0.3518, "step": 17754 }, { "epoch": 0.51, "grad_norm": 7.21235558482596, "learning_rate": 5.105960878354077e-06, "loss": 0.7129, "step": 17755 }, { "epoch": 0.51, "grad_norm": 5.482059000615596, "learning_rate": 5.10549722357569e-06, "loss": 0.4494, "step": 17756 }, { "epoch": 0.51, "grad_norm": 6.79954869090993, "learning_rate": 5.105033567889723e-06, "loss": 0.3855, "step": 17757 }, { "epoch": 0.51, "grad_norm": 5.193687976599943, "learning_rate": 5.104569911300166e-06, "loss": 0.3861, "step": 17758 }, { "epoch": 0.51, "grad_norm": 6.089260540747576, "learning_rate": 5.104106253811005e-06, "loss": 0.1903, "step": 17759 }, { "epoch": 0.51, "grad_norm": 2.2572825116680573, "learning_rate": 5.103642595426232e-06, "loss": 0.3558, "step": 17760 }, { "epoch": 0.51, "grad_norm": 3.0965025823179837, "learning_rate": 5.103178936149835e-06, "loss": 0.3141, "step": 17761 }, { "epoch": 0.51, "grad_norm": 2.7755265440869876, "learning_rate": 5.1027152759858e-06, "loss": 0.2171, "step": 17762 }, { "epoch": 0.51, "grad_norm": 3.2355507082089185, "learning_rate": 5.10225161493812e-06, "loss": 0.2709, "step": 17763 }, { "epoch": 0.51, "grad_norm": 5.099945957234093, "learning_rate": 5.1017879530107826e-06, "loss": 0.444, "step": 17764 }, { "epoch": 0.51, "grad_norm": 3.467669997610218, "learning_rate": 5.101324290207775e-06, "loss": 0.4162, "step": 17765 }, { "epoch": 0.51, "grad_norm": 9.241606641688563, "learning_rate": 5.100860626533086e-06, "loss": 0.7091, "step": 17766 }, { "epoch": 0.51, "grad_norm": 3.7271578011377398, "learning_rate": 5.100396961990708e-06, "loss": 0.3321, "step": 17767 }, { "epoch": 0.51, "grad_norm": 5.937121379224497, "learning_rate": 5.099933296584626e-06, "loss": 0.5427, "step": 17768 }, { "epoch": 0.51, "grad_norm": 6.437614847288364, "learning_rate": 5.099469630318829e-06, "loss": 0.2031, "step": 17769 }, { "epoch": 0.51, "grad_norm": 2.7168053435598827, "learning_rate": 5.099005963197307e-06, "loss": 0.1719, "step": 17770 }, { "epoch": 0.51, "grad_norm": 4.705418176583481, "learning_rate": 5.0985422952240506e-06, "loss": 0.2082, "step": 17771 }, { "epoch": 0.51, "grad_norm": 9.532055029719437, "learning_rate": 5.098078626403046e-06, "loss": 0.5111, "step": 17772 }, { "epoch": 0.51, "grad_norm": 3.255137198115979, "learning_rate": 5.097614956738284e-06, "loss": 0.3637, "step": 17773 }, { "epoch": 0.51, "grad_norm": 4.558761319020107, "learning_rate": 5.097151286233752e-06, "loss": 0.3376, "step": 17774 }, { "epoch": 0.51, "grad_norm": 8.613780395161898, "learning_rate": 5.0966876148934385e-06, "loss": 0.7569, "step": 17775 }, { "epoch": 0.51, "grad_norm": 8.960020435769755, "learning_rate": 5.096223942721334e-06, "loss": 0.8704, "step": 17776 }, { "epoch": 0.51, "grad_norm": 3.6078064222520045, "learning_rate": 5.095760269721427e-06, "loss": 0.2931, "step": 17777 }, { "epoch": 0.51, "grad_norm": 4.9477586285967545, "learning_rate": 5.095296595897707e-06, "loss": 0.544, "step": 17778 }, { "epoch": 0.51, "grad_norm": 3.0481092056262558, "learning_rate": 5.09483292125416e-06, "loss": 0.3824, "step": 17779 }, { "epoch": 0.51, "grad_norm": 5.131597598681615, "learning_rate": 5.094369245794778e-06, "loss": 0.615, "step": 17780 }, { "epoch": 0.51, "grad_norm": 2.333011108242819, "learning_rate": 5.093905569523551e-06, "loss": 0.1889, "step": 17781 }, { "epoch": 0.51, "grad_norm": 4.725673383836658, "learning_rate": 5.093441892444463e-06, "loss": 0.1387, "step": 17782 }, { "epoch": 0.51, "grad_norm": 4.1376849406879685, "learning_rate": 5.092978214561507e-06, "loss": 0.4792, "step": 17783 }, { "epoch": 0.51, "grad_norm": 4.618751609502572, "learning_rate": 5.092514535878672e-06, "loss": 0.205, "step": 17784 }, { "epoch": 0.51, "grad_norm": 8.185171946570327, "learning_rate": 5.092050856399945e-06, "loss": 0.6503, "step": 17785 }, { "epoch": 0.51, "grad_norm": 2.0394029734987593, "learning_rate": 5.0915871761293146e-06, "loss": 0.1533, "step": 17786 }, { "epoch": 0.51, "grad_norm": 4.78813782270353, "learning_rate": 5.091123495070773e-06, "loss": 0.3926, "step": 17787 }, { "epoch": 0.51, "grad_norm": 6.857334259177531, "learning_rate": 5.090659813228305e-06, "loss": 0.8188, "step": 17788 }, { "epoch": 0.51, "grad_norm": 6.52083805396871, "learning_rate": 5.090196130605903e-06, "loss": 0.7727, "step": 17789 }, { "epoch": 0.51, "grad_norm": 7.577200418287354, "learning_rate": 5.089732447207555e-06, "loss": 0.5484, "step": 17790 }, { "epoch": 0.51, "grad_norm": 8.753877162246686, "learning_rate": 5.089268763037251e-06, "loss": 0.8774, "step": 17791 }, { "epoch": 0.51, "grad_norm": 5.08776541352647, "learning_rate": 5.088805078098976e-06, "loss": 0.6832, "step": 17792 }, { "epoch": 0.51, "grad_norm": 3.700143088331889, "learning_rate": 5.0883413923967226e-06, "loss": 0.0963, "step": 17793 }, { "epoch": 0.51, "grad_norm": 6.383584693694049, "learning_rate": 5.087877705934482e-06, "loss": 0.9091, "step": 17794 }, { "epoch": 0.51, "grad_norm": 5.85870127555717, "learning_rate": 5.087414018716235e-06, "loss": 0.4175, "step": 17795 }, { "epoch": 0.51, "grad_norm": 6.529456152358238, "learning_rate": 5.08695033074598e-06, "loss": 0.7013, "step": 17796 }, { "epoch": 0.51, "grad_norm": 6.706173361476336, "learning_rate": 5.086486642027701e-06, "loss": 0.5216, "step": 17797 }, { "epoch": 0.51, "grad_norm": 4.291511267407144, "learning_rate": 5.086022952565389e-06, "loss": 0.42, "step": 17798 }, { "epoch": 0.51, "grad_norm": 5.4134911790315945, "learning_rate": 5.0855592623630314e-06, "loss": 0.5513, "step": 17799 }, { "epoch": 0.51, "grad_norm": 4.529140060098658, "learning_rate": 5.0850955714246176e-06, "loss": 0.7724, "step": 17800 }, { "epoch": 0.51, "grad_norm": 4.4597312244974, "learning_rate": 5.0846318797541385e-06, "loss": 0.2059, "step": 17801 }, { "epoch": 0.51, "grad_norm": 5.9131585945938685, "learning_rate": 5.08416818735558e-06, "loss": 0.5567, "step": 17802 }, { "epoch": 0.51, "grad_norm": 6.301006353322119, "learning_rate": 5.083704494232932e-06, "loss": 0.4615, "step": 17803 }, { "epoch": 0.51, "grad_norm": 3.037684993840422, "learning_rate": 5.083240800390187e-06, "loss": 0.2313, "step": 17804 }, { "epoch": 0.51, "grad_norm": 4.487513304666757, "learning_rate": 5.08277710583133e-06, "loss": 0.436, "step": 17805 }, { "epoch": 0.51, "grad_norm": 4.6357195837363, "learning_rate": 5.082313410560352e-06, "loss": 0.5931, "step": 17806 }, { "epoch": 0.51, "grad_norm": 8.647087756849245, "learning_rate": 5.081849714581243e-06, "loss": 0.71, "step": 17807 }, { "epoch": 0.51, "grad_norm": 8.737706894942253, "learning_rate": 5.081386017897991e-06, "loss": 0.9724, "step": 17808 }, { "epoch": 0.51, "grad_norm": 6.88883180705728, "learning_rate": 5.080922320514584e-06, "loss": 0.4496, "step": 17809 }, { "epoch": 0.51, "grad_norm": 7.1567432949842145, "learning_rate": 5.080458622435012e-06, "loss": 0.4694, "step": 17810 }, { "epoch": 0.51, "grad_norm": 6.5416929726365325, "learning_rate": 5.079994923663265e-06, "loss": 0.7631, "step": 17811 }, { "epoch": 0.51, "grad_norm": 2.7178592757425593, "learning_rate": 5.079531224203331e-06, "loss": 0.1663, "step": 17812 }, { "epoch": 0.51, "grad_norm": 5.384394220903466, "learning_rate": 5.0790675240592005e-06, "loss": 0.2842, "step": 17813 }, { "epoch": 0.51, "grad_norm": 3.795339265919796, "learning_rate": 5.078603823234862e-06, "loss": 0.1761, "step": 17814 }, { "epoch": 0.51, "grad_norm": 5.60647235884436, "learning_rate": 5.078140121734304e-06, "loss": 0.6695, "step": 17815 }, { "epoch": 0.51, "grad_norm": 8.04277165056995, "learning_rate": 5.0776764195615166e-06, "loss": 0.7899, "step": 17816 }, { "epoch": 0.51, "grad_norm": 3.682804965762887, "learning_rate": 5.077212716720487e-06, "loss": 0.3278, "step": 17817 }, { "epoch": 0.51, "grad_norm": 5.441978145212005, "learning_rate": 5.076749013215209e-06, "loss": 0.7634, "step": 17818 }, { "epoch": 0.51, "grad_norm": 6.731836712414638, "learning_rate": 5.076285309049667e-06, "loss": 0.6899, "step": 17819 }, { "epoch": 0.51, "grad_norm": 5.5494432333312895, "learning_rate": 5.07582160422785e-06, "loss": 0.5526, "step": 17820 }, { "epoch": 0.51, "grad_norm": 4.108304395567297, "learning_rate": 5.075357898753751e-06, "loss": 0.3464, "step": 17821 }, { "epoch": 0.51, "grad_norm": 5.2162303666952665, "learning_rate": 5.074894192631357e-06, "loss": 0.3999, "step": 17822 }, { "epoch": 0.51, "grad_norm": 7.6565012053715, "learning_rate": 5.074430485864657e-06, "loss": 0.6554, "step": 17823 }, { "epoch": 0.51, "grad_norm": 5.722174322777586, "learning_rate": 5.073966778457642e-06, "loss": 0.4879, "step": 17824 }, { "epoch": 0.51, "grad_norm": 12.711468779862235, "learning_rate": 5.073503070414298e-06, "loss": 0.5043, "step": 17825 }, { "epoch": 0.51, "grad_norm": 2.8850438649497203, "learning_rate": 5.073039361738617e-06, "loss": 0.2528, "step": 17826 }, { "epoch": 0.51, "grad_norm": 6.580304579830806, "learning_rate": 5.072575652434588e-06, "loss": 0.4976, "step": 17827 }, { "epoch": 0.51, "grad_norm": 6.08657339982813, "learning_rate": 5.072111942506199e-06, "loss": 0.4047, "step": 17828 }, { "epoch": 0.51, "grad_norm": 5.0707066269619965, "learning_rate": 5.07164823195744e-06, "loss": 0.4809, "step": 17829 }, { "epoch": 0.51, "grad_norm": 4.7281397812354475, "learning_rate": 5.0711845207923e-06, "loss": 0.3708, "step": 17830 }, { "epoch": 0.51, "grad_norm": 3.835841035892483, "learning_rate": 5.070720809014769e-06, "loss": 0.5525, "step": 17831 }, { "epoch": 0.51, "grad_norm": 3.4418350368276522, "learning_rate": 5.0702570966288345e-06, "loss": 0.3256, "step": 17832 }, { "epoch": 0.51, "grad_norm": 6.762071129813753, "learning_rate": 5.069793383638487e-06, "loss": 1.0121, "step": 17833 }, { "epoch": 0.51, "grad_norm": 3.60821128019159, "learning_rate": 5.069329670047716e-06, "loss": 0.2374, "step": 17834 }, { "epoch": 0.51, "grad_norm": 3.2248331196804436, "learning_rate": 5.068865955860511e-06, "loss": 0.1907, "step": 17835 }, { "epoch": 0.51, "grad_norm": 4.34775572574737, "learning_rate": 5.068402241080859e-06, "loss": 0.4179, "step": 17836 }, { "epoch": 0.51, "grad_norm": 5.708245975169352, "learning_rate": 5.067938525712751e-06, "loss": 0.5971, "step": 17837 }, { "epoch": 0.51, "grad_norm": 6.23494375593549, "learning_rate": 5.067474809760179e-06, "loss": 0.7212, "step": 17838 }, { "epoch": 0.51, "grad_norm": 4.605913990037619, "learning_rate": 5.067011093227126e-06, "loss": 0.3354, "step": 17839 }, { "epoch": 0.51, "grad_norm": 3.525257968255951, "learning_rate": 5.066547376117587e-06, "loss": 0.5197, "step": 17840 }, { "epoch": 0.51, "grad_norm": 8.342895292588667, "learning_rate": 5.06608365843555e-06, "loss": 0.6532, "step": 17841 }, { "epoch": 0.51, "grad_norm": 3.708762083377832, "learning_rate": 5.0656199401850015e-06, "loss": 0.6412, "step": 17842 }, { "epoch": 0.51, "grad_norm": 7.17799432192982, "learning_rate": 5.065156221369934e-06, "loss": 0.3053, "step": 17843 }, { "epoch": 0.51, "grad_norm": 7.4366983414418115, "learning_rate": 5.0646925019943334e-06, "loss": 0.3468, "step": 17844 }, { "epoch": 0.51, "grad_norm": 8.362162358833434, "learning_rate": 5.064228782062193e-06, "loss": 0.8598, "step": 17845 }, { "epoch": 0.51, "grad_norm": 5.062069344746809, "learning_rate": 5.063765061577499e-06, "loss": 0.3481, "step": 17846 }, { "epoch": 0.51, "grad_norm": 8.432187194993864, "learning_rate": 5.063301340544243e-06, "loss": 0.6015, "step": 17847 }, { "epoch": 0.51, "grad_norm": 7.1378042714054954, "learning_rate": 5.0628376189664145e-06, "loss": 0.5641, "step": 17848 }, { "epoch": 0.51, "grad_norm": 3.378600919301585, "learning_rate": 5.062373896848e-06, "loss": 0.328, "step": 17849 }, { "epoch": 0.51, "grad_norm": 9.969215071302173, "learning_rate": 5.06191017419299e-06, "loss": 0.564, "step": 17850 }, { "epoch": 0.51, "grad_norm": 4.3633281462769435, "learning_rate": 5.061446451005375e-06, "loss": 0.2587, "step": 17851 }, { "epoch": 0.51, "grad_norm": 9.021442143171475, "learning_rate": 5.060982727289144e-06, "loss": 1.2556, "step": 17852 }, { "epoch": 0.51, "grad_norm": 8.722634164702784, "learning_rate": 5.0605190030482865e-06, "loss": 0.3283, "step": 17853 }, { "epoch": 0.51, "grad_norm": 6.5516545186220885, "learning_rate": 5.060055278286792e-06, "loss": 0.5198, "step": 17854 }, { "epoch": 0.51, "grad_norm": 3.975402003179495, "learning_rate": 5.059591553008648e-06, "loss": 0.4006, "step": 17855 }, { "epoch": 0.51, "grad_norm": 4.6371128136570094, "learning_rate": 5.059127827217846e-06, "loss": 0.5734, "step": 17856 }, { "epoch": 0.51, "grad_norm": 5.758504797466214, "learning_rate": 5.0586641009183745e-06, "loss": 0.8919, "step": 17857 }, { "epoch": 0.51, "grad_norm": 4.237192687422425, "learning_rate": 5.058200374114224e-06, "loss": 0.7177, "step": 17858 }, { "epoch": 0.51, "grad_norm": 7.931565987715577, "learning_rate": 5.0577366468093815e-06, "loss": 0.7542, "step": 17859 }, { "epoch": 0.51, "grad_norm": 6.1173838538838865, "learning_rate": 5.057272919007837e-06, "loss": 0.673, "step": 17860 }, { "epoch": 0.51, "grad_norm": 8.608117428854309, "learning_rate": 5.0568091907135805e-06, "loss": 0.5903, "step": 17861 }, { "epoch": 0.51, "grad_norm": 4.8867728568054, "learning_rate": 5.056345461930603e-06, "loss": 0.713, "step": 17862 }, { "epoch": 0.51, "grad_norm": 6.359963054789593, "learning_rate": 5.055881732662892e-06, "loss": 0.4116, "step": 17863 }, { "epoch": 0.51, "grad_norm": 4.918320259215609, "learning_rate": 5.055418002914437e-06, "loss": 0.5633, "step": 17864 }, { "epoch": 0.51, "grad_norm": 13.207064466369795, "learning_rate": 5.054954272689229e-06, "loss": 0.7419, "step": 17865 }, { "epoch": 0.51, "grad_norm": 6.369084362353557, "learning_rate": 5.054490541991255e-06, "loss": 0.3768, "step": 17866 }, { "epoch": 0.51, "grad_norm": 4.1354118494474985, "learning_rate": 5.054026810824505e-06, "loss": 0.2816, "step": 17867 }, { "epoch": 0.51, "grad_norm": 4.477693930706744, "learning_rate": 5.053563079192972e-06, "loss": 0.5347, "step": 17868 }, { "epoch": 0.51, "grad_norm": 7.963448110812215, "learning_rate": 5.053099347100639e-06, "loss": 0.6915, "step": 17869 }, { "epoch": 0.51, "grad_norm": 1.944132588749135, "learning_rate": 5.0526356145514984e-06, "loss": 0.1587, "step": 17870 }, { "epoch": 0.51, "grad_norm": 7.407257912858013, "learning_rate": 5.052171881549544e-06, "loss": 0.2986, "step": 17871 }, { "epoch": 0.51, "grad_norm": 6.676120238293887, "learning_rate": 5.051708148098759e-06, "loss": 0.4607, "step": 17872 }, { "epoch": 0.51, "grad_norm": 5.4517568713781746, "learning_rate": 5.051244414203133e-06, "loss": 0.2222, "step": 17873 }, { "epoch": 0.51, "grad_norm": 8.331511934230756, "learning_rate": 5.05078067986666e-06, "loss": 0.5884, "step": 17874 }, { "epoch": 0.51, "grad_norm": 6.853620613709413, "learning_rate": 5.050316945093328e-06, "loss": 0.6444, "step": 17875 }, { "epoch": 0.51, "grad_norm": 6.533208690236092, "learning_rate": 5.049853209887124e-06, "loss": 0.5395, "step": 17876 }, { "epoch": 0.51, "grad_norm": 9.298686208525394, "learning_rate": 5.049389474252039e-06, "loss": 0.8752, "step": 17877 }, { "epoch": 0.51, "grad_norm": 7.817861319579838, "learning_rate": 5.048925738192063e-06, "loss": 0.857, "step": 17878 }, { "epoch": 0.51, "grad_norm": 5.804922394344649, "learning_rate": 5.0484620017111855e-06, "loss": 0.5073, "step": 17879 }, { "epoch": 0.51, "grad_norm": 5.596684075298729, "learning_rate": 5.047998264813394e-06, "loss": 0.218, "step": 17880 }, { "epoch": 0.51, "grad_norm": 5.802933685978248, "learning_rate": 5.04753452750268e-06, "loss": 0.5241, "step": 17881 }, { "epoch": 0.51, "grad_norm": 6.212364560652268, "learning_rate": 5.047070789783033e-06, "loss": 0.4534, "step": 17882 }, { "epoch": 0.51, "grad_norm": 3.480993605919335, "learning_rate": 5.046607051658441e-06, "loss": 0.5044, "step": 17883 }, { "epoch": 0.51, "grad_norm": 6.489567380572245, "learning_rate": 5.046143313132894e-06, "loss": 0.7733, "step": 17884 }, { "epoch": 0.51, "grad_norm": 6.5362788648144905, "learning_rate": 5.0456795742103814e-06, "loss": 0.5088, "step": 17885 }, { "epoch": 0.51, "grad_norm": 23.147718023782758, "learning_rate": 5.045215834894893e-06, "loss": 0.5134, "step": 17886 }, { "epoch": 0.51, "grad_norm": 4.60353338912236, "learning_rate": 5.04475209519042e-06, "loss": 0.4152, "step": 17887 }, { "epoch": 0.51, "grad_norm": 6.134802933864584, "learning_rate": 5.044288355100951e-06, "loss": 0.5209, "step": 17888 }, { "epoch": 0.51, "grad_norm": 9.318604382324498, "learning_rate": 5.043824614630472e-06, "loss": 0.8044, "step": 17889 }, { "epoch": 0.51, "grad_norm": 7.312180927847028, "learning_rate": 5.043360873782975e-06, "loss": 0.4269, "step": 17890 }, { "epoch": 0.51, "grad_norm": 8.020993849564237, "learning_rate": 5.042897132562451e-06, "loss": 0.5596, "step": 17891 }, { "epoch": 0.51, "grad_norm": 3.1518804500426056, "learning_rate": 5.04243339097289e-06, "loss": 0.1521, "step": 17892 }, { "epoch": 0.51, "grad_norm": 1.2197528161307427, "learning_rate": 5.041969649018278e-06, "loss": 0.0826, "step": 17893 }, { "epoch": 0.51, "grad_norm": 5.474174359744415, "learning_rate": 5.041505906702604e-06, "loss": 0.3542, "step": 17894 }, { "epoch": 0.51, "grad_norm": 5.938710942510285, "learning_rate": 5.041042164029864e-06, "loss": 0.4366, "step": 17895 }, { "epoch": 0.51, "grad_norm": 8.815700193434024, "learning_rate": 5.040578421004041e-06, "loss": 0.5511, "step": 17896 }, { "epoch": 0.51, "grad_norm": 7.55243100823942, "learning_rate": 5.040114677629127e-06, "loss": 0.8084, "step": 17897 }, { "epoch": 0.51, "grad_norm": 7.140580926337588, "learning_rate": 5.039650933909113e-06, "loss": 0.4445, "step": 17898 }, { "epoch": 0.51, "grad_norm": 7.620038216062016, "learning_rate": 5.039187189847985e-06, "loss": 0.5504, "step": 17899 }, { "epoch": 0.51, "grad_norm": 8.331050738207509, "learning_rate": 5.038723445449735e-06, "loss": 0.8852, "step": 17900 }, { "epoch": 0.51, "grad_norm": 9.064009047523083, "learning_rate": 5.038259700718351e-06, "loss": 0.6115, "step": 17901 }, { "epoch": 0.51, "grad_norm": 9.305686557537218, "learning_rate": 5.037795955657824e-06, "loss": 0.8887, "step": 17902 }, { "epoch": 0.51, "grad_norm": 10.660073318801867, "learning_rate": 5.037332210272143e-06, "loss": 0.6526, "step": 17903 }, { "epoch": 0.51, "grad_norm": 4.959772120286218, "learning_rate": 5.036868464565299e-06, "loss": 0.6481, "step": 17904 }, { "epoch": 0.51, "grad_norm": 3.2131310833864295, "learning_rate": 5.036404718541279e-06, "loss": 0.2534, "step": 17905 }, { "epoch": 0.51, "grad_norm": 13.841330771109359, "learning_rate": 5.035940972204074e-06, "loss": 0.5725, "step": 17906 }, { "epoch": 0.51, "grad_norm": 4.025994830663085, "learning_rate": 5.035477225557673e-06, "loss": 0.2591, "step": 17907 }, { "epoch": 0.51, "grad_norm": 4.235751586346391, "learning_rate": 5.035013478606066e-06, "loss": 0.4601, "step": 17908 }, { "epoch": 0.51, "grad_norm": 6.4043707347448775, "learning_rate": 5.034549731353243e-06, "loss": 0.4382, "step": 17909 }, { "epoch": 0.51, "grad_norm": 5.962467822102024, "learning_rate": 5.034085983803191e-06, "loss": 0.2662, "step": 17910 }, { "epoch": 0.51, "grad_norm": 6.054228576987704, "learning_rate": 5.033622235959902e-06, "loss": 0.8358, "step": 17911 }, { "epoch": 0.51, "grad_norm": 3.7834780725585033, "learning_rate": 5.033158487827367e-06, "loss": 0.4105, "step": 17912 }, { "epoch": 0.51, "grad_norm": 5.417414388644704, "learning_rate": 5.0326947394095715e-06, "loss": 0.3313, "step": 17913 }, { "epoch": 0.51, "grad_norm": 3.409638705313078, "learning_rate": 5.032230990710508e-06, "loss": 0.5035, "step": 17914 }, { "epoch": 0.51, "grad_norm": 7.084314263432195, "learning_rate": 5.0317672417341666e-06, "loss": 0.1443, "step": 17915 }, { "epoch": 0.51, "grad_norm": 6.3972412779750565, "learning_rate": 5.031303492484534e-06, "loss": 0.5503, "step": 17916 }, { "epoch": 0.51, "grad_norm": 7.911919713248433, "learning_rate": 5.030839742965601e-06, "loss": 0.6544, "step": 17917 }, { "epoch": 0.51, "grad_norm": 9.440704509017689, "learning_rate": 5.030375993181358e-06, "loss": 0.2076, "step": 17918 }, { "epoch": 0.51, "grad_norm": 5.253668752074269, "learning_rate": 5.029912243135794e-06, "loss": 0.3759, "step": 17919 }, { "epoch": 0.51, "grad_norm": 5.281868937864335, "learning_rate": 5.029448492832898e-06, "loss": 0.424, "step": 17920 }, { "epoch": 0.51, "grad_norm": 9.178916035152698, "learning_rate": 5.028984742276662e-06, "loss": 0.6492, "step": 17921 }, { "epoch": 0.51, "grad_norm": 5.63433093637172, "learning_rate": 5.028520991471073e-06, "loss": 0.3417, "step": 17922 }, { "epoch": 0.51, "grad_norm": 10.422676204327463, "learning_rate": 5.028057240420121e-06, "loss": 0.3076, "step": 17923 }, { "epoch": 0.51, "grad_norm": 6.287559452875464, "learning_rate": 5.027593489127796e-06, "loss": 0.5997, "step": 17924 }, { "epoch": 0.51, "grad_norm": 8.68602053991494, "learning_rate": 5.027129737598089e-06, "loss": 0.7683, "step": 17925 }, { "epoch": 0.51, "grad_norm": 5.2823704992435045, "learning_rate": 5.026665985834985e-06, "loss": 0.6721, "step": 17926 }, { "epoch": 0.51, "grad_norm": 6.378550942985655, "learning_rate": 5.026202233842479e-06, "loss": 0.6982, "step": 17927 }, { "epoch": 0.51, "grad_norm": 6.749444302890954, "learning_rate": 5.025738481624559e-06, "loss": 0.6935, "step": 17928 }, { "epoch": 0.51, "grad_norm": 4.900714156599009, "learning_rate": 5.025274729185213e-06, "loss": 0.3354, "step": 17929 }, { "epoch": 0.51, "grad_norm": 2.7662339590739196, "learning_rate": 5.024810976528431e-06, "loss": 0.1953, "step": 17930 }, { "epoch": 0.51, "grad_norm": 4.098313707059324, "learning_rate": 5.024347223658205e-06, "loss": 0.3359, "step": 17931 }, { "epoch": 0.51, "grad_norm": 4.665497959908778, "learning_rate": 5.0238834705785225e-06, "loss": 0.4883, "step": 17932 }, { "epoch": 0.51, "grad_norm": 2.9680880963137275, "learning_rate": 5.023419717293373e-06, "loss": 0.2891, "step": 17933 }, { "epoch": 0.51, "grad_norm": 5.388962641382822, "learning_rate": 5.022955963806746e-06, "loss": 0.363, "step": 17934 }, { "epoch": 0.51, "grad_norm": 5.9096745649608415, "learning_rate": 5.022492210122632e-06, "loss": 0.4548, "step": 17935 }, { "epoch": 0.51, "grad_norm": 4.7309268190976566, "learning_rate": 5.02202845624502e-06, "loss": 0.5864, "step": 17936 }, { "epoch": 0.51, "grad_norm": 7.67461897808225, "learning_rate": 5.0215647021779e-06, "loss": 0.6516, "step": 17937 }, { "epoch": 0.51, "grad_norm": 5.049582118505768, "learning_rate": 5.021100947925262e-06, "loss": 0.3201, "step": 17938 }, { "epoch": 0.51, "grad_norm": 4.566817187638595, "learning_rate": 5.020637193491096e-06, "loss": 0.4941, "step": 17939 }, { "epoch": 0.51, "grad_norm": 5.092110726970811, "learning_rate": 5.020173438879389e-06, "loss": 0.3897, "step": 17940 }, { "epoch": 0.51, "grad_norm": 4.104193724464001, "learning_rate": 5.019709684094133e-06, "loss": 0.3391, "step": 17941 }, { "epoch": 0.51, "grad_norm": 6.04716487061599, "learning_rate": 5.019245929139318e-06, "loss": 0.6674, "step": 17942 }, { "epoch": 0.51, "grad_norm": 4.385345542977319, "learning_rate": 5.01878217401893e-06, "loss": 0.5006, "step": 17943 }, { "epoch": 0.51, "grad_norm": 4.8131876862875576, "learning_rate": 5.018318418736962e-06, "loss": 0.3703, "step": 17944 }, { "epoch": 0.51, "grad_norm": 10.611896468076283, "learning_rate": 5.017854663297405e-06, "loss": 0.9552, "step": 17945 }, { "epoch": 0.51, "grad_norm": 6.918400014900433, "learning_rate": 5.017390907704246e-06, "loss": 0.4144, "step": 17946 }, { "epoch": 0.51, "grad_norm": 8.34206599125429, "learning_rate": 5.016927151961475e-06, "loss": 0.7546, "step": 17947 }, { "epoch": 0.51, "grad_norm": 4.54843327932578, "learning_rate": 5.016463396073081e-06, "loss": 0.5725, "step": 17948 }, { "epoch": 0.51, "grad_norm": 6.292383527863742, "learning_rate": 5.015999640043056e-06, "loss": 0.3788, "step": 17949 }, { "epoch": 0.51, "grad_norm": 5.813207337194485, "learning_rate": 5.015535883875387e-06, "loss": 0.4295, "step": 17950 }, { "epoch": 0.51, "grad_norm": 4.940710110688308, "learning_rate": 5.015072127574063e-06, "loss": 0.3614, "step": 17951 }, { "epoch": 0.51, "grad_norm": 6.69868542742051, "learning_rate": 5.014608371143077e-06, "loss": 0.3299, "step": 17952 }, { "epoch": 0.51, "grad_norm": 4.112052770257463, "learning_rate": 5.014144614586417e-06, "loss": 0.4037, "step": 17953 }, { "epoch": 0.51, "grad_norm": 6.862943446661585, "learning_rate": 5.013680857908073e-06, "loss": 0.7689, "step": 17954 }, { "epoch": 0.51, "grad_norm": 4.511327975966194, "learning_rate": 5.013217101112034e-06, "loss": 0.6747, "step": 17955 }, { "epoch": 0.51, "grad_norm": 9.243548592107144, "learning_rate": 5.01275334420229e-06, "loss": 0.9118, "step": 17956 }, { "epoch": 0.51, "grad_norm": 6.385908778914493, "learning_rate": 5.01228958718283e-06, "loss": 0.4888, "step": 17957 }, { "epoch": 0.51, "grad_norm": 6.1294120545821675, "learning_rate": 5.011825830057644e-06, "loss": 0.5627, "step": 17958 }, { "epoch": 0.51, "grad_norm": 7.6930754997290425, "learning_rate": 5.011362072830722e-06, "loss": 0.7072, "step": 17959 }, { "epoch": 0.51, "grad_norm": 3.7214234303176634, "learning_rate": 5.010898315506054e-06, "loss": 0.3249, "step": 17960 }, { "epoch": 0.51, "grad_norm": 3.191455705275853, "learning_rate": 5.010434558087629e-06, "loss": 0.2931, "step": 17961 }, { "epoch": 0.51, "grad_norm": 5.278072145964181, "learning_rate": 5.009970800579438e-06, "loss": 0.7088, "step": 17962 }, { "epoch": 0.51, "grad_norm": 4.968415494971225, "learning_rate": 5.009507042985468e-06, "loss": 0.4689, "step": 17963 }, { "epoch": 0.51, "grad_norm": 7.128643877609749, "learning_rate": 5.009043285309709e-06, "loss": 0.4824, "step": 17964 }, { "epoch": 0.51, "grad_norm": 6.254205662018926, "learning_rate": 5.008579527556153e-06, "loss": 0.4934, "step": 17965 }, { "epoch": 0.51, "grad_norm": 3.118843652087384, "learning_rate": 5.00811576972879e-06, "loss": 0.3675, "step": 17966 }, { "epoch": 0.51, "grad_norm": 8.091742837418378, "learning_rate": 5.007652011831605e-06, "loss": 0.5072, "step": 17967 }, { "epoch": 0.51, "grad_norm": 4.588237776924629, "learning_rate": 5.007188253868591e-06, "loss": 0.8042, "step": 17968 }, { "epoch": 0.51, "grad_norm": 4.758387914479281, "learning_rate": 5.00672449584374e-06, "loss": 0.295, "step": 17969 }, { "epoch": 0.51, "grad_norm": 8.590238984483895, "learning_rate": 5.006260737761037e-06, "loss": 0.5406, "step": 17970 }, { "epoch": 0.51, "grad_norm": 13.948684823172528, "learning_rate": 5.005796979624474e-06, "loss": 1.0228, "step": 17971 }, { "epoch": 0.51, "grad_norm": 5.518022875120028, "learning_rate": 5.005333221438041e-06, "loss": 0.47, "step": 17972 }, { "epoch": 0.51, "grad_norm": 4.811953525975185, "learning_rate": 5.004869463205726e-06, "loss": 0.9201, "step": 17973 }, { "epoch": 0.51, "grad_norm": 8.110076932718595, "learning_rate": 5.00440570493152e-06, "loss": 0.5601, "step": 17974 }, { "epoch": 0.51, "grad_norm": 8.49938022934552, "learning_rate": 5.0039419466194114e-06, "loss": 0.5512, "step": 17975 }, { "epoch": 0.51, "grad_norm": 3.978576869427663, "learning_rate": 5.0034781882733914e-06, "loss": 0.575, "step": 17976 }, { "epoch": 0.51, "grad_norm": 3.7638264709933975, "learning_rate": 5.003014429897449e-06, "loss": 0.0555, "step": 17977 }, { "epoch": 0.51, "grad_norm": 4.050141360323703, "learning_rate": 5.002550671495574e-06, "loss": 0.5582, "step": 17978 }, { "epoch": 0.51, "grad_norm": 5.474380210737233, "learning_rate": 5.002086913071757e-06, "loss": 0.5917, "step": 17979 }, { "epoch": 0.51, "grad_norm": 6.459136748845252, "learning_rate": 5.001623154629985e-06, "loss": 0.8181, "step": 17980 }, { "epoch": 0.51, "grad_norm": 8.559875420171856, "learning_rate": 5.001159396174249e-06, "loss": 0.514, "step": 17981 }, { "epoch": 0.51, "grad_norm": 9.943827791706926, "learning_rate": 5.000695637708539e-06, "loss": 0.7231, "step": 17982 }, { "epoch": 0.51, "grad_norm": 7.369511663466871, "learning_rate": 5.000231879236846e-06, "loss": 0.6396, "step": 17983 }, { "epoch": 0.52, "grad_norm": 3.6545221533522056, "learning_rate": 4.999768120763157e-06, "loss": 0.1188, "step": 17984 }, { "epoch": 0.52, "grad_norm": 9.161239878768212, "learning_rate": 4.9993043622914624e-06, "loss": 0.7515, "step": 17985 }, { "epoch": 0.52, "grad_norm": 7.22316194273171, "learning_rate": 4.998840603825753e-06, "loss": 0.626, "step": 17986 }, { "epoch": 0.52, "grad_norm": 3.3940421960418483, "learning_rate": 4.998376845370017e-06, "loss": 0.3268, "step": 17987 }, { "epoch": 0.52, "grad_norm": 6.087349214977558, "learning_rate": 4.997913086928245e-06, "loss": 0.7575, "step": 17988 }, { "epoch": 0.52, "grad_norm": 6.155719017715237, "learning_rate": 4.997449328504428e-06, "loss": 0.5539, "step": 17989 }, { "epoch": 0.52, "grad_norm": 7.744214205494544, "learning_rate": 4.996985570102551e-06, "loss": 0.5854, "step": 17990 }, { "epoch": 0.52, "grad_norm": 3.7757953361755208, "learning_rate": 4.996521811726609e-06, "loss": 0.2575, "step": 17991 }, { "epoch": 0.52, "grad_norm": 4.4131258620642315, "learning_rate": 4.996058053380589e-06, "loss": 0.1495, "step": 17992 }, { "epoch": 0.52, "grad_norm": 3.521794818009431, "learning_rate": 4.995594295068483e-06, "loss": 0.4588, "step": 17993 }, { "epoch": 0.52, "grad_norm": 3.486327936936488, "learning_rate": 4.995130536794275e-06, "loss": 0.3731, "step": 17994 }, { "epoch": 0.52, "grad_norm": 3.1853509652094334, "learning_rate": 4.994666778561962e-06, "loss": 0.1928, "step": 17995 }, { "epoch": 0.52, "grad_norm": 7.311995073149876, "learning_rate": 4.994203020375528e-06, "loss": 0.8065, "step": 17996 }, { "epoch": 0.52, "grad_norm": 7.071499314993297, "learning_rate": 4.993739262238964e-06, "loss": 0.3098, "step": 17997 }, { "epoch": 0.52, "grad_norm": 4.3462060255887565, "learning_rate": 4.993275504156262e-06, "loss": 0.2487, "step": 17998 }, { "epoch": 0.52, "grad_norm": 5.837876739434391, "learning_rate": 4.9928117461314095e-06, "loss": 0.6661, "step": 17999 }, { "epoch": 0.52, "grad_norm": 5.595178112811956, "learning_rate": 4.9923479881683966e-06, "loss": 0.371, "step": 18000 }, { "epoch": 0.52, "grad_norm": 4.133861848409824, "learning_rate": 4.991884230271212e-06, "loss": 0.288, "step": 18001 }, { "epoch": 0.52, "grad_norm": 3.481369415782767, "learning_rate": 4.991420472443848e-06, "loss": 0.1697, "step": 18002 }, { "epoch": 0.52, "grad_norm": 5.941472822489768, "learning_rate": 4.990956714690292e-06, "loss": 0.4934, "step": 18003 }, { "epoch": 0.52, "grad_norm": 8.305988888048564, "learning_rate": 4.990492957014535e-06, "loss": 0.5435, "step": 18004 }, { "epoch": 0.52, "grad_norm": 5.169180222749295, "learning_rate": 4.990029199420564e-06, "loss": 0.7709, "step": 18005 }, { "epoch": 0.52, "grad_norm": 4.781112893326565, "learning_rate": 4.989565441912371e-06, "loss": 0.2429, "step": 18006 }, { "epoch": 0.52, "grad_norm": 5.397723447442411, "learning_rate": 4.989101684493947e-06, "loss": 0.5597, "step": 18007 }, { "epoch": 0.52, "grad_norm": 4.221337182091022, "learning_rate": 4.988637927169278e-06, "loss": 0.5294, "step": 18008 }, { "epoch": 0.52, "grad_norm": 5.538426113830478, "learning_rate": 4.988174169942357e-06, "loss": 0.6058, "step": 18009 }, { "epoch": 0.52, "grad_norm": 8.242637585830028, "learning_rate": 4.987710412817171e-06, "loss": 0.8722, "step": 18010 }, { "epoch": 0.52, "grad_norm": 7.231654930811486, "learning_rate": 4.9872466557977126e-06, "loss": 0.69, "step": 18011 }, { "epoch": 0.52, "grad_norm": 6.348551619544069, "learning_rate": 4.9867828988879665e-06, "loss": 0.5654, "step": 18012 }, { "epoch": 0.52, "grad_norm": 6.569919932790567, "learning_rate": 4.986319142091928e-06, "loss": 0.5146, "step": 18013 }, { "epoch": 0.52, "grad_norm": 3.536812443858312, "learning_rate": 4.9858553854135836e-06, "loss": 0.2828, "step": 18014 }, { "epoch": 0.52, "grad_norm": 4.014333940462593, "learning_rate": 4.985391628856924e-06, "loss": 0.3503, "step": 18015 }, { "epoch": 0.52, "grad_norm": 4.766812111007197, "learning_rate": 4.984927872425939e-06, "loss": 0.7768, "step": 18016 }, { "epoch": 0.52, "grad_norm": 4.071607416689716, "learning_rate": 4.984464116124615e-06, "loss": 0.2773, "step": 18017 }, { "epoch": 0.52, "grad_norm": 5.848036229408853, "learning_rate": 4.984000359956947e-06, "loss": 0.6292, "step": 18018 }, { "epoch": 0.52, "grad_norm": 4.765623549164097, "learning_rate": 4.9835366039269206e-06, "loss": 0.3622, "step": 18019 }, { "epoch": 0.52, "grad_norm": 5.156061296188921, "learning_rate": 4.983072848038528e-06, "loss": 0.439, "step": 18020 }, { "epoch": 0.52, "grad_norm": 7.024565507477858, "learning_rate": 4.982609092295755e-06, "loss": 0.4005, "step": 18021 }, { "epoch": 0.52, "grad_norm": 8.143916718422505, "learning_rate": 4.9821453367025954e-06, "loss": 0.4037, "step": 18022 }, { "epoch": 0.52, "grad_norm": 5.285091740904583, "learning_rate": 4.9816815812630385e-06, "loss": 0.3755, "step": 18023 }, { "epoch": 0.52, "grad_norm": 8.962935313954835, "learning_rate": 4.9812178259810704e-06, "loss": 0.6835, "step": 18024 }, { "epoch": 0.52, "grad_norm": 10.905046719079772, "learning_rate": 4.980754070860684e-06, "loss": 0.4337, "step": 18025 }, { "epoch": 0.52, "grad_norm": 4.693164556288079, "learning_rate": 4.980290315905869e-06, "loss": 0.519, "step": 18026 }, { "epoch": 0.52, "grad_norm": 5.106976707415836, "learning_rate": 4.9798265611206135e-06, "loss": 0.6358, "step": 18027 }, { "epoch": 0.52, "grad_norm": 5.062840885883109, "learning_rate": 4.979362806508906e-06, "loss": 0.3832, "step": 18028 }, { "epoch": 0.52, "grad_norm": 5.796772197624128, "learning_rate": 4.978899052074739e-06, "loss": 0.2863, "step": 18029 }, { "epoch": 0.52, "grad_norm": 7.510316429227631, "learning_rate": 4.9784352978221015e-06, "loss": 0.7026, "step": 18030 }, { "epoch": 0.52, "grad_norm": 6.778888203209022, "learning_rate": 4.97797154375498e-06, "loss": 0.6197, "step": 18031 }, { "epoch": 0.52, "grad_norm": 3.2340064829358557, "learning_rate": 4.97750778987737e-06, "loss": 0.6058, "step": 18032 }, { "epoch": 0.52, "grad_norm": 2.9753258005827945, "learning_rate": 4.977044036193256e-06, "loss": 0.1528, "step": 18033 }, { "epoch": 0.52, "grad_norm": 6.015191951869406, "learning_rate": 4.97658028270663e-06, "loss": 0.5442, "step": 18034 }, { "epoch": 0.52, "grad_norm": 4.4177083040702305, "learning_rate": 4.976116529421479e-06, "loss": 0.2896, "step": 18035 }, { "epoch": 0.52, "grad_norm": 6.640173827159573, "learning_rate": 4.9756527763417976e-06, "loss": 0.754, "step": 18036 }, { "epoch": 0.52, "grad_norm": 6.858371044192767, "learning_rate": 4.97518902347157e-06, "loss": 0.6772, "step": 18037 }, { "epoch": 0.52, "grad_norm": 8.510656912940547, "learning_rate": 4.974725270814788e-06, "loss": 0.599, "step": 18038 }, { "epoch": 0.52, "grad_norm": 4.8307021833490476, "learning_rate": 4.974261518375443e-06, "loss": 0.2604, "step": 18039 }, { "epoch": 0.52, "grad_norm": 4.583186609636223, "learning_rate": 4.973797766157521e-06, "loss": 0.4978, "step": 18040 }, { "epoch": 0.52, "grad_norm": 3.4246906210398413, "learning_rate": 4.973334014165017e-06, "loss": 0.2404, "step": 18041 }, { "epoch": 0.52, "grad_norm": 7.442389411758162, "learning_rate": 4.972870262401913e-06, "loss": 0.626, "step": 18042 }, { "epoch": 0.52, "grad_norm": 6.182603690530797, "learning_rate": 4.972406510872207e-06, "loss": 0.3872, "step": 18043 }, { "epoch": 0.52, "grad_norm": 9.981878837215447, "learning_rate": 4.971942759579881e-06, "loss": 0.7541, "step": 18044 }, { "epoch": 0.52, "grad_norm": 5.305545653200257, "learning_rate": 4.9714790085289296e-06, "loss": 0.4486, "step": 18045 }, { "epoch": 0.52, "grad_norm": 6.446512228540309, "learning_rate": 4.971015257723341e-06, "loss": 0.4145, "step": 18046 }, { "epoch": 0.52, "grad_norm": 5.367685886214961, "learning_rate": 4.9705515071671025e-06, "loss": 0.6179, "step": 18047 }, { "epoch": 0.52, "grad_norm": 4.1020510137371975, "learning_rate": 4.970087756864208e-06, "loss": 0.3962, "step": 18048 }, { "epoch": 0.52, "grad_norm": 5.682224950448279, "learning_rate": 4.969624006818644e-06, "loss": 0.2646, "step": 18049 }, { "epoch": 0.52, "grad_norm": 6.9667113191673335, "learning_rate": 4.969160257034401e-06, "loss": 0.3527, "step": 18050 }, { "epoch": 0.52, "grad_norm": 6.196107176026759, "learning_rate": 4.9686965075154675e-06, "loss": 0.5779, "step": 18051 }, { "epoch": 0.52, "grad_norm": 8.072704274654825, "learning_rate": 4.968232758265836e-06, "loss": 0.6398, "step": 18052 }, { "epoch": 0.52, "grad_norm": 21.816482836145788, "learning_rate": 4.967769009289493e-06, "loss": 0.2451, "step": 18053 }, { "epoch": 0.52, "grad_norm": 6.955503489838076, "learning_rate": 4.9673052605904284e-06, "loss": 0.277, "step": 18054 }, { "epoch": 0.52, "grad_norm": 5.165158223353354, "learning_rate": 4.966841512172635e-06, "loss": 0.4718, "step": 18055 }, { "epoch": 0.52, "grad_norm": 11.257021768346082, "learning_rate": 4.966377764040098e-06, "loss": 0.4129, "step": 18056 }, { "epoch": 0.52, "grad_norm": 2.9894588767982935, "learning_rate": 4.965914016196811e-06, "loss": 0.4999, "step": 18057 }, { "epoch": 0.52, "grad_norm": 5.711607344517546, "learning_rate": 4.9654502686467585e-06, "loss": 0.5971, "step": 18058 }, { "epoch": 0.52, "grad_norm": 3.62743013415894, "learning_rate": 4.964986521393936e-06, "loss": 0.2859, "step": 18059 }, { "epoch": 0.52, "grad_norm": 5.406441304785643, "learning_rate": 4.9645227744423285e-06, "loss": 0.5981, "step": 18060 }, { "epoch": 0.52, "grad_norm": 5.645362090635476, "learning_rate": 4.964059027795928e-06, "loss": 0.4889, "step": 18061 }, { "epoch": 0.52, "grad_norm": 2.2951170363534503, "learning_rate": 4.963595281458722e-06, "loss": 0.3417, "step": 18062 }, { "epoch": 0.52, "grad_norm": 8.555680615335403, "learning_rate": 4.963131535434702e-06, "loss": 0.9724, "step": 18063 }, { "epoch": 0.52, "grad_norm": 4.392079584131986, "learning_rate": 4.962667789727858e-06, "loss": 0.3722, "step": 18064 }, { "epoch": 0.52, "grad_norm": 8.174157372774445, "learning_rate": 4.962204044342176e-06, "loss": 0.6849, "step": 18065 }, { "epoch": 0.52, "grad_norm": 6.96446960436999, "learning_rate": 4.96174029928165e-06, "loss": 0.4074, "step": 18066 }, { "epoch": 0.52, "grad_norm": 6.514286882686032, "learning_rate": 4.961276554550267e-06, "loss": 0.698, "step": 18067 }, { "epoch": 0.52, "grad_norm": 5.683611284471449, "learning_rate": 4.960812810152018e-06, "loss": 0.3336, "step": 18068 }, { "epoch": 0.52, "grad_norm": 10.986355707430654, "learning_rate": 4.96034906609089e-06, "loss": 0.6304, "step": 18069 }, { "epoch": 0.52, "grad_norm": 5.030178500927197, "learning_rate": 4.959885322370874e-06, "loss": 0.3746, "step": 18070 }, { "epoch": 0.52, "grad_norm": 3.717102262747148, "learning_rate": 4.959421578995961e-06, "loss": 0.336, "step": 18071 }, { "epoch": 0.52, "grad_norm": 11.416891072613245, "learning_rate": 4.958957835970136e-06, "loss": 0.9946, "step": 18072 }, { "epoch": 0.52, "grad_norm": 9.87764805888689, "learning_rate": 4.958494093297397e-06, "loss": 0.6956, "step": 18073 }, { "epoch": 0.52, "grad_norm": 6.907155007836194, "learning_rate": 4.958030350981724e-06, "loss": 0.5539, "step": 18074 }, { "epoch": 0.52, "grad_norm": 10.113037907990584, "learning_rate": 4.957566609027113e-06, "loss": 0.4889, "step": 18075 }, { "epoch": 0.52, "grad_norm": 9.470345693157805, "learning_rate": 4.95710286743755e-06, "loss": 0.398, "step": 18076 }, { "epoch": 0.52, "grad_norm": 5.219134333729298, "learning_rate": 4.956639126217027e-06, "loss": 0.3821, "step": 18077 }, { "epoch": 0.52, "grad_norm": 6.271839209028227, "learning_rate": 4.95617538536953e-06, "loss": 0.6715, "step": 18078 }, { "epoch": 0.52, "grad_norm": 3.4004080748397887, "learning_rate": 4.955711644899052e-06, "loss": 0.3274, "step": 18079 }, { "epoch": 0.52, "grad_norm": 8.991911644437034, "learning_rate": 4.955247904809582e-06, "loss": 0.6179, "step": 18080 }, { "epoch": 0.52, "grad_norm": 6.4954071691394155, "learning_rate": 4.954784165105106e-06, "loss": 0.4077, "step": 18081 }, { "epoch": 0.52, "grad_norm": 4.349306353808311, "learning_rate": 4.95432042578962e-06, "loss": 0.2899, "step": 18082 }, { "epoch": 0.52, "grad_norm": 7.336644359004718, "learning_rate": 4.9538566868671075e-06, "loss": 0.9768, "step": 18083 }, { "epoch": 0.52, "grad_norm": 4.178540737379404, "learning_rate": 4.953392948341562e-06, "loss": 0.5941, "step": 18084 }, { "epoch": 0.52, "grad_norm": 3.206031522687502, "learning_rate": 4.952929210216969e-06, "loss": 0.3221, "step": 18085 }, { "epoch": 0.52, "grad_norm": 7.793676175010279, "learning_rate": 4.952465472497322e-06, "loss": 0.5796, "step": 18086 }, { "epoch": 0.52, "grad_norm": 5.038015472694252, "learning_rate": 4.9520017351866075e-06, "loss": 0.2736, "step": 18087 }, { "epoch": 0.52, "grad_norm": 7.470506884422166, "learning_rate": 4.951537998288815e-06, "loss": 0.5294, "step": 18088 }, { "epoch": 0.52, "grad_norm": 8.807545398656874, "learning_rate": 4.951074261807938e-06, "loss": 0.612, "step": 18089 }, { "epoch": 0.52, "grad_norm": 3.3795348536374163, "learning_rate": 4.950610525747961e-06, "loss": 0.2664, "step": 18090 }, { "epoch": 0.52, "grad_norm": 6.835159065611383, "learning_rate": 4.950146790112878e-06, "loss": 0.3216, "step": 18091 }, { "epoch": 0.52, "grad_norm": 5.3138848911879, "learning_rate": 4.949683054906673e-06, "loss": 0.7686, "step": 18092 }, { "epoch": 0.52, "grad_norm": 5.5090918387587635, "learning_rate": 4.949219320133341e-06, "loss": 0.5268, "step": 18093 }, { "epoch": 0.52, "grad_norm": 3.4374180177102414, "learning_rate": 4.9487555857968675e-06, "loss": 0.2652, "step": 18094 }, { "epoch": 0.52, "grad_norm": 6.433540052179424, "learning_rate": 4.948291851901242e-06, "loss": 0.5882, "step": 18095 }, { "epoch": 0.52, "grad_norm": 5.141798818776697, "learning_rate": 4.947828118450458e-06, "loss": 0.67, "step": 18096 }, { "epoch": 0.52, "grad_norm": 4.963489458810913, "learning_rate": 4.9473643854485015e-06, "loss": 0.6851, "step": 18097 }, { "epoch": 0.52, "grad_norm": 6.4383513989426735, "learning_rate": 4.946900652899364e-06, "loss": 0.3261, "step": 18098 }, { "epoch": 0.52, "grad_norm": 5.12661401408275, "learning_rate": 4.94643692080703e-06, "loss": 0.5541, "step": 18099 }, { "epoch": 0.52, "grad_norm": 6.418419272455738, "learning_rate": 4.9459731891754965e-06, "loss": 0.3161, "step": 18100 }, { "epoch": 0.52, "grad_norm": 4.317124361397298, "learning_rate": 4.945509458008746e-06, "loss": 0.5817, "step": 18101 }, { "epoch": 0.52, "grad_norm": 6.063853397907228, "learning_rate": 4.9450457273107735e-06, "loss": 0.5655, "step": 18102 }, { "epoch": 0.52, "grad_norm": 4.5769176779026575, "learning_rate": 4.944581997085565e-06, "loss": 0.5678, "step": 18103 }, { "epoch": 0.52, "grad_norm": 27.22829868029653, "learning_rate": 4.944118267337108e-06, "loss": 1.0584, "step": 18104 }, { "epoch": 0.52, "grad_norm": 1.5716120338787984, "learning_rate": 4.943654538069398e-06, "loss": 0.1078, "step": 18105 }, { "epoch": 0.52, "grad_norm": 11.266514080376211, "learning_rate": 4.94319080928642e-06, "loss": 0.8458, "step": 18106 }, { "epoch": 0.52, "grad_norm": 3.1810731797457548, "learning_rate": 4.942727080992165e-06, "loss": 0.5269, "step": 18107 }, { "epoch": 0.52, "grad_norm": 6.929102514921573, "learning_rate": 4.94226335319062e-06, "loss": 0.2372, "step": 18108 }, { "epoch": 0.52, "grad_norm": 11.204160421664557, "learning_rate": 4.941799625885779e-06, "loss": 0.3428, "step": 18109 }, { "epoch": 0.52, "grad_norm": 8.835607979563731, "learning_rate": 4.941335899081627e-06, "loss": 0.7093, "step": 18110 }, { "epoch": 0.52, "grad_norm": 3.541103204148062, "learning_rate": 4.940872172782154e-06, "loss": 0.4819, "step": 18111 }, { "epoch": 0.52, "grad_norm": 9.27486541732663, "learning_rate": 4.940408446991353e-06, "loss": 0.3921, "step": 18112 }, { "epoch": 0.52, "grad_norm": 6.135982748534559, "learning_rate": 4.939944721713209e-06, "loss": 0.6002, "step": 18113 }, { "epoch": 0.52, "grad_norm": 5.723043445598888, "learning_rate": 4.939480996951715e-06, "loss": 0.257, "step": 18114 }, { "epoch": 0.52, "grad_norm": 12.71361687479222, "learning_rate": 4.939017272710856e-06, "loss": 0.7864, "step": 18115 }, { "epoch": 0.52, "grad_norm": 8.994857166422152, "learning_rate": 4.938553548994626e-06, "loss": 0.626, "step": 18116 }, { "epoch": 0.52, "grad_norm": 2.344898311325657, "learning_rate": 4.938089825807011e-06, "loss": 0.2375, "step": 18117 }, { "epoch": 0.52, "grad_norm": 7.062097622371938, "learning_rate": 4.937626103152003e-06, "loss": 0.4977, "step": 18118 }, { "epoch": 0.52, "grad_norm": 9.532379471191716, "learning_rate": 4.937162381033587e-06, "loss": 0.3646, "step": 18119 }, { "epoch": 0.52, "grad_norm": 3.6079097267157847, "learning_rate": 4.936698659455758e-06, "loss": 0.3491, "step": 18120 }, { "epoch": 0.52, "grad_norm": 7.361141341125346, "learning_rate": 4.936234938422502e-06, "loss": 0.6805, "step": 18121 }, { "epoch": 0.52, "grad_norm": 3.6537609920538645, "learning_rate": 4.935771217937808e-06, "loss": 0.4333, "step": 18122 }, { "epoch": 0.52, "grad_norm": 10.19166526596879, "learning_rate": 4.935307498005668e-06, "loss": 1.1749, "step": 18123 }, { "epoch": 0.52, "grad_norm": 6.399557870255837, "learning_rate": 4.934843778630069e-06, "loss": 0.3519, "step": 18124 }, { "epoch": 0.52, "grad_norm": 5.40320391092653, "learning_rate": 4.934380059815001e-06, "loss": 0.4893, "step": 18125 }, { "epoch": 0.52, "grad_norm": 5.496292468547393, "learning_rate": 4.933916341564452e-06, "loss": 0.4973, "step": 18126 }, { "epoch": 0.52, "grad_norm": 5.845053787440812, "learning_rate": 4.933452623882414e-06, "loss": 0.7739, "step": 18127 }, { "epoch": 0.52, "grad_norm": 7.990768291693892, "learning_rate": 4.932988906772875e-06, "loss": 0.5271, "step": 18128 }, { "epoch": 0.52, "grad_norm": 4.297991798226193, "learning_rate": 4.932525190239822e-06, "loss": 0.3206, "step": 18129 }, { "epoch": 0.52, "grad_norm": 5.211137484669231, "learning_rate": 4.9320614742872504e-06, "loss": 0.2617, "step": 18130 }, { "epoch": 0.52, "grad_norm": 6.441594571610355, "learning_rate": 4.931597758919142e-06, "loss": 0.663, "step": 18131 }, { "epoch": 0.52, "grad_norm": 2.3732756453393984, "learning_rate": 4.9311340441394914e-06, "loss": 0.1787, "step": 18132 }, { "epoch": 0.52, "grad_norm": 7.346465692188585, "learning_rate": 4.930670329952286e-06, "loss": 0.4593, "step": 18133 }, { "epoch": 0.52, "grad_norm": 10.102325953452493, "learning_rate": 4.9302066163615155e-06, "loss": 1.3289, "step": 18134 }, { "epoch": 0.52, "grad_norm": 6.21318554770355, "learning_rate": 4.929742903371167e-06, "loss": 0.6093, "step": 18135 }, { "epoch": 0.52, "grad_norm": 3.2375913098983324, "learning_rate": 4.929279190985233e-06, "loss": 0.3877, "step": 18136 }, { "epoch": 0.52, "grad_norm": 4.969398264311735, "learning_rate": 4.928815479207702e-06, "loss": 0.4094, "step": 18137 }, { "epoch": 0.52, "grad_norm": 3.9409069202444518, "learning_rate": 4.9283517680425605e-06, "loss": 0.2263, "step": 18138 }, { "epoch": 0.52, "grad_norm": 7.013785819963447, "learning_rate": 4.9278880574938025e-06, "loss": 0.5306, "step": 18139 }, { "epoch": 0.52, "grad_norm": 2.351056624333828, "learning_rate": 4.927424347565413e-06, "loss": 0.2689, "step": 18140 }, { "epoch": 0.52, "grad_norm": 4.943176483183307, "learning_rate": 4.926960638261385e-06, "loss": 0.6154, "step": 18141 }, { "epoch": 0.52, "grad_norm": 5.598718450567667, "learning_rate": 4.926496929585703e-06, "loss": 0.4829, "step": 18142 }, { "epoch": 0.52, "grad_norm": 10.850078220349165, "learning_rate": 4.92603322154236e-06, "loss": 0.5254, "step": 18143 }, { "epoch": 0.52, "grad_norm": 7.72743089075449, "learning_rate": 4.925569514135344e-06, "loss": 0.7521, "step": 18144 }, { "epoch": 0.52, "grad_norm": 5.415863882388462, "learning_rate": 4.925105807368643e-06, "loss": 0.3416, "step": 18145 }, { "epoch": 0.52, "grad_norm": 6.550091592861731, "learning_rate": 4.924642101246251e-06, "loss": 0.5953, "step": 18146 }, { "epoch": 0.52, "grad_norm": 6.2499204249084634, "learning_rate": 4.924178395772151e-06, "loss": 0.4758, "step": 18147 }, { "epoch": 0.52, "grad_norm": 3.0568197374607418, "learning_rate": 4.923714690950337e-06, "loss": 0.466, "step": 18148 }, { "epoch": 0.52, "grad_norm": 3.3684491407131514, "learning_rate": 4.923250986784793e-06, "loss": 0.3181, "step": 18149 }, { "epoch": 0.52, "grad_norm": 8.816435239623363, "learning_rate": 4.9227872832795135e-06, "loss": 0.7927, "step": 18150 }, { "epoch": 0.52, "grad_norm": 5.096132247945475, "learning_rate": 4.922323580438485e-06, "loss": 0.6144, "step": 18151 }, { "epoch": 0.52, "grad_norm": 3.3811138548738726, "learning_rate": 4.921859878265695e-06, "loss": 0.1989, "step": 18152 }, { "epoch": 0.52, "grad_norm": 9.527433538072449, "learning_rate": 4.921396176765139e-06, "loss": 0.735, "step": 18153 }, { "epoch": 0.52, "grad_norm": 3.5675964876264348, "learning_rate": 4.9209324759407994e-06, "loss": 0.5381, "step": 18154 }, { "epoch": 0.52, "grad_norm": 6.468879993256714, "learning_rate": 4.92046877579667e-06, "loss": 0.4905, "step": 18155 }, { "epoch": 0.52, "grad_norm": 5.118417094191315, "learning_rate": 4.920005076336735e-06, "loss": 0.4718, "step": 18156 }, { "epoch": 0.52, "grad_norm": 6.882198518523296, "learning_rate": 4.91954137756499e-06, "loss": 0.7189, "step": 18157 }, { "epoch": 0.52, "grad_norm": 4.3002756429639195, "learning_rate": 4.919077679485417e-06, "loss": 0.479, "step": 18158 }, { "epoch": 0.52, "grad_norm": 5.416045955403827, "learning_rate": 4.9186139821020115e-06, "loss": 0.5936, "step": 18159 }, { "epoch": 0.52, "grad_norm": 4.827275099530883, "learning_rate": 4.918150285418759e-06, "loss": 0.5197, "step": 18160 }, { "epoch": 0.52, "grad_norm": 5.289823987228397, "learning_rate": 4.917686589439648e-06, "loss": 0.37, "step": 18161 }, { "epoch": 0.52, "grad_norm": 4.0858506727748685, "learning_rate": 4.917222894168671e-06, "loss": 0.2307, "step": 18162 }, { "epoch": 0.52, "grad_norm": 2.8655491027432323, "learning_rate": 4.916759199609815e-06, "loss": 0.25, "step": 18163 }, { "epoch": 0.52, "grad_norm": 4.092717455456281, "learning_rate": 4.91629550576707e-06, "loss": 0.2116, "step": 18164 }, { "epoch": 0.52, "grad_norm": 7.41275475950353, "learning_rate": 4.915831812644421e-06, "loss": 0.4478, "step": 18165 }, { "epoch": 0.52, "grad_norm": 3.607564199435501, "learning_rate": 4.915368120245865e-06, "loss": 0.4943, "step": 18166 }, { "epoch": 0.52, "grad_norm": 4.523918745567586, "learning_rate": 4.914904428575384e-06, "loss": 0.4373, "step": 18167 }, { "epoch": 0.52, "grad_norm": 4.872100358978557, "learning_rate": 4.914440737636972e-06, "loss": 0.4395, "step": 18168 }, { "epoch": 0.52, "grad_norm": 3.657180048362835, "learning_rate": 4.913977047434613e-06, "loss": 0.4196, "step": 18169 }, { "epoch": 0.52, "grad_norm": 4.460162895546481, "learning_rate": 4.9135133579722995e-06, "loss": 0.5753, "step": 18170 }, { "epoch": 0.52, "grad_norm": 5.1912942361910295, "learning_rate": 4.913049669254022e-06, "loss": 0.315, "step": 18171 }, { "epoch": 0.52, "grad_norm": 6.564798179757933, "learning_rate": 4.912585981283764e-06, "loss": 0.4065, "step": 18172 }, { "epoch": 0.52, "grad_norm": 5.590562221117046, "learning_rate": 4.912122294065521e-06, "loss": 0.3001, "step": 18173 }, { "epoch": 0.52, "grad_norm": 3.423238056036939, "learning_rate": 4.911658607603278e-06, "loss": 0.4821, "step": 18174 }, { "epoch": 0.52, "grad_norm": 3.1853155242716946, "learning_rate": 4.911194921901026e-06, "loss": 0.3024, "step": 18175 }, { "epoch": 0.52, "grad_norm": 8.169081726576934, "learning_rate": 4.910731236962752e-06, "loss": 0.3586, "step": 18176 }, { "epoch": 0.52, "grad_norm": 6.000809455947332, "learning_rate": 4.910267552792446e-06, "loss": 0.5251, "step": 18177 }, { "epoch": 0.52, "grad_norm": 5.416140379158437, "learning_rate": 4.9098038693940985e-06, "loss": 0.4305, "step": 18178 }, { "epoch": 0.52, "grad_norm": 7.7064825327643165, "learning_rate": 4.909340186771695e-06, "loss": 0.9662, "step": 18179 }, { "epoch": 0.52, "grad_norm": 2.3952584337078657, "learning_rate": 4.908876504929229e-06, "loss": 0.2906, "step": 18180 }, { "epoch": 0.52, "grad_norm": 7.930394108300477, "learning_rate": 4.908412823870686e-06, "loss": 0.5021, "step": 18181 }, { "epoch": 0.52, "grad_norm": 6.179609643033368, "learning_rate": 4.907949143600058e-06, "loss": 0.5036, "step": 18182 }, { "epoch": 0.52, "grad_norm": 7.419448359463137, "learning_rate": 4.90748546412133e-06, "loss": 0.6088, "step": 18183 }, { "epoch": 0.52, "grad_norm": 4.9724879569727225, "learning_rate": 4.907021785438494e-06, "loss": 0.3538, "step": 18184 }, { "epoch": 0.52, "grad_norm": 7.268933271096812, "learning_rate": 4.906558107555538e-06, "loss": 0.706, "step": 18185 }, { "epoch": 0.52, "grad_norm": 6.0986666089748605, "learning_rate": 4.90609443047645e-06, "loss": 0.6082, "step": 18186 }, { "epoch": 0.52, "grad_norm": 6.098097732074807, "learning_rate": 4.905630754205223e-06, "loss": 0.7668, "step": 18187 }, { "epoch": 0.52, "grad_norm": 8.880215898587014, "learning_rate": 4.905167078745841e-06, "loss": 0.8223, "step": 18188 }, { "epoch": 0.52, "grad_norm": 6.126236421001448, "learning_rate": 4.904703404102295e-06, "loss": 0.8297, "step": 18189 }, { "epoch": 0.52, "grad_norm": 4.362614347604295, "learning_rate": 4.904239730278574e-06, "loss": 0.2947, "step": 18190 }, { "epoch": 0.52, "grad_norm": 1.6336844548165246, "learning_rate": 4.903776057278668e-06, "loss": 0.2744, "step": 18191 }, { "epoch": 0.52, "grad_norm": 6.770767090302204, "learning_rate": 4.903312385106562e-06, "loss": 0.5375, "step": 18192 }, { "epoch": 0.52, "grad_norm": 5.330556186192055, "learning_rate": 4.9028487137662504e-06, "loss": 0.5563, "step": 18193 }, { "epoch": 0.52, "grad_norm": 3.9913821369188693, "learning_rate": 4.902385043261718e-06, "loss": 0.342, "step": 18194 }, { "epoch": 0.52, "grad_norm": 9.477404677289783, "learning_rate": 4.901921373596954e-06, "loss": 0.5613, "step": 18195 }, { "epoch": 0.52, "grad_norm": 6.86613836509169, "learning_rate": 4.90145770477595e-06, "loss": 0.6538, "step": 18196 }, { "epoch": 0.52, "grad_norm": 7.247669042567972, "learning_rate": 4.9009940368026934e-06, "loss": 0.8607, "step": 18197 }, { "epoch": 0.52, "grad_norm": 3.5050899075610116, "learning_rate": 4.900530369681173e-06, "loss": 0.0945, "step": 18198 }, { "epoch": 0.52, "grad_norm": 2.718621459202322, "learning_rate": 4.9000667034153756e-06, "loss": 0.1579, "step": 18199 }, { "epoch": 0.52, "grad_norm": 3.8014045666846887, "learning_rate": 4.899603038009294e-06, "loss": 0.3502, "step": 18200 }, { "epoch": 0.52, "grad_norm": 4.839308591355226, "learning_rate": 4.899139373466915e-06, "loss": 0.2672, "step": 18201 }, { "epoch": 0.52, "grad_norm": 5.898819988051367, "learning_rate": 4.898675709792226e-06, "loss": 0.4066, "step": 18202 }, { "epoch": 0.52, "grad_norm": 4.316733148546466, "learning_rate": 4.898212046989219e-06, "loss": 0.438, "step": 18203 }, { "epoch": 0.52, "grad_norm": 5.208188614424151, "learning_rate": 4.89774838506188e-06, "loss": 0.3545, "step": 18204 }, { "epoch": 0.52, "grad_norm": 4.199983777287351, "learning_rate": 4.897284724014201e-06, "loss": 0.5504, "step": 18205 }, { "epoch": 0.52, "grad_norm": 3.512861021501025, "learning_rate": 4.896821063850166e-06, "loss": 0.3208, "step": 18206 }, { "epoch": 0.52, "grad_norm": 6.119713428619455, "learning_rate": 4.896357404573769e-06, "loss": 0.4591, "step": 18207 }, { "epoch": 0.52, "grad_norm": 5.5338649064502095, "learning_rate": 4.895893746188996e-06, "loss": 0.7422, "step": 18208 }, { "epoch": 0.52, "grad_norm": 4.447638253544605, "learning_rate": 4.895430088699837e-06, "loss": 0.3971, "step": 18209 }, { "epoch": 0.52, "grad_norm": 11.33276137610401, "learning_rate": 4.894966432110278e-06, "loss": 0.2624, "step": 18210 }, { "epoch": 0.52, "grad_norm": 5.6619710915224, "learning_rate": 4.894502776424311e-06, "loss": 0.3735, "step": 18211 }, { "epoch": 0.52, "grad_norm": 9.447927663531033, "learning_rate": 4.8940391216459245e-06, "loss": 0.6649, "step": 18212 }, { "epoch": 0.52, "grad_norm": 6.8708497785375675, "learning_rate": 4.8935754677791035e-06, "loss": 0.5872, "step": 18213 }, { "epoch": 0.52, "grad_norm": 7.356364871726834, "learning_rate": 4.893111814827843e-06, "loss": 0.416, "step": 18214 }, { "epoch": 0.52, "grad_norm": 4.588502183008937, "learning_rate": 4.892648162796127e-06, "loss": 0.6361, "step": 18215 }, { "epoch": 0.52, "grad_norm": 10.383531670388027, "learning_rate": 4.892184511687946e-06, "loss": 0.7471, "step": 18216 }, { "epoch": 0.52, "grad_norm": 8.758790042437788, "learning_rate": 4.891720861507288e-06, "loss": 0.743, "step": 18217 }, { "epoch": 0.52, "grad_norm": 8.340614615555749, "learning_rate": 4.891257212258141e-06, "loss": 0.6098, "step": 18218 }, { "epoch": 0.52, "grad_norm": 3.7869731058173572, "learning_rate": 4.890793563944497e-06, "loss": 0.3065, "step": 18219 }, { "epoch": 0.52, "grad_norm": 7.565144478476575, "learning_rate": 4.890329916570341e-06, "loss": 0.5272, "step": 18220 }, { "epoch": 0.52, "grad_norm": 4.889857320283684, "learning_rate": 4.889866270139665e-06, "loss": 0.373, "step": 18221 }, { "epoch": 0.52, "grad_norm": 2.2846826111401146, "learning_rate": 4.8894026246564526e-06, "loss": 0.1848, "step": 18222 }, { "epoch": 0.52, "grad_norm": 5.333387935875492, "learning_rate": 4.888938980124699e-06, "loss": 0.4381, "step": 18223 }, { "epoch": 0.52, "grad_norm": 4.350859717115129, "learning_rate": 4.888475336548388e-06, "loss": 0.5698, "step": 18224 }, { "epoch": 0.52, "grad_norm": 3.6422926558243915, "learning_rate": 4.888011693931512e-06, "loss": 0.5373, "step": 18225 }, { "epoch": 0.52, "grad_norm": 4.903053141177378, "learning_rate": 4.887548052278054e-06, "loss": 0.3857, "step": 18226 }, { "epoch": 0.52, "grad_norm": 6.8852664586783225, "learning_rate": 4.887084411592009e-06, "loss": 0.7898, "step": 18227 }, { "epoch": 0.52, "grad_norm": 2.049631875701462, "learning_rate": 4.886620771877363e-06, "loss": 0.1084, "step": 18228 }, { "epoch": 0.52, "grad_norm": 6.263891803245496, "learning_rate": 4.886157133138102e-06, "loss": 0.7328, "step": 18229 }, { "epoch": 0.52, "grad_norm": 10.053410683919518, "learning_rate": 4.88569349537822e-06, "loss": 0.3857, "step": 18230 }, { "epoch": 0.52, "grad_norm": 6.22828847591153, "learning_rate": 4.885229858601702e-06, "loss": 0.7003, "step": 18231 }, { "epoch": 0.52, "grad_norm": 6.402362444084018, "learning_rate": 4.884766222812537e-06, "loss": 0.7817, "step": 18232 }, { "epoch": 0.52, "grad_norm": 11.464088760809673, "learning_rate": 4.8843025880147134e-06, "loss": 0.6056, "step": 18233 }, { "epoch": 0.52, "grad_norm": 4.82699571754166, "learning_rate": 4.883838954212223e-06, "loss": 0.5399, "step": 18234 }, { "epoch": 0.52, "grad_norm": 7.956860098030435, "learning_rate": 4.883375321409049e-06, "loss": 1.0647, "step": 18235 }, { "epoch": 0.52, "grad_norm": 4.189733564331734, "learning_rate": 4.8829116896091825e-06, "loss": 0.525, "step": 18236 }, { "epoch": 0.52, "grad_norm": 3.9539087596644897, "learning_rate": 4.882448058816614e-06, "loss": 0.3485, "step": 18237 }, { "epoch": 0.52, "grad_norm": 2.263440150725874, "learning_rate": 4.88198442903533e-06, "loss": 0.1861, "step": 18238 }, { "epoch": 0.52, "grad_norm": 5.953340941990643, "learning_rate": 4.881520800269319e-06, "loss": 0.366, "step": 18239 }, { "epoch": 0.52, "grad_norm": 9.8147562919594, "learning_rate": 4.88105717252257e-06, "loss": 0.7068, "step": 18240 }, { "epoch": 0.52, "grad_norm": 3.497343400499202, "learning_rate": 4.880593545799072e-06, "loss": 0.179, "step": 18241 }, { "epoch": 0.52, "grad_norm": 3.0180361501982045, "learning_rate": 4.880129920102812e-06, "loss": 0.5328, "step": 18242 }, { "epoch": 0.52, "grad_norm": 6.816046395408079, "learning_rate": 4.879666295437779e-06, "loss": 0.4653, "step": 18243 }, { "epoch": 0.52, "grad_norm": 9.950237340348844, "learning_rate": 4.879202671807965e-06, "loss": 0.9894, "step": 18244 }, { "epoch": 0.52, "grad_norm": 19.151851379302283, "learning_rate": 4.878739049217354e-06, "loss": 0.8007, "step": 18245 }, { "epoch": 0.52, "grad_norm": 4.724983897509712, "learning_rate": 4.878275427669936e-06, "loss": 0.4886, "step": 18246 }, { "epoch": 0.52, "grad_norm": 6.1305983901946055, "learning_rate": 4.877811807169699e-06, "loss": 0.4803, "step": 18247 }, { "epoch": 0.52, "grad_norm": 4.99247829205221, "learning_rate": 4.877348187720634e-06, "loss": 0.7558, "step": 18248 }, { "epoch": 0.52, "grad_norm": 6.502309168941708, "learning_rate": 4.876884569326724e-06, "loss": 0.7105, "step": 18249 }, { "epoch": 0.52, "grad_norm": 4.785127800058921, "learning_rate": 4.8764209519919636e-06, "loss": 0.2651, "step": 18250 }, { "epoch": 0.52, "grad_norm": 2.1423428040238686, "learning_rate": 4.8759573357203375e-06, "loss": 0.1579, "step": 18251 }, { "epoch": 0.52, "grad_norm": 4.634878228737077, "learning_rate": 4.875493720515834e-06, "loss": 0.1957, "step": 18252 }, { "epoch": 0.52, "grad_norm": 6.133352306930938, "learning_rate": 4.875030106382445e-06, "loss": 0.4719, "step": 18253 }, { "epoch": 0.52, "grad_norm": 3.937432848645365, "learning_rate": 4.8745664933241555e-06, "loss": 0.314, "step": 18254 }, { "epoch": 0.52, "grad_norm": 8.729339567549397, "learning_rate": 4.874102881344956e-06, "loss": 0.8566, "step": 18255 }, { "epoch": 0.52, "grad_norm": 5.78545090918549, "learning_rate": 4.8736392704488315e-06, "loss": 0.6732, "step": 18256 }, { "epoch": 0.52, "grad_norm": 4.710848147935088, "learning_rate": 4.873175660639775e-06, "loss": 0.4604, "step": 18257 }, { "epoch": 0.52, "grad_norm": 4.969620250570006, "learning_rate": 4.872712051921772e-06, "loss": 0.5141, "step": 18258 }, { "epoch": 0.52, "grad_norm": 10.994872371944195, "learning_rate": 4.872248444298811e-06, "loss": 0.4796, "step": 18259 }, { "epoch": 0.52, "grad_norm": 3.0933852895910157, "learning_rate": 4.871784837774882e-06, "loss": 0.1185, "step": 18260 }, { "epoch": 0.52, "grad_norm": 4.669613957673845, "learning_rate": 4.871321232353972e-06, "loss": 0.2791, "step": 18261 }, { "epoch": 0.52, "grad_norm": 7.831388238781231, "learning_rate": 4.87085762804007e-06, "loss": 0.4888, "step": 18262 }, { "epoch": 0.52, "grad_norm": 4.676097836847177, "learning_rate": 4.870394024837162e-06, "loss": 0.4074, "step": 18263 }, { "epoch": 0.52, "grad_norm": 6.435282204819309, "learning_rate": 4.86993042274924e-06, "loss": 0.4856, "step": 18264 }, { "epoch": 0.52, "grad_norm": 3.2337977682094263, "learning_rate": 4.86946682178029e-06, "loss": 0.2081, "step": 18265 }, { "epoch": 0.52, "grad_norm": 4.814513862644577, "learning_rate": 4.869003221934302e-06, "loss": 0.3396, "step": 18266 }, { "epoch": 0.52, "grad_norm": 3.309104411632231, "learning_rate": 4.86853962321526e-06, "loss": 0.3989, "step": 18267 }, { "epoch": 0.52, "grad_norm": 5.095063935235271, "learning_rate": 4.868076025627157e-06, "loss": 0.2218, "step": 18268 }, { "epoch": 0.52, "grad_norm": 8.647394270723398, "learning_rate": 4.867612429173982e-06, "loss": 0.5092, "step": 18269 }, { "epoch": 0.52, "grad_norm": 3.337549753568319, "learning_rate": 4.867148833859718e-06, "loss": 0.4359, "step": 18270 }, { "epoch": 0.52, "grad_norm": 10.793255299958988, "learning_rate": 4.866685239688358e-06, "loss": 0.3729, "step": 18271 }, { "epoch": 0.52, "grad_norm": 3.5257496827950465, "learning_rate": 4.8662216466638875e-06, "loss": 0.3481, "step": 18272 }, { "epoch": 0.52, "grad_norm": 7.811376811350468, "learning_rate": 4.8657580547902976e-06, "loss": 0.2848, "step": 18273 }, { "epoch": 0.52, "grad_norm": 3.434178412159013, "learning_rate": 4.865294464071572e-06, "loss": 0.2468, "step": 18274 }, { "epoch": 0.52, "grad_norm": 3.6461293272935476, "learning_rate": 4.864830874511704e-06, "loss": 0.4117, "step": 18275 }, { "epoch": 0.52, "grad_norm": 7.500546530837669, "learning_rate": 4.864367286114677e-06, "loss": 0.7179, "step": 18276 }, { "epoch": 0.52, "grad_norm": 10.841282632541795, "learning_rate": 4.863903698884483e-06, "loss": 0.5986, "step": 18277 }, { "epoch": 0.52, "grad_norm": 6.313422673844359, "learning_rate": 4.863440112825108e-06, "loss": 0.3428, "step": 18278 }, { "epoch": 0.52, "grad_norm": 5.9422524155013186, "learning_rate": 4.86297652794054e-06, "loss": 0.5781, "step": 18279 }, { "epoch": 0.52, "grad_norm": 3.167364399151883, "learning_rate": 4.86251294423477e-06, "loss": 0.3639, "step": 18280 }, { "epoch": 0.52, "grad_norm": 6.85779732393718, "learning_rate": 4.862049361711783e-06, "loss": 0.3108, "step": 18281 }, { "epoch": 0.52, "grad_norm": 9.666604228201727, "learning_rate": 4.86158578037557e-06, "loss": 0.8054, "step": 18282 }, { "epoch": 0.52, "grad_norm": 4.951277716293517, "learning_rate": 4.861122200230115e-06, "loss": 0.6212, "step": 18283 }, { "epoch": 0.52, "grad_norm": 5.5395086671010985, "learning_rate": 4.8606586212794095e-06, "loss": 0.3989, "step": 18284 }, { "epoch": 0.52, "grad_norm": 10.169345148509404, "learning_rate": 4.860195043527442e-06, "loss": 0.5843, "step": 18285 }, { "epoch": 0.52, "grad_norm": 10.63572752694736, "learning_rate": 4.859731466978197e-06, "loss": 0.3761, "step": 18286 }, { "epoch": 0.52, "grad_norm": 5.912383754627849, "learning_rate": 4.859267891635668e-06, "loss": 0.8249, "step": 18287 }, { "epoch": 0.52, "grad_norm": 10.156787282963888, "learning_rate": 4.8588043175038375e-06, "loss": 0.412, "step": 18288 }, { "epoch": 0.52, "grad_norm": 5.327182182953383, "learning_rate": 4.858340744586698e-06, "loss": 0.6545, "step": 18289 }, { "epoch": 0.52, "grad_norm": 5.649695355295784, "learning_rate": 4.857877172888233e-06, "loss": 0.6656, "step": 18290 }, { "epoch": 0.52, "grad_norm": 5.861503438288497, "learning_rate": 4.857413602412436e-06, "loss": 0.5217, "step": 18291 }, { "epoch": 0.52, "grad_norm": 3.291127234131339, "learning_rate": 4.856950033163291e-06, "loss": 0.2257, "step": 18292 }, { "epoch": 0.52, "grad_norm": 5.143147948062409, "learning_rate": 4.856486465144785e-06, "loss": 0.2224, "step": 18293 }, { "epoch": 0.52, "grad_norm": 4.592865268341585, "learning_rate": 4.856022898360912e-06, "loss": 0.3738, "step": 18294 }, { "epoch": 0.52, "grad_norm": 3.706924662047521, "learning_rate": 4.855559332815654e-06, "loss": 0.2186, "step": 18295 }, { "epoch": 0.52, "grad_norm": 9.744290709892836, "learning_rate": 4.855095768513003e-06, "loss": 0.7014, "step": 18296 }, { "epoch": 0.52, "grad_norm": 7.682758388673968, "learning_rate": 4.854632205456944e-06, "loss": 0.5237, "step": 18297 }, { "epoch": 0.52, "grad_norm": 6.277842418184075, "learning_rate": 4.8541686436514665e-06, "loss": 0.5696, "step": 18298 }, { "epoch": 0.52, "grad_norm": 8.946313607113817, "learning_rate": 4.853705083100559e-06, "loss": 0.4775, "step": 18299 }, { "epoch": 0.52, "grad_norm": 1.894626418407453, "learning_rate": 4.853241523808206e-06, "loss": 0.2113, "step": 18300 }, { "epoch": 0.52, "grad_norm": 3.851564821316095, "learning_rate": 4.8527779657784e-06, "loss": 0.76, "step": 18301 }, { "epoch": 0.52, "grad_norm": 14.335611432184626, "learning_rate": 4.852314409015127e-06, "loss": 0.5869, "step": 18302 }, { "epoch": 0.52, "grad_norm": 8.899853628005046, "learning_rate": 4.851850853522377e-06, "loss": 0.5341, "step": 18303 }, { "epoch": 0.52, "grad_norm": 1.8168969752790687, "learning_rate": 4.851387299304132e-06, "loss": 0.284, "step": 18304 }, { "epoch": 0.52, "grad_norm": 4.200307163859235, "learning_rate": 4.850923746364388e-06, "loss": 0.3451, "step": 18305 }, { "epoch": 0.52, "grad_norm": 5.451226677340637, "learning_rate": 4.850460194707124e-06, "loss": 0.4471, "step": 18306 }, { "epoch": 0.52, "grad_norm": 6.1938916726177915, "learning_rate": 4.849996644336336e-06, "loss": 0.7419, "step": 18307 }, { "epoch": 0.52, "grad_norm": 6.515576332697254, "learning_rate": 4.8495330952560074e-06, "loss": 0.585, "step": 18308 }, { "epoch": 0.52, "grad_norm": 10.15499931264074, "learning_rate": 4.849069547470126e-06, "loss": 0.8007, "step": 18309 }, { "epoch": 0.52, "grad_norm": 5.606042663754831, "learning_rate": 4.848606000982682e-06, "loss": 0.4867, "step": 18310 }, { "epoch": 0.52, "grad_norm": 7.963490923629841, "learning_rate": 4.848142455797662e-06, "loss": 0.5069, "step": 18311 }, { "epoch": 0.52, "grad_norm": 7.886730721225127, "learning_rate": 4.847678911919053e-06, "loss": 0.3638, "step": 18312 }, { "epoch": 0.52, "grad_norm": 7.1648321539099005, "learning_rate": 4.847215369350843e-06, "loss": 0.6235, "step": 18313 }, { "epoch": 0.52, "grad_norm": 3.716822020244685, "learning_rate": 4.846751828097022e-06, "loss": 0.4387, "step": 18314 }, { "epoch": 0.52, "grad_norm": 3.6931608359534436, "learning_rate": 4.846288288161575e-06, "loss": 0.3948, "step": 18315 }, { "epoch": 0.52, "grad_norm": 7.25826167594263, "learning_rate": 4.845824749548492e-06, "loss": 0.7528, "step": 18316 }, { "epoch": 0.52, "grad_norm": 8.839408401914888, "learning_rate": 4.8453612122617575e-06, "loss": 0.4989, "step": 18317 }, { "epoch": 0.52, "grad_norm": 7.7615588958359565, "learning_rate": 4.844897676305363e-06, "loss": 0.8392, "step": 18318 }, { "epoch": 0.52, "grad_norm": 7.535475119512666, "learning_rate": 4.844434141683295e-06, "loss": 0.9329, "step": 18319 }, { "epoch": 0.52, "grad_norm": 3.538239507582662, "learning_rate": 4.843970608399539e-06, "loss": 0.3788, "step": 18320 }, { "epoch": 0.52, "grad_norm": 4.683733711346701, "learning_rate": 4.843507076458087e-06, "loss": 0.3554, "step": 18321 }, { "epoch": 0.52, "grad_norm": 3.8041150486853894, "learning_rate": 4.843043545862924e-06, "loss": 0.3058, "step": 18322 }, { "epoch": 0.52, "grad_norm": 7.106494764475973, "learning_rate": 4.842580016618039e-06, "loss": 0.6187, "step": 18323 }, { "epoch": 0.52, "grad_norm": 4.9083581270758545, "learning_rate": 4.842116488727416e-06, "loss": 0.7565, "step": 18324 }, { "epoch": 0.52, "grad_norm": 5.395193182285703, "learning_rate": 4.841652962195046e-06, "loss": 0.4789, "step": 18325 }, { "epoch": 0.52, "grad_norm": 6.757843052789496, "learning_rate": 4.841189437024918e-06, "loss": 0.5022, "step": 18326 }, { "epoch": 0.52, "grad_norm": 6.366742189901434, "learning_rate": 4.840725913221015e-06, "loss": 0.5853, "step": 18327 }, { "epoch": 0.52, "grad_norm": 13.642610735297907, "learning_rate": 4.84026239078733e-06, "loss": 0.9271, "step": 18328 }, { "epoch": 0.52, "grad_norm": 5.7901965226207, "learning_rate": 4.839798869727847e-06, "loss": 0.4799, "step": 18329 }, { "epoch": 0.52, "grad_norm": 4.413362052503265, "learning_rate": 4.839335350046555e-06, "loss": 0.3985, "step": 18330 }, { "epoch": 0.52, "grad_norm": 4.054413155919689, "learning_rate": 4.838871831747439e-06, "loss": 0.3316, "step": 18331 }, { "epoch": 0.52, "grad_norm": 4.655643193176996, "learning_rate": 4.838408314834493e-06, "loss": 0.3899, "step": 18332 }, { "epoch": 0.53, "grad_norm": 8.420841981737722, "learning_rate": 4.8379447993116966e-06, "loss": 0.5702, "step": 18333 }, { "epoch": 0.53, "grad_norm": 4.160384509491302, "learning_rate": 4.837481285183041e-06, "loss": 0.1592, "step": 18334 }, { "epoch": 0.53, "grad_norm": 4.355422165967949, "learning_rate": 4.837017772452517e-06, "loss": 0.295, "step": 18335 }, { "epoch": 0.53, "grad_norm": 4.672419946840979, "learning_rate": 4.836554261124106e-06, "loss": 0.5599, "step": 18336 }, { "epoch": 0.53, "grad_norm": 3.9060041731730566, "learning_rate": 4.836090751201801e-06, "loss": 0.5162, "step": 18337 }, { "epoch": 0.53, "grad_norm": 8.257313001353365, "learning_rate": 4.835627242689586e-06, "loss": 0.4484, "step": 18338 }, { "epoch": 0.53, "grad_norm": 4.428351643213033, "learning_rate": 4.83516373559145e-06, "loss": 0.5015, "step": 18339 }, { "epoch": 0.53, "grad_norm": 2.000964766505311, "learning_rate": 4.834700229911378e-06, "loss": 0.2053, "step": 18340 }, { "epoch": 0.53, "grad_norm": 3.1088865078542116, "learning_rate": 4.834236725653363e-06, "loss": 0.279, "step": 18341 }, { "epoch": 0.53, "grad_norm": 4.4621412720734455, "learning_rate": 4.833773222821386e-06, "loss": 0.404, "step": 18342 }, { "epoch": 0.53, "grad_norm": 5.185417791509244, "learning_rate": 4.833309721419438e-06, "loss": 0.4551, "step": 18343 }, { "epoch": 0.53, "grad_norm": 6.388624554385663, "learning_rate": 4.832846221451508e-06, "loss": 0.6885, "step": 18344 }, { "epoch": 0.53, "grad_norm": 11.301794223632806, "learning_rate": 4.832382722921579e-06, "loss": 0.4574, "step": 18345 }, { "epoch": 0.53, "grad_norm": 4.4923202893077505, "learning_rate": 4.831919225833643e-06, "loss": 0.47, "step": 18346 }, { "epoch": 0.53, "grad_norm": 6.396534321131337, "learning_rate": 4.831455730191682e-06, "loss": 0.6649, "step": 18347 }, { "epoch": 0.53, "grad_norm": 8.046823445867924, "learning_rate": 4.83099223599969e-06, "loss": 0.526, "step": 18348 }, { "epoch": 0.53, "grad_norm": 4.14075147327668, "learning_rate": 4.83052874326165e-06, "loss": 0.1715, "step": 18349 }, { "epoch": 0.53, "grad_norm": 4.490651858361612, "learning_rate": 4.8300652519815485e-06, "loss": 0.4302, "step": 18350 }, { "epoch": 0.53, "grad_norm": 7.993035324635236, "learning_rate": 4.829601762163377e-06, "loss": 0.6694, "step": 18351 }, { "epoch": 0.53, "grad_norm": 8.06432895467799, "learning_rate": 4.82913827381112e-06, "loss": 0.5205, "step": 18352 }, { "epoch": 0.53, "grad_norm": 8.092935528518067, "learning_rate": 4.8286747869287655e-06, "loss": 0.4678, "step": 18353 }, { "epoch": 0.53, "grad_norm": 10.700745121689554, "learning_rate": 4.828211301520299e-06, "loss": 1.1507, "step": 18354 }, { "epoch": 0.53, "grad_norm": 2.50714993860132, "learning_rate": 4.827747817589712e-06, "loss": 0.1928, "step": 18355 }, { "epoch": 0.53, "grad_norm": 5.223888813848072, "learning_rate": 4.827284335140988e-06, "loss": 0.4438, "step": 18356 }, { "epoch": 0.53, "grad_norm": 3.905433264226038, "learning_rate": 4.826820854178117e-06, "loss": 0.3611, "step": 18357 }, { "epoch": 0.53, "grad_norm": 4.467900041964726, "learning_rate": 4.826357374705083e-06, "loss": 0.5256, "step": 18358 }, { "epoch": 0.53, "grad_norm": 2.2149623724710095, "learning_rate": 4.825893896725875e-06, "loss": 0.1417, "step": 18359 }, { "epoch": 0.53, "grad_norm": 7.945139655199586, "learning_rate": 4.825430420244482e-06, "loss": 0.3146, "step": 18360 }, { "epoch": 0.53, "grad_norm": 5.218920025368615, "learning_rate": 4.824966945264887e-06, "loss": 0.3132, "step": 18361 }, { "epoch": 0.53, "grad_norm": 5.238395467788784, "learning_rate": 4.824503471791084e-06, "loss": 0.8601, "step": 18362 }, { "epoch": 0.53, "grad_norm": 4.50308217353231, "learning_rate": 4.824039999827052e-06, "loss": 0.549, "step": 18363 }, { "epoch": 0.53, "grad_norm": 3.468253796114728, "learning_rate": 4.823576529376785e-06, "loss": 0.8322, "step": 18364 }, { "epoch": 0.53, "grad_norm": 3.155485476066761, "learning_rate": 4.823113060444266e-06, "loss": 0.4755, "step": 18365 }, { "epoch": 0.53, "grad_norm": 4.667482741844501, "learning_rate": 4.822649593033482e-06, "loss": 0.481, "step": 18366 }, { "epoch": 0.53, "grad_norm": 5.179190679460234, "learning_rate": 4.822186127148425e-06, "loss": 0.2766, "step": 18367 }, { "epoch": 0.53, "grad_norm": 3.303539094619406, "learning_rate": 4.821722662793077e-06, "loss": 0.258, "step": 18368 }, { "epoch": 0.53, "grad_norm": 9.650375702813044, "learning_rate": 4.821259199971429e-06, "loss": 0.1829, "step": 18369 }, { "epoch": 0.53, "grad_norm": 5.641761327181346, "learning_rate": 4.820795738687463e-06, "loss": 0.5557, "step": 18370 }, { "epoch": 0.53, "grad_norm": 7.2048212069181305, "learning_rate": 4.820332278945171e-06, "loss": 0.5574, "step": 18371 }, { "epoch": 0.53, "grad_norm": 3.2378836135705504, "learning_rate": 4.819868820748538e-06, "loss": 0.4011, "step": 18372 }, { "epoch": 0.53, "grad_norm": 5.426048761066445, "learning_rate": 4.819405364101552e-06, "loss": 0.7336, "step": 18373 }, { "epoch": 0.53, "grad_norm": 2.3963861670731097, "learning_rate": 4.818941909008198e-06, "loss": 0.3355, "step": 18374 }, { "epoch": 0.53, "grad_norm": 6.0958730276105655, "learning_rate": 4.818478455472465e-06, "loss": 0.5089, "step": 18375 }, { "epoch": 0.53, "grad_norm": 4.540418241851292, "learning_rate": 4.81801500349834e-06, "loss": 0.3542, "step": 18376 }, { "epoch": 0.53, "grad_norm": 3.7874754108600177, "learning_rate": 4.817551553089808e-06, "loss": 0.8668, "step": 18377 }, { "epoch": 0.53, "grad_norm": 3.888951382059213, "learning_rate": 4.817088104250859e-06, "loss": 0.4349, "step": 18378 }, { "epoch": 0.53, "grad_norm": 4.758405776844738, "learning_rate": 4.816624656985478e-06, "loss": 0.5872, "step": 18379 }, { "epoch": 0.53, "grad_norm": 3.6644815233815926, "learning_rate": 4.816161211297654e-06, "loss": 0.2174, "step": 18380 }, { "epoch": 0.53, "grad_norm": 4.6573528033516745, "learning_rate": 4.8156977671913705e-06, "loss": 0.4568, "step": 18381 }, { "epoch": 0.53, "grad_norm": 4.494885107360609, "learning_rate": 4.815234324670618e-06, "loss": 0.2634, "step": 18382 }, { "epoch": 0.53, "grad_norm": 6.264534853041567, "learning_rate": 4.8147708837393806e-06, "loss": 0.623, "step": 18383 }, { "epoch": 0.53, "grad_norm": 5.954243752594759, "learning_rate": 4.814307444401645e-06, "loss": 0.5448, "step": 18384 }, { "epoch": 0.53, "grad_norm": 4.533847353815273, "learning_rate": 4.8138440066614024e-06, "loss": 0.774, "step": 18385 }, { "epoch": 0.53, "grad_norm": 7.6446134926373315, "learning_rate": 4.813380570522636e-06, "loss": 0.5512, "step": 18386 }, { "epoch": 0.53, "grad_norm": 5.0903173476631745, "learning_rate": 4.812917135989335e-06, "loss": 0.2793, "step": 18387 }, { "epoch": 0.53, "grad_norm": 3.806350532544549, "learning_rate": 4.812453703065481e-06, "loss": 0.3897, "step": 18388 }, { "epoch": 0.53, "grad_norm": 4.395081617011968, "learning_rate": 4.811990271755068e-06, "loss": 0.2825, "step": 18389 }, { "epoch": 0.53, "grad_norm": 3.0295021681147443, "learning_rate": 4.811526842062078e-06, "loss": 0.1623, "step": 18390 }, { "epoch": 0.53, "grad_norm": 5.332040461158481, "learning_rate": 4.8110634139905e-06, "loss": 0.485, "step": 18391 }, { "epoch": 0.53, "grad_norm": 4.671691150939087, "learning_rate": 4.810599987544321e-06, "loss": 0.4475, "step": 18392 }, { "epoch": 0.53, "grad_norm": 3.9227975315041497, "learning_rate": 4.810136562727525e-06, "loss": 0.4863, "step": 18393 }, { "epoch": 0.53, "grad_norm": 5.875217758363592, "learning_rate": 4.8096731395441034e-06, "loss": 0.5929, "step": 18394 }, { "epoch": 0.53, "grad_norm": 6.818752604203466, "learning_rate": 4.809209717998039e-06, "loss": 0.6922, "step": 18395 }, { "epoch": 0.53, "grad_norm": 6.632410655075928, "learning_rate": 4.80874629809332e-06, "loss": 0.7032, "step": 18396 }, { "epoch": 0.53, "grad_norm": 7.783374235983741, "learning_rate": 4.8082828798339315e-06, "loss": 0.5128, "step": 18397 }, { "epoch": 0.53, "grad_norm": 4.084539374828667, "learning_rate": 4.807819463223864e-06, "loss": 0.4127, "step": 18398 }, { "epoch": 0.53, "grad_norm": 4.608182022112373, "learning_rate": 4.807356048267101e-06, "loss": 0.4956, "step": 18399 }, { "epoch": 0.53, "grad_norm": 6.212013391361184, "learning_rate": 4.806892634967629e-06, "loss": 0.5297, "step": 18400 }, { "epoch": 0.53, "grad_norm": 3.30372681418, "learning_rate": 4.806429223329438e-06, "loss": 0.2715, "step": 18401 }, { "epoch": 0.53, "grad_norm": 5.061298745886987, "learning_rate": 4.805965813356512e-06, "loss": 0.5377, "step": 18402 }, { "epoch": 0.53, "grad_norm": 4.467205741127241, "learning_rate": 4.805502405052839e-06, "loss": 0.6671, "step": 18403 }, { "epoch": 0.53, "grad_norm": 7.467659379824637, "learning_rate": 4.805038998422403e-06, "loss": 0.4361, "step": 18404 }, { "epoch": 0.53, "grad_norm": 8.844098950298768, "learning_rate": 4.804575593469193e-06, "loss": 0.4586, "step": 18405 }, { "epoch": 0.53, "grad_norm": 5.197154012726651, "learning_rate": 4.804112190197195e-06, "loss": 0.7346, "step": 18406 }, { "epoch": 0.53, "grad_norm": 7.49635035407107, "learning_rate": 4.803648788610395e-06, "loss": 0.749, "step": 18407 }, { "epoch": 0.53, "grad_norm": 6.077895643128548, "learning_rate": 4.803185388712782e-06, "loss": 0.5567, "step": 18408 }, { "epoch": 0.53, "grad_norm": 5.225188252831361, "learning_rate": 4.80272199050834e-06, "loss": 0.7268, "step": 18409 }, { "epoch": 0.53, "grad_norm": 3.0940839028550107, "learning_rate": 4.802258594001058e-06, "loss": 0.2843, "step": 18410 }, { "epoch": 0.53, "grad_norm": 5.00509753255104, "learning_rate": 4.801795199194919e-06, "loss": 0.3533, "step": 18411 }, { "epoch": 0.53, "grad_norm": 5.790499242496795, "learning_rate": 4.801331806093913e-06, "loss": 0.1324, "step": 18412 }, { "epoch": 0.53, "grad_norm": 4.729739043878346, "learning_rate": 4.8008684147020235e-06, "loss": 0.2717, "step": 18413 }, { "epoch": 0.53, "grad_norm": 8.904282844534077, "learning_rate": 4.800405025023241e-06, "loss": 0.3539, "step": 18414 }, { "epoch": 0.53, "grad_norm": 10.193927398952992, "learning_rate": 4.799941637061546e-06, "loss": 0.8673, "step": 18415 }, { "epoch": 0.53, "grad_norm": 5.806650161093776, "learning_rate": 4.79947825082093e-06, "loss": 0.4073, "step": 18416 }, { "epoch": 0.53, "grad_norm": 8.415201575799616, "learning_rate": 4.799014866305379e-06, "loss": 0.4574, "step": 18417 }, { "epoch": 0.53, "grad_norm": 3.852162003044787, "learning_rate": 4.798551483518876e-06, "loss": 0.2287, "step": 18418 }, { "epoch": 0.53, "grad_norm": 5.1060775709046196, "learning_rate": 4.798088102465413e-06, "loss": 0.5191, "step": 18419 }, { "epoch": 0.53, "grad_norm": 3.31079554341242, "learning_rate": 4.797624723148971e-06, "loss": 0.2247, "step": 18420 }, { "epoch": 0.53, "grad_norm": 5.725722226559419, "learning_rate": 4.797161345573539e-06, "loss": 0.2857, "step": 18421 }, { "epoch": 0.53, "grad_norm": 3.892730357729036, "learning_rate": 4.796697969743103e-06, "loss": 0.3927, "step": 18422 }, { "epoch": 0.53, "grad_norm": 6.263057001740409, "learning_rate": 4.79623459566165e-06, "loss": 0.7497, "step": 18423 }, { "epoch": 0.53, "grad_norm": 3.996330425752562, "learning_rate": 4.795771223333164e-06, "loss": 0.3891, "step": 18424 }, { "epoch": 0.53, "grad_norm": 3.280075626024805, "learning_rate": 4.795307852761634e-06, "loss": 0.6579, "step": 18425 }, { "epoch": 0.53, "grad_norm": 1.5754906885295048, "learning_rate": 4.794844483951045e-06, "loss": 0.2511, "step": 18426 }, { "epoch": 0.53, "grad_norm": 10.140205739057759, "learning_rate": 4.794381116905383e-06, "loss": 0.6938, "step": 18427 }, { "epoch": 0.53, "grad_norm": 3.501628667094763, "learning_rate": 4.793917751628637e-06, "loss": 0.313, "step": 18428 }, { "epoch": 0.53, "grad_norm": 2.8054952494326617, "learning_rate": 4.79345438812479e-06, "loss": 0.3038, "step": 18429 }, { "epoch": 0.53, "grad_norm": 6.36225934425995, "learning_rate": 4.792991026397831e-06, "loss": 0.3606, "step": 18430 }, { "epoch": 0.53, "grad_norm": 3.8253231304785427, "learning_rate": 4.792527666451742e-06, "loss": 0.5206, "step": 18431 }, { "epoch": 0.53, "grad_norm": 5.793098926689234, "learning_rate": 4.792064308290513e-06, "loss": 0.4378, "step": 18432 }, { "epoch": 0.53, "grad_norm": 5.526194357912224, "learning_rate": 4.79160095191813e-06, "loss": 0.3475, "step": 18433 }, { "epoch": 0.53, "grad_norm": 7.068883795783188, "learning_rate": 4.7911375973385765e-06, "loss": 0.2614, "step": 18434 }, { "epoch": 0.53, "grad_norm": 3.315127895816683, "learning_rate": 4.790674244555843e-06, "loss": 0.2593, "step": 18435 }, { "epoch": 0.53, "grad_norm": 5.283406669376477, "learning_rate": 4.790210893573911e-06, "loss": 0.7999, "step": 18436 }, { "epoch": 0.53, "grad_norm": 5.653087849070548, "learning_rate": 4.789747544396771e-06, "loss": 0.6738, "step": 18437 }, { "epoch": 0.53, "grad_norm": 7.068770772699381, "learning_rate": 4.789284197028405e-06, "loss": 0.6378, "step": 18438 }, { "epoch": 0.53, "grad_norm": 4.503855775283964, "learning_rate": 4.788820851472803e-06, "loss": 0.6648, "step": 18439 }, { "epoch": 0.53, "grad_norm": 5.359807789402262, "learning_rate": 4.788357507733948e-06, "loss": 0.6143, "step": 18440 }, { "epoch": 0.53, "grad_norm": 9.784396606928295, "learning_rate": 4.787894165815826e-06, "loss": 1.0478, "step": 18441 }, { "epoch": 0.53, "grad_norm": 9.756535516015331, "learning_rate": 4.787430825722426e-06, "loss": 0.5273, "step": 18442 }, { "epoch": 0.53, "grad_norm": 7.1581971297297535, "learning_rate": 4.786967487457733e-06, "loss": 0.6638, "step": 18443 }, { "epoch": 0.53, "grad_norm": 4.875599164088687, "learning_rate": 4.786504151025733e-06, "loss": 0.3565, "step": 18444 }, { "epoch": 0.53, "grad_norm": 7.398977451298869, "learning_rate": 4.786040816430408e-06, "loss": 0.6071, "step": 18445 }, { "epoch": 0.53, "grad_norm": 9.575206019324076, "learning_rate": 4.785577483675751e-06, "loss": 0.7418, "step": 18446 }, { "epoch": 0.53, "grad_norm": 1.8605059381947406, "learning_rate": 4.785114152765743e-06, "loss": 0.0431, "step": 18447 }, { "epoch": 0.53, "grad_norm": 6.354105047542116, "learning_rate": 4.784650823704371e-06, "loss": 0.8557, "step": 18448 }, { "epoch": 0.53, "grad_norm": 3.7759705565322976, "learning_rate": 4.784187496495624e-06, "loss": 0.2175, "step": 18449 }, { "epoch": 0.53, "grad_norm": 6.4591033435497085, "learning_rate": 4.7837241711434825e-06, "loss": 0.5418, "step": 18450 }, { "epoch": 0.53, "grad_norm": 2.654502944862743, "learning_rate": 4.783260847651937e-06, "loss": 0.375, "step": 18451 }, { "epoch": 0.53, "grad_norm": 5.652716570977659, "learning_rate": 4.782797526024971e-06, "loss": 0.4274, "step": 18452 }, { "epoch": 0.53, "grad_norm": 6.009347945145969, "learning_rate": 4.782334206266572e-06, "loss": 0.5067, "step": 18453 }, { "epoch": 0.53, "grad_norm": 3.147394580242171, "learning_rate": 4.781870888380724e-06, "loss": 0.2434, "step": 18454 }, { "epoch": 0.53, "grad_norm": 7.4189387164903176, "learning_rate": 4.781407572371414e-06, "loss": 0.2851, "step": 18455 }, { "epoch": 0.53, "grad_norm": 5.31839109166859, "learning_rate": 4.780944258242629e-06, "loss": 0.2541, "step": 18456 }, { "epoch": 0.53, "grad_norm": 6.195184001576145, "learning_rate": 4.78048094599835e-06, "loss": 0.1845, "step": 18457 }, { "epoch": 0.53, "grad_norm": 8.358464956213872, "learning_rate": 4.780017635642571e-06, "loss": 0.6019, "step": 18458 }, { "epoch": 0.53, "grad_norm": 5.78145664979854, "learning_rate": 4.77955432717927e-06, "loss": 0.5644, "step": 18459 }, { "epoch": 0.53, "grad_norm": 5.504579544849558, "learning_rate": 4.7790910206124384e-06, "loss": 0.5273, "step": 18460 }, { "epoch": 0.53, "grad_norm": 6.709935496513406, "learning_rate": 4.778627715946057e-06, "loss": 0.5689, "step": 18461 }, { "epoch": 0.53, "grad_norm": 7.016067830746332, "learning_rate": 4.778164413184117e-06, "loss": 0.588, "step": 18462 }, { "epoch": 0.53, "grad_norm": 4.257164078080784, "learning_rate": 4.7777011123305985e-06, "loss": 0.4508, "step": 18463 }, { "epoch": 0.53, "grad_norm": 6.274950199005566, "learning_rate": 4.777237813389492e-06, "loss": 0.639, "step": 18464 }, { "epoch": 0.53, "grad_norm": 10.314610167111265, "learning_rate": 4.776774516364779e-06, "loss": 0.6941, "step": 18465 }, { "epoch": 0.53, "grad_norm": 4.025973111745545, "learning_rate": 4.7763112212604485e-06, "loss": 0.1268, "step": 18466 }, { "epoch": 0.53, "grad_norm": 3.366891445235175, "learning_rate": 4.775847928080486e-06, "loss": 0.2221, "step": 18467 }, { "epoch": 0.53, "grad_norm": 4.182762341203929, "learning_rate": 4.775384636828874e-06, "loss": 0.2102, "step": 18468 }, { "epoch": 0.53, "grad_norm": 6.439811254602387, "learning_rate": 4.774921347509602e-06, "loss": 0.5561, "step": 18469 }, { "epoch": 0.53, "grad_norm": 4.222020668662261, "learning_rate": 4.774458060126654e-06, "loss": 0.1911, "step": 18470 }, { "epoch": 0.53, "grad_norm": 7.279457002500339, "learning_rate": 4.773994774684016e-06, "loss": 0.4352, "step": 18471 }, { "epoch": 0.53, "grad_norm": 2.781715782681199, "learning_rate": 4.773531491185671e-06, "loss": 0.4211, "step": 18472 }, { "epoch": 0.53, "grad_norm": 3.1077258083091466, "learning_rate": 4.773068209635607e-06, "loss": 0.3469, "step": 18473 }, { "epoch": 0.53, "grad_norm": 4.74363533052308, "learning_rate": 4.772604930037811e-06, "loss": 0.5492, "step": 18474 }, { "epoch": 0.53, "grad_norm": 4.011703595185744, "learning_rate": 4.772141652396265e-06, "loss": 0.4699, "step": 18475 }, { "epoch": 0.53, "grad_norm": 14.091854703894251, "learning_rate": 4.771678376714958e-06, "loss": 0.7342, "step": 18476 }, { "epoch": 0.53, "grad_norm": 3.3321088667815046, "learning_rate": 4.7712151029978735e-06, "loss": 0.2493, "step": 18477 }, { "epoch": 0.53, "grad_norm": 9.149592925224962, "learning_rate": 4.770751831248998e-06, "loss": 0.8194, "step": 18478 }, { "epoch": 0.53, "grad_norm": 3.7769713035727563, "learning_rate": 4.770288561472314e-06, "loss": 0.2871, "step": 18479 }, { "epoch": 0.53, "grad_norm": 6.809017056619842, "learning_rate": 4.769825293671812e-06, "loss": 0.2465, "step": 18480 }, { "epoch": 0.53, "grad_norm": 8.127974625912055, "learning_rate": 4.7693620278514714e-06, "loss": 0.4853, "step": 18481 }, { "epoch": 0.53, "grad_norm": 5.223933289893515, "learning_rate": 4.7688987640152825e-06, "loss": 0.4215, "step": 18482 }, { "epoch": 0.53, "grad_norm": 4.776685792618923, "learning_rate": 4.76843550216723e-06, "loss": 0.864, "step": 18483 }, { "epoch": 0.53, "grad_norm": 5.2555920927369595, "learning_rate": 4.767972242311296e-06, "loss": 0.5263, "step": 18484 }, { "epoch": 0.53, "grad_norm": 6.247360816678278, "learning_rate": 4.76750898445147e-06, "loss": 0.4463, "step": 18485 }, { "epoch": 0.53, "grad_norm": 5.239108732152065, "learning_rate": 4.767045728591736e-06, "loss": 0.3981, "step": 18486 }, { "epoch": 0.53, "grad_norm": 11.8923596033001, "learning_rate": 4.766582474736079e-06, "loss": 0.5834, "step": 18487 }, { "epoch": 0.53, "grad_norm": 3.0519307568183387, "learning_rate": 4.766119222888482e-06, "loss": 0.2176, "step": 18488 }, { "epoch": 0.53, "grad_norm": 5.090601246253246, "learning_rate": 4.765655973052933e-06, "loss": 0.5043, "step": 18489 }, { "epoch": 0.53, "grad_norm": 3.3235337373029212, "learning_rate": 4.765192725233419e-06, "loss": 0.3465, "step": 18490 }, { "epoch": 0.53, "grad_norm": 9.922786256289747, "learning_rate": 4.764729479433921e-06, "loss": 0.4997, "step": 18491 }, { "epoch": 0.53, "grad_norm": 6.860043712510062, "learning_rate": 4.764266235658428e-06, "loss": 0.6034, "step": 18492 }, { "epoch": 0.53, "grad_norm": 5.5575717782736245, "learning_rate": 4.763802993910922e-06, "loss": 0.4166, "step": 18493 }, { "epoch": 0.53, "grad_norm": 6.1526153988887575, "learning_rate": 4.763339754195392e-06, "loss": 0.4416, "step": 18494 }, { "epoch": 0.53, "grad_norm": 8.046012817414326, "learning_rate": 4.762876516515817e-06, "loss": 0.4044, "step": 18495 }, { "epoch": 0.53, "grad_norm": 5.506869837397319, "learning_rate": 4.762413280876191e-06, "loss": 0.2799, "step": 18496 }, { "epoch": 0.53, "grad_norm": 4.834691048803846, "learning_rate": 4.761950047280491e-06, "loss": 0.7981, "step": 18497 }, { "epoch": 0.53, "grad_norm": 3.874007897890358, "learning_rate": 4.761486815732705e-06, "loss": 0.4335, "step": 18498 }, { "epoch": 0.53, "grad_norm": 4.566908274417503, "learning_rate": 4.761023586236821e-06, "loss": 0.6226, "step": 18499 }, { "epoch": 0.53, "grad_norm": 5.116924902315121, "learning_rate": 4.760560358796819e-06, "loss": 0.4857, "step": 18500 }, { "epoch": 0.53, "grad_norm": 3.2056310387143774, "learning_rate": 4.76009713341669e-06, "loss": 0.5357, "step": 18501 }, { "epoch": 0.53, "grad_norm": 4.216713449095496, "learning_rate": 4.759633910100413e-06, "loss": 0.6722, "step": 18502 }, { "epoch": 0.53, "grad_norm": 3.0134009426755264, "learning_rate": 4.759170688851978e-06, "loss": 0.2826, "step": 18503 }, { "epoch": 0.53, "grad_norm": 4.509447961376933, "learning_rate": 4.758707469675366e-06, "loss": 0.4027, "step": 18504 }, { "epoch": 0.53, "grad_norm": 5.321816611291672, "learning_rate": 4.758244252574566e-06, "loss": 0.3867, "step": 18505 }, { "epoch": 0.53, "grad_norm": 4.6965464209917736, "learning_rate": 4.757781037553558e-06, "loss": 0.4552, "step": 18506 }, { "epoch": 0.53, "grad_norm": 3.5566755834596995, "learning_rate": 4.757317824616332e-06, "loss": 0.2604, "step": 18507 }, { "epoch": 0.53, "grad_norm": 6.133937970537174, "learning_rate": 4.756854613766871e-06, "loss": 0.394, "step": 18508 }, { "epoch": 0.53, "grad_norm": 5.713873613347737, "learning_rate": 4.756391405009159e-06, "loss": 0.7298, "step": 18509 }, { "epoch": 0.53, "grad_norm": 3.8337419990030686, "learning_rate": 4.755928198347183e-06, "loss": 0.3888, "step": 18510 }, { "epoch": 0.53, "grad_norm": 3.841855132568383, "learning_rate": 4.755464993784924e-06, "loss": 0.5181, "step": 18511 }, { "epoch": 0.53, "grad_norm": 9.911082632321362, "learning_rate": 4.755001791326373e-06, "loss": 0.3618, "step": 18512 }, { "epoch": 0.53, "grad_norm": 5.091170447664849, "learning_rate": 4.75453859097551e-06, "loss": 0.4356, "step": 18513 }, { "epoch": 0.53, "grad_norm": 4.550372741438777, "learning_rate": 4.754075392736319e-06, "loss": 0.3527, "step": 18514 }, { "epoch": 0.53, "grad_norm": 6.250358113519673, "learning_rate": 4.7536121966127905e-06, "loss": 0.4895, "step": 18515 }, { "epoch": 0.53, "grad_norm": 6.172066716946696, "learning_rate": 4.753149002608904e-06, "loss": 0.6096, "step": 18516 }, { "epoch": 0.53, "grad_norm": 5.163185772364412, "learning_rate": 4.752685810728648e-06, "loss": 0.3176, "step": 18517 }, { "epoch": 0.53, "grad_norm": 6.976848942428545, "learning_rate": 4.7522226209760025e-06, "loss": 0.9717, "step": 18518 }, { "epoch": 0.53, "grad_norm": 5.742218557260824, "learning_rate": 4.751759433354958e-06, "loss": 0.3554, "step": 18519 }, { "epoch": 0.53, "grad_norm": 7.446908305450251, "learning_rate": 4.751296247869496e-06, "loss": 0.3989, "step": 18520 }, { "epoch": 0.53, "grad_norm": 12.492373237875174, "learning_rate": 4.750833064523602e-06, "loss": 0.7869, "step": 18521 }, { "epoch": 0.53, "grad_norm": 4.145552039387902, "learning_rate": 4.750369883321259e-06, "loss": 0.4012, "step": 18522 }, { "epoch": 0.53, "grad_norm": 6.163946346450476, "learning_rate": 4.7499067042664535e-06, "loss": 0.5492, "step": 18523 }, { "epoch": 0.53, "grad_norm": 8.032441967333947, "learning_rate": 4.74944352736317e-06, "loss": 0.5264, "step": 18524 }, { "epoch": 0.53, "grad_norm": 5.542221072675761, "learning_rate": 4.748980352615393e-06, "loss": 0.4855, "step": 18525 }, { "epoch": 0.53, "grad_norm": 5.57394333648898, "learning_rate": 4.748517180027108e-06, "loss": 0.5291, "step": 18526 }, { "epoch": 0.53, "grad_norm": 21.900011183679243, "learning_rate": 4.748054009602298e-06, "loss": 0.4275, "step": 18527 }, { "epoch": 0.53, "grad_norm": 5.022685895017337, "learning_rate": 4.747590841344949e-06, "loss": 0.5526, "step": 18528 }, { "epoch": 0.53, "grad_norm": 7.0646217332398376, "learning_rate": 4.747127675259043e-06, "loss": 0.7695, "step": 18529 }, { "epoch": 0.53, "grad_norm": 8.021922502425351, "learning_rate": 4.74666451134857e-06, "loss": 0.4883, "step": 18530 }, { "epoch": 0.53, "grad_norm": 4.628957241115507, "learning_rate": 4.746201349617508e-06, "loss": 0.4905, "step": 18531 }, { "epoch": 0.53, "grad_norm": 6.523966766632962, "learning_rate": 4.745738190069844e-06, "loss": 0.7775, "step": 18532 }, { "epoch": 0.53, "grad_norm": 2.6527514137391566, "learning_rate": 4.745275032709566e-06, "loss": 0.2619, "step": 18533 }, { "epoch": 0.53, "grad_norm": 5.590997860683796, "learning_rate": 4.744811877540654e-06, "loss": 0.4572, "step": 18534 }, { "epoch": 0.53, "grad_norm": 5.1330516184669985, "learning_rate": 4.744348724567094e-06, "loss": 0.6947, "step": 18535 }, { "epoch": 0.53, "grad_norm": 4.9654761030993235, "learning_rate": 4.74388557379287e-06, "loss": 0.4313, "step": 18536 }, { "epoch": 0.53, "grad_norm": 5.2131162375218825, "learning_rate": 4.743422425221969e-06, "loss": 0.6308, "step": 18537 }, { "epoch": 0.53, "grad_norm": 5.717729811009243, "learning_rate": 4.74295927885837e-06, "loss": 0.5975, "step": 18538 }, { "epoch": 0.53, "grad_norm": 5.573558230919867, "learning_rate": 4.742496134706063e-06, "loss": 0.6161, "step": 18539 }, { "epoch": 0.53, "grad_norm": 4.900768717003779, "learning_rate": 4.74203299276903e-06, "loss": 0.6478, "step": 18540 }, { "epoch": 0.53, "grad_norm": 8.500640171851694, "learning_rate": 4.741569853051253e-06, "loss": 0.6715, "step": 18541 }, { "epoch": 0.53, "grad_norm": 7.216017432959363, "learning_rate": 4.741106715556721e-06, "loss": 0.3527, "step": 18542 }, { "epoch": 0.53, "grad_norm": 6.982563816575196, "learning_rate": 4.740643580289416e-06, "loss": 0.5, "step": 18543 }, { "epoch": 0.53, "grad_norm": 7.399570076284349, "learning_rate": 4.740180447253323e-06, "loss": 0.5401, "step": 18544 }, { "epoch": 0.53, "grad_norm": 3.5379597880664355, "learning_rate": 4.7397173164524226e-06, "loss": 0.3804, "step": 18545 }, { "epoch": 0.53, "grad_norm": 6.970655309118189, "learning_rate": 4.739254187890705e-06, "loss": 0.6194, "step": 18546 }, { "epoch": 0.53, "grad_norm": 5.7389314307812445, "learning_rate": 4.73879106157215e-06, "loss": 0.4402, "step": 18547 }, { "epoch": 0.53, "grad_norm": 3.7728385370942714, "learning_rate": 4.738327937500743e-06, "loss": 0.245, "step": 18548 }, { "epoch": 0.53, "grad_norm": 7.161935272056394, "learning_rate": 4.73786481568047e-06, "loss": 0.5516, "step": 18549 }, { "epoch": 0.53, "grad_norm": 8.292972084939567, "learning_rate": 4.737401696115313e-06, "loss": 0.5809, "step": 18550 }, { "epoch": 0.53, "grad_norm": 6.265161739736735, "learning_rate": 4.736938578809258e-06, "loss": 0.1976, "step": 18551 }, { "epoch": 0.53, "grad_norm": 13.271897341693913, "learning_rate": 4.736475463766286e-06, "loss": 0.6548, "step": 18552 }, { "epoch": 0.53, "grad_norm": 9.147149051461515, "learning_rate": 4.736012350990385e-06, "loss": 0.5906, "step": 18553 }, { "epoch": 0.53, "grad_norm": 4.405147184517479, "learning_rate": 4.735549240485536e-06, "loss": 0.3512, "step": 18554 }, { "epoch": 0.53, "grad_norm": 7.334416468421396, "learning_rate": 4.735086132255723e-06, "loss": 0.7727, "step": 18555 }, { "epoch": 0.53, "grad_norm": 4.51994412843387, "learning_rate": 4.734623026304934e-06, "loss": 0.3491, "step": 18556 }, { "epoch": 0.53, "grad_norm": 13.232096920738991, "learning_rate": 4.73415992263715e-06, "loss": 0.3241, "step": 18557 }, { "epoch": 0.53, "grad_norm": 6.018000146218651, "learning_rate": 4.733696821256356e-06, "loss": 0.3153, "step": 18558 }, { "epoch": 0.53, "grad_norm": 3.1548527939274646, "learning_rate": 4.733233722166533e-06, "loss": 0.2549, "step": 18559 }, { "epoch": 0.53, "grad_norm": 5.263549803271995, "learning_rate": 4.732770625371669e-06, "loss": 0.5816, "step": 18560 }, { "epoch": 0.53, "grad_norm": 1.737461222414869, "learning_rate": 4.732307530875747e-06, "loss": 0.2766, "step": 18561 }, { "epoch": 0.53, "grad_norm": 18.073721069844662, "learning_rate": 4.73184443868275e-06, "loss": 0.5016, "step": 18562 }, { "epoch": 0.53, "grad_norm": 9.729923730729682, "learning_rate": 4.7313813487966606e-06, "loss": 0.6339, "step": 18563 }, { "epoch": 0.53, "grad_norm": 3.3396345106468415, "learning_rate": 4.730918261221465e-06, "loss": 0.3306, "step": 18564 }, { "epoch": 0.53, "grad_norm": 3.0007069072735804, "learning_rate": 4.730455175961149e-06, "loss": 0.1934, "step": 18565 }, { "epoch": 0.53, "grad_norm": 5.888153448105983, "learning_rate": 4.729992093019691e-06, "loss": 0.6026, "step": 18566 }, { "epoch": 0.53, "grad_norm": 7.372094713585801, "learning_rate": 4.72952901240108e-06, "loss": 0.4249, "step": 18567 }, { "epoch": 0.53, "grad_norm": 7.331578658884367, "learning_rate": 4.729065934109296e-06, "loss": 0.4541, "step": 18568 }, { "epoch": 0.53, "grad_norm": 6.203766374076404, "learning_rate": 4.728602858148325e-06, "loss": 0.452, "step": 18569 }, { "epoch": 0.53, "grad_norm": 7.588164736240974, "learning_rate": 4.72813978452215e-06, "loss": 0.8787, "step": 18570 }, { "epoch": 0.53, "grad_norm": 9.084509333039541, "learning_rate": 4.727676713234756e-06, "loss": 0.8813, "step": 18571 }, { "epoch": 0.53, "grad_norm": 8.29015179954119, "learning_rate": 4.727213644290123e-06, "loss": 0.689, "step": 18572 }, { "epoch": 0.53, "grad_norm": 3.2530455990939546, "learning_rate": 4.726750577692239e-06, "loss": 0.4038, "step": 18573 }, { "epoch": 0.53, "grad_norm": 3.391019912434023, "learning_rate": 4.726287513445088e-06, "loss": 0.307, "step": 18574 }, { "epoch": 0.53, "grad_norm": 7.084773766157037, "learning_rate": 4.725824451552649e-06, "loss": 0.4357, "step": 18575 }, { "epoch": 0.53, "grad_norm": 2.0246553475049196, "learning_rate": 4.72536139201891e-06, "loss": 0.2191, "step": 18576 }, { "epoch": 0.53, "grad_norm": 10.892148583123348, "learning_rate": 4.724898334847852e-06, "loss": 0.6856, "step": 18577 }, { "epoch": 0.53, "grad_norm": 7.07985802459608, "learning_rate": 4.724435280043462e-06, "loss": 0.7172, "step": 18578 }, { "epoch": 0.53, "grad_norm": 7.664225414228352, "learning_rate": 4.723972227609719e-06, "loss": 0.382, "step": 18579 }, { "epoch": 0.53, "grad_norm": 5.384093510049512, "learning_rate": 4.723509177550609e-06, "loss": 0.6687, "step": 18580 }, { "epoch": 0.53, "grad_norm": 3.2163332226894514, "learning_rate": 4.7230461298701174e-06, "loss": 0.3514, "step": 18581 }, { "epoch": 0.53, "grad_norm": 10.396076992036026, "learning_rate": 4.722583084572224e-06, "loss": 0.7558, "step": 18582 }, { "epoch": 0.53, "grad_norm": 4.4592928017389974, "learning_rate": 4.7221200416609155e-06, "loss": 0.3412, "step": 18583 }, { "epoch": 0.53, "grad_norm": 7.033664899019385, "learning_rate": 4.721657001140173e-06, "loss": 0.5146, "step": 18584 }, { "epoch": 0.53, "grad_norm": 7.373635246598378, "learning_rate": 4.721193963013983e-06, "loss": 0.7365, "step": 18585 }, { "epoch": 0.53, "grad_norm": 4.558172551352576, "learning_rate": 4.720730927286324e-06, "loss": 0.5242, "step": 18586 }, { "epoch": 0.53, "grad_norm": 4.492467430349072, "learning_rate": 4.720267893961185e-06, "loss": 0.5796, "step": 18587 }, { "epoch": 0.53, "grad_norm": 4.239367817767613, "learning_rate": 4.719804863042545e-06, "loss": 0.5304, "step": 18588 }, { "epoch": 0.53, "grad_norm": 5.935164986288016, "learning_rate": 4.7193418345343895e-06, "loss": 0.5728, "step": 18589 }, { "epoch": 0.53, "grad_norm": 6.874495886613676, "learning_rate": 4.718878808440703e-06, "loss": 0.6663, "step": 18590 }, { "epoch": 0.53, "grad_norm": 8.202902276784826, "learning_rate": 4.718415784765466e-06, "loss": 0.8326, "step": 18591 }, { "epoch": 0.53, "grad_norm": 3.4011145560071183, "learning_rate": 4.717952763512665e-06, "loss": 0.3334, "step": 18592 }, { "epoch": 0.53, "grad_norm": 5.186568739615453, "learning_rate": 4.71748974468628e-06, "loss": 0.5429, "step": 18593 }, { "epoch": 0.53, "grad_norm": 6.6699702203877935, "learning_rate": 4.717026728290299e-06, "loss": 0.813, "step": 18594 }, { "epoch": 0.53, "grad_norm": 5.311246387447243, "learning_rate": 4.716563714328699e-06, "loss": 0.1984, "step": 18595 }, { "epoch": 0.53, "grad_norm": 5.662697505243298, "learning_rate": 4.7161007028054655e-06, "loss": 0.7808, "step": 18596 }, { "epoch": 0.53, "grad_norm": 6.343267826491123, "learning_rate": 4.715637693724586e-06, "loss": 0.5078, "step": 18597 }, { "epoch": 0.53, "grad_norm": 8.913230810391319, "learning_rate": 4.715174687090038e-06, "loss": 0.2264, "step": 18598 }, { "epoch": 0.53, "grad_norm": 8.254312255232701, "learning_rate": 4.714711682905809e-06, "loss": 0.3946, "step": 18599 }, { "epoch": 0.53, "grad_norm": 7.249876087050584, "learning_rate": 4.714248681175879e-06, "loss": 0.3983, "step": 18600 }, { "epoch": 0.53, "grad_norm": 9.41540512704365, "learning_rate": 4.713785681904234e-06, "loss": 0.7974, "step": 18601 }, { "epoch": 0.53, "grad_norm": 4.717978584947614, "learning_rate": 4.7133226850948536e-06, "loss": 0.557, "step": 18602 }, { "epoch": 0.53, "grad_norm": 6.093706883987091, "learning_rate": 4.7128596907517234e-06, "loss": 0.628, "step": 18603 }, { "epoch": 0.53, "grad_norm": 3.8932931016586294, "learning_rate": 4.712396698878826e-06, "loss": 0.3084, "step": 18604 }, { "epoch": 0.53, "grad_norm": 3.754437554874843, "learning_rate": 4.711933709480144e-06, "loss": 0.2521, "step": 18605 }, { "epoch": 0.53, "grad_norm": 7.287873869346753, "learning_rate": 4.711470722559662e-06, "loss": 0.4587, "step": 18606 }, { "epoch": 0.53, "grad_norm": 8.994238810878294, "learning_rate": 4.7110077381213605e-06, "loss": 0.2945, "step": 18607 }, { "epoch": 0.53, "grad_norm": 8.421539809540288, "learning_rate": 4.7105447561692255e-06, "loss": 0.9171, "step": 18608 }, { "epoch": 0.53, "grad_norm": 4.870051831590369, "learning_rate": 4.710081776707236e-06, "loss": 0.4763, "step": 18609 }, { "epoch": 0.53, "grad_norm": 5.572471446378723, "learning_rate": 4.709618799739379e-06, "loss": 0.3863, "step": 18610 }, { "epoch": 0.53, "grad_norm": 2.1304595180489105, "learning_rate": 4.709155825269636e-06, "loss": 0.1227, "step": 18611 }, { "epoch": 0.53, "grad_norm": 8.92645635076009, "learning_rate": 4.708692853301989e-06, "loss": 0.9096, "step": 18612 }, { "epoch": 0.53, "grad_norm": 7.199004676686975, "learning_rate": 4.70822988384042e-06, "loss": 0.3258, "step": 18613 }, { "epoch": 0.53, "grad_norm": 5.332062952415484, "learning_rate": 4.707766916888915e-06, "loss": 0.4209, "step": 18614 }, { "epoch": 0.53, "grad_norm": 3.720212632736989, "learning_rate": 4.7073039524514564e-06, "loss": 0.4033, "step": 18615 }, { "epoch": 0.53, "grad_norm": 4.748457256550938, "learning_rate": 4.7068409905320225e-06, "loss": 0.3875, "step": 18616 }, { "epoch": 0.53, "grad_norm": 22.339041943476282, "learning_rate": 4.706378031134603e-06, "loss": 0.8032, "step": 18617 }, { "epoch": 0.53, "grad_norm": 5.36361739959068, "learning_rate": 4.705915074263175e-06, "loss": 0.1679, "step": 18618 }, { "epoch": 0.53, "grad_norm": 8.30544012755693, "learning_rate": 4.705452119921725e-06, "loss": 1.0554, "step": 18619 }, { "epoch": 0.53, "grad_norm": 6.207169835516293, "learning_rate": 4.704989168114232e-06, "loss": 0.5861, "step": 18620 }, { "epoch": 0.53, "grad_norm": 6.354513498648374, "learning_rate": 4.704526218844682e-06, "loss": 0.4558, "step": 18621 }, { "epoch": 0.53, "grad_norm": 4.958162531862742, "learning_rate": 4.704063272117058e-06, "loss": 0.3491, "step": 18622 }, { "epoch": 0.53, "grad_norm": 3.831582419332235, "learning_rate": 4.703600327935338e-06, "loss": 0.8077, "step": 18623 }, { "epoch": 0.53, "grad_norm": 7.788587182635479, "learning_rate": 4.703137386303512e-06, "loss": 0.8343, "step": 18624 }, { "epoch": 0.53, "grad_norm": 3.10475568922059, "learning_rate": 4.702674447225555e-06, "loss": 0.2452, "step": 18625 }, { "epoch": 0.53, "grad_norm": 9.857970462633313, "learning_rate": 4.702211510705455e-06, "loss": 0.9196, "step": 18626 }, { "epoch": 0.53, "grad_norm": 6.72434267993786, "learning_rate": 4.701748576747192e-06, "loss": 0.485, "step": 18627 }, { "epoch": 0.53, "grad_norm": 3.141509221041727, "learning_rate": 4.70128564535475e-06, "loss": 0.309, "step": 18628 }, { "epoch": 0.53, "grad_norm": 5.642883862920207, "learning_rate": 4.700822716532109e-06, "loss": 0.2141, "step": 18629 }, { "epoch": 0.53, "grad_norm": 4.7116550892239095, "learning_rate": 4.7003597902832546e-06, "loss": 0.6684, "step": 18630 }, { "epoch": 0.53, "grad_norm": 8.654587238238587, "learning_rate": 4.6998968666121685e-06, "loss": 0.5618, "step": 18631 }, { "epoch": 0.53, "grad_norm": 1.6906964087785836, "learning_rate": 4.699433945522831e-06, "loss": 0.2582, "step": 18632 }, { "epoch": 0.53, "grad_norm": 5.286777335127425, "learning_rate": 4.698971027019229e-06, "loss": 0.4984, "step": 18633 }, { "epoch": 0.53, "grad_norm": 4.690664393128052, "learning_rate": 4.69850811110534e-06, "loss": 0.4534, "step": 18634 }, { "epoch": 0.53, "grad_norm": 7.459837763558815, "learning_rate": 4.69804519778515e-06, "loss": 1.1214, "step": 18635 }, { "epoch": 0.53, "grad_norm": 3.4598222748364234, "learning_rate": 4.697582287062638e-06, "loss": 0.1831, "step": 18636 }, { "epoch": 0.53, "grad_norm": 5.242561156462368, "learning_rate": 4.697119378941789e-06, "loss": 0.6421, "step": 18637 }, { "epoch": 0.53, "grad_norm": 7.275066910268879, "learning_rate": 4.696656473426586e-06, "loss": 1.0393, "step": 18638 }, { "epoch": 0.53, "grad_norm": 6.5431506504565755, "learning_rate": 4.6961935705210085e-06, "loss": 0.4915, "step": 18639 }, { "epoch": 0.53, "grad_norm": 10.999740835951517, "learning_rate": 4.695730670229041e-06, "loss": 0.412, "step": 18640 }, { "epoch": 0.53, "grad_norm": 7.082655915423548, "learning_rate": 4.695267772554665e-06, "loss": 0.2857, "step": 18641 }, { "epoch": 0.53, "grad_norm": 7.371567072628601, "learning_rate": 4.694804877501864e-06, "loss": 0.6386, "step": 18642 }, { "epoch": 0.53, "grad_norm": 4.753059405815371, "learning_rate": 4.694341985074617e-06, "loss": 0.2594, "step": 18643 }, { "epoch": 0.53, "grad_norm": 3.9289858364161594, "learning_rate": 4.6938790952769105e-06, "loss": 0.5132, "step": 18644 }, { "epoch": 0.53, "grad_norm": 7.622509534009737, "learning_rate": 4.6934162081127225e-06, "loss": 0.8293, "step": 18645 }, { "epoch": 0.53, "grad_norm": 11.2303726218712, "learning_rate": 4.692953323586037e-06, "loss": 0.274, "step": 18646 }, { "epoch": 0.53, "grad_norm": 4.349274696523373, "learning_rate": 4.6924904417008375e-06, "loss": 0.4291, "step": 18647 }, { "epoch": 0.53, "grad_norm": 3.9610816131799504, "learning_rate": 4.692027562461104e-06, "loss": 0.4276, "step": 18648 }, { "epoch": 0.53, "grad_norm": 7.979496310789383, "learning_rate": 4.691564685870822e-06, "loss": 0.509, "step": 18649 }, { "epoch": 0.53, "grad_norm": 1.8795747499587776, "learning_rate": 4.691101811933967e-06, "loss": 0.0668, "step": 18650 }, { "epoch": 0.53, "grad_norm": 4.3191371098328375, "learning_rate": 4.690638940654528e-06, "loss": 0.3377, "step": 18651 }, { "epoch": 0.53, "grad_norm": 8.428227507661289, "learning_rate": 4.6901760720364834e-06, "loss": 0.9138, "step": 18652 }, { "epoch": 0.53, "grad_norm": 3.420118116045681, "learning_rate": 4.689713206083816e-06, "loss": 0.5654, "step": 18653 }, { "epoch": 0.53, "grad_norm": 7.970314979216313, "learning_rate": 4.689250342800507e-06, "loss": 0.6958, "step": 18654 }, { "epoch": 0.53, "grad_norm": 8.543059730132542, "learning_rate": 4.688787482190538e-06, "loss": 0.8958, "step": 18655 }, { "epoch": 0.53, "grad_norm": 1.93873372714274, "learning_rate": 4.6883246242578945e-06, "loss": 0.2961, "step": 18656 }, { "epoch": 0.53, "grad_norm": 2.954904292730336, "learning_rate": 4.6878617690065545e-06, "loss": 0.3036, "step": 18657 }, { "epoch": 0.53, "grad_norm": 6.558921783041378, "learning_rate": 4.687398916440503e-06, "loss": 0.3768, "step": 18658 }, { "epoch": 0.53, "grad_norm": 9.149527519826341, "learning_rate": 4.686936066563717e-06, "loss": 0.7142, "step": 18659 }, { "epoch": 0.53, "grad_norm": 5.451057020102943, "learning_rate": 4.686473219380183e-06, "loss": 0.3067, "step": 18660 }, { "epoch": 0.53, "grad_norm": 4.965316521849856, "learning_rate": 4.686010374893882e-06, "loss": 0.5307, "step": 18661 }, { "epoch": 0.53, "grad_norm": 5.935029087911282, "learning_rate": 4.685547533108793e-06, "loss": 0.2453, "step": 18662 }, { "epoch": 0.53, "grad_norm": 3.862301199775211, "learning_rate": 4.6850846940289025e-06, "loss": 0.3255, "step": 18663 }, { "epoch": 0.53, "grad_norm": 3.1865395987752905, "learning_rate": 4.684621857658188e-06, "loss": 0.3357, "step": 18664 }, { "epoch": 0.53, "grad_norm": 3.0270276895245267, "learning_rate": 4.684159024000634e-06, "loss": 0.4662, "step": 18665 }, { "epoch": 0.53, "grad_norm": 7.256151614971979, "learning_rate": 4.68369619306022e-06, "loss": 0.8119, "step": 18666 }, { "epoch": 0.53, "grad_norm": 1.0737461923347433, "learning_rate": 4.683233364840929e-06, "loss": 0.0428, "step": 18667 }, { "epoch": 0.53, "grad_norm": 5.555308118184254, "learning_rate": 4.6827705393467425e-06, "loss": 0.8184, "step": 18668 }, { "epoch": 0.53, "grad_norm": 4.392620515597617, "learning_rate": 4.682307716581643e-06, "loss": 0.476, "step": 18669 }, { "epoch": 0.53, "grad_norm": 6.796254079234107, "learning_rate": 4.681844896549608e-06, "loss": 0.6532, "step": 18670 }, { "epoch": 0.53, "grad_norm": 6.763772336393862, "learning_rate": 4.6813820792546245e-06, "loss": 0.5275, "step": 18671 }, { "epoch": 0.53, "grad_norm": 5.917180195761847, "learning_rate": 4.6809192647006715e-06, "loss": 0.2502, "step": 18672 }, { "epoch": 0.53, "grad_norm": 4.184249192583701, "learning_rate": 4.68045645289173e-06, "loss": 0.1374, "step": 18673 }, { "epoch": 0.53, "grad_norm": 11.311934314725923, "learning_rate": 4.679993643831784e-06, "loss": 0.5651, "step": 18674 }, { "epoch": 0.53, "grad_norm": 6.3484915691366925, "learning_rate": 4.679530837524811e-06, "loss": 0.3634, "step": 18675 }, { "epoch": 0.53, "grad_norm": 4.41295470793696, "learning_rate": 4.679068033974798e-06, "loss": 0.3167, "step": 18676 }, { "epoch": 0.53, "grad_norm": 1.6397578899846859, "learning_rate": 4.67860523318572e-06, "loss": 0.0514, "step": 18677 }, { "epoch": 0.53, "grad_norm": 7.022529891355514, "learning_rate": 4.678142435161563e-06, "loss": 0.5308, "step": 18678 }, { "epoch": 0.53, "grad_norm": 4.672466176881624, "learning_rate": 4.677679639906307e-06, "loss": 0.4115, "step": 18679 }, { "epoch": 0.53, "grad_norm": 3.898053106342568, "learning_rate": 4.677216847423931e-06, "loss": 0.3099, "step": 18680 }, { "epoch": 0.53, "grad_norm": 3.07078529192657, "learning_rate": 4.676754057718422e-06, "loss": 0.2973, "step": 18681 }, { "epoch": 0.54, "grad_norm": 5.23395759854107, "learning_rate": 4.6762912707937574e-06, "loss": 0.4708, "step": 18682 }, { "epoch": 0.54, "grad_norm": 6.326379907431603, "learning_rate": 4.675828486653919e-06, "loss": 0.4039, "step": 18683 }, { "epoch": 0.54, "grad_norm": 2.1163853325695507, "learning_rate": 4.675365705302888e-06, "loss": 0.1433, "step": 18684 }, { "epoch": 0.54, "grad_norm": 3.5283671401585655, "learning_rate": 4.674902926744647e-06, "loss": 0.4743, "step": 18685 }, { "epoch": 0.54, "grad_norm": 4.055540550035793, "learning_rate": 4.674440150983173e-06, "loss": 0.3552, "step": 18686 }, { "epoch": 0.54, "grad_norm": 13.215466535677628, "learning_rate": 4.673977378022453e-06, "loss": 0.4837, "step": 18687 }, { "epoch": 0.54, "grad_norm": 4.453597087263961, "learning_rate": 4.673514607866464e-06, "loss": 0.2087, "step": 18688 }, { "epoch": 0.54, "grad_norm": 13.273858237151698, "learning_rate": 4.673051840519189e-06, "loss": 0.7547, "step": 18689 }, { "epoch": 0.54, "grad_norm": 5.662009157713044, "learning_rate": 4.672589075984609e-06, "loss": 0.4606, "step": 18690 }, { "epoch": 0.54, "grad_norm": 3.427736879005496, "learning_rate": 4.672126314266705e-06, "loss": 0.332, "step": 18691 }, { "epoch": 0.54, "grad_norm": 4.888637272845294, "learning_rate": 4.6716635553694585e-06, "loss": 0.6406, "step": 18692 }, { "epoch": 0.54, "grad_norm": 8.265634691777162, "learning_rate": 4.671200799296848e-06, "loss": 0.4926, "step": 18693 }, { "epoch": 0.54, "grad_norm": 8.338571663174514, "learning_rate": 4.670738046052859e-06, "loss": 0.8713, "step": 18694 }, { "epoch": 0.54, "grad_norm": 6.804534560016628, "learning_rate": 4.670275295641469e-06, "loss": 0.7148, "step": 18695 }, { "epoch": 0.54, "grad_norm": 4.654573894137499, "learning_rate": 4.669812548066659e-06, "loss": 0.6256, "step": 18696 }, { "epoch": 0.54, "grad_norm": 4.054194249486973, "learning_rate": 4.669349803332413e-06, "loss": 0.3341, "step": 18697 }, { "epoch": 0.54, "grad_norm": 6.118982278313482, "learning_rate": 4.668887061442708e-06, "loss": 0.4946, "step": 18698 }, { "epoch": 0.54, "grad_norm": 8.079602783325553, "learning_rate": 4.668424322401529e-06, "loss": 0.3736, "step": 18699 }, { "epoch": 0.54, "grad_norm": 5.654875319545795, "learning_rate": 4.667961586212852e-06, "loss": 0.4191, "step": 18700 }, { "epoch": 0.54, "grad_norm": 5.232480267024115, "learning_rate": 4.667498852880663e-06, "loss": 0.7777, "step": 18701 }, { "epoch": 0.54, "grad_norm": 7.52257611343656, "learning_rate": 4.66703612240894e-06, "loss": 0.6361, "step": 18702 }, { "epoch": 0.54, "grad_norm": 6.167602296250691, "learning_rate": 4.666573394801662e-06, "loss": 0.5596, "step": 18703 }, { "epoch": 0.54, "grad_norm": 4.2740856843712685, "learning_rate": 4.666110670062814e-06, "loss": 0.6381, "step": 18704 }, { "epoch": 0.54, "grad_norm": 6.801877375508281, "learning_rate": 4.665647948196375e-06, "loss": 0.5112, "step": 18705 }, { "epoch": 0.54, "grad_norm": 7.454173975927253, "learning_rate": 4.665185229206326e-06, "loss": 0.5574, "step": 18706 }, { "epoch": 0.54, "grad_norm": 4.010269824555761, "learning_rate": 4.664722513096644e-06, "loss": 0.3647, "step": 18707 }, { "epoch": 0.54, "grad_norm": 8.150663486925442, "learning_rate": 4.6642597998713165e-06, "loss": 0.339, "step": 18708 }, { "epoch": 0.54, "grad_norm": 6.027365268513111, "learning_rate": 4.6637970895343195e-06, "loss": 0.7342, "step": 18709 }, { "epoch": 0.54, "grad_norm": 3.146413981090087, "learning_rate": 4.663334382089635e-06, "loss": 0.2131, "step": 18710 }, { "epoch": 0.54, "grad_norm": 3.5855662833159285, "learning_rate": 4.662871677541243e-06, "loss": 0.3733, "step": 18711 }, { "epoch": 0.54, "grad_norm": 4.299777226443757, "learning_rate": 4.662408975893123e-06, "loss": 0.5377, "step": 18712 }, { "epoch": 0.54, "grad_norm": 4.858272390599522, "learning_rate": 4.661946277149259e-06, "loss": 0.4869, "step": 18713 }, { "epoch": 0.54, "grad_norm": 7.806391777174353, "learning_rate": 4.66148358131363e-06, "loss": 0.6937, "step": 18714 }, { "epoch": 0.54, "grad_norm": 4.652722244816958, "learning_rate": 4.661020888390216e-06, "loss": 0.4543, "step": 18715 }, { "epoch": 0.54, "grad_norm": 6.394141239490394, "learning_rate": 4.660558198382995e-06, "loss": 0.5871, "step": 18716 }, { "epoch": 0.54, "grad_norm": 3.0818728175375054, "learning_rate": 4.6600955112959525e-06, "loss": 0.228, "step": 18717 }, { "epoch": 0.54, "grad_norm": 3.383319596480371, "learning_rate": 4.6596328271330655e-06, "loss": 0.347, "step": 18718 }, { "epoch": 0.54, "grad_norm": 4.620668806077114, "learning_rate": 4.659170145898316e-06, "loss": 0.3772, "step": 18719 }, { "epoch": 0.54, "grad_norm": 9.909340746341282, "learning_rate": 4.658707467595682e-06, "loss": 0.1384, "step": 18720 }, { "epoch": 0.54, "grad_norm": 6.902057497126059, "learning_rate": 4.6582447922291465e-06, "loss": 0.3377, "step": 18721 }, { "epoch": 0.54, "grad_norm": 7.335487620053789, "learning_rate": 4.6577821198026905e-06, "loss": 0.7696, "step": 18722 }, { "epoch": 0.54, "grad_norm": 8.007668038902752, "learning_rate": 4.657319450320291e-06, "loss": 0.4086, "step": 18723 }, { "epoch": 0.54, "grad_norm": 2.897791787991593, "learning_rate": 4.656856783785931e-06, "loss": 0.2317, "step": 18724 }, { "epoch": 0.54, "grad_norm": 8.586841594659667, "learning_rate": 4.656394120203589e-06, "loss": 0.3478, "step": 18725 }, { "epoch": 0.54, "grad_norm": 2.7788379368014535, "learning_rate": 4.655931459577248e-06, "loss": 0.3247, "step": 18726 }, { "epoch": 0.54, "grad_norm": 4.732376331420347, "learning_rate": 4.655468801910884e-06, "loss": 0.5495, "step": 18727 }, { "epoch": 0.54, "grad_norm": 7.848179634076018, "learning_rate": 4.65500614720848e-06, "loss": 0.8643, "step": 18728 }, { "epoch": 0.54, "grad_norm": 11.006887273843883, "learning_rate": 4.654543495474017e-06, "loss": 0.5684, "step": 18729 }, { "epoch": 0.54, "grad_norm": 5.096097019305036, "learning_rate": 4.6540808467114715e-06, "loss": 0.4636, "step": 18730 }, { "epoch": 0.54, "grad_norm": 6.6405996524102635, "learning_rate": 4.6536182009248284e-06, "loss": 0.72, "step": 18731 }, { "epoch": 0.54, "grad_norm": 5.233128686819551, "learning_rate": 4.653155558118063e-06, "loss": 0.1685, "step": 18732 }, { "epoch": 0.54, "grad_norm": 3.69811328132427, "learning_rate": 4.65269291829516e-06, "loss": 0.4599, "step": 18733 }, { "epoch": 0.54, "grad_norm": 6.072492838792909, "learning_rate": 4.652230281460095e-06, "loss": 0.5764, "step": 18734 }, { "epoch": 0.54, "grad_norm": 8.752593255634462, "learning_rate": 4.651767647616852e-06, "loss": 0.4524, "step": 18735 }, { "epoch": 0.54, "grad_norm": 6.997953405243116, "learning_rate": 4.651305016769408e-06, "loss": 0.3292, "step": 18736 }, { "epoch": 0.54, "grad_norm": 3.6184269900934884, "learning_rate": 4.650842388921742e-06, "loss": 0.3031, "step": 18737 }, { "epoch": 0.54, "grad_norm": 5.698680201205716, "learning_rate": 4.650379764077838e-06, "loss": 0.4552, "step": 18738 }, { "epoch": 0.54, "grad_norm": 5.447270570836877, "learning_rate": 4.6499171422416745e-06, "loss": 0.6345, "step": 18739 }, { "epoch": 0.54, "grad_norm": 7.348806385512415, "learning_rate": 4.64945452341723e-06, "loss": 0.3441, "step": 18740 }, { "epoch": 0.54, "grad_norm": 6.688916047526227, "learning_rate": 4.648991907608486e-06, "loss": 0.4513, "step": 18741 }, { "epoch": 0.54, "grad_norm": 6.769340957106592, "learning_rate": 4.648529294819421e-06, "loss": 0.3899, "step": 18742 }, { "epoch": 0.54, "grad_norm": 9.11105901682571, "learning_rate": 4.6480666850540125e-06, "loss": 0.3914, "step": 18743 }, { "epoch": 0.54, "grad_norm": 6.045276124893593, "learning_rate": 4.647604078316246e-06, "loss": 0.7024, "step": 18744 }, { "epoch": 0.54, "grad_norm": 7.1935182668313855, "learning_rate": 4.647141474610098e-06, "loss": 0.3309, "step": 18745 }, { "epoch": 0.54, "grad_norm": 5.963255825160943, "learning_rate": 4.6466788739395466e-06, "loss": 0.3878, "step": 18746 }, { "epoch": 0.54, "grad_norm": 7.384861511398944, "learning_rate": 4.646216276308575e-06, "loss": 0.668, "step": 18747 }, { "epoch": 0.54, "grad_norm": 3.9989113518327577, "learning_rate": 4.645753681721161e-06, "loss": 0.3865, "step": 18748 }, { "epoch": 0.54, "grad_norm": 9.357681732614388, "learning_rate": 4.645291090181284e-06, "loss": 0.3427, "step": 18749 }, { "epoch": 0.54, "grad_norm": 5.136788974760587, "learning_rate": 4.644828501692924e-06, "loss": 0.5824, "step": 18750 }, { "epoch": 0.54, "grad_norm": 7.339238343336223, "learning_rate": 4.6443659162600615e-06, "loss": 0.6729, "step": 18751 }, { "epoch": 0.54, "grad_norm": 5.069973127530904, "learning_rate": 4.643903333886675e-06, "loss": 0.3233, "step": 18752 }, { "epoch": 0.54, "grad_norm": 4.564797280313515, "learning_rate": 4.643440754576742e-06, "loss": 0.408, "step": 18753 }, { "epoch": 0.54, "grad_norm": 6.4235268948736435, "learning_rate": 4.642978178334247e-06, "loss": 0.7239, "step": 18754 }, { "epoch": 0.54, "grad_norm": 5.594065737204736, "learning_rate": 4.642515605163166e-06, "loss": 0.5122, "step": 18755 }, { "epoch": 0.54, "grad_norm": 7.61954015150267, "learning_rate": 4.6420530350674806e-06, "loss": 0.4808, "step": 18756 }, { "epoch": 0.54, "grad_norm": 8.685133797027287, "learning_rate": 4.641590468051166e-06, "loss": 0.6513, "step": 18757 }, { "epoch": 0.54, "grad_norm": 3.1665270025841856, "learning_rate": 4.641127904118207e-06, "loss": 0.1137, "step": 18758 }, { "epoch": 0.54, "grad_norm": 3.1005964336067424, "learning_rate": 4.640665343272579e-06, "loss": 0.4609, "step": 18759 }, { "epoch": 0.54, "grad_norm": 4.785919900800158, "learning_rate": 4.640202785518264e-06, "loss": 0.8003, "step": 18760 }, { "epoch": 0.54, "grad_norm": 3.4305145552515035, "learning_rate": 4.639740230859238e-06, "loss": 0.2417, "step": 18761 }, { "epoch": 0.54, "grad_norm": 7.209225733501116, "learning_rate": 4.6392776792994836e-06, "loss": 0.5481, "step": 18762 }, { "epoch": 0.54, "grad_norm": 4.074052787172853, "learning_rate": 4.63881513084298e-06, "loss": 0.2727, "step": 18763 }, { "epoch": 0.54, "grad_norm": 5.311021262414885, "learning_rate": 4.638352585493703e-06, "loss": 0.4343, "step": 18764 }, { "epoch": 0.54, "grad_norm": 6.924223473457, "learning_rate": 4.637890043255637e-06, "loss": 0.2415, "step": 18765 }, { "epoch": 0.54, "grad_norm": 7.388733829538032, "learning_rate": 4.637427504132757e-06, "loss": 0.5473, "step": 18766 }, { "epoch": 0.54, "grad_norm": 6.295802817869257, "learning_rate": 4.636964968129044e-06, "loss": 0.314, "step": 18767 }, { "epoch": 0.54, "grad_norm": 9.67267010059787, "learning_rate": 4.636502435248475e-06, "loss": 0.5608, "step": 18768 }, { "epoch": 0.54, "grad_norm": 8.033108566695942, "learning_rate": 4.636039905495032e-06, "loss": 0.7781, "step": 18769 }, { "epoch": 0.54, "grad_norm": 6.004078353172343, "learning_rate": 4.635577378872694e-06, "loss": 0.2694, "step": 18770 }, { "epoch": 0.54, "grad_norm": 3.787387816023548, "learning_rate": 4.635114855385437e-06, "loss": 0.3441, "step": 18771 }, { "epoch": 0.54, "grad_norm": 5.9233234214413555, "learning_rate": 4.634652335037244e-06, "loss": 0.4844, "step": 18772 }, { "epoch": 0.54, "grad_norm": 5.977998887530032, "learning_rate": 4.63418981783209e-06, "loss": 0.4358, "step": 18773 }, { "epoch": 0.54, "grad_norm": 4.239717414229125, "learning_rate": 4.6337273037739575e-06, "loss": 0.5756, "step": 18774 }, { "epoch": 0.54, "grad_norm": 3.0525277934890602, "learning_rate": 4.6332647928668225e-06, "loss": 0.2963, "step": 18775 }, { "epoch": 0.54, "grad_norm": 8.196753274331543, "learning_rate": 4.6328022851146676e-06, "loss": 0.8286, "step": 18776 }, { "epoch": 0.54, "grad_norm": 8.359017680009238, "learning_rate": 4.632339780521466e-06, "loss": 0.3808, "step": 18777 }, { "epoch": 0.54, "grad_norm": 3.526918252520529, "learning_rate": 4.631877279091202e-06, "loss": 0.4669, "step": 18778 }, { "epoch": 0.54, "grad_norm": 4.527398228420405, "learning_rate": 4.631414780827853e-06, "loss": 0.5467, "step": 18779 }, { "epoch": 0.54, "grad_norm": 9.460054691874376, "learning_rate": 4.6309522857353955e-06, "loss": 0.416, "step": 18780 }, { "epoch": 0.54, "grad_norm": 9.107406796175594, "learning_rate": 4.6304897938178125e-06, "loss": 0.495, "step": 18781 }, { "epoch": 0.54, "grad_norm": 4.69045068059858, "learning_rate": 4.630027305079078e-06, "loss": 0.3163, "step": 18782 }, { "epoch": 0.54, "grad_norm": 4.80356844143767, "learning_rate": 4.629564819523175e-06, "loss": 0.356, "step": 18783 }, { "epoch": 0.54, "grad_norm": 4.312480650042923, "learning_rate": 4.6291023371540784e-06, "loss": 0.3271, "step": 18784 }, { "epoch": 0.54, "grad_norm": 6.138660456385538, "learning_rate": 4.628639857975769e-06, "loss": 0.5078, "step": 18785 }, { "epoch": 0.54, "grad_norm": 4.737692385166747, "learning_rate": 4.628177381992226e-06, "loss": 0.3644, "step": 18786 }, { "epoch": 0.54, "grad_norm": 3.7684247542394376, "learning_rate": 4.627714909207426e-06, "loss": 0.2215, "step": 18787 }, { "epoch": 0.54, "grad_norm": 4.573586020291596, "learning_rate": 4.627252439625351e-06, "loss": 0.4402, "step": 18788 }, { "epoch": 0.54, "grad_norm": 4.60217628206694, "learning_rate": 4.626789973249976e-06, "loss": 0.4455, "step": 18789 }, { "epoch": 0.54, "grad_norm": 5.510100799718558, "learning_rate": 4.626327510085282e-06, "loss": 0.6007, "step": 18790 }, { "epoch": 0.54, "grad_norm": 5.172107195755988, "learning_rate": 4.625865050135243e-06, "loss": 0.5314, "step": 18791 }, { "epoch": 0.54, "grad_norm": 11.031674039217437, "learning_rate": 4.625402593403845e-06, "loss": 0.6476, "step": 18792 }, { "epoch": 0.54, "grad_norm": 8.98138476159831, "learning_rate": 4.62494013989506e-06, "loss": 0.6431, "step": 18793 }, { "epoch": 0.54, "grad_norm": 6.279207723171555, "learning_rate": 4.624477689612868e-06, "loss": 0.5981, "step": 18794 }, { "epoch": 0.54, "grad_norm": 7.666124663336803, "learning_rate": 4.624015242561251e-06, "loss": 0.9147, "step": 18795 }, { "epoch": 0.54, "grad_norm": 6.61742896998453, "learning_rate": 4.623552798744184e-06, "loss": 0.5339, "step": 18796 }, { "epoch": 0.54, "grad_norm": 3.7332357161112046, "learning_rate": 4.6230903581656456e-06, "loss": 0.6554, "step": 18797 }, { "epoch": 0.54, "grad_norm": 5.432381094918494, "learning_rate": 4.622627920829613e-06, "loss": 0.3618, "step": 18798 }, { "epoch": 0.54, "grad_norm": 10.886151088537257, "learning_rate": 4.622165486740069e-06, "loss": 0.8491, "step": 18799 }, { "epoch": 0.54, "grad_norm": 4.789355195950484, "learning_rate": 4.6217030559009855e-06, "loss": 0.4463, "step": 18800 }, { "epoch": 0.54, "grad_norm": 6.407412037478207, "learning_rate": 4.621240628316346e-06, "loss": 0.5608, "step": 18801 }, { "epoch": 0.54, "grad_norm": 2.6655503757849273, "learning_rate": 4.620778203990125e-06, "loss": 0.3875, "step": 18802 }, { "epoch": 0.54, "grad_norm": 6.687971098519122, "learning_rate": 4.620315782926303e-06, "loss": 0.5168, "step": 18803 }, { "epoch": 0.54, "grad_norm": 6.642102496766003, "learning_rate": 4.619853365128859e-06, "loss": 0.4156, "step": 18804 }, { "epoch": 0.54, "grad_norm": 5.844508157599581, "learning_rate": 4.619390950601768e-06, "loss": 0.5108, "step": 18805 }, { "epoch": 0.54, "grad_norm": 7.365793786038929, "learning_rate": 4.618928539349012e-06, "loss": 0.4274, "step": 18806 }, { "epoch": 0.54, "grad_norm": 4.310765871431684, "learning_rate": 4.618466131374564e-06, "loss": 0.303, "step": 18807 }, { "epoch": 0.54, "grad_norm": 7.5477256532429085, "learning_rate": 4.618003726682407e-06, "loss": 0.7154, "step": 18808 }, { "epoch": 0.54, "grad_norm": 4.460127641750147, "learning_rate": 4.617541325276516e-06, "loss": 0.2545, "step": 18809 }, { "epoch": 0.54, "grad_norm": 5.327167346616589, "learning_rate": 4.617078927160868e-06, "loss": 0.5429, "step": 18810 }, { "epoch": 0.54, "grad_norm": 6.271813891559319, "learning_rate": 4.616616532339446e-06, "loss": 0.5284, "step": 18811 }, { "epoch": 0.54, "grad_norm": 3.790705546807205, "learning_rate": 4.616154140816224e-06, "loss": 0.5517, "step": 18812 }, { "epoch": 0.54, "grad_norm": 7.266461855367289, "learning_rate": 4.615691752595181e-06, "loss": 0.5555, "step": 18813 }, { "epoch": 0.54, "grad_norm": 6.907152798707908, "learning_rate": 4.615229367680293e-06, "loss": 0.4825, "step": 18814 }, { "epoch": 0.54, "grad_norm": 3.9287252594195827, "learning_rate": 4.6147669860755415e-06, "loss": 0.5769, "step": 18815 }, { "epoch": 0.54, "grad_norm": 5.394013818541391, "learning_rate": 4.6143046077849e-06, "loss": 0.651, "step": 18816 }, { "epoch": 0.54, "grad_norm": 5.719410759112998, "learning_rate": 4.613842232812351e-06, "loss": 0.2963, "step": 18817 }, { "epoch": 0.54, "grad_norm": 3.2425023305233105, "learning_rate": 4.6133798611618666e-06, "loss": 0.4299, "step": 18818 }, { "epoch": 0.54, "grad_norm": 7.144138264889024, "learning_rate": 4.612917492837429e-06, "loss": 0.7075, "step": 18819 }, { "epoch": 0.54, "grad_norm": 2.683223648870248, "learning_rate": 4.612455127843015e-06, "loss": 0.2122, "step": 18820 }, { "epoch": 0.54, "grad_norm": 3.5291729954404207, "learning_rate": 4.611992766182601e-06, "loss": 0.5129, "step": 18821 }, { "epoch": 0.54, "grad_norm": 7.527737728433089, "learning_rate": 4.611530407860167e-06, "loss": 0.234, "step": 18822 }, { "epoch": 0.54, "grad_norm": 7.965459825038615, "learning_rate": 4.611068052879687e-06, "loss": 0.386, "step": 18823 }, { "epoch": 0.54, "grad_norm": 4.10493674855632, "learning_rate": 4.610605701245143e-06, "loss": 0.3318, "step": 18824 }, { "epoch": 0.54, "grad_norm": 3.7380820991619585, "learning_rate": 4.610143352960508e-06, "loss": 0.3998, "step": 18825 }, { "epoch": 0.54, "grad_norm": 4.953475025216081, "learning_rate": 4.609681008029762e-06, "loss": 0.4016, "step": 18826 }, { "epoch": 0.54, "grad_norm": 5.063345991490915, "learning_rate": 4.609218666456884e-06, "loss": 0.2088, "step": 18827 }, { "epoch": 0.54, "grad_norm": 7.742369248367321, "learning_rate": 4.608756328245848e-06, "loss": 0.2172, "step": 18828 }, { "epoch": 0.54, "grad_norm": 1.200132478538911, "learning_rate": 4.608293993400636e-06, "loss": 0.0455, "step": 18829 }, { "epoch": 0.54, "grad_norm": 2.7139600674060533, "learning_rate": 4.607831661925218e-06, "loss": 0.1968, "step": 18830 }, { "epoch": 0.54, "grad_norm": 5.0799799890950075, "learning_rate": 4.607369333823579e-06, "loss": 0.5103, "step": 18831 }, { "epoch": 0.54, "grad_norm": 4.569463302099293, "learning_rate": 4.606907009099693e-06, "loss": 0.8282, "step": 18832 }, { "epoch": 0.54, "grad_norm": 4.877770125917261, "learning_rate": 4.606444687757538e-06, "loss": 0.4462, "step": 18833 }, { "epoch": 0.54, "grad_norm": 6.534848476716768, "learning_rate": 4.6059823698010885e-06, "loss": 0.4669, "step": 18834 }, { "epoch": 0.54, "grad_norm": 7.9526716601478435, "learning_rate": 4.6055200552343255e-06, "loss": 0.5076, "step": 18835 }, { "epoch": 0.54, "grad_norm": 5.638032608078664, "learning_rate": 4.605057744061226e-06, "loss": 0.653, "step": 18836 }, { "epoch": 0.54, "grad_norm": 2.5576362973286177, "learning_rate": 4.604595436285763e-06, "loss": 0.1487, "step": 18837 }, { "epoch": 0.54, "grad_norm": 3.4955515350473747, "learning_rate": 4.6041331319119204e-06, "loss": 0.188, "step": 18838 }, { "epoch": 0.54, "grad_norm": 4.411294687141518, "learning_rate": 4.60367083094367e-06, "loss": 0.8298, "step": 18839 }, { "epoch": 0.54, "grad_norm": 7.121382246592392, "learning_rate": 4.6032085333849915e-06, "loss": 0.261, "step": 18840 }, { "epoch": 0.54, "grad_norm": 4.489122781637583, "learning_rate": 4.60274623923986e-06, "loss": 0.4103, "step": 18841 }, { "epoch": 0.54, "grad_norm": 3.098978152599056, "learning_rate": 4.602283948512255e-06, "loss": 0.2821, "step": 18842 }, { "epoch": 0.54, "grad_norm": 1.780303075965337, "learning_rate": 4.601821661206151e-06, "loss": 0.269, "step": 18843 }, { "epoch": 0.54, "grad_norm": 7.959300643160204, "learning_rate": 4.601359377325526e-06, "loss": 0.7902, "step": 18844 }, { "epoch": 0.54, "grad_norm": 6.315375475741499, "learning_rate": 4.600897096874358e-06, "loss": 0.3874, "step": 18845 }, { "epoch": 0.54, "grad_norm": 5.507125313819083, "learning_rate": 4.600434819856623e-06, "loss": 0.7563, "step": 18846 }, { "epoch": 0.54, "grad_norm": 6.269334774563303, "learning_rate": 4.599972546276299e-06, "loss": 0.7217, "step": 18847 }, { "epoch": 0.54, "grad_norm": 10.099995620651052, "learning_rate": 4.59951027613736e-06, "loss": 0.6046, "step": 18848 }, { "epoch": 0.54, "grad_norm": 5.497631321544651, "learning_rate": 4.599048009443786e-06, "loss": 0.3909, "step": 18849 }, { "epoch": 0.54, "grad_norm": 6.888051804663406, "learning_rate": 4.5985857461995526e-06, "loss": 0.441, "step": 18850 }, { "epoch": 0.54, "grad_norm": 10.147072339760314, "learning_rate": 4.598123486408636e-06, "loss": 0.6147, "step": 18851 }, { "epoch": 0.54, "grad_norm": 8.550785711225423, "learning_rate": 4.597661230075014e-06, "loss": 0.5551, "step": 18852 }, { "epoch": 0.54, "grad_norm": 6.291712459709135, "learning_rate": 4.597198977202663e-06, "loss": 0.7739, "step": 18853 }, { "epoch": 0.54, "grad_norm": 4.102342571373884, "learning_rate": 4.59673672779556e-06, "loss": 0.3857, "step": 18854 }, { "epoch": 0.54, "grad_norm": 5.93506353480872, "learning_rate": 4.59627448185768e-06, "loss": 0.5473, "step": 18855 }, { "epoch": 0.54, "grad_norm": 5.740028816376769, "learning_rate": 4.595812239393003e-06, "loss": 0.6762, "step": 18856 }, { "epoch": 0.54, "grad_norm": 5.298341531202213, "learning_rate": 4.595350000405501e-06, "loss": 0.4458, "step": 18857 }, { "epoch": 0.54, "grad_norm": 4.129808137930717, "learning_rate": 4.594887764899155e-06, "loss": 0.3005, "step": 18858 }, { "epoch": 0.54, "grad_norm": 12.239831246104067, "learning_rate": 4.594425532877939e-06, "loss": 0.5368, "step": 18859 }, { "epoch": 0.54, "grad_norm": 3.058344532446318, "learning_rate": 4.593963304345829e-06, "loss": 0.1913, "step": 18860 }, { "epoch": 0.54, "grad_norm": 4.2644971537241165, "learning_rate": 4.593501079306805e-06, "loss": 0.4277, "step": 18861 }, { "epoch": 0.54, "grad_norm": 6.906311656279678, "learning_rate": 4.59303885776484e-06, "loss": 0.6542, "step": 18862 }, { "epoch": 0.54, "grad_norm": 6.71124444510503, "learning_rate": 4.592576639723913e-06, "loss": 0.4259, "step": 18863 }, { "epoch": 0.54, "grad_norm": 2.923303866734497, "learning_rate": 4.592114425187995e-06, "loss": 0.1653, "step": 18864 }, { "epoch": 0.54, "grad_norm": 4.625378954028365, "learning_rate": 4.591652214161071e-06, "loss": 0.4345, "step": 18865 }, { "epoch": 0.54, "grad_norm": 3.686274761376792, "learning_rate": 4.591190006647111e-06, "loss": 0.4345, "step": 18866 }, { "epoch": 0.54, "grad_norm": 4.813246458547327, "learning_rate": 4.5907278026500925e-06, "loss": 0.2416, "step": 18867 }, { "epoch": 0.54, "grad_norm": 8.647507890673982, "learning_rate": 4.590265602173992e-06, "loss": 0.2872, "step": 18868 }, { "epoch": 0.54, "grad_norm": 5.2216230924103, "learning_rate": 4.589803405222787e-06, "loss": 0.4252, "step": 18869 }, { "epoch": 0.54, "grad_norm": 5.511800591005164, "learning_rate": 4.589341211800453e-06, "loss": 0.5669, "step": 18870 }, { "epoch": 0.54, "grad_norm": 5.221783744156172, "learning_rate": 4.588879021910964e-06, "loss": 0.5622, "step": 18871 }, { "epoch": 0.54, "grad_norm": 7.230192619174385, "learning_rate": 4.588416835558301e-06, "loss": 1.1067, "step": 18872 }, { "epoch": 0.54, "grad_norm": 3.0326985702851905, "learning_rate": 4.587954652746435e-06, "loss": 0.2438, "step": 18873 }, { "epoch": 0.54, "grad_norm": 3.390963207903612, "learning_rate": 4.587492473479347e-06, "loss": 0.4769, "step": 18874 }, { "epoch": 0.54, "grad_norm": 3.697141198887309, "learning_rate": 4.587030297761007e-06, "loss": 0.6365, "step": 18875 }, { "epoch": 0.54, "grad_norm": 5.09434302218775, "learning_rate": 4.586568125595396e-06, "loss": 0.3528, "step": 18876 }, { "epoch": 0.54, "grad_norm": 2.7361230113055233, "learning_rate": 4.58610595698649e-06, "loss": 0.2891, "step": 18877 }, { "epoch": 0.54, "grad_norm": 6.257372818072341, "learning_rate": 4.585643791938261e-06, "loss": 0.6263, "step": 18878 }, { "epoch": 0.54, "grad_norm": 11.5952928858289, "learning_rate": 4.58518163045469e-06, "loss": 0.6087, "step": 18879 }, { "epoch": 0.54, "grad_norm": 5.502046301949547, "learning_rate": 4.584719472539749e-06, "loss": 0.5665, "step": 18880 }, { "epoch": 0.54, "grad_norm": 9.728631935427611, "learning_rate": 4.584257318197416e-06, "loss": 0.6615, "step": 18881 }, { "epoch": 0.54, "grad_norm": 6.606597605413913, "learning_rate": 4.583795167431664e-06, "loss": 0.7482, "step": 18882 }, { "epoch": 0.54, "grad_norm": 5.336976158290399, "learning_rate": 4.583333020246474e-06, "loss": 0.4902, "step": 18883 }, { "epoch": 0.54, "grad_norm": 7.749330553243777, "learning_rate": 4.5828708766458165e-06, "loss": 0.6959, "step": 18884 }, { "epoch": 0.54, "grad_norm": 7.598386565658797, "learning_rate": 4.582408736633669e-06, "loss": 0.5848, "step": 18885 }, { "epoch": 0.54, "grad_norm": 6.375811955224127, "learning_rate": 4.58194660021401e-06, "loss": 0.4166, "step": 18886 }, { "epoch": 0.54, "grad_norm": 3.6382376575049604, "learning_rate": 4.581484467390811e-06, "loss": 0.4392, "step": 18887 }, { "epoch": 0.54, "grad_norm": 5.889545189925466, "learning_rate": 4.5810223381680515e-06, "loss": 0.3485, "step": 18888 }, { "epoch": 0.54, "grad_norm": 4.945216101118173, "learning_rate": 4.580560212549704e-06, "loss": 0.6683, "step": 18889 }, { "epoch": 0.54, "grad_norm": 8.126346828633137, "learning_rate": 4.580098090539746e-06, "loss": 0.6089, "step": 18890 }, { "epoch": 0.54, "grad_norm": 6.540374077414056, "learning_rate": 4.5796359721421505e-06, "loss": 0.5963, "step": 18891 }, { "epoch": 0.54, "grad_norm": 3.859710910402749, "learning_rate": 4.579173857360897e-06, "loss": 0.3406, "step": 18892 }, { "epoch": 0.54, "grad_norm": 8.385242689393193, "learning_rate": 4.578711746199959e-06, "loss": 0.5683, "step": 18893 }, { "epoch": 0.54, "grad_norm": 4.160392790324556, "learning_rate": 4.578249638663309e-06, "loss": 0.1908, "step": 18894 }, { "epoch": 0.54, "grad_norm": 7.628468459420735, "learning_rate": 4.5777875347549295e-06, "loss": 0.8854, "step": 18895 }, { "epoch": 0.54, "grad_norm": 5.187184335687085, "learning_rate": 4.5773254344787895e-06, "loss": 0.7986, "step": 18896 }, { "epoch": 0.54, "grad_norm": 8.011115636866904, "learning_rate": 4.576863337838868e-06, "loss": 0.4801, "step": 18897 }, { "epoch": 0.54, "grad_norm": 6.555066776328324, "learning_rate": 4.5764012448391375e-06, "loss": 0.5119, "step": 18898 }, { "epoch": 0.54, "grad_norm": 9.087158750819624, "learning_rate": 4.5759391554835766e-06, "loss": 0.7147, "step": 18899 }, { "epoch": 0.54, "grad_norm": 5.404826202774336, "learning_rate": 4.5754770697761585e-06, "loss": 0.4266, "step": 18900 }, { "epoch": 0.54, "grad_norm": 5.401566257094803, "learning_rate": 4.575014987720857e-06, "loss": 0.8286, "step": 18901 }, { "epoch": 0.54, "grad_norm": 5.757191348894286, "learning_rate": 4.574552909321653e-06, "loss": 0.7502, "step": 18902 }, { "epoch": 0.54, "grad_norm": 8.758304905384023, "learning_rate": 4.574090834582515e-06, "loss": 0.4795, "step": 18903 }, { "epoch": 0.54, "grad_norm": 9.562594743645741, "learning_rate": 4.573628763507423e-06, "loss": 0.4685, "step": 18904 }, { "epoch": 0.54, "grad_norm": 4.182132070242892, "learning_rate": 4.573166696100347e-06, "loss": 0.5386, "step": 18905 }, { "epoch": 0.54, "grad_norm": 7.83517023287465, "learning_rate": 4.572704632365269e-06, "loss": 0.1894, "step": 18906 }, { "epoch": 0.54, "grad_norm": 1.7551199021027317, "learning_rate": 4.572242572306158e-06, "loss": 0.1453, "step": 18907 }, { "epoch": 0.54, "grad_norm": 2.741630410666786, "learning_rate": 4.571780515926994e-06, "loss": 0.2974, "step": 18908 }, { "epoch": 0.54, "grad_norm": 2.6722102484717887, "learning_rate": 4.571318463231747e-06, "loss": 0.2589, "step": 18909 }, { "epoch": 0.54, "grad_norm": 5.879275389580046, "learning_rate": 4.570856414224395e-06, "loss": 0.6509, "step": 18910 }, { "epoch": 0.54, "grad_norm": 3.692214667389404, "learning_rate": 4.570394368908914e-06, "loss": 0.3289, "step": 18911 }, { "epoch": 0.54, "grad_norm": 6.725603857192395, "learning_rate": 4.569932327289275e-06, "loss": 0.7121, "step": 18912 }, { "epoch": 0.54, "grad_norm": 7.697285696418262, "learning_rate": 4.569470289369457e-06, "loss": 0.2452, "step": 18913 }, { "epoch": 0.54, "grad_norm": 9.319562141545697, "learning_rate": 4.569008255153432e-06, "loss": 1.0859, "step": 18914 }, { "epoch": 0.54, "grad_norm": 5.681250439802264, "learning_rate": 4.568546224645177e-06, "loss": 0.305, "step": 18915 }, { "epoch": 0.54, "grad_norm": 6.348368499703859, "learning_rate": 4.568084197848666e-06, "loss": 0.5975, "step": 18916 }, { "epoch": 0.54, "grad_norm": 4.403451747967341, "learning_rate": 4.56762217476787e-06, "loss": 0.3008, "step": 18917 }, { "epoch": 0.54, "grad_norm": 5.619832950566498, "learning_rate": 4.567160155406772e-06, "loss": 0.7894, "step": 18918 }, { "epoch": 0.54, "grad_norm": 5.585179588278715, "learning_rate": 4.566698139769339e-06, "loss": 0.5358, "step": 18919 }, { "epoch": 0.54, "grad_norm": 3.777041244623015, "learning_rate": 4.56623612785955e-06, "loss": 0.4257, "step": 18920 }, { "epoch": 0.54, "grad_norm": 6.6823369889619055, "learning_rate": 4.565774119681376e-06, "loss": 0.5915, "step": 18921 }, { "epoch": 0.54, "grad_norm": 7.745153696149453, "learning_rate": 4.5653121152387965e-06, "loss": 0.4521, "step": 18922 }, { "epoch": 0.54, "grad_norm": 2.101201512130728, "learning_rate": 4.5648501145357824e-06, "loss": 0.3093, "step": 18923 }, { "epoch": 0.54, "grad_norm": 6.740283471776779, "learning_rate": 4.564388117576309e-06, "loss": 0.3046, "step": 18924 }, { "epoch": 0.54, "grad_norm": 2.7808435550265966, "learning_rate": 4.56392612436435e-06, "loss": 0.2328, "step": 18925 }, { "epoch": 0.54, "grad_norm": 3.835009367432773, "learning_rate": 4.563464134903882e-06, "loss": 0.3543, "step": 18926 }, { "epoch": 0.54, "grad_norm": 7.163259246955468, "learning_rate": 4.5630021491988785e-06, "loss": 0.9525, "step": 18927 }, { "epoch": 0.54, "grad_norm": 5.974511127343759, "learning_rate": 4.5625401672533125e-06, "loss": 0.5104, "step": 18928 }, { "epoch": 0.54, "grad_norm": 6.1191113249378315, "learning_rate": 4.56207818907116e-06, "loss": 0.4115, "step": 18929 }, { "epoch": 0.54, "grad_norm": 6.126223850595286, "learning_rate": 4.561616214656396e-06, "loss": 0.4107, "step": 18930 }, { "epoch": 0.54, "grad_norm": 5.550720407795457, "learning_rate": 4.561154244012994e-06, "loss": 0.4873, "step": 18931 }, { "epoch": 0.54, "grad_norm": 5.018646696649842, "learning_rate": 4.560692277144925e-06, "loss": 0.1863, "step": 18932 }, { "epoch": 0.54, "grad_norm": 7.57519877459393, "learning_rate": 4.560230314056168e-06, "loss": 0.615, "step": 18933 }, { "epoch": 0.54, "grad_norm": 4.481447068141544, "learning_rate": 4.559768354750697e-06, "loss": 0.5901, "step": 18934 }, { "epoch": 0.54, "grad_norm": 8.120828584761297, "learning_rate": 4.5593063992324815e-06, "loss": 0.8243, "step": 18935 }, { "epoch": 0.54, "grad_norm": 9.683244570414477, "learning_rate": 4.558844447505502e-06, "loss": 0.5534, "step": 18936 }, { "epoch": 0.54, "grad_norm": 4.002994012885818, "learning_rate": 4.558382499573727e-06, "loss": 0.5997, "step": 18937 }, { "epoch": 0.54, "grad_norm": 4.007857116093948, "learning_rate": 4.557920555441136e-06, "loss": 0.482, "step": 18938 }, { "epoch": 0.54, "grad_norm": 7.842534248391576, "learning_rate": 4.557458615111696e-06, "loss": 0.7253, "step": 18939 }, { "epoch": 0.54, "grad_norm": 8.143724052990954, "learning_rate": 4.556996678589388e-06, "loss": 0.4314, "step": 18940 }, { "epoch": 0.54, "grad_norm": 6.087348118322745, "learning_rate": 4.556534745878182e-06, "loss": 0.7511, "step": 18941 }, { "epoch": 0.54, "grad_norm": 4.352033387599526, "learning_rate": 4.5560728169820525e-06, "loss": 0.4833, "step": 18942 }, { "epoch": 0.54, "grad_norm": 2.4271912462095817, "learning_rate": 4.555610891904975e-06, "loss": 0.0724, "step": 18943 }, { "epoch": 0.54, "grad_norm": 3.9140123680086973, "learning_rate": 4.555148970650921e-06, "loss": 0.4953, "step": 18944 }, { "epoch": 0.54, "grad_norm": 3.3454861990935183, "learning_rate": 4.5546870532238676e-06, "loss": 0.4181, "step": 18945 }, { "epoch": 0.54, "grad_norm": 8.702798011978945, "learning_rate": 4.5542251396277854e-06, "loss": 0.6617, "step": 18946 }, { "epoch": 0.54, "grad_norm": 4.185635450514447, "learning_rate": 4.553763229866651e-06, "loss": 0.3221, "step": 18947 }, { "epoch": 0.54, "grad_norm": 7.3944011501468605, "learning_rate": 4.553301323944433e-06, "loss": 0.7366, "step": 18948 }, { "epoch": 0.54, "grad_norm": 9.562527725079804, "learning_rate": 4.552839421865112e-06, "loss": 0.3712, "step": 18949 }, { "epoch": 0.54, "grad_norm": 5.972371785067864, "learning_rate": 4.552377523632657e-06, "loss": 0.6071, "step": 18950 }, { "epoch": 0.54, "grad_norm": 5.083419457732659, "learning_rate": 4.551915629251042e-06, "loss": 0.2795, "step": 18951 }, { "epoch": 0.54, "grad_norm": 5.222533173919103, "learning_rate": 4.551453738724244e-06, "loss": 0.3576, "step": 18952 }, { "epoch": 0.54, "grad_norm": 4.058286286237371, "learning_rate": 4.550991852056233e-06, "loss": 0.3046, "step": 18953 }, { "epoch": 0.54, "grad_norm": 10.723746511483462, "learning_rate": 4.550529969250985e-06, "loss": 0.5967, "step": 18954 }, { "epoch": 0.54, "grad_norm": 3.190294312499908, "learning_rate": 4.5500680903124695e-06, "loss": 0.2425, "step": 18955 }, { "epoch": 0.54, "grad_norm": 5.137571036411546, "learning_rate": 4.549606215244666e-06, "loss": 0.3761, "step": 18956 }, { "epoch": 0.54, "grad_norm": 2.372346751224931, "learning_rate": 4.549144344051543e-06, "loss": 0.1587, "step": 18957 }, { "epoch": 0.54, "grad_norm": 6.148300809201156, "learning_rate": 4.548682476737074e-06, "loss": 0.3303, "step": 18958 }, { "epoch": 0.54, "grad_norm": 5.344647861920999, "learning_rate": 4.548220613305238e-06, "loss": 0.281, "step": 18959 }, { "epoch": 0.54, "grad_norm": 5.852399004847369, "learning_rate": 4.547758753760002e-06, "loss": 0.7131, "step": 18960 }, { "epoch": 0.54, "grad_norm": 4.5673009617558975, "learning_rate": 4.547296898105343e-06, "loss": 0.4247, "step": 18961 }, { "epoch": 0.54, "grad_norm": 7.319635219448341, "learning_rate": 4.546835046345232e-06, "loss": 0.9591, "step": 18962 }, { "epoch": 0.54, "grad_norm": 9.05819557831524, "learning_rate": 4.5463731984836446e-06, "loss": 0.8114, "step": 18963 }, { "epoch": 0.54, "grad_norm": 4.390077495716447, "learning_rate": 4.545911354524552e-06, "loss": 0.5527, "step": 18964 }, { "epoch": 0.54, "grad_norm": 5.78622284509332, "learning_rate": 4.545449514471928e-06, "loss": 0.3422, "step": 18965 }, { "epoch": 0.54, "grad_norm": 5.942354326015328, "learning_rate": 4.544987678329745e-06, "loss": 0.3611, "step": 18966 }, { "epoch": 0.54, "grad_norm": 6.466079234285837, "learning_rate": 4.544525846101978e-06, "loss": 0.5244, "step": 18967 }, { "epoch": 0.54, "grad_norm": 4.580248213417908, "learning_rate": 4.5440640177926e-06, "loss": 0.3439, "step": 18968 }, { "epoch": 0.54, "grad_norm": 4.808212550258858, "learning_rate": 4.54360219340558e-06, "loss": 0.3566, "step": 18969 }, { "epoch": 0.54, "grad_norm": 3.072466689100721, "learning_rate": 4.543140372944897e-06, "loss": 0.3088, "step": 18970 }, { "epoch": 0.54, "grad_norm": 4.701735738070897, "learning_rate": 4.54267855641452e-06, "loss": 0.3321, "step": 18971 }, { "epoch": 0.54, "grad_norm": 4.949240042408075, "learning_rate": 4.542216743818424e-06, "loss": 0.637, "step": 18972 }, { "epoch": 0.54, "grad_norm": 4.310452292977122, "learning_rate": 4.54175493516058e-06, "loss": 0.3042, "step": 18973 }, { "epoch": 0.54, "grad_norm": 8.458292116963605, "learning_rate": 4.541293130444961e-06, "loss": 0.5603, "step": 18974 }, { "epoch": 0.54, "grad_norm": 3.201529016873528, "learning_rate": 4.540831329675544e-06, "loss": 0.2564, "step": 18975 }, { "epoch": 0.54, "grad_norm": 5.2167020651293745, "learning_rate": 4.540369532856296e-06, "loss": 0.5636, "step": 18976 }, { "epoch": 0.54, "grad_norm": 2.5725522355947876, "learning_rate": 4.5399077399911935e-06, "loss": 0.2539, "step": 18977 }, { "epoch": 0.54, "grad_norm": 5.720446809110441, "learning_rate": 4.539445951084206e-06, "loss": 0.5903, "step": 18978 }, { "epoch": 0.54, "grad_norm": 5.9704919639835525, "learning_rate": 4.53898416613931e-06, "loss": 0.6376, "step": 18979 }, { "epoch": 0.54, "grad_norm": 7.912383101866846, "learning_rate": 4.538522385160476e-06, "loss": 0.3659, "step": 18980 }, { "epoch": 0.54, "grad_norm": 7.933170008647766, "learning_rate": 4.538060608151678e-06, "loss": 0.6684, "step": 18981 }, { "epoch": 0.54, "grad_norm": 7.753905389050645, "learning_rate": 4.537598835116886e-06, "loss": 0.5411, "step": 18982 }, { "epoch": 0.54, "grad_norm": 6.552414109978104, "learning_rate": 4.537137066060075e-06, "loss": 0.4615, "step": 18983 }, { "epoch": 0.54, "grad_norm": 4.74482756627973, "learning_rate": 4.536675300985218e-06, "loss": 0.7211, "step": 18984 }, { "epoch": 0.54, "grad_norm": 8.246280120521668, "learning_rate": 4.536213539896284e-06, "loss": 0.661, "step": 18985 }, { "epoch": 0.54, "grad_norm": 2.2113881831258912, "learning_rate": 4.53575178279725e-06, "loss": 0.1003, "step": 18986 }, { "epoch": 0.54, "grad_norm": 4.979558261598298, "learning_rate": 4.535290029692086e-06, "loss": 0.3894, "step": 18987 }, { "epoch": 0.54, "grad_norm": 5.700955289129066, "learning_rate": 4.534828280584765e-06, "loss": 0.3934, "step": 18988 }, { "epoch": 0.54, "grad_norm": 3.2304030276789275, "learning_rate": 4.534366535479256e-06, "loss": 0.3911, "step": 18989 }, { "epoch": 0.54, "grad_norm": 2.9619782261952885, "learning_rate": 4.533904794379537e-06, "loss": 0.1859, "step": 18990 }, { "epoch": 0.54, "grad_norm": 6.175088282583414, "learning_rate": 4.533443057289577e-06, "loss": 0.5308, "step": 18991 }, { "epoch": 0.54, "grad_norm": 2.062257405665818, "learning_rate": 4.532981324213348e-06, "loss": 0.1644, "step": 18992 }, { "epoch": 0.54, "grad_norm": 3.780920313814923, "learning_rate": 4.532519595154825e-06, "loss": 0.3556, "step": 18993 }, { "epoch": 0.54, "grad_norm": 4.462932508009159, "learning_rate": 4.532057870117977e-06, "loss": 0.3367, "step": 18994 }, { "epoch": 0.54, "grad_norm": 6.065031014887545, "learning_rate": 4.53159614910678e-06, "loss": 0.2754, "step": 18995 }, { "epoch": 0.54, "grad_norm": 5.478618894205505, "learning_rate": 4.5311344321252e-06, "loss": 0.6824, "step": 18996 }, { "epoch": 0.54, "grad_norm": 3.5721244304403035, "learning_rate": 4.530672719177216e-06, "loss": 0.2341, "step": 18997 }, { "epoch": 0.54, "grad_norm": 5.255559520751303, "learning_rate": 4.530211010266794e-06, "loss": 0.4096, "step": 18998 }, { "epoch": 0.54, "grad_norm": 4.849507017741547, "learning_rate": 4.5297493053979094e-06, "loss": 0.3604, "step": 18999 }, { "epoch": 0.54, "grad_norm": 7.469776865066279, "learning_rate": 4.529287604574535e-06, "loss": 0.8733, "step": 19000 }, { "epoch": 0.54, "grad_norm": 3.854074568120629, "learning_rate": 4.528825907800642e-06, "loss": 0.3384, "step": 19001 }, { "epoch": 0.54, "grad_norm": 5.458844483686672, "learning_rate": 4.5283642150802005e-06, "loss": 0.3219, "step": 19002 }, { "epoch": 0.54, "grad_norm": 9.591249307776348, "learning_rate": 4.5279025264171836e-06, "loss": 0.1655, "step": 19003 }, { "epoch": 0.54, "grad_norm": 4.560754886433249, "learning_rate": 4.527440841815564e-06, "loss": 0.5189, "step": 19004 }, { "epoch": 0.54, "grad_norm": 7.443235990392015, "learning_rate": 4.52697916127931e-06, "loss": 0.4603, "step": 19005 }, { "epoch": 0.54, "grad_norm": 2.6630990308181635, "learning_rate": 4.5265174848123996e-06, "loss": 0.3848, "step": 19006 }, { "epoch": 0.54, "grad_norm": 5.584485378786221, "learning_rate": 4.5260558124187985e-06, "loss": 0.6928, "step": 19007 }, { "epoch": 0.54, "grad_norm": 2.3031068198160667, "learning_rate": 4.52559414410248e-06, "loss": 0.4085, "step": 19008 }, { "epoch": 0.54, "grad_norm": 6.024498990703647, "learning_rate": 4.525132479867419e-06, "loss": 0.318, "step": 19009 }, { "epoch": 0.54, "grad_norm": 5.760180189671988, "learning_rate": 4.5246708197175845e-06, "loss": 0.3016, "step": 19010 }, { "epoch": 0.54, "grad_norm": 7.4917770448332055, "learning_rate": 4.524209163656949e-06, "loss": 0.6765, "step": 19011 }, { "epoch": 0.54, "grad_norm": 3.9125686505422097, "learning_rate": 4.523747511689481e-06, "loss": 0.2451, "step": 19012 }, { "epoch": 0.54, "grad_norm": 5.825417060820205, "learning_rate": 4.523285863819157e-06, "loss": 0.5032, "step": 19013 }, { "epoch": 0.54, "grad_norm": 7.705137968616622, "learning_rate": 4.522824220049945e-06, "loss": 0.444, "step": 19014 }, { "epoch": 0.54, "grad_norm": 5.870318556399924, "learning_rate": 4.522362580385815e-06, "loss": 0.5627, "step": 19015 }, { "epoch": 0.54, "grad_norm": 8.493090570155326, "learning_rate": 4.521900944830745e-06, "loss": 0.2297, "step": 19016 }, { "epoch": 0.54, "grad_norm": 4.664043637338192, "learning_rate": 4.521439313388701e-06, "loss": 0.1097, "step": 19017 }, { "epoch": 0.54, "grad_norm": 3.5788017532491865, "learning_rate": 4.520977686063656e-06, "loss": 0.2224, "step": 19018 }, { "epoch": 0.54, "grad_norm": 7.021201009961468, "learning_rate": 4.520516062859579e-06, "loss": 0.6677, "step": 19019 }, { "epoch": 0.54, "grad_norm": 6.40389301391291, "learning_rate": 4.520054443780445e-06, "loss": 0.8657, "step": 19020 }, { "epoch": 0.54, "grad_norm": 3.7011056697832436, "learning_rate": 4.519592828830224e-06, "loss": 0.2329, "step": 19021 }, { "epoch": 0.54, "grad_norm": 6.492510662459357, "learning_rate": 4.519131218012887e-06, "loss": 0.833, "step": 19022 }, { "epoch": 0.54, "grad_norm": 9.37289389795264, "learning_rate": 4.518669611332402e-06, "loss": 0.2232, "step": 19023 }, { "epoch": 0.54, "grad_norm": 5.57131449286552, "learning_rate": 4.5182080087927445e-06, "loss": 0.3742, "step": 19024 }, { "epoch": 0.54, "grad_norm": 2.9774663158724652, "learning_rate": 4.517746410397886e-06, "loss": 0.3076, "step": 19025 }, { "epoch": 0.54, "grad_norm": 5.572034443934357, "learning_rate": 4.5172848161517924e-06, "loss": 0.3542, "step": 19026 }, { "epoch": 0.54, "grad_norm": 11.05648517347746, "learning_rate": 4.516823226058441e-06, "loss": 0.7511, "step": 19027 }, { "epoch": 0.54, "grad_norm": 6.433494581003679, "learning_rate": 4.516361640121799e-06, "loss": 0.4464, "step": 19028 }, { "epoch": 0.54, "grad_norm": 19.766240191026917, "learning_rate": 4.515900058345839e-06, "loss": 1.1553, "step": 19029 }, { "epoch": 0.54, "grad_norm": 6.9124866478568565, "learning_rate": 4.515438480734529e-06, "loss": 0.3546, "step": 19030 }, { "epoch": 0.55, "grad_norm": 4.535983539995473, "learning_rate": 4.514976907291845e-06, "loss": 0.6829, "step": 19031 }, { "epoch": 0.55, "grad_norm": 5.0084111514951175, "learning_rate": 4.514515338021753e-06, "loss": 0.2258, "step": 19032 }, { "epoch": 0.55, "grad_norm": 5.344629761850653, "learning_rate": 4.5140537729282254e-06, "loss": 0.5084, "step": 19033 }, { "epoch": 0.55, "grad_norm": 5.451155211130366, "learning_rate": 4.513592212015235e-06, "loss": 0.4534, "step": 19034 }, { "epoch": 0.55, "grad_norm": 3.5975906581470234, "learning_rate": 4.513130655286749e-06, "loss": 0.246, "step": 19035 }, { "epoch": 0.55, "grad_norm": 7.932468892399253, "learning_rate": 4.5126691027467425e-06, "loss": 0.9346, "step": 19036 }, { "epoch": 0.55, "grad_norm": 2.6803406789587525, "learning_rate": 4.512207554399182e-06, "loss": 0.2148, "step": 19037 }, { "epoch": 0.55, "grad_norm": 4.3974510157179045, "learning_rate": 4.511746010248041e-06, "loss": 0.2811, "step": 19038 }, { "epoch": 0.55, "grad_norm": 5.156303451723423, "learning_rate": 4.511284470297287e-06, "loss": 0.3952, "step": 19039 }, { "epoch": 0.55, "grad_norm": 6.864170144997863, "learning_rate": 4.510822934550893e-06, "loss": 0.4277, "step": 19040 }, { "epoch": 0.55, "grad_norm": 9.112201598923892, "learning_rate": 4.510361403012831e-06, "loss": 0.5077, "step": 19041 }, { "epoch": 0.55, "grad_norm": 5.659490110426276, "learning_rate": 4.509899875687067e-06, "loss": 0.2326, "step": 19042 }, { "epoch": 0.55, "grad_norm": 3.0048642460122705, "learning_rate": 4.509438352577576e-06, "loss": 0.302, "step": 19043 }, { "epoch": 0.55, "grad_norm": 5.476547611232659, "learning_rate": 4.508976833688326e-06, "loss": 0.3454, "step": 19044 }, { "epoch": 0.55, "grad_norm": 9.026579550102499, "learning_rate": 4.508515319023288e-06, "loss": 0.8981, "step": 19045 }, { "epoch": 0.55, "grad_norm": 7.156277385809192, "learning_rate": 4.508053808586431e-06, "loss": 0.645, "step": 19046 }, { "epoch": 0.55, "grad_norm": 4.638615593819753, "learning_rate": 4.507592302381727e-06, "loss": 0.5819, "step": 19047 }, { "epoch": 0.55, "grad_norm": 7.675454541890114, "learning_rate": 4.507130800413147e-06, "loss": 0.8221, "step": 19048 }, { "epoch": 0.55, "grad_norm": 15.080788506577932, "learning_rate": 4.506669302684658e-06, "loss": 0.5107, "step": 19049 }, { "epoch": 0.55, "grad_norm": 5.338499199923192, "learning_rate": 4.5062078092002335e-06, "loss": 0.4207, "step": 19050 }, { "epoch": 0.55, "grad_norm": 7.9836591485526585, "learning_rate": 4.505746319963842e-06, "loss": 1.1988, "step": 19051 }, { "epoch": 0.55, "grad_norm": 5.773606108189507, "learning_rate": 4.505284834979455e-06, "loss": 0.2344, "step": 19052 }, { "epoch": 0.55, "grad_norm": 6.568399738297778, "learning_rate": 4.50482335425104e-06, "loss": 0.4164, "step": 19053 }, { "epoch": 0.55, "grad_norm": 5.753293711163763, "learning_rate": 4.504361877782571e-06, "loss": 0.76, "step": 19054 }, { "epoch": 0.55, "grad_norm": 6.556805146954017, "learning_rate": 4.503900405578014e-06, "loss": 0.3329, "step": 19055 }, { "epoch": 0.55, "grad_norm": 4.527160561832246, "learning_rate": 4.503438937641342e-06, "loss": 0.2902, "step": 19056 }, { "epoch": 0.55, "grad_norm": 10.525501346200436, "learning_rate": 4.502977473976522e-06, "loss": 0.8678, "step": 19057 }, { "epoch": 0.55, "grad_norm": 3.949223066529463, "learning_rate": 4.502516014587526e-06, "loss": 0.3157, "step": 19058 }, { "epoch": 0.55, "grad_norm": 7.10760831575248, "learning_rate": 4.502054559478325e-06, "loss": 0.2117, "step": 19059 }, { "epoch": 0.55, "grad_norm": 6.006815456801371, "learning_rate": 4.501593108652886e-06, "loss": 0.764, "step": 19060 }, { "epoch": 0.55, "grad_norm": 8.501842186955685, "learning_rate": 4.501131662115181e-06, "loss": 0.586, "step": 19061 }, { "epoch": 0.55, "grad_norm": 7.68596075511624, "learning_rate": 4.500670219869177e-06, "loss": 0.5959, "step": 19062 }, { "epoch": 0.55, "grad_norm": 3.242479904041943, "learning_rate": 4.500208781918849e-06, "loss": 0.174, "step": 19063 }, { "epoch": 0.55, "grad_norm": 3.202867730206316, "learning_rate": 4.49974734826816e-06, "loss": 0.2428, "step": 19064 }, { "epoch": 0.55, "grad_norm": 2.9852816018544392, "learning_rate": 4.499285918921083e-06, "loss": 0.105, "step": 19065 }, { "epoch": 0.55, "grad_norm": 4.102670981164283, "learning_rate": 4.49882449388159e-06, "loss": 0.2891, "step": 19066 }, { "epoch": 0.55, "grad_norm": 5.773224740244155, "learning_rate": 4.498363073153647e-06, "loss": 0.9852, "step": 19067 }, { "epoch": 0.55, "grad_norm": 4.921368769694238, "learning_rate": 4.497901656741226e-06, "loss": 0.3705, "step": 19068 }, { "epoch": 0.55, "grad_norm": 8.518456056691418, "learning_rate": 4.497440244648292e-06, "loss": 0.8658, "step": 19069 }, { "epoch": 0.55, "grad_norm": 11.03109836203984, "learning_rate": 4.496978836878821e-06, "loss": 0.1323, "step": 19070 }, { "epoch": 0.55, "grad_norm": 7.613773836941841, "learning_rate": 4.496517433436777e-06, "loss": 0.4717, "step": 19071 }, { "epoch": 0.55, "grad_norm": 7.1302013819799726, "learning_rate": 4.496056034326133e-06, "loss": 0.6312, "step": 19072 }, { "epoch": 0.55, "grad_norm": 6.505697101182845, "learning_rate": 4.495594639550855e-06, "loss": 0.551, "step": 19073 }, { "epoch": 0.55, "grad_norm": 5.956699329572006, "learning_rate": 4.4951332491149154e-06, "loss": 0.3632, "step": 19074 }, { "epoch": 0.55, "grad_norm": 4.149871434667661, "learning_rate": 4.494671863022283e-06, "loss": 0.4703, "step": 19075 }, { "epoch": 0.55, "grad_norm": 3.0857201608344567, "learning_rate": 4.494210481276924e-06, "loss": 0.431, "step": 19076 }, { "epoch": 0.55, "grad_norm": 5.342453693408773, "learning_rate": 4.493749103882812e-06, "loss": 0.2335, "step": 19077 }, { "epoch": 0.55, "grad_norm": 4.149260200490248, "learning_rate": 4.493287730843914e-06, "loss": 0.5158, "step": 19078 }, { "epoch": 0.55, "grad_norm": 8.995311495999719, "learning_rate": 4.4928263621642e-06, "loss": 0.8206, "step": 19079 }, { "epoch": 0.55, "grad_norm": 7.764359062059938, "learning_rate": 4.4923649978476356e-06, "loss": 0.7466, "step": 19080 }, { "epoch": 0.55, "grad_norm": 8.46244104046476, "learning_rate": 4.491903637898194e-06, "loss": 0.7548, "step": 19081 }, { "epoch": 0.55, "grad_norm": 6.177184088551593, "learning_rate": 4.4914422823198445e-06, "loss": 0.5732, "step": 19082 }, { "epoch": 0.55, "grad_norm": 5.323662463630876, "learning_rate": 4.490980931116552e-06, "loss": 0.5881, "step": 19083 }, { "epoch": 0.55, "grad_norm": 4.207556363259208, "learning_rate": 4.49051958429229e-06, "loss": 0.294, "step": 19084 }, { "epoch": 0.55, "grad_norm": 4.236302348894323, "learning_rate": 4.490058241851023e-06, "loss": 0.3136, "step": 19085 }, { "epoch": 0.55, "grad_norm": 11.727017428769269, "learning_rate": 4.489596903796726e-06, "loss": 0.3878, "step": 19086 }, { "epoch": 0.55, "grad_norm": 6.225965712987477, "learning_rate": 4.489135570133359e-06, "loss": 0.5385, "step": 19087 }, { "epoch": 0.55, "grad_norm": 7.43145435716387, "learning_rate": 4.488674240864901e-06, "loss": 0.453, "step": 19088 }, { "epoch": 0.55, "grad_norm": 5.001599056130562, "learning_rate": 4.488212915995311e-06, "loss": 0.2908, "step": 19089 }, { "epoch": 0.55, "grad_norm": 7.1265108614746255, "learning_rate": 4.4877515955285624e-06, "loss": 0.5396, "step": 19090 }, { "epoch": 0.55, "grad_norm": 4.934312056852074, "learning_rate": 4.487290279468628e-06, "loss": 0.3325, "step": 19091 }, { "epoch": 0.55, "grad_norm": 8.2570810851211, "learning_rate": 4.486828967819468e-06, "loss": 0.6908, "step": 19092 }, { "epoch": 0.55, "grad_norm": 7.468003431220306, "learning_rate": 4.4863676605850575e-06, "loss": 0.1926, "step": 19093 }, { "epoch": 0.55, "grad_norm": 4.659019670555051, "learning_rate": 4.4859063577693615e-06, "loss": 0.6212, "step": 19094 }, { "epoch": 0.55, "grad_norm": 6.925821113794433, "learning_rate": 4.485445059376352e-06, "loss": 0.5742, "step": 19095 }, { "epoch": 0.55, "grad_norm": 10.913875504504897, "learning_rate": 4.4849837654099925e-06, "loss": 0.5639, "step": 19096 }, { "epoch": 0.55, "grad_norm": 6.296595754182037, "learning_rate": 4.484522475874256e-06, "loss": 0.4884, "step": 19097 }, { "epoch": 0.55, "grad_norm": 4.881883993749767, "learning_rate": 4.484061190773108e-06, "loss": 0.632, "step": 19098 }, { "epoch": 0.55, "grad_norm": 7.046910727805771, "learning_rate": 4.483599910110518e-06, "loss": 0.604, "step": 19099 }, { "epoch": 0.55, "grad_norm": 8.308255825873687, "learning_rate": 4.483138633890456e-06, "loss": 0.5281, "step": 19100 }, { "epoch": 0.55, "grad_norm": 2.9279148486437423, "learning_rate": 4.482677362116887e-06, "loss": 0.2786, "step": 19101 }, { "epoch": 0.55, "grad_norm": 7.523128388294668, "learning_rate": 4.4822160947937836e-06, "loss": 0.4562, "step": 19102 }, { "epoch": 0.55, "grad_norm": 6.669848811138985, "learning_rate": 4.4817548319251075e-06, "loss": 0.6755, "step": 19103 }, { "epoch": 0.55, "grad_norm": 2.8249129712363183, "learning_rate": 4.4812935735148335e-06, "loss": 0.2868, "step": 19104 }, { "epoch": 0.55, "grad_norm": 5.149503652486701, "learning_rate": 4.480832319566926e-06, "loss": 0.3864, "step": 19105 }, { "epoch": 0.55, "grad_norm": 6.393093238727057, "learning_rate": 4.480371070085353e-06, "loss": 0.6177, "step": 19106 }, { "epoch": 0.55, "grad_norm": 6.058667495445969, "learning_rate": 4.479909825074086e-06, "loss": 0.6783, "step": 19107 }, { "epoch": 0.55, "grad_norm": 7.198036100847678, "learning_rate": 4.47944858453709e-06, "loss": 0.5787, "step": 19108 }, { "epoch": 0.55, "grad_norm": 5.267472664608703, "learning_rate": 4.478987348478335e-06, "loss": 0.5026, "step": 19109 }, { "epoch": 0.55, "grad_norm": 3.013892946274737, "learning_rate": 4.478526116901785e-06, "loss": 0.3727, "step": 19110 }, { "epoch": 0.55, "grad_norm": 3.459454203511346, "learning_rate": 4.478064889811413e-06, "loss": 0.4594, "step": 19111 }, { "epoch": 0.55, "grad_norm": 6.796777343048436, "learning_rate": 4.477603667211183e-06, "loss": 0.6885, "step": 19112 }, { "epoch": 0.55, "grad_norm": 5.8989737765836265, "learning_rate": 4.477142449105065e-06, "loss": 0.5189, "step": 19113 }, { "epoch": 0.55, "grad_norm": 6.454757631687792, "learning_rate": 4.476681235497025e-06, "loss": 0.596, "step": 19114 }, { "epoch": 0.55, "grad_norm": 5.616992909930051, "learning_rate": 4.476220026391032e-06, "loss": 0.7214, "step": 19115 }, { "epoch": 0.55, "grad_norm": 2.2450863444900677, "learning_rate": 4.475758821791056e-06, "loss": 0.1757, "step": 19116 }, { "epoch": 0.55, "grad_norm": 5.404191832629657, "learning_rate": 4.475297621701059e-06, "loss": 0.25, "step": 19117 }, { "epoch": 0.55, "grad_norm": 7.870650952475941, "learning_rate": 4.474836426125016e-06, "loss": 0.8028, "step": 19118 }, { "epoch": 0.55, "grad_norm": 4.271560172902868, "learning_rate": 4.474375235066887e-06, "loss": 0.2857, "step": 19119 }, { "epoch": 0.55, "grad_norm": 4.7466500289876326, "learning_rate": 4.473914048530645e-06, "loss": 0.2568, "step": 19120 }, { "epoch": 0.55, "grad_norm": 5.995103507019332, "learning_rate": 4.473452866520255e-06, "loss": 0.4138, "step": 19121 }, { "epoch": 0.55, "grad_norm": 6.005665825927261, "learning_rate": 4.4729916890396835e-06, "loss": 0.4497, "step": 19122 }, { "epoch": 0.55, "grad_norm": 4.340122656868842, "learning_rate": 4.472530516092902e-06, "loss": 0.4812, "step": 19123 }, { "epoch": 0.55, "grad_norm": 3.2537238125987438, "learning_rate": 4.472069347683875e-06, "loss": 0.2153, "step": 19124 }, { "epoch": 0.55, "grad_norm": 4.571710656341863, "learning_rate": 4.4716081838165705e-06, "loss": 0.2459, "step": 19125 }, { "epoch": 0.55, "grad_norm": 6.095251749298719, "learning_rate": 4.471147024494954e-06, "loss": 0.4486, "step": 19126 }, { "epoch": 0.55, "grad_norm": 13.002176762783941, "learning_rate": 4.470685869722997e-06, "loss": 0.6176, "step": 19127 }, { "epoch": 0.55, "grad_norm": 8.288674995360292, "learning_rate": 4.4702247195046635e-06, "loss": 0.9851, "step": 19128 }, { "epoch": 0.55, "grad_norm": 9.12572194209069, "learning_rate": 4.4697635738439225e-06, "loss": 0.5352, "step": 19129 }, { "epoch": 0.55, "grad_norm": 3.5873527642849643, "learning_rate": 4.469302432744738e-06, "loss": 0.2146, "step": 19130 }, { "epoch": 0.55, "grad_norm": 5.872165909443707, "learning_rate": 4.468841296211079e-06, "loss": 0.2854, "step": 19131 }, { "epoch": 0.55, "grad_norm": 6.662858789455918, "learning_rate": 4.468380164246916e-06, "loss": 0.3953, "step": 19132 }, { "epoch": 0.55, "grad_norm": 7.191449597760066, "learning_rate": 4.46791903685621e-06, "loss": 0.7392, "step": 19133 }, { "epoch": 0.55, "grad_norm": 2.9844131467288575, "learning_rate": 4.467457914042933e-06, "loss": 0.3125, "step": 19134 }, { "epoch": 0.55, "grad_norm": 5.095447444678805, "learning_rate": 4.4669967958110485e-06, "loss": 0.2233, "step": 19135 }, { "epoch": 0.55, "grad_norm": 3.71935895533972, "learning_rate": 4.466535682164527e-06, "loss": 0.1473, "step": 19136 }, { "epoch": 0.55, "grad_norm": 6.553262476099821, "learning_rate": 4.46607457310733e-06, "loss": 0.547, "step": 19137 }, { "epoch": 0.55, "grad_norm": 5.369473677012532, "learning_rate": 4.465613468643432e-06, "loss": 0.6247, "step": 19138 }, { "epoch": 0.55, "grad_norm": 2.790904977899638, "learning_rate": 4.465152368776792e-06, "loss": 0.2126, "step": 19139 }, { "epoch": 0.55, "grad_norm": 7.205328780064578, "learning_rate": 4.464691273511381e-06, "loss": 0.7041, "step": 19140 }, { "epoch": 0.55, "grad_norm": 9.564704291901988, "learning_rate": 4.4642301828511664e-06, "loss": 0.5163, "step": 19141 }, { "epoch": 0.55, "grad_norm": 4.320895191928207, "learning_rate": 4.4637690968001125e-06, "loss": 0.5486, "step": 19142 }, { "epoch": 0.55, "grad_norm": 2.3905034501833375, "learning_rate": 4.463308015362188e-06, "loss": 0.2972, "step": 19143 }, { "epoch": 0.55, "grad_norm": 4.382693311189208, "learning_rate": 4.462846938541357e-06, "loss": 0.7223, "step": 19144 }, { "epoch": 0.55, "grad_norm": 4.696011597545119, "learning_rate": 4.462385866341589e-06, "loss": 0.3628, "step": 19145 }, { "epoch": 0.55, "grad_norm": 8.461729933666424, "learning_rate": 4.461924798766849e-06, "loss": 0.7617, "step": 19146 }, { "epoch": 0.55, "grad_norm": 5.255473190403277, "learning_rate": 4.4614637358211025e-06, "loss": 0.5279, "step": 19147 }, { "epoch": 0.55, "grad_norm": 4.658656426532223, "learning_rate": 4.46100267750832e-06, "loss": 0.8472, "step": 19148 }, { "epoch": 0.55, "grad_norm": 7.762081051392247, "learning_rate": 4.460541623832462e-06, "loss": 0.5689, "step": 19149 }, { "epoch": 0.55, "grad_norm": 3.0469340196419705, "learning_rate": 4.4600805747975e-06, "loss": 0.5668, "step": 19150 }, { "epoch": 0.55, "grad_norm": 4.806219709521659, "learning_rate": 4.459619530407397e-06, "loss": 0.3626, "step": 19151 }, { "epoch": 0.55, "grad_norm": 6.667960311625079, "learning_rate": 4.459158490666122e-06, "loss": 0.4913, "step": 19152 }, { "epoch": 0.55, "grad_norm": 7.530737705139249, "learning_rate": 4.458697455577638e-06, "loss": 0.3231, "step": 19153 }, { "epoch": 0.55, "grad_norm": 8.57094324758841, "learning_rate": 4.458236425145915e-06, "loss": 0.7205, "step": 19154 }, { "epoch": 0.55, "grad_norm": 5.877659540643726, "learning_rate": 4.457775399374917e-06, "loss": 0.4031, "step": 19155 }, { "epoch": 0.55, "grad_norm": 8.070485115974853, "learning_rate": 4.457314378268609e-06, "loss": 0.5728, "step": 19156 }, { "epoch": 0.55, "grad_norm": 3.7145297201541227, "learning_rate": 4.456853361830961e-06, "loss": 0.3255, "step": 19157 }, { "epoch": 0.55, "grad_norm": 7.15515124952184, "learning_rate": 4.4563923500659345e-06, "loss": 0.3819, "step": 19158 }, { "epoch": 0.55, "grad_norm": 6.475579678425002, "learning_rate": 4.4559313429775e-06, "loss": 0.7574, "step": 19159 }, { "epoch": 0.55, "grad_norm": 3.687477079417229, "learning_rate": 4.455470340569618e-06, "loss": 0.2767, "step": 19160 }, { "epoch": 0.55, "grad_norm": 4.271543288729451, "learning_rate": 4.45500934284626e-06, "loss": 0.2079, "step": 19161 }, { "epoch": 0.55, "grad_norm": 3.862340613968497, "learning_rate": 4.454548349811389e-06, "loss": 0.5696, "step": 19162 }, { "epoch": 0.55, "grad_norm": 6.96790852619573, "learning_rate": 4.45408736146897e-06, "loss": 0.5422, "step": 19163 }, { "epoch": 0.55, "grad_norm": 3.3993570421056294, "learning_rate": 4.4536263778229724e-06, "loss": 0.1687, "step": 19164 }, { "epoch": 0.55, "grad_norm": 2.377678991493396, "learning_rate": 4.4531653988773585e-06, "loss": 0.2955, "step": 19165 }, { "epoch": 0.55, "grad_norm": 3.891054060843506, "learning_rate": 4.452704424636096e-06, "loss": 0.3951, "step": 19166 }, { "epoch": 0.55, "grad_norm": 10.265714229852048, "learning_rate": 4.452243455103149e-06, "loss": 0.6179, "step": 19167 }, { "epoch": 0.55, "grad_norm": 4.998628666696084, "learning_rate": 4.451782490282486e-06, "loss": 0.4807, "step": 19168 }, { "epoch": 0.55, "grad_norm": 5.1387523730362314, "learning_rate": 4.451321530178068e-06, "loss": 0.3121, "step": 19169 }, { "epoch": 0.55, "grad_norm": 6.228684124920733, "learning_rate": 4.450860574793867e-06, "loss": 0.2422, "step": 19170 }, { "epoch": 0.55, "grad_norm": 6.198156331875445, "learning_rate": 4.450399624133841e-06, "loss": 0.2496, "step": 19171 }, { "epoch": 0.55, "grad_norm": 4.398146164021779, "learning_rate": 4.44993867820196e-06, "loss": 0.2988, "step": 19172 }, { "epoch": 0.55, "grad_norm": 6.841000880078093, "learning_rate": 4.449477737002192e-06, "loss": 0.4803, "step": 19173 }, { "epoch": 0.55, "grad_norm": 4.370998132663835, "learning_rate": 4.449016800538495e-06, "loss": 0.3986, "step": 19174 }, { "epoch": 0.55, "grad_norm": 4.6751155522799435, "learning_rate": 4.448555868814842e-06, "loss": 0.7163, "step": 19175 }, { "epoch": 0.55, "grad_norm": 6.738202416884869, "learning_rate": 4.448094941835193e-06, "loss": 0.8588, "step": 19176 }, { "epoch": 0.55, "grad_norm": 2.350888976408035, "learning_rate": 4.447634019603517e-06, "loss": 0.2302, "step": 19177 }, { "epoch": 0.55, "grad_norm": 6.755298301158452, "learning_rate": 4.4471731021237765e-06, "loss": 0.565, "step": 19178 }, { "epoch": 0.55, "grad_norm": 6.5230583400523905, "learning_rate": 4.446712189399939e-06, "loss": 0.2523, "step": 19179 }, { "epoch": 0.55, "grad_norm": 4.156871806504682, "learning_rate": 4.446251281435965e-06, "loss": 0.4659, "step": 19180 }, { "epoch": 0.55, "grad_norm": 5.438811527676579, "learning_rate": 4.445790378235825e-06, "loss": 0.3369, "step": 19181 }, { "epoch": 0.55, "grad_norm": 4.97572932911786, "learning_rate": 4.445329479803483e-06, "loss": 0.7251, "step": 19182 }, { "epoch": 0.55, "grad_norm": 4.448182746498432, "learning_rate": 4.444868586142902e-06, "loss": 0.4915, "step": 19183 }, { "epoch": 0.55, "grad_norm": 12.621539859803843, "learning_rate": 4.44440769725805e-06, "loss": 1.7435, "step": 19184 }, { "epoch": 0.55, "grad_norm": 4.157656725546164, "learning_rate": 4.443946813152889e-06, "loss": 0.0873, "step": 19185 }, { "epoch": 0.55, "grad_norm": 5.304082619699634, "learning_rate": 4.443485933831385e-06, "loss": 0.4603, "step": 19186 }, { "epoch": 0.55, "grad_norm": 3.86430877505408, "learning_rate": 4.443025059297502e-06, "loss": 0.366, "step": 19187 }, { "epoch": 0.55, "grad_norm": 3.239698037981947, "learning_rate": 4.4425641895552064e-06, "loss": 0.268, "step": 19188 }, { "epoch": 0.55, "grad_norm": 8.507467916216847, "learning_rate": 4.4421033246084636e-06, "loss": 0.5852, "step": 19189 }, { "epoch": 0.55, "grad_norm": 3.3311299472382596, "learning_rate": 4.4416424644612355e-06, "loss": 0.4278, "step": 19190 }, { "epoch": 0.55, "grad_norm": 3.9964536652080946, "learning_rate": 4.441181609117491e-06, "loss": 0.3011, "step": 19191 }, { "epoch": 0.55, "grad_norm": 6.969388273970722, "learning_rate": 4.44072075858119e-06, "loss": 0.4593, "step": 19192 }, { "epoch": 0.55, "grad_norm": 7.260641542415684, "learning_rate": 4.440259912856301e-06, "loss": 0.7068, "step": 19193 }, { "epoch": 0.55, "grad_norm": 6.851677227038416, "learning_rate": 4.4397990719467854e-06, "loss": 0.5747, "step": 19194 }, { "epoch": 0.55, "grad_norm": 2.864489270602171, "learning_rate": 4.43933823585661e-06, "loss": 0.2467, "step": 19195 }, { "epoch": 0.55, "grad_norm": 8.891763409770828, "learning_rate": 4.43887740458974e-06, "loss": 1.2284, "step": 19196 }, { "epoch": 0.55, "grad_norm": 4.614923738736033, "learning_rate": 4.438416578150136e-06, "loss": 0.5911, "step": 19197 }, { "epoch": 0.55, "grad_norm": 3.5142935032819147, "learning_rate": 4.437955756541767e-06, "loss": 0.1051, "step": 19198 }, { "epoch": 0.55, "grad_norm": 7.593893246025671, "learning_rate": 4.437494939768596e-06, "loss": 0.5944, "step": 19199 }, { "epoch": 0.55, "grad_norm": 4.3890050431398775, "learning_rate": 4.4370341278345865e-06, "loss": 0.5337, "step": 19200 }, { "epoch": 0.55, "grad_norm": 11.351123441644495, "learning_rate": 4.4365733207437015e-06, "loss": 0.9988, "step": 19201 }, { "epoch": 0.55, "grad_norm": 2.841542749742156, "learning_rate": 4.436112518499908e-06, "loss": 0.5775, "step": 19202 }, { "epoch": 0.55, "grad_norm": 6.683821192159449, "learning_rate": 4.435651721107169e-06, "loss": 0.2569, "step": 19203 }, { "epoch": 0.55, "grad_norm": 5.582375539168682, "learning_rate": 4.43519092856945e-06, "loss": 0.7224, "step": 19204 }, { "epoch": 0.55, "grad_norm": 3.4629962386513755, "learning_rate": 4.434730140890711e-06, "loss": 0.3258, "step": 19205 }, { "epoch": 0.55, "grad_norm": 9.99888037135784, "learning_rate": 4.43426935807492e-06, "loss": 0.3322, "step": 19206 }, { "epoch": 0.55, "grad_norm": 2.7575130570241755, "learning_rate": 4.433808580126042e-06, "loss": 0.3191, "step": 19207 }, { "epoch": 0.55, "grad_norm": 8.65532651857207, "learning_rate": 4.433347807048038e-06, "loss": 0.9814, "step": 19208 }, { "epoch": 0.55, "grad_norm": 2.74672139403378, "learning_rate": 4.432887038844873e-06, "loss": 0.1539, "step": 19209 }, { "epoch": 0.55, "grad_norm": 2.5981278471831324, "learning_rate": 4.432426275520509e-06, "loss": 0.1511, "step": 19210 }, { "epoch": 0.55, "grad_norm": 11.234340535032757, "learning_rate": 4.431965517078915e-06, "loss": 0.2642, "step": 19211 }, { "epoch": 0.55, "grad_norm": 7.866243792606744, "learning_rate": 4.43150476352405e-06, "loss": 0.7144, "step": 19212 }, { "epoch": 0.55, "grad_norm": 3.322940872167999, "learning_rate": 4.431044014859878e-06, "loss": 0.4986, "step": 19213 }, { "epoch": 0.55, "grad_norm": 5.780672364116669, "learning_rate": 4.430583271090367e-06, "loss": 0.5894, "step": 19214 }, { "epoch": 0.55, "grad_norm": 4.872749200264339, "learning_rate": 4.430122532219477e-06, "loss": 0.1954, "step": 19215 }, { "epoch": 0.55, "grad_norm": 6.8791339236550195, "learning_rate": 4.429661798251174e-06, "loss": 0.6661, "step": 19216 }, { "epoch": 0.55, "grad_norm": 5.838988296980513, "learning_rate": 4.429201069189418e-06, "loss": 0.5088, "step": 19217 }, { "epoch": 0.55, "grad_norm": 6.003337408753148, "learning_rate": 4.428740345038177e-06, "loss": 0.8716, "step": 19218 }, { "epoch": 0.55, "grad_norm": 4.520157700547336, "learning_rate": 4.4282796258014114e-06, "loss": 0.5504, "step": 19219 }, { "epoch": 0.55, "grad_norm": 4.917579011267818, "learning_rate": 4.427818911483087e-06, "loss": 0.3113, "step": 19220 }, { "epoch": 0.55, "grad_norm": 106.22750182766471, "learning_rate": 4.427358202087164e-06, "loss": 0.4845, "step": 19221 }, { "epoch": 0.55, "grad_norm": 6.30739730969091, "learning_rate": 4.426897497617609e-06, "loss": 0.5394, "step": 19222 }, { "epoch": 0.55, "grad_norm": 5.567931234705177, "learning_rate": 4.426436798078385e-06, "loss": 0.1774, "step": 19223 }, { "epoch": 0.55, "grad_norm": 4.410947257398353, "learning_rate": 4.4259761034734515e-06, "loss": 0.6199, "step": 19224 }, { "epoch": 0.55, "grad_norm": 5.337505395069121, "learning_rate": 4.425515413806779e-06, "loss": 0.6831, "step": 19225 }, { "epoch": 0.55, "grad_norm": 8.055219394056822, "learning_rate": 4.425054729082325e-06, "loss": 0.7425, "step": 19226 }, { "epoch": 0.55, "grad_norm": 5.0457778068698245, "learning_rate": 4.424594049304054e-06, "loss": 0.5893, "step": 19227 }, { "epoch": 0.55, "grad_norm": 4.329890858373738, "learning_rate": 4.424133374475928e-06, "loss": 0.4775, "step": 19228 }, { "epoch": 0.55, "grad_norm": 5.667032491805883, "learning_rate": 4.423672704601914e-06, "loss": 0.2373, "step": 19229 }, { "epoch": 0.55, "grad_norm": 6.344328078817132, "learning_rate": 4.423212039685973e-06, "loss": 0.7687, "step": 19230 }, { "epoch": 0.55, "grad_norm": 6.26391524962268, "learning_rate": 4.422751379732065e-06, "loss": 0.3727, "step": 19231 }, { "epoch": 0.55, "grad_norm": 9.017081210842411, "learning_rate": 4.422290724744159e-06, "loss": 0.7248, "step": 19232 }, { "epoch": 0.55, "grad_norm": 5.1302105537319385, "learning_rate": 4.421830074726214e-06, "loss": 0.6193, "step": 19233 }, { "epoch": 0.55, "grad_norm": 5.9752510177683575, "learning_rate": 4.421369429682194e-06, "loss": 0.6127, "step": 19234 }, { "epoch": 0.55, "grad_norm": 5.8111330701552095, "learning_rate": 4.42090878961606e-06, "loss": 0.3437, "step": 19235 }, { "epoch": 0.55, "grad_norm": 3.2414422706925046, "learning_rate": 4.420448154531778e-06, "loss": 0.1767, "step": 19236 }, { "epoch": 0.55, "grad_norm": 6.9536797805798285, "learning_rate": 4.419987524433308e-06, "loss": 0.3754, "step": 19237 }, { "epoch": 0.55, "grad_norm": 5.212247006248, "learning_rate": 4.4195268993246136e-06, "loss": 0.1718, "step": 19238 }, { "epoch": 0.55, "grad_norm": 3.156743332105757, "learning_rate": 4.419066279209659e-06, "loss": 0.1402, "step": 19239 }, { "epoch": 0.55, "grad_norm": 4.123648248615679, "learning_rate": 4.418605664092404e-06, "loss": 0.5254, "step": 19240 }, { "epoch": 0.55, "grad_norm": 4.1102776932009375, "learning_rate": 4.418145053976815e-06, "loss": 0.3744, "step": 19241 }, { "epoch": 0.55, "grad_norm": 11.264373667474077, "learning_rate": 4.417684448866851e-06, "loss": 0.4495, "step": 19242 }, { "epoch": 0.55, "grad_norm": 3.2929160225878977, "learning_rate": 4.417223848766478e-06, "loss": 0.1814, "step": 19243 }, { "epoch": 0.55, "grad_norm": 3.865247852673966, "learning_rate": 4.4167632536796535e-06, "loss": 0.4766, "step": 19244 }, { "epoch": 0.55, "grad_norm": 4.65665026998853, "learning_rate": 4.416302663610345e-06, "loss": 0.2813, "step": 19245 }, { "epoch": 0.55, "grad_norm": 8.340158439613216, "learning_rate": 4.4158420785625126e-06, "loss": 0.8219, "step": 19246 }, { "epoch": 0.55, "grad_norm": 2.443723204775591, "learning_rate": 4.415381498540117e-06, "loss": 0.0805, "step": 19247 }, { "epoch": 0.55, "grad_norm": 3.395908423141583, "learning_rate": 4.4149209235471245e-06, "loss": 0.2555, "step": 19248 }, { "epoch": 0.55, "grad_norm": 6.573134679146082, "learning_rate": 4.414460353587495e-06, "loss": 0.3848, "step": 19249 }, { "epoch": 0.55, "grad_norm": 9.960301373682919, "learning_rate": 4.413999788665191e-06, "loss": 0.5688, "step": 19250 }, { "epoch": 0.55, "grad_norm": 6.478926647029514, "learning_rate": 4.4135392287841735e-06, "loss": 0.296, "step": 19251 }, { "epoch": 0.55, "grad_norm": 4.7493340627756115, "learning_rate": 4.413078673948407e-06, "loss": 0.3391, "step": 19252 }, { "epoch": 0.55, "grad_norm": 6.327491031491161, "learning_rate": 4.412618124161852e-06, "loss": 0.4648, "step": 19253 }, { "epoch": 0.55, "grad_norm": 7.384626345366511, "learning_rate": 4.41215757942847e-06, "loss": 0.6525, "step": 19254 }, { "epoch": 0.55, "grad_norm": 6.9681358922130245, "learning_rate": 4.411697039752226e-06, "loss": 0.2801, "step": 19255 }, { "epoch": 0.55, "grad_norm": 6.210413995908794, "learning_rate": 4.411236505137078e-06, "loss": 0.3587, "step": 19256 }, { "epoch": 0.55, "grad_norm": 3.26415464162539, "learning_rate": 4.410775975586991e-06, "loss": 0.3396, "step": 19257 }, { "epoch": 0.55, "grad_norm": 3.220948977366212, "learning_rate": 4.410315451105925e-06, "loss": 0.1907, "step": 19258 }, { "epoch": 0.55, "grad_norm": 4.659248845665137, "learning_rate": 4.409854931697844e-06, "loss": 0.7, "step": 19259 }, { "epoch": 0.55, "grad_norm": 6.13952246321772, "learning_rate": 4.409394417366707e-06, "loss": 0.7294, "step": 19260 }, { "epoch": 0.55, "grad_norm": 3.3092502451607886, "learning_rate": 4.408933908116478e-06, "loss": 0.2777, "step": 19261 }, { "epoch": 0.55, "grad_norm": 7.669386963323975, "learning_rate": 4.408473403951117e-06, "loss": 0.7321, "step": 19262 }, { "epoch": 0.55, "grad_norm": 5.547880355634004, "learning_rate": 4.408012904874586e-06, "loss": 0.7339, "step": 19263 }, { "epoch": 0.55, "grad_norm": 4.211009716766459, "learning_rate": 4.407552410890849e-06, "loss": 0.2066, "step": 19264 }, { "epoch": 0.55, "grad_norm": 3.8670431399132745, "learning_rate": 4.407091922003863e-06, "loss": 0.4696, "step": 19265 }, { "epoch": 0.55, "grad_norm": 6.052514947764844, "learning_rate": 4.406631438217595e-06, "loss": 0.5184, "step": 19266 }, { "epoch": 0.55, "grad_norm": 6.908340759027936, "learning_rate": 4.406170959536002e-06, "loss": 1.0464, "step": 19267 }, { "epoch": 0.55, "grad_norm": 4.44563839326233, "learning_rate": 4.405710485963048e-06, "loss": 0.2601, "step": 19268 }, { "epoch": 0.55, "grad_norm": 5.692445784433328, "learning_rate": 4.405250017502693e-06, "loss": 0.1998, "step": 19269 }, { "epoch": 0.55, "grad_norm": 6.645023083298705, "learning_rate": 4.404789554158898e-06, "loss": 0.5492, "step": 19270 }, { "epoch": 0.55, "grad_norm": 11.861157189149912, "learning_rate": 4.404329095935626e-06, "loss": 0.5041, "step": 19271 }, { "epoch": 0.55, "grad_norm": 3.648667452833427, "learning_rate": 4.403868642836838e-06, "loss": 0.2924, "step": 19272 }, { "epoch": 0.55, "grad_norm": 5.493092273836463, "learning_rate": 4.403408194866495e-06, "loss": 0.5381, "step": 19273 }, { "epoch": 0.55, "grad_norm": 3.7118845002347785, "learning_rate": 4.402947752028556e-06, "loss": 0.3935, "step": 19274 }, { "epoch": 0.55, "grad_norm": 5.546064720609818, "learning_rate": 4.402487314326986e-06, "loss": 0.4511, "step": 19275 }, { "epoch": 0.55, "grad_norm": 4.713637127269671, "learning_rate": 4.402026881765743e-06, "loss": 0.7451, "step": 19276 }, { "epoch": 0.55, "grad_norm": 3.0174797955636974, "learning_rate": 4.40156645434879e-06, "loss": 0.4847, "step": 19277 }, { "epoch": 0.55, "grad_norm": 7.029474805745924, "learning_rate": 4.401106032080085e-06, "loss": 0.3349, "step": 19278 }, { "epoch": 0.55, "grad_norm": 4.691316246420816, "learning_rate": 4.400645614963593e-06, "loss": 0.5205, "step": 19279 }, { "epoch": 0.55, "grad_norm": 7.379899967259984, "learning_rate": 4.4001852030032725e-06, "loss": 0.3172, "step": 19280 }, { "epoch": 0.55, "grad_norm": 18.05403544735347, "learning_rate": 4.399724796203084e-06, "loss": 0.4971, "step": 19281 }, { "epoch": 0.55, "grad_norm": 5.048402442916667, "learning_rate": 4.399264394566991e-06, "loss": 0.1614, "step": 19282 }, { "epoch": 0.55, "grad_norm": 7.706267545731147, "learning_rate": 4.398803998098951e-06, "loss": 0.92, "step": 19283 }, { "epoch": 0.55, "grad_norm": 4.031641460603787, "learning_rate": 4.398343606802927e-06, "loss": 0.4371, "step": 19284 }, { "epoch": 0.55, "grad_norm": 4.718211111661682, "learning_rate": 4.397883220682877e-06, "loss": 0.4759, "step": 19285 }, { "epoch": 0.55, "grad_norm": 3.5105175301902354, "learning_rate": 4.397422839742766e-06, "loss": 0.2916, "step": 19286 }, { "epoch": 0.55, "grad_norm": 4.7241671206575635, "learning_rate": 4.396962463986551e-06, "loss": 0.6755, "step": 19287 }, { "epoch": 0.55, "grad_norm": 8.147063712804934, "learning_rate": 4.396502093418192e-06, "loss": 0.5769, "step": 19288 }, { "epoch": 0.55, "grad_norm": 7.828506308156474, "learning_rate": 4.396041728041654e-06, "loss": 0.3493, "step": 19289 }, { "epoch": 0.55, "grad_norm": 5.535009970444932, "learning_rate": 4.3955813678608935e-06, "loss": 0.5578, "step": 19290 }, { "epoch": 0.55, "grad_norm": 4.652750812597882, "learning_rate": 4.3951210128798725e-06, "loss": 0.5135, "step": 19291 }, { "epoch": 0.55, "grad_norm": 5.235501569531835, "learning_rate": 4.394660663102549e-06, "loss": 0.4862, "step": 19292 }, { "epoch": 0.55, "grad_norm": 4.4478655357310535, "learning_rate": 4.3942003185328895e-06, "loss": 0.3408, "step": 19293 }, { "epoch": 0.55, "grad_norm": 6.508198629329295, "learning_rate": 4.393739979174846e-06, "loss": 0.4862, "step": 19294 }, { "epoch": 0.55, "grad_norm": 4.589169145071647, "learning_rate": 4.393279645032384e-06, "loss": 0.5824, "step": 19295 }, { "epoch": 0.55, "grad_norm": 5.910811020508608, "learning_rate": 4.392819316109464e-06, "loss": 0.4965, "step": 19296 }, { "epoch": 0.55, "grad_norm": 6.745756687563767, "learning_rate": 4.392358992410042e-06, "loss": 0.465, "step": 19297 }, { "epoch": 0.55, "grad_norm": 5.353210017413809, "learning_rate": 4.391898673938083e-06, "loss": 0.4194, "step": 19298 }, { "epoch": 0.55, "grad_norm": 4.396443512979603, "learning_rate": 4.391438360697544e-06, "loss": 0.1354, "step": 19299 }, { "epoch": 0.55, "grad_norm": 8.52099428903142, "learning_rate": 4.390978052692387e-06, "loss": 0.9368, "step": 19300 }, { "epoch": 0.55, "grad_norm": 8.737072522971724, "learning_rate": 4.390517749926568e-06, "loss": 0.6937, "step": 19301 }, { "epoch": 0.55, "grad_norm": 5.89067907333852, "learning_rate": 4.3900574524040524e-06, "loss": 1.0375, "step": 19302 }, { "epoch": 0.55, "grad_norm": 5.89276246164066, "learning_rate": 4.389597160128797e-06, "loss": 0.6355, "step": 19303 }, { "epoch": 0.55, "grad_norm": 9.709703951721602, "learning_rate": 4.38913687310476e-06, "loss": 0.7843, "step": 19304 }, { "epoch": 0.55, "grad_norm": 6.812810033340466, "learning_rate": 4.388676591335906e-06, "loss": 1.1099, "step": 19305 }, { "epoch": 0.55, "grad_norm": 4.556980399991398, "learning_rate": 4.38821631482619e-06, "loss": 0.3546, "step": 19306 }, { "epoch": 0.55, "grad_norm": 5.608927568107755, "learning_rate": 4.387756043579575e-06, "loss": 0.6991, "step": 19307 }, { "epoch": 0.55, "grad_norm": 7.370881433870892, "learning_rate": 4.387295777600018e-06, "loss": 0.95, "step": 19308 }, { "epoch": 0.55, "grad_norm": 4.337565568323928, "learning_rate": 4.3868355168914824e-06, "loss": 0.5083, "step": 19309 }, { "epoch": 0.55, "grad_norm": 4.552552548921689, "learning_rate": 4.386375261457924e-06, "loss": 0.4572, "step": 19310 }, { "epoch": 0.55, "grad_norm": 5.141649161586711, "learning_rate": 4.3859150113033015e-06, "loss": 0.4984, "step": 19311 }, { "epoch": 0.55, "grad_norm": 7.338407480507987, "learning_rate": 4.38545476643158e-06, "loss": 0.3506, "step": 19312 }, { "epoch": 0.55, "grad_norm": 5.952365776769276, "learning_rate": 4.384994526846714e-06, "loss": 0.4628, "step": 19313 }, { "epoch": 0.55, "grad_norm": 1.593091777374292, "learning_rate": 4.384534292552665e-06, "loss": 0.1718, "step": 19314 }, { "epoch": 0.55, "grad_norm": 6.806247571764356, "learning_rate": 4.384074063553389e-06, "loss": 0.3848, "step": 19315 }, { "epoch": 0.55, "grad_norm": 3.5341437307121897, "learning_rate": 4.383613839852852e-06, "loss": 0.2492, "step": 19316 }, { "epoch": 0.55, "grad_norm": 4.630489184941253, "learning_rate": 4.383153621455007e-06, "loss": 0.3385, "step": 19317 }, { "epoch": 0.55, "grad_norm": 6.63858316782544, "learning_rate": 4.382693408363817e-06, "loss": 0.5606, "step": 19318 }, { "epoch": 0.55, "grad_norm": 5.515508418161406, "learning_rate": 4.382233200583237e-06, "loss": 0.4406, "step": 19319 }, { "epoch": 0.55, "grad_norm": 3.340278284906008, "learning_rate": 4.38177299811723e-06, "loss": 0.128, "step": 19320 }, { "epoch": 0.55, "grad_norm": 5.876214997480052, "learning_rate": 4.381312800969755e-06, "loss": 0.4535, "step": 19321 }, { "epoch": 0.55, "grad_norm": 3.012414359620511, "learning_rate": 4.3808526091447675e-06, "loss": 0.2685, "step": 19322 }, { "epoch": 0.55, "grad_norm": 3.6350809869572753, "learning_rate": 4.380392422646232e-06, "loss": 0.5285, "step": 19323 }, { "epoch": 0.55, "grad_norm": 10.613849398639818, "learning_rate": 4.379932241478101e-06, "loss": 0.8369, "step": 19324 }, { "epoch": 0.55, "grad_norm": 6.968935155760899, "learning_rate": 4.379472065644339e-06, "loss": 0.6691, "step": 19325 }, { "epoch": 0.55, "grad_norm": 4.618835361708812, "learning_rate": 4.3790118951489e-06, "loss": 0.7823, "step": 19326 }, { "epoch": 0.55, "grad_norm": 8.861568118926083, "learning_rate": 4.378551729995748e-06, "loss": 0.6553, "step": 19327 }, { "epoch": 0.55, "grad_norm": 3.2904676847176746, "learning_rate": 4.378091570188836e-06, "loss": 0.4237, "step": 19328 }, { "epoch": 0.55, "grad_norm": 5.18939686760148, "learning_rate": 4.377631415732127e-06, "loss": 0.5559, "step": 19329 }, { "epoch": 0.55, "grad_norm": 8.371586659169578, "learning_rate": 4.377171266629579e-06, "loss": 0.3699, "step": 19330 }, { "epoch": 0.55, "grad_norm": 4.723340763435272, "learning_rate": 4.376711122885148e-06, "loss": 0.3533, "step": 19331 }, { "epoch": 0.55, "grad_norm": 5.160204856118068, "learning_rate": 4.376250984502797e-06, "loss": 0.4066, "step": 19332 }, { "epoch": 0.55, "grad_norm": 4.8838486205376626, "learning_rate": 4.37579085148648e-06, "loss": 0.6496, "step": 19333 }, { "epoch": 0.55, "grad_norm": 10.056876749692028, "learning_rate": 4.375330723840159e-06, "loss": 0.5757, "step": 19334 }, { "epoch": 0.55, "grad_norm": 4.880453092068462, "learning_rate": 4.374870601567789e-06, "loss": 0.486, "step": 19335 }, { "epoch": 0.55, "grad_norm": 5.337925541997689, "learning_rate": 4.3744104846733305e-06, "loss": 0.5187, "step": 19336 }, { "epoch": 0.55, "grad_norm": 3.4466937836281883, "learning_rate": 4.373950373160743e-06, "loss": 0.4893, "step": 19337 }, { "epoch": 0.55, "grad_norm": 8.953889078553658, "learning_rate": 4.373490267033982e-06, "loss": 0.5839, "step": 19338 }, { "epoch": 0.55, "grad_norm": 6.11078769906962, "learning_rate": 4.3730301662970086e-06, "loss": 0.2887, "step": 19339 }, { "epoch": 0.55, "grad_norm": 5.3964940946686815, "learning_rate": 4.372570070953779e-06, "loss": 0.4604, "step": 19340 }, { "epoch": 0.55, "grad_norm": 6.459554521920094, "learning_rate": 4.372109981008252e-06, "loss": 0.5741, "step": 19341 }, { "epoch": 0.55, "grad_norm": 5.215540167208335, "learning_rate": 4.371649896464384e-06, "loss": 0.5562, "step": 19342 }, { "epoch": 0.55, "grad_norm": 5.978876680614805, "learning_rate": 4.371189817326136e-06, "loss": 0.4625, "step": 19343 }, { "epoch": 0.55, "grad_norm": 3.812716415391427, "learning_rate": 4.370729743597464e-06, "loss": 0.4487, "step": 19344 }, { "epoch": 0.55, "grad_norm": 4.52200869327673, "learning_rate": 4.370269675282326e-06, "loss": 0.6091, "step": 19345 }, { "epoch": 0.55, "grad_norm": 5.242779443921652, "learning_rate": 4.369809612384682e-06, "loss": 0.5349, "step": 19346 }, { "epoch": 0.55, "grad_norm": 4.245641380715354, "learning_rate": 4.3693495549084875e-06, "loss": 0.2517, "step": 19347 }, { "epoch": 0.55, "grad_norm": 4.969129415936808, "learning_rate": 4.368889502857701e-06, "loss": 0.5883, "step": 19348 }, { "epoch": 0.55, "grad_norm": 4.48277456188403, "learning_rate": 4.36842945623628e-06, "loss": 0.2236, "step": 19349 }, { "epoch": 0.55, "grad_norm": 5.768309983719935, "learning_rate": 4.367969415048185e-06, "loss": 0.2868, "step": 19350 }, { "epoch": 0.55, "grad_norm": 3.815597526582603, "learning_rate": 4.367509379297368e-06, "loss": 0.6157, "step": 19351 }, { "epoch": 0.55, "grad_norm": 7.562649244617057, "learning_rate": 4.367049348987791e-06, "loss": 0.4938, "step": 19352 }, { "epoch": 0.55, "grad_norm": 5.569848467958906, "learning_rate": 4.366589324123412e-06, "loss": 0.8642, "step": 19353 }, { "epoch": 0.55, "grad_norm": 8.854758699856843, "learning_rate": 4.366129304708185e-06, "loss": 0.492, "step": 19354 }, { "epoch": 0.55, "grad_norm": 6.49175733846562, "learning_rate": 4.3656692907460715e-06, "loss": 0.4891, "step": 19355 }, { "epoch": 0.55, "grad_norm": 11.439226364173017, "learning_rate": 4.3652092822410255e-06, "loss": 0.2318, "step": 19356 }, { "epoch": 0.55, "grad_norm": 8.575237184141454, "learning_rate": 4.364749279197008e-06, "loss": 0.7135, "step": 19357 }, { "epoch": 0.55, "grad_norm": 2.566383222363424, "learning_rate": 4.364289281617971e-06, "loss": 0.1989, "step": 19358 }, { "epoch": 0.55, "grad_norm": 4.345132155038317, "learning_rate": 4.363829289507878e-06, "loss": 0.3868, "step": 19359 }, { "epoch": 0.55, "grad_norm": 1.1062199631616463, "learning_rate": 4.3633693028706815e-06, "loss": 0.0374, "step": 19360 }, { "epoch": 0.55, "grad_norm": 5.669210727731079, "learning_rate": 4.36290932171034e-06, "loss": 0.2705, "step": 19361 }, { "epoch": 0.55, "grad_norm": 7.04688799195033, "learning_rate": 4.3624493460308125e-06, "loss": 0.5012, "step": 19362 }, { "epoch": 0.55, "grad_norm": 5.424271155764481, "learning_rate": 4.3619893758360546e-06, "loss": 0.473, "step": 19363 }, { "epoch": 0.55, "grad_norm": 2.683746999837534, "learning_rate": 4.3615294111300235e-06, "loss": 0.2161, "step": 19364 }, { "epoch": 0.55, "grad_norm": 4.879597525571771, "learning_rate": 4.361069451916675e-06, "loss": 0.271, "step": 19365 }, { "epoch": 0.55, "grad_norm": 4.6645406097777204, "learning_rate": 4.36060949819997e-06, "loss": 0.2488, "step": 19366 }, { "epoch": 0.55, "grad_norm": 4.383470536878013, "learning_rate": 4.360149549983861e-06, "loss": 0.445, "step": 19367 }, { "epoch": 0.55, "grad_norm": 3.8613314592407835, "learning_rate": 4.359689607272308e-06, "loss": 0.1998, "step": 19368 }, { "epoch": 0.55, "grad_norm": 3.716852425236812, "learning_rate": 4.3592296700692635e-06, "loss": 0.2686, "step": 19369 }, { "epoch": 0.55, "grad_norm": 7.432110701142007, "learning_rate": 4.358769738378689e-06, "loss": 0.7455, "step": 19370 }, { "epoch": 0.55, "grad_norm": 4.533586728269539, "learning_rate": 4.3583098122045405e-06, "loss": 0.5437, "step": 19371 }, { "epoch": 0.55, "grad_norm": 7.82524339096284, "learning_rate": 4.357849891550772e-06, "loss": 0.6077, "step": 19372 }, { "epoch": 0.55, "grad_norm": 6.712616964560762, "learning_rate": 4.357389976421343e-06, "loss": 0.383, "step": 19373 }, { "epoch": 0.55, "grad_norm": 5.605137534108831, "learning_rate": 4.3569300668202084e-06, "loss": 0.5352, "step": 19374 }, { "epoch": 0.55, "grad_norm": 3.7874099903301466, "learning_rate": 4.356470162751326e-06, "loss": 0.2847, "step": 19375 }, { "epoch": 0.55, "grad_norm": 7.8043154149527645, "learning_rate": 4.35601026421865e-06, "loss": 0.6787, "step": 19376 }, { "epoch": 0.55, "grad_norm": 12.724766613332031, "learning_rate": 4.355550371226139e-06, "loss": 0.6518, "step": 19377 }, { "epoch": 0.55, "grad_norm": 1.6506603043206478, "learning_rate": 4.355090483777749e-06, "loss": 0.1888, "step": 19378 }, { "epoch": 0.55, "grad_norm": 8.592031078090615, "learning_rate": 4.354630601877436e-06, "loss": 0.8321, "step": 19379 }, { "epoch": 0.55, "grad_norm": 5.353078362951536, "learning_rate": 4.354170725529159e-06, "loss": 0.603, "step": 19380 }, { "epoch": 0.56, "grad_norm": 4.831832227871633, "learning_rate": 4.353710854736868e-06, "loss": 0.4471, "step": 19381 }, { "epoch": 0.56, "grad_norm": 4.376225000270293, "learning_rate": 4.353250989504525e-06, "loss": 0.4066, "step": 19382 }, { "epoch": 0.56, "grad_norm": 6.241511894398706, "learning_rate": 4.352791129836084e-06, "loss": 0.9246, "step": 19383 }, { "epoch": 0.56, "grad_norm": 5.049133102679833, "learning_rate": 4.352331275735501e-06, "loss": 0.4593, "step": 19384 }, { "epoch": 0.56, "grad_norm": 5.0130680494850965, "learning_rate": 4.351871427206731e-06, "loss": 0.2844, "step": 19385 }, { "epoch": 0.56, "grad_norm": 1.976372389263957, "learning_rate": 4.351411584253732e-06, "loss": 0.0804, "step": 19386 }, { "epoch": 0.56, "grad_norm": 8.908717526381619, "learning_rate": 4.350951746880461e-06, "loss": 0.6724, "step": 19387 }, { "epoch": 0.56, "grad_norm": 4.901433171665892, "learning_rate": 4.350491915090869e-06, "loss": 0.29, "step": 19388 }, { "epoch": 0.56, "grad_norm": 6.572132126244455, "learning_rate": 4.350032088888918e-06, "loss": 0.3402, "step": 19389 }, { "epoch": 0.56, "grad_norm": 4.138872257870593, "learning_rate": 4.34957226827856e-06, "loss": 0.143, "step": 19390 }, { "epoch": 0.56, "grad_norm": 3.337992586796176, "learning_rate": 4.349112453263752e-06, "loss": 0.1697, "step": 19391 }, { "epoch": 0.56, "grad_norm": 4.5603154321793244, "learning_rate": 4.348652643848448e-06, "loss": 0.5692, "step": 19392 }, { "epoch": 0.56, "grad_norm": 6.578604855104128, "learning_rate": 4.348192840036607e-06, "loss": 0.5459, "step": 19393 }, { "epoch": 0.56, "grad_norm": 6.36203333524133, "learning_rate": 4.347733041832182e-06, "loss": 0.5768, "step": 19394 }, { "epoch": 0.56, "grad_norm": 1.7956097585002693, "learning_rate": 4.347273249239127e-06, "loss": 0.2914, "step": 19395 }, { "epoch": 0.56, "grad_norm": 3.4391015396779356, "learning_rate": 4.346813462261403e-06, "loss": 0.1457, "step": 19396 }, { "epoch": 0.56, "grad_norm": 4.2439039632203235, "learning_rate": 4.346353680902962e-06, "loss": 0.2012, "step": 19397 }, { "epoch": 0.56, "grad_norm": 5.774780571472056, "learning_rate": 4.3458939051677595e-06, "loss": 0.4298, "step": 19398 }, { "epoch": 0.56, "grad_norm": 3.049330206321355, "learning_rate": 4.3454341350597495e-06, "loss": 0.2195, "step": 19399 }, { "epoch": 0.56, "grad_norm": 8.42963551530904, "learning_rate": 4.3449743705828915e-06, "loss": 0.6739, "step": 19400 }, { "epoch": 0.56, "grad_norm": 4.142968029501958, "learning_rate": 4.344514611741137e-06, "loss": 0.3292, "step": 19401 }, { "epoch": 0.56, "grad_norm": 3.507432165108475, "learning_rate": 4.344054858538442e-06, "loss": 0.3432, "step": 19402 }, { "epoch": 0.56, "grad_norm": 4.398648922651343, "learning_rate": 4.343595110978764e-06, "loss": 0.2195, "step": 19403 }, { "epoch": 0.56, "grad_norm": 7.097040026823435, "learning_rate": 4.343135369066055e-06, "loss": 0.3754, "step": 19404 }, { "epoch": 0.56, "grad_norm": 7.177185220276624, "learning_rate": 4.342675632804273e-06, "loss": 0.7763, "step": 19405 }, { "epoch": 0.56, "grad_norm": 5.981354929961271, "learning_rate": 4.34221590219737e-06, "loss": 0.6107, "step": 19406 }, { "epoch": 0.56, "grad_norm": 2.909630634537577, "learning_rate": 4.341756177249304e-06, "loss": 0.139, "step": 19407 }, { "epoch": 0.56, "grad_norm": 6.780930190741458, "learning_rate": 4.341296457964027e-06, "loss": 0.4699, "step": 19408 }, { "epoch": 0.56, "grad_norm": 6.327064670220781, "learning_rate": 4.340836744345498e-06, "loss": 0.7218, "step": 19409 }, { "epoch": 0.56, "grad_norm": 6.8675699485546575, "learning_rate": 4.340377036397668e-06, "loss": 0.6575, "step": 19410 }, { "epoch": 0.56, "grad_norm": 6.506569330329034, "learning_rate": 4.339917334124491e-06, "loss": 0.3392, "step": 19411 }, { "epoch": 0.56, "grad_norm": 7.326986797898254, "learning_rate": 4.339457637529927e-06, "loss": 0.3974, "step": 19412 }, { "epoch": 0.56, "grad_norm": 5.680248542371434, "learning_rate": 4.338997946617926e-06, "loss": 0.6344, "step": 19413 }, { "epoch": 0.56, "grad_norm": 13.536875694018095, "learning_rate": 4.3385382613924454e-06, "loss": 0.6446, "step": 19414 }, { "epoch": 0.56, "grad_norm": 6.596543646820782, "learning_rate": 4.338078581857436e-06, "loss": 0.657, "step": 19415 }, { "epoch": 0.56, "grad_norm": 5.873909382335138, "learning_rate": 4.337618908016859e-06, "loss": 0.2762, "step": 19416 }, { "epoch": 0.56, "grad_norm": 7.294055140632835, "learning_rate": 4.337159239874663e-06, "loss": 0.5988, "step": 19417 }, { "epoch": 0.56, "grad_norm": 4.500115393112777, "learning_rate": 4.336699577434802e-06, "loss": 0.2129, "step": 19418 }, { "epoch": 0.56, "grad_norm": 2.9145198800571728, "learning_rate": 4.336239920701237e-06, "loss": 0.2242, "step": 19419 }, { "epoch": 0.56, "grad_norm": 6.269544065468308, "learning_rate": 4.335780269677916e-06, "loss": 0.2954, "step": 19420 }, { "epoch": 0.56, "grad_norm": 5.630813095638273, "learning_rate": 4.335320624368797e-06, "loss": 0.3028, "step": 19421 }, { "epoch": 0.56, "grad_norm": 5.0198908933052, "learning_rate": 4.334860984777831e-06, "loss": 0.4929, "step": 19422 }, { "epoch": 0.56, "grad_norm": 4.7189284284596, "learning_rate": 4.334401350908975e-06, "loss": 0.6825, "step": 19423 }, { "epoch": 0.56, "grad_norm": 11.719243845388583, "learning_rate": 4.3339417227661824e-06, "loss": 0.2545, "step": 19424 }, { "epoch": 0.56, "grad_norm": 5.161926243189709, "learning_rate": 4.333482100353408e-06, "loss": 0.5166, "step": 19425 }, { "epoch": 0.56, "grad_norm": 5.870053664291961, "learning_rate": 4.3330224836746035e-06, "loss": 0.3123, "step": 19426 }, { "epoch": 0.56, "grad_norm": 4.426851202170919, "learning_rate": 4.332562872733724e-06, "loss": 0.579, "step": 19427 }, { "epoch": 0.56, "grad_norm": 3.8149248643040687, "learning_rate": 4.3321032675347265e-06, "loss": 0.1936, "step": 19428 }, { "epoch": 0.56, "grad_norm": 5.767586557890466, "learning_rate": 4.331643668081559e-06, "loss": 0.574, "step": 19429 }, { "epoch": 0.56, "grad_norm": 4.2976347650387225, "learning_rate": 4.331184074378181e-06, "loss": 0.5153, "step": 19430 }, { "epoch": 0.56, "grad_norm": 6.5063218405034275, "learning_rate": 4.330724486428544e-06, "loss": 0.4172, "step": 19431 }, { "epoch": 0.56, "grad_norm": 6.260966255154323, "learning_rate": 4.330264904236602e-06, "loss": 0.4886, "step": 19432 }, { "epoch": 0.56, "grad_norm": 5.899788607026949, "learning_rate": 4.329805327806307e-06, "loss": 0.2762, "step": 19433 }, { "epoch": 0.56, "grad_norm": 6.0645120172512845, "learning_rate": 4.3293457571416155e-06, "loss": 0.4962, "step": 19434 }, { "epoch": 0.56, "grad_norm": 6.954547316561726, "learning_rate": 4.32888619224648e-06, "loss": 0.6045, "step": 19435 }, { "epoch": 0.56, "grad_norm": 4.2354481312036025, "learning_rate": 4.328426633124852e-06, "loss": 0.4948, "step": 19436 }, { "epoch": 0.56, "grad_norm": 3.9915379482903846, "learning_rate": 4.3279670797806895e-06, "loss": 0.5274, "step": 19437 }, { "epoch": 0.56, "grad_norm": 1.968262316836903, "learning_rate": 4.327507532217942e-06, "loss": 0.2593, "step": 19438 }, { "epoch": 0.56, "grad_norm": 5.020177231125937, "learning_rate": 4.327047990440566e-06, "loss": 0.2729, "step": 19439 }, { "epoch": 0.56, "grad_norm": 5.559835588782459, "learning_rate": 4.326588454452511e-06, "loss": 0.7581, "step": 19440 }, { "epoch": 0.56, "grad_norm": 4.1401839687180955, "learning_rate": 4.3261289242577355e-06, "loss": 0.3384, "step": 19441 }, { "epoch": 0.56, "grad_norm": 5.710860600195109, "learning_rate": 4.325669399860187e-06, "loss": 0.4789, "step": 19442 }, { "epoch": 0.56, "grad_norm": 2.991270796388291, "learning_rate": 4.325209881263823e-06, "loss": 0.2383, "step": 19443 }, { "epoch": 0.56, "grad_norm": 5.267486831728241, "learning_rate": 4.324750368472595e-06, "loss": 0.5443, "step": 19444 }, { "epoch": 0.56, "grad_norm": 10.886335340851526, "learning_rate": 4.324290861490456e-06, "loss": 0.7222, "step": 19445 }, { "epoch": 0.56, "grad_norm": 4.22381886331553, "learning_rate": 4.323831360321361e-06, "loss": 0.3464, "step": 19446 }, { "epoch": 0.56, "grad_norm": 6.477839179572257, "learning_rate": 4.32337186496926e-06, "loss": 0.2777, "step": 19447 }, { "epoch": 0.56, "grad_norm": 7.666247384036539, "learning_rate": 4.322912375438109e-06, "loss": 0.8047, "step": 19448 }, { "epoch": 0.56, "grad_norm": 4.708847872995647, "learning_rate": 4.322452891731858e-06, "loss": 0.1969, "step": 19449 }, { "epoch": 0.56, "grad_norm": 5.79556882799996, "learning_rate": 4.321993413854463e-06, "loss": 0.5398, "step": 19450 }, { "epoch": 0.56, "grad_norm": 6.485122600799456, "learning_rate": 4.321533941809873e-06, "loss": 0.5358, "step": 19451 }, { "epoch": 0.56, "grad_norm": 4.136914127047826, "learning_rate": 4.321074475602044e-06, "loss": 0.225, "step": 19452 }, { "epoch": 0.56, "grad_norm": 3.492206718511418, "learning_rate": 4.320615015234928e-06, "loss": 0.4707, "step": 19453 }, { "epoch": 0.56, "grad_norm": 5.338564448114184, "learning_rate": 4.320155560712477e-06, "loss": 0.7672, "step": 19454 }, { "epoch": 0.56, "grad_norm": 6.086391603425148, "learning_rate": 4.319696112038645e-06, "loss": 0.8466, "step": 19455 }, { "epoch": 0.56, "grad_norm": 4.232935164467162, "learning_rate": 4.319236669217382e-06, "loss": 0.5377, "step": 19456 }, { "epoch": 0.56, "grad_norm": 4.202048578656041, "learning_rate": 4.318777232252643e-06, "loss": 0.4308, "step": 19457 }, { "epoch": 0.56, "grad_norm": 8.052592969971787, "learning_rate": 4.31831780114838e-06, "loss": 0.2628, "step": 19458 }, { "epoch": 0.56, "grad_norm": 5.63496783073876, "learning_rate": 4.317858375908543e-06, "loss": 0.7652, "step": 19459 }, { "epoch": 0.56, "grad_norm": 7.475718292121119, "learning_rate": 4.317398956537089e-06, "loss": 0.7874, "step": 19460 }, { "epoch": 0.56, "grad_norm": 6.91576623992177, "learning_rate": 4.316939543037967e-06, "loss": 0.4186, "step": 19461 }, { "epoch": 0.56, "grad_norm": 9.276990304739318, "learning_rate": 4.31648013541513e-06, "loss": 0.5113, "step": 19462 }, { "epoch": 0.56, "grad_norm": 2.594913727165958, "learning_rate": 4.316020733672529e-06, "loss": 0.1039, "step": 19463 }, { "epoch": 0.56, "grad_norm": 7.771148927499803, "learning_rate": 4.3155613378141194e-06, "loss": 0.5534, "step": 19464 }, { "epoch": 0.56, "grad_norm": 8.520005301711176, "learning_rate": 4.31510194784385e-06, "loss": 0.9906, "step": 19465 }, { "epoch": 0.56, "grad_norm": 4.308551252056956, "learning_rate": 4.314642563765675e-06, "loss": 0.2371, "step": 19466 }, { "epoch": 0.56, "grad_norm": 8.541652611201437, "learning_rate": 4.3141831855835435e-06, "loss": 0.8217, "step": 19467 }, { "epoch": 0.56, "grad_norm": 8.316112851781295, "learning_rate": 4.313723813301411e-06, "loss": 0.5641, "step": 19468 }, { "epoch": 0.56, "grad_norm": 5.347316109739926, "learning_rate": 4.31326444692323e-06, "loss": 0.346, "step": 19469 }, { "epoch": 0.56, "grad_norm": 6.37476739272066, "learning_rate": 4.312805086452948e-06, "loss": 0.4725, "step": 19470 }, { "epoch": 0.56, "grad_norm": 5.136108826372624, "learning_rate": 4.312345731894521e-06, "loss": 0.3128, "step": 19471 }, { "epoch": 0.56, "grad_norm": 6.312811551101301, "learning_rate": 4.311886383251896e-06, "loss": 0.4591, "step": 19472 }, { "epoch": 0.56, "grad_norm": 8.04800258995151, "learning_rate": 4.311427040529031e-06, "loss": 0.5459, "step": 19473 }, { "epoch": 0.56, "grad_norm": 3.689095329839807, "learning_rate": 4.310967703729873e-06, "loss": 0.6139, "step": 19474 }, { "epoch": 0.56, "grad_norm": 4.559441599578443, "learning_rate": 4.310508372858376e-06, "loss": 0.6083, "step": 19475 }, { "epoch": 0.56, "grad_norm": 4.6147482636587585, "learning_rate": 4.310049047918489e-06, "loss": 0.3778, "step": 19476 }, { "epoch": 0.56, "grad_norm": 5.229012842942802, "learning_rate": 4.309589728914165e-06, "loss": 0.4808, "step": 19477 }, { "epoch": 0.56, "grad_norm": 7.238886240232225, "learning_rate": 4.309130415849357e-06, "loss": 0.6607, "step": 19478 }, { "epoch": 0.56, "grad_norm": 6.811770058819405, "learning_rate": 4.308671108728013e-06, "loss": 0.3441, "step": 19479 }, { "epoch": 0.56, "grad_norm": 8.37390835607877, "learning_rate": 4.308211807554088e-06, "loss": 0.3469, "step": 19480 }, { "epoch": 0.56, "grad_norm": 3.3059489410574545, "learning_rate": 4.307752512331531e-06, "loss": 0.3261, "step": 19481 }, { "epoch": 0.56, "grad_norm": 5.2764851219642015, "learning_rate": 4.307293223064296e-06, "loss": 0.7549, "step": 19482 }, { "epoch": 0.56, "grad_norm": 11.27980027962129, "learning_rate": 4.306833939756329e-06, "loss": 0.6133, "step": 19483 }, { "epoch": 0.56, "grad_norm": 2.017782239753504, "learning_rate": 4.306374662411585e-06, "loss": 0.0908, "step": 19484 }, { "epoch": 0.56, "grad_norm": 5.029044550519535, "learning_rate": 4.305915391034015e-06, "loss": 0.694, "step": 19485 }, { "epoch": 0.56, "grad_norm": 7.802058452028361, "learning_rate": 4.305456125627568e-06, "loss": 0.553, "step": 19486 }, { "epoch": 0.56, "grad_norm": 5.552518332535709, "learning_rate": 4.3049968661962e-06, "loss": 0.5072, "step": 19487 }, { "epoch": 0.56, "grad_norm": 3.837345835916097, "learning_rate": 4.3045376127438555e-06, "loss": 0.4035, "step": 19488 }, { "epoch": 0.56, "grad_norm": 5.342184852583821, "learning_rate": 4.30407836527449e-06, "loss": 0.3057, "step": 19489 }, { "epoch": 0.56, "grad_norm": 6.979406753130303, "learning_rate": 4.30361912379205e-06, "loss": 0.5685, "step": 19490 }, { "epoch": 0.56, "grad_norm": 2.647432299676815, "learning_rate": 4.303159888300491e-06, "loss": 0.2269, "step": 19491 }, { "epoch": 0.56, "grad_norm": 4.975453299569181, "learning_rate": 4.302700658803761e-06, "loss": 0.2288, "step": 19492 }, { "epoch": 0.56, "grad_norm": 4.300686564083152, "learning_rate": 4.302241435305811e-06, "loss": 0.5693, "step": 19493 }, { "epoch": 0.56, "grad_norm": 5.068903722913239, "learning_rate": 4.301782217810593e-06, "loss": 0.2545, "step": 19494 }, { "epoch": 0.56, "grad_norm": 5.8077317645578255, "learning_rate": 4.301323006322056e-06, "loss": 0.4822, "step": 19495 }, { "epoch": 0.56, "grad_norm": 5.212618486626619, "learning_rate": 4.300863800844153e-06, "loss": 0.5374, "step": 19496 }, { "epoch": 0.56, "grad_norm": 4.656534787884004, "learning_rate": 4.300404601380829e-06, "loss": 0.8939, "step": 19497 }, { "epoch": 0.56, "grad_norm": 3.6394234176525497, "learning_rate": 4.299945407936043e-06, "loss": 0.2464, "step": 19498 }, { "epoch": 0.56, "grad_norm": 6.019492315702385, "learning_rate": 4.299486220513735e-06, "loss": 0.4879, "step": 19499 }, { "epoch": 0.56, "grad_norm": 9.587172350202398, "learning_rate": 4.299027039117863e-06, "loss": 0.7898, "step": 19500 }, { "epoch": 0.56, "grad_norm": 10.291176963203181, "learning_rate": 4.298567863752377e-06, "loss": 0.5383, "step": 19501 }, { "epoch": 0.56, "grad_norm": 2.738387722062272, "learning_rate": 4.298108694421223e-06, "loss": 0.1527, "step": 19502 }, { "epoch": 0.56, "grad_norm": 5.886162643643092, "learning_rate": 4.297649531128355e-06, "loss": 0.5708, "step": 19503 }, { "epoch": 0.56, "grad_norm": 5.39557254804208, "learning_rate": 4.29719037387772e-06, "loss": 0.3645, "step": 19504 }, { "epoch": 0.56, "grad_norm": 3.9083719212322303, "learning_rate": 4.296731222673272e-06, "loss": 0.2699, "step": 19505 }, { "epoch": 0.56, "grad_norm": 8.883987067277161, "learning_rate": 4.296272077518955e-06, "loss": 0.5617, "step": 19506 }, { "epoch": 0.56, "grad_norm": 3.56160249614491, "learning_rate": 4.295812938418726e-06, "loss": 0.3084, "step": 19507 }, { "epoch": 0.56, "grad_norm": 3.936214327948864, "learning_rate": 4.29535380537653e-06, "loss": 0.378, "step": 19508 }, { "epoch": 0.56, "grad_norm": 3.930479603105823, "learning_rate": 4.294894678396318e-06, "loss": 0.2004, "step": 19509 }, { "epoch": 0.56, "grad_norm": 9.848814760342115, "learning_rate": 4.294435557482042e-06, "loss": 0.4701, "step": 19510 }, { "epoch": 0.56, "grad_norm": 4.661500057552296, "learning_rate": 4.2939764426376496e-06, "loss": 0.2214, "step": 19511 }, { "epoch": 0.56, "grad_norm": 5.762578267788552, "learning_rate": 4.293517333867091e-06, "loss": 0.2413, "step": 19512 }, { "epoch": 0.56, "grad_norm": 7.3497806217904245, "learning_rate": 4.293058231174314e-06, "loss": 0.547, "step": 19513 }, { "epoch": 0.56, "grad_norm": 5.822389045336241, "learning_rate": 4.2925991345632725e-06, "loss": 0.4694, "step": 19514 }, { "epoch": 0.56, "grad_norm": 5.565871727693861, "learning_rate": 4.292140044037912e-06, "loss": 0.6203, "step": 19515 }, { "epoch": 0.56, "grad_norm": 3.4484033711675677, "learning_rate": 4.2916809596021845e-06, "loss": 0.3049, "step": 19516 }, { "epoch": 0.56, "grad_norm": 5.142742938143606, "learning_rate": 4.291221881260037e-06, "loss": 0.696, "step": 19517 }, { "epoch": 0.56, "grad_norm": 2.7920366665958767, "learning_rate": 4.290762809015422e-06, "loss": 0.1464, "step": 19518 }, { "epoch": 0.56, "grad_norm": 8.064636124917946, "learning_rate": 4.290303742872287e-06, "loss": 0.6782, "step": 19519 }, { "epoch": 0.56, "grad_norm": 6.23749045636932, "learning_rate": 4.28984468283458e-06, "loss": 0.52, "step": 19520 }, { "epoch": 0.56, "grad_norm": 11.605480418782, "learning_rate": 4.289385628906253e-06, "loss": 0.9608, "step": 19521 }, { "epoch": 0.56, "grad_norm": 3.851118251722439, "learning_rate": 4.288926581091254e-06, "loss": 0.2436, "step": 19522 }, { "epoch": 0.56, "grad_norm": 10.141063945186849, "learning_rate": 4.288467539393533e-06, "loss": 0.3517, "step": 19523 }, { "epoch": 0.56, "grad_norm": 4.2547331987911425, "learning_rate": 4.288008503817035e-06, "loss": 0.6618, "step": 19524 }, { "epoch": 0.56, "grad_norm": 3.9586525068934586, "learning_rate": 4.287549474365714e-06, "loss": 0.488, "step": 19525 }, { "epoch": 0.56, "grad_norm": 3.8278516399329856, "learning_rate": 4.287090451043518e-06, "loss": 0.1765, "step": 19526 }, { "epoch": 0.56, "grad_norm": 5.058458716751656, "learning_rate": 4.286631433854393e-06, "loss": 0.4892, "step": 19527 }, { "epoch": 0.56, "grad_norm": 4.641822390737806, "learning_rate": 4.286172422802294e-06, "loss": 0.2586, "step": 19528 }, { "epoch": 0.56, "grad_norm": 4.585732580542306, "learning_rate": 4.285713417891162e-06, "loss": 0.3258, "step": 19529 }, { "epoch": 0.56, "grad_norm": 4.042920599756677, "learning_rate": 4.285254419124951e-06, "loss": 0.6316, "step": 19530 }, { "epoch": 0.56, "grad_norm": 9.665273111020088, "learning_rate": 4.2847954265076075e-06, "loss": 0.3648, "step": 19531 }, { "epoch": 0.56, "grad_norm": 6.142584451693383, "learning_rate": 4.2843364400430825e-06, "loss": 0.3605, "step": 19532 }, { "epoch": 0.56, "grad_norm": 7.795010024499228, "learning_rate": 4.2838774597353206e-06, "loss": 0.7574, "step": 19533 }, { "epoch": 0.56, "grad_norm": 6.297281824794718, "learning_rate": 4.283418485588273e-06, "loss": 0.3328, "step": 19534 }, { "epoch": 0.56, "grad_norm": 2.739467347642724, "learning_rate": 4.28295951760589e-06, "loss": 0.2539, "step": 19535 }, { "epoch": 0.56, "grad_norm": 4.28131089550064, "learning_rate": 4.282500555792116e-06, "loss": 0.5049, "step": 19536 }, { "epoch": 0.56, "grad_norm": 4.965402975334277, "learning_rate": 4.282041600150902e-06, "loss": 0.4016, "step": 19537 }, { "epoch": 0.56, "grad_norm": 6.9243035802903306, "learning_rate": 4.281582650686196e-06, "loss": 0.7165, "step": 19538 }, { "epoch": 0.56, "grad_norm": 8.924449756233606, "learning_rate": 4.281123707401948e-06, "loss": 0.7174, "step": 19539 }, { "epoch": 0.56, "grad_norm": 6.0214792672877095, "learning_rate": 4.2806647703021e-06, "loss": 0.8128, "step": 19540 }, { "epoch": 0.56, "grad_norm": 3.336079489480176, "learning_rate": 4.280205839390607e-06, "loss": 0.2151, "step": 19541 }, { "epoch": 0.56, "grad_norm": 3.8491462727020402, "learning_rate": 4.279746914671415e-06, "loss": 0.1181, "step": 19542 }, { "epoch": 0.56, "grad_norm": 7.817169258970197, "learning_rate": 4.279287996148469e-06, "loss": 0.9152, "step": 19543 }, { "epoch": 0.56, "grad_norm": 5.0891635115252285, "learning_rate": 4.278829083825722e-06, "loss": 0.4601, "step": 19544 }, { "epoch": 0.56, "grad_norm": 8.27141813681787, "learning_rate": 4.2783701777071185e-06, "loss": 0.5055, "step": 19545 }, { "epoch": 0.56, "grad_norm": 5.4181093886644796, "learning_rate": 4.277911277796609e-06, "loss": 0.2773, "step": 19546 }, { "epoch": 0.56, "grad_norm": 6.009431538982902, "learning_rate": 4.2774523840981374e-06, "loss": 0.4234, "step": 19547 }, { "epoch": 0.56, "grad_norm": 11.045262241937222, "learning_rate": 4.276993496615657e-06, "loss": 0.6048, "step": 19548 }, { "epoch": 0.56, "grad_norm": 8.178655929152699, "learning_rate": 4.27653461535311e-06, "loss": 0.8172, "step": 19549 }, { "epoch": 0.56, "grad_norm": 4.64855059317849, "learning_rate": 4.276075740314447e-06, "loss": 0.5685, "step": 19550 }, { "epoch": 0.56, "grad_norm": 6.5960148180511196, "learning_rate": 4.275616871503616e-06, "loss": 0.7233, "step": 19551 }, { "epoch": 0.56, "grad_norm": 8.582209269674467, "learning_rate": 4.275158008924564e-06, "loss": 0.4647, "step": 19552 }, { "epoch": 0.56, "grad_norm": 4.188633409526458, "learning_rate": 4.2746991525812385e-06, "loss": 0.2424, "step": 19553 }, { "epoch": 0.56, "grad_norm": 7.334548379923536, "learning_rate": 4.2742403024775855e-06, "loss": 1.0354, "step": 19554 }, { "epoch": 0.56, "grad_norm": 1.923628061803629, "learning_rate": 4.273781458617556e-06, "loss": 0.1598, "step": 19555 }, { "epoch": 0.56, "grad_norm": 4.603804917889644, "learning_rate": 4.273322621005093e-06, "loss": 0.4445, "step": 19556 }, { "epoch": 0.56, "grad_norm": 4.6615893581737, "learning_rate": 4.272863789644148e-06, "loss": 0.3223, "step": 19557 }, { "epoch": 0.56, "grad_norm": 2.9228013146156955, "learning_rate": 4.272404964538665e-06, "loss": 0.1767, "step": 19558 }, { "epoch": 0.56, "grad_norm": 5.235691349136914, "learning_rate": 4.271946145692592e-06, "loss": 0.3009, "step": 19559 }, { "epoch": 0.56, "grad_norm": 3.7067007836022743, "learning_rate": 4.271487333109878e-06, "loss": 0.4185, "step": 19560 }, { "epoch": 0.56, "grad_norm": 5.9574524377228135, "learning_rate": 4.271028526794468e-06, "loss": 0.2871, "step": 19561 }, { "epoch": 0.56, "grad_norm": 5.982847890403006, "learning_rate": 4.270569726750311e-06, "loss": 0.5881, "step": 19562 }, { "epoch": 0.56, "grad_norm": 3.1009828616815485, "learning_rate": 4.270110932981351e-06, "loss": 0.3491, "step": 19563 }, { "epoch": 0.56, "grad_norm": 4.33799032468583, "learning_rate": 4.269652145491539e-06, "loss": 0.4711, "step": 19564 }, { "epoch": 0.56, "grad_norm": 6.505824340616041, "learning_rate": 4.269193364284818e-06, "loss": 0.457, "step": 19565 }, { "epoch": 0.56, "grad_norm": 5.843564280450406, "learning_rate": 4.268734589365136e-06, "loss": 0.4723, "step": 19566 }, { "epoch": 0.56, "grad_norm": 5.011053574885639, "learning_rate": 4.268275820736442e-06, "loss": 0.3348, "step": 19567 }, { "epoch": 0.56, "grad_norm": 5.835781192345971, "learning_rate": 4.267817058402679e-06, "loss": 0.4108, "step": 19568 }, { "epoch": 0.56, "grad_norm": 4.537564186293756, "learning_rate": 4.2673583023677975e-06, "loss": 0.2763, "step": 19569 }, { "epoch": 0.56, "grad_norm": 4.276030818849558, "learning_rate": 4.266899552635741e-06, "loss": 0.1939, "step": 19570 }, { "epoch": 0.56, "grad_norm": 11.482018406737444, "learning_rate": 4.266440809210459e-06, "loss": 0.4929, "step": 19571 }, { "epoch": 0.56, "grad_norm": 4.651726900423963, "learning_rate": 4.265982072095896e-06, "loss": 0.2946, "step": 19572 }, { "epoch": 0.56, "grad_norm": 5.483556789649179, "learning_rate": 4.265523341295998e-06, "loss": 0.7348, "step": 19573 }, { "epoch": 0.56, "grad_norm": 8.996648800316175, "learning_rate": 4.265064616814712e-06, "loss": 0.5165, "step": 19574 }, { "epoch": 0.56, "grad_norm": 9.311324640641391, "learning_rate": 4.2646058986559845e-06, "loss": 0.9151, "step": 19575 }, { "epoch": 0.56, "grad_norm": 8.775756855464405, "learning_rate": 4.2641471868237635e-06, "loss": 0.7221, "step": 19576 }, { "epoch": 0.56, "grad_norm": 3.2313024328467557, "learning_rate": 4.263688481321992e-06, "loss": 0.3399, "step": 19577 }, { "epoch": 0.56, "grad_norm": 9.479327748924666, "learning_rate": 4.26322978215462e-06, "loss": 0.5375, "step": 19578 }, { "epoch": 0.56, "grad_norm": 5.110293244609201, "learning_rate": 4.262771089325589e-06, "loss": 0.2109, "step": 19579 }, { "epoch": 0.56, "grad_norm": 4.0788480621904535, "learning_rate": 4.2623124028388495e-06, "loss": 0.4204, "step": 19580 }, { "epoch": 0.56, "grad_norm": 6.031748854804594, "learning_rate": 4.261853722698343e-06, "loss": 0.5398, "step": 19581 }, { "epoch": 0.56, "grad_norm": 8.978347056451364, "learning_rate": 4.261395048908022e-06, "loss": 0.5528, "step": 19582 }, { "epoch": 0.56, "grad_norm": 5.4955386143756755, "learning_rate": 4.260936381471825e-06, "loss": 0.6076, "step": 19583 }, { "epoch": 0.56, "grad_norm": 6.616498782945127, "learning_rate": 4.260477720393702e-06, "loss": 0.712, "step": 19584 }, { "epoch": 0.56, "grad_norm": 5.414219621412371, "learning_rate": 4.2600190656776e-06, "loss": 0.3372, "step": 19585 }, { "epoch": 0.56, "grad_norm": 7.10160926660765, "learning_rate": 4.259560417327461e-06, "loss": 0.7083, "step": 19586 }, { "epoch": 0.56, "grad_norm": 3.7587484831786324, "learning_rate": 4.259101775347233e-06, "loss": 0.2225, "step": 19587 }, { "epoch": 0.56, "grad_norm": 8.706553908587184, "learning_rate": 4.258643139740862e-06, "loss": 0.7155, "step": 19588 }, { "epoch": 0.56, "grad_norm": 7.375144439834576, "learning_rate": 4.258184510512292e-06, "loss": 0.2714, "step": 19589 }, { "epoch": 0.56, "grad_norm": 8.650697159649125, "learning_rate": 4.257725887665468e-06, "loss": 0.9083, "step": 19590 }, { "epoch": 0.56, "grad_norm": 7.206453361189336, "learning_rate": 4.2572672712043385e-06, "loss": 0.7441, "step": 19591 }, { "epoch": 0.56, "grad_norm": 2.864076033090438, "learning_rate": 4.256808661132848e-06, "loss": 0.3453, "step": 19592 }, { "epoch": 0.56, "grad_norm": 6.453941681472202, "learning_rate": 4.256350057454939e-06, "loss": 0.5047, "step": 19593 }, { "epoch": 0.56, "grad_norm": 8.155916521409425, "learning_rate": 4.25589146017456e-06, "loss": 0.6189, "step": 19594 }, { "epoch": 0.56, "grad_norm": 5.9259091931124495, "learning_rate": 4.255432869295656e-06, "loss": 0.4741, "step": 19595 }, { "epoch": 0.56, "grad_norm": 6.210819036925487, "learning_rate": 4.254974284822171e-06, "loss": 0.5855, "step": 19596 }, { "epoch": 0.56, "grad_norm": 5.717287137064361, "learning_rate": 4.2545157067580484e-06, "loss": 0.3894, "step": 19597 }, { "epoch": 0.56, "grad_norm": 6.178564304754055, "learning_rate": 4.254057135107238e-06, "loss": 0.8894, "step": 19598 }, { "epoch": 0.56, "grad_norm": 4.3076209615493255, "learning_rate": 4.25359856987368e-06, "loss": 0.1236, "step": 19599 }, { "epoch": 0.56, "grad_norm": 4.59985838133044, "learning_rate": 4.253140011061322e-06, "loss": 0.4012, "step": 19600 }, { "epoch": 0.56, "grad_norm": 7.86923091227833, "learning_rate": 4.25268145867411e-06, "loss": 0.3556, "step": 19601 }, { "epoch": 0.56, "grad_norm": 6.60825444816209, "learning_rate": 4.252222912715987e-06, "loss": 0.572, "step": 19602 }, { "epoch": 0.56, "grad_norm": 4.517451408644223, "learning_rate": 4.251764373190898e-06, "loss": 0.5436, "step": 19603 }, { "epoch": 0.56, "grad_norm": 5.185915509643089, "learning_rate": 4.251305840102787e-06, "loss": 0.5742, "step": 19604 }, { "epoch": 0.56, "grad_norm": 8.614466468421186, "learning_rate": 4.250847313455601e-06, "loss": 0.6302, "step": 19605 }, { "epoch": 0.56, "grad_norm": 5.97696027242354, "learning_rate": 4.250388793253283e-06, "loss": 0.3518, "step": 19606 }, { "epoch": 0.56, "grad_norm": 4.181728085953351, "learning_rate": 4.2499302794997765e-06, "loss": 0.488, "step": 19607 }, { "epoch": 0.56, "grad_norm": 6.482350375754382, "learning_rate": 4.24947177219903e-06, "loss": 0.4338, "step": 19608 }, { "epoch": 0.56, "grad_norm": 2.7190140124637203, "learning_rate": 4.249013271354984e-06, "loss": 0.1476, "step": 19609 }, { "epoch": 0.56, "grad_norm": 7.66599795977264, "learning_rate": 4.248554776971585e-06, "loss": 0.4745, "step": 19610 }, { "epoch": 0.56, "grad_norm": 2.4550309825367846, "learning_rate": 4.248096289052774e-06, "loss": 0.2985, "step": 19611 }, { "epoch": 0.56, "grad_norm": 8.748762642700985, "learning_rate": 4.247637807602502e-06, "loss": 0.2522, "step": 19612 }, { "epoch": 0.56, "grad_norm": 6.132126963015822, "learning_rate": 4.247179332624708e-06, "loss": 0.6357, "step": 19613 }, { "epoch": 0.56, "grad_norm": 3.4640541252065966, "learning_rate": 4.246720864123337e-06, "loss": 0.2982, "step": 19614 }, { "epoch": 0.56, "grad_norm": 3.6081847172732484, "learning_rate": 4.2462624021023335e-06, "loss": 0.56, "step": 19615 }, { "epoch": 0.56, "grad_norm": 2.8705788494497417, "learning_rate": 4.245803946565641e-06, "loss": 0.2092, "step": 19616 }, { "epoch": 0.56, "grad_norm": 4.190055992303153, "learning_rate": 4.245345497517206e-06, "loss": 0.3297, "step": 19617 }, { "epoch": 0.56, "grad_norm": 7.705945811954623, "learning_rate": 4.24488705496097e-06, "loss": 0.5377, "step": 19618 }, { "epoch": 0.56, "grad_norm": 8.627372525958302, "learning_rate": 4.244428618900879e-06, "loss": 0.5668, "step": 19619 }, { "epoch": 0.56, "grad_norm": 3.9461983167848405, "learning_rate": 4.243970189340873e-06, "loss": 0.4113, "step": 19620 }, { "epoch": 0.56, "grad_norm": 4.605098330911036, "learning_rate": 4.2435117662849e-06, "loss": 0.2367, "step": 19621 }, { "epoch": 0.56, "grad_norm": 4.350868758782117, "learning_rate": 4.243053349736902e-06, "loss": 0.5157, "step": 19622 }, { "epoch": 0.56, "grad_norm": 4.4646290004078, "learning_rate": 4.242594939700823e-06, "loss": 0.4029, "step": 19623 }, { "epoch": 0.56, "grad_norm": 5.927506016664255, "learning_rate": 4.242136536180605e-06, "loss": 0.5586, "step": 19624 }, { "epoch": 0.56, "grad_norm": 4.981735965854672, "learning_rate": 4.241678139180194e-06, "loss": 0.2154, "step": 19625 }, { "epoch": 0.56, "grad_norm": 7.865207032657496, "learning_rate": 4.241219748703535e-06, "loss": 0.7245, "step": 19626 }, { "epoch": 0.56, "grad_norm": 8.111473738026756, "learning_rate": 4.240761364754565e-06, "loss": 0.5415, "step": 19627 }, { "epoch": 0.56, "grad_norm": 1.4440791048087114, "learning_rate": 4.240302987337234e-06, "loss": 0.0685, "step": 19628 }, { "epoch": 0.56, "grad_norm": 6.784425528121582, "learning_rate": 4.2398446164554815e-06, "loss": 0.178, "step": 19629 }, { "epoch": 0.56, "grad_norm": 7.933092650868461, "learning_rate": 4.239386252113254e-06, "loss": 1.1553, "step": 19630 }, { "epoch": 0.56, "grad_norm": 6.361322727968236, "learning_rate": 4.2389278943144906e-06, "loss": 0.7929, "step": 19631 }, { "epoch": 0.56, "grad_norm": 3.5106519578737743, "learning_rate": 4.238469543063138e-06, "loss": 0.2407, "step": 19632 }, { "epoch": 0.56, "grad_norm": 4.138818123480592, "learning_rate": 4.238011198363139e-06, "loss": 0.2018, "step": 19633 }, { "epoch": 0.56, "grad_norm": 10.193273488033933, "learning_rate": 4.237552860218434e-06, "loss": 0.5206, "step": 19634 }, { "epoch": 0.56, "grad_norm": 4.628843514705414, "learning_rate": 4.23709452863297e-06, "loss": 0.3089, "step": 19635 }, { "epoch": 0.56, "grad_norm": 3.9050673869005297, "learning_rate": 4.2366362036106866e-06, "loss": 0.378, "step": 19636 }, { "epoch": 0.56, "grad_norm": 6.378952969942388, "learning_rate": 4.236177885155529e-06, "loss": 0.5256, "step": 19637 }, { "epoch": 0.56, "grad_norm": 5.293182931768354, "learning_rate": 4.2357195732714365e-06, "loss": 0.3328, "step": 19638 }, { "epoch": 0.56, "grad_norm": 7.296716529056594, "learning_rate": 4.2352612679623575e-06, "loss": 0.8502, "step": 19639 }, { "epoch": 0.56, "grad_norm": 4.352607478305272, "learning_rate": 4.23480296923223e-06, "loss": 0.3172, "step": 19640 }, { "epoch": 0.56, "grad_norm": 4.069567797110965, "learning_rate": 4.234344677084997e-06, "loss": 0.2726, "step": 19641 }, { "epoch": 0.56, "grad_norm": 9.380755730302022, "learning_rate": 4.233886391524606e-06, "loss": 0.8833, "step": 19642 }, { "epoch": 0.56, "grad_norm": 5.600357500654028, "learning_rate": 4.233428112554992e-06, "loss": 0.6257, "step": 19643 }, { "epoch": 0.56, "grad_norm": 5.105647859591068, "learning_rate": 4.232969840180105e-06, "loss": 0.3297, "step": 19644 }, { "epoch": 0.56, "grad_norm": 4.20538788189239, "learning_rate": 4.232511574403883e-06, "loss": 0.3258, "step": 19645 }, { "epoch": 0.56, "grad_norm": 4.549901368046768, "learning_rate": 4.23205331523027e-06, "loss": 0.4278, "step": 19646 }, { "epoch": 0.56, "grad_norm": 4.870347566206073, "learning_rate": 4.231595062663205e-06, "loss": 0.5289, "step": 19647 }, { "epoch": 0.56, "grad_norm": 6.551222857408318, "learning_rate": 4.231136816706635e-06, "loss": 0.5813, "step": 19648 }, { "epoch": 0.56, "grad_norm": 5.0504375320157555, "learning_rate": 4.230678577364502e-06, "loss": 0.3272, "step": 19649 }, { "epoch": 0.56, "grad_norm": 7.141286030562771, "learning_rate": 4.230220344640743e-06, "loss": 0.3042, "step": 19650 }, { "epoch": 0.56, "grad_norm": 12.414160524609054, "learning_rate": 4.229762118539306e-06, "loss": 0.8891, "step": 19651 }, { "epoch": 0.56, "grad_norm": 6.196398107171897, "learning_rate": 4.22930389906413e-06, "loss": 0.7094, "step": 19652 }, { "epoch": 0.56, "grad_norm": 11.29392491993133, "learning_rate": 4.228845686219159e-06, "loss": 0.5947, "step": 19653 }, { "epoch": 0.56, "grad_norm": 4.32339276675625, "learning_rate": 4.22838748000833e-06, "loss": 0.4187, "step": 19654 }, { "epoch": 0.56, "grad_norm": 7.199093797399341, "learning_rate": 4.227929280435592e-06, "loss": 0.7756, "step": 19655 }, { "epoch": 0.56, "grad_norm": 5.278681903772957, "learning_rate": 4.227471087504882e-06, "loss": 0.4354, "step": 19656 }, { "epoch": 0.56, "grad_norm": 5.199578562953222, "learning_rate": 4.227012901220142e-06, "loss": 0.2193, "step": 19657 }, { "epoch": 0.56, "grad_norm": 8.369601174928354, "learning_rate": 4.226554721585317e-06, "loss": 0.615, "step": 19658 }, { "epoch": 0.56, "grad_norm": 8.452528391319055, "learning_rate": 4.226096548604345e-06, "loss": 0.6419, "step": 19659 }, { "epoch": 0.56, "grad_norm": 3.6178286748338198, "learning_rate": 4.225638382281171e-06, "loss": 0.4483, "step": 19660 }, { "epoch": 0.56, "grad_norm": 4.871220615328681, "learning_rate": 4.225180222619731e-06, "loss": 0.2253, "step": 19661 }, { "epoch": 0.56, "grad_norm": 4.151393031522413, "learning_rate": 4.2247220696239735e-06, "loss": 0.1691, "step": 19662 }, { "epoch": 0.56, "grad_norm": 6.695212649443231, "learning_rate": 4.2242639232978355e-06, "loss": 0.4731, "step": 19663 }, { "epoch": 0.56, "grad_norm": 6.0876439925441685, "learning_rate": 4.223805783645261e-06, "loss": 0.4119, "step": 19664 }, { "epoch": 0.56, "grad_norm": 5.495455728572779, "learning_rate": 4.223347650670187e-06, "loss": 0.4803, "step": 19665 }, { "epoch": 0.56, "grad_norm": 7.597844468165886, "learning_rate": 4.222889524376558e-06, "loss": 0.6814, "step": 19666 }, { "epoch": 0.56, "grad_norm": 3.1003696444304856, "learning_rate": 4.222431404768317e-06, "loss": 0.3571, "step": 19667 }, { "epoch": 0.56, "grad_norm": 8.025199659484178, "learning_rate": 4.2219732918494e-06, "loss": 0.7166, "step": 19668 }, { "epoch": 0.56, "grad_norm": 4.792569020211584, "learning_rate": 4.221515185623754e-06, "loss": 0.5395, "step": 19669 }, { "epoch": 0.56, "grad_norm": 7.534973363797622, "learning_rate": 4.221057086095315e-06, "loss": 0.49, "step": 19670 }, { "epoch": 0.56, "grad_norm": 8.461997855993804, "learning_rate": 4.2205989932680266e-06, "loss": 0.5904, "step": 19671 }, { "epoch": 0.56, "grad_norm": 3.899956691941641, "learning_rate": 4.2201409071458285e-06, "loss": 0.5627, "step": 19672 }, { "epoch": 0.56, "grad_norm": 2.7035691883087236, "learning_rate": 4.2196828277326616e-06, "loss": 0.1532, "step": 19673 }, { "epoch": 0.56, "grad_norm": 8.863781595040647, "learning_rate": 4.21922475503247e-06, "loss": 0.4521, "step": 19674 }, { "epoch": 0.56, "grad_norm": 2.8660802756165746, "learning_rate": 4.218766689049189e-06, "loss": 0.2117, "step": 19675 }, { "epoch": 0.56, "grad_norm": 4.095485261244409, "learning_rate": 4.218308629786764e-06, "loss": 0.5496, "step": 19676 }, { "epoch": 0.56, "grad_norm": 4.624321862070264, "learning_rate": 4.217850577249131e-06, "loss": 0.4262, "step": 19677 }, { "epoch": 0.56, "grad_norm": 6.941476730306578, "learning_rate": 4.217392531440235e-06, "loss": 0.551, "step": 19678 }, { "epoch": 0.56, "grad_norm": 5.12917181345818, "learning_rate": 4.2169344923640145e-06, "loss": 0.1804, "step": 19679 }, { "epoch": 0.56, "grad_norm": 3.1932639384809574, "learning_rate": 4.2164764600244114e-06, "loss": 0.1997, "step": 19680 }, { "epoch": 0.56, "grad_norm": 5.290310688450005, "learning_rate": 4.216018434425362e-06, "loss": 0.6345, "step": 19681 }, { "epoch": 0.56, "grad_norm": 10.94788734923314, "learning_rate": 4.2155604155708104e-06, "loss": 0.511, "step": 19682 }, { "epoch": 0.56, "grad_norm": 4.443724953839719, "learning_rate": 4.2151024034646975e-06, "loss": 0.3865, "step": 19683 }, { "epoch": 0.56, "grad_norm": 2.2937688909890896, "learning_rate": 4.214644398110959e-06, "loss": 0.2309, "step": 19684 }, { "epoch": 0.56, "grad_norm": 7.436083105705318, "learning_rate": 4.214186399513541e-06, "loss": 0.4095, "step": 19685 }, { "epoch": 0.56, "grad_norm": 2.2800795282748756, "learning_rate": 4.213728407676381e-06, "loss": 0.1654, "step": 19686 }, { "epoch": 0.56, "grad_norm": 4.639930698847365, "learning_rate": 4.213270422603417e-06, "loss": 0.8112, "step": 19687 }, { "epoch": 0.56, "grad_norm": 7.049513396927009, "learning_rate": 4.2128124442985905e-06, "loss": 0.4193, "step": 19688 }, { "epoch": 0.56, "grad_norm": 4.077001263366347, "learning_rate": 4.212354472765842e-06, "loss": 0.2808, "step": 19689 }, { "epoch": 0.56, "grad_norm": 2.799412519267828, "learning_rate": 4.211896508009112e-06, "loss": 0.4202, "step": 19690 }, { "epoch": 0.56, "grad_norm": 8.10271811267343, "learning_rate": 4.2114385500323375e-06, "loss": 0.7692, "step": 19691 }, { "epoch": 0.56, "grad_norm": 3.0867542200119127, "learning_rate": 4.210980598839462e-06, "loss": 0.4208, "step": 19692 }, { "epoch": 0.56, "grad_norm": 6.305953682174952, "learning_rate": 4.210522654434423e-06, "loss": 0.494, "step": 19693 }, { "epoch": 0.56, "grad_norm": 5.2086774280052355, "learning_rate": 4.21006471682116e-06, "loss": 0.5999, "step": 19694 }, { "epoch": 0.56, "grad_norm": 10.269723199013233, "learning_rate": 4.209606786003612e-06, "loss": 0.6809, "step": 19695 }, { "epoch": 0.56, "grad_norm": 3.069997824736455, "learning_rate": 4.20914886198572e-06, "loss": 0.1817, "step": 19696 }, { "epoch": 0.56, "grad_norm": 4.804586766125168, "learning_rate": 4.208690944771424e-06, "loss": 0.2279, "step": 19697 }, { "epoch": 0.56, "grad_norm": 4.394214235527347, "learning_rate": 4.2082330343646595e-06, "loss": 0.5578, "step": 19698 }, { "epoch": 0.56, "grad_norm": 7.858132751456167, "learning_rate": 4.207775130769372e-06, "loss": 0.7123, "step": 19699 }, { "epoch": 0.56, "grad_norm": 6.4826900269254715, "learning_rate": 4.207317233989496e-06, "loss": 0.4829, "step": 19700 }, { "epoch": 0.56, "grad_norm": 6.168750947683584, "learning_rate": 4.206859344028974e-06, "loss": 0.5398, "step": 19701 }, { "epoch": 0.56, "grad_norm": 6.136938332769829, "learning_rate": 4.206401460891742e-06, "loss": 0.8367, "step": 19702 }, { "epoch": 0.56, "grad_norm": 3.5069209199794136, "learning_rate": 4.205943584581741e-06, "loss": 0.272, "step": 19703 }, { "epoch": 0.56, "grad_norm": 5.348882541534306, "learning_rate": 4.2054857151029075e-06, "loss": 0.4841, "step": 19704 }, { "epoch": 0.56, "grad_norm": 5.548878224733129, "learning_rate": 4.205027852459184e-06, "loss": 0.6395, "step": 19705 }, { "epoch": 0.56, "grad_norm": 7.715460690324359, "learning_rate": 4.204569996654509e-06, "loss": 0.5741, "step": 19706 }, { "epoch": 0.56, "grad_norm": 9.360123746086547, "learning_rate": 4.204112147692818e-06, "loss": 0.6839, "step": 19707 }, { "epoch": 0.56, "grad_norm": 4.981291148782778, "learning_rate": 4.203654305578055e-06, "loss": 0.655, "step": 19708 }, { "epoch": 0.56, "grad_norm": 4.257612731166887, "learning_rate": 4.203196470314154e-06, "loss": 0.4868, "step": 19709 }, { "epoch": 0.56, "grad_norm": 5.745662338482416, "learning_rate": 4.2027386419050576e-06, "loss": 0.6615, "step": 19710 }, { "epoch": 0.56, "grad_norm": 4.72057979045418, "learning_rate": 4.2022808203547e-06, "loss": 0.3428, "step": 19711 }, { "epoch": 0.56, "grad_norm": 4.445460741088771, "learning_rate": 4.201823005667024e-06, "loss": 0.509, "step": 19712 }, { "epoch": 0.56, "grad_norm": 7.079388368879702, "learning_rate": 4.201365197845966e-06, "loss": 0.6121, "step": 19713 }, { "epoch": 0.56, "grad_norm": 6.6716375018335325, "learning_rate": 4.200907396895463e-06, "loss": 0.7169, "step": 19714 }, { "epoch": 0.56, "grad_norm": 4.662651219101769, "learning_rate": 4.200449602819458e-06, "loss": 0.4749, "step": 19715 }, { "epoch": 0.56, "grad_norm": 6.355792636976956, "learning_rate": 4.199991815621886e-06, "loss": 0.5881, "step": 19716 }, { "epoch": 0.56, "grad_norm": 6.1015383906749, "learning_rate": 4.199534035306686e-06, "loss": 0.5987, "step": 19717 }, { "epoch": 0.56, "grad_norm": 3.8961086158935436, "learning_rate": 4.199076261877795e-06, "loss": 0.5316, "step": 19718 }, { "epoch": 0.56, "grad_norm": 4.280907331800768, "learning_rate": 4.198618495339154e-06, "loss": 0.4764, "step": 19719 }, { "epoch": 0.56, "grad_norm": 4.158655832713371, "learning_rate": 4.198160735694699e-06, "loss": 0.4083, "step": 19720 }, { "epoch": 0.56, "grad_norm": 14.536334192266894, "learning_rate": 4.197702982948368e-06, "loss": 0.8496, "step": 19721 }, { "epoch": 0.56, "grad_norm": 4.011306696960946, "learning_rate": 4.1972452371040986e-06, "loss": 0.203, "step": 19722 }, { "epoch": 0.56, "grad_norm": 7.338164230253512, "learning_rate": 4.19678749816583e-06, "loss": 0.6413, "step": 19723 }, { "epoch": 0.56, "grad_norm": 5.243534512008039, "learning_rate": 4.196329766137501e-06, "loss": 0.5469, "step": 19724 }, { "epoch": 0.56, "grad_norm": 3.5693597503160004, "learning_rate": 4.195872041023046e-06, "loss": 0.2546, "step": 19725 }, { "epoch": 0.56, "grad_norm": 8.919382632406455, "learning_rate": 4.195414322826407e-06, "loss": 0.5144, "step": 19726 }, { "epoch": 0.56, "grad_norm": 6.187679018455709, "learning_rate": 4.1949566115515175e-06, "loss": 0.6035, "step": 19727 }, { "epoch": 0.56, "grad_norm": 8.313832398520901, "learning_rate": 4.194498907202319e-06, "loss": 1.0327, "step": 19728 }, { "epoch": 0.56, "grad_norm": 4.330510360074663, "learning_rate": 4.1940412097827456e-06, "loss": 0.2988, "step": 19729 }, { "epoch": 0.57, "grad_norm": 2.8723389293375168, "learning_rate": 4.1935835192967385e-06, "loss": 0.2546, "step": 19730 }, { "epoch": 0.57, "grad_norm": 4.405123735783533, "learning_rate": 4.193125835748231e-06, "loss": 0.2657, "step": 19731 }, { "epoch": 0.57, "grad_norm": 4.5907479681966015, "learning_rate": 4.192668159141163e-06, "loss": 0.3734, "step": 19732 }, { "epoch": 0.57, "grad_norm": 4.747907441138488, "learning_rate": 4.192210489479472e-06, "loss": 0.3983, "step": 19733 }, { "epoch": 0.57, "grad_norm": 4.800593124937405, "learning_rate": 4.191752826767093e-06, "loss": 0.5459, "step": 19734 }, { "epoch": 0.57, "grad_norm": 6.743467030497009, "learning_rate": 4.191295171007968e-06, "loss": 0.4547, "step": 19735 }, { "epoch": 0.57, "grad_norm": 5.766657956434665, "learning_rate": 4.190837522206029e-06, "loss": 0.5745, "step": 19736 }, { "epoch": 0.57, "grad_norm": 4.314327350404717, "learning_rate": 4.190379880365217e-06, "loss": 0.5562, "step": 19737 }, { "epoch": 0.57, "grad_norm": 4.002852153190043, "learning_rate": 4.189922245489464e-06, "loss": 0.1477, "step": 19738 }, { "epoch": 0.57, "grad_norm": 8.480068174484003, "learning_rate": 4.189464617582712e-06, "loss": 0.6245, "step": 19739 }, { "epoch": 0.57, "grad_norm": 8.92336024332911, "learning_rate": 4.189006996648898e-06, "loss": 0.7517, "step": 19740 }, { "epoch": 0.57, "grad_norm": 6.3204373982729285, "learning_rate": 4.1885493826919535e-06, "loss": 0.3846, "step": 19741 }, { "epoch": 0.57, "grad_norm": 5.257955064583725, "learning_rate": 4.188091775715821e-06, "loss": 0.3715, "step": 19742 }, { "epoch": 0.57, "grad_norm": 5.368945748576402, "learning_rate": 4.187634175724435e-06, "loss": 0.5371, "step": 19743 }, { "epoch": 0.57, "grad_norm": 6.051596284353918, "learning_rate": 4.187176582721732e-06, "loss": 0.5292, "step": 19744 }, { "epoch": 0.57, "grad_norm": 3.6099832856169116, "learning_rate": 4.1867189967116465e-06, "loss": 0.5898, "step": 19745 }, { "epoch": 0.57, "grad_norm": 4.014009113483889, "learning_rate": 4.18626141769812e-06, "loss": 0.3402, "step": 19746 }, { "epoch": 0.57, "grad_norm": 7.779662188572878, "learning_rate": 4.185803845685085e-06, "loss": 0.661, "step": 19747 }, { "epoch": 0.57, "grad_norm": 17.166325291075648, "learning_rate": 4.185346280676479e-06, "loss": 0.25, "step": 19748 }, { "epoch": 0.57, "grad_norm": 11.062844955324746, "learning_rate": 4.1848887226762405e-06, "loss": 0.7273, "step": 19749 }, { "epoch": 0.57, "grad_norm": 5.256084798786363, "learning_rate": 4.184431171688302e-06, "loss": 0.6109, "step": 19750 }, { "epoch": 0.57, "grad_norm": 5.050065298885774, "learning_rate": 4.183973627716604e-06, "loss": 0.6579, "step": 19751 }, { "epoch": 0.57, "grad_norm": 5.674042531885071, "learning_rate": 4.183516090765077e-06, "loss": 0.426, "step": 19752 }, { "epoch": 0.57, "grad_norm": 9.896697483813508, "learning_rate": 4.183058560837664e-06, "loss": 0.7269, "step": 19753 }, { "epoch": 0.57, "grad_norm": 4.501459679692165, "learning_rate": 4.1826010379382955e-06, "loss": 0.3876, "step": 19754 }, { "epoch": 0.57, "grad_norm": 6.346083226066635, "learning_rate": 4.182143522070909e-06, "loss": 0.6722, "step": 19755 }, { "epoch": 0.57, "grad_norm": 7.456138017809555, "learning_rate": 4.181686013239444e-06, "loss": 0.4061, "step": 19756 }, { "epoch": 0.57, "grad_norm": 5.897148446200422, "learning_rate": 4.181228511447831e-06, "loss": 0.4301, "step": 19757 }, { "epoch": 0.57, "grad_norm": 4.154009250858311, "learning_rate": 4.18077101670001e-06, "loss": 0.4426, "step": 19758 }, { "epoch": 0.57, "grad_norm": 3.6902383884453736, "learning_rate": 4.1803135289999135e-06, "loss": 0.3637, "step": 19759 }, { "epoch": 0.57, "grad_norm": 5.317513736622818, "learning_rate": 4.179856048351481e-06, "loss": 0.4186, "step": 19760 }, { "epoch": 0.57, "grad_norm": 6.581530742689094, "learning_rate": 4.179398574758643e-06, "loss": 0.483, "step": 19761 }, { "epoch": 0.57, "grad_norm": 4.825278421383793, "learning_rate": 4.17894110822534e-06, "loss": 0.2367, "step": 19762 }, { "epoch": 0.57, "grad_norm": 6.452349341376584, "learning_rate": 4.178483648755505e-06, "loss": 0.5875, "step": 19763 }, { "epoch": 0.57, "grad_norm": 7.597232663621843, "learning_rate": 4.178026196353072e-06, "loss": 0.9775, "step": 19764 }, { "epoch": 0.57, "grad_norm": 6.491773736762038, "learning_rate": 4.177568751021982e-06, "loss": 0.5377, "step": 19765 }, { "epoch": 0.57, "grad_norm": 6.44027135508324, "learning_rate": 4.177111312766165e-06, "loss": 0.456, "step": 19766 }, { "epoch": 0.57, "grad_norm": 4.388773843542404, "learning_rate": 4.176653881589559e-06, "loss": 0.4603, "step": 19767 }, { "epoch": 0.57, "grad_norm": 7.139532961490066, "learning_rate": 4.176196457496096e-06, "loss": 0.2249, "step": 19768 }, { "epoch": 0.57, "grad_norm": 6.1361526626385805, "learning_rate": 4.175739040489716e-06, "loss": 0.7536, "step": 19769 }, { "epoch": 0.57, "grad_norm": 1.977189722169942, "learning_rate": 4.17528163057435e-06, "loss": 0.165, "step": 19770 }, { "epoch": 0.57, "grad_norm": 4.239574885379457, "learning_rate": 4.1748242277539365e-06, "loss": 0.4761, "step": 19771 }, { "epoch": 0.57, "grad_norm": 77.39140458988021, "learning_rate": 4.174366832032406e-06, "loss": 0.5871, "step": 19772 }, { "epoch": 0.57, "grad_norm": 4.501533697288389, "learning_rate": 4.173909443413697e-06, "loss": 0.3532, "step": 19773 }, { "epoch": 0.57, "grad_norm": 7.019747264257648, "learning_rate": 4.173452061901745e-06, "loss": 0.6352, "step": 19774 }, { "epoch": 0.57, "grad_norm": 10.090165863034894, "learning_rate": 4.17299468750048e-06, "loss": 0.5533, "step": 19775 }, { "epoch": 0.57, "grad_norm": 3.943854306106657, "learning_rate": 4.172537320213843e-06, "loss": 0.3506, "step": 19776 }, { "epoch": 0.57, "grad_norm": 3.419908245763194, "learning_rate": 4.172079960045763e-06, "loss": 0.4424, "step": 19777 }, { "epoch": 0.57, "grad_norm": 5.170602232722572, "learning_rate": 4.17162260700018e-06, "loss": 0.5227, "step": 19778 }, { "epoch": 0.57, "grad_norm": 2.5795871749768815, "learning_rate": 4.171165261081022e-06, "loss": 0.355, "step": 19779 }, { "epoch": 0.57, "grad_norm": 3.8408539259172256, "learning_rate": 4.170707922292229e-06, "loss": 0.6257, "step": 19780 }, { "epoch": 0.57, "grad_norm": 4.498641047322992, "learning_rate": 4.170250590637735e-06, "loss": 0.4658, "step": 19781 }, { "epoch": 0.57, "grad_norm": 2.13554898449096, "learning_rate": 4.169793266121471e-06, "loss": 0.3924, "step": 19782 }, { "epoch": 0.57, "grad_norm": 6.314448971662917, "learning_rate": 4.169335948747375e-06, "loss": 0.5953, "step": 19783 }, { "epoch": 0.57, "grad_norm": 2.690692159100321, "learning_rate": 4.168878638519378e-06, "loss": 0.3292, "step": 19784 }, { "epoch": 0.57, "grad_norm": 1.9369877784197707, "learning_rate": 4.1684213354414174e-06, "loss": 0.1962, "step": 19785 }, { "epoch": 0.57, "grad_norm": 6.260018901491726, "learning_rate": 4.167964039517424e-06, "loss": 0.4161, "step": 19786 }, { "epoch": 0.57, "grad_norm": 4.604796227037794, "learning_rate": 4.1675067507513365e-06, "loss": 0.3498, "step": 19787 }, { "epoch": 0.57, "grad_norm": 9.187887585827054, "learning_rate": 4.167049469147082e-06, "loss": 0.8255, "step": 19788 }, { "epoch": 0.57, "grad_norm": 2.679338182196825, "learning_rate": 4.1665921947085984e-06, "loss": 0.0622, "step": 19789 }, { "epoch": 0.57, "grad_norm": 8.834262625077727, "learning_rate": 4.1661349274398235e-06, "loss": 0.6875, "step": 19790 }, { "epoch": 0.57, "grad_norm": 3.961473266963946, "learning_rate": 4.165677667344683e-06, "loss": 0.4566, "step": 19791 }, { "epoch": 0.57, "grad_norm": 8.83291717364704, "learning_rate": 4.165220414427118e-06, "loss": 0.8206, "step": 19792 }, { "epoch": 0.57, "grad_norm": 5.301008833832676, "learning_rate": 4.1647631686910575e-06, "loss": 0.6765, "step": 19793 }, { "epoch": 0.57, "grad_norm": 3.6224582738954476, "learning_rate": 4.1643059301404374e-06, "loss": 0.2624, "step": 19794 }, { "epoch": 0.57, "grad_norm": 2.6914526239173115, "learning_rate": 4.163848698779189e-06, "loss": 0.3184, "step": 19795 }, { "epoch": 0.57, "grad_norm": 4.115373881563933, "learning_rate": 4.163391474611248e-06, "loss": 0.2728, "step": 19796 }, { "epoch": 0.57, "grad_norm": 4.012592461967527, "learning_rate": 4.162934257640548e-06, "loss": 0.5052, "step": 19797 }, { "epoch": 0.57, "grad_norm": 8.51142482941608, "learning_rate": 4.16247704787102e-06, "loss": 0.8844, "step": 19798 }, { "epoch": 0.57, "grad_norm": 6.891326626372908, "learning_rate": 4.1620198453066005e-06, "loss": 0.6341, "step": 19799 }, { "epoch": 0.57, "grad_norm": 9.242818983339935, "learning_rate": 4.161562649951219e-06, "loss": 0.4141, "step": 19800 }, { "epoch": 0.57, "grad_norm": 8.054318083793431, "learning_rate": 4.161105461808814e-06, "loss": 0.3432, "step": 19801 }, { "epoch": 0.57, "grad_norm": 4.182278950989785, "learning_rate": 4.160648280883312e-06, "loss": 0.2729, "step": 19802 }, { "epoch": 0.57, "grad_norm": 8.64520771260257, "learning_rate": 4.1601911071786516e-06, "loss": 0.4299, "step": 19803 }, { "epoch": 0.57, "grad_norm": 3.7434581598262198, "learning_rate": 4.159733940698763e-06, "loss": 0.3211, "step": 19804 }, { "epoch": 0.57, "grad_norm": 4.580538012875369, "learning_rate": 4.159276781447579e-06, "loss": 0.2117, "step": 19805 }, { "epoch": 0.57, "grad_norm": 9.531075097652407, "learning_rate": 4.158819629429035e-06, "loss": 0.749, "step": 19806 }, { "epoch": 0.57, "grad_norm": 3.1922411532754835, "learning_rate": 4.158362484647061e-06, "loss": 0.3496, "step": 19807 }, { "epoch": 0.57, "grad_norm": 5.023850656845503, "learning_rate": 4.157905347105592e-06, "loss": 0.2778, "step": 19808 }, { "epoch": 0.57, "grad_norm": 1.6468905743146627, "learning_rate": 4.157448216808558e-06, "loss": 0.1074, "step": 19809 }, { "epoch": 0.57, "grad_norm": 5.958970031420944, "learning_rate": 4.156991093759894e-06, "loss": 0.6297, "step": 19810 }, { "epoch": 0.57, "grad_norm": 3.085225508437266, "learning_rate": 4.156533977963533e-06, "loss": 0.364, "step": 19811 }, { "epoch": 0.57, "grad_norm": 9.712003371651331, "learning_rate": 4.156076869423406e-06, "loss": 0.5093, "step": 19812 }, { "epoch": 0.57, "grad_norm": 6.650851178011678, "learning_rate": 4.155619768143444e-06, "loss": 0.5289, "step": 19813 }, { "epoch": 0.57, "grad_norm": 4.132524245941612, "learning_rate": 4.155162674127582e-06, "loss": 0.352, "step": 19814 }, { "epoch": 0.57, "grad_norm": 3.0508947607749195, "learning_rate": 4.154705587379752e-06, "loss": 0.2856, "step": 19815 }, { "epoch": 0.57, "grad_norm": 6.30811807356726, "learning_rate": 4.154248507903886e-06, "loss": 0.6333, "step": 19816 }, { "epoch": 0.57, "grad_norm": 5.207707675230231, "learning_rate": 4.153791435703917e-06, "loss": 0.7408, "step": 19817 }, { "epoch": 0.57, "grad_norm": 2.848573513414227, "learning_rate": 4.153334370783773e-06, "loss": 0.2076, "step": 19818 }, { "epoch": 0.57, "grad_norm": 4.1193022246335715, "learning_rate": 4.152877313147391e-06, "loss": 0.2951, "step": 19819 }, { "epoch": 0.57, "grad_norm": 7.180899599721423, "learning_rate": 4.152420262798701e-06, "loss": 0.7734, "step": 19820 }, { "epoch": 0.57, "grad_norm": 10.544879458929092, "learning_rate": 4.151963219741634e-06, "loss": 0.2973, "step": 19821 }, { "epoch": 0.57, "grad_norm": 7.057613944452347, "learning_rate": 4.151506183980125e-06, "loss": 0.5734, "step": 19822 }, { "epoch": 0.57, "grad_norm": 4.667775067896123, "learning_rate": 4.151049155518102e-06, "loss": 0.5085, "step": 19823 }, { "epoch": 0.57, "grad_norm": 6.9055573129008065, "learning_rate": 4.150592134359501e-06, "loss": 0.3596, "step": 19824 }, { "epoch": 0.57, "grad_norm": 6.089919446434365, "learning_rate": 4.150135120508248e-06, "loss": 0.6131, "step": 19825 }, { "epoch": 0.57, "grad_norm": 6.540950015067335, "learning_rate": 4.1496781139682814e-06, "loss": 0.713, "step": 19826 }, { "epoch": 0.57, "grad_norm": 7.152508819691975, "learning_rate": 4.149221114743527e-06, "loss": 0.6796, "step": 19827 }, { "epoch": 0.57, "grad_norm": 7.303216649444695, "learning_rate": 4.148764122837921e-06, "loss": 0.4961, "step": 19828 }, { "epoch": 0.57, "grad_norm": 4.372436262727285, "learning_rate": 4.148307138255389e-06, "loss": 0.2494, "step": 19829 }, { "epoch": 0.57, "grad_norm": 7.367364325075059, "learning_rate": 4.147850160999867e-06, "loss": 0.9246, "step": 19830 }, { "epoch": 0.57, "grad_norm": 9.626414504706863, "learning_rate": 4.147393191075287e-06, "loss": 0.4291, "step": 19831 }, { "epoch": 0.57, "grad_norm": 3.330826374909137, "learning_rate": 4.146936228485575e-06, "loss": 0.3266, "step": 19832 }, { "epoch": 0.57, "grad_norm": 3.5205321926166913, "learning_rate": 4.1464792732346685e-06, "loss": 0.4131, "step": 19833 }, { "epoch": 0.57, "grad_norm": 9.91248796613812, "learning_rate": 4.146022325326495e-06, "loss": 0.6585, "step": 19834 }, { "epoch": 0.57, "grad_norm": 19.519712119402918, "learning_rate": 4.145565384764987e-06, "loss": 0.7448, "step": 19835 }, { "epoch": 0.57, "grad_norm": 5.2438646524510055, "learning_rate": 4.145108451554072e-06, "loss": 0.3288, "step": 19836 }, { "epoch": 0.57, "grad_norm": 4.2173878201329655, "learning_rate": 4.144651525697686e-06, "loss": 0.2691, "step": 19837 }, { "epoch": 0.57, "grad_norm": 2.810824212870846, "learning_rate": 4.144194607199758e-06, "loss": 0.4076, "step": 19838 }, { "epoch": 0.57, "grad_norm": 3.881580763773021, "learning_rate": 4.143737696064216e-06, "loss": 0.3139, "step": 19839 }, { "epoch": 0.57, "grad_norm": 5.579008956666777, "learning_rate": 4.143280792294997e-06, "loss": 0.5785, "step": 19840 }, { "epoch": 0.57, "grad_norm": 4.655708844785304, "learning_rate": 4.142823895896025e-06, "loss": 0.4909, "step": 19841 }, { "epoch": 0.57, "grad_norm": 5.20895622597674, "learning_rate": 4.1423670068712355e-06, "loss": 0.6852, "step": 19842 }, { "epoch": 0.57, "grad_norm": 4.55276752396032, "learning_rate": 4.141910125224554e-06, "loss": 0.4619, "step": 19843 }, { "epoch": 0.57, "grad_norm": 4.987446236380389, "learning_rate": 4.141453250959917e-06, "loss": 0.68, "step": 19844 }, { "epoch": 0.57, "grad_norm": 5.2587401888107275, "learning_rate": 4.14099638408125e-06, "loss": 0.6644, "step": 19845 }, { "epoch": 0.57, "grad_norm": 4.591154494108823, "learning_rate": 4.140539524592485e-06, "loss": 0.4229, "step": 19846 }, { "epoch": 0.57, "grad_norm": 3.390867532239048, "learning_rate": 4.140082672497556e-06, "loss": 0.2808, "step": 19847 }, { "epoch": 0.57, "grad_norm": 6.260399977608353, "learning_rate": 4.139625827800387e-06, "loss": 0.3835, "step": 19848 }, { "epoch": 0.57, "grad_norm": 2.4397440875021146, "learning_rate": 4.139168990504912e-06, "loss": 0.1731, "step": 19849 }, { "epoch": 0.57, "grad_norm": 5.516969554726539, "learning_rate": 4.138712160615059e-06, "loss": 0.5784, "step": 19850 }, { "epoch": 0.57, "grad_norm": 3.1438741246133843, "learning_rate": 4.1382553381347615e-06, "loss": 0.2272, "step": 19851 }, { "epoch": 0.57, "grad_norm": 5.955434156104944, "learning_rate": 4.137798523067945e-06, "loss": 0.7761, "step": 19852 }, { "epoch": 0.57, "grad_norm": 4.120751273582413, "learning_rate": 4.137341715418543e-06, "loss": 0.5273, "step": 19853 }, { "epoch": 0.57, "grad_norm": 2.8487266757842153, "learning_rate": 4.136884915190483e-06, "loss": 0.4, "step": 19854 }, { "epoch": 0.57, "grad_norm": 5.2819261964468325, "learning_rate": 4.1364281223876955e-06, "loss": 0.4124, "step": 19855 }, { "epoch": 0.57, "grad_norm": 5.949921354206822, "learning_rate": 4.135971337014112e-06, "loss": 0.4645, "step": 19856 }, { "epoch": 0.57, "grad_norm": 4.875656083781387, "learning_rate": 4.13551455907366e-06, "loss": 0.1388, "step": 19857 }, { "epoch": 0.57, "grad_norm": 6.0627192821368645, "learning_rate": 4.135057788570271e-06, "loss": 0.431, "step": 19858 }, { "epoch": 0.57, "grad_norm": 6.526580528506528, "learning_rate": 4.134601025507871e-06, "loss": 0.6592, "step": 19859 }, { "epoch": 0.57, "grad_norm": 4.758315888131803, "learning_rate": 4.134144269890394e-06, "loss": 0.4478, "step": 19860 }, { "epoch": 0.57, "grad_norm": 4.8689277929865895, "learning_rate": 4.133687521721766e-06, "loss": 0.5391, "step": 19861 }, { "epoch": 0.57, "grad_norm": 5.333189629565823, "learning_rate": 4.133230781005917e-06, "loss": 0.5781, "step": 19862 }, { "epoch": 0.57, "grad_norm": 7.458100423603744, "learning_rate": 4.132774047746778e-06, "loss": 0.6784, "step": 19863 }, { "epoch": 0.57, "grad_norm": 4.188654270791482, "learning_rate": 4.132317321948277e-06, "loss": 0.8213, "step": 19864 }, { "epoch": 0.57, "grad_norm": 6.783554459075504, "learning_rate": 4.131860603614344e-06, "loss": 0.2754, "step": 19865 }, { "epoch": 0.57, "grad_norm": 5.235378248946861, "learning_rate": 4.131403892748905e-06, "loss": 0.4897, "step": 19866 }, { "epoch": 0.57, "grad_norm": 6.506795082381964, "learning_rate": 4.130947189355894e-06, "loss": 0.7066, "step": 19867 }, { "epoch": 0.57, "grad_norm": 6.691996479893116, "learning_rate": 4.1304904934392355e-06, "loss": 0.719, "step": 19868 }, { "epoch": 0.57, "grad_norm": 6.071017898166233, "learning_rate": 4.130033805002862e-06, "loss": 0.7186, "step": 19869 }, { "epoch": 0.57, "grad_norm": 3.080341838222101, "learning_rate": 4.129577124050698e-06, "loss": 0.7086, "step": 19870 }, { "epoch": 0.57, "grad_norm": 6.288459777383102, "learning_rate": 4.1291204505866765e-06, "loss": 0.4183, "step": 19871 }, { "epoch": 0.57, "grad_norm": 9.870259849428447, "learning_rate": 4.128663784614725e-06, "loss": 0.8152, "step": 19872 }, { "epoch": 0.57, "grad_norm": 3.631503798500542, "learning_rate": 4.12820712613877e-06, "loss": 0.2239, "step": 19873 }, { "epoch": 0.57, "grad_norm": 7.631595042033454, "learning_rate": 4.127750475162744e-06, "loss": 0.4543, "step": 19874 }, { "epoch": 0.57, "grad_norm": 5.4677580451510215, "learning_rate": 4.127293831690571e-06, "loss": 0.4672, "step": 19875 }, { "epoch": 0.57, "grad_norm": 4.144208027193278, "learning_rate": 4.126837195726184e-06, "loss": 0.5561, "step": 19876 }, { "epoch": 0.57, "grad_norm": 5.292046836152713, "learning_rate": 4.126380567273507e-06, "loss": 0.8156, "step": 19877 }, { "epoch": 0.57, "grad_norm": 5.291381631768864, "learning_rate": 4.125923946336473e-06, "loss": 0.3707, "step": 19878 }, { "epoch": 0.57, "grad_norm": 4.87392154404426, "learning_rate": 4.125467332919004e-06, "loss": 0.3753, "step": 19879 }, { "epoch": 0.57, "grad_norm": 6.683542738540903, "learning_rate": 4.125010727025034e-06, "loss": 0.494, "step": 19880 }, { "epoch": 0.57, "grad_norm": 6.965666460792641, "learning_rate": 4.124554128658489e-06, "loss": 0.3289, "step": 19881 }, { "epoch": 0.57, "grad_norm": 8.889160142839366, "learning_rate": 4.1240975378232964e-06, "loss": 0.777, "step": 19882 }, { "epoch": 0.57, "grad_norm": 15.348520737610357, "learning_rate": 4.123640954523386e-06, "loss": 0.532, "step": 19883 }, { "epoch": 0.57, "grad_norm": 2.3191009081163463, "learning_rate": 4.123184378762683e-06, "loss": 0.1505, "step": 19884 }, { "epoch": 0.57, "grad_norm": 6.877468394266137, "learning_rate": 4.1227278105451184e-06, "loss": 0.4185, "step": 19885 }, { "epoch": 0.57, "grad_norm": 7.9495270090530274, "learning_rate": 4.122271249874616e-06, "loss": 0.4328, "step": 19886 }, { "epoch": 0.57, "grad_norm": 3.2390254767989113, "learning_rate": 4.121814696755108e-06, "loss": 0.3533, "step": 19887 }, { "epoch": 0.57, "grad_norm": 5.76495864036129, "learning_rate": 4.121358151190521e-06, "loss": 0.2751, "step": 19888 }, { "epoch": 0.57, "grad_norm": 5.174497003113394, "learning_rate": 4.120901613184779e-06, "loss": 0.4348, "step": 19889 }, { "epoch": 0.57, "grad_norm": 4.053833976391003, "learning_rate": 4.120445082741814e-06, "loss": 0.2704, "step": 19890 }, { "epoch": 0.57, "grad_norm": 7.579097221382116, "learning_rate": 4.119988559865551e-06, "loss": 0.7571, "step": 19891 }, { "epoch": 0.57, "grad_norm": 1.7676968023598079, "learning_rate": 4.11953204455992e-06, "loss": 0.1491, "step": 19892 }, { "epoch": 0.57, "grad_norm": 2.8052112653461765, "learning_rate": 4.119075536828842e-06, "loss": 0.2792, "step": 19893 }, { "epoch": 0.57, "grad_norm": 5.597693177638465, "learning_rate": 4.118619036676253e-06, "loss": 0.2427, "step": 19894 }, { "epoch": 0.57, "grad_norm": 5.180533407650882, "learning_rate": 4.118162544106074e-06, "loss": 0.3054, "step": 19895 }, { "epoch": 0.57, "grad_norm": 7.482681050861605, "learning_rate": 4.117706059122233e-06, "loss": 0.5601, "step": 19896 }, { "epoch": 0.57, "grad_norm": 6.336368494391429, "learning_rate": 4.1172495817286604e-06, "loss": 0.6469, "step": 19897 }, { "epoch": 0.57, "grad_norm": 5.33354090743641, "learning_rate": 4.11679311192928e-06, "loss": 0.8452, "step": 19898 }, { "epoch": 0.57, "grad_norm": 4.90126357621733, "learning_rate": 4.116336649728021e-06, "loss": 0.4166, "step": 19899 }, { "epoch": 0.57, "grad_norm": 5.6433399461248195, "learning_rate": 4.115880195128806e-06, "loss": 0.2057, "step": 19900 }, { "epoch": 0.57, "grad_norm": 4.036512001255074, "learning_rate": 4.115423748135567e-06, "loss": 0.5286, "step": 19901 }, { "epoch": 0.57, "grad_norm": 9.37747187769636, "learning_rate": 4.114967308752229e-06, "loss": 0.4319, "step": 19902 }, { "epoch": 0.57, "grad_norm": 14.441071237893842, "learning_rate": 4.114510876982716e-06, "loss": 0.9356, "step": 19903 }, { "epoch": 0.57, "grad_norm": 2.8181575245523303, "learning_rate": 4.11405445283096e-06, "loss": 0.2427, "step": 19904 }, { "epoch": 0.57, "grad_norm": 4.64207492359316, "learning_rate": 4.113598036300881e-06, "loss": 0.0982, "step": 19905 }, { "epoch": 0.57, "grad_norm": 2.8569160895103933, "learning_rate": 4.113141627396412e-06, "loss": 0.3263, "step": 19906 }, { "epoch": 0.57, "grad_norm": 4.139593650921483, "learning_rate": 4.112685226121475e-06, "loss": 0.539, "step": 19907 }, { "epoch": 0.57, "grad_norm": 5.029710143062917, "learning_rate": 4.112228832479999e-06, "loss": 0.2925, "step": 19908 }, { "epoch": 0.57, "grad_norm": 6.3039663060686895, "learning_rate": 4.111772446475906e-06, "loss": 0.5772, "step": 19909 }, { "epoch": 0.57, "grad_norm": 6.468020854159055, "learning_rate": 4.111316068113128e-06, "loss": 0.4975, "step": 19910 }, { "epoch": 0.57, "grad_norm": 4.8229149913853595, "learning_rate": 4.110859697395588e-06, "loss": 0.2954, "step": 19911 }, { "epoch": 0.57, "grad_norm": 4.613432672585035, "learning_rate": 4.110403334327211e-06, "loss": 0.5562, "step": 19912 }, { "epoch": 0.57, "grad_norm": 8.762655533785644, "learning_rate": 4.109946978911926e-06, "loss": 0.5608, "step": 19913 }, { "epoch": 0.57, "grad_norm": 7.653531023799615, "learning_rate": 4.1094906311536575e-06, "loss": 0.6582, "step": 19914 }, { "epoch": 0.57, "grad_norm": 9.563792072842292, "learning_rate": 4.109034291056333e-06, "loss": 0.5511, "step": 19915 }, { "epoch": 0.57, "grad_norm": 5.303484593497522, "learning_rate": 4.108577958623873e-06, "loss": 0.4144, "step": 19916 }, { "epoch": 0.57, "grad_norm": 3.819072467530933, "learning_rate": 4.10812163386021e-06, "loss": 0.4077, "step": 19917 }, { "epoch": 0.57, "grad_norm": 4.499088883881346, "learning_rate": 4.107665316769266e-06, "loss": 0.5357, "step": 19918 }, { "epoch": 0.57, "grad_norm": 7.752814181892446, "learning_rate": 4.107209007354968e-06, "loss": 0.9074, "step": 19919 }, { "epoch": 0.57, "grad_norm": 2.5538449195739736, "learning_rate": 4.1067527056212394e-06, "loss": 0.1168, "step": 19920 }, { "epoch": 0.57, "grad_norm": 5.096688107748361, "learning_rate": 4.106296411572008e-06, "loss": 0.5217, "step": 19921 }, { "epoch": 0.57, "grad_norm": 7.7870372408168285, "learning_rate": 4.1058401252111995e-06, "loss": 0.6088, "step": 19922 }, { "epoch": 0.57, "grad_norm": 3.3182686652436932, "learning_rate": 4.105383846542736e-06, "loss": 0.6575, "step": 19923 }, { "epoch": 0.57, "grad_norm": 1.6042731025816312, "learning_rate": 4.104927575570548e-06, "loss": 0.0925, "step": 19924 }, { "epoch": 0.57, "grad_norm": 12.412354934628375, "learning_rate": 4.104471312298557e-06, "loss": 0.4397, "step": 19925 }, { "epoch": 0.57, "grad_norm": 4.346733248913496, "learning_rate": 4.104015056730688e-06, "loss": 0.4004, "step": 19926 }, { "epoch": 0.57, "grad_norm": 3.1795555293559215, "learning_rate": 4.103558808870867e-06, "loss": 0.1683, "step": 19927 }, { "epoch": 0.57, "grad_norm": 6.044441738996804, "learning_rate": 4.1031025687230194e-06, "loss": 0.2987, "step": 19928 }, { "epoch": 0.57, "grad_norm": 8.268502727798978, "learning_rate": 4.102646336291072e-06, "loss": 0.4158, "step": 19929 }, { "epoch": 0.57, "grad_norm": 4.84675229456495, "learning_rate": 4.102190111578945e-06, "loss": 0.8446, "step": 19930 }, { "epoch": 0.57, "grad_norm": 7.822420169670465, "learning_rate": 4.101733894590568e-06, "loss": 0.476, "step": 19931 }, { "epoch": 0.57, "grad_norm": 4.206505135993176, "learning_rate": 4.101277685329863e-06, "loss": 0.3567, "step": 19932 }, { "epoch": 0.57, "grad_norm": 2.0623255713715323, "learning_rate": 4.100821483800755e-06, "loss": 0.2134, "step": 19933 }, { "epoch": 0.57, "grad_norm": 17.294263344477653, "learning_rate": 4.10036529000717e-06, "loss": 0.7116, "step": 19934 }, { "epoch": 0.57, "grad_norm": 13.187277453022709, "learning_rate": 4.0999091039530315e-06, "loss": 0.2995, "step": 19935 }, { "epoch": 0.57, "grad_norm": 7.202258738562149, "learning_rate": 4.099452925642262e-06, "loss": 0.4649, "step": 19936 }, { "epoch": 0.57, "grad_norm": 5.59331830452744, "learning_rate": 4.09899675507879e-06, "loss": 0.3777, "step": 19937 }, { "epoch": 0.57, "grad_norm": 11.462227195563472, "learning_rate": 4.098540592266539e-06, "loss": 0.4767, "step": 19938 }, { "epoch": 0.57, "grad_norm": 7.382010796831783, "learning_rate": 4.098084437209429e-06, "loss": 0.7316, "step": 19939 }, { "epoch": 0.57, "grad_norm": 9.466230907171589, "learning_rate": 4.0976282899113905e-06, "loss": 0.8764, "step": 19940 }, { "epoch": 0.57, "grad_norm": 3.1603289770351073, "learning_rate": 4.097172150376344e-06, "loss": 0.2982, "step": 19941 }, { "epoch": 0.57, "grad_norm": 7.501499661875885, "learning_rate": 4.096716018608214e-06, "loss": 0.5356, "step": 19942 }, { "epoch": 0.57, "grad_norm": 5.840988077682817, "learning_rate": 4.096259894610925e-06, "loss": 0.379, "step": 19943 }, { "epoch": 0.57, "grad_norm": 3.9122578463707582, "learning_rate": 4.0958037783884e-06, "loss": 0.2044, "step": 19944 }, { "epoch": 0.57, "grad_norm": 6.104865788164708, "learning_rate": 4.095347669944566e-06, "loss": 0.3978, "step": 19945 }, { "epoch": 0.57, "grad_norm": 6.348268036924247, "learning_rate": 4.094891569283342e-06, "loss": 0.4111, "step": 19946 }, { "epoch": 0.57, "grad_norm": 9.34138081090835, "learning_rate": 4.094435476408656e-06, "loss": 1.0518, "step": 19947 }, { "epoch": 0.57, "grad_norm": 7.47022274292939, "learning_rate": 4.093979391324429e-06, "loss": 0.324, "step": 19948 }, { "epoch": 0.57, "grad_norm": 3.999280060113545, "learning_rate": 4.093523314034587e-06, "loss": 0.1565, "step": 19949 }, { "epoch": 0.57, "grad_norm": 6.792200625283859, "learning_rate": 4.0930672445430515e-06, "loss": 1.0865, "step": 19950 }, { "epoch": 0.57, "grad_norm": 4.592487148646947, "learning_rate": 4.092611182853748e-06, "loss": 0.2883, "step": 19951 }, { "epoch": 0.57, "grad_norm": 3.0702513477554514, "learning_rate": 4.0921551289705975e-06, "loss": 0.6704, "step": 19952 }, { "epoch": 0.57, "grad_norm": 5.550235772494673, "learning_rate": 4.091699082897524e-06, "loss": 0.9076, "step": 19953 }, { "epoch": 0.57, "grad_norm": 6.667103474929901, "learning_rate": 4.091243044638453e-06, "loss": 0.6179, "step": 19954 }, { "epoch": 0.57, "grad_norm": 7.209590302465649, "learning_rate": 4.090787014197306e-06, "loss": 0.2717, "step": 19955 }, { "epoch": 0.57, "grad_norm": 4.999409354609652, "learning_rate": 4.090330991578007e-06, "loss": 0.6079, "step": 19956 }, { "epoch": 0.57, "grad_norm": 6.8503485096730286, "learning_rate": 4.089874976784476e-06, "loss": 0.4776, "step": 19957 }, { "epoch": 0.57, "grad_norm": 3.7971692069395275, "learning_rate": 4.08941896982064e-06, "loss": 0.4095, "step": 19958 }, { "epoch": 0.57, "grad_norm": 2.592723424971539, "learning_rate": 4.088962970690421e-06, "loss": 0.2143, "step": 19959 }, { "epoch": 0.57, "grad_norm": 5.5185229331422505, "learning_rate": 4.088506979397742e-06, "loss": 0.6299, "step": 19960 }, { "epoch": 0.57, "grad_norm": 3.52751029161105, "learning_rate": 4.088050995946522e-06, "loss": 0.1862, "step": 19961 }, { "epoch": 0.57, "grad_norm": 2.821110197836446, "learning_rate": 4.087595020340689e-06, "loss": 0.468, "step": 19962 }, { "epoch": 0.57, "grad_norm": 4.8947924325826975, "learning_rate": 4.087139052584166e-06, "loss": 0.6725, "step": 19963 }, { "epoch": 0.57, "grad_norm": 4.0112816740588215, "learning_rate": 4.086683092680871e-06, "loss": 0.1885, "step": 19964 }, { "epoch": 0.57, "grad_norm": 6.418842267837681, "learning_rate": 4.086227140634729e-06, "loss": 0.6109, "step": 19965 }, { "epoch": 0.57, "grad_norm": 4.197912195675017, "learning_rate": 4.085771196449662e-06, "loss": 0.3135, "step": 19966 }, { "epoch": 0.57, "grad_norm": 7.347115512482417, "learning_rate": 4.085315260129594e-06, "loss": 0.5268, "step": 19967 }, { "epoch": 0.57, "grad_norm": 2.788949317665846, "learning_rate": 4.084859331678445e-06, "loss": 0.1377, "step": 19968 }, { "epoch": 0.57, "grad_norm": 7.1161378993095505, "learning_rate": 4.084403411100137e-06, "loss": 0.772, "step": 19969 }, { "epoch": 0.57, "grad_norm": 3.840236639837599, "learning_rate": 4.083947498398597e-06, "loss": 0.3177, "step": 19970 }, { "epoch": 0.57, "grad_norm": 9.277995454083607, "learning_rate": 4.083491593577743e-06, "loss": 0.5182, "step": 19971 }, { "epoch": 0.57, "grad_norm": 7.60037134292289, "learning_rate": 4.083035696641498e-06, "loss": 0.4183, "step": 19972 }, { "epoch": 0.57, "grad_norm": 9.674965409406784, "learning_rate": 4.082579807593783e-06, "loss": 0.5371, "step": 19973 }, { "epoch": 0.57, "grad_norm": 4.956675030449637, "learning_rate": 4.082123926438522e-06, "loss": 0.188, "step": 19974 }, { "epoch": 0.57, "grad_norm": 3.8111533616457742, "learning_rate": 4.081668053179635e-06, "loss": 0.607, "step": 19975 }, { "epoch": 0.57, "grad_norm": 5.498621789608674, "learning_rate": 4.081212187821046e-06, "loss": 0.5964, "step": 19976 }, { "epoch": 0.57, "grad_norm": 9.937532484853179, "learning_rate": 4.080756330366673e-06, "loss": 0.7465, "step": 19977 }, { "epoch": 0.57, "grad_norm": 2.3220359489753255, "learning_rate": 4.080300480820442e-06, "loss": 0.2263, "step": 19978 }, { "epoch": 0.57, "grad_norm": 7.096755613585171, "learning_rate": 4.079844639186273e-06, "loss": 0.6961, "step": 19979 }, { "epoch": 0.57, "grad_norm": 3.1652343463768244, "learning_rate": 4.079388805468085e-06, "loss": 0.3141, "step": 19980 }, { "epoch": 0.57, "grad_norm": 6.264322635447379, "learning_rate": 4.078932979669804e-06, "loss": 0.3932, "step": 19981 }, { "epoch": 0.57, "grad_norm": 4.125388445048573, "learning_rate": 4.078477161795349e-06, "loss": 0.3536, "step": 19982 }, { "epoch": 0.57, "grad_norm": 7.599453514930263, "learning_rate": 4.078021351848641e-06, "loss": 0.6715, "step": 19983 }, { "epoch": 0.57, "grad_norm": 4.548430343931565, "learning_rate": 4.0775655498336015e-06, "loss": 0.5519, "step": 19984 }, { "epoch": 0.57, "grad_norm": 4.701498973514876, "learning_rate": 4.077109755754153e-06, "loss": 0.4481, "step": 19985 }, { "epoch": 0.57, "grad_norm": 4.160769565128515, "learning_rate": 4.076653969614215e-06, "loss": 0.4668, "step": 19986 }, { "epoch": 0.57, "grad_norm": 20.470876149415467, "learning_rate": 4.076198191417708e-06, "loss": 0.8147, "step": 19987 }, { "epoch": 0.57, "grad_norm": 8.112903158224196, "learning_rate": 4.0757424211685556e-06, "loss": 0.2171, "step": 19988 }, { "epoch": 0.57, "grad_norm": 5.2437177491134035, "learning_rate": 4.075286658870677e-06, "loss": 0.3616, "step": 19989 }, { "epoch": 0.57, "grad_norm": 2.7364780839648484, "learning_rate": 4.074830904527995e-06, "loss": 0.3179, "step": 19990 }, { "epoch": 0.57, "grad_norm": 4.504632075799412, "learning_rate": 4.074375158144426e-06, "loss": 0.5185, "step": 19991 }, { "epoch": 0.57, "grad_norm": 7.026692180526903, "learning_rate": 4.0739194197238975e-06, "loss": 0.8312, "step": 19992 }, { "epoch": 0.57, "grad_norm": 8.564984622413741, "learning_rate": 4.073463689270322e-06, "loss": 0.5669, "step": 19993 }, { "epoch": 0.57, "grad_norm": 3.8154817396508567, "learning_rate": 4.073007966787626e-06, "loss": 0.5666, "step": 19994 }, { "epoch": 0.57, "grad_norm": 6.321898536053761, "learning_rate": 4.072552252279729e-06, "loss": 0.6477, "step": 19995 }, { "epoch": 0.57, "grad_norm": 4.960383442854643, "learning_rate": 4.072096545750549e-06, "loss": 0.3695, "step": 19996 }, { "epoch": 0.57, "grad_norm": 5.234863964415588, "learning_rate": 4.07164084720401e-06, "loss": 0.6658, "step": 19997 }, { "epoch": 0.57, "grad_norm": 2.76695859404928, "learning_rate": 4.07118515664403e-06, "loss": 0.2681, "step": 19998 }, { "epoch": 0.57, "grad_norm": 6.9945269714703695, "learning_rate": 4.07072947407453e-06, "loss": 0.4654, "step": 19999 }, { "epoch": 0.57, "grad_norm": 4.108567858431429, "learning_rate": 4.070273799499428e-06, "loss": 0.4533, "step": 20000 }, { "epoch": 0.57, "grad_norm": 11.797259131234295, "learning_rate": 4.069818132922648e-06, "loss": 0.7705, "step": 20001 }, { "epoch": 0.57, "grad_norm": 3.8296081161209017, "learning_rate": 4.069362474348107e-06, "loss": 0.2234, "step": 20002 }, { "epoch": 0.57, "grad_norm": 7.949700958097804, "learning_rate": 4.068906823779726e-06, "loss": 0.2219, "step": 20003 }, { "epoch": 0.57, "grad_norm": 3.3625827467142524, "learning_rate": 4.068451181221427e-06, "loss": 0.2863, "step": 20004 }, { "epoch": 0.57, "grad_norm": 3.284393711926034, "learning_rate": 4.067995546677125e-06, "loss": 0.6893, "step": 20005 }, { "epoch": 0.57, "grad_norm": 3.003868688142666, "learning_rate": 4.067539920150745e-06, "loss": 0.2776, "step": 20006 }, { "epoch": 0.57, "grad_norm": 7.698076988963083, "learning_rate": 4.067084301646202e-06, "loss": 0.7563, "step": 20007 }, { "epoch": 0.57, "grad_norm": 4.364579570225419, "learning_rate": 4.0666286911674205e-06, "loss": 0.619, "step": 20008 }, { "epoch": 0.57, "grad_norm": 5.099059935490391, "learning_rate": 4.066173088718315e-06, "loss": 0.5428, "step": 20009 }, { "epoch": 0.57, "grad_norm": 5.789448034136136, "learning_rate": 4.0657174943028075e-06, "loss": 0.6685, "step": 20010 }, { "epoch": 0.57, "grad_norm": 3.9791508253224923, "learning_rate": 4.065261907924819e-06, "loss": 0.3985, "step": 20011 }, { "epoch": 0.57, "grad_norm": 7.080220230254927, "learning_rate": 4.064806329588267e-06, "loss": 0.1881, "step": 20012 }, { "epoch": 0.57, "grad_norm": 6.212837322141966, "learning_rate": 4.064350759297071e-06, "loss": 0.8113, "step": 20013 }, { "epoch": 0.57, "grad_norm": 2.853331646742352, "learning_rate": 4.063895197055148e-06, "loss": 0.1729, "step": 20014 }, { "epoch": 0.57, "grad_norm": 4.000962529007684, "learning_rate": 4.063439642866423e-06, "loss": 0.5686, "step": 20015 }, { "epoch": 0.57, "grad_norm": 6.663779750250227, "learning_rate": 4.0629840967348085e-06, "loss": 0.4012, "step": 20016 }, { "epoch": 0.57, "grad_norm": 4.471414212012728, "learning_rate": 4.062528558664228e-06, "loss": 0.6641, "step": 20017 }, { "epoch": 0.57, "grad_norm": 6.102211091056572, "learning_rate": 4.062073028658597e-06, "loss": 0.5397, "step": 20018 }, { "epoch": 0.57, "grad_norm": 3.137344240980511, "learning_rate": 4.061617506721838e-06, "loss": 0.4751, "step": 20019 }, { "epoch": 0.57, "grad_norm": 5.136311492440415, "learning_rate": 4.0611619928578686e-06, "loss": 0.6432, "step": 20020 }, { "epoch": 0.57, "grad_norm": 5.498525486876543, "learning_rate": 4.060706487070605e-06, "loss": 0.2561, "step": 20021 }, { "epoch": 0.57, "grad_norm": 4.304555624380125, "learning_rate": 4.060250989363971e-06, "loss": 0.5628, "step": 20022 }, { "epoch": 0.57, "grad_norm": 4.595336562232791, "learning_rate": 4.059795499741878e-06, "loss": 0.4245, "step": 20023 }, { "epoch": 0.57, "grad_norm": 6.138093110927989, "learning_rate": 4.059340018208252e-06, "loss": 0.3281, "step": 20024 }, { "epoch": 0.57, "grad_norm": 6.027702128052336, "learning_rate": 4.058884544767005e-06, "loss": 0.5161, "step": 20025 }, { "epoch": 0.57, "grad_norm": 5.14465424572385, "learning_rate": 4.05842907942206e-06, "loss": 0.4939, "step": 20026 }, { "epoch": 0.57, "grad_norm": 7.206385406254243, "learning_rate": 4.057973622177333e-06, "loss": 0.5616, "step": 20027 }, { "epoch": 0.57, "grad_norm": 2.297485024026704, "learning_rate": 4.057518173036744e-06, "loss": 0.1487, "step": 20028 }, { "epoch": 0.57, "grad_norm": 4.013260913609476, "learning_rate": 4.057062732004211e-06, "loss": 0.3793, "step": 20029 }, { "epoch": 0.57, "grad_norm": 4.293970020770536, "learning_rate": 4.056607299083648e-06, "loss": 0.4905, "step": 20030 }, { "epoch": 0.57, "grad_norm": 4.583901000270353, "learning_rate": 4.0561518742789795e-06, "loss": 0.4694, "step": 20031 }, { "epoch": 0.57, "grad_norm": 5.828285808236749, "learning_rate": 4.055696457594118e-06, "loss": 0.4641, "step": 20032 }, { "epoch": 0.57, "grad_norm": 4.7652782876888375, "learning_rate": 4.055241049032986e-06, "loss": 0.5792, "step": 20033 }, { "epoch": 0.57, "grad_norm": 4.1654897680746465, "learning_rate": 4.054785648599495e-06, "loss": 0.3413, "step": 20034 }, { "epoch": 0.57, "grad_norm": 6.363472632989448, "learning_rate": 4.054330256297569e-06, "loss": 0.3639, "step": 20035 }, { "epoch": 0.57, "grad_norm": 4.1805298143666665, "learning_rate": 4.053874872131125e-06, "loss": 0.3395, "step": 20036 }, { "epoch": 0.57, "grad_norm": 5.065930511091401, "learning_rate": 4.053419496104076e-06, "loss": 0.4243, "step": 20037 }, { "epoch": 0.57, "grad_norm": 9.521450721094372, "learning_rate": 4.052964128220345e-06, "loss": 1.0946, "step": 20038 }, { "epoch": 0.57, "grad_norm": 7.362760700474129, "learning_rate": 4.052508768483846e-06, "loss": 0.4355, "step": 20039 }, { "epoch": 0.57, "grad_norm": 2.6778000838149576, "learning_rate": 4.052053416898498e-06, "loss": 0.2602, "step": 20040 }, { "epoch": 0.57, "grad_norm": 4.424645004472522, "learning_rate": 4.051598073468216e-06, "loss": 0.2727, "step": 20041 }, { "epoch": 0.57, "grad_norm": 5.037151855668278, "learning_rate": 4.051142738196921e-06, "loss": 0.5006, "step": 20042 }, { "epoch": 0.57, "grad_norm": 6.513363800312708, "learning_rate": 4.050687411088527e-06, "loss": 0.3197, "step": 20043 }, { "epoch": 0.57, "grad_norm": 2.454140981227827, "learning_rate": 4.0502320921469515e-06, "loss": 0.1906, "step": 20044 }, { "epoch": 0.57, "grad_norm": 6.931504967061513, "learning_rate": 4.049776781376114e-06, "loss": 0.8176, "step": 20045 }, { "epoch": 0.57, "grad_norm": 9.880779265278631, "learning_rate": 4.049321478779929e-06, "loss": 0.2817, "step": 20046 }, { "epoch": 0.57, "grad_norm": 5.077387759171588, "learning_rate": 4.0488661843623154e-06, "loss": 0.5351, "step": 20047 }, { "epoch": 0.57, "grad_norm": 6.722610562139906, "learning_rate": 4.048410898127187e-06, "loss": 0.2455, "step": 20048 }, { "epoch": 0.57, "grad_norm": 5.594145009675941, "learning_rate": 4.047955620078465e-06, "loss": 0.5047, "step": 20049 }, { "epoch": 0.57, "grad_norm": 5.929006665613858, "learning_rate": 4.047500350220061e-06, "loss": 0.5507, "step": 20050 }, { "epoch": 0.57, "grad_norm": 5.352879227197787, "learning_rate": 4.047045088555895e-06, "loss": 0.5045, "step": 20051 }, { "epoch": 0.57, "grad_norm": 3.7648033733621116, "learning_rate": 4.046589835089885e-06, "loss": 0.6138, "step": 20052 }, { "epoch": 0.57, "grad_norm": 5.351791532864166, "learning_rate": 4.046134589825942e-06, "loss": 0.5838, "step": 20053 }, { "epoch": 0.57, "grad_norm": 3.588637040213019, "learning_rate": 4.0456793527679875e-06, "loss": 0.3526, "step": 20054 }, { "epoch": 0.57, "grad_norm": 4.392990900289453, "learning_rate": 4.0452241239199356e-06, "loss": 0.2709, "step": 20055 }, { "epoch": 0.57, "grad_norm": 3.517769629874733, "learning_rate": 4.044768903285704e-06, "loss": 0.5871, "step": 20056 }, { "epoch": 0.57, "grad_norm": 16.5455562653211, "learning_rate": 4.044313690869206e-06, "loss": 0.7853, "step": 20057 }, { "epoch": 0.57, "grad_norm": 6.383984723068066, "learning_rate": 4.043858486674361e-06, "loss": 0.6378, "step": 20058 }, { "epoch": 0.57, "grad_norm": 5.115298183024253, "learning_rate": 4.043403290705084e-06, "loss": 0.8394, "step": 20059 }, { "epoch": 0.57, "grad_norm": 2.011306024082873, "learning_rate": 4.042948102965288e-06, "loss": 0.2646, "step": 20060 }, { "epoch": 0.57, "grad_norm": 6.056493320045387, "learning_rate": 4.042492923458894e-06, "loss": 0.2458, "step": 20061 }, { "epoch": 0.57, "grad_norm": 3.688082956043087, "learning_rate": 4.042037752189816e-06, "loss": 0.3767, "step": 20062 }, { "epoch": 0.57, "grad_norm": 6.039412830056259, "learning_rate": 4.041582589161969e-06, "loss": 1.0462, "step": 20063 }, { "epoch": 0.57, "grad_norm": 6.905486465985519, "learning_rate": 4.041127434379267e-06, "loss": 0.707, "step": 20064 }, { "epoch": 0.57, "grad_norm": 12.765357548166033, "learning_rate": 4.04067228784563e-06, "loss": 0.7771, "step": 20065 }, { "epoch": 0.57, "grad_norm": 5.6715739566998975, "learning_rate": 4.04021714956497e-06, "loss": 0.5135, "step": 20066 }, { "epoch": 0.57, "grad_norm": 16.140275801863023, "learning_rate": 4.039762019541204e-06, "loss": 0.5869, "step": 20067 }, { "epoch": 0.57, "grad_norm": 1.6520504251484116, "learning_rate": 4.039306897778247e-06, "loss": 0.1196, "step": 20068 }, { "epoch": 0.57, "grad_norm": 3.7682584995621116, "learning_rate": 4.038851784280014e-06, "loss": 0.377, "step": 20069 }, { "epoch": 0.57, "grad_norm": 5.781605230191764, "learning_rate": 4.038396679050422e-06, "loss": 0.6033, "step": 20070 }, { "epoch": 0.57, "grad_norm": 7.279879543361125, "learning_rate": 4.037941582093383e-06, "loss": 0.4882, "step": 20071 }, { "epoch": 0.57, "grad_norm": 4.161965964257436, "learning_rate": 4.037486493412816e-06, "loss": 0.4168, "step": 20072 }, { "epoch": 0.57, "grad_norm": 5.46017666866247, "learning_rate": 4.0370314130126335e-06, "loss": 0.238, "step": 20073 }, { "epoch": 0.57, "grad_norm": 8.447793516742324, "learning_rate": 4.036576340896752e-06, "loss": 0.7, "step": 20074 }, { "epoch": 0.57, "grad_norm": 8.80540082964722, "learning_rate": 4.036121277069084e-06, "loss": 0.4464, "step": 20075 }, { "epoch": 0.57, "grad_norm": 7.218365118161831, "learning_rate": 4.035666221533546e-06, "loss": 0.6662, "step": 20076 }, { "epoch": 0.57, "grad_norm": 9.879119689735328, "learning_rate": 4.035211174294055e-06, "loss": 0.9586, "step": 20077 }, { "epoch": 0.57, "grad_norm": 6.802069702446343, "learning_rate": 4.03475613535452e-06, "loss": 0.4775, "step": 20078 }, { "epoch": 0.58, "grad_norm": 5.860767778803539, "learning_rate": 4.0343011047188636e-06, "loss": 0.5066, "step": 20079 }, { "epoch": 0.58, "grad_norm": 5.912724896724496, "learning_rate": 4.033846082390992e-06, "loss": 0.7278, "step": 20080 }, { "epoch": 0.58, "grad_norm": 8.611461573479763, "learning_rate": 4.033391068374826e-06, "loss": 0.7794, "step": 20081 }, { "epoch": 0.58, "grad_norm": 6.249069678684985, "learning_rate": 4.032936062674277e-06, "loss": 0.4869, "step": 20082 }, { "epoch": 0.58, "grad_norm": 5.267292064442475, "learning_rate": 4.032481065293261e-06, "loss": 0.4689, "step": 20083 }, { "epoch": 0.58, "grad_norm": 3.21730878655988, "learning_rate": 4.032026076235688e-06, "loss": 0.4251, "step": 20084 }, { "epoch": 0.58, "grad_norm": 7.691608726583564, "learning_rate": 4.0315710955054774e-06, "loss": 0.6137, "step": 20085 }, { "epoch": 0.58, "grad_norm": 4.453156641797535, "learning_rate": 4.031116123106542e-06, "loss": 0.3595, "step": 20086 }, { "epoch": 0.58, "grad_norm": 3.9968829526963794, "learning_rate": 4.030661159042793e-06, "loss": 0.3836, "step": 20087 }, { "epoch": 0.58, "grad_norm": 6.713477581025091, "learning_rate": 4.0302062033181495e-06, "loss": 0.5983, "step": 20088 }, { "epoch": 0.58, "grad_norm": 9.907677003144219, "learning_rate": 4.029751255936522e-06, "loss": 0.4194, "step": 20089 }, { "epoch": 0.58, "grad_norm": 4.129692096685997, "learning_rate": 4.029296316901825e-06, "loss": 0.4043, "step": 20090 }, { "epoch": 0.58, "grad_norm": 8.259521653962201, "learning_rate": 4.028841386217971e-06, "loss": 0.3112, "step": 20091 }, { "epoch": 0.58, "grad_norm": 5.02638978954901, "learning_rate": 4.0283864638888755e-06, "loss": 0.5001, "step": 20092 }, { "epoch": 0.58, "grad_norm": 6.159422120137118, "learning_rate": 4.027931549918453e-06, "loss": 0.8946, "step": 20093 }, { "epoch": 0.58, "grad_norm": 8.355479764284611, "learning_rate": 4.027476644310613e-06, "loss": 0.6654, "step": 20094 }, { "epoch": 0.58, "grad_norm": 2.933393306462269, "learning_rate": 4.0270217470692745e-06, "loss": 0.0696, "step": 20095 }, { "epoch": 0.58, "grad_norm": 7.017851104650531, "learning_rate": 4.026566858198347e-06, "loss": 0.4079, "step": 20096 }, { "epoch": 0.58, "grad_norm": 5.593814274082987, "learning_rate": 4.026111977701746e-06, "loss": 0.3459, "step": 20097 }, { "epoch": 0.58, "grad_norm": 4.7965447625917035, "learning_rate": 4.025657105583381e-06, "loss": 0.6925, "step": 20098 }, { "epoch": 0.58, "grad_norm": 5.827167565295351, "learning_rate": 4.025202241847171e-06, "loss": 0.536, "step": 20099 }, { "epoch": 0.58, "grad_norm": 5.703178091977833, "learning_rate": 4.024747386497024e-06, "loss": 0.486, "step": 20100 }, { "epoch": 0.58, "grad_norm": 6.679978534983795, "learning_rate": 4.0242925395368544e-06, "loss": 0.5408, "step": 20101 }, { "epoch": 0.58, "grad_norm": 4.356377066424225, "learning_rate": 4.023837700970579e-06, "loss": 0.7563, "step": 20102 }, { "epoch": 0.58, "grad_norm": 8.45264928469854, "learning_rate": 4.023382870802106e-06, "loss": 0.2393, "step": 20103 }, { "epoch": 0.58, "grad_norm": 4.280478108565196, "learning_rate": 4.0229280490353506e-06, "loss": 0.2416, "step": 20104 }, { "epoch": 0.58, "grad_norm": 5.228855924251577, "learning_rate": 4.022473235674223e-06, "loss": 0.5192, "step": 20105 }, { "epoch": 0.58, "grad_norm": 5.668793484858539, "learning_rate": 4.02201843072264e-06, "loss": 0.292, "step": 20106 }, { "epoch": 0.58, "grad_norm": 9.578415236190674, "learning_rate": 4.021563634184511e-06, "loss": 0.6961, "step": 20107 }, { "epoch": 0.58, "grad_norm": 5.716518154198541, "learning_rate": 4.021108846063751e-06, "loss": 0.3168, "step": 20108 }, { "epoch": 0.58, "grad_norm": 3.0693986615026487, "learning_rate": 4.020654066364269e-06, "loss": 0.2082, "step": 20109 }, { "epoch": 0.58, "grad_norm": 3.953447984611124, "learning_rate": 4.020199295089979e-06, "loss": 0.3005, "step": 20110 }, { "epoch": 0.58, "grad_norm": 5.706367887551469, "learning_rate": 4.019744532244797e-06, "loss": 0.5465, "step": 20111 }, { "epoch": 0.58, "grad_norm": 5.110183021977998, "learning_rate": 4.0192897778326294e-06, "loss": 0.7172, "step": 20112 }, { "epoch": 0.58, "grad_norm": 6.629355060802392, "learning_rate": 4.018835031857392e-06, "loss": 0.5744, "step": 20113 }, { "epoch": 0.58, "grad_norm": 5.704752943154115, "learning_rate": 4.018380294322995e-06, "loss": 0.4762, "step": 20114 }, { "epoch": 0.58, "grad_norm": 7.602731299251484, "learning_rate": 4.017925565233354e-06, "loss": 0.6388, "step": 20115 }, { "epoch": 0.58, "grad_norm": 2.808284875176232, "learning_rate": 4.017470844592376e-06, "loss": 0.2449, "step": 20116 }, { "epoch": 0.58, "grad_norm": 2.8062058273271235, "learning_rate": 4.017016132403975e-06, "loss": 0.2228, "step": 20117 }, { "epoch": 0.58, "grad_norm": 6.4646602362548675, "learning_rate": 4.016561428672066e-06, "loss": 0.4369, "step": 20118 }, { "epoch": 0.58, "grad_norm": 6.089789291833257, "learning_rate": 4.016106733400555e-06, "loss": 0.3098, "step": 20119 }, { "epoch": 0.58, "grad_norm": 3.9697092954440736, "learning_rate": 4.0156520465933595e-06, "loss": 0.4155, "step": 20120 }, { "epoch": 0.58, "grad_norm": 6.889146313121767, "learning_rate": 4.015197368254385e-06, "loss": 0.4379, "step": 20121 }, { "epoch": 0.58, "grad_norm": 6.562681795054492, "learning_rate": 4.014742698387549e-06, "loss": 0.7691, "step": 20122 }, { "epoch": 0.58, "grad_norm": 7.299358263941335, "learning_rate": 4.01428803699676e-06, "loss": 0.5656, "step": 20123 }, { "epoch": 0.58, "grad_norm": 20.942443810567788, "learning_rate": 4.0138333840859285e-06, "loss": 0.5372, "step": 20124 }, { "epoch": 0.58, "grad_norm": 5.2324108481068485, "learning_rate": 4.013378739658967e-06, "loss": 0.3403, "step": 20125 }, { "epoch": 0.58, "grad_norm": 3.2184225860288675, "learning_rate": 4.012924103719787e-06, "loss": 0.1241, "step": 20126 }, { "epoch": 0.58, "grad_norm": 3.0604637538818302, "learning_rate": 4.0124694762723e-06, "loss": 0.1058, "step": 20127 }, { "epoch": 0.58, "grad_norm": 6.983271910671182, "learning_rate": 4.012014857320415e-06, "loss": 0.5235, "step": 20128 }, { "epoch": 0.58, "grad_norm": 6.008549797543136, "learning_rate": 4.011560246868046e-06, "loss": 0.7164, "step": 20129 }, { "epoch": 0.58, "grad_norm": 6.641121304983067, "learning_rate": 4.011105644919102e-06, "loss": 0.8482, "step": 20130 }, { "epoch": 0.58, "grad_norm": 8.384453860781816, "learning_rate": 4.010651051477495e-06, "loss": 0.7315, "step": 20131 }, { "epoch": 0.58, "grad_norm": 8.615290387130885, "learning_rate": 4.010196466547134e-06, "loss": 0.4643, "step": 20132 }, { "epoch": 0.58, "grad_norm": 4.756166796764814, "learning_rate": 4.009741890131931e-06, "loss": 0.4111, "step": 20133 }, { "epoch": 0.58, "grad_norm": 2.912060251723506, "learning_rate": 4.009287322235798e-06, "loss": 0.3747, "step": 20134 }, { "epoch": 0.58, "grad_norm": 14.574000912901472, "learning_rate": 4.008832762862643e-06, "loss": 1.2593, "step": 20135 }, { "epoch": 0.58, "grad_norm": 5.463299329224005, "learning_rate": 4.008378212016378e-06, "loss": 0.3711, "step": 20136 }, { "epoch": 0.58, "grad_norm": 9.125227938378753, "learning_rate": 4.007923669700913e-06, "loss": 0.5781, "step": 20137 }, { "epoch": 0.58, "grad_norm": 5.835913682350264, "learning_rate": 4.00746913592016e-06, "loss": 0.5928, "step": 20138 }, { "epoch": 0.58, "grad_norm": 8.264071053457062, "learning_rate": 4.007014610678026e-06, "loss": 0.2887, "step": 20139 }, { "epoch": 0.58, "grad_norm": 6.881581572011696, "learning_rate": 4.0065600939784246e-06, "loss": 0.6512, "step": 20140 }, { "epoch": 0.58, "grad_norm": 4.913154748667152, "learning_rate": 4.0061055858252615e-06, "loss": 0.4341, "step": 20141 }, { "epoch": 0.58, "grad_norm": 4.248150591406292, "learning_rate": 4.0056510862224514e-06, "loss": 0.6091, "step": 20142 }, { "epoch": 0.58, "grad_norm": 7.631121945068296, "learning_rate": 4.005196595173904e-06, "loss": 0.6518, "step": 20143 }, { "epoch": 0.58, "grad_norm": 1.874937851193721, "learning_rate": 4.004742112683525e-06, "loss": 0.1232, "step": 20144 }, { "epoch": 0.58, "grad_norm": 3.58023530455837, "learning_rate": 4.00428763875523e-06, "loss": 0.6215, "step": 20145 }, { "epoch": 0.58, "grad_norm": 18.787623258733316, "learning_rate": 4.003833173392924e-06, "loss": 0.7555, "step": 20146 }, { "epoch": 0.58, "grad_norm": 5.547715974912519, "learning_rate": 4.00337871660052e-06, "loss": 0.2915, "step": 20147 }, { "epoch": 0.58, "grad_norm": 4.177942499984006, "learning_rate": 4.0029242683819235e-06, "loss": 0.1761, "step": 20148 }, { "epoch": 0.58, "grad_norm": 5.237926633296995, "learning_rate": 4.002469828741049e-06, "loss": 0.902, "step": 20149 }, { "epoch": 0.58, "grad_norm": 3.5914391261171223, "learning_rate": 4.002015397681804e-06, "loss": 0.4108, "step": 20150 }, { "epoch": 0.58, "grad_norm": 5.499686925820729, "learning_rate": 4.001560975208095e-06, "loss": 0.3357, "step": 20151 }, { "epoch": 0.58, "grad_norm": 5.8820530779926665, "learning_rate": 4.001106561323837e-06, "loss": 1.0743, "step": 20152 }, { "epoch": 0.58, "grad_norm": 8.75099982952562, "learning_rate": 4.0006521560329345e-06, "loss": 0.9792, "step": 20153 }, { "epoch": 0.58, "grad_norm": 3.5774146961675712, "learning_rate": 4.0001977593393005e-06, "loss": 0.7853, "step": 20154 }, { "epoch": 0.58, "grad_norm": 6.363489230733933, "learning_rate": 3.999743371246839e-06, "loss": 0.2022, "step": 20155 }, { "epoch": 0.58, "grad_norm": 2.5804396902279203, "learning_rate": 3.999288991759465e-06, "loss": 0.39, "step": 20156 }, { "epoch": 0.58, "grad_norm": 6.498277821176129, "learning_rate": 3.998834620881083e-06, "loss": 0.5248, "step": 20157 }, { "epoch": 0.58, "grad_norm": 6.306063628301882, "learning_rate": 3.998380258615603e-06, "loss": 0.48, "step": 20158 }, { "epoch": 0.58, "grad_norm": 1.3652294633637843, "learning_rate": 3.997925904966936e-06, "loss": 0.0478, "step": 20159 }, { "epoch": 0.58, "grad_norm": 6.671438770193039, "learning_rate": 3.997471559938988e-06, "loss": 0.4671, "step": 20160 }, { "epoch": 0.58, "grad_norm": 3.352949180153052, "learning_rate": 3.99701722353567e-06, "loss": 0.338, "step": 20161 }, { "epoch": 0.58, "grad_norm": 3.798481228638313, "learning_rate": 3.996562895760887e-06, "loss": 0.182, "step": 20162 }, { "epoch": 0.58, "grad_norm": 6.175952896927422, "learning_rate": 3.996108576618552e-06, "loss": 0.3399, "step": 20163 }, { "epoch": 0.58, "grad_norm": 10.673845279606525, "learning_rate": 3.99565426611257e-06, "loss": 0.5778, "step": 20164 }, { "epoch": 0.58, "grad_norm": 3.1512192962433865, "learning_rate": 3.995199964246852e-06, "loss": 0.523, "step": 20165 }, { "epoch": 0.58, "grad_norm": 6.999800883594701, "learning_rate": 3.9947456710253045e-06, "loss": 0.7215, "step": 20166 }, { "epoch": 0.58, "grad_norm": 3.4377117438557865, "learning_rate": 3.994291386451834e-06, "loss": 0.2093, "step": 20167 }, { "epoch": 0.58, "grad_norm": 9.93864573465628, "learning_rate": 3.993837110530354e-06, "loss": 0.8576, "step": 20168 }, { "epoch": 0.58, "grad_norm": 5.841455427296714, "learning_rate": 3.993382843264768e-06, "loss": 0.7011, "step": 20169 }, { "epoch": 0.58, "grad_norm": 8.259836170890212, "learning_rate": 3.992928584658986e-06, "loss": 0.6488, "step": 20170 }, { "epoch": 0.58, "grad_norm": 9.197231925340416, "learning_rate": 3.992474334716914e-06, "loss": 0.657, "step": 20171 }, { "epoch": 0.58, "grad_norm": 4.675644492537179, "learning_rate": 3.992020093442464e-06, "loss": 0.7536, "step": 20172 }, { "epoch": 0.58, "grad_norm": 2.596390069976674, "learning_rate": 3.9915658608395395e-06, "loss": 0.2223, "step": 20173 }, { "epoch": 0.58, "grad_norm": 7.093391510227714, "learning_rate": 3.991111636912047e-06, "loss": 0.2836, "step": 20174 }, { "epoch": 0.58, "grad_norm": 4.21400424915601, "learning_rate": 3.990657421663901e-06, "loss": 0.594, "step": 20175 }, { "epoch": 0.58, "grad_norm": 4.488145446050809, "learning_rate": 3.990203215099004e-06, "loss": 0.6177, "step": 20176 }, { "epoch": 0.58, "grad_norm": 8.75281029938696, "learning_rate": 3.989749017221264e-06, "loss": 0.4318, "step": 20177 }, { "epoch": 0.58, "grad_norm": 10.424583163264016, "learning_rate": 3.989294828034587e-06, "loss": 0.9731, "step": 20178 }, { "epoch": 0.58, "grad_norm": 10.111998699574448, "learning_rate": 3.988840647542885e-06, "loss": 0.2888, "step": 20179 }, { "epoch": 0.58, "grad_norm": 4.34860764724711, "learning_rate": 3.98838647575006e-06, "loss": 0.5207, "step": 20180 }, { "epoch": 0.58, "grad_norm": 7.506539354933017, "learning_rate": 3.987932312660024e-06, "loss": 0.5007, "step": 20181 }, { "epoch": 0.58, "grad_norm": 5.888051854731958, "learning_rate": 3.98747815827668e-06, "loss": 0.4675, "step": 20182 }, { "epoch": 0.58, "grad_norm": 9.078985097982775, "learning_rate": 3.987024012603937e-06, "loss": 0.6871, "step": 20183 }, { "epoch": 0.58, "grad_norm": 5.319288382082151, "learning_rate": 3.986569875645703e-06, "loss": 0.736, "step": 20184 }, { "epoch": 0.58, "grad_norm": 4.995602438169531, "learning_rate": 3.986115747405881e-06, "loss": 0.3574, "step": 20185 }, { "epoch": 0.58, "grad_norm": 4.766513679067803, "learning_rate": 3.985661627888382e-06, "loss": 0.4225, "step": 20186 }, { "epoch": 0.58, "grad_norm": 4.304740877737877, "learning_rate": 3.985207517097112e-06, "loss": 0.1378, "step": 20187 }, { "epoch": 0.58, "grad_norm": 3.7135125471616823, "learning_rate": 3.9847534150359765e-06, "loss": 0.578, "step": 20188 }, { "epoch": 0.58, "grad_norm": 4.434081574208832, "learning_rate": 3.98429932170888e-06, "loss": 0.273, "step": 20189 }, { "epoch": 0.58, "grad_norm": 2.8787912270744753, "learning_rate": 3.9838452371197336e-06, "loss": 0.1385, "step": 20190 }, { "epoch": 0.58, "grad_norm": 4.201881423257585, "learning_rate": 3.983391161272442e-06, "loss": 0.4032, "step": 20191 }, { "epoch": 0.58, "grad_norm": 5.1704259496273455, "learning_rate": 3.982937094170908e-06, "loss": 0.5016, "step": 20192 }, { "epoch": 0.58, "grad_norm": 5.208732172635914, "learning_rate": 3.9824830358190436e-06, "loss": 0.5309, "step": 20193 }, { "epoch": 0.58, "grad_norm": 4.04137892862255, "learning_rate": 3.982028986220752e-06, "loss": 0.3569, "step": 20194 }, { "epoch": 0.58, "grad_norm": 2.653285088443962, "learning_rate": 3.98157494537994e-06, "loss": 0.3223, "step": 20195 }, { "epoch": 0.58, "grad_norm": 6.7512739533464, "learning_rate": 3.981120913300513e-06, "loss": 0.3612, "step": 20196 }, { "epoch": 0.58, "grad_norm": 3.367854112104518, "learning_rate": 3.980666889986378e-06, "loss": 0.1427, "step": 20197 }, { "epoch": 0.58, "grad_norm": 5.86624085044188, "learning_rate": 3.980212875441438e-06, "loss": 0.4129, "step": 20198 }, { "epoch": 0.58, "grad_norm": 5.769357563728446, "learning_rate": 3.979758869669602e-06, "loss": 0.5132, "step": 20199 }, { "epoch": 0.58, "grad_norm": 6.658532337879894, "learning_rate": 3.979304872674776e-06, "loss": 0.5535, "step": 20200 }, { "epoch": 0.58, "grad_norm": 6.686837404171973, "learning_rate": 3.978850884460863e-06, "loss": 0.1785, "step": 20201 }, { "epoch": 0.58, "grad_norm": 7.955153830411775, "learning_rate": 3.978396905031771e-06, "loss": 0.7257, "step": 20202 }, { "epoch": 0.58, "grad_norm": 7.989445220536017, "learning_rate": 3.977942934391405e-06, "loss": 0.4938, "step": 20203 }, { "epoch": 0.58, "grad_norm": 2.694246805672488, "learning_rate": 3.9774889725436705e-06, "loss": 0.2789, "step": 20204 }, { "epoch": 0.58, "grad_norm": 5.791178739929803, "learning_rate": 3.97703501949247e-06, "loss": 0.2605, "step": 20205 }, { "epoch": 0.58, "grad_norm": 4.320742677101128, "learning_rate": 3.976581075241715e-06, "loss": 0.2593, "step": 20206 }, { "epoch": 0.58, "grad_norm": 3.2645020630406814, "learning_rate": 3.976127139795304e-06, "loss": 0.3902, "step": 20207 }, { "epoch": 0.58, "grad_norm": 5.785129791755626, "learning_rate": 3.975673213157145e-06, "loss": 0.7136, "step": 20208 }, { "epoch": 0.58, "grad_norm": 4.80541381308395, "learning_rate": 3.975219295331145e-06, "loss": 0.4497, "step": 20209 }, { "epoch": 0.58, "grad_norm": 4.359102849087846, "learning_rate": 3.974765386321206e-06, "loss": 0.3034, "step": 20210 }, { "epoch": 0.58, "grad_norm": 3.461911669301227, "learning_rate": 3.974311486131236e-06, "loss": 0.2238, "step": 20211 }, { "epoch": 0.58, "grad_norm": 4.028518845723238, "learning_rate": 3.9738575947651345e-06, "loss": 0.6401, "step": 20212 }, { "epoch": 0.58, "grad_norm": 4.587285482187998, "learning_rate": 3.973403712226813e-06, "loss": 0.4455, "step": 20213 }, { "epoch": 0.58, "grad_norm": 5.915153150889432, "learning_rate": 3.972949838520172e-06, "loss": 0.5066, "step": 20214 }, { "epoch": 0.58, "grad_norm": 13.304332179612903, "learning_rate": 3.972495973649116e-06, "loss": 0.639, "step": 20215 }, { "epoch": 0.58, "grad_norm": 3.75390955262927, "learning_rate": 3.972042117617552e-06, "loss": 0.6626, "step": 20216 }, { "epoch": 0.58, "grad_norm": 5.9406143854599245, "learning_rate": 3.971588270429383e-06, "loss": 0.263, "step": 20217 }, { "epoch": 0.58, "grad_norm": 4.412353941145351, "learning_rate": 3.971134432088514e-06, "loss": 0.1234, "step": 20218 }, { "epoch": 0.58, "grad_norm": 5.525711950975642, "learning_rate": 3.970680602598847e-06, "loss": 0.2651, "step": 20219 }, { "epoch": 0.58, "grad_norm": 5.781491186936096, "learning_rate": 3.97022678196429e-06, "loss": 0.3511, "step": 20220 }, { "epoch": 0.58, "grad_norm": 7.606653373009012, "learning_rate": 3.969772970188744e-06, "loss": 0.4326, "step": 20221 }, { "epoch": 0.58, "grad_norm": 6.5609352062754605, "learning_rate": 3.969319167276115e-06, "loss": 0.5965, "step": 20222 }, { "epoch": 0.58, "grad_norm": 5.168003119644901, "learning_rate": 3.968865373230305e-06, "loss": 0.5458, "step": 20223 }, { "epoch": 0.58, "grad_norm": 7.991778291460367, "learning_rate": 3.96841158805522e-06, "loss": 0.348, "step": 20224 }, { "epoch": 0.58, "grad_norm": 5.669832350457437, "learning_rate": 3.9679578117547634e-06, "loss": 0.5798, "step": 20225 }, { "epoch": 0.58, "grad_norm": 4.7558760938409295, "learning_rate": 3.967504044332839e-06, "loss": 0.4501, "step": 20226 }, { "epoch": 0.58, "grad_norm": 8.064345422137803, "learning_rate": 3.96705028579335e-06, "loss": 0.3698, "step": 20227 }, { "epoch": 0.58, "grad_norm": 4.5465567565919445, "learning_rate": 3.966596536140199e-06, "loss": 0.2702, "step": 20228 }, { "epoch": 0.58, "grad_norm": 5.105601839462949, "learning_rate": 3.9661427953772926e-06, "loss": 0.7536, "step": 20229 }, { "epoch": 0.58, "grad_norm": 5.023685787347711, "learning_rate": 3.965689063508531e-06, "loss": 0.4238, "step": 20230 }, { "epoch": 0.58, "grad_norm": 6.021082000701159, "learning_rate": 3.965235340537821e-06, "loss": 0.4532, "step": 20231 }, { "epoch": 0.58, "grad_norm": 6.678816889192851, "learning_rate": 3.96478162646906e-06, "loss": 0.5653, "step": 20232 }, { "epoch": 0.58, "grad_norm": 3.60096729184471, "learning_rate": 3.964327921306158e-06, "loss": 0.2577, "step": 20233 }, { "epoch": 0.58, "grad_norm": 7.133125490505944, "learning_rate": 3.963874225053016e-06, "loss": 0.5701, "step": 20234 }, { "epoch": 0.58, "grad_norm": 5.499954396839083, "learning_rate": 3.963420537713534e-06, "loss": 0.5222, "step": 20235 }, { "epoch": 0.58, "grad_norm": 4.068838515249093, "learning_rate": 3.96296685929162e-06, "loss": 0.2401, "step": 20236 }, { "epoch": 0.58, "grad_norm": 4.633441052568288, "learning_rate": 3.962513189791173e-06, "loss": 0.654, "step": 20237 }, { "epoch": 0.58, "grad_norm": 6.395669228833176, "learning_rate": 3.962059529216099e-06, "loss": 0.3461, "step": 20238 }, { "epoch": 0.58, "grad_norm": 6.251302316405161, "learning_rate": 3.961605877570296e-06, "loss": 0.5011, "step": 20239 }, { "epoch": 0.58, "grad_norm": 6.34485020986013, "learning_rate": 3.961152234857671e-06, "loss": 0.3931, "step": 20240 }, { "epoch": 0.58, "grad_norm": 5.180054296334158, "learning_rate": 3.960698601082128e-06, "loss": 0.5642, "step": 20241 }, { "epoch": 0.58, "grad_norm": 7.697873164885532, "learning_rate": 3.9602449762475635e-06, "loss": 0.8976, "step": 20242 }, { "epoch": 0.58, "grad_norm": 6.269341410677266, "learning_rate": 3.959791360357886e-06, "loss": 0.6441, "step": 20243 }, { "epoch": 0.58, "grad_norm": 7.430290671011397, "learning_rate": 3.959337753416994e-06, "loss": 0.257, "step": 20244 }, { "epoch": 0.58, "grad_norm": 7.564817971852701, "learning_rate": 3.958884155428792e-06, "loss": 0.4329, "step": 20245 }, { "epoch": 0.58, "grad_norm": 3.9805665607807525, "learning_rate": 3.95843056639718e-06, "loss": 0.4525, "step": 20246 }, { "epoch": 0.58, "grad_norm": 10.399208642122018, "learning_rate": 3.957976986326063e-06, "loss": 0.3836, "step": 20247 }, { "epoch": 0.58, "grad_norm": 11.076088076459312, "learning_rate": 3.957523415219341e-06, "loss": 0.751, "step": 20248 }, { "epoch": 0.58, "grad_norm": 4.8531703772453, "learning_rate": 3.957069853080916e-06, "loss": 0.4715, "step": 20249 }, { "epoch": 0.58, "grad_norm": 15.608518040438724, "learning_rate": 3.956616299914692e-06, "loss": 0.6583, "step": 20250 }, { "epoch": 0.58, "grad_norm": 5.679524205522047, "learning_rate": 3.956162755724569e-06, "loss": 0.5187, "step": 20251 }, { "epoch": 0.58, "grad_norm": 4.192802260711124, "learning_rate": 3.95570922051445e-06, "loss": 0.2599, "step": 20252 }, { "epoch": 0.58, "grad_norm": 5.4117778454893015, "learning_rate": 3.955255694288234e-06, "loss": 0.4457, "step": 20253 }, { "epoch": 0.58, "grad_norm": 4.254847566897714, "learning_rate": 3.954802177049828e-06, "loss": 0.4279, "step": 20254 }, { "epoch": 0.58, "grad_norm": 12.476993904466326, "learning_rate": 3.9543486688031265e-06, "loss": 0.6598, "step": 20255 }, { "epoch": 0.58, "grad_norm": 8.184787504761985, "learning_rate": 3.953895169552037e-06, "loss": 0.5814, "step": 20256 }, { "epoch": 0.58, "grad_norm": 2.9137969113324838, "learning_rate": 3.953441679300458e-06, "loss": 0.1926, "step": 20257 }, { "epoch": 0.58, "grad_norm": 8.389136041982141, "learning_rate": 3.95298819805229e-06, "loss": 0.306, "step": 20258 }, { "epoch": 0.58, "grad_norm": 4.229289051533833, "learning_rate": 3.952534725811438e-06, "loss": 0.3375, "step": 20259 }, { "epoch": 0.58, "grad_norm": 4.518835332336498, "learning_rate": 3.952081262581799e-06, "loss": 0.4172, "step": 20260 }, { "epoch": 0.58, "grad_norm": 5.793254904389894, "learning_rate": 3.951627808367277e-06, "loss": 0.3084, "step": 20261 }, { "epoch": 0.58, "grad_norm": 6.20836903947978, "learning_rate": 3.9511743631717705e-06, "loss": 0.6948, "step": 20262 }, { "epoch": 0.58, "grad_norm": 5.306367858009181, "learning_rate": 3.950720926999184e-06, "loss": 0.577, "step": 20263 }, { "epoch": 0.58, "grad_norm": 9.331646789576851, "learning_rate": 3.950267499853414e-06, "loss": 0.4864, "step": 20264 }, { "epoch": 0.58, "grad_norm": 4.017282937814265, "learning_rate": 3.949814081738363e-06, "loss": 0.4021, "step": 20265 }, { "epoch": 0.58, "grad_norm": 9.56919551636946, "learning_rate": 3.949360672657935e-06, "loss": 0.6891, "step": 20266 }, { "epoch": 0.58, "grad_norm": 6.102196693431067, "learning_rate": 3.9489072726160264e-06, "loss": 0.71, "step": 20267 }, { "epoch": 0.58, "grad_norm": 6.520400495047387, "learning_rate": 3.9484538816165395e-06, "loss": 0.6443, "step": 20268 }, { "epoch": 0.58, "grad_norm": 8.850274715362458, "learning_rate": 3.948000499663373e-06, "loss": 0.8487, "step": 20269 }, { "epoch": 0.58, "grad_norm": 5.5790946395513235, "learning_rate": 3.94754712676043e-06, "loss": 0.6954, "step": 20270 }, { "epoch": 0.58, "grad_norm": 5.9025688810098025, "learning_rate": 3.947093762911608e-06, "loss": 0.7021, "step": 20271 }, { "epoch": 0.58, "grad_norm": 4.815695828579736, "learning_rate": 3.946640408120811e-06, "loss": 0.4024, "step": 20272 }, { "epoch": 0.58, "grad_norm": 9.856686716800025, "learning_rate": 3.946187062391934e-06, "loss": 0.6858, "step": 20273 }, { "epoch": 0.58, "grad_norm": 9.424592393312844, "learning_rate": 3.945733725728882e-06, "loss": 0.5142, "step": 20274 }, { "epoch": 0.58, "grad_norm": 6.687421334775492, "learning_rate": 3.945280398135553e-06, "loss": 0.2841, "step": 20275 }, { "epoch": 0.58, "grad_norm": 7.331592186944559, "learning_rate": 3.944827079615846e-06, "loss": 0.816, "step": 20276 }, { "epoch": 0.58, "grad_norm": 5.323827268743282, "learning_rate": 3.944373770173662e-06, "loss": 0.4331, "step": 20277 }, { "epoch": 0.58, "grad_norm": 5.64720830561736, "learning_rate": 3.943920469812901e-06, "loss": 0.4042, "step": 20278 }, { "epoch": 0.58, "grad_norm": 6.126911526782898, "learning_rate": 3.9434671785374625e-06, "loss": 0.5252, "step": 20279 }, { "epoch": 0.58, "grad_norm": 5.1918857832337775, "learning_rate": 3.943013896351244e-06, "loss": 0.5259, "step": 20280 }, { "epoch": 0.58, "grad_norm": 2.0995029298280867, "learning_rate": 3.942560623258148e-06, "loss": 0.1861, "step": 20281 }, { "epoch": 0.58, "grad_norm": 7.335991547439265, "learning_rate": 3.942107359262073e-06, "loss": 0.4449, "step": 20282 }, { "epoch": 0.58, "grad_norm": 7.220080364551426, "learning_rate": 3.941654104366916e-06, "loss": 1.0049, "step": 20283 }, { "epoch": 0.58, "grad_norm": 4.24291098029772, "learning_rate": 3.941200858576583e-06, "loss": 0.4077, "step": 20284 }, { "epoch": 0.58, "grad_norm": 4.482305760427568, "learning_rate": 3.940747621894964e-06, "loss": 0.2108, "step": 20285 }, { "epoch": 0.58, "grad_norm": 2.9481738734283285, "learning_rate": 3.940294394325966e-06, "loss": 0.2377, "step": 20286 }, { "epoch": 0.58, "grad_norm": 7.276406572312381, "learning_rate": 3.939841175873483e-06, "loss": 0.1482, "step": 20287 }, { "epoch": 0.58, "grad_norm": 3.716144811268777, "learning_rate": 3.939387966541417e-06, "loss": 0.2648, "step": 20288 }, { "epoch": 0.58, "grad_norm": 7.324869372143761, "learning_rate": 3.938934766333664e-06, "loss": 0.4151, "step": 20289 }, { "epoch": 0.58, "grad_norm": 4.265172578063695, "learning_rate": 3.938481575254125e-06, "loss": 0.5097, "step": 20290 }, { "epoch": 0.58, "grad_norm": 73.31422864562148, "learning_rate": 3.9380283933067e-06, "loss": 0.8178, "step": 20291 }, { "epoch": 0.58, "grad_norm": 2.004043492605231, "learning_rate": 3.9375752204952836e-06, "loss": 0.0791, "step": 20292 }, { "epoch": 0.58, "grad_norm": 5.403833345277415, "learning_rate": 3.937122056823779e-06, "loss": 0.2752, "step": 20293 }, { "epoch": 0.58, "grad_norm": 5.354959609790288, "learning_rate": 3.936668902296081e-06, "loss": 0.6905, "step": 20294 }, { "epoch": 0.58, "grad_norm": 6.397917972102063, "learning_rate": 3.936215756916091e-06, "loss": 0.2231, "step": 20295 }, { "epoch": 0.58, "grad_norm": 5.309481582335204, "learning_rate": 3.935762620687704e-06, "loss": 0.5801, "step": 20296 }, { "epoch": 0.58, "grad_norm": 6.291152663634717, "learning_rate": 3.935309493614822e-06, "loss": 0.5569, "step": 20297 }, { "epoch": 0.58, "grad_norm": 3.076290649058694, "learning_rate": 3.93485637570134e-06, "loss": 0.4363, "step": 20298 }, { "epoch": 0.58, "grad_norm": 5.5037879037689, "learning_rate": 3.934403266951157e-06, "loss": 0.4195, "step": 20299 }, { "epoch": 0.58, "grad_norm": 5.374323070393686, "learning_rate": 3.933950167368173e-06, "loss": 0.4795, "step": 20300 }, { "epoch": 0.58, "grad_norm": 3.7484841143884755, "learning_rate": 3.933497076956284e-06, "loss": 0.4272, "step": 20301 }, { "epoch": 0.58, "grad_norm": 6.1324091104396095, "learning_rate": 3.933043995719389e-06, "loss": 0.601, "step": 20302 }, { "epoch": 0.58, "grad_norm": 7.97879526122181, "learning_rate": 3.932590923661383e-06, "loss": 0.4164, "step": 20303 }, { "epoch": 0.58, "grad_norm": 10.025850262817196, "learning_rate": 3.932137860786168e-06, "loss": 0.23, "step": 20304 }, { "epoch": 0.58, "grad_norm": 7.182195945435686, "learning_rate": 3.931684807097638e-06, "loss": 0.4356, "step": 20305 }, { "epoch": 0.58, "grad_norm": 5.9598078826048235, "learning_rate": 3.931231762599692e-06, "loss": 0.685, "step": 20306 }, { "epoch": 0.58, "grad_norm": 4.8816406064408575, "learning_rate": 3.930778727296229e-06, "loss": 0.5025, "step": 20307 }, { "epoch": 0.58, "grad_norm": 6.092026726048556, "learning_rate": 3.930325701191144e-06, "loss": 0.4813, "step": 20308 }, { "epoch": 0.58, "grad_norm": 3.3533410819503215, "learning_rate": 3.929872684288337e-06, "loss": 0.4677, "step": 20309 }, { "epoch": 0.58, "grad_norm": 3.839262971779009, "learning_rate": 3.929419676591701e-06, "loss": 0.2252, "step": 20310 }, { "epoch": 0.58, "grad_norm": 5.112102841944181, "learning_rate": 3.928966678105138e-06, "loss": 0.468, "step": 20311 }, { "epoch": 0.58, "grad_norm": 5.8958346453374295, "learning_rate": 3.9285136888325405e-06, "loss": 0.401, "step": 20312 }, { "epoch": 0.58, "grad_norm": 6.071837205518894, "learning_rate": 3.92806070877781e-06, "loss": 0.3597, "step": 20313 }, { "epoch": 0.58, "grad_norm": 5.779683926826934, "learning_rate": 3.92760773794484e-06, "loss": 0.5934, "step": 20314 }, { "epoch": 0.58, "grad_norm": 6.397198791210695, "learning_rate": 3.927154776337527e-06, "loss": 0.361, "step": 20315 }, { "epoch": 0.58, "grad_norm": 3.7852348264366533, "learning_rate": 3.926701823959772e-06, "loss": 0.6142, "step": 20316 }, { "epoch": 0.58, "grad_norm": 5.513966859553952, "learning_rate": 3.926248880815468e-06, "loss": 0.5951, "step": 20317 }, { "epoch": 0.58, "grad_norm": 5.071826679330963, "learning_rate": 3.925795946908514e-06, "loss": 0.5058, "step": 20318 }, { "epoch": 0.58, "grad_norm": 16.899475150451206, "learning_rate": 3.925343022242803e-06, "loss": 0.7376, "step": 20319 }, { "epoch": 0.58, "grad_norm": 2.6896336978970807, "learning_rate": 3.924890106822235e-06, "loss": 0.1548, "step": 20320 }, { "epoch": 0.58, "grad_norm": 3.5089285589486456, "learning_rate": 3.924437200650705e-06, "loss": 0.2402, "step": 20321 }, { "epoch": 0.58, "grad_norm": 8.171710360967358, "learning_rate": 3.923984303732107e-06, "loss": 0.4776, "step": 20322 }, { "epoch": 0.58, "grad_norm": 7.769511856200657, "learning_rate": 3.923531416070343e-06, "loss": 0.4107, "step": 20323 }, { "epoch": 0.58, "grad_norm": 2.7299432207847287, "learning_rate": 3.923078537669304e-06, "loss": 0.2762, "step": 20324 }, { "epoch": 0.58, "grad_norm": 9.221647473187803, "learning_rate": 3.922625668532889e-06, "loss": 0.4586, "step": 20325 }, { "epoch": 0.58, "grad_norm": 5.163302413243671, "learning_rate": 3.922172808664991e-06, "loss": 0.6927, "step": 20326 }, { "epoch": 0.58, "grad_norm": 3.516285304473032, "learning_rate": 3.921719958069509e-06, "loss": 0.4968, "step": 20327 }, { "epoch": 0.58, "grad_norm": 6.054075699936896, "learning_rate": 3.921267116750337e-06, "loss": 0.4131, "step": 20328 }, { "epoch": 0.58, "grad_norm": 5.723861619469558, "learning_rate": 3.920814284711372e-06, "loss": 0.5346, "step": 20329 }, { "epoch": 0.58, "grad_norm": 7.785259516022027, "learning_rate": 3.9203614619565065e-06, "loss": 0.4454, "step": 20330 }, { "epoch": 0.58, "grad_norm": 3.038867522508809, "learning_rate": 3.91990864848964e-06, "loss": 0.2477, "step": 20331 }, { "epoch": 0.58, "grad_norm": 4.582801864616885, "learning_rate": 3.919455844314667e-06, "loss": 0.2616, "step": 20332 }, { "epoch": 0.58, "grad_norm": 4.750432446520535, "learning_rate": 3.9190030494354796e-06, "loss": 0.797, "step": 20333 }, { "epoch": 0.58, "grad_norm": 4.168211739490283, "learning_rate": 3.918550263855978e-06, "loss": 0.3397, "step": 20334 }, { "epoch": 0.58, "grad_norm": 8.208554518248187, "learning_rate": 3.9180974875800555e-06, "loss": 0.2763, "step": 20335 }, { "epoch": 0.58, "grad_norm": 8.695574623674313, "learning_rate": 3.917644720611608e-06, "loss": 0.6209, "step": 20336 }, { "epoch": 0.58, "grad_norm": 6.87765105025809, "learning_rate": 3.917191962954527e-06, "loss": 0.5228, "step": 20337 }, { "epoch": 0.58, "grad_norm": 4.4033893335296295, "learning_rate": 3.916739214612713e-06, "loss": 0.2026, "step": 20338 }, { "epoch": 0.58, "grad_norm": 3.8487765002972196, "learning_rate": 3.916286475590056e-06, "loss": 0.4477, "step": 20339 }, { "epoch": 0.58, "grad_norm": 18.55698263971144, "learning_rate": 3.9158337458904525e-06, "loss": 0.4399, "step": 20340 }, { "epoch": 0.58, "grad_norm": 5.761771757237631, "learning_rate": 3.915381025517801e-06, "loss": 0.4645, "step": 20341 }, { "epoch": 0.58, "grad_norm": 7.067632413796996, "learning_rate": 3.91492831447599e-06, "loss": 0.8026, "step": 20342 }, { "epoch": 0.58, "grad_norm": 1.572161236253414, "learning_rate": 3.914475612768919e-06, "loss": 0.2734, "step": 20343 }, { "epoch": 0.58, "grad_norm": 3.1698654273196336, "learning_rate": 3.91402292040048e-06, "loss": 0.2686, "step": 20344 }, { "epoch": 0.58, "grad_norm": 3.194742091220971, "learning_rate": 3.913570237374569e-06, "loss": 0.3901, "step": 20345 }, { "epoch": 0.58, "grad_norm": 3.6056443457862244, "learning_rate": 3.913117563695077e-06, "loss": 0.4009, "step": 20346 }, { "epoch": 0.58, "grad_norm": 4.978181967735605, "learning_rate": 3.912664899365902e-06, "loss": 0.5066, "step": 20347 }, { "epoch": 0.58, "grad_norm": 6.618535693754309, "learning_rate": 3.912212244390938e-06, "loss": 0.3114, "step": 20348 }, { "epoch": 0.58, "grad_norm": 2.3004412051661327, "learning_rate": 3.911759598774076e-06, "loss": 0.2187, "step": 20349 }, { "epoch": 0.58, "grad_norm": 7.84048503387426, "learning_rate": 3.911306962519215e-06, "loss": 0.5623, "step": 20350 }, { "epoch": 0.58, "grad_norm": 7.118871227970401, "learning_rate": 3.910854335630244e-06, "loss": 0.4722, "step": 20351 }, { "epoch": 0.58, "grad_norm": 4.804340480274991, "learning_rate": 3.910401718111061e-06, "loss": 0.6931, "step": 20352 }, { "epoch": 0.58, "grad_norm": 8.53804595005934, "learning_rate": 3.9099491099655555e-06, "loss": 0.5419, "step": 20353 }, { "epoch": 0.58, "grad_norm": 3.8800731797543486, "learning_rate": 3.909496511197626e-06, "loss": 0.6381, "step": 20354 }, { "epoch": 0.58, "grad_norm": 2.302373649162262, "learning_rate": 3.909043921811162e-06, "loss": 0.2041, "step": 20355 }, { "epoch": 0.58, "grad_norm": 3.737031638101605, "learning_rate": 3.908591341810058e-06, "loss": 0.6368, "step": 20356 }, { "epoch": 0.58, "grad_norm": 4.736561789500311, "learning_rate": 3.908138771198211e-06, "loss": 0.3751, "step": 20357 }, { "epoch": 0.58, "grad_norm": 6.325203605058637, "learning_rate": 3.90768620997951e-06, "loss": 0.6136, "step": 20358 }, { "epoch": 0.58, "grad_norm": 10.808158929182923, "learning_rate": 3.907233658157851e-06, "loss": 0.6945, "step": 20359 }, { "epoch": 0.58, "grad_norm": 6.819536197698579, "learning_rate": 3.906781115737124e-06, "loss": 0.3805, "step": 20360 }, { "epoch": 0.58, "grad_norm": 3.1749690707645137, "learning_rate": 3.906328582721227e-06, "loss": 0.3044, "step": 20361 }, { "epoch": 0.58, "grad_norm": 3.963204933061868, "learning_rate": 3.90587605911405e-06, "loss": 0.4309, "step": 20362 }, { "epoch": 0.58, "grad_norm": 6.699333416816579, "learning_rate": 3.9054235449194845e-06, "loss": 0.8622, "step": 20363 }, { "epoch": 0.58, "grad_norm": 3.691102531089038, "learning_rate": 3.904971040141429e-06, "loss": 0.2329, "step": 20364 }, { "epoch": 0.58, "grad_norm": 6.612061696227989, "learning_rate": 3.904518544783771e-06, "loss": 0.3074, "step": 20365 }, { "epoch": 0.58, "grad_norm": 7.166376503533649, "learning_rate": 3.904066058850406e-06, "loss": 0.5541, "step": 20366 }, { "epoch": 0.58, "grad_norm": 4.337221247705758, "learning_rate": 3.903613582345222e-06, "loss": 0.1834, "step": 20367 }, { "epoch": 0.58, "grad_norm": 4.245719605414841, "learning_rate": 3.90316111527212e-06, "loss": 0.5516, "step": 20368 }, { "epoch": 0.58, "grad_norm": 6.569116435914056, "learning_rate": 3.902708657634986e-06, "loss": 0.5559, "step": 20369 }, { "epoch": 0.58, "grad_norm": 5.558639601238884, "learning_rate": 3.902256209437715e-06, "loss": 0.7204, "step": 20370 }, { "epoch": 0.58, "grad_norm": 7.385991166066915, "learning_rate": 3.901803770684198e-06, "loss": 0.693, "step": 20371 }, { "epoch": 0.58, "grad_norm": 2.384761144835367, "learning_rate": 3.9013513413783264e-06, "loss": 0.364, "step": 20372 }, { "epoch": 0.58, "grad_norm": 8.540301485211923, "learning_rate": 3.900898921523997e-06, "loss": 0.6941, "step": 20373 }, { "epoch": 0.58, "grad_norm": 5.495799454629673, "learning_rate": 3.900446511125097e-06, "loss": 0.5132, "step": 20374 }, { "epoch": 0.58, "grad_norm": 6.316052409814065, "learning_rate": 3.899994110185522e-06, "loss": 0.5744, "step": 20375 }, { "epoch": 0.58, "grad_norm": 9.31833859922883, "learning_rate": 3.899541718709159e-06, "loss": 0.5376, "step": 20376 }, { "epoch": 0.58, "grad_norm": 5.3747616204918165, "learning_rate": 3.899089336699907e-06, "loss": 0.4546, "step": 20377 }, { "epoch": 0.58, "grad_norm": 5.342332797042455, "learning_rate": 3.898636964161651e-06, "loss": 0.6471, "step": 20378 }, { "epoch": 0.58, "grad_norm": 11.903749518762492, "learning_rate": 3.898184601098288e-06, "loss": 0.4325, "step": 20379 }, { "epoch": 0.58, "grad_norm": 7.747272134429146, "learning_rate": 3.897732247513704e-06, "loss": 1.009, "step": 20380 }, { "epoch": 0.58, "grad_norm": 3.9065814373551233, "learning_rate": 3.897279903411796e-06, "loss": 0.4019, "step": 20381 }, { "epoch": 0.58, "grad_norm": 2.6789839018266495, "learning_rate": 3.896827568796453e-06, "loss": 0.2524, "step": 20382 }, { "epoch": 0.58, "grad_norm": 5.158387764020605, "learning_rate": 3.896375243671566e-06, "loss": 0.4146, "step": 20383 }, { "epoch": 0.58, "grad_norm": 5.124923286794264, "learning_rate": 3.895922928041027e-06, "loss": 0.4764, "step": 20384 }, { "epoch": 0.58, "grad_norm": 10.605361246649547, "learning_rate": 3.895470621908728e-06, "loss": 0.5751, "step": 20385 }, { "epoch": 0.58, "grad_norm": 6.813291661171777, "learning_rate": 3.895018325278559e-06, "loss": 0.6061, "step": 20386 }, { "epoch": 0.58, "grad_norm": 5.6874272687684115, "learning_rate": 3.89456603815441e-06, "loss": 0.4578, "step": 20387 }, { "epoch": 0.58, "grad_norm": 6.5980019261454785, "learning_rate": 3.894113760540174e-06, "loss": 0.2565, "step": 20388 }, { "epoch": 0.58, "grad_norm": 6.316706436186365, "learning_rate": 3.893661492439742e-06, "loss": 0.4687, "step": 20389 }, { "epoch": 0.58, "grad_norm": 3.1115210713379993, "learning_rate": 3.893209233857002e-06, "loss": 0.0914, "step": 20390 }, { "epoch": 0.58, "grad_norm": 3.0332232079759125, "learning_rate": 3.8927569847958485e-06, "loss": 0.4907, "step": 20391 }, { "epoch": 0.58, "grad_norm": 8.000154493747488, "learning_rate": 3.89230474526017e-06, "loss": 0.7183, "step": 20392 }, { "epoch": 0.58, "grad_norm": 3.674119725840654, "learning_rate": 3.891852515253858e-06, "loss": 0.4977, "step": 20393 }, { "epoch": 0.58, "grad_norm": 4.155530078461918, "learning_rate": 3.8914002947808e-06, "loss": 0.3561, "step": 20394 }, { "epoch": 0.58, "grad_norm": 21.354491517612146, "learning_rate": 3.890948083844891e-06, "loss": 0.6258, "step": 20395 }, { "epoch": 0.58, "grad_norm": 6.606880528901757, "learning_rate": 3.890495882450019e-06, "loss": 0.621, "step": 20396 }, { "epoch": 0.58, "grad_norm": 5.647437633695219, "learning_rate": 3.890043690600072e-06, "loss": 0.4145, "step": 20397 }, { "epoch": 0.58, "grad_norm": 4.250579149391276, "learning_rate": 3.889591508298945e-06, "loss": 0.4302, "step": 20398 }, { "epoch": 0.58, "grad_norm": 8.218472378179293, "learning_rate": 3.889139335550525e-06, "loss": 0.6095, "step": 20399 }, { "epoch": 0.58, "grad_norm": 6.16927371676501, "learning_rate": 3.8886871723587026e-06, "loss": 0.2609, "step": 20400 }, { "epoch": 0.58, "grad_norm": 6.291703516702757, "learning_rate": 3.888235018727368e-06, "loss": 0.1786, "step": 20401 }, { "epoch": 0.58, "grad_norm": 4.439722471713658, "learning_rate": 3.88778287466041e-06, "loss": 0.6057, "step": 20402 }, { "epoch": 0.58, "grad_norm": 8.691350947857767, "learning_rate": 3.887330740161717e-06, "loss": 0.689, "step": 20403 }, { "epoch": 0.58, "grad_norm": 3.612071693663626, "learning_rate": 3.886878615235185e-06, "loss": 0.0686, "step": 20404 }, { "epoch": 0.58, "grad_norm": 6.454811189935085, "learning_rate": 3.8864264998846965e-06, "loss": 0.3459, "step": 20405 }, { "epoch": 0.58, "grad_norm": 8.636253423763275, "learning_rate": 3.8859743941141426e-06, "loss": 0.9096, "step": 20406 }, { "epoch": 0.58, "grad_norm": 8.708408032938033, "learning_rate": 3.885522297927416e-06, "loss": 0.3497, "step": 20407 }, { "epoch": 0.58, "grad_norm": 7.053184712520169, "learning_rate": 3.885070211328403e-06, "loss": 0.4409, "step": 20408 }, { "epoch": 0.58, "grad_norm": 4.362335375764776, "learning_rate": 3.884618134320994e-06, "loss": 0.2238, "step": 20409 }, { "epoch": 0.58, "grad_norm": 4.879099564887657, "learning_rate": 3.884166066909077e-06, "loss": 0.7061, "step": 20410 }, { "epoch": 0.58, "grad_norm": 3.7567537366671164, "learning_rate": 3.883714009096543e-06, "loss": 0.2535, "step": 20411 }, { "epoch": 0.58, "grad_norm": 3.1211526075024687, "learning_rate": 3.8832619608872795e-06, "loss": 0.3311, "step": 20412 }, { "epoch": 0.58, "grad_norm": 5.9901374980703554, "learning_rate": 3.882809922285175e-06, "loss": 0.8366, "step": 20413 }, { "epoch": 0.58, "grad_norm": 10.501188301783362, "learning_rate": 3.882357893294121e-06, "loss": 0.3405, "step": 20414 }, { "epoch": 0.58, "grad_norm": 2.719696910429415, "learning_rate": 3.881905873918003e-06, "loss": 0.365, "step": 20415 }, { "epoch": 0.58, "grad_norm": 3.9377154034021182, "learning_rate": 3.881453864160712e-06, "loss": 0.4862, "step": 20416 }, { "epoch": 0.58, "grad_norm": 3.286576706718751, "learning_rate": 3.881001864026135e-06, "loss": 0.1806, "step": 20417 }, { "epoch": 0.58, "grad_norm": 7.518581451277212, "learning_rate": 3.880549873518163e-06, "loss": 1.2652, "step": 20418 }, { "epoch": 0.58, "grad_norm": 6.813842641130329, "learning_rate": 3.88009789264068e-06, "loss": 0.7844, "step": 20419 }, { "epoch": 0.58, "grad_norm": 4.71287721899966, "learning_rate": 3.879645921397579e-06, "loss": 0.5505, "step": 20420 }, { "epoch": 0.58, "grad_norm": 7.850233582344114, "learning_rate": 3.879193959792744e-06, "loss": 0.6876, "step": 20421 }, { "epoch": 0.58, "grad_norm": 7.782207445429211, "learning_rate": 3.878742007830066e-06, "loss": 0.6381, "step": 20422 }, { "epoch": 0.58, "grad_norm": 5.432991175419174, "learning_rate": 3.878290065513435e-06, "loss": 0.3202, "step": 20423 }, { "epoch": 0.58, "grad_norm": 5.647937926680756, "learning_rate": 3.8778381328467325e-06, "loss": 0.5257, "step": 20424 }, { "epoch": 0.58, "grad_norm": 3.670091431229803, "learning_rate": 3.877386209833853e-06, "loss": 0.3453, "step": 20425 }, { "epoch": 0.58, "grad_norm": 5.717625700282334, "learning_rate": 3.876934296478681e-06, "loss": 0.8334, "step": 20426 }, { "epoch": 0.58, "grad_norm": 5.167720266814224, "learning_rate": 3.876482392785106e-06, "loss": 0.2739, "step": 20427 }, { "epoch": 0.59, "grad_norm": 7.326136695494632, "learning_rate": 3.876030498757014e-06, "loss": 0.4801, "step": 20428 }, { "epoch": 0.59, "grad_norm": 7.302609887685526, "learning_rate": 3.875578614398291e-06, "loss": 0.7135, "step": 20429 }, { "epoch": 0.59, "grad_norm": 6.111854167278192, "learning_rate": 3.87512673971283e-06, "loss": 0.3311, "step": 20430 }, { "epoch": 0.59, "grad_norm": 6.505486886805584, "learning_rate": 3.874674874704513e-06, "loss": 0.7703, "step": 20431 }, { "epoch": 0.59, "grad_norm": 2.880641833351563, "learning_rate": 3.874223019377232e-06, "loss": 0.4245, "step": 20432 }, { "epoch": 0.59, "grad_norm": 7.312045221777378, "learning_rate": 3.8737711737348684e-06, "loss": 0.5402, "step": 20433 }, { "epoch": 0.59, "grad_norm": 4.425711702775083, "learning_rate": 3.873319337781316e-06, "loss": 0.2753, "step": 20434 }, { "epoch": 0.59, "grad_norm": 5.949886732866572, "learning_rate": 3.872867511520457e-06, "loss": 0.4356, "step": 20435 }, { "epoch": 0.59, "grad_norm": 1.9404406074481597, "learning_rate": 3.872415694956181e-06, "loss": 0.1234, "step": 20436 }, { "epoch": 0.59, "grad_norm": 2.4975135836456195, "learning_rate": 3.871963888092371e-06, "loss": 0.2291, "step": 20437 }, { "epoch": 0.59, "grad_norm": 4.3687654486780065, "learning_rate": 3.871512090932919e-06, "loss": 0.2547, "step": 20438 }, { "epoch": 0.59, "grad_norm": 4.272555914122909, "learning_rate": 3.871060303481711e-06, "loss": 0.278, "step": 20439 }, { "epoch": 0.59, "grad_norm": 5.512916094241967, "learning_rate": 3.870608525742629e-06, "loss": 0.4613, "step": 20440 }, { "epoch": 0.59, "grad_norm": 8.81862876370781, "learning_rate": 3.870156757719565e-06, "loss": 0.6979, "step": 20441 }, { "epoch": 0.59, "grad_norm": 3.135223588141901, "learning_rate": 3.869704999416403e-06, "loss": 0.1392, "step": 20442 }, { "epoch": 0.59, "grad_norm": 8.06590024825052, "learning_rate": 3.8692532508370295e-06, "loss": 0.4231, "step": 20443 }, { "epoch": 0.59, "grad_norm": 4.892589284309254, "learning_rate": 3.868801511985329e-06, "loss": 0.4027, "step": 20444 }, { "epoch": 0.59, "grad_norm": 2.5529917650698852, "learning_rate": 3.868349782865194e-06, "loss": 0.3655, "step": 20445 }, { "epoch": 0.59, "grad_norm": 4.539840158892071, "learning_rate": 3.867898063480503e-06, "loss": 0.3181, "step": 20446 }, { "epoch": 0.59, "grad_norm": 6.036302416951663, "learning_rate": 3.867446353835146e-06, "loss": 0.38, "step": 20447 }, { "epoch": 0.59, "grad_norm": 6.961716254071942, "learning_rate": 3.8669946539330106e-06, "loss": 0.4265, "step": 20448 }, { "epoch": 0.59, "grad_norm": 7.441754991264231, "learning_rate": 3.86654296377798e-06, "loss": 0.4607, "step": 20449 }, { "epoch": 0.59, "grad_norm": 5.1771826577844635, "learning_rate": 3.86609128337394e-06, "loss": 0.3245, "step": 20450 }, { "epoch": 0.59, "grad_norm": 4.252449591919294, "learning_rate": 3.8656396127247765e-06, "loss": 0.4454, "step": 20451 }, { "epoch": 0.59, "grad_norm": 4.301751189383735, "learning_rate": 3.865187951834377e-06, "loss": 0.3244, "step": 20452 }, { "epoch": 0.59, "grad_norm": 6.23250503986643, "learning_rate": 3.864736300706625e-06, "loss": 0.5982, "step": 20453 }, { "epoch": 0.59, "grad_norm": 4.835558357228219, "learning_rate": 3.864284659345406e-06, "loss": 0.4748, "step": 20454 }, { "epoch": 0.59, "grad_norm": 8.812539824267285, "learning_rate": 3.863833027754609e-06, "loss": 0.7817, "step": 20455 }, { "epoch": 0.59, "grad_norm": 8.433159396924763, "learning_rate": 3.863381405938115e-06, "loss": 0.6994, "step": 20456 }, { "epoch": 0.59, "grad_norm": 6.7094514598141, "learning_rate": 3.8629297938998114e-06, "loss": 0.3978, "step": 20457 }, { "epoch": 0.59, "grad_norm": 6.4975172216253965, "learning_rate": 3.862478191643582e-06, "loss": 0.3128, "step": 20458 }, { "epoch": 0.59, "grad_norm": 7.653850444089132, "learning_rate": 3.862026599173313e-06, "loss": 0.9255, "step": 20459 }, { "epoch": 0.59, "grad_norm": 3.852931910841138, "learning_rate": 3.861575016492888e-06, "loss": 0.2071, "step": 20460 }, { "epoch": 0.59, "grad_norm": 6.533853404436692, "learning_rate": 3.861123443606195e-06, "loss": 0.8688, "step": 20461 }, { "epoch": 0.59, "grad_norm": 4.269566374387342, "learning_rate": 3.860671880517116e-06, "loss": 0.3617, "step": 20462 }, { "epoch": 0.59, "grad_norm": 4.945348176028996, "learning_rate": 3.8602203272295345e-06, "loss": 0.3502, "step": 20463 }, { "epoch": 0.59, "grad_norm": 5.44693266793745, "learning_rate": 3.8597687837473395e-06, "loss": 0.6398, "step": 20464 }, { "epoch": 0.59, "grad_norm": 3.1985808354822773, "learning_rate": 3.859317250074413e-06, "loss": 0.3282, "step": 20465 }, { "epoch": 0.59, "grad_norm": 5.007300435516106, "learning_rate": 3.85886572621464e-06, "loss": 0.3024, "step": 20466 }, { "epoch": 0.59, "grad_norm": 6.556343260502701, "learning_rate": 3.858414212171903e-06, "loss": 0.3637, "step": 20467 }, { "epoch": 0.59, "grad_norm": 7.131245268570235, "learning_rate": 3.857962707950089e-06, "loss": 0.8066, "step": 20468 }, { "epoch": 0.59, "grad_norm": 2.822304716796469, "learning_rate": 3.857511213553083e-06, "loss": 0.3805, "step": 20469 }, { "epoch": 0.59, "grad_norm": 3.7376540404251397, "learning_rate": 3.857059728984763e-06, "loss": 0.1986, "step": 20470 }, { "epoch": 0.59, "grad_norm": 5.338932207910726, "learning_rate": 3.856608254249021e-06, "loss": 0.6527, "step": 20471 }, { "epoch": 0.59, "grad_norm": 4.760454543593935, "learning_rate": 3.856156789349737e-06, "loss": 0.2243, "step": 20472 }, { "epoch": 0.59, "grad_norm": 5.546618431497491, "learning_rate": 3.855705334290795e-06, "loss": 0.567, "step": 20473 }, { "epoch": 0.59, "grad_norm": 3.4960717227177938, "learning_rate": 3.855253889076078e-06, "loss": 0.2973, "step": 20474 }, { "epoch": 0.59, "grad_norm": 7.216468351368874, "learning_rate": 3.8548024537094725e-06, "loss": 0.5574, "step": 20475 }, { "epoch": 0.59, "grad_norm": 3.052864281836758, "learning_rate": 3.85435102819486e-06, "loss": 0.5339, "step": 20476 }, { "epoch": 0.59, "grad_norm": 4.996736605924347, "learning_rate": 3.853899612536125e-06, "loss": 0.5539, "step": 20477 }, { "epoch": 0.59, "grad_norm": 4.091525514476723, "learning_rate": 3.853448206737148e-06, "loss": 0.4279, "step": 20478 }, { "epoch": 0.59, "grad_norm": 6.804089560444623, "learning_rate": 3.852996810801817e-06, "loss": 0.6779, "step": 20479 }, { "epoch": 0.59, "grad_norm": 2.791450701492285, "learning_rate": 3.852545424734014e-06, "loss": 0.1331, "step": 20480 }, { "epoch": 0.59, "grad_norm": 3.5275183346148182, "learning_rate": 3.852094048537619e-06, "loss": 0.3296, "step": 20481 }, { "epoch": 0.59, "grad_norm": 7.539725129254994, "learning_rate": 3.8516426822165195e-06, "loss": 0.6364, "step": 20482 }, { "epoch": 0.59, "grad_norm": 7.874194331055096, "learning_rate": 3.851191325774596e-06, "loss": 0.6337, "step": 20483 }, { "epoch": 0.59, "grad_norm": 48.94479262189299, "learning_rate": 3.850739979215733e-06, "loss": 0.5666, "step": 20484 }, { "epoch": 0.59, "grad_norm": 4.263857073394227, "learning_rate": 3.850288642543811e-06, "loss": 0.4578, "step": 20485 }, { "epoch": 0.59, "grad_norm": 6.662833132833776, "learning_rate": 3.849837315762717e-06, "loss": 0.419, "step": 20486 }, { "epoch": 0.59, "grad_norm": 4.974196274210672, "learning_rate": 3.849385998876327e-06, "loss": 0.3126, "step": 20487 }, { "epoch": 0.59, "grad_norm": 4.618470105787156, "learning_rate": 3.848934691888529e-06, "loss": 0.3336, "step": 20488 }, { "epoch": 0.59, "grad_norm": 4.895309690784534, "learning_rate": 3.848483394803205e-06, "loss": 0.4361, "step": 20489 }, { "epoch": 0.59, "grad_norm": 6.627982637782082, "learning_rate": 3.848032107624235e-06, "loss": 1.0086, "step": 20490 }, { "epoch": 0.59, "grad_norm": 3.872802357305997, "learning_rate": 3.847580830355505e-06, "loss": 0.3803, "step": 20491 }, { "epoch": 0.59, "grad_norm": 9.857450415273474, "learning_rate": 3.847129563000894e-06, "loss": 0.6498, "step": 20492 }, { "epoch": 0.59, "grad_norm": 10.304339237416, "learning_rate": 3.846678305564287e-06, "loss": 0.8024, "step": 20493 }, { "epoch": 0.59, "grad_norm": 4.012697659434015, "learning_rate": 3.846227058049562e-06, "loss": 0.4618, "step": 20494 }, { "epoch": 0.59, "grad_norm": 5.719455279442482, "learning_rate": 3.845775820460604e-06, "loss": 0.5162, "step": 20495 }, { "epoch": 0.59, "grad_norm": 9.179310372904881, "learning_rate": 3.845324592801296e-06, "loss": 0.4746, "step": 20496 }, { "epoch": 0.59, "grad_norm": 7.576662469111145, "learning_rate": 3.844873375075517e-06, "loss": 0.5738, "step": 20497 }, { "epoch": 0.59, "grad_norm": 6.452696409609695, "learning_rate": 3.844422167287151e-06, "loss": 0.505, "step": 20498 }, { "epoch": 0.59, "grad_norm": 5.631402038824975, "learning_rate": 3.843970969440079e-06, "loss": 0.5659, "step": 20499 }, { "epoch": 0.59, "grad_norm": 7.34041724367759, "learning_rate": 3.843519781538183e-06, "loss": 0.6593, "step": 20500 }, { "epoch": 0.59, "grad_norm": 2.974202339241319, "learning_rate": 3.8430686035853415e-06, "loss": 0.2744, "step": 20501 }, { "epoch": 0.59, "grad_norm": 6.657417015965193, "learning_rate": 3.8426174355854405e-06, "loss": 0.4974, "step": 20502 }, { "epoch": 0.59, "grad_norm": 12.87255869561803, "learning_rate": 3.842166277542358e-06, "loss": 0.7768, "step": 20503 }, { "epoch": 0.59, "grad_norm": 4.31576243298449, "learning_rate": 3.841715129459976e-06, "loss": 0.1522, "step": 20504 }, { "epoch": 0.59, "grad_norm": 5.776744612861109, "learning_rate": 3.841263991342179e-06, "loss": 0.262, "step": 20505 }, { "epoch": 0.59, "grad_norm": 4.040840569194205, "learning_rate": 3.840812863192843e-06, "loss": 0.3875, "step": 20506 }, { "epoch": 0.59, "grad_norm": 3.979908807395085, "learning_rate": 3.840361745015852e-06, "loss": 0.3964, "step": 20507 }, { "epoch": 0.59, "grad_norm": 3.047312700181586, "learning_rate": 3.839910636815085e-06, "loss": 0.2042, "step": 20508 }, { "epoch": 0.59, "grad_norm": 4.715122420096102, "learning_rate": 3.839459538594425e-06, "loss": 0.4264, "step": 20509 }, { "epoch": 0.59, "grad_norm": 4.321394303759077, "learning_rate": 3.839008450357751e-06, "loss": 0.2868, "step": 20510 }, { "epoch": 0.59, "grad_norm": 16.131343274029756, "learning_rate": 3.838557372108944e-06, "loss": 0.8692, "step": 20511 }, { "epoch": 0.59, "grad_norm": 3.4488560798040053, "learning_rate": 3.838106303851886e-06, "loss": 0.5672, "step": 20512 }, { "epoch": 0.59, "grad_norm": 5.892721435503537, "learning_rate": 3.8376552455904555e-06, "loss": 0.4761, "step": 20513 }, { "epoch": 0.59, "grad_norm": 6.664928849535189, "learning_rate": 3.8372041973285354e-06, "loss": 0.9888, "step": 20514 }, { "epoch": 0.59, "grad_norm": 7.097900655146361, "learning_rate": 3.8367531590700025e-06, "loss": 0.8003, "step": 20515 }, { "epoch": 0.59, "grad_norm": 3.4974470534826354, "learning_rate": 3.836302130818741e-06, "loss": 0.4658, "step": 20516 }, { "epoch": 0.59, "grad_norm": 6.412805245851135, "learning_rate": 3.8358511125786265e-06, "loss": 0.5687, "step": 20517 }, { "epoch": 0.59, "grad_norm": 3.997439548448069, "learning_rate": 3.835400104353544e-06, "loss": 0.4017, "step": 20518 }, { "epoch": 0.59, "grad_norm": 11.29182331685055, "learning_rate": 3.83494910614737e-06, "loss": 0.309, "step": 20519 }, { "epoch": 0.59, "grad_norm": 3.5178949445335252, "learning_rate": 3.834498117963984e-06, "loss": 0.6361, "step": 20520 }, { "epoch": 0.59, "grad_norm": 3.848329684582205, "learning_rate": 3.834047139807269e-06, "loss": 0.2912, "step": 20521 }, { "epoch": 0.59, "grad_norm": 2.8671207680069553, "learning_rate": 3.833596171681102e-06, "loss": 0.2815, "step": 20522 }, { "epoch": 0.59, "grad_norm": 5.344791890795305, "learning_rate": 3.833145213589365e-06, "loss": 0.3866, "step": 20523 }, { "epoch": 0.59, "grad_norm": 5.442871487984421, "learning_rate": 3.8326942655359334e-06, "loss": 0.2114, "step": 20524 }, { "epoch": 0.59, "grad_norm": 10.957096528047527, "learning_rate": 3.832243327524692e-06, "loss": 0.9255, "step": 20525 }, { "epoch": 0.59, "grad_norm": 8.332691422217112, "learning_rate": 3.8317923995595165e-06, "loss": 0.5514, "step": 20526 }, { "epoch": 0.59, "grad_norm": 13.703098692917663, "learning_rate": 3.831341481644287e-06, "loss": 0.4338, "step": 20527 }, { "epoch": 0.59, "grad_norm": 9.751934446488471, "learning_rate": 3.830890573782882e-06, "loss": 0.8316, "step": 20528 }, { "epoch": 0.59, "grad_norm": 5.444758623761347, "learning_rate": 3.830439675979182e-06, "loss": 0.4831, "step": 20529 }, { "epoch": 0.59, "grad_norm": 4.01315325057063, "learning_rate": 3.829988788237067e-06, "loss": 0.3009, "step": 20530 }, { "epoch": 0.59, "grad_norm": 8.288701228431341, "learning_rate": 3.829537910560411e-06, "loss": 0.6607, "step": 20531 }, { "epoch": 0.59, "grad_norm": 5.107442157770733, "learning_rate": 3.8290870429531e-06, "loss": 0.442, "step": 20532 }, { "epoch": 0.59, "grad_norm": 10.070859954845094, "learning_rate": 3.828636185419007e-06, "loss": 0.7052, "step": 20533 }, { "epoch": 0.59, "grad_norm": 8.72655856577239, "learning_rate": 3.828185337962014e-06, "loss": 0.7611, "step": 20534 }, { "epoch": 0.59, "grad_norm": 5.320885110701259, "learning_rate": 3.827734500585995e-06, "loss": 0.4388, "step": 20535 }, { "epoch": 0.59, "grad_norm": 8.51228185701872, "learning_rate": 3.8272836732948345e-06, "loss": 0.7772, "step": 20536 }, { "epoch": 0.59, "grad_norm": 7.275186420621525, "learning_rate": 3.826832856092408e-06, "loss": 0.5963, "step": 20537 }, { "epoch": 0.59, "grad_norm": 16.112413824604147, "learning_rate": 3.826382048982593e-06, "loss": 0.3658, "step": 20538 }, { "epoch": 0.59, "grad_norm": 6.505048221992892, "learning_rate": 3.82593125196927e-06, "loss": 0.8843, "step": 20539 }, { "epoch": 0.59, "grad_norm": 4.981219760893931, "learning_rate": 3.825480465056316e-06, "loss": 0.391, "step": 20540 }, { "epoch": 0.59, "grad_norm": 1.7610073641006234, "learning_rate": 3.825029688247609e-06, "loss": 0.2516, "step": 20541 }, { "epoch": 0.59, "grad_norm": 7.752155065849626, "learning_rate": 3.824578921547025e-06, "loss": 0.2347, "step": 20542 }, { "epoch": 0.59, "grad_norm": 7.412627938567229, "learning_rate": 3.824128164958447e-06, "loss": 0.6385, "step": 20543 }, { "epoch": 0.59, "grad_norm": 8.15137800958833, "learning_rate": 3.823677418485746e-06, "loss": 0.402, "step": 20544 }, { "epoch": 0.59, "grad_norm": 6.590370360692097, "learning_rate": 3.823226682132804e-06, "loss": 0.445, "step": 20545 }, { "epoch": 0.59, "grad_norm": 3.9478537505008884, "learning_rate": 3.822775955903501e-06, "loss": 0.3991, "step": 20546 }, { "epoch": 0.59, "grad_norm": 5.5918593807904085, "learning_rate": 3.822325239801708e-06, "loss": 0.4962, "step": 20547 }, { "epoch": 0.59, "grad_norm": 4.207158022133167, "learning_rate": 3.821874533831308e-06, "loss": 0.5346, "step": 20548 }, { "epoch": 0.59, "grad_norm": 4.590756537394716, "learning_rate": 3.8214238379961755e-06, "loss": 0.8194, "step": 20549 }, { "epoch": 0.59, "grad_norm": 7.247653713063451, "learning_rate": 3.8209731523001905e-06, "loss": 1.0961, "step": 20550 }, { "epoch": 0.59, "grad_norm": 7.652476910790272, "learning_rate": 3.820522476747225e-06, "loss": 0.5934, "step": 20551 }, { "epoch": 0.59, "grad_norm": 5.106075749873483, "learning_rate": 3.820071811341161e-06, "loss": 0.3451, "step": 20552 }, { "epoch": 0.59, "grad_norm": 6.132777589346298, "learning_rate": 3.819621156085875e-06, "loss": 0.3908, "step": 20553 }, { "epoch": 0.59, "grad_norm": 3.2807212040189815, "learning_rate": 3.81917051098524e-06, "loss": 0.2911, "step": 20554 }, { "epoch": 0.59, "grad_norm": 2.512459902182444, "learning_rate": 3.818719876043139e-06, "loss": 0.138, "step": 20555 }, { "epoch": 0.59, "grad_norm": 10.189883555906544, "learning_rate": 3.818269251263443e-06, "loss": 0.8134, "step": 20556 }, { "epoch": 0.59, "grad_norm": 6.131061901819725, "learning_rate": 3.817818636650034e-06, "loss": 0.4668, "step": 20557 }, { "epoch": 0.59, "grad_norm": 8.158404478620792, "learning_rate": 3.817368032206782e-06, "loss": 0.5069, "step": 20558 }, { "epoch": 0.59, "grad_norm": 8.658029008522, "learning_rate": 3.81691743793757e-06, "loss": 0.5674, "step": 20559 }, { "epoch": 0.59, "grad_norm": 7.390057080245862, "learning_rate": 3.81646685384627e-06, "loss": 0.4274, "step": 20560 }, { "epoch": 0.59, "grad_norm": 7.069789196902214, "learning_rate": 3.81601627993676e-06, "loss": 0.4539, "step": 20561 }, { "epoch": 0.59, "grad_norm": 8.409042192477326, "learning_rate": 3.815565716212918e-06, "loss": 0.4213, "step": 20562 }, { "epoch": 0.59, "grad_norm": 4.641543313561238, "learning_rate": 3.815115162678616e-06, "loss": 0.6568, "step": 20563 }, { "epoch": 0.59, "grad_norm": 3.2457472413136608, "learning_rate": 3.814664619337735e-06, "loss": 0.3119, "step": 20564 }, { "epoch": 0.59, "grad_norm": 3.121281471384045, "learning_rate": 3.8142140861941458e-06, "loss": 0.1542, "step": 20565 }, { "epoch": 0.59, "grad_norm": 8.343911387279322, "learning_rate": 3.8137635632517287e-06, "loss": 0.3841, "step": 20566 }, { "epoch": 0.59, "grad_norm": 5.559612767847952, "learning_rate": 3.8133130505143566e-06, "loss": 0.41, "step": 20567 }, { "epoch": 0.59, "grad_norm": 6.35905002072933, "learning_rate": 3.812862547985907e-06, "loss": 0.4966, "step": 20568 }, { "epoch": 0.59, "grad_norm": 2.943796227936499, "learning_rate": 3.8124120556702533e-06, "loss": 0.3968, "step": 20569 }, { "epoch": 0.59, "grad_norm": 2.516928234517794, "learning_rate": 3.811961573571273e-06, "loss": 0.1237, "step": 20570 }, { "epoch": 0.59, "grad_norm": 5.689079275004343, "learning_rate": 3.811511101692842e-06, "loss": 0.3788, "step": 20571 }, { "epoch": 0.59, "grad_norm": 4.49204673214821, "learning_rate": 3.811060640038833e-06, "loss": 0.3686, "step": 20572 }, { "epoch": 0.59, "grad_norm": 4.66111664896108, "learning_rate": 3.8106101886131253e-06, "loss": 0.3534, "step": 20573 }, { "epoch": 0.59, "grad_norm": 5.765610361468332, "learning_rate": 3.8101597474195894e-06, "loss": 0.7436, "step": 20574 }, { "epoch": 0.59, "grad_norm": 3.4019293198742004, "learning_rate": 3.809709316462104e-06, "loss": 0.6265, "step": 20575 }, { "epoch": 0.59, "grad_norm": 5.589188660649985, "learning_rate": 3.8092588957445426e-06, "loss": 0.1978, "step": 20576 }, { "epoch": 0.59, "grad_norm": 7.212837539258514, "learning_rate": 3.8088084852707787e-06, "loss": 0.3885, "step": 20577 }, { "epoch": 0.59, "grad_norm": 7.629823222331053, "learning_rate": 3.808358085044691e-06, "loss": 0.6257, "step": 20578 }, { "epoch": 0.59, "grad_norm": 4.369119888822607, "learning_rate": 3.807907695070151e-06, "loss": 0.3428, "step": 20579 }, { "epoch": 0.59, "grad_norm": 5.2753714060089045, "learning_rate": 3.8074573153510353e-06, "loss": 0.4419, "step": 20580 }, { "epoch": 0.59, "grad_norm": 7.916077548410033, "learning_rate": 3.8070069458912153e-06, "loss": 0.7456, "step": 20581 }, { "epoch": 0.59, "grad_norm": 5.987373218508158, "learning_rate": 3.8065565866945695e-06, "loss": 0.6179, "step": 20582 }, { "epoch": 0.59, "grad_norm": 5.17602262114073, "learning_rate": 3.80610623776497e-06, "loss": 0.5528, "step": 20583 }, { "epoch": 0.59, "grad_norm": 5.766023368822497, "learning_rate": 3.8056558991062925e-06, "loss": 0.3275, "step": 20584 }, { "epoch": 0.59, "grad_norm": 5.650608347321925, "learning_rate": 3.805205570722408e-06, "loss": 0.4667, "step": 20585 }, { "epoch": 0.59, "grad_norm": 5.243561747903533, "learning_rate": 3.804755252617194e-06, "loss": 0.5195, "step": 20586 }, { "epoch": 0.59, "grad_norm": 7.110492056815693, "learning_rate": 3.804304944794524e-06, "loss": 0.3866, "step": 20587 }, { "epoch": 0.59, "grad_norm": 4.951791577804653, "learning_rate": 3.8038546472582693e-06, "loss": 0.5981, "step": 20588 }, { "epoch": 0.59, "grad_norm": 3.8400525553404994, "learning_rate": 3.8034043600123083e-06, "loss": 0.2657, "step": 20589 }, { "epoch": 0.59, "grad_norm": 5.177885682878533, "learning_rate": 3.8029540830605114e-06, "loss": 0.3716, "step": 20590 }, { "epoch": 0.59, "grad_norm": 6.718078295820132, "learning_rate": 3.802503816406754e-06, "loss": 0.931, "step": 20591 }, { "epoch": 0.59, "grad_norm": 2.6473621221919266, "learning_rate": 3.8020535600549065e-06, "loss": 0.2224, "step": 20592 }, { "epoch": 0.59, "grad_norm": 3.2300921477764115, "learning_rate": 3.801603314008847e-06, "loss": 0.4806, "step": 20593 }, { "epoch": 0.59, "grad_norm": 5.758152448331417, "learning_rate": 3.8011530782724456e-06, "loss": 0.7205, "step": 20594 }, { "epoch": 0.59, "grad_norm": 4.534113485628191, "learning_rate": 3.8007028528495754e-06, "loss": 0.5777, "step": 20595 }, { "epoch": 0.59, "grad_norm": 5.224393887933494, "learning_rate": 3.800252637744113e-06, "loss": 0.6614, "step": 20596 }, { "epoch": 0.59, "grad_norm": 4.611024567865607, "learning_rate": 3.799802432959929e-06, "loss": 0.3456, "step": 20597 }, { "epoch": 0.59, "grad_norm": 5.632162577769679, "learning_rate": 3.799352238500897e-06, "loss": 0.4776, "step": 20598 }, { "epoch": 0.59, "grad_norm": 2.5346834655887633, "learning_rate": 3.7989020543708884e-06, "loss": 0.2504, "step": 20599 }, { "epoch": 0.59, "grad_norm": 2.1721440704311132, "learning_rate": 3.798451880573779e-06, "loss": 0.3287, "step": 20600 }, { "epoch": 0.59, "grad_norm": 4.257001859157767, "learning_rate": 3.79800171711344e-06, "loss": 0.3015, "step": 20601 }, { "epoch": 0.59, "grad_norm": 3.4791348145601106, "learning_rate": 3.797551563993743e-06, "loss": 0.3192, "step": 20602 }, { "epoch": 0.59, "grad_norm": 8.770825915359898, "learning_rate": 3.797101421218564e-06, "loss": 0.534, "step": 20603 }, { "epoch": 0.59, "grad_norm": 6.194379661726889, "learning_rate": 3.796651288791771e-06, "loss": 0.7795, "step": 20604 }, { "epoch": 0.59, "grad_norm": 4.313918668179233, "learning_rate": 3.796201166717241e-06, "loss": 0.3389, "step": 20605 }, { "epoch": 0.59, "grad_norm": 5.864646473121778, "learning_rate": 3.795751054998843e-06, "loss": 0.4316, "step": 20606 }, { "epoch": 0.59, "grad_norm": 4.801892269192344, "learning_rate": 3.7953009536404516e-06, "loss": 0.3525, "step": 20607 }, { "epoch": 0.59, "grad_norm": 6.294121227661843, "learning_rate": 3.794850862645935e-06, "loss": 0.5632, "step": 20608 }, { "epoch": 0.59, "grad_norm": 3.9443058958182204, "learning_rate": 3.794400782019171e-06, "loss": 0.4838, "step": 20609 }, { "epoch": 0.59, "grad_norm": 9.398863442679101, "learning_rate": 3.793950711764027e-06, "loss": 0.461, "step": 20610 }, { "epoch": 0.59, "grad_norm": 12.177926240218694, "learning_rate": 3.7935006518843758e-06, "loss": 0.5219, "step": 20611 }, { "epoch": 0.59, "grad_norm": 10.721506459443454, "learning_rate": 3.7930506023840917e-06, "loss": 0.8108, "step": 20612 }, { "epoch": 0.59, "grad_norm": 7.3635132446978115, "learning_rate": 3.7926005632670444e-06, "loss": 0.5979, "step": 20613 }, { "epoch": 0.59, "grad_norm": 4.346046444479258, "learning_rate": 3.7921505345371064e-06, "loss": 0.1785, "step": 20614 }, { "epoch": 0.59, "grad_norm": 4.782603165869286, "learning_rate": 3.7917005161981462e-06, "loss": 0.5083, "step": 20615 }, { "epoch": 0.59, "grad_norm": 3.3860930994577205, "learning_rate": 3.7912505082540396e-06, "loss": 0.3294, "step": 20616 }, { "epoch": 0.59, "grad_norm": 4.501248133605214, "learning_rate": 3.790800510708656e-06, "loss": 0.3328, "step": 20617 }, { "epoch": 0.59, "grad_norm": 8.203179350173373, "learning_rate": 3.790350523565865e-06, "loss": 0.3148, "step": 20618 }, { "epoch": 0.59, "grad_norm": 3.557699013543104, "learning_rate": 3.7899005468295413e-06, "loss": 0.2386, "step": 20619 }, { "epoch": 0.59, "grad_norm": 5.279741060380978, "learning_rate": 3.7894505805035537e-06, "loss": 0.3671, "step": 20620 }, { "epoch": 0.59, "grad_norm": 2.792056659008817, "learning_rate": 3.7890006245917747e-06, "loss": 0.239, "step": 20621 }, { "epoch": 0.59, "grad_norm": 6.219780036311876, "learning_rate": 3.788550679098072e-06, "loss": 0.911, "step": 20622 }, { "epoch": 0.59, "grad_norm": 6.249785228853813, "learning_rate": 3.7881007440263206e-06, "loss": 0.2037, "step": 20623 }, { "epoch": 0.59, "grad_norm": 7.379590965477984, "learning_rate": 3.7876508193803883e-06, "loss": 0.2017, "step": 20624 }, { "epoch": 0.59, "grad_norm": 6.016949792612842, "learning_rate": 3.7872009051641483e-06, "loss": 0.6337, "step": 20625 }, { "epoch": 0.59, "grad_norm": 3.4737193451002724, "learning_rate": 3.786751001381467e-06, "loss": 0.139, "step": 20626 }, { "epoch": 0.59, "grad_norm": 3.418671545526746, "learning_rate": 3.786301108036219e-06, "loss": 0.3076, "step": 20627 }, { "epoch": 0.59, "grad_norm": 4.051198292290145, "learning_rate": 3.785851225132274e-06, "loss": 0.645, "step": 20628 }, { "epoch": 0.59, "grad_norm": 4.322963432457375, "learning_rate": 3.785401352673499e-06, "loss": 0.285, "step": 20629 }, { "epoch": 0.59, "grad_norm": 3.4394954611638373, "learning_rate": 3.7849514906637687e-06, "loss": 0.3906, "step": 20630 }, { "epoch": 0.59, "grad_norm": 5.472864488755433, "learning_rate": 3.78450163910695e-06, "loss": 0.4263, "step": 20631 }, { "epoch": 0.59, "grad_norm": 6.5166597528087635, "learning_rate": 3.7840517980069154e-06, "loss": 0.4038, "step": 20632 }, { "epoch": 0.59, "grad_norm": 4.474868453878801, "learning_rate": 3.7836019673675324e-06, "loss": 0.3576, "step": 20633 }, { "epoch": 0.59, "grad_norm": 5.542423105837803, "learning_rate": 3.7831521471926723e-06, "loss": 0.5954, "step": 20634 }, { "epoch": 0.59, "grad_norm": 4.520805739686259, "learning_rate": 3.7827023374862034e-06, "loss": 0.5625, "step": 20635 }, { "epoch": 0.59, "grad_norm": 3.517255803262249, "learning_rate": 3.782252538251997e-06, "loss": 0.3045, "step": 20636 }, { "epoch": 0.59, "grad_norm": 4.208639788406795, "learning_rate": 3.781802749493923e-06, "loss": 0.7954, "step": 20637 }, { "epoch": 0.59, "grad_norm": 4.88026188264968, "learning_rate": 3.7813529712158482e-06, "loss": 0.5383, "step": 20638 }, { "epoch": 0.59, "grad_norm": 7.6081392824036875, "learning_rate": 3.7809032034216453e-06, "loss": 0.4314, "step": 20639 }, { "epoch": 0.59, "grad_norm": 3.0705221361804425, "learning_rate": 3.7804534461151816e-06, "loss": 0.3826, "step": 20640 }, { "epoch": 0.59, "grad_norm": 6.093891474719915, "learning_rate": 3.7800036993003276e-06, "loss": 0.586, "step": 20641 }, { "epoch": 0.59, "grad_norm": 4.286901377713419, "learning_rate": 3.779553962980949e-06, "loss": 0.1764, "step": 20642 }, { "epoch": 0.59, "grad_norm": 3.39816732099966, "learning_rate": 3.779104237160919e-06, "loss": 0.2314, "step": 20643 }, { "epoch": 0.59, "grad_norm": 7.274945144839778, "learning_rate": 3.7786545218441056e-06, "loss": 0.584, "step": 20644 }, { "epoch": 0.59, "grad_norm": 4.2175231279915995, "learning_rate": 3.7782048170343757e-06, "loss": 0.3781, "step": 20645 }, { "epoch": 0.59, "grad_norm": 8.589972203201953, "learning_rate": 3.7777551227356004e-06, "loss": 0.5378, "step": 20646 }, { "epoch": 0.59, "grad_norm": 5.597488347561469, "learning_rate": 3.777305438951647e-06, "loss": 0.6451, "step": 20647 }, { "epoch": 0.59, "grad_norm": 3.9004395108891465, "learning_rate": 3.776855765686385e-06, "loss": 0.1994, "step": 20648 }, { "epoch": 0.59, "grad_norm": 8.900868178466881, "learning_rate": 3.77640610294368e-06, "loss": 0.4618, "step": 20649 }, { "epoch": 0.59, "grad_norm": 8.762163699413092, "learning_rate": 3.775956450727405e-06, "loss": 0.6472, "step": 20650 }, { "epoch": 0.59, "grad_norm": 2.5364043891282813, "learning_rate": 3.7755068090414247e-06, "loss": 0.2321, "step": 20651 }, { "epoch": 0.59, "grad_norm": 4.206356295507969, "learning_rate": 3.775057177889608e-06, "loss": 0.5938, "step": 20652 }, { "epoch": 0.59, "grad_norm": 9.695091620057093, "learning_rate": 3.774607557275825e-06, "loss": 0.5953, "step": 20653 }, { "epoch": 0.59, "grad_norm": 14.870440385322082, "learning_rate": 3.7741579472039412e-06, "loss": 1.0659, "step": 20654 }, { "epoch": 0.59, "grad_norm": 9.378088683910024, "learning_rate": 3.7737083476778274e-06, "loss": 0.7091, "step": 20655 }, { "epoch": 0.59, "grad_norm": 3.6333042919386513, "learning_rate": 3.7732587587013476e-06, "loss": 0.3651, "step": 20656 }, { "epoch": 0.59, "grad_norm": 2.8430986811337324, "learning_rate": 3.7728091802783735e-06, "loss": 0.2606, "step": 20657 }, { "epoch": 0.59, "grad_norm": 7.371109275619579, "learning_rate": 3.77235961241277e-06, "loss": 0.36, "step": 20658 }, { "epoch": 0.59, "grad_norm": 4.861046482140964, "learning_rate": 3.771910055108404e-06, "loss": 0.5099, "step": 20659 }, { "epoch": 0.59, "grad_norm": 4.435842258120229, "learning_rate": 3.771460508369147e-06, "loss": 0.4313, "step": 20660 }, { "epoch": 0.59, "grad_norm": 3.975384310971944, "learning_rate": 3.771010972198863e-06, "loss": 0.3887, "step": 20661 }, { "epoch": 0.59, "grad_norm": 5.887202313261411, "learning_rate": 3.770561446601422e-06, "loss": 0.6819, "step": 20662 }, { "epoch": 0.59, "grad_norm": 3.880482671200822, "learning_rate": 3.770111931580687e-06, "loss": 0.4004, "step": 20663 }, { "epoch": 0.59, "grad_norm": 3.5198162103214754, "learning_rate": 3.76966242714053e-06, "loss": 0.5825, "step": 20664 }, { "epoch": 0.59, "grad_norm": 5.06061792121346, "learning_rate": 3.7692129332848128e-06, "loss": 0.7206, "step": 20665 }, { "epoch": 0.59, "grad_norm": 2.481831754328333, "learning_rate": 3.7687634500174074e-06, "loss": 0.2171, "step": 20666 }, { "epoch": 0.59, "grad_norm": 4.718593026386017, "learning_rate": 3.7683139773421774e-06, "loss": 0.385, "step": 20667 }, { "epoch": 0.59, "grad_norm": 1.9379457607011412, "learning_rate": 3.76786451526299e-06, "loss": 0.0888, "step": 20668 }, { "epoch": 0.59, "grad_norm": 5.901246327966906, "learning_rate": 3.767415063783714e-06, "loss": 0.4202, "step": 20669 }, { "epoch": 0.59, "grad_norm": 3.0670856087735827, "learning_rate": 3.7669656229082143e-06, "loss": 0.2421, "step": 20670 }, { "epoch": 0.59, "grad_norm": 5.983827889634574, "learning_rate": 3.7665161926403575e-06, "loss": 0.7009, "step": 20671 }, { "epoch": 0.59, "grad_norm": 3.385341905599796, "learning_rate": 3.7660667729840083e-06, "loss": 0.1723, "step": 20672 }, { "epoch": 0.59, "grad_norm": 4.356412735667363, "learning_rate": 3.765617363943037e-06, "loss": 0.4661, "step": 20673 }, { "epoch": 0.59, "grad_norm": 9.919674606831615, "learning_rate": 3.7651679655213065e-06, "loss": 0.629, "step": 20674 }, { "epoch": 0.59, "grad_norm": 14.943284514806447, "learning_rate": 3.7647185777226848e-06, "loss": 0.3479, "step": 20675 }, { "epoch": 0.59, "grad_norm": 5.498554516626015, "learning_rate": 3.764269200551035e-06, "loss": 0.2695, "step": 20676 }, { "epoch": 0.59, "grad_norm": 4.21014259493311, "learning_rate": 3.7638198340102263e-06, "loss": 0.1842, "step": 20677 }, { "epoch": 0.59, "grad_norm": 3.2864871419297876, "learning_rate": 3.763370478104124e-06, "loss": 0.4592, "step": 20678 }, { "epoch": 0.59, "grad_norm": 4.725415908835392, "learning_rate": 3.7629211328365916e-06, "loss": 0.4461, "step": 20679 }, { "epoch": 0.59, "grad_norm": 4.045464848095143, "learning_rate": 3.762471798211498e-06, "loss": 0.6915, "step": 20680 }, { "epoch": 0.59, "grad_norm": 5.954389863294476, "learning_rate": 3.7620224742327068e-06, "loss": 0.304, "step": 20681 }, { "epoch": 0.59, "grad_norm": 8.398364371047895, "learning_rate": 3.7615731609040846e-06, "loss": 0.5002, "step": 20682 }, { "epoch": 0.59, "grad_norm": 7.425554379404389, "learning_rate": 3.761123858229494e-06, "loss": 0.6763, "step": 20683 }, { "epoch": 0.59, "grad_norm": 7.523068427899737, "learning_rate": 3.760674566212803e-06, "loss": 0.719, "step": 20684 }, { "epoch": 0.59, "grad_norm": 4.915188125238752, "learning_rate": 3.7602252848578773e-06, "loss": 0.6571, "step": 20685 }, { "epoch": 0.59, "grad_norm": 5.976011402926105, "learning_rate": 3.759776014168579e-06, "loss": 0.3286, "step": 20686 }, { "epoch": 0.59, "grad_norm": 7.646909847577646, "learning_rate": 3.7593267541487767e-06, "loss": 0.4333, "step": 20687 }, { "epoch": 0.59, "grad_norm": 4.187792041900242, "learning_rate": 3.7588775048023327e-06, "loss": 0.4933, "step": 20688 }, { "epoch": 0.59, "grad_norm": 8.770106783122182, "learning_rate": 3.758428266133114e-06, "loss": 0.619, "step": 20689 }, { "epoch": 0.59, "grad_norm": 9.527561261899248, "learning_rate": 3.7579790381449814e-06, "loss": 0.7217, "step": 20690 }, { "epoch": 0.59, "grad_norm": 5.409579007308787, "learning_rate": 3.7575298208418053e-06, "loss": 0.7527, "step": 20691 }, { "epoch": 0.59, "grad_norm": 7.131481969940431, "learning_rate": 3.7570806142274446e-06, "loss": 0.3907, "step": 20692 }, { "epoch": 0.59, "grad_norm": 3.826811491161872, "learning_rate": 3.7566314183057675e-06, "loss": 0.5789, "step": 20693 }, { "epoch": 0.59, "grad_norm": 4.887538533168614, "learning_rate": 3.7561822330806375e-06, "loss": 0.4639, "step": 20694 }, { "epoch": 0.59, "grad_norm": 2.1552659671102603, "learning_rate": 3.755733058555917e-06, "loss": 0.134, "step": 20695 }, { "epoch": 0.59, "grad_norm": 4.272302061778166, "learning_rate": 3.755283894735474e-06, "loss": 0.4085, "step": 20696 }, { "epoch": 0.59, "grad_norm": 3.200064252789473, "learning_rate": 3.754834741623169e-06, "loss": 0.2501, "step": 20697 }, { "epoch": 0.59, "grad_norm": 5.57545787829783, "learning_rate": 3.7543855992228687e-06, "loss": 0.3065, "step": 20698 }, { "epoch": 0.59, "grad_norm": 2.758615349895966, "learning_rate": 3.753936467538434e-06, "loss": 0.2818, "step": 20699 }, { "epoch": 0.59, "grad_norm": 6.517758978236973, "learning_rate": 3.7534873465737308e-06, "loss": 0.5469, "step": 20700 }, { "epoch": 0.59, "grad_norm": 3.703650682696778, "learning_rate": 3.753038236332624e-06, "loss": 0.2722, "step": 20701 }, { "epoch": 0.59, "grad_norm": 7.768235287912745, "learning_rate": 3.7525891368189736e-06, "loss": 0.6495, "step": 20702 }, { "epoch": 0.59, "grad_norm": 5.406967071430114, "learning_rate": 3.752140048036647e-06, "loss": 0.4435, "step": 20703 }, { "epoch": 0.59, "grad_norm": 5.071340024777634, "learning_rate": 3.7516909699895053e-06, "loss": 0.299, "step": 20704 }, { "epoch": 0.59, "grad_norm": 9.34917641954712, "learning_rate": 3.7512419026814134e-06, "loss": 0.6375, "step": 20705 }, { "epoch": 0.59, "grad_norm": 3.831052773830336, "learning_rate": 3.7507928461162314e-06, "loss": 0.634, "step": 20706 }, { "epoch": 0.59, "grad_norm": 4.853278527714546, "learning_rate": 3.7503438002978265e-06, "loss": 0.3562, "step": 20707 }, { "epoch": 0.59, "grad_norm": 8.436466634130596, "learning_rate": 3.74989476523006e-06, "loss": 0.7184, "step": 20708 }, { "epoch": 0.59, "grad_norm": 7.358109448383356, "learning_rate": 3.749445740916793e-06, "loss": 0.4292, "step": 20709 }, { "epoch": 0.59, "grad_norm": 7.274159363881256, "learning_rate": 3.748996727361893e-06, "loss": 0.598, "step": 20710 }, { "epoch": 0.59, "grad_norm": 5.581983498008006, "learning_rate": 3.748547724569218e-06, "loss": 0.4947, "step": 20711 }, { "epoch": 0.59, "grad_norm": 3.407458991981536, "learning_rate": 3.7480987325426354e-06, "loss": 0.3882, "step": 20712 }, { "epoch": 0.59, "grad_norm": 5.2757914396888355, "learning_rate": 3.747649751286002e-06, "loss": 0.5814, "step": 20713 }, { "epoch": 0.59, "grad_norm": 7.2942527944189, "learning_rate": 3.7472007808031865e-06, "loss": 0.4775, "step": 20714 }, { "epoch": 0.59, "grad_norm": 5.745779084615056, "learning_rate": 3.746751821098047e-06, "loss": 0.1917, "step": 20715 }, { "epoch": 0.59, "grad_norm": 5.685895798360684, "learning_rate": 3.7463028721744484e-06, "loss": 0.3153, "step": 20716 }, { "epoch": 0.59, "grad_norm": 9.453558440556968, "learning_rate": 3.745853934036249e-06, "loss": 0.8294, "step": 20717 }, { "epoch": 0.59, "grad_norm": 2.9040379873817717, "learning_rate": 3.745405006687316e-06, "loss": 0.2139, "step": 20718 }, { "epoch": 0.59, "grad_norm": 9.341949850844097, "learning_rate": 3.74495609013151e-06, "loss": 0.22, "step": 20719 }, { "epoch": 0.59, "grad_norm": 3.568544446700729, "learning_rate": 3.744507184372691e-06, "loss": 0.4715, "step": 20720 }, { "epoch": 0.59, "grad_norm": 7.697164028129304, "learning_rate": 3.7440582894147226e-06, "loss": 0.4349, "step": 20721 }, { "epoch": 0.59, "grad_norm": 4.048145330465486, "learning_rate": 3.743609405261465e-06, "loss": 0.2654, "step": 20722 }, { "epoch": 0.59, "grad_norm": 5.780817623981352, "learning_rate": 3.743160531916783e-06, "loss": 0.4332, "step": 20723 }, { "epoch": 0.59, "grad_norm": 5.041899410371714, "learning_rate": 3.7427116693845344e-06, "loss": 0.5649, "step": 20724 }, { "epoch": 0.59, "grad_norm": 3.864145211253527, "learning_rate": 3.7422628176685816e-06, "loss": 0.3148, "step": 20725 }, { "epoch": 0.59, "grad_norm": 4.851270731385541, "learning_rate": 3.7418139767727894e-06, "loss": 0.6245, "step": 20726 }, { "epoch": 0.59, "grad_norm": 6.798905729153929, "learning_rate": 3.7413651467010158e-06, "loss": 0.4897, "step": 20727 }, { "epoch": 0.59, "grad_norm": 5.736680983385703, "learning_rate": 3.7409163274571234e-06, "loss": 0.554, "step": 20728 }, { "epoch": 0.59, "grad_norm": 6.18410275377129, "learning_rate": 3.740467519044971e-06, "loss": 0.5584, "step": 20729 }, { "epoch": 0.59, "grad_norm": 4.263403373368617, "learning_rate": 3.7400187214684234e-06, "loss": 0.3144, "step": 20730 }, { "epoch": 0.59, "grad_norm": 2.722220343526421, "learning_rate": 3.7395699347313386e-06, "loss": 0.2339, "step": 20731 }, { "epoch": 0.59, "grad_norm": 10.315658270670417, "learning_rate": 3.73912115883758e-06, "loss": 1.2009, "step": 20732 }, { "epoch": 0.59, "grad_norm": 5.864227238939868, "learning_rate": 3.7386723937910043e-06, "loss": 0.4171, "step": 20733 }, { "epoch": 0.59, "grad_norm": 5.833766712257146, "learning_rate": 3.738223639595475e-06, "loss": 0.6817, "step": 20734 }, { "epoch": 0.59, "grad_norm": 5.315436236188094, "learning_rate": 3.7377748962548544e-06, "loss": 0.4905, "step": 20735 }, { "epoch": 0.59, "grad_norm": 5.425421662358234, "learning_rate": 3.7373261637729984e-06, "loss": 0.3137, "step": 20736 }, { "epoch": 0.59, "grad_norm": 5.783597644772133, "learning_rate": 3.7368774421537723e-06, "loss": 0.4208, "step": 20737 }, { "epoch": 0.59, "grad_norm": 4.543711515470922, "learning_rate": 3.736428731401033e-06, "loss": 0.4956, "step": 20738 }, { "epoch": 0.59, "grad_norm": 2.8241339495367686, "learning_rate": 3.735980031518643e-06, "loss": 0.3296, "step": 20739 }, { "epoch": 0.59, "grad_norm": 5.7074822489812895, "learning_rate": 3.735531342510459e-06, "loss": 0.2767, "step": 20740 }, { "epoch": 0.59, "grad_norm": 2.8889246708129295, "learning_rate": 3.735082664380345e-06, "loss": 0.3047, "step": 20741 }, { "epoch": 0.59, "grad_norm": 2.5401164288381604, "learning_rate": 3.7346339971321577e-06, "loss": 0.1472, "step": 20742 }, { "epoch": 0.59, "grad_norm": 4.618745131225498, "learning_rate": 3.734185340769758e-06, "loss": 0.6728, "step": 20743 }, { "epoch": 0.59, "grad_norm": 2.333663139422691, "learning_rate": 3.7337366952970076e-06, "loss": 0.4675, "step": 20744 }, { "epoch": 0.59, "grad_norm": 6.013890003833662, "learning_rate": 3.7332880607177636e-06, "loss": 0.7853, "step": 20745 }, { "epoch": 0.59, "grad_norm": 5.837279547177936, "learning_rate": 3.732839437035888e-06, "loss": 0.6076, "step": 20746 }, { "epoch": 0.59, "grad_norm": 3.0759987434228377, "learning_rate": 3.7323908242552354e-06, "loss": 0.2912, "step": 20747 }, { "epoch": 0.59, "grad_norm": 5.581110905775472, "learning_rate": 3.731942222379673e-06, "loss": 0.5778, "step": 20748 }, { "epoch": 0.59, "grad_norm": 7.52586704214358, "learning_rate": 3.731493631413051e-06, "loss": 0.4351, "step": 20749 }, { "epoch": 0.59, "grad_norm": 2.294320080358852, "learning_rate": 3.7310450513592356e-06, "loss": 0.1956, "step": 20750 }, { "epoch": 0.59, "grad_norm": 6.279014721073852, "learning_rate": 3.730596482222083e-06, "loss": 0.6172, "step": 20751 }, { "epoch": 0.59, "grad_norm": 3.1450393929614653, "learning_rate": 3.730147924005452e-06, "loss": 0.2432, "step": 20752 }, { "epoch": 0.59, "grad_norm": 2.2630061173628158, "learning_rate": 3.729699376713203e-06, "loss": 0.2715, "step": 20753 }, { "epoch": 0.59, "grad_norm": 4.831033404727321, "learning_rate": 3.7292508403491936e-06, "loss": 0.3015, "step": 20754 }, { "epoch": 0.59, "grad_norm": 9.814853556041378, "learning_rate": 3.7288023149172824e-06, "loss": 0.8991, "step": 20755 }, { "epoch": 0.59, "grad_norm": 7.142158694178931, "learning_rate": 3.7283538004213276e-06, "loss": 0.8649, "step": 20756 }, { "epoch": 0.59, "grad_norm": 6.310936195881523, "learning_rate": 3.727905296865191e-06, "loss": 0.7206, "step": 20757 }, { "epoch": 0.59, "grad_norm": 8.474063853095535, "learning_rate": 3.7274568042527257e-06, "loss": 0.6235, "step": 20758 }, { "epoch": 0.59, "grad_norm": 11.313444825227927, "learning_rate": 3.727008322587793e-06, "loss": 0.5084, "step": 20759 }, { "epoch": 0.59, "grad_norm": 4.85569118195888, "learning_rate": 3.726559851874253e-06, "loss": 0.5489, "step": 20760 }, { "epoch": 0.59, "grad_norm": 4.186800770241792, "learning_rate": 3.726111392115961e-06, "loss": 0.285, "step": 20761 }, { "epoch": 0.59, "grad_norm": 5.592413566818408, "learning_rate": 3.7256629433167756e-06, "loss": 0.3257, "step": 20762 }, { "epoch": 0.59, "grad_norm": 3.7635210736504336, "learning_rate": 3.7252145054805543e-06, "loss": 0.5601, "step": 20763 }, { "epoch": 0.59, "grad_norm": 8.81067530361465, "learning_rate": 3.7247660786111574e-06, "loss": 0.7191, "step": 20764 }, { "epoch": 0.59, "grad_norm": 4.541698852331691, "learning_rate": 3.7243176627124394e-06, "loss": 0.7233, "step": 20765 }, { "epoch": 0.59, "grad_norm": 4.724264850647012, "learning_rate": 3.7238692577882584e-06, "loss": 0.2435, "step": 20766 }, { "epoch": 0.59, "grad_norm": 8.861764844525494, "learning_rate": 3.723420863842476e-06, "loss": 0.7102, "step": 20767 }, { "epoch": 0.59, "grad_norm": 5.610003869128465, "learning_rate": 3.722972480878945e-06, "loss": 0.5119, "step": 20768 }, { "epoch": 0.59, "grad_norm": 3.7505604007334106, "learning_rate": 3.7225241089015257e-06, "loss": 0.2304, "step": 20769 }, { "epoch": 0.59, "grad_norm": 4.888860073269588, "learning_rate": 3.7220757479140723e-06, "loss": 0.5141, "step": 20770 }, { "epoch": 0.59, "grad_norm": 6.264646991304392, "learning_rate": 3.721627397920446e-06, "loss": 0.6728, "step": 20771 }, { "epoch": 0.59, "grad_norm": 6.277327892993205, "learning_rate": 3.7211790589245e-06, "loss": 0.5258, "step": 20772 }, { "epoch": 0.59, "grad_norm": 6.363011026001495, "learning_rate": 3.7207307309300956e-06, "loss": 0.8014, "step": 20773 }, { "epoch": 0.59, "grad_norm": 8.300592397897656, "learning_rate": 3.7202824139410835e-06, "loss": 0.9501, "step": 20774 }, { "epoch": 0.59, "grad_norm": 8.684194471110176, "learning_rate": 3.7198341079613266e-06, "loss": 0.5842, "step": 20775 }, { "epoch": 0.59, "grad_norm": 6.041769292113274, "learning_rate": 3.719385812994679e-06, "loss": 0.5638, "step": 20776 }, { "epoch": 0.6, "grad_norm": 3.357409696351621, "learning_rate": 3.718937529044997e-06, "loss": 0.3666, "step": 20777 }, { "epoch": 0.6, "grad_norm": 5.885178332158869, "learning_rate": 3.71848925611614e-06, "loss": 0.5979, "step": 20778 }, { "epoch": 0.6, "grad_norm": 5.269512292419221, "learning_rate": 3.718040994211959e-06, "loss": 0.3399, "step": 20779 }, { "epoch": 0.6, "grad_norm": 9.241027398067066, "learning_rate": 3.7175927433363155e-06, "loss": 0.4107, "step": 20780 }, { "epoch": 0.6, "grad_norm": 4.237838588398288, "learning_rate": 3.717144503493063e-06, "loss": 0.1027, "step": 20781 }, { "epoch": 0.6, "grad_norm": 4.187676568365432, "learning_rate": 3.716696274686059e-06, "loss": 0.5664, "step": 20782 }, { "epoch": 0.6, "grad_norm": 7.160234033606417, "learning_rate": 3.7162480569191573e-06, "loss": 0.3682, "step": 20783 }, { "epoch": 0.6, "grad_norm": 4.086356788843223, "learning_rate": 3.715799850196216e-06, "loss": 0.4297, "step": 20784 }, { "epoch": 0.6, "grad_norm": 5.847211169758205, "learning_rate": 3.7153516545210923e-06, "loss": 0.4475, "step": 20785 }, { "epoch": 0.6, "grad_norm": 5.287956606761911, "learning_rate": 3.7149034698976372e-06, "loss": 0.2545, "step": 20786 }, { "epoch": 0.6, "grad_norm": 3.6491423095589473, "learning_rate": 3.714455296329712e-06, "loss": 0.3824, "step": 20787 }, { "epoch": 0.6, "grad_norm": 5.997668369564698, "learning_rate": 3.7140071338211687e-06, "loss": 0.3283, "step": 20788 }, { "epoch": 0.6, "grad_norm": 8.066008787546211, "learning_rate": 3.7135589823758645e-06, "loss": 0.7267, "step": 20789 }, { "epoch": 0.6, "grad_norm": 3.88645550780482, "learning_rate": 3.7131108419976514e-06, "loss": 0.3086, "step": 20790 }, { "epoch": 0.6, "grad_norm": 6.493832302948761, "learning_rate": 3.7126627126903893e-06, "loss": 0.5775, "step": 20791 }, { "epoch": 0.6, "grad_norm": 2.760013836432065, "learning_rate": 3.712214594457932e-06, "loss": 0.2045, "step": 20792 }, { "epoch": 0.6, "grad_norm": 9.927843092151075, "learning_rate": 3.7117664873041313e-06, "loss": 0.714, "step": 20793 }, { "epoch": 0.6, "grad_norm": 6.266080996693939, "learning_rate": 3.7113183912328475e-06, "loss": 0.53, "step": 20794 }, { "epoch": 0.6, "grad_norm": 4.864467491496893, "learning_rate": 3.7108703062479325e-06, "loss": 0.4516, "step": 20795 }, { "epoch": 0.6, "grad_norm": 5.552935424556991, "learning_rate": 3.7104222323532414e-06, "loss": 0.5698, "step": 20796 }, { "epoch": 0.6, "grad_norm": 4.28716459825267, "learning_rate": 3.7099741695526274e-06, "loss": 0.6423, "step": 20797 }, { "epoch": 0.6, "grad_norm": 2.8905205372673164, "learning_rate": 3.7095261178499493e-06, "loss": 0.2404, "step": 20798 }, { "epoch": 0.6, "grad_norm": 4.629609053813281, "learning_rate": 3.7090780772490575e-06, "loss": 0.435, "step": 20799 }, { "epoch": 0.6, "grad_norm": 6.127584184732104, "learning_rate": 3.7086300477538074e-06, "loss": 0.5908, "step": 20800 }, { "epoch": 0.6, "grad_norm": 7.722291697579159, "learning_rate": 3.708182029368056e-06, "loss": 0.7699, "step": 20801 }, { "epoch": 0.6, "grad_norm": 4.435414428614812, "learning_rate": 3.7077340220956546e-06, "loss": 0.4423, "step": 20802 }, { "epoch": 0.6, "grad_norm": 6.456853016022118, "learning_rate": 3.707286025940459e-06, "loss": 0.3819, "step": 20803 }, { "epoch": 0.6, "grad_norm": 3.3890007628977195, "learning_rate": 3.7068380409063208e-06, "loss": 0.1635, "step": 20804 }, { "epoch": 0.6, "grad_norm": 7.294064227529277, "learning_rate": 3.7063900669970988e-06, "loss": 0.5707, "step": 20805 }, { "epoch": 0.6, "grad_norm": 4.329336884976375, "learning_rate": 3.7059421042166403e-06, "loss": 0.5201, "step": 20806 }, { "epoch": 0.6, "grad_norm": 4.585592591606752, "learning_rate": 3.7054941525688024e-06, "loss": 0.4294, "step": 20807 }, { "epoch": 0.6, "grad_norm": 5.113676494479451, "learning_rate": 3.7050462120574433e-06, "loss": 0.6158, "step": 20808 }, { "epoch": 0.6, "grad_norm": 5.0934963982294965, "learning_rate": 3.704598282686408e-06, "loss": 0.364, "step": 20809 }, { "epoch": 0.6, "grad_norm": 6.0013763120771015, "learning_rate": 3.704150364459557e-06, "loss": 0.5798, "step": 20810 }, { "epoch": 0.6, "grad_norm": 3.705219272143018, "learning_rate": 3.703702457380739e-06, "loss": 0.4553, "step": 20811 }, { "epoch": 0.6, "grad_norm": 4.608557573672166, "learning_rate": 3.7032545614538106e-06, "loss": 0.5638, "step": 20812 }, { "epoch": 0.6, "grad_norm": 6.955045970487839, "learning_rate": 3.702806676682621e-06, "loss": 0.9878, "step": 20813 }, { "epoch": 0.6, "grad_norm": 5.143035694459059, "learning_rate": 3.702358803071029e-06, "loss": 0.5069, "step": 20814 }, { "epoch": 0.6, "grad_norm": 7.328863696381531, "learning_rate": 3.7019109406228826e-06, "loss": 0.7703, "step": 20815 }, { "epoch": 0.6, "grad_norm": 4.8516101681245685, "learning_rate": 3.7014630893420356e-06, "loss": 0.5668, "step": 20816 }, { "epoch": 0.6, "grad_norm": 5.4778100958501215, "learning_rate": 3.7010152492323436e-06, "loss": 0.6098, "step": 20817 }, { "epoch": 0.6, "grad_norm": 1.6640107052761288, "learning_rate": 3.7005674202976573e-06, "loss": 0.2136, "step": 20818 }, { "epoch": 0.6, "grad_norm": 5.177249846924333, "learning_rate": 3.7001196025418297e-06, "loss": 0.3036, "step": 20819 }, { "epoch": 0.6, "grad_norm": 2.950869511301108, "learning_rate": 3.699671795968711e-06, "loss": 0.1274, "step": 20820 }, { "epoch": 0.6, "grad_norm": 4.658381710187995, "learning_rate": 3.6992240005821576e-06, "loss": 0.4871, "step": 20821 }, { "epoch": 0.6, "grad_norm": 8.18917497580573, "learning_rate": 3.6987762163860198e-06, "loss": 0.3944, "step": 20822 }, { "epoch": 0.6, "grad_norm": 4.108226020809398, "learning_rate": 3.698328443384151e-06, "loss": 0.3522, "step": 20823 }, { "epoch": 0.6, "grad_norm": 13.354612123833, "learning_rate": 3.6978806815803996e-06, "loss": 0.7183, "step": 20824 }, { "epoch": 0.6, "grad_norm": 5.8272496606708115, "learning_rate": 3.697432930978622e-06, "loss": 0.4428, "step": 20825 }, { "epoch": 0.6, "grad_norm": 5.771434183434991, "learning_rate": 3.6969851915826694e-06, "loss": 0.4538, "step": 20826 }, { "epoch": 0.6, "grad_norm": 7.501825555512754, "learning_rate": 3.6965374633963904e-06, "loss": 0.5983, "step": 20827 }, { "epoch": 0.6, "grad_norm": 6.695067357813451, "learning_rate": 3.6960897464236413e-06, "loss": 0.4502, "step": 20828 }, { "epoch": 0.6, "grad_norm": 7.12603089755049, "learning_rate": 3.6956420406682707e-06, "loss": 0.9175, "step": 20829 }, { "epoch": 0.6, "grad_norm": 7.782994178255157, "learning_rate": 3.695194346134132e-06, "loss": 0.6021, "step": 20830 }, { "epoch": 0.6, "grad_norm": 7.046924937678172, "learning_rate": 3.694746662825074e-06, "loss": 1.1122, "step": 20831 }, { "epoch": 0.6, "grad_norm": 5.214531707000095, "learning_rate": 3.69429899074495e-06, "loss": 0.5163, "step": 20832 }, { "epoch": 0.6, "grad_norm": 7.253744473807744, "learning_rate": 3.6938513298976125e-06, "loss": 1.0313, "step": 20833 }, { "epoch": 0.6, "grad_norm": 4.821694248690645, "learning_rate": 3.693403680286909e-06, "loss": 0.365, "step": 20834 }, { "epoch": 0.6, "grad_norm": 7.2020070497244735, "learning_rate": 3.692956041916696e-06, "loss": 0.5919, "step": 20835 }, { "epoch": 0.6, "grad_norm": 6.906603773903442, "learning_rate": 3.692508414790818e-06, "loss": 0.2039, "step": 20836 }, { "epoch": 0.6, "grad_norm": 4.3711930741794225, "learning_rate": 3.6920607989131312e-06, "loss": 0.5632, "step": 20837 }, { "epoch": 0.6, "grad_norm": 3.8796323344456334, "learning_rate": 3.691613194287483e-06, "loss": 0.1683, "step": 20838 }, { "epoch": 0.6, "grad_norm": 2.5542082147397913, "learning_rate": 3.6911656009177277e-06, "loss": 0.2696, "step": 20839 }, { "epoch": 0.6, "grad_norm": 3.1607593389999367, "learning_rate": 3.6907180188077103e-06, "loss": 0.2915, "step": 20840 }, { "epoch": 0.6, "grad_norm": 6.384801883775428, "learning_rate": 3.690270447961286e-06, "loss": 0.5901, "step": 20841 }, { "epoch": 0.6, "grad_norm": 3.803007957519383, "learning_rate": 3.6898228883823043e-06, "loss": 0.2567, "step": 20842 }, { "epoch": 0.6, "grad_norm": 7.953336986422066, "learning_rate": 3.6893753400746135e-06, "loss": 0.3016, "step": 20843 }, { "epoch": 0.6, "grad_norm": 13.849785791386275, "learning_rate": 3.688927803042067e-06, "loss": 0.7059, "step": 20844 }, { "epoch": 0.6, "grad_norm": 2.7388193469754674, "learning_rate": 3.6884802772885124e-06, "loss": 0.304, "step": 20845 }, { "epoch": 0.6, "grad_norm": 5.1427363086310685, "learning_rate": 3.6880327628178016e-06, "loss": 0.5887, "step": 20846 }, { "epoch": 0.6, "grad_norm": 5.634067644229796, "learning_rate": 3.6875852596337815e-06, "loss": 0.5996, "step": 20847 }, { "epoch": 0.6, "grad_norm": 7.471879215789716, "learning_rate": 3.6871377677403044e-06, "loss": 0.8393, "step": 20848 }, { "epoch": 0.6, "grad_norm": 5.6967374172954806, "learning_rate": 3.68669028714122e-06, "loss": 0.2752, "step": 20849 }, { "epoch": 0.6, "grad_norm": 4.23726729820268, "learning_rate": 3.6862428178403763e-06, "loss": 0.5746, "step": 20850 }, { "epoch": 0.6, "grad_norm": 11.343578663102148, "learning_rate": 3.6857953598416256e-06, "loss": 0.9856, "step": 20851 }, { "epoch": 0.6, "grad_norm": 9.602312920020962, "learning_rate": 3.6853479131488145e-06, "loss": 0.4912, "step": 20852 }, { "epoch": 0.6, "grad_norm": 5.936757252062937, "learning_rate": 3.684900477765795e-06, "loss": 0.3797, "step": 20853 }, { "epoch": 0.6, "grad_norm": 3.0434494716159324, "learning_rate": 3.6844530536964117e-06, "loss": 0.1766, "step": 20854 }, { "epoch": 0.6, "grad_norm": 4.364358084366035, "learning_rate": 3.6840056409445196e-06, "loss": 0.2819, "step": 20855 }, { "epoch": 0.6, "grad_norm": 7.164816946658154, "learning_rate": 3.6835582395139635e-06, "loss": 0.269, "step": 20856 }, { "epoch": 0.6, "grad_norm": 7.847702914868724, "learning_rate": 3.6831108494085932e-06, "loss": 0.4656, "step": 20857 }, { "epoch": 0.6, "grad_norm": 6.604654633056936, "learning_rate": 3.68266347063226e-06, "loss": 0.7537, "step": 20858 }, { "epoch": 0.6, "grad_norm": 2.8022048715848715, "learning_rate": 3.6822161031888093e-06, "loss": 0.2918, "step": 20859 }, { "epoch": 0.6, "grad_norm": 5.824826531835654, "learning_rate": 3.681768747082093e-06, "loss": 0.3043, "step": 20860 }, { "epoch": 0.6, "grad_norm": 6.017037619658439, "learning_rate": 3.6813214023159553e-06, "loss": 0.3685, "step": 20861 }, { "epoch": 0.6, "grad_norm": 5.2586227631551345, "learning_rate": 3.6808740688942497e-06, "loss": 0.2328, "step": 20862 }, { "epoch": 0.6, "grad_norm": 4.510086623076795, "learning_rate": 3.6804267468208204e-06, "loss": 0.2919, "step": 20863 }, { "epoch": 0.6, "grad_norm": 6.487170519942327, "learning_rate": 3.679979436099519e-06, "loss": 0.3282, "step": 20864 }, { "epoch": 0.6, "grad_norm": 3.7258920126360935, "learning_rate": 3.6795321367341906e-06, "loss": 0.2308, "step": 20865 }, { "epoch": 0.6, "grad_norm": 2.889989525304113, "learning_rate": 3.6790848487286835e-06, "loss": 0.33, "step": 20866 }, { "epoch": 0.6, "grad_norm": 10.007257593570147, "learning_rate": 3.6786375720868494e-06, "loss": 0.6846, "step": 20867 }, { "epoch": 0.6, "grad_norm": 5.124526094995318, "learning_rate": 3.678190306812532e-06, "loss": 0.8322, "step": 20868 }, { "epoch": 0.6, "grad_norm": 5.66676041114158, "learning_rate": 3.6777430529095815e-06, "loss": 0.348, "step": 20869 }, { "epoch": 0.6, "grad_norm": 6.719961970823403, "learning_rate": 3.6772958103818435e-06, "loss": 0.6407, "step": 20870 }, { "epoch": 0.6, "grad_norm": 4.189878499948398, "learning_rate": 3.676848579233168e-06, "loss": 0.3549, "step": 20871 }, { "epoch": 0.6, "grad_norm": 4.620877361632481, "learning_rate": 3.676401359467401e-06, "loss": 0.2782, "step": 20872 }, { "epoch": 0.6, "grad_norm": 2.4572010072610744, "learning_rate": 3.6759541510883886e-06, "loss": 0.2689, "step": 20873 }, { "epoch": 0.6, "grad_norm": 3.9352086984434735, "learning_rate": 3.6755069540999815e-06, "loss": 0.1321, "step": 20874 }, { "epoch": 0.6, "grad_norm": 9.803694157225529, "learning_rate": 3.675059768506024e-06, "loss": 0.3037, "step": 20875 }, { "epoch": 0.6, "grad_norm": 2.9542669480111963, "learning_rate": 3.674612594310366e-06, "loss": 0.2038, "step": 20876 }, { "epoch": 0.6, "grad_norm": 7.725091097890246, "learning_rate": 3.6741654315168497e-06, "loss": 0.4882, "step": 20877 }, { "epoch": 0.6, "grad_norm": 4.611794098799609, "learning_rate": 3.6737182801293283e-06, "loss": 0.6061, "step": 20878 }, { "epoch": 0.6, "grad_norm": 6.579346387106455, "learning_rate": 3.673271140151643e-06, "loss": 0.4418, "step": 20879 }, { "epoch": 0.6, "grad_norm": 10.845499738520447, "learning_rate": 3.6728240115876433e-06, "loss": 0.5364, "step": 20880 }, { "epoch": 0.6, "grad_norm": 6.825846625835057, "learning_rate": 3.672376894441174e-06, "loss": 1.146, "step": 20881 }, { "epoch": 0.6, "grad_norm": 5.4420986241660625, "learning_rate": 3.671929788716084e-06, "loss": 0.427, "step": 20882 }, { "epoch": 0.6, "grad_norm": 2.957031179450776, "learning_rate": 3.6714826944162197e-06, "loss": 0.3031, "step": 20883 }, { "epoch": 0.6, "grad_norm": 3.291548217783015, "learning_rate": 3.6710356115454238e-06, "loss": 0.498, "step": 20884 }, { "epoch": 0.6, "grad_norm": 3.2287872717105777, "learning_rate": 3.670588540107547e-06, "loss": 0.1356, "step": 20885 }, { "epoch": 0.6, "grad_norm": 7.614833855497789, "learning_rate": 3.6701414801064317e-06, "loss": 0.3545, "step": 20886 }, { "epoch": 0.6, "grad_norm": 3.326303225223675, "learning_rate": 3.669694431545927e-06, "loss": 0.2325, "step": 20887 }, { "epoch": 0.6, "grad_norm": 4.037523932135075, "learning_rate": 3.6692473944298757e-06, "loss": 0.2993, "step": 20888 }, { "epoch": 0.6, "grad_norm": 7.194756137407996, "learning_rate": 3.6688003687621255e-06, "loss": 0.6946, "step": 20889 }, { "epoch": 0.6, "grad_norm": 6.460730626624001, "learning_rate": 3.6683533545465233e-06, "loss": 0.7236, "step": 20890 }, { "epoch": 0.6, "grad_norm": 5.059165421516485, "learning_rate": 3.667906351786911e-06, "loss": 0.6536, "step": 20891 }, { "epoch": 0.6, "grad_norm": 8.43136984341948, "learning_rate": 3.667459360487139e-06, "loss": 0.6531, "step": 20892 }, { "epoch": 0.6, "grad_norm": 5.30683446480286, "learning_rate": 3.6670123806510484e-06, "loss": 0.3298, "step": 20893 }, { "epoch": 0.6, "grad_norm": 6.460259509233026, "learning_rate": 3.666565412282487e-06, "loss": 0.3359, "step": 20894 }, { "epoch": 0.6, "grad_norm": 5.534387334533077, "learning_rate": 3.666118455385298e-06, "loss": 0.502, "step": 20895 }, { "epoch": 0.6, "grad_norm": 9.246855691088403, "learning_rate": 3.6656715099633295e-06, "loss": 1.0439, "step": 20896 }, { "epoch": 0.6, "grad_norm": 3.487929459931433, "learning_rate": 3.6652245760204227e-06, "loss": 0.4853, "step": 20897 }, { "epoch": 0.6, "grad_norm": 4.061951321449648, "learning_rate": 3.664777653560425e-06, "loss": 0.3955, "step": 20898 }, { "epoch": 0.6, "grad_norm": 3.7141254745491827, "learning_rate": 3.664330742587182e-06, "loss": 0.5508, "step": 20899 }, { "epoch": 0.6, "grad_norm": 3.353066664843344, "learning_rate": 3.663883843104535e-06, "loss": 0.3675, "step": 20900 }, { "epoch": 0.6, "grad_norm": 6.008368933894504, "learning_rate": 3.663436955116333e-06, "loss": 0.4106, "step": 20901 }, { "epoch": 0.6, "grad_norm": 4.628927586502282, "learning_rate": 3.662990078626417e-06, "loss": 0.2596, "step": 20902 }, { "epoch": 0.6, "grad_norm": 6.784306834830363, "learning_rate": 3.662543213638634e-06, "loss": 0.6129, "step": 20903 }, { "epoch": 0.6, "grad_norm": 5.817312647774362, "learning_rate": 3.6620963601568247e-06, "loss": 0.5882, "step": 20904 }, { "epoch": 0.6, "grad_norm": 5.3104335356133, "learning_rate": 3.6616495181848383e-06, "loss": 0.251, "step": 20905 }, { "epoch": 0.6, "grad_norm": 5.470051158338029, "learning_rate": 3.661202687726515e-06, "loss": 0.5019, "step": 20906 }, { "epoch": 0.6, "grad_norm": 3.2259151372750634, "learning_rate": 3.660755868785699e-06, "loss": 0.2451, "step": 20907 }, { "epoch": 0.6, "grad_norm": 4.247976817287794, "learning_rate": 3.6603090613662372e-06, "loss": 0.3432, "step": 20908 }, { "epoch": 0.6, "grad_norm": 6.840950066522383, "learning_rate": 3.659862265471972e-06, "loss": 0.7383, "step": 20909 }, { "epoch": 0.6, "grad_norm": 4.273124502041654, "learning_rate": 3.6594154811067463e-06, "loss": 0.2707, "step": 20910 }, { "epoch": 0.6, "grad_norm": 5.415045490725445, "learning_rate": 3.658968708274402e-06, "loss": 0.3613, "step": 20911 }, { "epoch": 0.6, "grad_norm": 3.4235205678120018, "learning_rate": 3.6585219469787876e-06, "loss": 0.1569, "step": 20912 }, { "epoch": 0.6, "grad_norm": 4.223270932609462, "learning_rate": 3.658075197223743e-06, "loss": 0.4287, "step": 20913 }, { "epoch": 0.6, "grad_norm": 3.156978523099175, "learning_rate": 3.65762845901311e-06, "loss": 0.2997, "step": 20914 }, { "epoch": 0.6, "grad_norm": 5.557520791657097, "learning_rate": 3.657181732350737e-06, "loss": 0.6971, "step": 20915 }, { "epoch": 0.6, "grad_norm": 11.533438994267271, "learning_rate": 3.6567350172404624e-06, "loss": 0.7444, "step": 20916 }, { "epoch": 0.6, "grad_norm": 3.5743745696909164, "learning_rate": 3.6562883136861334e-06, "loss": 0.2803, "step": 20917 }, { "epoch": 0.6, "grad_norm": 5.965724877490062, "learning_rate": 3.655841621691587e-06, "loss": 0.4864, "step": 20918 }, { "epoch": 0.6, "grad_norm": 7.049476092736812, "learning_rate": 3.6553949412606726e-06, "loss": 0.819, "step": 20919 }, { "epoch": 0.6, "grad_norm": 4.508239382095708, "learning_rate": 3.6549482723972298e-06, "loss": 0.2616, "step": 20920 }, { "epoch": 0.6, "grad_norm": 5.315877211214684, "learning_rate": 3.654501615105101e-06, "loss": 0.2714, "step": 20921 }, { "epoch": 0.6, "grad_norm": 4.5155597668856124, "learning_rate": 3.6540549693881278e-06, "loss": 0.5256, "step": 20922 }, { "epoch": 0.6, "grad_norm": 4.653614552852939, "learning_rate": 3.6536083352501546e-06, "loss": 0.313, "step": 20923 }, { "epoch": 0.6, "grad_norm": 10.075089486910302, "learning_rate": 3.653161712695024e-06, "loss": 0.2735, "step": 20924 }, { "epoch": 0.6, "grad_norm": 4.527061289022498, "learning_rate": 3.652715101726577e-06, "loss": 0.403, "step": 20925 }, { "epoch": 0.6, "grad_norm": 4.246664758099936, "learning_rate": 3.652268502348657e-06, "loss": 0.504, "step": 20926 }, { "epoch": 0.6, "grad_norm": 3.027778564729112, "learning_rate": 3.6518219145651023e-06, "loss": 0.3003, "step": 20927 }, { "epoch": 0.6, "grad_norm": 25.19869102832557, "learning_rate": 3.65137533837976e-06, "loss": 0.6504, "step": 20928 }, { "epoch": 0.6, "grad_norm": 4.918226651920202, "learning_rate": 3.6509287737964695e-06, "loss": 0.4853, "step": 20929 }, { "epoch": 0.6, "grad_norm": 8.284912335997355, "learning_rate": 3.6504822208190727e-06, "loss": 0.9177, "step": 20930 }, { "epoch": 0.6, "grad_norm": 8.73013830733009, "learning_rate": 3.65003567945141e-06, "loss": 0.8113, "step": 20931 }, { "epoch": 0.6, "grad_norm": 6.197180371399889, "learning_rate": 3.6495891496973246e-06, "loss": 0.507, "step": 20932 }, { "epoch": 0.6, "grad_norm": 4.741549001126555, "learning_rate": 3.649142631560658e-06, "loss": 0.6041, "step": 20933 }, { "epoch": 0.6, "grad_norm": 3.6284660671897817, "learning_rate": 3.6486961250452503e-06, "loss": 0.203, "step": 20934 }, { "epoch": 0.6, "grad_norm": 4.393527934511653, "learning_rate": 3.6482496301549442e-06, "loss": 0.2899, "step": 20935 }, { "epoch": 0.6, "grad_norm": 4.140239769631298, "learning_rate": 3.6478031468935798e-06, "loss": 0.5274, "step": 20936 }, { "epoch": 0.6, "grad_norm": 4.358105409666513, "learning_rate": 3.647356675264999e-06, "loss": 0.2725, "step": 20937 }, { "epoch": 0.6, "grad_norm": 9.202501181944648, "learning_rate": 3.6469102152730407e-06, "loss": 0.5687, "step": 20938 }, { "epoch": 0.6, "grad_norm": 4.565550346208043, "learning_rate": 3.646463766921548e-06, "loss": 0.5077, "step": 20939 }, { "epoch": 0.6, "grad_norm": 4.824463180959726, "learning_rate": 3.646017330214362e-06, "loss": 0.4696, "step": 20940 }, { "epoch": 0.6, "grad_norm": 5.699630753787714, "learning_rate": 3.64557090515532e-06, "loss": 0.3503, "step": 20941 }, { "epoch": 0.6, "grad_norm": 8.22592673602822, "learning_rate": 3.6451244917482664e-06, "loss": 0.6768, "step": 20942 }, { "epoch": 0.6, "grad_norm": 10.485620052356715, "learning_rate": 3.6446780899970396e-06, "loss": 1.0212, "step": 20943 }, { "epoch": 0.6, "grad_norm": 4.3933227097962195, "learning_rate": 3.6442316999054813e-06, "loss": 0.303, "step": 20944 }, { "epoch": 0.6, "grad_norm": 5.115472450848088, "learning_rate": 3.643785321477428e-06, "loss": 0.2379, "step": 20945 }, { "epoch": 0.6, "grad_norm": 3.50949810624007, "learning_rate": 3.6433389547167254e-06, "loss": 0.4292, "step": 20946 }, { "epoch": 0.6, "grad_norm": 7.137326202681668, "learning_rate": 3.6428925996272095e-06, "loss": 0.7034, "step": 20947 }, { "epoch": 0.6, "grad_norm": 4.013632684595202, "learning_rate": 3.6424462562127206e-06, "loss": 0.5207, "step": 20948 }, { "epoch": 0.6, "grad_norm": 4.461053155311714, "learning_rate": 3.641999924477101e-06, "loss": 0.2427, "step": 20949 }, { "epoch": 0.6, "grad_norm": 6.171321023150259, "learning_rate": 3.6415536044241884e-06, "loss": 0.4805, "step": 20950 }, { "epoch": 0.6, "grad_norm": 5.252930368315977, "learning_rate": 3.641107296057823e-06, "loss": 0.5807, "step": 20951 }, { "epoch": 0.6, "grad_norm": 3.3652506344712942, "learning_rate": 3.640660999381843e-06, "loss": 0.5693, "step": 20952 }, { "epoch": 0.6, "grad_norm": 5.333754532791762, "learning_rate": 3.6402147144000923e-06, "loss": 0.5506, "step": 20953 }, { "epoch": 0.6, "grad_norm": 4.360604526405556, "learning_rate": 3.6397684411164036e-06, "loss": 0.4823, "step": 20954 }, { "epoch": 0.6, "grad_norm": 2.5358892477337305, "learning_rate": 3.6393221795346202e-06, "loss": 0.1524, "step": 20955 }, { "epoch": 0.6, "grad_norm": 6.287102245695321, "learning_rate": 3.638875929658582e-06, "loss": 0.682, "step": 20956 }, { "epoch": 0.6, "grad_norm": 3.8930746126489137, "learning_rate": 3.638429691492125e-06, "loss": 0.4748, "step": 20957 }, { "epoch": 0.6, "grad_norm": 5.0658514914689485, "learning_rate": 3.6379834650390904e-06, "loss": 0.7325, "step": 20958 }, { "epoch": 0.6, "grad_norm": 7.388983449434409, "learning_rate": 3.6375372503033163e-06, "loss": 0.4511, "step": 20959 }, { "epoch": 0.6, "grad_norm": 5.147290323404746, "learning_rate": 3.6370910472886423e-06, "loss": 0.4211, "step": 20960 }, { "epoch": 0.6, "grad_norm": 6.45398091327409, "learning_rate": 3.636644855998904e-06, "loss": 0.4439, "step": 20961 }, { "epoch": 0.6, "grad_norm": 3.4781654520073038, "learning_rate": 3.6361986764379443e-06, "loss": 0.3983, "step": 20962 }, { "epoch": 0.6, "grad_norm": 4.494160970006032, "learning_rate": 3.635752508609599e-06, "loss": 0.347, "step": 20963 }, { "epoch": 0.6, "grad_norm": 6.953410796388965, "learning_rate": 3.6353063525177048e-06, "loss": 0.6117, "step": 20964 }, { "epoch": 0.6, "grad_norm": 6.99649198368867, "learning_rate": 3.634860208166105e-06, "loss": 0.5294, "step": 20965 }, { "epoch": 0.6, "grad_norm": 3.605330492470565, "learning_rate": 3.6344140755586333e-06, "loss": 0.296, "step": 20966 }, { "epoch": 0.6, "grad_norm": 4.877079397941651, "learning_rate": 3.6339679546991303e-06, "loss": 0.5983, "step": 20967 }, { "epoch": 0.6, "grad_norm": 5.298910194489767, "learning_rate": 3.6335218455914306e-06, "loss": 0.4281, "step": 20968 }, { "epoch": 0.6, "grad_norm": 5.4916977209545506, "learning_rate": 3.6330757482393763e-06, "loss": 0.4999, "step": 20969 }, { "epoch": 0.6, "grad_norm": 5.405042921490476, "learning_rate": 3.632629662646803e-06, "loss": 0.4345, "step": 20970 }, { "epoch": 0.6, "grad_norm": 4.853688190547051, "learning_rate": 3.6321835888175476e-06, "loss": 0.4045, "step": 20971 }, { "epoch": 0.6, "grad_norm": 7.678029965164605, "learning_rate": 3.6317375267554478e-06, "loss": 0.6106, "step": 20972 }, { "epoch": 0.6, "grad_norm": 13.412404032823394, "learning_rate": 3.6312914764643416e-06, "loss": 0.7503, "step": 20973 }, { "epoch": 0.6, "grad_norm": 6.250558599304923, "learning_rate": 3.630845437948068e-06, "loss": 0.6552, "step": 20974 }, { "epoch": 0.6, "grad_norm": 9.923589411122222, "learning_rate": 3.6303994112104602e-06, "loss": 0.4367, "step": 20975 }, { "epoch": 0.6, "grad_norm": 3.5993832099557284, "learning_rate": 3.6299533962553596e-06, "loss": 0.1617, "step": 20976 }, { "epoch": 0.6, "grad_norm": 4.836742032228732, "learning_rate": 3.6295073930866004e-06, "loss": 0.1936, "step": 20977 }, { "epoch": 0.6, "grad_norm": 5.141237958638461, "learning_rate": 3.629061401708022e-06, "loss": 0.5472, "step": 20978 }, { "epoch": 0.6, "grad_norm": 6.73800695773247, "learning_rate": 3.628615422123456e-06, "loss": 0.6152, "step": 20979 }, { "epoch": 0.6, "grad_norm": 8.41713486366958, "learning_rate": 3.6281694543367445e-06, "loss": 1.0546, "step": 20980 }, { "epoch": 0.6, "grad_norm": 10.101614330185333, "learning_rate": 3.6277234983517236e-06, "loss": 0.8271, "step": 20981 }, { "epoch": 0.6, "grad_norm": 7.174193296373841, "learning_rate": 3.627277554172226e-06, "loss": 0.6395, "step": 20982 }, { "epoch": 0.6, "grad_norm": 6.564154479960761, "learning_rate": 3.6268316218020936e-06, "loss": 0.4299, "step": 20983 }, { "epoch": 0.6, "grad_norm": 9.438408410013258, "learning_rate": 3.6263857012451574e-06, "loss": 0.4151, "step": 20984 }, { "epoch": 0.6, "grad_norm": 7.117481950014124, "learning_rate": 3.6259397925052575e-06, "loss": 0.209, "step": 20985 }, { "epoch": 0.6, "grad_norm": 3.4365644395598514, "learning_rate": 3.6254938955862275e-06, "loss": 0.5994, "step": 20986 }, { "epoch": 0.6, "grad_norm": 7.240840848389876, "learning_rate": 3.625048010491905e-06, "loss": 0.4172, "step": 20987 }, { "epoch": 0.6, "grad_norm": 8.632027430675501, "learning_rate": 3.624602137226123e-06, "loss": 0.6012, "step": 20988 }, { "epoch": 0.6, "grad_norm": 2.3699745289323118, "learning_rate": 3.6241562757927206e-06, "loss": 0.2936, "step": 20989 }, { "epoch": 0.6, "grad_norm": 5.367794174711153, "learning_rate": 3.623710426195534e-06, "loss": 0.9305, "step": 20990 }, { "epoch": 0.6, "grad_norm": 4.852562105704117, "learning_rate": 3.6232645884383937e-06, "loss": 0.3241, "step": 20991 }, { "epoch": 0.6, "grad_norm": 12.638151362958384, "learning_rate": 3.622818762525142e-06, "loss": 0.9505, "step": 20992 }, { "epoch": 0.6, "grad_norm": 4.517422671350228, "learning_rate": 3.6223729484596093e-06, "loss": 0.3251, "step": 20993 }, { "epoch": 0.6, "grad_norm": 4.888406414851769, "learning_rate": 3.621927146245634e-06, "loss": 0.4632, "step": 20994 }, { "epoch": 0.6, "grad_norm": 6.886562681025447, "learning_rate": 3.621481355887047e-06, "loss": 0.946, "step": 20995 }, { "epoch": 0.6, "grad_norm": 6.282189901450428, "learning_rate": 3.6210355773876877e-06, "loss": 0.5457, "step": 20996 }, { "epoch": 0.6, "grad_norm": 7.2367477315927555, "learning_rate": 3.6205898107513904e-06, "loss": 0.3623, "step": 20997 }, { "epoch": 0.6, "grad_norm": 6.3228171234423165, "learning_rate": 3.6201440559819874e-06, "loss": 0.3746, "step": 20998 }, { "epoch": 0.6, "grad_norm": 5.84386623491364, "learning_rate": 3.6196983130833167e-06, "loss": 0.4832, "step": 20999 }, { "epoch": 0.6, "grad_norm": 7.169599783034556, "learning_rate": 3.6192525820592105e-06, "loss": 0.7535, "step": 21000 }, { "epoch": 0.6, "grad_norm": 11.285768613115446, "learning_rate": 3.6188068629135065e-06, "loss": 1.0657, "step": 21001 }, { "epoch": 0.6, "grad_norm": 2.946770193475299, "learning_rate": 3.6183611556500336e-06, "loss": 0.2116, "step": 21002 }, { "epoch": 0.6, "grad_norm": 5.38237456158275, "learning_rate": 3.617915460272632e-06, "loss": 0.4043, "step": 21003 }, { "epoch": 0.6, "grad_norm": 3.728043571791587, "learning_rate": 3.617469776785133e-06, "loss": 0.2922, "step": 21004 }, { "epoch": 0.6, "grad_norm": 3.799947900163971, "learning_rate": 3.6170241051913697e-06, "loss": 0.4594, "step": 21005 }, { "epoch": 0.6, "grad_norm": 3.9706756545813318, "learning_rate": 3.6165784454951796e-06, "loss": 0.5825, "step": 21006 }, { "epoch": 0.6, "grad_norm": 5.793830956610495, "learning_rate": 3.6161327977003944e-06, "loss": 0.3708, "step": 21007 }, { "epoch": 0.6, "grad_norm": 4.3371864513333005, "learning_rate": 3.6156871618108493e-06, "loss": 0.5863, "step": 21008 }, { "epoch": 0.6, "grad_norm": 7.063237852189603, "learning_rate": 3.6152415378303747e-06, "loss": 0.4846, "step": 21009 }, { "epoch": 0.6, "grad_norm": 3.58897527207566, "learning_rate": 3.6147959257628095e-06, "loss": 0.3124, "step": 21010 }, { "epoch": 0.6, "grad_norm": 6.827126541288107, "learning_rate": 3.6143503256119815e-06, "loss": 0.4916, "step": 21011 }, { "epoch": 0.6, "grad_norm": 3.7864948185243397, "learning_rate": 3.6139047373817298e-06, "loss": 0.5895, "step": 21012 }, { "epoch": 0.6, "grad_norm": 4.475270749005164, "learning_rate": 3.6134591610758833e-06, "loss": 0.5732, "step": 21013 }, { "epoch": 0.6, "grad_norm": 6.049183565017985, "learning_rate": 3.613013596698276e-06, "loss": 0.5256, "step": 21014 }, { "epoch": 0.6, "grad_norm": 13.09986155778953, "learning_rate": 3.6125680442527445e-06, "loss": 0.6947, "step": 21015 }, { "epoch": 0.6, "grad_norm": 4.4707864242799475, "learning_rate": 3.6121225037431174e-06, "loss": 0.2085, "step": 21016 }, { "epoch": 0.6, "grad_norm": 10.996208751341353, "learning_rate": 3.611676975173231e-06, "loss": 1.038, "step": 21017 }, { "epoch": 0.6, "grad_norm": 4.3877524485328845, "learning_rate": 3.6112314585469146e-06, "loss": 0.3795, "step": 21018 }, { "epoch": 0.6, "grad_norm": 4.416381280903705, "learning_rate": 3.610785953868005e-06, "loss": 0.4676, "step": 21019 }, { "epoch": 0.6, "grad_norm": 5.627610173208223, "learning_rate": 3.610340461140332e-06, "loss": 0.5106, "step": 21020 }, { "epoch": 0.6, "grad_norm": 8.288707182636564, "learning_rate": 3.6098949803677274e-06, "loss": 0.4843, "step": 21021 }, { "epoch": 0.6, "grad_norm": 10.28583728058256, "learning_rate": 3.609449511554027e-06, "loss": 0.6511, "step": 21022 }, { "epoch": 0.6, "grad_norm": 10.142109860652127, "learning_rate": 3.6090040547030612e-06, "loss": 0.7368, "step": 21023 }, { "epoch": 0.6, "grad_norm": 4.063626998687312, "learning_rate": 3.608558609818663e-06, "loss": 0.248, "step": 21024 }, { "epoch": 0.6, "grad_norm": 5.68880908698165, "learning_rate": 3.6081131769046614e-06, "loss": 0.5198, "step": 21025 }, { "epoch": 0.6, "grad_norm": 7.109100940158901, "learning_rate": 3.6076677559648927e-06, "loss": 0.2986, "step": 21026 }, { "epoch": 0.6, "grad_norm": 5.519593947824577, "learning_rate": 3.607222347003186e-06, "loss": 0.358, "step": 21027 }, { "epoch": 0.6, "grad_norm": 4.30134300284374, "learning_rate": 3.606776950023375e-06, "loss": 0.5062, "step": 21028 }, { "epoch": 0.6, "grad_norm": 4.874915929216118, "learning_rate": 3.6063315650292884e-06, "loss": 0.5496, "step": 21029 }, { "epoch": 0.6, "grad_norm": 7.715484607975112, "learning_rate": 3.60588619202476e-06, "loss": 0.6184, "step": 21030 }, { "epoch": 0.6, "grad_norm": 6.241624560891071, "learning_rate": 3.6054408310136235e-06, "loss": 0.7704, "step": 21031 }, { "epoch": 0.6, "grad_norm": 3.023406390835896, "learning_rate": 3.6049954819997045e-06, "loss": 0.2137, "step": 21032 }, { "epoch": 0.6, "grad_norm": 5.600985977532614, "learning_rate": 3.60455014498684e-06, "loss": 0.4594, "step": 21033 }, { "epoch": 0.6, "grad_norm": 4.366503039174763, "learning_rate": 3.6041048199788585e-06, "loss": 0.6933, "step": 21034 }, { "epoch": 0.6, "grad_norm": 6.242559123525028, "learning_rate": 3.603659506979592e-06, "loss": 0.436, "step": 21035 }, { "epoch": 0.6, "grad_norm": 3.1552934046792105, "learning_rate": 3.603214205992868e-06, "loss": 0.2603, "step": 21036 }, { "epoch": 0.6, "grad_norm": 4.65108292900598, "learning_rate": 3.602768917022522e-06, "loss": 0.4651, "step": 21037 }, { "epoch": 0.6, "grad_norm": 3.4872603850273296, "learning_rate": 3.6023236400723836e-06, "loss": 0.2482, "step": 21038 }, { "epoch": 0.6, "grad_norm": 8.148634647606814, "learning_rate": 3.6018783751462815e-06, "loss": 0.6347, "step": 21039 }, { "epoch": 0.6, "grad_norm": 7.673974676327533, "learning_rate": 3.6014331222480502e-06, "loss": 0.96, "step": 21040 }, { "epoch": 0.6, "grad_norm": 3.218882956119867, "learning_rate": 3.6009878813815145e-06, "loss": 0.2709, "step": 21041 }, { "epoch": 0.6, "grad_norm": 4.218909112261002, "learning_rate": 3.60054265255051e-06, "loss": 0.3108, "step": 21042 }, { "epoch": 0.6, "grad_norm": 5.559496593807428, "learning_rate": 3.6000974357588638e-06, "loss": 0.5099, "step": 21043 }, { "epoch": 0.6, "grad_norm": 5.088811153380812, "learning_rate": 3.5996522310104076e-06, "loss": 0.6684, "step": 21044 }, { "epoch": 0.6, "grad_norm": 6.852441085456399, "learning_rate": 3.599207038308969e-06, "loss": 0.3873, "step": 21045 }, { "epoch": 0.6, "grad_norm": 5.059550473973725, "learning_rate": 3.5987618576583807e-06, "loss": 0.5255, "step": 21046 }, { "epoch": 0.6, "grad_norm": 8.372155176068782, "learning_rate": 3.5983166890624727e-06, "loss": 0.7203, "step": 21047 }, { "epoch": 0.6, "grad_norm": 3.6442902087672935, "learning_rate": 3.5978715325250715e-06, "loss": 0.2476, "step": 21048 }, { "epoch": 0.6, "grad_norm": 6.430236385087964, "learning_rate": 3.5974263880500114e-06, "loss": 0.6264, "step": 21049 }, { "epoch": 0.6, "grad_norm": 6.903659524595848, "learning_rate": 3.596981255641118e-06, "loss": 0.3123, "step": 21050 }, { "epoch": 0.6, "grad_norm": 6.087433421810708, "learning_rate": 3.596536135302223e-06, "loss": 0.6463, "step": 21051 }, { "epoch": 0.6, "grad_norm": 3.210991258008777, "learning_rate": 3.596091027037153e-06, "loss": 0.1165, "step": 21052 }, { "epoch": 0.6, "grad_norm": 5.891229148259646, "learning_rate": 3.595645930849741e-06, "loss": 0.4418, "step": 21053 }, { "epoch": 0.6, "grad_norm": 4.4978261306270975, "learning_rate": 3.5952008467438133e-06, "loss": 0.4613, "step": 21054 }, { "epoch": 0.6, "grad_norm": 5.6778042910023805, "learning_rate": 3.5947557747231986e-06, "loss": 0.3975, "step": 21055 }, { "epoch": 0.6, "grad_norm": 2.4648773142710287, "learning_rate": 3.594310714791729e-06, "loss": 0.1931, "step": 21056 }, { "epoch": 0.6, "grad_norm": 5.881433900396352, "learning_rate": 3.5938656669532303e-06, "loss": 0.658, "step": 21057 }, { "epoch": 0.6, "grad_norm": 17.19585845233083, "learning_rate": 3.5934206312115327e-06, "loss": 0.8151, "step": 21058 }, { "epoch": 0.6, "grad_norm": 7.187516121224278, "learning_rate": 3.5929756075704626e-06, "loss": 0.536, "step": 21059 }, { "epoch": 0.6, "grad_norm": 5.862441377000984, "learning_rate": 3.592530596033852e-06, "loss": 0.6213, "step": 21060 }, { "epoch": 0.6, "grad_norm": 2.0080214439948283, "learning_rate": 3.5920855966055266e-06, "loss": 0.2201, "step": 21061 }, { "epoch": 0.6, "grad_norm": 3.5718599801402715, "learning_rate": 3.591640609289314e-06, "loss": 0.1439, "step": 21062 }, { "epoch": 0.6, "grad_norm": 3.9900618117666733, "learning_rate": 3.5911956340890465e-06, "loss": 0.7722, "step": 21063 }, { "epoch": 0.6, "grad_norm": 13.368956225330741, "learning_rate": 3.590750671008548e-06, "loss": 0.9077, "step": 21064 }, { "epoch": 0.6, "grad_norm": 4.373672038580278, "learning_rate": 3.5903057200516493e-06, "loss": 0.6612, "step": 21065 }, { "epoch": 0.6, "grad_norm": 3.2808312285086036, "learning_rate": 3.589860781222175e-06, "loss": 0.2547, "step": 21066 }, { "epoch": 0.6, "grad_norm": 3.3273654482927344, "learning_rate": 3.5894158545239577e-06, "loss": 0.3617, "step": 21067 }, { "epoch": 0.6, "grad_norm": 6.011118957461918, "learning_rate": 3.5889709399608187e-06, "loss": 0.4125, "step": 21068 }, { "epoch": 0.6, "grad_norm": 6.4622579975674155, "learning_rate": 3.5885260375365915e-06, "loss": 0.6681, "step": 21069 }, { "epoch": 0.6, "grad_norm": 4.867426760748999, "learning_rate": 3.5880811472550995e-06, "loss": 0.5869, "step": 21070 }, { "epoch": 0.6, "grad_norm": 8.96618527948391, "learning_rate": 3.587636269120171e-06, "loss": 0.2798, "step": 21071 }, { "epoch": 0.6, "grad_norm": 6.868469622011994, "learning_rate": 3.587191403135636e-06, "loss": 0.3652, "step": 21072 }, { "epoch": 0.6, "grad_norm": 5.146517081401055, "learning_rate": 3.5867465493053178e-06, "loss": 0.3603, "step": 21073 }, { "epoch": 0.6, "grad_norm": 4.9521260261968845, "learning_rate": 3.5863017076330464e-06, "loss": 0.509, "step": 21074 }, { "epoch": 0.6, "grad_norm": 7.119226090450641, "learning_rate": 3.5858568781226448e-06, "loss": 0.7253, "step": 21075 }, { "epoch": 0.6, "grad_norm": 9.347335484728388, "learning_rate": 3.585412060777945e-06, "loss": 0.9709, "step": 21076 }, { "epoch": 0.6, "grad_norm": 5.817582072427822, "learning_rate": 3.5849672556027693e-06, "loss": 0.5328, "step": 21077 }, { "epoch": 0.6, "grad_norm": 5.520321259652256, "learning_rate": 3.5845224626009454e-06, "loss": 0.5015, "step": 21078 }, { "epoch": 0.6, "grad_norm": 5.538115168860762, "learning_rate": 3.5840776817763035e-06, "loss": 0.3499, "step": 21079 }, { "epoch": 0.6, "grad_norm": 4.563752511952904, "learning_rate": 3.5836329131326654e-06, "loss": 0.6862, "step": 21080 }, { "epoch": 0.6, "grad_norm": 4.432168148245702, "learning_rate": 3.5831881566738593e-06, "loss": 0.3349, "step": 21081 }, { "epoch": 0.6, "grad_norm": 3.88435010955766, "learning_rate": 3.58274341240371e-06, "loss": 0.1914, "step": 21082 }, { "epoch": 0.6, "grad_norm": 4.852039970945615, "learning_rate": 3.5822986803260453e-06, "loss": 0.5075, "step": 21083 }, { "epoch": 0.6, "grad_norm": 3.9754084953003157, "learning_rate": 3.58185396044469e-06, "loss": 0.2714, "step": 21084 }, { "epoch": 0.6, "grad_norm": 3.428919730134977, "learning_rate": 3.5814092527634725e-06, "loss": 0.3844, "step": 21085 }, { "epoch": 0.6, "grad_norm": 3.0011730483555663, "learning_rate": 3.5809645572862135e-06, "loss": 0.3263, "step": 21086 }, { "epoch": 0.6, "grad_norm": 2.746539224926842, "learning_rate": 3.580519874016743e-06, "loss": 0.2982, "step": 21087 }, { "epoch": 0.6, "grad_norm": 4.9032685763925645, "learning_rate": 3.580075202958886e-06, "loss": 0.4997, "step": 21088 }, { "epoch": 0.6, "grad_norm": 3.936363692036352, "learning_rate": 3.579630544116466e-06, "loss": 0.2217, "step": 21089 }, { "epoch": 0.6, "grad_norm": 8.18513792486853, "learning_rate": 3.5791858974933102e-06, "loss": 0.4792, "step": 21090 }, { "epoch": 0.6, "grad_norm": 6.449344885624846, "learning_rate": 3.5787412630932435e-06, "loss": 0.3472, "step": 21091 }, { "epoch": 0.6, "grad_norm": 7.551930757932531, "learning_rate": 3.5782966409200914e-06, "loss": 0.5461, "step": 21092 }, { "epoch": 0.6, "grad_norm": 4.968142034415295, "learning_rate": 3.5778520309776754e-06, "loss": 0.3934, "step": 21093 }, { "epoch": 0.6, "grad_norm": 2.7191701323260817, "learning_rate": 3.577407433269826e-06, "loss": 0.2149, "step": 21094 }, { "epoch": 0.6, "grad_norm": 4.72470843308658, "learning_rate": 3.5769628478003646e-06, "loss": 0.5577, "step": 21095 }, { "epoch": 0.6, "grad_norm": 5.019760375971918, "learning_rate": 3.576518274573115e-06, "loss": 0.6357, "step": 21096 }, { "epoch": 0.6, "grad_norm": 10.12777325832424, "learning_rate": 3.5760737135919064e-06, "loss": 0.4916, "step": 21097 }, { "epoch": 0.6, "grad_norm": 8.036016289319, "learning_rate": 3.575629164860558e-06, "loss": 0.7934, "step": 21098 }, { "epoch": 0.6, "grad_norm": 3.6259920965961774, "learning_rate": 3.5751846283828985e-06, "loss": 0.1454, "step": 21099 }, { "epoch": 0.6, "grad_norm": 3.1900251895185048, "learning_rate": 3.574740104162748e-06, "loss": 0.3971, "step": 21100 }, { "epoch": 0.6, "grad_norm": 3.3059706845769377, "learning_rate": 3.574295592203936e-06, "loss": 0.3117, "step": 21101 }, { "epoch": 0.6, "grad_norm": 5.271728012487023, "learning_rate": 3.57385109251028e-06, "loss": 0.2474, "step": 21102 }, { "epoch": 0.6, "grad_norm": 12.642999791409709, "learning_rate": 3.573406605085608e-06, "loss": 0.448, "step": 21103 }, { "epoch": 0.6, "grad_norm": 4.94129052309365, "learning_rate": 3.572962129933745e-06, "loss": 0.5711, "step": 21104 }, { "epoch": 0.6, "grad_norm": 3.18608617281353, "learning_rate": 3.5725176670585117e-06, "loss": 0.323, "step": 21105 }, { "epoch": 0.6, "grad_norm": 4.404591782670972, "learning_rate": 3.572073216463734e-06, "loss": 0.3286, "step": 21106 }, { "epoch": 0.6, "grad_norm": 5.503020064281145, "learning_rate": 3.5716287781532344e-06, "loss": 0.2671, "step": 21107 }, { "epoch": 0.6, "grad_norm": 5.393070694458458, "learning_rate": 3.5711843521308376e-06, "loss": 0.331, "step": 21108 }, { "epoch": 0.6, "grad_norm": 5.746045784714404, "learning_rate": 3.5707399384003635e-06, "loss": 0.7396, "step": 21109 }, { "epoch": 0.6, "grad_norm": 2.739162765730713, "learning_rate": 3.5702955369656394e-06, "loss": 0.269, "step": 21110 }, { "epoch": 0.6, "grad_norm": 3.244033508553404, "learning_rate": 3.5698511478304864e-06, "loss": 0.3291, "step": 21111 }, { "epoch": 0.6, "grad_norm": 7.9124796754114985, "learning_rate": 3.5694067709987267e-06, "loss": 0.7828, "step": 21112 }, { "epoch": 0.6, "grad_norm": 3.7499072699207856, "learning_rate": 3.5689624064741867e-06, "loss": 0.3107, "step": 21113 }, { "epoch": 0.6, "grad_norm": 6.689475275824412, "learning_rate": 3.5685180542606857e-06, "loss": 0.4037, "step": 21114 }, { "epoch": 0.6, "grad_norm": 4.564808744746475, "learning_rate": 3.568073714362048e-06, "loss": 0.5283, "step": 21115 }, { "epoch": 0.6, "grad_norm": 5.492199502988806, "learning_rate": 3.5676293867820943e-06, "loss": 0.3362, "step": 21116 }, { "epoch": 0.6, "grad_norm": 3.521307866247668, "learning_rate": 3.5671850715246514e-06, "loss": 0.2012, "step": 21117 }, { "epoch": 0.6, "grad_norm": 12.365997584940406, "learning_rate": 3.566740768593537e-06, "loss": 0.8379, "step": 21118 }, { "epoch": 0.6, "grad_norm": 8.901094142222682, "learning_rate": 3.566296477992577e-06, "loss": 0.9089, "step": 21119 }, { "epoch": 0.6, "grad_norm": 5.651216406398559, "learning_rate": 3.565852199725589e-06, "loss": 0.6298, "step": 21120 }, { "epoch": 0.6, "grad_norm": 5.980779618568655, "learning_rate": 3.565407933796399e-06, "loss": 0.6149, "step": 21121 }, { "epoch": 0.6, "grad_norm": 4.905793770151655, "learning_rate": 3.5649636802088293e-06, "loss": 0.5262, "step": 21122 }, { "epoch": 0.6, "grad_norm": 4.642751392795041, "learning_rate": 3.5645194389666983e-06, "loss": 0.4053, "step": 21123 }, { "epoch": 0.6, "grad_norm": 1.399193841572988, "learning_rate": 3.564075210073832e-06, "loss": 0.1454, "step": 21124 }, { "epoch": 0.6, "grad_norm": 3.939252841682859, "learning_rate": 3.5636309935340485e-06, "loss": 0.3235, "step": 21125 }, { "epoch": 0.61, "grad_norm": 9.985055580882268, "learning_rate": 3.5631867893511725e-06, "loss": 0.6089, "step": 21126 }, { "epoch": 0.61, "grad_norm": 8.074803036392781, "learning_rate": 3.562742597529022e-06, "loss": 0.5813, "step": 21127 }, { "epoch": 0.61, "grad_norm": 4.675994638722187, "learning_rate": 3.5622984180714194e-06, "loss": 0.4699, "step": 21128 }, { "epoch": 0.61, "grad_norm": 6.101543275067745, "learning_rate": 3.561854250982188e-06, "loss": 0.6112, "step": 21129 }, { "epoch": 0.61, "grad_norm": 6.7629994854879625, "learning_rate": 3.5614100962651464e-06, "loss": 0.6137, "step": 21130 }, { "epoch": 0.61, "grad_norm": 7.376424457896287, "learning_rate": 3.560965953924118e-06, "loss": 0.9804, "step": 21131 }, { "epoch": 0.61, "grad_norm": 5.945350115502925, "learning_rate": 3.5605218239629203e-06, "loss": 0.4621, "step": 21132 }, { "epoch": 0.61, "grad_norm": 5.708772554229826, "learning_rate": 3.5600777063853774e-06, "loss": 0.5263, "step": 21133 }, { "epoch": 0.61, "grad_norm": 7.261069233345595, "learning_rate": 3.559633601195308e-06, "loss": 0.5974, "step": 21134 }, { "epoch": 0.61, "grad_norm": 7.08512710597263, "learning_rate": 3.5591895083965357e-06, "loss": 0.3219, "step": 21135 }, { "epoch": 0.61, "grad_norm": 5.8711058920444765, "learning_rate": 3.5587454279928747e-06, "loss": 0.5383, "step": 21136 }, { "epoch": 0.61, "grad_norm": 4.852689258986301, "learning_rate": 3.558301359988151e-06, "loss": 0.5066, "step": 21137 }, { "epoch": 0.61, "grad_norm": 5.982597166675931, "learning_rate": 3.557857304386185e-06, "loss": 0.7711, "step": 21138 }, { "epoch": 0.61, "grad_norm": 7.097135466982467, "learning_rate": 3.5574132611907926e-06, "loss": 0.9036, "step": 21139 }, { "epoch": 0.61, "grad_norm": 8.706505740340006, "learning_rate": 3.5569692304057978e-06, "loss": 0.8041, "step": 21140 }, { "epoch": 0.61, "grad_norm": 6.131294247217758, "learning_rate": 3.5565252120350186e-06, "loss": 0.4374, "step": 21141 }, { "epoch": 0.61, "grad_norm": 4.723291901752438, "learning_rate": 3.5560812060822756e-06, "loss": 0.4918, "step": 21142 }, { "epoch": 0.61, "grad_norm": 3.715376822715117, "learning_rate": 3.5556372125513873e-06, "loss": 0.6437, "step": 21143 }, { "epoch": 0.61, "grad_norm": 10.03085773212047, "learning_rate": 3.5551932314461746e-06, "loss": 0.5968, "step": 21144 }, { "epoch": 0.61, "grad_norm": 5.974730446453286, "learning_rate": 3.554749262770457e-06, "loss": 0.3659, "step": 21145 }, { "epoch": 0.61, "grad_norm": 4.685270771193683, "learning_rate": 3.5543053065280526e-06, "loss": 0.6239, "step": 21146 }, { "epoch": 0.61, "grad_norm": 10.109876475801128, "learning_rate": 3.5538613627227836e-06, "loss": 0.6435, "step": 21147 }, { "epoch": 0.61, "grad_norm": 2.5597264236063384, "learning_rate": 3.5534174313584656e-06, "loss": 0.1638, "step": 21148 }, { "epoch": 0.61, "grad_norm": 3.0304061855654343, "learning_rate": 3.5529735124389208e-06, "loss": 0.0915, "step": 21149 }, { "epoch": 0.61, "grad_norm": 4.71166221143235, "learning_rate": 3.5525296059679643e-06, "loss": 0.227, "step": 21150 }, { "epoch": 0.61, "grad_norm": 4.5056097112770095, "learning_rate": 3.55208571194942e-06, "loss": 0.4736, "step": 21151 }, { "epoch": 0.61, "grad_norm": 2.894190100206941, "learning_rate": 3.5516418303871024e-06, "loss": 0.3348, "step": 21152 }, { "epoch": 0.61, "grad_norm": 5.520788115790068, "learning_rate": 3.5511979612848314e-06, "loss": 0.4126, "step": 21153 }, { "epoch": 0.61, "grad_norm": 6.6485065676265185, "learning_rate": 3.550754104646427e-06, "loss": 0.7233, "step": 21154 }, { "epoch": 0.61, "grad_norm": 5.326690077883407, "learning_rate": 3.550310260475707e-06, "loss": 0.5466, "step": 21155 }, { "epoch": 0.61, "grad_norm": 6.854560239752698, "learning_rate": 3.5498664287764894e-06, "loss": 0.2582, "step": 21156 }, { "epoch": 0.61, "grad_norm": 7.19692564203994, "learning_rate": 3.549422609552591e-06, "loss": 0.606, "step": 21157 }, { "epoch": 0.61, "grad_norm": 3.7706639618886375, "learning_rate": 3.5489788028078333e-06, "loss": 0.3027, "step": 21158 }, { "epoch": 0.61, "grad_norm": 6.143421400252743, "learning_rate": 3.5485350085460296e-06, "loss": 0.383, "step": 21159 }, { "epoch": 0.61, "grad_norm": 3.939538912200769, "learning_rate": 3.5480912267710025e-06, "loss": 0.3925, "step": 21160 }, { "epoch": 0.61, "grad_norm": 8.971211245228398, "learning_rate": 3.5476474574865673e-06, "loss": 0.1912, "step": 21161 }, { "epoch": 0.61, "grad_norm": 7.143181608187547, "learning_rate": 3.547203700696541e-06, "loss": 0.5592, "step": 21162 }, { "epoch": 0.61, "grad_norm": 4.665966980570001, "learning_rate": 3.5467599564047438e-06, "loss": 0.3317, "step": 21163 }, { "epoch": 0.61, "grad_norm": 6.038723915152729, "learning_rate": 3.546316224614992e-06, "loss": 0.7954, "step": 21164 }, { "epoch": 0.61, "grad_norm": 6.4838353679354945, "learning_rate": 3.5458725053311025e-06, "loss": 0.6567, "step": 21165 }, { "epoch": 0.61, "grad_norm": 4.209099617993438, "learning_rate": 3.545428798556891e-06, "loss": 0.405, "step": 21166 }, { "epoch": 0.61, "grad_norm": 6.12400953887107, "learning_rate": 3.544985104296179e-06, "loss": 0.3305, "step": 21167 }, { "epoch": 0.61, "grad_norm": 6.708991660123865, "learning_rate": 3.5445414225527803e-06, "loss": 0.6486, "step": 21168 }, { "epoch": 0.61, "grad_norm": 4.437658065344764, "learning_rate": 3.544097753330511e-06, "loss": 0.3744, "step": 21169 }, { "epoch": 0.61, "grad_norm": 8.120669487333902, "learning_rate": 3.543654096633192e-06, "loss": 0.7085, "step": 21170 }, { "epoch": 0.61, "grad_norm": 5.187632915218802, "learning_rate": 3.543210452464636e-06, "loss": 0.6801, "step": 21171 }, { "epoch": 0.61, "grad_norm": 4.9349640417963005, "learning_rate": 3.5427668208286613e-06, "loss": 0.4435, "step": 21172 }, { "epoch": 0.61, "grad_norm": 6.473487961719975, "learning_rate": 3.5423232017290833e-06, "loss": 0.4419, "step": 21173 }, { "epoch": 0.61, "grad_norm": 4.841872066577176, "learning_rate": 3.5418795951697214e-06, "loss": 0.4947, "step": 21174 }, { "epoch": 0.61, "grad_norm": 4.5974588538149375, "learning_rate": 3.541436001154388e-06, "loss": 0.3661, "step": 21175 }, { "epoch": 0.61, "grad_norm": 4.755265680185277, "learning_rate": 3.540992419686903e-06, "loss": 0.2643, "step": 21176 }, { "epoch": 0.61, "grad_norm": 3.5957839848980355, "learning_rate": 3.5405488507710784e-06, "loss": 0.3729, "step": 21177 }, { "epoch": 0.61, "grad_norm": 4.491103331721499, "learning_rate": 3.5401052944107328e-06, "loss": 0.1786, "step": 21178 }, { "epoch": 0.61, "grad_norm": 6.859311044868421, "learning_rate": 3.539661750609683e-06, "loss": 0.4514, "step": 21179 }, { "epoch": 0.61, "grad_norm": 7.375359639596228, "learning_rate": 3.5392182193717412e-06, "loss": 0.7574, "step": 21180 }, { "epoch": 0.61, "grad_norm": 6.045977286796798, "learning_rate": 3.538774700700727e-06, "loss": 0.2859, "step": 21181 }, { "epoch": 0.61, "grad_norm": 7.086042275122051, "learning_rate": 3.538331194600454e-06, "loss": 0.4645, "step": 21182 }, { "epoch": 0.61, "grad_norm": 4.83131300970247, "learning_rate": 3.537887701074738e-06, "loss": 0.3381, "step": 21183 }, { "epoch": 0.61, "grad_norm": 7.084974061518685, "learning_rate": 3.537444220127392e-06, "loss": 0.6183, "step": 21184 }, { "epoch": 0.61, "grad_norm": 3.4750602819853804, "learning_rate": 3.5370007517622353e-06, "loss": 0.2636, "step": 21185 }, { "epoch": 0.61, "grad_norm": 4.729023064381043, "learning_rate": 3.5365572959830808e-06, "loss": 0.2629, "step": 21186 }, { "epoch": 0.61, "grad_norm": 9.319298432430688, "learning_rate": 3.5361138527937433e-06, "loss": 0.7916, "step": 21187 }, { "epoch": 0.61, "grad_norm": 5.933308497209039, "learning_rate": 3.535670422198039e-06, "loss": 0.4498, "step": 21188 }, { "epoch": 0.61, "grad_norm": 5.07700971827348, "learning_rate": 3.5352270041997792e-06, "loss": 0.3936, "step": 21189 }, { "epoch": 0.61, "grad_norm": 2.678117831742476, "learning_rate": 3.534783598802784e-06, "loss": 0.249, "step": 21190 }, { "epoch": 0.61, "grad_norm": 5.043098052100936, "learning_rate": 3.5343402060108633e-06, "loss": 0.6073, "step": 21191 }, { "epoch": 0.61, "grad_norm": 3.7332929216621618, "learning_rate": 3.533896825827835e-06, "loss": 0.3408, "step": 21192 }, { "epoch": 0.61, "grad_norm": 5.286879930143293, "learning_rate": 3.53345345825751e-06, "loss": 0.5774, "step": 21193 }, { "epoch": 0.61, "grad_norm": 6.242977507710058, "learning_rate": 3.5330101033037047e-06, "loss": 0.4886, "step": 21194 }, { "epoch": 0.61, "grad_norm": 5.770416914952982, "learning_rate": 3.5325667609702343e-06, "loss": 0.3506, "step": 21195 }, { "epoch": 0.61, "grad_norm": 6.284497346829994, "learning_rate": 3.5321234312609086e-06, "loss": 0.3631, "step": 21196 }, { "epoch": 0.61, "grad_norm": 4.863463322551856, "learning_rate": 3.5316801141795478e-06, "loss": 0.5711, "step": 21197 }, { "epoch": 0.61, "grad_norm": 2.414186048586803, "learning_rate": 3.53123680972996e-06, "loss": 0.3345, "step": 21198 }, { "epoch": 0.61, "grad_norm": 7.5624098969443505, "learning_rate": 3.5307935179159626e-06, "loss": 0.5384, "step": 21199 }, { "epoch": 0.61, "grad_norm": 5.8831859535942845, "learning_rate": 3.5303502387413645e-06, "loss": 0.2906, "step": 21200 }, { "epoch": 0.61, "grad_norm": 7.248336601171798, "learning_rate": 3.5299069722099856e-06, "loss": 0.6605, "step": 21201 }, { "epoch": 0.61, "grad_norm": 2.5375916314140703, "learning_rate": 3.5294637183256348e-06, "loss": 0.2909, "step": 21202 }, { "epoch": 0.61, "grad_norm": 5.785250254092032, "learning_rate": 3.5290204770921255e-06, "loss": 0.5473, "step": 21203 }, { "epoch": 0.61, "grad_norm": 5.757808131471216, "learning_rate": 3.528577248513274e-06, "loss": 0.4547, "step": 21204 }, { "epoch": 0.61, "grad_norm": 2.684101617501191, "learning_rate": 3.5281340325928904e-06, "loss": 0.2587, "step": 21205 }, { "epoch": 0.61, "grad_norm": 3.802550042479589, "learning_rate": 3.5276908293347887e-06, "loss": 0.3648, "step": 21206 }, { "epoch": 0.61, "grad_norm": 4.795142541224569, "learning_rate": 3.5272476387427802e-06, "loss": 0.5843, "step": 21207 }, { "epoch": 0.61, "grad_norm": 6.890641331382999, "learning_rate": 3.526804460820681e-06, "loss": 0.4453, "step": 21208 }, { "epoch": 0.61, "grad_norm": 9.692458821726522, "learning_rate": 3.5263612955723005e-06, "loss": 0.6429, "step": 21209 }, { "epoch": 0.61, "grad_norm": 5.390298894333928, "learning_rate": 3.5259181430014512e-06, "loss": 0.6085, "step": 21210 }, { "epoch": 0.61, "grad_norm": 10.598534192995752, "learning_rate": 3.5254750031119486e-06, "loss": 0.6576, "step": 21211 }, { "epoch": 0.61, "grad_norm": 7.2159250189592425, "learning_rate": 3.525031875907602e-06, "loss": 0.4165, "step": 21212 }, { "epoch": 0.61, "grad_norm": 8.5795398094347, "learning_rate": 3.5245887613922257e-06, "loss": 0.4819, "step": 21213 }, { "epoch": 0.61, "grad_norm": 7.420813426341437, "learning_rate": 3.524145659569629e-06, "loss": 0.541, "step": 21214 }, { "epoch": 0.61, "grad_norm": 2.734404089636699, "learning_rate": 3.523702570443628e-06, "loss": 0.392, "step": 21215 }, { "epoch": 0.61, "grad_norm": 5.5311855161539585, "learning_rate": 3.5232594940180287e-06, "loss": 0.4327, "step": 21216 }, { "epoch": 0.61, "grad_norm": 5.296926402514922, "learning_rate": 3.5228164302966484e-06, "loss": 0.5093, "step": 21217 }, { "epoch": 0.61, "grad_norm": 3.1152897462139215, "learning_rate": 3.522373379283296e-06, "loss": 0.3304, "step": 21218 }, { "epoch": 0.61, "grad_norm": 4.855931524686343, "learning_rate": 3.521930340981782e-06, "loss": 0.6272, "step": 21219 }, { "epoch": 0.61, "grad_norm": 6.643166807313916, "learning_rate": 3.5214873153959213e-06, "loss": 0.8361, "step": 21220 }, { "epoch": 0.61, "grad_norm": 5.4819292880705905, "learning_rate": 3.521044302529522e-06, "loss": 0.2453, "step": 21221 }, { "epoch": 0.61, "grad_norm": 5.9872076437021065, "learning_rate": 3.5206013023863983e-06, "loss": 0.6072, "step": 21222 }, { "epoch": 0.61, "grad_norm": 5.455465388389184, "learning_rate": 3.5201583149703567e-06, "loss": 0.3024, "step": 21223 }, { "epoch": 0.61, "grad_norm": 6.1605550749367834, "learning_rate": 3.519715340285213e-06, "loss": 0.3363, "step": 21224 }, { "epoch": 0.61, "grad_norm": 7.0561038567248, "learning_rate": 3.519272378334775e-06, "loss": 0.4915, "step": 21225 }, { "epoch": 0.61, "grad_norm": 9.227282490364237, "learning_rate": 3.5188294291228532e-06, "loss": 0.7892, "step": 21226 }, { "epoch": 0.61, "grad_norm": 4.778325139925616, "learning_rate": 3.5183864926532616e-06, "loss": 0.3789, "step": 21227 }, { "epoch": 0.61, "grad_norm": 6.565402597022237, "learning_rate": 3.517943568929808e-06, "loss": 0.4372, "step": 21228 }, { "epoch": 0.61, "grad_norm": 8.346804798992128, "learning_rate": 3.5175006579563044e-06, "loss": 0.6602, "step": 21229 }, { "epoch": 0.61, "grad_norm": 5.331586725618076, "learning_rate": 3.5170577597365573e-06, "loss": 0.4526, "step": 21230 }, { "epoch": 0.61, "grad_norm": 4.087349197393981, "learning_rate": 3.5166148742743827e-06, "loss": 0.3169, "step": 21231 }, { "epoch": 0.61, "grad_norm": 2.1810695272948397, "learning_rate": 3.516172001573586e-06, "loss": 0.1533, "step": 21232 }, { "epoch": 0.61, "grad_norm": 4.1820923918885375, "learning_rate": 3.5157291416379803e-06, "loss": 0.506, "step": 21233 }, { "epoch": 0.61, "grad_norm": 5.234187063003218, "learning_rate": 3.515286294471372e-06, "loss": 0.5161, "step": 21234 }, { "epoch": 0.61, "grad_norm": 5.381475052685442, "learning_rate": 3.5148434600775737e-06, "loss": 0.6388, "step": 21235 }, { "epoch": 0.61, "grad_norm": 2.035600741824268, "learning_rate": 3.5144006384603956e-06, "loss": 0.1116, "step": 21236 }, { "epoch": 0.61, "grad_norm": 3.6089329964962586, "learning_rate": 3.5139578296236432e-06, "loss": 0.3122, "step": 21237 }, { "epoch": 0.61, "grad_norm": 5.594650728814091, "learning_rate": 3.513515033571131e-06, "loss": 0.5536, "step": 21238 }, { "epoch": 0.61, "grad_norm": 10.250576747392062, "learning_rate": 3.513072250306665e-06, "loss": 0.4959, "step": 21239 }, { "epoch": 0.61, "grad_norm": 2.5846783664059103, "learning_rate": 3.5126294798340568e-06, "loss": 0.2386, "step": 21240 }, { "epoch": 0.61, "grad_norm": 4.863625387861626, "learning_rate": 3.5121867221571114e-06, "loss": 0.2629, "step": 21241 }, { "epoch": 0.61, "grad_norm": 5.704835420987817, "learning_rate": 3.5117439772796436e-06, "loss": 0.7754, "step": 21242 }, { "epoch": 0.61, "grad_norm": 8.401073804976372, "learning_rate": 3.5113012452054562e-06, "loss": 0.4985, "step": 21243 }, { "epoch": 0.61, "grad_norm": 7.070885465388931, "learning_rate": 3.510858525938361e-06, "loss": 0.3515, "step": 21244 }, { "epoch": 0.61, "grad_norm": 4.773853764591171, "learning_rate": 3.510415819482169e-06, "loss": 0.3771, "step": 21245 }, { "epoch": 0.61, "grad_norm": 7.8880867983122185, "learning_rate": 3.5099731258406845e-06, "loss": 0.6689, "step": 21246 }, { "epoch": 0.61, "grad_norm": 5.692255400578065, "learning_rate": 3.5095304450177185e-06, "loss": 0.4323, "step": 21247 }, { "epoch": 0.61, "grad_norm": 7.247333101875377, "learning_rate": 3.5090877770170783e-06, "loss": 0.6402, "step": 21248 }, { "epoch": 0.61, "grad_norm": 5.425364797606081, "learning_rate": 3.508645121842573e-06, "loss": 0.548, "step": 21249 }, { "epoch": 0.61, "grad_norm": 7.51068603272427, "learning_rate": 3.5082024794980087e-06, "loss": 0.4582, "step": 21250 }, { "epoch": 0.61, "grad_norm": 6.103336012982222, "learning_rate": 3.5077598499871945e-06, "loss": 0.6009, "step": 21251 }, { "epoch": 0.61, "grad_norm": 5.686642844197566, "learning_rate": 3.5073172333139405e-06, "loss": 0.4158, "step": 21252 }, { "epoch": 0.61, "grad_norm": 6.699708650431853, "learning_rate": 3.506874629482051e-06, "loss": 0.2249, "step": 21253 }, { "epoch": 0.61, "grad_norm": 5.707314152008098, "learning_rate": 3.5064320384953364e-06, "loss": 0.4278, "step": 21254 }, { "epoch": 0.61, "grad_norm": 7.412045299871267, "learning_rate": 3.5059894603576023e-06, "loss": 0.7841, "step": 21255 }, { "epoch": 0.61, "grad_norm": 2.754149427566786, "learning_rate": 3.5055468950726578e-06, "loss": 0.3705, "step": 21256 }, { "epoch": 0.61, "grad_norm": 3.817673534253793, "learning_rate": 3.5051043426443075e-06, "loss": 0.5045, "step": 21257 }, { "epoch": 0.61, "grad_norm": 4.873792718904103, "learning_rate": 3.504661803076363e-06, "loss": 0.5251, "step": 21258 }, { "epoch": 0.61, "grad_norm": 6.519754980784206, "learning_rate": 3.5042192763726275e-06, "loss": 0.3558, "step": 21259 }, { "epoch": 0.61, "grad_norm": 8.026386109193005, "learning_rate": 3.503776762536908e-06, "loss": 0.5542, "step": 21260 }, { "epoch": 0.61, "grad_norm": 4.514740695625449, "learning_rate": 3.503334261573015e-06, "loss": 0.577, "step": 21261 }, { "epoch": 0.61, "grad_norm": 8.112674225756987, "learning_rate": 3.5028917734847524e-06, "loss": 0.7245, "step": 21262 }, { "epoch": 0.61, "grad_norm": 5.3600971636221155, "learning_rate": 3.502449298275928e-06, "loss": 0.6683, "step": 21263 }, { "epoch": 0.61, "grad_norm": 5.988857572509445, "learning_rate": 3.502006835950347e-06, "loss": 0.4032, "step": 21264 }, { "epoch": 0.61, "grad_norm": 8.039684482902624, "learning_rate": 3.501564386511818e-06, "loss": 0.5318, "step": 21265 }, { "epoch": 0.61, "grad_norm": 5.906381736779785, "learning_rate": 3.5011219499641447e-06, "loss": 0.533, "step": 21266 }, { "epoch": 0.61, "grad_norm": 4.9373719886892875, "learning_rate": 3.500679526311136e-06, "loss": 0.3118, "step": 21267 }, { "epoch": 0.61, "grad_norm": 6.021857028334545, "learning_rate": 3.500237115556595e-06, "loss": 0.4477, "step": 21268 }, { "epoch": 0.61, "grad_norm": 4.85904957457456, "learning_rate": 3.49979471770433e-06, "loss": 0.3493, "step": 21269 }, { "epoch": 0.61, "grad_norm": 6.274517797659008, "learning_rate": 3.499352332758148e-06, "loss": 0.4625, "step": 21270 }, { "epoch": 0.61, "grad_norm": 9.094883248417082, "learning_rate": 3.49890996072185e-06, "loss": 0.5948, "step": 21271 }, { "epoch": 0.61, "grad_norm": 4.749098315738029, "learning_rate": 3.4984676015992493e-06, "loss": 0.3441, "step": 21272 }, { "epoch": 0.61, "grad_norm": 3.9981308757267113, "learning_rate": 3.498025255394143e-06, "loss": 0.3016, "step": 21273 }, { "epoch": 0.61, "grad_norm": 4.675223537955409, "learning_rate": 3.497582922110343e-06, "loss": 0.4002, "step": 21274 }, { "epoch": 0.61, "grad_norm": 7.139876779299765, "learning_rate": 3.497140601751651e-06, "loss": 0.3382, "step": 21275 }, { "epoch": 0.61, "grad_norm": 3.989887299009612, "learning_rate": 3.496698294321872e-06, "loss": 0.6399, "step": 21276 }, { "epoch": 0.61, "grad_norm": 3.0576481045451644, "learning_rate": 3.496255999824814e-06, "loss": 0.1999, "step": 21277 }, { "epoch": 0.61, "grad_norm": 2.303509854236223, "learning_rate": 3.4958137182642803e-06, "loss": 0.3507, "step": 21278 }, { "epoch": 0.61, "grad_norm": 7.4006970347933825, "learning_rate": 3.495371449644077e-06, "loss": 0.6813, "step": 21279 }, { "epoch": 0.61, "grad_norm": 6.791918190197195, "learning_rate": 3.494929193968005e-06, "loss": 0.4518, "step": 21280 }, { "epoch": 0.61, "grad_norm": 5.583227735441281, "learning_rate": 3.494486951239875e-06, "loss": 0.3931, "step": 21281 }, { "epoch": 0.61, "grad_norm": 4.230559822715454, "learning_rate": 3.494044721463487e-06, "loss": 0.3113, "step": 21282 }, { "epoch": 0.61, "grad_norm": 10.13379259156306, "learning_rate": 3.4936025046426482e-06, "loss": 0.7962, "step": 21283 }, { "epoch": 0.61, "grad_norm": 8.986509464310508, "learning_rate": 3.4931603007811586e-06, "loss": 0.6231, "step": 21284 }, { "epoch": 0.61, "grad_norm": 8.68908868987059, "learning_rate": 3.492718109882827e-06, "loss": 0.6382, "step": 21285 }, { "epoch": 0.61, "grad_norm": 3.7511383395179165, "learning_rate": 3.4922759319514565e-06, "loss": 0.3717, "step": 21286 }, { "epoch": 0.61, "grad_norm": 7.572588976906867, "learning_rate": 3.491833766990849e-06, "loss": 0.338, "step": 21287 }, { "epoch": 0.61, "grad_norm": 6.090187655190134, "learning_rate": 3.4913916150048123e-06, "loss": 0.2351, "step": 21288 }, { "epoch": 0.61, "grad_norm": 4.754243461039918, "learning_rate": 3.490949475997146e-06, "loss": 0.5443, "step": 21289 }, { "epoch": 0.61, "grad_norm": 8.149394987619978, "learning_rate": 3.4905073499716566e-06, "loss": 0.6882, "step": 21290 }, { "epoch": 0.61, "grad_norm": 5.62075170519436, "learning_rate": 3.4900652369321453e-06, "loss": 0.5199, "step": 21291 }, { "epoch": 0.61, "grad_norm": 5.663714504732684, "learning_rate": 3.4896231368824174e-06, "loss": 0.3877, "step": 21292 }, { "epoch": 0.61, "grad_norm": 3.4008919808053593, "learning_rate": 3.489181049826277e-06, "loss": 0.3417, "step": 21293 }, { "epoch": 0.61, "grad_norm": 5.172844164992291, "learning_rate": 3.4887389757675234e-06, "loss": 0.2709, "step": 21294 }, { "epoch": 0.61, "grad_norm": 1.8130657529832326, "learning_rate": 3.4882969147099645e-06, "loss": 0.1302, "step": 21295 }, { "epoch": 0.61, "grad_norm": 5.375707358013364, "learning_rate": 3.4878548666574e-06, "loss": 0.1499, "step": 21296 }, { "epoch": 0.61, "grad_norm": 8.640523126977724, "learning_rate": 3.487412831613636e-06, "loss": 0.666, "step": 21297 }, { "epoch": 0.61, "grad_norm": 3.6105898839594266, "learning_rate": 3.4869708095824695e-06, "loss": 0.301, "step": 21298 }, { "epoch": 0.61, "grad_norm": 15.330103444187747, "learning_rate": 3.486528800567711e-06, "loss": 0.3568, "step": 21299 }, { "epoch": 0.61, "grad_norm": 2.4295968287191334, "learning_rate": 3.4860868045731554e-06, "loss": 0.2341, "step": 21300 }, { "epoch": 0.61, "grad_norm": 9.629965256958082, "learning_rate": 3.485644821602609e-06, "loss": 1.3559, "step": 21301 }, { "epoch": 0.61, "grad_norm": 8.464788150648767, "learning_rate": 3.4852028516598767e-06, "loss": 0.9494, "step": 21302 }, { "epoch": 0.61, "grad_norm": 9.946776852376681, "learning_rate": 3.484760894748754e-06, "loss": 0.7291, "step": 21303 }, { "epoch": 0.61, "grad_norm": 3.6566681908007643, "learning_rate": 3.4843189508730497e-06, "loss": 0.3598, "step": 21304 }, { "epoch": 0.61, "grad_norm": 6.439789577831518, "learning_rate": 3.483877020036561e-06, "loss": 0.643, "step": 21305 }, { "epoch": 0.61, "grad_norm": 6.121330251792798, "learning_rate": 3.4834351022430934e-06, "loss": 0.5649, "step": 21306 }, { "epoch": 0.61, "grad_norm": 5.384800072007383, "learning_rate": 3.482993197496445e-06, "loss": 0.3178, "step": 21307 }, { "epoch": 0.61, "grad_norm": 5.679671002853341, "learning_rate": 3.4825513058004213e-06, "loss": 0.1542, "step": 21308 }, { "epoch": 0.61, "grad_norm": 4.786436745723266, "learning_rate": 3.48210942715882e-06, "loss": 0.4683, "step": 21309 }, { "epoch": 0.61, "grad_norm": 6.293299604132887, "learning_rate": 3.4816675615754437e-06, "loss": 0.4946, "step": 21310 }, { "epoch": 0.61, "grad_norm": 3.196203466680131, "learning_rate": 3.481225709054097e-06, "loss": 0.1423, "step": 21311 }, { "epoch": 0.61, "grad_norm": 7.15060497005389, "learning_rate": 3.4807838695985775e-06, "loss": 0.5614, "step": 21312 }, { "epoch": 0.61, "grad_norm": 7.549169850973203, "learning_rate": 3.480342043212688e-06, "loss": 0.5828, "step": 21313 }, { "epoch": 0.61, "grad_norm": 7.253357701675535, "learning_rate": 3.4799002299002264e-06, "loss": 0.4087, "step": 21314 }, { "epoch": 0.61, "grad_norm": 10.379459525721055, "learning_rate": 3.4794584296649985e-06, "loss": 1.0191, "step": 21315 }, { "epoch": 0.61, "grad_norm": 8.460288011414503, "learning_rate": 3.479016642510802e-06, "loss": 0.7287, "step": 21316 }, { "epoch": 0.61, "grad_norm": 6.660772551271124, "learning_rate": 3.4785748684414354e-06, "loss": 0.6302, "step": 21317 }, { "epoch": 0.61, "grad_norm": 7.186408980119418, "learning_rate": 3.4781331074607056e-06, "loss": 0.576, "step": 21318 }, { "epoch": 0.61, "grad_norm": 3.951963502833145, "learning_rate": 3.477691359572407e-06, "loss": 0.5125, "step": 21319 }, { "epoch": 0.61, "grad_norm": 4.5457867765388125, "learning_rate": 3.4772496247803443e-06, "loss": 0.2175, "step": 21320 }, { "epoch": 0.61, "grad_norm": 4.039060138871409, "learning_rate": 3.476807903088313e-06, "loss": 0.6192, "step": 21321 }, { "epoch": 0.61, "grad_norm": 10.623939012398376, "learning_rate": 3.476366194500118e-06, "loss": 0.756, "step": 21322 }, { "epoch": 0.61, "grad_norm": 4.571052530481319, "learning_rate": 3.4759244990195564e-06, "loss": 0.4179, "step": 21323 }, { "epoch": 0.61, "grad_norm": 6.8972839566084545, "learning_rate": 3.475482816650429e-06, "loss": 0.5803, "step": 21324 }, { "epoch": 0.61, "grad_norm": 3.051899176413361, "learning_rate": 3.475041147396533e-06, "loss": 0.1803, "step": 21325 }, { "epoch": 0.61, "grad_norm": 8.229686929859541, "learning_rate": 3.474599491261672e-06, "loss": 0.4965, "step": 21326 }, { "epoch": 0.61, "grad_norm": 8.310617104579872, "learning_rate": 3.474157848249644e-06, "loss": 0.5194, "step": 21327 }, { "epoch": 0.61, "grad_norm": 7.944903127801121, "learning_rate": 3.473716218364247e-06, "loss": 0.4578, "step": 21328 }, { "epoch": 0.61, "grad_norm": 2.6877656461590105, "learning_rate": 3.4732746016092843e-06, "loss": 0.112, "step": 21329 }, { "epoch": 0.61, "grad_norm": 5.664102067480331, "learning_rate": 3.472832997988549e-06, "loss": 0.5651, "step": 21330 }, { "epoch": 0.61, "grad_norm": 6.431328713516459, "learning_rate": 3.472391407505845e-06, "loss": 0.6934, "step": 21331 }, { "epoch": 0.61, "grad_norm": 4.635714209218815, "learning_rate": 3.471949830164969e-06, "loss": 0.6628, "step": 21332 }, { "epoch": 0.61, "grad_norm": 6.594137984879153, "learning_rate": 3.4715082659697197e-06, "loss": 0.3954, "step": 21333 }, { "epoch": 0.61, "grad_norm": 5.759544526128814, "learning_rate": 3.4710667149238985e-06, "loss": 0.3312, "step": 21334 }, { "epoch": 0.61, "grad_norm": 7.272837023015317, "learning_rate": 3.470625177031301e-06, "loss": 0.501, "step": 21335 }, { "epoch": 0.61, "grad_norm": 3.8750579737356596, "learning_rate": 3.470183652295728e-06, "loss": 0.1815, "step": 21336 }, { "epoch": 0.61, "grad_norm": 3.7486394957371196, "learning_rate": 3.469742140720974e-06, "loss": 0.5364, "step": 21337 }, { "epoch": 0.61, "grad_norm": 6.468274711802597, "learning_rate": 3.469300642310843e-06, "loss": 0.4671, "step": 21338 }, { "epoch": 0.61, "grad_norm": 4.863351366387903, "learning_rate": 3.4688591570691284e-06, "loss": 0.3215, "step": 21339 }, { "epoch": 0.61, "grad_norm": 5.890951051466264, "learning_rate": 3.468417684999631e-06, "loss": 0.7612, "step": 21340 }, { "epoch": 0.61, "grad_norm": 6.403856937692806, "learning_rate": 3.4679762261061455e-06, "loss": 0.4725, "step": 21341 }, { "epoch": 0.61, "grad_norm": 6.2106787477731755, "learning_rate": 3.467534780392473e-06, "loss": 0.5655, "step": 21342 }, { "epoch": 0.61, "grad_norm": 5.508130046807975, "learning_rate": 3.467093347862411e-06, "loss": 0.2679, "step": 21343 }, { "epoch": 0.61, "grad_norm": 4.437681167557835, "learning_rate": 3.4666519285197554e-06, "loss": 0.3043, "step": 21344 }, { "epoch": 0.61, "grad_norm": 7.786368988102887, "learning_rate": 3.466210522368305e-06, "loss": 0.6798, "step": 21345 }, { "epoch": 0.61, "grad_norm": 6.011678773624394, "learning_rate": 3.4657691294118567e-06, "loss": 0.6058, "step": 21346 }, { "epoch": 0.61, "grad_norm": 4.934462445905395, "learning_rate": 3.4653277496542083e-06, "loss": 0.6137, "step": 21347 }, { "epoch": 0.61, "grad_norm": 3.668231471263778, "learning_rate": 3.464886383099154e-06, "loss": 0.2656, "step": 21348 }, { "epoch": 0.61, "grad_norm": 6.080468086557304, "learning_rate": 3.464445029750495e-06, "loss": 0.3208, "step": 21349 }, { "epoch": 0.61, "grad_norm": 5.889402368889398, "learning_rate": 3.4640036896120253e-06, "loss": 0.2307, "step": 21350 }, { "epoch": 0.61, "grad_norm": 4.659559852827247, "learning_rate": 3.4635623626875424e-06, "loss": 0.4551, "step": 21351 }, { "epoch": 0.61, "grad_norm": 3.8867975428276775, "learning_rate": 3.463121048980845e-06, "loss": 0.5069, "step": 21352 }, { "epoch": 0.61, "grad_norm": 4.720918321419665, "learning_rate": 3.4626797484957274e-06, "loss": 0.2219, "step": 21353 }, { "epoch": 0.61, "grad_norm": 7.528061251055679, "learning_rate": 3.4622384612359873e-06, "loss": 0.4748, "step": 21354 }, { "epoch": 0.61, "grad_norm": 5.048986672249163, "learning_rate": 3.461797187205418e-06, "loss": 0.3939, "step": 21355 }, { "epoch": 0.61, "grad_norm": 10.032865899203362, "learning_rate": 3.4613559264078205e-06, "loss": 0.2838, "step": 21356 }, { "epoch": 0.61, "grad_norm": 3.9616844180272044, "learning_rate": 3.460914678846988e-06, "loss": 0.3378, "step": 21357 }, { "epoch": 0.61, "grad_norm": 3.543656102707611, "learning_rate": 3.460473444526715e-06, "loss": 0.4559, "step": 21358 }, { "epoch": 0.61, "grad_norm": 6.901046898797889, "learning_rate": 3.4600322234508032e-06, "loss": 0.2732, "step": 21359 }, { "epoch": 0.61, "grad_norm": 14.733486953786018, "learning_rate": 3.459591015623041e-06, "loss": 0.9237, "step": 21360 }, { "epoch": 0.61, "grad_norm": 6.583898914363036, "learning_rate": 3.4591498210472295e-06, "loss": 0.5555, "step": 21361 }, { "epoch": 0.61, "grad_norm": 6.005524317381469, "learning_rate": 3.4587086397271622e-06, "loss": 0.3657, "step": 21362 }, { "epoch": 0.61, "grad_norm": 3.1404618367945076, "learning_rate": 3.4582674716666355e-06, "loss": 0.2436, "step": 21363 }, { "epoch": 0.61, "grad_norm": 4.382653966202209, "learning_rate": 3.4578263168694415e-06, "loss": 0.4059, "step": 21364 }, { "epoch": 0.61, "grad_norm": 3.8273489340602804, "learning_rate": 3.45738517533938e-06, "loss": 0.3797, "step": 21365 }, { "epoch": 0.61, "grad_norm": 4.279597638772052, "learning_rate": 3.456944047080243e-06, "loss": 0.2595, "step": 21366 }, { "epoch": 0.61, "grad_norm": 4.767690242338234, "learning_rate": 3.4565029320958256e-06, "loss": 0.6465, "step": 21367 }, { "epoch": 0.61, "grad_norm": 7.601766460070056, "learning_rate": 3.4560618303899253e-06, "loss": 0.616, "step": 21368 }, { "epoch": 0.61, "grad_norm": 2.4611316785952804, "learning_rate": 3.4556207419663335e-06, "loss": 0.1511, "step": 21369 }, { "epoch": 0.61, "grad_norm": 4.26526053412156, "learning_rate": 3.4551796668288473e-06, "loss": 0.5865, "step": 21370 }, { "epoch": 0.61, "grad_norm": 4.26575478157223, "learning_rate": 3.454738604981259e-06, "loss": 0.3808, "step": 21371 }, { "epoch": 0.61, "grad_norm": 2.8658456598852213, "learning_rate": 3.4542975564273655e-06, "loss": 0.3294, "step": 21372 }, { "epoch": 0.61, "grad_norm": 8.916978925662297, "learning_rate": 3.453856521170959e-06, "loss": 0.3948, "step": 21373 }, { "epoch": 0.61, "grad_norm": 7.977582397417953, "learning_rate": 3.4534154992158336e-06, "loss": 0.635, "step": 21374 }, { "epoch": 0.61, "grad_norm": 6.290329212070058, "learning_rate": 3.4529744905657865e-06, "loss": 0.3504, "step": 21375 }, { "epoch": 0.61, "grad_norm": 9.062807590920912, "learning_rate": 3.452533495224608e-06, "loss": 0.4725, "step": 21376 }, { "epoch": 0.61, "grad_norm": 4.728572197520161, "learning_rate": 3.4520925131960943e-06, "loss": 0.4732, "step": 21377 }, { "epoch": 0.61, "grad_norm": 5.717153378426773, "learning_rate": 3.451651544484036e-06, "loss": 1.1725, "step": 21378 }, { "epoch": 0.61, "grad_norm": 5.63847128204423, "learning_rate": 3.4512105890922307e-06, "loss": 0.7147, "step": 21379 }, { "epoch": 0.61, "grad_norm": 4.975619096430235, "learning_rate": 3.45076964702447e-06, "loss": 0.3225, "step": 21380 }, { "epoch": 0.61, "grad_norm": 4.235310046568848, "learning_rate": 3.450328718284548e-06, "loss": 0.2727, "step": 21381 }, { "epoch": 0.61, "grad_norm": 4.966950480568134, "learning_rate": 3.4498878028762545e-06, "loss": 0.5647, "step": 21382 }, { "epoch": 0.61, "grad_norm": 6.431356238997435, "learning_rate": 3.4494469008033864e-06, "loss": 0.4625, "step": 21383 }, { "epoch": 0.61, "grad_norm": 7.628372822212667, "learning_rate": 3.4490060120697376e-06, "loss": 0.5466, "step": 21384 }, { "epoch": 0.61, "grad_norm": 7.2449945731764664, "learning_rate": 3.448565136679096e-06, "loss": 0.2616, "step": 21385 }, { "epoch": 0.61, "grad_norm": 6.118436059895597, "learning_rate": 3.4481242746352606e-06, "loss": 0.5911, "step": 21386 }, { "epoch": 0.61, "grad_norm": 4.329763467288134, "learning_rate": 3.4476834259420187e-06, "loss": 0.5144, "step": 21387 }, { "epoch": 0.61, "grad_norm": 3.413367717937293, "learning_rate": 3.4472425906031674e-06, "loss": 0.4468, "step": 21388 }, { "epoch": 0.61, "grad_norm": 5.466550482798498, "learning_rate": 3.4468017686224953e-06, "loss": 0.5089, "step": 21389 }, { "epoch": 0.61, "grad_norm": 8.380697163007937, "learning_rate": 3.4463609600037977e-06, "loss": 0.7109, "step": 21390 }, { "epoch": 0.61, "grad_norm": 7.283676463525641, "learning_rate": 3.445920164750863e-06, "loss": 0.812, "step": 21391 }, { "epoch": 0.61, "grad_norm": 7.80757071055976, "learning_rate": 3.445479382867487e-06, "loss": 0.4218, "step": 21392 }, { "epoch": 0.61, "grad_norm": 3.036619655954131, "learning_rate": 3.445038614357462e-06, "loss": 0.0845, "step": 21393 }, { "epoch": 0.61, "grad_norm": 4.645556696822429, "learning_rate": 3.4445978592245755e-06, "loss": 0.4863, "step": 21394 }, { "epoch": 0.61, "grad_norm": 6.407287141536512, "learning_rate": 3.4441571174726247e-06, "loss": 0.582, "step": 21395 }, { "epoch": 0.61, "grad_norm": 5.327699795310144, "learning_rate": 3.443716389105397e-06, "loss": 0.4782, "step": 21396 }, { "epoch": 0.61, "grad_norm": 7.541512710798317, "learning_rate": 3.4432756741266875e-06, "loss": 0.5589, "step": 21397 }, { "epoch": 0.61, "grad_norm": 5.502910147550158, "learning_rate": 3.442834972540283e-06, "loss": 0.8063, "step": 21398 }, { "epoch": 0.61, "grad_norm": 6.106278124835274, "learning_rate": 3.442394284349979e-06, "loss": 0.852, "step": 21399 }, { "epoch": 0.61, "grad_norm": 9.585168734613935, "learning_rate": 3.441953609559566e-06, "loss": 0.8187, "step": 21400 }, { "epoch": 0.61, "grad_norm": 4.770006960577854, "learning_rate": 3.441512948172832e-06, "loss": 0.35, "step": 21401 }, { "epoch": 0.61, "grad_norm": 2.727789681922937, "learning_rate": 3.4410723001935727e-06, "loss": 0.3098, "step": 21402 }, { "epoch": 0.61, "grad_norm": 5.339741882379487, "learning_rate": 3.440631665625575e-06, "loss": 0.3269, "step": 21403 }, { "epoch": 0.61, "grad_norm": 3.5680543532187388, "learning_rate": 3.4401910444726323e-06, "loss": 0.3314, "step": 21404 }, { "epoch": 0.61, "grad_norm": 7.738144543598977, "learning_rate": 3.4397504367385313e-06, "loss": 0.3721, "step": 21405 }, { "epoch": 0.61, "grad_norm": 1.9247586318824725, "learning_rate": 3.4393098424270687e-06, "loss": 0.1881, "step": 21406 }, { "epoch": 0.61, "grad_norm": 6.785162680298389, "learning_rate": 3.4388692615420295e-06, "loss": 0.5175, "step": 21407 }, { "epoch": 0.61, "grad_norm": 4.46078063368363, "learning_rate": 3.4384286940872047e-06, "loss": 0.6011, "step": 21408 }, { "epoch": 0.61, "grad_norm": 4.250595205365527, "learning_rate": 3.4379881400663876e-06, "loss": 0.4754, "step": 21409 }, { "epoch": 0.61, "grad_norm": 7.864964979061044, "learning_rate": 3.437547599483365e-06, "loss": 0.5426, "step": 21410 }, { "epoch": 0.61, "grad_norm": 5.039808540416387, "learning_rate": 3.4371070723419297e-06, "loss": 0.6995, "step": 21411 }, { "epoch": 0.61, "grad_norm": 3.959694571420628, "learning_rate": 3.4366665586458674e-06, "loss": 0.1239, "step": 21412 }, { "epoch": 0.61, "grad_norm": 2.4742344524823587, "learning_rate": 3.4362260583989725e-06, "loss": 0.1909, "step": 21413 }, { "epoch": 0.61, "grad_norm": 3.9527181483852782, "learning_rate": 3.4357855716050315e-06, "loss": 0.3276, "step": 21414 }, { "epoch": 0.61, "grad_norm": 8.302573910286291, "learning_rate": 3.4353450982678325e-06, "loss": 0.7794, "step": 21415 }, { "epoch": 0.61, "grad_norm": 6.1166477476792345, "learning_rate": 3.43490463839117e-06, "loss": 0.4044, "step": 21416 }, { "epoch": 0.61, "grad_norm": 2.8833071793397202, "learning_rate": 3.4344641919788293e-06, "loss": 0.1243, "step": 21417 }, { "epoch": 0.61, "grad_norm": 6.225779102395437, "learning_rate": 3.4340237590346004e-06, "loss": 0.5462, "step": 21418 }, { "epoch": 0.61, "grad_norm": 82.71707867590158, "learning_rate": 3.433583339562272e-06, "loss": 0.9173, "step": 21419 }, { "epoch": 0.61, "grad_norm": 4.647929854682783, "learning_rate": 3.433142933565634e-06, "loss": 0.5274, "step": 21420 }, { "epoch": 0.61, "grad_norm": 6.604500129375677, "learning_rate": 3.4327025410484727e-06, "loss": 0.5537, "step": 21421 }, { "epoch": 0.61, "grad_norm": 10.741847428921073, "learning_rate": 3.4322621620145807e-06, "loss": 0.488, "step": 21422 }, { "epoch": 0.61, "grad_norm": 3.3871077299856442, "learning_rate": 3.4318217964677424e-06, "loss": 0.3359, "step": 21423 }, { "epoch": 0.61, "grad_norm": 7.065410183218586, "learning_rate": 3.4313814444117477e-06, "loss": 0.3507, "step": 21424 }, { "epoch": 0.61, "grad_norm": 5.377123125065281, "learning_rate": 3.4309411058503876e-06, "loss": 0.304, "step": 21425 }, { "epoch": 0.61, "grad_norm": 6.0052448237408615, "learning_rate": 3.430500780787447e-06, "loss": 0.4614, "step": 21426 }, { "epoch": 0.61, "grad_norm": 9.567550621885612, "learning_rate": 3.430060469226715e-06, "loss": 0.498, "step": 21427 }, { "epoch": 0.61, "grad_norm": 6.2332260488093505, "learning_rate": 3.4296201711719784e-06, "loss": 0.2518, "step": 21428 }, { "epoch": 0.61, "grad_norm": 2.7940768304595354, "learning_rate": 3.429179886627028e-06, "loss": 0.1862, "step": 21429 }, { "epoch": 0.61, "grad_norm": 8.027467601405425, "learning_rate": 3.428739615595648e-06, "loss": 0.9575, "step": 21430 }, { "epoch": 0.61, "grad_norm": 3.590250118861513, "learning_rate": 3.4282993580816293e-06, "loss": 0.3156, "step": 21431 }, { "epoch": 0.61, "grad_norm": 5.280380358000614, "learning_rate": 3.427859114088755e-06, "loss": 0.2725, "step": 21432 }, { "epoch": 0.61, "grad_norm": 8.067184802300023, "learning_rate": 3.4274188836208167e-06, "loss": 0.6709, "step": 21433 }, { "epoch": 0.61, "grad_norm": 4.100308500871902, "learning_rate": 3.4269786666816014e-06, "loss": 0.3991, "step": 21434 }, { "epoch": 0.61, "grad_norm": 2.8932508089553415, "learning_rate": 3.4265384632748923e-06, "loss": 0.0866, "step": 21435 }, { "epoch": 0.61, "grad_norm": 9.250081860979094, "learning_rate": 3.426098273404481e-06, "loss": 0.5215, "step": 21436 }, { "epoch": 0.61, "grad_norm": 5.822101988695532, "learning_rate": 3.4256580970741527e-06, "loss": 0.4608, "step": 21437 }, { "epoch": 0.61, "grad_norm": 4.038523713705707, "learning_rate": 3.425217934287694e-06, "loss": 0.3839, "step": 21438 }, { "epoch": 0.61, "grad_norm": 3.756452159551064, "learning_rate": 3.4247777850488895e-06, "loss": 0.2972, "step": 21439 }, { "epoch": 0.61, "grad_norm": 11.058508373754236, "learning_rate": 3.424337649361529e-06, "loss": 0.6492, "step": 21440 }, { "epoch": 0.61, "grad_norm": 5.409521843797447, "learning_rate": 3.4238975272293985e-06, "loss": 0.3397, "step": 21441 }, { "epoch": 0.61, "grad_norm": 5.202062870709016, "learning_rate": 3.423457418656282e-06, "loss": 0.5492, "step": 21442 }, { "epoch": 0.61, "grad_norm": 6.0457765039795985, "learning_rate": 3.4230173236459684e-06, "loss": 0.4742, "step": 21443 }, { "epoch": 0.61, "grad_norm": 5.63629760014816, "learning_rate": 3.4225772422022417e-06, "loss": 0.4451, "step": 21444 }, { "epoch": 0.61, "grad_norm": 3.7649280172156505, "learning_rate": 3.42213717432889e-06, "loss": 0.6385, "step": 21445 }, { "epoch": 0.61, "grad_norm": 5.5889188906397544, "learning_rate": 3.4216971200296965e-06, "loss": 0.5197, "step": 21446 }, { "epoch": 0.61, "grad_norm": 3.7311829527385334, "learning_rate": 3.421257079308451e-06, "loss": 0.279, "step": 21447 }, { "epoch": 0.61, "grad_norm": 6.002176724410969, "learning_rate": 3.420817052168933e-06, "loss": 0.3577, "step": 21448 }, { "epoch": 0.61, "grad_norm": 12.288745563200903, "learning_rate": 3.4203770386149326e-06, "loss": 0.6344, "step": 21449 }, { "epoch": 0.61, "grad_norm": 7.595668158005623, "learning_rate": 3.419937038650236e-06, "loss": 0.7656, "step": 21450 }, { "epoch": 0.61, "grad_norm": 6.999644474810323, "learning_rate": 3.419497052278623e-06, "loss": 0.8357, "step": 21451 }, { "epoch": 0.61, "grad_norm": 5.92023936609325, "learning_rate": 3.4190570795038854e-06, "loss": 0.2763, "step": 21452 }, { "epoch": 0.61, "grad_norm": 7.844803940685697, "learning_rate": 3.4186171203298033e-06, "loss": 0.483, "step": 21453 }, { "epoch": 0.61, "grad_norm": 6.428207189973798, "learning_rate": 3.4181771747601656e-06, "loss": 0.4422, "step": 21454 }, { "epoch": 0.61, "grad_norm": 7.532741667994673, "learning_rate": 3.417737242798752e-06, "loss": 0.4257, "step": 21455 }, { "epoch": 0.61, "grad_norm": 5.736165944171656, "learning_rate": 3.417297324449352e-06, "loss": 0.8458, "step": 21456 }, { "epoch": 0.61, "grad_norm": 5.7355856799760385, "learning_rate": 3.416857419715748e-06, "loss": 0.2339, "step": 21457 }, { "epoch": 0.61, "grad_norm": 4.089919270835614, "learning_rate": 3.4164175286017225e-06, "loss": 0.4834, "step": 21458 }, { "epoch": 0.61, "grad_norm": 6.263753673375015, "learning_rate": 3.415977651111065e-06, "loss": 0.388, "step": 21459 }, { "epoch": 0.61, "grad_norm": 4.974329329097794, "learning_rate": 3.415537787247555e-06, "loss": 0.3456, "step": 21460 }, { "epoch": 0.61, "grad_norm": 3.6448068136398524, "learning_rate": 3.41509793701498e-06, "loss": 0.1881, "step": 21461 }, { "epoch": 0.61, "grad_norm": 6.799946373840258, "learning_rate": 3.41465810041712e-06, "loss": 0.476, "step": 21462 }, { "epoch": 0.61, "grad_norm": 2.595655729321809, "learning_rate": 3.4142182774577633e-06, "loss": 0.1794, "step": 21463 }, { "epoch": 0.61, "grad_norm": 3.417614038862748, "learning_rate": 3.4137784681406905e-06, "loss": 0.2631, "step": 21464 }, { "epoch": 0.61, "grad_norm": 2.977006693859731, "learning_rate": 3.413338672469685e-06, "loss": 0.2817, "step": 21465 }, { "epoch": 0.61, "grad_norm": 3.577047710578286, "learning_rate": 3.412898890448534e-06, "loss": 0.261, "step": 21466 }, { "epoch": 0.61, "grad_norm": 3.7580090509034654, "learning_rate": 3.4124591220810167e-06, "loss": 0.4275, "step": 21467 }, { "epoch": 0.61, "grad_norm": 5.1412677768851625, "learning_rate": 3.41201936737092e-06, "loss": 0.4914, "step": 21468 }, { "epoch": 0.61, "grad_norm": 4.4185675932749655, "learning_rate": 3.4115796263220225e-06, "loss": 0.1919, "step": 21469 }, { "epoch": 0.61, "grad_norm": 3.6601224590357964, "learning_rate": 3.4111398989381122e-06, "loss": 0.2616, "step": 21470 }, { "epoch": 0.61, "grad_norm": 4.897102551264193, "learning_rate": 3.4107001852229687e-06, "loss": 0.2778, "step": 21471 }, { "epoch": 0.61, "grad_norm": 7.2881508407350575, "learning_rate": 3.410260485180377e-06, "loss": 1.1452, "step": 21472 }, { "epoch": 0.61, "grad_norm": 4.841915669278424, "learning_rate": 3.409820798814116e-06, "loss": 0.1209, "step": 21473 }, { "epoch": 0.61, "grad_norm": 5.962903339811001, "learning_rate": 3.4093811261279726e-06, "loss": 0.3928, "step": 21474 }, { "epoch": 0.61, "grad_norm": 6.146253883930039, "learning_rate": 3.4089414671257283e-06, "loss": 0.7486, "step": 21475 }, { "epoch": 0.62, "grad_norm": 2.7945623373169313, "learning_rate": 3.408501821811162e-06, "loss": 0.403, "step": 21476 }, { "epoch": 0.62, "grad_norm": 9.311653252323344, "learning_rate": 3.408062190188063e-06, "loss": 0.6232, "step": 21477 }, { "epoch": 0.62, "grad_norm": 4.591541979297385, "learning_rate": 3.4076225722602046e-06, "loss": 0.7225, "step": 21478 }, { "epoch": 0.62, "grad_norm": 4.670085755730975, "learning_rate": 3.407182968031376e-06, "loss": 0.3251, "step": 21479 }, { "epoch": 0.62, "grad_norm": 5.244598289139695, "learning_rate": 3.4067433775053547e-06, "loss": 0.5938, "step": 21480 }, { "epoch": 0.62, "grad_norm": 3.384374667490822, "learning_rate": 3.4063038006859227e-06, "loss": 0.1584, "step": 21481 }, { "epoch": 0.62, "grad_norm": 14.630287453571139, "learning_rate": 3.405864237576866e-06, "loss": 0.4428, "step": 21482 }, { "epoch": 0.62, "grad_norm": 4.779851970854458, "learning_rate": 3.4054246881819607e-06, "loss": 0.2891, "step": 21483 }, { "epoch": 0.62, "grad_norm": 4.216978930544849, "learning_rate": 3.4049851525049925e-06, "loss": 0.618, "step": 21484 }, { "epoch": 0.62, "grad_norm": 8.11449727774658, "learning_rate": 3.4045456305497383e-06, "loss": 0.4888, "step": 21485 }, { "epoch": 0.62, "grad_norm": 6.0608899052937595, "learning_rate": 3.4041061223199833e-06, "loss": 0.5167, "step": 21486 }, { "epoch": 0.62, "grad_norm": 5.608676282459031, "learning_rate": 3.4036666278195064e-06, "loss": 0.5216, "step": 21487 }, { "epoch": 0.62, "grad_norm": 7.620989260600009, "learning_rate": 3.4032271470520893e-06, "loss": 0.4415, "step": 21488 }, { "epoch": 0.62, "grad_norm": 6.612733461120283, "learning_rate": 3.402787680021511e-06, "loss": 0.5076, "step": 21489 }, { "epoch": 0.62, "grad_norm": 5.037762685699491, "learning_rate": 3.4023482267315542e-06, "loss": 0.5674, "step": 21490 }, { "epoch": 0.62, "grad_norm": 6.456619314172057, "learning_rate": 3.401908787186e-06, "loss": 0.6457, "step": 21491 }, { "epoch": 0.62, "grad_norm": 4.1464121269152505, "learning_rate": 3.4014693613886264e-06, "loss": 0.5514, "step": 21492 }, { "epoch": 0.62, "grad_norm": 5.022139967531518, "learning_rate": 3.4010299493432163e-06, "loss": 0.2738, "step": 21493 }, { "epoch": 0.62, "grad_norm": 3.9409805234279207, "learning_rate": 3.400590551053548e-06, "loss": 0.4922, "step": 21494 }, { "epoch": 0.62, "grad_norm": 5.237485320223172, "learning_rate": 3.4001511665234034e-06, "loss": 0.3044, "step": 21495 }, { "epoch": 0.62, "grad_norm": 2.1732446070733293, "learning_rate": 3.3997117957565588e-06, "loss": 0.1239, "step": 21496 }, { "epoch": 0.62, "grad_norm": 4.790269907950451, "learning_rate": 3.399272438756799e-06, "loss": 0.5186, "step": 21497 }, { "epoch": 0.62, "grad_norm": 4.975449945240407, "learning_rate": 3.3988330955279004e-06, "loss": 0.6485, "step": 21498 }, { "epoch": 0.62, "grad_norm": 2.391022593028923, "learning_rate": 3.3983937660736423e-06, "loss": 0.1548, "step": 21499 }, { "epoch": 0.62, "grad_norm": 5.91986936761771, "learning_rate": 3.3979544503978074e-06, "loss": 0.3058, "step": 21500 }, { "epoch": 0.62, "grad_norm": 6.578569881953215, "learning_rate": 3.3975151485041726e-06, "loss": 0.856, "step": 21501 }, { "epoch": 0.62, "grad_norm": 10.781566582053689, "learning_rate": 3.3970758603965183e-06, "loss": 0.6328, "step": 21502 }, { "epoch": 0.62, "grad_norm": 5.778180395163051, "learning_rate": 3.396636586078621e-06, "loss": 0.31, "step": 21503 }, { "epoch": 0.62, "grad_norm": 7.161182619278245, "learning_rate": 3.3961973255542646e-06, "loss": 0.6623, "step": 21504 }, { "epoch": 0.62, "grad_norm": 3.4397404738601423, "learning_rate": 3.395758078827222e-06, "loss": 0.2466, "step": 21505 }, { "epoch": 0.62, "grad_norm": 4.662568714157085, "learning_rate": 3.3953188459012752e-06, "loss": 0.1206, "step": 21506 }, { "epoch": 0.62, "grad_norm": 2.6805693840719917, "learning_rate": 3.3948796267802066e-06, "loss": 0.2231, "step": 21507 }, { "epoch": 0.62, "grad_norm": 4.487143110784564, "learning_rate": 3.394440421467787e-06, "loss": 0.8622, "step": 21508 }, { "epoch": 0.62, "grad_norm": 6.913896703079213, "learning_rate": 3.3940012299678014e-06, "loss": 0.3694, "step": 21509 }, { "epoch": 0.62, "grad_norm": 7.9185935302305595, "learning_rate": 3.393562052284024e-06, "loss": 0.7336, "step": 21510 }, { "epoch": 0.62, "grad_norm": 6.38619016740643, "learning_rate": 3.393122888420236e-06, "loss": 0.4806, "step": 21511 }, { "epoch": 0.62, "grad_norm": 7.435922984417469, "learning_rate": 3.3926837383802113e-06, "loss": 0.9804, "step": 21512 }, { "epoch": 0.62, "grad_norm": 3.5820298855278723, "learning_rate": 3.392244602167733e-06, "loss": 0.3045, "step": 21513 }, { "epoch": 0.62, "grad_norm": 4.773595716785634, "learning_rate": 3.391805479786575e-06, "loss": 0.4197, "step": 21514 }, { "epoch": 0.62, "grad_norm": 2.5709709553125863, "learning_rate": 3.3913663712405158e-06, "loss": 0.2723, "step": 21515 }, { "epoch": 0.62, "grad_norm": 10.061218559224542, "learning_rate": 3.390927276533336e-06, "loss": 0.2817, "step": 21516 }, { "epoch": 0.62, "grad_norm": 4.445662727705353, "learning_rate": 3.390488195668809e-06, "loss": 0.25, "step": 21517 }, { "epoch": 0.62, "grad_norm": 5.576085420329133, "learning_rate": 3.3900491286507153e-06, "loss": 0.702, "step": 21518 }, { "epoch": 0.62, "grad_norm": 4.601758139941258, "learning_rate": 3.3896100754828286e-06, "loss": 0.4481, "step": 21519 }, { "epoch": 0.62, "grad_norm": 11.861913758595508, "learning_rate": 3.38917103616893e-06, "loss": 0.5489, "step": 21520 }, { "epoch": 0.62, "grad_norm": 4.085582360320426, "learning_rate": 3.388732010712794e-06, "loss": 0.7738, "step": 21521 }, { "epoch": 0.62, "grad_norm": 4.930707871258886, "learning_rate": 3.3882929991181966e-06, "loss": 0.3534, "step": 21522 }, { "epoch": 0.62, "grad_norm": 10.175541698835795, "learning_rate": 3.3878540013889182e-06, "loss": 0.7238, "step": 21523 }, { "epoch": 0.62, "grad_norm": 5.385413085621493, "learning_rate": 3.387415017528733e-06, "loss": 0.6342, "step": 21524 }, { "epoch": 0.62, "grad_norm": 2.0295202666495076, "learning_rate": 3.3869760475414183e-06, "loss": 0.3769, "step": 21525 }, { "epoch": 0.62, "grad_norm": 4.040128954467557, "learning_rate": 3.386537091430748e-06, "loss": 0.4656, "step": 21526 }, { "epoch": 0.62, "grad_norm": 9.804244924336446, "learning_rate": 3.386098149200503e-06, "loss": 0.5977, "step": 21527 }, { "epoch": 0.62, "grad_norm": 4.449889041288794, "learning_rate": 3.3856592208544554e-06, "loss": 0.2602, "step": 21528 }, { "epoch": 0.62, "grad_norm": 15.149684718914765, "learning_rate": 3.385220306396384e-06, "loss": 0.8504, "step": 21529 }, { "epoch": 0.62, "grad_norm": 3.2772437570296953, "learning_rate": 3.384781405830061e-06, "loss": 0.4116, "step": 21530 }, { "epoch": 0.62, "grad_norm": 5.123333357589877, "learning_rate": 3.384342519159266e-06, "loss": 0.3975, "step": 21531 }, { "epoch": 0.62, "grad_norm": 5.8858932797254635, "learning_rate": 3.383903646387775e-06, "loss": 0.7509, "step": 21532 }, { "epoch": 0.62, "grad_norm": 2.292328142299306, "learning_rate": 3.3834647875193592e-06, "loss": 0.2329, "step": 21533 }, { "epoch": 0.62, "grad_norm": 2.7182649201610167, "learning_rate": 3.3830259425578e-06, "loss": 0.1686, "step": 21534 }, { "epoch": 0.62, "grad_norm": 9.607998384944072, "learning_rate": 3.3825871115068666e-06, "loss": 0.7696, "step": 21535 }, { "epoch": 0.62, "grad_norm": 5.554297024379239, "learning_rate": 3.3821482943703387e-06, "loss": 0.2738, "step": 21536 }, { "epoch": 0.62, "grad_norm": 4.251021763749453, "learning_rate": 3.3817094911519894e-06, "loss": 0.2942, "step": 21537 }, { "epoch": 0.62, "grad_norm": 4.599352566622488, "learning_rate": 3.3812707018555944e-06, "loss": 0.4867, "step": 21538 }, { "epoch": 0.62, "grad_norm": 9.222414536239134, "learning_rate": 3.380831926484927e-06, "loss": 0.6919, "step": 21539 }, { "epoch": 0.62, "grad_norm": 5.982881225127451, "learning_rate": 3.3803931650437636e-06, "loss": 0.3638, "step": 21540 }, { "epoch": 0.62, "grad_norm": 4.5203871514135665, "learning_rate": 3.37995441753588e-06, "loss": 0.3496, "step": 21541 }, { "epoch": 0.62, "grad_norm": 4.611925176142553, "learning_rate": 3.379515683965047e-06, "loss": 0.2279, "step": 21542 }, { "epoch": 0.62, "grad_norm": 4.05393686861924, "learning_rate": 3.379076964335043e-06, "loss": 0.5018, "step": 21543 }, { "epoch": 0.62, "grad_norm": 6.599136249780274, "learning_rate": 3.378638258649639e-06, "loss": 0.2901, "step": 21544 }, { "epoch": 0.62, "grad_norm": 5.306484586564814, "learning_rate": 3.3781995669126115e-06, "loss": 0.4486, "step": 21545 }, { "epoch": 0.62, "grad_norm": 7.194222002096829, "learning_rate": 3.3777608891277313e-06, "loss": 0.4164, "step": 21546 }, { "epoch": 0.62, "grad_norm": 4.80631046331543, "learning_rate": 3.377322225298776e-06, "loss": 0.5145, "step": 21547 }, { "epoch": 0.62, "grad_norm": 4.659248999178235, "learning_rate": 3.376883575429518e-06, "loss": 0.3561, "step": 21548 }, { "epoch": 0.62, "grad_norm": 5.1131647036961985, "learning_rate": 3.37644493952373e-06, "loss": 0.4447, "step": 21549 }, { "epoch": 0.62, "grad_norm": 5.386737560657081, "learning_rate": 3.376006317585187e-06, "loss": 0.5375, "step": 21550 }, { "epoch": 0.62, "grad_norm": 4.708300964559283, "learning_rate": 3.3755677096176615e-06, "loss": 0.5155, "step": 21551 }, { "epoch": 0.62, "grad_norm": 4.359827407824333, "learning_rate": 3.3751291156249277e-06, "loss": 0.2806, "step": 21552 }, { "epoch": 0.62, "grad_norm": 5.9462510519806635, "learning_rate": 3.374690535610756e-06, "loss": 0.549, "step": 21553 }, { "epoch": 0.62, "grad_norm": 5.358476124112215, "learning_rate": 3.3742519695789235e-06, "loss": 0.5605, "step": 21554 }, { "epoch": 0.62, "grad_norm": 8.949857904595534, "learning_rate": 3.3738134175332005e-06, "loss": 1.1594, "step": 21555 }, { "epoch": 0.62, "grad_norm": 8.800461534296314, "learning_rate": 3.373374879477359e-06, "loss": 0.8647, "step": 21556 }, { "epoch": 0.62, "grad_norm": 5.199818061433426, "learning_rate": 3.372936355415175e-06, "loss": 0.5265, "step": 21557 }, { "epoch": 0.62, "grad_norm": 2.6336863621146405, "learning_rate": 3.372497845350419e-06, "loss": 0.1878, "step": 21558 }, { "epoch": 0.62, "grad_norm": 7.040274834253369, "learning_rate": 3.3720593492868635e-06, "loss": 0.4513, "step": 21559 }, { "epoch": 0.62, "grad_norm": 5.099521853902357, "learning_rate": 3.37162086722828e-06, "loss": 0.2271, "step": 21560 }, { "epoch": 0.62, "grad_norm": 5.904051538052501, "learning_rate": 3.371182399178443e-06, "loss": 0.5913, "step": 21561 }, { "epoch": 0.62, "grad_norm": 2.672295676682623, "learning_rate": 3.3707439451411227e-06, "loss": 0.2586, "step": 21562 }, { "epoch": 0.62, "grad_norm": 6.578301252962298, "learning_rate": 3.3703055051200908e-06, "loss": 0.4187, "step": 21563 }, { "epoch": 0.62, "grad_norm": 5.774950874416917, "learning_rate": 3.3698670791191223e-06, "loss": 0.5815, "step": 21564 }, { "epoch": 0.62, "grad_norm": 7.908357000231795, "learning_rate": 3.369428667141984e-06, "loss": 0.8739, "step": 21565 }, { "epoch": 0.62, "grad_norm": 5.3771707897422125, "learning_rate": 3.3689902691924527e-06, "loss": 0.4598, "step": 21566 }, { "epoch": 0.62, "grad_norm": 4.695197664148227, "learning_rate": 3.3685518852742956e-06, "loss": 0.4218, "step": 21567 }, { "epoch": 0.62, "grad_norm": 12.532402955856528, "learning_rate": 3.368113515391287e-06, "loss": 0.4977, "step": 21568 }, { "epoch": 0.62, "grad_norm": 6.819180354201302, "learning_rate": 3.3676751595471945e-06, "loss": 0.4246, "step": 21569 }, { "epoch": 0.62, "grad_norm": 7.778668599733424, "learning_rate": 3.3672368177457936e-06, "loss": 0.54, "step": 21570 }, { "epoch": 0.62, "grad_norm": 9.150692030788381, "learning_rate": 3.366798489990853e-06, "loss": 0.4582, "step": 21571 }, { "epoch": 0.62, "grad_norm": 5.953424361103664, "learning_rate": 3.3663601762861426e-06, "loss": 0.4892, "step": 21572 }, { "epoch": 0.62, "grad_norm": 6.354834733040107, "learning_rate": 3.365921876635436e-06, "loss": 0.4342, "step": 21573 }, { "epoch": 0.62, "grad_norm": 6.983907457785095, "learning_rate": 3.365483591042502e-06, "loss": 0.3501, "step": 21574 }, { "epoch": 0.62, "grad_norm": 6.914406012498562, "learning_rate": 3.365045319511112e-06, "loss": 0.3895, "step": 21575 }, { "epoch": 0.62, "grad_norm": 10.19040802248236, "learning_rate": 3.3646070620450346e-06, "loss": 0.8262, "step": 21576 }, { "epoch": 0.62, "grad_norm": 6.122630926237784, "learning_rate": 3.3641688186480425e-06, "loss": 0.5033, "step": 21577 }, { "epoch": 0.62, "grad_norm": 8.611448837821994, "learning_rate": 3.3637305893239037e-06, "loss": 1.0828, "step": 21578 }, { "epoch": 0.62, "grad_norm": 6.1100601631611395, "learning_rate": 3.3632923740763902e-06, "loss": 0.5812, "step": 21579 }, { "epoch": 0.62, "grad_norm": 7.443531603936403, "learning_rate": 3.362854172909269e-06, "loss": 0.4557, "step": 21580 }, { "epoch": 0.62, "grad_norm": 4.957705548825298, "learning_rate": 3.362415985826313e-06, "loss": 0.333, "step": 21581 }, { "epoch": 0.62, "grad_norm": 7.395621709527095, "learning_rate": 3.3619778128312906e-06, "loss": 0.2719, "step": 21582 }, { "epoch": 0.62, "grad_norm": 5.750317150159902, "learning_rate": 3.361539653927971e-06, "loss": 0.2649, "step": 21583 }, { "epoch": 0.62, "grad_norm": 3.988290855851968, "learning_rate": 3.3611015091201247e-06, "loss": 0.1435, "step": 21584 }, { "epoch": 0.62, "grad_norm": 5.852631780614637, "learning_rate": 3.36066337841152e-06, "loss": 0.8093, "step": 21585 }, { "epoch": 0.62, "grad_norm": 7.1118094164325845, "learning_rate": 3.3602252618059274e-06, "loss": 0.3448, "step": 21586 }, { "epoch": 0.62, "grad_norm": 4.672272744997377, "learning_rate": 3.359787159307113e-06, "loss": 0.2639, "step": 21587 }, { "epoch": 0.62, "grad_norm": 5.697878914753916, "learning_rate": 3.3593490709188483e-06, "loss": 0.3624, "step": 21588 }, { "epoch": 0.62, "grad_norm": 6.801518189425895, "learning_rate": 3.3589109966449027e-06, "loss": 0.7316, "step": 21589 }, { "epoch": 0.62, "grad_norm": 2.8063103701208596, "learning_rate": 3.3584729364890423e-06, "loss": 0.2335, "step": 21590 }, { "epoch": 0.62, "grad_norm": 5.924036884496974, "learning_rate": 3.3580348904550397e-06, "loss": 0.4413, "step": 21591 }, { "epoch": 0.62, "grad_norm": 3.5029052869650408, "learning_rate": 3.3575968585466574e-06, "loss": 0.1813, "step": 21592 }, { "epoch": 0.62, "grad_norm": 3.676239244587356, "learning_rate": 3.3571588407676693e-06, "loss": 0.3503, "step": 21593 }, { "epoch": 0.62, "grad_norm": 2.706661006384445, "learning_rate": 3.3567208371218407e-06, "loss": 0.1462, "step": 21594 }, { "epoch": 0.62, "grad_norm": 4.8045358525356825, "learning_rate": 3.3562828476129415e-06, "loss": 0.3796, "step": 21595 }, { "epoch": 0.62, "grad_norm": 4.442623458117151, "learning_rate": 3.3558448722447356e-06, "loss": 0.4489, "step": 21596 }, { "epoch": 0.62, "grad_norm": 3.4246041201384414, "learning_rate": 3.3554069110209965e-06, "loss": 0.1612, "step": 21597 }, { "epoch": 0.62, "grad_norm": 7.640489385918643, "learning_rate": 3.3549689639454885e-06, "loss": 0.74, "step": 21598 }, { "epoch": 0.62, "grad_norm": 2.8447682161224828, "learning_rate": 3.354531031021979e-06, "loss": 0.2957, "step": 21599 }, { "epoch": 0.62, "grad_norm": 4.455615073578386, "learning_rate": 3.3540931122542384e-06, "loss": 0.8084, "step": 21600 }, { "epoch": 0.62, "grad_norm": 5.305509028915906, "learning_rate": 3.353655207646031e-06, "loss": 0.1875, "step": 21601 }, { "epoch": 0.62, "grad_norm": 7.795027060911371, "learning_rate": 3.3532173172011262e-06, "loss": 0.5312, "step": 21602 }, { "epoch": 0.62, "grad_norm": 4.7302126070916275, "learning_rate": 3.352779440923288e-06, "loss": 0.5355, "step": 21603 }, { "epoch": 0.62, "grad_norm": 2.8838590877620263, "learning_rate": 3.3523415788162873e-06, "loss": 0.2506, "step": 21604 }, { "epoch": 0.62, "grad_norm": 5.114530429708915, "learning_rate": 3.351903730883888e-06, "loss": 0.7675, "step": 21605 }, { "epoch": 0.62, "grad_norm": 5.207452755556691, "learning_rate": 3.3514658971298576e-06, "loss": 0.7228, "step": 21606 }, { "epoch": 0.62, "grad_norm": 6.06941124155064, "learning_rate": 3.3510280775579646e-06, "loss": 0.7854, "step": 21607 }, { "epoch": 0.62, "grad_norm": 4.92993705105781, "learning_rate": 3.350590272171973e-06, "loss": 0.5336, "step": 21608 }, { "epoch": 0.62, "grad_norm": 6.873802219434618, "learning_rate": 3.350152480975651e-06, "loss": 0.6462, "step": 21609 }, { "epoch": 0.62, "grad_norm": 7.773635601030645, "learning_rate": 3.349714703972763e-06, "loss": 0.646, "step": 21610 }, { "epoch": 0.62, "grad_norm": 6.009773995756581, "learning_rate": 3.3492769411670776e-06, "loss": 0.4171, "step": 21611 }, { "epoch": 0.62, "grad_norm": 6.9039243519385876, "learning_rate": 3.3488391925623586e-06, "loss": 0.2362, "step": 21612 }, { "epoch": 0.62, "grad_norm": 3.5302096159298477, "learning_rate": 3.3484014581623715e-06, "loss": 0.2456, "step": 21613 }, { "epoch": 0.62, "grad_norm": 2.988121716597859, "learning_rate": 3.3479637379708853e-06, "loss": 0.3797, "step": 21614 }, { "epoch": 0.62, "grad_norm": 5.534302402668537, "learning_rate": 3.347526031991663e-06, "loss": 0.4076, "step": 21615 }, { "epoch": 0.62, "grad_norm": 5.3908666141929595, "learning_rate": 3.3470883402284716e-06, "loss": 0.7622, "step": 21616 }, { "epoch": 0.62, "grad_norm": 2.8048749770727155, "learning_rate": 3.3466506626850735e-06, "loss": 0.2975, "step": 21617 }, { "epoch": 0.62, "grad_norm": 3.89123867357997, "learning_rate": 3.346212999365238e-06, "loss": 0.538, "step": 21618 }, { "epoch": 0.62, "grad_norm": 6.5721879929014575, "learning_rate": 3.3457753502727285e-06, "loss": 0.4252, "step": 21619 }, { "epoch": 0.62, "grad_norm": 2.676116231847398, "learning_rate": 3.34533771541131e-06, "loss": 0.2166, "step": 21620 }, { "epoch": 0.62, "grad_norm": 5.202753093819561, "learning_rate": 3.3449000947847465e-06, "loss": 0.4402, "step": 21621 }, { "epoch": 0.62, "grad_norm": 3.21404893396581, "learning_rate": 3.3444624883968034e-06, "loss": 0.282, "step": 21622 }, { "epoch": 0.62, "grad_norm": 7.690188085242614, "learning_rate": 3.3440248962512467e-06, "loss": 0.5645, "step": 21623 }, { "epoch": 0.62, "grad_norm": 4.463514582382283, "learning_rate": 3.3435873183518397e-06, "loss": 0.2286, "step": 21624 }, { "epoch": 0.62, "grad_norm": 10.283627591696527, "learning_rate": 3.343149754702348e-06, "loss": 0.7554, "step": 21625 }, { "epoch": 0.62, "grad_norm": 5.995500625947536, "learning_rate": 3.3427122053065327e-06, "loss": 0.3817, "step": 21626 }, { "epoch": 0.62, "grad_norm": 6.500630054748817, "learning_rate": 3.342274670168162e-06, "loss": 0.7702, "step": 21627 }, { "epoch": 0.62, "grad_norm": 6.4080723845385705, "learning_rate": 3.3418371492909972e-06, "loss": 0.618, "step": 21628 }, { "epoch": 0.62, "grad_norm": 3.8287729454536117, "learning_rate": 3.341399642678803e-06, "loss": 0.3527, "step": 21629 }, { "epoch": 0.62, "grad_norm": 4.498344116725673, "learning_rate": 3.340962150335345e-06, "loss": 0.4586, "step": 21630 }, { "epoch": 0.62, "grad_norm": 5.281534762859557, "learning_rate": 3.340524672264384e-06, "loss": 0.2662, "step": 21631 }, { "epoch": 0.62, "grad_norm": 6.198973681504019, "learning_rate": 3.3400872084696867e-06, "loss": 0.3507, "step": 21632 }, { "epoch": 0.62, "grad_norm": 5.986081191379311, "learning_rate": 3.3396497589550123e-06, "loss": 0.3845, "step": 21633 }, { "epoch": 0.62, "grad_norm": 7.338578436393663, "learning_rate": 3.3392123237241285e-06, "loss": 0.4484, "step": 21634 }, { "epoch": 0.62, "grad_norm": 7.198409185567075, "learning_rate": 3.3387749027807957e-06, "loss": 0.4946, "step": 21635 }, { "epoch": 0.62, "grad_norm": 6.754730544966795, "learning_rate": 3.338337496128779e-06, "loss": 0.6714, "step": 21636 }, { "epoch": 0.62, "grad_norm": 5.252780586675418, "learning_rate": 3.3379001037718383e-06, "loss": 0.5425, "step": 21637 }, { "epoch": 0.62, "grad_norm": 8.679965147836215, "learning_rate": 3.337462725713739e-06, "loss": 0.7426, "step": 21638 }, { "epoch": 0.62, "grad_norm": 9.938432997564682, "learning_rate": 3.3370253619582445e-06, "loss": 0.672, "step": 21639 }, { "epoch": 0.62, "grad_norm": 4.6626616375802135, "learning_rate": 3.336588012509114e-06, "loss": 0.4811, "step": 21640 }, { "epoch": 0.62, "grad_norm": 4.961387986282392, "learning_rate": 3.3361506773701136e-06, "loss": 0.5364, "step": 21641 }, { "epoch": 0.62, "grad_norm": 7.300709209219374, "learning_rate": 3.335713356545004e-06, "loss": 0.6566, "step": 21642 }, { "epoch": 0.62, "grad_norm": 6.7475624981615, "learning_rate": 3.3352760500375475e-06, "loss": 0.5925, "step": 21643 }, { "epoch": 0.62, "grad_norm": 7.80020474140734, "learning_rate": 3.3348387578515046e-06, "loss": 0.4595, "step": 21644 }, { "epoch": 0.62, "grad_norm": 4.816942220723442, "learning_rate": 3.334401479990641e-06, "loss": 1.0228, "step": 21645 }, { "epoch": 0.62, "grad_norm": 6.789579049181535, "learning_rate": 3.333964216458715e-06, "loss": 0.5564, "step": 21646 }, { "epoch": 0.62, "grad_norm": 6.07587204784755, "learning_rate": 3.333526967259489e-06, "loss": 0.5938, "step": 21647 }, { "epoch": 0.62, "grad_norm": 4.819844722462657, "learning_rate": 3.333089732396727e-06, "loss": 0.4826, "step": 21648 }, { "epoch": 0.62, "grad_norm": 5.3649776968088, "learning_rate": 3.332652511874188e-06, "loss": 0.4629, "step": 21649 }, { "epoch": 0.62, "grad_norm": 7.496036817564057, "learning_rate": 3.332215305695635e-06, "loss": 0.8173, "step": 21650 }, { "epoch": 0.62, "grad_norm": 6.191958160350212, "learning_rate": 3.331778113864828e-06, "loss": 0.6351, "step": 21651 }, { "epoch": 0.62, "grad_norm": 9.287472421186735, "learning_rate": 3.331340936385529e-06, "loss": 0.3354, "step": 21652 }, { "epoch": 0.62, "grad_norm": 5.34970405776156, "learning_rate": 3.3309037732614966e-06, "loss": 0.5054, "step": 21653 }, { "epoch": 0.62, "grad_norm": 3.9745648055893295, "learning_rate": 3.3304666244964945e-06, "loss": 0.461, "step": 21654 }, { "epoch": 0.62, "grad_norm": 3.5687711464544325, "learning_rate": 3.3300294900942836e-06, "loss": 0.3266, "step": 21655 }, { "epoch": 0.62, "grad_norm": 6.9267492419299685, "learning_rate": 3.3295923700586214e-06, "loss": 0.3683, "step": 21656 }, { "epoch": 0.62, "grad_norm": 8.023065575111186, "learning_rate": 3.3291552643932725e-06, "loss": 0.7773, "step": 21657 }, { "epoch": 0.62, "grad_norm": 7.283305520616351, "learning_rate": 3.3287181731019935e-06, "loss": 0.4724, "step": 21658 }, { "epoch": 0.62, "grad_norm": 4.001287074443137, "learning_rate": 3.3282810961885483e-06, "loss": 0.2931, "step": 21659 }, { "epoch": 0.62, "grad_norm": 5.119989923601367, "learning_rate": 3.327844033656692e-06, "loss": 0.4351, "step": 21660 }, { "epoch": 0.62, "grad_norm": 3.034705385083344, "learning_rate": 3.32740698551019e-06, "loss": 0.2923, "step": 21661 }, { "epoch": 0.62, "grad_norm": 6.947815314320519, "learning_rate": 3.326969951752799e-06, "loss": 0.6374, "step": 21662 }, { "epoch": 0.62, "grad_norm": 3.9234796713959534, "learning_rate": 3.3265329323882776e-06, "loss": 0.3515, "step": 21663 }, { "epoch": 0.62, "grad_norm": 5.661713422159789, "learning_rate": 3.3260959274203896e-06, "loss": 0.3213, "step": 21664 }, { "epoch": 0.62, "grad_norm": 3.8353639317578367, "learning_rate": 3.3256589368528918e-06, "loss": 0.2229, "step": 21665 }, { "epoch": 0.62, "grad_norm": 6.984862547377408, "learning_rate": 3.3252219606895454e-06, "loss": 0.5512, "step": 21666 }, { "epoch": 0.62, "grad_norm": 9.482567000530798, "learning_rate": 3.3247849989341053e-06, "loss": 0.7397, "step": 21667 }, { "epoch": 0.62, "grad_norm": 6.000247076033594, "learning_rate": 3.324348051590336e-06, "loss": 0.414, "step": 21668 }, { "epoch": 0.62, "grad_norm": 9.022736387551754, "learning_rate": 3.323911118661993e-06, "loss": 1.0079, "step": 21669 }, { "epoch": 0.62, "grad_norm": 2.4527873396072075, "learning_rate": 3.3234742001528353e-06, "loss": 0.1556, "step": 21670 }, { "epoch": 0.62, "grad_norm": 2.9121774912389036, "learning_rate": 3.3230372960666246e-06, "loss": 0.3359, "step": 21671 }, { "epoch": 0.62, "grad_norm": 5.027956719958018, "learning_rate": 3.3226004064071172e-06, "loss": 0.3075, "step": 21672 }, { "epoch": 0.62, "grad_norm": 3.9632040607704178, "learning_rate": 3.322163531178072e-06, "loss": 0.6676, "step": 21673 }, { "epoch": 0.62, "grad_norm": 7.674346028650796, "learning_rate": 3.3217266703832463e-06, "loss": 0.7962, "step": 21674 }, { "epoch": 0.62, "grad_norm": 5.387005815609416, "learning_rate": 3.3212898240264e-06, "loss": 0.4235, "step": 21675 }, { "epoch": 0.62, "grad_norm": 4.475221789314229, "learning_rate": 3.320852992111291e-06, "loss": 0.5794, "step": 21676 }, { "epoch": 0.62, "grad_norm": 4.01986337073711, "learning_rate": 3.3204161746416775e-06, "loss": 0.3469, "step": 21677 }, { "epoch": 0.62, "grad_norm": 4.460271675405716, "learning_rate": 3.3199793716213154e-06, "loss": 0.4977, "step": 21678 }, { "epoch": 0.62, "grad_norm": 5.7692375960676445, "learning_rate": 3.319542583053964e-06, "loss": 0.4179, "step": 21679 }, { "epoch": 0.62, "grad_norm": 7.310927132234113, "learning_rate": 3.3191058089433825e-06, "loss": 0.5183, "step": 21680 }, { "epoch": 0.62, "grad_norm": 6.035158975841314, "learning_rate": 3.3186690492933253e-06, "loss": 0.6165, "step": 21681 }, { "epoch": 0.62, "grad_norm": 5.648099707162377, "learning_rate": 3.3182323041075526e-06, "loss": 0.7156, "step": 21682 }, { "epoch": 0.62, "grad_norm": 5.889312638425548, "learning_rate": 3.3177955733898183e-06, "loss": 0.5906, "step": 21683 }, { "epoch": 0.62, "grad_norm": 1.3633650997138513, "learning_rate": 3.3173588571438837e-06, "loss": 0.0916, "step": 21684 }, { "epoch": 0.62, "grad_norm": 2.7394263122195066, "learning_rate": 3.316922155373502e-06, "loss": 0.2636, "step": 21685 }, { "epoch": 0.62, "grad_norm": 2.7401599071822185, "learning_rate": 3.3164854680824334e-06, "loss": 0.0796, "step": 21686 }, { "epoch": 0.62, "grad_norm": 9.499801432893442, "learning_rate": 3.3160487952744312e-06, "loss": 0.6322, "step": 21687 }, { "epoch": 0.62, "grad_norm": 7.884285916714583, "learning_rate": 3.3156121369532547e-06, "loss": 0.3611, "step": 21688 }, { "epoch": 0.62, "grad_norm": 6.160774969114053, "learning_rate": 3.3151754931226597e-06, "loss": 0.4301, "step": 21689 }, { "epoch": 0.62, "grad_norm": 5.924776136619697, "learning_rate": 3.314738863786402e-06, "loss": 0.5432, "step": 21690 }, { "epoch": 0.62, "grad_norm": 4.4232406394539066, "learning_rate": 3.314302248948239e-06, "loss": 0.4021, "step": 21691 }, { "epoch": 0.62, "grad_norm": 4.066979330321276, "learning_rate": 3.313865648611926e-06, "loss": 0.2113, "step": 21692 }, { "epoch": 0.62, "grad_norm": 3.133126963674347, "learning_rate": 3.3134290627812203e-06, "loss": 0.3801, "step": 21693 }, { "epoch": 0.62, "grad_norm": 4.141562963808212, "learning_rate": 3.3129924914598744e-06, "loss": 0.222, "step": 21694 }, { "epoch": 0.62, "grad_norm": 6.41818602850641, "learning_rate": 3.3125559346516473e-06, "loss": 0.5095, "step": 21695 }, { "epoch": 0.62, "grad_norm": 6.040650073935327, "learning_rate": 3.3121193923602953e-06, "loss": 0.2484, "step": 21696 }, { "epoch": 0.62, "grad_norm": 2.582713361661726, "learning_rate": 3.3116828645895693e-06, "loss": 0.1989, "step": 21697 }, { "epoch": 0.62, "grad_norm": 4.831792975111786, "learning_rate": 3.3112463513432303e-06, "loss": 0.6096, "step": 21698 }, { "epoch": 0.62, "grad_norm": 3.3198307899649078, "learning_rate": 3.3108098526250306e-06, "loss": 0.209, "step": 21699 }, { "epoch": 0.62, "grad_norm": 5.488855688910179, "learning_rate": 3.3103733684387263e-06, "loss": 0.6228, "step": 21700 }, { "epoch": 0.62, "grad_norm": 5.262584612204672, "learning_rate": 3.30993689878807e-06, "loss": 0.4393, "step": 21701 }, { "epoch": 0.62, "grad_norm": 5.572052778744397, "learning_rate": 3.30950044367682e-06, "loss": 0.3868, "step": 21702 }, { "epoch": 0.62, "grad_norm": 8.27207357746878, "learning_rate": 3.3090640031087283e-06, "loss": 0.7343, "step": 21703 }, { "epoch": 0.62, "grad_norm": 3.4329821288157314, "learning_rate": 3.308627577087551e-06, "loss": 0.3788, "step": 21704 }, { "epoch": 0.62, "grad_norm": 6.132554514129851, "learning_rate": 3.3081911656170436e-06, "loss": 0.6466, "step": 21705 }, { "epoch": 0.62, "grad_norm": 9.303683517446078, "learning_rate": 3.307754768700958e-06, "loss": 0.6672, "step": 21706 }, { "epoch": 0.62, "grad_norm": 7.023447446211412, "learning_rate": 3.3073183863430514e-06, "loss": 0.4779, "step": 21707 }, { "epoch": 0.62, "grad_norm": 6.108274316784517, "learning_rate": 3.306882018547074e-06, "loss": 0.401, "step": 21708 }, { "epoch": 0.62, "grad_norm": 4.1767713695035855, "learning_rate": 3.3064456653167854e-06, "loss": 0.5735, "step": 21709 }, { "epoch": 0.62, "grad_norm": 6.818237863079114, "learning_rate": 3.3060093266559334e-06, "loss": 0.5613, "step": 21710 }, { "epoch": 0.62, "grad_norm": 8.856375480845132, "learning_rate": 3.305573002568275e-06, "loss": 1.1186, "step": 21711 }, { "epoch": 0.62, "grad_norm": 5.35880047357279, "learning_rate": 3.3051366930575647e-06, "loss": 0.5023, "step": 21712 }, { "epoch": 0.62, "grad_norm": 3.488412374016234, "learning_rate": 3.304700398127553e-06, "loss": 0.4211, "step": 21713 }, { "epoch": 0.62, "grad_norm": 2.8218123656088268, "learning_rate": 3.304264117781997e-06, "loss": 0.2237, "step": 21714 }, { "epoch": 0.62, "grad_norm": 4.897694800622675, "learning_rate": 3.3038278520246464e-06, "loss": 0.5732, "step": 21715 }, { "epoch": 0.62, "grad_norm": 3.818309047981957, "learning_rate": 3.303391600859257e-06, "loss": 0.509, "step": 21716 }, { "epoch": 0.62, "grad_norm": 7.655840617542808, "learning_rate": 3.3029553642895796e-06, "loss": 0.5618, "step": 21717 }, { "epoch": 0.62, "grad_norm": 7.418799572259199, "learning_rate": 3.30251914231937e-06, "loss": 0.3586, "step": 21718 }, { "epoch": 0.62, "grad_norm": 4.106068401298344, "learning_rate": 3.3020829349523774e-06, "loss": 0.4355, "step": 21719 }, { "epoch": 0.62, "grad_norm": 4.648488917988011, "learning_rate": 3.301646742192356e-06, "loss": 0.6064, "step": 21720 }, { "epoch": 0.62, "grad_norm": 14.1244101612189, "learning_rate": 3.3012105640430602e-06, "loss": 0.5452, "step": 21721 }, { "epoch": 0.62, "grad_norm": 5.799142810355674, "learning_rate": 3.3007744005082402e-06, "loss": 0.4061, "step": 21722 }, { "epoch": 0.62, "grad_norm": 4.504977599246, "learning_rate": 3.30033825159165e-06, "loss": 0.4029, "step": 21723 }, { "epoch": 0.62, "grad_norm": 4.534161704243629, "learning_rate": 3.299902117297038e-06, "loss": 0.4402, "step": 21724 }, { "epoch": 0.62, "grad_norm": 7.553597909171735, "learning_rate": 3.2994659976281606e-06, "loss": 0.6648, "step": 21725 }, { "epoch": 0.62, "grad_norm": 4.830094451197988, "learning_rate": 3.2990298925887676e-06, "loss": 0.3976, "step": 21726 }, { "epoch": 0.62, "grad_norm": 10.928800251022883, "learning_rate": 3.2985938021826112e-06, "loss": 0.3219, "step": 21727 }, { "epoch": 0.62, "grad_norm": 9.246296527864612, "learning_rate": 3.2981577264134412e-06, "loss": 0.3835, "step": 21728 }, { "epoch": 0.62, "grad_norm": 6.997193420988263, "learning_rate": 3.297721665285012e-06, "loss": 1.0047, "step": 21729 }, { "epoch": 0.62, "grad_norm": 3.0026254689017065, "learning_rate": 3.2972856188010745e-06, "loss": 0.2377, "step": 21730 }, { "epoch": 0.62, "grad_norm": 4.027985840754544, "learning_rate": 3.296849586965377e-06, "loss": 0.4496, "step": 21731 }, { "epoch": 0.62, "grad_norm": 8.92900583463844, "learning_rate": 3.2964135697816753e-06, "loss": 0.6638, "step": 21732 }, { "epoch": 0.62, "grad_norm": 6.319769422212076, "learning_rate": 3.2959775672537164e-06, "loss": 0.7069, "step": 21733 }, { "epoch": 0.62, "grad_norm": 13.686167194848657, "learning_rate": 3.295541579385254e-06, "loss": 0.8209, "step": 21734 }, { "epoch": 0.62, "grad_norm": 7.865961125087431, "learning_rate": 3.295105606180035e-06, "loss": 0.3077, "step": 21735 }, { "epoch": 0.62, "grad_norm": 5.522930677759027, "learning_rate": 3.294669647641814e-06, "loss": 0.3069, "step": 21736 }, { "epoch": 0.62, "grad_norm": 2.957286264366025, "learning_rate": 3.294233703774341e-06, "loss": 0.1769, "step": 21737 }, { "epoch": 0.62, "grad_norm": 10.732151695231504, "learning_rate": 3.293797774581363e-06, "loss": 0.7405, "step": 21738 }, { "epoch": 0.62, "grad_norm": 8.713769180480309, "learning_rate": 3.2933618600666367e-06, "loss": 0.7849, "step": 21739 }, { "epoch": 0.62, "grad_norm": 3.63600331940725, "learning_rate": 3.2929259602339048e-06, "loss": 0.2072, "step": 21740 }, { "epoch": 0.62, "grad_norm": 4.52071497655304, "learning_rate": 3.292490075086922e-06, "loss": 0.5539, "step": 21741 }, { "epoch": 0.62, "grad_norm": 4.77895749052003, "learning_rate": 3.2920542046294366e-06, "loss": 0.1696, "step": 21742 }, { "epoch": 0.62, "grad_norm": 6.887511295925507, "learning_rate": 3.2916183488651993e-06, "loss": 0.5535, "step": 21743 }, { "epoch": 0.62, "grad_norm": 3.080589740572288, "learning_rate": 3.291182507797957e-06, "loss": 0.1784, "step": 21744 }, { "epoch": 0.62, "grad_norm": 3.1686486244914622, "learning_rate": 3.290746681431462e-06, "loss": 0.2861, "step": 21745 }, { "epoch": 0.62, "grad_norm": 5.470379308768206, "learning_rate": 3.2903108697694642e-06, "loss": 0.6741, "step": 21746 }, { "epoch": 0.62, "grad_norm": 5.814323754452286, "learning_rate": 3.289875072815709e-06, "loss": 0.6751, "step": 21747 }, { "epoch": 0.62, "grad_norm": 3.4148838094099667, "learning_rate": 3.28943929057395e-06, "loss": 0.2983, "step": 21748 }, { "epoch": 0.62, "grad_norm": 6.42573939718513, "learning_rate": 3.2890035230479344e-06, "loss": 0.7152, "step": 21749 }, { "epoch": 0.62, "grad_norm": 5.83256159400383, "learning_rate": 3.288567770241411e-06, "loss": 0.4219, "step": 21750 }, { "epoch": 0.62, "grad_norm": 5.377112683119171, "learning_rate": 3.2881320321581254e-06, "loss": 0.5892, "step": 21751 }, { "epoch": 0.62, "grad_norm": 16.48721789392728, "learning_rate": 3.2876963088018315e-06, "loss": 0.5125, "step": 21752 }, { "epoch": 0.62, "grad_norm": 4.031381471840972, "learning_rate": 3.2872606001762765e-06, "loss": 0.5739, "step": 21753 }, { "epoch": 0.62, "grad_norm": 2.7985954534020583, "learning_rate": 3.286824906285205e-06, "loss": 0.1743, "step": 21754 }, { "epoch": 0.62, "grad_norm": 3.262997642651851, "learning_rate": 3.28638922713237e-06, "loss": 0.2469, "step": 21755 }, { "epoch": 0.62, "grad_norm": 4.769256758855709, "learning_rate": 3.2859535627215165e-06, "loss": 0.6168, "step": 21756 }, { "epoch": 0.62, "grad_norm": 11.48251028687099, "learning_rate": 3.285517913056394e-06, "loss": 0.8439, "step": 21757 }, { "epoch": 0.62, "grad_norm": 5.969027847294102, "learning_rate": 3.285082278140749e-06, "loss": 0.6399, "step": 21758 }, { "epoch": 0.62, "grad_norm": 5.129845340750938, "learning_rate": 3.284646657978331e-06, "loss": 0.2344, "step": 21759 }, { "epoch": 0.62, "grad_norm": 3.5493038448584295, "learning_rate": 3.2842110525728858e-06, "loss": 0.4393, "step": 21760 }, { "epoch": 0.62, "grad_norm": 8.952372041633097, "learning_rate": 3.283775461928161e-06, "loss": 0.5862, "step": 21761 }, { "epoch": 0.62, "grad_norm": 5.573918912611894, "learning_rate": 3.283339886047906e-06, "loss": 0.665, "step": 21762 }, { "epoch": 0.62, "grad_norm": 7.494399077467695, "learning_rate": 3.282904324935866e-06, "loss": 0.4542, "step": 21763 }, { "epoch": 0.62, "grad_norm": 9.543535063496057, "learning_rate": 3.28246877859579e-06, "loss": 0.4371, "step": 21764 }, { "epoch": 0.62, "grad_norm": 7.515099838212937, "learning_rate": 3.2820332470314208e-06, "loss": 0.5445, "step": 21765 }, { "epoch": 0.62, "grad_norm": 5.295132330662848, "learning_rate": 3.2815977302465117e-06, "loss": 0.43, "step": 21766 }, { "epoch": 0.62, "grad_norm": 4.891223078711397, "learning_rate": 3.281162228244803e-06, "loss": 0.4577, "step": 21767 }, { "epoch": 0.62, "grad_norm": 15.203493778347045, "learning_rate": 3.2807267410300456e-06, "loss": 0.2239, "step": 21768 }, { "epoch": 0.62, "grad_norm": 6.447353229545782, "learning_rate": 3.2802912686059833e-06, "loss": 0.7386, "step": 21769 }, { "epoch": 0.62, "grad_norm": 10.351665314937518, "learning_rate": 3.2798558109763623e-06, "loss": 0.4902, "step": 21770 }, { "epoch": 0.62, "grad_norm": 4.981843598534462, "learning_rate": 3.279420368144932e-06, "loss": 0.3919, "step": 21771 }, { "epoch": 0.62, "grad_norm": 4.457506213010008, "learning_rate": 3.278984940115436e-06, "loss": 0.2934, "step": 21772 }, { "epoch": 0.62, "grad_norm": 12.88870113928476, "learning_rate": 3.278549526891621e-06, "loss": 0.7349, "step": 21773 }, { "epoch": 0.62, "grad_norm": 4.688796538018025, "learning_rate": 3.278114128477231e-06, "loss": 0.5007, "step": 21774 }, { "epoch": 0.62, "grad_norm": 10.848404075222728, "learning_rate": 3.277678744876015e-06, "loss": 0.4562, "step": 21775 }, { "epoch": 0.62, "grad_norm": 2.0870019075776116, "learning_rate": 3.2772433760917154e-06, "loss": 0.1932, "step": 21776 }, { "epoch": 0.62, "grad_norm": 3.9194357836241855, "learning_rate": 3.2768080221280784e-06, "loss": 0.5026, "step": 21777 }, { "epoch": 0.62, "grad_norm": 6.82059089293473, "learning_rate": 3.276372682988852e-06, "loss": 0.2752, "step": 21778 }, { "epoch": 0.62, "grad_norm": 3.2829346827883232, "learning_rate": 3.2759373586777776e-06, "loss": 0.385, "step": 21779 }, { "epoch": 0.62, "grad_norm": 8.29293292806456, "learning_rate": 3.2755020491986033e-06, "loss": 0.5064, "step": 21780 }, { "epoch": 0.62, "grad_norm": 4.272602174004829, "learning_rate": 3.2750667545550697e-06, "loss": 0.3526, "step": 21781 }, { "epoch": 0.62, "grad_norm": 8.514718271680735, "learning_rate": 3.2746314747509273e-06, "loss": 0.5622, "step": 21782 }, { "epoch": 0.62, "grad_norm": 6.130214184477636, "learning_rate": 3.2741962097899167e-06, "loss": 0.4276, "step": 21783 }, { "epoch": 0.62, "grad_norm": 9.334448838556506, "learning_rate": 3.2737609596757847e-06, "loss": 0.6556, "step": 21784 }, { "epoch": 0.62, "grad_norm": 7.913614454324234, "learning_rate": 3.273325724412272e-06, "loss": 0.8291, "step": 21785 }, { "epoch": 0.62, "grad_norm": 4.042231189328002, "learning_rate": 3.2728905040031267e-06, "loss": 0.4402, "step": 21786 }, { "epoch": 0.62, "grad_norm": 6.688060915231496, "learning_rate": 3.272455298452093e-06, "loss": 0.3733, "step": 21787 }, { "epoch": 0.62, "grad_norm": 7.353653418882447, "learning_rate": 3.272020107762912e-06, "loss": 0.3416, "step": 21788 }, { "epoch": 0.62, "grad_norm": 3.0263073531541713, "learning_rate": 3.271584931939331e-06, "loss": 0.2183, "step": 21789 }, { "epoch": 0.62, "grad_norm": 2.9219966189934756, "learning_rate": 3.2711497709850902e-06, "loss": 0.2355, "step": 21790 }, { "epoch": 0.62, "grad_norm": 4.48157344592541, "learning_rate": 3.2707146249039374e-06, "loss": 0.5397, "step": 21791 }, { "epoch": 0.62, "grad_norm": 5.187690432626547, "learning_rate": 3.2702794936996106e-06, "loss": 0.4654, "step": 21792 }, { "epoch": 0.62, "grad_norm": 5.092318608719768, "learning_rate": 3.2698443773758594e-06, "loss": 0.2754, "step": 21793 }, { "epoch": 0.62, "grad_norm": 6.01204719662494, "learning_rate": 3.2694092759364227e-06, "loss": 0.7079, "step": 21794 }, { "epoch": 0.62, "grad_norm": 7.07661189396643, "learning_rate": 3.2689741893850436e-06, "loss": 0.678, "step": 21795 }, { "epoch": 0.62, "grad_norm": 11.785043201909529, "learning_rate": 3.2685391177254694e-06, "loss": 0.9445, "step": 21796 }, { "epoch": 0.62, "grad_norm": 5.325427068679225, "learning_rate": 3.2681040609614366e-06, "loss": 0.3905, "step": 21797 }, { "epoch": 0.62, "grad_norm": 8.121222013824843, "learning_rate": 3.2676690190966943e-06, "loss": 0.4379, "step": 21798 }, { "epoch": 0.62, "grad_norm": 2.2165508251347927, "learning_rate": 3.2672339921349805e-06, "loss": 0.1247, "step": 21799 }, { "epoch": 0.62, "grad_norm": 2.833098637920859, "learning_rate": 3.2667989800800403e-06, "loss": 0.251, "step": 21800 }, { "epoch": 0.62, "grad_norm": 9.968417075406597, "learning_rate": 3.2663639829356137e-06, "loss": 0.8497, "step": 21801 }, { "epoch": 0.62, "grad_norm": 7.353300207460596, "learning_rate": 3.2659290007054443e-06, "loss": 0.741, "step": 21802 }, { "epoch": 0.62, "grad_norm": 10.53466791217048, "learning_rate": 3.265494033393276e-06, "loss": 0.5276, "step": 21803 }, { "epoch": 0.62, "grad_norm": 3.1491501335328507, "learning_rate": 3.265059081002847e-06, "loss": 0.7134, "step": 21804 }, { "epoch": 0.62, "grad_norm": 5.2291301584172505, "learning_rate": 3.264624143537903e-06, "loss": 0.4971, "step": 21805 }, { "epoch": 0.62, "grad_norm": 4.363002778458743, "learning_rate": 3.2641892210021826e-06, "loss": 0.1737, "step": 21806 }, { "epoch": 0.62, "grad_norm": 4.205557208093788, "learning_rate": 3.26375431339943e-06, "loss": 0.4036, "step": 21807 }, { "epoch": 0.62, "grad_norm": 4.458932161965368, "learning_rate": 3.263319420733383e-06, "loss": 0.6206, "step": 21808 }, { "epoch": 0.62, "grad_norm": 4.436660740965667, "learning_rate": 3.2628845430077873e-06, "loss": 0.4777, "step": 21809 }, { "epoch": 0.62, "grad_norm": 3.4563603154214317, "learning_rate": 3.2624496802263807e-06, "loss": 0.3622, "step": 21810 }, { "epoch": 0.62, "grad_norm": 7.809811243860755, "learning_rate": 3.2620148323929046e-06, "loss": 0.4044, "step": 21811 }, { "epoch": 0.62, "grad_norm": 6.920851035404551, "learning_rate": 3.261579999511103e-06, "loss": 0.5503, "step": 21812 }, { "epoch": 0.62, "grad_norm": 9.159298125783572, "learning_rate": 3.261145181584714e-06, "loss": 0.5024, "step": 21813 }, { "epoch": 0.62, "grad_norm": 19.04617060422012, "learning_rate": 3.260710378617479e-06, "loss": 0.2693, "step": 21814 }, { "epoch": 0.62, "grad_norm": 3.1772646940820377, "learning_rate": 3.260275590613137e-06, "loss": 0.252, "step": 21815 }, { "epoch": 0.62, "grad_norm": 10.836368917995745, "learning_rate": 3.259840817575432e-06, "loss": 0.8754, "step": 21816 }, { "epoch": 0.62, "grad_norm": 7.470530596966157, "learning_rate": 3.2594060595081013e-06, "loss": 0.6433, "step": 21817 }, { "epoch": 0.62, "grad_norm": 3.450542088089264, "learning_rate": 3.258971316414885e-06, "loss": 0.3548, "step": 21818 }, { "epoch": 0.62, "grad_norm": 5.695794083860721, "learning_rate": 3.2585365882995256e-06, "loss": 0.5136, "step": 21819 }, { "epoch": 0.62, "grad_norm": 3.336534183214473, "learning_rate": 3.2581018751657613e-06, "loss": 0.2451, "step": 21820 }, { "epoch": 0.62, "grad_norm": 8.330896199205318, "learning_rate": 3.2576671770173324e-06, "loss": 0.9667, "step": 21821 }, { "epoch": 0.62, "grad_norm": 9.316029539029506, "learning_rate": 3.2572324938579766e-06, "loss": 1.1096, "step": 21822 }, { "epoch": 0.62, "grad_norm": 5.089628131600073, "learning_rate": 3.2567978256914367e-06, "loss": 0.4819, "step": 21823 }, { "epoch": 0.62, "grad_norm": 6.127290161318708, "learning_rate": 3.2563631725214497e-06, "loss": 0.3781, "step": 21824 }, { "epoch": 0.63, "grad_norm": 3.675962859156024, "learning_rate": 3.2559285343517567e-06, "loss": 0.2396, "step": 21825 }, { "epoch": 0.63, "grad_norm": 9.077485421478155, "learning_rate": 3.255493911186095e-06, "loss": 0.7678, "step": 21826 }, { "epoch": 0.63, "grad_norm": 4.215015764942304, "learning_rate": 3.255059303028203e-06, "loss": 0.5573, "step": 21827 }, { "epoch": 0.63, "grad_norm": 7.2252857296948765, "learning_rate": 3.254624709881823e-06, "loss": 0.5784, "step": 21828 }, { "epoch": 0.63, "grad_norm": 6.0288966830460575, "learning_rate": 3.2541901317506906e-06, "loss": 0.3393, "step": 21829 }, { "epoch": 0.63, "grad_norm": 4.978799793129457, "learning_rate": 3.253755568638547e-06, "loss": 0.6695, "step": 21830 }, { "epoch": 0.63, "grad_norm": 2.4878166279963985, "learning_rate": 3.253321020549127e-06, "loss": 0.2204, "step": 21831 }, { "epoch": 0.63, "grad_norm": 6.632300475074603, "learning_rate": 3.2528864874861732e-06, "loss": 0.8346, "step": 21832 }, { "epoch": 0.63, "grad_norm": 6.811819655017423, "learning_rate": 3.2524519694534205e-06, "loss": 0.5133, "step": 21833 }, { "epoch": 0.63, "grad_norm": 6.193021256797032, "learning_rate": 3.25201746645461e-06, "loss": 0.8039, "step": 21834 }, { "epoch": 0.63, "grad_norm": 3.8110540806204543, "learning_rate": 3.251582978493476e-06, "loss": 0.3471, "step": 21835 }, { "epoch": 0.63, "grad_norm": 4.901806586878994, "learning_rate": 3.251148505573759e-06, "loss": 0.2103, "step": 21836 }, { "epoch": 0.63, "grad_norm": 3.5213188856088, "learning_rate": 3.250714047699197e-06, "loss": 0.4071, "step": 21837 }, { "epoch": 0.63, "grad_norm": 6.126298611052472, "learning_rate": 3.2502796048735253e-06, "loss": 0.8025, "step": 21838 }, { "epoch": 0.63, "grad_norm": 8.039479473781283, "learning_rate": 3.249845177100484e-06, "loss": 0.5716, "step": 21839 }, { "epoch": 0.63, "grad_norm": 5.3829053807100316, "learning_rate": 3.2494107643838087e-06, "loss": 0.3429, "step": 21840 }, { "epoch": 0.63, "grad_norm": 4.439155941241234, "learning_rate": 3.2489763667272377e-06, "loss": 0.2918, "step": 21841 }, { "epoch": 0.63, "grad_norm": 3.0540144391046424, "learning_rate": 3.2485419841345056e-06, "loss": 0.1433, "step": 21842 }, { "epoch": 0.63, "grad_norm": 5.665346870812588, "learning_rate": 3.2481076166093517e-06, "loss": 0.8204, "step": 21843 }, { "epoch": 0.63, "grad_norm": 6.2953687811613, "learning_rate": 3.2476732641555142e-06, "loss": 0.6639, "step": 21844 }, { "epoch": 0.63, "grad_norm": 5.293927388107222, "learning_rate": 3.2472389267767245e-06, "loss": 0.472, "step": 21845 }, { "epoch": 0.63, "grad_norm": 3.4785468107064657, "learning_rate": 3.246804604476726e-06, "loss": 0.3585, "step": 21846 }, { "epoch": 0.63, "grad_norm": 4.913081715626913, "learning_rate": 3.2463702972592498e-06, "loss": 0.3425, "step": 21847 }, { "epoch": 0.63, "grad_norm": 3.5144738071533106, "learning_rate": 3.245936005128035e-06, "loss": 0.3274, "step": 21848 }, { "epoch": 0.63, "grad_norm": 9.73079761391963, "learning_rate": 3.245501728086815e-06, "loss": 0.5448, "step": 21849 }, { "epoch": 0.63, "grad_norm": 7.429419254951953, "learning_rate": 3.24506746613933e-06, "loss": 0.7936, "step": 21850 }, { "epoch": 0.63, "grad_norm": 6.429649454887252, "learning_rate": 3.2446332192893114e-06, "loss": 0.5129, "step": 21851 }, { "epoch": 0.63, "grad_norm": 7.272538634584785, "learning_rate": 3.2441989875404973e-06, "loss": 0.6067, "step": 21852 }, { "epoch": 0.63, "grad_norm": 5.724050639700927, "learning_rate": 3.2437647708966247e-06, "loss": 0.4164, "step": 21853 }, { "epoch": 0.63, "grad_norm": 5.373323356478747, "learning_rate": 3.2433305693614257e-06, "loss": 0.5217, "step": 21854 }, { "epoch": 0.63, "grad_norm": 8.515843631842557, "learning_rate": 3.2428963829386397e-06, "loss": 0.3962, "step": 21855 }, { "epoch": 0.63, "grad_norm": 4.634672515229667, "learning_rate": 3.242462211631998e-06, "loss": 0.3985, "step": 21856 }, { "epoch": 0.63, "grad_norm": 4.033296006728813, "learning_rate": 3.2420280554452393e-06, "loss": 0.2443, "step": 21857 }, { "epoch": 0.63, "grad_norm": 4.3172495022607285, "learning_rate": 3.241593914382094e-06, "loss": 0.2703, "step": 21858 }, { "epoch": 0.63, "grad_norm": 4.824119746006974, "learning_rate": 3.2411597884463015e-06, "loss": 0.2312, "step": 21859 }, { "epoch": 0.63, "grad_norm": 4.8028934758017146, "learning_rate": 3.2407256776415953e-06, "loss": 0.2114, "step": 21860 }, { "epoch": 0.63, "grad_norm": 5.1682034739779255, "learning_rate": 3.2402915819717073e-06, "loss": 0.5726, "step": 21861 }, { "epoch": 0.63, "grad_norm": 5.24213419629895, "learning_rate": 3.2398575014403767e-06, "loss": 0.2637, "step": 21862 }, { "epoch": 0.63, "grad_norm": 6.9357159013060175, "learning_rate": 3.239423436051334e-06, "loss": 0.5367, "step": 21863 }, { "epoch": 0.63, "grad_norm": 5.414820894570934, "learning_rate": 3.2389893858083157e-06, "loss": 0.5013, "step": 21864 }, { "epoch": 0.63, "grad_norm": 4.618793188984924, "learning_rate": 3.238555350715053e-06, "loss": 0.6522, "step": 21865 }, { "epoch": 0.63, "grad_norm": 4.70556845827722, "learning_rate": 3.2381213307752834e-06, "loss": 0.4845, "step": 21866 }, { "epoch": 0.63, "grad_norm": 3.6344241474334895, "learning_rate": 3.2376873259927387e-06, "loss": 0.2825, "step": 21867 }, { "epoch": 0.63, "grad_norm": 7.718957689711206, "learning_rate": 3.2372533363711512e-06, "loss": 0.4453, "step": 21868 }, { "epoch": 0.63, "grad_norm": 9.630110486229622, "learning_rate": 3.2368193619142585e-06, "loss": 0.4296, "step": 21869 }, { "epoch": 0.63, "grad_norm": 4.727289014734873, "learning_rate": 3.236385402625791e-06, "loss": 0.4406, "step": 21870 }, { "epoch": 0.63, "grad_norm": 6.951295213283055, "learning_rate": 3.2359514585094843e-06, "loss": 0.8082, "step": 21871 }, { "epoch": 0.63, "grad_norm": 3.0090716810261684, "learning_rate": 3.2355175295690665e-06, "loss": 0.3429, "step": 21872 }, { "epoch": 0.63, "grad_norm": 3.561237780843588, "learning_rate": 3.235083615808277e-06, "loss": 0.2794, "step": 21873 }, { "epoch": 0.63, "grad_norm": 5.591333389393813, "learning_rate": 3.234649717230845e-06, "loss": 0.8187, "step": 21874 }, { "epoch": 0.63, "grad_norm": 3.918466948974988, "learning_rate": 3.234215833840505e-06, "loss": 0.2481, "step": 21875 }, { "epoch": 0.63, "grad_norm": 6.988822869112165, "learning_rate": 3.2337819656409862e-06, "loss": 0.4552, "step": 21876 }, { "epoch": 0.63, "grad_norm": 93.25729373807732, "learning_rate": 3.2333481126360257e-06, "loss": 0.4358, "step": 21877 }, { "epoch": 0.63, "grad_norm": 2.523061198707575, "learning_rate": 3.232914274829354e-06, "loss": 0.4285, "step": 21878 }, { "epoch": 0.63, "grad_norm": 4.8928333208301735, "learning_rate": 3.2324804522247015e-06, "loss": 0.4191, "step": 21879 }, { "epoch": 0.63, "grad_norm": 3.313303903881397, "learning_rate": 3.232046644825805e-06, "loss": 0.3568, "step": 21880 }, { "epoch": 0.63, "grad_norm": 4.160708996940727, "learning_rate": 3.231612852636391e-06, "loss": 0.5483, "step": 21881 }, { "epoch": 0.63, "grad_norm": 3.087738902003701, "learning_rate": 3.2311790756601958e-06, "loss": 0.3056, "step": 21882 }, { "epoch": 0.63, "grad_norm": 9.565732721160243, "learning_rate": 3.230745313900948e-06, "loss": 0.4677, "step": 21883 }, { "epoch": 0.63, "grad_norm": 4.88304367128554, "learning_rate": 3.2303115673623793e-06, "loss": 0.4863, "step": 21884 }, { "epoch": 0.63, "grad_norm": 2.826661168406495, "learning_rate": 3.229877836048224e-06, "loss": 0.2532, "step": 21885 }, { "epoch": 0.63, "grad_norm": 2.4484031450871506, "learning_rate": 3.2294441199622116e-06, "loss": 0.3973, "step": 21886 }, { "epoch": 0.63, "grad_norm": 7.088164193115913, "learning_rate": 3.229010419108074e-06, "loss": 0.5786, "step": 21887 }, { "epoch": 0.63, "grad_norm": 4.560197745770438, "learning_rate": 3.2285767334895394e-06, "loss": 0.6761, "step": 21888 }, { "epoch": 0.63, "grad_norm": 7.482522117921084, "learning_rate": 3.228143063110344e-06, "loss": 0.1924, "step": 21889 }, { "epoch": 0.63, "grad_norm": 4.683961117346067, "learning_rate": 3.227709407974213e-06, "loss": 0.6132, "step": 21890 }, { "epoch": 0.63, "grad_norm": 5.497121426088366, "learning_rate": 3.227275768084882e-06, "loss": 0.4977, "step": 21891 }, { "epoch": 0.63, "grad_norm": 4.986811386220914, "learning_rate": 3.2268421434460775e-06, "loss": 0.1576, "step": 21892 }, { "epoch": 0.63, "grad_norm": 7.032923622160302, "learning_rate": 3.2264085340615327e-06, "loss": 0.488, "step": 21893 }, { "epoch": 0.63, "grad_norm": 4.707924861687567, "learning_rate": 3.225974939934977e-06, "loss": 0.6279, "step": 21894 }, { "epoch": 0.63, "grad_norm": 5.602951452202538, "learning_rate": 3.2255413610701393e-06, "loss": 0.4481, "step": 21895 }, { "epoch": 0.63, "grad_norm": 3.667298277005411, "learning_rate": 3.225107797470752e-06, "loss": 0.4737, "step": 21896 }, { "epoch": 0.63, "grad_norm": 3.57035774185598, "learning_rate": 3.2246742491405434e-06, "loss": 0.224, "step": 21897 }, { "epoch": 0.63, "grad_norm": 5.322678296250543, "learning_rate": 3.2242407160832446e-06, "loss": 0.436, "step": 21898 }, { "epoch": 0.63, "grad_norm": 2.832860238650881, "learning_rate": 3.223807198302582e-06, "loss": 0.3873, "step": 21899 }, { "epoch": 0.63, "grad_norm": 7.750101396451109, "learning_rate": 3.2233736958022888e-06, "loss": 0.3433, "step": 21900 }, { "epoch": 0.63, "grad_norm": 5.70327348907721, "learning_rate": 3.222940208586094e-06, "loss": 0.3439, "step": 21901 }, { "epoch": 0.63, "grad_norm": 4.451102590586615, "learning_rate": 3.2225067366577236e-06, "loss": 0.4231, "step": 21902 }, { "epoch": 0.63, "grad_norm": 4.76500182533029, "learning_rate": 3.2220732800209104e-06, "loss": 0.5238, "step": 21903 }, { "epoch": 0.63, "grad_norm": 6.874655714951254, "learning_rate": 3.221639838679381e-06, "loss": 0.5367, "step": 21904 }, { "epoch": 0.63, "grad_norm": 15.397336381456949, "learning_rate": 3.221206412636867e-06, "loss": 0.9039, "step": 21905 }, { "epoch": 0.63, "grad_norm": 3.880182584468056, "learning_rate": 3.2207730018970917e-06, "loss": 0.4846, "step": 21906 }, { "epoch": 0.63, "grad_norm": 9.646248783093204, "learning_rate": 3.2203396064637906e-06, "loss": 0.7116, "step": 21907 }, { "epoch": 0.63, "grad_norm": 5.269927126617676, "learning_rate": 3.219906226340687e-06, "loss": 0.4929, "step": 21908 }, { "epoch": 0.63, "grad_norm": 5.862231115425703, "learning_rate": 3.2194728615315106e-06, "loss": 0.3414, "step": 21909 }, { "epoch": 0.63, "grad_norm": 5.367013364851637, "learning_rate": 3.2190395120399916e-06, "loss": 0.354, "step": 21910 }, { "epoch": 0.63, "grad_norm": 3.9001859516660145, "learning_rate": 3.2186061778698552e-06, "loss": 0.2225, "step": 21911 }, { "epoch": 0.63, "grad_norm": 4.89520799687844, "learning_rate": 3.218172859024832e-06, "loss": 0.5577, "step": 21912 }, { "epoch": 0.63, "grad_norm": 3.9402040551687687, "learning_rate": 3.2177395555086464e-06, "loss": 0.4541, "step": 21913 }, { "epoch": 0.63, "grad_norm": 4.142949110628754, "learning_rate": 3.21730626732503e-06, "loss": 0.2722, "step": 21914 }, { "epoch": 0.63, "grad_norm": 4.397021673883493, "learning_rate": 3.2168729944777056e-06, "loss": 0.3397, "step": 21915 }, { "epoch": 0.63, "grad_norm": 3.4371459084984117, "learning_rate": 3.216439736970406e-06, "loss": 0.0685, "step": 21916 }, { "epoch": 0.63, "grad_norm": 3.072576983583859, "learning_rate": 3.2160064948068538e-06, "loss": 0.1989, "step": 21917 }, { "epoch": 0.63, "grad_norm": 5.267215205651279, "learning_rate": 3.215573267990778e-06, "loss": 0.4281, "step": 21918 }, { "epoch": 0.63, "grad_norm": 2.811350311959245, "learning_rate": 3.215140056525907e-06, "loss": 0.5942, "step": 21919 }, { "epoch": 0.63, "grad_norm": 6.5791263135365465, "learning_rate": 3.2147068604159654e-06, "loss": 0.3903, "step": 21920 }, { "epoch": 0.63, "grad_norm": 3.750435056881118, "learning_rate": 3.2142736796646822e-06, "loss": 0.2975, "step": 21921 }, { "epoch": 0.63, "grad_norm": 6.246039699871675, "learning_rate": 3.2138405142757802e-06, "loss": 0.3576, "step": 21922 }, { "epoch": 0.63, "grad_norm": 5.489024090978649, "learning_rate": 3.2134073642529906e-06, "loss": 0.397, "step": 21923 }, { "epoch": 0.63, "grad_norm": 6.08302854619249, "learning_rate": 3.2129742296000364e-06, "loss": 0.2634, "step": 21924 }, { "epoch": 0.63, "grad_norm": 5.172704739581725, "learning_rate": 3.2125411103206443e-06, "loss": 0.5713, "step": 21925 }, { "epoch": 0.63, "grad_norm": 4.728026385643556, "learning_rate": 3.212108006418543e-06, "loss": 0.2934, "step": 21926 }, { "epoch": 0.63, "grad_norm": 4.11320720145873, "learning_rate": 3.2116749178974548e-06, "loss": 0.521, "step": 21927 }, { "epoch": 0.63, "grad_norm": 6.10337257644868, "learning_rate": 3.2112418447611083e-06, "loss": 0.862, "step": 21928 }, { "epoch": 0.63, "grad_norm": 7.906885702035325, "learning_rate": 3.210808787013226e-06, "loss": 0.5192, "step": 21929 }, { "epoch": 0.63, "grad_norm": 8.558150920267314, "learning_rate": 3.210375744657537e-06, "loss": 0.5264, "step": 21930 }, { "epoch": 0.63, "grad_norm": 7.966511797264221, "learning_rate": 3.209942717697765e-06, "loss": 0.4022, "step": 21931 }, { "epoch": 0.63, "grad_norm": 3.9991736154455495, "learning_rate": 3.209509706137636e-06, "loss": 0.3558, "step": 21932 }, { "epoch": 0.63, "grad_norm": 6.203734437614381, "learning_rate": 3.2090767099808722e-06, "loss": 0.6587, "step": 21933 }, { "epoch": 0.63, "grad_norm": 5.562783716231355, "learning_rate": 3.208643729231202e-06, "loss": 0.618, "step": 21934 }, { "epoch": 0.63, "grad_norm": 7.27196739410208, "learning_rate": 3.208210763892351e-06, "loss": 0.6352, "step": 21935 }, { "epoch": 0.63, "grad_norm": 4.641901245330621, "learning_rate": 3.207777813968039e-06, "loss": 0.4707, "step": 21936 }, { "epoch": 0.63, "grad_norm": 8.314081744232876, "learning_rate": 3.2073448794619972e-06, "loss": 0.6882, "step": 21937 }, { "epoch": 0.63, "grad_norm": 6.89140277374218, "learning_rate": 3.2069119603779448e-06, "loss": 0.3222, "step": 21938 }, { "epoch": 0.63, "grad_norm": 4.547195423280023, "learning_rate": 3.206479056719609e-06, "loss": 0.3018, "step": 21939 }, { "epoch": 0.63, "grad_norm": 7.796204685301508, "learning_rate": 3.2060461684907117e-06, "loss": 0.3125, "step": 21940 }, { "epoch": 0.63, "grad_norm": 5.587438421742136, "learning_rate": 3.2056132956949803e-06, "loss": 0.5201, "step": 21941 }, { "epoch": 0.63, "grad_norm": 6.05252784851802, "learning_rate": 3.205180438336135e-06, "loss": 0.7036, "step": 21942 }, { "epoch": 0.63, "grad_norm": 4.995434775002932, "learning_rate": 3.2047475964179013e-06, "loss": 0.6691, "step": 21943 }, { "epoch": 0.63, "grad_norm": 3.6434374248250405, "learning_rate": 3.2043147699440047e-06, "loss": 0.5089, "step": 21944 }, { "epoch": 0.63, "grad_norm": 7.260382190186472, "learning_rate": 3.2038819589181646e-06, "loss": 0.5349, "step": 21945 }, { "epoch": 0.63, "grad_norm": 5.776920904092518, "learning_rate": 3.203449163344109e-06, "loss": 0.3185, "step": 21946 }, { "epoch": 0.63, "grad_norm": 7.7738980717533845, "learning_rate": 3.2030163832255584e-06, "loss": 0.675, "step": 21947 }, { "epoch": 0.63, "grad_norm": 8.108579651742865, "learning_rate": 3.2025836185662374e-06, "loss": 0.7919, "step": 21948 }, { "epoch": 0.63, "grad_norm": 5.48526054688489, "learning_rate": 3.2021508693698662e-06, "loss": 0.6614, "step": 21949 }, { "epoch": 0.63, "grad_norm": 7.291020824349791, "learning_rate": 3.2017181356401706e-06, "loss": 0.4609, "step": 21950 }, { "epoch": 0.63, "grad_norm": 6.260650914951524, "learning_rate": 3.201285417380874e-06, "loss": 0.1868, "step": 21951 }, { "epoch": 0.63, "grad_norm": 3.6573662806307605, "learning_rate": 3.200852714595695e-06, "loss": 0.3674, "step": 21952 }, { "epoch": 0.63, "grad_norm": 2.686331872131743, "learning_rate": 3.2004200272883603e-06, "loss": 0.1861, "step": 21953 }, { "epoch": 0.63, "grad_norm": 5.983971803792526, "learning_rate": 3.1999873554625895e-06, "loss": 0.4751, "step": 21954 }, { "epoch": 0.63, "grad_norm": 13.207335862644067, "learning_rate": 3.1995546991221073e-06, "loss": 0.5631, "step": 21955 }, { "epoch": 0.63, "grad_norm": 5.058922080206582, "learning_rate": 3.1991220582706323e-06, "loss": 0.4349, "step": 21956 }, { "epoch": 0.63, "grad_norm": 4.564467071394372, "learning_rate": 3.1986894329118904e-06, "loss": 0.5768, "step": 21957 }, { "epoch": 0.63, "grad_norm": 3.8071655725697626, "learning_rate": 3.1982568230496003e-06, "loss": 0.5437, "step": 21958 }, { "epoch": 0.63, "grad_norm": 6.481964766432812, "learning_rate": 3.1978242286874845e-06, "loss": 0.7737, "step": 21959 }, { "epoch": 0.63, "grad_norm": 5.823717306798187, "learning_rate": 3.1973916498292666e-06, "loss": 0.565, "step": 21960 }, { "epoch": 0.63, "grad_norm": 4.970816104706642, "learning_rate": 3.196959086478666e-06, "loss": 0.5995, "step": 21961 }, { "epoch": 0.63, "grad_norm": 5.244419219787913, "learning_rate": 3.1965265386394046e-06, "loss": 0.5979, "step": 21962 }, { "epoch": 0.63, "grad_norm": 3.9259616791365377, "learning_rate": 3.196094006315201e-06, "loss": 0.4352, "step": 21963 }, { "epoch": 0.63, "grad_norm": 8.430296245722888, "learning_rate": 3.1956614895097814e-06, "loss": 1.0438, "step": 21964 }, { "epoch": 0.63, "grad_norm": 7.567170206475922, "learning_rate": 3.195228988226863e-06, "loss": 0.5392, "step": 21965 }, { "epoch": 0.63, "grad_norm": 4.4262462213473714, "learning_rate": 3.1947965024701657e-06, "loss": 0.3586, "step": 21966 }, { "epoch": 0.63, "grad_norm": 3.5196999224721544, "learning_rate": 3.1943640322434142e-06, "loss": 0.6308, "step": 21967 }, { "epoch": 0.63, "grad_norm": 4.5029883793497305, "learning_rate": 3.1939315775503255e-06, "loss": 0.5818, "step": 21968 }, { "epoch": 0.63, "grad_norm": 3.0131887962235817, "learning_rate": 3.1934991383946225e-06, "loss": 0.1502, "step": 21969 }, { "epoch": 0.63, "grad_norm": 3.6669809394592803, "learning_rate": 3.193066714780021e-06, "loss": 0.3, "step": 21970 }, { "epoch": 0.63, "grad_norm": 4.9812014052103954, "learning_rate": 3.1926343067102476e-06, "loss": 0.2367, "step": 21971 }, { "epoch": 0.63, "grad_norm": 8.569982892690383, "learning_rate": 3.192201914189016e-06, "loss": 0.8061, "step": 21972 }, { "epoch": 0.63, "grad_norm": 6.061103699331222, "learning_rate": 3.1917695372200507e-06, "loss": 0.5252, "step": 21973 }, { "epoch": 0.63, "grad_norm": 6.339413367968332, "learning_rate": 3.1913371758070684e-06, "loss": 0.4681, "step": 21974 }, { "epoch": 0.63, "grad_norm": 5.016481795764714, "learning_rate": 3.190904829953788e-06, "loss": 0.3459, "step": 21975 }, { "epoch": 0.63, "grad_norm": 4.601638974653678, "learning_rate": 3.1904724996639337e-06, "loss": 0.1778, "step": 21976 }, { "epoch": 0.63, "grad_norm": 7.720653530881223, "learning_rate": 3.19004018494122e-06, "loss": 0.61, "step": 21977 }, { "epoch": 0.63, "grad_norm": 4.046306260842951, "learning_rate": 3.1896078857893687e-06, "loss": 0.4913, "step": 21978 }, { "epoch": 0.63, "grad_norm": 9.35758004784392, "learning_rate": 3.1891756022120963e-06, "loss": 0.9088, "step": 21979 }, { "epoch": 0.63, "grad_norm": 4.096732109016325, "learning_rate": 3.188743334213125e-06, "loss": 0.4013, "step": 21980 }, { "epoch": 0.63, "grad_norm": 2.221515570646741, "learning_rate": 3.1883110817961706e-06, "loss": 0.2506, "step": 21981 }, { "epoch": 0.63, "grad_norm": 5.4846521725945125, "learning_rate": 3.1878788449649547e-06, "loss": 0.6594, "step": 21982 }, { "epoch": 0.63, "grad_norm": 3.2692074450139823, "learning_rate": 3.1874466237231912e-06, "loss": 0.3369, "step": 21983 }, { "epoch": 0.63, "grad_norm": 4.532394001684444, "learning_rate": 3.187014418074602e-06, "loss": 0.358, "step": 21984 }, { "epoch": 0.63, "grad_norm": 6.34601943289411, "learning_rate": 3.1865822280229063e-06, "loss": 0.5748, "step": 21985 }, { "epoch": 0.63, "grad_norm": 5.264814274800635, "learning_rate": 3.186150053571818e-06, "loss": 0.5469, "step": 21986 }, { "epoch": 0.63, "grad_norm": 6.100257059840714, "learning_rate": 3.1857178947250595e-06, "loss": 0.6552, "step": 21987 }, { "epoch": 0.63, "grad_norm": 13.430403711950241, "learning_rate": 3.1852857514863455e-06, "loss": 0.5563, "step": 21988 }, { "epoch": 0.63, "grad_norm": 6.711407006640937, "learning_rate": 3.1848536238593956e-06, "loss": 0.1996, "step": 21989 }, { "epoch": 0.63, "grad_norm": 5.410821999031219, "learning_rate": 3.184421511847925e-06, "loss": 0.9331, "step": 21990 }, { "epoch": 0.63, "grad_norm": 4.711114250650399, "learning_rate": 3.1839894154556535e-06, "loss": 0.3046, "step": 21991 }, { "epoch": 0.63, "grad_norm": 4.816359421501619, "learning_rate": 3.183557334686298e-06, "loss": 0.5877, "step": 21992 }, { "epoch": 0.63, "grad_norm": 5.815802097647535, "learning_rate": 3.1831252695435734e-06, "loss": 0.3237, "step": 21993 }, { "epoch": 0.63, "grad_norm": 4.309586522123521, "learning_rate": 3.1826932200312005e-06, "loss": 0.511, "step": 21994 }, { "epoch": 0.63, "grad_norm": 3.4681507658366533, "learning_rate": 3.182261186152893e-06, "loss": 0.2371, "step": 21995 }, { "epoch": 0.63, "grad_norm": 8.131424813880049, "learning_rate": 3.18182916791237e-06, "loss": 0.3336, "step": 21996 }, { "epoch": 0.63, "grad_norm": 2.718968766278941, "learning_rate": 3.181397165313344e-06, "loss": 0.3794, "step": 21997 }, { "epoch": 0.63, "grad_norm": 4.889794349207553, "learning_rate": 3.180965178359538e-06, "loss": 0.3265, "step": 21998 }, { "epoch": 0.63, "grad_norm": 6.73537598713764, "learning_rate": 3.1805332070546613e-06, "loss": 0.5757, "step": 21999 }, { "epoch": 0.63, "grad_norm": 5.720167140786209, "learning_rate": 3.1801012514024332e-06, "loss": 0.4457, "step": 22000 }, { "epoch": 0.63, "grad_norm": 5.9658987215290535, "learning_rate": 3.1796693114065737e-06, "loss": 0.565, "step": 22001 }, { "epoch": 0.63, "grad_norm": 4.760111336407748, "learning_rate": 3.1792373870707914e-06, "loss": 0.5142, "step": 22002 }, { "epoch": 0.63, "grad_norm": 9.214742119047969, "learning_rate": 3.178805478398808e-06, "loss": 0.7074, "step": 22003 }, { "epoch": 0.63, "grad_norm": 5.224471878666802, "learning_rate": 3.1783735853943355e-06, "loss": 0.538, "step": 22004 }, { "epoch": 0.63, "grad_norm": 7.381844415721624, "learning_rate": 3.177941708061092e-06, "loss": 0.5641, "step": 22005 }, { "epoch": 0.63, "grad_norm": 5.947788479877297, "learning_rate": 3.177509846402789e-06, "loss": 0.3436, "step": 22006 }, { "epoch": 0.63, "grad_norm": 6.981048133001113, "learning_rate": 3.177078000423146e-06, "loss": 0.4568, "step": 22007 }, { "epoch": 0.63, "grad_norm": 2.8887239545624364, "learning_rate": 3.1766461701258778e-06, "loss": 0.4259, "step": 22008 }, { "epoch": 0.63, "grad_norm": 7.00157025299971, "learning_rate": 3.1762143555146956e-06, "loss": 0.6411, "step": 22009 }, { "epoch": 0.63, "grad_norm": 6.8179718580574376, "learning_rate": 3.1757825565933195e-06, "loss": 0.6946, "step": 22010 }, { "epoch": 0.63, "grad_norm": 10.235358923287116, "learning_rate": 3.1753507733654597e-06, "loss": 0.628, "step": 22011 }, { "epoch": 0.63, "grad_norm": 4.881276320851732, "learning_rate": 3.1749190058348332e-06, "loss": 0.5487, "step": 22012 }, { "epoch": 0.63, "grad_norm": 2.885358518342767, "learning_rate": 3.174487254005153e-06, "loss": 0.2086, "step": 22013 }, { "epoch": 0.63, "grad_norm": 5.239666362380139, "learning_rate": 3.174055517880135e-06, "loss": 0.2717, "step": 22014 }, { "epoch": 0.63, "grad_norm": 7.680909099416934, "learning_rate": 3.173623797463492e-06, "loss": 0.7543, "step": 22015 }, { "epoch": 0.63, "grad_norm": 7.696477672064985, "learning_rate": 3.1731920927589378e-06, "loss": 0.39, "step": 22016 }, { "epoch": 0.63, "grad_norm": 4.060595080450542, "learning_rate": 3.172760403770189e-06, "loss": 0.4728, "step": 22017 }, { "epoch": 0.63, "grad_norm": 8.017966986087952, "learning_rate": 3.1723287305009564e-06, "loss": 0.3827, "step": 22018 }, { "epoch": 0.63, "grad_norm": 4.0004653659957174, "learning_rate": 3.171897072954956e-06, "loss": 0.4168, "step": 22019 }, { "epoch": 0.63, "grad_norm": 9.895599126888586, "learning_rate": 3.1714654311358972e-06, "loss": 0.5705, "step": 22020 }, { "epoch": 0.63, "grad_norm": 6.988980355857787, "learning_rate": 3.1710338050474986e-06, "loss": 0.3221, "step": 22021 }, { "epoch": 0.63, "grad_norm": 5.792405083693212, "learning_rate": 3.170602194693471e-06, "loss": 0.3719, "step": 22022 }, { "epoch": 0.63, "grad_norm": 6.31294807883329, "learning_rate": 3.1701706000775266e-06, "loss": 0.5586, "step": 22023 }, { "epoch": 0.63, "grad_norm": 6.058968449411127, "learning_rate": 3.1697390212033784e-06, "loss": 0.5194, "step": 22024 }, { "epoch": 0.63, "grad_norm": 6.022156060810618, "learning_rate": 3.1693074580747414e-06, "loss": 0.4526, "step": 22025 }, { "epoch": 0.63, "grad_norm": 4.149564916714958, "learning_rate": 3.168875910695327e-06, "loss": 0.2985, "step": 22026 }, { "epoch": 0.63, "grad_norm": 11.658782520234519, "learning_rate": 3.168444379068846e-06, "loss": 0.5717, "step": 22027 }, { "epoch": 0.63, "grad_norm": 3.7009597113600785, "learning_rate": 3.168012863199016e-06, "loss": 0.1603, "step": 22028 }, { "epoch": 0.63, "grad_norm": 4.343901844252258, "learning_rate": 3.1675813630895426e-06, "loss": 0.5863, "step": 22029 }, { "epoch": 0.63, "grad_norm": 4.035478267025553, "learning_rate": 3.167149878744142e-06, "loss": 0.2363, "step": 22030 }, { "epoch": 0.63, "grad_norm": 4.77346557009571, "learning_rate": 3.166718410166526e-06, "loss": 0.2496, "step": 22031 }, { "epoch": 0.63, "grad_norm": 4.750519975008944, "learning_rate": 3.1662869573604037e-06, "loss": 0.3994, "step": 22032 }, { "epoch": 0.63, "grad_norm": 5.37069352202029, "learning_rate": 3.1658555203294912e-06, "loss": 0.5216, "step": 22033 }, { "epoch": 0.63, "grad_norm": 8.85677615738241, "learning_rate": 3.1654240990774964e-06, "loss": 0.7587, "step": 22034 }, { "epoch": 0.63, "grad_norm": 9.964312483541116, "learning_rate": 3.1649926936081343e-06, "loss": 0.1921, "step": 22035 }, { "epoch": 0.63, "grad_norm": 8.2148887934155, "learning_rate": 3.1645613039251113e-06, "loss": 0.577, "step": 22036 }, { "epoch": 0.63, "grad_norm": 4.865798617323683, "learning_rate": 3.164129930032144e-06, "loss": 0.333, "step": 22037 }, { "epoch": 0.63, "grad_norm": 2.739147250686256, "learning_rate": 3.1636985719329393e-06, "loss": 0.3585, "step": 22038 }, { "epoch": 0.63, "grad_norm": 7.208175642754674, "learning_rate": 3.1632672296312105e-06, "loss": 0.277, "step": 22039 }, { "epoch": 0.63, "grad_norm": 4.380316201906721, "learning_rate": 3.162835903130666e-06, "loss": 0.6311, "step": 22040 }, { "epoch": 0.63, "grad_norm": 3.6137582824886576, "learning_rate": 3.162404592435019e-06, "loss": 0.3829, "step": 22041 }, { "epoch": 0.63, "grad_norm": 8.231724784904905, "learning_rate": 3.16197329754798e-06, "loss": 0.4544, "step": 22042 }, { "epoch": 0.63, "grad_norm": 3.1123054152488754, "learning_rate": 3.1615420184732567e-06, "loss": 0.2801, "step": 22043 }, { "epoch": 0.63, "grad_norm": 6.9548198563085455, "learning_rate": 3.1611107552145626e-06, "loss": 0.984, "step": 22044 }, { "epoch": 0.63, "grad_norm": 7.53538152932421, "learning_rate": 3.160679507775606e-06, "loss": 0.5166, "step": 22045 }, { "epoch": 0.63, "grad_norm": 3.8841956608957373, "learning_rate": 3.1602482761600973e-06, "loss": 0.3805, "step": 22046 }, { "epoch": 0.63, "grad_norm": 5.0636003970211645, "learning_rate": 3.1598170603717444e-06, "loss": 0.4849, "step": 22047 }, { "epoch": 0.63, "grad_norm": 3.2605699354877897, "learning_rate": 3.15938586041426e-06, "loss": 0.569, "step": 22048 }, { "epoch": 0.63, "grad_norm": 7.433133486411961, "learning_rate": 3.1589546762913527e-06, "loss": 0.5539, "step": 22049 }, { "epoch": 0.63, "grad_norm": 2.840696539990419, "learning_rate": 3.15852350800673e-06, "loss": 0.4349, "step": 22050 }, { "epoch": 0.63, "grad_norm": 7.704026515738786, "learning_rate": 3.158092355564105e-06, "loss": 0.4548, "step": 22051 }, { "epoch": 0.63, "grad_norm": 8.473834745931802, "learning_rate": 3.157661218967184e-06, "loss": 0.524, "step": 22052 }, { "epoch": 0.63, "grad_norm": 9.903273752093858, "learning_rate": 3.1572300982196773e-06, "loss": 0.8258, "step": 22053 }, { "epoch": 0.63, "grad_norm": 6.1553828504940045, "learning_rate": 3.156798993325291e-06, "loss": 0.2049, "step": 22054 }, { "epoch": 0.63, "grad_norm": 3.909608594896885, "learning_rate": 3.156367904287739e-06, "loss": 0.3075, "step": 22055 }, { "epoch": 0.63, "grad_norm": 8.603188952545343, "learning_rate": 3.1559368311107254e-06, "loss": 0.7142, "step": 22056 }, { "epoch": 0.63, "grad_norm": 3.178820630127784, "learning_rate": 3.155505773797958e-06, "loss": 0.3145, "step": 22057 }, { "epoch": 0.63, "grad_norm": 4.644882561027491, "learning_rate": 3.1550747323531517e-06, "loss": 0.4641, "step": 22058 }, { "epoch": 0.63, "grad_norm": 3.6701486628095603, "learning_rate": 3.1546437067800074e-06, "loss": 0.1954, "step": 22059 }, { "epoch": 0.63, "grad_norm": 8.348916557970595, "learning_rate": 3.154212697082237e-06, "loss": 0.7656, "step": 22060 }, { "epoch": 0.63, "grad_norm": 5.133158958041266, "learning_rate": 3.1537817032635475e-06, "loss": 0.2111, "step": 22061 }, { "epoch": 0.63, "grad_norm": 2.6443566133657397, "learning_rate": 3.153350725327647e-06, "loss": 0.4524, "step": 22062 }, { "epoch": 0.63, "grad_norm": 3.205144654861444, "learning_rate": 3.1529197632782416e-06, "loss": 0.1911, "step": 22063 }, { "epoch": 0.63, "grad_norm": 6.740948082436911, "learning_rate": 3.1524888171190415e-06, "loss": 0.5269, "step": 22064 }, { "epoch": 0.63, "grad_norm": 10.22990224230344, "learning_rate": 3.1520578868537522e-06, "loss": 0.3462, "step": 22065 }, { "epoch": 0.63, "grad_norm": 4.455120188486486, "learning_rate": 3.1516269724860792e-06, "loss": 0.4116, "step": 22066 }, { "epoch": 0.63, "grad_norm": 5.505558629531764, "learning_rate": 3.151196074019735e-06, "loss": 0.5732, "step": 22067 }, { "epoch": 0.63, "grad_norm": 7.203305256671313, "learning_rate": 3.1507651914584214e-06, "loss": 0.3293, "step": 22068 }, { "epoch": 0.63, "grad_norm": 7.774600393446018, "learning_rate": 3.1503343248058495e-06, "loss": 0.5396, "step": 22069 }, { "epoch": 0.63, "grad_norm": 5.98696381272555, "learning_rate": 3.1499034740657202e-06, "loss": 0.5089, "step": 22070 }, { "epoch": 0.63, "grad_norm": 5.533834112392814, "learning_rate": 3.149472639241746e-06, "loss": 0.5234, "step": 22071 }, { "epoch": 0.63, "grad_norm": 1.8758000574277265, "learning_rate": 3.1490418203376306e-06, "loss": 0.0845, "step": 22072 }, { "epoch": 0.63, "grad_norm": 9.08917545997971, "learning_rate": 3.1486110173570785e-06, "loss": 0.6382, "step": 22073 }, { "epoch": 0.63, "grad_norm": 4.834478401602168, "learning_rate": 3.1481802303038e-06, "loss": 0.2945, "step": 22074 }, { "epoch": 0.63, "grad_norm": 5.779323009291356, "learning_rate": 3.1477494591814983e-06, "loss": 0.5744, "step": 22075 }, { "epoch": 0.63, "grad_norm": 5.097165185653697, "learning_rate": 3.1473187039938815e-06, "loss": 0.4749, "step": 22076 }, { "epoch": 0.63, "grad_norm": 6.784633091428212, "learning_rate": 3.146887964744651e-06, "loss": 0.507, "step": 22077 }, { "epoch": 0.63, "grad_norm": 5.275219741872629, "learning_rate": 3.146457241437517e-06, "loss": 0.2999, "step": 22078 }, { "epoch": 0.63, "grad_norm": 6.050794252956695, "learning_rate": 3.146026534076183e-06, "loss": 0.739, "step": 22079 }, { "epoch": 0.63, "grad_norm": 7.920723494708451, "learning_rate": 3.145595842664355e-06, "loss": 0.6211, "step": 22080 }, { "epoch": 0.63, "grad_norm": 5.84602597228058, "learning_rate": 3.1451651672057353e-06, "loss": 0.2544, "step": 22081 }, { "epoch": 0.63, "grad_norm": 4.699461362288457, "learning_rate": 3.1447345077040327e-06, "loss": 0.3773, "step": 22082 }, { "epoch": 0.63, "grad_norm": 5.539117121460785, "learning_rate": 3.144303864162952e-06, "loss": 0.5349, "step": 22083 }, { "epoch": 0.63, "grad_norm": 4.401785299764838, "learning_rate": 3.1438732365861934e-06, "loss": 0.3405, "step": 22084 }, { "epoch": 0.63, "grad_norm": 6.562198087014391, "learning_rate": 3.143442624977468e-06, "loss": 0.3848, "step": 22085 }, { "epoch": 0.63, "grad_norm": 5.242680488066307, "learning_rate": 3.143012029340476e-06, "loss": 0.6897, "step": 22086 }, { "epoch": 0.63, "grad_norm": 7.91376762179695, "learning_rate": 3.1425814496789237e-06, "loss": 0.6095, "step": 22087 }, { "epoch": 0.63, "grad_norm": 11.340522664318728, "learning_rate": 3.1421508859965133e-06, "loss": 0.4243, "step": 22088 }, { "epoch": 0.63, "grad_norm": 7.3947182222115115, "learning_rate": 3.1417203382969496e-06, "loss": 0.395, "step": 22089 }, { "epoch": 0.63, "grad_norm": 13.764499302191819, "learning_rate": 3.141289806583938e-06, "loss": 0.5488, "step": 22090 }, { "epoch": 0.63, "grad_norm": 4.466141479608695, "learning_rate": 3.140859290861181e-06, "loss": 0.3106, "step": 22091 }, { "epoch": 0.63, "grad_norm": 5.038517365059546, "learning_rate": 3.140428791132384e-06, "loss": 0.5034, "step": 22092 }, { "epoch": 0.63, "grad_norm": 3.8318507222962164, "learning_rate": 3.139998307401247e-06, "loss": 0.4086, "step": 22093 }, { "epoch": 0.63, "grad_norm": 5.024414987814238, "learning_rate": 3.139567839671477e-06, "loss": 0.2895, "step": 22094 }, { "epoch": 0.63, "grad_norm": 4.549308728535769, "learning_rate": 3.1391373879467757e-06, "loss": 0.5039, "step": 22095 }, { "epoch": 0.63, "grad_norm": 7.736850165223958, "learning_rate": 3.1387069522308465e-06, "loss": 0.4632, "step": 22096 }, { "epoch": 0.63, "grad_norm": 3.710156553312198, "learning_rate": 3.1382765325273908e-06, "loss": 0.2678, "step": 22097 }, { "epoch": 0.63, "grad_norm": 4.06840287293811, "learning_rate": 3.137846128840113e-06, "loss": 0.4063, "step": 22098 }, { "epoch": 0.63, "grad_norm": 3.84306743422998, "learning_rate": 3.137415741172718e-06, "loss": 0.2467, "step": 22099 }, { "epoch": 0.63, "grad_norm": 3.9006940468641234, "learning_rate": 3.136985369528903e-06, "loss": 0.2116, "step": 22100 }, { "epoch": 0.63, "grad_norm": 5.676138523353765, "learning_rate": 3.1365550139123755e-06, "loss": 0.465, "step": 22101 }, { "epoch": 0.63, "grad_norm": 3.256016261228872, "learning_rate": 3.136124674326835e-06, "loss": 0.401, "step": 22102 }, { "epoch": 0.63, "grad_norm": 3.3720917003670294, "learning_rate": 3.1356943507759863e-06, "loss": 0.2007, "step": 22103 }, { "epoch": 0.63, "grad_norm": 5.775394464207064, "learning_rate": 3.1352640432635263e-06, "loss": 0.3355, "step": 22104 }, { "epoch": 0.63, "grad_norm": 9.902323380849982, "learning_rate": 3.1348337517931627e-06, "loss": 0.7057, "step": 22105 }, { "epoch": 0.63, "grad_norm": 4.5478383777338, "learning_rate": 3.134403476368594e-06, "loss": 0.3279, "step": 22106 }, { "epoch": 0.63, "grad_norm": 2.9977157360196443, "learning_rate": 3.133973216993521e-06, "loss": 0.2518, "step": 22107 }, { "epoch": 0.63, "grad_norm": 10.523273877384817, "learning_rate": 3.133542973671648e-06, "loss": 1.0751, "step": 22108 }, { "epoch": 0.63, "grad_norm": 4.252619567145521, "learning_rate": 3.1331127464066757e-06, "loss": 0.2994, "step": 22109 }, { "epoch": 0.63, "grad_norm": 6.736752225466866, "learning_rate": 3.1326825352023045e-06, "loss": 0.5581, "step": 22110 }, { "epoch": 0.63, "grad_norm": 4.743606782341994, "learning_rate": 3.1322523400622336e-06, "loss": 0.6157, "step": 22111 }, { "epoch": 0.63, "grad_norm": 7.314028588313955, "learning_rate": 3.1318221609901677e-06, "loss": 0.7963, "step": 22112 }, { "epoch": 0.63, "grad_norm": 3.224819312837327, "learning_rate": 3.1313919979898057e-06, "loss": 0.1831, "step": 22113 }, { "epoch": 0.63, "grad_norm": 3.7528937301084486, "learning_rate": 3.130961851064846e-06, "loss": 0.2996, "step": 22114 }, { "epoch": 0.63, "grad_norm": 4.3913556234294875, "learning_rate": 3.130531720218994e-06, "loss": 0.3991, "step": 22115 }, { "epoch": 0.63, "grad_norm": 5.606378248910318, "learning_rate": 3.1301016054559465e-06, "loss": 0.3863, "step": 22116 }, { "epoch": 0.63, "grad_norm": 4.673075913929989, "learning_rate": 3.1296715067794055e-06, "loss": 0.2079, "step": 22117 }, { "epoch": 0.63, "grad_norm": 6.406532467453104, "learning_rate": 3.1292414241930693e-06, "loss": 0.394, "step": 22118 }, { "epoch": 0.63, "grad_norm": 8.532123898192719, "learning_rate": 3.1288113577006394e-06, "loss": 0.6615, "step": 22119 }, { "epoch": 0.63, "grad_norm": 7.9325821427385135, "learning_rate": 3.1283813073058132e-06, "loss": 0.5555, "step": 22120 }, { "epoch": 0.63, "grad_norm": 7.876844492871905, "learning_rate": 3.127951273012295e-06, "loss": 0.2297, "step": 22121 }, { "epoch": 0.63, "grad_norm": 3.7186006788519985, "learning_rate": 3.1275212548237797e-06, "loss": 0.2431, "step": 22122 }, { "epoch": 0.63, "grad_norm": 6.775497105790734, "learning_rate": 3.127091252743968e-06, "loss": 0.2551, "step": 22123 }, { "epoch": 0.63, "grad_norm": 9.617114168285738, "learning_rate": 3.1266612667765606e-06, "loss": 0.3273, "step": 22124 }, { "epoch": 0.63, "grad_norm": 4.462563427172489, "learning_rate": 3.1262312969252554e-06, "loss": 0.3507, "step": 22125 }, { "epoch": 0.63, "grad_norm": 5.324564798686382, "learning_rate": 3.125801343193753e-06, "loss": 0.7136, "step": 22126 }, { "epoch": 0.63, "grad_norm": 3.552767861970239, "learning_rate": 3.125371405585749e-06, "loss": 0.1965, "step": 22127 }, { "epoch": 0.63, "grad_norm": 4.813380247589285, "learning_rate": 3.1249414841049445e-06, "loss": 0.4075, "step": 22128 }, { "epoch": 0.63, "grad_norm": 5.573774270592782, "learning_rate": 3.1245115787550377e-06, "loss": 0.416, "step": 22129 }, { "epoch": 0.63, "grad_norm": 3.1542493031224277, "learning_rate": 3.124081689539728e-06, "loss": 0.0987, "step": 22130 }, { "epoch": 0.63, "grad_norm": 11.819874716932265, "learning_rate": 3.1236518164627103e-06, "loss": 0.5201, "step": 22131 }, { "epoch": 0.63, "grad_norm": 7.675404966027336, "learning_rate": 3.123221959527686e-06, "loss": 0.692, "step": 22132 }, { "epoch": 0.63, "grad_norm": 7.94862133283719, "learning_rate": 3.1227921187383527e-06, "loss": 0.7556, "step": 22133 }, { "epoch": 0.63, "grad_norm": 11.809854766314462, "learning_rate": 3.1223622940984068e-06, "loss": 0.326, "step": 22134 }, { "epoch": 0.63, "grad_norm": 5.109323390501723, "learning_rate": 3.1219324856115473e-06, "loss": 0.6314, "step": 22135 }, { "epoch": 0.63, "grad_norm": 8.01683300054595, "learning_rate": 3.1215026932814716e-06, "loss": 0.559, "step": 22136 }, { "epoch": 0.63, "grad_norm": 3.142502958680234, "learning_rate": 3.1210729171118774e-06, "loss": 0.3666, "step": 22137 }, { "epoch": 0.63, "grad_norm": 2.279422975531593, "learning_rate": 3.1206431571064597e-06, "loss": 0.1407, "step": 22138 }, { "epoch": 0.63, "grad_norm": 6.023382560192459, "learning_rate": 3.1202134132689186e-06, "loss": 0.6718, "step": 22139 }, { "epoch": 0.63, "grad_norm": 4.407311764752946, "learning_rate": 3.1197836856029513e-06, "loss": 0.5189, "step": 22140 }, { "epoch": 0.63, "grad_norm": 4.660799965282326, "learning_rate": 3.119353974112251e-06, "loss": 0.4271, "step": 22141 }, { "epoch": 0.63, "grad_norm": 7.771796799999874, "learning_rate": 3.118924278800519e-06, "loss": 0.523, "step": 22142 }, { "epoch": 0.63, "grad_norm": 8.585840254173045, "learning_rate": 3.118494599671449e-06, "loss": 0.6622, "step": 22143 }, { "epoch": 0.63, "grad_norm": 7.799404429659836, "learning_rate": 3.11806493672874e-06, "loss": 0.7032, "step": 22144 }, { "epoch": 0.63, "grad_norm": 12.055901654769446, "learning_rate": 3.117635289976083e-06, "loss": 0.4238, "step": 22145 }, { "epoch": 0.63, "grad_norm": 9.127458215146113, "learning_rate": 3.117205659417182e-06, "loss": 0.5601, "step": 22146 }, { "epoch": 0.63, "grad_norm": 3.6616831946025292, "learning_rate": 3.1167760450557256e-06, "loss": 0.337, "step": 22147 }, { "epoch": 0.63, "grad_norm": 6.553421862221907, "learning_rate": 3.116346446895414e-06, "loss": 0.4839, "step": 22148 }, { "epoch": 0.63, "grad_norm": 20.09811444144854, "learning_rate": 3.115916864939943e-06, "loss": 0.827, "step": 22149 }, { "epoch": 0.63, "grad_norm": 6.949101516223661, "learning_rate": 3.115487299193005e-06, "loss": 0.3121, "step": 22150 }, { "epoch": 0.63, "grad_norm": 9.547931494060748, "learning_rate": 3.1150577496583e-06, "loss": 0.5444, "step": 22151 }, { "epoch": 0.63, "grad_norm": 6.257501525391024, "learning_rate": 3.11462821633952e-06, "loss": 0.247, "step": 22152 }, { "epoch": 0.63, "grad_norm": 7.197576475224089, "learning_rate": 3.1141986992403628e-06, "loss": 0.6894, "step": 22153 }, { "epoch": 0.63, "grad_norm": 11.387326614617608, "learning_rate": 3.11376919836452e-06, "loss": 0.6267, "step": 22154 }, { "epoch": 0.63, "grad_norm": 5.530197021775549, "learning_rate": 3.1133397137156885e-06, "loss": 0.4345, "step": 22155 }, { "epoch": 0.63, "grad_norm": 2.2366421008525736, "learning_rate": 3.1129102452975645e-06, "loss": 0.222, "step": 22156 }, { "epoch": 0.63, "grad_norm": 10.100083575516683, "learning_rate": 3.11248079311384e-06, "loss": 0.4761, "step": 22157 }, { "epoch": 0.63, "grad_norm": 8.081573104594778, "learning_rate": 3.1120513571682126e-06, "loss": 0.6006, "step": 22158 }, { "epoch": 0.63, "grad_norm": 3.9201306419109363, "learning_rate": 3.1116219374643742e-06, "loss": 0.3612, "step": 22159 }, { "epoch": 0.63, "grad_norm": 6.152484032378854, "learning_rate": 3.11119253400602e-06, "loss": 0.2487, "step": 22160 }, { "epoch": 0.63, "grad_norm": 7.394753784764743, "learning_rate": 3.1107631467968423e-06, "loss": 0.419, "step": 22161 }, { "epoch": 0.63, "grad_norm": 4.928488318017004, "learning_rate": 3.1103337758405382e-06, "loss": 0.5382, "step": 22162 }, { "epoch": 0.63, "grad_norm": 5.842962324126426, "learning_rate": 3.1099044211407992e-06, "loss": 0.5613, "step": 22163 }, { "epoch": 0.63, "grad_norm": 12.59814921516097, "learning_rate": 3.1094750827013192e-06, "loss": 0.2303, "step": 22164 }, { "epoch": 0.63, "grad_norm": 11.742026525270926, "learning_rate": 3.1090457605257938e-06, "loss": 0.5684, "step": 22165 }, { "epoch": 0.63, "grad_norm": 6.340962817131337, "learning_rate": 3.1086164546179143e-06, "loss": 1.0305, "step": 22166 }, { "epoch": 0.63, "grad_norm": 5.812890501133207, "learning_rate": 3.1081871649813755e-06, "loss": 0.4145, "step": 22167 }, { "epoch": 0.63, "grad_norm": 9.075225366993978, "learning_rate": 3.107757891619867e-06, "loss": 0.4239, "step": 22168 }, { "epoch": 0.63, "grad_norm": 5.99436606700726, "learning_rate": 3.107328634537087e-06, "loss": 0.6428, "step": 22169 }, { "epoch": 0.63, "grad_norm": 2.6064982289173426, "learning_rate": 3.1068993937367248e-06, "loss": 0.2567, "step": 22170 }, { "epoch": 0.63, "grad_norm": 6.5439464055037515, "learning_rate": 3.106470169222475e-06, "loss": 0.6292, "step": 22171 }, { "epoch": 0.63, "grad_norm": 5.254031132344805, "learning_rate": 3.106040960998027e-06, "loss": 0.2185, "step": 22172 }, { "epoch": 0.63, "grad_norm": 7.109284385690555, "learning_rate": 3.1056117690670763e-06, "loss": 0.7122, "step": 22173 }, { "epoch": 0.64, "grad_norm": 4.401202157910858, "learning_rate": 3.105182593433315e-06, "loss": 0.4826, "step": 22174 }, { "epoch": 0.64, "grad_norm": 9.715169805278647, "learning_rate": 3.1047534341004337e-06, "loss": 0.8299, "step": 22175 }, { "epoch": 0.64, "grad_norm": 2.8027903056129904, "learning_rate": 3.104324291072127e-06, "loss": 0.3867, "step": 22176 }, { "epoch": 0.64, "grad_norm": 4.888013123994119, "learning_rate": 3.1038951643520822e-06, "loss": 0.3879, "step": 22177 }, { "epoch": 0.64, "grad_norm": 4.939673826476618, "learning_rate": 3.1034660539439966e-06, "loss": 0.481, "step": 22178 }, { "epoch": 0.64, "grad_norm": 5.624389191418949, "learning_rate": 3.1030369598515575e-06, "loss": 0.3928, "step": 22179 }, { "epoch": 0.64, "grad_norm": 6.899681523478258, "learning_rate": 3.1026078820784566e-06, "loss": 0.3644, "step": 22180 }, { "epoch": 0.64, "grad_norm": 8.81515462946382, "learning_rate": 3.102178820628389e-06, "loss": 0.8084, "step": 22181 }, { "epoch": 0.64, "grad_norm": 8.210913080093846, "learning_rate": 3.1017497755050423e-06, "loss": 0.2272, "step": 22182 }, { "epoch": 0.64, "grad_norm": 6.323602487321889, "learning_rate": 3.10132074671211e-06, "loss": 0.6286, "step": 22183 }, { "epoch": 0.64, "grad_norm": 2.5950472127151274, "learning_rate": 3.1008917342532786e-06, "loss": 0.0885, "step": 22184 }, { "epoch": 0.64, "grad_norm": 7.133931768871228, "learning_rate": 3.100462738132244e-06, "loss": 0.6307, "step": 22185 }, { "epoch": 0.64, "grad_norm": 3.2951308965948947, "learning_rate": 3.100033758352694e-06, "loss": 0.2822, "step": 22186 }, { "epoch": 0.64, "grad_norm": 6.0843767899881405, "learning_rate": 3.0996047949183207e-06, "loss": 0.5906, "step": 22187 }, { "epoch": 0.64, "grad_norm": 7.914534499166769, "learning_rate": 3.0991758478328105e-06, "loss": 0.6988, "step": 22188 }, { "epoch": 0.64, "grad_norm": 8.42587270320095, "learning_rate": 3.0987469170998587e-06, "loss": 0.4764, "step": 22189 }, { "epoch": 0.64, "grad_norm": 1.8974572072210865, "learning_rate": 3.098318002723153e-06, "loss": 0.2136, "step": 22190 }, { "epoch": 0.64, "grad_norm": 3.865397152550064, "learning_rate": 3.0978891047063817e-06, "loss": 0.3105, "step": 22191 }, { "epoch": 0.64, "grad_norm": 4.4811449812709885, "learning_rate": 3.0974602230532384e-06, "loss": 0.225, "step": 22192 }, { "epoch": 0.64, "grad_norm": 3.9646149968646713, "learning_rate": 3.097031357767409e-06, "loss": 0.5449, "step": 22193 }, { "epoch": 0.64, "grad_norm": 6.993860958742995, "learning_rate": 3.0966025088525864e-06, "loss": 0.2981, "step": 22194 }, { "epoch": 0.64, "grad_norm": 6.087034192759595, "learning_rate": 3.096173676312456e-06, "loss": 0.3945, "step": 22195 }, { "epoch": 0.64, "grad_norm": 10.820258691977559, "learning_rate": 3.09574486015071e-06, "loss": 0.8986, "step": 22196 }, { "epoch": 0.64, "grad_norm": 6.5106289963747965, "learning_rate": 3.095316060371037e-06, "loss": 0.4958, "step": 22197 }, { "epoch": 0.64, "grad_norm": 6.595177880029171, "learning_rate": 3.094887276977123e-06, "loss": 0.6966, "step": 22198 }, { "epoch": 0.64, "grad_norm": 5.8133526248495135, "learning_rate": 3.0944585099726627e-06, "loss": 0.5447, "step": 22199 }, { "epoch": 0.64, "grad_norm": 5.985906021644792, "learning_rate": 3.0940297593613387e-06, "loss": 0.3002, "step": 22200 }, { "epoch": 0.64, "grad_norm": 9.025215508801779, "learning_rate": 3.093601025146844e-06, "loss": 0.4036, "step": 22201 }, { "epoch": 0.64, "grad_norm": 7.352608844427455, "learning_rate": 3.093172307332863e-06, "loss": 0.3821, "step": 22202 }, { "epoch": 0.64, "grad_norm": 4.118077755539945, "learning_rate": 3.0927436059230886e-06, "loss": 0.1735, "step": 22203 }, { "epoch": 0.64, "grad_norm": 15.776046771399939, "learning_rate": 3.092314920921203e-06, "loss": 0.5791, "step": 22204 }, { "epoch": 0.64, "grad_norm": 2.9167967948949323, "learning_rate": 3.0918862523308978e-06, "loss": 0.425, "step": 22205 }, { "epoch": 0.64, "grad_norm": 2.349704096811995, "learning_rate": 3.091457600155862e-06, "loss": 0.5982, "step": 22206 }, { "epoch": 0.64, "grad_norm": 5.432035089604186, "learning_rate": 3.0910289643997794e-06, "loss": 0.4556, "step": 22207 }, { "epoch": 0.64, "grad_norm": 7.266879459496863, "learning_rate": 3.0906003450663413e-06, "loss": 0.5531, "step": 22208 }, { "epoch": 0.64, "grad_norm": 3.412309174564458, "learning_rate": 3.0901717421592327e-06, "loss": 0.3069, "step": 22209 }, { "epoch": 0.64, "grad_norm": 7.3754144325839865, "learning_rate": 3.089743155682142e-06, "loss": 0.6651, "step": 22210 }, { "epoch": 0.64, "grad_norm": 2.6696664706159563, "learning_rate": 3.089314585638754e-06, "loss": 0.2671, "step": 22211 }, { "epoch": 0.64, "grad_norm": 3.8869389917283357, "learning_rate": 3.0888860320327598e-06, "loss": 0.4644, "step": 22212 }, { "epoch": 0.64, "grad_norm": 4.991229351907616, "learning_rate": 3.088457494867842e-06, "loss": 0.789, "step": 22213 }, { "epoch": 0.64, "grad_norm": 5.13911295164633, "learning_rate": 3.088028974147688e-06, "loss": 0.5984, "step": 22214 }, { "epoch": 0.64, "grad_norm": 3.2170073967831723, "learning_rate": 3.0876004698759877e-06, "loss": 0.6531, "step": 22215 }, { "epoch": 0.64, "grad_norm": 3.7548471595450565, "learning_rate": 3.087171982056424e-06, "loss": 0.322, "step": 22216 }, { "epoch": 0.64, "grad_norm": 7.130036832438777, "learning_rate": 3.086743510692686e-06, "loss": 0.393, "step": 22217 }, { "epoch": 0.64, "grad_norm": 4.45065003265768, "learning_rate": 3.0863150557884548e-06, "loss": 0.5639, "step": 22218 }, { "epoch": 0.64, "grad_norm": 6.588736865704146, "learning_rate": 3.0858866173474224e-06, "loss": 0.586, "step": 22219 }, { "epoch": 0.64, "grad_norm": 8.606719173366772, "learning_rate": 3.0854581953732696e-06, "loss": 0.5725, "step": 22220 }, { "epoch": 0.64, "grad_norm": 7.23043920509901, "learning_rate": 3.085029789869684e-06, "loss": 0.4915, "step": 22221 }, { "epoch": 0.64, "grad_norm": 4.550203658356352, "learning_rate": 3.0846014008403533e-06, "loss": 0.3555, "step": 22222 }, { "epoch": 0.64, "grad_norm": 3.204260071400448, "learning_rate": 3.0841730282889602e-06, "loss": 0.362, "step": 22223 }, { "epoch": 0.64, "grad_norm": 3.0109615932702254, "learning_rate": 3.083744672219191e-06, "loss": 0.2088, "step": 22224 }, { "epoch": 0.64, "grad_norm": 4.385304984923057, "learning_rate": 3.0833163326347294e-06, "loss": 0.4458, "step": 22225 }, { "epoch": 0.64, "grad_norm": 6.861092493630022, "learning_rate": 3.082888009539263e-06, "loss": 0.5255, "step": 22226 }, { "epoch": 0.64, "grad_norm": 9.166810127060481, "learning_rate": 3.0824597029364736e-06, "loss": 0.8087, "step": 22227 }, { "epoch": 0.64, "grad_norm": 3.2918681594085157, "learning_rate": 3.082031412830049e-06, "loss": 0.2671, "step": 22228 }, { "epoch": 0.64, "grad_norm": 4.391977556626418, "learning_rate": 3.081603139223669e-06, "loss": 0.4978, "step": 22229 }, { "epoch": 0.64, "grad_norm": 2.369815513295233, "learning_rate": 3.081174882121023e-06, "loss": 0.2744, "step": 22230 }, { "epoch": 0.64, "grad_norm": 5.956431674488232, "learning_rate": 3.0807466415257937e-06, "loss": 0.5845, "step": 22231 }, { "epoch": 0.64, "grad_norm": 5.0525962831363405, "learning_rate": 3.0803184174416622e-06, "loss": 0.3888, "step": 22232 }, { "epoch": 0.64, "grad_norm": 4.068559734155846, "learning_rate": 3.0798902098723193e-06, "loss": 0.2621, "step": 22233 }, { "epoch": 0.64, "grad_norm": 3.8766545947081816, "learning_rate": 3.0794620188214407e-06, "loss": 0.3062, "step": 22234 }, { "epoch": 0.64, "grad_norm": 5.8790164569651, "learning_rate": 3.079033844292716e-06, "loss": 0.4749, "step": 22235 }, { "epoch": 0.64, "grad_norm": 6.446822906150533, "learning_rate": 3.0786056862898256e-06, "loss": 0.3155, "step": 22236 }, { "epoch": 0.64, "grad_norm": 9.067158836675732, "learning_rate": 3.0781775448164532e-06, "loss": 0.7486, "step": 22237 }, { "epoch": 0.64, "grad_norm": 5.627661139114366, "learning_rate": 3.0777494198762844e-06, "loss": 0.6015, "step": 22238 }, { "epoch": 0.64, "grad_norm": 5.780907574384445, "learning_rate": 3.0773213114729994e-06, "loss": 0.2667, "step": 22239 }, { "epoch": 0.64, "grad_norm": 5.170824572330075, "learning_rate": 3.076893219610284e-06, "loss": 0.5748, "step": 22240 }, { "epoch": 0.64, "grad_norm": 4.315591201815616, "learning_rate": 3.076465144291817e-06, "loss": 0.2029, "step": 22241 }, { "epoch": 0.64, "grad_norm": 8.018212806029982, "learning_rate": 3.076037085521286e-06, "loss": 0.6007, "step": 22242 }, { "epoch": 0.64, "grad_norm": 3.345793286927509, "learning_rate": 3.07560904330237e-06, "loss": 0.2783, "step": 22243 }, { "epoch": 0.64, "grad_norm": 5.448237242545144, "learning_rate": 3.075181017638753e-06, "loss": 0.4462, "step": 22244 }, { "epoch": 0.64, "grad_norm": 5.733793140795464, "learning_rate": 3.0747530085341152e-06, "loss": 0.527, "step": 22245 }, { "epoch": 0.64, "grad_norm": 7.276637241142681, "learning_rate": 3.0743250159921413e-06, "loss": 0.5929, "step": 22246 }, { "epoch": 0.64, "grad_norm": 7.512359226831083, "learning_rate": 3.073897040016513e-06, "loss": 0.2581, "step": 22247 }, { "epoch": 0.64, "grad_norm": 8.945046126884822, "learning_rate": 3.073469080610909e-06, "loss": 0.9912, "step": 22248 }, { "epoch": 0.64, "grad_norm": 5.5434498057322115, "learning_rate": 3.073041137779016e-06, "loss": 0.4595, "step": 22249 }, { "epoch": 0.64, "grad_norm": 7.622094085284836, "learning_rate": 3.072613211524511e-06, "loss": 0.47, "step": 22250 }, { "epoch": 0.64, "grad_norm": 4.610068576162019, "learning_rate": 3.0721853018510787e-06, "loss": 0.29, "step": 22251 }, { "epoch": 0.64, "grad_norm": 5.295176500975501, "learning_rate": 3.071757408762397e-06, "loss": 0.4059, "step": 22252 }, { "epoch": 0.64, "grad_norm": 8.266323355754928, "learning_rate": 3.0713295322621505e-06, "loss": 1.0304, "step": 22253 }, { "epoch": 0.64, "grad_norm": 7.943372161307173, "learning_rate": 3.070901672354018e-06, "loss": 0.4639, "step": 22254 }, { "epoch": 0.64, "grad_norm": 17.32017765500269, "learning_rate": 3.0704738290416803e-06, "loss": 0.7966, "step": 22255 }, { "epoch": 0.64, "grad_norm": 2.20698108447291, "learning_rate": 3.07004600232882e-06, "loss": 0.1633, "step": 22256 }, { "epoch": 0.64, "grad_norm": 4.517140328930596, "learning_rate": 3.069618192219115e-06, "loss": 0.3665, "step": 22257 }, { "epoch": 0.64, "grad_norm": 1.9341282423701402, "learning_rate": 3.0691903987162484e-06, "loss": 0.2017, "step": 22258 }, { "epoch": 0.64, "grad_norm": 5.684821871178661, "learning_rate": 3.0687626218238975e-06, "loss": 0.3469, "step": 22259 }, { "epoch": 0.64, "grad_norm": 11.520723329723662, "learning_rate": 3.0683348615457464e-06, "loss": 0.6429, "step": 22260 }, { "epoch": 0.64, "grad_norm": 9.099826080630518, "learning_rate": 3.06790711788547e-06, "loss": 0.5033, "step": 22261 }, { "epoch": 0.64, "grad_norm": 6.4345321850986625, "learning_rate": 3.0674793908467503e-06, "loss": 0.4053, "step": 22262 }, { "epoch": 0.64, "grad_norm": 7.091340937307893, "learning_rate": 3.0670516804332708e-06, "loss": 0.2765, "step": 22263 }, { "epoch": 0.64, "grad_norm": 6.7658019009692545, "learning_rate": 3.066623986648705e-06, "loss": 0.5211, "step": 22264 }, { "epoch": 0.64, "grad_norm": 2.495422905932024, "learning_rate": 3.066196309496736e-06, "loss": 0.1573, "step": 22265 }, { "epoch": 0.64, "grad_norm": 4.764570826269541, "learning_rate": 3.065768648981042e-06, "loss": 0.3319, "step": 22266 }, { "epoch": 0.64, "grad_norm": 6.101094363493403, "learning_rate": 3.0653410051053032e-06, "loss": 0.3322, "step": 22267 }, { "epoch": 0.64, "grad_norm": 3.630086569292384, "learning_rate": 3.0649133778731955e-06, "loss": 0.211, "step": 22268 }, { "epoch": 0.64, "grad_norm": 5.414102309219624, "learning_rate": 3.064485767288401e-06, "loss": 0.6318, "step": 22269 }, { "epoch": 0.64, "grad_norm": 4.938956878700942, "learning_rate": 3.064058173354597e-06, "loss": 0.4234, "step": 22270 }, { "epoch": 0.64, "grad_norm": 5.3024874410161305, "learning_rate": 3.0636305960754607e-06, "loss": 0.468, "step": 22271 }, { "epoch": 0.64, "grad_norm": 6.249678603491713, "learning_rate": 3.0632030354546736e-06, "loss": 0.813, "step": 22272 }, { "epoch": 0.64, "grad_norm": 3.336380940251413, "learning_rate": 3.062775491495912e-06, "loss": 0.3123, "step": 22273 }, { "epoch": 0.64, "grad_norm": 6.276068414865902, "learning_rate": 3.0623479642028553e-06, "loss": 0.4151, "step": 22274 }, { "epoch": 0.64, "grad_norm": 2.7094069235921823, "learning_rate": 3.061920453579178e-06, "loss": 0.438, "step": 22275 }, { "epoch": 0.64, "grad_norm": 11.503735391743712, "learning_rate": 3.061492959628563e-06, "loss": 0.6965, "step": 22276 }, { "epoch": 0.64, "grad_norm": 9.508057640507594, "learning_rate": 3.061065482354684e-06, "loss": 0.5443, "step": 22277 }, { "epoch": 0.64, "grad_norm": 4.861750105079834, "learning_rate": 3.0606380217612193e-06, "loss": 0.3435, "step": 22278 }, { "epoch": 0.64, "grad_norm": 5.311204314705362, "learning_rate": 3.060210577851849e-06, "loss": 0.2467, "step": 22279 }, { "epoch": 0.64, "grad_norm": 8.170479066266692, "learning_rate": 3.0597831506302467e-06, "loss": 0.6814, "step": 22280 }, { "epoch": 0.64, "grad_norm": 2.3918227642127863, "learning_rate": 3.0593557401000916e-06, "loss": 0.3316, "step": 22281 }, { "epoch": 0.64, "grad_norm": 8.164105136887766, "learning_rate": 3.0589283462650592e-06, "loss": 0.6385, "step": 22282 }, { "epoch": 0.64, "grad_norm": 7.646973326580529, "learning_rate": 3.058500969128829e-06, "loss": 0.6537, "step": 22283 }, { "epoch": 0.64, "grad_norm": 2.7579061043823256, "learning_rate": 3.0580736086950748e-06, "loss": 0.3386, "step": 22284 }, { "epoch": 0.64, "grad_norm": 2.7981413047116104, "learning_rate": 3.0576462649674754e-06, "loss": 0.1337, "step": 22285 }, { "epoch": 0.64, "grad_norm": 4.545417434042373, "learning_rate": 3.057218937949704e-06, "loss": 0.3069, "step": 22286 }, { "epoch": 0.64, "grad_norm": 7.109648177379506, "learning_rate": 3.0567916276454394e-06, "loss": 0.5003, "step": 22287 }, { "epoch": 0.64, "grad_norm": 7.401489208487502, "learning_rate": 3.056364334058359e-06, "loss": 0.4356, "step": 22288 }, { "epoch": 0.64, "grad_norm": 4.626302819145038, "learning_rate": 3.0559370571921345e-06, "loss": 0.8369, "step": 22289 }, { "epoch": 0.64, "grad_norm": 5.003948798136461, "learning_rate": 3.0555097970504464e-06, "loss": 0.3346, "step": 22290 }, { "epoch": 0.64, "grad_norm": 5.413913212819351, "learning_rate": 3.055082553636966e-06, "loss": 0.3408, "step": 22291 }, { "epoch": 0.64, "grad_norm": 4.307102363728655, "learning_rate": 3.0546553269553724e-06, "loss": 0.4808, "step": 22292 }, { "epoch": 0.64, "grad_norm": 4.140052439131779, "learning_rate": 3.054228117009339e-06, "loss": 0.3588, "step": 22293 }, { "epoch": 0.64, "grad_norm": 8.224080318990122, "learning_rate": 3.0538009238025425e-06, "loss": 0.3685, "step": 22294 }, { "epoch": 0.64, "grad_norm": 6.205568769086663, "learning_rate": 3.053373747338655e-06, "loss": 0.7694, "step": 22295 }, { "epoch": 0.64, "grad_norm": 3.6975821043729513, "learning_rate": 3.0529465876213543e-06, "loss": 0.2539, "step": 22296 }, { "epoch": 0.64, "grad_norm": 3.7344870131544265, "learning_rate": 3.0525194446543162e-06, "loss": 0.3205, "step": 22297 }, { "epoch": 0.64, "grad_norm": 3.927680001434967, "learning_rate": 3.05209231844121e-06, "loss": 0.3724, "step": 22298 }, { "epoch": 0.64, "grad_norm": 6.107110152479671, "learning_rate": 3.0516652089857167e-06, "loss": 0.5589, "step": 22299 }, { "epoch": 0.64, "grad_norm": 3.091943675909976, "learning_rate": 3.0512381162915067e-06, "loss": 0.2526, "step": 22300 }, { "epoch": 0.64, "grad_norm": 8.151233752895076, "learning_rate": 3.050811040362256e-06, "loss": 0.3432, "step": 22301 }, { "epoch": 0.64, "grad_norm": 4.132800385253687, "learning_rate": 3.050383981201636e-06, "loss": 0.3756, "step": 22302 }, { "epoch": 0.64, "grad_norm": 9.127139115106862, "learning_rate": 3.049956938813324e-06, "loss": 0.6354, "step": 22303 }, { "epoch": 0.64, "grad_norm": 10.903091366760563, "learning_rate": 3.0495299132009926e-06, "loss": 0.6508, "step": 22304 }, { "epoch": 0.64, "grad_norm": 3.1172275301626486, "learning_rate": 3.049102904368314e-06, "loss": 0.3495, "step": 22305 }, { "epoch": 0.64, "grad_norm": 5.6076865670218154, "learning_rate": 3.0486759123189642e-06, "loss": 0.5217, "step": 22306 }, { "epoch": 0.64, "grad_norm": 8.421651295820842, "learning_rate": 3.0482489370566147e-06, "loss": 0.2663, "step": 22307 }, { "epoch": 0.64, "grad_norm": 13.316724832158698, "learning_rate": 3.04782197858494e-06, "loss": 0.4414, "step": 22308 }, { "epoch": 0.64, "grad_norm": 4.430981132625563, "learning_rate": 3.0473950369076107e-06, "loss": 0.6636, "step": 22309 }, { "epoch": 0.64, "grad_norm": 3.5396627628664, "learning_rate": 3.0469681120283033e-06, "loss": 0.2293, "step": 22310 }, { "epoch": 0.64, "grad_norm": 6.1912992353848, "learning_rate": 3.0465412039506882e-06, "loss": 0.2824, "step": 22311 }, { "epoch": 0.64, "grad_norm": 7.541061183867899, "learning_rate": 3.046114312678437e-06, "loss": 0.2429, "step": 22312 }, { "epoch": 0.64, "grad_norm": 4.61372687297555, "learning_rate": 3.045687438215226e-06, "loss": 0.4192, "step": 22313 }, { "epoch": 0.64, "grad_norm": 7.46197521722339, "learning_rate": 3.0452605805647245e-06, "loss": 1.2959, "step": 22314 }, { "epoch": 0.64, "grad_norm": 10.302790561902478, "learning_rate": 3.044833739730606e-06, "loss": 0.9027, "step": 22315 }, { "epoch": 0.64, "grad_norm": 3.815165010385891, "learning_rate": 3.0444069157165414e-06, "loss": 0.4177, "step": 22316 }, { "epoch": 0.64, "grad_norm": 5.9351317250160704, "learning_rate": 3.0439801085262042e-06, "loss": 0.2444, "step": 22317 }, { "epoch": 0.64, "grad_norm": 7.113685123011694, "learning_rate": 3.043553318163265e-06, "loss": 0.4691, "step": 22318 }, { "epoch": 0.64, "grad_norm": 8.09038647609121, "learning_rate": 3.0431265446313964e-06, "loss": 0.5707, "step": 22319 }, { "epoch": 0.64, "grad_norm": 4.149552506139119, "learning_rate": 3.042699787934268e-06, "loss": 0.4154, "step": 22320 }, { "epoch": 0.64, "grad_norm": 5.207002953053973, "learning_rate": 3.0422730480755512e-06, "loss": 0.3487, "step": 22321 }, { "epoch": 0.64, "grad_norm": 8.43533393465679, "learning_rate": 3.0418463250589205e-06, "loss": 1.0364, "step": 22322 }, { "epoch": 0.64, "grad_norm": 7.752289833681741, "learning_rate": 3.041419618888044e-06, "loss": 0.7515, "step": 22323 }, { "epoch": 0.64, "grad_norm": 4.3853284716704355, "learning_rate": 3.040992929566594e-06, "loss": 0.1251, "step": 22324 }, { "epoch": 0.64, "grad_norm": 5.195445215351402, "learning_rate": 3.0405662570982385e-06, "loss": 0.381, "step": 22325 }, { "epoch": 0.64, "grad_norm": 9.361006401927819, "learning_rate": 3.040139601486653e-06, "loss": 0.5422, "step": 22326 }, { "epoch": 0.64, "grad_norm": 4.113182885423901, "learning_rate": 3.039712962735504e-06, "loss": 0.4641, "step": 22327 }, { "epoch": 0.64, "grad_norm": 7.4703223197471935, "learning_rate": 3.0392863408484617e-06, "loss": 0.6776, "step": 22328 }, { "epoch": 0.64, "grad_norm": 7.122433484598027, "learning_rate": 3.0388597358291995e-06, "loss": 0.464, "step": 22329 }, { "epoch": 0.64, "grad_norm": 4.736136130934649, "learning_rate": 3.0384331476813843e-06, "loss": 0.5513, "step": 22330 }, { "epoch": 0.64, "grad_norm": 5.046302810214962, "learning_rate": 3.0380065764086893e-06, "loss": 0.1987, "step": 22331 }, { "epoch": 0.64, "grad_norm": 5.2240628364838075, "learning_rate": 3.0375800220147796e-06, "loss": 0.3735, "step": 22332 }, { "epoch": 0.64, "grad_norm": 4.4224768993923735, "learning_rate": 3.037153484503329e-06, "loss": 0.4714, "step": 22333 }, { "epoch": 0.64, "grad_norm": 4.875929572659995, "learning_rate": 3.036726963878005e-06, "loss": 0.3409, "step": 22334 }, { "epoch": 0.64, "grad_norm": 3.8986634703069476, "learning_rate": 3.0363004601424773e-06, "loss": 0.3119, "step": 22335 }, { "epoch": 0.64, "grad_norm": 5.156456729328225, "learning_rate": 3.0358739733004137e-06, "loss": 0.708, "step": 22336 }, { "epoch": 0.64, "grad_norm": 4.321546105967423, "learning_rate": 3.035447503355485e-06, "loss": 0.5139, "step": 22337 }, { "epoch": 0.64, "grad_norm": 2.4681984007549924, "learning_rate": 3.035021050311361e-06, "loss": 0.3123, "step": 22338 }, { "epoch": 0.64, "grad_norm": 4.124586460307461, "learning_rate": 3.0345946141717066e-06, "loss": 0.3427, "step": 22339 }, { "epoch": 0.64, "grad_norm": 7.68683577978741, "learning_rate": 3.034168194940195e-06, "loss": 0.6577, "step": 22340 }, { "epoch": 0.64, "grad_norm": 2.6958238420707525, "learning_rate": 3.0337417926204917e-06, "loss": 0.4566, "step": 22341 }, { "epoch": 0.64, "grad_norm": 6.455855856254146, "learning_rate": 3.0333154072162656e-06, "loss": 0.2141, "step": 22342 }, { "epoch": 0.64, "grad_norm": 4.552107274787614, "learning_rate": 3.032889038731184e-06, "loss": 0.5617, "step": 22343 }, { "epoch": 0.64, "grad_norm": 5.927480817270633, "learning_rate": 3.032462687168916e-06, "loss": 0.6599, "step": 22344 }, { "epoch": 0.64, "grad_norm": 4.260924519657945, "learning_rate": 3.032036352533131e-06, "loss": 0.5073, "step": 22345 }, { "epoch": 0.64, "grad_norm": 5.439967373932445, "learning_rate": 3.0316100348274924e-06, "loss": 0.8435, "step": 22346 }, { "epoch": 0.64, "grad_norm": 6.672734475444925, "learning_rate": 3.031183734055672e-06, "loss": 0.6065, "step": 22347 }, { "epoch": 0.64, "grad_norm": 5.273430288804387, "learning_rate": 3.0307574502213355e-06, "loss": 0.2954, "step": 22348 }, { "epoch": 0.64, "grad_norm": 2.7672491638619316, "learning_rate": 3.03033118332815e-06, "loss": 0.452, "step": 22349 }, { "epoch": 0.64, "grad_norm": 4.401628167011598, "learning_rate": 3.029904933379783e-06, "loss": 0.4629, "step": 22350 }, { "epoch": 0.64, "grad_norm": 3.5753382196027275, "learning_rate": 3.029478700379902e-06, "loss": 0.3082, "step": 22351 }, { "epoch": 0.64, "grad_norm": 5.512718710161263, "learning_rate": 3.0290524843321707e-06, "loss": 0.4836, "step": 22352 }, { "epoch": 0.64, "grad_norm": 8.890709526317213, "learning_rate": 3.0286262852402593e-06, "loss": 0.9426, "step": 22353 }, { "epoch": 0.64, "grad_norm": 5.000096510909384, "learning_rate": 3.0282001031078346e-06, "loss": 0.3977, "step": 22354 }, { "epoch": 0.64, "grad_norm": 7.6056976519025365, "learning_rate": 3.0277739379385594e-06, "loss": 0.6625, "step": 22355 }, { "epoch": 0.64, "grad_norm": 3.5898832665594007, "learning_rate": 3.027347789736104e-06, "loss": 0.5787, "step": 22356 }, { "epoch": 0.64, "grad_norm": 3.9633503624176196, "learning_rate": 3.0269216585041318e-06, "loss": 0.6904, "step": 22357 }, { "epoch": 0.64, "grad_norm": 4.228798801728005, "learning_rate": 3.0264955442463105e-06, "loss": 0.5487, "step": 22358 }, { "epoch": 0.64, "grad_norm": 6.62435541076096, "learning_rate": 3.0260694469663037e-06, "loss": 0.6966, "step": 22359 }, { "epoch": 0.64, "grad_norm": 5.896698469259881, "learning_rate": 3.02564336666778e-06, "loss": 0.5975, "step": 22360 }, { "epoch": 0.64, "grad_norm": 7.444682299750606, "learning_rate": 3.0252173033544024e-06, "loss": 0.424, "step": 22361 }, { "epoch": 0.64, "grad_norm": 5.6952542902479575, "learning_rate": 3.024791257029836e-06, "loss": 0.4835, "step": 22362 }, { "epoch": 0.64, "grad_norm": 7.11598160176421, "learning_rate": 3.0243652276977497e-06, "loss": 0.4714, "step": 22363 }, { "epoch": 0.64, "grad_norm": 4.294770214253297, "learning_rate": 3.0239392153618042e-06, "loss": 0.3088, "step": 22364 }, { "epoch": 0.64, "grad_norm": 3.3161402080031435, "learning_rate": 3.0235132200256677e-06, "loss": 0.3714, "step": 22365 }, { "epoch": 0.64, "grad_norm": 3.9462999373458176, "learning_rate": 3.0230872416930022e-06, "loss": 0.4451, "step": 22366 }, { "epoch": 0.64, "grad_norm": 5.4784831380985874, "learning_rate": 3.0226612803674755e-06, "loss": 0.4, "step": 22367 }, { "epoch": 0.64, "grad_norm": 7.246789747676807, "learning_rate": 3.022235336052749e-06, "loss": 0.4532, "step": 22368 }, { "epoch": 0.64, "grad_norm": 8.150679867710926, "learning_rate": 3.0218094087524874e-06, "loss": 0.432, "step": 22369 }, { "epoch": 0.64, "grad_norm": 3.0349946832060395, "learning_rate": 3.0213834984703583e-06, "loss": 0.2247, "step": 22370 }, { "epoch": 0.64, "grad_norm": 5.73614216945677, "learning_rate": 3.020957605210022e-06, "loss": 0.5924, "step": 22371 }, { "epoch": 0.64, "grad_norm": 3.21974559312032, "learning_rate": 3.020531728975145e-06, "loss": 0.343, "step": 22372 }, { "epoch": 0.64, "grad_norm": 3.1439289535037136, "learning_rate": 3.0201058697693877e-06, "loss": 0.2766, "step": 22373 }, { "epoch": 0.64, "grad_norm": 5.404088641181605, "learning_rate": 3.0196800275964176e-06, "loss": 0.6313, "step": 22374 }, { "epoch": 0.64, "grad_norm": 6.688140303716609, "learning_rate": 3.019254202459896e-06, "loss": 0.4445, "step": 22375 }, { "epoch": 0.64, "grad_norm": 6.112513055767946, "learning_rate": 3.0188283943634873e-06, "loss": 0.9257, "step": 22376 }, { "epoch": 0.64, "grad_norm": 5.971771434163371, "learning_rate": 3.0184026033108513e-06, "loss": 0.4934, "step": 22377 }, { "epoch": 0.64, "grad_norm": 5.066177351228593, "learning_rate": 3.0179768293056555e-06, "loss": 0.3126, "step": 22378 }, { "epoch": 0.64, "grad_norm": 3.8018996189968997, "learning_rate": 3.0175510723515624e-06, "loss": 0.3495, "step": 22379 }, { "epoch": 0.64, "grad_norm": 5.929791837837294, "learning_rate": 3.0171253324522316e-06, "loss": 0.6027, "step": 22380 }, { "epoch": 0.64, "grad_norm": 4.62427891445447, "learning_rate": 3.0166996096113287e-06, "loss": 0.5611, "step": 22381 }, { "epoch": 0.64, "grad_norm": 4.22261771467695, "learning_rate": 3.016273903832513e-06, "loss": 0.6027, "step": 22382 }, { "epoch": 0.64, "grad_norm": 9.324784132680701, "learning_rate": 3.01584821511945e-06, "loss": 0.3757, "step": 22383 }, { "epoch": 0.64, "grad_norm": 6.700754079800333, "learning_rate": 3.0154225434758e-06, "loss": 0.7665, "step": 22384 }, { "epoch": 0.64, "grad_norm": 4.488963554186785, "learning_rate": 3.014996888905225e-06, "loss": 0.5726, "step": 22385 }, { "epoch": 0.64, "grad_norm": 6.102597587208366, "learning_rate": 3.014571251411389e-06, "loss": 0.4181, "step": 22386 }, { "epoch": 0.64, "grad_norm": 9.163704295202466, "learning_rate": 3.0141456309979507e-06, "loss": 0.4486, "step": 22387 }, { "epoch": 0.64, "grad_norm": 6.144291295278913, "learning_rate": 3.013720027668574e-06, "loss": 0.3072, "step": 22388 }, { "epoch": 0.64, "grad_norm": 8.199801790935515, "learning_rate": 3.013294441426918e-06, "loss": 0.3568, "step": 22389 }, { "epoch": 0.64, "grad_norm": 5.1440430846212015, "learning_rate": 3.012868872276646e-06, "loss": 0.1777, "step": 22390 }, { "epoch": 0.64, "grad_norm": 4.72315631803069, "learning_rate": 3.012443320221419e-06, "loss": 0.5392, "step": 22391 }, { "epoch": 0.64, "grad_norm": 5.062172113797059, "learning_rate": 3.012017785264898e-06, "loss": 0.5338, "step": 22392 }, { "epoch": 0.64, "grad_norm": 5.634874429429103, "learning_rate": 3.0115922674107408e-06, "loss": 0.4755, "step": 22393 }, { "epoch": 0.64, "grad_norm": 6.531059262351582, "learning_rate": 3.0111667666626114e-06, "loss": 0.4034, "step": 22394 }, { "epoch": 0.64, "grad_norm": 4.512431616014018, "learning_rate": 3.010741283024171e-06, "loss": 0.582, "step": 22395 }, { "epoch": 0.64, "grad_norm": 4.794197577877418, "learning_rate": 3.010315816499076e-06, "loss": 0.5649, "step": 22396 }, { "epoch": 0.64, "grad_norm": 4.080913072964706, "learning_rate": 3.0098903670909913e-06, "loss": 0.2765, "step": 22397 }, { "epoch": 0.64, "grad_norm": 4.187223226026285, "learning_rate": 3.009464934803573e-06, "loss": 0.2417, "step": 22398 }, { "epoch": 0.64, "grad_norm": 3.7679791195082344, "learning_rate": 3.0090395196404846e-06, "loss": 0.3825, "step": 22399 }, { "epoch": 0.64, "grad_norm": 4.8726186071045525, "learning_rate": 3.0086141216053822e-06, "loss": 0.3845, "step": 22400 }, { "epoch": 0.64, "grad_norm": 2.788905927393156, "learning_rate": 3.008188740701929e-06, "loss": 0.2279, "step": 22401 }, { "epoch": 0.64, "grad_norm": 3.1451077519199284, "learning_rate": 3.007763376933782e-06, "loss": 0.4227, "step": 22402 }, { "epoch": 0.64, "grad_norm": 3.1817716193352994, "learning_rate": 3.0073380303046003e-06, "loss": 0.418, "step": 22403 }, { "epoch": 0.64, "grad_norm": 3.337217634736377, "learning_rate": 3.006912700818046e-06, "loss": 0.3387, "step": 22404 }, { "epoch": 0.64, "grad_norm": 3.9943201747540553, "learning_rate": 3.0064873884777756e-06, "loss": 0.4141, "step": 22405 }, { "epoch": 0.64, "grad_norm": 5.891921449662074, "learning_rate": 3.0060620932874495e-06, "loss": 0.9286, "step": 22406 }, { "epoch": 0.64, "grad_norm": 6.098049622773887, "learning_rate": 3.0056368152507238e-06, "loss": 0.6134, "step": 22407 }, { "epoch": 0.64, "grad_norm": 9.033271325756056, "learning_rate": 3.005211554371262e-06, "loss": 0.9242, "step": 22408 }, { "epoch": 0.64, "grad_norm": 6.879543051848276, "learning_rate": 3.0047863106527165e-06, "loss": 0.7522, "step": 22409 }, { "epoch": 0.64, "grad_norm": 7.124338654727358, "learning_rate": 3.00436108409875e-06, "loss": 0.2107, "step": 22410 }, { "epoch": 0.64, "grad_norm": 5.699655925253967, "learning_rate": 3.0039358747130197e-06, "loss": 0.1655, "step": 22411 }, { "epoch": 0.64, "grad_norm": 4.128362585561394, "learning_rate": 3.003510682499182e-06, "loss": 0.3822, "step": 22412 }, { "epoch": 0.64, "grad_norm": 6.384003582951012, "learning_rate": 3.003085507460898e-06, "loss": 0.4949, "step": 22413 }, { "epoch": 0.64, "grad_norm": 6.386303809449417, "learning_rate": 3.0026603496018225e-06, "loss": 0.4033, "step": 22414 }, { "epoch": 0.64, "grad_norm": 7.421920647731745, "learning_rate": 3.002235208925615e-06, "loss": 0.5602, "step": 22415 }, { "epoch": 0.64, "grad_norm": 6.8673134749904055, "learning_rate": 3.00181008543593e-06, "loss": 0.66, "step": 22416 }, { "epoch": 0.64, "grad_norm": 3.819376809455744, "learning_rate": 3.001384979136429e-06, "loss": 0.1432, "step": 22417 }, { "epoch": 0.64, "grad_norm": 3.294418576914326, "learning_rate": 3.0009598900307656e-06, "loss": 0.2742, "step": 22418 }, { "epoch": 0.64, "grad_norm": 2.077557787533331, "learning_rate": 3.0005348181225972e-06, "loss": 0.1132, "step": 22419 }, { "epoch": 0.64, "grad_norm": 5.663432413768374, "learning_rate": 3.000109763415584e-06, "loss": 0.2919, "step": 22420 }, { "epoch": 0.64, "grad_norm": 4.260531056305585, "learning_rate": 2.9996847259133787e-06, "loss": 0.2698, "step": 22421 }, { "epoch": 0.64, "grad_norm": 27.034790301150796, "learning_rate": 2.999259705619641e-06, "loss": 0.4384, "step": 22422 }, { "epoch": 0.64, "grad_norm": 6.2631329077661295, "learning_rate": 2.9988347025380233e-06, "loss": 0.4606, "step": 22423 }, { "epoch": 0.64, "grad_norm": 6.311048831887987, "learning_rate": 2.9984097166721863e-06, "loss": 0.7139, "step": 22424 }, { "epoch": 0.64, "grad_norm": 2.7192862080566393, "learning_rate": 2.9979847480257833e-06, "loss": 0.1336, "step": 22425 }, { "epoch": 0.64, "grad_norm": 7.340705662621554, "learning_rate": 2.99755979660247e-06, "loss": 0.5663, "step": 22426 }, { "epoch": 0.64, "grad_norm": 8.941165492531589, "learning_rate": 2.997134862405905e-06, "loss": 0.4246, "step": 22427 }, { "epoch": 0.64, "grad_norm": 10.160057222850984, "learning_rate": 2.9967099454397414e-06, "loss": 1.046, "step": 22428 }, { "epoch": 0.64, "grad_norm": 3.754036161812532, "learning_rate": 2.9962850457076366e-06, "loss": 0.5849, "step": 22429 }, { "epoch": 0.64, "grad_norm": 7.891665913126292, "learning_rate": 2.9958601632132422e-06, "loss": 0.5323, "step": 22430 }, { "epoch": 0.64, "grad_norm": 4.706287068782078, "learning_rate": 2.9954352979602188e-06, "loss": 0.6231, "step": 22431 }, { "epoch": 0.64, "grad_norm": 5.939641746930421, "learning_rate": 2.995010449952217e-06, "loss": 0.4201, "step": 22432 }, { "epoch": 0.64, "grad_norm": 9.198453174005538, "learning_rate": 2.9945856191928945e-06, "loss": 0.9904, "step": 22433 }, { "epoch": 0.64, "grad_norm": 4.008827482497074, "learning_rate": 2.994160805685904e-06, "loss": 0.2736, "step": 22434 }, { "epoch": 0.64, "grad_norm": 4.659175593754751, "learning_rate": 2.993736009434901e-06, "loss": 0.4443, "step": 22435 }, { "epoch": 0.64, "grad_norm": 4.942022473658149, "learning_rate": 2.9933112304435417e-06, "loss": 0.5424, "step": 22436 }, { "epoch": 0.64, "grad_norm": 5.0307079520763756, "learning_rate": 2.992886468715478e-06, "loss": 0.6515, "step": 22437 }, { "epoch": 0.64, "grad_norm": 2.9015615443740117, "learning_rate": 2.992461724254365e-06, "loss": 0.3979, "step": 22438 }, { "epoch": 0.64, "grad_norm": 3.508323479288961, "learning_rate": 2.992036997063855e-06, "loss": 0.2371, "step": 22439 }, { "epoch": 0.64, "grad_norm": 2.912442306634762, "learning_rate": 2.991612287147606e-06, "loss": 0.2529, "step": 22440 }, { "epoch": 0.64, "grad_norm": 5.477864892837326, "learning_rate": 2.9911875945092683e-06, "loss": 0.4755, "step": 22441 }, { "epoch": 0.64, "grad_norm": 3.751213401938774, "learning_rate": 2.990762919152497e-06, "loss": 0.6072, "step": 22442 }, { "epoch": 0.64, "grad_norm": 7.529714301149285, "learning_rate": 2.9903382610809426e-06, "loss": 0.1762, "step": 22443 }, { "epoch": 0.64, "grad_norm": 5.299215244279118, "learning_rate": 2.9899136202982613e-06, "loss": 0.6063, "step": 22444 }, { "epoch": 0.64, "grad_norm": 2.2651327716573446, "learning_rate": 2.9894889968081074e-06, "loss": 0.2708, "step": 22445 }, { "epoch": 0.64, "grad_norm": 3.852192391969371, "learning_rate": 2.9890643906141303e-06, "loss": 0.2288, "step": 22446 }, { "epoch": 0.64, "grad_norm": 3.0370232965469532, "learning_rate": 2.988639801719987e-06, "loss": 0.2447, "step": 22447 }, { "epoch": 0.64, "grad_norm": 4.940094954006658, "learning_rate": 2.9882152301293254e-06, "loss": 0.2695, "step": 22448 }, { "epoch": 0.64, "grad_norm": 8.82822978539859, "learning_rate": 2.9877906758458032e-06, "loss": 0.5518, "step": 22449 }, { "epoch": 0.64, "grad_norm": 6.630097281661442, "learning_rate": 2.9873661388730667e-06, "loss": 0.5903, "step": 22450 }, { "epoch": 0.64, "grad_norm": 4.663247578800198, "learning_rate": 2.986941619214773e-06, "loss": 0.6381, "step": 22451 }, { "epoch": 0.64, "grad_norm": 3.2430302087763083, "learning_rate": 2.9865171168745738e-06, "loss": 0.3944, "step": 22452 }, { "epoch": 0.64, "grad_norm": 3.782127152527012, "learning_rate": 2.986092631856118e-06, "loss": 0.2243, "step": 22453 }, { "epoch": 0.64, "grad_norm": 4.518000733448525, "learning_rate": 2.985668164163061e-06, "loss": 0.4384, "step": 22454 }, { "epoch": 0.64, "grad_norm": 10.958853188332718, "learning_rate": 2.985243713799052e-06, "loss": 0.7157, "step": 22455 }, { "epoch": 0.64, "grad_norm": 5.9916879776463805, "learning_rate": 2.984819280767744e-06, "loss": 0.4915, "step": 22456 }, { "epoch": 0.64, "grad_norm": 4.832935245606579, "learning_rate": 2.984394865072786e-06, "loss": 0.4004, "step": 22457 }, { "epoch": 0.64, "grad_norm": 6.693258640409542, "learning_rate": 2.983970466717833e-06, "loss": 0.8602, "step": 22458 }, { "epoch": 0.64, "grad_norm": 3.302245768854875, "learning_rate": 2.983546085706532e-06, "loss": 0.2342, "step": 22459 }, { "epoch": 0.64, "grad_norm": 4.9141075726551025, "learning_rate": 2.983121722042535e-06, "loss": 0.6688, "step": 22460 }, { "epoch": 0.64, "grad_norm": 5.846214753820152, "learning_rate": 2.982697375729496e-06, "loss": 0.7439, "step": 22461 }, { "epoch": 0.64, "grad_norm": 7.309826680123464, "learning_rate": 2.9822730467710614e-06, "loss": 0.8915, "step": 22462 }, { "epoch": 0.64, "grad_norm": 2.6004112491531877, "learning_rate": 2.9818487351708845e-06, "loss": 0.299, "step": 22463 }, { "epoch": 0.64, "grad_norm": 7.6230152236003805, "learning_rate": 2.981424440932613e-06, "loss": 0.581, "step": 22464 }, { "epoch": 0.64, "grad_norm": 7.512673002463055, "learning_rate": 2.981000164059902e-06, "loss": 0.3713, "step": 22465 }, { "epoch": 0.64, "grad_norm": 5.998974791995102, "learning_rate": 2.9805759045563936e-06, "loss": 0.5724, "step": 22466 }, { "epoch": 0.64, "grad_norm": 5.014535280490573, "learning_rate": 2.9801516624257442e-06, "loss": 0.6237, "step": 22467 }, { "epoch": 0.64, "grad_norm": 5.320529804469257, "learning_rate": 2.9797274376716013e-06, "loss": 0.3069, "step": 22468 }, { "epoch": 0.64, "grad_norm": 6.91459841617856, "learning_rate": 2.979303230297613e-06, "loss": 0.5043, "step": 22469 }, { "epoch": 0.64, "grad_norm": 5.520109995354198, "learning_rate": 2.978879040307432e-06, "loss": 0.3512, "step": 22470 }, { "epoch": 0.64, "grad_norm": 8.7172347635539, "learning_rate": 2.9784548677047054e-06, "loss": 0.679, "step": 22471 }, { "epoch": 0.64, "grad_norm": 5.415525472687272, "learning_rate": 2.978030712493083e-06, "loss": 0.4197, "step": 22472 }, { "epoch": 0.64, "grad_norm": 8.742275643266549, "learning_rate": 2.977606574676212e-06, "loss": 0.5623, "step": 22473 }, { "epoch": 0.64, "grad_norm": 7.628474085101518, "learning_rate": 2.9771824542577442e-06, "loss": 0.4278, "step": 22474 }, { "epoch": 0.64, "grad_norm": 9.828429394785624, "learning_rate": 2.9767583512413254e-06, "loss": 0.5492, "step": 22475 }, { "epoch": 0.64, "grad_norm": 3.2513133476448197, "learning_rate": 2.9763342656306053e-06, "loss": 0.4044, "step": 22476 }, { "epoch": 0.64, "grad_norm": 4.592707911516105, "learning_rate": 2.9759101974292337e-06, "loss": 0.354, "step": 22477 }, { "epoch": 0.64, "grad_norm": 19.22042080199245, "learning_rate": 2.9754861466408567e-06, "loss": 0.3648, "step": 22478 }, { "epoch": 0.64, "grad_norm": 3.5291659864464786, "learning_rate": 2.975062113269124e-06, "loss": 0.292, "step": 22479 }, { "epoch": 0.64, "grad_norm": 8.181884820262146, "learning_rate": 2.9746380973176813e-06, "loss": 1.1531, "step": 22480 }, { "epoch": 0.64, "grad_norm": 2.7512152327577004, "learning_rate": 2.974214098790179e-06, "loss": 0.3474, "step": 22481 }, { "epoch": 0.64, "grad_norm": 6.889612517070606, "learning_rate": 2.9737901176902622e-06, "loss": 0.7832, "step": 22482 }, { "epoch": 0.64, "grad_norm": 4.452478248076128, "learning_rate": 2.9733661540215807e-06, "loss": 0.2041, "step": 22483 }, { "epoch": 0.64, "grad_norm": 5.826330333042134, "learning_rate": 2.972942207787779e-06, "loss": 0.2369, "step": 22484 }, { "epoch": 0.64, "grad_norm": 7.454975146451134, "learning_rate": 2.9725182789925066e-06, "loss": 0.2626, "step": 22485 }, { "epoch": 0.64, "grad_norm": 8.644050567542592, "learning_rate": 2.972094367639411e-06, "loss": 0.8171, "step": 22486 }, { "epoch": 0.64, "grad_norm": 1.900526475118297, "learning_rate": 2.9716704737321357e-06, "loss": 0.2156, "step": 22487 }, { "epoch": 0.64, "grad_norm": 4.136198419945044, "learning_rate": 2.9712465972743315e-06, "loss": 0.4169, "step": 22488 }, { "epoch": 0.64, "grad_norm": 7.6442378265097295, "learning_rate": 2.970822738269642e-06, "loss": 0.5267, "step": 22489 }, { "epoch": 0.64, "grad_norm": 4.22292166836478, "learning_rate": 2.9703988967217155e-06, "loss": 0.3349, "step": 22490 }, { "epoch": 0.64, "grad_norm": 5.474873825256713, "learning_rate": 2.9699750726341966e-06, "loss": 0.4955, "step": 22491 }, { "epoch": 0.64, "grad_norm": 9.765732811904881, "learning_rate": 2.969551266010732e-06, "loss": 0.8275, "step": 22492 }, { "epoch": 0.64, "grad_norm": 7.743834165495969, "learning_rate": 2.9691274768549694e-06, "loss": 0.3455, "step": 22493 }, { "epoch": 0.64, "grad_norm": 7.483076393060699, "learning_rate": 2.9687037051705503e-06, "loss": 0.5967, "step": 22494 }, { "epoch": 0.64, "grad_norm": 4.914561745128729, "learning_rate": 2.968279950961127e-06, "loss": 0.2749, "step": 22495 }, { "epoch": 0.64, "grad_norm": 4.431862971384071, "learning_rate": 2.967856214230338e-06, "loss": 0.3332, "step": 22496 }, { "epoch": 0.64, "grad_norm": 3.5834335194857765, "learning_rate": 2.9674324949818332e-06, "loss": 0.4713, "step": 22497 }, { "epoch": 0.64, "grad_norm": 5.308148542967536, "learning_rate": 2.967008793219256e-06, "loss": 0.5078, "step": 22498 }, { "epoch": 0.64, "grad_norm": 8.654882440060613, "learning_rate": 2.9665851089462526e-06, "loss": 0.654, "step": 22499 }, { "epoch": 0.64, "grad_norm": 6.922069538215759, "learning_rate": 2.9661614421664654e-06, "loss": 0.5827, "step": 22500 }, { "epoch": 0.64, "grad_norm": 5.8710092727563215, "learning_rate": 2.965737792883542e-06, "loss": 0.1777, "step": 22501 }, { "epoch": 0.64, "grad_norm": 7.8530263570471694, "learning_rate": 2.9653141611011262e-06, "loss": 0.5425, "step": 22502 }, { "epoch": 0.64, "grad_norm": 7.222119457377766, "learning_rate": 2.9648905468228607e-06, "loss": 0.3587, "step": 22503 }, { "epoch": 0.64, "grad_norm": 6.474190234948555, "learning_rate": 2.9644669500523927e-06, "loss": 0.8205, "step": 22504 }, { "epoch": 0.64, "grad_norm": 4.99171586413119, "learning_rate": 2.964043370793365e-06, "loss": 0.4685, "step": 22505 }, { "epoch": 0.64, "grad_norm": 1.5649978127631847, "learning_rate": 2.963619809049421e-06, "loss": 0.1232, "step": 22506 }, { "epoch": 0.64, "grad_norm": 4.351761325423692, "learning_rate": 2.963196264824204e-06, "loss": 0.4686, "step": 22507 }, { "epoch": 0.64, "grad_norm": 30.08217524221722, "learning_rate": 2.9627727381213606e-06, "loss": 0.2967, "step": 22508 }, { "epoch": 0.64, "grad_norm": 10.986171027522635, "learning_rate": 2.962349228944532e-06, "loss": 0.5877, "step": 22509 }, { "epoch": 0.64, "grad_norm": 5.526389060454925, "learning_rate": 2.9619257372973603e-06, "loss": 0.2916, "step": 22510 }, { "epoch": 0.64, "grad_norm": 6.36313819607366, "learning_rate": 2.961502263183493e-06, "loss": 0.568, "step": 22511 }, { "epoch": 0.64, "grad_norm": 12.143012873668768, "learning_rate": 2.9610788066065695e-06, "loss": 0.2155, "step": 22512 }, { "epoch": 0.64, "grad_norm": 6.327087241900002, "learning_rate": 2.9606553675702347e-06, "loss": 0.2916, "step": 22513 }, { "epoch": 0.64, "grad_norm": 6.4154014908942125, "learning_rate": 2.960231946078129e-06, "loss": 0.5123, "step": 22514 }, { "epoch": 0.64, "grad_norm": 4.767459253738145, "learning_rate": 2.9598085421338997e-06, "loss": 0.3723, "step": 22515 }, { "epoch": 0.64, "grad_norm": 9.9800153595724, "learning_rate": 2.959385155741184e-06, "loss": 0.4913, "step": 22516 }, { "epoch": 0.64, "grad_norm": 7.828327107818556, "learning_rate": 2.958961786903626e-06, "loss": 0.5192, "step": 22517 }, { "epoch": 0.64, "grad_norm": 6.219533822967491, "learning_rate": 2.9585384356248705e-06, "loss": 0.6337, "step": 22518 }, { "epoch": 0.64, "grad_norm": 6.45296103059029, "learning_rate": 2.9581151019085565e-06, "loss": 0.3285, "step": 22519 }, { "epoch": 0.64, "grad_norm": 5.334910080213615, "learning_rate": 2.9576917857583277e-06, "loss": 0.4974, "step": 22520 }, { "epoch": 0.64, "grad_norm": 4.533313886282852, "learning_rate": 2.957268487177823e-06, "loss": 0.3344, "step": 22521 }, { "epoch": 0.64, "grad_norm": 4.373343426563858, "learning_rate": 2.9568452061706887e-06, "loss": 0.3407, "step": 22522 }, { "epoch": 0.65, "grad_norm": 3.588087854171506, "learning_rate": 2.9564219427405617e-06, "loss": 0.4097, "step": 22523 }, { "epoch": 0.65, "grad_norm": 6.238201143231755, "learning_rate": 2.955998696891086e-06, "loss": 0.3947, "step": 22524 }, { "epoch": 0.65, "grad_norm": 14.522489355697054, "learning_rate": 2.9555754686259007e-06, "loss": 0.7791, "step": 22525 }, { "epoch": 0.65, "grad_norm": 8.90333900395304, "learning_rate": 2.955152257948648e-06, "loss": 0.5669, "step": 22526 }, { "epoch": 0.65, "grad_norm": 5.3274963097490176, "learning_rate": 2.95472906486297e-06, "loss": 0.3312, "step": 22527 }, { "epoch": 0.65, "grad_norm": 5.610932157934074, "learning_rate": 2.954305889372505e-06, "loss": 0.3195, "step": 22528 }, { "epoch": 0.65, "grad_norm": 9.3462335354317, "learning_rate": 2.953882731480896e-06, "loss": 0.4617, "step": 22529 }, { "epoch": 0.65, "grad_norm": 7.377200784280618, "learning_rate": 2.9534595911917795e-06, "loss": 0.4013, "step": 22530 }, { "epoch": 0.65, "grad_norm": 5.91272929192568, "learning_rate": 2.9530364685087996e-06, "loss": 0.4547, "step": 22531 }, { "epoch": 0.65, "grad_norm": 3.994882677907419, "learning_rate": 2.9526133634355945e-06, "loss": 0.4099, "step": 22532 }, { "epoch": 0.65, "grad_norm": 4.400403520113985, "learning_rate": 2.9521902759758043e-06, "loss": 0.3229, "step": 22533 }, { "epoch": 0.65, "grad_norm": 5.591267381142598, "learning_rate": 2.9517672061330702e-06, "loss": 0.5473, "step": 22534 }, { "epoch": 0.65, "grad_norm": 8.393153656704595, "learning_rate": 2.95134415391103e-06, "loss": 0.3702, "step": 22535 }, { "epoch": 0.65, "grad_norm": 7.121068103804785, "learning_rate": 2.9509211193133247e-06, "loss": 0.4337, "step": 22536 }, { "epoch": 0.65, "grad_norm": 6.33803838666352, "learning_rate": 2.950498102343591e-06, "loss": 0.263, "step": 22537 }, { "epoch": 0.65, "grad_norm": 6.477052676951523, "learning_rate": 2.950075103005471e-06, "loss": 0.4018, "step": 22538 }, { "epoch": 0.65, "grad_norm": 11.849109775567518, "learning_rate": 2.949652121302602e-06, "loss": 0.433, "step": 22539 }, { "epoch": 0.65, "grad_norm": 8.232674844821421, "learning_rate": 2.9492291572386243e-06, "loss": 0.517, "step": 22540 }, { "epoch": 0.65, "grad_norm": 4.3264712262402, "learning_rate": 2.9488062108171733e-06, "loss": 0.1689, "step": 22541 }, { "epoch": 0.65, "grad_norm": 7.740248636544946, "learning_rate": 2.948383282041891e-06, "loss": 0.5359, "step": 22542 }, { "epoch": 0.65, "grad_norm": 5.790938224082236, "learning_rate": 2.9479603709164164e-06, "loss": 0.4515, "step": 22543 }, { "epoch": 0.65, "grad_norm": 3.1520747905513744, "learning_rate": 2.9475374774443836e-06, "loss": 0.3184, "step": 22544 }, { "epoch": 0.65, "grad_norm": 3.6026147227978034, "learning_rate": 2.947114601629435e-06, "loss": 0.413, "step": 22545 }, { "epoch": 0.65, "grad_norm": 4.951380232841602, "learning_rate": 2.946691743475206e-06, "loss": 0.425, "step": 22546 }, { "epoch": 0.65, "grad_norm": 6.122347043090586, "learning_rate": 2.9462689029853366e-06, "loss": 0.2959, "step": 22547 }, { "epoch": 0.65, "grad_norm": 10.444370680970145, "learning_rate": 2.94584608016346e-06, "loss": 0.7785, "step": 22548 }, { "epoch": 0.65, "grad_norm": 3.2904331314359627, "learning_rate": 2.9454232750132193e-06, "loss": 0.597, "step": 22549 }, { "epoch": 0.65, "grad_norm": 5.895416213558225, "learning_rate": 2.9450004875382474e-06, "loss": 0.4995, "step": 22550 }, { "epoch": 0.65, "grad_norm": 7.045870284290601, "learning_rate": 2.9445777177421835e-06, "loss": 0.5497, "step": 22551 }, { "epoch": 0.65, "grad_norm": 6.1089726386295045, "learning_rate": 2.944154965628667e-06, "loss": 0.5997, "step": 22552 }, { "epoch": 0.65, "grad_norm": 2.646362391965041, "learning_rate": 2.9437322312013287e-06, "loss": 0.2399, "step": 22553 }, { "epoch": 0.65, "grad_norm": 11.354583107313696, "learning_rate": 2.9433095144638114e-06, "loss": 0.4263, "step": 22554 }, { "epoch": 0.65, "grad_norm": 7.042801316017092, "learning_rate": 2.9428868154197475e-06, "loss": 0.3192, "step": 22555 }, { "epoch": 0.65, "grad_norm": 5.90681737364511, "learning_rate": 2.9424641340727766e-06, "loss": 0.4019, "step": 22556 }, { "epoch": 0.65, "grad_norm": 7.772015281426627, "learning_rate": 2.942041470426531e-06, "loss": 0.4003, "step": 22557 }, { "epoch": 0.65, "grad_norm": 7.274314671775995, "learning_rate": 2.941618824484651e-06, "loss": 0.4497, "step": 22558 }, { "epoch": 0.65, "grad_norm": 12.474820831785772, "learning_rate": 2.9411961962507714e-06, "loss": 1.3752, "step": 22559 }, { "epoch": 0.65, "grad_norm": 5.385144729192406, "learning_rate": 2.940773585728525e-06, "loss": 0.233, "step": 22560 }, { "epoch": 0.65, "grad_norm": 7.944372971160608, "learning_rate": 2.9403509929215523e-06, "loss": 0.7193, "step": 22561 }, { "epoch": 0.65, "grad_norm": 3.9697475531329265, "learning_rate": 2.939928417833485e-06, "loss": 0.3416, "step": 22562 }, { "epoch": 0.65, "grad_norm": 5.619097431376177, "learning_rate": 2.9395058604679616e-06, "loss": 0.4308, "step": 22563 }, { "epoch": 0.65, "grad_norm": 6.969561063499385, "learning_rate": 2.9390833208286134e-06, "loss": 0.3009, "step": 22564 }, { "epoch": 0.65, "grad_norm": 11.203656427396353, "learning_rate": 2.9386607989190797e-06, "loss": 0.4037, "step": 22565 }, { "epoch": 0.65, "grad_norm": 3.4950067801675804, "learning_rate": 2.938238294742992e-06, "loss": 0.2815, "step": 22566 }, { "epoch": 0.65, "grad_norm": 5.48724855094528, "learning_rate": 2.9378158083039855e-06, "loss": 0.4503, "step": 22567 }, { "epoch": 0.65, "grad_norm": 5.203780078603998, "learning_rate": 2.9373933396056977e-06, "loss": 0.4578, "step": 22568 }, { "epoch": 0.65, "grad_norm": 6.811620533022129, "learning_rate": 2.93697088865176e-06, "loss": 0.447, "step": 22569 }, { "epoch": 0.65, "grad_norm": 4.6903805846313285, "learning_rate": 2.936548455445809e-06, "loss": 0.354, "step": 22570 }, { "epoch": 0.65, "grad_norm": 7.618648385050124, "learning_rate": 2.936126039991475e-06, "loss": 0.5346, "step": 22571 }, { "epoch": 0.65, "grad_norm": 5.12748818357567, "learning_rate": 2.9357036422923975e-06, "loss": 0.3056, "step": 22572 }, { "epoch": 0.65, "grad_norm": 4.4631539365190696, "learning_rate": 2.9352812623522053e-06, "loss": 0.4205, "step": 22573 }, { "epoch": 0.65, "grad_norm": 5.7076593010385235, "learning_rate": 2.934858900174534e-06, "loss": 0.3971, "step": 22574 }, { "epoch": 0.65, "grad_norm": 7.285226939167769, "learning_rate": 2.9344365557630195e-06, "loss": 0.5524, "step": 22575 }, { "epoch": 0.65, "grad_norm": 6.333034955909931, "learning_rate": 2.9340142291212913e-06, "loss": 0.4396, "step": 22576 }, { "epoch": 0.65, "grad_norm": 4.738028322216516, "learning_rate": 2.9335919202529855e-06, "loss": 0.5339, "step": 22577 }, { "epoch": 0.65, "grad_norm": 9.20603153464191, "learning_rate": 2.9331696291617316e-06, "loss": 0.3522, "step": 22578 }, { "epoch": 0.65, "grad_norm": 3.739445647500489, "learning_rate": 2.9327473558511676e-06, "loss": 0.3885, "step": 22579 }, { "epoch": 0.65, "grad_norm": 3.557860582618999, "learning_rate": 2.9323251003249225e-06, "loss": 0.5591, "step": 22580 }, { "epoch": 0.65, "grad_norm": 4.621861191755383, "learning_rate": 2.9319028625866307e-06, "loss": 0.4691, "step": 22581 }, { "epoch": 0.65, "grad_norm": 5.614718768787051, "learning_rate": 2.9314806426399233e-06, "loss": 0.5373, "step": 22582 }, { "epoch": 0.65, "grad_norm": 3.668832695466033, "learning_rate": 2.9310584404884324e-06, "loss": 0.3782, "step": 22583 }, { "epoch": 0.65, "grad_norm": 8.337047099061527, "learning_rate": 2.9306362561357923e-06, "loss": 0.71, "step": 22584 }, { "epoch": 0.65, "grad_norm": 15.299203105194456, "learning_rate": 2.9302140895856327e-06, "loss": 0.6763, "step": 22585 }, { "epoch": 0.65, "grad_norm": 3.707391011957, "learning_rate": 2.9297919408415877e-06, "loss": 0.3166, "step": 22586 }, { "epoch": 0.65, "grad_norm": 6.461501109623106, "learning_rate": 2.9293698099072857e-06, "loss": 0.5052, "step": 22587 }, { "epoch": 0.65, "grad_norm": 7.062488285831424, "learning_rate": 2.9289476967863628e-06, "loss": 0.3718, "step": 22588 }, { "epoch": 0.65, "grad_norm": 3.294986581760575, "learning_rate": 2.9285256014824463e-06, "loss": 0.4727, "step": 22589 }, { "epoch": 0.65, "grad_norm": 2.720571894058372, "learning_rate": 2.928103523999171e-06, "loss": 0.3542, "step": 22590 }, { "epoch": 0.65, "grad_norm": 5.221662907656378, "learning_rate": 2.9276814643401625e-06, "loss": 0.6019, "step": 22591 }, { "epoch": 0.65, "grad_norm": 6.288273125623227, "learning_rate": 2.9272594225090565e-06, "loss": 0.6135, "step": 22592 }, { "epoch": 0.65, "grad_norm": 3.918566307374254, "learning_rate": 2.9268373985094844e-06, "loss": 0.34, "step": 22593 }, { "epoch": 0.65, "grad_norm": 3.7456400639310488, "learning_rate": 2.9264153923450723e-06, "loss": 0.3236, "step": 22594 }, { "epoch": 0.65, "grad_norm": 4.139373906979365, "learning_rate": 2.925993404019455e-06, "loss": 0.6957, "step": 22595 }, { "epoch": 0.65, "grad_norm": 12.539179473996834, "learning_rate": 2.925571433536261e-06, "loss": 0.7529, "step": 22596 }, { "epoch": 0.65, "grad_norm": 2.793091970006435, "learning_rate": 2.9251494808991205e-06, "loss": 0.1594, "step": 22597 }, { "epoch": 0.65, "grad_norm": 9.029604435195571, "learning_rate": 2.924727546111662e-06, "loss": 0.6351, "step": 22598 }, { "epoch": 0.65, "grad_norm": 2.892157895654384, "learning_rate": 2.924305629177518e-06, "loss": 0.1837, "step": 22599 }, { "epoch": 0.65, "grad_norm": 4.1003744966157605, "learning_rate": 2.9238837301003176e-06, "loss": 0.5405, "step": 22600 }, { "epoch": 0.65, "grad_norm": 10.305326428969495, "learning_rate": 2.9234618488836884e-06, "loss": 0.3959, "step": 22601 }, { "epoch": 0.65, "grad_norm": 7.478836431270049, "learning_rate": 2.923039985531263e-06, "loss": 0.6922, "step": 22602 }, { "epoch": 0.65, "grad_norm": 6.345870805088285, "learning_rate": 2.922618140046668e-06, "loss": 0.5805, "step": 22603 }, { "epoch": 0.65, "grad_norm": 3.977622020295585, "learning_rate": 2.922196312433535e-06, "loss": 0.3575, "step": 22604 }, { "epoch": 0.65, "grad_norm": 2.553265470378147, "learning_rate": 2.921774502695488e-06, "loss": 0.1927, "step": 22605 }, { "epoch": 0.65, "grad_norm": 5.594829199487559, "learning_rate": 2.9213527108361616e-06, "loss": 0.5364, "step": 22606 }, { "epoch": 0.65, "grad_norm": 5.468872417714664, "learning_rate": 2.9209309368591814e-06, "loss": 0.3424, "step": 22607 }, { "epoch": 0.65, "grad_norm": 4.862954075850017, "learning_rate": 2.920509180768175e-06, "loss": 0.4531, "step": 22608 }, { "epoch": 0.65, "grad_norm": 7.282109909688668, "learning_rate": 2.920087442566774e-06, "loss": 0.5659, "step": 22609 }, { "epoch": 0.65, "grad_norm": 3.862210965375795, "learning_rate": 2.919665722258604e-06, "loss": 0.4571, "step": 22610 }, { "epoch": 0.65, "grad_norm": 7.889508461091185, "learning_rate": 2.9192440198472956e-06, "loss": 0.3063, "step": 22611 }, { "epoch": 0.65, "grad_norm": 5.337976247726769, "learning_rate": 2.918822335336471e-06, "loss": 0.6154, "step": 22612 }, { "epoch": 0.65, "grad_norm": 3.1908937227105807, "learning_rate": 2.9184006687297644e-06, "loss": 0.3347, "step": 22613 }, { "epoch": 0.65, "grad_norm": 8.739130298631828, "learning_rate": 2.9179790200307977e-06, "loss": 0.5191, "step": 22614 }, { "epoch": 0.65, "grad_norm": 7.6986332113800735, "learning_rate": 2.917557389243202e-06, "loss": 0.2496, "step": 22615 }, { "epoch": 0.65, "grad_norm": 8.652542807741254, "learning_rate": 2.9171357763706055e-06, "loss": 0.505, "step": 22616 }, { "epoch": 0.65, "grad_norm": 5.086360532919432, "learning_rate": 2.916714181416633e-06, "loss": 0.3047, "step": 22617 }, { "epoch": 0.65, "grad_norm": 6.091476955495162, "learning_rate": 2.9162926043849127e-06, "loss": 0.4369, "step": 22618 }, { "epoch": 0.65, "grad_norm": 6.574183934845426, "learning_rate": 2.9158710452790683e-06, "loss": 0.5841, "step": 22619 }, { "epoch": 0.65, "grad_norm": 8.675944430256395, "learning_rate": 2.9154495041027304e-06, "loss": 0.7535, "step": 22620 }, { "epoch": 0.65, "grad_norm": 4.693450609353051, "learning_rate": 2.9150279808595215e-06, "loss": 0.4402, "step": 22621 }, { "epoch": 0.65, "grad_norm": 16.22446223456644, "learning_rate": 2.914606475553073e-06, "loss": 0.598, "step": 22622 }, { "epoch": 0.65, "grad_norm": 7.899778771019001, "learning_rate": 2.9141849881870056e-06, "loss": 0.7667, "step": 22623 }, { "epoch": 0.65, "grad_norm": 5.726436181646336, "learning_rate": 2.9137635187649503e-06, "loss": 0.2206, "step": 22624 }, { "epoch": 0.65, "grad_norm": 2.3249949721825494, "learning_rate": 2.91334206729053e-06, "loss": 0.1615, "step": 22625 }, { "epoch": 0.65, "grad_norm": 6.642400993266876, "learning_rate": 2.91292063376737e-06, "loss": 0.7461, "step": 22626 }, { "epoch": 0.65, "grad_norm": 3.6885254290845575, "learning_rate": 2.912499218199098e-06, "loss": 0.353, "step": 22627 }, { "epoch": 0.65, "grad_norm": 5.081986587644831, "learning_rate": 2.9120778205893363e-06, "loss": 0.2738, "step": 22628 }, { "epoch": 0.65, "grad_norm": 2.3538294982551635, "learning_rate": 2.9116564409417143e-06, "loss": 0.1164, "step": 22629 }, { "epoch": 0.65, "grad_norm": 6.27800032784764, "learning_rate": 2.911235079259852e-06, "loss": 0.5823, "step": 22630 }, { "epoch": 0.65, "grad_norm": 6.907761904555577, "learning_rate": 2.9108137355473803e-06, "loss": 0.4608, "step": 22631 }, { "epoch": 0.65, "grad_norm": 4.6980761679267955, "learning_rate": 2.9103924098079208e-06, "loss": 0.5744, "step": 22632 }, { "epoch": 0.65, "grad_norm": 3.0441289019667925, "learning_rate": 2.909971102045095e-06, "loss": 0.2479, "step": 22633 }, { "epoch": 0.65, "grad_norm": 4.4570165127402745, "learning_rate": 2.9095498122625333e-06, "loss": 0.7072, "step": 22634 }, { "epoch": 0.65, "grad_norm": 5.787137018936569, "learning_rate": 2.9091285404638547e-06, "loss": 0.3568, "step": 22635 }, { "epoch": 0.65, "grad_norm": 8.05520820599588, "learning_rate": 2.908707286652688e-06, "loss": 0.452, "step": 22636 }, { "epoch": 0.65, "grad_norm": 5.281725709696893, "learning_rate": 2.908286050832653e-06, "loss": 0.2443, "step": 22637 }, { "epoch": 0.65, "grad_norm": 3.319224208422718, "learning_rate": 2.90786483300738e-06, "loss": 0.3078, "step": 22638 }, { "epoch": 0.65, "grad_norm": 10.175553179800772, "learning_rate": 2.9074436331804838e-06, "loss": 0.8873, "step": 22639 }, { "epoch": 0.65, "grad_norm": 5.284013375079823, "learning_rate": 2.907022451355593e-06, "loss": 0.501, "step": 22640 }, { "epoch": 0.65, "grad_norm": 4.923993820192749, "learning_rate": 2.9066012875363315e-06, "loss": 0.4438, "step": 22641 }, { "epoch": 0.65, "grad_norm": 6.354703138457416, "learning_rate": 2.9061801417263202e-06, "loss": 0.4539, "step": 22642 }, { "epoch": 0.65, "grad_norm": 8.981744716241694, "learning_rate": 2.9057590139291846e-06, "loss": 0.7931, "step": 22643 }, { "epoch": 0.65, "grad_norm": 7.670599619716895, "learning_rate": 2.9053379041485465e-06, "loss": 0.6045, "step": 22644 }, { "epoch": 0.65, "grad_norm": 4.270197391459079, "learning_rate": 2.904916812388029e-06, "loss": 0.4264, "step": 22645 }, { "epoch": 0.65, "grad_norm": 7.733383515896704, "learning_rate": 2.9044957386512518e-06, "loss": 0.5028, "step": 22646 }, { "epoch": 0.65, "grad_norm": 6.817489511714711, "learning_rate": 2.904074682941842e-06, "loss": 0.5828, "step": 22647 }, { "epoch": 0.65, "grad_norm": 5.37455102242999, "learning_rate": 2.9036536452634172e-06, "loss": 0.3992, "step": 22648 }, { "epoch": 0.65, "grad_norm": 5.507702419181971, "learning_rate": 2.903232625619602e-06, "loss": 0.4567, "step": 22649 }, { "epoch": 0.65, "grad_norm": 2.1096287751306155, "learning_rate": 2.9028116240140207e-06, "loss": 0.1474, "step": 22650 }, { "epoch": 0.65, "grad_norm": 4.650806701580261, "learning_rate": 2.902390640450293e-06, "loss": 0.4502, "step": 22651 }, { "epoch": 0.65, "grad_norm": 8.125431108041488, "learning_rate": 2.90196967493204e-06, "loss": 0.2435, "step": 22652 }, { "epoch": 0.65, "grad_norm": 2.8188020671763825, "learning_rate": 2.901548727462882e-06, "loss": 0.2562, "step": 22653 }, { "epoch": 0.65, "grad_norm": 9.647868682526735, "learning_rate": 2.9011277980464435e-06, "loss": 0.5869, "step": 22654 }, { "epoch": 0.65, "grad_norm": 9.185329725881196, "learning_rate": 2.900706886686342e-06, "loss": 0.3664, "step": 22655 }, { "epoch": 0.65, "grad_norm": 12.888829350105608, "learning_rate": 2.9002859933862025e-06, "loss": 0.2568, "step": 22656 }, { "epoch": 0.65, "grad_norm": 4.925834346911608, "learning_rate": 2.899865118149643e-06, "loss": 0.5635, "step": 22657 }, { "epoch": 0.65, "grad_norm": 6.79061348673182, "learning_rate": 2.8994442609802862e-06, "loss": 0.6862, "step": 22658 }, { "epoch": 0.65, "grad_norm": 8.31042506199283, "learning_rate": 2.8990234218817527e-06, "loss": 1.5058, "step": 22659 }, { "epoch": 0.65, "grad_norm": 4.2565591909889875, "learning_rate": 2.8986026008576596e-06, "loss": 0.4004, "step": 22660 }, { "epoch": 0.65, "grad_norm": 4.451568759901761, "learning_rate": 2.8981817979116315e-06, "loss": 0.2842, "step": 22661 }, { "epoch": 0.65, "grad_norm": 4.334748443599716, "learning_rate": 2.897761013047285e-06, "loss": 0.3439, "step": 22662 }, { "epoch": 0.65, "grad_norm": 4.046687015011472, "learning_rate": 2.8973402462682433e-06, "loss": 0.2482, "step": 22663 }, { "epoch": 0.65, "grad_norm": 4.658252643825829, "learning_rate": 2.896919497578123e-06, "loss": 0.5423, "step": 22664 }, { "epoch": 0.65, "grad_norm": 5.751943425686856, "learning_rate": 2.8964987669805466e-06, "loss": 0.382, "step": 22665 }, { "epoch": 0.65, "grad_norm": 6.120878506730291, "learning_rate": 2.8960780544791334e-06, "loss": 0.646, "step": 22666 }, { "epoch": 0.65, "grad_norm": 7.335895899775552, "learning_rate": 2.8956573600774984e-06, "loss": 0.8737, "step": 22667 }, { "epoch": 0.65, "grad_norm": 4.736323896583164, "learning_rate": 2.8952366837792665e-06, "loss": 0.6139, "step": 22668 }, { "epoch": 0.65, "grad_norm": 5.776750803677928, "learning_rate": 2.8948160255880533e-06, "loss": 0.2296, "step": 22669 }, { "epoch": 0.65, "grad_norm": 8.97229225041474, "learning_rate": 2.8943953855074793e-06, "loss": 0.4101, "step": 22670 }, { "epoch": 0.65, "grad_norm": 3.763278103210008, "learning_rate": 2.893974763541163e-06, "loss": 0.3102, "step": 22671 }, { "epoch": 0.65, "grad_norm": 3.1707412685474683, "learning_rate": 2.893554159692723e-06, "loss": 0.2408, "step": 22672 }, { "epoch": 0.65, "grad_norm": 7.597251116387535, "learning_rate": 2.8931335739657747e-06, "loss": 0.5799, "step": 22673 }, { "epoch": 0.65, "grad_norm": 8.523951723861572, "learning_rate": 2.8927130063639385e-06, "loss": 0.9588, "step": 22674 }, { "epoch": 0.65, "grad_norm": 6.186999368162294, "learning_rate": 2.892292456890835e-06, "loss": 0.2513, "step": 22675 }, { "epoch": 0.65, "grad_norm": 3.704215633567863, "learning_rate": 2.8918719255500788e-06, "loss": 0.0857, "step": 22676 }, { "epoch": 0.65, "grad_norm": 8.267746362882662, "learning_rate": 2.89145141234529e-06, "loss": 0.557, "step": 22677 }, { "epoch": 0.65, "grad_norm": 5.0001659365775595, "learning_rate": 2.8910309172800856e-06, "loss": 0.6461, "step": 22678 }, { "epoch": 0.65, "grad_norm": 8.958749385901317, "learning_rate": 2.8906104403580824e-06, "loss": 0.5533, "step": 22679 }, { "epoch": 0.65, "grad_norm": 4.434695848789492, "learning_rate": 2.890189981582896e-06, "loss": 0.4257, "step": 22680 }, { "epoch": 0.65, "grad_norm": 5.179651516290244, "learning_rate": 2.889769540958145e-06, "loss": 0.3174, "step": 22681 }, { "epoch": 0.65, "grad_norm": 4.62330410769099, "learning_rate": 2.889349118487449e-06, "loss": 0.4711, "step": 22682 }, { "epoch": 0.65, "grad_norm": 5.532262418517961, "learning_rate": 2.8889287141744204e-06, "loss": 0.5136, "step": 22683 }, { "epoch": 0.65, "grad_norm": 2.9172793267155774, "learning_rate": 2.888508328022681e-06, "loss": 0.1707, "step": 22684 }, { "epoch": 0.65, "grad_norm": 7.12359759265277, "learning_rate": 2.8880879600358438e-06, "loss": 0.4917, "step": 22685 }, { "epoch": 0.65, "grad_norm": 8.568351444738964, "learning_rate": 2.887667610217526e-06, "loss": 0.339, "step": 22686 }, { "epoch": 0.65, "grad_norm": 6.384865924260945, "learning_rate": 2.8872472785713413e-06, "loss": 0.905, "step": 22687 }, { "epoch": 0.65, "grad_norm": 2.8664267047725134, "learning_rate": 2.8868269651009108e-06, "loss": 0.2983, "step": 22688 }, { "epoch": 0.65, "grad_norm": 6.138413941186142, "learning_rate": 2.886406669809846e-06, "loss": 0.2902, "step": 22689 }, { "epoch": 0.65, "grad_norm": 6.559266229157482, "learning_rate": 2.8859863927017636e-06, "loss": 0.4835, "step": 22690 }, { "epoch": 0.65, "grad_norm": 10.087944227078067, "learning_rate": 2.885566133780282e-06, "loss": 0.3446, "step": 22691 }, { "epoch": 0.65, "grad_norm": 4.6795059194347015, "learning_rate": 2.885145893049015e-06, "loss": 0.2649, "step": 22692 }, { "epoch": 0.65, "grad_norm": 4.57712114284053, "learning_rate": 2.884725670511577e-06, "loss": 0.1316, "step": 22693 }, { "epoch": 0.65, "grad_norm": 5.728810242634334, "learning_rate": 2.884305466171582e-06, "loss": 0.3838, "step": 22694 }, { "epoch": 0.65, "grad_norm": 5.437080147856035, "learning_rate": 2.883885280032649e-06, "loss": 0.4309, "step": 22695 }, { "epoch": 0.65, "grad_norm": 4.537865512092443, "learning_rate": 2.8834651120983876e-06, "loss": 0.4995, "step": 22696 }, { "epoch": 0.65, "grad_norm": 12.138688452196043, "learning_rate": 2.8830449623724175e-06, "loss": 0.8627, "step": 22697 }, { "epoch": 0.65, "grad_norm": 4.800023390792439, "learning_rate": 2.882624830858351e-06, "loss": 0.4504, "step": 22698 }, { "epoch": 0.65, "grad_norm": 2.74963597575957, "learning_rate": 2.8822047175598e-06, "loss": 0.1441, "step": 22699 }, { "epoch": 0.65, "grad_norm": 6.81969876493873, "learning_rate": 2.8817846224803837e-06, "loss": 0.5355, "step": 22700 }, { "epoch": 0.65, "grad_norm": 4.458901045727007, "learning_rate": 2.8813645456237115e-06, "loss": 0.1876, "step": 22701 }, { "epoch": 0.65, "grad_norm": 2.737713024264177, "learning_rate": 2.880944486993401e-06, "loss": 0.1198, "step": 22702 }, { "epoch": 0.65, "grad_norm": 5.336704807768669, "learning_rate": 2.8805244465930617e-06, "loss": 0.4372, "step": 22703 }, { "epoch": 0.65, "grad_norm": 12.669733245136998, "learning_rate": 2.8801044244263122e-06, "loss": 0.7833, "step": 22704 }, { "epoch": 0.65, "grad_norm": 5.734177422302977, "learning_rate": 2.879684420496763e-06, "loss": 0.5695, "step": 22705 }, { "epoch": 0.65, "grad_norm": 1.6983339395684212, "learning_rate": 2.8792644348080263e-06, "loss": 0.2695, "step": 22706 }, { "epoch": 0.65, "grad_norm": 3.994680712309983, "learning_rate": 2.8788444673637184e-06, "loss": 0.3288, "step": 22707 }, { "epoch": 0.65, "grad_norm": 3.863348949676189, "learning_rate": 2.8784245181674474e-06, "loss": 0.5079, "step": 22708 }, { "epoch": 0.65, "grad_norm": 4.667753436497708, "learning_rate": 2.8780045872228324e-06, "loss": 0.4305, "step": 22709 }, { "epoch": 0.65, "grad_norm": 4.994415072768572, "learning_rate": 2.8775846745334803e-06, "loss": 0.3713, "step": 22710 }, { "epoch": 0.65, "grad_norm": 5.310058212104872, "learning_rate": 2.877164780103007e-06, "loss": 0.6744, "step": 22711 }, { "epoch": 0.65, "grad_norm": 4.195565185893338, "learning_rate": 2.8767449039350247e-06, "loss": 0.5187, "step": 22712 }, { "epoch": 0.65, "grad_norm": 4.633486230760811, "learning_rate": 2.8763250460331443e-06, "loss": 0.42, "step": 22713 }, { "epoch": 0.65, "grad_norm": 12.340121370859377, "learning_rate": 2.8759052064009764e-06, "loss": 0.2366, "step": 22714 }, { "epoch": 0.65, "grad_norm": 2.7101384285203136, "learning_rate": 2.8754853850421336e-06, "loss": 0.5353, "step": 22715 }, { "epoch": 0.65, "grad_norm": 10.771386098543063, "learning_rate": 2.8750655819602315e-06, "loss": 0.5258, "step": 22716 }, { "epoch": 0.65, "grad_norm": 6.718949727483079, "learning_rate": 2.874645797158876e-06, "loss": 0.5284, "step": 22717 }, { "epoch": 0.65, "grad_norm": 3.7080213633408485, "learning_rate": 2.8742260306416826e-06, "loss": 0.2457, "step": 22718 }, { "epoch": 0.65, "grad_norm": 6.307525487629805, "learning_rate": 2.8738062824122616e-06, "loss": 0.633, "step": 22719 }, { "epoch": 0.65, "grad_norm": 9.514608795575198, "learning_rate": 2.873386552474223e-06, "loss": 0.5614, "step": 22720 }, { "epoch": 0.65, "grad_norm": 5.3271587312285975, "learning_rate": 2.8729668408311763e-06, "loss": 0.2581, "step": 22721 }, { "epoch": 0.65, "grad_norm": 6.0475171164703685, "learning_rate": 2.872547147486734e-06, "loss": 0.3965, "step": 22722 }, { "epoch": 0.65, "grad_norm": 3.8408501083417113, "learning_rate": 2.8721274724445086e-06, "loss": 0.4645, "step": 22723 }, { "epoch": 0.65, "grad_norm": 5.067592555491382, "learning_rate": 2.8717078157081065e-06, "loss": 0.4533, "step": 22724 }, { "epoch": 0.65, "grad_norm": 4.804071774611184, "learning_rate": 2.871288177281142e-06, "loss": 0.3464, "step": 22725 }, { "epoch": 0.65, "grad_norm": 6.407482475259774, "learning_rate": 2.8708685571672235e-06, "loss": 0.4029, "step": 22726 }, { "epoch": 0.65, "grad_norm": 7.878726077726596, "learning_rate": 2.8704489553699597e-06, "loss": 0.7806, "step": 22727 }, { "epoch": 0.65, "grad_norm": 6.972586768736449, "learning_rate": 2.8700293718929607e-06, "loss": 0.2326, "step": 22728 }, { "epoch": 0.65, "grad_norm": 7.9405081447994075, "learning_rate": 2.869609806739838e-06, "loss": 0.6502, "step": 22729 }, { "epoch": 0.65, "grad_norm": 7.246750563802943, "learning_rate": 2.869190259914198e-06, "loss": 0.3595, "step": 22730 }, { "epoch": 0.65, "grad_norm": 5.65972212132907, "learning_rate": 2.8687707314196534e-06, "loss": 0.6152, "step": 22731 }, { "epoch": 0.65, "grad_norm": 6.237973486869364, "learning_rate": 2.8683512212598124e-06, "loss": 0.4887, "step": 22732 }, { "epoch": 0.65, "grad_norm": 2.584715609263299, "learning_rate": 2.8679317294382814e-06, "loss": 0.2007, "step": 22733 }, { "epoch": 0.65, "grad_norm": 3.5114839197382963, "learning_rate": 2.8675122559586733e-06, "loss": 0.2483, "step": 22734 }, { "epoch": 0.65, "grad_norm": 6.552391040977472, "learning_rate": 2.867092800824592e-06, "loss": 0.3458, "step": 22735 }, { "epoch": 0.65, "grad_norm": 4.153306061850074, "learning_rate": 2.866673364039651e-06, "loss": 0.2721, "step": 22736 }, { "epoch": 0.65, "grad_norm": 6.102230411561708, "learning_rate": 2.8662539456074544e-06, "loss": 0.4602, "step": 22737 }, { "epoch": 0.65, "grad_norm": 6.489002387514467, "learning_rate": 2.865834545531615e-06, "loss": 0.4248, "step": 22738 }, { "epoch": 0.65, "grad_norm": 7.877501922868683, "learning_rate": 2.8654151638157374e-06, "loss": 0.3139, "step": 22739 }, { "epoch": 0.65, "grad_norm": 2.8125818028739475, "learning_rate": 2.8649958004634288e-06, "loss": 0.3812, "step": 22740 }, { "epoch": 0.65, "grad_norm": 2.6088742558051146, "learning_rate": 2.8645764554783e-06, "loss": 0.3525, "step": 22741 }, { "epoch": 0.65, "grad_norm": 5.14652084540342, "learning_rate": 2.8641571288639558e-06, "loss": 0.4355, "step": 22742 }, { "epoch": 0.65, "grad_norm": 5.417192443370077, "learning_rate": 2.8637378206240064e-06, "loss": 0.4467, "step": 22743 }, { "epoch": 0.65, "grad_norm": 4.596989974730644, "learning_rate": 2.8633185307620555e-06, "loss": 0.2212, "step": 22744 }, { "epoch": 0.65, "grad_norm": 4.149714932360015, "learning_rate": 2.862899259281714e-06, "loss": 0.6619, "step": 22745 }, { "epoch": 0.65, "grad_norm": 4.434887815667202, "learning_rate": 2.8624800061865876e-06, "loss": 0.3907, "step": 22746 }, { "epoch": 0.65, "grad_norm": 7.186656504354328, "learning_rate": 2.8620607714802793e-06, "loss": 0.5142, "step": 22747 }, { "epoch": 0.65, "grad_norm": 7.636472263894995, "learning_rate": 2.8616415551664025e-06, "loss": 0.5888, "step": 22748 }, { "epoch": 0.65, "grad_norm": 6.543690929128006, "learning_rate": 2.8612223572485575e-06, "loss": 0.6109, "step": 22749 }, { "epoch": 0.65, "grad_norm": 6.08940141501615, "learning_rate": 2.860803177730355e-06, "loss": 0.3969, "step": 22750 }, { "epoch": 0.65, "grad_norm": 4.909674378803001, "learning_rate": 2.860384016615397e-06, "loss": 0.446, "step": 22751 }, { "epoch": 0.65, "grad_norm": 5.965416641553309, "learning_rate": 2.8599648739072948e-06, "loss": 0.7608, "step": 22752 }, { "epoch": 0.65, "grad_norm": 3.3527636209737026, "learning_rate": 2.8595457496096513e-06, "loss": 0.2968, "step": 22753 }, { "epoch": 0.65, "grad_norm": 5.345868766986366, "learning_rate": 2.8591266437260716e-06, "loss": 0.6591, "step": 22754 }, { "epoch": 0.65, "grad_norm": 3.102890278139817, "learning_rate": 2.85870755626016e-06, "loss": 0.2021, "step": 22755 }, { "epoch": 0.65, "grad_norm": 3.6569284478015693, "learning_rate": 2.858288487215524e-06, "loss": 0.176, "step": 22756 }, { "epoch": 0.65, "grad_norm": 16.295299786727238, "learning_rate": 2.85786943659577e-06, "loss": 0.6538, "step": 22757 }, { "epoch": 0.65, "grad_norm": 6.8684776231321125, "learning_rate": 2.8574504044045017e-06, "loss": 0.6363, "step": 22758 }, { "epoch": 0.65, "grad_norm": 4.4743982645733595, "learning_rate": 2.857031390645324e-06, "loss": 0.4553, "step": 22759 }, { "epoch": 0.65, "grad_norm": 9.871232412592377, "learning_rate": 2.8566123953218383e-06, "loss": 0.583, "step": 22760 }, { "epoch": 0.65, "grad_norm": 3.795914767941461, "learning_rate": 2.8561934184376555e-06, "loss": 0.0747, "step": 22761 }, { "epoch": 0.65, "grad_norm": 6.704082642908482, "learning_rate": 2.855774459996374e-06, "loss": 0.7043, "step": 22762 }, { "epoch": 0.65, "grad_norm": 2.7656700647852865, "learning_rate": 2.8553555200016015e-06, "loss": 0.2778, "step": 22763 }, { "epoch": 0.65, "grad_norm": 4.497539138793282, "learning_rate": 2.8549365984569433e-06, "loss": 0.3707, "step": 22764 }, { "epoch": 0.65, "grad_norm": 7.147216258973346, "learning_rate": 2.8545176953660003e-06, "loss": 0.6792, "step": 22765 }, { "epoch": 0.65, "grad_norm": 5.391610716107404, "learning_rate": 2.8540988107323786e-06, "loss": 0.7041, "step": 22766 }, { "epoch": 0.65, "grad_norm": 4.999947309216387, "learning_rate": 2.8536799445596787e-06, "loss": 0.4252, "step": 22767 }, { "epoch": 0.65, "grad_norm": 7.752154296971201, "learning_rate": 2.853261096851507e-06, "loss": 0.6221, "step": 22768 }, { "epoch": 0.65, "grad_norm": 4.55161384529987, "learning_rate": 2.8528422676114643e-06, "loss": 0.392, "step": 22769 }, { "epoch": 0.65, "grad_norm": 4.18912503040375, "learning_rate": 2.8524234568431573e-06, "loss": 0.2338, "step": 22770 }, { "epoch": 0.65, "grad_norm": 5.435055654674212, "learning_rate": 2.852004664550185e-06, "loss": 0.3509, "step": 22771 }, { "epoch": 0.65, "grad_norm": 5.146716048583535, "learning_rate": 2.8515858907361536e-06, "loss": 0.5061, "step": 22772 }, { "epoch": 0.65, "grad_norm": 5.359392816719713, "learning_rate": 2.851167135404665e-06, "loss": 0.3252, "step": 22773 }, { "epoch": 0.65, "grad_norm": 5.066264977761244, "learning_rate": 2.850748398559319e-06, "loss": 0.4314, "step": 22774 }, { "epoch": 0.65, "grad_norm": 3.6590938593681845, "learning_rate": 2.8503296802037213e-06, "loss": 0.3505, "step": 22775 }, { "epoch": 0.65, "grad_norm": 2.555571610174566, "learning_rate": 2.8499109803414704e-06, "loss": 0.3288, "step": 22776 }, { "epoch": 0.65, "grad_norm": 2.245663517091587, "learning_rate": 2.849492298976173e-06, "loss": 0.0612, "step": 22777 }, { "epoch": 0.65, "grad_norm": 6.586455988809399, "learning_rate": 2.849073636111427e-06, "loss": 0.6531, "step": 22778 }, { "epoch": 0.65, "grad_norm": 4.349985600590167, "learning_rate": 2.8486549917508374e-06, "loss": 0.3077, "step": 22779 }, { "epoch": 0.65, "grad_norm": 4.823900170560102, "learning_rate": 2.8482363658980038e-06, "loss": 0.1363, "step": 22780 }, { "epoch": 0.65, "grad_norm": 4.701894947797169, "learning_rate": 2.847817758556526e-06, "loss": 0.2737, "step": 22781 }, { "epoch": 0.65, "grad_norm": 7.6042779853371805, "learning_rate": 2.8473991697300085e-06, "loss": 0.4866, "step": 22782 }, { "epoch": 0.65, "grad_norm": 4.884261393919055, "learning_rate": 2.84698059942205e-06, "loss": 0.414, "step": 22783 }, { "epoch": 0.65, "grad_norm": 8.006505288216378, "learning_rate": 2.846562047636253e-06, "loss": 0.4546, "step": 22784 }, { "epoch": 0.65, "grad_norm": 6.464221456489846, "learning_rate": 2.8461435143762167e-06, "loss": 0.5005, "step": 22785 }, { "epoch": 0.65, "grad_norm": 5.122131079648771, "learning_rate": 2.845724999645546e-06, "loss": 0.3785, "step": 22786 }, { "epoch": 0.65, "grad_norm": 4.896243466452514, "learning_rate": 2.8453065034478344e-06, "loss": 0.3863, "step": 22787 }, { "epoch": 0.65, "grad_norm": 6.975276473934017, "learning_rate": 2.844888025786685e-06, "loss": 0.4023, "step": 22788 }, { "epoch": 0.65, "grad_norm": 6.252537631806642, "learning_rate": 2.8444695666657005e-06, "loss": 0.5277, "step": 22789 }, { "epoch": 0.65, "grad_norm": 4.498032020348863, "learning_rate": 2.844051126088477e-06, "loss": 0.2486, "step": 22790 }, { "epoch": 0.65, "grad_norm": 10.339457840125583, "learning_rate": 2.843632704058619e-06, "loss": 0.5542, "step": 22791 }, { "epoch": 0.65, "grad_norm": 3.9935568534459756, "learning_rate": 2.843214300579723e-06, "loss": 0.3405, "step": 22792 }, { "epoch": 0.65, "grad_norm": 7.142954807295006, "learning_rate": 2.8427959156553886e-06, "loss": 0.5541, "step": 22793 }, { "epoch": 0.65, "grad_norm": 7.4657349186785735, "learning_rate": 2.8423775492892138e-06, "loss": 0.768, "step": 22794 }, { "epoch": 0.65, "grad_norm": 4.97719180740857, "learning_rate": 2.8419592014848008e-06, "loss": 0.5509, "step": 22795 }, { "epoch": 0.65, "grad_norm": 8.602672203887952, "learning_rate": 2.841540872245745e-06, "loss": 0.9208, "step": 22796 }, { "epoch": 0.65, "grad_norm": 5.946040125269121, "learning_rate": 2.8411225615756476e-06, "loss": 0.3132, "step": 22797 }, { "epoch": 0.65, "grad_norm": 3.790807279023599, "learning_rate": 2.8407042694781085e-06, "loss": 0.1893, "step": 22798 }, { "epoch": 0.65, "grad_norm": 3.03229064382267, "learning_rate": 2.8402859959567253e-06, "loss": 0.3271, "step": 22799 }, { "epoch": 0.65, "grad_norm": 3.881858263251443, "learning_rate": 2.8398677410150953e-06, "loss": 0.2383, "step": 22800 }, { "epoch": 0.65, "grad_norm": 7.258657515067854, "learning_rate": 2.839449504656816e-06, "loss": 0.5004, "step": 22801 }, { "epoch": 0.65, "grad_norm": 3.612985893756222, "learning_rate": 2.8390312868854874e-06, "loss": 0.3281, "step": 22802 }, { "epoch": 0.65, "grad_norm": 3.4617920244709843, "learning_rate": 2.838613087704706e-06, "loss": 0.1565, "step": 22803 }, { "epoch": 0.65, "grad_norm": 4.657694112609803, "learning_rate": 2.8381949071180705e-06, "loss": 0.2747, "step": 22804 }, { "epoch": 0.65, "grad_norm": 8.620068619227059, "learning_rate": 2.837776745129177e-06, "loss": 0.4051, "step": 22805 }, { "epoch": 0.65, "grad_norm": 5.780466634928109, "learning_rate": 2.8373586017416254e-06, "loss": 0.4927, "step": 22806 }, { "epoch": 0.65, "grad_norm": 7.4589539452606575, "learning_rate": 2.8369404769590126e-06, "loss": 0.5771, "step": 22807 }, { "epoch": 0.65, "grad_norm": 4.817818885428334, "learning_rate": 2.8365223707849314e-06, "loss": 0.4988, "step": 22808 }, { "epoch": 0.65, "grad_norm": 5.859815433479452, "learning_rate": 2.8361042832229846e-06, "loss": 0.7326, "step": 22809 }, { "epoch": 0.65, "grad_norm": 19.62438526527124, "learning_rate": 2.8356862142767638e-06, "loss": 0.3602, "step": 22810 }, { "epoch": 0.65, "grad_norm": 6.664587395982695, "learning_rate": 2.8352681639498704e-06, "loss": 0.7017, "step": 22811 }, { "epoch": 0.65, "grad_norm": 4.136293801876677, "learning_rate": 2.8348501322458955e-06, "loss": 0.2142, "step": 22812 }, { "epoch": 0.65, "grad_norm": 4.027419910025287, "learning_rate": 2.834432119168441e-06, "loss": 0.1366, "step": 22813 }, { "epoch": 0.65, "grad_norm": 4.866497646079203, "learning_rate": 2.8340141247211006e-06, "loss": 0.3713, "step": 22814 }, { "epoch": 0.65, "grad_norm": 5.195881564302166, "learning_rate": 2.8335961489074678e-06, "loss": 0.5418, "step": 22815 }, { "epoch": 0.65, "grad_norm": 4.830192049366635, "learning_rate": 2.8331781917311423e-06, "loss": 0.2945, "step": 22816 }, { "epoch": 0.65, "grad_norm": 11.014818267500578, "learning_rate": 2.8327602531957166e-06, "loss": 0.6092, "step": 22817 }, { "epoch": 0.65, "grad_norm": 6.915066334757247, "learning_rate": 2.8323423333047897e-06, "loss": 0.7365, "step": 22818 }, { "epoch": 0.65, "grad_norm": 5.5975646326270105, "learning_rate": 2.8319244320619537e-06, "loss": 0.5367, "step": 22819 }, { "epoch": 0.65, "grad_norm": 7.33452100961963, "learning_rate": 2.831506549470806e-06, "loss": 0.4094, "step": 22820 }, { "epoch": 0.65, "grad_norm": 6.26612068175238, "learning_rate": 2.831088685534937e-06, "loss": 0.4025, "step": 22821 }, { "epoch": 0.65, "grad_norm": 3.454803852690951, "learning_rate": 2.8306708402579465e-06, "loss": 0.1719, "step": 22822 }, { "epoch": 0.65, "grad_norm": 4.3290170514587585, "learning_rate": 2.830253013643429e-06, "loss": 0.5361, "step": 22823 }, { "epoch": 0.65, "grad_norm": 3.9925368243903723, "learning_rate": 2.8298352056949762e-06, "loss": 0.3641, "step": 22824 }, { "epoch": 0.65, "grad_norm": 1.9691635938688505, "learning_rate": 2.829417416416186e-06, "loss": 0.2911, "step": 22825 }, { "epoch": 0.65, "grad_norm": 3.9583873477395906, "learning_rate": 2.8289996458106493e-06, "loss": 0.251, "step": 22826 }, { "epoch": 0.65, "grad_norm": 2.937057603840705, "learning_rate": 2.8285818938819627e-06, "loss": 0.1455, "step": 22827 }, { "epoch": 0.65, "grad_norm": 6.917836546105849, "learning_rate": 2.828164160633716e-06, "loss": 0.7622, "step": 22828 }, { "epoch": 0.65, "grad_norm": 6.6842586113069276, "learning_rate": 2.827746446069506e-06, "loss": 0.5033, "step": 22829 }, { "epoch": 0.65, "grad_norm": 3.1591608391837753, "learning_rate": 2.8273287501929273e-06, "loss": 0.3319, "step": 22830 }, { "epoch": 0.65, "grad_norm": 2.7220666977729127, "learning_rate": 2.8269110730075706e-06, "loss": 0.2657, "step": 22831 }, { "epoch": 0.65, "grad_norm": 9.198893897279751, "learning_rate": 2.826493414517032e-06, "loss": 0.6997, "step": 22832 }, { "epoch": 0.65, "grad_norm": 6.325117927326429, "learning_rate": 2.826075774724903e-06, "loss": 0.31, "step": 22833 }, { "epoch": 0.65, "grad_norm": 8.78911203328056, "learning_rate": 2.825658153634776e-06, "loss": 0.5095, "step": 22834 }, { "epoch": 0.65, "grad_norm": 4.2546028848864506, "learning_rate": 2.825240551250243e-06, "loss": 0.5196, "step": 22835 }, { "epoch": 0.65, "grad_norm": 1.644563271398787, "learning_rate": 2.8248229675748995e-06, "loss": 0.0708, "step": 22836 }, { "epoch": 0.65, "grad_norm": 7.650612573469435, "learning_rate": 2.8244054026123336e-06, "loss": 0.6832, "step": 22837 }, { "epoch": 0.65, "grad_norm": 3.8951006351638195, "learning_rate": 2.8239878563661406e-06, "loss": 0.466, "step": 22838 }, { "epoch": 0.65, "grad_norm": 8.643313798358859, "learning_rate": 2.823570328839914e-06, "loss": 0.3977, "step": 22839 }, { "epoch": 0.65, "grad_norm": 5.210675806340277, "learning_rate": 2.8231528200372448e-06, "loss": 0.3687, "step": 22840 }, { "epoch": 0.65, "grad_norm": 4.972575652255189, "learning_rate": 2.8227353299617227e-06, "loss": 0.3069, "step": 22841 }, { "epoch": 0.65, "grad_norm": 4.66919491038284, "learning_rate": 2.822317858616939e-06, "loss": 0.4477, "step": 22842 }, { "epoch": 0.65, "grad_norm": 3.379408835369913, "learning_rate": 2.8219004060064887e-06, "loss": 0.1784, "step": 22843 }, { "epoch": 0.65, "grad_norm": 29.32775560003111, "learning_rate": 2.8214829721339588e-06, "loss": 0.5052, "step": 22844 }, { "epoch": 0.65, "grad_norm": 5.474059225278392, "learning_rate": 2.8210655570029446e-06, "loss": 0.0792, "step": 22845 }, { "epoch": 0.65, "grad_norm": 5.931912493512398, "learning_rate": 2.8206481606170355e-06, "loss": 0.4283, "step": 22846 }, { "epoch": 0.65, "grad_norm": 3.523496047927028, "learning_rate": 2.82023078297982e-06, "loss": 0.3958, "step": 22847 }, { "epoch": 0.65, "grad_norm": 6.8703957138591845, "learning_rate": 2.819813424094892e-06, "loss": 0.7615, "step": 22848 }, { "epoch": 0.65, "grad_norm": 3.3489420323348424, "learning_rate": 2.819396083965839e-06, "loss": 0.1587, "step": 22849 }, { "epoch": 0.65, "grad_norm": 5.562844276489615, "learning_rate": 2.818978762596255e-06, "loss": 0.6183, "step": 22850 }, { "epoch": 0.65, "grad_norm": 3.7837055524541174, "learning_rate": 2.8185614599897267e-06, "loss": 0.3484, "step": 22851 }, { "epoch": 0.65, "grad_norm": 7.502875666862667, "learning_rate": 2.8181441761498472e-06, "loss": 0.6591, "step": 22852 }, { "epoch": 0.65, "grad_norm": 15.990234688115493, "learning_rate": 2.8177269110802043e-06, "loss": 0.4196, "step": 22853 }, { "epoch": 0.65, "grad_norm": 4.561488470532725, "learning_rate": 2.817309664784387e-06, "loss": 0.3513, "step": 22854 }, { "epoch": 0.65, "grad_norm": 4.770933902574461, "learning_rate": 2.8168924372659866e-06, "loss": 0.6487, "step": 22855 }, { "epoch": 0.65, "grad_norm": 11.692709062855222, "learning_rate": 2.8164752285285913e-06, "loss": 0.8354, "step": 22856 }, { "epoch": 0.65, "grad_norm": 5.407145894520013, "learning_rate": 2.816058038575792e-06, "loss": 0.319, "step": 22857 }, { "epoch": 0.65, "grad_norm": 5.1496394929350195, "learning_rate": 2.8156408674111747e-06, "loss": 0.5306, "step": 22858 }, { "epoch": 0.65, "grad_norm": 7.05394881258131, "learning_rate": 2.8152237150383315e-06, "loss": 0.4002, "step": 22859 }, { "epoch": 0.65, "grad_norm": 5.078688769223734, "learning_rate": 2.81480658146085e-06, "loss": 0.4843, "step": 22860 }, { "epoch": 0.65, "grad_norm": 5.472736148400744, "learning_rate": 2.8143894666823183e-06, "loss": 0.3584, "step": 22861 }, { "epoch": 0.65, "grad_norm": 4.106574884971787, "learning_rate": 2.8139723707063234e-06, "loss": 0.3745, "step": 22862 }, { "epoch": 0.65, "grad_norm": 3.846697196910915, "learning_rate": 2.8135552935364547e-06, "loss": 0.4546, "step": 22863 }, { "epoch": 0.65, "grad_norm": 9.827173402260412, "learning_rate": 2.8131382351763024e-06, "loss": 0.2598, "step": 22864 }, { "epoch": 0.65, "grad_norm": 4.857911358671263, "learning_rate": 2.812721195629451e-06, "loss": 0.589, "step": 22865 }, { "epoch": 0.65, "grad_norm": 7.079543116908826, "learning_rate": 2.8123041748994917e-06, "loss": 0.626, "step": 22866 }, { "epoch": 0.65, "grad_norm": 6.442375373922774, "learning_rate": 2.8118871729900106e-06, "loss": 0.3149, "step": 22867 }, { "epoch": 0.65, "grad_norm": 4.6212572470317435, "learning_rate": 2.811470189904594e-06, "loss": 0.5102, "step": 22868 }, { "epoch": 0.65, "grad_norm": 3.204793932564135, "learning_rate": 2.8110532256468288e-06, "loss": 0.2806, "step": 22869 }, { "epoch": 0.65, "grad_norm": 6.283745511935353, "learning_rate": 2.8106362802203024e-06, "loss": 0.6148, "step": 22870 }, { "epoch": 0.65, "grad_norm": 4.661232861512671, "learning_rate": 2.8102193536286048e-06, "loss": 0.3741, "step": 22871 }, { "epoch": 0.66, "grad_norm": 12.25398897891563, "learning_rate": 2.809802445875319e-06, "loss": 0.4578, "step": 22872 }, { "epoch": 0.66, "grad_norm": 9.181124129188852, "learning_rate": 2.809385556964036e-06, "loss": 0.4507, "step": 22873 }, { "epoch": 0.66, "grad_norm": 5.225173012821299, "learning_rate": 2.8089686868983363e-06, "loss": 0.2805, "step": 22874 }, { "epoch": 0.66, "grad_norm": 5.094085354677376, "learning_rate": 2.808551835681811e-06, "loss": 0.5426, "step": 22875 }, { "epoch": 0.66, "grad_norm": 3.457293329164742, "learning_rate": 2.808135003318042e-06, "loss": 0.3217, "step": 22876 }, { "epoch": 0.66, "grad_norm": 3.0308253983890294, "learning_rate": 2.80771818981062e-06, "loss": 0.1932, "step": 22877 }, { "epoch": 0.66, "grad_norm": 2.479931474304224, "learning_rate": 2.8073013951631266e-06, "loss": 0.2706, "step": 22878 }, { "epoch": 0.66, "grad_norm": 10.530355624925607, "learning_rate": 2.806884619379151e-06, "loss": 1.5858, "step": 22879 }, { "epoch": 0.66, "grad_norm": 6.01358005395352, "learning_rate": 2.806467862462278e-06, "loss": 0.4149, "step": 22880 }, { "epoch": 0.66, "grad_norm": 5.584419599119103, "learning_rate": 2.8060511244160883e-06, "loss": 0.2232, "step": 22881 }, { "epoch": 0.66, "grad_norm": 6.741910290019669, "learning_rate": 2.805634405244174e-06, "loss": 0.5181, "step": 22882 }, { "epoch": 0.66, "grad_norm": 2.861160398391044, "learning_rate": 2.805217704950114e-06, "loss": 0.169, "step": 22883 }, { "epoch": 0.66, "grad_norm": 7.17784636325796, "learning_rate": 2.8048010235374977e-06, "loss": 0.5354, "step": 22884 }, { "epoch": 0.66, "grad_norm": 4.18098589258339, "learning_rate": 2.8043843610099066e-06, "loss": 0.5193, "step": 22885 }, { "epoch": 0.66, "grad_norm": 8.448652737900424, "learning_rate": 2.803967717370928e-06, "loss": 0.6616, "step": 22886 }, { "epoch": 0.66, "grad_norm": 17.39108355278338, "learning_rate": 2.803551092624145e-06, "loss": 0.6109, "step": 22887 }, { "epoch": 0.66, "grad_norm": 6.387499522976195, "learning_rate": 2.8031344867731393e-06, "loss": 0.5971, "step": 22888 }, { "epoch": 0.66, "grad_norm": 4.701533558415097, "learning_rate": 2.802717899821499e-06, "loss": 0.2442, "step": 22889 }, { "epoch": 0.66, "grad_norm": 7.2516759053531405, "learning_rate": 2.802301331772804e-06, "loss": 0.6079, "step": 22890 }, { "epoch": 0.66, "grad_norm": 6.738309556099966, "learning_rate": 2.8018847826306426e-06, "loss": 0.3211, "step": 22891 }, { "epoch": 0.66, "grad_norm": 4.45190956527528, "learning_rate": 2.8014682523985936e-06, "loss": 0.4764, "step": 22892 }, { "epoch": 0.66, "grad_norm": 5.374779253130514, "learning_rate": 2.8010517410802453e-06, "loss": 0.1842, "step": 22893 }, { "epoch": 0.66, "grad_norm": 2.644004960988941, "learning_rate": 2.8006352486791767e-06, "loss": 0.2513, "step": 22894 }, { "epoch": 0.66, "grad_norm": 5.240572798667426, "learning_rate": 2.8002187751989705e-06, "loss": 0.6393, "step": 22895 }, { "epoch": 0.66, "grad_norm": 5.570339293769931, "learning_rate": 2.7998023206432146e-06, "loss": 0.4913, "step": 22896 }, { "epoch": 0.66, "grad_norm": 9.461245896394848, "learning_rate": 2.7993858850154852e-06, "loss": 0.6669, "step": 22897 }, { "epoch": 0.66, "grad_norm": 7.851099773075716, "learning_rate": 2.7989694683193704e-06, "loss": 0.6807, "step": 22898 }, { "epoch": 0.66, "grad_norm": 6.56512115448072, "learning_rate": 2.7985530705584484e-06, "loss": 0.799, "step": 22899 }, { "epoch": 0.66, "grad_norm": 7.3018849591252, "learning_rate": 2.7981366917363073e-06, "loss": 0.3086, "step": 22900 }, { "epoch": 0.66, "grad_norm": 6.707875344732668, "learning_rate": 2.7977203318565206e-06, "loss": 0.7466, "step": 22901 }, { "epoch": 0.66, "grad_norm": 3.4998689354471, "learning_rate": 2.7973039909226774e-06, "loss": 0.4426, "step": 22902 }, { "epoch": 0.66, "grad_norm": 11.390415017033133, "learning_rate": 2.7968876689383545e-06, "loss": 0.7337, "step": 22903 }, { "epoch": 0.66, "grad_norm": 7.105023941117822, "learning_rate": 2.796471365907136e-06, "loss": 0.3385, "step": 22904 }, { "epoch": 0.66, "grad_norm": 4.858118219776985, "learning_rate": 2.796055081832605e-06, "loss": 0.6334, "step": 22905 }, { "epoch": 0.66, "grad_norm": 3.5141602071338314, "learning_rate": 2.79563881671834e-06, "loss": 0.1718, "step": 22906 }, { "epoch": 0.66, "grad_norm": 6.336454283385702, "learning_rate": 2.7952225705679236e-06, "loss": 0.3519, "step": 22907 }, { "epoch": 0.66, "grad_norm": 6.219344604282973, "learning_rate": 2.794806343384934e-06, "loss": 0.5637, "step": 22908 }, { "epoch": 0.66, "grad_norm": 2.0891134073278974, "learning_rate": 2.794390135172955e-06, "loss": 0.101, "step": 22909 }, { "epoch": 0.66, "grad_norm": 5.944072097731263, "learning_rate": 2.7939739459355653e-06, "loss": 0.307, "step": 22910 }, { "epoch": 0.66, "grad_norm": 8.397061740060868, "learning_rate": 2.7935577756763453e-06, "loss": 0.7612, "step": 22911 }, { "epoch": 0.66, "grad_norm": 5.48601763869651, "learning_rate": 2.7931416243988786e-06, "loss": 0.2615, "step": 22912 }, { "epoch": 0.66, "grad_norm": 6.250187756579212, "learning_rate": 2.7927254921067428e-06, "loss": 0.4866, "step": 22913 }, { "epoch": 0.66, "grad_norm": 5.9523040525827735, "learning_rate": 2.7923093788035173e-06, "loss": 0.8525, "step": 22914 }, { "epoch": 0.66, "grad_norm": 8.690173938270314, "learning_rate": 2.791893284492781e-06, "loss": 0.7731, "step": 22915 }, { "epoch": 0.66, "grad_norm": 8.433413667509813, "learning_rate": 2.7914772091781167e-06, "loss": 0.4497, "step": 22916 }, { "epoch": 0.66, "grad_norm": 10.273544067294242, "learning_rate": 2.7910611528631e-06, "loss": 0.3371, "step": 22917 }, { "epoch": 0.66, "grad_norm": 9.902788104859956, "learning_rate": 2.790645115551314e-06, "loss": 0.5278, "step": 22918 }, { "epoch": 0.66, "grad_norm": 6.080238543586733, "learning_rate": 2.790229097246334e-06, "loss": 0.4516, "step": 22919 }, { "epoch": 0.66, "grad_norm": 4.31381609087588, "learning_rate": 2.7898130979517435e-06, "loss": 0.4737, "step": 22920 }, { "epoch": 0.66, "grad_norm": 6.699279678112148, "learning_rate": 2.7893971176711186e-06, "loss": 0.3965, "step": 22921 }, { "epoch": 0.66, "grad_norm": 3.5869562035963796, "learning_rate": 2.788981156408036e-06, "loss": 0.3899, "step": 22922 }, { "epoch": 0.66, "grad_norm": 2.8968344108429274, "learning_rate": 2.7885652141660787e-06, "loss": 0.2167, "step": 22923 }, { "epoch": 0.66, "grad_norm": 8.372727726379146, "learning_rate": 2.78814929094882e-06, "loss": 0.5461, "step": 22924 }, { "epoch": 0.66, "grad_norm": 8.137088205023302, "learning_rate": 2.7877333867598434e-06, "loss": 0.4397, "step": 22925 }, { "epoch": 0.66, "grad_norm": 9.842094076233735, "learning_rate": 2.787317501602721e-06, "loss": 0.4837, "step": 22926 }, { "epoch": 0.66, "grad_norm": 4.80867588149707, "learning_rate": 2.7869016354810364e-06, "loss": 0.5571, "step": 22927 }, { "epoch": 0.66, "grad_norm": 4.971307588108948, "learning_rate": 2.7864857883983643e-06, "loss": 0.2887, "step": 22928 }, { "epoch": 0.66, "grad_norm": 19.186243308101755, "learning_rate": 2.7860699603582807e-06, "loss": 0.3017, "step": 22929 }, { "epoch": 0.66, "grad_norm": 7.8720207028233045, "learning_rate": 2.7856541513643666e-06, "loss": 0.4026, "step": 22930 }, { "epoch": 0.66, "grad_norm": 4.612817183756372, "learning_rate": 2.785238361420195e-06, "loss": 0.2792, "step": 22931 }, { "epoch": 0.66, "grad_norm": 4.599526024324408, "learning_rate": 2.784822590529347e-06, "loss": 0.6858, "step": 22932 }, { "epoch": 0.66, "grad_norm": 6.087778226850815, "learning_rate": 2.7844068386953984e-06, "loss": 0.4898, "step": 22933 }, { "epoch": 0.66, "grad_norm": 2.0628627689259944, "learning_rate": 2.783991105921924e-06, "loss": 0.1486, "step": 22934 }, { "epoch": 0.66, "grad_norm": 4.623223840044481, "learning_rate": 2.783575392212499e-06, "loss": 0.2858, "step": 22935 }, { "epoch": 0.66, "grad_norm": 7.842337735765032, "learning_rate": 2.783159697570703e-06, "loss": 0.8084, "step": 22936 }, { "epoch": 0.66, "grad_norm": 5.09756785349004, "learning_rate": 2.782744022000112e-06, "loss": 0.6319, "step": 22937 }, { "epoch": 0.66, "grad_norm": 4.440357309174803, "learning_rate": 2.7823283655043004e-06, "loss": 0.346, "step": 22938 }, { "epoch": 0.66, "grad_norm": 3.6659141042774293, "learning_rate": 2.7819127280868463e-06, "loss": 0.5229, "step": 22939 }, { "epoch": 0.66, "grad_norm": 14.717640440115112, "learning_rate": 2.7814971097513237e-06, "loss": 0.9372, "step": 22940 }, { "epoch": 0.66, "grad_norm": 4.85843612579603, "learning_rate": 2.781081510501309e-06, "loss": 0.2172, "step": 22941 }, { "epoch": 0.66, "grad_norm": 11.938746282420968, "learning_rate": 2.7806659303403736e-06, "loss": 0.5899, "step": 22942 }, { "epoch": 0.66, "grad_norm": 6.067593587900974, "learning_rate": 2.7802503692720993e-06, "loss": 0.52, "step": 22943 }, { "epoch": 0.66, "grad_norm": 5.615990820312062, "learning_rate": 2.7798348273000554e-06, "loss": 0.424, "step": 22944 }, { "epoch": 0.66, "grad_norm": 6.715890462180887, "learning_rate": 2.779419304427819e-06, "loss": 0.3742, "step": 22945 }, { "epoch": 0.66, "grad_norm": 6.248079615248113, "learning_rate": 2.779003800658967e-06, "loss": 0.7176, "step": 22946 }, { "epoch": 0.66, "grad_norm": 9.917910671477252, "learning_rate": 2.7785883159970717e-06, "loss": 0.3452, "step": 22947 }, { "epoch": 0.66, "grad_norm": 8.389700556288904, "learning_rate": 2.7781728504457074e-06, "loss": 0.3192, "step": 22948 }, { "epoch": 0.66, "grad_norm": 4.991770982156488, "learning_rate": 2.7777574040084466e-06, "loss": 0.2709, "step": 22949 }, { "epoch": 0.66, "grad_norm": 9.51037212137164, "learning_rate": 2.7773419766888676e-06, "loss": 0.5918, "step": 22950 }, { "epoch": 0.66, "grad_norm": 6.049439391652551, "learning_rate": 2.7769265684905395e-06, "loss": 0.2881, "step": 22951 }, { "epoch": 0.66, "grad_norm": 2.2186197390398585, "learning_rate": 2.7765111794170384e-06, "loss": 0.2043, "step": 22952 }, { "epoch": 0.66, "grad_norm": 5.368952565041006, "learning_rate": 2.7760958094719404e-06, "loss": 0.2528, "step": 22953 }, { "epoch": 0.66, "grad_norm": 6.401039498072465, "learning_rate": 2.775680458658816e-06, "loss": 0.48, "step": 22954 }, { "epoch": 0.66, "grad_norm": 4.276265967461091, "learning_rate": 2.775265126981239e-06, "loss": 0.5742, "step": 22955 }, { "epoch": 0.66, "grad_norm": 4.599492175639958, "learning_rate": 2.7748498144427794e-06, "loss": 0.4763, "step": 22956 }, { "epoch": 0.66, "grad_norm": 4.419577321754286, "learning_rate": 2.7744345210470157e-06, "loss": 0.6064, "step": 22957 }, { "epoch": 0.66, "grad_norm": 8.294144434342556, "learning_rate": 2.7740192467975156e-06, "loss": 0.7488, "step": 22958 }, { "epoch": 0.66, "grad_norm": 2.704764654402273, "learning_rate": 2.773603991697856e-06, "loss": 0.2092, "step": 22959 }, { "epoch": 0.66, "grad_norm": 4.955410114334142, "learning_rate": 2.773188755751607e-06, "loss": 0.7211, "step": 22960 }, { "epoch": 0.66, "grad_norm": 4.468863799240076, "learning_rate": 2.772773538962339e-06, "loss": 0.3284, "step": 22961 }, { "epoch": 0.66, "grad_norm": 4.097095636614229, "learning_rate": 2.7723583413336275e-06, "loss": 0.4348, "step": 22962 }, { "epoch": 0.66, "grad_norm": 19.821836696385073, "learning_rate": 2.771943162869041e-06, "loss": 0.5185, "step": 22963 }, { "epoch": 0.66, "grad_norm": 4.327890940648163, "learning_rate": 2.771528003572156e-06, "loss": 0.1335, "step": 22964 }, { "epoch": 0.66, "grad_norm": 4.268579877867647, "learning_rate": 2.771112863446538e-06, "loss": 0.3979, "step": 22965 }, { "epoch": 0.66, "grad_norm": 2.1449214045368294, "learning_rate": 2.7706977424957646e-06, "loss": 0.0461, "step": 22966 }, { "epoch": 0.66, "grad_norm": 3.4188442307898304, "learning_rate": 2.770282640723404e-06, "loss": 0.4173, "step": 22967 }, { "epoch": 0.66, "grad_norm": 3.886643022729224, "learning_rate": 2.769867558133027e-06, "loss": 0.211, "step": 22968 }, { "epoch": 0.66, "grad_norm": 6.2800727614057, "learning_rate": 2.7694524947282024e-06, "loss": 0.511, "step": 22969 }, { "epoch": 0.66, "grad_norm": 8.250536814477636, "learning_rate": 2.769037450512504e-06, "loss": 0.5624, "step": 22970 }, { "epoch": 0.66, "grad_norm": 5.317663802191793, "learning_rate": 2.7686224254895033e-06, "loss": 0.3495, "step": 22971 }, { "epoch": 0.66, "grad_norm": 6.194569104151507, "learning_rate": 2.7682074196627672e-06, "loss": 0.402, "step": 22972 }, { "epoch": 0.66, "grad_norm": 6.576463756875857, "learning_rate": 2.7677924330358705e-06, "loss": 0.5923, "step": 22973 }, { "epoch": 0.66, "grad_norm": 10.629405286486387, "learning_rate": 2.76737746561238e-06, "loss": 0.4545, "step": 22974 }, { "epoch": 0.66, "grad_norm": 9.371614480666773, "learning_rate": 2.766962517395867e-06, "loss": 0.7184, "step": 22975 }, { "epoch": 0.66, "grad_norm": 2.67966080810777, "learning_rate": 2.7665475883898973e-06, "loss": 0.3061, "step": 22976 }, { "epoch": 0.66, "grad_norm": 6.466445254274715, "learning_rate": 2.766132678598045e-06, "loss": 0.3133, "step": 22977 }, { "epoch": 0.66, "grad_norm": 3.7422892451142657, "learning_rate": 2.7657177880238793e-06, "loss": 0.3102, "step": 22978 }, { "epoch": 0.66, "grad_norm": 8.507106951417816, "learning_rate": 2.7653029166709666e-06, "loss": 1.0219, "step": 22979 }, { "epoch": 0.66, "grad_norm": 9.12730594135118, "learning_rate": 2.76488806454288e-06, "loss": 0.198, "step": 22980 }, { "epoch": 0.66, "grad_norm": 10.94776824695704, "learning_rate": 2.764473231643187e-06, "loss": 0.6417, "step": 22981 }, { "epoch": 0.66, "grad_norm": 4.9842118861376195, "learning_rate": 2.764058417975454e-06, "loss": 0.6299, "step": 22982 }, { "epoch": 0.66, "grad_norm": 3.5614293313256504, "learning_rate": 2.7636436235432497e-06, "loss": 0.3576, "step": 22983 }, { "epoch": 0.66, "grad_norm": 10.791295775492403, "learning_rate": 2.7632288483501457e-06, "loss": 0.368, "step": 22984 }, { "epoch": 0.66, "grad_norm": 8.737227791187411, "learning_rate": 2.762814092399707e-06, "loss": 0.5415, "step": 22985 }, { "epoch": 0.66, "grad_norm": 3.758223798611903, "learning_rate": 2.7623993556955033e-06, "loss": 0.487, "step": 22986 }, { "epoch": 0.66, "grad_norm": 6.414812868857209, "learning_rate": 2.761984638241104e-06, "loss": 0.8445, "step": 22987 }, { "epoch": 0.66, "grad_norm": 4.685822987977806, "learning_rate": 2.7615699400400754e-06, "loss": 0.3376, "step": 22988 }, { "epoch": 0.66, "grad_norm": 5.7052726986676205, "learning_rate": 2.761155261095985e-06, "loss": 0.2588, "step": 22989 }, { "epoch": 0.66, "grad_norm": 3.4628517936168834, "learning_rate": 2.760740601412398e-06, "loss": 0.2215, "step": 22990 }, { "epoch": 0.66, "grad_norm": 7.1112644241945775, "learning_rate": 2.7603259609928867e-06, "loss": 0.3966, "step": 22991 }, { "epoch": 0.66, "grad_norm": 8.04306224381445, "learning_rate": 2.759911339841013e-06, "loss": 0.7322, "step": 22992 }, { "epoch": 0.66, "grad_norm": 4.7337137396274604, "learning_rate": 2.759496737960348e-06, "loss": 0.2826, "step": 22993 }, { "epoch": 0.66, "grad_norm": 6.118479936852854, "learning_rate": 2.7590821553544568e-06, "loss": 0.7156, "step": 22994 }, { "epoch": 0.66, "grad_norm": 5.175155705491412, "learning_rate": 2.7586675920269044e-06, "loss": 0.4628, "step": 22995 }, { "epoch": 0.66, "grad_norm": 3.1043532569328103, "learning_rate": 2.75825304798126e-06, "loss": 0.3863, "step": 22996 }, { "epoch": 0.66, "grad_norm": 13.101528797777352, "learning_rate": 2.757838523221087e-06, "loss": 0.5228, "step": 22997 }, { "epoch": 0.66, "grad_norm": 5.7219153435627055, "learning_rate": 2.7574240177499556e-06, "loss": 0.4905, "step": 22998 }, { "epoch": 0.66, "grad_norm": 6.427857167436819, "learning_rate": 2.757009531571426e-06, "loss": 0.6464, "step": 22999 }, { "epoch": 0.66, "grad_norm": 7.018241105691713, "learning_rate": 2.7565950646890705e-06, "loss": 0.5188, "step": 23000 }, { "epoch": 0.66, "grad_norm": 7.742709576261816, "learning_rate": 2.7561806171064507e-06, "loss": 0.5314, "step": 23001 }, { "epoch": 0.66, "grad_norm": 5.121942887662187, "learning_rate": 2.7557661888271316e-06, "loss": 0.4226, "step": 23002 }, { "epoch": 0.66, "grad_norm": 4.567527561532619, "learning_rate": 2.7553517798546804e-06, "loss": 0.5614, "step": 23003 }, { "epoch": 0.66, "grad_norm": 1.4661790178878769, "learning_rate": 2.75493739019266e-06, "loss": 0.0753, "step": 23004 }, { "epoch": 0.66, "grad_norm": 5.054484296956388, "learning_rate": 2.7545230198446395e-06, "loss": 0.2099, "step": 23005 }, { "epoch": 0.66, "grad_norm": 9.65343090991589, "learning_rate": 2.7541086688141784e-06, "loss": 0.3432, "step": 23006 }, { "epoch": 0.66, "grad_norm": 5.743684036490444, "learning_rate": 2.7536943371048454e-06, "loss": 0.7265, "step": 23007 }, { "epoch": 0.66, "grad_norm": 6.358551159481836, "learning_rate": 2.753280024720204e-06, "loss": 0.6573, "step": 23008 }, { "epoch": 0.66, "grad_norm": 5.559001529665258, "learning_rate": 2.752865731663818e-06, "loss": 0.3795, "step": 23009 }, { "epoch": 0.66, "grad_norm": 5.480232130341113, "learning_rate": 2.7524514579392493e-06, "loss": 0.4914, "step": 23010 }, { "epoch": 0.66, "grad_norm": 7.136171116355346, "learning_rate": 2.752037203550064e-06, "loss": 0.5838, "step": 23011 }, { "epoch": 0.66, "grad_norm": 5.563193331595465, "learning_rate": 2.751622968499828e-06, "loss": 0.0579, "step": 23012 }, { "epoch": 0.66, "grad_norm": 5.550084091099334, "learning_rate": 2.7512087527921007e-06, "loss": 0.3695, "step": 23013 }, { "epoch": 0.66, "grad_norm": 5.292573650858528, "learning_rate": 2.7507945564304496e-06, "loss": 0.3056, "step": 23014 }, { "epoch": 0.66, "grad_norm": 7.008572880585551, "learning_rate": 2.750380379418436e-06, "loss": 0.3955, "step": 23015 }, { "epoch": 0.66, "grad_norm": 6.080420249481843, "learning_rate": 2.749966221759623e-06, "loss": 0.4588, "step": 23016 }, { "epoch": 0.66, "grad_norm": 6.321412244256881, "learning_rate": 2.7495520834575717e-06, "loss": 0.84, "step": 23017 }, { "epoch": 0.66, "grad_norm": 4.330305410983454, "learning_rate": 2.7491379645158466e-06, "loss": 0.4857, "step": 23018 }, { "epoch": 0.66, "grad_norm": 4.9555300580543875, "learning_rate": 2.748723864938012e-06, "loss": 0.6656, "step": 23019 }, { "epoch": 0.66, "grad_norm": 4.110970278137671, "learning_rate": 2.7483097847276296e-06, "loss": 0.3366, "step": 23020 }, { "epoch": 0.66, "grad_norm": 7.122665558240189, "learning_rate": 2.7478957238882604e-06, "loss": 0.2401, "step": 23021 }, { "epoch": 0.66, "grad_norm": 2.8946797980159014, "learning_rate": 2.747481682423465e-06, "loss": 0.4293, "step": 23022 }, { "epoch": 0.66, "grad_norm": 3.662752970789522, "learning_rate": 2.7470676603368094e-06, "loss": 0.4095, "step": 23023 }, { "epoch": 0.66, "grad_norm": 7.468322837457288, "learning_rate": 2.7466536576318512e-06, "loss": 0.4051, "step": 23024 }, { "epoch": 0.66, "grad_norm": 6.363324974851213, "learning_rate": 2.746239674312157e-06, "loss": 0.5027, "step": 23025 }, { "epoch": 0.66, "grad_norm": 4.166766349871637, "learning_rate": 2.7458257103812825e-06, "loss": 0.3262, "step": 23026 }, { "epoch": 0.66, "grad_norm": 4.8907359296509245, "learning_rate": 2.745411765842794e-06, "loss": 0.3732, "step": 23027 }, { "epoch": 0.66, "grad_norm": 6.839284772705674, "learning_rate": 2.744997840700251e-06, "loss": 0.7283, "step": 23028 }, { "epoch": 0.66, "grad_norm": 4.4243845473918135, "learning_rate": 2.744583934957211e-06, "loss": 0.4719, "step": 23029 }, { "epoch": 0.66, "grad_norm": 7.864797249888922, "learning_rate": 2.7441700486172406e-06, "loss": 0.5093, "step": 23030 }, { "epoch": 0.66, "grad_norm": 5.020541529532035, "learning_rate": 2.7437561816838953e-06, "loss": 0.1211, "step": 23031 }, { "epoch": 0.66, "grad_norm": 6.483649045453539, "learning_rate": 2.7433423341607394e-06, "loss": 0.6235, "step": 23032 }, { "epoch": 0.66, "grad_norm": 6.722700155448398, "learning_rate": 2.74292850605133e-06, "loss": 0.3377, "step": 23033 }, { "epoch": 0.66, "grad_norm": 3.938475896606843, "learning_rate": 2.7425146973592308e-06, "loss": 0.3379, "step": 23034 }, { "epoch": 0.66, "grad_norm": 3.161292363460289, "learning_rate": 2.742100908087999e-06, "loss": 0.1598, "step": 23035 }, { "epoch": 0.66, "grad_norm": 5.288119842054284, "learning_rate": 2.7416871382411936e-06, "loss": 0.5712, "step": 23036 }, { "epoch": 0.66, "grad_norm": 14.05148222900457, "learning_rate": 2.741273387822378e-06, "loss": 0.7264, "step": 23037 }, { "epoch": 0.66, "grad_norm": 3.615886792173617, "learning_rate": 2.7408596568351072e-06, "loss": 0.2034, "step": 23038 }, { "epoch": 0.66, "grad_norm": 4.37739773849263, "learning_rate": 2.740445945282944e-06, "loss": 0.5719, "step": 23039 }, { "epoch": 0.66, "grad_norm": 6.5037204292031285, "learning_rate": 2.7400322531694447e-06, "loss": 1.0415, "step": 23040 }, { "epoch": 0.66, "grad_norm": 7.692927948630551, "learning_rate": 2.739618580498171e-06, "loss": 0.575, "step": 23041 }, { "epoch": 0.66, "grad_norm": 3.2223866852812915, "learning_rate": 2.7392049272726817e-06, "loss": 0.2207, "step": 23042 }, { "epoch": 0.66, "grad_norm": 3.9382296900296034, "learning_rate": 2.738791293496531e-06, "loss": 0.4635, "step": 23043 }, { "epoch": 0.66, "grad_norm": 2.658568032785169, "learning_rate": 2.738377679173283e-06, "loss": 0.2305, "step": 23044 }, { "epoch": 0.66, "grad_norm": 6.2819800640592645, "learning_rate": 2.7379640843064904e-06, "loss": 0.6828, "step": 23045 }, { "epoch": 0.66, "grad_norm": 8.457244828062336, "learning_rate": 2.7375505088997166e-06, "loss": 0.4555, "step": 23046 }, { "epoch": 0.66, "grad_norm": 4.342042024583889, "learning_rate": 2.737136952956516e-06, "loss": 0.3481, "step": 23047 }, { "epoch": 0.66, "grad_norm": 3.718430433246563, "learning_rate": 2.7367234164804513e-06, "loss": 0.2937, "step": 23048 }, { "epoch": 0.66, "grad_norm": 5.99137660064106, "learning_rate": 2.736309899475072e-06, "loss": 0.4825, "step": 23049 }, { "epoch": 0.66, "grad_norm": 7.174747000031801, "learning_rate": 2.7358964019439415e-06, "loss": 0.6224, "step": 23050 }, { "epoch": 0.66, "grad_norm": 4.345327900321196, "learning_rate": 2.7354829238906144e-06, "loss": 0.5927, "step": 23051 }, { "epoch": 0.66, "grad_norm": 5.176155877140799, "learning_rate": 2.7350694653186487e-06, "loss": 0.6886, "step": 23052 }, { "epoch": 0.66, "grad_norm": 5.727840973034414, "learning_rate": 2.7346560262316036e-06, "loss": 0.9007, "step": 23053 }, { "epoch": 0.66, "grad_norm": 6.093979542639373, "learning_rate": 2.734242606633033e-06, "loss": 0.5628, "step": 23054 }, { "epoch": 0.66, "grad_norm": 10.200331534813218, "learning_rate": 2.7338292065264948e-06, "loss": 0.5807, "step": 23055 }, { "epoch": 0.66, "grad_norm": 7.089108835177794, "learning_rate": 2.7334158259155436e-06, "loss": 0.3794, "step": 23056 }, { "epoch": 0.66, "grad_norm": 9.040670688696986, "learning_rate": 2.7330024648037377e-06, "loss": 0.842, "step": 23057 }, { "epoch": 0.66, "grad_norm": 7.4343838615092075, "learning_rate": 2.7325891231946316e-06, "loss": 0.459, "step": 23058 }, { "epoch": 0.66, "grad_norm": 5.370929951337598, "learning_rate": 2.732175801091782e-06, "loss": 0.6496, "step": 23059 }, { "epoch": 0.66, "grad_norm": 2.683128416537729, "learning_rate": 2.7317624984987457e-06, "loss": 0.055, "step": 23060 }, { "epoch": 0.66, "grad_norm": 7.005242155955369, "learning_rate": 2.7313492154190784e-06, "loss": 0.5358, "step": 23061 }, { "epoch": 0.66, "grad_norm": 6.736423012852381, "learning_rate": 2.730935951856334e-06, "loss": 0.467, "step": 23062 }, { "epoch": 0.66, "grad_norm": 7.240524117430082, "learning_rate": 2.7305227078140663e-06, "loss": 0.469, "step": 23063 }, { "epoch": 0.66, "grad_norm": 5.598553262887666, "learning_rate": 2.730109483295834e-06, "loss": 0.706, "step": 23064 }, { "epoch": 0.66, "grad_norm": 8.967048457028694, "learning_rate": 2.7296962783051895e-06, "loss": 0.426, "step": 23065 }, { "epoch": 0.66, "grad_norm": 5.534808420520223, "learning_rate": 2.729283092845689e-06, "loss": 0.503, "step": 23066 }, { "epoch": 0.66, "grad_norm": 7.101956498197868, "learning_rate": 2.728869926920885e-06, "loss": 0.5098, "step": 23067 }, { "epoch": 0.66, "grad_norm": 4.232038521932066, "learning_rate": 2.728456780534336e-06, "loss": 0.5218, "step": 23068 }, { "epoch": 0.66, "grad_norm": 3.796352052193008, "learning_rate": 2.7280436536895926e-06, "loss": 0.5791, "step": 23069 }, { "epoch": 0.66, "grad_norm": 6.475509944556738, "learning_rate": 2.7276305463902086e-06, "loss": 0.2766, "step": 23070 }, { "epoch": 0.66, "grad_norm": 6.166850766019989, "learning_rate": 2.7272174586397415e-06, "loss": 0.4776, "step": 23071 }, { "epoch": 0.66, "grad_norm": 5.724038185717343, "learning_rate": 2.7268043904417403e-06, "loss": 0.6434, "step": 23072 }, { "epoch": 0.66, "grad_norm": 7.487312011396738, "learning_rate": 2.7263913417997627e-06, "loss": 0.1833, "step": 23073 }, { "epoch": 0.66, "grad_norm": 3.872193150538137, "learning_rate": 2.725978312717359e-06, "loss": 0.2882, "step": 23074 }, { "epoch": 0.66, "grad_norm": 7.303059768875129, "learning_rate": 2.725565303198088e-06, "loss": 0.421, "step": 23075 }, { "epoch": 0.66, "grad_norm": 5.106558066965486, "learning_rate": 2.7251523132454938e-06, "loss": 0.2906, "step": 23076 }, { "epoch": 0.66, "grad_norm": 6.201164070184556, "learning_rate": 2.724739342863135e-06, "loss": 0.3491, "step": 23077 }, { "epoch": 0.66, "grad_norm": 7.5284545268917284, "learning_rate": 2.7243263920545654e-06, "loss": 1.0426, "step": 23078 }, { "epoch": 0.66, "grad_norm": 7.024597513472772, "learning_rate": 2.723913460823333e-06, "loss": 0.1414, "step": 23079 }, { "epoch": 0.66, "grad_norm": 8.056190861025453, "learning_rate": 2.723500549172996e-06, "loss": 0.462, "step": 23080 }, { "epoch": 0.66, "grad_norm": 3.158649854140533, "learning_rate": 2.7230876571071025e-06, "loss": 0.2975, "step": 23081 }, { "epoch": 0.66, "grad_norm": 8.821615388126501, "learning_rate": 2.7226747846292055e-06, "loss": 0.4947, "step": 23082 }, { "epoch": 0.66, "grad_norm": 3.604756517415101, "learning_rate": 2.7222619317428558e-06, "loss": 0.1753, "step": 23083 }, { "epoch": 0.66, "grad_norm": 4.803898642684861, "learning_rate": 2.7218490984516053e-06, "loss": 0.2793, "step": 23084 }, { "epoch": 0.66, "grad_norm": 10.026131889961164, "learning_rate": 2.721436284759009e-06, "loss": 0.5057, "step": 23085 }, { "epoch": 0.66, "grad_norm": 2.3306547923804004, "learning_rate": 2.721023490668614e-06, "loss": 0.1, "step": 23086 }, { "epoch": 0.66, "grad_norm": 4.45243964024219, "learning_rate": 2.7206107161839757e-06, "loss": 0.354, "step": 23087 }, { "epoch": 0.66, "grad_norm": 2.8156291932001762, "learning_rate": 2.7201979613086414e-06, "loss": 0.4055, "step": 23088 }, { "epoch": 0.66, "grad_norm": 9.544663043461524, "learning_rate": 2.7197852260461643e-06, "loss": 0.6564, "step": 23089 }, { "epoch": 0.66, "grad_norm": 4.6888257504280535, "learning_rate": 2.7193725104000924e-06, "loss": 0.2955, "step": 23090 }, { "epoch": 0.66, "grad_norm": 6.304429324322479, "learning_rate": 2.7189598143739793e-06, "loss": 0.4926, "step": 23091 }, { "epoch": 0.66, "grad_norm": 3.445204821783182, "learning_rate": 2.7185471379713724e-06, "loss": 0.2868, "step": 23092 }, { "epoch": 0.66, "grad_norm": 4.651338483075927, "learning_rate": 2.7181344811958233e-06, "loss": 0.3876, "step": 23093 }, { "epoch": 0.66, "grad_norm": 10.16514822037953, "learning_rate": 2.7177218440508845e-06, "loss": 0.4436, "step": 23094 }, { "epoch": 0.66, "grad_norm": 8.822541380147726, "learning_rate": 2.7173092265401034e-06, "loss": 0.368, "step": 23095 }, { "epoch": 0.66, "grad_norm": 4.132102342824525, "learning_rate": 2.7168966286670306e-06, "loss": 0.7837, "step": 23096 }, { "epoch": 0.66, "grad_norm": 3.8113942027876058, "learning_rate": 2.716484050435213e-06, "loss": 0.1159, "step": 23097 }, { "epoch": 0.66, "grad_norm": 5.076508813575916, "learning_rate": 2.7160714918482033e-06, "loss": 0.6832, "step": 23098 }, { "epoch": 0.66, "grad_norm": 6.808089131987724, "learning_rate": 2.7156589529095476e-06, "loss": 0.5756, "step": 23099 }, { "epoch": 0.66, "grad_norm": 9.068271456605904, "learning_rate": 2.7152464336227967e-06, "loss": 0.7257, "step": 23100 }, { "epoch": 0.66, "grad_norm": 7.309066178157285, "learning_rate": 2.7148339339915016e-06, "loss": 0.3042, "step": 23101 }, { "epoch": 0.66, "grad_norm": 9.101230127380747, "learning_rate": 2.714421454019208e-06, "loss": 0.6487, "step": 23102 }, { "epoch": 0.66, "grad_norm": 7.966200357088219, "learning_rate": 2.7140089937094664e-06, "loss": 0.2374, "step": 23103 }, { "epoch": 0.66, "grad_norm": 4.238935738231453, "learning_rate": 2.7135965530658215e-06, "loss": 0.2586, "step": 23104 }, { "epoch": 0.66, "grad_norm": 5.007993888247968, "learning_rate": 2.713184132091826e-06, "loss": 0.426, "step": 23105 }, { "epoch": 0.66, "grad_norm": 9.600464301007628, "learning_rate": 2.712771730791024e-06, "loss": 0.9495, "step": 23106 }, { "epoch": 0.66, "grad_norm": 4.235853324231071, "learning_rate": 2.7123593491669666e-06, "loss": 0.4393, "step": 23107 }, { "epoch": 0.66, "grad_norm": 8.148162690865739, "learning_rate": 2.711946987223201e-06, "loss": 0.3047, "step": 23108 }, { "epoch": 0.66, "grad_norm": 4.968167852691298, "learning_rate": 2.7115346449632705e-06, "loss": 0.4423, "step": 23109 }, { "epoch": 0.66, "grad_norm": 6.400721197173673, "learning_rate": 2.7111223223907283e-06, "loss": 0.631, "step": 23110 }, { "epoch": 0.66, "grad_norm": 3.2202851791740637, "learning_rate": 2.7107100195091174e-06, "loss": 0.1908, "step": 23111 }, { "epoch": 0.66, "grad_norm": 4.567551364035685, "learning_rate": 2.7102977363219872e-06, "loss": 0.5496, "step": 23112 }, { "epoch": 0.66, "grad_norm": 4.250976085772357, "learning_rate": 2.7098854728328827e-06, "loss": 0.4671, "step": 23113 }, { "epoch": 0.66, "grad_norm": 3.3009967169324974, "learning_rate": 2.709473229045353e-06, "loss": 0.3839, "step": 23114 }, { "epoch": 0.66, "grad_norm": 9.098874670181576, "learning_rate": 2.7090610049629435e-06, "loss": 0.7242, "step": 23115 }, { "epoch": 0.66, "grad_norm": 5.697691620654557, "learning_rate": 2.7086488005891997e-06, "loss": 0.6131, "step": 23116 }, { "epoch": 0.66, "grad_norm": 7.525540066979454, "learning_rate": 2.7082366159276664e-06, "loss": 0.5953, "step": 23117 }, { "epoch": 0.66, "grad_norm": 7.487430004754341, "learning_rate": 2.7078244509818917e-06, "loss": 0.5255, "step": 23118 }, { "epoch": 0.66, "grad_norm": 6.442631759332412, "learning_rate": 2.7074123057554223e-06, "loss": 0.6032, "step": 23119 }, { "epoch": 0.66, "grad_norm": 4.824084841456607, "learning_rate": 2.7070001802518008e-06, "loss": 0.536, "step": 23120 }, { "epoch": 0.66, "grad_norm": 4.121797098460461, "learning_rate": 2.7065880744745767e-06, "loss": 0.4502, "step": 23121 }, { "epoch": 0.66, "grad_norm": 1.9416104215107675, "learning_rate": 2.7061759884272927e-06, "loss": 0.0533, "step": 23122 }, { "epoch": 0.66, "grad_norm": 12.527714637283335, "learning_rate": 2.7057639221134946e-06, "loss": 0.5854, "step": 23123 }, { "epoch": 0.66, "grad_norm": 5.11006687157877, "learning_rate": 2.7053518755367245e-06, "loss": 0.4869, "step": 23124 }, { "epoch": 0.66, "grad_norm": 3.8612255030701177, "learning_rate": 2.7049398487005296e-06, "loss": 0.5103, "step": 23125 }, { "epoch": 0.66, "grad_norm": 4.5585337081655375, "learning_rate": 2.7045278416084574e-06, "loss": 0.2583, "step": 23126 }, { "epoch": 0.66, "grad_norm": 6.404715265798771, "learning_rate": 2.7041158542640466e-06, "loss": 0.2464, "step": 23127 }, { "epoch": 0.66, "grad_norm": 5.001663336650516, "learning_rate": 2.7037038866708475e-06, "loss": 0.7444, "step": 23128 }, { "epoch": 0.66, "grad_norm": 8.467093780838018, "learning_rate": 2.7032919388324007e-06, "loss": 0.3726, "step": 23129 }, { "epoch": 0.66, "grad_norm": 5.49386002535653, "learning_rate": 2.70288001075225e-06, "loss": 0.7151, "step": 23130 }, { "epoch": 0.66, "grad_norm": 7.489202212919309, "learning_rate": 2.7024681024339384e-06, "loss": 0.3759, "step": 23131 }, { "epoch": 0.66, "grad_norm": 2.406334392504396, "learning_rate": 2.7020562138810123e-06, "loss": 0.2971, "step": 23132 }, { "epoch": 0.66, "grad_norm": 14.157223834295195, "learning_rate": 2.701644345097012e-06, "loss": 0.4632, "step": 23133 }, { "epoch": 0.66, "grad_norm": 6.039268085489135, "learning_rate": 2.7012324960854828e-06, "loss": 0.4844, "step": 23134 }, { "epoch": 0.66, "grad_norm": 6.510874601407736, "learning_rate": 2.7008206668499707e-06, "loss": 0.512, "step": 23135 }, { "epoch": 0.66, "grad_norm": 7.0388165260548865, "learning_rate": 2.7004088573940116e-06, "loss": 0.6733, "step": 23136 }, { "epoch": 0.66, "grad_norm": 5.227401369570043, "learning_rate": 2.699997067721154e-06, "loss": 0.2156, "step": 23137 }, { "epoch": 0.66, "grad_norm": 7.955953488514591, "learning_rate": 2.6995852978349368e-06, "loss": 0.5462, "step": 23138 }, { "epoch": 0.66, "grad_norm": 3.9200088194825744, "learning_rate": 2.699173547738906e-06, "loss": 0.3262, "step": 23139 }, { "epoch": 0.66, "grad_norm": 3.69178119349739, "learning_rate": 2.698761817436599e-06, "loss": 0.1862, "step": 23140 }, { "epoch": 0.66, "grad_norm": 2.817093340780666, "learning_rate": 2.6983501069315634e-06, "loss": 0.3078, "step": 23141 }, { "epoch": 0.66, "grad_norm": 3.988265871621727, "learning_rate": 2.697938416227338e-06, "loss": 0.3622, "step": 23142 }, { "epoch": 0.66, "grad_norm": 6.270698605058563, "learning_rate": 2.6975267453274633e-06, "loss": 0.3706, "step": 23143 }, { "epoch": 0.66, "grad_norm": 4.515026699675806, "learning_rate": 2.697115094235485e-06, "loss": 0.5498, "step": 23144 }, { "epoch": 0.66, "grad_norm": 6.846684980519634, "learning_rate": 2.6967034629549395e-06, "loss": 0.9726, "step": 23145 }, { "epoch": 0.66, "grad_norm": 5.546873022804445, "learning_rate": 2.6962918514893725e-06, "loss": 0.4695, "step": 23146 }, { "epoch": 0.66, "grad_norm": 5.834084049237978, "learning_rate": 2.6958802598423213e-06, "loss": 0.642, "step": 23147 }, { "epoch": 0.66, "grad_norm": 2.939180015473805, "learning_rate": 2.6954686880173305e-06, "loss": 0.1414, "step": 23148 }, { "epoch": 0.66, "grad_norm": 17.352911706482953, "learning_rate": 2.6950571360179382e-06, "loss": 1.0347, "step": 23149 }, { "epoch": 0.66, "grad_norm": 11.998161870525493, "learning_rate": 2.6946456038476843e-06, "loss": 0.5476, "step": 23150 }, { "epoch": 0.66, "grad_norm": 5.134287165089573, "learning_rate": 2.694234091510112e-06, "loss": 0.457, "step": 23151 }, { "epoch": 0.66, "grad_norm": 4.786422275522238, "learning_rate": 2.6938225990087586e-06, "loss": 0.4012, "step": 23152 }, { "epoch": 0.66, "grad_norm": 8.447690221446017, "learning_rate": 2.6934111263471664e-06, "loss": 0.4736, "step": 23153 }, { "epoch": 0.66, "grad_norm": 5.051371599643923, "learning_rate": 2.6929996735288733e-06, "loss": 0.6493, "step": 23154 }, { "epoch": 0.66, "grad_norm": 7.415493890284101, "learning_rate": 2.692588240557421e-06, "loss": 0.3366, "step": 23155 }, { "epoch": 0.66, "grad_norm": 8.63017189756417, "learning_rate": 2.692176827436349e-06, "loss": 0.6803, "step": 23156 }, { "epoch": 0.66, "grad_norm": 4.081682721458585, "learning_rate": 2.6917654341691944e-06, "loss": 0.4819, "step": 23157 }, { "epoch": 0.66, "grad_norm": 7.455202976587501, "learning_rate": 2.6913540607594967e-06, "loss": 0.6466, "step": 23158 }, { "epoch": 0.66, "grad_norm": 5.793128743859321, "learning_rate": 2.690942707210795e-06, "loss": 0.504, "step": 23159 }, { "epoch": 0.66, "grad_norm": 8.47150179365637, "learning_rate": 2.6905313735266314e-06, "loss": 0.5422, "step": 23160 }, { "epoch": 0.66, "grad_norm": 5.2829597910690875, "learning_rate": 2.6901200597105403e-06, "loss": 0.6226, "step": 23161 }, { "epoch": 0.66, "grad_norm": 3.3820496859789295, "learning_rate": 2.689708765766065e-06, "loss": 0.3087, "step": 23162 }, { "epoch": 0.66, "grad_norm": 4.86431620052932, "learning_rate": 2.6892974916967373e-06, "loss": 0.197, "step": 23163 }, { "epoch": 0.66, "grad_norm": 5.7162249884776655, "learning_rate": 2.688886237506101e-06, "loss": 0.148, "step": 23164 }, { "epoch": 0.66, "grad_norm": 5.970367452153419, "learning_rate": 2.6884750031976893e-06, "loss": 0.5388, "step": 23165 }, { "epoch": 0.66, "grad_norm": 5.459234056580427, "learning_rate": 2.688063788775043e-06, "loss": 0.6478, "step": 23166 }, { "epoch": 0.66, "grad_norm": 13.090917494860275, "learning_rate": 2.6876525942417007e-06, "loss": 0.8426, "step": 23167 }, { "epoch": 0.66, "grad_norm": 4.496204815643624, "learning_rate": 2.687241419601199e-06, "loss": 0.3875, "step": 23168 }, { "epoch": 0.66, "grad_norm": 6.213339402348634, "learning_rate": 2.6868302648570745e-06, "loss": 0.2951, "step": 23169 }, { "epoch": 0.66, "grad_norm": 4.446510756822446, "learning_rate": 2.686419130012862e-06, "loss": 0.5503, "step": 23170 }, { "epoch": 0.66, "grad_norm": 5.527443087598407, "learning_rate": 2.6860080150721034e-06, "loss": 0.7016, "step": 23171 }, { "epoch": 0.66, "grad_norm": 6.867568004425595, "learning_rate": 2.6855969200383304e-06, "loss": 0.5233, "step": 23172 }, { "epoch": 0.66, "grad_norm": 3.224950724971982, "learning_rate": 2.685185844915085e-06, "loss": 0.4436, "step": 23173 }, { "epoch": 0.66, "grad_norm": 5.3900558613517005, "learning_rate": 2.684774789705898e-06, "loss": 0.4321, "step": 23174 }, { "epoch": 0.66, "grad_norm": 6.419954852839698, "learning_rate": 2.684363754414311e-06, "loss": 0.4533, "step": 23175 }, { "epoch": 0.66, "grad_norm": 4.7071553194148565, "learning_rate": 2.683952739043857e-06, "loss": 0.332, "step": 23176 }, { "epoch": 0.66, "grad_norm": 6.528054071445548, "learning_rate": 2.68354174359807e-06, "loss": 0.3846, "step": 23177 }, { "epoch": 0.66, "grad_norm": 7.612990755946185, "learning_rate": 2.6831307680804914e-06, "loss": 0.8547, "step": 23178 }, { "epoch": 0.66, "grad_norm": 4.8417560534124, "learning_rate": 2.6827198124946508e-06, "loss": 0.3265, "step": 23179 }, { "epoch": 0.66, "grad_norm": 7.0882911011871155, "learning_rate": 2.6823088768440884e-06, "loss": 0.7744, "step": 23180 }, { "epoch": 0.66, "grad_norm": 5.30690025955243, "learning_rate": 2.681897961132335e-06, "loss": 1.0246, "step": 23181 }, { "epoch": 0.66, "grad_norm": 6.943151766520871, "learning_rate": 2.6814870653629312e-06, "loss": 0.5045, "step": 23182 }, { "epoch": 0.66, "grad_norm": 9.282858198689405, "learning_rate": 2.681076189539408e-06, "loss": 0.3341, "step": 23183 }, { "epoch": 0.66, "grad_norm": 6.244717998139047, "learning_rate": 2.680665333665299e-06, "loss": 0.4716, "step": 23184 }, { "epoch": 0.66, "grad_norm": 4.9763221862356035, "learning_rate": 2.6802544977441417e-06, "loss": 0.2484, "step": 23185 }, { "epoch": 0.66, "grad_norm": 3.950962998304071, "learning_rate": 2.679843681779468e-06, "loss": 0.4941, "step": 23186 }, { "epoch": 0.66, "grad_norm": 2.4329980332342713, "learning_rate": 2.6794328857748152e-06, "loss": 0.1476, "step": 23187 }, { "epoch": 0.66, "grad_norm": 4.930126986734867, "learning_rate": 2.6790221097337145e-06, "loss": 0.3599, "step": 23188 }, { "epoch": 0.66, "grad_norm": 3.9175549304765056, "learning_rate": 2.678611353659702e-06, "loss": 0.2506, "step": 23189 }, { "epoch": 0.66, "grad_norm": 9.467933241866278, "learning_rate": 2.67820061755631e-06, "loss": 1.1652, "step": 23190 }, { "epoch": 0.66, "grad_norm": 5.08731009014067, "learning_rate": 2.6777899014270707e-06, "loss": 0.4647, "step": 23191 }, { "epoch": 0.66, "grad_norm": 3.1738946807925403, "learning_rate": 2.677379205275521e-06, "loss": 0.3865, "step": 23192 }, { "epoch": 0.66, "grad_norm": 3.2910627865454236, "learning_rate": 2.6769685291051892e-06, "loss": 0.3553, "step": 23193 }, { "epoch": 0.66, "grad_norm": 6.25416914168717, "learning_rate": 2.676557872919614e-06, "loss": 0.4742, "step": 23194 }, { "epoch": 0.66, "grad_norm": 5.457040337546119, "learning_rate": 2.676147236722325e-06, "loss": 0.643, "step": 23195 }, { "epoch": 0.66, "grad_norm": 6.206424517833335, "learning_rate": 2.6757366205168556e-06, "loss": 0.2533, "step": 23196 }, { "epoch": 0.66, "grad_norm": 4.678257566114137, "learning_rate": 2.675326024306736e-06, "loss": 0.4001, "step": 23197 }, { "epoch": 0.66, "grad_norm": 4.794884159848809, "learning_rate": 2.674915448095502e-06, "loss": 0.6048, "step": 23198 }, { "epoch": 0.66, "grad_norm": 6.873217264867185, "learning_rate": 2.674504891886682e-06, "loss": 0.7972, "step": 23199 }, { "epoch": 0.66, "grad_norm": 4.942874180778573, "learning_rate": 2.6740943556838104e-06, "loss": 0.3173, "step": 23200 }, { "epoch": 0.66, "grad_norm": 9.40052159668137, "learning_rate": 2.673683839490421e-06, "loss": 0.5551, "step": 23201 }, { "epoch": 0.66, "grad_norm": 3.196009403701908, "learning_rate": 2.6732733433100423e-06, "loss": 0.4146, "step": 23202 }, { "epoch": 0.66, "grad_norm": 7.450760952278993, "learning_rate": 2.6728628671462063e-06, "loss": 0.3079, "step": 23203 }, { "epoch": 0.66, "grad_norm": 4.527924520978887, "learning_rate": 2.672452411002443e-06, "loss": 0.3922, "step": 23204 }, { "epoch": 0.66, "grad_norm": 7.074375161390777, "learning_rate": 2.6720419748822877e-06, "loss": 0.9105, "step": 23205 }, { "epoch": 0.66, "grad_norm": 4.427788214572847, "learning_rate": 2.671631558789266e-06, "loss": 0.2745, "step": 23206 }, { "epoch": 0.66, "grad_norm": 8.761586392780936, "learning_rate": 2.6712211627269112e-06, "loss": 0.3186, "step": 23207 }, { "epoch": 0.66, "grad_norm": 4.567489508589131, "learning_rate": 2.6708107866987563e-06, "loss": 0.485, "step": 23208 }, { "epoch": 0.66, "grad_norm": 3.308522643639183, "learning_rate": 2.6704004307083296e-06, "loss": 0.2351, "step": 23209 }, { "epoch": 0.66, "grad_norm": 4.092702542316511, "learning_rate": 2.6699900947591606e-06, "loss": 0.6654, "step": 23210 }, { "epoch": 0.66, "grad_norm": 2.7527839267515337, "learning_rate": 2.669579778854778e-06, "loss": 0.1986, "step": 23211 }, { "epoch": 0.66, "grad_norm": 4.148021612495467, "learning_rate": 2.6691694829987165e-06, "loss": 0.6428, "step": 23212 }, { "epoch": 0.66, "grad_norm": 1.6830719229838724, "learning_rate": 2.6687592071945e-06, "loss": 0.2491, "step": 23213 }, { "epoch": 0.66, "grad_norm": 8.007141263788327, "learning_rate": 2.6683489514456636e-06, "loss": 0.527, "step": 23214 }, { "epoch": 0.66, "grad_norm": 8.650650141103242, "learning_rate": 2.667938715755732e-06, "loss": 0.9194, "step": 23215 }, { "epoch": 0.66, "grad_norm": 6.294024899188921, "learning_rate": 2.667528500128238e-06, "loss": 0.548, "step": 23216 }, { "epoch": 0.66, "grad_norm": 8.391087808756254, "learning_rate": 2.6671183045667093e-06, "loss": 0.4225, "step": 23217 }, { "epoch": 0.66, "grad_norm": 6.826491485666391, "learning_rate": 2.666708129074673e-06, "loss": 0.7147, "step": 23218 }, { "epoch": 0.66, "grad_norm": 6.168950027577547, "learning_rate": 2.6662979736556615e-06, "loss": 0.3905, "step": 23219 }, { "epoch": 0.66, "grad_norm": 4.439429226436307, "learning_rate": 2.665887838313199e-06, "loss": 0.3742, "step": 23220 }, { "epoch": 0.66, "grad_norm": 4.064930056994291, "learning_rate": 2.6654777230508177e-06, "loss": 0.3449, "step": 23221 }, { "epoch": 0.67, "grad_norm": 8.936065825521279, "learning_rate": 2.6650676278720442e-06, "loss": 0.2797, "step": 23222 }, { "epoch": 0.67, "grad_norm": 7.766707500328591, "learning_rate": 2.6646575527804064e-06, "loss": 0.7046, "step": 23223 }, { "epoch": 0.67, "grad_norm": 3.8864176571258797, "learning_rate": 2.66424749777943e-06, "loss": 0.2707, "step": 23224 }, { "epoch": 0.67, "grad_norm": 6.638330327916916, "learning_rate": 2.6638374628726455e-06, "loss": 0.6621, "step": 23225 }, { "epoch": 0.67, "grad_norm": 7.567577706610025, "learning_rate": 2.6634274480635815e-06, "loss": 0.7255, "step": 23226 }, { "epoch": 0.67, "grad_norm": 5.011755190186841, "learning_rate": 2.663017453355761e-06, "loss": 0.4444, "step": 23227 }, { "epoch": 0.67, "grad_norm": 3.355210495270762, "learning_rate": 2.6626074787527158e-06, "loss": 0.3271, "step": 23228 }, { "epoch": 0.67, "grad_norm": 6.2641629346834975, "learning_rate": 2.6621975242579707e-06, "loss": 0.4412, "step": 23229 }, { "epoch": 0.67, "grad_norm": 3.990923955916928, "learning_rate": 2.6617875898750522e-06, "loss": 0.206, "step": 23230 }, { "epoch": 0.67, "grad_norm": 3.142707419002823, "learning_rate": 2.6613776756074853e-06, "loss": 0.4016, "step": 23231 }, { "epoch": 0.67, "grad_norm": 5.078682314294725, "learning_rate": 2.6609677814587987e-06, "loss": 0.3645, "step": 23232 }, { "epoch": 0.67, "grad_norm": 7.23424450816373, "learning_rate": 2.66055790743252e-06, "loss": 0.4218, "step": 23233 }, { "epoch": 0.67, "grad_norm": 7.3480465271732545, "learning_rate": 2.6601480535321716e-06, "loss": 0.8354, "step": 23234 }, { "epoch": 0.67, "grad_norm": 5.450307385936713, "learning_rate": 2.659738219761283e-06, "loss": 0.2185, "step": 23235 }, { "epoch": 0.67, "grad_norm": 6.290979223610875, "learning_rate": 2.6593284061233793e-06, "loss": 0.4704, "step": 23236 }, { "epoch": 0.67, "grad_norm": 7.665105439597454, "learning_rate": 2.6589186126219846e-06, "loss": 0.8491, "step": 23237 }, { "epoch": 0.67, "grad_norm": 4.664813494005216, "learning_rate": 2.6585088392606227e-06, "loss": 0.9908, "step": 23238 }, { "epoch": 0.67, "grad_norm": 6.242683209536791, "learning_rate": 2.6580990860428233e-06, "loss": 0.7362, "step": 23239 }, { "epoch": 0.67, "grad_norm": 5.980680475654403, "learning_rate": 2.657689352972107e-06, "loss": 0.381, "step": 23240 }, { "epoch": 0.67, "grad_norm": 4.959573633342107, "learning_rate": 2.657279640052001e-06, "loss": 0.4839, "step": 23241 }, { "epoch": 0.67, "grad_norm": 1.8960114507178494, "learning_rate": 2.656869947286031e-06, "loss": 0.0818, "step": 23242 }, { "epoch": 0.67, "grad_norm": 4.464940080913457, "learning_rate": 2.6564602746777212e-06, "loss": 0.3841, "step": 23243 }, { "epoch": 0.67, "grad_norm": 7.345492476851231, "learning_rate": 2.6560506222305947e-06, "loss": 0.7155, "step": 23244 }, { "epoch": 0.67, "grad_norm": 8.558581242101697, "learning_rate": 2.6556409899481737e-06, "loss": 0.8587, "step": 23245 }, { "epoch": 0.67, "grad_norm": 8.33467196839076, "learning_rate": 2.6552313778339876e-06, "loss": 0.5156, "step": 23246 }, { "epoch": 0.67, "grad_norm": 13.030631144440433, "learning_rate": 2.654821785891555e-06, "loss": 0.6108, "step": 23247 }, { "epoch": 0.67, "grad_norm": 2.0289393854501747, "learning_rate": 2.654412214124401e-06, "loss": 0.0885, "step": 23248 }, { "epoch": 0.67, "grad_norm": 4.238518310105021, "learning_rate": 2.6540026625360525e-06, "loss": 0.1124, "step": 23249 }, { "epoch": 0.67, "grad_norm": 4.545212720377031, "learning_rate": 2.6535931311300306e-06, "loss": 0.5925, "step": 23250 }, { "epoch": 0.67, "grad_norm": 6.473339277291573, "learning_rate": 2.653183619909858e-06, "loss": 0.544, "step": 23251 }, { "epoch": 0.67, "grad_norm": 6.83507964064796, "learning_rate": 2.6527741288790554e-06, "loss": 0.4053, "step": 23252 }, { "epoch": 0.67, "grad_norm": 10.307378433476595, "learning_rate": 2.65236465804115e-06, "loss": 0.2839, "step": 23253 }, { "epoch": 0.67, "grad_norm": 5.211138353952116, "learning_rate": 2.6519552073996607e-06, "loss": 0.6199, "step": 23254 }, { "epoch": 0.67, "grad_norm": 5.810114545365989, "learning_rate": 2.6515457769581132e-06, "loss": 0.1925, "step": 23255 }, { "epoch": 0.67, "grad_norm": 4.916530262407321, "learning_rate": 2.6511363667200283e-06, "loss": 0.5221, "step": 23256 }, { "epoch": 0.67, "grad_norm": 4.596736170930455, "learning_rate": 2.6507269766889265e-06, "loss": 0.6839, "step": 23257 }, { "epoch": 0.67, "grad_norm": 4.542418114444252, "learning_rate": 2.6503176068683333e-06, "loss": 0.4041, "step": 23258 }, { "epoch": 0.67, "grad_norm": 7.834494430307269, "learning_rate": 2.6499082572617662e-06, "loss": 0.5179, "step": 23259 }, { "epoch": 0.67, "grad_norm": 5.610047876490331, "learning_rate": 2.6494989278727512e-06, "loss": 0.4049, "step": 23260 }, { "epoch": 0.67, "grad_norm": 6.3681374057772855, "learning_rate": 2.6490896187048047e-06, "loss": 0.1852, "step": 23261 }, { "epoch": 0.67, "grad_norm": 1.1070531941179271, "learning_rate": 2.6486803297614537e-06, "loss": 0.1139, "step": 23262 }, { "epoch": 0.67, "grad_norm": 4.9191432092878395, "learning_rate": 2.648271061046216e-06, "loss": 0.4649, "step": 23263 }, { "epoch": 0.67, "grad_norm": 4.416353262572136, "learning_rate": 2.6478618125626125e-06, "loss": 0.5647, "step": 23264 }, { "epoch": 0.67, "grad_norm": 5.786382531036679, "learning_rate": 2.647452584314163e-06, "loss": 0.4305, "step": 23265 }, { "epoch": 0.67, "grad_norm": 5.041626838127428, "learning_rate": 2.6470433763043885e-06, "loss": 0.3399, "step": 23266 }, { "epoch": 0.67, "grad_norm": 7.369220343956558, "learning_rate": 2.646634188536812e-06, "loss": 0.7007, "step": 23267 }, { "epoch": 0.67, "grad_norm": 9.981817882150619, "learning_rate": 2.6462250210149503e-06, "loss": 0.7, "step": 23268 }, { "epoch": 0.67, "grad_norm": 8.33110740179006, "learning_rate": 2.6458158737423266e-06, "loss": 0.3727, "step": 23269 }, { "epoch": 0.67, "grad_norm": 4.90500226525304, "learning_rate": 2.6454067467224596e-06, "loss": 0.5343, "step": 23270 }, { "epoch": 0.67, "grad_norm": 5.745076435002284, "learning_rate": 2.6449976399588685e-06, "loss": 0.2058, "step": 23271 }, { "epoch": 0.67, "grad_norm": 6.18405803159205, "learning_rate": 2.64458855345507e-06, "loss": 0.7095, "step": 23272 }, { "epoch": 0.67, "grad_norm": 5.737187270241313, "learning_rate": 2.6441794872145863e-06, "loss": 0.2275, "step": 23273 }, { "epoch": 0.67, "grad_norm": 6.549616273894687, "learning_rate": 2.643770441240939e-06, "loss": 0.7428, "step": 23274 }, { "epoch": 0.67, "grad_norm": 9.558896576356332, "learning_rate": 2.6433614155376426e-06, "loss": 0.3614, "step": 23275 }, { "epoch": 0.67, "grad_norm": 7.776824060126229, "learning_rate": 2.6429524101082194e-06, "loss": 0.4502, "step": 23276 }, { "epoch": 0.67, "grad_norm": 5.217621961190857, "learning_rate": 2.6425434249561864e-06, "loss": 0.515, "step": 23277 }, { "epoch": 0.67, "grad_norm": 6.740569697450758, "learning_rate": 2.6421344600850617e-06, "loss": 0.3097, "step": 23278 }, { "epoch": 0.67, "grad_norm": 5.859870340390559, "learning_rate": 2.6417255154983624e-06, "loss": 0.3612, "step": 23279 }, { "epoch": 0.67, "grad_norm": 6.573899694850255, "learning_rate": 2.64131659119961e-06, "loss": 0.35, "step": 23280 }, { "epoch": 0.67, "grad_norm": 2.277168437301609, "learning_rate": 2.640907687192319e-06, "loss": 0.2616, "step": 23281 }, { "epoch": 0.67, "grad_norm": 2.6067679910456927, "learning_rate": 2.64049880348001e-06, "loss": 0.1349, "step": 23282 }, { "epoch": 0.67, "grad_norm": 3.4076669703022016, "learning_rate": 2.6400899400661993e-06, "loss": 0.3951, "step": 23283 }, { "epoch": 0.67, "grad_norm": 4.766696327985275, "learning_rate": 2.6396810969544028e-06, "loss": 0.0858, "step": 23284 }, { "epoch": 0.67, "grad_norm": 1.8464307450813744, "learning_rate": 2.639272274148141e-06, "loss": 0.1299, "step": 23285 }, { "epoch": 0.67, "grad_norm": 5.375329650706063, "learning_rate": 2.638863471650927e-06, "loss": 1.2036, "step": 23286 }, { "epoch": 0.67, "grad_norm": 6.814362402607528, "learning_rate": 2.638454689466282e-06, "loss": 1.0052, "step": 23287 }, { "epoch": 0.67, "grad_norm": 4.2927159937002095, "learning_rate": 2.638045927597719e-06, "loss": 0.4602, "step": 23288 }, { "epoch": 0.67, "grad_norm": 6.115577417655398, "learning_rate": 2.637637186048757e-06, "loss": 0.7036, "step": 23289 }, { "epoch": 0.67, "grad_norm": 6.807811173392274, "learning_rate": 2.637228464822912e-06, "loss": 0.3989, "step": 23290 }, { "epoch": 0.67, "grad_norm": 5.0729928242673825, "learning_rate": 2.636819763923698e-06, "loss": 0.5941, "step": 23291 }, { "epoch": 0.67, "grad_norm": 5.689209104858078, "learning_rate": 2.636411083354634e-06, "loss": 0.3772, "step": 23292 }, { "epoch": 0.67, "grad_norm": 6.965576612562034, "learning_rate": 2.636002423119233e-06, "loss": 0.5341, "step": 23293 }, { "epoch": 0.67, "grad_norm": 4.8649511450091465, "learning_rate": 2.635593783221013e-06, "loss": 0.3952, "step": 23294 }, { "epoch": 0.67, "grad_norm": 2.6726732455875415, "learning_rate": 2.635185163663487e-06, "loss": 0.1757, "step": 23295 }, { "epoch": 0.67, "grad_norm": 7.120951422494717, "learning_rate": 2.634776564450175e-06, "loss": 0.3515, "step": 23296 }, { "epoch": 0.67, "grad_norm": 6.6033478438853255, "learning_rate": 2.634367985584588e-06, "loss": 0.484, "step": 23297 }, { "epoch": 0.67, "grad_norm": 4.191493876910865, "learning_rate": 2.63395942707024e-06, "loss": 0.3073, "step": 23298 }, { "epoch": 0.67, "grad_norm": 10.503044254772341, "learning_rate": 2.63355088891065e-06, "loss": 0.6145, "step": 23299 }, { "epoch": 0.67, "grad_norm": 5.124547705812479, "learning_rate": 2.6331423711093286e-06, "loss": 0.5638, "step": 23300 }, { "epoch": 0.67, "grad_norm": 5.685379114955385, "learning_rate": 2.632733873669793e-06, "loss": 0.3712, "step": 23301 }, { "epoch": 0.67, "grad_norm": 5.106698516328475, "learning_rate": 2.632325396595555e-06, "loss": 0.1223, "step": 23302 }, { "epoch": 0.67, "grad_norm": 6.845275151565112, "learning_rate": 2.6319169398901324e-06, "loss": 0.538, "step": 23303 }, { "epoch": 0.67, "grad_norm": 8.168932353919786, "learning_rate": 2.6315085035570366e-06, "loss": 0.8178, "step": 23304 }, { "epoch": 0.67, "grad_norm": 9.706676036084373, "learning_rate": 2.6311000875997816e-06, "loss": 0.3493, "step": 23305 }, { "epoch": 0.67, "grad_norm": 6.384945665903502, "learning_rate": 2.6306916920218784e-06, "loss": 0.6483, "step": 23306 }, { "epoch": 0.67, "grad_norm": 4.791595088037115, "learning_rate": 2.630283316826843e-06, "loss": 0.7437, "step": 23307 }, { "epoch": 0.67, "grad_norm": 4.123571900045366, "learning_rate": 2.629874962018191e-06, "loss": 0.3794, "step": 23308 }, { "epoch": 0.67, "grad_norm": 4.958083309420024, "learning_rate": 2.6294666275994305e-06, "loss": 0.2542, "step": 23309 }, { "epoch": 0.67, "grad_norm": 3.5460192282360303, "learning_rate": 2.629058313574081e-06, "loss": 0.1563, "step": 23310 }, { "epoch": 0.67, "grad_norm": 3.6131143727300508, "learning_rate": 2.6286500199456466e-06, "loss": 0.1426, "step": 23311 }, { "epoch": 0.67, "grad_norm": 3.9649028003222746, "learning_rate": 2.628241746717646e-06, "loss": 0.578, "step": 23312 }, { "epoch": 0.67, "grad_norm": 8.545889342552439, "learning_rate": 2.627833493893588e-06, "loss": 0.8985, "step": 23313 }, { "epoch": 0.67, "grad_norm": 5.937379494498245, "learning_rate": 2.627425261476986e-06, "loss": 0.2779, "step": 23314 }, { "epoch": 0.67, "grad_norm": 4.7664833171412075, "learning_rate": 2.6270170494713543e-06, "loss": 0.3363, "step": 23315 }, { "epoch": 0.67, "grad_norm": 4.545686599270011, "learning_rate": 2.626608857880203e-06, "loss": 0.3865, "step": 23316 }, { "epoch": 0.67, "grad_norm": 5.943797215415595, "learning_rate": 2.6262006867070435e-06, "loss": 0.351, "step": 23317 }, { "epoch": 0.67, "grad_norm": 3.1239469852137516, "learning_rate": 2.6257925359553854e-06, "loss": 0.2803, "step": 23318 }, { "epoch": 0.67, "grad_norm": 7.101083032182482, "learning_rate": 2.6253844056287436e-06, "loss": 0.4552, "step": 23319 }, { "epoch": 0.67, "grad_norm": 4.281369061450349, "learning_rate": 2.6249762957306256e-06, "loss": 0.2457, "step": 23320 }, { "epoch": 0.67, "grad_norm": 4.812425612828417, "learning_rate": 2.6245682062645457e-06, "loss": 0.7958, "step": 23321 }, { "epoch": 0.67, "grad_norm": 5.495033969930348, "learning_rate": 2.624160137234011e-06, "loss": 0.1047, "step": 23322 }, { "epoch": 0.67, "grad_norm": 3.513682087093015, "learning_rate": 2.6237520886425368e-06, "loss": 0.4164, "step": 23323 }, { "epoch": 0.67, "grad_norm": 3.6044204196280827, "learning_rate": 2.62334406049363e-06, "loss": 0.5378, "step": 23324 }, { "epoch": 0.67, "grad_norm": 9.014097246277315, "learning_rate": 2.6229360527908e-06, "loss": 0.3656, "step": 23325 }, { "epoch": 0.67, "grad_norm": 6.339509646108332, "learning_rate": 2.6225280655375605e-06, "loss": 0.2389, "step": 23326 }, { "epoch": 0.67, "grad_norm": 6.014945331176015, "learning_rate": 2.6221200987374173e-06, "loss": 0.343, "step": 23327 }, { "epoch": 0.67, "grad_norm": 8.172416479431359, "learning_rate": 2.6217121523938837e-06, "loss": 0.6752, "step": 23328 }, { "epoch": 0.67, "grad_norm": 3.9906479466673987, "learning_rate": 2.6213042265104666e-06, "loss": 0.6203, "step": 23329 }, { "epoch": 0.67, "grad_norm": 4.604725655616725, "learning_rate": 2.620896321090678e-06, "loss": 0.4421, "step": 23330 }, { "epoch": 0.67, "grad_norm": 3.79092776605993, "learning_rate": 2.6204884361380246e-06, "loss": 0.1254, "step": 23331 }, { "epoch": 0.67, "grad_norm": 1.0784142493187558, "learning_rate": 2.6200805716560152e-06, "loss": 0.113, "step": 23332 }, { "epoch": 0.67, "grad_norm": 8.190616509565242, "learning_rate": 2.6196727276481614e-06, "loss": 0.3844, "step": 23333 }, { "epoch": 0.67, "grad_norm": 2.812991475502588, "learning_rate": 2.6192649041179674e-06, "loss": 0.1625, "step": 23334 }, { "epoch": 0.67, "grad_norm": 3.039813402466085, "learning_rate": 2.618857101068947e-06, "loss": 0.3791, "step": 23335 }, { "epoch": 0.67, "grad_norm": 9.609123115029536, "learning_rate": 2.6184493185046035e-06, "loss": 0.4886, "step": 23336 }, { "epoch": 0.67, "grad_norm": 8.39303219041636, "learning_rate": 2.618041556428452e-06, "loss": 0.7064, "step": 23337 }, { "epoch": 0.67, "grad_norm": 6.792250329273978, "learning_rate": 2.6176338148439906e-06, "loss": 0.308, "step": 23338 }, { "epoch": 0.67, "grad_norm": 3.705260357273905, "learning_rate": 2.617226093754733e-06, "loss": 0.3072, "step": 23339 }, { "epoch": 0.67, "grad_norm": 6.997658201598338, "learning_rate": 2.616818393164188e-06, "loss": 0.6895, "step": 23340 }, { "epoch": 0.67, "grad_norm": 5.90515473721036, "learning_rate": 2.6164107130758576e-06, "loss": 0.8398, "step": 23341 }, { "epoch": 0.67, "grad_norm": 5.532563910169044, "learning_rate": 2.6160030534932556e-06, "loss": 0.6589, "step": 23342 }, { "epoch": 0.67, "grad_norm": 4.142040025293873, "learning_rate": 2.6155954144198858e-06, "loss": 0.3758, "step": 23343 }, { "epoch": 0.67, "grad_norm": 3.410279961053858, "learning_rate": 2.615187795859254e-06, "loss": 0.4207, "step": 23344 }, { "epoch": 0.67, "grad_norm": 5.994900403885668, "learning_rate": 2.6147801978148673e-06, "loss": 0.4135, "step": 23345 }, { "epoch": 0.67, "grad_norm": 6.802691984961821, "learning_rate": 2.6143726202902343e-06, "loss": 0.4668, "step": 23346 }, { "epoch": 0.67, "grad_norm": 5.591115363052367, "learning_rate": 2.613965063288858e-06, "loss": 0.2658, "step": 23347 }, { "epoch": 0.67, "grad_norm": 3.075044097041462, "learning_rate": 2.6135575268142466e-06, "loss": 0.2916, "step": 23348 }, { "epoch": 0.67, "grad_norm": 4.876903602431158, "learning_rate": 2.6131500108699074e-06, "loss": 0.6, "step": 23349 }, { "epoch": 0.67, "grad_norm": 9.05016858591945, "learning_rate": 2.6127425154593457e-06, "loss": 0.5655, "step": 23350 }, { "epoch": 0.67, "grad_norm": 2.715082127925416, "learning_rate": 2.6123350405860655e-06, "loss": 0.3837, "step": 23351 }, { "epoch": 0.67, "grad_norm": 5.261535731972137, "learning_rate": 2.611927586253571e-06, "loss": 0.4314, "step": 23352 }, { "epoch": 0.67, "grad_norm": 2.8328876331799893, "learning_rate": 2.6115201524653723e-06, "loss": 0.1644, "step": 23353 }, { "epoch": 0.67, "grad_norm": 6.383639371954258, "learning_rate": 2.6111127392249686e-06, "loss": 0.6694, "step": 23354 }, { "epoch": 0.67, "grad_norm": 5.903282025310083, "learning_rate": 2.6107053465358677e-06, "loss": 0.4844, "step": 23355 }, { "epoch": 0.67, "grad_norm": 8.320053340117326, "learning_rate": 2.6102979744015778e-06, "loss": 0.6599, "step": 23356 }, { "epoch": 0.67, "grad_norm": 8.427216599915253, "learning_rate": 2.6098906228255995e-06, "loss": 0.4562, "step": 23357 }, { "epoch": 0.67, "grad_norm": 6.859973299180274, "learning_rate": 2.6094832918114375e-06, "loss": 0.6419, "step": 23358 }, { "epoch": 0.67, "grad_norm": 3.400871667902159, "learning_rate": 2.609075981362595e-06, "loss": 0.3494, "step": 23359 }, { "epoch": 0.67, "grad_norm": 7.104782834744193, "learning_rate": 2.6086686914825785e-06, "loss": 0.5965, "step": 23360 }, { "epoch": 0.67, "grad_norm": 5.832396877144038, "learning_rate": 2.6082614221748904e-06, "loss": 0.5134, "step": 23361 }, { "epoch": 0.67, "grad_norm": 4.380394661380418, "learning_rate": 2.6078541734430357e-06, "loss": 0.3128, "step": 23362 }, { "epoch": 0.67, "grad_norm": 6.326388462242041, "learning_rate": 2.607446945290515e-06, "loss": 0.6463, "step": 23363 }, { "epoch": 0.67, "grad_norm": 5.695981670569814, "learning_rate": 2.6070397377208363e-06, "loss": 0.3483, "step": 23364 }, { "epoch": 0.67, "grad_norm": 5.34547090984233, "learning_rate": 2.6066325507374996e-06, "loss": 0.6291, "step": 23365 }, { "epoch": 0.67, "grad_norm": 2.2461671236741645, "learning_rate": 2.6062253843440067e-06, "loss": 0.0803, "step": 23366 }, { "epoch": 0.67, "grad_norm": 2.904809379380453, "learning_rate": 2.6058182385438633e-06, "loss": 0.5025, "step": 23367 }, { "epoch": 0.67, "grad_norm": 2.0078182888318636, "learning_rate": 2.60541111334057e-06, "loss": 0.21, "step": 23368 }, { "epoch": 0.67, "grad_norm": 8.291510045347126, "learning_rate": 2.6050040087376316e-06, "loss": 0.6479, "step": 23369 }, { "epoch": 0.67, "grad_norm": 5.532496769796842, "learning_rate": 2.6045969247385482e-06, "loss": 0.7545, "step": 23370 }, { "epoch": 0.67, "grad_norm": 5.630088793120443, "learning_rate": 2.604189861346823e-06, "loss": 0.3546, "step": 23371 }, { "epoch": 0.67, "grad_norm": 9.277303620904165, "learning_rate": 2.6037828185659557e-06, "loss": 0.3732, "step": 23372 }, { "epoch": 0.67, "grad_norm": 3.2385377678776623, "learning_rate": 2.60337579639945e-06, "loss": 0.1597, "step": 23373 }, { "epoch": 0.67, "grad_norm": 8.175054467745973, "learning_rate": 2.6029687948508086e-06, "loss": 0.6433, "step": 23374 }, { "epoch": 0.67, "grad_norm": 5.42502304722685, "learning_rate": 2.60256181392353e-06, "loss": 0.3076, "step": 23375 }, { "epoch": 0.67, "grad_norm": 1.6635921422442372, "learning_rate": 2.602154853621118e-06, "loss": 0.3527, "step": 23376 }, { "epoch": 0.67, "grad_norm": 3.0255428544342666, "learning_rate": 2.6017479139470735e-06, "loss": 0.1478, "step": 23377 }, { "epoch": 0.67, "grad_norm": 4.077875356231413, "learning_rate": 2.601340994904896e-06, "loss": 0.1714, "step": 23378 }, { "epoch": 0.67, "grad_norm": 5.164266031152751, "learning_rate": 2.600934096498085e-06, "loss": 0.3619, "step": 23379 }, { "epoch": 0.67, "grad_norm": 14.210756708623556, "learning_rate": 2.600527218730142e-06, "loss": 0.8806, "step": 23380 }, { "epoch": 0.67, "grad_norm": 4.618929797274766, "learning_rate": 2.6001203616045706e-06, "loss": 0.4911, "step": 23381 }, { "epoch": 0.67, "grad_norm": 6.941357012967478, "learning_rate": 2.5997135251248663e-06, "loss": 0.6, "step": 23382 }, { "epoch": 0.67, "grad_norm": 4.8328993317443665, "learning_rate": 2.5993067092945323e-06, "loss": 0.21, "step": 23383 }, { "epoch": 0.67, "grad_norm": 5.07494780696846, "learning_rate": 2.5988999141170673e-06, "loss": 0.5125, "step": 23384 }, { "epoch": 0.67, "grad_norm": 4.4375856686771415, "learning_rate": 2.5984931395959707e-06, "loss": 0.4515, "step": 23385 }, { "epoch": 0.67, "grad_norm": 5.112119141921535, "learning_rate": 2.5980863857347403e-06, "loss": 0.3493, "step": 23386 }, { "epoch": 0.67, "grad_norm": 6.05387262582887, "learning_rate": 2.5976796525368787e-06, "loss": 0.4437, "step": 23387 }, { "epoch": 0.67, "grad_norm": 2.2984017209212237, "learning_rate": 2.597272940005881e-06, "loss": 0.1272, "step": 23388 }, { "epoch": 0.67, "grad_norm": 8.486145396838843, "learning_rate": 2.596866248145249e-06, "loss": 0.7013, "step": 23389 }, { "epoch": 0.67, "grad_norm": 3.723455385332458, "learning_rate": 2.596459576958482e-06, "loss": 0.437, "step": 23390 }, { "epoch": 0.67, "grad_norm": 5.64547079641985, "learning_rate": 2.596052926449078e-06, "loss": 0.3994, "step": 23391 }, { "epoch": 0.67, "grad_norm": 1.3456833703704694, "learning_rate": 2.595646296620534e-06, "loss": 0.0394, "step": 23392 }, { "epoch": 0.67, "grad_norm": 6.07598874715249, "learning_rate": 2.595239687476348e-06, "loss": 0.9625, "step": 23393 }, { "epoch": 0.67, "grad_norm": 8.419369921271771, "learning_rate": 2.59483309902002e-06, "loss": 1.1302, "step": 23394 }, { "epoch": 0.67, "grad_norm": 1.5625417536878796, "learning_rate": 2.5944265312550453e-06, "loss": 0.094, "step": 23395 }, { "epoch": 0.67, "grad_norm": 6.571059104057994, "learning_rate": 2.5940199841849223e-06, "loss": 0.7656, "step": 23396 }, { "epoch": 0.67, "grad_norm": 3.15885325046829, "learning_rate": 2.5936134578131534e-06, "loss": 0.1846, "step": 23397 }, { "epoch": 0.67, "grad_norm": 6.389905446029065, "learning_rate": 2.593206952143228e-06, "loss": 0.5624, "step": 23398 }, { "epoch": 0.67, "grad_norm": 5.287993645677196, "learning_rate": 2.5928004671786482e-06, "loss": 0.6474, "step": 23399 }, { "epoch": 0.67, "grad_norm": 7.274059625523353, "learning_rate": 2.5923940029229085e-06, "loss": 1.0722, "step": 23400 }, { "epoch": 0.67, "grad_norm": 6.400348233643374, "learning_rate": 2.591987559379508e-06, "loss": 0.607, "step": 23401 }, { "epoch": 0.67, "grad_norm": 8.743886010487104, "learning_rate": 2.5915811365519407e-06, "loss": 0.7542, "step": 23402 }, { "epoch": 0.67, "grad_norm": 3.168030499516201, "learning_rate": 2.5911747344437054e-06, "loss": 0.2656, "step": 23403 }, { "epoch": 0.67, "grad_norm": 5.2796539287510225, "learning_rate": 2.5907683530582976e-06, "loss": 0.5319, "step": 23404 }, { "epoch": 0.67, "grad_norm": 5.159290573196445, "learning_rate": 2.5903619923992117e-06, "loss": 0.3016, "step": 23405 }, { "epoch": 0.67, "grad_norm": 5.89902090256915, "learning_rate": 2.5899556524699464e-06, "loss": 0.3261, "step": 23406 }, { "epoch": 0.67, "grad_norm": 5.6203051577900025, "learning_rate": 2.589549333273994e-06, "loss": 0.2571, "step": 23407 }, { "epoch": 0.67, "grad_norm": 3.3186968384272637, "learning_rate": 2.5891430348148537e-06, "loss": 0.225, "step": 23408 }, { "epoch": 0.67, "grad_norm": 8.17236793446367, "learning_rate": 2.588736757096017e-06, "loss": 0.4219, "step": 23409 }, { "epoch": 0.67, "grad_norm": 6.462824406404209, "learning_rate": 2.5883305001209834e-06, "loss": 0.5073, "step": 23410 }, { "epoch": 0.67, "grad_norm": 4.267440716868591, "learning_rate": 2.5879242638932456e-06, "loss": 0.4323, "step": 23411 }, { "epoch": 0.67, "grad_norm": 7.0415699141437145, "learning_rate": 2.5875180484162977e-06, "loss": 0.4644, "step": 23412 }, { "epoch": 0.67, "grad_norm": 7.5688657844690095, "learning_rate": 2.5871118536936335e-06, "loss": 1.0768, "step": 23413 }, { "epoch": 0.67, "grad_norm": 11.51159574703781, "learning_rate": 2.586705679728749e-06, "loss": 0.4557, "step": 23414 }, { "epoch": 0.67, "grad_norm": 3.3877558896588753, "learning_rate": 2.58629952652514e-06, "loss": 0.1422, "step": 23415 }, { "epoch": 0.67, "grad_norm": 5.373805467807805, "learning_rate": 2.585893394086298e-06, "loss": 0.2644, "step": 23416 }, { "epoch": 0.67, "grad_norm": 4.826855526541193, "learning_rate": 2.585487282415719e-06, "loss": 0.3297, "step": 23417 }, { "epoch": 0.67, "grad_norm": 3.1201421745625058, "learning_rate": 2.585081191516896e-06, "loss": 0.2202, "step": 23418 }, { "epoch": 0.67, "grad_norm": 2.8408917325963388, "learning_rate": 2.5846751213933217e-06, "loss": 0.2293, "step": 23419 }, { "epoch": 0.67, "grad_norm": 6.0455372042686, "learning_rate": 2.584269072048489e-06, "loss": 0.4906, "step": 23420 }, { "epoch": 0.67, "grad_norm": 6.848680035602245, "learning_rate": 2.583863043485891e-06, "loss": 0.7183, "step": 23421 }, { "epoch": 0.67, "grad_norm": 10.255191790695111, "learning_rate": 2.5834570357090238e-06, "loss": 0.6047, "step": 23422 }, { "epoch": 0.67, "grad_norm": 3.3141751642064676, "learning_rate": 2.5830510487213766e-06, "loss": 0.4357, "step": 23423 }, { "epoch": 0.67, "grad_norm": 12.95037381873523, "learning_rate": 2.5826450825264453e-06, "loss": 0.8172, "step": 23424 }, { "epoch": 0.67, "grad_norm": 2.802058057895572, "learning_rate": 2.582239137127721e-06, "loss": 0.1734, "step": 23425 }, { "epoch": 0.67, "grad_norm": 6.927425251794613, "learning_rate": 2.581833212528696e-06, "loss": 0.5484, "step": 23426 }, { "epoch": 0.67, "grad_norm": 4.110004681401244, "learning_rate": 2.5814273087328596e-06, "loss": 0.5991, "step": 23427 }, { "epoch": 0.67, "grad_norm": 5.354721006005312, "learning_rate": 2.581021425743708e-06, "loss": 0.5566, "step": 23428 }, { "epoch": 0.67, "grad_norm": 10.870618090844786, "learning_rate": 2.58061556356473e-06, "loss": 0.7461, "step": 23429 }, { "epoch": 0.67, "grad_norm": 15.179533953241044, "learning_rate": 2.5802097221994206e-06, "loss": 0.8843, "step": 23430 }, { "epoch": 0.67, "grad_norm": 5.6329274159215625, "learning_rate": 2.5798039016512683e-06, "loss": 0.3088, "step": 23431 }, { "epoch": 0.67, "grad_norm": 5.171565389441625, "learning_rate": 2.5793981019237636e-06, "loss": 0.3243, "step": 23432 }, { "epoch": 0.67, "grad_norm": 7.192186768055443, "learning_rate": 2.5789923230204008e-06, "loss": 0.7242, "step": 23433 }, { "epoch": 0.67, "grad_norm": 8.362875317873986, "learning_rate": 2.5785865649446663e-06, "loss": 0.4476, "step": 23434 }, { "epoch": 0.67, "grad_norm": 5.966532230283432, "learning_rate": 2.5781808277000564e-06, "loss": 0.3702, "step": 23435 }, { "epoch": 0.67, "grad_norm": 3.9758519285384564, "learning_rate": 2.577775111290056e-06, "loss": 0.4519, "step": 23436 }, { "epoch": 0.67, "grad_norm": 6.611058879649528, "learning_rate": 2.5773694157181605e-06, "loss": 0.7379, "step": 23437 }, { "epoch": 0.67, "grad_norm": 4.903455241978774, "learning_rate": 2.576963740987858e-06, "loss": 0.2897, "step": 23438 }, { "epoch": 0.67, "grad_norm": 6.569207278812503, "learning_rate": 2.576558087102635e-06, "loss": 0.5662, "step": 23439 }, { "epoch": 0.67, "grad_norm": 8.052987808284628, "learning_rate": 2.576152454065987e-06, "loss": 1.079, "step": 23440 }, { "epoch": 0.67, "grad_norm": 4.969715312540997, "learning_rate": 2.575746841881399e-06, "loss": 0.4681, "step": 23441 }, { "epoch": 0.67, "grad_norm": 4.188307285779377, "learning_rate": 2.5753412505523646e-06, "loss": 0.5925, "step": 23442 }, { "epoch": 0.67, "grad_norm": 6.01135558183376, "learning_rate": 2.574935680082369e-06, "loss": 0.513, "step": 23443 }, { "epoch": 0.67, "grad_norm": 4.0002665132904465, "learning_rate": 2.574530130474905e-06, "loss": 0.2303, "step": 23444 }, { "epoch": 0.67, "grad_norm": 11.111232167750096, "learning_rate": 2.574124601733459e-06, "loss": 0.2971, "step": 23445 }, { "epoch": 0.67, "grad_norm": 8.56417061873716, "learning_rate": 2.573719093861519e-06, "loss": 0.8506, "step": 23446 }, { "epoch": 0.67, "grad_norm": 5.235383303872836, "learning_rate": 2.573313606862577e-06, "loss": 0.7536, "step": 23447 }, { "epoch": 0.67, "grad_norm": 4.964732411214402, "learning_rate": 2.572908140740117e-06, "loss": 0.4844, "step": 23448 }, { "epoch": 0.67, "grad_norm": 6.200924615210752, "learning_rate": 2.572502695497632e-06, "loss": 0.8597, "step": 23449 }, { "epoch": 0.67, "grad_norm": 9.721853588623876, "learning_rate": 2.5720972711386045e-06, "loss": 0.42, "step": 23450 }, { "epoch": 0.67, "grad_norm": 24.509035906232807, "learning_rate": 2.5716918676665274e-06, "loss": 0.6593, "step": 23451 }, { "epoch": 0.67, "grad_norm": 7.021614966948814, "learning_rate": 2.571286485084887e-06, "loss": 0.2234, "step": 23452 }, { "epoch": 0.67, "grad_norm": 4.3848356041708385, "learning_rate": 2.570881123397169e-06, "loss": 0.4794, "step": 23453 }, { "epoch": 0.67, "grad_norm": 5.336996216416514, "learning_rate": 2.5704757826068604e-06, "loss": 0.2822, "step": 23454 }, { "epoch": 0.67, "grad_norm": 4.340471031803298, "learning_rate": 2.5700704627174487e-06, "loss": 0.5877, "step": 23455 }, { "epoch": 0.67, "grad_norm": 8.720595024438483, "learning_rate": 2.5696651637324244e-06, "loss": 0.4852, "step": 23456 }, { "epoch": 0.67, "grad_norm": 5.196103235172142, "learning_rate": 2.569259885655271e-06, "loss": 0.684, "step": 23457 }, { "epoch": 0.67, "grad_norm": 6.529265179729756, "learning_rate": 2.5688546284894756e-06, "loss": 0.2596, "step": 23458 }, { "epoch": 0.67, "grad_norm": 3.940479090119756, "learning_rate": 2.5684493922385222e-06, "loss": 0.5827, "step": 23459 }, { "epoch": 0.67, "grad_norm": 4.654290523036476, "learning_rate": 2.5680441769059017e-06, "loss": 0.565, "step": 23460 }, { "epoch": 0.67, "grad_norm": 5.028996146386643, "learning_rate": 2.5676389824950966e-06, "loss": 0.2718, "step": 23461 }, { "epoch": 0.67, "grad_norm": 12.016629994283969, "learning_rate": 2.5672338090095925e-06, "loss": 0.3592, "step": 23462 }, { "epoch": 0.67, "grad_norm": 4.4753164318028285, "learning_rate": 2.5668286564528788e-06, "loss": 0.314, "step": 23463 }, { "epoch": 0.67, "grad_norm": 5.345080293742799, "learning_rate": 2.5664235248284388e-06, "loss": 0.1065, "step": 23464 }, { "epoch": 0.67, "grad_norm": 10.35307069551761, "learning_rate": 2.5660184141397572e-06, "loss": 0.4256, "step": 23465 }, { "epoch": 0.67, "grad_norm": 7.909276541312227, "learning_rate": 2.565613324390318e-06, "loss": 0.7368, "step": 23466 }, { "epoch": 0.67, "grad_norm": 4.646684280719694, "learning_rate": 2.5652082555836087e-06, "loss": 0.3726, "step": 23467 }, { "epoch": 0.67, "grad_norm": 7.694535176476306, "learning_rate": 2.5648032077231123e-06, "loss": 0.7708, "step": 23468 }, { "epoch": 0.67, "grad_norm": 3.4417319690306574, "learning_rate": 2.5643981808123153e-06, "loss": 0.4143, "step": 23469 }, { "epoch": 0.67, "grad_norm": 4.601663248354923, "learning_rate": 2.5639931748546985e-06, "loss": 0.3373, "step": 23470 }, { "epoch": 0.67, "grad_norm": 6.170429398450154, "learning_rate": 2.5635881898537513e-06, "loss": 0.4157, "step": 23471 }, { "epoch": 0.67, "grad_norm": 2.809644775676753, "learning_rate": 2.5631832258129546e-06, "loss": 0.1442, "step": 23472 }, { "epoch": 0.67, "grad_norm": 2.592416351499169, "learning_rate": 2.5627782827357905e-06, "loss": 0.1913, "step": 23473 }, { "epoch": 0.67, "grad_norm": 4.175402375207485, "learning_rate": 2.5623733606257474e-06, "loss": 0.7738, "step": 23474 }, { "epoch": 0.67, "grad_norm": 9.916283180665053, "learning_rate": 2.561968459486303e-06, "loss": 0.8865, "step": 23475 }, { "epoch": 0.67, "grad_norm": 4.175135020968017, "learning_rate": 2.5615635793209466e-06, "loss": 0.2826, "step": 23476 }, { "epoch": 0.67, "grad_norm": 3.5684825790341357, "learning_rate": 2.561158720133156e-06, "loss": 0.1894, "step": 23477 }, { "epoch": 0.67, "grad_norm": 5.794701942664898, "learning_rate": 2.560753881926419e-06, "loss": 0.3616, "step": 23478 }, { "epoch": 0.67, "grad_norm": 4.356322200620133, "learning_rate": 2.5603490647042164e-06, "loss": 0.5892, "step": 23479 }, { "epoch": 0.67, "grad_norm": 5.9736152512718, "learning_rate": 2.559944268470028e-06, "loss": 0.7359, "step": 23480 }, { "epoch": 0.67, "grad_norm": 3.9341484065171772, "learning_rate": 2.55953949322734e-06, "loss": 0.274, "step": 23481 }, { "epoch": 0.67, "grad_norm": 5.520798998551996, "learning_rate": 2.5591347389796315e-06, "loss": 0.6538, "step": 23482 }, { "epoch": 0.67, "grad_norm": 8.006041510530341, "learning_rate": 2.558730005730389e-06, "loss": 0.5612, "step": 23483 }, { "epoch": 0.67, "grad_norm": 3.520113440658031, "learning_rate": 2.5583252934830916e-06, "loss": 0.3438, "step": 23484 }, { "epoch": 0.67, "grad_norm": 3.6558328211215683, "learning_rate": 2.5579206022412207e-06, "loss": 0.6311, "step": 23485 }, { "epoch": 0.67, "grad_norm": 12.336219827852991, "learning_rate": 2.5575159320082567e-06, "loss": 0.7505, "step": 23486 }, { "epoch": 0.67, "grad_norm": 3.3241534109003514, "learning_rate": 2.5571112827876815e-06, "loss": 0.3547, "step": 23487 }, { "epoch": 0.67, "grad_norm": 3.871341639645206, "learning_rate": 2.55670665458298e-06, "loss": 0.1997, "step": 23488 }, { "epoch": 0.67, "grad_norm": 4.43894231217985, "learning_rate": 2.5563020473976272e-06, "loss": 0.5701, "step": 23489 }, { "epoch": 0.67, "grad_norm": 4.199648514867628, "learning_rate": 2.5558974612351105e-06, "loss": 0.5026, "step": 23490 }, { "epoch": 0.67, "grad_norm": 7.400905113025325, "learning_rate": 2.5554928960989063e-06, "loss": 0.4671, "step": 23491 }, { "epoch": 0.67, "grad_norm": 3.606786881650988, "learning_rate": 2.5550883519924953e-06, "loss": 0.21, "step": 23492 }, { "epoch": 0.67, "grad_norm": 10.579078349620868, "learning_rate": 2.554683828919357e-06, "loss": 0.592, "step": 23493 }, { "epoch": 0.67, "grad_norm": 3.5415947888130264, "learning_rate": 2.5542793268829743e-06, "loss": 0.4874, "step": 23494 }, { "epoch": 0.67, "grad_norm": 5.349065312173493, "learning_rate": 2.5538748458868235e-06, "loss": 0.3061, "step": 23495 }, { "epoch": 0.67, "grad_norm": 4.402772518693662, "learning_rate": 2.5534703859343856e-06, "loss": 0.5767, "step": 23496 }, { "epoch": 0.67, "grad_norm": 7.044000144923835, "learning_rate": 2.5530659470291434e-06, "loss": 0.4097, "step": 23497 }, { "epoch": 0.67, "grad_norm": 5.248301776336032, "learning_rate": 2.5526615291745737e-06, "loss": 0.6425, "step": 23498 }, { "epoch": 0.67, "grad_norm": 5.847013041886721, "learning_rate": 2.5522571323741552e-06, "loss": 0.3345, "step": 23499 }, { "epoch": 0.67, "grad_norm": 9.33649441003701, "learning_rate": 2.5518527566313655e-06, "loss": 1.026, "step": 23500 }, { "epoch": 0.67, "grad_norm": 5.093103847593151, "learning_rate": 2.5514484019496865e-06, "loss": 0.5421, "step": 23501 }, { "epoch": 0.67, "grad_norm": 3.7733797183460176, "learning_rate": 2.5510440683325943e-06, "loss": 0.2562, "step": 23502 }, { "epoch": 0.67, "grad_norm": 7.166282883799382, "learning_rate": 2.5506397557835682e-06, "loss": 0.4939, "step": 23503 }, { "epoch": 0.67, "grad_norm": 5.703703550491955, "learning_rate": 2.5502354643060885e-06, "loss": 0.6788, "step": 23504 }, { "epoch": 0.67, "grad_norm": 1.5688033229762657, "learning_rate": 2.5498311939036314e-06, "loss": 0.1069, "step": 23505 }, { "epoch": 0.67, "grad_norm": 4.5788802087105465, "learning_rate": 2.549426944579675e-06, "loss": 0.2109, "step": 23506 }, { "epoch": 0.67, "grad_norm": 5.3338734333425535, "learning_rate": 2.5490227163376946e-06, "loss": 0.2329, "step": 23507 }, { "epoch": 0.67, "grad_norm": 4.31277531284066, "learning_rate": 2.548618509181172e-06, "loss": 0.5841, "step": 23508 }, { "epoch": 0.67, "grad_norm": 7.493536198280746, "learning_rate": 2.5482143231135816e-06, "loss": 0.4776, "step": 23509 }, { "epoch": 0.67, "grad_norm": 5.1064821038787445, "learning_rate": 2.547810158138402e-06, "loss": 0.5673, "step": 23510 }, { "epoch": 0.67, "grad_norm": 9.366658887034088, "learning_rate": 2.547406014259108e-06, "loss": 0.6333, "step": 23511 }, { "epoch": 0.67, "grad_norm": 2.9849755846235912, "learning_rate": 2.547001891479181e-06, "loss": 0.1347, "step": 23512 }, { "epoch": 0.67, "grad_norm": 3.64601371682203, "learning_rate": 2.5465977898020943e-06, "loss": 0.4833, "step": 23513 }, { "epoch": 0.67, "grad_norm": 5.370780752598878, "learning_rate": 2.546193709231323e-06, "loss": 0.1789, "step": 23514 }, { "epoch": 0.67, "grad_norm": 5.341666300570773, "learning_rate": 2.545789649770346e-06, "loss": 0.5006, "step": 23515 }, { "epoch": 0.67, "grad_norm": 6.402272324690245, "learning_rate": 2.545385611422637e-06, "loss": 0.3947, "step": 23516 }, { "epoch": 0.67, "grad_norm": 2.0051491138251305, "learning_rate": 2.5449815941916756e-06, "loss": 0.0776, "step": 23517 }, { "epoch": 0.67, "grad_norm": 2.6869898910786603, "learning_rate": 2.5445775980809353e-06, "loss": 0.1847, "step": 23518 }, { "epoch": 0.67, "grad_norm": 6.972630331381275, "learning_rate": 2.5441736230938918e-06, "loss": 0.374, "step": 23519 }, { "epoch": 0.67, "grad_norm": 3.0465121713429917, "learning_rate": 2.543769669234018e-06, "loss": 0.3117, "step": 23520 }, { "epoch": 0.67, "grad_norm": 5.7535935450963755, "learning_rate": 2.5433657365047913e-06, "loss": 0.1414, "step": 23521 }, { "epoch": 0.67, "grad_norm": 3.019965630360999, "learning_rate": 2.542961824909689e-06, "loss": 0.2974, "step": 23522 }, { "epoch": 0.67, "grad_norm": 2.9313800246106303, "learning_rate": 2.542557934452181e-06, "loss": 0.2415, "step": 23523 }, { "epoch": 0.67, "grad_norm": 5.439202053227117, "learning_rate": 2.542154065135747e-06, "loss": 0.4023, "step": 23524 }, { "epoch": 0.67, "grad_norm": 7.471497928002589, "learning_rate": 2.5417502169638585e-06, "loss": 0.5186, "step": 23525 }, { "epoch": 0.67, "grad_norm": 2.8136213822119327, "learning_rate": 2.5413463899399903e-06, "loss": 0.1551, "step": 23526 }, { "epoch": 0.67, "grad_norm": 6.853362609250935, "learning_rate": 2.540942584067615e-06, "loss": 0.3732, "step": 23527 }, { "epoch": 0.67, "grad_norm": 3.3356583275219163, "learning_rate": 2.5405387993502074e-06, "loss": 0.27, "step": 23528 }, { "epoch": 0.67, "grad_norm": 4.016239430959492, "learning_rate": 2.5401350357912437e-06, "loss": 0.2079, "step": 23529 }, { "epoch": 0.67, "grad_norm": 3.5572921935379433, "learning_rate": 2.539731293394193e-06, "loss": 0.1985, "step": 23530 }, { "epoch": 0.67, "grad_norm": 9.492973992060033, "learning_rate": 2.539327572162534e-06, "loss": 0.687, "step": 23531 }, { "epoch": 0.67, "grad_norm": 6.097920169531734, "learning_rate": 2.5389238720997366e-06, "loss": 0.5815, "step": 23532 }, { "epoch": 0.67, "grad_norm": 4.575567448971469, "learning_rate": 2.538520193209274e-06, "loss": 0.188, "step": 23533 }, { "epoch": 0.67, "grad_norm": 8.631086882998012, "learning_rate": 2.538116535494617e-06, "loss": 0.4696, "step": 23534 }, { "epoch": 0.67, "grad_norm": 7.4756701982310405, "learning_rate": 2.5377128989592426e-06, "loss": 0.5981, "step": 23535 }, { "epoch": 0.67, "grad_norm": 7.893813472924522, "learning_rate": 2.537309283606618e-06, "loss": 0.6908, "step": 23536 }, { "epoch": 0.67, "grad_norm": 4.350056303690319, "learning_rate": 2.53690568944022e-06, "loss": 0.4197, "step": 23537 }, { "epoch": 0.67, "grad_norm": 4.957062104161451, "learning_rate": 2.53650211646352e-06, "loss": 0.2838, "step": 23538 }, { "epoch": 0.67, "grad_norm": 5.425628022789704, "learning_rate": 2.5360985646799896e-06, "loss": 0.8458, "step": 23539 }, { "epoch": 0.67, "grad_norm": 6.075943386155668, "learning_rate": 2.535695034093099e-06, "loss": 0.5207, "step": 23540 }, { "epoch": 0.67, "grad_norm": 4.453136189764587, "learning_rate": 2.535291524706319e-06, "loss": 0.5053, "step": 23541 }, { "epoch": 0.67, "grad_norm": 5.630447081503916, "learning_rate": 2.5348880365231253e-06, "loss": 0.624, "step": 23542 }, { "epoch": 0.67, "grad_norm": 4.325247679217026, "learning_rate": 2.5344845695469833e-06, "loss": 0.5728, "step": 23543 }, { "epoch": 0.67, "grad_norm": 4.915976583684241, "learning_rate": 2.53408112378137e-06, "loss": 0.3674, "step": 23544 }, { "epoch": 0.67, "grad_norm": 6.731577033100756, "learning_rate": 2.5336776992297523e-06, "loss": 0.3544, "step": 23545 }, { "epoch": 0.67, "grad_norm": 5.239648923469686, "learning_rate": 2.5332742958956e-06, "loss": 0.8326, "step": 23546 }, { "epoch": 0.67, "grad_norm": 4.035027825969817, "learning_rate": 2.532870913782387e-06, "loss": 0.6229, "step": 23547 }, { "epoch": 0.67, "grad_norm": 3.8807126052510568, "learning_rate": 2.5324675528935803e-06, "loss": 0.4162, "step": 23548 }, { "epoch": 0.67, "grad_norm": 7.637289680544452, "learning_rate": 2.532064213232653e-06, "loss": 0.8131, "step": 23549 }, { "epoch": 0.67, "grad_norm": 3.4908625843743724, "learning_rate": 2.531660894803072e-06, "loss": 0.4141, "step": 23550 }, { "epoch": 0.67, "grad_norm": 6.727939761562315, "learning_rate": 2.53125759760831e-06, "loss": 0.7129, "step": 23551 }, { "epoch": 0.67, "grad_norm": 5.5912575736485985, "learning_rate": 2.5308543216518346e-06, "loss": 0.4959, "step": 23552 }, { "epoch": 0.67, "grad_norm": 6.104990778766889, "learning_rate": 2.5304510669371135e-06, "loss": 0.3329, "step": 23553 }, { "epoch": 0.67, "grad_norm": 4.541316826788322, "learning_rate": 2.53004783346762e-06, "loss": 0.3971, "step": 23554 }, { "epoch": 0.67, "grad_norm": 4.769104984614887, "learning_rate": 2.529644621246819e-06, "loss": 0.4588, "step": 23555 }, { "epoch": 0.67, "grad_norm": 5.623007400231034, "learning_rate": 2.529241430278183e-06, "loss": 0.7131, "step": 23556 }, { "epoch": 0.67, "grad_norm": 4.125547748380435, "learning_rate": 2.528838260565177e-06, "loss": 0.4062, "step": 23557 }, { "epoch": 0.67, "grad_norm": 4.40560954424176, "learning_rate": 2.5284351121112727e-06, "loss": 0.2754, "step": 23558 }, { "epoch": 0.67, "grad_norm": 4.137520976171882, "learning_rate": 2.5280319849199377e-06, "loss": 0.1393, "step": 23559 }, { "epoch": 0.67, "grad_norm": 5.182362862358306, "learning_rate": 2.527628878994638e-06, "loss": 0.1402, "step": 23560 }, { "epoch": 0.67, "grad_norm": 4.077024347849664, "learning_rate": 2.527225794338841e-06, "loss": 0.4749, "step": 23561 }, { "epoch": 0.67, "grad_norm": 5.293448000674026, "learning_rate": 2.5268227309560166e-06, "loss": 0.3698, "step": 23562 }, { "epoch": 0.67, "grad_norm": 4.688274472153386, "learning_rate": 2.526419688849634e-06, "loss": 0.2407, "step": 23563 }, { "epoch": 0.67, "grad_norm": 3.150178470551178, "learning_rate": 2.5260166680231557e-06, "loss": 0.4486, "step": 23564 }, { "epoch": 0.67, "grad_norm": 5.235502161536501, "learning_rate": 2.525613668480053e-06, "loss": 0.6878, "step": 23565 }, { "epoch": 0.67, "grad_norm": 5.929762205274018, "learning_rate": 2.5252106902237916e-06, "loss": 0.5525, "step": 23566 }, { "epoch": 0.67, "grad_norm": 2.6770441235492317, "learning_rate": 2.524807733257838e-06, "loss": 0.1053, "step": 23567 }, { "epoch": 0.67, "grad_norm": 8.019552298628307, "learning_rate": 2.5244047975856566e-06, "loss": 0.8656, "step": 23568 }, { "epoch": 0.67, "grad_norm": 5.298437130117221, "learning_rate": 2.5240018832107164e-06, "loss": 0.4354, "step": 23569 }, { "epoch": 0.67, "grad_norm": 4.7453235142504315, "learning_rate": 2.5235989901364848e-06, "loss": 0.5196, "step": 23570 }, { "epoch": 0.68, "grad_norm": 4.402800488070786, "learning_rate": 2.5231961183664243e-06, "loss": 0.6543, "step": 23571 }, { "epoch": 0.68, "grad_norm": 5.770453976534809, "learning_rate": 2.522793267904006e-06, "loss": 0.3747, "step": 23572 }, { "epoch": 0.68, "grad_norm": 6.531690482293847, "learning_rate": 2.5223904387526887e-06, "loss": 0.4328, "step": 23573 }, { "epoch": 0.68, "grad_norm": 8.148781437258023, "learning_rate": 2.521987630915944e-06, "loss": 0.6162, "step": 23574 }, { "epoch": 0.68, "grad_norm": 7.861539933093828, "learning_rate": 2.521584844397232e-06, "loss": 0.5534, "step": 23575 }, { "epoch": 0.68, "grad_norm": 6.689950155463086, "learning_rate": 2.5211820792000226e-06, "loss": 0.7351, "step": 23576 }, { "epoch": 0.68, "grad_norm": 5.774853812675242, "learning_rate": 2.5207793353277765e-06, "loss": 0.304, "step": 23577 }, { "epoch": 0.68, "grad_norm": 5.6035170929070155, "learning_rate": 2.5203766127839624e-06, "loss": 0.4539, "step": 23578 }, { "epoch": 0.68, "grad_norm": 4.848631091468099, "learning_rate": 2.5199739115720428e-06, "loss": 0.1791, "step": 23579 }, { "epoch": 0.68, "grad_norm": 7.8513921766643335, "learning_rate": 2.519571231695481e-06, "loss": 0.8419, "step": 23580 }, { "epoch": 0.68, "grad_norm": 5.03552043535883, "learning_rate": 2.5191685731577436e-06, "loss": 0.4745, "step": 23581 }, { "epoch": 0.68, "grad_norm": 6.665495348392415, "learning_rate": 2.518765935962292e-06, "loss": 0.4321, "step": 23582 }, { "epoch": 0.68, "grad_norm": 3.157650023314612, "learning_rate": 2.5183633201125934e-06, "loss": 0.2658, "step": 23583 }, { "epoch": 0.68, "grad_norm": 4.959765823050594, "learning_rate": 2.517960725612108e-06, "loss": 0.4148, "step": 23584 }, { "epoch": 0.68, "grad_norm": 3.5417491996722767, "learning_rate": 2.517558152464303e-06, "loss": 0.2776, "step": 23585 }, { "epoch": 0.68, "grad_norm": 3.194409977455139, "learning_rate": 2.5171556006726387e-06, "loss": 0.553, "step": 23586 }, { "epoch": 0.68, "grad_norm": 9.304379346791043, "learning_rate": 2.5167530702405773e-06, "loss": 0.7888, "step": 23587 }, { "epoch": 0.68, "grad_norm": 4.661838531617439, "learning_rate": 2.516350561171586e-06, "loss": 0.4815, "step": 23588 }, { "epoch": 0.68, "grad_norm": 8.215216047309983, "learning_rate": 2.5159480734691234e-06, "loss": 0.6962, "step": 23589 }, { "epoch": 0.68, "grad_norm": 3.2972127551587684, "learning_rate": 2.5155456071366546e-06, "loss": 0.3315, "step": 23590 }, { "epoch": 0.68, "grad_norm": 5.725071940740488, "learning_rate": 2.5151431621776397e-06, "loss": 0.5611, "step": 23591 }, { "epoch": 0.68, "grad_norm": 5.646545135371698, "learning_rate": 2.5147407385955434e-06, "loss": 0.421, "step": 23592 }, { "epoch": 0.68, "grad_norm": 3.5519897116724715, "learning_rate": 2.514338336393828e-06, "loss": 0.3576, "step": 23593 }, { "epoch": 0.68, "grad_norm": 6.630936392110715, "learning_rate": 2.513935955575951e-06, "loss": 0.4926, "step": 23594 }, { "epoch": 0.68, "grad_norm": 4.567059368224611, "learning_rate": 2.513533596145379e-06, "loss": 0.3822, "step": 23595 }, { "epoch": 0.68, "grad_norm": 4.432296038699058, "learning_rate": 2.513131258105569e-06, "loss": 0.5643, "step": 23596 }, { "epoch": 0.68, "grad_norm": 4.119314436955678, "learning_rate": 2.5127289414599863e-06, "loss": 0.2357, "step": 23597 }, { "epoch": 0.68, "grad_norm": 7.783462853208344, "learning_rate": 2.5123266462120894e-06, "loss": 0.5569, "step": 23598 }, { "epoch": 0.68, "grad_norm": 5.08433950607581, "learning_rate": 2.511924372365343e-06, "loss": 0.4171, "step": 23599 }, { "epoch": 0.68, "grad_norm": 9.899745839642256, "learning_rate": 2.511522119923202e-06, "loss": 0.5521, "step": 23600 }, { "epoch": 0.68, "grad_norm": 10.338652492797177, "learning_rate": 2.511119888889131e-06, "loss": 0.5407, "step": 23601 }, { "epoch": 0.68, "grad_norm": 5.043652146157142, "learning_rate": 2.5107176792665873e-06, "loss": 0.5824, "step": 23602 }, { "epoch": 0.68, "grad_norm": 3.170553918842943, "learning_rate": 2.5103154910590334e-06, "loss": 0.3514, "step": 23603 }, { "epoch": 0.68, "grad_norm": 12.672364137289614, "learning_rate": 2.50991332426993e-06, "loss": 0.4642, "step": 23604 }, { "epoch": 0.68, "grad_norm": 2.540893715962229, "learning_rate": 2.509511178902736e-06, "loss": 0.205, "step": 23605 }, { "epoch": 0.68, "grad_norm": 4.919248188775123, "learning_rate": 2.5091090549609104e-06, "loss": 0.4401, "step": 23606 }, { "epoch": 0.68, "grad_norm": 10.652655300036994, "learning_rate": 2.5087069524479115e-06, "loss": 1.1449, "step": 23607 }, { "epoch": 0.68, "grad_norm": 3.4945886195740052, "learning_rate": 2.5083048713672017e-06, "loss": 0.3265, "step": 23608 }, { "epoch": 0.68, "grad_norm": 5.907120332726021, "learning_rate": 2.507902811722236e-06, "loss": 0.5556, "step": 23609 }, { "epoch": 0.68, "grad_norm": 4.880569602054963, "learning_rate": 2.5075007735164757e-06, "loss": 0.6209, "step": 23610 }, { "epoch": 0.68, "grad_norm": 5.3694959448690645, "learning_rate": 2.5070987567533823e-06, "loss": 0.2655, "step": 23611 }, { "epoch": 0.68, "grad_norm": 8.204056053003612, "learning_rate": 2.50669676143641e-06, "loss": 0.5773, "step": 23612 }, { "epoch": 0.68, "grad_norm": 12.096447527976494, "learning_rate": 2.5062947875690192e-06, "loss": 0.7684, "step": 23613 }, { "epoch": 0.68, "grad_norm": 5.017611077699001, "learning_rate": 2.5058928351546652e-06, "loss": 0.4462, "step": 23614 }, { "epoch": 0.68, "grad_norm": 4.434574506060834, "learning_rate": 2.5054909041968102e-06, "loss": 0.5914, "step": 23615 }, { "epoch": 0.68, "grad_norm": 5.447015963824253, "learning_rate": 2.5050889946989077e-06, "loss": 0.3343, "step": 23616 }, { "epoch": 0.68, "grad_norm": 8.148997974230255, "learning_rate": 2.504687106664419e-06, "loss": 0.5128, "step": 23617 }, { "epoch": 0.68, "grad_norm": 7.411101124658422, "learning_rate": 2.5042852400967986e-06, "loss": 0.8225, "step": 23618 }, { "epoch": 0.68, "grad_norm": 7.44128568456505, "learning_rate": 2.503883394999507e-06, "loss": 0.5178, "step": 23619 }, { "epoch": 0.68, "grad_norm": 3.3143975562864445, "learning_rate": 2.5034815713759988e-06, "loss": 0.324, "step": 23620 }, { "epoch": 0.68, "grad_norm": 5.647859367024599, "learning_rate": 2.503079769229729e-06, "loss": 0.5795, "step": 23621 }, { "epoch": 0.68, "grad_norm": 8.021673526767588, "learning_rate": 2.5026779885641582e-06, "loss": 0.5282, "step": 23622 }, { "epoch": 0.68, "grad_norm": 5.525344174224542, "learning_rate": 2.50227622938274e-06, "loss": 0.5049, "step": 23623 }, { "epoch": 0.68, "grad_norm": 4.47149689820003, "learning_rate": 2.501874491688934e-06, "loss": 0.2371, "step": 23624 }, { "epoch": 0.68, "grad_norm": 6.665562951594556, "learning_rate": 2.5014727754861924e-06, "loss": 0.5381, "step": 23625 }, { "epoch": 0.68, "grad_norm": 5.747755856653561, "learning_rate": 2.5010710807779737e-06, "loss": 0.3865, "step": 23626 }, { "epoch": 0.68, "grad_norm": 9.72491666664958, "learning_rate": 2.5006694075677333e-06, "loss": 0.2608, "step": 23627 }, { "epoch": 0.68, "grad_norm": 10.608510225134221, "learning_rate": 2.500267755858925e-06, "loss": 0.4627, "step": 23628 }, { "epoch": 0.68, "grad_norm": 5.147300675744617, "learning_rate": 2.4998661256550066e-06, "loss": 0.4484, "step": 23629 }, { "epoch": 0.68, "grad_norm": 6.1265502057855725, "learning_rate": 2.4994645169594304e-06, "loss": 0.5059, "step": 23630 }, { "epoch": 0.68, "grad_norm": 5.2336629810301245, "learning_rate": 2.4990629297756548e-06, "loss": 0.4045, "step": 23631 }, { "epoch": 0.68, "grad_norm": 5.697618559222698, "learning_rate": 2.4986613641071335e-06, "loss": 0.5709, "step": 23632 }, { "epoch": 0.68, "grad_norm": 7.704155657608917, "learning_rate": 2.49825981995732e-06, "loss": 0.37, "step": 23633 }, { "epoch": 0.68, "grad_norm": 5.22407552398137, "learning_rate": 2.4978582973296674e-06, "loss": 0.7254, "step": 23634 }, { "epoch": 0.68, "grad_norm": 6.865426177252684, "learning_rate": 2.497456796227632e-06, "loss": 0.6241, "step": 23635 }, { "epoch": 0.68, "grad_norm": 7.945447292328538, "learning_rate": 2.497055316654669e-06, "loss": 0.6851, "step": 23636 }, { "epoch": 0.68, "grad_norm": 6.765285932753127, "learning_rate": 2.4966538586142296e-06, "loss": 0.3928, "step": 23637 }, { "epoch": 0.68, "grad_norm": 7.083754568103552, "learning_rate": 2.4962524221097705e-06, "loss": 0.5556, "step": 23638 }, { "epoch": 0.68, "grad_norm": 7.016856605816233, "learning_rate": 2.4958510071447435e-06, "loss": 1.1801, "step": 23639 }, { "epoch": 0.68, "grad_norm": 2.542284895538222, "learning_rate": 2.495449613722602e-06, "loss": 0.1883, "step": 23640 }, { "epoch": 0.68, "grad_norm": 1.9153902314468056, "learning_rate": 2.4950482418467977e-06, "loss": 0.1158, "step": 23641 }, { "epoch": 0.68, "grad_norm": 5.31680643900442, "learning_rate": 2.494646891520786e-06, "loss": 0.3422, "step": 23642 }, { "epoch": 0.68, "grad_norm": 5.392517642159003, "learning_rate": 2.4942455627480174e-06, "loss": 0.5895, "step": 23643 }, { "epoch": 0.68, "grad_norm": 3.4511305283561544, "learning_rate": 2.4938442555319455e-06, "loss": 0.3073, "step": 23644 }, { "epoch": 0.68, "grad_norm": 3.5496650846754045, "learning_rate": 2.493442969876025e-06, "loss": 0.2416, "step": 23645 }, { "epoch": 0.68, "grad_norm": 5.845770257168611, "learning_rate": 2.4930417057837055e-06, "loss": 0.2853, "step": 23646 }, { "epoch": 0.68, "grad_norm": 5.576904633539599, "learning_rate": 2.49264046325844e-06, "loss": 0.4836, "step": 23647 }, { "epoch": 0.68, "grad_norm": 4.701079915648521, "learning_rate": 2.4922392423036777e-06, "loss": 0.3158, "step": 23648 }, { "epoch": 0.68, "grad_norm": 7.5019908170143, "learning_rate": 2.4918380429228743e-06, "loss": 0.702, "step": 23649 }, { "epoch": 0.68, "grad_norm": 4.007349801821439, "learning_rate": 2.4914368651194776e-06, "loss": 0.3101, "step": 23650 }, { "epoch": 0.68, "grad_norm": 9.739131689904559, "learning_rate": 2.4910357088969413e-06, "loss": 0.6477, "step": 23651 }, { "epoch": 0.68, "grad_norm": 3.340179497869489, "learning_rate": 2.4906345742587167e-06, "loss": 0.3342, "step": 23652 }, { "epoch": 0.68, "grad_norm": 4.216311619000435, "learning_rate": 2.4902334612082545e-06, "loss": 0.2615, "step": 23653 }, { "epoch": 0.68, "grad_norm": 5.78093689766589, "learning_rate": 2.4898323697490053e-06, "loss": 0.2861, "step": 23654 }, { "epoch": 0.68, "grad_norm": 5.351493200145794, "learning_rate": 2.4894312998844163e-06, "loss": 0.2513, "step": 23655 }, { "epoch": 0.68, "grad_norm": 6.014809669628972, "learning_rate": 2.489030251617943e-06, "loss": 0.4024, "step": 23656 }, { "epoch": 0.68, "grad_norm": 4.733224042729908, "learning_rate": 2.4886292249530314e-06, "loss": 0.2623, "step": 23657 }, { "epoch": 0.68, "grad_norm": 7.637752562497205, "learning_rate": 2.488228219893135e-06, "loss": 0.641, "step": 23658 }, { "epoch": 0.68, "grad_norm": 3.875666976256005, "learning_rate": 2.487827236441703e-06, "loss": 0.3771, "step": 23659 }, { "epoch": 0.68, "grad_norm": 6.160575779844687, "learning_rate": 2.487426274602181e-06, "loss": 0.6511, "step": 23660 }, { "epoch": 0.68, "grad_norm": 9.910725446029526, "learning_rate": 2.487025334378024e-06, "loss": 0.8225, "step": 23661 }, { "epoch": 0.68, "grad_norm": 3.597903496843866, "learning_rate": 2.486624415772676e-06, "loss": 0.3064, "step": 23662 }, { "epoch": 0.68, "grad_norm": 6.262241924701763, "learning_rate": 2.486223518789591e-06, "loss": 0.7479, "step": 23663 }, { "epoch": 0.68, "grad_norm": 3.791072563989807, "learning_rate": 2.4858226434322135e-06, "loss": 0.1202, "step": 23664 }, { "epoch": 0.68, "grad_norm": 4.582414199683496, "learning_rate": 2.4854217897039963e-06, "loss": 0.4405, "step": 23665 }, { "epoch": 0.68, "grad_norm": 4.765989058299917, "learning_rate": 2.4850209576083857e-06, "loss": 0.3201, "step": 23666 }, { "epoch": 0.68, "grad_norm": 10.3322197303815, "learning_rate": 2.4846201471488303e-06, "loss": 0.5782, "step": 23667 }, { "epoch": 0.68, "grad_norm": 6.958000616772595, "learning_rate": 2.484219358328776e-06, "loss": 0.6526, "step": 23668 }, { "epoch": 0.68, "grad_norm": 4.532771532517257, "learning_rate": 2.483818591151673e-06, "loss": 0.5342, "step": 23669 }, { "epoch": 0.68, "grad_norm": 5.939753576555295, "learning_rate": 2.4834178456209705e-06, "loss": 0.471, "step": 23670 }, { "epoch": 0.68, "grad_norm": 4.582709012714848, "learning_rate": 2.483017121740113e-06, "loss": 0.7158, "step": 23671 }, { "epoch": 0.68, "grad_norm": 3.4863663871653006, "learning_rate": 2.482616419512551e-06, "loss": 0.4968, "step": 23672 }, { "epoch": 0.68, "grad_norm": 7.282793790035771, "learning_rate": 2.4822157389417302e-06, "loss": 0.5896, "step": 23673 }, { "epoch": 0.68, "grad_norm": 5.7443386071000395, "learning_rate": 2.481815080031097e-06, "loss": 0.5252, "step": 23674 }, { "epoch": 0.68, "grad_norm": 5.377508930782349, "learning_rate": 2.481414442784097e-06, "loss": 0.5376, "step": 23675 }, { "epoch": 0.68, "grad_norm": 3.397937255522571, "learning_rate": 2.4810138272041776e-06, "loss": 0.3107, "step": 23676 }, { "epoch": 0.68, "grad_norm": 6.714470676002154, "learning_rate": 2.480613233294789e-06, "loss": 0.5129, "step": 23677 }, { "epoch": 0.68, "grad_norm": 10.889483792949806, "learning_rate": 2.4802126610593724e-06, "loss": 1.0803, "step": 23678 }, { "epoch": 0.68, "grad_norm": 3.3571522656050914, "learning_rate": 2.479812110501378e-06, "loss": 0.2879, "step": 23679 }, { "epoch": 0.68, "grad_norm": 8.023548396606122, "learning_rate": 2.4794115816242496e-06, "loss": 0.7546, "step": 23680 }, { "epoch": 0.68, "grad_norm": 5.102695123211535, "learning_rate": 2.4790110744314333e-06, "loss": 0.661, "step": 23681 }, { "epoch": 0.68, "grad_norm": 5.60419979818352, "learning_rate": 2.4786105889263717e-06, "loss": 0.8555, "step": 23682 }, { "epoch": 0.68, "grad_norm": 4.712578938275857, "learning_rate": 2.4782101251125152e-06, "loss": 0.6208, "step": 23683 }, { "epoch": 0.68, "grad_norm": 4.919791031718356, "learning_rate": 2.4778096829933045e-06, "loss": 0.3348, "step": 23684 }, { "epoch": 0.68, "grad_norm": 3.3090184106968836, "learning_rate": 2.477409262572187e-06, "loss": 0.3315, "step": 23685 }, { "epoch": 0.68, "grad_norm": 3.2884183420509374, "learning_rate": 2.477008863852608e-06, "loss": 0.2343, "step": 23686 }, { "epoch": 0.68, "grad_norm": 3.998526242081207, "learning_rate": 2.476608486838012e-06, "loss": 0.5457, "step": 23687 }, { "epoch": 0.68, "grad_norm": 3.7712243265574736, "learning_rate": 2.476208131531842e-06, "loss": 0.429, "step": 23688 }, { "epoch": 0.68, "grad_norm": 4.325245088457512, "learning_rate": 2.4758077979375413e-06, "loss": 0.4679, "step": 23689 }, { "epoch": 0.68, "grad_norm": 3.876343770988815, "learning_rate": 2.4754074860585565e-06, "loss": 0.3081, "step": 23690 }, { "epoch": 0.68, "grad_norm": 3.1273141017616735, "learning_rate": 2.4750071958983284e-06, "loss": 0.0769, "step": 23691 }, { "epoch": 0.68, "grad_norm": 3.516371482434852, "learning_rate": 2.474606927460305e-06, "loss": 0.3845, "step": 23692 }, { "epoch": 0.68, "grad_norm": 6.314176695578534, "learning_rate": 2.474206680747927e-06, "loss": 0.5738, "step": 23693 }, { "epoch": 0.68, "grad_norm": 7.4766368005263475, "learning_rate": 2.473806455764636e-06, "loss": 0.3997, "step": 23694 }, { "epoch": 0.68, "grad_norm": 6.208033620610584, "learning_rate": 2.473406252513879e-06, "loss": 0.5173, "step": 23695 }, { "epoch": 0.68, "grad_norm": 4.415374263871685, "learning_rate": 2.473006070999095e-06, "loss": 0.3944, "step": 23696 }, { "epoch": 0.68, "grad_norm": 6.8309818113964695, "learning_rate": 2.4726059112237304e-06, "loss": 0.3069, "step": 23697 }, { "epoch": 0.68, "grad_norm": 5.101201563125897, "learning_rate": 2.472205773191224e-06, "loss": 0.6992, "step": 23698 }, { "epoch": 0.68, "grad_norm": 8.665653560796333, "learning_rate": 2.4718056569050225e-06, "loss": 0.795, "step": 23699 }, { "epoch": 0.68, "grad_norm": 6.3868253646491215, "learning_rate": 2.471405562368565e-06, "loss": 0.564, "step": 23700 }, { "epoch": 0.68, "grad_norm": 8.667990259433514, "learning_rate": 2.471005489585293e-06, "loss": 0.4516, "step": 23701 }, { "epoch": 0.68, "grad_norm": 4.057628234040318, "learning_rate": 2.470605438558651e-06, "loss": 0.2882, "step": 23702 }, { "epoch": 0.68, "grad_norm": 7.1353049462104305, "learning_rate": 2.470205409292077e-06, "loss": 0.7726, "step": 23703 }, { "epoch": 0.68, "grad_norm": 4.545385031385336, "learning_rate": 2.469805401789016e-06, "loss": 0.5785, "step": 23704 }, { "epoch": 0.68, "grad_norm": 9.129150491825216, "learning_rate": 2.469405416052906e-06, "loss": 0.7648, "step": 23705 }, { "epoch": 0.68, "grad_norm": 5.668156605610426, "learning_rate": 2.4690054520871913e-06, "loss": 0.7714, "step": 23706 }, { "epoch": 0.68, "grad_norm": 2.760503973869748, "learning_rate": 2.468605509895311e-06, "loss": 0.2562, "step": 23707 }, { "epoch": 0.68, "grad_norm": 9.169236175607399, "learning_rate": 2.4682055894807062e-06, "loss": 0.3896, "step": 23708 }, { "epoch": 0.68, "grad_norm": 3.8548531393484393, "learning_rate": 2.4678056908468153e-06, "loss": 0.2919, "step": 23709 }, { "epoch": 0.68, "grad_norm": 8.947919307496669, "learning_rate": 2.46740581399708e-06, "loss": 0.6705, "step": 23710 }, { "epoch": 0.68, "grad_norm": 7.669360057439168, "learning_rate": 2.467005958934942e-06, "loss": 0.3889, "step": 23711 }, { "epoch": 0.68, "grad_norm": 9.505322019689709, "learning_rate": 2.4666061256638387e-06, "loss": 0.6087, "step": 23712 }, { "epoch": 0.68, "grad_norm": 3.2310298261762274, "learning_rate": 2.466206314187212e-06, "loss": 0.4714, "step": 23713 }, { "epoch": 0.68, "grad_norm": 5.285435885568736, "learning_rate": 2.4658065245085015e-06, "loss": 0.2677, "step": 23714 }, { "epoch": 0.68, "grad_norm": 7.565298224809178, "learning_rate": 2.465406756631145e-06, "loss": 0.3829, "step": 23715 }, { "epoch": 0.68, "grad_norm": 3.5366837041596084, "learning_rate": 2.4650070105585805e-06, "loss": 0.4672, "step": 23716 }, { "epoch": 0.68, "grad_norm": 14.725471711172672, "learning_rate": 2.464607286294248e-06, "loss": 0.574, "step": 23717 }, { "epoch": 0.68, "grad_norm": 3.073350369295193, "learning_rate": 2.464207583841589e-06, "loss": 0.2118, "step": 23718 }, { "epoch": 0.68, "grad_norm": 4.2310569967092695, "learning_rate": 2.46380790320404e-06, "loss": 0.4038, "step": 23719 }, { "epoch": 0.68, "grad_norm": 5.877886387769626, "learning_rate": 2.4634082443850394e-06, "loss": 0.4958, "step": 23720 }, { "epoch": 0.68, "grad_norm": 2.854051104988249, "learning_rate": 2.463008607388023e-06, "loss": 0.3112, "step": 23721 }, { "epoch": 0.68, "grad_norm": 5.361992466523947, "learning_rate": 2.462608992216434e-06, "loss": 0.573, "step": 23722 }, { "epoch": 0.68, "grad_norm": 8.438830913550763, "learning_rate": 2.4622093988737046e-06, "loss": 0.6169, "step": 23723 }, { "epoch": 0.68, "grad_norm": 8.47042548578029, "learning_rate": 2.461809827363278e-06, "loss": 0.5818, "step": 23724 }, { "epoch": 0.68, "grad_norm": 3.17951837413246, "learning_rate": 2.4614102776885863e-06, "loss": 0.3121, "step": 23725 }, { "epoch": 0.68, "grad_norm": 5.387992348710131, "learning_rate": 2.4610107498530716e-06, "loss": 0.2688, "step": 23726 }, { "epoch": 0.68, "grad_norm": 7.6467533926575735, "learning_rate": 2.4606112438601688e-06, "loss": 0.6677, "step": 23727 }, { "epoch": 0.68, "grad_norm": 7.997899882511043, "learning_rate": 2.460211759713313e-06, "loss": 0.1615, "step": 23728 }, { "epoch": 0.68, "grad_norm": 10.84079453706061, "learning_rate": 2.4598122974159446e-06, "loss": 0.44, "step": 23729 }, { "epoch": 0.68, "grad_norm": 5.007922761985717, "learning_rate": 2.4594128569714966e-06, "loss": 0.649, "step": 23730 }, { "epoch": 0.68, "grad_norm": 5.882584890392639, "learning_rate": 2.459013438383408e-06, "loss": 0.5957, "step": 23731 }, { "epoch": 0.68, "grad_norm": 5.537876707055367, "learning_rate": 2.458614041655113e-06, "loss": 0.3682, "step": 23732 }, { "epoch": 0.68, "grad_norm": 5.291118363135986, "learning_rate": 2.4582146667900497e-06, "loss": 0.5257, "step": 23733 }, { "epoch": 0.68, "grad_norm": 4.12652646344767, "learning_rate": 2.4578153137916535e-06, "loss": 0.173, "step": 23734 }, { "epoch": 0.68, "grad_norm": 3.981793633298628, "learning_rate": 2.4574159826633565e-06, "loss": 0.4955, "step": 23735 }, { "epoch": 0.68, "grad_norm": 4.118554671869998, "learning_rate": 2.4570166734085987e-06, "loss": 0.256, "step": 23736 }, { "epoch": 0.68, "grad_norm": 7.359518126697559, "learning_rate": 2.4566173860308118e-06, "loss": 0.3914, "step": 23737 }, { "epoch": 0.68, "grad_norm": 4.1145920797649405, "learning_rate": 2.456218120533434e-06, "loss": 0.5497, "step": 23738 }, { "epoch": 0.68, "grad_norm": 9.14736632502515, "learning_rate": 2.455818876919897e-06, "loss": 0.7622, "step": 23739 }, { "epoch": 0.68, "grad_norm": 9.760569980737648, "learning_rate": 2.4554196551936386e-06, "loss": 0.9879, "step": 23740 }, { "epoch": 0.68, "grad_norm": 3.7107324162080544, "learning_rate": 2.455020455358091e-06, "loss": 0.4654, "step": 23741 }, { "epoch": 0.68, "grad_norm": 3.6127423023682756, "learning_rate": 2.4546212774166883e-06, "loss": 0.1963, "step": 23742 }, { "epoch": 0.68, "grad_norm": 5.86061599813744, "learning_rate": 2.4542221213728667e-06, "loss": 0.5801, "step": 23743 }, { "epoch": 0.68, "grad_norm": 4.829373886406455, "learning_rate": 2.4538229872300573e-06, "loss": 0.7286, "step": 23744 }, { "epoch": 0.68, "grad_norm": 8.13800674477947, "learning_rate": 2.4534238749916965e-06, "loss": 0.6095, "step": 23745 }, { "epoch": 0.68, "grad_norm": 4.121891353132245, "learning_rate": 2.4530247846612165e-06, "loss": 0.3222, "step": 23746 }, { "epoch": 0.68, "grad_norm": 4.012761026133558, "learning_rate": 2.4526257162420512e-06, "loss": 0.5245, "step": 23747 }, { "epoch": 0.68, "grad_norm": 6.128332943965154, "learning_rate": 2.452226669737631e-06, "loss": 0.209, "step": 23748 }, { "epoch": 0.68, "grad_norm": 8.434598720617974, "learning_rate": 2.4518276451513934e-06, "loss": 0.4359, "step": 23749 }, { "epoch": 0.68, "grad_norm": 10.555484179762956, "learning_rate": 2.451428642486767e-06, "loss": 0.5301, "step": 23750 }, { "epoch": 0.68, "grad_norm": 5.007983866837752, "learning_rate": 2.451029661747185e-06, "loss": 0.145, "step": 23751 }, { "epoch": 0.68, "grad_norm": 3.565853732075165, "learning_rate": 2.4506307029360837e-06, "loss": 0.1935, "step": 23752 }, { "epoch": 0.68, "grad_norm": 5.6948618141469955, "learning_rate": 2.450231766056893e-06, "loss": 0.5775, "step": 23753 }, { "epoch": 0.68, "grad_norm": 3.1057506505306383, "learning_rate": 2.449832851113044e-06, "loss": 0.2458, "step": 23754 }, { "epoch": 0.68, "grad_norm": 7.014321186940474, "learning_rate": 2.449433958107967e-06, "loss": 0.7033, "step": 23755 }, { "epoch": 0.68, "grad_norm": 4.916102218016841, "learning_rate": 2.449035087045098e-06, "loss": 0.5591, "step": 23756 }, { "epoch": 0.68, "grad_norm": 7.065766752121077, "learning_rate": 2.4486362379278634e-06, "loss": 0.7576, "step": 23757 }, { "epoch": 0.68, "grad_norm": 2.9724095125598256, "learning_rate": 2.448237410759698e-06, "loss": 0.3509, "step": 23758 }, { "epoch": 0.68, "grad_norm": 6.135246231311107, "learning_rate": 2.447838605544033e-06, "loss": 0.4885, "step": 23759 }, { "epoch": 0.68, "grad_norm": 12.164570214095209, "learning_rate": 2.447439822284299e-06, "loss": 0.2564, "step": 23760 }, { "epoch": 0.68, "grad_norm": 5.860621693542987, "learning_rate": 2.4470410609839252e-06, "loss": 0.4201, "step": 23761 }, { "epoch": 0.68, "grad_norm": 3.214303213404902, "learning_rate": 2.4466423216463413e-06, "loss": 0.283, "step": 23762 }, { "epoch": 0.68, "grad_norm": 8.991823986139702, "learning_rate": 2.4462436042749814e-06, "loss": 0.5324, "step": 23763 }, { "epoch": 0.68, "grad_norm": 4.691839472884035, "learning_rate": 2.4458449088732713e-06, "loss": 0.4499, "step": 23764 }, { "epoch": 0.68, "grad_norm": 7.52645335055941, "learning_rate": 2.4454462354446445e-06, "loss": 0.4898, "step": 23765 }, { "epoch": 0.68, "grad_norm": 3.5417549888967215, "learning_rate": 2.4450475839925276e-06, "loss": 0.193, "step": 23766 }, { "epoch": 0.68, "grad_norm": 5.266256594487391, "learning_rate": 2.4446489545203533e-06, "loss": 0.1833, "step": 23767 }, { "epoch": 0.68, "grad_norm": 5.7107209505076195, "learning_rate": 2.44425034703155e-06, "loss": 0.3426, "step": 23768 }, { "epoch": 0.68, "grad_norm": 7.254656660945694, "learning_rate": 2.4438517615295437e-06, "loss": 0.4937, "step": 23769 }, { "epoch": 0.68, "grad_norm": 4.51646492086382, "learning_rate": 2.4434531980177685e-06, "loss": 0.2995, "step": 23770 }, { "epoch": 0.68, "grad_norm": 8.404749456298905, "learning_rate": 2.4430546564996484e-06, "loss": 0.7502, "step": 23771 }, { "epoch": 0.68, "grad_norm": 6.197072648567314, "learning_rate": 2.442656136978616e-06, "loss": 0.3315, "step": 23772 }, { "epoch": 0.68, "grad_norm": 4.710121096696393, "learning_rate": 2.4422576394580967e-06, "loss": 0.4649, "step": 23773 }, { "epoch": 0.68, "grad_norm": 5.756118835876686, "learning_rate": 2.4418591639415212e-06, "loss": 0.2456, "step": 23774 }, { "epoch": 0.68, "grad_norm": 7.947137703642995, "learning_rate": 2.441460710432317e-06, "loss": 0.7971, "step": 23775 }, { "epoch": 0.68, "grad_norm": 7.120905318620849, "learning_rate": 2.441062278933909e-06, "loss": 0.4307, "step": 23776 }, { "epoch": 0.68, "grad_norm": 3.8580004912406247, "learning_rate": 2.440663869449729e-06, "loss": 0.3312, "step": 23777 }, { "epoch": 0.68, "grad_norm": 8.09593118270455, "learning_rate": 2.4402654819832013e-06, "loss": 0.5571, "step": 23778 }, { "epoch": 0.68, "grad_norm": 3.704900774534388, "learning_rate": 2.4398671165377557e-06, "loss": 0.2716, "step": 23779 }, { "epoch": 0.68, "grad_norm": 3.8708482466219807, "learning_rate": 2.4394687731168184e-06, "loss": 0.3343, "step": 23780 }, { "epoch": 0.68, "grad_norm": 4.4549030167526515, "learning_rate": 2.4390704517238156e-06, "loss": 0.5203, "step": 23781 }, { "epoch": 0.68, "grad_norm": 1.4165589441220725, "learning_rate": 2.4386721523621733e-06, "loss": 0.267, "step": 23782 }, { "epoch": 0.68, "grad_norm": 7.2864712180368425, "learning_rate": 2.4382738750353185e-06, "loss": 0.6869, "step": 23783 }, { "epoch": 0.68, "grad_norm": 5.603563533882638, "learning_rate": 2.43787561974668e-06, "loss": 0.743, "step": 23784 }, { "epoch": 0.68, "grad_norm": 5.708198652511742, "learning_rate": 2.4374773864996803e-06, "loss": 0.6923, "step": 23785 }, { "epoch": 0.68, "grad_norm": 4.66469821399443, "learning_rate": 2.437079175297749e-06, "loss": 0.444, "step": 23786 }, { "epoch": 0.68, "grad_norm": 11.00258744759578, "learning_rate": 2.4366809861443104e-06, "loss": 0.6203, "step": 23787 }, { "epoch": 0.68, "grad_norm": 6.746232994952989, "learning_rate": 2.4362828190427895e-06, "loss": 0.3331, "step": 23788 }, { "epoch": 0.68, "grad_norm": 6.970388108753896, "learning_rate": 2.4358846739966097e-06, "loss": 0.1728, "step": 23789 }, { "epoch": 0.68, "grad_norm": 3.107438543575904, "learning_rate": 2.4354865510092003e-06, "loss": 0.3232, "step": 23790 }, { "epoch": 0.68, "grad_norm": 3.9975705517619944, "learning_rate": 2.435088450083983e-06, "loss": 0.284, "step": 23791 }, { "epoch": 0.68, "grad_norm": 1.5276614002735196, "learning_rate": 2.434690371224383e-06, "loss": 0.0896, "step": 23792 }, { "epoch": 0.68, "grad_norm": 9.004170246350094, "learning_rate": 2.4342923144338283e-06, "loss": 0.5073, "step": 23793 }, { "epoch": 0.68, "grad_norm": 8.486887017251455, "learning_rate": 2.433894279715741e-06, "loss": 0.8854, "step": 23794 }, { "epoch": 0.68, "grad_norm": 10.223267656116782, "learning_rate": 2.433496267073545e-06, "loss": 0.5718, "step": 23795 }, { "epoch": 0.68, "grad_norm": 3.768477518913242, "learning_rate": 2.433098276510663e-06, "loss": 0.3647, "step": 23796 }, { "epoch": 0.68, "grad_norm": 5.377215749338621, "learning_rate": 2.4327003080305223e-06, "loss": 0.4101, "step": 23797 }, { "epoch": 0.68, "grad_norm": 11.731132049048876, "learning_rate": 2.4323023616365427e-06, "loss": 0.3802, "step": 23798 }, { "epoch": 0.68, "grad_norm": 7.823910703798919, "learning_rate": 2.43190443733215e-06, "loss": 0.5294, "step": 23799 }, { "epoch": 0.68, "grad_norm": 8.363857543418098, "learning_rate": 2.431506535120769e-06, "loss": 0.6568, "step": 23800 }, { "epoch": 0.68, "grad_norm": 6.135292280800452, "learning_rate": 2.431108655005821e-06, "loss": 0.6881, "step": 23801 }, { "epoch": 0.68, "grad_norm": 4.95665107633956, "learning_rate": 2.430710796990729e-06, "loss": 0.5169, "step": 23802 }, { "epoch": 0.68, "grad_norm": 10.620654036359257, "learning_rate": 2.430312961078914e-06, "loss": 0.7668, "step": 23803 }, { "epoch": 0.68, "grad_norm": 4.028911119080494, "learning_rate": 2.4299151472738015e-06, "loss": 0.5143, "step": 23804 }, { "epoch": 0.68, "grad_norm": 6.153234004444752, "learning_rate": 2.429517355578811e-06, "loss": 0.4836, "step": 23805 }, { "epoch": 0.68, "grad_norm": 7.327010535633358, "learning_rate": 2.429119585997368e-06, "loss": 0.5774, "step": 23806 }, { "epoch": 0.68, "grad_norm": 5.8240995030313245, "learning_rate": 2.428721838532893e-06, "loss": 0.5462, "step": 23807 }, { "epoch": 0.68, "grad_norm": 1.3193038270150115, "learning_rate": 2.428324113188805e-06, "loss": 0.1343, "step": 23808 }, { "epoch": 0.68, "grad_norm": 4.78378851825965, "learning_rate": 2.4279264099685306e-06, "loss": 0.6893, "step": 23809 }, { "epoch": 0.68, "grad_norm": 3.5096185193786447, "learning_rate": 2.427528728875486e-06, "loss": 0.3916, "step": 23810 }, { "epoch": 0.68, "grad_norm": 7.125182032351217, "learning_rate": 2.4271310699130972e-06, "loss": 0.3861, "step": 23811 }, { "epoch": 0.68, "grad_norm": 5.550386289951528, "learning_rate": 2.4267334330847813e-06, "loss": 0.4488, "step": 23812 }, { "epoch": 0.68, "grad_norm": 1.4818517420378308, "learning_rate": 2.4263358183939627e-06, "loss": 0.1077, "step": 23813 }, { "epoch": 0.68, "grad_norm": 6.9084494010031445, "learning_rate": 2.4259382258440607e-06, "loss": 0.6118, "step": 23814 }, { "epoch": 0.68, "grad_norm": 1.9047696822651954, "learning_rate": 2.4255406554384925e-06, "loss": 0.1513, "step": 23815 }, { "epoch": 0.68, "grad_norm": 11.504819772196463, "learning_rate": 2.425143107180683e-06, "loss": 0.5718, "step": 23816 }, { "epoch": 0.68, "grad_norm": 7.7390906244264075, "learning_rate": 2.424745581074049e-06, "loss": 0.4453, "step": 23817 }, { "epoch": 0.68, "grad_norm": 2.7623615730781337, "learning_rate": 2.424348077122013e-06, "loss": 0.1066, "step": 23818 }, { "epoch": 0.68, "grad_norm": 5.08951844517425, "learning_rate": 2.423950595327992e-06, "loss": 0.2536, "step": 23819 }, { "epoch": 0.68, "grad_norm": 8.62147472042808, "learning_rate": 2.423553135695408e-06, "loss": 0.5894, "step": 23820 }, { "epoch": 0.68, "grad_norm": 5.122044129572915, "learning_rate": 2.4231556982276795e-06, "loss": 0.4456, "step": 23821 }, { "epoch": 0.68, "grad_norm": 5.048169327333829, "learning_rate": 2.4227582829282253e-06, "loss": 0.3142, "step": 23822 }, { "epoch": 0.68, "grad_norm": 3.6663148216294443, "learning_rate": 2.4223608898004624e-06, "loss": 0.3817, "step": 23823 }, { "epoch": 0.68, "grad_norm": 3.7459788856756515, "learning_rate": 2.421963518847812e-06, "loss": 0.2131, "step": 23824 }, { "epoch": 0.68, "grad_norm": 3.5077260607835816, "learning_rate": 2.4215661700736933e-06, "loss": 0.2509, "step": 23825 }, { "epoch": 0.68, "grad_norm": 4.748768044377461, "learning_rate": 2.4211688434815212e-06, "loss": 0.3176, "step": 23826 }, { "epoch": 0.68, "grad_norm": 7.753621086046434, "learning_rate": 2.4207715390747184e-06, "loss": 0.672, "step": 23827 }, { "epoch": 0.68, "grad_norm": 8.717127768706256, "learning_rate": 2.4203742568567e-06, "loss": 0.2844, "step": 23828 }, { "epoch": 0.68, "grad_norm": 7.5429561699831975, "learning_rate": 2.4199769968308852e-06, "loss": 0.726, "step": 23829 }, { "epoch": 0.68, "grad_norm": 5.242691607050366, "learning_rate": 2.4195797590006874e-06, "loss": 0.231, "step": 23830 }, { "epoch": 0.68, "grad_norm": 4.250216254174001, "learning_rate": 2.4191825433695304e-06, "loss": 0.2427, "step": 23831 }, { "epoch": 0.68, "grad_norm": 4.511862061441441, "learning_rate": 2.4187853499408255e-06, "loss": 0.2552, "step": 23832 }, { "epoch": 0.68, "grad_norm": 5.131124465652041, "learning_rate": 2.418388178717993e-06, "loss": 0.4103, "step": 23833 }, { "epoch": 0.68, "grad_norm": 7.697507965291911, "learning_rate": 2.4179910297044523e-06, "loss": 0.5301, "step": 23834 }, { "epoch": 0.68, "grad_norm": 8.385339844788138, "learning_rate": 2.417593902903614e-06, "loss": 0.4186, "step": 23835 }, { "epoch": 0.68, "grad_norm": 4.700700204514188, "learning_rate": 2.417196798318898e-06, "loss": 0.4444, "step": 23836 }, { "epoch": 0.68, "grad_norm": 3.953147661950815, "learning_rate": 2.4167997159537195e-06, "loss": 0.3238, "step": 23837 }, { "epoch": 0.68, "grad_norm": 4.939821156418881, "learning_rate": 2.4164026558114955e-06, "loss": 0.4367, "step": 23838 }, { "epoch": 0.68, "grad_norm": 2.1711184604222775, "learning_rate": 2.4160056178956405e-06, "loss": 0.4251, "step": 23839 }, { "epoch": 0.68, "grad_norm": 6.123270218713748, "learning_rate": 2.4156086022095728e-06, "loss": 0.6582, "step": 23840 }, { "epoch": 0.68, "grad_norm": 5.1892121258683295, "learning_rate": 2.4152116087567053e-06, "loss": 0.9472, "step": 23841 }, { "epoch": 0.68, "grad_norm": 7.185557492687181, "learning_rate": 2.414814637540453e-06, "loss": 0.4941, "step": 23842 }, { "epoch": 0.68, "grad_norm": 4.154264162093521, "learning_rate": 2.4144176885642335e-06, "loss": 0.4337, "step": 23843 }, { "epoch": 0.68, "grad_norm": 5.213680752001906, "learning_rate": 2.414020761831459e-06, "loss": 0.4368, "step": 23844 }, { "epoch": 0.68, "grad_norm": 5.045867583171464, "learning_rate": 2.4136238573455463e-06, "loss": 0.4422, "step": 23845 }, { "epoch": 0.68, "grad_norm": 3.289539692166258, "learning_rate": 2.413226975109908e-06, "loss": 0.22, "step": 23846 }, { "epoch": 0.68, "grad_norm": 7.9352856949139525, "learning_rate": 2.4128301151279614e-06, "loss": 0.3492, "step": 23847 }, { "epoch": 0.68, "grad_norm": 2.1706697152164773, "learning_rate": 2.4124332774031185e-06, "loss": 0.2258, "step": 23848 }, { "epoch": 0.68, "grad_norm": 8.413440904529127, "learning_rate": 2.412036461938792e-06, "loss": 0.4465, "step": 23849 }, { "epoch": 0.68, "grad_norm": 7.870707962009655, "learning_rate": 2.4116396687383987e-06, "loss": 0.5313, "step": 23850 }, { "epoch": 0.68, "grad_norm": 3.9051002336208023, "learning_rate": 2.4112428978053487e-06, "loss": 0.4707, "step": 23851 }, { "epoch": 0.68, "grad_norm": 8.568230208220482, "learning_rate": 2.4108461491430595e-06, "loss": 0.7363, "step": 23852 }, { "epoch": 0.68, "grad_norm": 3.912543300839989, "learning_rate": 2.4104494227549403e-06, "loss": 0.3688, "step": 23853 }, { "epoch": 0.68, "grad_norm": 6.3407934844229255, "learning_rate": 2.4100527186444077e-06, "loss": 0.3492, "step": 23854 }, { "epoch": 0.68, "grad_norm": 1.9950498475274403, "learning_rate": 2.4096560368148725e-06, "loss": 0.4603, "step": 23855 }, { "epoch": 0.68, "grad_norm": 5.403088301119764, "learning_rate": 2.4092593772697477e-06, "loss": 0.2907, "step": 23856 }, { "epoch": 0.68, "grad_norm": 7.144176376314224, "learning_rate": 2.408862740012443e-06, "loss": 0.7302, "step": 23857 }, { "epoch": 0.68, "grad_norm": 5.66948294312155, "learning_rate": 2.4084661250463744e-06, "loss": 0.5073, "step": 23858 }, { "epoch": 0.68, "grad_norm": 5.647764806934964, "learning_rate": 2.408069532374953e-06, "loss": 0.3527, "step": 23859 }, { "epoch": 0.68, "grad_norm": 3.7573651148582514, "learning_rate": 2.4076729620015893e-06, "loss": 0.5288, "step": 23860 }, { "epoch": 0.68, "grad_norm": 3.7524976836571606, "learning_rate": 2.4072764139296997e-06, "loss": 0.4941, "step": 23861 }, { "epoch": 0.68, "grad_norm": 6.380251404493623, "learning_rate": 2.406879888162688e-06, "loss": 0.4545, "step": 23862 }, { "epoch": 0.68, "grad_norm": 7.372959307220056, "learning_rate": 2.406483384703971e-06, "loss": 0.309, "step": 23863 }, { "epoch": 0.68, "grad_norm": 7.499134999302417, "learning_rate": 2.406086903556957e-06, "loss": 0.5417, "step": 23864 }, { "epoch": 0.68, "grad_norm": 8.411077821862502, "learning_rate": 2.4056904447250566e-06, "loss": 0.4761, "step": 23865 }, { "epoch": 0.68, "grad_norm": 7.4409931218367085, "learning_rate": 2.405294008211685e-06, "loss": 0.791, "step": 23866 }, { "epoch": 0.68, "grad_norm": 3.775400097593846, "learning_rate": 2.404897594020249e-06, "loss": 0.2732, "step": 23867 }, { "epoch": 0.68, "grad_norm": 4.997988534686008, "learning_rate": 2.40450120215416e-06, "loss": 0.6114, "step": 23868 }, { "epoch": 0.68, "grad_norm": 6.516400444303496, "learning_rate": 2.4041048326168252e-06, "loss": 0.7238, "step": 23869 }, { "epoch": 0.68, "grad_norm": 7.080242135088499, "learning_rate": 2.403708485411659e-06, "loss": 0.7635, "step": 23870 }, { "epoch": 0.68, "grad_norm": 5.254661806368731, "learning_rate": 2.4033121605420668e-06, "loss": 0.487, "step": 23871 }, { "epoch": 0.68, "grad_norm": 3.543830758306552, "learning_rate": 2.4029158580114625e-06, "loss": 0.3307, "step": 23872 }, { "epoch": 0.68, "grad_norm": 4.611955211483134, "learning_rate": 2.4025195778232517e-06, "loss": 0.3554, "step": 23873 }, { "epoch": 0.68, "grad_norm": 3.934120029371539, "learning_rate": 2.402123319980847e-06, "loss": 0.3678, "step": 23874 }, { "epoch": 0.68, "grad_norm": 8.548721253544343, "learning_rate": 2.401727084487655e-06, "loss": 0.4001, "step": 23875 }, { "epoch": 0.68, "grad_norm": 4.236373823736342, "learning_rate": 2.4013308713470832e-06, "loss": 0.4372, "step": 23876 }, { "epoch": 0.68, "grad_norm": 6.036052550884037, "learning_rate": 2.4009346805625444e-06, "loss": 0.436, "step": 23877 }, { "epoch": 0.68, "grad_norm": 5.173297214017375, "learning_rate": 2.4005385121374426e-06, "loss": 0.4226, "step": 23878 }, { "epoch": 0.68, "grad_norm": 8.012247842757638, "learning_rate": 2.4001423660751893e-06, "loss": 0.3586, "step": 23879 }, { "epoch": 0.68, "grad_norm": 6.9616046076370734, "learning_rate": 2.3997462423791894e-06, "loss": 0.189, "step": 23880 }, { "epoch": 0.68, "grad_norm": 3.812596804531119, "learning_rate": 2.3993501410528546e-06, "loss": 0.3084, "step": 23881 }, { "epoch": 0.68, "grad_norm": 2.152924364151387, "learning_rate": 2.3989540620995897e-06, "loss": 0.1932, "step": 23882 }, { "epoch": 0.68, "grad_norm": 4.321746753657204, "learning_rate": 2.398558005522802e-06, "loss": 0.1504, "step": 23883 }, { "epoch": 0.68, "grad_norm": 6.203759033700767, "learning_rate": 2.3981619713259e-06, "loss": 0.4, "step": 23884 }, { "epoch": 0.68, "grad_norm": 2.851152698908618, "learning_rate": 2.3977659595122894e-06, "loss": 0.2068, "step": 23885 }, { "epoch": 0.68, "grad_norm": 5.874629191607372, "learning_rate": 2.397369970085379e-06, "loss": 0.2477, "step": 23886 }, { "epoch": 0.68, "grad_norm": 10.781826589894822, "learning_rate": 2.3969740030485734e-06, "loss": 0.5118, "step": 23887 }, { "epoch": 0.68, "grad_norm": 8.672499213220028, "learning_rate": 2.396578058405281e-06, "loss": 0.3255, "step": 23888 }, { "epoch": 0.68, "grad_norm": 6.129681374699714, "learning_rate": 2.3961821361589072e-06, "loss": 0.9568, "step": 23889 }, { "epoch": 0.68, "grad_norm": 7.184221432153394, "learning_rate": 2.395786236312856e-06, "loss": 0.4167, "step": 23890 }, { "epoch": 0.68, "grad_norm": 4.859575813290037, "learning_rate": 2.3953903588705368e-06, "loss": 0.3813, "step": 23891 }, { "epoch": 0.68, "grad_norm": 7.2795942658764465, "learning_rate": 2.3949945038353524e-06, "loss": 0.5818, "step": 23892 }, { "epoch": 0.68, "grad_norm": 6.062448009779287, "learning_rate": 2.394598671210711e-06, "loss": 0.6768, "step": 23893 }, { "epoch": 0.68, "grad_norm": 2.463388246002488, "learning_rate": 2.394202861000017e-06, "loss": 0.2147, "step": 23894 }, { "epoch": 0.68, "grad_norm": 6.530951935752037, "learning_rate": 2.3938070732066743e-06, "loss": 0.2753, "step": 23895 }, { "epoch": 0.68, "grad_norm": 4.108704922077453, "learning_rate": 2.3934113078340863e-06, "loss": 0.212, "step": 23896 }, { "epoch": 0.68, "grad_norm": 5.6806095639402185, "learning_rate": 2.3930155648856622e-06, "loss": 0.422, "step": 23897 }, { "epoch": 0.68, "grad_norm": 6.467416893152661, "learning_rate": 2.3926198443648024e-06, "loss": 0.375, "step": 23898 }, { "epoch": 0.68, "grad_norm": 4.249527119526772, "learning_rate": 2.3922241462749133e-06, "loss": 0.1531, "step": 23899 }, { "epoch": 0.68, "grad_norm": 5.312298176241574, "learning_rate": 2.3918284706194007e-06, "loss": 0.4063, "step": 23900 }, { "epoch": 0.68, "grad_norm": 2.932174298572714, "learning_rate": 2.391432817401666e-06, "loss": 0.2467, "step": 23901 }, { "epoch": 0.68, "grad_norm": 4.889354748036015, "learning_rate": 2.391037186625114e-06, "loss": 0.2789, "step": 23902 }, { "epoch": 0.68, "grad_norm": 6.950599880538428, "learning_rate": 2.3906415782931457e-06, "loss": 0.3267, "step": 23903 }, { "epoch": 0.68, "grad_norm": 9.021489977705826, "learning_rate": 2.3902459924091686e-06, "loss": 0.8869, "step": 23904 }, { "epoch": 0.68, "grad_norm": 4.013172454577177, "learning_rate": 2.3898504289765823e-06, "loss": 0.3403, "step": 23905 }, { "epoch": 0.68, "grad_norm": 4.978472093262383, "learning_rate": 2.3894548879987913e-06, "loss": 0.5228, "step": 23906 }, { "epoch": 0.68, "grad_norm": 7.04809221467605, "learning_rate": 2.3890593694792004e-06, "loss": 0.7438, "step": 23907 }, { "epoch": 0.68, "grad_norm": 7.6642744091419335, "learning_rate": 2.3886638734212102e-06, "loss": 0.6586, "step": 23908 }, { "epoch": 0.68, "grad_norm": 2.8097658855003766, "learning_rate": 2.388268399828223e-06, "loss": 0.3402, "step": 23909 }, { "epoch": 0.68, "grad_norm": 4.819321096101768, "learning_rate": 2.3878729487036393e-06, "loss": 0.5997, "step": 23910 }, { "epoch": 0.68, "grad_norm": 9.023906692262594, "learning_rate": 2.3874775200508654e-06, "loss": 0.6777, "step": 23911 }, { "epoch": 0.68, "grad_norm": 6.983008419556768, "learning_rate": 2.3870821138732984e-06, "loss": 0.5522, "step": 23912 }, { "epoch": 0.68, "grad_norm": 11.34429839939293, "learning_rate": 2.3866867301743443e-06, "loss": 0.3689, "step": 23913 }, { "epoch": 0.68, "grad_norm": 3.6446379781879807, "learning_rate": 2.3862913689574e-06, "loss": 0.1874, "step": 23914 }, { "epoch": 0.68, "grad_norm": 5.093904645011595, "learning_rate": 2.385896030225872e-06, "loss": 0.2678, "step": 23915 }, { "epoch": 0.68, "grad_norm": 10.536020370082817, "learning_rate": 2.385500713983158e-06, "loss": 0.5208, "step": 23916 }, { "epoch": 0.68, "grad_norm": 5.35331928904929, "learning_rate": 2.3851054202326584e-06, "loss": 0.5152, "step": 23917 }, { "epoch": 0.68, "grad_norm": 4.849094371205381, "learning_rate": 2.3847101489777765e-06, "loss": 0.61, "step": 23918 }, { "epoch": 0.68, "grad_norm": 5.010299136659633, "learning_rate": 2.3843149002219087e-06, "loss": 0.5405, "step": 23919 }, { "epoch": 0.69, "grad_norm": 3.9683914773556723, "learning_rate": 2.38391967396846e-06, "loss": 0.3447, "step": 23920 }, { "epoch": 0.69, "grad_norm": 8.691726534653414, "learning_rate": 2.383524470220828e-06, "loss": 0.5508, "step": 23921 }, { "epoch": 0.69, "grad_norm": 3.790896060557682, "learning_rate": 2.3831292889824113e-06, "loss": 0.2678, "step": 23922 }, { "epoch": 0.69, "grad_norm": 5.625092272531484, "learning_rate": 2.3827341302566133e-06, "loss": 0.462, "step": 23923 }, { "epoch": 0.69, "grad_norm": 1.957726676716011, "learning_rate": 2.382338994046829e-06, "loss": 0.1862, "step": 23924 }, { "epoch": 0.69, "grad_norm": 5.461820910249589, "learning_rate": 2.381943880356462e-06, "loss": 0.8263, "step": 23925 }, { "epoch": 0.69, "grad_norm": 4.365232627236787, "learning_rate": 2.3815487891889072e-06, "loss": 0.3718, "step": 23926 }, { "epoch": 0.69, "grad_norm": 4.456721115355086, "learning_rate": 2.3811537205475683e-06, "loss": 0.4057, "step": 23927 }, { "epoch": 0.69, "grad_norm": 4.194755815621649, "learning_rate": 2.380758674435841e-06, "loss": 0.3, "step": 23928 }, { "epoch": 0.69, "grad_norm": 4.7276196519223275, "learning_rate": 2.3803636508571247e-06, "loss": 0.431, "step": 23929 }, { "epoch": 0.69, "grad_norm": 5.766787486749263, "learning_rate": 2.379968649814815e-06, "loss": 0.8246, "step": 23930 }, { "epoch": 0.69, "grad_norm": 7.492484841597112, "learning_rate": 2.379573671312313e-06, "loss": 0.6575, "step": 23931 }, { "epoch": 0.69, "grad_norm": 5.113290202436777, "learning_rate": 2.3791787153530175e-06, "loss": 0.4023, "step": 23932 }, { "epoch": 0.69, "grad_norm": 6.031331511425998, "learning_rate": 2.378783781940323e-06, "loss": 0.3837, "step": 23933 }, { "epoch": 0.69, "grad_norm": 4.4304022910310215, "learning_rate": 2.3783888710776313e-06, "loss": 0.6469, "step": 23934 }, { "epoch": 0.69, "grad_norm": 3.478001363224247, "learning_rate": 2.3779939827683367e-06, "loss": 0.338, "step": 23935 }, { "epoch": 0.69, "grad_norm": 6.936597713871382, "learning_rate": 2.3775991170158374e-06, "loss": 0.5815, "step": 23936 }, { "epoch": 0.69, "grad_norm": 6.802250142021308, "learning_rate": 2.3772042738235287e-06, "loss": 0.7104, "step": 23937 }, { "epoch": 0.69, "grad_norm": 6.329656119022818, "learning_rate": 2.3768094531948093e-06, "loss": 0.3123, "step": 23938 }, { "epoch": 0.69, "grad_norm": 5.947875905161701, "learning_rate": 2.376414655133074e-06, "loss": 0.8661, "step": 23939 }, { "epoch": 0.69, "grad_norm": 8.311409728175882, "learning_rate": 2.3760198796417206e-06, "loss": 0.3449, "step": 23940 }, { "epoch": 0.69, "grad_norm": 4.5757397892568195, "learning_rate": 2.3756251267241463e-06, "loss": 0.3169, "step": 23941 }, { "epoch": 0.69, "grad_norm": 9.402423777475432, "learning_rate": 2.375230396383747e-06, "loss": 0.5941, "step": 23942 }, { "epoch": 0.69, "grad_norm": 8.636034886249508, "learning_rate": 2.3748356886239164e-06, "loss": 0.8369, "step": 23943 }, { "epoch": 0.69, "grad_norm": 3.2208925541955926, "learning_rate": 2.3744410034480498e-06, "loss": 0.3757, "step": 23944 }, { "epoch": 0.69, "grad_norm": 8.379186807839346, "learning_rate": 2.3740463408595453e-06, "loss": 0.7601, "step": 23945 }, { "epoch": 0.69, "grad_norm": 5.4267851832442515, "learning_rate": 2.373651700861796e-06, "loss": 0.2699, "step": 23946 }, { "epoch": 0.69, "grad_norm": 8.073251578654421, "learning_rate": 2.373257083458197e-06, "loss": 0.5134, "step": 23947 }, { "epoch": 0.69, "grad_norm": 6.219133432347127, "learning_rate": 2.3728624886521455e-06, "loss": 0.1785, "step": 23948 }, { "epoch": 0.69, "grad_norm": 5.9431251783107175, "learning_rate": 2.3724679164470354e-06, "loss": 0.3891, "step": 23949 }, { "epoch": 0.69, "grad_norm": 7.586913088325735, "learning_rate": 2.37207336684626e-06, "loss": 0.3484, "step": 23950 }, { "epoch": 0.69, "grad_norm": 5.778774329383262, "learning_rate": 2.3716788398532113e-06, "loss": 0.4361, "step": 23951 }, { "epoch": 0.69, "grad_norm": 5.316985043447084, "learning_rate": 2.3712843354712884e-06, "loss": 0.3455, "step": 23952 }, { "epoch": 0.69, "grad_norm": 3.7301654792262084, "learning_rate": 2.370889853703881e-06, "loss": 0.292, "step": 23953 }, { "epoch": 0.69, "grad_norm": 8.067844246067756, "learning_rate": 2.3704953945543856e-06, "loss": 0.643, "step": 23954 }, { "epoch": 0.69, "grad_norm": 4.7966749913768805, "learning_rate": 2.370100958026195e-06, "loss": 0.4395, "step": 23955 }, { "epoch": 0.69, "grad_norm": 4.291952441555515, "learning_rate": 2.3697065441227007e-06, "loss": 0.7468, "step": 23956 }, { "epoch": 0.69, "grad_norm": 7.541961903000669, "learning_rate": 2.3693121528472984e-06, "loss": 0.9486, "step": 23957 }, { "epoch": 0.69, "grad_norm": 5.419793798634538, "learning_rate": 2.3689177842033786e-06, "loss": 0.4835, "step": 23958 }, { "epoch": 0.69, "grad_norm": 7.032338782888964, "learning_rate": 2.368523438194336e-06, "loss": 0.3442, "step": 23959 }, { "epoch": 0.69, "grad_norm": 5.891670316436946, "learning_rate": 2.3681291148235614e-06, "loss": 0.4154, "step": 23960 }, { "epoch": 0.69, "grad_norm": 8.451997410280523, "learning_rate": 2.367734814094449e-06, "loss": 0.2375, "step": 23961 }, { "epoch": 0.69, "grad_norm": 4.788682608293285, "learning_rate": 2.36734053601039e-06, "loss": 0.4075, "step": 23962 }, { "epoch": 0.69, "grad_norm": 6.314101214385767, "learning_rate": 2.366946280574775e-06, "loss": 0.4063, "step": 23963 }, { "epoch": 0.69, "grad_norm": 20.147797981517193, "learning_rate": 2.366552047790998e-06, "loss": 0.7753, "step": 23964 }, { "epoch": 0.69, "grad_norm": 7.819984543485074, "learning_rate": 2.3661578376624485e-06, "loss": 0.5616, "step": 23965 }, { "epoch": 0.69, "grad_norm": 7.799361816610313, "learning_rate": 2.3657636501925204e-06, "loss": 0.6189, "step": 23966 }, { "epoch": 0.69, "grad_norm": 9.332296489843914, "learning_rate": 2.365369485384601e-06, "loss": 0.6951, "step": 23967 }, { "epoch": 0.69, "grad_norm": 5.16207067076036, "learning_rate": 2.3649753432420857e-06, "loss": 0.7393, "step": 23968 }, { "epoch": 0.69, "grad_norm": 9.678211016422072, "learning_rate": 2.364581223768363e-06, "loss": 0.8078, "step": 23969 }, { "epoch": 0.69, "grad_norm": 8.399903351363982, "learning_rate": 2.3641871269668236e-06, "loss": 0.7192, "step": 23970 }, { "epoch": 0.69, "grad_norm": 22.14094085575745, "learning_rate": 2.363793052840856e-06, "loss": 0.3034, "step": 23971 }, { "epoch": 0.69, "grad_norm": 3.2788166469267725, "learning_rate": 2.3633990013938523e-06, "loss": 0.3171, "step": 23972 }, { "epoch": 0.69, "grad_norm": 5.607401381096084, "learning_rate": 2.3630049726292035e-06, "loss": 0.503, "step": 23973 }, { "epoch": 0.69, "grad_norm": 6.602198246246416, "learning_rate": 2.362610966550297e-06, "loss": 0.633, "step": 23974 }, { "epoch": 0.69, "grad_norm": 5.02126230752589, "learning_rate": 2.3622169831605258e-06, "loss": 0.5771, "step": 23975 }, { "epoch": 0.69, "grad_norm": 6.005845322010998, "learning_rate": 2.361823022463277e-06, "loss": 0.7041, "step": 23976 }, { "epoch": 0.69, "grad_norm": 9.123690053897324, "learning_rate": 2.3614290844619396e-06, "loss": 0.4983, "step": 23977 }, { "epoch": 0.69, "grad_norm": 7.003211919406006, "learning_rate": 2.361035169159901e-06, "loss": 0.2346, "step": 23978 }, { "epoch": 0.69, "grad_norm": 6.0303004807085925, "learning_rate": 2.3606412765605542e-06, "loss": 0.2535, "step": 23979 }, { "epoch": 0.69, "grad_norm": 5.381027340906892, "learning_rate": 2.3602474066672833e-06, "loss": 0.2607, "step": 23980 }, { "epoch": 0.69, "grad_norm": 9.331831919160797, "learning_rate": 2.3598535594834807e-06, "loss": 0.2941, "step": 23981 }, { "epoch": 0.69, "grad_norm": 6.27683926865495, "learning_rate": 2.3594597350125333e-06, "loss": 0.5151, "step": 23982 }, { "epoch": 0.69, "grad_norm": 6.544203984963638, "learning_rate": 2.3590659332578262e-06, "loss": 0.6105, "step": 23983 }, { "epoch": 0.69, "grad_norm": 5.023211057314282, "learning_rate": 2.3586721542227527e-06, "loss": 0.4538, "step": 23984 }, { "epoch": 0.69, "grad_norm": 5.742883590753924, "learning_rate": 2.3582783979106945e-06, "loss": 0.6156, "step": 23985 }, { "epoch": 0.69, "grad_norm": 6.171711091388088, "learning_rate": 2.357884664325044e-06, "loss": 0.607, "step": 23986 }, { "epoch": 0.69, "grad_norm": 1.6559417050810905, "learning_rate": 2.3574909534691847e-06, "loss": 0.2319, "step": 23987 }, { "epoch": 0.69, "grad_norm": 9.51706199882653, "learning_rate": 2.3570972653465076e-06, "loss": 0.8458, "step": 23988 }, { "epoch": 0.69, "grad_norm": 7.447375337006173, "learning_rate": 2.356703599960397e-06, "loss": 0.5622, "step": 23989 }, { "epoch": 0.69, "grad_norm": 3.4517193345460555, "learning_rate": 2.356309957314238e-06, "loss": 0.265, "step": 23990 }, { "epoch": 0.69, "grad_norm": 7.54789951233718, "learning_rate": 2.355916337411421e-06, "loss": 0.4673, "step": 23991 }, { "epoch": 0.69, "grad_norm": 3.8264725213085415, "learning_rate": 2.3555227402553284e-06, "loss": 0.3218, "step": 23992 }, { "epoch": 0.69, "grad_norm": 6.0657387960098035, "learning_rate": 2.355129165849349e-06, "loss": 0.5041, "step": 23993 }, { "epoch": 0.69, "grad_norm": 5.62507928156612, "learning_rate": 2.354735614196867e-06, "loss": 0.5522, "step": 23994 }, { "epoch": 0.69, "grad_norm": 5.819608238674748, "learning_rate": 2.3543420853012707e-06, "loss": 0.4052, "step": 23995 }, { "epoch": 0.69, "grad_norm": 3.4599261732990145, "learning_rate": 2.353948579165943e-06, "loss": 0.3881, "step": 23996 }, { "epoch": 0.69, "grad_norm": 3.0914513882302948, "learning_rate": 2.3535550957942684e-06, "loss": 0.3193, "step": 23997 }, { "epoch": 0.69, "grad_norm": 9.049847630013716, "learning_rate": 2.353161635189635e-06, "loss": 0.89, "step": 23998 }, { "epoch": 0.69, "grad_norm": 2.4318995571538733, "learning_rate": 2.3527681973554246e-06, "loss": 0.058, "step": 23999 }, { "epoch": 0.69, "grad_norm": 4.033555074118824, "learning_rate": 2.352374782295025e-06, "loss": 0.5839, "step": 24000 }, { "epoch": 0.69, "grad_norm": 3.5065723852363826, "learning_rate": 2.3519813900118175e-06, "loss": 0.391, "step": 24001 }, { "epoch": 0.69, "grad_norm": 4.449533292320692, "learning_rate": 2.3515880205091903e-06, "loss": 0.1575, "step": 24002 }, { "epoch": 0.69, "grad_norm": 3.9232107372678406, "learning_rate": 2.351194673790525e-06, "loss": 0.2351, "step": 24003 }, { "epoch": 0.69, "grad_norm": 7.198436974040419, "learning_rate": 2.350801349859206e-06, "loss": 0.6137, "step": 24004 }, { "epoch": 0.69, "grad_norm": 5.129832060000225, "learning_rate": 2.3504080487186142e-06, "loss": 0.3689, "step": 24005 }, { "epoch": 0.69, "grad_norm": 12.304195759953405, "learning_rate": 2.3500147703721367e-06, "loss": 0.4091, "step": 24006 }, { "epoch": 0.69, "grad_norm": 5.753113276994439, "learning_rate": 2.349621514823157e-06, "loss": 0.3801, "step": 24007 }, { "epoch": 0.69, "grad_norm": 3.280403100574425, "learning_rate": 2.349228282075056e-06, "loss": 0.3612, "step": 24008 }, { "epoch": 0.69, "grad_norm": 4.819244241663445, "learning_rate": 2.3488350721312203e-06, "loss": 0.4167, "step": 24009 }, { "epoch": 0.69, "grad_norm": 5.560362244338856, "learning_rate": 2.348441884995027e-06, "loss": 0.701, "step": 24010 }, { "epoch": 0.69, "grad_norm": 4.366949303533876, "learning_rate": 2.3480487206698636e-06, "loss": 0.3583, "step": 24011 }, { "epoch": 0.69, "grad_norm": 13.248850934568845, "learning_rate": 2.3476555791591083e-06, "loss": 0.7178, "step": 24012 }, { "epoch": 0.69, "grad_norm": 6.0898976987317335, "learning_rate": 2.347262460466146e-06, "loss": 0.3936, "step": 24013 }, { "epoch": 0.69, "grad_norm": 4.372239904328885, "learning_rate": 2.34686936459436e-06, "loss": 0.4548, "step": 24014 }, { "epoch": 0.69, "grad_norm": 5.757805170807645, "learning_rate": 2.3464762915471295e-06, "loss": 0.6763, "step": 24015 }, { "epoch": 0.69, "grad_norm": 5.328364280182849, "learning_rate": 2.3460832413278373e-06, "loss": 0.3781, "step": 24016 }, { "epoch": 0.69, "grad_norm": 7.833512635580031, "learning_rate": 2.3456902139398625e-06, "loss": 0.4556, "step": 24017 }, { "epoch": 0.69, "grad_norm": 6.661133975756065, "learning_rate": 2.3452972093865894e-06, "loss": 0.285, "step": 24018 }, { "epoch": 0.69, "grad_norm": 4.282625214170846, "learning_rate": 2.3449042276713964e-06, "loss": 0.6622, "step": 24019 }, { "epoch": 0.69, "grad_norm": 8.664641241448924, "learning_rate": 2.344511268797666e-06, "loss": 0.7556, "step": 24020 }, { "epoch": 0.69, "grad_norm": 5.665601387122005, "learning_rate": 2.344118332768778e-06, "loss": 0.4909, "step": 24021 }, { "epoch": 0.69, "grad_norm": 5.378603680868034, "learning_rate": 2.3437254195881137e-06, "loss": 0.3395, "step": 24022 }, { "epoch": 0.69, "grad_norm": 4.569151536906159, "learning_rate": 2.3433325292590526e-06, "loss": 0.2498, "step": 24023 }, { "epoch": 0.69, "grad_norm": 3.423644840537731, "learning_rate": 2.342939661784973e-06, "loss": 0.443, "step": 24024 }, { "epoch": 0.69, "grad_norm": 2.2955236287198146, "learning_rate": 2.3425468171692583e-06, "loss": 0.2521, "step": 24025 }, { "epoch": 0.69, "grad_norm": 5.105331103826936, "learning_rate": 2.3421539954152845e-06, "loss": 0.4123, "step": 24026 }, { "epoch": 0.69, "grad_norm": 7.29793728727418, "learning_rate": 2.3417611965264344e-06, "loss": 0.9004, "step": 24027 }, { "epoch": 0.69, "grad_norm": 4.9332080004640275, "learning_rate": 2.341368420506083e-06, "loss": 0.6155, "step": 24028 }, { "epoch": 0.69, "grad_norm": 7.193484195203197, "learning_rate": 2.340975667357614e-06, "loss": 0.3202, "step": 24029 }, { "epoch": 0.69, "grad_norm": 4.5103742173145065, "learning_rate": 2.340582937084404e-06, "loss": 0.3598, "step": 24030 }, { "epoch": 0.69, "grad_norm": 8.356841314788685, "learning_rate": 2.3401902296898292e-06, "loss": 0.4604, "step": 24031 }, { "epoch": 0.69, "grad_norm": 5.82237704738916, "learning_rate": 2.339797545177273e-06, "loss": 0.6595, "step": 24032 }, { "epoch": 0.69, "grad_norm": 8.970740173914486, "learning_rate": 2.3394048835501085e-06, "loss": 0.7803, "step": 24033 }, { "epoch": 0.69, "grad_norm": 4.756825587449926, "learning_rate": 2.3390122448117188e-06, "loss": 0.4438, "step": 24034 }, { "epoch": 0.69, "grad_norm": 4.2463876154051, "learning_rate": 2.3386196289654765e-06, "loss": 0.2955, "step": 24035 }, { "epoch": 0.69, "grad_norm": 11.260492348688645, "learning_rate": 2.3382270360147665e-06, "loss": 0.7155, "step": 24036 }, { "epoch": 0.69, "grad_norm": 9.192556637529066, "learning_rate": 2.337834465962957e-06, "loss": 0.5288, "step": 24037 }, { "epoch": 0.69, "grad_norm": 3.4152575568318073, "learning_rate": 2.33744191881343e-06, "loss": 0.3936, "step": 24038 }, { "epoch": 0.69, "grad_norm": 2.3556056449420884, "learning_rate": 2.3370493945695645e-06, "loss": 0.2374, "step": 24039 }, { "epoch": 0.69, "grad_norm": 19.033966722432655, "learning_rate": 2.336656893234733e-06, "loss": 0.8931, "step": 24040 }, { "epoch": 0.69, "grad_norm": 7.634381635665353, "learning_rate": 2.336264414812316e-06, "loss": 0.5038, "step": 24041 }, { "epoch": 0.69, "grad_norm": 4.5951122677426035, "learning_rate": 2.335871959305689e-06, "loss": 0.3127, "step": 24042 }, { "epoch": 0.69, "grad_norm": 5.887984475776172, "learning_rate": 2.3354795267182267e-06, "loss": 0.6577, "step": 24043 }, { "epoch": 0.69, "grad_norm": 7.407165728264527, "learning_rate": 2.3350871170533047e-06, "loss": 0.2467, "step": 24044 }, { "epoch": 0.69, "grad_norm": 7.29250624773126, "learning_rate": 2.3346947303143017e-06, "loss": 0.5606, "step": 24045 }, { "epoch": 0.69, "grad_norm": 5.577752322615502, "learning_rate": 2.33430236650459e-06, "loss": 0.5642, "step": 24046 }, { "epoch": 0.69, "grad_norm": 4.443010964619846, "learning_rate": 2.3339100256275464e-06, "loss": 0.2663, "step": 24047 }, { "epoch": 0.69, "grad_norm": 6.869325168398681, "learning_rate": 2.3335177076865496e-06, "loss": 0.3559, "step": 24048 }, { "epoch": 0.69, "grad_norm": 3.1952154242732096, "learning_rate": 2.3331254126849706e-06, "loss": 0.2334, "step": 24049 }, { "epoch": 0.69, "grad_norm": 9.088903282571767, "learning_rate": 2.3327331406261857e-06, "loss": 0.4315, "step": 24050 }, { "epoch": 0.69, "grad_norm": 3.012445127216943, "learning_rate": 2.332340891513567e-06, "loss": 0.3263, "step": 24051 }, { "epoch": 0.69, "grad_norm": 4.609772435527445, "learning_rate": 2.3319486653504937e-06, "loss": 0.1762, "step": 24052 }, { "epoch": 0.69, "grad_norm": 4.270347189177876, "learning_rate": 2.3315564621403352e-06, "loss": 0.4077, "step": 24053 }, { "epoch": 0.69, "grad_norm": 4.241542310459397, "learning_rate": 2.3311642818864676e-06, "loss": 0.3372, "step": 24054 }, { "epoch": 0.69, "grad_norm": 4.602649864645341, "learning_rate": 2.3307721245922678e-06, "loss": 0.3793, "step": 24055 }, { "epoch": 0.69, "grad_norm": 5.2214107469139295, "learning_rate": 2.330379990261106e-06, "loss": 0.4859, "step": 24056 }, { "epoch": 0.69, "grad_norm": 2.2479886495704138, "learning_rate": 2.329987878896357e-06, "loss": 0.0447, "step": 24057 }, { "epoch": 0.69, "grad_norm": 10.388155949305903, "learning_rate": 2.329595790501391e-06, "loss": 0.587, "step": 24058 }, { "epoch": 0.69, "grad_norm": 4.9631002322584035, "learning_rate": 2.3292037250795863e-06, "loss": 0.1752, "step": 24059 }, { "epoch": 0.69, "grad_norm": 7.874059075955331, "learning_rate": 2.3288116826343106e-06, "loss": 0.3222, "step": 24060 }, { "epoch": 0.69, "grad_norm": 4.026582070518839, "learning_rate": 2.3284196631689414e-06, "loss": 0.247, "step": 24061 }, { "epoch": 0.69, "grad_norm": 4.349133017071691, "learning_rate": 2.328027666686847e-06, "loss": 0.4471, "step": 24062 }, { "epoch": 0.69, "grad_norm": 2.7235170205041217, "learning_rate": 2.327635693191403e-06, "loss": 0.2355, "step": 24063 }, { "epoch": 0.69, "grad_norm": 3.881320106254045, "learning_rate": 2.3272437426859806e-06, "loss": 0.33, "step": 24064 }, { "epoch": 0.69, "grad_norm": 6.8493977588241, "learning_rate": 2.3268518151739496e-06, "loss": 0.3586, "step": 24065 }, { "epoch": 0.69, "grad_norm": 5.230457308744605, "learning_rate": 2.3264599106586843e-06, "loss": 0.422, "step": 24066 }, { "epoch": 0.69, "grad_norm": 8.288554299152244, "learning_rate": 2.326068029143554e-06, "loss": 0.9499, "step": 24067 }, { "epoch": 0.69, "grad_norm": 9.842808172795019, "learning_rate": 2.3256761706319325e-06, "loss": 0.5073, "step": 24068 }, { "epoch": 0.69, "grad_norm": 6.429035955630255, "learning_rate": 2.3252843351271905e-06, "loss": 0.4768, "step": 24069 }, { "epoch": 0.69, "grad_norm": 4.785570473940109, "learning_rate": 2.3248925226326952e-06, "loss": 0.3425, "step": 24070 }, { "epoch": 0.69, "grad_norm": 9.870546229899688, "learning_rate": 2.3245007331518226e-06, "loss": 0.304, "step": 24071 }, { "epoch": 0.69, "grad_norm": 5.464859041488445, "learning_rate": 2.324108966687939e-06, "loss": 0.7701, "step": 24072 }, { "epoch": 0.69, "grad_norm": 2.6785622060707817, "learning_rate": 2.3237172232444183e-06, "loss": 0.1271, "step": 24073 }, { "epoch": 0.69, "grad_norm": 15.211045776671476, "learning_rate": 2.3233255028246266e-06, "loss": 0.8099, "step": 24074 }, { "epoch": 0.69, "grad_norm": 11.58712546392433, "learning_rate": 2.3229338054319385e-06, "loss": 0.7041, "step": 24075 }, { "epoch": 0.69, "grad_norm": 3.733861672462216, "learning_rate": 2.3225421310697204e-06, "loss": 0.2421, "step": 24076 }, { "epoch": 0.69, "grad_norm": 4.220851007683752, "learning_rate": 2.3221504797413436e-06, "loss": 0.1868, "step": 24077 }, { "epoch": 0.69, "grad_norm": 5.263586311848051, "learning_rate": 2.321758851450174e-06, "loss": 0.4132, "step": 24078 }, { "epoch": 0.69, "grad_norm": 7.837495308590205, "learning_rate": 2.3213672461995833e-06, "loss": 0.4112, "step": 24079 }, { "epoch": 0.69, "grad_norm": 2.2861574506511544, "learning_rate": 2.3209756639929426e-06, "loss": 0.2333, "step": 24080 }, { "epoch": 0.69, "grad_norm": 3.693804152813058, "learning_rate": 2.3205841048336166e-06, "loss": 0.4321, "step": 24081 }, { "epoch": 0.69, "grad_norm": 4.942707623185917, "learning_rate": 2.320192568724977e-06, "loss": 0.1951, "step": 24082 }, { "epoch": 0.69, "grad_norm": 6.242466907191986, "learning_rate": 2.3198010556703913e-06, "loss": 0.4642, "step": 24083 }, { "epoch": 0.69, "grad_norm": 7.492801676886072, "learning_rate": 2.319409565673227e-06, "loss": 0.4806, "step": 24084 }, { "epoch": 0.69, "grad_norm": 3.472087732041652, "learning_rate": 2.3190180987368504e-06, "loss": 0.1292, "step": 24085 }, { "epoch": 0.69, "grad_norm": 6.557669251235987, "learning_rate": 2.3186266548646326e-06, "loss": 0.7571, "step": 24086 }, { "epoch": 0.69, "grad_norm": 5.40116385085959, "learning_rate": 2.318235234059938e-06, "loss": 0.6001, "step": 24087 }, { "epoch": 0.69, "grad_norm": 16.96735668609154, "learning_rate": 2.3178438363261346e-06, "loss": 0.6794, "step": 24088 }, { "epoch": 0.69, "grad_norm": 4.753186386885744, "learning_rate": 2.3174524616665934e-06, "loss": 0.3757, "step": 24089 }, { "epoch": 0.69, "grad_norm": 5.450343387180068, "learning_rate": 2.317061110084678e-06, "loss": 0.371, "step": 24090 }, { "epoch": 0.69, "grad_norm": 16.300218687170204, "learning_rate": 2.3166697815837554e-06, "loss": 0.9701, "step": 24091 }, { "epoch": 0.69, "grad_norm": 5.945117601365626, "learning_rate": 2.3162784761671903e-06, "loss": 0.6852, "step": 24092 }, { "epoch": 0.69, "grad_norm": 9.54169209794788, "learning_rate": 2.315887193838353e-06, "loss": 0.2228, "step": 24093 }, { "epoch": 0.69, "grad_norm": 4.822351899114964, "learning_rate": 2.315495934600606e-06, "loss": 0.47, "step": 24094 }, { "epoch": 0.69, "grad_norm": 7.052206167018112, "learning_rate": 2.3151046984573173e-06, "loss": 0.3511, "step": 24095 }, { "epoch": 0.69, "grad_norm": 2.4666557845528585, "learning_rate": 2.314713485411855e-06, "loss": 0.1875, "step": 24096 }, { "epoch": 0.69, "grad_norm": 5.839859876323365, "learning_rate": 2.3143222954675792e-06, "loss": 0.7676, "step": 24097 }, { "epoch": 0.69, "grad_norm": 5.876825657708095, "learning_rate": 2.31393112862786e-06, "loss": 0.2087, "step": 24098 }, { "epoch": 0.69, "grad_norm": 2.3621123394475183, "learning_rate": 2.3135399848960577e-06, "loss": 0.1425, "step": 24099 }, { "epoch": 0.69, "grad_norm": 4.510413518585916, "learning_rate": 2.313148864275543e-06, "loss": 0.4726, "step": 24100 }, { "epoch": 0.69, "grad_norm": 4.099364122067195, "learning_rate": 2.3127577667696756e-06, "loss": 0.2963, "step": 24101 }, { "epoch": 0.69, "grad_norm": 4.934079977617398, "learning_rate": 2.3123666923818238e-06, "loss": 0.4245, "step": 24102 }, { "epoch": 0.69, "grad_norm": 9.330246420546857, "learning_rate": 2.3119756411153503e-06, "loss": 0.7055, "step": 24103 }, { "epoch": 0.69, "grad_norm": 3.5980362914048487, "learning_rate": 2.3115846129736187e-06, "loss": 0.328, "step": 24104 }, { "epoch": 0.69, "grad_norm": 3.7178969807359974, "learning_rate": 2.3111936079599943e-06, "loss": 0.4902, "step": 24105 }, { "epoch": 0.69, "grad_norm": 6.659808072025302, "learning_rate": 2.310802626077839e-06, "loss": 0.3213, "step": 24106 }, { "epoch": 0.69, "grad_norm": 7.762138213297543, "learning_rate": 2.3104116673305195e-06, "loss": 0.5169, "step": 24107 }, { "epoch": 0.69, "grad_norm": 8.575560347930077, "learning_rate": 2.3100207317213953e-06, "loss": 0.6238, "step": 24108 }, { "epoch": 0.69, "grad_norm": 8.91529694550986, "learning_rate": 2.309629819253833e-06, "loss": 0.3971, "step": 24109 }, { "epoch": 0.69, "grad_norm": 3.1857606592719225, "learning_rate": 2.3092389299311945e-06, "loss": 0.3008, "step": 24110 }, { "epoch": 0.69, "grad_norm": 6.4360342857083, "learning_rate": 2.30884806375684e-06, "loss": 0.2882, "step": 24111 }, { "epoch": 0.69, "grad_norm": 37.86718674445854, "learning_rate": 2.3084572207341364e-06, "loss": 0.7154, "step": 24112 }, { "epoch": 0.69, "grad_norm": 3.7924596285698646, "learning_rate": 2.3080664008664423e-06, "loss": 0.4168, "step": 24113 }, { "epoch": 0.69, "grad_norm": 3.6569586173123905, "learning_rate": 2.307675604157123e-06, "loss": 0.3244, "step": 24114 }, { "epoch": 0.69, "grad_norm": 1.2425549064260963, "learning_rate": 2.3072848306095373e-06, "loss": 0.0472, "step": 24115 }, { "epoch": 0.69, "grad_norm": 2.62966497619013, "learning_rate": 2.3068940802270506e-06, "loss": 0.3603, "step": 24116 }, { "epoch": 0.69, "grad_norm": 5.015362670312832, "learning_rate": 2.3065033530130228e-06, "loss": 0.43, "step": 24117 }, { "epoch": 0.69, "grad_norm": 5.623509273086924, "learning_rate": 2.306112648970814e-06, "loss": 0.2855, "step": 24118 }, { "epoch": 0.69, "grad_norm": 10.756032271456299, "learning_rate": 2.3057219681037855e-06, "loss": 0.6813, "step": 24119 }, { "epoch": 0.69, "grad_norm": 5.214475331526007, "learning_rate": 2.305331310415299e-06, "loss": 0.5637, "step": 24120 }, { "epoch": 0.69, "grad_norm": 4.146450996713792, "learning_rate": 2.3049406759087173e-06, "loss": 0.3706, "step": 24121 }, { "epoch": 0.69, "grad_norm": 5.6226131249391, "learning_rate": 2.304550064587397e-06, "loss": 0.3337, "step": 24122 }, { "epoch": 0.69, "grad_norm": 5.025415911946315, "learning_rate": 2.304159476454705e-06, "loss": 0.3561, "step": 24123 }, { "epoch": 0.69, "grad_norm": 3.8292585913271684, "learning_rate": 2.3037689115139926e-06, "loss": 0.1501, "step": 24124 }, { "epoch": 0.69, "grad_norm": 4.298483654954466, "learning_rate": 2.303378369768626e-06, "loss": 0.2043, "step": 24125 }, { "epoch": 0.69, "grad_norm": 4.698456535135877, "learning_rate": 2.3029878512219623e-06, "loss": 0.341, "step": 24126 }, { "epoch": 0.69, "grad_norm": 3.102399679432469, "learning_rate": 2.3025973558773635e-06, "loss": 0.3818, "step": 24127 }, { "epoch": 0.69, "grad_norm": 4.380738445896977, "learning_rate": 2.3022068837381857e-06, "loss": 0.4284, "step": 24128 }, { "epoch": 0.69, "grad_norm": 6.0800754585100965, "learning_rate": 2.3018164348077914e-06, "loss": 0.6121, "step": 24129 }, { "epoch": 0.69, "grad_norm": 2.7669932649962665, "learning_rate": 2.3014260090895385e-06, "loss": 0.2609, "step": 24130 }, { "epoch": 0.69, "grad_norm": 9.762856443494357, "learning_rate": 2.301035606586783e-06, "loss": 0.5736, "step": 24131 }, { "epoch": 0.69, "grad_norm": 1.695652378309784, "learning_rate": 2.300645227302889e-06, "loss": 0.1433, "step": 24132 }, { "epoch": 0.69, "grad_norm": 2.2132171430369048, "learning_rate": 2.300254871241209e-06, "loss": 0.2819, "step": 24133 }, { "epoch": 0.69, "grad_norm": 5.683848581795186, "learning_rate": 2.299864538405106e-06, "loss": 0.7311, "step": 24134 }, { "epoch": 0.69, "grad_norm": 5.439206655729462, "learning_rate": 2.2994742287979337e-06, "loss": 0.2332, "step": 24135 }, { "epoch": 0.69, "grad_norm": 3.392175240935986, "learning_rate": 2.299083942423054e-06, "loss": 0.3751, "step": 24136 }, { "epoch": 0.69, "grad_norm": 6.52572573429599, "learning_rate": 2.298693679283823e-06, "loss": 0.723, "step": 24137 }, { "epoch": 0.69, "grad_norm": 9.43938987958587, "learning_rate": 2.2983034393835958e-06, "loss": 0.8453, "step": 24138 }, { "epoch": 0.69, "grad_norm": 5.146765661786414, "learning_rate": 2.2979132227257335e-06, "loss": 0.4842, "step": 24139 }, { "epoch": 0.69, "grad_norm": 4.3075185283189334, "learning_rate": 2.2975230293135886e-06, "loss": 0.2573, "step": 24140 }, { "epoch": 0.69, "grad_norm": 4.78775071167414, "learning_rate": 2.2971328591505227e-06, "loss": 0.2019, "step": 24141 }, { "epoch": 0.69, "grad_norm": 6.9088722877729705, "learning_rate": 2.2967427122398877e-06, "loss": 0.2828, "step": 24142 }, { "epoch": 0.69, "grad_norm": 5.156073364931464, "learning_rate": 2.2963525885850445e-06, "loss": 0.2305, "step": 24143 }, { "epoch": 0.69, "grad_norm": 4.208709127184062, "learning_rate": 2.2959624881893466e-06, "loss": 0.3961, "step": 24144 }, { "epoch": 0.69, "grad_norm": 4.13372469607932, "learning_rate": 2.2955724110561495e-06, "loss": 0.1239, "step": 24145 }, { "epoch": 0.69, "grad_norm": 6.9294502707971315, "learning_rate": 2.295182357188811e-06, "loss": 0.2559, "step": 24146 }, { "epoch": 0.69, "grad_norm": 7.706737483710099, "learning_rate": 2.2947923265906847e-06, "loss": 0.5795, "step": 24147 }, { "epoch": 0.69, "grad_norm": 7.958136218227927, "learning_rate": 2.2944023192651277e-06, "loss": 0.4023, "step": 24148 }, { "epoch": 0.69, "grad_norm": 6.651952296335428, "learning_rate": 2.2940123352154936e-06, "loss": 0.7671, "step": 24149 }, { "epoch": 0.69, "grad_norm": 6.267703541576382, "learning_rate": 2.293622374445139e-06, "loss": 0.4176, "step": 24150 }, { "epoch": 0.69, "grad_norm": 3.148925194464445, "learning_rate": 2.293232436957419e-06, "loss": 0.3097, "step": 24151 }, { "epoch": 0.69, "grad_norm": 2.285079380927008, "learning_rate": 2.2928425227556844e-06, "loss": 0.1753, "step": 24152 }, { "epoch": 0.69, "grad_norm": 6.356170184179134, "learning_rate": 2.2924526318432944e-06, "loss": 0.5608, "step": 24153 }, { "epoch": 0.69, "grad_norm": 6.610088120535359, "learning_rate": 2.2920627642235994e-06, "loss": 0.1676, "step": 24154 }, { "epoch": 0.69, "grad_norm": 7.079881664838937, "learning_rate": 2.2916729198999566e-06, "loss": 0.6727, "step": 24155 }, { "epoch": 0.69, "grad_norm": 3.985100773550652, "learning_rate": 2.2912830988757184e-06, "loss": 0.3857, "step": 24156 }, { "epoch": 0.69, "grad_norm": 7.306285728191019, "learning_rate": 2.2908933011542385e-06, "loss": 0.8292, "step": 24157 }, { "epoch": 0.69, "grad_norm": 5.643909100627151, "learning_rate": 2.2905035267388675e-06, "loss": 0.5507, "step": 24158 }, { "epoch": 0.69, "grad_norm": 5.183228515084431, "learning_rate": 2.290113775632963e-06, "loss": 0.5345, "step": 24159 }, { "epoch": 0.69, "grad_norm": 4.240171568206401, "learning_rate": 2.289724047839874e-06, "loss": 0.2539, "step": 24160 }, { "epoch": 0.69, "grad_norm": 4.25562113794921, "learning_rate": 2.289334343362956e-06, "loss": 0.1845, "step": 24161 }, { "epoch": 0.69, "grad_norm": 4.3932465027393794, "learning_rate": 2.288944662205562e-06, "loss": 0.4588, "step": 24162 }, { "epoch": 0.69, "grad_norm": 5.523930377902656, "learning_rate": 2.2885550043710443e-06, "loss": 0.0971, "step": 24163 }, { "epoch": 0.69, "grad_norm": 3.392394487487287, "learning_rate": 2.288165369862753e-06, "loss": 0.2907, "step": 24164 }, { "epoch": 0.69, "grad_norm": 3.8549591315115634, "learning_rate": 2.28777575868404e-06, "loss": 0.5502, "step": 24165 }, { "epoch": 0.69, "grad_norm": 5.115774504120374, "learning_rate": 2.28738617083826e-06, "loss": 0.338, "step": 24166 }, { "epoch": 0.69, "grad_norm": 3.7065373725833792, "learning_rate": 2.2869966063287606e-06, "loss": 0.4622, "step": 24167 }, { "epoch": 0.69, "grad_norm": 5.997984150167589, "learning_rate": 2.2866070651588974e-06, "loss": 0.2142, "step": 24168 }, { "epoch": 0.69, "grad_norm": 5.026118131329337, "learning_rate": 2.2862175473320175e-06, "loss": 0.6836, "step": 24169 }, { "epoch": 0.69, "grad_norm": 6.728804264604512, "learning_rate": 2.285828052851476e-06, "loss": 0.5035, "step": 24170 }, { "epoch": 0.69, "grad_norm": 7.007025207727885, "learning_rate": 2.2854385817206216e-06, "loss": 0.356, "step": 24171 }, { "epoch": 0.69, "grad_norm": 6.479600624033766, "learning_rate": 2.2850491339428027e-06, "loss": 0.7289, "step": 24172 }, { "epoch": 0.69, "grad_norm": 4.840669329091187, "learning_rate": 2.2846597095213735e-06, "loss": 0.622, "step": 24173 }, { "epoch": 0.69, "grad_norm": 2.530108865725201, "learning_rate": 2.284270308459681e-06, "loss": 0.2265, "step": 24174 }, { "epoch": 0.69, "grad_norm": 5.971484072359524, "learning_rate": 2.283880930761079e-06, "loss": 0.5167, "step": 24175 }, { "epoch": 0.69, "grad_norm": 6.480692799232195, "learning_rate": 2.2834915764289123e-06, "loss": 0.4594, "step": 24176 }, { "epoch": 0.69, "grad_norm": 8.206629413279504, "learning_rate": 2.2831022454665354e-06, "loss": 0.4442, "step": 24177 }, { "epoch": 0.69, "grad_norm": 3.791912682205972, "learning_rate": 2.282712937877295e-06, "loss": 0.3237, "step": 24178 }, { "epoch": 0.69, "grad_norm": 2.7190711829228023, "learning_rate": 2.2823236536645387e-06, "loss": 0.2832, "step": 24179 }, { "epoch": 0.69, "grad_norm": 14.570166865789432, "learning_rate": 2.2819343928316203e-06, "loss": 0.4899, "step": 24180 }, { "epoch": 0.69, "grad_norm": 3.988259430320302, "learning_rate": 2.2815451553818824e-06, "loss": 0.3508, "step": 24181 }, { "epoch": 0.69, "grad_norm": 5.570671594288571, "learning_rate": 2.2811559413186796e-06, "loss": 0.4986, "step": 24182 }, { "epoch": 0.69, "grad_norm": 7.471715234749939, "learning_rate": 2.2807667506453575e-06, "loss": 0.445, "step": 24183 }, { "epoch": 0.69, "grad_norm": 6.079299068076734, "learning_rate": 2.280377583365264e-06, "loss": 0.3959, "step": 24184 }, { "epoch": 0.69, "grad_norm": 3.624073238950625, "learning_rate": 2.279988439481745e-06, "loss": 0.2521, "step": 24185 }, { "epoch": 0.69, "grad_norm": 3.9467785066118926, "learning_rate": 2.279599318998151e-06, "loss": 0.4608, "step": 24186 }, { "epoch": 0.69, "grad_norm": 7.2039973130038435, "learning_rate": 2.2792102219178308e-06, "loss": 0.5561, "step": 24187 }, { "epoch": 0.69, "grad_norm": 7.162882700444164, "learning_rate": 2.2788211482441286e-06, "loss": 0.1464, "step": 24188 }, { "epoch": 0.69, "grad_norm": 4.825080726620239, "learning_rate": 2.2784320979803944e-06, "loss": 0.0735, "step": 24189 }, { "epoch": 0.69, "grad_norm": 4.782274323486486, "learning_rate": 2.278043071129974e-06, "loss": 0.5218, "step": 24190 }, { "epoch": 0.69, "grad_norm": 6.00282304630905, "learning_rate": 2.277654067696214e-06, "loss": 0.5885, "step": 24191 }, { "epoch": 0.69, "grad_norm": 4.495887625681399, "learning_rate": 2.2772650876824586e-06, "loss": 0.2028, "step": 24192 }, { "epoch": 0.69, "grad_norm": 5.926421058630449, "learning_rate": 2.276876131092059e-06, "loss": 0.8338, "step": 24193 }, { "epoch": 0.69, "grad_norm": 6.1283153202863145, "learning_rate": 2.2764871979283563e-06, "loss": 0.4961, "step": 24194 }, { "epoch": 0.69, "grad_norm": 4.053262919694835, "learning_rate": 2.2760982881946992e-06, "loss": 0.549, "step": 24195 }, { "epoch": 0.69, "grad_norm": 4.51927808153076, "learning_rate": 2.275709401894435e-06, "loss": 0.4577, "step": 24196 }, { "epoch": 0.69, "grad_norm": 9.713416104702594, "learning_rate": 2.2753205390309075e-06, "loss": 0.3907, "step": 24197 }, { "epoch": 0.69, "grad_norm": 6.132931808963375, "learning_rate": 2.2749316996074615e-06, "loss": 0.5746, "step": 24198 }, { "epoch": 0.69, "grad_norm": 5.5665864828277645, "learning_rate": 2.2745428836274407e-06, "loss": 0.3994, "step": 24199 }, { "epoch": 0.69, "grad_norm": 7.853249409453316, "learning_rate": 2.2741540910941945e-06, "loss": 0.2081, "step": 24200 }, { "epoch": 0.69, "grad_norm": 5.064975604180257, "learning_rate": 2.2737653220110626e-06, "loss": 0.4799, "step": 24201 }, { "epoch": 0.69, "grad_norm": 4.407024325652922, "learning_rate": 2.273376576381392e-06, "loss": 0.0868, "step": 24202 }, { "epoch": 0.69, "grad_norm": 7.721651466311304, "learning_rate": 2.272987854208529e-06, "loss": 0.4737, "step": 24203 }, { "epoch": 0.69, "grad_norm": 5.862095275117667, "learning_rate": 2.2725991554958155e-06, "loss": 0.5184, "step": 24204 }, { "epoch": 0.69, "grad_norm": 12.374025672549323, "learning_rate": 2.272210480246596e-06, "loss": 0.6368, "step": 24205 }, { "epoch": 0.69, "grad_norm": 9.692901729654276, "learning_rate": 2.2718218284642117e-06, "loss": 0.294, "step": 24206 }, { "epoch": 0.69, "grad_norm": 4.029998910444695, "learning_rate": 2.2714332001520108e-06, "loss": 0.5145, "step": 24207 }, { "epoch": 0.69, "grad_norm": 4.794734962067854, "learning_rate": 2.2710445953133315e-06, "loss": 0.6191, "step": 24208 }, { "epoch": 0.69, "grad_norm": 5.9664675557712075, "learning_rate": 2.2706560139515214e-06, "loss": 0.2449, "step": 24209 }, { "epoch": 0.69, "grad_norm": 7.776036396437893, "learning_rate": 2.2702674560699202e-06, "loss": 0.5402, "step": 24210 }, { "epoch": 0.69, "grad_norm": 4.869898107116817, "learning_rate": 2.2698789216718735e-06, "loss": 0.5924, "step": 24211 }, { "epoch": 0.69, "grad_norm": 7.227833371148567, "learning_rate": 2.2694904107607226e-06, "loss": 0.5294, "step": 24212 }, { "epoch": 0.69, "grad_norm": 5.13855495341612, "learning_rate": 2.2691019233398075e-06, "loss": 0.5689, "step": 24213 }, { "epoch": 0.69, "grad_norm": 5.595144513593079, "learning_rate": 2.2687134594124743e-06, "loss": 0.4342, "step": 24214 }, { "epoch": 0.69, "grad_norm": 6.63025093714193, "learning_rate": 2.268325018982061e-06, "loss": 0.529, "step": 24215 }, { "epoch": 0.69, "grad_norm": 5.031075516038643, "learning_rate": 2.267936602051913e-06, "loss": 0.2303, "step": 24216 }, { "epoch": 0.69, "grad_norm": 3.6300649445874393, "learning_rate": 2.2675482086253698e-06, "loss": 0.328, "step": 24217 }, { "epoch": 0.69, "grad_norm": 6.203425633138232, "learning_rate": 2.2671598387057714e-06, "loss": 0.4267, "step": 24218 }, { "epoch": 0.69, "grad_norm": 4.883675412032451, "learning_rate": 2.2667714922964624e-06, "loss": 0.5833, "step": 24219 }, { "epoch": 0.69, "grad_norm": 7.52260836174892, "learning_rate": 2.26638316940078e-06, "loss": 0.473, "step": 24220 }, { "epoch": 0.69, "grad_norm": 11.136477432660397, "learning_rate": 2.265994870022068e-06, "loss": 0.8479, "step": 24221 }, { "epoch": 0.69, "grad_norm": 7.148588151855571, "learning_rate": 2.2656065941636637e-06, "loss": 0.4894, "step": 24222 }, { "epoch": 0.69, "grad_norm": 10.412732828566497, "learning_rate": 2.265218341828911e-06, "loss": 1.0113, "step": 24223 }, { "epoch": 0.69, "grad_norm": 3.1827339555939576, "learning_rate": 2.2648301130211485e-06, "loss": 0.1457, "step": 24224 }, { "epoch": 0.69, "grad_norm": 3.916461628085374, "learning_rate": 2.2644419077437156e-06, "loss": 0.1078, "step": 24225 }, { "epoch": 0.69, "grad_norm": 16.646007143847072, "learning_rate": 2.26405372599995e-06, "loss": 0.506, "step": 24226 }, { "epoch": 0.69, "grad_norm": 5.6305811003147515, "learning_rate": 2.263665567793194e-06, "loss": 0.3216, "step": 24227 }, { "epoch": 0.69, "grad_norm": 5.746767752695777, "learning_rate": 2.2632774331267875e-06, "loss": 0.4482, "step": 24228 }, { "epoch": 0.69, "grad_norm": 12.962433400438249, "learning_rate": 2.262889322004067e-06, "loss": 0.5651, "step": 24229 }, { "epoch": 0.69, "grad_norm": 6.352590104762272, "learning_rate": 2.262501234428374e-06, "loss": 0.7317, "step": 24230 }, { "epoch": 0.69, "grad_norm": 2.712753270450386, "learning_rate": 2.2621131704030463e-06, "loss": 0.2562, "step": 24231 }, { "epoch": 0.69, "grad_norm": 6.913346868861653, "learning_rate": 2.2617251299314214e-06, "loss": 0.4042, "step": 24232 }, { "epoch": 0.69, "grad_norm": 5.836614280994265, "learning_rate": 2.2613371130168366e-06, "loss": 0.2287, "step": 24233 }, { "epoch": 0.69, "grad_norm": 5.117408433177916, "learning_rate": 2.2609491196626333e-06, "loss": 0.4749, "step": 24234 }, { "epoch": 0.69, "grad_norm": 7.072928670686657, "learning_rate": 2.2605611498721454e-06, "loss": 0.4337, "step": 24235 }, { "epoch": 0.69, "grad_norm": 5.308851908062016, "learning_rate": 2.2601732036487123e-06, "loss": 0.4282, "step": 24236 }, { "epoch": 0.69, "grad_norm": 4.63258959537911, "learning_rate": 2.2597852809956744e-06, "loss": 0.3838, "step": 24237 }, { "epoch": 0.69, "grad_norm": 4.475514473990675, "learning_rate": 2.2593973819163656e-06, "loss": 0.4167, "step": 24238 }, { "epoch": 0.69, "grad_norm": 3.86683933733015, "learning_rate": 2.2590095064141244e-06, "loss": 0.6162, "step": 24239 }, { "epoch": 0.69, "grad_norm": 5.457711943959531, "learning_rate": 2.2586216544922844e-06, "loss": 0.8337, "step": 24240 }, { "epoch": 0.69, "grad_norm": 4.874417196796234, "learning_rate": 2.258233826154187e-06, "loss": 0.3528, "step": 24241 }, { "epoch": 0.69, "grad_norm": 6.47814274227438, "learning_rate": 2.257846021403165e-06, "loss": 0.5322, "step": 24242 }, { "epoch": 0.69, "grad_norm": 6.146884048214775, "learning_rate": 2.257458240242557e-06, "loss": 0.4706, "step": 24243 }, { "epoch": 0.69, "grad_norm": 2.804868230070574, "learning_rate": 2.2570704826756988e-06, "loss": 0.3354, "step": 24244 }, { "epoch": 0.69, "grad_norm": 5.884178823643906, "learning_rate": 2.256682748705923e-06, "loss": 0.6253, "step": 24245 }, { "epoch": 0.69, "grad_norm": 5.705129443613333, "learning_rate": 2.2562950383365706e-06, "loss": 0.269, "step": 24246 }, { "epoch": 0.69, "grad_norm": 6.737984913377412, "learning_rate": 2.2559073515709717e-06, "loss": 0.3193, "step": 24247 }, { "epoch": 0.69, "grad_norm": 5.526497366976803, "learning_rate": 2.2555196884124658e-06, "loss": 0.3341, "step": 24248 }, { "epoch": 0.69, "grad_norm": 6.444024899499147, "learning_rate": 2.255132048864384e-06, "loss": 0.8685, "step": 24249 }, { "epoch": 0.69, "grad_norm": 5.2058764448477834, "learning_rate": 2.2547444329300655e-06, "loss": 0.7266, "step": 24250 }, { "epoch": 0.69, "grad_norm": 11.302957377352316, "learning_rate": 2.2543568406128424e-06, "loss": 0.6036, "step": 24251 }, { "epoch": 0.69, "grad_norm": 9.266756397151353, "learning_rate": 2.253969271916048e-06, "loss": 0.8821, "step": 24252 }, { "epoch": 0.69, "grad_norm": 5.625387983716873, "learning_rate": 2.253581726843019e-06, "loss": 0.5168, "step": 24253 }, { "epoch": 0.69, "grad_norm": 8.540969398385652, "learning_rate": 2.253194205397087e-06, "loss": 0.5172, "step": 24254 }, { "epoch": 0.69, "grad_norm": 4.504348455260616, "learning_rate": 2.2528067075815886e-06, "loss": 0.4364, "step": 24255 }, { "epoch": 0.69, "grad_norm": 6.405678179271994, "learning_rate": 2.252419233399854e-06, "loss": 0.7571, "step": 24256 }, { "epoch": 0.69, "grad_norm": 3.9181891823369313, "learning_rate": 2.252031782855221e-06, "loss": 0.2573, "step": 24257 }, { "epoch": 0.69, "grad_norm": 4.9691341899395045, "learning_rate": 2.251644355951019e-06, "loss": 0.3441, "step": 24258 }, { "epoch": 0.69, "grad_norm": 5.734720037822329, "learning_rate": 2.251256952690581e-06, "loss": 0.5972, "step": 24259 }, { "epoch": 0.69, "grad_norm": 3.379139428003472, "learning_rate": 2.250869573077243e-06, "loss": 0.198, "step": 24260 }, { "epoch": 0.69, "grad_norm": 4.691251296407356, "learning_rate": 2.250482217114333e-06, "loss": 0.4555, "step": 24261 }, { "epoch": 0.69, "grad_norm": 3.1567249129647332, "learning_rate": 2.250094884805189e-06, "loss": 0.4374, "step": 24262 }, { "epoch": 0.69, "grad_norm": 6.298559526734379, "learning_rate": 2.2497075761531374e-06, "loss": 0.4999, "step": 24263 }, { "epoch": 0.69, "grad_norm": 6.803033393379782, "learning_rate": 2.2493202911615146e-06, "loss": 0.2728, "step": 24264 }, { "epoch": 0.69, "grad_norm": 3.405790577974727, "learning_rate": 2.2489330298336516e-06, "loss": 0.225, "step": 24265 }, { "epoch": 0.69, "grad_norm": 6.328176936183721, "learning_rate": 2.248545792172878e-06, "loss": 0.2862, "step": 24266 }, { "epoch": 0.69, "grad_norm": 5.9558682666631375, "learning_rate": 2.248158578182525e-06, "loss": 0.4784, "step": 24267 }, { "epoch": 0.69, "grad_norm": 5.392820314842796, "learning_rate": 2.2477713878659247e-06, "loss": 0.8228, "step": 24268 }, { "epoch": 0.7, "grad_norm": 5.259826909107327, "learning_rate": 2.2473842212264103e-06, "loss": 0.2694, "step": 24269 }, { "epoch": 0.7, "grad_norm": 4.702557019380316, "learning_rate": 2.2469970782673084e-06, "loss": 0.3005, "step": 24270 }, { "epoch": 0.7, "grad_norm": 2.155696632033448, "learning_rate": 2.2466099589919553e-06, "loss": 0.1074, "step": 24271 }, { "epoch": 0.7, "grad_norm": 2.8429647975557657, "learning_rate": 2.246222863403675e-06, "loss": 0.1308, "step": 24272 }, { "epoch": 0.7, "grad_norm": 3.8659816827177296, "learning_rate": 2.245835791505801e-06, "loss": 0.2627, "step": 24273 }, { "epoch": 0.7, "grad_norm": 5.86322692398633, "learning_rate": 2.2454487433016615e-06, "loss": 1.0156, "step": 24274 }, { "epoch": 0.7, "grad_norm": 4.381579152475469, "learning_rate": 2.2450617187945882e-06, "loss": 0.1787, "step": 24275 }, { "epoch": 0.7, "grad_norm": 3.265676014332912, "learning_rate": 2.2446747179879085e-06, "loss": 0.2239, "step": 24276 }, { "epoch": 0.7, "grad_norm": 3.6471948896863866, "learning_rate": 2.244287740884955e-06, "loss": 0.1351, "step": 24277 }, { "epoch": 0.7, "grad_norm": 9.88409569894736, "learning_rate": 2.2439007874890546e-06, "loss": 0.5128, "step": 24278 }, { "epoch": 0.7, "grad_norm": 6.742081058939447, "learning_rate": 2.243513857803535e-06, "loss": 0.6839, "step": 24279 }, { "epoch": 0.7, "grad_norm": 7.15309316645547, "learning_rate": 2.2431269518317273e-06, "loss": 0.4586, "step": 24280 }, { "epoch": 0.7, "grad_norm": 8.21840234753666, "learning_rate": 2.2427400695769575e-06, "loss": 0.9225, "step": 24281 }, { "epoch": 0.7, "grad_norm": 6.028001612166282, "learning_rate": 2.242353211042557e-06, "loss": 0.3722, "step": 24282 }, { "epoch": 0.7, "grad_norm": 5.319375111012127, "learning_rate": 2.241966376231851e-06, "loss": 0.4055, "step": 24283 }, { "epoch": 0.7, "grad_norm": 5.845911534013595, "learning_rate": 2.2415795651481704e-06, "loss": 0.1461, "step": 24284 }, { "epoch": 0.7, "grad_norm": 4.858188803285838, "learning_rate": 2.241192777794841e-06, "loss": 0.453, "step": 24285 }, { "epoch": 0.7, "grad_norm": 5.969090636754074, "learning_rate": 2.2408060141751887e-06, "loss": 0.6281, "step": 24286 }, { "epoch": 0.7, "grad_norm": 7.987978187538895, "learning_rate": 2.2404192742925444e-06, "loss": 0.6694, "step": 24287 }, { "epoch": 0.7, "grad_norm": 3.6708195934875643, "learning_rate": 2.2400325581502313e-06, "loss": 0.1031, "step": 24288 }, { "epoch": 0.7, "grad_norm": 4.908146679906983, "learning_rate": 2.2396458657515803e-06, "loss": 0.417, "step": 24289 }, { "epoch": 0.7, "grad_norm": 9.159512352019316, "learning_rate": 2.239259197099914e-06, "loss": 0.6083, "step": 24290 }, { "epoch": 0.7, "grad_norm": 4.149755868280314, "learning_rate": 2.2388725521985634e-06, "loss": 0.6287, "step": 24291 }, { "epoch": 0.7, "grad_norm": 5.657006650443684, "learning_rate": 2.238485931050851e-06, "loss": 0.7307, "step": 24292 }, { "epoch": 0.7, "grad_norm": 4.386517921803352, "learning_rate": 2.2380993336601036e-06, "loss": 0.6709, "step": 24293 }, { "epoch": 0.7, "grad_norm": 5.957080318061233, "learning_rate": 2.237712760029649e-06, "loss": 0.4513, "step": 24294 }, { "epoch": 0.7, "grad_norm": 4.530712161536718, "learning_rate": 2.237326210162809e-06, "loss": 0.493, "step": 24295 }, { "epoch": 0.7, "grad_norm": 3.655002609439863, "learning_rate": 2.236939684062914e-06, "loss": 0.5642, "step": 24296 }, { "epoch": 0.7, "grad_norm": 12.40082079570403, "learning_rate": 2.2365531817332847e-06, "loss": 0.7176, "step": 24297 }, { "epoch": 0.7, "grad_norm": 3.9639760237641357, "learning_rate": 2.2361667031772517e-06, "loss": 0.3757, "step": 24298 }, { "epoch": 0.7, "grad_norm": 4.264259804605628, "learning_rate": 2.2357802483981328e-06, "loss": 0.3247, "step": 24299 }, { "epoch": 0.7, "grad_norm": 7.929675082492971, "learning_rate": 2.2353938173992555e-06, "loss": 0.7311, "step": 24300 }, { "epoch": 0.7, "grad_norm": 5.39145762329655, "learning_rate": 2.235007410183947e-06, "loss": 0.4719, "step": 24301 }, { "epoch": 0.7, "grad_norm": 8.534201757627516, "learning_rate": 2.234621026755528e-06, "loss": 0.4446, "step": 24302 }, { "epoch": 0.7, "grad_norm": 4.691917067644453, "learning_rate": 2.234234667117325e-06, "loss": 0.409, "step": 24303 }, { "epoch": 0.7, "grad_norm": 3.4778041551092063, "learning_rate": 2.2338483312726605e-06, "loss": 0.3277, "step": 24304 }, { "epoch": 0.7, "grad_norm": 5.302501424657045, "learning_rate": 2.233462019224859e-06, "loss": 0.4959, "step": 24305 }, { "epoch": 0.7, "grad_norm": 7.41029614370289, "learning_rate": 2.2330757309772407e-06, "loss": 0.5897, "step": 24306 }, { "epoch": 0.7, "grad_norm": 5.4460437124993755, "learning_rate": 2.2326894665331335e-06, "loss": 0.33, "step": 24307 }, { "epoch": 0.7, "grad_norm": 4.791685098806119, "learning_rate": 2.2323032258958554e-06, "loss": 0.5831, "step": 24308 }, { "epoch": 0.7, "grad_norm": 7.140918082935814, "learning_rate": 2.2319170090687324e-06, "loss": 0.5041, "step": 24309 }, { "epoch": 0.7, "grad_norm": 8.04252539547986, "learning_rate": 2.2315308160550886e-06, "loss": 0.9138, "step": 24310 }, { "epoch": 0.7, "grad_norm": 3.329981648678088, "learning_rate": 2.2311446468582444e-06, "loss": 0.2766, "step": 24311 }, { "epoch": 0.7, "grad_norm": 5.624414286318882, "learning_rate": 2.2307585014815213e-06, "loss": 0.5967, "step": 24312 }, { "epoch": 0.7, "grad_norm": 6.8669725195658575, "learning_rate": 2.2303723799282407e-06, "loss": 0.3028, "step": 24313 }, { "epoch": 0.7, "grad_norm": 5.221964411537928, "learning_rate": 2.2299862822017267e-06, "loss": 0.5163, "step": 24314 }, { "epoch": 0.7, "grad_norm": 4.808839557727014, "learning_rate": 2.229600208305298e-06, "loss": 0.3707, "step": 24315 }, { "epoch": 0.7, "grad_norm": 5.174540913070575, "learning_rate": 2.22921415824228e-06, "loss": 0.3643, "step": 24316 }, { "epoch": 0.7, "grad_norm": 3.4476408065356656, "learning_rate": 2.2288281320159887e-06, "loss": 0.2898, "step": 24317 }, { "epoch": 0.7, "grad_norm": 6.078833001880307, "learning_rate": 2.2284421296297503e-06, "loss": 0.5239, "step": 24318 }, { "epoch": 0.7, "grad_norm": 6.060573861802427, "learning_rate": 2.2280561510868833e-06, "loss": 0.5553, "step": 24319 }, { "epoch": 0.7, "grad_norm": 11.225910080207585, "learning_rate": 2.227670196390705e-06, "loss": 0.8754, "step": 24320 }, { "epoch": 0.7, "grad_norm": 3.764324422664686, "learning_rate": 2.2272842655445416e-06, "loss": 0.2782, "step": 24321 }, { "epoch": 0.7, "grad_norm": 3.1099249961983597, "learning_rate": 2.2268983585517077e-06, "loss": 0.1278, "step": 24322 }, { "epoch": 0.7, "grad_norm": 6.023560954410828, "learning_rate": 2.2265124754155275e-06, "loss": 0.5211, "step": 24323 }, { "epoch": 0.7, "grad_norm": 4.795320439137897, "learning_rate": 2.226126616139318e-06, "loss": 0.4989, "step": 24324 }, { "epoch": 0.7, "grad_norm": 3.7061648868868105, "learning_rate": 2.225740780726401e-06, "loss": 0.4227, "step": 24325 }, { "epoch": 0.7, "grad_norm": 4.8001182104495665, "learning_rate": 2.225354969180095e-06, "loss": 0.5353, "step": 24326 }, { "epoch": 0.7, "grad_norm": 6.767460069823935, "learning_rate": 2.2249691815037155e-06, "loss": 0.2664, "step": 24327 }, { "epoch": 0.7, "grad_norm": 7.493744593881625, "learning_rate": 2.2245834177005874e-06, "loss": 0.5043, "step": 24328 }, { "epoch": 0.7, "grad_norm": 5.398143754798221, "learning_rate": 2.2241976777740244e-06, "loss": 0.2658, "step": 24329 }, { "epoch": 0.7, "grad_norm": 3.271498003119935, "learning_rate": 2.2238119617273483e-06, "loss": 0.457, "step": 24330 }, { "epoch": 0.7, "grad_norm": 1.964955806380811, "learning_rate": 2.223426269563877e-06, "loss": 0.1519, "step": 24331 }, { "epoch": 0.7, "grad_norm": 2.2141510016306314, "learning_rate": 2.2230406012869273e-06, "loss": 0.1753, "step": 24332 }, { "epoch": 0.7, "grad_norm": 4.821213055390725, "learning_rate": 2.222654956899815e-06, "loss": 0.4795, "step": 24333 }, { "epoch": 0.7, "grad_norm": 6.57747424026021, "learning_rate": 2.2222693364058605e-06, "loss": 0.785, "step": 24334 }, { "epoch": 0.7, "grad_norm": 3.473399594435166, "learning_rate": 2.221883739808383e-06, "loss": 0.3045, "step": 24335 }, { "epoch": 0.7, "grad_norm": 3.532881047808283, "learning_rate": 2.2214981671106954e-06, "loss": 0.2884, "step": 24336 }, { "epoch": 0.7, "grad_norm": 2.093304985352033, "learning_rate": 2.221112618316119e-06, "loss": 0.271, "step": 24337 }, { "epoch": 0.7, "grad_norm": 5.900975309191327, "learning_rate": 2.220727093427969e-06, "loss": 0.2958, "step": 24338 }, { "epoch": 0.7, "grad_norm": 4.379848423103043, "learning_rate": 2.2203415924495613e-06, "loss": 0.2347, "step": 24339 }, { "epoch": 0.7, "grad_norm": 3.8535845480167397, "learning_rate": 2.219956115384211e-06, "loss": 0.3965, "step": 24340 }, { "epoch": 0.7, "grad_norm": 7.6226181312382835, "learning_rate": 2.2195706622352354e-06, "loss": 0.406, "step": 24341 }, { "epoch": 0.7, "grad_norm": 6.585560562732305, "learning_rate": 2.2191852330059533e-06, "loss": 0.5206, "step": 24342 }, { "epoch": 0.7, "grad_norm": 8.494689825411518, "learning_rate": 2.2187998276996765e-06, "loss": 0.4178, "step": 24343 }, { "epoch": 0.7, "grad_norm": 4.454868296657657, "learning_rate": 2.218414446319724e-06, "loss": 0.4252, "step": 24344 }, { "epoch": 0.7, "grad_norm": 3.6772974112261236, "learning_rate": 2.218029088869409e-06, "loss": 0.4197, "step": 24345 }, { "epoch": 0.7, "grad_norm": 2.0441171106421745, "learning_rate": 2.217643755352048e-06, "loss": 0.0511, "step": 24346 }, { "epoch": 0.7, "grad_norm": 2.999743629468513, "learning_rate": 2.2172584457709527e-06, "loss": 0.3359, "step": 24347 }, { "epoch": 0.7, "grad_norm": 12.387321426228482, "learning_rate": 2.216873160129443e-06, "loss": 0.3479, "step": 24348 }, { "epoch": 0.7, "grad_norm": 3.2828809227183533, "learning_rate": 2.2164878984308287e-06, "loss": 0.3375, "step": 24349 }, { "epoch": 0.7, "grad_norm": 8.374960771155648, "learning_rate": 2.2161026606784266e-06, "loss": 0.6479, "step": 24350 }, { "epoch": 0.7, "grad_norm": 8.598732575461158, "learning_rate": 2.2157174468755515e-06, "loss": 0.3006, "step": 24351 }, { "epoch": 0.7, "grad_norm": 4.490460058699173, "learning_rate": 2.215332257025517e-06, "loss": 0.1961, "step": 24352 }, { "epoch": 0.7, "grad_norm": 6.560856640638384, "learning_rate": 2.214947091131636e-06, "loss": 0.8407, "step": 24353 }, { "epoch": 0.7, "grad_norm": 6.766407276061433, "learning_rate": 2.214561949197221e-06, "loss": 0.4306, "step": 24354 }, { "epoch": 0.7, "grad_norm": 3.863698907845729, "learning_rate": 2.214176831225588e-06, "loss": 0.3523, "step": 24355 }, { "epoch": 0.7, "grad_norm": 4.30965571661407, "learning_rate": 2.2137917372200467e-06, "loss": 0.4062, "step": 24356 }, { "epoch": 0.7, "grad_norm": 4.906097919998479, "learning_rate": 2.2134066671839137e-06, "loss": 0.2852, "step": 24357 }, { "epoch": 0.7, "grad_norm": 7.43278094051029, "learning_rate": 2.213021621120501e-06, "loss": 0.7637, "step": 24358 }, { "epoch": 0.7, "grad_norm": 4.987471596199555, "learning_rate": 2.2126365990331174e-06, "loss": 0.413, "step": 24359 }, { "epoch": 0.7, "grad_norm": 3.4038958595087525, "learning_rate": 2.21225160092508e-06, "loss": 0.3107, "step": 24360 }, { "epoch": 0.7, "grad_norm": 4.108375877497966, "learning_rate": 2.2118666267996976e-06, "loss": 0.2377, "step": 24361 }, { "epoch": 0.7, "grad_norm": 6.239192850655711, "learning_rate": 2.211481676660284e-06, "loss": 0.6561, "step": 24362 }, { "epoch": 0.7, "grad_norm": 5.87529250695115, "learning_rate": 2.2110967505101495e-06, "loss": 0.4128, "step": 24363 }, { "epoch": 0.7, "grad_norm": 4.256921909529154, "learning_rate": 2.2107118483526076e-06, "loss": 0.324, "step": 24364 }, { "epoch": 0.7, "grad_norm": 5.218315220613255, "learning_rate": 2.2103269701909687e-06, "loss": 0.3225, "step": 24365 }, { "epoch": 0.7, "grad_norm": 5.298351610905569, "learning_rate": 2.2099421160285405e-06, "loss": 0.3519, "step": 24366 }, { "epoch": 0.7, "grad_norm": 8.446238591377126, "learning_rate": 2.2095572858686394e-06, "loss": 0.628, "step": 24367 }, { "epoch": 0.7, "grad_norm": 6.77793265210931, "learning_rate": 2.2091724797145716e-06, "loss": 0.8329, "step": 24368 }, { "epoch": 0.7, "grad_norm": 6.085619662289268, "learning_rate": 2.208787697569651e-06, "loss": 0.6121, "step": 24369 }, { "epoch": 0.7, "grad_norm": 3.5652374658772863, "learning_rate": 2.2084029394371843e-06, "loss": 0.3461, "step": 24370 }, { "epoch": 0.7, "grad_norm": 3.5074763657364314, "learning_rate": 2.2080182053204852e-06, "loss": 0.1792, "step": 24371 }, { "epoch": 0.7, "grad_norm": 1.4028706317906001, "learning_rate": 2.207633495222862e-06, "loss": 0.1448, "step": 24372 }, { "epoch": 0.7, "grad_norm": 4.783991407967191, "learning_rate": 2.2072488091476236e-06, "loss": 0.5253, "step": 24373 }, { "epoch": 0.7, "grad_norm": 4.763571875391818, "learning_rate": 2.2068641470980784e-06, "loss": 0.7102, "step": 24374 }, { "epoch": 0.7, "grad_norm": 5.80163662444704, "learning_rate": 2.206479509077537e-06, "loss": 0.4562, "step": 24375 }, { "epoch": 0.7, "grad_norm": 5.239103703574308, "learning_rate": 2.20609489508931e-06, "loss": 0.5418, "step": 24376 }, { "epoch": 0.7, "grad_norm": 5.796019930638756, "learning_rate": 2.205710305136703e-06, "loss": 0.6339, "step": 24377 }, { "epoch": 0.7, "grad_norm": 2.0768012513054073, "learning_rate": 2.205325739223028e-06, "loss": 0.1667, "step": 24378 }, { "epoch": 0.7, "grad_norm": 3.0763298551168674, "learning_rate": 2.2049411973515916e-06, "loss": 0.1798, "step": 24379 }, { "epoch": 0.7, "grad_norm": 6.475928410134979, "learning_rate": 2.204556679525702e-06, "loss": 0.7469, "step": 24380 }, { "epoch": 0.7, "grad_norm": 3.0895857094369514, "learning_rate": 2.2041721857486647e-06, "loss": 0.4077, "step": 24381 }, { "epoch": 0.7, "grad_norm": 4.161284502367088, "learning_rate": 2.2037877160237926e-06, "loss": 0.2409, "step": 24382 }, { "epoch": 0.7, "grad_norm": 6.582542784421784, "learning_rate": 2.2034032703543877e-06, "loss": 0.2139, "step": 24383 }, { "epoch": 0.7, "grad_norm": 7.018695728503201, "learning_rate": 2.203018848743761e-06, "loss": 0.7589, "step": 24384 }, { "epoch": 0.7, "grad_norm": 4.379029108071394, "learning_rate": 2.202634451195222e-06, "loss": 0.5479, "step": 24385 }, { "epoch": 0.7, "grad_norm": 3.9608788721083306, "learning_rate": 2.202250077712071e-06, "loss": 0.287, "step": 24386 }, { "epoch": 0.7, "grad_norm": 3.6401198204461895, "learning_rate": 2.2018657282976197e-06, "loss": 0.204, "step": 24387 }, { "epoch": 0.7, "grad_norm": 8.748124657344956, "learning_rate": 2.201481402955171e-06, "loss": 0.7828, "step": 24388 }, { "epoch": 0.7, "grad_norm": 4.319146549110045, "learning_rate": 2.2010971016880355e-06, "loss": 0.2443, "step": 24389 }, { "epoch": 0.7, "grad_norm": 3.670211886102358, "learning_rate": 2.200712824499515e-06, "loss": 0.3274, "step": 24390 }, { "epoch": 0.7, "grad_norm": 11.50090172590972, "learning_rate": 2.2003285713929194e-06, "loss": 0.5854, "step": 24391 }, { "epoch": 0.7, "grad_norm": 1.849757272190988, "learning_rate": 2.1999443423715523e-06, "loss": 0.1285, "step": 24392 }, { "epoch": 0.7, "grad_norm": 3.8205084799208264, "learning_rate": 2.1995601374387175e-06, "loss": 0.1598, "step": 24393 }, { "epoch": 0.7, "grad_norm": 6.027422308023576, "learning_rate": 2.1991759565977237e-06, "loss": 0.4404, "step": 24394 }, { "epoch": 0.7, "grad_norm": 5.0573353266247345, "learning_rate": 2.1987917998518725e-06, "loss": 0.1998, "step": 24395 }, { "epoch": 0.7, "grad_norm": 3.5413837231016982, "learning_rate": 2.1984076672044724e-06, "loss": 0.1496, "step": 24396 }, { "epoch": 0.7, "grad_norm": 8.718702746847757, "learning_rate": 2.1980235586588244e-06, "loss": 0.3955, "step": 24397 }, { "epoch": 0.7, "grad_norm": 8.297289850528182, "learning_rate": 2.1976394742182365e-06, "loss": 1.0744, "step": 24398 }, { "epoch": 0.7, "grad_norm": 5.577134800491667, "learning_rate": 2.1972554138860113e-06, "loss": 0.2839, "step": 24399 }, { "epoch": 0.7, "grad_norm": 6.0031989629697815, "learning_rate": 2.196871377665451e-06, "loss": 0.5891, "step": 24400 }, { "epoch": 0.7, "grad_norm": 7.312685646243711, "learning_rate": 2.1964873655598624e-06, "loss": 0.7042, "step": 24401 }, { "epoch": 0.7, "grad_norm": 5.2458025183263715, "learning_rate": 2.1961033775725467e-06, "loss": 0.4461, "step": 24402 }, { "epoch": 0.7, "grad_norm": 5.0846311705144585, "learning_rate": 2.1957194137068096e-06, "loss": 0.4964, "step": 24403 }, { "epoch": 0.7, "grad_norm": 4.569306352506943, "learning_rate": 2.1953354739659515e-06, "loss": 0.5099, "step": 24404 }, { "epoch": 0.7, "grad_norm": 6.382741491139877, "learning_rate": 2.194951558353279e-06, "loss": 0.4465, "step": 24405 }, { "epoch": 0.7, "grad_norm": 3.7270430888772887, "learning_rate": 2.1945676668720928e-06, "loss": 0.3412, "step": 24406 }, { "epoch": 0.7, "grad_norm": 7.616340723165721, "learning_rate": 2.1941837995256935e-06, "loss": 0.6681, "step": 24407 }, { "epoch": 0.7, "grad_norm": 15.465113788510486, "learning_rate": 2.1937999563173874e-06, "loss": 0.4001, "step": 24408 }, { "epoch": 0.7, "grad_norm": 10.146819540000884, "learning_rate": 2.193416137250473e-06, "loss": 0.7757, "step": 24409 }, { "epoch": 0.7, "grad_norm": 4.929504391130542, "learning_rate": 2.1930323423282564e-06, "loss": 0.169, "step": 24410 }, { "epoch": 0.7, "grad_norm": 7.302739827188975, "learning_rate": 2.192648571554034e-06, "loss": 0.5348, "step": 24411 }, { "epoch": 0.7, "grad_norm": 6.93718856249939, "learning_rate": 2.192264824931113e-06, "loss": 0.2507, "step": 24412 }, { "epoch": 0.7, "grad_norm": 3.620217028903757, "learning_rate": 2.1918811024627917e-06, "loss": 0.2554, "step": 24413 }, { "epoch": 0.7, "grad_norm": 8.358218531962756, "learning_rate": 2.191497404152372e-06, "loss": 0.5123, "step": 24414 }, { "epoch": 0.7, "grad_norm": 2.751758062246369, "learning_rate": 2.1911137300031515e-06, "loss": 0.2089, "step": 24415 }, { "epoch": 0.7, "grad_norm": 3.481518382714074, "learning_rate": 2.1907300800184343e-06, "loss": 0.2882, "step": 24416 }, { "epoch": 0.7, "grad_norm": 3.1843634956159534, "learning_rate": 2.1903464542015224e-06, "loss": 0.0676, "step": 24417 }, { "epoch": 0.7, "grad_norm": 5.618935261388913, "learning_rate": 2.189962852555714e-06, "loss": 0.5023, "step": 24418 }, { "epoch": 0.7, "grad_norm": 6.420900981017795, "learning_rate": 2.1895792750843086e-06, "loss": 0.2754, "step": 24419 }, { "epoch": 0.7, "grad_norm": 5.583424847360538, "learning_rate": 2.1891957217906055e-06, "loss": 0.5245, "step": 24420 }, { "epoch": 0.7, "grad_norm": 7.025523994967894, "learning_rate": 2.188812192677907e-06, "loss": 0.4238, "step": 24421 }, { "epoch": 0.7, "grad_norm": 13.373231556997323, "learning_rate": 2.188428687749509e-06, "loss": 0.5212, "step": 24422 }, { "epoch": 0.7, "grad_norm": 4.252779528554308, "learning_rate": 2.1880452070087154e-06, "loss": 0.386, "step": 24423 }, { "epoch": 0.7, "grad_norm": 4.973800323799298, "learning_rate": 2.1876617504588206e-06, "loss": 0.4548, "step": 24424 }, { "epoch": 0.7, "grad_norm": 5.753131013998672, "learning_rate": 2.1872783181031272e-06, "loss": 0.4951, "step": 24425 }, { "epoch": 0.7, "grad_norm": 9.704446208154806, "learning_rate": 2.186894909944932e-06, "loss": 0.5601, "step": 24426 }, { "epoch": 0.7, "grad_norm": 5.549987671812645, "learning_rate": 2.186511525987532e-06, "loss": 0.2282, "step": 24427 }, { "epoch": 0.7, "grad_norm": 3.1989636255433953, "learning_rate": 2.1861281662342283e-06, "loss": 0.4691, "step": 24428 }, { "epoch": 0.7, "grad_norm": 3.9036720314020505, "learning_rate": 2.1857448306883162e-06, "loss": 0.3596, "step": 24429 }, { "epoch": 0.7, "grad_norm": 5.430487221203616, "learning_rate": 2.1853615193530964e-06, "loss": 0.5906, "step": 24430 }, { "epoch": 0.7, "grad_norm": 7.802527509675669, "learning_rate": 2.184978232231863e-06, "loss": 1.4155, "step": 24431 }, { "epoch": 0.7, "grad_norm": 3.857388375052319, "learning_rate": 2.1845949693279175e-06, "loss": 0.1935, "step": 24432 }, { "epoch": 0.7, "grad_norm": 6.780088826275619, "learning_rate": 2.184211730644554e-06, "loss": 0.4751, "step": 24433 }, { "epoch": 0.7, "grad_norm": 6.22771038353955, "learning_rate": 2.1838285161850693e-06, "loss": 0.6099, "step": 24434 }, { "epoch": 0.7, "grad_norm": 3.6998702767545386, "learning_rate": 2.1834453259527627e-06, "loss": 0.3614, "step": 24435 }, { "epoch": 0.7, "grad_norm": 4.2509445795652105, "learning_rate": 2.183062159950927e-06, "loss": 0.4481, "step": 24436 }, { "epoch": 0.7, "grad_norm": 3.020984176728152, "learning_rate": 2.1826790181828628e-06, "loss": 0.3129, "step": 24437 }, { "epoch": 0.7, "grad_norm": 6.400003094970431, "learning_rate": 2.1822959006518624e-06, "loss": 0.1849, "step": 24438 }, { "epoch": 0.7, "grad_norm": 3.0149256681022165, "learning_rate": 2.1819128073612255e-06, "loss": 0.3198, "step": 24439 }, { "epoch": 0.7, "grad_norm": 5.439772120218793, "learning_rate": 2.1815297383142453e-06, "loss": 0.5389, "step": 24440 }, { "epoch": 0.7, "grad_norm": 4.015283815012916, "learning_rate": 2.181146693514216e-06, "loss": 0.2513, "step": 24441 }, { "epoch": 0.7, "grad_norm": 5.161400967563703, "learning_rate": 2.1807636729644367e-06, "loss": 0.3749, "step": 24442 }, { "epoch": 0.7, "grad_norm": 5.194284850857423, "learning_rate": 2.1803806766681986e-06, "loss": 0.2811, "step": 24443 }, { "epoch": 0.7, "grad_norm": 4.174412487859495, "learning_rate": 2.1799977046287996e-06, "loss": 0.1919, "step": 24444 }, { "epoch": 0.7, "grad_norm": 1.4930170079101768, "learning_rate": 2.1796147568495342e-06, "loss": 0.0612, "step": 24445 }, { "epoch": 0.7, "grad_norm": 3.6556302547955126, "learning_rate": 2.179231833333695e-06, "loss": 0.5465, "step": 24446 }, { "epoch": 0.7, "grad_norm": 5.332738316643372, "learning_rate": 2.1788489340845754e-06, "loss": 0.6109, "step": 24447 }, { "epoch": 0.7, "grad_norm": 5.907461455901876, "learning_rate": 2.1784660591054706e-06, "loss": 0.6002, "step": 24448 }, { "epoch": 0.7, "grad_norm": 13.062290500256765, "learning_rate": 2.178083208399677e-06, "loss": 0.7719, "step": 24449 }, { "epoch": 0.7, "grad_norm": 11.575564123129238, "learning_rate": 2.1777003819704845e-06, "loss": 0.8279, "step": 24450 }, { "epoch": 0.7, "grad_norm": 7.281895424720304, "learning_rate": 2.1773175798211898e-06, "loss": 0.2995, "step": 24451 }, { "epoch": 0.7, "grad_norm": 6.7405958717081145, "learning_rate": 2.176934801955085e-06, "loss": 0.4272, "step": 24452 }, { "epoch": 0.7, "grad_norm": 8.637587994846326, "learning_rate": 2.176552048375462e-06, "loss": 0.4428, "step": 24453 }, { "epoch": 0.7, "grad_norm": 5.039374715906055, "learning_rate": 2.176169319085612e-06, "loss": 0.5116, "step": 24454 }, { "epoch": 0.7, "grad_norm": 3.1352012070208906, "learning_rate": 2.1757866140888317e-06, "loss": 0.163, "step": 24455 }, { "epoch": 0.7, "grad_norm": 6.087943572816751, "learning_rate": 2.1754039333884096e-06, "loss": 0.6083, "step": 24456 }, { "epoch": 0.7, "grad_norm": 7.3119534019220795, "learning_rate": 2.17502127698764e-06, "loss": 0.8521, "step": 24457 }, { "epoch": 0.7, "grad_norm": 8.453865597024965, "learning_rate": 2.1746386448898165e-06, "loss": 0.5586, "step": 24458 }, { "epoch": 0.7, "grad_norm": 5.939380408221061, "learning_rate": 2.1742560370982285e-06, "loss": 0.5506, "step": 24459 }, { "epoch": 0.7, "grad_norm": 6.9428887101761525, "learning_rate": 2.1738734536161676e-06, "loss": 0.3684, "step": 24460 }, { "epoch": 0.7, "grad_norm": 5.19785098822412, "learning_rate": 2.173490894446924e-06, "loss": 0.6165, "step": 24461 }, { "epoch": 0.7, "grad_norm": 8.89541362930778, "learning_rate": 2.173108359593792e-06, "loss": 0.4048, "step": 24462 }, { "epoch": 0.7, "grad_norm": 6.367669247615935, "learning_rate": 2.172725849060059e-06, "loss": 0.446, "step": 24463 }, { "epoch": 0.7, "grad_norm": 4.609503120322826, "learning_rate": 2.172343362849019e-06, "loss": 0.3648, "step": 24464 }, { "epoch": 0.7, "grad_norm": 7.675657749881149, "learning_rate": 2.1719609009639592e-06, "loss": 0.3762, "step": 24465 }, { "epoch": 0.7, "grad_norm": 2.857336391127222, "learning_rate": 2.1715784634081733e-06, "loss": 0.1319, "step": 24466 }, { "epoch": 0.7, "grad_norm": 3.2565974615768303, "learning_rate": 2.17119605018495e-06, "loss": 0.3842, "step": 24467 }, { "epoch": 0.7, "grad_norm": 8.352912672364779, "learning_rate": 2.1708136612975766e-06, "loss": 0.7057, "step": 24468 }, { "epoch": 0.7, "grad_norm": 2.6490231319416906, "learning_rate": 2.1704312967493468e-06, "loss": 0.1257, "step": 24469 }, { "epoch": 0.7, "grad_norm": 2.509387938583671, "learning_rate": 2.1700489565435462e-06, "loss": 0.1447, "step": 24470 }, { "epoch": 0.7, "grad_norm": 5.7672313737091825, "learning_rate": 2.169666640683468e-06, "loss": 0.2175, "step": 24471 }, { "epoch": 0.7, "grad_norm": 4.474300725208242, "learning_rate": 2.1692843491723975e-06, "loss": 0.3318, "step": 24472 }, { "epoch": 0.7, "grad_norm": 5.042555859340973, "learning_rate": 2.168902082013627e-06, "loss": 0.4856, "step": 24473 }, { "epoch": 0.7, "grad_norm": 4.153235324516585, "learning_rate": 2.168519839210443e-06, "loss": 0.2126, "step": 24474 }, { "epoch": 0.7, "grad_norm": 3.322611499792676, "learning_rate": 2.1681376207661325e-06, "loss": 0.1372, "step": 24475 }, { "epoch": 0.7, "grad_norm": 5.536951133444876, "learning_rate": 2.1677554266839873e-06, "loss": 0.4573, "step": 24476 }, { "epoch": 0.7, "grad_norm": 8.128061686045188, "learning_rate": 2.167373256967292e-06, "loss": 0.6707, "step": 24477 }, { "epoch": 0.7, "grad_norm": 6.131634136147184, "learning_rate": 2.166991111619337e-06, "loss": 0.5753, "step": 24478 }, { "epoch": 0.7, "grad_norm": 7.2638762143496365, "learning_rate": 2.166608990643409e-06, "loss": 0.4244, "step": 24479 }, { "epoch": 0.7, "grad_norm": 4.860647396797808, "learning_rate": 2.166226894042795e-06, "loss": 0.2811, "step": 24480 }, { "epoch": 0.7, "grad_norm": 3.6700335491216514, "learning_rate": 2.16584482182078e-06, "loss": 0.4029, "step": 24481 }, { "epoch": 0.7, "grad_norm": 6.478593974750923, "learning_rate": 2.1654627739806535e-06, "loss": 0.1026, "step": 24482 }, { "epoch": 0.7, "grad_norm": 5.183257493796157, "learning_rate": 2.165080750525703e-06, "loss": 0.5856, "step": 24483 }, { "epoch": 0.7, "grad_norm": 3.20302994749877, "learning_rate": 2.1646987514592126e-06, "loss": 0.305, "step": 24484 }, { "epoch": 0.7, "grad_norm": 6.879355559440725, "learning_rate": 2.1643167767844716e-06, "loss": 0.3966, "step": 24485 }, { "epoch": 0.7, "grad_norm": 4.667740207328615, "learning_rate": 2.163934826504764e-06, "loss": 0.3677, "step": 24486 }, { "epoch": 0.7, "grad_norm": 4.651850791969643, "learning_rate": 2.1635529006233753e-06, "loss": 0.5661, "step": 24487 }, { "epoch": 0.7, "grad_norm": 3.198819500017947, "learning_rate": 2.1631709991435905e-06, "loss": 0.063, "step": 24488 }, { "epoch": 0.7, "grad_norm": 3.147446052688877, "learning_rate": 2.1627891220686965e-06, "loss": 0.4388, "step": 24489 }, { "epoch": 0.7, "grad_norm": 5.372916505341998, "learning_rate": 2.16240726940198e-06, "loss": 0.3189, "step": 24490 }, { "epoch": 0.7, "grad_norm": 4.737268652589189, "learning_rate": 2.1620254411467223e-06, "loss": 0.3134, "step": 24491 }, { "epoch": 0.7, "grad_norm": 6.2307212658835525, "learning_rate": 2.161643637306212e-06, "loss": 0.7048, "step": 24492 }, { "epoch": 0.7, "grad_norm": 4.008236399758658, "learning_rate": 2.1612618578837323e-06, "loss": 0.1236, "step": 24493 }, { "epoch": 0.7, "grad_norm": 8.258179453453225, "learning_rate": 2.1608801028825676e-06, "loss": 0.6784, "step": 24494 }, { "epoch": 0.7, "grad_norm": 3.574700294949943, "learning_rate": 2.160498372306e-06, "loss": 0.2678, "step": 24495 }, { "epoch": 0.7, "grad_norm": 9.119318565124487, "learning_rate": 2.160116666157317e-06, "loss": 0.6561, "step": 24496 }, { "epoch": 0.7, "grad_norm": 5.021251719073039, "learning_rate": 2.1597349844397987e-06, "loss": 0.3167, "step": 24497 }, { "epoch": 0.7, "grad_norm": 6.142768932102057, "learning_rate": 2.1593533271567306e-06, "loss": 0.5448, "step": 24498 }, { "epoch": 0.7, "grad_norm": 7.951789309114819, "learning_rate": 2.158971694311398e-06, "loss": 0.6117, "step": 24499 }, { "epoch": 0.7, "grad_norm": 6.637035306192039, "learning_rate": 2.158590085907082e-06, "loss": 0.3745, "step": 24500 }, { "epoch": 0.7, "grad_norm": 7.958939860400822, "learning_rate": 2.1582085019470656e-06, "loss": 0.9239, "step": 24501 }, { "epoch": 0.7, "grad_norm": 4.772474700676836, "learning_rate": 2.15782694243463e-06, "loss": 0.3824, "step": 24502 }, { "epoch": 0.7, "grad_norm": 7.8099907164085, "learning_rate": 2.157445407373061e-06, "loss": 0.987, "step": 24503 }, { "epoch": 0.7, "grad_norm": 3.5608361817726535, "learning_rate": 2.1570638967656373e-06, "loss": 0.2705, "step": 24504 }, { "epoch": 0.7, "grad_norm": 6.830047650987832, "learning_rate": 2.1566824106156447e-06, "loss": 0.3122, "step": 24505 }, { "epoch": 0.7, "grad_norm": 4.549354873237472, "learning_rate": 2.156300948926363e-06, "loss": 0.4506, "step": 24506 }, { "epoch": 0.7, "grad_norm": 4.100129492971028, "learning_rate": 2.1559195117010728e-06, "loss": 0.6249, "step": 24507 }, { "epoch": 0.7, "grad_norm": 4.170281809654233, "learning_rate": 2.1555380989430584e-06, "loss": 0.3138, "step": 24508 }, { "epoch": 0.7, "grad_norm": 7.991215368011153, "learning_rate": 2.1551567106555976e-06, "loss": 0.4892, "step": 24509 }, { "epoch": 0.7, "grad_norm": 8.271640081640264, "learning_rate": 2.154775346841975e-06, "loss": 0.4571, "step": 24510 }, { "epoch": 0.7, "grad_norm": 3.4806590087127858, "learning_rate": 2.1543940075054686e-06, "loss": 0.5251, "step": 24511 }, { "epoch": 0.7, "grad_norm": 3.2080411447683286, "learning_rate": 2.1540126926493616e-06, "loss": 0.2912, "step": 24512 }, { "epoch": 0.7, "grad_norm": 2.522440639625372, "learning_rate": 2.153631402276933e-06, "loss": 0.0984, "step": 24513 }, { "epoch": 0.7, "grad_norm": 2.492763608156782, "learning_rate": 2.1532501363914615e-06, "loss": 0.2166, "step": 24514 }, { "epoch": 0.7, "grad_norm": 13.721354174389214, "learning_rate": 2.15286889499623e-06, "loss": 0.7471, "step": 24515 }, { "epoch": 0.7, "grad_norm": 3.02487105260248, "learning_rate": 2.1524876780945155e-06, "loss": 0.5311, "step": 24516 }, { "epoch": 0.7, "grad_norm": 4.685894017082984, "learning_rate": 2.1521064856896003e-06, "loss": 0.2939, "step": 24517 }, { "epoch": 0.7, "grad_norm": 3.4631114013658237, "learning_rate": 2.1517253177847603e-06, "loss": 0.2557, "step": 24518 }, { "epoch": 0.7, "grad_norm": 5.083776058658308, "learning_rate": 2.1513441743832786e-06, "loss": 0.3456, "step": 24519 }, { "epoch": 0.7, "grad_norm": 6.710219001720876, "learning_rate": 2.1509630554884324e-06, "loss": 0.5613, "step": 24520 }, { "epoch": 0.7, "grad_norm": 4.025241824994766, "learning_rate": 2.1505819611035005e-06, "loss": 0.6206, "step": 24521 }, { "epoch": 0.7, "grad_norm": 3.804113795207956, "learning_rate": 2.1502008912317594e-06, "loss": 0.3112, "step": 24522 }, { "epoch": 0.7, "grad_norm": 7.35775784423385, "learning_rate": 2.149819845876489e-06, "loss": 0.2836, "step": 24523 }, { "epoch": 0.7, "grad_norm": 9.999584451624461, "learning_rate": 2.149438825040969e-06, "loss": 0.6694, "step": 24524 }, { "epoch": 0.7, "grad_norm": 3.257228533689375, "learning_rate": 2.149057828728474e-06, "loss": 0.3274, "step": 24525 }, { "epoch": 0.7, "grad_norm": 4.011936200422862, "learning_rate": 2.1486768569422862e-06, "loss": 0.4848, "step": 24526 }, { "epoch": 0.7, "grad_norm": 5.815205692874716, "learning_rate": 2.1482959096856792e-06, "loss": 0.7084, "step": 24527 }, { "epoch": 0.7, "grad_norm": 5.025606390201425, "learning_rate": 2.147914986961932e-06, "loss": 0.5677, "step": 24528 }, { "epoch": 0.7, "grad_norm": 8.579131187253205, "learning_rate": 2.1475340887743196e-06, "loss": 0.8312, "step": 24529 }, { "epoch": 0.7, "grad_norm": 8.053337953510392, "learning_rate": 2.147153215126121e-06, "loss": 0.4266, "step": 24530 }, { "epoch": 0.7, "grad_norm": 4.779026062794747, "learning_rate": 2.1467723660206107e-06, "loss": 0.175, "step": 24531 }, { "epoch": 0.7, "grad_norm": 3.117038445984297, "learning_rate": 2.146391541461067e-06, "loss": 0.3032, "step": 24532 }, { "epoch": 0.7, "grad_norm": 3.493878341392062, "learning_rate": 2.1460107414507687e-06, "loss": 0.3901, "step": 24533 }, { "epoch": 0.7, "grad_norm": 7.884109254305504, "learning_rate": 2.1456299659929848e-06, "loss": 0.7264, "step": 24534 }, { "epoch": 0.7, "grad_norm": 5.4097496127783335, "learning_rate": 2.1452492150909965e-06, "loss": 0.5242, "step": 24535 }, { "epoch": 0.7, "grad_norm": 5.297598820296156, "learning_rate": 2.1448684887480762e-06, "loss": 0.4264, "step": 24536 }, { "epoch": 0.7, "grad_norm": 3.1746375357616974, "learning_rate": 2.144487786967503e-06, "loss": 0.3055, "step": 24537 }, { "epoch": 0.7, "grad_norm": 1.8742912144901633, "learning_rate": 2.1441071097525475e-06, "loss": 0.2706, "step": 24538 }, { "epoch": 0.7, "grad_norm": 6.901646110886662, "learning_rate": 2.1437264571064886e-06, "loss": 0.7892, "step": 24539 }, { "epoch": 0.7, "grad_norm": 7.314898725109737, "learning_rate": 2.1433458290325993e-06, "loss": 0.3976, "step": 24540 }, { "epoch": 0.7, "grad_norm": 5.31020530059551, "learning_rate": 2.1429652255341526e-06, "loss": 0.8492, "step": 24541 }, { "epoch": 0.7, "grad_norm": 2.0220348032631326, "learning_rate": 2.1425846466144263e-06, "loss": 0.1867, "step": 24542 }, { "epoch": 0.7, "grad_norm": 5.785267748328126, "learning_rate": 2.14220409227669e-06, "loss": 0.3803, "step": 24543 }, { "epoch": 0.7, "grad_norm": 7.768195327512872, "learning_rate": 2.1418235625242224e-06, "loss": 0.5565, "step": 24544 }, { "epoch": 0.7, "grad_norm": 10.777644209563924, "learning_rate": 2.141443057360293e-06, "loss": 0.5564, "step": 24545 }, { "epoch": 0.7, "grad_norm": 6.737495425192671, "learning_rate": 2.141062576788178e-06, "loss": 0.5325, "step": 24546 }, { "epoch": 0.7, "grad_norm": 6.84552201971567, "learning_rate": 2.1406821208111506e-06, "loss": 0.4425, "step": 24547 }, { "epoch": 0.7, "grad_norm": 9.989248886052199, "learning_rate": 2.140301689432481e-06, "loss": 0.6996, "step": 24548 }, { "epoch": 0.7, "grad_norm": 8.459024848920432, "learning_rate": 2.139921282655445e-06, "loss": 0.8049, "step": 24549 }, { "epoch": 0.7, "grad_norm": 14.903007658522279, "learning_rate": 2.139540900483313e-06, "loss": 0.7566, "step": 24550 }, { "epoch": 0.7, "grad_norm": 6.50547219060223, "learning_rate": 2.1391605429193594e-06, "loss": 0.7639, "step": 24551 }, { "epoch": 0.7, "grad_norm": 3.956024659475298, "learning_rate": 2.138780209966854e-06, "loss": 0.3038, "step": 24552 }, { "epoch": 0.7, "grad_norm": 4.313527440951424, "learning_rate": 2.1383999016290724e-06, "loss": 0.4338, "step": 24553 }, { "epoch": 0.7, "grad_norm": 8.383824780609704, "learning_rate": 2.138019617909284e-06, "loss": 0.6293, "step": 24554 }, { "epoch": 0.7, "grad_norm": 2.7870935173626106, "learning_rate": 2.137639358810758e-06, "loss": 0.3548, "step": 24555 }, { "epoch": 0.7, "grad_norm": 6.9446726998857775, "learning_rate": 2.1372591243367708e-06, "loss": 0.8601, "step": 24556 }, { "epoch": 0.7, "grad_norm": 5.115815469391964, "learning_rate": 2.136878914490588e-06, "loss": 0.2794, "step": 24557 }, { "epoch": 0.7, "grad_norm": 5.6236149672190425, "learning_rate": 2.136498729275486e-06, "loss": 0.568, "step": 24558 }, { "epoch": 0.7, "grad_norm": 5.9786119325564355, "learning_rate": 2.136118568694731e-06, "loss": 0.3672, "step": 24559 }, { "epoch": 0.7, "grad_norm": 6.036001359847151, "learning_rate": 2.135738432751599e-06, "loss": 0.6653, "step": 24560 }, { "epoch": 0.7, "grad_norm": 5.844180086190173, "learning_rate": 2.135358321449353e-06, "loss": 0.4576, "step": 24561 }, { "epoch": 0.7, "grad_norm": 7.381592406519995, "learning_rate": 2.1349782347912685e-06, "loss": 0.493, "step": 24562 }, { "epoch": 0.7, "grad_norm": 1.4675357242862959, "learning_rate": 2.1345981727806116e-06, "loss": 0.0795, "step": 24563 }, { "epoch": 0.7, "grad_norm": 3.74907812867115, "learning_rate": 2.1342181354206536e-06, "loss": 0.4942, "step": 24564 }, { "epoch": 0.7, "grad_norm": 4.817315825548163, "learning_rate": 2.133838122714666e-06, "loss": 0.4916, "step": 24565 }, { "epoch": 0.7, "grad_norm": 5.141505250092123, "learning_rate": 2.1334581346659165e-06, "loss": 0.4459, "step": 24566 }, { "epoch": 0.7, "grad_norm": 3.7026763193517285, "learning_rate": 2.133078171277673e-06, "loss": 0.2927, "step": 24567 }, { "epoch": 0.7, "grad_norm": 3.652860129668615, "learning_rate": 2.1326982325532043e-06, "loss": 0.1731, "step": 24568 }, { "epoch": 0.7, "grad_norm": 7.934977693868916, "learning_rate": 2.1323183184957807e-06, "loss": 0.6493, "step": 24569 }, { "epoch": 0.7, "grad_norm": 4.979862478417961, "learning_rate": 2.1319384291086682e-06, "loss": 0.6456, "step": 24570 }, { "epoch": 0.7, "grad_norm": 2.2385380882180144, "learning_rate": 2.1315585643951376e-06, "loss": 0.3149, "step": 24571 }, { "epoch": 0.7, "grad_norm": 10.575267231047007, "learning_rate": 2.1311787243584537e-06, "loss": 0.6925, "step": 24572 }, { "epoch": 0.7, "grad_norm": 4.973199447605787, "learning_rate": 2.1307989090018878e-06, "loss": 0.3104, "step": 24573 }, { "epoch": 0.7, "grad_norm": 6.440428613947503, "learning_rate": 2.130419118328706e-06, "loss": 0.9618, "step": 24574 }, { "epoch": 0.7, "grad_norm": 4.958893290190658, "learning_rate": 2.130039352342173e-06, "loss": 0.4028, "step": 24575 }, { "epoch": 0.7, "grad_norm": 3.999254097533614, "learning_rate": 2.1296596110455605e-06, "loss": 0.1104, "step": 24576 }, { "epoch": 0.7, "grad_norm": 8.923409298314688, "learning_rate": 2.1292798944421307e-06, "loss": 0.8155, "step": 24577 }, { "epoch": 0.7, "grad_norm": 11.398669924860014, "learning_rate": 2.128900202535154e-06, "loss": 0.7811, "step": 24578 }, { "epoch": 0.7, "grad_norm": 4.886762806353038, "learning_rate": 2.128520535327894e-06, "loss": 0.4262, "step": 24579 }, { "epoch": 0.7, "grad_norm": 3.3282881683324987, "learning_rate": 2.1281408928236202e-06, "loss": 0.4432, "step": 24580 }, { "epoch": 0.7, "grad_norm": 19.991841941193414, "learning_rate": 2.1277612750255965e-06, "loss": 0.5082, "step": 24581 }, { "epoch": 0.7, "grad_norm": 5.711277587616938, "learning_rate": 2.1273816819370866e-06, "loss": 0.5568, "step": 24582 }, { "epoch": 0.7, "grad_norm": 7.211048030316129, "learning_rate": 2.127002113561361e-06, "loss": 0.2551, "step": 24583 }, { "epoch": 0.7, "grad_norm": 10.329103362912885, "learning_rate": 2.1266225699016807e-06, "loss": 0.6057, "step": 24584 }, { "epoch": 0.7, "grad_norm": 3.3512179146425565, "learning_rate": 2.126243050961314e-06, "loss": 0.3914, "step": 24585 }, { "epoch": 0.7, "grad_norm": 7.5557616928752775, "learning_rate": 2.125863556743523e-06, "loss": 0.9047, "step": 24586 }, { "epoch": 0.7, "grad_norm": 4.820961684373236, "learning_rate": 2.1254840872515758e-06, "loss": 0.1602, "step": 24587 }, { "epoch": 0.7, "grad_norm": 4.541778775969208, "learning_rate": 2.1251046424887345e-06, "loss": 0.4785, "step": 24588 }, { "epoch": 0.7, "grad_norm": 6.19190594795666, "learning_rate": 2.1247252224582626e-06, "loss": 0.2247, "step": 24589 }, { "epoch": 0.7, "grad_norm": 4.566097017362816, "learning_rate": 2.1243458271634274e-06, "loss": 0.2426, "step": 24590 }, { "epoch": 0.7, "grad_norm": 8.049690657648771, "learning_rate": 2.1239664566074897e-06, "loss": 0.5119, "step": 24591 }, { "epoch": 0.7, "grad_norm": 14.938878877594725, "learning_rate": 2.1235871107937155e-06, "loss": 0.7962, "step": 24592 }, { "epoch": 0.7, "grad_norm": 4.95863887369057, "learning_rate": 2.1232077897253673e-06, "loss": 0.3516, "step": 24593 }, { "epoch": 0.7, "grad_norm": 5.813528769726629, "learning_rate": 2.1228284934057086e-06, "loss": 0.6275, "step": 24594 }, { "epoch": 0.7, "grad_norm": 9.783846379550209, "learning_rate": 2.1224492218380005e-06, "loss": 0.6964, "step": 24595 }, { "epoch": 0.7, "grad_norm": 7.169811408995635, "learning_rate": 2.122069975025507e-06, "loss": 0.6862, "step": 24596 }, { "epoch": 0.7, "grad_norm": 4.819855332914211, "learning_rate": 2.121690752971493e-06, "loss": 0.2309, "step": 24597 }, { "epoch": 0.7, "grad_norm": 6.8331901391850804, "learning_rate": 2.1213115556792175e-06, "loss": 0.3859, "step": 24598 }, { "epoch": 0.7, "grad_norm": 1.8458847118041344, "learning_rate": 2.1209323831519456e-06, "loss": 0.4056, "step": 24599 }, { "epoch": 0.7, "grad_norm": 4.6170650020557105, "learning_rate": 2.1205532353929385e-06, "loss": 0.4063, "step": 24600 }, { "epoch": 0.7, "grad_norm": 5.466758583428105, "learning_rate": 2.120174112405457e-06, "loss": 0.4795, "step": 24601 }, { "epoch": 0.7, "grad_norm": 8.001450049592146, "learning_rate": 2.1197950141927613e-06, "loss": 1.0841, "step": 24602 }, { "epoch": 0.7, "grad_norm": 5.81700189639148, "learning_rate": 2.1194159407581167e-06, "loss": 0.2518, "step": 24603 }, { "epoch": 0.7, "grad_norm": 11.314165445888552, "learning_rate": 2.11903689210478e-06, "loss": 0.8961, "step": 24604 }, { "epoch": 0.7, "grad_norm": 4.625114387953454, "learning_rate": 2.118657868236014e-06, "loss": 0.4781, "step": 24605 }, { "epoch": 0.7, "grad_norm": 11.153880490116117, "learning_rate": 2.1182788691550816e-06, "loss": 0.4952, "step": 24606 }, { "epoch": 0.7, "grad_norm": 5.032333423228324, "learning_rate": 2.117899894865241e-06, "loss": 0.2737, "step": 24607 }, { "epoch": 0.7, "grad_norm": 5.741292787770751, "learning_rate": 2.1175209453697533e-06, "loss": 0.286, "step": 24608 }, { "epoch": 0.7, "grad_norm": 6.549176596799733, "learning_rate": 2.117142020671876e-06, "loss": 0.2699, "step": 24609 }, { "epoch": 0.7, "grad_norm": 5.151398844689457, "learning_rate": 2.1167631207748724e-06, "loss": 0.5395, "step": 24610 }, { "epoch": 0.7, "grad_norm": 5.979673366365458, "learning_rate": 2.1163842456819993e-06, "loss": 0.268, "step": 24611 }, { "epoch": 0.7, "grad_norm": 5.860509248940862, "learning_rate": 2.116005395396519e-06, "loss": 0.275, "step": 24612 }, { "epoch": 0.7, "grad_norm": 6.189846326063613, "learning_rate": 2.1156265699216877e-06, "loss": 0.3544, "step": 24613 }, { "epoch": 0.7, "grad_norm": 3.5775842382935137, "learning_rate": 2.1152477692607676e-06, "loss": 0.2949, "step": 24614 }, { "epoch": 0.7, "grad_norm": 7.628213611730401, "learning_rate": 2.1148689934170154e-06, "loss": 0.5561, "step": 24615 }, { "epoch": 0.7, "grad_norm": 4.889653996121017, "learning_rate": 2.1144902423936884e-06, "loss": 0.3323, "step": 24616 }, { "epoch": 0.7, "grad_norm": 4.735768481353188, "learning_rate": 2.1141115161940485e-06, "loss": 0.2662, "step": 24617 }, { "epoch": 0.71, "grad_norm": 6.302029301015035, "learning_rate": 2.1137328148213496e-06, "loss": 0.4448, "step": 24618 }, { "epoch": 0.71, "grad_norm": 5.381806610420684, "learning_rate": 2.1133541382788545e-06, "loss": 0.2059, "step": 24619 }, { "epoch": 0.71, "grad_norm": 7.5402089250391295, "learning_rate": 2.1129754865698183e-06, "loss": 0.5615, "step": 24620 }, { "epoch": 0.71, "grad_norm": 5.504029683447516, "learning_rate": 2.1125968596974965e-06, "loss": 0.4205, "step": 24621 }, { "epoch": 0.71, "grad_norm": 5.610414541503564, "learning_rate": 2.11221825766515e-06, "loss": 0.4368, "step": 24622 }, { "epoch": 0.71, "grad_norm": 5.061235222770164, "learning_rate": 2.1118396804760327e-06, "loss": 0.3174, "step": 24623 }, { "epoch": 0.71, "grad_norm": 10.029910535898303, "learning_rate": 2.1114611281334048e-06, "loss": 0.5979, "step": 24624 }, { "epoch": 0.71, "grad_norm": 8.206460735930184, "learning_rate": 2.1110826006405195e-06, "loss": 0.6203, "step": 24625 }, { "epoch": 0.71, "grad_norm": 5.482335550996444, "learning_rate": 2.110704098000636e-06, "loss": 0.6196, "step": 24626 }, { "epoch": 0.71, "grad_norm": 8.291166536465203, "learning_rate": 2.1103256202170104e-06, "loss": 0.8281, "step": 24627 }, { "epoch": 0.71, "grad_norm": 6.688908918756649, "learning_rate": 2.1099471672928972e-06, "loss": 0.6562, "step": 24628 }, { "epoch": 0.71, "grad_norm": 8.038209771694861, "learning_rate": 2.1095687392315507e-06, "loss": 0.7865, "step": 24629 }, { "epoch": 0.71, "grad_norm": 7.636598520722778, "learning_rate": 2.1091903360362286e-06, "loss": 0.6317, "step": 24630 }, { "epoch": 0.71, "grad_norm": 4.872332968239189, "learning_rate": 2.1088119577101885e-06, "loss": 0.4428, "step": 24631 }, { "epoch": 0.71, "grad_norm": 4.424903775909247, "learning_rate": 2.1084336042566806e-06, "loss": 0.4489, "step": 24632 }, { "epoch": 0.71, "grad_norm": 3.3080964598585823, "learning_rate": 2.1080552756789645e-06, "loss": 0.1845, "step": 24633 }, { "epoch": 0.71, "grad_norm": 8.68488969626298, "learning_rate": 2.1076769719802926e-06, "loss": 0.4914, "step": 24634 }, { "epoch": 0.71, "grad_norm": 1.6090976038537692, "learning_rate": 2.10729869316392e-06, "loss": 0.0606, "step": 24635 }, { "epoch": 0.71, "grad_norm": 5.113427797553202, "learning_rate": 2.106920439233098e-06, "loss": 0.5776, "step": 24636 }, { "epoch": 0.71, "grad_norm": 7.142737145097123, "learning_rate": 2.106542210191084e-06, "loss": 0.606, "step": 24637 }, { "epoch": 0.71, "grad_norm": 10.103475792196273, "learning_rate": 2.1061640060411327e-06, "loss": 0.5309, "step": 24638 }, { "epoch": 0.71, "grad_norm": 5.687108120674788, "learning_rate": 2.1057858267864945e-06, "loss": 0.2818, "step": 24639 }, { "epoch": 0.71, "grad_norm": 5.8300305235179835, "learning_rate": 2.1054076724304262e-06, "loss": 0.3715, "step": 24640 }, { "epoch": 0.71, "grad_norm": 10.559343012941653, "learning_rate": 2.105029542976179e-06, "loss": 0.4485, "step": 24641 }, { "epoch": 0.71, "grad_norm": 5.606081960279705, "learning_rate": 2.104651438427007e-06, "loss": 0.3887, "step": 24642 }, { "epoch": 0.71, "grad_norm": 5.450572555984309, "learning_rate": 2.1042733587861593e-06, "loss": 0.4749, "step": 24643 }, { "epoch": 0.71, "grad_norm": 3.453616871043836, "learning_rate": 2.1038953040568937e-06, "loss": 0.4548, "step": 24644 }, { "epoch": 0.71, "grad_norm": 3.488543493103904, "learning_rate": 2.103517274242458e-06, "loss": 0.5653, "step": 24645 }, { "epoch": 0.71, "grad_norm": 4.228511762727379, "learning_rate": 2.1031392693461073e-06, "loss": 0.2903, "step": 24646 }, { "epoch": 0.71, "grad_norm": 8.033225562400286, "learning_rate": 2.1027612893710962e-06, "loss": 0.8549, "step": 24647 }, { "epoch": 0.71, "grad_norm": 5.151731002037304, "learning_rate": 2.102383334320669e-06, "loss": 0.2764, "step": 24648 }, { "epoch": 0.71, "grad_norm": 6.016372158665769, "learning_rate": 2.102005404198084e-06, "loss": 0.3771, "step": 24649 }, { "epoch": 0.71, "grad_norm": 7.82431302939875, "learning_rate": 2.1016274990065868e-06, "loss": 0.8899, "step": 24650 }, { "epoch": 0.71, "grad_norm": 8.265249636803798, "learning_rate": 2.1012496187494337e-06, "loss": 0.7022, "step": 24651 }, { "epoch": 0.71, "grad_norm": 7.7641126054147716, "learning_rate": 2.1008717634298707e-06, "loss": 0.392, "step": 24652 }, { "epoch": 0.71, "grad_norm": 4.500960009216075, "learning_rate": 2.1004939330511535e-06, "loss": 0.3328, "step": 24653 }, { "epoch": 0.71, "grad_norm": 7.894510109640485, "learning_rate": 2.10011612761653e-06, "loss": 0.3322, "step": 24654 }, { "epoch": 0.71, "grad_norm": 9.449530405153988, "learning_rate": 2.0997383471292486e-06, "loss": 0.6688, "step": 24655 }, { "epoch": 0.71, "grad_norm": 3.6130896440170743, "learning_rate": 2.0993605915925623e-06, "loss": 0.376, "step": 24656 }, { "epoch": 0.71, "grad_norm": 2.6894910223790296, "learning_rate": 2.0989828610097186e-06, "loss": 0.2869, "step": 24657 }, { "epoch": 0.71, "grad_norm": 3.913704298002945, "learning_rate": 2.0986051553839696e-06, "loss": 0.2995, "step": 24658 }, { "epoch": 0.71, "grad_norm": 3.9897810071109245, "learning_rate": 2.0982274747185618e-06, "loss": 0.4001, "step": 24659 }, { "epoch": 0.71, "grad_norm": 10.201234417438235, "learning_rate": 2.097849819016747e-06, "loss": 0.658, "step": 24660 }, { "epoch": 0.71, "grad_norm": 7.196841927509122, "learning_rate": 2.0974721882817733e-06, "loss": 0.4217, "step": 24661 }, { "epoch": 0.71, "grad_norm": 7.153356025268147, "learning_rate": 2.097094582516887e-06, "loss": 0.674, "step": 24662 }, { "epoch": 0.71, "grad_norm": 4.622663062166214, "learning_rate": 2.096717001725341e-06, "loss": 0.2411, "step": 24663 }, { "epoch": 0.71, "grad_norm": 4.002606317655655, "learning_rate": 2.0963394459103787e-06, "loss": 0.4148, "step": 24664 }, { "epoch": 0.71, "grad_norm": 3.1664231054016, "learning_rate": 2.0959619150752525e-06, "loss": 0.3093, "step": 24665 }, { "epoch": 0.71, "grad_norm": 5.149029201375127, "learning_rate": 2.095584409223207e-06, "loss": 0.5056, "step": 24666 }, { "epoch": 0.71, "grad_norm": 8.786626941183973, "learning_rate": 2.0952069283574924e-06, "loss": 0.5874, "step": 24667 }, { "epoch": 0.71, "grad_norm": 7.71010120498593, "learning_rate": 2.094829472481355e-06, "loss": 0.5166, "step": 24668 }, { "epoch": 0.71, "grad_norm": 8.131170834530524, "learning_rate": 2.0944520415980425e-06, "loss": 0.7414, "step": 24669 }, { "epoch": 0.71, "grad_norm": 7.782663546325588, "learning_rate": 2.0940746357107995e-06, "loss": 0.5335, "step": 24670 }, { "epoch": 0.71, "grad_norm": 4.421537999767302, "learning_rate": 2.093697254822874e-06, "loss": 0.2993, "step": 24671 }, { "epoch": 0.71, "grad_norm": 3.325013152074974, "learning_rate": 2.0933198989375154e-06, "loss": 0.3194, "step": 24672 }, { "epoch": 0.71, "grad_norm": 4.034902234127978, "learning_rate": 2.0929425680579664e-06, "loss": 0.3594, "step": 24673 }, { "epoch": 0.71, "grad_norm": 3.712801559479642, "learning_rate": 2.092565262187475e-06, "loss": 0.2877, "step": 24674 }, { "epoch": 0.71, "grad_norm": 6.827506100607527, "learning_rate": 2.0921879813292883e-06, "loss": 0.8024, "step": 24675 }, { "epoch": 0.71, "grad_norm": 5.09239431489731, "learning_rate": 2.09181072548665e-06, "loss": 0.5121, "step": 24676 }, { "epoch": 0.71, "grad_norm": 7.370583327046281, "learning_rate": 2.0914334946628034e-06, "loss": 0.7276, "step": 24677 }, { "epoch": 0.71, "grad_norm": 7.1289455799428785, "learning_rate": 2.091056288860997e-06, "loss": 0.6927, "step": 24678 }, { "epoch": 0.71, "grad_norm": 5.760936536548307, "learning_rate": 2.090679108084477e-06, "loss": 0.644, "step": 24679 }, { "epoch": 0.71, "grad_norm": 5.7415110702175385, "learning_rate": 2.090301952336486e-06, "loss": 0.4517, "step": 24680 }, { "epoch": 0.71, "grad_norm": 5.414832408597597, "learning_rate": 2.089924821620269e-06, "loss": 0.294, "step": 24681 }, { "epoch": 0.71, "grad_norm": 3.4242161967716016, "learning_rate": 2.089547715939069e-06, "loss": 0.2918, "step": 24682 }, { "epoch": 0.71, "grad_norm": 10.464306927589254, "learning_rate": 2.0891706352961335e-06, "loss": 0.5449, "step": 24683 }, { "epoch": 0.71, "grad_norm": 7.243389239802785, "learning_rate": 2.088793579694703e-06, "loss": 0.2845, "step": 24684 }, { "epoch": 0.71, "grad_norm": 6.008591381921179, "learning_rate": 2.0884165491380244e-06, "loss": 0.5715, "step": 24685 }, { "epoch": 0.71, "grad_norm": 2.9244345505601244, "learning_rate": 2.0880395436293376e-06, "loss": 0.2511, "step": 24686 }, { "epoch": 0.71, "grad_norm": 5.671534756630387, "learning_rate": 2.0876625631718907e-06, "loss": 0.3249, "step": 24687 }, { "epoch": 0.71, "grad_norm": 5.219952930077446, "learning_rate": 2.0872856077689237e-06, "loss": 0.8284, "step": 24688 }, { "epoch": 0.71, "grad_norm": 4.269205707448731, "learning_rate": 2.086908677423678e-06, "loss": 0.3675, "step": 24689 }, { "epoch": 0.71, "grad_norm": 7.852625654369599, "learning_rate": 2.0865317721394003e-06, "loss": 0.621, "step": 24690 }, { "epoch": 0.71, "grad_norm": 11.213389526601356, "learning_rate": 2.0861548919193293e-06, "loss": 0.7614, "step": 24691 }, { "epoch": 0.71, "grad_norm": 7.966640716840333, "learning_rate": 2.085778036766711e-06, "loss": 0.7536, "step": 24692 }, { "epoch": 0.71, "grad_norm": 1.3778295663082507, "learning_rate": 2.085401206684783e-06, "loss": 0.0871, "step": 24693 }, { "epoch": 0.71, "grad_norm": 7.00278451577844, "learning_rate": 2.085024401676792e-06, "loss": 0.4096, "step": 24694 }, { "epoch": 0.71, "grad_norm": 8.256699125215594, "learning_rate": 2.0846476217459766e-06, "loss": 0.5216, "step": 24695 }, { "epoch": 0.71, "grad_norm": 6.957709834628295, "learning_rate": 2.0842708668955773e-06, "loss": 0.4379, "step": 24696 }, { "epoch": 0.71, "grad_norm": 3.2728966787413074, "learning_rate": 2.0838941371288387e-06, "loss": 0.3306, "step": 24697 }, { "epoch": 0.71, "grad_norm": 2.536761265132031, "learning_rate": 2.0835174324489975e-06, "loss": 0.3527, "step": 24698 }, { "epoch": 0.71, "grad_norm": 5.626969395368702, "learning_rate": 2.083140752859299e-06, "loss": 0.3906, "step": 24699 }, { "epoch": 0.71, "grad_norm": 6.894954990408798, "learning_rate": 2.0827640983629792e-06, "loss": 0.1984, "step": 24700 }, { "epoch": 0.71, "grad_norm": 5.563978448761614, "learning_rate": 2.082387468963283e-06, "loss": 0.3244, "step": 24701 }, { "epoch": 0.71, "grad_norm": 8.984345066186547, "learning_rate": 2.082010864663448e-06, "loss": 0.5969, "step": 24702 }, { "epoch": 0.71, "grad_norm": 7.247038663962864, "learning_rate": 2.0816342854667125e-06, "loss": 0.5891, "step": 24703 }, { "epoch": 0.71, "grad_norm": 7.164962162948125, "learning_rate": 2.0812577313763198e-06, "loss": 0.4568, "step": 24704 }, { "epoch": 0.71, "grad_norm": 9.166986777756208, "learning_rate": 2.0808812023955056e-06, "loss": 0.575, "step": 24705 }, { "epoch": 0.71, "grad_norm": 3.9688814697116777, "learning_rate": 2.0805046985275128e-06, "loss": 0.1843, "step": 24706 }, { "epoch": 0.71, "grad_norm": 3.945243789527762, "learning_rate": 2.0801282197755783e-06, "loss": 0.3391, "step": 24707 }, { "epoch": 0.71, "grad_norm": 5.8979918456824825, "learning_rate": 2.0797517661429416e-06, "loss": 0.3292, "step": 24708 }, { "epoch": 0.71, "grad_norm": 4.180969669085129, "learning_rate": 2.0793753376328392e-06, "loss": 0.3545, "step": 24709 }, { "epoch": 0.71, "grad_norm": 4.3338238762477514, "learning_rate": 2.0789989342485128e-06, "loss": 0.2952, "step": 24710 }, { "epoch": 0.71, "grad_norm": 4.879770878822434, "learning_rate": 2.078622555993197e-06, "loss": 0.3619, "step": 24711 }, { "epoch": 0.71, "grad_norm": 7.823762085224648, "learning_rate": 2.0782462028701315e-06, "loss": 0.6373, "step": 24712 }, { "epoch": 0.71, "grad_norm": 4.207192080513027, "learning_rate": 2.0778698748825565e-06, "loss": 0.6492, "step": 24713 }, { "epoch": 0.71, "grad_norm": 7.0331876670373035, "learning_rate": 2.077493572033707e-06, "loss": 0.7847, "step": 24714 }, { "epoch": 0.71, "grad_norm": 6.307032757544742, "learning_rate": 2.0771172943268203e-06, "loss": 0.7359, "step": 24715 }, { "epoch": 0.71, "grad_norm": 4.855317534641568, "learning_rate": 2.0767410417651317e-06, "loss": 0.4585, "step": 24716 }, { "epoch": 0.71, "grad_norm": 4.515534053533424, "learning_rate": 2.0763648143518822e-06, "loss": 0.7218, "step": 24717 }, { "epoch": 0.71, "grad_norm": 13.366021574218454, "learning_rate": 2.0759886120903046e-06, "loss": 0.4425, "step": 24718 }, { "epoch": 0.71, "grad_norm": 4.5881951410887005, "learning_rate": 2.0756124349836383e-06, "loss": 0.2301, "step": 24719 }, { "epoch": 0.71, "grad_norm": 9.651717520344723, "learning_rate": 2.0752362830351164e-06, "loss": 0.3574, "step": 24720 }, { "epoch": 0.71, "grad_norm": 3.0233522348924122, "learning_rate": 2.0748601562479788e-06, "loss": 0.2596, "step": 24721 }, { "epoch": 0.71, "grad_norm": 4.325967547067012, "learning_rate": 2.0744840546254586e-06, "loss": 0.5586, "step": 24722 }, { "epoch": 0.71, "grad_norm": 4.465434064580573, "learning_rate": 2.0741079781707903e-06, "loss": 0.6462, "step": 24723 }, { "epoch": 0.71, "grad_norm": 5.9981137728151275, "learning_rate": 2.0737319268872125e-06, "loss": 0.207, "step": 24724 }, { "epoch": 0.71, "grad_norm": 10.178059890627832, "learning_rate": 2.0733559007779574e-06, "loss": 0.6538, "step": 24725 }, { "epoch": 0.71, "grad_norm": 5.125469953808212, "learning_rate": 2.0729798998462625e-06, "loss": 0.53, "step": 24726 }, { "epoch": 0.71, "grad_norm": 3.3139649246865552, "learning_rate": 2.0726039240953595e-06, "loss": 0.2161, "step": 24727 }, { "epoch": 0.71, "grad_norm": 6.886081884647932, "learning_rate": 2.0722279735284863e-06, "loss": 0.4449, "step": 24728 }, { "epoch": 0.71, "grad_norm": 8.568685427588658, "learning_rate": 2.071852048148875e-06, "loss": 0.7884, "step": 24729 }, { "epoch": 0.71, "grad_norm": 4.8214168921838825, "learning_rate": 2.071476147959758e-06, "loss": 0.4679, "step": 24730 }, { "epoch": 0.71, "grad_norm": 6.794373027656304, "learning_rate": 2.071100272964374e-06, "loss": 0.4282, "step": 24731 }, { "epoch": 0.71, "grad_norm": 4.297650492653373, "learning_rate": 2.0707244231659512e-06, "loss": 0.3992, "step": 24732 }, { "epoch": 0.71, "grad_norm": 6.617664234413347, "learning_rate": 2.0703485985677274e-06, "loss": 1.1427, "step": 24733 }, { "epoch": 0.71, "grad_norm": 4.731839020118271, "learning_rate": 2.0699727991729326e-06, "loss": 0.4642, "step": 24734 }, { "epoch": 0.71, "grad_norm": 4.506382971718335, "learning_rate": 2.069597024984804e-06, "loss": 0.4916, "step": 24735 }, { "epoch": 0.71, "grad_norm": 4.753759452415215, "learning_rate": 2.0692212760065676e-06, "loss": 0.6612, "step": 24736 }, { "epoch": 0.71, "grad_norm": 4.329315641493834, "learning_rate": 2.0688455522414604e-06, "loss": 0.1448, "step": 24737 }, { "epoch": 0.71, "grad_norm": 5.003321807821791, "learning_rate": 2.0684698536927154e-06, "loss": 0.4942, "step": 24738 }, { "epoch": 0.71, "grad_norm": 7.10271708856596, "learning_rate": 2.068094180363562e-06, "loss": 0.4703, "step": 24739 }, { "epoch": 0.71, "grad_norm": 5.771726527677011, "learning_rate": 2.0677185322572346e-06, "loss": 0.4821, "step": 24740 }, { "epoch": 0.71, "grad_norm": 8.942861802671429, "learning_rate": 2.0673429093769638e-06, "loss": 0.2899, "step": 24741 }, { "epoch": 0.71, "grad_norm": 6.657416496684288, "learning_rate": 2.066967311725981e-06, "loss": 0.43, "step": 24742 }, { "epoch": 0.71, "grad_norm": 5.111117471239491, "learning_rate": 2.0665917393075154e-06, "loss": 0.5446, "step": 24743 }, { "epoch": 0.71, "grad_norm": 4.814970472290831, "learning_rate": 2.0662161921248e-06, "loss": 0.4426, "step": 24744 }, { "epoch": 0.71, "grad_norm": 4.065368519931537, "learning_rate": 2.0658406701810674e-06, "loss": 0.6779, "step": 24745 }, { "epoch": 0.71, "grad_norm": 3.720611651239811, "learning_rate": 2.065465173479544e-06, "loss": 0.2943, "step": 24746 }, { "epoch": 0.71, "grad_norm": 3.624085639865923, "learning_rate": 2.0650897020234646e-06, "loss": 0.5296, "step": 24747 }, { "epoch": 0.71, "grad_norm": 10.813829814301942, "learning_rate": 2.064714255816057e-06, "loss": 0.4869, "step": 24748 }, { "epoch": 0.71, "grad_norm": 7.4397067474694385, "learning_rate": 2.0643388348605515e-06, "loss": 0.4851, "step": 24749 }, { "epoch": 0.71, "grad_norm": 4.188035389849964, "learning_rate": 2.0639634391601753e-06, "loss": 0.4129, "step": 24750 }, { "epoch": 0.71, "grad_norm": 6.998434521827141, "learning_rate": 2.063588068718162e-06, "loss": 0.4016, "step": 24751 }, { "epoch": 0.71, "grad_norm": 1.7450244624901687, "learning_rate": 2.0632127235377377e-06, "loss": 0.1378, "step": 24752 }, { "epoch": 0.71, "grad_norm": 7.706796695718856, "learning_rate": 2.062837403622132e-06, "loss": 0.3821, "step": 24753 }, { "epoch": 0.71, "grad_norm": 19.4525988649357, "learning_rate": 2.0624621089745766e-06, "loss": 0.7634, "step": 24754 }, { "epoch": 0.71, "grad_norm": 7.856631731153374, "learning_rate": 2.0620868395982987e-06, "loss": 0.3897, "step": 24755 }, { "epoch": 0.71, "grad_norm": 11.465147861171072, "learning_rate": 2.061711595496525e-06, "loss": 0.624, "step": 24756 }, { "epoch": 0.71, "grad_norm": 5.758354337500298, "learning_rate": 2.061336376672483e-06, "loss": 0.5415, "step": 24757 }, { "epoch": 0.71, "grad_norm": 2.7894787304168713, "learning_rate": 2.0609611831294048e-06, "loss": 0.1771, "step": 24758 }, { "epoch": 0.71, "grad_norm": 3.402314108227831, "learning_rate": 2.0605860148705132e-06, "loss": 0.2887, "step": 24759 }, { "epoch": 0.71, "grad_norm": 4.224146746640332, "learning_rate": 2.0602108718990403e-06, "loss": 0.2202, "step": 24760 }, { "epoch": 0.71, "grad_norm": 6.1818872291105365, "learning_rate": 2.0598357542182097e-06, "loss": 0.3958, "step": 24761 }, { "epoch": 0.71, "grad_norm": 3.6312111227084944, "learning_rate": 2.0594606618312517e-06, "loss": 0.1565, "step": 24762 }, { "epoch": 0.71, "grad_norm": 4.517113041140373, "learning_rate": 2.0590855947413914e-06, "loss": 0.5637, "step": 24763 }, { "epoch": 0.71, "grad_norm": 3.3354414095289475, "learning_rate": 2.058710552951854e-06, "loss": 0.347, "step": 24764 }, { "epoch": 0.71, "grad_norm": 5.163341685440831, "learning_rate": 2.058335536465869e-06, "loss": 0.2293, "step": 24765 }, { "epoch": 0.71, "grad_norm": 6.817763998349813, "learning_rate": 2.05796054528666e-06, "loss": 0.6239, "step": 24766 }, { "epoch": 0.71, "grad_norm": 7.60964959058428, "learning_rate": 2.057585579417456e-06, "loss": 0.6172, "step": 24767 }, { "epoch": 0.71, "grad_norm": 10.797231223913895, "learning_rate": 2.0572106388614806e-06, "loss": 0.5471, "step": 24768 }, { "epoch": 0.71, "grad_norm": 2.2845464561190623, "learning_rate": 2.056835723621958e-06, "loss": 0.2572, "step": 24769 }, { "epoch": 0.71, "grad_norm": 3.1611466523846516, "learning_rate": 2.0564608337021173e-06, "loss": 0.4762, "step": 24770 }, { "epoch": 0.71, "grad_norm": 5.282255945954395, "learning_rate": 2.05608596910518e-06, "loss": 0.6424, "step": 24771 }, { "epoch": 0.71, "grad_norm": 7.800432024508766, "learning_rate": 2.055711129834374e-06, "loss": 0.2565, "step": 24772 }, { "epoch": 0.71, "grad_norm": 10.861623671183516, "learning_rate": 2.0553363158929207e-06, "loss": 0.8916, "step": 24773 }, { "epoch": 0.71, "grad_norm": 7.652741745573382, "learning_rate": 2.054961527284049e-06, "loss": 0.3146, "step": 24774 }, { "epoch": 0.71, "grad_norm": 5.23416105374041, "learning_rate": 2.05458676401098e-06, "loss": 0.2922, "step": 24775 }, { "epoch": 0.71, "grad_norm": 3.34708437936973, "learning_rate": 2.054212026076939e-06, "loss": 0.3505, "step": 24776 }, { "epoch": 0.71, "grad_norm": 5.067979625668476, "learning_rate": 2.053837313485147e-06, "loss": 0.6706, "step": 24777 }, { "epoch": 0.71, "grad_norm": 4.574582965791864, "learning_rate": 2.0534626262388303e-06, "loss": 0.1889, "step": 24778 }, { "epoch": 0.71, "grad_norm": 2.793538922351604, "learning_rate": 2.053087964341213e-06, "loss": 0.1626, "step": 24779 }, { "epoch": 0.71, "grad_norm": 5.717469296074787, "learning_rate": 2.052713327795516e-06, "loss": 0.4749, "step": 24780 }, { "epoch": 0.71, "grad_norm": 14.088325353102931, "learning_rate": 2.0523387166049646e-06, "loss": 0.5744, "step": 24781 }, { "epoch": 0.71, "grad_norm": 5.50566342679377, "learning_rate": 2.0519641307727805e-06, "loss": 0.3774, "step": 24782 }, { "epoch": 0.71, "grad_norm": 6.743425487668657, "learning_rate": 2.051589570302186e-06, "loss": 0.973, "step": 24783 }, { "epoch": 0.71, "grad_norm": 6.82242931537628, "learning_rate": 2.0512150351964015e-06, "loss": 0.6256, "step": 24784 }, { "epoch": 0.71, "grad_norm": 9.694857012602714, "learning_rate": 2.0508405254586513e-06, "loss": 0.4971, "step": 24785 }, { "epoch": 0.71, "grad_norm": 11.252964476902637, "learning_rate": 2.0504660410921585e-06, "loss": 0.6255, "step": 24786 }, { "epoch": 0.71, "grad_norm": 9.957139766995216, "learning_rate": 2.0500915821001416e-06, "loss": 0.3982, "step": 24787 }, { "epoch": 0.71, "grad_norm": 4.7306020577734085, "learning_rate": 2.049717148485825e-06, "loss": 0.25, "step": 24788 }, { "epoch": 0.71, "grad_norm": 2.8191163757473756, "learning_rate": 2.049342740252429e-06, "loss": 0.3348, "step": 24789 }, { "epoch": 0.71, "grad_norm": 6.242825624698764, "learning_rate": 2.048968357403174e-06, "loss": 0.2379, "step": 24790 }, { "epoch": 0.71, "grad_norm": 5.5741812605155205, "learning_rate": 2.048593999941279e-06, "loss": 0.5687, "step": 24791 }, { "epoch": 0.71, "grad_norm": 3.424531850179912, "learning_rate": 2.0482196678699677e-06, "loss": 0.1351, "step": 24792 }, { "epoch": 0.71, "grad_norm": 1.7425843458271988, "learning_rate": 2.047845361192458e-06, "loss": 0.2154, "step": 24793 }, { "epoch": 0.71, "grad_norm": 5.091670823082525, "learning_rate": 2.0474710799119713e-06, "loss": 0.1341, "step": 24794 }, { "epoch": 0.71, "grad_norm": 10.267499453551801, "learning_rate": 2.047096824031731e-06, "loss": 0.7537, "step": 24795 }, { "epoch": 0.71, "grad_norm": 5.050329602693319, "learning_rate": 2.046722593554949e-06, "loss": 0.4391, "step": 24796 }, { "epoch": 0.71, "grad_norm": 6.707960238852403, "learning_rate": 2.0463483884848514e-06, "loss": 0.4014, "step": 24797 }, { "epoch": 0.71, "grad_norm": 3.832004403147627, "learning_rate": 2.0459742088246527e-06, "loss": 0.3277, "step": 24798 }, { "epoch": 0.71, "grad_norm": 3.7988124346960674, "learning_rate": 2.0456000545775756e-06, "loss": 0.315, "step": 24799 }, { "epoch": 0.71, "grad_norm": 11.314978649407513, "learning_rate": 2.0452259257468364e-06, "loss": 0.8939, "step": 24800 }, { "epoch": 0.71, "grad_norm": 4.154210816546994, "learning_rate": 2.0448518223356563e-06, "loss": 0.2355, "step": 24801 }, { "epoch": 0.71, "grad_norm": 2.08453062383026, "learning_rate": 2.044477744347252e-06, "loss": 0.3116, "step": 24802 }, { "epoch": 0.71, "grad_norm": 3.813187302719916, "learning_rate": 2.04410369178484e-06, "loss": 0.4534, "step": 24803 }, { "epoch": 0.71, "grad_norm": 6.500263575565136, "learning_rate": 2.043729664651642e-06, "loss": 0.566, "step": 24804 }, { "epoch": 0.71, "grad_norm": 3.4532643630960016, "learning_rate": 2.0433556629508717e-06, "loss": 0.2767, "step": 24805 }, { "epoch": 0.71, "grad_norm": 4.081532717109015, "learning_rate": 2.0429816866857503e-06, "loss": 0.2923, "step": 24806 }, { "epoch": 0.71, "grad_norm": 5.430908109836831, "learning_rate": 2.042607735859492e-06, "loss": 0.7351, "step": 24807 }, { "epoch": 0.71, "grad_norm": 4.613329648919555, "learning_rate": 2.0422338104753165e-06, "loss": 0.407, "step": 24808 }, { "epoch": 0.71, "grad_norm": 3.630682601852647, "learning_rate": 2.04185991053644e-06, "loss": 0.2584, "step": 24809 }, { "epoch": 0.71, "grad_norm": 6.571264898737896, "learning_rate": 2.041486036046076e-06, "loss": 0.2212, "step": 24810 }, { "epoch": 0.71, "grad_norm": 7.355841110131458, "learning_rate": 2.041112187007446e-06, "loss": 0.8856, "step": 24811 }, { "epoch": 0.71, "grad_norm": 5.104671961886528, "learning_rate": 2.040738363423761e-06, "loss": 0.3574, "step": 24812 }, { "epoch": 0.71, "grad_norm": 7.262844480660202, "learning_rate": 2.0403645652982412e-06, "loss": 0.7221, "step": 24813 }, { "epoch": 0.71, "grad_norm": 6.507622943878948, "learning_rate": 2.0399907926340995e-06, "loss": 0.3397, "step": 24814 }, { "epoch": 0.71, "grad_norm": 4.507801419513909, "learning_rate": 2.0396170454345535e-06, "loss": 0.276, "step": 24815 }, { "epoch": 0.71, "grad_norm": 3.9895418063692563, "learning_rate": 2.039243323702819e-06, "loss": 0.3075, "step": 24816 }, { "epoch": 0.71, "grad_norm": 5.953963733771512, "learning_rate": 2.038869627442108e-06, "loss": 0.2808, "step": 24817 }, { "epoch": 0.71, "grad_norm": 4.4196525892890905, "learning_rate": 2.0384959566556363e-06, "loss": 0.3165, "step": 24818 }, { "epoch": 0.71, "grad_norm": 6.284913508230541, "learning_rate": 2.0381223113466188e-06, "loss": 0.5918, "step": 24819 }, { "epoch": 0.71, "grad_norm": 7.132493144612673, "learning_rate": 2.037748691518272e-06, "loss": 0.797, "step": 24820 }, { "epoch": 0.71, "grad_norm": 3.8359321236815527, "learning_rate": 2.0373750971738073e-06, "loss": 0.2495, "step": 24821 }, { "epoch": 0.71, "grad_norm": 9.060493668081136, "learning_rate": 2.037001528316443e-06, "loss": 0.5263, "step": 24822 }, { "epoch": 0.71, "grad_norm": 8.275969668043777, "learning_rate": 2.0366279849493863e-06, "loss": 0.4168, "step": 24823 }, { "epoch": 0.71, "grad_norm": 7.461019849073111, "learning_rate": 2.036254467075856e-06, "loss": 0.3701, "step": 24824 }, { "epoch": 0.71, "grad_norm": 4.544376525815692, "learning_rate": 2.0358809746990615e-06, "loss": 0.3377, "step": 24825 }, { "epoch": 0.71, "grad_norm": 3.348387416904727, "learning_rate": 2.0355075078222187e-06, "loss": 0.3386, "step": 24826 }, { "epoch": 0.71, "grad_norm": 6.183885038531141, "learning_rate": 2.0351340664485416e-06, "loss": 1.1052, "step": 24827 }, { "epoch": 0.71, "grad_norm": 4.649740443625291, "learning_rate": 2.03476065058124e-06, "loss": 0.5031, "step": 24828 }, { "epoch": 0.71, "grad_norm": 7.036902668945386, "learning_rate": 2.034387260223528e-06, "loss": 0.5119, "step": 24829 }, { "epoch": 0.71, "grad_norm": 6.54702015718125, "learning_rate": 2.034013895378616e-06, "loss": 0.7183, "step": 24830 }, { "epoch": 0.71, "grad_norm": 5.272300676798953, "learning_rate": 2.0336405560497183e-06, "loss": 0.6064, "step": 24831 }, { "epoch": 0.71, "grad_norm": 5.827187511325233, "learning_rate": 2.0332672422400445e-06, "loss": 0.471, "step": 24832 }, { "epoch": 0.71, "grad_norm": 6.201923321227477, "learning_rate": 2.032893953952809e-06, "loss": 0.7021, "step": 24833 }, { "epoch": 0.71, "grad_norm": 4.366908247042986, "learning_rate": 2.032520691191219e-06, "loss": 0.416, "step": 24834 }, { "epoch": 0.71, "grad_norm": 6.76263989146966, "learning_rate": 2.032147453958491e-06, "loss": 0.6575, "step": 24835 }, { "epoch": 0.71, "grad_norm": 3.5703533011678146, "learning_rate": 2.0317742422578323e-06, "loss": 0.3044, "step": 24836 }, { "epoch": 0.71, "grad_norm": 5.711278151177474, "learning_rate": 2.031401056092453e-06, "loss": 0.6273, "step": 24837 }, { "epoch": 0.71, "grad_norm": 5.540025527805772, "learning_rate": 2.0310278954655663e-06, "loss": 0.1013, "step": 24838 }, { "epoch": 0.71, "grad_norm": 10.755975792252896, "learning_rate": 2.03065476038038e-06, "loss": 0.5302, "step": 24839 }, { "epoch": 0.71, "grad_norm": 5.8186554258796255, "learning_rate": 2.0302816508401063e-06, "loss": 0.8241, "step": 24840 }, { "epoch": 0.71, "grad_norm": 9.253753003855898, "learning_rate": 2.029908566847952e-06, "loss": 0.4411, "step": 24841 }, { "epoch": 0.71, "grad_norm": 7.045796855318496, "learning_rate": 2.029535508407131e-06, "loss": 0.6027, "step": 24842 }, { "epoch": 0.71, "grad_norm": 4.4029166145910725, "learning_rate": 2.02916247552085e-06, "loss": 0.2987, "step": 24843 }, { "epoch": 0.71, "grad_norm": 8.058386855028228, "learning_rate": 2.0287894681923166e-06, "loss": 0.3827, "step": 24844 }, { "epoch": 0.71, "grad_norm": 5.800215424450348, "learning_rate": 2.0284164864247436e-06, "loss": 0.9339, "step": 24845 }, { "epoch": 0.71, "grad_norm": 10.235189204552034, "learning_rate": 2.0280435302213353e-06, "loss": 0.3872, "step": 24846 }, { "epoch": 0.71, "grad_norm": 3.362799526898916, "learning_rate": 2.0276705995853047e-06, "loss": 0.4357, "step": 24847 }, { "epoch": 0.71, "grad_norm": 3.5055159971769667, "learning_rate": 2.0272976945198563e-06, "loss": 0.3032, "step": 24848 }, { "epoch": 0.71, "grad_norm": 9.204066074616598, "learning_rate": 2.026924815028202e-06, "loss": 0.3676, "step": 24849 }, { "epoch": 0.71, "grad_norm": 3.737966143522632, "learning_rate": 2.0265519611135472e-06, "loss": 0.1968, "step": 24850 }, { "epoch": 0.71, "grad_norm": 3.381205716786233, "learning_rate": 2.0261791327790974e-06, "loss": 0.206, "step": 24851 }, { "epoch": 0.71, "grad_norm": 10.4080892317903, "learning_rate": 2.0258063300280644e-06, "loss": 1.0098, "step": 24852 }, { "epoch": 0.71, "grad_norm": 5.9069307933994155, "learning_rate": 2.025433552863652e-06, "loss": 0.7159, "step": 24853 }, { "epoch": 0.71, "grad_norm": 4.254026188836597, "learning_rate": 2.02506080128907e-06, "loss": 0.2887, "step": 24854 }, { "epoch": 0.71, "grad_norm": 3.1699063153217026, "learning_rate": 2.0246880753075237e-06, "loss": 0.3876, "step": 24855 }, { "epoch": 0.71, "grad_norm": 7.2988428243806025, "learning_rate": 2.0243153749222193e-06, "loss": 0.408, "step": 24856 }, { "epoch": 0.71, "grad_norm": 7.870331182527738, "learning_rate": 2.0239427001363622e-06, "loss": 0.3154, "step": 24857 }, { "epoch": 0.71, "grad_norm": 4.347551808983565, "learning_rate": 2.0235700509531606e-06, "loss": 0.4201, "step": 24858 }, { "epoch": 0.71, "grad_norm": 3.4511683733349727, "learning_rate": 2.023197427375818e-06, "loss": 0.1484, "step": 24859 }, { "epoch": 0.71, "grad_norm": 4.978534732870231, "learning_rate": 2.022824829407542e-06, "loss": 0.6487, "step": 24860 }, { "epoch": 0.71, "grad_norm": 10.173663472867945, "learning_rate": 2.0224522570515386e-06, "loss": 0.3733, "step": 24861 }, { "epoch": 0.71, "grad_norm": 6.659424575485085, "learning_rate": 2.022079710311012e-06, "loss": 0.7558, "step": 24862 }, { "epoch": 0.71, "grad_norm": 5.319959810865227, "learning_rate": 2.0217071891891664e-06, "loss": 0.4813, "step": 24863 }, { "epoch": 0.71, "grad_norm": 8.233528629359999, "learning_rate": 2.0213346936892057e-06, "loss": 0.436, "step": 24864 }, { "epoch": 0.71, "grad_norm": 3.478547838801918, "learning_rate": 2.0209622238143384e-06, "loss": 0.3644, "step": 24865 }, { "epoch": 0.71, "grad_norm": 10.596292421328702, "learning_rate": 2.0205897795677637e-06, "loss": 0.7227, "step": 24866 }, { "epoch": 0.71, "grad_norm": 10.89137298335919, "learning_rate": 2.0202173609526906e-06, "loss": 0.4909, "step": 24867 }, { "epoch": 0.71, "grad_norm": 15.657834610534657, "learning_rate": 2.0198449679723187e-06, "loss": 0.285, "step": 24868 }, { "epoch": 0.71, "grad_norm": 5.6860873805096865, "learning_rate": 2.019472600629855e-06, "loss": 0.643, "step": 24869 }, { "epoch": 0.71, "grad_norm": 6.22115643584792, "learning_rate": 2.019100258928502e-06, "loss": 0.7638, "step": 24870 }, { "epoch": 0.71, "grad_norm": 4.183187513102346, "learning_rate": 2.0187279428714607e-06, "loss": 0.2173, "step": 24871 }, { "epoch": 0.71, "grad_norm": 5.02521114570009, "learning_rate": 2.0183556524619376e-06, "loss": 0.4216, "step": 24872 }, { "epoch": 0.71, "grad_norm": 4.65581597475105, "learning_rate": 2.0179833877031313e-06, "loss": 0.4133, "step": 24873 }, { "epoch": 0.71, "grad_norm": 4.979946979791984, "learning_rate": 2.017611148598249e-06, "loss": 0.428, "step": 24874 }, { "epoch": 0.71, "grad_norm": 14.716395941059025, "learning_rate": 2.0172389351504888e-06, "loss": 0.6609, "step": 24875 }, { "epoch": 0.71, "grad_norm": 8.4568215571037, "learning_rate": 2.0168667473630573e-06, "loss": 0.2382, "step": 24876 }, { "epoch": 0.71, "grad_norm": 6.745528824239975, "learning_rate": 2.0164945852391532e-06, "loss": 0.7197, "step": 24877 }, { "epoch": 0.71, "grad_norm": 4.680695796089355, "learning_rate": 2.0161224487819776e-06, "loss": 0.3676, "step": 24878 }, { "epoch": 0.71, "grad_norm": 6.843481676946869, "learning_rate": 2.015750337994734e-06, "loss": 0.7462, "step": 24879 }, { "epoch": 0.71, "grad_norm": 6.742307588959844, "learning_rate": 2.015378252880622e-06, "loss": 0.6268, "step": 24880 }, { "epoch": 0.71, "grad_norm": 8.40910038607764, "learning_rate": 2.015006193442845e-06, "loss": 0.5318, "step": 24881 }, { "epoch": 0.71, "grad_norm": 5.740802156625165, "learning_rate": 2.0146341596846018e-06, "loss": 0.3105, "step": 24882 }, { "epoch": 0.71, "grad_norm": 5.2193326339308665, "learning_rate": 2.0142621516090937e-06, "loss": 0.5011, "step": 24883 }, { "epoch": 0.71, "grad_norm": 7.559327240280915, "learning_rate": 2.0138901692195185e-06, "loss": 0.3419, "step": 24884 }, { "epoch": 0.71, "grad_norm": 4.626375586622502, "learning_rate": 2.013518212519079e-06, "loss": 0.2826, "step": 24885 }, { "epoch": 0.71, "grad_norm": 4.096917623127955, "learning_rate": 2.0131462815109763e-06, "loss": 0.3187, "step": 24886 }, { "epoch": 0.71, "grad_norm": 7.103770434235908, "learning_rate": 2.0127743761984063e-06, "loss": 0.6797, "step": 24887 }, { "epoch": 0.71, "grad_norm": 4.631786985579006, "learning_rate": 2.0124024965845727e-06, "loss": 0.124, "step": 24888 }, { "epoch": 0.71, "grad_norm": 5.214372637895756, "learning_rate": 2.012030642672672e-06, "loss": 0.3278, "step": 24889 }, { "epoch": 0.71, "grad_norm": 3.3671067442802163, "learning_rate": 2.0116588144659046e-06, "loss": 0.2394, "step": 24890 }, { "epoch": 0.71, "grad_norm": 5.487181312326827, "learning_rate": 2.0112870119674668e-06, "loss": 0.8149, "step": 24891 }, { "epoch": 0.71, "grad_norm": 7.528937210220437, "learning_rate": 2.0109152351805578e-06, "loss": 0.6386, "step": 24892 }, { "epoch": 0.71, "grad_norm": 5.128231146514131, "learning_rate": 2.0105434841083794e-06, "loss": 0.3128, "step": 24893 }, { "epoch": 0.71, "grad_norm": 6.36877354251602, "learning_rate": 2.010171758754126e-06, "loss": 0.3989, "step": 24894 }, { "epoch": 0.71, "grad_norm": 7.897997020358627, "learning_rate": 2.0098000591209975e-06, "loss": 0.6745, "step": 24895 }, { "epoch": 0.71, "grad_norm": 3.2434393982110334, "learning_rate": 2.009428385212192e-06, "loss": 0.2911, "step": 24896 }, { "epoch": 0.71, "grad_norm": 8.319181467469999, "learning_rate": 2.0090567370309057e-06, "loss": 0.2776, "step": 24897 }, { "epoch": 0.71, "grad_norm": 2.3956358759130847, "learning_rate": 2.008685114580334e-06, "loss": 0.1866, "step": 24898 }, { "epoch": 0.71, "grad_norm": 5.983551387036733, "learning_rate": 2.0083135178636777e-06, "loss": 0.4935, "step": 24899 }, { "epoch": 0.71, "grad_norm": 4.186932966642771, "learning_rate": 2.00794194688413e-06, "loss": 0.4284, "step": 24900 }, { "epoch": 0.71, "grad_norm": 11.617939065968798, "learning_rate": 2.0075704016448895e-06, "loss": 0.3101, "step": 24901 }, { "epoch": 0.71, "grad_norm": 10.051205735447743, "learning_rate": 2.007198882149154e-06, "loss": 0.554, "step": 24902 }, { "epoch": 0.71, "grad_norm": 2.440441105614764, "learning_rate": 2.0068273884001183e-06, "loss": 0.141, "step": 24903 }, { "epoch": 0.71, "grad_norm": 5.271575463360622, "learning_rate": 2.0064559204009777e-06, "loss": 0.501, "step": 24904 }, { "epoch": 0.71, "grad_norm": 4.589712275793183, "learning_rate": 2.0060844781549265e-06, "loss": 0.3571, "step": 24905 }, { "epoch": 0.71, "grad_norm": 7.214342100902377, "learning_rate": 2.0057130616651644e-06, "loss": 0.4146, "step": 24906 }, { "epoch": 0.71, "grad_norm": 5.805152638257921, "learning_rate": 2.0053416709348815e-06, "loss": 0.5963, "step": 24907 }, { "epoch": 0.71, "grad_norm": 4.815240896899357, "learning_rate": 2.0049703059672778e-06, "loss": 0.3487, "step": 24908 }, { "epoch": 0.71, "grad_norm": 5.446732411184288, "learning_rate": 2.004598966765546e-06, "loss": 0.5161, "step": 24909 }, { "epoch": 0.71, "grad_norm": 6.213318105823593, "learning_rate": 2.004227653332878e-06, "loss": 0.688, "step": 24910 }, { "epoch": 0.71, "grad_norm": 4.3225281261193125, "learning_rate": 2.003856365672473e-06, "loss": 0.0908, "step": 24911 }, { "epoch": 0.71, "grad_norm": 2.751831376879895, "learning_rate": 2.0034851037875202e-06, "loss": 0.3951, "step": 24912 }, { "epoch": 0.71, "grad_norm": 5.997463445134644, "learning_rate": 2.003113867681219e-06, "loss": 0.694, "step": 24913 }, { "epoch": 0.71, "grad_norm": 7.193508124904455, "learning_rate": 2.0027426573567575e-06, "loss": 0.4548, "step": 24914 }, { "epoch": 0.71, "grad_norm": 4.76726196190899, "learning_rate": 2.0023714728173342e-06, "loss": 0.4399, "step": 24915 }, { "epoch": 0.71, "grad_norm": 1.9781588904022285, "learning_rate": 2.00200031406614e-06, "loss": 0.1267, "step": 24916 }, { "epoch": 0.71, "grad_norm": 4.318773792383487, "learning_rate": 2.001629181106366e-06, "loss": 0.1771, "step": 24917 }, { "epoch": 0.71, "grad_norm": 2.2333655844917115, "learning_rate": 2.001258073941209e-06, "loss": 0.1719, "step": 24918 }, { "epoch": 0.71, "grad_norm": 5.84765119431353, "learning_rate": 2.0008869925738576e-06, "loss": 0.5494, "step": 24919 }, { "epoch": 0.71, "grad_norm": 6.910570786691203, "learning_rate": 2.0005159370075084e-06, "loss": 0.2679, "step": 24920 }, { "epoch": 0.71, "grad_norm": 4.384035861112287, "learning_rate": 2.0001449072453487e-06, "loss": 0.6873, "step": 24921 }, { "epoch": 0.71, "grad_norm": 4.954987088676437, "learning_rate": 1.9997739032905757e-06, "loss": 0.5031, "step": 24922 }, { "epoch": 0.71, "grad_norm": 4.850060665105149, "learning_rate": 1.9994029251463777e-06, "loss": 0.3904, "step": 24923 }, { "epoch": 0.71, "grad_norm": 5.55751324122229, "learning_rate": 1.999031972815947e-06, "loss": 0.4172, "step": 24924 }, { "epoch": 0.71, "grad_norm": 5.053446928490476, "learning_rate": 1.998661046302473e-06, "loss": 0.3279, "step": 24925 }, { "epoch": 0.71, "grad_norm": 7.810530879773882, "learning_rate": 1.9982901456091485e-06, "loss": 0.2666, "step": 24926 }, { "epoch": 0.71, "grad_norm": 2.9125337656053047, "learning_rate": 1.997919270739166e-06, "loss": 0.1077, "step": 24927 }, { "epoch": 0.71, "grad_norm": 8.168614147197808, "learning_rate": 1.997548421695713e-06, "loss": 0.6548, "step": 24928 }, { "epoch": 0.71, "grad_norm": 4.137248494320804, "learning_rate": 1.9971775984819825e-06, "loss": 0.2128, "step": 24929 }, { "epoch": 0.71, "grad_norm": 6.809512121842828, "learning_rate": 1.9968068011011638e-06, "loss": 0.3147, "step": 24930 }, { "epoch": 0.71, "grad_norm": 4.0034739786662366, "learning_rate": 1.9964360295564466e-06, "loss": 0.5882, "step": 24931 }, { "epoch": 0.71, "grad_norm": 5.255918436558483, "learning_rate": 1.996065283851018e-06, "loss": 0.216, "step": 24932 }, { "epoch": 0.71, "grad_norm": 5.189150628062618, "learning_rate": 1.9956945639880704e-06, "loss": 0.2453, "step": 24933 }, { "epoch": 0.71, "grad_norm": 7.187015284904042, "learning_rate": 1.9953238699707945e-06, "loss": 0.6013, "step": 24934 }, { "epoch": 0.71, "grad_norm": 3.2488251359858635, "learning_rate": 1.9949532018023747e-06, "loss": 0.2762, "step": 24935 }, { "epoch": 0.71, "grad_norm": 3.5439821789553934, "learning_rate": 1.9945825594860054e-06, "loss": 0.375, "step": 24936 }, { "epoch": 0.71, "grad_norm": 7.854112808757925, "learning_rate": 1.994211943024872e-06, "loss": 0.5025, "step": 24937 }, { "epoch": 0.71, "grad_norm": 3.9228854150619554, "learning_rate": 1.993841352422163e-06, "loss": 0.5237, "step": 24938 }, { "epoch": 0.71, "grad_norm": 7.472041661642603, "learning_rate": 1.9934707876810645e-06, "loss": 0.5795, "step": 24939 }, { "epoch": 0.71, "grad_norm": 3.8153126300540223, "learning_rate": 1.993100248804769e-06, "loss": 0.2563, "step": 24940 }, { "epoch": 0.71, "grad_norm": 5.596712745005756, "learning_rate": 1.99272973579646e-06, "loss": 0.3958, "step": 24941 }, { "epoch": 0.71, "grad_norm": 7.007876460741419, "learning_rate": 1.9923592486593285e-06, "loss": 0.4745, "step": 24942 }, { "epoch": 0.71, "grad_norm": 3.2744590785763092, "learning_rate": 1.99198878739656e-06, "loss": 0.4614, "step": 24943 }, { "epoch": 0.71, "grad_norm": 4.776001786204504, "learning_rate": 1.99161835201134e-06, "loss": 0.2475, "step": 24944 }, { "epoch": 0.71, "grad_norm": 10.11399139233148, "learning_rate": 1.9912479425068587e-06, "loss": 0.4794, "step": 24945 }, { "epoch": 0.71, "grad_norm": 3.306749282376546, "learning_rate": 1.990877558886299e-06, "loss": 0.4704, "step": 24946 }, { "epoch": 0.71, "grad_norm": 4.490336851300465, "learning_rate": 1.990507201152851e-06, "loss": 0.3283, "step": 24947 }, { "epoch": 0.71, "grad_norm": 4.741961528792563, "learning_rate": 1.9901368693096973e-06, "loss": 0.2619, "step": 24948 }, { "epoch": 0.71, "grad_norm": 13.190452642880466, "learning_rate": 1.989766563360027e-06, "loss": 0.7889, "step": 24949 }, { "epoch": 0.71, "grad_norm": 3.4242275285793244, "learning_rate": 1.9893962833070246e-06, "loss": 0.2899, "step": 24950 }, { "epoch": 0.71, "grad_norm": 5.311030375338302, "learning_rate": 1.989026029153873e-06, "loss": 0.4205, "step": 24951 }, { "epoch": 0.71, "grad_norm": 7.3890975438281385, "learning_rate": 1.988655800903763e-06, "loss": 0.5907, "step": 24952 }, { "epoch": 0.71, "grad_norm": 4.094153056795987, "learning_rate": 1.9882855985598733e-06, "loss": 0.3507, "step": 24953 }, { "epoch": 0.71, "grad_norm": 5.051399777237261, "learning_rate": 1.987915422125394e-06, "loss": 0.3527, "step": 24954 }, { "epoch": 0.71, "grad_norm": 7.7793419884770225, "learning_rate": 1.9875452716035047e-06, "loss": 0.4674, "step": 24955 }, { "epoch": 0.71, "grad_norm": 6.310712202356923, "learning_rate": 1.9871751469973953e-06, "loss": 0.5368, "step": 24956 }, { "epoch": 0.71, "grad_norm": 7.282666244626019, "learning_rate": 1.9868050483102473e-06, "loss": 0.8024, "step": 24957 }, { "epoch": 0.71, "grad_norm": 5.371437577773431, "learning_rate": 1.9864349755452424e-06, "loss": 0.4217, "step": 24958 }, { "epoch": 0.71, "grad_norm": 3.4421594771856205, "learning_rate": 1.9860649287055685e-06, "loss": 0.3618, "step": 24959 }, { "epoch": 0.71, "grad_norm": 5.533612452594841, "learning_rate": 1.9856949077944044e-06, "loss": 0.3894, "step": 24960 }, { "epoch": 0.71, "grad_norm": 6.377913015189318, "learning_rate": 1.985324912814938e-06, "loss": 0.5887, "step": 24961 }, { "epoch": 0.71, "grad_norm": 2.855415208056241, "learning_rate": 1.9849549437703487e-06, "loss": 0.3179, "step": 24962 }, { "epoch": 0.71, "grad_norm": 5.25500131986331, "learning_rate": 1.9845850006638225e-06, "loss": 0.6796, "step": 24963 }, { "epoch": 0.71, "grad_norm": 4.2927054549222, "learning_rate": 1.9842150834985396e-06, "loss": 0.2538, "step": 24964 }, { "epoch": 0.71, "grad_norm": 5.493700321046668, "learning_rate": 1.9838451922776837e-06, "loss": 0.3538, "step": 24965 }, { "epoch": 0.71, "grad_norm": 7.1655160142851235, "learning_rate": 1.9834753270044343e-06, "loss": 0.6107, "step": 24966 }, { "epoch": 0.71, "grad_norm": 4.0801598160428325, "learning_rate": 1.9831054876819757e-06, "loss": 0.2628, "step": 24967 }, { "epoch": 0.72, "grad_norm": 3.635404519367783, "learning_rate": 1.98273567431349e-06, "loss": 0.2677, "step": 24968 }, { "epoch": 0.72, "grad_norm": 3.5291368694165945, "learning_rate": 1.9823658869021583e-06, "loss": 0.3428, "step": 24969 }, { "epoch": 0.72, "grad_norm": 9.228542390007485, "learning_rate": 1.9819961254511616e-06, "loss": 0.6879, "step": 24970 }, { "epoch": 0.72, "grad_norm": 4.636918768228204, "learning_rate": 1.9816263899636784e-06, "loss": 0.3607, "step": 24971 }, { "epoch": 0.72, "grad_norm": 5.06366405526184, "learning_rate": 1.981256680442894e-06, "loss": 0.4038, "step": 24972 }, { "epoch": 0.72, "grad_norm": 4.489806166014141, "learning_rate": 1.9808869968919848e-06, "loss": 0.3858, "step": 24973 }, { "epoch": 0.72, "grad_norm": 7.323474180904111, "learning_rate": 1.9805173393141326e-06, "loss": 0.5422, "step": 24974 }, { "epoch": 0.72, "grad_norm": 4.497903653447437, "learning_rate": 1.9801477077125197e-06, "loss": 0.3554, "step": 24975 }, { "epoch": 0.72, "grad_norm": 4.053259096302359, "learning_rate": 1.979778102090325e-06, "loss": 0.3255, "step": 24976 }, { "epoch": 0.72, "grad_norm": 6.056825518745978, "learning_rate": 1.979408522450727e-06, "loss": 0.4022, "step": 24977 }, { "epoch": 0.72, "grad_norm": 6.18175906870878, "learning_rate": 1.9790389687969038e-06, "loss": 0.3428, "step": 24978 }, { "epoch": 0.72, "grad_norm": 7.349622934751144, "learning_rate": 1.978669441132038e-06, "loss": 0.4021, "step": 24979 }, { "epoch": 0.72, "grad_norm": 4.324918998665348, "learning_rate": 1.9782999394593054e-06, "loss": 0.289, "step": 24980 }, { "epoch": 0.72, "grad_norm": 8.283980239793022, "learning_rate": 1.977930463781888e-06, "loss": 0.6777, "step": 24981 }, { "epoch": 0.72, "grad_norm": 5.839805863171415, "learning_rate": 1.9775610141029613e-06, "loss": 0.2134, "step": 24982 }, { "epoch": 0.72, "grad_norm": 4.244542404376405, "learning_rate": 1.9771915904257066e-06, "loss": 0.4183, "step": 24983 }, { "epoch": 0.72, "grad_norm": 7.37726897557588, "learning_rate": 1.9768221927533006e-06, "loss": 0.7097, "step": 24984 }, { "epoch": 0.72, "grad_norm": 8.186563685801843, "learning_rate": 1.9764528210889193e-06, "loss": 0.5928, "step": 24985 }, { "epoch": 0.72, "grad_norm": 8.879190718142226, "learning_rate": 1.9760834754357437e-06, "loss": 0.6932, "step": 24986 }, { "epoch": 0.72, "grad_norm": 3.8620823310282533, "learning_rate": 1.9757141557969477e-06, "loss": 0.4047, "step": 24987 }, { "epoch": 0.72, "grad_norm": 9.337259136805951, "learning_rate": 1.9753448621757126e-06, "loss": 0.4612, "step": 24988 }, { "epoch": 0.72, "grad_norm": 6.269184766090259, "learning_rate": 1.9749755945752115e-06, "loss": 0.4478, "step": 24989 }, { "epoch": 0.72, "grad_norm": 7.10162682498886, "learning_rate": 1.9746063529986246e-06, "loss": 0.8006, "step": 24990 }, { "epoch": 0.72, "grad_norm": 6.744688628795392, "learning_rate": 1.9742371374491265e-06, "loss": 0.6514, "step": 24991 }, { "epoch": 0.72, "grad_norm": 4.49202255609629, "learning_rate": 1.9738679479298924e-06, "loss": 0.3167, "step": 24992 }, { "epoch": 0.72, "grad_norm": 5.8892989145841685, "learning_rate": 1.973498784444101e-06, "loss": 0.3195, "step": 24993 }, { "epoch": 0.72, "grad_norm": 4.392101379006184, "learning_rate": 1.9731296469949257e-06, "loss": 0.2682, "step": 24994 }, { "epoch": 0.72, "grad_norm": 4.865028925248083, "learning_rate": 1.972760535585545e-06, "loss": 0.2609, "step": 24995 }, { "epoch": 0.72, "grad_norm": 9.467365629513345, "learning_rate": 1.972391450219131e-06, "loss": 0.4898, "step": 24996 }, { "epoch": 0.72, "grad_norm": 3.7766212316767045, "learning_rate": 1.9720223908988638e-06, "loss": 0.2143, "step": 24997 }, { "epoch": 0.72, "grad_norm": 4.215483665500721, "learning_rate": 1.9716533576279117e-06, "loss": 0.5477, "step": 24998 }, { "epoch": 0.72, "grad_norm": 6.626510483911566, "learning_rate": 1.971284350409453e-06, "loss": 0.6846, "step": 24999 }, { "epoch": 0.72, "grad_norm": 1.799926141044793, "learning_rate": 1.970915369246664e-06, "loss": 0.0108, "step": 25000 }, { "epoch": 0.72, "grad_norm": 4.649792001030507, "learning_rate": 1.970546414142715e-06, "loss": 0.367, "step": 25001 }, { "epoch": 0.72, "grad_norm": 6.250124357893669, "learning_rate": 1.9701774851007844e-06, "loss": 0.5011, "step": 25002 }, { "epoch": 0.72, "grad_norm": 7.906988780368667, "learning_rate": 1.9698085821240435e-06, "loss": 0.2837, "step": 25003 }, { "epoch": 0.72, "grad_norm": 5.387161955882133, "learning_rate": 1.9694397052156665e-06, "loss": 0.5502, "step": 25004 }, { "epoch": 0.72, "grad_norm": 3.5568735967059224, "learning_rate": 1.9690708543788243e-06, "loss": 0.3897, "step": 25005 }, { "epoch": 0.72, "grad_norm": 8.069640671720068, "learning_rate": 1.9687020296166946e-06, "loss": 0.3148, "step": 25006 }, { "epoch": 0.72, "grad_norm": 4.1433865820781834, "learning_rate": 1.9683332309324464e-06, "loss": 0.2498, "step": 25007 }, { "epoch": 0.72, "grad_norm": 4.009874910035027, "learning_rate": 1.9679644583292545e-06, "loss": 0.6182, "step": 25008 }, { "epoch": 0.72, "grad_norm": 4.588673608840793, "learning_rate": 1.9675957118102923e-06, "loss": 0.2199, "step": 25009 }, { "epoch": 0.72, "grad_norm": 7.084506158409697, "learning_rate": 1.967226991378731e-06, "loss": 0.6853, "step": 25010 }, { "epoch": 0.72, "grad_norm": 3.6691326533177864, "learning_rate": 1.9668582970377425e-06, "loss": 0.3259, "step": 25011 }, { "epoch": 0.72, "grad_norm": 5.994052979545417, "learning_rate": 1.9664896287904967e-06, "loss": 0.4408, "step": 25012 }, { "epoch": 0.72, "grad_norm": 9.904933854045424, "learning_rate": 1.9661209866401687e-06, "loss": 0.6392, "step": 25013 }, { "epoch": 0.72, "grad_norm": 5.466993998210062, "learning_rate": 1.965752370589927e-06, "loss": 0.4517, "step": 25014 }, { "epoch": 0.72, "grad_norm": 4.686343406403677, "learning_rate": 1.9653837806429436e-06, "loss": 0.186, "step": 25015 }, { "epoch": 0.72, "grad_norm": 4.071711206545181, "learning_rate": 1.965015216802392e-06, "loss": 0.874, "step": 25016 }, { "epoch": 0.72, "grad_norm": 6.526151647551815, "learning_rate": 1.9646466790714407e-06, "loss": 0.8481, "step": 25017 }, { "epoch": 0.72, "grad_norm": 4.94882069276334, "learning_rate": 1.9642781674532605e-06, "loss": 0.5055, "step": 25018 }, { "epoch": 0.72, "grad_norm": 9.491514331196914, "learning_rate": 1.9639096819510196e-06, "loss": 0.7926, "step": 25019 }, { "epoch": 0.72, "grad_norm": 5.730721476572044, "learning_rate": 1.9635412225678917e-06, "loss": 0.5938, "step": 25020 }, { "epoch": 0.72, "grad_norm": 5.787961456159946, "learning_rate": 1.963172789307043e-06, "loss": 0.354, "step": 25021 }, { "epoch": 0.72, "grad_norm": 4.857931529857054, "learning_rate": 1.9628043821716462e-06, "loss": 0.3121, "step": 25022 }, { "epoch": 0.72, "grad_norm": 11.766075986853815, "learning_rate": 1.9624360011648687e-06, "loss": 0.6983, "step": 25023 }, { "epoch": 0.72, "grad_norm": 7.738935663395772, "learning_rate": 1.962067646289881e-06, "loss": 0.7892, "step": 25024 }, { "epoch": 0.72, "grad_norm": 8.38604543156944, "learning_rate": 1.9616993175498516e-06, "loss": 0.4171, "step": 25025 }, { "epoch": 0.72, "grad_norm": 8.170879120984548, "learning_rate": 1.961331014947947e-06, "loss": 0.3879, "step": 25026 }, { "epoch": 0.72, "grad_norm": 3.9398238121615985, "learning_rate": 1.9609627384873396e-06, "loss": 0.4235, "step": 25027 }, { "epoch": 0.72, "grad_norm": 5.044682147407162, "learning_rate": 1.9605944881711937e-06, "loss": 0.4531, "step": 25028 }, { "epoch": 0.72, "grad_norm": 7.695628698988333, "learning_rate": 1.9602262640026807e-06, "loss": 0.6726, "step": 25029 }, { "epoch": 0.72, "grad_norm": 11.991700083615573, "learning_rate": 1.9598580659849675e-06, "loss": 0.7627, "step": 25030 }, { "epoch": 0.72, "grad_norm": 6.167989623292341, "learning_rate": 1.95948989412122e-06, "loss": 0.3055, "step": 25031 }, { "epoch": 0.72, "grad_norm": 5.293655812620216, "learning_rate": 1.959121748414606e-06, "loss": 0.4666, "step": 25032 }, { "epoch": 0.72, "grad_norm": 5.335834194987368, "learning_rate": 1.9587536288682928e-06, "loss": 0.3552, "step": 25033 }, { "epoch": 0.72, "grad_norm": 8.347380716134785, "learning_rate": 1.9583855354854493e-06, "loss": 0.8316, "step": 25034 }, { "epoch": 0.72, "grad_norm": 4.993233518902631, "learning_rate": 1.9580174682692387e-06, "loss": 0.3761, "step": 25035 }, { "epoch": 0.72, "grad_norm": 1.8079929177704126, "learning_rate": 1.9576494272228314e-06, "loss": 0.0896, "step": 25036 }, { "epoch": 0.72, "grad_norm": 4.317179752968331, "learning_rate": 1.957281412349391e-06, "loss": 0.407, "step": 25037 }, { "epoch": 0.72, "grad_norm": 2.64266054147568, "learning_rate": 1.9569134236520844e-06, "loss": 0.1792, "step": 25038 }, { "epoch": 0.72, "grad_norm": 2.569050882171915, "learning_rate": 1.956545461134075e-06, "loss": 0.2444, "step": 25039 }, { "epoch": 0.72, "grad_norm": 4.719921648700649, "learning_rate": 1.9561775247985304e-06, "loss": 0.4706, "step": 25040 }, { "epoch": 0.72, "grad_norm": 10.11708781660894, "learning_rate": 1.955809614648617e-06, "loss": 1.1697, "step": 25041 }, { "epoch": 0.72, "grad_norm": 4.895233591017825, "learning_rate": 1.955441730687498e-06, "loss": 0.3335, "step": 25042 }, { "epoch": 0.72, "grad_norm": 3.349821300864636, "learning_rate": 1.95507387291834e-06, "loss": 0.3241, "step": 25043 }, { "epoch": 0.72, "grad_norm": 7.015661702559201, "learning_rate": 1.9547060413443065e-06, "loss": 0.6551, "step": 25044 }, { "epoch": 0.72, "grad_norm": 3.611153894969794, "learning_rate": 1.954338235968562e-06, "loss": 0.2541, "step": 25045 }, { "epoch": 0.72, "grad_norm": 4.293622537245222, "learning_rate": 1.953970456794269e-06, "loss": 0.7318, "step": 25046 }, { "epoch": 0.72, "grad_norm": 4.790251492486285, "learning_rate": 1.9536027038245946e-06, "loss": 0.5756, "step": 25047 }, { "epoch": 0.72, "grad_norm": 2.9607571871566476, "learning_rate": 1.9532349770626994e-06, "loss": 0.3047, "step": 25048 }, { "epoch": 0.72, "grad_norm": 6.662524852026417, "learning_rate": 1.952867276511749e-06, "loss": 0.5401, "step": 25049 }, { "epoch": 0.72, "grad_norm": 3.788294511812346, "learning_rate": 1.952499602174908e-06, "loss": 0.3106, "step": 25050 }, { "epoch": 0.72, "grad_norm": 3.844417979186687, "learning_rate": 1.9521319540553375e-06, "loss": 0.3556, "step": 25051 }, { "epoch": 0.72, "grad_norm": 5.623114926985415, "learning_rate": 1.9517643321562004e-06, "loss": 0.594, "step": 25052 }, { "epoch": 0.72, "grad_norm": 5.273899764533724, "learning_rate": 1.951396736480658e-06, "loss": 0.6731, "step": 25053 }, { "epoch": 0.72, "grad_norm": 4.64435440381293, "learning_rate": 1.951029167031875e-06, "loss": 0.2914, "step": 25054 }, { "epoch": 0.72, "grad_norm": 3.4479344915906944, "learning_rate": 1.9506616238130123e-06, "loss": 0.3239, "step": 25055 }, { "epoch": 0.72, "grad_norm": 6.239490523202655, "learning_rate": 1.950294106827233e-06, "loss": 0.3729, "step": 25056 }, { "epoch": 0.72, "grad_norm": 6.213153793963565, "learning_rate": 1.949926616077698e-06, "loss": 0.8859, "step": 25057 }, { "epoch": 0.72, "grad_norm": 5.399874527674424, "learning_rate": 1.9495591515675676e-06, "loss": 0.2595, "step": 25058 }, { "epoch": 0.72, "grad_norm": 4.820116382238501, "learning_rate": 1.9491917133000053e-06, "loss": 0.2779, "step": 25059 }, { "epoch": 0.72, "grad_norm": 2.1789912133733402, "learning_rate": 1.94882430127817e-06, "loss": 0.1756, "step": 25060 }, { "epoch": 0.72, "grad_norm": 4.9389502170136845, "learning_rate": 1.9484569155052252e-06, "loss": 0.8924, "step": 25061 }, { "epoch": 0.72, "grad_norm": 5.907218732559079, "learning_rate": 1.9480895559843276e-06, "loss": 0.3502, "step": 25062 }, { "epoch": 0.72, "grad_norm": 4.350506639025486, "learning_rate": 1.947722222718642e-06, "loss": 0.5452, "step": 25063 }, { "epoch": 0.72, "grad_norm": 8.67977748655958, "learning_rate": 1.947354915711326e-06, "loss": 0.2708, "step": 25064 }, { "epoch": 0.72, "grad_norm": 3.8570578565137663, "learning_rate": 1.946987634965538e-06, "loss": 0.2804, "step": 25065 }, { "epoch": 0.72, "grad_norm": 7.011842043331924, "learning_rate": 1.946620380484442e-06, "loss": 0.7436, "step": 25066 }, { "epoch": 0.72, "grad_norm": 3.0352960895545187, "learning_rate": 1.9462531522711926e-06, "loss": 0.128, "step": 25067 }, { "epoch": 0.72, "grad_norm": 5.789971531219858, "learning_rate": 1.945885950328953e-06, "loss": 0.1301, "step": 25068 }, { "epoch": 0.72, "grad_norm": 3.364393805646699, "learning_rate": 1.945518774660879e-06, "loss": 0.2111, "step": 25069 }, { "epoch": 0.72, "grad_norm": 9.88170729175576, "learning_rate": 1.9451516252701326e-06, "loss": 0.7155, "step": 25070 }, { "epoch": 0.72, "grad_norm": 3.9332867173925306, "learning_rate": 1.9447845021598706e-06, "loss": 0.3017, "step": 25071 }, { "epoch": 0.72, "grad_norm": 8.425000461838707, "learning_rate": 1.944417405333251e-06, "loss": 0.8371, "step": 25072 }, { "epoch": 0.72, "grad_norm": 5.6594575670560605, "learning_rate": 1.9440503347934308e-06, "loss": 0.3111, "step": 25073 }, { "epoch": 0.72, "grad_norm": 8.140525304729325, "learning_rate": 1.9436832905435693e-06, "loss": 0.6626, "step": 25074 }, { "epoch": 0.72, "grad_norm": 6.556534189877937, "learning_rate": 1.943316272586826e-06, "loss": 0.5469, "step": 25075 }, { "epoch": 0.72, "grad_norm": 4.875116151257397, "learning_rate": 1.9429492809263543e-06, "loss": 0.2142, "step": 25076 }, { "epoch": 0.72, "grad_norm": 5.40863438764153, "learning_rate": 1.9425823155653163e-06, "loss": 0.5602, "step": 25077 }, { "epoch": 0.72, "grad_norm": 5.20317751196404, "learning_rate": 1.942215376506865e-06, "loss": 0.3956, "step": 25078 }, { "epoch": 0.72, "grad_norm": 3.832826825382227, "learning_rate": 1.941848463754159e-06, "loss": 0.29, "step": 25079 }, { "epoch": 0.72, "grad_norm": 7.605690442005208, "learning_rate": 1.9414815773103524e-06, "loss": 0.3665, "step": 25080 }, { "epoch": 0.72, "grad_norm": 8.045214020494363, "learning_rate": 1.941114717178603e-06, "loss": 0.9333, "step": 25081 }, { "epoch": 0.72, "grad_norm": 6.862511892951152, "learning_rate": 1.940747883362069e-06, "loss": 0.1864, "step": 25082 }, { "epoch": 0.72, "grad_norm": 8.094585184139067, "learning_rate": 1.9403810758639026e-06, "loss": 0.332, "step": 25083 }, { "epoch": 0.72, "grad_norm": 9.345200748154927, "learning_rate": 1.9400142946872648e-06, "loss": 0.9078, "step": 25084 }, { "epoch": 0.72, "grad_norm": 3.475585062874161, "learning_rate": 1.939647539835303e-06, "loss": 0.2381, "step": 25085 }, { "epoch": 0.72, "grad_norm": 8.931375819521929, "learning_rate": 1.939280811311179e-06, "loss": 0.4261, "step": 25086 }, { "epoch": 0.72, "grad_norm": 7.437861842481467, "learning_rate": 1.938914109118043e-06, "loss": 0.5562, "step": 25087 }, { "epoch": 0.72, "grad_norm": 7.231958088541327, "learning_rate": 1.938547433259054e-06, "loss": 0.6582, "step": 25088 }, { "epoch": 0.72, "grad_norm": 6.512003196174249, "learning_rate": 1.9381807837373636e-06, "loss": 0.52, "step": 25089 }, { "epoch": 0.72, "grad_norm": 4.863167855652799, "learning_rate": 1.9378141605561275e-06, "loss": 0.3407, "step": 25090 }, { "epoch": 0.72, "grad_norm": 4.058330156506516, "learning_rate": 1.9374475637184996e-06, "loss": 0.4366, "step": 25091 }, { "epoch": 0.72, "grad_norm": 9.060746832333274, "learning_rate": 1.9370809932276317e-06, "loss": 0.5767, "step": 25092 }, { "epoch": 0.72, "grad_norm": 4.3246052611480215, "learning_rate": 1.93671444908668e-06, "loss": 0.487, "step": 25093 }, { "epoch": 0.72, "grad_norm": 3.3647053148264465, "learning_rate": 1.936347931298796e-06, "loss": 0.1673, "step": 25094 }, { "epoch": 0.72, "grad_norm": 6.2392706519374075, "learning_rate": 1.9359814398671346e-06, "loss": 0.447, "step": 25095 }, { "epoch": 0.72, "grad_norm": 1.7716474699838674, "learning_rate": 1.935614974794846e-06, "loss": 0.2773, "step": 25096 }, { "epoch": 0.72, "grad_norm": 7.717269508372502, "learning_rate": 1.9352485360850866e-06, "loss": 0.3274, "step": 25097 }, { "epoch": 0.72, "grad_norm": 3.144445913353417, "learning_rate": 1.934882123741006e-06, "loss": 0.1942, "step": 25098 }, { "epoch": 0.72, "grad_norm": 5.526784872640004, "learning_rate": 1.934515737765756e-06, "loss": 0.3482, "step": 25099 }, { "epoch": 0.72, "grad_norm": 8.106499722648234, "learning_rate": 1.934149378162491e-06, "loss": 0.2022, "step": 25100 }, { "epoch": 0.72, "grad_norm": 3.0808751750708727, "learning_rate": 1.93378304493436e-06, "loss": 0.1936, "step": 25101 }, { "epoch": 0.72, "grad_norm": 2.5931330487137174, "learning_rate": 1.933416738084517e-06, "loss": 0.2434, "step": 25102 }, { "epoch": 0.72, "grad_norm": 5.299355052897409, "learning_rate": 1.933050457616111e-06, "loss": 0.5056, "step": 25103 }, { "epoch": 0.72, "grad_norm": 5.984675945155098, "learning_rate": 1.9326842035322955e-06, "loss": 0.3518, "step": 25104 }, { "epoch": 0.72, "grad_norm": 2.943330295067238, "learning_rate": 1.9323179758362208e-06, "loss": 0.2385, "step": 25105 }, { "epoch": 0.72, "grad_norm": 13.07753372223039, "learning_rate": 1.931951774531035e-06, "loss": 0.5164, "step": 25106 }, { "epoch": 0.72, "grad_norm": 3.8408925359799073, "learning_rate": 1.9315855996198924e-06, "loss": 0.2537, "step": 25107 }, { "epoch": 0.72, "grad_norm": 5.096362140423815, "learning_rate": 1.931219451105939e-06, "loss": 0.2059, "step": 25108 }, { "epoch": 0.72, "grad_norm": 10.320517919836533, "learning_rate": 1.930853328992329e-06, "loss": 0.6564, "step": 25109 }, { "epoch": 0.72, "grad_norm": 5.734282199202259, "learning_rate": 1.9304872332822074e-06, "loss": 0.5279, "step": 25110 }, { "epoch": 0.72, "grad_norm": 3.6611996265825875, "learning_rate": 1.930121163978728e-06, "loss": 0.1151, "step": 25111 }, { "epoch": 0.72, "grad_norm": 3.7724333191091985, "learning_rate": 1.9297551210850386e-06, "loss": 0.2841, "step": 25112 }, { "epoch": 0.72, "grad_norm": 8.757520223295277, "learning_rate": 1.9293891046042877e-06, "loss": 0.7445, "step": 25113 }, { "epoch": 0.72, "grad_norm": 6.030625622306805, "learning_rate": 1.9290231145396225e-06, "loss": 0.7014, "step": 25114 }, { "epoch": 0.72, "grad_norm": 3.378587705557996, "learning_rate": 1.9286571508941937e-06, "loss": 0.2067, "step": 25115 }, { "epoch": 0.72, "grad_norm": 3.870445620634899, "learning_rate": 1.928291213671151e-06, "loss": 0.2109, "step": 25116 }, { "epoch": 0.72, "grad_norm": 3.5754805041893767, "learning_rate": 1.9279253028736407e-06, "loss": 0.1935, "step": 25117 }, { "epoch": 0.72, "grad_norm": 5.7706039970723895, "learning_rate": 1.9275594185048104e-06, "loss": 0.3645, "step": 25118 }, { "epoch": 0.72, "grad_norm": 6.130595103994132, "learning_rate": 1.9271935605678067e-06, "loss": 0.2646, "step": 25119 }, { "epoch": 0.72, "grad_norm": 4.133290081156779, "learning_rate": 1.92682772906578e-06, "loss": 0.4438, "step": 25120 }, { "epoch": 0.72, "grad_norm": 7.413290869993725, "learning_rate": 1.9264619240018745e-06, "loss": 0.378, "step": 25121 }, { "epoch": 0.72, "grad_norm": 4.7898497688928865, "learning_rate": 1.926096145379238e-06, "loss": 0.2391, "step": 25122 }, { "epoch": 0.72, "grad_norm": 2.2414970664090887, "learning_rate": 1.925730393201021e-06, "loss": 0.1763, "step": 25123 }, { "epoch": 0.72, "grad_norm": 5.97400471925118, "learning_rate": 1.925364667470366e-06, "loss": 0.3721, "step": 25124 }, { "epoch": 0.72, "grad_norm": 6.893334891720614, "learning_rate": 1.9249989681904196e-06, "loss": 0.4263, "step": 25125 }, { "epoch": 0.72, "grad_norm": 4.286692424823344, "learning_rate": 1.9246332953643274e-06, "loss": 0.2624, "step": 25126 }, { "epoch": 0.72, "grad_norm": 7.801188466422757, "learning_rate": 1.924267648995238e-06, "loss": 0.6839, "step": 25127 }, { "epoch": 0.72, "grad_norm": 5.15300662597017, "learning_rate": 1.923902029086294e-06, "loss": 0.3171, "step": 25128 }, { "epoch": 0.72, "grad_norm": 7.79549599112527, "learning_rate": 1.9235364356406432e-06, "loss": 0.4456, "step": 25129 }, { "epoch": 0.72, "grad_norm": 4.906184931038084, "learning_rate": 1.923170868661429e-06, "loss": 0.3792, "step": 25130 }, { "epoch": 0.72, "grad_norm": 9.036565965053615, "learning_rate": 1.922805328151797e-06, "loss": 0.6971, "step": 25131 }, { "epoch": 0.72, "grad_norm": 7.5946301550654995, "learning_rate": 1.922439814114893e-06, "loss": 0.4482, "step": 25132 }, { "epoch": 0.72, "grad_norm": 7.470221306715576, "learning_rate": 1.9220743265538588e-06, "loss": 0.6279, "step": 25133 }, { "epoch": 0.72, "grad_norm": 3.3213103107781055, "learning_rate": 1.9217088654718414e-06, "loss": 0.4374, "step": 25134 }, { "epoch": 0.72, "grad_norm": 9.127274791454095, "learning_rate": 1.921343430871982e-06, "loss": 0.3697, "step": 25135 }, { "epoch": 0.72, "grad_norm": 7.832125009180064, "learning_rate": 1.9209780227574283e-06, "loss": 0.5397, "step": 25136 }, { "epoch": 0.72, "grad_norm": 3.574730341418074, "learning_rate": 1.9206126411313192e-06, "loss": 0.6071, "step": 25137 }, { "epoch": 0.72, "grad_norm": 4.910876577098345, "learning_rate": 1.920247285996802e-06, "loss": 0.4498, "step": 25138 }, { "epoch": 0.72, "grad_norm": 2.9332425330117484, "learning_rate": 1.9198819573570186e-06, "loss": 0.2307, "step": 25139 }, { "epoch": 0.72, "grad_norm": 4.579376049767914, "learning_rate": 1.9195166552151095e-06, "loss": 0.3206, "step": 25140 }, { "epoch": 0.72, "grad_norm": 4.5161367740158225, "learning_rate": 1.919151379574221e-06, "loss": 0.3677, "step": 25141 }, { "epoch": 0.72, "grad_norm": 8.07484661700664, "learning_rate": 1.9187861304374926e-06, "loss": 0.6508, "step": 25142 }, { "epoch": 0.72, "grad_norm": 11.296510218301163, "learning_rate": 1.9184209078080697e-06, "loss": 0.8462, "step": 25143 }, { "epoch": 0.72, "grad_norm": 4.126256346837813, "learning_rate": 1.9180557116890912e-06, "loss": 0.5254, "step": 25144 }, { "epoch": 0.72, "grad_norm": 5.589390425090796, "learning_rate": 1.917690542083701e-06, "loss": 0.6744, "step": 25145 }, { "epoch": 0.72, "grad_norm": 6.889330095686218, "learning_rate": 1.9173253989950374e-06, "loss": 0.3789, "step": 25146 }, { "epoch": 0.72, "grad_norm": 6.528874727387358, "learning_rate": 1.916960282426244e-06, "loss": 0.3191, "step": 25147 }, { "epoch": 0.72, "grad_norm": 7.4637362837901, "learning_rate": 1.9165951923804634e-06, "loss": 0.4896, "step": 25148 }, { "epoch": 0.72, "grad_norm": 6.758080096817529, "learning_rate": 1.916230128860833e-06, "loss": 0.5432, "step": 25149 }, { "epoch": 0.72, "grad_norm": 6.858624880014286, "learning_rate": 1.9158650918704964e-06, "loss": 0.5229, "step": 25150 }, { "epoch": 0.72, "grad_norm": 11.585807773843865, "learning_rate": 1.915500081412593e-06, "loss": 0.8237, "step": 25151 }, { "epoch": 0.72, "grad_norm": 8.056834396344032, "learning_rate": 1.915135097490263e-06, "loss": 0.4177, "step": 25152 }, { "epoch": 0.72, "grad_norm": 10.704446530027658, "learning_rate": 1.914770140106643e-06, "loss": 0.5784, "step": 25153 }, { "epoch": 0.72, "grad_norm": 5.935757873339706, "learning_rate": 1.9144052092648786e-06, "loss": 0.2517, "step": 25154 }, { "epoch": 0.72, "grad_norm": 7.163722538759804, "learning_rate": 1.9140403049681043e-06, "loss": 0.7258, "step": 25155 }, { "epoch": 0.72, "grad_norm": 5.103941171556721, "learning_rate": 1.9136754272194606e-06, "loss": 0.2476, "step": 25156 }, { "epoch": 0.72, "grad_norm": 5.8342316571583295, "learning_rate": 1.91331057602209e-06, "loss": 0.3865, "step": 25157 }, { "epoch": 0.72, "grad_norm": 3.487151985168789, "learning_rate": 1.9129457513791277e-06, "loss": 0.4672, "step": 25158 }, { "epoch": 0.72, "grad_norm": 5.154241482811783, "learning_rate": 1.9125809532937133e-06, "loss": 0.8868, "step": 25159 }, { "epoch": 0.72, "grad_norm": 4.910351612025364, "learning_rate": 1.9122161817689833e-06, "loss": 0.2716, "step": 25160 }, { "epoch": 0.72, "grad_norm": 8.530606088337159, "learning_rate": 1.9118514368080786e-06, "loss": 0.5551, "step": 25161 }, { "epoch": 0.72, "grad_norm": 8.785754640112165, "learning_rate": 1.9114867184141345e-06, "loss": 0.3989, "step": 25162 }, { "epoch": 0.72, "grad_norm": 7.105436873403793, "learning_rate": 1.9111220265902895e-06, "loss": 0.5271, "step": 25163 }, { "epoch": 0.72, "grad_norm": 9.223283331951142, "learning_rate": 1.9107573613396833e-06, "loss": 1.0182, "step": 25164 }, { "epoch": 0.72, "grad_norm": 4.7990152262759915, "learning_rate": 1.910392722665451e-06, "loss": 0.5855, "step": 25165 }, { "epoch": 0.72, "grad_norm": 3.442855391292771, "learning_rate": 1.9100281105707296e-06, "loss": 0.2861, "step": 25166 }, { "epoch": 0.72, "grad_norm": 5.790133069671893, "learning_rate": 1.9096635250586547e-06, "loss": 0.5261, "step": 25167 }, { "epoch": 0.72, "grad_norm": 5.793078554621424, "learning_rate": 1.9092989661323658e-06, "loss": 0.3514, "step": 25168 }, { "epoch": 0.72, "grad_norm": 3.334770623916395, "learning_rate": 1.908934433794995e-06, "loss": 0.5428, "step": 25169 }, { "epoch": 0.72, "grad_norm": 6.008343418904736, "learning_rate": 1.9085699280496827e-06, "loss": 0.2884, "step": 25170 }, { "epoch": 0.72, "grad_norm": 6.437891216797677, "learning_rate": 1.9082054488995626e-06, "loss": 0.5465, "step": 25171 }, { "epoch": 0.72, "grad_norm": 6.504824681774675, "learning_rate": 1.907840996347769e-06, "loss": 0.3582, "step": 25172 }, { "epoch": 0.72, "grad_norm": 5.8106908341213455, "learning_rate": 1.9074765703974396e-06, "loss": 0.5594, "step": 25173 }, { "epoch": 0.72, "grad_norm": 5.681855880525542, "learning_rate": 1.907112171051707e-06, "loss": 0.3246, "step": 25174 }, { "epoch": 0.72, "grad_norm": 17.29434579830028, "learning_rate": 1.9067477983137101e-06, "loss": 0.5722, "step": 25175 }, { "epoch": 0.72, "grad_norm": 3.3953477340674727, "learning_rate": 1.9063834521865783e-06, "loss": 0.232, "step": 25176 }, { "epoch": 0.72, "grad_norm": 6.591409783194539, "learning_rate": 1.9060191326734511e-06, "loss": 0.753, "step": 25177 }, { "epoch": 0.72, "grad_norm": 7.654148482908669, "learning_rate": 1.90565483977746e-06, "loss": 0.3304, "step": 25178 }, { "epoch": 0.72, "grad_norm": 5.572528756684849, "learning_rate": 1.9052905735017395e-06, "loss": 0.4633, "step": 25179 }, { "epoch": 0.72, "grad_norm": 5.560026690501362, "learning_rate": 1.9049263338494217e-06, "loss": 0.5845, "step": 25180 }, { "epoch": 0.72, "grad_norm": 4.665147306972413, "learning_rate": 1.9045621208236408e-06, "loss": 0.4727, "step": 25181 }, { "epoch": 0.72, "grad_norm": 9.147925019671256, "learning_rate": 1.9041979344275336e-06, "loss": 0.9075, "step": 25182 }, { "epoch": 0.72, "grad_norm": 8.94396925256934, "learning_rate": 1.9038337746642278e-06, "loss": 0.986, "step": 25183 }, { "epoch": 0.72, "grad_norm": 5.456540259716888, "learning_rate": 1.9034696415368609e-06, "loss": 0.8246, "step": 25184 }, { "epoch": 0.72, "grad_norm": 4.5095480184480214, "learning_rate": 1.903105535048564e-06, "loss": 0.3426, "step": 25185 }, { "epoch": 0.72, "grad_norm": 5.3585587928211265, "learning_rate": 1.9027414552024681e-06, "loss": 0.5746, "step": 25186 }, { "epoch": 0.72, "grad_norm": 2.444638765774113, "learning_rate": 1.9023774020017044e-06, "loss": 0.387, "step": 25187 }, { "epoch": 0.72, "grad_norm": 4.866116229809746, "learning_rate": 1.902013375449407e-06, "loss": 0.3718, "step": 25188 }, { "epoch": 0.72, "grad_norm": 7.465087455472288, "learning_rate": 1.9016493755487081e-06, "loss": 0.3014, "step": 25189 }, { "epoch": 0.72, "grad_norm": 4.157894612232259, "learning_rate": 1.901285402302737e-06, "loss": 0.3413, "step": 25190 }, { "epoch": 0.72, "grad_norm": 6.86626448086167, "learning_rate": 1.9009214557146278e-06, "loss": 0.5582, "step": 25191 }, { "epoch": 0.72, "grad_norm": 5.724642651661962, "learning_rate": 1.9005575357875095e-06, "loss": 0.7, "step": 25192 }, { "epoch": 0.72, "grad_norm": 11.48396119909797, "learning_rate": 1.9001936425245131e-06, "loss": 0.7822, "step": 25193 }, { "epoch": 0.72, "grad_norm": 10.181529630095888, "learning_rate": 1.8998297759287675e-06, "loss": 0.7955, "step": 25194 }, { "epoch": 0.72, "grad_norm": 7.048467959332047, "learning_rate": 1.8994659360034068e-06, "loss": 0.5011, "step": 25195 }, { "epoch": 0.72, "grad_norm": 10.586100199107182, "learning_rate": 1.8991021227515567e-06, "loss": 0.5589, "step": 25196 }, { "epoch": 0.72, "grad_norm": 7.444889789121661, "learning_rate": 1.898738336176349e-06, "loss": 0.6642, "step": 25197 }, { "epoch": 0.72, "grad_norm": 5.4079785973700965, "learning_rate": 1.8983745762809157e-06, "loss": 0.6189, "step": 25198 }, { "epoch": 0.72, "grad_norm": 4.398908166451023, "learning_rate": 1.898010843068384e-06, "loss": 0.3606, "step": 25199 }, { "epoch": 0.72, "grad_norm": 6.245961448504044, "learning_rate": 1.8976471365418831e-06, "loss": 0.3939, "step": 25200 }, { "epoch": 0.72, "grad_norm": 6.336963932607117, "learning_rate": 1.8972834567045401e-06, "loss": 0.4538, "step": 25201 }, { "epoch": 0.72, "grad_norm": 7.93859759574129, "learning_rate": 1.8969198035594872e-06, "loss": 0.5092, "step": 25202 }, { "epoch": 0.72, "grad_norm": 6.576999957724547, "learning_rate": 1.8965561771098495e-06, "loss": 0.324, "step": 25203 }, { "epoch": 0.72, "grad_norm": 5.295520800660869, "learning_rate": 1.8961925773587587e-06, "loss": 0.3433, "step": 25204 }, { "epoch": 0.72, "grad_norm": 3.6414850717102647, "learning_rate": 1.8958290043093408e-06, "loss": 0.3841, "step": 25205 }, { "epoch": 0.72, "grad_norm": 5.156624705253435, "learning_rate": 1.8954654579647219e-06, "loss": 0.4761, "step": 25206 }, { "epoch": 0.72, "grad_norm": 5.604629592022075, "learning_rate": 1.8951019383280333e-06, "loss": 0.3001, "step": 25207 }, { "epoch": 0.72, "grad_norm": 6.716359338599875, "learning_rate": 1.8947384454023992e-06, "loss": 0.8686, "step": 25208 }, { "epoch": 0.72, "grad_norm": 4.675920960801737, "learning_rate": 1.8943749791909494e-06, "loss": 0.7744, "step": 25209 }, { "epoch": 0.72, "grad_norm": 4.4627153959034835, "learning_rate": 1.8940115396968072e-06, "loss": 0.3061, "step": 25210 }, { "epoch": 0.72, "grad_norm": 2.147715714326301, "learning_rate": 1.8936481269231033e-06, "loss": 0.1363, "step": 25211 }, { "epoch": 0.72, "grad_norm": 5.81682360236099, "learning_rate": 1.8932847408729622e-06, "loss": 0.4735, "step": 25212 }, { "epoch": 0.72, "grad_norm": 7.945308779075149, "learning_rate": 1.8929213815495078e-06, "loss": 0.3362, "step": 25213 }, { "epoch": 0.72, "grad_norm": 2.845543327374367, "learning_rate": 1.8925580489558703e-06, "loss": 0.3439, "step": 25214 }, { "epoch": 0.72, "grad_norm": 6.23850363070471, "learning_rate": 1.8921947430951715e-06, "loss": 0.2641, "step": 25215 }, { "epoch": 0.72, "grad_norm": 11.908928557340921, "learning_rate": 1.8918314639705404e-06, "loss": 0.5561, "step": 25216 }, { "epoch": 0.72, "grad_norm": 14.422246562432386, "learning_rate": 1.891468211585099e-06, "loss": 0.7702, "step": 25217 }, { "epoch": 0.72, "grad_norm": 4.497816748287432, "learning_rate": 1.891104985941975e-06, "loss": 0.2705, "step": 25218 }, { "epoch": 0.72, "grad_norm": 5.254634741439735, "learning_rate": 1.8907417870442929e-06, "loss": 0.3073, "step": 25219 }, { "epoch": 0.72, "grad_norm": 3.6680360814293818, "learning_rate": 1.8903786148951758e-06, "loss": 0.2138, "step": 25220 }, { "epoch": 0.72, "grad_norm": 6.532429748285876, "learning_rate": 1.8900154694977468e-06, "loss": 0.3272, "step": 25221 }, { "epoch": 0.72, "grad_norm": 6.2319439041541385, "learning_rate": 1.8896523508551323e-06, "loss": 0.5852, "step": 25222 }, { "epoch": 0.72, "grad_norm": 5.397899109509899, "learning_rate": 1.889289258970457e-06, "loss": 0.3085, "step": 25223 }, { "epoch": 0.72, "grad_norm": 4.995511472173978, "learning_rate": 1.8889261938468412e-06, "loss": 0.7189, "step": 25224 }, { "epoch": 0.72, "grad_norm": 6.637203977919918, "learning_rate": 1.8885631554874123e-06, "loss": 0.2311, "step": 25225 }, { "epoch": 0.72, "grad_norm": 4.365589101830836, "learning_rate": 1.8882001438952918e-06, "loss": 0.1935, "step": 25226 }, { "epoch": 0.72, "grad_norm": 8.015893942799485, "learning_rate": 1.8878371590736023e-06, "loss": 0.6682, "step": 25227 }, { "epoch": 0.72, "grad_norm": 6.515262401563299, "learning_rate": 1.8874742010254644e-06, "loss": 0.3415, "step": 25228 }, { "epoch": 0.72, "grad_norm": 7.411783525343296, "learning_rate": 1.887111269754003e-06, "loss": 0.3149, "step": 25229 }, { "epoch": 0.72, "grad_norm": 5.659975774295305, "learning_rate": 1.8867483652623419e-06, "loss": 0.3948, "step": 25230 }, { "epoch": 0.72, "grad_norm": 5.852414485497297, "learning_rate": 1.8863854875536009e-06, "loss": 0.4476, "step": 25231 }, { "epoch": 0.72, "grad_norm": 3.6788590261912217, "learning_rate": 1.8860226366309025e-06, "loss": 0.4725, "step": 25232 }, { "epoch": 0.72, "grad_norm": 8.89323502294181, "learning_rate": 1.8856598124973662e-06, "loss": 0.6665, "step": 25233 }, { "epoch": 0.72, "grad_norm": 7.942530772408011, "learning_rate": 1.8852970151561167e-06, "loss": 0.4391, "step": 25234 }, { "epoch": 0.72, "grad_norm": 6.012643625205451, "learning_rate": 1.884934244610272e-06, "loss": 0.4687, "step": 25235 }, { "epoch": 0.72, "grad_norm": 6.855750897027331, "learning_rate": 1.8845715008629562e-06, "loss": 0.5677, "step": 25236 }, { "epoch": 0.72, "grad_norm": 8.858488788718002, "learning_rate": 1.8842087839172862e-06, "loss": 0.3421, "step": 25237 }, { "epoch": 0.72, "grad_norm": 7.483330130063103, "learning_rate": 1.8838460937763858e-06, "loss": 0.3296, "step": 25238 }, { "epoch": 0.72, "grad_norm": 3.4579437258539194, "learning_rate": 1.8834834304433746e-06, "loss": 0.4521, "step": 25239 }, { "epoch": 0.72, "grad_norm": 4.760912431851861, "learning_rate": 1.8831207939213698e-06, "loss": 0.6047, "step": 25240 }, { "epoch": 0.72, "grad_norm": 5.277850665319763, "learning_rate": 1.8827581842134946e-06, "loss": 0.4036, "step": 25241 }, { "epoch": 0.72, "grad_norm": 4.314540269834175, "learning_rate": 1.882395601322865e-06, "loss": 0.3663, "step": 25242 }, { "epoch": 0.72, "grad_norm": 5.142679285198302, "learning_rate": 1.8820330452526041e-06, "loss": 0.3978, "step": 25243 }, { "epoch": 0.72, "grad_norm": 4.141060514758224, "learning_rate": 1.8816705160058275e-06, "loss": 0.4113, "step": 25244 }, { "epoch": 0.72, "grad_norm": 2.8194283026643987, "learning_rate": 1.8813080135856564e-06, "loss": 0.3817, "step": 25245 }, { "epoch": 0.72, "grad_norm": 14.87580968752821, "learning_rate": 1.8809455379952097e-06, "loss": 0.3945, "step": 25246 }, { "epoch": 0.72, "grad_norm": 4.145069227618359, "learning_rate": 1.880583089237602e-06, "loss": 0.1982, "step": 25247 }, { "epoch": 0.72, "grad_norm": 2.146834548835636, "learning_rate": 1.8802206673159557e-06, "loss": 0.192, "step": 25248 }, { "epoch": 0.72, "grad_norm": 9.253644508792139, "learning_rate": 1.8798582722333851e-06, "loss": 0.5936, "step": 25249 }, { "epoch": 0.72, "grad_norm": 8.74539401214218, "learning_rate": 1.8794959039930117e-06, "loss": 0.5021, "step": 25250 }, { "epoch": 0.72, "grad_norm": 8.039670485628536, "learning_rate": 1.879133562597949e-06, "loss": 0.7457, "step": 25251 }, { "epoch": 0.72, "grad_norm": 15.228817971860732, "learning_rate": 1.8787712480513177e-06, "loss": 0.5774, "step": 25252 }, { "epoch": 0.72, "grad_norm": 4.793926003518833, "learning_rate": 1.8784089603562332e-06, "loss": 0.2854, "step": 25253 }, { "epoch": 0.72, "grad_norm": 7.4441717020368845, "learning_rate": 1.8780466995158104e-06, "loss": 0.5813, "step": 25254 }, { "epoch": 0.72, "grad_norm": 8.673889036250324, "learning_rate": 1.8776844655331688e-06, "loss": 0.9176, "step": 25255 }, { "epoch": 0.72, "grad_norm": 6.827891609864612, "learning_rate": 1.8773222584114215e-06, "loss": 0.4301, "step": 25256 }, { "epoch": 0.72, "grad_norm": 4.316586009981782, "learning_rate": 1.8769600781536884e-06, "loss": 0.3813, "step": 25257 }, { "epoch": 0.72, "grad_norm": 5.758918128104473, "learning_rate": 1.8765979247630817e-06, "loss": 0.3521, "step": 25258 }, { "epoch": 0.72, "grad_norm": 4.568722857643838, "learning_rate": 1.8762357982427215e-06, "loss": 0.2481, "step": 25259 }, { "epoch": 0.72, "grad_norm": 4.1836095074126245, "learning_rate": 1.8758736985957172e-06, "loss": 0.2703, "step": 25260 }, { "epoch": 0.72, "grad_norm": 3.5521163528712467, "learning_rate": 1.8755116258251876e-06, "loss": 0.2929, "step": 25261 }, { "epoch": 0.72, "grad_norm": 8.37024360074371, "learning_rate": 1.8751495799342462e-06, "loss": 0.4726, "step": 25262 }, { "epoch": 0.72, "grad_norm": 6.51009157716999, "learning_rate": 1.8747875609260073e-06, "loss": 0.7345, "step": 25263 }, { "epoch": 0.72, "grad_norm": 7.987676501525374, "learning_rate": 1.8744255688035884e-06, "loss": 0.3904, "step": 25264 }, { "epoch": 0.72, "grad_norm": 6.880168636495313, "learning_rate": 1.8740636035701014e-06, "loss": 0.4103, "step": 25265 }, { "epoch": 0.72, "grad_norm": 4.0417696883976815, "learning_rate": 1.8737016652286605e-06, "loss": 0.2224, "step": 25266 }, { "epoch": 0.72, "grad_norm": 3.1813507978437463, "learning_rate": 1.8733397537823773e-06, "loss": 0.3874, "step": 25267 }, { "epoch": 0.72, "grad_norm": 3.7157795687032, "learning_rate": 1.8729778692343691e-06, "loss": 0.2617, "step": 25268 }, { "epoch": 0.72, "grad_norm": 4.897132371128808, "learning_rate": 1.8726160115877456e-06, "loss": 0.4611, "step": 25269 }, { "epoch": 0.72, "grad_norm": 2.4044618402777953, "learning_rate": 1.8722541808456212e-06, "loss": 0.3813, "step": 25270 }, { "epoch": 0.72, "grad_norm": 3.1440003130478256, "learning_rate": 1.8718923770111108e-06, "loss": 0.3529, "step": 25271 }, { "epoch": 0.72, "grad_norm": 5.553281560525215, "learning_rate": 1.8715306000873251e-06, "loss": 0.3476, "step": 25272 }, { "epoch": 0.72, "grad_norm": 7.413436686369393, "learning_rate": 1.8711688500773767e-06, "loss": 0.3095, "step": 25273 }, { "epoch": 0.72, "grad_norm": 11.213766792377488, "learning_rate": 1.870807126984376e-06, "loss": 0.8394, "step": 25274 }, { "epoch": 0.72, "grad_norm": 6.4665638274769055, "learning_rate": 1.8704454308114372e-06, "loss": 0.4064, "step": 25275 }, { "epoch": 0.72, "grad_norm": 8.07613340873737, "learning_rate": 1.87008376156167e-06, "loss": 0.4138, "step": 25276 }, { "epoch": 0.72, "grad_norm": 5.4787770798797455, "learning_rate": 1.8697221192381888e-06, "loss": 0.4071, "step": 25277 }, { "epoch": 0.72, "grad_norm": 9.48328817007063, "learning_rate": 1.8693605038441003e-06, "loss": 0.6152, "step": 25278 }, { "epoch": 0.72, "grad_norm": 5.552845323482678, "learning_rate": 1.86899891538252e-06, "loss": 0.4239, "step": 25279 }, { "epoch": 0.72, "grad_norm": 4.123602832805724, "learning_rate": 1.8686373538565566e-06, "loss": 0.5573, "step": 25280 }, { "epoch": 0.72, "grad_norm": 3.5933857608808806, "learning_rate": 1.8682758192693184e-06, "loss": 0.5935, "step": 25281 }, { "epoch": 0.72, "grad_norm": 5.291700328008966, "learning_rate": 1.8679143116239195e-06, "loss": 0.3359, "step": 25282 }, { "epoch": 0.72, "grad_norm": 3.3818196167691115, "learning_rate": 1.867552830923467e-06, "loss": 0.29, "step": 25283 }, { "epoch": 0.72, "grad_norm": 5.21401609789372, "learning_rate": 1.8671913771710732e-06, "loss": 0.1867, "step": 25284 }, { "epoch": 0.72, "grad_norm": 3.2084237164403517, "learning_rate": 1.8668299503698445e-06, "loss": 0.3377, "step": 25285 }, { "epoch": 0.72, "grad_norm": 3.5150731649692983, "learning_rate": 1.8664685505228958e-06, "loss": 0.4897, "step": 25286 }, { "epoch": 0.72, "grad_norm": 10.308949917172717, "learning_rate": 1.8661071776333283e-06, "loss": 0.4659, "step": 25287 }, { "epoch": 0.72, "grad_norm": 6.6788355412289375, "learning_rate": 1.8657458317042553e-06, "loss": 0.5037, "step": 25288 }, { "epoch": 0.72, "grad_norm": 3.567289679622396, "learning_rate": 1.8653845127387865e-06, "loss": 0.2483, "step": 25289 }, { "epoch": 0.72, "grad_norm": 3.622579095054621, "learning_rate": 1.8650232207400272e-06, "loss": 0.2161, "step": 25290 }, { "epoch": 0.72, "grad_norm": 4.518728173336431, "learning_rate": 1.8646619557110889e-06, "loss": 0.4078, "step": 25291 }, { "epoch": 0.72, "grad_norm": 4.496624316239243, "learning_rate": 1.8643007176550775e-06, "loss": 0.5573, "step": 25292 }, { "epoch": 0.72, "grad_norm": 4.293334862020667, "learning_rate": 1.8639395065751009e-06, "loss": 0.3837, "step": 25293 }, { "epoch": 0.72, "grad_norm": 3.2914250965244345, "learning_rate": 1.8635783224742654e-06, "loss": 0.1045, "step": 25294 }, { "epoch": 0.72, "grad_norm": 4.370281754277478, "learning_rate": 1.863217165355679e-06, "loss": 0.4018, "step": 25295 }, { "epoch": 0.72, "grad_norm": 12.792594072518991, "learning_rate": 1.8628560352224512e-06, "loss": 0.5753, "step": 25296 }, { "epoch": 0.72, "grad_norm": 5.143515472813062, "learning_rate": 1.8624949320776848e-06, "loss": 0.5422, "step": 25297 }, { "epoch": 0.72, "grad_norm": 6.5308009819205255, "learning_rate": 1.8621338559244895e-06, "loss": 0.6505, "step": 25298 }, { "epoch": 0.72, "grad_norm": 3.961528320189124, "learning_rate": 1.8617728067659708e-06, "loss": 0.1978, "step": 25299 }, { "epoch": 0.72, "grad_norm": 6.260635025185472, "learning_rate": 1.8614117846052343e-06, "loss": 0.5523, "step": 25300 }, { "epoch": 0.72, "grad_norm": 3.9687156375388635, "learning_rate": 1.8610507894453839e-06, "loss": 0.3373, "step": 25301 }, { "epoch": 0.72, "grad_norm": 5.635399952955956, "learning_rate": 1.8606898212895286e-06, "loss": 0.2743, "step": 25302 }, { "epoch": 0.72, "grad_norm": 4.9706387558761955, "learning_rate": 1.8603288801407705e-06, "loss": 0.3662, "step": 25303 }, { "epoch": 0.72, "grad_norm": 3.1875331353353933, "learning_rate": 1.859967966002217e-06, "loss": 0.2548, "step": 25304 }, { "epoch": 0.72, "grad_norm": 6.514057821709958, "learning_rate": 1.8596070788769739e-06, "loss": 0.6171, "step": 25305 }, { "epoch": 0.72, "grad_norm": 6.868736639174708, "learning_rate": 1.8592462187681442e-06, "loss": 0.4337, "step": 25306 }, { "epoch": 0.72, "grad_norm": 5.360824644701476, "learning_rate": 1.858885385678833e-06, "loss": 0.3372, "step": 25307 }, { "epoch": 0.72, "grad_norm": 26.53619708880059, "learning_rate": 1.858524579612142e-06, "loss": 0.7475, "step": 25308 }, { "epoch": 0.72, "grad_norm": 6.32571904162564, "learning_rate": 1.858163800571179e-06, "loss": 0.3215, "step": 25309 }, { "epoch": 0.72, "grad_norm": 3.339742826308884, "learning_rate": 1.8578030485590444e-06, "loss": 0.4248, "step": 25310 }, { "epoch": 0.72, "grad_norm": 5.2882533391237665, "learning_rate": 1.8574423235788436e-06, "loss": 0.2544, "step": 25311 }, { "epoch": 0.72, "grad_norm": 7.50298742559824, "learning_rate": 1.857081625633681e-06, "loss": 0.3636, "step": 25312 }, { "epoch": 0.72, "grad_norm": 7.873789194666279, "learning_rate": 1.8567209547266579e-06, "loss": 0.3734, "step": 25313 }, { "epoch": 0.72, "grad_norm": 3.387630106830939, "learning_rate": 1.856360310860878e-06, "loss": 0.4049, "step": 25314 }, { "epoch": 0.72, "grad_norm": 3.4011975535506327, "learning_rate": 1.8559996940394409e-06, "loss": 0.3463, "step": 25315 }, { "epoch": 0.72, "grad_norm": 6.932402482115544, "learning_rate": 1.8556391042654536e-06, "loss": 0.3275, "step": 25316 }, { "epoch": 0.73, "grad_norm": 7.070068624937677, "learning_rate": 1.8552785415420139e-06, "loss": 0.3677, "step": 25317 }, { "epoch": 0.73, "grad_norm": 6.064287453076214, "learning_rate": 1.8549180058722277e-06, "loss": 0.5602, "step": 25318 }, { "epoch": 0.73, "grad_norm": 5.9867048085919174, "learning_rate": 1.8545574972591945e-06, "loss": 0.3705, "step": 25319 }, { "epoch": 0.73, "grad_norm": 5.965452471676687, "learning_rate": 1.854197015706014e-06, "loss": 0.729, "step": 25320 }, { "epoch": 0.73, "grad_norm": 7.782214307972846, "learning_rate": 1.8538365612157915e-06, "loss": 0.449, "step": 25321 }, { "epoch": 0.73, "grad_norm": 6.45224924125698, "learning_rate": 1.8534761337916235e-06, "loss": 0.7567, "step": 25322 }, { "epoch": 0.73, "grad_norm": 3.1129737616002124, "learning_rate": 1.853115733436615e-06, "loss": 0.2619, "step": 25323 }, { "epoch": 0.73, "grad_norm": 6.072636143621533, "learning_rate": 1.8527553601538622e-06, "loss": 0.8424, "step": 25324 }, { "epoch": 0.73, "grad_norm": 3.503458698315075, "learning_rate": 1.8523950139464698e-06, "loss": 0.2584, "step": 25325 }, { "epoch": 0.73, "grad_norm": 3.837248211158327, "learning_rate": 1.852034694817535e-06, "loss": 0.6749, "step": 25326 }, { "epoch": 0.73, "grad_norm": 5.8354600662566005, "learning_rate": 1.8516744027701582e-06, "loss": 0.4913, "step": 25327 }, { "epoch": 0.73, "grad_norm": 8.155808477034174, "learning_rate": 1.8513141378074379e-06, "loss": 0.483, "step": 25328 }, { "epoch": 0.73, "grad_norm": 6.193855374052887, "learning_rate": 1.850953899932474e-06, "loss": 0.2979, "step": 25329 }, { "epoch": 0.73, "grad_norm": 7.87881861549553, "learning_rate": 1.8505936891483671e-06, "loss": 0.4369, "step": 25330 }, { "epoch": 0.73, "grad_norm": 3.237010655923863, "learning_rate": 1.8502335054582139e-06, "loss": 0.3596, "step": 25331 }, { "epoch": 0.73, "grad_norm": 6.458111884309532, "learning_rate": 1.8498733488651156e-06, "loss": 0.318, "step": 25332 }, { "epoch": 0.73, "grad_norm": 6.034464399563528, "learning_rate": 1.8495132193721687e-06, "loss": 0.343, "step": 25333 }, { "epoch": 0.73, "grad_norm": 5.965762843861974, "learning_rate": 1.8491531169824716e-06, "loss": 0.5657, "step": 25334 }, { "epoch": 0.73, "grad_norm": 5.6406982924330675, "learning_rate": 1.8487930416991211e-06, "loss": 0.8306, "step": 25335 }, { "epoch": 0.73, "grad_norm": 6.278127226157432, "learning_rate": 1.848432993525216e-06, "loss": 0.3228, "step": 25336 }, { "epoch": 0.73, "grad_norm": 5.566486130526071, "learning_rate": 1.8480729724638553e-06, "loss": 0.4908, "step": 25337 }, { "epoch": 0.73, "grad_norm": 5.709705642622098, "learning_rate": 1.847712978518133e-06, "loss": 0.537, "step": 25338 }, { "epoch": 0.73, "grad_norm": 6.104534993001017, "learning_rate": 1.8473530116911499e-06, "loss": 0.5634, "step": 25339 }, { "epoch": 0.73, "grad_norm": 7.743776452832919, "learning_rate": 1.8469930719860008e-06, "loss": 0.7362, "step": 25340 }, { "epoch": 0.73, "grad_norm": 8.402397038544844, "learning_rate": 1.8466331594057812e-06, "loss": 0.6701, "step": 25341 }, { "epoch": 0.73, "grad_norm": 6.2516060863635365, "learning_rate": 1.846273273953587e-06, "loss": 0.4006, "step": 25342 }, { "epoch": 0.73, "grad_norm": 5.1712863736974635, "learning_rate": 1.8459134156325177e-06, "loss": 0.3726, "step": 25343 }, { "epoch": 0.73, "grad_norm": 2.407490630299962, "learning_rate": 1.845553584445665e-06, "loss": 0.0469, "step": 25344 }, { "epoch": 0.73, "grad_norm": 6.415466377984215, "learning_rate": 1.8451937803961262e-06, "loss": 0.4347, "step": 25345 }, { "epoch": 0.73, "grad_norm": 4.1716222650789145, "learning_rate": 1.8448340034870004e-06, "loss": 0.1784, "step": 25346 }, { "epoch": 0.73, "grad_norm": 1.8033720977695333, "learning_rate": 1.8444742537213762e-06, "loss": 0.0785, "step": 25347 }, { "epoch": 0.73, "grad_norm": 4.490249374915477, "learning_rate": 1.8441145311023522e-06, "loss": 0.2879, "step": 25348 }, { "epoch": 0.73, "grad_norm": 4.7649307996082095, "learning_rate": 1.8437548356330216e-06, "loss": 0.4656, "step": 25349 }, { "epoch": 0.73, "grad_norm": 7.8252064942727895, "learning_rate": 1.8433951673164807e-06, "loss": 0.4266, "step": 25350 }, { "epoch": 0.73, "grad_norm": 3.289085478713711, "learning_rate": 1.843035526155821e-06, "loss": 0.4086, "step": 25351 }, { "epoch": 0.73, "grad_norm": 6.685320846744275, "learning_rate": 1.8426759121541393e-06, "loss": 0.597, "step": 25352 }, { "epoch": 0.73, "grad_norm": 5.932941051795338, "learning_rate": 1.8423163253145287e-06, "loss": 0.3949, "step": 25353 }, { "epoch": 0.73, "grad_norm": 4.717981389586713, "learning_rate": 1.84195676564008e-06, "loss": 0.9324, "step": 25354 }, { "epoch": 0.73, "grad_norm": 2.2147454806801767, "learning_rate": 1.84159723313389e-06, "loss": 0.1228, "step": 25355 }, { "epoch": 0.73, "grad_norm": 3.7165691648760464, "learning_rate": 1.8412377277990483e-06, "loss": 0.3319, "step": 25356 }, { "epoch": 0.73, "grad_norm": 7.951649542084948, "learning_rate": 1.8408782496386512e-06, "loss": 0.5703, "step": 25357 }, { "epoch": 0.73, "grad_norm": 4.585129050848335, "learning_rate": 1.8405187986557882e-06, "loss": 0.306, "step": 25358 }, { "epoch": 0.73, "grad_norm": 6.764776233882894, "learning_rate": 1.8401593748535545e-06, "loss": 0.9835, "step": 25359 }, { "epoch": 0.73, "grad_norm": 6.814423140910585, "learning_rate": 1.8397999782350407e-06, "loss": 0.4907, "step": 25360 }, { "epoch": 0.73, "grad_norm": 5.279084207341913, "learning_rate": 1.8394406088033367e-06, "loss": 0.6146, "step": 25361 }, { "epoch": 0.73, "grad_norm": 6.333473187291135, "learning_rate": 1.8390812665615376e-06, "loss": 0.6091, "step": 25362 }, { "epoch": 0.73, "grad_norm": 6.23253418938677, "learning_rate": 1.8387219515127319e-06, "loss": 0.2844, "step": 25363 }, { "epoch": 0.73, "grad_norm": 5.054516655276669, "learning_rate": 1.8383626636600132e-06, "loss": 0.4255, "step": 25364 }, { "epoch": 0.73, "grad_norm": 5.86692974067883, "learning_rate": 1.8380034030064703e-06, "loss": 0.6632, "step": 25365 }, { "epoch": 0.73, "grad_norm": 4.109528513314588, "learning_rate": 1.837644169555196e-06, "loss": 0.4463, "step": 25366 }, { "epoch": 0.73, "grad_norm": 3.84988552517816, "learning_rate": 1.8372849633092792e-06, "loss": 0.353, "step": 25367 }, { "epoch": 0.73, "grad_norm": 4.4602653143960715, "learning_rate": 1.8369257842718113e-06, "loss": 0.401, "step": 25368 }, { "epoch": 0.73, "grad_norm": 6.677960410118138, "learning_rate": 1.8365666324458792e-06, "loss": 0.5487, "step": 25369 }, { "epoch": 0.73, "grad_norm": 8.187900766363033, "learning_rate": 1.8362075078345747e-06, "loss": 0.6554, "step": 25370 }, { "epoch": 0.73, "grad_norm": 9.51206785967389, "learning_rate": 1.8358484104409896e-06, "loss": 0.9907, "step": 25371 }, { "epoch": 0.73, "grad_norm": 12.222788891997597, "learning_rate": 1.8354893402682095e-06, "loss": 0.9162, "step": 25372 }, { "epoch": 0.73, "grad_norm": 6.723601678736015, "learning_rate": 1.8351302973193262e-06, "loss": 0.5729, "step": 25373 }, { "epoch": 0.73, "grad_norm": 9.530804970779592, "learning_rate": 1.8347712815974273e-06, "loss": 0.6269, "step": 25374 }, { "epoch": 0.73, "grad_norm": 5.767308472692922, "learning_rate": 1.834412293105602e-06, "loss": 0.5305, "step": 25375 }, { "epoch": 0.73, "grad_norm": 4.035584389203252, "learning_rate": 1.8340533318469355e-06, "loss": 0.4535, "step": 25376 }, { "epoch": 0.73, "grad_norm": 8.718807808195594, "learning_rate": 1.8336943978245186e-06, "loss": 0.4286, "step": 25377 }, { "epoch": 0.73, "grad_norm": 4.344073331586263, "learning_rate": 1.8333354910414409e-06, "loss": 0.1555, "step": 25378 }, { "epoch": 0.73, "grad_norm": 13.2173553524685, "learning_rate": 1.832976611500788e-06, "loss": 0.9463, "step": 25379 }, { "epoch": 0.73, "grad_norm": 2.3130595587002785, "learning_rate": 1.832617759205647e-06, "loss": 0.2728, "step": 25380 }, { "epoch": 0.73, "grad_norm": 5.60977269200048, "learning_rate": 1.832258934159104e-06, "loss": 0.5534, "step": 25381 }, { "epoch": 0.73, "grad_norm": 6.237257097671019, "learning_rate": 1.8319001363642492e-06, "loss": 0.1078, "step": 25382 }, { "epoch": 0.73, "grad_norm": 5.276253226865981, "learning_rate": 1.8315413658241654e-06, "loss": 0.4801, "step": 25383 }, { "epoch": 0.73, "grad_norm": 4.9754611582734505, "learning_rate": 1.8311826225419433e-06, "loss": 0.5009, "step": 25384 }, { "epoch": 0.73, "grad_norm": 6.344298766505569, "learning_rate": 1.830823906520665e-06, "loss": 0.4965, "step": 25385 }, { "epoch": 0.73, "grad_norm": 6.570335215779883, "learning_rate": 1.8304652177634202e-06, "loss": 0.6874, "step": 25386 }, { "epoch": 0.73, "grad_norm": 9.899485158537981, "learning_rate": 1.8301065562732928e-06, "loss": 0.5723, "step": 25387 }, { "epoch": 0.73, "grad_norm": 4.679479094435685, "learning_rate": 1.8297479220533665e-06, "loss": 0.5221, "step": 25388 }, { "epoch": 0.73, "grad_norm": 5.518542763436065, "learning_rate": 1.8293893151067305e-06, "loss": 0.521, "step": 25389 }, { "epoch": 0.73, "grad_norm": 6.47016703247065, "learning_rate": 1.829030735436466e-06, "loss": 0.6462, "step": 25390 }, { "epoch": 0.73, "grad_norm": 8.090649574333817, "learning_rate": 1.8286721830456616e-06, "loss": 0.6965, "step": 25391 }, { "epoch": 0.73, "grad_norm": 3.503854672531831, "learning_rate": 1.8283136579373983e-06, "loss": 0.3393, "step": 25392 }, { "epoch": 0.73, "grad_norm": 4.08181823466707, "learning_rate": 1.8279551601147633e-06, "loss": 0.4781, "step": 25393 }, { "epoch": 0.73, "grad_norm": 4.328029700376525, "learning_rate": 1.82759668958084e-06, "loss": 0.5081, "step": 25394 }, { "epoch": 0.73, "grad_norm": 4.655594286736035, "learning_rate": 1.8272382463387101e-06, "loss": 0.4036, "step": 25395 }, { "epoch": 0.73, "grad_norm": 7.261521491151497, "learning_rate": 1.8268798303914602e-06, "loss": 0.4915, "step": 25396 }, { "epoch": 0.73, "grad_norm": 3.0122471604354026, "learning_rate": 1.8265214417421707e-06, "loss": 0.1931, "step": 25397 }, { "epoch": 0.73, "grad_norm": 5.087850278375895, "learning_rate": 1.8261630803939284e-06, "loss": 0.35, "step": 25398 }, { "epoch": 0.73, "grad_norm": 6.410225281468638, "learning_rate": 1.825804746349813e-06, "loss": 0.5001, "step": 25399 }, { "epoch": 0.73, "grad_norm": 6.920276569230975, "learning_rate": 1.8254464396129096e-06, "loss": 0.6062, "step": 25400 }, { "epoch": 0.73, "grad_norm": 5.497967604612824, "learning_rate": 1.8250881601863002e-06, "loss": 0.4136, "step": 25401 }, { "epoch": 0.73, "grad_norm": 5.3460871510100505, "learning_rate": 1.8247299080730646e-06, "loss": 0.4976, "step": 25402 }, { "epoch": 0.73, "grad_norm": 3.6657800776596536, "learning_rate": 1.8243716832762881e-06, "loss": 0.2307, "step": 25403 }, { "epoch": 0.73, "grad_norm": 4.8887547338507185, "learning_rate": 1.8240134857990493e-06, "loss": 0.6225, "step": 25404 }, { "epoch": 0.73, "grad_norm": 5.2439061856567495, "learning_rate": 1.8236553156444337e-06, "loss": 0.7058, "step": 25405 }, { "epoch": 0.73, "grad_norm": 5.421444702662933, "learning_rate": 1.8232971728155196e-06, "loss": 0.226, "step": 25406 }, { "epoch": 0.73, "grad_norm": 6.030633321671937, "learning_rate": 1.8229390573153894e-06, "loss": 0.1937, "step": 25407 }, { "epoch": 0.73, "grad_norm": 7.788784439189541, "learning_rate": 1.8225809691471214e-06, "loss": 0.715, "step": 25408 }, { "epoch": 0.73, "grad_norm": 6.766331571900886, "learning_rate": 1.8222229083137999e-06, "loss": 0.4142, "step": 25409 }, { "epoch": 0.73, "grad_norm": 4.800447308043262, "learning_rate": 1.8218648748185014e-06, "loss": 0.5476, "step": 25410 }, { "epoch": 0.73, "grad_norm": 3.217503130243612, "learning_rate": 1.8215068686643084e-06, "loss": 0.2065, "step": 25411 }, { "epoch": 0.73, "grad_norm": 7.51451123207162, "learning_rate": 1.8211488898543018e-06, "loss": 0.5644, "step": 25412 }, { "epoch": 0.73, "grad_norm": 12.2955242806279, "learning_rate": 1.8207909383915606e-06, "loss": 0.598, "step": 25413 }, { "epoch": 0.73, "grad_norm": 3.7808785374302922, "learning_rate": 1.8204330142791627e-06, "loss": 0.4758, "step": 25414 }, { "epoch": 0.73, "grad_norm": 3.61810307625095, "learning_rate": 1.8200751175201869e-06, "loss": 0.3567, "step": 25415 }, { "epoch": 0.73, "grad_norm": 5.869639081305911, "learning_rate": 1.819717248117715e-06, "loss": 0.7785, "step": 25416 }, { "epoch": 0.73, "grad_norm": 7.307997401428596, "learning_rate": 1.819359406074822e-06, "loss": 0.5937, "step": 25417 }, { "epoch": 0.73, "grad_norm": 8.93555912204591, "learning_rate": 1.819001591394589e-06, "loss": 0.7406, "step": 25418 }, { "epoch": 0.73, "grad_norm": 4.954509577688873, "learning_rate": 1.8186438040800951e-06, "loss": 0.6015, "step": 25419 }, { "epoch": 0.73, "grad_norm": 7.303812131602753, "learning_rate": 1.818286044134417e-06, "loss": 0.5714, "step": 25420 }, { "epoch": 0.73, "grad_norm": 4.979004744233833, "learning_rate": 1.8179283115606321e-06, "loss": 0.3933, "step": 25421 }, { "epoch": 0.73, "grad_norm": 3.022946537184847, "learning_rate": 1.8175706063618165e-06, "loss": 0.3057, "step": 25422 }, { "epoch": 0.73, "grad_norm": 5.092699515957403, "learning_rate": 1.8172129285410506e-06, "loss": 0.4229, "step": 25423 }, { "epoch": 0.73, "grad_norm": 5.2316906550865, "learning_rate": 1.8168552781014088e-06, "loss": 0.4338, "step": 25424 }, { "epoch": 0.73, "grad_norm": 8.74841179739189, "learning_rate": 1.8164976550459706e-06, "loss": 0.7209, "step": 25425 }, { "epoch": 0.73, "grad_norm": 5.578936819631951, "learning_rate": 1.8161400593778095e-06, "loss": 0.5973, "step": 25426 }, { "epoch": 0.73, "grad_norm": 3.970038617808659, "learning_rate": 1.815782491100005e-06, "loss": 0.4818, "step": 25427 }, { "epoch": 0.73, "grad_norm": 3.779888790805725, "learning_rate": 1.8154249502156312e-06, "loss": 0.2888, "step": 25428 }, { "epoch": 0.73, "grad_norm": 4.800918763366303, "learning_rate": 1.8150674367277637e-06, "loss": 0.3409, "step": 25429 }, { "epoch": 0.73, "grad_norm": 4.569936179293486, "learning_rate": 1.8147099506394799e-06, "loss": 0.2396, "step": 25430 }, { "epoch": 0.73, "grad_norm": 4.880460077860922, "learning_rate": 1.814352491953853e-06, "loss": 0.4862, "step": 25431 }, { "epoch": 0.73, "grad_norm": 6.677432887726598, "learning_rate": 1.8139950606739608e-06, "loss": 0.5694, "step": 25432 }, { "epoch": 0.73, "grad_norm": 3.51458982698074, "learning_rate": 1.8136376568028751e-06, "loss": 0.2512, "step": 25433 }, { "epoch": 0.73, "grad_norm": 2.1688357982099205, "learning_rate": 1.8132802803436761e-06, "loss": 0.1112, "step": 25434 }, { "epoch": 0.73, "grad_norm": 5.449429875047341, "learning_rate": 1.8129229312994307e-06, "loss": 0.5628, "step": 25435 }, { "epoch": 0.73, "grad_norm": 4.230901426554151, "learning_rate": 1.812565609673217e-06, "loss": 0.17, "step": 25436 }, { "epoch": 0.73, "grad_norm": 5.991656641715218, "learning_rate": 1.8122083154681102e-06, "loss": 0.5461, "step": 25437 }, { "epoch": 0.73, "grad_norm": 3.7204389503390694, "learning_rate": 1.811851048687182e-06, "loss": 0.5919, "step": 25438 }, { "epoch": 0.73, "grad_norm": 2.9781465695731857, "learning_rate": 1.8114938093335078e-06, "loss": 0.2476, "step": 25439 }, { "epoch": 0.73, "grad_norm": 9.024371421671118, "learning_rate": 1.8111365974101596e-06, "loss": 0.5756, "step": 25440 }, { "epoch": 0.73, "grad_norm": 4.3196863752009165, "learning_rate": 1.810779412920211e-06, "loss": 0.2526, "step": 25441 }, { "epoch": 0.73, "grad_norm": 6.242448364459795, "learning_rate": 1.8104222558667328e-06, "loss": 0.7049, "step": 25442 }, { "epoch": 0.73, "grad_norm": 3.984908363695861, "learning_rate": 1.810065126252799e-06, "loss": 0.2608, "step": 25443 }, { "epoch": 0.73, "grad_norm": 7.683470623695734, "learning_rate": 1.809708024081484e-06, "loss": 0.5071, "step": 25444 }, { "epoch": 0.73, "grad_norm": 4.018212490413902, "learning_rate": 1.8093509493558564e-06, "loss": 0.3687, "step": 25445 }, { "epoch": 0.73, "grad_norm": 8.202615892869904, "learning_rate": 1.8089939020789914e-06, "loss": 0.3609, "step": 25446 }, { "epoch": 0.73, "grad_norm": 5.37050699254188, "learning_rate": 1.8086368822539586e-06, "loss": 0.2649, "step": 25447 }, { "epoch": 0.73, "grad_norm": 7.645831559377633, "learning_rate": 1.8082798898838305e-06, "loss": 0.6873, "step": 25448 }, { "epoch": 0.73, "grad_norm": 5.382631141032421, "learning_rate": 1.8079229249716751e-06, "loss": 0.3202, "step": 25449 }, { "epoch": 0.73, "grad_norm": 3.045688613361907, "learning_rate": 1.807565987520568e-06, "loss": 0.2058, "step": 25450 }, { "epoch": 0.73, "grad_norm": 3.421998078384172, "learning_rate": 1.8072090775335755e-06, "loss": 0.2031, "step": 25451 }, { "epoch": 0.73, "grad_norm": 5.589910649419783, "learning_rate": 1.8068521950137708e-06, "loss": 0.5539, "step": 25452 }, { "epoch": 0.73, "grad_norm": 6.609211299663604, "learning_rate": 1.8064953399642243e-06, "loss": 0.6287, "step": 25453 }, { "epoch": 0.73, "grad_norm": 3.9535792998165276, "learning_rate": 1.806138512388006e-06, "loss": 0.5352, "step": 25454 }, { "epoch": 0.73, "grad_norm": 5.261466379209217, "learning_rate": 1.8057817122881844e-06, "loss": 0.5196, "step": 25455 }, { "epoch": 0.73, "grad_norm": 5.7062484757921705, "learning_rate": 1.8054249396678281e-06, "loss": 0.77, "step": 25456 }, { "epoch": 0.73, "grad_norm": 4.384907104656527, "learning_rate": 1.8050681945300092e-06, "loss": 0.6142, "step": 25457 }, { "epoch": 0.73, "grad_norm": 5.010577457673974, "learning_rate": 1.8047114768777936e-06, "loss": 0.727, "step": 25458 }, { "epoch": 0.73, "grad_norm": 5.156969101208726, "learning_rate": 1.804354786714252e-06, "loss": 0.323, "step": 25459 }, { "epoch": 0.73, "grad_norm": 4.330677120089442, "learning_rate": 1.8039981240424537e-06, "loss": 0.2347, "step": 25460 }, { "epoch": 0.73, "grad_norm": 5.8024510301909205, "learning_rate": 1.8036414888654668e-06, "loss": 0.6353, "step": 25461 }, { "epoch": 0.73, "grad_norm": 7.691301816867745, "learning_rate": 1.803284881186358e-06, "loss": 0.2627, "step": 25462 }, { "epoch": 0.73, "grad_norm": 8.101023642296049, "learning_rate": 1.8029283010081944e-06, "loss": 0.5702, "step": 25463 }, { "epoch": 0.73, "grad_norm": 7.114715381325903, "learning_rate": 1.8025717483340466e-06, "loss": 0.6666, "step": 25464 }, { "epoch": 0.73, "grad_norm": 6.438687668655836, "learning_rate": 1.8022152231669793e-06, "loss": 0.4641, "step": 25465 }, { "epoch": 0.73, "grad_norm": 3.258783886304933, "learning_rate": 1.8018587255100616e-06, "loss": 0.2246, "step": 25466 }, { "epoch": 0.73, "grad_norm": 8.044143657640223, "learning_rate": 1.8015022553663596e-06, "loss": 0.45, "step": 25467 }, { "epoch": 0.73, "grad_norm": 9.666982642852206, "learning_rate": 1.8011458127389386e-06, "loss": 0.3326, "step": 25468 }, { "epoch": 0.73, "grad_norm": 4.755074651314656, "learning_rate": 1.8007893976308678e-06, "loss": 0.7411, "step": 25469 }, { "epoch": 0.73, "grad_norm": 4.294132897463668, "learning_rate": 1.80043301004521e-06, "loss": 0.4372, "step": 25470 }, { "epoch": 0.73, "grad_norm": 5.6054080122471595, "learning_rate": 1.8000766499850352e-06, "loss": 0.387, "step": 25471 }, { "epoch": 0.73, "grad_norm": 6.0586523056889865, "learning_rate": 1.799720317453405e-06, "loss": 0.5128, "step": 25472 }, { "epoch": 0.73, "grad_norm": 3.829387472190613, "learning_rate": 1.7993640124533883e-06, "loss": 0.3176, "step": 25473 }, { "epoch": 0.73, "grad_norm": 6.472972247479846, "learning_rate": 1.7990077349880497e-06, "loss": 0.4673, "step": 25474 }, { "epoch": 0.73, "grad_norm": 3.0400974110004406, "learning_rate": 1.798651485060453e-06, "loss": 0.2247, "step": 25475 }, { "epoch": 0.73, "grad_norm": 5.922986733953229, "learning_rate": 1.7982952626736616e-06, "loss": 0.3593, "step": 25476 }, { "epoch": 0.73, "grad_norm": 5.456999028410431, "learning_rate": 1.7979390678307423e-06, "loss": 0.2734, "step": 25477 }, { "epoch": 0.73, "grad_norm": 3.8235715604849627, "learning_rate": 1.79758290053476e-06, "loss": 0.2476, "step": 25478 }, { "epoch": 0.73, "grad_norm": 4.902309609100491, "learning_rate": 1.7972267607887766e-06, "loss": 0.5226, "step": 25479 }, { "epoch": 0.73, "grad_norm": 4.576792786789791, "learning_rate": 1.7968706485958582e-06, "loss": 0.4608, "step": 25480 }, { "epoch": 0.73, "grad_norm": 2.9697226687715963, "learning_rate": 1.7965145639590675e-06, "loss": 0.2773, "step": 25481 }, { "epoch": 0.73, "grad_norm": 3.3951559775903317, "learning_rate": 1.796158506881468e-06, "loss": 0.0918, "step": 25482 }, { "epoch": 0.73, "grad_norm": 14.902837846596803, "learning_rate": 1.7958024773661203e-06, "loss": 0.8146, "step": 25483 }, { "epoch": 0.73, "grad_norm": 3.9616818904196367, "learning_rate": 1.7954464754160894e-06, "loss": 0.4605, "step": 25484 }, { "epoch": 0.73, "grad_norm": 8.1588652645506, "learning_rate": 1.7950905010344394e-06, "loss": 0.5635, "step": 25485 }, { "epoch": 0.73, "grad_norm": 5.813239429981195, "learning_rate": 1.7947345542242295e-06, "loss": 0.1664, "step": 25486 }, { "epoch": 0.73, "grad_norm": 4.355526582479345, "learning_rate": 1.7943786349885257e-06, "loss": 0.7608, "step": 25487 }, { "epoch": 0.73, "grad_norm": 4.866551922589656, "learning_rate": 1.7940227433303874e-06, "loss": 0.3611, "step": 25488 }, { "epoch": 0.73, "grad_norm": 5.27817614108798, "learning_rate": 1.7936668792528771e-06, "loss": 0.207, "step": 25489 }, { "epoch": 0.73, "grad_norm": 6.400788207244165, "learning_rate": 1.7933110427590538e-06, "loss": 0.5838, "step": 25490 }, { "epoch": 0.73, "grad_norm": 4.990849944069155, "learning_rate": 1.792955233851983e-06, "loss": 0.32, "step": 25491 }, { "epoch": 0.73, "grad_norm": 3.856767893534005, "learning_rate": 1.7925994525347212e-06, "loss": 0.2078, "step": 25492 }, { "epoch": 0.73, "grad_norm": 3.568281702157774, "learning_rate": 1.7922436988103336e-06, "loss": 0.3708, "step": 25493 }, { "epoch": 0.73, "grad_norm": 9.206758206202686, "learning_rate": 1.7918879726818784e-06, "loss": 0.6439, "step": 25494 }, { "epoch": 0.73, "grad_norm": 4.843199680199091, "learning_rate": 1.7915322741524137e-06, "loss": 0.6658, "step": 25495 }, { "epoch": 0.73, "grad_norm": 6.555341212850832, "learning_rate": 1.7911766032250044e-06, "loss": 0.523, "step": 25496 }, { "epoch": 0.73, "grad_norm": 4.307371268715006, "learning_rate": 1.7908209599027054e-06, "loss": 0.316, "step": 25497 }, { "epoch": 0.73, "grad_norm": 11.587396654575555, "learning_rate": 1.7904653441885806e-06, "loss": 0.6937, "step": 25498 }, { "epoch": 0.73, "grad_norm": 6.468484550595032, "learning_rate": 1.790109756085685e-06, "loss": 0.3048, "step": 25499 }, { "epoch": 0.73, "grad_norm": 2.7312540390368873, "learning_rate": 1.7897541955970822e-06, "loss": 0.2686, "step": 25500 }, { "epoch": 0.73, "grad_norm": 4.507932135781117, "learning_rate": 1.7893986627258287e-06, "loss": 0.2864, "step": 25501 }, { "epoch": 0.73, "grad_norm": 4.7747902459614995, "learning_rate": 1.7890431574749812e-06, "loss": 0.5713, "step": 25502 }, { "epoch": 0.73, "grad_norm": 3.0624481897448024, "learning_rate": 1.7886876798476022e-06, "loss": 0.2801, "step": 25503 }, { "epoch": 0.73, "grad_norm": 7.552534741470453, "learning_rate": 1.788332229846746e-06, "loss": 0.5456, "step": 25504 }, { "epoch": 0.73, "grad_norm": 6.24990360185669, "learning_rate": 1.7879768074754734e-06, "loss": 0.4134, "step": 25505 }, { "epoch": 0.73, "grad_norm": 7.317390045810018, "learning_rate": 1.7876214127368397e-06, "loss": 0.549, "step": 25506 }, { "epoch": 0.73, "grad_norm": 3.573392762178062, "learning_rate": 1.7872660456339046e-06, "loss": 0.3188, "step": 25507 }, { "epoch": 0.73, "grad_norm": 6.9862234583930825, "learning_rate": 1.7869107061697243e-06, "loss": 0.4362, "step": 25508 }, { "epoch": 0.73, "grad_norm": 5.386863656754041, "learning_rate": 1.7865553943473546e-06, "loss": 0.4425, "step": 25509 }, { "epoch": 0.73, "grad_norm": 5.098236705720234, "learning_rate": 1.786200110169854e-06, "loss": 0.5844, "step": 25510 }, { "epoch": 0.73, "grad_norm": 3.5039828663150168, "learning_rate": 1.7858448536402767e-06, "loss": 0.4285, "step": 25511 }, { "epoch": 0.73, "grad_norm": 5.03729193283773, "learning_rate": 1.7854896247616827e-06, "loss": 0.5017, "step": 25512 }, { "epoch": 0.73, "grad_norm": 5.255697428612173, "learning_rate": 1.7851344235371232e-06, "loss": 0.5101, "step": 25513 }, { "epoch": 0.73, "grad_norm": 6.359385872346549, "learning_rate": 1.7847792499696582e-06, "loss": 0.5175, "step": 25514 }, { "epoch": 0.73, "grad_norm": 6.634059290083046, "learning_rate": 1.7844241040623416e-06, "loss": 0.3823, "step": 25515 }, { "epoch": 0.73, "grad_norm": 4.433136796299637, "learning_rate": 1.7840689858182286e-06, "loss": 0.253, "step": 25516 }, { "epoch": 0.73, "grad_norm": 3.348371502772811, "learning_rate": 1.7837138952403726e-06, "loss": 0.3072, "step": 25517 }, { "epoch": 0.73, "grad_norm": 7.191433303011309, "learning_rate": 1.7833588323318295e-06, "loss": 0.7183, "step": 25518 }, { "epoch": 0.73, "grad_norm": 11.814070299517063, "learning_rate": 1.7830037970956565e-06, "loss": 0.4973, "step": 25519 }, { "epoch": 0.73, "grad_norm": 4.781710085205787, "learning_rate": 1.7826487895349038e-06, "loss": 0.4733, "step": 25520 }, { "epoch": 0.73, "grad_norm": 4.5169369596752915, "learning_rate": 1.782293809652631e-06, "loss": 0.2199, "step": 25521 }, { "epoch": 0.73, "grad_norm": 4.1052796368366335, "learning_rate": 1.7819388574518847e-06, "loss": 0.2828, "step": 25522 }, { "epoch": 0.73, "grad_norm": 3.9623076647942246, "learning_rate": 1.7815839329357243e-06, "loss": 0.3762, "step": 25523 }, { "epoch": 0.73, "grad_norm": 7.766449729061727, "learning_rate": 1.7812290361071993e-06, "loss": 0.6527, "step": 25524 }, { "epoch": 0.73, "grad_norm": 4.792690452448916, "learning_rate": 1.780874166969364e-06, "loss": 0.1739, "step": 25525 }, { "epoch": 0.73, "grad_norm": 6.171390485456808, "learning_rate": 1.7805193255252746e-06, "loss": 0.5628, "step": 25526 }, { "epoch": 0.73, "grad_norm": 6.757964945049423, "learning_rate": 1.7801645117779803e-06, "loss": 0.5508, "step": 25527 }, { "epoch": 0.73, "grad_norm": 6.875353335924266, "learning_rate": 1.779809725730534e-06, "loss": 0.3579, "step": 25528 }, { "epoch": 0.73, "grad_norm": 2.3470816456502948, "learning_rate": 1.7794549673859873e-06, "loss": 0.0933, "step": 25529 }, { "epoch": 0.73, "grad_norm": 5.607584760606084, "learning_rate": 1.7791002367473942e-06, "loss": 0.6343, "step": 25530 }, { "epoch": 0.73, "grad_norm": 7.0927522604624995, "learning_rate": 1.7787455338178034e-06, "loss": 0.6308, "step": 25531 }, { "epoch": 0.73, "grad_norm": 4.356006483060918, "learning_rate": 1.77839085860027e-06, "loss": 0.2379, "step": 25532 }, { "epoch": 0.73, "grad_norm": 7.131125025945479, "learning_rate": 1.7780362110978415e-06, "loss": 0.4934, "step": 25533 }, { "epoch": 0.73, "grad_norm": 10.352708791927084, "learning_rate": 1.777681591313573e-06, "loss": 0.3693, "step": 25534 }, { "epoch": 0.73, "grad_norm": 5.951796415964461, "learning_rate": 1.7773269992505127e-06, "loss": 0.6727, "step": 25535 }, { "epoch": 0.73, "grad_norm": 4.545908376189122, "learning_rate": 1.7769724349117095e-06, "loss": 0.4941, "step": 25536 }, { "epoch": 0.73, "grad_norm": 4.365410936441703, "learning_rate": 1.776617898300218e-06, "loss": 0.3461, "step": 25537 }, { "epoch": 0.73, "grad_norm": 5.44017081602571, "learning_rate": 1.7762633894190834e-06, "loss": 0.3995, "step": 25538 }, { "epoch": 0.73, "grad_norm": 4.7012331633303095, "learning_rate": 1.7759089082713605e-06, "loss": 0.4179, "step": 25539 }, { "epoch": 0.73, "grad_norm": 3.7469114618380552, "learning_rate": 1.775554454860094e-06, "loss": 0.4851, "step": 25540 }, { "epoch": 0.73, "grad_norm": 8.77493971670805, "learning_rate": 1.775200029188337e-06, "loss": 0.5762, "step": 25541 }, { "epoch": 0.73, "grad_norm": 4.0362131778553145, "learning_rate": 1.7748456312591372e-06, "loss": 0.4105, "step": 25542 }, { "epoch": 0.73, "grad_norm": 4.138766249426324, "learning_rate": 1.774491261075542e-06, "loss": 0.5516, "step": 25543 }, { "epoch": 0.73, "grad_norm": 4.720543703631489, "learning_rate": 1.7741369186406027e-06, "loss": 0.318, "step": 25544 }, { "epoch": 0.73, "grad_norm": 5.2674073505265016, "learning_rate": 1.7737826039573653e-06, "loss": 0.4421, "step": 25545 }, { "epoch": 0.73, "grad_norm": 6.751333670080903, "learning_rate": 1.7734283170288795e-06, "loss": 0.549, "step": 25546 }, { "epoch": 0.73, "grad_norm": 6.414369192469247, "learning_rate": 1.7730740578581918e-06, "loss": 0.5239, "step": 25547 }, { "epoch": 0.73, "grad_norm": 5.266326517979376, "learning_rate": 1.7727198264483535e-06, "loss": 0.4373, "step": 25548 }, { "epoch": 0.73, "grad_norm": 6.89870016045114, "learning_rate": 1.7723656228024066e-06, "loss": 0.6143, "step": 25549 }, { "epoch": 0.73, "grad_norm": 7.603476993865388, "learning_rate": 1.7720114469234001e-06, "loss": 0.814, "step": 25550 }, { "epoch": 0.73, "grad_norm": 7.050527366230683, "learning_rate": 1.7716572988143837e-06, "loss": 0.3241, "step": 25551 }, { "epoch": 0.73, "grad_norm": 8.684172507633297, "learning_rate": 1.7713031784784002e-06, "loss": 0.2101, "step": 25552 }, { "epoch": 0.73, "grad_norm": 7.739048711316591, "learning_rate": 1.7709490859184996e-06, "loss": 0.3651, "step": 25553 }, { "epoch": 0.73, "grad_norm": 4.3191905987975865, "learning_rate": 1.770595021137727e-06, "loss": 0.2235, "step": 25554 }, { "epoch": 0.73, "grad_norm": 3.904794475224222, "learning_rate": 1.770240984139127e-06, "loss": 0.1984, "step": 25555 }, { "epoch": 0.73, "grad_norm": 9.84440784980951, "learning_rate": 1.7698869749257451e-06, "loss": 0.4629, "step": 25556 }, { "epoch": 0.73, "grad_norm": 9.462752705957653, "learning_rate": 1.7695329935006294e-06, "loss": 0.7233, "step": 25557 }, { "epoch": 0.73, "grad_norm": 3.7542996870234173, "learning_rate": 1.7691790398668218e-06, "loss": 0.348, "step": 25558 }, { "epoch": 0.73, "grad_norm": 4.615512240613778, "learning_rate": 1.7688251140273689e-06, "loss": 0.5407, "step": 25559 }, { "epoch": 0.73, "grad_norm": 12.588515942817809, "learning_rate": 1.7684712159853178e-06, "loss": 0.4649, "step": 25560 }, { "epoch": 0.73, "grad_norm": 4.364559741020928, "learning_rate": 1.7681173457437106e-06, "loss": 0.2223, "step": 25561 }, { "epoch": 0.73, "grad_norm": 4.646389704121067, "learning_rate": 1.7677635033055922e-06, "loss": 0.232, "step": 25562 }, { "epoch": 0.73, "grad_norm": 4.03388275996648, "learning_rate": 1.7674096886740049e-06, "loss": 0.3103, "step": 25563 }, { "epoch": 0.73, "grad_norm": 5.371151633210829, "learning_rate": 1.7670559018519955e-06, "loss": 0.5722, "step": 25564 }, { "epoch": 0.73, "grad_norm": 6.522502681428966, "learning_rate": 1.7667021428426046e-06, "loss": 0.7023, "step": 25565 }, { "epoch": 0.73, "grad_norm": 7.4542909745807355, "learning_rate": 1.7663484116488767e-06, "loss": 0.5323, "step": 25566 }, { "epoch": 0.73, "grad_norm": 3.903174845466551, "learning_rate": 1.7659947082738572e-06, "loss": 0.4216, "step": 25567 }, { "epoch": 0.73, "grad_norm": 5.28875298770684, "learning_rate": 1.7656410327205869e-06, "loss": 0.3201, "step": 25568 }, { "epoch": 0.73, "grad_norm": 6.756294706269035, "learning_rate": 1.7652873849921088e-06, "loss": 0.3163, "step": 25569 }, { "epoch": 0.73, "grad_norm": 4.874703165945346, "learning_rate": 1.7649337650914633e-06, "loss": 0.2836, "step": 25570 }, { "epoch": 0.73, "grad_norm": 3.6249940148665907, "learning_rate": 1.764580173021696e-06, "loss": 0.4183, "step": 25571 }, { "epoch": 0.73, "grad_norm": 4.234893260362799, "learning_rate": 1.7642266087858456e-06, "loss": 0.1129, "step": 25572 }, { "epoch": 0.73, "grad_norm": 4.485290832573508, "learning_rate": 1.7638730723869563e-06, "loss": 0.421, "step": 25573 }, { "epoch": 0.73, "grad_norm": 7.165424396353746, "learning_rate": 1.7635195638280677e-06, "loss": 0.5679, "step": 25574 }, { "epoch": 0.73, "grad_norm": 5.563103675231416, "learning_rate": 1.7631660831122228e-06, "loss": 0.5324, "step": 25575 }, { "epoch": 0.73, "grad_norm": 5.05276945770671, "learning_rate": 1.7628126302424619e-06, "loss": 0.0748, "step": 25576 }, { "epoch": 0.73, "grad_norm": 4.447024721264349, "learning_rate": 1.7624592052218237e-06, "loss": 0.3119, "step": 25577 }, { "epoch": 0.73, "grad_norm": 6.07546880192611, "learning_rate": 1.762105808053352e-06, "loss": 0.436, "step": 25578 }, { "epoch": 0.73, "grad_norm": 13.64047678094587, "learning_rate": 1.7617524387400837e-06, "loss": 0.4804, "step": 25579 }, { "epoch": 0.73, "grad_norm": 3.779204169819459, "learning_rate": 1.761399097285062e-06, "loss": 0.275, "step": 25580 }, { "epoch": 0.73, "grad_norm": 3.8073806787079847, "learning_rate": 1.7610457836913253e-06, "loss": 0.2957, "step": 25581 }, { "epoch": 0.73, "grad_norm": 3.718613053052183, "learning_rate": 1.7606924979619128e-06, "loss": 0.4578, "step": 25582 }, { "epoch": 0.73, "grad_norm": 3.9323030720054835, "learning_rate": 1.760339240099862e-06, "loss": 0.5231, "step": 25583 }, { "epoch": 0.73, "grad_norm": 12.614052057563148, "learning_rate": 1.7599860101082146e-06, "loss": 0.8787, "step": 25584 }, { "epoch": 0.73, "grad_norm": 3.3889517105165083, "learning_rate": 1.7596328079900093e-06, "loss": 0.2598, "step": 25585 }, { "epoch": 0.73, "grad_norm": 4.681023612360736, "learning_rate": 1.759279633748283e-06, "loss": 0.2144, "step": 25586 }, { "epoch": 0.73, "grad_norm": 3.3487448775083886, "learning_rate": 1.7589264873860768e-06, "loss": 0.1856, "step": 25587 }, { "epoch": 0.73, "grad_norm": 5.478379539934728, "learning_rate": 1.7585733689064266e-06, "loss": 0.319, "step": 25588 }, { "epoch": 0.73, "grad_norm": 4.895132845282907, "learning_rate": 1.7582202783123707e-06, "loss": 0.4355, "step": 25589 }, { "epoch": 0.73, "grad_norm": 5.453556360710542, "learning_rate": 1.7578672156069448e-06, "loss": 0.1759, "step": 25590 }, { "epoch": 0.73, "grad_norm": 6.823039100849145, "learning_rate": 1.7575141807931884e-06, "loss": 0.4465, "step": 25591 }, { "epoch": 0.73, "grad_norm": 6.436323910287157, "learning_rate": 1.7571611738741394e-06, "loss": 0.4673, "step": 25592 }, { "epoch": 0.73, "grad_norm": 4.886812472950284, "learning_rate": 1.7568081948528325e-06, "loss": 0.2533, "step": 25593 }, { "epoch": 0.73, "grad_norm": 7.520120271674626, "learning_rate": 1.7564552437323068e-06, "loss": 0.7005, "step": 25594 }, { "epoch": 0.73, "grad_norm": 5.420548422400169, "learning_rate": 1.756102320515597e-06, "loss": 0.5299, "step": 25595 }, { "epoch": 0.73, "grad_norm": 4.1962717050144995, "learning_rate": 1.75574942520574e-06, "loss": 0.2572, "step": 25596 }, { "epoch": 0.73, "grad_norm": 3.924803259954409, "learning_rate": 1.7553965578057692e-06, "loss": 0.2592, "step": 25597 }, { "epoch": 0.73, "grad_norm": 8.209780712288975, "learning_rate": 1.755043718318724e-06, "loss": 0.4633, "step": 25598 }, { "epoch": 0.73, "grad_norm": 2.8591567826514837, "learning_rate": 1.754690906747637e-06, "loss": 0.2626, "step": 25599 }, { "epoch": 0.73, "grad_norm": 15.074271766191755, "learning_rate": 1.7543381230955442e-06, "loss": 0.3939, "step": 25600 }, { "epoch": 0.73, "grad_norm": 7.446008319502739, "learning_rate": 1.7539853673654827e-06, "loss": 0.5155, "step": 25601 }, { "epoch": 0.73, "grad_norm": 16.68085235887846, "learning_rate": 1.7536326395604853e-06, "loss": 0.4383, "step": 25602 }, { "epoch": 0.73, "grad_norm": 7.840587084645459, "learning_rate": 1.7532799396835865e-06, "loss": 0.5789, "step": 25603 }, { "epoch": 0.73, "grad_norm": 4.662873722060799, "learning_rate": 1.752927267737819e-06, "loss": 0.4988, "step": 25604 }, { "epoch": 0.73, "grad_norm": 4.311032639905753, "learning_rate": 1.7525746237262203e-06, "loss": 0.2171, "step": 25605 }, { "epoch": 0.73, "grad_norm": 4.424533193287216, "learning_rate": 1.7522220076518205e-06, "loss": 0.5856, "step": 25606 }, { "epoch": 0.73, "grad_norm": 1.7418232609730127, "learning_rate": 1.751869419517655e-06, "loss": 0.2111, "step": 25607 }, { "epoch": 0.73, "grad_norm": 3.0970739368091063, "learning_rate": 1.75151685932676e-06, "loss": 0.2142, "step": 25608 }, { "epoch": 0.73, "grad_norm": 6.1861210452082185, "learning_rate": 1.7511643270821626e-06, "loss": 0.6804, "step": 25609 }, { "epoch": 0.73, "grad_norm": 5.950509365301677, "learning_rate": 1.7508118227868997e-06, "loss": 0.4875, "step": 25610 }, { "epoch": 0.73, "grad_norm": 3.2228402195062706, "learning_rate": 1.7504593464440013e-06, "loss": 0.2895, "step": 25611 }, { "epoch": 0.73, "grad_norm": 12.586837196514947, "learning_rate": 1.7501068980565028e-06, "loss": 0.9755, "step": 25612 }, { "epoch": 0.73, "grad_norm": 7.0409782419257265, "learning_rate": 1.7497544776274321e-06, "loss": 0.6075, "step": 25613 }, { "epoch": 0.73, "grad_norm": 7.556190901305097, "learning_rate": 1.749402085159826e-06, "loss": 0.3854, "step": 25614 }, { "epoch": 0.73, "grad_norm": 6.0130826259349535, "learning_rate": 1.7490497206567132e-06, "loss": 0.5657, "step": 25615 }, { "epoch": 0.73, "grad_norm": 2.24660044386676, "learning_rate": 1.7486973841211235e-06, "loss": 0.1815, "step": 25616 }, { "epoch": 0.73, "grad_norm": 4.783288209593775, "learning_rate": 1.7483450755560921e-06, "loss": 0.4074, "step": 25617 }, { "epoch": 0.73, "grad_norm": 7.266162778579267, "learning_rate": 1.7479927949646457e-06, "loss": 0.4634, "step": 25618 }, { "epoch": 0.73, "grad_norm": 7.0855088950748675, "learning_rate": 1.7476405423498184e-06, "loss": 0.5688, "step": 25619 }, { "epoch": 0.73, "grad_norm": 4.705151447692352, "learning_rate": 1.7472883177146377e-06, "loss": 0.2779, "step": 25620 }, { "epoch": 0.73, "grad_norm": 3.449424193194428, "learning_rate": 1.7469361210621367e-06, "loss": 0.1902, "step": 25621 }, { "epoch": 0.73, "grad_norm": 6.391031564776933, "learning_rate": 1.7465839523953433e-06, "loss": 0.4805, "step": 25622 }, { "epoch": 0.73, "grad_norm": 6.381785838974648, "learning_rate": 1.7462318117172882e-06, "loss": 0.661, "step": 25623 }, { "epoch": 0.73, "grad_norm": 7.580689552768863, "learning_rate": 1.7458796990309978e-06, "loss": 0.5131, "step": 25624 }, { "epoch": 0.73, "grad_norm": 10.00822187028659, "learning_rate": 1.7455276143395044e-06, "loss": 0.4018, "step": 25625 }, { "epoch": 0.73, "grad_norm": 2.8081627357200767, "learning_rate": 1.7451755576458378e-06, "loss": 0.1349, "step": 25626 }, { "epoch": 0.73, "grad_norm": 4.131854373509375, "learning_rate": 1.744823528953023e-06, "loss": 0.2998, "step": 25627 }, { "epoch": 0.73, "grad_norm": 6.358933754499264, "learning_rate": 1.7444715282640928e-06, "loss": 0.6113, "step": 25628 }, { "epoch": 0.73, "grad_norm": 3.4060907939137013, "learning_rate": 1.7441195555820728e-06, "loss": 0.4244, "step": 25629 }, { "epoch": 0.73, "grad_norm": 2.8893511671782774, "learning_rate": 1.7437676109099916e-06, "loss": 0.1996, "step": 25630 }, { "epoch": 0.73, "grad_norm": 4.388694175728068, "learning_rate": 1.7434156942508751e-06, "loss": 0.4714, "step": 25631 }, { "epoch": 0.73, "grad_norm": 5.516194171013582, "learning_rate": 1.7430638056077525e-06, "loss": 0.1496, "step": 25632 }, { "epoch": 0.73, "grad_norm": 4.799192632883239, "learning_rate": 1.7427119449836528e-06, "loss": 0.7283, "step": 25633 }, { "epoch": 0.73, "grad_norm": 6.966268910304151, "learning_rate": 1.7423601123815991e-06, "loss": 0.5882, "step": 25634 }, { "epoch": 0.73, "grad_norm": 5.36676485710275, "learning_rate": 1.7420083078046218e-06, "loss": 0.5323, "step": 25635 }, { "epoch": 0.73, "grad_norm": 7.572359231601997, "learning_rate": 1.7416565312557465e-06, "loss": 0.4858, "step": 25636 }, { "epoch": 0.73, "grad_norm": 9.110715266935463, "learning_rate": 1.7413047827379981e-06, "loss": 0.5347, "step": 25637 }, { "epoch": 0.73, "grad_norm": 4.779138984145477, "learning_rate": 1.7409530622544025e-06, "loss": 0.5375, "step": 25638 }, { "epoch": 0.73, "grad_norm": 5.526465658256663, "learning_rate": 1.7406013698079876e-06, "loss": 0.2533, "step": 25639 }, { "epoch": 0.73, "grad_norm": 2.7168623411078054, "learning_rate": 1.7402497054017764e-06, "loss": 0.2306, "step": 25640 }, { "epoch": 0.73, "grad_norm": 7.848097884196709, "learning_rate": 1.7398980690387972e-06, "loss": 0.5842, "step": 25641 }, { "epoch": 0.73, "grad_norm": 7.685264417409255, "learning_rate": 1.7395464607220736e-06, "loss": 0.3879, "step": 25642 }, { "epoch": 0.73, "grad_norm": 4.802489086339287, "learning_rate": 1.739194880454629e-06, "loss": 0.5178, "step": 25643 }, { "epoch": 0.73, "grad_norm": 10.120053389864527, "learning_rate": 1.7388433282394905e-06, "loss": 0.4793, "step": 25644 }, { "epoch": 0.73, "grad_norm": 6.930063059025883, "learning_rate": 1.73849180407968e-06, "loss": 0.487, "step": 25645 }, { "epoch": 0.73, "grad_norm": 3.061784057232161, "learning_rate": 1.7381403079782245e-06, "loss": 0.4764, "step": 25646 }, { "epoch": 0.73, "grad_norm": 3.032040483106332, "learning_rate": 1.7377888399381442e-06, "loss": 0.2031, "step": 25647 }, { "epoch": 0.73, "grad_norm": 7.461698004025054, "learning_rate": 1.7374373999624671e-06, "loss": 0.7482, "step": 25648 }, { "epoch": 0.73, "grad_norm": 8.167258889919149, "learning_rate": 1.737085988054214e-06, "loss": 0.6615, "step": 25649 }, { "epoch": 0.73, "grad_norm": 7.97346889423138, "learning_rate": 1.7367346042164067e-06, "loss": 0.7753, "step": 25650 }, { "epoch": 0.73, "grad_norm": 3.9018353960678316, "learning_rate": 1.736383248452072e-06, "loss": 0.2446, "step": 25651 }, { "epoch": 0.73, "grad_norm": 4.293708883252461, "learning_rate": 1.7360319207642284e-06, "loss": 0.2543, "step": 25652 }, { "epoch": 0.73, "grad_norm": 4.129835011694284, "learning_rate": 1.7356806211559018e-06, "loss": 0.2598, "step": 25653 }, { "epoch": 0.73, "grad_norm": 5.970950854312424, "learning_rate": 1.7353293496301116e-06, "loss": 0.3765, "step": 25654 }, { "epoch": 0.73, "grad_norm": 3.726347271284046, "learning_rate": 1.7349781061898829e-06, "loss": 0.4003, "step": 25655 }, { "epoch": 0.73, "grad_norm": 6.75552650551378, "learning_rate": 1.7346268908382352e-06, "loss": 0.4419, "step": 25656 }, { "epoch": 0.73, "grad_norm": 4.16101518100792, "learning_rate": 1.7342757035781882e-06, "loss": 0.5197, "step": 25657 }, { "epoch": 0.73, "grad_norm": 7.145903124528692, "learning_rate": 1.7339245444127678e-06, "loss": 0.6236, "step": 25658 }, { "epoch": 0.73, "grad_norm": 9.280085532353295, "learning_rate": 1.73357341334499e-06, "loss": 0.685, "step": 25659 }, { "epoch": 0.73, "grad_norm": 2.861686980883199, "learning_rate": 1.73322231037788e-06, "loss": 0.3337, "step": 25660 }, { "epoch": 0.73, "grad_norm": 2.649838732364838, "learning_rate": 1.7328712355144545e-06, "loss": 0.3269, "step": 25661 }, { "epoch": 0.73, "grad_norm": 3.083298619607595, "learning_rate": 1.732520188757737e-06, "loss": 0.0804, "step": 25662 }, { "epoch": 0.73, "grad_norm": 3.1939666917153375, "learning_rate": 1.7321691701107463e-06, "loss": 0.3257, "step": 25663 }, { "epoch": 0.73, "grad_norm": 9.040710905572146, "learning_rate": 1.7318181795765021e-06, "loss": 0.8931, "step": 25664 }, { "epoch": 0.73, "grad_norm": 4.729499648326761, "learning_rate": 1.7314672171580217e-06, "loss": 0.3582, "step": 25665 }, { "epoch": 0.74, "grad_norm": 4.092354806084966, "learning_rate": 1.731116282858326e-06, "loss": 0.6415, "step": 25666 }, { "epoch": 0.74, "grad_norm": 4.500116081859847, "learning_rate": 1.7307653766804366e-06, "loss": 0.3324, "step": 25667 }, { "epoch": 0.74, "grad_norm": 6.2087379639464535, "learning_rate": 1.7304144986273703e-06, "loss": 0.2161, "step": 25668 }, { "epoch": 0.74, "grad_norm": 6.2876953913700175, "learning_rate": 1.730063648702145e-06, "loss": 0.273, "step": 25669 }, { "epoch": 0.74, "grad_norm": 5.951280122779676, "learning_rate": 1.7297128269077784e-06, "loss": 0.1822, "step": 25670 }, { "epoch": 0.74, "grad_norm": 7.711795963939977, "learning_rate": 1.729362033247291e-06, "loss": 0.9556, "step": 25671 }, { "epoch": 0.74, "grad_norm": 3.341800691924599, "learning_rate": 1.7290112677236975e-06, "loss": 0.2317, "step": 25672 }, { "epoch": 0.74, "grad_norm": 3.068867350585672, "learning_rate": 1.7286605303400177e-06, "loss": 0.1937, "step": 25673 }, { "epoch": 0.74, "grad_norm": 4.179478549190766, "learning_rate": 1.7283098210992705e-06, "loss": 0.3764, "step": 25674 }, { "epoch": 0.74, "grad_norm": 7.285403430007838, "learning_rate": 1.7279591400044705e-06, "loss": 0.6736, "step": 25675 }, { "epoch": 0.74, "grad_norm": 3.464482440194338, "learning_rate": 1.7276084870586352e-06, "loss": 0.3103, "step": 25676 }, { "epoch": 0.74, "grad_norm": 5.841238328663688, "learning_rate": 1.7272578622647802e-06, "loss": 0.5726, "step": 25677 }, { "epoch": 0.74, "grad_norm": 5.719052499577174, "learning_rate": 1.726907265625924e-06, "loss": 0.4175, "step": 25678 }, { "epoch": 0.74, "grad_norm": 5.399664003904017, "learning_rate": 1.7265566971450799e-06, "loss": 0.6891, "step": 25679 }, { "epoch": 0.74, "grad_norm": 4.093167037806245, "learning_rate": 1.726206156825267e-06, "loss": 0.3957, "step": 25680 }, { "epoch": 0.74, "grad_norm": 3.9153624486922687, "learning_rate": 1.725855644669498e-06, "loss": 0.2132, "step": 25681 }, { "epoch": 0.74, "grad_norm": 7.8519288238548635, "learning_rate": 1.7255051606807916e-06, "loss": 0.3918, "step": 25682 }, { "epoch": 0.74, "grad_norm": 6.6755403192837, "learning_rate": 1.7251547048621608e-06, "loss": 0.6381, "step": 25683 }, { "epoch": 0.74, "grad_norm": 5.93618028931965, "learning_rate": 1.7248042772166195e-06, "loss": 0.5037, "step": 25684 }, { "epoch": 0.74, "grad_norm": 3.3785020531364007, "learning_rate": 1.7244538777471854e-06, "loss": 0.2787, "step": 25685 }, { "epoch": 0.74, "grad_norm": 11.589527440809205, "learning_rate": 1.7241035064568696e-06, "loss": 0.5422, "step": 25686 }, { "epoch": 0.74, "grad_norm": 6.930698738156527, "learning_rate": 1.7237531633486893e-06, "loss": 0.7911, "step": 25687 }, { "epoch": 0.74, "grad_norm": 5.911423632370073, "learning_rate": 1.723402848425656e-06, "loss": 0.367, "step": 25688 }, { "epoch": 0.74, "grad_norm": 2.8831234275802005, "learning_rate": 1.7230525616907857e-06, "loss": 0.2386, "step": 25689 }, { "epoch": 0.74, "grad_norm": 7.7129661419190985, "learning_rate": 1.7227023031470913e-06, "loss": 0.6957, "step": 25690 }, { "epoch": 0.74, "grad_norm": 8.27567567170084, "learning_rate": 1.7223520727975834e-06, "loss": 0.5379, "step": 25691 }, { "epoch": 0.74, "grad_norm": 4.032096773859515, "learning_rate": 1.722001870645279e-06, "loss": 0.4152, "step": 25692 }, { "epoch": 0.74, "grad_norm": 5.911851840148787, "learning_rate": 1.7216516966931869e-06, "loss": 0.4194, "step": 25693 }, { "epoch": 0.74, "grad_norm": 6.273155728635734, "learning_rate": 1.7213015509443236e-06, "loss": 0.4489, "step": 25694 }, { "epoch": 0.74, "grad_norm": 3.866688459159563, "learning_rate": 1.7209514334016974e-06, "loss": 0.2816, "step": 25695 }, { "epoch": 0.74, "grad_norm": 8.298180888652103, "learning_rate": 1.7206013440683262e-06, "loss": 0.6854, "step": 25696 }, { "epoch": 0.74, "grad_norm": 4.980976775943306, "learning_rate": 1.720251282947214e-06, "loss": 0.3409, "step": 25697 }, { "epoch": 0.74, "grad_norm": 7.193816718116082, "learning_rate": 1.7199012500413764e-06, "loss": 0.6639, "step": 25698 }, { "epoch": 0.74, "grad_norm": 8.726178740530251, "learning_rate": 1.719551245353826e-06, "loss": 0.5159, "step": 25699 }, { "epoch": 0.74, "grad_norm": 3.3476157258295114, "learning_rate": 1.7192012688875708e-06, "loss": 0.3168, "step": 25700 }, { "epoch": 0.74, "grad_norm": 7.482713678216836, "learning_rate": 1.7188513206456248e-06, "loss": 0.3927, "step": 25701 }, { "epoch": 0.74, "grad_norm": 4.496296206107765, "learning_rate": 1.718501400630997e-06, "loss": 0.5348, "step": 25702 }, { "epoch": 0.74, "grad_norm": 4.536896338122961, "learning_rate": 1.7181515088466977e-06, "loss": 0.3973, "step": 25703 }, { "epoch": 0.74, "grad_norm": 6.382030425139957, "learning_rate": 1.7178016452957346e-06, "loss": 0.7609, "step": 25704 }, { "epoch": 0.74, "grad_norm": 3.2981722661387427, "learning_rate": 1.7174518099811221e-06, "loss": 0.3168, "step": 25705 }, { "epoch": 0.74, "grad_norm": 3.5215933598279854, "learning_rate": 1.717102002905866e-06, "loss": 0.1022, "step": 25706 }, { "epoch": 0.74, "grad_norm": 3.7178678828739073, "learning_rate": 1.7167522240729767e-06, "loss": 0.3748, "step": 25707 }, { "epoch": 0.74, "grad_norm": 2.6972381568300063, "learning_rate": 1.7164024734854657e-06, "loss": 0.3218, "step": 25708 }, { "epoch": 0.74, "grad_norm": 3.851535046462201, "learning_rate": 1.7160527511463398e-06, "loss": 0.305, "step": 25709 }, { "epoch": 0.74, "grad_norm": 3.955943960854579, "learning_rate": 1.7157030570586076e-06, "loss": 0.2823, "step": 25710 }, { "epoch": 0.74, "grad_norm": 9.211341674323647, "learning_rate": 1.7153533912252762e-06, "loss": 0.3899, "step": 25711 }, { "epoch": 0.74, "grad_norm": 3.9689704653563003, "learning_rate": 1.715003753649357e-06, "loss": 0.3532, "step": 25712 }, { "epoch": 0.74, "grad_norm": 6.9819244933207445, "learning_rate": 1.7146541443338544e-06, "loss": 0.4688, "step": 25713 }, { "epoch": 0.74, "grad_norm": 6.7615606068012895, "learning_rate": 1.7143045632817773e-06, "loss": 0.3686, "step": 25714 }, { "epoch": 0.74, "grad_norm": 7.644777382702462, "learning_rate": 1.713955010496135e-06, "loss": 0.2399, "step": 25715 }, { "epoch": 0.74, "grad_norm": 2.0822740237919986, "learning_rate": 1.713605485979934e-06, "loss": 0.108, "step": 25716 }, { "epoch": 0.74, "grad_norm": 4.151829785524724, "learning_rate": 1.7132559897361795e-06, "loss": 0.2322, "step": 25717 }, { "epoch": 0.74, "grad_norm": 8.01619484941197, "learning_rate": 1.7129065217678775e-06, "loss": 0.4889, "step": 25718 }, { "epoch": 0.74, "grad_norm": 6.386846418592091, "learning_rate": 1.7125570820780379e-06, "loss": 0.6812, "step": 25719 }, { "epoch": 0.74, "grad_norm": 8.410872936054126, "learning_rate": 1.7122076706696634e-06, "loss": 0.2572, "step": 25720 }, { "epoch": 0.74, "grad_norm": 7.173524255545774, "learning_rate": 1.7118582875457629e-06, "loss": 0.4302, "step": 25721 }, { "epoch": 0.74, "grad_norm": 4.798031769980052, "learning_rate": 1.7115089327093393e-06, "loss": 0.2556, "step": 25722 }, { "epoch": 0.74, "grad_norm": 5.485632271398389, "learning_rate": 1.7111596061634012e-06, "loss": 0.4555, "step": 25723 }, { "epoch": 0.74, "grad_norm": 5.078301881497252, "learning_rate": 1.7108103079109518e-06, "loss": 0.2632, "step": 25724 }, { "epoch": 0.74, "grad_norm": 4.873557146223621, "learning_rate": 1.7104610379549947e-06, "loss": 0.3512, "step": 25725 }, { "epoch": 0.74, "grad_norm": 5.027477769465833, "learning_rate": 1.710111796298538e-06, "loss": 0.2659, "step": 25726 }, { "epoch": 0.74, "grad_norm": 4.356806254618187, "learning_rate": 1.709762582944583e-06, "loss": 0.1997, "step": 25727 }, { "epoch": 0.74, "grad_norm": 2.917626025028473, "learning_rate": 1.7094133978961368e-06, "loss": 0.1834, "step": 25728 }, { "epoch": 0.74, "grad_norm": 10.86899668417436, "learning_rate": 1.709064241156202e-06, "loss": 0.4508, "step": 25729 }, { "epoch": 0.74, "grad_norm": 5.465721817860325, "learning_rate": 1.7087151127277824e-06, "loss": 0.2639, "step": 25730 }, { "epoch": 0.74, "grad_norm": 5.415824702433509, "learning_rate": 1.7083660126138795e-06, "loss": 0.3765, "step": 25731 }, { "epoch": 0.74, "grad_norm": 5.427619990529613, "learning_rate": 1.708016940817499e-06, "loss": 0.5082, "step": 25732 }, { "epoch": 0.74, "grad_norm": 10.82929261973278, "learning_rate": 1.707667897341645e-06, "loss": 0.786, "step": 25733 }, { "epoch": 0.74, "grad_norm": 3.8836929735282566, "learning_rate": 1.7073188821893166e-06, "loss": 0.2675, "step": 25734 }, { "epoch": 0.74, "grad_norm": 6.866679271513595, "learning_rate": 1.7069698953635206e-06, "loss": 0.7317, "step": 25735 }, { "epoch": 0.74, "grad_norm": 4.563183537321837, "learning_rate": 1.7066209368672572e-06, "loss": 0.7705, "step": 25736 }, { "epoch": 0.74, "grad_norm": 5.655538145341204, "learning_rate": 1.7062720067035282e-06, "loss": 0.612, "step": 25737 }, { "epoch": 0.74, "grad_norm": 3.342419760803035, "learning_rate": 1.7059231048753338e-06, "loss": 0.2388, "step": 25738 }, { "epoch": 0.74, "grad_norm": 5.119023350460616, "learning_rate": 1.7055742313856776e-06, "loss": 0.6279, "step": 25739 }, { "epoch": 0.74, "grad_norm": 4.060551190614815, "learning_rate": 1.7052253862375622e-06, "loss": 0.3724, "step": 25740 }, { "epoch": 0.74, "grad_norm": 5.518777394901769, "learning_rate": 1.7048765694339854e-06, "loss": 0.2474, "step": 25741 }, { "epoch": 0.74, "grad_norm": 8.737645773701692, "learning_rate": 1.7045277809779515e-06, "loss": 0.6964, "step": 25742 }, { "epoch": 0.74, "grad_norm": 6.776147531263468, "learning_rate": 1.704179020872459e-06, "loss": 0.614, "step": 25743 }, { "epoch": 0.74, "grad_norm": 11.627317587242432, "learning_rate": 1.7038302891205095e-06, "loss": 0.5697, "step": 25744 }, { "epoch": 0.74, "grad_norm": 3.688710143450183, "learning_rate": 1.7034815857250997e-06, "loss": 0.2637, "step": 25745 }, { "epoch": 0.74, "grad_norm": 5.575958985441905, "learning_rate": 1.7031329106892337e-06, "loss": 0.0945, "step": 25746 }, { "epoch": 0.74, "grad_norm": 6.22794400424444, "learning_rate": 1.702784264015908e-06, "loss": 0.8309, "step": 25747 }, { "epoch": 0.74, "grad_norm": 4.825494426141061, "learning_rate": 1.7024356457081232e-06, "loss": 0.2575, "step": 25748 }, { "epoch": 0.74, "grad_norm": 8.090219677746958, "learning_rate": 1.70208705576888e-06, "loss": 0.49, "step": 25749 }, { "epoch": 0.74, "grad_norm": 4.301428362455479, "learning_rate": 1.701738494201176e-06, "loss": 0.4413, "step": 25750 }, { "epoch": 0.74, "grad_norm": 5.480516472932921, "learning_rate": 1.7013899610080098e-06, "loss": 0.6473, "step": 25751 }, { "epoch": 0.74, "grad_norm": 4.834800081114091, "learning_rate": 1.7010414561923776e-06, "loss": 0.3638, "step": 25752 }, { "epoch": 0.74, "grad_norm": 4.876143737041927, "learning_rate": 1.7006929797572813e-06, "loss": 0.4329, "step": 25753 }, { "epoch": 0.74, "grad_norm": 6.570986900236418, "learning_rate": 1.700344531705716e-06, "loss": 0.7077, "step": 25754 }, { "epoch": 0.74, "grad_norm": 5.450508954842776, "learning_rate": 1.6999961120406816e-06, "loss": 0.6114, "step": 25755 }, { "epoch": 0.74, "grad_norm": 6.648573231908486, "learning_rate": 1.699647720765174e-06, "loss": 0.5133, "step": 25756 }, { "epoch": 0.74, "grad_norm": 4.620720404072541, "learning_rate": 1.6992993578821898e-06, "loss": 0.2266, "step": 25757 }, { "epoch": 0.74, "grad_norm": 2.867367907676937, "learning_rate": 1.6989510233947276e-06, "loss": 0.2591, "step": 25758 }, { "epoch": 0.74, "grad_norm": 3.4855486514009906, "learning_rate": 1.6986027173057822e-06, "loss": 0.0859, "step": 25759 }, { "epoch": 0.74, "grad_norm": 3.776110253423667, "learning_rate": 1.6982544396183532e-06, "loss": 0.2775, "step": 25760 }, { "epoch": 0.74, "grad_norm": 4.438569544457162, "learning_rate": 1.6979061903354321e-06, "loss": 0.2509, "step": 25761 }, { "epoch": 0.74, "grad_norm": 8.285029891011401, "learning_rate": 1.6975579694600197e-06, "loss": 0.3545, "step": 25762 }, { "epoch": 0.74, "grad_norm": 8.728910644224232, "learning_rate": 1.6972097769951097e-06, "loss": 0.6002, "step": 25763 }, { "epoch": 0.74, "grad_norm": 5.87408805424767, "learning_rate": 1.6968616129436954e-06, "loss": 0.4693, "step": 25764 }, { "epoch": 0.74, "grad_norm": 4.280507656811083, "learning_rate": 1.6965134773087755e-06, "loss": 0.5746, "step": 25765 }, { "epoch": 0.74, "grad_norm": 5.561111555513373, "learning_rate": 1.6961653700933423e-06, "loss": 0.401, "step": 25766 }, { "epoch": 0.74, "grad_norm": 3.789855307783764, "learning_rate": 1.695817291300393e-06, "loss": 0.3898, "step": 25767 }, { "epoch": 0.74, "grad_norm": 4.291356430445106, "learning_rate": 1.6954692409329188e-06, "loss": 0.4161, "step": 25768 }, { "epoch": 0.74, "grad_norm": 4.135502132938699, "learning_rate": 1.6951212189939175e-06, "loss": 0.162, "step": 25769 }, { "epoch": 0.74, "grad_norm": 8.942394690252124, "learning_rate": 1.6947732254863818e-06, "loss": 0.5944, "step": 25770 }, { "epoch": 0.74, "grad_norm": 7.476549298026648, "learning_rate": 1.6944252604133048e-06, "loss": 0.7312, "step": 25771 }, { "epoch": 0.74, "grad_norm": 3.0596564384584837, "learning_rate": 1.6940773237776785e-06, "loss": 0.2747, "step": 25772 }, { "epoch": 0.74, "grad_norm": 4.491722441110545, "learning_rate": 1.6937294155824985e-06, "loss": 0.4076, "step": 25773 }, { "epoch": 0.74, "grad_norm": 5.113927836732889, "learning_rate": 1.6933815358307587e-06, "loss": 0.5062, "step": 25774 }, { "epoch": 0.74, "grad_norm": 5.748531485740833, "learning_rate": 1.6930336845254486e-06, "loss": 0.4532, "step": 25775 }, { "epoch": 0.74, "grad_norm": 4.5696530118970085, "learning_rate": 1.692685861669564e-06, "loss": 0.4192, "step": 25776 }, { "epoch": 0.74, "grad_norm": 4.025118993101989, "learning_rate": 1.692338067266096e-06, "loss": 0.4469, "step": 25777 }, { "epoch": 0.74, "grad_norm": 2.455568536229176, "learning_rate": 1.6919903013180367e-06, "loss": 0.1103, "step": 25778 }, { "epoch": 0.74, "grad_norm": 3.552256094272864, "learning_rate": 1.6916425638283757e-06, "loss": 0.4394, "step": 25779 }, { "epoch": 0.74, "grad_norm": 6.1681556794129015, "learning_rate": 1.6912948548001056e-06, "loss": 0.6725, "step": 25780 }, { "epoch": 0.74, "grad_norm": 5.2310136506936775, "learning_rate": 1.690947174236221e-06, "loss": 0.2211, "step": 25781 }, { "epoch": 0.74, "grad_norm": 8.582183655959971, "learning_rate": 1.6905995221397087e-06, "loss": 0.6741, "step": 25782 }, { "epoch": 0.74, "grad_norm": 6.4141705186787785, "learning_rate": 1.6902518985135647e-06, "loss": 0.3118, "step": 25783 }, { "epoch": 0.74, "grad_norm": 8.062632123470774, "learning_rate": 1.689904303360772e-06, "loss": 0.5697, "step": 25784 }, { "epoch": 0.74, "grad_norm": 11.697292122115797, "learning_rate": 1.6895567366843273e-06, "loss": 0.5914, "step": 25785 }, { "epoch": 0.74, "grad_norm": 9.884261025885145, "learning_rate": 1.6892091984872167e-06, "loss": 0.6982, "step": 25786 }, { "epoch": 0.74, "grad_norm": 7.542335519233169, "learning_rate": 1.6888616887724335e-06, "loss": 0.5542, "step": 25787 }, { "epoch": 0.74, "grad_norm": 7.991094937788471, "learning_rate": 1.688514207542964e-06, "loss": 0.9509, "step": 25788 }, { "epoch": 0.74, "grad_norm": 9.192004312819687, "learning_rate": 1.6881667548018e-06, "loss": 0.5091, "step": 25789 }, { "epoch": 0.74, "grad_norm": 6.883563014840615, "learning_rate": 1.6878193305519303e-06, "loss": 0.2929, "step": 25790 }, { "epoch": 0.74, "grad_norm": 1.361245315638277, "learning_rate": 1.6874719347963408e-06, "loss": 0.058, "step": 25791 }, { "epoch": 0.74, "grad_norm": 7.4099319255136935, "learning_rate": 1.6871245675380243e-06, "loss": 0.5744, "step": 25792 }, { "epoch": 0.74, "grad_norm": 11.747100634689957, "learning_rate": 1.686777228779966e-06, "loss": 0.5038, "step": 25793 }, { "epoch": 0.74, "grad_norm": 3.9492585947081227, "learning_rate": 1.6864299185251566e-06, "loss": 0.304, "step": 25794 }, { "epoch": 0.74, "grad_norm": 9.89812875460369, "learning_rate": 1.686082636776581e-06, "loss": 0.7685, "step": 25795 }, { "epoch": 0.74, "grad_norm": 4.007665486455805, "learning_rate": 1.6857353835372304e-06, "loss": 0.666, "step": 25796 }, { "epoch": 0.74, "grad_norm": 3.6687693346804062, "learning_rate": 1.68538815881009e-06, "loss": 0.5356, "step": 25797 }, { "epoch": 0.74, "grad_norm": 10.32244134932024, "learning_rate": 1.6850409625981451e-06, "loss": 0.4108, "step": 25798 }, { "epoch": 0.74, "grad_norm": 8.700966147785383, "learning_rate": 1.6846937949043867e-06, "loss": 1.3857, "step": 25799 }, { "epoch": 0.74, "grad_norm": 2.7332751868816465, "learning_rate": 1.6843466557317978e-06, "loss": 0.3622, "step": 25800 }, { "epoch": 0.74, "grad_norm": 9.174985341988062, "learning_rate": 1.683999545083368e-06, "loss": 0.6226, "step": 25801 }, { "epoch": 0.74, "grad_norm": 3.314340511964801, "learning_rate": 1.6836524629620804e-06, "loss": 0.3221, "step": 25802 }, { "epoch": 0.74, "grad_norm": 6.695031960306383, "learning_rate": 1.6833054093709239e-06, "loss": 0.3983, "step": 25803 }, { "epoch": 0.74, "grad_norm": 7.263304932678682, "learning_rate": 1.6829583843128822e-06, "loss": 0.7879, "step": 25804 }, { "epoch": 0.74, "grad_norm": 5.556155240223836, "learning_rate": 1.6826113877909394e-06, "loss": 0.6122, "step": 25805 }, { "epoch": 0.74, "grad_norm": 6.532678913246031, "learning_rate": 1.6822644198080844e-06, "loss": 0.5303, "step": 25806 }, { "epoch": 0.74, "grad_norm": 2.194363534887095, "learning_rate": 1.681917480367299e-06, "loss": 0.0963, "step": 25807 }, { "epoch": 0.74, "grad_norm": 3.1046209173641435, "learning_rate": 1.6815705694715695e-06, "loss": 0.3303, "step": 25808 }, { "epoch": 0.74, "grad_norm": 7.946780688963389, "learning_rate": 1.6812236871238791e-06, "loss": 0.3991, "step": 25809 }, { "epoch": 0.74, "grad_norm": 6.6763665047089935, "learning_rate": 1.6808768333272152e-06, "loss": 0.1898, "step": 25810 }, { "epoch": 0.74, "grad_norm": 6.580579358715318, "learning_rate": 1.6805300080845566e-06, "loss": 0.5532, "step": 25811 }, { "epoch": 0.74, "grad_norm": 5.836016959552033, "learning_rate": 1.680183211398891e-06, "loss": 0.5214, "step": 25812 }, { "epoch": 0.74, "grad_norm": 3.5675899216801366, "learning_rate": 1.679836443273199e-06, "loss": 0.2721, "step": 25813 }, { "epoch": 0.74, "grad_norm": 3.9358638815603295, "learning_rate": 1.6794897037104652e-06, "loss": 0.3463, "step": 25814 }, { "epoch": 0.74, "grad_norm": 6.296789542628763, "learning_rate": 1.6791429927136738e-06, "loss": 0.6037, "step": 25815 }, { "epoch": 0.74, "grad_norm": 7.049436894376849, "learning_rate": 1.6787963102858068e-06, "loss": 0.7864, "step": 25816 }, { "epoch": 0.74, "grad_norm": 4.391962248224766, "learning_rate": 1.6784496564298464e-06, "loss": 0.386, "step": 25817 }, { "epoch": 0.74, "grad_norm": 5.643768406834796, "learning_rate": 1.6781030311487728e-06, "loss": 0.3158, "step": 25818 }, { "epoch": 0.74, "grad_norm": 7.191246648423309, "learning_rate": 1.6777564344455715e-06, "loss": 0.5566, "step": 25819 }, { "epoch": 0.74, "grad_norm": 1.840419865367143, "learning_rate": 1.6774098663232208e-06, "loss": 0.3031, "step": 25820 }, { "epoch": 0.74, "grad_norm": 3.626997989013468, "learning_rate": 1.6770633267847042e-06, "loss": 0.5226, "step": 25821 }, { "epoch": 0.74, "grad_norm": 6.427164354437075, "learning_rate": 1.6767168158330045e-06, "loss": 0.4588, "step": 25822 }, { "epoch": 0.74, "grad_norm": 3.3349856374336966, "learning_rate": 1.6763703334711e-06, "loss": 0.3714, "step": 25823 }, { "epoch": 0.74, "grad_norm": 5.5244588271875195, "learning_rate": 1.6760238797019729e-06, "loss": 0.5028, "step": 25824 }, { "epoch": 0.74, "grad_norm": 11.151197170769137, "learning_rate": 1.6756774545286013e-06, "loss": 0.1562, "step": 25825 }, { "epoch": 0.74, "grad_norm": 5.285838735065389, "learning_rate": 1.675331057953969e-06, "loss": 0.5251, "step": 25826 }, { "epoch": 0.74, "grad_norm": 9.551921603221361, "learning_rate": 1.674984689981053e-06, "loss": 0.5067, "step": 25827 }, { "epoch": 0.74, "grad_norm": 5.637857534908393, "learning_rate": 1.6746383506128356e-06, "loss": 0.6078, "step": 25828 }, { "epoch": 0.74, "grad_norm": 5.150205965460393, "learning_rate": 1.6742920398522933e-06, "loss": 0.7216, "step": 25829 }, { "epoch": 0.74, "grad_norm": 4.7957759057767975, "learning_rate": 1.6739457577024087e-06, "loss": 0.821, "step": 25830 }, { "epoch": 0.74, "grad_norm": 3.544102858412914, "learning_rate": 1.673599504166159e-06, "loss": 0.2828, "step": 25831 }, { "epoch": 0.74, "grad_norm": 4.377072579057287, "learning_rate": 1.6732532792465223e-06, "loss": 0.2399, "step": 25832 }, { "epoch": 0.74, "grad_norm": 3.247676257048228, "learning_rate": 1.6729070829464788e-06, "loss": 0.3438, "step": 25833 }, { "epoch": 0.74, "grad_norm": 2.898197009862208, "learning_rate": 1.6725609152690052e-06, "loss": 0.4136, "step": 25834 }, { "epoch": 0.74, "grad_norm": 6.1934655071248725, "learning_rate": 1.6722147762170815e-06, "loss": 0.2812, "step": 25835 }, { "epoch": 0.74, "grad_norm": 4.230670843114113, "learning_rate": 1.6718686657936828e-06, "loss": 0.2136, "step": 25836 }, { "epoch": 0.74, "grad_norm": 6.560726034995076, "learning_rate": 1.6715225840017902e-06, "loss": 0.1808, "step": 25837 }, { "epoch": 0.74, "grad_norm": 5.137566906198665, "learning_rate": 1.671176530844379e-06, "loss": 0.5139, "step": 25838 }, { "epoch": 0.74, "grad_norm": 5.602525572405647, "learning_rate": 1.670830506324424e-06, "loss": 0.5317, "step": 25839 }, { "epoch": 0.74, "grad_norm": 10.48681393949385, "learning_rate": 1.6704845104449062e-06, "loss": 0.5914, "step": 25840 }, { "epoch": 0.74, "grad_norm": 6.649807261061646, "learning_rate": 1.6701385432087986e-06, "loss": 0.4149, "step": 25841 }, { "epoch": 0.74, "grad_norm": 4.536462377231809, "learning_rate": 1.669792604619081e-06, "loss": 0.1785, "step": 25842 }, { "epoch": 0.74, "grad_norm": 3.504611689597564, "learning_rate": 1.669446694678727e-06, "loss": 0.3917, "step": 25843 }, { "epoch": 0.74, "grad_norm": 6.805148296323964, "learning_rate": 1.6691008133907133e-06, "loss": 0.4003, "step": 25844 }, { "epoch": 0.74, "grad_norm": 6.226699999890902, "learning_rate": 1.668754960758014e-06, "loss": 0.3642, "step": 25845 }, { "epoch": 0.74, "grad_norm": 5.257626761206643, "learning_rate": 1.6684091367836053e-06, "loss": 0.3705, "step": 25846 }, { "epoch": 0.74, "grad_norm": 2.9341675391507835, "learning_rate": 1.668063341470464e-06, "loss": 0.1551, "step": 25847 }, { "epoch": 0.74, "grad_norm": 5.805273383288509, "learning_rate": 1.6677175748215618e-06, "loss": 0.6325, "step": 25848 }, { "epoch": 0.74, "grad_norm": 3.54683349181771, "learning_rate": 1.6673718368398772e-06, "loss": 0.3365, "step": 25849 }, { "epoch": 0.74, "grad_norm": 7.923700998747215, "learning_rate": 1.6670261275283822e-06, "loss": 0.5633, "step": 25850 }, { "epoch": 0.74, "grad_norm": 5.901282002290145, "learning_rate": 1.666680446890051e-06, "loss": 0.3529, "step": 25851 }, { "epoch": 0.74, "grad_norm": 5.3997709225702675, "learning_rate": 1.6663347949278558e-06, "loss": 0.829, "step": 25852 }, { "epoch": 0.74, "grad_norm": 3.9710667974175404, "learning_rate": 1.6659891716447736e-06, "loss": 0.3816, "step": 25853 }, { "epoch": 0.74, "grad_norm": 13.265457619553525, "learning_rate": 1.6656435770437746e-06, "loss": 0.3131, "step": 25854 }, { "epoch": 0.74, "grad_norm": 7.009021633457145, "learning_rate": 1.6652980111278328e-06, "loss": 0.5008, "step": 25855 }, { "epoch": 0.74, "grad_norm": 7.551106848014201, "learning_rate": 1.664952473899924e-06, "loss": 0.6494, "step": 25856 }, { "epoch": 0.74, "grad_norm": 4.713367853771163, "learning_rate": 1.664606965363018e-06, "loss": 0.3311, "step": 25857 }, { "epoch": 0.74, "grad_norm": 3.58944827818494, "learning_rate": 1.6642614855200878e-06, "loss": 0.3743, "step": 25858 }, { "epoch": 0.74, "grad_norm": 3.777091663868311, "learning_rate": 1.6639160343741033e-06, "loss": 0.2876, "step": 25859 }, { "epoch": 0.74, "grad_norm": 5.398412237922532, "learning_rate": 1.6635706119280403e-06, "loss": 0.7829, "step": 25860 }, { "epoch": 0.74, "grad_norm": 3.5129789779277334, "learning_rate": 1.6632252181848669e-06, "loss": 0.2471, "step": 25861 }, { "epoch": 0.74, "grad_norm": 5.821084441127765, "learning_rate": 1.6628798531475566e-06, "loss": 0.4357, "step": 25862 }, { "epoch": 0.74, "grad_norm": 7.209551445530148, "learning_rate": 1.6625345168190809e-06, "loss": 0.4657, "step": 25863 }, { "epoch": 0.74, "grad_norm": 7.4743772878865835, "learning_rate": 1.66218920920241e-06, "loss": 0.3903, "step": 25864 }, { "epoch": 0.74, "grad_norm": 5.016914605934461, "learning_rate": 1.6618439303005146e-06, "loss": 0.5591, "step": 25865 }, { "epoch": 0.74, "grad_norm": 2.464627058415744, "learning_rate": 1.6614986801163635e-06, "loss": 0.1809, "step": 25866 }, { "epoch": 0.74, "grad_norm": 5.784803051311968, "learning_rate": 1.6611534586529292e-06, "loss": 0.4589, "step": 25867 }, { "epoch": 0.74, "grad_norm": 3.3031466704409307, "learning_rate": 1.6608082659131797e-06, "loss": 0.4988, "step": 25868 }, { "epoch": 0.74, "grad_norm": 4.4921291181668215, "learning_rate": 1.6604631019000865e-06, "loss": 0.4002, "step": 25869 }, { "epoch": 0.74, "grad_norm": 4.098896722520394, "learning_rate": 1.6601179666166183e-06, "loss": 0.2023, "step": 25870 }, { "epoch": 0.74, "grad_norm": 9.976502946333852, "learning_rate": 1.6597728600657427e-06, "loss": 0.744, "step": 25871 }, { "epoch": 0.74, "grad_norm": 5.8654997286378245, "learning_rate": 1.6594277822504311e-06, "loss": 0.2924, "step": 25872 }, { "epoch": 0.74, "grad_norm": 4.738124835395886, "learning_rate": 1.6590827331736492e-06, "loss": 0.561, "step": 25873 }, { "epoch": 0.74, "grad_norm": 7.597687442195771, "learning_rate": 1.6587377128383692e-06, "loss": 0.273, "step": 25874 }, { "epoch": 0.74, "grad_norm": 7.327648887540289, "learning_rate": 1.658392721247556e-06, "loss": 0.4718, "step": 25875 }, { "epoch": 0.74, "grad_norm": 4.700138423788607, "learning_rate": 1.6580477584041798e-06, "loss": 0.4676, "step": 25876 }, { "epoch": 0.74, "grad_norm": 9.086452872960411, "learning_rate": 1.6577028243112076e-06, "loss": 0.5333, "step": 25877 }, { "epoch": 0.74, "grad_norm": 4.30879245552326, "learning_rate": 1.6573579189716044e-06, "loss": 0.5846, "step": 25878 }, { "epoch": 0.74, "grad_norm": 3.5707133462540708, "learning_rate": 1.6570130423883418e-06, "loss": 0.3678, "step": 25879 }, { "epoch": 0.74, "grad_norm": 4.33046295699672, "learning_rate": 1.656668194564382e-06, "loss": 0.573, "step": 25880 }, { "epoch": 0.74, "grad_norm": 6.930055249421917, "learning_rate": 1.6563233755026964e-06, "loss": 0.5121, "step": 25881 }, { "epoch": 0.74, "grad_norm": 5.552500899347871, "learning_rate": 1.655978585206247e-06, "loss": 0.453, "step": 25882 }, { "epoch": 0.74, "grad_norm": 5.292886318113874, "learning_rate": 1.655633823678004e-06, "loss": 0.3297, "step": 25883 }, { "epoch": 0.74, "grad_norm": 5.400138090275374, "learning_rate": 1.6552890909209314e-06, "loss": 0.5319, "step": 25884 }, { "epoch": 0.74, "grad_norm": 4.60304109557754, "learning_rate": 1.654944386937995e-06, "loss": 0.373, "step": 25885 }, { "epoch": 0.74, "grad_norm": 4.4008452633103285, "learning_rate": 1.6545997117321582e-06, "loss": 0.1296, "step": 25886 }, { "epoch": 0.74, "grad_norm": 6.111365732995901, "learning_rate": 1.654255065306389e-06, "loss": 0.8189, "step": 25887 }, { "epoch": 0.74, "grad_norm": 2.7780092410503068, "learning_rate": 1.6539104476636525e-06, "loss": 0.2906, "step": 25888 }, { "epoch": 0.74, "grad_norm": 4.646360199252344, "learning_rate": 1.6535658588069109e-06, "loss": 0.505, "step": 25889 }, { "epoch": 0.74, "grad_norm": 6.156142219335801, "learning_rate": 1.6532212987391316e-06, "loss": 0.4066, "step": 25890 }, { "epoch": 0.74, "grad_norm": 9.151136174561598, "learning_rate": 1.6528767674632779e-06, "loss": 0.6518, "step": 25891 }, { "epoch": 0.74, "grad_norm": 6.791983797903248, "learning_rate": 1.6525322649823128e-06, "loss": 0.6591, "step": 25892 }, { "epoch": 0.74, "grad_norm": 5.944760671663694, "learning_rate": 1.652187791299199e-06, "loss": 0.75, "step": 25893 }, { "epoch": 0.74, "grad_norm": 4.274086019065405, "learning_rate": 1.6518433464169032e-06, "loss": 0.5439, "step": 25894 }, { "epoch": 0.74, "grad_norm": 4.398288487134325, "learning_rate": 1.6514989303383856e-06, "loss": 0.3495, "step": 25895 }, { "epoch": 0.74, "grad_norm": 4.7773662575965865, "learning_rate": 1.6511545430666097e-06, "loss": 0.2814, "step": 25896 }, { "epoch": 0.74, "grad_norm": 3.5281504134284134, "learning_rate": 1.650810184604541e-06, "loss": 0.3222, "step": 25897 }, { "epoch": 0.74, "grad_norm": 3.8179600811745344, "learning_rate": 1.6504658549551395e-06, "loss": 0.2395, "step": 25898 }, { "epoch": 0.74, "grad_norm": 9.427190239793655, "learning_rate": 1.6501215541213683e-06, "loss": 1.1835, "step": 25899 }, { "epoch": 0.74, "grad_norm": 2.715033336578757, "learning_rate": 1.6497772821061868e-06, "loss": 0.2958, "step": 25900 }, { "epoch": 0.74, "grad_norm": 4.302065346644057, "learning_rate": 1.6494330389125607e-06, "loss": 0.4287, "step": 25901 }, { "epoch": 0.74, "grad_norm": 6.023722244835195, "learning_rate": 1.6490888245434482e-06, "loss": 0.5327, "step": 25902 }, { "epoch": 0.74, "grad_norm": 8.281514231946836, "learning_rate": 1.6487446390018135e-06, "loss": 0.51, "step": 25903 }, { "epoch": 0.74, "grad_norm": 5.922393917004817, "learning_rate": 1.6484004822906164e-06, "loss": 0.347, "step": 25904 }, { "epoch": 0.74, "grad_norm": 8.391037573873811, "learning_rate": 1.6480563544128146e-06, "loss": 0.4309, "step": 25905 }, { "epoch": 0.74, "grad_norm": 1.941664235341374, "learning_rate": 1.6477122553713742e-06, "loss": 0.5334, "step": 25906 }, { "epoch": 0.74, "grad_norm": 5.121924780277306, "learning_rate": 1.6473681851692503e-06, "loss": 0.1519, "step": 25907 }, { "epoch": 0.74, "grad_norm": 5.58226900014839, "learning_rate": 1.6470241438094063e-06, "loss": 0.3938, "step": 25908 }, { "epoch": 0.74, "grad_norm": 5.972323281766275, "learning_rate": 1.6466801312948e-06, "loss": 0.3542, "step": 25909 }, { "epoch": 0.74, "grad_norm": 5.182497001937831, "learning_rate": 1.646336147628393e-06, "loss": 0.4498, "step": 25910 }, { "epoch": 0.74, "grad_norm": 3.481202310478612, "learning_rate": 1.6459921928131428e-06, "loss": 0.6853, "step": 25911 }, { "epoch": 0.74, "grad_norm": 3.463449466405509, "learning_rate": 1.6456482668520075e-06, "loss": 0.2971, "step": 25912 }, { "epoch": 0.74, "grad_norm": 6.160723014888542, "learning_rate": 1.6453043697479487e-06, "loss": 0.5414, "step": 25913 }, { "epoch": 0.74, "grad_norm": 4.186749134782745, "learning_rate": 1.6449605015039216e-06, "loss": 0.3357, "step": 25914 }, { "epoch": 0.74, "grad_norm": 7.44257220247642, "learning_rate": 1.6446166621228882e-06, "loss": 0.2878, "step": 25915 }, { "epoch": 0.74, "grad_norm": 5.745513637778506, "learning_rate": 1.6442728516078026e-06, "loss": 0.3178, "step": 25916 }, { "epoch": 0.74, "grad_norm": 1.7668377247654343, "learning_rate": 1.643929069961626e-06, "loss": 0.1637, "step": 25917 }, { "epoch": 0.74, "grad_norm": 4.4729243169019375, "learning_rate": 1.643585317187315e-06, "loss": 0.2958, "step": 25918 }, { "epoch": 0.74, "grad_norm": 4.756576302322082, "learning_rate": 1.643241593287826e-06, "loss": 0.4202, "step": 25919 }, { "epoch": 0.74, "grad_norm": 6.24404749653192, "learning_rate": 1.6428978982661149e-06, "loss": 0.3347, "step": 25920 }, { "epoch": 0.74, "grad_norm": 2.274248050774826, "learning_rate": 1.6425542321251391e-06, "loss": 0.1252, "step": 25921 }, { "epoch": 0.74, "grad_norm": 7.770339731966094, "learning_rate": 1.642210594867858e-06, "loss": 0.374, "step": 25922 }, { "epoch": 0.74, "grad_norm": 11.971693906955316, "learning_rate": 1.6418669864972236e-06, "loss": 0.5349, "step": 25923 }, { "epoch": 0.74, "grad_norm": 3.1461700907620194, "learning_rate": 1.641523407016196e-06, "loss": 0.2147, "step": 25924 }, { "epoch": 0.74, "grad_norm": 2.477398078584992, "learning_rate": 1.6411798564277292e-06, "loss": 0.1042, "step": 25925 }, { "epoch": 0.74, "grad_norm": 6.278583169247595, "learning_rate": 1.640836334734779e-06, "loss": 0.5282, "step": 25926 }, { "epoch": 0.74, "grad_norm": 10.800214237278164, "learning_rate": 1.6404928419402976e-06, "loss": 0.6108, "step": 25927 }, { "epoch": 0.74, "grad_norm": 6.652594229551562, "learning_rate": 1.6401493780472432e-06, "loss": 0.1473, "step": 25928 }, { "epoch": 0.74, "grad_norm": 7.31949224934981, "learning_rate": 1.6398059430585717e-06, "loss": 0.2741, "step": 25929 }, { "epoch": 0.74, "grad_norm": 4.51632914613434, "learning_rate": 1.639462536977236e-06, "loss": 0.3271, "step": 25930 }, { "epoch": 0.74, "grad_norm": 6.5982366908467505, "learning_rate": 1.6391191598061901e-06, "loss": 0.4044, "step": 25931 }, { "epoch": 0.74, "grad_norm": 7.227778102600432, "learning_rate": 1.6387758115483864e-06, "loss": 0.5424, "step": 25932 }, { "epoch": 0.74, "grad_norm": 8.073418136936693, "learning_rate": 1.6384324922067823e-06, "loss": 0.3172, "step": 25933 }, { "epoch": 0.74, "grad_norm": 8.01052878854873, "learning_rate": 1.638089201784328e-06, "loss": 0.4832, "step": 25934 }, { "epoch": 0.74, "grad_norm": 7.649101781783578, "learning_rate": 1.63774594028398e-06, "loss": 0.5263, "step": 25935 }, { "epoch": 0.74, "grad_norm": 3.645410822046388, "learning_rate": 1.6374027077086885e-06, "loss": 0.2355, "step": 25936 }, { "epoch": 0.74, "grad_norm": 5.322565663131107, "learning_rate": 1.637059504061409e-06, "loss": 0.3595, "step": 25937 }, { "epoch": 0.74, "grad_norm": 4.623316561556876, "learning_rate": 1.6367163293450922e-06, "loss": 0.2426, "step": 25938 }, { "epoch": 0.74, "grad_norm": 4.943893349518779, "learning_rate": 1.636373183562689e-06, "loss": 0.2619, "step": 25939 }, { "epoch": 0.74, "grad_norm": 6.453375601347646, "learning_rate": 1.6360300667171552e-06, "loss": 0.6208, "step": 25940 }, { "epoch": 0.74, "grad_norm": 5.199808891159139, "learning_rate": 1.6356869788114388e-06, "loss": 0.5411, "step": 25941 }, { "epoch": 0.74, "grad_norm": 7.07018004235386, "learning_rate": 1.6353439198484943e-06, "loss": 0.5566, "step": 25942 }, { "epoch": 0.74, "grad_norm": 10.508003681244658, "learning_rate": 1.6350008898312709e-06, "loss": 0.482, "step": 25943 }, { "epoch": 0.74, "grad_norm": 5.982755018442801, "learning_rate": 1.6346578887627217e-06, "loss": 0.4436, "step": 25944 }, { "epoch": 0.74, "grad_norm": 4.419201477381731, "learning_rate": 1.6343149166457962e-06, "loss": 0.5341, "step": 25945 }, { "epoch": 0.74, "grad_norm": 6.133908469075376, "learning_rate": 1.6339719734834437e-06, "loss": 0.3176, "step": 25946 }, { "epoch": 0.74, "grad_norm": 7.43814879080017, "learning_rate": 1.6336290592786175e-06, "loss": 0.7119, "step": 25947 }, { "epoch": 0.74, "grad_norm": 7.851056544764841, "learning_rate": 1.6332861740342643e-06, "loss": 0.5215, "step": 25948 }, { "epoch": 0.74, "grad_norm": 2.856023564952047, "learning_rate": 1.6329433177533377e-06, "loss": 0.5064, "step": 25949 }, { "epoch": 0.74, "grad_norm": 4.185924917033422, "learning_rate": 1.6326004904387827e-06, "loss": 0.4271, "step": 25950 }, { "epoch": 0.74, "grad_norm": 6.361264559668596, "learning_rate": 1.6322576920935534e-06, "loss": 0.4147, "step": 25951 }, { "epoch": 0.74, "grad_norm": 5.0843697517991595, "learning_rate": 1.631914922720596e-06, "loss": 0.6794, "step": 25952 }, { "epoch": 0.74, "grad_norm": 4.421620364995777, "learning_rate": 1.6315721823228585e-06, "loss": 0.3026, "step": 25953 }, { "epoch": 0.74, "grad_norm": 7.586082323540342, "learning_rate": 1.6312294709032928e-06, "loss": 0.3875, "step": 25954 }, { "epoch": 0.74, "grad_norm": 5.6777012013403505, "learning_rate": 1.6308867884648426e-06, "loss": 0.5944, "step": 25955 }, { "epoch": 0.74, "grad_norm": 3.707016457466864, "learning_rate": 1.630544135010461e-06, "loss": 0.511, "step": 25956 }, { "epoch": 0.74, "grad_norm": 6.154302840985088, "learning_rate": 1.6302015105430917e-06, "loss": 0.7458, "step": 25957 }, { "epoch": 0.74, "grad_norm": 6.6528283579405, "learning_rate": 1.6298589150656868e-06, "loss": 0.4204, "step": 25958 }, { "epoch": 0.74, "grad_norm": 6.02328732472792, "learning_rate": 1.6295163485811877e-06, "loss": 0.2861, "step": 25959 }, { "epoch": 0.74, "grad_norm": 6.168485768387951, "learning_rate": 1.629173811092546e-06, "loss": 0.4985, "step": 25960 }, { "epoch": 0.74, "grad_norm": 8.184961114603466, "learning_rate": 1.6288313026027053e-06, "loss": 0.8179, "step": 25961 }, { "epoch": 0.74, "grad_norm": 4.107287728971019, "learning_rate": 1.6284888231146135e-06, "loss": 0.1633, "step": 25962 }, { "epoch": 0.74, "grad_norm": 7.887215480116234, "learning_rate": 1.6281463726312191e-06, "loss": 0.4006, "step": 25963 }, { "epoch": 0.74, "grad_norm": 12.825982457355808, "learning_rate": 1.6278039511554667e-06, "loss": 0.4827, "step": 25964 }, { "epoch": 0.74, "grad_norm": 4.683921796113285, "learning_rate": 1.6274615586903013e-06, "loss": 0.3582, "step": 25965 }, { "epoch": 0.74, "grad_norm": 3.9781295596543855, "learning_rate": 1.6271191952386673e-06, "loss": 0.2979, "step": 25966 }, { "epoch": 0.74, "grad_norm": 5.940707374415016, "learning_rate": 1.6267768608035134e-06, "loss": 0.4972, "step": 25967 }, { "epoch": 0.74, "grad_norm": 4.657523089979744, "learning_rate": 1.6264345553877813e-06, "loss": 0.2442, "step": 25968 }, { "epoch": 0.74, "grad_norm": 5.680448415544337, "learning_rate": 1.6260922789944172e-06, "loss": 0.2012, "step": 25969 }, { "epoch": 0.74, "grad_norm": 4.601003227508016, "learning_rate": 1.6257500316263674e-06, "loss": 0.2457, "step": 25970 }, { "epoch": 0.74, "grad_norm": 7.806912674886639, "learning_rate": 1.6254078132865748e-06, "loss": 0.5623, "step": 25971 }, { "epoch": 0.74, "grad_norm": 13.48271156562374, "learning_rate": 1.6250656239779828e-06, "loss": 0.1905, "step": 25972 }, { "epoch": 0.74, "grad_norm": 3.6099887342620645, "learning_rate": 1.624723463703534e-06, "loss": 0.2759, "step": 25973 }, { "epoch": 0.74, "grad_norm": 5.328296625052446, "learning_rate": 1.6243813324661761e-06, "loss": 0.6488, "step": 25974 }, { "epoch": 0.74, "grad_norm": 6.290022422556491, "learning_rate": 1.6240392302688479e-06, "loss": 0.5776, "step": 25975 }, { "epoch": 0.74, "grad_norm": 4.515419476857792, "learning_rate": 1.623697157114496e-06, "loss": 0.419, "step": 25976 }, { "epoch": 0.74, "grad_norm": 4.220523122526419, "learning_rate": 1.6233551130060604e-06, "loss": 0.3503, "step": 25977 }, { "epoch": 0.74, "grad_norm": 5.188633760396857, "learning_rate": 1.623013097946487e-06, "loss": 0.2807, "step": 25978 }, { "epoch": 0.74, "grad_norm": 8.967288120766764, "learning_rate": 1.6226711119387157e-06, "loss": 0.4445, "step": 25979 }, { "epoch": 0.74, "grad_norm": 7.320609787936816, "learning_rate": 1.6223291549856874e-06, "loss": 0.4903, "step": 25980 }, { "epoch": 0.74, "grad_norm": 2.108782805214209, "learning_rate": 1.621987227090347e-06, "loss": 0.1466, "step": 25981 }, { "epoch": 0.74, "grad_norm": 5.660520132516426, "learning_rate": 1.6216453282556332e-06, "loss": 0.4502, "step": 25982 }, { "epoch": 0.74, "grad_norm": 14.326485410040439, "learning_rate": 1.62130345848449e-06, "loss": 0.3583, "step": 25983 }, { "epoch": 0.74, "grad_norm": 10.148942025955998, "learning_rate": 1.6209616177798558e-06, "loss": 0.6593, "step": 25984 }, { "epoch": 0.74, "grad_norm": 6.3005584090877935, "learning_rate": 1.6206198061446743e-06, "loss": 0.5956, "step": 25985 }, { "epoch": 0.74, "grad_norm": 6.1465969796619495, "learning_rate": 1.6202780235818843e-06, "loss": 0.6462, "step": 25986 }, { "epoch": 0.74, "grad_norm": 8.118981876248286, "learning_rate": 1.619936270094425e-06, "loss": 0.923, "step": 25987 }, { "epoch": 0.74, "grad_norm": 10.513234585237997, "learning_rate": 1.6195945456852396e-06, "loss": 0.5305, "step": 25988 }, { "epoch": 0.74, "grad_norm": 6.750444256326829, "learning_rate": 1.619252850357264e-06, "loss": 0.394, "step": 25989 }, { "epoch": 0.74, "grad_norm": 4.312892730079415, "learning_rate": 1.6189111841134418e-06, "loss": 0.2352, "step": 25990 }, { "epoch": 0.74, "grad_norm": 6.823156194389328, "learning_rate": 1.6185695469567103e-06, "loss": 0.5482, "step": 25991 }, { "epoch": 0.74, "grad_norm": 2.508990477557944, "learning_rate": 1.618227938890009e-06, "loss": 0.3184, "step": 25992 }, { "epoch": 0.74, "grad_norm": 7.7152321401444794, "learning_rate": 1.6178863599162743e-06, "loss": 0.452, "step": 25993 }, { "epoch": 0.74, "grad_norm": 3.1059114344583847, "learning_rate": 1.6175448100384471e-06, "loss": 0.1803, "step": 25994 }, { "epoch": 0.74, "grad_norm": 9.671078946534989, "learning_rate": 1.6172032892594669e-06, "loss": 0.8119, "step": 25995 }, { "epoch": 0.74, "grad_norm": 7.705694052635668, "learning_rate": 1.6168617975822686e-06, "loss": 0.8109, "step": 25996 }, { "epoch": 0.74, "grad_norm": 8.821798870043304, "learning_rate": 1.6165203350097935e-06, "loss": 0.5794, "step": 25997 }, { "epoch": 0.74, "grad_norm": 5.082081465359422, "learning_rate": 1.6161789015449775e-06, "loss": 0.3055, "step": 25998 }, { "epoch": 0.74, "grad_norm": 2.8114407558016525, "learning_rate": 1.6158374971907576e-06, "loss": 0.1319, "step": 25999 }, { "epoch": 0.74, "grad_norm": 3.230554028346046, "learning_rate": 1.6154961219500693e-06, "loss": 0.5346, "step": 26000 }, { "epoch": 0.74, "grad_norm": 4.645481047794785, "learning_rate": 1.6151547758258529e-06, "loss": 0.4462, "step": 26001 }, { "epoch": 0.74, "grad_norm": 11.289109279443249, "learning_rate": 1.6148134588210418e-06, "loss": 0.5366, "step": 26002 }, { "epoch": 0.74, "grad_norm": 8.416955080535882, "learning_rate": 1.6144721709385735e-06, "loss": 0.7194, "step": 26003 }, { "epoch": 0.74, "grad_norm": 8.125375269513171, "learning_rate": 1.6141309121813858e-06, "loss": 0.8997, "step": 26004 }, { "epoch": 0.74, "grad_norm": 3.6370067707740286, "learning_rate": 1.6137896825524135e-06, "loss": 0.2907, "step": 26005 }, { "epoch": 0.74, "grad_norm": 4.52581176642164, "learning_rate": 1.6134484820545908e-06, "loss": 0.5142, "step": 26006 }, { "epoch": 0.74, "grad_norm": 4.07729772602806, "learning_rate": 1.6131073106908529e-06, "loss": 0.4782, "step": 26007 }, { "epoch": 0.74, "grad_norm": 3.9851977228198945, "learning_rate": 1.6127661684641371e-06, "loss": 0.3915, "step": 26008 }, { "epoch": 0.74, "grad_norm": 3.893184448121451, "learning_rate": 1.6124250553773758e-06, "loss": 0.4201, "step": 26009 }, { "epoch": 0.74, "grad_norm": 5.11227271826949, "learning_rate": 1.612083971433504e-06, "loss": 0.4798, "step": 26010 }, { "epoch": 0.74, "grad_norm": 3.3966479270300822, "learning_rate": 1.6117429166354587e-06, "loss": 0.3837, "step": 26011 }, { "epoch": 0.74, "grad_norm": 4.825424574914215, "learning_rate": 1.6114018909861717e-06, "loss": 0.3881, "step": 26012 }, { "epoch": 0.74, "grad_norm": 8.8065755963965, "learning_rate": 1.611060894488577e-06, "loss": 0.5803, "step": 26013 }, { "epoch": 0.74, "grad_norm": 8.736769673791667, "learning_rate": 1.6107199271456065e-06, "loss": 0.5352, "step": 26014 }, { "epoch": 0.75, "grad_norm": 10.971634466522017, "learning_rate": 1.6103789889601966e-06, "loss": 0.3429, "step": 26015 }, { "epoch": 0.75, "grad_norm": 1.987228609545597, "learning_rate": 1.6100380799352777e-06, "loss": 0.3444, "step": 26016 }, { "epoch": 0.75, "grad_norm": 4.822523948329159, "learning_rate": 1.6096972000737853e-06, "loss": 0.247, "step": 26017 }, { "epoch": 0.75, "grad_norm": 5.070213798890652, "learning_rate": 1.6093563493786508e-06, "loss": 0.2392, "step": 26018 }, { "epoch": 0.75, "grad_norm": 5.911875190545315, "learning_rate": 1.609015527852804e-06, "loss": 0.3426, "step": 26019 }, { "epoch": 0.75, "grad_norm": 3.5282180564373355, "learning_rate": 1.6086747354991812e-06, "loss": 0.2861, "step": 26020 }, { "epoch": 0.75, "grad_norm": 3.144430729935719, "learning_rate": 1.6083339723207097e-06, "loss": 0.2489, "step": 26021 }, { "epoch": 0.75, "grad_norm": 6.600780047934557, "learning_rate": 1.6079932383203256e-06, "loss": 0.2695, "step": 26022 }, { "epoch": 0.75, "grad_norm": 5.764877064064917, "learning_rate": 1.6076525335009568e-06, "loss": 0.4712, "step": 26023 }, { "epoch": 0.75, "grad_norm": 1.5342101270738504, "learning_rate": 1.6073118578655368e-06, "loss": 0.26, "step": 26024 }, { "epoch": 0.75, "grad_norm": 9.35945916496224, "learning_rate": 1.606971211416995e-06, "loss": 0.7246, "step": 26025 }, { "epoch": 0.75, "grad_norm": 6.043774344583612, "learning_rate": 1.6066305941582605e-06, "loss": 0.6634, "step": 26026 }, { "epoch": 0.75, "grad_norm": 4.084383433770343, "learning_rate": 1.606290006092267e-06, "loss": 0.2729, "step": 26027 }, { "epoch": 0.75, "grad_norm": 6.755619217948405, "learning_rate": 1.6059494472219411e-06, "loss": 0.5973, "step": 26028 }, { "epoch": 0.75, "grad_norm": 7.06438015917789, "learning_rate": 1.605608917550216e-06, "loss": 1.2476, "step": 26029 }, { "epoch": 0.75, "grad_norm": 3.9850901242430945, "learning_rate": 1.6052684170800175e-06, "loss": 0.398, "step": 26030 }, { "epoch": 0.75, "grad_norm": 4.399766128458456, "learning_rate": 1.6049279458142785e-06, "loss": 0.4783, "step": 26031 }, { "epoch": 0.75, "grad_norm": 6.795052259860361, "learning_rate": 1.6045875037559267e-06, "loss": 0.2696, "step": 26032 }, { "epoch": 0.75, "grad_norm": 7.290758465110611, "learning_rate": 1.6042470909078905e-06, "loss": 0.3249, "step": 26033 }, { "epoch": 0.75, "grad_norm": 6.523806478150273, "learning_rate": 1.6039067072730969e-06, "loss": 0.5482, "step": 26034 }, { "epoch": 0.75, "grad_norm": 3.3974412532103484, "learning_rate": 1.6035663528544753e-06, "loss": 0.2454, "step": 26035 }, { "epoch": 0.75, "grad_norm": 8.515241701791512, "learning_rate": 1.603226027654956e-06, "loss": 0.7238, "step": 26036 }, { "epoch": 0.75, "grad_norm": 5.371111993932083, "learning_rate": 1.6028857316774633e-06, "loss": 0.2653, "step": 26037 }, { "epoch": 0.75, "grad_norm": 4.448806221101085, "learning_rate": 1.6025454649249283e-06, "loss": 0.4551, "step": 26038 }, { "epoch": 0.75, "grad_norm": 7.04566792978801, "learning_rate": 1.602205227400276e-06, "loss": 0.331, "step": 26039 }, { "epoch": 0.75, "grad_norm": 3.1670089377761053, "learning_rate": 1.6018650191064338e-06, "loss": 0.0666, "step": 26040 }, { "epoch": 0.75, "grad_norm": 5.747387085046464, "learning_rate": 1.6015248400463273e-06, "loss": 0.3619, "step": 26041 }, { "epoch": 0.75, "grad_norm": 5.799246331279976, "learning_rate": 1.6011846902228857e-06, "loss": 0.4308, "step": 26042 }, { "epoch": 0.75, "grad_norm": 2.8469256181959093, "learning_rate": 1.6008445696390317e-06, "loss": 0.3153, "step": 26043 }, { "epoch": 0.75, "grad_norm": 9.099394000489673, "learning_rate": 1.6005044782976937e-06, "loss": 0.7695, "step": 26044 }, { "epoch": 0.75, "grad_norm": 7.115266190949103, "learning_rate": 1.6001644162018e-06, "loss": 0.591, "step": 26045 }, { "epoch": 0.75, "grad_norm": 4.331146397528175, "learning_rate": 1.5998243833542698e-06, "loss": 0.2969, "step": 26046 }, { "epoch": 0.75, "grad_norm": 4.485773300519427, "learning_rate": 1.599484379758034e-06, "loss": 0.3235, "step": 26047 }, { "epoch": 0.75, "grad_norm": 3.76356583006903, "learning_rate": 1.5991444054160138e-06, "loss": 0.4545, "step": 26048 }, { "epoch": 0.75, "grad_norm": 4.719766115843342, "learning_rate": 1.598804460331137e-06, "loss": 0.7143, "step": 26049 }, { "epoch": 0.75, "grad_norm": 4.297362282606264, "learning_rate": 1.5984645445063247e-06, "loss": 0.3713, "step": 26050 }, { "epoch": 0.75, "grad_norm": 7.663463324126974, "learning_rate": 1.5981246579445052e-06, "loss": 0.4984, "step": 26051 }, { "epoch": 0.75, "grad_norm": 6.8362913203634195, "learning_rate": 1.5977848006486001e-06, "loss": 0.843, "step": 26052 }, { "epoch": 0.75, "grad_norm": 5.10794924753975, "learning_rate": 1.5974449726215318e-06, "loss": 0.7675, "step": 26053 }, { "epoch": 0.75, "grad_norm": 5.319641877647488, "learning_rate": 1.5971051738662275e-06, "loss": 0.2626, "step": 26054 }, { "epoch": 0.75, "grad_norm": 6.711820710830519, "learning_rate": 1.5967654043856069e-06, "loss": 0.4729, "step": 26055 }, { "epoch": 0.75, "grad_norm": 3.6216715626362412, "learning_rate": 1.5964256641825953e-06, "loss": 0.5104, "step": 26056 }, { "epoch": 0.75, "grad_norm": 3.324544260872963, "learning_rate": 1.5960859532601137e-06, "loss": 0.2158, "step": 26057 }, { "epoch": 0.75, "grad_norm": 5.069837362777012, "learning_rate": 1.595746271621087e-06, "loss": 0.4231, "step": 26058 }, { "epoch": 0.75, "grad_norm": 4.733204133437483, "learning_rate": 1.595406619268436e-06, "loss": 0.4145, "step": 26059 }, { "epoch": 0.75, "grad_norm": 6.360714145787995, "learning_rate": 1.5950669962050814e-06, "loss": 0.3688, "step": 26060 }, { "epoch": 0.75, "grad_norm": 5.270467959117782, "learning_rate": 1.594727402433947e-06, "loss": 0.2869, "step": 26061 }, { "epoch": 0.75, "grad_norm": 17.15405808362615, "learning_rate": 1.5943878379579525e-06, "loss": 0.496, "step": 26062 }, { "epoch": 0.75, "grad_norm": 3.088419237438918, "learning_rate": 1.5940483027800214e-06, "loss": 0.3182, "step": 26063 }, { "epoch": 0.75, "grad_norm": 5.828384475454539, "learning_rate": 1.5937087969030718e-06, "loss": 0.9057, "step": 26064 }, { "epoch": 0.75, "grad_norm": 7.098226235680405, "learning_rate": 1.5933693203300281e-06, "loss": 0.5266, "step": 26065 }, { "epoch": 0.75, "grad_norm": 5.370463497291037, "learning_rate": 1.5930298730638083e-06, "loss": 0.628, "step": 26066 }, { "epoch": 0.75, "grad_norm": 6.563084630810247, "learning_rate": 1.592690455107333e-06, "loss": 0.2212, "step": 26067 }, { "epoch": 0.75, "grad_norm": 3.472361857891394, "learning_rate": 1.5923510664635205e-06, "loss": 0.4626, "step": 26068 }, { "epoch": 0.75, "grad_norm": 2.9056044240950416, "learning_rate": 1.5920117071352925e-06, "loss": 0.1181, "step": 26069 }, { "epoch": 0.75, "grad_norm": 4.703000393354239, "learning_rate": 1.5916723771255699e-06, "loss": 0.3638, "step": 26070 }, { "epoch": 0.75, "grad_norm": 4.487130066438796, "learning_rate": 1.5913330764372687e-06, "loss": 0.3435, "step": 26071 }, { "epoch": 0.75, "grad_norm": 8.544369122746835, "learning_rate": 1.590993805073312e-06, "loss": 0.3355, "step": 26072 }, { "epoch": 0.75, "grad_norm": 7.670359661915447, "learning_rate": 1.5906545630366127e-06, "loss": 0.948, "step": 26073 }, { "epoch": 0.75, "grad_norm": 3.175080131660973, "learning_rate": 1.5903153503300945e-06, "loss": 0.2887, "step": 26074 }, { "epoch": 0.75, "grad_norm": 4.490594783885707, "learning_rate": 1.5899761669566716e-06, "loss": 0.4685, "step": 26075 }, { "epoch": 0.75, "grad_norm": 5.844297536402492, "learning_rate": 1.5896370129192639e-06, "loss": 0.4629, "step": 26076 }, { "epoch": 0.75, "grad_norm": 7.972806488969042, "learning_rate": 1.589297888220791e-06, "loss": 0.5331, "step": 26077 }, { "epoch": 0.75, "grad_norm": 4.009996410330214, "learning_rate": 1.5889587928641682e-06, "loss": 0.3047, "step": 26078 }, { "epoch": 0.75, "grad_norm": 4.8221371008987175, "learning_rate": 1.5886197268523128e-06, "loss": 0.611, "step": 26079 }, { "epoch": 0.75, "grad_norm": 6.377286650507063, "learning_rate": 1.5882806901881404e-06, "loss": 0.3706, "step": 26080 }, { "epoch": 0.75, "grad_norm": 7.181388445948087, "learning_rate": 1.5879416828745703e-06, "loss": 0.7282, "step": 26081 }, { "epoch": 0.75, "grad_norm": 6.035895678212132, "learning_rate": 1.587602704914517e-06, "loss": 0.6675, "step": 26082 }, { "epoch": 0.75, "grad_norm": 8.690915979852047, "learning_rate": 1.5872637563108984e-06, "loss": 0.6783, "step": 26083 }, { "epoch": 0.75, "grad_norm": 5.826458096866746, "learning_rate": 1.5869248370666285e-06, "loss": 0.3065, "step": 26084 }, { "epoch": 0.75, "grad_norm": 3.7133634006394716, "learning_rate": 1.5865859471846252e-06, "loss": 0.3149, "step": 26085 }, { "epoch": 0.75, "grad_norm": 7.244491227203035, "learning_rate": 1.5862470866678025e-06, "loss": 0.5844, "step": 26086 }, { "epoch": 0.75, "grad_norm": 8.874764237831876, "learning_rate": 1.5859082555190746e-06, "loss": 0.4709, "step": 26087 }, { "epoch": 0.75, "grad_norm": 8.077283509215498, "learning_rate": 1.5855694537413596e-06, "loss": 0.3499, "step": 26088 }, { "epoch": 0.75, "grad_norm": 5.30979653178767, "learning_rate": 1.585230681337568e-06, "loss": 0.7714, "step": 26089 }, { "epoch": 0.75, "grad_norm": 6.7259349286564065, "learning_rate": 1.5848919383106182e-06, "loss": 0.3403, "step": 26090 }, { "epoch": 0.75, "grad_norm": 8.650002356071372, "learning_rate": 1.584553224663421e-06, "loss": 0.7876, "step": 26091 }, { "epoch": 0.75, "grad_norm": 6.259372588151, "learning_rate": 1.5842145403988935e-06, "loss": 0.6576, "step": 26092 }, { "epoch": 0.75, "grad_norm": 6.888967855404245, "learning_rate": 1.5838758855199476e-06, "loss": 0.4402, "step": 26093 }, { "epoch": 0.75, "grad_norm": 6.29820358610436, "learning_rate": 1.5835372600294952e-06, "loss": 0.4314, "step": 26094 }, { "epoch": 0.75, "grad_norm": 6.0617343773083014, "learning_rate": 1.583198663930453e-06, "loss": 0.7048, "step": 26095 }, { "epoch": 0.75, "grad_norm": 6.237660433859867, "learning_rate": 1.5828600972257302e-06, "loss": 0.4316, "step": 26096 }, { "epoch": 0.75, "grad_norm": 4.395232555802848, "learning_rate": 1.582521559918243e-06, "loss": 0.4549, "step": 26097 }, { "epoch": 0.75, "grad_norm": 3.4687465633340944, "learning_rate": 1.5821830520109005e-06, "loss": 0.1776, "step": 26098 }, { "epoch": 0.75, "grad_norm": 4.074582486909174, "learning_rate": 1.5818445735066174e-06, "loss": 0.3761, "step": 26099 }, { "epoch": 0.75, "grad_norm": 4.863145745162928, "learning_rate": 1.5815061244083052e-06, "loss": 0.4255, "step": 26100 }, { "epoch": 0.75, "grad_norm": 3.859211350650923, "learning_rate": 1.5811677047188734e-06, "loss": 0.3705, "step": 26101 }, { "epoch": 0.75, "grad_norm": 4.166384480775549, "learning_rate": 1.5808293144412362e-06, "loss": 0.3394, "step": 26102 }, { "epoch": 0.75, "grad_norm": 4.936394628095073, "learning_rate": 1.5804909535783014e-06, "loss": 0.664, "step": 26103 }, { "epoch": 0.75, "grad_norm": 2.34801325566922, "learning_rate": 1.5801526221329844e-06, "loss": 0.2594, "step": 26104 }, { "epoch": 0.75, "grad_norm": 6.233356440428201, "learning_rate": 1.5798143201081927e-06, "loss": 0.385, "step": 26105 }, { "epoch": 0.75, "grad_norm": 5.273998903131932, "learning_rate": 1.5794760475068377e-06, "loss": 0.6197, "step": 26106 }, { "epoch": 0.75, "grad_norm": 4.856429995388575, "learning_rate": 1.5791378043318272e-06, "loss": 0.1871, "step": 26107 }, { "epoch": 0.75, "grad_norm": 6.648036796785879, "learning_rate": 1.5787995905860748e-06, "loss": 0.5033, "step": 26108 }, { "epoch": 0.75, "grad_norm": 2.931758992590311, "learning_rate": 1.5784614062724862e-06, "loss": 0.3578, "step": 26109 }, { "epoch": 0.75, "grad_norm": 6.631141425132849, "learning_rate": 1.5781232513939738e-06, "loss": 0.6692, "step": 26110 }, { "epoch": 0.75, "grad_norm": 4.901171199804314, "learning_rate": 1.5777851259534466e-06, "loss": 0.3492, "step": 26111 }, { "epoch": 0.75, "grad_norm": 6.410290146567751, "learning_rate": 1.5774470299538124e-06, "loss": 0.4711, "step": 26112 }, { "epoch": 0.75, "grad_norm": 8.005949073395103, "learning_rate": 1.5771089633979809e-06, "loss": 0.2803, "step": 26113 }, { "epoch": 0.75, "grad_norm": 6.370148439519287, "learning_rate": 1.5767709262888576e-06, "loss": 0.5125, "step": 26114 }, { "epoch": 0.75, "grad_norm": 7.616431033401009, "learning_rate": 1.5764329186293536e-06, "loss": 0.488, "step": 26115 }, { "epoch": 0.75, "grad_norm": 4.337870398984815, "learning_rate": 1.5760949404223746e-06, "loss": 0.3197, "step": 26116 }, { "epoch": 0.75, "grad_norm": 6.152794792959291, "learning_rate": 1.5757569916708293e-06, "loss": 0.3138, "step": 26117 }, { "epoch": 0.75, "grad_norm": 4.359589793611762, "learning_rate": 1.5754190723776263e-06, "loss": 0.322, "step": 26118 }, { "epoch": 0.75, "grad_norm": 2.6471674301580914, "learning_rate": 1.5750811825456714e-06, "loss": 0.3131, "step": 26119 }, { "epoch": 0.75, "grad_norm": 4.723575373235895, "learning_rate": 1.5747433221778718e-06, "loss": 0.4164, "step": 26120 }, { "epoch": 0.75, "grad_norm": 7.509440520433706, "learning_rate": 1.5744054912771316e-06, "loss": 0.3545, "step": 26121 }, { "epoch": 0.75, "grad_norm": 5.796354779449585, "learning_rate": 1.574067689846361e-06, "loss": 0.4757, "step": 26122 }, { "epoch": 0.75, "grad_norm": 4.720135845274203, "learning_rate": 1.5737299178884625e-06, "loss": 0.4841, "step": 26123 }, { "epoch": 0.75, "grad_norm": 5.316144299132407, "learning_rate": 1.5733921754063454e-06, "loss": 0.2522, "step": 26124 }, { "epoch": 0.75, "grad_norm": 3.651015520569059, "learning_rate": 1.573054462402912e-06, "loss": 0.2217, "step": 26125 }, { "epoch": 0.75, "grad_norm": 3.6630346487345147, "learning_rate": 1.5727167788810704e-06, "loss": 0.2979, "step": 26126 }, { "epoch": 0.75, "grad_norm": 4.1610121728526055, "learning_rate": 1.5723791248437247e-06, "loss": 0.1459, "step": 26127 }, { "epoch": 0.75, "grad_norm": 1.6072261037534994, "learning_rate": 1.5720415002937778e-06, "loss": 0.2443, "step": 26128 }, { "epoch": 0.75, "grad_norm": 5.759193285902317, "learning_rate": 1.571703905234137e-06, "loss": 0.5899, "step": 26129 }, { "epoch": 0.75, "grad_norm": 6.148559879311377, "learning_rate": 1.5713663396677043e-06, "loss": 0.6983, "step": 26130 }, { "epoch": 0.75, "grad_norm": 2.509792425702751, "learning_rate": 1.5710288035973863e-06, "loss": 0.159, "step": 26131 }, { "epoch": 0.75, "grad_norm": 3.0462059655839955, "learning_rate": 1.5706912970260852e-06, "loss": 0.4618, "step": 26132 }, { "epoch": 0.75, "grad_norm": 5.635388339589036, "learning_rate": 1.570353819956703e-06, "loss": 0.3964, "step": 26133 }, { "epoch": 0.75, "grad_norm": 6.673128908060988, "learning_rate": 1.5700163723921463e-06, "loss": 0.7393, "step": 26134 }, { "epoch": 0.75, "grad_norm": 3.139437132943339, "learning_rate": 1.5696789543353148e-06, "loss": 0.3491, "step": 26135 }, { "epoch": 0.75, "grad_norm": 3.7342167266121304, "learning_rate": 1.5693415657891148e-06, "loss": 0.272, "step": 26136 }, { "epoch": 0.75, "grad_norm": 7.982529876777323, "learning_rate": 1.5690042067564448e-06, "loss": 0.7865, "step": 26137 }, { "epoch": 0.75, "grad_norm": 12.756273409248639, "learning_rate": 1.5686668772402115e-06, "loss": 0.4538, "step": 26138 }, { "epoch": 0.75, "grad_norm": 6.651808819680395, "learning_rate": 1.5683295772433137e-06, "loss": 0.6119, "step": 26139 }, { "epoch": 0.75, "grad_norm": 8.039469627992512, "learning_rate": 1.5679923067686548e-06, "loss": 0.4112, "step": 26140 }, { "epoch": 0.75, "grad_norm": 6.390857944113671, "learning_rate": 1.5676550658191332e-06, "loss": 0.4167, "step": 26141 }, { "epoch": 0.75, "grad_norm": 5.435535119743397, "learning_rate": 1.5673178543976531e-06, "loss": 0.4127, "step": 26142 }, { "epoch": 0.75, "grad_norm": 9.13931964128371, "learning_rate": 1.5669806725071163e-06, "loss": 0.6559, "step": 26143 }, { "epoch": 0.75, "grad_norm": 2.823332630399086, "learning_rate": 1.5666435201504198e-06, "loss": 0.1506, "step": 26144 }, { "epoch": 0.75, "grad_norm": 2.5713983346659988, "learning_rate": 1.5663063973304688e-06, "loss": 0.1398, "step": 26145 }, { "epoch": 0.75, "grad_norm": 5.215978286142145, "learning_rate": 1.565969304050161e-06, "loss": 0.1348, "step": 26146 }, { "epoch": 0.75, "grad_norm": 5.180611644483286, "learning_rate": 1.5656322403123963e-06, "loss": 0.5925, "step": 26147 }, { "epoch": 0.75, "grad_norm": 6.976405775268432, "learning_rate": 1.5652952061200727e-06, "loss": 0.5131, "step": 26148 }, { "epoch": 0.75, "grad_norm": 5.494443904725397, "learning_rate": 1.5649582014760939e-06, "loss": 0.6401, "step": 26149 }, { "epoch": 0.75, "grad_norm": 4.079859952813744, "learning_rate": 1.5646212263833543e-06, "loss": 0.2497, "step": 26150 }, { "epoch": 0.75, "grad_norm": 3.658709945641921, "learning_rate": 1.5642842808447556e-06, "loss": 0.399, "step": 26151 }, { "epoch": 0.75, "grad_norm": 5.337522972111102, "learning_rate": 1.5639473648631982e-06, "loss": 0.7811, "step": 26152 }, { "epoch": 0.75, "grad_norm": 4.139191174118105, "learning_rate": 1.5636104784415784e-06, "loss": 0.6508, "step": 26153 }, { "epoch": 0.75, "grad_norm": 8.032326325605757, "learning_rate": 1.5632736215827943e-06, "loss": 0.5184, "step": 26154 }, { "epoch": 0.75, "grad_norm": 4.7962107742133195, "learning_rate": 1.5629367942897428e-06, "loss": 0.3184, "step": 26155 }, { "epoch": 0.75, "grad_norm": 3.765544843018433, "learning_rate": 1.5625999965653243e-06, "loss": 0.2463, "step": 26156 }, { "epoch": 0.75, "grad_norm": 3.405974114470864, "learning_rate": 1.5622632284124333e-06, "loss": 0.53, "step": 26157 }, { "epoch": 0.75, "grad_norm": 5.069607161025433, "learning_rate": 1.5619264898339682e-06, "loss": 0.7241, "step": 26158 }, { "epoch": 0.75, "grad_norm": 5.071384404794116, "learning_rate": 1.5615897808328278e-06, "loss": 0.6911, "step": 26159 }, { "epoch": 0.75, "grad_norm": 8.027310811738612, "learning_rate": 1.5612531014119075e-06, "loss": 0.6925, "step": 26160 }, { "epoch": 0.75, "grad_norm": 7.442969755186876, "learning_rate": 1.5609164515741033e-06, "loss": 0.3461, "step": 26161 }, { "epoch": 0.75, "grad_norm": 6.911822905671413, "learning_rate": 1.5605798313223097e-06, "loss": 0.401, "step": 26162 }, { "epoch": 0.75, "grad_norm": 7.343617929123249, "learning_rate": 1.5602432406594264e-06, "loss": 0.6229, "step": 26163 }, { "epoch": 0.75, "grad_norm": 7.965850212711675, "learning_rate": 1.559906679588345e-06, "loss": 0.5183, "step": 26164 }, { "epoch": 0.75, "grad_norm": 4.352926591786573, "learning_rate": 1.559570148111964e-06, "loss": 0.4076, "step": 26165 }, { "epoch": 0.75, "grad_norm": 5.944701395195519, "learning_rate": 1.5592336462331781e-06, "loss": 0.6473, "step": 26166 }, { "epoch": 0.75, "grad_norm": 2.957762009340355, "learning_rate": 1.5588971739548798e-06, "loss": 0.83, "step": 26167 }, { "epoch": 0.75, "grad_norm": 5.900705712348953, "learning_rate": 1.5585607312799672e-06, "loss": 0.6783, "step": 26168 }, { "epoch": 0.75, "grad_norm": 4.610265354868792, "learning_rate": 1.558224318211331e-06, "loss": 0.2651, "step": 26169 }, { "epoch": 0.75, "grad_norm": 7.148797631702868, "learning_rate": 1.5578879347518688e-06, "loss": 0.7171, "step": 26170 }, { "epoch": 0.75, "grad_norm": 4.1684591760950465, "learning_rate": 1.5575515809044711e-06, "loss": 0.3408, "step": 26171 }, { "epoch": 0.75, "grad_norm": 12.662421636160065, "learning_rate": 1.5572152566720355e-06, "loss": 0.5222, "step": 26172 }, { "epoch": 0.75, "grad_norm": 5.688470443263876, "learning_rate": 1.5568789620574525e-06, "loss": 0.6426, "step": 26173 }, { "epoch": 0.75, "grad_norm": 4.8747936351820025, "learning_rate": 1.556542697063615e-06, "loss": 0.4588, "step": 26174 }, { "epoch": 0.75, "grad_norm": 4.550913901298961, "learning_rate": 1.556206461693418e-06, "loss": 0.4681, "step": 26175 }, { "epoch": 0.75, "grad_norm": 6.420773636798078, "learning_rate": 1.5558702559497507e-06, "loss": 0.5838, "step": 26176 }, { "epoch": 0.75, "grad_norm": 5.046187669937463, "learning_rate": 1.5555340798355095e-06, "loss": 0.3397, "step": 26177 }, { "epoch": 0.75, "grad_norm": 5.910964779408949, "learning_rate": 1.5551979333535833e-06, "loss": 0.599, "step": 26178 }, { "epoch": 0.75, "grad_norm": 5.091143122393446, "learning_rate": 1.5548618165068663e-06, "loss": 0.1877, "step": 26179 }, { "epoch": 0.75, "grad_norm": 3.4447026653729425, "learning_rate": 1.5545257292982491e-06, "loss": 0.46, "step": 26180 }, { "epoch": 0.75, "grad_norm": 3.9935939125518605, "learning_rate": 1.5541896717306227e-06, "loss": 0.2121, "step": 26181 }, { "epoch": 0.75, "grad_norm": 4.824967225116634, "learning_rate": 1.553853643806877e-06, "loss": 0.4312, "step": 26182 }, { "epoch": 0.75, "grad_norm": 7.281093644849624, "learning_rate": 1.5535176455299034e-06, "loss": 0.6905, "step": 26183 }, { "epoch": 0.75, "grad_norm": 9.049190929771537, "learning_rate": 1.5531816769025954e-06, "loss": 0.679, "step": 26184 }, { "epoch": 0.75, "grad_norm": 5.068259177568114, "learning_rate": 1.5528457379278394e-06, "loss": 0.4176, "step": 26185 }, { "epoch": 0.75, "grad_norm": 3.351943130042602, "learning_rate": 1.5525098286085282e-06, "loss": 0.3188, "step": 26186 }, { "epoch": 0.75, "grad_norm": 1.722146146856137, "learning_rate": 1.5521739489475513e-06, "loss": 0.1337, "step": 26187 }, { "epoch": 0.75, "grad_norm": 4.0784170331201, "learning_rate": 1.5518380989477967e-06, "loss": 0.1739, "step": 26188 }, { "epoch": 0.75, "grad_norm": 8.508491033673653, "learning_rate": 1.5515022786121531e-06, "loss": 1.0004, "step": 26189 }, { "epoch": 0.75, "grad_norm": 11.861455481059245, "learning_rate": 1.5511664879435123e-06, "loss": 0.272, "step": 26190 }, { "epoch": 0.75, "grad_norm": 6.813966978121306, "learning_rate": 1.5508307269447597e-06, "loss": 0.411, "step": 26191 }, { "epoch": 0.75, "grad_norm": 1.771554135901543, "learning_rate": 1.5504949956187876e-06, "loss": 0.1486, "step": 26192 }, { "epoch": 0.75, "grad_norm": 5.109747380210301, "learning_rate": 1.5501592939684823e-06, "loss": 0.499, "step": 26193 }, { "epoch": 0.75, "grad_norm": 5.399639476114621, "learning_rate": 1.5498236219967306e-06, "loss": 0.3708, "step": 26194 }, { "epoch": 0.75, "grad_norm": 5.6876855337880805, "learning_rate": 1.5494879797064228e-06, "loss": 0.5991, "step": 26195 }, { "epoch": 0.75, "grad_norm": 6.885033793536148, "learning_rate": 1.5491523671004433e-06, "loss": 0.3126, "step": 26196 }, { "epoch": 0.75, "grad_norm": 5.201912770233293, "learning_rate": 1.5488167841816826e-06, "loss": 0.5772, "step": 26197 }, { "epoch": 0.75, "grad_norm": 8.576151136087855, "learning_rate": 1.5484812309530251e-06, "loss": 0.7489, "step": 26198 }, { "epoch": 0.75, "grad_norm": 2.592791012347009, "learning_rate": 1.54814570741736e-06, "loss": 0.2532, "step": 26199 }, { "epoch": 0.75, "grad_norm": 2.3467865156415373, "learning_rate": 1.547810213577572e-06, "loss": 0.2929, "step": 26200 }, { "epoch": 0.75, "grad_norm": 3.919903111282243, "learning_rate": 1.5474747494365466e-06, "loss": 0.6313, "step": 26201 }, { "epoch": 0.75, "grad_norm": 3.6373077964472413, "learning_rate": 1.547139314997172e-06, "loss": 0.197, "step": 26202 }, { "epoch": 0.75, "grad_norm": 4.882376250043094, "learning_rate": 1.5468039102623318e-06, "loss": 0.2695, "step": 26203 }, { "epoch": 0.75, "grad_norm": 5.123797764142534, "learning_rate": 1.546468535234913e-06, "loss": 0.5806, "step": 26204 }, { "epoch": 0.75, "grad_norm": 7.902092967860165, "learning_rate": 1.5461331899177995e-06, "loss": 0.5923, "step": 26205 }, { "epoch": 0.75, "grad_norm": 3.2667359100677857, "learning_rate": 1.545797874313878e-06, "loss": 0.3052, "step": 26206 }, { "epoch": 0.75, "grad_norm": 2.439110883859694, "learning_rate": 1.545462588426032e-06, "loss": 0.189, "step": 26207 }, { "epoch": 0.75, "grad_norm": 4.833695380309453, "learning_rate": 1.5451273322571447e-06, "loss": 0.1657, "step": 26208 }, { "epoch": 0.75, "grad_norm": 3.8351880524296442, "learning_rate": 1.5447921058101034e-06, "loss": 0.5378, "step": 26209 }, { "epoch": 0.75, "grad_norm": 7.628793929484692, "learning_rate": 1.544456909087788e-06, "loss": 0.7304, "step": 26210 }, { "epoch": 0.75, "grad_norm": 9.387540389988713, "learning_rate": 1.5441217420930864e-06, "loss": 1.1112, "step": 26211 }, { "epoch": 0.75, "grad_norm": 6.63698221259631, "learning_rate": 1.5437866048288785e-06, "loss": 0.2349, "step": 26212 }, { "epoch": 0.75, "grad_norm": 6.762874334837135, "learning_rate": 1.5434514972980502e-06, "loss": 0.5007, "step": 26213 }, { "epoch": 0.75, "grad_norm": 5.166535055371084, "learning_rate": 1.5431164195034832e-06, "loss": 0.2953, "step": 26214 }, { "epoch": 0.75, "grad_norm": 16.908253521054835, "learning_rate": 1.5427813714480584e-06, "loss": 0.6295, "step": 26215 }, { "epoch": 0.75, "grad_norm": 7.434469423019147, "learning_rate": 1.5424463531346612e-06, "loss": 0.3339, "step": 26216 }, { "epoch": 0.75, "grad_norm": 8.681392471319844, "learning_rate": 1.5421113645661712e-06, "loss": 0.7491, "step": 26217 }, { "epoch": 0.75, "grad_norm": 2.8278665161032532, "learning_rate": 1.5417764057454727e-06, "loss": 0.135, "step": 26218 }, { "epoch": 0.75, "grad_norm": 3.855851762138227, "learning_rate": 1.5414414766754443e-06, "loss": 0.2108, "step": 26219 }, { "epoch": 0.75, "grad_norm": 3.8922483741903378, "learning_rate": 1.5411065773589723e-06, "loss": 0.4585, "step": 26220 }, { "epoch": 0.75, "grad_norm": 7.21507225645475, "learning_rate": 1.540771707798931e-06, "loss": 0.4503, "step": 26221 }, { "epoch": 0.75, "grad_norm": 7.556906720344817, "learning_rate": 1.5404368679982068e-06, "loss": 0.7187, "step": 26222 }, { "epoch": 0.75, "grad_norm": 5.1462337888916805, "learning_rate": 1.5401020579596765e-06, "loss": 0.6278, "step": 26223 }, { "epoch": 0.75, "grad_norm": 7.647795294665653, "learning_rate": 1.5397672776862227e-06, "loss": 0.6644, "step": 26224 }, { "epoch": 0.75, "grad_norm": 4.196835261062327, "learning_rate": 1.5394325271807259e-06, "loss": 0.2007, "step": 26225 }, { "epoch": 0.75, "grad_norm": 5.716361187909367, "learning_rate": 1.5390978064460654e-06, "loss": 0.5481, "step": 26226 }, { "epoch": 0.75, "grad_norm": 1.4305952582886228, "learning_rate": 1.53876311548512e-06, "loss": 0.1006, "step": 26227 }, { "epoch": 0.75, "grad_norm": 3.9138881317100642, "learning_rate": 1.5384284543007683e-06, "loss": 0.4346, "step": 26228 }, { "epoch": 0.75, "grad_norm": 5.291381913380774, "learning_rate": 1.5380938228958915e-06, "loss": 0.3643, "step": 26229 }, { "epoch": 0.75, "grad_norm": 6.195051228617017, "learning_rate": 1.537759221273366e-06, "loss": 0.3204, "step": 26230 }, { "epoch": 0.75, "grad_norm": 3.902208343081158, "learning_rate": 1.537424649436073e-06, "loss": 0.5852, "step": 26231 }, { "epoch": 0.75, "grad_norm": 5.166552937189079, "learning_rate": 1.5370901073868883e-06, "loss": 0.4163, "step": 26232 }, { "epoch": 0.75, "grad_norm": 4.746632988833492, "learning_rate": 1.5367555951286923e-06, "loss": 0.4151, "step": 26233 }, { "epoch": 0.75, "grad_norm": 4.85606025904451, "learning_rate": 1.5364211126643613e-06, "loss": 0.2137, "step": 26234 }, { "epoch": 0.75, "grad_norm": 3.34198232968885, "learning_rate": 1.5360866599967716e-06, "loss": 0.3393, "step": 26235 }, { "epoch": 0.75, "grad_norm": 4.652470879215072, "learning_rate": 1.5357522371288037e-06, "loss": 0.5159, "step": 26236 }, { "epoch": 0.75, "grad_norm": 4.94471144074389, "learning_rate": 1.5354178440633304e-06, "loss": 0.7436, "step": 26237 }, { "epoch": 0.75, "grad_norm": 3.034327802674064, "learning_rate": 1.5350834808032334e-06, "loss": 0.2202, "step": 26238 }, { "epoch": 0.75, "grad_norm": 7.212592434084207, "learning_rate": 1.5347491473513842e-06, "loss": 0.3361, "step": 26239 }, { "epoch": 0.75, "grad_norm": 8.639495831014177, "learning_rate": 1.5344148437106636e-06, "loss": 0.2003, "step": 26240 }, { "epoch": 0.75, "grad_norm": 5.058662243235776, "learning_rate": 1.5340805698839456e-06, "loss": 0.2029, "step": 26241 }, { "epoch": 0.75, "grad_norm": 2.944478470877607, "learning_rate": 1.5337463258741037e-06, "loss": 0.2194, "step": 26242 }, { "epoch": 0.75, "grad_norm": 3.4047854363482246, "learning_rate": 1.5334121116840168e-06, "loss": 0.3151, "step": 26243 }, { "epoch": 0.75, "grad_norm": 5.390836097896234, "learning_rate": 1.5330779273165575e-06, "loss": 0.5549, "step": 26244 }, { "epoch": 0.75, "grad_norm": 6.422416961150922, "learning_rate": 1.532743772774603e-06, "loss": 0.2561, "step": 26245 }, { "epoch": 0.75, "grad_norm": 3.451443154726915, "learning_rate": 1.5324096480610257e-06, "loss": 0.1986, "step": 26246 }, { "epoch": 0.75, "grad_norm": 6.453025650848336, "learning_rate": 1.5320755531787045e-06, "loss": 0.3328, "step": 26247 }, { "epoch": 0.75, "grad_norm": 4.929182265518456, "learning_rate": 1.531741488130507e-06, "loss": 0.589, "step": 26248 }, { "epoch": 0.75, "grad_norm": 6.4187904245204965, "learning_rate": 1.5314074529193107e-06, "loss": 0.7179, "step": 26249 }, { "epoch": 0.75, "grad_norm": 7.179493699140698, "learning_rate": 1.5310734475479905e-06, "loss": 0.3863, "step": 26250 }, { "epoch": 0.75, "grad_norm": 4.3212973108590065, "learning_rate": 1.5307394720194164e-06, "loss": 0.3708, "step": 26251 }, { "epoch": 0.75, "grad_norm": 7.123740637057367, "learning_rate": 1.5304055263364653e-06, "loss": 0.959, "step": 26252 }, { "epoch": 0.75, "grad_norm": 3.4975530040397538, "learning_rate": 1.5300716105020086e-06, "loss": 0.3372, "step": 26253 }, { "epoch": 0.75, "grad_norm": 11.606463594787346, "learning_rate": 1.5297377245189182e-06, "loss": 0.4705, "step": 26254 }, { "epoch": 0.75, "grad_norm": 9.76892707844818, "learning_rate": 1.5294038683900652e-06, "loss": 0.6814, "step": 26255 }, { "epoch": 0.75, "grad_norm": 3.532530375253542, "learning_rate": 1.5290700421183253e-06, "loss": 0.1971, "step": 26256 }, { "epoch": 0.75, "grad_norm": 6.139565684648391, "learning_rate": 1.5287362457065669e-06, "loss": 0.2438, "step": 26257 }, { "epoch": 0.75, "grad_norm": 3.9430001611518493, "learning_rate": 1.528402479157663e-06, "loss": 0.2235, "step": 26258 }, { "epoch": 0.75, "grad_norm": 5.287766876880563, "learning_rate": 1.5280687424744866e-06, "loss": 0.3733, "step": 26259 }, { "epoch": 0.75, "grad_norm": 4.770905067981765, "learning_rate": 1.527735035659908e-06, "loss": 0.7474, "step": 26260 }, { "epoch": 0.75, "grad_norm": 6.8934060364559455, "learning_rate": 1.5274013587167968e-06, "loss": 0.5274, "step": 26261 }, { "epoch": 0.75, "grad_norm": 3.668158269339069, "learning_rate": 1.5270677116480226e-06, "loss": 0.1647, "step": 26262 }, { "epoch": 0.75, "grad_norm": 4.928325337926068, "learning_rate": 1.526734094456459e-06, "loss": 0.5494, "step": 26263 }, { "epoch": 0.75, "grad_norm": 6.708613071966905, "learning_rate": 1.5264005071449728e-06, "loss": 0.5658, "step": 26264 }, { "epoch": 0.75, "grad_norm": 4.823491586098342, "learning_rate": 1.5260669497164348e-06, "loss": 0.2902, "step": 26265 }, { "epoch": 0.75, "grad_norm": 5.763591748995636, "learning_rate": 1.5257334221737175e-06, "loss": 0.4388, "step": 26266 }, { "epoch": 0.75, "grad_norm": 3.6088134031470545, "learning_rate": 1.5253999245196877e-06, "loss": 0.3128, "step": 26267 }, { "epoch": 0.75, "grad_norm": 7.243506219956674, "learning_rate": 1.5250664567572137e-06, "loss": 0.3704, "step": 26268 }, { "epoch": 0.75, "grad_norm": 7.374564642906662, "learning_rate": 1.5247330188891645e-06, "loss": 0.7197, "step": 26269 }, { "epoch": 0.75, "grad_norm": 7.819523454277886, "learning_rate": 1.5243996109184105e-06, "loss": 0.5519, "step": 26270 }, { "epoch": 0.75, "grad_norm": 4.322281621531911, "learning_rate": 1.5240662328478167e-06, "loss": 0.2157, "step": 26271 }, { "epoch": 0.75, "grad_norm": 6.213739801804056, "learning_rate": 1.5237328846802557e-06, "loss": 0.5226, "step": 26272 }, { "epoch": 0.75, "grad_norm": 4.23701437021303, "learning_rate": 1.5233995664185901e-06, "loss": 0.1239, "step": 26273 }, { "epoch": 0.75, "grad_norm": 3.5186344405555157, "learning_rate": 1.5230662780656924e-06, "loss": 0.2859, "step": 26274 }, { "epoch": 0.75, "grad_norm": 3.5886591803028365, "learning_rate": 1.5227330196244266e-06, "loss": 0.2415, "step": 26275 }, { "epoch": 0.75, "grad_norm": 3.60340215406119, "learning_rate": 1.5223997910976595e-06, "loss": 0.1332, "step": 26276 }, { "epoch": 0.75, "grad_norm": 3.8149041272037048, "learning_rate": 1.52206659248826e-06, "loss": 0.4056, "step": 26277 }, { "epoch": 0.75, "grad_norm": 10.495328863260934, "learning_rate": 1.5217334237990921e-06, "loss": 0.3217, "step": 26278 }, { "epoch": 0.75, "grad_norm": 7.852748921653224, "learning_rate": 1.521400285033025e-06, "loss": 0.5523, "step": 26279 }, { "epoch": 0.75, "grad_norm": 4.669328512277698, "learning_rate": 1.521067176192923e-06, "loss": 0.3834, "step": 26280 }, { "epoch": 0.75, "grad_norm": 6.56205262975332, "learning_rate": 1.5207340972816497e-06, "loss": 0.6273, "step": 26281 }, { "epoch": 0.75, "grad_norm": 4.989674205656003, "learning_rate": 1.5204010483020743e-06, "loss": 0.3283, "step": 26282 }, { "epoch": 0.75, "grad_norm": 6.15746469664898, "learning_rate": 1.520068029257059e-06, "loss": 0.3888, "step": 26283 }, { "epoch": 0.75, "grad_norm": 5.426704630265939, "learning_rate": 1.5197350401494715e-06, "loss": 0.3042, "step": 26284 }, { "epoch": 0.75, "grad_norm": 3.137824218837532, "learning_rate": 1.519402080982173e-06, "loss": 0.4298, "step": 26285 }, { "epoch": 0.75, "grad_norm": 7.78583866235022, "learning_rate": 1.5190691517580314e-06, "loss": 0.2143, "step": 26286 }, { "epoch": 0.75, "grad_norm": 4.146622643341847, "learning_rate": 1.5187362524799098e-06, "loss": 0.3253, "step": 26287 }, { "epoch": 0.75, "grad_norm": 7.895535895416198, "learning_rate": 1.518403383150671e-06, "loss": 0.5127, "step": 26288 }, { "epoch": 0.75, "grad_norm": 9.3206308698997, "learning_rate": 1.5180705437731786e-06, "loss": 0.6325, "step": 26289 }, { "epoch": 0.75, "grad_norm": 7.490415457037391, "learning_rate": 1.5177377343502957e-06, "loss": 0.8235, "step": 26290 }, { "epoch": 0.75, "grad_norm": 7.183629893076644, "learning_rate": 1.517404954884888e-06, "loss": 0.3409, "step": 26291 }, { "epoch": 0.75, "grad_norm": 5.831775319845314, "learning_rate": 1.5170722053798154e-06, "loss": 0.1598, "step": 26292 }, { "epoch": 0.75, "grad_norm": 5.482857888295595, "learning_rate": 1.5167394858379436e-06, "loss": 0.5241, "step": 26293 }, { "epoch": 0.75, "grad_norm": 3.374412644457697, "learning_rate": 1.5164067962621326e-06, "loss": 0.4049, "step": 26294 }, { "epoch": 0.75, "grad_norm": 4.284072669427116, "learning_rate": 1.516074136655245e-06, "loss": 0.2451, "step": 26295 }, { "epoch": 0.75, "grad_norm": 7.756289452660712, "learning_rate": 1.5157415070201414e-06, "loss": 0.2302, "step": 26296 }, { "epoch": 0.75, "grad_norm": 6.083773226923875, "learning_rate": 1.5154089073596861e-06, "loss": 0.4748, "step": 26297 }, { "epoch": 0.75, "grad_norm": 3.952715584885297, "learning_rate": 1.5150763376767375e-06, "loss": 0.6236, "step": 26298 }, { "epoch": 0.75, "grad_norm": 3.9757504036790494, "learning_rate": 1.514743797974158e-06, "loss": 0.4866, "step": 26299 }, { "epoch": 0.75, "grad_norm": 5.624335186136291, "learning_rate": 1.51441128825481e-06, "loss": 0.4283, "step": 26300 }, { "epoch": 0.75, "grad_norm": 4.1316379688128535, "learning_rate": 1.5140788085215525e-06, "loss": 0.4696, "step": 26301 }, { "epoch": 0.75, "grad_norm": 16.183711061413877, "learning_rate": 1.5137463587772462e-06, "loss": 0.128, "step": 26302 }, { "epoch": 0.75, "grad_norm": 8.250408104715072, "learning_rate": 1.5134139390247482e-06, "loss": 1.0243, "step": 26303 }, { "epoch": 0.75, "grad_norm": 8.467280242744271, "learning_rate": 1.5130815492669232e-06, "loss": 0.545, "step": 26304 }, { "epoch": 0.75, "grad_norm": 9.369013464380647, "learning_rate": 1.5127491895066265e-06, "loss": 0.384, "step": 26305 }, { "epoch": 0.75, "grad_norm": 4.921794006650064, "learning_rate": 1.5124168597467192e-06, "loss": 0.4872, "step": 26306 }, { "epoch": 0.75, "grad_norm": 8.076130722291614, "learning_rate": 1.5120845599900635e-06, "loss": 0.5365, "step": 26307 }, { "epoch": 0.75, "grad_norm": 3.3250147475008776, "learning_rate": 1.5117522902395115e-06, "loss": 0.3322, "step": 26308 }, { "epoch": 0.75, "grad_norm": 4.741705679886637, "learning_rate": 1.511420050497927e-06, "loss": 0.4489, "step": 26309 }, { "epoch": 0.75, "grad_norm": 4.528180799541925, "learning_rate": 1.5110878407681644e-06, "loss": 0.2941, "step": 26310 }, { "epoch": 0.75, "grad_norm": 5.579474534345214, "learning_rate": 1.5107556610530848e-06, "loss": 0.7131, "step": 26311 }, { "epoch": 0.75, "grad_norm": 5.593569214526042, "learning_rate": 1.5104235113555438e-06, "loss": 0.7102, "step": 26312 }, { "epoch": 0.75, "grad_norm": 6.481763713787292, "learning_rate": 1.510091391678401e-06, "loss": 0.5437, "step": 26313 }, { "epoch": 0.75, "grad_norm": 6.773185470098506, "learning_rate": 1.5097593020245122e-06, "loss": 0.457, "step": 26314 }, { "epoch": 0.75, "grad_norm": 6.218837181515348, "learning_rate": 1.5094272423967327e-06, "loss": 0.4735, "step": 26315 }, { "epoch": 0.75, "grad_norm": 6.521527075706266, "learning_rate": 1.5090952127979225e-06, "loss": 0.3949, "step": 26316 }, { "epoch": 0.75, "grad_norm": 8.668218333161095, "learning_rate": 1.508763213230935e-06, "loss": 0.5712, "step": 26317 }, { "epoch": 0.75, "grad_norm": 2.7280696867658696, "learning_rate": 1.5084312436986294e-06, "loss": 0.2677, "step": 26318 }, { "epoch": 0.75, "grad_norm": 6.04042833287343, "learning_rate": 1.5080993042038578e-06, "loss": 0.3787, "step": 26319 }, { "epoch": 0.75, "grad_norm": 5.470573012450245, "learning_rate": 1.5077673947494798e-06, "loss": 0.209, "step": 26320 }, { "epoch": 0.75, "grad_norm": 6.96062666587539, "learning_rate": 1.5074355153383492e-06, "loss": 0.6902, "step": 26321 }, { "epoch": 0.75, "grad_norm": 4.6733801978197125, "learning_rate": 1.5071036659733186e-06, "loss": 0.4363, "step": 26322 }, { "epoch": 0.75, "grad_norm": 7.080980478596805, "learning_rate": 1.5067718466572472e-06, "loss": 0.5408, "step": 26323 }, { "epoch": 0.75, "grad_norm": 3.271588442841039, "learning_rate": 1.506440057392986e-06, "loss": 0.5035, "step": 26324 }, { "epoch": 0.75, "grad_norm": 3.3101200019845076, "learning_rate": 1.5061082981833918e-06, "loss": 0.2645, "step": 26325 }, { "epoch": 0.75, "grad_norm": 6.276227128914661, "learning_rate": 1.505776569031317e-06, "loss": 0.2338, "step": 26326 }, { "epoch": 0.75, "grad_norm": 9.79370367627122, "learning_rate": 1.5054448699396173e-06, "loss": 0.7632, "step": 26327 }, { "epoch": 0.75, "grad_norm": 5.777575175490775, "learning_rate": 1.5051132009111451e-06, "loss": 0.588, "step": 26328 }, { "epoch": 0.75, "grad_norm": 3.471015671378421, "learning_rate": 1.5047815619487539e-06, "loss": 0.181, "step": 26329 }, { "epoch": 0.75, "grad_norm": 3.93006892088038, "learning_rate": 1.5044499530552947e-06, "loss": 0.2105, "step": 26330 }, { "epoch": 0.75, "grad_norm": 4.062294822793506, "learning_rate": 1.5041183742336224e-06, "loss": 0.6882, "step": 26331 }, { "epoch": 0.75, "grad_norm": 2.181147732291077, "learning_rate": 1.5037868254865906e-06, "loss": 0.2601, "step": 26332 }, { "epoch": 0.75, "grad_norm": 3.4891390541380445, "learning_rate": 1.5034553068170494e-06, "loss": 0.3285, "step": 26333 }, { "epoch": 0.75, "grad_norm": 6.96932077850112, "learning_rate": 1.503123818227854e-06, "loss": 0.5177, "step": 26334 }, { "epoch": 0.75, "grad_norm": 5.337532307797283, "learning_rate": 1.5027923597218514e-06, "loss": 0.3862, "step": 26335 }, { "epoch": 0.75, "grad_norm": 3.1161462674891656, "learning_rate": 1.5024609313018968e-06, "loss": 0.2587, "step": 26336 }, { "epoch": 0.75, "grad_norm": 2.986631239582662, "learning_rate": 1.5021295329708384e-06, "loss": 0.264, "step": 26337 }, { "epoch": 0.75, "grad_norm": 5.136598882980888, "learning_rate": 1.5017981647315306e-06, "loss": 0.3531, "step": 26338 }, { "epoch": 0.75, "grad_norm": 3.542285756623333, "learning_rate": 1.501466826586821e-06, "loss": 0.4254, "step": 26339 }, { "epoch": 0.75, "grad_norm": 4.5279493741450505, "learning_rate": 1.5011355185395632e-06, "loss": 0.8013, "step": 26340 }, { "epoch": 0.75, "grad_norm": 5.142057456937047, "learning_rate": 1.500804240592606e-06, "loss": 0.2638, "step": 26341 }, { "epoch": 0.75, "grad_norm": 6.0430966975864795, "learning_rate": 1.5004729927487972e-06, "loss": 0.3466, "step": 26342 }, { "epoch": 0.75, "grad_norm": 5.514951886393445, "learning_rate": 1.5001417750109897e-06, "loss": 0.3737, "step": 26343 }, { "epoch": 0.75, "grad_norm": 6.315067109199154, "learning_rate": 1.49981058738203e-06, "loss": 0.3686, "step": 26344 }, { "epoch": 0.75, "grad_norm": 9.165308198549008, "learning_rate": 1.499479429864771e-06, "loss": 0.3849, "step": 26345 }, { "epoch": 0.75, "grad_norm": 7.761635060427927, "learning_rate": 1.4991483024620574e-06, "loss": 0.4186, "step": 26346 }, { "epoch": 0.75, "grad_norm": 8.676170041554386, "learning_rate": 1.4988172051767419e-06, "loss": 0.6387, "step": 26347 }, { "epoch": 0.75, "grad_norm": 5.6200667046766135, "learning_rate": 1.4984861380116705e-06, "loss": 0.3286, "step": 26348 }, { "epoch": 0.75, "grad_norm": 5.433109878945534, "learning_rate": 1.4981551009696904e-06, "loss": 0.4891, "step": 26349 }, { "epoch": 0.75, "grad_norm": 9.794159143174996, "learning_rate": 1.4978240940536526e-06, "loss": 0.4547, "step": 26350 }, { "epoch": 0.75, "grad_norm": 5.002948463847012, "learning_rate": 1.497493117266401e-06, "loss": 0.2104, "step": 26351 }, { "epoch": 0.75, "grad_norm": 5.175388076531681, "learning_rate": 1.4971621706107864e-06, "loss": 0.2705, "step": 26352 }, { "epoch": 0.75, "grad_norm": 4.445083263634337, "learning_rate": 1.4968312540896528e-06, "loss": 0.6141, "step": 26353 }, { "epoch": 0.75, "grad_norm": 3.4351369711929434, "learning_rate": 1.4965003677058503e-06, "loss": 0.2177, "step": 26354 }, { "epoch": 0.75, "grad_norm": 3.644698667616254, "learning_rate": 1.4961695114622238e-06, "loss": 0.3378, "step": 26355 }, { "epoch": 0.75, "grad_norm": 10.35686341646304, "learning_rate": 1.495838685361618e-06, "loss": 0.3196, "step": 26356 }, { "epoch": 0.75, "grad_norm": 2.202994795935532, "learning_rate": 1.495507889406882e-06, "loss": 0.098, "step": 26357 }, { "epoch": 0.75, "grad_norm": 7.859167866034469, "learning_rate": 1.495177123600859e-06, "loss": 0.6275, "step": 26358 }, { "epoch": 0.75, "grad_norm": 2.1570659144185, "learning_rate": 1.494846387946397e-06, "loss": 0.2298, "step": 26359 }, { "epoch": 0.75, "grad_norm": 10.673780011542407, "learning_rate": 1.494515682446338e-06, "loss": 0.2912, "step": 26360 }, { "epoch": 0.75, "grad_norm": 4.946474634814801, "learning_rate": 1.4941850071035307e-06, "loss": 0.4972, "step": 26361 }, { "epoch": 0.75, "grad_norm": 7.312249725493081, "learning_rate": 1.4938543619208183e-06, "loss": 0.5596, "step": 26362 }, { "epoch": 0.75, "grad_norm": 6.653209403717913, "learning_rate": 1.493523746901044e-06, "loss": 0.2531, "step": 26363 }, { "epoch": 0.76, "grad_norm": 3.6690511030795516, "learning_rate": 1.493193162047054e-06, "loss": 0.3707, "step": 26364 }, { "epoch": 0.76, "grad_norm": 3.6779835475275693, "learning_rate": 1.4928626073616904e-06, "loss": 0.0981, "step": 26365 }, { "epoch": 0.76, "grad_norm": 9.16734615466271, "learning_rate": 1.4925320828477991e-06, "loss": 0.8264, "step": 26366 }, { "epoch": 0.76, "grad_norm": 4.450509089377593, "learning_rate": 1.492201588508223e-06, "loss": 0.4584, "step": 26367 }, { "epoch": 0.76, "grad_norm": 8.897629162761145, "learning_rate": 1.4918711243458045e-06, "loss": 1.0515, "step": 26368 }, { "epoch": 0.76, "grad_norm": 3.400367451895031, "learning_rate": 1.491540690363385e-06, "loss": 0.5246, "step": 26369 }, { "epoch": 0.76, "grad_norm": 8.227642050078725, "learning_rate": 1.4912102865638106e-06, "loss": 0.4457, "step": 26370 }, { "epoch": 0.76, "grad_norm": 4.698140363872374, "learning_rate": 1.4908799129499207e-06, "loss": 0.3925, "step": 26371 }, { "epoch": 0.76, "grad_norm": 4.931182586247803, "learning_rate": 1.4905495695245587e-06, "loss": 0.4041, "step": 26372 }, { "epoch": 0.76, "grad_norm": 8.452773476446787, "learning_rate": 1.4902192562905682e-06, "loss": 0.5963, "step": 26373 }, { "epoch": 0.76, "grad_norm": 3.756413316270592, "learning_rate": 1.4898889732507893e-06, "loss": 0.4356, "step": 26374 }, { "epoch": 0.76, "grad_norm": 7.795587773603856, "learning_rate": 1.4895587204080637e-06, "loss": 0.42, "step": 26375 }, { "epoch": 0.76, "grad_norm": 3.1534141793549746, "learning_rate": 1.4892284977652305e-06, "loss": 0.2762, "step": 26376 }, { "epoch": 0.76, "grad_norm": 6.917726604029897, "learning_rate": 1.4888983053251337e-06, "loss": 0.6903, "step": 26377 }, { "epoch": 0.76, "grad_norm": 7.859270038245131, "learning_rate": 1.4885681430906107e-06, "loss": 0.2944, "step": 26378 }, { "epoch": 0.76, "grad_norm": 3.5277343229602804, "learning_rate": 1.4882380110645055e-06, "loss": 0.2594, "step": 26379 }, { "epoch": 0.76, "grad_norm": 6.999967370638412, "learning_rate": 1.487907909249655e-06, "loss": 0.6592, "step": 26380 }, { "epoch": 0.76, "grad_norm": 8.654308555834477, "learning_rate": 1.4875778376489015e-06, "loss": 0.6411, "step": 26381 }, { "epoch": 0.76, "grad_norm": 5.281711490489452, "learning_rate": 1.4872477962650833e-06, "loss": 0.4502, "step": 26382 }, { "epoch": 0.76, "grad_norm": 5.492213459414779, "learning_rate": 1.4869177851010385e-06, "loss": 0.5021, "step": 26383 }, { "epoch": 0.76, "grad_norm": 7.6694854930807885, "learning_rate": 1.4865878041596093e-06, "loss": 0.4017, "step": 26384 }, { "epoch": 0.76, "grad_norm": 5.803255790259585, "learning_rate": 1.4862578534436305e-06, "loss": 0.2805, "step": 26385 }, { "epoch": 0.76, "grad_norm": 4.697228496190643, "learning_rate": 1.4859279329559446e-06, "loss": 0.4923, "step": 26386 }, { "epoch": 0.76, "grad_norm": 3.7382575406047134, "learning_rate": 1.4855980426993865e-06, "loss": 0.5287, "step": 26387 }, { "epoch": 0.76, "grad_norm": 1.0663465126828404, "learning_rate": 1.4852681826767973e-06, "loss": 0.0528, "step": 26388 }, { "epoch": 0.76, "grad_norm": 7.732374761080893, "learning_rate": 1.4849383528910132e-06, "loss": 0.2282, "step": 26389 }, { "epoch": 0.76, "grad_norm": 4.271209721920969, "learning_rate": 1.48460855334487e-06, "loss": 0.5669, "step": 26390 }, { "epoch": 0.76, "grad_norm": 7.12214422717198, "learning_rate": 1.4842787840412082e-06, "loss": 0.7826, "step": 26391 }, { "epoch": 0.76, "grad_norm": 4.758035263083182, "learning_rate": 1.4839490449828614e-06, "loss": 0.3124, "step": 26392 }, { "epoch": 0.76, "grad_norm": 6.000504551971847, "learning_rate": 1.4836193361726697e-06, "loss": 0.5161, "step": 26393 }, { "epoch": 0.76, "grad_norm": 4.8544042990261875, "learning_rate": 1.4832896576134682e-06, "loss": 0.5508, "step": 26394 }, { "epoch": 0.76, "grad_norm": 4.13254879432852, "learning_rate": 1.4829600093080927e-06, "loss": 0.5565, "step": 26395 }, { "epoch": 0.76, "grad_norm": 5.989326420678243, "learning_rate": 1.4826303912593776e-06, "loss": 0.5685, "step": 26396 }, { "epoch": 0.76, "grad_norm": 9.847800810984943, "learning_rate": 1.4823008034701596e-06, "loss": 0.5447, "step": 26397 }, { "epoch": 0.76, "grad_norm": 3.408918367990201, "learning_rate": 1.4819712459432772e-06, "loss": 0.2296, "step": 26398 }, { "epoch": 0.76, "grad_norm": 5.876945437283367, "learning_rate": 1.4816417186815607e-06, "loss": 0.2016, "step": 26399 }, { "epoch": 0.76, "grad_norm": 5.677334430519476, "learning_rate": 1.4813122216878495e-06, "loss": 0.6068, "step": 26400 }, { "epoch": 0.76, "grad_norm": 2.6319589901043816, "learning_rate": 1.4809827549649753e-06, "loss": 0.15, "step": 26401 }, { "epoch": 0.76, "grad_norm": 4.543281157388275, "learning_rate": 1.4806533185157735e-06, "loss": 0.2924, "step": 26402 }, { "epoch": 0.76, "grad_norm": 5.6553499548953825, "learning_rate": 1.4803239123430769e-06, "loss": 0.3259, "step": 26403 }, { "epoch": 0.76, "grad_norm": 4.061364822243916, "learning_rate": 1.4799945364497193e-06, "loss": 0.3845, "step": 26404 }, { "epoch": 0.76, "grad_norm": 2.3031016437893848, "learning_rate": 1.4796651908385373e-06, "loss": 0.0648, "step": 26405 }, { "epoch": 0.76, "grad_norm": 6.565793782115995, "learning_rate": 1.479335875512361e-06, "loss": 0.4167, "step": 26406 }, { "epoch": 0.76, "grad_norm": 3.525742616279064, "learning_rate": 1.4790065904740258e-06, "loss": 0.1395, "step": 26407 }, { "epoch": 0.76, "grad_norm": 6.808321660131799, "learning_rate": 1.478677335726364e-06, "loss": 0.7452, "step": 26408 }, { "epoch": 0.76, "grad_norm": 7.365512240764226, "learning_rate": 1.478348111272207e-06, "loss": 0.6731, "step": 26409 }, { "epoch": 0.76, "grad_norm": 6.205728825163474, "learning_rate": 1.478018917114386e-06, "loss": 0.3637, "step": 26410 }, { "epoch": 0.76, "grad_norm": 5.404352240904391, "learning_rate": 1.4776897532557367e-06, "loss": 0.2385, "step": 26411 }, { "epoch": 0.76, "grad_norm": 6.6027565141541515, "learning_rate": 1.4773606196990864e-06, "loss": 0.4794, "step": 26412 }, { "epoch": 0.76, "grad_norm": 2.8072657516123187, "learning_rate": 1.4770315164472698e-06, "loss": 0.2734, "step": 26413 }, { "epoch": 0.76, "grad_norm": 6.3701939137405565, "learning_rate": 1.4767024435031186e-06, "loss": 0.4927, "step": 26414 }, { "epoch": 0.76, "grad_norm": 4.546303768915073, "learning_rate": 1.476373400869462e-06, "loss": 0.2892, "step": 26415 }, { "epoch": 0.76, "grad_norm": 6.912617091589237, "learning_rate": 1.4760443885491315e-06, "loss": 0.3823, "step": 26416 }, { "epoch": 0.76, "grad_norm": 5.0780373903199845, "learning_rate": 1.475715406544956e-06, "loss": 0.4243, "step": 26417 }, { "epoch": 0.76, "grad_norm": 4.810884192067014, "learning_rate": 1.475386454859768e-06, "loss": 0.2877, "step": 26418 }, { "epoch": 0.76, "grad_norm": 2.2819841653628026, "learning_rate": 1.475057533496395e-06, "loss": 0.1912, "step": 26419 }, { "epoch": 0.76, "grad_norm": 4.684133439709943, "learning_rate": 1.4747286424576697e-06, "loss": 0.2358, "step": 26420 }, { "epoch": 0.76, "grad_norm": 5.996574795093244, "learning_rate": 1.4743997817464183e-06, "loss": 0.7462, "step": 26421 }, { "epoch": 0.76, "grad_norm": 8.669921682503448, "learning_rate": 1.4740709513654727e-06, "loss": 0.5444, "step": 26422 }, { "epoch": 0.76, "grad_norm": 6.35322757306023, "learning_rate": 1.4737421513176614e-06, "loss": 0.5943, "step": 26423 }, { "epoch": 0.76, "grad_norm": 3.5234847647125216, "learning_rate": 1.47341338160581e-06, "loss": 0.394, "step": 26424 }, { "epoch": 0.76, "grad_norm": 3.9564693010484944, "learning_rate": 1.4730846422327506e-06, "loss": 0.4129, "step": 26425 }, { "epoch": 0.76, "grad_norm": 6.451472701272074, "learning_rate": 1.4727559332013086e-06, "loss": 0.5538, "step": 26426 }, { "epoch": 0.76, "grad_norm": 6.479153877401046, "learning_rate": 1.4724272545143142e-06, "loss": 0.3303, "step": 26427 }, { "epoch": 0.76, "grad_norm": 9.64031816238464, "learning_rate": 1.472098606174594e-06, "loss": 0.4509, "step": 26428 }, { "epoch": 0.76, "grad_norm": 8.914389762253725, "learning_rate": 1.4717699881849734e-06, "loss": 0.6331, "step": 26429 }, { "epoch": 0.76, "grad_norm": 6.61177019588086, "learning_rate": 1.471441400548283e-06, "loss": 0.3798, "step": 26430 }, { "epoch": 0.76, "grad_norm": 8.065183030456177, "learning_rate": 1.4711128432673461e-06, "loss": 0.4744, "step": 26431 }, { "epoch": 0.76, "grad_norm": 5.446976198185377, "learning_rate": 1.4707843163449925e-06, "loss": 0.4348, "step": 26432 }, { "epoch": 0.76, "grad_norm": 7.022610625240723, "learning_rate": 1.4704558197840451e-06, "loss": 0.4801, "step": 26433 }, { "epoch": 0.76, "grad_norm": 6.598317015530473, "learning_rate": 1.4701273535873334e-06, "loss": 0.2321, "step": 26434 }, { "epoch": 0.76, "grad_norm": 8.694083758487558, "learning_rate": 1.4697989177576816e-06, "loss": 0.9248, "step": 26435 }, { "epoch": 0.76, "grad_norm": 7.161293484726739, "learning_rate": 1.4694705122979147e-06, "loss": 0.4236, "step": 26436 }, { "epoch": 0.76, "grad_norm": 3.049782935219768, "learning_rate": 1.4691421372108571e-06, "loss": 0.3972, "step": 26437 }, { "epoch": 0.76, "grad_norm": 3.9010352460182776, "learning_rate": 1.4688137924993351e-06, "loss": 0.5748, "step": 26438 }, { "epoch": 0.76, "grad_norm": 4.570821254485916, "learning_rate": 1.4684854781661746e-06, "loss": 0.4205, "step": 26439 }, { "epoch": 0.76, "grad_norm": 5.334045253365743, "learning_rate": 1.468157194214197e-06, "loss": 0.3289, "step": 26440 }, { "epoch": 0.76, "grad_norm": 5.8291746324435225, "learning_rate": 1.4678289406462303e-06, "loss": 0.3389, "step": 26441 }, { "epoch": 0.76, "grad_norm": 8.294061100981471, "learning_rate": 1.4675007174650957e-06, "loss": 0.4833, "step": 26442 }, { "epoch": 0.76, "grad_norm": 6.693409207428209, "learning_rate": 1.467172524673618e-06, "loss": 0.4454, "step": 26443 }, { "epoch": 0.76, "grad_norm": 4.765983668103652, "learning_rate": 1.466844362274618e-06, "loss": 0.2904, "step": 26444 }, { "epoch": 0.76, "grad_norm": 9.208576705501295, "learning_rate": 1.466516230270923e-06, "loss": 0.6534, "step": 26445 }, { "epoch": 0.76, "grad_norm": 4.391572870318143, "learning_rate": 1.4661881286653524e-06, "loss": 0.5673, "step": 26446 }, { "epoch": 0.76, "grad_norm": 6.303032110586051, "learning_rate": 1.4658600574607297e-06, "loss": 0.4729, "step": 26447 }, { "epoch": 0.76, "grad_norm": 5.675764457956088, "learning_rate": 1.4655320166598796e-06, "loss": 0.8088, "step": 26448 }, { "epoch": 0.76, "grad_norm": 4.718870578261079, "learning_rate": 1.465204006265622e-06, "loss": 0.281, "step": 26449 }, { "epoch": 0.76, "grad_norm": 6.473349479428606, "learning_rate": 1.4648760262807798e-06, "loss": 0.4751, "step": 26450 }, { "epoch": 0.76, "grad_norm": 6.1715520074082875, "learning_rate": 1.4645480767081717e-06, "loss": 0.7669, "step": 26451 }, { "epoch": 0.76, "grad_norm": 5.358609959574641, "learning_rate": 1.4642201575506231e-06, "loss": 0.2457, "step": 26452 }, { "epoch": 0.76, "grad_norm": 6.28247396206422, "learning_rate": 1.4638922688109514e-06, "loss": 0.4759, "step": 26453 }, { "epoch": 0.76, "grad_norm": 5.914520004092868, "learning_rate": 1.463564410491981e-06, "loss": 0.3865, "step": 26454 }, { "epoch": 0.76, "grad_norm": 7.900706402063762, "learning_rate": 1.4632365825965305e-06, "loss": 0.3258, "step": 26455 }, { "epoch": 0.76, "grad_norm": 6.521429097643088, "learning_rate": 1.4629087851274187e-06, "loss": 0.812, "step": 26456 }, { "epoch": 0.76, "grad_norm": 3.5080375004507123, "learning_rate": 1.4625810180874684e-06, "loss": 0.2151, "step": 26457 }, { "epoch": 0.76, "grad_norm": 13.029947302675874, "learning_rate": 1.4622532814794965e-06, "loss": 0.8692, "step": 26458 }, { "epoch": 0.76, "grad_norm": 6.304148938174468, "learning_rate": 1.4619255753063261e-06, "loss": 0.666, "step": 26459 }, { "epoch": 0.76, "grad_norm": 2.7492013118241796, "learning_rate": 1.461597899570772e-06, "loss": 0.3745, "step": 26460 }, { "epoch": 0.76, "grad_norm": 3.938320922046358, "learning_rate": 1.4612702542756573e-06, "loss": 0.1624, "step": 26461 }, { "epoch": 0.76, "grad_norm": 3.893548166055722, "learning_rate": 1.4609426394237996e-06, "loss": 0.2651, "step": 26462 }, { "epoch": 0.76, "grad_norm": 6.396413182452097, "learning_rate": 1.4606150550180142e-06, "loss": 0.5259, "step": 26463 }, { "epoch": 0.76, "grad_norm": 2.631627481163601, "learning_rate": 1.460287501061124e-06, "loss": 0.359, "step": 26464 }, { "epoch": 0.76, "grad_norm": 4.544553445883484, "learning_rate": 1.4599599775559426e-06, "loss": 0.421, "step": 26465 }, { "epoch": 0.76, "grad_norm": 4.467055232787484, "learning_rate": 1.4596324845052912e-06, "loss": 0.5574, "step": 26466 }, { "epoch": 0.76, "grad_norm": 4.707248514946568, "learning_rate": 1.4593050219119841e-06, "loss": 0.3772, "step": 26467 }, { "epoch": 0.76, "grad_norm": 3.6843484892146163, "learning_rate": 1.4589775897788415e-06, "loss": 0.2856, "step": 26468 }, { "epoch": 0.76, "grad_norm": 10.390026746463953, "learning_rate": 1.458650188108679e-06, "loss": 0.3229, "step": 26469 }, { "epoch": 0.76, "grad_norm": 3.405907044950491, "learning_rate": 1.4583228169043113e-06, "loss": 0.3273, "step": 26470 }, { "epoch": 0.76, "grad_norm": 6.195009933722338, "learning_rate": 1.4579954761685578e-06, "loss": 0.4896, "step": 26471 }, { "epoch": 0.76, "grad_norm": 8.311637833706346, "learning_rate": 1.4576681659042312e-06, "loss": 0.5102, "step": 26472 }, { "epoch": 0.76, "grad_norm": 4.846276737874371, "learning_rate": 1.4573408861141513e-06, "loss": 0.6701, "step": 26473 }, { "epoch": 0.76, "grad_norm": 7.4500928104943265, "learning_rate": 1.4570136368011296e-06, "loss": 0.4932, "step": 26474 }, { "epoch": 0.76, "grad_norm": 6.246135165212622, "learning_rate": 1.4566864179679852e-06, "loss": 0.6121, "step": 26475 }, { "epoch": 0.76, "grad_norm": 3.192203268123302, "learning_rate": 1.456359229617531e-06, "loss": 0.5512, "step": 26476 }, { "epoch": 0.76, "grad_norm": 7.3949404289420295, "learning_rate": 1.4560320717525827e-06, "loss": 0.7947, "step": 26477 }, { "epoch": 0.76, "grad_norm": 8.82790167872093, "learning_rate": 1.4557049443759518e-06, "loss": 0.7228, "step": 26478 }, { "epoch": 0.76, "grad_norm": 2.434968195909223, "learning_rate": 1.455377847490455e-06, "loss": 0.2046, "step": 26479 }, { "epoch": 0.76, "grad_norm": 4.887828527077595, "learning_rate": 1.4550507810989084e-06, "loss": 0.2742, "step": 26480 }, { "epoch": 0.76, "grad_norm": 5.507780250833669, "learning_rate": 1.4547237452041213e-06, "loss": 0.3036, "step": 26481 }, { "epoch": 0.76, "grad_norm": 4.729141969098147, "learning_rate": 1.4543967398089126e-06, "loss": 0.3776, "step": 26482 }, { "epoch": 0.76, "grad_norm": 5.549700208248029, "learning_rate": 1.454069764916089e-06, "loss": 0.5811, "step": 26483 }, { "epoch": 0.76, "grad_norm": 4.643696343380207, "learning_rate": 1.4537428205284681e-06, "loss": 0.2494, "step": 26484 }, { "epoch": 0.76, "grad_norm": 2.906980238202679, "learning_rate": 1.4534159066488597e-06, "loss": 0.2938, "step": 26485 }, { "epoch": 0.76, "grad_norm": 5.8124133688100565, "learning_rate": 1.4530890232800793e-06, "loss": 0.4414, "step": 26486 }, { "epoch": 0.76, "grad_norm": 5.934713010718571, "learning_rate": 1.4527621704249355e-06, "loss": 0.7869, "step": 26487 }, { "epoch": 0.76, "grad_norm": 30.832765271564302, "learning_rate": 1.4524353480862436e-06, "loss": 0.5224, "step": 26488 }, { "epoch": 0.76, "grad_norm": 9.580247902021991, "learning_rate": 1.4521085562668136e-06, "loss": 0.5424, "step": 26489 }, { "epoch": 0.76, "grad_norm": 5.171612044273488, "learning_rate": 1.451781794969455e-06, "loss": 0.3691, "step": 26490 }, { "epoch": 0.76, "grad_norm": 3.529522414144259, "learning_rate": 1.4514550641969833e-06, "loss": 0.2656, "step": 26491 }, { "epoch": 0.76, "grad_norm": 6.837266053934508, "learning_rate": 1.4511283639522044e-06, "loss": 0.8488, "step": 26492 }, { "epoch": 0.76, "grad_norm": 8.804011592290614, "learning_rate": 1.4508016942379328e-06, "loss": 0.5849, "step": 26493 }, { "epoch": 0.76, "grad_norm": 5.08313954353949, "learning_rate": 1.4504750550569756e-06, "loss": 0.3647, "step": 26494 }, { "epoch": 0.76, "grad_norm": 4.910083706486042, "learning_rate": 1.4501484464121468e-06, "loss": 0.3212, "step": 26495 }, { "epoch": 0.76, "grad_norm": 6.605986969041344, "learning_rate": 1.4498218683062532e-06, "loss": 0.3572, "step": 26496 }, { "epoch": 0.76, "grad_norm": 4.94151198684889, "learning_rate": 1.4494953207421036e-06, "loss": 0.5045, "step": 26497 }, { "epoch": 0.76, "grad_norm": 4.308378378499193, "learning_rate": 1.4491688037225098e-06, "loss": 0.4328, "step": 26498 }, { "epoch": 0.76, "grad_norm": 6.405445104137315, "learning_rate": 1.4488423172502786e-06, "loss": 0.3925, "step": 26499 }, { "epoch": 0.76, "grad_norm": 7.105714463425622, "learning_rate": 1.4485158613282208e-06, "loss": 0.6373, "step": 26500 }, { "epoch": 0.76, "grad_norm": 3.8502051880889367, "learning_rate": 1.4481894359591425e-06, "loss": 0.3071, "step": 26501 }, { "epoch": 0.76, "grad_norm": 5.551814692465053, "learning_rate": 1.4478630411458544e-06, "loss": 0.3004, "step": 26502 }, { "epoch": 0.76, "grad_norm": 3.8240949320401776, "learning_rate": 1.4475366768911635e-06, "loss": 0.4433, "step": 26503 }, { "epoch": 0.76, "grad_norm": 5.631612768988741, "learning_rate": 1.4472103431978756e-06, "loss": 0.5059, "step": 26504 }, { "epoch": 0.76, "grad_norm": 15.073258352811626, "learning_rate": 1.446884040068801e-06, "loss": 0.3226, "step": 26505 }, { "epoch": 0.76, "grad_norm": 1.4431335752661125, "learning_rate": 1.4465577675067443e-06, "loss": 0.0789, "step": 26506 }, { "epoch": 0.76, "grad_norm": 3.9436847161452917, "learning_rate": 1.4462315255145155e-06, "loss": 0.3549, "step": 26507 }, { "epoch": 0.76, "grad_norm": 3.594215396102934, "learning_rate": 1.445905314094917e-06, "loss": 0.4556, "step": 26508 }, { "epoch": 0.76, "grad_norm": 3.305094556335932, "learning_rate": 1.4455791332507607e-06, "loss": 0.2663, "step": 26509 }, { "epoch": 0.76, "grad_norm": 5.439097027448492, "learning_rate": 1.4452529829848465e-06, "loss": 0.4555, "step": 26510 }, { "epoch": 0.76, "grad_norm": 7.292802691293071, "learning_rate": 1.4449268632999836e-06, "loss": 0.2504, "step": 26511 }, { "epoch": 0.76, "grad_norm": 3.4567140422917104, "learning_rate": 1.4446007741989788e-06, "loss": 0.487, "step": 26512 }, { "epoch": 0.76, "grad_norm": 5.43497833886344, "learning_rate": 1.4442747156846338e-06, "loss": 0.7039, "step": 26513 }, { "epoch": 0.76, "grad_norm": 4.623777447031466, "learning_rate": 1.443948687759757e-06, "loss": 0.3556, "step": 26514 }, { "epoch": 0.76, "grad_norm": 3.3418496872938683, "learning_rate": 1.4436226904271521e-06, "loss": 0.4048, "step": 26515 }, { "epoch": 0.76, "grad_norm": 3.622653892570619, "learning_rate": 1.4432967236896234e-06, "loss": 0.4433, "step": 26516 }, { "epoch": 0.76, "grad_norm": 2.977177373701316, "learning_rate": 1.4429707875499732e-06, "loss": 0.342, "step": 26517 }, { "epoch": 0.76, "grad_norm": 4.967478799453135, "learning_rate": 1.4426448820110095e-06, "loss": 0.3009, "step": 26518 }, { "epoch": 0.76, "grad_norm": 4.478367147134646, "learning_rate": 1.4423190070755316e-06, "loss": 0.4539, "step": 26519 }, { "epoch": 0.76, "grad_norm": 3.151715165139468, "learning_rate": 1.4419931627463456e-06, "loss": 0.3416, "step": 26520 }, { "epoch": 0.76, "grad_norm": 6.6764272485519, "learning_rate": 1.4416673490262563e-06, "loss": 0.6215, "step": 26521 }, { "epoch": 0.76, "grad_norm": 4.172361195554262, "learning_rate": 1.441341565918064e-06, "loss": 0.4973, "step": 26522 }, { "epoch": 0.76, "grad_norm": 3.644607068874904, "learning_rate": 1.4410158134245729e-06, "loss": 0.4539, "step": 26523 }, { "epoch": 0.76, "grad_norm": 6.570312645149003, "learning_rate": 1.4406900915485821e-06, "loss": 1.0941, "step": 26524 }, { "epoch": 0.76, "grad_norm": 4.911611458207382, "learning_rate": 1.4403644002928979e-06, "loss": 0.5608, "step": 26525 }, { "epoch": 0.76, "grad_norm": 3.997012341356444, "learning_rate": 1.4400387396603194e-06, "loss": 0.2471, "step": 26526 }, { "epoch": 0.76, "grad_norm": 7.370375395573802, "learning_rate": 1.4397131096536504e-06, "loss": 0.6421, "step": 26527 }, { "epoch": 0.76, "grad_norm": 4.679346814117183, "learning_rate": 1.43938751027569e-06, "loss": 0.1732, "step": 26528 }, { "epoch": 0.76, "grad_norm": 3.2567730532956403, "learning_rate": 1.4390619415292417e-06, "loss": 0.3073, "step": 26529 }, { "epoch": 0.76, "grad_norm": 3.4534835629268787, "learning_rate": 1.4387364034171054e-06, "loss": 0.3391, "step": 26530 }, { "epoch": 0.76, "grad_norm": 5.600508310034436, "learning_rate": 1.4384108959420795e-06, "loss": 0.4958, "step": 26531 }, { "epoch": 0.76, "grad_norm": 5.2095926771354995, "learning_rate": 1.4380854191069682e-06, "loss": 0.5674, "step": 26532 }, { "epoch": 0.76, "grad_norm": 5.024234726266043, "learning_rate": 1.4377599729145674e-06, "loss": 0.2216, "step": 26533 }, { "epoch": 0.76, "grad_norm": 4.278884456927967, "learning_rate": 1.4374345573676806e-06, "loss": 0.5553, "step": 26534 }, { "epoch": 0.76, "grad_norm": 3.833185237981568, "learning_rate": 1.4371091724691044e-06, "loss": 0.2808, "step": 26535 }, { "epoch": 0.76, "grad_norm": 4.6667899842090454, "learning_rate": 1.4367838182216403e-06, "loss": 0.5477, "step": 26536 }, { "epoch": 0.76, "grad_norm": 6.386708633461502, "learning_rate": 1.436458494628087e-06, "loss": 0.391, "step": 26537 }, { "epoch": 0.76, "grad_norm": 3.957719726017315, "learning_rate": 1.4361332016912405e-06, "loss": 0.4939, "step": 26538 }, { "epoch": 0.76, "grad_norm": 5.97605050081723, "learning_rate": 1.435807939413903e-06, "loss": 0.3045, "step": 26539 }, { "epoch": 0.76, "grad_norm": 14.789006897609712, "learning_rate": 1.4354827077988693e-06, "loss": 0.6355, "step": 26540 }, { "epoch": 0.76, "grad_norm": 3.778505148745435, "learning_rate": 1.4351575068489403e-06, "loss": 0.5638, "step": 26541 }, { "epoch": 0.76, "grad_norm": 6.846076395739123, "learning_rate": 1.4348323365669126e-06, "loss": 0.4672, "step": 26542 }, { "epoch": 0.76, "grad_norm": 5.0373178226669015, "learning_rate": 1.4345071969555834e-06, "loss": 0.485, "step": 26543 }, { "epoch": 0.76, "grad_norm": 3.640743810327674, "learning_rate": 1.434182088017748e-06, "loss": 0.1579, "step": 26544 }, { "epoch": 0.76, "grad_norm": 5.819776922584739, "learning_rate": 1.4338570097562056e-06, "loss": 0.6111, "step": 26545 }, { "epoch": 0.76, "grad_norm": 7.501989990715848, "learning_rate": 1.4335319621737536e-06, "loss": 0.7789, "step": 26546 }, { "epoch": 0.76, "grad_norm": 12.1652513142382, "learning_rate": 1.4332069452731857e-06, "loss": 0.8254, "step": 26547 }, { "epoch": 0.76, "grad_norm": 3.46426379866824, "learning_rate": 1.4328819590573e-06, "loss": 0.2683, "step": 26548 }, { "epoch": 0.76, "grad_norm": 5.3996123210063045, "learning_rate": 1.432557003528892e-06, "loss": 0.5367, "step": 26549 }, { "epoch": 0.76, "grad_norm": 5.341888460423747, "learning_rate": 1.4322320786907574e-06, "loss": 0.2093, "step": 26550 }, { "epoch": 0.76, "grad_norm": 2.4383111728835423, "learning_rate": 1.431907184545689e-06, "loss": 0.1098, "step": 26551 }, { "epoch": 0.76, "grad_norm": 4.834268284947096, "learning_rate": 1.4315823210964835e-06, "loss": 0.4175, "step": 26552 }, { "epoch": 0.76, "grad_norm": 4.6451673785912355, "learning_rate": 1.431257488345938e-06, "loss": 0.2858, "step": 26553 }, { "epoch": 0.76, "grad_norm": 6.8505438957637255, "learning_rate": 1.430932686296843e-06, "loss": 0.5997, "step": 26554 }, { "epoch": 0.76, "grad_norm": 8.268252094066801, "learning_rate": 1.4306079149519964e-06, "loss": 0.4049, "step": 26555 }, { "epoch": 0.76, "grad_norm": 3.1047473381431, "learning_rate": 1.4302831743141904e-06, "loss": 0.1753, "step": 26556 }, { "epoch": 0.76, "grad_norm": 6.914623311030563, "learning_rate": 1.4299584643862186e-06, "loss": 0.8528, "step": 26557 }, { "epoch": 0.76, "grad_norm": 4.010662589851887, "learning_rate": 1.4296337851708735e-06, "loss": 0.5891, "step": 26558 }, { "epoch": 0.76, "grad_norm": 3.811559811339619, "learning_rate": 1.4293091366709506e-06, "loss": 0.3176, "step": 26559 }, { "epoch": 0.76, "grad_norm": 1.5409442802903563, "learning_rate": 1.42898451888924e-06, "loss": 0.1275, "step": 26560 }, { "epoch": 0.76, "grad_norm": 4.784529265764021, "learning_rate": 1.4286599318285354e-06, "loss": 0.4607, "step": 26561 }, { "epoch": 0.76, "grad_norm": 6.30997595983481, "learning_rate": 1.4283353754916317e-06, "loss": 0.5084, "step": 26562 }, { "epoch": 0.76, "grad_norm": 5.957024145903807, "learning_rate": 1.4280108498813194e-06, "loss": 0.5553, "step": 26563 }, { "epoch": 0.76, "grad_norm": 8.963223020367273, "learning_rate": 1.4276863550003895e-06, "loss": 0.6921, "step": 26564 }, { "epoch": 0.76, "grad_norm": 5.502334532756416, "learning_rate": 1.4273618908516322e-06, "loss": 0.7658, "step": 26565 }, { "epoch": 0.76, "grad_norm": 3.5643982765471844, "learning_rate": 1.427037457437843e-06, "loss": 0.476, "step": 26566 }, { "epoch": 0.76, "grad_norm": 6.511982876387256, "learning_rate": 1.4267130547618085e-06, "loss": 0.6949, "step": 26567 }, { "epoch": 0.76, "grad_norm": 7.027913230056392, "learning_rate": 1.4263886828263235e-06, "loss": 0.4536, "step": 26568 }, { "epoch": 0.76, "grad_norm": 7.088766792827714, "learning_rate": 1.4260643416341769e-06, "loss": 0.6132, "step": 26569 }, { "epoch": 0.76, "grad_norm": 4.62500962694558, "learning_rate": 1.4257400311881563e-06, "loss": 0.5152, "step": 26570 }, { "epoch": 0.76, "grad_norm": 6.25173177568598, "learning_rate": 1.4254157514910566e-06, "loss": 0.2008, "step": 26571 }, { "epoch": 0.76, "grad_norm": 46.084584629024135, "learning_rate": 1.425091502545663e-06, "loss": 0.669, "step": 26572 }, { "epoch": 0.76, "grad_norm": 2.975969731137203, "learning_rate": 1.4247672843547688e-06, "loss": 0.3263, "step": 26573 }, { "epoch": 0.76, "grad_norm": 4.990516968132902, "learning_rate": 1.4244430969211597e-06, "loss": 0.4249, "step": 26574 }, { "epoch": 0.76, "grad_norm": 5.8630366366479745, "learning_rate": 1.4241189402476285e-06, "loss": 0.5232, "step": 26575 }, { "epoch": 0.76, "grad_norm": 6.07026521131637, "learning_rate": 1.4237948143369612e-06, "loss": 0.4183, "step": 26576 }, { "epoch": 0.76, "grad_norm": 5.563620765160416, "learning_rate": 1.4234707191919455e-06, "loss": 0.3032, "step": 26577 }, { "epoch": 0.76, "grad_norm": 6.148366072165908, "learning_rate": 1.4231466548153721e-06, "loss": 0.3117, "step": 26578 }, { "epoch": 0.76, "grad_norm": 7.014613225148289, "learning_rate": 1.4228226212100266e-06, "loss": 0.3297, "step": 26579 }, { "epoch": 0.76, "grad_norm": 3.5675964709192076, "learning_rate": 1.4224986183786987e-06, "loss": 0.4346, "step": 26580 }, { "epoch": 0.76, "grad_norm": 5.373046497828901, "learning_rate": 1.4221746463241738e-06, "loss": 0.5541, "step": 26581 }, { "epoch": 0.76, "grad_norm": 6.157749980850161, "learning_rate": 1.4218507050492409e-06, "loss": 0.3867, "step": 26582 }, { "epoch": 0.76, "grad_norm": 7.188395502446785, "learning_rate": 1.4215267945566858e-06, "loss": 0.49, "step": 26583 }, { "epoch": 0.76, "grad_norm": 1.6332012334345967, "learning_rate": 1.4212029148492956e-06, "loss": 0.2763, "step": 26584 }, { "epoch": 0.76, "grad_norm": 8.653623409665022, "learning_rate": 1.4208790659298537e-06, "loss": 0.4262, "step": 26585 }, { "epoch": 0.76, "grad_norm": 5.477385846771844, "learning_rate": 1.4205552478011491e-06, "loss": 0.3568, "step": 26586 }, { "epoch": 0.76, "grad_norm": 7.428149075081936, "learning_rate": 1.4202314604659689e-06, "loss": 0.6676, "step": 26587 }, { "epoch": 0.76, "grad_norm": 6.296340238141495, "learning_rate": 1.4199077039270948e-06, "loss": 0.6059, "step": 26588 }, { "epoch": 0.76, "grad_norm": 2.473963568163236, "learning_rate": 1.419583978187315e-06, "loss": 0.3094, "step": 26589 }, { "epoch": 0.76, "grad_norm": 4.994072165857291, "learning_rate": 1.419260283249414e-06, "loss": 0.4995, "step": 26590 }, { "epoch": 0.76, "grad_norm": 5.230413252840205, "learning_rate": 1.4189366191161757e-06, "loss": 0.3453, "step": 26591 }, { "epoch": 0.76, "grad_norm": 5.392135054787311, "learning_rate": 1.418612985790383e-06, "loss": 0.4564, "step": 26592 }, { "epoch": 0.76, "grad_norm": 2.423315727179987, "learning_rate": 1.418289383274824e-06, "loss": 0.1118, "step": 26593 }, { "epoch": 0.76, "grad_norm": 2.5684719220584036, "learning_rate": 1.4179658115722783e-06, "loss": 0.0961, "step": 26594 }, { "epoch": 0.76, "grad_norm": 4.11335267442384, "learning_rate": 1.417642270685532e-06, "loss": 0.3143, "step": 26595 }, { "epoch": 0.76, "grad_norm": 10.810789866072753, "learning_rate": 1.417318760617371e-06, "loss": 1.0876, "step": 26596 }, { "epoch": 0.76, "grad_norm": 3.620860762029488, "learning_rate": 1.4169952813705728e-06, "loss": 0.2824, "step": 26597 }, { "epoch": 0.76, "grad_norm": 1.884249150959141, "learning_rate": 1.4166718329479245e-06, "loss": 0.222, "step": 26598 }, { "epoch": 0.76, "grad_norm": 6.020056858174438, "learning_rate": 1.4163484153522055e-06, "loss": 0.4686, "step": 26599 }, { "epoch": 0.76, "grad_norm": 8.574469727338334, "learning_rate": 1.4160250285862015e-06, "loss": 0.6022, "step": 26600 }, { "epoch": 0.76, "grad_norm": 6.43525058371075, "learning_rate": 1.4157016726526912e-06, "loss": 0.4549, "step": 26601 }, { "epoch": 0.76, "grad_norm": 4.033607207738904, "learning_rate": 1.4153783475544603e-06, "loss": 0.2071, "step": 26602 }, { "epoch": 0.76, "grad_norm": 7.052335125153105, "learning_rate": 1.4150550532942875e-06, "loss": 0.692, "step": 26603 }, { "epoch": 0.76, "grad_norm": 6.0218658573890895, "learning_rate": 1.414731789874953e-06, "loss": 0.4104, "step": 26604 }, { "epoch": 0.76, "grad_norm": 10.55509341438479, "learning_rate": 1.4144085572992417e-06, "loss": 0.7211, "step": 26605 }, { "epoch": 0.76, "grad_norm": 5.315801783721888, "learning_rate": 1.4140853555699302e-06, "loss": 0.5597, "step": 26606 }, { "epoch": 0.76, "grad_norm": 3.141309654042833, "learning_rate": 1.4137621846898026e-06, "loss": 0.3255, "step": 26607 }, { "epoch": 0.76, "grad_norm": 4.343858237255905, "learning_rate": 1.4134390446616353e-06, "loss": 0.4383, "step": 26608 }, { "epoch": 0.76, "grad_norm": 4.895097655626569, "learning_rate": 1.4131159354882124e-06, "loss": 0.3435, "step": 26609 }, { "epoch": 0.76, "grad_norm": 6.637346459173576, "learning_rate": 1.412792857172311e-06, "loss": 0.6441, "step": 26610 }, { "epoch": 0.76, "grad_norm": 6.647125150477708, "learning_rate": 1.41246980971671e-06, "loss": 0.4546, "step": 26611 }, { "epoch": 0.76, "grad_norm": 4.2854676459505, "learning_rate": 1.4121467931241905e-06, "loss": 0.2193, "step": 26612 }, { "epoch": 0.76, "grad_norm": 5.726182870394356, "learning_rate": 1.4118238073975293e-06, "loss": 0.5153, "step": 26613 }, { "epoch": 0.76, "grad_norm": 4.079930282258909, "learning_rate": 1.4115008525395073e-06, "loss": 0.3654, "step": 26614 }, { "epoch": 0.76, "grad_norm": 3.714433071729426, "learning_rate": 1.4111779285529005e-06, "loss": 0.3901, "step": 26615 }, { "epoch": 0.76, "grad_norm": 6.067565532089954, "learning_rate": 1.4108550354404893e-06, "loss": 0.5426, "step": 26616 }, { "epoch": 0.76, "grad_norm": 3.7161145528623196, "learning_rate": 1.4105321732050504e-06, "loss": 0.1889, "step": 26617 }, { "epoch": 0.76, "grad_norm": 3.068258030102228, "learning_rate": 1.4102093418493595e-06, "loss": 0.2889, "step": 26618 }, { "epoch": 0.76, "grad_norm": 4.9626741748674785, "learning_rate": 1.4098865413761974e-06, "loss": 0.4165, "step": 26619 }, { "epoch": 0.76, "grad_norm": 4.158394253245673, "learning_rate": 1.4095637717883382e-06, "loss": 0.3091, "step": 26620 }, { "epoch": 0.76, "grad_norm": 6.130067247329927, "learning_rate": 1.4092410330885608e-06, "loss": 0.4268, "step": 26621 }, { "epoch": 0.76, "grad_norm": 3.8868926502985732, "learning_rate": 1.4089183252796395e-06, "loss": 0.2778, "step": 26622 }, { "epoch": 0.76, "grad_norm": 3.8189705205949758, "learning_rate": 1.4085956483643532e-06, "loss": 0.1525, "step": 26623 }, { "epoch": 0.76, "grad_norm": 9.655999695826877, "learning_rate": 1.408273002345476e-06, "loss": 0.3474, "step": 26624 }, { "epoch": 0.76, "grad_norm": 10.620572772196493, "learning_rate": 1.4079503872257843e-06, "loss": 0.5078, "step": 26625 }, { "epoch": 0.76, "grad_norm": 3.505086302463418, "learning_rate": 1.4076278030080515e-06, "loss": 0.3001, "step": 26626 }, { "epoch": 0.76, "grad_norm": 3.333120653202206, "learning_rate": 1.4073052496950546e-06, "loss": 0.1625, "step": 26627 }, { "epoch": 0.76, "grad_norm": 5.777033778832248, "learning_rate": 1.4069827272895698e-06, "loss": 0.6619, "step": 26628 }, { "epoch": 0.76, "grad_norm": 6.0823041674583544, "learning_rate": 1.4066602357943698e-06, "loss": 0.2242, "step": 26629 }, { "epoch": 0.76, "grad_norm": 5.052391438740609, "learning_rate": 1.4063377752122297e-06, "loss": 0.3967, "step": 26630 }, { "epoch": 0.76, "grad_norm": 8.777884927999688, "learning_rate": 1.4060153455459212e-06, "loss": 0.6394, "step": 26631 }, { "epoch": 0.76, "grad_norm": 6.583345928260569, "learning_rate": 1.4056929467982229e-06, "loss": 0.373, "step": 26632 }, { "epoch": 0.76, "grad_norm": 6.578311582261472, "learning_rate": 1.4053705789719029e-06, "loss": 0.3097, "step": 26633 }, { "epoch": 0.76, "grad_norm": 7.404178152538773, "learning_rate": 1.4050482420697392e-06, "loss": 0.2142, "step": 26634 }, { "epoch": 0.76, "grad_norm": 4.386661029819782, "learning_rate": 1.4047259360945015e-06, "loss": 0.1781, "step": 26635 }, { "epoch": 0.76, "grad_norm": 7.247699372522153, "learning_rate": 1.4044036610489654e-06, "loss": 0.6394, "step": 26636 }, { "epoch": 0.76, "grad_norm": 4.830868086826689, "learning_rate": 1.404081416935902e-06, "loss": 0.3319, "step": 26637 }, { "epoch": 0.76, "grad_norm": 3.657748771024321, "learning_rate": 1.403759203758081e-06, "loss": 0.1895, "step": 26638 }, { "epoch": 0.76, "grad_norm": 6.474260940497144, "learning_rate": 1.403437021518279e-06, "loss": 0.7439, "step": 26639 }, { "epoch": 0.76, "grad_norm": 1.6674649797784182, "learning_rate": 1.4031148702192643e-06, "loss": 0.0959, "step": 26640 }, { "epoch": 0.76, "grad_norm": 3.2300179661702866, "learning_rate": 1.4027927498638105e-06, "loss": 0.3828, "step": 26641 }, { "epoch": 0.76, "grad_norm": 4.394919823315573, "learning_rate": 1.4024706604546866e-06, "loss": 0.5896, "step": 26642 }, { "epoch": 0.76, "grad_norm": 8.250235900974129, "learning_rate": 1.4021486019946662e-06, "loss": 0.4553, "step": 26643 }, { "epoch": 0.76, "grad_norm": 6.002450283723252, "learning_rate": 1.4018265744865178e-06, "loss": 0.6641, "step": 26644 }, { "epoch": 0.76, "grad_norm": 9.045782573410522, "learning_rate": 1.4015045779330117e-06, "loss": 0.6068, "step": 26645 }, { "epoch": 0.76, "grad_norm": 3.774424374505844, "learning_rate": 1.4011826123369193e-06, "loss": 0.1244, "step": 26646 }, { "epoch": 0.76, "grad_norm": 3.2826340526538638, "learning_rate": 1.400860677701009e-06, "loss": 0.1845, "step": 26647 }, { "epoch": 0.76, "grad_norm": 5.863517822535204, "learning_rate": 1.4005387740280525e-06, "loss": 0.2632, "step": 26648 }, { "epoch": 0.76, "grad_norm": 7.214639657711603, "learning_rate": 1.400216901320816e-06, "loss": 0.3235, "step": 26649 }, { "epoch": 0.76, "grad_norm": 6.63208909689734, "learning_rate": 1.399895059582072e-06, "loss": 0.3926, "step": 26650 }, { "epoch": 0.76, "grad_norm": 3.652769698330284, "learning_rate": 1.3995732488145874e-06, "loss": 0.3901, "step": 26651 }, { "epoch": 0.76, "grad_norm": 5.0791490666640255, "learning_rate": 1.3992514690211296e-06, "loss": 0.5184, "step": 26652 }, { "epoch": 0.76, "grad_norm": 4.2109007541843795, "learning_rate": 1.3989297202044694e-06, "loss": 0.4043, "step": 26653 }, { "epoch": 0.76, "grad_norm": 5.3551986036327905, "learning_rate": 1.3986080023673716e-06, "loss": 0.248, "step": 26654 }, { "epoch": 0.76, "grad_norm": 8.538986219674442, "learning_rate": 1.3982863155126074e-06, "loss": 0.8244, "step": 26655 }, { "epoch": 0.76, "grad_norm": 5.067005954328629, "learning_rate": 1.3979646596429431e-06, "loss": 0.5193, "step": 26656 }, { "epoch": 0.76, "grad_norm": 5.400364753978493, "learning_rate": 1.3976430347611447e-06, "loss": 0.8554, "step": 26657 }, { "epoch": 0.76, "grad_norm": 7.2977362375681425, "learning_rate": 1.3973214408699787e-06, "loss": 0.4671, "step": 26658 }, { "epoch": 0.76, "grad_norm": 5.553295256109009, "learning_rate": 1.3969998779722128e-06, "loss": 0.3358, "step": 26659 }, { "epoch": 0.76, "grad_norm": 12.690343528769182, "learning_rate": 1.3966783460706145e-06, "loss": 0.5032, "step": 26660 }, { "epoch": 0.76, "grad_norm": 8.464834117311806, "learning_rate": 1.396356845167947e-06, "loss": 1.0575, "step": 26661 }, { "epoch": 0.76, "grad_norm": 7.951236583096897, "learning_rate": 1.3960353752669797e-06, "loss": 0.6591, "step": 26662 }, { "epoch": 0.76, "grad_norm": 2.678230261987477, "learning_rate": 1.3957139363704763e-06, "loss": 0.2343, "step": 26663 }, { "epoch": 0.76, "grad_norm": 12.306595001111916, "learning_rate": 1.3953925284812026e-06, "loss": 0.5927, "step": 26664 }, { "epoch": 0.76, "grad_norm": 7.683892248626407, "learning_rate": 1.3950711516019212e-06, "loss": 0.6185, "step": 26665 }, { "epoch": 0.76, "grad_norm": 9.532343467149515, "learning_rate": 1.3947498057354003e-06, "loss": 0.788, "step": 26666 }, { "epoch": 0.76, "grad_norm": 9.197938425823898, "learning_rate": 1.3944284908844014e-06, "loss": 0.2268, "step": 26667 }, { "epoch": 0.76, "grad_norm": 4.838230507669412, "learning_rate": 1.3941072070516903e-06, "loss": 0.7334, "step": 26668 }, { "epoch": 0.76, "grad_norm": 5.229719272103288, "learning_rate": 1.3937859542400328e-06, "loss": 0.4139, "step": 26669 }, { "epoch": 0.76, "grad_norm": 5.0991134022507945, "learning_rate": 1.3934647324521905e-06, "loss": 0.193, "step": 26670 }, { "epoch": 0.76, "grad_norm": 4.443880893158381, "learning_rate": 1.3931435416909266e-06, "loss": 0.4426, "step": 26671 }, { "epoch": 0.76, "grad_norm": 4.173836464844557, "learning_rate": 1.3928223819590037e-06, "loss": 0.1541, "step": 26672 }, { "epoch": 0.76, "grad_norm": 5.579146219585956, "learning_rate": 1.3925012532591874e-06, "loss": 0.8027, "step": 26673 }, { "epoch": 0.76, "grad_norm": 2.681074000631042, "learning_rate": 1.3921801555942365e-06, "loss": 0.2574, "step": 26674 }, { "epoch": 0.76, "grad_norm": 7.397939792153959, "learning_rate": 1.3918590889669175e-06, "loss": 0.6023, "step": 26675 }, { "epoch": 0.76, "grad_norm": 4.503747068769997, "learning_rate": 1.3915380533799887e-06, "loss": 0.4212, "step": 26676 }, { "epoch": 0.76, "grad_norm": 4.6953061210093905, "learning_rate": 1.3912170488362158e-06, "loss": 0.2438, "step": 26677 }, { "epoch": 0.76, "grad_norm": 8.290609864279702, "learning_rate": 1.3908960753383583e-06, "loss": 0.5144, "step": 26678 }, { "epoch": 0.76, "grad_norm": 3.9026443167131095, "learning_rate": 1.3905751328891752e-06, "loss": 0.2694, "step": 26679 }, { "epoch": 0.76, "grad_norm": 5.497373907665029, "learning_rate": 1.3902542214914321e-06, "loss": 0.3807, "step": 26680 }, { "epoch": 0.76, "grad_norm": 2.413720054164135, "learning_rate": 1.3899333411478855e-06, "loss": 0.1666, "step": 26681 }, { "epoch": 0.76, "grad_norm": 6.497359399687012, "learning_rate": 1.3896124918612996e-06, "loss": 0.6853, "step": 26682 }, { "epoch": 0.76, "grad_norm": 3.562822628470255, "learning_rate": 1.3892916736344314e-06, "loss": 0.3596, "step": 26683 }, { "epoch": 0.76, "grad_norm": 2.7217606723043923, "learning_rate": 1.3889708864700436e-06, "loss": 0.301, "step": 26684 }, { "epoch": 0.76, "grad_norm": 5.551857679503878, "learning_rate": 1.3886501303708948e-06, "loss": 0.3835, "step": 26685 }, { "epoch": 0.76, "grad_norm": 6.131452995307388, "learning_rate": 1.3883294053397434e-06, "loss": 0.4763, "step": 26686 }, { "epoch": 0.76, "grad_norm": 7.730376348574537, "learning_rate": 1.38800871137935e-06, "loss": 0.3499, "step": 26687 }, { "epoch": 0.76, "grad_norm": 4.05214121694662, "learning_rate": 1.387688048492472e-06, "loss": 0.4303, "step": 26688 }, { "epoch": 0.76, "grad_norm": 8.51819200934284, "learning_rate": 1.3873674166818702e-06, "loss": 0.4273, "step": 26689 }, { "epoch": 0.76, "grad_norm": 5.436270388426608, "learning_rate": 1.387046815950302e-06, "loss": 0.4805, "step": 26690 }, { "epoch": 0.76, "grad_norm": 3.523627299337291, "learning_rate": 1.3867262463005249e-06, "loss": 0.1808, "step": 26691 }, { "epoch": 0.76, "grad_norm": 8.734421131100925, "learning_rate": 1.386405707735296e-06, "loss": 1.0347, "step": 26692 }, { "epoch": 0.76, "grad_norm": 4.743004943679399, "learning_rate": 1.3860852002573733e-06, "loss": 0.4569, "step": 26693 }, { "epoch": 0.76, "grad_norm": 5.923550854289862, "learning_rate": 1.3857647238695166e-06, "loss": 0.3681, "step": 26694 }, { "epoch": 0.76, "grad_norm": 5.539829819747417, "learning_rate": 1.3854442785744793e-06, "loss": 0.5972, "step": 26695 }, { "epoch": 0.76, "grad_norm": 5.10001273808104, "learning_rate": 1.3851238643750213e-06, "loss": 0.4561, "step": 26696 }, { "epoch": 0.76, "grad_norm": 6.169042927397089, "learning_rate": 1.3848034812738975e-06, "loss": 0.2481, "step": 26697 }, { "epoch": 0.76, "grad_norm": 4.836128489924263, "learning_rate": 1.3844831292738647e-06, "loss": 0.2814, "step": 26698 }, { "epoch": 0.76, "grad_norm": 88.1971239533638, "learning_rate": 1.3841628083776765e-06, "loss": 0.3404, "step": 26699 }, { "epoch": 0.76, "grad_norm": 2.3832047561717458, "learning_rate": 1.3838425185880906e-06, "loss": 0.1483, "step": 26700 }, { "epoch": 0.76, "grad_norm": 3.767330274245731, "learning_rate": 1.3835222599078634e-06, "loss": 0.3766, "step": 26701 }, { "epoch": 0.76, "grad_norm": 4.630281294028653, "learning_rate": 1.3832020323397478e-06, "loss": 0.4026, "step": 26702 }, { "epoch": 0.76, "grad_norm": 5.21406964332153, "learning_rate": 1.3828818358865015e-06, "loss": 0.3633, "step": 26703 }, { "epoch": 0.76, "grad_norm": 5.292588640478906, "learning_rate": 1.3825616705508772e-06, "loss": 0.2323, "step": 26704 }, { "epoch": 0.76, "grad_norm": 4.099789714072599, "learning_rate": 1.382241536335629e-06, "loss": 0.2787, "step": 26705 }, { "epoch": 0.76, "grad_norm": 4.826460443358119, "learning_rate": 1.3819214332435105e-06, "loss": 0.2994, "step": 26706 }, { "epoch": 0.76, "grad_norm": 7.947176929277995, "learning_rate": 1.3816013612772777e-06, "loss": 0.6903, "step": 26707 }, { "epoch": 0.76, "grad_norm": 6.561976420996431, "learning_rate": 1.381281320439682e-06, "loss": 0.6104, "step": 26708 }, { "epoch": 0.76, "grad_norm": 6.7186179214625055, "learning_rate": 1.3809613107334768e-06, "loss": 0.3973, "step": 26709 }, { "epoch": 0.76, "grad_norm": 3.8025480360900716, "learning_rate": 1.3806413321614176e-06, "loss": 0.2444, "step": 26710 }, { "epoch": 0.76, "grad_norm": 8.349888204637098, "learning_rate": 1.3803213847262554e-06, "loss": 0.5276, "step": 26711 }, { "epoch": 0.76, "grad_norm": 3.0480075785249827, "learning_rate": 1.3800014684307422e-06, "loss": 0.2463, "step": 26712 }, { "epoch": 0.76, "grad_norm": 3.8517889687461047, "learning_rate": 1.37968158327763e-06, "loss": 0.2519, "step": 26713 }, { "epoch": 0.77, "grad_norm": 7.800653004821823, "learning_rate": 1.379361729269672e-06, "loss": 0.5172, "step": 26714 }, { "epoch": 0.77, "grad_norm": 4.597639785516731, "learning_rate": 1.3790419064096184e-06, "loss": 0.6941, "step": 26715 }, { "epoch": 0.77, "grad_norm": 4.417287839836804, "learning_rate": 1.3787221147002229e-06, "loss": 0.4878, "step": 26716 }, { "epoch": 0.77, "grad_norm": 2.5587549988698277, "learning_rate": 1.3784023541442354e-06, "loss": 0.3057, "step": 26717 }, { "epoch": 0.77, "grad_norm": 8.291690161845851, "learning_rate": 1.3780826247444046e-06, "loss": 0.536, "step": 26718 }, { "epoch": 0.77, "grad_norm": 6.605750639059283, "learning_rate": 1.3777629265034848e-06, "loss": 0.7148, "step": 26719 }, { "epoch": 0.77, "grad_norm": 4.5193018348119685, "learning_rate": 1.3774432594242227e-06, "loss": 0.2081, "step": 26720 }, { "epoch": 0.77, "grad_norm": 5.867305180452641, "learning_rate": 1.377123623509372e-06, "loss": 0.5217, "step": 26721 }, { "epoch": 0.77, "grad_norm": 4.02681799045855, "learning_rate": 1.3768040187616788e-06, "loss": 0.23, "step": 26722 }, { "epoch": 0.77, "grad_norm": 6.915820640696037, "learning_rate": 1.3764844451838967e-06, "loss": 0.6162, "step": 26723 }, { "epoch": 0.77, "grad_norm": 7.617698926140444, "learning_rate": 1.3761649027787727e-06, "loss": 0.5957, "step": 26724 }, { "epoch": 0.77, "grad_norm": 8.957234609485521, "learning_rate": 1.3758453915490538e-06, "loss": 1.3943, "step": 26725 }, { "epoch": 0.77, "grad_norm": 4.5048389908176425, "learning_rate": 1.3755259114974928e-06, "loss": 0.3995, "step": 26726 }, { "epoch": 0.77, "grad_norm": 5.9514947696596465, "learning_rate": 1.3752064626268342e-06, "loss": 0.5233, "step": 26727 }, { "epoch": 0.77, "grad_norm": 10.251534835529256, "learning_rate": 1.37488704493983e-06, "loss": 0.7079, "step": 26728 }, { "epoch": 0.77, "grad_norm": 5.560120320013089, "learning_rate": 1.3745676584392242e-06, "loss": 0.5495, "step": 26729 }, { "epoch": 0.77, "grad_norm": 2.9757014353276947, "learning_rate": 1.3742483031277687e-06, "loss": 0.1947, "step": 26730 }, { "epoch": 0.77, "grad_norm": 6.579174655702494, "learning_rate": 1.3739289790082082e-06, "loss": 0.2697, "step": 26731 }, { "epoch": 0.77, "grad_norm": 6.267540845703638, "learning_rate": 1.3736096860832903e-06, "loss": 0.5596, "step": 26732 }, { "epoch": 0.77, "grad_norm": 3.6973211462625013, "learning_rate": 1.3732904243557598e-06, "loss": 0.4049, "step": 26733 }, { "epoch": 0.77, "grad_norm": 3.0371182260660317, "learning_rate": 1.3729711938283663e-06, "loss": 0.2483, "step": 26734 }, { "epoch": 0.77, "grad_norm": 8.80098337075971, "learning_rate": 1.3726519945038558e-06, "loss": 0.6235, "step": 26735 }, { "epoch": 0.77, "grad_norm": 6.6183462558326545, "learning_rate": 1.3723328263849723e-06, "loss": 0.4002, "step": 26736 }, { "epoch": 0.77, "grad_norm": 7.185514557318089, "learning_rate": 1.372013689474464e-06, "loss": 0.3866, "step": 26737 }, { "epoch": 0.77, "grad_norm": 1.952139495172672, "learning_rate": 1.3716945837750757e-06, "loss": 0.0649, "step": 26738 }, { "epoch": 0.77, "grad_norm": 5.078545221401379, "learning_rate": 1.371375509289552e-06, "loss": 0.5005, "step": 26739 }, { "epoch": 0.77, "grad_norm": 3.309790294694395, "learning_rate": 1.3710564660206366e-06, "loss": 0.1082, "step": 26740 }, { "epoch": 0.77, "grad_norm": 2.75002952039653, "learning_rate": 1.3707374539710755e-06, "loss": 0.1934, "step": 26741 }, { "epoch": 0.77, "grad_norm": 5.987455008702085, "learning_rate": 1.3704184731436149e-06, "loss": 0.6324, "step": 26742 }, { "epoch": 0.77, "grad_norm": 8.67235716432998, "learning_rate": 1.3700995235409954e-06, "loss": 0.6581, "step": 26743 }, { "epoch": 0.77, "grad_norm": 4.885522002146697, "learning_rate": 1.3697806051659662e-06, "loss": 0.4028, "step": 26744 }, { "epoch": 0.77, "grad_norm": 7.682207099750821, "learning_rate": 1.3694617180212644e-06, "loss": 0.4198, "step": 26745 }, { "epoch": 0.77, "grad_norm": 4.363214258347349, "learning_rate": 1.3691428621096376e-06, "loss": 0.628, "step": 26746 }, { "epoch": 0.77, "grad_norm": 4.282913135569539, "learning_rate": 1.3688240374338263e-06, "loss": 0.343, "step": 26747 }, { "epoch": 0.77, "grad_norm": 3.4770339474714502, "learning_rate": 1.3685052439965768e-06, "loss": 0.1816, "step": 26748 }, { "epoch": 0.77, "grad_norm": 6.895963162432213, "learning_rate": 1.3681864818006275e-06, "loss": 0.8388, "step": 26749 }, { "epoch": 0.77, "grad_norm": 5.687069572677843, "learning_rate": 1.3678677508487242e-06, "loss": 0.5431, "step": 26750 }, { "epoch": 0.77, "grad_norm": 1.9933549115824156, "learning_rate": 1.3675490511436079e-06, "loss": 0.1087, "step": 26751 }, { "epoch": 0.77, "grad_norm": 1.9420183625232152, "learning_rate": 1.3672303826880179e-06, "loss": 0.3464, "step": 26752 }, { "epoch": 0.77, "grad_norm": 7.994817217227893, "learning_rate": 1.3669117454846993e-06, "loss": 0.4776, "step": 26753 }, { "epoch": 0.77, "grad_norm": 2.5224590707639845, "learning_rate": 1.3665931395363901e-06, "loss": 0.1739, "step": 26754 }, { "epoch": 0.77, "grad_norm": 5.55719920268862, "learning_rate": 1.3662745648458348e-06, "loss": 0.5619, "step": 26755 }, { "epoch": 0.77, "grad_norm": 10.642536210152423, "learning_rate": 1.3659560214157697e-06, "loss": 0.7686, "step": 26756 }, { "epoch": 0.77, "grad_norm": 7.069819649226499, "learning_rate": 1.3656375092489393e-06, "loss": 0.5325, "step": 26757 }, { "epoch": 0.77, "grad_norm": 4.968498319573554, "learning_rate": 1.3653190283480821e-06, "loss": 0.4191, "step": 26758 }, { "epoch": 0.77, "grad_norm": 7.460616563215564, "learning_rate": 1.3650005787159364e-06, "loss": 0.6409, "step": 26759 }, { "epoch": 0.77, "grad_norm": 5.98415185098957, "learning_rate": 1.364682160355244e-06, "loss": 0.5509, "step": 26760 }, { "epoch": 0.77, "grad_norm": 5.8111484658538135, "learning_rate": 1.3643637732687426e-06, "loss": 0.5076, "step": 26761 }, { "epoch": 0.77, "grad_norm": 7.9630331438834245, "learning_rate": 1.3640454174591733e-06, "loss": 0.6073, "step": 26762 }, { "epoch": 0.77, "grad_norm": 3.5515720170947076, "learning_rate": 1.3637270929292722e-06, "loss": 0.2973, "step": 26763 }, { "epoch": 0.77, "grad_norm": 3.899022641349306, "learning_rate": 1.3634087996817808e-06, "loss": 0.205, "step": 26764 }, { "epoch": 0.77, "grad_norm": 6.296714174853066, "learning_rate": 1.3630905377194353e-06, "loss": 0.3382, "step": 26765 }, { "epoch": 0.77, "grad_norm": 7.803148883604287, "learning_rate": 1.3627723070449727e-06, "loss": 0.9609, "step": 26766 }, { "epoch": 0.77, "grad_norm": 4.710464668901045, "learning_rate": 1.3624541076611337e-06, "loss": 0.226, "step": 26767 }, { "epoch": 0.77, "grad_norm": 9.711074717650124, "learning_rate": 1.362135939570653e-06, "loss": 0.8224, "step": 26768 }, { "epoch": 0.77, "grad_norm": 5.410128596817019, "learning_rate": 1.3618178027762701e-06, "loss": 0.1916, "step": 26769 }, { "epoch": 0.77, "grad_norm": 5.128726255885812, "learning_rate": 1.3614996972807193e-06, "loss": 0.5999, "step": 26770 }, { "epoch": 0.77, "grad_norm": 7.464598838824827, "learning_rate": 1.3611816230867414e-06, "loss": 0.3373, "step": 26771 }, { "epoch": 0.77, "grad_norm": 4.310130173073772, "learning_rate": 1.360863580197067e-06, "loss": 0.4171, "step": 26772 }, { "epoch": 0.77, "grad_norm": 7.97283500235501, "learning_rate": 1.360545568614437e-06, "loss": 0.2761, "step": 26773 }, { "epoch": 0.77, "grad_norm": 2.66590110896167, "learning_rate": 1.3602275883415832e-06, "loss": 0.2683, "step": 26774 }, { "epoch": 0.77, "grad_norm": 2.4139332474361788, "learning_rate": 1.3599096393812438e-06, "loss": 0.2539, "step": 26775 }, { "epoch": 0.77, "grad_norm": 8.92961040473558, "learning_rate": 1.3595917217361553e-06, "loss": 0.4286, "step": 26776 }, { "epoch": 0.77, "grad_norm": 4.893564724003128, "learning_rate": 1.3592738354090507e-06, "loss": 0.5697, "step": 26777 }, { "epoch": 0.77, "grad_norm": 4.765054187046256, "learning_rate": 1.3589559804026653e-06, "loss": 0.5274, "step": 26778 }, { "epoch": 0.77, "grad_norm": 8.539984622736002, "learning_rate": 1.3586381567197314e-06, "loss": 0.7513, "step": 26779 }, { "epoch": 0.77, "grad_norm": 7.630410073036077, "learning_rate": 1.3583203643629866e-06, "loss": 0.3947, "step": 26780 }, { "epoch": 0.77, "grad_norm": 4.5859894757680335, "learning_rate": 1.3580026033351622e-06, "loss": 0.3087, "step": 26781 }, { "epoch": 0.77, "grad_norm": 4.503906541147744, "learning_rate": 1.357684873638994e-06, "loss": 0.3974, "step": 26782 }, { "epoch": 0.77, "grad_norm": 5.129871565194456, "learning_rate": 1.3573671752772133e-06, "loss": 0.2839, "step": 26783 }, { "epoch": 0.77, "grad_norm": 5.11050866758647, "learning_rate": 1.357049508252556e-06, "loss": 0.2399, "step": 26784 }, { "epoch": 0.77, "grad_norm": 5.54835602236679, "learning_rate": 1.3567318725677525e-06, "loss": 0.3758, "step": 26785 }, { "epoch": 0.77, "grad_norm": 6.307755150580343, "learning_rate": 1.3564142682255354e-06, "loss": 0.1605, "step": 26786 }, { "epoch": 0.77, "grad_norm": 16.691890067657457, "learning_rate": 1.3560966952286385e-06, "loss": 0.3988, "step": 26787 }, { "epoch": 0.77, "grad_norm": 7.516047348876224, "learning_rate": 1.355779153579792e-06, "loss": 0.5561, "step": 26788 }, { "epoch": 0.77, "grad_norm": 11.870840568892866, "learning_rate": 1.3554616432817309e-06, "loss": 0.3114, "step": 26789 }, { "epoch": 0.77, "grad_norm": 7.434886761786188, "learning_rate": 1.355144164337182e-06, "loss": 0.4081, "step": 26790 }, { "epoch": 0.77, "grad_norm": 4.282455831412397, "learning_rate": 1.354826716748881e-06, "loss": 0.4127, "step": 26791 }, { "epoch": 0.77, "grad_norm": 5.502957004320231, "learning_rate": 1.354509300519557e-06, "loss": 0.2004, "step": 26792 }, { "epoch": 0.77, "grad_norm": 3.932044170077217, "learning_rate": 1.354191915651939e-06, "loss": 0.6373, "step": 26793 }, { "epoch": 0.77, "grad_norm": 5.131483512703997, "learning_rate": 1.353874562148761e-06, "loss": 0.4484, "step": 26794 }, { "epoch": 0.77, "grad_norm": 8.339954870408354, "learning_rate": 1.3535572400127489e-06, "loss": 0.2882, "step": 26795 }, { "epoch": 0.77, "grad_norm": 4.231874072258312, "learning_rate": 1.353239949246637e-06, "loss": 0.6701, "step": 26796 }, { "epoch": 0.77, "grad_norm": 6.794768890649905, "learning_rate": 1.352922689853151e-06, "loss": 0.5404, "step": 26797 }, { "epoch": 0.77, "grad_norm": 5.1699757041766095, "learning_rate": 1.3526054618350237e-06, "loss": 0.278, "step": 26798 }, { "epoch": 0.77, "grad_norm": 4.273242646121471, "learning_rate": 1.3522882651949826e-06, "loss": 0.2951, "step": 26799 }, { "epoch": 0.77, "grad_norm": 10.486009473216864, "learning_rate": 1.351971099935755e-06, "loss": 0.5772, "step": 26800 }, { "epoch": 0.77, "grad_norm": 6.926307757181042, "learning_rate": 1.3516539660600724e-06, "loss": 0.388, "step": 26801 }, { "epoch": 0.77, "grad_norm": 5.531985950540395, "learning_rate": 1.3513368635706597e-06, "loss": 0.3883, "step": 26802 }, { "epoch": 0.77, "grad_norm": 11.201318996963193, "learning_rate": 1.3510197924702484e-06, "loss": 1.2225, "step": 26803 }, { "epoch": 0.77, "grad_norm": 5.791845684806786, "learning_rate": 1.3507027527615646e-06, "loss": 0.4641, "step": 26804 }, { "epoch": 0.77, "grad_norm": 2.69546291650317, "learning_rate": 1.3503857444473357e-06, "loss": 0.2627, "step": 26805 }, { "epoch": 0.77, "grad_norm": 2.50983352272426, "learning_rate": 1.3500687675302876e-06, "loss": 0.2901, "step": 26806 }, { "epoch": 0.77, "grad_norm": 2.8206498005117138, "learning_rate": 1.3497518220131484e-06, "loss": 0.3309, "step": 26807 }, { "epoch": 0.77, "grad_norm": 6.604229883639501, "learning_rate": 1.3494349078986457e-06, "loss": 0.5804, "step": 26808 }, { "epoch": 0.77, "grad_norm": 5.476113207096215, "learning_rate": 1.3491180251895041e-06, "loss": 0.2448, "step": 26809 }, { "epoch": 0.77, "grad_norm": 7.021938821785229, "learning_rate": 1.348801173888452e-06, "loss": 0.6102, "step": 26810 }, { "epoch": 0.77, "grad_norm": 5.140201945419629, "learning_rate": 1.348484353998214e-06, "loss": 0.3015, "step": 26811 }, { "epoch": 0.77, "grad_norm": 6.84430168053366, "learning_rate": 1.3481675655215155e-06, "loss": 0.24, "step": 26812 }, { "epoch": 0.77, "grad_norm": 8.289789309982133, "learning_rate": 1.3478508084610804e-06, "loss": 1.1203, "step": 26813 }, { "epoch": 0.77, "grad_norm": 7.104931492807413, "learning_rate": 1.3475340828196365e-06, "loss": 0.6383, "step": 26814 }, { "epoch": 0.77, "grad_norm": 4.274534569327759, "learning_rate": 1.3472173885999058e-06, "loss": 0.5003, "step": 26815 }, { "epoch": 0.77, "grad_norm": 4.237930803708873, "learning_rate": 1.3469007258046141e-06, "loss": 0.5487, "step": 26816 }, { "epoch": 0.77, "grad_norm": 4.400703347950762, "learning_rate": 1.3465840944364867e-06, "loss": 0.4769, "step": 26817 }, { "epoch": 0.77, "grad_norm": 2.8752246851820384, "learning_rate": 1.3462674944982468e-06, "loss": 0.4546, "step": 26818 }, { "epoch": 0.77, "grad_norm": 4.9446585827020275, "learning_rate": 1.3459509259926179e-06, "loss": 0.2588, "step": 26819 }, { "epoch": 0.77, "grad_norm": 2.7460140037624083, "learning_rate": 1.3456343889223222e-06, "loss": 0.3846, "step": 26820 }, { "epoch": 0.77, "grad_norm": 4.362388362385183, "learning_rate": 1.3453178832900849e-06, "loss": 0.1166, "step": 26821 }, { "epoch": 0.77, "grad_norm": 17.17895794924488, "learning_rate": 1.3450014090986264e-06, "loss": 0.5454, "step": 26822 }, { "epoch": 0.77, "grad_norm": 6.054020920023265, "learning_rate": 1.3446849663506722e-06, "loss": 0.3807, "step": 26823 }, { "epoch": 0.77, "grad_norm": 3.6824904536106033, "learning_rate": 1.3443685550489415e-06, "loss": 0.2563, "step": 26824 }, { "epoch": 0.77, "grad_norm": 6.998756775127987, "learning_rate": 1.34405217519616e-06, "loss": 0.1214, "step": 26825 }, { "epoch": 0.77, "grad_norm": 5.238116256766651, "learning_rate": 1.3437358267950472e-06, "loss": 0.1472, "step": 26826 }, { "epoch": 0.77, "grad_norm": 4.927692571909489, "learning_rate": 1.3434195098483239e-06, "loss": 0.3418, "step": 26827 }, { "epoch": 0.77, "grad_norm": 3.310128028502634, "learning_rate": 1.3431032243587133e-06, "loss": 0.2534, "step": 26828 }, { "epoch": 0.77, "grad_norm": 5.999825236636332, "learning_rate": 1.3427869703289343e-06, "loss": 0.2129, "step": 26829 }, { "epoch": 0.77, "grad_norm": 3.746246239320117, "learning_rate": 1.34247074776171e-06, "loss": 0.2012, "step": 26830 }, { "epoch": 0.77, "grad_norm": 4.979024185419193, "learning_rate": 1.3421545566597594e-06, "loss": 0.6672, "step": 26831 }, { "epoch": 0.77, "grad_norm": 7.32496174616801, "learning_rate": 1.3418383970258015e-06, "loss": 0.5034, "step": 26832 }, { "epoch": 0.77, "grad_norm": 9.982123895749481, "learning_rate": 1.341522268862559e-06, "loss": 0.6005, "step": 26833 }, { "epoch": 0.77, "grad_norm": 3.872395532330534, "learning_rate": 1.3412061721727483e-06, "loss": 0.3648, "step": 26834 }, { "epoch": 0.77, "grad_norm": 5.068862825353891, "learning_rate": 1.3408901069590919e-06, "loss": 0.5535, "step": 26835 }, { "epoch": 0.77, "grad_norm": 7.821074884011058, "learning_rate": 1.3405740732243062e-06, "loss": 0.6869, "step": 26836 }, { "epoch": 0.77, "grad_norm": 8.288533732297106, "learning_rate": 1.340258070971112e-06, "loss": 0.468, "step": 26837 }, { "epoch": 0.77, "grad_norm": 12.937943860977002, "learning_rate": 1.3399421002022278e-06, "loss": 0.6496, "step": 26838 }, { "epoch": 0.77, "grad_norm": 5.040526042990908, "learning_rate": 1.3396261609203708e-06, "loss": 0.4034, "step": 26839 }, { "epoch": 0.77, "grad_norm": 12.42384113610337, "learning_rate": 1.3393102531282576e-06, "loss": 0.4973, "step": 26840 }, { "epoch": 0.77, "grad_norm": 3.7902714167364047, "learning_rate": 1.3389943768286078e-06, "loss": 0.4289, "step": 26841 }, { "epoch": 0.77, "grad_norm": 6.028829929075849, "learning_rate": 1.3386785320241403e-06, "loss": 0.6244, "step": 26842 }, { "epoch": 0.77, "grad_norm": 5.292385257779932, "learning_rate": 1.3383627187175684e-06, "loss": 0.5279, "step": 26843 }, { "epoch": 0.77, "grad_norm": 5.702589746728106, "learning_rate": 1.3380469369116134e-06, "loss": 0.3984, "step": 26844 }, { "epoch": 0.77, "grad_norm": 5.720240372695647, "learning_rate": 1.3377311866089893e-06, "loss": 0.7555, "step": 26845 }, { "epoch": 0.77, "grad_norm": 3.530596250598321, "learning_rate": 1.337415467812413e-06, "loss": 0.2445, "step": 26846 }, { "epoch": 0.77, "grad_norm": 3.160444428043903, "learning_rate": 1.337099780524599e-06, "loss": 0.1447, "step": 26847 }, { "epoch": 0.77, "grad_norm": 12.384937573346049, "learning_rate": 1.3367841247482643e-06, "loss": 1.0183, "step": 26848 }, { "epoch": 0.77, "grad_norm": 6.94765698025304, "learning_rate": 1.3364685004861267e-06, "loss": 0.6138, "step": 26849 }, { "epoch": 0.77, "grad_norm": 5.701378123678105, "learning_rate": 1.336152907740898e-06, "loss": 0.4334, "step": 26850 }, { "epoch": 0.77, "grad_norm": 8.440124894614673, "learning_rate": 1.3358373465152962e-06, "loss": 0.5259, "step": 26851 }, { "epoch": 0.77, "grad_norm": 5.340231266764453, "learning_rate": 1.3355218168120343e-06, "loss": 0.5843, "step": 26852 }, { "epoch": 0.77, "grad_norm": 8.639297742119421, "learning_rate": 1.3352063186338271e-06, "loss": 0.5114, "step": 26853 }, { "epoch": 0.77, "grad_norm": 5.639414226738426, "learning_rate": 1.3348908519833875e-06, "loss": 0.3818, "step": 26854 }, { "epoch": 0.77, "grad_norm": 6.1088236294113045, "learning_rate": 1.3345754168634318e-06, "loss": 0.5889, "step": 26855 }, { "epoch": 0.77, "grad_norm": 5.682307083943469, "learning_rate": 1.3342600132766714e-06, "loss": 0.5449, "step": 26856 }, { "epoch": 0.77, "grad_norm": 7.028716177563036, "learning_rate": 1.3339446412258206e-06, "loss": 0.7297, "step": 26857 }, { "epoch": 0.77, "grad_norm": 4.493162749202598, "learning_rate": 1.3336293007135937e-06, "loss": 0.1629, "step": 26858 }, { "epoch": 0.77, "grad_norm": 2.216819124488254, "learning_rate": 1.3333139917427029e-06, "loss": 0.2111, "step": 26859 }, { "epoch": 0.77, "grad_norm": 3.244072901359355, "learning_rate": 1.3329987143158607e-06, "loss": 0.3053, "step": 26860 }, { "epoch": 0.77, "grad_norm": 6.312448822418336, "learning_rate": 1.332683468435777e-06, "loss": 0.4813, "step": 26861 }, { "epoch": 0.77, "grad_norm": 6.971164404863614, "learning_rate": 1.332368254105168e-06, "loss": 0.4991, "step": 26862 }, { "epoch": 0.77, "grad_norm": 10.438583317804158, "learning_rate": 1.332053071326741e-06, "loss": 0.4693, "step": 26863 }, { "epoch": 0.77, "grad_norm": 7.242226379356011, "learning_rate": 1.331737920103212e-06, "loss": 0.4307, "step": 26864 }, { "epoch": 0.77, "grad_norm": 8.081488316821165, "learning_rate": 1.3314228004372899e-06, "loss": 0.8112, "step": 26865 }, { "epoch": 0.77, "grad_norm": 5.721476045843238, "learning_rate": 1.331107712331684e-06, "loss": 0.455, "step": 26866 }, { "epoch": 0.77, "grad_norm": 3.875497662981195, "learning_rate": 1.3307926557891088e-06, "loss": 0.2081, "step": 26867 }, { "epoch": 0.77, "grad_norm": 8.734788669896803, "learning_rate": 1.3304776308122703e-06, "loss": 0.6082, "step": 26868 }, { "epoch": 0.77, "grad_norm": 3.566663617878499, "learning_rate": 1.3301626374038828e-06, "loss": 0.1368, "step": 26869 }, { "epoch": 0.77, "grad_norm": 7.389177047477267, "learning_rate": 1.3298476755666528e-06, "loss": 0.4669, "step": 26870 }, { "epoch": 0.77, "grad_norm": 8.347641398193971, "learning_rate": 1.329532745303293e-06, "loss": 0.509, "step": 26871 }, { "epoch": 0.77, "grad_norm": 6.207545206963093, "learning_rate": 1.3292178466165113e-06, "loss": 0.515, "step": 26872 }, { "epoch": 0.77, "grad_norm": 3.7552750044083494, "learning_rate": 1.3289029795090147e-06, "loss": 0.2331, "step": 26873 }, { "epoch": 0.77, "grad_norm": 7.250683850888962, "learning_rate": 1.3285881439835153e-06, "loss": 0.5207, "step": 26874 }, { "epoch": 0.77, "grad_norm": 3.479325334658044, "learning_rate": 1.3282733400427183e-06, "loss": 0.4178, "step": 26875 }, { "epoch": 0.77, "grad_norm": 10.143817134689076, "learning_rate": 1.3279585676893353e-06, "loss": 1.2466, "step": 26876 }, { "epoch": 0.77, "grad_norm": 6.901951828922533, "learning_rate": 1.327643826926071e-06, "loss": 0.4527, "step": 26877 }, { "epoch": 0.77, "grad_norm": 2.5372833121850475, "learning_rate": 1.3273291177556369e-06, "loss": 0.1361, "step": 26878 }, { "epoch": 0.77, "grad_norm": 24.162248525480155, "learning_rate": 1.3270144401807378e-06, "loss": 0.552, "step": 26879 }, { "epoch": 0.77, "grad_norm": 5.749289800487661, "learning_rate": 1.326699794204081e-06, "loss": 0.2014, "step": 26880 }, { "epoch": 0.77, "grad_norm": 4.927422101214549, "learning_rate": 1.3263851798283722e-06, "loss": 0.3647, "step": 26881 }, { "epoch": 0.77, "grad_norm": 3.7705565328150072, "learning_rate": 1.3260705970563197e-06, "loss": 0.4508, "step": 26882 }, { "epoch": 0.77, "grad_norm": 4.393358838832684, "learning_rate": 1.3257560458906303e-06, "loss": 0.5673, "step": 26883 }, { "epoch": 0.77, "grad_norm": 4.7259314249740365, "learning_rate": 1.3254415263340087e-06, "loss": 0.4252, "step": 26884 }, { "epoch": 0.77, "grad_norm": 9.410300716496266, "learning_rate": 1.3251270383891624e-06, "loss": 0.7038, "step": 26885 }, { "epoch": 0.77, "grad_norm": 4.000172611326936, "learning_rate": 1.3248125820587954e-06, "loss": 0.1338, "step": 26886 }, { "epoch": 0.77, "grad_norm": 8.413177301992294, "learning_rate": 1.3244981573456133e-06, "loss": 0.6587, "step": 26887 }, { "epoch": 0.77, "grad_norm": 4.533564193658165, "learning_rate": 1.3241837642523198e-06, "loss": 0.1076, "step": 26888 }, { "epoch": 0.77, "grad_norm": 5.816010717261408, "learning_rate": 1.3238694027816207e-06, "loss": 0.2922, "step": 26889 }, { "epoch": 0.77, "grad_norm": 8.597302177795552, "learning_rate": 1.3235550729362217e-06, "loss": 0.6449, "step": 26890 }, { "epoch": 0.77, "grad_norm": 5.906795738882368, "learning_rate": 1.3232407747188264e-06, "loss": 0.2983, "step": 26891 }, { "epoch": 0.77, "grad_norm": 6.7062654763476, "learning_rate": 1.3229265081321374e-06, "loss": 0.3937, "step": 26892 }, { "epoch": 0.77, "grad_norm": 2.7578341561566715, "learning_rate": 1.3226122731788576e-06, "loss": 0.2019, "step": 26893 }, { "epoch": 0.77, "grad_norm": 5.85227805067529, "learning_rate": 1.3222980698616933e-06, "loss": 0.5286, "step": 26894 }, { "epoch": 0.77, "grad_norm": 4.416776217895363, "learning_rate": 1.3219838981833443e-06, "loss": 0.2943, "step": 26895 }, { "epoch": 0.77, "grad_norm": 13.640272627536284, "learning_rate": 1.3216697581465166e-06, "loss": 0.8356, "step": 26896 }, { "epoch": 0.77, "grad_norm": 5.155042379743418, "learning_rate": 1.3213556497539092e-06, "loss": 0.6157, "step": 26897 }, { "epoch": 0.77, "grad_norm": 3.043656865130437, "learning_rate": 1.3210415730082281e-06, "loss": 0.289, "step": 26898 }, { "epoch": 0.77, "grad_norm": 7.697101737433551, "learning_rate": 1.3207275279121733e-06, "loss": 0.5723, "step": 26899 }, { "epoch": 0.77, "grad_norm": 8.376604296624995, "learning_rate": 1.320413514468445e-06, "loss": 0.3316, "step": 26900 }, { "epoch": 0.77, "grad_norm": 6.896521504187967, "learning_rate": 1.3200995326797478e-06, "loss": 0.3992, "step": 26901 }, { "epoch": 0.77, "grad_norm": 2.7779370839743907, "learning_rate": 1.3197855825487794e-06, "loss": 0.2653, "step": 26902 }, { "epoch": 0.77, "grad_norm": 7.968974390797508, "learning_rate": 1.3194716640782446e-06, "loss": 0.6695, "step": 26903 }, { "epoch": 0.77, "grad_norm": 7.867183742124537, "learning_rate": 1.3191577772708397e-06, "loss": 0.9825, "step": 26904 }, { "epoch": 0.77, "grad_norm": 4.978111493127654, "learning_rate": 1.3188439221292694e-06, "loss": 0.234, "step": 26905 }, { "epoch": 0.77, "grad_norm": 4.594761847300755, "learning_rate": 1.318530098656231e-06, "loss": 0.5311, "step": 26906 }, { "epoch": 0.77, "grad_norm": 7.077436333642479, "learning_rate": 1.318216306854424e-06, "loss": 0.6182, "step": 26907 }, { "epoch": 0.77, "grad_norm": 8.485632703094458, "learning_rate": 1.31790254672655e-06, "loss": 0.6778, "step": 26908 }, { "epoch": 0.77, "grad_norm": 6.221042708825027, "learning_rate": 1.3175888182753056e-06, "loss": 0.3183, "step": 26909 }, { "epoch": 0.77, "grad_norm": 11.046948552729852, "learning_rate": 1.3172751215033925e-06, "loss": 0.5251, "step": 26910 }, { "epoch": 0.77, "grad_norm": 3.4454561579849887, "learning_rate": 1.3169614564135069e-06, "loss": 0.5056, "step": 26911 }, { "epoch": 0.77, "grad_norm": 4.7148433960264216, "learning_rate": 1.3166478230083496e-06, "loss": 0.5247, "step": 26912 }, { "epoch": 0.77, "grad_norm": 4.730837290210468, "learning_rate": 1.3163342212906178e-06, "loss": 0.5582, "step": 26913 }, { "epoch": 0.77, "grad_norm": 12.74333697478049, "learning_rate": 1.3160206512630076e-06, "loss": 0.6683, "step": 26914 }, { "epoch": 0.77, "grad_norm": 7.429023464303953, "learning_rate": 1.3157071129282195e-06, "loss": 0.6355, "step": 26915 }, { "epoch": 0.77, "grad_norm": 2.909987210478321, "learning_rate": 1.3153936062889484e-06, "loss": 0.2861, "step": 26916 }, { "epoch": 0.77, "grad_norm": 7.9306573885384735, "learning_rate": 1.3150801313478933e-06, "loss": 0.5404, "step": 26917 }, { "epoch": 0.77, "grad_norm": 4.402337738547498, "learning_rate": 1.3147666881077508e-06, "loss": 0.3612, "step": 26918 }, { "epoch": 0.77, "grad_norm": 10.839499198317736, "learning_rate": 1.3144532765712166e-06, "loss": 0.7633, "step": 26919 }, { "epoch": 0.77, "grad_norm": 6.971575382348496, "learning_rate": 1.3141398967409852e-06, "loss": 0.6901, "step": 26920 }, { "epoch": 0.77, "grad_norm": 9.956592764165638, "learning_rate": 1.313826548619756e-06, "loss": 0.9304, "step": 26921 }, { "epoch": 0.77, "grad_norm": 7.709863342479203, "learning_rate": 1.3135132322102223e-06, "loss": 0.6728, "step": 26922 }, { "epoch": 0.77, "grad_norm": 6.07246064381824, "learning_rate": 1.3131999475150798e-06, "loss": 0.4442, "step": 26923 }, { "epoch": 0.77, "grad_norm": 5.3188438012786685, "learning_rate": 1.3128866945370256e-06, "loss": 0.2335, "step": 26924 }, { "epoch": 0.77, "grad_norm": 7.406155356273245, "learning_rate": 1.3125734732787536e-06, "loss": 0.4607, "step": 26925 }, { "epoch": 0.77, "grad_norm": 4.690257380424269, "learning_rate": 1.3122602837429577e-06, "loss": 0.2291, "step": 26926 }, { "epoch": 0.77, "grad_norm": 5.238816429523454, "learning_rate": 1.3119471259323313e-06, "loss": 0.3676, "step": 26927 }, { "epoch": 0.77, "grad_norm": 4.863793035947685, "learning_rate": 1.311633999849571e-06, "loss": 0.2519, "step": 26928 }, { "epoch": 0.77, "grad_norm": 4.425754045367486, "learning_rate": 1.311320905497368e-06, "loss": 0.1045, "step": 26929 }, { "epoch": 0.77, "grad_norm": 8.150413792876849, "learning_rate": 1.3110078428784184e-06, "loss": 0.6075, "step": 26930 }, { "epoch": 0.77, "grad_norm": 5.834353434967741, "learning_rate": 1.3106948119954127e-06, "loss": 0.5901, "step": 26931 }, { "epoch": 0.77, "grad_norm": 5.561341754467652, "learning_rate": 1.3103818128510465e-06, "loss": 0.4432, "step": 26932 }, { "epoch": 0.77, "grad_norm": 6.99719672611295, "learning_rate": 1.3100688454480115e-06, "loss": 0.2512, "step": 26933 }, { "epoch": 0.77, "grad_norm": 4.662434138735405, "learning_rate": 1.3097559097889983e-06, "loss": 0.1318, "step": 26934 }, { "epoch": 0.77, "grad_norm": 8.942412620162317, "learning_rate": 1.3094430058767028e-06, "loss": 0.7296, "step": 26935 }, { "epoch": 0.77, "grad_norm": 5.031565685912085, "learning_rate": 1.3091301337138123e-06, "loss": 0.5154, "step": 26936 }, { "epoch": 0.77, "grad_norm": 4.100658992980523, "learning_rate": 1.308817293303023e-06, "loss": 0.456, "step": 26937 }, { "epoch": 0.77, "grad_norm": 5.286790323090051, "learning_rate": 1.3085044846470224e-06, "loss": 0.244, "step": 26938 }, { "epoch": 0.77, "grad_norm": 3.6693610818059983, "learning_rate": 1.3081917077485045e-06, "loss": 0.4486, "step": 26939 }, { "epoch": 0.77, "grad_norm": 6.282273963477808, "learning_rate": 1.307878962610159e-06, "loss": 0.2263, "step": 26940 }, { "epoch": 0.77, "grad_norm": 17.84603081605126, "learning_rate": 1.3075662492346748e-06, "loss": 0.4511, "step": 26941 }, { "epoch": 0.77, "grad_norm": 4.5654263844197605, "learning_rate": 1.3072535676247444e-06, "loss": 0.318, "step": 26942 }, { "epoch": 0.77, "grad_norm": 5.264673707420571, "learning_rate": 1.3069409177830561e-06, "loss": 0.6283, "step": 26943 }, { "epoch": 0.77, "grad_norm": 5.624989997007165, "learning_rate": 1.3066282997123015e-06, "loss": 0.6666, "step": 26944 }, { "epoch": 0.77, "grad_norm": 20.398912357806473, "learning_rate": 1.306315713415167e-06, "loss": 0.7999, "step": 26945 }, { "epoch": 0.77, "grad_norm": 8.199880892190945, "learning_rate": 1.3060031588943479e-06, "loss": 0.8978, "step": 26946 }, { "epoch": 0.77, "grad_norm": 4.8607785568320585, "learning_rate": 1.3056906361525246e-06, "loss": 0.3921, "step": 26947 }, { "epoch": 0.77, "grad_norm": 7.848149984273948, "learning_rate": 1.3053781451923902e-06, "loss": 0.5024, "step": 26948 }, { "epoch": 0.77, "grad_norm": 7.008752936261744, "learning_rate": 1.3050656860166345e-06, "loss": 0.4328, "step": 26949 }, { "epoch": 0.77, "grad_norm": 5.888411776490476, "learning_rate": 1.3047532586279422e-06, "loss": 0.5791, "step": 26950 }, { "epoch": 0.77, "grad_norm": 5.217325187390295, "learning_rate": 1.304440863029004e-06, "loss": 0.6838, "step": 26951 }, { "epoch": 0.77, "grad_norm": 2.6571463475006665, "learning_rate": 1.304128499222506e-06, "loss": 0.1895, "step": 26952 }, { "epoch": 0.77, "grad_norm": 6.2876913530721605, "learning_rate": 1.303816167211136e-06, "loss": 0.387, "step": 26953 }, { "epoch": 0.77, "grad_norm": 5.740524362544785, "learning_rate": 1.3035038669975786e-06, "loss": 0.3651, "step": 26954 }, { "epoch": 0.77, "grad_norm": 2.8417254357026747, "learning_rate": 1.3031915985845223e-06, "loss": 0.2455, "step": 26955 }, { "epoch": 0.77, "grad_norm": 9.176263646306746, "learning_rate": 1.302879361974655e-06, "loss": 0.4989, "step": 26956 }, { "epoch": 0.77, "grad_norm": 4.141355414115439, "learning_rate": 1.3025671571706594e-06, "loss": 0.1733, "step": 26957 }, { "epoch": 0.77, "grad_norm": 6.051645373607518, "learning_rate": 1.3022549841752253e-06, "loss": 0.4128, "step": 26958 }, { "epoch": 0.77, "grad_norm": 4.312223674379995, "learning_rate": 1.3019428429910363e-06, "loss": 0.4312, "step": 26959 }, { "epoch": 0.77, "grad_norm": 4.656149408674802, "learning_rate": 1.301630733620778e-06, "loss": 0.4711, "step": 26960 }, { "epoch": 0.77, "grad_norm": 7.369191678879105, "learning_rate": 1.3013186560671331e-06, "loss": 0.8361, "step": 26961 }, { "epoch": 0.77, "grad_norm": 4.997421458067472, "learning_rate": 1.30100661033279e-06, "loss": 0.2548, "step": 26962 }, { "epoch": 0.77, "grad_norm": 3.2879227228672487, "learning_rate": 1.3006945964204299e-06, "loss": 0.2369, "step": 26963 }, { "epoch": 0.77, "grad_norm": 3.7970663893699834, "learning_rate": 1.3003826143327392e-06, "loss": 0.4091, "step": 26964 }, { "epoch": 0.77, "grad_norm": 4.888413706297684, "learning_rate": 1.3000706640724026e-06, "loss": 0.4406, "step": 26965 }, { "epoch": 0.77, "grad_norm": 4.156003901717527, "learning_rate": 1.2997587456421024e-06, "loss": 0.1412, "step": 26966 }, { "epoch": 0.77, "grad_norm": 4.1277679346690865, "learning_rate": 1.2994468590445224e-06, "loss": 0.3087, "step": 26967 }, { "epoch": 0.77, "grad_norm": 2.626422683879548, "learning_rate": 1.2991350042823437e-06, "loss": 0.1448, "step": 26968 }, { "epoch": 0.77, "grad_norm": 6.56209768242813, "learning_rate": 1.2988231813582525e-06, "loss": 0.584, "step": 26969 }, { "epoch": 0.77, "grad_norm": 3.2231124462558567, "learning_rate": 1.2985113902749286e-06, "loss": 0.2504, "step": 26970 }, { "epoch": 0.77, "grad_norm": 3.2904839785246254, "learning_rate": 1.2981996310350563e-06, "loss": 0.2853, "step": 26971 }, { "epoch": 0.77, "grad_norm": 8.807531268204801, "learning_rate": 1.2978879036413161e-06, "loss": 0.595, "step": 26972 }, { "epoch": 0.77, "grad_norm": 7.731831172726085, "learning_rate": 1.2975762080963917e-06, "loss": 0.4631, "step": 26973 }, { "epoch": 0.77, "grad_norm": 5.90778193714355, "learning_rate": 1.297264544402963e-06, "loss": 0.3472, "step": 26974 }, { "epoch": 0.77, "grad_norm": 5.901795337175316, "learning_rate": 1.2969529125637099e-06, "loss": 0.5389, "step": 26975 }, { "epoch": 0.77, "grad_norm": 5.854936958579522, "learning_rate": 1.2966413125813172e-06, "loss": 0.2585, "step": 26976 }, { "epoch": 0.77, "grad_norm": 8.113969680898737, "learning_rate": 1.2963297444584611e-06, "loss": 1.1838, "step": 26977 }, { "epoch": 0.77, "grad_norm": 5.614718089376798, "learning_rate": 1.2960182081978255e-06, "loss": 0.2628, "step": 26978 }, { "epoch": 0.77, "grad_norm": 5.847513934832764, "learning_rate": 1.2957067038020898e-06, "loss": 0.392, "step": 26979 }, { "epoch": 0.77, "grad_norm": 2.3039871444807094, "learning_rate": 1.295395231273932e-06, "loss": 0.1072, "step": 26980 }, { "epoch": 0.77, "grad_norm": 4.166844395979576, "learning_rate": 1.295083790616034e-06, "loss": 0.2517, "step": 26981 }, { "epoch": 0.77, "grad_norm": 10.558200118969012, "learning_rate": 1.2947723818310725e-06, "loss": 0.5283, "step": 26982 }, { "epoch": 0.77, "grad_norm": 5.775962060827521, "learning_rate": 1.2944610049217292e-06, "loss": 0.4821, "step": 26983 }, { "epoch": 0.77, "grad_norm": 4.538272519726268, "learning_rate": 1.2941496598906806e-06, "loss": 0.1774, "step": 26984 }, { "epoch": 0.77, "grad_norm": 6.360064344980327, "learning_rate": 1.2938383467406075e-06, "loss": 0.3275, "step": 26985 }, { "epoch": 0.77, "grad_norm": 6.313879070152208, "learning_rate": 1.2935270654741873e-06, "loss": 0.4058, "step": 26986 }, { "epoch": 0.77, "grad_norm": 5.976822392416555, "learning_rate": 1.2932158160940966e-06, "loss": 0.3005, "step": 26987 }, { "epoch": 0.77, "grad_norm": 3.7873662238841335, "learning_rate": 1.2929045986030126e-06, "loss": 0.3915, "step": 26988 }, { "epoch": 0.77, "grad_norm": 8.76905535317855, "learning_rate": 1.292593413003614e-06, "loss": 0.7757, "step": 26989 }, { "epoch": 0.77, "grad_norm": 5.1518368880206475, "learning_rate": 1.2922822592985796e-06, "loss": 0.8389, "step": 26990 }, { "epoch": 0.77, "grad_norm": 5.602112364016957, "learning_rate": 1.2919711374905825e-06, "loss": 0.7008, "step": 26991 }, { "epoch": 0.77, "grad_norm": 5.70509605313481, "learning_rate": 1.2916600475823026e-06, "loss": 0.4539, "step": 26992 }, { "epoch": 0.77, "grad_norm": 7.937315420572805, "learning_rate": 1.2913489895764148e-06, "loss": 0.312, "step": 26993 }, { "epoch": 0.77, "grad_norm": 7.25849456374455, "learning_rate": 1.2910379634755948e-06, "loss": 0.3923, "step": 26994 }, { "epoch": 0.77, "grad_norm": 4.353492569038742, "learning_rate": 1.2907269692825176e-06, "loss": 0.225, "step": 26995 }, { "epoch": 0.77, "grad_norm": 5.896699864182828, "learning_rate": 1.2904160069998595e-06, "loss": 0.3788, "step": 26996 }, { "epoch": 0.77, "grad_norm": 8.968205877406355, "learning_rate": 1.2901050766302968e-06, "loss": 0.4161, "step": 26997 }, { "epoch": 0.77, "grad_norm": 4.113768082343959, "learning_rate": 1.2897941781765022e-06, "loss": 0.5597, "step": 26998 }, { "epoch": 0.77, "grad_norm": 5.003561968428338, "learning_rate": 1.2894833116411531e-06, "loss": 0.5334, "step": 26999 }, { "epoch": 0.77, "grad_norm": 6.416433734711848, "learning_rate": 1.289172477026922e-06, "loss": 0.4956, "step": 27000 }, { "epoch": 0.77, "grad_norm": 5.94133417912197, "learning_rate": 1.2888616743364834e-06, "loss": 0.3305, "step": 27001 }, { "epoch": 0.77, "grad_norm": 6.8447419584245095, "learning_rate": 1.288550903572509e-06, "loss": 0.1762, "step": 27002 }, { "epoch": 0.77, "grad_norm": 6.89598203961629, "learning_rate": 1.2882401647376757e-06, "loss": 0.6969, "step": 27003 }, { "epoch": 0.77, "grad_norm": 3.6293019385954817, "learning_rate": 1.287929457834654e-06, "loss": 0.2686, "step": 27004 }, { "epoch": 0.77, "grad_norm": 2.3401158704761467, "learning_rate": 1.2876187828661186e-06, "loss": 0.2243, "step": 27005 }, { "epoch": 0.77, "grad_norm": 6.5110876902318875, "learning_rate": 1.2873081398347443e-06, "loss": 0.4186, "step": 27006 }, { "epoch": 0.77, "grad_norm": 7.3723325269634, "learning_rate": 1.2869975287431974e-06, "loss": 0.3809, "step": 27007 }, { "epoch": 0.77, "grad_norm": 5.647291286136047, "learning_rate": 1.2866869495941558e-06, "loss": 0.3473, "step": 27008 }, { "epoch": 0.77, "grad_norm": 5.559723729158205, "learning_rate": 1.2863764023902874e-06, "loss": 0.4093, "step": 27009 }, { "epoch": 0.77, "grad_norm": 6.508397180274201, "learning_rate": 1.2860658871342673e-06, "loss": 0.7758, "step": 27010 }, { "epoch": 0.77, "grad_norm": 6.477503378045317, "learning_rate": 1.2857554038287634e-06, "loss": 0.3259, "step": 27011 }, { "epoch": 0.77, "grad_norm": 5.1371269034909774, "learning_rate": 1.2854449524764501e-06, "loss": 0.4553, "step": 27012 }, { "epoch": 0.77, "grad_norm": 6.869545767092108, "learning_rate": 1.2851345330799963e-06, "loss": 0.4605, "step": 27013 }, { "epoch": 0.77, "grad_norm": 5.091236922155752, "learning_rate": 1.2848241456420713e-06, "loss": 0.2985, "step": 27014 }, { "epoch": 0.77, "grad_norm": 3.5990243106136504, "learning_rate": 1.2845137901653482e-06, "loss": 0.2386, "step": 27015 }, { "epoch": 0.77, "grad_norm": 4.7160080528530255, "learning_rate": 1.2842034666524945e-06, "loss": 0.2234, "step": 27016 }, { "epoch": 0.77, "grad_norm": 3.8330145302488905, "learning_rate": 1.2838931751061817e-06, "loss": 0.394, "step": 27017 }, { "epoch": 0.77, "grad_norm": 8.92882442224924, "learning_rate": 1.2835829155290774e-06, "loss": 0.7037, "step": 27018 }, { "epoch": 0.77, "grad_norm": 12.849809614025386, "learning_rate": 1.2832726879238527e-06, "loss": 0.7321, "step": 27019 }, { "epoch": 0.77, "grad_norm": 4.223021457063876, "learning_rate": 1.282962492293176e-06, "loss": 0.2947, "step": 27020 }, { "epoch": 0.77, "grad_norm": 8.641000915407217, "learning_rate": 1.2826523286397135e-06, "loss": 0.4489, "step": 27021 }, { "epoch": 0.77, "grad_norm": 7.198086314725483, "learning_rate": 1.2823421969661365e-06, "loss": 0.8439, "step": 27022 }, { "epoch": 0.77, "grad_norm": 4.009274251479291, "learning_rate": 1.282032097275111e-06, "loss": 0.5236, "step": 27023 }, { "epoch": 0.77, "grad_norm": 6.884764101285293, "learning_rate": 1.2817220295693066e-06, "loss": 0.4378, "step": 27024 }, { "epoch": 0.77, "grad_norm": 7.462270695471661, "learning_rate": 1.2814119938513885e-06, "loss": 0.6637, "step": 27025 }, { "epoch": 0.77, "grad_norm": 11.500516796939158, "learning_rate": 1.2811019901240267e-06, "loss": 0.4076, "step": 27026 }, { "epoch": 0.77, "grad_norm": 6.11520053363161, "learning_rate": 1.2807920183898864e-06, "loss": 0.6019, "step": 27027 }, { "epoch": 0.77, "grad_norm": 3.9509034379104673, "learning_rate": 1.2804820786516348e-06, "loss": 0.3794, "step": 27028 }, { "epoch": 0.77, "grad_norm": 3.9030281665722963, "learning_rate": 1.2801721709119363e-06, "loss": 0.7155, "step": 27029 }, { "epoch": 0.77, "grad_norm": 11.941039634120646, "learning_rate": 1.2798622951734585e-06, "loss": 0.6065, "step": 27030 }, { "epoch": 0.77, "grad_norm": 10.424156225257535, "learning_rate": 1.2795524514388691e-06, "loss": 0.5181, "step": 27031 }, { "epoch": 0.77, "grad_norm": 2.8102954701692724, "learning_rate": 1.2792426397108298e-06, "loss": 0.5098, "step": 27032 }, { "epoch": 0.77, "grad_norm": 3.5836247318276637, "learning_rate": 1.2789328599920113e-06, "loss": 0.2354, "step": 27033 }, { "epoch": 0.77, "grad_norm": 4.824463724565122, "learning_rate": 1.2786231122850722e-06, "loss": 0.4934, "step": 27034 }, { "epoch": 0.77, "grad_norm": 8.289241979383485, "learning_rate": 1.2783133965926819e-06, "loss": 0.8424, "step": 27035 }, { "epoch": 0.77, "grad_norm": 8.547363713604138, "learning_rate": 1.2780037129175016e-06, "loss": 0.4707, "step": 27036 }, { "epoch": 0.77, "grad_norm": 3.83510165615704, "learning_rate": 1.2776940612621974e-06, "loss": 0.2526, "step": 27037 }, { "epoch": 0.77, "grad_norm": 5.273408248255097, "learning_rate": 1.277384441629434e-06, "loss": 0.7491, "step": 27038 }, { "epoch": 0.77, "grad_norm": 7.214634667688594, "learning_rate": 1.277074854021874e-06, "loss": 0.3215, "step": 27039 }, { "epoch": 0.77, "grad_norm": 5.833092543763985, "learning_rate": 1.2767652984421807e-06, "loss": 0.4088, "step": 27040 }, { "epoch": 0.77, "grad_norm": 5.417923732534791, "learning_rate": 1.2764557748930157e-06, "loss": 0.5481, "step": 27041 }, { "epoch": 0.77, "grad_norm": 9.01133206666006, "learning_rate": 1.2761462833770449e-06, "loss": 0.5588, "step": 27042 }, { "epoch": 0.77, "grad_norm": 5.464241349105176, "learning_rate": 1.2758368238969272e-06, "loss": 0.3586, "step": 27043 }, { "epoch": 0.77, "grad_norm": 7.401852618106887, "learning_rate": 1.2755273964553282e-06, "loss": 0.5137, "step": 27044 }, { "epoch": 0.77, "grad_norm": 6.698219335983946, "learning_rate": 1.2752180010549075e-06, "loss": 0.5037, "step": 27045 }, { "epoch": 0.77, "grad_norm": 4.045415136303135, "learning_rate": 1.2749086376983289e-06, "loss": 0.3004, "step": 27046 }, { "epoch": 0.77, "grad_norm": 5.301873563302882, "learning_rate": 1.2745993063882529e-06, "loss": 0.5141, "step": 27047 }, { "epoch": 0.77, "grad_norm": 11.037486272265493, "learning_rate": 1.2742900071273384e-06, "loss": 0.5574, "step": 27048 }, { "epoch": 0.77, "grad_norm": 5.519638287261638, "learning_rate": 1.2739807399182503e-06, "loss": 0.4797, "step": 27049 }, { "epoch": 0.77, "grad_norm": 2.6942093955310478, "learning_rate": 1.273671504763645e-06, "loss": 0.1515, "step": 27050 }, { "epoch": 0.77, "grad_norm": 2.494412299096918, "learning_rate": 1.2733623016661866e-06, "loss": 0.076, "step": 27051 }, { "epoch": 0.77, "grad_norm": 4.96038858575842, "learning_rate": 1.273053130628532e-06, "loss": 0.5365, "step": 27052 }, { "epoch": 0.77, "grad_norm": 5.381429333971677, "learning_rate": 1.2727439916533446e-06, "loss": 0.1662, "step": 27053 }, { "epoch": 0.77, "grad_norm": 4.239202696312981, "learning_rate": 1.272434884743281e-06, "loss": 0.2991, "step": 27054 }, { "epoch": 0.77, "grad_norm": 3.1161913702292767, "learning_rate": 1.2721258099009997e-06, "loss": 0.0783, "step": 27055 }, { "epoch": 0.77, "grad_norm": 7.501222383702262, "learning_rate": 1.2718167671291632e-06, "loss": 0.5815, "step": 27056 }, { "epoch": 0.77, "grad_norm": 10.215604432121197, "learning_rate": 1.2715077564304257e-06, "loss": 0.3091, "step": 27057 }, { "epoch": 0.77, "grad_norm": 5.37064970643176, "learning_rate": 1.2711987778074497e-06, "loss": 0.9418, "step": 27058 }, { "epoch": 0.77, "grad_norm": 5.38134156436089, "learning_rate": 1.2708898312628903e-06, "loss": 0.2068, "step": 27059 }, { "epoch": 0.77, "grad_norm": 8.714897808760506, "learning_rate": 1.2705809167994076e-06, "loss": 0.6453, "step": 27060 }, { "epoch": 0.77, "grad_norm": 6.390187031630988, "learning_rate": 1.2702720344196584e-06, "loss": 0.8764, "step": 27061 }, { "epoch": 0.77, "grad_norm": 5.292789177470173, "learning_rate": 1.2699631841262983e-06, "loss": 0.6701, "step": 27062 }, { "epoch": 0.78, "grad_norm": 4.651109584584426, "learning_rate": 1.2696543659219867e-06, "loss": 0.3756, "step": 27063 }, { "epoch": 0.78, "grad_norm": 5.456189319321237, "learning_rate": 1.2693455798093784e-06, "loss": 0.467, "step": 27064 }, { "epoch": 0.78, "grad_norm": 4.104120586672315, "learning_rate": 1.2690368257911316e-06, "loss": 0.3128, "step": 27065 }, { "epoch": 0.78, "grad_norm": 3.1584798282343294, "learning_rate": 1.2687281038699022e-06, "loss": 0.1258, "step": 27066 }, { "epoch": 0.78, "grad_norm": 3.8914209800185637, "learning_rate": 1.2684194140483452e-06, "loss": 0.3382, "step": 27067 }, { "epoch": 0.78, "grad_norm": 7.539167745379377, "learning_rate": 1.2681107563291151e-06, "loss": 0.5026, "step": 27068 }, { "epoch": 0.78, "grad_norm": 7.124104493734387, "learning_rate": 1.2678021307148702e-06, "loss": 0.5184, "step": 27069 }, { "epoch": 0.78, "grad_norm": 3.5422163461958593, "learning_rate": 1.2674935372082624e-06, "loss": 0.3577, "step": 27070 }, { "epoch": 0.78, "grad_norm": 4.687322409762544, "learning_rate": 1.2671849758119481e-06, "loss": 0.3319, "step": 27071 }, { "epoch": 0.78, "grad_norm": 3.890805358995857, "learning_rate": 1.2668764465285837e-06, "loss": 0.2828, "step": 27072 }, { "epoch": 0.78, "grad_norm": 7.760003801325231, "learning_rate": 1.2665679493608212e-06, "loss": 0.3873, "step": 27073 }, { "epoch": 0.78, "grad_norm": 4.043927212683625, "learning_rate": 1.2662594843113151e-06, "loss": 0.3948, "step": 27074 }, { "epoch": 0.78, "grad_norm": 3.884323041204714, "learning_rate": 1.2659510513827179e-06, "loss": 0.3405, "step": 27075 }, { "epoch": 0.78, "grad_norm": 5.2987208793183695, "learning_rate": 1.2656426505776849e-06, "loss": 0.7851, "step": 27076 }, { "epoch": 0.78, "grad_norm": 8.404242520485688, "learning_rate": 1.2653342818988673e-06, "loss": 0.6105, "step": 27077 }, { "epoch": 0.78, "grad_norm": 6.39501399488312, "learning_rate": 1.2650259453489189e-06, "loss": 0.5077, "step": 27078 }, { "epoch": 0.78, "grad_norm": 6.145516231136474, "learning_rate": 1.264717640930494e-06, "loss": 0.9037, "step": 27079 }, { "epoch": 0.78, "grad_norm": 4.666387901608568, "learning_rate": 1.2644093686462434e-06, "loss": 0.455, "step": 27080 }, { "epoch": 0.78, "grad_norm": 7.1438302290074995, "learning_rate": 1.2641011284988185e-06, "loss": 0.4833, "step": 27081 }, { "epoch": 0.78, "grad_norm": 8.804520937978179, "learning_rate": 1.2637929204908706e-06, "loss": 0.6883, "step": 27082 }, { "epoch": 0.78, "grad_norm": 6.069825613165766, "learning_rate": 1.2634847446250538e-06, "loss": 0.3282, "step": 27083 }, { "epoch": 0.78, "grad_norm": 5.403808659930705, "learning_rate": 1.2631766009040159e-06, "loss": 0.3732, "step": 27084 }, { "epoch": 0.78, "grad_norm": 6.51477356248589, "learning_rate": 1.262868489330411e-06, "loss": 0.4995, "step": 27085 }, { "epoch": 0.78, "grad_norm": 3.0730513958032777, "learning_rate": 1.2625604099068873e-06, "loss": 0.6509, "step": 27086 }, { "epoch": 0.78, "grad_norm": 5.956027046986353, "learning_rate": 1.2622523626360978e-06, "loss": 0.4808, "step": 27087 }, { "epoch": 0.78, "grad_norm": 3.4235234927472686, "learning_rate": 1.2619443475206906e-06, "loss": 0.1423, "step": 27088 }, { "epoch": 0.78, "grad_norm": 6.843347615993414, "learning_rate": 1.2616363645633145e-06, "loss": 0.7946, "step": 27089 }, { "epoch": 0.78, "grad_norm": 5.777108539077389, "learning_rate": 1.2613284137666221e-06, "loss": 0.6124, "step": 27090 }, { "epoch": 0.78, "grad_norm": 10.961624830886407, "learning_rate": 1.2610204951332595e-06, "loss": 0.8086, "step": 27091 }, { "epoch": 0.78, "grad_norm": 1.8770963314494058, "learning_rate": 1.260712608665879e-06, "loss": 0.2162, "step": 27092 }, { "epoch": 0.78, "grad_norm": 9.850998985636668, "learning_rate": 1.260404754367127e-06, "loss": 0.4005, "step": 27093 }, { "epoch": 0.78, "grad_norm": 9.28397794251326, "learning_rate": 1.2600969322396528e-06, "loss": 0.6731, "step": 27094 }, { "epoch": 0.78, "grad_norm": 2.9311728920789046, "learning_rate": 1.2597891422861025e-06, "loss": 0.2005, "step": 27095 }, { "epoch": 0.78, "grad_norm": 5.260244138802951, "learning_rate": 1.2594813845091259e-06, "loss": 0.3641, "step": 27096 }, { "epoch": 0.78, "grad_norm": 5.997295087817617, "learning_rate": 1.2591736589113718e-06, "loss": 0.3647, "step": 27097 }, { "epoch": 0.78, "grad_norm": 6.614621580318702, "learning_rate": 1.2588659654954849e-06, "loss": 0.5059, "step": 27098 }, { "epoch": 0.78, "grad_norm": 7.476315804201318, "learning_rate": 1.2585583042641147e-06, "loss": 0.4863, "step": 27099 }, { "epoch": 0.78, "grad_norm": 3.168992710787671, "learning_rate": 1.2582506752199065e-06, "loss": 0.3577, "step": 27100 }, { "epoch": 0.78, "grad_norm": 6.105147281904036, "learning_rate": 1.2579430783655077e-06, "loss": 0.6877, "step": 27101 }, { "epoch": 0.78, "grad_norm": 5.88054703535807, "learning_rate": 1.2576355137035617e-06, "loss": 0.4026, "step": 27102 }, { "epoch": 0.78, "grad_norm": 6.6363002921343766, "learning_rate": 1.2573279812367173e-06, "loss": 0.3076, "step": 27103 }, { "epoch": 0.78, "grad_norm": 10.95761564875642, "learning_rate": 1.2570204809676201e-06, "loss": 0.8368, "step": 27104 }, { "epoch": 0.78, "grad_norm": 4.141915591656804, "learning_rate": 1.256713012898914e-06, "loss": 0.1986, "step": 27105 }, { "epoch": 0.78, "grad_norm": 10.443696016072817, "learning_rate": 1.2564055770332462e-06, "loss": 0.6353, "step": 27106 }, { "epoch": 0.78, "grad_norm": 4.182944339103727, "learning_rate": 1.2560981733732607e-06, "loss": 0.4126, "step": 27107 }, { "epoch": 0.78, "grad_norm": 3.525075375312451, "learning_rate": 1.2557908019216014e-06, "loss": 0.387, "step": 27108 }, { "epoch": 0.78, "grad_norm": 4.881715513981091, "learning_rate": 1.255483462680911e-06, "loss": 0.2794, "step": 27109 }, { "epoch": 0.78, "grad_norm": 2.7267389759565903, "learning_rate": 1.2551761556538373e-06, "loss": 0.4898, "step": 27110 }, { "epoch": 0.78, "grad_norm": 4.010830343469778, "learning_rate": 1.2548688808430204e-06, "loss": 0.4111, "step": 27111 }, { "epoch": 0.78, "grad_norm": 1.7847870042087748, "learning_rate": 1.2545616382511054e-06, "loss": 0.2285, "step": 27112 }, { "epoch": 0.78, "grad_norm": 2.437442146128208, "learning_rate": 1.2542544278807368e-06, "loss": 0.0669, "step": 27113 }, { "epoch": 0.78, "grad_norm": 6.399649294542604, "learning_rate": 1.2539472497345562e-06, "loss": 0.3147, "step": 27114 }, { "epoch": 0.78, "grad_norm": 8.873164672203984, "learning_rate": 1.2536401038152064e-06, "loss": 0.4761, "step": 27115 }, { "epoch": 0.78, "grad_norm": 5.440345677234037, "learning_rate": 1.2533329901253282e-06, "loss": 0.4951, "step": 27116 }, { "epoch": 0.78, "grad_norm": 4.505455313143761, "learning_rate": 1.2530259086675662e-06, "loss": 0.1716, "step": 27117 }, { "epoch": 0.78, "grad_norm": 3.7022182626058764, "learning_rate": 1.2527188594445593e-06, "loss": 0.5051, "step": 27118 }, { "epoch": 0.78, "grad_norm": 9.912334510990016, "learning_rate": 1.2524118424589526e-06, "loss": 1.0505, "step": 27119 }, { "epoch": 0.78, "grad_norm": 5.598353979332526, "learning_rate": 1.2521048577133838e-06, "loss": 0.4913, "step": 27120 }, { "epoch": 0.78, "grad_norm": 9.554344539148351, "learning_rate": 1.2517979052104967e-06, "loss": 0.383, "step": 27121 }, { "epoch": 0.78, "grad_norm": 3.5838017139089886, "learning_rate": 1.2514909849529306e-06, "loss": 0.2329, "step": 27122 }, { "epoch": 0.78, "grad_norm": 7.195333971619021, "learning_rate": 1.2511840969433248e-06, "loss": 0.6433, "step": 27123 }, { "epoch": 0.78, "grad_norm": 2.205724710927619, "learning_rate": 1.2508772411843223e-06, "loss": 0.1634, "step": 27124 }, { "epoch": 0.78, "grad_norm": 6.925167686966106, "learning_rate": 1.2505704176785595e-06, "loss": 0.8923, "step": 27125 }, { "epoch": 0.78, "grad_norm": 2.5195952533014982, "learning_rate": 1.2502636264286795e-06, "loss": 0.2025, "step": 27126 }, { "epoch": 0.78, "grad_norm": 3.5078084559088083, "learning_rate": 1.2499568674373197e-06, "loss": 0.1281, "step": 27127 }, { "epoch": 0.78, "grad_norm": 6.1001439906151225, "learning_rate": 1.2496501407071176e-06, "loss": 0.4555, "step": 27128 }, { "epoch": 0.78, "grad_norm": 3.8530162365885876, "learning_rate": 1.2493434462407156e-06, "loss": 0.2316, "step": 27129 }, { "epoch": 0.78, "grad_norm": 4.111876245055769, "learning_rate": 1.249036784040748e-06, "loss": 0.4519, "step": 27130 }, { "epoch": 0.78, "grad_norm": 6.091932837658945, "learning_rate": 1.2487301541098567e-06, "loss": 0.5703, "step": 27131 }, { "epoch": 0.78, "grad_norm": 2.9653389056116803, "learning_rate": 1.2484235564506769e-06, "loss": 0.1107, "step": 27132 }, { "epoch": 0.78, "grad_norm": 6.27164369925454, "learning_rate": 1.2481169910658486e-06, "loss": 0.5103, "step": 27133 }, { "epoch": 0.78, "grad_norm": 6.081952221307367, "learning_rate": 1.247810457958008e-06, "loss": 0.3409, "step": 27134 }, { "epoch": 0.78, "grad_norm": 3.9719663487936425, "learning_rate": 1.247503957129792e-06, "loss": 0.3493, "step": 27135 }, { "epoch": 0.78, "grad_norm": 6.2170166206892405, "learning_rate": 1.2471974885838362e-06, "loss": 0.5188, "step": 27136 }, { "epoch": 0.78, "grad_norm": 3.703255952861588, "learning_rate": 1.2468910523227778e-06, "loss": 0.4571, "step": 27137 }, { "epoch": 0.78, "grad_norm": 6.392068751128928, "learning_rate": 1.2465846483492555e-06, "loss": 0.3186, "step": 27138 }, { "epoch": 0.78, "grad_norm": 4.976370048694304, "learning_rate": 1.2462782766659015e-06, "loss": 0.289, "step": 27139 }, { "epoch": 0.78, "grad_norm": 3.4969978570023414, "learning_rate": 1.2459719372753553e-06, "loss": 0.1754, "step": 27140 }, { "epoch": 0.78, "grad_norm": 4.661539030886109, "learning_rate": 1.24566563018025e-06, "loss": 0.4878, "step": 27141 }, { "epoch": 0.78, "grad_norm": 4.446268588931584, "learning_rate": 1.245359355383221e-06, "loss": 0.1443, "step": 27142 }, { "epoch": 0.78, "grad_norm": 5.201708626635442, "learning_rate": 1.2450531128869014e-06, "loss": 0.2614, "step": 27143 }, { "epoch": 0.78, "grad_norm": 9.03870794050929, "learning_rate": 1.244746902693928e-06, "loss": 0.7416, "step": 27144 }, { "epoch": 0.78, "grad_norm": 4.061406795425505, "learning_rate": 1.2444407248069361e-06, "loss": 0.5306, "step": 27145 }, { "epoch": 0.78, "grad_norm": 5.393670545856956, "learning_rate": 1.2441345792285569e-06, "loss": 0.2551, "step": 27146 }, { "epoch": 0.78, "grad_norm": 5.749813491450172, "learning_rate": 1.243828465961427e-06, "loss": 0.6032, "step": 27147 }, { "epoch": 0.78, "grad_norm": 4.45448935505251, "learning_rate": 1.243522385008178e-06, "loss": 0.2897, "step": 27148 }, { "epoch": 0.78, "grad_norm": 8.116939067251408, "learning_rate": 1.2432163363714434e-06, "loss": 0.5124, "step": 27149 }, { "epoch": 0.78, "grad_norm": 6.71874141248997, "learning_rate": 1.2429103200538545e-06, "loss": 0.584, "step": 27150 }, { "epoch": 0.78, "grad_norm": 4.739841490481398, "learning_rate": 1.2426043360580476e-06, "loss": 0.3909, "step": 27151 }, { "epoch": 0.78, "grad_norm": 6.7161776919211436, "learning_rate": 1.2422983843866509e-06, "loss": 0.595, "step": 27152 }, { "epoch": 0.78, "grad_norm": 2.770887723307641, "learning_rate": 1.2419924650423004e-06, "loss": 0.2253, "step": 27153 }, { "epoch": 0.78, "grad_norm": 5.7937746214112, "learning_rate": 1.2416865780276255e-06, "loss": 0.538, "step": 27154 }, { "epoch": 0.78, "grad_norm": 9.380372580629958, "learning_rate": 1.2413807233452573e-06, "loss": 0.629, "step": 27155 }, { "epoch": 0.78, "grad_norm": 3.755306431321876, "learning_rate": 1.2410749009978297e-06, "loss": 0.2891, "step": 27156 }, { "epoch": 0.78, "grad_norm": 4.029987906492276, "learning_rate": 1.2407691109879693e-06, "loss": 0.5022, "step": 27157 }, { "epoch": 0.78, "grad_norm": 1.235097040184412, "learning_rate": 1.240463353318312e-06, "loss": 0.1908, "step": 27158 }, { "epoch": 0.78, "grad_norm": 4.852654523084263, "learning_rate": 1.2401576279914833e-06, "loss": 0.4689, "step": 27159 }, { "epoch": 0.78, "grad_norm": 7.237639776010044, "learning_rate": 1.2398519350101178e-06, "loss": 0.7199, "step": 27160 }, { "epoch": 0.78, "grad_norm": 5.522251978529019, "learning_rate": 1.2395462743768427e-06, "loss": 0.757, "step": 27161 }, { "epoch": 0.78, "grad_norm": 6.663079407280756, "learning_rate": 1.2392406460942868e-06, "loss": 0.3176, "step": 27162 }, { "epoch": 0.78, "grad_norm": 3.8046202918036482, "learning_rate": 1.2389350501650816e-06, "loss": 0.1061, "step": 27163 }, { "epoch": 0.78, "grad_norm": 4.575281946426634, "learning_rate": 1.238629486591854e-06, "loss": 0.4446, "step": 27164 }, { "epoch": 0.78, "grad_norm": 9.921086605918662, "learning_rate": 1.2383239553772353e-06, "loss": 0.6967, "step": 27165 }, { "epoch": 0.78, "grad_norm": 5.156674777942896, "learning_rate": 1.2380184565238506e-06, "loss": 0.6989, "step": 27166 }, { "epoch": 0.78, "grad_norm": 6.896695669908169, "learning_rate": 1.237712990034332e-06, "loss": 0.3903, "step": 27167 }, { "epoch": 0.78, "grad_norm": 5.760024351128126, "learning_rate": 1.2374075559113052e-06, "loss": 0.2905, "step": 27168 }, { "epoch": 0.78, "grad_norm": 9.05076499677375, "learning_rate": 1.2371021541573963e-06, "loss": 0.5786, "step": 27169 }, { "epoch": 0.78, "grad_norm": 6.30064684256828, "learning_rate": 1.2367967847752354e-06, "loss": 0.9223, "step": 27170 }, { "epoch": 0.78, "grad_norm": 6.039589388860654, "learning_rate": 1.2364914477674471e-06, "loss": 0.3811, "step": 27171 }, { "epoch": 0.78, "grad_norm": 6.791326428493605, "learning_rate": 1.236186143136661e-06, "loss": 0.3013, "step": 27172 }, { "epoch": 0.78, "grad_norm": 5.764665229122799, "learning_rate": 1.2358808708855013e-06, "loss": 0.4672, "step": 27173 }, { "epoch": 0.78, "grad_norm": 7.435078972177079, "learning_rate": 1.2355756310165956e-06, "loss": 0.5488, "step": 27174 }, { "epoch": 0.78, "grad_norm": 4.495751070407786, "learning_rate": 1.2352704235325697e-06, "loss": 0.2221, "step": 27175 }, { "epoch": 0.78, "grad_norm": 3.9941505637196792, "learning_rate": 1.2349652484360485e-06, "loss": 0.3461, "step": 27176 }, { "epoch": 0.78, "grad_norm": 6.03837887545258, "learning_rate": 1.234660105729657e-06, "loss": 0.6711, "step": 27177 }, { "epoch": 0.78, "grad_norm": 2.8863786592549947, "learning_rate": 1.23435499541602e-06, "loss": 0.31, "step": 27178 }, { "epoch": 0.78, "grad_norm": 5.012690936706643, "learning_rate": 1.2340499174977656e-06, "loss": 0.363, "step": 27179 }, { "epoch": 0.78, "grad_norm": 7.959669017715625, "learning_rate": 1.2337448719775158e-06, "loss": 0.379, "step": 27180 }, { "epoch": 0.78, "grad_norm": 5.145056672372644, "learning_rate": 1.233439858857895e-06, "loss": 0.1608, "step": 27181 }, { "epoch": 0.78, "grad_norm": 4.026100566214392, "learning_rate": 1.2331348781415259e-06, "loss": 0.446, "step": 27182 }, { "epoch": 0.78, "grad_norm": 5.572060159718571, "learning_rate": 1.2328299298310354e-06, "loss": 0.5304, "step": 27183 }, { "epoch": 0.78, "grad_norm": 3.1392279786867663, "learning_rate": 1.2325250139290434e-06, "loss": 0.1223, "step": 27184 }, { "epoch": 0.78, "grad_norm": 4.683627398117899, "learning_rate": 1.2322201304381748e-06, "loss": 0.6135, "step": 27185 }, { "epoch": 0.78, "grad_norm": 4.775132497976363, "learning_rate": 1.2319152793610544e-06, "loss": 0.4845, "step": 27186 }, { "epoch": 0.78, "grad_norm": 5.274514050269627, "learning_rate": 1.2316104607003027e-06, "loss": 0.5012, "step": 27187 }, { "epoch": 0.78, "grad_norm": 5.697828367712575, "learning_rate": 1.2313056744585421e-06, "loss": 0.3944, "step": 27188 }, { "epoch": 0.78, "grad_norm": 8.889116048559377, "learning_rate": 1.2310009206383932e-06, "loss": 0.5335, "step": 27189 }, { "epoch": 0.78, "grad_norm": 4.8726193165935205, "learning_rate": 1.230696199242481e-06, "loss": 0.5296, "step": 27190 }, { "epoch": 0.78, "grad_norm": 3.6710706276174565, "learning_rate": 1.2303915102734244e-06, "loss": 0.3087, "step": 27191 }, { "epoch": 0.78, "grad_norm": 4.1783653663763625, "learning_rate": 1.2300868537338462e-06, "loss": 0.26, "step": 27192 }, { "epoch": 0.78, "grad_norm": 3.996452248341359, "learning_rate": 1.229782229626365e-06, "loss": 0.2778, "step": 27193 }, { "epoch": 0.78, "grad_norm": 6.738578741168477, "learning_rate": 1.229477637953605e-06, "loss": 0.4385, "step": 27194 }, { "epoch": 0.78, "grad_norm": 8.064149554750447, "learning_rate": 1.2291730787181848e-06, "loss": 0.4445, "step": 27195 }, { "epoch": 0.78, "grad_norm": 6.87593185005013, "learning_rate": 1.2288685519227227e-06, "loss": 0.4608, "step": 27196 }, { "epoch": 0.78, "grad_norm": 8.234229475613846, "learning_rate": 1.2285640575698416e-06, "loss": 0.4716, "step": 27197 }, { "epoch": 0.78, "grad_norm": 7.511026224557059, "learning_rate": 1.2282595956621579e-06, "loss": 0.4532, "step": 27198 }, { "epoch": 0.78, "grad_norm": 6.653581756064274, "learning_rate": 1.2279551662022943e-06, "loss": 0.5696, "step": 27199 }, { "epoch": 0.78, "grad_norm": 2.98243177770783, "learning_rate": 1.2276507691928664e-06, "loss": 0.2482, "step": 27200 }, { "epoch": 0.78, "grad_norm": 4.991257080867951, "learning_rate": 1.2273464046364951e-06, "loss": 0.6182, "step": 27201 }, { "epoch": 0.78, "grad_norm": 8.089384234468156, "learning_rate": 1.2270420725357996e-06, "loss": 0.3871, "step": 27202 }, { "epoch": 0.78, "grad_norm": 5.811525201580874, "learning_rate": 1.2267377728933943e-06, "loss": 0.3044, "step": 27203 }, { "epoch": 0.78, "grad_norm": 6.662644838115047, "learning_rate": 1.2264335057119008e-06, "loss": 0.2332, "step": 27204 }, { "epoch": 0.78, "grad_norm": 2.461885396872979, "learning_rate": 1.2261292709939344e-06, "loss": 0.1716, "step": 27205 }, { "epoch": 0.78, "grad_norm": 7.891842648062295, "learning_rate": 1.2258250687421142e-06, "loss": 0.2423, "step": 27206 }, { "epoch": 0.78, "grad_norm": 5.345367008523413, "learning_rate": 1.2255208989590545e-06, "loss": 0.1276, "step": 27207 }, { "epoch": 0.78, "grad_norm": 2.4098810814765366, "learning_rate": 1.225216761647377e-06, "loss": 0.07, "step": 27208 }, { "epoch": 0.78, "grad_norm": 6.440311854790634, "learning_rate": 1.224912656809692e-06, "loss": 0.468, "step": 27209 }, { "epoch": 0.78, "grad_norm": 2.563057745735105, "learning_rate": 1.2246085844486188e-06, "loss": 0.1706, "step": 27210 }, { "epoch": 0.78, "grad_norm": 9.046989110777131, "learning_rate": 1.2243045445667746e-06, "loss": 0.8185, "step": 27211 }, { "epoch": 0.78, "grad_norm": 6.816219084610289, "learning_rate": 1.2240005371667719e-06, "loss": 0.3436, "step": 27212 }, { "epoch": 0.78, "grad_norm": 6.026215542257783, "learning_rate": 1.2236965622512292e-06, "loss": 0.2834, "step": 27213 }, { "epoch": 0.78, "grad_norm": 4.365891238636383, "learning_rate": 1.2233926198227598e-06, "loss": 0.4312, "step": 27214 }, { "epoch": 0.78, "grad_norm": 6.926839697050102, "learning_rate": 1.2230887098839788e-06, "loss": 0.6665, "step": 27215 }, { "epoch": 0.78, "grad_norm": 2.229542341098444, "learning_rate": 1.222784832437499e-06, "loss": 0.2381, "step": 27216 }, { "epoch": 0.78, "grad_norm": 5.078342726514839, "learning_rate": 1.2224809874859383e-06, "loss": 0.5279, "step": 27217 }, { "epoch": 0.78, "grad_norm": 6.728703918922859, "learning_rate": 1.2221771750319067e-06, "loss": 0.4181, "step": 27218 }, { "epoch": 0.78, "grad_norm": 6.330270850788459, "learning_rate": 1.2218733950780198e-06, "loss": 0.3697, "step": 27219 }, { "epoch": 0.78, "grad_norm": 5.484379173342429, "learning_rate": 1.2215696476268922e-06, "loss": 0.437, "step": 27220 }, { "epoch": 0.78, "grad_norm": 5.027351906983961, "learning_rate": 1.2212659326811356e-06, "loss": 0.6365, "step": 27221 }, { "epoch": 0.78, "grad_norm": 6.980610253127802, "learning_rate": 1.2209622502433633e-06, "loss": 0.5133, "step": 27222 }, { "epoch": 0.78, "grad_norm": 6.331727271811254, "learning_rate": 1.2206586003161857e-06, "loss": 0.7042, "step": 27223 }, { "epoch": 0.78, "grad_norm": 5.420960296954124, "learning_rate": 1.2203549829022182e-06, "loss": 0.7673, "step": 27224 }, { "epoch": 0.78, "grad_norm": 5.69724591460691, "learning_rate": 1.2200513980040707e-06, "loss": 0.4452, "step": 27225 }, { "epoch": 0.78, "grad_norm": 10.831910279537727, "learning_rate": 1.219747845624355e-06, "loss": 0.6506, "step": 27226 }, { "epoch": 0.78, "grad_norm": 5.031035661574409, "learning_rate": 1.2194443257656846e-06, "loss": 0.3446, "step": 27227 }, { "epoch": 0.78, "grad_norm": 5.608694348706363, "learning_rate": 1.219140838430669e-06, "loss": 0.3262, "step": 27228 }, { "epoch": 0.78, "grad_norm": 4.043385831539767, "learning_rate": 1.2188373836219198e-06, "loss": 0.3462, "step": 27229 }, { "epoch": 0.78, "grad_norm": 5.7873512034722365, "learning_rate": 1.2185339613420456e-06, "loss": 0.6538, "step": 27230 }, { "epoch": 0.78, "grad_norm": 6.387049787149033, "learning_rate": 1.2182305715936592e-06, "loss": 0.2083, "step": 27231 }, { "epoch": 0.78, "grad_norm": 4.926329906621861, "learning_rate": 1.2179272143793685e-06, "loss": 0.4715, "step": 27232 }, { "epoch": 0.78, "grad_norm": 4.1370885753928155, "learning_rate": 1.2176238897017856e-06, "loss": 0.5404, "step": 27233 }, { "epoch": 0.78, "grad_norm": 3.5747938850313394, "learning_rate": 1.217320597563517e-06, "loss": 0.1455, "step": 27234 }, { "epoch": 0.78, "grad_norm": 6.386135510986199, "learning_rate": 1.217017337967175e-06, "loss": 0.6629, "step": 27235 }, { "epoch": 0.78, "grad_norm": 1.867961376020873, "learning_rate": 1.2167141109153674e-06, "loss": 0.1694, "step": 27236 }, { "epoch": 0.78, "grad_norm": 11.974267320378251, "learning_rate": 1.2164109164107008e-06, "loss": 0.5203, "step": 27237 }, { "epoch": 0.78, "grad_norm": 6.03981635161859, "learning_rate": 1.216107754455787e-06, "loss": 0.6541, "step": 27238 }, { "epoch": 0.78, "grad_norm": 9.003256420591761, "learning_rate": 1.2158046250532307e-06, "loss": 0.5619, "step": 27239 }, { "epoch": 0.78, "grad_norm": 1.1626392278564566, "learning_rate": 1.2155015282056425e-06, "loss": 0.0305, "step": 27240 }, { "epoch": 0.78, "grad_norm": 5.658433355796655, "learning_rate": 1.215198463915629e-06, "loss": 0.3247, "step": 27241 }, { "epoch": 0.78, "grad_norm": 6.21716027571438, "learning_rate": 1.214895432185797e-06, "loss": 0.5353, "step": 27242 }, { "epoch": 0.78, "grad_norm": 4.768586973650909, "learning_rate": 1.214592433018752e-06, "loss": 0.2669, "step": 27243 }, { "epoch": 0.78, "grad_norm": 2.480902734472762, "learning_rate": 1.2142894664171024e-06, "loss": 0.122, "step": 27244 }, { "epoch": 0.78, "grad_norm": 2.67768685085158, "learning_rate": 1.2139865323834554e-06, "loss": 0.1879, "step": 27245 }, { "epoch": 0.78, "grad_norm": 4.9544968014063, "learning_rate": 1.2136836309204153e-06, "loss": 0.8982, "step": 27246 }, { "epoch": 0.78, "grad_norm": 3.519804221013554, "learning_rate": 1.21338076203059e-06, "loss": 0.1748, "step": 27247 }, { "epoch": 0.78, "grad_norm": 5.548633049688254, "learning_rate": 1.2130779257165837e-06, "loss": 0.4107, "step": 27248 }, { "epoch": 0.78, "grad_norm": 5.34506225089354, "learning_rate": 1.212775121981002e-06, "loss": 0.3208, "step": 27249 }, { "epoch": 0.78, "grad_norm": 1.976243245875129, "learning_rate": 1.2124723508264474e-06, "loss": 0.4077, "step": 27250 }, { "epoch": 0.78, "grad_norm": 7.884254406865944, "learning_rate": 1.2121696122555277e-06, "loss": 0.5086, "step": 27251 }, { "epoch": 0.78, "grad_norm": 4.995798205118808, "learning_rate": 1.2118669062708477e-06, "loss": 0.2955, "step": 27252 }, { "epoch": 0.78, "grad_norm": 4.007560320499572, "learning_rate": 1.2115642328750081e-06, "loss": 0.5701, "step": 27253 }, { "epoch": 0.78, "grad_norm": 6.121701266991285, "learning_rate": 1.2112615920706173e-06, "loss": 0.6123, "step": 27254 }, { "epoch": 0.78, "grad_norm": 3.0608211703586323, "learning_rate": 1.2109589838602764e-06, "loss": 0.1687, "step": 27255 }, { "epoch": 0.78, "grad_norm": 5.048184771101867, "learning_rate": 1.2106564082465888e-06, "loss": 0.2647, "step": 27256 }, { "epoch": 0.78, "grad_norm": 9.637589506928528, "learning_rate": 1.2103538652321562e-06, "loss": 0.7127, "step": 27257 }, { "epoch": 0.78, "grad_norm": 7.384415386927353, "learning_rate": 1.2100513548195836e-06, "loss": 0.5414, "step": 27258 }, { "epoch": 0.78, "grad_norm": 6.204232508572799, "learning_rate": 1.2097488770114718e-06, "loss": 0.5388, "step": 27259 }, { "epoch": 0.78, "grad_norm": 6.083790626931041, "learning_rate": 1.2094464318104231e-06, "loss": 0.5783, "step": 27260 }, { "epoch": 0.78, "grad_norm": 4.972463647509105, "learning_rate": 1.2091440192190424e-06, "loss": 0.5502, "step": 27261 }, { "epoch": 0.78, "grad_norm": 9.534623721519157, "learning_rate": 1.2088416392399282e-06, "loss": 0.6536, "step": 27262 }, { "epoch": 0.78, "grad_norm": 5.021129404205234, "learning_rate": 1.2085392918756828e-06, "loss": 0.2785, "step": 27263 }, { "epoch": 0.78, "grad_norm": 3.5678965201019377, "learning_rate": 1.2082369771289054e-06, "loss": 0.2202, "step": 27264 }, { "epoch": 0.78, "grad_norm": 8.833281450898975, "learning_rate": 1.2079346950022002e-06, "loss": 0.1715, "step": 27265 }, { "epoch": 0.78, "grad_norm": 2.563511834764775, "learning_rate": 1.2076324454981642e-06, "loss": 0.237, "step": 27266 }, { "epoch": 0.78, "grad_norm": 6.754845893056364, "learning_rate": 1.2073302286193994e-06, "loss": 0.5414, "step": 27267 }, { "epoch": 0.78, "grad_norm": 5.343346072987112, "learning_rate": 1.207028044368509e-06, "loss": 0.2828, "step": 27268 }, { "epoch": 0.78, "grad_norm": 8.307328587433057, "learning_rate": 1.2067258927480863e-06, "loss": 0.3252, "step": 27269 }, { "epoch": 0.78, "grad_norm": 2.433521973851075, "learning_rate": 1.2064237737607348e-06, "loss": 0.123, "step": 27270 }, { "epoch": 0.78, "grad_norm": 6.9597184711825895, "learning_rate": 1.2061216874090515e-06, "loss": 0.5838, "step": 27271 }, { "epoch": 0.78, "grad_norm": 2.433748267887794, "learning_rate": 1.2058196336956367e-06, "loss": 0.2071, "step": 27272 }, { "epoch": 0.78, "grad_norm": 1.9329649548242396, "learning_rate": 1.2055176126230877e-06, "loss": 0.1689, "step": 27273 }, { "epoch": 0.78, "grad_norm": 6.734630748025897, "learning_rate": 1.205215624194005e-06, "loss": 0.6585, "step": 27274 }, { "epoch": 0.78, "grad_norm": 5.430263022240028, "learning_rate": 1.2049136684109852e-06, "loss": 0.5104, "step": 27275 }, { "epoch": 0.78, "grad_norm": 4.574819706141896, "learning_rate": 1.2046117452766242e-06, "loss": 0.1652, "step": 27276 }, { "epoch": 0.78, "grad_norm": 8.533840750221103, "learning_rate": 1.2043098547935223e-06, "loss": 0.5972, "step": 27277 }, { "epoch": 0.78, "grad_norm": 5.332499970455322, "learning_rate": 1.2040079969642742e-06, "loss": 0.2606, "step": 27278 }, { "epoch": 0.78, "grad_norm": 5.894193494257402, "learning_rate": 1.2037061717914795e-06, "loss": 0.3598, "step": 27279 }, { "epoch": 0.78, "grad_norm": 3.2732460311939007, "learning_rate": 1.2034043792777312e-06, "loss": 0.3504, "step": 27280 }, { "epoch": 0.78, "grad_norm": 4.491066117677634, "learning_rate": 1.2031026194256297e-06, "loss": 0.2537, "step": 27281 }, { "epoch": 0.78, "grad_norm": 4.073048234198323, "learning_rate": 1.2028008922377683e-06, "loss": 0.3755, "step": 27282 }, { "epoch": 0.78, "grad_norm": 6.71840265839995, "learning_rate": 1.2024991977167433e-06, "loss": 0.272, "step": 27283 }, { "epoch": 0.78, "grad_norm": 4.541332681723783, "learning_rate": 1.2021975358651488e-06, "loss": 0.2409, "step": 27284 }, { "epoch": 0.78, "grad_norm": 4.326409368260739, "learning_rate": 1.2018959066855818e-06, "loss": 0.3024, "step": 27285 }, { "epoch": 0.78, "grad_norm": 11.375175139367919, "learning_rate": 1.2015943101806376e-06, "loss": 0.5795, "step": 27286 }, { "epoch": 0.78, "grad_norm": 12.559203137552851, "learning_rate": 1.2012927463529084e-06, "loss": 1.0339, "step": 27287 }, { "epoch": 0.78, "grad_norm": 9.704470481282456, "learning_rate": 1.200991215204992e-06, "loss": 0.5804, "step": 27288 }, { "epoch": 0.78, "grad_norm": 4.990416641042127, "learning_rate": 1.2006897167394798e-06, "loss": 0.3686, "step": 27289 }, { "epoch": 0.78, "grad_norm": 9.031202481154724, "learning_rate": 1.2003882509589666e-06, "loss": 0.6709, "step": 27290 }, { "epoch": 0.78, "grad_norm": 6.917834030209539, "learning_rate": 1.2000868178660434e-06, "loss": 0.7073, "step": 27291 }, { "epoch": 0.78, "grad_norm": 6.295205133866546, "learning_rate": 1.1997854174633061e-06, "loss": 0.5655, "step": 27292 }, { "epoch": 0.78, "grad_norm": 4.5223813314563035, "learning_rate": 1.1994840497533483e-06, "loss": 0.3972, "step": 27293 }, { "epoch": 0.78, "grad_norm": 4.069049778455449, "learning_rate": 1.1991827147387598e-06, "loss": 0.1601, "step": 27294 }, { "epoch": 0.78, "grad_norm": 11.91488306440376, "learning_rate": 1.198881412422137e-06, "loss": 0.2895, "step": 27295 }, { "epoch": 0.78, "grad_norm": 4.63112275055601, "learning_rate": 1.1985801428060673e-06, "loss": 0.3873, "step": 27296 }, { "epoch": 0.78, "grad_norm": 5.708972681629318, "learning_rate": 1.1982789058931455e-06, "loss": 0.4016, "step": 27297 }, { "epoch": 0.78, "grad_norm": 4.474049233818383, "learning_rate": 1.1979777016859606e-06, "loss": 0.2236, "step": 27298 }, { "epoch": 0.78, "grad_norm": 6.7391275510206645, "learning_rate": 1.1976765301871068e-06, "loss": 0.4146, "step": 27299 }, { "epoch": 0.78, "grad_norm": 3.2435131258462992, "learning_rate": 1.1973753913991725e-06, "loss": 0.1629, "step": 27300 }, { "epoch": 0.78, "grad_norm": 8.892048083284749, "learning_rate": 1.1970742853247509e-06, "loss": 0.6191, "step": 27301 }, { "epoch": 0.78, "grad_norm": 4.485743297278295, "learning_rate": 1.196773211966431e-06, "loss": 0.3853, "step": 27302 }, { "epoch": 0.78, "grad_norm": 4.493960062351182, "learning_rate": 1.1964721713268013e-06, "loss": 0.5122, "step": 27303 }, { "epoch": 0.78, "grad_norm": 5.477263575804903, "learning_rate": 1.196171163408455e-06, "loss": 0.2656, "step": 27304 }, { "epoch": 0.78, "grad_norm": 7.538102099661631, "learning_rate": 1.1958701882139778e-06, "loss": 0.4959, "step": 27305 }, { "epoch": 0.78, "grad_norm": 13.901484926754867, "learning_rate": 1.1955692457459632e-06, "loss": 0.7448, "step": 27306 }, { "epoch": 0.78, "grad_norm": 13.517393175129502, "learning_rate": 1.195268336006996e-06, "loss": 0.6379, "step": 27307 }, { "epoch": 0.78, "grad_norm": 6.576267659798603, "learning_rate": 1.1949674589996684e-06, "loss": 0.5568, "step": 27308 }, { "epoch": 0.78, "grad_norm": 8.778465834207235, "learning_rate": 1.1946666147265673e-06, "loss": 0.4883, "step": 27309 }, { "epoch": 0.78, "grad_norm": 13.401316964496589, "learning_rate": 1.1943658031902795e-06, "loss": 0.5592, "step": 27310 }, { "epoch": 0.78, "grad_norm": 6.037437704013698, "learning_rate": 1.1940650243933955e-06, "loss": 0.4119, "step": 27311 }, { "epoch": 0.78, "grad_norm": 1.8433311762575815, "learning_rate": 1.1937642783384995e-06, "loss": 0.0995, "step": 27312 }, { "epoch": 0.78, "grad_norm": 10.051390871432538, "learning_rate": 1.1934635650281829e-06, "loss": 0.6262, "step": 27313 }, { "epoch": 0.78, "grad_norm": 6.055260612681662, "learning_rate": 1.1931628844650288e-06, "loss": 0.302, "step": 27314 }, { "epoch": 0.78, "grad_norm": 1.883634379964079, "learning_rate": 1.1928622366516274e-06, "loss": 0.0655, "step": 27315 }, { "epoch": 0.78, "grad_norm": 2.336503486576822, "learning_rate": 1.1925616215905633e-06, "loss": 0.0822, "step": 27316 }, { "epoch": 0.78, "grad_norm": 3.358611831918062, "learning_rate": 1.1922610392844214e-06, "loss": 0.3157, "step": 27317 }, { "epoch": 0.78, "grad_norm": 7.332208679035963, "learning_rate": 1.1919604897357901e-06, "loss": 0.3499, "step": 27318 }, { "epoch": 0.78, "grad_norm": 4.824600711172617, "learning_rate": 1.1916599729472528e-06, "loss": 0.3309, "step": 27319 }, { "epoch": 0.78, "grad_norm": 5.187855811727887, "learning_rate": 1.1913594889213969e-06, "loss": 0.3708, "step": 27320 }, { "epoch": 0.78, "grad_norm": 7.692837978571418, "learning_rate": 1.1910590376608056e-06, "loss": 0.803, "step": 27321 }, { "epoch": 0.78, "grad_norm": 6.602930177846609, "learning_rate": 1.1907586191680653e-06, "loss": 0.5995, "step": 27322 }, { "epoch": 0.78, "grad_norm": 5.992791574360754, "learning_rate": 1.1904582334457599e-06, "loss": 0.1662, "step": 27323 }, { "epoch": 0.78, "grad_norm": 7.012725502781067, "learning_rate": 1.190157880496473e-06, "loss": 0.6851, "step": 27324 }, { "epoch": 0.78, "grad_norm": 3.618079024125073, "learning_rate": 1.1898575603227873e-06, "loss": 0.0646, "step": 27325 }, { "epoch": 0.78, "grad_norm": 7.913658199546097, "learning_rate": 1.189557272927288e-06, "loss": 0.4107, "step": 27326 }, { "epoch": 0.78, "grad_norm": 7.106321815255335, "learning_rate": 1.1892570183125602e-06, "loss": 0.4652, "step": 27327 }, { "epoch": 0.78, "grad_norm": 4.86579573863988, "learning_rate": 1.1889567964811843e-06, "loss": 0.2473, "step": 27328 }, { "epoch": 0.78, "grad_norm": 7.133203234629982, "learning_rate": 1.1886566074357442e-06, "loss": 0.4758, "step": 27329 }, { "epoch": 0.78, "grad_norm": 5.830271716713591, "learning_rate": 1.188356451178821e-06, "loss": 0.5916, "step": 27330 }, { "epoch": 0.78, "grad_norm": 3.9781175432037927, "learning_rate": 1.188056327712999e-06, "loss": 0.1548, "step": 27331 }, { "epoch": 0.78, "grad_norm": 5.434108070806792, "learning_rate": 1.1877562370408573e-06, "loss": 0.3725, "step": 27332 }, { "epoch": 0.78, "grad_norm": 4.442411364356476, "learning_rate": 1.1874561791649796e-06, "loss": 0.7205, "step": 27333 }, { "epoch": 0.78, "grad_norm": 3.4875299172160763, "learning_rate": 1.187156154087949e-06, "loss": 0.2217, "step": 27334 }, { "epoch": 0.78, "grad_norm": 7.037931531342726, "learning_rate": 1.1868561618123441e-06, "loss": 0.5062, "step": 27335 }, { "epoch": 0.78, "grad_norm": 4.645656850339461, "learning_rate": 1.1865562023407463e-06, "loss": 0.2332, "step": 27336 }, { "epoch": 0.78, "grad_norm": 3.258681110609429, "learning_rate": 1.186256275675734e-06, "loss": 0.2473, "step": 27337 }, { "epoch": 0.78, "grad_norm": 7.761653752047614, "learning_rate": 1.185956381819891e-06, "loss": 0.6993, "step": 27338 }, { "epoch": 0.78, "grad_norm": 4.768283140195433, "learning_rate": 1.1856565207757947e-06, "loss": 0.3828, "step": 27339 }, { "epoch": 0.78, "grad_norm": 4.9986783188156405, "learning_rate": 1.185356692546027e-06, "loss": 0.2303, "step": 27340 }, { "epoch": 0.78, "grad_norm": 8.449376373302192, "learning_rate": 1.1850568971331639e-06, "loss": 0.5878, "step": 27341 }, { "epoch": 0.78, "grad_norm": 5.235734267745904, "learning_rate": 1.1847571345397891e-06, "loss": 0.3326, "step": 27342 }, { "epoch": 0.78, "grad_norm": 5.109797935596569, "learning_rate": 1.1844574047684782e-06, "loss": 0.4133, "step": 27343 }, { "epoch": 0.78, "grad_norm": 5.943806481318661, "learning_rate": 1.1841577078218092e-06, "loss": 0.5628, "step": 27344 }, { "epoch": 0.78, "grad_norm": 3.4900323588393194, "learning_rate": 1.1838580437023629e-06, "loss": 0.149, "step": 27345 }, { "epoch": 0.78, "grad_norm": 5.004815167225785, "learning_rate": 1.1835584124127148e-06, "loss": 0.2483, "step": 27346 }, { "epoch": 0.78, "grad_norm": 6.7572519411656335, "learning_rate": 1.183258813955445e-06, "loss": 0.2838, "step": 27347 }, { "epoch": 0.78, "grad_norm": 4.853857606318355, "learning_rate": 1.1829592483331287e-06, "loss": 0.3684, "step": 27348 }, { "epoch": 0.78, "grad_norm": 6.021924908782066, "learning_rate": 1.182659715548345e-06, "loss": 0.3938, "step": 27349 }, { "epoch": 0.78, "grad_norm": 2.8232625184041287, "learning_rate": 1.18236021560367e-06, "loss": 0.2542, "step": 27350 }, { "epoch": 0.78, "grad_norm": 5.27124574678723, "learning_rate": 1.1820607485016783e-06, "loss": 0.906, "step": 27351 }, { "epoch": 0.78, "grad_norm": 6.237635799450976, "learning_rate": 1.1817613142449498e-06, "loss": 0.2226, "step": 27352 }, { "epoch": 0.78, "grad_norm": 6.0776846365772395, "learning_rate": 1.1814619128360566e-06, "loss": 0.2788, "step": 27353 }, { "epoch": 0.78, "grad_norm": 4.007589352587891, "learning_rate": 1.181162544277578e-06, "loss": 0.2285, "step": 27354 }, { "epoch": 0.78, "grad_norm": 10.592819488079236, "learning_rate": 1.1808632085720883e-06, "loss": 0.5631, "step": 27355 }, { "epoch": 0.78, "grad_norm": 4.113810361212437, "learning_rate": 1.1805639057221619e-06, "loss": 0.3723, "step": 27356 }, { "epoch": 0.78, "grad_norm": 5.9580190971403955, "learning_rate": 1.1802646357303727e-06, "loss": 0.5085, "step": 27357 }, { "epoch": 0.78, "grad_norm": 5.190500104252467, "learning_rate": 1.179965398599296e-06, "loss": 0.1193, "step": 27358 }, { "epoch": 0.78, "grad_norm": 4.198542113681348, "learning_rate": 1.1796661943315085e-06, "loss": 0.3313, "step": 27359 }, { "epoch": 0.78, "grad_norm": 5.558262507527405, "learning_rate": 1.179367022929581e-06, "loss": 0.3723, "step": 27360 }, { "epoch": 0.78, "grad_norm": 3.052553919597918, "learning_rate": 1.17906788439609e-06, "loss": 0.2683, "step": 27361 }, { "epoch": 0.78, "grad_norm": 4.564471981350787, "learning_rate": 1.1787687787336073e-06, "loss": 0.426, "step": 27362 }, { "epoch": 0.78, "grad_norm": 6.370027098485776, "learning_rate": 1.1784697059447064e-06, "loss": 0.2377, "step": 27363 }, { "epoch": 0.78, "grad_norm": 3.3044515755490993, "learning_rate": 1.1781706660319587e-06, "loss": 0.4996, "step": 27364 }, { "epoch": 0.78, "grad_norm": 6.229110215695711, "learning_rate": 1.17787165899794e-06, "loss": 0.4543, "step": 27365 }, { "epoch": 0.78, "grad_norm": 5.268827217414089, "learning_rate": 1.177572684845219e-06, "loss": 0.4091, "step": 27366 }, { "epoch": 0.78, "grad_norm": 8.393645639686833, "learning_rate": 1.1772737435763692e-06, "loss": 0.4925, "step": 27367 }, { "epoch": 0.78, "grad_norm": 2.930764857245974, "learning_rate": 1.1769748351939646e-06, "loss": 0.1382, "step": 27368 }, { "epoch": 0.78, "grad_norm": 10.181072758583154, "learning_rate": 1.176675959700575e-06, "loss": 0.2749, "step": 27369 }, { "epoch": 0.78, "grad_norm": 1.5269244367237351, "learning_rate": 1.1763771170987708e-06, "loss": 0.0699, "step": 27370 }, { "epoch": 0.78, "grad_norm": 6.401871389730459, "learning_rate": 1.1760783073911226e-06, "loss": 0.653, "step": 27371 }, { "epoch": 0.78, "grad_norm": 4.136843296730061, "learning_rate": 1.1757795305802033e-06, "loss": 0.7064, "step": 27372 }, { "epoch": 0.78, "grad_norm": 5.199314956290825, "learning_rate": 1.1754807866685802e-06, "loss": 0.3887, "step": 27373 }, { "epoch": 0.78, "grad_norm": 4.56249519243379, "learning_rate": 1.175182075658825e-06, "loss": 0.2088, "step": 27374 }, { "epoch": 0.78, "grad_norm": 3.7896022294244296, "learning_rate": 1.174883397553509e-06, "loss": 0.317, "step": 27375 }, { "epoch": 0.78, "grad_norm": 13.463827981862027, "learning_rate": 1.1745847523552e-06, "loss": 0.6873, "step": 27376 }, { "epoch": 0.78, "grad_norm": 9.529869905206276, "learning_rate": 1.1742861400664674e-06, "loss": 0.5381, "step": 27377 }, { "epoch": 0.78, "grad_norm": 6.244144786920434, "learning_rate": 1.1739875606898788e-06, "loss": 0.4596, "step": 27378 }, { "epoch": 0.78, "grad_norm": 5.040601391864129, "learning_rate": 1.1736890142280054e-06, "loss": 0.634, "step": 27379 }, { "epoch": 0.78, "grad_norm": 3.790543257043436, "learning_rate": 1.1733905006834134e-06, "loss": 0.6151, "step": 27380 }, { "epoch": 0.78, "grad_norm": 2.7124050686304946, "learning_rate": 1.1730920200586726e-06, "loss": 0.3332, "step": 27381 }, { "epoch": 0.78, "grad_norm": 3.2464785204373197, "learning_rate": 1.1727935723563489e-06, "loss": 0.1867, "step": 27382 }, { "epoch": 0.78, "grad_norm": 4.568640195933433, "learning_rate": 1.1724951575790121e-06, "loss": 0.3454, "step": 27383 }, { "epoch": 0.78, "grad_norm": 4.781782108192903, "learning_rate": 1.172196775729228e-06, "loss": 0.5421, "step": 27384 }, { "epoch": 0.78, "grad_norm": 6.270198418125796, "learning_rate": 1.171898426809563e-06, "loss": 0.3462, "step": 27385 }, { "epoch": 0.78, "grad_norm": 7.545447994555234, "learning_rate": 1.1716001108225856e-06, "loss": 0.5603, "step": 27386 }, { "epoch": 0.78, "grad_norm": 21.38109820640137, "learning_rate": 1.1713018277708594e-06, "loss": 0.4944, "step": 27387 }, { "epoch": 0.78, "grad_norm": 8.41790442917893, "learning_rate": 1.171003577656954e-06, "loss": 0.3362, "step": 27388 }, { "epoch": 0.78, "grad_norm": 4.265985829948145, "learning_rate": 1.1707053604834329e-06, "loss": 0.4667, "step": 27389 }, { "epoch": 0.78, "grad_norm": 4.674088033677943, "learning_rate": 1.1704071762528625e-06, "loss": 0.2511, "step": 27390 }, { "epoch": 0.78, "grad_norm": 16.55547048275202, "learning_rate": 1.170109024967806e-06, "loss": 0.4546, "step": 27391 }, { "epoch": 0.78, "grad_norm": 4.756353345592853, "learning_rate": 1.1698109066308299e-06, "loss": 0.5886, "step": 27392 }, { "epoch": 0.78, "grad_norm": 8.021940661861185, "learning_rate": 1.1695128212445005e-06, "loss": 0.4021, "step": 27393 }, { "epoch": 0.78, "grad_norm": 4.266224494140277, "learning_rate": 1.1692147688113798e-06, "loss": 0.3726, "step": 27394 }, { "epoch": 0.78, "grad_norm": 6.8352958334447225, "learning_rate": 1.1689167493340336e-06, "loss": 0.6007, "step": 27395 }, { "epoch": 0.78, "grad_norm": 6.316577161223585, "learning_rate": 1.1686187628150253e-06, "loss": 0.4344, "step": 27396 }, { "epoch": 0.78, "grad_norm": 4.169177722864145, "learning_rate": 1.1683208092569177e-06, "loss": 0.323, "step": 27397 }, { "epoch": 0.78, "grad_norm": 5.74079867844189, "learning_rate": 1.168022888662273e-06, "loss": 0.4003, "step": 27398 }, { "epoch": 0.78, "grad_norm": 4.527526272508465, "learning_rate": 1.167725001033656e-06, "loss": 0.4163, "step": 27399 }, { "epoch": 0.78, "grad_norm": 3.4711840569534664, "learning_rate": 1.1674271463736297e-06, "loss": 0.2515, "step": 27400 }, { "epoch": 0.78, "grad_norm": 4.904630575976874, "learning_rate": 1.1671293246847549e-06, "loss": 0.2968, "step": 27401 }, { "epoch": 0.78, "grad_norm": 5.0485887954034485, "learning_rate": 1.1668315359695958e-06, "loss": 0.6165, "step": 27402 }, { "epoch": 0.78, "grad_norm": 5.162386161897616, "learning_rate": 1.166533780230713e-06, "loss": 0.4958, "step": 27403 }, { "epoch": 0.78, "grad_norm": 4.387318407957778, "learning_rate": 1.1662360574706682e-06, "loss": 0.4442, "step": 27404 }, { "epoch": 0.78, "grad_norm": 3.495931201757621, "learning_rate": 1.1659383676920216e-06, "loss": 0.1704, "step": 27405 }, { "epoch": 0.78, "grad_norm": 3.833772502841834, "learning_rate": 1.165640710897336e-06, "loss": 0.6017, "step": 27406 }, { "epoch": 0.78, "grad_norm": 5.683653526385076, "learning_rate": 1.1653430870891701e-06, "loss": 0.7434, "step": 27407 }, { "epoch": 0.78, "grad_norm": 6.243712123263374, "learning_rate": 1.1650454962700853e-06, "loss": 0.9714, "step": 27408 }, { "epoch": 0.78, "grad_norm": 9.938437711511005, "learning_rate": 1.1647479384426436e-06, "loss": 0.5124, "step": 27409 }, { "epoch": 0.78, "grad_norm": 9.733782117983408, "learning_rate": 1.1644504136094032e-06, "loss": 0.651, "step": 27410 }, { "epoch": 0.78, "grad_norm": 5.923465484979684, "learning_rate": 1.164152921772923e-06, "loss": 0.1846, "step": 27411 }, { "epoch": 0.79, "grad_norm": 6.7189992459219114, "learning_rate": 1.1638554629357624e-06, "loss": 0.8211, "step": 27412 }, { "epoch": 0.79, "grad_norm": 5.70000669328397, "learning_rate": 1.1635580371004817e-06, "loss": 0.3005, "step": 27413 }, { "epoch": 0.79, "grad_norm": 5.681100871496789, "learning_rate": 1.1632606442696376e-06, "loss": 0.8574, "step": 27414 }, { "epoch": 0.79, "grad_norm": 4.351322270171001, "learning_rate": 1.162963284445791e-06, "loss": 0.4616, "step": 27415 }, { "epoch": 0.79, "grad_norm": 4.861484768751139, "learning_rate": 1.1626659576314997e-06, "loss": 0.4903, "step": 27416 }, { "epoch": 0.79, "grad_norm": 9.166676544415324, "learning_rate": 1.1623686638293185e-06, "loss": 0.5801, "step": 27417 }, { "epoch": 0.79, "grad_norm": 3.2942285627987853, "learning_rate": 1.162071403041809e-06, "loss": 0.1738, "step": 27418 }, { "epoch": 0.79, "grad_norm": 8.646203446030597, "learning_rate": 1.161774175271525e-06, "loss": 0.5135, "step": 27419 }, { "epoch": 0.79, "grad_norm": 5.20265023754696, "learning_rate": 1.1614769805210262e-06, "loss": 0.4138, "step": 27420 }, { "epoch": 0.79, "grad_norm": 5.04105828557961, "learning_rate": 1.1611798187928674e-06, "loss": 0.4256, "step": 27421 }, { "epoch": 0.79, "grad_norm": 2.520276135479692, "learning_rate": 1.1608826900896075e-06, "loss": 0.492, "step": 27422 }, { "epoch": 0.79, "grad_norm": 7.786749799667827, "learning_rate": 1.1605855944138012e-06, "loss": 0.4549, "step": 27423 }, { "epoch": 0.79, "grad_norm": 8.306297339999832, "learning_rate": 1.1602885317680024e-06, "loss": 0.1745, "step": 27424 }, { "epoch": 0.79, "grad_norm": 3.536055004883068, "learning_rate": 1.1599915021547703e-06, "loss": 0.2756, "step": 27425 }, { "epoch": 0.79, "grad_norm": 6.042195167664881, "learning_rate": 1.1596945055766572e-06, "loss": 0.709, "step": 27426 }, { "epoch": 0.79, "grad_norm": 4.660168221674951, "learning_rate": 1.1593975420362209e-06, "loss": 0.4053, "step": 27427 }, { "epoch": 0.79, "grad_norm": 6.147849455926783, "learning_rate": 1.159100611536013e-06, "loss": 0.5146, "step": 27428 }, { "epoch": 0.79, "grad_norm": 5.695992511607724, "learning_rate": 1.1588037140785913e-06, "loss": 0.4712, "step": 27429 }, { "epoch": 0.79, "grad_norm": 6.616230072331647, "learning_rate": 1.158506849666508e-06, "loss": 0.4907, "step": 27430 }, { "epoch": 0.79, "grad_norm": 3.112215594430997, "learning_rate": 1.1582100183023176e-06, "loss": 0.2477, "step": 27431 }, { "epoch": 0.79, "grad_norm": 6.743568959603456, "learning_rate": 1.1579132199885717e-06, "loss": 0.3447, "step": 27432 }, { "epoch": 0.79, "grad_norm": 7.041649075397737, "learning_rate": 1.1576164547278257e-06, "loss": 0.2082, "step": 27433 }, { "epoch": 0.79, "grad_norm": 9.025068127488598, "learning_rate": 1.1573197225226334e-06, "loss": 0.5193, "step": 27434 }, { "epoch": 0.79, "grad_norm": 5.145468149341817, "learning_rate": 1.1570230233755446e-06, "loss": 0.6628, "step": 27435 }, { "epoch": 0.79, "grad_norm": 4.305694905285455, "learning_rate": 1.1567263572891157e-06, "loss": 0.9822, "step": 27436 }, { "epoch": 0.79, "grad_norm": 4.544533667482596, "learning_rate": 1.1564297242658967e-06, "loss": 0.3181, "step": 27437 }, { "epoch": 0.79, "grad_norm": 4.063581938822318, "learning_rate": 1.156133124308439e-06, "loss": 0.5629, "step": 27438 }, { "epoch": 0.79, "grad_norm": 2.3638942932223896, "learning_rate": 1.1558365574192938e-06, "loss": 0.1857, "step": 27439 }, { "epoch": 0.79, "grad_norm": 4.1389333616266875, "learning_rate": 1.1555400236010133e-06, "loss": 0.2059, "step": 27440 }, { "epoch": 0.79, "grad_norm": 16.31217400820698, "learning_rate": 1.1552435228561499e-06, "loss": 0.8091, "step": 27441 }, { "epoch": 0.79, "grad_norm": 8.277730863411199, "learning_rate": 1.1549470551872515e-06, "loss": 0.6808, "step": 27442 }, { "epoch": 0.79, "grad_norm": 3.937314649790989, "learning_rate": 1.1546506205968738e-06, "loss": 0.359, "step": 27443 }, { "epoch": 0.79, "grad_norm": 6.93926824768462, "learning_rate": 1.15435421908756e-06, "loss": 0.2782, "step": 27444 }, { "epoch": 0.79, "grad_norm": 7.323543523562628, "learning_rate": 1.1540578506618644e-06, "loss": 0.7561, "step": 27445 }, { "epoch": 0.79, "grad_norm": 8.8600143659402, "learning_rate": 1.1537615153223347e-06, "loss": 0.4635, "step": 27446 }, { "epoch": 0.79, "grad_norm": 12.176220687362559, "learning_rate": 1.1534652130715224e-06, "loss": 0.625, "step": 27447 }, { "epoch": 0.79, "grad_norm": 3.0007976822526077, "learning_rate": 1.1531689439119736e-06, "loss": 0.3048, "step": 27448 }, { "epoch": 0.79, "grad_norm": 2.4455483904761572, "learning_rate": 1.152872707846241e-06, "loss": 0.1804, "step": 27449 }, { "epoch": 0.79, "grad_norm": 4.829524136868516, "learning_rate": 1.15257650487687e-06, "loss": 0.2022, "step": 27450 }, { "epoch": 0.79, "grad_norm": 3.064559613742659, "learning_rate": 1.1522803350064083e-06, "loss": 0.2059, "step": 27451 }, { "epoch": 0.79, "grad_norm": 2.881873911003415, "learning_rate": 1.1519841982374068e-06, "loss": 0.2375, "step": 27452 }, { "epoch": 0.79, "grad_norm": 3.584715524876013, "learning_rate": 1.1516880945724102e-06, "loss": 0.2931, "step": 27453 }, { "epoch": 0.79, "grad_norm": 6.666537220016054, "learning_rate": 1.1513920240139682e-06, "loss": 0.8765, "step": 27454 }, { "epoch": 0.79, "grad_norm": 8.01529120332104, "learning_rate": 1.1510959865646253e-06, "loss": 0.4375, "step": 27455 }, { "epoch": 0.79, "grad_norm": 6.640681241863167, "learning_rate": 1.1507999822269311e-06, "loss": 0.4865, "step": 27456 }, { "epoch": 0.79, "grad_norm": 3.883581964094672, "learning_rate": 1.150504011003431e-06, "loss": 0.4181, "step": 27457 }, { "epoch": 0.79, "grad_norm": 3.8918165657259793, "learning_rate": 1.1502080728966692e-06, "loss": 0.3745, "step": 27458 }, { "epoch": 0.79, "grad_norm": 7.758313426952727, "learning_rate": 1.1499121679091952e-06, "loss": 1.0355, "step": 27459 }, { "epoch": 0.79, "grad_norm": 8.702746946386629, "learning_rate": 1.1496162960435508e-06, "loss": 0.6333, "step": 27460 }, { "epoch": 0.79, "grad_norm": 4.8876328013688015, "learning_rate": 1.1493204573022855e-06, "loss": 0.3896, "step": 27461 }, { "epoch": 0.79, "grad_norm": 4.806114567905935, "learning_rate": 1.14902465168794e-06, "loss": 0.5571, "step": 27462 }, { "epoch": 0.79, "grad_norm": 6.777278950406177, "learning_rate": 1.1487288792030632e-06, "loss": 0.3419, "step": 27463 }, { "epoch": 0.79, "grad_norm": 7.471863325198445, "learning_rate": 1.1484331398501974e-06, "loss": 0.4487, "step": 27464 }, { "epoch": 0.79, "grad_norm": 2.271087283770552, "learning_rate": 1.1481374336318862e-06, "loss": 0.2549, "step": 27465 }, { "epoch": 0.79, "grad_norm": 4.258435460985457, "learning_rate": 1.1478417605506753e-06, "loss": 0.3358, "step": 27466 }, { "epoch": 0.79, "grad_norm": 7.093675419755682, "learning_rate": 1.1475461206091059e-06, "loss": 0.445, "step": 27467 }, { "epoch": 0.79, "grad_norm": 6.387488381212868, "learning_rate": 1.1472505138097245e-06, "loss": 0.3734, "step": 27468 }, { "epoch": 0.79, "grad_norm": 5.656703656798166, "learning_rate": 1.1469549401550716e-06, "loss": 0.5244, "step": 27469 }, { "epoch": 0.79, "grad_norm": 5.02342231237984, "learning_rate": 1.1466593996476932e-06, "loss": 0.4663, "step": 27470 }, { "epoch": 0.79, "grad_norm": 7.8069318536183605, "learning_rate": 1.146363892290127e-06, "loss": 1.0227, "step": 27471 }, { "epoch": 0.79, "grad_norm": 4.410736262088564, "learning_rate": 1.146068418084919e-06, "loss": 0.2598, "step": 27472 }, { "epoch": 0.79, "grad_norm": 2.3931596882184816, "learning_rate": 1.1457729770346087e-06, "loss": 0.2545, "step": 27473 }, { "epoch": 0.79, "grad_norm": 4.4156874779818684, "learning_rate": 1.1454775691417386e-06, "loss": 0.3186, "step": 27474 }, { "epoch": 0.79, "grad_norm": 3.8191322890755766, "learning_rate": 1.145182194408852e-06, "loss": 0.1877, "step": 27475 }, { "epoch": 0.79, "grad_norm": 5.145291549588994, "learning_rate": 1.1448868528384883e-06, "loss": 0.4221, "step": 27476 }, { "epoch": 0.79, "grad_norm": 6.1354087825221475, "learning_rate": 1.1445915444331884e-06, "loss": 0.7373, "step": 27477 }, { "epoch": 0.79, "grad_norm": 4.810084888795691, "learning_rate": 1.1442962691954917e-06, "loss": 0.2363, "step": 27478 }, { "epoch": 0.79, "grad_norm": 7.2676953063063685, "learning_rate": 1.1440010271279405e-06, "loss": 0.5919, "step": 27479 }, { "epoch": 0.79, "grad_norm": 12.082291490210743, "learning_rate": 1.1437058182330724e-06, "loss": 0.6565, "step": 27480 }, { "epoch": 0.79, "grad_norm": 4.901592303432613, "learning_rate": 1.1434106425134277e-06, "loss": 0.3425, "step": 27481 }, { "epoch": 0.79, "grad_norm": 3.54343995784974, "learning_rate": 1.1431154999715488e-06, "loss": 0.3614, "step": 27482 }, { "epoch": 0.79, "grad_norm": 4.978621028653853, "learning_rate": 1.1428203906099717e-06, "loss": 0.5243, "step": 27483 }, { "epoch": 0.79, "grad_norm": 10.174917114820003, "learning_rate": 1.1425253144312365e-06, "loss": 0.9078, "step": 27484 }, { "epoch": 0.79, "grad_norm": 7.298428845834307, "learning_rate": 1.142230271437879e-06, "loss": 0.4798, "step": 27485 }, { "epoch": 0.79, "grad_norm": 7.66778234642932, "learning_rate": 1.1419352616324413e-06, "loss": 0.6397, "step": 27486 }, { "epoch": 0.79, "grad_norm": 3.0204315817260285, "learning_rate": 1.141640285017458e-06, "loss": 0.4377, "step": 27487 }, { "epoch": 0.79, "grad_norm": 4.422778788696663, "learning_rate": 1.1413453415954701e-06, "loss": 0.5246, "step": 27488 }, { "epoch": 0.79, "grad_norm": 4.81289378945459, "learning_rate": 1.1410504313690112e-06, "loss": 0.2035, "step": 27489 }, { "epoch": 0.79, "grad_norm": 5.371197752899323, "learning_rate": 1.1407555543406219e-06, "loss": 0.5318, "step": 27490 }, { "epoch": 0.79, "grad_norm": 3.590514028110891, "learning_rate": 1.1404607105128373e-06, "loss": 0.5582, "step": 27491 }, { "epoch": 0.79, "grad_norm": 5.663779163508618, "learning_rate": 1.1401658998881926e-06, "loss": 0.5414, "step": 27492 }, { "epoch": 0.79, "grad_norm": 7.185274194145701, "learning_rate": 1.1398711224692265e-06, "loss": 0.5445, "step": 27493 }, { "epoch": 0.79, "grad_norm": 8.21719786633717, "learning_rate": 1.1395763782584735e-06, "loss": 0.4662, "step": 27494 }, { "epoch": 0.79, "grad_norm": 3.4376559829000923, "learning_rate": 1.13928166725847e-06, "loss": 0.2747, "step": 27495 }, { "epoch": 0.79, "grad_norm": 5.870106140034938, "learning_rate": 1.1389869894717499e-06, "loss": 0.339, "step": 27496 }, { "epoch": 0.79, "grad_norm": 10.394784562308242, "learning_rate": 1.1386923449008525e-06, "loss": 0.9438, "step": 27497 }, { "epoch": 0.79, "grad_norm": 4.9114609762476595, "learning_rate": 1.138397733548306e-06, "loss": 0.4732, "step": 27498 }, { "epoch": 0.79, "grad_norm": 5.686276272763153, "learning_rate": 1.1381031554166488e-06, "loss": 0.4887, "step": 27499 }, { "epoch": 0.79, "grad_norm": 5.985948002307414, "learning_rate": 1.1378086105084157e-06, "loss": 0.5319, "step": 27500 }, { "epoch": 0.79, "grad_norm": 4.834105235428819, "learning_rate": 1.1375140988261386e-06, "loss": 0.355, "step": 27501 }, { "epoch": 0.79, "grad_norm": 5.773566196850071, "learning_rate": 1.1372196203723535e-06, "loss": 0.5354, "step": 27502 }, { "epoch": 0.79, "grad_norm": 4.358627692810257, "learning_rate": 1.1369251751495924e-06, "loss": 0.2198, "step": 27503 }, { "epoch": 0.79, "grad_norm": 4.904488265459161, "learning_rate": 1.136630763160388e-06, "loss": 0.4505, "step": 27504 }, { "epoch": 0.79, "grad_norm": 7.262526378485201, "learning_rate": 1.1363363844072722e-06, "loss": 0.3916, "step": 27505 }, { "epoch": 0.79, "grad_norm": 2.4597769528740443, "learning_rate": 1.136042038892779e-06, "loss": 0.1419, "step": 27506 }, { "epoch": 0.79, "grad_norm": 5.866509613008425, "learning_rate": 1.1357477266194417e-06, "loss": 0.5749, "step": 27507 }, { "epoch": 0.79, "grad_norm": 11.048258779744842, "learning_rate": 1.1354534475897899e-06, "loss": 0.4796, "step": 27508 }, { "epoch": 0.79, "grad_norm": 6.197432166217271, "learning_rate": 1.1351592018063572e-06, "loss": 0.4972, "step": 27509 }, { "epoch": 0.79, "grad_norm": 9.899469263126653, "learning_rate": 1.134864989271674e-06, "loss": 0.4827, "step": 27510 }, { "epoch": 0.79, "grad_norm": 7.331537700578776, "learning_rate": 1.1345708099882719e-06, "loss": 0.5385, "step": 27511 }, { "epoch": 0.79, "grad_norm": 5.569748431265586, "learning_rate": 1.1342766639586793e-06, "loss": 0.4697, "step": 27512 }, { "epoch": 0.79, "grad_norm": 4.324717643649831, "learning_rate": 1.1339825511854303e-06, "loss": 0.3785, "step": 27513 }, { "epoch": 0.79, "grad_norm": 6.574058091030283, "learning_rate": 1.1336884716710517e-06, "loss": 0.6608, "step": 27514 }, { "epoch": 0.79, "grad_norm": 3.39246276413365, "learning_rate": 1.1333944254180752e-06, "loss": 0.518, "step": 27515 }, { "epoch": 0.79, "grad_norm": 6.360377951973889, "learning_rate": 1.1331004124290317e-06, "loss": 0.4276, "step": 27516 }, { "epoch": 0.79, "grad_norm": 6.329604270310645, "learning_rate": 1.1328064327064498e-06, "loss": 0.8246, "step": 27517 }, { "epoch": 0.79, "grad_norm": 8.45254046381077, "learning_rate": 1.1325124862528575e-06, "loss": 0.3421, "step": 27518 }, { "epoch": 0.79, "grad_norm": 8.340477005012518, "learning_rate": 1.132218573070783e-06, "loss": 0.2614, "step": 27519 }, { "epoch": 0.79, "grad_norm": 4.510929662884629, "learning_rate": 1.131924693162757e-06, "loss": 0.6779, "step": 27520 }, { "epoch": 0.79, "grad_norm": 8.119004722634248, "learning_rate": 1.1316308465313058e-06, "loss": 0.7262, "step": 27521 }, { "epoch": 0.79, "grad_norm": 1.9413158771810604, "learning_rate": 1.1313370331789576e-06, "loss": 0.1506, "step": 27522 }, { "epoch": 0.79, "grad_norm": 7.2593553625753415, "learning_rate": 1.131043253108242e-06, "loss": 0.4347, "step": 27523 }, { "epoch": 0.79, "grad_norm": 4.692699955951469, "learning_rate": 1.130749506321685e-06, "loss": 0.5916, "step": 27524 }, { "epoch": 0.79, "grad_norm": 4.637289653233629, "learning_rate": 1.130455792821814e-06, "loss": 0.5029, "step": 27525 }, { "epoch": 0.79, "grad_norm": 3.6651071226538043, "learning_rate": 1.1301621126111533e-06, "loss": 0.2872, "step": 27526 }, { "epoch": 0.79, "grad_norm": 5.866902066334244, "learning_rate": 1.1298684656922333e-06, "loss": 0.6971, "step": 27527 }, { "epoch": 0.79, "grad_norm": 4.938020727005167, "learning_rate": 1.129574852067577e-06, "loss": 0.1614, "step": 27528 }, { "epoch": 0.79, "grad_norm": 8.174580638523462, "learning_rate": 1.1292812717397129e-06, "loss": 0.2284, "step": 27529 }, { "epoch": 0.79, "grad_norm": 8.623047538575852, "learning_rate": 1.1289877247111657e-06, "loss": 0.3396, "step": 27530 }, { "epoch": 0.79, "grad_norm": 3.3558524626246733, "learning_rate": 1.128694210984459e-06, "loss": 0.2161, "step": 27531 }, { "epoch": 0.79, "grad_norm": 4.532240187220282, "learning_rate": 1.1284007305621209e-06, "loss": 0.2693, "step": 27532 }, { "epoch": 0.79, "grad_norm": 3.2844396982254054, "learning_rate": 1.1281072834466728e-06, "loss": 0.5144, "step": 27533 }, { "epoch": 0.79, "grad_norm": 4.5880864062217075, "learning_rate": 1.1278138696406426e-06, "loss": 0.3682, "step": 27534 }, { "epoch": 0.79, "grad_norm": 5.780746025594512, "learning_rate": 1.1275204891465518e-06, "loss": 0.8241, "step": 27535 }, { "epoch": 0.79, "grad_norm": 6.250789249178089, "learning_rate": 1.127227141966926e-06, "loss": 0.5846, "step": 27536 }, { "epoch": 0.79, "grad_norm": 2.197878752889154, "learning_rate": 1.126933828104289e-06, "loss": 0.0529, "step": 27537 }, { "epoch": 0.79, "grad_norm": 9.372531642214277, "learning_rate": 1.1266405475611635e-06, "loss": 0.6732, "step": 27538 }, { "epoch": 0.79, "grad_norm": 8.34753966261223, "learning_rate": 1.126347300340071e-06, "loss": 0.3719, "step": 27539 }, { "epoch": 0.79, "grad_norm": 5.870658853328293, "learning_rate": 1.1260540864435355e-06, "loss": 0.4059, "step": 27540 }, { "epoch": 0.79, "grad_norm": 7.3470064450816315, "learning_rate": 1.1257609058740815e-06, "loss": 0.5684, "step": 27541 }, { "epoch": 0.79, "grad_norm": 8.647935916514987, "learning_rate": 1.1254677586342278e-06, "loss": 0.699, "step": 27542 }, { "epoch": 0.79, "grad_norm": 8.491639513515775, "learning_rate": 1.1251746447264989e-06, "loss": 0.4404, "step": 27543 }, { "epoch": 0.79, "grad_norm": 5.252433893876228, "learning_rate": 1.1248815641534161e-06, "loss": 0.2684, "step": 27544 }, { "epoch": 0.79, "grad_norm": 5.602677961771741, "learning_rate": 1.1245885169174998e-06, "loss": 0.2374, "step": 27545 }, { "epoch": 0.79, "grad_norm": 2.45335630067496, "learning_rate": 1.12429550302127e-06, "loss": 0.3317, "step": 27546 }, { "epoch": 0.79, "grad_norm": 4.376320775801843, "learning_rate": 1.1240025224672486e-06, "loss": 0.6769, "step": 27547 }, { "epoch": 0.79, "grad_norm": 4.020749529358881, "learning_rate": 1.123709575257958e-06, "loss": 0.1864, "step": 27548 }, { "epoch": 0.79, "grad_norm": 2.747452097432651, "learning_rate": 1.1234166613959147e-06, "loss": 0.1302, "step": 27549 }, { "epoch": 0.79, "grad_norm": 8.734321607723382, "learning_rate": 1.1231237808836425e-06, "loss": 0.6267, "step": 27550 }, { "epoch": 0.79, "grad_norm": 9.795516362505117, "learning_rate": 1.1228309337236592e-06, "loss": 0.5171, "step": 27551 }, { "epoch": 0.79, "grad_norm": 7.200029680402827, "learning_rate": 1.1225381199184837e-06, "loss": 0.4804, "step": 27552 }, { "epoch": 0.79, "grad_norm": 5.703384686136098, "learning_rate": 1.122245339470634e-06, "loss": 0.4288, "step": 27553 }, { "epoch": 0.79, "grad_norm": 3.4363604650854365, "learning_rate": 1.1219525923826312e-06, "loss": 0.3337, "step": 27554 }, { "epoch": 0.79, "grad_norm": 5.325081232731842, "learning_rate": 1.1216598786569921e-06, "loss": 0.4388, "step": 27555 }, { "epoch": 0.79, "grad_norm": 9.315986646243818, "learning_rate": 1.121367198296235e-06, "loss": 0.5861, "step": 27556 }, { "epoch": 0.79, "grad_norm": 3.667527339809996, "learning_rate": 1.1210745513028815e-06, "loss": 0.2664, "step": 27557 }, { "epoch": 0.79, "grad_norm": 3.6922145705294147, "learning_rate": 1.1207819376794432e-06, "loss": 0.4409, "step": 27558 }, { "epoch": 0.79, "grad_norm": 8.609480453925396, "learning_rate": 1.1204893574284415e-06, "loss": 0.583, "step": 27559 }, { "epoch": 0.79, "grad_norm": 12.047737535999367, "learning_rate": 1.1201968105523908e-06, "loss": 0.3942, "step": 27560 }, { "epoch": 0.79, "grad_norm": 5.366430059789179, "learning_rate": 1.1199042970538104e-06, "loss": 0.3387, "step": 27561 }, { "epoch": 0.79, "grad_norm": 5.082141801715087, "learning_rate": 1.119611816935215e-06, "loss": 0.3958, "step": 27562 }, { "epoch": 0.79, "grad_norm": 4.32472649189056, "learning_rate": 1.1193193701991224e-06, "loss": 0.3785, "step": 27563 }, { "epoch": 0.79, "grad_norm": 5.325969450195111, "learning_rate": 1.1190269568480472e-06, "loss": 0.5684, "step": 27564 }, { "epoch": 0.79, "grad_norm": 1.8263837844376813, "learning_rate": 1.1187345768845036e-06, "loss": 0.1281, "step": 27565 }, { "epoch": 0.79, "grad_norm": 4.6754415415769675, "learning_rate": 1.1184422303110104e-06, "loss": 0.5542, "step": 27566 }, { "epoch": 0.79, "grad_norm": 5.750616247906165, "learning_rate": 1.1181499171300798e-06, "loss": 0.7275, "step": 27567 }, { "epoch": 0.79, "grad_norm": 4.563614356460412, "learning_rate": 1.117857637344228e-06, "loss": 0.4632, "step": 27568 }, { "epoch": 0.79, "grad_norm": 4.088980801399893, "learning_rate": 1.117565390955968e-06, "loss": 0.2207, "step": 27569 }, { "epoch": 0.79, "grad_norm": 5.21993132600883, "learning_rate": 1.1172731779678163e-06, "loss": 0.4008, "step": 27570 }, { "epoch": 0.79, "grad_norm": 2.8936373505386244, "learning_rate": 1.1169809983822854e-06, "loss": 0.3978, "step": 27571 }, { "epoch": 0.79, "grad_norm": 4.815275556100903, "learning_rate": 1.1166888522018876e-06, "loss": 0.3327, "step": 27572 }, { "epoch": 0.79, "grad_norm": 8.197587183947952, "learning_rate": 1.1163967394291392e-06, "loss": 0.6185, "step": 27573 }, { "epoch": 0.79, "grad_norm": 10.75845022534476, "learning_rate": 1.1161046600665492e-06, "loss": 0.6767, "step": 27574 }, { "epoch": 0.79, "grad_norm": 9.307917549138608, "learning_rate": 1.1158126141166347e-06, "loss": 0.4312, "step": 27575 }, { "epoch": 0.79, "grad_norm": 3.186067315301059, "learning_rate": 1.1155206015819043e-06, "loss": 0.1509, "step": 27576 }, { "epoch": 0.79, "grad_norm": 9.686506257309722, "learning_rate": 1.1152286224648735e-06, "loss": 0.2136, "step": 27577 }, { "epoch": 0.79, "grad_norm": 8.063906288223105, "learning_rate": 1.1149366767680526e-06, "loss": 0.508, "step": 27578 }, { "epoch": 0.79, "grad_norm": 7.027586665044892, "learning_rate": 1.1146447644939528e-06, "loss": 0.7416, "step": 27579 }, { "epoch": 0.79, "grad_norm": 2.8516583988315833, "learning_rate": 1.1143528856450848e-06, "loss": 0.3865, "step": 27580 }, { "epoch": 0.79, "grad_norm": 4.7717002791698615, "learning_rate": 1.1140610402239604e-06, "loss": 0.3711, "step": 27581 }, { "epoch": 0.79, "grad_norm": 1.7649164043870427, "learning_rate": 1.1137692282330914e-06, "loss": 0.2064, "step": 27582 }, { "epoch": 0.79, "grad_norm": 5.705982986670855, "learning_rate": 1.1134774496749862e-06, "loss": 0.523, "step": 27583 }, { "epoch": 0.79, "grad_norm": 3.0408435495995474, "learning_rate": 1.113185704552157e-06, "loss": 0.279, "step": 27584 }, { "epoch": 0.79, "grad_norm": 8.9507524632233, "learning_rate": 1.1128939928671133e-06, "loss": 0.6017, "step": 27585 }, { "epoch": 0.79, "grad_norm": 7.17962726156806, "learning_rate": 1.1126023146223635e-06, "loss": 0.5403, "step": 27586 }, { "epoch": 0.79, "grad_norm": 5.223632995069169, "learning_rate": 1.1123106698204162e-06, "loss": 0.6704, "step": 27587 }, { "epoch": 0.79, "grad_norm": 2.2890759099886506, "learning_rate": 1.1120190584637812e-06, "loss": 0.1232, "step": 27588 }, { "epoch": 0.79, "grad_norm": 8.103958526004359, "learning_rate": 1.1117274805549695e-06, "loss": 0.518, "step": 27589 }, { "epoch": 0.79, "grad_norm": 4.735451377655284, "learning_rate": 1.1114359360964872e-06, "loss": 0.3704, "step": 27590 }, { "epoch": 0.79, "grad_norm": 5.216446065300847, "learning_rate": 1.1111444250908427e-06, "loss": 0.4473, "step": 27591 }, { "epoch": 0.79, "grad_norm": 3.4245131613007413, "learning_rate": 1.1108529475405422e-06, "loss": 0.3339, "step": 27592 }, { "epoch": 0.79, "grad_norm": 5.07966125725568, "learning_rate": 1.1105615034480972e-06, "loss": 0.3826, "step": 27593 }, { "epoch": 0.79, "grad_norm": 8.62067354575658, "learning_rate": 1.1102700928160105e-06, "loss": 0.433, "step": 27594 }, { "epoch": 0.79, "grad_norm": 6.902821548582397, "learning_rate": 1.1099787156467928e-06, "loss": 0.7654, "step": 27595 }, { "epoch": 0.79, "grad_norm": 9.19984902330579, "learning_rate": 1.109687371942948e-06, "loss": 0.2969, "step": 27596 }, { "epoch": 0.79, "grad_norm": 3.187853139127092, "learning_rate": 1.1093960617069855e-06, "loss": 0.2909, "step": 27597 }, { "epoch": 0.79, "grad_norm": 3.453434166510118, "learning_rate": 1.1091047849414095e-06, "loss": 0.3147, "step": 27598 }, { "epoch": 0.79, "grad_norm": 8.007944215761254, "learning_rate": 1.1088135416487245e-06, "loss": 0.7979, "step": 27599 }, { "epoch": 0.79, "grad_norm": 7.611435221603079, "learning_rate": 1.1085223318314387e-06, "loss": 0.5267, "step": 27600 }, { "epoch": 0.79, "grad_norm": 3.8607200101253176, "learning_rate": 1.1082311554920549e-06, "loss": 0.4845, "step": 27601 }, { "epoch": 0.79, "grad_norm": 7.943454881614032, "learning_rate": 1.1079400126330804e-06, "loss": 0.6812, "step": 27602 }, { "epoch": 0.79, "grad_norm": 7.014154037860628, "learning_rate": 1.1076489032570176e-06, "loss": 0.3562, "step": 27603 }, { "epoch": 0.79, "grad_norm": 11.83180556486645, "learning_rate": 1.1073578273663737e-06, "loss": 0.5883, "step": 27604 }, { "epoch": 0.79, "grad_norm": 9.396230690639275, "learning_rate": 1.1070667849636507e-06, "loss": 0.4544, "step": 27605 }, { "epoch": 0.79, "grad_norm": 7.790088980023329, "learning_rate": 1.106775776051352e-06, "loss": 0.3204, "step": 27606 }, { "epoch": 0.79, "grad_norm": 5.8597144270176456, "learning_rate": 1.1064848006319828e-06, "loss": 0.6637, "step": 27607 }, { "epoch": 0.79, "grad_norm": 11.322032389756371, "learning_rate": 1.1061938587080444e-06, "loss": 0.5796, "step": 27608 }, { "epoch": 0.79, "grad_norm": 5.347060310367513, "learning_rate": 1.1059029502820423e-06, "loss": 0.5844, "step": 27609 }, { "epoch": 0.79, "grad_norm": 4.06952245150732, "learning_rate": 1.1056120753564758e-06, "loss": 0.3055, "step": 27610 }, { "epoch": 0.79, "grad_norm": 3.7477162877805164, "learning_rate": 1.105321233933851e-06, "loss": 0.446, "step": 27611 }, { "epoch": 0.79, "grad_norm": 7.3535737905443606, "learning_rate": 1.1050304260166678e-06, "loss": 0.5347, "step": 27612 }, { "epoch": 0.79, "grad_norm": 4.5459644153039696, "learning_rate": 1.1047396516074272e-06, "loss": 0.7374, "step": 27613 }, { "epoch": 0.79, "grad_norm": 7.845573304564486, "learning_rate": 1.1044489107086326e-06, "loss": 0.6631, "step": 27614 }, { "epoch": 0.79, "grad_norm": 3.7452787560895158, "learning_rate": 1.1041582033227838e-06, "loss": 0.1057, "step": 27615 }, { "epoch": 0.79, "grad_norm": 2.5076638650074385, "learning_rate": 1.1038675294523831e-06, "loss": 0.1812, "step": 27616 }, { "epoch": 0.79, "grad_norm": 9.186477682105744, "learning_rate": 1.1035768890999304e-06, "loss": 0.598, "step": 27617 }, { "epoch": 0.79, "grad_norm": 6.371137533499349, "learning_rate": 1.1032862822679264e-06, "loss": 0.5403, "step": 27618 }, { "epoch": 0.79, "grad_norm": 4.773780673064093, "learning_rate": 1.1029957089588688e-06, "loss": 0.3029, "step": 27619 }, { "epoch": 0.79, "grad_norm": 4.303684706290961, "learning_rate": 1.1027051691752611e-06, "loss": 0.2058, "step": 27620 }, { "epoch": 0.79, "grad_norm": 7.850547246627665, "learning_rate": 1.1024146629195992e-06, "loss": 0.9103, "step": 27621 }, { "epoch": 0.79, "grad_norm": 2.6114988192775277, "learning_rate": 1.1021241901943842e-06, "loss": 0.2581, "step": 27622 }, { "epoch": 0.79, "grad_norm": 1.609000542048475, "learning_rate": 1.1018337510021164e-06, "loss": 0.2786, "step": 27623 }, { "epoch": 0.79, "grad_norm": 5.757009049029955, "learning_rate": 1.101543345345293e-06, "loss": 0.732, "step": 27624 }, { "epoch": 0.79, "grad_norm": 4.801796541179997, "learning_rate": 1.101252973226412e-06, "loss": 0.6048, "step": 27625 }, { "epoch": 0.79, "grad_norm": 4.04505614422098, "learning_rate": 1.1009626346479702e-06, "loss": 0.267, "step": 27626 }, { "epoch": 0.79, "grad_norm": 3.1030727524214896, "learning_rate": 1.1006723296124683e-06, "loss": 0.2852, "step": 27627 }, { "epoch": 0.79, "grad_norm": 4.985376478650395, "learning_rate": 1.1003820581224011e-06, "loss": 0.7807, "step": 27628 }, { "epoch": 0.79, "grad_norm": 8.465927188474572, "learning_rate": 1.1000918201802662e-06, "loss": 1.0635, "step": 27629 }, { "epoch": 0.79, "grad_norm": 5.084474508464142, "learning_rate": 1.0998016157885632e-06, "loss": 0.3541, "step": 27630 }, { "epoch": 0.79, "grad_norm": 14.69052393200321, "learning_rate": 1.0995114449497868e-06, "loss": 0.4348, "step": 27631 }, { "epoch": 0.79, "grad_norm": 7.0609107803544875, "learning_rate": 1.0992213076664327e-06, "loss": 0.5206, "step": 27632 }, { "epoch": 0.79, "grad_norm": 5.832470294142583, "learning_rate": 1.0989312039409966e-06, "loss": 0.2742, "step": 27633 }, { "epoch": 0.79, "grad_norm": 2.3902166117484867, "learning_rate": 1.0986411337759762e-06, "loss": 0.4737, "step": 27634 }, { "epoch": 0.79, "grad_norm": 3.664613857145779, "learning_rate": 1.0983510971738642e-06, "loss": 0.4364, "step": 27635 }, { "epoch": 0.79, "grad_norm": 3.8867477185880643, "learning_rate": 1.0980610941371588e-06, "loss": 0.3348, "step": 27636 }, { "epoch": 0.79, "grad_norm": 3.89228034902214, "learning_rate": 1.097771124668352e-06, "loss": 0.4359, "step": 27637 }, { "epoch": 0.79, "grad_norm": 6.180160234312238, "learning_rate": 1.0974811887699411e-06, "loss": 0.4291, "step": 27638 }, { "epoch": 0.79, "grad_norm": 12.056890102260542, "learning_rate": 1.097191286444419e-06, "loss": 0.3832, "step": 27639 }, { "epoch": 0.79, "grad_norm": 6.3499548121969625, "learning_rate": 1.0969014176942782e-06, "loss": 0.1945, "step": 27640 }, { "epoch": 0.79, "grad_norm": 2.318117583857707, "learning_rate": 1.096611582522016e-06, "loss": 0.1089, "step": 27641 }, { "epoch": 0.79, "grad_norm": 6.177527473354652, "learning_rate": 1.0963217809301218e-06, "loss": 0.7861, "step": 27642 }, { "epoch": 0.79, "grad_norm": 6.631597670520318, "learning_rate": 1.0960320129210923e-06, "loss": 0.4451, "step": 27643 }, { "epoch": 0.79, "grad_norm": 7.346518850841468, "learning_rate": 1.095742278497417e-06, "loss": 0.6312, "step": 27644 }, { "epoch": 0.79, "grad_norm": 5.239086979536644, "learning_rate": 1.0954525776615933e-06, "loss": 0.4455, "step": 27645 }, { "epoch": 0.79, "grad_norm": 5.817222891430164, "learning_rate": 1.0951629104161077e-06, "loss": 0.4634, "step": 27646 }, { "epoch": 0.79, "grad_norm": 4.891372721005289, "learning_rate": 1.0948732767634547e-06, "loss": 0.2143, "step": 27647 }, { "epoch": 0.79, "grad_norm": 2.9929492269709206, "learning_rate": 1.0945836767061268e-06, "loss": 0.3484, "step": 27648 }, { "epoch": 0.79, "grad_norm": 3.5633594078653927, "learning_rate": 1.0942941102466144e-06, "loss": 0.1514, "step": 27649 }, { "epoch": 0.79, "grad_norm": 5.114159586562093, "learning_rate": 1.0940045773874092e-06, "loss": 0.25, "step": 27650 }, { "epoch": 0.79, "grad_norm": 3.7953014174166455, "learning_rate": 1.0937150781310024e-06, "loss": 0.2227, "step": 27651 }, { "epoch": 0.79, "grad_norm": 4.407715709209093, "learning_rate": 1.0934256124798837e-06, "loss": 0.354, "step": 27652 }, { "epoch": 0.79, "grad_norm": 4.636120803443972, "learning_rate": 1.093136180436542e-06, "loss": 0.2288, "step": 27653 }, { "epoch": 0.79, "grad_norm": 4.195440265549872, "learning_rate": 1.0928467820034688e-06, "loss": 0.3176, "step": 27654 }, { "epoch": 0.79, "grad_norm": 5.353738985408806, "learning_rate": 1.092557417183155e-06, "loss": 0.3244, "step": 27655 }, { "epoch": 0.79, "grad_norm": 2.128893594966687, "learning_rate": 1.092268085978087e-06, "loss": 0.0877, "step": 27656 }, { "epoch": 0.79, "grad_norm": 5.879482061125656, "learning_rate": 1.0919787883907573e-06, "loss": 0.3374, "step": 27657 }, { "epoch": 0.79, "grad_norm": 6.513653317357842, "learning_rate": 1.091689524423653e-06, "loss": 0.7328, "step": 27658 }, { "epoch": 0.79, "grad_norm": 7.479761664933514, "learning_rate": 1.0914002940792622e-06, "loss": 0.6356, "step": 27659 }, { "epoch": 0.79, "grad_norm": 7.775070693623807, "learning_rate": 1.091111097360072e-06, "loss": 0.4037, "step": 27660 }, { "epoch": 0.79, "grad_norm": 4.629529487727719, "learning_rate": 1.0908219342685733e-06, "loss": 0.1982, "step": 27661 }, { "epoch": 0.79, "grad_norm": 7.3586741962160485, "learning_rate": 1.0905328048072512e-06, "loss": 0.6117, "step": 27662 }, { "epoch": 0.79, "grad_norm": 1.5634760478374485, "learning_rate": 1.0902437089785934e-06, "loss": 0.1912, "step": 27663 }, { "epoch": 0.79, "grad_norm": 3.508027203962223, "learning_rate": 1.0899546467850892e-06, "loss": 0.2861, "step": 27664 }, { "epoch": 0.79, "grad_norm": 3.37663425508768, "learning_rate": 1.0896656182292238e-06, "loss": 0.2679, "step": 27665 }, { "epoch": 0.79, "grad_norm": 4.728937835310583, "learning_rate": 1.0893766233134834e-06, "loss": 0.2582, "step": 27666 }, { "epoch": 0.79, "grad_norm": 8.735319956535193, "learning_rate": 1.0890876620403535e-06, "loss": 0.6537, "step": 27667 }, { "epoch": 0.79, "grad_norm": 7.119300955600928, "learning_rate": 1.0887987344123218e-06, "loss": 0.4425, "step": 27668 }, { "epoch": 0.79, "grad_norm": 3.8780223842081067, "learning_rate": 1.0885098404318718e-06, "loss": 0.2984, "step": 27669 }, { "epoch": 0.79, "grad_norm": 4.39043477688256, "learning_rate": 1.0882209801014903e-06, "loss": 0.359, "step": 27670 }, { "epoch": 0.79, "grad_norm": 3.070124554887653, "learning_rate": 1.087932153423663e-06, "loss": 0.436, "step": 27671 }, { "epoch": 0.79, "grad_norm": 6.227995129737101, "learning_rate": 1.087643360400874e-06, "loss": 0.6651, "step": 27672 }, { "epoch": 0.79, "grad_norm": 4.150419893049376, "learning_rate": 1.0873546010356073e-06, "loss": 0.4422, "step": 27673 }, { "epoch": 0.79, "grad_norm": 4.507082293256609, "learning_rate": 1.0870658753303464e-06, "loss": 0.5415, "step": 27674 }, { "epoch": 0.79, "grad_norm": 6.578410670223162, "learning_rate": 1.0867771832875768e-06, "loss": 0.626, "step": 27675 }, { "epoch": 0.79, "grad_norm": 2.9277609428844262, "learning_rate": 1.08648852490978e-06, "loss": 0.29, "step": 27676 }, { "epoch": 0.79, "grad_norm": 3.5917012884915627, "learning_rate": 1.0861999001994423e-06, "loss": 0.7026, "step": 27677 }, { "epoch": 0.79, "grad_norm": 4.5063489789317055, "learning_rate": 1.0859113091590445e-06, "loss": 0.3771, "step": 27678 }, { "epoch": 0.79, "grad_norm": 12.419815449349416, "learning_rate": 1.085622751791069e-06, "loss": 0.4253, "step": 27679 }, { "epoch": 0.79, "grad_norm": 7.13962125505614, "learning_rate": 1.0853342280979995e-06, "loss": 0.3502, "step": 27680 }, { "epoch": 0.79, "grad_norm": 2.511598005700022, "learning_rate": 1.085045738082317e-06, "loss": 0.1842, "step": 27681 }, { "epoch": 0.79, "grad_norm": 4.802328879918412, "learning_rate": 1.084757281746505e-06, "loss": 0.5514, "step": 27682 }, { "epoch": 0.79, "grad_norm": 7.617042986159184, "learning_rate": 1.084468859093043e-06, "loss": 0.6387, "step": 27683 }, { "epoch": 0.79, "grad_norm": 5.260889612472202, "learning_rate": 1.0841804701244146e-06, "loss": 0.3163, "step": 27684 }, { "epoch": 0.79, "grad_norm": 15.960831995170485, "learning_rate": 1.0838921148430992e-06, "loss": 0.5887, "step": 27685 }, { "epoch": 0.79, "grad_norm": 4.779982355157056, "learning_rate": 1.083603793251578e-06, "loss": 0.6067, "step": 27686 }, { "epoch": 0.79, "grad_norm": 10.142919742172303, "learning_rate": 1.0833155053523298e-06, "loss": 0.5452, "step": 27687 }, { "epoch": 0.79, "grad_norm": 5.991123547651159, "learning_rate": 1.0830272511478362e-06, "loss": 0.302, "step": 27688 }, { "epoch": 0.79, "grad_norm": 6.901130263041751, "learning_rate": 1.0827390306405783e-06, "loss": 0.468, "step": 27689 }, { "epoch": 0.79, "grad_norm": 4.816589500486701, "learning_rate": 1.0824508438330322e-06, "loss": 0.288, "step": 27690 }, { "epoch": 0.79, "grad_norm": 6.009035221676531, "learning_rate": 1.082162690727681e-06, "loss": 0.4889, "step": 27691 }, { "epoch": 0.79, "grad_norm": 6.475778364286034, "learning_rate": 1.0818745713270019e-06, "loss": 0.439, "step": 27692 }, { "epoch": 0.79, "grad_norm": 7.282357585966765, "learning_rate": 1.0815864856334734e-06, "loss": 0.7737, "step": 27693 }, { "epoch": 0.79, "grad_norm": 3.060951951028342, "learning_rate": 1.0812984336495725e-06, "loss": 0.434, "step": 27694 }, { "epoch": 0.79, "grad_norm": 5.768287281808521, "learning_rate": 1.081010415377779e-06, "loss": 0.2388, "step": 27695 }, { "epoch": 0.79, "grad_norm": 8.488574932721495, "learning_rate": 1.0807224308205711e-06, "loss": 0.5592, "step": 27696 }, { "epoch": 0.79, "grad_norm": 4.007062905094138, "learning_rate": 1.0804344799804244e-06, "loss": 0.541, "step": 27697 }, { "epoch": 0.79, "grad_norm": 3.93034678819478, "learning_rate": 1.080146562859819e-06, "loss": 0.2256, "step": 27698 }, { "epoch": 0.79, "grad_norm": 6.371374651471777, "learning_rate": 1.0798586794612297e-06, "loss": 0.5804, "step": 27699 }, { "epoch": 0.79, "grad_norm": 6.50668350812942, "learning_rate": 1.0795708297871337e-06, "loss": 0.5512, "step": 27700 }, { "epoch": 0.79, "grad_norm": 4.87623851889183, "learning_rate": 1.0792830138400056e-06, "loss": 0.3628, "step": 27701 }, { "epoch": 0.79, "grad_norm": 5.676505728452694, "learning_rate": 1.0789952316223246e-06, "loss": 0.527, "step": 27702 }, { "epoch": 0.79, "grad_norm": 5.021257891716429, "learning_rate": 1.0787074831365634e-06, "loss": 0.4995, "step": 27703 }, { "epoch": 0.79, "grad_norm": 5.029510244639516, "learning_rate": 1.0784197683851993e-06, "loss": 0.606, "step": 27704 }, { "epoch": 0.79, "grad_norm": 4.512270727699764, "learning_rate": 1.078132087370709e-06, "loss": 0.3207, "step": 27705 }, { "epoch": 0.79, "grad_norm": 7.092773000514151, "learning_rate": 1.0778444400955635e-06, "loss": 0.5446, "step": 27706 }, { "epoch": 0.79, "grad_norm": 9.493177524004189, "learning_rate": 1.0775568265622405e-06, "loss": 0.8749, "step": 27707 }, { "epoch": 0.79, "grad_norm": 3.59176614155982, "learning_rate": 1.0772692467732116e-06, "loss": 0.2533, "step": 27708 }, { "epoch": 0.79, "grad_norm": 3.4220712683840557, "learning_rate": 1.076981700730954e-06, "loss": 0.2835, "step": 27709 }, { "epoch": 0.79, "grad_norm": 4.89607176775005, "learning_rate": 1.0766941884379384e-06, "loss": 0.7269, "step": 27710 }, { "epoch": 0.79, "grad_norm": 7.411046981565835, "learning_rate": 1.0764067098966408e-06, "loss": 0.8028, "step": 27711 }, { "epoch": 0.79, "grad_norm": 3.991409225895822, "learning_rate": 1.0761192651095337e-06, "loss": 0.301, "step": 27712 }, { "epoch": 0.79, "grad_norm": 2.496461796378666, "learning_rate": 1.0758318540790873e-06, "loss": 0.3546, "step": 27713 }, { "epoch": 0.79, "grad_norm": 4.755727828006168, "learning_rate": 1.0755444768077784e-06, "loss": 0.3153, "step": 27714 }, { "epoch": 0.79, "grad_norm": 6.396570848652125, "learning_rate": 1.0752571332980755e-06, "loss": 0.5705, "step": 27715 }, { "epoch": 0.79, "grad_norm": 1.2224691113712045, "learning_rate": 1.0749698235524535e-06, "loss": 0.0555, "step": 27716 }, { "epoch": 0.79, "grad_norm": 3.716985941261221, "learning_rate": 1.0746825475733818e-06, "loss": 0.2522, "step": 27717 }, { "epoch": 0.79, "grad_norm": 3.4823869156807676, "learning_rate": 1.0743953053633338e-06, "loss": 0.3438, "step": 27718 }, { "epoch": 0.79, "grad_norm": 2.249828544017972, "learning_rate": 1.0741080969247801e-06, "loss": 0.1197, "step": 27719 }, { "epoch": 0.79, "grad_norm": 3.522340523417481, "learning_rate": 1.0738209222601898e-06, "loss": 0.1609, "step": 27720 }, { "epoch": 0.79, "grad_norm": 5.001594646795059, "learning_rate": 1.0735337813720353e-06, "loss": 0.2323, "step": 27721 }, { "epoch": 0.79, "grad_norm": 6.94901913918541, "learning_rate": 1.0732466742627857e-06, "loss": 0.7224, "step": 27722 }, { "epoch": 0.79, "grad_norm": 5.785225692012856, "learning_rate": 1.0729596009349125e-06, "loss": 0.3476, "step": 27723 }, { "epoch": 0.79, "grad_norm": 4.100631666371375, "learning_rate": 1.0726725613908828e-06, "loss": 0.1301, "step": 27724 }, { "epoch": 0.79, "grad_norm": 8.989717119325727, "learning_rate": 1.072385555633169e-06, "loss": 0.4012, "step": 27725 }, { "epoch": 0.79, "grad_norm": 1.8379765408386564, "learning_rate": 1.072098583664239e-06, "loss": 0.1672, "step": 27726 }, { "epoch": 0.79, "grad_norm": 8.54426124678406, "learning_rate": 1.0718116454865608e-06, "loss": 0.559, "step": 27727 }, { "epoch": 0.79, "grad_norm": 3.7953872905726054, "learning_rate": 1.071524741102602e-06, "loss": 0.2459, "step": 27728 }, { "epoch": 0.79, "grad_norm": 3.661162800842857, "learning_rate": 1.0712378705148325e-06, "loss": 0.1568, "step": 27729 }, { "epoch": 0.79, "grad_norm": 5.665138805335296, "learning_rate": 1.0709510337257212e-06, "loss": 0.5421, "step": 27730 }, { "epoch": 0.79, "grad_norm": 3.4719179138772716, "learning_rate": 1.070664230737733e-06, "loss": 0.1952, "step": 27731 }, { "epoch": 0.79, "grad_norm": 8.35264513297843, "learning_rate": 1.0703774615533391e-06, "loss": 0.3632, "step": 27732 }, { "epoch": 0.79, "grad_norm": 6.15733138138129, "learning_rate": 1.0700907261750016e-06, "loss": 0.3701, "step": 27733 }, { "epoch": 0.79, "grad_norm": 6.598186139633524, "learning_rate": 1.069804024605191e-06, "loss": 0.5165, "step": 27734 }, { "epoch": 0.79, "grad_norm": 9.015042767161452, "learning_rate": 1.0695173568463712e-06, "loss": 0.7521, "step": 27735 }, { "epoch": 0.79, "grad_norm": 5.312986104828211, "learning_rate": 1.0692307229010096e-06, "loss": 0.374, "step": 27736 }, { "epoch": 0.79, "grad_norm": 6.391645205518493, "learning_rate": 1.0689441227715735e-06, "loss": 0.3518, "step": 27737 }, { "epoch": 0.79, "grad_norm": 3.8271449800336037, "learning_rate": 1.0686575564605267e-06, "loss": 0.3669, "step": 27738 }, { "epoch": 0.79, "grad_norm": 6.996078414516798, "learning_rate": 1.0683710239703354e-06, "loss": 0.4464, "step": 27739 }, { "epoch": 0.79, "grad_norm": 8.223563405645384, "learning_rate": 1.0680845253034628e-06, "loss": 0.6263, "step": 27740 }, { "epoch": 0.79, "grad_norm": 5.279142467119151, "learning_rate": 1.0677980604623761e-06, "loss": 0.3478, "step": 27741 }, { "epoch": 0.79, "grad_norm": 4.769273405736408, "learning_rate": 1.067511629449537e-06, "loss": 0.3656, "step": 27742 }, { "epoch": 0.79, "grad_norm": 8.947228185762302, "learning_rate": 1.0672252322674125e-06, "loss": 0.5368, "step": 27743 }, { "epoch": 0.79, "grad_norm": 3.4335329920543414, "learning_rate": 1.0669388689184646e-06, "loss": 0.4176, "step": 27744 }, { "epoch": 0.79, "grad_norm": 5.301876992168275, "learning_rate": 1.066652539405158e-06, "loss": 0.3892, "step": 27745 }, { "epoch": 0.79, "grad_norm": 4.947507711542335, "learning_rate": 1.0663662437299555e-06, "loss": 0.6168, "step": 27746 }, { "epoch": 0.79, "grad_norm": 6.314519474189505, "learning_rate": 1.0660799818953182e-06, "loss": 0.2483, "step": 27747 }, { "epoch": 0.79, "grad_norm": 6.1593329363837706, "learning_rate": 1.0657937539037123e-06, "loss": 0.3231, "step": 27748 }, { "epoch": 0.79, "grad_norm": 2.0310365124533134, "learning_rate": 1.0655075597575964e-06, "loss": 0.1514, "step": 27749 }, { "epoch": 0.79, "grad_norm": 4.0273016290099255, "learning_rate": 1.0652213994594362e-06, "loss": 0.4021, "step": 27750 }, { "epoch": 0.79, "grad_norm": 4.238759491127163, "learning_rate": 1.0649352730116907e-06, "loss": 0.5404, "step": 27751 }, { "epoch": 0.79, "grad_norm": 2.9517681496476467, "learning_rate": 1.0646491804168235e-06, "loss": 0.2982, "step": 27752 }, { "epoch": 0.79, "grad_norm": 5.243641590548209, "learning_rate": 1.0643631216772948e-06, "loss": 0.2029, "step": 27753 }, { "epoch": 0.79, "grad_norm": 4.647406276149775, "learning_rate": 1.064077096795565e-06, "loss": 0.5323, "step": 27754 }, { "epoch": 0.79, "grad_norm": 5.292641447300682, "learning_rate": 1.0637911057740957e-06, "loss": 0.4431, "step": 27755 }, { "epoch": 0.79, "grad_norm": 5.131461420013233, "learning_rate": 1.0635051486153465e-06, "loss": 0.1816, "step": 27756 }, { "epoch": 0.79, "grad_norm": 5.811773234226204, "learning_rate": 1.0632192253217783e-06, "loss": 0.7594, "step": 27757 }, { "epoch": 0.79, "grad_norm": 2.4997404440609325, "learning_rate": 1.06293333589585e-06, "loss": 0.1071, "step": 27758 }, { "epoch": 0.79, "grad_norm": 7.162683951260912, "learning_rate": 1.0626474803400232e-06, "loss": 0.4689, "step": 27759 }, { "epoch": 0.79, "grad_norm": 5.729544369356077, "learning_rate": 1.0623616586567532e-06, "loss": 0.5859, "step": 27760 }, { "epoch": 0.8, "grad_norm": 3.7267518065411025, "learning_rate": 1.0620758708485007e-06, "loss": 0.1545, "step": 27761 }, { "epoch": 0.8, "grad_norm": 4.930529925976735, "learning_rate": 1.0617901169177264e-06, "loss": 0.2506, "step": 27762 }, { "epoch": 0.8, "grad_norm": 5.337685674256445, "learning_rate": 1.0615043968668853e-06, "loss": 0.3331, "step": 27763 }, { "epoch": 0.8, "grad_norm": 4.754201662011137, "learning_rate": 1.0612187106984384e-06, "loss": 0.5856, "step": 27764 }, { "epoch": 0.8, "grad_norm": 7.466439500259488, "learning_rate": 1.0609330584148413e-06, "loss": 0.5783, "step": 27765 }, { "epoch": 0.8, "grad_norm": 4.3816247374389485, "learning_rate": 1.060647440018553e-06, "loss": 0.339, "step": 27766 }, { "epoch": 0.8, "grad_norm": 8.235315287286332, "learning_rate": 1.060361855512028e-06, "loss": 0.6465, "step": 27767 }, { "epoch": 0.8, "grad_norm": 7.391699876104998, "learning_rate": 1.0600763048977264e-06, "loss": 0.5, "step": 27768 }, { "epoch": 0.8, "grad_norm": 3.211302186624307, "learning_rate": 1.0597907881781016e-06, "loss": 0.4598, "step": 27769 }, { "epoch": 0.8, "grad_norm": 5.571445130578609, "learning_rate": 1.059505305355612e-06, "loss": 0.46, "step": 27770 }, { "epoch": 0.8, "grad_norm": 6.314203278034881, "learning_rate": 1.0592198564327138e-06, "loss": 0.4866, "step": 27771 }, { "epoch": 0.8, "grad_norm": 5.146581057485475, "learning_rate": 1.0589344414118624e-06, "loss": 0.4397, "step": 27772 }, { "epoch": 0.8, "grad_norm": 3.3356075793750057, "learning_rate": 1.0586490602955129e-06, "loss": 0.3566, "step": 27773 }, { "epoch": 0.8, "grad_norm": 4.09064319160677, "learning_rate": 1.0583637130861186e-06, "loss": 0.2983, "step": 27774 }, { "epoch": 0.8, "grad_norm": 3.560857996737283, "learning_rate": 1.0580783997861376e-06, "loss": 0.142, "step": 27775 }, { "epoch": 0.8, "grad_norm": 7.093001506825159, "learning_rate": 1.0577931203980213e-06, "loss": 0.6129, "step": 27776 }, { "epoch": 0.8, "grad_norm": 3.7913457381841136, "learning_rate": 1.0575078749242252e-06, "loss": 0.4099, "step": 27777 }, { "epoch": 0.8, "grad_norm": 5.523711288119676, "learning_rate": 1.057222663367205e-06, "loss": 0.2809, "step": 27778 }, { "epoch": 0.8, "grad_norm": 6.73268520458865, "learning_rate": 1.0569374857294128e-06, "loss": 0.624, "step": 27779 }, { "epoch": 0.8, "grad_norm": 8.107843743258751, "learning_rate": 1.0566523420133017e-06, "loss": 0.4959, "step": 27780 }, { "epoch": 0.8, "grad_norm": 3.956778048428861, "learning_rate": 1.0563672322213243e-06, "loss": 0.2613, "step": 27781 }, { "epoch": 0.8, "grad_norm": 6.614389451757361, "learning_rate": 1.056082156355935e-06, "loss": 0.1235, "step": 27782 }, { "epoch": 0.8, "grad_norm": 5.318980225220949, "learning_rate": 1.0557971144195833e-06, "loss": 0.566, "step": 27783 }, { "epoch": 0.8, "grad_norm": 8.874136439473236, "learning_rate": 1.0555121064147255e-06, "loss": 0.7991, "step": 27784 }, { "epoch": 0.8, "grad_norm": 5.76703339202617, "learning_rate": 1.0552271323438096e-06, "loss": 0.886, "step": 27785 }, { "epoch": 0.8, "grad_norm": 7.908930749975635, "learning_rate": 1.0549421922092906e-06, "loss": 0.5514, "step": 27786 }, { "epoch": 0.8, "grad_norm": 3.781104345117251, "learning_rate": 1.0546572860136185e-06, "loss": 0.3966, "step": 27787 }, { "epoch": 0.8, "grad_norm": 12.531308828011268, "learning_rate": 1.054372413759242e-06, "loss": 0.4069, "step": 27788 }, { "epoch": 0.8, "grad_norm": 4.12903651787169, "learning_rate": 1.0540875754486157e-06, "loss": 0.3997, "step": 27789 }, { "epoch": 0.8, "grad_norm": 4.233226606320262, "learning_rate": 1.053802771084187e-06, "loss": 0.3063, "step": 27790 }, { "epoch": 0.8, "grad_norm": 7.02699388329666, "learning_rate": 1.0535180006684081e-06, "loss": 0.2772, "step": 27791 }, { "epoch": 0.8, "grad_norm": 3.7411527537036213, "learning_rate": 1.053233264203728e-06, "loss": 0.1592, "step": 27792 }, { "epoch": 0.8, "grad_norm": 3.639801243265488, "learning_rate": 1.0529485616925962e-06, "loss": 0.1475, "step": 27793 }, { "epoch": 0.8, "grad_norm": 2.9411403743548448, "learning_rate": 1.0526638931374606e-06, "loss": 0.2357, "step": 27794 }, { "epoch": 0.8, "grad_norm": 5.2740677068455994, "learning_rate": 1.052379258540771e-06, "loss": 0.5743, "step": 27795 }, { "epoch": 0.8, "grad_norm": 3.4644228261323162, "learning_rate": 1.052094657904979e-06, "loss": 0.5404, "step": 27796 }, { "epoch": 0.8, "grad_norm": 4.35175636722583, "learning_rate": 1.0518100912325284e-06, "loss": 0.6888, "step": 27797 }, { "epoch": 0.8, "grad_norm": 6.591454164991652, "learning_rate": 1.051525558525871e-06, "loss": 0.5019, "step": 27798 }, { "epoch": 0.8, "grad_norm": 5.2310684008508135, "learning_rate": 1.0512410597874535e-06, "loss": 0.4536, "step": 27799 }, { "epoch": 0.8, "grad_norm": 8.780355496172131, "learning_rate": 1.0509565950197225e-06, "loss": 0.3959, "step": 27800 }, { "epoch": 0.8, "grad_norm": 6.602448461548825, "learning_rate": 1.0506721642251244e-06, "loss": 0.3425, "step": 27801 }, { "epoch": 0.8, "grad_norm": 5.863364059605564, "learning_rate": 1.0503877674061075e-06, "loss": 0.4869, "step": 27802 }, { "epoch": 0.8, "grad_norm": 7.398096546022664, "learning_rate": 1.0501034045651198e-06, "loss": 0.4867, "step": 27803 }, { "epoch": 0.8, "grad_norm": 4.7352792612803745, "learning_rate": 1.0498190757046051e-06, "loss": 0.2905, "step": 27804 }, { "epoch": 0.8, "grad_norm": 6.007466260360478, "learning_rate": 1.0495347808270112e-06, "loss": 0.4313, "step": 27805 }, { "epoch": 0.8, "grad_norm": 5.406937880661364, "learning_rate": 1.0492505199347835e-06, "loss": 0.4332, "step": 27806 }, { "epoch": 0.8, "grad_norm": 3.943680227300253, "learning_rate": 1.0489662930303672e-06, "loss": 0.4629, "step": 27807 }, { "epoch": 0.8, "grad_norm": 5.224532983524466, "learning_rate": 1.0486821001162062e-06, "loss": 0.7791, "step": 27808 }, { "epoch": 0.8, "grad_norm": 3.6934691139996465, "learning_rate": 1.0483979411947475e-06, "loss": 0.5445, "step": 27809 }, { "epoch": 0.8, "grad_norm": 4.024368474711842, "learning_rate": 1.048113816268434e-06, "loss": 0.3776, "step": 27810 }, { "epoch": 0.8, "grad_norm": 6.206447451428731, "learning_rate": 1.04782972533971e-06, "loss": 0.7052, "step": 27811 }, { "epoch": 0.8, "grad_norm": 8.093862171942295, "learning_rate": 1.0475456684110225e-06, "loss": 0.8371, "step": 27812 }, { "epoch": 0.8, "grad_norm": 6.7318751392855, "learning_rate": 1.0472616454848129e-06, "loss": 1.0195, "step": 27813 }, { "epoch": 0.8, "grad_norm": 6.526316700645474, "learning_rate": 1.0469776565635242e-06, "loss": 0.7702, "step": 27814 }, { "epoch": 0.8, "grad_norm": 3.6687036007704124, "learning_rate": 1.0466937016495987e-06, "loss": 0.3483, "step": 27815 }, { "epoch": 0.8, "grad_norm": 6.285062629187368, "learning_rate": 1.0464097807454821e-06, "loss": 0.5696, "step": 27816 }, { "epoch": 0.8, "grad_norm": 2.7141446537635656, "learning_rate": 1.046125893853614e-06, "loss": 0.1301, "step": 27817 }, { "epoch": 0.8, "grad_norm": 8.727967179097705, "learning_rate": 1.045842040976438e-06, "loss": 0.5635, "step": 27818 }, { "epoch": 0.8, "grad_norm": 3.973188096476007, "learning_rate": 1.0455582221163985e-06, "loss": 0.099, "step": 27819 }, { "epoch": 0.8, "grad_norm": 3.51890578729706, "learning_rate": 1.045274437275932e-06, "loss": 0.2066, "step": 27820 }, { "epoch": 0.8, "grad_norm": 7.917667151693104, "learning_rate": 1.0449906864574844e-06, "loss": 0.5557, "step": 27821 }, { "epoch": 0.8, "grad_norm": 4.942820856912483, "learning_rate": 1.0447069696634937e-06, "loss": 0.4186, "step": 27822 }, { "epoch": 0.8, "grad_norm": 4.81778535798506, "learning_rate": 1.0444232868964028e-06, "loss": 0.5823, "step": 27823 }, { "epoch": 0.8, "grad_norm": 6.1015751016757775, "learning_rate": 1.0441396381586504e-06, "loss": 0.4398, "step": 27824 }, { "epoch": 0.8, "grad_norm": 9.2929041997745, "learning_rate": 1.043856023452679e-06, "loss": 0.4775, "step": 27825 }, { "epoch": 0.8, "grad_norm": 3.0295607195528595, "learning_rate": 1.043572442780927e-06, "loss": 0.2755, "step": 27826 }, { "epoch": 0.8, "grad_norm": 3.5464761988745903, "learning_rate": 1.0432888961458326e-06, "loss": 0.3226, "step": 27827 }, { "epoch": 0.8, "grad_norm": 7.919508872217401, "learning_rate": 1.0430053835498383e-06, "loss": 0.6459, "step": 27828 }, { "epoch": 0.8, "grad_norm": 5.7844262960128665, "learning_rate": 1.0427219049953796e-06, "loss": 0.6253, "step": 27829 }, { "epoch": 0.8, "grad_norm": 6.462584870185126, "learning_rate": 1.0424384604848992e-06, "loss": 0.3789, "step": 27830 }, { "epoch": 0.8, "grad_norm": 19.572890523419694, "learning_rate": 1.0421550500208317e-06, "loss": 0.3221, "step": 27831 }, { "epoch": 0.8, "grad_norm": 4.678883122011031, "learning_rate": 1.0418716736056184e-06, "loss": 0.3663, "step": 27832 }, { "epoch": 0.8, "grad_norm": 5.405843002787402, "learning_rate": 1.0415883312416958e-06, "loss": 0.2482, "step": 27833 }, { "epoch": 0.8, "grad_norm": 2.367170944407769, "learning_rate": 1.0413050229315013e-06, "loss": 0.1144, "step": 27834 }, { "epoch": 0.8, "grad_norm": 4.85943991074952, "learning_rate": 1.041021748677471e-06, "loss": 0.4713, "step": 27835 }, { "epoch": 0.8, "grad_norm": 2.72913212184058, "learning_rate": 1.0407385084820432e-06, "loss": 0.0437, "step": 27836 }, { "epoch": 0.8, "grad_norm": 3.3936308093418375, "learning_rate": 1.040455302347656e-06, "loss": 0.332, "step": 27837 }, { "epoch": 0.8, "grad_norm": 4.713836132426967, "learning_rate": 1.0401721302767425e-06, "loss": 0.2965, "step": 27838 }, { "epoch": 0.8, "grad_norm": 4.5242231671682775, "learning_rate": 1.0398889922717426e-06, "loss": 0.3821, "step": 27839 }, { "epoch": 0.8, "grad_norm": 7.40096693294268, "learning_rate": 1.0396058883350896e-06, "loss": 0.3806, "step": 27840 }, { "epoch": 0.8, "grad_norm": 8.86223673140268, "learning_rate": 1.03932281846922e-06, "loss": 0.7268, "step": 27841 }, { "epoch": 0.8, "grad_norm": 3.5941336717218055, "learning_rate": 1.039039782676567e-06, "loss": 0.2892, "step": 27842 }, { "epoch": 0.8, "grad_norm": 4.504363911015504, "learning_rate": 1.0387567809595666e-06, "loss": 0.2474, "step": 27843 }, { "epoch": 0.8, "grad_norm": 4.614282510681255, "learning_rate": 1.0384738133206556e-06, "loss": 0.3604, "step": 27844 }, { "epoch": 0.8, "grad_norm": 5.963316156601765, "learning_rate": 1.0381908797622652e-06, "loss": 0.4445, "step": 27845 }, { "epoch": 0.8, "grad_norm": 7.256044958932167, "learning_rate": 1.0379079802868324e-06, "loss": 0.5681, "step": 27846 }, { "epoch": 0.8, "grad_norm": 6.389815001697656, "learning_rate": 1.0376251148967896e-06, "loss": 0.2123, "step": 27847 }, { "epoch": 0.8, "grad_norm": 1.7307718498055293, "learning_rate": 1.0373422835945697e-06, "loss": 0.2487, "step": 27848 }, { "epoch": 0.8, "grad_norm": 6.742681916144514, "learning_rate": 1.037059486382605e-06, "loss": 0.3788, "step": 27849 }, { "epoch": 0.8, "grad_norm": 3.461295425816796, "learning_rate": 1.0367767232633314e-06, "loss": 0.2818, "step": 27850 }, { "epoch": 0.8, "grad_norm": 5.19555144943624, "learning_rate": 1.0364939942391777e-06, "loss": 0.4637, "step": 27851 }, { "epoch": 0.8, "grad_norm": 8.13430561354094, "learning_rate": 1.0362112993125794e-06, "loss": 0.6642, "step": 27852 }, { "epoch": 0.8, "grad_norm": 4.281460109483434, "learning_rate": 1.035928638485968e-06, "loss": 0.176, "step": 27853 }, { "epoch": 0.8, "grad_norm": 6.781755911945412, "learning_rate": 1.0356460117617723e-06, "loss": 0.3167, "step": 27854 }, { "epoch": 0.8, "grad_norm": 8.235859167124163, "learning_rate": 1.0353634191424278e-06, "loss": 0.5629, "step": 27855 }, { "epoch": 0.8, "grad_norm": 6.6479360747723995, "learning_rate": 1.035080860630362e-06, "loss": 0.5254, "step": 27856 }, { "epoch": 0.8, "grad_norm": 2.9958556477259912, "learning_rate": 1.034798336228009e-06, "loss": 0.1966, "step": 27857 }, { "epoch": 0.8, "grad_norm": 6.432894568560284, "learning_rate": 1.0345158459377963e-06, "loss": 0.7428, "step": 27858 }, { "epoch": 0.8, "grad_norm": 6.063438421551204, "learning_rate": 1.0342333897621565e-06, "loss": 0.5106, "step": 27859 }, { "epoch": 0.8, "grad_norm": 3.807605619136577, "learning_rate": 1.0339509677035192e-06, "loss": 0.6107, "step": 27860 }, { "epoch": 0.8, "grad_norm": 8.665163374393563, "learning_rate": 1.0336685797643115e-06, "loss": 0.4413, "step": 27861 }, { "epoch": 0.8, "grad_norm": 4.771527446688273, "learning_rate": 1.0333862259469664e-06, "loss": 0.2561, "step": 27862 }, { "epoch": 0.8, "grad_norm": 6.294970127243899, "learning_rate": 1.0331039062539094e-06, "loss": 0.3074, "step": 27863 }, { "epoch": 0.8, "grad_norm": 5.8062257230351735, "learning_rate": 1.032821620687573e-06, "loss": 0.3979, "step": 27864 }, { "epoch": 0.8, "grad_norm": 7.096216755974427, "learning_rate": 1.0325393692503821e-06, "loss": 0.4504, "step": 27865 }, { "epoch": 0.8, "grad_norm": 4.127012484367756, "learning_rate": 1.0322571519447678e-06, "loss": 0.399, "step": 27866 }, { "epoch": 0.8, "grad_norm": 2.712979737501058, "learning_rate": 1.0319749687731567e-06, "loss": 0.3347, "step": 27867 }, { "epoch": 0.8, "grad_norm": 3.037497590695905, "learning_rate": 1.031692819737975e-06, "loss": 0.3991, "step": 27868 }, { "epoch": 0.8, "grad_norm": 6.0113367823164685, "learning_rate": 1.0314107048416533e-06, "loss": 0.4853, "step": 27869 }, { "epoch": 0.8, "grad_norm": 7.418964722891549, "learning_rate": 1.0311286240866147e-06, "loss": 0.5459, "step": 27870 }, { "epoch": 0.8, "grad_norm": 6.30331753922287, "learning_rate": 1.0308465774752897e-06, "loss": 0.5843, "step": 27871 }, { "epoch": 0.8, "grad_norm": 4.776073196401351, "learning_rate": 1.030564565010101e-06, "loss": 0.49, "step": 27872 }, { "epoch": 0.8, "grad_norm": 5.577386588153725, "learning_rate": 1.0302825866934784e-06, "loss": 0.3605, "step": 27873 }, { "epoch": 0.8, "grad_norm": 3.7227007612067093, "learning_rate": 1.0300006425278458e-06, "loss": 0.506, "step": 27874 }, { "epoch": 0.8, "grad_norm": 3.6367501852641624, "learning_rate": 1.0297187325156288e-06, "loss": 0.3523, "step": 27875 }, { "epoch": 0.8, "grad_norm": 5.139374879532944, "learning_rate": 1.0294368566592512e-06, "loss": 0.4467, "step": 27876 }, { "epoch": 0.8, "grad_norm": 3.3757172105043254, "learning_rate": 1.0291550149611396e-06, "loss": 0.3252, "step": 27877 }, { "epoch": 0.8, "grad_norm": 3.2231616369520806, "learning_rate": 1.02887320742372e-06, "loss": 0.4951, "step": 27878 }, { "epoch": 0.8, "grad_norm": 7.343276767498419, "learning_rate": 1.028591434049415e-06, "loss": 0.6023, "step": 27879 }, { "epoch": 0.8, "grad_norm": 3.3130581223639686, "learning_rate": 1.0283096948406495e-06, "loss": 0.218, "step": 27880 }, { "epoch": 0.8, "grad_norm": 6.429689020316592, "learning_rate": 1.0280279897998447e-06, "loss": 0.4732, "step": 27881 }, { "epoch": 0.8, "grad_norm": 9.225515281945352, "learning_rate": 1.0277463189294279e-06, "loss": 0.4995, "step": 27882 }, { "epoch": 0.8, "grad_norm": 6.2762755628717475, "learning_rate": 1.0274646822318184e-06, "loss": 0.3105, "step": 27883 }, { "epoch": 0.8, "grad_norm": 5.869402633632813, "learning_rate": 1.0271830797094418e-06, "loss": 0.1396, "step": 27884 }, { "epoch": 0.8, "grad_norm": 5.192913034805064, "learning_rate": 1.0269015113647213e-06, "loss": 0.6341, "step": 27885 }, { "epoch": 0.8, "grad_norm": 3.4484221595938025, "learning_rate": 1.0266199772000774e-06, "loss": 0.2746, "step": 27886 }, { "epoch": 0.8, "grad_norm": 3.1748392554389833, "learning_rate": 1.026338477217933e-06, "loss": 0.3166, "step": 27887 }, { "epoch": 0.8, "grad_norm": 7.232068017373893, "learning_rate": 1.026057011420708e-06, "loss": 0.8117, "step": 27888 }, { "epoch": 0.8, "grad_norm": 6.289992212797115, "learning_rate": 1.0257755798108272e-06, "loss": 0.939, "step": 27889 }, { "epoch": 0.8, "grad_norm": 3.776576677326553, "learning_rate": 1.0254941823907077e-06, "loss": 0.4501, "step": 27890 }, { "epoch": 0.8, "grad_norm": 3.320160734410668, "learning_rate": 1.0252128191627736e-06, "loss": 0.2641, "step": 27891 }, { "epoch": 0.8, "grad_norm": 6.40721592017768, "learning_rate": 1.0249314901294432e-06, "loss": 0.2126, "step": 27892 }, { "epoch": 0.8, "grad_norm": 7.002816553230003, "learning_rate": 1.0246501952931392e-06, "loss": 0.5399, "step": 27893 }, { "epoch": 0.8, "grad_norm": 3.221464160880285, "learning_rate": 1.0243689346562801e-06, "loss": 0.4402, "step": 27894 }, { "epoch": 0.8, "grad_norm": 4.338200653827842, "learning_rate": 1.0240877082212841e-06, "loss": 0.275, "step": 27895 }, { "epoch": 0.8, "grad_norm": 6.8129557535853165, "learning_rate": 1.0238065159905736e-06, "loss": 0.7152, "step": 27896 }, { "epoch": 0.8, "grad_norm": 2.651404235468602, "learning_rate": 1.0235253579665643e-06, "loss": 0.3059, "step": 27897 }, { "epoch": 0.8, "grad_norm": 6.848075127071432, "learning_rate": 1.0232442341516786e-06, "loss": 0.5709, "step": 27898 }, { "epoch": 0.8, "grad_norm": 9.488608243544459, "learning_rate": 1.0229631445483311e-06, "loss": 0.4572, "step": 27899 }, { "epoch": 0.8, "grad_norm": 4.699652673184017, "learning_rate": 1.022682089158944e-06, "loss": 0.5449, "step": 27900 }, { "epoch": 0.8, "grad_norm": 5.04032666812854, "learning_rate": 1.0224010679859331e-06, "loss": 0.5551, "step": 27901 }, { "epoch": 0.8, "grad_norm": 2.860868866293572, "learning_rate": 1.0221200810317144e-06, "loss": 0.1753, "step": 27902 }, { "epoch": 0.8, "grad_norm": 14.816613322348983, "learning_rate": 1.0218391282987085e-06, "loss": 0.4403, "step": 27903 }, { "epoch": 0.8, "grad_norm": 3.956137598619532, "learning_rate": 1.021558209789329e-06, "loss": 0.398, "step": 27904 }, { "epoch": 0.8, "grad_norm": 4.101414138290503, "learning_rate": 1.021277325505996e-06, "loss": 0.463, "step": 27905 }, { "epoch": 0.8, "grad_norm": 4.076889289340847, "learning_rate": 1.020996475451123e-06, "loss": 0.2382, "step": 27906 }, { "epoch": 0.8, "grad_norm": 2.3860783443586846, "learning_rate": 1.02071565962713e-06, "loss": 0.1149, "step": 27907 }, { "epoch": 0.8, "grad_norm": 5.490694974606466, "learning_rate": 1.0204348780364282e-06, "loss": 0.7743, "step": 27908 }, { "epoch": 0.8, "grad_norm": 5.39285519671157, "learning_rate": 1.0201541306814344e-06, "loss": 0.457, "step": 27909 }, { "epoch": 0.8, "grad_norm": 3.497287465469062, "learning_rate": 1.0198734175645663e-06, "loss": 0.4759, "step": 27910 }, { "epoch": 0.8, "grad_norm": 3.92543281792912, "learning_rate": 1.0195927386882353e-06, "loss": 0.1444, "step": 27911 }, { "epoch": 0.8, "grad_norm": 9.171462977446263, "learning_rate": 1.0193120940548595e-06, "loss": 0.5761, "step": 27912 }, { "epoch": 0.8, "grad_norm": 4.544803422762121, "learning_rate": 1.019031483666852e-06, "loss": 0.2678, "step": 27913 }, { "epoch": 0.8, "grad_norm": 5.336388408325132, "learning_rate": 1.018750907526626e-06, "loss": 0.6687, "step": 27914 }, { "epoch": 0.8, "grad_norm": 4.975779521197347, "learning_rate": 1.0184703656365946e-06, "loss": 0.7263, "step": 27915 }, { "epoch": 0.8, "grad_norm": 9.419724967175103, "learning_rate": 1.0181898579991738e-06, "loss": 0.1796, "step": 27916 }, { "epoch": 0.8, "grad_norm": 2.566427384613201, "learning_rate": 1.0179093846167742e-06, "loss": 0.1528, "step": 27917 }, { "epoch": 0.8, "grad_norm": 4.920013366308896, "learning_rate": 1.0176289454918099e-06, "loss": 0.7725, "step": 27918 }, { "epoch": 0.8, "grad_norm": 8.34912816138894, "learning_rate": 1.0173485406266948e-06, "loss": 1.0503, "step": 27919 }, { "epoch": 0.8, "grad_norm": 5.863767742226911, "learning_rate": 1.0170681700238394e-06, "loss": 0.579, "step": 27920 }, { "epoch": 0.8, "grad_norm": 6.6147997442772875, "learning_rate": 1.0167878336856568e-06, "loss": 0.4935, "step": 27921 }, { "epoch": 0.8, "grad_norm": 3.473000885862001, "learning_rate": 1.016507531614556e-06, "loss": 0.3673, "step": 27922 }, { "epoch": 0.8, "grad_norm": 7.33053562231784, "learning_rate": 1.0162272638129522e-06, "loss": 0.6155, "step": 27923 }, { "epoch": 0.8, "grad_norm": 4.477082493590993, "learning_rate": 1.0159470302832535e-06, "loss": 0.3707, "step": 27924 }, { "epoch": 0.8, "grad_norm": 4.213576613361054, "learning_rate": 1.0156668310278718e-06, "loss": 0.4684, "step": 27925 }, { "epoch": 0.8, "grad_norm": 5.471334228285626, "learning_rate": 1.015386666049219e-06, "loss": 0.5317, "step": 27926 }, { "epoch": 0.8, "grad_norm": 6.250961992616007, "learning_rate": 1.0151065353497042e-06, "loss": 0.5881, "step": 27927 }, { "epoch": 0.8, "grad_norm": 6.884176233913988, "learning_rate": 1.0148264389317375e-06, "loss": 0.4447, "step": 27928 }, { "epoch": 0.8, "grad_norm": 7.602692036936396, "learning_rate": 1.0145463767977264e-06, "loss": 0.5721, "step": 27929 }, { "epoch": 0.8, "grad_norm": 4.200057388662901, "learning_rate": 1.0142663489500843e-06, "loss": 0.4643, "step": 27930 }, { "epoch": 0.8, "grad_norm": 2.7046495228904894, "learning_rate": 1.013986355391216e-06, "loss": 0.3436, "step": 27931 }, { "epoch": 0.8, "grad_norm": 6.83315868464637, "learning_rate": 1.0137063961235339e-06, "loss": 0.3872, "step": 27932 }, { "epoch": 0.8, "grad_norm": 9.86711027548586, "learning_rate": 1.0134264711494434e-06, "loss": 0.908, "step": 27933 }, { "epoch": 0.8, "grad_norm": 4.508826817682244, "learning_rate": 1.0131465804713553e-06, "loss": 0.5454, "step": 27934 }, { "epoch": 0.8, "grad_norm": 5.232958519425491, "learning_rate": 1.0128667240916768e-06, "loss": 0.4789, "step": 27935 }, { "epoch": 0.8, "grad_norm": 5.7974960734759975, "learning_rate": 1.0125869020128133e-06, "loss": 0.5639, "step": 27936 }, { "epoch": 0.8, "grad_norm": 6.4788848983318985, "learning_rate": 1.0123071142371754e-06, "loss": 0.424, "step": 27937 }, { "epoch": 0.8, "grad_norm": 8.734521472228225, "learning_rate": 1.0120273607671666e-06, "loss": 0.6079, "step": 27938 }, { "epoch": 0.8, "grad_norm": 5.8211011928274745, "learning_rate": 1.011747641605197e-06, "loss": 0.4939, "step": 27939 }, { "epoch": 0.8, "grad_norm": 4.664710774568392, "learning_rate": 1.011467956753671e-06, "loss": 0.3484, "step": 27940 }, { "epoch": 0.8, "grad_norm": 7.798452580841897, "learning_rate": 1.0111883062149946e-06, "loss": 0.3174, "step": 27941 }, { "epoch": 0.8, "grad_norm": 10.579955986378492, "learning_rate": 1.0109086899915749e-06, "loss": 0.7221, "step": 27942 }, { "epoch": 0.8, "grad_norm": 4.578518931915961, "learning_rate": 1.0106291080858155e-06, "loss": 0.5893, "step": 27943 }, { "epoch": 0.8, "grad_norm": 6.951758775536029, "learning_rate": 1.0103495605001235e-06, "loss": 0.9255, "step": 27944 }, { "epoch": 0.8, "grad_norm": 7.520451191697808, "learning_rate": 1.0100700472369017e-06, "loss": 0.7553, "step": 27945 }, { "epoch": 0.8, "grad_norm": 5.955866024934083, "learning_rate": 1.009790568298558e-06, "loss": 0.5195, "step": 27946 }, { "epoch": 0.8, "grad_norm": 3.832783072133544, "learning_rate": 1.0095111236874943e-06, "loss": 0.2433, "step": 27947 }, { "epoch": 0.8, "grad_norm": 10.057315746708547, "learning_rate": 1.0092317134061152e-06, "loss": 0.4148, "step": 27948 }, { "epoch": 0.8, "grad_norm": 8.020326168985793, "learning_rate": 1.0089523374568233e-06, "loss": 0.4123, "step": 27949 }, { "epoch": 0.8, "grad_norm": 5.178371256450131, "learning_rate": 1.0086729958420227e-06, "loss": 0.3763, "step": 27950 }, { "epoch": 0.8, "grad_norm": 5.436608690895674, "learning_rate": 1.0083936885641181e-06, "loss": 0.3614, "step": 27951 }, { "epoch": 0.8, "grad_norm": 2.498881304307566, "learning_rate": 1.0081144156255101e-06, "loss": 0.1944, "step": 27952 }, { "epoch": 0.8, "grad_norm": 5.257524003478801, "learning_rate": 1.0078351770286032e-06, "loss": 0.3863, "step": 27953 }, { "epoch": 0.8, "grad_norm": 6.272064856909158, "learning_rate": 1.0075559727757994e-06, "loss": 0.624, "step": 27954 }, { "epoch": 0.8, "grad_norm": 4.905432225975496, "learning_rate": 1.0072768028694996e-06, "loss": 0.455, "step": 27955 }, { "epoch": 0.8, "grad_norm": 7.4219241813234795, "learning_rate": 1.0069976673121045e-06, "loss": 0.4954, "step": 27956 }, { "epoch": 0.8, "grad_norm": 13.898167659548065, "learning_rate": 1.006718566106018e-06, "loss": 0.4035, "step": 27957 }, { "epoch": 0.8, "grad_norm": 5.419800034261835, "learning_rate": 1.0064394992536392e-06, "loss": 0.396, "step": 27958 }, { "epoch": 0.8, "grad_norm": 7.248612698647423, "learning_rate": 1.0061604667573694e-06, "loss": 0.4959, "step": 27959 }, { "epoch": 0.8, "grad_norm": 6.7318480811246255, "learning_rate": 1.0058814686196106e-06, "loss": 0.8451, "step": 27960 }, { "epoch": 0.8, "grad_norm": 3.346953666610213, "learning_rate": 1.005602504842762e-06, "loss": 0.3806, "step": 27961 }, { "epoch": 0.8, "grad_norm": 5.6302020707802845, "learning_rate": 1.005323575429223e-06, "loss": 0.2308, "step": 27962 }, { "epoch": 0.8, "grad_norm": 6.216285370527226, "learning_rate": 1.0050446803813925e-06, "loss": 0.5693, "step": 27963 }, { "epoch": 0.8, "grad_norm": 2.3469579994367167, "learning_rate": 1.0047658197016714e-06, "loss": 0.0889, "step": 27964 }, { "epoch": 0.8, "grad_norm": 3.176998472904394, "learning_rate": 1.0044869933924573e-06, "loss": 0.3856, "step": 27965 }, { "epoch": 0.8, "grad_norm": 7.972879454325952, "learning_rate": 1.0042082014561493e-06, "loss": 0.4155, "step": 27966 }, { "epoch": 0.8, "grad_norm": 3.5869697963207616, "learning_rate": 1.003929443895149e-06, "loss": 0.1515, "step": 27967 }, { "epoch": 0.8, "grad_norm": 3.9210276885243167, "learning_rate": 1.003650720711849e-06, "loss": 0.1851, "step": 27968 }, { "epoch": 0.8, "grad_norm": 9.030630572238966, "learning_rate": 1.0033720319086509e-06, "loss": 0.8277, "step": 27969 }, { "epoch": 0.8, "grad_norm": 7.101353909793705, "learning_rate": 1.0030933774879503e-06, "loss": 0.7181, "step": 27970 }, { "epoch": 0.8, "grad_norm": 7.966601392513145, "learning_rate": 1.0028147574521462e-06, "loss": 0.4022, "step": 27971 }, { "epoch": 0.8, "grad_norm": 5.2738974137554715, "learning_rate": 1.0025361718036337e-06, "loss": 0.2781, "step": 27972 }, { "epoch": 0.8, "grad_norm": 4.094135877753724, "learning_rate": 1.0022576205448115e-06, "loss": 0.3966, "step": 27973 }, { "epoch": 0.8, "grad_norm": 5.171049507391008, "learning_rate": 1.0019791036780747e-06, "loss": 0.5097, "step": 27974 }, { "epoch": 0.8, "grad_norm": 4.810040043388597, "learning_rate": 1.001700621205818e-06, "loss": 0.4096, "step": 27975 }, { "epoch": 0.8, "grad_norm": 3.7217804077581045, "learning_rate": 1.0014221731304396e-06, "loss": 0.2955, "step": 27976 }, { "epoch": 0.8, "grad_norm": 8.144694652793538, "learning_rate": 1.0011437594543333e-06, "loss": 0.4278, "step": 27977 }, { "epoch": 0.8, "grad_norm": 2.867094906343456, "learning_rate": 1.0008653801798956e-06, "loss": 0.5591, "step": 27978 }, { "epoch": 0.8, "grad_norm": 5.263965967911858, "learning_rate": 1.00058703530952e-06, "loss": 0.4179, "step": 27979 }, { "epoch": 0.8, "grad_norm": 5.549606982912837, "learning_rate": 1.0003087248456022e-06, "loss": 0.2853, "step": 27980 }, { "epoch": 0.8, "grad_norm": 7.855904209050188, "learning_rate": 1.0000304487905366e-06, "loss": 0.4825, "step": 27981 }, { "epoch": 0.8, "grad_norm": 5.7791153962112425, "learning_rate": 9.997522071467164e-07, "loss": 0.5905, "step": 27982 }, { "epoch": 0.8, "grad_norm": 12.065506397406388, "learning_rate": 9.994739999165336e-07, "loss": 0.9884, "step": 27983 }, { "epoch": 0.8, "grad_norm": 11.696249394932003, "learning_rate": 9.991958271023844e-07, "loss": 0.6408, "step": 27984 }, { "epoch": 0.8, "grad_norm": 5.558128525117172, "learning_rate": 9.989176887066615e-07, "loss": 0.4743, "step": 27985 }, { "epoch": 0.8, "grad_norm": 2.5010733207744815, "learning_rate": 9.986395847317559e-07, "loss": 0.2929, "step": 27986 }, { "epoch": 0.8, "grad_norm": 4.786063918618238, "learning_rate": 9.98361515180063e-07, "loss": 0.5597, "step": 27987 }, { "epoch": 0.8, "grad_norm": 2.492816439105792, "learning_rate": 9.98083480053973e-07, "loss": 0.2484, "step": 27988 }, { "epoch": 0.8, "grad_norm": 5.854566834191025, "learning_rate": 9.978054793558778e-07, "loss": 0.4528, "step": 27989 }, { "epoch": 0.8, "grad_norm": 4.051948960273711, "learning_rate": 9.975275130881684e-07, "loss": 0.373, "step": 27990 }, { "epoch": 0.8, "grad_norm": 5.408226644093577, "learning_rate": 9.972495812532367e-07, "loss": 0.594, "step": 27991 }, { "epoch": 0.8, "grad_norm": 6.24886594020349, "learning_rate": 9.96971683853476e-07, "loss": 0.5207, "step": 27992 }, { "epoch": 0.8, "grad_norm": 4.451903004877537, "learning_rate": 9.96693820891273e-07, "loss": 0.2235, "step": 27993 }, { "epoch": 0.8, "grad_norm": 10.401183077570886, "learning_rate": 9.964159923690231e-07, "loss": 0.4669, "step": 27994 }, { "epoch": 0.8, "grad_norm": 6.411283125498953, "learning_rate": 9.961381982891105e-07, "loss": 0.5404, "step": 27995 }, { "epoch": 0.8, "grad_norm": 5.09848925335986, "learning_rate": 9.958604386539301e-07, "loss": 0.4178, "step": 27996 }, { "epoch": 0.8, "grad_norm": 8.802473817248464, "learning_rate": 9.955827134658674e-07, "loss": 1.0465, "step": 27997 }, { "epoch": 0.8, "grad_norm": 8.02178195116254, "learning_rate": 9.953050227273148e-07, "loss": 0.3902, "step": 27998 }, { "epoch": 0.8, "grad_norm": 7.186413956565176, "learning_rate": 9.950273664406591e-07, "loss": 0.5641, "step": 27999 }, { "epoch": 0.8, "grad_norm": 17.519756391390604, "learning_rate": 9.94749744608291e-07, "loss": 0.3738, "step": 28000 }, { "epoch": 0.8, "grad_norm": 14.421019755057436, "learning_rate": 9.944721572325977e-07, "loss": 0.5458, "step": 28001 }, { "epoch": 0.8, "grad_norm": 4.229405291485336, "learning_rate": 9.941946043159662e-07, "loss": 0.302, "step": 28002 }, { "epoch": 0.8, "grad_norm": 7.811971783901029, "learning_rate": 9.93917085860786e-07, "loss": 0.9227, "step": 28003 }, { "epoch": 0.8, "grad_norm": 9.405972549554157, "learning_rate": 9.936396018694434e-07, "loss": 0.6041, "step": 28004 }, { "epoch": 0.8, "grad_norm": 4.481122794799893, "learning_rate": 9.933621523443265e-07, "loss": 0.3509, "step": 28005 }, { "epoch": 0.8, "grad_norm": 4.720249670343553, "learning_rate": 9.93084737287821e-07, "loss": 0.2711, "step": 28006 }, { "epoch": 0.8, "grad_norm": 7.6876196503050505, "learning_rate": 9.928073567023149e-07, "loss": 0.584, "step": 28007 }, { "epoch": 0.8, "grad_norm": 4.144769782160126, "learning_rate": 9.925300105901942e-07, "loss": 0.2597, "step": 28008 }, { "epoch": 0.8, "grad_norm": 6.382200624980766, "learning_rate": 9.922526989538427e-07, "loss": 1.0883, "step": 28009 }, { "epoch": 0.8, "grad_norm": 5.662765691220711, "learning_rate": 9.919754217956495e-07, "loss": 0.2885, "step": 28010 }, { "epoch": 0.8, "grad_norm": 7.389342634311982, "learning_rate": 9.91698179117997e-07, "loss": 0.5364, "step": 28011 }, { "epoch": 0.8, "grad_norm": 6.7795075318169244, "learning_rate": 9.914209709232725e-07, "loss": 1.0429, "step": 28012 }, { "epoch": 0.8, "grad_norm": 3.7694227450077373, "learning_rate": 9.911437972138587e-07, "loss": 0.3693, "step": 28013 }, { "epoch": 0.8, "grad_norm": 5.0864846305888705, "learning_rate": 9.908666579921428e-07, "loss": 0.3364, "step": 28014 }, { "epoch": 0.8, "grad_norm": 7.196341077616841, "learning_rate": 9.905895532605069e-07, "loss": 0.6643, "step": 28015 }, { "epoch": 0.8, "grad_norm": 3.5468526831105454, "learning_rate": 9.903124830213346e-07, "loss": 0.3042, "step": 28016 }, { "epoch": 0.8, "grad_norm": 4.608303475699985, "learning_rate": 9.900354472770113e-07, "loss": 0.6783, "step": 28017 }, { "epoch": 0.8, "grad_norm": 14.814499844783574, "learning_rate": 9.897584460299181e-07, "loss": 0.4836, "step": 28018 }, { "epoch": 0.8, "grad_norm": 4.5042073661148745, "learning_rate": 9.894814792824409e-07, "loss": 0.4713, "step": 28019 }, { "epoch": 0.8, "grad_norm": 8.388734132049427, "learning_rate": 9.892045470369589e-07, "loss": 0.449, "step": 28020 }, { "epoch": 0.8, "grad_norm": 2.5893247841015543, "learning_rate": 9.889276492958578e-07, "loss": 0.2198, "step": 28021 }, { "epoch": 0.8, "grad_norm": 9.169695359622436, "learning_rate": 9.886507860615185e-07, "loss": 0.8065, "step": 28022 }, { "epoch": 0.8, "grad_norm": 4.891223663641328, "learning_rate": 9.883739573363227e-07, "loss": 0.4714, "step": 28023 }, { "epoch": 0.8, "grad_norm": 4.590976890128569, "learning_rate": 9.880971631226498e-07, "loss": 0.314, "step": 28024 }, { "epoch": 0.8, "grad_norm": 5.747337637155686, "learning_rate": 9.878204034228838e-07, "loss": 0.5991, "step": 28025 }, { "epoch": 0.8, "grad_norm": 7.080993677333363, "learning_rate": 9.87543678239406e-07, "loss": 0.4168, "step": 28026 }, { "epoch": 0.8, "grad_norm": 8.19947573916502, "learning_rate": 9.872669875745954e-07, "loss": 0.3799, "step": 28027 }, { "epoch": 0.8, "grad_norm": 3.1016731915071363, "learning_rate": 9.869903314308332e-07, "loss": 0.112, "step": 28028 }, { "epoch": 0.8, "grad_norm": 3.717563022984883, "learning_rate": 9.867137098104977e-07, "loss": 0.3778, "step": 28029 }, { "epoch": 0.8, "grad_norm": 5.971931328699975, "learning_rate": 9.86437122715972e-07, "loss": 0.6496, "step": 28030 }, { "epoch": 0.8, "grad_norm": 3.1093550638297756, "learning_rate": 9.861605701496312e-07, "loss": 0.4381, "step": 28031 }, { "epoch": 0.8, "grad_norm": 6.6054667287289925, "learning_rate": 9.858840521138579e-07, "loss": 0.3599, "step": 28032 }, { "epoch": 0.8, "grad_norm": 4.868742080701623, "learning_rate": 9.856075686110306e-07, "loss": 0.57, "step": 28033 }, { "epoch": 0.8, "grad_norm": 4.600194057227477, "learning_rate": 9.853311196435267e-07, "loss": 0.4219, "step": 28034 }, { "epoch": 0.8, "grad_norm": 5.519407385686929, "learning_rate": 9.850547052137255e-07, "loss": 0.3785, "step": 28035 }, { "epoch": 0.8, "grad_norm": 6.2635954426074125, "learning_rate": 9.84778325324003e-07, "loss": 0.5971, "step": 28036 }, { "epoch": 0.8, "grad_norm": 5.091080650699324, "learning_rate": 9.84501979976739e-07, "loss": 0.2816, "step": 28037 }, { "epoch": 0.8, "grad_norm": 6.6732596539607005, "learning_rate": 9.842256691743095e-07, "loss": 0.7236, "step": 28038 }, { "epoch": 0.8, "grad_norm": 3.869917643752576, "learning_rate": 9.83949392919093e-07, "loss": 0.2227, "step": 28039 }, { "epoch": 0.8, "grad_norm": 4.374690371864696, "learning_rate": 9.836731512134639e-07, "loss": 0.3939, "step": 28040 }, { "epoch": 0.8, "grad_norm": 5.867353332857914, "learning_rate": 9.833969440598017e-07, "loss": 0.5486, "step": 28041 }, { "epoch": 0.8, "grad_norm": 7.037803003756921, "learning_rate": 9.831207714604812e-07, "loss": 0.5565, "step": 28042 }, { "epoch": 0.8, "grad_norm": 3.747161553937021, "learning_rate": 9.82844633417877e-07, "loss": 0.24, "step": 28043 }, { "epoch": 0.8, "grad_norm": 7.655266087427904, "learning_rate": 9.825685299343667e-07, "loss": 0.5284, "step": 28044 }, { "epoch": 0.8, "grad_norm": 7.1334502320500395, "learning_rate": 9.822924610123237e-07, "loss": 0.6484, "step": 28045 }, { "epoch": 0.8, "grad_norm": 9.623196123195674, "learning_rate": 9.82016426654125e-07, "loss": 0.8741, "step": 28046 }, { "epoch": 0.8, "grad_norm": 6.856763546818214, "learning_rate": 9.817404268621427e-07, "loss": 0.6923, "step": 28047 }, { "epoch": 0.8, "grad_norm": 6.5445559999021405, "learning_rate": 9.814644616387547e-07, "loss": 0.4001, "step": 28048 }, { "epoch": 0.8, "grad_norm": 3.89685989903122, "learning_rate": 9.811885309863327e-07, "loss": 0.5048, "step": 28049 }, { "epoch": 0.8, "grad_norm": 6.506719655465414, "learning_rate": 9.8091263490725e-07, "loss": 0.401, "step": 28050 }, { "epoch": 0.8, "grad_norm": 7.897841494266309, "learning_rate": 9.806367734038823e-07, "loss": 0.5607, "step": 28051 }, { "epoch": 0.8, "grad_norm": 4.0192916101730445, "learning_rate": 9.803609464786e-07, "loss": 0.2711, "step": 28052 }, { "epoch": 0.8, "grad_norm": 8.282343493548018, "learning_rate": 9.800851541337792e-07, "loss": 0.5241, "step": 28053 }, { "epoch": 0.8, "grad_norm": 6.763495200325724, "learning_rate": 9.798093963717908e-07, "loss": 0.5134, "step": 28054 }, { "epoch": 0.8, "grad_norm": 5.242126691894782, "learning_rate": 9.795336731950073e-07, "loss": 0.4725, "step": 28055 }, { "epoch": 0.8, "grad_norm": 4.257693801765241, "learning_rate": 9.792579846057992e-07, "loss": 0.4164, "step": 28056 }, { "epoch": 0.8, "grad_norm": 6.176703889030927, "learning_rate": 9.789823306065393e-07, "loss": 0.5723, "step": 28057 }, { "epoch": 0.8, "grad_norm": 4.172141963117931, "learning_rate": 9.787067111996007e-07, "loss": 0.4167, "step": 28058 }, { "epoch": 0.8, "grad_norm": 5.082052173615411, "learning_rate": 9.784311263873518e-07, "loss": 0.2176, "step": 28059 }, { "epoch": 0.8, "grad_norm": 7.438888292009505, "learning_rate": 9.781555761721667e-07, "loss": 0.407, "step": 28060 }, { "epoch": 0.8, "grad_norm": 8.005721073114694, "learning_rate": 9.778800605564137e-07, "loss": 0.3734, "step": 28061 }, { "epoch": 0.8, "grad_norm": 6.856327153122118, "learning_rate": 9.776045795424631e-07, "loss": 0.634, "step": 28062 }, { "epoch": 0.8, "grad_norm": 4.792220053299473, "learning_rate": 9.77329133132684e-07, "loss": 0.4072, "step": 28063 }, { "epoch": 0.8, "grad_norm": 2.5535578898853153, "learning_rate": 9.770537213294478e-07, "loss": 0.5347, "step": 28064 }, { "epoch": 0.8, "grad_norm": 5.481395444604078, "learning_rate": 9.767783441351224e-07, "loss": 0.4654, "step": 28065 }, { "epoch": 0.8, "grad_norm": 5.422734041347115, "learning_rate": 9.765030015520777e-07, "loss": 0.312, "step": 28066 }, { "epoch": 0.8, "grad_norm": 4.799328651765777, "learning_rate": 9.762276935826831e-07, "loss": 0.6098, "step": 28067 }, { "epoch": 0.8, "grad_norm": 5.105608237009388, "learning_rate": 9.759524202293063e-07, "loss": 0.4875, "step": 28068 }, { "epoch": 0.8, "grad_norm": 4.8766003451701545, "learning_rate": 9.756771814943155e-07, "loss": 0.4592, "step": 28069 }, { "epoch": 0.8, "grad_norm": 7.501180110911579, "learning_rate": 9.754019773800771e-07, "loss": 0.8291, "step": 28070 }, { "epoch": 0.8, "grad_norm": 6.070576665719849, "learning_rate": 9.751268078889614e-07, "loss": 0.7188, "step": 28071 }, { "epoch": 0.8, "grad_norm": 8.446971352804223, "learning_rate": 9.748516730233327e-07, "loss": 0.5667, "step": 28072 }, { "epoch": 0.8, "grad_norm": 4.972292087402701, "learning_rate": 9.745765727855594e-07, "loss": 0.7232, "step": 28073 }, { "epoch": 0.8, "grad_norm": 4.232459588756393, "learning_rate": 9.7430150717801e-07, "loss": 0.316, "step": 28074 }, { "epoch": 0.8, "grad_norm": 1.7245251212702843, "learning_rate": 9.740264762030489e-07, "loss": 0.2613, "step": 28075 }, { "epoch": 0.8, "grad_norm": 4.3748514967646495, "learning_rate": 9.73751479863042e-07, "loss": 0.3247, "step": 28076 }, { "epoch": 0.8, "grad_norm": 6.663723184549149, "learning_rate": 9.734765181603546e-07, "loss": 0.3318, "step": 28077 }, { "epoch": 0.8, "grad_norm": 6.531193856175966, "learning_rate": 9.732015910973535e-07, "loss": 0.6146, "step": 28078 }, { "epoch": 0.8, "grad_norm": 5.707793992266031, "learning_rate": 9.729266986764025e-07, "loss": 0.41, "step": 28079 }, { "epoch": 0.8, "grad_norm": 6.151679419117851, "learning_rate": 9.726518408998686e-07, "loss": 0.5287, "step": 28080 }, { "epoch": 0.8, "grad_norm": 2.908402040953386, "learning_rate": 9.723770177701147e-07, "loss": 0.4185, "step": 28081 }, { "epoch": 0.8, "grad_norm": 5.613811608560207, "learning_rate": 9.72102229289505e-07, "loss": 0.3104, "step": 28082 }, { "epoch": 0.8, "grad_norm": 4.700940686299429, "learning_rate": 9.718274754604045e-07, "loss": 0.5791, "step": 28083 }, { "epoch": 0.8, "grad_norm": 8.976720584443513, "learning_rate": 9.715527562851751e-07, "loss": 0.8528, "step": 28084 }, { "epoch": 0.8, "grad_norm": 4.024872192580867, "learning_rate": 9.712780717661825e-07, "loss": 0.4824, "step": 28085 }, { "epoch": 0.8, "grad_norm": 6.047874742608572, "learning_rate": 9.710034219057873e-07, "loss": 0.5057, "step": 28086 }, { "epoch": 0.8, "grad_norm": 4.702648632979924, "learning_rate": 9.707288067063554e-07, "loss": 0.4241, "step": 28087 }, { "epoch": 0.8, "grad_norm": 8.189350647015635, "learning_rate": 9.70454226170247e-07, "loss": 0.5879, "step": 28088 }, { "epoch": 0.8, "grad_norm": 6.122940729368038, "learning_rate": 9.701796802998236e-07, "loss": 0.4751, "step": 28089 }, { "epoch": 0.8, "grad_norm": 5.5631078323701075, "learning_rate": 9.699051690974493e-07, "loss": 0.5469, "step": 28090 }, { "epoch": 0.8, "grad_norm": 5.109359181230888, "learning_rate": 9.696306925654836e-07, "loss": 0.5063, "step": 28091 }, { "epoch": 0.8, "grad_norm": 4.083653499092452, "learning_rate": 9.693562507062903e-07, "loss": 0.4773, "step": 28092 }, { "epoch": 0.8, "grad_norm": 4.45028134619244, "learning_rate": 9.690818435222272e-07, "loss": 0.315, "step": 28093 }, { "epoch": 0.8, "grad_norm": 3.2050991208762043, "learning_rate": 9.68807471015658e-07, "loss": 0.2044, "step": 28094 }, { "epoch": 0.8, "grad_norm": 4.177760284186548, "learning_rate": 9.685331331889425e-07, "loss": 0.4997, "step": 28095 }, { "epoch": 0.8, "grad_norm": 6.311254132897121, "learning_rate": 9.682588300444396e-07, "loss": 0.1827, "step": 28096 }, { "epoch": 0.8, "grad_norm": 4.509733904582034, "learning_rate": 9.679845615845079e-07, "loss": 0.3296, "step": 28097 }, { "epoch": 0.8, "grad_norm": 8.311626072885128, "learning_rate": 9.677103278115092e-07, "loss": 0.4806, "step": 28098 }, { "epoch": 0.8, "grad_norm": 2.1626555486093237, "learning_rate": 9.674361287278027e-07, "loss": 0.1343, "step": 28099 }, { "epoch": 0.8, "grad_norm": 8.313896864926065, "learning_rate": 9.67161964335746e-07, "loss": 0.533, "step": 28100 }, { "epoch": 0.8, "grad_norm": 5.086845164602289, "learning_rate": 9.668878346376997e-07, "loss": 0.3058, "step": 28101 }, { "epoch": 0.8, "grad_norm": 5.006203808136811, "learning_rate": 9.666137396360203e-07, "loss": 0.1992, "step": 28102 }, { "epoch": 0.8, "grad_norm": 2.187907564138159, "learning_rate": 9.663396793330665e-07, "loss": 0.1798, "step": 28103 }, { "epoch": 0.8, "grad_norm": 5.8023311715010895, "learning_rate": 9.660656537311946e-07, "loss": 0.3092, "step": 28104 }, { "epoch": 0.8, "grad_norm": 7.332807789379779, "learning_rate": 9.657916628327646e-07, "loss": 0.7997, "step": 28105 }, { "epoch": 0.8, "grad_norm": 4.339162888153777, "learning_rate": 9.655177066401305e-07, "loss": 0.5181, "step": 28106 }, { "epoch": 0.8, "grad_norm": 8.852117078331808, "learning_rate": 9.652437851556512e-07, "loss": 0.3971, "step": 28107 }, { "epoch": 0.8, "grad_norm": 5.242289876151391, "learning_rate": 9.64969898381684e-07, "loss": 0.3644, "step": 28108 }, { "epoch": 0.8, "grad_norm": 3.6057034268365755, "learning_rate": 9.646960463205845e-07, "loss": 0.2332, "step": 28109 }, { "epoch": 0.81, "grad_norm": 5.407011254136804, "learning_rate": 9.644222289747075e-07, "loss": 0.7642, "step": 28110 }, { "epoch": 0.81, "grad_norm": 2.892164191714218, "learning_rate": 9.641484463464079e-07, "loss": 0.194, "step": 28111 }, { "epoch": 0.81, "grad_norm": 10.434193572837428, "learning_rate": 9.638746984380436e-07, "loss": 1.1542, "step": 28112 }, { "epoch": 0.81, "grad_norm": 4.789454669670576, "learning_rate": 9.636009852519674e-07, "loss": 0.189, "step": 28113 }, { "epoch": 0.81, "grad_norm": 6.491547113410536, "learning_rate": 9.633273067905357e-07, "loss": 0.4399, "step": 28114 }, { "epoch": 0.81, "grad_norm": 4.392971022942986, "learning_rate": 9.630536630561027e-07, "loss": 0.1577, "step": 28115 }, { "epoch": 0.81, "grad_norm": 3.1501785840774006, "learning_rate": 9.627800540510202e-07, "loss": 0.2846, "step": 28116 }, { "epoch": 0.81, "grad_norm": 4.119508324423129, "learning_rate": 9.625064797776457e-07, "loss": 0.2708, "step": 28117 }, { "epoch": 0.81, "grad_norm": 7.292463713180659, "learning_rate": 9.62232940238329e-07, "loss": 0.5179, "step": 28118 }, { "epoch": 0.81, "grad_norm": 4.186900935806643, "learning_rate": 9.619594354354268e-07, "loss": 0.3122, "step": 28119 }, { "epoch": 0.81, "grad_norm": 1.750621217071902, "learning_rate": 9.61685965371289e-07, "loss": 0.159, "step": 28120 }, { "epoch": 0.81, "grad_norm": 6.265795241633004, "learning_rate": 9.614125300482707e-07, "loss": 0.4801, "step": 28121 }, { "epoch": 0.81, "grad_norm": 10.140676583510102, "learning_rate": 9.611391294687234e-07, "loss": 0.5682, "step": 28122 }, { "epoch": 0.81, "grad_norm": 8.314538003149096, "learning_rate": 9.608657636349977e-07, "loss": 0.5256, "step": 28123 }, { "epoch": 0.81, "grad_norm": 6.8442004678262265, "learning_rate": 9.605924325494476e-07, "loss": 0.7914, "step": 28124 }, { "epoch": 0.81, "grad_norm": 4.6932740443835055, "learning_rate": 9.603191362144227e-07, "loss": 0.258, "step": 28125 }, { "epoch": 0.81, "grad_norm": 4.3836436580032565, "learning_rate": 9.600458746322755e-07, "loss": 0.3562, "step": 28126 }, { "epoch": 0.81, "grad_norm": 3.7942981217170395, "learning_rate": 9.597726478053548e-07, "loss": 0.3499, "step": 28127 }, { "epoch": 0.81, "grad_norm": 4.399872351008569, "learning_rate": 9.594994557360144e-07, "loss": 0.1615, "step": 28128 }, { "epoch": 0.81, "grad_norm": 7.972570363712427, "learning_rate": 9.592262984266026e-07, "loss": 0.4911, "step": 28129 }, { "epoch": 0.81, "grad_norm": 5.882102447253167, "learning_rate": 9.589531758794696e-07, "loss": 0.2422, "step": 28130 }, { "epoch": 0.81, "grad_norm": 4.542795665064499, "learning_rate": 9.586800880969633e-07, "loss": 0.2156, "step": 28131 }, { "epoch": 0.81, "grad_norm": 7.734924759255358, "learning_rate": 9.584070350814346e-07, "loss": 0.5165, "step": 28132 }, { "epoch": 0.81, "grad_norm": 14.298512182664929, "learning_rate": 9.581340168352337e-07, "loss": 0.5559, "step": 28133 }, { "epoch": 0.81, "grad_norm": 10.196632270601174, "learning_rate": 9.578610333607075e-07, "loss": 0.2308, "step": 28134 }, { "epoch": 0.81, "grad_norm": 8.092933407391575, "learning_rate": 9.57588084660206e-07, "loss": 0.7504, "step": 28135 }, { "epoch": 0.81, "grad_norm": 8.423973940165062, "learning_rate": 9.573151707360768e-07, "loss": 0.2939, "step": 28136 }, { "epoch": 0.81, "grad_norm": 3.6060204378311598, "learning_rate": 9.570422915906669e-07, "loss": 0.2135, "step": 28137 }, { "epoch": 0.81, "grad_norm": 4.70473315857542, "learning_rate": 9.567694472263233e-07, "loss": 0.236, "step": 28138 }, { "epoch": 0.81, "grad_norm": 10.284819241401772, "learning_rate": 9.56496637645395e-07, "loss": 0.5129, "step": 28139 }, { "epoch": 0.81, "grad_norm": 2.987467277219957, "learning_rate": 9.562238628502286e-07, "loss": 0.1946, "step": 28140 }, { "epoch": 0.81, "grad_norm": 11.255391672675024, "learning_rate": 9.55951122843171e-07, "loss": 0.4766, "step": 28141 }, { "epoch": 0.81, "grad_norm": 3.6562687392447426, "learning_rate": 9.556784176265676e-07, "loss": 0.1647, "step": 28142 }, { "epoch": 0.81, "grad_norm": 5.839229426398072, "learning_rate": 9.55405747202764e-07, "loss": 0.3009, "step": 28143 }, { "epoch": 0.81, "grad_norm": 10.175873188914078, "learning_rate": 9.551331115741074e-07, "loss": 0.6517, "step": 28144 }, { "epoch": 0.81, "grad_norm": 4.170169924788962, "learning_rate": 9.54860510742942e-07, "loss": 0.5049, "step": 28145 }, { "epoch": 0.81, "grad_norm": 4.3068779635609475, "learning_rate": 9.545879447116146e-07, "loss": 0.2957, "step": 28146 }, { "epoch": 0.81, "grad_norm": 3.433556010762542, "learning_rate": 9.543154134824679e-07, "loss": 0.1811, "step": 28147 }, { "epoch": 0.81, "grad_norm": 4.951632566669514, "learning_rate": 9.540429170578486e-07, "loss": 0.4443, "step": 28148 }, { "epoch": 0.81, "grad_norm": 6.704379482301572, "learning_rate": 9.537704554401e-07, "loss": 0.2137, "step": 28149 }, { "epoch": 0.81, "grad_norm": 3.3623347174427725, "learning_rate": 9.534980286315648e-07, "loss": 0.2679, "step": 28150 }, { "epoch": 0.81, "grad_norm": 5.997473800843655, "learning_rate": 9.532256366345893e-07, "loss": 0.5137, "step": 28151 }, { "epoch": 0.81, "grad_norm": 4.852402422403918, "learning_rate": 9.529532794515134e-07, "loss": 0.431, "step": 28152 }, { "epoch": 0.81, "grad_norm": 1.8790785300089543, "learning_rate": 9.52680957084684e-07, "loss": 0.073, "step": 28153 }, { "epoch": 0.81, "grad_norm": 11.825853500049739, "learning_rate": 9.52408669536441e-07, "loss": 0.5142, "step": 28154 }, { "epoch": 0.81, "grad_norm": 3.989384990705327, "learning_rate": 9.521364168091285e-07, "loss": 0.4384, "step": 28155 }, { "epoch": 0.81, "grad_norm": 4.172868474617612, "learning_rate": 9.518641989050887e-07, "loss": 0.4825, "step": 28156 }, { "epoch": 0.81, "grad_norm": 5.070906076821124, "learning_rate": 9.515920158266606e-07, "loss": 0.4283, "step": 28157 }, { "epoch": 0.81, "grad_norm": 3.8846287000795443, "learning_rate": 9.513198675761898e-07, "loss": 0.1912, "step": 28158 }, { "epoch": 0.81, "grad_norm": 5.372669269101676, "learning_rate": 9.510477541560142e-07, "loss": 0.5039, "step": 28159 }, { "epoch": 0.81, "grad_norm": 7.70681123569323, "learning_rate": 9.507756755684777e-07, "loss": 0.8046, "step": 28160 }, { "epoch": 0.81, "grad_norm": 3.6801699020462273, "learning_rate": 9.505036318159183e-07, "loss": 0.1969, "step": 28161 }, { "epoch": 0.81, "grad_norm": 4.05369158735369, "learning_rate": 9.502316229006792e-07, "loss": 0.1513, "step": 28162 }, { "epoch": 0.81, "grad_norm": 7.451607219732232, "learning_rate": 9.499596488250978e-07, "loss": 0.5347, "step": 28163 }, { "epoch": 0.81, "grad_norm": 5.440831731191407, "learning_rate": 9.496877095915146e-07, "loss": 0.4398, "step": 28164 }, { "epoch": 0.81, "grad_norm": 4.338781410483848, "learning_rate": 9.494158052022701e-07, "loss": 0.2913, "step": 28165 }, { "epoch": 0.81, "grad_norm": 7.243459513700863, "learning_rate": 9.491439356597015e-07, "loss": 0.6233, "step": 28166 }, { "epoch": 0.81, "grad_norm": 4.59394747770226, "learning_rate": 9.4887210096615e-07, "loss": 0.3335, "step": 28167 }, { "epoch": 0.81, "grad_norm": 4.36599469486601, "learning_rate": 9.48600301123952e-07, "loss": 0.1544, "step": 28168 }, { "epoch": 0.81, "grad_norm": 3.223767342197236, "learning_rate": 9.483285361354494e-07, "loss": 0.3655, "step": 28169 }, { "epoch": 0.81, "grad_norm": 5.070600222284074, "learning_rate": 9.480568060029749e-07, "loss": 0.5967, "step": 28170 }, { "epoch": 0.81, "grad_norm": 9.981756090723396, "learning_rate": 9.477851107288699e-07, "loss": 0.6829, "step": 28171 }, { "epoch": 0.81, "grad_norm": 8.161777771664882, "learning_rate": 9.475134503154698e-07, "loss": 0.5272, "step": 28172 }, { "epoch": 0.81, "grad_norm": 8.222956520089014, "learning_rate": 9.472418247651122e-07, "loss": 0.419, "step": 28173 }, { "epoch": 0.81, "grad_norm": 6.0587458834310475, "learning_rate": 9.469702340801357e-07, "loss": 0.4957, "step": 28174 }, { "epoch": 0.81, "grad_norm": 3.8917737896380156, "learning_rate": 9.466986782628751e-07, "loss": 0.4798, "step": 28175 }, { "epoch": 0.81, "grad_norm": 3.6607673729227246, "learning_rate": 9.464271573156664e-07, "loss": 0.1616, "step": 28176 }, { "epoch": 0.81, "grad_norm": 3.7211517143660435, "learning_rate": 9.461556712408449e-07, "loss": 0.273, "step": 28177 }, { "epoch": 0.81, "grad_norm": 6.792165102152799, "learning_rate": 9.458842200407481e-07, "loss": 0.5211, "step": 28178 }, { "epoch": 0.81, "grad_norm": 3.6169888172003155, "learning_rate": 9.456128037177093e-07, "loss": 0.1978, "step": 28179 }, { "epoch": 0.81, "grad_norm": 4.711513250034974, "learning_rate": 9.453414222740643e-07, "loss": 0.2654, "step": 28180 }, { "epoch": 0.81, "grad_norm": 7.428804427333779, "learning_rate": 9.450700757121484e-07, "loss": 0.5639, "step": 28181 }, { "epoch": 0.81, "grad_norm": 4.513604764881006, "learning_rate": 9.447987640342959e-07, "loss": 0.3157, "step": 28182 }, { "epoch": 0.81, "grad_norm": 5.8202524066869445, "learning_rate": 9.445274872428406e-07, "loss": 0.5176, "step": 28183 }, { "epoch": 0.81, "grad_norm": 3.8062815685247187, "learning_rate": 9.442562453401144e-07, "loss": 0.2557, "step": 28184 }, { "epoch": 0.81, "grad_norm": 5.833512012833232, "learning_rate": 9.439850383284538e-07, "loss": 0.5384, "step": 28185 }, { "epoch": 0.81, "grad_norm": 4.603571713817596, "learning_rate": 9.437138662101891e-07, "loss": 0.1792, "step": 28186 }, { "epoch": 0.81, "grad_norm": 5.869395159440084, "learning_rate": 9.434427289876558e-07, "loss": 0.4787, "step": 28187 }, { "epoch": 0.81, "grad_norm": 2.7245065539794546, "learning_rate": 9.43171626663184e-07, "loss": 0.1542, "step": 28188 }, { "epoch": 0.81, "grad_norm": 17.358473732915108, "learning_rate": 9.429005592391088e-07, "loss": 0.6862, "step": 28189 }, { "epoch": 0.81, "grad_norm": 11.179097392611299, "learning_rate": 9.426295267177604e-07, "loss": 0.5991, "step": 28190 }, { "epoch": 0.81, "grad_norm": 4.551179925703387, "learning_rate": 9.423585291014692e-07, "loss": 0.4457, "step": 28191 }, { "epoch": 0.81, "grad_norm": 4.834001908703996, "learning_rate": 9.420875663925693e-07, "loss": 0.4465, "step": 28192 }, { "epoch": 0.81, "grad_norm": 9.833777641237173, "learning_rate": 9.418166385933891e-07, "loss": 0.6464, "step": 28193 }, { "epoch": 0.81, "grad_norm": 9.021316873631058, "learning_rate": 9.415457457062621e-07, "loss": 0.6555, "step": 28194 }, { "epoch": 0.81, "grad_norm": 6.123741643447921, "learning_rate": 9.41274887733516e-07, "loss": 0.5179, "step": 28195 }, { "epoch": 0.81, "grad_norm": 6.214094403333444, "learning_rate": 9.410040646774837e-07, "loss": 0.7532, "step": 28196 }, { "epoch": 0.81, "grad_norm": 5.958411565235932, "learning_rate": 9.407332765404936e-07, "loss": 0.3793, "step": 28197 }, { "epoch": 0.81, "grad_norm": 9.170066278232682, "learning_rate": 9.404625233248743e-07, "loss": 0.9778, "step": 28198 }, { "epoch": 0.81, "grad_norm": 8.242573776956254, "learning_rate": 9.401918050329572e-07, "loss": 0.7406, "step": 28199 }, { "epoch": 0.81, "grad_norm": 3.0297425641332896, "learning_rate": 9.39921121667069e-07, "loss": 0.2371, "step": 28200 }, { "epoch": 0.81, "grad_norm": 4.001917885903981, "learning_rate": 9.396504732295403e-07, "loss": 0.5053, "step": 28201 }, { "epoch": 0.81, "grad_norm": 7.9494181390073635, "learning_rate": 9.393798597226988e-07, "loss": 0.5034, "step": 28202 }, { "epoch": 0.81, "grad_norm": 4.942145998621023, "learning_rate": 9.391092811488727e-07, "loss": 0.2698, "step": 28203 }, { "epoch": 0.81, "grad_norm": 4.4902700826810555, "learning_rate": 9.38838737510388e-07, "loss": 0.2719, "step": 28204 }, { "epoch": 0.81, "grad_norm": 6.410156659132661, "learning_rate": 9.385682288095738e-07, "loss": 0.8548, "step": 28205 }, { "epoch": 0.81, "grad_norm": 8.825069580668343, "learning_rate": 9.382977550487582e-07, "loss": 0.2305, "step": 28206 }, { "epoch": 0.81, "grad_norm": 13.96451321724217, "learning_rate": 9.380273162302655e-07, "loss": 0.6105, "step": 28207 }, { "epoch": 0.81, "grad_norm": 8.665374188760563, "learning_rate": 9.37756912356425e-07, "loss": 0.438, "step": 28208 }, { "epoch": 0.81, "grad_norm": 8.77631258371867, "learning_rate": 9.374865434295621e-07, "loss": 0.5683, "step": 28209 }, { "epoch": 0.81, "grad_norm": 2.3258845282327836, "learning_rate": 9.372162094520021e-07, "loss": 0.4303, "step": 28210 }, { "epoch": 0.81, "grad_norm": 4.607849437608259, "learning_rate": 9.369459104260692e-07, "loss": 0.6581, "step": 28211 }, { "epoch": 0.81, "grad_norm": 6.476576194242533, "learning_rate": 9.366756463540916e-07, "loss": 0.3621, "step": 28212 }, { "epoch": 0.81, "grad_norm": 5.974822305886097, "learning_rate": 9.364054172383925e-07, "loss": 0.6305, "step": 28213 }, { "epoch": 0.81, "grad_norm": 1.4281294363524732, "learning_rate": 9.361352230812964e-07, "loss": 0.0524, "step": 28214 }, { "epoch": 0.81, "grad_norm": 4.18475624837602, "learning_rate": 9.358650638851308e-07, "loss": 0.7626, "step": 28215 }, { "epoch": 0.81, "grad_norm": 8.169912157228488, "learning_rate": 9.355949396522168e-07, "loss": 0.5846, "step": 28216 }, { "epoch": 0.81, "grad_norm": 6.943544452416439, "learning_rate": 9.353248503848794e-07, "loss": 0.4556, "step": 28217 }, { "epoch": 0.81, "grad_norm": 7.477599834361085, "learning_rate": 9.350547960854411e-07, "loss": 0.4878, "step": 28218 }, { "epoch": 0.81, "grad_norm": 7.289444875551514, "learning_rate": 9.347847767562269e-07, "loss": 0.4483, "step": 28219 }, { "epoch": 0.81, "grad_norm": 4.610370580193215, "learning_rate": 9.345147923995568e-07, "loss": 0.2251, "step": 28220 }, { "epoch": 0.81, "grad_norm": 9.606914223793211, "learning_rate": 9.342448430177564e-07, "loss": 1.2932, "step": 28221 }, { "epoch": 0.81, "grad_norm": 8.704019442333216, "learning_rate": 9.339749286131477e-07, "loss": 0.6644, "step": 28222 }, { "epoch": 0.81, "grad_norm": 8.201152077433163, "learning_rate": 9.337050491880523e-07, "loss": 0.7381, "step": 28223 }, { "epoch": 0.81, "grad_norm": 6.520833776146952, "learning_rate": 9.334352047447915e-07, "loss": 0.6586, "step": 28224 }, { "epoch": 0.81, "grad_norm": 3.3106211876194602, "learning_rate": 9.331653952856856e-07, "loss": 0.3092, "step": 28225 }, { "epoch": 0.81, "grad_norm": 7.51021852553776, "learning_rate": 9.328956208130586e-07, "loss": 0.5657, "step": 28226 }, { "epoch": 0.81, "grad_norm": 4.848506118333105, "learning_rate": 9.326258813292283e-07, "loss": 0.6342, "step": 28227 }, { "epoch": 0.81, "grad_norm": 4.5374060019432845, "learning_rate": 9.323561768365185e-07, "loss": 0.3029, "step": 28228 }, { "epoch": 0.81, "grad_norm": 5.135143105092382, "learning_rate": 9.32086507337247e-07, "loss": 0.3644, "step": 28229 }, { "epoch": 0.81, "grad_norm": 3.3825594871291753, "learning_rate": 9.31816872833734e-07, "loss": 0.2806, "step": 28230 }, { "epoch": 0.81, "grad_norm": 4.204426361899866, "learning_rate": 9.315472733283004e-07, "loss": 0.4204, "step": 28231 }, { "epoch": 0.81, "grad_norm": 6.88149384803666, "learning_rate": 9.312777088232633e-07, "loss": 0.3992, "step": 28232 }, { "epoch": 0.81, "grad_norm": 4.6252140304942495, "learning_rate": 9.310081793209442e-07, "loss": 0.3473, "step": 28233 }, { "epoch": 0.81, "grad_norm": 5.199397415771294, "learning_rate": 9.307386848236599e-07, "loss": 0.5526, "step": 28234 }, { "epoch": 0.81, "grad_norm": 8.593783652499807, "learning_rate": 9.304692253337305e-07, "loss": 0.3003, "step": 28235 }, { "epoch": 0.81, "grad_norm": 6.269544217580534, "learning_rate": 9.30199800853474e-07, "loss": 0.7073, "step": 28236 }, { "epoch": 0.81, "grad_norm": 8.981022376757766, "learning_rate": 9.299304113852059e-07, "loss": 0.6473, "step": 28237 }, { "epoch": 0.81, "grad_norm": 5.371032492548406, "learning_rate": 9.296610569312464e-07, "loss": 0.4022, "step": 28238 }, { "epoch": 0.81, "grad_norm": 2.470939052007672, "learning_rate": 9.293917374939105e-07, "loss": 0.1337, "step": 28239 }, { "epoch": 0.81, "grad_norm": 5.04653764193613, "learning_rate": 9.291224530755177e-07, "loss": 0.3643, "step": 28240 }, { "epoch": 0.81, "grad_norm": 7.531460201131681, "learning_rate": 9.288532036783821e-07, "loss": 0.5672, "step": 28241 }, { "epoch": 0.81, "grad_norm": 7.901991711410621, "learning_rate": 9.285839893048221e-07, "loss": 0.6844, "step": 28242 }, { "epoch": 0.81, "grad_norm": 4.208569032218694, "learning_rate": 9.283148099571526e-07, "loss": 0.8289, "step": 28243 }, { "epoch": 0.81, "grad_norm": 7.447919471224455, "learning_rate": 9.280456656376896e-07, "loss": 0.408, "step": 28244 }, { "epoch": 0.81, "grad_norm": 5.587239488506709, "learning_rate": 9.277765563487473e-07, "loss": 0.3684, "step": 28245 }, { "epoch": 0.81, "grad_norm": 4.272537973635843, "learning_rate": 9.275074820926422e-07, "loss": 0.4746, "step": 28246 }, { "epoch": 0.81, "grad_norm": 5.676598591772456, "learning_rate": 9.272384428716896e-07, "loss": 0.2737, "step": 28247 }, { "epoch": 0.81, "grad_norm": 6.336978097828137, "learning_rate": 9.269694386882022e-07, "loss": 0.4008, "step": 28248 }, { "epoch": 0.81, "grad_norm": 7.3690361866853875, "learning_rate": 9.267004695444959e-07, "loss": 0.8321, "step": 28249 }, { "epoch": 0.81, "grad_norm": 2.4928470205284188, "learning_rate": 9.264315354428849e-07, "loss": 0.0705, "step": 28250 }, { "epoch": 0.81, "grad_norm": 13.174582713285059, "learning_rate": 9.261626363856813e-07, "loss": 0.6738, "step": 28251 }, { "epoch": 0.81, "grad_norm": 1.9283547915221184, "learning_rate": 9.258937723751977e-07, "loss": 0.1198, "step": 28252 }, { "epoch": 0.81, "grad_norm": 8.264619185576112, "learning_rate": 9.256249434137493e-07, "loss": 0.4933, "step": 28253 }, { "epoch": 0.81, "grad_norm": 5.763303708113777, "learning_rate": 9.253561495036473e-07, "loss": 0.5816, "step": 28254 }, { "epoch": 0.81, "grad_norm": 3.9217566362126526, "learning_rate": 9.25087390647204e-07, "loss": 0.1825, "step": 28255 }, { "epoch": 0.81, "grad_norm": 8.180080400738664, "learning_rate": 9.248186668467352e-07, "loss": 0.3837, "step": 28256 }, { "epoch": 0.81, "grad_norm": 3.8457292097688156, "learning_rate": 9.24549978104547e-07, "loss": 0.2123, "step": 28257 }, { "epoch": 0.81, "grad_norm": 8.302169403689383, "learning_rate": 9.242813244229548e-07, "loss": 0.7396, "step": 28258 }, { "epoch": 0.81, "grad_norm": 4.947987970206486, "learning_rate": 9.240127058042675e-07, "loss": 0.85, "step": 28259 }, { "epoch": 0.81, "grad_norm": 6.515243226354137, "learning_rate": 9.237441222507976e-07, "loss": 0.6289, "step": 28260 }, { "epoch": 0.81, "grad_norm": 4.417437156312273, "learning_rate": 9.234755737648543e-07, "loss": 0.5036, "step": 28261 }, { "epoch": 0.81, "grad_norm": 3.0896653848471805, "learning_rate": 9.232070603487502e-07, "loss": 0.1201, "step": 28262 }, { "epoch": 0.81, "grad_norm": 9.907826487734814, "learning_rate": 9.229385820047936e-07, "loss": 0.8363, "step": 28263 }, { "epoch": 0.81, "grad_norm": 7.409201826487617, "learning_rate": 9.226701387352932e-07, "loss": 0.7469, "step": 28264 }, { "epoch": 0.81, "grad_norm": 3.4221641731877974, "learning_rate": 9.224017305425609e-07, "loss": 0.4682, "step": 28265 }, { "epoch": 0.81, "grad_norm": 5.646543763098109, "learning_rate": 9.221333574289032e-07, "loss": 0.351, "step": 28266 }, { "epoch": 0.81, "grad_norm": 7.353038805818354, "learning_rate": 9.218650193966311e-07, "loss": 0.4163, "step": 28267 }, { "epoch": 0.81, "grad_norm": 5.501871570853461, "learning_rate": 9.21596716448051e-07, "loss": 0.5682, "step": 28268 }, { "epoch": 0.81, "grad_norm": 6.579431689454733, "learning_rate": 9.21328448585474e-07, "loss": 0.5015, "step": 28269 }, { "epoch": 0.81, "grad_norm": 6.581816156421504, "learning_rate": 9.210602158112053e-07, "loss": 0.2217, "step": 28270 }, { "epoch": 0.81, "grad_norm": 7.721732640546189, "learning_rate": 9.207920181275526e-07, "loss": 0.3077, "step": 28271 }, { "epoch": 0.81, "grad_norm": 4.30153167857675, "learning_rate": 9.205238555368251e-07, "loss": 0.3367, "step": 28272 }, { "epoch": 0.81, "grad_norm": 5.721011251510867, "learning_rate": 9.202557280413277e-07, "loss": 0.6238, "step": 28273 }, { "epoch": 0.81, "grad_norm": 9.668223787276892, "learning_rate": 9.199876356433684e-07, "loss": 0.2069, "step": 28274 }, { "epoch": 0.81, "grad_norm": 9.988001682634254, "learning_rate": 9.197195783452528e-07, "loss": 0.5865, "step": 28275 }, { "epoch": 0.81, "grad_norm": 6.913139704098992, "learning_rate": 9.194515561492878e-07, "loss": 0.5, "step": 28276 }, { "epoch": 0.81, "grad_norm": 5.222392061979791, "learning_rate": 9.191835690577788e-07, "loss": 0.3781, "step": 28277 }, { "epoch": 0.81, "grad_norm": 4.107443932343889, "learning_rate": 9.189156170730301e-07, "loss": 0.2739, "step": 28278 }, { "epoch": 0.81, "grad_norm": 5.581566205951214, "learning_rate": 9.186477001973493e-07, "loss": 0.3855, "step": 28279 }, { "epoch": 0.81, "grad_norm": 3.3076794654787967, "learning_rate": 9.183798184330378e-07, "loss": 0.1716, "step": 28280 }, { "epoch": 0.81, "grad_norm": 11.578248006602275, "learning_rate": 9.181119717824044e-07, "loss": 0.5081, "step": 28281 }, { "epoch": 0.81, "grad_norm": 12.574262587538138, "learning_rate": 9.178441602477489e-07, "loss": 0.6279, "step": 28282 }, { "epoch": 0.81, "grad_norm": 2.5517584665314414, "learning_rate": 9.175763838313795e-07, "loss": 0.1238, "step": 28283 }, { "epoch": 0.81, "grad_norm": 4.115437608071907, "learning_rate": 9.173086425355976e-07, "loss": 0.129, "step": 28284 }, { "epoch": 0.81, "grad_norm": 5.032380918789038, "learning_rate": 9.170409363627069e-07, "loss": 0.4643, "step": 28285 }, { "epoch": 0.81, "grad_norm": 4.7340504752249455, "learning_rate": 9.167732653150085e-07, "loss": 0.4515, "step": 28286 }, { "epoch": 0.81, "grad_norm": 3.517005645070333, "learning_rate": 9.165056293948077e-07, "loss": 0.2587, "step": 28287 }, { "epoch": 0.81, "grad_norm": 3.960142804542725, "learning_rate": 9.16238028604407e-07, "loss": 0.3832, "step": 28288 }, { "epoch": 0.81, "grad_norm": 3.615743987582671, "learning_rate": 9.159704629461081e-07, "loss": 0.1004, "step": 28289 }, { "epoch": 0.81, "grad_norm": 7.802962195516406, "learning_rate": 9.157029324222116e-07, "loss": 0.4802, "step": 28290 }, { "epoch": 0.81, "grad_norm": 5.315961585133538, "learning_rate": 9.154354370350194e-07, "loss": 0.4503, "step": 28291 }, { "epoch": 0.81, "grad_norm": 2.2759846579063097, "learning_rate": 9.151679767868343e-07, "loss": 0.1365, "step": 28292 }, { "epoch": 0.81, "grad_norm": 5.260019097239126, "learning_rate": 9.149005516799553e-07, "loss": 0.6694, "step": 28293 }, { "epoch": 0.81, "grad_norm": 7.265109890961917, "learning_rate": 9.146331617166849e-07, "loss": 0.4784, "step": 28294 }, { "epoch": 0.81, "grad_norm": 10.972526269769588, "learning_rate": 9.143658068993205e-07, "loss": 0.5708, "step": 28295 }, { "epoch": 0.81, "grad_norm": 6.166962844162673, "learning_rate": 9.140984872301661e-07, "loss": 0.3886, "step": 28296 }, { "epoch": 0.81, "grad_norm": 11.071119323489887, "learning_rate": 9.138312027115187e-07, "loss": 0.6044, "step": 28297 }, { "epoch": 0.81, "grad_norm": 5.1722007026570695, "learning_rate": 9.135639533456775e-07, "loss": 0.5398, "step": 28298 }, { "epoch": 0.81, "grad_norm": 13.755149813258774, "learning_rate": 9.13296739134944e-07, "loss": 0.6089, "step": 28299 }, { "epoch": 0.81, "grad_norm": 4.2038273703096225, "learning_rate": 9.130295600816136e-07, "loss": 0.4306, "step": 28300 }, { "epoch": 0.81, "grad_norm": 4.010856766105556, "learning_rate": 9.127624161879884e-07, "loss": 0.322, "step": 28301 }, { "epoch": 0.81, "grad_norm": 5.910467327475852, "learning_rate": 9.124953074563636e-07, "loss": 0.4507, "step": 28302 }, { "epoch": 0.81, "grad_norm": 5.281812604021718, "learning_rate": 9.122282338890393e-07, "loss": 0.2348, "step": 28303 }, { "epoch": 0.81, "grad_norm": 8.440737745936591, "learning_rate": 9.119611954883129e-07, "loss": 0.4904, "step": 28304 }, { "epoch": 0.81, "grad_norm": 4.243677148207698, "learning_rate": 9.116941922564792e-07, "loss": 0.1844, "step": 28305 }, { "epoch": 0.81, "grad_norm": 5.872050133205287, "learning_rate": 9.114272241958383e-07, "loss": 0.5343, "step": 28306 }, { "epoch": 0.81, "grad_norm": 11.783842292344097, "learning_rate": 9.111602913086842e-07, "loss": 0.8903, "step": 28307 }, { "epoch": 0.81, "grad_norm": 3.3225007688809916, "learning_rate": 9.108933935973163e-07, "loss": 0.3928, "step": 28308 }, { "epoch": 0.81, "grad_norm": 8.487820033021663, "learning_rate": 9.106265310640278e-07, "loss": 0.5788, "step": 28309 }, { "epoch": 0.81, "grad_norm": 3.67301540115829, "learning_rate": 9.103597037111167e-07, "loss": 0.3272, "step": 28310 }, { "epoch": 0.81, "grad_norm": 6.382065504012291, "learning_rate": 9.10092911540878e-07, "loss": 0.6427, "step": 28311 }, { "epoch": 0.81, "grad_norm": 4.512790278558416, "learning_rate": 9.098261545556048e-07, "loss": 0.328, "step": 28312 }, { "epoch": 0.81, "grad_norm": 5.6928655236806955, "learning_rate": 9.095594327575946e-07, "loss": 0.3467, "step": 28313 }, { "epoch": 0.81, "grad_norm": 3.7222430709394163, "learning_rate": 9.092927461491402e-07, "loss": 0.3278, "step": 28314 }, { "epoch": 0.81, "grad_norm": 3.106110358985145, "learning_rate": 9.09026094732538e-07, "loss": 0.118, "step": 28315 }, { "epoch": 0.81, "grad_norm": 4.785410996487549, "learning_rate": 9.0875947851008e-07, "loss": 0.4129, "step": 28316 }, { "epoch": 0.81, "grad_norm": 9.551858153994477, "learning_rate": 9.084928974840613e-07, "loss": 0.5503, "step": 28317 }, { "epoch": 0.81, "grad_norm": 5.410436233271648, "learning_rate": 9.082263516567724e-07, "loss": 0.1678, "step": 28318 }, { "epoch": 0.81, "grad_norm": 4.89036664188945, "learning_rate": 9.079598410305102e-07, "loss": 0.2164, "step": 28319 }, { "epoch": 0.81, "grad_norm": 3.140635457187533, "learning_rate": 9.076933656075637e-07, "loss": 0.3969, "step": 28320 }, { "epoch": 0.81, "grad_norm": 5.324062868496563, "learning_rate": 9.074269253902279e-07, "loss": 0.407, "step": 28321 }, { "epoch": 0.81, "grad_norm": 4.199101390529354, "learning_rate": 9.07160520380796e-07, "loss": 0.197, "step": 28322 }, { "epoch": 0.81, "grad_norm": 5.957382161795538, "learning_rate": 9.068941505815576e-07, "loss": 0.3522, "step": 28323 }, { "epoch": 0.81, "grad_norm": 8.701911115658406, "learning_rate": 9.06627815994805e-07, "loss": 0.6894, "step": 28324 }, { "epoch": 0.81, "grad_norm": 2.5690507429656884, "learning_rate": 9.06361516622828e-07, "loss": 0.135, "step": 28325 }, { "epoch": 0.81, "grad_norm": 4.715867633783594, "learning_rate": 9.060952524679201e-07, "loss": 0.5568, "step": 28326 }, { "epoch": 0.81, "grad_norm": 5.742818494109545, "learning_rate": 9.058290235323696e-07, "loss": 0.3965, "step": 28327 }, { "epoch": 0.81, "grad_norm": 7.353091657557281, "learning_rate": 9.055628298184676e-07, "loss": 0.6974, "step": 28328 }, { "epoch": 0.81, "grad_norm": 4.011515314131187, "learning_rate": 9.052966713285061e-07, "loss": 0.3679, "step": 28329 }, { "epoch": 0.81, "grad_norm": 6.105177078553245, "learning_rate": 9.050305480647731e-07, "loss": 0.5578, "step": 28330 }, { "epoch": 0.81, "grad_norm": 4.316560216030532, "learning_rate": 9.04764460029558e-07, "loss": 0.3387, "step": 28331 }, { "epoch": 0.81, "grad_norm": 7.844466157720459, "learning_rate": 9.044984072251489e-07, "loss": 0.4255, "step": 28332 }, { "epoch": 0.81, "grad_norm": 5.930841667823711, "learning_rate": 9.042323896538368e-07, "loss": 0.4537, "step": 28333 }, { "epoch": 0.81, "grad_norm": 5.903153955278522, "learning_rate": 9.039664073179077e-07, "loss": 0.5644, "step": 28334 }, { "epoch": 0.81, "grad_norm": 8.026444180986417, "learning_rate": 9.037004602196531e-07, "loss": 0.5499, "step": 28335 }, { "epoch": 0.81, "grad_norm": 12.25629621357937, "learning_rate": 9.034345483613577e-07, "loss": 0.7821, "step": 28336 }, { "epoch": 0.81, "grad_norm": 11.998272771508061, "learning_rate": 9.031686717453114e-07, "loss": 0.402, "step": 28337 }, { "epoch": 0.81, "grad_norm": 8.928571709769109, "learning_rate": 9.029028303738013e-07, "loss": 0.578, "step": 28338 }, { "epoch": 0.81, "grad_norm": 5.34976183815748, "learning_rate": 9.026370242491117e-07, "loss": 0.4172, "step": 28339 }, { "epoch": 0.81, "grad_norm": 4.747641454701536, "learning_rate": 9.023712533735329e-07, "loss": 0.5899, "step": 28340 }, { "epoch": 0.81, "grad_norm": 3.952188976580447, "learning_rate": 9.021055177493488e-07, "loss": 0.2498, "step": 28341 }, { "epoch": 0.81, "grad_norm": 6.444276483674545, "learning_rate": 9.018398173788467e-07, "loss": 0.2826, "step": 28342 }, { "epoch": 0.81, "grad_norm": 4.6318932018403665, "learning_rate": 9.015741522643128e-07, "loss": 0.5046, "step": 28343 }, { "epoch": 0.81, "grad_norm": 6.683879977707296, "learning_rate": 9.013085224080303e-07, "loss": 0.518, "step": 28344 }, { "epoch": 0.81, "grad_norm": 9.13941673704931, "learning_rate": 9.010429278122873e-07, "loss": 0.7524, "step": 28345 }, { "epoch": 0.81, "grad_norm": 10.308126111570298, "learning_rate": 9.007773684793664e-07, "loss": 0.4729, "step": 28346 }, { "epoch": 0.81, "grad_norm": 4.008814784907199, "learning_rate": 9.00511844411554e-07, "loss": 0.4538, "step": 28347 }, { "epoch": 0.81, "grad_norm": 3.869388855428879, "learning_rate": 9.002463556111318e-07, "loss": 0.2535, "step": 28348 }, { "epoch": 0.81, "grad_norm": 2.938229551420882, "learning_rate": 8.999809020803874e-07, "loss": 0.3663, "step": 28349 }, { "epoch": 0.81, "grad_norm": 3.329643583612273, "learning_rate": 8.997154838216021e-07, "loss": 0.2537, "step": 28350 }, { "epoch": 0.81, "grad_norm": 6.001817666339739, "learning_rate": 8.994501008370598e-07, "loss": 0.6681, "step": 28351 }, { "epoch": 0.81, "grad_norm": 6.971854437879388, "learning_rate": 8.99184753129042e-07, "loss": 0.657, "step": 28352 }, { "epoch": 0.81, "grad_norm": 4.73526438300247, "learning_rate": 8.98919440699833e-07, "loss": 0.4138, "step": 28353 }, { "epoch": 0.81, "grad_norm": 6.794988558958188, "learning_rate": 8.986541635517165e-07, "loss": 0.2711, "step": 28354 }, { "epoch": 0.81, "grad_norm": 5.734345979123389, "learning_rate": 8.983889216869718e-07, "loss": 0.2957, "step": 28355 }, { "epoch": 0.81, "grad_norm": 8.276704541197926, "learning_rate": 8.981237151078831e-07, "loss": 0.34, "step": 28356 }, { "epoch": 0.81, "grad_norm": 3.5581264754358792, "learning_rate": 8.978585438167314e-07, "loss": 0.2099, "step": 28357 }, { "epoch": 0.81, "grad_norm": 4.674143428664247, "learning_rate": 8.975934078157978e-07, "loss": 0.5171, "step": 28358 }, { "epoch": 0.81, "grad_norm": 7.823539624424731, "learning_rate": 8.973283071073618e-07, "loss": 0.7311, "step": 28359 }, { "epoch": 0.81, "grad_norm": 5.183402131943652, "learning_rate": 8.970632416937058e-07, "loss": 0.5124, "step": 28360 }, { "epoch": 0.81, "grad_norm": 0.9713711732836374, "learning_rate": 8.96798211577109e-07, "loss": 0.0374, "step": 28361 }, { "epoch": 0.81, "grad_norm": 4.791710947357352, "learning_rate": 8.965332167598517e-07, "loss": 0.4153, "step": 28362 }, { "epoch": 0.81, "grad_norm": 3.758542820631619, "learning_rate": 8.962682572442155e-07, "loss": 0.181, "step": 28363 }, { "epoch": 0.81, "grad_norm": 6.179848014203339, "learning_rate": 8.96003333032478e-07, "loss": 0.5571, "step": 28364 }, { "epoch": 0.81, "grad_norm": 14.007501227456917, "learning_rate": 8.957384441269184e-07, "loss": 0.3243, "step": 28365 }, { "epoch": 0.81, "grad_norm": 4.125252600362786, "learning_rate": 8.954735905298145e-07, "loss": 0.4995, "step": 28366 }, { "epoch": 0.81, "grad_norm": 3.7508023357374727, "learning_rate": 8.952087722434472e-07, "loss": 0.3571, "step": 28367 }, { "epoch": 0.81, "grad_norm": 3.0614753098788903, "learning_rate": 8.949439892700917e-07, "loss": 0.2824, "step": 28368 }, { "epoch": 0.81, "grad_norm": 13.903012805433727, "learning_rate": 8.946792416120282e-07, "loss": 0.388, "step": 28369 }, { "epoch": 0.81, "grad_norm": 3.8843171794390363, "learning_rate": 8.944145292715351e-07, "loss": 0.4011, "step": 28370 }, { "epoch": 0.81, "grad_norm": 9.194408492228996, "learning_rate": 8.941498522508879e-07, "loss": 0.6268, "step": 28371 }, { "epoch": 0.81, "grad_norm": 2.279672599884975, "learning_rate": 8.938852105523638e-07, "loss": 0.1767, "step": 28372 }, { "epoch": 0.81, "grad_norm": 6.7358881761642655, "learning_rate": 8.936206041782391e-07, "loss": 0.3967, "step": 28373 }, { "epoch": 0.81, "grad_norm": 7.611862434034173, "learning_rate": 8.933560331307917e-07, "loss": 0.7828, "step": 28374 }, { "epoch": 0.81, "grad_norm": 7.90209284717387, "learning_rate": 8.93091497412295e-07, "loss": 0.4678, "step": 28375 }, { "epoch": 0.81, "grad_norm": 5.180518818652019, "learning_rate": 8.928269970250286e-07, "loss": 0.1716, "step": 28376 }, { "epoch": 0.81, "grad_norm": 5.517089606069946, "learning_rate": 8.92562531971265e-07, "loss": 0.4266, "step": 28377 }, { "epoch": 0.81, "grad_norm": 7.242668522700174, "learning_rate": 8.922981022532795e-07, "loss": 0.2216, "step": 28378 }, { "epoch": 0.81, "grad_norm": 3.406309652243624, "learning_rate": 8.920337078733488e-07, "loss": 0.2256, "step": 28379 }, { "epoch": 0.81, "grad_norm": 5.965925057460012, "learning_rate": 8.917693488337453e-07, "loss": 0.8422, "step": 28380 }, { "epoch": 0.81, "grad_norm": 7.320505699449916, "learning_rate": 8.915050251367452e-07, "loss": 0.415, "step": 28381 }, { "epoch": 0.81, "grad_norm": 5.022617089182221, "learning_rate": 8.912407367846204e-07, "loss": 0.6733, "step": 28382 }, { "epoch": 0.81, "grad_norm": 3.9043475591929666, "learning_rate": 8.90976483779647e-07, "loss": 0.3137, "step": 28383 }, { "epoch": 0.81, "grad_norm": 4.179781647094243, "learning_rate": 8.907122661240969e-07, "loss": 0.3726, "step": 28384 }, { "epoch": 0.81, "grad_norm": 3.7637509949884587, "learning_rate": 8.904480838202417e-07, "loss": 0.1798, "step": 28385 }, { "epoch": 0.81, "grad_norm": 5.7425070537478975, "learning_rate": 8.901839368703574e-07, "loss": 0.2641, "step": 28386 }, { "epoch": 0.81, "grad_norm": 5.238336083497116, "learning_rate": 8.899198252767132e-07, "loss": 0.3119, "step": 28387 }, { "epoch": 0.81, "grad_norm": 4.114236371183448, "learning_rate": 8.896557490415842e-07, "loss": 0.1773, "step": 28388 }, { "epoch": 0.81, "grad_norm": 3.247717624476216, "learning_rate": 8.893917081672387e-07, "loss": 0.3571, "step": 28389 }, { "epoch": 0.81, "grad_norm": 5.34559470120053, "learning_rate": 8.891277026559519e-07, "loss": 0.4678, "step": 28390 }, { "epoch": 0.81, "grad_norm": 7.4411025418358845, "learning_rate": 8.888637325099936e-07, "loss": 0.3211, "step": 28391 }, { "epoch": 0.81, "grad_norm": 5.566325211855282, "learning_rate": 8.885997977316341e-07, "loss": 0.5443, "step": 28392 }, { "epoch": 0.81, "grad_norm": 2.88980497121252, "learning_rate": 8.883358983231433e-07, "loss": 0.1406, "step": 28393 }, { "epoch": 0.81, "grad_norm": 4.43433842391884, "learning_rate": 8.880720342867927e-07, "loss": 0.2712, "step": 28394 }, { "epoch": 0.81, "grad_norm": 4.62702696432547, "learning_rate": 8.878082056248527e-07, "loss": 0.3785, "step": 28395 }, { "epoch": 0.81, "grad_norm": 5.12026093183153, "learning_rate": 8.875444123395915e-07, "loss": 0.2958, "step": 28396 }, { "epoch": 0.81, "grad_norm": 3.931914076034128, "learning_rate": 8.872806544332813e-07, "loss": 0.3269, "step": 28397 }, { "epoch": 0.81, "grad_norm": 11.13303239571756, "learning_rate": 8.870169319081884e-07, "loss": 0.7455, "step": 28398 }, { "epoch": 0.81, "grad_norm": 5.625295334586298, "learning_rate": 8.867532447665827e-07, "loss": 0.5922, "step": 28399 }, { "epoch": 0.81, "grad_norm": 5.395521046894119, "learning_rate": 8.864895930107309e-07, "loss": 0.2337, "step": 28400 }, { "epoch": 0.81, "grad_norm": 3.9596426989977513, "learning_rate": 8.862259766429043e-07, "loss": 0.3647, "step": 28401 }, { "epoch": 0.81, "grad_norm": 4.425917001310602, "learning_rate": 8.859623956653679e-07, "loss": 0.4035, "step": 28402 }, { "epoch": 0.81, "grad_norm": 3.9681439425072944, "learning_rate": 8.856988500803915e-07, "loss": 0.1226, "step": 28403 }, { "epoch": 0.81, "grad_norm": 8.539139784788638, "learning_rate": 8.854353398902409e-07, "loss": 0.4046, "step": 28404 }, { "epoch": 0.81, "grad_norm": 5.33575912780653, "learning_rate": 8.85171865097183e-07, "loss": 0.5388, "step": 28405 }, { "epoch": 0.81, "grad_norm": 7.287024864293607, "learning_rate": 8.849084257034857e-07, "loss": 0.539, "step": 28406 }, { "epoch": 0.81, "grad_norm": 7.405270861836885, "learning_rate": 8.846450217114139e-07, "loss": 0.56, "step": 28407 }, { "epoch": 0.81, "grad_norm": 3.832036305151044, "learning_rate": 8.843816531232346e-07, "loss": 0.1475, "step": 28408 }, { "epoch": 0.81, "grad_norm": 4.689539351300144, "learning_rate": 8.841183199412128e-07, "loss": 0.3258, "step": 28409 }, { "epoch": 0.81, "grad_norm": 4.819110071209541, "learning_rate": 8.838550221676151e-07, "loss": 0.3624, "step": 28410 }, { "epoch": 0.81, "grad_norm": 9.14820727311918, "learning_rate": 8.835917598047061e-07, "loss": 0.4258, "step": 28411 }, { "epoch": 0.81, "grad_norm": 7.236525872909565, "learning_rate": 8.833285328547486e-07, "loss": 0.7861, "step": 28412 }, { "epoch": 0.81, "grad_norm": 2.85745373784394, "learning_rate": 8.830653413200108e-07, "loss": 0.5103, "step": 28413 }, { "epoch": 0.81, "grad_norm": 3.3946380247261003, "learning_rate": 8.828021852027536e-07, "loss": 0.0679, "step": 28414 }, { "epoch": 0.81, "grad_norm": 6.030528979118674, "learning_rate": 8.825390645052434e-07, "loss": 0.4366, "step": 28415 }, { "epoch": 0.81, "grad_norm": 5.817535967025096, "learning_rate": 8.822759792297414e-07, "loss": 0.1612, "step": 28416 }, { "epoch": 0.81, "grad_norm": 3.248446533425428, "learning_rate": 8.82012929378513e-07, "loss": 0.6519, "step": 28417 }, { "epoch": 0.81, "grad_norm": 9.792793214531077, "learning_rate": 8.817499149538211e-07, "loss": 0.7417, "step": 28418 }, { "epoch": 0.81, "grad_norm": 3.4553868580100557, "learning_rate": 8.814869359579259e-07, "loss": 0.4986, "step": 28419 }, { "epoch": 0.81, "grad_norm": 8.253720831605014, "learning_rate": 8.812239923930927e-07, "loss": 0.6842, "step": 28420 }, { "epoch": 0.81, "grad_norm": 6.7975841283835825, "learning_rate": 8.809610842615812e-07, "loss": 0.459, "step": 28421 }, { "epoch": 0.81, "grad_norm": 8.802619129253435, "learning_rate": 8.80698211565656e-07, "loss": 0.6222, "step": 28422 }, { "epoch": 0.81, "grad_norm": 7.876584090506141, "learning_rate": 8.804353743075755e-07, "loss": 0.6242, "step": 28423 }, { "epoch": 0.81, "grad_norm": 4.426578109934517, "learning_rate": 8.801725724896032e-07, "loss": 0.2214, "step": 28424 }, { "epoch": 0.81, "grad_norm": 7.515157006933867, "learning_rate": 8.799098061139993e-07, "loss": 0.6298, "step": 28425 }, { "epoch": 0.81, "grad_norm": 7.5162289192415415, "learning_rate": 8.796470751830227e-07, "loss": 0.4291, "step": 28426 }, { "epoch": 0.81, "grad_norm": 2.0623102534316833, "learning_rate": 8.79384379698936e-07, "loss": 0.1411, "step": 28427 }, { "epoch": 0.81, "grad_norm": 5.369789237001192, "learning_rate": 8.791217196639973e-07, "loss": 0.3254, "step": 28428 }, { "epoch": 0.81, "grad_norm": 4.693446761392078, "learning_rate": 8.788590950804682e-07, "loss": 0.4395, "step": 28429 }, { "epoch": 0.81, "grad_norm": 7.5270233006902885, "learning_rate": 8.785965059506058e-07, "loss": 0.7104, "step": 28430 }, { "epoch": 0.81, "grad_norm": 6.467387327695438, "learning_rate": 8.78333952276672e-07, "loss": 0.3532, "step": 28431 }, { "epoch": 0.81, "grad_norm": 4.709738558257761, "learning_rate": 8.78071434060922e-07, "loss": 0.3184, "step": 28432 }, { "epoch": 0.81, "grad_norm": 6.256603338455926, "learning_rate": 8.778089513056171e-07, "loss": 0.4997, "step": 28433 }, { "epoch": 0.81, "grad_norm": 4.337978618060856, "learning_rate": 8.775465040130126e-07, "loss": 0.1285, "step": 28434 }, { "epoch": 0.81, "grad_norm": 5.838344378942346, "learning_rate": 8.772840921853681e-07, "loss": 0.5085, "step": 28435 }, { "epoch": 0.81, "grad_norm": 8.27971712716513, "learning_rate": 8.770217158249428e-07, "loss": 0.5824, "step": 28436 }, { "epoch": 0.81, "grad_norm": 9.97939791374947, "learning_rate": 8.767593749339914e-07, "loss": 0.3035, "step": 28437 }, { "epoch": 0.81, "grad_norm": 5.050584168214214, "learning_rate": 8.764970695147717e-07, "loss": 0.2802, "step": 28438 }, { "epoch": 0.81, "grad_norm": 3.859027939608562, "learning_rate": 8.762347995695386e-07, "loss": 0.3889, "step": 28439 }, { "epoch": 0.81, "grad_norm": 7.2228001053607676, "learning_rate": 8.759725651005507e-07, "loss": 0.4498, "step": 28440 }, { "epoch": 0.81, "grad_norm": 8.256396674817708, "learning_rate": 8.757103661100625e-07, "loss": 0.6499, "step": 28441 }, { "epoch": 0.81, "grad_norm": 6.567456435460934, "learning_rate": 8.754482026003314e-07, "loss": 0.7046, "step": 28442 }, { "epoch": 0.81, "grad_norm": 5.717676020182476, "learning_rate": 8.751860745736096e-07, "loss": 0.2053, "step": 28443 }, { "epoch": 0.81, "grad_norm": 6.887247239475195, "learning_rate": 8.749239820321559e-07, "loss": 0.38, "step": 28444 }, { "epoch": 0.81, "grad_norm": 4.006179506630896, "learning_rate": 8.746619249782234e-07, "loss": 0.1946, "step": 28445 }, { "epoch": 0.81, "grad_norm": 5.6587005702059825, "learning_rate": 8.743999034140654e-07, "loss": 0.5575, "step": 28446 }, { "epoch": 0.81, "grad_norm": 5.886691776206242, "learning_rate": 8.741379173419379e-07, "loss": 0.4869, "step": 28447 }, { "epoch": 0.81, "grad_norm": 5.1342882795666345, "learning_rate": 8.738759667640928e-07, "loss": 0.7285, "step": 28448 }, { "epoch": 0.81, "grad_norm": 7.021887246406854, "learning_rate": 8.736140516827857e-07, "loss": 0.4924, "step": 28449 }, { "epoch": 0.81, "grad_norm": 5.438756775585488, "learning_rate": 8.73352172100268e-07, "loss": 0.2204, "step": 28450 }, { "epoch": 0.81, "grad_norm": 8.785952574791443, "learning_rate": 8.730903280187947e-07, "loss": 0.4457, "step": 28451 }, { "epoch": 0.81, "grad_norm": 8.211067583412952, "learning_rate": 8.728285194406166e-07, "loss": 0.6753, "step": 28452 }, { "epoch": 0.81, "grad_norm": 4.298257168502657, "learning_rate": 8.72566746367986e-07, "loss": 0.4956, "step": 28453 }, { "epoch": 0.81, "grad_norm": 4.142404066113399, "learning_rate": 8.723050088031565e-07, "loss": 0.3305, "step": 28454 }, { "epoch": 0.81, "grad_norm": 4.045880758922275, "learning_rate": 8.720433067483775e-07, "loss": 0.3923, "step": 28455 }, { "epoch": 0.81, "grad_norm": 6.168334312308328, "learning_rate": 8.717816402059032e-07, "loss": 0.4782, "step": 28456 }, { "epoch": 0.81, "grad_norm": 2.6437616623064377, "learning_rate": 8.715200091779818e-07, "loss": 0.1064, "step": 28457 }, { "epoch": 0.81, "grad_norm": 7.260613302435018, "learning_rate": 8.712584136668678e-07, "loss": 0.3214, "step": 28458 }, { "epoch": 0.82, "grad_norm": 6.017504034480797, "learning_rate": 8.70996853674807e-07, "loss": 0.5317, "step": 28459 }, { "epoch": 0.82, "grad_norm": 3.892180074579789, "learning_rate": 8.707353292040522e-07, "loss": 0.3251, "step": 28460 }, { "epoch": 0.82, "grad_norm": 3.8335799141372386, "learning_rate": 8.704738402568536e-07, "loss": 0.2181, "step": 28461 }, { "epoch": 0.82, "grad_norm": 7.181587275062329, "learning_rate": 8.70212386835459e-07, "loss": 0.4009, "step": 28462 }, { "epoch": 0.82, "grad_norm": 6.6372162810354975, "learning_rate": 8.699509689421199e-07, "loss": 0.3281, "step": 28463 }, { "epoch": 0.82, "grad_norm": 5.942143561907693, "learning_rate": 8.696895865790844e-07, "loss": 0.505, "step": 28464 }, { "epoch": 0.82, "grad_norm": 3.7496568204888563, "learning_rate": 8.694282397486009e-07, "loss": 0.2607, "step": 28465 }, { "epoch": 0.82, "grad_norm": 7.081189702611428, "learning_rate": 8.691669284529164e-07, "loss": 0.5481, "step": 28466 }, { "epoch": 0.82, "grad_norm": 3.268799165461719, "learning_rate": 8.689056526942802e-07, "loss": 0.3175, "step": 28467 }, { "epoch": 0.82, "grad_norm": 3.955493503939893, "learning_rate": 8.68644412474941e-07, "loss": 0.4968, "step": 28468 }, { "epoch": 0.82, "grad_norm": 6.305479394655361, "learning_rate": 8.683832077971444e-07, "loss": 0.784, "step": 28469 }, { "epoch": 0.82, "grad_norm": 4.598395509362866, "learning_rate": 8.681220386631389e-07, "loss": 0.6017, "step": 28470 }, { "epoch": 0.82, "grad_norm": 6.754615247853141, "learning_rate": 8.678609050751713e-07, "loss": 0.5025, "step": 28471 }, { "epoch": 0.82, "grad_norm": 6.630039601502594, "learning_rate": 8.675998070354879e-07, "loss": 0.4221, "step": 28472 }, { "epoch": 0.82, "grad_norm": 5.552137430677783, "learning_rate": 8.673387445463327e-07, "loss": 0.6996, "step": 28473 }, { "epoch": 0.82, "grad_norm": 8.102532588960292, "learning_rate": 8.670777176099549e-07, "loss": 0.7358, "step": 28474 }, { "epoch": 0.82, "grad_norm": 2.965922525030661, "learning_rate": 8.668167262285976e-07, "loss": 0.2623, "step": 28475 }, { "epoch": 0.82, "grad_norm": 9.725319460449002, "learning_rate": 8.665557704045069e-07, "loss": 0.6473, "step": 28476 }, { "epoch": 0.82, "grad_norm": 4.878404651081874, "learning_rate": 8.662948501399293e-07, "loss": 0.1352, "step": 28477 }, { "epoch": 0.82, "grad_norm": 4.839513002290471, "learning_rate": 8.660339654371081e-07, "loss": 0.5245, "step": 28478 }, { "epoch": 0.82, "grad_norm": 3.020462267655938, "learning_rate": 8.65773116298288e-07, "loss": 0.1211, "step": 28479 }, { "epoch": 0.82, "grad_norm": 5.693515865733633, "learning_rate": 8.655123027257112e-07, "loss": 0.3961, "step": 28480 }, { "epoch": 0.82, "grad_norm": 3.062718568514956, "learning_rate": 8.652515247216242e-07, "loss": 0.2125, "step": 28481 }, { "epoch": 0.82, "grad_norm": 7.0877940512180295, "learning_rate": 8.649907822882681e-07, "loss": 0.8111, "step": 28482 }, { "epoch": 0.82, "grad_norm": 11.53128258511868, "learning_rate": 8.647300754278886e-07, "loss": 0.5345, "step": 28483 }, { "epoch": 0.82, "grad_norm": 4.2689679080420815, "learning_rate": 8.644694041427259e-07, "loss": 0.1886, "step": 28484 }, { "epoch": 0.82, "grad_norm": 4.422143341657004, "learning_rate": 8.642087684350248e-07, "loss": 0.2548, "step": 28485 }, { "epoch": 0.82, "grad_norm": 4.338581523124492, "learning_rate": 8.639481683070261e-07, "loss": 0.2812, "step": 28486 }, { "epoch": 0.82, "grad_norm": 3.2364646132332227, "learning_rate": 8.636876037609714e-07, "loss": 0.4053, "step": 28487 }, { "epoch": 0.82, "grad_norm": 7.5218717187573985, "learning_rate": 8.634270747991041e-07, "loss": 0.5821, "step": 28488 }, { "epoch": 0.82, "grad_norm": 4.416381037970958, "learning_rate": 8.631665814236629e-07, "loss": 0.4712, "step": 28489 }, { "epoch": 0.82, "grad_norm": 2.682154929123201, "learning_rate": 8.629061236368918e-07, "loss": 0.1538, "step": 28490 }, { "epoch": 0.82, "grad_norm": 7.084321499131575, "learning_rate": 8.626457014410294e-07, "loss": 0.4691, "step": 28491 }, { "epoch": 0.82, "grad_norm": 5.858616080408795, "learning_rate": 8.62385314838316e-07, "loss": 0.4696, "step": 28492 }, { "epoch": 0.82, "grad_norm": 9.621258788836453, "learning_rate": 8.621249638309931e-07, "loss": 0.4963, "step": 28493 }, { "epoch": 0.82, "grad_norm": 7.346349962069446, "learning_rate": 8.618646484212989e-07, "loss": 0.4003, "step": 28494 }, { "epoch": 0.82, "grad_norm": 6.434392975554663, "learning_rate": 8.616043686114745e-07, "loss": 0.3224, "step": 28495 }, { "epoch": 0.82, "grad_norm": 7.355344863472271, "learning_rate": 8.613441244037568e-07, "loss": 0.7318, "step": 28496 }, { "epoch": 0.82, "grad_norm": 5.3300516443094486, "learning_rate": 8.610839158003876e-07, "loss": 0.3099, "step": 28497 }, { "epoch": 0.82, "grad_norm": 4.066623942093425, "learning_rate": 8.608237428036037e-07, "loss": 0.5374, "step": 28498 }, { "epoch": 0.82, "grad_norm": 2.3647878085105414, "learning_rate": 8.605636054156436e-07, "loss": 0.2092, "step": 28499 }, { "epoch": 0.82, "grad_norm": 3.219685724038189, "learning_rate": 8.603035036387441e-07, "loss": 0.3601, "step": 28500 }, { "epoch": 0.82, "grad_norm": 7.284880964789874, "learning_rate": 8.600434374751437e-07, "loss": 0.5153, "step": 28501 }, { "epoch": 0.82, "grad_norm": 7.369781522889901, "learning_rate": 8.597834069270811e-07, "loss": 0.4625, "step": 28502 }, { "epoch": 0.82, "grad_norm": 6.540877916282374, "learning_rate": 8.59523411996791e-07, "loss": 0.4229, "step": 28503 }, { "epoch": 0.82, "grad_norm": 5.6196908267086165, "learning_rate": 8.592634526865118e-07, "loss": 0.538, "step": 28504 }, { "epoch": 0.82, "grad_norm": 8.17807396073644, "learning_rate": 8.590035289984799e-07, "loss": 0.073, "step": 28505 }, { "epoch": 0.82, "grad_norm": 17.493625678860568, "learning_rate": 8.587436409349309e-07, "loss": 0.5882, "step": 28506 }, { "epoch": 0.82, "grad_norm": 5.544274163652038, "learning_rate": 8.58483788498099e-07, "loss": 0.5262, "step": 28507 }, { "epoch": 0.82, "grad_norm": 6.272752850371069, "learning_rate": 8.582239716902224e-07, "loss": 0.5038, "step": 28508 }, { "epoch": 0.82, "grad_norm": 5.87173111248499, "learning_rate": 8.579641905135338e-07, "loss": 0.3498, "step": 28509 }, { "epoch": 0.82, "grad_norm": 5.708130863060964, "learning_rate": 8.577044449702692e-07, "loss": 0.5837, "step": 28510 }, { "epoch": 0.82, "grad_norm": 3.3599252471953025, "learning_rate": 8.574447350626641e-07, "loss": 0.2237, "step": 28511 }, { "epoch": 0.82, "grad_norm": 4.7577798525938215, "learning_rate": 8.571850607929527e-07, "loss": 0.2318, "step": 28512 }, { "epoch": 0.82, "grad_norm": 5.996868826508637, "learning_rate": 8.569254221633677e-07, "loss": 0.3369, "step": 28513 }, { "epoch": 0.82, "grad_norm": 13.934644107140652, "learning_rate": 8.566658191761418e-07, "loss": 0.4654, "step": 28514 }, { "epoch": 0.82, "grad_norm": 6.598439921605151, "learning_rate": 8.564062518335114e-07, "loss": 0.4953, "step": 28515 }, { "epoch": 0.82, "grad_norm": 5.630796201247684, "learning_rate": 8.561467201377066e-07, "loss": 0.7809, "step": 28516 }, { "epoch": 0.82, "grad_norm": 3.330856294967177, "learning_rate": 8.55887224090961e-07, "loss": 0.1673, "step": 28517 }, { "epoch": 0.82, "grad_norm": 8.627109048585965, "learning_rate": 8.556277636955101e-07, "loss": 0.3526, "step": 28518 }, { "epoch": 0.82, "grad_norm": 9.116838907803373, "learning_rate": 8.553683389535805e-07, "loss": 0.5556, "step": 28519 }, { "epoch": 0.82, "grad_norm": 4.937028427782401, "learning_rate": 8.551089498674082e-07, "loss": 0.6606, "step": 28520 }, { "epoch": 0.82, "grad_norm": 5.532011399962787, "learning_rate": 8.548495964392222e-07, "loss": 0.5649, "step": 28521 }, { "epoch": 0.82, "grad_norm": 4.251808763330434, "learning_rate": 8.545902786712556e-07, "loss": 0.3284, "step": 28522 }, { "epoch": 0.82, "grad_norm": 5.654039825763377, "learning_rate": 8.543309965657376e-07, "loss": 0.4645, "step": 28523 }, { "epoch": 0.82, "grad_norm": 1.7565562442789626, "learning_rate": 8.540717501249007e-07, "loss": 0.2191, "step": 28524 }, { "epoch": 0.82, "grad_norm": 6.716756622557673, "learning_rate": 8.538125393509739e-07, "loss": 0.5539, "step": 28525 }, { "epoch": 0.82, "grad_norm": 6.503957643964547, "learning_rate": 8.535533642461863e-07, "loss": 0.5617, "step": 28526 }, { "epoch": 0.82, "grad_norm": 3.488163722878849, "learning_rate": 8.532942248127701e-07, "loss": 0.3354, "step": 28527 }, { "epoch": 0.82, "grad_norm": 7.254119557668305, "learning_rate": 8.530351210529514e-07, "loss": 0.6823, "step": 28528 }, { "epoch": 0.82, "grad_norm": 11.994844620665475, "learning_rate": 8.527760529689621e-07, "loss": 0.5925, "step": 28529 }, { "epoch": 0.82, "grad_norm": 6.920070335438666, "learning_rate": 8.525170205630289e-07, "loss": 0.7794, "step": 28530 }, { "epoch": 0.82, "grad_norm": 4.150956188488088, "learning_rate": 8.522580238373823e-07, "loss": 0.4317, "step": 28531 }, { "epoch": 0.82, "grad_norm": 3.295540182352239, "learning_rate": 8.519990627942493e-07, "loss": 0.3106, "step": 28532 }, { "epoch": 0.82, "grad_norm": 5.447082625878571, "learning_rate": 8.517401374358564e-07, "loss": 0.63, "step": 28533 }, { "epoch": 0.82, "grad_norm": 5.0906416530348455, "learning_rate": 8.514812477644335e-07, "loss": 0.5005, "step": 28534 }, { "epoch": 0.82, "grad_norm": 9.886393384579794, "learning_rate": 8.512223937822056e-07, "loss": 0.7977, "step": 28535 }, { "epoch": 0.82, "grad_norm": 7.910663383145435, "learning_rate": 8.509635754914014e-07, "loss": 0.41, "step": 28536 }, { "epoch": 0.82, "grad_norm": 3.7990683668410887, "learning_rate": 8.507047928942458e-07, "loss": 0.6051, "step": 28537 }, { "epoch": 0.82, "grad_norm": 4.811894019481104, "learning_rate": 8.504460459929675e-07, "loss": 0.3181, "step": 28538 }, { "epoch": 0.82, "grad_norm": 4.216431326700576, "learning_rate": 8.501873347897904e-07, "loss": 0.3529, "step": 28539 }, { "epoch": 0.82, "grad_norm": 6.295750179116114, "learning_rate": 8.49928659286941e-07, "loss": 0.4418, "step": 28540 }, { "epoch": 0.82, "grad_norm": 6.435491600435993, "learning_rate": 8.496700194866431e-07, "loss": 0.3713, "step": 28541 }, { "epoch": 0.82, "grad_norm": 7.75133767427672, "learning_rate": 8.494114153911232e-07, "loss": 0.3509, "step": 28542 }, { "epoch": 0.82, "grad_norm": 6.456781935220081, "learning_rate": 8.491528470026073e-07, "loss": 0.717, "step": 28543 }, { "epoch": 0.82, "grad_norm": 2.9866261405267296, "learning_rate": 8.488943143233163e-07, "loss": 0.241, "step": 28544 }, { "epoch": 0.82, "grad_norm": 5.615530095813621, "learning_rate": 8.486358173554782e-07, "loss": 0.4266, "step": 28545 }, { "epoch": 0.82, "grad_norm": 1.5597296091799109, "learning_rate": 8.483773561013153e-07, "loss": 0.1291, "step": 28546 }, { "epoch": 0.82, "grad_norm": 7.61790943343693, "learning_rate": 8.481189305630505e-07, "loss": 0.231, "step": 28547 }, { "epoch": 0.82, "grad_norm": 3.740641901907026, "learning_rate": 8.478605407429058e-07, "loss": 0.5014, "step": 28548 }, { "epoch": 0.82, "grad_norm": 4.864616633538812, "learning_rate": 8.476021866431073e-07, "loss": 0.4245, "step": 28549 }, { "epoch": 0.82, "grad_norm": 6.952608959464872, "learning_rate": 8.473438682658747e-07, "loss": 0.2728, "step": 28550 }, { "epoch": 0.82, "grad_norm": 3.765396712731754, "learning_rate": 8.47085585613433e-07, "loss": 0.2122, "step": 28551 }, { "epoch": 0.82, "grad_norm": 4.7244242726115955, "learning_rate": 8.468273386880021e-07, "loss": 0.5301, "step": 28552 }, { "epoch": 0.82, "grad_norm": 5.11541586922077, "learning_rate": 8.465691274918031e-07, "loss": 0.3636, "step": 28553 }, { "epoch": 0.82, "grad_norm": 4.12754546724412, "learning_rate": 8.463109520270601e-07, "loss": 0.138, "step": 28554 }, { "epoch": 0.82, "grad_norm": 6.244152690743347, "learning_rate": 8.46052812295991e-07, "loss": 0.4473, "step": 28555 }, { "epoch": 0.82, "grad_norm": 1.5155303079971987, "learning_rate": 8.457947083008194e-07, "loss": 0.1137, "step": 28556 }, { "epoch": 0.82, "grad_norm": 5.305618833605019, "learning_rate": 8.455366400437637e-07, "loss": 0.3091, "step": 28557 }, { "epoch": 0.82, "grad_norm": 6.139809377146684, "learning_rate": 8.452786075270453e-07, "loss": 0.5702, "step": 28558 }, { "epoch": 0.82, "grad_norm": 5.933484817921351, "learning_rate": 8.450206107528841e-07, "loss": 0.3667, "step": 28559 }, { "epoch": 0.82, "grad_norm": 5.408362885302825, "learning_rate": 8.447626497234973e-07, "loss": 0.4547, "step": 28560 }, { "epoch": 0.82, "grad_norm": 3.5007457790097316, "learning_rate": 8.445047244411076e-07, "loss": 0.3797, "step": 28561 }, { "epoch": 0.82, "grad_norm": 8.058083085833037, "learning_rate": 8.442468349079302e-07, "loss": 0.5409, "step": 28562 }, { "epoch": 0.82, "grad_norm": 23.75320607680797, "learning_rate": 8.43988981126187e-07, "loss": 0.3379, "step": 28563 }, { "epoch": 0.82, "grad_norm": 5.077931374012903, "learning_rate": 8.437311630980938e-07, "loss": 0.4918, "step": 28564 }, { "epoch": 0.82, "grad_norm": 3.840067984008996, "learning_rate": 8.43473380825871e-07, "loss": 0.4273, "step": 28565 }, { "epoch": 0.82, "grad_norm": 5.18973759570326, "learning_rate": 8.432156343117348e-07, "loss": 0.367, "step": 28566 }, { "epoch": 0.82, "grad_norm": 5.489136718109253, "learning_rate": 8.429579235579016e-07, "loss": 0.332, "step": 28567 }, { "epoch": 0.82, "grad_norm": 5.7200908652337645, "learning_rate": 8.427002485665909e-07, "loss": 0.392, "step": 28568 }, { "epoch": 0.82, "grad_norm": 7.236540435271088, "learning_rate": 8.424426093400168e-07, "loss": 0.4284, "step": 28569 }, { "epoch": 0.82, "grad_norm": 4.18168539622828, "learning_rate": 8.421850058803982e-07, "loss": 0.2148, "step": 28570 }, { "epoch": 0.82, "grad_norm": 6.673241968847404, "learning_rate": 8.419274381899483e-07, "loss": 0.4801, "step": 28571 }, { "epoch": 0.82, "grad_norm": 4.309404921064728, "learning_rate": 8.416699062708867e-07, "loss": 0.4083, "step": 28572 }, { "epoch": 0.82, "grad_norm": 6.3019538256576775, "learning_rate": 8.414124101254268e-07, "loss": 0.7958, "step": 28573 }, { "epoch": 0.82, "grad_norm": 4.354554303571666, "learning_rate": 8.411549497557824e-07, "loss": 0.2239, "step": 28574 }, { "epoch": 0.82, "grad_norm": 4.159552101360126, "learning_rate": 8.408975251641716e-07, "loss": 0.276, "step": 28575 }, { "epoch": 0.82, "grad_norm": 5.430017870176356, "learning_rate": 8.406401363528055e-07, "loss": 0.3415, "step": 28576 }, { "epoch": 0.82, "grad_norm": 4.833877617784216, "learning_rate": 8.403827833239014e-07, "loss": 0.4167, "step": 28577 }, { "epoch": 0.82, "grad_norm": 5.365046022578613, "learning_rate": 8.401254660796726e-07, "loss": 0.3229, "step": 28578 }, { "epoch": 0.82, "grad_norm": 6.427033220669727, "learning_rate": 8.398681846223323e-07, "loss": 0.4799, "step": 28579 }, { "epoch": 0.82, "grad_norm": 9.452326565906667, "learning_rate": 8.396109389540918e-07, "loss": 0.6283, "step": 28580 }, { "epoch": 0.82, "grad_norm": 5.216828683565512, "learning_rate": 8.39353729077168e-07, "loss": 0.5384, "step": 28581 }, { "epoch": 0.82, "grad_norm": 5.2124416348050735, "learning_rate": 8.390965549937702e-07, "loss": 0.4854, "step": 28582 }, { "epoch": 0.82, "grad_norm": 4.7255842977869005, "learning_rate": 8.388394167061126e-07, "loss": 0.5737, "step": 28583 }, { "epoch": 0.82, "grad_norm": 6.529267297621274, "learning_rate": 8.385823142164085e-07, "loss": 0.8014, "step": 28584 }, { "epoch": 0.82, "grad_norm": 7.323972033605094, "learning_rate": 8.383252475268682e-07, "loss": 0.4593, "step": 28585 }, { "epoch": 0.82, "grad_norm": 3.8307376276264677, "learning_rate": 8.380682166397036e-07, "loss": 0.2751, "step": 28586 }, { "epoch": 0.82, "grad_norm": 5.4370112144631, "learning_rate": 8.378112215571244e-07, "loss": 0.5002, "step": 28587 }, { "epoch": 0.82, "grad_norm": 2.9915472396418323, "learning_rate": 8.375542622813443e-07, "loss": 0.2218, "step": 28588 }, { "epoch": 0.82, "grad_norm": 4.46151310135011, "learning_rate": 8.372973388145705e-07, "loss": 0.322, "step": 28589 }, { "epoch": 0.82, "grad_norm": 6.173209613445327, "learning_rate": 8.370404511590169e-07, "loss": 0.8992, "step": 28590 }, { "epoch": 0.82, "grad_norm": 6.437418520513635, "learning_rate": 8.367835993168905e-07, "loss": 0.3992, "step": 28591 }, { "epoch": 0.82, "grad_norm": 4.720215879207177, "learning_rate": 8.365267832904034e-07, "loss": 0.396, "step": 28592 }, { "epoch": 0.82, "grad_norm": 3.865553354572316, "learning_rate": 8.362700030817633e-07, "loss": 0.4178, "step": 28593 }, { "epoch": 0.82, "grad_norm": 3.7604133028277764, "learning_rate": 8.360132586931791e-07, "loss": 0.2886, "step": 28594 }, { "epoch": 0.82, "grad_norm": 7.967359264412762, "learning_rate": 8.357565501268611e-07, "loss": 0.483, "step": 28595 }, { "epoch": 0.82, "grad_norm": 4.321372041929284, "learning_rate": 8.354998773850154e-07, "loss": 0.5217, "step": 28596 }, { "epoch": 0.82, "grad_norm": 3.51087936536587, "learning_rate": 8.35243240469853e-07, "loss": 0.3452, "step": 28597 }, { "epoch": 0.82, "grad_norm": 4.517777612934983, "learning_rate": 8.349866393835787e-07, "loss": 0.4829, "step": 28598 }, { "epoch": 0.82, "grad_norm": 2.656344703781222, "learning_rate": 8.347300741284032e-07, "loss": 0.174, "step": 28599 }, { "epoch": 0.82, "grad_norm": 4.782930764372128, "learning_rate": 8.344735447065316e-07, "loss": 0.3918, "step": 28600 }, { "epoch": 0.82, "grad_norm": 6.254927409457523, "learning_rate": 8.342170511201703e-07, "loss": 0.3248, "step": 28601 }, { "epoch": 0.82, "grad_norm": 6.957806603450406, "learning_rate": 8.339605933715283e-07, "loss": 0.5388, "step": 28602 }, { "epoch": 0.82, "grad_norm": 5.452252381031048, "learning_rate": 8.337041714628086e-07, "loss": 0.5666, "step": 28603 }, { "epoch": 0.82, "grad_norm": 6.131067812640255, "learning_rate": 8.334477853962203e-07, "loss": 0.3245, "step": 28604 }, { "epoch": 0.82, "grad_norm": 3.42214874148094, "learning_rate": 8.331914351739678e-07, "loss": 0.7015, "step": 28605 }, { "epoch": 0.82, "grad_norm": 2.8386789842807643, "learning_rate": 8.329351207982562e-07, "loss": 0.2472, "step": 28606 }, { "epoch": 0.82, "grad_norm": 3.5172918140697775, "learning_rate": 8.326788422712895e-07, "loss": 0.451, "step": 28607 }, { "epoch": 0.82, "grad_norm": 4.767527166248169, "learning_rate": 8.324225995952739e-07, "loss": 0.4793, "step": 28608 }, { "epoch": 0.82, "grad_norm": 9.22957603861299, "learning_rate": 8.321663927724144e-07, "loss": 1.0297, "step": 28609 }, { "epoch": 0.82, "grad_norm": 5.5997302944085785, "learning_rate": 8.319102218049135e-07, "loss": 0.2993, "step": 28610 }, { "epoch": 0.82, "grad_norm": 4.201947852153826, "learning_rate": 8.316540866949768e-07, "loss": 0.2437, "step": 28611 }, { "epoch": 0.82, "grad_norm": 5.753585775428009, "learning_rate": 8.313979874448063e-07, "loss": 0.3713, "step": 28612 }, { "epoch": 0.82, "grad_norm": 5.40328942529126, "learning_rate": 8.311419240566066e-07, "loss": 0.5225, "step": 28613 }, { "epoch": 0.82, "grad_norm": 1.3590519373218592, "learning_rate": 8.308858965325778e-07, "loss": 0.082, "step": 28614 }, { "epoch": 0.82, "grad_norm": 4.088579713032903, "learning_rate": 8.306299048749239e-07, "loss": 0.4184, "step": 28615 }, { "epoch": 0.82, "grad_norm": 2.467908498542386, "learning_rate": 8.303739490858498e-07, "loss": 0.2155, "step": 28616 }, { "epoch": 0.82, "grad_norm": 7.7763838048649845, "learning_rate": 8.301180291675531e-07, "loss": 0.3988, "step": 28617 }, { "epoch": 0.82, "grad_norm": 6.326035340576761, "learning_rate": 8.298621451222393e-07, "loss": 0.4577, "step": 28618 }, { "epoch": 0.82, "grad_norm": 4.228295258380805, "learning_rate": 8.29606296952108e-07, "loss": 0.2531, "step": 28619 }, { "epoch": 0.82, "grad_norm": 5.0343971597668045, "learning_rate": 8.293504846593603e-07, "loss": 0.3644, "step": 28620 }, { "epoch": 0.82, "grad_norm": 3.630482408347233, "learning_rate": 8.290947082461953e-07, "loss": 0.3216, "step": 28621 }, { "epoch": 0.82, "grad_norm": 5.869447072386536, "learning_rate": 8.288389677148167e-07, "loss": 0.5271, "step": 28622 }, { "epoch": 0.82, "grad_norm": 7.345202298322606, "learning_rate": 8.285832630674212e-07, "loss": 0.3875, "step": 28623 }, { "epoch": 0.82, "grad_norm": 4.983067954002549, "learning_rate": 8.283275943062108e-07, "loss": 0.2448, "step": 28624 }, { "epoch": 0.82, "grad_norm": 4.872638277036282, "learning_rate": 8.280719614333855e-07, "loss": 0.4477, "step": 28625 }, { "epoch": 0.82, "grad_norm": 3.306295648414742, "learning_rate": 8.278163644511433e-07, "loss": 0.2606, "step": 28626 }, { "epoch": 0.82, "grad_norm": 3.035755721779135, "learning_rate": 8.275608033616833e-07, "loss": 0.3893, "step": 28627 }, { "epoch": 0.82, "grad_norm": 5.168778428872469, "learning_rate": 8.273052781672025e-07, "loss": 0.4597, "step": 28628 }, { "epoch": 0.82, "grad_norm": 3.989786489840318, "learning_rate": 8.270497888699025e-07, "loss": 0.4017, "step": 28629 }, { "epoch": 0.82, "grad_norm": 10.450138503621522, "learning_rate": 8.267943354719776e-07, "loss": 0.6187, "step": 28630 }, { "epoch": 0.82, "grad_norm": 5.482528201690378, "learning_rate": 8.265389179756283e-07, "loss": 0.4297, "step": 28631 }, { "epoch": 0.82, "grad_norm": 5.126755251017852, "learning_rate": 8.262835363830501e-07, "loss": 0.4172, "step": 28632 }, { "epoch": 0.82, "grad_norm": 5.898413449674168, "learning_rate": 8.260281906964418e-07, "loss": 0.2946, "step": 28633 }, { "epoch": 0.82, "grad_norm": 9.315714236615529, "learning_rate": 8.257728809179994e-07, "loss": 0.4719, "step": 28634 }, { "epoch": 0.82, "grad_norm": 5.246844296936392, "learning_rate": 8.255176070499171e-07, "loss": 0.2455, "step": 28635 }, { "epoch": 0.82, "grad_norm": 5.4245152270039805, "learning_rate": 8.252623690943944e-07, "loss": 0.632, "step": 28636 }, { "epoch": 0.82, "grad_norm": 5.427417922978563, "learning_rate": 8.250071670536242e-07, "loss": 0.4294, "step": 28637 }, { "epoch": 0.82, "grad_norm": 3.866842697647397, "learning_rate": 8.247520009298049e-07, "loss": 0.4175, "step": 28638 }, { "epoch": 0.82, "grad_norm": 8.405452647913055, "learning_rate": 8.2449687072513e-07, "loss": 0.38, "step": 28639 }, { "epoch": 0.82, "grad_norm": 7.084431884698784, "learning_rate": 8.242417764417926e-07, "loss": 0.5333, "step": 28640 }, { "epoch": 0.82, "grad_norm": 2.7347655099215715, "learning_rate": 8.239867180819905e-07, "loss": 0.187, "step": 28641 }, { "epoch": 0.82, "grad_norm": 5.359766331390364, "learning_rate": 8.237316956479158e-07, "loss": 0.4584, "step": 28642 }, { "epoch": 0.82, "grad_norm": 9.168852585918783, "learning_rate": 8.234767091417639e-07, "loss": 0.5756, "step": 28643 }, { "epoch": 0.82, "grad_norm": 11.121229239932468, "learning_rate": 8.232217585657265e-07, "loss": 0.615, "step": 28644 }, { "epoch": 0.82, "grad_norm": 4.718741789551722, "learning_rate": 8.229668439219996e-07, "loss": 0.2946, "step": 28645 }, { "epoch": 0.82, "grad_norm": 6.92570097091974, "learning_rate": 8.227119652127741e-07, "loss": 0.6661, "step": 28646 }, { "epoch": 0.82, "grad_norm": 4.143163111855681, "learning_rate": 8.224571224402433e-07, "loss": 0.206, "step": 28647 }, { "epoch": 0.82, "grad_norm": 3.597219500710445, "learning_rate": 8.222023156065984e-07, "loss": 0.4161, "step": 28648 }, { "epoch": 0.82, "grad_norm": 4.140658770729398, "learning_rate": 8.219475447140329e-07, "loss": 0.3262, "step": 28649 }, { "epoch": 0.82, "grad_norm": 5.600951285115149, "learning_rate": 8.216928097647392e-07, "loss": 0.5915, "step": 28650 }, { "epoch": 0.82, "grad_norm": 2.2746315930300263, "learning_rate": 8.214381107609065e-07, "loss": 0.1163, "step": 28651 }, { "epoch": 0.82, "grad_norm": 5.836216236898413, "learning_rate": 8.211834477047287e-07, "loss": 0.4755, "step": 28652 }, { "epoch": 0.82, "grad_norm": 4.270587396130954, "learning_rate": 8.209288205983951e-07, "loss": 0.4062, "step": 28653 }, { "epoch": 0.82, "grad_norm": 4.980333130429869, "learning_rate": 8.206742294440967e-07, "loss": 0.5739, "step": 28654 }, { "epoch": 0.82, "grad_norm": 4.707316561714191, "learning_rate": 8.204196742440212e-07, "loss": 0.5735, "step": 28655 }, { "epoch": 0.82, "grad_norm": 4.991998803239459, "learning_rate": 8.201651550003625e-07, "loss": 0.5472, "step": 28656 }, { "epoch": 0.82, "grad_norm": 5.536123833251175, "learning_rate": 8.199106717153072e-07, "loss": 0.4434, "step": 28657 }, { "epoch": 0.82, "grad_norm": 6.886922728340844, "learning_rate": 8.196562243910456e-07, "loss": 0.531, "step": 28658 }, { "epoch": 0.82, "grad_norm": 4.478574423855249, "learning_rate": 8.194018130297682e-07, "loss": 0.3141, "step": 28659 }, { "epoch": 0.82, "grad_norm": 4.3708977949384975, "learning_rate": 8.191474376336617e-07, "loss": 0.2957, "step": 28660 }, { "epoch": 0.82, "grad_norm": 2.899594428213931, "learning_rate": 8.188930982049153e-07, "loss": 0.2209, "step": 28661 }, { "epoch": 0.82, "grad_norm": 7.149693346450091, "learning_rate": 8.186387947457158e-07, "loss": 0.6071, "step": 28662 }, { "epoch": 0.82, "grad_norm": 9.586586132038464, "learning_rate": 8.18384527258253e-07, "loss": 0.6938, "step": 28663 }, { "epoch": 0.82, "grad_norm": 4.434479988688703, "learning_rate": 8.181302957447118e-07, "loss": 0.5583, "step": 28664 }, { "epoch": 0.82, "grad_norm": 4.623048989440925, "learning_rate": 8.178761002072821e-07, "loss": 0.3783, "step": 28665 }, { "epoch": 0.82, "grad_norm": 6.471189066714425, "learning_rate": 8.1762194064815e-07, "loss": 0.4314, "step": 28666 }, { "epoch": 0.82, "grad_norm": 5.885231098411584, "learning_rate": 8.173678170694993e-07, "loss": 0.4984, "step": 28667 }, { "epoch": 0.82, "grad_norm": 10.058426879683786, "learning_rate": 8.171137294735198e-07, "loss": 0.8651, "step": 28668 }, { "epoch": 0.82, "grad_norm": 6.947259310423754, "learning_rate": 8.16859677862395e-07, "loss": 0.536, "step": 28669 }, { "epoch": 0.82, "grad_norm": 7.6511119495658155, "learning_rate": 8.166056622383122e-07, "loss": 0.5596, "step": 28670 }, { "epoch": 0.82, "grad_norm": 6.639726645346686, "learning_rate": 8.163516826034551e-07, "loss": 0.6857, "step": 28671 }, { "epoch": 0.82, "grad_norm": 4.485644330337457, "learning_rate": 8.160977389600099e-07, "loss": 0.4954, "step": 28672 }, { "epoch": 0.82, "grad_norm": 5.154749449663796, "learning_rate": 8.158438313101613e-07, "loss": 0.3812, "step": 28673 }, { "epoch": 0.82, "grad_norm": 9.812246768866812, "learning_rate": 8.155899596560913e-07, "loss": 0.5885, "step": 28674 }, { "epoch": 0.82, "grad_norm": 3.719659437644008, "learning_rate": 8.153361239999874e-07, "loss": 0.4352, "step": 28675 }, { "epoch": 0.82, "grad_norm": 6.518586654298959, "learning_rate": 8.150823243440298e-07, "loss": 0.4378, "step": 28676 }, { "epoch": 0.82, "grad_norm": 5.8276361664418985, "learning_rate": 8.148285606904055e-07, "loss": 0.2859, "step": 28677 }, { "epoch": 0.82, "grad_norm": 6.215114479713852, "learning_rate": 8.145748330412945e-07, "loss": 0.385, "step": 28678 }, { "epoch": 0.82, "grad_norm": 6.709980018176644, "learning_rate": 8.143211413988816e-07, "loss": 0.415, "step": 28679 }, { "epoch": 0.82, "grad_norm": 4.290715217058611, "learning_rate": 8.140674857653491e-07, "loss": 0.3241, "step": 28680 }, { "epoch": 0.82, "grad_norm": 3.583514656240246, "learning_rate": 8.138138661428774e-07, "loss": 0.352, "step": 28681 }, { "epoch": 0.82, "grad_norm": 6.094212812797315, "learning_rate": 8.135602825336503e-07, "loss": 0.5984, "step": 28682 }, { "epoch": 0.82, "grad_norm": 7.76126845291409, "learning_rate": 8.133067349398477e-07, "loss": 0.322, "step": 28683 }, { "epoch": 0.82, "grad_norm": 3.6525073662487113, "learning_rate": 8.130532233636534e-07, "loss": 0.2804, "step": 28684 }, { "epoch": 0.82, "grad_norm": 3.305916686103856, "learning_rate": 8.127997478072453e-07, "loss": 0.2252, "step": 28685 }, { "epoch": 0.82, "grad_norm": 4.971847000858441, "learning_rate": 8.125463082728063e-07, "loss": 0.2234, "step": 28686 }, { "epoch": 0.82, "grad_norm": 8.566147632930747, "learning_rate": 8.122929047625161e-07, "loss": 0.4388, "step": 28687 }, { "epoch": 0.82, "grad_norm": 3.305981898840374, "learning_rate": 8.120395372785545e-07, "loss": 0.4414, "step": 28688 }, { "epoch": 0.82, "grad_norm": 6.513997448701391, "learning_rate": 8.117862058230997e-07, "loss": 0.7688, "step": 28689 }, { "epoch": 0.82, "grad_norm": 6.479395928778212, "learning_rate": 8.115329103983321e-07, "loss": 0.3927, "step": 28690 }, { "epoch": 0.82, "grad_norm": 6.21462630605424, "learning_rate": 8.112796510064331e-07, "loss": 0.4192, "step": 28691 }, { "epoch": 0.82, "grad_norm": 4.8294163430647234, "learning_rate": 8.110264276495783e-07, "loss": 0.4058, "step": 28692 }, { "epoch": 0.82, "grad_norm": 5.510581112038212, "learning_rate": 8.107732403299496e-07, "loss": 0.773, "step": 28693 }, { "epoch": 0.82, "grad_norm": 7.2035018590619035, "learning_rate": 8.105200890497206e-07, "loss": 0.7327, "step": 28694 }, { "epoch": 0.82, "grad_norm": 6.027828155059001, "learning_rate": 8.102669738110724e-07, "loss": 0.3172, "step": 28695 }, { "epoch": 0.82, "grad_norm": 8.166735110352906, "learning_rate": 8.100138946161807e-07, "loss": 0.4212, "step": 28696 }, { "epoch": 0.82, "grad_norm": 4.317156779082951, "learning_rate": 8.097608514672245e-07, "loss": 0.1143, "step": 28697 }, { "epoch": 0.82, "grad_norm": 7.252229709421043, "learning_rate": 8.095078443663784e-07, "loss": 0.371, "step": 28698 }, { "epoch": 0.82, "grad_norm": 6.642362479500952, "learning_rate": 8.092548733158218e-07, "loss": 0.5092, "step": 28699 }, { "epoch": 0.82, "grad_norm": 2.942116336296147, "learning_rate": 8.0900193831773e-07, "loss": 0.1741, "step": 28700 }, { "epoch": 0.82, "grad_norm": 3.696025448952001, "learning_rate": 8.087490393742764e-07, "loss": 0.3197, "step": 28701 }, { "epoch": 0.82, "grad_norm": 4.837647463894441, "learning_rate": 8.084961764876409e-07, "loss": 0.4148, "step": 28702 }, { "epoch": 0.82, "grad_norm": 4.942887710623247, "learning_rate": 8.082433496599951e-07, "loss": 0.5458, "step": 28703 }, { "epoch": 0.82, "grad_norm": 5.416560335827371, "learning_rate": 8.079905588935166e-07, "loss": 0.4858, "step": 28704 }, { "epoch": 0.82, "grad_norm": 5.8794759379291195, "learning_rate": 8.077378041903783e-07, "loss": 0.514, "step": 28705 }, { "epoch": 0.82, "grad_norm": 3.3444773515734845, "learning_rate": 8.074850855527567e-07, "loss": 0.2773, "step": 28706 }, { "epoch": 0.82, "grad_norm": 4.111968987685641, "learning_rate": 8.072324029828249e-07, "loss": 0.5335, "step": 28707 }, { "epoch": 0.82, "grad_norm": 2.9361607359613546, "learning_rate": 8.069797564827547e-07, "loss": 0.2549, "step": 28708 }, { "epoch": 0.82, "grad_norm": 3.578133045837152, "learning_rate": 8.067271460547232e-07, "loss": 0.2691, "step": 28709 }, { "epoch": 0.82, "grad_norm": 7.134593194005199, "learning_rate": 8.064745717009004e-07, "loss": 0.269, "step": 28710 }, { "epoch": 0.82, "grad_norm": 4.682529153483604, "learning_rate": 8.062220334234616e-07, "loss": 0.7446, "step": 28711 }, { "epoch": 0.82, "grad_norm": 3.9099520421318776, "learning_rate": 8.059695312245774e-07, "loss": 0.3307, "step": 28712 }, { "epoch": 0.82, "grad_norm": 5.4305085364168315, "learning_rate": 8.057170651064217e-07, "loss": 0.5407, "step": 28713 }, { "epoch": 0.82, "grad_norm": 8.670925631406915, "learning_rate": 8.054646350711659e-07, "loss": 0.5294, "step": 28714 }, { "epoch": 0.82, "grad_norm": 5.084355871613575, "learning_rate": 8.052122411209801e-07, "loss": 0.5312, "step": 28715 }, { "epoch": 0.82, "grad_norm": 4.5407373486643845, "learning_rate": 8.049598832580385e-07, "loss": 0.4328, "step": 28716 }, { "epoch": 0.82, "grad_norm": 4.194047506271631, "learning_rate": 8.04707561484509e-07, "loss": 0.2705, "step": 28717 }, { "epoch": 0.82, "grad_norm": 7.652104029812248, "learning_rate": 8.044552758025647e-07, "loss": 0.2487, "step": 28718 }, { "epoch": 0.82, "grad_norm": 7.1658359607996776, "learning_rate": 8.042030262143746e-07, "loss": 0.3801, "step": 28719 }, { "epoch": 0.82, "grad_norm": 2.666915294610111, "learning_rate": 8.039508127221113e-07, "loss": 0.2064, "step": 28720 }, { "epoch": 0.82, "grad_norm": 8.13316984976428, "learning_rate": 8.036986353279402e-07, "loss": 0.793, "step": 28721 }, { "epoch": 0.82, "grad_norm": 4.906796431028424, "learning_rate": 8.03446494034033e-07, "loss": 0.2519, "step": 28722 }, { "epoch": 0.82, "grad_norm": 2.0748965490359645, "learning_rate": 8.031943888425597e-07, "loss": 0.1658, "step": 28723 }, { "epoch": 0.82, "grad_norm": 3.764443121035161, "learning_rate": 8.029423197556879e-07, "loss": 0.2097, "step": 28724 }, { "epoch": 0.82, "grad_norm": 5.910998217003389, "learning_rate": 8.026902867755876e-07, "loss": 0.5977, "step": 28725 }, { "epoch": 0.82, "grad_norm": 5.466300066813079, "learning_rate": 8.024382899044258e-07, "loss": 0.7222, "step": 28726 }, { "epoch": 0.82, "grad_norm": 4.723520457012174, "learning_rate": 8.021863291443704e-07, "loss": 0.3395, "step": 28727 }, { "epoch": 0.82, "grad_norm": 7.965742553466098, "learning_rate": 8.01934404497588e-07, "loss": 0.985, "step": 28728 }, { "epoch": 0.82, "grad_norm": 5.1507197923319294, "learning_rate": 8.016825159662484e-07, "loss": 0.3735, "step": 28729 }, { "epoch": 0.82, "grad_norm": 5.857688966406516, "learning_rate": 8.01430663552516e-07, "loss": 0.4746, "step": 28730 }, { "epoch": 0.82, "grad_norm": 3.683592196612875, "learning_rate": 8.011788472585586e-07, "loss": 0.4724, "step": 28731 }, { "epoch": 0.82, "grad_norm": 6.610753774558123, "learning_rate": 8.009270670865443e-07, "loss": 0.7765, "step": 28732 }, { "epoch": 0.82, "grad_norm": 4.820338318692251, "learning_rate": 8.006753230386366e-07, "loss": 0.5048, "step": 28733 }, { "epoch": 0.82, "grad_norm": 4.92268108246023, "learning_rate": 8.004236151170025e-07, "loss": 0.4315, "step": 28734 }, { "epoch": 0.82, "grad_norm": 9.249476495311336, "learning_rate": 8.001719433238058e-07, "loss": 0.4539, "step": 28735 }, { "epoch": 0.82, "grad_norm": 7.186551172405375, "learning_rate": 7.999203076612139e-07, "loss": 0.4393, "step": 28736 }, { "epoch": 0.82, "grad_norm": 4.592119251846041, "learning_rate": 7.996687081313891e-07, "loss": 0.4227, "step": 28737 }, { "epoch": 0.82, "grad_norm": 6.787052045621184, "learning_rate": 7.99417144736499e-07, "loss": 0.6592, "step": 28738 }, { "epoch": 0.82, "grad_norm": 1.7701809597723552, "learning_rate": 7.991656174787044e-07, "loss": 0.0712, "step": 28739 }, { "epoch": 0.82, "grad_norm": 4.414520131922348, "learning_rate": 7.989141263601718e-07, "loss": 0.2155, "step": 28740 }, { "epoch": 0.82, "grad_norm": 8.609085604309076, "learning_rate": 7.986626713830642e-07, "loss": 0.9473, "step": 28741 }, { "epoch": 0.82, "grad_norm": 6.176966631228119, "learning_rate": 7.98411252549543e-07, "loss": 0.2325, "step": 28742 }, { "epoch": 0.82, "grad_norm": 3.4862880843398307, "learning_rate": 7.981598698617732e-07, "loss": 0.1884, "step": 28743 }, { "epoch": 0.82, "grad_norm": 4.088778454510425, "learning_rate": 7.979085233219158e-07, "loss": 0.4608, "step": 28744 }, { "epoch": 0.82, "grad_norm": 4.04086039389273, "learning_rate": 7.976572129321353e-07, "loss": 0.3637, "step": 28745 }, { "epoch": 0.82, "grad_norm": 3.150695558986439, "learning_rate": 7.974059386945915e-07, "loss": 0.0976, "step": 28746 }, { "epoch": 0.82, "grad_norm": 5.568928229565619, "learning_rate": 7.971547006114477e-07, "loss": 0.7968, "step": 28747 }, { "epoch": 0.82, "grad_norm": 8.226608580544463, "learning_rate": 7.969034986848645e-07, "loss": 0.3886, "step": 28748 }, { "epoch": 0.82, "grad_norm": 6.000165221800464, "learning_rate": 7.966523329170022e-07, "loss": 0.3555, "step": 28749 }, { "epoch": 0.82, "grad_norm": 5.279657880073996, "learning_rate": 7.964012033100232e-07, "loss": 0.5036, "step": 28750 }, { "epoch": 0.82, "grad_norm": 4.6947270439354964, "learning_rate": 7.961501098660862e-07, "loss": 0.324, "step": 28751 }, { "epoch": 0.82, "grad_norm": 4.415249933186531, "learning_rate": 7.958990525873528e-07, "loss": 0.5569, "step": 28752 }, { "epoch": 0.82, "grad_norm": 5.727968051256555, "learning_rate": 7.956480314759829e-07, "loss": 0.7445, "step": 28753 }, { "epoch": 0.82, "grad_norm": 5.586854232577082, "learning_rate": 7.953970465341349e-07, "loss": 0.6781, "step": 28754 }, { "epoch": 0.82, "grad_norm": 3.302735169070536, "learning_rate": 7.951460977639674e-07, "loss": 0.4365, "step": 28755 }, { "epoch": 0.82, "grad_norm": 5.045268130705411, "learning_rate": 7.948951851676406e-07, "loss": 0.5495, "step": 28756 }, { "epoch": 0.82, "grad_norm": 5.628687645865204, "learning_rate": 7.946443087473132e-07, "loss": 0.5617, "step": 28757 }, { "epoch": 0.82, "grad_norm": 6.982121660941149, "learning_rate": 7.943934685051424e-07, "loss": 0.2831, "step": 28758 }, { "epoch": 0.82, "grad_norm": 7.363967790648198, "learning_rate": 7.941426644432881e-07, "loss": 0.2929, "step": 28759 }, { "epoch": 0.82, "grad_norm": 8.30116503019061, "learning_rate": 7.938918965639064e-07, "loss": 0.7513, "step": 28760 }, { "epoch": 0.82, "grad_norm": 3.470583079102243, "learning_rate": 7.93641164869155e-07, "loss": 0.3623, "step": 28761 }, { "epoch": 0.82, "grad_norm": 6.911278081027224, "learning_rate": 7.933904693611894e-07, "loss": 0.7211, "step": 28762 }, { "epoch": 0.82, "grad_norm": 4.902077862949564, "learning_rate": 7.931398100421678e-07, "loss": 0.4877, "step": 28763 }, { "epoch": 0.82, "grad_norm": 3.526903211557522, "learning_rate": 7.928891869142475e-07, "loss": 0.3363, "step": 28764 }, { "epoch": 0.82, "grad_norm": 5.407138332524147, "learning_rate": 7.926385999795822e-07, "loss": 0.455, "step": 28765 }, { "epoch": 0.82, "grad_norm": 2.0884190132547844, "learning_rate": 7.923880492403302e-07, "loss": 0.4743, "step": 28766 }, { "epoch": 0.82, "grad_norm": 4.768058430732897, "learning_rate": 7.92137534698646e-07, "loss": 0.5158, "step": 28767 }, { "epoch": 0.82, "grad_norm": 3.3411547134689736, "learning_rate": 7.918870563566844e-07, "loss": 0.2714, "step": 28768 }, { "epoch": 0.82, "grad_norm": 4.365642758922459, "learning_rate": 7.916366142165994e-07, "loss": 0.2901, "step": 28769 }, { "epoch": 0.82, "grad_norm": 4.66114038274579, "learning_rate": 7.913862082805474e-07, "loss": 0.4376, "step": 28770 }, { "epoch": 0.82, "grad_norm": 8.126507897937119, "learning_rate": 7.911358385506801e-07, "loss": 0.9481, "step": 28771 }, { "epoch": 0.82, "grad_norm": 15.972302842236195, "learning_rate": 7.908855050291537e-07, "loss": 0.6048, "step": 28772 }, { "epoch": 0.82, "grad_norm": 6.467911375666534, "learning_rate": 7.906352077181218e-07, "loss": 0.6119, "step": 28773 }, { "epoch": 0.82, "grad_norm": 7.524276417259836, "learning_rate": 7.903849466197372e-07, "loss": 0.3549, "step": 28774 }, { "epoch": 0.82, "grad_norm": 6.15593397229771, "learning_rate": 7.901347217361521e-07, "loss": 0.3407, "step": 28775 }, { "epoch": 0.82, "grad_norm": 5.049901001970207, "learning_rate": 7.898845330695193e-07, "loss": 0.6773, "step": 28776 }, { "epoch": 0.82, "grad_norm": 6.217912627157513, "learning_rate": 7.89634380621992e-07, "loss": 0.4294, "step": 28777 }, { "epoch": 0.82, "grad_norm": 5.495208343568722, "learning_rate": 7.893842643957206e-07, "loss": 0.3924, "step": 28778 }, { "epoch": 0.82, "grad_norm": 6.20684011367921, "learning_rate": 7.891341843928596e-07, "loss": 0.3682, "step": 28779 }, { "epoch": 0.82, "grad_norm": 5.150168606859114, "learning_rate": 7.888841406155584e-07, "loss": 0.4641, "step": 28780 }, { "epoch": 0.82, "grad_norm": 4.467168141203353, "learning_rate": 7.886341330659675e-07, "loss": 0.6072, "step": 28781 }, { "epoch": 0.82, "grad_norm": 6.244935081517124, "learning_rate": 7.883841617462396e-07, "loss": 0.5468, "step": 28782 }, { "epoch": 0.82, "grad_norm": 10.292988392414843, "learning_rate": 7.881342266585229e-07, "loss": 0.7677, "step": 28783 }, { "epoch": 0.82, "grad_norm": 4.257568534371144, "learning_rate": 7.878843278049703e-07, "loss": 0.3127, "step": 28784 }, { "epoch": 0.82, "grad_norm": 4.845732498541187, "learning_rate": 7.876344651877288e-07, "loss": 0.3494, "step": 28785 }, { "epoch": 0.82, "grad_norm": 7.954516905448512, "learning_rate": 7.873846388089507e-07, "loss": 0.2228, "step": 28786 }, { "epoch": 0.82, "grad_norm": 2.5933168578135293, "learning_rate": 7.871348486707842e-07, "loss": 0.2504, "step": 28787 }, { "epoch": 0.82, "grad_norm": 8.918990114718307, "learning_rate": 7.868850947753759e-07, "loss": 0.347, "step": 28788 }, { "epoch": 0.82, "grad_norm": 7.004497173377649, "learning_rate": 7.86635377124878e-07, "loss": 0.7722, "step": 28789 }, { "epoch": 0.82, "grad_norm": 4.95331936524083, "learning_rate": 7.863856957214361e-07, "loss": 0.3562, "step": 28790 }, { "epoch": 0.82, "grad_norm": 5.447324537066025, "learning_rate": 7.861360505672006e-07, "loss": 0.5233, "step": 28791 }, { "epoch": 0.82, "grad_norm": 3.938976752661703, "learning_rate": 7.858864416643164e-07, "loss": 0.41, "step": 28792 }, { "epoch": 0.82, "grad_norm": 8.470450198967063, "learning_rate": 7.856368690149336e-07, "loss": 0.6078, "step": 28793 }, { "epoch": 0.82, "grad_norm": 3.0724508007819105, "learning_rate": 7.85387332621198e-07, "loss": 0.2624, "step": 28794 }, { "epoch": 0.82, "grad_norm": 1.8074256552315997, "learning_rate": 7.85137832485256e-07, "loss": 0.114, "step": 28795 }, { "epoch": 0.82, "grad_norm": 2.3778990443955714, "learning_rate": 7.848883686092535e-07, "loss": 0.1544, "step": 28796 }, { "epoch": 0.82, "grad_norm": 4.733199171842401, "learning_rate": 7.84638940995337e-07, "loss": 0.4215, "step": 28797 }, { "epoch": 0.82, "grad_norm": 4.488599057146795, "learning_rate": 7.843895496456539e-07, "loss": 0.1798, "step": 28798 }, { "epoch": 0.82, "grad_norm": 5.503409715919917, "learning_rate": 7.841401945623472e-07, "loss": 0.5005, "step": 28799 }, { "epoch": 0.82, "grad_norm": 4.351233314102998, "learning_rate": 7.83890875747565e-07, "loss": 0.199, "step": 28800 }, { "epoch": 0.82, "grad_norm": 3.652063565038409, "learning_rate": 7.836415932034497e-07, "loss": 0.3391, "step": 28801 }, { "epoch": 0.82, "grad_norm": 6.133203238899119, "learning_rate": 7.833923469321475e-07, "loss": 0.4494, "step": 28802 }, { "epoch": 0.82, "grad_norm": 3.8351543271869466, "learning_rate": 7.831431369358e-07, "loss": 0.2024, "step": 28803 }, { "epoch": 0.82, "grad_norm": 5.96035633778855, "learning_rate": 7.828939632165533e-07, "loss": 0.4691, "step": 28804 }, { "epoch": 0.82, "grad_norm": 5.985251260453218, "learning_rate": 7.826448257765512e-07, "loss": 0.2035, "step": 28805 }, { "epoch": 0.82, "grad_norm": 5.069065075734865, "learning_rate": 7.823957246179359e-07, "loss": 0.4285, "step": 28806 }, { "epoch": 0.82, "grad_norm": 7.589210047945715, "learning_rate": 7.821466597428518e-07, "loss": 0.578, "step": 28807 }, { "epoch": 0.82, "grad_norm": 5.6754421440000575, "learning_rate": 7.818976311534404e-07, "loss": 0.4087, "step": 28808 }, { "epoch": 0.83, "grad_norm": 2.9934678090041844, "learning_rate": 7.816486388518446e-07, "loss": 0.3878, "step": 28809 }, { "epoch": 0.83, "grad_norm": 3.46342939993195, "learning_rate": 7.813996828402054e-07, "loss": 0.1679, "step": 28810 }, { "epoch": 0.83, "grad_norm": 6.501120727462552, "learning_rate": 7.81150763120666e-07, "loss": 0.4919, "step": 28811 }, { "epoch": 0.83, "grad_norm": 4.312933056967008, "learning_rate": 7.80901879695366e-07, "loss": 0.9473, "step": 28812 }, { "epoch": 0.83, "grad_norm": 3.264707993247437, "learning_rate": 7.806530325664491e-07, "loss": 0.3951, "step": 28813 }, { "epoch": 0.83, "grad_norm": 9.313867148590681, "learning_rate": 7.804042217360541e-07, "loss": 0.4275, "step": 28814 }, { "epoch": 0.83, "grad_norm": 5.5340597376320275, "learning_rate": 7.801554472063217e-07, "loss": 0.4457, "step": 28815 }, { "epoch": 0.83, "grad_norm": 3.387146373885894, "learning_rate": 7.799067089793927e-07, "loss": 0.3519, "step": 28816 }, { "epoch": 0.83, "grad_norm": 4.689812458575124, "learning_rate": 7.796580070574061e-07, "loss": 0.4107, "step": 28817 }, { "epoch": 0.83, "grad_norm": 5.3953392976017485, "learning_rate": 7.79409341442503e-07, "loss": 0.506, "step": 28818 }, { "epoch": 0.83, "grad_norm": 5.148654776866724, "learning_rate": 7.791607121368206e-07, "loss": 0.2875, "step": 28819 }, { "epoch": 0.83, "grad_norm": 1.8869718614844495, "learning_rate": 7.789121191425003e-07, "loss": 0.189, "step": 28820 }, { "epoch": 0.83, "grad_norm": 6.449913241667885, "learning_rate": 7.786635624616789e-07, "loss": 0.3916, "step": 28821 }, { "epoch": 0.83, "grad_norm": 3.3562375343702175, "learning_rate": 7.784150420964942e-07, "loss": 0.4165, "step": 28822 }, { "epoch": 0.83, "grad_norm": 10.442211755121907, "learning_rate": 7.781665580490866e-07, "loss": 0.2574, "step": 28823 }, { "epoch": 0.83, "grad_norm": 4.245129824159505, "learning_rate": 7.779181103215905e-07, "loss": 0.1825, "step": 28824 }, { "epoch": 0.83, "grad_norm": 4.788568219165266, "learning_rate": 7.776696989161465e-07, "loss": 0.6562, "step": 28825 }, { "epoch": 0.83, "grad_norm": 4.609003239526849, "learning_rate": 7.774213238348894e-07, "loss": 0.3885, "step": 28826 }, { "epoch": 0.83, "grad_norm": 4.797615888705651, "learning_rate": 7.771729850799576e-07, "loss": 0.2779, "step": 28827 }, { "epoch": 0.83, "grad_norm": 11.501810345999951, "learning_rate": 7.769246826534865e-07, "loss": 0.3745, "step": 28828 }, { "epoch": 0.83, "grad_norm": 4.642718732258898, "learning_rate": 7.766764165576113e-07, "loss": 0.3786, "step": 28829 }, { "epoch": 0.83, "grad_norm": 5.25035852388806, "learning_rate": 7.764281867944701e-07, "loss": 0.5572, "step": 28830 }, { "epoch": 0.83, "grad_norm": 5.617426436094944, "learning_rate": 7.761799933661962e-07, "loss": 0.3979, "step": 28831 }, { "epoch": 0.83, "grad_norm": 3.5339413404664297, "learning_rate": 7.759318362749268e-07, "loss": 0.4283, "step": 28832 }, { "epoch": 0.83, "grad_norm": 3.8543105468012655, "learning_rate": 7.756837155227942e-07, "loss": 0.341, "step": 28833 }, { "epoch": 0.83, "grad_norm": 4.433197467587577, "learning_rate": 7.75435631111936e-07, "loss": 0.4109, "step": 28834 }, { "epoch": 0.83, "grad_norm": 5.033193603790834, "learning_rate": 7.75187583044485e-07, "loss": 0.3156, "step": 28835 }, { "epoch": 0.83, "grad_norm": 7.2176799559084674, "learning_rate": 7.749395713225749e-07, "loss": 0.2546, "step": 28836 }, { "epoch": 0.83, "grad_norm": 5.511686307404742, "learning_rate": 7.746915959483387e-07, "loss": 0.4797, "step": 28837 }, { "epoch": 0.83, "grad_norm": 9.114871949485497, "learning_rate": 7.7444365692391e-07, "loss": 0.6916, "step": 28838 }, { "epoch": 0.83, "grad_norm": 5.608535980126882, "learning_rate": 7.741957542514234e-07, "loss": 0.7965, "step": 28839 }, { "epoch": 0.83, "grad_norm": 6.370921232599705, "learning_rate": 7.739478879330109e-07, "loss": 0.4781, "step": 28840 }, { "epoch": 0.83, "grad_norm": 4.273576751616043, "learning_rate": 7.737000579708043e-07, "loss": 0.261, "step": 28841 }, { "epoch": 0.83, "grad_norm": 8.361291526637704, "learning_rate": 7.73452264366934e-07, "loss": 0.4352, "step": 28842 }, { "epoch": 0.83, "grad_norm": 3.345103346492597, "learning_rate": 7.73204507123535e-07, "loss": 0.4725, "step": 28843 }, { "epoch": 0.83, "grad_norm": 2.54241522464809, "learning_rate": 7.729567862427361e-07, "loss": 0.1431, "step": 28844 }, { "epoch": 0.83, "grad_norm": 4.219181632107624, "learning_rate": 7.727091017266708e-07, "loss": 0.4685, "step": 28845 }, { "epoch": 0.83, "grad_norm": 1.6461574822731229, "learning_rate": 7.724614535774672e-07, "loss": 0.116, "step": 28846 }, { "epoch": 0.83, "grad_norm": 5.078466926213874, "learning_rate": 7.722138417972586e-07, "loss": 0.4984, "step": 28847 }, { "epoch": 0.83, "grad_norm": 5.699706382885415, "learning_rate": 7.719662663881738e-07, "loss": 0.4694, "step": 28848 }, { "epoch": 0.83, "grad_norm": 5.074407666502111, "learning_rate": 7.717187273523414e-07, "loss": 0.2809, "step": 28849 }, { "epoch": 0.83, "grad_norm": 6.286970238332566, "learning_rate": 7.714712246918932e-07, "loss": 0.2779, "step": 28850 }, { "epoch": 0.83, "grad_norm": 3.926292363387319, "learning_rate": 7.71223758408956e-07, "loss": 0.365, "step": 28851 }, { "epoch": 0.83, "grad_norm": 8.38158129692281, "learning_rate": 7.709763285056621e-07, "loss": 0.3771, "step": 28852 }, { "epoch": 0.83, "grad_norm": 4.167906656133428, "learning_rate": 7.707289349841363e-07, "loss": 0.4497, "step": 28853 }, { "epoch": 0.83, "grad_norm": 3.0919970160244143, "learning_rate": 7.704815778465103e-07, "loss": 0.3022, "step": 28854 }, { "epoch": 0.83, "grad_norm": 7.808654387993828, "learning_rate": 7.702342570949101e-07, "loss": 0.5984, "step": 28855 }, { "epoch": 0.83, "grad_norm": 6.300324813098871, "learning_rate": 7.699869727314629e-07, "loss": 0.4328, "step": 28856 }, { "epoch": 0.83, "grad_norm": 3.338271171256433, "learning_rate": 7.697397247582983e-07, "loss": 0.176, "step": 28857 }, { "epoch": 0.83, "grad_norm": 5.5796333004838266, "learning_rate": 7.69492513177541e-07, "loss": 0.4529, "step": 28858 }, { "epoch": 0.83, "grad_norm": 3.702521456051617, "learning_rate": 7.692453379913195e-07, "loss": 0.4134, "step": 28859 }, { "epoch": 0.83, "grad_norm": 7.326263386483582, "learning_rate": 7.689981992017582e-07, "loss": 0.4039, "step": 28860 }, { "epoch": 0.83, "grad_norm": 4.167738603353121, "learning_rate": 7.687510968109857e-07, "loss": 0.3608, "step": 28861 }, { "epoch": 0.83, "grad_norm": 7.835614061764052, "learning_rate": 7.685040308211267e-07, "loss": 0.9404, "step": 28862 }, { "epoch": 0.83, "grad_norm": 8.836433754231393, "learning_rate": 7.682570012343055e-07, "loss": 0.4747, "step": 28863 }, { "epoch": 0.83, "grad_norm": 6.649993696424896, "learning_rate": 7.680100080526492e-07, "loss": 0.4379, "step": 28864 }, { "epoch": 0.83, "grad_norm": 8.1229861771521, "learning_rate": 7.677630512782808e-07, "loss": 1.0335, "step": 28865 }, { "epoch": 0.83, "grad_norm": 5.861179694406727, "learning_rate": 7.675161309133266e-07, "loss": 0.4193, "step": 28866 }, { "epoch": 0.83, "grad_norm": 4.043614432702367, "learning_rate": 7.672692469599092e-07, "loss": 0.1633, "step": 28867 }, { "epoch": 0.83, "grad_norm": 6.346366305229424, "learning_rate": 7.670223994201553e-07, "loss": 0.6718, "step": 28868 }, { "epoch": 0.83, "grad_norm": 3.90955133932331, "learning_rate": 7.667755882961847e-07, "loss": 0.3179, "step": 28869 }, { "epoch": 0.83, "grad_norm": 4.703776393657174, "learning_rate": 7.665288135901222e-07, "loss": 0.6046, "step": 28870 }, { "epoch": 0.83, "grad_norm": 5.350592755074708, "learning_rate": 7.662820753040923e-07, "loss": 0.5603, "step": 28871 }, { "epoch": 0.83, "grad_norm": 3.7060678112415855, "learning_rate": 7.660353734402148e-07, "loss": 0.4026, "step": 28872 }, { "epoch": 0.83, "grad_norm": 6.970790718193089, "learning_rate": 7.657887080006154e-07, "loss": 0.4614, "step": 28873 }, { "epoch": 0.83, "grad_norm": 4.24919070223883, "learning_rate": 7.655420789874147e-07, "loss": 0.4785, "step": 28874 }, { "epoch": 0.83, "grad_norm": 3.1561119880340054, "learning_rate": 7.652954864027334e-07, "loss": 0.4946, "step": 28875 }, { "epoch": 0.83, "grad_norm": 6.238410083680125, "learning_rate": 7.65048930248693e-07, "loss": 0.3313, "step": 28876 }, { "epoch": 0.83, "grad_norm": 4.5435969016034266, "learning_rate": 7.64802410527416e-07, "loss": 0.8747, "step": 28877 }, { "epoch": 0.83, "grad_norm": 6.262177467796932, "learning_rate": 7.645559272410214e-07, "loss": 0.4197, "step": 28878 }, { "epoch": 0.83, "grad_norm": 5.658154920294316, "learning_rate": 7.643094803916307e-07, "loss": 0.3418, "step": 28879 }, { "epoch": 0.83, "grad_norm": 6.8859069311105285, "learning_rate": 7.640630699813651e-07, "loss": 0.6752, "step": 28880 }, { "epoch": 0.83, "grad_norm": 3.5490453524493124, "learning_rate": 7.638166960123433e-07, "loss": 0.2296, "step": 28881 }, { "epoch": 0.83, "grad_norm": 4.536196803697925, "learning_rate": 7.635703584866855e-07, "loss": 0.5407, "step": 28882 }, { "epoch": 0.83, "grad_norm": 2.4087070268936053, "learning_rate": 7.633240574065088e-07, "loss": 0.2252, "step": 28883 }, { "epoch": 0.83, "grad_norm": 5.55251324428334, "learning_rate": 7.630777927739342e-07, "loss": 0.3913, "step": 28884 }, { "epoch": 0.83, "grad_norm": 6.293388935176197, "learning_rate": 7.628315645910788e-07, "loss": 0.6444, "step": 28885 }, { "epoch": 0.83, "grad_norm": 5.077474323547744, "learning_rate": 7.625853728600635e-07, "loss": 0.6315, "step": 28886 }, { "epoch": 0.83, "grad_norm": 7.616276362774349, "learning_rate": 7.62339217583003e-07, "loss": 0.2892, "step": 28887 }, { "epoch": 0.83, "grad_norm": 6.315755665525498, "learning_rate": 7.620930987620173e-07, "loss": 0.3045, "step": 28888 }, { "epoch": 0.83, "grad_norm": 2.904031747852001, "learning_rate": 7.618470163992231e-07, "loss": 0.2273, "step": 28889 }, { "epoch": 0.83, "grad_norm": 5.89799188610617, "learning_rate": 7.616009704967364e-07, "loss": 0.538, "step": 28890 }, { "epoch": 0.83, "grad_norm": 6.237114670037425, "learning_rate": 7.613549610566756e-07, "loss": 0.4183, "step": 28891 }, { "epoch": 0.83, "grad_norm": 4.68608689942908, "learning_rate": 7.611089880811551e-07, "loss": 0.3569, "step": 28892 }, { "epoch": 0.83, "grad_norm": 5.5370885347381185, "learning_rate": 7.608630515722932e-07, "loss": 0.579, "step": 28893 }, { "epoch": 0.83, "grad_norm": 4.301107395982092, "learning_rate": 7.60617151532203e-07, "loss": 0.4663, "step": 28894 }, { "epoch": 0.83, "grad_norm": 4.607067034658245, "learning_rate": 7.603712879630032e-07, "loss": 0.2021, "step": 28895 }, { "epoch": 0.83, "grad_norm": 7.173192852670219, "learning_rate": 7.601254608668068e-07, "loss": 0.6553, "step": 28896 }, { "epoch": 0.83, "grad_norm": 5.169300994656937, "learning_rate": 7.598796702457278e-07, "loss": 0.6073, "step": 28897 }, { "epoch": 0.83, "grad_norm": 4.2419532725266365, "learning_rate": 7.596339161018834e-07, "loss": 0.3354, "step": 28898 }, { "epoch": 0.83, "grad_norm": 4.450897355630123, "learning_rate": 7.593881984373852e-07, "loss": 0.5679, "step": 28899 }, { "epoch": 0.83, "grad_norm": 3.5037319688608624, "learning_rate": 7.591425172543493e-07, "loss": 0.3164, "step": 28900 }, { "epoch": 0.83, "grad_norm": 3.222485328534379, "learning_rate": 7.588968725548878e-07, "loss": 0.2308, "step": 28901 }, { "epoch": 0.83, "grad_norm": 4.124222133004242, "learning_rate": 7.586512643411148e-07, "loss": 0.4754, "step": 28902 }, { "epoch": 0.83, "grad_norm": 7.090281609079503, "learning_rate": 7.584056926151417e-07, "loss": 0.5435, "step": 28903 }, { "epoch": 0.83, "grad_norm": 4.584824851132033, "learning_rate": 7.58160157379082e-07, "loss": 0.323, "step": 28904 }, { "epoch": 0.83, "grad_norm": 5.425389714460334, "learning_rate": 7.579146586350494e-07, "loss": 0.2997, "step": 28905 }, { "epoch": 0.83, "grad_norm": 8.41576570151727, "learning_rate": 7.576691963851534e-07, "loss": 0.3828, "step": 28906 }, { "epoch": 0.83, "grad_norm": 9.547838527674058, "learning_rate": 7.574237706315079e-07, "loss": 0.4891, "step": 28907 }, { "epoch": 0.83, "grad_norm": 8.144099279424344, "learning_rate": 7.571783813762235e-07, "loss": 0.5071, "step": 28908 }, { "epoch": 0.83, "grad_norm": 3.9478704639371673, "learning_rate": 7.569330286214116e-07, "loss": 0.4035, "step": 28909 }, { "epoch": 0.83, "grad_norm": 3.3030044562540413, "learning_rate": 7.566877123691807e-07, "loss": 0.5853, "step": 28910 }, { "epoch": 0.83, "grad_norm": 7.877433067964772, "learning_rate": 7.564424326216429e-07, "loss": 0.8803, "step": 28911 }, { "epoch": 0.83, "grad_norm": 2.1759309015224675, "learning_rate": 7.5619718938091e-07, "loss": 0.1331, "step": 28912 }, { "epoch": 0.83, "grad_norm": 11.101883391669368, "learning_rate": 7.559519826490891e-07, "loss": 0.4757, "step": 28913 }, { "epoch": 0.83, "grad_norm": 7.2095286934570835, "learning_rate": 7.557068124282913e-07, "loss": 0.4713, "step": 28914 }, { "epoch": 0.83, "grad_norm": 6.678844572722431, "learning_rate": 7.554616787206254e-07, "loss": 0.4933, "step": 28915 }, { "epoch": 0.83, "grad_norm": 3.6252251588020172, "learning_rate": 7.552165815282003e-07, "loss": 0.1289, "step": 28916 }, { "epoch": 0.83, "grad_norm": 4.042806428608548, "learning_rate": 7.549715208531233e-07, "loss": 0.0892, "step": 28917 }, { "epoch": 0.83, "grad_norm": 2.272403691730176, "learning_rate": 7.547264966975043e-07, "loss": 0.1272, "step": 28918 }, { "epoch": 0.83, "grad_norm": 8.208249887213322, "learning_rate": 7.5448150906345e-07, "loss": 0.2615, "step": 28919 }, { "epoch": 0.83, "grad_norm": 3.417762828618385, "learning_rate": 7.542365579530681e-07, "loss": 0.2798, "step": 28920 }, { "epoch": 0.83, "grad_norm": 11.087332575242486, "learning_rate": 7.539916433684675e-07, "loss": 0.6644, "step": 28921 }, { "epoch": 0.83, "grad_norm": 6.672901369431234, "learning_rate": 7.537467653117542e-07, "loss": 0.5534, "step": 28922 }, { "epoch": 0.83, "grad_norm": 7.770293584356061, "learning_rate": 7.535019237850344e-07, "loss": 0.5461, "step": 28923 }, { "epoch": 0.83, "grad_norm": 4.643893263032969, "learning_rate": 7.532571187904136e-07, "loss": 0.3807, "step": 28924 }, { "epoch": 0.83, "grad_norm": 3.509391497253067, "learning_rate": 7.530123503300008e-07, "loss": 0.4398, "step": 28925 }, { "epoch": 0.83, "grad_norm": 7.798647814700439, "learning_rate": 7.52767618405898e-07, "loss": 0.8636, "step": 28926 }, { "epoch": 0.83, "grad_norm": 11.065566505378788, "learning_rate": 7.525229230202135e-07, "loss": 0.589, "step": 28927 }, { "epoch": 0.83, "grad_norm": 4.836581728027518, "learning_rate": 7.522782641750514e-07, "loss": 0.4914, "step": 28928 }, { "epoch": 0.83, "grad_norm": 4.607008400831766, "learning_rate": 7.520336418725155e-07, "loss": 0.42, "step": 28929 }, { "epoch": 0.83, "grad_norm": 6.668687867694776, "learning_rate": 7.517890561147123e-07, "loss": 0.6596, "step": 28930 }, { "epoch": 0.83, "grad_norm": 3.0998984997037526, "learning_rate": 7.515445069037436e-07, "loss": 0.3829, "step": 28931 }, { "epoch": 0.83, "grad_norm": 3.9412122514237553, "learning_rate": 7.512999942417155e-07, "loss": 0.3076, "step": 28932 }, { "epoch": 0.83, "grad_norm": 3.676105610626514, "learning_rate": 7.510555181307294e-07, "loss": 0.0553, "step": 28933 }, { "epoch": 0.83, "grad_norm": 6.885286681025063, "learning_rate": 7.508110785728906e-07, "loss": 0.6043, "step": 28934 }, { "epoch": 0.83, "grad_norm": 7.043402008423515, "learning_rate": 7.505666755703006e-07, "loss": 0.6039, "step": 28935 }, { "epoch": 0.83, "grad_norm": 9.136044024359727, "learning_rate": 7.503223091250616e-07, "loss": 0.4683, "step": 28936 }, { "epoch": 0.83, "grad_norm": 3.977677704146014, "learning_rate": 7.500779792392776e-07, "loss": 0.4918, "step": 28937 }, { "epoch": 0.83, "grad_norm": 10.311041526541368, "learning_rate": 7.498336859150484e-07, "loss": 0.3145, "step": 28938 }, { "epoch": 0.83, "grad_norm": 8.749251088381447, "learning_rate": 7.495894291544775e-07, "loss": 0.3331, "step": 28939 }, { "epoch": 0.83, "grad_norm": 4.431906801840478, "learning_rate": 7.493452089596648e-07, "loss": 0.3336, "step": 28940 }, { "epoch": 0.83, "grad_norm": 3.061381057762547, "learning_rate": 7.491010253327125e-07, "loss": 0.2061, "step": 28941 }, { "epoch": 0.83, "grad_norm": 4.752706986103289, "learning_rate": 7.488568782757211e-07, "loss": 0.6027, "step": 28942 }, { "epoch": 0.83, "grad_norm": 9.831574215305293, "learning_rate": 7.486127677907907e-07, "loss": 0.5234, "step": 28943 }, { "epoch": 0.83, "grad_norm": 3.736842676875634, "learning_rate": 7.483686938800194e-07, "loss": 0.2787, "step": 28944 }, { "epoch": 0.83, "grad_norm": 6.7948196635901725, "learning_rate": 7.481246565455092e-07, "loss": 0.3507, "step": 28945 }, { "epoch": 0.83, "grad_norm": 8.353092121326434, "learning_rate": 7.478806557893603e-07, "loss": 0.5342, "step": 28946 }, { "epoch": 0.83, "grad_norm": 3.151030275098318, "learning_rate": 7.476366916136691e-07, "loss": 0.2152, "step": 28947 }, { "epoch": 0.83, "grad_norm": 4.915818184307989, "learning_rate": 7.47392764020537e-07, "loss": 0.4116, "step": 28948 }, { "epoch": 0.83, "grad_norm": 17.818558475745576, "learning_rate": 7.471488730120618e-07, "loss": 0.2513, "step": 28949 }, { "epoch": 0.83, "grad_norm": 4.323817950440165, "learning_rate": 7.469050185903409e-07, "loss": 0.2391, "step": 28950 }, { "epoch": 0.83, "grad_norm": 3.3822873877492485, "learning_rate": 7.466612007574714e-07, "loss": 0.1572, "step": 28951 }, { "epoch": 0.83, "grad_norm": 4.282616473794734, "learning_rate": 7.464174195155522e-07, "loss": 0.309, "step": 28952 }, { "epoch": 0.83, "grad_norm": 6.789333746334482, "learning_rate": 7.461736748666809e-07, "loss": 0.4738, "step": 28953 }, { "epoch": 0.83, "grad_norm": 3.6954078903951393, "learning_rate": 7.459299668129527e-07, "loss": 0.2929, "step": 28954 }, { "epoch": 0.83, "grad_norm": 6.940307908112255, "learning_rate": 7.456862953564675e-07, "loss": 0.4603, "step": 28955 }, { "epoch": 0.83, "grad_norm": 9.133771599628808, "learning_rate": 7.45442660499317e-07, "loss": 0.4816, "step": 28956 }, { "epoch": 0.83, "grad_norm": 10.544881810357534, "learning_rate": 7.451990622436011e-07, "loss": 0.5918, "step": 28957 }, { "epoch": 0.83, "grad_norm": 5.066610398367002, "learning_rate": 7.449555005914122e-07, "loss": 0.1973, "step": 28958 }, { "epoch": 0.83, "grad_norm": 6.6280181056013046, "learning_rate": 7.447119755448484e-07, "loss": 0.421, "step": 28959 }, { "epoch": 0.83, "grad_norm": 3.0327595658475732, "learning_rate": 7.444684871060021e-07, "loss": 0.1259, "step": 28960 }, { "epoch": 0.83, "grad_norm": 5.0074167080302106, "learning_rate": 7.442250352769709e-07, "loss": 0.4164, "step": 28961 }, { "epoch": 0.83, "grad_norm": 7.850078856321177, "learning_rate": 7.439816200598482e-07, "loss": 0.3174, "step": 28962 }, { "epoch": 0.83, "grad_norm": 4.152915132959317, "learning_rate": 7.437382414567257e-07, "loss": 0.2253, "step": 28963 }, { "epoch": 0.83, "grad_norm": 2.2287497635657783, "learning_rate": 7.434948994697005e-07, "loss": 0.1539, "step": 28964 }, { "epoch": 0.83, "grad_norm": 5.260775588478174, "learning_rate": 7.432515941008639e-07, "loss": 0.4703, "step": 28965 }, { "epoch": 0.83, "grad_norm": 3.2463088715994726, "learning_rate": 7.430083253523102e-07, "loss": 0.1962, "step": 28966 }, { "epoch": 0.83, "grad_norm": 5.327123150647477, "learning_rate": 7.42765093226131e-07, "loss": 0.6844, "step": 28967 }, { "epoch": 0.83, "grad_norm": 6.418896655048162, "learning_rate": 7.425218977244203e-07, "loss": 0.0764, "step": 28968 }, { "epoch": 0.83, "grad_norm": 5.099755146656276, "learning_rate": 7.422787388492697e-07, "loss": 0.1919, "step": 28969 }, { "epoch": 0.83, "grad_norm": 8.172200913066154, "learning_rate": 7.420356166027698e-07, "loss": 0.3812, "step": 28970 }, { "epoch": 0.83, "grad_norm": 3.4107560441963827, "learning_rate": 7.417925309870144e-07, "loss": 0.2419, "step": 28971 }, { "epoch": 0.83, "grad_norm": 5.014789642455709, "learning_rate": 7.415494820040925e-07, "loss": 0.3399, "step": 28972 }, { "epoch": 0.83, "grad_norm": 5.916233485666483, "learning_rate": 7.413064696560973e-07, "loss": 0.4711, "step": 28973 }, { "epoch": 0.83, "grad_norm": 10.720366820350804, "learning_rate": 7.410634939451167e-07, "loss": 0.5426, "step": 28974 }, { "epoch": 0.83, "grad_norm": 1.7994325680548706, "learning_rate": 7.408205548732439e-07, "loss": 0.3061, "step": 28975 }, { "epoch": 0.83, "grad_norm": 4.551963567431257, "learning_rate": 7.405776524425673e-07, "loss": 0.3218, "step": 28976 }, { "epoch": 0.83, "grad_norm": 5.940589622942052, "learning_rate": 7.403347866551758e-07, "loss": 0.3033, "step": 28977 }, { "epoch": 0.83, "grad_norm": 5.953482509501164, "learning_rate": 7.40091957513161e-07, "loss": 0.5535, "step": 28978 }, { "epoch": 0.83, "grad_norm": 5.9447810653592885, "learning_rate": 7.39849165018609e-07, "loss": 0.5734, "step": 28979 }, { "epoch": 0.83, "grad_norm": 5.288299370259988, "learning_rate": 7.396064091736116e-07, "loss": 0.5471, "step": 28980 }, { "epoch": 0.83, "grad_norm": 9.664913994198747, "learning_rate": 7.393636899802542e-07, "loss": 0.5977, "step": 28981 }, { "epoch": 0.83, "grad_norm": 4.904066924337031, "learning_rate": 7.391210074406286e-07, "loss": 0.3297, "step": 28982 }, { "epoch": 0.83, "grad_norm": 3.8779486850493177, "learning_rate": 7.388783615568185e-07, "loss": 0.2023, "step": 28983 }, { "epoch": 0.83, "grad_norm": 5.23622854636818, "learning_rate": 7.38635752330914e-07, "loss": 0.3878, "step": 28984 }, { "epoch": 0.83, "grad_norm": 5.694540110097213, "learning_rate": 7.383931797650007e-07, "loss": 0.2912, "step": 28985 }, { "epoch": 0.83, "grad_norm": 9.280314414395418, "learning_rate": 7.381506438611658e-07, "loss": 0.5267, "step": 28986 }, { "epoch": 0.83, "grad_norm": 3.578926496400588, "learning_rate": 7.379081446214975e-07, "loss": 0.2218, "step": 28987 }, { "epoch": 0.83, "grad_norm": 6.409744872492534, "learning_rate": 7.376656820480804e-07, "loss": 0.2999, "step": 28988 }, { "epoch": 0.83, "grad_norm": 4.914093551173757, "learning_rate": 7.374232561430012e-07, "loss": 0.3493, "step": 28989 }, { "epoch": 0.83, "grad_norm": 8.53297812037947, "learning_rate": 7.371808669083436e-07, "loss": 0.2796, "step": 28990 }, { "epoch": 0.83, "grad_norm": 6.921727439392515, "learning_rate": 7.369385143461949e-07, "loss": 0.5373, "step": 28991 }, { "epoch": 0.83, "grad_norm": 13.500414382791371, "learning_rate": 7.366961984586385e-07, "loss": 1.1057, "step": 28992 }, { "epoch": 0.83, "grad_norm": 4.5706797134377535, "learning_rate": 7.36453919247761e-07, "loss": 0.4784, "step": 28993 }, { "epoch": 0.83, "grad_norm": 9.203969065069195, "learning_rate": 7.362116767156441e-07, "loss": 0.5393, "step": 28994 }, { "epoch": 0.83, "grad_norm": 2.45733470892527, "learning_rate": 7.359694708643739e-07, "loss": 0.1863, "step": 28995 }, { "epoch": 0.83, "grad_norm": 15.4239221081884, "learning_rate": 7.357273016960342e-07, "loss": 0.5556, "step": 28996 }, { "epoch": 0.83, "grad_norm": 2.0008796456185873, "learning_rate": 7.354851692127057e-07, "loss": 0.267, "step": 28997 }, { "epoch": 0.83, "grad_norm": 5.128463644670381, "learning_rate": 7.352430734164745e-07, "loss": 0.3853, "step": 28998 }, { "epoch": 0.83, "grad_norm": 5.296875697673201, "learning_rate": 7.350010143094211e-07, "loss": 0.6019, "step": 28999 }, { "epoch": 0.83, "grad_norm": 4.004576598085582, "learning_rate": 7.347589918936299e-07, "loss": 0.3061, "step": 29000 }, { "epoch": 0.83, "grad_norm": 4.486001440832864, "learning_rate": 7.345170061711809e-07, "loss": 0.4277, "step": 29001 }, { "epoch": 0.83, "grad_norm": 4.485927459269746, "learning_rate": 7.342750571441582e-07, "loss": 0.4227, "step": 29002 }, { "epoch": 0.83, "grad_norm": 5.788428267976298, "learning_rate": 7.340331448146415e-07, "loss": 0.524, "step": 29003 }, { "epoch": 0.83, "grad_norm": 3.141728297538894, "learning_rate": 7.337912691847116e-07, "loss": 0.4678, "step": 29004 }, { "epoch": 0.83, "grad_norm": 5.937907556299253, "learning_rate": 7.335494302564516e-07, "loss": 0.5643, "step": 29005 }, { "epoch": 0.83, "grad_norm": 5.186780029339388, "learning_rate": 7.33307628031939e-07, "loss": 0.4262, "step": 29006 }, { "epoch": 0.83, "grad_norm": 4.087910338170636, "learning_rate": 7.330658625132564e-07, "loss": 0.2656, "step": 29007 }, { "epoch": 0.83, "grad_norm": 7.699162266725842, "learning_rate": 7.328241337024822e-07, "loss": 0.5647, "step": 29008 }, { "epoch": 0.83, "grad_norm": 4.902156312558418, "learning_rate": 7.325824416016974e-07, "loss": 0.3183, "step": 29009 }, { "epoch": 0.83, "grad_norm": 6.972929774457155, "learning_rate": 7.323407862129805e-07, "loss": 0.6361, "step": 29010 }, { "epoch": 0.83, "grad_norm": 3.309348784749029, "learning_rate": 7.320991675384093e-07, "loss": 0.2937, "step": 29011 }, { "epoch": 0.83, "grad_norm": 6.202107497392334, "learning_rate": 7.318575855800647e-07, "loss": 0.5943, "step": 29012 }, { "epoch": 0.83, "grad_norm": 2.4691811680011204, "learning_rate": 7.316160403400224e-07, "loss": 0.4561, "step": 29013 }, { "epoch": 0.83, "grad_norm": 2.6599645783640136, "learning_rate": 7.313745318203635e-07, "loss": 0.3697, "step": 29014 }, { "epoch": 0.83, "grad_norm": 5.882605236196731, "learning_rate": 7.311330600231631e-07, "loss": 0.564, "step": 29015 }, { "epoch": 0.83, "grad_norm": 5.06940112276887, "learning_rate": 7.308916249504999e-07, "loss": 0.4371, "step": 29016 }, { "epoch": 0.83, "grad_norm": 3.678961437214024, "learning_rate": 7.306502266044491e-07, "loss": 0.4259, "step": 29017 }, { "epoch": 0.83, "grad_norm": 6.106629322963679, "learning_rate": 7.304088649870889e-07, "loss": 0.3885, "step": 29018 }, { "epoch": 0.83, "grad_norm": 4.175950592004507, "learning_rate": 7.301675401004965e-07, "loss": 0.3973, "step": 29019 }, { "epoch": 0.83, "grad_norm": 2.668690529368388, "learning_rate": 7.299262519467464e-07, "loss": 0.3667, "step": 29020 }, { "epoch": 0.83, "grad_norm": 6.173864502695539, "learning_rate": 7.296850005279155e-07, "loss": 0.8712, "step": 29021 }, { "epoch": 0.83, "grad_norm": 5.990620991482137, "learning_rate": 7.29443785846079e-07, "loss": 0.4239, "step": 29022 }, { "epoch": 0.83, "grad_norm": 7.840925946927455, "learning_rate": 7.292026079033121e-07, "loss": 0.4832, "step": 29023 }, { "epoch": 0.83, "grad_norm": 3.1022437474106854, "learning_rate": 7.289614667016881e-07, "loss": 0.0544, "step": 29024 }, { "epoch": 0.83, "grad_norm": 5.655731008756358, "learning_rate": 7.287203622432837e-07, "loss": 0.6282, "step": 29025 }, { "epoch": 0.83, "grad_norm": 3.77554092068599, "learning_rate": 7.284792945301705e-07, "loss": 0.5199, "step": 29026 }, { "epoch": 0.83, "grad_norm": 3.1518163795322502, "learning_rate": 7.282382635644247e-07, "loss": 0.2173, "step": 29027 }, { "epoch": 0.83, "grad_norm": 5.467756568050098, "learning_rate": 7.279972693481202e-07, "loss": 0.3242, "step": 29028 }, { "epoch": 0.83, "grad_norm": 3.215522796370913, "learning_rate": 7.277563118833292e-07, "loss": 0.3616, "step": 29029 }, { "epoch": 0.83, "grad_norm": 7.30339716158121, "learning_rate": 7.275153911721245e-07, "loss": 0.7038, "step": 29030 }, { "epoch": 0.83, "grad_norm": 9.356724435401215, "learning_rate": 7.272745072165777e-07, "loss": 0.5465, "step": 29031 }, { "epoch": 0.83, "grad_norm": 6.025241725077225, "learning_rate": 7.270336600187633e-07, "loss": 0.4481, "step": 29032 }, { "epoch": 0.83, "grad_norm": 1.6427101463214333, "learning_rate": 7.267928495807514e-07, "loss": 0.1235, "step": 29033 }, { "epoch": 0.83, "grad_norm": 2.011722364700181, "learning_rate": 7.265520759046152e-07, "loss": 0.1745, "step": 29034 }, { "epoch": 0.83, "grad_norm": 7.7883856353973675, "learning_rate": 7.263113389924248e-07, "loss": 0.2801, "step": 29035 }, { "epoch": 0.83, "grad_norm": 5.382775825630235, "learning_rate": 7.260706388462524e-07, "loss": 0.3796, "step": 29036 }, { "epoch": 0.83, "grad_norm": 4.716861675441266, "learning_rate": 7.25829975468168e-07, "loss": 0.3345, "step": 29037 }, { "epoch": 0.83, "grad_norm": 3.4793210861444654, "learning_rate": 7.255893488602411e-07, "loss": 0.3573, "step": 29038 }, { "epoch": 0.83, "grad_norm": 3.446270175967482, "learning_rate": 7.25348759024544e-07, "loss": 0.2474, "step": 29039 }, { "epoch": 0.83, "grad_norm": 5.240556011075691, "learning_rate": 7.251082059631443e-07, "loss": 0.5583, "step": 29040 }, { "epoch": 0.83, "grad_norm": 4.325856988266905, "learning_rate": 7.248676896781131e-07, "loss": 0.5018, "step": 29041 }, { "epoch": 0.83, "grad_norm": 5.450909445940356, "learning_rate": 7.24627210171519e-07, "loss": 0.5012, "step": 29042 }, { "epoch": 0.83, "grad_norm": 6.8102672312731265, "learning_rate": 7.243867674454292e-07, "loss": 0.4077, "step": 29043 }, { "epoch": 0.83, "grad_norm": 10.24092574971529, "learning_rate": 7.241463615019151e-07, "loss": 0.4331, "step": 29044 }, { "epoch": 0.83, "grad_norm": 6.253643271011849, "learning_rate": 7.239059923430419e-07, "loss": 0.393, "step": 29045 }, { "epoch": 0.83, "grad_norm": 5.823748952747192, "learning_rate": 7.236656599708803e-07, "loss": 0.2795, "step": 29046 }, { "epoch": 0.83, "grad_norm": 5.996980066059893, "learning_rate": 7.234253643874956e-07, "loss": 0.3236, "step": 29047 }, { "epoch": 0.83, "grad_norm": 3.6120965118525694, "learning_rate": 7.231851055949568e-07, "loss": 0.3679, "step": 29048 }, { "epoch": 0.83, "grad_norm": 6.596274737755743, "learning_rate": 7.229448835953296e-07, "loss": 0.6244, "step": 29049 }, { "epoch": 0.83, "grad_norm": 4.630958687993617, "learning_rate": 7.227046983906815e-07, "loss": 0.2663, "step": 29050 }, { "epoch": 0.83, "grad_norm": 6.012224658205044, "learning_rate": 7.224645499830768e-07, "loss": 0.7385, "step": 29051 }, { "epoch": 0.83, "grad_norm": 3.0997811109420077, "learning_rate": 7.222244383745836e-07, "loss": 0.2597, "step": 29052 }, { "epoch": 0.83, "grad_norm": 7.430513835405617, "learning_rate": 7.219843635672674e-07, "loss": 0.3577, "step": 29053 }, { "epoch": 0.83, "grad_norm": 2.5937733936403853, "learning_rate": 7.217443255631918e-07, "loss": 0.2507, "step": 29054 }, { "epoch": 0.83, "grad_norm": 15.635009534983569, "learning_rate": 7.215043243644243e-07, "loss": 0.8154, "step": 29055 }, { "epoch": 0.83, "grad_norm": 4.01225975422586, "learning_rate": 7.212643599730284e-07, "loss": 0.4473, "step": 29056 }, { "epoch": 0.83, "grad_norm": 4.192239783348816, "learning_rate": 7.210244323910681e-07, "loss": 0.2806, "step": 29057 }, { "epoch": 0.83, "grad_norm": 6.333797145055977, "learning_rate": 7.207845416206072e-07, "loss": 0.2668, "step": 29058 }, { "epoch": 0.83, "grad_norm": 3.7709287276592343, "learning_rate": 7.205446876637095e-07, "loss": 0.425, "step": 29059 }, { "epoch": 0.83, "grad_norm": 6.746918716748421, "learning_rate": 7.203048705224403e-07, "loss": 0.2274, "step": 29060 }, { "epoch": 0.83, "grad_norm": 5.146591943995607, "learning_rate": 7.200650901988598e-07, "loss": 0.5301, "step": 29061 }, { "epoch": 0.83, "grad_norm": 3.2711310807952576, "learning_rate": 7.198253466950339e-07, "loss": 0.2345, "step": 29062 }, { "epoch": 0.83, "grad_norm": 4.0512717087119015, "learning_rate": 7.195856400130235e-07, "loss": 0.4676, "step": 29063 }, { "epoch": 0.83, "grad_norm": 5.837286000546217, "learning_rate": 7.19345970154891e-07, "loss": 0.6528, "step": 29064 }, { "epoch": 0.83, "grad_norm": 3.5430437645756885, "learning_rate": 7.19106337122697e-07, "loss": 0.4099, "step": 29065 }, { "epoch": 0.83, "grad_norm": 8.61939397318961, "learning_rate": 7.188667409185051e-07, "loss": 0.8209, "step": 29066 }, { "epoch": 0.83, "grad_norm": 3.817474371943358, "learning_rate": 7.18627181544374e-07, "loss": 0.5125, "step": 29067 }, { "epoch": 0.83, "grad_norm": 3.7437503235168825, "learning_rate": 7.183876590023664e-07, "loss": 0.5034, "step": 29068 }, { "epoch": 0.83, "grad_norm": 8.98389466701728, "learning_rate": 7.181481732945438e-07, "loss": 0.2946, "step": 29069 }, { "epoch": 0.83, "grad_norm": 5.16120621584805, "learning_rate": 7.179087244229654e-07, "loss": 0.352, "step": 29070 }, { "epoch": 0.83, "grad_norm": 5.769708422792469, "learning_rate": 7.17669312389691e-07, "loss": 0.366, "step": 29071 }, { "epoch": 0.83, "grad_norm": 6.6381263789369545, "learning_rate": 7.174299371967791e-07, "loss": 0.3249, "step": 29072 }, { "epoch": 0.83, "grad_norm": 4.750363185449799, "learning_rate": 7.171905988462907e-07, "loss": 0.5838, "step": 29073 }, { "epoch": 0.83, "grad_norm": 6.884252425741972, "learning_rate": 7.169512973402836e-07, "loss": 0.1483, "step": 29074 }, { "epoch": 0.83, "grad_norm": 3.9016815326271126, "learning_rate": 7.167120326808186e-07, "loss": 0.3606, "step": 29075 }, { "epoch": 0.83, "grad_norm": 5.4162494926707705, "learning_rate": 7.16472804869952e-07, "loss": 0.2988, "step": 29076 }, { "epoch": 0.83, "grad_norm": 5.61722981698301, "learning_rate": 7.16233613909742e-07, "loss": 0.5308, "step": 29077 }, { "epoch": 0.83, "grad_norm": 4.605720286662909, "learning_rate": 7.159944598022473e-07, "loss": 0.5309, "step": 29078 }, { "epoch": 0.83, "grad_norm": 5.879240899684122, "learning_rate": 7.157553425495239e-07, "loss": 0.3378, "step": 29079 }, { "epoch": 0.83, "grad_norm": 4.389462055666709, "learning_rate": 7.15516262153631e-07, "loss": 0.2641, "step": 29080 }, { "epoch": 0.83, "grad_norm": 5.0523199465161275, "learning_rate": 7.152772186166229e-07, "loss": 0.1646, "step": 29081 }, { "epoch": 0.83, "grad_norm": 3.3000732384848694, "learning_rate": 7.150382119405586e-07, "loss": 0.2073, "step": 29082 }, { "epoch": 0.83, "grad_norm": 3.714770134839248, "learning_rate": 7.14799242127493e-07, "loss": 0.2938, "step": 29083 }, { "epoch": 0.83, "grad_norm": 8.41014961635755, "learning_rate": 7.145603091794811e-07, "loss": 0.4971, "step": 29084 }, { "epoch": 0.83, "grad_norm": 2.740516544749362, "learning_rate": 7.143214130985799e-07, "loss": 0.2246, "step": 29085 }, { "epoch": 0.83, "grad_norm": 5.040735862917573, "learning_rate": 7.140825538868429e-07, "loss": 0.5144, "step": 29086 }, { "epoch": 0.83, "grad_norm": 10.071723383402349, "learning_rate": 7.13843731546327e-07, "loss": 0.4517, "step": 29087 }, { "epoch": 0.83, "grad_norm": 7.56274716312269, "learning_rate": 7.136049460790845e-07, "loss": 0.6053, "step": 29088 }, { "epoch": 0.83, "grad_norm": 5.082034393239204, "learning_rate": 7.133661974871725e-07, "loss": 0.3464, "step": 29089 }, { "epoch": 0.83, "grad_norm": 4.870030902827166, "learning_rate": 7.131274857726428e-07, "loss": 0.4229, "step": 29090 }, { "epoch": 0.83, "grad_norm": 4.624980720273634, "learning_rate": 7.128888109375498e-07, "loss": 0.6463, "step": 29091 }, { "epoch": 0.83, "grad_norm": 5.425737483793516, "learning_rate": 7.126501729839458e-07, "loss": 0.27, "step": 29092 }, { "epoch": 0.83, "grad_norm": 1.2166923306591124, "learning_rate": 7.124115719138835e-07, "loss": 0.0783, "step": 29093 }, { "epoch": 0.83, "grad_norm": 5.743813918861158, "learning_rate": 7.121730077294187e-07, "loss": 0.645, "step": 29094 }, { "epoch": 0.83, "grad_norm": 7.654932648345641, "learning_rate": 7.119344804326001e-07, "loss": 0.5822, "step": 29095 }, { "epoch": 0.83, "grad_norm": 48.828304814122035, "learning_rate": 7.116959900254822e-07, "loss": 0.7704, "step": 29096 }, { "epoch": 0.83, "grad_norm": 5.311229374343423, "learning_rate": 7.114575365101156e-07, "loss": 0.4057, "step": 29097 }, { "epoch": 0.83, "grad_norm": 4.913254275667204, "learning_rate": 7.112191198885515e-07, "loss": 0.3989, "step": 29098 }, { "epoch": 0.83, "grad_norm": 7.468825910493711, "learning_rate": 7.10980740162841e-07, "loss": 0.5648, "step": 29099 }, { "epoch": 0.83, "grad_norm": 5.325690376051483, "learning_rate": 7.107423973350347e-07, "loss": 0.2369, "step": 29100 }, { "epoch": 0.83, "grad_norm": 2.4710881592852836, "learning_rate": 7.105040914071842e-07, "loss": 0.1597, "step": 29101 }, { "epoch": 0.83, "grad_norm": 5.773940336761516, "learning_rate": 7.102658223813391e-07, "loss": 0.737, "step": 29102 }, { "epoch": 0.83, "grad_norm": 7.769349339169448, "learning_rate": 7.100275902595493e-07, "loss": 0.5812, "step": 29103 }, { "epoch": 0.83, "grad_norm": 5.973364958246562, "learning_rate": 7.097893950438628e-07, "loss": 0.6672, "step": 29104 }, { "epoch": 0.83, "grad_norm": 3.978905937214286, "learning_rate": 7.095512367363305e-07, "loss": 0.4467, "step": 29105 }, { "epoch": 0.83, "grad_norm": 8.19423741942997, "learning_rate": 7.093131153389998e-07, "loss": 0.8734, "step": 29106 }, { "epoch": 0.83, "grad_norm": 3.6515909490101466, "learning_rate": 7.090750308539213e-07, "loss": 0.3714, "step": 29107 }, { "epoch": 0.83, "grad_norm": 6.462192436587903, "learning_rate": 7.08836983283141e-07, "loss": 0.2689, "step": 29108 }, { "epoch": 0.83, "grad_norm": 5.390401730543874, "learning_rate": 7.08598972628709e-07, "loss": 0.6174, "step": 29109 }, { "epoch": 0.83, "grad_norm": 5.084840014481269, "learning_rate": 7.083609988926716e-07, "loss": 0.2789, "step": 29110 }, { "epoch": 0.83, "grad_norm": 7.1764944321228, "learning_rate": 7.081230620770746e-07, "loss": 0.3704, "step": 29111 }, { "epoch": 0.83, "grad_norm": 5.5358508958968144, "learning_rate": 7.078851621839677e-07, "loss": 0.4071, "step": 29112 }, { "epoch": 0.83, "grad_norm": 13.13756954853439, "learning_rate": 7.076472992153955e-07, "loss": 0.4539, "step": 29113 }, { "epoch": 0.83, "grad_norm": 4.180226628123838, "learning_rate": 7.07409473173406e-07, "loss": 0.5036, "step": 29114 }, { "epoch": 0.83, "grad_norm": 7.5791022545620415, "learning_rate": 7.071716840600428e-07, "loss": 0.4211, "step": 29115 }, { "epoch": 0.83, "grad_norm": 13.038829936244293, "learning_rate": 7.069339318773549e-07, "loss": 0.5092, "step": 29116 }, { "epoch": 0.83, "grad_norm": 3.0457293581838853, "learning_rate": 7.066962166273855e-07, "loss": 0.1889, "step": 29117 }, { "epoch": 0.83, "grad_norm": 13.269573006102428, "learning_rate": 7.064585383121786e-07, "loss": 0.3956, "step": 29118 }, { "epoch": 0.83, "grad_norm": 3.012413212012764, "learning_rate": 7.062208969337814e-07, "loss": 0.1333, "step": 29119 }, { "epoch": 0.83, "grad_norm": 8.290680607833176, "learning_rate": 7.059832924942362e-07, "loss": 0.5854, "step": 29120 }, { "epoch": 0.83, "grad_norm": 3.0379536623531807, "learning_rate": 7.05745724995589e-07, "loss": 0.4405, "step": 29121 }, { "epoch": 0.83, "grad_norm": 4.20891028248301, "learning_rate": 7.055081944398817e-07, "loss": 0.2187, "step": 29122 }, { "epoch": 0.83, "grad_norm": 6.895416427717721, "learning_rate": 7.052707008291599e-07, "loss": 0.3374, "step": 29123 }, { "epoch": 0.83, "grad_norm": 3.3116737270871908, "learning_rate": 7.05033244165465e-07, "loss": 0.3653, "step": 29124 }, { "epoch": 0.83, "grad_norm": 2.187651193025917, "learning_rate": 7.047958244508396e-07, "loss": 0.1649, "step": 29125 }, { "epoch": 0.83, "grad_norm": 3.2212822961872076, "learning_rate": 7.04558441687328e-07, "loss": 0.2026, "step": 29126 }, { "epoch": 0.83, "grad_norm": 5.76803951827121, "learning_rate": 7.043210958769703e-07, "loss": 0.5411, "step": 29127 }, { "epoch": 0.83, "grad_norm": 6.620066911411096, "learning_rate": 7.040837870218098e-07, "loss": 0.3748, "step": 29128 }, { "epoch": 0.83, "grad_norm": 14.487133532966904, "learning_rate": 7.038465151238866e-07, "loss": 1.0196, "step": 29129 }, { "epoch": 0.83, "grad_norm": 5.837936487971717, "learning_rate": 7.036092801852457e-07, "loss": 0.4542, "step": 29130 }, { "epoch": 0.83, "grad_norm": 3.809889227791216, "learning_rate": 7.033720822079221e-07, "loss": 0.2367, "step": 29131 }, { "epoch": 0.83, "grad_norm": 6.890647421041328, "learning_rate": 7.031349211939615e-07, "loss": 0.3622, "step": 29132 }, { "epoch": 0.83, "grad_norm": 9.604257203928316, "learning_rate": 7.028977971454004e-07, "loss": 0.5937, "step": 29133 }, { "epoch": 0.83, "grad_norm": 9.566369912953524, "learning_rate": 7.026607100642807e-07, "loss": 0.7348, "step": 29134 }, { "epoch": 0.83, "grad_norm": 7.089058757442154, "learning_rate": 7.024236599526424e-07, "loss": 0.7078, "step": 29135 }, { "epoch": 0.83, "grad_norm": 9.18582252187807, "learning_rate": 7.021866468125249e-07, "loss": 0.8777, "step": 29136 }, { "epoch": 0.83, "grad_norm": 1.7401416295744032, "learning_rate": 7.01949670645966e-07, "loss": 0.0961, "step": 29137 }, { "epoch": 0.83, "grad_norm": 4.295604837126904, "learning_rate": 7.017127314550043e-07, "loss": 0.47, "step": 29138 }, { "epoch": 0.83, "grad_norm": 3.8290624599508156, "learning_rate": 7.014758292416795e-07, "loss": 0.2181, "step": 29139 }, { "epoch": 0.83, "grad_norm": 8.907503809428892, "learning_rate": 7.01238964008028e-07, "loss": 0.5553, "step": 29140 }, { "epoch": 0.83, "grad_norm": 3.547650391104035, "learning_rate": 7.010021357560881e-07, "loss": 0.4287, "step": 29141 }, { "epoch": 0.83, "grad_norm": 9.266941202222709, "learning_rate": 7.007653444878987e-07, "loss": 0.6043, "step": 29142 }, { "epoch": 0.83, "grad_norm": 4.968516506250953, "learning_rate": 7.005285902054959e-07, "loss": 0.5517, "step": 29143 }, { "epoch": 0.83, "grad_norm": 5.9834220665361935, "learning_rate": 7.002918729109165e-07, "loss": 0.2472, "step": 29144 }, { "epoch": 0.83, "grad_norm": 5.522518356658834, "learning_rate": 7.00055192606195e-07, "loss": 0.4695, "step": 29145 }, { "epoch": 0.83, "grad_norm": 6.008205524977577, "learning_rate": 6.998185492933707e-07, "loss": 0.4628, "step": 29146 }, { "epoch": 0.83, "grad_norm": 2.992026302288335, "learning_rate": 6.995819429744771e-07, "loss": 0.2635, "step": 29147 }, { "epoch": 0.83, "grad_norm": 4.350565989712966, "learning_rate": 6.993453736515515e-07, "loss": 0.2956, "step": 29148 }, { "epoch": 0.83, "grad_norm": 6.290059758131417, "learning_rate": 6.991088413266267e-07, "loss": 0.3401, "step": 29149 }, { "epoch": 0.83, "grad_norm": 7.013385032794442, "learning_rate": 6.988723460017405e-07, "loss": 0.6323, "step": 29150 }, { "epoch": 0.83, "grad_norm": 3.9139734436989935, "learning_rate": 6.986358876789256e-07, "loss": 0.2573, "step": 29151 }, { "epoch": 0.83, "grad_norm": 10.436682297889734, "learning_rate": 6.983994663602156e-07, "loss": 1.1457, "step": 29152 }, { "epoch": 0.83, "grad_norm": 4.534710003750542, "learning_rate": 6.981630820476465e-07, "loss": 0.3801, "step": 29153 }, { "epoch": 0.83, "grad_norm": 7.558757423383355, "learning_rate": 6.9792673474325e-07, "loss": 0.4822, "step": 29154 }, { "epoch": 0.83, "grad_norm": 2.6234198310079826, "learning_rate": 6.976904244490607e-07, "loss": 0.4137, "step": 29155 }, { "epoch": 0.83, "grad_norm": 4.719722622302049, "learning_rate": 6.974541511671101e-07, "loss": 0.3496, "step": 29156 }, { "epoch": 0.83, "grad_norm": 4.1985243112201625, "learning_rate": 6.972179148994341e-07, "loss": 0.2887, "step": 29157 }, { "epoch": 0.84, "grad_norm": 9.079239072740009, "learning_rate": 6.969817156480596e-07, "loss": 0.1803, "step": 29158 }, { "epoch": 0.84, "grad_norm": 4.963306972509663, "learning_rate": 6.967455534150224e-07, "loss": 0.2861, "step": 29159 }, { "epoch": 0.84, "grad_norm": 3.880934754375356, "learning_rate": 6.965094282023544e-07, "loss": 0.3287, "step": 29160 }, { "epoch": 0.84, "grad_norm": 9.899089210691704, "learning_rate": 6.962733400120842e-07, "loss": 0.372, "step": 29161 }, { "epoch": 0.84, "grad_norm": 5.086230619688464, "learning_rate": 6.960372888462463e-07, "loss": 0.8715, "step": 29162 }, { "epoch": 0.84, "grad_norm": 8.030020727342242, "learning_rate": 6.958012747068694e-07, "loss": 0.4083, "step": 29163 }, { "epoch": 0.84, "grad_norm": 4.100210435630722, "learning_rate": 6.955652975959842e-07, "loss": 0.3314, "step": 29164 }, { "epoch": 0.84, "grad_norm": 4.38115142515468, "learning_rate": 6.9532935751562e-07, "loss": 0.2888, "step": 29165 }, { "epoch": 0.84, "grad_norm": 4.377824988422584, "learning_rate": 6.950934544678068e-07, "loss": 0.4453, "step": 29166 }, { "epoch": 0.84, "grad_norm": 7.650234864562734, "learning_rate": 6.948575884545761e-07, "loss": 0.6314, "step": 29167 }, { "epoch": 0.84, "grad_norm": 8.652007678116254, "learning_rate": 6.946217594779542e-07, "loss": 0.185, "step": 29168 }, { "epoch": 0.84, "grad_norm": 5.022150660926787, "learning_rate": 6.943859675399723e-07, "loss": 0.6007, "step": 29169 }, { "epoch": 0.84, "grad_norm": 7.462829925042665, "learning_rate": 6.941502126426581e-07, "loss": 0.2647, "step": 29170 }, { "epoch": 0.84, "grad_norm": 3.0355282313768757, "learning_rate": 6.939144947880394e-07, "loss": 0.3156, "step": 29171 }, { "epoch": 0.84, "grad_norm": 6.587565301213173, "learning_rate": 6.936788139781426e-07, "loss": 0.4769, "step": 29172 }, { "epoch": 0.84, "grad_norm": 6.275322465855924, "learning_rate": 6.934431702149979e-07, "loss": 0.3704, "step": 29173 }, { "epoch": 0.84, "grad_norm": 2.3384678892011257, "learning_rate": 6.932075635006308e-07, "loss": 0.1354, "step": 29174 }, { "epoch": 0.84, "grad_norm": 3.300229052339393, "learning_rate": 6.929719938370683e-07, "loss": 0.1996, "step": 29175 }, { "epoch": 0.84, "grad_norm": 2.153806310638207, "learning_rate": 6.927364612263387e-07, "loss": 0.1154, "step": 29176 }, { "epoch": 0.84, "grad_norm": 5.383918283213655, "learning_rate": 6.925009656704673e-07, "loss": 0.5985, "step": 29177 }, { "epoch": 0.84, "grad_norm": 8.491209870525378, "learning_rate": 6.92265507171479e-07, "loss": 0.6275, "step": 29178 }, { "epoch": 0.84, "grad_norm": 4.795647168903076, "learning_rate": 6.920300857314e-07, "loss": 0.4793, "step": 29179 }, { "epoch": 0.84, "grad_norm": 5.653878615858546, "learning_rate": 6.917947013522564e-07, "loss": 0.6016, "step": 29180 }, { "epoch": 0.84, "grad_norm": 5.831462589653177, "learning_rate": 6.91559354036071e-07, "loss": 0.4688, "step": 29181 }, { "epoch": 0.84, "grad_norm": 5.423998502061575, "learning_rate": 6.913240437848718e-07, "loss": 0.665, "step": 29182 }, { "epoch": 0.84, "grad_norm": 2.887835109966045, "learning_rate": 6.910887706006797e-07, "loss": 0.1375, "step": 29183 }, { "epoch": 0.84, "grad_norm": 3.4540801647538375, "learning_rate": 6.908535344855216e-07, "loss": 0.4225, "step": 29184 }, { "epoch": 0.84, "grad_norm": 3.3392697024999705, "learning_rate": 6.906183354414203e-07, "loss": 0.3709, "step": 29185 }, { "epoch": 0.84, "grad_norm": 6.482928011203711, "learning_rate": 6.903831734703976e-07, "loss": 0.5005, "step": 29186 }, { "epoch": 0.84, "grad_norm": 11.001652420081724, "learning_rate": 6.901480485744788e-07, "loss": 0.6792, "step": 29187 }, { "epoch": 0.84, "grad_norm": 2.6591766166879416, "learning_rate": 6.899129607556848e-07, "loss": 0.217, "step": 29188 }, { "epoch": 0.84, "grad_norm": 12.107863793367223, "learning_rate": 6.8967791001604e-07, "loss": 0.7, "step": 29189 }, { "epoch": 0.84, "grad_norm": 3.6225549410174165, "learning_rate": 6.894428963575655e-07, "loss": 0.3159, "step": 29190 }, { "epoch": 0.84, "grad_norm": 6.155237879552815, "learning_rate": 6.892079197822821e-07, "loss": 0.601, "step": 29191 }, { "epoch": 0.84, "grad_norm": 3.3751313925704074, "learning_rate": 6.889729802922129e-07, "loss": 0.2669, "step": 29192 }, { "epoch": 0.84, "grad_norm": 3.916964264311038, "learning_rate": 6.887380778893771e-07, "loss": 0.2065, "step": 29193 }, { "epoch": 0.84, "grad_norm": 3.2857094932752213, "learning_rate": 6.885032125757984e-07, "loss": 0.1887, "step": 29194 }, { "epoch": 0.84, "grad_norm": 4.146897240171609, "learning_rate": 6.882683843534943e-07, "loss": 0.2216, "step": 29195 }, { "epoch": 0.84, "grad_norm": 7.135727486256581, "learning_rate": 6.88033593224488e-07, "loss": 0.4875, "step": 29196 }, { "epoch": 0.84, "grad_norm": 4.1877562316389065, "learning_rate": 6.877988391907975e-07, "loss": 0.6681, "step": 29197 }, { "epoch": 0.84, "grad_norm": 4.71907284876393, "learning_rate": 6.875641222544421e-07, "loss": 0.4948, "step": 29198 }, { "epoch": 0.84, "grad_norm": 3.6247636126033678, "learning_rate": 6.87329442417441e-07, "loss": 0.5238, "step": 29199 }, { "epoch": 0.84, "grad_norm": 5.339762666835931, "learning_rate": 6.870947996818139e-07, "loss": 0.4457, "step": 29200 }, { "epoch": 0.84, "grad_norm": 3.707452973153062, "learning_rate": 6.868601940495801e-07, "loss": 0.2016, "step": 29201 }, { "epoch": 0.84, "grad_norm": 14.43368737547865, "learning_rate": 6.86625625522756e-07, "loss": 0.4501, "step": 29202 }, { "epoch": 0.84, "grad_norm": 3.268068120669, "learning_rate": 6.863910941033613e-07, "loss": 0.4431, "step": 29203 }, { "epoch": 0.84, "grad_norm": 3.915164997818616, "learning_rate": 6.861565997934133e-07, "loss": 0.2273, "step": 29204 }, { "epoch": 0.84, "grad_norm": 7.775351927909388, "learning_rate": 6.859221425949292e-07, "loss": 0.5514, "step": 29205 }, { "epoch": 0.84, "grad_norm": 6.030773914897138, "learning_rate": 6.85687722509924e-07, "loss": 0.4845, "step": 29206 }, { "epoch": 0.84, "grad_norm": 11.271992126143976, "learning_rate": 6.854533395404161e-07, "loss": 0.5914, "step": 29207 }, { "epoch": 0.84, "grad_norm": 4.9153703850381385, "learning_rate": 6.852189936884235e-07, "loss": 0.5866, "step": 29208 }, { "epoch": 0.84, "grad_norm": 8.845492888799402, "learning_rate": 6.849846849559594e-07, "loss": 0.5162, "step": 29209 }, { "epoch": 0.84, "grad_norm": 6.056839138543239, "learning_rate": 6.847504133450416e-07, "loss": 0.3886, "step": 29210 }, { "epoch": 0.84, "grad_norm": 5.341117344232692, "learning_rate": 6.845161788576854e-07, "loss": 0.5501, "step": 29211 }, { "epoch": 0.84, "grad_norm": 5.266319025412636, "learning_rate": 6.842819814959046e-07, "loss": 0.5451, "step": 29212 }, { "epoch": 0.84, "grad_norm": 3.3996911553749727, "learning_rate": 6.840478212617136e-07, "loss": 0.2138, "step": 29213 }, { "epoch": 0.84, "grad_norm": 2.8644137153114944, "learning_rate": 6.838136981571286e-07, "loss": 0.3179, "step": 29214 }, { "epoch": 0.84, "grad_norm": 2.594456863600499, "learning_rate": 6.835796121841621e-07, "loss": 0.2138, "step": 29215 }, { "epoch": 0.84, "grad_norm": 5.447500262809702, "learning_rate": 6.833455633448288e-07, "loss": 0.4883, "step": 29216 }, { "epoch": 0.84, "grad_norm": 2.596676336188017, "learning_rate": 6.831115516411441e-07, "loss": 0.0833, "step": 29217 }, { "epoch": 0.84, "grad_norm": 4.979377226055151, "learning_rate": 6.828775770751172e-07, "loss": 0.3236, "step": 29218 }, { "epoch": 0.84, "grad_norm": 3.932467135457569, "learning_rate": 6.82643639648764e-07, "loss": 0.2669, "step": 29219 }, { "epoch": 0.84, "grad_norm": 6.570490885704506, "learning_rate": 6.82409739364095e-07, "loss": 0.372, "step": 29220 }, { "epoch": 0.84, "grad_norm": 4.548351598653792, "learning_rate": 6.821758762231245e-07, "loss": 0.6696, "step": 29221 }, { "epoch": 0.84, "grad_norm": 7.114599500631242, "learning_rate": 6.819420502278623e-07, "loss": 0.6686, "step": 29222 }, { "epoch": 0.84, "grad_norm": 8.62314696378068, "learning_rate": 6.817082613803222e-07, "loss": 1.1079, "step": 29223 }, { "epoch": 0.84, "grad_norm": 5.1312540089153655, "learning_rate": 6.814745096825138e-07, "loss": 0.5596, "step": 29224 }, { "epoch": 0.84, "grad_norm": 5.310825263439787, "learning_rate": 6.812407951364475e-07, "loss": 0.3839, "step": 29225 }, { "epoch": 0.84, "grad_norm": 5.40927222565811, "learning_rate": 6.810071177441363e-07, "loss": 0.6289, "step": 29226 }, { "epoch": 0.84, "grad_norm": 7.715048083725992, "learning_rate": 6.807734775075875e-07, "loss": 0.5775, "step": 29227 }, { "epoch": 0.84, "grad_norm": 3.1037215974132684, "learning_rate": 6.805398744288145e-07, "loss": 0.1406, "step": 29228 }, { "epoch": 0.84, "grad_norm": 3.1709938140707754, "learning_rate": 6.803063085098233e-07, "loss": 0.2118, "step": 29229 }, { "epoch": 0.84, "grad_norm": 7.475824620618099, "learning_rate": 6.800727797526264e-07, "loss": 0.7548, "step": 29230 }, { "epoch": 0.84, "grad_norm": 6.515304886892481, "learning_rate": 6.798392881592314e-07, "loss": 0.7419, "step": 29231 }, { "epoch": 0.84, "grad_norm": 6.203823636394479, "learning_rate": 6.796058337316463e-07, "loss": 0.4781, "step": 29232 }, { "epoch": 0.84, "grad_norm": 4.875977344845221, "learning_rate": 6.793724164718812e-07, "loss": 0.5292, "step": 29233 }, { "epoch": 0.84, "grad_norm": 4.013708406825078, "learning_rate": 6.791390363819422e-07, "loss": 0.2975, "step": 29234 }, { "epoch": 0.84, "grad_norm": 8.068572249105667, "learning_rate": 6.789056934638393e-07, "loss": 1.4319, "step": 29235 }, { "epoch": 0.84, "grad_norm": 5.853177834438841, "learning_rate": 6.786723877195772e-07, "loss": 0.2408, "step": 29236 }, { "epoch": 0.84, "grad_norm": 7.181506170446396, "learning_rate": 6.784391191511658e-07, "loss": 0.9794, "step": 29237 }, { "epoch": 0.84, "grad_norm": 8.31519853200072, "learning_rate": 6.782058877606107e-07, "loss": 0.4181, "step": 29238 }, { "epoch": 0.84, "grad_norm": 4.770613913786192, "learning_rate": 6.779726935499181e-07, "loss": 0.5641, "step": 29239 }, { "epoch": 0.84, "grad_norm": 6.736365402529979, "learning_rate": 6.777395365210931e-07, "loss": 0.482, "step": 29240 }, { "epoch": 0.84, "grad_norm": 8.009817119991899, "learning_rate": 6.77506416676143e-07, "loss": 0.4745, "step": 29241 }, { "epoch": 0.84, "grad_norm": 5.23318918944842, "learning_rate": 6.77273334017074e-07, "loss": 0.332, "step": 29242 }, { "epoch": 0.84, "grad_norm": 5.558301905934459, "learning_rate": 6.770402885458888e-07, "loss": 0.6527, "step": 29243 }, { "epoch": 0.84, "grad_norm": 5.9070777514862005, "learning_rate": 6.768072802645964e-07, "loss": 0.3124, "step": 29244 }, { "epoch": 0.84, "grad_norm": 1.9136254259224021, "learning_rate": 6.765743091751958e-07, "loss": 0.2496, "step": 29245 }, { "epoch": 0.84, "grad_norm": 3.533183471557547, "learning_rate": 6.76341375279696e-07, "loss": 0.2438, "step": 29246 }, { "epoch": 0.84, "grad_norm": 4.40429590060272, "learning_rate": 6.761084785800975e-07, "loss": 0.225, "step": 29247 }, { "epoch": 0.84, "grad_norm": 6.152782548061078, "learning_rate": 6.758756190784055e-07, "loss": 0.3843, "step": 29248 }, { "epoch": 0.84, "grad_norm": 3.9199688295176087, "learning_rate": 6.756427967766244e-07, "loss": 0.4367, "step": 29249 }, { "epoch": 0.84, "grad_norm": 8.237933148354792, "learning_rate": 6.754100116767553e-07, "loss": 0.4078, "step": 29250 }, { "epoch": 0.84, "grad_norm": 5.174920813097455, "learning_rate": 6.751772637808019e-07, "loss": 0.3501, "step": 29251 }, { "epoch": 0.84, "grad_norm": 5.8538455354664585, "learning_rate": 6.74944553090765e-07, "loss": 0.4884, "step": 29252 }, { "epoch": 0.84, "grad_norm": 5.794203294224675, "learning_rate": 6.747118796086488e-07, "loss": 0.3391, "step": 29253 }, { "epoch": 0.84, "grad_norm": 3.963519607195161, "learning_rate": 6.744792433364522e-07, "loss": 0.4482, "step": 29254 }, { "epoch": 0.84, "grad_norm": 3.7833990342713433, "learning_rate": 6.742466442761792e-07, "loss": 0.533, "step": 29255 }, { "epoch": 0.84, "grad_norm": 9.92315257099913, "learning_rate": 6.740140824298292e-07, "loss": 0.5663, "step": 29256 }, { "epoch": 0.84, "grad_norm": 4.1638902123972015, "learning_rate": 6.737815577994039e-07, "loss": 0.221, "step": 29257 }, { "epoch": 0.84, "grad_norm": 7.693974412485347, "learning_rate": 6.735490703869036e-07, "loss": 0.6656, "step": 29258 }, { "epoch": 0.84, "grad_norm": 4.523215991774978, "learning_rate": 6.733166201943264e-07, "loss": 0.3592, "step": 29259 }, { "epoch": 0.84, "grad_norm": 7.179468128725773, "learning_rate": 6.730842072236754e-07, "loss": 0.8014, "step": 29260 }, { "epoch": 0.84, "grad_norm": 3.6515429592956377, "learning_rate": 6.728518314769467e-07, "loss": 0.1067, "step": 29261 }, { "epoch": 0.84, "grad_norm": 4.490547292059567, "learning_rate": 6.726194929561419e-07, "loss": 0.235, "step": 29262 }, { "epoch": 0.84, "grad_norm": 4.0173552826392465, "learning_rate": 6.72387191663258e-07, "loss": 0.2989, "step": 29263 }, { "epoch": 0.84, "grad_norm": 6.916548666656039, "learning_rate": 6.721549276002948e-07, "loss": 0.5631, "step": 29264 }, { "epoch": 0.84, "grad_norm": 5.022424195222157, "learning_rate": 6.719227007692503e-07, "loss": 0.3519, "step": 29265 }, { "epoch": 0.84, "grad_norm": 4.46708594197089, "learning_rate": 6.716905111721212e-07, "loss": 0.2127, "step": 29266 }, { "epoch": 0.84, "grad_norm": 6.797297462400656, "learning_rate": 6.714583588109063e-07, "loss": 0.7813, "step": 29267 }, { "epoch": 0.84, "grad_norm": 6.428275452718288, "learning_rate": 6.712262436876011e-07, "loss": 0.5453, "step": 29268 }, { "epoch": 0.84, "grad_norm": 5.050593987081616, "learning_rate": 6.709941658042047e-07, "loss": 0.3297, "step": 29269 }, { "epoch": 0.84, "grad_norm": 5.6741425156281275, "learning_rate": 6.707621251627117e-07, "loss": 0.2336, "step": 29270 }, { "epoch": 0.84, "grad_norm": 8.225997919922206, "learning_rate": 6.7053012176512e-07, "loss": 1.0041, "step": 29271 }, { "epoch": 0.84, "grad_norm": 3.3634882917488182, "learning_rate": 6.702981556134247e-07, "loss": 0.3555, "step": 29272 }, { "epoch": 0.84, "grad_norm": 4.677309071871196, "learning_rate": 6.700662267096198e-07, "loss": 0.4918, "step": 29273 }, { "epoch": 0.84, "grad_norm": 11.108457674979416, "learning_rate": 6.698343350557035e-07, "loss": 0.6787, "step": 29274 }, { "epoch": 0.84, "grad_norm": 3.993580494896775, "learning_rate": 6.696024806536677e-07, "loss": 0.312, "step": 29275 }, { "epoch": 0.84, "grad_norm": 5.899847284053943, "learning_rate": 6.693706635055103e-07, "loss": 0.4844, "step": 29276 }, { "epoch": 0.84, "grad_norm": 4.323949348858816, "learning_rate": 6.691388836132235e-07, "loss": 0.5088, "step": 29277 }, { "epoch": 0.84, "grad_norm": 13.894422735005513, "learning_rate": 6.689071409788018e-07, "loss": 0.2811, "step": 29278 }, { "epoch": 0.84, "grad_norm": 3.9141020934639763, "learning_rate": 6.686754356042379e-07, "loss": 0.2271, "step": 29279 }, { "epoch": 0.84, "grad_norm": 8.6846150609756, "learning_rate": 6.684437674915267e-07, "loss": 0.4558, "step": 29280 }, { "epoch": 0.84, "grad_norm": 4.681995924229662, "learning_rate": 6.682121366426591e-07, "loss": 0.6048, "step": 29281 }, { "epoch": 0.84, "grad_norm": 9.208893708544263, "learning_rate": 6.679805430596298e-07, "loss": 0.5177, "step": 29282 }, { "epoch": 0.84, "grad_norm": 1.1549566613360494, "learning_rate": 6.677489867444309e-07, "loss": 0.073, "step": 29283 }, { "epoch": 0.84, "grad_norm": 4.66921890947612, "learning_rate": 6.675174676990548e-07, "loss": 0.2931, "step": 29284 }, { "epoch": 0.84, "grad_norm": 1.5057517284109148, "learning_rate": 6.672859859254921e-07, "loss": 0.0902, "step": 29285 }, { "epoch": 0.84, "grad_norm": 4.67926614485198, "learning_rate": 6.670545414257335e-07, "loss": 0.5153, "step": 29286 }, { "epoch": 0.84, "grad_norm": 6.982322852226164, "learning_rate": 6.668231342017722e-07, "loss": 0.684, "step": 29287 }, { "epoch": 0.84, "grad_norm": 6.671504151090348, "learning_rate": 6.665917642555969e-07, "loss": 0.5822, "step": 29288 }, { "epoch": 0.84, "grad_norm": 3.6737197462499527, "learning_rate": 6.663604315891992e-07, "loss": 0.3499, "step": 29289 }, { "epoch": 0.84, "grad_norm": 4.712313171245006, "learning_rate": 6.661291362045702e-07, "loss": 0.342, "step": 29290 }, { "epoch": 0.84, "grad_norm": 4.845784197017259, "learning_rate": 6.658978781036984e-07, "loss": 0.4004, "step": 29291 }, { "epoch": 0.84, "grad_norm": 6.337306579106126, "learning_rate": 6.65666657288574e-07, "loss": 0.7082, "step": 29292 }, { "epoch": 0.84, "grad_norm": 0.7169537489523474, "learning_rate": 6.654354737611845e-07, "loss": 0.039, "step": 29293 }, { "epoch": 0.84, "grad_norm": 5.163510176169042, "learning_rate": 6.652043275235209e-07, "loss": 0.2604, "step": 29294 }, { "epoch": 0.84, "grad_norm": 3.4259720056506877, "learning_rate": 6.649732185775698e-07, "loss": 0.5906, "step": 29295 }, { "epoch": 0.84, "grad_norm": 8.600439041596738, "learning_rate": 6.647421469253212e-07, "loss": 0.5433, "step": 29296 }, { "epoch": 0.84, "grad_norm": 4.990640857363203, "learning_rate": 6.645111125687608e-07, "loss": 0.1918, "step": 29297 }, { "epoch": 0.84, "grad_norm": 5.496987341361193, "learning_rate": 6.642801155098793e-07, "loss": 0.3323, "step": 29298 }, { "epoch": 0.84, "grad_norm": 9.372927398460696, "learning_rate": 6.640491557506618e-07, "loss": 0.7137, "step": 29299 }, { "epoch": 0.84, "grad_norm": 3.943421369483799, "learning_rate": 6.638182332930942e-07, "loss": 0.1642, "step": 29300 }, { "epoch": 0.84, "grad_norm": 4.106448331598177, "learning_rate": 6.635873481391659e-07, "loss": 0.2943, "step": 29301 }, { "epoch": 0.84, "grad_norm": 4.864242054351419, "learning_rate": 6.633565002908604e-07, "loss": 0.3814, "step": 29302 }, { "epoch": 0.84, "grad_norm": 5.3009978933907655, "learning_rate": 6.631256897501664e-07, "loss": 0.2206, "step": 29303 }, { "epoch": 0.84, "grad_norm": 4.60218946657743, "learning_rate": 6.62894916519068e-07, "loss": 0.6349, "step": 29304 }, { "epoch": 0.84, "grad_norm": 10.777818482006417, "learning_rate": 6.626641805995504e-07, "loss": 0.4184, "step": 29305 }, { "epoch": 0.84, "grad_norm": 8.993998221762196, "learning_rate": 6.624334819935979e-07, "loss": 0.5484, "step": 29306 }, { "epoch": 0.84, "grad_norm": 4.71145707992862, "learning_rate": 6.622028207031961e-07, "loss": 0.3414, "step": 29307 }, { "epoch": 0.84, "grad_norm": 13.494997934551158, "learning_rate": 6.619721967303306e-07, "loss": 0.3529, "step": 29308 }, { "epoch": 0.84, "grad_norm": 8.061192606116677, "learning_rate": 6.61741610076983e-07, "loss": 0.8363, "step": 29309 }, { "epoch": 0.84, "grad_norm": 4.035115008221902, "learning_rate": 6.615110607451392e-07, "loss": 0.1936, "step": 29310 }, { "epoch": 0.84, "grad_norm": 6.220475767283585, "learning_rate": 6.612805487367818e-07, "loss": 0.3749, "step": 29311 }, { "epoch": 0.84, "grad_norm": 4.414170552509411, "learning_rate": 6.610500740538933e-07, "loss": 0.3416, "step": 29312 }, { "epoch": 0.84, "grad_norm": 5.61589067156521, "learning_rate": 6.608196366984559e-07, "loss": 0.7158, "step": 29313 }, { "epoch": 0.84, "grad_norm": 3.5301712211278256, "learning_rate": 6.605892366724526e-07, "loss": 0.2816, "step": 29314 }, { "epoch": 0.84, "grad_norm": 9.520783678143744, "learning_rate": 6.603588739778671e-07, "loss": 1.0304, "step": 29315 }, { "epoch": 0.84, "grad_norm": 12.568996400850672, "learning_rate": 6.601285486166792e-07, "loss": 0.655, "step": 29316 }, { "epoch": 0.84, "grad_norm": 3.6314680667603993, "learning_rate": 6.598982605908716e-07, "loss": 0.2297, "step": 29317 }, { "epoch": 0.84, "grad_norm": 6.454355081065938, "learning_rate": 6.59668009902425e-07, "loss": 0.2865, "step": 29318 }, { "epoch": 0.84, "grad_norm": 2.7662923837371904, "learning_rate": 6.594377965533205e-07, "loss": 0.1492, "step": 29319 }, { "epoch": 0.84, "grad_norm": 5.715412395813405, "learning_rate": 6.592076205455366e-07, "loss": 0.6036, "step": 29320 }, { "epoch": 0.84, "grad_norm": 5.277332523314241, "learning_rate": 6.589774818810568e-07, "loss": 0.5054, "step": 29321 }, { "epoch": 0.84, "grad_norm": 5.26289867603516, "learning_rate": 6.587473805618577e-07, "loss": 0.3641, "step": 29322 }, { "epoch": 0.84, "grad_norm": 1.8618952516617757, "learning_rate": 6.585173165899206e-07, "loss": 0.1462, "step": 29323 }, { "epoch": 0.84, "grad_norm": 5.969592768902925, "learning_rate": 6.582872899672249e-07, "loss": 0.626, "step": 29324 }, { "epoch": 0.84, "grad_norm": 4.792710500162695, "learning_rate": 6.580573006957497e-07, "loss": 0.381, "step": 29325 }, { "epoch": 0.84, "grad_norm": 8.949901459860037, "learning_rate": 6.578273487774728e-07, "loss": 0.5972, "step": 29326 }, { "epoch": 0.84, "grad_norm": 6.526775707500361, "learning_rate": 6.575974342143715e-07, "loss": 0.8324, "step": 29327 }, { "epoch": 0.84, "grad_norm": 4.758299353233816, "learning_rate": 6.573675570084265e-07, "loss": 0.2238, "step": 29328 }, { "epoch": 0.84, "grad_norm": 6.4050840107562195, "learning_rate": 6.571377171616117e-07, "loss": 0.7636, "step": 29329 }, { "epoch": 0.84, "grad_norm": 2.8605680426106517, "learning_rate": 6.569079146759072e-07, "loss": 0.2178, "step": 29330 }, { "epoch": 0.84, "grad_norm": 9.804739537803888, "learning_rate": 6.566781495532898e-07, "loss": 0.7027, "step": 29331 }, { "epoch": 0.84, "grad_norm": 2.867258429624403, "learning_rate": 6.564484217957356e-07, "loss": 0.5077, "step": 29332 }, { "epoch": 0.84, "grad_norm": 6.180732339719059, "learning_rate": 6.562187314052204e-07, "loss": 0.4798, "step": 29333 }, { "epoch": 0.84, "grad_norm": 3.869603983801841, "learning_rate": 6.559890783837202e-07, "loss": 0.1453, "step": 29334 }, { "epoch": 0.84, "grad_norm": 9.255358819090548, "learning_rate": 6.55759462733212e-07, "loss": 0.2427, "step": 29335 }, { "epoch": 0.84, "grad_norm": 4.049358430720487, "learning_rate": 6.55529884455669e-07, "loss": 0.4165, "step": 29336 }, { "epoch": 0.84, "grad_norm": 5.797495230425624, "learning_rate": 6.553003435530692e-07, "loss": 0.4125, "step": 29337 }, { "epoch": 0.84, "grad_norm": 3.810003166572877, "learning_rate": 6.550708400273848e-07, "loss": 0.2783, "step": 29338 }, { "epoch": 0.84, "grad_norm": 4.050683162903193, "learning_rate": 6.548413738805903e-07, "loss": 0.618, "step": 29339 }, { "epoch": 0.84, "grad_norm": 3.516918622538353, "learning_rate": 6.546119451146616e-07, "loss": 0.2893, "step": 29340 }, { "epoch": 0.84, "grad_norm": 6.030904730366767, "learning_rate": 6.543825537315701e-07, "loss": 0.4023, "step": 29341 }, { "epoch": 0.84, "grad_norm": 7.70027847777299, "learning_rate": 6.541531997332918e-07, "loss": 0.5456, "step": 29342 }, { "epoch": 0.84, "grad_norm": 4.009703994775321, "learning_rate": 6.53923883121797e-07, "loss": 0.2872, "step": 29343 }, { "epoch": 0.84, "grad_norm": 4.94947285590709, "learning_rate": 6.53694603899061e-07, "loss": 0.4772, "step": 29344 }, { "epoch": 0.84, "grad_norm": 5.904410302791648, "learning_rate": 6.534653620670556e-07, "loss": 0.4781, "step": 29345 }, { "epoch": 0.84, "grad_norm": 3.132762651689021, "learning_rate": 6.532361576277523e-07, "loss": 0.3828, "step": 29346 }, { "epoch": 0.84, "grad_norm": 6.951045859154705, "learning_rate": 6.530069905831221e-07, "loss": 0.254, "step": 29347 }, { "epoch": 0.84, "grad_norm": 6.099111223354084, "learning_rate": 6.52777860935137e-07, "loss": 0.4235, "step": 29348 }, { "epoch": 0.84, "grad_norm": 6.5210187071105015, "learning_rate": 6.525487686857707e-07, "loss": 0.182, "step": 29349 }, { "epoch": 0.84, "grad_norm": 6.040130698419418, "learning_rate": 6.523197138369902e-07, "loss": 0.5654, "step": 29350 }, { "epoch": 0.84, "grad_norm": 6.500468310578549, "learning_rate": 6.520906963907698e-07, "loss": 0.6233, "step": 29351 }, { "epoch": 0.84, "grad_norm": 5.195978773029657, "learning_rate": 6.51861716349077e-07, "loss": 0.2477, "step": 29352 }, { "epoch": 0.84, "grad_norm": 4.810887450515074, "learning_rate": 6.516327737138828e-07, "loss": 0.5035, "step": 29353 }, { "epoch": 0.84, "grad_norm": 6.598735244540817, "learning_rate": 6.514038684871554e-07, "loss": 0.5451, "step": 29354 }, { "epoch": 0.84, "grad_norm": 4.6246628767299285, "learning_rate": 6.511750006708656e-07, "loss": 0.4844, "step": 29355 }, { "epoch": 0.84, "grad_norm": 3.484306916397089, "learning_rate": 6.509461702669822e-07, "loss": 0.3218, "step": 29356 }, { "epoch": 0.84, "grad_norm": 5.99741647650252, "learning_rate": 6.507173772774728e-07, "loss": 0.3541, "step": 29357 }, { "epoch": 0.84, "grad_norm": 2.644627827130367, "learning_rate": 6.504886217043071e-07, "loss": 0.1585, "step": 29358 }, { "epoch": 0.84, "grad_norm": 6.652377976907967, "learning_rate": 6.502599035494522e-07, "loss": 0.5002, "step": 29359 }, { "epoch": 0.84, "grad_norm": 3.637605257903763, "learning_rate": 6.500312228148764e-07, "loss": 0.4188, "step": 29360 }, { "epoch": 0.84, "grad_norm": 4.089712262886441, "learning_rate": 6.498025795025448e-07, "loss": 0.664, "step": 29361 }, { "epoch": 0.84, "grad_norm": 5.532039068803998, "learning_rate": 6.495739736144268e-07, "loss": 0.5442, "step": 29362 }, { "epoch": 0.84, "grad_norm": 3.9344442711413876, "learning_rate": 6.493454051524878e-07, "loss": 0.4388, "step": 29363 }, { "epoch": 0.84, "grad_norm": 3.5750783084749216, "learning_rate": 6.491168741186954e-07, "loss": 0.3067, "step": 29364 }, { "epoch": 0.84, "grad_norm": 8.056048510438544, "learning_rate": 6.488883805150148e-07, "loss": 0.6619, "step": 29365 }, { "epoch": 0.84, "grad_norm": 4.799979457215219, "learning_rate": 6.486599243434105e-07, "loss": 0.4537, "step": 29366 }, { "epoch": 0.84, "grad_norm": 2.627152650197467, "learning_rate": 6.48431505605851e-07, "loss": 0.2721, "step": 29367 }, { "epoch": 0.84, "grad_norm": 6.378062204652733, "learning_rate": 6.482031243042974e-07, "loss": 0.7906, "step": 29368 }, { "epoch": 0.84, "grad_norm": 3.2666096457707816, "learning_rate": 6.479747804407183e-07, "loss": 0.3991, "step": 29369 }, { "epoch": 0.84, "grad_norm": 6.855902468446083, "learning_rate": 6.477464740170747e-07, "loss": 0.651, "step": 29370 }, { "epoch": 0.84, "grad_norm": 5.702420920192028, "learning_rate": 6.47518205035334e-07, "loss": 0.6021, "step": 29371 }, { "epoch": 0.84, "grad_norm": 6.222604486761417, "learning_rate": 6.472899734974581e-07, "loss": 0.1362, "step": 29372 }, { "epoch": 0.84, "grad_norm": 8.085147070835722, "learning_rate": 6.47061779405409e-07, "loss": 0.5176, "step": 29373 }, { "epoch": 0.84, "grad_norm": 8.47721389166218, "learning_rate": 6.468336227611532e-07, "loss": 0.2195, "step": 29374 }, { "epoch": 0.84, "grad_norm": 7.342816427424358, "learning_rate": 6.466055035666501e-07, "loss": 0.4041, "step": 29375 }, { "epoch": 0.84, "grad_norm": 4.722814565171089, "learning_rate": 6.463774218238655e-07, "loss": 0.2185, "step": 29376 }, { "epoch": 0.84, "grad_norm": 6.35900967835051, "learning_rate": 6.461493775347588e-07, "loss": 0.6274, "step": 29377 }, { "epoch": 0.84, "grad_norm": 8.451467665346343, "learning_rate": 6.459213707012935e-07, "loss": 0.7453, "step": 29378 }, { "epoch": 0.84, "grad_norm": 5.338882728164687, "learning_rate": 6.456934013254312e-07, "loss": 0.5878, "step": 29379 }, { "epoch": 0.84, "grad_norm": 6.405013695493018, "learning_rate": 6.454654694091311e-07, "loss": 0.6311, "step": 29380 }, { "epoch": 0.84, "grad_norm": 4.221738364424953, "learning_rate": 6.452375749543566e-07, "loss": 0.7167, "step": 29381 }, { "epoch": 0.84, "grad_norm": 3.94656146836122, "learning_rate": 6.450097179630654e-07, "loss": 0.4869, "step": 29382 }, { "epoch": 0.84, "grad_norm": 3.3029389320881637, "learning_rate": 6.447818984372212e-07, "loss": 0.3227, "step": 29383 }, { "epoch": 0.84, "grad_norm": 5.6543146689864665, "learning_rate": 6.445541163787805e-07, "loss": 0.3468, "step": 29384 }, { "epoch": 0.84, "grad_norm": 3.670618369786455, "learning_rate": 6.443263717897058e-07, "loss": 0.4171, "step": 29385 }, { "epoch": 0.84, "grad_norm": 5.134683671473581, "learning_rate": 6.440986646719555e-07, "loss": 0.639, "step": 29386 }, { "epoch": 0.84, "grad_norm": 9.903786915608363, "learning_rate": 6.438709950274874e-07, "loss": 0.57, "step": 29387 }, { "epoch": 0.84, "grad_norm": 3.4794181668108513, "learning_rate": 6.4364336285826e-07, "loss": 0.28, "step": 29388 }, { "epoch": 0.84, "grad_norm": 5.368358678622709, "learning_rate": 6.434157681662323e-07, "loss": 0.222, "step": 29389 }, { "epoch": 0.84, "grad_norm": 7.276819149813118, "learning_rate": 6.431882109533633e-07, "loss": 0.253, "step": 29390 }, { "epoch": 0.84, "grad_norm": 5.928016477404923, "learning_rate": 6.429606912216091e-07, "loss": 0.7836, "step": 29391 }, { "epoch": 0.84, "grad_norm": 7.285188845645914, "learning_rate": 6.427332089729293e-07, "loss": 0.4661, "step": 29392 }, { "epoch": 0.84, "grad_norm": 4.894874992995429, "learning_rate": 6.425057642092769e-07, "loss": 0.2939, "step": 29393 }, { "epoch": 0.84, "grad_norm": 4.693292827666216, "learning_rate": 6.422783569326124e-07, "loss": 0.6899, "step": 29394 }, { "epoch": 0.84, "grad_norm": 3.502348963103711, "learning_rate": 6.420509871448894e-07, "loss": 0.3773, "step": 29395 }, { "epoch": 0.84, "grad_norm": 9.850722758890743, "learning_rate": 6.418236548480656e-07, "loss": 0.8045, "step": 29396 }, { "epoch": 0.84, "grad_norm": 6.816243604233451, "learning_rate": 6.415963600440966e-07, "loss": 0.3887, "step": 29397 }, { "epoch": 0.84, "grad_norm": 3.8011271837810234, "learning_rate": 6.413691027349384e-07, "loss": 0.3197, "step": 29398 }, { "epoch": 0.84, "grad_norm": 7.1749646215862555, "learning_rate": 6.411418829225446e-07, "loss": 0.3568, "step": 29399 }, { "epoch": 0.84, "grad_norm": 2.915416351899152, "learning_rate": 6.409147006088696e-07, "loss": 0.2293, "step": 29400 }, { "epoch": 0.84, "grad_norm": 6.716142334677673, "learning_rate": 6.406875557958697e-07, "loss": 0.5594, "step": 29401 }, { "epoch": 0.84, "grad_norm": 4.370504085527078, "learning_rate": 6.404604484854971e-07, "loss": 0.2388, "step": 29402 }, { "epoch": 0.84, "grad_norm": 17.922481297129234, "learning_rate": 6.402333786797072e-07, "loss": 0.3428, "step": 29403 }, { "epoch": 0.84, "grad_norm": 4.936100242276957, "learning_rate": 6.400063463804518e-07, "loss": 0.5013, "step": 29404 }, { "epoch": 0.84, "grad_norm": 4.92044191841397, "learning_rate": 6.397793515896866e-07, "loss": 0.4314, "step": 29405 }, { "epoch": 0.84, "grad_norm": 10.223232441087047, "learning_rate": 6.395523943093624e-07, "loss": 0.6197, "step": 29406 }, { "epoch": 0.84, "grad_norm": 2.4667095733682594, "learning_rate": 6.393254745414307e-07, "loss": 0.1794, "step": 29407 }, { "epoch": 0.84, "grad_norm": 3.6228016690668396, "learning_rate": 6.390985922878462e-07, "loss": 0.3929, "step": 29408 }, { "epoch": 0.84, "grad_norm": 5.694390116279945, "learning_rate": 6.388717475505591e-07, "loss": 0.227, "step": 29409 }, { "epoch": 0.84, "grad_norm": 4.64980543508895, "learning_rate": 6.38644940331522e-07, "loss": 0.2731, "step": 29410 }, { "epoch": 0.84, "grad_norm": 7.710748888547805, "learning_rate": 6.384181706326842e-07, "loss": 0.7101, "step": 29411 }, { "epoch": 0.84, "grad_norm": 4.735900280691988, "learning_rate": 6.381914384559995e-07, "loss": 0.409, "step": 29412 }, { "epoch": 0.84, "grad_norm": 5.526207300904334, "learning_rate": 6.379647438034164e-07, "loss": 0.4325, "step": 29413 }, { "epoch": 0.84, "grad_norm": 5.383398857347809, "learning_rate": 6.377380866768845e-07, "loss": 0.5947, "step": 29414 }, { "epoch": 0.84, "grad_norm": 4.601510272676872, "learning_rate": 6.375114670783561e-07, "loss": 0.4055, "step": 29415 }, { "epoch": 0.84, "grad_norm": 4.615433232191983, "learning_rate": 6.372848850097785e-07, "loss": 0.338, "step": 29416 }, { "epoch": 0.84, "grad_norm": 5.169411709368821, "learning_rate": 6.370583404731023e-07, "loss": 0.7686, "step": 29417 }, { "epoch": 0.84, "grad_norm": 4.467466476818442, "learning_rate": 6.368318334702756e-07, "loss": 0.3281, "step": 29418 }, { "epoch": 0.84, "grad_norm": 6.6311041403358955, "learning_rate": 6.366053640032494e-07, "loss": 0.3394, "step": 29419 }, { "epoch": 0.84, "grad_norm": 6.850480275824496, "learning_rate": 6.363789320739678e-07, "loss": 0.5733, "step": 29420 }, { "epoch": 0.84, "grad_norm": 5.847701791752875, "learning_rate": 6.361525376843807e-07, "loss": 0.4595, "step": 29421 }, { "epoch": 0.84, "grad_norm": 5.279638146008617, "learning_rate": 6.359261808364375e-07, "loss": 0.5052, "step": 29422 }, { "epoch": 0.84, "grad_norm": 4.125122270794913, "learning_rate": 6.356998615320825e-07, "loss": 0.3304, "step": 29423 }, { "epoch": 0.84, "grad_norm": 9.70574601259536, "learning_rate": 6.354735797732659e-07, "loss": 0.562, "step": 29424 }, { "epoch": 0.84, "grad_norm": 4.771023503584847, "learning_rate": 6.352473355619321e-07, "loss": 0.2151, "step": 29425 }, { "epoch": 0.84, "grad_norm": 1.6509833225286494, "learning_rate": 6.350211289000286e-07, "loss": 0.1151, "step": 29426 }, { "epoch": 0.84, "grad_norm": 4.404989725159386, "learning_rate": 6.347949597894998e-07, "loss": 0.253, "step": 29427 }, { "epoch": 0.84, "grad_norm": 5.940827029834831, "learning_rate": 6.345688282322931e-07, "loss": 0.4175, "step": 29428 }, { "epoch": 0.84, "grad_norm": 4.4569061288480745, "learning_rate": 6.343427342303526e-07, "loss": 0.449, "step": 29429 }, { "epoch": 0.84, "grad_norm": 6.888154917074752, "learning_rate": 6.341166777856239e-07, "loss": 0.5961, "step": 29430 }, { "epoch": 0.84, "grad_norm": 6.837185450402378, "learning_rate": 6.338906589000532e-07, "loss": 0.5469, "step": 29431 }, { "epoch": 0.84, "grad_norm": 3.5554840201559377, "learning_rate": 6.336646775755834e-07, "loss": 0.2088, "step": 29432 }, { "epoch": 0.84, "grad_norm": 4.905148716654279, "learning_rate": 6.33438733814159e-07, "loss": 0.4399, "step": 29433 }, { "epoch": 0.84, "grad_norm": 9.148904711930195, "learning_rate": 6.332128276177218e-07, "loss": 0.5911, "step": 29434 }, { "epoch": 0.84, "grad_norm": 5.419198598629275, "learning_rate": 6.329869589882188e-07, "loss": 0.2925, "step": 29435 }, { "epoch": 0.84, "grad_norm": 2.9953315093502377, "learning_rate": 6.327611279275897e-07, "loss": 0.2347, "step": 29436 }, { "epoch": 0.84, "grad_norm": 11.255039061415417, "learning_rate": 6.325353344377794e-07, "loss": 0.6161, "step": 29437 }, { "epoch": 0.84, "grad_norm": 5.9808498587541035, "learning_rate": 6.323095785207306e-07, "loss": 0.8007, "step": 29438 }, { "epoch": 0.84, "grad_norm": 7.50020338418487, "learning_rate": 6.320838601783852e-07, "loss": 0.3832, "step": 29439 }, { "epoch": 0.84, "grad_norm": 3.6832637699744692, "learning_rate": 6.318581794126838e-07, "loss": 0.2122, "step": 29440 }, { "epoch": 0.84, "grad_norm": 8.32017014072755, "learning_rate": 6.316325362255681e-07, "loss": 0.7745, "step": 29441 }, { "epoch": 0.84, "grad_norm": 4.886754365908629, "learning_rate": 6.314069306189807e-07, "loss": 0.4492, "step": 29442 }, { "epoch": 0.84, "grad_norm": 4.3043134080233685, "learning_rate": 6.31181362594861e-07, "loss": 0.1836, "step": 29443 }, { "epoch": 0.84, "grad_norm": 8.188878183632486, "learning_rate": 6.309558321551507e-07, "loss": 0.8851, "step": 29444 }, { "epoch": 0.84, "grad_norm": 3.2420499106018172, "learning_rate": 6.307303393017888e-07, "loss": 0.2073, "step": 29445 }, { "epoch": 0.84, "grad_norm": 5.269413159901551, "learning_rate": 6.305048840367162e-07, "loss": 0.412, "step": 29446 }, { "epoch": 0.84, "grad_norm": 4.75936065190758, "learning_rate": 6.302794663618728e-07, "loss": 0.2171, "step": 29447 }, { "epoch": 0.84, "grad_norm": 17.24493931052616, "learning_rate": 6.300540862791959e-07, "loss": 0.4519, "step": 29448 }, { "epoch": 0.84, "grad_norm": 3.581578932297498, "learning_rate": 6.298287437906264e-07, "loss": 0.2503, "step": 29449 }, { "epoch": 0.84, "grad_norm": 5.152110663543456, "learning_rate": 6.296034388981015e-07, "loss": 0.7339, "step": 29450 }, { "epoch": 0.84, "grad_norm": 4.658775591911768, "learning_rate": 6.293781716035607e-07, "loss": 0.6337, "step": 29451 }, { "epoch": 0.84, "grad_norm": 12.692336189575926, "learning_rate": 6.291529419089421e-07, "loss": 0.6042, "step": 29452 }, { "epoch": 0.84, "grad_norm": 8.019495812071604, "learning_rate": 6.289277498161817e-07, "loss": 0.7056, "step": 29453 }, { "epoch": 0.84, "grad_norm": 6.667554953126615, "learning_rate": 6.287025953272174e-07, "loss": 0.3645, "step": 29454 }, { "epoch": 0.84, "grad_norm": 5.541509090659472, "learning_rate": 6.284774784439862e-07, "loss": 0.3685, "step": 29455 }, { "epoch": 0.84, "grad_norm": 8.461395420352614, "learning_rate": 6.282523991684258e-07, "loss": 0.4826, "step": 29456 }, { "epoch": 0.84, "grad_norm": 9.140326483652974, "learning_rate": 6.280273575024709e-07, "loss": 0.4105, "step": 29457 }, { "epoch": 0.84, "grad_norm": 6.953113427313271, "learning_rate": 6.278023534480593e-07, "loss": 0.4516, "step": 29458 }, { "epoch": 0.84, "grad_norm": 3.552623308740995, "learning_rate": 6.275773870071262e-07, "loss": 0.1787, "step": 29459 }, { "epoch": 0.84, "grad_norm": 4.301946664014021, "learning_rate": 6.273524581816059e-07, "loss": 0.2469, "step": 29460 }, { "epoch": 0.84, "grad_norm": 7.62152508209652, "learning_rate": 6.271275669734334e-07, "loss": 0.2822, "step": 29461 }, { "epoch": 0.84, "grad_norm": 3.602228578882795, "learning_rate": 6.269027133845434e-07, "loss": 0.2816, "step": 29462 }, { "epoch": 0.84, "grad_norm": 3.3309830048653484, "learning_rate": 6.266778974168725e-07, "loss": 0.3169, "step": 29463 }, { "epoch": 0.84, "grad_norm": 1.8815211818259505, "learning_rate": 6.264531190723522e-07, "loss": 0.0921, "step": 29464 }, { "epoch": 0.84, "grad_norm": 3.2141521354197145, "learning_rate": 6.262283783529182e-07, "loss": 0.3366, "step": 29465 }, { "epoch": 0.84, "grad_norm": 3.7121074880028977, "learning_rate": 6.260036752605026e-07, "loss": 0.3577, "step": 29466 }, { "epoch": 0.84, "grad_norm": 3.917864553752775, "learning_rate": 6.257790097970396e-07, "loss": 0.4904, "step": 29467 }, { "epoch": 0.84, "grad_norm": 7.987872050281139, "learning_rate": 6.255543819644593e-07, "loss": 0.4073, "step": 29468 }, { "epoch": 0.84, "grad_norm": 7.431458239128173, "learning_rate": 6.253297917646978e-07, "loss": 0.7292, "step": 29469 }, { "epoch": 0.84, "grad_norm": 3.4355594012372017, "learning_rate": 6.251052391996837e-07, "loss": 0.3375, "step": 29470 }, { "epoch": 0.84, "grad_norm": 4.645644841269179, "learning_rate": 6.248807242713511e-07, "loss": 0.4357, "step": 29471 }, { "epoch": 0.84, "grad_norm": 5.999990860614173, "learning_rate": 6.24656246981632e-07, "loss": 0.3662, "step": 29472 }, { "epoch": 0.84, "grad_norm": 3.984383915909908, "learning_rate": 6.244318073324563e-07, "loss": 0.4447, "step": 29473 }, { "epoch": 0.84, "grad_norm": 7.504618303630019, "learning_rate": 6.242074053257552e-07, "loss": 0.5049, "step": 29474 }, { "epoch": 0.84, "grad_norm": 4.083929527640506, "learning_rate": 6.239830409634578e-07, "loss": 0.4425, "step": 29475 }, { "epoch": 0.84, "grad_norm": 6.66758330917805, "learning_rate": 6.237587142474971e-07, "loss": 0.2988, "step": 29476 }, { "epoch": 0.84, "grad_norm": 9.150096505078299, "learning_rate": 6.235344251797998e-07, "loss": 0.3054, "step": 29477 }, { "epoch": 0.84, "grad_norm": 7.826459147752761, "learning_rate": 6.233101737622971e-07, "loss": 0.719, "step": 29478 }, { "epoch": 0.84, "grad_norm": 6.418951255369606, "learning_rate": 6.230859599969202e-07, "loss": 0.5274, "step": 29479 }, { "epoch": 0.84, "grad_norm": 7.553565714253252, "learning_rate": 6.228617838855938e-07, "loss": 0.4492, "step": 29480 }, { "epoch": 0.84, "grad_norm": 7.795862288823087, "learning_rate": 6.226376454302491e-07, "loss": 0.7778, "step": 29481 }, { "epoch": 0.84, "grad_norm": 6.7400277083135816, "learning_rate": 6.224135446328133e-07, "loss": 0.4312, "step": 29482 }, { "epoch": 0.84, "grad_norm": 7.605666868727975, "learning_rate": 6.221894814952156e-07, "loss": 0.9197, "step": 29483 }, { "epoch": 0.84, "grad_norm": 6.44430304743037, "learning_rate": 6.219654560193822e-07, "loss": 0.2981, "step": 29484 }, { "epoch": 0.84, "grad_norm": 3.961104500464698, "learning_rate": 6.217414682072414e-07, "loss": 0.3229, "step": 29485 }, { "epoch": 0.84, "grad_norm": 4.726855812745001, "learning_rate": 6.215175180607202e-07, "loss": 0.2743, "step": 29486 }, { "epoch": 0.84, "grad_norm": 5.313336598215013, "learning_rate": 6.212936055817432e-07, "loss": 0.5408, "step": 29487 }, { "epoch": 0.84, "grad_norm": 3.0391489521664763, "learning_rate": 6.210697307722397e-07, "loss": 0.278, "step": 29488 }, { "epoch": 0.84, "grad_norm": 5.106253320699894, "learning_rate": 6.208458936341327e-07, "loss": 0.265, "step": 29489 }, { "epoch": 0.84, "grad_norm": 3.5903478357762784, "learning_rate": 6.206220941693503e-07, "loss": 0.3946, "step": 29490 }, { "epoch": 0.84, "grad_norm": 7.704473226612249, "learning_rate": 6.203983323798163e-07, "loss": 0.4737, "step": 29491 }, { "epoch": 0.84, "grad_norm": 3.478879555069304, "learning_rate": 6.20174608267457e-07, "loss": 0.5146, "step": 29492 }, { "epoch": 0.84, "grad_norm": 9.553982999566509, "learning_rate": 6.199509218341959e-07, "loss": 0.7802, "step": 29493 }, { "epoch": 0.84, "grad_norm": 5.652018740048559, "learning_rate": 6.197272730819581e-07, "loss": 0.4397, "step": 29494 }, { "epoch": 0.84, "grad_norm": 3.7121091739686998, "learning_rate": 6.19503662012666e-07, "loss": 0.3459, "step": 29495 }, { "epoch": 0.84, "grad_norm": 2.8923358699882273, "learning_rate": 6.192800886282446e-07, "loss": 0.2613, "step": 29496 }, { "epoch": 0.84, "grad_norm": 6.124944336307373, "learning_rate": 6.190565529306187e-07, "loss": 0.3769, "step": 29497 }, { "epoch": 0.84, "grad_norm": 9.58121220431304, "learning_rate": 6.188330549217086e-07, "loss": 0.7052, "step": 29498 }, { "epoch": 0.84, "grad_norm": 3.869286755378318, "learning_rate": 6.186095946034393e-07, "loss": 0.5222, "step": 29499 }, { "epoch": 0.84, "grad_norm": 11.698059860172862, "learning_rate": 6.183861719777324e-07, "loss": 0.4616, "step": 29500 }, { "epoch": 0.84, "grad_norm": 5.827741104041293, "learning_rate": 6.18162787046509e-07, "loss": 0.4749, "step": 29501 }, { "epoch": 0.84, "grad_norm": 6.953154625990255, "learning_rate": 6.179394398116911e-07, "loss": 0.4508, "step": 29502 }, { "epoch": 0.84, "grad_norm": 5.232088734101793, "learning_rate": 6.177161302752011e-07, "loss": 0.4568, "step": 29503 }, { "epoch": 0.84, "grad_norm": 5.313477280428641, "learning_rate": 6.1749285843896e-07, "loss": 0.2584, "step": 29504 }, { "epoch": 0.84, "grad_norm": 2.995573344287689, "learning_rate": 6.172696243048875e-07, "loss": 0.3312, "step": 29505 }, { "epoch": 0.84, "grad_norm": 3.5245241574463257, "learning_rate": 6.170464278749072e-07, "loss": 0.5466, "step": 29506 }, { "epoch": 0.85, "grad_norm": 4.209114600200481, "learning_rate": 6.168232691509346e-07, "loss": 0.3677, "step": 29507 }, { "epoch": 0.85, "grad_norm": 4.148426063076425, "learning_rate": 6.166001481348927e-07, "loss": 0.3776, "step": 29508 }, { "epoch": 0.85, "grad_norm": 2.165582178361292, "learning_rate": 6.163770648286988e-07, "loss": 0.0882, "step": 29509 }, { "epoch": 0.85, "grad_norm": 4.3133282626228855, "learning_rate": 6.161540192342747e-07, "loss": 0.2644, "step": 29510 }, { "epoch": 0.85, "grad_norm": 4.199295909083993, "learning_rate": 6.159310113535366e-07, "loss": 0.4339, "step": 29511 }, { "epoch": 0.85, "grad_norm": 2.667439744817587, "learning_rate": 6.157080411884047e-07, "loss": 0.3196, "step": 29512 }, { "epoch": 0.85, "grad_norm": 6.788501571467405, "learning_rate": 6.154851087407976e-07, "loss": 0.5986, "step": 29513 }, { "epoch": 0.85, "grad_norm": 6.818027668568651, "learning_rate": 6.152622140126307e-07, "loss": 0.5123, "step": 29514 }, { "epoch": 0.85, "grad_norm": 6.003436733494209, "learning_rate": 6.150393570058244e-07, "loss": 0.4159, "step": 29515 }, { "epoch": 0.85, "grad_norm": 4.961967949967853, "learning_rate": 6.148165377222937e-07, "loss": 0.1929, "step": 29516 }, { "epoch": 0.85, "grad_norm": 9.143401156359472, "learning_rate": 6.14593756163957e-07, "loss": 0.6674, "step": 29517 }, { "epoch": 0.85, "grad_norm": 3.5950230043813276, "learning_rate": 6.143710123327296e-07, "loss": 0.4657, "step": 29518 }, { "epoch": 0.85, "grad_norm": 6.268519886239372, "learning_rate": 6.141483062305292e-07, "loss": 0.5062, "step": 29519 }, { "epoch": 0.85, "grad_norm": 4.26280415364339, "learning_rate": 6.139256378592712e-07, "loss": 0.3405, "step": 29520 }, { "epoch": 0.85, "grad_norm": 4.6198548230567384, "learning_rate": 6.137030072208699e-07, "loss": 0.5515, "step": 29521 }, { "epoch": 0.85, "grad_norm": 8.269659085517736, "learning_rate": 6.134804143172424e-07, "loss": 0.8638, "step": 29522 }, { "epoch": 0.85, "grad_norm": 3.810401714303261, "learning_rate": 6.132578591503019e-07, "loss": 0.3118, "step": 29523 }, { "epoch": 0.85, "grad_norm": 3.118308685513871, "learning_rate": 6.130353417219648e-07, "loss": 0.283, "step": 29524 }, { "epoch": 0.85, "grad_norm": 7.52099349431681, "learning_rate": 6.128128620341433e-07, "loss": 0.7342, "step": 29525 }, { "epoch": 0.85, "grad_norm": 4.678772698402634, "learning_rate": 6.12590420088754e-07, "loss": 0.4525, "step": 29526 }, { "epoch": 0.85, "grad_norm": 3.286696609266535, "learning_rate": 6.123680158877088e-07, "loss": 0.3413, "step": 29527 }, { "epoch": 0.85, "grad_norm": 14.798515736410645, "learning_rate": 6.121456494329209e-07, "loss": 0.6004, "step": 29528 }, { "epoch": 0.85, "grad_norm": 5.2319120163679536, "learning_rate": 6.119233207263042e-07, "loss": 0.4448, "step": 29529 }, { "epoch": 0.85, "grad_norm": 2.4134238796499323, "learning_rate": 6.117010297697701e-07, "loss": 0.1582, "step": 29530 }, { "epoch": 0.85, "grad_norm": 3.9318664455608787, "learning_rate": 6.114787765652325e-07, "loss": 0.4344, "step": 29531 }, { "epoch": 0.85, "grad_norm": 9.943032841743005, "learning_rate": 6.11256561114602e-07, "loss": 0.8803, "step": 29532 }, { "epoch": 0.85, "grad_norm": 4.557927126222815, "learning_rate": 6.110343834197918e-07, "loss": 0.4441, "step": 29533 }, { "epoch": 0.85, "grad_norm": 5.992222115666599, "learning_rate": 6.108122434827124e-07, "loss": 0.2062, "step": 29534 }, { "epoch": 0.85, "grad_norm": 3.955029145867635, "learning_rate": 6.105901413052751e-07, "loss": 0.2621, "step": 29535 }, { "epoch": 0.85, "grad_norm": 9.36879125360793, "learning_rate": 6.103680768893888e-07, "loss": 0.3028, "step": 29536 }, { "epoch": 0.85, "grad_norm": 6.039953898860244, "learning_rate": 6.10146050236966e-07, "loss": 0.629, "step": 29537 }, { "epoch": 0.85, "grad_norm": 6.060560722470484, "learning_rate": 6.099240613499168e-07, "loss": 0.3378, "step": 29538 }, { "epoch": 0.85, "grad_norm": 5.386912131486372, "learning_rate": 6.097021102301509e-07, "loss": 0.4847, "step": 29539 }, { "epoch": 0.85, "grad_norm": 4.700642028542406, "learning_rate": 6.094801968795766e-07, "loss": 0.3313, "step": 29540 }, { "epoch": 0.85, "grad_norm": 9.63806652576026, "learning_rate": 6.09258321300103e-07, "loss": 0.6704, "step": 29541 }, { "epoch": 0.85, "grad_norm": 8.935982421932843, "learning_rate": 6.090364834936407e-07, "loss": 0.5732, "step": 29542 }, { "epoch": 0.85, "grad_norm": 3.621043512516434, "learning_rate": 6.088146834620957e-07, "loss": 0.2858, "step": 29543 }, { "epoch": 0.85, "grad_norm": 6.053299952840705, "learning_rate": 6.085929212073771e-07, "loss": 0.6359, "step": 29544 }, { "epoch": 0.85, "grad_norm": 1.2502682696000962, "learning_rate": 6.083711967313938e-07, "loss": 0.2239, "step": 29545 }, { "epoch": 0.85, "grad_norm": 4.349130440540576, "learning_rate": 6.081495100360523e-07, "loss": 0.3, "step": 29546 }, { "epoch": 0.85, "grad_norm": 4.677817963110161, "learning_rate": 6.079278611232603e-07, "loss": 0.346, "step": 29547 }, { "epoch": 0.85, "grad_norm": 4.019556280860777, "learning_rate": 6.077062499949232e-07, "loss": 0.5352, "step": 29548 }, { "epoch": 0.85, "grad_norm": 6.695383363079449, "learning_rate": 6.074846766529491e-07, "loss": 0.3322, "step": 29549 }, { "epoch": 0.85, "grad_norm": 4.908457994200303, "learning_rate": 6.072631410992424e-07, "loss": 0.5205, "step": 29550 }, { "epoch": 0.85, "grad_norm": 7.084638348208209, "learning_rate": 6.070416433357118e-07, "loss": 0.5358, "step": 29551 }, { "epoch": 0.85, "grad_norm": 3.933809155170001, "learning_rate": 6.06820183364259e-07, "loss": 0.3077, "step": 29552 }, { "epoch": 0.85, "grad_norm": 7.037596148686137, "learning_rate": 6.06598761186793e-07, "loss": 0.9109, "step": 29553 }, { "epoch": 0.85, "grad_norm": 3.6983597915279467, "learning_rate": 6.063773768052166e-07, "loss": 0.6533, "step": 29554 }, { "epoch": 0.85, "grad_norm": 5.116091498728121, "learning_rate": 6.061560302214342e-07, "loss": 0.7181, "step": 29555 }, { "epoch": 0.85, "grad_norm": 2.989420894148484, "learning_rate": 6.059347214373507e-07, "loss": 0.2442, "step": 29556 }, { "epoch": 0.85, "grad_norm": 3.5436278616496604, "learning_rate": 6.057134504548695e-07, "loss": 0.1397, "step": 29557 }, { "epoch": 0.85, "grad_norm": 5.823302823150371, "learning_rate": 6.054922172758948e-07, "loss": 0.2153, "step": 29558 }, { "epoch": 0.85, "grad_norm": 3.378833518596062, "learning_rate": 6.052710219023289e-07, "loss": 0.3978, "step": 29559 }, { "epoch": 0.85, "grad_norm": 10.149092467377413, "learning_rate": 6.050498643360764e-07, "loss": 0.7138, "step": 29560 }, { "epoch": 0.85, "grad_norm": 6.5535680749334455, "learning_rate": 6.048287445790385e-07, "loss": 0.7154, "step": 29561 }, { "epoch": 0.85, "grad_norm": 4.38704016468452, "learning_rate": 6.046076626331165e-07, "loss": 0.6583, "step": 29562 }, { "epoch": 0.85, "grad_norm": 5.547877691201013, "learning_rate": 6.04386618500215e-07, "loss": 0.6107, "step": 29563 }, { "epoch": 0.85, "grad_norm": 5.587559860557424, "learning_rate": 6.041656121822332e-07, "loss": 0.5558, "step": 29564 }, { "epoch": 0.85, "grad_norm": 7.578892682705005, "learning_rate": 6.039446436810747e-07, "loss": 0.8674, "step": 29565 }, { "epoch": 0.85, "grad_norm": 3.3012676715775067, "learning_rate": 6.03723712998639e-07, "loss": 0.1728, "step": 29566 }, { "epoch": 0.85, "grad_norm": 9.429978006687184, "learning_rate": 6.035028201368265e-07, "loss": 0.813, "step": 29567 }, { "epoch": 0.85, "grad_norm": 8.427821973785221, "learning_rate": 6.032819650975375e-07, "loss": 0.2372, "step": 29568 }, { "epoch": 0.85, "grad_norm": 6.47948085436085, "learning_rate": 6.03061147882672e-07, "loss": 0.4493, "step": 29569 }, { "epoch": 0.85, "grad_norm": 6.35100715339605, "learning_rate": 6.028403684941308e-07, "loss": 0.2555, "step": 29570 }, { "epoch": 0.85, "grad_norm": 4.57084164938899, "learning_rate": 6.026196269338119e-07, "loss": 0.3351, "step": 29571 }, { "epoch": 0.85, "grad_norm": 4.990965501113065, "learning_rate": 6.02398923203616e-07, "loss": 0.3227, "step": 29572 }, { "epoch": 0.85, "grad_norm": 6.662065929135859, "learning_rate": 6.021782573054408e-07, "loss": 0.3719, "step": 29573 }, { "epoch": 0.85, "grad_norm": 12.79037750325299, "learning_rate": 6.019576292411844e-07, "loss": 0.7122, "step": 29574 }, { "epoch": 0.85, "grad_norm": 3.835940296925693, "learning_rate": 6.017370390127441e-07, "loss": 0.5625, "step": 29575 }, { "epoch": 0.85, "grad_norm": 4.91854864675359, "learning_rate": 6.015164866220197e-07, "loss": 0.4965, "step": 29576 }, { "epoch": 0.85, "grad_norm": 5.884835167798984, "learning_rate": 6.01295972070906e-07, "loss": 0.4303, "step": 29577 }, { "epoch": 0.85, "grad_norm": 4.665666160734091, "learning_rate": 6.010754953613018e-07, "loss": 0.3997, "step": 29578 }, { "epoch": 0.85, "grad_norm": 7.734275277776077, "learning_rate": 6.008550564951044e-07, "loss": 0.6092, "step": 29579 }, { "epoch": 0.85, "grad_norm": 9.527784173912025, "learning_rate": 6.006346554742093e-07, "loss": 0.4187, "step": 29580 }, { "epoch": 0.85, "grad_norm": 4.274591823521978, "learning_rate": 6.004142923005124e-07, "loss": 0.4842, "step": 29581 }, { "epoch": 0.85, "grad_norm": 6.5404264971287045, "learning_rate": 6.001939669759088e-07, "loss": 0.5981, "step": 29582 }, { "epoch": 0.85, "grad_norm": 7.192230160835503, "learning_rate": 5.999736795022959e-07, "loss": 0.2828, "step": 29583 }, { "epoch": 0.85, "grad_norm": 5.892163912964404, "learning_rate": 5.997534298815666e-07, "loss": 0.5168, "step": 29584 }, { "epoch": 0.85, "grad_norm": 5.168174779912643, "learning_rate": 5.995332181156166e-07, "loss": 0.4604, "step": 29585 }, { "epoch": 0.85, "grad_norm": 3.776171402730426, "learning_rate": 5.993130442063416e-07, "loss": 0.3516, "step": 29586 }, { "epoch": 0.85, "grad_norm": 4.556198510714985, "learning_rate": 5.990929081556351e-07, "loss": 0.1531, "step": 29587 }, { "epoch": 0.85, "grad_norm": 4.372956152754795, "learning_rate": 5.988728099653901e-07, "loss": 0.4301, "step": 29588 }, { "epoch": 0.85, "grad_norm": 6.502688732114212, "learning_rate": 5.986527496374994e-07, "loss": 0.3668, "step": 29589 }, { "epoch": 0.85, "grad_norm": 0.7500345500299517, "learning_rate": 5.984327271738583e-07, "loss": 0.027, "step": 29590 }, { "epoch": 0.85, "grad_norm": 4.473235646662685, "learning_rate": 5.982127425763573e-07, "loss": 0.3258, "step": 29591 }, { "epoch": 0.85, "grad_norm": 5.289992415525483, "learning_rate": 5.979927958468912e-07, "loss": 0.6054, "step": 29592 }, { "epoch": 0.85, "grad_norm": 4.424840438250451, "learning_rate": 5.9777288698735e-07, "loss": 0.541, "step": 29593 }, { "epoch": 0.85, "grad_norm": 3.6159420299757152, "learning_rate": 5.975530159996279e-07, "loss": 0.2984, "step": 29594 }, { "epoch": 0.85, "grad_norm": 3.3378173748659696, "learning_rate": 5.973331828856149e-07, "loss": 0.4591, "step": 29595 }, { "epoch": 0.85, "grad_norm": 4.997842156174286, "learning_rate": 5.971133876472018e-07, "loss": 0.3127, "step": 29596 }, { "epoch": 0.85, "grad_norm": 5.325578970618987, "learning_rate": 5.96893630286281e-07, "loss": 0.2282, "step": 29597 }, { "epoch": 0.85, "grad_norm": 5.153119032505298, "learning_rate": 5.966739108047409e-07, "loss": 0.2798, "step": 29598 }, { "epoch": 0.85, "grad_norm": 3.2991252650160114, "learning_rate": 5.96454229204474e-07, "loss": 0.2666, "step": 29599 }, { "epoch": 0.85, "grad_norm": 6.14269111177634, "learning_rate": 5.962345854873697e-07, "loss": 0.8823, "step": 29600 }, { "epoch": 0.85, "grad_norm": 3.8857475255798657, "learning_rate": 5.960149796553166e-07, "loss": 0.4378, "step": 29601 }, { "epoch": 0.85, "grad_norm": 7.837551494593491, "learning_rate": 5.957954117102033e-07, "loss": 0.5067, "step": 29602 }, { "epoch": 0.85, "grad_norm": 5.211686934035349, "learning_rate": 5.955758816539204e-07, "loss": 0.6051, "step": 29603 }, { "epoch": 0.85, "grad_norm": 5.085398608838122, "learning_rate": 5.953563894883562e-07, "loss": 0.5231, "step": 29604 }, { "epoch": 0.85, "grad_norm": 5.006559884794688, "learning_rate": 5.951369352153979e-07, "loss": 0.2387, "step": 29605 }, { "epoch": 0.85, "grad_norm": 4.564300234204604, "learning_rate": 5.949175188369355e-07, "loss": 0.3446, "step": 29606 }, { "epoch": 0.85, "grad_norm": 6.545710129997893, "learning_rate": 5.946981403548552e-07, "loss": 0.4659, "step": 29607 }, { "epoch": 0.85, "grad_norm": 19.4078055969968, "learning_rate": 5.944787997710444e-07, "loss": 0.5689, "step": 29608 }, { "epoch": 0.85, "grad_norm": 5.630012991095778, "learning_rate": 5.942594970873888e-07, "loss": 0.3126, "step": 29609 }, { "epoch": 0.85, "grad_norm": 6.158168087274948, "learning_rate": 5.940402323057764e-07, "loss": 0.7138, "step": 29610 }, { "epoch": 0.85, "grad_norm": 3.3158200917567164, "learning_rate": 5.938210054280946e-07, "loss": 0.2764, "step": 29611 }, { "epoch": 0.85, "grad_norm": 2.7896500297210127, "learning_rate": 5.936018164562273e-07, "loss": 0.196, "step": 29612 }, { "epoch": 0.85, "grad_norm": 8.55546375393297, "learning_rate": 5.933826653920616e-07, "loss": 0.4967, "step": 29613 }, { "epoch": 0.85, "grad_norm": 6.814865681478381, "learning_rate": 5.931635522374823e-07, "loss": 0.4565, "step": 29614 }, { "epoch": 0.85, "grad_norm": 5.636579674414741, "learning_rate": 5.929444769943748e-07, "loss": 0.2786, "step": 29615 }, { "epoch": 0.85, "grad_norm": 3.620353713642715, "learning_rate": 5.927254396646221e-07, "loss": 0.3305, "step": 29616 }, { "epoch": 0.85, "grad_norm": 6.786259642176261, "learning_rate": 5.925064402501107e-07, "loss": 0.4016, "step": 29617 }, { "epoch": 0.85, "grad_norm": 3.7562688563630475, "learning_rate": 5.922874787527233e-07, "loss": 0.3998, "step": 29618 }, { "epoch": 0.85, "grad_norm": 6.103992793847627, "learning_rate": 5.920685551743433e-07, "loss": 0.7318, "step": 29619 }, { "epoch": 0.85, "grad_norm": 4.525336254124175, "learning_rate": 5.918496695168563e-07, "loss": 0.517, "step": 29620 }, { "epoch": 0.85, "grad_norm": 3.7394729038322345, "learning_rate": 5.916308217821437e-07, "loss": 0.1022, "step": 29621 }, { "epoch": 0.85, "grad_norm": 2.3157600672322967, "learning_rate": 5.914120119720884e-07, "loss": 0.2938, "step": 29622 }, { "epoch": 0.85, "grad_norm": 3.3723814307338547, "learning_rate": 5.911932400885712e-07, "loss": 0.4022, "step": 29623 }, { "epoch": 0.85, "grad_norm": 5.8917346182451755, "learning_rate": 5.909745061334777e-07, "loss": 0.484, "step": 29624 }, { "epoch": 0.85, "grad_norm": 5.933765280820217, "learning_rate": 5.907558101086858e-07, "loss": 0.6974, "step": 29625 }, { "epoch": 0.85, "grad_norm": 4.763155162732294, "learning_rate": 5.905371520160802e-07, "loss": 0.3557, "step": 29626 }, { "epoch": 0.85, "grad_norm": 4.604820820639132, "learning_rate": 5.903185318575405e-07, "loss": 0.2602, "step": 29627 }, { "epoch": 0.85, "grad_norm": 3.377935016381945, "learning_rate": 5.900999496349469e-07, "loss": 0.4171, "step": 29628 }, { "epoch": 0.85, "grad_norm": 7.541410185384993, "learning_rate": 5.898814053501812e-07, "loss": 0.1413, "step": 29629 }, { "epoch": 0.85, "grad_norm": 2.143967281596319, "learning_rate": 5.896628990051212e-07, "loss": 0.3556, "step": 29630 }, { "epoch": 0.85, "grad_norm": 8.423226808896636, "learning_rate": 5.8944443060165e-07, "loss": 0.5062, "step": 29631 }, { "epoch": 0.85, "grad_norm": 4.8082029306236755, "learning_rate": 5.892260001416439e-07, "loss": 0.2336, "step": 29632 }, { "epoch": 0.85, "grad_norm": 4.763257698800463, "learning_rate": 5.890076076269846e-07, "loss": 0.4207, "step": 29633 }, { "epoch": 0.85, "grad_norm": 7.325114007785188, "learning_rate": 5.887892530595502e-07, "loss": 0.7729, "step": 29634 }, { "epoch": 0.85, "grad_norm": 2.938623659832888, "learning_rate": 5.885709364412173e-07, "loss": 0.3799, "step": 29635 }, { "epoch": 0.85, "grad_norm": 6.433804645799125, "learning_rate": 5.883526577738663e-07, "loss": 0.5104, "step": 29636 }, { "epoch": 0.85, "grad_norm": 3.541024108640899, "learning_rate": 5.881344170593734e-07, "loss": 0.4477, "step": 29637 }, { "epoch": 0.85, "grad_norm": 5.557084200942115, "learning_rate": 5.879162142996181e-07, "loss": 0.5802, "step": 29638 }, { "epoch": 0.85, "grad_norm": 6.073418449841087, "learning_rate": 5.876980494964751e-07, "loss": 0.569, "step": 29639 }, { "epoch": 0.85, "grad_norm": 5.101000440868605, "learning_rate": 5.874799226518235e-07, "loss": 0.381, "step": 29640 }, { "epoch": 0.85, "grad_norm": 2.710640690410466, "learning_rate": 5.87261833767539e-07, "loss": 0.1825, "step": 29641 }, { "epoch": 0.85, "grad_norm": 6.049795780991224, "learning_rate": 5.870437828454978e-07, "loss": 0.4219, "step": 29642 }, { "epoch": 0.85, "grad_norm": 6.9904867650545155, "learning_rate": 5.868257698875745e-07, "loss": 0.7338, "step": 29643 }, { "epoch": 0.85, "grad_norm": 7.748021026945163, "learning_rate": 5.866077948956456e-07, "loss": 0.5716, "step": 29644 }, { "epoch": 0.85, "grad_norm": 4.936070802729507, "learning_rate": 5.863898578715876e-07, "loss": 0.3597, "step": 29645 }, { "epoch": 0.85, "grad_norm": 7.496407983822126, "learning_rate": 5.861719588172732e-07, "loss": 0.7639, "step": 29646 }, { "epoch": 0.85, "grad_norm": 8.448419583391397, "learning_rate": 5.85954097734579e-07, "loss": 0.7139, "step": 29647 }, { "epoch": 0.85, "grad_norm": 5.638137903091962, "learning_rate": 5.857362746253781e-07, "loss": 0.9323, "step": 29648 }, { "epoch": 0.85, "grad_norm": 4.456730985431762, "learning_rate": 5.855184894915444e-07, "loss": 0.324, "step": 29649 }, { "epoch": 0.85, "grad_norm": 4.592657205780587, "learning_rate": 5.85300742334951e-07, "loss": 0.4737, "step": 29650 }, { "epoch": 0.85, "grad_norm": 6.886070389717193, "learning_rate": 5.850830331574714e-07, "loss": 0.5274, "step": 29651 }, { "epoch": 0.85, "grad_norm": 4.089700880351593, "learning_rate": 5.848653619609801e-07, "loss": 0.4335, "step": 29652 }, { "epoch": 0.85, "grad_norm": 5.712839459573228, "learning_rate": 5.846477287473473e-07, "loss": 0.6226, "step": 29653 }, { "epoch": 0.85, "grad_norm": 4.704517804610294, "learning_rate": 5.844301335184488e-07, "loss": 0.2826, "step": 29654 }, { "epoch": 0.85, "grad_norm": 4.901135883243231, "learning_rate": 5.842125762761519e-07, "loss": 0.3098, "step": 29655 }, { "epoch": 0.85, "grad_norm": 8.834899679498447, "learning_rate": 5.839950570223318e-07, "loss": 0.2926, "step": 29656 }, { "epoch": 0.85, "grad_norm": 2.501561964369746, "learning_rate": 5.83777575758857e-07, "loss": 0.0763, "step": 29657 }, { "epoch": 0.85, "grad_norm": 6.3988115622854105, "learning_rate": 5.835601324876011e-07, "loss": 0.3236, "step": 29658 }, { "epoch": 0.85, "grad_norm": 5.42243246681014, "learning_rate": 5.83342727210433e-07, "loss": 0.4161, "step": 29659 }, { "epoch": 0.85, "grad_norm": 9.715618058205544, "learning_rate": 5.831253599292247e-07, "loss": 0.2242, "step": 29660 }, { "epoch": 0.85, "grad_norm": 5.838298029130846, "learning_rate": 5.829080306458446e-07, "loss": 0.577, "step": 29661 }, { "epoch": 0.85, "grad_norm": 4.636400451275999, "learning_rate": 5.826907393621623e-07, "loss": 0.31, "step": 29662 }, { "epoch": 0.85, "grad_norm": 5.299808466347464, "learning_rate": 5.824734860800485e-07, "loss": 0.6381, "step": 29663 }, { "epoch": 0.85, "grad_norm": 5.4973567333002515, "learning_rate": 5.822562708013701e-07, "loss": 0.3939, "step": 29664 }, { "epoch": 0.85, "grad_norm": 3.9853456698586522, "learning_rate": 5.820390935279984e-07, "loss": 0.4328, "step": 29665 }, { "epoch": 0.85, "grad_norm": 5.511334725565996, "learning_rate": 5.818219542617998e-07, "loss": 0.4067, "step": 29666 }, { "epoch": 0.85, "grad_norm": 2.1078367205020734, "learning_rate": 5.816048530046431e-07, "loss": 0.2967, "step": 29667 }, { "epoch": 0.85, "grad_norm": 5.350362456601896, "learning_rate": 5.813877897583964e-07, "loss": 0.4565, "step": 29668 }, { "epoch": 0.85, "grad_norm": 6.211828395838712, "learning_rate": 5.811707645249253e-07, "loss": 0.6688, "step": 29669 }, { "epoch": 0.85, "grad_norm": 11.97712669362498, "learning_rate": 5.809537773060991e-07, "loss": 0.3053, "step": 29670 }, { "epoch": 0.85, "grad_norm": 3.965629281837551, "learning_rate": 5.807368281037828e-07, "loss": 0.3532, "step": 29671 }, { "epoch": 0.85, "grad_norm": 6.014514810744163, "learning_rate": 5.805199169198445e-07, "loss": 0.4587, "step": 29672 }, { "epoch": 0.85, "grad_norm": 5.444755230140635, "learning_rate": 5.803030437561475e-07, "loss": 0.3262, "step": 29673 }, { "epoch": 0.85, "grad_norm": 6.939937266525229, "learning_rate": 5.800862086145608e-07, "loss": 0.5918, "step": 29674 }, { "epoch": 0.85, "grad_norm": 3.1759809405429977, "learning_rate": 5.798694114969477e-07, "loss": 0.1672, "step": 29675 }, { "epoch": 0.85, "grad_norm": 3.873738083404105, "learning_rate": 5.796526524051738e-07, "loss": 0.2186, "step": 29676 }, { "epoch": 0.85, "grad_norm": 4.60710439845495, "learning_rate": 5.794359313411041e-07, "loss": 0.3991, "step": 29677 }, { "epoch": 0.85, "grad_norm": 5.640851214408665, "learning_rate": 5.792192483066017e-07, "loss": 0.5186, "step": 29678 }, { "epoch": 0.85, "grad_norm": 3.7453247171965813, "learning_rate": 5.790026033035334e-07, "loss": 0.4723, "step": 29679 }, { "epoch": 0.85, "grad_norm": 3.782786143989877, "learning_rate": 5.7878599633376e-07, "loss": 0.1835, "step": 29680 }, { "epoch": 0.85, "grad_norm": 5.344139107262766, "learning_rate": 5.785694273991482e-07, "loss": 0.4362, "step": 29681 }, { "epoch": 0.85, "grad_norm": 4.163003124726126, "learning_rate": 5.783528965015578e-07, "loss": 0.1374, "step": 29682 }, { "epoch": 0.85, "grad_norm": 4.927578192196442, "learning_rate": 5.781364036428538e-07, "loss": 0.4923, "step": 29683 }, { "epoch": 0.85, "grad_norm": 4.8278625318176935, "learning_rate": 5.779199488248965e-07, "loss": 0.2918, "step": 29684 }, { "epoch": 0.85, "grad_norm": 4.9106200855682545, "learning_rate": 5.777035320495494e-07, "loss": 0.2648, "step": 29685 }, { "epoch": 0.85, "grad_norm": 3.746915088785895, "learning_rate": 5.77487153318676e-07, "loss": 0.2072, "step": 29686 }, { "epoch": 0.85, "grad_norm": 4.428284680294624, "learning_rate": 5.772708126341358e-07, "loss": 0.3559, "step": 29687 }, { "epoch": 0.85, "grad_norm": 6.127654843189057, "learning_rate": 5.770545099977898e-07, "loss": 0.5504, "step": 29688 }, { "epoch": 0.85, "grad_norm": 4.039589587412192, "learning_rate": 5.76838245411499e-07, "loss": 0.1571, "step": 29689 }, { "epoch": 0.85, "grad_norm": 3.384240569230115, "learning_rate": 5.766220188771249e-07, "loss": 0.4085, "step": 29690 }, { "epoch": 0.85, "grad_norm": 3.924052344562197, "learning_rate": 5.764058303965258e-07, "loss": 0.2798, "step": 29691 }, { "epoch": 0.85, "grad_norm": 8.663193035614029, "learning_rate": 5.761896799715627e-07, "loss": 0.372, "step": 29692 }, { "epoch": 0.85, "grad_norm": 4.179262399892273, "learning_rate": 5.759735676040962e-07, "loss": 0.4812, "step": 29693 }, { "epoch": 0.85, "grad_norm": 5.9126748153940625, "learning_rate": 5.757574932959841e-07, "loss": 0.5659, "step": 29694 }, { "epoch": 0.85, "grad_norm": 9.765605090311736, "learning_rate": 5.755414570490864e-07, "loss": 0.1834, "step": 29695 }, { "epoch": 0.85, "grad_norm": 3.4890457460417172, "learning_rate": 5.75325458865259e-07, "loss": 0.165, "step": 29696 }, { "epoch": 0.85, "grad_norm": 6.641433487089044, "learning_rate": 5.751094987463629e-07, "loss": 0.4524, "step": 29697 }, { "epoch": 0.85, "grad_norm": 4.948787294051209, "learning_rate": 5.748935766942542e-07, "loss": 0.6111, "step": 29698 }, { "epoch": 0.85, "grad_norm": 2.2369009891715192, "learning_rate": 5.746776927107922e-07, "loss": 0.0737, "step": 29699 }, { "epoch": 0.85, "grad_norm": 4.480595767308539, "learning_rate": 5.744618467978325e-07, "loss": 0.1608, "step": 29700 }, { "epoch": 0.85, "grad_norm": 7.0090501047554, "learning_rate": 5.74246038957233e-07, "loss": 0.7044, "step": 29701 }, { "epoch": 0.85, "grad_norm": 6.315381440567796, "learning_rate": 5.740302691908505e-07, "loss": 0.7141, "step": 29702 }, { "epoch": 0.85, "grad_norm": 4.230553398095595, "learning_rate": 5.738145375005394e-07, "loss": 0.3445, "step": 29703 }, { "epoch": 0.85, "grad_norm": 5.781443370987457, "learning_rate": 5.735988438881579e-07, "loss": 0.5326, "step": 29704 }, { "epoch": 0.85, "grad_norm": 3.2984456664417534, "learning_rate": 5.733831883555596e-07, "loss": 0.31, "step": 29705 }, { "epoch": 0.85, "grad_norm": 6.08908340571894, "learning_rate": 5.731675709046014e-07, "loss": 0.873, "step": 29706 }, { "epoch": 0.85, "grad_norm": 4.018277490819306, "learning_rate": 5.729519915371368e-07, "loss": 0.2986, "step": 29707 }, { "epoch": 0.85, "grad_norm": 7.866424402315824, "learning_rate": 5.72736450255022e-07, "loss": 0.1139, "step": 29708 }, { "epoch": 0.85, "grad_norm": 9.836111635298309, "learning_rate": 5.725209470601106e-07, "loss": 0.4779, "step": 29709 }, { "epoch": 0.85, "grad_norm": 2.8376821400669257, "learning_rate": 5.72305481954255e-07, "loss": 0.1727, "step": 29710 }, { "epoch": 0.85, "grad_norm": 3.743350730200539, "learning_rate": 5.720900549393116e-07, "loss": 0.2111, "step": 29711 }, { "epoch": 0.85, "grad_norm": 5.149920074990808, "learning_rate": 5.718746660171314e-07, "loss": 0.1971, "step": 29712 }, { "epoch": 0.85, "grad_norm": 5.926966710164005, "learning_rate": 5.716593151895688e-07, "loss": 0.6146, "step": 29713 }, { "epoch": 0.85, "grad_norm": 3.9813359058160596, "learning_rate": 5.714440024584761e-07, "loss": 0.6321, "step": 29714 }, { "epoch": 0.85, "grad_norm": 5.16333926123978, "learning_rate": 5.712287278257056e-07, "loss": 0.595, "step": 29715 }, { "epoch": 0.85, "grad_norm": 6.709401568860777, "learning_rate": 5.710134912931075e-07, "loss": 0.7382, "step": 29716 }, { "epoch": 0.85, "grad_norm": 6.148123087229755, "learning_rate": 5.707982928625355e-07, "loss": 0.4396, "step": 29717 }, { "epoch": 0.85, "grad_norm": 4.771872256015271, "learning_rate": 5.705831325358413e-07, "loss": 0.4692, "step": 29718 }, { "epoch": 0.85, "grad_norm": 3.2265251841015217, "learning_rate": 5.70368010314874e-07, "loss": 0.3201, "step": 29719 }, { "epoch": 0.85, "grad_norm": 9.601426076269039, "learning_rate": 5.701529262014866e-07, "loss": 0.7148, "step": 29720 }, { "epoch": 0.85, "grad_norm": 5.862687458872209, "learning_rate": 5.69937880197528e-07, "loss": 0.5598, "step": 29721 }, { "epoch": 0.85, "grad_norm": 4.893205382441735, "learning_rate": 5.697228723048481e-07, "loss": 0.3876, "step": 29722 }, { "epoch": 0.85, "grad_norm": 4.114637884752185, "learning_rate": 5.695079025252965e-07, "loss": 0.239, "step": 29723 }, { "epoch": 0.85, "grad_norm": 2.209617004737431, "learning_rate": 5.692929708607231e-07, "loss": 0.2517, "step": 29724 }, { "epoch": 0.85, "grad_norm": 10.334108774766268, "learning_rate": 5.690780773129761e-07, "loss": 0.8807, "step": 29725 }, { "epoch": 0.85, "grad_norm": 5.835398453785389, "learning_rate": 5.688632218839052e-07, "loss": 0.5984, "step": 29726 }, { "epoch": 0.85, "grad_norm": 18.7541430664366, "learning_rate": 5.686484045753588e-07, "loss": 0.43, "step": 29727 }, { "epoch": 0.85, "grad_norm": 7.000312474633396, "learning_rate": 5.684336253891848e-07, "loss": 0.4823, "step": 29728 }, { "epoch": 0.85, "grad_norm": 3.511557196612867, "learning_rate": 5.682188843272313e-07, "loss": 0.409, "step": 29729 }, { "epoch": 0.85, "grad_norm": 11.313469692420766, "learning_rate": 5.680041813913434e-07, "loss": 0.6063, "step": 29730 }, { "epoch": 0.85, "grad_norm": 4.805326056497567, "learning_rate": 5.677895165833708e-07, "loss": 0.3824, "step": 29731 }, { "epoch": 0.85, "grad_norm": 6.907782337163422, "learning_rate": 5.675748899051586e-07, "loss": 0.4261, "step": 29732 }, { "epoch": 0.85, "grad_norm": 7.919962635241136, "learning_rate": 5.673603013585538e-07, "loss": 0.3974, "step": 29733 }, { "epoch": 0.85, "grad_norm": 5.944612679918315, "learning_rate": 5.671457509454037e-07, "loss": 0.7625, "step": 29734 }, { "epoch": 0.85, "grad_norm": 4.158849319267274, "learning_rate": 5.669312386675524e-07, "loss": 0.2454, "step": 29735 }, { "epoch": 0.85, "grad_norm": 3.9462259273864193, "learning_rate": 5.667167645268462e-07, "loss": 0.3875, "step": 29736 }, { "epoch": 0.85, "grad_norm": 3.859004879397045, "learning_rate": 5.665023285251292e-07, "loss": 0.656, "step": 29737 }, { "epoch": 0.85, "grad_norm": 3.655188414522205, "learning_rate": 5.662879306642472e-07, "loss": 0.3669, "step": 29738 }, { "epoch": 0.85, "grad_norm": 6.132501951482488, "learning_rate": 5.660735709460435e-07, "loss": 0.4316, "step": 29739 }, { "epoch": 0.85, "grad_norm": 4.778755584482038, "learning_rate": 5.658592493723641e-07, "loss": 0.5151, "step": 29740 }, { "epoch": 0.85, "grad_norm": 4.868346540116877, "learning_rate": 5.656449659450514e-07, "loss": 0.557, "step": 29741 }, { "epoch": 0.85, "grad_norm": 9.36614649749184, "learning_rate": 5.654307206659482e-07, "loss": 0.695, "step": 29742 }, { "epoch": 0.85, "grad_norm": 4.547001499369575, "learning_rate": 5.652165135368992e-07, "loss": 0.6827, "step": 29743 }, { "epoch": 0.85, "grad_norm": 3.221045883566936, "learning_rate": 5.650023445597458e-07, "loss": 0.138, "step": 29744 }, { "epoch": 0.85, "grad_norm": 4.430597201795706, "learning_rate": 5.64788213736332e-07, "loss": 0.1525, "step": 29745 }, { "epoch": 0.85, "grad_norm": 6.024887207368386, "learning_rate": 5.645741210684985e-07, "loss": 0.6336, "step": 29746 }, { "epoch": 0.85, "grad_norm": 8.782305019439246, "learning_rate": 5.643600665580884e-07, "loss": 0.4548, "step": 29747 }, { "epoch": 0.85, "grad_norm": 7.574718472628221, "learning_rate": 5.641460502069423e-07, "loss": 0.7956, "step": 29748 }, { "epoch": 0.85, "grad_norm": 5.699804494248842, "learning_rate": 5.639320720169022e-07, "loss": 0.3524, "step": 29749 }, { "epoch": 0.85, "grad_norm": 6.299307000934039, "learning_rate": 5.637181319898071e-07, "loss": 0.4579, "step": 29750 }, { "epoch": 0.85, "grad_norm": 5.1542622520610895, "learning_rate": 5.635042301274984e-07, "loss": 0.6916, "step": 29751 }, { "epoch": 0.85, "grad_norm": 4.4880173940100505, "learning_rate": 5.632903664318179e-07, "loss": 0.4408, "step": 29752 }, { "epoch": 0.85, "grad_norm": 4.317114103109609, "learning_rate": 5.63076540904603e-07, "loss": 0.3082, "step": 29753 }, { "epoch": 0.85, "grad_norm": 6.76564135119993, "learning_rate": 5.628627535476949e-07, "loss": 0.3457, "step": 29754 }, { "epoch": 0.85, "grad_norm": 4.274373204866342, "learning_rate": 5.626490043629329e-07, "loss": 0.516, "step": 29755 }, { "epoch": 0.85, "grad_norm": 8.198228402776529, "learning_rate": 5.624352933521548e-07, "loss": 0.4328, "step": 29756 }, { "epoch": 0.85, "grad_norm": 3.6483314057634493, "learning_rate": 5.622216205171987e-07, "loss": 0.3183, "step": 29757 }, { "epoch": 0.85, "grad_norm": 7.586700055407021, "learning_rate": 5.620079858599037e-07, "loss": 0.8307, "step": 29758 }, { "epoch": 0.85, "grad_norm": 4.4351949211989465, "learning_rate": 5.617943893821081e-07, "loss": 0.3113, "step": 29759 }, { "epoch": 0.85, "grad_norm": 3.2002540934533297, "learning_rate": 5.61580831085648e-07, "loss": 0.238, "step": 29760 }, { "epoch": 0.85, "grad_norm": 8.652601774686817, "learning_rate": 5.613673109723627e-07, "loss": 0.4456, "step": 29761 }, { "epoch": 0.85, "grad_norm": 9.427823493240748, "learning_rate": 5.611538290440882e-07, "loss": 0.7132, "step": 29762 }, { "epoch": 0.85, "grad_norm": 5.807766412280669, "learning_rate": 5.609403853026607e-07, "loss": 0.1988, "step": 29763 }, { "epoch": 0.85, "grad_norm": 5.151321552671089, "learning_rate": 5.607269797499155e-07, "loss": 0.5533, "step": 29764 }, { "epoch": 0.85, "grad_norm": 7.771183319664753, "learning_rate": 5.605136123876903e-07, "loss": 0.8445, "step": 29765 }, { "epoch": 0.85, "grad_norm": 4.867634172593695, "learning_rate": 5.603002832178189e-07, "loss": 0.5206, "step": 29766 }, { "epoch": 0.85, "grad_norm": 8.507072283417875, "learning_rate": 5.600869922421376e-07, "loss": 0.4886, "step": 29767 }, { "epoch": 0.85, "grad_norm": 5.6869747212738435, "learning_rate": 5.598737394624831e-07, "loss": 0.1677, "step": 29768 }, { "epoch": 0.85, "grad_norm": 7.437747165836463, "learning_rate": 5.596605248806864e-07, "loss": 0.5209, "step": 29769 }, { "epoch": 0.85, "grad_norm": 10.389490556095508, "learning_rate": 5.59447348498584e-07, "loss": 0.4865, "step": 29770 }, { "epoch": 0.85, "grad_norm": 8.475098992503819, "learning_rate": 5.592342103180088e-07, "loss": 0.5076, "step": 29771 }, { "epoch": 0.85, "grad_norm": 4.462519430337273, "learning_rate": 5.590211103407961e-07, "loss": 0.462, "step": 29772 }, { "epoch": 0.85, "grad_norm": 5.461823987704044, "learning_rate": 5.588080485687769e-07, "loss": 0.5097, "step": 29773 }, { "epoch": 0.85, "grad_norm": 3.7314478602896095, "learning_rate": 5.585950250037858e-07, "loss": 0.4066, "step": 29774 }, { "epoch": 0.85, "grad_norm": 4.695569125000058, "learning_rate": 5.583820396476552e-07, "loss": 0.3402, "step": 29775 }, { "epoch": 0.85, "grad_norm": 5.939889085870839, "learning_rate": 5.581690925022159e-07, "loss": 0.6412, "step": 29776 }, { "epoch": 0.85, "grad_norm": 4.809525697762615, "learning_rate": 5.57956183569302e-07, "loss": 0.1992, "step": 29777 }, { "epoch": 0.85, "grad_norm": 6.584020224682694, "learning_rate": 5.577433128507431e-07, "loss": 0.3988, "step": 29778 }, { "epoch": 0.85, "grad_norm": 6.07470626591884, "learning_rate": 5.575304803483728e-07, "loss": 0.6438, "step": 29779 }, { "epoch": 0.85, "grad_norm": 7.019610863575372, "learning_rate": 5.573176860640195e-07, "loss": 0.3956, "step": 29780 }, { "epoch": 0.85, "grad_norm": 2.386950433227831, "learning_rate": 5.571049299995168e-07, "loss": 0.0856, "step": 29781 }, { "epoch": 0.85, "grad_norm": 10.952160541121266, "learning_rate": 5.568922121566928e-07, "loss": 0.446, "step": 29782 }, { "epoch": 0.85, "grad_norm": 3.6909357810571564, "learning_rate": 5.56679532537377e-07, "loss": 0.49, "step": 29783 }, { "epoch": 0.85, "grad_norm": 5.48009534829438, "learning_rate": 5.564668911434013e-07, "loss": 0.1206, "step": 29784 }, { "epoch": 0.85, "grad_norm": 3.7789338008940176, "learning_rate": 5.562542879765925e-07, "loss": 0.2179, "step": 29785 }, { "epoch": 0.85, "grad_norm": 5.110252958221572, "learning_rate": 5.560417230387821e-07, "loss": 0.3922, "step": 29786 }, { "epoch": 0.85, "grad_norm": 6.0292297300227435, "learning_rate": 5.558291963317969e-07, "loss": 0.4311, "step": 29787 }, { "epoch": 0.85, "grad_norm": 8.233665710823926, "learning_rate": 5.55616707857467e-07, "loss": 0.5187, "step": 29788 }, { "epoch": 0.85, "grad_norm": 2.807830324790389, "learning_rate": 5.554042576176188e-07, "loss": 0.2327, "step": 29789 }, { "epoch": 0.85, "grad_norm": 5.2890860304879554, "learning_rate": 5.551918456140809e-07, "loss": 0.3825, "step": 29790 }, { "epoch": 0.85, "grad_norm": 6.408369913811155, "learning_rate": 5.54979471848679e-07, "loss": 0.9574, "step": 29791 }, { "epoch": 0.85, "grad_norm": 7.245456850589538, "learning_rate": 5.54767136323242e-07, "loss": 0.3996, "step": 29792 }, { "epoch": 0.85, "grad_norm": 4.814073008297078, "learning_rate": 5.545548390395966e-07, "loss": 0.4165, "step": 29793 }, { "epoch": 0.85, "grad_norm": 5.288570183925095, "learning_rate": 5.543425799995677e-07, "loss": 0.4284, "step": 29794 }, { "epoch": 0.85, "grad_norm": 6.502934453721598, "learning_rate": 5.54130359204983e-07, "loss": 0.1535, "step": 29795 }, { "epoch": 0.85, "grad_norm": 5.702576535123972, "learning_rate": 5.539181766576674e-07, "loss": 0.5129, "step": 29796 }, { "epoch": 0.85, "grad_norm": 7.094437149981429, "learning_rate": 5.537060323594467e-07, "loss": 0.3871, "step": 29797 }, { "epoch": 0.85, "grad_norm": 4.610508071515057, "learning_rate": 5.534939263121442e-07, "loss": 0.3234, "step": 29798 }, { "epoch": 0.85, "grad_norm": 4.109180590886357, "learning_rate": 5.532818585175865e-07, "loss": 0.4557, "step": 29799 }, { "epoch": 0.85, "grad_norm": 3.067142325334378, "learning_rate": 5.530698289775976e-07, "loss": 0.1766, "step": 29800 }, { "epoch": 0.85, "grad_norm": 2.8842684989601137, "learning_rate": 5.528578376940024e-07, "loss": 0.3946, "step": 29801 }, { "epoch": 0.85, "grad_norm": 1.439856825886641, "learning_rate": 5.526458846686234e-07, "loss": 0.1429, "step": 29802 }, { "epoch": 0.85, "grad_norm": 5.704438525962426, "learning_rate": 5.524339699032827e-07, "loss": 0.4911, "step": 29803 }, { "epoch": 0.85, "grad_norm": 4.259382549903319, "learning_rate": 5.522220933998063e-07, "loss": 0.3004, "step": 29804 }, { "epoch": 0.85, "grad_norm": 4.193046654108474, "learning_rate": 5.520102551600148e-07, "loss": 0.2633, "step": 29805 }, { "epoch": 0.85, "grad_norm": 4.74930820948738, "learning_rate": 5.517984551857325e-07, "loss": 0.5266, "step": 29806 }, { "epoch": 0.85, "grad_norm": 2.742247730256622, "learning_rate": 5.515866934787789e-07, "loss": 0.2873, "step": 29807 }, { "epoch": 0.85, "grad_norm": 10.172389285539081, "learning_rate": 5.513749700409787e-07, "loss": 1.2619, "step": 29808 }, { "epoch": 0.85, "grad_norm": 7.367033195295508, "learning_rate": 5.511632848741516e-07, "loss": 0.4683, "step": 29809 }, { "epoch": 0.85, "grad_norm": 6.471922939386386, "learning_rate": 5.509516379801183e-07, "loss": 0.4054, "step": 29810 }, { "epoch": 0.85, "grad_norm": 4.382889201263171, "learning_rate": 5.507400293607013e-07, "loss": 0.2591, "step": 29811 }, { "epoch": 0.85, "grad_norm": 2.977915859184051, "learning_rate": 5.50528459017719e-07, "loss": 0.2927, "step": 29812 }, { "epoch": 0.85, "grad_norm": 5.04015155231349, "learning_rate": 5.503169269529934e-07, "loss": 0.4038, "step": 29813 }, { "epoch": 0.85, "grad_norm": 4.915484151501717, "learning_rate": 5.501054331683426e-07, "loss": 0.2877, "step": 29814 }, { "epoch": 0.85, "grad_norm": 4.326145862647531, "learning_rate": 5.498939776655876e-07, "loss": 0.2844, "step": 29815 }, { "epoch": 0.85, "grad_norm": 5.495682495607738, "learning_rate": 5.496825604465466e-07, "loss": 0.2228, "step": 29816 }, { "epoch": 0.85, "grad_norm": 2.848556025806993, "learning_rate": 5.494711815130383e-07, "loss": 0.172, "step": 29817 }, { "epoch": 0.85, "grad_norm": 7.109040103611963, "learning_rate": 5.49259840866882e-07, "loss": 0.6004, "step": 29818 }, { "epoch": 0.85, "grad_norm": 6.421704997761699, "learning_rate": 5.490485385098943e-07, "loss": 0.5526, "step": 29819 }, { "epoch": 0.85, "grad_norm": 2.4756064269292004, "learning_rate": 5.488372744438952e-07, "loss": 0.2276, "step": 29820 }, { "epoch": 0.85, "grad_norm": 5.325880814178851, "learning_rate": 5.486260486706996e-07, "loss": 0.3621, "step": 29821 }, { "epoch": 0.85, "grad_norm": 3.559260819825076, "learning_rate": 5.484148611921275e-07, "loss": 0.3516, "step": 29822 }, { "epoch": 0.85, "grad_norm": 7.193870325349897, "learning_rate": 5.482037120099943e-07, "loss": 0.5045, "step": 29823 }, { "epoch": 0.85, "grad_norm": 8.63440589526486, "learning_rate": 5.47992601126115e-07, "loss": 0.3622, "step": 29824 }, { "epoch": 0.85, "grad_norm": 7.839597780848104, "learning_rate": 5.477815285423083e-07, "loss": 0.4325, "step": 29825 }, { "epoch": 0.85, "grad_norm": 5.958027920753899, "learning_rate": 5.475704942603883e-07, "loss": 0.6615, "step": 29826 }, { "epoch": 0.85, "grad_norm": 8.497899188558105, "learning_rate": 5.473594982821717e-07, "loss": 0.2324, "step": 29827 }, { "epoch": 0.85, "grad_norm": 3.7604424835896864, "learning_rate": 5.471485406094734e-07, "loss": 0.4142, "step": 29828 }, { "epoch": 0.85, "grad_norm": 6.7029438483384505, "learning_rate": 5.46937621244108e-07, "loss": 0.379, "step": 29829 }, { "epoch": 0.85, "grad_norm": 5.118609584610467, "learning_rate": 5.467267401878884e-07, "loss": 0.5423, "step": 29830 }, { "epoch": 0.85, "grad_norm": 5.2223983621088665, "learning_rate": 5.465158974426321e-07, "loss": 0.1997, "step": 29831 }, { "epoch": 0.85, "grad_norm": 5.467276325578125, "learning_rate": 5.463050930101499e-07, "loss": 0.4687, "step": 29832 }, { "epoch": 0.85, "grad_norm": 7.1731503086187045, "learning_rate": 5.460943268922564e-07, "loss": 0.3493, "step": 29833 }, { "epoch": 0.85, "grad_norm": 2.8392186795296026, "learning_rate": 5.458835990907663e-07, "loss": 0.2596, "step": 29834 }, { "epoch": 0.85, "grad_norm": 5.40666108966419, "learning_rate": 5.456729096074914e-07, "loss": 0.6595, "step": 29835 }, { "epoch": 0.85, "grad_norm": 5.354982494535782, "learning_rate": 5.454622584442437e-07, "loss": 0.0888, "step": 29836 }, { "epoch": 0.85, "grad_norm": 5.868891380971473, "learning_rate": 5.452516456028345e-07, "loss": 0.4901, "step": 29837 }, { "epoch": 0.85, "grad_norm": 7.753353992849013, "learning_rate": 5.450410710850784e-07, "loss": 0.6996, "step": 29838 }, { "epoch": 0.85, "grad_norm": 3.7451891159849375, "learning_rate": 5.44830534892784e-07, "loss": 0.2661, "step": 29839 }, { "epoch": 0.85, "grad_norm": 3.980256109575732, "learning_rate": 5.446200370277644e-07, "loss": 0.3255, "step": 29840 }, { "epoch": 0.85, "grad_norm": 4.04940716678439, "learning_rate": 5.444095774918307e-07, "loss": 0.2013, "step": 29841 }, { "epoch": 0.85, "grad_norm": 6.653098779772708, "learning_rate": 5.441991562867932e-07, "loss": 0.5076, "step": 29842 }, { "epoch": 0.85, "grad_norm": 4.506957609408522, "learning_rate": 5.439887734144616e-07, "loss": 0.3211, "step": 29843 }, { "epoch": 0.85, "grad_norm": 6.175960955577925, "learning_rate": 5.437784288766451e-07, "loss": 0.5613, "step": 29844 }, { "epoch": 0.85, "grad_norm": 2.672887955885775, "learning_rate": 5.435681226751549e-07, "loss": 0.1545, "step": 29845 }, { "epoch": 0.85, "grad_norm": 11.51406166510463, "learning_rate": 5.433578548117985e-07, "loss": 0.1066, "step": 29846 }, { "epoch": 0.85, "grad_norm": 5.637851360731164, "learning_rate": 5.431476252883866e-07, "loss": 0.4345, "step": 29847 }, { "epoch": 0.85, "grad_norm": 2.5886011609328095, "learning_rate": 5.429374341067261e-07, "loss": 0.3277, "step": 29848 }, { "epoch": 0.85, "grad_norm": 4.443793655803438, "learning_rate": 5.427272812686268e-07, "loss": 0.5066, "step": 29849 }, { "epoch": 0.85, "grad_norm": 3.982062891878772, "learning_rate": 5.425171667758961e-07, "loss": 0.2521, "step": 29850 }, { "epoch": 0.85, "grad_norm": 6.907553433278222, "learning_rate": 5.423070906303407e-07, "loss": 0.3165, "step": 29851 }, { "epoch": 0.85, "grad_norm": 3.6056603807501113, "learning_rate": 5.420970528337693e-07, "loss": 0.4048, "step": 29852 }, { "epoch": 0.85, "grad_norm": 8.342417349813203, "learning_rate": 5.418870533879871e-07, "loss": 0.7262, "step": 29853 }, { "epoch": 0.85, "grad_norm": 7.326751645364511, "learning_rate": 5.416770922948028e-07, "loss": 0.5877, "step": 29854 }, { "epoch": 0.85, "grad_norm": 4.343744586694212, "learning_rate": 5.414671695560203e-07, "loss": 0.1328, "step": 29855 }, { "epoch": 0.86, "grad_norm": 3.0682378364837226, "learning_rate": 5.412572851734488e-07, "loss": 0.1705, "step": 29856 }, { "epoch": 0.86, "grad_norm": 6.906604740473982, "learning_rate": 5.410474391488901e-07, "loss": 0.4624, "step": 29857 }, { "epoch": 0.86, "grad_norm": 5.982563391994045, "learning_rate": 5.408376314841512e-07, "loss": 0.56, "step": 29858 }, { "epoch": 0.86, "grad_norm": 8.034951125342294, "learning_rate": 5.40627862181038e-07, "loss": 0.416, "step": 29859 }, { "epoch": 0.86, "grad_norm": 4.027239971137272, "learning_rate": 5.404181312413531e-07, "loss": 0.4288, "step": 29860 }, { "epoch": 0.86, "grad_norm": 6.4598121078082, "learning_rate": 5.402084386669037e-07, "loss": 0.4436, "step": 29861 }, { "epoch": 0.86, "grad_norm": 4.534350222540263, "learning_rate": 5.399987844594912e-07, "loss": 0.1923, "step": 29862 }, { "epoch": 0.86, "grad_norm": 1.9249733861099083, "learning_rate": 5.397891686209206e-07, "loss": 0.1297, "step": 29863 }, { "epoch": 0.86, "grad_norm": 5.0829502128100525, "learning_rate": 5.395795911529933e-07, "loss": 0.5138, "step": 29864 }, { "epoch": 0.86, "grad_norm": 6.283058020161583, "learning_rate": 5.393700520575135e-07, "loss": 0.6156, "step": 29865 }, { "epoch": 0.86, "grad_norm": 5.475084265839942, "learning_rate": 5.391605513362852e-07, "loss": 0.3739, "step": 29866 }, { "epoch": 0.86, "grad_norm": 6.816710684462416, "learning_rate": 5.389510889911081e-07, "loss": 0.6789, "step": 29867 }, { "epoch": 0.86, "grad_norm": 3.992860920166054, "learning_rate": 5.38741665023787e-07, "loss": 0.2053, "step": 29868 }, { "epoch": 0.86, "grad_norm": 9.988105375541446, "learning_rate": 5.385322794361219e-07, "loss": 0.5669, "step": 29869 }, { "epoch": 0.86, "grad_norm": 3.6832080206260467, "learning_rate": 5.383229322299138e-07, "loss": 0.4704, "step": 29870 }, { "epoch": 0.86, "grad_norm": 1.7821126154177427, "learning_rate": 5.381136234069639e-07, "loss": 0.127, "step": 29871 }, { "epoch": 0.86, "grad_norm": 5.313143074158369, "learning_rate": 5.379043529690736e-07, "loss": 0.427, "step": 29872 }, { "epoch": 0.86, "grad_norm": 11.784196439658267, "learning_rate": 5.37695120918042e-07, "loss": 0.6568, "step": 29873 }, { "epoch": 0.86, "grad_norm": 7.120574949013788, "learning_rate": 5.374859272556698e-07, "loss": 0.5564, "step": 29874 }, { "epoch": 0.86, "grad_norm": 9.581374545945756, "learning_rate": 5.37276771983758e-07, "loss": 0.7193, "step": 29875 }, { "epoch": 0.86, "grad_norm": 3.41664266965549, "learning_rate": 5.370676551041043e-07, "loss": 0.3352, "step": 29876 }, { "epoch": 0.86, "grad_norm": 4.825932915582933, "learning_rate": 5.368585766185086e-07, "loss": 0.3361, "step": 29877 }, { "epoch": 0.86, "grad_norm": 5.208834016258288, "learning_rate": 5.366495365287677e-07, "loss": 0.37, "step": 29878 }, { "epoch": 0.86, "grad_norm": 4.970397292286006, "learning_rate": 5.364405348366819e-07, "loss": 0.4437, "step": 29879 }, { "epoch": 0.86, "grad_norm": 5.149733338350223, "learning_rate": 5.36231571544048e-07, "loss": 0.2904, "step": 29880 }, { "epoch": 0.86, "grad_norm": 5.14814935783801, "learning_rate": 5.360226466526646e-07, "loss": 0.2273, "step": 29881 }, { "epoch": 0.86, "grad_norm": 5.128096691660183, "learning_rate": 5.358137601643293e-07, "loss": 0.2961, "step": 29882 }, { "epoch": 0.86, "grad_norm": 5.345486632016662, "learning_rate": 5.356049120808387e-07, "loss": 0.1993, "step": 29883 }, { "epoch": 0.86, "grad_norm": 11.403472499513017, "learning_rate": 5.353961024039894e-07, "loss": 0.5906, "step": 29884 }, { "epoch": 0.86, "grad_norm": 8.340802018369178, "learning_rate": 5.351873311355765e-07, "loss": 0.897, "step": 29885 }, { "epoch": 0.86, "grad_norm": 4.8973128687191, "learning_rate": 5.349785982773987e-07, "loss": 0.2771, "step": 29886 }, { "epoch": 0.86, "grad_norm": 2.3283560337410925, "learning_rate": 5.347699038312487e-07, "loss": 0.1441, "step": 29887 }, { "epoch": 0.86, "grad_norm": 6.701796677941176, "learning_rate": 5.345612477989248e-07, "loss": 0.5398, "step": 29888 }, { "epoch": 0.86, "grad_norm": 8.225030682167406, "learning_rate": 5.343526301822211e-07, "loss": 0.6923, "step": 29889 }, { "epoch": 0.86, "grad_norm": 3.4415827001222863, "learning_rate": 5.341440509829304e-07, "loss": 0.3801, "step": 29890 }, { "epoch": 0.86, "grad_norm": 5.136146194400538, "learning_rate": 5.339355102028493e-07, "loss": 0.5216, "step": 29891 }, { "epoch": 0.86, "grad_norm": 5.907299372381509, "learning_rate": 5.337270078437706e-07, "loss": 0.3649, "step": 29892 }, { "epoch": 0.86, "grad_norm": 7.035121411412558, "learning_rate": 5.335185439074897e-07, "loss": 0.5372, "step": 29893 }, { "epoch": 0.86, "grad_norm": 3.2745616320979734, "learning_rate": 5.333101183957972e-07, "loss": 0.3534, "step": 29894 }, { "epoch": 0.86, "grad_norm": 4.9760626956632965, "learning_rate": 5.331017313104891e-07, "loss": 0.73, "step": 29895 }, { "epoch": 0.86, "grad_norm": 6.250174748838766, "learning_rate": 5.328933826533572e-07, "loss": 0.4857, "step": 29896 }, { "epoch": 0.86, "grad_norm": 4.816023490404905, "learning_rate": 5.326850724261934e-07, "loss": 0.5401, "step": 29897 }, { "epoch": 0.86, "grad_norm": 5.892658398732307, "learning_rate": 5.324768006307884e-07, "loss": 0.6784, "step": 29898 }, { "epoch": 0.86, "grad_norm": 6.329337524291064, "learning_rate": 5.322685672689359e-07, "loss": 0.3311, "step": 29899 }, { "epoch": 0.86, "grad_norm": 6.710507113741239, "learning_rate": 5.320603723424278e-07, "loss": 0.5586, "step": 29900 }, { "epoch": 0.86, "grad_norm": 4.092166273973087, "learning_rate": 5.318522158530526e-07, "loss": 0.4444, "step": 29901 }, { "epoch": 0.86, "grad_norm": 4.285432429354986, "learning_rate": 5.316440978026044e-07, "loss": 0.3379, "step": 29902 }, { "epoch": 0.86, "grad_norm": 3.970606857611425, "learning_rate": 5.314360181928718e-07, "loss": 0.2276, "step": 29903 }, { "epoch": 0.86, "grad_norm": 7.728596680866605, "learning_rate": 5.312279770256445e-07, "loss": 0.2934, "step": 29904 }, { "epoch": 0.86, "grad_norm": 5.422935623954536, "learning_rate": 5.310199743027122e-07, "loss": 0.344, "step": 29905 }, { "epoch": 0.86, "grad_norm": 7.2043240225709155, "learning_rate": 5.308120100258646e-07, "loss": 0.6341, "step": 29906 }, { "epoch": 0.86, "grad_norm": 3.9450911355944305, "learning_rate": 5.306040841968918e-07, "loss": 0.1572, "step": 29907 }, { "epoch": 0.86, "grad_norm": 6.187526356034067, "learning_rate": 5.303961968175808e-07, "loss": 0.2158, "step": 29908 }, { "epoch": 0.86, "grad_norm": 7.106940018058975, "learning_rate": 5.301883478897223e-07, "loss": 0.5362, "step": 29909 }, { "epoch": 0.86, "grad_norm": 6.902038118410263, "learning_rate": 5.299805374151029e-07, "loss": 0.5633, "step": 29910 }, { "epoch": 0.86, "grad_norm": 3.820525118600092, "learning_rate": 5.297727653955109e-07, "loss": 0.2681, "step": 29911 }, { "epoch": 0.86, "grad_norm": 6.665749053587605, "learning_rate": 5.295650318327322e-07, "loss": 0.4842, "step": 29912 }, { "epoch": 0.86, "grad_norm": 8.130849317314897, "learning_rate": 5.293573367285559e-07, "loss": 0.624, "step": 29913 }, { "epoch": 0.86, "grad_norm": 7.62483465296838, "learning_rate": 5.291496800847673e-07, "loss": 0.5519, "step": 29914 }, { "epoch": 0.86, "grad_norm": 5.517121898052598, "learning_rate": 5.289420619031533e-07, "loss": 0.2962, "step": 29915 }, { "epoch": 0.86, "grad_norm": 8.691749631120622, "learning_rate": 5.287344821855023e-07, "loss": 0.1979, "step": 29916 }, { "epoch": 0.86, "grad_norm": 6.242387732587438, "learning_rate": 5.285269409335963e-07, "loss": 0.4771, "step": 29917 }, { "epoch": 0.86, "grad_norm": 4.663223932431322, "learning_rate": 5.283194381492229e-07, "loss": 0.4543, "step": 29918 }, { "epoch": 0.86, "grad_norm": 4.438998063508342, "learning_rate": 5.281119738341667e-07, "loss": 0.3553, "step": 29919 }, { "epoch": 0.86, "grad_norm": 5.082784516251344, "learning_rate": 5.279045479902129e-07, "loss": 0.1814, "step": 29920 }, { "epoch": 0.86, "grad_norm": 5.137063784306716, "learning_rate": 5.27697160619145e-07, "loss": 0.2629, "step": 29921 }, { "epoch": 0.86, "grad_norm": 3.2693146664655424, "learning_rate": 5.274898117227484e-07, "loss": 0.5494, "step": 29922 }, { "epoch": 0.86, "grad_norm": 5.757972083604085, "learning_rate": 5.272825013028065e-07, "loss": 0.6609, "step": 29923 }, { "epoch": 0.86, "grad_norm": 3.9527660101325726, "learning_rate": 5.270752293611014e-07, "loss": 0.521, "step": 29924 }, { "epoch": 0.86, "grad_norm": 8.82605644905266, "learning_rate": 5.268679958994188e-07, "loss": 0.7787, "step": 29925 }, { "epoch": 0.86, "grad_norm": 4.402830745159129, "learning_rate": 5.266608009195389e-07, "loss": 0.264, "step": 29926 }, { "epoch": 0.86, "grad_norm": 10.329319871815482, "learning_rate": 5.26453644423246e-07, "loss": 0.6834, "step": 29927 }, { "epoch": 0.86, "grad_norm": 5.623719175533652, "learning_rate": 5.262465264123212e-07, "loss": 0.3031, "step": 29928 }, { "epoch": 0.86, "grad_norm": 5.469453210577724, "learning_rate": 5.260394468885472e-07, "loss": 0.4134, "step": 29929 }, { "epoch": 0.86, "grad_norm": 4.723238143221944, "learning_rate": 5.258324058537051e-07, "loss": 0.3992, "step": 29930 }, { "epoch": 0.86, "grad_norm": 8.551510699139834, "learning_rate": 5.256254033095748e-07, "loss": 0.6191, "step": 29931 }, { "epoch": 0.86, "grad_norm": 4.31378047011565, "learning_rate": 5.254184392579398e-07, "loss": 0.2115, "step": 29932 }, { "epoch": 0.86, "grad_norm": 5.0872180457562814, "learning_rate": 5.252115137005776e-07, "loss": 0.4431, "step": 29933 }, { "epoch": 0.86, "grad_norm": 6.123440427163786, "learning_rate": 5.250046266392711e-07, "loss": 0.3517, "step": 29934 }, { "epoch": 0.86, "grad_norm": 7.105008152843633, "learning_rate": 5.247977780757973e-07, "loss": 0.3056, "step": 29935 }, { "epoch": 0.86, "grad_norm": 5.411299183340682, "learning_rate": 5.245909680119388e-07, "loss": 0.328, "step": 29936 }, { "epoch": 0.86, "grad_norm": 4.549129051910869, "learning_rate": 5.243841964494728e-07, "loss": 0.4622, "step": 29937 }, { "epoch": 0.86, "grad_norm": 8.032199432419988, "learning_rate": 5.241774633901786e-07, "loss": 0.3545, "step": 29938 }, { "epoch": 0.86, "grad_norm": 7.298113699418577, "learning_rate": 5.239707688358336e-07, "loss": 0.9728, "step": 29939 }, { "epoch": 0.86, "grad_norm": 4.417318550921261, "learning_rate": 5.23764112788217e-07, "loss": 0.4149, "step": 29940 }, { "epoch": 0.86, "grad_norm": 5.33265779607828, "learning_rate": 5.235574952491073e-07, "loss": 0.3653, "step": 29941 }, { "epoch": 0.86, "grad_norm": 5.1043528101188755, "learning_rate": 5.233509162202811e-07, "loss": 0.2463, "step": 29942 }, { "epoch": 0.86, "grad_norm": 5.360062090812998, "learning_rate": 5.23144375703517e-07, "loss": 0.5226, "step": 29943 }, { "epoch": 0.86, "grad_norm": 18.871453665742123, "learning_rate": 5.229378737005892e-07, "loss": 0.7123, "step": 29944 }, { "epoch": 0.86, "grad_norm": 3.7713715957697547, "learning_rate": 5.227314102132763e-07, "loss": 0.3741, "step": 29945 }, { "epoch": 0.86, "grad_norm": 7.636854243722881, "learning_rate": 5.225249852433529e-07, "loss": 0.2802, "step": 29946 }, { "epoch": 0.86, "grad_norm": 1.1121537622772693, "learning_rate": 5.223185987925955e-07, "loss": 0.0929, "step": 29947 }, { "epoch": 0.86, "grad_norm": 2.8808617336457507, "learning_rate": 5.221122508627813e-07, "loss": 0.2197, "step": 29948 }, { "epoch": 0.86, "grad_norm": 3.5292946794900995, "learning_rate": 5.219059414556843e-07, "loss": 0.2743, "step": 29949 }, { "epoch": 0.86, "grad_norm": 2.786104444806622, "learning_rate": 5.216996705730787e-07, "loss": 0.263, "step": 29950 }, { "epoch": 0.86, "grad_norm": 6.411224666730602, "learning_rate": 5.214934382167381e-07, "loss": 0.4192, "step": 29951 }, { "epoch": 0.86, "grad_norm": 3.0468027155200104, "learning_rate": 5.212872443884398e-07, "loss": 0.2, "step": 29952 }, { "epoch": 0.86, "grad_norm": 6.4258173515441985, "learning_rate": 5.210810890899542e-07, "loss": 0.2362, "step": 29953 }, { "epoch": 0.86, "grad_norm": 4.8692839318711085, "learning_rate": 5.208749723230583e-07, "loss": 0.3438, "step": 29954 }, { "epoch": 0.86, "grad_norm": 5.256138187811303, "learning_rate": 5.206688940895221e-07, "loss": 0.4017, "step": 29955 }, { "epoch": 0.86, "grad_norm": 4.485039718222414, "learning_rate": 5.204628543911205e-07, "loss": 0.2436, "step": 29956 }, { "epoch": 0.86, "grad_norm": 5.117553604657738, "learning_rate": 5.20256853229626e-07, "loss": 0.5176, "step": 29957 }, { "epoch": 0.86, "grad_norm": 3.537130691742132, "learning_rate": 5.200508906068092e-07, "loss": 0.5307, "step": 29958 }, { "epoch": 0.86, "grad_norm": 10.439483454118712, "learning_rate": 5.198449665244437e-07, "loss": 0.3436, "step": 29959 }, { "epoch": 0.86, "grad_norm": 2.493979775209107, "learning_rate": 5.196390809842994e-07, "loss": 0.3562, "step": 29960 }, { "epoch": 0.86, "grad_norm": 6.564782899054526, "learning_rate": 5.194332339881497e-07, "loss": 0.6311, "step": 29961 }, { "epoch": 0.86, "grad_norm": 4.939766478337611, "learning_rate": 5.192274255377633e-07, "loss": 0.3239, "step": 29962 }, { "epoch": 0.86, "grad_norm": 4.669450928391501, "learning_rate": 5.190216556349126e-07, "loss": 0.5562, "step": 29963 }, { "epoch": 0.86, "grad_norm": 7.3256235631923925, "learning_rate": 5.188159242813667e-07, "loss": 0.5698, "step": 29964 }, { "epoch": 0.86, "grad_norm": 7.214046845265225, "learning_rate": 5.186102314788944e-07, "loss": 0.2298, "step": 29965 }, { "epoch": 0.86, "grad_norm": 6.172397407924455, "learning_rate": 5.184045772292679e-07, "loss": 0.4586, "step": 29966 }, { "epoch": 0.86, "grad_norm": 3.696299140946671, "learning_rate": 5.181989615342542e-07, "loss": 0.3747, "step": 29967 }, { "epoch": 0.86, "grad_norm": 5.214212557994262, "learning_rate": 5.179933843956237e-07, "loss": 0.412, "step": 29968 }, { "epoch": 0.86, "grad_norm": 8.150351426674552, "learning_rate": 5.177878458151431e-07, "loss": 0.5839, "step": 29969 }, { "epoch": 0.86, "grad_norm": 4.289161972810173, "learning_rate": 5.175823457945833e-07, "loss": 0.2222, "step": 29970 }, { "epoch": 0.86, "grad_norm": 9.76950977419171, "learning_rate": 5.173768843357102e-07, "loss": 0.4515, "step": 29971 }, { "epoch": 0.86, "grad_norm": 4.975832731436124, "learning_rate": 5.171714614402911e-07, "loss": 0.3374, "step": 29972 }, { "epoch": 0.86, "grad_norm": 5.2098854295430375, "learning_rate": 5.169660771100949e-07, "loss": 0.7194, "step": 29973 }, { "epoch": 0.86, "grad_norm": 6.100110182783254, "learning_rate": 5.16760731346887e-07, "loss": 0.4135, "step": 29974 }, { "epoch": 0.86, "grad_norm": 4.726762348080031, "learning_rate": 5.16555424152435e-07, "loss": 0.5476, "step": 29975 }, { "epoch": 0.86, "grad_norm": 6.891951971671587, "learning_rate": 5.16350155528505e-07, "loss": 0.2892, "step": 29976 }, { "epoch": 0.86, "grad_norm": 10.038204598785462, "learning_rate": 5.161449254768624e-07, "loss": 0.756, "step": 29977 }, { "epoch": 0.86, "grad_norm": 3.4560593785288876, "learning_rate": 5.159397339992717e-07, "loss": 0.3202, "step": 29978 }, { "epoch": 0.86, "grad_norm": 7.991173494361519, "learning_rate": 5.157345810975007e-07, "loss": 0.3478, "step": 29979 }, { "epoch": 0.86, "grad_norm": 3.4289722087754044, "learning_rate": 5.155294667733118e-07, "loss": 0.277, "step": 29980 }, { "epoch": 0.86, "grad_norm": 4.158482733446303, "learning_rate": 5.153243910284711e-07, "loss": 0.25, "step": 29981 }, { "epoch": 0.86, "grad_norm": 7.4437453070560915, "learning_rate": 5.151193538647431e-07, "loss": 0.657, "step": 29982 }, { "epoch": 0.86, "grad_norm": 5.998784895881063, "learning_rate": 5.149143552838915e-07, "loss": 0.3388, "step": 29983 }, { "epoch": 0.86, "grad_norm": 3.571375256719463, "learning_rate": 5.147093952876792e-07, "loss": 0.1381, "step": 29984 }, { "epoch": 0.86, "grad_norm": 3.5975804357171888, "learning_rate": 5.145044738778687e-07, "loss": 0.3454, "step": 29985 }, { "epoch": 0.86, "grad_norm": 4.558534518840172, "learning_rate": 5.142995910562249e-07, "loss": 0.2018, "step": 29986 }, { "epoch": 0.86, "grad_norm": 4.612628525594774, "learning_rate": 5.140947468245088e-07, "loss": 0.1358, "step": 29987 }, { "epoch": 0.86, "grad_norm": 5.5087128031915835, "learning_rate": 5.138899411844827e-07, "loss": 0.6278, "step": 29988 }, { "epoch": 0.86, "grad_norm": 4.120176298156429, "learning_rate": 5.136851741379107e-07, "loss": 0.2852, "step": 29989 }, { "epoch": 0.86, "grad_norm": 6.899043141928102, "learning_rate": 5.134804456865522e-07, "loss": 0.3998, "step": 29990 }, { "epoch": 0.86, "grad_norm": 8.82854837398279, "learning_rate": 5.132757558321694e-07, "loss": 0.6646, "step": 29991 }, { "epoch": 0.86, "grad_norm": 6.561044277222581, "learning_rate": 5.130711045765219e-07, "loss": 0.8464, "step": 29992 }, { "epoch": 0.86, "grad_norm": 8.558985302045725, "learning_rate": 5.128664919213721e-07, "loss": 0.5159, "step": 29993 }, { "epoch": 0.86, "grad_norm": 5.0357371158720055, "learning_rate": 5.126619178684783e-07, "loss": 0.4311, "step": 29994 }, { "epoch": 0.86, "grad_norm": 4.341428067514798, "learning_rate": 5.124573824196027e-07, "loss": 0.3996, "step": 29995 }, { "epoch": 0.86, "grad_norm": 4.997947748534644, "learning_rate": 5.122528855765029e-07, "loss": 0.3835, "step": 29996 }, { "epoch": 0.86, "grad_norm": 5.2260839588181796, "learning_rate": 5.120484273409398e-07, "loss": 0.4297, "step": 29997 }, { "epoch": 0.86, "grad_norm": 4.3850284625079, "learning_rate": 5.118440077146713e-07, "loss": 0.5586, "step": 29998 }, { "epoch": 0.86, "grad_norm": 5.429231012489535, "learning_rate": 5.116396266994555e-07, "loss": 0.5025, "step": 29999 }, { "epoch": 0.86, "grad_norm": 5.1690987917875635, "learning_rate": 5.114352842970521e-07, "loss": 0.6984, "step": 30000 }, { "epoch": 0.86, "grad_norm": 8.627066046900051, "learning_rate": 5.112309805092175e-07, "loss": 0.7241, "step": 30001 }, { "epoch": 0.86, "grad_norm": 3.140850742255067, "learning_rate": 5.110267153377108e-07, "loss": 0.2025, "step": 30002 }, { "epoch": 0.86, "grad_norm": 4.6022898126749565, "learning_rate": 5.108224887842889e-07, "loss": 0.2989, "step": 30003 }, { "epoch": 0.86, "grad_norm": 2.605087993086616, "learning_rate": 5.106183008507071e-07, "loss": 0.0937, "step": 30004 }, { "epoch": 0.86, "grad_norm": 6.728753914786753, "learning_rate": 5.104141515387245e-07, "loss": 0.4545, "step": 30005 }, { "epoch": 0.86, "grad_norm": 8.48739317432083, "learning_rate": 5.102100408500954e-07, "loss": 0.5646, "step": 30006 }, { "epoch": 0.86, "grad_norm": 5.2366180624224885, "learning_rate": 5.100059687865772e-07, "loss": 0.7259, "step": 30007 }, { "epoch": 0.86, "grad_norm": 8.275684026469174, "learning_rate": 5.098019353499239e-07, "loss": 0.7905, "step": 30008 }, { "epoch": 0.86, "grad_norm": 7.631427744701998, "learning_rate": 5.095979405418927e-07, "loss": 0.5973, "step": 30009 }, { "epoch": 0.86, "grad_norm": 15.479528629114965, "learning_rate": 5.093939843642375e-07, "loss": 0.5852, "step": 30010 }, { "epoch": 0.86, "grad_norm": 7.40141492667923, "learning_rate": 5.091900668187134e-07, "loss": 0.6205, "step": 30011 }, { "epoch": 0.86, "grad_norm": 2.4812606989055745, "learning_rate": 5.089861879070729e-07, "loss": 0.2854, "step": 30012 }, { "epoch": 0.86, "grad_norm": 9.130300053891805, "learning_rate": 5.08782347631071e-07, "loss": 0.6137, "step": 30013 }, { "epoch": 0.86, "grad_norm": 5.397433373877964, "learning_rate": 5.085785459924631e-07, "loss": 0.5573, "step": 30014 }, { "epoch": 0.86, "grad_norm": 5.122369230583034, "learning_rate": 5.083747829929997e-07, "loss": 0.1415, "step": 30015 }, { "epoch": 0.86, "grad_norm": 4.461539566911996, "learning_rate": 5.081710586344357e-07, "loss": 0.2402, "step": 30016 }, { "epoch": 0.86, "grad_norm": 3.5625106409817437, "learning_rate": 5.079673729185237e-07, "loss": 0.5115, "step": 30017 }, { "epoch": 0.86, "grad_norm": 5.9235689261831155, "learning_rate": 5.077637258470147e-07, "loss": 0.2591, "step": 30018 }, { "epoch": 0.86, "grad_norm": 4.369701501266941, "learning_rate": 5.07560117421661e-07, "loss": 0.4718, "step": 30019 }, { "epoch": 0.86, "grad_norm": 3.366426333183605, "learning_rate": 5.07356547644215e-07, "loss": 0.2744, "step": 30020 }, { "epoch": 0.86, "grad_norm": 3.9719994376173515, "learning_rate": 5.071530165164263e-07, "loss": 0.6966, "step": 30021 }, { "epoch": 0.86, "grad_norm": 6.733218826211016, "learning_rate": 5.069495240400473e-07, "loss": 0.566, "step": 30022 }, { "epoch": 0.86, "grad_norm": 5.148573368577787, "learning_rate": 5.067460702168292e-07, "loss": 0.3269, "step": 30023 }, { "epoch": 0.86, "grad_norm": 6.160616570244712, "learning_rate": 5.065426550485214e-07, "loss": 0.5846, "step": 30024 }, { "epoch": 0.86, "grad_norm": 7.4729706079432585, "learning_rate": 5.063392785368743e-07, "loss": 0.4335, "step": 30025 }, { "epoch": 0.86, "grad_norm": 4.76782200934547, "learning_rate": 5.061359406836358e-07, "loss": 0.5178, "step": 30026 }, { "epoch": 0.86, "grad_norm": 3.0702979400522494, "learning_rate": 5.059326414905575e-07, "loss": 0.1533, "step": 30027 }, { "epoch": 0.86, "grad_norm": 4.1246991481308735, "learning_rate": 5.057293809593861e-07, "loss": 0.3527, "step": 30028 }, { "epoch": 0.86, "grad_norm": 4.624697649583932, "learning_rate": 5.055261590918719e-07, "loss": 0.6145, "step": 30029 }, { "epoch": 0.86, "grad_norm": 1.9567198362821887, "learning_rate": 5.053229758897637e-07, "loss": 0.2381, "step": 30030 }, { "epoch": 0.86, "grad_norm": 6.231864481082969, "learning_rate": 5.051198313548083e-07, "loss": 0.6927, "step": 30031 }, { "epoch": 0.86, "grad_norm": 5.577459728415323, "learning_rate": 5.049167254887538e-07, "loss": 0.406, "step": 30032 }, { "epoch": 0.86, "grad_norm": 6.359603240395122, "learning_rate": 5.047136582933465e-07, "loss": 0.5913, "step": 30033 }, { "epoch": 0.86, "grad_norm": 5.003925832669893, "learning_rate": 5.045106297703344e-07, "loss": 0.3717, "step": 30034 }, { "epoch": 0.86, "grad_norm": 3.2710272170671875, "learning_rate": 5.043076399214636e-07, "loss": 0.3439, "step": 30035 }, { "epoch": 0.86, "grad_norm": 3.745408847796574, "learning_rate": 5.041046887484813e-07, "loss": 0.5024, "step": 30036 }, { "epoch": 0.86, "grad_norm": 3.708808432730402, "learning_rate": 5.039017762531329e-07, "loss": 0.1349, "step": 30037 }, { "epoch": 0.86, "grad_norm": 5.348821832041241, "learning_rate": 5.03698902437163e-07, "loss": 0.3537, "step": 30038 }, { "epoch": 0.86, "grad_norm": 4.842293009511694, "learning_rate": 5.034960673023187e-07, "loss": 0.6004, "step": 30039 }, { "epoch": 0.86, "grad_norm": 4.680130900932437, "learning_rate": 5.03293270850343e-07, "loss": 0.38, "step": 30040 }, { "epoch": 0.86, "grad_norm": 5.85525278172851, "learning_rate": 5.030905130829828e-07, "loss": 0.8129, "step": 30041 }, { "epoch": 0.86, "grad_norm": 5.552559381894838, "learning_rate": 5.028877940019805e-07, "loss": 0.2813, "step": 30042 }, { "epoch": 0.86, "grad_norm": 5.76099716581895, "learning_rate": 5.026851136090816e-07, "loss": 0.4922, "step": 30043 }, { "epoch": 0.86, "grad_norm": 6.3262397877886425, "learning_rate": 5.024824719060284e-07, "loss": 0.4664, "step": 30044 }, { "epoch": 0.86, "grad_norm": 4.06014705155582, "learning_rate": 5.022798688945651e-07, "loss": 0.265, "step": 30045 }, { "epoch": 0.86, "grad_norm": 5.437132768178468, "learning_rate": 5.020773045764337e-07, "loss": 0.3856, "step": 30046 }, { "epoch": 0.86, "grad_norm": 6.559166779286907, "learning_rate": 5.018747789533773e-07, "loss": 0.9, "step": 30047 }, { "epoch": 0.86, "grad_norm": 4.489657729639547, "learning_rate": 5.016722920271394e-07, "loss": 0.4404, "step": 30048 }, { "epoch": 0.86, "grad_norm": 3.9648524542651913, "learning_rate": 5.014698437994597e-07, "loss": 0.4081, "step": 30049 }, { "epoch": 0.86, "grad_norm": 7.519436636985944, "learning_rate": 5.012674342720819e-07, "loss": 0.414, "step": 30050 }, { "epoch": 0.86, "grad_norm": 4.483436991451663, "learning_rate": 5.010650634467467e-07, "loss": 0.4645, "step": 30051 }, { "epoch": 0.86, "grad_norm": 3.9055879260699724, "learning_rate": 5.008627313251952e-07, "loss": 0.2014, "step": 30052 }, { "epoch": 0.86, "grad_norm": 3.4179772251569034, "learning_rate": 5.006604379091662e-07, "loss": 0.5453, "step": 30053 }, { "epoch": 0.86, "grad_norm": 5.749137482014965, "learning_rate": 5.004581832004013e-07, "loss": 0.3928, "step": 30054 }, { "epoch": 0.86, "grad_norm": 6.8385424026833626, "learning_rate": 5.002559672006418e-07, "loss": 0.3686, "step": 30055 }, { "epoch": 0.86, "grad_norm": 5.725189210855092, "learning_rate": 5.000537899116253e-07, "loss": 0.3738, "step": 30056 }, { "epoch": 0.86, "grad_norm": 7.629895999144659, "learning_rate": 4.998516513350931e-07, "loss": 0.4877, "step": 30057 }, { "epoch": 0.86, "grad_norm": 4.538935595525897, "learning_rate": 4.996495514727834e-07, "loss": 0.4208, "step": 30058 }, { "epoch": 0.86, "grad_norm": 4.140557803202532, "learning_rate": 4.99447490326434e-07, "loss": 0.2062, "step": 30059 }, { "epoch": 0.86, "grad_norm": 4.90863491820054, "learning_rate": 4.992454678977826e-07, "loss": 0.194, "step": 30060 }, { "epoch": 0.86, "grad_norm": 5.297274999228372, "learning_rate": 4.990434841885694e-07, "loss": 0.6362, "step": 30061 }, { "epoch": 0.86, "grad_norm": 4.384525610253101, "learning_rate": 4.988415392005303e-07, "loss": 0.3857, "step": 30062 }, { "epoch": 0.86, "grad_norm": 5.939061812732218, "learning_rate": 4.986396329354037e-07, "loss": 0.2082, "step": 30063 }, { "epoch": 0.86, "grad_norm": 5.603252586873729, "learning_rate": 4.984377653949268e-07, "loss": 0.348, "step": 30064 }, { "epoch": 0.86, "grad_norm": 6.171554866165817, "learning_rate": 4.982359365808343e-07, "loss": 0.4862, "step": 30065 }, { "epoch": 0.86, "grad_norm": 2.43666615039526, "learning_rate": 4.980341464948646e-07, "loss": 0.2758, "step": 30066 }, { "epoch": 0.86, "grad_norm": 3.1711736194883695, "learning_rate": 4.978323951387515e-07, "loss": 0.2944, "step": 30067 }, { "epoch": 0.86, "grad_norm": 2.6815931923068583, "learning_rate": 4.976306825142335e-07, "loss": 0.3336, "step": 30068 }, { "epoch": 0.86, "grad_norm": 4.656963831313237, "learning_rate": 4.974290086230432e-07, "loss": 0.4662, "step": 30069 }, { "epoch": 0.86, "grad_norm": 9.976484783828809, "learning_rate": 4.972273734669181e-07, "loss": 0.6057, "step": 30070 }, { "epoch": 0.86, "grad_norm": 4.884959196078415, "learning_rate": 4.970257770475912e-07, "loss": 0.6098, "step": 30071 }, { "epoch": 0.86, "grad_norm": 5.841136858101724, "learning_rate": 4.96824219366796e-07, "loss": 0.4134, "step": 30072 }, { "epoch": 0.86, "grad_norm": 4.99814113395389, "learning_rate": 4.966227004262686e-07, "loss": 0.3603, "step": 30073 }, { "epoch": 0.86, "grad_norm": 4.092715649571153, "learning_rate": 4.964212202277407e-07, "loss": 0.251, "step": 30074 }, { "epoch": 0.86, "grad_norm": 12.493027535369949, "learning_rate": 4.962197787729478e-07, "loss": 0.5269, "step": 30075 }, { "epoch": 0.86, "grad_norm": 1.7407019914895863, "learning_rate": 4.960183760636206e-07, "loss": 0.1382, "step": 30076 }, { "epoch": 0.86, "grad_norm": 9.914282301081215, "learning_rate": 4.958170121014933e-07, "loss": 0.3107, "step": 30077 }, { "epoch": 0.86, "grad_norm": 8.443825618199172, "learning_rate": 4.956156868882978e-07, "loss": 0.2843, "step": 30078 }, { "epoch": 0.86, "grad_norm": 4.599645451689728, "learning_rate": 4.954144004257655e-07, "loss": 0.4311, "step": 30079 }, { "epoch": 0.86, "grad_norm": 4.165929493861009, "learning_rate": 4.95213152715629e-07, "loss": 0.5718, "step": 30080 }, { "epoch": 0.86, "grad_norm": 3.4988402761333743, "learning_rate": 4.950119437596184e-07, "loss": 0.2158, "step": 30081 }, { "epoch": 0.86, "grad_norm": 5.830674296557214, "learning_rate": 4.948107735594659e-07, "loss": 0.5102, "step": 30082 }, { "epoch": 0.86, "grad_norm": 5.0598838781142605, "learning_rate": 4.946096421169011e-07, "loss": 0.1665, "step": 30083 }, { "epoch": 0.86, "grad_norm": 3.747353684977199, "learning_rate": 4.94408549433656e-07, "loss": 0.4871, "step": 30084 }, { "epoch": 0.86, "grad_norm": 5.911820786801542, "learning_rate": 4.942074955114584e-07, "loss": 0.4675, "step": 30085 }, { "epoch": 0.86, "grad_norm": 6.621472445210498, "learning_rate": 4.9400648035204e-07, "loss": 0.4042, "step": 30086 }, { "epoch": 0.86, "grad_norm": 4.339143794462054, "learning_rate": 4.938055039571276e-07, "loss": 0.3221, "step": 30087 }, { "epoch": 0.86, "grad_norm": 8.728305174773839, "learning_rate": 4.936045663284517e-07, "loss": 0.1518, "step": 30088 }, { "epoch": 0.86, "grad_norm": 8.258955169988388, "learning_rate": 4.934036674677417e-07, "loss": 0.6092, "step": 30089 }, { "epoch": 0.86, "grad_norm": 3.0221734571165166, "learning_rate": 4.932028073767253e-07, "loss": 0.234, "step": 30090 }, { "epoch": 0.86, "grad_norm": 3.473713185107388, "learning_rate": 4.930019860571306e-07, "loss": 0.5142, "step": 30091 }, { "epoch": 0.86, "grad_norm": 5.768537824537313, "learning_rate": 4.928012035106832e-07, "loss": 0.5488, "step": 30092 }, { "epoch": 0.86, "grad_norm": 5.729773772468953, "learning_rate": 4.926004597391131e-07, "loss": 0.7521, "step": 30093 }, { "epoch": 0.86, "grad_norm": 5.350504749766185, "learning_rate": 4.923997547441462e-07, "loss": 0.7225, "step": 30094 }, { "epoch": 0.86, "grad_norm": 8.27026998169155, "learning_rate": 4.921990885275085e-07, "loss": 0.6198, "step": 30095 }, { "epoch": 0.86, "grad_norm": 5.3624328920304585, "learning_rate": 4.919984610909279e-07, "loss": 0.498, "step": 30096 }, { "epoch": 0.86, "grad_norm": 5.6898140863171465, "learning_rate": 4.9179787243613e-07, "loss": 0.4894, "step": 30097 }, { "epoch": 0.86, "grad_norm": 4.208043280447822, "learning_rate": 4.915973225648396e-07, "loss": 0.3146, "step": 30098 }, { "epoch": 0.86, "grad_norm": 7.672586194421516, "learning_rate": 4.913968114787815e-07, "loss": 0.3567, "step": 30099 }, { "epoch": 0.86, "grad_norm": 4.536880782991185, "learning_rate": 4.911963391796826e-07, "loss": 0.4201, "step": 30100 }, { "epoch": 0.86, "grad_norm": 8.82583042808785, "learning_rate": 4.909959056692648e-07, "loss": 0.5804, "step": 30101 }, { "epoch": 0.86, "grad_norm": 4.15230654307766, "learning_rate": 4.907955109492557e-07, "loss": 0.3147, "step": 30102 }, { "epoch": 0.86, "grad_norm": 4.184560897264605, "learning_rate": 4.905951550213767e-07, "loss": 0.4584, "step": 30103 }, { "epoch": 0.86, "grad_norm": 7.303054643389204, "learning_rate": 4.90394837887353e-07, "loss": 0.5342, "step": 30104 }, { "epoch": 0.86, "grad_norm": 6.459900594070486, "learning_rate": 4.901945595489077e-07, "loss": 0.6446, "step": 30105 }, { "epoch": 0.86, "grad_norm": 6.803574060383056, "learning_rate": 4.899943200077617e-07, "loss": 0.4376, "step": 30106 }, { "epoch": 0.86, "grad_norm": 4.16296864755312, "learning_rate": 4.897941192656408e-07, "loss": 0.606, "step": 30107 }, { "epoch": 0.86, "grad_norm": 5.2867450905683855, "learning_rate": 4.895939573242647e-07, "loss": 0.1411, "step": 30108 }, { "epoch": 0.86, "grad_norm": 7.598075043185441, "learning_rate": 4.893938341853572e-07, "loss": 0.1687, "step": 30109 }, { "epoch": 0.86, "grad_norm": 4.13842792881181, "learning_rate": 4.891937498506382e-07, "loss": 0.3395, "step": 30110 }, { "epoch": 0.86, "grad_norm": 7.623319941054262, "learning_rate": 4.889937043218313e-07, "loss": 0.3515, "step": 30111 }, { "epoch": 0.86, "grad_norm": 7.930171346701356, "learning_rate": 4.887936976006564e-07, "loss": 0.3491, "step": 30112 }, { "epoch": 0.86, "grad_norm": 5.880063897908036, "learning_rate": 4.885937296888322e-07, "loss": 0.332, "step": 30113 }, { "epoch": 0.86, "grad_norm": 5.969892662080201, "learning_rate": 4.883938005880823e-07, "loss": 0.3527, "step": 30114 }, { "epoch": 0.86, "grad_norm": 4.430015592181664, "learning_rate": 4.881939103001232e-07, "loss": 0.147, "step": 30115 }, { "epoch": 0.86, "grad_norm": 5.345428872312208, "learning_rate": 4.87994058826678e-07, "loss": 0.3862, "step": 30116 }, { "epoch": 0.86, "grad_norm": 4.94589721694134, "learning_rate": 4.877942461694634e-07, "loss": 0.6112, "step": 30117 }, { "epoch": 0.86, "grad_norm": 6.115438432922669, "learning_rate": 4.875944723302011e-07, "loss": 0.6131, "step": 30118 }, { "epoch": 0.86, "grad_norm": 4.477424631767731, "learning_rate": 4.873947373106064e-07, "loss": 0.2638, "step": 30119 }, { "epoch": 0.86, "grad_norm": 4.758001514808186, "learning_rate": 4.871950411123988e-07, "loss": 0.2801, "step": 30120 }, { "epoch": 0.86, "grad_norm": 7.017371760964328, "learning_rate": 4.869953837372976e-07, "loss": 0.7615, "step": 30121 }, { "epoch": 0.86, "grad_norm": 7.164332726893054, "learning_rate": 4.867957651870187e-07, "loss": 0.5053, "step": 30122 }, { "epoch": 0.86, "grad_norm": 4.960725602558131, "learning_rate": 4.865961854632811e-07, "loss": 0.383, "step": 30123 }, { "epoch": 0.86, "grad_norm": 6.584057088092251, "learning_rate": 4.863966445678003e-07, "loss": 0.8332, "step": 30124 }, { "epoch": 0.86, "grad_norm": 3.9092195743733895, "learning_rate": 4.861971425022937e-07, "loss": 0.5032, "step": 30125 }, { "epoch": 0.86, "grad_norm": 4.5928228052411315, "learning_rate": 4.859976792684768e-07, "loss": 0.4302, "step": 30126 }, { "epoch": 0.86, "grad_norm": 2.334411224533559, "learning_rate": 4.857982548680668e-07, "loss": 0.2708, "step": 30127 }, { "epoch": 0.86, "grad_norm": 8.184290773847556, "learning_rate": 4.855988693027774e-07, "loss": 0.7277, "step": 30128 }, { "epoch": 0.86, "grad_norm": 4.261319009720605, "learning_rate": 4.853995225743257e-07, "loss": 0.4088, "step": 30129 }, { "epoch": 0.86, "grad_norm": 5.621379662108984, "learning_rate": 4.852002146844265e-07, "loss": 0.5733, "step": 30130 }, { "epoch": 0.86, "grad_norm": 6.921235892453868, "learning_rate": 4.850009456347943e-07, "loss": 0.7128, "step": 30131 }, { "epoch": 0.86, "grad_norm": 4.237919270751521, "learning_rate": 4.848017154271428e-07, "loss": 0.4823, "step": 30132 }, { "epoch": 0.86, "grad_norm": 4.957476801046088, "learning_rate": 4.846025240631852e-07, "loss": 0.5793, "step": 30133 }, { "epoch": 0.86, "grad_norm": 9.357091685667902, "learning_rate": 4.844033715446372e-07, "loss": 0.5263, "step": 30134 }, { "epoch": 0.86, "grad_norm": 4.048260867565802, "learning_rate": 4.842042578732098e-07, "loss": 0.3973, "step": 30135 }, { "epoch": 0.86, "grad_norm": 5.064893274566747, "learning_rate": 4.840051830506176e-07, "loss": 0.5628, "step": 30136 }, { "epoch": 0.86, "grad_norm": 5.0262614569430255, "learning_rate": 4.838061470785737e-07, "loss": 0.4955, "step": 30137 }, { "epoch": 0.86, "grad_norm": 10.340896028325599, "learning_rate": 4.836071499587891e-07, "loss": 0.3843, "step": 30138 }, { "epoch": 0.86, "grad_norm": 5.202344543759443, "learning_rate": 4.834081916929767e-07, "loss": 0.4771, "step": 30139 }, { "epoch": 0.86, "grad_norm": 7.106853398427858, "learning_rate": 4.832092722828463e-07, "loss": 0.6002, "step": 30140 }, { "epoch": 0.86, "grad_norm": 8.789941226688962, "learning_rate": 4.83010391730111e-07, "loss": 0.4715, "step": 30141 }, { "epoch": 0.86, "grad_norm": 3.491046420402559, "learning_rate": 4.828115500364805e-07, "loss": 0.1786, "step": 30142 }, { "epoch": 0.86, "grad_norm": 16.784341154488757, "learning_rate": 4.826127472036668e-07, "loss": 0.8352, "step": 30143 }, { "epoch": 0.86, "grad_norm": 7.565447114840922, "learning_rate": 4.824139832333791e-07, "loss": 0.4974, "step": 30144 }, { "epoch": 0.86, "grad_norm": 4.792456664047159, "learning_rate": 4.822152581273282e-07, "loss": 0.1758, "step": 30145 }, { "epoch": 0.86, "grad_norm": 3.4096503303046477, "learning_rate": 4.820165718872233e-07, "loss": 0.1725, "step": 30146 }, { "epoch": 0.86, "grad_norm": 5.946981123209389, "learning_rate": 4.818179245147725e-07, "loss": 0.3237, "step": 30147 }, { "epoch": 0.86, "grad_norm": 4.38258004889577, "learning_rate": 4.816193160116867e-07, "loss": 0.448, "step": 30148 }, { "epoch": 0.86, "grad_norm": 7.8163157185693, "learning_rate": 4.814207463796727e-07, "loss": 0.5533, "step": 30149 }, { "epoch": 0.86, "grad_norm": 8.767567118541994, "learning_rate": 4.812222156204405e-07, "loss": 0.3076, "step": 30150 }, { "epoch": 0.86, "grad_norm": 4.882560052067802, "learning_rate": 4.810237237356974e-07, "loss": 0.3634, "step": 30151 }, { "epoch": 0.86, "grad_norm": 4.74980815700817, "learning_rate": 4.808252707271499e-07, "loss": 0.4363, "step": 30152 }, { "epoch": 0.86, "grad_norm": 6.944361463898688, "learning_rate": 4.806268565965072e-07, "loss": 0.382, "step": 30153 }, { "epoch": 0.86, "grad_norm": 6.8969228254560475, "learning_rate": 4.80428481345474e-07, "loss": 0.4163, "step": 30154 }, { "epoch": 0.86, "grad_norm": 5.299795825192401, "learning_rate": 4.802301449757596e-07, "loss": 0.5125, "step": 30155 }, { "epoch": 0.86, "grad_norm": 3.893540450530075, "learning_rate": 4.800318474890675e-07, "loss": 0.4208, "step": 30156 }, { "epoch": 0.86, "grad_norm": 4.568606718558919, "learning_rate": 4.798335888871053e-07, "loss": 0.5009, "step": 30157 }, { "epoch": 0.86, "grad_norm": 6.608948353373245, "learning_rate": 4.796353691715788e-07, "loss": 0.539, "step": 30158 }, { "epoch": 0.86, "grad_norm": 4.926285478163615, "learning_rate": 4.794371883441928e-07, "loss": 0.3158, "step": 30159 }, { "epoch": 0.86, "grad_norm": 6.731340437115982, "learning_rate": 4.79239046406651e-07, "loss": 0.43, "step": 30160 }, { "epoch": 0.86, "grad_norm": 3.796634996152033, "learning_rate": 4.790409433606585e-07, "loss": 0.311, "step": 30161 }, { "epoch": 0.86, "grad_norm": 5.417048171864569, "learning_rate": 4.788428792079219e-07, "loss": 0.4538, "step": 30162 }, { "epoch": 0.86, "grad_norm": 4.044542148362454, "learning_rate": 4.786448539501421e-07, "loss": 0.551, "step": 30163 }, { "epoch": 0.86, "grad_norm": 6.665870483857016, "learning_rate": 4.784468675890247e-07, "loss": 0.2656, "step": 30164 }, { "epoch": 0.86, "grad_norm": 4.462005487391621, "learning_rate": 4.782489201262719e-07, "loss": 0.248, "step": 30165 }, { "epoch": 0.86, "grad_norm": 5.671226296227139, "learning_rate": 4.780510115635878e-07, "loss": 0.4784, "step": 30166 }, { "epoch": 0.86, "grad_norm": 4.414709695277029, "learning_rate": 4.778531419026722e-07, "loss": 0.4038, "step": 30167 }, { "epoch": 0.86, "grad_norm": 9.664551953045986, "learning_rate": 4.776553111452309e-07, "loss": 0.3147, "step": 30168 }, { "epoch": 0.86, "grad_norm": 5.948767601735814, "learning_rate": 4.774575192929631e-07, "loss": 0.4754, "step": 30169 }, { "epoch": 0.86, "grad_norm": 8.893525573944194, "learning_rate": 4.772597663475709e-07, "loss": 1.2621, "step": 30170 }, { "epoch": 0.86, "grad_norm": 6.118471169279978, "learning_rate": 4.770620523107578e-07, "loss": 0.5174, "step": 30171 }, { "epoch": 0.86, "grad_norm": 4.159920527267603, "learning_rate": 4.768643771842224e-07, "loss": 0.3197, "step": 30172 }, { "epoch": 0.86, "grad_norm": 10.262632494128088, "learning_rate": 4.7666674096966624e-07, "loss": 0.3784, "step": 30173 }, { "epoch": 0.86, "grad_norm": 7.13270674059909, "learning_rate": 4.7646914366878793e-07, "loss": 0.8681, "step": 30174 }, { "epoch": 0.86, "grad_norm": 6.370915619156796, "learning_rate": 4.762715852832894e-07, "loss": 0.4021, "step": 30175 }, { "epoch": 0.86, "grad_norm": 7.5451994431857905, "learning_rate": 4.760740658148688e-07, "loss": 0.6267, "step": 30176 }, { "epoch": 0.86, "grad_norm": 4.698263399420781, "learning_rate": 4.7587658526522587e-07, "loss": 0.7639, "step": 30177 }, { "epoch": 0.86, "grad_norm": 4.155757674120124, "learning_rate": 4.756791436360619e-07, "loss": 0.3576, "step": 30178 }, { "epoch": 0.86, "grad_norm": 3.3762958299505614, "learning_rate": 4.754817409290713e-07, "loss": 0.3337, "step": 30179 }, { "epoch": 0.86, "grad_norm": 5.171398405952059, "learning_rate": 4.752843771459548e-07, "loss": 0.3354, "step": 30180 }, { "epoch": 0.86, "grad_norm": 3.2102367072607416, "learning_rate": 4.750870522884088e-07, "loss": 0.3099, "step": 30181 }, { "epoch": 0.86, "grad_norm": 3.047733498938625, "learning_rate": 4.7488976635813256e-07, "loss": 0.2748, "step": 30182 }, { "epoch": 0.86, "grad_norm": 6.150391826708263, "learning_rate": 4.7469251935682195e-07, "loss": 0.6815, "step": 30183 }, { "epoch": 0.86, "grad_norm": 5.588307722112436, "learning_rate": 4.7449531128617554e-07, "loss": 0.4627, "step": 30184 }, { "epoch": 0.86, "grad_norm": 3.8785929173960914, "learning_rate": 4.742981421478882e-07, "loss": 0.3421, "step": 30185 }, { "epoch": 0.86, "grad_norm": 5.557567509744368, "learning_rate": 4.741010119436562e-07, "loss": 0.5724, "step": 30186 }, { "epoch": 0.86, "grad_norm": 4.678333194765733, "learning_rate": 4.7390392067517664e-07, "loss": 0.6824, "step": 30187 }, { "epoch": 0.86, "grad_norm": 6.817631914620229, "learning_rate": 4.737068683441437e-07, "loss": 0.5193, "step": 30188 }, { "epoch": 0.86, "grad_norm": 3.7898096822647678, "learning_rate": 4.7350985495225377e-07, "loss": 0.6349, "step": 30189 }, { "epoch": 0.86, "grad_norm": 3.4786678154544926, "learning_rate": 4.733128805012005e-07, "loss": 0.2316, "step": 30190 }, { "epoch": 0.86, "grad_norm": 7.186235664951893, "learning_rate": 4.731159449926803e-07, "loss": 0.3657, "step": 30191 }, { "epoch": 0.86, "grad_norm": 4.586587447163379, "learning_rate": 4.729190484283863e-07, "loss": 0.2681, "step": 30192 }, { "epoch": 0.86, "grad_norm": 4.910704686044005, "learning_rate": 4.727221908100121e-07, "loss": 0.4628, "step": 30193 }, { "epoch": 0.86, "grad_norm": 5.560094441805596, "learning_rate": 4.725253721392509e-07, "loss": 0.1909, "step": 30194 }, { "epoch": 0.86, "grad_norm": 9.074538765107485, "learning_rate": 4.7232859241779617e-07, "loss": 0.4273, "step": 30195 }, { "epoch": 0.86, "grad_norm": 5.341953265535193, "learning_rate": 4.721318516473422e-07, "loss": 0.3405, "step": 30196 }, { "epoch": 0.86, "grad_norm": 5.8988401161871735, "learning_rate": 4.719351498295793e-07, "loss": 0.9716, "step": 30197 }, { "epoch": 0.86, "grad_norm": 5.1209128760338665, "learning_rate": 4.717384869662017e-07, "loss": 0.4583, "step": 30198 }, { "epoch": 0.86, "grad_norm": 9.93784499619183, "learning_rate": 4.715418630589008e-07, "loss": 0.6139, "step": 30199 }, { "epoch": 0.86, "grad_norm": 4.559115421679094, "learning_rate": 4.713452781093669e-07, "loss": 0.4822, "step": 30200 }, { "epoch": 0.86, "grad_norm": 4.1764652559124675, "learning_rate": 4.7114873211929203e-07, "loss": 0.523, "step": 30201 }, { "epoch": 0.86, "grad_norm": 8.208443405329906, "learning_rate": 4.7095222509036644e-07, "loss": 0.3336, "step": 30202 }, { "epoch": 0.86, "grad_norm": 13.66340010069626, "learning_rate": 4.7075575702428213e-07, "loss": 0.8013, "step": 30203 }, { "epoch": 0.86, "grad_norm": 1.8462470812248242, "learning_rate": 4.705593279227277e-07, "loss": 0.1128, "step": 30204 }, { "epoch": 0.87, "grad_norm": 6.1580159709062965, "learning_rate": 4.7036293778739526e-07, "loss": 0.4099, "step": 30205 }, { "epoch": 0.87, "grad_norm": 4.377875282414153, "learning_rate": 4.701665866199712e-07, "loss": 0.5915, "step": 30206 }, { "epoch": 0.87, "grad_norm": 4.240633291687586, "learning_rate": 4.6997027442214683e-07, "loss": 0.6453, "step": 30207 }, { "epoch": 0.87, "grad_norm": 5.928795547648188, "learning_rate": 4.697740011956098e-07, "loss": 0.3261, "step": 30208 }, { "epoch": 0.87, "grad_norm": 4.7952806389061875, "learning_rate": 4.695777669420498e-07, "loss": 0.299, "step": 30209 }, { "epoch": 0.87, "grad_norm": 4.795713650657, "learning_rate": 4.693815716631539e-07, "loss": 0.4564, "step": 30210 }, { "epoch": 0.87, "grad_norm": 7.539246741697073, "learning_rate": 4.691854153606118e-07, "loss": 0.4229, "step": 30211 }, { "epoch": 0.87, "grad_norm": 5.992332625838689, "learning_rate": 4.689892980361094e-07, "loss": 0.4867, "step": 30212 }, { "epoch": 0.87, "grad_norm": 5.535111625669185, "learning_rate": 4.687932196913331e-07, "loss": 0.4278, "step": 30213 }, { "epoch": 0.87, "grad_norm": 2.581774919629911, "learning_rate": 4.685971803279715e-07, "loss": 0.12, "step": 30214 }, { "epoch": 0.87, "grad_norm": 6.526776438086417, "learning_rate": 4.6840117994771004e-07, "loss": 0.5492, "step": 30215 }, { "epoch": 0.87, "grad_norm": 7.012418120207083, "learning_rate": 4.682052185522362e-07, "loss": 0.7555, "step": 30216 }, { "epoch": 0.87, "grad_norm": 5.98006365579515, "learning_rate": 4.6800929614323365e-07, "loss": 0.8019, "step": 30217 }, { "epoch": 0.87, "grad_norm": 4.428700358957528, "learning_rate": 4.6781341272239045e-07, "loss": 0.7731, "step": 30218 }, { "epoch": 0.87, "grad_norm": 3.0767012658314266, "learning_rate": 4.6761756829138973e-07, "loss": 0.1061, "step": 30219 }, { "epoch": 0.87, "grad_norm": 8.905809036849494, "learning_rate": 4.6742176285191675e-07, "loss": 0.6765, "step": 30220 }, { "epoch": 0.87, "grad_norm": 6.962981363911735, "learning_rate": 4.672259964056569e-07, "loss": 0.4622, "step": 30221 }, { "epoch": 0.87, "grad_norm": 3.768209876077236, "learning_rate": 4.6703026895429325e-07, "loss": 0.1948, "step": 30222 }, { "epoch": 0.87, "grad_norm": 4.586784167660595, "learning_rate": 4.6683458049951057e-07, "loss": 0.3298, "step": 30223 }, { "epoch": 0.87, "grad_norm": 6.641186176598641, "learning_rate": 4.6663893104299083e-07, "loss": 0.3295, "step": 30224 }, { "epoch": 0.87, "grad_norm": 4.094454726501442, "learning_rate": 4.664433205864194e-07, "loss": 0.805, "step": 30225 }, { "epoch": 0.87, "grad_norm": 6.03892268207753, "learning_rate": 4.6624774913147817e-07, "loss": 0.3519, "step": 30226 }, { "epoch": 0.87, "grad_norm": 5.222393751145579, "learning_rate": 4.6605221667984814e-07, "loss": 0.3121, "step": 30227 }, { "epoch": 0.87, "grad_norm": 7.852039349247982, "learning_rate": 4.658567232332134e-07, "loss": 0.6376, "step": 30228 }, { "epoch": 0.87, "grad_norm": 8.195832096934327, "learning_rate": 4.656612687932538e-07, "loss": 0.6398, "step": 30229 }, { "epoch": 0.87, "grad_norm": 8.621894221960183, "learning_rate": 4.654658533616535e-07, "loss": 0.578, "step": 30230 }, { "epoch": 0.87, "grad_norm": 3.4937194433906216, "learning_rate": 4.652704769400912e-07, "loss": 0.2401, "step": 30231 }, { "epoch": 0.87, "grad_norm": 6.446289210441598, "learning_rate": 4.6507513953024943e-07, "loss": 0.3973, "step": 30232 }, { "epoch": 0.87, "grad_norm": 7.787001418414733, "learning_rate": 4.648798411338079e-07, "loss": 0.2877, "step": 30233 }, { "epoch": 0.87, "grad_norm": 6.590235600695514, "learning_rate": 4.64684581752447e-07, "loss": 0.2605, "step": 30234 }, { "epoch": 0.87, "grad_norm": 2.898433880662329, "learning_rate": 4.6448936138784486e-07, "loss": 0.2075, "step": 30235 }, { "epoch": 0.87, "grad_norm": 4.953218862324125, "learning_rate": 4.6429418004168223e-07, "loss": 0.4495, "step": 30236 }, { "epoch": 0.87, "grad_norm": 5.842954785508433, "learning_rate": 4.640990377156396e-07, "loss": 0.187, "step": 30237 }, { "epoch": 0.87, "grad_norm": 2.192959525670247, "learning_rate": 4.6390393441139437e-07, "loss": 0.2872, "step": 30238 }, { "epoch": 0.87, "grad_norm": 4.2213130370797005, "learning_rate": 4.637088701306253e-07, "loss": 0.4556, "step": 30239 }, { "epoch": 0.87, "grad_norm": 9.12475418386795, "learning_rate": 4.635138448750093e-07, "loss": 0.422, "step": 30240 }, { "epoch": 0.87, "grad_norm": 6.694935390753235, "learning_rate": 4.6331885864622614e-07, "loss": 0.2898, "step": 30241 }, { "epoch": 0.87, "grad_norm": 7.127529356209287, "learning_rate": 4.631239114459507e-07, "loss": 0.2973, "step": 30242 }, { "epoch": 0.87, "grad_norm": 8.63007968092255, "learning_rate": 4.629290032758621e-07, "loss": 0.6602, "step": 30243 }, { "epoch": 0.87, "grad_norm": 5.206603482669352, "learning_rate": 4.627341341376379e-07, "loss": 0.4265, "step": 30244 }, { "epoch": 0.87, "grad_norm": 3.082842759894046, "learning_rate": 4.625393040329529e-07, "loss": 0.2989, "step": 30245 }, { "epoch": 0.87, "grad_norm": 3.7351304172052617, "learning_rate": 4.6234451296348405e-07, "loss": 0.2193, "step": 30246 }, { "epoch": 0.87, "grad_norm": 5.221948157664947, "learning_rate": 4.62149760930905e-07, "loss": 0.6577, "step": 30247 }, { "epoch": 0.87, "grad_norm": 3.9139639105343953, "learning_rate": 4.619550479368945e-07, "loss": 0.3398, "step": 30248 }, { "epoch": 0.87, "grad_norm": 6.7553647233266405, "learning_rate": 4.6176037398312446e-07, "loss": 0.8534, "step": 30249 }, { "epoch": 0.87, "grad_norm": 3.148920462318882, "learning_rate": 4.6156573907127245e-07, "loss": 0.6024, "step": 30250 }, { "epoch": 0.87, "grad_norm": 3.9431400625413113, "learning_rate": 4.613711432030105e-07, "loss": 0.3152, "step": 30251 }, { "epoch": 0.87, "grad_norm": 3.32736944299305, "learning_rate": 4.6117658638001493e-07, "loss": 0.2081, "step": 30252 }, { "epoch": 0.87, "grad_norm": 6.6446293504617016, "learning_rate": 4.6098206860395786e-07, "loss": 0.6409, "step": 30253 }, { "epoch": 0.87, "grad_norm": 16.608638844006478, "learning_rate": 4.6078758987651295e-07, "loss": 0.2717, "step": 30254 }, { "epoch": 0.87, "grad_norm": 5.39368155249244, "learning_rate": 4.605931501993538e-07, "loss": 0.4373, "step": 30255 }, { "epoch": 0.87, "grad_norm": 9.012613358756903, "learning_rate": 4.603987495741524e-07, "loss": 0.3604, "step": 30256 }, { "epoch": 0.87, "grad_norm": 4.144385821553132, "learning_rate": 4.602043880025825e-07, "loss": 0.1788, "step": 30257 }, { "epoch": 0.87, "grad_norm": 5.745596733874394, "learning_rate": 4.600100654863143e-07, "loss": 0.7249, "step": 30258 }, { "epoch": 0.87, "grad_norm": 3.884676142563879, "learning_rate": 4.598157820270216e-07, "loss": 0.3721, "step": 30259 }, { "epoch": 0.87, "grad_norm": 4.549385033434088, "learning_rate": 4.596215376263746e-07, "loss": 0.2684, "step": 30260 }, { "epoch": 0.87, "grad_norm": 4.729899579306821, "learning_rate": 4.594273322860432e-07, "loss": 0.2566, "step": 30261 }, { "epoch": 0.87, "grad_norm": 3.928599758789167, "learning_rate": 4.5923316600770097e-07, "loss": 0.6511, "step": 30262 }, { "epoch": 0.87, "grad_norm": 5.772628459071054, "learning_rate": 4.590390387930155e-07, "loss": 0.4041, "step": 30263 }, { "epoch": 0.87, "grad_norm": 4.540789119855012, "learning_rate": 4.5884495064365876e-07, "loss": 0.6989, "step": 30264 }, { "epoch": 0.87, "grad_norm": 5.019739501432988, "learning_rate": 4.5865090156129997e-07, "loss": 0.3474, "step": 30265 }, { "epoch": 0.87, "grad_norm": 5.783315918352948, "learning_rate": 4.5845689154760843e-07, "loss": 0.2709, "step": 30266 }, { "epoch": 0.87, "grad_norm": 2.810498882040342, "learning_rate": 4.5826292060425216e-07, "loss": 0.3726, "step": 30267 }, { "epoch": 0.87, "grad_norm": 9.65781367705714, "learning_rate": 4.5806898873290093e-07, "loss": 0.4228, "step": 30268 }, { "epoch": 0.87, "grad_norm": 5.48270223381112, "learning_rate": 4.578750959352235e-07, "loss": 0.2701, "step": 30269 }, { "epoch": 0.87, "grad_norm": 3.7106193325776267, "learning_rate": 4.576812422128868e-07, "loss": 0.3244, "step": 30270 }, { "epoch": 0.87, "grad_norm": 6.561084776368004, "learning_rate": 4.5748742756756e-07, "loss": 0.3989, "step": 30271 }, { "epoch": 0.87, "grad_norm": 11.92787629313701, "learning_rate": 4.572936520009097e-07, "loss": 0.4047, "step": 30272 }, { "epoch": 0.87, "grad_norm": 4.017305401051555, "learning_rate": 4.570999155146022e-07, "loss": 0.1046, "step": 30273 }, { "epoch": 0.87, "grad_norm": 16.684706764902405, "learning_rate": 4.5690621811030457e-07, "loss": 0.5065, "step": 30274 }, { "epoch": 0.87, "grad_norm": 4.1703197423443985, "learning_rate": 4.5671255978968377e-07, "loss": 0.4731, "step": 30275 }, { "epoch": 0.87, "grad_norm": 3.750487550194809, "learning_rate": 4.565189405544046e-07, "loss": 0.2309, "step": 30276 }, { "epoch": 0.87, "grad_norm": 5.315446844148604, "learning_rate": 4.5632536040613405e-07, "loss": 0.7167, "step": 30277 }, { "epoch": 0.87, "grad_norm": 3.1555035152158832, "learning_rate": 4.561318193465375e-07, "loss": 0.1568, "step": 30278 }, { "epoch": 0.87, "grad_norm": 3.290542613824887, "learning_rate": 4.5593831737727967e-07, "loss": 0.368, "step": 30279 }, { "epoch": 0.87, "grad_norm": 8.608149003316896, "learning_rate": 4.5574485450002484e-07, "loss": 0.4059, "step": 30280 }, { "epoch": 0.87, "grad_norm": 15.072226869959165, "learning_rate": 4.555514307164366e-07, "loss": 0.4079, "step": 30281 }, { "epoch": 0.87, "grad_norm": 4.5360910272059565, "learning_rate": 4.553580460281809e-07, "loss": 0.2668, "step": 30282 }, { "epoch": 0.87, "grad_norm": 4.263724368266349, "learning_rate": 4.551647004369192e-07, "loss": 0.1785, "step": 30283 }, { "epoch": 0.87, "grad_norm": 7.6633969950289, "learning_rate": 4.5497139394431624e-07, "loss": 0.5436, "step": 30284 }, { "epoch": 0.87, "grad_norm": 9.791920457080572, "learning_rate": 4.547781265520357e-07, "loss": 0.3643, "step": 30285 }, { "epoch": 0.87, "grad_norm": 5.279741037802354, "learning_rate": 4.54584898261739e-07, "loss": 0.3064, "step": 30286 }, { "epoch": 0.87, "grad_norm": 5.0089613240965605, "learning_rate": 4.5439170907508935e-07, "loss": 0.3209, "step": 30287 }, { "epoch": 0.87, "grad_norm": 2.8171570474223855, "learning_rate": 4.54198558993747e-07, "loss": 0.2725, "step": 30288 }, { "epoch": 0.87, "grad_norm": 7.550590848224236, "learning_rate": 4.5400544801937565e-07, "loss": 0.2863, "step": 30289 }, { "epoch": 0.87, "grad_norm": 5.7363901365498196, "learning_rate": 4.5381237615363447e-07, "loss": 0.3465, "step": 30290 }, { "epoch": 0.87, "grad_norm": 5.458662484220671, "learning_rate": 4.536193433981867e-07, "loss": 0.3451, "step": 30291 }, { "epoch": 0.87, "grad_norm": 4.0228947541904265, "learning_rate": 4.534263497546915e-07, "loss": 0.5733, "step": 30292 }, { "epoch": 0.87, "grad_norm": 9.504638944668327, "learning_rate": 4.5323339522481024e-07, "loss": 0.4859, "step": 30293 }, { "epoch": 0.87, "grad_norm": 5.711906403099295, "learning_rate": 4.530404798102023e-07, "loss": 0.2963, "step": 30294 }, { "epoch": 0.87, "grad_norm": 5.011440825393765, "learning_rate": 4.528476035125262e-07, "loss": 0.3642, "step": 30295 }, { "epoch": 0.87, "grad_norm": 5.880437242249438, "learning_rate": 4.526547663334435e-07, "loss": 0.3315, "step": 30296 }, { "epoch": 0.87, "grad_norm": 3.739504065067526, "learning_rate": 4.5246196827461063e-07, "loss": 0.3411, "step": 30297 }, { "epoch": 0.87, "grad_norm": 8.433271040579578, "learning_rate": 4.52269209337689e-07, "loss": 0.3263, "step": 30298 }, { "epoch": 0.87, "grad_norm": 8.64686621199183, "learning_rate": 4.5207648952433513e-07, "loss": 0.5999, "step": 30299 }, { "epoch": 0.87, "grad_norm": 6.8984837773499965, "learning_rate": 4.5188380883620653e-07, "loss": 0.4533, "step": 30300 }, { "epoch": 0.87, "grad_norm": 6.106845263669439, "learning_rate": 4.5169116727496186e-07, "loss": 0.3027, "step": 30301 }, { "epoch": 0.87, "grad_norm": 7.852468926029121, "learning_rate": 4.514985648422576e-07, "loss": 0.7778, "step": 30302 }, { "epoch": 0.87, "grad_norm": 6.57347294004701, "learning_rate": 4.5130600153975246e-07, "loss": 0.682, "step": 30303 }, { "epoch": 0.87, "grad_norm": 3.209899140434206, "learning_rate": 4.5111347736910004e-07, "loss": 0.2316, "step": 30304 }, { "epoch": 0.87, "grad_norm": 3.384300908821344, "learning_rate": 4.5092099233195964e-07, "loss": 0.3499, "step": 30305 }, { "epoch": 0.87, "grad_norm": 4.6866619886905845, "learning_rate": 4.50728546429986e-07, "loss": 0.2692, "step": 30306 }, { "epoch": 0.87, "grad_norm": 6.61342949202576, "learning_rate": 4.5053613966483445e-07, "loss": 0.2229, "step": 30307 }, { "epoch": 0.87, "grad_norm": 6.930394683844614, "learning_rate": 4.503437720381593e-07, "loss": 0.4445, "step": 30308 }, { "epoch": 0.87, "grad_norm": 5.443966166850113, "learning_rate": 4.501514435516169e-07, "loss": 0.4431, "step": 30309 }, { "epoch": 0.87, "grad_norm": 6.321495822428692, "learning_rate": 4.4995915420686153e-07, "loss": 0.4297, "step": 30310 }, { "epoch": 0.87, "grad_norm": 6.180915257229565, "learning_rate": 4.497669040055469e-07, "loss": 0.3276, "step": 30311 }, { "epoch": 0.87, "grad_norm": 4.429090133297641, "learning_rate": 4.495746929493283e-07, "loss": 0.3359, "step": 30312 }, { "epoch": 0.87, "grad_norm": 4.728811892115264, "learning_rate": 4.4938252103985836e-07, "loss": 0.283, "step": 30313 }, { "epoch": 0.87, "grad_norm": 3.787512267442231, "learning_rate": 4.4919038827879067e-07, "loss": 0.3945, "step": 30314 }, { "epoch": 0.87, "grad_norm": 3.9251304884194607, "learning_rate": 4.4899829466777623e-07, "loss": 0.3072, "step": 30315 }, { "epoch": 0.87, "grad_norm": 5.999341749799968, "learning_rate": 4.488062402084703e-07, "loss": 0.5931, "step": 30316 }, { "epoch": 0.87, "grad_norm": 6.603038735645399, "learning_rate": 4.4861422490252325e-07, "loss": 0.181, "step": 30317 }, { "epoch": 0.87, "grad_norm": 8.885833199782557, "learning_rate": 4.4842224875158715e-07, "loss": 0.7693, "step": 30318 }, { "epoch": 0.87, "grad_norm": 5.57654478626639, "learning_rate": 4.48230311757315e-07, "loss": 0.0998, "step": 30319 }, { "epoch": 0.87, "grad_norm": 6.054855560923602, "learning_rate": 4.480384139213573e-07, "loss": 0.648, "step": 30320 }, { "epoch": 0.87, "grad_norm": 8.457771815059955, "learning_rate": 4.478465552453648e-07, "loss": 0.476, "step": 30321 }, { "epoch": 0.87, "grad_norm": 4.750648353899672, "learning_rate": 4.476547357309868e-07, "loss": 0.4427, "step": 30322 }, { "epoch": 0.87, "grad_norm": 5.119590556119776, "learning_rate": 4.4746295537987536e-07, "loss": 0.5301, "step": 30323 }, { "epoch": 0.87, "grad_norm": 6.34903226521991, "learning_rate": 4.4727121419367913e-07, "loss": 0.5857, "step": 30324 }, { "epoch": 0.87, "grad_norm": 3.4984740609306915, "learning_rate": 4.470795121740484e-07, "loss": 0.3593, "step": 30325 }, { "epoch": 0.87, "grad_norm": 6.790532592762631, "learning_rate": 4.468878493226325e-07, "loss": 0.7762, "step": 30326 }, { "epoch": 0.87, "grad_norm": 4.965930570655555, "learning_rate": 4.4669622564107896e-07, "loss": 0.49, "step": 30327 }, { "epoch": 0.87, "grad_norm": 3.6049223348370365, "learning_rate": 4.465046411310381e-07, "loss": 0.257, "step": 30328 }, { "epoch": 0.87, "grad_norm": 6.003847914879562, "learning_rate": 4.4631309579415584e-07, "loss": 0.7005, "step": 30329 }, { "epoch": 0.87, "grad_norm": 4.096492402043485, "learning_rate": 4.4612158963208253e-07, "loss": 0.3912, "step": 30330 }, { "epoch": 0.87, "grad_norm": 8.12567329551417, "learning_rate": 4.4593012264646353e-07, "loss": 0.5245, "step": 30331 }, { "epoch": 0.87, "grad_norm": 8.82282761157661, "learning_rate": 4.4573869483894806e-07, "loss": 0.5305, "step": 30332 }, { "epoch": 0.87, "grad_norm": 7.019115081151305, "learning_rate": 4.455473062111815e-07, "loss": 0.3091, "step": 30333 }, { "epoch": 0.87, "grad_norm": 9.899573401690672, "learning_rate": 4.4535595676481025e-07, "loss": 0.5643, "step": 30334 }, { "epoch": 0.87, "grad_norm": 5.263008350456292, "learning_rate": 4.4516464650148137e-07, "loss": 0.4183, "step": 30335 }, { "epoch": 0.87, "grad_norm": 7.329686663385042, "learning_rate": 4.449733754228397e-07, "loss": 0.2193, "step": 30336 }, { "epoch": 0.87, "grad_norm": 2.939663313641164, "learning_rate": 4.447821435305322e-07, "loss": 0.1089, "step": 30337 }, { "epoch": 0.87, "grad_norm": 9.189240323236774, "learning_rate": 4.4459095082620196e-07, "loss": 0.6296, "step": 30338 }, { "epoch": 0.87, "grad_norm": 9.136127819768836, "learning_rate": 4.443997973114955e-07, "loss": 0.4184, "step": 30339 }, { "epoch": 0.87, "grad_norm": 3.61055310331437, "learning_rate": 4.4420868298805707e-07, "loss": 0.2264, "step": 30340 }, { "epoch": 0.87, "grad_norm": 6.576784300830805, "learning_rate": 4.4401760785752925e-07, "loss": 0.5176, "step": 30341 }, { "epoch": 0.87, "grad_norm": 3.9691625065327547, "learning_rate": 4.438265719215584e-07, "loss": 0.6593, "step": 30342 }, { "epoch": 0.87, "grad_norm": 10.683943128713663, "learning_rate": 4.4363557518178503e-07, "loss": 0.449, "step": 30343 }, { "epoch": 0.87, "grad_norm": 5.47571320961582, "learning_rate": 4.434446176398555e-07, "loss": 0.5089, "step": 30344 }, { "epoch": 0.87, "grad_norm": 5.5168402524454345, "learning_rate": 4.43253699297409e-07, "loss": 0.3707, "step": 30345 }, { "epoch": 0.87, "grad_norm": 5.0709359207475275, "learning_rate": 4.430628201560916e-07, "loss": 0.358, "step": 30346 }, { "epoch": 0.87, "grad_norm": 3.8988158017617214, "learning_rate": 4.428719802175435e-07, "loss": 0.383, "step": 30347 }, { "epoch": 0.87, "grad_norm": 3.100895903705373, "learning_rate": 4.426811794834068e-07, "loss": 0.2504, "step": 30348 }, { "epoch": 0.87, "grad_norm": 4.103476609957392, "learning_rate": 4.424904179553213e-07, "loss": 0.3206, "step": 30349 }, { "epoch": 0.87, "grad_norm": 6.200291752872581, "learning_rate": 4.422996956349301e-07, "loss": 0.6946, "step": 30350 }, { "epoch": 0.87, "grad_norm": 3.7295470372292225, "learning_rate": 4.4210901252387415e-07, "loss": 0.1143, "step": 30351 }, { "epoch": 0.87, "grad_norm": 10.649131881004704, "learning_rate": 4.419183686237932e-07, "loss": 0.355, "step": 30352 }, { "epoch": 0.87, "grad_norm": 4.599317952053812, "learning_rate": 4.4172776393632765e-07, "loss": 0.3344, "step": 30353 }, { "epoch": 0.87, "grad_norm": 3.6015146711570463, "learning_rate": 4.415371984631156e-07, "loss": 0.3127, "step": 30354 }, { "epoch": 0.87, "grad_norm": 5.11387841778858, "learning_rate": 4.4134667220579906e-07, "loss": 0.4945, "step": 30355 }, { "epoch": 0.87, "grad_norm": 7.357689536916822, "learning_rate": 4.4115618516601453e-07, "loss": 0.4684, "step": 30356 }, { "epoch": 0.87, "grad_norm": 8.155983288375998, "learning_rate": 4.4096573734540347e-07, "loss": 0.5068, "step": 30357 }, { "epoch": 0.87, "grad_norm": 7.817338833515424, "learning_rate": 4.4077532874560123e-07, "loss": 0.2177, "step": 30358 }, { "epoch": 0.87, "grad_norm": 2.929941313875192, "learning_rate": 4.405849593682487e-07, "loss": 0.2777, "step": 30359 }, { "epoch": 0.87, "grad_norm": 6.228487911264387, "learning_rate": 4.40394629214983e-07, "loss": 0.3623, "step": 30360 }, { "epoch": 0.87, "grad_norm": 8.15565889050811, "learning_rate": 4.4020433828743936e-07, "loss": 0.5655, "step": 30361 }, { "epoch": 0.87, "grad_norm": 9.44470707646637, "learning_rate": 4.400140865872571e-07, "loss": 0.7833, "step": 30362 }, { "epoch": 0.87, "grad_norm": 4.849695113519179, "learning_rate": 4.398238741160715e-07, "loss": 0.5626, "step": 30363 }, { "epoch": 0.87, "grad_norm": 4.519625413024176, "learning_rate": 4.3963370087552084e-07, "loss": 0.3671, "step": 30364 }, { "epoch": 0.87, "grad_norm": 3.9166521045062392, "learning_rate": 4.394435668672392e-07, "loss": 0.3811, "step": 30365 }, { "epoch": 0.87, "grad_norm": 6.647469330164931, "learning_rate": 4.392534720928632e-07, "loss": 0.5913, "step": 30366 }, { "epoch": 0.87, "grad_norm": 3.740764116267352, "learning_rate": 4.390634165540286e-07, "loss": 0.5305, "step": 30367 }, { "epoch": 0.87, "grad_norm": 6.9816788626198765, "learning_rate": 4.388734002523687e-07, "loss": 0.7918, "step": 30368 }, { "epoch": 0.87, "grad_norm": 5.224282809595379, "learning_rate": 4.386834231895204e-07, "loss": 0.7083, "step": 30369 }, { "epoch": 0.87, "grad_norm": 5.2888574828133486, "learning_rate": 4.3849348536711635e-07, "loss": 0.3671, "step": 30370 }, { "epoch": 0.87, "grad_norm": 4.993537911218953, "learning_rate": 4.3830358678679184e-07, "loss": 0.3668, "step": 30371 }, { "epoch": 0.87, "grad_norm": 14.889086953114143, "learning_rate": 4.3811372745017897e-07, "loss": 0.3981, "step": 30372 }, { "epoch": 0.87, "grad_norm": 5.044951364606029, "learning_rate": 4.379239073589131e-07, "loss": 0.4147, "step": 30373 }, { "epoch": 0.87, "grad_norm": 5.345480454657228, "learning_rate": 4.377341265146262e-07, "loss": 0.3277, "step": 30374 }, { "epoch": 0.87, "grad_norm": 5.8707038105235725, "learning_rate": 4.375443849189498e-07, "loss": 0.4294, "step": 30375 }, { "epoch": 0.87, "grad_norm": 5.250845750169351, "learning_rate": 4.373546825735181e-07, "loss": 0.5024, "step": 30376 }, { "epoch": 0.87, "grad_norm": 3.8002999011705207, "learning_rate": 4.371650194799615e-07, "loss": 0.2751, "step": 30377 }, { "epoch": 0.87, "grad_norm": 6.680178618641356, "learning_rate": 4.3697539563991366e-07, "loss": 0.2484, "step": 30378 }, { "epoch": 0.87, "grad_norm": 7.21785905632651, "learning_rate": 4.367858110550033e-07, "loss": 0.4555, "step": 30379 }, { "epoch": 0.87, "grad_norm": 4.215498681575989, "learning_rate": 4.3659626572686463e-07, "loss": 0.2708, "step": 30380 }, { "epoch": 0.87, "grad_norm": 5.309951080987744, "learning_rate": 4.364067596571253e-07, "loss": 0.4555, "step": 30381 }, { "epoch": 0.87, "grad_norm": 4.279394569134946, "learning_rate": 4.3621729284741675e-07, "loss": 0.3987, "step": 30382 }, { "epoch": 0.87, "grad_norm": 5.292236998253075, "learning_rate": 4.360278652993688e-07, "loss": 0.6573, "step": 30383 }, { "epoch": 0.87, "grad_norm": 4.377905589234577, "learning_rate": 4.358384770146107e-07, "loss": 0.4231, "step": 30384 }, { "epoch": 0.87, "grad_norm": 5.879587695399696, "learning_rate": 4.356491279947728e-07, "loss": 0.2519, "step": 30385 }, { "epoch": 0.87, "grad_norm": 4.482844154562599, "learning_rate": 4.354598182414843e-07, "loss": 0.3003, "step": 30386 }, { "epoch": 0.87, "grad_norm": 8.84789553691322, "learning_rate": 4.3527054775637234e-07, "loss": 1.0751, "step": 30387 }, { "epoch": 0.87, "grad_norm": 4.1663520916763375, "learning_rate": 4.35081316541065e-07, "loss": 0.3617, "step": 30388 }, { "epoch": 0.87, "grad_norm": 3.6812778607853978, "learning_rate": 4.348921245971921e-07, "loss": 0.1947, "step": 30389 }, { "epoch": 0.87, "grad_norm": 4.020783684313642, "learning_rate": 4.347029719263796e-07, "loss": 0.2085, "step": 30390 }, { "epoch": 0.87, "grad_norm": 6.291232096166998, "learning_rate": 4.3451385853025497e-07, "loss": 0.5108, "step": 30391 }, { "epoch": 0.87, "grad_norm": 2.181614847466367, "learning_rate": 4.3432478441044647e-07, "loss": 0.2631, "step": 30392 }, { "epoch": 0.87, "grad_norm": 6.772242597980982, "learning_rate": 4.3413574956857995e-07, "loss": 0.4482, "step": 30393 }, { "epoch": 0.87, "grad_norm": 5.142827591167801, "learning_rate": 4.339467540062808e-07, "loss": 0.4996, "step": 30394 }, { "epoch": 0.87, "grad_norm": 4.670719935288103, "learning_rate": 4.33757797725175e-07, "loss": 0.2942, "step": 30395 }, { "epoch": 0.87, "grad_norm": 6.496139773941556, "learning_rate": 4.3356888072688953e-07, "loss": 0.5455, "step": 30396 }, { "epoch": 0.87, "grad_norm": 1.6284619372607236, "learning_rate": 4.3338000301304806e-07, "loss": 0.096, "step": 30397 }, { "epoch": 0.87, "grad_norm": 6.695486522714267, "learning_rate": 4.331911645852771e-07, "loss": 0.554, "step": 30398 }, { "epoch": 0.87, "grad_norm": 5.338341457675575, "learning_rate": 4.330023654451987e-07, "loss": 0.5919, "step": 30399 }, { "epoch": 0.87, "grad_norm": 4.606841965710751, "learning_rate": 4.328136055944404e-07, "loss": 0.3053, "step": 30400 }, { "epoch": 0.87, "grad_norm": 5.506346249036853, "learning_rate": 4.3262488503462375e-07, "loss": 0.3364, "step": 30401 }, { "epoch": 0.87, "grad_norm": 5.644193497950755, "learning_rate": 4.3243620376737184e-07, "loss": 0.4753, "step": 30402 }, { "epoch": 0.87, "grad_norm": 12.313459378349322, "learning_rate": 4.3224756179431003e-07, "loss": 0.3124, "step": 30403 }, { "epoch": 0.87, "grad_norm": 11.29186590424797, "learning_rate": 4.320589591170593e-07, "loss": 0.3848, "step": 30404 }, { "epoch": 0.87, "grad_norm": 4.46988764867036, "learning_rate": 4.318703957372439e-07, "loss": 0.2672, "step": 30405 }, { "epoch": 0.87, "grad_norm": 14.790416206193429, "learning_rate": 4.316818716564841e-07, "loss": 0.388, "step": 30406 }, { "epoch": 0.87, "grad_norm": 5.471977894336822, "learning_rate": 4.314933868764032e-07, "loss": 0.7745, "step": 30407 }, { "epoch": 0.87, "grad_norm": 9.072791313651457, "learning_rate": 4.313049413986226e-07, "loss": 0.8425, "step": 30408 }, { "epoch": 0.87, "grad_norm": 6.55525730643555, "learning_rate": 4.3111653522476203e-07, "loss": 0.3392, "step": 30409 }, { "epoch": 0.87, "grad_norm": 2.7968274703862193, "learning_rate": 4.3092816835644426e-07, "loss": 0.2788, "step": 30410 }, { "epoch": 0.87, "grad_norm": 4.867375377868091, "learning_rate": 4.3073984079528787e-07, "loss": 0.2486, "step": 30411 }, { "epoch": 0.87, "grad_norm": 6.968759990052046, "learning_rate": 4.305515525429155e-07, "loss": 0.5806, "step": 30412 }, { "epoch": 0.87, "grad_norm": 5.540300626344516, "learning_rate": 4.303633036009447e-07, "loss": 0.3386, "step": 30413 }, { "epoch": 0.87, "grad_norm": 7.21537424397736, "learning_rate": 4.301750939709964e-07, "loss": 0.7874, "step": 30414 }, { "epoch": 0.87, "grad_norm": 3.559141248842827, "learning_rate": 4.299869236546883e-07, "loss": 0.2563, "step": 30415 }, { "epoch": 0.87, "grad_norm": 5.875671348360569, "learning_rate": 4.2979879265363956e-07, "loss": 0.615, "step": 30416 }, { "epoch": 0.87, "grad_norm": 6.409152717771898, "learning_rate": 4.2961070096947e-07, "loss": 0.1797, "step": 30417 }, { "epoch": 0.87, "grad_norm": 6.44164744354117, "learning_rate": 4.294226486037961e-07, "loss": 0.5706, "step": 30418 }, { "epoch": 0.87, "grad_norm": 3.2050329713122276, "learning_rate": 4.2923463555823777e-07, "loss": 0.3484, "step": 30419 }, { "epoch": 0.87, "grad_norm": 4.470312552267085, "learning_rate": 4.2904666183441026e-07, "loss": 0.2498, "step": 30420 }, { "epoch": 0.87, "grad_norm": 7.6751921837816806, "learning_rate": 4.288587274339323e-07, "loss": 0.8704, "step": 30421 }, { "epoch": 0.87, "grad_norm": 5.348493823937014, "learning_rate": 4.286708323584182e-07, "loss": 0.1936, "step": 30422 }, { "epoch": 0.87, "grad_norm": 4.2498477880484735, "learning_rate": 4.284829766094878e-07, "loss": 0.5825, "step": 30423 }, { "epoch": 0.87, "grad_norm": 3.123723379680384, "learning_rate": 4.2829516018875415e-07, "loss": 0.2934, "step": 30424 }, { "epoch": 0.87, "grad_norm": 7.9310028638881445, "learning_rate": 4.281073830978344e-07, "loss": 0.667, "step": 30425 }, { "epoch": 0.87, "grad_norm": 2.2630776126412484, "learning_rate": 4.2791964533834495e-07, "loss": 0.3052, "step": 30426 }, { "epoch": 0.87, "grad_norm": 7.203884788045177, "learning_rate": 4.277319469118996e-07, "loss": 0.2871, "step": 30427 }, { "epoch": 0.87, "grad_norm": 4.501742740771212, "learning_rate": 4.2754428782011357e-07, "loss": 0.1681, "step": 30428 }, { "epoch": 0.87, "grad_norm": 6.301934568892945, "learning_rate": 4.273566680646002e-07, "loss": 0.6306, "step": 30429 }, { "epoch": 0.87, "grad_norm": 7.881710977406236, "learning_rate": 4.271690876469753e-07, "loss": 0.7354, "step": 30430 }, { "epoch": 0.87, "grad_norm": 7.997631005361635, "learning_rate": 4.2698154656885036e-07, "loss": 0.4472, "step": 30431 }, { "epoch": 0.87, "grad_norm": 5.797633571295107, "learning_rate": 4.2679404483184086e-07, "loss": 0.3025, "step": 30432 }, { "epoch": 0.87, "grad_norm": 7.554344540279988, "learning_rate": 4.2660658243755924e-07, "loss": 0.5334, "step": 30433 }, { "epoch": 0.87, "grad_norm": 2.4856824733275373, "learning_rate": 4.264191593876188e-07, "loss": 0.2206, "step": 30434 }, { "epoch": 0.87, "grad_norm": 11.880920901649521, "learning_rate": 4.2623177568363093e-07, "loss": 0.493, "step": 30435 }, { "epoch": 0.87, "grad_norm": 8.670574358754635, "learning_rate": 4.260444313272072e-07, "loss": 0.9176, "step": 30436 }, { "epoch": 0.87, "grad_norm": 3.972792391474471, "learning_rate": 4.258571263199607e-07, "loss": 0.2913, "step": 30437 }, { "epoch": 0.87, "grad_norm": 6.338220356738405, "learning_rate": 4.2566986066350123e-07, "loss": 0.535, "step": 30438 }, { "epoch": 0.87, "grad_norm": 4.293364988164917, "learning_rate": 4.25482634359442e-07, "loss": 0.4858, "step": 30439 }, { "epoch": 0.87, "grad_norm": 6.363402270061399, "learning_rate": 4.2529544740939234e-07, "loss": 0.5961, "step": 30440 }, { "epoch": 0.87, "grad_norm": 3.480856962634443, "learning_rate": 4.2510829981496195e-07, "loss": 0.3335, "step": 30441 }, { "epoch": 0.87, "grad_norm": 4.856274023205265, "learning_rate": 4.249211915777618e-07, "loss": 0.2488, "step": 30442 }, { "epoch": 0.87, "grad_norm": 3.678860006408322, "learning_rate": 4.247341226994012e-07, "loss": 0.3557, "step": 30443 }, { "epoch": 0.87, "grad_norm": 5.907640485219129, "learning_rate": 4.245470931814899e-07, "loss": 0.5478, "step": 30444 }, { "epoch": 0.87, "grad_norm": 3.4437014906940413, "learning_rate": 4.243601030256361e-07, "loss": 0.2888, "step": 30445 }, { "epoch": 0.87, "grad_norm": 5.858809868580126, "learning_rate": 4.2417315223344966e-07, "loss": 0.4133, "step": 30446 }, { "epoch": 0.87, "grad_norm": 7.997591252094051, "learning_rate": 4.2398624080653873e-07, "loss": 0.7283, "step": 30447 }, { "epoch": 0.87, "grad_norm": 4.142194025990724, "learning_rate": 4.237993687465092e-07, "loss": 0.1753, "step": 30448 }, { "epoch": 0.87, "grad_norm": 8.711277073284265, "learning_rate": 4.2361253605497144e-07, "loss": 0.7186, "step": 30449 }, { "epoch": 0.87, "grad_norm": 7.711468864769353, "learning_rate": 4.234257427335309e-07, "loss": 1.0751, "step": 30450 }, { "epoch": 0.87, "grad_norm": 8.34946891443442, "learning_rate": 4.2323898878379564e-07, "loss": 0.4463, "step": 30451 }, { "epoch": 0.87, "grad_norm": 6.900473755314959, "learning_rate": 4.2305227420737115e-07, "loss": 0.3176, "step": 30452 }, { "epoch": 0.87, "grad_norm": 6.1629657035491086, "learning_rate": 4.2286559900586554e-07, "loss": 0.3294, "step": 30453 }, { "epoch": 0.87, "grad_norm": 10.812736552743399, "learning_rate": 4.2267896318088306e-07, "loss": 0.8564, "step": 30454 }, { "epoch": 0.87, "grad_norm": 7.491852531065264, "learning_rate": 4.2249236673403026e-07, "loss": 0.4889, "step": 30455 }, { "epoch": 0.87, "grad_norm": 3.7508651689049777, "learning_rate": 4.2230580966691136e-07, "loss": 0.2615, "step": 30456 }, { "epoch": 0.87, "grad_norm": 4.166806689134876, "learning_rate": 4.2211929198113176e-07, "loss": 0.7058, "step": 30457 }, { "epoch": 0.87, "grad_norm": 6.047254959510026, "learning_rate": 4.2193281367829684e-07, "loss": 0.5407, "step": 30458 }, { "epoch": 0.87, "grad_norm": 2.9499638506727295, "learning_rate": 4.217463747600098e-07, "loss": 0.3954, "step": 30459 }, { "epoch": 0.87, "grad_norm": 3.387691899174466, "learning_rate": 4.2155997522787594e-07, "loss": 0.2246, "step": 30460 }, { "epoch": 0.87, "grad_norm": 5.5447671243183105, "learning_rate": 4.213736150834974e-07, "loss": 0.7075, "step": 30461 }, { "epoch": 0.87, "grad_norm": 3.9316198250854733, "learning_rate": 4.211872943284784e-07, "loss": 0.506, "step": 30462 }, { "epoch": 0.87, "grad_norm": 1.798942910175957, "learning_rate": 4.2100101296441984e-07, "loss": 0.131, "step": 30463 }, { "epoch": 0.87, "grad_norm": 6.257348508932809, "learning_rate": 4.2081477099292724e-07, "loss": 0.2529, "step": 30464 }, { "epoch": 0.87, "grad_norm": 5.103232421491904, "learning_rate": 4.206285684156003e-07, "loss": 0.5816, "step": 30465 }, { "epoch": 0.87, "grad_norm": 4.80735501309281, "learning_rate": 4.204424052340422e-07, "loss": 0.4448, "step": 30466 }, { "epoch": 0.87, "grad_norm": 5.497399625742255, "learning_rate": 4.2025628144985564e-07, "loss": 0.5799, "step": 30467 }, { "epoch": 0.87, "grad_norm": 3.9221810073318584, "learning_rate": 4.200701970646387e-07, "loss": 0.2326, "step": 30468 }, { "epoch": 0.87, "grad_norm": 5.490384018743936, "learning_rate": 4.198841520799951e-07, "loss": 0.3358, "step": 30469 }, { "epoch": 0.87, "grad_norm": 5.241601493361543, "learning_rate": 4.1969814649752304e-07, "loss": 0.5208, "step": 30470 }, { "epoch": 0.87, "grad_norm": 6.192015839901459, "learning_rate": 4.195121803188251e-07, "loss": 0.4801, "step": 30471 }, { "epoch": 0.87, "grad_norm": 4.105205059203467, "learning_rate": 4.1932625354549896e-07, "loss": 0.5916, "step": 30472 }, { "epoch": 0.87, "grad_norm": 4.949908683396949, "learning_rate": 4.1914036617914597e-07, "loss": 0.4264, "step": 30473 }, { "epoch": 0.87, "grad_norm": 2.63412940712097, "learning_rate": 4.189545182213639e-07, "loss": 0.4865, "step": 30474 }, { "epoch": 0.87, "grad_norm": 8.191059649133267, "learning_rate": 4.1876870967375194e-07, "loss": 0.7811, "step": 30475 }, { "epoch": 0.87, "grad_norm": 4.421025157449619, "learning_rate": 4.185829405379088e-07, "loss": 0.3254, "step": 30476 }, { "epoch": 0.87, "grad_norm": 9.890256757344385, "learning_rate": 4.1839721081543214e-07, "loss": 0.5132, "step": 30477 }, { "epoch": 0.87, "grad_norm": 5.321216478446056, "learning_rate": 4.1821152050792123e-07, "loss": 0.4039, "step": 30478 }, { "epoch": 0.87, "grad_norm": 3.9398421330460938, "learning_rate": 4.1802586961697143e-07, "loss": 0.5052, "step": 30479 }, { "epoch": 0.87, "grad_norm": 8.752437905878832, "learning_rate": 4.178402581441815e-07, "loss": 0.2552, "step": 30480 }, { "epoch": 0.87, "grad_norm": 7.69912622126192, "learning_rate": 4.176546860911479e-07, "loss": 0.7603, "step": 30481 }, { "epoch": 0.87, "grad_norm": 4.565550659535313, "learning_rate": 4.17469153459466e-07, "loss": 0.6288, "step": 30482 }, { "epoch": 0.87, "grad_norm": 4.25361869127322, "learning_rate": 4.1728366025073354e-07, "loss": 0.3911, "step": 30483 }, { "epoch": 0.87, "grad_norm": 4.695758817409264, "learning_rate": 4.1709820646654464e-07, "loss": 0.3812, "step": 30484 }, { "epoch": 0.87, "grad_norm": 4.29526015021158, "learning_rate": 4.1691279210849646e-07, "loss": 0.4017, "step": 30485 }, { "epoch": 0.87, "grad_norm": 6.67862788490522, "learning_rate": 4.1672741717818265e-07, "loss": 0.5144, "step": 30486 }, { "epoch": 0.87, "grad_norm": 7.213377237109949, "learning_rate": 4.165420816771992e-07, "loss": 0.4203, "step": 30487 }, { "epoch": 0.87, "grad_norm": 6.628146737803595, "learning_rate": 4.1635678560713986e-07, "loss": 0.639, "step": 30488 }, { "epoch": 0.87, "grad_norm": 3.79234781889088, "learning_rate": 4.161715289695978e-07, "loss": 0.4485, "step": 30489 }, { "epoch": 0.87, "grad_norm": 7.0078423710997635, "learning_rate": 4.159863117661689e-07, "loss": 1.104, "step": 30490 }, { "epoch": 0.87, "grad_norm": 5.89436419978976, "learning_rate": 4.1580113399844415e-07, "loss": 0.3635, "step": 30491 }, { "epoch": 0.87, "grad_norm": 4.685818230617461, "learning_rate": 4.1561599566801893e-07, "loss": 0.4209, "step": 30492 }, { "epoch": 0.87, "grad_norm": 13.139181595315971, "learning_rate": 4.154308967764842e-07, "loss": 0.3361, "step": 30493 }, { "epoch": 0.87, "grad_norm": 10.402773118764333, "learning_rate": 4.1524583732543313e-07, "loss": 0.7664, "step": 30494 }, { "epoch": 0.87, "grad_norm": 6.583190598529978, "learning_rate": 4.150608173164583e-07, "loss": 0.5281, "step": 30495 }, { "epoch": 0.87, "grad_norm": 5.088098351160796, "learning_rate": 4.148758367511507e-07, "loss": 0.1718, "step": 30496 }, { "epoch": 0.87, "grad_norm": 5.561791428429902, "learning_rate": 4.146908956311002e-07, "loss": 0.5091, "step": 30497 }, { "epoch": 0.87, "grad_norm": 6.414761058017478, "learning_rate": 4.145059939578999e-07, "loss": 0.65, "step": 30498 }, { "epoch": 0.87, "grad_norm": 4.767582075655753, "learning_rate": 4.1432113173314084e-07, "loss": 0.4422, "step": 30499 }, { "epoch": 0.87, "grad_norm": 3.543361755578881, "learning_rate": 4.141363089584127e-07, "loss": 0.3749, "step": 30500 }, { "epoch": 0.87, "grad_norm": 5.725058322925918, "learning_rate": 4.1395152563530494e-07, "loss": 0.4989, "step": 30501 }, { "epoch": 0.87, "grad_norm": 1.8505203799107521, "learning_rate": 4.1376678176540677e-07, "loss": 0.1646, "step": 30502 }, { "epoch": 0.87, "grad_norm": 7.014293841725498, "learning_rate": 4.135820773503091e-07, "loss": 0.2774, "step": 30503 }, { "epoch": 0.87, "grad_norm": 3.7871363827242557, "learning_rate": 4.13397412391599e-07, "loss": 0.2149, "step": 30504 }, { "epoch": 0.87, "grad_norm": 5.599579570519303, "learning_rate": 4.132127868908675e-07, "loss": 0.7389, "step": 30505 }, { "epoch": 0.87, "grad_norm": 8.711807057040273, "learning_rate": 4.1302820084970054e-07, "loss": 0.7418, "step": 30506 }, { "epoch": 0.87, "grad_norm": 5.193942561981043, "learning_rate": 4.128436542696884e-07, "loss": 0.605, "step": 30507 }, { "epoch": 0.87, "grad_norm": 4.2680051541927435, "learning_rate": 4.126591471524172e-07, "loss": 0.5121, "step": 30508 }, { "epoch": 0.87, "grad_norm": 6.561269226918145, "learning_rate": 4.1247467949947385e-07, "loss": 0.8228, "step": 30509 }, { "epoch": 0.87, "grad_norm": 5.2405523714823135, "learning_rate": 4.122902513124466e-07, "loss": 0.2777, "step": 30510 }, { "epoch": 0.87, "grad_norm": 8.077524690439782, "learning_rate": 4.1210586259292087e-07, "loss": 0.4996, "step": 30511 }, { "epoch": 0.87, "grad_norm": 18.025685104492645, "learning_rate": 4.119215133424848e-07, "loss": 0.4317, "step": 30512 }, { "epoch": 0.87, "grad_norm": 8.526861662087777, "learning_rate": 4.117372035627215e-07, "loss": 0.5729, "step": 30513 }, { "epoch": 0.87, "grad_norm": 6.721302239681657, "learning_rate": 4.115529332552193e-07, "loss": 0.3966, "step": 30514 }, { "epoch": 0.87, "grad_norm": 6.3911060254812595, "learning_rate": 4.1136870242156233e-07, "loss": 0.3549, "step": 30515 }, { "epoch": 0.87, "grad_norm": 7.294984298908784, "learning_rate": 4.1118451106333447e-07, "loss": 0.6246, "step": 30516 }, { "epoch": 0.87, "grad_norm": 5.740612950777004, "learning_rate": 4.1100035918212214e-07, "loss": 0.2938, "step": 30517 }, { "epoch": 0.87, "grad_norm": 10.359516401749891, "learning_rate": 4.108162467795074e-07, "loss": 0.5947, "step": 30518 }, { "epoch": 0.87, "grad_norm": 4.226091905156881, "learning_rate": 4.1063217385707676e-07, "loss": 0.5118, "step": 30519 }, { "epoch": 0.87, "grad_norm": 4.946889809582075, "learning_rate": 4.104481404164118e-07, "loss": 0.1653, "step": 30520 }, { "epoch": 0.87, "grad_norm": 4.87487049419829, "learning_rate": 4.1026414645909675e-07, "loss": 0.3057, "step": 30521 }, { "epoch": 0.87, "grad_norm": 2.6758013258132167, "learning_rate": 4.100801919867148e-07, "loss": 0.2255, "step": 30522 }, { "epoch": 0.87, "grad_norm": 3.5070438519899603, "learning_rate": 4.098962770008463e-07, "loss": 0.3521, "step": 30523 }, { "epoch": 0.87, "grad_norm": 6.185706716820426, "learning_rate": 4.097124015030762e-07, "loss": 0.6078, "step": 30524 }, { "epoch": 0.87, "grad_norm": 3.6231664098058127, "learning_rate": 4.095285654949843e-07, "loss": 0.3535, "step": 30525 }, { "epoch": 0.87, "grad_norm": 5.792284276219154, "learning_rate": 4.093447689781538e-07, "loss": 0.3591, "step": 30526 }, { "epoch": 0.87, "grad_norm": 4.300024397359433, "learning_rate": 4.0916101195416505e-07, "loss": 0.3983, "step": 30527 }, { "epoch": 0.87, "grad_norm": 3.184676546713, "learning_rate": 4.0897729442459843e-07, "loss": 0.234, "step": 30528 }, { "epoch": 0.87, "grad_norm": 4.484079703467406, "learning_rate": 4.0879361639103444e-07, "loss": 0.3564, "step": 30529 }, { "epoch": 0.87, "grad_norm": 17.090192853577193, "learning_rate": 4.0860997785505395e-07, "loss": 0.8301, "step": 30530 }, { "epoch": 0.87, "grad_norm": 4.073282223790293, "learning_rate": 4.084263788182369e-07, "loss": 0.0795, "step": 30531 }, { "epoch": 0.87, "grad_norm": 9.233201444512488, "learning_rate": 4.0824281928216194e-07, "loss": 0.4995, "step": 30532 }, { "epoch": 0.87, "grad_norm": 3.572947658630543, "learning_rate": 4.0805929924840894e-07, "loss": 0.4274, "step": 30533 }, { "epoch": 0.87, "grad_norm": 2.847825766124068, "learning_rate": 4.078758187185566e-07, "loss": 0.1276, "step": 30534 }, { "epoch": 0.87, "grad_norm": 2.6315119443068236, "learning_rate": 4.076923776941838e-07, "loss": 0.466, "step": 30535 }, { "epoch": 0.87, "grad_norm": 8.288156529999387, "learning_rate": 4.075089761768664e-07, "loss": 0.4005, "step": 30536 }, { "epoch": 0.87, "grad_norm": 7.436664374074863, "learning_rate": 4.073256141681853e-07, "loss": 0.7545, "step": 30537 }, { "epoch": 0.87, "grad_norm": 8.301343099091877, "learning_rate": 4.0714229166971484e-07, "loss": 0.8782, "step": 30538 }, { "epoch": 0.87, "grad_norm": 9.797734784084607, "learning_rate": 4.069590086830344e-07, "loss": 0.8237, "step": 30539 }, { "epoch": 0.87, "grad_norm": 7.549455316108294, "learning_rate": 4.0677576520972095e-07, "loss": 0.4156, "step": 30540 }, { "epoch": 0.87, "grad_norm": 5.443653275323588, "learning_rate": 4.0659256125134995e-07, "loss": 0.1553, "step": 30541 }, { "epoch": 0.87, "grad_norm": 5.668762151501133, "learning_rate": 4.064093968094973e-07, "loss": 0.3287, "step": 30542 }, { "epoch": 0.87, "grad_norm": 5.822842819846563, "learning_rate": 4.0622627188573847e-07, "loss": 0.3443, "step": 30543 }, { "epoch": 0.87, "grad_norm": 3.8406766063925972, "learning_rate": 4.0604318648165053e-07, "loss": 0.4099, "step": 30544 }, { "epoch": 0.87, "grad_norm": 3.9437650459594997, "learning_rate": 4.0586014059880607e-07, "loss": 0.5646, "step": 30545 }, { "epoch": 0.87, "grad_norm": 9.713894725993311, "learning_rate": 4.056771342387822e-07, "loss": 0.9533, "step": 30546 }, { "epoch": 0.87, "grad_norm": 2.2662371236032413, "learning_rate": 4.054941674031515e-07, "loss": 0.1558, "step": 30547 }, { "epoch": 0.87, "grad_norm": 3.9408868422854684, "learning_rate": 4.0531124009349e-07, "loss": 0.223, "step": 30548 }, { "epoch": 0.87, "grad_norm": 6.093393794677291, "learning_rate": 4.051283523113697e-07, "loss": 0.4248, "step": 30549 }, { "epoch": 0.87, "grad_norm": 5.0354661986703535, "learning_rate": 4.049455040583639e-07, "loss": 0.4852, "step": 30550 }, { "epoch": 0.87, "grad_norm": 4.634137792297821, "learning_rate": 4.0476269533604673e-07, "loss": 0.5225, "step": 30551 }, { "epoch": 0.87, "grad_norm": 4.6342759418254325, "learning_rate": 4.045799261459893e-07, "loss": 0.3431, "step": 30552 }, { "epoch": 0.87, "grad_norm": 5.516102302799002, "learning_rate": 4.043971964897664e-07, "loss": 0.4397, "step": 30553 }, { "epoch": 0.87, "grad_norm": 5.255175219589974, "learning_rate": 4.042145063689473e-07, "loss": 0.5722, "step": 30554 }, { "epoch": 0.88, "grad_norm": 4.580071019409134, "learning_rate": 4.0403185578510584e-07, "loss": 0.2299, "step": 30555 }, { "epoch": 0.88, "grad_norm": 6.05597028371759, "learning_rate": 4.0384924473981236e-07, "loss": 0.471, "step": 30556 }, { "epoch": 0.88, "grad_norm": 4.420940421722238, "learning_rate": 4.0366667323463725e-07, "loss": 0.6147, "step": 30557 }, { "epoch": 0.88, "grad_norm": 6.042606966794582, "learning_rate": 4.0348414127115266e-07, "loss": 0.5341, "step": 30558 }, { "epoch": 0.88, "grad_norm": 4.445480906643623, "learning_rate": 4.0330164885092725e-07, "loss": 0.168, "step": 30559 }, { "epoch": 0.88, "grad_norm": 4.544369639840898, "learning_rate": 4.0311919597553263e-07, "loss": 0.6887, "step": 30560 }, { "epoch": 0.88, "grad_norm": 3.966255335090384, "learning_rate": 4.0293678264653745e-07, "loss": 0.3489, "step": 30561 }, { "epoch": 0.88, "grad_norm": 3.8333388342334143, "learning_rate": 4.0275440886551164e-07, "loss": 0.4832, "step": 30562 }, { "epoch": 0.88, "grad_norm": 4.0127515791222725, "learning_rate": 4.0257207463402226e-07, "loss": 0.3889, "step": 30563 }, { "epoch": 0.88, "grad_norm": 6.4052432501980565, "learning_rate": 4.023897799536391e-07, "loss": 0.6724, "step": 30564 }, { "epoch": 0.88, "grad_norm": 4.5704269525265735, "learning_rate": 4.0220752482593216e-07, "loss": 0.347, "step": 30565 }, { "epoch": 0.88, "grad_norm": 4.275603252924916, "learning_rate": 4.020253092524662e-07, "loss": 0.5017, "step": 30566 }, { "epoch": 0.88, "grad_norm": 3.768537384412612, "learning_rate": 4.018431332348116e-07, "loss": 0.3046, "step": 30567 }, { "epoch": 0.88, "grad_norm": 6.019685598445493, "learning_rate": 4.016609967745344e-07, "loss": 0.4295, "step": 30568 }, { "epoch": 0.88, "grad_norm": 7.584960245972512, "learning_rate": 4.0147889987320166e-07, "loss": 0.2513, "step": 30569 }, { "epoch": 0.88, "grad_norm": 4.935942923412139, "learning_rate": 4.012968425323788e-07, "loss": 0.2675, "step": 30570 }, { "epoch": 0.88, "grad_norm": 6.1246479478204465, "learning_rate": 4.011148247536334e-07, "loss": 0.3047, "step": 30571 }, { "epoch": 0.88, "grad_norm": 4.969968514409872, "learning_rate": 4.009328465385304e-07, "loss": 0.3874, "step": 30572 }, { "epoch": 0.88, "grad_norm": 5.766979564968554, "learning_rate": 4.007509078886357e-07, "loss": 0.3352, "step": 30573 }, { "epoch": 0.88, "grad_norm": 2.453750749219643, "learning_rate": 4.0056900880551583e-07, "loss": 0.0989, "step": 30574 }, { "epoch": 0.88, "grad_norm": 2.060510080018042, "learning_rate": 4.003871492907346e-07, "loss": 0.1254, "step": 30575 }, { "epoch": 0.88, "grad_norm": 4.70333225642696, "learning_rate": 4.002053293458558e-07, "loss": 0.468, "step": 30576 }, { "epoch": 0.88, "grad_norm": 6.022145094305287, "learning_rate": 4.0002354897244413e-07, "loss": 0.4067, "step": 30577 }, { "epoch": 0.88, "grad_norm": 5.027099176606645, "learning_rate": 3.9984180817206395e-07, "loss": 0.5845, "step": 30578 }, { "epoch": 0.88, "grad_norm": 2.979117390863121, "learning_rate": 3.9966010694627745e-07, "loss": 0.3513, "step": 30579 }, { "epoch": 0.88, "grad_norm": 5.3584066244583894, "learning_rate": 3.9947844529664825e-07, "loss": 0.419, "step": 30580 }, { "epoch": 0.88, "grad_norm": 2.880033337115356, "learning_rate": 3.992968232247407e-07, "loss": 0.3082, "step": 30581 }, { "epoch": 0.88, "grad_norm": 8.652239369608818, "learning_rate": 3.991152407321164e-07, "loss": 0.5478, "step": 30582 }, { "epoch": 0.88, "grad_norm": 7.9973691399312115, "learning_rate": 3.9893369782033733e-07, "loss": 0.4883, "step": 30583 }, { "epoch": 0.88, "grad_norm": 3.4839281556860398, "learning_rate": 3.987521944909639e-07, "loss": 0.4279, "step": 30584 }, { "epoch": 0.88, "grad_norm": 8.032119792935891, "learning_rate": 3.985707307455594e-07, "loss": 0.5714, "step": 30585 }, { "epoch": 0.88, "grad_norm": 5.826636812161242, "learning_rate": 3.983893065856842e-07, "loss": 0.5165, "step": 30586 }, { "epoch": 0.88, "grad_norm": 7.094085248511824, "learning_rate": 3.9820792201289983e-07, "loss": 0.5549, "step": 30587 }, { "epoch": 0.88, "grad_norm": 6.354613487501109, "learning_rate": 3.9802657702876555e-07, "loss": 0.6078, "step": 30588 }, { "epoch": 0.88, "grad_norm": 4.085663416114226, "learning_rate": 3.9784527163484185e-07, "loss": 0.632, "step": 30589 }, { "epoch": 0.88, "grad_norm": 4.062329596833713, "learning_rate": 3.976640058326892e-07, "loss": 0.4908, "step": 30590 }, { "epoch": 0.88, "grad_norm": 13.258164247736966, "learning_rate": 3.974827796238662e-07, "loss": 0.5209, "step": 30591 }, { "epoch": 0.88, "grad_norm": 4.241439416165124, "learning_rate": 3.973015930099322e-07, "loss": 0.329, "step": 30592 }, { "epoch": 0.88, "grad_norm": 7.944210249850858, "learning_rate": 3.971204459924455e-07, "loss": 0.3732, "step": 30593 }, { "epoch": 0.88, "grad_norm": 3.5424895546058535, "learning_rate": 3.969393385729653e-07, "loss": 0.5924, "step": 30594 }, { "epoch": 0.88, "grad_norm": 4.005120471971193, "learning_rate": 3.9675827075304986e-07, "loss": 0.5015, "step": 30595 }, { "epoch": 0.88, "grad_norm": 9.677824935720329, "learning_rate": 3.965772425342551e-07, "loss": 0.683, "step": 30596 }, { "epoch": 0.88, "grad_norm": 3.933707818022432, "learning_rate": 3.963962539181404e-07, "loss": 0.4014, "step": 30597 }, { "epoch": 0.88, "grad_norm": 10.57706661367056, "learning_rate": 3.9621530490626114e-07, "loss": 0.274, "step": 30598 }, { "epoch": 0.88, "grad_norm": 3.980291450602365, "learning_rate": 3.9603439550017554e-07, "loss": 0.5846, "step": 30599 }, { "epoch": 0.88, "grad_norm": 5.679138828730861, "learning_rate": 3.9585352570143785e-07, "loss": 0.5525, "step": 30600 }, { "epoch": 0.88, "grad_norm": 4.003197740345084, "learning_rate": 3.956726955116069e-07, "loss": 0.3631, "step": 30601 }, { "epoch": 0.88, "grad_norm": 7.609450792252401, "learning_rate": 3.9549190493223644e-07, "loss": 0.4333, "step": 30602 }, { "epoch": 0.88, "grad_norm": 2.4550122576172537, "learning_rate": 3.953111539648824e-07, "loss": 0.2322, "step": 30603 }, { "epoch": 0.88, "grad_norm": 5.527643698381998, "learning_rate": 3.951304426110991e-07, "loss": 0.4572, "step": 30604 }, { "epoch": 0.88, "grad_norm": 6.943001807941002, "learning_rate": 3.949497708724409e-07, "loss": 0.5951, "step": 30605 }, { "epoch": 0.88, "grad_norm": 5.85707793922535, "learning_rate": 3.947691387504643e-07, "loss": 0.6849, "step": 30606 }, { "epoch": 0.88, "grad_norm": 6.254710024874837, "learning_rate": 3.9458854624672085e-07, "loss": 0.7043, "step": 30607 }, { "epoch": 0.88, "grad_norm": 5.419899528388617, "learning_rate": 3.944079933627654e-07, "loss": 0.4322, "step": 30608 }, { "epoch": 0.88, "grad_norm": 4.363757948418805, "learning_rate": 3.9422748010015166e-07, "loss": 0.1529, "step": 30609 }, { "epoch": 0.88, "grad_norm": 5.515242004065037, "learning_rate": 3.940470064604313e-07, "loss": 0.5147, "step": 30610 }, { "epoch": 0.88, "grad_norm": 6.127912412824257, "learning_rate": 3.938665724451568e-07, "loss": 0.4594, "step": 30611 }, { "epoch": 0.88, "grad_norm": 6.373409278472343, "learning_rate": 3.93686178055882e-07, "loss": 0.4246, "step": 30612 }, { "epoch": 0.88, "grad_norm": 3.5885832756003557, "learning_rate": 3.935058232941569e-07, "loss": 0.2068, "step": 30613 }, { "epoch": 0.88, "grad_norm": 8.670733319985395, "learning_rate": 3.93325508161535e-07, "loss": 0.6361, "step": 30614 }, { "epoch": 0.88, "grad_norm": 3.4771545377533286, "learning_rate": 3.931452326595664e-07, "loss": 0.2448, "step": 30615 }, { "epoch": 0.88, "grad_norm": 7.743161014205558, "learning_rate": 3.929649967898014e-07, "loss": 0.5966, "step": 30616 }, { "epoch": 0.88, "grad_norm": 3.5465869534528687, "learning_rate": 3.927848005537921e-07, "loss": 0.3268, "step": 30617 }, { "epoch": 0.88, "grad_norm": 7.648701212216663, "learning_rate": 3.9260464395308673e-07, "loss": 0.6805, "step": 30618 }, { "epoch": 0.88, "grad_norm": 6.418677023349284, "learning_rate": 3.9242452698923736e-07, "loss": 0.4717, "step": 30619 }, { "epoch": 0.88, "grad_norm": 3.8855644007082666, "learning_rate": 3.9224444966379226e-07, "loss": 0.6781, "step": 30620 }, { "epoch": 0.88, "grad_norm": 3.896432533014893, "learning_rate": 3.9206441197830123e-07, "loss": 0.3097, "step": 30621 }, { "epoch": 0.88, "grad_norm": 5.604893756577548, "learning_rate": 3.9188441393431255e-07, "loss": 0.5518, "step": 30622 }, { "epoch": 0.88, "grad_norm": 6.527991517831046, "learning_rate": 3.917044555333743e-07, "loss": 0.2221, "step": 30623 }, { "epoch": 0.88, "grad_norm": 7.377116917818575, "learning_rate": 3.9152453677703594e-07, "loss": 0.5005, "step": 30624 }, { "epoch": 0.88, "grad_norm": 3.842902330420555, "learning_rate": 3.913446576668434e-07, "loss": 0.2276, "step": 30625 }, { "epoch": 0.88, "grad_norm": 3.4252651961186986, "learning_rate": 3.911648182043465e-07, "loss": 0.3738, "step": 30626 }, { "epoch": 0.88, "grad_norm": 1.695857729451237, "learning_rate": 3.909850183910907e-07, "loss": 0.1563, "step": 30627 }, { "epoch": 0.88, "grad_norm": 4.412481622414506, "learning_rate": 3.908052582286237e-07, "loss": 0.4608, "step": 30628 }, { "epoch": 0.88, "grad_norm": 4.18940293075263, "learning_rate": 3.90625537718492e-07, "loss": 0.1935, "step": 30629 }, { "epoch": 0.88, "grad_norm": 7.433406953774943, "learning_rate": 3.904458568622399e-07, "loss": 0.3103, "step": 30630 }, { "epoch": 0.88, "grad_norm": 4.116356463591876, "learning_rate": 3.902662156614156e-07, "loss": 0.6146, "step": 30631 }, { "epoch": 0.88, "grad_norm": 4.70170551563013, "learning_rate": 3.90086614117563e-07, "loss": 0.599, "step": 30632 }, { "epoch": 0.88, "grad_norm": 2.8318822361119724, "learning_rate": 3.8990705223222793e-07, "loss": 0.2991, "step": 30633 }, { "epoch": 0.88, "grad_norm": 4.762810222904764, "learning_rate": 3.8972753000695475e-07, "loss": 0.48, "step": 30634 }, { "epoch": 0.88, "grad_norm": 3.8916920497047434, "learning_rate": 3.8954804744328834e-07, "loss": 0.2825, "step": 30635 }, { "epoch": 0.88, "grad_norm": 4.920663472580831, "learning_rate": 3.893686045427725e-07, "loss": 0.3472, "step": 30636 }, { "epoch": 0.88, "grad_norm": 8.704882569285123, "learning_rate": 3.8918920130694983e-07, "loss": 0.4517, "step": 30637 }, { "epoch": 0.88, "grad_norm": 2.1226380348477027, "learning_rate": 3.890098377373658e-07, "loss": 0.3201, "step": 30638 }, { "epoch": 0.88, "grad_norm": 4.7139020862792975, "learning_rate": 3.8883051383556134e-07, "loss": 0.3179, "step": 30639 }, { "epoch": 0.88, "grad_norm": 3.568850729300736, "learning_rate": 3.886512296030814e-07, "loss": 0.2344, "step": 30640 }, { "epoch": 0.88, "grad_norm": 5.047838313992517, "learning_rate": 3.8847198504146643e-07, "loss": 0.4598, "step": 30641 }, { "epoch": 0.88, "grad_norm": 10.339210366916447, "learning_rate": 3.882927801522607e-07, "loss": 0.5703, "step": 30642 }, { "epoch": 0.88, "grad_norm": 9.04036714441255, "learning_rate": 3.881136149370024e-07, "loss": 0.5965, "step": 30643 }, { "epoch": 0.88, "grad_norm": 3.660853918914842, "learning_rate": 3.879344893972364e-07, "loss": 0.2745, "step": 30644 }, { "epoch": 0.88, "grad_norm": 5.001341568257072, "learning_rate": 3.877554035345005e-07, "loss": 0.4762, "step": 30645 }, { "epoch": 0.88, "grad_norm": 4.255738871576603, "learning_rate": 3.8757635735033717e-07, "loss": 0.6263, "step": 30646 }, { "epoch": 0.88, "grad_norm": 7.35172309659536, "learning_rate": 3.8739735084628805e-07, "loss": 0.5901, "step": 30647 }, { "epoch": 0.88, "grad_norm": 6.731508569878135, "learning_rate": 3.872183840238908e-07, "loss": 0.4933, "step": 30648 }, { "epoch": 0.88, "grad_norm": 6.063813607962149, "learning_rate": 3.8703945688468636e-07, "loss": 0.9252, "step": 30649 }, { "epoch": 0.88, "grad_norm": 6.817203507895323, "learning_rate": 3.8686056943021244e-07, "loss": 0.8083, "step": 30650 }, { "epoch": 0.88, "grad_norm": 3.3394025812396895, "learning_rate": 3.8668172166201e-07, "loss": 0.0972, "step": 30651 }, { "epoch": 0.88, "grad_norm": 4.960912076629615, "learning_rate": 3.865029135816156e-07, "loss": 0.5559, "step": 30652 }, { "epoch": 0.88, "grad_norm": 4.566002768384952, "learning_rate": 3.863241451905697e-07, "loss": 0.3261, "step": 30653 }, { "epoch": 0.88, "grad_norm": 3.8867359870260056, "learning_rate": 3.8614541649040825e-07, "loss": 0.581, "step": 30654 }, { "epoch": 0.88, "grad_norm": 3.732711325639971, "learning_rate": 3.8596672748267007e-07, "loss": 0.5555, "step": 30655 }, { "epoch": 0.88, "grad_norm": 4.993175331250731, "learning_rate": 3.8578807816889274e-07, "loss": 0.5511, "step": 30656 }, { "epoch": 0.88, "grad_norm": 4.538795844171957, "learning_rate": 3.856094685506112e-07, "loss": 0.162, "step": 30657 }, { "epoch": 0.88, "grad_norm": 8.747093480888571, "learning_rate": 3.854308986293642e-07, "loss": 0.5153, "step": 30658 }, { "epoch": 0.88, "grad_norm": 6.838919689908956, "learning_rate": 3.8525236840668667e-07, "loss": 0.2618, "step": 30659 }, { "epoch": 0.88, "grad_norm": 6.024405583465194, "learning_rate": 3.850738778841151e-07, "loss": 0.369, "step": 30660 }, { "epoch": 0.88, "grad_norm": 5.655039283622741, "learning_rate": 3.8489542706318383e-07, "loss": 0.5693, "step": 30661 }, { "epoch": 0.88, "grad_norm": 5.118543558766926, "learning_rate": 3.8471701594543e-07, "loss": 0.2431, "step": 30662 }, { "epoch": 0.88, "grad_norm": 5.595756682294417, "learning_rate": 3.845386445323879e-07, "loss": 0.3926, "step": 30663 }, { "epoch": 0.88, "grad_norm": 7.5707175635592865, "learning_rate": 3.8436031282559015e-07, "loss": 0.4562, "step": 30664 }, { "epoch": 0.88, "grad_norm": 5.286940809760927, "learning_rate": 3.841820208265734e-07, "loss": 0.4596, "step": 30665 }, { "epoch": 0.88, "grad_norm": 6.683604505163532, "learning_rate": 3.840037685368692e-07, "loss": 0.5756, "step": 30666 }, { "epoch": 0.88, "grad_norm": 7.751116825971966, "learning_rate": 3.838255559580134e-07, "loss": 0.2745, "step": 30667 }, { "epoch": 0.88, "grad_norm": 5.315150732808006, "learning_rate": 3.8364738309153716e-07, "loss": 0.4831, "step": 30668 }, { "epoch": 0.88, "grad_norm": 6.214041456110646, "learning_rate": 3.8346924993897584e-07, "loss": 0.5108, "step": 30669 }, { "epoch": 0.88, "grad_norm": 9.87690281944748, "learning_rate": 3.8329115650185823e-07, "loss": 0.6807, "step": 30670 }, { "epoch": 0.88, "grad_norm": 5.904694528971736, "learning_rate": 3.8311310278171863e-07, "loss": 0.4629, "step": 30671 }, { "epoch": 0.88, "grad_norm": 5.176427076821108, "learning_rate": 3.829350887800892e-07, "loss": 0.1897, "step": 30672 }, { "epoch": 0.88, "grad_norm": 7.440703735832175, "learning_rate": 3.8275711449849983e-07, "loss": 0.5116, "step": 30673 }, { "epoch": 0.88, "grad_norm": 8.088063956501156, "learning_rate": 3.825791799384837e-07, "loss": 0.2213, "step": 30674 }, { "epoch": 0.88, "grad_norm": 2.732311494682367, "learning_rate": 3.8240128510157014e-07, "loss": 0.1691, "step": 30675 }, { "epoch": 0.88, "grad_norm": 6.281463467590791, "learning_rate": 3.8222342998929016e-07, "loss": 0.6209, "step": 30676 }, { "epoch": 0.88, "grad_norm": 5.096724338047194, "learning_rate": 3.8204561460317204e-07, "loss": 0.5424, "step": 30677 }, { "epoch": 0.88, "grad_norm": 4.143159256334718, "learning_rate": 3.818678389447472e-07, "loss": 0.6825, "step": 30678 }, { "epoch": 0.88, "grad_norm": 4.605237053890389, "learning_rate": 3.816901030155457e-07, "loss": 0.2956, "step": 30679 }, { "epoch": 0.88, "grad_norm": 5.337836669023864, "learning_rate": 3.8151240681709443e-07, "loss": 0.3194, "step": 30680 }, { "epoch": 0.88, "grad_norm": 3.230375350838174, "learning_rate": 3.813347503509246e-07, "loss": 0.1817, "step": 30681 }, { "epoch": 0.88, "grad_norm": 7.474001518594385, "learning_rate": 3.811571336185627e-07, "loss": 0.8387, "step": 30682 }, { "epoch": 0.88, "grad_norm": 7.423179431525257, "learning_rate": 3.809795566215374e-07, "loss": 0.4801, "step": 30683 }, { "epoch": 0.88, "grad_norm": 4.900128281140287, "learning_rate": 3.8080201936137595e-07, "loss": 0.5619, "step": 30684 }, { "epoch": 0.88, "grad_norm": 7.92725122167893, "learning_rate": 3.806245218396065e-07, "loss": 0.4342, "step": 30685 }, { "epoch": 0.88, "grad_norm": 3.1266652820017447, "learning_rate": 3.8044706405775456e-07, "loss": 0.3011, "step": 30686 }, { "epoch": 0.88, "grad_norm": 8.256273759667675, "learning_rate": 3.802696460173477e-07, "loss": 0.4556, "step": 30687 }, { "epoch": 0.88, "grad_norm": 7.567593018155769, "learning_rate": 3.800922677199137e-07, "loss": 0.4628, "step": 30688 }, { "epoch": 0.88, "grad_norm": 6.018761429028142, "learning_rate": 3.7991492916697624e-07, "loss": 0.4264, "step": 30689 }, { "epoch": 0.88, "grad_norm": 4.917682993906461, "learning_rate": 3.797376303600625e-07, "loss": 0.2403, "step": 30690 }, { "epoch": 0.88, "grad_norm": 4.397119626178879, "learning_rate": 3.7956037130069565e-07, "loss": 0.5787, "step": 30691 }, { "epoch": 0.88, "grad_norm": 6.319356159902352, "learning_rate": 3.7938315199040343e-07, "loss": 0.4791, "step": 30692 }, { "epoch": 0.88, "grad_norm": 6.786277700280883, "learning_rate": 3.792059724307079e-07, "loss": 0.1455, "step": 30693 }, { "epoch": 0.88, "grad_norm": 6.566465832532419, "learning_rate": 3.790288326231356e-07, "loss": 0.4883, "step": 30694 }, { "epoch": 0.88, "grad_norm": 6.502212221317997, "learning_rate": 3.7885173256920817e-07, "loss": 0.499, "step": 30695 }, { "epoch": 0.88, "grad_norm": 4.349936162545963, "learning_rate": 3.7867467227045153e-07, "loss": 0.4098, "step": 30696 }, { "epoch": 0.88, "grad_norm": 7.089819066869051, "learning_rate": 3.7849765172838725e-07, "loss": 0.7034, "step": 30697 }, { "epoch": 0.88, "grad_norm": 5.581754940554104, "learning_rate": 3.7832067094453804e-07, "loss": 0.4833, "step": 30698 }, { "epoch": 0.88, "grad_norm": 3.7674249169981486, "learning_rate": 3.781437299204277e-07, "loss": 0.3925, "step": 30699 }, { "epoch": 0.88, "grad_norm": 3.3759952993758526, "learning_rate": 3.7796682865757716e-07, "loss": 0.1572, "step": 30700 }, { "epoch": 0.88, "grad_norm": 6.294210963713498, "learning_rate": 3.7778996715750973e-07, "loss": 0.4557, "step": 30701 }, { "epoch": 0.88, "grad_norm": 3.6159182601863895, "learning_rate": 3.776131454217458e-07, "loss": 0.1861, "step": 30702 }, { "epoch": 0.88, "grad_norm": 4.91347845879403, "learning_rate": 3.7743636345180633e-07, "loss": 0.2896, "step": 30703 }, { "epoch": 0.88, "grad_norm": 7.785693359221886, "learning_rate": 3.7725962124921354e-07, "loss": 0.3782, "step": 30704 }, { "epoch": 0.88, "grad_norm": 5.629339282224383, "learning_rate": 3.770829188154862e-07, "loss": 0.3372, "step": 30705 }, { "epoch": 0.88, "grad_norm": 7.253412495922077, "learning_rate": 3.7690625615214583e-07, "loss": 0.2829, "step": 30706 }, { "epoch": 0.88, "grad_norm": 4.711305986609702, "learning_rate": 3.7672963326071124e-07, "loss": 0.2631, "step": 30707 }, { "epoch": 0.88, "grad_norm": 5.007456607640118, "learning_rate": 3.7655305014270337e-07, "loss": 0.4157, "step": 30708 }, { "epoch": 0.88, "grad_norm": 8.511393372337567, "learning_rate": 3.7637650679964e-07, "loss": 1.329, "step": 30709 }, { "epoch": 0.88, "grad_norm": 4.889793934761083, "learning_rate": 3.762000032330404e-07, "loss": 0.6482, "step": 30710 }, { "epoch": 0.88, "grad_norm": 4.36883592971315, "learning_rate": 3.760235394444223e-07, "loss": 0.5099, "step": 30711 }, { "epoch": 0.88, "grad_norm": 5.6825993982257215, "learning_rate": 3.758471154353038e-07, "loss": 0.7026, "step": 30712 }, { "epoch": 0.88, "grad_norm": 6.832828254951965, "learning_rate": 3.756707312072044e-07, "loss": 0.6327, "step": 30713 }, { "epoch": 0.88, "grad_norm": 4.764226514395784, "learning_rate": 3.7549438676163954e-07, "loss": 0.2112, "step": 30714 }, { "epoch": 0.88, "grad_norm": 4.582695850197443, "learning_rate": 3.753180821001279e-07, "loss": 0.5159, "step": 30715 }, { "epoch": 0.88, "grad_norm": 5.234763424254258, "learning_rate": 3.751418172241855e-07, "loss": 0.4833, "step": 30716 }, { "epoch": 0.88, "grad_norm": 6.9845017116581944, "learning_rate": 3.7496559213532847e-07, "loss": 0.4556, "step": 30717 }, { "epoch": 0.88, "grad_norm": 5.210887948734257, "learning_rate": 3.7478940683507215e-07, "loss": 0.5078, "step": 30718 }, { "epoch": 0.88, "grad_norm": 5.154432542872181, "learning_rate": 3.746132613249337e-07, "loss": 0.464, "step": 30719 }, { "epoch": 0.88, "grad_norm": 5.23609515720886, "learning_rate": 3.744371556064275e-07, "loss": 0.4614, "step": 30720 }, { "epoch": 0.88, "grad_norm": 6.632719041505145, "learning_rate": 3.742610896810683e-07, "loss": 0.4899, "step": 30721 }, { "epoch": 0.88, "grad_norm": 3.7439740083701105, "learning_rate": 3.7408506355037286e-07, "loss": 0.4613, "step": 30722 }, { "epoch": 0.88, "grad_norm": 9.76404787069523, "learning_rate": 3.739090772158538e-07, "loss": 0.6544, "step": 30723 }, { "epoch": 0.88, "grad_norm": 6.456951937041249, "learning_rate": 3.737331306790254e-07, "loss": 0.6425, "step": 30724 }, { "epoch": 0.88, "grad_norm": 3.9545699483039165, "learning_rate": 3.7355722394140036e-07, "loss": 0.3576, "step": 30725 }, { "epoch": 0.88, "grad_norm": 5.343160273263383, "learning_rate": 3.7338135700449417e-07, "loss": 0.5265, "step": 30726 }, { "epoch": 0.88, "grad_norm": 6.5465492040011215, "learning_rate": 3.7320552986981726e-07, "loss": 0.5569, "step": 30727 }, { "epoch": 0.88, "grad_norm": 7.258942876361646, "learning_rate": 3.730297425388835e-07, "loss": 0.2265, "step": 30728 }, { "epoch": 0.88, "grad_norm": 5.298215713291397, "learning_rate": 3.728539950132071e-07, "loss": 0.3808, "step": 30729 }, { "epoch": 0.88, "grad_norm": 5.775498369519362, "learning_rate": 3.7267828729429636e-07, "loss": 0.5274, "step": 30730 }, { "epoch": 0.88, "grad_norm": 5.259825934550556, "learning_rate": 3.7250261938366506e-07, "loss": 0.4369, "step": 30731 }, { "epoch": 0.88, "grad_norm": 5.0072871987524135, "learning_rate": 3.7232699128282366e-07, "loss": 0.2999, "step": 30732 }, { "epoch": 0.88, "grad_norm": 5.46143475234651, "learning_rate": 3.721514029932838e-07, "loss": 0.3963, "step": 30733 }, { "epoch": 0.88, "grad_norm": 3.6097751907504243, "learning_rate": 3.719758545165547e-07, "loss": 0.4471, "step": 30734 }, { "epoch": 0.88, "grad_norm": 5.159731435218459, "learning_rate": 3.7180034585414857e-07, "loss": 0.1638, "step": 30735 }, { "epoch": 0.88, "grad_norm": 10.67490206635554, "learning_rate": 3.716248770075742e-07, "loss": 0.8498, "step": 30736 }, { "epoch": 0.88, "grad_norm": 7.500709436559769, "learning_rate": 3.714494479783404e-07, "loss": 0.5915, "step": 30737 }, { "epoch": 0.88, "grad_norm": 6.9586354568891595, "learning_rate": 3.712740587679581e-07, "loss": 0.5958, "step": 30738 }, { "epoch": 0.88, "grad_norm": 6.216071620754393, "learning_rate": 3.710987093779339e-07, "loss": 0.489, "step": 30739 }, { "epoch": 0.88, "grad_norm": 8.535545469439928, "learning_rate": 3.709233998097789e-07, "loss": 0.9487, "step": 30740 }, { "epoch": 0.88, "grad_norm": 4.627256848023059, "learning_rate": 3.70748130064999e-07, "loss": 0.6389, "step": 30741 }, { "epoch": 0.88, "grad_norm": 10.763251232551982, "learning_rate": 3.705729001451036e-07, "loss": 0.9217, "step": 30742 }, { "epoch": 0.88, "grad_norm": 4.386124663237743, "learning_rate": 3.7039771005159933e-07, "loss": 0.3371, "step": 30743 }, { "epoch": 0.88, "grad_norm": 2.4033567696309133, "learning_rate": 3.702225597859932e-07, "loss": 0.1621, "step": 30744 }, { "epoch": 0.88, "grad_norm": 4.475238331250133, "learning_rate": 3.7004744934979297e-07, "loss": 0.3266, "step": 30745 }, { "epoch": 0.88, "grad_norm": 5.644008562115312, "learning_rate": 3.6987237874450354e-07, "loss": 0.5391, "step": 30746 }, { "epoch": 0.88, "grad_norm": 7.664445717784043, "learning_rate": 3.6969734797163313e-07, "loss": 0.4403, "step": 30747 }, { "epoch": 0.88, "grad_norm": 6.923802367943981, "learning_rate": 3.69522357032685e-07, "loss": 0.6609, "step": 30748 }, { "epoch": 0.88, "grad_norm": 4.5279302867149696, "learning_rate": 3.693474059291674e-07, "loss": 0.2171, "step": 30749 }, { "epoch": 0.88, "grad_norm": 3.620223812223775, "learning_rate": 3.6917249466258354e-07, "loss": 0.1831, "step": 30750 }, { "epoch": 0.88, "grad_norm": 7.632496260121363, "learning_rate": 3.6899762323443833e-07, "loss": 1.0632, "step": 30751 }, { "epoch": 0.88, "grad_norm": 3.447278074074547, "learning_rate": 3.688227916462361e-07, "loss": 0.2628, "step": 30752 }, { "epoch": 0.88, "grad_norm": 4.77763279736779, "learning_rate": 3.6864799989948075e-07, "loss": 0.3421, "step": 30753 }, { "epoch": 0.88, "grad_norm": 4.754747703389227, "learning_rate": 3.6847324799567705e-07, "loss": 0.4126, "step": 30754 }, { "epoch": 0.88, "grad_norm": 6.885496969134682, "learning_rate": 3.6829853593632723e-07, "loss": 0.2943, "step": 30755 }, { "epoch": 0.88, "grad_norm": 7.147519011977569, "learning_rate": 3.681238637229362e-07, "loss": 0.4431, "step": 30756 }, { "epoch": 0.88, "grad_norm": 3.9965656557487517, "learning_rate": 3.6794923135700435e-07, "loss": 0.1776, "step": 30757 }, { "epoch": 0.88, "grad_norm": 9.682762710470364, "learning_rate": 3.677746388400355e-07, "loss": 0.9417, "step": 30758 }, { "epoch": 0.88, "grad_norm": 8.400675290075474, "learning_rate": 3.6760008617353013e-07, "loss": 0.5637, "step": 30759 }, { "epoch": 0.88, "grad_norm": 10.08872099982716, "learning_rate": 3.6742557335899155e-07, "loss": 0.7227, "step": 30760 }, { "epoch": 0.88, "grad_norm": 3.0900906393034853, "learning_rate": 3.672511003979201e-07, "loss": 0.4096, "step": 30761 }, { "epoch": 0.88, "grad_norm": 5.377816194383074, "learning_rate": 3.6707666729181747e-07, "loss": 0.3093, "step": 30762 }, { "epoch": 0.88, "grad_norm": 7.424450943294541, "learning_rate": 3.669022740421835e-07, "loss": 0.7371, "step": 30763 }, { "epoch": 0.88, "grad_norm": 8.420467591746526, "learning_rate": 3.6672792065051863e-07, "loss": 0.5718, "step": 30764 }, { "epoch": 0.88, "grad_norm": 4.785259061993406, "learning_rate": 3.6655360711832343e-07, "loss": 0.2076, "step": 30765 }, { "epoch": 0.88, "grad_norm": 5.006283269193022, "learning_rate": 3.6637933344709606e-07, "loss": 0.3697, "step": 30766 }, { "epoch": 0.88, "grad_norm": 2.057243573958582, "learning_rate": 3.6620509963833817e-07, "loss": 0.194, "step": 30767 }, { "epoch": 0.88, "grad_norm": 6.762210574628783, "learning_rate": 3.660309056935463e-07, "loss": 0.4882, "step": 30768 }, { "epoch": 0.88, "grad_norm": 2.344855543701626, "learning_rate": 3.658567516142203e-07, "loss": 0.2967, "step": 30769 }, { "epoch": 0.88, "grad_norm": 4.47834340299651, "learning_rate": 3.656826374018585e-07, "loss": 0.2503, "step": 30770 }, { "epoch": 0.88, "grad_norm": 5.55735004636304, "learning_rate": 3.6550856305795746e-07, "loss": 0.644, "step": 30771 }, { "epoch": 0.88, "grad_norm": 6.689643070837704, "learning_rate": 3.6533452858401653e-07, "loss": 0.4815, "step": 30772 }, { "epoch": 0.88, "grad_norm": 6.794542916638685, "learning_rate": 3.651605339815317e-07, "loss": 0.6044, "step": 30773 }, { "epoch": 0.88, "grad_norm": 2.905850249659582, "learning_rate": 3.649865792520002e-07, "loss": 0.3349, "step": 30774 }, { "epoch": 0.88, "grad_norm": 4.555653229237521, "learning_rate": 3.6481266439691797e-07, "loss": 0.4161, "step": 30775 }, { "epoch": 0.88, "grad_norm": 6.648569968637516, "learning_rate": 3.646387894177822e-07, "loss": 0.7582, "step": 30776 }, { "epoch": 0.88, "grad_norm": 4.438330505452321, "learning_rate": 3.6446495431608887e-07, "loss": 0.3537, "step": 30777 }, { "epoch": 0.88, "grad_norm": 3.938959184356214, "learning_rate": 3.6429115909333124e-07, "loss": 0.4204, "step": 30778 }, { "epoch": 0.88, "grad_norm": 5.515582681552976, "learning_rate": 3.641174037510076e-07, "loss": 0.2538, "step": 30779 }, { "epoch": 0.88, "grad_norm": 4.191765306777112, "learning_rate": 3.6394368829060943e-07, "loss": 0.3007, "step": 30780 }, { "epoch": 0.88, "grad_norm": 3.452700213373257, "learning_rate": 3.6377001271363455e-07, "loss": 0.2267, "step": 30781 }, { "epoch": 0.88, "grad_norm": 3.2445340225204204, "learning_rate": 3.635963770215739e-07, "loss": 0.3165, "step": 30782 }, { "epoch": 0.88, "grad_norm": 3.5059341805215647, "learning_rate": 3.634227812159241e-07, "loss": 0.2203, "step": 30783 }, { "epoch": 0.88, "grad_norm": 5.510060472520028, "learning_rate": 3.632492252981773e-07, "loss": 0.7068, "step": 30784 }, { "epoch": 0.88, "grad_norm": 9.549543391791003, "learning_rate": 3.6307570926982504e-07, "loss": 0.5898, "step": 30785 }, { "epoch": 0.88, "grad_norm": 8.387557451076994, "learning_rate": 3.629022331323628e-07, "loss": 0.5309, "step": 30786 }, { "epoch": 0.88, "grad_norm": 4.2380438531584295, "learning_rate": 3.6272879688728114e-07, "loss": 0.3963, "step": 30787 }, { "epoch": 0.88, "grad_norm": 2.8222500599362634, "learning_rate": 3.625554005360732e-07, "loss": 0.1891, "step": 30788 }, { "epoch": 0.88, "grad_norm": 6.043392390940816, "learning_rate": 3.6238204408023006e-07, "loss": 0.4949, "step": 30789 }, { "epoch": 0.88, "grad_norm": 11.188948894425272, "learning_rate": 3.6220872752124327e-07, "loss": 0.6497, "step": 30790 }, { "epoch": 0.88, "grad_norm": 6.65447283020366, "learning_rate": 3.620354508606033e-07, "loss": 0.4182, "step": 30791 }, { "epoch": 0.88, "grad_norm": 7.82338548280715, "learning_rate": 3.6186221409980184e-07, "loss": 0.7414, "step": 30792 }, { "epoch": 0.88, "grad_norm": 4.63474509985776, "learning_rate": 3.616890172403281e-07, "loss": 0.2577, "step": 30793 }, { "epoch": 0.88, "grad_norm": 7.285974075189584, "learning_rate": 3.615158602836721e-07, "loss": 0.2489, "step": 30794 }, { "epoch": 0.88, "grad_norm": 3.3163113080600577, "learning_rate": 3.6134274323132547e-07, "loss": 0.2986, "step": 30795 }, { "epoch": 0.88, "grad_norm": 4.425595231620746, "learning_rate": 3.6116966608477577e-07, "loss": 0.2313, "step": 30796 }, { "epoch": 0.88, "grad_norm": 1.616190462472535, "learning_rate": 3.6099662884551243e-07, "loss": 0.0848, "step": 30797 }, { "epoch": 0.88, "grad_norm": 4.582659874191186, "learning_rate": 3.608236315150232e-07, "loss": 0.2576, "step": 30798 }, { "epoch": 0.88, "grad_norm": 4.323369026262233, "learning_rate": 3.606506740947974e-07, "loss": 0.2717, "step": 30799 }, { "epoch": 0.88, "grad_norm": 6.559416273476999, "learning_rate": 3.604777565863221e-07, "loss": 0.5797, "step": 30800 }, { "epoch": 0.88, "grad_norm": 3.777095261835675, "learning_rate": 3.6030487899108623e-07, "loss": 0.3261, "step": 30801 }, { "epoch": 0.88, "grad_norm": 10.640882402127112, "learning_rate": 3.601320413105758e-07, "loss": 0.3543, "step": 30802 }, { "epoch": 0.88, "grad_norm": 5.802112978774469, "learning_rate": 3.5995924354627845e-07, "loss": 0.4851, "step": 30803 }, { "epoch": 0.88, "grad_norm": 3.0507957760981554, "learning_rate": 3.597864856996808e-07, "loss": 0.1984, "step": 30804 }, { "epoch": 0.88, "grad_norm": 5.862901445320733, "learning_rate": 3.5961376777226777e-07, "loss": 0.5472, "step": 30805 }, { "epoch": 0.88, "grad_norm": 4.069343143948353, "learning_rate": 3.5944108976552707e-07, "loss": 0.3511, "step": 30806 }, { "epoch": 0.88, "grad_norm": 3.3461952537871866, "learning_rate": 3.592684516809419e-07, "loss": 0.2873, "step": 30807 }, { "epoch": 0.88, "grad_norm": 7.241238001993421, "learning_rate": 3.590958535200001e-07, "loss": 0.6358, "step": 30808 }, { "epoch": 0.88, "grad_norm": 5.524070606341194, "learning_rate": 3.5892329528418477e-07, "loss": 0.5344, "step": 30809 }, { "epoch": 0.88, "grad_norm": 3.2141352784814305, "learning_rate": 3.587507769749815e-07, "loss": 0.2491, "step": 30810 }, { "epoch": 0.88, "grad_norm": 7.272720973597313, "learning_rate": 3.585782985938735e-07, "loss": 0.6082, "step": 30811 }, { "epoch": 0.88, "grad_norm": 11.176770163513, "learning_rate": 3.584058601423446e-07, "loss": 0.6508, "step": 30812 }, { "epoch": 0.88, "grad_norm": 8.570624012879914, "learning_rate": 3.582334616218791e-07, "loss": 0.4706, "step": 30813 }, { "epoch": 0.88, "grad_norm": 5.572407931580819, "learning_rate": 3.580611030339587e-07, "loss": 0.4319, "step": 30814 }, { "epoch": 0.88, "grad_norm": 6.279660531484646, "learning_rate": 3.5788878438006826e-07, "loss": 0.7985, "step": 30815 }, { "epoch": 0.88, "grad_norm": 6.121877575066158, "learning_rate": 3.5771650566168825e-07, "loss": 0.4793, "step": 30816 }, { "epoch": 0.88, "grad_norm": 4.631371619806546, "learning_rate": 3.575442668803031e-07, "loss": 0.3401, "step": 30817 }, { "epoch": 0.88, "grad_norm": 5.648825161242447, "learning_rate": 3.57372068037391e-07, "loss": 0.4356, "step": 30818 }, { "epoch": 0.88, "grad_norm": 7.471225152819225, "learning_rate": 3.571999091344358e-07, "loss": 0.5114, "step": 30819 }, { "epoch": 0.88, "grad_norm": 4.6054269984506835, "learning_rate": 3.570277901729191e-07, "loss": 0.2761, "step": 30820 }, { "epoch": 0.88, "grad_norm": 4.4930387801588125, "learning_rate": 3.5685571115431917e-07, "loss": 0.2589, "step": 30821 }, { "epoch": 0.88, "grad_norm": 4.857444011544226, "learning_rate": 3.566836720801192e-07, "loss": 0.5377, "step": 30822 }, { "epoch": 0.88, "grad_norm": 7.86240384720804, "learning_rate": 3.565116729517981e-07, "loss": 0.5073, "step": 30823 }, { "epoch": 0.88, "grad_norm": 4.570754201036557, "learning_rate": 3.5633971377083523e-07, "loss": 0.3927, "step": 30824 }, { "epoch": 0.88, "grad_norm": 6.717541146757505, "learning_rate": 3.5616779453870876e-07, "loss": 0.3796, "step": 30825 }, { "epoch": 0.88, "grad_norm": 3.8691295949055613, "learning_rate": 3.5599591525689923e-07, "loss": 0.4796, "step": 30826 }, { "epoch": 0.88, "grad_norm": 4.790319056984551, "learning_rate": 3.5582407592688606e-07, "loss": 0.5147, "step": 30827 }, { "epoch": 0.88, "grad_norm": 4.614845159377694, "learning_rate": 3.5565227655014576e-07, "loss": 0.1036, "step": 30828 }, { "epoch": 0.88, "grad_norm": 4.439499525439434, "learning_rate": 3.554805171281578e-07, "loss": 0.5675, "step": 30829 }, { "epoch": 0.88, "grad_norm": 11.867853282863717, "learning_rate": 3.5530879766239925e-07, "loss": 0.754, "step": 30830 }, { "epoch": 0.88, "grad_norm": 6.6619305794957455, "learning_rate": 3.5513711815434726e-07, "loss": 0.6075, "step": 30831 }, { "epoch": 0.88, "grad_norm": 4.173464472293278, "learning_rate": 3.549654786054779e-07, "loss": 0.371, "step": 30832 }, { "epoch": 0.88, "grad_norm": 3.3963631711966347, "learning_rate": 3.5479387901726894e-07, "loss": 0.2002, "step": 30833 }, { "epoch": 0.88, "grad_norm": 5.878345592036425, "learning_rate": 3.5462231939119627e-07, "loss": 0.3585, "step": 30834 }, { "epoch": 0.88, "grad_norm": 5.0213713720389785, "learning_rate": 3.544507997287355e-07, "loss": 0.5345, "step": 30835 }, { "epoch": 0.88, "grad_norm": 2.2638623846162913, "learning_rate": 3.5427932003136324e-07, "loss": 0.1506, "step": 30836 }, { "epoch": 0.88, "grad_norm": 8.594336042362345, "learning_rate": 3.541078803005538e-07, "loss": 0.5103, "step": 30837 }, { "epoch": 0.88, "grad_norm": 2.8320759421143573, "learning_rate": 3.5393648053778264e-07, "loss": 0.2319, "step": 30838 }, { "epoch": 0.88, "grad_norm": 5.510960581519967, "learning_rate": 3.5376512074452306e-07, "loss": 0.4458, "step": 30839 }, { "epoch": 0.88, "grad_norm": 4.645106248738855, "learning_rate": 3.535938009222506e-07, "loss": 0.4671, "step": 30840 }, { "epoch": 0.88, "grad_norm": 6.385581416317082, "learning_rate": 3.534225210724379e-07, "loss": 0.4451, "step": 30841 }, { "epoch": 0.88, "grad_norm": 3.767151677433225, "learning_rate": 3.5325128119655993e-07, "loss": 0.3601, "step": 30842 }, { "epoch": 0.88, "grad_norm": 3.6517683096046882, "learning_rate": 3.5308008129608827e-07, "loss": 0.2746, "step": 30843 }, { "epoch": 0.88, "grad_norm": 4.307930003802705, "learning_rate": 3.529089213724973e-07, "loss": 0.4051, "step": 30844 }, { "epoch": 0.88, "grad_norm": 5.06360498778645, "learning_rate": 3.5273780142725923e-07, "loss": 0.2731, "step": 30845 }, { "epoch": 0.88, "grad_norm": 5.2911114238685695, "learning_rate": 3.5256672146184445e-07, "loss": 0.4642, "step": 30846 }, { "epoch": 0.88, "grad_norm": 7.4936998609022, "learning_rate": 3.5239568147772686e-07, "loss": 0.5846, "step": 30847 }, { "epoch": 0.88, "grad_norm": 7.229284023537943, "learning_rate": 3.522246814763758e-07, "loss": 0.8792, "step": 30848 }, { "epoch": 0.88, "grad_norm": 7.034190126935888, "learning_rate": 3.520537214592645e-07, "loss": 0.5144, "step": 30849 }, { "epoch": 0.88, "grad_norm": 2.9120512252313278, "learning_rate": 3.51882801427863e-07, "loss": 0.2422, "step": 30850 }, { "epoch": 0.88, "grad_norm": 6.905971745888111, "learning_rate": 3.5171192138364065e-07, "loss": 0.3758, "step": 30851 }, { "epoch": 0.88, "grad_norm": 4.416123440762165, "learning_rate": 3.51541081328069e-07, "loss": 0.3033, "step": 30852 }, { "epoch": 0.88, "grad_norm": 5.172711952922422, "learning_rate": 3.513702812626163e-07, "loss": 0.1872, "step": 30853 }, { "epoch": 0.88, "grad_norm": 4.638558335370328, "learning_rate": 3.5119952118875365e-07, "loss": 0.5524, "step": 30854 }, { "epoch": 0.88, "grad_norm": 4.463257355094366, "learning_rate": 3.5102880110794824e-07, "loss": 0.3293, "step": 30855 }, { "epoch": 0.88, "grad_norm": 10.757923266935537, "learning_rate": 3.508581210216705e-07, "loss": 0.4552, "step": 30856 }, { "epoch": 0.88, "grad_norm": 3.748435457013596, "learning_rate": 3.5068748093138815e-07, "loss": 0.1606, "step": 30857 }, { "epoch": 0.88, "grad_norm": 2.2953276060475916, "learning_rate": 3.505168808385684e-07, "loss": 0.0713, "step": 30858 }, { "epoch": 0.88, "grad_norm": 4.3329702173217965, "learning_rate": 3.503463207446789e-07, "loss": 0.38, "step": 30859 }, { "epoch": 0.88, "grad_norm": 6.258859611086242, "learning_rate": 3.5017580065118796e-07, "loss": 0.7332, "step": 30860 }, { "epoch": 0.88, "grad_norm": 9.48008849863199, "learning_rate": 3.5000532055956274e-07, "loss": 0.8023, "step": 30861 }, { "epoch": 0.88, "grad_norm": 1.9862013694806862, "learning_rate": 3.4983488047126814e-07, "loss": 0.3022, "step": 30862 }, { "epoch": 0.88, "grad_norm": 2.3800107649952396, "learning_rate": 3.4966448038777245e-07, "loss": 0.1034, "step": 30863 }, { "epoch": 0.88, "grad_norm": 5.910906535427255, "learning_rate": 3.4949412031054065e-07, "loss": 0.4274, "step": 30864 }, { "epoch": 0.88, "grad_norm": 5.891145475752529, "learning_rate": 3.493238002410382e-07, "loss": 0.3931, "step": 30865 }, { "epoch": 0.88, "grad_norm": 5.39333401487053, "learning_rate": 3.4915352018072944e-07, "loss": 0.4412, "step": 30866 }, { "epoch": 0.88, "grad_norm": 3.156330466425019, "learning_rate": 3.489832801310811e-07, "loss": 0.2359, "step": 30867 }, { "epoch": 0.88, "grad_norm": 6.134121544956205, "learning_rate": 3.488130800935574e-07, "loss": 0.2241, "step": 30868 }, { "epoch": 0.88, "grad_norm": 5.087501906411004, "learning_rate": 3.486429200696212e-07, "loss": 0.5837, "step": 30869 }, { "epoch": 0.88, "grad_norm": 4.321314400636343, "learning_rate": 3.484728000607379e-07, "loss": 0.3727, "step": 30870 }, { "epoch": 0.88, "grad_norm": 5.505364099739183, "learning_rate": 3.4830272006837084e-07, "loss": 0.4116, "step": 30871 }, { "epoch": 0.88, "grad_norm": 5.802874604196405, "learning_rate": 3.481326800939827e-07, "loss": 0.7466, "step": 30872 }, { "epoch": 0.88, "grad_norm": 3.3834330317805197, "learning_rate": 3.479626801390351e-07, "loss": 0.1426, "step": 30873 }, { "epoch": 0.88, "grad_norm": 4.2533839161674285, "learning_rate": 3.4779272020499297e-07, "loss": 0.4548, "step": 30874 }, { "epoch": 0.88, "grad_norm": 5.54740358439927, "learning_rate": 3.476228002933163e-07, "loss": 0.6004, "step": 30875 }, { "epoch": 0.88, "grad_norm": 3.7922810521352974, "learning_rate": 3.4745292040546775e-07, "loss": 0.3251, "step": 30876 }, { "epoch": 0.88, "grad_norm": 4.1267037918878735, "learning_rate": 3.472830805429106e-07, "loss": 0.3751, "step": 30877 }, { "epoch": 0.88, "grad_norm": 8.30521911469841, "learning_rate": 3.4711328070710313e-07, "loss": 0.781, "step": 30878 }, { "epoch": 0.88, "grad_norm": 16.60102061385844, "learning_rate": 3.4694352089950756e-07, "loss": 0.4462, "step": 30879 }, { "epoch": 0.88, "grad_norm": 5.638773394979875, "learning_rate": 3.467738011215832e-07, "loss": 0.6355, "step": 30880 }, { "epoch": 0.88, "grad_norm": 1.934271957444658, "learning_rate": 3.466041213747917e-07, "loss": 0.1565, "step": 30881 }, { "epoch": 0.88, "grad_norm": 7.071994442337751, "learning_rate": 3.464344816605913e-07, "loss": 0.4134, "step": 30882 }, { "epoch": 0.88, "grad_norm": 2.8421663129372288, "learning_rate": 3.462648819804426e-07, "loss": 0.3396, "step": 30883 }, { "epoch": 0.88, "grad_norm": 14.43577971675166, "learning_rate": 3.460953223358049e-07, "loss": 0.5926, "step": 30884 }, { "epoch": 0.88, "grad_norm": 5.041288750516356, "learning_rate": 3.459258027281348e-07, "loss": 0.6639, "step": 30885 }, { "epoch": 0.88, "grad_norm": 8.792584689212141, "learning_rate": 3.4575632315889284e-07, "loss": 0.5308, "step": 30886 }, { "epoch": 0.88, "grad_norm": 6.257467272280836, "learning_rate": 3.455868836295351e-07, "loss": 0.7115, "step": 30887 }, { "epoch": 0.88, "grad_norm": 4.786950048427892, "learning_rate": 3.4541748414152143e-07, "loss": 0.3491, "step": 30888 }, { "epoch": 0.88, "grad_norm": 10.710199977599888, "learning_rate": 3.4524812469630743e-07, "loss": 0.4994, "step": 30889 }, { "epoch": 0.88, "grad_norm": 4.807323570049167, "learning_rate": 3.450788052953513e-07, "loss": 0.3681, "step": 30890 }, { "epoch": 0.88, "grad_norm": 5.039673051144574, "learning_rate": 3.4490952594010864e-07, "loss": 0.3578, "step": 30891 }, { "epoch": 0.88, "grad_norm": 6.145337807889106, "learning_rate": 3.4474028663203595e-07, "loss": 0.4689, "step": 30892 }, { "epoch": 0.88, "grad_norm": 3.8630746722557365, "learning_rate": 3.4457108737258993e-07, "loss": 0.4845, "step": 30893 }, { "epoch": 0.88, "grad_norm": 7.956838583919453, "learning_rate": 3.4440192816322547e-07, "loss": 0.4346, "step": 30894 }, { "epoch": 0.88, "grad_norm": 9.04924343909013, "learning_rate": 3.442328090053981e-07, "loss": 0.4647, "step": 30895 }, { "epoch": 0.88, "grad_norm": 4.381251677704625, "learning_rate": 3.4406372990056225e-07, "loss": 0.1943, "step": 30896 }, { "epoch": 0.88, "grad_norm": 6.558264937118896, "learning_rate": 3.4389469085017333e-07, "loss": 0.5155, "step": 30897 }, { "epoch": 0.88, "grad_norm": 4.107833660270019, "learning_rate": 3.437256918556853e-07, "loss": 0.3624, "step": 30898 }, { "epoch": 0.88, "grad_norm": 3.8763328536502395, "learning_rate": 3.4355673291855185e-07, "loss": 0.4217, "step": 30899 }, { "epoch": 0.88, "grad_norm": 5.408456826036908, "learning_rate": 3.433878140402258e-07, "loss": 0.3453, "step": 30900 }, { "epoch": 0.88, "grad_norm": 9.073970586016484, "learning_rate": 3.43218935222161e-07, "loss": 0.4004, "step": 30901 }, { "epoch": 0.88, "grad_norm": 7.81918085925197, "learning_rate": 3.4305009646581065e-07, "loss": 0.217, "step": 30902 }, { "epoch": 0.88, "grad_norm": 3.461390928441935, "learning_rate": 3.4288129777262704e-07, "loss": 0.4266, "step": 30903 }, { "epoch": 0.89, "grad_norm": 5.196287731908187, "learning_rate": 3.4271253914406334e-07, "loss": 0.4934, "step": 30904 }, { "epoch": 0.89, "grad_norm": 3.576918052775512, "learning_rate": 3.425438205815684e-07, "loss": 0.4241, "step": 30905 }, { "epoch": 0.89, "grad_norm": 8.038624179705453, "learning_rate": 3.423751420865967e-07, "loss": 0.4849, "step": 30906 }, { "epoch": 0.89, "grad_norm": 5.024271704433703, "learning_rate": 3.4220650366059704e-07, "loss": 0.4417, "step": 30907 }, { "epoch": 0.89, "grad_norm": 2.1771475647085894, "learning_rate": 3.420379053050227e-07, "loss": 0.1907, "step": 30908 }, { "epoch": 0.89, "grad_norm": 10.157028027591116, "learning_rate": 3.4186934702132134e-07, "loss": 0.8599, "step": 30909 }, { "epoch": 0.89, "grad_norm": 11.406097160255966, "learning_rate": 3.417008288109458e-07, "loss": 0.4869, "step": 30910 }, { "epoch": 0.89, "grad_norm": 4.787486951327159, "learning_rate": 3.415323506753443e-07, "loss": 0.3594, "step": 30911 }, { "epoch": 0.89, "grad_norm": 2.951008860985519, "learning_rate": 3.413639126159657e-07, "loss": 0.2095, "step": 30912 }, { "epoch": 0.89, "grad_norm": 9.027200180225986, "learning_rate": 3.411955146342605e-07, "loss": 0.7595, "step": 30913 }, { "epoch": 0.89, "grad_norm": 5.681243809192, "learning_rate": 3.410271567316753e-07, "loss": 0.5859, "step": 30914 }, { "epoch": 0.89, "grad_norm": 4.021200235786382, "learning_rate": 3.408588389096612e-07, "loss": 0.4486, "step": 30915 }, { "epoch": 0.89, "grad_norm": 4.736470882191369, "learning_rate": 3.4069056116966423e-07, "loss": 0.2181, "step": 30916 }, { "epoch": 0.89, "grad_norm": 4.11549047135812, "learning_rate": 3.405223235131333e-07, "loss": 0.4119, "step": 30917 }, { "epoch": 0.89, "grad_norm": 2.06977860386308, "learning_rate": 3.4035412594151495e-07, "loss": 0.1931, "step": 30918 }, { "epoch": 0.89, "grad_norm": 7.7306840509057135, "learning_rate": 3.4018596845625585e-07, "loss": 0.6321, "step": 30919 }, { "epoch": 0.89, "grad_norm": 3.728579792891309, "learning_rate": 3.400178510588037e-07, "loss": 0.2535, "step": 30920 }, { "epoch": 0.89, "grad_norm": 3.524133719194571, "learning_rate": 3.398497737506035e-07, "loss": 0.3121, "step": 30921 }, { "epoch": 0.89, "grad_norm": 2.912191296454324, "learning_rate": 3.396817365331029e-07, "loss": 0.2468, "step": 30922 }, { "epoch": 0.89, "grad_norm": 2.391012821030714, "learning_rate": 3.395137394077458e-07, "loss": 0.1115, "step": 30923 }, { "epoch": 0.89, "grad_norm": 5.5906059977004325, "learning_rate": 3.393457823759788e-07, "loss": 0.2176, "step": 30924 }, { "epoch": 0.89, "grad_norm": 7.909929828935377, "learning_rate": 3.391778654392458e-07, "loss": 0.1275, "step": 30925 }, { "epoch": 0.89, "grad_norm": 4.2642576939688475, "learning_rate": 3.390099885989917e-07, "loss": 0.2826, "step": 30926 }, { "epoch": 0.89, "grad_norm": 4.00854134578349, "learning_rate": 3.388421518566609e-07, "loss": 0.4351, "step": 30927 }, { "epoch": 0.89, "grad_norm": 4.595260683122298, "learning_rate": 3.3867435521369675e-07, "loss": 0.3193, "step": 30928 }, { "epoch": 0.89, "grad_norm": 6.000050306109299, "learning_rate": 3.385065986715441e-07, "loss": 0.5346, "step": 30929 }, { "epoch": 0.89, "grad_norm": 3.227977654786275, "learning_rate": 3.3833888223164415e-07, "loss": 0.3659, "step": 30930 }, { "epoch": 0.89, "grad_norm": 4.9435726558353785, "learning_rate": 3.3817120589544283e-07, "loss": 0.3358, "step": 30931 }, { "epoch": 0.89, "grad_norm": 4.915425534575705, "learning_rate": 3.380035696643785e-07, "loss": 0.5449, "step": 30932 }, { "epoch": 0.89, "grad_norm": 4.798738546315561, "learning_rate": 3.378359735398956e-07, "loss": 0.6767, "step": 30933 }, { "epoch": 0.89, "grad_norm": 3.176106134854907, "learning_rate": 3.3766841752343683e-07, "loss": 0.3193, "step": 30934 }, { "epoch": 0.89, "grad_norm": 4.427452155812697, "learning_rate": 3.3750090161644213e-07, "loss": 0.372, "step": 30935 }, { "epoch": 0.89, "grad_norm": 2.4768839907236346, "learning_rate": 3.373334258203531e-07, "loss": 0.1236, "step": 30936 }, { "epoch": 0.89, "grad_norm": 3.7970964971541825, "learning_rate": 3.371659901366109e-07, "loss": 0.3256, "step": 30937 }, { "epoch": 0.89, "grad_norm": 6.74853305417787, "learning_rate": 3.3699859456665593e-07, "loss": 0.2739, "step": 30938 }, { "epoch": 0.89, "grad_norm": 9.829225511901654, "learning_rate": 3.368312391119266e-07, "loss": 0.5356, "step": 30939 }, { "epoch": 0.89, "grad_norm": 5.631489168395306, "learning_rate": 3.36663923773865e-07, "loss": 0.3886, "step": 30940 }, { "epoch": 0.89, "grad_norm": 5.867830365717484, "learning_rate": 3.3649664855390893e-07, "loss": 0.421, "step": 30941 }, { "epoch": 0.89, "grad_norm": 4.072377740794019, "learning_rate": 3.3632941345349777e-07, "loss": 0.5572, "step": 30942 }, { "epoch": 0.89, "grad_norm": 5.306727875128102, "learning_rate": 3.361622184740715e-07, "loss": 0.3786, "step": 30943 }, { "epoch": 0.89, "grad_norm": 5.616057514396099, "learning_rate": 3.3599506361706724e-07, "loss": 0.2108, "step": 30944 }, { "epoch": 0.89, "grad_norm": 6.960804570873632, "learning_rate": 3.358279488839228e-07, "loss": 0.4921, "step": 30945 }, { "epoch": 0.89, "grad_norm": 4.401523517060909, "learning_rate": 3.3566087427607585e-07, "loss": 0.5088, "step": 30946 }, { "epoch": 0.89, "grad_norm": 4.799985442536849, "learning_rate": 3.3549383979496477e-07, "loss": 0.5648, "step": 30947 }, { "epoch": 0.89, "grad_norm": 3.261233904774354, "learning_rate": 3.3532684544202554e-07, "loss": 0.4856, "step": 30948 }, { "epoch": 0.89, "grad_norm": 6.478100344458703, "learning_rate": 3.351598912186954e-07, "loss": 0.6652, "step": 30949 }, { "epoch": 0.89, "grad_norm": 2.6128542085021196, "learning_rate": 3.349929771264099e-07, "loss": 0.3181, "step": 30950 }, { "epoch": 0.89, "grad_norm": 4.631059697780624, "learning_rate": 3.348261031666061e-07, "loss": 0.5052, "step": 30951 }, { "epoch": 0.89, "grad_norm": 4.025516158232967, "learning_rate": 3.346592693407186e-07, "loss": 0.4934, "step": 30952 }, { "epoch": 0.89, "grad_norm": 3.8299477422251678, "learning_rate": 3.344924756501827e-07, "loss": 0.5412, "step": 30953 }, { "epoch": 0.89, "grad_norm": 4.892324597153481, "learning_rate": 3.343257220964341e-07, "loss": 0.4798, "step": 30954 }, { "epoch": 0.89, "grad_norm": 19.492919908115088, "learning_rate": 3.34159008680906e-07, "loss": 0.6917, "step": 30955 }, { "epoch": 0.89, "grad_norm": 4.891071530758279, "learning_rate": 3.339923354050345e-07, "loss": 0.2561, "step": 30956 }, { "epoch": 0.89, "grad_norm": 6.015928582792256, "learning_rate": 3.338257022702518e-07, "loss": 0.3645, "step": 30957 }, { "epoch": 0.89, "grad_norm": 4.257534696983033, "learning_rate": 3.336591092779928e-07, "loss": 0.6427, "step": 30958 }, { "epoch": 0.89, "grad_norm": 4.23387435825054, "learning_rate": 3.334925564296898e-07, "loss": 0.1767, "step": 30959 }, { "epoch": 0.89, "grad_norm": 5.199219116852917, "learning_rate": 3.333260437267749e-07, "loss": 0.4413, "step": 30960 }, { "epoch": 0.89, "grad_norm": 5.176335511670029, "learning_rate": 3.331595711706825e-07, "loss": 0.3814, "step": 30961 }, { "epoch": 0.89, "grad_norm": 8.492038027124348, "learning_rate": 3.3299313876284266e-07, "loss": 0.723, "step": 30962 }, { "epoch": 0.89, "grad_norm": 3.130591216237289, "learning_rate": 3.328267465046892e-07, "loss": 0.3115, "step": 30963 }, { "epoch": 0.89, "grad_norm": 9.331191154851437, "learning_rate": 3.326603943976525e-07, "loss": 0.4083, "step": 30964 }, { "epoch": 0.89, "grad_norm": 2.1692282102747162, "learning_rate": 3.3249408244316385e-07, "loss": 0.1369, "step": 30965 }, { "epoch": 0.89, "grad_norm": 7.229263163938521, "learning_rate": 3.3232781064265306e-07, "loss": 0.4962, "step": 30966 }, { "epoch": 0.89, "grad_norm": 3.838090902058071, "learning_rate": 3.321615789975513e-07, "loss": 0.3756, "step": 30967 }, { "epoch": 0.89, "grad_norm": 10.238918408329285, "learning_rate": 3.3199538750929016e-07, "loss": 0.8303, "step": 30968 }, { "epoch": 0.89, "grad_norm": 10.99634213751434, "learning_rate": 3.318292361792963e-07, "loss": 0.8661, "step": 30969 }, { "epoch": 0.89, "grad_norm": 6.548435398502873, "learning_rate": 3.316631250090019e-07, "loss": 0.526, "step": 30970 }, { "epoch": 0.89, "grad_norm": 4.473678165614765, "learning_rate": 3.3149705399983523e-07, "loss": 0.588, "step": 30971 }, { "epoch": 0.89, "grad_norm": 5.230340934713047, "learning_rate": 3.3133102315322407e-07, "loss": 0.3387, "step": 30972 }, { "epoch": 0.89, "grad_norm": 6.3453890557498, "learning_rate": 3.3116503247059673e-07, "loss": 0.207, "step": 30973 }, { "epoch": 0.89, "grad_norm": 3.870182796271566, "learning_rate": 3.3099908195338205e-07, "loss": 0.4821, "step": 30974 }, { "epoch": 0.89, "grad_norm": 10.877018971311509, "learning_rate": 3.3083317160300775e-07, "loss": 0.6244, "step": 30975 }, { "epoch": 0.89, "grad_norm": 3.8365150995721757, "learning_rate": 3.306673014209e-07, "loss": 0.1749, "step": 30976 }, { "epoch": 0.89, "grad_norm": 5.603761462153724, "learning_rate": 3.3050147140848756e-07, "loss": 0.5131, "step": 30977 }, { "epoch": 0.89, "grad_norm": 4.888211638991567, "learning_rate": 3.3033568156719595e-07, "loss": 0.6247, "step": 30978 }, { "epoch": 0.89, "grad_norm": 1.4002299994760181, "learning_rate": 3.3016993189845194e-07, "loss": 0.0912, "step": 30979 }, { "epoch": 0.89, "grad_norm": 2.8347309793260917, "learning_rate": 3.300042224036798e-07, "loss": 0.2475, "step": 30980 }, { "epoch": 0.89, "grad_norm": 9.702870830947948, "learning_rate": 3.2983855308430735e-07, "loss": 0.4471, "step": 30981 }, { "epoch": 0.89, "grad_norm": 6.309285053369682, "learning_rate": 3.296729239417579e-07, "loss": 0.3085, "step": 30982 }, { "epoch": 0.89, "grad_norm": 4.264257050993196, "learning_rate": 3.295073349774569e-07, "loss": 0.2696, "step": 30983 }, { "epoch": 0.89, "grad_norm": 5.927045854428756, "learning_rate": 3.293417861928305e-07, "loss": 0.3329, "step": 30984 }, { "epoch": 0.89, "grad_norm": 4.1380651658478, "learning_rate": 3.291762775893015e-07, "loss": 0.4348, "step": 30985 }, { "epoch": 0.89, "grad_norm": 5.642710799376951, "learning_rate": 3.290108091682942e-07, "loss": 0.3316, "step": 30986 }, { "epoch": 0.89, "grad_norm": 6.3372843824195275, "learning_rate": 3.288453809312309e-07, "loss": 0.7108, "step": 30987 }, { "epoch": 0.89, "grad_norm": 4.950756963014437, "learning_rate": 3.286799928795359e-07, "loss": 0.54, "step": 30988 }, { "epoch": 0.89, "grad_norm": 3.034085785806582, "learning_rate": 3.2851464501463147e-07, "loss": 0.144, "step": 30989 }, { "epoch": 0.89, "grad_norm": 5.155719013039049, "learning_rate": 3.283493373379415e-07, "loss": 0.2509, "step": 30990 }, { "epoch": 0.89, "grad_norm": 4.52483326620549, "learning_rate": 3.281840698508865e-07, "loss": 0.5864, "step": 30991 }, { "epoch": 0.89, "grad_norm": 6.202012276960547, "learning_rate": 3.2801884255488803e-07, "loss": 0.4354, "step": 30992 }, { "epoch": 0.89, "grad_norm": 7.0718951903900775, "learning_rate": 3.278536554513689e-07, "loss": 0.3906, "step": 30993 }, { "epoch": 0.89, "grad_norm": 4.333189992490157, "learning_rate": 3.276885085417486e-07, "loss": 0.3544, "step": 30994 }, { "epoch": 0.89, "grad_norm": 3.633142567068272, "learning_rate": 3.2752340182744977e-07, "loss": 0.1702, "step": 30995 }, { "epoch": 0.89, "grad_norm": 6.008032448739403, "learning_rate": 3.273583353098914e-07, "loss": 0.1907, "step": 30996 }, { "epoch": 0.89, "grad_norm": 5.978767855787242, "learning_rate": 3.271933089904944e-07, "loss": 0.4684, "step": 30997 }, { "epoch": 0.89, "grad_norm": 9.512236995795709, "learning_rate": 3.2702832287067833e-07, "loss": 0.8068, "step": 30998 }, { "epoch": 0.89, "grad_norm": 8.549382039096583, "learning_rate": 3.2686337695186087e-07, "loss": 0.6826, "step": 30999 }, { "epoch": 0.89, "grad_norm": 1.6380366934618487, "learning_rate": 3.266984712354637e-07, "loss": 0.0984, "step": 31000 }, { "epoch": 0.89, "grad_norm": 2.3328737305809057, "learning_rate": 3.265336057229029e-07, "loss": 0.2438, "step": 31001 }, { "epoch": 0.89, "grad_norm": 6.402706710832932, "learning_rate": 3.263687804155996e-07, "loss": 0.3313, "step": 31002 }, { "epoch": 0.89, "grad_norm": 5.519471294404109, "learning_rate": 3.262039953149687e-07, "loss": 0.6778, "step": 31003 }, { "epoch": 0.89, "grad_norm": 3.6517724227796626, "learning_rate": 3.260392504224308e-07, "loss": 0.1166, "step": 31004 }, { "epoch": 0.89, "grad_norm": 3.8515033833452836, "learning_rate": 3.258745457394014e-07, "loss": 0.5761, "step": 31005 }, { "epoch": 0.89, "grad_norm": 8.446204633287484, "learning_rate": 3.257098812672982e-07, "loss": 0.698, "step": 31006 }, { "epoch": 0.89, "grad_norm": 2.7690284390943636, "learning_rate": 3.2554525700753626e-07, "loss": 0.2601, "step": 31007 }, { "epoch": 0.89, "grad_norm": 6.034276331485538, "learning_rate": 3.2538067296153277e-07, "loss": 0.5131, "step": 31008 }, { "epoch": 0.89, "grad_norm": 5.480193323567881, "learning_rate": 3.252161291307049e-07, "loss": 0.3395, "step": 31009 }, { "epoch": 0.89, "grad_norm": 8.719943275967108, "learning_rate": 3.25051625516466e-07, "loss": 0.261, "step": 31010 }, { "epoch": 0.89, "grad_norm": 5.690978820292157, "learning_rate": 3.2488716212023317e-07, "loss": 0.1655, "step": 31011 }, { "epoch": 0.89, "grad_norm": 6.507578723092772, "learning_rate": 3.247227389434204e-07, "loss": 0.2985, "step": 31012 }, { "epoch": 0.89, "grad_norm": 7.39901566789115, "learning_rate": 3.245583559874427e-07, "loss": 0.8826, "step": 31013 }, { "epoch": 0.89, "grad_norm": 7.070921257325093, "learning_rate": 3.243940132537127e-07, "loss": 0.4401, "step": 31014 }, { "epoch": 0.89, "grad_norm": 4.81757624539594, "learning_rate": 3.242297107436454e-07, "loss": 0.3769, "step": 31015 }, { "epoch": 0.89, "grad_norm": 3.9340386693088605, "learning_rate": 3.240654484586553e-07, "loss": 0.2378, "step": 31016 }, { "epoch": 0.89, "grad_norm": 4.238849204546907, "learning_rate": 3.2390122640015343e-07, "loss": 0.3101, "step": 31017 }, { "epoch": 0.89, "grad_norm": 5.113291321490272, "learning_rate": 3.2373704456955425e-07, "loss": 0.4564, "step": 31018 }, { "epoch": 0.89, "grad_norm": 4.335817187590374, "learning_rate": 3.235729029682694e-07, "loss": 0.4259, "step": 31019 }, { "epoch": 0.89, "grad_norm": 5.411953866377136, "learning_rate": 3.234088015977116e-07, "loss": 0.5494, "step": 31020 }, { "epoch": 0.89, "grad_norm": 6.9726907852129685, "learning_rate": 3.232447404592909e-07, "loss": 0.2927, "step": 31021 }, { "epoch": 0.89, "grad_norm": 5.813653380288888, "learning_rate": 3.2308071955442056e-07, "loss": 0.4218, "step": 31022 }, { "epoch": 0.89, "grad_norm": 4.469763314127502, "learning_rate": 3.2291673888451056e-07, "loss": 0.277, "step": 31023 }, { "epoch": 0.89, "grad_norm": 5.53900099102782, "learning_rate": 3.227527984509721e-07, "loss": 0.4642, "step": 31024 }, { "epoch": 0.89, "grad_norm": 7.702811708714531, "learning_rate": 3.225888982552161e-07, "loss": 0.6499, "step": 31025 }, { "epoch": 0.89, "grad_norm": 7.838280035745227, "learning_rate": 3.22425038298651e-07, "loss": 0.4246, "step": 31026 }, { "epoch": 0.89, "grad_norm": 6.970971407741802, "learning_rate": 3.222612185826879e-07, "loss": 0.6922, "step": 31027 }, { "epoch": 0.89, "grad_norm": 6.342774414814205, "learning_rate": 3.2209743910873503e-07, "loss": 0.517, "step": 31028 }, { "epoch": 0.89, "grad_norm": 11.616429030306072, "learning_rate": 3.21933699878203e-07, "loss": 0.6854, "step": 31029 }, { "epoch": 0.89, "grad_norm": 2.000150898366831, "learning_rate": 3.2177000089249787e-07, "loss": 0.2333, "step": 31030 }, { "epoch": 0.89, "grad_norm": 10.086037916751765, "learning_rate": 3.216063421530308e-07, "loss": 0.635, "step": 31031 }, { "epoch": 0.89, "grad_norm": 4.905509455488115, "learning_rate": 3.2144272366120835e-07, "loss": 0.2841, "step": 31032 }, { "epoch": 0.89, "grad_norm": 10.341929364819118, "learning_rate": 3.2127914541843666e-07, "loss": 0.4011, "step": 31033 }, { "epoch": 0.89, "grad_norm": 6.90112998665968, "learning_rate": 3.211156074261257e-07, "loss": 0.8934, "step": 31034 }, { "epoch": 0.89, "grad_norm": 7.207490140370305, "learning_rate": 3.209521096856805e-07, "loss": 0.855, "step": 31035 }, { "epoch": 0.89, "grad_norm": 2.829928250026681, "learning_rate": 3.207886521985087e-07, "loss": 0.2926, "step": 31036 }, { "epoch": 0.89, "grad_norm": 3.512735747846181, "learning_rate": 3.206252349660155e-07, "loss": 0.2089, "step": 31037 }, { "epoch": 0.89, "grad_norm": 6.9352518511464725, "learning_rate": 3.204618579896074e-07, "loss": 0.46, "step": 31038 }, { "epoch": 0.89, "grad_norm": 12.215010517129887, "learning_rate": 3.202985212706905e-07, "loss": 0.7564, "step": 31039 }, { "epoch": 0.89, "grad_norm": 5.757775253527394, "learning_rate": 3.201352248106682e-07, "loss": 0.64, "step": 31040 }, { "epoch": 0.89, "grad_norm": 7.744627105325945, "learning_rate": 3.1997196861094704e-07, "loss": 0.6401, "step": 31041 }, { "epoch": 0.89, "grad_norm": 5.653518607384517, "learning_rate": 3.1980875267292985e-07, "loss": 0.4544, "step": 31042 }, { "epoch": 0.89, "grad_norm": 5.472057541221221, "learning_rate": 3.196455769980228e-07, "loss": 0.3659, "step": 31043 }, { "epoch": 0.89, "grad_norm": 5.8405157104521255, "learning_rate": 3.1948244158762796e-07, "loss": 0.2257, "step": 31044 }, { "epoch": 0.89, "grad_norm": 6.729696715494527, "learning_rate": 3.193193464431499e-07, "loss": 0.671, "step": 31045 }, { "epoch": 0.89, "grad_norm": 7.322265331953099, "learning_rate": 3.191562915659913e-07, "loss": 0.4759, "step": 31046 }, { "epoch": 0.89, "grad_norm": 2.9388143155788895, "learning_rate": 3.1899327695755486e-07, "loss": 0.3897, "step": 31047 }, { "epoch": 0.89, "grad_norm": 3.00107104335514, "learning_rate": 3.1883030261924185e-07, "loss": 0.2345, "step": 31048 }, { "epoch": 0.89, "grad_norm": 3.553252146248467, "learning_rate": 3.186673685524561e-07, "loss": 0.2804, "step": 31049 }, { "epoch": 0.89, "grad_norm": 5.912452972579939, "learning_rate": 3.1850447475859856e-07, "loss": 0.3381, "step": 31050 }, { "epoch": 0.89, "grad_norm": 5.8407743497136115, "learning_rate": 3.1834162123907106e-07, "loss": 0.2426, "step": 31051 }, { "epoch": 0.89, "grad_norm": 6.669119486990055, "learning_rate": 3.181788079952741e-07, "loss": 0.4178, "step": 31052 }, { "epoch": 0.89, "grad_norm": 3.359834116235502, "learning_rate": 3.180160350286077e-07, "loss": 0.1608, "step": 31053 }, { "epoch": 0.89, "grad_norm": 7.880516784104066, "learning_rate": 3.178533023404734e-07, "loss": 0.8091, "step": 31054 }, { "epoch": 0.89, "grad_norm": 9.180882568485893, "learning_rate": 3.1769060993226963e-07, "loss": 0.5443, "step": 31055 }, { "epoch": 0.89, "grad_norm": 4.510549378883735, "learning_rate": 3.175279578053986e-07, "loss": 0.3426, "step": 31056 }, { "epoch": 0.89, "grad_norm": 2.9863511677782975, "learning_rate": 3.173653459612569e-07, "loss": 0.1955, "step": 31057 }, { "epoch": 0.89, "grad_norm": 4.483535448763124, "learning_rate": 3.1720277440124513e-07, "loss": 0.2135, "step": 31058 }, { "epoch": 0.89, "grad_norm": 6.974783299952683, "learning_rate": 3.170402431267616e-07, "loss": 0.6048, "step": 31059 }, { "epoch": 0.89, "grad_norm": 6.628685483926056, "learning_rate": 3.168777521392036e-07, "loss": 0.6289, "step": 31060 }, { "epoch": 0.89, "grad_norm": 3.3575952117894867, "learning_rate": 3.1671530143996987e-07, "loss": 0.2087, "step": 31061 }, { "epoch": 0.89, "grad_norm": 7.162798754468066, "learning_rate": 3.1655289103045773e-07, "loss": 0.543, "step": 31062 }, { "epoch": 0.89, "grad_norm": 8.523283540085211, "learning_rate": 3.163905209120649e-07, "loss": 0.2503, "step": 31063 }, { "epoch": 0.89, "grad_norm": 3.8230090151797005, "learning_rate": 3.1622819108618695e-07, "loss": 0.3433, "step": 31064 }, { "epoch": 0.89, "grad_norm": 5.837425603943881, "learning_rate": 3.1606590155422224e-07, "loss": 0.4154, "step": 31065 }, { "epoch": 0.89, "grad_norm": 3.0775478809576784, "learning_rate": 3.159036523175657e-07, "loss": 0.2975, "step": 31066 }, { "epoch": 0.89, "grad_norm": 6.791743443841059, "learning_rate": 3.1574144337761236e-07, "loss": 0.5415, "step": 31067 }, { "epoch": 0.89, "grad_norm": 6.3520370249834235, "learning_rate": 3.1557927473575943e-07, "loss": 0.4972, "step": 31068 }, { "epoch": 0.89, "grad_norm": 9.428644990221212, "learning_rate": 3.154171463934008e-07, "loss": 0.7947, "step": 31069 }, { "epoch": 0.89, "grad_norm": 3.8898236472059904, "learning_rate": 3.15255058351932e-07, "loss": 0.4228, "step": 31070 }, { "epoch": 0.89, "grad_norm": 5.349470278103912, "learning_rate": 3.1509301061274645e-07, "loss": 0.2639, "step": 31071 }, { "epoch": 0.89, "grad_norm": 4.952589878574223, "learning_rate": 3.149310031772401e-07, "loss": 0.6397, "step": 31072 }, { "epoch": 0.89, "grad_norm": 4.297596430470538, "learning_rate": 3.147690360468053e-07, "loss": 0.2872, "step": 31073 }, { "epoch": 0.89, "grad_norm": 3.7008069344117094, "learning_rate": 3.1460710922283476e-07, "loss": 0.3913, "step": 31074 }, { "epoch": 0.89, "grad_norm": 5.197093916346278, "learning_rate": 3.1444522270672294e-07, "loss": 0.216, "step": 31075 }, { "epoch": 0.89, "grad_norm": 5.06190404798616, "learning_rate": 3.142833764998615e-07, "loss": 0.4951, "step": 31076 }, { "epoch": 0.89, "grad_norm": 5.5498710170569945, "learning_rate": 3.141215706036438e-07, "loss": 0.3106, "step": 31077 }, { "epoch": 0.89, "grad_norm": 3.6192184744550993, "learning_rate": 3.1395980501946087e-07, "loss": 0.5108, "step": 31078 }, { "epoch": 0.89, "grad_norm": 5.799402472530348, "learning_rate": 3.137980797487061e-07, "loss": 0.3578, "step": 31079 }, { "epoch": 0.89, "grad_norm": 4.609016999388598, "learning_rate": 3.136363947927679e-07, "loss": 0.3772, "step": 31080 }, { "epoch": 0.89, "grad_norm": 2.1821469340131348, "learning_rate": 3.1347475015303883e-07, "loss": 0.1401, "step": 31081 }, { "epoch": 0.89, "grad_norm": 5.853072965913065, "learning_rate": 3.133131458309102e-07, "loss": 0.4953, "step": 31082 }, { "epoch": 0.89, "grad_norm": 7.609865114066791, "learning_rate": 3.131515818277708e-07, "loss": 0.5463, "step": 31083 }, { "epoch": 0.89, "grad_norm": 8.400982479132132, "learning_rate": 3.129900581450118e-07, "loss": 0.8103, "step": 31084 }, { "epoch": 0.89, "grad_norm": 6.955837380783691, "learning_rate": 3.1282857478402263e-07, "loss": 0.5692, "step": 31085 }, { "epoch": 0.89, "grad_norm": 6.478574654209897, "learning_rate": 3.126671317461921e-07, "loss": 0.3202, "step": 31086 }, { "epoch": 0.89, "grad_norm": 2.1125209166828673, "learning_rate": 3.125057290329081e-07, "loss": 0.1028, "step": 31087 }, { "epoch": 0.89, "grad_norm": 7.040400472109398, "learning_rate": 3.123443666455611e-07, "loss": 0.8485, "step": 31088 }, { "epoch": 0.89, "grad_norm": 4.367996959700941, "learning_rate": 3.1218304458553785e-07, "loss": 0.3775, "step": 31089 }, { "epoch": 0.89, "grad_norm": 7.262557302961291, "learning_rate": 3.1202176285422605e-07, "loss": 0.5362, "step": 31090 }, { "epoch": 0.89, "grad_norm": 7.774596299487863, "learning_rate": 3.1186052145301516e-07, "loss": 0.4984, "step": 31091 }, { "epoch": 0.89, "grad_norm": 3.8515969636624856, "learning_rate": 3.1169932038329076e-07, "loss": 0.3234, "step": 31092 }, { "epoch": 0.89, "grad_norm": 5.447321342000032, "learning_rate": 3.115381596464401e-07, "loss": 0.6222, "step": 31093 }, { "epoch": 0.89, "grad_norm": 6.1925594182526895, "learning_rate": 3.113770392438481e-07, "loss": 0.5975, "step": 31094 }, { "epoch": 0.89, "grad_norm": 22.167312335408123, "learning_rate": 3.1121595917690316e-07, "loss": 0.6343, "step": 31095 }, { "epoch": 0.89, "grad_norm": 4.50332522718141, "learning_rate": 3.1105491944698916e-07, "loss": 0.2675, "step": 31096 }, { "epoch": 0.89, "grad_norm": 4.7496046855460055, "learning_rate": 3.1089392005549334e-07, "loss": 0.2793, "step": 31097 }, { "epoch": 0.89, "grad_norm": 4.45362976961165, "learning_rate": 3.10732961003799e-07, "loss": 0.3257, "step": 31098 }, { "epoch": 0.89, "grad_norm": 5.940279902147451, "learning_rate": 3.1057204229329284e-07, "loss": 0.3043, "step": 31099 }, { "epoch": 0.89, "grad_norm": 4.9963855076619454, "learning_rate": 3.104111639253571e-07, "loss": 1.1305, "step": 31100 }, { "epoch": 0.89, "grad_norm": 7.051562321479267, "learning_rate": 3.102503259013767e-07, "loss": 0.6527, "step": 31101 }, { "epoch": 0.89, "grad_norm": 2.8599968947880567, "learning_rate": 3.1008952822273565e-07, "loss": 0.1816, "step": 31102 }, { "epoch": 0.89, "grad_norm": 4.527297802248437, "learning_rate": 3.099287708908166e-07, "loss": 0.3079, "step": 31103 }, { "epoch": 0.89, "grad_norm": 7.904049804686957, "learning_rate": 3.09768053907003e-07, "loss": 0.4068, "step": 31104 }, { "epoch": 0.89, "grad_norm": 7.035754172562697, "learning_rate": 3.096073772726771e-07, "loss": 0.896, "step": 31105 }, { "epoch": 0.89, "grad_norm": 5.568556285361671, "learning_rate": 3.0944674098922213e-07, "loss": 0.494, "step": 31106 }, { "epoch": 0.89, "grad_norm": 8.624746457118897, "learning_rate": 3.0928614505801925e-07, "loss": 0.5066, "step": 31107 }, { "epoch": 0.89, "grad_norm": 4.521074906793261, "learning_rate": 3.091255894804496e-07, "loss": 0.4374, "step": 31108 }, { "epoch": 0.89, "grad_norm": 3.5259672839810166, "learning_rate": 3.0896507425789535e-07, "loss": 0.5039, "step": 31109 }, { "epoch": 0.89, "grad_norm": 4.439685364005305, "learning_rate": 3.0880459939173656e-07, "loss": 0.4513, "step": 31110 }, { "epoch": 0.89, "grad_norm": 6.027161551569144, "learning_rate": 3.086441648833549e-07, "loss": 0.6876, "step": 31111 }, { "epoch": 0.89, "grad_norm": 4.12901795379623, "learning_rate": 3.0848377073412985e-07, "loss": 0.5216, "step": 31112 }, { "epoch": 0.89, "grad_norm": 6.896337394693, "learning_rate": 3.0832341694544134e-07, "loss": 0.4972, "step": 31113 }, { "epoch": 0.89, "grad_norm": 8.493630855979395, "learning_rate": 3.081631035186677e-07, "loss": 0.5441, "step": 31114 }, { "epoch": 0.89, "grad_norm": 9.337876326855818, "learning_rate": 3.0800283045518963e-07, "loss": 0.5955, "step": 31115 }, { "epoch": 0.89, "grad_norm": 4.021230147683016, "learning_rate": 3.078425977563859e-07, "loss": 0.4872, "step": 31116 }, { "epoch": 0.89, "grad_norm": 7.142479453319336, "learning_rate": 3.076824054236338e-07, "loss": 0.3415, "step": 31117 }, { "epoch": 0.89, "grad_norm": 6.408704929301209, "learning_rate": 3.0752225345831334e-07, "loss": 0.5141, "step": 31118 }, { "epoch": 0.89, "grad_norm": 4.448147558551984, "learning_rate": 3.0736214186180113e-07, "loss": 0.5845, "step": 31119 }, { "epoch": 0.89, "grad_norm": 5.163923322270853, "learning_rate": 3.072020706354745e-07, "loss": 0.2905, "step": 31120 }, { "epoch": 0.89, "grad_norm": 7.492102072569824, "learning_rate": 3.0704203978071e-07, "loss": 0.2562, "step": 31121 }, { "epoch": 0.89, "grad_norm": 5.619648825238669, "learning_rate": 3.0688204929888497e-07, "loss": 0.3931, "step": 31122 }, { "epoch": 0.89, "grad_norm": 7.69392139234614, "learning_rate": 3.0672209919137664e-07, "loss": 0.6133, "step": 31123 }, { "epoch": 0.89, "grad_norm": 6.266708546694693, "learning_rate": 3.065621894595594e-07, "loss": 0.3607, "step": 31124 }, { "epoch": 0.89, "grad_norm": 6.406189355330571, "learning_rate": 3.064023201048105e-07, "loss": 0.3896, "step": 31125 }, { "epoch": 0.89, "grad_norm": 6.8717919927964015, "learning_rate": 3.062424911285045e-07, "loss": 0.5846, "step": 31126 }, { "epoch": 0.89, "grad_norm": 4.390512322442762, "learning_rate": 3.060827025320168e-07, "loss": 0.3535, "step": 31127 }, { "epoch": 0.89, "grad_norm": 5.047622483587339, "learning_rate": 3.059229543167208e-07, "loss": 0.6189, "step": 31128 }, { "epoch": 0.89, "grad_norm": 8.60084557035798, "learning_rate": 3.057632464839921e-07, "loss": 0.9291, "step": 31129 }, { "epoch": 0.89, "grad_norm": 5.507652507649654, "learning_rate": 3.0560357903520353e-07, "loss": 0.2625, "step": 31130 }, { "epoch": 0.89, "grad_norm": 6.306701073898806, "learning_rate": 3.054439519717295e-07, "loss": 0.3468, "step": 31131 }, { "epoch": 0.89, "grad_norm": 6.7175864165766725, "learning_rate": 3.0528436529494444e-07, "loss": 0.3879, "step": 31132 }, { "epoch": 0.89, "grad_norm": 3.949743279432119, "learning_rate": 3.0512481900621895e-07, "loss": 0.2462, "step": 31133 }, { "epoch": 0.89, "grad_norm": 5.7101472863677465, "learning_rate": 3.0496531310692743e-07, "loss": 0.5822, "step": 31134 }, { "epoch": 0.89, "grad_norm": 3.6464922127948496, "learning_rate": 3.0480584759844e-07, "loss": 0.3116, "step": 31135 }, { "epoch": 0.89, "grad_norm": 4.849515547595601, "learning_rate": 3.046464224821305e-07, "loss": 0.4292, "step": 31136 }, { "epoch": 0.89, "grad_norm": 4.50913997785831, "learning_rate": 3.044870377593695e-07, "loss": 0.671, "step": 31137 }, { "epoch": 0.89, "grad_norm": 3.841147960942428, "learning_rate": 3.043276934315287e-07, "loss": 0.1711, "step": 31138 }, { "epoch": 0.89, "grad_norm": 6.708405413348976, "learning_rate": 3.041683894999786e-07, "loss": 0.4981, "step": 31139 }, { "epoch": 0.89, "grad_norm": 3.8615313540643745, "learning_rate": 3.0400912596608934e-07, "loss": 0.2732, "step": 31140 }, { "epoch": 0.89, "grad_norm": 3.4309809858973264, "learning_rate": 3.038499028312314e-07, "loss": 0.2994, "step": 31141 }, { "epoch": 0.89, "grad_norm": 8.898941521440983, "learning_rate": 3.0369072009677426e-07, "loss": 0.3284, "step": 31142 }, { "epoch": 0.89, "grad_norm": 2.6328603304358453, "learning_rate": 3.035315777640885e-07, "loss": 0.1097, "step": 31143 }, { "epoch": 0.89, "grad_norm": 7.539218217101375, "learning_rate": 3.033724758345413e-07, "loss": 0.4989, "step": 31144 }, { "epoch": 0.89, "grad_norm": 3.945384563127834, "learning_rate": 3.032134143095034e-07, "loss": 0.1698, "step": 31145 }, { "epoch": 0.89, "grad_norm": 3.264298055356574, "learning_rate": 3.0305439319034234e-07, "loss": 0.3484, "step": 31146 }, { "epoch": 0.89, "grad_norm": 6.111347806806804, "learning_rate": 3.0289541247842504e-07, "loss": 0.5297, "step": 31147 }, { "epoch": 0.89, "grad_norm": 9.714556755363882, "learning_rate": 3.027364721751208e-07, "loss": 0.3406, "step": 31148 }, { "epoch": 0.89, "grad_norm": 6.028713661509857, "learning_rate": 3.025775722817958e-07, "loss": 0.3348, "step": 31149 }, { "epoch": 0.89, "grad_norm": 3.4009333423384738, "learning_rate": 3.024187127998179e-07, "loss": 0.2365, "step": 31150 }, { "epoch": 0.89, "grad_norm": 4.371109745108867, "learning_rate": 3.022598937305532e-07, "loss": 0.4573, "step": 31151 }, { "epoch": 0.89, "grad_norm": 7.18925845984505, "learning_rate": 3.021011150753689e-07, "loss": 0.4257, "step": 31152 }, { "epoch": 0.89, "grad_norm": 5.149369753077784, "learning_rate": 3.0194237683563e-07, "loss": 0.6699, "step": 31153 }, { "epoch": 0.89, "grad_norm": 4.55341626284933, "learning_rate": 3.017836790127027e-07, "loss": 0.2966, "step": 31154 }, { "epoch": 0.89, "grad_norm": 5.694563681690288, "learning_rate": 3.0162502160795136e-07, "loss": 0.1778, "step": 31155 }, { "epoch": 0.89, "grad_norm": 6.309830356016246, "learning_rate": 3.014664046227411e-07, "loss": 0.531, "step": 31156 }, { "epoch": 0.89, "grad_norm": 7.919794926878307, "learning_rate": 3.0130782805843807e-07, "loss": 0.3734, "step": 31157 }, { "epoch": 0.89, "grad_norm": 5.508638295702399, "learning_rate": 3.011492919164039e-07, "loss": 0.5751, "step": 31158 }, { "epoch": 0.89, "grad_norm": 3.4873172842037, "learning_rate": 3.009907961980052e-07, "loss": 0.3861, "step": 31159 }, { "epoch": 0.89, "grad_norm": 7.095895316778545, "learning_rate": 3.0083234090460376e-07, "loss": 0.6081, "step": 31160 }, { "epoch": 0.89, "grad_norm": 3.66224744205879, "learning_rate": 3.0067392603756295e-07, "loss": 0.3255, "step": 31161 }, { "epoch": 0.89, "grad_norm": 5.055642628822334, "learning_rate": 3.0051555159824497e-07, "loss": 0.4098, "step": 31162 }, { "epoch": 0.89, "grad_norm": 4.093182997712769, "learning_rate": 3.0035721758801375e-07, "loss": 0.3483, "step": 31163 }, { "epoch": 0.89, "grad_norm": 6.54893519504303, "learning_rate": 3.001989240082309e-07, "loss": 0.5306, "step": 31164 }, { "epoch": 0.89, "grad_norm": 5.784049042142941, "learning_rate": 3.000406708602577e-07, "loss": 0.3005, "step": 31165 }, { "epoch": 0.89, "grad_norm": 4.9495999521378495, "learning_rate": 2.998824581454568e-07, "loss": 0.3649, "step": 31166 }, { "epoch": 0.89, "grad_norm": 5.109852759746394, "learning_rate": 2.9972428586518774e-07, "loss": 0.4267, "step": 31167 }, { "epoch": 0.89, "grad_norm": 5.209937278457879, "learning_rate": 2.9956615402081223e-07, "loss": 0.4552, "step": 31168 }, { "epoch": 0.89, "grad_norm": 3.1675726364225554, "learning_rate": 2.9940806261368973e-07, "loss": 0.1775, "step": 31169 }, { "epoch": 0.89, "grad_norm": 7.797873209503454, "learning_rate": 2.992500116451813e-07, "loss": 0.5601, "step": 31170 }, { "epoch": 0.89, "grad_norm": 7.490096101640913, "learning_rate": 2.9909200111664593e-07, "loss": 0.5038, "step": 31171 }, { "epoch": 0.89, "grad_norm": 8.13384616230408, "learning_rate": 2.989340310294442e-07, "loss": 0.4003, "step": 31172 }, { "epoch": 0.89, "grad_norm": 6.2438919738574254, "learning_rate": 2.9877610138493383e-07, "loss": 0.5104, "step": 31173 }, { "epoch": 0.89, "grad_norm": 7.5445831167479085, "learning_rate": 2.9861821218447275e-07, "loss": 0.2661, "step": 31174 }, { "epoch": 0.89, "grad_norm": 7.291239586084844, "learning_rate": 2.9846036342942197e-07, "loss": 0.2822, "step": 31175 }, { "epoch": 0.89, "grad_norm": 4.874216677015169, "learning_rate": 2.983025551211366e-07, "loss": 0.8698, "step": 31176 }, { "epoch": 0.89, "grad_norm": 5.93700914863677, "learning_rate": 2.981447872609761e-07, "loss": 0.2167, "step": 31177 }, { "epoch": 0.89, "grad_norm": 4.124563685374649, "learning_rate": 2.979870598502965e-07, "loss": 0.5001, "step": 31178 }, { "epoch": 0.89, "grad_norm": 6.731389421667906, "learning_rate": 2.978293728904558e-07, "loss": 0.779, "step": 31179 }, { "epoch": 0.89, "grad_norm": 6.078924680534087, "learning_rate": 2.9767172638280993e-07, "loss": 0.5789, "step": 31180 }, { "epoch": 0.89, "grad_norm": 4.169108098333089, "learning_rate": 2.975141203287152e-07, "loss": 0.4148, "step": 31181 }, { "epoch": 0.89, "grad_norm": 5.327991897844621, "learning_rate": 2.973565547295276e-07, "loss": 0.24, "step": 31182 }, { "epoch": 0.89, "grad_norm": 2.6204475709226362, "learning_rate": 2.971990295866023e-07, "loss": 0.282, "step": 31183 }, { "epoch": 0.89, "grad_norm": 17.944583278482426, "learning_rate": 2.970415449012953e-07, "loss": 0.6141, "step": 31184 }, { "epoch": 0.89, "grad_norm": 6.517306506493219, "learning_rate": 2.968841006749595e-07, "loss": 0.3926, "step": 31185 }, { "epoch": 0.89, "grad_norm": 6.90775605432987, "learning_rate": 2.967266969089522e-07, "loss": 0.3553, "step": 31186 }, { "epoch": 0.89, "grad_norm": 5.7268395766690885, "learning_rate": 2.96569333604626e-07, "loss": 0.339, "step": 31187 }, { "epoch": 0.89, "grad_norm": 5.203075695090623, "learning_rate": 2.9641201076333395e-07, "loss": 0.614, "step": 31188 }, { "epoch": 0.89, "grad_norm": 5.087124792879847, "learning_rate": 2.962547283864309e-07, "loss": 0.1491, "step": 31189 }, { "epoch": 0.89, "grad_norm": 5.613883032028379, "learning_rate": 2.96097486475268e-07, "loss": 0.5845, "step": 31190 }, { "epoch": 0.89, "grad_norm": 5.988616316865274, "learning_rate": 2.95940285031201e-07, "loss": 0.6382, "step": 31191 }, { "epoch": 0.89, "grad_norm": 5.365156741807853, "learning_rate": 2.957831240555792e-07, "loss": 0.4309, "step": 31192 }, { "epoch": 0.89, "grad_norm": 4.106535013604858, "learning_rate": 2.956260035497577e-07, "loss": 0.2997, "step": 31193 }, { "epoch": 0.89, "grad_norm": 5.720068649266847, "learning_rate": 2.954689235150848e-07, "loss": 0.3788, "step": 31194 }, { "epoch": 0.89, "grad_norm": 5.014661084301238, "learning_rate": 2.95311883952914e-07, "loss": 0.8471, "step": 31195 }, { "epoch": 0.89, "grad_norm": 5.0133981485099905, "learning_rate": 2.951548848645958e-07, "loss": 0.4343, "step": 31196 }, { "epoch": 0.89, "grad_norm": 3.9096575713397557, "learning_rate": 2.9499792625148014e-07, "loss": 0.512, "step": 31197 }, { "epoch": 0.89, "grad_norm": 7.294047099700643, "learning_rate": 2.948410081149189e-07, "loss": 0.4064, "step": 31198 }, { "epoch": 0.89, "grad_norm": 4.107133261154053, "learning_rate": 2.9468413045626143e-07, "loss": 0.4029, "step": 31199 }, { "epoch": 0.89, "grad_norm": 4.34897113120505, "learning_rate": 2.945272932768573e-07, "loss": 0.5372, "step": 31200 }, { "epoch": 0.89, "grad_norm": 3.6786059594220104, "learning_rate": 2.9437049657805415e-07, "loss": 0.1384, "step": 31201 }, { "epoch": 0.89, "grad_norm": 9.760394399991416, "learning_rate": 2.9421374036120333e-07, "loss": 0.381, "step": 31202 }, { "epoch": 0.89, "grad_norm": 8.326920699598523, "learning_rate": 2.940570246276514e-07, "loss": 0.6954, "step": 31203 }, { "epoch": 0.89, "grad_norm": 4.949497230119755, "learning_rate": 2.9390034937874734e-07, "loss": 0.3991, "step": 31204 }, { "epoch": 0.89, "grad_norm": 2.2129731259703056, "learning_rate": 2.937437146158401e-07, "loss": 0.1472, "step": 31205 }, { "epoch": 0.89, "grad_norm": 6.959372056498801, "learning_rate": 2.935871203402763e-07, "loss": 0.6221, "step": 31206 }, { "epoch": 0.89, "grad_norm": 6.423384069147685, "learning_rate": 2.934305665534032e-07, "loss": 0.5234, "step": 31207 }, { "epoch": 0.89, "grad_norm": 4.3564106970418495, "learning_rate": 2.9327405325656645e-07, "loss": 0.6185, "step": 31208 }, { "epoch": 0.89, "grad_norm": 6.431173827199379, "learning_rate": 2.9311758045111437e-07, "loss": 0.5573, "step": 31209 }, { "epoch": 0.89, "grad_norm": 8.699181065437083, "learning_rate": 2.929611481383909e-07, "loss": 0.3602, "step": 31210 }, { "epoch": 0.89, "grad_norm": 5.558299503862111, "learning_rate": 2.928047563197445e-07, "loss": 0.3329, "step": 31211 }, { "epoch": 0.89, "grad_norm": 6.455678660841624, "learning_rate": 2.926484049965184e-07, "loss": 0.376, "step": 31212 }, { "epoch": 0.89, "grad_norm": 6.378976068155499, "learning_rate": 2.924920941700593e-07, "loss": 0.6242, "step": 31213 }, { "epoch": 0.89, "grad_norm": 4.390744163552311, "learning_rate": 2.9233582384171064e-07, "loss": 0.2461, "step": 31214 }, { "epoch": 0.89, "grad_norm": 5.912232996615691, "learning_rate": 2.921795940128169e-07, "loss": 0.6892, "step": 31215 }, { "epoch": 0.89, "grad_norm": 9.347031594370465, "learning_rate": 2.920234046847237e-07, "loss": 0.5956, "step": 31216 }, { "epoch": 0.89, "grad_norm": 6.017069259270306, "learning_rate": 2.9186725585877206e-07, "loss": 0.2405, "step": 31217 }, { "epoch": 0.89, "grad_norm": 7.636898700125689, "learning_rate": 2.917111475363077e-07, "loss": 0.5844, "step": 31218 }, { "epoch": 0.89, "grad_norm": 4.370816764884992, "learning_rate": 2.915550797186717e-07, "loss": 0.5278, "step": 31219 }, { "epoch": 0.89, "grad_norm": 6.1323371071220265, "learning_rate": 2.913990524072086e-07, "loss": 0.4462, "step": 31220 }, { "epoch": 0.89, "grad_norm": 12.68579210333602, "learning_rate": 2.912430656032594e-07, "loss": 0.6232, "step": 31221 }, { "epoch": 0.89, "grad_norm": 27.28125173911737, "learning_rate": 2.91087119308166e-07, "loss": 0.4121, "step": 31222 }, { "epoch": 0.89, "grad_norm": 4.244011474554314, "learning_rate": 2.9093121352327057e-07, "loss": 0.2173, "step": 31223 }, { "epoch": 0.89, "grad_norm": 6.966193136369527, "learning_rate": 2.907753482499137e-07, "loss": 0.4124, "step": 31224 }, { "epoch": 0.89, "grad_norm": 6.10524057656299, "learning_rate": 2.9061952348943767e-07, "loss": 0.376, "step": 31225 }, { "epoch": 0.89, "grad_norm": 8.013034215962852, "learning_rate": 2.904637392431819e-07, "loss": 0.2836, "step": 31226 }, { "epoch": 0.89, "grad_norm": 9.381903992691257, "learning_rate": 2.903079955124866e-07, "loss": 0.7694, "step": 31227 }, { "epoch": 0.89, "grad_norm": 4.77368283304697, "learning_rate": 2.90152292298691e-07, "loss": 0.4204, "step": 31228 }, { "epoch": 0.89, "grad_norm": 4.5509952085566585, "learning_rate": 2.8999662960313535e-07, "loss": 0.325, "step": 31229 }, { "epoch": 0.89, "grad_norm": 4.85638146633674, "learning_rate": 2.8984100742715957e-07, "loss": 0.2846, "step": 31230 }, { "epoch": 0.89, "grad_norm": 4.030138498297571, "learning_rate": 2.89685425772101e-07, "loss": 0.4374, "step": 31231 }, { "epoch": 0.89, "grad_norm": 4.6339146555852535, "learning_rate": 2.895298846392991e-07, "loss": 0.3704, "step": 31232 }, { "epoch": 0.89, "grad_norm": 4.999612554797581, "learning_rate": 2.893743840300922e-07, "loss": 0.2226, "step": 31233 }, { "epoch": 0.89, "grad_norm": 4.628081326275394, "learning_rate": 2.8921892394581706e-07, "loss": 0.6032, "step": 31234 }, { "epoch": 0.89, "grad_norm": 7.3924001614353605, "learning_rate": 2.8906350438781094e-07, "loss": 0.3673, "step": 31235 }, { "epoch": 0.89, "grad_norm": 9.54798710365847, "learning_rate": 2.889081253574122e-07, "loss": 0.5778, "step": 31236 }, { "epoch": 0.89, "grad_norm": 14.73806148621003, "learning_rate": 2.887527868559564e-07, "loss": 0.4863, "step": 31237 }, { "epoch": 0.89, "grad_norm": 4.28197088924599, "learning_rate": 2.885974888847798e-07, "loss": 0.445, "step": 31238 }, { "epoch": 0.89, "grad_norm": 5.374197667143335, "learning_rate": 2.884422314452201e-07, "loss": 0.3151, "step": 31239 }, { "epoch": 0.89, "grad_norm": 5.981459502534188, "learning_rate": 2.882870145386113e-07, "loss": 0.4301, "step": 31240 }, { "epoch": 0.89, "grad_norm": 4.539541248805134, "learning_rate": 2.8813183816628953e-07, "loss": 0.4675, "step": 31241 }, { "epoch": 0.89, "grad_norm": 13.095280742162974, "learning_rate": 2.8797670232958874e-07, "loss": 0.1557, "step": 31242 }, { "epoch": 0.89, "grad_norm": 6.661676120453897, "learning_rate": 2.878216070298445e-07, "loss": 0.4278, "step": 31243 }, { "epoch": 0.89, "grad_norm": 12.605600843987542, "learning_rate": 2.8766655226839024e-07, "loss": 0.2091, "step": 31244 }, { "epoch": 0.89, "grad_norm": 5.681397654143248, "learning_rate": 2.875115380465615e-07, "loss": 0.3373, "step": 31245 }, { "epoch": 0.89, "grad_norm": 4.777239744562373, "learning_rate": 2.8735656436568946e-07, "loss": 0.6207, "step": 31246 }, { "epoch": 0.89, "grad_norm": 10.792770820208268, "learning_rate": 2.872016312271103e-07, "loss": 0.3702, "step": 31247 }, { "epoch": 0.89, "grad_norm": 3.1397814400653408, "learning_rate": 2.870467386321546e-07, "loss": 0.3239, "step": 31248 }, { "epoch": 0.89, "grad_norm": 4.856309420542447, "learning_rate": 2.8689188658215464e-07, "loss": 0.559, "step": 31249 }, { "epoch": 0.89, "grad_norm": 7.782097843047232, "learning_rate": 2.8673707507844485e-07, "loss": 0.6331, "step": 31250 }, { "epoch": 0.89, "grad_norm": 6.261889135906554, "learning_rate": 2.8658230412235424e-07, "loss": 0.2862, "step": 31251 }, { "epoch": 0.89, "grad_norm": 4.470378152311196, "learning_rate": 2.8642757371521725e-07, "loss": 0.3529, "step": 31252 }, { "epoch": 0.9, "grad_norm": 10.905162112129902, "learning_rate": 2.862728838583628e-07, "loss": 0.3715, "step": 31253 }, { "epoch": 0.9, "grad_norm": 6.564836249709824, "learning_rate": 2.8611823455312217e-07, "loss": 0.4885, "step": 31254 }, { "epoch": 0.9, "grad_norm": 4.699881084439316, "learning_rate": 2.8596362580082637e-07, "loss": 0.3012, "step": 31255 }, { "epoch": 0.9, "grad_norm": 4.826061165688794, "learning_rate": 2.858090576028044e-07, "loss": 0.5388, "step": 31256 }, { "epoch": 0.9, "grad_norm": 4.226747378326775, "learning_rate": 2.8565452996038743e-07, "loss": 0.3349, "step": 31257 }, { "epoch": 0.9, "grad_norm": 5.185886453835328, "learning_rate": 2.855000428749033e-07, "loss": 0.643, "step": 31258 }, { "epoch": 0.9, "grad_norm": 11.273749164124892, "learning_rate": 2.853455963476826e-07, "loss": 0.5897, "step": 31259 }, { "epoch": 0.9, "grad_norm": 5.149570415690615, "learning_rate": 2.8519119038005305e-07, "loss": 0.4808, "step": 31260 }, { "epoch": 0.9, "grad_norm": 3.783113761165455, "learning_rate": 2.8503682497334375e-07, "loss": 0.3422, "step": 31261 }, { "epoch": 0.9, "grad_norm": 5.021131873323648, "learning_rate": 2.8488250012888073e-07, "loss": 0.6141, "step": 31262 }, { "epoch": 0.9, "grad_norm": 5.662225296397118, "learning_rate": 2.847282158479936e-07, "loss": 0.43, "step": 31263 }, { "epoch": 0.9, "grad_norm": 5.276406725209884, "learning_rate": 2.845739721320095e-07, "loss": 0.5353, "step": 31264 }, { "epoch": 0.9, "grad_norm": 5.521411140962863, "learning_rate": 2.844197689822542e-07, "loss": 0.3568, "step": 31265 }, { "epoch": 0.9, "grad_norm": 6.2754139144375065, "learning_rate": 2.84265606400056e-07, "loss": 0.6176, "step": 31266 }, { "epoch": 0.9, "grad_norm": 3.906132322446205, "learning_rate": 2.8411148438673984e-07, "loss": 0.2485, "step": 31267 }, { "epoch": 0.9, "grad_norm": 6.78723752127226, "learning_rate": 2.8395740294363203e-07, "loss": 0.6336, "step": 31268 }, { "epoch": 0.9, "grad_norm": 4.285889082136925, "learning_rate": 2.8380336207205763e-07, "loss": 0.4466, "step": 31269 }, { "epoch": 0.9, "grad_norm": 3.6005607751019335, "learning_rate": 2.8364936177334213e-07, "loss": 0.2034, "step": 31270 }, { "epoch": 0.9, "grad_norm": 5.720483110806867, "learning_rate": 2.8349540204881066e-07, "loss": 0.212, "step": 31271 }, { "epoch": 0.9, "grad_norm": 4.605885002343814, "learning_rate": 2.8334148289978767e-07, "loss": 0.4331, "step": 31272 }, { "epoch": 0.9, "grad_norm": 7.059242206082704, "learning_rate": 2.8318760432759717e-07, "loss": 0.2759, "step": 31273 }, { "epoch": 0.9, "grad_norm": 5.3270840788676495, "learning_rate": 2.8303376633356307e-07, "loss": 0.8422, "step": 31274 }, { "epoch": 0.9, "grad_norm": 4.57852535645455, "learning_rate": 2.8287996891900925e-07, "loss": 0.6275, "step": 31275 }, { "epoch": 0.9, "grad_norm": 3.1144785571043774, "learning_rate": 2.8272621208525695e-07, "loss": 0.2341, "step": 31276 }, { "epoch": 0.9, "grad_norm": 6.2740448578145065, "learning_rate": 2.825724958336312e-07, "loss": 0.2513, "step": 31277 }, { "epoch": 0.9, "grad_norm": 8.79115827877341, "learning_rate": 2.8241882016545266e-07, "loss": 0.7379, "step": 31278 }, { "epoch": 0.9, "grad_norm": 5.156950192160463, "learning_rate": 2.8226518508204404e-07, "loss": 0.4474, "step": 31279 }, { "epoch": 0.9, "grad_norm": 6.7413197176635204, "learning_rate": 2.8211159058472826e-07, "loss": 0.3646, "step": 31280 }, { "epoch": 0.9, "grad_norm": 6.239521168530212, "learning_rate": 2.8195803667482537e-07, "loss": 0.427, "step": 31281 }, { "epoch": 0.9, "grad_norm": 3.358535803725124, "learning_rate": 2.8180452335365647e-07, "loss": 0.3962, "step": 31282 }, { "epoch": 0.9, "grad_norm": 7.965127967002865, "learning_rate": 2.816510506225417e-07, "loss": 0.5332, "step": 31283 }, { "epoch": 0.9, "grad_norm": 3.282528201415685, "learning_rate": 2.8149761848280276e-07, "loss": 0.0821, "step": 31284 }, { "epoch": 0.9, "grad_norm": 3.4564484875544745, "learning_rate": 2.8134422693575857e-07, "loss": 0.209, "step": 31285 }, { "epoch": 0.9, "grad_norm": 8.39476760524418, "learning_rate": 2.811908759827292e-07, "loss": 0.5889, "step": 31286 }, { "epoch": 0.9, "grad_norm": 4.401405579759441, "learning_rate": 2.810375656250336e-07, "loss": 0.2288, "step": 31287 }, { "epoch": 0.9, "grad_norm": 6.668579431156135, "learning_rate": 2.8088429586399015e-07, "loss": 0.4906, "step": 31288 }, { "epoch": 0.9, "grad_norm": 11.393269800894801, "learning_rate": 2.8073106670091945e-07, "loss": 1.3715, "step": 31289 }, { "epoch": 0.9, "grad_norm": 8.56484641343284, "learning_rate": 2.805778781371371e-07, "loss": 0.5291, "step": 31290 }, { "epoch": 0.9, "grad_norm": 4.906053575604993, "learning_rate": 2.8042473017396265e-07, "loss": 0.6198, "step": 31291 }, { "epoch": 0.9, "grad_norm": 8.997840966762018, "learning_rate": 2.8027162281271283e-07, "loss": 0.7274, "step": 31292 }, { "epoch": 0.9, "grad_norm": 3.5462533851421822, "learning_rate": 2.8011855605470594e-07, "loss": 0.4241, "step": 31293 }, { "epoch": 0.9, "grad_norm": 4.963625850810853, "learning_rate": 2.799655299012577e-07, "loss": 0.5979, "step": 31294 }, { "epoch": 0.9, "grad_norm": 7.508246529345888, "learning_rate": 2.798125443536842e-07, "loss": 0.2999, "step": 31295 }, { "epoch": 0.9, "grad_norm": 5.397123450130607, "learning_rate": 2.7965959941330277e-07, "loss": 0.4086, "step": 31296 }, { "epoch": 0.9, "grad_norm": 3.787803456453257, "learning_rate": 2.795066950814285e-07, "loss": 0.2922, "step": 31297 }, { "epoch": 0.9, "grad_norm": 7.742254524441254, "learning_rate": 2.7935383135937686e-07, "loss": 0.4494, "step": 31298 }, { "epoch": 0.9, "grad_norm": 13.19282608592116, "learning_rate": 2.792010082484625e-07, "loss": 0.8464, "step": 31299 }, { "epoch": 0.9, "grad_norm": 4.661255341555174, "learning_rate": 2.790482257500016e-07, "loss": 0.3879, "step": 31300 }, { "epoch": 0.9, "grad_norm": 4.622500955881749, "learning_rate": 2.788954838653074e-07, "loss": 0.4176, "step": 31301 }, { "epoch": 0.9, "grad_norm": 3.0637685522905276, "learning_rate": 2.7874278259569455e-07, "loss": 0.2617, "step": 31302 }, { "epoch": 0.9, "grad_norm": 22.678803030146632, "learning_rate": 2.785901219424747e-07, "loss": 0.2557, "step": 31303 }, { "epoch": 0.9, "grad_norm": 4.489800284941785, "learning_rate": 2.784375019069635e-07, "loss": 0.1897, "step": 31304 }, { "epoch": 0.9, "grad_norm": 3.3574027015977395, "learning_rate": 2.7828492249047325e-07, "loss": 0.1583, "step": 31305 }, { "epoch": 0.9, "grad_norm": 3.6841223166597095, "learning_rate": 2.7813238369431617e-07, "loss": 0.2492, "step": 31306 }, { "epoch": 0.9, "grad_norm": 6.781470580842371, "learning_rate": 2.779798855198051e-07, "loss": 0.7476, "step": 31307 }, { "epoch": 0.9, "grad_norm": 3.0085612665531225, "learning_rate": 2.778274279682519e-07, "loss": 0.2009, "step": 31308 }, { "epoch": 0.9, "grad_norm": 5.675710027868745, "learning_rate": 2.7767501104096817e-07, "loss": 0.6395, "step": 31309 }, { "epoch": 0.9, "grad_norm": 3.082396182271136, "learning_rate": 2.77522634739264e-07, "loss": 0.2188, "step": 31310 }, { "epoch": 0.9, "grad_norm": 7.914752052746663, "learning_rate": 2.7737029906445113e-07, "loss": 0.3609, "step": 31311 }, { "epoch": 0.9, "grad_norm": 9.468931744030469, "learning_rate": 2.7721800401784074e-07, "loss": 0.6021, "step": 31312 }, { "epoch": 0.9, "grad_norm": 9.541348769932233, "learning_rate": 2.770657496007423e-07, "loss": 0.5702, "step": 31313 }, { "epoch": 0.9, "grad_norm": 6.776274618200138, "learning_rate": 2.769135358144659e-07, "loss": 0.7299, "step": 31314 }, { "epoch": 0.9, "grad_norm": 4.492165792661547, "learning_rate": 2.7676136266031995e-07, "loss": 0.4551, "step": 31315 }, { "epoch": 0.9, "grad_norm": 4.944737827363382, "learning_rate": 2.766092301396156e-07, "loss": 0.5534, "step": 31316 }, { "epoch": 0.9, "grad_norm": 3.7760884231654197, "learning_rate": 2.764571382536591e-07, "loss": 0.1697, "step": 31317 }, { "epoch": 0.9, "grad_norm": 6.08126004915103, "learning_rate": 2.7630508700376144e-07, "loss": 0.4164, "step": 31318 }, { "epoch": 0.9, "grad_norm": 5.142593122982723, "learning_rate": 2.76153076391229e-07, "loss": 0.16, "step": 31319 }, { "epoch": 0.9, "grad_norm": 5.348762303084354, "learning_rate": 2.760011064173701e-07, "loss": 0.4977, "step": 31320 }, { "epoch": 0.9, "grad_norm": 4.459075325277679, "learning_rate": 2.7584917708349257e-07, "loss": 0.4841, "step": 31321 }, { "epoch": 0.9, "grad_norm": 3.3445633941723902, "learning_rate": 2.75697288390902e-07, "loss": 0.1697, "step": 31322 }, { "epoch": 0.9, "grad_norm": 5.706154643052748, "learning_rate": 2.755454403409069e-07, "loss": 0.416, "step": 31323 }, { "epoch": 0.9, "grad_norm": 3.6664034069805393, "learning_rate": 2.753936329348117e-07, "loss": 0.3849, "step": 31324 }, { "epoch": 0.9, "grad_norm": 5.084373690763843, "learning_rate": 2.7524186617392425e-07, "loss": 0.4324, "step": 31325 }, { "epoch": 0.9, "grad_norm": 2.6240779528901648, "learning_rate": 2.7509014005954847e-07, "loss": 0.1941, "step": 31326 }, { "epoch": 0.9, "grad_norm": 4.0302440216122335, "learning_rate": 2.749384545929912e-07, "loss": 0.2732, "step": 31327 }, { "epoch": 0.9, "grad_norm": 3.637989514634099, "learning_rate": 2.747868097755568e-07, "loss": 0.331, "step": 31328 }, { "epoch": 0.9, "grad_norm": 6.8949426803806615, "learning_rate": 2.7463520560854886e-07, "loss": 0.3861, "step": 31329 }, { "epoch": 0.9, "grad_norm": 4.944080554892301, "learning_rate": 2.744836420932728e-07, "loss": 0.3744, "step": 31330 }, { "epoch": 0.9, "grad_norm": 3.333622077515789, "learning_rate": 2.7433211923103163e-07, "loss": 0.284, "step": 31331 }, { "epoch": 0.9, "grad_norm": 4.007260617336758, "learning_rate": 2.7418063702313037e-07, "loss": 0.6795, "step": 31332 }, { "epoch": 0.9, "grad_norm": 5.604737301199236, "learning_rate": 2.740291954708707e-07, "loss": 0.3805, "step": 31333 }, { "epoch": 0.9, "grad_norm": 6.215824646809664, "learning_rate": 2.738777945755561e-07, "loss": 0.217, "step": 31334 }, { "epoch": 0.9, "grad_norm": 5.654399590226371, "learning_rate": 2.7372643433848933e-07, "loss": 0.2273, "step": 31335 }, { "epoch": 0.9, "grad_norm": 3.957748513771883, "learning_rate": 2.7357511476097165e-07, "loss": 0.5278, "step": 31336 }, { "epoch": 0.9, "grad_norm": 4.7156476312881725, "learning_rate": 2.734238358443053e-07, "loss": 0.2186, "step": 31337 }, { "epoch": 0.9, "grad_norm": 5.785389959236822, "learning_rate": 2.7327259758979154e-07, "loss": 0.8952, "step": 31338 }, { "epoch": 0.9, "grad_norm": 2.236405070617787, "learning_rate": 2.7312139999873257e-07, "loss": 0.2034, "step": 31339 }, { "epoch": 0.9, "grad_norm": 7.72574536329389, "learning_rate": 2.729702430724268e-07, "loss": 0.7827, "step": 31340 }, { "epoch": 0.9, "grad_norm": 3.836468630688938, "learning_rate": 2.7281912681217827e-07, "loss": 0.1089, "step": 31341 }, { "epoch": 0.9, "grad_norm": 4.830965854436457, "learning_rate": 2.726680512192825e-07, "loss": 0.4777, "step": 31342 }, { "epoch": 0.9, "grad_norm": 4.811132199497174, "learning_rate": 2.7251701629504247e-07, "loss": 0.3241, "step": 31343 }, { "epoch": 0.9, "grad_norm": 3.6923943382957014, "learning_rate": 2.723660220407559e-07, "loss": 0.2618, "step": 31344 }, { "epoch": 0.9, "grad_norm": 6.088205270037343, "learning_rate": 2.722150684577224e-07, "loss": 0.2589, "step": 31345 }, { "epoch": 0.9, "grad_norm": 5.332999944200843, "learning_rate": 2.720641555472414e-07, "loss": 0.7567, "step": 31346 }, { "epoch": 0.9, "grad_norm": 6.909380552789121, "learning_rate": 2.7191328331061027e-07, "loss": 0.5429, "step": 31347 }, { "epoch": 0.9, "grad_norm": 2.987676521896042, "learning_rate": 2.7176245174912683e-07, "loss": 0.1973, "step": 31348 }, { "epoch": 0.9, "grad_norm": 4.22562075010576, "learning_rate": 2.7161166086408787e-07, "loss": 0.2648, "step": 31349 }, { "epoch": 0.9, "grad_norm": 5.916703273319671, "learning_rate": 2.7146091065679283e-07, "loss": 0.3528, "step": 31350 }, { "epoch": 0.9, "grad_norm": 3.665768647041963, "learning_rate": 2.7131020112853677e-07, "loss": 0.5011, "step": 31351 }, { "epoch": 0.9, "grad_norm": 5.92555704113862, "learning_rate": 2.711595322806165e-07, "loss": 0.4788, "step": 31352 }, { "epoch": 0.9, "grad_norm": 5.272075923760813, "learning_rate": 2.710089041143293e-07, "loss": 0.4762, "step": 31353 }, { "epoch": 0.9, "grad_norm": 3.9050662879348517, "learning_rate": 2.708583166309703e-07, "loss": 0.2383, "step": 31354 }, { "epoch": 0.9, "grad_norm": 5.0006507926841195, "learning_rate": 2.70707769831835e-07, "loss": 0.4915, "step": 31355 }, { "epoch": 0.9, "grad_norm": 3.753282000889121, "learning_rate": 2.7055726371821743e-07, "loss": 0.3575, "step": 31356 }, { "epoch": 0.9, "grad_norm": 7.5838349686454665, "learning_rate": 2.7040679829141494e-07, "loss": 0.6076, "step": 31357 }, { "epoch": 0.9, "grad_norm": 5.683484598796183, "learning_rate": 2.702563735527186e-07, "loss": 0.309, "step": 31358 }, { "epoch": 0.9, "grad_norm": 8.234949254781162, "learning_rate": 2.701059895034258e-07, "loss": 0.4524, "step": 31359 }, { "epoch": 0.9, "grad_norm": 8.756638951073175, "learning_rate": 2.699556461448283e-07, "loss": 0.445, "step": 31360 }, { "epoch": 0.9, "grad_norm": 5.5807024987302, "learning_rate": 2.6980534347822e-07, "loss": 0.612, "step": 31361 }, { "epoch": 0.9, "grad_norm": 8.467409091097666, "learning_rate": 2.6965508150489426e-07, "loss": 0.6563, "step": 31362 }, { "epoch": 0.9, "grad_norm": 7.461884571302228, "learning_rate": 2.69504860226143e-07, "loss": 0.5559, "step": 31363 }, { "epoch": 0.9, "grad_norm": 8.42463562401003, "learning_rate": 2.6935467964325947e-07, "loss": 0.6428, "step": 31364 }, { "epoch": 0.9, "grad_norm": 8.858896609245464, "learning_rate": 2.6920453975753434e-07, "loss": 0.4351, "step": 31365 }, { "epoch": 0.9, "grad_norm": 5.556814481298204, "learning_rate": 2.6905444057026107e-07, "loss": 0.601, "step": 31366 }, { "epoch": 0.9, "grad_norm": 9.05265370552144, "learning_rate": 2.6890438208272864e-07, "loss": 0.4733, "step": 31367 }, { "epoch": 0.9, "grad_norm": 5.20442211032223, "learning_rate": 2.687543642962315e-07, "loss": 0.4297, "step": 31368 }, { "epoch": 0.9, "grad_norm": 6.0823711871121855, "learning_rate": 2.686043872120564e-07, "loss": 0.6043, "step": 31369 }, { "epoch": 0.9, "grad_norm": 6.0398624181551455, "learning_rate": 2.684544508314946e-07, "loss": 0.547, "step": 31370 }, { "epoch": 0.9, "grad_norm": 1.5400576399878232, "learning_rate": 2.6830455515583775e-07, "loss": 0.0945, "step": 31371 }, { "epoch": 0.9, "grad_norm": 13.077786914096977, "learning_rate": 2.681547001863738e-07, "loss": 0.394, "step": 31372 }, { "epoch": 0.9, "grad_norm": 5.467294684637975, "learning_rate": 2.680048859243928e-07, "loss": 0.4431, "step": 31373 }, { "epoch": 0.9, "grad_norm": 5.672376710334506, "learning_rate": 2.678551123711831e-07, "loss": 0.3792, "step": 31374 }, { "epoch": 0.9, "grad_norm": 9.238178070330918, "learning_rate": 2.6770537952803375e-07, "loss": 0.4739, "step": 31375 }, { "epoch": 0.9, "grad_norm": 4.782064106763999, "learning_rate": 2.6755568739623097e-07, "loss": 0.5491, "step": 31376 }, { "epoch": 0.9, "grad_norm": 7.3334329410492805, "learning_rate": 2.6740603597706415e-07, "loss": 0.299, "step": 31377 }, { "epoch": 0.9, "grad_norm": 4.987622915871009, "learning_rate": 2.6725642527182126e-07, "loss": 0.3477, "step": 31378 }, { "epoch": 0.9, "grad_norm": 6.322059342793397, "learning_rate": 2.6710685528178794e-07, "loss": 0.6877, "step": 31379 }, { "epoch": 0.9, "grad_norm": 3.3957575261054322, "learning_rate": 2.6695732600825253e-07, "loss": 0.3563, "step": 31380 }, { "epoch": 0.9, "grad_norm": 5.54341318339453, "learning_rate": 2.668078374525002e-07, "loss": 0.349, "step": 31381 }, { "epoch": 0.9, "grad_norm": 4.4860595303861555, "learning_rate": 2.66658389615817e-07, "loss": 0.6021, "step": 31382 }, { "epoch": 0.9, "grad_norm": 3.6017498538589896, "learning_rate": 2.6650898249948875e-07, "loss": 0.3054, "step": 31383 }, { "epoch": 0.9, "grad_norm": 4.758433885508459, "learning_rate": 2.6635961610480153e-07, "loss": 0.224, "step": 31384 }, { "epoch": 0.9, "grad_norm": 3.6338890859507957, "learning_rate": 2.662102904330388e-07, "loss": 0.2011, "step": 31385 }, { "epoch": 0.9, "grad_norm": 3.2991865922072896, "learning_rate": 2.660610054854862e-07, "loss": 0.2285, "step": 31386 }, { "epoch": 0.9, "grad_norm": 5.9116318821463985, "learning_rate": 2.659117612634288e-07, "loss": 0.5347, "step": 31387 }, { "epoch": 0.9, "grad_norm": 5.135729165721691, "learning_rate": 2.6576255776814943e-07, "loss": 0.3892, "step": 31388 }, { "epoch": 0.9, "grad_norm": 7.257630377743907, "learning_rate": 2.656133950009315e-07, "loss": 0.7165, "step": 31389 }, { "epoch": 0.9, "grad_norm": 10.045109001848052, "learning_rate": 2.6546427296305855e-07, "loss": 0.4869, "step": 31390 }, { "epoch": 0.9, "grad_norm": 5.7543931432632505, "learning_rate": 2.6531519165581385e-07, "loss": 0.5738, "step": 31391 }, { "epoch": 0.9, "grad_norm": 6.411619531963769, "learning_rate": 2.651661510804793e-07, "loss": 0.3706, "step": 31392 }, { "epoch": 0.9, "grad_norm": 5.577047185123996, "learning_rate": 2.6501715123833703e-07, "loss": 0.5515, "step": 31393 }, { "epoch": 0.9, "grad_norm": 2.0406232100708195, "learning_rate": 2.6486819213067005e-07, "loss": 0.167, "step": 31394 }, { "epoch": 0.9, "grad_norm": 5.816109961654746, "learning_rate": 2.6471927375875895e-07, "loss": 0.6233, "step": 31395 }, { "epoch": 0.9, "grad_norm": 4.855790217369498, "learning_rate": 2.645703961238849e-07, "loss": 0.4219, "step": 31396 }, { "epoch": 0.9, "grad_norm": 2.8612782340533647, "learning_rate": 2.6442155922732805e-07, "loss": 0.289, "step": 31397 }, { "epoch": 0.9, "grad_norm": 4.233556548689187, "learning_rate": 2.642727630703701e-07, "loss": 0.4562, "step": 31398 }, { "epoch": 0.9, "grad_norm": 5.5992087895169185, "learning_rate": 2.6412400765429e-07, "loss": 0.4533, "step": 31399 }, { "epoch": 0.9, "grad_norm": 4.712808936336342, "learning_rate": 2.639752929803685e-07, "loss": 0.3896, "step": 31400 }, { "epoch": 0.9, "grad_norm": 6.3114380273672275, "learning_rate": 2.638266190498845e-07, "loss": 0.6648, "step": 31401 }, { "epoch": 0.9, "grad_norm": 4.0453018901475115, "learning_rate": 2.6367798586411584e-07, "loss": 0.1802, "step": 31402 }, { "epoch": 0.9, "grad_norm": 7.914114075442752, "learning_rate": 2.635293934243438e-07, "loss": 0.3054, "step": 31403 }, { "epoch": 0.9, "grad_norm": 5.196541065857996, "learning_rate": 2.63380841731844e-07, "loss": 0.5566, "step": 31404 }, { "epoch": 0.9, "grad_norm": 6.75596891624839, "learning_rate": 2.6323233078789643e-07, "loss": 0.4525, "step": 31405 }, { "epoch": 0.9, "grad_norm": 7.399711007841641, "learning_rate": 2.630838605937774e-07, "loss": 0.5862, "step": 31406 }, { "epoch": 0.9, "grad_norm": 6.30047633171737, "learning_rate": 2.629354311507648e-07, "loss": 0.4039, "step": 31407 }, { "epoch": 0.9, "grad_norm": 3.2510372670706222, "learning_rate": 2.627870424601359e-07, "loss": 0.0999, "step": 31408 }, { "epoch": 0.9, "grad_norm": 4.4467891392625205, "learning_rate": 2.6263869452316683e-07, "loss": 0.3903, "step": 31409 }, { "epoch": 0.9, "grad_norm": 4.266336346793887, "learning_rate": 2.624903873411333e-07, "loss": 0.4253, "step": 31410 }, { "epoch": 0.9, "grad_norm": 3.8365987918582403, "learning_rate": 2.6234212091531097e-07, "loss": 0.4055, "step": 31411 }, { "epoch": 0.9, "grad_norm": 6.795655871268285, "learning_rate": 2.621938952469777e-07, "loss": 0.5711, "step": 31412 }, { "epoch": 0.9, "grad_norm": 4.204737740297667, "learning_rate": 2.620457103374058e-07, "loss": 0.381, "step": 31413 }, { "epoch": 0.9, "grad_norm": 4.809715295145836, "learning_rate": 2.61897566187872e-07, "loss": 0.2924, "step": 31414 }, { "epoch": 0.9, "grad_norm": 5.561821328240792, "learning_rate": 2.6174946279964974e-07, "loss": 0.7519, "step": 31415 }, { "epoch": 0.9, "grad_norm": 6.23909183364419, "learning_rate": 2.6160140017401414e-07, "loss": 0.6058, "step": 31416 }, { "epoch": 0.9, "grad_norm": 8.09244485590604, "learning_rate": 2.6145337831223696e-07, "loss": 0.9186, "step": 31417 }, { "epoch": 0.9, "grad_norm": 5.5280618261821575, "learning_rate": 2.613053972155932e-07, "loss": 0.2076, "step": 31418 }, { "epoch": 0.9, "grad_norm": 6.622586944458643, "learning_rate": 2.6115745688535645e-07, "loss": 0.4567, "step": 31419 }, { "epoch": 0.9, "grad_norm": 5.750695145092846, "learning_rate": 2.6100955732279776e-07, "loss": 0.4541, "step": 31420 }, { "epoch": 0.9, "grad_norm": 4.404590456496754, "learning_rate": 2.608616985291912e-07, "loss": 0.476, "step": 31421 }, { "epoch": 0.9, "grad_norm": 6.23124535961227, "learning_rate": 2.6071388050580795e-07, "loss": 0.3526, "step": 31422 }, { "epoch": 0.9, "grad_norm": 4.9414877903660175, "learning_rate": 2.6056610325391973e-07, "loss": 0.3872, "step": 31423 }, { "epoch": 0.9, "grad_norm": 5.787550797043861, "learning_rate": 2.6041836677479724e-07, "loss": 0.44, "step": 31424 }, { "epoch": 0.9, "grad_norm": 5.214564169486425, "learning_rate": 2.602706710697123e-07, "loss": 0.4288, "step": 31425 }, { "epoch": 0.9, "grad_norm": 8.911454028885272, "learning_rate": 2.6012301613993483e-07, "loss": 0.6913, "step": 31426 }, { "epoch": 0.9, "grad_norm": 6.255774157679419, "learning_rate": 2.599754019867351e-07, "loss": 0.5193, "step": 31427 }, { "epoch": 0.9, "grad_norm": 5.608036657292473, "learning_rate": 2.5982782861138476e-07, "loss": 0.3998, "step": 31428 }, { "epoch": 0.9, "grad_norm": 5.199227623247675, "learning_rate": 2.596802960151512e-07, "loss": 0.5212, "step": 31429 }, { "epoch": 0.9, "grad_norm": 6.624942275461594, "learning_rate": 2.5953280419930447e-07, "loss": 0.3835, "step": 31430 }, { "epoch": 0.9, "grad_norm": 4.16027900622145, "learning_rate": 2.5938535316511306e-07, "loss": 0.4598, "step": 31431 }, { "epoch": 0.9, "grad_norm": 5.16281619593027, "learning_rate": 2.592379429138464e-07, "loss": 0.315, "step": 31432 }, { "epoch": 0.9, "grad_norm": 6.9043877623968335, "learning_rate": 2.590905734467708e-07, "loss": 0.5469, "step": 31433 }, { "epoch": 0.9, "grad_norm": 6.488041146995745, "learning_rate": 2.5894324476515687e-07, "loss": 0.577, "step": 31434 }, { "epoch": 0.9, "grad_norm": 9.54710002078322, "learning_rate": 2.587959568702697e-07, "loss": 0.4731, "step": 31435 }, { "epoch": 0.9, "grad_norm": 8.70254966710708, "learning_rate": 2.586487097633766e-07, "loss": 1.018, "step": 31436 }, { "epoch": 0.9, "grad_norm": 4.982444461066996, "learning_rate": 2.585015034457461e-07, "loss": 0.4412, "step": 31437 }, { "epoch": 0.9, "grad_norm": 6.492809132769137, "learning_rate": 2.5835433791864206e-07, "loss": 0.3797, "step": 31438 }, { "epoch": 0.9, "grad_norm": 5.529387683183368, "learning_rate": 2.5820721318333296e-07, "loss": 0.7352, "step": 31439 }, { "epoch": 0.9, "grad_norm": 6.223627717455843, "learning_rate": 2.580601292410828e-07, "loss": 0.3949, "step": 31440 }, { "epoch": 0.9, "grad_norm": 5.844314915080265, "learning_rate": 2.579130860931578e-07, "loss": 0.4616, "step": 31441 }, { "epoch": 0.9, "grad_norm": 4.203148625530793, "learning_rate": 2.57766083740823e-07, "loss": 0.3416, "step": 31442 }, { "epoch": 0.9, "grad_norm": 6.161968267067506, "learning_rate": 2.5761912218534134e-07, "loss": 0.5604, "step": 31443 }, { "epoch": 0.9, "grad_norm": 4.3910257014089495, "learning_rate": 2.574722014279801e-07, "loss": 0.3283, "step": 31444 }, { "epoch": 0.9, "grad_norm": 7.979363826643192, "learning_rate": 2.5732532147e-07, "loss": 0.7647, "step": 31445 }, { "epoch": 0.9, "grad_norm": 4.382118918197715, "learning_rate": 2.5717848231266774e-07, "loss": 0.3445, "step": 31446 }, { "epoch": 0.9, "grad_norm": 8.883863294680655, "learning_rate": 2.57031683957244e-07, "loss": 0.5747, "step": 31447 }, { "epoch": 0.9, "grad_norm": 6.559544469962214, "learning_rate": 2.568849264049933e-07, "loss": 0.7317, "step": 31448 }, { "epoch": 0.9, "grad_norm": 4.809375566131777, "learning_rate": 2.567382096571774e-07, "loss": 0.5255, "step": 31449 }, { "epoch": 0.9, "grad_norm": 5.2387516683813855, "learning_rate": 2.565915337150593e-07, "loss": 0.5295, "step": 31450 }, { "epoch": 0.9, "grad_norm": 6.9296241593933505, "learning_rate": 2.5644489857989894e-07, "loss": 0.5427, "step": 31451 }, { "epoch": 0.9, "grad_norm": 6.420622729049752, "learning_rate": 2.562983042529593e-07, "loss": 0.4212, "step": 31452 }, { "epoch": 0.9, "grad_norm": 5.115937011748553, "learning_rate": 2.5615175073550213e-07, "loss": 0.224, "step": 31453 }, { "epoch": 0.9, "grad_norm": 4.261962492045168, "learning_rate": 2.560052380287864e-07, "loss": 0.3073, "step": 31454 }, { "epoch": 0.9, "grad_norm": 4.666851366452487, "learning_rate": 2.5585876613407445e-07, "loss": 0.2377, "step": 31455 }, { "epoch": 0.9, "grad_norm": 3.6174432486964285, "learning_rate": 2.5571233505262526e-07, "loss": 0.3304, "step": 31456 }, { "epoch": 0.9, "grad_norm": 5.4965504318973215, "learning_rate": 2.55565944785699e-07, "loss": 0.6408, "step": 31457 }, { "epoch": 0.9, "grad_norm": 8.018601547139598, "learning_rate": 2.554195953345534e-07, "loss": 0.5685, "step": 31458 }, { "epoch": 0.9, "grad_norm": 3.3039559711242834, "learning_rate": 2.552732867004493e-07, "loss": 0.1388, "step": 31459 }, { "epoch": 0.9, "grad_norm": 6.107271578714134, "learning_rate": 2.551270188846461e-07, "loss": 0.3417, "step": 31460 }, { "epoch": 0.9, "grad_norm": 3.88540387939858, "learning_rate": 2.5498079188840006e-07, "loss": 0.1973, "step": 31461 }, { "epoch": 0.9, "grad_norm": 3.241002466037828, "learning_rate": 2.548346057129708e-07, "loss": 0.3549, "step": 31462 }, { "epoch": 0.9, "grad_norm": 5.95966250481734, "learning_rate": 2.546884603596139e-07, "loss": 0.6696, "step": 31463 }, { "epoch": 0.9, "grad_norm": 7.144022661213747, "learning_rate": 2.5454235582958896e-07, "loss": 0.1531, "step": 31464 }, { "epoch": 0.9, "grad_norm": 5.19238841436951, "learning_rate": 2.5439629212415105e-07, "loss": 0.2374, "step": 31465 }, { "epoch": 0.9, "grad_norm": 7.07266678366366, "learning_rate": 2.5425026924455807e-07, "loss": 0.6573, "step": 31466 }, { "epoch": 0.9, "grad_norm": 6.30074715649438, "learning_rate": 2.5410428719206516e-07, "loss": 0.548, "step": 31467 }, { "epoch": 0.9, "grad_norm": 5.422263601486986, "learning_rate": 2.5395834596792955e-07, "loss": 0.5036, "step": 31468 }, { "epoch": 0.9, "grad_norm": 5.428856128741776, "learning_rate": 2.538124455734059e-07, "loss": 0.3378, "step": 31469 }, { "epoch": 0.9, "grad_norm": 4.0473931470577424, "learning_rate": 2.536665860097487e-07, "loss": 0.4002, "step": 31470 }, { "epoch": 0.9, "grad_norm": 5.241096223812909, "learning_rate": 2.5352076727821415e-07, "loss": 0.1726, "step": 31471 }, { "epoch": 0.9, "grad_norm": 5.08080268101484, "learning_rate": 2.533749893800552e-07, "loss": 0.3954, "step": 31472 }, { "epoch": 0.9, "grad_norm": 4.035625153556775, "learning_rate": 2.532292523165275e-07, "loss": 0.1856, "step": 31473 }, { "epoch": 0.9, "grad_norm": 4.069637411003077, "learning_rate": 2.530835560888839e-07, "loss": 0.3969, "step": 31474 }, { "epoch": 0.9, "grad_norm": 6.2461157745297475, "learning_rate": 2.5293790069837845e-07, "loss": 0.7273, "step": 31475 }, { "epoch": 0.9, "grad_norm": 9.884522687379711, "learning_rate": 2.527922861462634e-07, "loss": 0.5113, "step": 31476 }, { "epoch": 0.9, "grad_norm": 2.625542039766024, "learning_rate": 2.526467124337917e-07, "loss": 0.1565, "step": 31477 }, { "epoch": 0.9, "grad_norm": 2.899716920026109, "learning_rate": 2.525011795622162e-07, "loss": 0.1795, "step": 31478 }, { "epoch": 0.9, "grad_norm": 3.202994906877273, "learning_rate": 2.5235568753278763e-07, "loss": 0.2556, "step": 31479 }, { "epoch": 0.9, "grad_norm": 6.462591879687729, "learning_rate": 2.522102363467593e-07, "loss": 0.5682, "step": 31480 }, { "epoch": 0.9, "grad_norm": 3.8550680120383616, "learning_rate": 2.520648260053815e-07, "loss": 0.2921, "step": 31481 }, { "epoch": 0.9, "grad_norm": 7.9333360217861895, "learning_rate": 2.5191945650990534e-07, "loss": 0.3734, "step": 31482 }, { "epoch": 0.9, "grad_norm": 4.372462926656201, "learning_rate": 2.517741278615821e-07, "loss": 0.2375, "step": 31483 }, { "epoch": 0.9, "grad_norm": 7.189018188684908, "learning_rate": 2.516288400616607e-07, "loss": 0.9992, "step": 31484 }, { "epoch": 0.9, "grad_norm": 8.083133406640995, "learning_rate": 2.514835931113924e-07, "loss": 0.3516, "step": 31485 }, { "epoch": 0.9, "grad_norm": 4.295390673800594, "learning_rate": 2.5133838701202506e-07, "loss": 0.3555, "step": 31486 }, { "epoch": 0.9, "grad_norm": 8.935180736607258, "learning_rate": 2.5119322176481e-07, "loss": 0.469, "step": 31487 }, { "epoch": 0.9, "grad_norm": 3.812682788406797, "learning_rate": 2.51048097370995e-07, "loss": 0.2232, "step": 31488 }, { "epoch": 0.9, "grad_norm": 3.4634919050652675, "learning_rate": 2.5090301383182805e-07, "loss": 0.2922, "step": 31489 }, { "epoch": 0.9, "grad_norm": 3.9363863596309385, "learning_rate": 2.507579711485575e-07, "loss": 0.2861, "step": 31490 }, { "epoch": 0.9, "grad_norm": 5.424673715790704, "learning_rate": 2.5061296932243184e-07, "loss": 0.1888, "step": 31491 }, { "epoch": 0.9, "grad_norm": 5.349250817651958, "learning_rate": 2.504680083546973e-07, "loss": 0.6673, "step": 31492 }, { "epoch": 0.9, "grad_norm": 7.290848835085578, "learning_rate": 2.503230882466023e-07, "loss": 0.4315, "step": 31493 }, { "epoch": 0.9, "grad_norm": 5.841230206185617, "learning_rate": 2.5017820899939315e-07, "loss": 0.5171, "step": 31494 }, { "epoch": 0.9, "grad_norm": 7.3608178022317166, "learning_rate": 2.50033370614316e-07, "loss": 0.59, "step": 31495 }, { "epoch": 0.9, "grad_norm": 2.851798144493513, "learning_rate": 2.4988857309261705e-07, "loss": 0.1006, "step": 31496 }, { "epoch": 0.9, "grad_norm": 9.23405425003604, "learning_rate": 2.497438164355409e-07, "loss": 0.5147, "step": 31497 }, { "epoch": 0.9, "grad_norm": 4.733165221295889, "learning_rate": 2.495991006443349e-07, "loss": 0.2112, "step": 31498 }, { "epoch": 0.9, "grad_norm": 6.316338986408435, "learning_rate": 2.494544257202425e-07, "loss": 0.4141, "step": 31499 }, { "epoch": 0.9, "grad_norm": 7.899709748114907, "learning_rate": 2.493097916645082e-07, "loss": 0.4371, "step": 31500 }, { "epoch": 0.9, "grad_norm": 7.330200779755429, "learning_rate": 2.4916519847837774e-07, "loss": 0.5216, "step": 31501 }, { "epoch": 0.9, "grad_norm": 6.605910978397108, "learning_rate": 2.49020646163094e-07, "loss": 0.43, "step": 31502 }, { "epoch": 0.9, "grad_norm": 6.118212948883152, "learning_rate": 2.488761347199009e-07, "loss": 0.6, "step": 31503 }, { "epoch": 0.9, "grad_norm": 5.302932269090231, "learning_rate": 2.487316641500409e-07, "loss": 0.2531, "step": 31504 }, { "epoch": 0.9, "grad_norm": 4.028232628070308, "learning_rate": 2.4858723445475796e-07, "loss": 0.255, "step": 31505 }, { "epoch": 0.9, "grad_norm": 3.240297423813372, "learning_rate": 2.484428456352933e-07, "loss": 0.3881, "step": 31506 }, { "epoch": 0.9, "grad_norm": 5.73983010409462, "learning_rate": 2.482984976928904e-07, "loss": 0.3303, "step": 31507 }, { "epoch": 0.9, "grad_norm": 7.1675471459904845, "learning_rate": 2.481541906287899e-07, "loss": 0.5214, "step": 31508 }, { "epoch": 0.9, "grad_norm": 3.1241844543574055, "learning_rate": 2.480099244442341e-07, "loss": 0.4372, "step": 31509 }, { "epoch": 0.9, "grad_norm": 1.3750267026215757, "learning_rate": 2.4786569914046434e-07, "loss": 0.094, "step": 31510 }, { "epoch": 0.9, "grad_norm": 7.7713547873400355, "learning_rate": 2.477215147187201e-07, "loss": 0.5841, "step": 31511 }, { "epoch": 0.9, "grad_norm": 3.516526416080536, "learning_rate": 2.4757737118024315e-07, "loss": 0.3374, "step": 31512 }, { "epoch": 0.9, "grad_norm": 5.000700901496268, "learning_rate": 2.4743326852627256e-07, "loss": 0.3478, "step": 31513 }, { "epoch": 0.9, "grad_norm": 3.564530112075273, "learning_rate": 2.4728920675804845e-07, "loss": 0.223, "step": 31514 }, { "epoch": 0.9, "grad_norm": 4.75422447976286, "learning_rate": 2.471451858768109e-07, "loss": 0.4089, "step": 31515 }, { "epoch": 0.9, "grad_norm": 7.324286392916808, "learning_rate": 2.4700120588379784e-07, "loss": 0.6304, "step": 31516 }, { "epoch": 0.9, "grad_norm": 3.306864551188101, "learning_rate": 2.468572667802471e-07, "loss": 0.4568, "step": 31517 }, { "epoch": 0.9, "grad_norm": 3.3021673237962266, "learning_rate": 2.4671336856739837e-07, "loss": 0.4448, "step": 31518 }, { "epoch": 0.9, "grad_norm": 2.761905280778316, "learning_rate": 2.4656951124648997e-07, "loss": 0.3664, "step": 31519 }, { "epoch": 0.9, "grad_norm": 5.774386398943333, "learning_rate": 2.464256948187582e-07, "loss": 0.5824, "step": 31520 }, { "epoch": 0.9, "grad_norm": 4.213177355973069, "learning_rate": 2.4628191928544156e-07, "loss": 0.2448, "step": 31521 }, { "epoch": 0.9, "grad_norm": 5.467410724680252, "learning_rate": 2.4613818464777616e-07, "loss": 0.6288, "step": 31522 }, { "epoch": 0.9, "grad_norm": 3.7986736354451534, "learning_rate": 2.459944909069989e-07, "loss": 0.4145, "step": 31523 }, { "epoch": 0.9, "grad_norm": 4.864719334511243, "learning_rate": 2.458508380643448e-07, "loss": 0.2738, "step": 31524 }, { "epoch": 0.9, "grad_norm": 6.129510503270599, "learning_rate": 2.457072261210508e-07, "loss": 0.5418, "step": 31525 }, { "epoch": 0.9, "grad_norm": 10.560319381981193, "learning_rate": 2.4556365507835246e-07, "loss": 0.2282, "step": 31526 }, { "epoch": 0.9, "grad_norm": 6.684463239304067, "learning_rate": 2.454201249374844e-07, "loss": 0.4723, "step": 31527 }, { "epoch": 0.9, "grad_norm": 2.1700792229821255, "learning_rate": 2.452766356996822e-07, "loss": 0.0809, "step": 31528 }, { "epoch": 0.9, "grad_norm": 4.712497408635973, "learning_rate": 2.4513318736617943e-07, "loss": 0.1948, "step": 31529 }, { "epoch": 0.9, "grad_norm": 2.697875686589569, "learning_rate": 2.449897799382106e-07, "loss": 0.298, "step": 31530 }, { "epoch": 0.9, "grad_norm": 4.52873807538595, "learning_rate": 2.448464134170081e-07, "loss": 0.3032, "step": 31531 }, { "epoch": 0.9, "grad_norm": 4.677214744551221, "learning_rate": 2.447030878038076e-07, "loss": 0.3143, "step": 31532 }, { "epoch": 0.9, "grad_norm": 4.44833724321289, "learning_rate": 2.445598030998403e-07, "loss": 0.4495, "step": 31533 }, { "epoch": 0.9, "grad_norm": 5.505092582339889, "learning_rate": 2.444165593063391e-07, "loss": 0.3683, "step": 31534 }, { "epoch": 0.9, "grad_norm": 7.025106603010441, "learning_rate": 2.442733564245381e-07, "loss": 0.6346, "step": 31535 }, { "epoch": 0.9, "grad_norm": 2.959674878017484, "learning_rate": 2.441301944556673e-07, "loss": 0.1203, "step": 31536 }, { "epoch": 0.9, "grad_norm": 8.242286775542526, "learning_rate": 2.4398707340095917e-07, "loss": 0.9007, "step": 31537 }, { "epoch": 0.9, "grad_norm": 9.819867180899163, "learning_rate": 2.438439932616438e-07, "loss": 0.8822, "step": 31538 }, { "epoch": 0.9, "grad_norm": 4.486492919552376, "learning_rate": 2.437009540389534e-07, "loss": 0.5413, "step": 31539 }, { "epoch": 0.9, "grad_norm": 5.946837381313088, "learning_rate": 2.435579557341178e-07, "loss": 0.7033, "step": 31540 }, { "epoch": 0.9, "grad_norm": 5.175017056460076, "learning_rate": 2.4341499834836745e-07, "loss": 0.7355, "step": 31541 }, { "epoch": 0.9, "grad_norm": 4.837098298163717, "learning_rate": 2.432720818829332e-07, "loss": 0.1949, "step": 31542 }, { "epoch": 0.9, "grad_norm": 6.635091940351338, "learning_rate": 2.431292063390428e-07, "loss": 0.1361, "step": 31543 }, { "epoch": 0.9, "grad_norm": 3.7508162245960213, "learning_rate": 2.429863717179265e-07, "loss": 0.4261, "step": 31544 }, { "epoch": 0.9, "grad_norm": 6.594486213551085, "learning_rate": 2.4284357802081216e-07, "loss": 0.6427, "step": 31545 }, { "epoch": 0.9, "grad_norm": 6.2112184856841335, "learning_rate": 2.427008252489294e-07, "loss": 0.4748, "step": 31546 }, { "epoch": 0.9, "grad_norm": 9.596807036288062, "learning_rate": 2.4255811340350543e-07, "loss": 0.5922, "step": 31547 }, { "epoch": 0.9, "grad_norm": 5.913546782851939, "learning_rate": 2.424154424857683e-07, "loss": 0.485, "step": 31548 }, { "epoch": 0.9, "grad_norm": 4.413090664716917, "learning_rate": 2.422728124969459e-07, "loss": 0.2361, "step": 31549 }, { "epoch": 0.9, "grad_norm": 3.0124573748093657, "learning_rate": 2.421302234382633e-07, "loss": 0.2666, "step": 31550 }, { "epoch": 0.9, "grad_norm": 10.22996418958793, "learning_rate": 2.4198767531095013e-07, "loss": 0.482, "step": 31551 }, { "epoch": 0.9, "grad_norm": 4.48407533023165, "learning_rate": 2.418451681162298e-07, "loss": 0.148, "step": 31552 }, { "epoch": 0.9, "grad_norm": 6.156164178395296, "learning_rate": 2.4170270185533086e-07, "loss": 0.1709, "step": 31553 }, { "epoch": 0.9, "grad_norm": 5.503527983641369, "learning_rate": 2.415602765294767e-07, "loss": 0.3604, "step": 31554 }, { "epoch": 0.9, "grad_norm": 3.9649681034224256, "learning_rate": 2.414178921398941e-07, "loss": 0.224, "step": 31555 }, { "epoch": 0.9, "grad_norm": 8.063905992561704, "learning_rate": 2.412755486878082e-07, "loss": 0.3668, "step": 31556 }, { "epoch": 0.9, "grad_norm": 4.963378449912362, "learning_rate": 2.41133246174442e-07, "loss": 0.4499, "step": 31557 }, { "epoch": 0.9, "grad_norm": 2.705707396981733, "learning_rate": 2.4099098460102055e-07, "loss": 0.1951, "step": 31558 }, { "epoch": 0.9, "grad_norm": 6.347279999365898, "learning_rate": 2.408487639687673e-07, "loss": 0.695, "step": 31559 }, { "epoch": 0.9, "grad_norm": 7.2876794285874995, "learning_rate": 2.407065842789069e-07, "loss": 0.7436, "step": 31560 }, { "epoch": 0.9, "grad_norm": 11.07960265813968, "learning_rate": 2.405644455326611e-07, "loss": 0.415, "step": 31561 }, { "epoch": 0.9, "grad_norm": 5.467361164850767, "learning_rate": 2.404223477312545e-07, "loss": 0.2986, "step": 31562 }, { "epoch": 0.9, "grad_norm": 7.514742095542082, "learning_rate": 2.4028029087590774e-07, "loss": 0.6895, "step": 31563 }, { "epoch": 0.9, "grad_norm": 13.28267010445414, "learning_rate": 2.401382749678444e-07, "loss": 0.5117, "step": 31564 }, { "epoch": 0.9, "grad_norm": 30.71695353939272, "learning_rate": 2.399963000082839e-07, "loss": 0.57, "step": 31565 }, { "epoch": 0.9, "grad_norm": 6.338472849196213, "learning_rate": 2.398543659984498e-07, "loss": 0.6314, "step": 31566 }, { "epoch": 0.9, "grad_norm": 3.435595487648624, "learning_rate": 2.397124729395628e-07, "loss": 0.2467, "step": 31567 }, { "epoch": 0.9, "grad_norm": 4.494001894537506, "learning_rate": 2.3957062083284297e-07, "loss": 0.411, "step": 31568 }, { "epoch": 0.9, "grad_norm": 6.301539092300643, "learning_rate": 2.394288096795111e-07, "loss": 0.3144, "step": 31569 }, { "epoch": 0.9, "grad_norm": 4.803281600077042, "learning_rate": 2.3928703948078726e-07, "loss": 0.4835, "step": 31570 }, { "epoch": 0.9, "grad_norm": 3.270322605924853, "learning_rate": 2.39145310237891e-07, "loss": 0.2274, "step": 31571 }, { "epoch": 0.9, "grad_norm": 3.104477442262089, "learning_rate": 2.3900362195204085e-07, "loss": 0.3216, "step": 31572 }, { "epoch": 0.9, "grad_norm": 4.781790609276951, "learning_rate": 2.388619746244569e-07, "loss": 0.2337, "step": 31573 }, { "epoch": 0.9, "grad_norm": 5.380119724288732, "learning_rate": 2.387203682563566e-07, "loss": 0.8969, "step": 31574 }, { "epoch": 0.9, "grad_norm": 5.2500702989503765, "learning_rate": 2.385788028489594e-07, "loss": 0.3572, "step": 31575 }, { "epoch": 0.9, "grad_norm": 4.273704004433506, "learning_rate": 2.3843727840348275e-07, "loss": 0.4993, "step": 31576 }, { "epoch": 0.9, "grad_norm": 5.340980838463427, "learning_rate": 2.3829579492114285e-07, "loss": 0.6431, "step": 31577 }, { "epoch": 0.9, "grad_norm": 7.650392401361247, "learning_rate": 2.3815435240315876e-07, "loss": 0.1799, "step": 31578 }, { "epoch": 0.9, "grad_norm": 6.1991833671997485, "learning_rate": 2.380129508507456e-07, "loss": 0.5331, "step": 31579 }, { "epoch": 0.9, "grad_norm": 3.6079380097826075, "learning_rate": 2.3787159026512186e-07, "loss": 0.2412, "step": 31580 }, { "epoch": 0.9, "grad_norm": 4.9163513917649295, "learning_rate": 2.3773027064750155e-07, "loss": 0.7044, "step": 31581 }, { "epoch": 0.9, "grad_norm": 5.681274276362591, "learning_rate": 2.37588991999102e-07, "loss": 0.4449, "step": 31582 }, { "epoch": 0.9, "grad_norm": 3.4894608813005004, "learning_rate": 2.3744775432113788e-07, "loss": 0.2724, "step": 31583 }, { "epoch": 0.9, "grad_norm": 4.7547463244492105, "learning_rate": 2.3730655761482369e-07, "loss": 0.4913, "step": 31584 }, { "epoch": 0.9, "grad_norm": 6.096255095626207, "learning_rate": 2.371654018813757e-07, "loss": 0.2286, "step": 31585 }, { "epoch": 0.9, "grad_norm": 5.593430323496797, "learning_rate": 2.3702428712200686e-07, "loss": 0.4833, "step": 31586 }, { "epoch": 0.9, "grad_norm": 4.8128896283053555, "learning_rate": 2.3688321333793173e-07, "loss": 0.1032, "step": 31587 }, { "epoch": 0.9, "grad_norm": 6.4898796156781575, "learning_rate": 2.3674218053036378e-07, "loss": 0.7819, "step": 31588 }, { "epoch": 0.9, "grad_norm": 5.556136402389195, "learning_rate": 2.366011887005165e-07, "loss": 0.6613, "step": 31589 }, { "epoch": 0.9, "grad_norm": 8.730297412940523, "learning_rate": 2.3646023784960338e-07, "loss": 0.5466, "step": 31590 }, { "epoch": 0.9, "grad_norm": 4.808364106558575, "learning_rate": 2.3631932797883507e-07, "loss": 0.3349, "step": 31591 }, { "epoch": 0.9, "grad_norm": 5.953255178875481, "learning_rate": 2.3617845908942615e-07, "loss": 0.4416, "step": 31592 }, { "epoch": 0.9, "grad_norm": 5.315906419879012, "learning_rate": 2.3603763118258626e-07, "loss": 0.5248, "step": 31593 }, { "epoch": 0.9, "grad_norm": 10.085275216080225, "learning_rate": 2.3589684425952886e-07, "loss": 0.6572, "step": 31594 }, { "epoch": 0.9, "grad_norm": 5.077416449779271, "learning_rate": 2.3575609832146406e-07, "loss": 0.4454, "step": 31595 }, { "epoch": 0.9, "grad_norm": 4.244368243795586, "learning_rate": 2.3561539336960315e-07, "loss": 0.3932, "step": 31596 }, { "epoch": 0.9, "grad_norm": 3.767933940944174, "learning_rate": 2.354747294051568e-07, "loss": 0.5143, "step": 31597 }, { "epoch": 0.9, "grad_norm": 7.083797043226293, "learning_rate": 2.3533410642933463e-07, "loss": 0.3804, "step": 31598 }, { "epoch": 0.9, "grad_norm": 8.727957891441328, "learning_rate": 2.3519352444334565e-07, "loss": 0.877, "step": 31599 }, { "epoch": 0.9, "grad_norm": 5.557122556573672, "learning_rate": 2.350529834484e-07, "loss": 0.532, "step": 31600 }, { "epoch": 0.9, "grad_norm": 7.554858358634578, "learning_rate": 2.3491248344570782e-07, "loss": 0.8486, "step": 31601 }, { "epoch": 0.91, "grad_norm": 4.202620210079984, "learning_rate": 2.3477202443647652e-07, "loss": 0.4143, "step": 31602 }, { "epoch": 0.91, "grad_norm": 5.247369834008817, "learning_rate": 2.3463160642191561e-07, "loss": 0.4732, "step": 31603 }, { "epoch": 0.91, "grad_norm": 4.008480737610136, "learning_rate": 2.3449122940323087e-07, "loss": 0.3326, "step": 31604 }, { "epoch": 0.91, "grad_norm": 5.709568762453609, "learning_rate": 2.3435089338163243e-07, "loss": 0.3022, "step": 31605 }, { "epoch": 0.91, "grad_norm": 4.67313551722638, "learning_rate": 2.3421059835832538e-07, "loss": 0.5832, "step": 31606 }, { "epoch": 0.91, "grad_norm": 3.463134860286503, "learning_rate": 2.340703443345177e-07, "loss": 0.2896, "step": 31607 }, { "epoch": 0.91, "grad_norm": 7.153171060196356, "learning_rate": 2.3393013131141728e-07, "loss": 0.8307, "step": 31608 }, { "epoch": 0.91, "grad_norm": 6.91074577132861, "learning_rate": 2.3378995929022873e-07, "loss": 0.5839, "step": 31609 }, { "epoch": 0.91, "grad_norm": 7.931830236013901, "learning_rate": 2.3364982827215832e-07, "loss": 0.611, "step": 31610 }, { "epoch": 0.91, "grad_norm": 3.697509241718042, "learning_rate": 2.3350973825841118e-07, "loss": 0.3825, "step": 31611 }, { "epoch": 0.91, "grad_norm": 9.281514892908127, "learning_rate": 2.3336968925019354e-07, "loss": 0.4759, "step": 31612 }, { "epoch": 0.91, "grad_norm": 3.297967106153697, "learning_rate": 2.3322968124870837e-07, "loss": 0.376, "step": 31613 }, { "epoch": 0.91, "grad_norm": 2.842127158775907, "learning_rate": 2.3308971425516248e-07, "loss": 0.2733, "step": 31614 }, { "epoch": 0.91, "grad_norm": 11.117523681533052, "learning_rate": 2.3294978827075821e-07, "loss": 0.6075, "step": 31615 }, { "epoch": 0.91, "grad_norm": 7.930331560108901, "learning_rate": 2.3280990329670074e-07, "loss": 0.5897, "step": 31616 }, { "epoch": 0.91, "grad_norm": 6.124216204726786, "learning_rate": 2.326700593341924e-07, "loss": 0.3057, "step": 31617 }, { "epoch": 0.91, "grad_norm": 3.891361151607963, "learning_rate": 2.3253025638443615e-07, "loss": 0.3254, "step": 31618 }, { "epoch": 0.91, "grad_norm": 6.545052213176421, "learning_rate": 2.3239049444863547e-07, "loss": 0.4511, "step": 31619 }, { "epoch": 0.91, "grad_norm": 8.147568536583112, "learning_rate": 2.322507735279922e-07, "loss": 0.5212, "step": 31620 }, { "epoch": 0.91, "grad_norm": 8.113848780699362, "learning_rate": 2.3211109362370866e-07, "loss": 0.5786, "step": 31621 }, { "epoch": 0.91, "grad_norm": 2.4507839427832274, "learning_rate": 2.3197145473698612e-07, "loss": 0.1462, "step": 31622 }, { "epoch": 0.91, "grad_norm": 7.517643393800353, "learning_rate": 2.3183185686902644e-07, "loss": 0.5758, "step": 31623 }, { "epoch": 0.91, "grad_norm": 4.627857356603997, "learning_rate": 2.3169230002103028e-07, "loss": 0.3038, "step": 31624 }, { "epoch": 0.91, "grad_norm": 11.847045399721567, "learning_rate": 2.315527841941978e-07, "loss": 0.623, "step": 31625 }, { "epoch": 0.91, "grad_norm": 3.9115173988945204, "learning_rate": 2.3141330938972972e-07, "loss": 0.1883, "step": 31626 }, { "epoch": 0.91, "grad_norm": 4.65399906615972, "learning_rate": 2.3127387560882564e-07, "loss": 0.5249, "step": 31627 }, { "epoch": 0.91, "grad_norm": 4.6472827663203296, "learning_rate": 2.3113448285268569e-07, "loss": 0.2681, "step": 31628 }, { "epoch": 0.91, "grad_norm": 5.080556809555148, "learning_rate": 2.3099513112250837e-07, "loss": 0.6369, "step": 31629 }, { "epoch": 0.91, "grad_norm": 5.33396993751765, "learning_rate": 2.3085582041949383e-07, "loss": 0.7637, "step": 31630 }, { "epoch": 0.91, "grad_norm": 5.167030808732172, "learning_rate": 2.3071655074483778e-07, "loss": 0.8017, "step": 31631 }, { "epoch": 0.91, "grad_norm": 1.6850459303666083, "learning_rate": 2.3057732209974037e-07, "loss": 0.2355, "step": 31632 }, { "epoch": 0.91, "grad_norm": 2.740678529640709, "learning_rate": 2.3043813448539953e-07, "loss": 0.3916, "step": 31633 }, { "epoch": 0.91, "grad_norm": 6.85500045002853, "learning_rate": 2.3029898790301152e-07, "loss": 0.6829, "step": 31634 }, { "epoch": 0.91, "grad_norm": 9.647593374045114, "learning_rate": 2.3015988235377484e-07, "loss": 0.3998, "step": 31635 }, { "epoch": 0.91, "grad_norm": 14.096445675880648, "learning_rate": 2.3002081783888575e-07, "loss": 0.52, "step": 31636 }, { "epoch": 0.91, "grad_norm": 5.56583833708607, "learning_rate": 2.2988179435953996e-07, "loss": 0.3668, "step": 31637 }, { "epoch": 0.91, "grad_norm": 3.3905694315351704, "learning_rate": 2.2974281191693315e-07, "loss": 0.4071, "step": 31638 }, { "epoch": 0.91, "grad_norm": 6.288024797777729, "learning_rate": 2.2960387051226218e-07, "loss": 0.5656, "step": 31639 }, { "epoch": 0.91, "grad_norm": 5.878715963230642, "learning_rate": 2.2946497014672108e-07, "loss": 0.5843, "step": 31640 }, { "epoch": 0.91, "grad_norm": 4.34786435680547, "learning_rate": 2.2932611082150557e-07, "loss": 0.2413, "step": 31641 }, { "epoch": 0.91, "grad_norm": 6.407896249033602, "learning_rate": 2.2918729253781026e-07, "loss": 0.3573, "step": 31642 }, { "epoch": 0.91, "grad_norm": 4.9946540147935465, "learning_rate": 2.2904851529683025e-07, "loss": 0.3664, "step": 31643 }, { "epoch": 0.91, "grad_norm": 4.228385264301182, "learning_rate": 2.2890977909975798e-07, "loss": 0.7862, "step": 31644 }, { "epoch": 0.91, "grad_norm": 7.767277378689552, "learning_rate": 2.2877108394778637e-07, "loss": 0.989, "step": 31645 }, { "epoch": 0.91, "grad_norm": 6.394424763831593, "learning_rate": 2.286324298421111e-07, "loss": 0.4533, "step": 31646 }, { "epoch": 0.91, "grad_norm": 5.706011775748386, "learning_rate": 2.2849381678392235e-07, "loss": 0.2901, "step": 31647 }, { "epoch": 0.91, "grad_norm": 3.238135690029032, "learning_rate": 2.2835524477441419e-07, "loss": 0.2984, "step": 31648 }, { "epoch": 0.91, "grad_norm": 6.014801048229299, "learning_rate": 2.2821671381477839e-07, "loss": 0.3915, "step": 31649 }, { "epoch": 0.91, "grad_norm": 5.506827386726659, "learning_rate": 2.2807822390620738e-07, "loss": 0.4787, "step": 31650 }, { "epoch": 0.91, "grad_norm": 8.62301516154453, "learning_rate": 2.2793977504989185e-07, "loss": 0.1664, "step": 31651 }, { "epoch": 0.91, "grad_norm": 1.3022121286271704, "learning_rate": 2.2780136724702196e-07, "loss": 0.0489, "step": 31652 }, { "epoch": 0.91, "grad_norm": 3.8780589796620615, "learning_rate": 2.2766300049879009e-07, "loss": 0.3307, "step": 31653 }, { "epoch": 0.91, "grad_norm": 5.640390132336188, "learning_rate": 2.2752467480638474e-07, "loss": 0.6027, "step": 31654 }, { "epoch": 0.91, "grad_norm": 7.604128037097652, "learning_rate": 2.2738639017099828e-07, "loss": 0.4212, "step": 31655 }, { "epoch": 0.91, "grad_norm": 8.550319054434004, "learning_rate": 2.272481465938181e-07, "loss": 0.5094, "step": 31656 }, { "epoch": 0.91, "grad_norm": 5.4402341874287545, "learning_rate": 2.2710994407603548e-07, "loss": 0.3468, "step": 31657 }, { "epoch": 0.91, "grad_norm": 4.352943625851232, "learning_rate": 2.2697178261883778e-07, "loss": 0.27, "step": 31658 }, { "epoch": 0.91, "grad_norm": 4.08425334699631, "learning_rate": 2.2683366222341407e-07, "loss": 0.527, "step": 31659 }, { "epoch": 0.91, "grad_norm": 15.923790124899082, "learning_rate": 2.2669558289095285e-07, "loss": 0.5579, "step": 31660 }, { "epoch": 0.91, "grad_norm": 3.1396040894029613, "learning_rate": 2.2655754462264145e-07, "loss": 0.5487, "step": 31661 }, { "epoch": 0.91, "grad_norm": 4.248418597915538, "learning_rate": 2.2641954741966844e-07, "loss": 0.3533, "step": 31662 }, { "epoch": 0.91, "grad_norm": 6.838261742151641, "learning_rate": 2.262815912832206e-07, "loss": 0.4641, "step": 31663 }, { "epoch": 0.91, "grad_norm": 5.595659281773594, "learning_rate": 2.2614367621448418e-07, "loss": 0.7829, "step": 31664 }, { "epoch": 0.91, "grad_norm": 4.579219244683178, "learning_rate": 2.2600580221464497e-07, "loss": 0.2315, "step": 31665 }, { "epoch": 0.91, "grad_norm": 2.164661059591888, "learning_rate": 2.258679692848903e-07, "loss": 0.1179, "step": 31666 }, { "epoch": 0.91, "grad_norm": 1.9642927182060317, "learning_rate": 2.2573017742640645e-07, "loss": 0.2614, "step": 31667 }, { "epoch": 0.91, "grad_norm": 7.408367256741974, "learning_rate": 2.255924266403775e-07, "loss": 0.8783, "step": 31668 }, { "epoch": 0.91, "grad_norm": 5.94355196457446, "learning_rate": 2.2545471692798971e-07, "loss": 0.6094, "step": 31669 }, { "epoch": 0.91, "grad_norm": 6.183848218567723, "learning_rate": 2.2531704829042655e-07, "loss": 0.3362, "step": 31670 }, { "epoch": 0.91, "grad_norm": 6.2630994467979635, "learning_rate": 2.2517942072887378e-07, "loss": 0.5544, "step": 31671 }, { "epoch": 0.91, "grad_norm": 6.554073354982818, "learning_rate": 2.250418342445132e-07, "loss": 0.2237, "step": 31672 }, { "epoch": 0.91, "grad_norm": 3.7331859020232225, "learning_rate": 2.2490428883853056e-07, "loss": 0.3394, "step": 31673 }, { "epoch": 0.91, "grad_norm": 3.5149589585828958, "learning_rate": 2.247667845121082e-07, "loss": 0.4439, "step": 31674 }, { "epoch": 0.91, "grad_norm": 8.004654662708822, "learning_rate": 2.2462932126642912e-07, "loss": 0.4859, "step": 31675 }, { "epoch": 0.91, "grad_norm": 3.8318022679967565, "learning_rate": 2.2449189910267678e-07, "loss": 0.1513, "step": 31676 }, { "epoch": 0.91, "grad_norm": 3.5765491330358214, "learning_rate": 2.2435451802203246e-07, "loss": 0.3428, "step": 31677 }, { "epoch": 0.91, "grad_norm": 6.607753870485881, "learning_rate": 2.24217178025678e-07, "loss": 0.4578, "step": 31678 }, { "epoch": 0.91, "grad_norm": 5.880543122898839, "learning_rate": 2.2407987911479469e-07, "loss": 0.3238, "step": 31679 }, { "epoch": 0.91, "grad_norm": 8.52366250472805, "learning_rate": 2.2394262129056487e-07, "loss": 0.7687, "step": 31680 }, { "epoch": 0.91, "grad_norm": 4.128096198790788, "learning_rate": 2.238054045541682e-07, "loss": 0.4598, "step": 31681 }, { "epoch": 0.91, "grad_norm": 7.249825245296697, "learning_rate": 2.236682289067854e-07, "loss": 0.4671, "step": 31682 }, { "epoch": 0.91, "grad_norm": 4.652328555244853, "learning_rate": 2.235310943495972e-07, "loss": 0.2405, "step": 31683 }, { "epoch": 0.91, "grad_norm": 4.231988072223476, "learning_rate": 2.2339400088378316e-07, "loss": 0.3156, "step": 31684 }, { "epoch": 0.91, "grad_norm": 5.946552944232805, "learning_rate": 2.2325694851052181e-07, "loss": 0.4233, "step": 31685 }, { "epoch": 0.91, "grad_norm": 9.002089787623405, "learning_rate": 2.2311993723099278e-07, "loss": 0.3049, "step": 31686 }, { "epoch": 0.91, "grad_norm": 7.294753229727322, "learning_rate": 2.2298296704637513e-07, "loss": 0.4424, "step": 31687 }, { "epoch": 0.91, "grad_norm": 2.0838039502423578, "learning_rate": 2.2284603795784676e-07, "loss": 0.1906, "step": 31688 }, { "epoch": 0.91, "grad_norm": 4.406940649569093, "learning_rate": 2.227091499665851e-07, "loss": 0.2673, "step": 31689 }, { "epoch": 0.91, "grad_norm": 3.4258568645557785, "learning_rate": 2.2257230307376977e-07, "loss": 0.2556, "step": 31690 }, { "epoch": 0.91, "grad_norm": 3.272960436888689, "learning_rate": 2.224354972805759e-07, "loss": 0.3053, "step": 31691 }, { "epoch": 0.91, "grad_norm": 10.342633512694205, "learning_rate": 2.2229873258818146e-07, "loss": 0.3742, "step": 31692 }, { "epoch": 0.91, "grad_norm": 5.8300453274569515, "learning_rate": 2.221620089977622e-07, "loss": 0.409, "step": 31693 }, { "epoch": 0.91, "grad_norm": 8.285997836306743, "learning_rate": 2.2202532651049548e-07, "loss": 0.7673, "step": 31694 }, { "epoch": 0.91, "grad_norm": 6.262227457173072, "learning_rate": 2.2188868512755647e-07, "loss": 0.5828, "step": 31695 }, { "epoch": 0.91, "grad_norm": 7.11624105715931, "learning_rate": 2.2175208485012088e-07, "loss": 0.6167, "step": 31696 }, { "epoch": 0.91, "grad_norm": 6.854059858175972, "learning_rate": 2.2161552567936395e-07, "loss": 0.5704, "step": 31697 }, { "epoch": 0.91, "grad_norm": 4.751390203521427, "learning_rate": 2.214790076164597e-07, "loss": 0.5186, "step": 31698 }, { "epoch": 0.91, "grad_norm": 6.484963411038252, "learning_rate": 2.213425306625838e-07, "loss": 0.4613, "step": 31699 }, { "epoch": 0.91, "grad_norm": 3.635023891942399, "learning_rate": 2.212060948189093e-07, "loss": 0.4286, "step": 31700 }, { "epoch": 0.91, "grad_norm": 2.902975021446694, "learning_rate": 2.2106970008661076e-07, "loss": 0.2134, "step": 31701 }, { "epoch": 0.91, "grad_norm": 6.276465952185034, "learning_rate": 2.209333464668606e-07, "loss": 0.5, "step": 31702 }, { "epoch": 0.91, "grad_norm": 9.565979667302207, "learning_rate": 2.2079703396083286e-07, "loss": 0.2403, "step": 31703 }, { "epoch": 0.91, "grad_norm": 8.510180183278235, "learning_rate": 2.206607625697005e-07, "loss": 0.2522, "step": 31704 }, { "epoch": 0.91, "grad_norm": 4.199994903515948, "learning_rate": 2.2052453229463477e-07, "loss": 0.406, "step": 31705 }, { "epoch": 0.91, "grad_norm": 12.217228533759695, "learning_rate": 2.2038834313680702e-07, "loss": 0.5821, "step": 31706 }, { "epoch": 0.91, "grad_norm": 2.923078452345896, "learning_rate": 2.2025219509739015e-07, "loss": 0.1802, "step": 31707 }, { "epoch": 0.91, "grad_norm": 5.34009641260271, "learning_rate": 2.2011608817755604e-07, "loss": 0.2005, "step": 31708 }, { "epoch": 0.91, "grad_norm": 5.4278309893442795, "learning_rate": 2.199800223784737e-07, "loss": 0.1955, "step": 31709 }, { "epoch": 0.91, "grad_norm": 6.149526535975652, "learning_rate": 2.198439977013156e-07, "loss": 0.4301, "step": 31710 }, { "epoch": 0.91, "grad_norm": 10.075343352873077, "learning_rate": 2.1970801414725074e-07, "loss": 0.4211, "step": 31711 }, { "epoch": 0.91, "grad_norm": 3.9558506792737016, "learning_rate": 2.1957207171744987e-07, "loss": 0.4345, "step": 31712 }, { "epoch": 0.91, "grad_norm": 7.348406090771666, "learning_rate": 2.194361704130804e-07, "loss": 0.7665, "step": 31713 }, { "epoch": 0.91, "grad_norm": 2.6864344236491497, "learning_rate": 2.193003102353136e-07, "loss": 0.167, "step": 31714 }, { "epoch": 0.91, "grad_norm": 2.8897732691279696, "learning_rate": 2.1916449118531802e-07, "loss": 0.3671, "step": 31715 }, { "epoch": 0.91, "grad_norm": 7.1588741971528185, "learning_rate": 2.19028713264261e-07, "loss": 0.383, "step": 31716 }, { "epoch": 0.91, "grad_norm": 2.9250377326553028, "learning_rate": 2.188929764733122e-07, "loss": 0.215, "step": 31717 }, { "epoch": 0.91, "grad_norm": 6.476003956379427, "learning_rate": 2.1875728081363845e-07, "loss": 0.5983, "step": 31718 }, { "epoch": 0.91, "grad_norm": 8.22151189891815, "learning_rate": 2.186216262864066e-07, "loss": 0.332, "step": 31719 }, { "epoch": 0.91, "grad_norm": 5.681909286635593, "learning_rate": 2.1848601289278403e-07, "loss": 0.6334, "step": 31720 }, { "epoch": 0.91, "grad_norm": 2.918107485249848, "learning_rate": 2.1835044063393817e-07, "loss": 0.2049, "step": 31721 }, { "epoch": 0.91, "grad_norm": 2.3427545595223416, "learning_rate": 2.1821490951103418e-07, "loss": 0.201, "step": 31722 }, { "epoch": 0.91, "grad_norm": 4.716908683004549, "learning_rate": 2.1807941952523892e-07, "loss": 0.269, "step": 31723 }, { "epoch": 0.91, "grad_norm": 9.313646130789635, "learning_rate": 2.179439706777181e-07, "loss": 0.7538, "step": 31724 }, { "epoch": 0.91, "grad_norm": 5.4438503934973275, "learning_rate": 2.1780856296963526e-07, "loss": 0.5271, "step": 31725 }, { "epoch": 0.91, "grad_norm": 4.3294694375473695, "learning_rate": 2.1767319640215779e-07, "loss": 0.5273, "step": 31726 }, { "epoch": 0.91, "grad_norm": 4.425926212855823, "learning_rate": 2.1753787097644807e-07, "loss": 0.2831, "step": 31727 }, { "epoch": 0.91, "grad_norm": 4.252542464522505, "learning_rate": 2.1740258669367187e-07, "loss": 0.3059, "step": 31728 }, { "epoch": 0.91, "grad_norm": 5.340390002335473, "learning_rate": 2.1726734355499156e-07, "loss": 0.4066, "step": 31729 }, { "epoch": 0.91, "grad_norm": 7.5743886959060065, "learning_rate": 2.1713214156157236e-07, "loss": 0.6214, "step": 31730 }, { "epoch": 0.91, "grad_norm": 3.5532725441929487, "learning_rate": 2.1699698071457664e-07, "loss": 0.2643, "step": 31731 }, { "epoch": 0.91, "grad_norm": 7.892465750638389, "learning_rate": 2.168618610151657e-07, "loss": 0.2608, "step": 31732 }, { "epoch": 0.91, "grad_norm": 3.1448806852866005, "learning_rate": 2.1672678246450417e-07, "loss": 0.2342, "step": 31733 }, { "epoch": 0.91, "grad_norm": 4.8078346427975385, "learning_rate": 2.165917450637528e-07, "loss": 0.4621, "step": 31734 }, { "epoch": 0.91, "grad_norm": 6.960370075976093, "learning_rate": 2.1645674881407397e-07, "loss": 0.8473, "step": 31735 }, { "epoch": 0.91, "grad_norm": 3.1925026027158014, "learning_rate": 2.1632179371662843e-07, "loss": 0.2699, "step": 31736 }, { "epoch": 0.91, "grad_norm": 4.0973471933189, "learning_rate": 2.1618687977257802e-07, "loss": 0.3602, "step": 31737 }, { "epoch": 0.91, "grad_norm": 5.551341576854867, "learning_rate": 2.1605200698308293e-07, "loss": 0.4866, "step": 31738 }, { "epoch": 0.91, "grad_norm": 3.5342961903764962, "learning_rate": 2.159171753493028e-07, "loss": 0.3628, "step": 31739 }, { "epoch": 0.91, "grad_norm": 3.557627809578313, "learning_rate": 2.157823848723989e-07, "loss": 0.2197, "step": 31740 }, { "epoch": 0.91, "grad_norm": 6.135269120087832, "learning_rate": 2.156476355535292e-07, "loss": 0.3493, "step": 31741 }, { "epoch": 0.91, "grad_norm": 11.997576389343793, "learning_rate": 2.15512927393855e-07, "loss": 0.673, "step": 31742 }, { "epoch": 0.91, "grad_norm": 5.319278229998673, "learning_rate": 2.1537826039453257e-07, "loss": 0.3505, "step": 31743 }, { "epoch": 0.91, "grad_norm": 6.354742814088964, "learning_rate": 2.1524363455672324e-07, "loss": 0.2361, "step": 31744 }, { "epoch": 0.91, "grad_norm": 14.273605604596423, "learning_rate": 2.1510904988158333e-07, "loss": 0.685, "step": 31745 }, { "epoch": 0.91, "grad_norm": 5.375397445710975, "learning_rate": 2.1497450637027128e-07, "loss": 0.4086, "step": 31746 }, { "epoch": 0.91, "grad_norm": 5.686183735211258, "learning_rate": 2.14840004023944e-07, "loss": 0.3924, "step": 31747 }, { "epoch": 0.91, "grad_norm": 6.029960375856496, "learning_rate": 2.1470554284375834e-07, "loss": 0.5246, "step": 31748 }, { "epoch": 0.91, "grad_norm": 6.870609927858824, "learning_rate": 2.145711228308728e-07, "loss": 0.564, "step": 31749 }, { "epoch": 0.91, "grad_norm": 3.806991224354478, "learning_rate": 2.1443674398644255e-07, "loss": 0.1375, "step": 31750 }, { "epoch": 0.91, "grad_norm": 7.5366148142638645, "learning_rate": 2.1430240631162392e-07, "loss": 0.2249, "step": 31751 }, { "epoch": 0.91, "grad_norm": 3.1737059208745078, "learning_rate": 2.1416810980757208e-07, "loss": 0.2155, "step": 31752 }, { "epoch": 0.91, "grad_norm": 8.98548837148593, "learning_rate": 2.140338544754428e-07, "loss": 0.3397, "step": 31753 }, { "epoch": 0.91, "grad_norm": 4.105401790809659, "learning_rate": 2.1389964031639065e-07, "loss": 0.5292, "step": 31754 }, { "epoch": 0.91, "grad_norm": 5.615906039992515, "learning_rate": 2.137654673315709e-07, "loss": 0.458, "step": 31755 }, { "epoch": 0.91, "grad_norm": 9.325402250639405, "learning_rate": 2.136313355221381e-07, "loss": 0.7428, "step": 31756 }, { "epoch": 0.91, "grad_norm": 4.560329600351577, "learning_rate": 2.134972448892453e-07, "loss": 0.7283, "step": 31757 }, { "epoch": 0.91, "grad_norm": 12.363217977690335, "learning_rate": 2.133631954340465e-07, "loss": 0.65, "step": 31758 }, { "epoch": 0.91, "grad_norm": 4.663997681644969, "learning_rate": 2.1322918715769414e-07, "loss": 0.8267, "step": 31759 }, { "epoch": 0.91, "grad_norm": 6.1422456345078995, "learning_rate": 2.130952200613423e-07, "loss": 0.4914, "step": 31760 }, { "epoch": 0.91, "grad_norm": 8.602102153594577, "learning_rate": 2.129612941461423e-07, "loss": 0.1962, "step": 31761 }, { "epoch": 0.91, "grad_norm": 5.6063054859218155, "learning_rate": 2.1282740941324763e-07, "loss": 0.2941, "step": 31762 }, { "epoch": 0.91, "grad_norm": 3.575966696319562, "learning_rate": 2.126935658638085e-07, "loss": 0.1992, "step": 31763 }, { "epoch": 0.91, "grad_norm": 4.424950921600073, "learning_rate": 2.1255976349897788e-07, "loss": 0.4877, "step": 31764 }, { "epoch": 0.91, "grad_norm": 4.94287864249615, "learning_rate": 2.1242600231990595e-07, "loss": 0.3326, "step": 31765 }, { "epoch": 0.91, "grad_norm": 4.974337381296637, "learning_rate": 2.1229228232774346e-07, "loss": 0.5716, "step": 31766 }, { "epoch": 0.91, "grad_norm": 5.999059523943229, "learning_rate": 2.121586035236417e-07, "loss": 0.3989, "step": 31767 }, { "epoch": 0.91, "grad_norm": 3.582119906294496, "learning_rate": 2.1202496590874922e-07, "loss": 0.1501, "step": 31768 }, { "epoch": 0.91, "grad_norm": 5.601374592415872, "learning_rate": 2.1189136948421672e-07, "loss": 0.2326, "step": 31769 }, { "epoch": 0.91, "grad_norm": 3.344457498016596, "learning_rate": 2.117578142511928e-07, "loss": 0.613, "step": 31770 }, { "epoch": 0.91, "grad_norm": 3.990173668009816, "learning_rate": 2.1162430021082758e-07, "loss": 0.3826, "step": 31771 }, { "epoch": 0.91, "grad_norm": 6.167333085763008, "learning_rate": 2.1149082736426907e-07, "loss": 0.6343, "step": 31772 }, { "epoch": 0.91, "grad_norm": 3.589571239932975, "learning_rate": 2.1135739571266468e-07, "loss": 0.3609, "step": 31773 }, { "epoch": 0.91, "grad_norm": 4.031032083225163, "learning_rate": 2.1122400525716347e-07, "loss": 0.2985, "step": 31774 }, { "epoch": 0.91, "grad_norm": 6.2194205071209785, "learning_rate": 2.1109065599891176e-07, "loss": 0.5984, "step": 31775 }, { "epoch": 0.91, "grad_norm": 5.325940755565463, "learning_rate": 2.1095734793905865e-07, "loss": 0.559, "step": 31776 }, { "epoch": 0.91, "grad_norm": 5.955138098895483, "learning_rate": 2.1082408107874986e-07, "loss": 0.4116, "step": 31777 }, { "epoch": 0.91, "grad_norm": 1.1091193992699475, "learning_rate": 2.1069085541913114e-07, "loss": 0.0142, "step": 31778 }, { "epoch": 0.91, "grad_norm": 2.742444789803672, "learning_rate": 2.1055767096134938e-07, "loss": 0.1951, "step": 31779 }, { "epoch": 0.91, "grad_norm": 6.1736868601117365, "learning_rate": 2.1042452770654976e-07, "loss": 0.0696, "step": 31780 }, { "epoch": 0.91, "grad_norm": 4.6171613844182975, "learning_rate": 2.1029142565587912e-07, "loss": 0.4333, "step": 31781 }, { "epoch": 0.91, "grad_norm": 6.156945532996041, "learning_rate": 2.1015836481048103e-07, "loss": 0.6086, "step": 31782 }, { "epoch": 0.91, "grad_norm": 3.3736481166605765, "learning_rate": 2.100253451715012e-07, "loss": 0.4236, "step": 31783 }, { "epoch": 0.91, "grad_norm": 9.102989274861896, "learning_rate": 2.0989236674008372e-07, "loss": 0.4723, "step": 31784 }, { "epoch": 0.91, "grad_norm": 4.568289127926936, "learning_rate": 2.0975942951737271e-07, "loss": 0.337, "step": 31785 }, { "epoch": 0.91, "grad_norm": 7.648663869124405, "learning_rate": 2.0962653350451056e-07, "loss": 0.828, "step": 31786 }, { "epoch": 0.91, "grad_norm": 10.849744915051215, "learning_rate": 2.0949367870264248e-07, "loss": 0.2072, "step": 31787 }, { "epoch": 0.91, "grad_norm": 3.7096859718842117, "learning_rate": 2.093608651129092e-07, "loss": 0.4546, "step": 31788 }, { "epoch": 0.91, "grad_norm": 3.2396594199090294, "learning_rate": 2.0922809273645538e-07, "loss": 0.2641, "step": 31789 }, { "epoch": 0.91, "grad_norm": 7.488907940529356, "learning_rate": 2.0909536157442235e-07, "loss": 0.2896, "step": 31790 }, { "epoch": 0.91, "grad_norm": 2.8188128724455632, "learning_rate": 2.0896267162795247e-07, "loss": 0.4099, "step": 31791 }, { "epoch": 0.91, "grad_norm": 6.207766393936274, "learning_rate": 2.0883002289818654e-07, "loss": 0.2833, "step": 31792 }, { "epoch": 0.91, "grad_norm": 7.373948361693209, "learning_rate": 2.0869741538626587e-07, "loss": 0.6059, "step": 31793 }, { "epoch": 0.91, "grad_norm": 3.916716477232933, "learning_rate": 2.0856484909333175e-07, "loss": 0.4258, "step": 31794 }, { "epoch": 0.91, "grad_norm": 7.085351955526217, "learning_rate": 2.0843232402052327e-07, "loss": 0.1795, "step": 31795 }, { "epoch": 0.91, "grad_norm": 3.5795909510579422, "learning_rate": 2.082998401689823e-07, "loss": 0.1851, "step": 31796 }, { "epoch": 0.91, "grad_norm": 5.7476203182083, "learning_rate": 2.0816739753984795e-07, "loss": 0.5717, "step": 31797 }, { "epoch": 0.91, "grad_norm": 7.492640635736446, "learning_rate": 2.080349961342598e-07, "loss": 0.6337, "step": 31798 }, { "epoch": 0.91, "grad_norm": 5.39593147375144, "learning_rate": 2.0790263595335647e-07, "loss": 0.6761, "step": 31799 }, { "epoch": 0.91, "grad_norm": 3.7949200059353103, "learning_rate": 2.077703169982759e-07, "loss": 0.3471, "step": 31800 }, { "epoch": 0.91, "grad_norm": 3.241331378093197, "learning_rate": 2.076380392701577e-07, "loss": 0.1413, "step": 31801 }, { "epoch": 0.91, "grad_norm": 2.6648106293645664, "learning_rate": 2.0750580277013932e-07, "loss": 0.1338, "step": 31802 }, { "epoch": 0.91, "grad_norm": 4.695654832797749, "learning_rate": 2.0737360749935876e-07, "loss": 0.3419, "step": 31803 }, { "epoch": 0.91, "grad_norm": 11.464818296128048, "learning_rate": 2.072414534589523e-07, "loss": 0.2812, "step": 31804 }, { "epoch": 0.91, "grad_norm": 7.558520034312295, "learning_rate": 2.071093406500585e-07, "loss": 0.3282, "step": 31805 }, { "epoch": 0.91, "grad_norm": 8.280697210323687, "learning_rate": 2.0697726907381255e-07, "loss": 0.3997, "step": 31806 }, { "epoch": 0.91, "grad_norm": 7.431082128324272, "learning_rate": 2.0684523873135076e-07, "loss": 0.8359, "step": 31807 }, { "epoch": 0.91, "grad_norm": 5.567812836263723, "learning_rate": 2.0671324962380944e-07, "loss": 0.2519, "step": 31808 }, { "epoch": 0.91, "grad_norm": 6.751755133019207, "learning_rate": 2.0658130175232383e-07, "loss": 0.238, "step": 31809 }, { "epoch": 0.91, "grad_norm": 3.7593773262352856, "learning_rate": 2.064493951180291e-07, "loss": 0.2099, "step": 31810 }, { "epoch": 0.91, "grad_norm": 4.9784046159222965, "learning_rate": 2.0631752972206043e-07, "loss": 0.5755, "step": 31811 }, { "epoch": 0.91, "grad_norm": 6.4070460871596895, "learning_rate": 2.0618570556555194e-07, "loss": 0.3098, "step": 31812 }, { "epoch": 0.91, "grad_norm": 4.186212284741778, "learning_rate": 2.0605392264963664e-07, "loss": 0.5043, "step": 31813 }, { "epoch": 0.91, "grad_norm": 1.270446133710784, "learning_rate": 2.0592218097544912e-07, "loss": 0.078, "step": 31814 }, { "epoch": 0.91, "grad_norm": 5.255581681518369, "learning_rate": 2.0579048054412355e-07, "loss": 0.3611, "step": 31815 }, { "epoch": 0.91, "grad_norm": 5.607645825544804, "learning_rate": 2.056588213567917e-07, "loss": 0.478, "step": 31816 }, { "epoch": 0.91, "grad_norm": 2.7043557865526164, "learning_rate": 2.0552720341458722e-07, "loss": 0.2457, "step": 31817 }, { "epoch": 0.91, "grad_norm": 2.9458794693164507, "learning_rate": 2.0539562671864188e-07, "loss": 0.1597, "step": 31818 }, { "epoch": 0.91, "grad_norm": 6.0861412082804085, "learning_rate": 2.0526409127008762e-07, "loss": 0.3765, "step": 31819 }, { "epoch": 0.91, "grad_norm": 4.022241788795175, "learning_rate": 2.0513259707005572e-07, "loss": 0.3134, "step": 31820 }, { "epoch": 0.91, "grad_norm": 15.533700116510953, "learning_rate": 2.0500114411967754e-07, "loss": 0.5229, "step": 31821 }, { "epoch": 0.91, "grad_norm": 6.9067275628727804, "learning_rate": 2.048697324200849e-07, "loss": 0.193, "step": 31822 }, { "epoch": 0.91, "grad_norm": 5.369288093148642, "learning_rate": 2.0473836197240637e-07, "loss": 0.4571, "step": 31823 }, { "epoch": 0.91, "grad_norm": 6.3806857017419425, "learning_rate": 2.0460703277777438e-07, "loss": 0.3546, "step": 31824 }, { "epoch": 0.91, "grad_norm": 6.803005566860637, "learning_rate": 2.0447574483731746e-07, "loss": 0.6658, "step": 31825 }, { "epoch": 0.91, "grad_norm": 5.270297052093313, "learning_rate": 2.0434449815216584e-07, "loss": 0.5231, "step": 31826 }, { "epoch": 0.91, "grad_norm": 8.684626920655635, "learning_rate": 2.0421329272344693e-07, "loss": 0.6772, "step": 31827 }, { "epoch": 0.91, "grad_norm": 7.202831204395172, "learning_rate": 2.0408212855229092e-07, "loss": 0.6326, "step": 31828 }, { "epoch": 0.91, "grad_norm": 7.129612717830899, "learning_rate": 2.0395100563982583e-07, "loss": 0.4063, "step": 31829 }, { "epoch": 0.91, "grad_norm": 4.133740730102223, "learning_rate": 2.0381992398717965e-07, "loss": 0.4314, "step": 31830 }, { "epoch": 0.91, "grad_norm": 7.241180053562764, "learning_rate": 2.0368888359548033e-07, "loss": 0.7731, "step": 31831 }, { "epoch": 0.91, "grad_norm": 4.513552153653146, "learning_rate": 2.0355788446585534e-07, "loss": 0.4686, "step": 31832 }, { "epoch": 0.91, "grad_norm": 3.522686000621358, "learning_rate": 2.0342692659943154e-07, "loss": 0.4504, "step": 31833 }, { "epoch": 0.91, "grad_norm": 3.1917892095548215, "learning_rate": 2.0329600999733413e-07, "loss": 0.2641, "step": 31834 }, { "epoch": 0.91, "grad_norm": 5.24426242111531, "learning_rate": 2.0316513466069164e-07, "loss": 0.2316, "step": 31835 }, { "epoch": 0.91, "grad_norm": 5.568641444046172, "learning_rate": 2.0303430059062823e-07, "loss": 0.4841, "step": 31836 }, { "epoch": 0.91, "grad_norm": 5.135538291909472, "learning_rate": 2.0290350778827018e-07, "loss": 0.7191, "step": 31837 }, { "epoch": 0.91, "grad_norm": 6.715599137403303, "learning_rate": 2.0277275625474325e-07, "loss": 0.4051, "step": 31838 }, { "epoch": 0.91, "grad_norm": 4.155375481864296, "learning_rate": 2.0264204599117098e-07, "loss": 0.2891, "step": 31839 }, { "epoch": 0.91, "grad_norm": 5.726878200431445, "learning_rate": 2.0251137699867918e-07, "loss": 0.499, "step": 31840 }, { "epoch": 0.91, "grad_norm": 5.106310890983309, "learning_rate": 2.0238074927839025e-07, "loss": 0.2374, "step": 31841 }, { "epoch": 0.91, "grad_norm": 4.6148691827949255, "learning_rate": 2.0225016283142995e-07, "loss": 0.2116, "step": 31842 }, { "epoch": 0.91, "grad_norm": 11.497644929646361, "learning_rate": 2.021196176589202e-07, "loss": 0.4664, "step": 31843 }, { "epoch": 0.91, "grad_norm": 6.356850912820909, "learning_rate": 2.0198911376198505e-07, "loss": 0.4067, "step": 31844 }, { "epoch": 0.91, "grad_norm": 5.612598608633267, "learning_rate": 2.0185865114174695e-07, "loss": 0.4499, "step": 31845 }, { "epoch": 0.91, "grad_norm": 5.831344737829862, "learning_rate": 2.0172822979932782e-07, "loss": 0.4842, "step": 31846 }, { "epoch": 0.91, "grad_norm": 4.05083645789434, "learning_rate": 2.0159784973585006e-07, "loss": 0.2402, "step": 31847 }, { "epoch": 0.91, "grad_norm": 4.376239519278325, "learning_rate": 2.0146751095243498e-07, "loss": 0.2951, "step": 31848 }, { "epoch": 0.91, "grad_norm": 10.619122214526758, "learning_rate": 2.0133721345020453e-07, "loss": 0.6418, "step": 31849 }, { "epoch": 0.91, "grad_norm": 3.485084149338069, "learning_rate": 2.0120695723027828e-07, "loss": 0.1914, "step": 31850 }, { "epoch": 0.91, "grad_norm": 5.966659579281709, "learning_rate": 2.0107674229377873e-07, "loss": 0.4957, "step": 31851 }, { "epoch": 0.91, "grad_norm": 8.208869651920368, "learning_rate": 2.0094656864182492e-07, "loss": 0.3939, "step": 31852 }, { "epoch": 0.91, "grad_norm": 6.187084800584227, "learning_rate": 2.0081643627553715e-07, "loss": 0.5202, "step": 31853 }, { "epoch": 0.91, "grad_norm": 8.79453304684185, "learning_rate": 2.0068634519603393e-07, "loss": 0.6753, "step": 31854 }, { "epoch": 0.91, "grad_norm": 3.4797801697017388, "learning_rate": 2.0055629540443488e-07, "loss": 0.2896, "step": 31855 }, { "epoch": 0.91, "grad_norm": 8.319609161880463, "learning_rate": 2.0042628690186028e-07, "loss": 0.6711, "step": 31856 }, { "epoch": 0.91, "grad_norm": 5.242365599561688, "learning_rate": 2.0029631968942643e-07, "loss": 0.4344, "step": 31857 }, { "epoch": 0.91, "grad_norm": 4.549463040243768, "learning_rate": 2.0016639376825297e-07, "loss": 0.3341, "step": 31858 }, { "epoch": 0.91, "grad_norm": 1.6189913065384178, "learning_rate": 2.0003650913945738e-07, "loss": 0.0663, "step": 31859 }, { "epoch": 0.91, "grad_norm": 3.2715296317998384, "learning_rate": 1.9990666580415653e-07, "loss": 0.0675, "step": 31860 }, { "epoch": 0.91, "grad_norm": 2.9874738811803634, "learning_rate": 1.9977686376346672e-07, "loss": 0.1783, "step": 31861 }, { "epoch": 0.91, "grad_norm": 4.012759095142494, "learning_rate": 1.99647103018506e-07, "loss": 0.5132, "step": 31862 }, { "epoch": 0.91, "grad_norm": 6.14617149155481, "learning_rate": 1.9951738357039062e-07, "loss": 0.4561, "step": 31863 }, { "epoch": 0.91, "grad_norm": 6.006306076184983, "learning_rate": 1.9938770542023533e-07, "loss": 0.392, "step": 31864 }, { "epoch": 0.91, "grad_norm": 5.503190827287376, "learning_rate": 1.9925806856915808e-07, "loss": 0.3703, "step": 31865 }, { "epoch": 0.91, "grad_norm": 3.582903207794392, "learning_rate": 1.9912847301827132e-07, "loss": 0.3307, "step": 31866 }, { "epoch": 0.91, "grad_norm": 2.628778769135692, "learning_rate": 1.9899891876869136e-07, "loss": 0.3483, "step": 31867 }, { "epoch": 0.91, "grad_norm": 8.755589443779678, "learning_rate": 1.9886940582153235e-07, "loss": 0.6798, "step": 31868 }, { "epoch": 0.91, "grad_norm": 3.680544289785614, "learning_rate": 1.987399341779095e-07, "loss": 0.4351, "step": 31869 }, { "epoch": 0.91, "grad_norm": 3.710049107270271, "learning_rate": 1.986105038389352e-07, "loss": 0.3369, "step": 31870 }, { "epoch": 0.91, "grad_norm": 7.443382981842731, "learning_rate": 1.984811148057242e-07, "loss": 0.6152, "step": 31871 }, { "epoch": 0.91, "grad_norm": 3.148490091267975, "learning_rate": 1.9835176707938885e-07, "loss": 0.34, "step": 31872 }, { "epoch": 0.91, "grad_norm": 6.155284592892081, "learning_rate": 1.9822246066104112e-07, "loss": 0.4736, "step": 31873 }, { "epoch": 0.91, "grad_norm": 6.977374705573896, "learning_rate": 1.9809319555179563e-07, "loss": 0.465, "step": 31874 }, { "epoch": 0.91, "grad_norm": 4.676033261804922, "learning_rate": 1.9796397175276206e-07, "loss": 0.6475, "step": 31875 }, { "epoch": 0.91, "grad_norm": 4.390286632817069, "learning_rate": 1.9783478926505395e-07, "loss": 0.2742, "step": 31876 }, { "epoch": 0.91, "grad_norm": 3.10090266975387, "learning_rate": 1.97705648089781e-07, "loss": 0.3183, "step": 31877 }, { "epoch": 0.91, "grad_norm": 4.5250386779143765, "learning_rate": 1.9757654822805615e-07, "loss": 0.5747, "step": 31878 }, { "epoch": 0.91, "grad_norm": 5.38296818611882, "learning_rate": 1.9744748968098858e-07, "loss": 0.2338, "step": 31879 }, { "epoch": 0.91, "grad_norm": 4.554803156488467, "learning_rate": 1.9731847244968848e-07, "loss": 0.2961, "step": 31880 }, { "epoch": 0.91, "grad_norm": 5.440798668716149, "learning_rate": 1.971894965352672e-07, "loss": 0.4754, "step": 31881 }, { "epoch": 0.91, "grad_norm": 6.038174306046101, "learning_rate": 1.9706056193883217e-07, "loss": 0.4708, "step": 31882 }, { "epoch": 0.91, "grad_norm": 7.6220334645369725, "learning_rate": 1.9693166866149471e-07, "loss": 0.421, "step": 31883 }, { "epoch": 0.91, "grad_norm": 5.4071605114091605, "learning_rate": 1.968028167043623e-07, "loss": 0.9091, "step": 31884 }, { "epoch": 0.91, "grad_norm": 4.711679061800927, "learning_rate": 1.9667400606854402e-07, "loss": 0.5773, "step": 31885 }, { "epoch": 0.91, "grad_norm": 4.787172924851029, "learning_rate": 1.9654523675514792e-07, "loss": 0.5299, "step": 31886 }, { "epoch": 0.91, "grad_norm": 6.420014420490172, "learning_rate": 1.964165087652814e-07, "loss": 0.511, "step": 31887 }, { "epoch": 0.91, "grad_norm": 3.8703634225623946, "learning_rate": 1.9628782210005305e-07, "loss": 0.1165, "step": 31888 }, { "epoch": 0.91, "grad_norm": 5.709673364586396, "learning_rate": 1.9615917676056807e-07, "loss": 0.3407, "step": 31889 }, { "epoch": 0.91, "grad_norm": 7.118384653149226, "learning_rate": 1.9603057274793502e-07, "loss": 0.9969, "step": 31890 }, { "epoch": 0.91, "grad_norm": 5.724988817428928, "learning_rate": 1.959020100632586e-07, "loss": 0.3922, "step": 31891 }, { "epoch": 0.91, "grad_norm": 4.8097156049596155, "learning_rate": 1.9577348870764734e-07, "loss": 0.2199, "step": 31892 }, { "epoch": 0.91, "grad_norm": 4.2785966544905385, "learning_rate": 1.956450086822037e-07, "loss": 0.4042, "step": 31893 }, { "epoch": 0.91, "grad_norm": 6.605088704081712, "learning_rate": 1.9551656998803515e-07, "loss": 0.5197, "step": 31894 }, { "epoch": 0.91, "grad_norm": 2.844247669646523, "learning_rate": 1.953881726262452e-07, "loss": 0.162, "step": 31895 }, { "epoch": 0.91, "grad_norm": 3.9941115548491313, "learning_rate": 1.9525981659793968e-07, "loss": 0.3307, "step": 31896 }, { "epoch": 0.91, "grad_norm": 2.1290726062977687, "learning_rate": 1.951315019042227e-07, "loss": 0.1753, "step": 31897 }, { "epoch": 0.91, "grad_norm": 3.9349471702567738, "learning_rate": 1.9500322854619837e-07, "loss": 0.5386, "step": 31898 }, { "epoch": 0.91, "grad_norm": 5.859166988495231, "learning_rate": 1.9487499652496912e-07, "loss": 0.1124, "step": 31899 }, { "epoch": 0.91, "grad_norm": 5.256720306184899, "learning_rate": 1.9474680584163795e-07, "loss": 0.2335, "step": 31900 }, { "epoch": 0.91, "grad_norm": 5.9698902259331685, "learning_rate": 1.9461865649730903e-07, "loss": 0.4472, "step": 31901 }, { "epoch": 0.91, "grad_norm": 4.131151496532369, "learning_rate": 1.9449054849308368e-07, "loss": 0.645, "step": 31902 }, { "epoch": 0.91, "grad_norm": 3.3306845015015734, "learning_rate": 1.943624818300649e-07, "loss": 0.5243, "step": 31903 }, { "epoch": 0.91, "grad_norm": 7.636995572977854, "learning_rate": 1.9423445650935403e-07, "loss": 0.3755, "step": 31904 }, { "epoch": 0.91, "grad_norm": 4.342355600392841, "learning_rate": 1.9410647253205239e-07, "loss": 0.5243, "step": 31905 }, { "epoch": 0.91, "grad_norm": 8.123614970780032, "learning_rate": 1.9397852989926135e-07, "loss": 0.4894, "step": 31906 }, { "epoch": 0.91, "grad_norm": 7.261554422720025, "learning_rate": 1.938506286120806e-07, "loss": 0.3985, "step": 31907 }, { "epoch": 0.91, "grad_norm": 4.873009030609875, "learning_rate": 1.93722768671612e-07, "loss": 0.3791, "step": 31908 }, { "epoch": 0.91, "grad_norm": 4.53356824305983, "learning_rate": 1.9359495007895357e-07, "loss": 0.1587, "step": 31909 }, { "epoch": 0.91, "grad_norm": 5.803286479619852, "learning_rate": 1.9346717283520666e-07, "loss": 0.3102, "step": 31910 }, { "epoch": 0.91, "grad_norm": 5.736172074873237, "learning_rate": 1.9333943694146984e-07, "loss": 0.3293, "step": 31911 }, { "epoch": 0.91, "grad_norm": 7.966624137200939, "learning_rate": 1.932117423988422e-07, "loss": 0.3341, "step": 31912 }, { "epoch": 0.91, "grad_norm": 7.735851663210728, "learning_rate": 1.9308408920842182e-07, "loss": 0.3749, "step": 31913 }, { "epoch": 0.91, "grad_norm": 5.37066408970388, "learning_rate": 1.9295647737130717e-07, "loss": 0.3897, "step": 31914 }, { "epoch": 0.91, "grad_norm": 3.6296954492752054, "learning_rate": 1.9282890688859634e-07, "loss": 0.2166, "step": 31915 }, { "epoch": 0.91, "grad_norm": 3.6553007670590327, "learning_rate": 1.927013777613862e-07, "loss": 0.5103, "step": 31916 }, { "epoch": 0.91, "grad_norm": 5.288261409253988, "learning_rate": 1.925738899907742e-07, "loss": 0.522, "step": 31917 }, { "epoch": 0.91, "grad_norm": 2.5859362092259808, "learning_rate": 1.9244644357785725e-07, "loss": 0.0524, "step": 31918 }, { "epoch": 0.91, "grad_norm": 6.108017129073391, "learning_rate": 1.923190385237317e-07, "loss": 0.6974, "step": 31919 }, { "epoch": 0.91, "grad_norm": 6.090112392722621, "learning_rate": 1.9219167482949385e-07, "loss": 0.4314, "step": 31920 }, { "epoch": 0.91, "grad_norm": 6.68284972891562, "learning_rate": 1.9206435249623846e-07, "loss": 0.4841, "step": 31921 }, { "epoch": 0.91, "grad_norm": 7.6295649512216395, "learning_rate": 1.919370715250618e-07, "loss": 0.9299, "step": 31922 }, { "epoch": 0.91, "grad_norm": 7.076823167470432, "learning_rate": 1.9180983191705805e-07, "loss": 0.3001, "step": 31923 }, { "epoch": 0.91, "grad_norm": 3.8943382529109263, "learning_rate": 1.9168263367332296e-07, "loss": 0.2682, "step": 31924 }, { "epoch": 0.91, "grad_norm": 5.237185597599498, "learning_rate": 1.915554767949501e-07, "loss": 0.5614, "step": 31925 }, { "epoch": 0.91, "grad_norm": 7.298315653419844, "learning_rate": 1.9142836128303365e-07, "loss": 0.7447, "step": 31926 }, { "epoch": 0.91, "grad_norm": 6.386246802015059, "learning_rate": 1.9130128713866658e-07, "loss": 0.4327, "step": 31927 }, { "epoch": 0.91, "grad_norm": 2.953069353336238, "learning_rate": 1.911742543629419e-07, "loss": 0.118, "step": 31928 }, { "epoch": 0.91, "grad_norm": 3.973163703608728, "learning_rate": 1.9104726295695374e-07, "loss": 0.118, "step": 31929 }, { "epoch": 0.91, "grad_norm": 5.741401255230788, "learning_rate": 1.9092031292179346e-07, "loss": 0.6851, "step": 31930 }, { "epoch": 0.91, "grad_norm": 3.8444221343114036, "learning_rate": 1.9079340425855407e-07, "loss": 0.2389, "step": 31931 }, { "epoch": 0.91, "grad_norm": 5.442519885242811, "learning_rate": 1.9066653696832693e-07, "loss": 0.7218, "step": 31932 }, { "epoch": 0.91, "grad_norm": 3.453614627423126, "learning_rate": 1.9053971105220393e-07, "loss": 0.3918, "step": 31933 }, { "epoch": 0.91, "grad_norm": 3.4795186022779103, "learning_rate": 1.904129265112742e-07, "loss": 0.3644, "step": 31934 }, { "epoch": 0.91, "grad_norm": 10.058090689217837, "learning_rate": 1.9028618334663073e-07, "loss": 0.7597, "step": 31935 }, { "epoch": 0.91, "grad_norm": 6.49826870382841, "learning_rate": 1.901594815593627e-07, "loss": 0.4393, "step": 31936 }, { "epoch": 0.91, "grad_norm": 3.781126918838772, "learning_rate": 1.9003282115056032e-07, "loss": 0.1842, "step": 31937 }, { "epoch": 0.91, "grad_norm": 6.815441931251539, "learning_rate": 1.8990620212131439e-07, "loss": 0.7696, "step": 31938 }, { "epoch": 0.91, "grad_norm": 5.541967278647217, "learning_rate": 1.8977962447271237e-07, "loss": 0.5841, "step": 31939 }, { "epoch": 0.91, "grad_norm": 4.724588735534574, "learning_rate": 1.896530882058445e-07, "loss": 0.4853, "step": 31940 }, { "epoch": 0.91, "grad_norm": 5.360186545983195, "learning_rate": 1.8952659332179823e-07, "loss": 0.6421, "step": 31941 }, { "epoch": 0.91, "grad_norm": 6.845307978437228, "learning_rate": 1.894001398216627e-07, "loss": 0.3048, "step": 31942 }, { "epoch": 0.91, "grad_norm": 3.963160235397678, "learning_rate": 1.8927372770652542e-07, "loss": 0.6337, "step": 31943 }, { "epoch": 0.91, "grad_norm": 7.447382412046515, "learning_rate": 1.8914735697747323e-07, "loss": 0.3576, "step": 31944 }, { "epoch": 0.91, "grad_norm": 8.697377250224049, "learning_rate": 1.8902102763559526e-07, "loss": 0.6366, "step": 31945 }, { "epoch": 0.91, "grad_norm": 5.043467525811402, "learning_rate": 1.888947396819768e-07, "loss": 0.4644, "step": 31946 }, { "epoch": 0.91, "grad_norm": 7.260649587506352, "learning_rate": 1.8876849311770418e-07, "loss": 0.6597, "step": 31947 }, { "epoch": 0.91, "grad_norm": 4.256046033725026, "learning_rate": 1.8864228794386375e-07, "loss": 0.1764, "step": 31948 }, { "epoch": 0.91, "grad_norm": 6.830820629590035, "learning_rate": 1.8851612416154185e-07, "loss": 0.8615, "step": 31949 }, { "epoch": 0.91, "grad_norm": 4.886900827505708, "learning_rate": 1.883900017718221e-07, "loss": 0.3645, "step": 31950 }, { "epoch": 0.92, "grad_norm": 7.008752868227221, "learning_rate": 1.8826392077579192e-07, "loss": 0.545, "step": 31951 }, { "epoch": 0.92, "grad_norm": 6.043114727602142, "learning_rate": 1.881378811745349e-07, "loss": 0.481, "step": 31952 }, { "epoch": 0.92, "grad_norm": 6.49054514542423, "learning_rate": 1.8801188296913465e-07, "loss": 0.2265, "step": 31953 }, { "epoch": 0.92, "grad_norm": 4.934142890868168, "learning_rate": 1.878859261606758e-07, "loss": 0.2334, "step": 31954 }, { "epoch": 0.92, "grad_norm": 2.2837164883381127, "learning_rate": 1.8776001075024143e-07, "loss": 0.262, "step": 31955 }, { "epoch": 0.92, "grad_norm": 4.745417643296948, "learning_rate": 1.8763413673891617e-07, "loss": 0.4104, "step": 31956 }, { "epoch": 0.92, "grad_norm": 4.2169992275549415, "learning_rate": 1.8750830412778088e-07, "loss": 0.4526, "step": 31957 }, { "epoch": 0.92, "grad_norm": 4.655284295010413, "learning_rate": 1.873825129179202e-07, "loss": 0.408, "step": 31958 }, { "epoch": 0.92, "grad_norm": 4.288645906488746, "learning_rate": 1.8725676311041496e-07, "loss": 0.1635, "step": 31959 }, { "epoch": 0.92, "grad_norm": 4.5356073409818665, "learning_rate": 1.8713105470634707e-07, "loss": 0.2432, "step": 31960 }, { "epoch": 0.92, "grad_norm": 4.909957456851167, "learning_rate": 1.8700538770679733e-07, "loss": 0.5056, "step": 31961 }, { "epoch": 0.92, "grad_norm": 7.719134332764233, "learning_rate": 1.868797621128482e-07, "loss": 0.5362, "step": 31962 }, { "epoch": 0.92, "grad_norm": 3.100698201707759, "learning_rate": 1.8675417792557994e-07, "loss": 0.239, "step": 31963 }, { "epoch": 0.92, "grad_norm": 5.72454840199105, "learning_rate": 1.8662863514607276e-07, "loss": 0.2931, "step": 31964 }, { "epoch": 0.92, "grad_norm": 8.500006507422256, "learning_rate": 1.8650313377540696e-07, "loss": 0.6682, "step": 31965 }, { "epoch": 0.92, "grad_norm": 4.113540510826435, "learning_rate": 1.8637767381466277e-07, "loss": 0.298, "step": 31966 }, { "epoch": 0.92, "grad_norm": 5.201405077700964, "learning_rate": 1.862522552649182e-07, "loss": 0.2472, "step": 31967 }, { "epoch": 0.92, "grad_norm": 5.385226567920397, "learning_rate": 1.861268781272524e-07, "loss": 0.3263, "step": 31968 }, { "epoch": 0.92, "grad_norm": 2.013295535302246, "learning_rate": 1.8600154240274392e-07, "loss": 0.1996, "step": 31969 }, { "epoch": 0.92, "grad_norm": 3.825228564595307, "learning_rate": 1.858762480924725e-07, "loss": 0.4175, "step": 31970 }, { "epoch": 0.92, "grad_norm": 3.533635220061857, "learning_rate": 1.8575099519751394e-07, "loss": 0.2275, "step": 31971 }, { "epoch": 0.92, "grad_norm": 2.081917886707414, "learning_rate": 1.856257837189479e-07, "loss": 0.2651, "step": 31972 }, { "epoch": 0.92, "grad_norm": 5.257542460111868, "learning_rate": 1.8550061365785022e-07, "loss": 0.3065, "step": 31973 }, { "epoch": 0.92, "grad_norm": 4.213841387160297, "learning_rate": 1.8537548501529833e-07, "loss": 0.4238, "step": 31974 }, { "epoch": 0.92, "grad_norm": 6.546784575573049, "learning_rate": 1.8525039779236754e-07, "loss": 0.5164, "step": 31975 }, { "epoch": 0.92, "grad_norm": 4.539310835907829, "learning_rate": 1.8512535199013526e-07, "loss": 0.3137, "step": 31976 }, { "epoch": 0.92, "grad_norm": 3.1849330963342073, "learning_rate": 1.8500034760967623e-07, "loss": 0.2035, "step": 31977 }, { "epoch": 0.92, "grad_norm": 10.049276394201629, "learning_rate": 1.8487538465206622e-07, "loss": 0.7788, "step": 31978 }, { "epoch": 0.92, "grad_norm": 4.146280564893298, "learning_rate": 1.8475046311838164e-07, "loss": 0.3337, "step": 31979 }, { "epoch": 0.92, "grad_norm": 3.1405880864189792, "learning_rate": 1.8462558300969545e-07, "loss": 0.2362, "step": 31980 }, { "epoch": 0.92, "grad_norm": 5.894665938368635, "learning_rate": 1.8450074432708242e-07, "loss": 0.4882, "step": 31981 }, { "epoch": 0.92, "grad_norm": 6.355973554843074, "learning_rate": 1.843759470716161e-07, "loss": 0.4136, "step": 31982 }, { "epoch": 0.92, "grad_norm": 8.042222657691026, "learning_rate": 1.8425119124437118e-07, "loss": 0.3191, "step": 31983 }, { "epoch": 0.92, "grad_norm": 4.799587986071754, "learning_rate": 1.841264768464196e-07, "loss": 0.2465, "step": 31984 }, { "epoch": 0.92, "grad_norm": 3.8308790304232647, "learning_rate": 1.8400180387883603e-07, "loss": 0.4809, "step": 31985 }, { "epoch": 0.92, "grad_norm": 3.581883284945767, "learning_rate": 1.838771723426913e-07, "loss": 0.2698, "step": 31986 }, { "epoch": 0.92, "grad_norm": 6.693877770836639, "learning_rate": 1.8375258223905846e-07, "loss": 0.4508, "step": 31987 }, { "epoch": 0.92, "grad_norm": 5.688422453135893, "learning_rate": 1.8362803356900939e-07, "loss": 0.4149, "step": 31988 }, { "epoch": 0.92, "grad_norm": 5.167981252273119, "learning_rate": 1.8350352633361436e-07, "loss": 0.3871, "step": 31989 }, { "epoch": 0.92, "grad_norm": 6.056550282060103, "learning_rate": 1.8337906053394638e-07, "loss": 0.1993, "step": 31990 }, { "epoch": 0.92, "grad_norm": 8.560425388693496, "learning_rate": 1.832546361710752e-07, "loss": 0.4545, "step": 31991 }, { "epoch": 0.92, "grad_norm": 2.969576389337989, "learning_rate": 1.8313025324607159e-07, "loss": 0.3769, "step": 31992 }, { "epoch": 0.92, "grad_norm": 5.99528739552117, "learning_rate": 1.8300591176000472e-07, "loss": 0.4, "step": 31993 }, { "epoch": 0.92, "grad_norm": 3.244462394029646, "learning_rate": 1.8288161171394536e-07, "loss": 0.2593, "step": 31994 }, { "epoch": 0.92, "grad_norm": 5.6759215073081135, "learning_rate": 1.8275735310896214e-07, "loss": 0.4777, "step": 31995 }, { "epoch": 0.92, "grad_norm": 4.29729085123706, "learning_rate": 1.826331359461242e-07, "loss": 0.5354, "step": 31996 }, { "epoch": 0.92, "grad_norm": 3.903467575933419, "learning_rate": 1.8250896022650066e-07, "loss": 0.3902, "step": 31997 }, { "epoch": 0.92, "grad_norm": 1.8212066140829348, "learning_rate": 1.8238482595115847e-07, "loss": 0.1215, "step": 31998 }, { "epoch": 0.92, "grad_norm": 7.123809848901476, "learning_rate": 1.8226073312116788e-07, "loss": 0.4873, "step": 31999 }, { "epoch": 0.92, "grad_norm": 6.902534210219779, "learning_rate": 1.8213668173759414e-07, "loss": 0.4934, "step": 32000 }, { "epoch": 0.92, "grad_norm": 5.453469383055173, "learning_rate": 1.8201267180150583e-07, "loss": 0.3248, "step": 32001 }, { "epoch": 0.92, "grad_norm": 3.4016613631804447, "learning_rate": 1.818887033139688e-07, "loss": 0.2212, "step": 32002 }, { "epoch": 0.92, "grad_norm": 3.229506269665916, "learning_rate": 1.8176477627604994e-07, "loss": 0.2173, "step": 32003 }, { "epoch": 0.92, "grad_norm": 5.060194974241373, "learning_rate": 1.8164089068881564e-07, "loss": 0.5233, "step": 32004 }, { "epoch": 0.92, "grad_norm": 10.32846516613133, "learning_rate": 1.815170465533317e-07, "loss": 0.762, "step": 32005 }, { "epoch": 0.92, "grad_norm": 3.337188575445109, "learning_rate": 1.8139324387066336e-07, "loss": 0.4545, "step": 32006 }, { "epoch": 0.92, "grad_norm": 7.082404453482758, "learning_rate": 1.8126948264187538e-07, "loss": 0.4967, "step": 32007 }, { "epoch": 0.92, "grad_norm": 5.100215155607055, "learning_rate": 1.8114576286803352e-07, "loss": 0.3908, "step": 32008 }, { "epoch": 0.92, "grad_norm": 7.705360072867397, "learning_rate": 1.8102208455020031e-07, "loss": 0.9219, "step": 32009 }, { "epoch": 0.92, "grad_norm": 5.737409739729716, "learning_rate": 1.8089844768944098e-07, "loss": 0.5467, "step": 32010 }, { "epoch": 0.92, "grad_norm": 6.722516268437038, "learning_rate": 1.8077485228681912e-07, "loss": 0.3465, "step": 32011 }, { "epoch": 0.92, "grad_norm": 7.3081164792930435, "learning_rate": 1.8065129834339834e-07, "loss": 0.3796, "step": 32012 }, { "epoch": 0.92, "grad_norm": 3.9837717833819637, "learning_rate": 1.805277858602411e-07, "loss": 0.2515, "step": 32013 }, { "epoch": 0.92, "grad_norm": 4.285500358737303, "learning_rate": 1.804043148384088e-07, "loss": 0.493, "step": 32014 }, { "epoch": 0.92, "grad_norm": 7.833560510817661, "learning_rate": 1.802808852789656e-07, "loss": 0.6369, "step": 32015 }, { "epoch": 0.92, "grad_norm": 4.266650764738689, "learning_rate": 1.8015749718297226e-07, "loss": 0.3556, "step": 32016 }, { "epoch": 0.92, "grad_norm": 6.355182099633396, "learning_rate": 1.8003415055149076e-07, "loss": 0.3683, "step": 32017 }, { "epoch": 0.92, "grad_norm": 4.910461683340757, "learning_rate": 1.7991084538558136e-07, "loss": 0.1926, "step": 32018 }, { "epoch": 0.92, "grad_norm": 4.557404049895467, "learning_rate": 1.797875816863065e-07, "loss": 0.3383, "step": 32019 }, { "epoch": 0.92, "grad_norm": 5.413488536537543, "learning_rate": 1.7966435945472537e-07, "loss": 0.5162, "step": 32020 }, { "epoch": 0.92, "grad_norm": 6.381906582850666, "learning_rate": 1.795411786918977e-07, "loss": 0.5875, "step": 32021 }, { "epoch": 0.92, "grad_norm": 4.500277908008944, "learning_rate": 1.7941803939888423e-07, "loss": 0.3875, "step": 32022 }, { "epoch": 0.92, "grad_norm": 6.477172105028912, "learning_rate": 1.792949415767431e-07, "loss": 0.4229, "step": 32023 }, { "epoch": 0.92, "grad_norm": 9.180328970065654, "learning_rate": 1.7917188522653506e-07, "loss": 0.8445, "step": 32024 }, { "epoch": 0.92, "grad_norm": 5.7480687339644465, "learning_rate": 1.7904887034931706e-07, "loss": 0.5478, "step": 32025 }, { "epoch": 0.92, "grad_norm": 6.840965436094482, "learning_rate": 1.7892589694614826e-07, "loss": 0.4255, "step": 32026 }, { "epoch": 0.92, "grad_norm": 6.072229934347005, "learning_rate": 1.788029650180867e-07, "loss": 0.278, "step": 32027 }, { "epoch": 0.92, "grad_norm": 6.69803983141041, "learning_rate": 1.786800745661893e-07, "loss": 0.5708, "step": 32028 }, { "epoch": 0.92, "grad_norm": 3.9500523563729844, "learning_rate": 1.7855722559151357e-07, "loss": 0.5171, "step": 32029 }, { "epoch": 0.92, "grad_norm": 3.487080314979215, "learning_rate": 1.784344180951164e-07, "loss": 0.3854, "step": 32030 }, { "epoch": 0.92, "grad_norm": 6.992015269625071, "learning_rate": 1.783116520780548e-07, "loss": 0.4008, "step": 32031 }, { "epoch": 0.92, "grad_norm": 5.247066859179024, "learning_rate": 1.781889275413834e-07, "loss": 0.4432, "step": 32032 }, { "epoch": 0.92, "grad_norm": 7.085929222901209, "learning_rate": 1.7806624448615973e-07, "loss": 0.6706, "step": 32033 }, { "epoch": 0.92, "grad_norm": 3.924708858553066, "learning_rate": 1.7794360291343848e-07, "loss": 0.1914, "step": 32034 }, { "epoch": 0.92, "grad_norm": 10.237120934106974, "learning_rate": 1.7782100282427438e-07, "loss": 0.4628, "step": 32035 }, { "epoch": 0.92, "grad_norm": 11.112850455732087, "learning_rate": 1.776984442197227e-07, "loss": 0.7922, "step": 32036 }, { "epoch": 0.92, "grad_norm": 5.320356192388392, "learning_rate": 1.7757592710083705e-07, "loss": 0.4311, "step": 32037 }, { "epoch": 0.92, "grad_norm": 4.64785134602871, "learning_rate": 1.7745345146867266e-07, "loss": 0.141, "step": 32038 }, { "epoch": 0.92, "grad_norm": 6.3307850086087925, "learning_rate": 1.7733101732428203e-07, "loss": 0.5416, "step": 32039 }, { "epoch": 0.92, "grad_norm": 6.869031899301918, "learning_rate": 1.772086246687199e-07, "loss": 0.562, "step": 32040 }, { "epoch": 0.92, "grad_norm": 6.249351048157065, "learning_rate": 1.7708627350303652e-07, "loss": 0.2489, "step": 32041 }, { "epoch": 0.92, "grad_norm": 3.8255331802255474, "learning_rate": 1.7696396382828718e-07, "loss": 0.3008, "step": 32042 }, { "epoch": 0.92, "grad_norm": 3.8576393394496655, "learning_rate": 1.7684169564552267e-07, "loss": 0.3914, "step": 32043 }, { "epoch": 0.92, "grad_norm": 7.655507639492156, "learning_rate": 1.7671946895579495e-07, "loss": 0.5664, "step": 32044 }, { "epoch": 0.92, "grad_norm": 4.305559904171515, "learning_rate": 1.7659728376015595e-07, "loss": 0.5438, "step": 32045 }, { "epoch": 0.92, "grad_norm": 8.274796227542137, "learning_rate": 1.7647514005965704e-07, "loss": 0.5563, "step": 32046 }, { "epoch": 0.92, "grad_norm": 4.578360854182914, "learning_rate": 1.7635303785534853e-07, "loss": 0.1355, "step": 32047 }, { "epoch": 0.92, "grad_norm": 5.726834445548747, "learning_rate": 1.7623097714828064e-07, "loss": 0.1334, "step": 32048 }, { "epoch": 0.92, "grad_norm": 8.717852255703264, "learning_rate": 1.7610895793950423e-07, "loss": 0.5497, "step": 32049 }, { "epoch": 0.92, "grad_norm": 4.9837785082887445, "learning_rate": 1.759869802300679e-07, "loss": 0.4847, "step": 32050 }, { "epoch": 0.92, "grad_norm": 8.752777094874437, "learning_rate": 1.7586504402102134e-07, "loss": 0.6725, "step": 32051 }, { "epoch": 0.92, "grad_norm": 9.163022605907244, "learning_rate": 1.7574314931341484e-07, "loss": 0.5589, "step": 32052 }, { "epoch": 0.92, "grad_norm": 11.316805025658502, "learning_rate": 1.7562129610829538e-07, "loss": 0.2074, "step": 32053 }, { "epoch": 0.92, "grad_norm": 3.7072281458572793, "learning_rate": 1.754994844067126e-07, "loss": 0.6233, "step": 32054 }, { "epoch": 0.92, "grad_norm": 3.382750565664498, "learning_rate": 1.7537771420971294e-07, "loss": 0.4304, "step": 32055 }, { "epoch": 0.92, "grad_norm": 2.909080992859228, "learning_rate": 1.7525598551834556e-07, "loss": 0.3238, "step": 32056 }, { "epoch": 0.92, "grad_norm": 5.2611349188321785, "learning_rate": 1.7513429833365568e-07, "loss": 0.5575, "step": 32057 }, { "epoch": 0.92, "grad_norm": 6.580034970883369, "learning_rate": 1.7501265265669254e-07, "loss": 0.9175, "step": 32058 }, { "epoch": 0.92, "grad_norm": 4.787203603787519, "learning_rate": 1.748910484885008e-07, "loss": 0.3091, "step": 32059 }, { "epoch": 0.92, "grad_norm": 8.52181960711436, "learning_rate": 1.7476948583012798e-07, "loss": 0.5961, "step": 32060 }, { "epoch": 0.92, "grad_norm": 3.498590526155962, "learning_rate": 1.746479646826188e-07, "loss": 0.4439, "step": 32061 }, { "epoch": 0.92, "grad_norm": 6.591628650920573, "learning_rate": 1.7452648504701853e-07, "loss": 1.1077, "step": 32062 }, { "epoch": 0.92, "grad_norm": 4.425932192269652, "learning_rate": 1.7440504692437355e-07, "loss": 0.597, "step": 32063 }, { "epoch": 0.92, "grad_norm": 7.855849550370127, "learning_rate": 1.7428365031572748e-07, "loss": 0.6305, "step": 32064 }, { "epoch": 0.92, "grad_norm": 3.7945856562879214, "learning_rate": 1.7416229522212503e-07, "loss": 0.2808, "step": 32065 }, { "epoch": 0.92, "grad_norm": 2.5942130537222705, "learning_rate": 1.7404098164461036e-07, "loss": 0.2957, "step": 32066 }, { "epoch": 0.92, "grad_norm": 6.836975228153584, "learning_rate": 1.7391970958422654e-07, "loss": 0.584, "step": 32067 }, { "epoch": 0.92, "grad_norm": 6.05029357954485, "learning_rate": 1.737984790420183e-07, "loss": 0.6072, "step": 32068 }, { "epoch": 0.92, "grad_norm": 5.9681837602224075, "learning_rate": 1.736772900190259e-07, "loss": 0.6837, "step": 32069 }, { "epoch": 0.92, "grad_norm": 10.454557448972071, "learning_rate": 1.7355614251629515e-07, "loss": 0.396, "step": 32070 }, { "epoch": 0.92, "grad_norm": 5.633120884171661, "learning_rate": 1.7343503653486527e-07, "loss": 0.3648, "step": 32071 }, { "epoch": 0.92, "grad_norm": 7.707718970757674, "learning_rate": 1.7331397207578038e-07, "loss": 0.2671, "step": 32072 }, { "epoch": 0.92, "grad_norm": 4.276806746265315, "learning_rate": 1.7319294914008134e-07, "loss": 0.2571, "step": 32073 }, { "epoch": 0.92, "grad_norm": 7.259686756706257, "learning_rate": 1.7307196772880842e-07, "loss": 0.3886, "step": 32074 }, { "epoch": 0.92, "grad_norm": 7.690441406241072, "learning_rate": 1.72951027843003e-07, "loss": 0.4632, "step": 32075 }, { "epoch": 0.92, "grad_norm": 2.0111492573085306, "learning_rate": 1.7283012948370538e-07, "loss": 0.3133, "step": 32076 }, { "epoch": 0.92, "grad_norm": 9.975351880016937, "learning_rate": 1.7270927265195691e-07, "loss": 0.7405, "step": 32077 }, { "epoch": 0.92, "grad_norm": 4.381823953536062, "learning_rate": 1.7258845734879514e-07, "loss": 0.3513, "step": 32078 }, { "epoch": 0.92, "grad_norm": 3.5277088436807515, "learning_rate": 1.7246768357526089e-07, "loss": 0.4145, "step": 32079 }, { "epoch": 0.92, "grad_norm": 3.6910553619089925, "learning_rate": 1.7234695133239331e-07, "loss": 0.3001, "step": 32080 }, { "epoch": 0.92, "grad_norm": 10.233893929160498, "learning_rate": 1.7222626062122994e-07, "loss": 0.6997, "step": 32081 }, { "epoch": 0.92, "grad_norm": 7.264452177161409, "learning_rate": 1.7210561144280935e-07, "loss": 0.4704, "step": 32082 }, { "epoch": 0.92, "grad_norm": 4.783746229797743, "learning_rate": 1.7198500379817018e-07, "loss": 0.7829, "step": 32083 }, { "epoch": 0.92, "grad_norm": 4.9972855590718535, "learning_rate": 1.7186443768834938e-07, "loss": 0.4105, "step": 32084 }, { "epoch": 0.92, "grad_norm": 3.6060547522938604, "learning_rate": 1.7174391311438442e-07, "loss": 0.6494, "step": 32085 }, { "epoch": 0.92, "grad_norm": 4.119382327545969, "learning_rate": 1.716234300773123e-07, "loss": 0.7148, "step": 32086 }, { "epoch": 0.92, "grad_norm": 4.072537156527413, "learning_rate": 1.7150298857816937e-07, "loss": 0.1929, "step": 32087 }, { "epoch": 0.92, "grad_norm": 8.667294338923293, "learning_rate": 1.7138258861799151e-07, "loss": 0.5785, "step": 32088 }, { "epoch": 0.92, "grad_norm": 11.460400527825312, "learning_rate": 1.7126223019781452e-07, "loss": 0.6583, "step": 32089 }, { "epoch": 0.92, "grad_norm": 4.615494832532402, "learning_rate": 1.711419133186748e-07, "loss": 0.7504, "step": 32090 }, { "epoch": 0.92, "grad_norm": 7.984320593721344, "learning_rate": 1.7102163798160543e-07, "loss": 0.4834, "step": 32091 }, { "epoch": 0.92, "grad_norm": 7.340952531142238, "learning_rate": 1.7090140418764277e-07, "loss": 0.3946, "step": 32092 }, { "epoch": 0.92, "grad_norm": 6.79338489242744, "learning_rate": 1.707812119378216e-07, "loss": 0.5133, "step": 32093 }, { "epoch": 0.92, "grad_norm": 3.7511418034814885, "learning_rate": 1.7066106123317438e-07, "loss": 0.1391, "step": 32094 }, { "epoch": 0.92, "grad_norm": 4.273835576849981, "learning_rate": 1.7054095207473586e-07, "loss": 0.4465, "step": 32095 }, { "epoch": 0.92, "grad_norm": 4.3685985053213345, "learning_rate": 1.7042088446353856e-07, "loss": 0.4336, "step": 32096 }, { "epoch": 0.92, "grad_norm": 3.7372372085145176, "learning_rate": 1.7030085840061606e-07, "loss": 0.2069, "step": 32097 }, { "epoch": 0.92, "grad_norm": 3.7092962872092494, "learning_rate": 1.7018087388699977e-07, "loss": 0.2636, "step": 32098 }, { "epoch": 0.92, "grad_norm": 3.881664344305903, "learning_rate": 1.7006093092372388e-07, "loss": 0.39, "step": 32099 }, { "epoch": 0.92, "grad_norm": 4.742450185221545, "learning_rate": 1.6994102951181868e-07, "loss": 0.4394, "step": 32100 }, { "epoch": 0.92, "grad_norm": 6.438766762251189, "learning_rate": 1.698211696523161e-07, "loss": 0.3886, "step": 32101 }, { "epoch": 0.92, "grad_norm": 3.0931928691713746, "learning_rate": 1.6970135134624756e-07, "loss": 0.28, "step": 32102 }, { "epoch": 0.92, "grad_norm": 4.657512032927912, "learning_rate": 1.6958157459464276e-07, "loss": 0.2013, "step": 32103 }, { "epoch": 0.92, "grad_norm": 4.778086394808328, "learning_rate": 1.6946183939853367e-07, "loss": 0.2421, "step": 32104 }, { "epoch": 0.92, "grad_norm": 7.449250691769975, "learning_rate": 1.693421457589489e-07, "loss": 0.7214, "step": 32105 }, { "epoch": 0.92, "grad_norm": 4.904736449406617, "learning_rate": 1.692224936769199e-07, "loss": 0.5223, "step": 32106 }, { "epoch": 0.92, "grad_norm": 5.795146057091967, "learning_rate": 1.6910288315347467e-07, "loss": 0.5235, "step": 32107 }, { "epoch": 0.92, "grad_norm": 5.050141071955235, "learning_rate": 1.6898331418964242e-07, "loss": 0.6131, "step": 32108 }, { "epoch": 0.92, "grad_norm": 9.666221235689514, "learning_rate": 1.6886378678645176e-07, "loss": 0.3721, "step": 32109 }, { "epoch": 0.92, "grad_norm": 4.384175760061181, "learning_rate": 1.6874430094493076e-07, "loss": 0.3873, "step": 32110 }, { "epoch": 0.92, "grad_norm": 5.826798111562657, "learning_rate": 1.6862485666610806e-07, "loss": 0.5658, "step": 32111 }, { "epoch": 0.92, "grad_norm": 7.789333235493944, "learning_rate": 1.685054539510106e-07, "loss": 0.3023, "step": 32112 }, { "epoch": 0.92, "grad_norm": 3.7994442633772447, "learning_rate": 1.6838609280066642e-07, "loss": 0.2078, "step": 32113 }, { "epoch": 0.92, "grad_norm": 4.20701113158563, "learning_rate": 1.6826677321610196e-07, "loss": 0.3995, "step": 32114 }, { "epoch": 0.92, "grad_norm": 6.173320320965679, "learning_rate": 1.6814749519834362e-07, "loss": 0.1822, "step": 32115 }, { "epoch": 0.92, "grad_norm": 4.43632841090915, "learning_rate": 1.6802825874841667e-07, "loss": 0.2207, "step": 32116 }, { "epoch": 0.92, "grad_norm": 3.9795746852069986, "learning_rate": 1.679090638673475e-07, "loss": 0.2334, "step": 32117 }, { "epoch": 0.92, "grad_norm": 4.983833881530504, "learning_rate": 1.6778991055616256e-07, "loss": 0.4993, "step": 32118 }, { "epoch": 0.92, "grad_norm": 5.661599764304636, "learning_rate": 1.6767079881588543e-07, "loss": 0.321, "step": 32119 }, { "epoch": 0.92, "grad_norm": 4.459852657920655, "learning_rate": 1.6755172864754198e-07, "loss": 0.3475, "step": 32120 }, { "epoch": 0.92, "grad_norm": 6.272933046574418, "learning_rate": 1.6743270005215583e-07, "loss": 0.3957, "step": 32121 }, { "epoch": 0.92, "grad_norm": 4.845710185490561, "learning_rate": 1.6731371303075172e-07, "loss": 0.4741, "step": 32122 }, { "epoch": 0.92, "grad_norm": 3.833233317162301, "learning_rate": 1.671947675843516e-07, "loss": 0.2633, "step": 32123 }, { "epoch": 0.92, "grad_norm": 3.666718591878706, "learning_rate": 1.6707586371398076e-07, "loss": 0.6372, "step": 32124 }, { "epoch": 0.92, "grad_norm": 8.771131095150954, "learning_rate": 1.6695700142066007e-07, "loss": 0.4575, "step": 32125 }, { "epoch": 0.92, "grad_norm": 4.753983583973236, "learning_rate": 1.668381807054137e-07, "loss": 0.4219, "step": 32126 }, { "epoch": 0.92, "grad_norm": 4.433714581830177, "learning_rate": 1.667194015692647e-07, "loss": 0.822, "step": 32127 }, { "epoch": 0.92, "grad_norm": 3.8035530733347462, "learning_rate": 1.6660066401323227e-07, "loss": 0.2532, "step": 32128 }, { "epoch": 0.92, "grad_norm": 5.956980180365107, "learning_rate": 1.6648196803834006e-07, "loss": 0.477, "step": 32129 }, { "epoch": 0.92, "grad_norm": 8.39398975498573, "learning_rate": 1.663633136456072e-07, "loss": 0.742, "step": 32130 }, { "epoch": 0.92, "grad_norm": 7.258661456598593, "learning_rate": 1.6624470083605682e-07, "loss": 0.6998, "step": 32131 }, { "epoch": 0.92, "grad_norm": 4.35171640010969, "learning_rate": 1.6612612961070695e-07, "loss": 0.4461, "step": 32132 }, { "epoch": 0.92, "grad_norm": 7.889853622954636, "learning_rate": 1.6600759997057957e-07, "loss": 0.7378, "step": 32133 }, { "epoch": 0.92, "grad_norm": 7.6452178568763856, "learning_rate": 1.6588911191669388e-07, "loss": 0.8255, "step": 32134 }, { "epoch": 0.92, "grad_norm": 7.850753454021939, "learning_rate": 1.6577066545006848e-07, "loss": 0.5255, "step": 32135 }, { "epoch": 0.92, "grad_norm": 4.944847434595755, "learning_rate": 1.6565226057172312e-07, "loss": 0.2771, "step": 32136 }, { "epoch": 0.92, "grad_norm": 6.923831430711482, "learning_rate": 1.6553389728267533e-07, "loss": 0.6601, "step": 32137 }, { "epoch": 0.92, "grad_norm": 5.638022163044433, "learning_rate": 1.6541557558394538e-07, "loss": 0.5498, "step": 32138 }, { "epoch": 0.92, "grad_norm": 8.282738605796435, "learning_rate": 1.6529729547654917e-07, "loss": 0.6108, "step": 32139 }, { "epoch": 0.92, "grad_norm": 5.544139563668499, "learning_rate": 1.6517905696150526e-07, "loss": 0.4611, "step": 32140 }, { "epoch": 0.92, "grad_norm": 1.8027617513410885, "learning_rate": 1.6506086003983124e-07, "loss": 0.2072, "step": 32141 }, { "epoch": 0.92, "grad_norm": 4.620568755922191, "learning_rate": 1.6494270471254236e-07, "loss": 0.4371, "step": 32142 }, { "epoch": 0.92, "grad_norm": 9.66390221354135, "learning_rate": 1.6482459098065673e-07, "loss": 0.6195, "step": 32143 }, { "epoch": 0.92, "grad_norm": 6.743141115717202, "learning_rate": 1.647065188451896e-07, "loss": 0.3246, "step": 32144 }, { "epoch": 0.92, "grad_norm": 6.855732187279485, "learning_rate": 1.645884883071569e-07, "loss": 0.4551, "step": 32145 }, { "epoch": 0.92, "grad_norm": 9.93608006343435, "learning_rate": 1.6447049936757386e-07, "loss": 0.5692, "step": 32146 }, { "epoch": 0.92, "grad_norm": 9.13867182282205, "learning_rate": 1.6435255202745637e-07, "loss": 1.1327, "step": 32147 }, { "epoch": 0.92, "grad_norm": 2.8211474252695448, "learning_rate": 1.642346462878186e-07, "loss": 0.3925, "step": 32148 }, { "epoch": 0.92, "grad_norm": 4.833096435375643, "learning_rate": 1.6411678214967474e-07, "loss": 0.1636, "step": 32149 }, { "epoch": 0.92, "grad_norm": 3.21354510937243, "learning_rate": 1.639989596140379e-07, "loss": 0.1502, "step": 32150 }, { "epoch": 0.92, "grad_norm": 5.394238971634401, "learning_rate": 1.6388117868192277e-07, "loss": 0.3189, "step": 32151 }, { "epoch": 0.92, "grad_norm": 8.569146187357877, "learning_rate": 1.63763439354343e-07, "loss": 0.8829, "step": 32152 }, { "epoch": 0.92, "grad_norm": 4.512308031051276, "learning_rate": 1.6364574163231006e-07, "loss": 0.3859, "step": 32153 }, { "epoch": 0.92, "grad_norm": 4.99179066019487, "learning_rate": 1.6352808551683917e-07, "loss": 0.5508, "step": 32154 }, { "epoch": 0.92, "grad_norm": 1.7273368652427388, "learning_rate": 1.6341047100893902e-07, "loss": 0.1348, "step": 32155 }, { "epoch": 0.92, "grad_norm": 3.7075584848669347, "learning_rate": 1.6329289810962378e-07, "loss": 0.3572, "step": 32156 }, { "epoch": 0.92, "grad_norm": 1.6287416974205196, "learning_rate": 1.631753668199032e-07, "loss": 0.0351, "step": 32157 }, { "epoch": 0.92, "grad_norm": 7.390210033162727, "learning_rate": 1.6305787714079036e-07, "loss": 0.4867, "step": 32158 }, { "epoch": 0.92, "grad_norm": 5.852635976522694, "learning_rate": 1.62940429073295e-07, "loss": 0.5623, "step": 32159 }, { "epoch": 0.92, "grad_norm": 6.281806807249873, "learning_rate": 1.628230226184274e-07, "loss": 0.3211, "step": 32160 }, { "epoch": 0.92, "grad_norm": 10.489522520240696, "learning_rate": 1.627056577771985e-07, "loss": 0.4798, "step": 32161 }, { "epoch": 0.92, "grad_norm": 3.889332169669806, "learning_rate": 1.6258833455061575e-07, "loss": 0.2951, "step": 32162 }, { "epoch": 0.92, "grad_norm": 3.8145498487140768, "learning_rate": 1.6247105293969113e-07, "loss": 0.4724, "step": 32163 }, { "epoch": 0.92, "grad_norm": 6.001388826168905, "learning_rate": 1.6235381294543163e-07, "loss": 0.5269, "step": 32164 }, { "epoch": 0.92, "grad_norm": 5.843280181766342, "learning_rate": 1.6223661456884753e-07, "loss": 0.4472, "step": 32165 }, { "epoch": 0.92, "grad_norm": 4.0875665980055285, "learning_rate": 1.6211945781094528e-07, "loss": 0.3045, "step": 32166 }, { "epoch": 0.92, "grad_norm": 6.822624697274296, "learning_rate": 1.6200234267273463e-07, "loss": 0.4226, "step": 32167 }, { "epoch": 0.92, "grad_norm": 4.139299749225759, "learning_rate": 1.6188526915522196e-07, "loss": 0.3846, "step": 32168 }, { "epoch": 0.92, "grad_norm": 4.540049539161323, "learning_rate": 1.617682372594137e-07, "loss": 0.3687, "step": 32169 }, { "epoch": 0.92, "grad_norm": 6.506475890651175, "learning_rate": 1.616512469863185e-07, "loss": 0.3363, "step": 32170 }, { "epoch": 0.92, "grad_norm": 5.279821620297705, "learning_rate": 1.6153429833694168e-07, "loss": 0.7893, "step": 32171 }, { "epoch": 0.92, "grad_norm": 1.504435359471383, "learning_rate": 1.6141739131228962e-07, "loss": 0.0292, "step": 32172 }, { "epoch": 0.92, "grad_norm": 7.596678118270985, "learning_rate": 1.6130052591336764e-07, "loss": 0.6033, "step": 32173 }, { "epoch": 0.92, "grad_norm": 9.969180680780388, "learning_rate": 1.6118370214118218e-07, "loss": 0.9909, "step": 32174 }, { "epoch": 0.92, "grad_norm": 7.234045231267221, "learning_rate": 1.610669199967374e-07, "loss": 0.5961, "step": 32175 }, { "epoch": 0.92, "grad_norm": 2.3538232626024844, "learning_rate": 1.6095017948103754e-07, "loss": 0.17, "step": 32176 }, { "epoch": 0.92, "grad_norm": 4.21664299801201, "learning_rate": 1.6083348059508786e-07, "loss": 0.205, "step": 32177 }, { "epoch": 0.92, "grad_norm": 7.573104705644833, "learning_rate": 1.6071682333989147e-07, "loss": 0.7751, "step": 32178 }, { "epoch": 0.92, "grad_norm": 10.559893856526609, "learning_rate": 1.6060020771645314e-07, "loss": 0.6829, "step": 32179 }, { "epoch": 0.92, "grad_norm": 11.158276328486572, "learning_rate": 1.604836337257748e-07, "loss": 0.6571, "step": 32180 }, { "epoch": 0.92, "grad_norm": 8.896931803032576, "learning_rate": 1.6036710136886013e-07, "loss": 0.6873, "step": 32181 }, { "epoch": 0.92, "grad_norm": 6.373570786878255, "learning_rate": 1.6025061064671165e-07, "loss": 0.4791, "step": 32182 }, { "epoch": 0.92, "grad_norm": 3.092661955269305, "learning_rate": 1.601341615603308e-07, "loss": 0.0822, "step": 32183 }, { "epoch": 0.92, "grad_norm": 5.281171380535666, "learning_rate": 1.6001775411072063e-07, "loss": 0.3865, "step": 32184 }, { "epoch": 0.92, "grad_norm": 4.196511635349578, "learning_rate": 1.5990138829888092e-07, "loss": 0.5024, "step": 32185 }, { "epoch": 0.92, "grad_norm": 5.885776781075748, "learning_rate": 1.597850641258142e-07, "loss": 0.1206, "step": 32186 }, { "epoch": 0.92, "grad_norm": 9.417814755851976, "learning_rate": 1.5966878159252075e-07, "loss": 0.4109, "step": 32187 }, { "epoch": 0.92, "grad_norm": 6.18491394254476, "learning_rate": 1.5955254070000092e-07, "loss": 0.326, "step": 32188 }, { "epoch": 0.92, "grad_norm": 9.425433649261105, "learning_rate": 1.594363414492539e-07, "loss": 0.6965, "step": 32189 }, { "epoch": 0.92, "grad_norm": 15.173413655353958, "learning_rate": 1.5932018384128056e-07, "loss": 0.6554, "step": 32190 }, { "epoch": 0.92, "grad_norm": 5.780335266329127, "learning_rate": 1.5920406787707898e-07, "loss": 0.438, "step": 32191 }, { "epoch": 0.92, "grad_norm": 11.607132008306444, "learning_rate": 1.5908799355764893e-07, "loss": 0.8335, "step": 32192 }, { "epoch": 0.92, "grad_norm": 6.328890982528107, "learning_rate": 1.589719608839896e-07, "loss": 0.4462, "step": 32193 }, { "epoch": 0.92, "grad_norm": 7.151156082590138, "learning_rate": 1.588559698570985e-07, "loss": 0.5426, "step": 32194 }, { "epoch": 0.92, "grad_norm": 2.6257225586206254, "learning_rate": 1.587400204779732e-07, "loss": 0.1707, "step": 32195 }, { "epoch": 0.92, "grad_norm": 2.057653034841207, "learning_rate": 1.586241127476107e-07, "loss": 0.3382, "step": 32196 }, { "epoch": 0.92, "grad_norm": 2.1490733090176435, "learning_rate": 1.585082466670096e-07, "loss": 0.1621, "step": 32197 }, { "epoch": 0.92, "grad_norm": 5.1177966970989495, "learning_rate": 1.5839242223716579e-07, "loss": 0.5599, "step": 32198 }, { "epoch": 0.92, "grad_norm": 4.225905023512238, "learning_rate": 1.5827663945907512e-07, "loss": 0.4248, "step": 32199 }, { "epoch": 0.92, "grad_norm": 6.884367612331753, "learning_rate": 1.581608983337357e-07, "loss": 0.4964, "step": 32200 }, { "epoch": 0.92, "grad_norm": 7.479641430874093, "learning_rate": 1.580451988621412e-07, "loss": 0.7149, "step": 32201 }, { "epoch": 0.92, "grad_norm": 6.648844920494024, "learning_rate": 1.57929541045288e-07, "loss": 0.219, "step": 32202 }, { "epoch": 0.92, "grad_norm": 2.4437858520375504, "learning_rate": 1.5781392488417037e-07, "loss": 0.1549, "step": 32203 }, { "epoch": 0.92, "grad_norm": 2.4259450000281895, "learning_rate": 1.576983503797841e-07, "loss": 0.1117, "step": 32204 }, { "epoch": 0.92, "grad_norm": 5.142208168639678, "learning_rate": 1.5758281753312176e-07, "loss": 0.5708, "step": 32205 }, { "epoch": 0.92, "grad_norm": 9.145868763315544, "learning_rate": 1.5746732634517925e-07, "loss": 0.6995, "step": 32206 }, { "epoch": 0.92, "grad_norm": 5.063579561957172, "learning_rate": 1.5735187681694797e-07, "loss": 0.3206, "step": 32207 }, { "epoch": 0.92, "grad_norm": 2.7737432338207455, "learning_rate": 1.5723646894942324e-07, "loss": 0.0664, "step": 32208 }, { "epoch": 0.92, "grad_norm": 4.3708187558804115, "learning_rate": 1.5712110274359648e-07, "loss": 0.4342, "step": 32209 }, { "epoch": 0.92, "grad_norm": 6.007677748896454, "learning_rate": 1.5700577820046026e-07, "loss": 0.2551, "step": 32210 }, { "epoch": 0.92, "grad_norm": 7.564608603095317, "learning_rate": 1.5689049532100764e-07, "loss": 0.4858, "step": 32211 }, { "epoch": 0.92, "grad_norm": 9.612596917624936, "learning_rate": 1.5677525410622952e-07, "loss": 0.567, "step": 32212 }, { "epoch": 0.92, "grad_norm": 6.201815334787699, "learning_rate": 1.5666005455711785e-07, "loss": 0.3735, "step": 32213 }, { "epoch": 0.92, "grad_norm": 1.7572242261076705, "learning_rate": 1.5654489667466298e-07, "loss": 0.1021, "step": 32214 }, { "epoch": 0.92, "grad_norm": 4.3974339371880955, "learning_rate": 1.564297804598558e-07, "loss": 0.3468, "step": 32215 }, { "epoch": 0.92, "grad_norm": 3.5995188312040582, "learning_rate": 1.5631470591368769e-07, "loss": 0.6805, "step": 32216 }, { "epoch": 0.92, "grad_norm": 4.427764461702856, "learning_rate": 1.5619967303714733e-07, "loss": 0.4028, "step": 32217 }, { "epoch": 0.92, "grad_norm": 3.0902087819449227, "learning_rate": 1.5608468183122505e-07, "loss": 0.2248, "step": 32218 }, { "epoch": 0.92, "grad_norm": 6.717512433715262, "learning_rate": 1.5596973229690892e-07, "loss": 0.3235, "step": 32219 }, { "epoch": 0.92, "grad_norm": 3.4305184819633103, "learning_rate": 1.5585482443518984e-07, "loss": 0.0658, "step": 32220 }, { "epoch": 0.92, "grad_norm": 3.6646115475266607, "learning_rate": 1.5573995824705534e-07, "loss": 0.2415, "step": 32221 }, { "epoch": 0.92, "grad_norm": 3.50752782197496, "learning_rate": 1.5562513373349297e-07, "loss": 0.2749, "step": 32222 }, { "epoch": 0.92, "grad_norm": 1.9154622895617035, "learning_rate": 1.5551035089549138e-07, "loss": 0.0689, "step": 32223 }, { "epoch": 0.92, "grad_norm": 3.475101841319315, "learning_rate": 1.5539560973403701e-07, "loss": 0.2487, "step": 32224 }, { "epoch": 0.92, "grad_norm": 3.5147489685452142, "learning_rate": 1.552809102501185e-07, "loss": 0.1963, "step": 32225 }, { "epoch": 0.92, "grad_norm": 5.223915011174649, "learning_rate": 1.5516625244472172e-07, "loss": 0.671, "step": 32226 }, { "epoch": 0.92, "grad_norm": 3.101957377704798, "learning_rate": 1.5505163631883313e-07, "loss": 0.3547, "step": 32227 }, { "epoch": 0.92, "grad_norm": 5.173428673766916, "learning_rate": 1.5493706187343916e-07, "loss": 0.4003, "step": 32228 }, { "epoch": 0.92, "grad_norm": 3.4881673796416357, "learning_rate": 1.5482252910952511e-07, "loss": 0.1961, "step": 32229 }, { "epoch": 0.92, "grad_norm": 10.126864603036442, "learning_rate": 1.5470803802807577e-07, "loss": 0.66, "step": 32230 }, { "epoch": 0.92, "grad_norm": 5.403022375946902, "learning_rate": 1.5459358863007644e-07, "loss": 0.2049, "step": 32231 }, { "epoch": 0.92, "grad_norm": 3.555117587386178, "learning_rate": 1.5447918091651192e-07, "loss": 0.3915, "step": 32232 }, { "epoch": 0.92, "grad_norm": 4.519272858693718, "learning_rate": 1.5436481488836641e-07, "loss": 0.451, "step": 32233 }, { "epoch": 0.92, "grad_norm": 5.1283170618777065, "learning_rate": 1.5425049054662466e-07, "loss": 0.2811, "step": 32234 }, { "epoch": 0.92, "grad_norm": 10.00359832873677, "learning_rate": 1.541362078922687e-07, "loss": 0.4487, "step": 32235 }, { "epoch": 0.92, "grad_norm": 7.885921232003789, "learning_rate": 1.5402196692628268e-07, "loss": 0.569, "step": 32236 }, { "epoch": 0.92, "grad_norm": 5.5050563977754114, "learning_rate": 1.539077676496481e-07, "loss": 0.6049, "step": 32237 }, { "epoch": 0.92, "grad_norm": 2.310839314467344, "learning_rate": 1.5379361006334914e-07, "loss": 0.1625, "step": 32238 }, { "epoch": 0.92, "grad_norm": 2.1234412085898646, "learning_rate": 1.536794941683667e-07, "loss": 0.1296, "step": 32239 }, { "epoch": 0.92, "grad_norm": 6.494688431429956, "learning_rate": 1.5356541996568275e-07, "loss": 0.4476, "step": 32240 }, { "epoch": 0.92, "grad_norm": 7.02960674157254, "learning_rate": 1.534513874562793e-07, "loss": 0.3718, "step": 32241 }, { "epoch": 0.92, "grad_norm": 6.917482002491315, "learning_rate": 1.5333739664113668e-07, "loss": 0.5712, "step": 32242 }, { "epoch": 0.92, "grad_norm": 6.959061495053572, "learning_rate": 1.5322344752123575e-07, "loss": 0.4875, "step": 32243 }, { "epoch": 0.92, "grad_norm": 4.991345974911564, "learning_rate": 1.531095400975563e-07, "loss": 0.4662, "step": 32244 }, { "epoch": 0.92, "grad_norm": 9.328026443548655, "learning_rate": 1.5299567437107977e-07, "loss": 0.7683, "step": 32245 }, { "epoch": 0.92, "grad_norm": 4.893194955405407, "learning_rate": 1.5288185034278313e-07, "loss": 0.4763, "step": 32246 }, { "epoch": 0.92, "grad_norm": 12.336559393525526, "learning_rate": 1.5276806801364842e-07, "loss": 0.8772, "step": 32247 }, { "epoch": 0.92, "grad_norm": 6.428988728148902, "learning_rate": 1.5265432738465313e-07, "loss": 0.3214, "step": 32248 }, { "epoch": 0.92, "grad_norm": 5.222640135233062, "learning_rate": 1.5254062845677487e-07, "loss": 0.1517, "step": 32249 }, { "epoch": 0.92, "grad_norm": 3.158465146322797, "learning_rate": 1.5242697123099337e-07, "loss": 0.4627, "step": 32250 }, { "epoch": 0.92, "grad_norm": 7.549519677704283, "learning_rate": 1.5231335570828565e-07, "loss": 0.4788, "step": 32251 }, { "epoch": 0.92, "grad_norm": 4.206764290644633, "learning_rate": 1.521997818896298e-07, "loss": 0.3805, "step": 32252 }, { "epoch": 0.92, "grad_norm": 5.523812466078497, "learning_rate": 1.5208624977600117e-07, "loss": 0.1735, "step": 32253 }, { "epoch": 0.92, "grad_norm": 14.417072773691196, "learning_rate": 1.5197275936837898e-07, "loss": 0.5554, "step": 32254 }, { "epoch": 0.92, "grad_norm": 5.121260511533245, "learning_rate": 1.518593106677374e-07, "loss": 0.3548, "step": 32255 }, { "epoch": 0.92, "grad_norm": 6.29802774711401, "learning_rate": 1.5174590367505403e-07, "loss": 0.4183, "step": 32256 }, { "epoch": 0.92, "grad_norm": 6.902261402133283, "learning_rate": 1.5163253839130255e-07, "loss": 0.6929, "step": 32257 }, { "epoch": 0.92, "grad_norm": 10.233153991080345, "learning_rate": 1.515192148174599e-07, "loss": 0.7259, "step": 32258 }, { "epoch": 0.92, "grad_norm": 5.315685170061285, "learning_rate": 1.514059329545009e-07, "loss": 0.5891, "step": 32259 }, { "epoch": 0.92, "grad_norm": 7.5773622423900715, "learning_rate": 1.512926928033992e-07, "loss": 0.6157, "step": 32260 }, { "epoch": 0.92, "grad_norm": 4.404957547930424, "learning_rate": 1.5117949436512956e-07, "loss": 0.6383, "step": 32261 }, { "epoch": 0.92, "grad_norm": 4.154766219456051, "learning_rate": 1.5106633764066625e-07, "loss": 0.3761, "step": 32262 }, { "epoch": 0.92, "grad_norm": 4.792198535867925, "learning_rate": 1.509532226309818e-07, "loss": 0.5811, "step": 32263 }, { "epoch": 0.92, "grad_norm": 3.744311977996507, "learning_rate": 1.5084014933704982e-07, "loss": 0.1957, "step": 32264 }, { "epoch": 0.92, "grad_norm": 8.074312206773854, "learning_rate": 1.507271177598424e-07, "loss": 0.6946, "step": 32265 }, { "epoch": 0.92, "grad_norm": 1.8197200449529678, "learning_rate": 1.5061412790033314e-07, "loss": 0.1623, "step": 32266 }, { "epoch": 0.92, "grad_norm": 4.520299267609391, "learning_rate": 1.5050117975949296e-07, "loss": 0.2914, "step": 32267 }, { "epoch": 0.92, "grad_norm": 4.784209301730941, "learning_rate": 1.5038827333829497e-07, "loss": 0.2384, "step": 32268 }, { "epoch": 0.92, "grad_norm": 7.55417262850521, "learning_rate": 1.5027540863770896e-07, "loss": 0.6425, "step": 32269 }, { "epoch": 0.92, "grad_norm": 12.574602778324802, "learning_rate": 1.501625856587069e-07, "loss": 0.5741, "step": 32270 }, { "epoch": 0.92, "grad_norm": 2.7549777929030643, "learning_rate": 1.5004980440225857e-07, "loss": 0.1768, "step": 32271 }, { "epoch": 0.92, "grad_norm": 5.265591720340053, "learning_rate": 1.499370648693349e-07, "loss": 0.2495, "step": 32272 }, { "epoch": 0.92, "grad_norm": 2.161361964884882, "learning_rate": 1.498243670609051e-07, "loss": 0.1261, "step": 32273 }, { "epoch": 0.92, "grad_norm": 7.100357339490014, "learning_rate": 1.4971171097793947e-07, "loss": 0.5563, "step": 32274 }, { "epoch": 0.92, "grad_norm": 7.399354001793661, "learning_rate": 1.495990966214067e-07, "loss": 0.4389, "step": 32275 }, { "epoch": 0.92, "grad_norm": 6.442894462378282, "learning_rate": 1.4948652399227547e-07, "loss": 0.431, "step": 32276 }, { "epoch": 0.92, "grad_norm": 7.163112831195254, "learning_rate": 1.49373993091515e-07, "loss": 0.7514, "step": 32277 }, { "epoch": 0.92, "grad_norm": 5.904680013134528, "learning_rate": 1.4926150392009232e-07, "loss": 0.5695, "step": 32278 }, { "epoch": 0.92, "grad_norm": 10.910900272249608, "learning_rate": 1.4914905647897603e-07, "loss": 1.0699, "step": 32279 }, { "epoch": 0.92, "grad_norm": 9.223283900643265, "learning_rate": 1.4903665076913266e-07, "loss": 0.5991, "step": 32280 }, { "epoch": 0.92, "grad_norm": 3.6493336069222866, "learning_rate": 1.4892428679153026e-07, "loss": 0.1293, "step": 32281 }, { "epoch": 0.92, "grad_norm": 5.899460497936869, "learning_rate": 1.4881196454713476e-07, "loss": 0.3595, "step": 32282 }, { "epoch": 0.92, "grad_norm": 10.751712041745412, "learning_rate": 1.486996840369126e-07, "loss": 0.6139, "step": 32283 }, { "epoch": 0.92, "grad_norm": 6.081636566703717, "learning_rate": 1.4858744526182967e-07, "loss": 0.4428, "step": 32284 }, { "epoch": 0.92, "grad_norm": 3.983830942146165, "learning_rate": 1.4847524822285186e-07, "loss": 0.2963, "step": 32285 }, { "epoch": 0.92, "grad_norm": 4.792630607285907, "learning_rate": 1.4836309292094396e-07, "loss": 0.4402, "step": 32286 }, { "epoch": 0.92, "grad_norm": 5.789824503839737, "learning_rate": 1.482509793570708e-07, "loss": 0.2765, "step": 32287 }, { "epoch": 0.92, "grad_norm": 6.39239074637539, "learning_rate": 1.481389075321976e-07, "loss": 0.6944, "step": 32288 }, { "epoch": 0.92, "grad_norm": 3.334822117609714, "learning_rate": 1.4802687744728817e-07, "loss": 0.3252, "step": 32289 }, { "epoch": 0.92, "grad_norm": 3.719494849229014, "learning_rate": 1.4791488910330555e-07, "loss": 0.2005, "step": 32290 }, { "epoch": 0.92, "grad_norm": 7.787429470030654, "learning_rate": 1.4780294250121398e-07, "loss": 0.2933, "step": 32291 }, { "epoch": 0.92, "grad_norm": 6.596264002851812, "learning_rate": 1.47691037641976e-07, "loss": 0.6716, "step": 32292 }, { "epoch": 0.92, "grad_norm": 5.021805422478345, "learning_rate": 1.475791745265548e-07, "loss": 0.4159, "step": 32293 }, { "epoch": 0.92, "grad_norm": 10.37488105429981, "learning_rate": 1.474673531559123e-07, "loss": 0.8207, "step": 32294 }, { "epoch": 0.92, "grad_norm": 4.102982861859841, "learning_rate": 1.4735557353101115e-07, "loss": 0.0902, "step": 32295 }, { "epoch": 0.92, "grad_norm": 5.116159117353047, "learning_rate": 1.4724383565281219e-07, "loss": 0.5495, "step": 32296 }, { "epoch": 0.92, "grad_norm": 6.507125580106557, "learning_rate": 1.4713213952227744e-07, "loss": 0.4679, "step": 32297 }, { "epoch": 0.92, "grad_norm": 5.20610927652631, "learning_rate": 1.4702048514036671e-07, "loss": 0.3345, "step": 32298 }, { "epoch": 0.92, "grad_norm": 6.051776289469232, "learning_rate": 1.469088725080414e-07, "loss": 0.7568, "step": 32299 }, { "epoch": 0.92, "grad_norm": 6.400127257631632, "learning_rate": 1.4679730162626193e-07, "loss": 0.7546, "step": 32300 }, { "epoch": 0.93, "grad_norm": 6.692976091028425, "learning_rate": 1.466857724959875e-07, "loss": 0.3456, "step": 32301 }, { "epoch": 0.93, "grad_norm": 2.9719045844144847, "learning_rate": 1.4657428511817838e-07, "loss": 0.1448, "step": 32302 }, { "epoch": 0.93, "grad_norm": 4.553882167802878, "learning_rate": 1.4646283949379226e-07, "loss": 0.3854, "step": 32303 }, { "epoch": 0.93, "grad_norm": 6.006996287843088, "learning_rate": 1.463514356237894e-07, "loss": 0.6712, "step": 32304 }, { "epoch": 0.93, "grad_norm": 4.584595570006949, "learning_rate": 1.4624007350912683e-07, "loss": 0.2689, "step": 32305 }, { "epoch": 0.93, "grad_norm": 2.2778412028595163, "learning_rate": 1.461287531507638e-07, "loss": 0.1293, "step": 32306 }, { "epoch": 0.93, "grad_norm": 6.425806999734692, "learning_rate": 1.460174745496573e-07, "loss": 0.9802, "step": 32307 }, { "epoch": 0.93, "grad_norm": 7.318473723398594, "learning_rate": 1.4590623770676492e-07, "loss": 0.7111, "step": 32308 }, { "epoch": 0.93, "grad_norm": 7.420492328546015, "learning_rate": 1.4579504262304367e-07, "loss": 0.4798, "step": 32309 }, { "epoch": 0.93, "grad_norm": 5.389842852918487, "learning_rate": 1.4568388929944999e-07, "loss": 0.5065, "step": 32310 }, { "epoch": 0.93, "grad_norm": 3.6904997020256687, "learning_rate": 1.4557277773693977e-07, "loss": 0.3822, "step": 32311 }, { "epoch": 0.93, "grad_norm": 5.925558026911714, "learning_rate": 1.4546170793646952e-07, "loss": 0.5226, "step": 32312 }, { "epoch": 0.93, "grad_norm": 10.323800663415556, "learning_rate": 1.4535067989899454e-07, "loss": 0.4803, "step": 32313 }, { "epoch": 0.93, "grad_norm": 7.776279746463052, "learning_rate": 1.4523969362546964e-07, "loss": 0.7568, "step": 32314 }, { "epoch": 0.93, "grad_norm": 7.189461614535873, "learning_rate": 1.4512874911685015e-07, "loss": 0.5822, "step": 32315 }, { "epoch": 0.93, "grad_norm": 6.19154703359261, "learning_rate": 1.4501784637409033e-07, "loss": 0.5963, "step": 32316 }, { "epoch": 0.93, "grad_norm": 8.10853910456238, "learning_rate": 1.4490698539814385e-07, "loss": 0.3826, "step": 32317 }, { "epoch": 0.93, "grad_norm": 2.708520961033199, "learning_rate": 1.447961661899655e-07, "loss": 0.2452, "step": 32318 }, { "epoch": 0.93, "grad_norm": 5.916783743582036, "learning_rate": 1.446853887505073e-07, "loss": 0.711, "step": 32319 }, { "epoch": 0.93, "grad_norm": 5.7696407568579, "learning_rate": 1.4457465308072295e-07, "loss": 0.4281, "step": 32320 }, { "epoch": 0.93, "grad_norm": 9.693565586286336, "learning_rate": 1.4446395918156498e-07, "loss": 0.5557, "step": 32321 }, { "epoch": 0.93, "grad_norm": 6.853072657909278, "learning_rate": 1.4435330705398654e-07, "loss": 0.5998, "step": 32322 }, { "epoch": 0.93, "grad_norm": 5.198941699871261, "learning_rate": 1.4424269669893798e-07, "loss": 0.3381, "step": 32323 }, { "epoch": 0.93, "grad_norm": 6.760531298621154, "learning_rate": 1.4413212811737187e-07, "loss": 1.1712, "step": 32324 }, { "epoch": 0.93, "grad_norm": 5.664119788570733, "learning_rate": 1.4402160131023913e-07, "loss": 0.3975, "step": 32325 }, { "epoch": 0.93, "grad_norm": 6.474036889636671, "learning_rate": 1.4391111627849064e-07, "loss": 0.558, "step": 32326 }, { "epoch": 0.93, "grad_norm": 6.59164091250681, "learning_rate": 1.4380067302307732e-07, "loss": 0.5424, "step": 32327 }, { "epoch": 0.93, "grad_norm": 5.000003337858993, "learning_rate": 1.436902715449484e-07, "loss": 0.2625, "step": 32328 }, { "epoch": 0.93, "grad_norm": 6.120826038575245, "learning_rate": 1.4357991184505482e-07, "loss": 0.3458, "step": 32329 }, { "epoch": 0.93, "grad_norm": 4.751958618913478, "learning_rate": 1.4346959392434467e-07, "loss": 0.3039, "step": 32330 }, { "epoch": 0.93, "grad_norm": 5.624277449930903, "learning_rate": 1.4335931778376723e-07, "loss": 0.5247, "step": 32331 }, { "epoch": 0.93, "grad_norm": 6.563560400167576, "learning_rate": 1.432490834242728e-07, "loss": 0.3294, "step": 32332 }, { "epoch": 0.93, "grad_norm": 6.629084677421982, "learning_rate": 1.431388908468073e-07, "loss": 0.7633, "step": 32333 }, { "epoch": 0.93, "grad_norm": 3.768172751608323, "learning_rate": 1.430287400523206e-07, "loss": 0.7275, "step": 32334 }, { "epoch": 0.93, "grad_norm": 7.600898235344941, "learning_rate": 1.4291863104176018e-07, "loss": 0.5774, "step": 32335 }, { "epoch": 0.93, "grad_norm": 5.777848413180317, "learning_rate": 1.42808563816072e-07, "loss": 0.3792, "step": 32336 }, { "epoch": 0.93, "grad_norm": 9.274869324619594, "learning_rate": 1.426985383762036e-07, "loss": 0.5112, "step": 32337 }, { "epoch": 0.93, "grad_norm": 5.118283336452768, "learning_rate": 1.4258855472310208e-07, "loss": 0.5341, "step": 32338 }, { "epoch": 0.93, "grad_norm": 4.845795230382205, "learning_rate": 1.424786128577127e-07, "loss": 0.1651, "step": 32339 }, { "epoch": 0.93, "grad_norm": 4.093622394931703, "learning_rate": 1.4236871278098196e-07, "loss": 0.4681, "step": 32340 }, { "epoch": 0.93, "grad_norm": 5.964249017290923, "learning_rate": 1.4225885449385523e-07, "loss": 0.4607, "step": 32341 }, { "epoch": 0.93, "grad_norm": 6.969085856328797, "learning_rate": 1.4214903799727786e-07, "loss": 0.8851, "step": 32342 }, { "epoch": 0.93, "grad_norm": 15.181858576971086, "learning_rate": 1.420392632921941e-07, "loss": 0.5543, "step": 32343 }, { "epoch": 0.93, "grad_norm": 4.391584515511999, "learning_rate": 1.419295303795476e-07, "loss": 0.3971, "step": 32344 }, { "epoch": 0.93, "grad_norm": 12.302896540048872, "learning_rate": 1.4181983926028376e-07, "loss": 0.6253, "step": 32345 }, { "epoch": 0.93, "grad_norm": 7.702609774244671, "learning_rate": 1.4171018993534515e-07, "loss": 0.4228, "step": 32346 }, { "epoch": 0.93, "grad_norm": 5.782325974421453, "learning_rate": 1.41600582405676e-07, "loss": 0.7598, "step": 32347 }, { "epoch": 0.93, "grad_norm": 5.568945204613356, "learning_rate": 1.4149101667221944e-07, "loss": 0.5488, "step": 32348 }, { "epoch": 0.93, "grad_norm": 5.317834128430943, "learning_rate": 1.4138149273591694e-07, "loss": 0.3085, "step": 32349 }, { "epoch": 0.93, "grad_norm": 4.165219402098502, "learning_rate": 1.4127201059771166e-07, "loss": 0.1685, "step": 32350 }, { "epoch": 0.93, "grad_norm": 3.612999784487019, "learning_rate": 1.4116257025854398e-07, "loss": 0.3869, "step": 32351 }, { "epoch": 0.93, "grad_norm": 7.542187405798235, "learning_rate": 1.410531717193575e-07, "loss": 0.4197, "step": 32352 }, { "epoch": 0.93, "grad_norm": 4.203583646234035, "learning_rate": 1.4094381498109156e-07, "loss": 0.355, "step": 32353 }, { "epoch": 0.93, "grad_norm": 2.8227613178783404, "learning_rate": 1.4083450004468868e-07, "loss": 0.4107, "step": 32354 }, { "epoch": 0.93, "grad_norm": 6.663325583173714, "learning_rate": 1.4072522691108757e-07, "loss": 0.4565, "step": 32355 }, { "epoch": 0.93, "grad_norm": 7.466140722231561, "learning_rate": 1.406159955812292e-07, "loss": 0.3796, "step": 32356 }, { "epoch": 0.93, "grad_norm": 4.584496604799188, "learning_rate": 1.4050680605605327e-07, "loss": 0.3387, "step": 32357 }, { "epoch": 0.93, "grad_norm": 4.130262428240517, "learning_rate": 1.4039765833649854e-07, "loss": 0.2977, "step": 32358 }, { "epoch": 0.93, "grad_norm": 3.5180221861919887, "learning_rate": 1.4028855242350537e-07, "loss": 0.3923, "step": 32359 }, { "epoch": 0.93, "grad_norm": 9.166398501086066, "learning_rate": 1.401794883180102e-07, "loss": 0.7261, "step": 32360 }, { "epoch": 0.93, "grad_norm": 1.6625290462996776, "learning_rate": 1.4007046602095343e-07, "loss": 0.2167, "step": 32361 }, { "epoch": 0.93, "grad_norm": 2.7656532113743078, "learning_rate": 1.3996148553327204e-07, "loss": 0.5009, "step": 32362 }, { "epoch": 0.93, "grad_norm": 4.831921464417603, "learning_rate": 1.398525468559031e-07, "loss": 0.5302, "step": 32363 }, { "epoch": 0.93, "grad_norm": 6.268080137599862, "learning_rate": 1.3974364998978474e-07, "loss": 0.3716, "step": 32364 }, { "epoch": 0.93, "grad_norm": 7.238263446685092, "learning_rate": 1.3963479493585286e-07, "loss": 0.496, "step": 32365 }, { "epoch": 0.93, "grad_norm": 4.031254613119636, "learning_rate": 1.3952598169504505e-07, "loss": 0.2738, "step": 32366 }, { "epoch": 0.93, "grad_norm": 11.142454877166758, "learning_rate": 1.3941721026829613e-07, "loss": 0.4357, "step": 32367 }, { "epoch": 0.93, "grad_norm": 4.234706344872219, "learning_rate": 1.393084806565431e-07, "loss": 0.6561, "step": 32368 }, { "epoch": 0.93, "grad_norm": 4.493740417088251, "learning_rate": 1.3919979286072083e-07, "loss": 0.4432, "step": 32369 }, { "epoch": 0.93, "grad_norm": 8.274574900423273, "learning_rate": 1.3909114688176406e-07, "loss": 0.7302, "step": 32370 }, { "epoch": 0.93, "grad_norm": 2.7744868912448175, "learning_rate": 1.3898254272060706e-07, "loss": 0.3561, "step": 32371 }, { "epoch": 0.93, "grad_norm": 5.511925296711848, "learning_rate": 1.388739803781852e-07, "loss": 0.4308, "step": 32372 }, { "epoch": 0.93, "grad_norm": 5.00102704943943, "learning_rate": 1.3876545985543221e-07, "loss": 0.4501, "step": 32373 }, { "epoch": 0.93, "grad_norm": 11.68852412994749, "learning_rate": 1.3865698115328062e-07, "loss": 0.312, "step": 32374 }, { "epoch": 0.93, "grad_norm": 6.217165184316495, "learning_rate": 1.385485442726653e-07, "loss": 0.4884, "step": 32375 }, { "epoch": 0.93, "grad_norm": 3.9472431557840197, "learning_rate": 1.384401492145182e-07, "loss": 0.3414, "step": 32376 }, { "epoch": 0.93, "grad_norm": 5.677562961674389, "learning_rate": 1.3833179597977198e-07, "loss": 0.656, "step": 32377 }, { "epoch": 0.93, "grad_norm": 5.119746097065672, "learning_rate": 1.382234845693581e-07, "loss": 0.5239, "step": 32378 }, { "epoch": 0.93, "grad_norm": 3.3260761545131343, "learning_rate": 1.3811521498420967e-07, "loss": 0.1515, "step": 32379 }, { "epoch": 0.93, "grad_norm": 4.425835012326538, "learning_rate": 1.3800698722525706e-07, "loss": 0.3624, "step": 32380 }, { "epoch": 0.93, "grad_norm": 2.564415215984145, "learning_rate": 1.3789880129343125e-07, "loss": 0.233, "step": 32381 }, { "epoch": 0.93, "grad_norm": 3.262216294767522, "learning_rate": 1.3779065718966422e-07, "loss": 0.4337, "step": 32382 }, { "epoch": 0.93, "grad_norm": 7.616236356381483, "learning_rate": 1.376825549148858e-07, "loss": 0.6862, "step": 32383 }, { "epoch": 0.93, "grad_norm": 6.539509857079306, "learning_rate": 1.3757449447002526e-07, "loss": 0.4261, "step": 32384 }, { "epoch": 0.93, "grad_norm": 3.5185545858594383, "learning_rate": 1.3746647585601237e-07, "loss": 0.5302, "step": 32385 }, { "epoch": 0.93, "grad_norm": 6.910774544009247, "learning_rate": 1.3735849907377696e-07, "loss": 0.8044, "step": 32386 }, { "epoch": 0.93, "grad_norm": 5.8005253282600195, "learning_rate": 1.3725056412424776e-07, "loss": 0.3863, "step": 32387 }, { "epoch": 0.93, "grad_norm": 8.295255168832362, "learning_rate": 1.3714267100835232e-07, "loss": 0.6314, "step": 32388 }, { "epoch": 0.93, "grad_norm": 1.7113239949169359, "learning_rate": 1.370348197270216e-07, "loss": 0.2953, "step": 32389 }, { "epoch": 0.93, "grad_norm": 7.072651614212174, "learning_rate": 1.3692701028118037e-07, "loss": 0.6654, "step": 32390 }, { "epoch": 0.93, "grad_norm": 5.0317633467655645, "learning_rate": 1.3681924267175738e-07, "loss": 0.3999, "step": 32391 }, { "epoch": 0.93, "grad_norm": 6.722297512592613, "learning_rate": 1.3671151689967965e-07, "loss": 0.4106, "step": 32392 }, { "epoch": 0.93, "grad_norm": 4.955394766332108, "learning_rate": 1.366038329658742e-07, "loss": 0.3317, "step": 32393 }, { "epoch": 0.93, "grad_norm": 3.658998027249472, "learning_rate": 1.3649619087126696e-07, "loss": 0.3751, "step": 32394 }, { "epoch": 0.93, "grad_norm": 7.235585550156365, "learning_rate": 1.3638859061678444e-07, "loss": 0.8428, "step": 32395 }, { "epoch": 0.93, "grad_norm": 4.429503125856311, "learning_rate": 1.3628103220335198e-07, "loss": 0.2531, "step": 32396 }, { "epoch": 0.93, "grad_norm": 2.5124199868741983, "learning_rate": 1.3617351563189441e-07, "loss": 0.2896, "step": 32397 }, { "epoch": 0.93, "grad_norm": 6.8855166367868685, "learning_rate": 1.360660409033382e-07, "loss": 0.3918, "step": 32398 }, { "epoch": 0.93, "grad_norm": 4.615516902559143, "learning_rate": 1.3595860801860594e-07, "loss": 0.4769, "step": 32399 }, { "epoch": 0.93, "grad_norm": 5.724836831078893, "learning_rate": 1.3585121697862357e-07, "loss": 0.4749, "step": 32400 }, { "epoch": 0.93, "grad_norm": 4.710428593162253, "learning_rate": 1.3574386778431426e-07, "loss": 0.2253, "step": 32401 }, { "epoch": 0.93, "grad_norm": 10.6596092679269, "learning_rate": 1.3563656043660168e-07, "loss": 0.5763, "step": 32402 }, { "epoch": 0.93, "grad_norm": 4.677434617793096, "learning_rate": 1.3552929493640843e-07, "loss": 0.4617, "step": 32403 }, { "epoch": 0.93, "grad_norm": 3.016433311011022, "learning_rate": 1.3542207128465768e-07, "loss": 0.4197, "step": 32404 }, { "epoch": 0.93, "grad_norm": 3.5010801419693243, "learning_rate": 1.3531488948227257e-07, "loss": 0.2678, "step": 32405 }, { "epoch": 0.93, "grad_norm": 3.924337973920452, "learning_rate": 1.3520774953017345e-07, "loss": 0.2813, "step": 32406 }, { "epoch": 0.93, "grad_norm": 4.83981155741948, "learning_rate": 1.3510065142928407e-07, "loss": 0.4088, "step": 32407 }, { "epoch": 0.93, "grad_norm": 3.7077972456806765, "learning_rate": 1.3499359518052425e-07, "loss": 0.251, "step": 32408 }, { "epoch": 0.93, "grad_norm": 5.512145460496052, "learning_rate": 1.34886580784816e-07, "loss": 0.3274, "step": 32409 }, { "epoch": 0.93, "grad_norm": 1.976605071864578, "learning_rate": 1.347796082430791e-07, "loss": 0.1023, "step": 32410 }, { "epoch": 0.93, "grad_norm": 4.178775067426811, "learning_rate": 1.346726775562346e-07, "loss": 0.3753, "step": 32411 }, { "epoch": 0.93, "grad_norm": 6.053200067735088, "learning_rate": 1.3456578872520164e-07, "loss": 0.6175, "step": 32412 }, { "epoch": 0.93, "grad_norm": 6.002464304940612, "learning_rate": 1.3445894175090013e-07, "loss": 0.4897, "step": 32413 }, { "epoch": 0.93, "grad_norm": 3.4753195351576687, "learning_rate": 1.343521366342493e-07, "loss": 0.3653, "step": 32414 }, { "epoch": 0.93, "grad_norm": 5.121471632597626, "learning_rate": 1.3424537337616783e-07, "loss": 0.3544, "step": 32415 }, { "epoch": 0.93, "grad_norm": 7.065924835254464, "learning_rate": 1.3413865197757559e-07, "loss": 0.3791, "step": 32416 }, { "epoch": 0.93, "grad_norm": 5.572340543920539, "learning_rate": 1.3403197243938794e-07, "loss": 0.863, "step": 32417 }, { "epoch": 0.93, "grad_norm": 9.789662019313669, "learning_rate": 1.3392533476252468e-07, "loss": 0.4483, "step": 32418 }, { "epoch": 0.93, "grad_norm": 5.873714225158398, "learning_rate": 1.338187389479023e-07, "loss": 0.8167, "step": 32419 }, { "epoch": 0.93, "grad_norm": 8.704905384344894, "learning_rate": 1.3371218499643845e-07, "loss": 0.6306, "step": 32420 }, { "epoch": 0.93, "grad_norm": 4.287679341516328, "learning_rate": 1.336056729090496e-07, "loss": 0.5195, "step": 32421 }, { "epoch": 0.93, "grad_norm": 5.483112548549615, "learning_rate": 1.3349920268665217e-07, "loss": 0.6435, "step": 32422 }, { "epoch": 0.93, "grad_norm": 3.0095682939701764, "learning_rate": 1.3339277433016162e-07, "loss": 0.3687, "step": 32423 }, { "epoch": 0.93, "grad_norm": 5.072516974565993, "learning_rate": 1.332863878404933e-07, "loss": 0.4793, "step": 32424 }, { "epoch": 0.93, "grad_norm": 4.865116811684906, "learning_rate": 1.331800432185637e-07, "loss": 0.2545, "step": 32425 }, { "epoch": 0.93, "grad_norm": 7.927452516585007, "learning_rate": 1.3307374046528653e-07, "loss": 0.2383, "step": 32426 }, { "epoch": 0.93, "grad_norm": 4.815815155314915, "learning_rate": 1.3296747958157718e-07, "loss": 0.905, "step": 32427 }, { "epoch": 0.93, "grad_norm": 7.474802222553877, "learning_rate": 1.3286126056834937e-07, "loss": 0.5177, "step": 32428 }, { "epoch": 0.93, "grad_norm": 3.017514916409189, "learning_rate": 1.3275508342651678e-07, "loss": 0.2777, "step": 32429 }, { "epoch": 0.93, "grad_norm": 7.634205529997818, "learning_rate": 1.3264894815699315e-07, "loss": 0.5073, "step": 32430 }, { "epoch": 0.93, "grad_norm": 8.29335741955192, "learning_rate": 1.325428547606905e-07, "loss": 0.4773, "step": 32431 }, { "epoch": 0.93, "grad_norm": 3.4870056519797585, "learning_rate": 1.3243680323852314e-07, "loss": 0.3456, "step": 32432 }, { "epoch": 0.93, "grad_norm": 3.6547268034023177, "learning_rate": 1.32330793591402e-07, "loss": 0.2792, "step": 32433 }, { "epoch": 0.93, "grad_norm": 3.0787819098469575, "learning_rate": 1.3222482582024022e-07, "loss": 0.3233, "step": 32434 }, { "epoch": 0.93, "grad_norm": 12.521051685293298, "learning_rate": 1.3211889992594873e-07, "loss": 0.5004, "step": 32435 }, { "epoch": 0.93, "grad_norm": 9.854551607534413, "learning_rate": 1.3201301590943903e-07, "loss": 0.7807, "step": 32436 }, { "epoch": 0.93, "grad_norm": 5.349959485622553, "learning_rate": 1.3190717377162265e-07, "loss": 0.7637, "step": 32437 }, { "epoch": 0.93, "grad_norm": 3.719056060762363, "learning_rate": 1.3180137351340828e-07, "loss": 0.2515, "step": 32438 }, { "epoch": 0.93, "grad_norm": 4.095964633522608, "learning_rate": 1.316956151357085e-07, "loss": 0.1521, "step": 32439 }, { "epoch": 0.93, "grad_norm": 3.8332756891615722, "learning_rate": 1.3158989863943095e-07, "loss": 0.062, "step": 32440 }, { "epoch": 0.93, "grad_norm": 4.350089846154525, "learning_rate": 1.3148422402548654e-07, "loss": 0.5107, "step": 32441 }, { "epoch": 0.93, "grad_norm": 4.566123881881382, "learning_rate": 1.3137859129478348e-07, "loss": 0.5104, "step": 32442 }, { "epoch": 0.93, "grad_norm": 9.23406475856283, "learning_rate": 1.3127300044823156e-07, "loss": 0.7603, "step": 32443 }, { "epoch": 0.93, "grad_norm": 6.30568440436842, "learning_rate": 1.3116745148673893e-07, "loss": 0.3305, "step": 32444 }, { "epoch": 0.93, "grad_norm": 4.2624489834325, "learning_rate": 1.3106194441121268e-07, "loss": 0.3695, "step": 32445 }, { "epoch": 0.93, "grad_norm": 4.829650242938531, "learning_rate": 1.3095647922256094e-07, "loss": 0.5479, "step": 32446 }, { "epoch": 0.93, "grad_norm": 7.054355414971643, "learning_rate": 1.3085105592169077e-07, "loss": 0.7885, "step": 32447 }, { "epoch": 0.93, "grad_norm": 8.915644245414406, "learning_rate": 1.307456745095098e-07, "loss": 0.4544, "step": 32448 }, { "epoch": 0.93, "grad_norm": 4.174721907302468, "learning_rate": 1.3064033498692508e-07, "loss": 0.5213, "step": 32449 }, { "epoch": 0.93, "grad_norm": 1.9735924189537937, "learning_rate": 1.305350373548414e-07, "loss": 0.0757, "step": 32450 }, { "epoch": 0.93, "grad_norm": 3.075226246643992, "learning_rate": 1.3042978161416475e-07, "loss": 0.2221, "step": 32451 }, { "epoch": 0.93, "grad_norm": 4.357399797234032, "learning_rate": 1.303245677658016e-07, "loss": 0.4872, "step": 32452 }, { "epoch": 0.93, "grad_norm": 17.158971240632848, "learning_rate": 1.3021939581065623e-07, "loss": 0.7141, "step": 32453 }, { "epoch": 0.93, "grad_norm": 4.611344566461319, "learning_rate": 1.3011426574963404e-07, "loss": 0.1642, "step": 32454 }, { "epoch": 0.93, "grad_norm": 6.753006018526482, "learning_rate": 1.3000917758363984e-07, "loss": 0.5003, "step": 32455 }, { "epoch": 0.93, "grad_norm": 5.904199476381974, "learning_rate": 1.2990413131357683e-07, "loss": 0.309, "step": 32456 }, { "epoch": 0.93, "grad_norm": 4.61655157005438, "learning_rate": 1.2979912694034868e-07, "loss": 0.1315, "step": 32457 }, { "epoch": 0.93, "grad_norm": 3.2168848318687546, "learning_rate": 1.296941644648586e-07, "loss": 0.4398, "step": 32458 }, { "epoch": 0.93, "grad_norm": 3.258562344765809, "learning_rate": 1.2958924388801086e-07, "loss": 0.3601, "step": 32459 }, { "epoch": 0.93, "grad_norm": 8.5194409701021, "learning_rate": 1.294843652107064e-07, "loss": 0.5397, "step": 32460 }, { "epoch": 0.93, "grad_norm": 8.105441806242869, "learning_rate": 1.2937952843384895e-07, "loss": 0.5796, "step": 32461 }, { "epoch": 0.93, "grad_norm": 5.967812948918559, "learning_rate": 1.2927473355833885e-07, "loss": 0.6194, "step": 32462 }, { "epoch": 0.93, "grad_norm": 2.0737609070622702, "learning_rate": 1.2916998058507934e-07, "loss": 0.0927, "step": 32463 }, { "epoch": 0.93, "grad_norm": 4.825200797137517, "learning_rate": 1.2906526951497077e-07, "loss": 0.6929, "step": 32464 }, { "epoch": 0.93, "grad_norm": 5.094830018203803, "learning_rate": 1.2896060034891356e-07, "loss": 0.4077, "step": 32465 }, { "epoch": 0.93, "grad_norm": 6.300784315000901, "learning_rate": 1.288559730878086e-07, "loss": 0.6395, "step": 32466 }, { "epoch": 0.93, "grad_norm": 4.843193908272079, "learning_rate": 1.287513877325558e-07, "loss": 0.3691, "step": 32467 }, { "epoch": 0.93, "grad_norm": 9.533192371002054, "learning_rate": 1.286468442840555e-07, "loss": 0.5174, "step": 32468 }, { "epoch": 0.93, "grad_norm": 7.225042531769379, "learning_rate": 1.2854234274320644e-07, "loss": 0.5295, "step": 32469 }, { "epoch": 0.93, "grad_norm": 3.985094633749698, "learning_rate": 1.2843788311090787e-07, "loss": 0.2255, "step": 32470 }, { "epoch": 0.93, "grad_norm": 2.638935987919451, "learning_rate": 1.2833346538805857e-07, "loss": 0.1532, "step": 32471 }, { "epoch": 0.93, "grad_norm": 6.808413881255747, "learning_rate": 1.2822908957555614e-07, "loss": 0.304, "step": 32472 }, { "epoch": 0.93, "grad_norm": 10.446042750759535, "learning_rate": 1.2812475567429927e-07, "loss": 0.6819, "step": 32473 }, { "epoch": 0.93, "grad_norm": 4.098436774000365, "learning_rate": 1.2802046368518506e-07, "loss": 0.3739, "step": 32474 }, { "epoch": 0.93, "grad_norm": 3.987828810955691, "learning_rate": 1.279162136091111e-07, "loss": 0.3804, "step": 32475 }, { "epoch": 0.93, "grad_norm": 4.283078687176153, "learning_rate": 1.2781200544697447e-07, "loss": 0.523, "step": 32476 }, { "epoch": 0.93, "grad_norm": 5.472529429587066, "learning_rate": 1.2770783919967112e-07, "loss": 0.7252, "step": 32477 }, { "epoch": 0.93, "grad_norm": 4.963492677119273, "learning_rate": 1.2760371486809697e-07, "loss": 0.7496, "step": 32478 }, { "epoch": 0.93, "grad_norm": 4.335337746870514, "learning_rate": 1.27499632453148e-07, "loss": 0.4678, "step": 32479 }, { "epoch": 0.93, "grad_norm": 6.110516765861462, "learning_rate": 1.273955919557207e-07, "loss": 0.7397, "step": 32480 }, { "epoch": 0.93, "grad_norm": 3.669546744606485, "learning_rate": 1.2729159337670827e-07, "loss": 0.323, "step": 32481 }, { "epoch": 0.93, "grad_norm": 6.7089927973132175, "learning_rate": 1.2718763671700719e-07, "loss": 0.5897, "step": 32482 }, { "epoch": 0.93, "grad_norm": 5.553363754987489, "learning_rate": 1.2708372197751006e-07, "loss": 0.2591, "step": 32483 }, { "epoch": 0.93, "grad_norm": 7.947778429768218, "learning_rate": 1.2697984915911233e-07, "loss": 0.5137, "step": 32484 }, { "epoch": 0.93, "grad_norm": 2.965291166659961, "learning_rate": 1.2687601826270657e-07, "loss": 0.3294, "step": 32485 }, { "epoch": 0.93, "grad_norm": 6.153155173425812, "learning_rate": 1.2677222928918708e-07, "loss": 0.6245, "step": 32486 }, { "epoch": 0.93, "grad_norm": 4.022296603023236, "learning_rate": 1.2666848223944484e-07, "loss": 0.4903, "step": 32487 }, { "epoch": 0.93, "grad_norm": 4.492533569274202, "learning_rate": 1.265647771143741e-07, "loss": 0.3059, "step": 32488 }, { "epoch": 0.93, "grad_norm": 3.186300444684412, "learning_rate": 1.2646111391486694e-07, "loss": 0.2392, "step": 32489 }, { "epoch": 0.93, "grad_norm": 10.870596026843275, "learning_rate": 1.2635749264181485e-07, "loss": 0.517, "step": 32490 }, { "epoch": 0.93, "grad_norm": 4.294599034352332, "learning_rate": 1.2625391329610937e-07, "loss": 0.6139, "step": 32491 }, { "epoch": 0.93, "grad_norm": 3.7228454029425957, "learning_rate": 1.2615037587864087e-07, "loss": 0.3826, "step": 32492 }, { "epoch": 0.93, "grad_norm": 6.202223128474344, "learning_rate": 1.2604688039030088e-07, "loss": 0.3773, "step": 32493 }, { "epoch": 0.93, "grad_norm": 6.576542571099471, "learning_rate": 1.2594342683197925e-07, "loss": 0.5804, "step": 32494 }, { "epoch": 0.93, "grad_norm": 4.232199923157601, "learning_rate": 1.2584001520456578e-07, "loss": 0.3209, "step": 32495 }, { "epoch": 0.93, "grad_norm": 4.8652916119676135, "learning_rate": 1.257366455089515e-07, "loss": 0.52, "step": 32496 }, { "epoch": 0.93, "grad_norm": 7.668640076238843, "learning_rate": 1.256333177460245e-07, "loss": 0.5528, "step": 32497 }, { "epoch": 0.93, "grad_norm": 5.467026269916938, "learning_rate": 1.2553003191667355e-07, "loss": 0.1448, "step": 32498 }, { "epoch": 0.93, "grad_norm": 4.0133583856008785, "learning_rate": 1.2542678802178744e-07, "loss": 0.3798, "step": 32499 }, { "epoch": 0.93, "grad_norm": 10.53942807117079, "learning_rate": 1.2532358606225536e-07, "loss": 0.7595, "step": 32500 }, { "epoch": 0.93, "grad_norm": 3.1270589340006953, "learning_rate": 1.2522042603896335e-07, "loss": 0.1847, "step": 32501 }, { "epoch": 0.93, "grad_norm": 6.338533558731933, "learning_rate": 1.251173079528001e-07, "loss": 0.8392, "step": 32502 }, { "epoch": 0.93, "grad_norm": 1.7388791395485956, "learning_rate": 1.2501423180465266e-07, "loss": 0.1338, "step": 32503 }, { "epoch": 0.93, "grad_norm": 6.735197366656011, "learning_rate": 1.249111975954076e-07, "loss": 0.3964, "step": 32504 }, { "epoch": 0.93, "grad_norm": 9.571058470010442, "learning_rate": 1.248082053259514e-07, "loss": 0.5116, "step": 32505 }, { "epoch": 0.93, "grad_norm": 3.6255886981043033, "learning_rate": 1.247052549971689e-07, "loss": 0.4731, "step": 32506 }, { "epoch": 0.93, "grad_norm": 8.319836813353378, "learning_rate": 1.2460234660994774e-07, "loss": 0.7151, "step": 32507 }, { "epoch": 0.93, "grad_norm": 6.816072174892301, "learning_rate": 1.2449948016517222e-07, "loss": 0.6372, "step": 32508 }, { "epoch": 0.93, "grad_norm": 3.8180058230260268, "learning_rate": 1.2439665566372717e-07, "loss": 0.5696, "step": 32509 }, { "epoch": 0.93, "grad_norm": 4.0867464275002625, "learning_rate": 1.2429387310649799e-07, "loss": 0.3229, "step": 32510 }, { "epoch": 0.93, "grad_norm": 6.411615478754872, "learning_rate": 1.2419113249436732e-07, "loss": 0.2874, "step": 32511 }, { "epoch": 0.93, "grad_norm": 4.666851647434684, "learning_rate": 1.2408843382822111e-07, "loss": 0.4607, "step": 32512 }, { "epoch": 0.93, "grad_norm": 3.7449410487884123, "learning_rate": 1.239857771089409e-07, "loss": 0.7491, "step": 32513 }, { "epoch": 0.93, "grad_norm": 7.912221530636369, "learning_rate": 1.2388316233741095e-07, "loss": 0.1729, "step": 32514 }, { "epoch": 0.93, "grad_norm": 3.3987133045747955, "learning_rate": 1.237805895145139e-07, "loss": 0.1628, "step": 32515 }, { "epoch": 0.93, "grad_norm": 3.8508621105171916, "learning_rate": 1.2367805864113181e-07, "loss": 0.1462, "step": 32516 }, { "epoch": 0.93, "grad_norm": 6.808111614724375, "learning_rate": 1.2357556971814788e-07, "loss": 0.4559, "step": 32517 }, { "epoch": 0.93, "grad_norm": 6.149449925517248, "learning_rate": 1.2347312274644196e-07, "loss": 0.6189, "step": 32518 }, { "epoch": 0.93, "grad_norm": 10.34332825414607, "learning_rate": 1.2337071772689668e-07, "loss": 0.4693, "step": 32519 }, { "epoch": 0.93, "grad_norm": 7.976938745067165, "learning_rate": 1.2326835466039244e-07, "loss": 0.3776, "step": 32520 }, { "epoch": 0.93, "grad_norm": 5.512964141751229, "learning_rate": 1.2316603354781021e-07, "loss": 0.3463, "step": 32521 }, { "epoch": 0.93, "grad_norm": 2.858729576268066, "learning_rate": 1.2306375439002982e-07, "loss": 0.2402, "step": 32522 }, { "epoch": 0.93, "grad_norm": 5.0769870832939, "learning_rate": 1.2296151718793227e-07, "loss": 0.4687, "step": 32523 }, { "epoch": 0.93, "grad_norm": 3.3273040403677765, "learning_rate": 1.2285932194239626e-07, "loss": 0.3068, "step": 32524 }, { "epoch": 0.93, "grad_norm": 6.706740713519687, "learning_rate": 1.227571686543011e-07, "loss": 0.8776, "step": 32525 }, { "epoch": 0.93, "grad_norm": 2.5412208186930805, "learning_rate": 1.2265505732452443e-07, "loss": 0.4009, "step": 32526 }, { "epoch": 0.93, "grad_norm": 5.888277754871798, "learning_rate": 1.2255298795394667e-07, "loss": 0.6117, "step": 32527 }, { "epoch": 0.93, "grad_norm": 7.678602605737398, "learning_rate": 1.2245096054344485e-07, "loss": 0.4405, "step": 32528 }, { "epoch": 0.93, "grad_norm": 7.363937826252622, "learning_rate": 1.2234897509389664e-07, "loss": 0.3224, "step": 32529 }, { "epoch": 0.93, "grad_norm": 5.371717470887326, "learning_rate": 1.222470316061797e-07, "loss": 0.5627, "step": 32530 }, { "epoch": 0.93, "grad_norm": 8.535926738678462, "learning_rate": 1.2214513008117157e-07, "loss": 0.4371, "step": 32531 }, { "epoch": 0.93, "grad_norm": 3.579910998649967, "learning_rate": 1.2204327051974773e-07, "loss": 0.1427, "step": 32532 }, { "epoch": 0.93, "grad_norm": 7.298059992172785, "learning_rate": 1.2194145292278527e-07, "loss": 0.3396, "step": 32533 }, { "epoch": 0.93, "grad_norm": 2.3245011999505008, "learning_rate": 1.218396772911601e-07, "loss": 0.2724, "step": 32534 }, { "epoch": 0.93, "grad_norm": 7.491389799960267, "learning_rate": 1.2173794362574652e-07, "loss": 0.3097, "step": 32535 }, { "epoch": 0.93, "grad_norm": 3.5066655584219952, "learning_rate": 1.2163625192742223e-07, "loss": 0.2626, "step": 32536 }, { "epoch": 0.93, "grad_norm": 4.437521813567369, "learning_rate": 1.215346021970598e-07, "loss": 0.5051, "step": 32537 }, { "epoch": 0.93, "grad_norm": 4.033065446254041, "learning_rate": 1.2143299443553468e-07, "loss": 0.4148, "step": 32538 }, { "epoch": 0.93, "grad_norm": 4.013215407057336, "learning_rate": 1.2133142864372062e-07, "loss": 0.6082, "step": 32539 }, { "epoch": 0.93, "grad_norm": 3.876285985627693, "learning_rate": 1.2122990482249187e-07, "loss": 0.3235, "step": 32540 }, { "epoch": 0.93, "grad_norm": 5.909672830178276, "learning_rate": 1.2112842297272165e-07, "loss": 0.3526, "step": 32541 }, { "epoch": 0.93, "grad_norm": 8.364804196480208, "learning_rate": 1.210269830952826e-07, "loss": 0.2694, "step": 32542 }, { "epoch": 0.93, "grad_norm": 7.794320507934265, "learning_rate": 1.2092558519104792e-07, "loss": 0.6748, "step": 32543 }, { "epoch": 0.93, "grad_norm": 4.404223280331981, "learning_rate": 1.2082422926088965e-07, "loss": 0.5542, "step": 32544 }, { "epoch": 0.93, "grad_norm": 4.507509218059472, "learning_rate": 1.2072291530567992e-07, "loss": 0.4287, "step": 32545 }, { "epoch": 0.93, "grad_norm": 7.01689741324568, "learning_rate": 1.2062164332629023e-07, "loss": 0.4913, "step": 32546 }, { "epoch": 0.93, "grad_norm": 8.084269151558708, "learning_rate": 1.2052041332359098e-07, "loss": 0.5281, "step": 32547 }, { "epoch": 0.93, "grad_norm": 6.108328414991341, "learning_rate": 1.2041922529845485e-07, "loss": 0.4492, "step": 32548 }, { "epoch": 0.93, "grad_norm": 6.783615121760537, "learning_rate": 1.2031807925175055e-07, "loss": 0.4689, "step": 32549 }, { "epoch": 0.93, "grad_norm": 5.428076064482728, "learning_rate": 1.202169751843496e-07, "loss": 0.3167, "step": 32550 }, { "epoch": 0.93, "grad_norm": 4.121953966658303, "learning_rate": 1.2011591309712134e-07, "loss": 0.3685, "step": 32551 }, { "epoch": 0.93, "grad_norm": 3.1132979063500286, "learning_rate": 1.2001489299093449e-07, "loss": 0.1472, "step": 32552 }, { "epoch": 0.93, "grad_norm": 5.286271997434428, "learning_rate": 1.1991391486665894e-07, "loss": 0.5403, "step": 32553 }, { "epoch": 0.93, "grad_norm": 5.376396884823028, "learning_rate": 1.1981297872516284e-07, "loss": 0.3344, "step": 32554 }, { "epoch": 0.93, "grad_norm": 4.30068528902438, "learning_rate": 1.1971208456731553e-07, "loss": 0.255, "step": 32555 }, { "epoch": 0.93, "grad_norm": 6.751367200919365, "learning_rate": 1.1961123239398352e-07, "loss": 0.5229, "step": 32556 }, { "epoch": 0.93, "grad_norm": 3.5742492841239724, "learning_rate": 1.195104222060356e-07, "loss": 0.3568, "step": 32557 }, { "epoch": 0.93, "grad_norm": 5.240678163462372, "learning_rate": 1.194096540043388e-07, "loss": 0.2126, "step": 32558 }, { "epoch": 0.93, "grad_norm": 6.435702878598068, "learning_rate": 1.1930892778975968e-07, "loss": 0.8118, "step": 32559 }, { "epoch": 0.93, "grad_norm": 3.9290366875733502, "learning_rate": 1.1920824356316474e-07, "loss": 0.2982, "step": 32560 }, { "epoch": 0.93, "grad_norm": 6.237768678860487, "learning_rate": 1.1910760132541999e-07, "loss": 0.2761, "step": 32561 }, { "epoch": 0.93, "grad_norm": 7.488505901853176, "learning_rate": 1.1900700107739194e-07, "loss": 0.5151, "step": 32562 }, { "epoch": 0.93, "grad_norm": 3.5637338576257314, "learning_rate": 1.1890644281994545e-07, "loss": 0.6038, "step": 32563 }, { "epoch": 0.93, "grad_norm": 4.311178557793064, "learning_rate": 1.1880592655394708e-07, "loss": 0.5087, "step": 32564 }, { "epoch": 0.93, "grad_norm": 7.16394894841682, "learning_rate": 1.1870545228025886e-07, "loss": 0.4788, "step": 32565 }, { "epoch": 0.93, "grad_norm": 9.53728206680508, "learning_rate": 1.1860501999974739e-07, "loss": 0.4347, "step": 32566 }, { "epoch": 0.93, "grad_norm": 6.799571342140557, "learning_rate": 1.1850462971327581e-07, "loss": 0.8199, "step": 32567 }, { "epoch": 0.93, "grad_norm": 9.175425191688541, "learning_rate": 1.184042814217079e-07, "loss": 0.4284, "step": 32568 }, { "epoch": 0.93, "grad_norm": 5.233561142237469, "learning_rate": 1.183039751259063e-07, "loss": 0.4255, "step": 32569 }, { "epoch": 0.93, "grad_norm": 5.40477428250809, "learning_rate": 1.1820371082673532e-07, "loss": 0.1046, "step": 32570 }, { "epoch": 0.93, "grad_norm": 5.545304540831055, "learning_rate": 1.1810348852505649e-07, "loss": 0.6138, "step": 32571 }, { "epoch": 0.93, "grad_norm": 7.525679114737785, "learning_rate": 1.1800330822173245e-07, "loss": 0.5147, "step": 32572 }, { "epoch": 0.93, "grad_norm": 3.8256767696473117, "learning_rate": 1.1790316991762474e-07, "loss": 0.2902, "step": 32573 }, { "epoch": 0.93, "grad_norm": 3.8155988387727033, "learning_rate": 1.1780307361359433e-07, "loss": 0.2107, "step": 32574 }, { "epoch": 0.93, "grad_norm": 6.730585647426874, "learning_rate": 1.1770301931050388e-07, "loss": 0.3408, "step": 32575 }, { "epoch": 0.93, "grad_norm": 4.295989704831892, "learning_rate": 1.1760300700921267e-07, "loss": 0.6618, "step": 32576 }, { "epoch": 0.93, "grad_norm": 6.483355890184141, "learning_rate": 1.1750303671058228e-07, "loss": 0.5295, "step": 32577 }, { "epoch": 0.93, "grad_norm": 5.595373333704399, "learning_rate": 1.1740310841547198e-07, "loss": 0.4246, "step": 32578 }, { "epoch": 0.93, "grad_norm": 4.921386355409439, "learning_rate": 1.1730322212474054e-07, "loss": 0.2675, "step": 32579 }, { "epoch": 0.93, "grad_norm": 3.844911880425917, "learning_rate": 1.1720337783924951e-07, "loss": 0.1593, "step": 32580 }, { "epoch": 0.93, "grad_norm": 6.019382205135361, "learning_rate": 1.1710357555985596e-07, "loss": 0.6197, "step": 32581 }, { "epoch": 0.93, "grad_norm": 6.395976132301233, "learning_rate": 1.1700381528741977e-07, "loss": 0.4685, "step": 32582 }, { "epoch": 0.93, "grad_norm": 2.87274364372734, "learning_rate": 1.1690409702279748e-07, "loss": 0.3216, "step": 32583 }, { "epoch": 0.93, "grad_norm": 5.541378338801438, "learning_rate": 1.1680442076684895e-07, "loss": 0.3693, "step": 32584 }, { "epoch": 0.93, "grad_norm": 4.922189563205624, "learning_rate": 1.1670478652043072e-07, "loss": 0.5138, "step": 32585 }, { "epoch": 0.93, "grad_norm": 2.474455662313759, "learning_rate": 1.1660519428439987e-07, "loss": 0.0187, "step": 32586 }, { "epoch": 0.93, "grad_norm": 10.467896930611342, "learning_rate": 1.1650564405961295e-07, "loss": 0.8907, "step": 32587 }, { "epoch": 0.93, "grad_norm": 5.345110268378748, "learning_rate": 1.1640613584692706e-07, "loss": 0.7761, "step": 32588 }, { "epoch": 0.93, "grad_norm": 4.761941755923123, "learning_rate": 1.1630666964719761e-07, "loss": 0.6036, "step": 32589 }, { "epoch": 0.93, "grad_norm": 5.121273360605291, "learning_rate": 1.162072454612806e-07, "loss": 0.555, "step": 32590 }, { "epoch": 0.93, "grad_norm": 4.464070437554128, "learning_rate": 1.1610786329003198e-07, "loss": 0.4421, "step": 32591 }, { "epoch": 0.93, "grad_norm": 4.375530156302452, "learning_rate": 1.1600852313430555e-07, "loss": 0.2696, "step": 32592 }, { "epoch": 0.93, "grad_norm": 4.332202409230041, "learning_rate": 1.1590922499495616e-07, "loss": 0.195, "step": 32593 }, { "epoch": 0.93, "grad_norm": 14.835544976721183, "learning_rate": 1.1580996887283869e-07, "loss": 0.5213, "step": 32594 }, { "epoch": 0.93, "grad_norm": 3.0429329239544494, "learning_rate": 1.1571075476880634e-07, "loss": 0.2792, "step": 32595 }, { "epoch": 0.93, "grad_norm": 4.619017572356728, "learning_rate": 1.1561158268371398e-07, "loss": 0.4131, "step": 32596 }, { "epoch": 0.93, "grad_norm": 3.6291973214911426, "learning_rate": 1.1551245261841315e-07, "loss": 0.2595, "step": 32597 }, { "epoch": 0.93, "grad_norm": 4.762728426804286, "learning_rate": 1.1541336457375762e-07, "loss": 0.6381, "step": 32598 }, { "epoch": 0.93, "grad_norm": 4.046400123335541, "learning_rate": 1.1531431855059894e-07, "loss": 0.3752, "step": 32599 }, { "epoch": 0.93, "grad_norm": 11.53014784096974, "learning_rate": 1.152153145497903e-07, "loss": 0.4797, "step": 32600 }, { "epoch": 0.93, "grad_norm": 5.321751560968086, "learning_rate": 1.1511635257218213e-07, "loss": 0.4431, "step": 32601 }, { "epoch": 0.93, "grad_norm": 5.50825959683849, "learning_rate": 1.1501743261862652e-07, "loss": 0.7073, "step": 32602 }, { "epoch": 0.93, "grad_norm": 5.559885310595618, "learning_rate": 1.1491855468997559e-07, "loss": 0.1922, "step": 32603 }, { "epoch": 0.93, "grad_norm": 2.733748244253187, "learning_rate": 1.1481971878707809e-07, "loss": 0.0956, "step": 32604 }, { "epoch": 0.93, "grad_norm": 5.848174149303756, "learning_rate": 1.1472092491078557e-07, "loss": 0.3772, "step": 32605 }, { "epoch": 0.93, "grad_norm": 4.439835806723219, "learning_rate": 1.1462217306194679e-07, "loss": 0.4345, "step": 32606 }, { "epoch": 0.93, "grad_norm": 2.744653728476696, "learning_rate": 1.1452346324141273e-07, "loss": 0.1363, "step": 32607 }, { "epoch": 0.93, "grad_norm": 7.073367846503925, "learning_rate": 1.1442479545003104e-07, "loss": 0.4715, "step": 32608 }, { "epoch": 0.93, "grad_norm": 6.31854710012614, "learning_rate": 1.1432616968865162e-07, "loss": 0.2819, "step": 32609 }, { "epoch": 0.93, "grad_norm": 6.950133496655866, "learning_rate": 1.1422758595812211e-07, "loss": 0.5188, "step": 32610 }, { "epoch": 0.93, "grad_norm": 3.4730306623057063, "learning_rate": 1.1412904425929184e-07, "loss": 0.2439, "step": 32611 }, { "epoch": 0.93, "grad_norm": 3.65517426014055, "learning_rate": 1.1403054459300788e-07, "loss": 0.2389, "step": 32612 }, { "epoch": 0.93, "grad_norm": 6.403929908990757, "learning_rate": 1.1393208696011738e-07, "loss": 0.7218, "step": 32613 }, { "epoch": 0.93, "grad_norm": 5.712561797761439, "learning_rate": 1.1383367136146739e-07, "loss": 0.2234, "step": 32614 }, { "epoch": 0.93, "grad_norm": 9.064657125579616, "learning_rate": 1.1373529779790449e-07, "loss": 0.5441, "step": 32615 }, { "epoch": 0.93, "grad_norm": 4.424837609452537, "learning_rate": 1.1363696627027576e-07, "loss": 0.5799, "step": 32616 }, { "epoch": 0.93, "grad_norm": 2.367378642608291, "learning_rate": 1.135386767794261e-07, "loss": 0.3849, "step": 32617 }, { "epoch": 0.93, "grad_norm": 4.406857550108634, "learning_rate": 1.1344042932620203e-07, "loss": 0.3549, "step": 32618 }, { "epoch": 0.93, "grad_norm": 3.842972188266768, "learning_rate": 1.1334222391144845e-07, "loss": 0.3196, "step": 32619 }, { "epoch": 0.93, "grad_norm": 3.6972360907018347, "learning_rate": 1.1324406053600912e-07, "loss": 0.2375, "step": 32620 }, { "epoch": 0.93, "grad_norm": 7.902874916014513, "learning_rate": 1.1314593920073059e-07, "loss": 0.6589, "step": 32621 }, { "epoch": 0.93, "grad_norm": 2.61458339665832, "learning_rate": 1.1304785990645495e-07, "loss": 0.2241, "step": 32622 }, { "epoch": 0.93, "grad_norm": 3.5461729590055087, "learning_rate": 1.1294982265402765e-07, "loss": 0.5982, "step": 32623 }, { "epoch": 0.93, "grad_norm": 4.197286954588589, "learning_rate": 1.128518274442908e-07, "loss": 0.408, "step": 32624 }, { "epoch": 0.93, "grad_norm": 6.138723977013769, "learning_rate": 1.1275387427808815e-07, "loss": 0.3286, "step": 32625 }, { "epoch": 0.93, "grad_norm": 7.455783713662308, "learning_rate": 1.1265596315626181e-07, "loss": 0.4686, "step": 32626 }, { "epoch": 0.93, "grad_norm": 3.636875923603712, "learning_rate": 1.1255809407965445e-07, "loss": 0.5261, "step": 32627 }, { "epoch": 0.93, "grad_norm": 9.186074801601613, "learning_rate": 1.1246026704910873e-07, "loss": 0.6886, "step": 32628 }, { "epoch": 0.93, "grad_norm": 7.331497034722281, "learning_rate": 1.1236248206546507e-07, "loss": 0.8786, "step": 32629 }, { "epoch": 0.93, "grad_norm": 3.491362924535325, "learning_rate": 1.1226473912956559e-07, "loss": 0.2005, "step": 32630 }, { "epoch": 0.93, "grad_norm": 9.025282581163918, "learning_rate": 1.1216703824225017e-07, "loss": 0.5153, "step": 32631 }, { "epoch": 0.93, "grad_norm": 3.800254322626478, "learning_rate": 1.1206937940436091e-07, "loss": 0.438, "step": 32632 }, { "epoch": 0.93, "grad_norm": 2.6023540266435883, "learning_rate": 1.1197176261673603e-07, "loss": 0.1225, "step": 32633 }, { "epoch": 0.93, "grad_norm": 5.112977976945003, "learning_rate": 1.1187418788021654e-07, "loss": 0.6793, "step": 32634 }, { "epoch": 0.93, "grad_norm": 4.813193036004608, "learning_rate": 1.1177665519564119e-07, "loss": 0.3477, "step": 32635 }, { "epoch": 0.93, "grad_norm": 3.8007039045597195, "learning_rate": 1.1167916456384931e-07, "loss": 0.3787, "step": 32636 }, { "epoch": 0.93, "grad_norm": 5.448346030515243, "learning_rate": 1.1158171598568023e-07, "loss": 0.3425, "step": 32637 }, { "epoch": 0.93, "grad_norm": 3.4997238833640165, "learning_rate": 1.114843094619722e-07, "loss": 0.1178, "step": 32638 }, { "epoch": 0.93, "grad_norm": 6.4567691590552965, "learning_rate": 1.1138694499356229e-07, "loss": 0.4662, "step": 32639 }, { "epoch": 0.93, "grad_norm": 9.90645767619981, "learning_rate": 1.1128962258128817e-07, "loss": 0.7265, "step": 32640 }, { "epoch": 0.93, "grad_norm": 5.028928043210993, "learning_rate": 1.1119234222598862e-07, "loss": 0.4957, "step": 32641 }, { "epoch": 0.93, "grad_norm": 33.80362876960348, "learning_rate": 1.1109510392849799e-07, "loss": 0.4225, "step": 32642 }, { "epoch": 0.93, "grad_norm": 4.977306891181975, "learning_rate": 1.1099790768965501e-07, "loss": 0.3219, "step": 32643 }, { "epoch": 0.93, "grad_norm": 3.6956649192848823, "learning_rate": 1.1090075351029517e-07, "loss": 0.4549, "step": 32644 }, { "epoch": 0.93, "grad_norm": 3.1374789370251537, "learning_rate": 1.1080364139125443e-07, "loss": 0.1948, "step": 32645 }, { "epoch": 0.93, "grad_norm": 3.8404555119611032, "learning_rate": 1.107065713333677e-07, "loss": 0.2809, "step": 32646 }, { "epoch": 0.93, "grad_norm": 2.821607002045088, "learning_rate": 1.1060954333746987e-07, "loss": 0.3317, "step": 32647 }, { "epoch": 0.93, "grad_norm": 4.2165526424918385, "learning_rate": 1.1051255740439693e-07, "loss": 0.704, "step": 32648 }, { "epoch": 0.93, "grad_norm": 3.681292222433387, "learning_rate": 1.1041561353498154e-07, "loss": 0.3275, "step": 32649 }, { "epoch": 0.94, "grad_norm": 3.2211499202104332, "learning_rate": 1.1031871173005915e-07, "loss": 0.3873, "step": 32650 }, { "epoch": 0.94, "grad_norm": 12.824358369716188, "learning_rate": 1.1022185199046298e-07, "loss": 0.8767, "step": 32651 }, { "epoch": 0.94, "grad_norm": 7.984812088142512, "learning_rate": 1.101250343170257e-07, "loss": 0.5902, "step": 32652 }, { "epoch": 0.94, "grad_norm": 5.530961368594855, "learning_rate": 1.1002825871058165e-07, "loss": 0.354, "step": 32653 }, { "epoch": 0.94, "grad_norm": 5.588891546051999, "learning_rate": 1.0993152517196126e-07, "loss": 0.6098, "step": 32654 }, { "epoch": 0.94, "grad_norm": 3.98753246784445, "learning_rate": 1.0983483370199888e-07, "loss": 0.3514, "step": 32655 }, { "epoch": 0.94, "grad_norm": 8.23825022590768, "learning_rate": 1.0973818430152439e-07, "loss": 0.6863, "step": 32656 }, { "epoch": 0.94, "grad_norm": 2.7457836079086713, "learning_rate": 1.0964157697137101e-07, "loss": 0.3663, "step": 32657 }, { "epoch": 0.94, "grad_norm": 6.852735847386442, "learning_rate": 1.0954501171236864e-07, "loss": 0.4873, "step": 32658 }, { "epoch": 0.94, "grad_norm": 6.424999284558683, "learning_rate": 1.0944848852534828e-07, "loss": 0.5976, "step": 32659 }, { "epoch": 0.94, "grad_norm": 4.5357833809840855, "learning_rate": 1.0935200741114038e-07, "loss": 0.5465, "step": 32660 }, { "epoch": 0.94, "grad_norm": 5.7884331694448115, "learning_rate": 1.0925556837057538e-07, "loss": 0.6338, "step": 32661 }, { "epoch": 0.94, "grad_norm": 2.078183965635178, "learning_rate": 1.0915917140448263e-07, "loss": 0.142, "step": 32662 }, { "epoch": 0.94, "grad_norm": 4.99016101283067, "learning_rate": 1.0906281651369033e-07, "loss": 0.3678, "step": 32663 }, { "epoch": 0.94, "grad_norm": 5.427658668036751, "learning_rate": 1.089665036990295e-07, "loss": 0.4064, "step": 32664 }, { "epoch": 0.94, "grad_norm": 2.5253121820408797, "learning_rate": 1.0887023296132782e-07, "loss": 0.3323, "step": 32665 }, { "epoch": 0.94, "grad_norm": 2.709912878761642, "learning_rate": 1.0877400430141294e-07, "loss": 0.1728, "step": 32666 }, { "epoch": 0.94, "grad_norm": 4.703943986017487, "learning_rate": 1.0867781772011254e-07, "loss": 0.5545, "step": 32667 }, { "epoch": 0.94, "grad_norm": 4.996831700732042, "learning_rate": 1.0858167321825485e-07, "loss": 0.414, "step": 32668 }, { "epoch": 0.94, "grad_norm": 5.428611549967737, "learning_rate": 1.0848557079666755e-07, "loss": 0.1622, "step": 32669 }, { "epoch": 0.94, "grad_norm": 4.520025201575316, "learning_rate": 1.0838951045617551e-07, "loss": 0.2564, "step": 32670 }, { "epoch": 0.94, "grad_norm": 3.7178134700622856, "learning_rate": 1.0829349219760699e-07, "loss": 0.2631, "step": 32671 }, { "epoch": 0.94, "grad_norm": 4.243916505179236, "learning_rate": 1.0819751602178741e-07, "loss": 0.3868, "step": 32672 }, { "epoch": 0.94, "grad_norm": 9.352257061423426, "learning_rate": 1.0810158192954223e-07, "loss": 0.6794, "step": 32673 }, { "epoch": 0.94, "grad_norm": 10.039559128644973, "learning_rate": 1.0800568992169635e-07, "loss": 0.8515, "step": 32674 }, { "epoch": 0.94, "grad_norm": 4.002301507682603, "learning_rate": 1.0790983999907579e-07, "loss": 0.2871, "step": 32675 }, { "epoch": 0.94, "grad_norm": 5.688876645667756, "learning_rate": 1.0781403216250375e-07, "loss": 0.3033, "step": 32676 }, { "epoch": 0.94, "grad_norm": 6.0728070645614, "learning_rate": 1.0771826641280514e-07, "loss": 0.4233, "step": 32677 }, { "epoch": 0.94, "grad_norm": 8.495420175795772, "learning_rate": 1.0762254275080542e-07, "loss": 0.8951, "step": 32678 }, { "epoch": 0.94, "grad_norm": 4.069618971419841, "learning_rate": 1.0752686117732559e-07, "loss": 0.4745, "step": 32679 }, { "epoch": 0.94, "grad_norm": 6.105630688219001, "learning_rate": 1.0743122169318998e-07, "loss": 0.3379, "step": 32680 }, { "epoch": 0.94, "grad_norm": 4.16993914205295, "learning_rate": 1.0733562429922073e-07, "loss": 0.271, "step": 32681 }, { "epoch": 0.94, "grad_norm": 4.595570754881879, "learning_rate": 1.0724006899624106e-07, "loss": 0.3587, "step": 32682 }, { "epoch": 0.94, "grad_norm": 4.998537183401403, "learning_rate": 1.0714455578507255e-07, "loss": 0.5856, "step": 32683 }, { "epoch": 0.94, "grad_norm": 2.499501154721339, "learning_rate": 1.0704908466653729e-07, "loss": 0.1898, "step": 32684 }, { "epoch": 0.94, "grad_norm": 9.787771080423688, "learning_rate": 1.0695365564145632e-07, "loss": 0.6612, "step": 32685 }, { "epoch": 0.94, "grad_norm": 3.7833866356499244, "learning_rate": 1.0685826871065008e-07, "loss": 0.4241, "step": 32686 }, { "epoch": 0.94, "grad_norm": 3.9126242852503905, "learning_rate": 1.0676292387494014e-07, "loss": 0.2314, "step": 32687 }, { "epoch": 0.94, "grad_norm": 8.029808271264047, "learning_rate": 1.0666762113514584e-07, "loss": 0.5357, "step": 32688 }, { "epoch": 0.94, "grad_norm": 8.239230553895142, "learning_rate": 1.0657236049208819e-07, "loss": 0.4295, "step": 32689 }, { "epoch": 0.94, "grad_norm": 4.113086315437414, "learning_rate": 1.0647714194658542e-07, "loss": 0.2976, "step": 32690 }, { "epoch": 0.94, "grad_norm": 6.293774241274807, "learning_rate": 1.06381965499458e-07, "loss": 0.638, "step": 32691 }, { "epoch": 0.94, "grad_norm": 4.033743301683934, "learning_rate": 1.062868311515236e-07, "loss": 0.2627, "step": 32692 }, { "epoch": 0.94, "grad_norm": 5.158693139382399, "learning_rate": 1.0619173890360102e-07, "loss": 0.2992, "step": 32693 }, { "epoch": 0.94, "grad_norm": 5.059483347551989, "learning_rate": 1.0609668875650847e-07, "loss": 0.6857, "step": 32694 }, { "epoch": 0.94, "grad_norm": 4.6996656095983065, "learning_rate": 1.0600168071106309e-07, "loss": 0.4522, "step": 32695 }, { "epoch": 0.94, "grad_norm": 4.46531313000032, "learning_rate": 1.0590671476808312e-07, "loss": 0.3462, "step": 32696 }, { "epoch": 0.94, "grad_norm": 8.706104254889041, "learning_rate": 1.0581179092838512e-07, "loss": 0.6717, "step": 32697 }, { "epoch": 0.94, "grad_norm": 9.560592286544667, "learning_rate": 1.0571690919278566e-07, "loss": 0.5859, "step": 32698 }, { "epoch": 0.94, "grad_norm": 5.240125179397144, "learning_rate": 1.056220695621013e-07, "loss": 0.3256, "step": 32699 }, { "epoch": 0.94, "grad_norm": 3.4842740715702387, "learning_rate": 1.0552727203714697e-07, "loss": 0.2235, "step": 32700 }, { "epoch": 0.94, "grad_norm": 3.5430975220819745, "learning_rate": 1.0543251661873976e-07, "loss": 0.1226, "step": 32701 }, { "epoch": 0.94, "grad_norm": 5.941694203759255, "learning_rate": 1.0533780330769294e-07, "loss": 0.4708, "step": 32702 }, { "epoch": 0.94, "grad_norm": 5.120320905597622, "learning_rate": 1.0524313210482307e-07, "loss": 0.4991, "step": 32703 }, { "epoch": 0.94, "grad_norm": 5.420315389008356, "learning_rate": 1.0514850301094337e-07, "loss": 0.3046, "step": 32704 }, { "epoch": 0.94, "grad_norm": 5.076667318204227, "learning_rate": 1.0505391602686932e-07, "loss": 0.5773, "step": 32705 }, { "epoch": 0.94, "grad_norm": 3.519968376153078, "learning_rate": 1.0495937115341304e-07, "loss": 0.1839, "step": 32706 }, { "epoch": 0.94, "grad_norm": 5.777084582063045, "learning_rate": 1.0486486839138943e-07, "loss": 0.3996, "step": 32707 }, { "epoch": 0.94, "grad_norm": 5.334559048928575, "learning_rate": 1.0477040774160952e-07, "loss": 0.682, "step": 32708 }, { "epoch": 0.94, "grad_norm": 4.160748850563687, "learning_rate": 1.0467598920488708e-07, "loss": 0.3303, "step": 32709 }, { "epoch": 0.94, "grad_norm": 5.596949445526502, "learning_rate": 1.0458161278203538e-07, "loss": 0.3335, "step": 32710 }, { "epoch": 0.94, "grad_norm": 5.676242208435585, "learning_rate": 1.0448727847386486e-07, "loss": 0.5397, "step": 32711 }, { "epoch": 0.94, "grad_norm": 4.780839092075408, "learning_rate": 1.0439298628118766e-07, "loss": 0.1714, "step": 32712 }, { "epoch": 0.94, "grad_norm": 7.044319298390036, "learning_rate": 1.0429873620481423e-07, "loss": 0.3443, "step": 32713 }, { "epoch": 0.94, "grad_norm": 3.694297215548457, "learning_rate": 1.0420452824555671e-07, "loss": 0.4341, "step": 32714 }, { "epoch": 0.94, "grad_norm": 4.368040189259476, "learning_rate": 1.0411036240422445e-07, "loss": 0.3903, "step": 32715 }, { "epoch": 0.94, "grad_norm": 3.4670549495436633, "learning_rate": 1.0401623868162791e-07, "loss": 0.1426, "step": 32716 }, { "epoch": 0.94, "grad_norm": 5.804286034153725, "learning_rate": 1.03922157078577e-07, "loss": 0.5204, "step": 32717 }, { "epoch": 0.94, "grad_norm": 5.378802218847498, "learning_rate": 1.0382811759588107e-07, "loss": 0.3152, "step": 32718 }, { "epoch": 0.94, "grad_norm": 4.274913749047821, "learning_rate": 1.0373412023434893e-07, "loss": 0.6356, "step": 32719 }, { "epoch": 0.94, "grad_norm": 3.7688629664860653, "learning_rate": 1.036401649947888e-07, "loss": 0.3156, "step": 32720 }, { "epoch": 0.94, "grad_norm": 4.101297191940156, "learning_rate": 1.0354625187801004e-07, "loss": 0.3592, "step": 32721 }, { "epoch": 0.94, "grad_norm": 2.4585521399919767, "learning_rate": 1.034523808848198e-07, "loss": 0.1346, "step": 32722 }, { "epoch": 0.94, "grad_norm": 6.206595422941597, "learning_rate": 1.033585520160263e-07, "loss": 0.2671, "step": 32723 }, { "epoch": 0.94, "grad_norm": 5.59078295566122, "learning_rate": 1.0326476527243556e-07, "loss": 0.1892, "step": 32724 }, { "epoch": 0.94, "grad_norm": 6.897694393237851, "learning_rate": 1.0317102065485585e-07, "loss": 0.4562, "step": 32725 }, { "epoch": 0.94, "grad_norm": 5.966560081865451, "learning_rate": 1.0307731816409262e-07, "loss": 0.3036, "step": 32726 }, { "epoch": 0.94, "grad_norm": 4.2713762574015535, "learning_rate": 1.0298365780095187e-07, "loss": 0.3134, "step": 32727 }, { "epoch": 0.94, "grad_norm": 3.6554398739565546, "learning_rate": 1.0289003956624077e-07, "loss": 0.4817, "step": 32728 }, { "epoch": 0.94, "grad_norm": 4.318047645528611, "learning_rate": 1.0279646346076255e-07, "loss": 0.1558, "step": 32729 }, { "epoch": 0.94, "grad_norm": 6.190443437004076, "learning_rate": 1.0270292948532435e-07, "loss": 0.4829, "step": 32730 }, { "epoch": 0.94, "grad_norm": 2.382945773040372, "learning_rate": 1.0260943764072939e-07, "loss": 0.0737, "step": 32731 }, { "epoch": 0.94, "grad_norm": 2.9894730927525615, "learning_rate": 1.0251598792778262e-07, "loss": 0.2719, "step": 32732 }, { "epoch": 0.94, "grad_norm": 4.827857593427267, "learning_rate": 1.024225803472878e-07, "loss": 0.2657, "step": 32733 }, { "epoch": 0.94, "grad_norm": 2.325792026975328, "learning_rate": 1.0232921490004821e-07, "loss": 0.2973, "step": 32734 }, { "epoch": 0.94, "grad_norm": 4.249858573355609, "learning_rate": 1.0223589158686764e-07, "loss": 0.2814, "step": 32735 }, { "epoch": 0.94, "grad_norm": 3.4712093673157667, "learning_rate": 1.0214261040854878e-07, "loss": 0.2034, "step": 32736 }, { "epoch": 0.94, "grad_norm": 16.12434758485538, "learning_rate": 1.0204937136589377e-07, "loss": 0.7073, "step": 32737 }, { "epoch": 0.94, "grad_norm": 3.003145694562125, "learning_rate": 1.019561744597053e-07, "loss": 0.2904, "step": 32738 }, { "epoch": 0.94, "grad_norm": 5.100231096230329, "learning_rate": 1.0186301969078494e-07, "loss": 0.2805, "step": 32739 }, { "epoch": 0.94, "grad_norm": 5.793808529588016, "learning_rate": 1.0176990705993318e-07, "loss": 0.5931, "step": 32740 }, { "epoch": 0.94, "grad_norm": 2.91413153190766, "learning_rate": 1.0167683656795213e-07, "loss": 0.5529, "step": 32741 }, { "epoch": 0.94, "grad_norm": 2.427020636282187, "learning_rate": 1.0158380821564229e-07, "loss": 0.1426, "step": 32742 }, { "epoch": 0.94, "grad_norm": 5.982542628938624, "learning_rate": 1.0149082200380356e-07, "loss": 0.6204, "step": 32743 }, { "epoch": 0.94, "grad_norm": 12.07153981706529, "learning_rate": 1.0139787793323641e-07, "loss": 0.456, "step": 32744 }, { "epoch": 0.94, "grad_norm": 5.61489940407133, "learning_rate": 1.0130497600474076e-07, "loss": 0.3875, "step": 32745 }, { "epoch": 0.94, "grad_norm": 3.5945767156639454, "learning_rate": 1.0121211621911487e-07, "loss": 0.3355, "step": 32746 }, { "epoch": 0.94, "grad_norm": 9.6292900829145, "learning_rate": 1.0111929857715752e-07, "loss": 0.4444, "step": 32747 }, { "epoch": 0.94, "grad_norm": 5.134151522137265, "learning_rate": 1.0102652307966809e-07, "loss": 0.5365, "step": 32748 }, { "epoch": 0.94, "grad_norm": 8.887414443064309, "learning_rate": 1.0093378972744373e-07, "loss": 0.7495, "step": 32749 }, { "epoch": 0.94, "grad_norm": 5.560284719929998, "learning_rate": 1.0084109852128321e-07, "loss": 0.3878, "step": 32750 }, { "epoch": 0.94, "grad_norm": 6.3840436740220285, "learning_rate": 1.007484494619837e-07, "loss": 0.5302, "step": 32751 }, { "epoch": 0.94, "grad_norm": 7.264500487890548, "learning_rate": 1.0065584255034177e-07, "loss": 0.2958, "step": 32752 }, { "epoch": 0.94, "grad_norm": 5.2454313426663814, "learning_rate": 1.0056327778715458e-07, "loss": 0.3974, "step": 32753 }, { "epoch": 0.94, "grad_norm": 4.8498958605961136, "learning_rate": 1.0047075517321814e-07, "loss": 0.2622, "step": 32754 }, { "epoch": 0.94, "grad_norm": 6.4600697756917, "learning_rate": 1.0037827470932904e-07, "loss": 0.2175, "step": 32755 }, { "epoch": 0.94, "grad_norm": 5.179739789281695, "learning_rate": 1.0028583639628164e-07, "loss": 0.4004, "step": 32756 }, { "epoch": 0.94, "grad_norm": 9.510200545385578, "learning_rate": 1.0019344023487255e-07, "loss": 0.5104, "step": 32757 }, { "epoch": 0.94, "grad_norm": 6.558092435543085, "learning_rate": 1.0010108622589553e-07, "loss": 0.6249, "step": 32758 }, { "epoch": 0.94, "grad_norm": 7.299075789278018, "learning_rate": 1.0000877437014556e-07, "loss": 0.6458, "step": 32759 }, { "epoch": 0.94, "grad_norm": 5.1100702308561505, "learning_rate": 9.991650466841752e-08, "loss": 0.6612, "step": 32760 }, { "epoch": 0.94, "grad_norm": 6.55975266124067, "learning_rate": 9.982427712150356e-08, "loss": 0.3131, "step": 32761 }, { "epoch": 0.94, "grad_norm": 8.221951807432507, "learning_rate": 9.97320917301986e-08, "loss": 0.8031, "step": 32762 }, { "epoch": 0.94, "grad_norm": 4.46185165097793, "learning_rate": 9.96399484952948e-08, "loss": 0.4345, "step": 32763 }, { "epoch": 0.94, "grad_norm": 3.751995954214489, "learning_rate": 9.954784741758539e-08, "loss": 0.3761, "step": 32764 }, { "epoch": 0.94, "grad_norm": 7.706316118660559, "learning_rate": 9.945578849786197e-08, "loss": 0.5394, "step": 32765 }, { "epoch": 0.94, "grad_norm": 6.925019731734497, "learning_rate": 9.93637717369178e-08, "loss": 0.5485, "step": 32766 }, { "epoch": 0.94, "grad_norm": 20.504344840290788, "learning_rate": 9.927179713554391e-08, "loss": 0.3774, "step": 32767 }, { "epoch": 0.94, "grad_norm": 5.15243870298451, "learning_rate": 9.917986469453023e-08, "loss": 0.5586, "step": 32768 }, { "epoch": 0.94, "grad_norm": 2.837242257179843, "learning_rate": 9.908797441466944e-08, "loss": 0.407, "step": 32769 }, { "epoch": 0.94, "grad_norm": 5.66040118564833, "learning_rate": 9.899612629675148e-08, "loss": 0.4164, "step": 32770 }, { "epoch": 0.94, "grad_norm": 4.86302818038215, "learning_rate": 9.890432034156627e-08, "loss": 0.4178, "step": 32771 }, { "epoch": 0.94, "grad_norm": 6.1779671648362555, "learning_rate": 9.881255654990373e-08, "loss": 0.3932, "step": 32772 }, { "epoch": 0.94, "grad_norm": 2.820030790086489, "learning_rate": 9.87208349225538e-08, "loss": 0.3253, "step": 32773 }, { "epoch": 0.94, "grad_norm": 6.5719435546317735, "learning_rate": 9.862915546030416e-08, "loss": 0.7463, "step": 32774 }, { "epoch": 0.94, "grad_norm": 3.1276031904936366, "learning_rate": 9.853751816394474e-08, "loss": 0.2274, "step": 32775 }, { "epoch": 0.94, "grad_norm": 3.2833824494523856, "learning_rate": 9.844592303426381e-08, "loss": 0.266, "step": 32776 }, { "epoch": 0.94, "grad_norm": 9.180882256857574, "learning_rate": 9.835437007204906e-08, "loss": 0.505, "step": 32777 }, { "epoch": 0.94, "grad_norm": 4.609252824618902, "learning_rate": 9.826285927808821e-08, "loss": 0.4377, "step": 32778 }, { "epoch": 0.94, "grad_norm": 6.947117436855197, "learning_rate": 9.817139065316839e-08, "loss": 0.6063, "step": 32779 }, { "epoch": 0.94, "grad_norm": 5.073394215043472, "learning_rate": 9.807996419807675e-08, "loss": 0.4336, "step": 32780 }, { "epoch": 0.94, "grad_norm": 5.696464202349175, "learning_rate": 9.798857991359933e-08, "loss": 0.1891, "step": 32781 }, { "epoch": 0.94, "grad_norm": 7.801162672197391, "learning_rate": 9.789723780052273e-08, "loss": 0.7345, "step": 32782 }, { "epoch": 0.94, "grad_norm": 3.0956574927014344, "learning_rate": 9.780593785963244e-08, "loss": 0.0837, "step": 32783 }, { "epoch": 0.94, "grad_norm": 4.6893021234734285, "learning_rate": 9.771468009171392e-08, "loss": 0.2919, "step": 32784 }, { "epoch": 0.94, "grad_norm": 7.224114707642286, "learning_rate": 9.762346449755266e-08, "loss": 0.7463, "step": 32785 }, { "epoch": 0.94, "grad_norm": 2.97594828037984, "learning_rate": 9.753229107793361e-08, "loss": 0.2393, "step": 32786 }, { "epoch": 0.94, "grad_norm": 3.192030323881224, "learning_rate": 9.744115983364055e-08, "loss": 0.2977, "step": 32787 }, { "epoch": 0.94, "grad_norm": 6.903875987215485, "learning_rate": 9.735007076545678e-08, "loss": 0.2482, "step": 32788 }, { "epoch": 0.94, "grad_norm": 11.688054239931828, "learning_rate": 9.725902387416719e-08, "loss": 0.7857, "step": 32789 }, { "epoch": 0.94, "grad_norm": 7.065967113879391, "learning_rate": 9.716801916055452e-08, "loss": 0.5763, "step": 32790 }, { "epoch": 0.94, "grad_norm": 3.183906661493803, "learning_rate": 9.707705662540145e-08, "loss": 0.267, "step": 32791 }, { "epoch": 0.94, "grad_norm": 6.01327881081358, "learning_rate": 9.698613626949183e-08, "loss": 0.3176, "step": 32792 }, { "epoch": 0.94, "grad_norm": 6.810104228831152, "learning_rate": 9.689525809360612e-08, "loss": 0.8712, "step": 32793 }, { "epoch": 0.94, "grad_norm": 4.497346466565138, "learning_rate": 9.680442209852702e-08, "loss": 0.4052, "step": 32794 }, { "epoch": 0.94, "grad_norm": 5.560194330592909, "learning_rate": 9.671362828503506e-08, "loss": 0.444, "step": 32795 }, { "epoch": 0.94, "grad_norm": 3.736205733628246, "learning_rate": 9.66228766539129e-08, "loss": 0.115, "step": 32796 }, { "epoch": 0.94, "grad_norm": 4.856627199505266, "learning_rate": 9.653216720593995e-08, "loss": 0.2645, "step": 32797 }, { "epoch": 0.94, "grad_norm": 8.198660894701561, "learning_rate": 9.644149994189722e-08, "loss": 0.712, "step": 32798 }, { "epoch": 0.94, "grad_norm": 6.1247260363285285, "learning_rate": 9.635087486256467e-08, "loss": 0.8276, "step": 32799 }, { "epoch": 0.94, "grad_norm": 6.9860404842928086, "learning_rate": 9.626029196872167e-08, "loss": 0.7135, "step": 32800 }, { "epoch": 0.94, "grad_norm": 5.061166658363029, "learning_rate": 9.616975126114758e-08, "loss": 0.6139, "step": 32801 }, { "epoch": 0.94, "grad_norm": 7.912136163527302, "learning_rate": 9.607925274062124e-08, "loss": 0.915, "step": 32802 }, { "epoch": 0.94, "grad_norm": 6.705598317076413, "learning_rate": 9.598879640792147e-08, "loss": 0.6462, "step": 32803 }, { "epoch": 0.94, "grad_norm": 6.39195349578985, "learning_rate": 9.589838226382597e-08, "loss": 0.7603, "step": 32804 }, { "epoch": 0.94, "grad_norm": 7.279221935823004, "learning_rate": 9.580801030911302e-08, "loss": 0.7455, "step": 32805 }, { "epoch": 0.94, "grad_norm": 9.57983870989703, "learning_rate": 9.571768054456032e-08, "loss": 0.7102, "step": 32806 }, { "epoch": 0.94, "grad_norm": 5.223584659658993, "learning_rate": 9.562739297094392e-08, "loss": 0.4847, "step": 32807 }, { "epoch": 0.94, "grad_norm": 5.02739444638932, "learning_rate": 9.553714758904208e-08, "loss": 0.3992, "step": 32808 }, { "epoch": 0.94, "grad_norm": 4.097601323809665, "learning_rate": 9.544694439962921e-08, "loss": 0.2823, "step": 32809 }, { "epoch": 0.94, "grad_norm": 15.295253804131539, "learning_rate": 9.535678340348353e-08, "loss": 0.3258, "step": 32810 }, { "epoch": 0.94, "grad_norm": 4.613461742051904, "learning_rate": 9.526666460137835e-08, "loss": 0.2603, "step": 32811 }, { "epoch": 0.94, "grad_norm": 6.151357110247342, "learning_rate": 9.517658799409136e-08, "loss": 0.4381, "step": 32812 }, { "epoch": 0.94, "grad_norm": 4.173203617513473, "learning_rate": 9.508655358239582e-08, "loss": 0.3983, "step": 32813 }, { "epoch": 0.94, "grad_norm": 8.2060851650723, "learning_rate": 9.49965613670667e-08, "loss": 0.408, "step": 32814 }, { "epoch": 0.94, "grad_norm": 6.312850139840515, "learning_rate": 9.490661134887835e-08, "loss": 0.3657, "step": 32815 }, { "epoch": 0.94, "grad_norm": 4.029251993348178, "learning_rate": 9.481670352860406e-08, "loss": 0.4893, "step": 32816 }, { "epoch": 0.94, "grad_norm": 6.693305445849821, "learning_rate": 9.47268379070182e-08, "loss": 0.5465, "step": 32817 }, { "epoch": 0.94, "grad_norm": 2.911672844715548, "learning_rate": 9.463701448489293e-08, "loss": 0.3073, "step": 32818 }, { "epoch": 0.94, "grad_norm": 4.529595118906672, "learning_rate": 9.454723326300208e-08, "loss": 0.4808, "step": 32819 }, { "epoch": 0.94, "grad_norm": 6.492800870672819, "learning_rate": 9.445749424211725e-08, "loss": 0.2885, "step": 32820 }, { "epoch": 0.94, "grad_norm": 5.634697079034981, "learning_rate": 9.436779742301062e-08, "loss": 0.8411, "step": 32821 }, { "epoch": 0.94, "grad_norm": 3.0362231663522885, "learning_rate": 9.427814280645376e-08, "loss": 0.1624, "step": 32822 }, { "epoch": 0.94, "grad_norm": 2.8158728614291326, "learning_rate": 9.418853039321773e-08, "loss": 0.1385, "step": 32823 }, { "epoch": 0.94, "grad_norm": 3.3471212771975862, "learning_rate": 9.409896018407416e-08, "loss": 0.2418, "step": 32824 }, { "epoch": 0.94, "grad_norm": 3.344357551211238, "learning_rate": 9.400943217979241e-08, "loss": 0.1802, "step": 32825 }, { "epoch": 0.94, "grad_norm": 8.458536104925608, "learning_rate": 9.391994638114466e-08, "loss": 0.6827, "step": 32826 }, { "epoch": 0.94, "grad_norm": 2.212969543719767, "learning_rate": 9.383050278889915e-08, "loss": 0.1697, "step": 32827 }, { "epoch": 0.94, "grad_norm": 3.1349872451352403, "learning_rate": 9.374110140382586e-08, "loss": 0.1829, "step": 32828 }, { "epoch": 0.94, "grad_norm": 6.237369477888305, "learning_rate": 9.365174222669304e-08, "loss": 0.4442, "step": 32829 }, { "epoch": 0.94, "grad_norm": 3.1194615303500868, "learning_rate": 9.356242525827119e-08, "loss": 0.2506, "step": 32830 }, { "epoch": 0.94, "grad_norm": 4.572418093023344, "learning_rate": 9.347315049932692e-08, "loss": 0.4996, "step": 32831 }, { "epoch": 0.94, "grad_norm": 4.8509008721230265, "learning_rate": 9.338391795062906e-08, "loss": 0.4897, "step": 32832 }, { "epoch": 0.94, "grad_norm": 8.50013188652493, "learning_rate": 9.32947276129459e-08, "loss": 0.54, "step": 32833 }, { "epoch": 0.94, "grad_norm": 5.058117794784491, "learning_rate": 9.320557948704346e-08, "loss": 0.2994, "step": 32834 }, { "epoch": 0.94, "grad_norm": 5.702789714833979, "learning_rate": 9.311647357368947e-08, "loss": 0.5276, "step": 32835 }, { "epoch": 0.94, "grad_norm": 3.355348018654716, "learning_rate": 9.302740987365e-08, "loss": 0.4882, "step": 32836 }, { "epoch": 0.94, "grad_norm": 5.26822804029688, "learning_rate": 9.293838838769221e-08, "loss": 0.5083, "step": 32837 }, { "epoch": 0.94, "grad_norm": 4.361196978075341, "learning_rate": 9.284940911658102e-08, "loss": 0.121, "step": 32838 }, { "epoch": 0.94, "grad_norm": 3.2474943552415994, "learning_rate": 9.276047206108196e-08, "loss": 0.2091, "step": 32839 }, { "epoch": 0.94, "grad_norm": 2.7391091046282723, "learning_rate": 9.267157722196108e-08, "loss": 0.2259, "step": 32840 }, { "epoch": 0.94, "grad_norm": 2.8749110373955116, "learning_rate": 9.258272459998108e-08, "loss": 0.425, "step": 32841 }, { "epoch": 0.94, "grad_norm": 3.1390351403934815, "learning_rate": 9.249391419590914e-08, "loss": 0.4508, "step": 32842 }, { "epoch": 0.94, "grad_norm": 5.1645452876790765, "learning_rate": 9.240514601050687e-08, "loss": 0.4859, "step": 32843 }, { "epoch": 0.94, "grad_norm": 3.4610181501993336, "learning_rate": 9.231642004453922e-08, "loss": 0.3145, "step": 32844 }, { "epoch": 0.94, "grad_norm": 4.505819637154774, "learning_rate": 9.222773629876891e-08, "loss": 0.4762, "step": 32845 }, { "epoch": 0.94, "grad_norm": 3.851823306587115, "learning_rate": 9.213909477395922e-08, "loss": 0.1801, "step": 32846 }, { "epoch": 0.94, "grad_norm": 6.945510638299635, "learning_rate": 9.205049547087286e-08, "loss": 0.6441, "step": 32847 }, { "epoch": 0.94, "grad_norm": 5.802672558856146, "learning_rate": 9.196193839027145e-08, "loss": 0.6903, "step": 32848 }, { "epoch": 0.94, "grad_norm": 5.5305245996019, "learning_rate": 9.187342353291773e-08, "loss": 0.2555, "step": 32849 }, { "epoch": 0.94, "grad_norm": 3.846998330811272, "learning_rate": 9.178495089957218e-08, "loss": 0.1869, "step": 32850 }, { "epoch": 0.94, "grad_norm": 2.513824914691375, "learning_rate": 9.169652049099642e-08, "loss": 0.0686, "step": 32851 }, { "epoch": 0.94, "grad_norm": 2.4422746746493136, "learning_rate": 9.160813230795152e-08, "loss": 0.2673, "step": 32852 }, { "epoch": 0.94, "grad_norm": 3.5749192182043794, "learning_rate": 9.15197863511974e-08, "loss": 0.5141, "step": 32853 }, { "epoch": 0.94, "grad_norm": 6.363280182165685, "learning_rate": 9.143148262149404e-08, "loss": 0.557, "step": 32854 }, { "epoch": 0.94, "grad_norm": 5.541131670183431, "learning_rate": 9.134322111960136e-08, "loss": 0.442, "step": 32855 }, { "epoch": 0.94, "grad_norm": 5.9376973972634985, "learning_rate": 9.125500184627822e-08, "loss": 0.5406, "step": 32856 }, { "epoch": 0.94, "grad_norm": 4.305429910378352, "learning_rate": 9.1166824802284e-08, "loss": 0.3925, "step": 32857 }, { "epoch": 0.94, "grad_norm": 12.915013882300595, "learning_rate": 9.10786899883781e-08, "loss": 0.5107, "step": 32858 }, { "epoch": 0.94, "grad_norm": 6.5525330923351675, "learning_rate": 9.099059740531713e-08, "loss": 0.6795, "step": 32859 }, { "epoch": 0.94, "grad_norm": 3.2421381935139157, "learning_rate": 9.090254705385993e-08, "loss": 0.3336, "step": 32860 }, { "epoch": 0.94, "grad_norm": 3.8902546043724207, "learning_rate": 9.08145389347631e-08, "loss": 0.2929, "step": 32861 }, { "epoch": 0.94, "grad_norm": 3.7022167170332803, "learning_rate": 9.072657304878496e-08, "loss": 0.3424, "step": 32862 }, { "epoch": 0.94, "grad_norm": 6.699515164326613, "learning_rate": 9.063864939668099e-08, "loss": 0.2628, "step": 32863 }, { "epoch": 0.94, "grad_norm": 6.737749320886189, "learning_rate": 9.055076797920892e-08, "loss": 0.5714, "step": 32864 }, { "epoch": 0.94, "grad_norm": 7.224501395262946, "learning_rate": 9.046292879712371e-08, "loss": 0.6341, "step": 32865 }, { "epoch": 0.94, "grad_norm": 2.1726732227988914, "learning_rate": 9.037513185118141e-08, "loss": 0.1713, "step": 32866 }, { "epoch": 0.94, "grad_norm": 4.20020965949781, "learning_rate": 9.02873771421381e-08, "loss": 0.1737, "step": 32867 }, { "epoch": 0.94, "grad_norm": 4.736386567374615, "learning_rate": 9.019966467074703e-08, "loss": 0.3706, "step": 32868 }, { "epoch": 0.94, "grad_norm": 2.5192399913789285, "learning_rate": 9.011199443776431e-08, "loss": 0.1857, "step": 32869 }, { "epoch": 0.94, "grad_norm": 5.168872065181, "learning_rate": 9.002436644394319e-08, "loss": 0.3503, "step": 32870 }, { "epoch": 0.94, "grad_norm": 10.884129254931766, "learning_rate": 8.993678069003864e-08, "loss": 0.6423, "step": 32871 }, { "epoch": 0.94, "grad_norm": 5.068603839013419, "learning_rate": 8.984923717680227e-08, "loss": 0.5549, "step": 32872 }, { "epoch": 0.94, "grad_norm": 3.7469386002310427, "learning_rate": 8.97617359049896e-08, "loss": 0.364, "step": 32873 }, { "epoch": 0.94, "grad_norm": 7.320903807119759, "learning_rate": 8.967427687535169e-08, "loss": 0.4483, "step": 32874 }, { "epoch": 0.94, "grad_norm": 12.05142731503575, "learning_rate": 8.95868600886407e-08, "loss": 0.3675, "step": 32875 }, { "epoch": 0.94, "grad_norm": 8.786713661836128, "learning_rate": 8.94994855456105e-08, "loss": 0.5105, "step": 32876 }, { "epoch": 0.94, "grad_norm": 4.252885385976746, "learning_rate": 8.941215324701047e-08, "loss": 0.2848, "step": 32877 }, { "epoch": 0.94, "grad_norm": 3.3972820203073426, "learning_rate": 8.93248631935939e-08, "loss": 0.5298, "step": 32878 }, { "epoch": 0.94, "grad_norm": 3.111982373777304, "learning_rate": 8.923761538611076e-08, "loss": 0.3547, "step": 32879 }, { "epoch": 0.94, "grad_norm": 5.486816471429485, "learning_rate": 8.915040982531153e-08, "loss": 0.2943, "step": 32880 }, { "epoch": 0.94, "grad_norm": 4.6647371094971914, "learning_rate": 8.906324651194731e-08, "loss": 0.4176, "step": 32881 }, { "epoch": 0.94, "grad_norm": 8.414780723357703, "learning_rate": 8.897612544676637e-08, "loss": 0.519, "step": 32882 }, { "epoch": 0.94, "grad_norm": 6.213407589109877, "learning_rate": 8.888904663051978e-08, "loss": 0.3796, "step": 32883 }, { "epoch": 0.94, "grad_norm": 2.5595953458735647, "learning_rate": 8.880201006395583e-08, "loss": 0.1498, "step": 32884 }, { "epoch": 0.94, "grad_norm": 5.55256077739596, "learning_rate": 8.871501574782393e-08, "loss": 0.7596, "step": 32885 }, { "epoch": 0.94, "grad_norm": 2.456935741223934, "learning_rate": 8.86280636828718e-08, "loss": 0.1789, "step": 32886 }, { "epoch": 0.94, "grad_norm": 6.529729821368536, "learning_rate": 8.854115386984774e-08, "loss": 0.7269, "step": 32887 }, { "epoch": 0.94, "grad_norm": 4.239027247719957, "learning_rate": 8.845428630949948e-08, "loss": 0.3851, "step": 32888 }, { "epoch": 0.94, "grad_norm": 2.907232487640135, "learning_rate": 8.836746100257365e-08, "loss": 0.0907, "step": 32889 }, { "epoch": 0.94, "grad_norm": 2.933063667447836, "learning_rate": 8.828067794981854e-08, "loss": 0.0941, "step": 32890 }, { "epoch": 0.94, "grad_norm": 5.337604446639923, "learning_rate": 8.819393715197965e-08, "loss": 0.3059, "step": 32891 }, { "epoch": 0.94, "grad_norm": 5.0823175582752675, "learning_rate": 8.810723860980363e-08, "loss": 0.277, "step": 32892 }, { "epoch": 0.94, "grad_norm": 5.621997625382342, "learning_rate": 8.802058232403654e-08, "loss": 0.3088, "step": 32893 }, { "epoch": 0.94, "grad_norm": 2.241232279756941, "learning_rate": 8.793396829542389e-08, "loss": 0.1763, "step": 32894 }, { "epoch": 0.94, "grad_norm": 7.322960862034956, "learning_rate": 8.784739652470953e-08, "loss": 0.5221, "step": 32895 }, { "epoch": 0.94, "grad_norm": 6.926387925963123, "learning_rate": 8.776086701264008e-08, "loss": 0.5242, "step": 32896 }, { "epoch": 0.94, "grad_norm": 6.106902810129947, "learning_rate": 8.76743797599583e-08, "loss": 0.3636, "step": 32897 }, { "epoch": 0.94, "grad_norm": 6.374368000492337, "learning_rate": 8.758793476740913e-08, "loss": 0.5508, "step": 32898 }, { "epoch": 0.94, "grad_norm": 5.540232891186653, "learning_rate": 8.750153203573696e-08, "loss": 0.3732, "step": 32899 }, { "epoch": 0.94, "grad_norm": 5.373318653173405, "learning_rate": 8.741517156568402e-08, "loss": 0.5921, "step": 32900 }, { "epoch": 0.94, "grad_norm": 2.467386013787762, "learning_rate": 8.732885335799302e-08, "loss": 0.1062, "step": 32901 }, { "epoch": 0.94, "grad_norm": 4.456227235883055, "learning_rate": 8.724257741340725e-08, "loss": 0.3912, "step": 32902 }, { "epoch": 0.94, "grad_norm": 4.821015095011182, "learning_rate": 8.71563437326689e-08, "loss": 0.5977, "step": 32903 }, { "epoch": 0.94, "grad_norm": 7.830775803855362, "learning_rate": 8.707015231651905e-08, "loss": 0.9809, "step": 32904 }, { "epoch": 0.94, "grad_norm": 6.3727010059644, "learning_rate": 8.698400316570044e-08, "loss": 0.4071, "step": 32905 }, { "epoch": 0.94, "grad_norm": 6.413156759081024, "learning_rate": 8.689789628095301e-08, "loss": 0.2104, "step": 32906 }, { "epoch": 0.94, "grad_norm": 8.986050816025273, "learning_rate": 8.681183166301843e-08, "loss": 0.9104, "step": 32907 }, { "epoch": 0.94, "grad_norm": 5.835712678758668, "learning_rate": 8.672580931263663e-08, "loss": 0.7319, "step": 32908 }, { "epoch": 0.94, "grad_norm": 7.211659255850099, "learning_rate": 8.663982923054759e-08, "loss": 0.5135, "step": 32909 }, { "epoch": 0.94, "grad_norm": 3.6740322837096464, "learning_rate": 8.655389141749126e-08, "loss": 0.3591, "step": 32910 }, { "epoch": 0.94, "grad_norm": 7.4933044746574025, "learning_rate": 8.646799587420707e-08, "loss": 0.4091, "step": 32911 }, { "epoch": 0.94, "grad_norm": 3.1292932773446074, "learning_rate": 8.638214260143385e-08, "loss": 0.2975, "step": 32912 }, { "epoch": 0.94, "grad_norm": 8.633296680249895, "learning_rate": 8.629633159990991e-08, "loss": 0.7603, "step": 32913 }, { "epoch": 0.94, "grad_norm": 9.028380733471428, "learning_rate": 8.6210562870373e-08, "loss": 1.0426, "step": 32914 }, { "epoch": 0.94, "grad_norm": 4.610613433504844, "learning_rate": 8.612483641356251e-08, "loss": 0.3487, "step": 32915 }, { "epoch": 0.94, "grad_norm": 3.6001015304447317, "learning_rate": 8.603915223021453e-08, "loss": 0.4708, "step": 32916 }, { "epoch": 0.94, "grad_norm": 5.1254626739812945, "learning_rate": 8.59535103210668e-08, "loss": 0.3467, "step": 32917 }, { "epoch": 0.94, "grad_norm": 6.1912921112803385, "learning_rate": 8.586791068685596e-08, "loss": 0.2742, "step": 32918 }, { "epoch": 0.94, "grad_norm": 9.928541596064978, "learning_rate": 8.578235332831863e-08, "loss": 0.7424, "step": 32919 }, { "epoch": 0.94, "grad_norm": 4.731892655742388, "learning_rate": 8.569683824619035e-08, "loss": 0.1634, "step": 32920 }, { "epoch": 0.94, "grad_norm": 4.274488469801597, "learning_rate": 8.561136544120773e-08, "loss": 0.3438, "step": 32921 }, { "epoch": 0.94, "grad_norm": 6.00749167985852, "learning_rate": 8.552593491410466e-08, "loss": 0.3187, "step": 32922 }, { "epoch": 0.94, "grad_norm": 6.2184413660801265, "learning_rate": 8.544054666561719e-08, "loss": 0.5306, "step": 32923 }, { "epoch": 0.94, "grad_norm": 5.569006682537315, "learning_rate": 8.535520069647973e-08, "loss": 0.4453, "step": 32924 }, { "epoch": 0.94, "grad_norm": 6.382152285101176, "learning_rate": 8.526989700742616e-08, "loss": 0.3375, "step": 32925 }, { "epoch": 0.94, "grad_norm": 11.593248053118788, "learning_rate": 8.518463559919087e-08, "loss": 0.7201, "step": 32926 }, { "epoch": 0.94, "grad_norm": 4.096273678107882, "learning_rate": 8.509941647250664e-08, "loss": 0.4165, "step": 32927 }, { "epoch": 0.94, "grad_norm": 5.547311040368565, "learning_rate": 8.501423962810785e-08, "loss": 0.2691, "step": 32928 }, { "epoch": 0.94, "grad_norm": 6.518217892750709, "learning_rate": 8.492910506672502e-08, "loss": 0.261, "step": 32929 }, { "epoch": 0.94, "grad_norm": 5.663140836246662, "learning_rate": 8.48440127890926e-08, "loss": 0.2872, "step": 32930 }, { "epoch": 0.94, "grad_norm": 6.985554879225602, "learning_rate": 8.47589627959422e-08, "loss": 0.6176, "step": 32931 }, { "epoch": 0.94, "grad_norm": 2.836078629778375, "learning_rate": 8.467395508800491e-08, "loss": 0.2387, "step": 32932 }, { "epoch": 0.94, "grad_norm": 2.4140586976450384, "learning_rate": 8.458898966601292e-08, "loss": 0.1886, "step": 32933 }, { "epoch": 0.94, "grad_norm": 4.665927775958378, "learning_rate": 8.45040665306962e-08, "loss": 0.3315, "step": 32934 }, { "epoch": 0.94, "grad_norm": 3.760072326040628, "learning_rate": 8.44191856827864e-08, "loss": 0.1191, "step": 32935 }, { "epoch": 0.94, "grad_norm": 5.764305191121933, "learning_rate": 8.433434712301237e-08, "loss": 0.6339, "step": 32936 }, { "epoch": 0.94, "grad_norm": 2.3625405277832905, "learning_rate": 8.424955085210518e-08, "loss": 0.1479, "step": 32937 }, { "epoch": 0.94, "grad_norm": 3.3444261847613994, "learning_rate": 8.416479687079315e-08, "loss": 0.2452, "step": 32938 }, { "epoch": 0.94, "grad_norm": 3.6005589541320924, "learning_rate": 8.408008517980626e-08, "loss": 0.3865, "step": 32939 }, { "epoch": 0.94, "grad_norm": 5.1552894391149175, "learning_rate": 8.399541577987447e-08, "loss": 0.4357, "step": 32940 }, { "epoch": 0.94, "grad_norm": 6.112858396293205, "learning_rate": 8.391078867172331e-08, "loss": 0.3375, "step": 32941 }, { "epoch": 0.94, "grad_norm": 2.9458022987820867, "learning_rate": 8.382620385608332e-08, "loss": 0.1302, "step": 32942 }, { "epoch": 0.94, "grad_norm": 5.9508345795994115, "learning_rate": 8.374166133368055e-08, "loss": 0.386, "step": 32943 }, { "epoch": 0.94, "grad_norm": 4.689093382548422, "learning_rate": 8.365716110524336e-08, "loss": 0.5079, "step": 32944 }, { "epoch": 0.94, "grad_norm": 9.470662418025466, "learning_rate": 8.357270317149779e-08, "loss": 0.7108, "step": 32945 }, { "epoch": 0.94, "grad_norm": 10.065824089935587, "learning_rate": 8.348828753317162e-08, "loss": 0.5086, "step": 32946 }, { "epoch": 0.94, "grad_norm": 4.903573125974484, "learning_rate": 8.340391419098981e-08, "loss": 0.3035, "step": 32947 }, { "epoch": 0.94, "grad_norm": 9.94508034314536, "learning_rate": 8.3319583145679e-08, "loss": 0.6054, "step": 32948 }, { "epoch": 0.94, "grad_norm": 5.6844812703068595, "learning_rate": 8.323529439796474e-08, "loss": 0.6004, "step": 32949 }, { "epoch": 0.94, "grad_norm": 7.791270961675135, "learning_rate": 8.315104794857143e-08, "loss": 0.5417, "step": 32950 }, { "epoch": 0.94, "grad_norm": 4.255508583460889, "learning_rate": 8.30668437982246e-08, "loss": 0.4346, "step": 32951 }, { "epoch": 0.94, "grad_norm": 6.716912448573145, "learning_rate": 8.298268194764759e-08, "loss": 0.3461, "step": 32952 }, { "epoch": 0.94, "grad_norm": 4.475523024101643, "learning_rate": 8.289856239756588e-08, "loss": 0.2978, "step": 32953 }, { "epoch": 0.94, "grad_norm": 4.521668320409574, "learning_rate": 8.281448514870283e-08, "loss": 0.3366, "step": 32954 }, { "epoch": 0.94, "grad_norm": 6.203392580348305, "learning_rate": 8.27304502017806e-08, "loss": 0.3844, "step": 32955 }, { "epoch": 0.94, "grad_norm": 5.465297502899228, "learning_rate": 8.26464575575231e-08, "loss": 0.5252, "step": 32956 }, { "epoch": 0.94, "grad_norm": 3.6069246702289104, "learning_rate": 8.25625072166525e-08, "loss": 0.3017, "step": 32957 }, { "epoch": 0.94, "grad_norm": 5.529179460309005, "learning_rate": 8.247859917989154e-08, "loss": 0.44, "step": 32958 }, { "epoch": 0.94, "grad_norm": 4.828633871081847, "learning_rate": 8.239473344796134e-08, "loss": 0.1867, "step": 32959 }, { "epoch": 0.94, "grad_norm": 5.618810660572629, "learning_rate": 8.231091002158409e-08, "loss": 0.3843, "step": 32960 }, { "epoch": 0.94, "grad_norm": 6.928658358459767, "learning_rate": 8.222712890148032e-08, "loss": 0.6792, "step": 32961 }, { "epoch": 0.94, "grad_norm": 4.319230853084734, "learning_rate": 8.214339008837168e-08, "loss": 0.1885, "step": 32962 }, { "epoch": 0.94, "grad_norm": 2.821978217145631, "learning_rate": 8.205969358297705e-08, "loss": 0.2044, "step": 32963 }, { "epoch": 0.94, "grad_norm": 3.5455719827218095, "learning_rate": 8.197603938601806e-08, "loss": 0.2603, "step": 32964 }, { "epoch": 0.94, "grad_norm": 4.4762488982650135, "learning_rate": 8.189242749821357e-08, "loss": 0.4057, "step": 32965 }, { "epoch": 0.94, "grad_norm": 3.2767140598562787, "learning_rate": 8.180885792028248e-08, "loss": 0.3781, "step": 32966 }, { "epoch": 0.94, "grad_norm": 8.396891095820717, "learning_rate": 8.172533065294475e-08, "loss": 0.4986, "step": 32967 }, { "epoch": 0.94, "grad_norm": 7.930782569476145, "learning_rate": 8.164184569691869e-08, "loss": 0.5297, "step": 32968 }, { "epoch": 0.94, "grad_norm": 11.207257433071776, "learning_rate": 8.155840305292206e-08, "loss": 0.5929, "step": 32969 }, { "epoch": 0.94, "grad_norm": 6.218604618079211, "learning_rate": 8.147500272167264e-08, "loss": 0.4658, "step": 32970 }, { "epoch": 0.94, "grad_norm": 4.278234993389317, "learning_rate": 8.139164470388816e-08, "loss": 0.5539, "step": 32971 }, { "epoch": 0.94, "grad_norm": 6.442636385130192, "learning_rate": 8.130832900028585e-08, "loss": 0.3155, "step": 32972 }, { "epoch": 0.94, "grad_norm": 4.198362992494761, "learning_rate": 8.122505561158234e-08, "loss": 0.0367, "step": 32973 }, { "epoch": 0.94, "grad_norm": 4.713454603652155, "learning_rate": 8.114182453849428e-08, "loss": 0.2279, "step": 32974 }, { "epoch": 0.94, "grad_norm": 6.730154108717526, "learning_rate": 8.105863578173723e-08, "loss": 0.3431, "step": 32975 }, { "epoch": 0.94, "grad_norm": 5.367388770092965, "learning_rate": 8.097548934202781e-08, "loss": 0.4253, "step": 32976 }, { "epoch": 0.94, "grad_norm": 4.896562816842137, "learning_rate": 8.089238522007936e-08, "loss": 0.106, "step": 32977 }, { "epoch": 0.94, "grad_norm": 8.669930014850312, "learning_rate": 8.080932341660908e-08, "loss": 0.8349, "step": 32978 }, { "epoch": 0.94, "grad_norm": 2.509122654826484, "learning_rate": 8.072630393232973e-08, "loss": 0.171, "step": 32979 }, { "epoch": 0.94, "grad_norm": 7.674567035801545, "learning_rate": 8.064332676795739e-08, "loss": 0.5209, "step": 32980 }, { "epoch": 0.94, "grad_norm": 3.303001460685719, "learning_rate": 8.056039192420428e-08, "loss": 0.1842, "step": 32981 }, { "epoch": 0.94, "grad_norm": 6.895283307347155, "learning_rate": 8.047749940178374e-08, "loss": 0.5933, "step": 32982 }, { "epoch": 0.94, "grad_norm": 5.444712229498859, "learning_rate": 8.039464920141071e-08, "loss": 0.5051, "step": 32983 }, { "epoch": 0.94, "grad_norm": 3.9835359176025804, "learning_rate": 8.031184132379576e-08, "loss": 0.29, "step": 32984 }, { "epoch": 0.94, "grad_norm": 5.605650576523467, "learning_rate": 8.022907576965333e-08, "loss": 0.4288, "step": 32985 }, { "epoch": 0.94, "grad_norm": 5.626062631900539, "learning_rate": 8.014635253969338e-08, "loss": 0.472, "step": 32986 }, { "epoch": 0.94, "grad_norm": 5.891269759855878, "learning_rate": 8.006367163462925e-08, "loss": 0.3546, "step": 32987 }, { "epoch": 0.94, "grad_norm": 4.841348902024269, "learning_rate": 7.998103305517146e-08, "loss": 0.425, "step": 32988 }, { "epoch": 0.94, "grad_norm": 2.1703295114508268, "learning_rate": 7.989843680203113e-08, "loss": 0.2187, "step": 32989 }, { "epoch": 0.94, "grad_norm": 3.7150043248270768, "learning_rate": 7.98158828759188e-08, "loss": 0.2458, "step": 32990 }, { "epoch": 0.94, "grad_norm": 4.844311417068998, "learning_rate": 7.973337127754444e-08, "loss": 0.4159, "step": 32991 }, { "epoch": 0.94, "grad_norm": 6.324175094067397, "learning_rate": 7.965090200761805e-08, "loss": 0.8234, "step": 32992 }, { "epoch": 0.94, "grad_norm": 4.905991419884589, "learning_rate": 7.956847506684906e-08, "loss": 0.2879, "step": 32993 }, { "epoch": 0.94, "grad_norm": 5.507248263846131, "learning_rate": 7.948609045594746e-08, "loss": 0.3941, "step": 32994 }, { "epoch": 0.94, "grad_norm": 3.6144722949881887, "learning_rate": 7.940374817562046e-08, "loss": 0.264, "step": 32995 }, { "epoch": 0.94, "grad_norm": 5.214165872812884, "learning_rate": 7.93214482265775e-08, "loss": 0.4427, "step": 32996 }, { "epoch": 0.94, "grad_norm": 6.924414545575177, "learning_rate": 7.923919060952634e-08, "loss": 0.3454, "step": 32997 }, { "epoch": 0.94, "grad_norm": 5.181494810351164, "learning_rate": 7.915697532517363e-08, "loss": 0.31, "step": 32998 }, { "epoch": 0.95, "grad_norm": 8.255441431711684, "learning_rate": 7.907480237422883e-08, "loss": 0.6956, "step": 32999 }, { "epoch": 0.95, "grad_norm": 1.9122224721688905, "learning_rate": 7.899267175739689e-08, "loss": 0.1361, "step": 33000 }, { "epoch": 0.95, "grad_norm": 3.9737209517481293, "learning_rate": 7.89105834753856e-08, "loss": 0.4853, "step": 33001 }, { "epoch": 0.95, "grad_norm": 4.333962012805296, "learning_rate": 7.882853752889996e-08, "loss": 0.331, "step": 33002 }, { "epoch": 0.95, "grad_norm": 3.483925589416126, "learning_rate": 7.874653391864717e-08, "loss": 0.2531, "step": 33003 }, { "epoch": 0.95, "grad_norm": 5.492453187509478, "learning_rate": 7.866457264533167e-08, "loss": 0.41, "step": 33004 }, { "epoch": 0.95, "grad_norm": 7.379098658478973, "learning_rate": 7.858265370965901e-08, "loss": 0.2407, "step": 33005 }, { "epoch": 0.95, "grad_norm": 4.7532269408601096, "learning_rate": 7.850077711233361e-08, "loss": 0.6627, "step": 33006 }, { "epoch": 0.95, "grad_norm": 3.990480390182433, "learning_rate": 7.841894285406049e-08, "loss": 0.4521, "step": 33007 }, { "epoch": 0.95, "grad_norm": 6.6901445596697355, "learning_rate": 7.83371509355435e-08, "loss": 0.428, "step": 33008 }, { "epoch": 0.95, "grad_norm": 7.2316773824912035, "learning_rate": 7.825540135748544e-08, "loss": 0.5814, "step": 33009 }, { "epoch": 0.95, "grad_norm": 9.87515780165897, "learning_rate": 7.817369412059017e-08, "loss": 0.7391, "step": 33010 }, { "epoch": 0.95, "grad_norm": 4.477252740555722, "learning_rate": 7.809202922556103e-08, "loss": 0.3332, "step": 33011 }, { "epoch": 0.95, "grad_norm": 8.466243020920333, "learning_rate": 7.801040667309967e-08, "loss": 0.2601, "step": 33012 }, { "epoch": 0.95, "grad_norm": 2.9164567270108055, "learning_rate": 7.792882646390887e-08, "loss": 0.1786, "step": 33013 }, { "epoch": 0.95, "grad_norm": 2.713801693065969, "learning_rate": 7.784728859869084e-08, "loss": 0.2071, "step": 33014 }, { "epoch": 0.95, "grad_norm": 3.154257173554087, "learning_rate": 7.776579307814613e-08, "loss": 0.3322, "step": 33015 }, { "epoch": 0.95, "grad_norm": 4.493936374072992, "learning_rate": 7.768433990297641e-08, "loss": 0.6818, "step": 33016 }, { "epoch": 0.95, "grad_norm": 5.209025568418389, "learning_rate": 7.760292907388223e-08, "loss": 0.6883, "step": 33017 }, { "epoch": 0.95, "grad_norm": 9.380690818350864, "learning_rate": 7.752156059156358e-08, "loss": 0.2863, "step": 33018 }, { "epoch": 0.95, "grad_norm": 5.691418628770801, "learning_rate": 7.744023445672155e-08, "loss": 0.3071, "step": 33019 }, { "epoch": 0.95, "grad_norm": 5.7725320191154115, "learning_rate": 7.73589506700545e-08, "loss": 0.8107, "step": 33020 }, { "epoch": 0.95, "grad_norm": 17.343362195602342, "learning_rate": 7.727770923226296e-08, "loss": 0.267, "step": 33021 }, { "epoch": 0.95, "grad_norm": 4.7132172137411885, "learning_rate": 7.719651014404472e-08, "loss": 0.3015, "step": 33022 }, { "epoch": 0.95, "grad_norm": 6.679361078864091, "learning_rate": 7.711535340609865e-08, "loss": 0.1282, "step": 33023 }, { "epoch": 0.95, "grad_norm": 3.507115692053444, "learning_rate": 7.703423901912365e-08, "loss": 0.2371, "step": 33024 }, { "epoch": 0.95, "grad_norm": 6.184501152428911, "learning_rate": 7.695316698381639e-08, "loss": 0.6402, "step": 33025 }, { "epoch": 0.95, "grad_norm": 6.040503168431587, "learning_rate": 7.687213730087518e-08, "loss": 0.6692, "step": 33026 }, { "epoch": 0.95, "grad_norm": 4.909551469434578, "learning_rate": 7.679114997099668e-08, "loss": 0.3155, "step": 33027 }, { "epoch": 0.95, "grad_norm": 8.552840917550455, "learning_rate": 7.671020499487813e-08, "loss": 1.0761, "step": 33028 }, { "epoch": 0.95, "grad_norm": 5.192971733348363, "learning_rate": 7.662930237321509e-08, "loss": 0.4311, "step": 33029 }, { "epoch": 0.95, "grad_norm": 10.561454111719131, "learning_rate": 7.65484421067042e-08, "loss": 0.544, "step": 33030 }, { "epoch": 0.95, "grad_norm": 5.240107503144949, "learning_rate": 7.646762419604103e-08, "loss": 0.3012, "step": 33031 }, { "epoch": 0.95, "grad_norm": 7.2761745199545915, "learning_rate": 7.638684864192003e-08, "loss": 0.443, "step": 33032 }, { "epoch": 0.95, "grad_norm": 5.318529051372587, "learning_rate": 7.630611544503729e-08, "loss": 0.2115, "step": 33033 }, { "epoch": 0.95, "grad_norm": 5.618575899441678, "learning_rate": 7.622542460608673e-08, "loss": 0.4011, "step": 33034 }, { "epoch": 0.95, "grad_norm": 3.7671068053936034, "learning_rate": 7.614477612576276e-08, "loss": 0.3946, "step": 33035 }, { "epoch": 0.95, "grad_norm": 4.059290880208255, "learning_rate": 7.606417000475874e-08, "loss": 0.2616, "step": 33036 }, { "epoch": 0.95, "grad_norm": 6.67717830532889, "learning_rate": 7.598360624376855e-08, "loss": 0.4481, "step": 33037 }, { "epoch": 0.95, "grad_norm": 8.574498394900521, "learning_rate": 7.590308484348497e-08, "loss": 0.368, "step": 33038 }, { "epoch": 0.95, "grad_norm": 2.4483811347038547, "learning_rate": 7.582260580460132e-08, "loss": 0.1062, "step": 33039 }, { "epoch": 0.95, "grad_norm": 4.877548163140523, "learning_rate": 7.57421691278093e-08, "loss": 0.3053, "step": 33040 }, { "epoch": 0.95, "grad_norm": 7.91676439593041, "learning_rate": 7.56617748138011e-08, "loss": 0.5547, "step": 33041 }, { "epoch": 0.95, "grad_norm": 3.3058750102184904, "learning_rate": 7.558142286326841e-08, "loss": 0.1585, "step": 33042 }, { "epoch": 0.95, "grad_norm": 3.691757952370212, "learning_rate": 7.550111327690234e-08, "loss": 0.1995, "step": 33043 }, { "epoch": 0.95, "grad_norm": 4.078313655070502, "learning_rate": 7.542084605539402e-08, "loss": 0.522, "step": 33044 }, { "epoch": 0.95, "grad_norm": 6.005529637162684, "learning_rate": 7.534062119943342e-08, "loss": 0.4354, "step": 33045 }, { "epoch": 0.95, "grad_norm": 9.146842837106078, "learning_rate": 7.526043870971112e-08, "loss": 0.5198, "step": 33046 }, { "epoch": 0.95, "grad_norm": 4.892238509217738, "learning_rate": 7.518029858691711e-08, "loss": 0.3895, "step": 33047 }, { "epoch": 0.95, "grad_norm": 5.3836590858295725, "learning_rate": 7.510020083174085e-08, "loss": 0.5118, "step": 33048 }, { "epoch": 0.95, "grad_norm": 4.3781144636900065, "learning_rate": 7.502014544487124e-08, "loss": 0.4486, "step": 33049 }, { "epoch": 0.95, "grad_norm": 4.347278136393799, "learning_rate": 7.49401324269966e-08, "loss": 0.3844, "step": 33050 }, { "epoch": 0.95, "grad_norm": 5.383235455244225, "learning_rate": 7.486016177880584e-08, "loss": 0.4385, "step": 33051 }, { "epoch": 0.95, "grad_norm": 4.686321822557522, "learning_rate": 7.478023350098618e-08, "loss": 0.5209, "step": 33052 }, { "epoch": 0.95, "grad_norm": 6.089279569535657, "learning_rate": 7.47003475942265e-08, "loss": 0.5055, "step": 33053 }, { "epoch": 0.95, "grad_norm": 3.0269912022228698, "learning_rate": 7.46205040592124e-08, "loss": 0.2314, "step": 33054 }, { "epoch": 0.95, "grad_norm": 5.818369414342317, "learning_rate": 7.454070289663273e-08, "loss": 0.4165, "step": 33055 }, { "epoch": 0.95, "grad_norm": 4.810227960074014, "learning_rate": 7.446094410717253e-08, "loss": 0.8537, "step": 33056 }, { "epoch": 0.95, "grad_norm": 6.585970732581301, "learning_rate": 7.43812276915179e-08, "loss": 0.5784, "step": 33057 }, { "epoch": 0.95, "grad_norm": 7.124825860288006, "learning_rate": 7.430155365035607e-08, "loss": 0.453, "step": 33058 }, { "epoch": 0.95, "grad_norm": 2.55774028025967, "learning_rate": 7.42219219843704e-08, "loss": 0.2496, "step": 33059 }, { "epoch": 0.95, "grad_norm": 6.1868513326002414, "learning_rate": 7.414233269424809e-08, "loss": 0.4638, "step": 33060 }, { "epoch": 0.95, "grad_norm": 3.4731622591160187, "learning_rate": 7.40627857806725e-08, "loss": 0.2042, "step": 33061 }, { "epoch": 0.95, "grad_norm": 4.631597195479844, "learning_rate": 7.398328124432808e-08, "loss": 0.5728, "step": 33062 }, { "epoch": 0.95, "grad_norm": 12.124232670994589, "learning_rate": 7.390381908589873e-08, "loss": 0.5434, "step": 33063 }, { "epoch": 0.95, "grad_norm": 4.405670019626797, "learning_rate": 7.382439930606833e-08, "loss": 0.4771, "step": 33064 }, { "epoch": 0.95, "grad_norm": 3.5009096530282497, "learning_rate": 7.37450219055208e-08, "loss": 0.4147, "step": 33065 }, { "epoch": 0.95, "grad_norm": 7.616532298543789, "learning_rate": 7.366568688493781e-08, "loss": 0.4452, "step": 33066 }, { "epoch": 0.95, "grad_norm": 5.162951397500255, "learning_rate": 7.358639424500269e-08, "loss": 0.5583, "step": 33067 }, { "epoch": 0.95, "grad_norm": 8.240768353832271, "learning_rate": 7.350714398639713e-08, "loss": 0.637, "step": 33068 }, { "epoch": 0.95, "grad_norm": 3.8228263775023144, "learning_rate": 7.342793610980281e-08, "loss": 0.3934, "step": 33069 }, { "epoch": 0.95, "grad_norm": 3.866773748901898, "learning_rate": 7.33487706159014e-08, "loss": 0.268, "step": 33070 }, { "epoch": 0.95, "grad_norm": 4.030414672181173, "learning_rate": 7.326964750537347e-08, "loss": 0.2535, "step": 33071 }, { "epoch": 0.95, "grad_norm": 11.60052625350313, "learning_rate": 7.319056677890124e-08, "loss": 0.6205, "step": 33072 }, { "epoch": 0.95, "grad_norm": 5.782741416117018, "learning_rate": 7.311152843716307e-08, "loss": 0.3082, "step": 33073 }, { "epoch": 0.95, "grad_norm": 6.354702875828297, "learning_rate": 7.303253248084063e-08, "loss": 0.3027, "step": 33074 }, { "epoch": 0.95, "grad_norm": 7.513055564407826, "learning_rate": 7.295357891061228e-08, "loss": 0.485, "step": 33075 }, { "epoch": 0.95, "grad_norm": 4.342057646257598, "learning_rate": 7.287466772715746e-08, "loss": 0.3425, "step": 33076 }, { "epoch": 0.95, "grad_norm": 5.886778853324146, "learning_rate": 7.279579893115507e-08, "loss": 0.6859, "step": 33077 }, { "epoch": 0.95, "grad_norm": 4.039166506344967, "learning_rate": 7.271697252328403e-08, "loss": 0.1935, "step": 33078 }, { "epoch": 0.95, "grad_norm": 3.358545067756678, "learning_rate": 7.263818850422266e-08, "loss": 0.4228, "step": 33079 }, { "epoch": 0.95, "grad_norm": 5.725004642601296, "learning_rate": 7.255944687464766e-08, "loss": 0.6597, "step": 33080 }, { "epoch": 0.95, "grad_norm": 3.7365005062556733, "learning_rate": 7.248074763523738e-08, "loss": 0.322, "step": 33081 }, { "epoch": 0.95, "grad_norm": 7.514450457309171, "learning_rate": 7.24020907866685e-08, "loss": 0.2006, "step": 33082 }, { "epoch": 0.95, "grad_norm": 5.772941599029956, "learning_rate": 7.232347632961767e-08, "loss": 0.6392, "step": 33083 }, { "epoch": 0.95, "grad_norm": 7.784252644499563, "learning_rate": 7.224490426476105e-08, "loss": 0.5224, "step": 33084 }, { "epoch": 0.95, "grad_norm": 10.172017953457065, "learning_rate": 7.216637459277531e-08, "loss": 0.3893, "step": 33085 }, { "epoch": 0.95, "grad_norm": 2.4056125762650358, "learning_rate": 7.208788731433491e-08, "loss": 0.1885, "step": 33086 }, { "epoch": 0.95, "grad_norm": 5.526486430724493, "learning_rate": 7.200944243011598e-08, "loss": 0.2116, "step": 33087 }, { "epoch": 0.95, "grad_norm": 6.700319392149723, "learning_rate": 7.193103994079353e-08, "loss": 0.5185, "step": 33088 }, { "epoch": 0.95, "grad_norm": 2.545626193738658, "learning_rate": 7.185267984704092e-08, "loss": 0.312, "step": 33089 }, { "epoch": 0.95, "grad_norm": 5.527206149595779, "learning_rate": 7.177436214953315e-08, "loss": 0.3165, "step": 33090 }, { "epoch": 0.95, "grad_norm": 7.457340224086161, "learning_rate": 7.169608684894358e-08, "loss": 0.4008, "step": 33091 }, { "epoch": 0.95, "grad_norm": 4.845743224513521, "learning_rate": 7.161785394594612e-08, "loss": 0.6037, "step": 33092 }, { "epoch": 0.95, "grad_norm": 8.3171292371929, "learning_rate": 7.153966344121299e-08, "loss": 0.5828, "step": 33093 }, { "epoch": 0.95, "grad_norm": 3.45598746021742, "learning_rate": 7.146151533541812e-08, "loss": 0.5793, "step": 33094 }, { "epoch": 0.95, "grad_norm": 3.5188128450517198, "learning_rate": 7.138340962923263e-08, "loss": 0.2188, "step": 33095 }, { "epoch": 0.95, "grad_norm": 2.260686648780376, "learning_rate": 7.13053463233282e-08, "loss": 0.1829, "step": 33096 }, { "epoch": 0.95, "grad_norm": 4.979295204876357, "learning_rate": 7.122732541837762e-08, "loss": 0.3583, "step": 33097 }, { "epoch": 0.95, "grad_norm": 3.7962764064057573, "learning_rate": 7.114934691505149e-08, "loss": 0.5019, "step": 33098 }, { "epoch": 0.95, "grad_norm": 4.788191748859525, "learning_rate": 7.107141081402092e-08, "loss": 0.2923, "step": 33099 }, { "epoch": 0.95, "grad_norm": 4.659899823663289, "learning_rate": 7.099351711595537e-08, "loss": 0.3207, "step": 33100 }, { "epoch": 0.95, "grad_norm": 8.870984149425468, "learning_rate": 7.091566582152654e-08, "loss": 0.5429, "step": 33101 }, { "epoch": 0.95, "grad_norm": 2.575868763708256, "learning_rate": 7.083785693140333e-08, "loss": 0.1059, "step": 33102 }, { "epoch": 0.95, "grad_norm": 3.2927591386105917, "learning_rate": 7.076009044625465e-08, "loss": 0.5029, "step": 33103 }, { "epoch": 0.95, "grad_norm": 6.056848388966112, "learning_rate": 7.068236636674997e-08, "loss": 0.5478, "step": 33104 }, { "epoch": 0.95, "grad_norm": 4.9129716544829245, "learning_rate": 7.06046846935582e-08, "loss": 0.3406, "step": 33105 }, { "epoch": 0.95, "grad_norm": 5.888842811444193, "learning_rate": 7.052704542734767e-08, "loss": 0.717, "step": 33106 }, { "epoch": 0.95, "grad_norm": 7.180367886886287, "learning_rate": 7.044944856878566e-08, "loss": 0.5418, "step": 33107 }, { "epoch": 0.95, "grad_norm": 3.7489620043726095, "learning_rate": 7.037189411853995e-08, "loss": 0.3098, "step": 33108 }, { "epoch": 0.95, "grad_norm": 4.745580800912069, "learning_rate": 7.029438207727834e-08, "loss": 0.3601, "step": 33109 }, { "epoch": 0.95, "grad_norm": 3.467037912505323, "learning_rate": 7.021691244566753e-08, "loss": 0.2555, "step": 33110 }, { "epoch": 0.95, "grad_norm": 7.388047750856732, "learning_rate": 7.013948522437253e-08, "loss": 0.5489, "step": 33111 }, { "epoch": 0.95, "grad_norm": 5.144208824116391, "learning_rate": 7.006210041406114e-08, "loss": 0.3986, "step": 33112 }, { "epoch": 0.95, "grad_norm": 3.373185588411974, "learning_rate": 6.998475801539895e-08, "loss": 0.6061, "step": 33113 }, { "epoch": 0.95, "grad_norm": 8.152627337741324, "learning_rate": 6.990745802904985e-08, "loss": 0.5125, "step": 33114 }, { "epoch": 0.95, "grad_norm": 6.060855445788261, "learning_rate": 6.983020045568112e-08, "loss": 0.3936, "step": 33115 }, { "epoch": 0.95, "grad_norm": 8.264202377684796, "learning_rate": 6.975298529595554e-08, "loss": 0.617, "step": 33116 }, { "epoch": 0.95, "grad_norm": 9.871809576819434, "learning_rate": 6.967581255053813e-08, "loss": 0.8482, "step": 33117 }, { "epoch": 0.95, "grad_norm": 10.089144290747916, "learning_rate": 6.959868222009224e-08, "loss": 0.7009, "step": 33118 }, { "epoch": 0.95, "grad_norm": 6.18536910538479, "learning_rate": 6.952159430528238e-08, "loss": 0.6124, "step": 33119 }, { "epoch": 0.95, "grad_norm": 8.528835961161507, "learning_rate": 6.944454880677076e-08, "loss": 0.38, "step": 33120 }, { "epoch": 0.95, "grad_norm": 2.996255663857261, "learning_rate": 6.936754572522075e-08, "loss": 0.2381, "step": 33121 }, { "epoch": 0.95, "grad_norm": 4.40079135822508, "learning_rate": 6.929058506129515e-08, "loss": 0.4933, "step": 33122 }, { "epoch": 0.95, "grad_norm": 7.709252325039881, "learning_rate": 6.921366681565455e-08, "loss": 0.2664, "step": 33123 }, { "epoch": 0.95, "grad_norm": 4.660115116318358, "learning_rate": 6.913679098896232e-08, "loss": 0.639, "step": 33124 }, { "epoch": 0.95, "grad_norm": 7.8843173961976944, "learning_rate": 6.9059957581879e-08, "loss": 0.5727, "step": 33125 }, { "epoch": 0.95, "grad_norm": 2.0852078651165735, "learning_rate": 6.898316659506632e-08, "loss": 0.1299, "step": 33126 }, { "epoch": 0.95, "grad_norm": 7.708941664189622, "learning_rate": 6.890641802918374e-08, "loss": 0.7626, "step": 33127 }, { "epoch": 0.95, "grad_norm": 8.454298096661867, "learning_rate": 6.882971188489241e-08, "loss": 0.5347, "step": 33128 }, { "epoch": 0.95, "grad_norm": 4.134896054235644, "learning_rate": 6.875304816285178e-08, "loss": 0.3549, "step": 33129 }, { "epoch": 0.95, "grad_norm": 4.406727785219495, "learning_rate": 6.867642686372133e-08, "loss": 0.398, "step": 33130 }, { "epoch": 0.95, "grad_norm": 7.706262471853345, "learning_rate": 6.859984798816055e-08, "loss": 0.6201, "step": 33131 }, { "epoch": 0.95, "grad_norm": 2.3741220307051156, "learning_rate": 6.852331153682779e-08, "loss": 0.0983, "step": 33132 }, { "epoch": 0.95, "grad_norm": 8.13861937903036, "learning_rate": 6.844681751038251e-08, "loss": 0.4806, "step": 33133 }, { "epoch": 0.95, "grad_norm": 5.335775392452, "learning_rate": 6.837036590948143e-08, "loss": 0.445, "step": 33134 }, { "epoch": 0.95, "grad_norm": 10.451251285417394, "learning_rate": 6.829395673478345e-08, "loss": 0.645, "step": 33135 }, { "epoch": 0.95, "grad_norm": 2.844141796914996, "learning_rate": 6.821758998694528e-08, "loss": 0.3959, "step": 33136 }, { "epoch": 0.95, "grad_norm": 6.164253937778746, "learning_rate": 6.814126566662361e-08, "loss": 0.2223, "step": 33137 }, { "epoch": 0.95, "grad_norm": 5.183518179922603, "learning_rate": 6.806498377447568e-08, "loss": 0.4846, "step": 33138 }, { "epoch": 0.95, "grad_norm": 5.175457408058673, "learning_rate": 6.79887443111571e-08, "loss": 0.3748, "step": 33139 }, { "epoch": 0.95, "grad_norm": 4.78368268426402, "learning_rate": 6.791254727732455e-08, "loss": 0.2775, "step": 33140 }, { "epoch": 0.95, "grad_norm": 3.706380226201814, "learning_rate": 6.783639267363251e-08, "loss": 0.1045, "step": 33141 }, { "epoch": 0.95, "grad_norm": 6.3977619788018325, "learning_rate": 6.776028050073769e-08, "loss": 0.6255, "step": 33142 }, { "epoch": 0.95, "grad_norm": 4.392543441479483, "learning_rate": 6.768421075929343e-08, "loss": 0.3289, "step": 33143 }, { "epoch": 0.95, "grad_norm": 6.890759594353181, "learning_rate": 6.760818344995423e-08, "loss": 0.5017, "step": 33144 }, { "epoch": 0.95, "grad_norm": 3.8061473325215567, "learning_rate": 6.753219857337512e-08, "loss": 0.2461, "step": 33145 }, { "epoch": 0.95, "grad_norm": 6.134668970667382, "learning_rate": 6.745625613020835e-08, "loss": 0.6315, "step": 33146 }, { "epoch": 0.95, "grad_norm": 5.874430162106081, "learning_rate": 6.738035612110894e-08, "loss": 0.4617, "step": 33147 }, { "epoch": 0.95, "grad_norm": 1.3845209677662809, "learning_rate": 6.730449854672915e-08, "loss": 0.2208, "step": 33148 }, { "epoch": 0.95, "grad_norm": 6.85822524599333, "learning_rate": 6.722868340772071e-08, "loss": 0.589, "step": 33149 }, { "epoch": 0.95, "grad_norm": 5.593319540669861, "learning_rate": 6.715291070473695e-08, "loss": 0.2443, "step": 33150 }, { "epoch": 0.95, "grad_norm": 3.4839698316448264, "learning_rate": 6.707718043842959e-08, "loss": 0.2587, "step": 33151 }, { "epoch": 0.95, "grad_norm": 6.50733698725369, "learning_rate": 6.700149260944921e-08, "loss": 0.7226, "step": 33152 }, { "epoch": 0.95, "grad_norm": 2.539906264738191, "learning_rate": 6.692584721844752e-08, "loss": 0.1216, "step": 33153 }, { "epoch": 0.95, "grad_norm": 4.6459332043415005, "learning_rate": 6.685024426607623e-08, "loss": 0.3312, "step": 33154 }, { "epoch": 0.95, "grad_norm": 2.773785980252551, "learning_rate": 6.677468375298479e-08, "loss": 0.1285, "step": 33155 }, { "epoch": 0.95, "grad_norm": 5.7689197089628035, "learning_rate": 6.669916567982327e-08, "loss": 0.404, "step": 33156 }, { "epoch": 0.95, "grad_norm": 5.883571945553227, "learning_rate": 6.662369004724112e-08, "loss": 0.3737, "step": 33157 }, { "epoch": 0.95, "grad_norm": 4.3498664988936175, "learning_rate": 6.654825685588784e-08, "loss": 0.3557, "step": 33158 }, { "epoch": 0.95, "grad_norm": 3.116024651334404, "learning_rate": 6.64728661064129e-08, "loss": 0.2465, "step": 33159 }, { "epoch": 0.95, "grad_norm": 7.037539335313705, "learning_rate": 6.639751779946412e-08, "loss": 0.2817, "step": 33160 }, { "epoch": 0.95, "grad_norm": 5.57928262447147, "learning_rate": 6.632221193568989e-08, "loss": 0.5078, "step": 33161 }, { "epoch": 0.95, "grad_norm": 4.243155802728583, "learning_rate": 6.62469485157391e-08, "loss": 0.3242, "step": 33162 }, { "epoch": 0.95, "grad_norm": 6.562056680873429, "learning_rate": 6.617172754025791e-08, "loss": 0.4144, "step": 33163 }, { "epoch": 0.95, "grad_norm": 8.712005987624185, "learning_rate": 6.60965490098936e-08, "loss": 0.8, "step": 33164 }, { "epoch": 0.95, "grad_norm": 5.076680139251152, "learning_rate": 6.602141292529341e-08, "loss": 0.5292, "step": 33165 }, { "epoch": 0.95, "grad_norm": 3.788934198153971, "learning_rate": 6.594631928710294e-08, "loss": 0.3209, "step": 33166 }, { "epoch": 0.95, "grad_norm": 3.5259347088968482, "learning_rate": 6.587126809596944e-08, "loss": 0.5862, "step": 33167 }, { "epoch": 0.95, "grad_norm": 7.1542621500274866, "learning_rate": 6.579625935253797e-08, "loss": 0.429, "step": 33168 }, { "epoch": 0.95, "grad_norm": 3.546039718212927, "learning_rate": 6.572129305745356e-08, "loss": 0.265, "step": 33169 }, { "epoch": 0.95, "grad_norm": 7.213639403047177, "learning_rate": 6.564636921136125e-08, "loss": 0.486, "step": 33170 }, { "epoch": 0.95, "grad_norm": 6.133265756756536, "learning_rate": 6.557148781490608e-08, "loss": 0.4542, "step": 33171 }, { "epoch": 0.95, "grad_norm": 3.6107949399187187, "learning_rate": 6.549664886873142e-08, "loss": 0.1231, "step": 33172 }, { "epoch": 0.95, "grad_norm": 2.177972474360824, "learning_rate": 6.542185237348176e-08, "loss": 0.2139, "step": 33173 }, { "epoch": 0.95, "grad_norm": 6.193411190185529, "learning_rate": 6.534709832980046e-08, "loss": 0.3816, "step": 33174 }, { "epoch": 0.95, "grad_norm": 3.8619433671660066, "learning_rate": 6.527238673833036e-08, "loss": 0.2107, "step": 33175 }, { "epoch": 0.95, "grad_norm": 1.7784590941924432, "learning_rate": 6.519771759971427e-08, "loss": 0.2952, "step": 33176 }, { "epoch": 0.95, "grad_norm": 4.0406649747613255, "learning_rate": 6.512309091459445e-08, "loss": 0.3762, "step": 33177 }, { "epoch": 0.95, "grad_norm": 3.0634902015020042, "learning_rate": 6.504850668361318e-08, "loss": 0.3496, "step": 33178 }, { "epoch": 0.95, "grad_norm": 6.720193539284738, "learning_rate": 6.497396490741214e-08, "loss": 0.7005, "step": 33179 }, { "epoch": 0.95, "grad_norm": 5.474238665873755, "learning_rate": 6.489946558663252e-08, "loss": 0.5365, "step": 33180 }, { "epoch": 0.95, "grad_norm": 4.721180346810658, "learning_rate": 6.482500872191488e-08, "loss": 0.4546, "step": 33181 }, { "epoch": 0.95, "grad_norm": 7.086685729977557, "learning_rate": 6.475059431390041e-08, "loss": 0.3867, "step": 33182 }, { "epoch": 0.95, "grad_norm": 5.292173071031523, "learning_rate": 6.467622236322913e-08, "loss": 0.4174, "step": 33183 }, { "epoch": 0.95, "grad_norm": 2.1654694455910386, "learning_rate": 6.460189287053997e-08, "loss": 0.1725, "step": 33184 }, { "epoch": 0.95, "grad_norm": 6.1940993748429385, "learning_rate": 6.452760583647355e-08, "loss": 0.3796, "step": 33185 }, { "epoch": 0.95, "grad_norm": 7.411696479565269, "learning_rate": 6.445336126166824e-08, "loss": 0.8121, "step": 33186 }, { "epoch": 0.95, "grad_norm": 8.162131107517729, "learning_rate": 6.437915914676295e-08, "loss": 0.4532, "step": 33187 }, { "epoch": 0.95, "grad_norm": 5.36428281318843, "learning_rate": 6.43049994923961e-08, "loss": 0.3784, "step": 33188 }, { "epoch": 0.95, "grad_norm": 8.275645450416674, "learning_rate": 6.423088229920605e-08, "loss": 0.4748, "step": 33189 }, { "epoch": 0.95, "grad_norm": 5.059279748068283, "learning_rate": 6.415680756783005e-08, "loss": 0.4796, "step": 33190 }, { "epoch": 0.95, "grad_norm": 5.009361824879778, "learning_rate": 6.408277529890428e-08, "loss": 0.488, "step": 33191 }, { "epoch": 0.95, "grad_norm": 5.5672945511745064, "learning_rate": 6.400878549306766e-08, "loss": 0.5588, "step": 33192 }, { "epoch": 0.95, "grad_norm": 6.5489560554767, "learning_rate": 6.393483815095525e-08, "loss": 0.4281, "step": 33193 }, { "epoch": 0.95, "grad_norm": 6.37808760501678, "learning_rate": 6.386093327320375e-08, "loss": 0.2034, "step": 33194 }, { "epoch": 0.95, "grad_norm": 5.179129407916951, "learning_rate": 6.378707086044877e-08, "loss": 0.7103, "step": 33195 }, { "epoch": 0.95, "grad_norm": 4.378538608005743, "learning_rate": 6.371325091332647e-08, "loss": 0.2554, "step": 33196 }, { "epoch": 0.95, "grad_norm": 4.441391393792663, "learning_rate": 6.363947343247079e-08, "loss": 0.2545, "step": 33197 }, { "epoch": 0.95, "grad_norm": 9.18059545044367, "learning_rate": 6.356573841851732e-08, "loss": 0.6769, "step": 33198 }, { "epoch": 0.95, "grad_norm": 6.830766110326989, "learning_rate": 6.349204587209945e-08, "loss": 0.3914, "step": 33199 }, { "epoch": 0.95, "grad_norm": 3.574943127213107, "learning_rate": 6.341839579385168e-08, "loss": 0.1361, "step": 33200 }, { "epoch": 0.95, "grad_norm": 4.940271324127522, "learning_rate": 6.334478818440848e-08, "loss": 0.3611, "step": 33201 }, { "epoch": 0.95, "grad_norm": 4.132030622222962, "learning_rate": 6.327122304440159e-08, "loss": 0.3812, "step": 33202 }, { "epoch": 0.95, "grad_norm": 5.594803822736852, "learning_rate": 6.319770037446438e-08, "loss": 0.2569, "step": 33203 }, { "epoch": 0.95, "grad_norm": 8.20359115002725, "learning_rate": 6.312422017523024e-08, "loss": 0.8621, "step": 33204 }, { "epoch": 0.95, "grad_norm": 4.120566297286855, "learning_rate": 6.305078244732976e-08, "loss": 0.5448, "step": 33205 }, { "epoch": 0.95, "grad_norm": 4.143772872010135, "learning_rate": 6.297738719139634e-08, "loss": 0.4646, "step": 33206 }, { "epoch": 0.95, "grad_norm": 4.807841572938629, "learning_rate": 6.290403440806004e-08, "loss": 0.6902, "step": 33207 }, { "epoch": 0.95, "grad_norm": 7.018814007814564, "learning_rate": 6.283072409795255e-08, "loss": 0.5091, "step": 33208 }, { "epoch": 0.95, "grad_norm": 8.569480167055474, "learning_rate": 6.275745626170504e-08, "loss": 0.5655, "step": 33209 }, { "epoch": 0.95, "grad_norm": 9.649684068716322, "learning_rate": 6.268423089994647e-08, "loss": 0.6581, "step": 33210 }, { "epoch": 0.95, "grad_norm": 6.1560812602662836, "learning_rate": 6.261104801330798e-08, "loss": 0.3138, "step": 33211 }, { "epoch": 0.95, "grad_norm": 6.900781805556025, "learning_rate": 6.253790760241795e-08, "loss": 0.6495, "step": 33212 }, { "epoch": 0.95, "grad_norm": 7.484936018207187, "learning_rate": 6.246480966790757e-08, "loss": 0.5876, "step": 33213 }, { "epoch": 0.95, "grad_norm": 1.4334631608321526, "learning_rate": 6.239175421040355e-08, "loss": 0.0687, "step": 33214 }, { "epoch": 0.95, "grad_norm": 5.5845673274482275, "learning_rate": 6.231874123053594e-08, "loss": 0.5114, "step": 33215 }, { "epoch": 0.95, "grad_norm": 2.2905593248556757, "learning_rate": 6.2245770728932e-08, "loss": 0.1692, "step": 33216 }, { "epoch": 0.95, "grad_norm": 8.410524607386137, "learning_rate": 6.217284270622014e-08, "loss": 0.5086, "step": 33217 }, { "epoch": 0.95, "grad_norm": 4.40628067979886, "learning_rate": 6.209995716302708e-08, "loss": 0.271, "step": 33218 }, { "epoch": 0.95, "grad_norm": 3.354100757843794, "learning_rate": 6.202711409997953e-08, "loss": 0.1887, "step": 33219 }, { "epoch": 0.95, "grad_norm": 3.820658350230626, "learning_rate": 6.195431351770532e-08, "loss": 0.3362, "step": 33220 }, { "epoch": 0.95, "grad_norm": 3.7977110644733933, "learning_rate": 6.188155541683005e-08, "loss": 0.3929, "step": 33221 }, { "epoch": 0.95, "grad_norm": 4.133817669476376, "learning_rate": 6.180883979797991e-08, "loss": 0.1972, "step": 33222 }, { "epoch": 0.95, "grad_norm": 2.8179623012823227, "learning_rate": 6.17361666617805e-08, "loss": 0.5882, "step": 33223 }, { "epoch": 0.95, "grad_norm": 7.010230830442143, "learning_rate": 6.166353600885632e-08, "loss": 0.5036, "step": 33224 }, { "epoch": 0.95, "grad_norm": 4.564992432896171, "learning_rate": 6.159094783983299e-08, "loss": 0.3507, "step": 33225 }, { "epoch": 0.95, "grad_norm": 6.438020703635888, "learning_rate": 6.151840215533445e-08, "loss": 0.2468, "step": 33226 }, { "epoch": 0.95, "grad_norm": 2.783345044181842, "learning_rate": 6.144589895598519e-08, "loss": 0.2814, "step": 33227 }, { "epoch": 0.95, "grad_norm": 3.8604297818192546, "learning_rate": 6.137343824240916e-08, "loss": 0.347, "step": 33228 }, { "epoch": 0.95, "grad_norm": 8.034249751132219, "learning_rate": 6.13010200152292e-08, "loss": 0.8827, "step": 33229 }, { "epoch": 0.95, "grad_norm": 6.556248257384964, "learning_rate": 6.122864427506814e-08, "loss": 0.374, "step": 33230 }, { "epoch": 0.95, "grad_norm": 3.1368963017930835, "learning_rate": 6.115631102254938e-08, "loss": 0.3394, "step": 33231 }, { "epoch": 0.95, "grad_norm": 5.237200666081354, "learning_rate": 6.108402025829462e-08, "loss": 0.7444, "step": 33232 }, { "epoch": 0.95, "grad_norm": 2.906563588113379, "learning_rate": 6.101177198292618e-08, "loss": 0.2633, "step": 33233 }, { "epoch": 0.95, "grad_norm": 4.668312117585023, "learning_rate": 6.093956619706521e-08, "loss": 0.5994, "step": 33234 }, { "epoch": 0.95, "grad_norm": 4.894035880555515, "learning_rate": 6.086740290133286e-08, "loss": 0.6061, "step": 33235 }, { "epoch": 0.95, "grad_norm": 3.430390236011886, "learning_rate": 6.079528209635033e-08, "loss": 0.3763, "step": 33236 }, { "epoch": 0.95, "grad_norm": 8.055959251490162, "learning_rate": 6.072320378273766e-08, "loss": 0.9378, "step": 33237 }, { "epoch": 0.95, "grad_norm": 5.831624900320888, "learning_rate": 6.065116796111548e-08, "loss": 0.582, "step": 33238 }, { "epoch": 0.95, "grad_norm": 7.104662966428924, "learning_rate": 6.057917463210272e-08, "loss": 0.6373, "step": 33239 }, { "epoch": 0.95, "grad_norm": 4.794411688425427, "learning_rate": 6.050722379631946e-08, "loss": 0.4155, "step": 33240 }, { "epoch": 0.95, "grad_norm": 7.495071953751942, "learning_rate": 6.043531545438408e-08, "loss": 0.6525, "step": 33241 }, { "epoch": 0.95, "grad_norm": 8.048038139351481, "learning_rate": 6.036344960691554e-08, "loss": 0.8188, "step": 33242 }, { "epoch": 0.95, "grad_norm": 3.814041123258015, "learning_rate": 6.029162625453278e-08, "loss": 0.348, "step": 33243 }, { "epoch": 0.95, "grad_norm": 15.728789989011634, "learning_rate": 6.021984539785197e-08, "loss": 0.6029, "step": 33244 }, { "epoch": 0.95, "grad_norm": 7.693138783748043, "learning_rate": 6.014810703749208e-08, "loss": 0.3228, "step": 33245 }, { "epoch": 0.95, "grad_norm": 4.49917168411302, "learning_rate": 6.007641117406982e-08, "loss": 0.3106, "step": 33246 }, { "epoch": 0.95, "grad_norm": 5.112425986875796, "learning_rate": 6.000475780820192e-08, "loss": 0.7291, "step": 33247 }, { "epoch": 0.95, "grad_norm": 7.439119723604096, "learning_rate": 5.993314694050457e-08, "loss": 0.6798, "step": 33248 }, { "epoch": 0.95, "grad_norm": 4.312037954595124, "learning_rate": 5.986157857159503e-08, "loss": 0.4524, "step": 33249 }, { "epoch": 0.95, "grad_norm": 7.593951642534245, "learning_rate": 5.979005270208726e-08, "loss": 0.4905, "step": 33250 }, { "epoch": 0.95, "grad_norm": 4.658165736018384, "learning_rate": 5.971856933259746e-08, "loss": 0.1499, "step": 33251 }, { "epoch": 0.95, "grad_norm": 4.12099178242298, "learning_rate": 5.964712846374121e-08, "loss": 0.3523, "step": 33252 }, { "epoch": 0.95, "grad_norm": 4.790235167366964, "learning_rate": 5.957573009613138e-08, "loss": 0.3571, "step": 33253 }, { "epoch": 0.95, "grad_norm": 6.829539626036375, "learning_rate": 5.950437423038413e-08, "loss": 0.3435, "step": 33254 }, { "epoch": 0.95, "grad_norm": 4.655481159816538, "learning_rate": 5.943306086711176e-08, "loss": 0.582, "step": 33255 }, { "epoch": 0.95, "grad_norm": 5.171397299472522, "learning_rate": 5.9361790006929323e-08, "loss": 0.321, "step": 33256 }, { "epoch": 0.95, "grad_norm": 6.396702495088306, "learning_rate": 5.929056165044855e-08, "loss": 0.5935, "step": 33257 }, { "epoch": 0.95, "grad_norm": 3.904403921685498, "learning_rate": 5.921937579828341e-08, "loss": 0.1977, "step": 33258 }, { "epoch": 0.95, "grad_norm": 4.836796969208129, "learning_rate": 5.9148232451044504e-08, "loss": 0.2117, "step": 33259 }, { "epoch": 0.95, "grad_norm": 5.039822637903749, "learning_rate": 5.9077131609345807e-08, "loss": 0.3485, "step": 33260 }, { "epoch": 0.95, "grad_norm": 6.6972591637073755, "learning_rate": 5.900607327379793e-08, "loss": 0.3006, "step": 33261 }, { "epoch": 0.95, "grad_norm": 5.291390429317806, "learning_rate": 5.89350574450126e-08, "loss": 0.3928, "step": 33262 }, { "epoch": 0.95, "grad_norm": 4.012242142838315, "learning_rate": 5.8864084123601004e-08, "loss": 0.4665, "step": 33263 }, { "epoch": 0.95, "grad_norm": 2.829152937328152, "learning_rate": 5.8793153310172654e-08, "loss": 0.1257, "step": 33264 }, { "epoch": 0.95, "grad_norm": 8.7716846430461, "learning_rate": 5.872226500533873e-08, "loss": 0.6681, "step": 33265 }, { "epoch": 0.95, "grad_norm": 6.529439027124892, "learning_rate": 5.865141920970874e-08, "loss": 0.5118, "step": 33266 }, { "epoch": 0.95, "grad_norm": 6.049348665281198, "learning_rate": 5.858061592389164e-08, "loss": 0.4924, "step": 33267 }, { "epoch": 0.95, "grad_norm": 5.309754458783113, "learning_rate": 5.8509855148498054e-08, "loss": 0.6927, "step": 33268 }, { "epoch": 0.95, "grad_norm": 2.6320938352015317, "learning_rate": 5.8439136884135275e-08, "loss": 0.3639, "step": 33269 }, { "epoch": 0.95, "grad_norm": 5.336779727623723, "learning_rate": 5.836846113141226e-08, "loss": 0.3063, "step": 33270 }, { "epoch": 0.95, "grad_norm": 4.474441105556345, "learning_rate": 5.8297827890936853e-08, "loss": 0.3531, "step": 33271 }, { "epoch": 0.95, "grad_norm": 9.96264940099561, "learning_rate": 5.822723716331691e-08, "loss": 0.5099, "step": 33272 }, { "epoch": 0.95, "grad_norm": 4.505314364499382, "learning_rate": 5.8156688949159156e-08, "loss": 0.4731, "step": 33273 }, { "epoch": 0.95, "grad_norm": 8.102617391920559, "learning_rate": 5.8086183249072e-08, "loss": 0.6589, "step": 33274 }, { "epoch": 0.95, "grad_norm": 8.505551488758956, "learning_rate": 5.8015720063659964e-08, "loss": 0.4659, "step": 33275 }, { "epoch": 0.95, "grad_norm": 4.91936712404691, "learning_rate": 5.794529939353033e-08, "loss": 0.382, "step": 33276 }, { "epoch": 0.95, "grad_norm": 8.013930530171786, "learning_rate": 5.787492123928928e-08, "loss": 0.4869, "step": 33277 }, { "epoch": 0.95, "grad_norm": 4.464305855296886, "learning_rate": 5.780458560154134e-08, "loss": 0.3568, "step": 33278 }, { "epoch": 0.95, "grad_norm": 5.925274117482631, "learning_rate": 5.7734292480892685e-08, "loss": 0.1779, "step": 33279 }, { "epoch": 0.95, "grad_norm": 7.483329938903356, "learning_rate": 5.766404187794672e-08, "loss": 0.5975, "step": 33280 }, { "epoch": 0.95, "grad_norm": 4.5356605899923625, "learning_rate": 5.7593833793308515e-08, "loss": 0.0667, "step": 33281 }, { "epoch": 0.95, "grad_norm": 5.51284503851697, "learning_rate": 5.752366822758204e-08, "loss": 0.2956, "step": 33282 }, { "epoch": 0.95, "grad_norm": 5.251511605948821, "learning_rate": 5.745354518137125e-08, "loss": 0.2431, "step": 33283 }, { "epoch": 0.95, "grad_norm": 7.834400729915819, "learning_rate": 5.7383464655278996e-08, "loss": 0.5078, "step": 33284 }, { "epoch": 0.95, "grad_norm": 3.2948337943396617, "learning_rate": 5.731342664990813e-08, "loss": 0.2177, "step": 33285 }, { "epoch": 0.95, "grad_norm": 5.846556576040677, "learning_rate": 5.7243431165861505e-08, "loss": 0.6318, "step": 33286 }, { "epoch": 0.95, "grad_norm": 7.803192514808073, "learning_rate": 5.7173478203740864e-08, "loss": 0.5556, "step": 33287 }, { "epoch": 0.95, "grad_norm": 4.419872679796923, "learning_rate": 5.7103567764148494e-08, "loss": 0.2818, "step": 33288 }, { "epoch": 0.95, "grad_norm": 6.924079698907202, "learning_rate": 5.703369984768503e-08, "loss": 0.6843, "step": 33289 }, { "epoch": 0.95, "grad_norm": 3.819077961226077, "learning_rate": 5.696387445495277e-08, "loss": 0.2883, "step": 33290 }, { "epoch": 0.95, "grad_norm": 5.035343944996129, "learning_rate": 5.689409158655124e-08, "loss": 0.4523, "step": 33291 }, { "epoch": 0.95, "grad_norm": 5.645756677574036, "learning_rate": 5.6824351243081054e-08, "loss": 0.2821, "step": 33292 }, { "epoch": 0.95, "grad_norm": 6.028523792544826, "learning_rate": 5.67546534251423e-08, "loss": 0.4393, "step": 33293 }, { "epoch": 0.95, "grad_norm": 4.308510330789874, "learning_rate": 5.6684998133335056e-08, "loss": 0.2924, "step": 33294 }, { "epoch": 0.95, "grad_norm": 8.124575501136626, "learning_rate": 5.6615385368257726e-08, "loss": 0.7239, "step": 33295 }, { "epoch": 0.95, "grad_norm": 13.119695045485923, "learning_rate": 5.6545815130510386e-08, "loss": 0.3104, "step": 33296 }, { "epoch": 0.95, "grad_norm": 5.446413867403502, "learning_rate": 5.647628742069033e-08, "loss": 0.5002, "step": 33297 }, { "epoch": 0.95, "grad_norm": 3.018680793253228, "learning_rate": 5.640680223939543e-08, "loss": 0.1587, "step": 33298 }, { "epoch": 0.95, "grad_norm": 4.063190812283662, "learning_rate": 5.6337359587225194e-08, "loss": 0.5091, "step": 33299 }, { "epoch": 0.95, "grad_norm": 9.383335362587184, "learning_rate": 5.626795946477526e-08, "loss": 0.7605, "step": 33300 }, { "epoch": 0.95, "grad_norm": 4.90465506366408, "learning_rate": 5.619860187264292e-08, "loss": 0.5396, "step": 33301 }, { "epoch": 0.95, "grad_norm": 10.396055617978838, "learning_rate": 5.612928681142604e-08, "loss": 0.7032, "step": 33302 }, { "epoch": 0.95, "grad_norm": 4.631331671896836, "learning_rate": 5.606001428172081e-08, "loss": 0.4127, "step": 33303 }, { "epoch": 0.95, "grad_norm": 3.609881014672643, "learning_rate": 5.599078428412175e-08, "loss": 0.1858, "step": 33304 }, { "epoch": 0.95, "grad_norm": 6.834023047351685, "learning_rate": 5.592159681922504e-08, "loss": 0.6957, "step": 33305 }, { "epoch": 0.95, "grad_norm": 4.710322300357832, "learning_rate": 5.585245188762689e-08, "loss": 0.3745, "step": 33306 }, { "epoch": 0.95, "grad_norm": 5.666561485698139, "learning_rate": 5.57833494899207e-08, "loss": 0.4017, "step": 33307 }, { "epoch": 0.95, "grad_norm": 8.189717465698603, "learning_rate": 5.5714289626701554e-08, "loss": 0.3693, "step": 33308 }, { "epoch": 0.95, "grad_norm": 10.480275565011594, "learning_rate": 5.564527229856398e-08, "loss": 1.181, "step": 33309 }, { "epoch": 0.95, "grad_norm": 2.7054962826549773, "learning_rate": 5.5576297506101385e-08, "loss": 0.2857, "step": 33310 }, { "epoch": 0.95, "grad_norm": 9.102339551769333, "learning_rate": 5.550736524990719e-08, "loss": 0.7258, "step": 33311 }, { "epoch": 0.95, "grad_norm": 6.012023481809198, "learning_rate": 5.543847553057369e-08, "loss": 0.8707, "step": 33312 }, { "epoch": 0.95, "grad_norm": 3.904130528519227, "learning_rate": 5.5369628348694857e-08, "loss": 0.2815, "step": 33313 }, { "epoch": 0.95, "grad_norm": 4.790194603186311, "learning_rate": 5.5300823704861895e-08, "loss": 0.6091, "step": 33314 }, { "epoch": 0.95, "grad_norm": 5.033720606416603, "learning_rate": 5.523206159966765e-08, "loss": 0.5178, "step": 33315 }, { "epoch": 0.95, "grad_norm": 9.447264666002251, "learning_rate": 5.516334203370277e-08, "loss": 0.7514, "step": 33316 }, { "epoch": 0.95, "grad_norm": 3.6468865571741915, "learning_rate": 5.5094665007559e-08, "loss": 0.3705, "step": 33317 }, { "epoch": 0.95, "grad_norm": 7.529231422228883, "learning_rate": 5.502603052182698e-08, "loss": 0.4357, "step": 33318 }, { "epoch": 0.95, "grad_norm": 8.796600500129964, "learning_rate": 5.4957438577097345e-08, "loss": 0.7366, "step": 33319 }, { "epoch": 0.95, "grad_norm": 11.23095922841988, "learning_rate": 5.488888917395963e-08, "loss": 0.7513, "step": 33320 }, { "epoch": 0.95, "grad_norm": 5.163769504470575, "learning_rate": 5.4820382313003906e-08, "loss": 0.3817, "step": 33321 }, { "epoch": 0.95, "grad_norm": 7.993280449767343, "learning_rate": 5.475191799482027e-08, "loss": 0.8203, "step": 33322 }, { "epoch": 0.95, "grad_norm": 2.5137297142742994, "learning_rate": 5.468349621999658e-08, "loss": 0.2468, "step": 33323 }, { "epoch": 0.95, "grad_norm": 10.107517738071966, "learning_rate": 5.461511698912181e-08, "loss": 0.7397, "step": 33324 }, { "epoch": 0.95, "grad_norm": 1.5740535699118847, "learning_rate": 5.4546780302784376e-08, "loss": 0.1527, "step": 33325 }, { "epoch": 0.95, "grad_norm": 7.006887725745026, "learning_rate": 5.4478486161571584e-08, "loss": 0.7614, "step": 33326 }, { "epoch": 0.95, "grad_norm": 4.4373314516536535, "learning_rate": 5.4410234566071865e-08, "loss": 0.371, "step": 33327 }, { "epoch": 0.95, "grad_norm": 9.004134923716286, "learning_rate": 5.4342025516871954e-08, "loss": 0.7954, "step": 33328 }, { "epoch": 0.95, "grad_norm": 6.024278713121323, "learning_rate": 5.427385901455917e-08, "loss": 0.7401, "step": 33329 }, { "epoch": 0.95, "grad_norm": 6.817511683678905, "learning_rate": 5.420573505971916e-08, "loss": 0.8095, "step": 33330 }, { "epoch": 0.95, "grad_norm": 2.2752602011830514, "learning_rate": 5.4137653652938104e-08, "loss": 0.1173, "step": 33331 }, { "epoch": 0.95, "grad_norm": 3.31995639462158, "learning_rate": 5.406961479480166e-08, "loss": 0.2789, "step": 33332 }, { "epoch": 0.95, "grad_norm": 5.276587329701028, "learning_rate": 5.400161848589547e-08, "loss": 0.2467, "step": 33333 }, { "epoch": 0.95, "grad_norm": 4.149238121233142, "learning_rate": 5.393366472680461e-08, "loss": 0.2998, "step": 33334 }, { "epoch": 0.95, "grad_norm": 4.800982391141586, "learning_rate": 5.386575351811307e-08, "loss": 0.3256, "step": 33335 }, { "epoch": 0.95, "grad_norm": 9.355169915063513, "learning_rate": 5.3797884860405936e-08, "loss": 0.6434, "step": 33336 }, { "epoch": 0.95, "grad_norm": 6.2190441272897985, "learning_rate": 5.3730058754266624e-08, "loss": 0.4438, "step": 33337 }, { "epoch": 0.95, "grad_norm": 3.0167117260804575, "learning_rate": 5.366227520027856e-08, "loss": 0.6138, "step": 33338 }, { "epoch": 0.95, "grad_norm": 5.326933159748462, "learning_rate": 5.359453419902461e-08, "loss": 0.4763, "step": 33339 }, { "epoch": 0.95, "grad_norm": 3.76635943963097, "learning_rate": 5.352683575108819e-08, "loss": 0.3116, "step": 33340 }, { "epoch": 0.95, "grad_norm": 5.913624352938806, "learning_rate": 5.345917985705107e-08, "loss": 0.8238, "step": 33341 }, { "epoch": 0.95, "grad_norm": 7.143241953702175, "learning_rate": 5.339156651749555e-08, "loss": 0.8827, "step": 33342 }, { "epoch": 0.95, "grad_norm": 4.916058254852785, "learning_rate": 5.3323995733003397e-08, "loss": 0.1653, "step": 33343 }, { "epoch": 0.95, "grad_norm": 7.342397492225403, "learning_rate": 5.325646750415636e-08, "loss": 0.5678, "step": 33344 }, { "epoch": 0.95, "grad_norm": 9.771095830865743, "learning_rate": 5.318898183153454e-08, "loss": 0.5171, "step": 33345 }, { "epoch": 0.95, "grad_norm": 1.2218090937174395, "learning_rate": 5.312153871571857e-08, "loss": 0.0407, "step": 33346 }, { "epoch": 0.95, "grad_norm": 7.952514265252096, "learning_rate": 5.305413815728966e-08, "loss": 0.8287, "step": 33347 }, { "epoch": 0.96, "grad_norm": 5.396055410913619, "learning_rate": 5.298678015682624e-08, "loss": 0.4147, "step": 33348 }, { "epoch": 0.96, "grad_norm": 4.824578720438907, "learning_rate": 5.291946471490839e-08, "loss": 0.1585, "step": 33349 }, { "epoch": 0.96, "grad_norm": 5.350283103654012, "learning_rate": 5.285219183211565e-08, "loss": 0.5255, "step": 33350 }, { "epoch": 0.96, "grad_norm": 3.90281514021312, "learning_rate": 5.278496150902646e-08, "loss": 0.0396, "step": 33351 }, { "epoch": 0.96, "grad_norm": 5.374554815264263, "learning_rate": 5.2717773746219225e-08, "loss": 0.6647, "step": 33352 }, { "epoch": 0.96, "grad_norm": 7.042408477112663, "learning_rate": 5.265062854427128e-08, "loss": 0.9136, "step": 33353 }, { "epoch": 0.96, "grad_norm": 7.209732963495058, "learning_rate": 5.2583525903761034e-08, "loss": 0.4438, "step": 33354 }, { "epoch": 0.96, "grad_norm": 4.265828501559576, "learning_rate": 5.251646582526582e-08, "loss": 0.305, "step": 33355 }, { "epoch": 0.96, "grad_norm": 2.144412720119724, "learning_rate": 5.2449448309362385e-08, "loss": 0.1778, "step": 33356 }, { "epoch": 0.96, "grad_norm": 6.0400140365538535, "learning_rate": 5.238247335662694e-08, "loss": 0.5841, "step": 33357 }, { "epoch": 0.96, "grad_norm": 5.182470342217254, "learning_rate": 5.231554096763569e-08, "loss": 0.6202, "step": 33358 }, { "epoch": 0.96, "grad_norm": 2.201901089126343, "learning_rate": 5.2248651142964846e-08, "loss": 0.1553, "step": 33359 }, { "epoch": 0.96, "grad_norm": 8.09314584134145, "learning_rate": 5.21818038831895e-08, "loss": 0.6242, "step": 33360 }, { "epoch": 0.96, "grad_norm": 6.652311798753687, "learning_rate": 5.2114999188885293e-08, "loss": 0.7435, "step": 33361 }, { "epoch": 0.96, "grad_norm": 2.6680478688875704, "learning_rate": 5.204823706062623e-08, "loss": 0.108, "step": 33362 }, { "epoch": 0.96, "grad_norm": 4.62908152316833, "learning_rate": 5.198151749898739e-08, "loss": 0.4475, "step": 33363 }, { "epoch": 0.96, "grad_norm": 7.890053484775294, "learning_rate": 5.191484050454221e-08, "loss": 0.5542, "step": 33364 }, { "epoch": 0.96, "grad_norm": 6.470988747752167, "learning_rate": 5.184820607786467e-08, "loss": 0.6356, "step": 33365 }, { "epoch": 0.96, "grad_norm": 3.5367363027883227, "learning_rate": 5.17816142195271e-08, "loss": 0.3001, "step": 33366 }, { "epoch": 0.96, "grad_norm": 5.607684632525817, "learning_rate": 5.171506493010348e-08, "loss": 0.1889, "step": 33367 }, { "epoch": 0.96, "grad_norm": 8.905699970594537, "learning_rate": 5.164855821016557e-08, "loss": 0.7258, "step": 33368 }, { "epoch": 0.96, "grad_norm": 6.16258471510036, "learning_rate": 5.158209406028625e-08, "loss": 0.202, "step": 33369 }, { "epoch": 0.96, "grad_norm": 3.2413158669860924, "learning_rate": 5.1515672481036725e-08, "loss": 0.2458, "step": 33370 }, { "epoch": 0.96, "grad_norm": 7.577058288376198, "learning_rate": 5.1449293472988773e-08, "loss": 0.5033, "step": 33371 }, { "epoch": 0.96, "grad_norm": 6.29060062530692, "learning_rate": 5.1382957036713036e-08, "loss": 0.5004, "step": 33372 }, { "epoch": 0.96, "grad_norm": 4.632151821945865, "learning_rate": 5.131666317278072e-08, "loss": 0.1297, "step": 33373 }, { "epoch": 0.96, "grad_norm": 7.855682385317214, "learning_rate": 5.1250411881761385e-08, "loss": 0.7329, "step": 33374 }, { "epoch": 0.96, "grad_norm": 2.681414434196982, "learning_rate": 5.118420316422568e-08, "loss": 0.2053, "step": 33375 }, { "epoch": 0.96, "grad_norm": 5.681751930717542, "learning_rate": 5.111803702074314e-08, "loss": 0.5999, "step": 33376 }, { "epoch": 0.96, "grad_norm": 2.354548045017542, "learning_rate": 5.105191345188276e-08, "loss": 0.1777, "step": 33377 }, { "epoch": 0.96, "grad_norm": 7.366552590512513, "learning_rate": 5.0985832458213534e-08, "loss": 0.6644, "step": 33378 }, { "epoch": 0.96, "grad_norm": 4.316951472167167, "learning_rate": 5.091979404030334e-08, "loss": 0.4808, "step": 33379 }, { "epoch": 0.96, "grad_norm": 5.372890945773359, "learning_rate": 5.085379819872116e-08, "loss": 0.2968, "step": 33380 }, { "epoch": 0.96, "grad_norm": 4.450036567366297, "learning_rate": 5.078784493403432e-08, "loss": 0.169, "step": 33381 }, { "epoch": 0.96, "grad_norm": 8.242856487586762, "learning_rate": 5.0721934246810156e-08, "loss": 0.6763, "step": 33382 }, { "epoch": 0.96, "grad_norm": 6.457418200557753, "learning_rate": 5.065606613761653e-08, "loss": 0.5055, "step": 33383 }, { "epoch": 0.96, "grad_norm": 3.0899917432063626, "learning_rate": 5.0590240607019116e-08, "loss": 0.1089, "step": 33384 }, { "epoch": 0.96, "grad_norm": 5.090509308214017, "learning_rate": 5.0524457655583556e-08, "loss": 0.1824, "step": 33385 }, { "epoch": 0.96, "grad_norm": 8.000107526056558, "learning_rate": 5.0458717283877747e-08, "loss": 0.4841, "step": 33386 }, { "epoch": 0.96, "grad_norm": 7.323050167192678, "learning_rate": 5.0393019492465666e-08, "loss": 0.4638, "step": 33387 }, { "epoch": 0.96, "grad_norm": 4.342290509605732, "learning_rate": 5.032736428191298e-08, "loss": 0.3533, "step": 33388 }, { "epoch": 0.96, "grad_norm": 7.0504650266962265, "learning_rate": 5.0261751652784794e-08, "loss": 0.5151, "step": 33389 }, { "epoch": 0.96, "grad_norm": 10.161394477199105, "learning_rate": 5.019618160564565e-08, "loss": 0.6298, "step": 33390 }, { "epoch": 0.96, "grad_norm": 6.677553927004939, "learning_rate": 5.0130654141059e-08, "loss": 0.4327, "step": 33391 }, { "epoch": 0.96, "grad_norm": 5.439038771457303, "learning_rate": 5.0065169259588265e-08, "loss": 0.4992, "step": 33392 }, { "epoch": 0.96, "grad_norm": 3.3702265617509775, "learning_rate": 4.9999726961798556e-08, "loss": 0.2821, "step": 33393 }, { "epoch": 0.96, "grad_norm": 3.1401140452132705, "learning_rate": 4.9934327248251094e-08, "loss": 0.2392, "step": 33394 }, { "epoch": 0.96, "grad_norm": 4.459970239103778, "learning_rate": 4.9868970119509306e-08, "loss": 0.3085, "step": 33395 }, { "epoch": 0.96, "grad_norm": 5.329536676076132, "learning_rate": 4.980365557613498e-08, "loss": 0.2473, "step": 33396 }, { "epoch": 0.96, "grad_norm": 3.5043164101416413, "learning_rate": 4.973838361869043e-08, "loss": 0.2588, "step": 33397 }, { "epoch": 0.96, "grad_norm": 10.411062508189953, "learning_rate": 4.9673154247737444e-08, "loss": 0.5426, "step": 33398 }, { "epoch": 0.96, "grad_norm": 3.698086912865793, "learning_rate": 4.960796746383612e-08, "loss": 0.3751, "step": 33399 }, { "epoch": 0.96, "grad_norm": 5.447710776100611, "learning_rate": 4.954282326754878e-08, "loss": 0.3116, "step": 33400 }, { "epoch": 0.96, "grad_norm": 4.363660394627695, "learning_rate": 4.947772165943443e-08, "loss": 0.3747, "step": 33401 }, { "epoch": 0.96, "grad_norm": 7.380653994252375, "learning_rate": 4.9412662640054284e-08, "loss": 0.8391, "step": 33402 }, { "epoch": 0.96, "grad_norm": 5.702770023533712, "learning_rate": 4.9347646209966794e-08, "loss": 0.3342, "step": 33403 }, { "epoch": 0.96, "grad_norm": 7.645628119962079, "learning_rate": 4.928267236973205e-08, "loss": 0.6948, "step": 33404 }, { "epoch": 0.96, "grad_norm": 7.06544969789193, "learning_rate": 4.921774111990962e-08, "loss": 0.5611, "step": 33405 }, { "epoch": 0.96, "grad_norm": 2.8703553372562567, "learning_rate": 4.915285246105683e-08, "loss": 0.0642, "step": 33406 }, { "epoch": 0.96, "grad_norm": 5.716399329841974, "learning_rate": 4.908800639373268e-08, "loss": 0.6492, "step": 33407 }, { "epoch": 0.96, "grad_norm": 6.102816522462062, "learning_rate": 4.9023202918494496e-08, "loss": 0.4204, "step": 33408 }, { "epoch": 0.96, "grad_norm": 6.196073911041332, "learning_rate": 4.8958442035900164e-08, "loss": 0.4033, "step": 33409 }, { "epoch": 0.96, "grad_norm": 9.529762664813726, "learning_rate": 4.889372374650758e-08, "loss": 0.5223, "step": 33410 }, { "epoch": 0.96, "grad_norm": 5.544999614196159, "learning_rate": 4.8829048050871855e-08, "loss": 0.5329, "step": 33411 }, { "epoch": 0.96, "grad_norm": 4.329932210801259, "learning_rate": 4.876441494955031e-08, "loss": 0.3656, "step": 33412 }, { "epoch": 0.96, "grad_norm": 2.4119447350781607, "learning_rate": 4.869982444309918e-08, "loss": 0.262, "step": 33413 }, { "epoch": 0.96, "grad_norm": 4.888449578102337, "learning_rate": 4.863527653207356e-08, "loss": 0.4503, "step": 33414 }, { "epoch": 0.96, "grad_norm": 7.846660697073706, "learning_rate": 4.857077121702913e-08, "loss": 0.7308, "step": 33415 }, { "epoch": 0.96, "grad_norm": 4.934221072269346, "learning_rate": 4.8506308498521006e-08, "loss": 0.5142, "step": 33416 }, { "epoch": 0.96, "grad_norm": 5.150754786243612, "learning_rate": 4.844188837710317e-08, "loss": 0.3762, "step": 33417 }, { "epoch": 0.96, "grad_norm": 5.260748396381731, "learning_rate": 4.83775108533302e-08, "loss": 0.3657, "step": 33418 }, { "epoch": 0.96, "grad_norm": 5.809699255403194, "learning_rate": 4.831317592775553e-08, "loss": 0.356, "step": 33419 }, { "epoch": 0.96, "grad_norm": 3.612573766780198, "learning_rate": 4.824888360093316e-08, "loss": 0.3902, "step": 33420 }, { "epoch": 0.96, "grad_norm": 6.301615707660899, "learning_rate": 4.818463387341599e-08, "loss": 0.5244, "step": 33421 }, { "epoch": 0.96, "grad_norm": 4.072780219847033, "learning_rate": 4.81204267457569e-08, "loss": 0.5517, "step": 33422 }, { "epoch": 0.96, "grad_norm": 6.999855925575955, "learning_rate": 4.8056262218507674e-08, "loss": 0.8228, "step": 33423 }, { "epoch": 0.96, "grad_norm": 3.0853173320851814, "learning_rate": 4.7992140292221214e-08, "loss": 0.262, "step": 33424 }, { "epoch": 0.96, "grad_norm": 3.72968687481789, "learning_rate": 4.792806096744873e-08, "loss": 0.3332, "step": 33425 }, { "epoch": 0.96, "grad_norm": 5.369895840314847, "learning_rate": 4.7864024244740904e-08, "loss": 0.4197, "step": 33426 }, { "epoch": 0.96, "grad_norm": 5.108191116784464, "learning_rate": 4.780003012464951e-08, "loss": 0.1272, "step": 33427 }, { "epoch": 0.96, "grad_norm": 5.01962341936445, "learning_rate": 4.773607860772467e-08, "loss": 0.1774, "step": 33428 }, { "epoch": 0.96, "grad_norm": 6.77075318117873, "learning_rate": 4.767216969451649e-08, "loss": 0.2113, "step": 33429 }, { "epoch": 0.96, "grad_norm": 5.9104951810501705, "learning_rate": 4.760830338557454e-08, "loss": 0.3408, "step": 33430 }, { "epoch": 0.96, "grad_norm": 6.936664427879568, "learning_rate": 4.754447968144948e-08, "loss": 0.5924, "step": 33431 }, { "epoch": 0.96, "grad_norm": 5.9859956384522475, "learning_rate": 4.748069858268922e-08, "loss": 0.4608, "step": 33432 }, { "epoch": 0.96, "grad_norm": 4.638201019482477, "learning_rate": 4.74169600898422e-08, "loss": 0.1394, "step": 33433 }, { "epoch": 0.96, "grad_norm": 6.540730435705814, "learning_rate": 4.7353264203457425e-08, "loss": 0.624, "step": 33434 }, { "epoch": 0.96, "grad_norm": 2.796955938606598, "learning_rate": 4.728961092408279e-08, "loss": 0.07, "step": 33435 }, { "epoch": 0.96, "grad_norm": 16.892162270686345, "learning_rate": 4.72260002522662e-08, "loss": 0.6166, "step": 33436 }, { "epoch": 0.96, "grad_norm": 6.524223746241987, "learning_rate": 4.716243218855443e-08, "loss": 0.3357, "step": 33437 }, { "epoch": 0.96, "grad_norm": 3.9053469109879297, "learning_rate": 4.709890673349482e-08, "loss": 0.3196, "step": 33438 }, { "epoch": 0.96, "grad_norm": 6.3572589624863705, "learning_rate": 4.703542388763249e-08, "loss": 0.2519, "step": 33439 }, { "epoch": 0.96, "grad_norm": 6.445968391106607, "learning_rate": 4.697198365151534e-08, "loss": 0.4624, "step": 33440 }, { "epoch": 0.96, "grad_norm": 6.0370860548257035, "learning_rate": 4.690858602568793e-08, "loss": 0.4188, "step": 33441 }, { "epoch": 0.96, "grad_norm": 2.9117700697642066, "learning_rate": 4.684523101069649e-08, "loss": 0.4308, "step": 33442 }, { "epoch": 0.96, "grad_norm": 4.228458564496647, "learning_rate": 4.678191860708559e-08, "loss": 0.506, "step": 33443 }, { "epoch": 0.96, "grad_norm": 5.8739431932252755, "learning_rate": 4.6718648815399784e-08, "loss": 0.4714, "step": 33444 }, { "epoch": 0.96, "grad_norm": 8.145293260965433, "learning_rate": 4.665542163618364e-08, "loss": 0.4853, "step": 33445 }, { "epoch": 0.96, "grad_norm": 7.367881442869229, "learning_rate": 4.659223706998117e-08, "loss": 0.6666, "step": 33446 }, { "epoch": 0.96, "grad_norm": 5.589164836608911, "learning_rate": 4.6529095117335834e-08, "loss": 0.4125, "step": 33447 }, { "epoch": 0.96, "grad_norm": 2.3347754378844146, "learning_rate": 4.6465995778790516e-08, "loss": 0.2495, "step": 33448 }, { "epoch": 0.96, "grad_norm": 4.586213306665948, "learning_rate": 4.640293905488869e-08, "loss": 0.3067, "step": 33449 }, { "epoch": 0.96, "grad_norm": 10.731952643788684, "learning_rate": 4.6339924946172124e-08, "loss": 0.7761, "step": 33450 }, { "epoch": 0.96, "grad_norm": 3.64670426819772, "learning_rate": 4.627695345318372e-08, "loss": 0.2923, "step": 33451 }, { "epoch": 0.96, "grad_norm": 6.922769871855444, "learning_rate": 4.621402457646473e-08, "loss": 0.634, "step": 33452 }, { "epoch": 0.96, "grad_norm": 4.038752294909889, "learning_rate": 4.615113831655582e-08, "loss": 0.3559, "step": 33453 }, { "epoch": 0.96, "grad_norm": 5.291692465871963, "learning_rate": 4.608829467399933e-08, "loss": 0.3932, "step": 33454 }, { "epoch": 0.96, "grad_norm": 3.7940302658879737, "learning_rate": 4.6025493649335396e-08, "loss": 0.2861, "step": 33455 }, { "epoch": 0.96, "grad_norm": 5.138457771924323, "learning_rate": 4.5962735243103575e-08, "loss": 0.3559, "step": 33456 }, { "epoch": 0.96, "grad_norm": 3.967730819319288, "learning_rate": 4.5900019455845104e-08, "loss": 0.2461, "step": 33457 }, { "epoch": 0.96, "grad_norm": 4.535225866869346, "learning_rate": 4.583734628809844e-08, "loss": 0.2821, "step": 33458 }, { "epoch": 0.96, "grad_norm": 5.87987433792312, "learning_rate": 4.5774715740403705e-08, "loss": 0.4278, "step": 33459 }, { "epoch": 0.96, "grad_norm": 19.952016559183846, "learning_rate": 4.5712127813298255e-08, "loss": 0.4494, "step": 33460 }, { "epoch": 0.96, "grad_norm": 3.0873192506462246, "learning_rate": 4.5649582507321656e-08, "loss": 0.2823, "step": 33461 }, { "epoch": 0.96, "grad_norm": 5.093235481347493, "learning_rate": 4.5587079823011806e-08, "loss": 0.3449, "step": 33462 }, { "epoch": 0.96, "grad_norm": 8.415488402982323, "learning_rate": 4.552461976090661e-08, "loss": 0.5541, "step": 33463 }, { "epoch": 0.96, "grad_norm": 6.120006238273483, "learning_rate": 4.54622023215423e-08, "loss": 0.6097, "step": 33464 }, { "epoch": 0.96, "grad_norm": 7.369898826080656, "learning_rate": 4.539982750545735e-08, "loss": 0.4275, "step": 33465 }, { "epoch": 0.96, "grad_norm": 7.141119674810172, "learning_rate": 4.533749531318743e-08, "loss": 0.2295, "step": 33466 }, { "epoch": 0.96, "grad_norm": 5.583783696168807, "learning_rate": 4.527520574526878e-08, "loss": 0.4147, "step": 33467 }, { "epoch": 0.96, "grad_norm": 4.569464032570165, "learning_rate": 4.5212958802238195e-08, "loss": 0.6328, "step": 33468 }, { "epoch": 0.96, "grad_norm": 7.139626731625635, "learning_rate": 4.515075448462969e-08, "loss": 0.5354, "step": 33469 }, { "epoch": 0.96, "grad_norm": 8.416505308461513, "learning_rate": 4.50885927929795e-08, "loss": 0.7511, "step": 33470 }, { "epoch": 0.96, "grad_norm": 5.497723368313204, "learning_rate": 4.502647372782221e-08, "loss": 0.1441, "step": 33471 }, { "epoch": 0.96, "grad_norm": 9.00621379815899, "learning_rate": 4.496439728969238e-08, "loss": 0.7918, "step": 33472 }, { "epoch": 0.96, "grad_norm": 5.688097702311276, "learning_rate": 4.490236347912291e-08, "loss": 0.5099, "step": 33473 }, { "epoch": 0.96, "grad_norm": 7.173398855375718, "learning_rate": 4.484037229664839e-08, "loss": 0.5371, "step": 33474 }, { "epoch": 0.96, "grad_norm": 4.290539707744718, "learning_rate": 4.4778423742802835e-08, "loss": 0.3537, "step": 33475 }, { "epoch": 0.96, "grad_norm": 4.487200415129293, "learning_rate": 4.471651781811748e-08, "loss": 0.4194, "step": 33476 }, { "epoch": 0.96, "grad_norm": 5.427330109104264, "learning_rate": 4.465465452312634e-08, "loss": 0.4503, "step": 33477 }, { "epoch": 0.96, "grad_norm": 2.5859627275878636, "learning_rate": 4.4592833858361216e-08, "loss": 0.1545, "step": 33478 }, { "epoch": 0.96, "grad_norm": 4.240530291000792, "learning_rate": 4.453105582435391e-08, "loss": 0.5688, "step": 33479 }, { "epoch": 0.96, "grad_norm": 4.78639318555372, "learning_rate": 4.446932042163565e-08, "loss": 0.1497, "step": 33480 }, { "epoch": 0.96, "grad_norm": 3.6891253493891827, "learning_rate": 4.440762765073714e-08, "loss": 0.1798, "step": 33481 }, { "epoch": 0.96, "grad_norm": 5.426625218401058, "learning_rate": 4.4345977512190164e-08, "loss": 0.164, "step": 33482 }, { "epoch": 0.96, "grad_norm": 3.539180559963846, "learning_rate": 4.4284370006524305e-08, "loss": 0.3031, "step": 33483 }, { "epoch": 0.96, "grad_norm": 7.269804902744957, "learning_rate": 4.422280513427024e-08, "loss": 0.6247, "step": 33484 }, { "epoch": 0.96, "grad_norm": 4.846069199104607, "learning_rate": 4.4161282895957006e-08, "loss": 0.6453, "step": 33485 }, { "epoch": 0.96, "grad_norm": 4.399388618692032, "learning_rate": 4.4099803292114166e-08, "loss": 0.4082, "step": 33486 }, { "epoch": 0.96, "grad_norm": 5.588222863121497, "learning_rate": 4.4038366323270744e-08, "loss": 0.5575, "step": 33487 }, { "epoch": 0.96, "grad_norm": 3.0803391292195372, "learning_rate": 4.3976971989954654e-08, "loss": 0.2438, "step": 33488 }, { "epoch": 0.96, "grad_norm": 3.1319623445242284, "learning_rate": 4.391562029269436e-08, "loss": 0.2966, "step": 33489 }, { "epoch": 0.96, "grad_norm": 5.900883653707492, "learning_rate": 4.3854311232017776e-08, "loss": 0.7904, "step": 33490 }, { "epoch": 0.96, "grad_norm": 4.988196794217628, "learning_rate": 4.3793044808452815e-08, "loss": 0.367, "step": 33491 }, { "epoch": 0.96, "grad_norm": 7.462164301645891, "learning_rate": 4.373182102252571e-08, "loss": 0.4681, "step": 33492 }, { "epoch": 0.96, "grad_norm": 5.42474577259722, "learning_rate": 4.367063987476383e-08, "loss": 0.7617, "step": 33493 }, { "epoch": 0.96, "grad_norm": 6.483627331383928, "learning_rate": 4.3609501365692864e-08, "loss": 0.4111, "step": 33494 }, { "epoch": 0.96, "grad_norm": 6.943070589436629, "learning_rate": 4.354840549583961e-08, "loss": 0.5076, "step": 33495 }, { "epoch": 0.96, "grad_norm": 6.5278805889212865, "learning_rate": 4.3487352265728646e-08, "loss": 0.5968, "step": 33496 }, { "epoch": 0.96, "grad_norm": 4.651398198330517, "learning_rate": 4.3426341675885666e-08, "loss": 0.4803, "step": 33497 }, { "epoch": 0.96, "grad_norm": 4.967480263327644, "learning_rate": 4.336537372683636e-08, "loss": 0.3328, "step": 33498 }, { "epoch": 0.96, "grad_norm": 3.830780991913168, "learning_rate": 4.330444841910364e-08, "loss": 0.3638, "step": 33499 }, { "epoch": 0.96, "grad_norm": 2.682598256980667, "learning_rate": 4.3243565753212643e-08, "loss": 0.1832, "step": 33500 }, { "epoch": 0.96, "grad_norm": 7.053655369347092, "learning_rate": 4.318272572968685e-08, "loss": 0.7102, "step": 33501 }, { "epoch": 0.96, "grad_norm": 2.212916536507746, "learning_rate": 4.312192834905027e-08, "loss": 0.2604, "step": 33502 }, { "epoch": 0.96, "grad_norm": 4.146521109491496, "learning_rate": 4.306117361182527e-08, "loss": 0.3514, "step": 33503 }, { "epoch": 0.96, "grad_norm": 2.4583958898009683, "learning_rate": 4.300046151853421e-08, "loss": 0.2576, "step": 33504 }, { "epoch": 0.96, "grad_norm": 2.0559548070445564, "learning_rate": 4.293979206970056e-08, "loss": 0.1461, "step": 33505 }, { "epoch": 0.96, "grad_norm": 2.621676566397699, "learning_rate": 4.2879165265845015e-08, "loss": 0.1092, "step": 33506 }, { "epoch": 0.96, "grad_norm": 6.199376508680087, "learning_rate": 4.2818581107489933e-08, "loss": 0.3192, "step": 33507 }, { "epoch": 0.96, "grad_norm": 6.471045874474153, "learning_rate": 4.275803959515601e-08, "loss": 0.4958, "step": 33508 }, { "epoch": 0.96, "grad_norm": 9.753694347910674, "learning_rate": 4.2697540729364495e-08, "loss": 0.6334, "step": 33509 }, { "epoch": 0.96, "grad_norm": 6.780960287770192, "learning_rate": 4.2637084510635526e-08, "loss": 0.4633, "step": 33510 }, { "epoch": 0.96, "grad_norm": 38.33733186737794, "learning_rate": 4.257667093948981e-08, "loss": 0.7001, "step": 33511 }, { "epoch": 0.96, "grad_norm": 9.282977678710374, "learning_rate": 4.2516300016446354e-08, "loss": 0.6975, "step": 33512 }, { "epoch": 0.96, "grad_norm": 3.4439648265260896, "learning_rate": 4.245597174202476e-08, "loss": 0.1226, "step": 33513 }, { "epoch": 0.96, "grad_norm": 4.478673547022483, "learning_rate": 4.2395686116744053e-08, "loss": 0.2875, "step": 33514 }, { "epoch": 0.96, "grad_norm": 6.7860633538029544, "learning_rate": 4.2335443141122704e-08, "loss": 0.3101, "step": 33515 }, { "epoch": 0.96, "grad_norm": 4.893947606098183, "learning_rate": 4.2275242815679186e-08, "loss": 0.836, "step": 33516 }, { "epoch": 0.96, "grad_norm": 5.042429474851394, "learning_rate": 4.221508514093142e-08, "loss": 0.7727, "step": 33517 }, { "epoch": 0.96, "grad_norm": 5.384998957103462, "learning_rate": 4.2154970117396774e-08, "loss": 0.3912, "step": 33518 }, { "epoch": 0.96, "grad_norm": 4.329521201886584, "learning_rate": 4.209489774559261e-08, "loss": 0.4262, "step": 33519 }, { "epoch": 0.96, "grad_norm": 7.354139113622242, "learning_rate": 4.203486802603574e-08, "loss": 0.8319, "step": 33520 }, { "epoch": 0.96, "grad_norm": 7.076324975055052, "learning_rate": 4.1974880959242405e-08, "loss": 0.6399, "step": 33521 }, { "epoch": 0.96, "grad_norm": 5.575638139236956, "learning_rate": 4.1914936545728314e-08, "loss": 0.393, "step": 33522 }, { "epoch": 0.96, "grad_norm": 4.048386692801606, "learning_rate": 4.185503478600972e-08, "loss": 0.1684, "step": 33523 }, { "epoch": 0.96, "grad_norm": 6.552360330699184, "learning_rate": 4.179517568060232e-08, "loss": 0.8149, "step": 33524 }, { "epoch": 0.96, "grad_norm": 4.961090349861401, "learning_rate": 4.173535923002014e-08, "loss": 0.2885, "step": 33525 }, { "epoch": 0.96, "grad_norm": 6.023162083445826, "learning_rate": 4.1675585434778345e-08, "loss": 0.5287, "step": 33526 }, { "epoch": 0.96, "grad_norm": 5.443174470594989, "learning_rate": 4.161585429539095e-08, "loss": 0.4077, "step": 33527 }, { "epoch": 0.96, "grad_norm": 3.180311001281295, "learning_rate": 4.155616581237143e-08, "loss": 0.3714, "step": 33528 }, { "epoch": 0.96, "grad_norm": 4.675136639643243, "learning_rate": 4.149651998623439e-08, "loss": 0.4211, "step": 33529 }, { "epoch": 0.96, "grad_norm": 5.06045764180421, "learning_rate": 4.143691681749162e-08, "loss": 0.4488, "step": 33530 }, { "epoch": 0.96, "grad_norm": 5.849060012418202, "learning_rate": 4.1377356306657176e-08, "loss": 0.3078, "step": 33531 }, { "epoch": 0.96, "grad_norm": 4.877327681195249, "learning_rate": 4.131783845424231e-08, "loss": 0.3179, "step": 33532 }, { "epoch": 0.96, "grad_norm": 4.707834211692539, "learning_rate": 4.125836326075994e-08, "loss": 0.3227, "step": 33533 }, { "epoch": 0.96, "grad_norm": 6.541181323600145, "learning_rate": 4.119893072672132e-08, "loss": 0.5832, "step": 33534 }, { "epoch": 0.96, "grad_norm": 8.88640932990688, "learning_rate": 4.1139540852637164e-08, "loss": 0.7573, "step": 33535 }, { "epoch": 0.96, "grad_norm": 5.944402717828125, "learning_rate": 4.108019363901983e-08, "loss": 1.0285, "step": 33536 }, { "epoch": 0.96, "grad_norm": 5.557014503705832, "learning_rate": 4.102088908637836e-08, "loss": 0.6171, "step": 33537 }, { "epoch": 0.96, "grad_norm": 4.463000299955029, "learning_rate": 4.0961627195224005e-08, "loss": 0.4275, "step": 33538 }, { "epoch": 0.96, "grad_norm": 4.393445992296405, "learning_rate": 4.0902407966066373e-08, "loss": 0.352, "step": 33539 }, { "epoch": 0.96, "grad_norm": 9.306979597709331, "learning_rate": 4.084323139941393e-08, "loss": 0.8477, "step": 33540 }, { "epoch": 0.96, "grad_norm": 7.358334283479461, "learning_rate": 4.078409749577739e-08, "loss": 0.5074, "step": 33541 }, { "epoch": 0.96, "grad_norm": 5.044051735891734, "learning_rate": 4.0725006255664115e-08, "loss": 0.2658, "step": 33542 }, { "epoch": 0.96, "grad_norm": 6.817465486176839, "learning_rate": 4.06659576795837e-08, "loss": 0.6293, "step": 33543 }, { "epoch": 0.96, "grad_norm": 4.850212658811298, "learning_rate": 4.0606951768042415e-08, "loss": 0.2339, "step": 33544 }, { "epoch": 0.96, "grad_norm": 2.778805992971171, "learning_rate": 4.054798852154984e-08, "loss": 0.2077, "step": 33545 }, { "epoch": 0.96, "grad_norm": 15.283022787474373, "learning_rate": 4.0489067940612245e-08, "loss": 0.3127, "step": 33546 }, { "epoch": 0.96, "grad_norm": 5.191622647037101, "learning_rate": 4.043019002573589e-08, "loss": 0.7252, "step": 33547 }, { "epoch": 0.96, "grad_norm": 7.159115146964742, "learning_rate": 4.037135477742871e-08, "loss": 0.4699, "step": 33548 }, { "epoch": 0.96, "grad_norm": 5.363220436768298, "learning_rate": 4.031256219619528e-08, "loss": 0.1812, "step": 33549 }, { "epoch": 0.96, "grad_norm": 6.336897056534432, "learning_rate": 4.025381228254299e-08, "loss": 0.3351, "step": 33550 }, { "epoch": 0.96, "grad_norm": 4.287544690669393, "learning_rate": 4.019510503697588e-08, "loss": 0.47, "step": 33551 }, { "epoch": 0.96, "grad_norm": 4.868514576558959, "learning_rate": 4.0136440460000205e-08, "loss": 0.3985, "step": 33552 }, { "epoch": 0.96, "grad_norm": 4.99272551644696, "learning_rate": 4.007781855212001e-08, "loss": 0.4455, "step": 33553 }, { "epoch": 0.96, "grad_norm": 4.679989469080268, "learning_rate": 4.001923931383933e-08, "loss": 0.2855, "step": 33554 }, { "epoch": 0.96, "grad_norm": 3.6661143139749086, "learning_rate": 3.996070274566277e-08, "loss": 0.2021, "step": 33555 }, { "epoch": 0.96, "grad_norm": 7.9221896347976575, "learning_rate": 3.990220884809326e-08, "loss": 0.5329, "step": 33556 }, { "epoch": 0.96, "grad_norm": 6.341148782738555, "learning_rate": 3.9843757621634285e-08, "loss": 0.3108, "step": 33557 }, { "epoch": 0.96, "grad_norm": 1.366004311981349, "learning_rate": 3.9785349066789326e-08, "loss": 0.0251, "step": 33558 }, { "epoch": 0.96, "grad_norm": 7.279419894109075, "learning_rate": 3.972698318405965e-08, "loss": 0.477, "step": 33559 }, { "epoch": 0.96, "grad_norm": 8.334556171978344, "learning_rate": 3.96686599739482e-08, "loss": 0.4559, "step": 33560 }, { "epoch": 0.96, "grad_norm": 3.7611990279461254, "learning_rate": 3.961037943695678e-08, "loss": 0.2692, "step": 33561 }, { "epoch": 0.96, "grad_norm": 6.295634618467968, "learning_rate": 3.955214157358611e-08, "loss": 0.6226, "step": 33562 }, { "epoch": 0.96, "grad_norm": 4.018969104483413, "learning_rate": 3.949394638433801e-08, "loss": 0.2145, "step": 33563 }, { "epoch": 0.96, "grad_norm": 7.231756968353867, "learning_rate": 3.943579386971264e-08, "loss": 0.6063, "step": 33564 }, { "epoch": 0.96, "grad_norm": 4.238665191557937, "learning_rate": 3.937768403021014e-08, "loss": 0.4656, "step": 33565 }, { "epoch": 0.96, "grad_norm": 2.776081028862609, "learning_rate": 3.931961686633124e-08, "loss": 0.2535, "step": 33566 }, { "epoch": 0.96, "grad_norm": 6.268867395733696, "learning_rate": 3.926159237857441e-08, "loss": 0.6027, "step": 33567 }, { "epoch": 0.96, "grad_norm": 3.786552998209037, "learning_rate": 3.920361056743926e-08, "loss": 0.4183, "step": 33568 }, { "epoch": 0.96, "grad_norm": 8.434659267750133, "learning_rate": 3.914567143342485e-08, "loss": 0.4305, "step": 33569 }, { "epoch": 0.96, "grad_norm": 4.246144298824609, "learning_rate": 3.908777497702909e-08, "loss": 0.3339, "step": 33570 }, { "epoch": 0.96, "grad_norm": 3.6820982711846986, "learning_rate": 3.9029921198751044e-08, "loss": 0.1885, "step": 33571 }, { "epoch": 0.96, "grad_norm": 5.319765799465549, "learning_rate": 3.8972110099087524e-08, "loss": 0.4627, "step": 33572 }, { "epoch": 0.96, "grad_norm": 2.6654260227941005, "learning_rate": 3.891434167853592e-08, "loss": 0.1282, "step": 33573 }, { "epoch": 0.96, "grad_norm": 3.1134815606324264, "learning_rate": 3.8856615937593046e-08, "loss": 0.2886, "step": 33574 }, { "epoch": 0.96, "grad_norm": 6.162730257943936, "learning_rate": 3.879893287675684e-08, "loss": 0.3857, "step": 33575 }, { "epoch": 0.96, "grad_norm": 2.7408633281817454, "learning_rate": 3.874129249652192e-08, "loss": 0.1708, "step": 33576 }, { "epoch": 0.96, "grad_norm": 5.004103550233666, "learning_rate": 3.8683694797385096e-08, "loss": 0.4897, "step": 33577 }, { "epoch": 0.96, "grad_norm": 3.7207750528747163, "learning_rate": 3.862613977984098e-08, "loss": 0.243, "step": 33578 }, { "epoch": 0.96, "grad_norm": 5.638629718936089, "learning_rate": 3.856862744438639e-08, "loss": 0.2194, "step": 33579 }, { "epoch": 0.96, "grad_norm": 7.237815780207382, "learning_rate": 3.8511157791514266e-08, "loss": 0.6209, "step": 33580 }, { "epoch": 0.96, "grad_norm": 4.522330812523299, "learning_rate": 3.845373082171977e-08, "loss": 0.3121, "step": 33581 }, { "epoch": 0.96, "grad_norm": 5.931416135175631, "learning_rate": 3.839634653549751e-08, "loss": 0.2443, "step": 33582 }, { "epoch": 0.96, "grad_norm": 2.40471714291511, "learning_rate": 3.8339004933339865e-08, "loss": 0.2067, "step": 33583 }, { "epoch": 0.96, "grad_norm": 7.443708505202552, "learning_rate": 3.8281706015741436e-08, "loss": 0.5177, "step": 33584 }, { "epoch": 0.96, "grad_norm": 9.55420359823279, "learning_rate": 3.8224449783194615e-08, "loss": 0.6907, "step": 33585 }, { "epoch": 0.96, "grad_norm": 5.399883380266401, "learning_rate": 3.8167236236191785e-08, "loss": 0.7807, "step": 33586 }, { "epoch": 0.96, "grad_norm": 2.864836537123464, "learning_rate": 3.811006537522477e-08, "loss": 0.2991, "step": 33587 }, { "epoch": 0.96, "grad_norm": 6.0061430477976385, "learning_rate": 3.805293720078651e-08, "loss": 0.2509, "step": 33588 }, { "epoch": 0.96, "grad_norm": 3.546260377171541, "learning_rate": 3.799585171336773e-08, "loss": 0.3909, "step": 33589 }, { "epoch": 0.96, "grad_norm": 4.674255351567727, "learning_rate": 3.793880891345969e-08, "loss": 0.4378, "step": 33590 }, { "epoch": 0.96, "grad_norm": 6.907312544475359, "learning_rate": 3.788180880155312e-08, "loss": 0.459, "step": 33591 }, { "epoch": 0.96, "grad_norm": 7.055985982870163, "learning_rate": 3.782485137813874e-08, "loss": 0.5086, "step": 33592 }, { "epoch": 0.96, "grad_norm": 3.8871415564824825, "learning_rate": 3.7767936643706146e-08, "loss": 0.2314, "step": 33593 }, { "epoch": 0.96, "grad_norm": 6.160079558868139, "learning_rate": 3.771106459874441e-08, "loss": 0.3734, "step": 33594 }, { "epoch": 0.96, "grad_norm": 4.266350848584439, "learning_rate": 3.7654235243743676e-08, "loss": 0.3425, "step": 33595 }, { "epoch": 0.96, "grad_norm": 6.7107175495665, "learning_rate": 3.759744857919301e-08, "loss": 0.6689, "step": 33596 }, { "epoch": 0.96, "grad_norm": 2.9373169497006892, "learning_rate": 3.754070460557979e-08, "loss": 0.3989, "step": 33597 }, { "epoch": 0.96, "grad_norm": 4.869161631037345, "learning_rate": 3.7484003323393637e-08, "loss": 0.4735, "step": 33598 }, { "epoch": 0.96, "grad_norm": 2.771249396892339, "learning_rate": 3.7427344733121374e-08, "loss": 0.2057, "step": 33599 }, { "epoch": 0.96, "grad_norm": 4.966105159065524, "learning_rate": 3.737072883525039e-08, "loss": 0.7644, "step": 33600 }, { "epoch": 0.96, "grad_norm": 5.099631255039714, "learning_rate": 3.7314155630268076e-08, "loss": 0.5296, "step": 33601 }, { "epoch": 0.96, "grad_norm": 3.1628980966531333, "learning_rate": 3.725762511866127e-08, "loss": 0.1412, "step": 33602 }, { "epoch": 0.96, "grad_norm": 4.282136616474478, "learning_rate": 3.720113730091568e-08, "loss": 0.2808, "step": 33603 }, { "epoch": 0.96, "grad_norm": 4.935933287046058, "learning_rate": 3.714469217751815e-08, "loss": 0.4366, "step": 33604 }, { "epoch": 0.96, "grad_norm": 5.2836720256769025, "learning_rate": 3.708828974895384e-08, "loss": 0.4704, "step": 33605 }, { "epoch": 0.96, "grad_norm": 4.869647976256466, "learning_rate": 3.7031930015707374e-08, "loss": 0.5191, "step": 33606 }, { "epoch": 0.96, "grad_norm": 6.454563673240358, "learning_rate": 3.697561297826502e-08, "loss": 0.6662, "step": 33607 }, { "epoch": 0.96, "grad_norm": 5.362814641938416, "learning_rate": 3.6919338637109724e-08, "loss": 0.4027, "step": 33608 }, { "epoch": 0.96, "grad_norm": 5.572761607166873, "learning_rate": 3.6863106992726106e-08, "loss": 0.2455, "step": 33609 }, { "epoch": 0.96, "grad_norm": 6.205125798326538, "learning_rate": 3.680691804559822e-08, "loss": 0.3027, "step": 33610 }, { "epoch": 0.96, "grad_norm": 5.337687661939017, "learning_rate": 3.6750771796210116e-08, "loss": 0.3163, "step": 33611 }, { "epoch": 0.96, "grad_norm": 7.241100966376676, "learning_rate": 3.6694668245043083e-08, "loss": 0.7043, "step": 33612 }, { "epoch": 0.96, "grad_norm": 17.880471199193153, "learning_rate": 3.663860739258118e-08, "loss": 0.5379, "step": 33613 }, { "epoch": 0.96, "grad_norm": 4.906011782165885, "learning_rate": 3.6582589239306245e-08, "loss": 0.7099, "step": 33614 }, { "epoch": 0.96, "grad_norm": 4.1782042251185745, "learning_rate": 3.6526613785700105e-08, "loss": 0.2173, "step": 33615 }, { "epoch": 0.96, "grad_norm": 8.348750726820024, "learning_rate": 3.6470681032244047e-08, "loss": 0.3986, "step": 33616 }, { "epoch": 0.96, "grad_norm": 5.334682780651369, "learning_rate": 3.6414790979419914e-08, "loss": 0.5283, "step": 33617 }, { "epoch": 0.96, "grad_norm": 6.8291050188014095, "learning_rate": 3.635894362770842e-08, "loss": 0.3571, "step": 33618 }, { "epoch": 0.96, "grad_norm": 3.8288656024374803, "learning_rate": 3.6303138977589744e-08, "loss": 0.1827, "step": 33619 }, { "epoch": 0.96, "grad_norm": 3.702014434651324, "learning_rate": 3.624737702954351e-08, "loss": 0.1992, "step": 33620 }, { "epoch": 0.96, "grad_norm": 4.564051769208716, "learning_rate": 3.619165778405043e-08, "loss": 0.3483, "step": 33621 }, { "epoch": 0.96, "grad_norm": 5.932122757013851, "learning_rate": 3.6135981241589014e-08, "loss": 0.4224, "step": 33622 }, { "epoch": 0.96, "grad_norm": 4.507896885338235, "learning_rate": 3.608034740263888e-08, "loss": 0.5598, "step": 33623 }, { "epoch": 0.96, "grad_norm": 5.30259148561511, "learning_rate": 3.6024756267677984e-08, "loss": 0.7701, "step": 33624 }, { "epoch": 0.96, "grad_norm": 7.627611869859349, "learning_rate": 3.596920783718538e-08, "loss": 0.5796, "step": 33625 }, { "epoch": 0.96, "grad_norm": 7.976860227642146, "learning_rate": 3.591370211163847e-08, "loss": 0.4922, "step": 33626 }, { "epoch": 0.96, "grad_norm": 8.872574891273864, "learning_rate": 3.5858239091514647e-08, "loss": 0.5302, "step": 33627 }, { "epoch": 0.96, "grad_norm": 3.0682877326434896, "learning_rate": 3.580281877729075e-08, "loss": 0.1909, "step": 33628 }, { "epoch": 0.96, "grad_norm": 8.158121179375076, "learning_rate": 3.574744116944418e-08, "loss": 0.8064, "step": 33629 }, { "epoch": 0.96, "grad_norm": 8.742903311079495, "learning_rate": 3.569210626845177e-08, "loss": 0.4728, "step": 33630 }, { "epoch": 0.96, "grad_norm": 5.3327268489080994, "learning_rate": 3.563681407478814e-08, "loss": 0.4012, "step": 33631 }, { "epoch": 0.96, "grad_norm": 6.902456216807983, "learning_rate": 3.558156458893069e-08, "loss": 0.8409, "step": 33632 }, { "epoch": 0.96, "grad_norm": 4.324801852808438, "learning_rate": 3.552635781135294e-08, "loss": 0.2555, "step": 33633 }, { "epoch": 0.96, "grad_norm": 3.790368207141119, "learning_rate": 3.547119374253116e-08, "loss": 0.3924, "step": 33634 }, { "epoch": 0.96, "grad_norm": 6.769723051934262, "learning_rate": 3.541607238293943e-08, "loss": 0.9176, "step": 33635 }, { "epoch": 0.96, "grad_norm": 4.94509756455829, "learning_rate": 3.5360993733051796e-08, "loss": 0.1849, "step": 33636 }, { "epoch": 0.96, "grad_norm": 5.0512119708462215, "learning_rate": 3.5305957793342336e-08, "loss": 0.2528, "step": 33637 }, { "epoch": 0.96, "grad_norm": 4.3568238481061545, "learning_rate": 3.525096456428456e-08, "loss": 0.5786, "step": 33638 }, { "epoch": 0.96, "grad_norm": 5.367824289007256, "learning_rate": 3.519601404635142e-08, "loss": 0.2211, "step": 33639 }, { "epoch": 0.96, "grad_norm": 3.7313614660894716, "learning_rate": 3.5141106240015876e-08, "loss": 0.359, "step": 33640 }, { "epoch": 0.96, "grad_norm": 5.061850541452894, "learning_rate": 3.508624114574977e-08, "loss": 0.403, "step": 33641 }, { "epoch": 0.96, "grad_norm": 5.216863279730515, "learning_rate": 3.503141876402549e-08, "loss": 0.5202, "step": 33642 }, { "epoch": 0.96, "grad_norm": 1.987443983740118, "learning_rate": 3.4976639095314345e-08, "loss": 0.2312, "step": 33643 }, { "epoch": 0.96, "grad_norm": 8.360644691514247, "learning_rate": 3.4921902140088174e-08, "loss": 0.5213, "step": 33644 }, { "epoch": 0.96, "grad_norm": 4.697966728310244, "learning_rate": 3.48672078988177e-08, "loss": 0.6887, "step": 33645 }, { "epoch": 0.96, "grad_norm": 9.741000692828262, "learning_rate": 3.4812556371973115e-08, "loss": 0.3793, "step": 33646 }, { "epoch": 0.96, "grad_norm": 4.0258156868046395, "learning_rate": 3.475794756002515e-08, "loss": 0.55, "step": 33647 }, { "epoch": 0.96, "grad_norm": 5.461317057373585, "learning_rate": 3.4703381463442876e-08, "loss": 0.3669, "step": 33648 }, { "epoch": 0.96, "grad_norm": 5.856721872475889, "learning_rate": 3.4648858082695915e-08, "loss": 0.5814, "step": 33649 }, { "epoch": 0.96, "grad_norm": 5.5881927845876325, "learning_rate": 3.459437741825389e-08, "loss": 0.8011, "step": 33650 }, { "epoch": 0.96, "grad_norm": 5.26567503229024, "learning_rate": 3.453993947058476e-08, "loss": 0.2366, "step": 33651 }, { "epoch": 0.96, "grad_norm": 6.822618651736942, "learning_rate": 3.4485544240157043e-08, "loss": 0.3068, "step": 33652 }, { "epoch": 0.96, "grad_norm": 4.112398956819938, "learning_rate": 3.443119172743925e-08, "loss": 0.4112, "step": 33653 }, { "epoch": 0.96, "grad_norm": 4.310327978068337, "learning_rate": 3.4376881932898235e-08, "loss": 0.5653, "step": 33654 }, { "epoch": 0.96, "grad_norm": 25.072493161432476, "learning_rate": 3.432261485700139e-08, "loss": 0.5775, "step": 33655 }, { "epoch": 0.96, "grad_norm": 5.362443162487944, "learning_rate": 3.426839050021613e-08, "loss": 0.3747, "step": 33656 }, { "epoch": 0.96, "grad_norm": 4.570784924209354, "learning_rate": 3.421420886300819e-08, "loss": 0.4104, "step": 33657 }, { "epoch": 0.96, "grad_norm": 6.43734792881575, "learning_rate": 3.416006994584387e-08, "loss": 0.78, "step": 33658 }, { "epoch": 0.96, "grad_norm": 8.268307573242085, "learning_rate": 3.410597374918945e-08, "loss": 0.5186, "step": 33659 }, { "epoch": 0.96, "grad_norm": 7.371267246939678, "learning_rate": 3.405192027351012e-08, "loss": 0.5363, "step": 33660 }, { "epoch": 0.96, "grad_norm": 6.226538070573351, "learning_rate": 3.399790951927051e-08, "loss": 0.649, "step": 33661 }, { "epoch": 0.96, "grad_norm": 9.639805837789005, "learning_rate": 3.394394148693525e-08, "loss": 0.5756, "step": 33662 }, { "epoch": 0.96, "grad_norm": 3.4658944688195774, "learning_rate": 3.389001617696841e-08, "loss": 0.2517, "step": 33663 }, { "epoch": 0.96, "grad_norm": 8.305763497898312, "learning_rate": 3.383613358983518e-08, "loss": 0.3974, "step": 33664 }, { "epoch": 0.96, "grad_norm": 2.447495847743314, "learning_rate": 3.3782293725997394e-08, "loss": 0.2036, "step": 33665 }, { "epoch": 0.96, "grad_norm": 6.490236138278425, "learning_rate": 3.3728496585919704e-08, "loss": 0.287, "step": 33666 }, { "epoch": 0.96, "grad_norm": 5.468621999060048, "learning_rate": 3.3674742170063946e-08, "loss": 0.1757, "step": 33667 }, { "epoch": 0.96, "grad_norm": 3.1894890058453256, "learning_rate": 3.3621030478893645e-08, "loss": 0.2687, "step": 33668 }, { "epoch": 0.96, "grad_norm": 3.186591636028457, "learning_rate": 3.356736151286899e-08, "loss": 0.1332, "step": 33669 }, { "epoch": 0.96, "grad_norm": 5.747875816400076, "learning_rate": 3.35137352724535e-08, "loss": 0.4141, "step": 33670 }, { "epoch": 0.96, "grad_norm": 3.24125162428386, "learning_rate": 3.34601517581079e-08, "loss": 0.1997, "step": 33671 }, { "epoch": 0.96, "grad_norm": 16.994819973886873, "learning_rate": 3.340661097029296e-08, "loss": 0.6093, "step": 33672 }, { "epoch": 0.96, "grad_norm": 5.484602246915404, "learning_rate": 3.3353112909469966e-08, "loss": 0.5114, "step": 33673 }, { "epoch": 0.96, "grad_norm": 2.3914724605632753, "learning_rate": 3.3299657576097985e-08, "loss": 0.2919, "step": 33674 }, { "epoch": 0.96, "grad_norm": 5.810289844630239, "learning_rate": 3.3246244970637776e-08, "loss": 0.7083, "step": 33675 }, { "epoch": 0.96, "grad_norm": 4.275588308532958, "learning_rate": 3.3192875093548405e-08, "loss": 0.2119, "step": 33676 }, { "epoch": 0.96, "grad_norm": 5.470150969756455, "learning_rate": 3.313954794528895e-08, "loss": 0.2855, "step": 33677 }, { "epoch": 0.96, "grad_norm": 7.984294375854252, "learning_rate": 3.308626352631905e-08, "loss": 0.6158, "step": 33678 }, { "epoch": 0.96, "grad_norm": 10.172429082871256, "learning_rate": 3.303302183709611e-08, "loss": 0.4767, "step": 33679 }, { "epoch": 0.96, "grad_norm": 4.513925648401023, "learning_rate": 3.297982287807866e-08, "loss": 0.2054, "step": 33680 }, { "epoch": 0.96, "grad_norm": 6.312798729050594, "learning_rate": 3.292666664972411e-08, "loss": 0.4293, "step": 33681 }, { "epoch": 0.96, "grad_norm": 7.973580186790513, "learning_rate": 3.287355315249041e-08, "loss": 0.7397, "step": 33682 }, { "epoch": 0.96, "grad_norm": 5.623470607107887, "learning_rate": 3.282048238683333e-08, "loss": 0.7586, "step": 33683 }, { "epoch": 0.96, "grad_norm": 8.273641927785281, "learning_rate": 3.276745435321027e-08, "loss": 0.6545, "step": 33684 }, { "epoch": 0.96, "grad_norm": 8.0347078649828, "learning_rate": 3.271446905207698e-08, "loss": 0.5262, "step": 33685 }, { "epoch": 0.96, "grad_norm": 4.049335306275982, "learning_rate": 3.266152648388976e-08, "loss": 0.5964, "step": 33686 }, { "epoch": 0.96, "grad_norm": 3.5036541051735317, "learning_rate": 3.260862664910436e-08, "loss": 0.1261, "step": 33687 }, { "epoch": 0.96, "grad_norm": 4.277601706334079, "learning_rate": 3.25557695481743e-08, "loss": 0.4869, "step": 33688 }, { "epoch": 0.96, "grad_norm": 15.437003556273048, "learning_rate": 3.2502955181555886e-08, "loss": 0.272, "step": 33689 }, { "epoch": 0.96, "grad_norm": 1.3880942359849096, "learning_rate": 3.24501835497032e-08, "loss": 0.086, "step": 33690 }, { "epoch": 0.96, "grad_norm": 5.367140635101184, "learning_rate": 3.239745465306976e-08, "loss": 0.4829, "step": 33691 }, { "epoch": 0.96, "grad_norm": 5.325431075579429, "learning_rate": 3.2344768492109104e-08, "loss": 0.627, "step": 33692 }, { "epoch": 0.96, "grad_norm": 4.28042098880474, "learning_rate": 3.2292125067275306e-08, "loss": 0.424, "step": 33693 }, { "epoch": 0.96, "grad_norm": 4.1568714336952945, "learning_rate": 3.223952437902023e-08, "loss": 0.5891, "step": 33694 }, { "epoch": 0.96, "grad_norm": 4.537141534606752, "learning_rate": 3.218696642779684e-08, "loss": 0.5185, "step": 33695 }, { "epoch": 0.96, "grad_norm": 7.004561708994177, "learning_rate": 3.213445121405756e-08, "loss": 0.6868, "step": 33696 }, { "epoch": 0.97, "grad_norm": 7.318925610136756, "learning_rate": 3.2081978738253696e-08, "loss": 0.4634, "step": 33697 }, { "epoch": 0.97, "grad_norm": 5.160810361261098, "learning_rate": 3.202954900083766e-08, "loss": 0.4682, "step": 33698 }, { "epoch": 0.97, "grad_norm": 5.913524406676252, "learning_rate": 3.197716200225909e-08, "loss": 0.733, "step": 33699 }, { "epoch": 0.97, "grad_norm": 2.309245603288985, "learning_rate": 3.192481774296929e-08, "loss": 0.335, "step": 33700 }, { "epoch": 0.97, "grad_norm": 5.427251497129563, "learning_rate": 3.187251622341902e-08, "loss": 0.2406, "step": 33701 }, { "epoch": 0.97, "grad_norm": 5.159006387723254, "learning_rate": 3.182025744405737e-08, "loss": 0.3824, "step": 33702 }, { "epoch": 0.97, "grad_norm": 4.776373676614637, "learning_rate": 3.1768041405334515e-08, "loss": 0.4921, "step": 33703 }, { "epoch": 0.97, "grad_norm": 7.838093454382499, "learning_rate": 3.1715868107699e-08, "loss": 0.5473, "step": 33704 }, { "epoch": 0.97, "grad_norm": 4.727279482599914, "learning_rate": 3.1663737551601014e-08, "loss": 0.375, "step": 33705 }, { "epoch": 0.97, "grad_norm": 7.618460868625296, "learning_rate": 3.161164973748798e-08, "loss": 0.6145, "step": 33706 }, { "epoch": 0.97, "grad_norm": 4.14824663143404, "learning_rate": 3.155960466580788e-08, "loss": 0.2435, "step": 33707 }, { "epoch": 0.97, "grad_norm": 7.3464801988920945, "learning_rate": 3.1507602337008667e-08, "loss": 0.4433, "step": 33708 }, { "epoch": 0.97, "grad_norm": 3.4022212219949695, "learning_rate": 3.145564275153834e-08, "loss": 0.3127, "step": 33709 }, { "epoch": 0.97, "grad_norm": 5.563330256189067, "learning_rate": 3.140372590984264e-08, "loss": 0.2911, "step": 33710 }, { "epoch": 0.97, "grad_norm": 2.8957724439042165, "learning_rate": 3.135185181236955e-08, "loss": 0.2743, "step": 33711 }, { "epoch": 0.97, "grad_norm": 4.058300312437934, "learning_rate": 3.1300020459564815e-08, "loss": 0.1611, "step": 33712 }, { "epoch": 0.97, "grad_norm": 4.167402119897306, "learning_rate": 3.124823185187364e-08, "loss": 0.3064, "step": 33713 }, { "epoch": 0.97, "grad_norm": 5.484375869446602, "learning_rate": 3.11964859897429e-08, "loss": 0.2563, "step": 33714 }, { "epoch": 0.97, "grad_norm": 8.75071359176915, "learning_rate": 3.114478287361666e-08, "loss": 0.6677, "step": 33715 }, { "epoch": 0.97, "grad_norm": 8.586488187171035, "learning_rate": 3.109312250393959e-08, "loss": 0.3756, "step": 33716 }, { "epoch": 0.97, "grad_norm": 3.5434353656797697, "learning_rate": 3.1041504881157425e-08, "loss": 0.491, "step": 33717 }, { "epoch": 0.97, "grad_norm": 10.483759672979838, "learning_rate": 3.09899300057126e-08, "loss": 0.8657, "step": 33718 }, { "epoch": 0.97, "grad_norm": 5.755563159436015, "learning_rate": 3.093839787805031e-08, "loss": 0.3136, "step": 33719 }, { "epoch": 0.97, "grad_norm": 6.051835108102268, "learning_rate": 3.088690849861298e-08, "loss": 0.3396, "step": 33720 }, { "epoch": 0.97, "grad_norm": 3.8657165966730878, "learning_rate": 3.083546186784358e-08, "loss": 0.1406, "step": 33721 }, { "epoch": 0.97, "grad_norm": 9.920840952898475, "learning_rate": 3.078405798618456e-08, "loss": 0.3031, "step": 33722 }, { "epoch": 0.97, "grad_norm": 5.248603362551342, "learning_rate": 3.0732696854079425e-08, "loss": 0.6328, "step": 33723 }, { "epoch": 0.97, "grad_norm": 3.866396428124838, "learning_rate": 3.0681378471968396e-08, "loss": 0.3273, "step": 33724 }, { "epoch": 0.97, "grad_norm": 2.385862705779626, "learning_rate": 3.063010284029333e-08, "loss": 0.1546, "step": 33725 }, { "epoch": 0.97, "grad_norm": 2.8245853170352384, "learning_rate": 3.0578869959496106e-08, "loss": 0.299, "step": 33726 }, { "epoch": 0.97, "grad_norm": 7.949067705298707, "learning_rate": 3.052767983001748e-08, "loss": 0.5077, "step": 33727 }, { "epoch": 0.97, "grad_norm": 3.9375628890207506, "learning_rate": 3.0476532452296534e-08, "loss": 0.3037, "step": 33728 }, { "epoch": 0.97, "grad_norm": 7.227268196909353, "learning_rate": 3.04254278267746e-08, "loss": 0.5457, "step": 33729 }, { "epoch": 0.97, "grad_norm": 5.10795904948788, "learning_rate": 3.037436595389076e-08, "loss": 0.5994, "step": 33730 }, { "epoch": 0.97, "grad_norm": 5.210329080318595, "learning_rate": 3.0323346834084114e-08, "loss": 0.3886, "step": 33731 }, { "epoch": 0.97, "grad_norm": 6.257461366553232, "learning_rate": 3.0272370467793745e-08, "loss": 0.4994, "step": 33732 }, { "epoch": 0.97, "grad_norm": 3.511922319089708, "learning_rate": 3.022143685545875e-08, "loss": 0.5805, "step": 33733 }, { "epoch": 0.97, "grad_norm": 5.2855640375294115, "learning_rate": 3.0170545997516566e-08, "loss": 0.541, "step": 33734 }, { "epoch": 0.97, "grad_norm": 12.331609777353796, "learning_rate": 3.0119697894405164e-08, "loss": 0.4693, "step": 33735 }, { "epoch": 0.97, "grad_norm": 5.446237362401981, "learning_rate": 3.0068892546562536e-08, "loss": 0.3579, "step": 33736 }, { "epoch": 0.97, "grad_norm": 10.318582786901167, "learning_rate": 3.001812995442499e-08, "loss": 0.4516, "step": 33737 }, { "epoch": 0.97, "grad_norm": 3.312737438401195, "learning_rate": 2.9967410118429405e-08, "loss": 0.188, "step": 33738 }, { "epoch": 0.97, "grad_norm": 3.4701602921304984, "learning_rate": 2.991673303901266e-08, "loss": 0.1364, "step": 33739 }, { "epoch": 0.97, "grad_norm": 4.893161396276666, "learning_rate": 2.9866098716609946e-08, "loss": 0.3453, "step": 33740 }, { "epoch": 0.97, "grad_norm": 2.2129875626505413, "learning_rate": 2.9815507151657595e-08, "loss": 0.2643, "step": 33741 }, { "epoch": 0.97, "grad_norm": 4.544468036150976, "learning_rate": 2.9764958344590257e-08, "loss": 0.3398, "step": 33742 }, { "epoch": 0.97, "grad_norm": 7.342217533028228, "learning_rate": 2.9714452295843134e-08, "loss": 0.6242, "step": 33743 }, { "epoch": 0.97, "grad_norm": 4.235203060249632, "learning_rate": 2.9663989005850324e-08, "loss": 0.6378, "step": 33744 }, { "epoch": 0.97, "grad_norm": 7.021407906615942, "learning_rate": 2.9613568475046485e-08, "loss": 0.2911, "step": 33745 }, { "epoch": 0.97, "grad_norm": 4.629333553630813, "learning_rate": 2.956319070386515e-08, "loss": 0.5033, "step": 33746 }, { "epoch": 0.97, "grad_norm": 7.967378026999979, "learning_rate": 2.9512855692739872e-08, "loss": 0.3976, "step": 33747 }, { "epoch": 0.97, "grad_norm": 5.024396173057924, "learning_rate": 2.9462563442103632e-08, "loss": 0.4802, "step": 33748 }, { "epoch": 0.97, "grad_norm": 7.4168214317490255, "learning_rate": 2.941231395238886e-08, "loss": 0.3083, "step": 33749 }, { "epoch": 0.97, "grad_norm": 2.1049752302819154, "learning_rate": 2.9362107224027436e-08, "loss": 0.0884, "step": 33750 }, { "epoch": 0.97, "grad_norm": 5.909713456231666, "learning_rate": 2.9311943257452345e-08, "loss": 0.5082, "step": 33751 }, { "epoch": 0.97, "grad_norm": 2.6694990156693117, "learning_rate": 2.9261822053094358e-08, "loss": 0.3813, "step": 33752 }, { "epoch": 0.97, "grad_norm": 3.407550248624283, "learning_rate": 2.921174361138479e-08, "loss": 0.2651, "step": 33753 }, { "epoch": 0.97, "grad_norm": 9.18451361874448, "learning_rate": 2.9161707932754967e-08, "loss": 0.6166, "step": 33754 }, { "epoch": 0.97, "grad_norm": 7.388829309498961, "learning_rate": 2.91117150176351e-08, "loss": 0.6559, "step": 33755 }, { "epoch": 0.97, "grad_norm": 4.510236620521439, "learning_rate": 2.906176486645429e-08, "loss": 0.3825, "step": 33756 }, { "epoch": 0.97, "grad_norm": 6.796619629721328, "learning_rate": 2.9011857479643857e-08, "loss": 0.6314, "step": 33757 }, { "epoch": 0.97, "grad_norm": 1.5557433585283857, "learning_rate": 2.8961992857631793e-08, "loss": 0.1074, "step": 33758 }, { "epoch": 0.97, "grad_norm": 6.919465000661954, "learning_rate": 2.8912171000847754e-08, "loss": 0.4282, "step": 33759 }, { "epoch": 0.97, "grad_norm": 4.026237802140602, "learning_rate": 2.8862391909720843e-08, "loss": 0.5124, "step": 33760 }, { "epoch": 0.97, "grad_norm": 4.350652383795685, "learning_rate": 2.881265558467794e-08, "loss": 0.1322, "step": 33761 }, { "epoch": 0.97, "grad_norm": 4.371530860406248, "learning_rate": 2.8762962026148144e-08, "loss": 0.1971, "step": 33762 }, { "epoch": 0.97, "grad_norm": 5.366030683605322, "learning_rate": 2.871331123455834e-08, "loss": 0.6539, "step": 33763 }, { "epoch": 0.97, "grad_norm": 3.2600078549904765, "learning_rate": 2.866370321033596e-08, "loss": 0.3394, "step": 33764 }, { "epoch": 0.97, "grad_norm": 2.820047878626607, "learning_rate": 2.8614137953907327e-08, "loss": 0.4164, "step": 33765 }, { "epoch": 0.97, "grad_norm": 11.66500118947258, "learning_rate": 2.8564615465699887e-08, "loss": 0.4722, "step": 33766 }, { "epoch": 0.97, "grad_norm": 3.610926917872322, "learning_rate": 2.8515135746138288e-08, "loss": 0.3444, "step": 33767 }, { "epoch": 0.97, "grad_norm": 6.536800125324966, "learning_rate": 2.8465698795649422e-08, "loss": 0.574, "step": 33768 }, { "epoch": 0.97, "grad_norm": 3.34283778858346, "learning_rate": 2.8416304614657385e-08, "loss": 0.2702, "step": 33769 }, { "epoch": 0.97, "grad_norm": 4.721272381766596, "learning_rate": 2.8366953203587956e-08, "loss": 0.2775, "step": 33770 }, { "epoch": 0.97, "grad_norm": 4.473318392453397, "learning_rate": 2.8317644562865786e-08, "loss": 0.4572, "step": 33771 }, { "epoch": 0.97, "grad_norm": 7.9139300652557445, "learning_rate": 2.8268378692914432e-08, "loss": 0.5747, "step": 33772 }, { "epoch": 0.97, "grad_norm": 5.296484693702627, "learning_rate": 2.8219155594158552e-08, "loss": 0.3883, "step": 33773 }, { "epoch": 0.97, "grad_norm": 8.89127448044411, "learning_rate": 2.8169975267020587e-08, "loss": 0.738, "step": 33774 }, { "epoch": 0.97, "grad_norm": 5.014620671478632, "learning_rate": 2.8120837711924643e-08, "loss": 0.3752, "step": 33775 }, { "epoch": 0.97, "grad_norm": 6.176781280834863, "learning_rate": 2.80717429292926e-08, "loss": 0.6726, "step": 33776 }, { "epoch": 0.97, "grad_norm": 5.265319385205963, "learning_rate": 2.8022690919547457e-08, "loss": 0.3888, "step": 33777 }, { "epoch": 0.97, "grad_norm": 4.536425666676869, "learning_rate": 2.79736816831111e-08, "loss": 0.6172, "step": 33778 }, { "epoch": 0.97, "grad_norm": 5.0586048258656495, "learning_rate": 2.7924715220404298e-08, "loss": 0.29, "step": 33779 }, { "epoch": 0.97, "grad_norm": 7.346768931503845, "learning_rate": 2.7875791531849493e-08, "loss": 0.5393, "step": 33780 }, { "epoch": 0.97, "grad_norm": 8.471317591775358, "learning_rate": 2.7826910617867465e-08, "loss": 0.7655, "step": 33781 }, { "epoch": 0.97, "grad_norm": 6.330728103802894, "learning_rate": 2.777807247887787e-08, "loss": 0.6675, "step": 33782 }, { "epoch": 0.97, "grad_norm": 6.7753791885961245, "learning_rate": 2.7729277115300936e-08, "loss": 0.4238, "step": 33783 }, { "epoch": 0.97, "grad_norm": 3.9326129739955236, "learning_rate": 2.768052452755743e-08, "loss": 0.4947, "step": 33784 }, { "epoch": 0.97, "grad_norm": 2.8057977184482157, "learning_rate": 2.7631814716065907e-08, "loss": 0.2332, "step": 33785 }, { "epoch": 0.97, "grad_norm": 3.818741894852298, "learning_rate": 2.7583147681246037e-08, "loss": 0.3533, "step": 33786 }, { "epoch": 0.97, "grad_norm": 4.312889012468671, "learning_rate": 2.7534523423515813e-08, "loss": 0.3039, "step": 33787 }, { "epoch": 0.97, "grad_norm": 4.173251207222053, "learning_rate": 2.748594194329379e-08, "loss": 0.3859, "step": 33788 }, { "epoch": 0.97, "grad_norm": 3.108484533653421, "learning_rate": 2.7437403240997972e-08, "loss": 0.2151, "step": 33789 }, { "epoch": 0.97, "grad_norm": 4.931016359003859, "learning_rate": 2.73889073170458e-08, "loss": 0.4213, "step": 33790 }, { "epoch": 0.97, "grad_norm": 6.228546152072955, "learning_rate": 2.734045417185527e-08, "loss": 0.2751, "step": 33791 }, { "epoch": 0.97, "grad_norm": 4.385692255538169, "learning_rate": 2.7292043805841607e-08, "loss": 0.2656, "step": 33792 }, { "epoch": 0.97, "grad_norm": 3.186848742206113, "learning_rate": 2.724367621942281e-08, "loss": 0.1016, "step": 33793 }, { "epoch": 0.97, "grad_norm": 1.8713673210442596, "learning_rate": 2.719535141301466e-08, "loss": 0.0952, "step": 33794 }, { "epoch": 0.97, "grad_norm": 3.28576018217341, "learning_rate": 2.7147069387032376e-08, "loss": 0.1883, "step": 33795 }, { "epoch": 0.97, "grad_norm": 7.065481383708594, "learning_rate": 2.7098830141891742e-08, "loss": 0.4922, "step": 33796 }, { "epoch": 0.97, "grad_norm": 4.2862528786103065, "learning_rate": 2.7050633678006865e-08, "loss": 0.4482, "step": 33797 }, { "epoch": 0.97, "grad_norm": 3.5159825800137585, "learning_rate": 2.7002479995794084e-08, "loss": 0.3661, "step": 33798 }, { "epoch": 0.97, "grad_norm": 4.792161794319277, "learning_rate": 2.6954369095665843e-08, "loss": 0.7392, "step": 33799 }, { "epoch": 0.97, "grad_norm": 8.309334890922274, "learning_rate": 2.6906300978037368e-08, "loss": 0.8054, "step": 33800 }, { "epoch": 0.97, "grad_norm": 3.9319902047649395, "learning_rate": 2.6858275643321662e-08, "loss": 0.2814, "step": 33801 }, { "epoch": 0.97, "grad_norm": 4.028407536342618, "learning_rate": 2.6810293091931726e-08, "loss": 0.3344, "step": 33802 }, { "epoch": 0.97, "grad_norm": 3.9714150079121144, "learning_rate": 2.6762353324280567e-08, "loss": 0.2937, "step": 33803 }, { "epoch": 0.97, "grad_norm": 7.763618501683674, "learning_rate": 2.6714456340780072e-08, "loss": 0.5666, "step": 33804 }, { "epoch": 0.97, "grad_norm": 6.741447399268897, "learning_rate": 2.6666602141843802e-08, "loss": 0.4419, "step": 33805 }, { "epoch": 0.97, "grad_norm": 3.097366434901453, "learning_rate": 2.6618790727881426e-08, "loss": 0.3079, "step": 33806 }, { "epoch": 0.97, "grad_norm": 8.496914247377982, "learning_rate": 2.6571022099305955e-08, "loss": 0.661, "step": 33807 }, { "epoch": 0.97, "grad_norm": 4.124317401704042, "learning_rate": 2.6523296256527608e-08, "loss": 0.2585, "step": 33808 }, { "epoch": 0.97, "grad_norm": 4.5748924326106835, "learning_rate": 2.6475613199956617e-08, "loss": 0.5563, "step": 33809 }, { "epoch": 0.97, "grad_norm": 1.2602891529095304, "learning_rate": 2.6427972930003766e-08, "loss": 0.0682, "step": 33810 }, { "epoch": 0.97, "grad_norm": 3.9905405996026055, "learning_rate": 2.6380375447078722e-08, "loss": 0.706, "step": 33811 }, { "epoch": 0.97, "grad_norm": 9.788587042609418, "learning_rate": 2.6332820751590605e-08, "loss": 0.581, "step": 33812 }, { "epoch": 0.97, "grad_norm": 6.964519431136455, "learning_rate": 2.6285308843949088e-08, "loss": 0.3226, "step": 33813 }, { "epoch": 0.97, "grad_norm": 4.253115830141787, "learning_rate": 2.6237839724563287e-08, "loss": 0.5273, "step": 33814 }, { "epoch": 0.97, "grad_norm": 7.110596000704125, "learning_rate": 2.6190413393840096e-08, "loss": 0.8335, "step": 33815 }, { "epoch": 0.97, "grad_norm": 5.895373345573417, "learning_rate": 2.6143029852189195e-08, "loss": 0.3993, "step": 33816 }, { "epoch": 0.97, "grad_norm": 6.16378025428899, "learning_rate": 2.6095689100016364e-08, "loss": 0.5246, "step": 33817 }, { "epoch": 0.97, "grad_norm": 5.002085965860361, "learning_rate": 2.6048391137730722e-08, "loss": 0.5386, "step": 33818 }, { "epoch": 0.97, "grad_norm": 4.2519416861496335, "learning_rate": 2.6001135965738056e-08, "loss": 0.2076, "step": 33819 }, { "epoch": 0.97, "grad_norm": 5.266659915090813, "learning_rate": 2.5953923584445263e-08, "loss": 0.6121, "step": 33820 }, { "epoch": 0.97, "grad_norm": 3.8612496460197883, "learning_rate": 2.590675399425868e-08, "loss": 0.2939, "step": 33821 }, { "epoch": 0.97, "grad_norm": 6.690411548871102, "learning_rate": 2.585962719558355e-08, "loss": 0.3288, "step": 33822 }, { "epoch": 0.97, "grad_norm": 5.030168547413393, "learning_rate": 2.5812543188825644e-08, "loss": 0.3922, "step": 33823 }, { "epoch": 0.97, "grad_norm": 5.348563285237836, "learning_rate": 2.5765501974390204e-08, "loss": 0.3238, "step": 33824 }, { "epoch": 0.97, "grad_norm": 6.163465270299135, "learning_rate": 2.5718503552681352e-08, "loss": 0.427, "step": 33825 }, { "epoch": 0.97, "grad_norm": 10.210542982049546, "learning_rate": 2.5671547924103758e-08, "loss": 1.3732, "step": 33826 }, { "epoch": 0.97, "grad_norm": 12.201637167597722, "learning_rate": 2.5624635089061546e-08, "loss": 0.5132, "step": 33827 }, { "epoch": 0.97, "grad_norm": 5.132072267616687, "learning_rate": 2.557776504795828e-08, "loss": 0.2566, "step": 33828 }, { "epoch": 0.97, "grad_norm": 6.675352883029936, "learning_rate": 2.5530937801196975e-08, "loss": 0.3057, "step": 33829 }, { "epoch": 0.97, "grad_norm": 3.8987638226087027, "learning_rate": 2.548415334918064e-08, "loss": 0.4605, "step": 33830 }, { "epoch": 0.97, "grad_norm": 5.998203525218009, "learning_rate": 2.5437411692311176e-08, "loss": 0.4335, "step": 33831 }, { "epoch": 0.97, "grad_norm": 6.3914314637889245, "learning_rate": 2.5390712830991592e-08, "loss": 0.7966, "step": 33832 }, { "epoch": 0.97, "grad_norm": 4.194137806592565, "learning_rate": 2.5344056765623237e-08, "loss": 0.2624, "step": 33833 }, { "epoch": 0.97, "grad_norm": 11.049030002032964, "learning_rate": 2.5297443496607453e-08, "loss": 0.7009, "step": 33834 }, { "epoch": 0.97, "grad_norm": 3.3098201167792647, "learning_rate": 2.5250873024345035e-08, "loss": 0.3786, "step": 33835 }, { "epoch": 0.97, "grad_norm": 3.0837062833522397, "learning_rate": 2.520434534923677e-08, "loss": 0.2037, "step": 33836 }, { "epoch": 0.97, "grad_norm": 6.544193856853874, "learning_rate": 2.515786047168345e-08, "loss": 0.6136, "step": 33837 }, { "epoch": 0.97, "grad_norm": 7.310043574462937, "learning_rate": 2.5111418392084196e-08, "loss": 0.5433, "step": 33838 }, { "epoch": 0.97, "grad_norm": 4.799602615278416, "learning_rate": 2.506501911083925e-08, "loss": 0.295, "step": 33839 }, { "epoch": 0.97, "grad_norm": 4.118186670195607, "learning_rate": 2.5018662628347177e-08, "loss": 0.755, "step": 33840 }, { "epoch": 0.97, "grad_norm": 3.7524419304295744, "learning_rate": 2.4972348945007106e-08, "loss": 0.4547, "step": 33841 }, { "epoch": 0.97, "grad_norm": 7.913471406965612, "learning_rate": 2.4926078061217052e-08, "loss": 0.7487, "step": 33842 }, { "epoch": 0.97, "grad_norm": 5.147275570705463, "learning_rate": 2.487984997737558e-08, "loss": 0.4074, "step": 33843 }, { "epoch": 0.97, "grad_norm": 4.517546670542788, "learning_rate": 2.4833664693880154e-08, "loss": 0.3975, "step": 33844 }, { "epoch": 0.97, "grad_norm": 5.80286519542875, "learning_rate": 2.4787522211128233e-08, "loss": 0.5216, "step": 33845 }, { "epoch": 0.97, "grad_norm": 4.595902308513932, "learning_rate": 2.4741422529516723e-08, "loss": 0.3957, "step": 33846 }, { "epoch": 0.97, "grad_norm": 3.731330684128171, "learning_rate": 2.4695365649442525e-08, "loss": 0.2378, "step": 33847 }, { "epoch": 0.97, "grad_norm": 3.9020800496644426, "learning_rate": 2.4649351571300883e-08, "loss": 0.356, "step": 33848 }, { "epoch": 0.97, "grad_norm": 2.2786621412874393, "learning_rate": 2.46033802954887e-08, "loss": 0.0583, "step": 33849 }, { "epoch": 0.97, "grad_norm": 7.522583529768301, "learning_rate": 2.4557451822400658e-08, "loss": 0.6728, "step": 33850 }, { "epoch": 0.97, "grad_norm": 6.1773441660610615, "learning_rate": 2.4511566152432554e-08, "loss": 0.4254, "step": 33851 }, { "epoch": 0.97, "grad_norm": 10.433926821597755, "learning_rate": 2.4465723285978516e-08, "loss": 0.43, "step": 33852 }, { "epoch": 0.97, "grad_norm": 5.90980793997488, "learning_rate": 2.4419923223433784e-08, "loss": 0.3998, "step": 33853 }, { "epoch": 0.97, "grad_norm": 2.8304536200468307, "learning_rate": 2.4374165965191932e-08, "loss": 0.2291, "step": 33854 }, { "epoch": 0.97, "grad_norm": 4.766589658066215, "learning_rate": 2.432845151164598e-08, "loss": 0.4552, "step": 33855 }, { "epoch": 0.97, "grad_norm": 11.08658331830718, "learning_rate": 2.4282779863190053e-08, "loss": 0.7259, "step": 33856 }, { "epoch": 0.97, "grad_norm": 9.783448944080229, "learning_rate": 2.423715102021662e-08, "loss": 0.6855, "step": 33857 }, { "epoch": 0.97, "grad_norm": 3.1447635446364886, "learning_rate": 2.4191564983118143e-08, "loss": 0.363, "step": 33858 }, { "epoch": 0.97, "grad_norm": 5.477873010056887, "learning_rate": 2.4146021752287084e-08, "loss": 0.4006, "step": 33859 }, { "epoch": 0.97, "grad_norm": 3.4465143957541633, "learning_rate": 2.4100521328115356e-08, "loss": 0.449, "step": 33860 }, { "epoch": 0.97, "grad_norm": 6.518280146957382, "learning_rate": 2.4055063710994307e-08, "loss": 0.2684, "step": 33861 }, { "epoch": 0.97, "grad_norm": 5.565114403445858, "learning_rate": 2.400964890131474e-08, "loss": 0.561, "step": 33862 }, { "epoch": 0.97, "grad_norm": 4.786100931145091, "learning_rate": 2.3964276899467454e-08, "loss": 0.7164, "step": 33863 }, { "epoch": 0.97, "grad_norm": 2.6902872535955757, "learning_rate": 2.3918947705842687e-08, "loss": 0.3441, "step": 33864 }, { "epoch": 0.97, "grad_norm": 4.885521904544606, "learning_rate": 2.3873661320830686e-08, "loss": 0.3305, "step": 33865 }, { "epoch": 0.97, "grad_norm": 3.4988514429141415, "learning_rate": 2.382841774482114e-08, "loss": 0.1542, "step": 33866 }, { "epoch": 0.97, "grad_norm": 3.9113119058168913, "learning_rate": 2.3783216978202628e-08, "loss": 0.2779, "step": 33867 }, { "epoch": 0.97, "grad_norm": 10.251072246013738, "learning_rate": 2.3738059021364834e-08, "loss": 0.72, "step": 33868 }, { "epoch": 0.97, "grad_norm": 3.366309013741824, "learning_rate": 2.3692943874695784e-08, "loss": 0.4119, "step": 33869 }, { "epoch": 0.97, "grad_norm": 7.910359095103634, "learning_rate": 2.3647871538582944e-08, "loss": 0.6516, "step": 33870 }, { "epoch": 0.97, "grad_norm": 7.134526893846813, "learning_rate": 2.360284201341545e-08, "loss": 0.4072, "step": 33871 }, { "epoch": 0.97, "grad_norm": 4.707530736736774, "learning_rate": 2.3557855299579657e-08, "loss": 0.5359, "step": 33872 }, { "epoch": 0.97, "grad_norm": 5.297814941279679, "learning_rate": 2.3512911397463035e-08, "loss": 0.1587, "step": 33873 }, { "epoch": 0.97, "grad_norm": 3.5445270581185953, "learning_rate": 2.3468010307452494e-08, "loss": 0.2514, "step": 33874 }, { "epoch": 0.97, "grad_norm": 6.426858346752986, "learning_rate": 2.3423152029933836e-08, "loss": 0.4187, "step": 33875 }, { "epoch": 0.97, "grad_norm": 2.9367017270036517, "learning_rate": 2.3378336565292313e-08, "loss": 0.3093, "step": 33876 }, { "epoch": 0.97, "grad_norm": 3.7490388910164376, "learning_rate": 2.3333563913914836e-08, "loss": 0.1807, "step": 33877 }, { "epoch": 0.97, "grad_norm": 17.532455865251514, "learning_rate": 2.3288834076186095e-08, "loss": 0.8805, "step": 33878 }, { "epoch": 0.97, "grad_norm": 8.09002667657252, "learning_rate": 2.3244147052490228e-08, "loss": 0.8541, "step": 33879 }, { "epoch": 0.97, "grad_norm": 3.6865254585311096, "learning_rate": 2.3199502843212484e-08, "loss": 0.3077, "step": 33880 }, { "epoch": 0.97, "grad_norm": 7.622252735445112, "learning_rate": 2.3154901448736998e-08, "loss": 0.5316, "step": 33881 }, { "epoch": 0.97, "grad_norm": 6.7486614030285335, "learning_rate": 2.31103428694468e-08, "loss": 0.7477, "step": 33882 }, { "epoch": 0.97, "grad_norm": 4.894791458410223, "learning_rate": 2.306582710572547e-08, "loss": 0.4618, "step": 33883 }, { "epoch": 0.97, "grad_norm": 8.56229806926439, "learning_rate": 2.3021354157956033e-08, "loss": 0.6123, "step": 33884 }, { "epoch": 0.97, "grad_norm": 4.558970667040778, "learning_rate": 2.2976924026521518e-08, "loss": 0.4491, "step": 33885 }, { "epoch": 0.97, "grad_norm": 3.6374984544573774, "learning_rate": 2.293253671180329e-08, "loss": 0.3279, "step": 33886 }, { "epoch": 0.97, "grad_norm": 3.6021017800353907, "learning_rate": 2.2888192214183813e-08, "loss": 0.1686, "step": 33887 }, { "epoch": 0.97, "grad_norm": 4.083208854231159, "learning_rate": 2.2843890534044454e-08, "loss": 0.5313, "step": 33888 }, { "epoch": 0.97, "grad_norm": 5.596482893319945, "learning_rate": 2.2799631671766022e-08, "loss": 0.3045, "step": 33889 }, { "epoch": 0.97, "grad_norm": 6.276931796654015, "learning_rate": 2.275541562772987e-08, "loss": 0.5139, "step": 33890 }, { "epoch": 0.97, "grad_norm": 4.975816128752726, "learning_rate": 2.2711242402316256e-08, "loss": 0.3618, "step": 33891 }, { "epoch": 0.97, "grad_norm": 2.6122253748181827, "learning_rate": 2.2667111995904324e-08, "loss": 0.0958, "step": 33892 }, { "epoch": 0.97, "grad_norm": 5.925260718359648, "learning_rate": 2.2623024408874872e-08, "loss": 0.4532, "step": 33893 }, { "epoch": 0.97, "grad_norm": 4.867206653049068, "learning_rate": 2.257897964160649e-08, "loss": 0.2588, "step": 33894 }, { "epoch": 0.97, "grad_norm": 5.246987386730135, "learning_rate": 2.2534977694478878e-08, "loss": 0.2759, "step": 33895 }, { "epoch": 0.97, "grad_norm": 5.0804746855082685, "learning_rate": 2.2491018567869505e-08, "loss": 0.4482, "step": 33896 }, { "epoch": 0.97, "grad_norm": 9.098397918777543, "learning_rate": 2.244710226215696e-08, "loss": 0.7624, "step": 33897 }, { "epoch": 0.97, "grad_norm": 3.3591524094758762, "learning_rate": 2.2403228777719277e-08, "loss": 0.2381, "step": 33898 }, { "epoch": 0.97, "grad_norm": 9.223015887669769, "learning_rate": 2.2359398114933927e-08, "loss": 0.4142, "step": 33899 }, { "epoch": 0.97, "grad_norm": 4.16530996964348, "learning_rate": 2.231561027417728e-08, "loss": 0.3919, "step": 33900 }, { "epoch": 0.97, "grad_norm": 4.311233085627901, "learning_rate": 2.2271865255827362e-08, "loss": 0.425, "step": 33901 }, { "epoch": 0.97, "grad_norm": 9.640032831982937, "learning_rate": 2.2228163060259434e-08, "loss": 0.7426, "step": 33902 }, { "epoch": 0.97, "grad_norm": 7.8690749388391525, "learning_rate": 2.2184503687849857e-08, "loss": 0.4874, "step": 33903 }, { "epoch": 0.97, "grad_norm": 4.691138139951857, "learning_rate": 2.2140887138973334e-08, "loss": 0.6055, "step": 33904 }, { "epoch": 0.97, "grad_norm": 3.8816703175655465, "learning_rate": 2.209731341400678e-08, "loss": 0.2789, "step": 33905 }, { "epoch": 0.97, "grad_norm": 7.7810280396696925, "learning_rate": 2.2053782513323797e-08, "loss": 0.4588, "step": 33906 }, { "epoch": 0.97, "grad_norm": 4.71610837851666, "learning_rate": 2.2010294437299074e-08, "loss": 0.2289, "step": 33907 }, { "epoch": 0.97, "grad_norm": 4.190936031078183, "learning_rate": 2.1966849186307314e-08, "loss": 0.3723, "step": 33908 }, { "epoch": 0.97, "grad_norm": 8.259454338394129, "learning_rate": 2.1923446760721002e-08, "loss": 0.5267, "step": 33909 }, { "epoch": 0.97, "grad_norm": 7.5721720802131545, "learning_rate": 2.1880087160915388e-08, "loss": 0.1297, "step": 33910 }, { "epoch": 0.97, "grad_norm": 6.932560064067288, "learning_rate": 2.183677038726184e-08, "loss": 0.6681, "step": 33911 }, { "epoch": 0.97, "grad_norm": 7.484860971875867, "learning_rate": 2.1793496440133954e-08, "loss": 0.456, "step": 33912 }, { "epoch": 0.97, "grad_norm": 6.794327181641777, "learning_rate": 2.1750265319903097e-08, "loss": 0.619, "step": 33913 }, { "epoch": 0.97, "grad_norm": 4.834473272704423, "learning_rate": 2.1707077026942303e-08, "loss": 0.5335, "step": 33914 }, { "epoch": 0.97, "grad_norm": 5.679363089299509, "learning_rate": 2.1663931561622386e-08, "loss": 0.5976, "step": 33915 }, { "epoch": 0.97, "grad_norm": 9.524153772444917, "learning_rate": 2.1620828924315274e-08, "loss": 0.3893, "step": 33916 }, { "epoch": 0.97, "grad_norm": 5.7011786706197185, "learning_rate": 2.1577769115390113e-08, "loss": 0.4371, "step": 33917 }, { "epoch": 0.97, "grad_norm": 11.372969058316126, "learning_rate": 2.1534752135219384e-08, "loss": 0.5059, "step": 33918 }, { "epoch": 0.97, "grad_norm": 6.138059667486821, "learning_rate": 2.149177798417168e-08, "loss": 0.3488, "step": 33919 }, { "epoch": 0.97, "grad_norm": 4.109755697855668, "learning_rate": 2.144884666261726e-08, "loss": 0.306, "step": 33920 }, { "epoch": 0.97, "grad_norm": 6.727748398577111, "learning_rate": 2.1405958170925832e-08, "loss": 0.3777, "step": 33921 }, { "epoch": 0.97, "grad_norm": 4.756224017880587, "learning_rate": 2.136311250946599e-08, "loss": 0.3777, "step": 33922 }, { "epoch": 0.97, "grad_norm": 3.959054081533209, "learning_rate": 2.1320309678605767e-08, "loss": 0.4058, "step": 33923 }, { "epoch": 0.97, "grad_norm": 10.860661316577294, "learning_rate": 2.1277549678714314e-08, "loss": 1.0286, "step": 33924 }, { "epoch": 0.97, "grad_norm": 3.7910922247622563, "learning_rate": 2.123483251015912e-08, "loss": 0.338, "step": 33925 }, { "epoch": 0.97, "grad_norm": 5.047972899262943, "learning_rate": 2.1192158173307665e-08, "loss": 0.5703, "step": 33926 }, { "epoch": 0.97, "grad_norm": 6.4276738217566605, "learning_rate": 2.1149526668526877e-08, "loss": 0.3784, "step": 33927 }, { "epoch": 0.97, "grad_norm": 5.426181281374577, "learning_rate": 2.1106937996183685e-08, "loss": 0.2634, "step": 33928 }, { "epoch": 0.97, "grad_norm": 4.350668796559439, "learning_rate": 2.1064392156644465e-08, "loss": 0.6487, "step": 33929 }, { "epoch": 0.97, "grad_norm": 6.049861948944606, "learning_rate": 2.1021889150275588e-08, "loss": 0.6874, "step": 33930 }, { "epoch": 0.97, "grad_norm": 5.3630971633869775, "learning_rate": 2.0979428977442318e-08, "loss": 0.4912, "step": 33931 }, { "epoch": 0.97, "grad_norm": 2.239107042869159, "learning_rate": 2.0937011638509917e-08, "loss": 0.0976, "step": 33932 }, { "epoch": 0.97, "grad_norm": 5.130729589429024, "learning_rate": 2.08946371338431e-08, "loss": 0.3131, "step": 33933 }, { "epoch": 0.97, "grad_norm": 5.5685776072661275, "learning_rate": 2.0852305463806567e-08, "loss": 0.4423, "step": 33934 }, { "epoch": 0.97, "grad_norm": 7.430684244422011, "learning_rate": 2.0810016628765028e-08, "loss": 0.6416, "step": 33935 }, { "epoch": 0.97, "grad_norm": 6.418960986806703, "learning_rate": 2.076777062908153e-08, "loss": 0.6111, "step": 33936 }, { "epoch": 0.97, "grad_norm": 3.1517397503992175, "learning_rate": 2.072556746512022e-08, "loss": 0.2957, "step": 33937 }, { "epoch": 0.97, "grad_norm": 10.191443165792306, "learning_rate": 2.0683407137243595e-08, "loss": 0.572, "step": 33938 }, { "epoch": 0.97, "grad_norm": 5.962743563045875, "learning_rate": 2.0641289645814688e-08, "loss": 0.3768, "step": 33939 }, { "epoch": 0.97, "grad_norm": 4.385243984033216, "learning_rate": 2.0599214991195437e-08, "loss": 0.5683, "step": 33940 }, { "epoch": 0.97, "grad_norm": 5.615632692224566, "learning_rate": 2.055718317374833e-08, "loss": 0.3808, "step": 33941 }, { "epoch": 0.97, "grad_norm": 9.86913685635318, "learning_rate": 2.0515194193834743e-08, "loss": 0.4847, "step": 33942 }, { "epoch": 0.97, "grad_norm": 7.764571765112817, "learning_rate": 2.0473248051815497e-08, "loss": 0.7562, "step": 33943 }, { "epoch": 0.97, "grad_norm": 2.4481406630170084, "learning_rate": 2.0431344748051972e-08, "loss": 0.171, "step": 33944 }, { "epoch": 0.97, "grad_norm": 6.3423699818156765, "learning_rate": 2.038948428290444e-08, "loss": 0.4263, "step": 33945 }, { "epoch": 0.97, "grad_norm": 5.464474756245888, "learning_rate": 2.0347666656733156e-08, "loss": 0.8371, "step": 33946 }, { "epoch": 0.97, "grad_norm": 7.509506177451005, "learning_rate": 2.0305891869897286e-08, "loss": 0.5583, "step": 33947 }, { "epoch": 0.97, "grad_norm": 9.368356919647189, "learning_rate": 2.0264159922757653e-08, "loss": 0.8506, "step": 33948 }, { "epoch": 0.97, "grad_norm": 5.9339754786703125, "learning_rate": 2.0222470815671746e-08, "loss": 0.317, "step": 33949 }, { "epoch": 0.97, "grad_norm": 9.488539307855362, "learning_rate": 2.0180824548998724e-08, "loss": 0.6015, "step": 33950 }, { "epoch": 0.97, "grad_norm": 3.993858749031362, "learning_rate": 2.0139221123097184e-08, "loss": 0.2362, "step": 33951 }, { "epoch": 0.97, "grad_norm": 8.3700937732106, "learning_rate": 2.0097660538324626e-08, "loss": 0.5842, "step": 33952 }, { "epoch": 0.97, "grad_norm": 4.587609364871649, "learning_rate": 2.005614279503909e-08, "loss": 0.2336, "step": 33953 }, { "epoch": 0.97, "grad_norm": 5.214503176422717, "learning_rate": 2.0014667893596963e-08, "loss": 0.6061, "step": 33954 }, { "epoch": 0.97, "grad_norm": 3.534539235259784, "learning_rate": 1.997323583435573e-08, "loss": 0.2111, "step": 33955 }, { "epoch": 0.97, "grad_norm": 12.5715760425179, "learning_rate": 1.9931846617671225e-08, "loss": 0.5773, "step": 33956 }, { "epoch": 0.97, "grad_norm": 6.311007955997961, "learning_rate": 1.989050024390038e-08, "loss": 0.4512, "step": 33957 }, { "epoch": 0.97, "grad_norm": 4.917154646219152, "learning_rate": 1.9849196713398466e-08, "loss": 0.2267, "step": 33958 }, { "epoch": 0.97, "grad_norm": 5.12041465976322, "learning_rate": 1.9807936026520202e-08, "loss": 0.3174, "step": 33959 }, { "epoch": 0.97, "grad_norm": 3.3267166130665005, "learning_rate": 1.9766718183621414e-08, "loss": 0.2046, "step": 33960 }, { "epoch": 0.97, "grad_norm": 10.208901048301607, "learning_rate": 1.972554318505626e-08, "loss": 0.7242, "step": 33961 }, { "epoch": 0.97, "grad_norm": 10.984812742711258, "learning_rate": 1.9684411031179463e-08, "loss": 0.7723, "step": 33962 }, { "epoch": 0.97, "grad_norm": 6.776012173314064, "learning_rate": 1.9643321722344065e-08, "loss": 0.5419, "step": 33963 }, { "epoch": 0.97, "grad_norm": 3.182026014878621, "learning_rate": 1.9602275258904234e-08, "loss": 0.1979, "step": 33964 }, { "epoch": 0.97, "grad_norm": 3.2786843941713335, "learning_rate": 1.9561271641212464e-08, "loss": 0.325, "step": 33965 }, { "epoch": 0.97, "grad_norm": 9.343885848164579, "learning_rate": 1.9520310869621806e-08, "loss": 0.3176, "step": 33966 }, { "epoch": 0.97, "grad_norm": 6.256496029035768, "learning_rate": 1.947939294448531e-08, "loss": 0.3086, "step": 33967 }, { "epoch": 0.97, "grad_norm": 3.857305968269512, "learning_rate": 1.9438517866153804e-08, "loss": 0.2842, "step": 33968 }, { "epoch": 0.97, "grad_norm": 8.042384996762914, "learning_rate": 1.939768563497979e-08, "loss": 0.3667, "step": 33969 }, { "epoch": 0.97, "grad_norm": 4.565438983112194, "learning_rate": 1.9356896251314096e-08, "loss": 0.3552, "step": 33970 }, { "epoch": 0.97, "grad_norm": 4.759146667734131, "learning_rate": 1.9316149715508103e-08, "loss": 0.309, "step": 33971 }, { "epoch": 0.97, "grad_norm": 4.295753388103416, "learning_rate": 1.9275446027911538e-08, "loss": 0.2948, "step": 33972 }, { "epoch": 0.97, "grad_norm": 2.9378907978558426, "learning_rate": 1.923478518887578e-08, "loss": 0.1719, "step": 33973 }, { "epoch": 0.97, "grad_norm": 5.609596736850104, "learning_rate": 1.9194167198748893e-08, "loss": 0.4734, "step": 33974 }, { "epoch": 0.97, "grad_norm": 6.880548890834896, "learning_rate": 1.9153592057882254e-08, "loss": 0.6585, "step": 33975 }, { "epoch": 0.97, "grad_norm": 5.216479327135742, "learning_rate": 1.911305976662392e-08, "loss": 0.584, "step": 33976 }, { "epoch": 0.97, "grad_norm": 6.30289237974253, "learning_rate": 1.907257032532195e-08, "loss": 0.3703, "step": 33977 }, { "epoch": 0.97, "grad_norm": 5.276472108635467, "learning_rate": 1.903212373432606e-08, "loss": 0.5332, "step": 33978 }, { "epoch": 0.97, "grad_norm": 4.30951050786668, "learning_rate": 1.8991719993983193e-08, "loss": 0.2616, "step": 33979 }, { "epoch": 0.97, "grad_norm": 5.004355965029401, "learning_rate": 1.895135910464141e-08, "loss": 0.5283, "step": 33980 }, { "epoch": 0.97, "grad_norm": 7.162883632432031, "learning_rate": 1.8911041066647652e-08, "loss": 0.5309, "step": 33981 }, { "epoch": 0.97, "grad_norm": 4.649029144333608, "learning_rate": 1.887076588034942e-08, "loss": 0.2734, "step": 33982 }, { "epoch": 0.97, "grad_norm": 4.193015522888767, "learning_rate": 1.8830533546092545e-08, "loss": 0.5162, "step": 33983 }, { "epoch": 0.97, "grad_norm": 5.514676409623918, "learning_rate": 1.8790344064222864e-08, "loss": 0.6908, "step": 33984 }, { "epoch": 0.97, "grad_norm": 4.551353320400333, "learning_rate": 1.875019743508677e-08, "loss": 0.3839, "step": 33985 }, { "epoch": 0.97, "grad_norm": 4.67465912907267, "learning_rate": 1.8710093659029538e-08, "loss": 0.4162, "step": 33986 }, { "epoch": 0.97, "grad_norm": 5.3326800611867995, "learning_rate": 1.8670032736395894e-08, "loss": 0.4459, "step": 33987 }, { "epoch": 0.97, "grad_norm": 1.7188551610506813, "learning_rate": 1.8630014667530562e-08, "loss": 0.1221, "step": 33988 }, { "epoch": 0.97, "grad_norm": 5.796345257267369, "learning_rate": 1.8590039452778265e-08, "loss": 0.2888, "step": 33989 }, { "epoch": 0.97, "grad_norm": 4.4253666979041615, "learning_rate": 1.8550107092482616e-08, "loss": 0.265, "step": 33990 }, { "epoch": 0.97, "grad_norm": 3.205800664844524, "learning_rate": 1.8510217586986677e-08, "loss": 0.2692, "step": 33991 }, { "epoch": 0.97, "grad_norm": 3.938665732133307, "learning_rate": 1.8470370936634063e-08, "loss": 0.5474, "step": 33992 }, { "epoch": 0.97, "grad_norm": 6.877612501893102, "learning_rate": 1.8430567141767275e-08, "loss": 0.4742, "step": 33993 }, { "epoch": 0.97, "grad_norm": 6.7639558420211845, "learning_rate": 1.8390806202729372e-08, "loss": 0.5255, "step": 33994 }, { "epoch": 0.97, "grad_norm": 9.19956701959845, "learning_rate": 1.8351088119861748e-08, "loss": 0.7811, "step": 33995 }, { "epoch": 0.97, "grad_norm": 4.276450787702974, "learning_rate": 1.831141289350691e-08, "loss": 0.23, "step": 33996 }, { "epoch": 0.97, "grad_norm": 4.892416823844312, "learning_rate": 1.8271780524004577e-08, "loss": 0.3071, "step": 33997 }, { "epoch": 0.97, "grad_norm": 5.272720355968064, "learning_rate": 1.8232191011697818e-08, "loss": 0.2669, "step": 33998 }, { "epoch": 0.97, "grad_norm": 6.027858432790754, "learning_rate": 1.819264435692525e-08, "loss": 0.4649, "step": 33999 }, { "epoch": 0.97, "grad_norm": 4.675448017786392, "learning_rate": 1.815314056002826e-08, "loss": 0.1193, "step": 34000 }, { "epoch": 0.97, "grad_norm": 6.352399895189673, "learning_rate": 1.8113679621347136e-08, "loss": 0.5225, "step": 34001 }, { "epoch": 0.97, "grad_norm": 23.292024349225468, "learning_rate": 1.807426154121994e-08, "loss": 0.5608, "step": 34002 }, { "epoch": 0.97, "grad_norm": 3.2689324198599254, "learning_rate": 1.80348863199864e-08, "loss": 0.5597, "step": 34003 }, { "epoch": 0.97, "grad_norm": 4.224148722103974, "learning_rate": 1.799555395798569e-08, "loss": 0.2459, "step": 34004 }, { "epoch": 0.97, "grad_norm": 4.822371477438497, "learning_rate": 1.7956264455555872e-08, "loss": 0.3319, "step": 34005 }, { "epoch": 0.97, "grad_norm": 3.1739585121955147, "learning_rate": 1.7917017813034454e-08, "loss": 0.4766, "step": 34006 }, { "epoch": 0.97, "grad_norm": 7.493904402337658, "learning_rate": 1.7877814030760054e-08, "loss": 0.3019, "step": 34007 }, { "epoch": 0.97, "grad_norm": 6.875125814500412, "learning_rate": 1.783865310906907e-08, "loss": 0.4867, "step": 34008 }, { "epoch": 0.97, "grad_norm": 6.474748272218669, "learning_rate": 1.779953504829901e-08, "loss": 0.7133, "step": 34009 }, { "epoch": 0.97, "grad_norm": 4.370017838867231, "learning_rate": 1.7760459848785715e-08, "loss": 0.313, "step": 34010 }, { "epoch": 0.97, "grad_norm": 5.089103123728471, "learning_rate": 1.772142751086614e-08, "loss": 0.3716, "step": 34011 }, { "epoch": 0.97, "grad_norm": 6.41595668910812, "learning_rate": 1.7682438034875572e-08, "loss": 0.6392, "step": 34012 }, { "epoch": 0.97, "grad_norm": 11.535991985921417, "learning_rate": 1.7643491421149293e-08, "loss": 0.8593, "step": 34013 }, { "epoch": 0.97, "grad_norm": 5.847045744253613, "learning_rate": 1.7604587670022598e-08, "loss": 0.2817, "step": 34014 }, { "epoch": 0.97, "grad_norm": 4.26821457505617, "learning_rate": 1.756572678183077e-08, "loss": 0.451, "step": 34015 }, { "epoch": 0.97, "grad_norm": 4.836287478106769, "learning_rate": 1.752690875690688e-08, "loss": 0.234, "step": 34016 }, { "epoch": 0.97, "grad_norm": 6.5916779141464765, "learning_rate": 1.748813359558621e-08, "loss": 0.3287, "step": 34017 }, { "epoch": 0.97, "grad_norm": 4.182732814960313, "learning_rate": 1.7449401298201273e-08, "loss": 0.1567, "step": 34018 }, { "epoch": 0.97, "grad_norm": 5.177603300860912, "learning_rate": 1.7410711865085694e-08, "loss": 0.3768, "step": 34019 }, { "epoch": 0.97, "grad_norm": 4.328538785359029, "learning_rate": 1.7372065296571983e-08, "loss": 0.2166, "step": 34020 }, { "epoch": 0.97, "grad_norm": 5.497471444895832, "learning_rate": 1.7333461592993206e-08, "loss": 0.7721, "step": 34021 }, { "epoch": 0.97, "grad_norm": 2.172521220609494, "learning_rate": 1.729490075468132e-08, "loss": 0.1003, "step": 34022 }, { "epoch": 0.97, "grad_norm": 5.217293724584974, "learning_rate": 1.725638278196773e-08, "loss": 0.2801, "step": 34023 }, { "epoch": 0.97, "grad_norm": 4.139619352468449, "learning_rate": 1.721790767518383e-08, "loss": 0.3164, "step": 34024 }, { "epoch": 0.97, "grad_norm": 6.7415901352633325, "learning_rate": 1.717947543466103e-08, "loss": 0.5399, "step": 34025 }, { "epoch": 0.97, "grad_norm": 7.951697320707785, "learning_rate": 1.7141086060729617e-08, "loss": 0.2045, "step": 34026 }, { "epoch": 0.97, "grad_norm": 5.955806018337397, "learning_rate": 1.7102739553719884e-08, "loss": 0.3706, "step": 34027 }, { "epoch": 0.97, "grad_norm": 5.963638633594906, "learning_rate": 1.706443591396212e-08, "loss": 0.6041, "step": 34028 }, { "epoch": 0.97, "grad_norm": 5.857934922317562, "learning_rate": 1.7026175141785506e-08, "loss": 0.5238, "step": 34029 }, { "epoch": 0.97, "grad_norm": 3.696156088932865, "learning_rate": 1.6987957237518672e-08, "loss": 0.3803, "step": 34030 }, { "epoch": 0.97, "grad_norm": 7.833754413190189, "learning_rate": 1.694978220149135e-08, "loss": 0.5775, "step": 34031 }, { "epoch": 0.97, "grad_norm": 10.350627905923147, "learning_rate": 1.6911650034031058e-08, "loss": 0.5772, "step": 34032 }, { "epoch": 0.97, "grad_norm": 5.149649493317212, "learning_rate": 1.687356073546642e-08, "loss": 0.1995, "step": 34033 }, { "epoch": 0.97, "grad_norm": 5.562785087737778, "learning_rate": 1.6835514306125512e-08, "loss": 0.8072, "step": 34034 }, { "epoch": 0.97, "grad_norm": 4.394461669371024, "learning_rate": 1.6797510746334734e-08, "loss": 0.4229, "step": 34035 }, { "epoch": 0.97, "grad_norm": 6.907645554888365, "learning_rate": 1.6759550056421604e-08, "loss": 0.4333, "step": 34036 }, { "epoch": 0.97, "grad_norm": 4.487090401830615, "learning_rate": 1.6721632236712527e-08, "loss": 0.4756, "step": 34037 }, { "epoch": 0.97, "grad_norm": 2.9729599365225634, "learning_rate": 1.6683757287533352e-08, "loss": 0.1609, "step": 34038 }, { "epoch": 0.97, "grad_norm": 3.517005013773872, "learning_rate": 1.664592520921049e-08, "loss": 0.162, "step": 34039 }, { "epoch": 0.97, "grad_norm": 7.439454759440865, "learning_rate": 1.6608136002068675e-08, "loss": 0.3634, "step": 34040 }, { "epoch": 0.97, "grad_norm": 6.322267736812283, "learning_rate": 1.657038966643376e-08, "loss": 0.3758, "step": 34041 }, { "epoch": 0.97, "grad_norm": 3.63468404162812, "learning_rate": 1.6532686202630486e-08, "loss": 0.2083, "step": 34042 }, { "epoch": 0.97, "grad_norm": 4.562502064116547, "learning_rate": 1.649502561098304e-08, "loss": 0.3213, "step": 34043 }, { "epoch": 0.97, "grad_norm": 7.879574007438237, "learning_rate": 1.6457407891815047e-08, "loss": 0.4918, "step": 34044 }, { "epoch": 0.97, "grad_norm": 5.3957203542696215, "learning_rate": 1.6419833045450693e-08, "loss": 0.3116, "step": 34045 }, { "epoch": 0.97, "grad_norm": 3.5068277790736344, "learning_rate": 1.6382301072212504e-08, "loss": 0.2408, "step": 34046 }, { "epoch": 0.98, "grad_norm": 4.972163629216228, "learning_rate": 1.6344811972424103e-08, "loss": 0.5728, "step": 34047 }, { "epoch": 0.98, "grad_norm": 2.939970610935563, "learning_rate": 1.6307365746407454e-08, "loss": 0.2461, "step": 34048 }, { "epoch": 0.98, "grad_norm": 7.378622296969995, "learning_rate": 1.6269962394485085e-08, "loss": 0.3449, "step": 34049 }, { "epoch": 0.98, "grad_norm": 5.444579284429802, "learning_rate": 1.6232601916978396e-08, "loss": 0.4613, "step": 34050 }, { "epoch": 0.98, "grad_norm": 4.44450298641644, "learning_rate": 1.6195284314209358e-08, "loss": 0.5644, "step": 34051 }, { "epoch": 0.98, "grad_norm": 3.5123916846846024, "learning_rate": 1.615800958649827e-08, "loss": 0.3946, "step": 34052 }, { "epoch": 0.98, "grad_norm": 6.0574834459230535, "learning_rate": 1.6120777734166536e-08, "loss": 0.4254, "step": 34053 }, { "epoch": 0.98, "grad_norm": 4.865480886100158, "learning_rate": 1.6083588757533353e-08, "loss": 0.4846, "step": 34054 }, { "epoch": 0.98, "grad_norm": 6.6037076997169395, "learning_rate": 1.6046442656920123e-08, "loss": 0.71, "step": 34055 }, { "epoch": 0.98, "grad_norm": 3.9136251787757486, "learning_rate": 1.6009339432646043e-08, "loss": 0.3601, "step": 34056 }, { "epoch": 0.98, "grad_norm": 4.681605332949346, "learning_rate": 1.5972279085029186e-08, "loss": 0.2454, "step": 34057 }, { "epoch": 0.98, "grad_norm": 9.82376579317376, "learning_rate": 1.5935261614389853e-08, "loss": 1.1362, "step": 34058 }, { "epoch": 0.98, "grad_norm": 5.422029476548589, "learning_rate": 1.589828702104501e-08, "loss": 0.5568, "step": 34059 }, { "epoch": 0.98, "grad_norm": 4.99928161705562, "learning_rate": 1.586135530531441e-08, "loss": 0.4951, "step": 34060 }, { "epoch": 0.98, "grad_norm": 7.058148537355626, "learning_rate": 1.5824466467514455e-08, "loss": 0.3625, "step": 34061 }, { "epoch": 0.98, "grad_norm": 4.792166595363637, "learning_rate": 1.5787620507962674e-08, "loss": 0.2112, "step": 34062 }, { "epoch": 0.98, "grad_norm": 6.400372781923652, "learning_rate": 1.575081742697715e-08, "loss": 0.5428, "step": 34063 }, { "epoch": 0.98, "grad_norm": 10.10208447163243, "learning_rate": 1.571405722487318e-08, "loss": 1.0198, "step": 34064 }, { "epoch": 0.98, "grad_norm": 3.5415106215413985, "learning_rate": 1.567733990196718e-08, "loss": 0.3775, "step": 34065 }, { "epoch": 0.98, "grad_norm": 7.49678142150508, "learning_rate": 1.5640665458575564e-08, "loss": 0.3475, "step": 34066 }, { "epoch": 0.98, "grad_norm": 6.335415142460139, "learning_rate": 1.5604033895013637e-08, "loss": 0.5141, "step": 34067 }, { "epoch": 0.98, "grad_norm": 3.5987220038821968, "learning_rate": 1.5567445211596698e-08, "loss": 0.1929, "step": 34068 }, { "epoch": 0.98, "grad_norm": 7.1845013541401475, "learning_rate": 1.5530899408638945e-08, "loss": 0.749, "step": 34069 }, { "epoch": 0.98, "grad_norm": 3.801772581317239, "learning_rate": 1.5494396486455677e-08, "loss": 0.2725, "step": 34070 }, { "epoch": 0.98, "grad_norm": 5.713719140643146, "learning_rate": 1.545793644535998e-08, "loss": 0.6213, "step": 34071 }, { "epoch": 0.98, "grad_norm": 2.1166498550238013, "learning_rate": 1.5421519285666043e-08, "loss": 0.1214, "step": 34072 }, { "epoch": 0.98, "grad_norm": 5.0239538043109375, "learning_rate": 1.538514500768695e-08, "loss": 0.4828, "step": 34073 }, { "epoch": 0.98, "grad_norm": 3.9491448852996647, "learning_rate": 1.534881361173579e-08, "loss": 0.3934, "step": 34074 }, { "epoch": 0.98, "grad_norm": 7.478856132528283, "learning_rate": 1.5312525098125085e-08, "loss": 0.5464, "step": 34075 }, { "epoch": 0.98, "grad_norm": 3.5502234630367915, "learning_rate": 1.5276279467167364e-08, "loss": 0.1817, "step": 34076 }, { "epoch": 0.98, "grad_norm": 6.082973987817843, "learning_rate": 1.5240076719174046e-08, "loss": 0.2001, "step": 34077 }, { "epoch": 0.98, "grad_norm": 2.983492206404761, "learning_rate": 1.520391685445599e-08, "loss": 0.1199, "step": 34078 }, { "epoch": 0.98, "grad_norm": 1.0584683671736024, "learning_rate": 1.5167799873325174e-08, "loss": 0.0568, "step": 34079 }, { "epoch": 0.98, "grad_norm": 4.125939233374035, "learning_rate": 1.5131725776092453e-08, "loss": 0.4505, "step": 34080 }, { "epoch": 0.98, "grad_norm": 7.127243491875416, "learning_rate": 1.509569456306703e-08, "loss": 0.4623, "step": 34081 }, { "epoch": 0.98, "grad_norm": 3.4058050337467343, "learning_rate": 1.5059706234560323e-08, "loss": 0.2554, "step": 34082 }, { "epoch": 0.98, "grad_norm": 6.907414195811451, "learning_rate": 1.502376079088097e-08, "loss": 0.4255, "step": 34083 }, { "epoch": 0.98, "grad_norm": 5.628952672106459, "learning_rate": 1.498785823233817e-08, "loss": 0.475, "step": 34084 }, { "epoch": 0.98, "grad_norm": 8.0916339950871, "learning_rate": 1.4951998559241122e-08, "loss": 0.3488, "step": 34085 }, { "epoch": 0.98, "grad_norm": 6.4736287336671845, "learning_rate": 1.491618177189791e-08, "loss": 0.3627, "step": 34086 }, { "epoch": 0.98, "grad_norm": 8.64084775337485, "learning_rate": 1.488040787061773e-08, "loss": 0.775, "step": 34087 }, { "epoch": 0.98, "grad_norm": 3.9372253019394017, "learning_rate": 1.4844676855707007e-08, "loss": 0.2981, "step": 34088 }, { "epoch": 0.98, "grad_norm": 5.277193915825804, "learning_rate": 1.4808988727474382e-08, "loss": 0.5006, "step": 34089 }, { "epoch": 0.98, "grad_norm": 5.726474568731456, "learning_rate": 1.4773343486225722e-08, "loss": 0.3475, "step": 34090 }, { "epoch": 0.98, "grad_norm": 6.951344122685879, "learning_rate": 1.4737741132268557e-08, "loss": 0.6838, "step": 34091 }, { "epoch": 0.98, "grad_norm": 5.714047734379956, "learning_rate": 1.47021816659082e-08, "loss": 0.6465, "step": 34092 }, { "epoch": 0.98, "grad_norm": 7.125975843160038, "learning_rate": 1.4666665087451626e-08, "loss": 0.3331, "step": 34093 }, { "epoch": 0.98, "grad_norm": 17.361041767575276, "learning_rate": 1.4631191397204148e-08, "loss": 0.5064, "step": 34094 }, { "epoch": 0.98, "grad_norm": 3.5927450682340982, "learning_rate": 1.4595760595469965e-08, "loss": 0.0639, "step": 34095 }, { "epoch": 0.98, "grad_norm": 4.685898723486375, "learning_rate": 1.4560372682555501e-08, "loss": 0.2913, "step": 34096 }, { "epoch": 0.98, "grad_norm": 4.1904012963127295, "learning_rate": 1.4525027658763845e-08, "loss": 0.3828, "step": 34097 }, { "epoch": 0.98, "grad_norm": 7.022509554961383, "learning_rate": 1.4489725524399756e-08, "loss": 0.4115, "step": 34098 }, { "epoch": 0.98, "grad_norm": 2.273467171858811, "learning_rate": 1.4454466279766321e-08, "loss": 0.2135, "step": 34099 }, { "epoch": 0.98, "grad_norm": 10.211633706674439, "learning_rate": 1.4419249925167745e-08, "loss": 0.6177, "step": 34100 }, { "epoch": 0.98, "grad_norm": 5.482292953839937, "learning_rate": 1.4384076460906004e-08, "loss": 0.2538, "step": 34101 }, { "epoch": 0.98, "grad_norm": 4.0473309705569545, "learning_rate": 1.434894588728475e-08, "loss": 0.4837, "step": 34102 }, { "epoch": 0.98, "grad_norm": 5.453605302792367, "learning_rate": 1.4313858204605402e-08, "loss": 0.712, "step": 34103 }, { "epoch": 0.98, "grad_norm": 8.547660373121571, "learning_rate": 1.4278813413169945e-08, "loss": 0.6889, "step": 34104 }, { "epoch": 0.98, "grad_norm": 8.121289623810284, "learning_rate": 1.424381151328036e-08, "loss": 0.4449, "step": 34105 }, { "epoch": 0.98, "grad_norm": 3.871894422579397, "learning_rate": 1.420885250523696e-08, "loss": 0.4493, "step": 34106 }, { "epoch": 0.98, "grad_norm": 4.727591511341468, "learning_rate": 1.417393638934117e-08, "loss": 0.3892, "step": 34107 }, { "epoch": 0.98, "grad_norm": 9.412427628886107, "learning_rate": 1.4139063165893307e-08, "loss": 0.7704, "step": 34108 }, { "epoch": 0.98, "grad_norm": 10.557710046794709, "learning_rate": 1.4104232835193132e-08, "loss": 0.6272, "step": 34109 }, { "epoch": 0.98, "grad_norm": 3.444403418152207, "learning_rate": 1.4069445397540405e-08, "loss": 0.2246, "step": 34110 }, { "epoch": 0.98, "grad_norm": 3.065795798798183, "learning_rate": 1.4034700853234329e-08, "loss": 0.297, "step": 34111 }, { "epoch": 0.98, "grad_norm": 5.812853279452665, "learning_rate": 1.3999999202573555e-08, "loss": 0.5115, "step": 34112 }, { "epoch": 0.98, "grad_norm": 3.2060062010878227, "learning_rate": 1.3965340445857289e-08, "loss": 0.2529, "step": 34113 }, { "epoch": 0.98, "grad_norm": 6.408128714136707, "learning_rate": 1.3930724583382516e-08, "loss": 0.346, "step": 34114 }, { "epoch": 0.98, "grad_norm": 2.324558540958923, "learning_rate": 1.389615161544844e-08, "loss": 0.1409, "step": 34115 }, { "epoch": 0.98, "grad_norm": 3.452478305104878, "learning_rate": 1.3861621542352043e-08, "loss": 0.3253, "step": 34116 }, { "epoch": 0.98, "grad_norm": 7.956293070902066, "learning_rate": 1.3827134364390315e-08, "loss": 0.6406, "step": 34117 }, { "epoch": 0.98, "grad_norm": 3.9040656734555337, "learning_rate": 1.3792690081859129e-08, "loss": 0.3771, "step": 34118 }, { "epoch": 0.98, "grad_norm": 4.344228375116909, "learning_rate": 1.3758288695056022e-08, "loss": 0.3945, "step": 34119 }, { "epoch": 0.98, "grad_norm": 8.053991574434809, "learning_rate": 1.3723930204276314e-08, "loss": 0.6581, "step": 34120 }, { "epoch": 0.98, "grad_norm": 4.716755047409325, "learning_rate": 1.3689614609815883e-08, "loss": 0.2886, "step": 34121 }, { "epoch": 0.98, "grad_norm": 5.997787584260909, "learning_rate": 1.3655341911969489e-08, "loss": 0.4403, "step": 34122 }, { "epoch": 0.98, "grad_norm": 6.678842609355319, "learning_rate": 1.3621112111032453e-08, "loss": 0.3804, "step": 34123 }, { "epoch": 0.98, "grad_norm": 9.627944991160243, "learning_rate": 1.3586925207298984e-08, "loss": 0.8614, "step": 34124 }, { "epoch": 0.98, "grad_norm": 5.9432090216255595, "learning_rate": 1.3552781201062737e-08, "loss": 0.762, "step": 34125 }, { "epoch": 0.98, "grad_norm": 4.523183416896238, "learning_rate": 1.3518680092618474e-08, "loss": 0.3377, "step": 34126 }, { "epoch": 0.98, "grad_norm": 2.5597356795968396, "learning_rate": 1.3484621882259297e-08, "loss": 0.1475, "step": 34127 }, { "epoch": 0.98, "grad_norm": 6.116285157670777, "learning_rate": 1.3450606570277747e-08, "loss": 0.8394, "step": 34128 }, { "epoch": 0.98, "grad_norm": 3.284518693707863, "learning_rate": 1.3416634156966368e-08, "loss": 0.2949, "step": 34129 }, { "epoch": 0.98, "grad_norm": 6.325190826975115, "learning_rate": 1.3382704642617705e-08, "loss": 0.4181, "step": 34130 }, { "epoch": 0.98, "grad_norm": 6.150243937298849, "learning_rate": 1.3348818027524302e-08, "loss": 0.7846, "step": 34131 }, { "epoch": 0.98, "grad_norm": 5.894997266747745, "learning_rate": 1.331497431197648e-08, "loss": 0.5225, "step": 34132 }, { "epoch": 0.98, "grad_norm": 3.392724336538187, "learning_rate": 1.3281173496266231e-08, "loss": 0.1205, "step": 34133 }, { "epoch": 0.98, "grad_norm": 5.730064393425659, "learning_rate": 1.3247415580683875e-08, "loss": 0.4298, "step": 34134 }, { "epoch": 0.98, "grad_norm": 6.67451492718719, "learning_rate": 1.3213700565520294e-08, "loss": 0.3648, "step": 34135 }, { "epoch": 0.98, "grad_norm": 6.544829857161445, "learning_rate": 1.3180028451065253e-08, "loss": 0.4055, "step": 34136 }, { "epoch": 0.98, "grad_norm": 4.457842071895073, "learning_rate": 1.3146399237607966e-08, "loss": 0.3974, "step": 34137 }, { "epoch": 0.98, "grad_norm": 5.394351456261169, "learning_rate": 1.3112812925438756e-08, "loss": 0.3753, "step": 34138 }, { "epoch": 0.98, "grad_norm": 6.522584633268988, "learning_rate": 1.3079269514845171e-08, "loss": 0.4797, "step": 34139 }, { "epoch": 0.98, "grad_norm": 5.573024736824217, "learning_rate": 1.3045769006117537e-08, "loss": 0.4106, "step": 34140 }, { "epoch": 0.98, "grad_norm": 5.748925398879544, "learning_rate": 1.3012311399542843e-08, "loss": 0.7768, "step": 34141 }, { "epoch": 0.98, "grad_norm": 3.7874785268445543, "learning_rate": 1.2978896695409194e-08, "loss": 0.3326, "step": 34142 }, { "epoch": 0.98, "grad_norm": 11.033825452111612, "learning_rate": 1.2945524894004136e-08, "loss": 0.4354, "step": 34143 }, { "epoch": 0.98, "grad_norm": 5.32334192994693, "learning_rate": 1.2912195995614662e-08, "loss": 0.4591, "step": 34144 }, { "epoch": 0.98, "grad_norm": 3.9235890810787826, "learning_rate": 1.2878910000527212e-08, "loss": 0.5148, "step": 34145 }, { "epoch": 0.98, "grad_norm": 6.801260995349434, "learning_rate": 1.2845666909028775e-08, "loss": 0.5613, "step": 34146 }, { "epoch": 0.98, "grad_norm": 5.579129254208802, "learning_rate": 1.2812466721405236e-08, "loss": 0.4521, "step": 34147 }, { "epoch": 0.98, "grad_norm": 4.0447635223617215, "learning_rate": 1.2779309437941922e-08, "loss": 0.2468, "step": 34148 }, { "epoch": 0.98, "grad_norm": 4.21440059959017, "learning_rate": 1.2746195058923605e-08, "loss": 0.1882, "step": 34149 }, { "epoch": 0.98, "grad_norm": 3.5610978730240492, "learning_rate": 1.2713123584636722e-08, "loss": 0.3991, "step": 34150 }, { "epoch": 0.98, "grad_norm": 5.590612586547648, "learning_rate": 1.2680095015363825e-08, "loss": 0.3966, "step": 34151 }, { "epoch": 0.98, "grad_norm": 2.5044070024304417, "learning_rate": 1.2647109351390797e-08, "loss": 0.4267, "step": 34152 }, { "epoch": 0.98, "grad_norm": 3.668096083115801, "learning_rate": 1.2614166593000187e-08, "loss": 0.2414, "step": 34153 }, { "epoch": 0.98, "grad_norm": 7.835842204251357, "learning_rate": 1.2581266740475662e-08, "loss": 0.5368, "step": 34154 }, { "epoch": 0.98, "grad_norm": 9.473482466384448, "learning_rate": 1.254840979410088e-08, "loss": 0.551, "step": 34155 }, { "epoch": 0.98, "grad_norm": 4.18448997607394, "learning_rate": 1.2515595754157839e-08, "loss": 0.2401, "step": 34156 }, { "epoch": 0.98, "grad_norm": 5.505091418417594, "learning_rate": 1.2482824620929091e-08, "loss": 0.3202, "step": 34157 }, { "epoch": 0.98, "grad_norm": 6.611865717387983, "learning_rate": 1.245009639469663e-08, "loss": 0.49, "step": 34158 }, { "epoch": 0.98, "grad_norm": 1.589485634250767, "learning_rate": 1.2417411075741349e-08, "loss": 0.0767, "step": 34159 }, { "epoch": 0.98, "grad_norm": 4.4503451588532394, "learning_rate": 1.2384768664345791e-08, "loss": 0.1884, "step": 34160 }, { "epoch": 0.98, "grad_norm": 11.589383786353517, "learning_rate": 1.2352169160789185e-08, "loss": 0.5032, "step": 34161 }, { "epoch": 0.98, "grad_norm": 5.43062542834891, "learning_rate": 1.2319612565353523e-08, "loss": 0.3783, "step": 34162 }, { "epoch": 0.98, "grad_norm": 8.569044243610977, "learning_rate": 1.2287098878318027e-08, "loss": 0.5336, "step": 34163 }, { "epoch": 0.98, "grad_norm": 6.492225766212624, "learning_rate": 1.2254628099961918e-08, "loss": 0.5231, "step": 34164 }, { "epoch": 0.98, "grad_norm": 11.663879433975533, "learning_rate": 1.2222200230565529e-08, "loss": 0.3286, "step": 34165 }, { "epoch": 0.98, "grad_norm": 6.134869622894768, "learning_rate": 1.2189815270407524e-08, "loss": 0.5419, "step": 34166 }, { "epoch": 0.98, "grad_norm": 4.4115373258337875, "learning_rate": 1.2157473219766015e-08, "loss": 0.459, "step": 34167 }, { "epoch": 0.98, "grad_norm": 5.81391574681585, "learning_rate": 1.212517407892022e-08, "loss": 0.2677, "step": 34168 }, { "epoch": 0.98, "grad_norm": 4.012568932532371, "learning_rate": 1.2092917848147146e-08, "loss": 0.0881, "step": 34169 }, { "epoch": 0.98, "grad_norm": 8.485829265580646, "learning_rate": 1.2060704527724899e-08, "loss": 0.5576, "step": 34170 }, { "epoch": 0.98, "grad_norm": 5.0041900840145725, "learning_rate": 1.2028534117929924e-08, "loss": 0.3433, "step": 34171 }, { "epoch": 0.98, "grad_norm": 3.7098090132423103, "learning_rate": 1.1996406619039225e-08, "loss": 0.2242, "step": 34172 }, { "epoch": 0.98, "grad_norm": 3.0438212227762063, "learning_rate": 1.1964322031329245e-08, "loss": 0.3618, "step": 34173 }, { "epoch": 0.98, "grad_norm": 6.020901790700501, "learning_rate": 1.193228035507643e-08, "loss": 0.4848, "step": 34174 }, { "epoch": 0.98, "grad_norm": 4.603274611393876, "learning_rate": 1.1900281590555562e-08, "loss": 0.5123, "step": 34175 }, { "epoch": 0.98, "grad_norm": 3.7552906544092735, "learning_rate": 1.1868325738043086e-08, "loss": 0.3822, "step": 34176 }, { "epoch": 0.98, "grad_norm": 6.467662848924328, "learning_rate": 1.1836412797812668e-08, "loss": 0.5779, "step": 34177 }, { "epoch": 0.98, "grad_norm": 15.425210300291779, "learning_rate": 1.1804542770140203e-08, "loss": 0.1469, "step": 34178 }, { "epoch": 0.98, "grad_norm": 4.483363020750169, "learning_rate": 1.177271565529825e-08, "loss": 0.5368, "step": 34179 }, { "epoch": 0.98, "grad_norm": 8.167184041301491, "learning_rate": 1.1740931453561588e-08, "loss": 0.59, "step": 34180 }, { "epoch": 0.98, "grad_norm": 10.056372583565594, "learning_rate": 1.1709190165203887e-08, "loss": 0.3988, "step": 34181 }, { "epoch": 0.98, "grad_norm": 4.678033794464485, "learning_rate": 1.1677491790497709e-08, "loss": 0.2766, "step": 34182 }, { "epoch": 0.98, "grad_norm": 4.616902867046288, "learning_rate": 1.164583632971561e-08, "loss": 0.3801, "step": 34183 }, { "epoch": 0.98, "grad_norm": 6.822238505976017, "learning_rate": 1.1614223783130708e-08, "loss": 0.3787, "step": 34184 }, { "epoch": 0.98, "grad_norm": 3.7621177707309497, "learning_rate": 1.158265415101445e-08, "loss": 0.226, "step": 34185 }, { "epoch": 0.98, "grad_norm": 1.4731532429458718, "learning_rate": 1.1551127433637733e-08, "loss": 0.0838, "step": 34186 }, { "epoch": 0.98, "grad_norm": 4.983375496935795, "learning_rate": 1.1519643631273114e-08, "loss": 0.2337, "step": 34187 }, { "epoch": 0.98, "grad_norm": 4.599052871763489, "learning_rate": 1.148820274419038e-08, "loss": 0.6221, "step": 34188 }, { "epoch": 0.98, "grad_norm": 7.076778056339721, "learning_rate": 1.1456804772660424e-08, "loss": 0.6364, "step": 34189 }, { "epoch": 0.98, "grad_norm": 7.931156717628119, "learning_rate": 1.142544971695414e-08, "loss": 0.3922, "step": 34190 }, { "epoch": 0.98, "grad_norm": 7.556047287612776, "learning_rate": 1.1394137577339649e-08, "loss": 0.7065, "step": 34191 }, { "epoch": 0.98, "grad_norm": 6.235917591282075, "learning_rate": 1.1362868354087841e-08, "loss": 0.4359, "step": 34192 }, { "epoch": 0.98, "grad_norm": 3.6142523373075592, "learning_rate": 1.1331642047466284e-08, "loss": 0.2788, "step": 34193 }, { "epoch": 0.98, "grad_norm": 6.371605773596225, "learning_rate": 1.1300458657745317e-08, "loss": 0.5807, "step": 34194 }, { "epoch": 0.98, "grad_norm": 11.903736159502769, "learning_rate": 1.1269318185191391e-08, "loss": 0.4739, "step": 34195 }, { "epoch": 0.98, "grad_norm": 4.091917836442979, "learning_rate": 1.1238220630073738e-08, "loss": 0.3796, "step": 34196 }, { "epoch": 0.98, "grad_norm": 3.392301716192901, "learning_rate": 1.1207165992659363e-08, "loss": 0.1811, "step": 34197 }, { "epoch": 0.98, "grad_norm": 4.56158683731163, "learning_rate": 1.1176154273215278e-08, "loss": 0.3898, "step": 34198 }, { "epoch": 0.98, "grad_norm": 1.551853453063712, "learning_rate": 1.1145185472009046e-08, "loss": 0.1495, "step": 34199 }, { "epoch": 0.98, "grad_norm": 5.2482764730142435, "learning_rate": 1.1114259589306008e-08, "loss": 0.3479, "step": 34200 }, { "epoch": 0.98, "grad_norm": 6.227766085789633, "learning_rate": 1.1083376625373176e-08, "loss": 0.4541, "step": 34201 }, { "epoch": 0.98, "grad_norm": 6.792422359838559, "learning_rate": 1.1052536580475337e-08, "loss": 0.6206, "step": 34202 }, { "epoch": 0.98, "grad_norm": 5.561424837251507, "learning_rate": 1.1021739454878944e-08, "loss": 0.3859, "step": 34203 }, { "epoch": 0.98, "grad_norm": 8.852518971036908, "learning_rate": 1.0990985248847675e-08, "loss": 0.5065, "step": 34204 }, { "epoch": 0.98, "grad_norm": 5.123749813090067, "learning_rate": 1.0960273962646873e-08, "loss": 0.2535, "step": 34205 }, { "epoch": 0.98, "grad_norm": 3.582257337517551, "learning_rate": 1.0929605596540771e-08, "loss": 0.2616, "step": 34206 }, { "epoch": 0.98, "grad_norm": 7.524004287998371, "learning_rate": 1.0898980150792494e-08, "loss": 0.4389, "step": 34207 }, { "epoch": 0.98, "grad_norm": 6.830205814170479, "learning_rate": 1.0868397625666272e-08, "loss": 0.5607, "step": 34208 }, { "epoch": 0.98, "grad_norm": 9.0244686445805, "learning_rate": 1.0837858021425229e-08, "loss": 0.4913, "step": 34209 }, { "epoch": 0.98, "grad_norm": 4.325095841553108, "learning_rate": 1.0807361338331379e-08, "loss": 0.3267, "step": 34210 }, { "epoch": 0.98, "grad_norm": 8.435963243479485, "learning_rate": 1.0776907576647844e-08, "loss": 0.2635, "step": 34211 }, { "epoch": 0.98, "grad_norm": 11.392792169818716, "learning_rate": 1.0746496736636081e-08, "loss": 0.7901, "step": 34212 }, { "epoch": 0.98, "grad_norm": 5.230084383048575, "learning_rate": 1.0716128818558103e-08, "loss": 0.5336, "step": 34213 }, { "epoch": 0.98, "grad_norm": 5.490792891009499, "learning_rate": 1.0685803822674811e-08, "loss": 0.3459, "step": 34214 }, { "epoch": 0.98, "grad_norm": 4.355907195735897, "learning_rate": 1.065552174924711e-08, "loss": 0.1757, "step": 34215 }, { "epoch": 0.98, "grad_norm": 6.798802525604456, "learning_rate": 1.0625282598535902e-08, "loss": 0.4442, "step": 34216 }, { "epoch": 0.98, "grad_norm": 9.770144828066535, "learning_rate": 1.0595086370800977e-08, "loss": 0.5071, "step": 34217 }, { "epoch": 0.98, "grad_norm": 5.373325619388341, "learning_rate": 1.0564933066302684e-08, "loss": 0.1425, "step": 34218 }, { "epoch": 0.98, "grad_norm": 5.529403205800504, "learning_rate": 1.053482268529915e-08, "loss": 0.5678, "step": 34219 }, { "epoch": 0.98, "grad_norm": 7.016980386487784, "learning_rate": 1.0504755228050722e-08, "loss": 0.7468, "step": 34220 }, { "epoch": 0.98, "grad_norm": 4.135355003275147, "learning_rate": 1.0474730694815528e-08, "loss": 0.3839, "step": 34221 }, { "epoch": 0.98, "grad_norm": 7.255676086493039, "learning_rate": 1.044474908585169e-08, "loss": 0.4686, "step": 34222 }, { "epoch": 0.98, "grad_norm": 6.562684701409348, "learning_rate": 1.0414810401417341e-08, "loss": 0.5074, "step": 34223 }, { "epoch": 0.98, "grad_norm": 6.511909285276696, "learning_rate": 1.0384914641770605e-08, "loss": 0.4529, "step": 34224 }, { "epoch": 0.98, "grad_norm": 6.989616867720256, "learning_rate": 1.0355061807167388e-08, "loss": 0.6771, "step": 34225 }, { "epoch": 0.98, "grad_norm": 4.8005468374930835, "learning_rate": 1.0325251897865263e-08, "loss": 0.4957, "step": 34226 }, { "epoch": 0.98, "grad_norm": 10.096768804692429, "learning_rate": 1.0295484914121245e-08, "loss": 0.4414, "step": 34227 }, { "epoch": 0.98, "grad_norm": 3.594980576506497, "learning_rate": 1.0265760856190133e-08, "loss": 0.3537, "step": 34228 }, { "epoch": 0.98, "grad_norm": 6.214668909247342, "learning_rate": 1.023607972432894e-08, "loss": 0.3475, "step": 34229 }, { "epoch": 0.98, "grad_norm": 5.94540619744996, "learning_rate": 1.020644151879191e-08, "loss": 0.4027, "step": 34230 }, { "epoch": 0.98, "grad_norm": 4.436260802555986, "learning_rate": 1.0176846239834948e-08, "loss": 0.2405, "step": 34231 }, { "epoch": 0.98, "grad_norm": 4.504122408785599, "learning_rate": 1.014729388771174e-08, "loss": 0.4764, "step": 34232 }, { "epoch": 0.98, "grad_norm": 4.0961733626785515, "learning_rate": 1.0117784462677638e-08, "loss": 0.5987, "step": 34233 }, { "epoch": 0.98, "grad_norm": 4.009462905360172, "learning_rate": 1.0088317964985218e-08, "loss": 0.2288, "step": 34234 }, { "epoch": 0.98, "grad_norm": 5.019538327223245, "learning_rate": 1.0058894394888719e-08, "loss": 0.5064, "step": 34235 }, { "epoch": 0.98, "grad_norm": 6.60316738506819, "learning_rate": 1.0029513752641273e-08, "loss": 0.3147, "step": 34236 }, { "epoch": 0.98, "grad_norm": 7.630649287444262, "learning_rate": 1.0000176038496013e-08, "loss": 0.5345, "step": 34237 }, { "epoch": 0.98, "grad_norm": 11.036158693406627, "learning_rate": 9.9708812527044e-09, "loss": 0.5815, "step": 34238 }, { "epoch": 0.98, "grad_norm": 2.8790523020991126, "learning_rate": 9.94162939551846e-09, "loss": 0.4712, "step": 34239 }, { "epoch": 0.98, "grad_norm": 5.2614471433413925, "learning_rate": 9.91242046719132e-09, "loss": 0.3222, "step": 34240 }, { "epoch": 0.98, "grad_norm": 7.538167918204616, "learning_rate": 9.883254467972225e-09, "loss": 0.9674, "step": 34241 }, { "epoch": 0.98, "grad_norm": 5.1364859060448556, "learning_rate": 9.854131398113753e-09, "loss": 0.4656, "step": 34242 }, { "epoch": 0.98, "grad_norm": 7.980967023906829, "learning_rate": 9.825051257865703e-09, "loss": 0.601, "step": 34243 }, { "epoch": 0.98, "grad_norm": 2.4268810954718583, "learning_rate": 9.796014047477876e-09, "loss": 0.4544, "step": 34244 }, { "epoch": 0.98, "grad_norm": 9.207690520023162, "learning_rate": 9.76701976720118e-09, "loss": 0.3505, "step": 34245 }, { "epoch": 0.98, "grad_norm": 10.727437627535913, "learning_rate": 9.738068417283753e-09, "loss": 0.9273, "step": 34246 }, { "epoch": 0.98, "grad_norm": 4.100867773892149, "learning_rate": 9.709159997975948e-09, "loss": 0.482, "step": 34247 }, { "epoch": 0.98, "grad_norm": 6.512152279230873, "learning_rate": 9.680294509525345e-09, "loss": 0.8133, "step": 34248 }, { "epoch": 0.98, "grad_norm": 4.388845823106182, "learning_rate": 9.65147195218119e-09, "loss": 0.1936, "step": 34249 }, { "epoch": 0.98, "grad_norm": 4.389868294341254, "learning_rate": 9.622692326190508e-09, "loss": 0.4398, "step": 34250 }, { "epoch": 0.98, "grad_norm": 6.39199038518438, "learning_rate": 9.593955631801432e-09, "loss": 0.2796, "step": 34251 }, { "epoch": 0.98, "grad_norm": 6.477265709777389, "learning_rate": 9.565261869261544e-09, "loss": 0.5177, "step": 34252 }, { "epoch": 0.98, "grad_norm": 4.564662890986541, "learning_rate": 9.536611038816757e-09, "loss": 0.59, "step": 34253 }, { "epoch": 0.98, "grad_norm": 2.173412286651942, "learning_rate": 9.508003140714095e-09, "loss": 0.0789, "step": 34254 }, { "epoch": 0.98, "grad_norm": 8.770546468240306, "learning_rate": 9.479438175200029e-09, "loss": 0.2655, "step": 34255 }, { "epoch": 0.98, "grad_norm": 3.87957760935217, "learning_rate": 9.450916142519917e-09, "loss": 0.225, "step": 34256 }, { "epoch": 0.98, "grad_norm": 8.5853377072848, "learning_rate": 9.422437042918564e-09, "loss": 0.525, "step": 34257 }, { "epoch": 0.98, "grad_norm": 5.522835209016871, "learning_rate": 9.394000876641885e-09, "loss": 0.3288, "step": 34258 }, { "epoch": 0.98, "grad_norm": 6.674151744593254, "learning_rate": 9.365607643934127e-09, "loss": 0.6418, "step": 34259 }, { "epoch": 0.98, "grad_norm": 4.888119137858866, "learning_rate": 9.337257345040096e-09, "loss": 0.6136, "step": 34260 }, { "epoch": 0.98, "grad_norm": 4.60507661219352, "learning_rate": 9.308949980202375e-09, "loss": 0.3777, "step": 34261 }, { "epoch": 0.98, "grad_norm": 4.069123986045302, "learning_rate": 9.280685549665768e-09, "loss": 0.3984, "step": 34262 }, { "epoch": 0.98, "grad_norm": 3.8301093427522868, "learning_rate": 9.252464053672861e-09, "loss": 0.3511, "step": 34263 }, { "epoch": 0.98, "grad_norm": 3.5483780755560406, "learning_rate": 9.224285492466235e-09, "loss": 0.3492, "step": 34264 }, { "epoch": 0.98, "grad_norm": 7.340258510984961, "learning_rate": 9.196149866289028e-09, "loss": 0.4532, "step": 34265 }, { "epoch": 0.98, "grad_norm": 7.710523507272063, "learning_rate": 9.168057175382162e-09, "loss": 0.4988, "step": 34266 }, { "epoch": 0.98, "grad_norm": 4.333964625859831, "learning_rate": 9.140007419988772e-09, "loss": 0.4295, "step": 34267 }, { "epoch": 0.98, "grad_norm": 2.089937995853318, "learning_rate": 9.112000600349225e-09, "loss": 0.3007, "step": 34268 }, { "epoch": 0.98, "grad_norm": 24.781921089004243, "learning_rate": 9.084036716704437e-09, "loss": 0.6667, "step": 34269 }, { "epoch": 0.98, "grad_norm": 5.4190087561331595, "learning_rate": 9.056115769295326e-09, "loss": 0.3346, "step": 34270 }, { "epoch": 0.98, "grad_norm": 5.2434675811263025, "learning_rate": 9.028237758361703e-09, "loss": 0.462, "step": 34271 }, { "epoch": 0.98, "grad_norm": 5.06125656210412, "learning_rate": 9.000402684143928e-09, "loss": 0.3785, "step": 34272 }, { "epoch": 0.98, "grad_norm": 6.849304140206333, "learning_rate": 8.972610546881256e-09, "loss": 0.6064, "step": 34273 }, { "epoch": 0.98, "grad_norm": 6.15466520562532, "learning_rate": 8.944861346812383e-09, "loss": 0.476, "step": 34274 }, { "epoch": 0.98, "grad_norm": 6.847289889991912, "learning_rate": 8.917155084176565e-09, "loss": 0.5174, "step": 34275 }, { "epoch": 0.98, "grad_norm": 3.030318505421955, "learning_rate": 8.889491759211943e-09, "loss": 0.1947, "step": 34276 }, { "epoch": 0.98, "grad_norm": 4.225973148118017, "learning_rate": 8.861871372156661e-09, "loss": 0.5157, "step": 34277 }, { "epoch": 0.98, "grad_norm": 5.109487036547, "learning_rate": 8.83429392324775e-09, "loss": 0.3926, "step": 34278 }, { "epoch": 0.98, "grad_norm": 6.317512788023722, "learning_rate": 8.806759412723353e-09, "loss": 0.206, "step": 34279 }, { "epoch": 0.98, "grad_norm": 3.8150440545494457, "learning_rate": 8.779267840819394e-09, "loss": 0.1241, "step": 34280 }, { "epoch": 0.98, "grad_norm": 6.1585519040370205, "learning_rate": 8.75181920777346e-09, "loss": 0.5292, "step": 34281 }, { "epoch": 0.98, "grad_norm": 6.2126454626050975, "learning_rate": 8.724413513820362e-09, "loss": 0.5581, "step": 34282 }, { "epoch": 0.98, "grad_norm": 4.998935657706013, "learning_rate": 8.697050759197134e-09, "loss": 0.3248, "step": 34283 }, { "epoch": 0.98, "grad_norm": 4.571434000769865, "learning_rate": 8.669730944138589e-09, "loss": 0.301, "step": 34284 }, { "epoch": 0.98, "grad_norm": 4.84565767438331, "learning_rate": 8.642454068879536e-09, "loss": 0.3693, "step": 34285 }, { "epoch": 0.98, "grad_norm": 5.1097595816714225, "learning_rate": 8.61522013365479e-09, "loss": 0.3878, "step": 34286 }, { "epoch": 0.98, "grad_norm": 3.153731653928975, "learning_rate": 8.588029138699162e-09, "loss": 0.5325, "step": 34287 }, { "epoch": 0.98, "grad_norm": 7.942304763963927, "learning_rate": 8.560881084245797e-09, "loss": 0.2371, "step": 34288 }, { "epoch": 0.98, "grad_norm": 8.93765504075669, "learning_rate": 8.533775970528956e-09, "loss": 0.4117, "step": 34289 }, { "epoch": 0.98, "grad_norm": 4.584206093430962, "learning_rate": 8.50671379778123e-09, "loss": 0.4608, "step": 34290 }, { "epoch": 0.98, "grad_norm": 6.45626030742934, "learning_rate": 8.479694566235764e-09, "loss": 0.4383, "step": 34291 }, { "epoch": 0.98, "grad_norm": 4.041862093077722, "learning_rate": 8.45271827612515e-09, "loss": 0.2075, "step": 34292 }, { "epoch": 0.98, "grad_norm": 8.153635153960929, "learning_rate": 8.42578492768087e-09, "loss": 0.5849, "step": 34293 }, { "epoch": 0.98, "grad_norm": 8.535082392951328, "learning_rate": 8.39889452113496e-09, "loss": 0.7851, "step": 34294 }, { "epoch": 0.98, "grad_norm": 6.220721962509499, "learning_rate": 8.372047056718902e-09, "loss": 0.61, "step": 34295 }, { "epoch": 0.98, "grad_norm": 1.410480200523584, "learning_rate": 8.345242534663623e-09, "loss": 0.0836, "step": 34296 }, { "epoch": 0.98, "grad_norm": 4.346496770160053, "learning_rate": 8.318480955199493e-09, "loss": 0.4066, "step": 34297 }, { "epoch": 0.98, "grad_norm": 6.049524559696941, "learning_rate": 8.291762318556885e-09, "loss": 0.2626, "step": 34298 }, { "epoch": 0.98, "grad_norm": 8.777076027848501, "learning_rate": 8.265086624966168e-09, "loss": 0.3586, "step": 34299 }, { "epoch": 0.98, "grad_norm": 2.479241399413325, "learning_rate": 8.238453874655494e-09, "loss": 0.2807, "step": 34300 }, { "epoch": 0.98, "grad_norm": 2.9376287229214157, "learning_rate": 8.211864067855236e-09, "loss": 0.3132, "step": 34301 }, { "epoch": 0.98, "grad_norm": 7.627833605947336, "learning_rate": 8.185317204793542e-09, "loss": 0.5825, "step": 34302 }, { "epoch": 0.98, "grad_norm": 4.921370198838728, "learning_rate": 8.158813285699118e-09, "loss": 0.4239, "step": 34303 }, { "epoch": 0.98, "grad_norm": 3.2492811801908594, "learning_rate": 8.132352310799562e-09, "loss": 0.2908, "step": 34304 }, { "epoch": 0.98, "grad_norm": 3.5432543150565254, "learning_rate": 8.105934280323025e-09, "loss": 0.3861, "step": 34305 }, { "epoch": 0.98, "grad_norm": 3.3558450827540107, "learning_rate": 8.079559194496544e-09, "loss": 0.3353, "step": 34306 }, { "epoch": 0.98, "grad_norm": 6.3187828146123, "learning_rate": 8.053227053546608e-09, "loss": 0.2749, "step": 34307 }, { "epoch": 0.98, "grad_norm": 5.488996856844227, "learning_rate": 8.026937857700812e-09, "loss": 0.2152, "step": 34308 }, { "epoch": 0.98, "grad_norm": 4.577438719676251, "learning_rate": 8.000691607183975e-09, "loss": 0.2627, "step": 34309 }, { "epoch": 0.98, "grad_norm": 8.549185766522319, "learning_rate": 7.974488302223138e-09, "loss": 1.0562, "step": 34310 }, { "epoch": 0.98, "grad_norm": 11.19163841552653, "learning_rate": 7.948327943042566e-09, "loss": 0.7768, "step": 34311 }, { "epoch": 0.98, "grad_norm": 13.186963255747974, "learning_rate": 7.92221052986819e-09, "loss": 0.4139, "step": 34312 }, { "epoch": 0.98, "grad_norm": 5.1523621206700785, "learning_rate": 7.896136062924275e-09, "loss": 0.2873, "step": 34313 }, { "epoch": 0.98, "grad_norm": 5.87796359694904, "learning_rate": 7.870104542435641e-09, "loss": 0.5425, "step": 34314 }, { "epoch": 0.98, "grad_norm": 5.73321729402142, "learning_rate": 7.844115968625443e-09, "loss": 0.4913, "step": 34315 }, { "epoch": 0.98, "grad_norm": 6.201543149872279, "learning_rate": 7.81817034171739e-09, "loss": 0.4163, "step": 34316 }, { "epoch": 0.98, "grad_norm": 5.827583717389636, "learning_rate": 7.792267661935748e-09, "loss": 0.2231, "step": 34317 }, { "epoch": 0.98, "grad_norm": 5.084788859287106, "learning_rate": 7.766407929502006e-09, "loss": 0.4782, "step": 34318 }, { "epoch": 0.98, "grad_norm": 2.9357421972454847, "learning_rate": 7.74059114463932e-09, "loss": 0.1891, "step": 34319 }, { "epoch": 0.98, "grad_norm": 6.923597788779314, "learning_rate": 7.714817307569733e-09, "loss": 0.3326, "step": 34320 }, { "epoch": 0.98, "grad_norm": 4.783204121899141, "learning_rate": 7.689086418514735e-09, "loss": 0.1936, "step": 34321 }, { "epoch": 0.98, "grad_norm": 4.23469750559201, "learning_rate": 7.66339847769637e-09, "loss": 0.1892, "step": 34322 }, { "epoch": 0.98, "grad_norm": 6.007447071643954, "learning_rate": 7.637753485334464e-09, "loss": 0.3371, "step": 34323 }, { "epoch": 0.98, "grad_norm": 5.089663920296086, "learning_rate": 7.612151441650507e-09, "loss": 0.7301, "step": 34324 }, { "epoch": 0.98, "grad_norm": 7.842675610401113, "learning_rate": 7.586592346864873e-09, "loss": 0.4492, "step": 34325 }, { "epoch": 0.98, "grad_norm": 4.590341133601263, "learning_rate": 7.561076201196838e-09, "loss": 0.4782, "step": 34326 }, { "epoch": 0.98, "grad_norm": 6.381168410539005, "learning_rate": 7.53560300486622e-09, "loss": 0.4767, "step": 34327 }, { "epoch": 0.98, "grad_norm": 4.11813479678398, "learning_rate": 7.510172758091738e-09, "loss": 0.3941, "step": 34328 }, { "epoch": 0.98, "grad_norm": 4.059475154096373, "learning_rate": 7.484785461093214e-09, "loss": 0.2783, "step": 34329 }, { "epoch": 0.98, "grad_norm": 7.309634176929589, "learning_rate": 7.459441114087696e-09, "loss": 0.2634, "step": 34330 }, { "epoch": 0.98, "grad_norm": 4.529116924308695, "learning_rate": 7.434139717294453e-09, "loss": 0.2244, "step": 34331 }, { "epoch": 0.98, "grad_norm": 5.3232894388697005, "learning_rate": 7.408881270930535e-09, "loss": 0.6779, "step": 34332 }, { "epoch": 0.98, "grad_norm": 6.92313189321686, "learning_rate": 7.383665775212989e-09, "loss": 0.5383, "step": 34333 }, { "epoch": 0.98, "grad_norm": 8.349595954393454, "learning_rate": 7.35849323035942e-09, "loss": 0.6593, "step": 34334 }, { "epoch": 0.98, "grad_norm": 2.8492598969710303, "learning_rate": 7.333363636585766e-09, "loss": 0.0905, "step": 34335 }, { "epoch": 0.98, "grad_norm": 4.978841885411503, "learning_rate": 7.308276994108521e-09, "loss": 0.3317, "step": 34336 }, { "epoch": 0.98, "grad_norm": 2.9477424615040997, "learning_rate": 7.283233303143622e-09, "loss": 0.3221, "step": 34337 }, { "epoch": 0.98, "grad_norm": 5.050738388316298, "learning_rate": 7.258232563906453e-09, "loss": 0.2088, "step": 34338 }, { "epoch": 0.98, "grad_norm": 4.358451007010572, "learning_rate": 7.233274776611288e-09, "loss": 0.4805, "step": 34339 }, { "epoch": 0.98, "grad_norm": 6.470341417569962, "learning_rate": 7.208359941474064e-09, "loss": 0.7152, "step": 34340 }, { "epoch": 0.98, "grad_norm": 5.061195440923937, "learning_rate": 7.1834880587084985e-09, "loss": 0.5178, "step": 34341 }, { "epoch": 0.98, "grad_norm": 6.260907459115562, "learning_rate": 7.1586591285288665e-09, "loss": 0.6167, "step": 34342 }, { "epoch": 0.98, "grad_norm": 5.341927178448162, "learning_rate": 7.13387315114833e-09, "loss": 0.375, "step": 34343 }, { "epoch": 0.98, "grad_norm": 5.180906241578209, "learning_rate": 7.10913012678005e-09, "loss": 0.4125, "step": 34344 }, { "epoch": 0.98, "grad_norm": 4.974512537360796, "learning_rate": 7.084430055637748e-09, "loss": 0.1362, "step": 34345 }, { "epoch": 0.98, "grad_norm": 3.7994001492325125, "learning_rate": 7.0597729379329184e-09, "loss": 0.3993, "step": 34346 }, { "epoch": 0.98, "grad_norm": 7.181831429949965, "learning_rate": 7.035158773878726e-09, "loss": 0.32, "step": 34347 }, { "epoch": 0.98, "grad_norm": 4.44834957055999, "learning_rate": 7.010587563685556e-09, "loss": 0.3362, "step": 34348 }, { "epoch": 0.98, "grad_norm": 3.4931891026931137, "learning_rate": 6.986059307566018e-09, "loss": 0.2183, "step": 34349 }, { "epoch": 0.98, "grad_norm": 10.280336959786112, "learning_rate": 6.9615740057304984e-09, "loss": 0.91, "step": 34350 }, { "epoch": 0.98, "grad_norm": 8.182781111762825, "learning_rate": 6.937131658389939e-09, "loss": 0.1484, "step": 34351 }, { "epoch": 0.98, "grad_norm": 7.434813070198467, "learning_rate": 6.912732265754174e-09, "loss": 0.5017, "step": 34352 }, { "epoch": 0.98, "grad_norm": 9.269894984616485, "learning_rate": 6.888375828033589e-09, "loss": 0.4024, "step": 34353 }, { "epoch": 0.98, "grad_norm": 4.816141385780124, "learning_rate": 6.8640623454380165e-09, "loss": 0.4054, "step": 34354 }, { "epoch": 0.98, "grad_norm": 4.392039821100284, "learning_rate": 6.839791818175067e-09, "loss": 0.4129, "step": 34355 }, { "epoch": 0.98, "grad_norm": 4.819087832788547, "learning_rate": 6.815564246455686e-09, "loss": 0.4375, "step": 34356 }, { "epoch": 0.98, "grad_norm": 3.0609679087695447, "learning_rate": 6.791379630486372e-09, "loss": 0.152, "step": 34357 }, { "epoch": 0.98, "grad_norm": 3.6811269872348444, "learning_rate": 6.767237970476404e-09, "loss": 0.5174, "step": 34358 }, { "epoch": 0.98, "grad_norm": 6.990826351725994, "learning_rate": 6.743139266633392e-09, "loss": 0.3499, "step": 34359 }, { "epoch": 0.98, "grad_norm": 5.53804667510549, "learning_rate": 6.719083519164393e-09, "loss": 0.4882, "step": 34360 }, { "epoch": 0.98, "grad_norm": 5.771467024878744, "learning_rate": 6.695070728275909e-09, "loss": 0.4848, "step": 34361 }, { "epoch": 0.98, "grad_norm": 10.230100155337308, "learning_rate": 6.671100894174998e-09, "loss": 0.5995, "step": 34362 }, { "epoch": 0.98, "grad_norm": 9.87714546171736, "learning_rate": 6.647174017067604e-09, "loss": 0.4166, "step": 34363 }, { "epoch": 0.98, "grad_norm": 4.9897532371098805, "learning_rate": 6.623290097160228e-09, "loss": 0.4345, "step": 34364 }, { "epoch": 0.98, "grad_norm": 8.393760393694409, "learning_rate": 6.59944913465771e-09, "loss": 0.5834, "step": 34365 }, { "epoch": 0.98, "grad_norm": 9.845886386163922, "learning_rate": 6.575651129765437e-09, "loss": 0.5134, "step": 34366 }, { "epoch": 0.98, "grad_norm": 4.598267416574337, "learning_rate": 6.551896082688247e-09, "loss": 0.6589, "step": 34367 }, { "epoch": 0.98, "grad_norm": 4.869275155104459, "learning_rate": 6.528183993629866e-09, "loss": 0.2906, "step": 34368 }, { "epoch": 0.98, "grad_norm": 11.000563953855114, "learning_rate": 6.504514862794575e-09, "loss": 0.6104, "step": 34369 }, { "epoch": 0.98, "grad_norm": 5.669975782473884, "learning_rate": 6.4808886903861e-09, "loss": 0.43, "step": 34370 }, { "epoch": 0.98, "grad_norm": 3.7990892021468965, "learning_rate": 6.457305476608167e-09, "loss": 0.5017, "step": 34371 }, { "epoch": 0.98, "grad_norm": 4.772667231120161, "learning_rate": 6.433765221663391e-09, "loss": 0.4735, "step": 34372 }, { "epoch": 0.98, "grad_norm": 4.233651187307885, "learning_rate": 6.410267925753278e-09, "loss": 0.254, "step": 34373 }, { "epoch": 0.98, "grad_norm": 3.0937662413199694, "learning_rate": 6.386813589081553e-09, "loss": 0.2737, "step": 34374 }, { "epoch": 0.98, "grad_norm": 3.838839208422826, "learning_rate": 6.3634022118486125e-09, "loss": 0.5573, "step": 34375 }, { "epoch": 0.98, "grad_norm": 9.918197743377856, "learning_rate": 6.340033794257072e-09, "loss": 0.697, "step": 34376 }, { "epoch": 0.98, "grad_norm": 4.305817637448465, "learning_rate": 6.316708336507327e-09, "loss": 0.3238, "step": 34377 }, { "epoch": 0.98, "grad_norm": 6.483439771012241, "learning_rate": 6.293425838799772e-09, "loss": 0.4173, "step": 34378 }, { "epoch": 0.98, "grad_norm": 4.1337842465883385, "learning_rate": 6.270186301335357e-09, "loss": 0.4224, "step": 34379 }, { "epoch": 0.98, "grad_norm": 4.366348622778424, "learning_rate": 6.246989724313368e-09, "loss": 0.4137, "step": 34380 }, { "epoch": 0.98, "grad_norm": 5.88316935841157, "learning_rate": 6.2238361079336455e-09, "loss": 0.3426, "step": 34381 }, { "epoch": 0.98, "grad_norm": 7.24233583958871, "learning_rate": 6.200725452395473e-09, "loss": 0.3881, "step": 34382 }, { "epoch": 0.98, "grad_norm": 4.5143484667405325, "learning_rate": 6.1776577578981364e-09, "loss": 0.4174, "step": 34383 }, { "epoch": 0.98, "grad_norm": 9.053948886556002, "learning_rate": 6.154633024638701e-09, "loss": 0.4658, "step": 34384 }, { "epoch": 0.98, "grad_norm": 5.791843461921984, "learning_rate": 6.131651252817006e-09, "loss": 0.3556, "step": 34385 }, { "epoch": 0.98, "grad_norm": 3.628818802452321, "learning_rate": 6.108712442629006e-09, "loss": 0.3367, "step": 34386 }, { "epoch": 0.98, "grad_norm": 9.104567972121707, "learning_rate": 6.08581659427343e-09, "loss": 0.2008, "step": 34387 }, { "epoch": 0.98, "grad_norm": 4.0462469254259, "learning_rate": 6.06296370794679e-09, "loss": 0.3043, "step": 34388 }, { "epoch": 0.98, "grad_norm": 7.2996011089401245, "learning_rate": 6.040153783845038e-09, "loss": 0.4671, "step": 34389 }, { "epoch": 0.98, "grad_norm": 10.627777634114555, "learning_rate": 6.017386822165239e-09, "loss": 0.3747, "step": 34390 }, { "epoch": 0.98, "grad_norm": 3.3646047471411196, "learning_rate": 5.994662823103347e-09, "loss": 0.0765, "step": 34391 }, { "epoch": 0.98, "grad_norm": 5.44554166161111, "learning_rate": 5.971981786854209e-09, "loss": 0.2656, "step": 34392 }, { "epoch": 0.98, "grad_norm": 5.797838444604181, "learning_rate": 5.94934371361322e-09, "loss": 0.263, "step": 34393 }, { "epoch": 0.98, "grad_norm": 6.512071623963631, "learning_rate": 5.926748603574673e-09, "loss": 0.5445, "step": 34394 }, { "epoch": 0.98, "grad_norm": 4.351292682275082, "learning_rate": 5.904196456933964e-09, "loss": 0.451, "step": 34395 }, { "epoch": 0.99, "grad_norm": 4.60285071587171, "learning_rate": 5.881687273884828e-09, "loss": 0.2186, "step": 34396 }, { "epoch": 0.99, "grad_norm": 4.454056411510972, "learning_rate": 5.8592210546198905e-09, "loss": 0.4167, "step": 34397 }, { "epoch": 0.99, "grad_norm": 6.009991790016901, "learning_rate": 5.836797799333993e-09, "loss": 0.471, "step": 34398 }, { "epoch": 0.99, "grad_norm": 3.2016739419556277, "learning_rate": 5.8144175082186506e-09, "loss": 0.1789, "step": 34399 }, { "epoch": 0.99, "grad_norm": 2.239988697685525, "learning_rate": 5.792080181467041e-09, "loss": 0.0858, "step": 34400 }, { "epoch": 0.99, "grad_norm": 5.907604486071743, "learning_rate": 5.769785819271234e-09, "loss": 0.3822, "step": 34401 }, { "epoch": 0.99, "grad_norm": 9.276921916807822, "learning_rate": 5.747534421823298e-09, "loss": 0.4687, "step": 34402 }, { "epoch": 0.99, "grad_norm": 7.01043590828313, "learning_rate": 5.72532598931419e-09, "loss": 0.6057, "step": 34403 }, { "epoch": 0.99, "grad_norm": 4.954483712299218, "learning_rate": 5.703160521935425e-09, "loss": 0.6159, "step": 34404 }, { "epoch": 0.99, "grad_norm": 5.674072848637952, "learning_rate": 5.681038019877405e-09, "loss": 0.6241, "step": 34405 }, { "epoch": 0.99, "grad_norm": 6.808324111440414, "learning_rate": 5.658958483330534e-09, "loss": 0.28, "step": 34406 }, { "epoch": 0.99, "grad_norm": 5.201196674054309, "learning_rate": 5.636921912484661e-09, "loss": 0.509, "step": 34407 }, { "epoch": 0.99, "grad_norm": 7.122130368229414, "learning_rate": 5.614928307529077e-09, "loss": 0.35, "step": 34408 }, { "epoch": 0.99, "grad_norm": 5.626313162217948, "learning_rate": 5.592977668653632e-09, "loss": 0.6731, "step": 34409 }, { "epoch": 0.99, "grad_norm": 6.303833139004813, "learning_rate": 5.5710699960470625e-09, "loss": 0.4398, "step": 34410 }, { "epoch": 0.99, "grad_norm": 6.071721329562946, "learning_rate": 5.549205289897552e-09, "loss": 0.4876, "step": 34411 }, { "epoch": 0.99, "grad_norm": 6.375459392675273, "learning_rate": 5.527383550393284e-09, "loss": 0.6314, "step": 34412 }, { "epoch": 0.99, "grad_norm": 4.188941465491101, "learning_rate": 5.505604777721885e-09, "loss": 0.3745, "step": 34413 }, { "epoch": 0.99, "grad_norm": 7.505127790990864, "learning_rate": 5.483868972070427e-09, "loss": 0.9317, "step": 34414 }, { "epoch": 0.99, "grad_norm": 5.823931701668405, "learning_rate": 5.462176133627095e-09, "loss": 0.4729, "step": 34415 }, { "epoch": 0.99, "grad_norm": 8.581676644251358, "learning_rate": 5.4405262625772945e-09, "loss": 0.6098, "step": 34416 }, { "epoch": 0.99, "grad_norm": 4.6298357195371125, "learning_rate": 5.418919359107544e-09, "loss": 0.4403, "step": 34417 }, { "epoch": 0.99, "grad_norm": 6.07812713779912, "learning_rate": 5.39735542340436e-09, "loss": 0.6833, "step": 34418 }, { "epoch": 0.99, "grad_norm": 3.978155330473412, "learning_rate": 5.375834455652596e-09, "loss": 0.3764, "step": 34419 }, { "epoch": 0.99, "grad_norm": 5.64830135112783, "learning_rate": 5.354356456037102e-09, "loss": 0.8508, "step": 34420 }, { "epoch": 0.99, "grad_norm": 6.204211392144297, "learning_rate": 5.332921424743842e-09, "loss": 0.3592, "step": 34421 }, { "epoch": 0.99, "grad_norm": 7.782722395118952, "learning_rate": 5.311529361956003e-09, "loss": 0.4155, "step": 34422 }, { "epoch": 0.99, "grad_norm": 7.448076885785338, "learning_rate": 5.290180267858436e-09, "loss": 0.6751, "step": 34423 }, { "epoch": 0.99, "grad_norm": 8.97877070239077, "learning_rate": 5.268874142634328e-09, "loss": 0.3179, "step": 34424 }, { "epoch": 0.99, "grad_norm": 8.440408558561428, "learning_rate": 5.247610986466867e-09, "loss": 0.2204, "step": 34425 }, { "epoch": 0.99, "grad_norm": 7.775715280485065, "learning_rate": 5.2263907995397935e-09, "loss": 0.6271, "step": 34426 }, { "epoch": 0.99, "grad_norm": 6.788166087926862, "learning_rate": 5.205213582034629e-09, "loss": 0.4291, "step": 34427 }, { "epoch": 0.99, "grad_norm": 5.304750051677382, "learning_rate": 5.184079334134562e-09, "loss": 0.3266, "step": 34428 }, { "epoch": 0.99, "grad_norm": 6.277201168432645, "learning_rate": 5.162988056020557e-09, "loss": 0.4817, "step": 34429 }, { "epoch": 0.99, "grad_norm": 6.069683351647385, "learning_rate": 5.141939747874136e-09, "loss": 0.279, "step": 34430 }, { "epoch": 0.99, "grad_norm": 3.786676831699102, "learning_rate": 5.120934409876821e-09, "loss": 0.2844, "step": 34431 }, { "epoch": 0.99, "grad_norm": 5.4587646221771315, "learning_rate": 5.099972042209023e-09, "loss": 0.5206, "step": 34432 }, { "epoch": 0.99, "grad_norm": 4.8003205838038205, "learning_rate": 5.079052645051708e-09, "loss": 0.312, "step": 34433 }, { "epoch": 0.99, "grad_norm": 8.209373695422803, "learning_rate": 5.058176218583621e-09, "loss": 0.3368, "step": 34434 }, { "epoch": 0.99, "grad_norm": 5.612581850572264, "learning_rate": 5.0373427629851755e-09, "loss": 0.4834, "step": 34435 }, { "epoch": 0.99, "grad_norm": 6.2401405769187805, "learning_rate": 5.016552278436226e-09, "loss": 0.4018, "step": 34436 }, { "epoch": 0.99, "grad_norm": 9.459284164652937, "learning_rate": 4.995804765113854e-09, "loss": 0.5291, "step": 34437 }, { "epoch": 0.99, "grad_norm": 3.9780984246863866, "learning_rate": 4.975100223197915e-09, "loss": 0.2409, "step": 34438 }, { "epoch": 0.99, "grad_norm": 4.743961385809402, "learning_rate": 4.9544386528666e-09, "loss": 0.2819, "step": 34439 }, { "epoch": 0.99, "grad_norm": 2.995889092210767, "learning_rate": 4.933820054296989e-09, "loss": 0.3961, "step": 34440 }, { "epoch": 0.99, "grad_norm": 4.564406793413897, "learning_rate": 4.913244427666719e-09, "loss": 0.2645, "step": 34441 }, { "epoch": 0.99, "grad_norm": 10.404661773361111, "learning_rate": 4.892711773152869e-09, "loss": 0.1858, "step": 34442 }, { "epoch": 0.99, "grad_norm": 5.295790727305408, "learning_rate": 4.872222090931411e-09, "loss": 0.2258, "step": 34443 }, { "epoch": 0.99, "grad_norm": 8.541520737742806, "learning_rate": 4.85177538117998e-09, "loss": 0.5081, "step": 34444 }, { "epoch": 0.99, "grad_norm": 3.8517437132014494, "learning_rate": 4.831371644073435e-09, "loss": 0.2105, "step": 34445 }, { "epoch": 0.99, "grad_norm": 6.258285676018723, "learning_rate": 4.811010879787193e-09, "loss": 0.4156, "step": 34446 }, { "epoch": 0.99, "grad_norm": 3.537314246325204, "learning_rate": 4.7906930884972226e-09, "loss": 0.1821, "step": 34447 }, { "epoch": 0.99, "grad_norm": 3.986825137295247, "learning_rate": 4.77041827037783e-09, "loss": 0.626, "step": 34448 }, { "epoch": 0.99, "grad_norm": 4.110687452401282, "learning_rate": 4.7501864256038756e-09, "loss": 0.3858, "step": 34449 }, { "epoch": 0.99, "grad_norm": 4.986864239702235, "learning_rate": 4.729997554348553e-09, "loss": 0.2158, "step": 34450 }, { "epoch": 0.99, "grad_norm": 3.915860370402745, "learning_rate": 4.7098516567861686e-09, "loss": 0.2367, "step": 34451 }, { "epoch": 0.99, "grad_norm": 7.2437036731676905, "learning_rate": 4.6897487330899164e-09, "loss": 0.36, "step": 34452 }, { "epoch": 0.99, "grad_norm": 6.693663849262876, "learning_rate": 4.669688783432436e-09, "loss": 0.5915, "step": 34453 }, { "epoch": 0.99, "grad_norm": 3.1531292056158655, "learning_rate": 4.649671807986922e-09, "loss": 0.3405, "step": 34454 }, { "epoch": 0.99, "grad_norm": 13.672608518269206, "learning_rate": 4.629697806924904e-09, "loss": 0.5051, "step": 34455 }, { "epoch": 0.99, "grad_norm": 8.890192822509643, "learning_rate": 4.6097667804190224e-09, "loss": 0.4009, "step": 34456 }, { "epoch": 0.99, "grad_norm": 14.0700749997307, "learning_rate": 4.589878728639696e-09, "loss": 0.5013, "step": 34457 }, { "epoch": 0.99, "grad_norm": 4.994857981231328, "learning_rate": 4.570033651759009e-09, "loss": 0.1906, "step": 34458 }, { "epoch": 0.99, "grad_norm": 5.773552569531828, "learning_rate": 4.550231549947382e-09, "loss": 0.2836, "step": 34459 }, { "epoch": 0.99, "grad_norm": 2.4052244948852914, "learning_rate": 4.5304724233746765e-09, "loss": 0.1591, "step": 34460 }, { "epoch": 0.99, "grad_norm": 5.904021190752596, "learning_rate": 4.510756272211314e-09, "loss": 0.5989, "step": 34461 }, { "epoch": 0.99, "grad_norm": 10.167915382201235, "learning_rate": 4.491083096627158e-09, "loss": 0.5837, "step": 34462 }, { "epoch": 0.99, "grad_norm": 5.835903203370661, "learning_rate": 4.471452896790962e-09, "loss": 0.7572, "step": 34463 }, { "epoch": 0.99, "grad_norm": 4.556222856438137, "learning_rate": 4.45186567287148e-09, "loss": 0.299, "step": 34464 }, { "epoch": 0.99, "grad_norm": 3.087590569160055, "learning_rate": 4.432321425038022e-09, "loss": 0.2217, "step": 34465 }, { "epoch": 0.99, "grad_norm": 4.667443894762683, "learning_rate": 4.4128201534582305e-09, "loss": 0.2657, "step": 34466 }, { "epoch": 0.99, "grad_norm": 4.2537612820278685, "learning_rate": 4.3933618582991945e-09, "loss": 0.6335, "step": 34467 }, { "epoch": 0.99, "grad_norm": 6.756689006929706, "learning_rate": 4.373946539729668e-09, "loss": 0.4752, "step": 34468 }, { "epoch": 0.99, "grad_norm": 5.022147907468694, "learning_rate": 4.354574197915629e-09, "loss": 0.3649, "step": 34469 }, { "epoch": 0.99, "grad_norm": 7.603614585030624, "learning_rate": 4.335244833024721e-09, "loss": 0.3873, "step": 34470 }, { "epoch": 0.99, "grad_norm": 5.589964774025903, "learning_rate": 4.315958445221813e-09, "loss": 0.5197, "step": 34471 }, { "epoch": 0.99, "grad_norm": 6.8731272140711654, "learning_rate": 4.296715034673993e-09, "loss": 0.4488, "step": 34472 }, { "epoch": 0.99, "grad_norm": 3.5774176618909417, "learning_rate": 4.277514601546684e-09, "loss": 0.2215, "step": 34473 }, { "epoch": 0.99, "grad_norm": 3.354017412328135, "learning_rate": 4.2583571460047545e-09, "loss": 0.5349, "step": 34474 }, { "epoch": 0.99, "grad_norm": 4.4592434527556595, "learning_rate": 4.239242668213072e-09, "loss": 0.3368, "step": 34475 }, { "epoch": 0.99, "grad_norm": 6.598068378014385, "learning_rate": 4.220171168335396e-09, "loss": 0.3839, "step": 34476 }, { "epoch": 0.99, "grad_norm": 2.3944756157589944, "learning_rate": 4.201142646537149e-09, "loss": 0.1682, "step": 34477 }, { "epoch": 0.99, "grad_norm": 7.112294513569932, "learning_rate": 4.182157102981532e-09, "loss": 0.351, "step": 34478 }, { "epoch": 0.99, "grad_norm": 9.466076892180213, "learning_rate": 4.163214537831195e-09, "loss": 0.4899, "step": 34479 }, { "epoch": 0.99, "grad_norm": 2.5646323891685863, "learning_rate": 4.144314951249895e-09, "loss": 0.1399, "step": 34480 }, { "epoch": 0.99, "grad_norm": 4.449781615009877, "learning_rate": 4.125458343400279e-09, "loss": 0.3999, "step": 34481 }, { "epoch": 0.99, "grad_norm": 4.385673310056033, "learning_rate": 4.106644714443886e-09, "loss": 0.4789, "step": 34482 }, { "epoch": 0.99, "grad_norm": 8.27033882360343, "learning_rate": 4.0878740645428074e-09, "loss": 0.3572, "step": 34483 }, { "epoch": 0.99, "grad_norm": 3.7584513480074953, "learning_rate": 4.0691463938585804e-09, "loss": 0.2514, "step": 34484 }, { "epoch": 0.99, "grad_norm": 3.1355981784804072, "learning_rate": 4.050461702552744e-09, "loss": 0.2354, "step": 34485 }, { "epoch": 0.99, "grad_norm": 4.3684024656187, "learning_rate": 4.031819990785168e-09, "loss": 0.4142, "step": 34486 }, { "epoch": 0.99, "grad_norm": 4.288385779075787, "learning_rate": 4.013221258717392e-09, "loss": 0.5004, "step": 34487 }, { "epoch": 0.99, "grad_norm": 7.013171270272409, "learning_rate": 3.994665506508177e-09, "loss": 0.5021, "step": 34488 }, { "epoch": 0.99, "grad_norm": 3.786377039705271, "learning_rate": 3.97615273431795e-09, "loss": 0.1173, "step": 34489 }, { "epoch": 0.99, "grad_norm": 2.9625750318916326, "learning_rate": 3.95768294230603e-09, "loss": 0.2918, "step": 34490 }, { "epoch": 0.99, "grad_norm": 2.059046961427139, "learning_rate": 3.93925613063062e-09, "loss": 0.3102, "step": 34491 }, { "epoch": 0.99, "grad_norm": 5.5765489120082705, "learning_rate": 3.920872299450485e-09, "loss": 0.3569, "step": 34492 }, { "epoch": 0.99, "grad_norm": 2.795406424840577, "learning_rate": 3.902531448924385e-09, "loss": 0.3505, "step": 34493 }, { "epoch": 0.99, "grad_norm": 5.991599759120467, "learning_rate": 3.884233579209418e-09, "loss": 0.5951, "step": 34494 }, { "epoch": 0.99, "grad_norm": 2.261499898333213, "learning_rate": 3.86597869046379e-09, "loss": 0.15, "step": 34495 }, { "epoch": 0.99, "grad_norm": 3.967499843950755, "learning_rate": 3.8477667828434875e-09, "loss": 0.3082, "step": 34496 }, { "epoch": 0.99, "grad_norm": 3.1859948400526297, "learning_rate": 3.829597856505607e-09, "loss": 0.2724, "step": 34497 }, { "epoch": 0.99, "grad_norm": 5.932666537860584, "learning_rate": 3.81147191160669e-09, "loss": 0.5068, "step": 34498 }, { "epoch": 0.99, "grad_norm": 7.747906432863475, "learning_rate": 3.793388948302723e-09, "loss": 0.6299, "step": 34499 }, { "epoch": 0.99, "grad_norm": 3.387816870392492, "learning_rate": 3.775348966749137e-09, "loss": 0.3771, "step": 34500 }, { "epoch": 0.99, "grad_norm": 4.3346126147474795, "learning_rate": 3.757351967100808e-09, "loss": 0.3679, "step": 34501 }, { "epoch": 0.99, "grad_norm": 6.272102945592286, "learning_rate": 3.7393979495126135e-09, "loss": 0.5042, "step": 34502 }, { "epoch": 0.99, "grad_norm": 6.791908396376117, "learning_rate": 3.721486914139427e-09, "loss": 0.5621, "step": 34503 }, { "epoch": 0.99, "grad_norm": 4.311165893521781, "learning_rate": 3.7036188611355717e-09, "loss": 0.4933, "step": 34504 }, { "epoch": 0.99, "grad_norm": 4.036170824537839, "learning_rate": 3.6857937906537023e-09, "loss": 0.3124, "step": 34505 }, { "epoch": 0.99, "grad_norm": 5.95011627580524, "learning_rate": 3.6680117028481398e-09, "loss": 0.2988, "step": 34506 }, { "epoch": 0.99, "grad_norm": 2.5366375890494703, "learning_rate": 3.6502725978715403e-09, "loss": 0.166, "step": 34507 }, { "epoch": 0.99, "grad_norm": 3.760564442775129, "learning_rate": 3.632576475876559e-09, "loss": 0.1194, "step": 34508 }, { "epoch": 0.99, "grad_norm": 9.785957739131634, "learning_rate": 3.6149233370147417e-09, "loss": 0.8145, "step": 34509 }, { "epoch": 0.99, "grad_norm": 10.064496381338369, "learning_rate": 3.597313181439299e-09, "loss": 0.5331, "step": 34510 }, { "epoch": 0.99, "grad_norm": 4.391677675951213, "learning_rate": 3.5797460093006663e-09, "loss": 0.2701, "step": 34511 }, { "epoch": 0.99, "grad_norm": 6.594411880032826, "learning_rate": 3.562221820749834e-09, "loss": 0.3453, "step": 34512 }, { "epoch": 0.99, "grad_norm": 5.2033512650514195, "learning_rate": 3.544740615938902e-09, "loss": 0.3092, "step": 34513 }, { "epoch": 0.99, "grad_norm": 3.8162460527796345, "learning_rate": 3.527302395016641e-09, "loss": 0.1182, "step": 34514 }, { "epoch": 0.99, "grad_norm": 7.8189834242753715, "learning_rate": 3.5099071581345957e-09, "loss": 0.4772, "step": 34515 }, { "epoch": 0.99, "grad_norm": 7.217812481396393, "learning_rate": 3.4925549054409813e-09, "loss": 0.1239, "step": 34516 }, { "epoch": 0.99, "grad_norm": 8.439337466753333, "learning_rate": 3.475245637085678e-09, "loss": 0.9491, "step": 34517 }, { "epoch": 0.99, "grad_norm": 3.299254476257604, "learning_rate": 3.4579793532180104e-09, "loss": 0.7328, "step": 34518 }, { "epoch": 0.99, "grad_norm": 3.6390358399347806, "learning_rate": 3.440756053986194e-09, "loss": 0.4884, "step": 34519 }, { "epoch": 0.99, "grad_norm": 12.940799955702111, "learning_rate": 3.4235757395384427e-09, "loss": 0.3676, "step": 34520 }, { "epoch": 0.99, "grad_norm": 2.5590500752274004, "learning_rate": 3.406438410022417e-09, "loss": 0.0888, "step": 34521 }, { "epoch": 0.99, "grad_norm": 4.433919886218465, "learning_rate": 3.3893440655852204e-09, "loss": 0.3807, "step": 34522 }, { "epoch": 0.99, "grad_norm": 4.20434722714291, "learning_rate": 3.3722927063745137e-09, "loss": 0.325, "step": 34523 }, { "epoch": 0.99, "grad_norm": 5.2991818831256765, "learning_rate": 3.3552843325368456e-09, "loss": 0.2003, "step": 34524 }, { "epoch": 0.99, "grad_norm": 9.876508090595513, "learning_rate": 3.3383189442182108e-09, "loss": 0.4094, "step": 34525 }, { "epoch": 0.99, "grad_norm": 6.605051560991183, "learning_rate": 3.3213965415651585e-09, "loss": 0.5504, "step": 34526 }, { "epoch": 0.99, "grad_norm": 3.946378386387389, "learning_rate": 3.3045171247225727e-09, "loss": 0.4619, "step": 34527 }, { "epoch": 0.99, "grad_norm": 6.5779883581182, "learning_rate": 3.287680693836448e-09, "loss": 0.4371, "step": 34528 }, { "epoch": 0.99, "grad_norm": 5.462551986132708, "learning_rate": 3.2708872490511134e-09, "loss": 0.3676, "step": 34529 }, { "epoch": 0.99, "grad_norm": 9.120308989719737, "learning_rate": 3.254136790510898e-09, "loss": 0.5449, "step": 34530 }, { "epoch": 0.99, "grad_norm": 10.701912314219543, "learning_rate": 3.23742931836013e-09, "loss": 0.9183, "step": 34531 }, { "epoch": 0.99, "grad_norm": 8.482205220617047, "learning_rate": 3.2207648327425845e-09, "loss": 0.5884, "step": 34532 }, { "epoch": 0.99, "grad_norm": 2.6982854176280973, "learning_rate": 3.2041433338014795e-09, "loss": 0.2604, "step": 34533 }, { "epoch": 0.99, "grad_norm": 6.31424647428776, "learning_rate": 3.1875648216800337e-09, "loss": 0.4331, "step": 34534 }, { "epoch": 0.99, "grad_norm": 4.01907099840449, "learning_rate": 3.171029296520911e-09, "loss": 0.2078, "step": 34535 }, { "epoch": 0.99, "grad_norm": 4.566615221448461, "learning_rate": 3.15453675846622e-09, "loss": 0.3067, "step": 34536 }, { "epoch": 0.99, "grad_norm": 8.007314497186192, "learning_rate": 3.1380872076575143e-09, "loss": 0.5208, "step": 34537 }, { "epoch": 0.99, "grad_norm": 3.693832133198133, "learning_rate": 3.1216806442369017e-09, "loss": 0.4252, "step": 34538 }, { "epoch": 0.99, "grad_norm": 3.2516237567518984, "learning_rate": 3.1053170683448262e-09, "loss": 0.2087, "step": 34539 }, { "epoch": 0.99, "grad_norm": 11.207950781683506, "learning_rate": 3.0889964801228413e-09, "loss": 0.4095, "step": 34540 }, { "epoch": 0.99, "grad_norm": 3.695088063172709, "learning_rate": 3.0727188797108343e-09, "loss": 0.174, "step": 34541 }, { "epoch": 0.99, "grad_norm": 7.017670806773861, "learning_rate": 3.056484267248694e-09, "loss": 0.289, "step": 34542 }, { "epoch": 0.99, "grad_norm": 4.582925252376037, "learning_rate": 3.0402926428768633e-09, "loss": 0.2731, "step": 34543 }, { "epoch": 0.99, "grad_norm": 6.049923938872099, "learning_rate": 3.02414400673412e-09, "loss": 0.6966, "step": 34544 }, { "epoch": 0.99, "grad_norm": 6.659511716200954, "learning_rate": 3.008038358959242e-09, "loss": 0.44, "step": 34545 }, { "epoch": 0.99, "grad_norm": 5.666099908011861, "learning_rate": 2.9919756996910076e-09, "loss": 0.2777, "step": 34546 }, { "epoch": 0.99, "grad_norm": 7.013239941513044, "learning_rate": 2.9759560290676393e-09, "loss": 0.4011, "step": 34547 }, { "epoch": 0.99, "grad_norm": 4.054911142403428, "learning_rate": 2.9599793472268048e-09, "loss": 0.2271, "step": 34548 }, { "epoch": 0.99, "grad_norm": 2.922089135386198, "learning_rate": 2.9440456543061714e-09, "loss": 0.1595, "step": 34549 }, { "epoch": 0.99, "grad_norm": 6.894515999839278, "learning_rate": 2.9281549504422968e-09, "loss": 0.5101, "step": 34550 }, { "epoch": 0.99, "grad_norm": 5.162228765128183, "learning_rate": 2.9123072357722935e-09, "loss": 0.5305, "step": 34551 }, { "epoch": 0.99, "grad_norm": 5.064473980008521, "learning_rate": 2.896502510432719e-09, "loss": 0.3721, "step": 34552 }, { "epoch": 0.99, "grad_norm": 6.217065918589985, "learning_rate": 2.8807407745595763e-09, "loss": 0.6538, "step": 34553 }, { "epoch": 0.99, "grad_norm": 5.750125303147062, "learning_rate": 2.8650220282877562e-09, "loss": 0.4981, "step": 34554 }, { "epoch": 0.99, "grad_norm": 4.140188805978734, "learning_rate": 2.8493462717527064e-09, "loss": 0.4762, "step": 34555 }, { "epoch": 0.99, "grad_norm": 5.932423439158799, "learning_rate": 2.8337135050893193e-09, "loss": 0.5375, "step": 34556 }, { "epoch": 0.99, "grad_norm": 5.271009448530586, "learning_rate": 2.818123728432487e-09, "loss": 0.3209, "step": 34557 }, { "epoch": 0.99, "grad_norm": 5.430208008331556, "learning_rate": 2.802576941916546e-09, "loss": 0.5082, "step": 34558 }, { "epoch": 0.99, "grad_norm": 3.0479985635270967, "learning_rate": 2.7870731456741686e-09, "loss": 0.4104, "step": 34559 }, { "epoch": 0.99, "grad_norm": 3.9765256868768133, "learning_rate": 2.771612339839691e-09, "loss": 0.3731, "step": 34560 }, { "epoch": 0.99, "grad_norm": 4.159720656823295, "learning_rate": 2.7561945245452305e-09, "loss": 0.1882, "step": 34561 }, { "epoch": 0.99, "grad_norm": 2.7560073953371527, "learning_rate": 2.7408196999245685e-09, "loss": 0.2533, "step": 34562 }, { "epoch": 0.99, "grad_norm": 16.494484528682406, "learning_rate": 2.7254878661092663e-09, "loss": 0.487, "step": 34563 }, { "epoch": 0.99, "grad_norm": 4.838221391221029, "learning_rate": 2.710199023230886e-09, "loss": 0.6553, "step": 34564 }, { "epoch": 0.99, "grad_norm": 8.13121552045873, "learning_rate": 2.6949531714215437e-09, "loss": 0.4931, "step": 34565 }, { "epoch": 0.99, "grad_norm": 5.104450103778956, "learning_rate": 2.6797503108122458e-09, "loss": 0.3339, "step": 34566 }, { "epoch": 0.99, "grad_norm": 4.258237653031884, "learning_rate": 2.6645904415339987e-09, "loss": 0.4453, "step": 34567 }, { "epoch": 0.99, "grad_norm": 5.545719080014865, "learning_rate": 2.649473563716698e-09, "loss": 0.5366, "step": 34568 }, { "epoch": 0.99, "grad_norm": 4.059209694260692, "learning_rate": 2.6343996774907953e-09, "loss": 0.2231, "step": 34569 }, { "epoch": 0.99, "grad_norm": 7.440905568658051, "learning_rate": 2.6193687829861867e-09, "loss": 0.2783, "step": 34570 }, { "epoch": 0.99, "grad_norm": 5.4914894582881155, "learning_rate": 2.6043808803311033e-09, "loss": 0.4748, "step": 34571 }, { "epoch": 0.99, "grad_norm": 6.528651108274902, "learning_rate": 2.5894359696559956e-09, "loss": 0.5712, "step": 34572 }, { "epoch": 0.99, "grad_norm": 5.288857054558927, "learning_rate": 2.5745340510885398e-09, "loss": 0.6681, "step": 34573 }, { "epoch": 0.99, "grad_norm": 9.855589416466763, "learning_rate": 2.559675124756411e-09, "loss": 0.4762, "step": 34574 }, { "epoch": 0.99, "grad_norm": 12.867136174037642, "learning_rate": 2.544859190788951e-09, "loss": 0.8432, "step": 34575 }, { "epoch": 0.99, "grad_norm": 10.357566065668493, "learning_rate": 2.530086249312169e-09, "loss": 0.6022, "step": 34576 }, { "epoch": 0.99, "grad_norm": 4.4905826388745576, "learning_rate": 2.515356300454297e-09, "loss": 0.2886, "step": 34577 }, { "epoch": 0.99, "grad_norm": 5.374106732152223, "learning_rate": 2.5006693443407893e-09, "loss": 0.4632, "step": 34578 }, { "epoch": 0.99, "grad_norm": 6.406883059600681, "learning_rate": 2.486025381099322e-09, "loss": 0.5254, "step": 34579 }, { "epoch": 0.99, "grad_norm": 2.6380527653821715, "learning_rate": 2.4714244108547945e-09, "loss": 0.2008, "step": 34580 }, { "epoch": 0.99, "grad_norm": 5.817266417282357, "learning_rate": 2.4568664337337734e-09, "loss": 0.2879, "step": 34581 }, { "epoch": 0.99, "grad_norm": 7.057999856660776, "learning_rate": 2.4423514498606027e-09, "loss": 0.8417, "step": 34582 }, { "epoch": 0.99, "grad_norm": 3.5969290562734564, "learning_rate": 2.427879459360738e-09, "loss": 0.569, "step": 34583 }, { "epoch": 0.99, "grad_norm": 7.152140507708898, "learning_rate": 2.413450462358524e-09, "loss": 0.5621, "step": 34584 }, { "epoch": 0.99, "grad_norm": 3.4599649858356836, "learning_rate": 2.3990644589783064e-09, "loss": 0.3647, "step": 34585 }, { "epoch": 0.99, "grad_norm": 1.7891271113093594, "learning_rate": 2.384721449343319e-09, "loss": 0.0932, "step": 34586 }, { "epoch": 0.99, "grad_norm": 5.909412304434191, "learning_rate": 2.3704214335773524e-09, "loss": 0.5837, "step": 34587 }, { "epoch": 0.99, "grad_norm": 9.030883887238838, "learning_rate": 2.3561644118030857e-09, "loss": 0.667, "step": 34588 }, { "epoch": 0.99, "grad_norm": 4.670731114208498, "learning_rate": 2.341950384143754e-09, "loss": 0.4453, "step": 34589 }, { "epoch": 0.99, "grad_norm": 4.702574738918019, "learning_rate": 2.3277793507209266e-09, "loss": 0.2447, "step": 34590 }, { "epoch": 0.99, "grad_norm": 12.595127759212053, "learning_rate": 2.3136513116572833e-09, "loss": 0.5086, "step": 34591 }, { "epoch": 0.99, "grad_norm": 10.826003622350786, "learning_rate": 2.2995662670738385e-09, "loss": 1.1092, "step": 34592 }, { "epoch": 0.99, "grad_norm": 4.7952938891137, "learning_rate": 2.285524217091606e-09, "loss": 0.4246, "step": 34593 }, { "epoch": 0.99, "grad_norm": 6.081751704912115, "learning_rate": 2.2715251618321553e-09, "loss": 0.7988, "step": 34594 }, { "epoch": 0.99, "grad_norm": 4.286917728667734, "learning_rate": 2.257569101415391e-09, "loss": 0.2795, "step": 34595 }, { "epoch": 0.99, "grad_norm": 3.5097496274351085, "learning_rate": 2.243656035961217e-09, "loss": 0.3718, "step": 34596 }, { "epoch": 0.99, "grad_norm": 3.113742291667988, "learning_rate": 2.229785965589537e-09, "loss": 0.1813, "step": 34597 }, { "epoch": 0.99, "grad_norm": 4.995229209322453, "learning_rate": 2.2159588904197004e-09, "loss": 0.488, "step": 34598 }, { "epoch": 0.99, "grad_norm": 11.706787758536203, "learning_rate": 2.202174810570501e-09, "loss": 1.2836, "step": 34599 }, { "epoch": 0.99, "grad_norm": 13.916027823459567, "learning_rate": 2.1884337261607325e-09, "loss": 0.7233, "step": 34600 }, { "epoch": 0.99, "grad_norm": 7.80009030265376, "learning_rate": 2.1747356373086338e-09, "loss": 0.3512, "step": 34601 }, { "epoch": 0.99, "grad_norm": 5.583180079088639, "learning_rate": 2.1610805441318884e-09, "loss": 0.4301, "step": 34602 }, { "epoch": 0.99, "grad_norm": 5.238206423367972, "learning_rate": 2.147468446747625e-09, "loss": 0.6495, "step": 34603 }, { "epoch": 0.99, "grad_norm": 5.808644543998397, "learning_rate": 2.133899345274082e-09, "loss": 0.2533, "step": 34604 }, { "epoch": 0.99, "grad_norm": 6.147779650073227, "learning_rate": 2.1203732398267228e-09, "loss": 0.3352, "step": 34605 }, { "epoch": 0.99, "grad_norm": 6.976291903281255, "learning_rate": 2.1068901305226763e-09, "loss": 0.565, "step": 34606 }, { "epoch": 0.99, "grad_norm": 4.495157311670871, "learning_rate": 2.0934500174774053e-09, "loss": 0.3378, "step": 34607 }, { "epoch": 0.99, "grad_norm": 2.2292806062962, "learning_rate": 2.080052900806928e-09, "loss": 0.5789, "step": 34608 }, { "epoch": 0.99, "grad_norm": 7.884322960281542, "learning_rate": 2.066698780626708e-09, "loss": 0.5074, "step": 34609 }, { "epoch": 0.99, "grad_norm": 5.139992161904381, "learning_rate": 2.0533876570505428e-09, "loss": 0.8539, "step": 34610 }, { "epoch": 0.99, "grad_norm": 4.000640281453779, "learning_rate": 2.040119530194451e-09, "loss": 0.3848, "step": 34611 }, { "epoch": 0.99, "grad_norm": 6.211072850701887, "learning_rate": 2.026894400171675e-09, "loss": 0.8135, "step": 34612 }, { "epoch": 0.99, "grad_norm": 7.400912313026635, "learning_rate": 2.013712267096013e-09, "loss": 0.2694, "step": 34613 }, { "epoch": 0.99, "grad_norm": 6.044182071803782, "learning_rate": 2.0005731310812627e-09, "loss": 0.361, "step": 34614 }, { "epoch": 0.99, "grad_norm": 7.120111896835747, "learning_rate": 1.9874769922401117e-09, "loss": 0.5411, "step": 34615 }, { "epoch": 0.99, "grad_norm": 7.163068363347563, "learning_rate": 1.9744238506852475e-09, "loss": 0.4963, "step": 34616 }, { "epoch": 0.99, "grad_norm": 4.694970954748301, "learning_rate": 1.961413706529358e-09, "loss": 0.3642, "step": 34617 }, { "epoch": 0.99, "grad_norm": 4.537855214278548, "learning_rate": 1.9484465598834658e-09, "loss": 0.1782, "step": 34618 }, { "epoch": 0.99, "grad_norm": 8.437432295916153, "learning_rate": 1.9355224108602576e-09, "loss": 0.2487, "step": 34619 }, { "epoch": 0.99, "grad_norm": 5.84938968507016, "learning_rate": 1.9226412595702016e-09, "loss": 0.4912, "step": 34620 }, { "epoch": 0.99, "grad_norm": 7.508336202752867, "learning_rate": 1.9098031061243194e-09, "loss": 0.5425, "step": 34621 }, { "epoch": 0.99, "grad_norm": 4.818155928430963, "learning_rate": 1.8970079506325235e-09, "loss": 0.3477, "step": 34622 }, { "epoch": 0.99, "grad_norm": 7.046398882630387, "learning_rate": 1.884255793205836e-09, "loss": 0.7039, "step": 34623 }, { "epoch": 0.99, "grad_norm": 4.720994832216039, "learning_rate": 1.871546633953614e-09, "loss": 0.0831, "step": 34624 }, { "epoch": 0.99, "grad_norm": 3.851511028325014, "learning_rate": 1.8588804729846589e-09, "loss": 0.1653, "step": 34625 }, { "epoch": 0.99, "grad_norm": 5.00733186078616, "learning_rate": 1.8462573104088832e-09, "loss": 0.6289, "step": 34626 }, { "epoch": 0.99, "grad_norm": 6.598581144235889, "learning_rate": 1.8336771463339787e-09, "loss": 0.3748, "step": 34627 }, { "epoch": 0.99, "grad_norm": 4.603374467707843, "learning_rate": 1.821139980868747e-09, "loss": 0.5241, "step": 34628 }, { "epoch": 0.99, "grad_norm": 5.123937845651214, "learning_rate": 1.8086458141208795e-09, "loss": 0.5098, "step": 34629 }, { "epoch": 0.99, "grad_norm": 5.702534935094578, "learning_rate": 1.7961946461980684e-09, "loss": 0.5484, "step": 34630 }, { "epoch": 0.99, "grad_norm": 5.7487286530505, "learning_rate": 1.7837864772068947e-09, "loss": 0.6417, "step": 34631 }, { "epoch": 0.99, "grad_norm": 3.5953563748848643, "learning_rate": 1.771421307254495e-09, "loss": 0.2236, "step": 34632 }, { "epoch": 0.99, "grad_norm": 3.707744726694146, "learning_rate": 1.7590991364468957e-09, "loss": 0.399, "step": 34633 }, { "epoch": 0.99, "grad_norm": 7.700038887520227, "learning_rate": 1.746819964890678e-09, "loss": 0.5621, "step": 34634 }, { "epoch": 0.99, "grad_norm": 4.010616801168661, "learning_rate": 1.7345837926907583e-09, "loss": 0.3821, "step": 34635 }, { "epoch": 0.99, "grad_norm": 4.241493646069451, "learning_rate": 1.7223906199531625e-09, "loss": 0.2488, "step": 34636 }, { "epoch": 0.99, "grad_norm": 4.485007743190855, "learning_rate": 1.710240446782252e-09, "loss": 0.4146, "step": 34637 }, { "epoch": 0.99, "grad_norm": 5.648802179617651, "learning_rate": 1.6981332732829424e-09, "loss": 0.3727, "step": 34638 }, { "epoch": 0.99, "grad_norm": 4.79505738107678, "learning_rate": 1.68606909955904e-09, "loss": 0.4255, "step": 34639 }, { "epoch": 0.99, "grad_norm": 4.896107023448491, "learning_rate": 1.6740479257149055e-09, "loss": 0.2836, "step": 34640 }, { "epoch": 0.99, "grad_norm": 14.85913535004073, "learning_rate": 1.6620697518532346e-09, "loss": 0.2307, "step": 34641 }, { "epoch": 0.99, "grad_norm": 4.395571925389178, "learning_rate": 1.6501345780772781e-09, "loss": 0.2358, "step": 34642 }, { "epoch": 0.99, "grad_norm": 4.924313828066225, "learning_rate": 1.6382424044897317e-09, "loss": 0.2604, "step": 34643 }, { "epoch": 0.99, "grad_norm": 6.874580093044846, "learning_rate": 1.6263932311927354e-09, "loss": 0.4046, "step": 34644 }, { "epoch": 0.99, "grad_norm": 5.79171671491263, "learning_rate": 1.6145870582889856e-09, "loss": 0.408, "step": 34645 }, { "epoch": 0.99, "grad_norm": 3.9064419813663083, "learning_rate": 1.602823885879512e-09, "loss": 0.2967, "step": 34646 }, { "epoch": 0.99, "grad_norm": 4.734346370799306, "learning_rate": 1.5911037140653452e-09, "loss": 0.4048, "step": 34647 }, { "epoch": 0.99, "grad_norm": 2.389220828890606, "learning_rate": 1.5794265429475153e-09, "loss": 0.161, "step": 34648 }, { "epoch": 0.99, "grad_norm": 4.113520109046571, "learning_rate": 1.5677923726259426e-09, "loss": 0.1391, "step": 34649 }, { "epoch": 0.99, "grad_norm": 5.529328136295083, "learning_rate": 1.5562012032022122e-09, "loss": 0.4785, "step": 34650 }, { "epoch": 0.99, "grad_norm": 6.501180578270795, "learning_rate": 1.544653034774024e-09, "loss": 0.5972, "step": 34651 }, { "epoch": 0.99, "grad_norm": 2.7868527653164987, "learning_rate": 1.5331478674424083e-09, "loss": 0.162, "step": 34652 }, { "epoch": 0.99, "grad_norm": 6.754143926388839, "learning_rate": 1.5216857013056196e-09, "loss": 0.595, "step": 34653 }, { "epoch": 0.99, "grad_norm": 6.262095839199238, "learning_rate": 1.5102665364624681e-09, "loss": 0.4946, "step": 34654 }, { "epoch": 0.99, "grad_norm": 5.493687865651256, "learning_rate": 1.498890373010653e-09, "loss": 0.5509, "step": 34655 }, { "epoch": 0.99, "grad_norm": 4.508253105751855, "learning_rate": 1.4875572110489845e-09, "loss": 0.1975, "step": 34656 }, { "epoch": 0.99, "grad_norm": 5.304873883171146, "learning_rate": 1.4762670506740517e-09, "loss": 0.2729, "step": 34657 }, { "epoch": 0.99, "grad_norm": 6.323410689417523, "learning_rate": 1.4650198919835546e-09, "loss": 0.5103, "step": 34658 }, { "epoch": 0.99, "grad_norm": 6.793483124573666, "learning_rate": 1.4538157350735272e-09, "loss": 0.2947, "step": 34659 }, { "epoch": 0.99, "grad_norm": 6.743687963332953, "learning_rate": 1.4426545800416691e-09, "loss": 0.5717, "step": 34660 }, { "epoch": 0.99, "grad_norm": 2.927387260233789, "learning_rate": 1.4315364269829046e-09, "loss": 0.3474, "step": 34661 }, { "epoch": 0.99, "grad_norm": 4.2276466939279365, "learning_rate": 1.4204612759932678e-09, "loss": 0.4735, "step": 34662 }, { "epoch": 0.99, "grad_norm": 7.23606816492823, "learning_rate": 1.409429127167683e-09, "loss": 0.2886, "step": 34663 }, { "epoch": 0.99, "grad_norm": 4.74036620105697, "learning_rate": 1.398439980601629e-09, "loss": 0.5052, "step": 34664 }, { "epoch": 0.99, "grad_norm": 4.465580222864343, "learning_rate": 1.3874938363889202e-09, "loss": 0.2969, "step": 34665 }, { "epoch": 0.99, "grad_norm": 3.389587261117177, "learning_rate": 1.37659069462448e-09, "loss": 0.2468, "step": 34666 }, { "epoch": 0.99, "grad_norm": 6.458494118770725, "learning_rate": 1.3657305554015675e-09, "loss": 0.505, "step": 34667 }, { "epoch": 0.99, "grad_norm": 5.961457915980686, "learning_rate": 1.3549134188139967e-09, "loss": 0.3507, "step": 34668 }, { "epoch": 0.99, "grad_norm": 14.196919679330009, "learning_rate": 1.344139284955026e-09, "loss": 0.6624, "step": 34669 }, { "epoch": 0.99, "grad_norm": 3.528295175318704, "learning_rate": 1.333408153916249e-09, "loss": 0.2861, "step": 34670 }, { "epoch": 0.99, "grad_norm": 4.2244318247118375, "learning_rate": 1.3227200257914797e-09, "loss": 0.399, "step": 34671 }, { "epoch": 0.99, "grad_norm": 10.136620634629477, "learning_rate": 1.3120749006712007e-09, "loss": 0.4037, "step": 34672 }, { "epoch": 0.99, "grad_norm": 4.108730541214505, "learning_rate": 1.3014727786481162e-09, "loss": 0.2767, "step": 34673 }, { "epoch": 0.99, "grad_norm": 6.123934516875567, "learning_rate": 1.2909136598127092e-09, "loss": 0.5869, "step": 34674 }, { "epoch": 0.99, "grad_norm": 3.2658499498780884, "learning_rate": 1.280397544256573e-09, "loss": 0.2976, "step": 34675 }, { "epoch": 0.99, "grad_norm": 7.036158564968487, "learning_rate": 1.2699244320690806e-09, "loss": 0.4511, "step": 34676 }, { "epoch": 0.99, "grad_norm": 3.1928977049298926, "learning_rate": 1.2594943233412703e-09, "loss": 0.2852, "step": 34677 }, { "epoch": 0.99, "grad_norm": 5.453652692473597, "learning_rate": 1.2491072181625153e-09, "loss": 0.4485, "step": 34678 }, { "epoch": 0.99, "grad_norm": 4.274207539195951, "learning_rate": 1.2387631166227432e-09, "loss": 0.6016, "step": 34679 }, { "epoch": 0.99, "grad_norm": 7.027166647426133, "learning_rate": 1.2284620188096619e-09, "loss": 0.3115, "step": 34680 }, { "epoch": 0.99, "grad_norm": 5.26875251914764, "learning_rate": 1.2182039248131995e-09, "loss": 0.1963, "step": 34681 }, { "epoch": 0.99, "grad_norm": 5.675716160862427, "learning_rate": 1.2079888347210633e-09, "loss": 0.3645, "step": 34682 }, { "epoch": 0.99, "grad_norm": 6.078191330260961, "learning_rate": 1.1978167486204062e-09, "loss": 0.2822, "step": 34683 }, { "epoch": 0.99, "grad_norm": 4.626860579422199, "learning_rate": 1.187687666600046e-09, "loss": 0.4609, "step": 34684 }, { "epoch": 0.99, "grad_norm": 5.20094324598572, "learning_rate": 1.1776015887465796e-09, "loss": 0.2902, "step": 34685 }, { "epoch": 0.99, "grad_norm": 5.592793238987613, "learning_rate": 1.167558515146605e-09, "loss": 0.2705, "step": 34686 }, { "epoch": 0.99, "grad_norm": 4.825436013044641, "learning_rate": 1.1575584458867195e-09, "loss": 0.4343, "step": 34687 }, { "epoch": 0.99, "grad_norm": 4.044262399887787, "learning_rate": 1.1476013810529651e-09, "loss": 0.1428, "step": 34688 }, { "epoch": 0.99, "grad_norm": 6.48348500222279, "learning_rate": 1.1376873207308293e-09, "loss": 0.7077, "step": 34689 }, { "epoch": 0.99, "grad_norm": 5.343848288898134, "learning_rate": 1.1278162650057989e-09, "loss": 0.6573, "step": 34690 }, { "epoch": 0.99, "grad_norm": 3.821323892779268, "learning_rate": 1.117988213962251e-09, "loss": 0.308, "step": 34691 }, { "epoch": 0.99, "grad_norm": 5.0671867998742, "learning_rate": 1.108203167685673e-09, "loss": 0.4089, "step": 34692 }, { "epoch": 0.99, "grad_norm": 3.757527330645608, "learning_rate": 1.0984611262593315e-09, "loss": 0.4458, "step": 34693 }, { "epoch": 0.99, "grad_norm": 4.941175613750523, "learning_rate": 1.0887620897676033e-09, "loss": 0.4871, "step": 34694 }, { "epoch": 0.99, "grad_norm": 4.77851495243075, "learning_rate": 1.0791060582943102e-09, "loss": 0.4152, "step": 34695 }, { "epoch": 0.99, "grad_norm": 4.591570668102588, "learning_rate": 1.0694930319210538e-09, "loss": 0.8146, "step": 34696 }, { "epoch": 0.99, "grad_norm": 4.819210575718819, "learning_rate": 1.0599230107322111e-09, "loss": 0.6114, "step": 34697 }, { "epoch": 0.99, "grad_norm": 4.803251743567894, "learning_rate": 1.0503959948088283e-09, "loss": 0.5757, "step": 34698 }, { "epoch": 0.99, "grad_norm": 7.545369884709597, "learning_rate": 1.0409119842336169e-09, "loss": 0.4465, "step": 34699 }, { "epoch": 0.99, "grad_norm": 5.8031819831215605, "learning_rate": 1.0314709790881783e-09, "loss": 0.5337, "step": 34700 }, { "epoch": 0.99, "grad_norm": 4.592419775572058, "learning_rate": 1.0220729794530037e-09, "loss": 0.4092, "step": 34701 }, { "epoch": 0.99, "grad_norm": 5.145406800583764, "learning_rate": 1.0127179854102498e-09, "loss": 0.2508, "step": 34702 }, { "epoch": 0.99, "grad_norm": 3.5330748105090333, "learning_rate": 1.0034059970387421e-09, "loss": 0.2842, "step": 34703 }, { "epoch": 0.99, "grad_norm": 5.639952136032781, "learning_rate": 9.941370144200824e-10, "loss": 0.8121, "step": 34704 }, { "epoch": 0.99, "grad_norm": 3.5321714161224276, "learning_rate": 9.849110376330962e-10, "loss": 0.2117, "step": 34705 }, { "epoch": 0.99, "grad_norm": 9.33012964123835, "learning_rate": 9.757280667571645e-10, "loss": 0.5145, "step": 34706 }, { "epoch": 0.99, "grad_norm": 7.894988834307978, "learning_rate": 9.665881018716683e-10, "loss": 0.3256, "step": 34707 }, { "epoch": 0.99, "grad_norm": 8.783196745962007, "learning_rate": 9.574911430554334e-10, "loss": 0.5082, "step": 34708 }, { "epoch": 0.99, "grad_norm": 9.098129004663434, "learning_rate": 9.484371903861755e-10, "loss": 0.502, "step": 34709 }, { "epoch": 0.99, "grad_norm": 4.095426463695864, "learning_rate": 9.394262439421653e-10, "loss": 0.3038, "step": 34710 }, { "epoch": 0.99, "grad_norm": 3.4859669217757885, "learning_rate": 9.304583038005632e-10, "loss": 0.3258, "step": 34711 }, { "epoch": 0.99, "grad_norm": 6.621699680745669, "learning_rate": 9.215333700385298e-10, "loss": 0.0907, "step": 34712 }, { "epoch": 0.99, "grad_norm": 7.301663316373123, "learning_rate": 9.126514427332256e-10, "loss": 0.7376, "step": 34713 }, { "epoch": 0.99, "grad_norm": 10.336401227609867, "learning_rate": 9.038125219612559e-10, "loss": 0.602, "step": 34714 }, { "epoch": 0.99, "grad_norm": 2.3970249999457, "learning_rate": 8.950166077981159e-10, "loss": 0.2757, "step": 34715 }, { "epoch": 0.99, "grad_norm": 6.819129919976909, "learning_rate": 8.862637003193009e-10, "loss": 0.6062, "step": 34716 }, { "epoch": 0.99, "grad_norm": 3.966790579148015, "learning_rate": 8.77553799600861e-10, "loss": 0.17, "step": 34717 }, { "epoch": 0.99, "grad_norm": 8.328366789609065, "learning_rate": 8.688869057171812e-10, "loss": 0.297, "step": 34718 }, { "epoch": 0.99, "grad_norm": 10.030850696637957, "learning_rate": 8.602630187426464e-10, "loss": 0.7247, "step": 34719 }, { "epoch": 0.99, "grad_norm": 5.025960617861395, "learning_rate": 8.516821387521967e-10, "loss": 0.5176, "step": 34720 }, { "epoch": 0.99, "grad_norm": 4.893601410657385, "learning_rate": 8.431442658191069e-10, "loss": 0.5971, "step": 34721 }, { "epoch": 0.99, "grad_norm": 3.2424199767815254, "learning_rate": 8.346494000166516e-10, "loss": 0.3, "step": 34722 }, { "epoch": 0.99, "grad_norm": 8.46661316211445, "learning_rate": 8.261975414186607e-10, "loss": 0.3071, "step": 34723 }, { "epoch": 0.99, "grad_norm": 4.775347787863849, "learning_rate": 8.177886900967435e-10, "loss": 0.174, "step": 34724 }, { "epoch": 0.99, "grad_norm": 2.5967210505972464, "learning_rate": 8.094228461241749e-10, "loss": 0.2203, "step": 34725 }, { "epoch": 0.99, "grad_norm": 4.61525240396989, "learning_rate": 8.011000095731192e-10, "loss": 0.6435, "step": 34726 }, { "epoch": 0.99, "grad_norm": 6.804762549703636, "learning_rate": 7.928201805140756e-10, "loss": 0.6385, "step": 34727 }, { "epoch": 0.99, "grad_norm": 5.616041509586289, "learning_rate": 7.845833590192087e-10, "loss": 0.6793, "step": 34728 }, { "epoch": 0.99, "grad_norm": 8.460319404885269, "learning_rate": 7.763895451590175e-10, "loss": 0.7669, "step": 34729 }, { "epoch": 0.99, "grad_norm": 5.946979705001047, "learning_rate": 7.682387390034463e-10, "loss": 0.1704, "step": 34730 }, { "epoch": 0.99, "grad_norm": 4.804603910882068, "learning_rate": 7.601309406241042e-10, "loss": 0.2194, "step": 34731 }, { "epoch": 0.99, "grad_norm": 3.1602978102920596, "learning_rate": 7.5206615008927e-10, "loss": 0.2136, "step": 34732 }, { "epoch": 0.99, "grad_norm": 11.232745777992694, "learning_rate": 7.44044367468888e-10, "loss": 0.8231, "step": 34733 }, { "epoch": 0.99, "grad_norm": 5.758798522579191, "learning_rate": 7.360655928323468e-10, "loss": 0.7577, "step": 34734 }, { "epoch": 0.99, "grad_norm": 3.291668473416247, "learning_rate": 7.281298262473702e-10, "loss": 0.2498, "step": 34735 }, { "epoch": 0.99, "grad_norm": 2.18512924335854, "learning_rate": 7.20237067783347e-10, "loss": 0.1506, "step": 34736 }, { "epoch": 0.99, "grad_norm": 1.7122304115968388, "learning_rate": 7.123873175068907e-10, "loss": 0.1484, "step": 34737 }, { "epoch": 0.99, "grad_norm": 5.6827845466016464, "learning_rate": 7.045805754868351e-10, "loss": 0.8942, "step": 34738 }, { "epoch": 0.99, "grad_norm": 6.044262777704154, "learning_rate": 6.968168417897936e-10, "loss": 0.7928, "step": 34739 }, { "epoch": 0.99, "grad_norm": 2.6602588135249134, "learning_rate": 6.890961164823795e-10, "loss": 0.1465, "step": 34740 }, { "epoch": 0.99, "grad_norm": 3.007532190910049, "learning_rate": 6.814183996306512e-10, "loss": 0.3694, "step": 34741 }, { "epoch": 0.99, "grad_norm": 2.3156903400546325, "learning_rate": 6.73783691301777e-10, "loss": 0.1086, "step": 34742 }, { "epoch": 0.99, "grad_norm": 5.700412178194859, "learning_rate": 6.661919915607052e-10, "loss": 0.3453, "step": 34743 }, { "epoch": 0.99, "grad_norm": 2.9967229746902087, "learning_rate": 6.586433004723835e-10, "loss": 0.1296, "step": 34744 }, { "epoch": 1.0, "grad_norm": 2.7619803600151642, "learning_rate": 6.511376181028706e-10, "loss": 0.2273, "step": 34745 }, { "epoch": 1.0, "grad_norm": 4.271100871692649, "learning_rate": 6.436749445154489e-10, "loss": 0.4531, "step": 34746 }, { "epoch": 1.0, "grad_norm": 6.264824338065387, "learning_rate": 6.362552797750665e-10, "loss": 0.3859, "step": 34747 }, { "epoch": 1.0, "grad_norm": 5.95257185321043, "learning_rate": 6.288786239461165e-10, "loss": 0.6939, "step": 34748 }, { "epoch": 1.0, "grad_norm": 4.716329724416247, "learning_rate": 6.215449770907711e-10, "loss": 0.3117, "step": 34749 }, { "epoch": 1.0, "grad_norm": 2.5493641023059257, "learning_rate": 6.142543392728684e-10, "loss": 0.2592, "step": 34750 }, { "epoch": 1.0, "grad_norm": 2.8861026132189473, "learning_rate": 6.070067105551358e-10, "loss": 0.2038, "step": 34751 }, { "epoch": 1.0, "grad_norm": 4.333689971455274, "learning_rate": 5.998020909991908e-10, "loss": 0.3558, "step": 34752 }, { "epoch": 1.0, "grad_norm": 5.064490786352447, "learning_rate": 5.926404806683161e-10, "loss": 0.1444, "step": 34753 }, { "epoch": 1.0, "grad_norm": 6.484294109529344, "learning_rate": 5.855218796230189e-10, "loss": 0.6571, "step": 34754 }, { "epoch": 1.0, "grad_norm": 2.8629150780991837, "learning_rate": 5.784462879249164e-10, "loss": 0.1746, "step": 34755 }, { "epoch": 1.0, "grad_norm": 5.270603803177891, "learning_rate": 5.714137056350711e-10, "loss": 0.4561, "step": 34756 }, { "epoch": 1.0, "grad_norm": 5.34014099231423, "learning_rate": 5.644241328139899e-10, "loss": 0.4465, "step": 34757 }, { "epoch": 1.0, "grad_norm": 5.83774086334806, "learning_rate": 5.574775695216251e-10, "loss": 0.3985, "step": 34758 }, { "epoch": 1.0, "grad_norm": 4.718239459783015, "learning_rate": 5.505740158173733e-10, "loss": 0.3745, "step": 34759 }, { "epoch": 1.0, "grad_norm": 5.9724202280956575, "learning_rate": 5.43713471761187e-10, "loss": 0.4962, "step": 34760 }, { "epoch": 1.0, "grad_norm": 4.821879819915795, "learning_rate": 5.368959374119076e-10, "loss": 0.6648, "step": 34761 }, { "epoch": 1.0, "grad_norm": 5.817150019735766, "learning_rate": 5.301214128278221e-10, "loss": 0.1891, "step": 34762 }, { "epoch": 1.0, "grad_norm": 6.9029265472359205, "learning_rate": 5.233898980677721e-10, "loss": 0.4649, "step": 34763 }, { "epoch": 1.0, "grad_norm": 5.126872302190324, "learning_rate": 5.167013931894893e-10, "loss": 0.4384, "step": 34764 }, { "epoch": 1.0, "grad_norm": 4.951355988278909, "learning_rate": 5.100558982507054e-10, "loss": 0.4844, "step": 34765 }, { "epoch": 1.0, "grad_norm": 8.128019828071965, "learning_rate": 5.034534133080415e-10, "loss": 0.6922, "step": 34766 }, { "epoch": 1.0, "grad_norm": 7.1339258200443885, "learning_rate": 4.968939384186744e-10, "loss": 0.5977, "step": 34767 }, { "epoch": 1.0, "grad_norm": 7.848875643153886, "learning_rate": 4.903774736392253e-10, "loss": 0.469, "step": 34768 }, { "epoch": 1.0, "grad_norm": 7.224640329773675, "learning_rate": 4.839040190252054e-10, "loss": 0.3818, "step": 34769 }, { "epoch": 1.0, "grad_norm": 4.8148797084165675, "learning_rate": 4.774735746326808e-10, "loss": 0.233, "step": 34770 }, { "epoch": 1.0, "grad_norm": 2.409186441684961, "learning_rate": 4.710861405166078e-10, "loss": 0.1352, "step": 34771 }, { "epoch": 1.0, "grad_norm": 6.256058655517516, "learning_rate": 4.6474171673249746e-10, "loss": 0.256, "step": 34772 }, { "epoch": 1.0, "grad_norm": 7.320484432127044, "learning_rate": 4.5844030333475063e-10, "loss": 0.4842, "step": 34773 }, { "epoch": 1.0, "grad_norm": 4.456751527960283, "learning_rate": 4.521819003772132e-10, "loss": 0.2807, "step": 34774 }, { "epoch": 1.0, "grad_norm": 5.168995173588745, "learning_rate": 4.459665079142861e-10, "loss": 0.515, "step": 34775 }, { "epoch": 1.0, "grad_norm": 2.543537644653823, "learning_rate": 4.3979412599926e-10, "loss": 0.1715, "step": 34776 }, { "epoch": 1.0, "grad_norm": 8.652643492180351, "learning_rate": 4.3366475468487047e-10, "loss": 0.723, "step": 34777 }, { "epoch": 1.0, "grad_norm": 4.383149485969115, "learning_rate": 4.275783940238531e-10, "loss": 0.5762, "step": 34778 }, { "epoch": 1.0, "grad_norm": 6.141615712469041, "learning_rate": 4.215350440689436e-10, "loss": 0.6856, "step": 34779 }, { "epoch": 1.0, "grad_norm": 6.6651091743107225, "learning_rate": 4.155347048723224e-10, "loss": 0.2557, "step": 34780 }, { "epoch": 1.0, "grad_norm": 5.7396582190675955, "learning_rate": 4.0957737648505967e-10, "loss": 0.4067, "step": 34781 }, { "epoch": 1.0, "grad_norm": 8.713583615314743, "learning_rate": 4.036630589587809e-10, "loss": 0.5685, "step": 34782 }, { "epoch": 1.0, "grad_norm": 4.036070078563695, "learning_rate": 3.9779175234400116e-10, "loss": 0.4345, "step": 34783 }, { "epoch": 1.0, "grad_norm": 6.296614307847573, "learning_rate": 3.919634566917907e-10, "loss": 0.4908, "step": 34784 }, { "epoch": 1.0, "grad_norm": 3.7724888874848386, "learning_rate": 3.861781720515545e-10, "loss": 0.3391, "step": 34785 }, { "epoch": 1.0, "grad_norm": 6.818408608566161, "learning_rate": 3.804358984738077e-10, "loss": 0.4726, "step": 34786 }, { "epoch": 1.0, "grad_norm": 3.7034170944968876, "learning_rate": 3.7473663600740005e-10, "loss": 0.3569, "step": 34787 }, { "epoch": 1.0, "grad_norm": 2.6779496811020302, "learning_rate": 3.6908038470173655e-10, "loss": 0.2313, "step": 34788 }, { "epoch": 1.0, "grad_norm": 5.564988908182557, "learning_rate": 3.634671446051119e-10, "loss": 0.4377, "step": 34789 }, { "epoch": 1.0, "grad_norm": 5.238079764113133, "learning_rate": 3.5789691576582075e-10, "loss": 0.3673, "step": 34790 }, { "epoch": 1.0, "grad_norm": 2.66474933663913, "learning_rate": 3.523696982321578e-10, "loss": 0.2433, "step": 34791 }, { "epoch": 1.0, "grad_norm": 3.5007552030377993, "learning_rate": 3.468854920518627e-10, "loss": 0.369, "step": 34792 }, { "epoch": 1.0, "grad_norm": 3.264206683241703, "learning_rate": 3.414442972710097e-10, "loss": 0.2667, "step": 34793 }, { "epoch": 1.0, "grad_norm": 5.726153474904778, "learning_rate": 3.3604611393789343e-10, "loss": 0.3573, "step": 34794 }, { "epoch": 1.0, "grad_norm": 9.425427780764268, "learning_rate": 3.3069094209747797e-10, "loss": 0.4357, "step": 34795 }, { "epoch": 1.0, "grad_norm": 4.713201987600534, "learning_rate": 3.2537878179694784e-10, "loss": 0.5693, "step": 34796 }, { "epoch": 1.0, "grad_norm": 8.368077676901239, "learning_rate": 3.201096330818221e-10, "loss": 0.3948, "step": 34797 }, { "epoch": 1.0, "grad_norm": 4.830583705579999, "learning_rate": 3.148834959970648e-10, "loss": 0.4034, "step": 34798 }, { "epoch": 1.0, "grad_norm": 6.625459115296415, "learning_rate": 3.0970037058764004e-10, "loss": 0.7003, "step": 34799 }, { "epoch": 1.0, "grad_norm": 3.54950304179606, "learning_rate": 3.045602568985118e-10, "loss": 0.2939, "step": 34800 }, { "epoch": 1.0, "grad_norm": 10.998210327906541, "learning_rate": 2.9946315497353385e-10, "loss": 0.4381, "step": 34801 }, { "epoch": 1.0, "grad_norm": 4.253398069743087, "learning_rate": 2.9440906485656005e-10, "loss": 0.3177, "step": 34802 }, { "epoch": 1.0, "grad_norm": 6.084282979479969, "learning_rate": 2.8939798659144424e-10, "loss": 0.4389, "step": 34803 }, { "epoch": 1.0, "grad_norm": 3.140137022454697, "learning_rate": 2.8442992022037485e-10, "loss": 0.4561, "step": 34804 }, { "epoch": 1.0, "grad_norm": 8.306249233042019, "learning_rate": 2.7950486578776083e-10, "loss": 0.3694, "step": 34805 }, { "epoch": 1.0, "grad_norm": 9.377173057476636, "learning_rate": 2.7462282333412525e-10, "loss": 0.7741, "step": 34806 }, { "epoch": 1.0, "grad_norm": 5.314082874773869, "learning_rate": 2.697837929027669e-10, "loss": 0.3875, "step": 34807 }, { "epoch": 1.0, "grad_norm": 7.0605958211423685, "learning_rate": 2.649877745353191e-10, "loss": 0.3213, "step": 34808 }, { "epoch": 1.0, "grad_norm": 4.104962347650172, "learning_rate": 2.602347682717499e-10, "loss": 0.4223, "step": 34809 }, { "epoch": 1.0, "grad_norm": 3.2481222229990303, "learning_rate": 2.555247741542477e-10, "loss": 0.0932, "step": 34810 }, { "epoch": 1.0, "grad_norm": 6.8938210097224, "learning_rate": 2.508577922227806e-10, "loss": 0.5422, "step": 34811 }, { "epoch": 1.0, "grad_norm": 11.571112811777477, "learning_rate": 2.4623382251787176e-10, "loss": 0.5169, "step": 34812 }, { "epoch": 1.0, "grad_norm": 4.071040522216982, "learning_rate": 2.416528650783789e-10, "loss": 0.4277, "step": 34813 }, { "epoch": 1.0, "grad_norm": 4.045212260478143, "learning_rate": 2.3711491994482526e-10, "loss": 0.2191, "step": 34814 }, { "epoch": 1.0, "grad_norm": 7.029861584381932, "learning_rate": 2.326199871560686e-10, "loss": 0.3655, "step": 34815 }, { "epoch": 1.0, "grad_norm": 7.039535658976829, "learning_rate": 2.2816806674985647e-10, "loss": 0.3808, "step": 34816 }, { "epoch": 1.0, "grad_norm": 4.142647706220357, "learning_rate": 2.2375915876560183e-10, "loss": 0.1702, "step": 34817 }, { "epoch": 1.0, "grad_norm": 3.9476650966452302, "learning_rate": 2.1939326324049714e-10, "loss": 0.3565, "step": 34818 }, { "epoch": 1.0, "grad_norm": 4.2228466912023555, "learning_rate": 2.1507038021228998e-10, "loss": 0.18, "step": 34819 }, { "epoch": 1.0, "grad_norm": 3.8383346650180226, "learning_rate": 2.1079050971872793e-10, "loss": 0.3345, "step": 34820 }, { "epoch": 1.0, "grad_norm": 6.421029288115376, "learning_rate": 2.0655365179589325e-10, "loss": 0.6538, "step": 34821 }, { "epoch": 1.0, "grad_norm": 10.514841217714144, "learning_rate": 2.023598064804233e-10, "loss": 0.4714, "step": 34822 }, { "epoch": 1.0, "grad_norm": 5.443253553420616, "learning_rate": 1.982089738084003e-10, "loss": 0.3885, "step": 34823 }, { "epoch": 1.0, "grad_norm": 5.2568453711251975, "learning_rate": 1.9410115381535143e-10, "loss": 0.5561, "step": 34824 }, { "epoch": 1.0, "grad_norm": 5.7856526699519195, "learning_rate": 1.900363465373589e-10, "loss": 0.6584, "step": 34825 }, { "epoch": 1.0, "grad_norm": 21.734575019755223, "learning_rate": 1.8601455200883967e-10, "loss": 0.4888, "step": 34826 }, { "epoch": 1.0, "grad_norm": 5.554345314871147, "learning_rate": 1.8203577026421059e-10, "loss": 0.6275, "step": 34827 }, { "epoch": 1.0, "grad_norm": 2.089419990429443, "learning_rate": 1.7810000133788863e-10, "loss": 0.094, "step": 34828 }, { "epoch": 1.0, "grad_norm": 8.023885177707351, "learning_rate": 1.7420724526373557e-10, "loss": 0.6019, "step": 34829 }, { "epoch": 1.0, "grad_norm": 2.9877612490528005, "learning_rate": 1.7035750207561319e-10, "loss": 0.5145, "step": 34830 }, { "epoch": 1.0, "grad_norm": 5.738227360980687, "learning_rate": 1.665507718062731e-10, "loss": 0.2056, "step": 34831 }, { "epoch": 1.0, "grad_norm": 3.31386584787032, "learning_rate": 1.6278705448791177e-10, "loss": 0.2921, "step": 34832 }, { "epoch": 1.0, "grad_norm": 3.9298547819112484, "learning_rate": 1.5906635015439099e-10, "loss": 0.1599, "step": 34833 }, { "epoch": 1.0, "grad_norm": 10.190653821804414, "learning_rate": 1.553886588362419e-10, "loss": 0.6808, "step": 34834 }, { "epoch": 1.0, "grad_norm": 2.650116582321373, "learning_rate": 1.5175398056566094e-10, "loss": 0.1994, "step": 34835 }, { "epoch": 1.0, "grad_norm": 4.960076733503222, "learning_rate": 1.481623153742895e-10, "loss": 0.3787, "step": 34836 }, { "epoch": 1.0, "grad_norm": 6.734408597574707, "learning_rate": 1.4461366329265868e-10, "loss": 0.5837, "step": 34837 }, { "epoch": 1.0, "grad_norm": 7.231511812078185, "learning_rate": 1.4110802435074455e-10, "loss": 0.527, "step": 34838 }, { "epoch": 1.0, "grad_norm": 12.160138486462591, "learning_rate": 1.3764539857963332e-10, "loss": 0.5312, "step": 34839 }, { "epoch": 1.0, "grad_norm": 5.089739197530356, "learning_rate": 1.3422578600930102e-10, "loss": 0.6722, "step": 34840 }, { "epoch": 1.0, "grad_norm": 2.580322831567346, "learning_rate": 1.3084918666805833e-10, "loss": 0.4567, "step": 34841 }, { "epoch": 1.0, "grad_norm": 4.294389984100277, "learning_rate": 1.275156005853262e-10, "loss": 0.3191, "step": 34842 }, { "epoch": 1.0, "grad_norm": 3.290009206045889, "learning_rate": 1.2422502779052547e-10, "loss": 0.1733, "step": 34843 }, { "epoch": 1.0, "grad_norm": 3.5140845251008, "learning_rate": 1.2097746831085666e-10, "loss": 0.5279, "step": 34844 }, { "epoch": 1.0, "grad_norm": 9.058880050102552, "learning_rate": 1.1777292217518555e-10, "loss": 0.698, "step": 34845 }, { "epoch": 1.0, "grad_norm": 3.69470329182562, "learning_rate": 1.146113894107126e-10, "loss": 0.2602, "step": 34846 }, { "epoch": 1.0, "grad_norm": 7.268565462743298, "learning_rate": 1.1149287004463827e-10, "loss": 0.5406, "step": 34847 }, { "epoch": 1.0, "grad_norm": 4.410804072492676, "learning_rate": 1.0841736410360793e-10, "loss": 0.5547, "step": 34848 }, { "epoch": 1.0, "grad_norm": 3.744747297778416, "learning_rate": 1.0538487161426691e-10, "loss": 0.5566, "step": 34849 }, { "epoch": 1.0, "grad_norm": 7.486881990990741, "learning_rate": 1.0239539260270547e-10, "loss": 0.4176, "step": 34850 }, { "epoch": 1.0, "grad_norm": 12.167156122543656, "learning_rate": 9.944892709501385e-11, "loss": 0.2762, "step": 34851 }, { "epoch": 1.0, "grad_norm": 6.201778448280345, "learning_rate": 9.654547511561696e-11, "loss": 0.1834, "step": 34852 }, { "epoch": 1.0, "grad_norm": 3.0671836693652845, "learning_rate": 9.368503669060502e-11, "loss": 0.2945, "step": 34853 }, { "epoch": 1.0, "grad_norm": 8.739824454641827, "learning_rate": 9.086761184329274e-11, "loss": 0.5616, "step": 34854 }, { "epoch": 1.0, "grad_norm": 6.3457970534465, "learning_rate": 8.809320059921522e-11, "loss": 0.6604, "step": 34855 }, { "epoch": 1.0, "grad_norm": 8.943240902830993, "learning_rate": 8.536180298168717e-11, "loss": 0.5721, "step": 34856 }, { "epoch": 1.0, "grad_norm": 6.332043750710594, "learning_rate": 8.267341901402326e-11, "loss": 0.219, "step": 34857 }, { "epoch": 1.0, "grad_norm": 4.719666549897791, "learning_rate": 8.002804871953818e-11, "loss": 0.6607, "step": 34858 }, { "epoch": 1.0, "grad_norm": 3.7135128922526945, "learning_rate": 7.742569212043638e-11, "loss": 0.2478, "step": 34859 }, { "epoch": 1.0, "grad_norm": 5.8025104449965825, "learning_rate": 7.486634924058767e-11, "loss": 0.5636, "step": 34860 }, { "epoch": 1.0, "grad_norm": 3.865881173257971, "learning_rate": 7.235002010053116e-11, "loss": 0.3501, "step": 34861 }, { "epoch": 1.0, "grad_norm": 7.471812972860731, "learning_rate": 6.987670472247133e-11, "loss": 0.3567, "step": 34862 }, { "epoch": 1.0, "grad_norm": 4.1598773128501145, "learning_rate": 6.744640312750239e-11, "loss": 0.2758, "step": 34863 }, { "epoch": 1.0, "grad_norm": 6.675287718411914, "learning_rate": 6.505911533671861e-11, "loss": 0.8295, "step": 34864 }, { "epoch": 1.0, "grad_norm": 3.8185424760786444, "learning_rate": 6.271484137065908e-11, "loss": 0.583, "step": 34865 }, { "epoch": 1.0, "grad_norm": 6.415195713462116, "learning_rate": 6.041358124986296e-11, "loss": 0.6828, "step": 34866 }, { "epoch": 1.0, "grad_norm": 5.829777583006738, "learning_rate": 5.8155334993204024e-11, "loss": 0.3755, "step": 34867 }, { "epoch": 1.0, "grad_norm": 3.9331581760997376, "learning_rate": 5.594010262066629e-11, "loss": 0.386, "step": 34868 }, { "epoch": 1.0, "grad_norm": 13.715298994131082, "learning_rate": 5.376788415167866e-11, "loss": 0.653, "step": 34869 }, { "epoch": 1.0, "grad_norm": 9.630820756044553, "learning_rate": 5.163867960400471e-11, "loss": 0.4707, "step": 34870 }, { "epoch": 1.0, "grad_norm": 4.631037199868926, "learning_rate": 4.955248899707332e-11, "loss": 0.2794, "step": 34871 }, { "epoch": 1.0, "grad_norm": 5.494984680931946, "learning_rate": 4.750931234753786e-11, "loss": 0.4554, "step": 34872 }, { "epoch": 1.0, "grad_norm": 9.07470018716533, "learning_rate": 4.5509149674272114e-11, "loss": 0.7412, "step": 34873 }, { "epoch": 1.0, "grad_norm": 5.47736242876308, "learning_rate": 4.355200099337431e-11, "loss": 0.523, "step": 34874 }, { "epoch": 1.0, "grad_norm": 11.093512862988728, "learning_rate": 4.163786632260802e-11, "loss": 0.972, "step": 34875 }, { "epoch": 1.0, "grad_norm": 5.613618186230481, "learning_rate": 3.976674567807148e-11, "loss": 0.4525, "step": 34876 }, { "epoch": 1.0, "grad_norm": 3.12981638726315, "learning_rate": 3.793863907530781e-11, "loss": 0.2741, "step": 34877 }, { "epoch": 1.0, "grad_norm": 3.8336113676883365, "learning_rate": 3.615354653097036e-11, "loss": 0.2703, "step": 34878 }, { "epoch": 1.0, "grad_norm": 6.526625899102158, "learning_rate": 3.4411468060047136e-11, "loss": 0.6229, "step": 34879 }, { "epoch": 1.0, "grad_norm": 1.8372279076010354, "learning_rate": 3.271240367697104e-11, "loss": 0.0771, "step": 34880 }, { "epoch": 1.0, "grad_norm": 7.883639334801178, "learning_rate": 3.105635339728519e-11, "loss": 0.5071, "step": 34881 }, { "epoch": 1.0, "grad_norm": 10.287972777060686, "learning_rate": 2.9443317234867375e-11, "loss": 0.6958, "step": 34882 }, { "epoch": 1.0, "grad_norm": 4.1458167689997305, "learning_rate": 2.7873295203040272e-11, "loss": 0.1639, "step": 34883 }, { "epoch": 1.0, "grad_norm": 7.749526716978154, "learning_rate": 2.6346287316236784e-11, "loss": 0.3705, "step": 34884 }, { "epoch": 1.0, "grad_norm": 5.8008667396899085, "learning_rate": 2.4862293587224474e-11, "loss": 0.1977, "step": 34885 }, { "epoch": 1.0, "grad_norm": 5.4872592829780835, "learning_rate": 2.342131402821579e-11, "loss": 0.5247, "step": 34886 }, { "epoch": 1.0, "grad_norm": 12.267496342865433, "learning_rate": 2.2023348652533416e-11, "loss": 0.6657, "step": 34887 }, { "epoch": 1.0, "grad_norm": 9.162722334304448, "learning_rate": 2.0668397471834688e-11, "loss": 0.5535, "step": 34888 }, { "epoch": 1.0, "grad_norm": 8.356408079331201, "learning_rate": 1.935646049722184e-11, "loss": 0.6276, "step": 34889 }, { "epoch": 1.0, "grad_norm": 2.7564592796679457, "learning_rate": 1.8087537740907325e-11, "loss": 0.3116, "step": 34890 }, { "epoch": 1.0, "grad_norm": 9.240948914110007, "learning_rate": 1.6861629213438257e-11, "loss": 0.529, "step": 34891 }, { "epoch": 1.0, "grad_norm": 4.427309666252169, "learning_rate": 1.567873492480665e-11, "loss": 0.3833, "step": 34892 }, { "epoch": 1.0, "grad_norm": 4.3382269511232945, "learning_rate": 1.4538854886114729e-11, "loss": 0.3749, "step": 34893 }, { "epoch": 1.0, "grad_norm": 7.199552994998322, "learning_rate": 1.3441989106799391e-11, "loss": 0.6525, "step": 34894 }, { "epoch": 1.0, "grad_norm": 7.259298379849246, "learning_rate": 1.2388137595742422e-11, "loss": 0.9599, "step": 34895 }, { "epoch": 1.0, "grad_norm": 4.4535340238414785, "learning_rate": 1.1377300362935828e-11, "loss": 0.4109, "step": 34896 }, { "epoch": 1.0, "grad_norm": 4.802074335824918, "learning_rate": 1.040947741615117e-11, "loss": 0.2277, "step": 34897 }, { "epoch": 1.0, "grad_norm": 3.6958355200817317, "learning_rate": 9.484668764825344e-12, "loss": 0.3825, "step": 34898 }, { "epoch": 1.0, "grad_norm": 6.8179659832402075, "learning_rate": 8.6028744161748e-12, "loss": 0.4361, "step": 34899 }, { "epoch": 1.0, "grad_norm": 4.483320557506373, "learning_rate": 7.764094377415988e-12, "loss": 0.2679, "step": 34900 }, { "epoch": 1.0, "grad_norm": 5.9862322203777785, "learning_rate": 6.968328656320467e-12, "loss": 0.4905, "step": 34901 }, { "epoch": 1.0, "grad_norm": 4.013330657637033, "learning_rate": 6.215577260104688e-12, "loss": 0.4187, "step": 34902 }, { "epoch": 1.0, "grad_norm": 6.915269580366382, "learning_rate": 5.505840194319767e-12, "loss": 0.6673, "step": 34903 }, { "epoch": 1.0, "grad_norm": 7.25686866035214, "learning_rate": 4.839117466182153e-12, "loss": 0.7867, "step": 34904 }, { "epoch": 1.0, "grad_norm": 3.3120812565261977, "learning_rate": 4.215409080132738e-12, "loss": 0.3864, "step": 34905 }, { "epoch": 1.0, "grad_norm": 3.5871758577071637, "learning_rate": 3.6347150428328593e-12, "loss": 0.5051, "step": 34906 }, { "epoch": 1.0, "grad_norm": 5.647660789007721, "learning_rate": 3.0970353581682987e-12, "loss": 0.345, "step": 34907 }, { "epoch": 1.0, "grad_norm": 4.590295491815201, "learning_rate": 2.6023700311350596e-12, "loss": 0.4427, "step": 34908 }, { "epoch": 1.0, "grad_norm": 5.738504196416735, "learning_rate": 2.1507190667291457e-12, "loss": 0.806, "step": 34909 }, { "epoch": 1.0, "grad_norm": 4.448237256446224, "learning_rate": 1.742082467726114e-12, "loss": 0.6142, "step": 34910 }, { "epoch": 1.0, "grad_norm": 5.735847637456663, "learning_rate": 1.3764602380117454e-12, "loss": 0.7323, "step": 34911 }, { "epoch": 1.0, "grad_norm": 4.596888812762058, "learning_rate": 1.053852380916709e-12, "loss": 0.3208, "step": 34912 }, { "epoch": 1.0, "grad_norm": 6.38275295869471, "learning_rate": 7.742588992165623e-13, "loss": 0.4802, "step": 34913 }, { "epoch": 1.0, "grad_norm": 6.028144392631982, "learning_rate": 5.376797956868629e-13, "loss": 0.4014, "step": 34914 }, { "epoch": 1.0, "grad_norm": 6.279804044326865, "learning_rate": 3.4411507143783387e-13, "loss": 0.7286, "step": 34915 }, { "epoch": 1.0, "grad_norm": 2.3828728339889746, "learning_rate": 1.9356472868992115e-13, "loss": 0.1468, "step": 34916 }, { "epoch": 1.0, "grad_norm": 6.5349137830701824, "learning_rate": 8.602876855334785e-14, "loss": 0.5582, "step": 34917 }, { "epoch": 1.0, "grad_norm": 3.3979978079651243, "learning_rate": 2.1507192138336963e-14, "loss": 0.3757, "step": 34918 }, { "epoch": 1.0, "grad_norm": 4.213785231269565, "learning_rate": 0.0, "loss": 0.2497, "step": 34919 }, { "epoch": 1.0, "step": 34919, "total_flos": 1134176345958400.0, "train_loss": 0.48779953998131087, "train_runtime": 314674.3705, "train_samples_per_second": 0.888, "train_steps_per_second": 0.111 } ], "logging_steps": 1.0, "max_steps": 34919, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 1134176345958400.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }